###################################################################### # Copyright (C) 1999-2001, International Business Machines # Corporation and others. All Rights Reserved. ###################################################################### # See: ftp://elsie.nci.nih.gov/pub/tzdata # where is "1999b" or a similar string. ###################################################################### # This package handles the parsing of time zone files. # Author: Alan Liu ###################################################################### # Usage: # Call ParseFile for each file to be imported. Then call ParseZoneTab # to add country data. Then call Postprocess to remove unused rules. package TZ; use strict; use Carp; use vars qw(@ISA @EXPORT $VERSION $YEAR $STANDARD); require 'dumpvar.pl'; @ISA = qw(Exporter); @EXPORT = qw(ParseFile Postprocess ParseZoneTab ); $VERSION = '0.2'; $STANDARD = '-'; # Name of the Standard Time rule ###################################################################### # Read the tzdata zone.tab file and add a {country} field to zones # in the given hash. # Param: File name (/zone.tab) # Param: Ref to hash of zones # Param: Ref to hash of links sub ParseZoneTab { my ($FILE, $ZONES, $LINKS) = @_; my %linkEntries; local(*FILE); open(FILE,"<$FILE") or confess "Can't open $FILE: $!"; while () { # Handle comments s/\#.*//; next if (!/\S/); if (/^\s*([A-Z]{2})\s+[-+0-9]+\s+(\S+)/) { my ($country, $zone) = ($1, $2); if (exists $ZONES->{$zone}) { $ZONES->{$zone}->{country} = $country; } elsif (exists $LINKS->{$zone}) { # We have a country mapping for a zone that isn't in # our hash. This means it is a link entry. Save this # then handle it below. $linkEntries{$zone} = $country; } else { print STDERR "Nonexistent zone $zone in $FILE\n"; } } else { confess "Can't parse line \"$_\" of $FILE"; } } close(FILE); # Now that we have mapped all of the zones in %$ZONES (except # those without country affiliations), process the link entries. # For those zones in the table that differ by country from their # source zone, instantiate a new zone in the new country. An # example is Europe/Vatican, which is linked to Europe/Rome. If # we don't instantiate it, we have nothing for Vatican City. # Another example is America/Shiprock, which links to # America/Denver. These are identical and both in the US, so we # don't instantiate America/Shiprock. foreach my $zone (keys %linkEntries) { my $country = $linkEntries{$zone}; my $linkZone = $LINKS->{$zone}; my $linkCountry = $ZONES->{$linkZone}->{country}; if ($linkCountry ne $country) { # print "Cloning $zone ($country) from $linkZone ($linkCountry)\n"; _CloneZone($ZONES, $LINKS->{$zone}, $zone); $ZONES->{$zone}->{country} = $country; } } } ###################################################################### # Param: File name # Param: Ref to hash of zones # Param: Ref to hash of rules # Parma: Ref to hash of links # Param: Current year sub ParseFile { my ($FILE, $ZONES, $RULES, $LINKS, $YEAR) = @_; local(*FILE); open(FILE,"<$FILE") or confess "Can't open $FILE: $!"; my $zone; # Current zone my $badLineCount = 0; while () { # Handle comments and blanks s/\#.*//; next if (!/\S/); #|# Zone NAME GMTOFF RULES FORMAT [UNTIL] #|Zone America/Montreal -4:54:16 - LMT 1884 #| -5:00 Mont E%sT #|Zone America/Thunder_Bay -5:57:00 - LMT 1895 #| -5:00 Canada E%sT 1970 #| -5:00 Mont E%sT 1973 #| -5:00 - EST 1974 #| -5:00 Canada E%sT my ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil); if (/^zone/i) { # Zone block start if (/^zone\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/i || /^zone\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)()/i) { $zone = $1; ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil) = ($2, $3, $4, $5); } else { print STDERR "Can't parse in $FILE: $_"; ++$badLineCount; } } elsif (/^\s/ && $zone) { # Zone continuation if (/^\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/ || /^\s+(\S+)\s+(\S+)\s+(\S+)()/) { ($zoneGmtoff, $zoneRule, $zoneFormat, $zoneUntil) = ($1, $2, $3, $4); } else { print STDERR "Can't parse in $FILE: $_"; ++$badLineCount; } } elsif (/^rule/i) { # Here is where we parse a single line of the rule table. # Our goal is to accept only rules applying to the current # year. This is normally a matter of accepting rules # that match the current year. However, in some cases this # is more complicated. For example: #|# Tonga #|# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S #|Rule Tonga 1999 max - Oct Sat>=1 2:00s 1:00 S #|Rule Tonga 2000 max - Apr Sun>=16 2:00s 0 - # To handle this properly, we save every rule we encounter # (thus overwriting older ones with newer ones, since rules # are listed in order), and also use slot [2] to mark when # we see a current year rule. When that happens, we stop # saving rules. Thus we match the latest rule we see, or # a matching rule if we find one. The format of slot [2] # is just a 2 bit flag ([2]&1 means slot [0] matched, # [2]&2 means slot [1] matched). # Note that later, when the rules are post processed # (see Postprocess), the slot [2] will be overwritten # with the compressed rule string used to implement # equality testing. $zone = undef; # Rule #|# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S #|Rule US 1918 1919 - Mar lastSun 2:00 1:00 W # War #|Rule US 1918 1919 - Oct lastSun 2:00 0 S #|Rule US 1942 only - Feb 9 2:00 1:00 W # War #|Rule US 1945 only - Sep 30 2:00 0 S #|Rule US 1967 max - Oct lastSun 2:00 0 S #|Rule US 1967 1973 - Apr lastSun 2:00 1:00 D #|Rule US 1974 only - Jan 6 2:00 1:00 D #|Rule US 1975 only - Feb 23 2:00 1:00 D #|Rule US 1976 1986 - Apr lastSun 2:00 1:00 D #|Rule US 1987 max - Apr Sun>=1 2:00 1:00 D if (/^rule\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+ (\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)/xi) { my ($name, $from, $to, $type, $in, $on, $at, $save, $letter) = ($1, $2, $3, $4, $5, $6, $7, $8, $9); my $i = $save ? 0:1; if (!exists $RULES->{$name}) { $RULES->{$name} = []; } my $ruleArray = $RULES->{$name}; # Check our bit mask to see if we've already matched # a current rule. If so, do nothing. If not, then # save this rule line as the best one so far. if (@{$ruleArray} < 3 || !($ruleArray->[2] & 1 << $i)) { my $h = $ruleArray->[$i]; $ruleArray->[$i]->{from} = $from; $ruleArray->[$i]->{to} = $to; $ruleArray->[$i]->{type} = $type; $ruleArray->[$i]->{in} = $in; $ruleArray->[$i]->{on} = $on; $ruleArray->[$i]->{at} = $at; $ruleArray->[$i]->{save} = $save; $ruleArray->[$i]->{letter} = $letter; # Does this rule match the current year? If so, # set the bit mask so we don't overwrite this rule. # This makes us ingore rules for subsequent years # that are already listed in the database -- as long # as we have an overriding rule for the current year. if (($from == $YEAR && $to =~ /only/i) || ($from <= $YEAR && (($to =~ /^\d/ && $YEAR <= $to) || $to =~ /max/i))) { $ruleArray->[2] |= 1 << $i; $ruleArray->[3] |= 1 << $i; } } } else { print STDERR "Can't parse in $FILE: $_"; ++$badLineCount; } } elsif (/^link/i) { #|# Old names, for S5 users #| #|# Link LINK-FROM LINK-TO #|Link America/New_York EST5EDT #|Link America/Chicago CST6CDT #|Link America/Denver MST7MDT #|Link America/Los_Angeles PST8PDT #|Link America/Indianapolis EST #|Link America/Phoenix MST #|Link Pacific/Honolulu HST # # There are also links for country-specific zones. # These are zones the differ only in that they belong # to a different country. E.g., #|Link Europe/Rome Europe/Vatican #|Link Europe/Rome Europe/San_Marino if (/^link\s+(\S+)\s+(\S+)/i) { my ($from, $to) = ($1, $2); # Record all links in $%LINKS $LINKS->{$to} = $from; } else { print STDERR "Can't parse in $FILE: $_"; ++$badLineCount; } } else { # Unexpected line print STDERR "Ignoring in $FILE: $_"; ++$badLineCount; } if ($zoneRule && ($zoneUntil !~ /\S/ || ($zoneUntil =~ /^\d/ && $zoneUntil >= $YEAR))) { $ZONES->{$zone}->{gmtoff} = $zoneGmtoff; $ZONES->{$zone}->{rule} = $zoneRule; $ZONES->{$zone}->{format} = $zoneFormat; $ZONES->{$zone}->{until} = $zoneUntil; } } close(FILE); } ###################################################################### # Param: Ref to hash of zones # Param: Ref to hash of rules sub Postprocess { my ($ZONES, $RULES) = @_; my %ruleInUse; # We no longer store links in the zone hash, so we don't need to do this. # # Eliminate zone links that have no corresponding zone # foreach (keys %$ZONES) { # if (exists $ZONES->{$_}->{link} && !exists $ZONES->{$_}->{rule}) { # if (0) { # print STDERR # "Deleting link from historical/nonexistent zone: ", # $_, " -> ", $ZONES->{$_}->{link}, "\n"; # } # delete $ZONES->{$_}; # } # } # Check that each zone has a corresponding rule. At the same # time, build up a hash that marks each rule that is in use. foreach (sort keys %$ZONES) { my $ruleName = $ZONES->{$_}->{rule}; next if ($ruleName eq $STANDARD); if (exists $RULES->{$ruleName}) { $ruleInUse{$ruleName} = 1; } else { # This means the zone is using the standard rule now $ZONES->{$_}->{rule} = $STANDARD; } } # Check that both parts are there for rules # Check for unused rules # Make coded string for comparisons foreach (keys %$RULES) { if (!exists $ruleInUse{$_}) { if (0) { print STDERR "Deleting historical/unused rule: $_\n"; } delete $RULES->{$_}; } elsif (!$RULES->{$_}->[0] || !$RULES->{$_}->[1]) { print STDERR "Rule doesn't have both parts: $_\n"; } else { # Generate coded string # This has all the data about a rule; it can be used # to see if two rules behave identically $RULES->{$_}->[2] = lc($RULES->{$_}->[0]->{in} . "," . $RULES->{$_}->[0]->{on} . "," . $RULES->{$_}->[0]->{at} . "," . $RULES->{$_}->[0]->{save} . ";" . $RULES->{$_}->[1]->{in} . "," . $RULES->{$_}->[1]->{on} . "," . $RULES->{$_}->[1]->{at}); # [1]->{save} is always zero } } } ###################################################################### # Create a clone of the zone $oldID named $newID in the hash $ZONES. # Param: ref to hash of zones # Param: ID of zone to clone # Param: ID of new zone sub _CloneZone { my $ZONES = shift; my $oldID = shift; my $newID = shift; for my $field (keys %{$ZONES->{$oldID}}) { $ZONES->{$newID}->{$field} = $ZONES->{$oldID}->{$field}; } }