# filterRFC3454.pl   [plain text]


#!/usr/bin/perl
# Copyright (c) 2001-2003 International Business Machines
# Corporation and others. All Rights Reserved.

####################################################################################
# filterRFC3454.pl:
# This tool filters the RFC-3454 txt file for String prep tables
# Author: Ram Viswanadha
#        
####################################################################################

use File::Find;
use File::Basename;
use IO::File;
use Cwd;
use File::Copy;
use Getopt::Long;
use File::Path;
use File::Copy;

# Banner written at the top of every generated table file. It is a package
# variable because the table-writing subs in this file read it by name.
our $warning = "###################\n# WARNING: This table is generated by filterRFC3454.pl tool. DO NOT EDIT \n#################\n\n";

# Start processing; all of the work happens inside main().
main();

#---------------------------------------------------------------------
# The main program

# Entry point of the script.
#
# Parses the command-line options, opens <sourcedir>/<filename> and scans it
# for "Start Table <id>" marker lines. Each recognised table is handed to the
# sub that writes the matching output file; that sub consumes the input up to
# the table's "End Table" line, after which scanning resumes here.
#
# Options (all three are required):
#   --sourcedir=<directory>   directory containing the RFC text
#   --destdir=<directory>     directory the table files are written to
#   --filename=<name>         name of the RFC text file inside sourcedir
#
# Dies if the input file cannot be opened. Calls usage() when the options
# are malformed or incomplete.
sub main {
    my ($sourceDir, $destDir, $fileName);

    # GetOptions returns false after reporting an unknown or malformed option;
    # do not carry on with a half-parsed command line. A non-zero status is
    # handed to usage() so the bad invocation can be reported as a failure
    # (what exit code is actually used is up to usage()).
    GetOptions(
        "--sourcedir=s" => \$sourceDir,
        "--destdir=s"   => \$destDir,
        "--filename=s"  => \$fileName,
    ) or usage(1);
    usage(1) unless defined $sourceDir && defined $destDir && defined $fileName;

    my $infile = $sourceDir."/".$fileName;
    my $inFH = IO::File->new($infile, "r")
        or die "could not open the file $infile for reading: $! \n";

    # $line is lexical on purpose: the table writers read further lines from
    # the same handle and must not be able to overwrite the marker line that
    # is being dispatched on here.
    while (defined(my $line = <$inFH>)) {
        # Pull the table identifier (e.g. "A.1", "C.1.1") out of the marker
        # line so that it can be compared exactly instead of with loose,
        # unanchored patterns in which "." matches any character.
        next unless $line =~ /Start\sTable\s+([A-Z]\.\d+(?:\.\d+)*)/;
        my $table = $1;

        if ($table eq 'A.1') {
            createUnassignedTable($inFH, $destDir);
        }
        elsif ($table eq 'B.1') {
            createCaseMapNoNorm($inFH, $destDir);
        }
        elsif ($table eq 'B.2') {
            createCaseMap($inFH, $destDir);
        }
        elsif ($table =~ /^C\./) {
            # Every C.* table goes through the same writer, which needs the
            # whole marker line to build its header comment.
            createProhibitedTable($inFH, $destDir, $line);
        }
        # Any other table id is not extracted; its lines are skipped by the
        # "next unless" above.
    }
    close($inFH);
}

#-----------------------------------------------------------------------
# Copies the body of one table from the RFC text to an output handle.
#
# Parameters (positional):
#   $inFH    - input handle, positioned just after a "Start Table" line
#   $outFH   - output handle the filtered table is written to
#   $comment - header text; written first, followed by an extra newline
#   $print   - optional; when it is 1 every data line is echoed to STDOUT
#              (before any rewriting)
#
# For every data line the leading whitespace is removed, a leading
# "XXXX-YYYY" code point range is rewritten as "XXXX..YYYY", and a ";" is
# appended (before the line ending) when the line contains none. Page
# header/footer lines, form feeds and blank lines are dropped.
#
# Reading stops at the first line containing "End Table"; at that point a
# "# Total code points N" trailer is written. If the input ends before such
# a line is seen, no trailer is written.
#
# Returns nothing useful.
sub readPrint{
    my ($inFH, $outFH, $comment, $print) = @_;
    my $count = 0;

    print {$outFH} $comment."\n";

    while (defined(my $line = <$inFH>)) {
        next if $line =~ /Hoffman\s\&\sBlanchet/;  # ignore heading
        next if $line =~ /RFC\s3454/;              # ignore heading
        next if $line =~ /\f/;                     # ignore form feed
        # Ignore blank lines, including whitespace-only ones, which would
        # otherwise be written out (and counted) as a lone ";".
        next if $line =~ /^\s*$/;

        if ($line =~ /End\sTable/) {
            print {$outFH} "\n# Total code points $count\n\n";
            return;
        }

        if (defined $print && $print == 1) {
            print $line;
        }

        $line =~ s/^\s+//;
        # Only a hyphen that joins two hex code points denotes a range. A
        # hyphen later in the line (e.g. in "NO-BREAK SPACE") is part of the
        # descriptive text and must be left untouched.
        $line =~ s/^([0-9A-Fa-f]+)-([0-9A-Fa-f]+)/$1..$2/;
        $line =~ s/$/;/ unless $line =~ /;/;

        if ($line =~ /^([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)/) {
            # Inclusive size of the range; a reversed range counts as empty.
            my ($start, $end) = (hex($1), hex($2));
            $count += $end - $start + 1 if $end >= $start;
        }
        else {
            $count++;
        }

        print {$outFH} $line;
    }
    return;
}
#-----------------------------------------------------------------------
# Converts a string of hexadecimal digits (e.g. "00A0") to its numeric value.
#
# Parameter:
#   the text to convert; leading whitespace and anything after the leading
#   run of hex digits are ignored.
#
# Returns the numeric value, or undef when the argument is undefined or does
# not start with a hex digit.
#
# The previous digit-by-digit loop added each character numerically, so the
# digits A-F (and stray whitespace) counted as 0 and produced wrong values.
sub atoi {
    my ($str) = @_;
    # "return undef" rather than a bare "return": callers have always received
    # exactly one scalar from this sub, even in list context.
    return undef unless defined $str;
    my ($digits) = $str =~ /^\s*([0-9A-Fa-f]+)/;
    return undef unless defined $digits;
    return hex($digits);
}
#-----------------------------------------------------------------------
# Writes Table A.1 of the RFC to "<destDir>/rfc3454_A_1.txt".
#
# Parameters (positional):
#   $inFH    - input handle of the RFC text; handed to readPrint, which reads
#              from it up to the table's "End Table" line
#   $destDir - directory in which the output file is created
#
# The file starts with the generated-file warning banner ($warning) and a
# line naming the table. Dies if the output file cannot be opened or closed.
sub createUnassignedTable{
    # Lexicals, so that this sub does not overwrite same-named package
    # variables used elsewhere in the script.
    my ($inFH, $destDir) = @_;
    my $outfile = $destDir."/"."rfc3454_A_1.txt";
    my $outFH = IO::File->new($outfile,"w")
            or die  "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This file contains code points from Table A.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment);
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
            or die  "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Writes Table B.1 of the RFC to "<destDir>/rfc3454_B_1.txt".
#
# Parameters (positional):
#   $inFH    - input handle of the RFC text; handed to readPrint, which reads
#              from it up to the table's "End Table" line
#   $destDir - directory in which the output file is created
#
# The file starts with the generated-file warning banner ($warning) and a
# line naming the table. Dies if the output file cannot be opened or closed.
sub createCaseMapNoNorm{
    # Lexicals, so that this sub does not overwrite same-named package
    # variables used elsewhere in the script.
    my ($inFH, $destDir) = @_;
    my $outfile = $destDir."/"."rfc3454_B_1.txt";
    my $outFH = IO::File->new($outfile,"w")
            or die  "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This file contains code points from Table B.1 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment);
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
            or die  "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Writes Table B.2 of the RFC to "<destDir>/rfc3454_B_2.txt".
#
# Parameters (positional):
#   $inFH    - input handle of the RFC text; handed to readPrint, which reads
#              from it up to the table's "End Table" line
#   $destDir - directory in which the output file is created
#
# The file starts with the generated-file warning banner ($warning) and a
# line naming the table. Dies if the output file cannot be opened or closed.
sub createCaseMap{
    # Lexicals, so that this sub does not overwrite same-named package
    # variables used elsewhere in the script.
    my ($inFH, $destDir) = @_;
    my $outfile = $destDir."/"."rfc3454_B_2.txt";
    my $outFH = IO::File->new($outfile,"w")
            or die  "could not open the file $outfile for writing: $! \n";
    my $comment = $warning."# This file contains code points from Table B.2 from RFC 3454\n";
    readPrint($inFH, $outFH, $comment);
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
            or die  "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Appends one C.* table of the RFC to "<destDir>/rfc3454_C_X.txt".
#
# Parameters (positional):
#   $inFH    - input handle of the RFC text; handed to readPrint, which reads
#              from it up to the table's "End Table" line
#   $destDir - directory in which the output file lives
#   $line    - the "Start Table" marker line of the table being written
#
# All C tables share one output file, so it is opened in append mode. When
# the marker line names table C.1.1 and the file already exists, the file is
# deleted first so that it is rebuilt from scratch.
# NOTE(review): this relies on C.1.1 being the first C table in the input;
# otherwise output from an earlier run would be appended to - confirm.
#
# Each table is preceded by the warning banner ($warning) and a
# "# code points from ..." line derived from the marker line.
# Dies if the file cannot be deleted, opened or closed.
sub createProhibitedTable{
    # Lexicals, so that this sub does not overwrite same-named package
    # variables used elsewhere in the script (notably the caller's $line).
    my ($inFH, $destDir, $line) = @_;
    my $outfile = $destDir."/"."rfc3454_C_X.txt";

    # Dots are escaped and the id is terminated with \b so that only the
    # literal table id "C.1.1" triggers the reset.
    if ($line =~ /C\.1\.1\b/ && -e $outfile) {
        unlink($outfile)
            or die "could not delete the file $outfile : $! \n";
    }

    # Reduce the marker line to its "Table C.x.y" part: drop the word
    # "Start" and all the dashes. The trailing newline is kept.
    $line =~ s/Start//;
    $line =~ s/-//g;
    my $comment = $warning."# code points from $line";

    my $outFH = IO::File->new($outfile, "a")
            or die  "could not open the file $outfile for writing: $! \n";
    readPrint($inFH, $outFH, $comment);
    # Buffered write errors only surface when the handle is closed.
    close($outFH)
            or die  "could not close the file $outfile: $! \n";
}
#-----------------------------------------------------------------------
# Prints the command-line help to STDOUT and terminates the program.
#
# Parameter:
#   $exitCode - optional process exit status; defaults to 0, which is what
#               this sub has always exited with. Callers reporting a bad
#               invocation can pass a non-zero value.
#
# Does not return.
sub usage {
    my ($exitCode) = @_;
    $exitCode = 0 unless defined $exitCode;
    print <<"END";
Usage:
filterRFC3454.pl
Options:
        --sourcedir=<directory>
        --destdir=<directory>
        --filename=<name of RFC file>

e.g.: filterRFC3454.pl --sourcedir=. --destdir=./output --filename=rfc3454.txt

filterRFC3454.pl filters the RFC file and creates String prep table files.
The RFC text can be downloaded from ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt

END
  exit($exitCode);
}