# fix_print_html.lib   [plain text]


#
# fix_print_html.lib
#
#   Dan Scott  / <dan.scott (at) acm.org>
#   Ferg       / <gferg (at) sgi.com>
#
#   Used to prepare a single-file HTML variant for PDF/PostScript creation
#   through htmldoc.
#
# log:
#     16Oct2000 - initial entry <gferg (at) sgi.com>
#     03Apr2001 - fix for <preface>
#
#

#
# fix_print_html( $in, $out, $ttl )
#
#   Reads the single-file HTML document $in and produces two files:
#
#     $ttl - the title page: everything up to (not including) the line
#            carrying CLASS="TOC", with a little spacing markup added and
#            closing </DIV>, </BODY> and </HTML> tags appended.  It is only
#            written when both a CLASS="TITLEPAGE" line and a later
#            CLASS="TOC" line are seen.
#     $out - the document body: the lines before the title page, then
#            everything from the first chapter/preface/section line on
#            (the title page and table of contents are dropped), followed
#            by a series of markup clean-ups (see below).
#
#   A document whose first post-TOC marker is CLASS="SECT.." is treated as
#   an article; one that starts with CLASS="CHAP.." or CLASS="PREF.." is
#   treated as a book and additionally gets its heading levels rearranged.
#
#   Returns 1 on success.  On any open/write failure a message is printed
#   to STDOUT and 0 is returned.
#
sub fix_print_html {

   my($in,$out,$ttl) = @_;

   # Three-argument open with a lexical handle: the file name is taken
   # verbatim (no whitespace stripping, no mode characters interpreted)
   # and no global bareword handle is left behind.
   #
   my $in_fh;
   open($in_fh, '<', $in) || do {
        print "fix_print_html: cannot open $in: $!\n";
        return 0;
   };

   my $buf        = '';
   my $ttl_buf    = '';
   my $is_article = 0;

   # $indx tracks where we are in the input:
   #   -1  before the title page     (lines go to both buffers)
   #    0  inside the title page     (lines go to the title buffer only)
   #    1  inside the table of contents (lines are discarded)
   #    2  in the document body      (lines go to the body buffer)
   #
   my $indx = -1;

   # a lexical line variable keeps the caller's $_ intact
   #
   while( defined(my $line = <$in_fh>) ) {

         if( $indx == 1 ) {

             # ignore everything until we see the chapter or sect
             #
             if( $line =~ /CLASS="CHAP/i || $line =~ /CLASS="PREF/i ) {

                 $buf .= $line;
                 $indx++;

             } elsif( $line =~ /CLASS="SECT/ || $line =~ /CLASS="sect/ )  {

                 # a document that starts with a section is an article
                 #
                 $buf .= $line;
                 $indx++;
                 $is_article = 1;
             }

         } elsif( $indx == 0 ) {

             if( $line =~ /CLASS="TOC"/ ) {

                 # the title page is complete: close it off, add some
                 # space after the first </H1 and write out the file
                 #
                 $ttl_buf .= "></DIV>\n</BODY>\n</HTML>\n";
                 $ttl_buf =~ s/<\/H1\n/<\/H1\n><P><BR><BR\n/ms;

                 if( !_fix_print_html_write($ttl, $ttl_buf) ) {
                     close($in_fh);
                     return 0;
                 }
                 $ttl_buf = '';
                 $indx++;

             } else {
                 $ttl_buf .= $line;
             }

         } elsif( $indx < 0 ) {

             # up to this point, both buffers get the line
             #
             if( $line =~ /CLASS="TITLEPAGE"/ ) {

                 # the title page starts here; only the title buffer
                 # receives this line (plus some leading blank space)
                 #
                 $ttl_buf .= $line . ">\n<P>\n<BR><BR><BR><BR>\n</P\n";
                 $indx++;

             } else {
                 $buf .= $line;
                 $ttl_buf .= $line;
             }

         } else {

             $buf .= $line;
         }
   }
   close($in_fh);


   # make these corrections and write out the file
   #

   # drop the <P> wrapper around the first paragraph of a list item
   #
   $buf =~ s/(\n><LI\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
   $buf =~ s/(\n><LI\n><DIV\nCLASS="FORMALPARA"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;
   $buf =~ s/(\n><LI\nSTYLE="[^\"]+"\n)><P\n(.*?)<\/P\n>/$1$2\n/gms;

   # in books, section titles emitted as H1 are pushed down to H2
   #
   if( $is_article == 0 ) {
       $buf =~ s/(\nCLASS="SECT[TION\d]+"\n>)<H1\n(.*?)<\/H1/$1<H2\n$2<\/H2/gims;
       $buf =~ s/(\nCLASS="SECT[TION\d]+"\n><HR>)<H1\n(.*?)<\/H1/$1<H2\n$2<\/H2/gims;
   }
   $buf =~ s/<H1(\nCLASS="INDEXDIV"\n)(.*?)<\/H1/<H2$1$2<\/H2/gims;

   # cut the document off at the last index heading, if there is one,
   # and close the document by hand
   #
   my $cut = rindex($buf, "<H1\n><A\nNAME=\"DOC-INDEX\"");
   if( $cut < 0 ) {
       $cut = rindex($buf, "<H1\n><A\nNAME=\"doc-index\"");
   }
   if( $cut > -1 ) {
       $buf = substr($buf, 0, $cut);
       $buf .= "\n</BODY>\n</HTML>\n\n";
   }

   # character references and attributes/tags that are removed or
   # replaced for the print variant
   #
   $buf =~ s/\&\#13;//g;
   $buf =~ s/\&\#60;/\&lt;/g;
   $buf =~ s/\&\#62;/\&gt;/g;
   $buf =~ s/\&\#8211;/\-/g;
   $buf =~ s/WIDTH=\"\d\"//g;
   $buf =~ s/><[\/]*TBODY//g;
   $buf =~ s/><[\/]*THEAD//g;
   $buf =~ s/TYPE=\"1\"\n//gim;

   if( $is_article == 0 ) {

       # for books...push every heading down one level (H5 -> H6 first,
       # H1 -> H2 last, so no heading is shifted twice) and then re-set
       # the chapter level only to H1...
       #
       for my $level (reverse 1 .. 5) {
           my $lower = $level + 1;
           $buf =~ s/<H${level}/<H${lower}/g;
           $buf =~ s/<\/H${level}/<\/H${lower}/g;
       }

       # the -1 limit keeps trailing empty fields, so the newline(s) at
       # the end of the document survive the split/join round trip
       #
       my @lines = split(/\n/, $buf, -1);

       # true from a CLASS="CHAP.."/CLASS="PREF.." line up to the next
       # line that contains <DIV; H2 found in between goes back to H1
       #
       my $in_chapter_head = 0;
       for my $text (@lines) {

            if( $in_chapter_head ) {
                if( $text =~ /<DIV/ ) {
                    $in_chapter_head = 0;
                    next;
                }
                $text =~ s/<H2/<H1/g;
                $text =~ s/<\/H2/<\/H1/g;
            }
            if( $text =~ /^CLASS=\"CHAP/i
                ||
                $text =~ /^CLASS=\"PREF/i ) {
                $in_chapter_head = 1;
            }
       }

       $buf = join("\n", @lines);

   }

   # strip the DIV wrappers and any remaining <P> around list item text
   #
   $buf =~ s/><DIV\nCLASS="\w+"\n//gms;
   $buf =~ s/><\/DIV\n//gms;
   $buf =~ s/(><LI\n)><P\n(.*?)<\/P\n>(<\/LI\n)/$1$2$3/gms;

   return _fix_print_html_write($out, $buf);
}

#
# _fix_print_html_write( $path, $content )
#
#   Private helper of fix_print_html: creates/truncates $path and writes
#   $content to it.  Returns 1 on success; on failure prints a message to
#   STDOUT and returns 0.  print and close are both checked because
#   buffered write errors may only show up at close time.
#
sub _fix_print_html_write {

   my($path,$content) = @_;

   my $fh;
   open($fh, '>', $path) || do {
        print "fix_print_html: cannot open $path: $!\n";
        return 0;
   };

   my $ok = print {$fh} $content;
   $ok = close($fh) && $ok;
   if( !$ok ) {
       print "fix_print_html: cannot write $path: $!\n";
       return 0;
   }

   return 1;
}

# Return true from package include
#
1;