html2man.in   [plain text]


#! @PATH_PERL@ -w
#
# html2man: Converts the NTP HTML documentation to man page format
#
# This file requires the Perl HTML::TokeParser module:
# http://search.cpan.org/search?module=HTML::TokeParser
#
# Depending on where this is run from, you might need to modify $MANDIR below.
#
# Hacked together by Peter Boettcher <boettcher@ll.mit.edu>
# Last modified: <Mon Jan 28 17:24:38 2002 by pwb>

require HTML::TokeParser;

# use strict;		# I can dream...

# Root of the generated man tree.  One "man<section>" subdirectory per
# section used in %manfiles is created beneath it.
# (Kept as a package global because process() reads it by name.)
$MANDIR = "./man";

# HTML files to convert, keyed by the HTML file's base name (without
# ".html").  Each value holds the per-file info:
#   [ name of man page, man section, 'see also' section ]
%manfiles = (
    'ntpd'     => ['ntpd', 8, 'ntp.conf(5), ntpq(8), ntpdc(8)'],
    'ntpq'     => ['ntpq', 8, 'ntpd(8), ntpdc(8)'],
    'ntpdate'  => ['ntpdate', 8, 'ntpd(8)'],
    'ntpdc'    => ['ntpdc', 8, 'ntpd(8)'],
    # The page name used to be misspelled "ntpdtime", which produced
    # man8/ntpdtime.8 instead of man8/ntptime.8.
    'ntptime'  => ['ntptime', 8, 'ntpd(8), ntpdate(8)'],
    'ntptrace' => ['ntptrace', 8, 'ntpd(8)'],
    'keygen'   => ['ntp-keygen', 8, 'ntpd(8), ntp_auth(5)'],
    'confopt'  => ['ntp.conf', 5, 'ntpd(8)'],
    'authopt'  => ['ntp_auth', 5, 'ntp.conf(5), ntpd(8)'],
    'monopt'   => ['ntp_mon', 5, 'ntp.conf(5)'],
    'accopt'   => ['ntp_acc', 5, 'ntp.conf(5)'],
    'clockopt' => ['ntp_clock', 5, 'ntp.conf(5)'],
    'miscopt'  => ['ntp_misc', 5, 'ntp.conf(5)']);

# Disclaimer to go in SEE ALSO section of the man page
$seealso_disclaimer = 'These man pages are automatically hacked from the main NTP ' .
    'documentation pages, which are maintained in HTML format.  These files are ' .
    'included in the NTP source distribution.  If you installed NTP from a binary ' .
    'package, or it came pre-installed on your system, chances are the documentation ' .
    'was also included in the usual place for your system.  The HTML files are more ' .
    'correct and complete than these man pages, which are provided for your reference ' .
    'only.';

# Disclaimer to go right at the top
$top_disclaimer = 'This file was automatically generated from HTML source, and may be ' .
    'incorrect.  See the SEE ALSO section at the end of this file for more info';

# Create the output tree.  The section directories are derived from
# %manfiles so that a page in a new section needs no change here.  A
# directory that already exists is fine; any other failure is reported
# now rather than as a confusing "Can't open" later on.
my %sections = map { $_->[1] => 1 } values %manfiles;
foreach my $dir ($MANDIR, map { "$MANDIR/man$_" } sort keys %sections) {
    next if -d $dir;
    mkdir($dir, 0777) || die "Can't create directory $dir: $!";
}

# Do the actual processing.  Sorted so that runs are reproducible.
foreach my $file (sort keys %manfiles) {
    process($file);
}
# End of main function



# Do the real work
# process($filename)
#
# Convert "$filename.html" (looked up in the current directory) into a man
# page.  The page name, section and SEE ALSO text come from
# $manfiles{$filename}; the result is written to
# "$MANDIR/man<section>/<name>.<section>".
#
# Dies if $filename has no entry in %manfiles, if the HTML file cannot be
# opened, or if the man page cannot be created or completely written.
sub process {
    my($filename) = @_;

    my $fileinfo = $manfiles{$filename}
        || die "No entry for '$filename' in the manfiles table\n";
    my($manname, $section, $seealso) = @$fileinfo;

    my $p = HTML::TokeParser->new("$filename.html")
        || die "Can't open $filename.html: $!";

    my $manpath = "$MANDIR/man$section/$manname.$section";
    open(my $manout, '>', $manpath) || die "Can't open $manpath: $!";

    $p->get_tag("title");
    my $name = $p->get_text("/title");
    $p->get_tag("hr");		# Skip past image and quote, hopefully

    # Setup man header
    print $manout ".TH $manname $section\n";
    print $manout ".UC 4\n";
    print $manout ".SH NAME\n";
    # Add the manpage name, if not in the HTML title already.  \Q..\E makes
    # the name match literally (the "." in "ntp.conf" is not a wildcard).
    print $manout "$manname - " unless $name =~ /\Q$manname\E/;
    print $manout "$name\n\n";

    print $manout "$top_disclaimer\n";

    # Per-file translation state.  These are lexicals so that nothing left
    # over from one HTML file (e.g. an unbalanced <dl> or <tt>) can affect
    # the next one.
    my @fontstack;		# open font tags ("tt"/"i"), innermost last
    my $deflevel = 0;		# .RS indent for the current <dl> nesting
    my $tag = 0;		# true when the previous token emitted a roff
				# request, so surrounding whitespace in the
				# next text token must be trimmed

    # Now start scanning.  We basically print everything after translating
    # some tags.
    # $token->[0] has "T", "S", "E" for Text, Start, End
    # $token->[1] has the tag name, or text (for "T" case)
    # There is lots more in the world of tokens, but it is not needed here.
    while (my $token = $p->get_token) {
        my($type, $what) = @$token;

        if ($type eq "T") {
            my $text = $what;
            if ($tag) {
                $text =~ s/^[\n ]*//;
                $text =~ s/[\n ]*$/ /;
            }
            # Raw text tokens still carry HTML entities; translate the
            # common ones.  "&amp;" goes last so "&amp;lt;" ends up as the
            # literal text "&lt;" rather than "<".
            $text =~ s/&nbsp;/ /g;
            $text =~ s/&lt;/</g;
            $text =~ s/&gt;/>/g;
            $text =~ s/&quot;/"/g;
            $text =~ s/&amp;/&/g;
            # A "." in column one would be read as a roff request; protect
            # every line of the token, not just the first.
            $text =~ s/^\./\\./mg;
            print $manout $text;
            $tag = 0;
        }
        elsif ($type eq "S") {
            if ($what eq "h4") {
                my $text = uc($p->get_trimmed_text("/h4"));
                print $manout ".SH $text\n";
            }
            elsif ($what eq "tt") {
                push @fontstack, "tt";
                print $manout "\\fB";
            }
            elsif ($what eq "i") {
                push @fontstack, "i";
                print $manout "\\fI";
            }
            elsif ($what eq "address") {
                my $text = $p->get_trimmed_text("/address");
                print $manout "\n.SH AUTHOR\n$text\n";
            }
            elsif ($what eq "dt") {
                # Terms sit one level (4 columns) left of their definitions.
                my $termlevel = $deflevel - 4;
                print $manout "\n.RS $termlevel\n";
                $tag = 1;
            }
            elsif ($what eq "dd") {
                print $manout "\n.RS $deflevel\n";
                $tag = 1;
            }
            elsif ($what eq "dl") {
                $deflevel += 4;
            }
        }
        elsif ($type eq "E") {
            if ($what eq "dd") {
                print $manout "\n.RE\n";
                $tag = 1;
            }
            elsif ($what eq "tt" || $what eq "i") {
                print $manout _pop_font(\@fontstack, $what);
            }
            elsif ($what eq "dl") {
                $deflevel -= 4;
            }
            elsif ($what eq "dt") {
                print $manout "\n.RE";
                $tag = 1;
            }
        }
    }

    print $manout ".SH SEE ALSO\n\n";
    print $manout "$seealso\n\n";
    print $manout "$seealso_disclaimer\n";
    # Buffered write errors (e.g. a full disk) only show up at close time.
    close($manout) || die "Can't write $manpath: $!";
}

# _pop_font(\@fontstack, $tagname)
#
# Handle the end of a font tag: remove the innermost entry from the font
# stack, warn if it is missing or is not $tagname, and return the roff
# escape that restores the font of whatever tag is now innermost ("\fR"
# when no font tag remains open).
sub _pop_font {
    my($fontstack, $tagname) = @_;

    my $closed = pop @$fontstack;
    if (!defined($closed) || $closed ne $tagname) {
        warn "Oops, mismatched font!  Trying to continue\n";
    }

    return "\\fR" unless @$fontstack;
    return $fontstack->[-1] eq "tt" ? "\\fB" : "\\fI";
}