use Getopt::Std;
$usage = "Usage: $0 <opts> file
Where <opts> are:
-p Link to points in the document. The default is to link
to the closest containing section.
-g Group terms with IndexDiv based on the first letter
of the term (or its sortas attribute).
(This probably doesn't handle i10n particularly well)
-s name Name the IndexDiv that contains symbols. The default
is 'Symbols'. Meaningless if -g is not used.
-t name Title for the index.
-P file Read a preamble from file. The content of file will
be inserted before the <index> tag.
-i id The ID for the <index> tag.
-o file Output to file. Defaults to stdout.
-S scope Scope of the index, must be 'all', 'local', or 'global'.
If unspecified, 'all' is assumed.
-I scope The implied scope, must be 'all', 'local', or 'global'.
IndexTerms which do not specify a scope will have the
implied scope. If unspecified, 'all' is assumed.
-x Make a SetIndex.
-f Force the output file to be written, even if it appears
to have been edited by hand.
-N New index (generates an empty index file).
file The file containing index data generated by Jade
with the DocBook HTML Stylesheet.\n";
die $usage if ! getopts('Dfgi:NpP:s:o:S:I:t:x');
$linkpoints = $opt_p;
$lettergroups = $opt_g;
$symbolsname = $opt_s || "Symbols";
$title = $opt_t;
$preamble = $opt_P;
$outfile = $opt_o || '-';
$indexid = $opt_i;
$scope = uc($opt_S) || 'ALL';
$impliedscope = uc($opt_I) || 'ALL';
$setindex = $opt_x;
$forceoutput = $opt_f;
$newindex = $opt_N;
$debug = $opt_D;
$indextag = $setindex ? 'setindex' : 'index';
if ($newindex) {
safe_open(*OUT, $outfile);
if ($indexid) {
print OUT "<$indextag id='$indexid'>\n\n";
} else {
print OUT "<$indextag>\n\n";
}
print OUT "<!-- This file was produced by collateindex.pl. -->\n";
print OUT "<!-- Remove this comment if you edit this file by hand! -->\n";
print OUT "</$indextag>\n";
exit 0;
}
$dat = shift @ARGV || die $usage;
die "$0: cannot find $dat.\n" if ! -f $dat;
%legal_scopes = ('ALL' => 1, 'LOCAL' => 1, 'GLOBAL' => 1);
if ($scope && !$legal_scopes{$scope}) {
die "Invalid scope.\n$usage\n";
}
if ($impliedscope && !$legal_scopes{$impliedscope}) {
die "Invalid implied scope.\n$usage\n";
}
@term = ();
%id = ();
$termcount = 0;
print STDERR "Processing $dat...\n";
open (F, $dat);
while (<F>) {
chop;
if (/^\/indexterm/i) {
push (@term, $idx);
next;
}
if (/^indexterm (.*)$/i) {
$termcount++;
$idx = {};
$idx->{'zone'} = {};
$idx->{'href'} = $1;
$idx->{'count'} = $termcount;
$idx->{'scope'} = $impliedscope;
next;
}
if (/^indexpoint (.*)$/i) {
$idx->{'hrefpoint'} = $1;
next;
}
if (/^title (.*)$/i) {
$idx->{'title'} = $1;
next;
}
if (/^primary[\[ ](.*)$/i) {
if (/^primary\[(.*?)\] (.*)$/i) {
$idx->{'psortas'} = $1;
$idx->{'primary'} = $2;
} else {
$idx->{'psortas'} = $1;
$idx->{'primary'} = $1;
}
next;
}
if (/^secondary[\[ ](.*)$/i) {
if (/^secondary\[(.*?)\] (.*)$/i) {
$idx->{'ssortas'} = $1;
$idx->{'secondary'} = $2;
} else {
$idx->{'ssortas'} = $1;
$idx->{'secondary'} = $1;
}
next;
}
if (/^tertiary[\[ ](.*)$/i) {
if (/^tertiary\[(.*?)\] (.*)$/i) {
$idx->{'tsortas'} = $1;
$idx->{'tertiary'} = $2;
} else {
$idx->{'tsortas'} = $1;
$idx->{'tertiary'} = $1;
}
next;
}
if (/^see (.*)$/i) {
$idx->{'see'} = $1;
next;
}
if (/^seealso (.*)$/i) {
$idx->{'seealso'} = $1;
next;
}
if (/^significance (.*)$/i) {
$idx->{'significance'} = $1;
next;
}
if (/^class (.*)$/i) {
$idx->{'class'} = $1;
next;
}
if (/^scope (.*)$/i) {
$idx->{'scope'} = uc($1);
next;
}
if (/^startref (.*)$/i) {
$idx->{'startref'} = $1;
next;
}
if (/^id (.*)$/i) {
$idx->{'id'} = $1;
$id{$1} = $idx;
next;
}
if (/^zone (.*)$/i) {
my($href) = $1;
$_ = scalar(<F>);
chop;
die "Bad zone: $_\n" if !/^title (.*)$/i;
$idx->{'zone'}->{$href} = $1;
next;
}
die "Unrecognized: $_\n";
}
close (F);
print STDERR "$termcount entries loaded...\n";
foreach $idx (@term) {
my($ididx, $field);
if ($idx->{'startref'}) {
$ididx = $id{$idx->{'startref'}};
foreach $field ('primary', 'secondary', 'tertiary', 'see', 'seealso',
'psortas', 'ssortas', 'tsortas', 'significance',
'class', 'scope') {
$idx->{$field} = $ididx->{$field};
}
}
}
@term = sort termsort @term;
@term = sortsymbols(@term);
safe_open(*OUT, $outfile);
if ($indexid) {
print OUT "<$indextag id='$indexid'>\n\n";
} else {
print OUT "<$indextag>\n\n";
}
print OUT "<!-- This file was produced by collateindex.pl. -->\n";
print OUT "<!-- Remove this comment if you edit this file by hand! -->\n";
print OUT "<!-- ULINK is abused here.
The URL attribute holds the URL that points from the index entry
back to the appropriate place in the output produced by the HTML
stylesheet. (It's much easier to calculate this URL in the first
pass.)
The Role attribute holds the ID (either real or manufactured) of
the corresponding INDEXTERM. This is used by the print backends
to produce page numbers.
The entries below are sorted and collated into the correct order.
Duplicates may be removed in the HTML backend, but in the print
backends, it is impossible to suppress duplicate pages or coalesce
sequences of pages into a range.
-->\n\n";
print OUT "<title>$title</title>\n\n" if $title;
$last = {}; $first = 1; $group = ""; $lastout = "";
foreach $idx (@term) {
next if $idx->{'startref'}; next if ($idx->{'scope'} eq 'LOCAL') && ($scope eq 'GLOBAL');
next if ($idx->{'scope'} eq 'GLOBAL') && ($scope eq 'LOCAL');
next if &same($idx, $last);
$termcount--;
if (!&tsame($last, $idx, 'primary')) {
print "DIFF PRIM\n" if $debug;
&end_entry() if not $first;
if ($lettergroups) {
$letter = $idx->{'psortas'};
$letter = $idx->{'primary'} if !$letter;
$letter = uc(substr($letter, 0, 1));
if (($letter lt 'A') || ($letter gt 'Z')) {
if (($group eq '')
|| (($group ge 'A') && ($group le 'Z'))) {
print OUT "</indexdiv>\n" if !$first;
print OUT "<indexdiv><title>$symbolsname</title>\n\n";
$group = $letter;
}
} elsif (($group eq '') || ($group ne $letter)) {
print OUT "</indexdiv>\n" if !$first;
print OUT "<indexdiv><title>$letter</title>\n\n";
$group = $letter;
}
}
$first = 0;
print OUT "<indexentry>\n";
print OUT " <primaryie>", $idx->{'primary'};
$lastout = "primaryie";
if ($idx->{'secondary'}) {
print OUT "\n </primaryie>\n";
print OUT " <secondaryie>", $idx->{'secondary'};
$lastout = "secondaryie";
};
if ($idx->{'tertiary'}) {
print OUT "\n </secondaryie>\n";
print OUT " <tertiaryie>", $idx->{'tertiary'};
$lastout = "tertiaryie";
}
} elsif (!&tsame($last, $idx, 'secondary')) {
print "DIFF SEC\n" if $debug;
print OUT "\n </$lastout>\n" if $lastout;
print OUT " <secondaryie>", $idx->{'secondary'};
$lastout = "secondaryie";
if ($idx->{'tertiary'}) {
print OUT "\n </secondaryie>\n";
print OUT " <tertiaryie>", $idx->{'tertiary'};
$lastout = "tertiaryie";
}
} elsif (!&tsame($last, $idx, 'tertiary')) {
print "DIFF TERT\n" if $debug;
print OUT "\n </$lastout>\n" if $lastout;
if ($idx->{'tertiary'}) {
print OUT " <tertiaryie>", $idx->{'tertiary'};
$lastout = "tertiaryie";
}
}
&print_term($idx);
$last = $idx;
}
print STDERR "$termcount entries ignored...\n";
&end_entry();
print OUT "</indexdiv>\n" if $lettergroups;
print OUT "</$indextag>\n";
close (OUT);
print STDERR "Done.\n";
sub same {
my($a) = shift;
my($b) = shift;
my($aP) = $a->{'psortas'} || $a->{'primary'};
my($aS) = $a->{'ssortas'} || $a->{'secondary'};
my($aT) = $a->{'tsortas'} || $a->{'tertiary'};
my($bP) = $b->{'psortas'} || $b->{'primary'};
my($bS) = $b->{'ssortas'} || $b->{'secondary'};
my($bT) = $b->{'tsortas'} || $b->{'tertiary'};
my($same);
$aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP);
$aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS);
$aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT);
$bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP);
$bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS);
$bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT);
$same = (($aP eq $bP)
&& ($aS eq $bS)
&& ($aT eq $bT)
&& ($a->{'title'} eq $b->{'title'})
&& ($a->{'href'} eq $b->{'href'}));
$same = $same && ($a->{'hrefpoint'} eq $b->{'hrefpoint'})
if $linkpoints;
$same;
}
sub tsame {
my($a) = shift;
my($b) = shift;
my($term) = shift;
my($sterm) = substr($term, 0, 1) . "sortas";
my($A, $B);
$A = $a->{$sterm} || $a->{$term};
$B = $b->{$sterm} || $b->{$term};
$A =~ s/^\s*//; $A =~ s/\s*$//; $A = uc($A);
$B =~ s/^\s*//; $B =~ s/\s*$//; $B = uc($B);
return $A eq $B;
}
sub end_entry {
print OUT "\n </$lastout>\n" if $lastout;
print OUT "</indexentry>\n\n";
$lastout = "";
}
sub print_term {
my($idx) = shift;
my($key, $indent, @hrefs);
my(%href) = ();
my(%phref) = ();
$indent = " ";
if ($idx->{'see'}) {
if ($lastout) {
print OUT "\n </$lastout>\n";
$lastout = "";
}
print OUT $indent, "<seeie>", $idx->{'see'}, "</seeie>\n";
return;
}
if ($idx->{'seealso'}) {
if ($lastout) {
print OUT "\n </$lastout>\n";
$lastout = "";
}
print OUT $indent, "<seealsoie>", $idx->{'seealso'}, "</seealsoie>\n";
return;
}
if (keys %{$idx->{'zone'}}) {
foreach $key (keys %{$idx->{'zone'}}) {
$href{$key} = $idx->{'zone'}->{$key};
$phref{$key} = $idx->{'zone'}->{$key};
}
} else {
$href{$idx->{'href'}} = $idx->{'title'};
$phref{$idx->{'href'}} = $idx->{'hrefpoint'};
}
print OUT ",\n";
@hrefs = keys %href;
while (@hrefs) {
my($linkend) = "";
my($role) = "";
$key = shift @hrefs;
if ($linkpoints) {
$linkend = $phref{$key};
} else {
$linkend = $key;
}
$role = $linkend;
$role = $1 if $role =~ /\
print OUT $indent;
print OUT "<ulink url=\"$linkend\" role=\"$role\">";
print OUT "<emphasis>" if ($idx->{'significance'} eq 'PREFERRED');
print OUT $href{$key};
print OUT "</emphasis>" if ($idx->{'significance'} eq 'PREFERRED');
print OUT "</ulink>";
}
}
sub termsort {
my($aP) = $a->{'psortas'} || $a->{'primary'};
my($aS) = $a->{'ssortas'} || $a->{'secondary'};
my($aT) = $a->{'tsortas'} || $a->{'tertiary'};
my($ap) = $a->{'count'};
my($bP) = $b->{'psortas'} || $b->{'primary'};
my($bS) = $b->{'ssortas'} || $b->{'secondary'};
my($bT) = $b->{'tsortas'} || $b->{'tertiary'};
my($bp) = $b->{'count'};
$aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP);
$aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS);
$aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT);
$bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP);
$bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS);
$bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT);
if ($aP eq $bP) {
if ($aS eq $bS) {
if ($aT eq $bT) {
return 1 if ($a->{'seealso'});
return -1 if ($b->{'seealso'});
return $ap <=> $bp;
} else {
return $aT cmp $bT;
}
} else {
return $aS cmp $bS;
}
} else {
return $aP cmp $bP;
}
}
sub sortsymbols {
my(@term) = @_;
my(@new) = ();
my(@sym) = ();
my($letter);
my($idx);
foreach $idx (@term) {
$letter = $idx->{'psortas'};
$letter = $idx->{'primary'} if !$letter;
$letter = uc(substr($letter, 0, 1));
if (($letter lt 'A') || ($letter gt 'Z')) {
push (@sym, $idx);
} else {
push (@new, $idx);
}
}
return (@sym, @new);
}
sub safe_open {
local(*OUT) = shift;
local(*F, $_);
if (($outfile ne '-') && (!$forceoutput)) {
my($handedit) = 1;
if (open (OUT, $outfile)) {
while (<OUT>) {
if (/<!-- Remove this comment if you edit this file by hand! -->/){
$handedit = 0;
last;
}
}
close (OUT);
} else {
$handedit = 0;
}
if ($handedit) {
print "\n$outfile appears to have been edited by hand; use -f or\n";
print " change the output file.\n";
exit 1;
}
}
open (OUT, ">$outfile") || die "$usage\nCannot write to $outfile.\n";
if ($preamble) {
if (open(F, $preamble)) {
while (<F>) {
print OUT $_;
}
close(F);
} else {
warn "$0: cannot open preamble $preamble.\n";
}
}
}