print "1..4\n";
{ package H;
sub new { bless {}, shift; }
sub header {
my $self = shift;
my $key = uc(shift);
my $old = $self->{$key};
if (@_) { $self->{$key} = shift; }
$old;
}
sub push_header {
my($self, $k, $v) = @_;
$k = uc($k);
if (exists $self->{$k}) {
$self->{$k} = [ $self->{$k} ] unless ref $self->{$k};
push(@{$self->{$k}}, $v);
} else {
$self->{$k} = $v;
}
}
sub as_string {
my $self = shift;
my $str = "";
for (sort keys %$self) {
if (ref($self->{$_})) {
my $v;
for $v (@{$self->{$_}}) {
$str .= "$_: $v\n";
}
} else {
$str .= "$_: $self->{$_}\n";
}
}
$str;
}
}
$HTML = <<'EOT';
Å være eller å ikke være
Dette er vanlig tekst. Denne teksten definerer også slutten på
<head> delen av dokumentet.
Dette er også vanlig tekst som ikke skal blir parset i det hele tatt.
EOT
$| = 1;
#$HTML::HeadParser::DEBUG = 1;
require HTML::HeadParser;
$p = HTML::HeadParser->new( H->new );
$bad = 0;
print "\n#### Parsing full text...\n";
if ($p->parse($HTML)) {
$bad++;
print "Need more data which should not happen\n";
} else {
print $p->as_string;
}
$p->header('Title') =~ /Å være eller å ikke være/ or $bad++;
$p->header('Expires') eq 'Soon' or $bad++;
$p->header('Content-Base') eq 'http://www.sn.no' or $bad++;
$p->header('Link') =~ // or $bad++;
# This header should not be present because the head ended
$p->header('Isindex') and $bad++;
print "not " if $bad;
print "ok 1\n";
# Try feeding one char at a time
print "\n\n#### Parsing once char at a time...\n";
$expected = $p->as_string;
$p = HTML::HeadParser->new(H->new);
while ($HTML =~ /(.)/sg) {
print $1;
$p->parse($1) or last;
}
print "«««« Enough!!\n";
$got = $p->as_string;
print "$got";
print "not " if $expected ne $got;
print "ok 2\n";
# Try reading it from a file
print "\n\n#### Parsing from file\n\n";
my $file = "hptest$$.html";
die "$file already exists" if -e $file;
open(FILE, ">$file") or die "Can't create $file: $!";
print FILE $HTML;
print FILE "
This is more content...
\n" x 2000;
print FILE "Buuuh!\n" x 200;
close FILE or die "Can't close $file: $!";
$p = HTML::HeadParser->new(H->new);
$p->parse_file($file);
unlink($file) or warn "Can't unlink $file: $!";
print $p->as_string;
print "not " if $p->header("Title") ne "Å være eller å ikke være";
print "ok 3\n";
# We got into an infinite loop on data without tags and no EOL.
# This was actually a HTML::Parser bug.
print "\n\n#### Try to reproduce bug with empty file\n\n";
open(FILE, ">$file") or die "Can't create $file: $!";
print FILE "Foo";
close(FILE);
$p = HTML::HeadParser->new(H->new);
$p->parse_file($file);
unlink($file) or warn "Can't unlink $file: $!";
print "not " if $p->as_string;
print "ok 4\n";