use strict;
require HTML::Parser;
my $decl = '';
my $com1 = '';
my $com2 = '';
my $start = '';
my $end = '';
my $empty = "";
my $proc = ' something completely different ?>';
my @argspec = qw( self offset length
event tagname tag token0
text
is_cdata dtext
tokens
tokenpos
attr
attrseq );
my @result = ();
my $p = HTML::Parser -> new(default_h => [\@result, join(',', @argspec)],
strict_comment => 1, xml_mode => 1);
my @tests =
( # string, expected results
$decl => [[$p, 0, 52, 'declaration', 'ENTITY', '!ENTITY', 'ENTITY',
'',
undef, undef,
['ENTITY', 'nbsp', 'CDATA', '" "', '-- no-break space --'],
[2, 6, 9, 4, 16, 5, 22, 8, 31, 20],
undef, undef ]],
$com1 => [[$p, 0, 16, 'comment', ' Comment ', '# Comment ', ' Comment ',
'',
undef, undef,
[' Comment '],
[4, 9],
undef, undef ]],
$com2 => [[$p, 0, 30, 'comment', ' Comment ', '# Comment ', ' Comment ',
'',
undef, undef,
[' Comment ', ' Comment '],
[4, 9, 18, 9],
undef, undef ]],
$start => [[$p, 0, 14, 'start', 'a', 'a', 'a',
'',
undef, undef,
['a', 'href', '"foo"'],
[1, 1, 3, 4, 8, 5],
{'href', 'foo'}, ['href'] ]],
$end => [[$p, 0, 4, 'end', 'a', '/a', 'a',
'',
undef, undef,
['a'],
[2, 1],
undef, undef ]],
$empty => [[$p, 0, 16, 'start', 'IMG', 'IMG', 'IMG',
"",
undef, undef,
['IMG', 'SRC', "'foo'"],
[1, 3, 5, 3, 9, 5],
{'SRC', 'foo'}, ['SRC'] ],
[$p, 16, 0, 'end', 'IMG', '/IMG', 'IMG',
'',
undef, undef,
['IMG'],
undef,
undef, undef ],
],
$proc => [[$p, 0, 36, 'process', ' something completely different ',
'? something completely different ',
' something completely different ',
' something completely different ?>',
undef, undef,
[' something completely different '],
[2, 32],
undef, undef ]],
"$end\n$end" => [[$p, 0, 4, 'end', 'a', '/a', 'a',
'',
undef, undef,
['a'],
[2, 1],
undef, undef],
[$p, 4, 1, 'text', undef, undef, undef,
"\n",
'', "\n",
undef,
undef,
undef, undef],
[$p, 5, 4, 'end', 'a', '/a', 'a',
'',
undef, undef,
['a'],
[2, 1],
undef, undef ]],
);
my $n = @tests / 2;
print "1..$n\n";
sub string_tag {
my (@pieces) = @_;
my $part;
foreach $part ( @pieces ) {
if (!defined $part) {
$part = 'undef';
}
elsif (!ref $part) {
$part = "'$part'" if $part !~ /^\d+$/;
}
elsif ('ARRAY' eq ref $part ) {
$part = '[' . join(', ', string_tag(@$part)) . ']';
}
elsif ('HASH' eq ref $part ) {
$part = '{' . join(',', string_tag(%$part)) . '}';
}
else {
$part = '<' . ref($part) . '>';
}
}
return join(", ", @pieces );
}
my $i = 0;
my ($got, $want);
while (@tests) {
my($html, $expected) = splice @tests, 0, 2;
++$i;
print "-" x 50, " $i\n";
print "$html\n";
print "-" x 50, " $i\n";
@result = ();
$p->parse($html)->eof;
shift(@result) if $result[0][3] eq "start_document";
pop(@result) if $result[-1][3] eq "end_document";
# Compare results for each element expected
foreach (@$expected) {
$want = string_tag($_);
$got = string_tag(shift @result);
print " $got\n";
if ($want ne $got) {
print "Expected: $want\n";
print( "not " );
last;
}
}
print "ok $i\n";
}