# $Id: 02parse.t,v 1.1.1.2 2007/10/10 23:04:15 ahuda Exp $
##
# this test checks the parsing capabilities of XML::LibXML
# it relies on the success of t/01basic.t
use Test;
use IO::File;
BEGIN { use XML::LibXML;
if ( XML::LibXML::LIBXML_VERSION >= 20600 ) {
plan tests => 478;
}
else {
plan tests => 470;
print "# skip NS cleaning tests\n";
}
};
use XML::LibXML::Common qw(:libxml);
use XML::LibXML::SAX;
use XML::LibXML::SAX::Builder;
use constant XML_DECL => "\n";
##
# test values
my @goodWFStrings = (
'',
'',
XML_DECL . "",
''."\n",
''."\n",
XML_DECL. " \n",
XML_DECL. ' ',
XML_DECL. ' ',
XML_DECL. '&"\']]>',
XML_DECL. '<>&"'',
XML_DECL. ' ',
XML_DECL. 'foo',
XML_DECL. 'foo',
XML_DECL. 'foo',
XML_DECL. '',
XML_DECL. '',
#XML_DECL. '',
#''
);
my @goodWFNSStrings = (
XML_DECL. ''."\n",
XML_DECL. ''."\n",
XML_DECL. ''."\n",
XML_DECL. ''."\n",
XML_DECL. ''."\n",
);
my @goodWFDTDStrings = (
XML_DECL. ''."\n".']>'."\n".'&foo;',
XML_DECL. ']>&foo;',
XML_DECL. ']>&foo;>',
XML_DECL. ']>&foo;>',
XML_DECL. ']>&foo;>',
XML_DECL. ']>',
XML_DECL. ']>',
);
my @badWFStrings = (
"", # totally empty document
XML_DECL, # only XML Declaration
"", # comment only is like an empty document
']>', # no good either ...
"", # single tag (tag mismatch)
"foo", # trailing junk
"foo", # leading junk
"", # bad attribute
'&", # bad char
"x20;", # bad char
"", # bad encoding
"&foo;", # undefind entity
">", # unterminated entity
XML_DECL. ']>', # bad placed entity
XML_DECL. ']>', # even worse
"", # bad comment
'', # bad either... (is this conform with the spec????)
);
my %goodPushWF = (
single1 => [''],
single2 => ['',''],
single3 => [ XML_DECL, "", "" ],
single4 => [""],
single5 => ["<", "foo","bar", "/>"],
single6 => ['',"\n"],
single7 => ['',"\n"],
single8 => [''],
single9 => ['',"\n"],
multiple1 => [ '','',' ', ],
multiple2 => [ '<','/foobar> ', ],
multiple3 => [ '','&"\']]>',''],
multiple4 => [ '','&', ']]>', '' ],
multiple5 => [ '','&', ']]>', '' ],
multiple6 => ['','<>&"'',''],
multiple6 => ['','<',';&','gt;&a','mp;','"&ap','os;',''],
multiple7 => [ '', ' ','' ],
multiple8 => [ '', '','20;','60;','' ],
multiple9 => [ '','moo','moo',' ', ],
multiple10 => [ '','moo',' ', ],
comment1 => [ '','' ],
comment2 => [ '','' ],
comment3 => [ '','' ],
comment4 => [ '','' ],
comment5 => [ 'fo','o',
wellformed7 => '',
wellformed8 => '',
wellformed9 => 'D',
wellformed10 => '',
wellformed11 => '',
wellbalance1 => '',
wellbalance2 => '',
wellbalance3 => '',
wellbalance4 => 'DI',
wellbalance5 => '',
wellbalance6 => '',
wellbalance7 => '',
wellbalance8 => 'DD',
wellbalance9 => 'D',
wellbalance10=> 'DD',
wellbalance11=> 'D',
wellbalance12=> 'D',
wellbalance13=> 'D',
wellbalance14=> '',
wellbalance15=> '',
wellbalance16=> 'D',
);
my @badWBStrings = (
"",
"",
"bar",
"bar",
"&foo;", # undefined entity
"&", # bad char
"häh?", # bad encoding
"", # bad stays bad ;)
"", # bad stays bad ;)
);
my $pparser = XML::LibXML->new;
print "# 5.1 DOM CHUNK PARSER\n";
for ( 1..$MAX_WF_C ) {
my $frag = $pparser->parse_xml_chunk($chunks{'wellformed'.$_});
ok($frag);
if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
&& $frag->hasChildNodes ) {
if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) {
print "# well formness test $_\n";
if ( $chunks{'wellformed'.$_} =~ /\\<\/A\>/ ) {
$_--; # because we cannot distinguish between and
}
ok($frag->toString,$chunks{'wellformed'.$_});
next;
}
}
ok(0);
}
for ( 1..$MAX_WB_C ) {
my $frag = $pparser->parse_xml_chunk($chunks{'wellbalance'.$_});
ok($frag);
if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
&& $frag->hasChildNodes ) {
if ( $chunks{'wellbalance'.$_} =~ /<\/A>/ ) {
$_--;
}
ok($frag->toString,$chunks{'wellbalance'.$_});
next;
}
ok(0);
}
eval { my $fail = $pparser->parse_xml_chunk(undef); };
ok($@);
eval { my $fail = $pparser->parse_xml_chunk(""); };
ok($@);
foreach my $str ( @badWBStrings ) {
eval { my $fail = $pparser->parse_xml_chunk($str); };
ok($@);
}
{
print "# 5.1.1 Segmenation fault tests\n";
my $sDoc = '';
my $sChunk = '';
my $parser = XML::LibXML->new();
my $doc = $parser->parse_xml_chunk( $sDoc, undef );
my $chk = $parser->parse_xml_chunk( $sChunk,undef );
my $fc = $doc->firstChild;
$doc->appendChild( $chk );
ok( $doc->toString(), '' );
}
{
print "# 5.1.2 Segmenation fault tests\n";
my $sDoc = '';
my $sChunk = '';
my $parser = XML::LibXML->new();
my $doc = $parser->parse_xml_chunk( $sDoc, undef );
my $chk = $parser->parse_xml_chunk( $sChunk,undef );
my $fc = $doc->firstChild;
$doc->insertAfter( $chk, $fc );
ok( $doc->toString(), '' );
}
{
print "# 5.1.3 Segmenation fault tests\n";
my $sDoc = '';
my $sChunk = '';
my $parser = XML::LibXML->new();
my $doc = $parser->parse_xml_chunk( $sDoc, undef );
my $chk = $parser->parse_xml_chunk( $sChunk,undef );
my $fc = $doc->firstChild;
$doc->insertBefore( $chk, $fc );
ok( $doc->toString(), '' );
}
ok(1);
print "# 5.2 SAX CHUNK PARSER\n";
my $handler = XML::LibXML::SAX::Builder->new();
my $parser = XML::LibXML->new;
$parser->set_handler( $handler );
for ( 1..$MAX_WF_C ) {
my $frag = $parser->parse_xml_chunk($chunks{'wellformed'.$_});
ok($frag);
if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
&& $frag->hasChildNodes ) {
if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) {
if ( $chunks{'wellformed'.$_} =~ /\\<\/A\>/ ) {
$_--;
}
ok($frag->toString,$chunks{'wellformed'.$_});
next;
}
}
ok(0);
}
for ( 1..$MAX_WB_C ) {
my $frag = $parser->parse_xml_chunk($chunks{'wellbalance'.$_});
ok($frag);
if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE
&& $frag->hasChildNodes ) {
if ( $chunks{'wellbalance'.$_} =~ /<\/A>/ ) {
$_--;
}
ok($frag->toString,$chunks{'wellbalance'.$_});
next;
}
ok(0);
}
}
{
print "# 6 VALIDATING PARSER\n";
my %badstrings = (
SIMPLE => ''."\n\n",
);
my $parser = XML::LibXML->new;
$parser->validation(1);
my $doc;
eval { $doc = $parser->parse_string($badstrings{SIMPLE}); };
ok( $@ );
my $ql;
}
{
print "# 7 LINE NUMBERS\n";
my $goodxml = <
EOXML
my $badxml = <
]>
EOXML
my $parser = XML::LibXML->new;
$parser->validation(1);
eval { $parser->parse_string( $badxml ); };
# correct line number may or may not be present
# depending on libxml2 version
ok( $@ =~ /^:[03]:/ );
$parser->line_numbers(1);
eval { $parser->parse_string( $badxml ); };
ok( $@ =~ /^:3:/ );
# switch off validation for the following tests
$parser->validation(0);
my $doc;
eval { $doc = $parser->parse_string( $goodxml ); };
my $root = $doc->documentElement();
ok( $root->line_number(), 2);
my @kids = $root->childNodes();
ok( $kids[1]->line_number(),3 );
my $newkid = $root->appendChild( $doc->createElement( "bar" ) );
ok( $newkid->line_number(), 0 );
$parser->line_numbers(0);
eval { $doc = $parser->parse_string( $goodxml ); };
$root = $doc->documentElement();
ok( $root->line_number(), 0);
@kids = $root->childNodes();
ok( $kids[1]->line_number(), 0 );
}
print "# " . XML::LibXML::LIBXML_VERSION . "\n";
if ( XML::LibXML::LIBXML_VERSION >= 20600 )
{
print "# 8 Clean Namespaces\n";
my ( $xsDoc1, $xsDoc2 );
$xsDoc1 = q{};
$xsDoc2 = q{};
my $parser = XML::LibXML->new();
$parser->clean_namespaces(1);
my $fn1 = "example/xmlns/goodguy.xml";
my $fn2 = "example/xmlns/badguy.xml";
ok( $parser->parse_string( $xsDoc1 )->documentElement->toString(),
q{} );
ok( $parser->parse_string( $xsDoc2 )->documentElement->toString(),
$xsDoc2 );
ok( $parser->parse_file( $fn1 )->documentElement->toString(),
q{} );
ok( $parser->parse_file( $fn2 )->documentElement->toString() ,
$xsDoc2 );
my $fh1 = IO::File->new($fn1);
my $fh2 = IO::File->new($fn2);
ok( $parser->parse_fh( $fh1 )->documentElement->toString(),
q{} );
ok( $parser->parse_fh( $fh2 )->documentElement->toString() ,
$xsDoc2 );
my @xaDoc1 = ('','' ,'');
my @xaDoc2 = ('','' , '');
my $doc;
foreach ( @xaDoc1 ) {
$parser->parse_chunk( $_ );
}
$doc = $parser->parse_chunk( "", 1 );
ok( $doc->documentElement->toString(),
q{} );
foreach ( @xaDoc2 ) {
$parser->parse_chunk( $_ );
}
$doc = $parser->parse_chunk( "", 1 );
ok( $doc->documentElement->toString() ,
$xsDoc2 );
}
##
# test if external subsets are loaded correctly
{
my $xmldoc = <
&foo;
EOXML
my $parser = XML::LibXML->new();
$parser->load_ext_dtd(1);
# first time it should work
my $doc = $parser->parse_string( $xmldoc );
ok( $doc->documentElement()->string_value(), " test " );
# second time it must not fail.
my $doc2 = $parser->parse_string( $xmldoc );
ok( $doc2->documentElement()->string_value(), " test " );
}
##
# Test ticket #7668 xinclude breaks entity expansion
# [CG] removed again, since #7668 claims the spec is incorrect
##
# Test ticket #7913
{
my $xmldoc = <
&foo;
EOXML
my $parser = XML::LibXML->new();
$parser->load_ext_dtd(1);
# first time it should work
my $doc = $parser->parse_string( $xmldoc );
ok( $doc->documentElement()->string_value(), " test " );
# lets see if load_ext_dtd(0) works
$parser->load_ext_dtd(0);
my $doc2;
eval {
$doc2 = $parser->parse_string( $xmldoc );
};
ok($@);
$parser->validation(1);
$parser->load_ext_dtd(0);
my $doc3;
eval {
$doc3 = $parser->parse_file( "example/article_external_bad.xml" );
};
ok( $doc3 );
$parser->load_ext_dtd(1);
eval {
$doc3 = $parser->parse_file( "example/article_external_bad.xml" );
};
ok( $@);
}
sub tsub {
my $doc = shift;
my $th = {};
$th->{d} = XML::LibXML::Document->createDocument;
my $e1 = $th->{d}->createElementNS("x","X:foo");
$th->{d}->setDocumentElement( $e1 );
my $e2 = $th->{d}->createElementNS( "x","X:bar" );
$e1->appendChild( $e2 );
$e2->appendChild( $th->{d}->importNode( $doc->documentElement() ) );
return $th->{d};
}
sub tsub2 {
my ($doc,$query)=($_[0],@{$_[1]});
# return [ $doc->findnodes($query) ];
return [ $doc->findnodes(encodeToUTF8('iso-8859-1',$query)) ];
}