# $Id: 02parse.t,v 1.1.1.2 2007/10/10 23:04:15 ahuda Exp $ ## # this test checks the parsing capabilities of XML::LibXML # it relies on the success of t/01basic.t use Test; use IO::File; BEGIN { use XML::LibXML; if ( XML::LibXML::LIBXML_VERSION >= 20600 ) { plan tests => 478; } else { plan tests => 470; print "# skip NS cleaning tests\n"; } }; use XML::LibXML::Common qw(:libxml); use XML::LibXML::SAX; use XML::LibXML::SAX::Builder; use constant XML_DECL => "\n"; ## # test values my @goodWFStrings = ( '', '', XML_DECL . "", ''."\n", ''."\n", XML_DECL. " \n", XML_DECL. ' ', XML_DECL. ' ', XML_DECL. '&"\']]>', XML_DECL. '<>&"'', XML_DECL. '  ', XML_DECL. 'foo', XML_DECL. 'foo', XML_DECL. 'foo', XML_DECL. '', XML_DECL. '', #XML_DECL. '', #'' ); my @goodWFNSStrings = ( XML_DECL. ''."\n", XML_DECL. ''."\n", XML_DECL. ''."\n", XML_DECL. ''."\n", XML_DECL. ''."\n", ); my @goodWFDTDStrings = ( XML_DECL. ''."\n".']>'."\n".'&foo;', XML_DECL. ']>&foo;', XML_DECL. ']>&foo;>', XML_DECL. ']>&foo;>', XML_DECL. ']>&foo;>', XML_DECL. ']>', XML_DECL. ']>', ); my @badWFStrings = ( "", # totally empty document XML_DECL, # only XML Declaration "", # comment only is like an empty document ']>', # no good either ... "", # single tag (tag mismatch) "foo", # trailing junk "foo", # leading junk "", # bad attribute '&", # bad char "�x20;", # bad char "", # bad encoding "&foo;", # undefind entity ">", # unterminated entity XML_DECL. ']>', # bad placed entity XML_DECL. ']>', # even worse "", # bad comment '', # bad either... (is this conform with the spec????) ); my %goodPushWF = ( single1 => [''], single2 => ['',''], single3 => [ XML_DECL, "", "" ], single4 => [""], single5 => ["<", "foo","bar", "/>"], single6 => ['',"\n"], single7 => ['',"\n"], single8 => [''], single9 => ['',"\n"], multiple1 => [ '','',' ', ], multiple2 => [ '<','/foobar> ', ], multiple3 => [ '','&"\']]>',''], multiple4 => [ '','&', ']]>', '' ], multiple5 => [ '','&', ']]>', '' ], multiple6 => ['','<>&"'',''], multiple6 => ['','<',';&','gt;&a','mp;','"&ap','os;',''], multiple7 => [ '', '  ','' ], multiple8 => [ '', '&#x','20;','60;','' ], multiple9 => [ '','moo','moo',' ', ], multiple10 => [ '','moo',' ', ], comment1 => [ '','' ], comment2 => [ '','' ], comment3 => [ '','' ], comment4 => [ '','' ], comment5 => [ 'fo','o', wellformed7 => '', wellformed8 => '', wellformed9 => 'D', wellformed10 => '', wellformed11 => '', wellbalance1 => '', wellbalance2 => '', wellbalance3 => '', wellbalance4 => 'DI', wellbalance5 => '', wellbalance6 => '', wellbalance7 => '', wellbalance8 => 'DD', wellbalance9 => 'D', wellbalance10=> 'DD', wellbalance11=> 'D', wellbalance12=> 'D', wellbalance13=> 'D', wellbalance14=> '', wellbalance15=> '', wellbalance16=> 'D', ); my @badWBStrings = ( "", "", "bar", "bar", "&foo;", # undefined entity "&", # bad char "häh?", # bad encoding "", # bad stays bad ;) "", # bad stays bad ;) ); my $pparser = XML::LibXML->new; print "# 5.1 DOM CHUNK PARSER\n"; for ( 1..$MAX_WF_C ) { my $frag = $pparser->parse_xml_chunk($chunks{'wellformed'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) { print "# well formness test $_\n"; if ( $chunks{'wellformed'.$_} =~ /\\<\/A\>/ ) { $_--; # because we cannot distinguish between and } ok($frag->toString,$chunks{'wellformed'.$_}); next; } } ok(0); } for ( 1..$MAX_WB_C ) { my $frag = $pparser->parse_xml_chunk($chunks{'wellbalance'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $chunks{'wellbalance'.$_} =~ /<\/A>/ ) { $_--; } ok($frag->toString,$chunks{'wellbalance'.$_}); next; } ok(0); } eval { my $fail = $pparser->parse_xml_chunk(undef); }; ok($@); eval { my $fail = $pparser->parse_xml_chunk(""); }; ok($@); foreach my $str ( @badWBStrings ) { eval { my $fail = $pparser->parse_xml_chunk($str); }; ok($@); } { print "# 5.1.1 Segmenation fault tests\n"; my $sDoc = ''; my $sChunk = ''; my $parser = XML::LibXML->new(); my $doc = $parser->parse_xml_chunk( $sDoc, undef ); my $chk = $parser->parse_xml_chunk( $sChunk,undef ); my $fc = $doc->firstChild; $doc->appendChild( $chk ); ok( $doc->toString(), '' ); } { print "# 5.1.2 Segmenation fault tests\n"; my $sDoc = ''; my $sChunk = ''; my $parser = XML::LibXML->new(); my $doc = $parser->parse_xml_chunk( $sDoc, undef ); my $chk = $parser->parse_xml_chunk( $sChunk,undef ); my $fc = $doc->firstChild; $doc->insertAfter( $chk, $fc ); ok( $doc->toString(), '' ); } { print "# 5.1.3 Segmenation fault tests\n"; my $sDoc = ''; my $sChunk = ''; my $parser = XML::LibXML->new(); my $doc = $parser->parse_xml_chunk( $sDoc, undef ); my $chk = $parser->parse_xml_chunk( $sChunk,undef ); my $fc = $doc->firstChild; $doc->insertBefore( $chk, $fc ); ok( $doc->toString(), '' ); } ok(1); print "# 5.2 SAX CHUNK PARSER\n"; my $handler = XML::LibXML::SAX::Builder->new(); my $parser = XML::LibXML->new; $parser->set_handler( $handler ); for ( 1..$MAX_WF_C ) { my $frag = $parser->parse_xml_chunk($chunks{'wellformed'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) { if ( $chunks{'wellformed'.$_} =~ /\\<\/A\>/ ) { $_--; } ok($frag->toString,$chunks{'wellformed'.$_}); next; } } ok(0); } for ( 1..$MAX_WB_C ) { my $frag = $parser->parse_xml_chunk($chunks{'wellbalance'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $chunks{'wellbalance'.$_} =~ /<\/A>/ ) { $_--; } ok($frag->toString,$chunks{'wellbalance'.$_}); next; } ok(0); } } { print "# 6 VALIDATING PARSER\n"; my %badstrings = ( SIMPLE => ''."\n\n", ); my $parser = XML::LibXML->new; $parser->validation(1); my $doc; eval { $doc = $parser->parse_string($badstrings{SIMPLE}); }; ok( $@ ); my $ql; } { print "# 7 LINE NUMBERS\n"; my $goodxml = < EOXML my $badxml = < ]> EOXML my $parser = XML::LibXML->new; $parser->validation(1); eval { $parser->parse_string( $badxml ); }; # correct line number may or may not be present # depending on libxml2 version ok( $@ =~ /^:[03]:/ ); $parser->line_numbers(1); eval { $parser->parse_string( $badxml ); }; ok( $@ =~ /^:3:/ ); # switch off validation for the following tests $parser->validation(0); my $doc; eval { $doc = $parser->parse_string( $goodxml ); }; my $root = $doc->documentElement(); ok( $root->line_number(), 2); my @kids = $root->childNodes(); ok( $kids[1]->line_number(),3 ); my $newkid = $root->appendChild( $doc->createElement( "bar" ) ); ok( $newkid->line_number(), 0 ); $parser->line_numbers(0); eval { $doc = $parser->parse_string( $goodxml ); }; $root = $doc->documentElement(); ok( $root->line_number(), 0); @kids = $root->childNodes(); ok( $kids[1]->line_number(), 0 ); } print "# " . XML::LibXML::LIBXML_VERSION . "\n"; if ( XML::LibXML::LIBXML_VERSION >= 20600 ) { print "# 8 Clean Namespaces\n"; my ( $xsDoc1, $xsDoc2 ); $xsDoc1 = q{}; $xsDoc2 = q{}; my $parser = XML::LibXML->new(); $parser->clean_namespaces(1); my $fn1 = "example/xmlns/goodguy.xml"; my $fn2 = "example/xmlns/badguy.xml"; ok( $parser->parse_string( $xsDoc1 )->documentElement->toString(), q{} ); ok( $parser->parse_string( $xsDoc2 )->documentElement->toString(), $xsDoc2 ); ok( $parser->parse_file( $fn1 )->documentElement->toString(), q{} ); ok( $parser->parse_file( $fn2 )->documentElement->toString() , $xsDoc2 ); my $fh1 = IO::File->new($fn1); my $fh2 = IO::File->new($fn2); ok( $parser->parse_fh( $fh1 )->documentElement->toString(), q{} ); ok( $parser->parse_fh( $fh2 )->documentElement->toString() , $xsDoc2 ); my @xaDoc1 = ('','' ,''); my @xaDoc2 = ('','' , ''); my $doc; foreach ( @xaDoc1 ) { $parser->parse_chunk( $_ ); } $doc = $parser->parse_chunk( "", 1 ); ok( $doc->documentElement->toString(), q{} ); foreach ( @xaDoc2 ) { $parser->parse_chunk( $_ ); } $doc = $parser->parse_chunk( "", 1 ); ok( $doc->documentElement->toString() , $xsDoc2 ); } ## # test if external subsets are loaded correctly { my $xmldoc = < &foo; EOXML my $parser = XML::LibXML->new(); $parser->load_ext_dtd(1); # first time it should work my $doc = $parser->parse_string( $xmldoc ); ok( $doc->documentElement()->string_value(), " test " ); # second time it must not fail. my $doc2 = $parser->parse_string( $xmldoc ); ok( $doc2->documentElement()->string_value(), " test " ); } ## # Test ticket #7668 xinclude breaks entity expansion # [CG] removed again, since #7668 claims the spec is incorrect ## # Test ticket #7913 { my $xmldoc = < &foo; EOXML my $parser = XML::LibXML->new(); $parser->load_ext_dtd(1); # first time it should work my $doc = $parser->parse_string( $xmldoc ); ok( $doc->documentElement()->string_value(), " test " ); # lets see if load_ext_dtd(0) works $parser->load_ext_dtd(0); my $doc2; eval { $doc2 = $parser->parse_string( $xmldoc ); }; ok($@); $parser->validation(1); $parser->load_ext_dtd(0); my $doc3; eval { $doc3 = $parser->parse_file( "example/article_external_bad.xml" ); }; ok( $doc3 ); $parser->load_ext_dtd(1); eval { $doc3 = $parser->parse_file( "example/article_external_bad.xml" ); }; ok( $@); } sub tsub { my $doc = shift; my $th = {}; $th->{d} = XML::LibXML::Document->createDocument; my $e1 = $th->{d}->createElementNS("x","X:foo"); $th->{d}->setDocumentElement( $e1 ); my $e2 = $th->{d}->createElementNS( "x","X:bar" ); $e1->appendChild( $e2 ); $e2->appendChild( $th->{d}->importNode( $doc->documentElement() ) ); return $th->{d}; } sub tsub2 { my ($doc,$query)=($_[0],@{$_[1]}); # return [ $doc->findnodes($query) ]; return [ $doc->findnodes(encodeToUTF8('iso-8859-1',$query)) ]; }