# $Id: 02parse.t,v 1.1.1.2 2007/10/10 23:04:15 ahuda Exp $ ## # this test checks the parsing capabilities of XML::LibXML # it relies on the success of t/01basic.t use Test; use IO::File; BEGIN { use XML::LibXML; if ( XML::LibXML::LIBXML_VERSION >= 20600 ) { plan tests => 478; } else { plan tests => 470; print "# skip NS cleaning tests\n"; } }; use XML::LibXML::Common qw(:libxml); use XML::LibXML::SAX; use XML::LibXML::SAX::Builder; use constant XML_DECL => "<?xml version=\"1.0\"?>\n"; ## # test values my @goodWFStrings = ( '<foobar/>', '<foobar></foobar>', XML_DECL . "<foobar></foobar>", '<?xml version="1.0" encoding="UTF-8"?>'."\n<foobar></foobar>", '<?xml version="1.0" encoding="ISO-8859-1"?>'."\n<foobar></foobar>", XML_DECL. "<foobar> </foobar>\n", XML_DECL. '<foobar><foo/></foobar> ', XML_DECL. '<foobar> <foo/> </foobar> ', XML_DECL. '<foobar><![CDATA[<>&"\']]></foobar>', XML_DECL. '<foobar><>&"'</foobar>', XML_DECL. '<foobar>  </foobar>', XML_DECL. '<!--comment--><foobar>foo</foobar>', XML_DECL. '<foobar>foo</foobar><!--comment-->', XML_DECL. '<foobar>foo<!----></foobar>', XML_DECL. '<foobar foo="bar"/>', XML_DECL. '<foobar foo="\'bar>"/>', #XML_DECL. '<bar:foobar foo="bar"><bar:foo/></bar:foobar>', #'<bar:foobar/>' ); my @goodWFNSStrings = ( XML_DECL. '<foobar xmlns:bar="xml://foo" bar:foo="bar"/>'."\n", XML_DECL. '<foobar xmlns="xml://foo" foo="bar"><foo/></foobar>'."\n", XML_DECL. '<bar:foobar xmlns:bar="xml://foo" foo="bar"><foo/></bar:foobar>'."\n", XML_DECL. '<bar:foobar xmlns:bar="xml://foo" foo="bar"><bar:foo/></bar:foobar>'."\n", XML_DECL. '<bar:foobar xmlns:bar="xml://foo" bar:foo="bar"><bar:foo/></bar:foobar>'."\n", ); my @goodWFDTDStrings = ( XML_DECL. '<!DOCTYPE foobar ['."\n".'<!ENTITY foo " test ">'."\n".']>'."\n".'<foobar>&foo;</foobar>', XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar>&foo;</foobar>', XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar>&foo;></foobar>', XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar="foo"">]><foobar>&foo;></foobar>', XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar>&foo;></foobar>', XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar foo="&foo;"/>', XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar foo=">&foo;"/>', ); my @badWFStrings = ( "", # totally empty document XML_DECL, # only XML Declaration "<!--ouch-->", # comment only is like an empty document '<!DOCTYPE ouch [<!ENTITY foo "bar">]>', # no good either ... "<ouch>", # single tag (tag mismatch) "<ouch/>foo", # trailing junk "foo<ouch/>", # leading junk "<ouch foo=bar/>", # bad attribute '<ouch foo="bar/>', # bad attribute "<ouch>&</ouch>", # bad char "<ouch>�x20;</ouch>", # bad char "<foobär/>", # bad encoding "<ouch>&foo;</ouch>", # undefind entity "<ouch>></ouch>", # unterminated entity XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar">]><foobar &foo;="ouch"/>', # bad placed entity XML_DECL. '<!DOCTYPE foobar [<!ENTITY foo "bar="foo"">]><foobar &foo;/>', # even worse "<ouch><!---></ouch>", # bad comment '<ouch><!-----></ouch>', # bad either... (is this conform with the spec????) ); my %goodPushWF = ( single1 => ['<foobar/>'], single2 => ['<foobar>','</foobar>'], single3 => [ XML_DECL, "<foobar>", "</foobar>" ], single4 => ["<foo", "bar/>"], single5 => ["<", "foo","bar", "/>"], single6 => ['<?xml version="1.0" encoding="UTF-8"?>',"\n<foobar/>"], single7 => ['<?xml',' version="1.0" ','encoding="UTF-8"?>',"\n<foobar/>"], single8 => ['<foobar', ' foo=', '"bar"', '/>'], single9 => ['<?xml',' versio','n="1.0" ','encodi','ng="U','TF8"?>',"\n<foobar/>"], multiple1 => [ '<foobar>','<foo/>','</foobar> ', ], multiple2 => [ '<foobar','><fo','o','/><','/foobar> ', ], multiple3 => [ '<foobar>','<![CDATA[<>&"\']]>','</foobar>'], multiple4 => [ '<foobar>','<![CDATA[', '<>&', ']]>', '</foobar>' ], multiple5 => [ '<foobar>','<!','[CDA','TA[', '<>&', ']]>', '</foobar>' ], multiple6 => ['<foobar>','<>&"'','</foobar>'], multiple6 => ['<foobar>','<',';&','gt;&a','mp;','"&ap','os;','</foobar>'], multiple7 => [ '<foobar>', '  ','</foobar>' ], multiple8 => [ '<foobar>', '&#x','20;','60;','</foobar>' ], multiple9 => [ '<foobar>','moo','moo','</foobar> ', ], multiple10 => [ '<foobar>','moo','</foobar> ', ], comment1 => [ '<!--comment-->','<foobar/>' ], comment2 => [ '<foobar/>','<!--comment-->' ], comment3 => [ '<!--','comment','-->','<foobar/>' ], comment4 => [ '<!--','-->','<foobar/>' ], comment5 => [ '<foobar>fo','o<!---','-><','/foobar>' ], attr1 => [ '<foobar',' foo="bar"/>'], attr2 => [ '<foobar',' foo','="','bar','"/>'], attr3 => [ '<foobar',' fo','o="b','ar"/>'], #prefix1 => [ '<bar:foobar/>' ], #prefix2 => [ '<bar',':','foobar/>' ], #prefix3 => [ '<ba','r:fo','obar/>' ], ns1 => [ '<foobar xmlns:bar="xml://foo"/>' ], ns2 => [ '<foobar ','xmlns:bar="xml://foo"','/>' ], ns3 => [ '<foo','bar x','mlns:b','ar="foo"/>' ], ns4 => [ '<bar:foobar xmlns:bar="xml://foo"/>' ], ns5 => [ '<bar:foo','bar xm','lns:bar="fo','o"/>' ], ns6 => [ '<bar:fooba','r xm','lns:ba','r="foo"','><bar',':foo/','></bar'.':foobar>'], dtd1 => [XML_DECL, '<!DOCTYPE ','foobar [','<!ENT','ITY foo " test ">',']>','<foobar>&f','oo;</foobar>',], dtd2 => [XML_DECL, '<!DOCTYPE ','foobar [','<!ENT','ITY foo " test ">',']>','<foobar>&f','oo;></foobar>',], ); my $goodfile = "example/dromeds.xml"; my $badfile1 = "example/bad.xml"; my $badfile2 = "does_not_exist.xml"; my $parser = XML::LibXML->new(); print "# 1 NON VALIDATING PARSER\n"; print "# 1.1 WELL FORMED STRING PARSING\n"; print "# 1.1.1 DEFAULT VALUES\n"; { foreach my $str ( @goodWFStrings,@goodWFNSStrings,@goodWFDTDStrings ) { my $doc = $parser->parse_string($str); ok($doc); } } eval { my $fail = $parser->parse_string(undef); }; ok($@); foreach my $str ( @badWFStrings ) { eval { my $fail = $parser->parse_string($str); }; ok($@); } print "# 1.1.2 NO KEEP BLANKS\n"; $parser->keep_blanks(0); { foreach my $str ( @goodWFStrings,@goodWFNSStrings,@goodWFDTDStrings ) { my $doc = $parser->parse_string($str); ok($doc); } } eval { my $fail = $parser->parse_string(undef); }; ok($@); foreach my $str ( @badWFStrings ) { eval { my $fail = $parser->parse_string($str); }; ok($@); } $parser->keep_blanks(1); print "# 1.1.3 EXPAND ENTITIES\n"; $parser->expand_entities(0); { foreach my $str ( @goodWFStrings,@goodWFNSStrings,@goodWFDTDStrings ) { my $doc = $parser->parse_string($str); ok($doc); } } eval { my $fail = $parser->parse_string(undef); }; ok($@); foreach my $str ( @badWFStrings ) { eval { my $fail = $parser->parse_string($str); }; ok($@); } $parser->expand_entities(1); print "# 1.1.4 PEDANTIC\n"; $parser->pedantic_parser(1); { foreach my $str ( @goodWFStrings,@goodWFNSStrings,@goodWFDTDStrings ) { my $doc = $parser->parse_string($str); ok($doc); } } eval { my $fail = $parser->parse_string(undef); }; ok($@); foreach my $str ( @badWFStrings ) { eval { my $fail = $parser->parse_string($str); }; ok($@); } $parser->pedantic_parser(0); print "# 1.2 PARSE A FILE\n"; { my $doc = $parser->parse_file($goodfile); ok($doc); } eval {my $fail = $parser->parse_file($badfile1);}; ok($@); eval { $parser->parse_file($badfile2); }; ok($@); { my $str = "<a> <b/> </a>"; my $tstr= "<a><b/></a>"; $parser->keep_blanks(0); my $docA = $parser->parse_string($str); my $docB = $parser->parse_file("example/test3.xml"); $XML::LibXML::skipXMLDeclaration = 1; ok( $docA->toString, $tstr ); ok( $docB->toString, $tstr ); $XML::LibXML::skipXMLDeclaration = 0; } print "# 1.3 PARSE A HANDLE\n"; my $fh = IO::File->new($goodfile); ok($fh); my $doc = $parser->parse_fh($fh); ok($doc); $fh = IO::File->new($badfile1); ok($fh); eval { my $doc = $parser->parse_fh($fh); }; ok($@); $fh = IO::File->new($badfile2); eval { my $doc = $parser->parse_fh($fh); }; ok($@); { $parser->expand_entities(1); my $doc = $parser->parse_file( "example/dtd.xml" ); my @cn = $doc->documentElement->childNodes; ok( scalar @cn, 1 ); $doc = $parser->parse_file( "example/complex/complex2.xml" ); @cn = $doc->documentElement->childNodes; ok( scalar @cn, 1 ); $parser->expand_entities(0); $doc = $parser->parse_file( "example/dtd.xml" ); @cn = $doc->documentElement->childNodes; ok( scalar @cn, 3 ); } print "# 1.4 x-include processing\n"; my $goodXInclude = q{ <x> <xinclude:include xmlns:xinclude="http://www.w3.org/2001/XInclude" href="test2.xml"/> </x> }; my $badXInclude = q{ <x xmlns:xinclude="http://www.w3.org/2001/XInclude"> <xinclude:include href="bad.xml"/> </x> }; { $parser->base_uri( "example/" ); $parser->keep_blanks(0); my $doc = $parser->parse_string( $goodXInclude ); ok($doc); my $i; eval { $i = $parser->processXIncludes($doc); }; ok( $i ); $doc = $parser->parse_string( $badXInclude ); $i= undef; eval { $i = $parser->processXIncludes($doc); }; ok($@); # auto expand $parser->expand_xinclude(1); $doc = $parser->parse_string( $goodXInclude ); ok($doc); $doc = undef; eval { $doc = $parser->parse_string( $badXInclude ); }; ok($@); ok(!$doc); # some bad stuff eval{ $parser->processXIncludes(undef); }; ok($@); eval{ $parser->processXIncludes("blahblah"); }; ok($@); } print "# 2 PUSH PARSER\n"; { my $pparser = XML::LibXML->new(); print "# 2.1 PARSING WELLFORMED DOCUMENTS\n"; foreach my $key ( qw(single1 single2 single3 single4 single5 single6 single7 single8 single9 multiple1 multiple2 multiple3 multiple4 multiple5 multiple6 multiple7 multiple8 multiple9 multiple10 comment1 comment2 comment3 comment4 comment5 attr1 attr2 attr3 ns1 ns2 ns3 ns4 ns5 ns6 dtd1 dtd2) ) { print "# key is $key\n"; foreach ( @{$goodPushWF{$key}} ) { $pparser->parse_chunk( $_ ); } my $doc; eval {$doc = $pparser->parse_chunk("",1); }; print "# caught an error $@ \n" if $@; print "# document seems to be ok \n" if $doc; ok($doc && !$@); } my @good_strings = ("<foo>", "bar", "</foo>" ); my %bad_strings = ( predocend1 => ["<A>" ], predocend2 => ["<A>", "B"], predocend3 => ["<A>", "<C>"], predocend4 => ["<A>", "<C/>"], postdocend1 => ["<A/>", "<C/>"], # use with libxml2 2.4.26: postdocend2 => ["<A/>", "B"], # libxml2 < 2.4.26 bug postdocend3 => ["<A/>", "BB"], badcdata => ["<A> ","<!","[CDATA[B]","</A>"], badending1 => ["<A> ","B","</C>"], badending2 => ["<A> ","</C>","</A>"], ); my $parser = XML::LibXML->new; { for ( @good_strings ) { $parser->parse_chunk( $_ ); } my $doc = $parser->parse_chunk("",1); ok($doc); } { print "# 2.2 PARSING BROKEN DOCUMENTS\n"; my $doc; foreach my $key ( keys %bad_strings ) { print "# $key\n"; $doc = undef; foreach ( @{$bad_strings{$key}} ) { eval { $parser->parse_chunk( $_ );}; if ( $@ ) { # if we won't stop here, we will loose the error :| last; } } if ( $@ ) { ok(1); # $parser->parse_chunk("",1); # will cause no harm anymore, but is still needed next; } eval { $doc = $parser->parse_chunk("",1); }; ok($@); } } { print "# 2.3 RECOVERING PUSH PARSER\n"; $parser->init_push; foreach ( "<A>", "B" ) { $parser->push( $_); } my $doc; eval { local $SIG{'__WARN__'} = sub { }; $doc = $parser->finish_push(1); }; ok( $doc ); } } print "# 3 SAX PARSER\n"; { my $handler = XML::LibXML::SAX::Builder->new(); my $generator = XML::LibXML::SAX->new( Handler=>$handler ); my $string = q{<bar foo="bar">foo</bar>}; $doc = $generator->parse_string( $string ); ok( $doc ); print "# 3.1 GENERAL TESTS \n"; foreach my $str ( @goodWFStrings ) { my $doc = $generator->parse_string( $str ); ok( $doc ); } print "# CDATA Sections\n"; $string = q{<foo><![CDATA[&foo<bar]]></foo>}; $doc = $generator->parse_string( $string ); my @cn = $doc->documentElement->childNodes(); ok( scalar @cn ); ok( $cn[0]->nodeType, XML_CDATA_SECTION_NODE ); ok( $cn[0]->textContent, "&foo<bar" ); ok( $cn[0]->toString, '<![CDATA[&foo<bar]]>'); print "# 3.2 NAMESPACE TESTS\n"; my $i = 0; foreach my $str ( @goodWFNSStrings ) { my $doc = $generator->parse_string( $str ); ok( $doc ); # skip the nested node tests until there is a xmlNormalizeNs(). #ok(1),next if $i > 2; ok( $doc->toString(), $str ); $i++ } print "# DATA CONSISTENCE\n"; # find out if namespaces are there my $string2 = q{<foo xmlns:bar="http://foo.bar">bar<bar:bi/></foo>}; $doc = $generator->parse_string( $string2 ); my @attrs = $doc->documentElement->attributes; ok( scalar @attrs ); if ( scalar @attrs ) { ok( $attrs[0]->nodeType, XML_NAMESPACE_DECL ); } else { ok(0); } my $root = $doc->documentElement; # bad thing: i have to do some NS normalizing. # libxml2 will only do some fixing. this will lead to multiple # declarations, if a node with a new namespace is added. my $vstring = q{<foo xmlns:bar="http://foo.bar">bar<bar:bi/></foo>}; # my $vstring = q{<foo xmlns:bar="http://foo.bar">bar<bar:bi xmlns:bar="http://foo.bar"/></foo>}; ok($root->toString, $vstring ); print "# 3.3 INTERNAL SUBSETS\n"; foreach my $str ( @goodWFDTDStrings ) { my $doc = $generator->parse_string( $str ); ok( $doc ); } print "# 3.5 PARSE URI\n"; $doc = $generator->parse_uri( "example/test.xml" ); ok($doc); print "# 3.6 PARSE CHUNK\n"; } print "# 4 SAXY PUSHER\n"; { my $handler = XML::LibXML::SAX::Builder->new(); my $parser = XML::LibXML->new; $parser->set_handler( $handler ); $parser->push( '<foo/>' ); my $doc = $parser->finish_push; ok($doc); foreach my $key ( keys %goodPushWF ) { foreach ( @{$goodPushWF{$key}} ) { $parser->push( $_); } my $doc; eval {$doc = $parser->finish_push; }; ok($doc); } } print "# 5 PARSE WELL BALANCED CHUNKS\n"; { my $MAX_WF_C = 11; my $MAX_WB_C = 16; my %chunks = ( wellformed1 => '<A/>', wellformed2 => '<A></A>', wellformed3 => '<A B="C"/>', wellformed4 => '<A>D</A>', wellformed5 => '<A><![CDATA[D]]></A>', wellformed6 => '<A><!--D--></A>', wellformed7 => '<A><K/></A>', wellformed8 => '<A xmlns="xml://E"/>', wellformed9 => '<F:A xmlns:F="xml://G" F:A="B">D</F:A>', wellformed10 => '<!--D-->', wellformed11 => '<A xmlns:F="xml://E"/>', wellbalance1 => '<A/><A/>', wellbalance2 => '<A></A><A></A>', wellbalance3 => '<A B="C"/><A B="H"/>', wellbalance4 => '<A>D</A><A>I</A>', wellbalance5 => '<A><K/></A><A><L/></A>', wellbalance6 => '<A><![CDATA[D]]></A><A><![CDATA[I]]></A>', wellbalance7 => '<A><!--D--></A><A><!--I--></A>', wellbalance8 => '<F:A xmlns:F="xml://G" F:A="B">D</F:A><J:A xmlns:J="xml://G" J:A="M">D</J:A>', wellbalance9 => 'D<A/>', wellbalance10=> 'D<A/>D', wellbalance11=> 'D<A/><!--D-->', wellbalance12=> 'D<A/><![CDATA[D]]>', wellbalance13=> '<![CDATA[D]]><A/>D', wellbalance14=> '<!--D--><A/>', wellbalance15=> '<![CDATA[D]]>', wellbalance16=> 'D', ); my @badWBStrings = ( "", "<ouch>", "<ouch>bar", "bar</ouch>", "<ouch/>&foo;", # undefined entity "&", # bad char "häh?", # bad encoding "<!--->", # bad stays bad ;) "<!----->", # bad stays bad ;) ); my $pparser = XML::LibXML->new; print "# 5.1 DOM CHUNK PARSER\n"; for ( 1..$MAX_WF_C ) { my $frag = $pparser->parse_xml_chunk($chunks{'wellformed'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) { print "# well formness test $_\n"; if ( $chunks{'wellformed'.$_} =~ /\<A\>\<\/A\>/ ) { $_--; # because we cannot distinguish between <a/> and <a></a> } ok($frag->toString,$chunks{'wellformed'.$_}); next; } } ok(0); } for ( 1..$MAX_WB_C ) { my $frag = $pparser->parse_xml_chunk($chunks{'wellbalance'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $chunks{'wellbalance'.$_} =~ /<A><\/A>/ ) { $_--; } ok($frag->toString,$chunks{'wellbalance'.$_}); next; } ok(0); } eval { my $fail = $pparser->parse_xml_chunk(undef); }; ok($@); eval { my $fail = $pparser->parse_xml_chunk(""); }; ok($@); foreach my $str ( @badWBStrings ) { eval { my $fail = $pparser->parse_xml_chunk($str); }; ok($@); } { print "# 5.1.1 Segmenation fault tests\n"; my $sDoc = '<C/><D/>'; my $sChunk = '<A/><B/>'; my $parser = XML::LibXML->new(); my $doc = $parser->parse_xml_chunk( $sDoc, undef ); my $chk = $parser->parse_xml_chunk( $sChunk,undef ); my $fc = $doc->firstChild; $doc->appendChild( $chk ); ok( $doc->toString(), '<C/><D/><A/><B/>' ); } { print "# 5.1.2 Segmenation fault tests\n"; my $sDoc = '<C/><D/>'; my $sChunk = '<A/><B/>'; my $parser = XML::LibXML->new(); my $doc = $parser->parse_xml_chunk( $sDoc, undef ); my $chk = $parser->parse_xml_chunk( $sChunk,undef ); my $fc = $doc->firstChild; $doc->insertAfter( $chk, $fc ); ok( $doc->toString(), '<C/><A/><B/><D/>' ); } { print "# 5.1.3 Segmenation fault tests\n"; my $sDoc = '<C/><D/>'; my $sChunk = '<A/><B/>'; my $parser = XML::LibXML->new(); my $doc = $parser->parse_xml_chunk( $sDoc, undef ); my $chk = $parser->parse_xml_chunk( $sChunk,undef ); my $fc = $doc->firstChild; $doc->insertBefore( $chk, $fc ); ok( $doc->toString(), '<A/><B/><C/><D/>' ); } ok(1); print "# 5.2 SAX CHUNK PARSER\n"; my $handler = XML::LibXML::SAX::Builder->new(); my $parser = XML::LibXML->new; $parser->set_handler( $handler ); for ( 1..$MAX_WF_C ) { my $frag = $parser->parse_xml_chunk($chunks{'wellformed'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $frag->firstChild->isSameNode( $frag->lastChild ) ) { if ( $chunks{'wellformed'.$_} =~ /\<A\>\<\/A\>/ ) { $_--; } ok($frag->toString,$chunks{'wellformed'.$_}); next; } } ok(0); } for ( 1..$MAX_WB_C ) { my $frag = $parser->parse_xml_chunk($chunks{'wellbalance'.$_}); ok($frag); if ( $frag->nodeType == XML_DOCUMENT_FRAG_NODE && $frag->hasChildNodes ) { if ( $chunks{'wellbalance'.$_} =~ /<A><\/A>/ ) { $_--; } ok($frag->toString,$chunks{'wellbalance'.$_}); next; } ok(0); } } { print "# 6 VALIDATING PARSER\n"; my %badstrings = ( SIMPLE => '<?xml version="1.0"?>'."\n<A/>\n", ); my $parser = XML::LibXML->new; $parser->validation(1); my $doc; eval { $doc = $parser->parse_string($badstrings{SIMPLE}); }; ok( $@ ); my $ql; } { print "# 7 LINE NUMBERS\n"; my $goodxml = <<EOXML; <?xml version="1.0"?> <foo> <bar/> </foo> EOXML my $badxml = <<EOXML; <?xml version="1.0"?> <!DOCTYPE foo [<!ELEMENT foo EMPTY>]> <bar/> EOXML my $parser = XML::LibXML->new; $parser->validation(1); eval { $parser->parse_string( $badxml ); }; # correct line number may or may not be present # depending on libxml2 version ok( $@ =~ /^:[03]:/ ); $parser->line_numbers(1); eval { $parser->parse_string( $badxml ); }; ok( $@ =~ /^:3:/ ); # switch off validation for the following tests $parser->validation(0); my $doc; eval { $doc = $parser->parse_string( $goodxml ); }; my $root = $doc->documentElement(); ok( $root->line_number(), 2); my @kids = $root->childNodes(); ok( $kids[1]->line_number(),3 ); my $newkid = $root->appendChild( $doc->createElement( "bar" ) ); ok( $newkid->line_number(), 0 ); $parser->line_numbers(0); eval { $doc = $parser->parse_string( $goodxml ); }; $root = $doc->documentElement(); ok( $root->line_number(), 0); @kids = $root->childNodes(); ok( $kids[1]->line_number(), 0 ); } print "# " . XML::LibXML::LIBXML_VERSION . "\n"; if ( XML::LibXML::LIBXML_VERSION >= 20600 ) { print "# 8 Clean Namespaces\n"; my ( $xsDoc1, $xsDoc2 ); $xsDoc1 = q{<A:B xmlns:A="http://D"><A:C xmlns:A="http://D"></A:C></A:B>}; $xsDoc2 = q{<A:B xmlns:A="http://D"><A:C xmlns:A="http://E"/></A:B>}; my $parser = XML::LibXML->new(); $parser->clean_namespaces(1); my $fn1 = "example/xmlns/goodguy.xml"; my $fn2 = "example/xmlns/badguy.xml"; ok( $parser->parse_string( $xsDoc1 )->documentElement->toString(), q{<A:B xmlns:A="http://D"><A:C/></A:B>} ); ok( $parser->parse_string( $xsDoc2 )->documentElement->toString(), $xsDoc2 ); ok( $parser->parse_file( $fn1 )->documentElement->toString(), q{<A:B xmlns:A="http://D"><A:C/></A:B>} ); ok( $parser->parse_file( $fn2 )->documentElement->toString() , $xsDoc2 ); my $fh1 = IO::File->new($fn1); my $fh2 = IO::File->new($fn2); ok( $parser->parse_fh( $fh1 )->documentElement->toString(), q{<A:B xmlns:A="http://D"><A:C/></A:B>} ); ok( $parser->parse_fh( $fh2 )->documentElement->toString() , $xsDoc2 ); my @xaDoc1 = ('<A:B xmlns:A="http://D">','<A:C xmlns:A="h','ttp://D"/>' ,'</A:B>'); my @xaDoc2 = ('<A:B xmlns:A="http://D">','<A:C xmlns:A="h','ttp://E"/>' , '</A:B>'); my $doc; foreach ( @xaDoc1 ) { $parser->parse_chunk( $_ ); } $doc = $parser->parse_chunk( "", 1 ); ok( $doc->documentElement->toString(), q{<A:B xmlns:A="http://D"><A:C/></A:B>} ); foreach ( @xaDoc2 ) { $parser->parse_chunk( $_ ); } $doc = $parser->parse_chunk( "", 1 ); ok( $doc->documentElement->toString() , $xsDoc2 ); } ## # test if external subsets are loaded correctly { my $xmldoc = <<EOXML; <!DOCTYPE X SYSTEM "example/ext_ent.dtd"> <X>&foo;</X> EOXML my $parser = XML::LibXML->new(); $parser->load_ext_dtd(1); # first time it should work my $doc = $parser->parse_string( $xmldoc ); ok( $doc->documentElement()->string_value(), " test " ); # second time it must not fail. my $doc2 = $parser->parse_string( $xmldoc ); ok( $doc2->documentElement()->string_value(), " test " ); } ## # Test ticket #7668 xinclude breaks entity expansion # [CG] removed again, since #7668 claims the spec is incorrect ## # Test ticket #7913 { my $xmldoc = <<EOXML; <!DOCTYPE X SYSTEM "example/ext_ent.dtd"> <X>&foo;</X> EOXML my $parser = XML::LibXML->new(); $parser->load_ext_dtd(1); # first time it should work my $doc = $parser->parse_string( $xmldoc ); ok( $doc->documentElement()->string_value(), " test " ); # lets see if load_ext_dtd(0) works $parser->load_ext_dtd(0); my $doc2; eval { $doc2 = $parser->parse_string( $xmldoc ); }; ok($@); $parser->validation(1); $parser->load_ext_dtd(0); my $doc3; eval { $doc3 = $parser->parse_file( "example/article_external_bad.xml" ); }; ok( $doc3 ); $parser->load_ext_dtd(1); eval { $doc3 = $parser->parse_file( "example/article_external_bad.xml" ); }; ok( $@); } sub tsub { my $doc = shift; my $th = {}; $th->{d} = XML::LibXML::Document->createDocument; my $e1 = $th->{d}->createElementNS("x","X:foo"); $th->{d}->setDocumentElement( $e1 ); my $e2 = $th->{d}->createElementNS( "x","X:bar" ); $e1->appendChild( $e2 ); $e2->appendChild( $th->{d}->importNode( $doc->documentElement() ) ); return $th->{d}; } sub tsub2 { my ($doc,$query)=($_[0],@{$_[1]}); # return [ $doc->findnodes($query) ]; return [ $doc->findnodes(encodeToUTF8('iso-8859-1',$query)) ]; }