#!/local/bin/perl =head1 NAME robot-rules.t =head1 DESCRIPTION Test a number of different A files against a number of different User-agents. =cut require WWW::RobotRules; use Carp; use strict; print "1..50\n"; # for Test::Harness # We test a number of different /robots.txt files, # my $content1 = < 1 => 'http://foo/private' => 1, 2 => 'http://foo/also_private' => 1, ], [$content1, 'Wubble' => 3 => 'http://foo/private' => 0, 4 => 'http://foo/also_private' => 0, 5 => 'http://foo/other' => 1, ], [$content2, 'MOMspider' => 6 => 'http://foo/private' => 0, 7 => 'http://foo/other' => 1, ], [$content2, 'Wubble' => 8 => 'http://foo/private' => 1, 9 => 'http://foo/also_private' => 1, 10 => 'http://foo/other' => 1, ], [$content3, 'MOMspider' => 11 => 'http://foo/private' => 1, 12 => 'http://foo/other' => 1, ], [$content3, 'Wubble' => 13 => 'http://foo/private' => 1, 14 => 'http://foo/other' => 1, ], [$content4, 'MOMspider' => 15 => 'http://foo/private' => 1, 16 => 'http://foo/this' => 0, 17 => 'http://foo/that' => 1, ], [$content4, 'Another' => 18 => 'http://foo/private' => 1, 19 => 'http://foo/this' => 1, 20 => 'http://foo/that' => 0, ], [$content4, 'Wubble' => 21 => 'http://foo/private' => 0, 22 => 'http://foo/this' => 1, 23 => 'http://foo/that' => 1, ], [$content4, 'Another/1.0' => 24 => 'http://foo/private' => 1, 25 => 'http://foo/this' => 1, 26 => 'http://foo/that' => 0, ], [$content4, "SvartEnke1" => 27 => "http://foo/" => 0, 28 => "http://foo/this" => 0, 29 => "http://bar/" => 1, ], [$content4, "SvartEnke2" => 30 => "http://foo/" => 1, 31 => "http://foo/this" => 1, 32 => "http://bar/" => 1, ], [$content4, "MomSpiderJr" => # should match "MomSpider" 33 => 'http://foo/private' => 1, 34 => 'http://foo/also_private' => 1, 35 => 'http://foo/this/' => 0, ], [$content4, "SvartEnk" => # should match "*" 36 => "http://foo/" => 1, 37 => "http://foo/private/" => 0, 38 => "http://bar/" => 1, ], [$content5, 'Villager/1.0' => 39 => 'http://foo/west-wing/' => 0, 40 => 'http://foo/' => 0, ], [$content5, 'Belle/2.0' => 41 => 'http://foo/west-wing/' => 0, 42 => 'http://foo/' => 1, ], [$content5, 'Beast/3.0' => 43 => 'http://foo/west-wing/' => 1, 44 => 'http://foo/' => 1, ], [$content6, 'Villager/1.0' => 45 => 'http://foo/west-wing/' => 0, 46 => 'http://foo/' => 0, ], [$content6, 'Belle/2.0' => 47 => 'http://foo/west-wing/' => 0, 48 => 'http://foo/' => 1, ], [$content6, 'Beast/3.0' => 49 => 'http://foo/west-wing/' => 1, 50 => 'http://foo/' => 1, ], # when adding tests, remember to increase # the maximum at the top ); my $t; for $t (@tests1) { my ($content, $ua) = splice(@$t, 0, 2); my $robotsrules = new WWW::RobotRules($ua); $robotsrules->parse('http://foo/robots.txt', $content); my ($num, $path, $expected); while(($num, $path, $expected) = splice(@$t, 0, 3)) { my $allowed = $robotsrules->allowed($path); $allowed = 1 if $allowed; if($allowed != $expected) { $robotsrules->dump; confess "Test Failed: $ua => $path ($allowed != $expected)"; } print "ok $num\n"; } }