package XML::SAX::PurePerl;
use strict;
# Detect the character encoding of an XML document by inspecting its first
# few bytes (byte-order marks and the byte patterns of a leading "<" / "<?").
#
# Arguments:
#   $parser - object whose parser_error($message, $reader) method is called
#             when the leading bytes begin a known signature but fail to
#             complete it.
#   $reader - byte reader. This routine uses its match_nocheck, match_nonext,
#             current, next, set_encoding and buffer methods.
#
# Returns nothing. On a recognised signature the reader's encoding is set via
# set_encoding(); when the signature included real document characters (no
# byte-order mark), those characters are passed to buffer() in decoded form.
# Plain 8-bit input starting with "<" only has the consumed characters
# buffered and no encoding set.
#
# NOTE(review): the reader methods are defined elsewhere. From the call
# pattern here, match_nocheck appears to consume the byte when it matches,
# while match_nonext only tests it (hence the explicit next() after every
# successful match_nonext) -- confirm against the reader class.
sub encoding_detect {
    my ($parser, $reader) = @_;

    my $error = "Invalid byte sequence at start of file";

    if ($reader->match_nocheck("\x00")) {
        if ($reader->match_nocheck("\x00")) {
            if ($reader->match_nocheck("\xFE")) {
                # 00 00 FE FF: byte-order mark, UCS-4 big-endian.
                if ($reader->match_nonext("\xFF")) {
                    $reader->set_encoding("UCS-4BE");
                    $reader->next;
                    return;
                }
            }
            elsif ($reader->match_nocheck("\xFF")) {
                # 00 00 FF FE: byte-order mark, UCS-4 in 2143 byte order.
                if ($reader->match_nonext("\xFE")) {
                    $reader->set_encoding("UCS-4-2143");
                    $reader->next;
                    return;
                }
            }
            elsif ($reader->match_nocheck("\x00")) {
                # 00 00 00 3C: "<" in UCS-4 big-endian, no byte-order mark.
                if ($reader->match_nonext("\x3C")) {
                    $reader->set_encoding("UCS-4BE");
                    $reader->next;
                    $reader->buffer('<');
                    return;
                }
            }
            elsif ($reader->match_nocheck("\x3C")) {
                # 00 00 3C 00: "<" in UCS-4 2143 order, no byte-order mark.
                if ($reader->match_nonext("\x00")) {
                    $reader->set_encoding("UCS-4-2143");
                    $reader->next;
                    $reader->buffer('<');
                    return;
                }
            }
        }
        elsif ($reader->match_nocheck("\x3C")) {
            if ($reader->match_nocheck("\x00")) {
                # 00 3C 00 00: "<" in UCS-4 3412 order, no byte-order mark.
                if ($reader->match_nonext("\x00")) {
                    $reader->set_encoding("UCS-4-3412");
                    $reader->next;
                    $reader->buffer("<");
                    return;
                }
                # 00 3C 00 3F: "<?" in UTF-16 big-endian, no byte-order mark.
                elsif ($reader->match_nonext("\x3F")) {
                    $reader->set_encoding("UTF-16BE");
                    # Step past the 0x3F byte: it is already accounted for
                    # by the buffered "<?", exactly as in the UTF-16LE case.
                    $reader->next;
                    $reader->buffer("<?");
                    return;
                }
            }
        }
        $parser->parser_error($error, $reader);
    }
    elsif ($reader->match_nocheck("\xFF")) {
        if ($reader->match_nocheck("\xFE")) {
            if ($reader->match_nocheck("\x00")) {
                # FF FE 00 00: byte-order mark, UCS-4 little-endian.
                if ($reader->match_nonext("\x00")) {
                    $reader->set_encoding("UCS-4LE");
                    $reader->next;
                    return;
                }
            }
            else {
                # FF FE followed by anything else: UTF-16 little-endian
                # byte-order mark. The next two bytes are read as one
                # little-endian 16-bit code unit ("v") and handed back as
                # the first decoded character.
                my $byte1 = $reader->current;
                $reader->next;
                my $char = chr unpack("v", $byte1 . $reader->current);
                $reader->set_encoding("UTF-16LE");
                $reader->next;
                $reader->buffer($char);
                return;
            }
        }
        $parser->parser_error($error, $reader);
    }
    elsif ($reader->match_nocheck("\xFE")) {
        if ($reader->match_nocheck("\xFF")) {
            if ($reader->match_nocheck("\x00")) {
                # FE FF 00 00: byte-order mark, UCS-4 in 3412 byte order.
                if ($reader->match_nonext("\x00")) {
                    $reader->set_encoding("UCS-4-3412");
                    $reader->next;
                    return;
                }
                # FE FF 00 3C: UTF-16 big-endian byte-order mark, then "<".
                elsif ($reader->match_nonext("\x3C")) {
                    $reader->set_encoding("UTF-16BE");
                    $reader->next;
                    $reader->buffer("<");
                    return;
                }
            }
        }
        $parser->parser_error($error, $reader);
    }
    elsif ($reader->match_nocheck("\xEF")) {
        if ($reader->match_nocheck("\xBB")) {
            # EF BB BF: UTF-8 byte-order mark.
            if ($reader->match_nonext("\xBF")) {
                $reader->set_encoding("UTF-8");
                $reader->next;
                return;
            }
        }
        $parser->parser_error($error, $reader);
    }
    elsif ($reader->match_nocheck("\x3C")) {
        if ($reader->match_nocheck("\x00")) {
            if ($reader->match_nocheck("\x00")) {
                # 3C 00 00 00: "<" in UCS-4 little-endian, no byte-order mark.
                if ($reader->match_nonext("\x00")) {
                    $reader->set_encoding("UCS-4LE");
                    $reader->next;
                    $reader->buffer("<");
                    return;
                }
            }
            elsif ($reader->match_nocheck("\x3F")) {
                # 3C 00 3F 00: "<?" in UTF-16 little-endian, no byte-order mark.
                if ($reader->match_nonext("\x00")) {
                    $reader->set_encoding("UTF-16LE");
                    $reader->next;
                    $reader->buffer("<?");
                    return;
                }
            }
        }
        elsif ($reader->match_nocheck("\x3F")) {
            # "<?" in some 8-bit-compatible encoding: no encoding is set,
            # only the characters consumed so far are handed back.
            if ($reader->match_nocheck("\x78")) {
                if ($reader->match_nocheck("\x6D")) {
                    $reader->buffer("<?xm");
                    return;
                }
                else {
                    $reader->buffer('<?x');
                    return;
                }
            }
            else {
                $reader->buffer('<?');
                return;
            }
        }
        else {
            $reader->buffer("<");
            return;
        }
        # Only reached for "<" followed by 0x00 that completes neither the
        # UCS-4LE nor the UTF-16LE pattern. Bytes have been consumed and
        # nothing buffered, so report it like every other broken signature
        # instead of returning silently.
        $parser->parser_error($error, $reader);
    }
    elsif ($reader->match_nocheck("\x4C") &&
           $reader->match_nocheck("\x6F") &&
           $reader->match_nocheck("\xA7") &&
           $reader->match_nonext("\x94"))
    {
        # 4C 6F A7 94: "<?xm" in EBCDIC.
        $reader->set_encoding("EBCDIC");
        $reader->next;
        return;
    }

    # No recognised signature (or parser_error returned): leave the reader's
    # encoding untouched.
    return;
}
1;