package XML::SAX::PurePerl;
use strict;
use XML::SAX::PurePerl::Productions qw($NameChar $SingleChar);
sub elementdecl {
my ($self, $reader) = @_;
if ($reader->match_string('<!ELEMENT')) {
$self->skip_whitespace($reader) ||
$self->parser_error("No whitespace after ELEMENT declaration", $reader);
my $name = $self->Name($reader);
$self->skip_whitespace($reader) ||
$self->parser_error("No whitespace after ELEMENT's name", $reader);
$self->contentspec($reader, $name);
$self->skip_whitespace($reader);
$reader->match('>') ||
$self->parser_error("Closing angle bracket not found on ELEMENT declaration", $reader);
return 1;
}
return 0;
}
sub contentspec {
my ($self, $reader, $name) = @_;
my $model;
if ($reader->match_string('EMPTY')) {
$model = 'EMPTY';
}
elsif ($reader->match_string('ANY')) {
$model = 'ANY';
}
else {
$model = $self->Mixed_or_children($reader);
}
if ($model) {
$self->element_decl({Name => $name, Model => $model});
return 1;
}
$self->parser_error("contentspec not found in ELEMENT declaration", $reader);
}
sub Mixed_or_children {
my ($self, $reader) = @_;
my $model;
if ($reader->match('(')) {
$model = '(';
$self->skip_whitespace($reader);
if ($reader->match_string('#PCDATA')) {
return $self->Mixed($reader);
}
$reader->buffer('(');
return $self->children($reader);
}
return;
}
sub Mixed {
my ($self, $reader) = @_;
my $model = '(#PCDATA';
$self->skip_whitespace($reader);
my %seen;
while ($reader->match('|')) {
$self->skip_whitespace($reader);
my $name = $self->Name($reader) ||
$self->parser_error("No 'Name' after Mixed content '|'", $reader);
if ($seen{$name}) {
$self->parser_error("Element '$name' has already appeared in this group", $reader);
}
$seen{$name}++;
$model .= "|$name";
$self->skip_whitespace($reader);
}
$reader->match(')') || $self->parser_error("no closing bracket on mixed content", $reader);
$model .= ")";
if ($reader->match('*')) {
$model .= "*";
}
return $model;
}
sub children {
my ($self, $reader) = @_;
return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader);
}
sub ChoiceOrSeq {
my ($self, $reader) = @_;
$reader->match('(') || $self->parser_error("choice/seq contains no opening bracket", $reader);
my $model = '(';
$self->skip_whitespace($reader);
$model .= $self->Cp($reader);
if (my $choice = $self->Choice($reader)) {
$model .= $choice;
}
else {
$model .= $self->Seq($reader);
}
$self->skip_whitespace($reader);
$reader->match(')') || $self->parser_error("choice/seq contains no closing bracket", $reader);
$model .= ')';
return $model;
}
sub Cardinality {
my ($self, $reader) = @_;
if ($reader->match('?')) {
return '?';
}
if ($reader->match('+')) {
return '+';
}
if ($reader->match('*')) {
return '*';
}
return '';
}
sub Cp {
my ($self, $reader) = @_;
my $model;
if (my $name = $self->Name($reader)) {
return $name . $self->Cardinality($reader);
}
return $self->ChoiceOrSeq($reader) . $self->Cardinality($reader);
}
sub Choice {
my ($self, $reader) = @_;
my $model = '';
$self->skip_whitespace($reader);
while ($reader->match('|')) {
$self->skip_whitespace($reader);
$model .= '|';
$model .= $self->Cp($reader);
$self->skip_whitespace($reader);
}
return $model;
}
sub Seq {
my ($self, $reader) = @_;
my $model = '';
$self->skip_whitespace($reader);
while ($reader->match(',')) {
$self->skip_whitespace($reader);
$model .= ',';
$model .= $self->Cp($reader);
$self->skip_whitespace($reader);
}
return $model;
}
sub AttlistDecl {
my ($self, $reader) = @_;
if ($reader->match_string('<!ATTLIST')) {
$self->skip_whitespace($reader) ||
$self->parser_error("No whitespace after ATTLIST declaration", $reader);
my $name = $self->Name($reader);
$self->AttDefList($reader, $name);
$self->skip_whitespace($reader);
$reader->match('>') ||
$self->parser_error("Closing angle bracket not found on ATTLIST declaration", $reader);
return 1;
}
return 0;
}
sub AttDefList {
my ($self, $reader, $name) = @_;
1 while $self->AttDef($reader, $name);
}
sub AttDef {
my ($self, $reader, $el_name) = @_;
$self->skip_whitespace($reader) || return 0;
my $att_name = $self->Name($reader) || return 0;
$self->skip_whitespace($reader) ||
$self->parser_error("No whitespace after Name in attribute definition", $reader);
my $att_type = $self->AttType($reader);
$self->skip_whitespace($reader) ||
$self->parser_error("No whitespace after AttType in attribute definition", $reader);
my ($default, $value) = $self->DefaultDecl($reader);
$self->attribute_decl({
eName => $el_name,
aName => $att_name,
Type => $att_type,
ValueDefault => $default,
Value => $value,
});
return 1;
}
sub AttType {
my ($self, $reader) = @_;
return $self->StringType($reader) ||
$self->TokenizedType($reader) ||
$self->EnumeratedType($reader) ||
$self->parser_error("Can't match AttType", $reader);
}
sub StringType {
my ($self, $reader) = @_;
if ($reader->match_string('CDATA')) {
return 'CDATA';
}
return;
}
sub TokenizedType {
my ($self, $reader) = @_;
if ($reader->match_string('IDREFS')) {
return 'IDREFS';
}
if ($reader->match_string('IDREF')) {
return 'IDREF';
}
if ($reader->match_string('ID')) {
return 'ID';
}
if ($reader->match_string('ENTITIES')) {
return 'ENTITIES';
}
if ($reader->match_string('ENTITY')) {
return 'ENTITY';
}
if ($reader->match_string('NMTOKENS')) {
return 'NMTOKENS';
}
if ($reader->match_string('NMTOKEN')) {
return 'NMTOKEN';
}
return;
}
sub EnumeratedType {
my ($self, $reader) = @_;
return $self->NotationType($reader) || $self->Enumeration($reader);
}
sub NotationType {
my ($self, $reader) = @_;
if ($reader->match_string('NOTATION')) {
$self->skip_whitespace($reader) ||
$self->parser_error("No whitespace after NOTATION", $reader);
$reader->match('(') ||
$self->parser_error("No opening bracket in notation section", $reader);
$self->skip_whitespace($reader);
my $model = 'NOTATION (';
my $name = $self->Name($reader) ||
$self->parser_error("No name in notation section", $reader);
$model .= $name;
$self->skip_whitespace($reader);
while ($reader->match('|')) {
$model .= '|';
$self->skip_whitespace($reader);
my $name = $self->Name($reader) ||
$self->parser_error("No name in notation section", $reader);
$model .= $name;
$self->skip_whitespace($reader);
}
$reader->match(')') ||
$self->parser_error("No closing bracket in notation section", $reader);
$model .= ')';
return $model;
}
return;
}
sub Enumeration {
my ($self, $reader) = @_;
if ($reader->match('(')) {
$self->skip_whitespace($reader);
my $model = '(';
my $nmtoken = $self->Nmtoken($reader) ||
$self->parser_error("No Nmtoken in enumerated declaration", $reader);
$model .= $nmtoken;
$self->skip_whitespace($reader);
while ($reader->match('|')) {
$model .= '|';
$self->skip_whitespace($reader);
my $nmtoken = $self->Nmtoken($reader) ||
$self->parser_error("No Nmtoken in enumerated declaration", $reader);
$model .= $nmtoken;
$self->skip_whitespace($reader);
}
$reader->match(')') ||
$self->parser_error("No closing bracket in enumerated declaration", $reader);
$model .= ')';
return $model;
}
return;
}
sub Nmtoken {
my ($self, $reader) = @_;
$reader->consume($NameChar);
return $reader->consumed;
}
sub DefaultDecl {
my ($self, $reader) = @_;
if ($reader->match_string('#REQUIRED')) {
return '#REQUIRED';
}
if ($reader->match_string('#IMPLIED')) {
return '#IMPLIED';
}
my $model = '';
if ($reader->match_string('#FIXED')) {
$self->skip_whitespace($reader) || $self->parser_error(
"no whitespace after FIXED specifier", $reader);
my $value = $self->AttValue($reader);
return "#FIXED", $value;
}
my $value = $self->AttValue($reader);
return undef, $value;
}
sub EntityDecl {
my ($self, $reader) = @_;
if ($reader->match_string('<!ENTITY')) {
$self->skip_whitespace($reader) || $self->parser_error(
"No whitespace after ENTITY declaration", $reader);
$self->PEDecl($reader) || $self->GEDecl($reader);
$self->skip_whitespace($reader);
$reader->match('>') || $self->parser_error("No closing '>' in entity definition", $reader);
return 1;
}
return 0;
}
sub GEDecl {
my ($self, $reader) = @_;
my $name = $self->Name($reader) || $self->parser_error("No entity name given", $reader);
$self->skip_whitespace($reader) || $self->parser_error("No whitespace after entity name", $reader);
my $value;
if ($value = $self->ExternalID($reader)) {
$value .= $self->NDataDecl($reader);
}
else {
$value = $self->EntityValue($reader);
}
if ($self->{ParseOptions}{entities}{$name}) {
warn("entity $name already exists\n");
} else {
$self->{ParseOptions}{entities}{$name} = 1;
$self->{ParseOptions}{expanded_entity}{$name} = $value; }
return 1;
}
sub PEDecl {
my ($self, $reader) = @_;
$reader->match('%') || return 0;
$self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity marker", $reader);
my $name = $self->Name($reader) || $self->parser_error("No parameter entity name given", $reader);
$self->skip_whitespace($reader) || $self->parser_error("No whitespace after parameter entity name", $reader);
my $value = $self->ExternalID($reader) ||
$self->EntityValue($reader) ||
$self->parser_error("PE is not a value or an external resource", $reader);
return 1;
}
my $quotre = qr/[^%&\"]/;
my $aposre = qr/[^%&\']/;
sub EntityValue {
my ($self, $reader) = @_;
my $quote = '"';
my $re = $quotre;
if (!$reader->match($quote)) {
$quote = "'";
$re = $aposre;
$reader->match($quote) ||
$self->parser_error("Not a quote character", $reader);
}
my $value = '';
while (1) {
if ($reader->consume($re)) {
$value .= $reader->consumed;
}
elsif ($reader->match('&')) {
if ($reader->match('#')) {
my $char;
my $ref;
if ($reader->match('x')) {
$reader->consume(qr/[0-9a-fA-F]/) ||
$self->parser_error("Hex character reference contains illegal characters", $reader);
$ref = $reader->consumed;
$char = chr_ref(hex($ref));
$ref = "x$ref";
}
else {
$reader->consume(qr/[0-9]/) ||
$self->parser_error("Decimal character reference contains illegal characters", $reader);
$ref = $reader->consumed;
$char = chr($ref);
}
$reader->match(';') ||
$self->parser_error("No semi-colon found after character reference", $reader);
if ($char !~ $SingleChar) { $self->parser_error("Character reference '&#$ref;' refers to an illegal XML character ($char)", $reader);
}
$value .= $char;
}
else {
$value .= '&';
}
}
elsif ($reader->match('%')) {
$value .= $self->PEReference($reader);
}
elsif ($reader->match($quote)) {
last;
}
else {
$self->parser_error("Invalid character in attribute value", $reader);
}
}
return $value;
}
sub NDataDecl {
my ($self, $reader) = @_;
$self->skip_whitespace($reader) || return '';
$reader->match_string("NDATA") || return '';
$self->skip_whitespace($reader) || $self->parser_error("No whitespace after NDATA declaration", $reader);
my $name = $self->Name($reader) || $self->parser_error("NDATA declaration lacks a proper Name", $reader);
return " NDATA $name";
}
sub NotationDecl {
my ($self, $reader) = @_;
if ($reader->match_string('<!NOTATION')) {
$self->skip_whitespace($reader) ||
$self->parser_error("No whitespace after NOTATION declaration", $reader);
$reader->consume(qr/[^>]/); $reader->match('>'); $self->notation_decl({Name => "FIXME", SystemId => "FIXME", PublicId => "FIXME" });
return 1;
}
return 0;
}
1;