"""Convert ESIS events to SGML or XML markup.
This is limited, but seems sufficient for the ESIS generated by the
latex2esis.py script when run over the Python documentation.
"""
import errno
import esistools
import os
import re
import string
from xml.sax.saxutils import escape
AUTOCLOSE = ()
EMPTIES_FILENAME = "../sgml/empties.dat"
LIST_EMPTIES = 0
_elem_map = {}
_attr_map = {}
_token_map = {}
_normalize_case = str
def map_gi(sgmlgi, map):
uncased = _normalize_case(sgmlgi)
try:
return map[uncased]
except IndexError:
map[uncased] = sgmlgi
return sgmlgi
def null_map_gi(sgmlgi, map):
return sgmlgi
def format_attrs(attrs, xml=0):
attrs = attrs.items()
attrs.sort()
parts = []
append = parts.append
for name, value in attrs:
if xml:
append('%s="%s"' % (name, escape(value)))
else:
if name == value and isnmtoken(value):
append(value)
elif istoken(value):
if value == "no" + name:
append(value)
else:
append("%s=%s" % (name, value))
else:
append('%s="%s"' % (name, escape(value)))
if parts:
parts.insert(0, '')
return string.join(parts)
_nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE)
def isnmtoken(s):
return _nmtoken_rx.match(s) is not None
_token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE)
def istoken(s):
return _token_rx.match(s) is not None
def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
if xml:
autoclose = ()
attrs = {}
lastopened = None
knownempties = []
knownempty = 0
lastempty = 0
inverbatim = 0
while 1:
line = ifp.readline()
if not line:
break
type = line[0]
data = line[1:]
if data and data[-1] == "\n":
data = data[:-1]
if type == "-":
data = esistools.decode(data)
data = escape(data)
if not inverbatim:
data = string.replace(data, "---", "—")
ofp.write(data)
if "\n" in data:
lastopened = None
knownempty = 0
lastempty = 0
elif type == "(":
if data == "COMMENT":
ofp.write("<!--")
continue
data = map_gi(data, _elem_map)
if knownempty and xml:
ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
else:
ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
if knownempty and data not in knownempties:
knownempties.append(data)
attrs = {}
lastopened = data
lastempty = knownempty
knownempty = 0
inverbatim = data in verbatims
elif type == ")":
if data == "COMMENT":
ofp.write("-->")
continue
data = map_gi(data, _elem_map)
if xml:
if not lastempty:
ofp.write("</%s>" % data)
elif data not in knownempties:
if data in autoclose:
pass
elif lastopened == data:
ofp.write("</>")
else:
ofp.write("</%s>" % data)
lastopened = None
lastempty = 0
inverbatim = 0
elif type == "A":
name, type, value = string.split(data, " ", 2)
name = map_gi(name, _attr_map)
attrs[name] = esistools.decode(value)
elif type == "e":
knownempty = 1
elif type == "&":
ofp.write("&%s;" % data)
knownempty = 0
else:
raise RuntimeError, "unrecognized ESIS event type: '%s'" % type
if LIST_EMPTIES:
dump_empty_element_names(knownempties)
def dump_empty_element_names(knownempties):
d = {}
for gi in knownempties:
d[gi] = gi
knownempties.append("")
if os.path.isfile(EMPTIES_FILENAME):
fp = open(EMPTIES_FILENAME)
while 1:
line = fp.readline()
if not line:
break
gi = string.strip(line)
if gi:
d[gi] = gi
fp = open(EMPTIES_FILENAME, "w")
gilist = d.keys()
gilist.sort()
fp.write(string.join(gilist, "\n"))
fp.write("\n")
fp.close()
def update_gi_map(map, names, fromsgml=1):
for name in string.split(names, ","):
if fromsgml:
uncased = string.lower(name)
else:
uncased = name
map[uncased] = name
def main():
import getopt
import sys
autoclose = AUTOCLOSE
xml = 1
xmldecl = 0
elem_names = ''
attr_names = ''
value_names = ''
verbatims = ('verbatim', 'interactive-session')
opts, args = getopt.getopt(sys.argv[1:], "adesx",
["autoclose=", "declare", "sgml", "xml",
"elements-map=", "attributes-map",
"values-map="])
for opt, arg in opts:
if opt in ("-d", "--declare"):
xmldecl = 1
elif opt == "-e":
global LIST_EMPTIES
LIST_EMPTIES = 1
elif opt in ("-s", "--sgml"):
xml = 0
elif opt in ("-x", "--xml"):
xml = 1
elif opt in ("-a", "--autoclose"):
autoclose = string.split(arg, ",")
elif opt == "--elements-map":
elem_names = ("%s,%s" % (elem_names, arg))[1:]
elif opt == "--attributes-map":
attr_names = ("%s,%s" % (attr_names, arg))[1:]
elif opt == "--values-map":
value_names = ("%s,%s" % (value_names, arg))[1:]
if len(args) == 0:
ifp = sys.stdin
ofp = sys.stdout
elif len(args) == 1:
ifp = open(args[0])
ofp = sys.stdout
elif len(args) == 2:
ifp = open(args[0])
ofp = open(args[1], "w")
else:
usage()
sys.exit(2)
if elem_names or attr_names or value_names:
global _normalize_case
_normalize_case = string.lower
update_gi_map(_elem_map, string.split(elem_names, ","))
update_gi_map(_attr_map, string.split(attr_names, ","))
update_gi_map(_values_map, string.split(value_names, ","))
else:
global map_gi
map_gi = null_map_gi
try:
if xml and xmldecl:
opf.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims)
except IOError, (err, msg):
if err != errno.EPIPE:
raise
if __name__ == "__main__":
main()