import re, os, cStringIO, time, cgi, string, urlparse
from twisted import copyright
from twisted.python import htmlizer, text
from twisted.web import microdom, domhelpers
import process, latex, indexer, numberer, htmlbook
from twisted.python.util import InsensitiveDict
def fixLinks(document, ext):
    """
    Point links to local 'html' files at files with the output extension.

    Every element of document that carries an href attribute is examined.
    A link is left untouched when its URL scheme is http, https, ftp or
    mailto, when its class attribute is exactly "absolute", or when its
    class attribute contains "listing".  For the remaining links, if the
    part of the href before any '#' fragment ends with 'html', that part's
    extension is replaced by ext and the fragment is kept as it was.

    document: the DOM tree whose links are rewritten in place.
    ext: the extension (including its leading dot) of the output files.
    """
    supported_schemes = ['http', 'https', 'ftp', 'mailto']
    for node in domhelpers.findElementsWithAttribute(document, 'href'):
        href = node.getAttribute("href")
        if urlparse.urlparse(href)[0] in supported_schemes:
            continue
        nodeClass = node.getAttribute("class", "")
        if nodeClass == "absolute":
            continue
        if nodeClass.find('listing') != -1:
            continue
        # Detach the fragment before looking at the extension.  Running
        # splitext over the whole href mistakes a dot inside the fragment
        # ("a.html#sec.1") for the extension separator, and slicing with
        # rfind('#') == -1 silently dropped the last character of hrefs
        # that have no fragment at all.
        hashAt = href.find('#')
        if hashAt == -1:
            path, fragment = href, ''
        else:
            path, fragment = href[:hashAt], href[hashAt:]
        if path.endswith('html'):
            fname = os.path.splitext(path)[0]
            node.setAttribute("href", fname + ext + fragment)
def addMtime(document, fullpath):
    """
    Append the modification time of fullpath, formatted by time.ctime, as a
    text node to every element of document whose class is "mtime".
    """
    stampNodes = domhelpers.findElementsWithAttribute(document, "class",
                                                      "mtime")
    for stampNode in stampNodes:
        # The mtime is looked up per node, so a document without any
        # "mtime" element never touches the file system.
        modified = os.path.getmtime(fullpath)
        stampNode.appendChild(microdom.Text(time.ctime(modified)))
def _getAPI(node):
    """
    Return the name held in node's first child, prefixed with the value of
    the node's "base" attribute and a dot when that attribute is present.
    """
    if node.hasAttribute("base"):
        qualifier = node.getAttribute("base") + "."
    else:
        qualifier = ""
    return qualifier + node.childNodes[0].nodeValue
def fixAPI(document, url):
    """
    Wrap the content of every element of class "API" in a link.

    The link target is url with the element's full API name interpolated
    into it (url is used as a %-format string); the same name becomes the
    link's title.  The element's "base" attribute is removed afterwards.
    """
    apiNodes = domhelpers.findElementsWithAttribute(document, "class", "API")
    for apiNode in apiNodes:
        qualifiedName = _getAPI(apiNode)
        linkAttributes = {'href': url % qualifiedName,
                          'title': qualifiedName}
        link = microdom.Element('a', linkAttributes)
        # The link adopts the element's children and then becomes the
        # element's only child.
        link.childNodes = apiNode.childNodes
        apiNode.childNodes = [link]
        apiNode.removeAttribute('base')
def fontifyPython(document):
    """
    Run fontifyPythonNode over every <pre> element of document whose class
    attribute is exactly "python".
    """
    def isPythonPre(candidate):
        # Only <pre> elements that carry a class attribute can qualify.
        if candidate.nodeName != 'pre' or not candidate.hasAttribute('class'):
            return False
        return candidate.getAttribute('class') == 'python'

    for pre in domhelpers.findElements(document, isPythonPre):
        fontifyPythonNode(pre)
def fontifyPythonNode(node):
    """
    Replace node with the markup htmlizer.filter produces for its text.

    The text is gathered with latex.getLatexText (mapping the lt, gt and amp
    entities back to their characters), stripped, terminated with a single
    newline and fed through htmlizer.filter.  The parsed result gets the
    class "python" and takes node's place in its parent.
    """
    collected = cStringIO.StringIO()
    latex.getLatexText(node, collected.write,
                       entities={'lt': '<', 'gt': '>', 'amp': '&'})
    source = cStringIO.StringIO(collected.getvalue().strip() + '\n')
    highlighted = cStringIO.StringIO()
    htmlizer.filter(source, highlighted)
    # Rewind so that the parser reads the output from its beginning.
    highlighted.seek(0)
    replacement = microdom.parse(highlighted).documentElement
    replacement.setAttribute("class", "python")
    node.parentNode.replaceChild(replacement, node)
def addPyListings(document, dir):
    """
    Replace every element of class "py-listing" with a listing of the file
    its href attribute names.

    The file is looked up relative to dir.  Trailing whitespace is removed
    from each line, the first skipLines lines (attribute of the element,
    default 0) are dropped, the remainder is passed through
    text.removeLeadingTrailingBlanks and htmlizer.filter, and the result is
    handed to _replaceWithListing.
    """
    for node in domhelpers.findElementsWithAttribute(document, "class",
                                                     "py-listing"):
        filename = node.getAttribute("href")
        outfile = cStringIO.StringIO()
        # Close the source file explicitly rather than relying on garbage
        # collection to release the handle.
        source = open(os.path.join(dir, filename))
        try:
            lines = map(string.rstrip, source.readlines())
        finally:
            source.close()
        skip = int(node.getAttribute('skipLines', 0))
        data = '\n'.join(lines[skip:])
        data = cStringIO.StringIO(text.removeLeadingTrailingBlanks(data))
        htmlizer.filter(data, outfile)
        val = outfile.getvalue()
        _replaceWithListing(node, val, filename, "py-listing")
def _replaceWithListing(node, val, filename, class_):
    """
    Replace node with a <div> of class class_ holding val and a caption.

    The caption is node's text followed by a link to filename; when the text
    equals the base name of filename, 'Source listing' is used instead.
    val, the caption and filename are interpolated into the markup as given.
    """
    caption = domhelpers.getNodeText(node)
    if caption == os.path.basename(filename):
        caption = 'Source listing'
    template = ('<div class="%s">%s<div class="caption">%s - '
                '<a href="%s"><span class="filename">%s</span></a></div></div>')
    markup = template % (class_, val, caption, filename, filename)
    listing = microdom.parseString(markup).documentElement
    node.parentNode.replaceChild(listing, node)
def addHTMLListings(document, dir):
    """
    Replace every element of class "html-listing" with a listing of the file
    its href attribute names.

    The file is looked up relative to dir; its content is escaped with
    cgi.escape, wrapped in <pre class="htmlsource"> and handed to
    _replaceWithListing.
    """
    for node in domhelpers.findElementsWithAttribute(document, "class",
                                                     "html-listing"):
        filename = node.getAttribute("href")
        # Close the source file explicitly rather than relying on garbage
        # collection to release the handle.
        source = open(os.path.join(dir, filename))
        try:
            contents = source.read()
        finally:
            source.close()
        val = '<pre class="htmlsource">\n%s</pre>' % cgi.escape(contents)
        _replaceWithListing(node, val, filename, "html-listing")
def addPlainListings(document, dir):
    """
    Replace every element of class "listing" with a listing of the file its
    href attribute names.

    The file is looked up relative to dir; its content is escaped with
    cgi.escape, wrapped in <pre> and handed to _replaceWithListing.
    """
    for node in domhelpers.findElementsWithAttribute(document, "class",
                                                     "listing"):
        filename = node.getAttribute("href")
        # Close the source file explicitly rather than relying on garbage
        # collection to release the handle.
        source = open(os.path.join(dir, filename))
        try:
            contents = source.read()
        finally:
            source.close()
        val = '<pre>\n%s</pre>' % cgi.escape(contents)
        _replaceWithListing(node, val, filename, "listing")
def getHeaders(document):
    """
    Return the elements of document whose node name is exactly 'h2' or 'h3',
    as found by domhelpers.findElements.
    """
    isSectionHeader = re.compile('h[23]$').match

    def matcher(node):
        return isSectionHeader(node.nodeName)

    return domhelpers.findElements(document, matcher)
def generateToC(document):
    """
    Build a table of contents from the h2 and h3 headers of document.

    Each header contributes a list item that links to "#autoN", where N
    counts the headers from zero, and an <a name="autoN"> anchor is appended
    to the header itself.  h3 entries are nested one <ul> deeper than h2
    entries.  Returns the root element of the parsed <ol> markup.
    """
    pieces = ['\n<ol>\n']
    depth = 0
    for counter, header in enumerate(getHeaders(document)):
        # h2 maps to depth 0 and h3 to depth 1.
        headerDepth = int(header.tagName[1]) - 2
        # Multiplying by a non-positive count yields '', so only one of
        # these two pieces is ever non-empty.
        pieces.append((depth - headerDepth) * '</ul>\n')
        pieces.append((headerDepth - depth) * '<ul>')
        pieces.append('<li><a href="#auto%d">' % counter)
        pieces.append(domhelpers.getNodeText(header))
        pieces.append('</a></li>\n')
        depth = headerDepth
        anchor = microdom.parseString(
            '<a name="auto%d" />' % counter).documentElement
        header.childNodes.append(anchor)
    # Close whatever nesting is still open after the last header.
    pieces.append('</ul>\n' * depth)
    pieces.append('</ol>\n')
    return microdom.parseString(''.join(pieces)).documentElement
def putInToC(document, toc):
    """
    Make toc the sole child of the first element of class "toc" in document.
    Does nothing when no such element exists.
    """
    holders = domhelpers.findElementsWithAttribute(document, 'class', 'toc')
    if not holders:
        return
    holders[0].childNodes = [toc]
def removeH1(document):
    """
    Replace every <h1> element of document with an empty <span>.
    """
    for heading in domhelpers.findNodesNamed(document, 'h1'):
        # Use a fresh span for each heading: substituting one shared
        # element would leave a single node referenced by several parents.
        placeholder = microdom.Element('span')
        heading.parentNode.replaceChild(placeholder, heading)
def footnotes(document):
    """
    Move the elements of class "footnote" to a list at the end of the body.

    Each footnote is replaced in place by a numbered link to
    "#footnote-N" whose title is the footnote's whitespace-normalized text.
    The footnote itself is wrapped in an <a name="footnote-N"> inside an
    <li>.  A 'Footnotes' <h2> and the <ol> of collected items are appended
    to the first <body> element.  Does nothing if there are no footnotes.
    """
    found = domhelpers.findElementsWithAttribute(document, "class",
                                                 "footnote")
    if not found:
        return
    listing = microdom.Element('ol')
    number = 0
    for footnote in found:
        number += 1
        marker = microdom.parseString('<a href="#footnote-%(id)d">'
                                      '<super>%(id)d</super></a>'
                                      % {'id': number}).documentElement
        # Collapse every run of whitespace into a single space.
        title = ' '.join(domhelpers.getNodeText(footnote).split())
        marker.setAttribute('title', title)
        target = microdom.Element('a',
                                  attributes={'name': 'footnote-%d' % number})
        target.childNodes = [footnote]
        item = microdom.Element('li')
        item.childNodes = [target]
        listing.childNodes.append(item)
        # The footnote is still attached to its original parent here, which
        # is where the marker has to go.
        footnote.parentNode.replaceChild(marker, footnote)
    body = domhelpers.findNodesNamed(document, "body")[0]
    heading = microdom.parseString('<h2>Footnotes</h2>').documentElement
    body.childNodes.append(heading)
    body.childNodes.append(listing)
def notes(document):
    """
    Insert a '<strong>Note: </strong>' element as the first child of every
    element of class "note" in document.
    """
    noteNodes = domhelpers.findElementsWithAttribute(document, "class", "note")
    prefix = microdom.parseString('<strong>Note: </strong>').documentElement
    for noteNode in noteNodes:
        noteNode.childNodes.insert(0, prefix)
def compareMarkPos(a, b):
    """
    Three-way comparison of two mark positions.

    Element 0 of each position is compared first and element 1 only breaks
    ties.  Returns a negative number if a sorts before b, a positive number
    if it sorts after b, and 0 if both elements are equal.
    """
    for field in (0, 1):
        left, right = a[field], b[field]
        order = (left > right) - (left < right)
        if order:
            return order
    return 0
def comparePosition(a, b):
    """
    Compare two nodes by their _markpos attributes, using compareMarkPos.
    """
    firstPosition = a._markpos
    secondPosition = b._markpos
    return compareMarkPos(firstPosition, secondPosition)
def findNodeJustBefore(target, nodes):
    """
    Return the last node of nodes seen before the first one that target
    precedes (according to comparePosition).

    Returns None when target precedes the first node or nodes is empty, and
    the final node when target precedes none of them.
    """
    previous = None
    for candidate in nodes:
        if comparePosition(target, candidate) < 0:
            break
        previous = candidate
    return previous
def getFirstAncestorWithSectionHeader(entry):
    """Walk up the ancestors of entry until one containing at least one <h2> is found and return its <h2> nodes; return an empty list if none has any"""
    # The first item returned by getParents is skipped.
    ancestors = domhelpers.getParents(entry)[1:]
    for ancestor in ancestors:
        found = domhelpers.findNodesNamed(ancestor, "h2")
        if not found:
            continue
        return found
    return []
def getSectionNumber(header):
    """
    Return the stripped value of header's first child node, or None when
    header is false.
    """
    if header:
        return header.childNodes[0].value.strip()
    return None
def getSectionReference(entry):
    """
    Return the section number of the <h2> header that most closely precedes
    entry, or None when no such header is found.
    """
    candidates = getFirstAncestorWithSectionHeader(entry)
    return getSectionNumber(findNodeJustBefore(entry, candidates))
def index(document, filename, chapterReference):
    """
    Register every element of class "index" with the indexer and turn it
    into a named anchor.

    Entries are numbered from 1 and get the anchor name 'indexNN'.  The
    reference passed to indexer.addEntry is the entry's section reference,
    falling back to chapterReference; when chapterReference is false the
    literal 'link' is used.  Each entry is then renamed to <a> and its
    attributes are replaced by a single name attribute.
    """
    entries = domhelpers.findElementsWithAttribute(document, "class", "index")
    for position, entry in enumerate(entries):
        anchor = 'index%02d' % (position + 1)
        ref = 'link'
        if chapterReference:
            ref = getSectionReference(entry) or chapterReference
        indexer.addEntry(filename, anchor, entry.attributes['value'], ref)
        entry.nodeName = entry.tagName = entry.endTagName = 'a'
        entry.attributes = InsensitiveDict({'name': anchor})
def setIndexLink(template, indexFilename):
    """
    Turn every element of class "index-link" in template into an <a> whose
    only attribute is an href to indexFilename.  Does nothing when
    indexFilename is false.
    """
    if indexFilename:
        indexLinks = domhelpers.findElementsWithAttribute(template, "class",
                                                          "index-link")
        for link in indexLinks:
            link.nodeName = link.tagName = link.endTagName = 'a'
            link.attributes = InsensitiveDict({'href': indexFilename})
def numberDocument(document, chapterNumber):
    """
    Prefix every <h2> of document with "<chapterNumber>.<n> ", where n counts
    the headers from 1 in the order they are found.
    """
    headings = domhelpers.findNodesNamed(document, "h2")
    for offset, heading in enumerate(headings):
        label = microdom.Text("%s.%d " % (chapterNumber, offset + 1))
        heading.childNodes = [label] + heading.childNodes
def fixRelativeLinks(document, linkrel):
    """
    Prepend linkrel to every relative src and href attribute in document.

    A value is treated as absolute, and left alone, when it starts with '/'
    or when its URL scheme is http, https, ftp or mailto.

    document: the DOM tree whose attributes are rewritten in place.
    linkrel: the prefix put in front of relative links.
    """
    # Decide by parsed scheme, not by a bare 'http' prefix: the prefix test
    # mistook relative files such as "httpd.html" for absolute URLs and
    # prepended linkrel to ftp: and mailto: URLs.
    absoluteSchemes = ('http', 'https', 'ftp', 'mailto')
    for attr in 'src', 'href':
        for node in domhelpers.findElementsWithAttribute(document, attr):
            target = node.getAttribute(attr)
            if target.startswith('/'):
                continue
            if urlparse.urlparse(target)[0] in absoluteSchemes:
                continue
            node.setAttribute(attr, linkrel + target)
def setTitle(template, title, chapterNumber):
    """
    Append the title nodes to the first <title> element and to the first
    element of class "title" in template.

    When section numbering is enabled (numberer.getNumberSections()) and
    chapterNumber is true, a "<chapterNumber>. " text node is appended
    before the title nodes.

    title: the list of nodes that make up the title.
    """
    candidates = (domhelpers.findNodesNamed(template, "title"),
                  domhelpers.findElementsWithAttribute(template, "class",
                                                       'title'))
    for nodeList in candidates:
        if not nodeList:
            continue
        children = nodeList[0].childNodes
        if numberer.getNumberSections() and chapterNumber:
            children.append(microdom.Text('%s. ' % chapterNumber))
        children.extend(title)
def setAuthors(template, authors):
    """
    Insert the author list into template.

    A <span> of author links is appended to every element of class
    "authors": authors other than the last are each followed by ',', and the
    last of several is preceded by 'and '.  In addition one
    <link rel="author"> element per author is appended to the first <head>
    element.

    authors: a sequence of (name, href) pairs.
    """
    lastIndex = len(authors) - 1
    markup = ''
    for position, (name, href) in enumerate(authors):
        anchor = '<a href="%s">%s</a>' % (href, name)
        # Find the last author by position.  Comparing the pair against
        # authors[-1] by value misfires when the same author is listed
        # more than once.
        if position != lastIndex:
            markup += anchor + ','
        elif lastIndex == 0:
            markup = anchor
        else:
            markup += 'and ' + anchor
    childNodes = microdom.parseString('<span>' + markup + '</span>').childNodes
    for node in domhelpers.findElementsWithAttribute(template,
                                                     "class", 'authors'):
        node.childNodes.extend(childNodes)
    head = domhelpers.findNodesNamed(template, 'head')[0]
    links = [microdom.parseString('<link rel="author" href="%s" title="%s"/>'
                                  % (href, name)).childNodes[0]
             for name, href in authors]
    head.childNodes.extend(links)
def setVersion(template, version):
    """
    Append version as a text node to every element of class "version" in
    template.
    """
    holders = domhelpers.findElementsWithAttribute(template, "class",
                                                   "version")
    for holder in holders:
        holder.appendChild(microdom.Text(version))
def getOutputFileName(originalFileName, outputExtension, index=None):
    """
    Return originalFileName with its extension replaced by outputExtension.

    index is accepted but not used.
    """
    stem, _oldExtension = os.path.splitext(originalFileName)
    return stem + outputExtension
def munge(document, template, linkrel, dir, fullpath, ext, url, config, outfileGenerator=getOutputFileName):
    """
    Apply all transformations to document and merge it into template.

    document: the parsed input document; it is modified in place.
    template: the template tree that receives the result.
    linkrel: prefix applied to relative links in the template.
    dir: directory against which listing files are resolved.
    fullpath: path of the input file; used for the modification time and
        for the chapter number and reference lookups.
    ext: extension of the output files.
    url: %-format string used to build API links.
    config: mapping from which the 'version' entry is read.
    outfileGenerator: callable taking (file name, ext) and returning the
        output file name under which index entries are registered.
    """
    # Template preparation that does not depend on the document's content.
    fixRelativeLinks(template, linkrel)
    addMtime(template, fullpath)

    # In-place rewriting of the document.
    removeH1(document)
    fixAPI(document, url)
    fontifyPython(document)
    fixLinks(document, ext)
    addPyListings(document, dir)
    addHTMLListings(document, dir)
    addPlainListings(document, dir)

    tableOfContents = generateToC(document)
    putInToC(template, tableOfContents)
    footnotes(document)
    notes(document)

    setIndexLink(template, indexer.getIndexFilename())
    setVersion(template, config.get('version', ''))

    # Title and numbering.
    chapterNumber = htmlbook.getNumber(fullpath)
    titleNodes = domhelpers.findNodesNamed(document, 'title')[0].childNodes
    setTitle(template, titleNodes, chapterNumber)
    if numberer.getNumberSections() and chapterNumber:
        numberDocument(document, chapterNumber)
    outputName = outfileGenerator(os.path.split(fullpath)[1], ext)
    index(document, outputName, htmlbook.getReference(fullpath))

    # Authors are declared in the document as <link rel="author"> elements.
    authorPairs = []
    for link in domhelpers.findNodesNamed(document, 'link'):
        if link.getAttribute('rel', '') == 'author':
            authorPairs.append((link.getAttribute('title', ''),
                                link.getAttribute('href', '')))
    setAuthors(template, authorPairs)

    # Move the document body into the template's placeholder element.
    body = domhelpers.findNodesNamed(document, "body")[0]
    placeholder = domhelpers.findElementsWithAttribute(template, "class",
                                                       "body")[0]
    placeholder.childNodes = body.childNodes
    placeholder.setAttribute("class", "content")
def parseFileAndReport(filename):
try:
return microdom.parse(open(filename))
except microdom.MismatchedTags, e:
raise process.ProcessingFailure(
"%s:%s: begin mismatched tags <%s>/</%s>" %
(e.begLine, e.begCol, e.got, e.expect),
"%s:%s: end mismatched tags <%s>/</%s>" %
(e.endLine, e.endCol, e.got, e.expect))
except microdom.ParseError, e:
raise process.ProcessingFailure("%s:%s:%s" % (e.line, e.col, e.message))
except IOError, e:
raise process.ProcessingFailure(e.strerror + ", filename was '" + filename + "'")
def makeSureDirectoryExists(filename):
    """
    Create the directory that contains filename, including any missing
    parent directories, unless it already exists.

    filename: path of a file; it is made absolute first and is not itself
        created.

    Raises OSError when the directory is missing and cannot be created.
    """
    dirname = os.path.dirname(os.path.abspath(filename))
    if not os.path.exists(dirname):
        try:
            os.makedirs(dirname)
        except OSError:
            # Something else may have created the directory between the
            # check and the call; only fail if it is really still missing.
            if not os.path.isdir(dirname):
                raise
def doFile(filename, linkrel, ext, url, templ, options={}, outfileGenerator=getOutputFileName):
    """
    Process one input file and write the resulting document.

    The file is parsed, a deep clone of templ is filled in by munge, and the
    clone is written as XML to the name outfileGenerator(filename, ext),
    whose directory is created when missing.

    filename: path of the input file.
    linkrel: prefix applied to relative links in the template.
    ext: extension of the output file.
    url: %-format string used to build API links.
    templ: the template tree; it is cloned, not modified.
    options: mapping passed to munge as its config; it is only passed on,
        never modified here.
    outfileGenerator: callable taking (file name, ext) and returning the
        output file name.
    """
    doc = parseFileAndReport(filename)
    clonedNode = templ.cloneNode(1)
    munge(doc, clonedNode, linkrel, os.path.dirname(filename), filename, ext,
          url, options, outfileGenerator)
    newFilename = outfileGenerator(filename, ext)
    makeSureDirectoryExists(newFilename)
    # Close the output explicitly so the data is flushed and the handle is
    # released even if serialization fails.
    outfile = open(newFilename, 'wb')
    try:
        clonedNode.writexml(outfile)
    finally:
        outfile.close()