#!/usr/bin/env python
#
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#

# mlpatch.py: Run with no arguments for usage

import sys
import os
import codecs

try:
  # Python <3.0, or Python 3 with an sgmllib backport installed
  import sgmllib
  SGMLParser = sgmllib.SGMLParser
except ImportError:
  # sgmllib was removed from the standard library in Python 3.0.  Provide
  # the small part of its interface that MyParser relies on, driven by the
  # tokenizer of html.parser.
  from html.parser import HTMLParser

  class _HTMLEventSource(HTMLParser):
    """Tokenize HTML and forward every event to an sgmllib-style sink.

    A separate object is needed because html.parser calls
    handle_starttag(tag, attrs) / handle_endtag(tag), whose signatures
    differ from the sgmllib handlers of the same name."""

    def __init__(self, sink):
      try:
        # Keep entity/character references as separate events, like sgmllib
        HTMLParser.__init__(self, convert_charrefs=False)
      except TypeError:
        # Python 3.0 - 3.3 has no convert_charrefs parameter
        HTMLParser.__init__(self)
      self._sink = sink

    def handle_starttag(self, tag, attrs):
      self._sink.finish_starttag(tag, attrs)

    def handle_startendtag(self, tag, attrs):
      # Report <br/> as a start tag only, so no spurious end tag is seen
      self._sink.finish_starttag(tag, attrs)

    def handle_endtag(self, tag):
      self._sink.finish_endtag(tag)

    def handle_data(self, data):
      self._sink.handle_data(data)

    def handle_charref(self, name):
      self._sink.handle_charref(name)

    def handle_entityref(self, name):
      self._sink.handle_entityref(name)

    def handle_comment(self, data):
      self._sink.handle_comment(data)

    def handle_decl(self, decl):
      self._sink.handle_decl(decl)

  class SGMLParser(object):
    """Minimal stand-in for sgmllib.SGMLParser.

    Implements the start_TAG / do_TAG / end_TAG dispatch, the open tag
    stack and the handle_* / unknown_* hooks."""

    entitydefs = {'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"',
                  'apos': '\''}

    def __init__(self, verbose=0):
      self.verbose = verbose
      self.reset()

    def reset(self):
      self.stack = []
      self._events = _HTMLEventSource(self)

    def feed(self, data):
      self._events.feed(data)

    def close(self):
      self._events.close()

    def finish_starttag(self, tag, attrs):
      method = getattr(self, 'start_' + tag, None)
      if method is not None:
        # start_TAG handlers expect a matching end tag: remember the tag
        self.stack.append(tag)
        self.handle_starttag(tag, method, attrs)
        return 1
      method = getattr(self, 'do_' + tag, None)
      if method is not None:
        self.handle_starttag(tag, method, attrs)
        return 0
      self.unknown_starttag(tag, attrs)
      return -1

    def finish_endtag(self, tag):
      if tag not in self.stack:
        if hasattr(self, 'end_' + tag):
          self.report_unbalanced(tag)
        else:
          self.unknown_endtag(tag)
        return
      # Close everything opened since the innermost occurrence of tag
      found = len(self.stack) - 1 - self.stack[::-1].index(tag)
      while len(self.stack) > found:
        open_tag = self.stack[-1]
        method = getattr(self, 'end_' + open_tag, None)
        if method is not None:
          self.handle_endtag(open_tag, method)
        else:
          self.unknown_endtag(open_tag)
        del self.stack[-1]

    def handle_starttag(self, tag, method, attrs):
      method(attrs)

    def handle_endtag(self, tag, method):
      method()

    def handle_charref(self, name):
      try:
        if name[:1] in ('x', 'X'):
          replacement = chr(int(name[1:], 16))
        else:
          replacement = chr(int(name))
      except (ValueError, OverflowError):
        self.unknown_charref(name)
      else:
        self.handle_data(replacement)

    def handle_entityref(self, name):
      replacement = self.entitydefs.get(name)
      if replacement is None:
        self.unknown_entityref(name)
      else:
        self.handle_data(replacement)

    def handle_data(self, data):
      pass

    def handle_comment(self, data):
      pass

    def handle_decl(self, decl):
      pass

    def report_unbalanced(self, tag):
      pass

    def unknown_starttag(self, tag, attrs):
      pass

    def unknown_endtag(self, tag):
      pass

    def unknown_charref(self, ref):
      pass

    def unknown_entityref(self, ref):
      pass

try:
  # Python >=3.0
  from html.entities import entitydefs
  from urllib.request import urlopen as urllib_request_urlopen
except ImportError:
  # Python <3.0
  from htmlentitydefs import entitydefs
  from urllib2 import urlopen as urllib_request_urlopen
import fileinput

CHUNKSIZE = 8 * 1024

# Charset assumed when the HTTP response does not name a usable one
# (ISO-8859-1 is the historical HTTP default for text and never fails
# to decode).
DEFAULT_CHARSET = "iso-8859-1"

# Prefixes that mark the start of a genuine line of a patch; anything else
# following a line break is assumed to be a wrapped continuation.
PATCH_LINE_PREFIXES = ('+', '-', ' ', '#', "Index:", "@@ ", "======")


class MyParser(SGMLParser):
  """Extract the message body of an archived mail page and write it,
  de-mangled, to standard output.

  Only markup between the comments <!-- body="start" --> and
  <!-- body="end" --> is processed.  <br> and <p> mark line breaks; the
  text that follows decides whether the break was a real new line of the
  patch or mere wrapping.  Link text is gathered and has '_at_' turned
  back into '@'."""

  def __init__(self):
    self.baseclass = SGMLParser
    self.baseclass.__init__(self)
    # Work on a copy: the imported table belongs to the standard library
    # and is shared by the whole process.
    self.entitydefs = dict(entitydefs)
    self.entitydefs["nbsp"] = " "
    self.inbody = False          # inside the body="start"/"end" comments
    self.complete_line = False   # a line break is pending
    self.discard_gathered()

  def discard_gathered(self):
    """Stop gathering link text and forget what was gathered."""
    self.gather_data = False
    self.gathered_data = ""

  def noop(self):
    pass

  def out(self, data):
    """Write data to standard output."""
    sys.stdout.write(data)

  def handle_starttag(self, tag, method, attrs):
    if not self.inbody: return
    self.baseclass.handle_starttag(self, tag, method, attrs)

  def handle_endtag(self, tag, method):
    if not self.inbody: return
    self.baseclass.handle_endtag(self, tag, method)

  def handle_data(self, data):
    """Emit body text, resolving any pending line break first."""
    if not self.inbody: return
    # Newlines in the HTML source carry no meaning; <br>/<p> do
    data = data.replace('\n','')
    if not data: return
    if self.gather_data:
      self.gathered_data += data
    else:
      if self.complete_line:
        if data.startswith(PATCH_LINE_PREFIXES):
          # Real new line
          self.out('\n')
        else:
          # Presume that we are wrapped
          self.out(' ')
      self.complete_line = False
      self.out(data)

  def handle_charref(self, ref):
    if not self.inbody: return
    self.baseclass.handle_charref(self, ref)

  def handle_entityref(self, ref):
    if not self.inbody: return
    self.baseclass.handle_entityref(self, ref)

  def handle_comment(self, comment):
    """Switch body processing on and off at the marker comments."""
    if comment == ' body="start" ':
      self.inbody = True
    elif comment == ' body="end" ':
      self.inbody = False

  def handle_decl(self, data):
    if not self.inbody: return
    print("DECL: " + data)

  def unknown_starttag(self, tag, attrs):
    if not self.inbody: return
    print("UNKTAG: %s %s" % (tag, attrs))

  def unknown_endtag(self, tag):
    if not self.inbody: return
    print("UNKTAG: /%s" % (tag))

  def do_br(self, attrs):
    self.complete_line = True

  def do_p(self, attrs):
    if self.complete_line:
      self.out('\n')
    self.out(' ')
    self.complete_line = True

  def start_a(self, attrs):
    self.gather_data = True

  def end_a(self):
    self.out(self.gathered_data.replace('_at_', '@'))
    self.discard_gathered()

  def close(self):
    """Terminate a pending line, then finish parsing."""
    if self.complete_line:
      self.out('\n')
    self.baseclass.close(self)


def _response_charset(response):
  """Return the charset announced by response, or DEFAULT_CHARSET if it
  names none or one that Python does not know."""
  headers = getattr(response, "headers", None)
  get_charset = getattr(headers, "get_content_charset", None)
  charset = get_charset() if get_charset is not None else None
  if charset:
    try:
      codecs.lookup(charset)
    except LookupError:
      pass
    else:
      return charset
  return DEFAULT_CHARSET


def _feed_stream(parser, stream, charset):
  """Feed stream to parser in CHUNKSIZE pieces.

  Chunks that are not of type str (bytes on Python 3) are decoded with
  charset first.  The decoder is incremental, so a multi-byte character
  split across two chunks is still decoded correctly."""
  decoder = None
  chunk = stream.read(CHUNKSIZE)
  while chunk:
    if not isinstance(chunk, str):
      if decoder is None:
        decoder = codecs.getincrementaldecoder(charset)(errors="replace")
      chunk = decoder.decode(chunk)
    parser.feed(chunk)
    chunk = stream.read(CHUNKSIZE)
  if decoder is not None:
    # Flush whatever an incomplete trailing sequence left behind
    tail = decoder.decode(b"", final=True)
    if tail:
      parser.feed(tail)


def main():
  """Fetch the message named on the command line and print its body.

  Exits with status 0 after printing usage when run without arguments,
  and with status 1 when the number of arguments is wrong."""
  if len(sys.argv) == 1:
    sys.stderr.write(
      "usage: mlpatch.py dev|users year month msgno > foobar.patch\n" +
      "example: mlpatch.py dev 2005 01 0001 > issue-XXXX.patch\n" +
      " Very annoyingly, the http://svn.haxx.se/ subversion mailing list"
      " archives mangle inline patches, and provide no raw message"
      " download facility (other than for an entire month's email as an"
      " mbox). So, I wrote this script, to demangle them. \n"
      "It's not perfect, as it has to guess about whitespace, but it does"
      " an acceptable job.\n")
    sys.exit(0)
  elif len(sys.argv) != 5:
    sys.stderr.write("error: mlpatch.py: Bad parameters - run with no "
    + "parameters for usage\n")
    sys.exit(1)
  else:
    list_name, year, month, msgno = sys.argv[1:]
    url = "http://svn.haxx.se/" \
      + "%s/archive-%s-%s/%s.shtml" % (list_name, year, month, msgno)
    print("MsgUrl: " + url)
    msgfile = urllib_request_urlopen(url)
    try:
      p = MyParser()
      _feed_stream(p, msgfile, _response_charset(msgfile))
      p.close()
    finally:
      # Release the connection even if parsing fails
      msgfile.close()

if __name__ == '__main__':
  main()