# icu-svnprops-check.py
import sys
import os
import os.path
import re
import getopt
# file_types: the parsed form of the svn auto-props specification.
#   A list with one (pattern, proplist) tuple per auto-props line, where
#     pattern  is a regular expression string built from the file-name glob
#     proplist is a list of (property name, property value) tuples
#   Filled in by parse_auto_props().
file_types = list()


def parse_auto_props():
    """Parse the text in svn_auto_props and append the result to file_types.

    Each definition line has the form

        <file-name glob> = <prop>[=<value>][;<prop>[=<value>]...]

    Blank lines, '#' comment lines and the "[auto-props]" section header
    are ignored.  A line that does not look like a definition is reported
    on stdout and skipped.

    For every definition a (pattern, proplist) tuple is appended to the
    module-level file_types list; nothing is returned.
    """
    # Local import so that this function does not depend on a change to the
    # file's import block.
    import fnmatch

    for propline in svn_auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):
            # Comment or blank line.
            continue
        if re.match(r"\s*\[auto-props\]", propline):
            # The [auto-props] section header.
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):
            # Minimal syntax check for "<file-type> =".
            print("Bad line from autoprops definitions: " + propline)
            continue

        file_type, string_proplist = propline.split("=", 1)

        # Turn the glob into a regular expression string, e.g. "*.cpp"
        # becomes a pattern matching anything that ends in ".cpp".
        # fnmatch.translate escapes every regex metacharacter, so a glob
        # such as "*.c++" no longer yields a broken or wrong pattern.
        file_type = fnmatch.translate(file_type.strip())

        # Example string_proplist: " svn:eol-style=native;svn:executable"
        # Split on single ';'.  The lookbehind/lookahead keep ';;' intact:
        # that is an escaped ';' inside a property value.
        proplist = []
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            # A property without '=' (e.g. svn:executable) gets the value "".
            prop_name, _, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            if not prop_name:
                # Empty segment, e.g. from a trailing ';'.  An empty property
                # name is meaningless and would break later svn commands.
                continue
            # Unescape ';;', e.g. "text/plain;;charset=utf-8".
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        file_types.append((file_type, proplist))
def runCommand(cmd):
    """Run a shell command and return everything it wrote to stdout.

    cmd:     the command line; it is handed to the shell by os.popen().
    return:  the command's standard output, as one string.

    If the command reports a failure, a message is written to stderr and
    the script terminates through sys.exit() with a non-zero exit code.
    """
    output_file = os.popen(cmd)
    output_text = output_file.read()
    # close() returns None when the command succeeded.
    exit_status = output_file.close()
    if exit_status:
        sys.stderr.write('" %s " failed. Exiting.\n' % cmd)
        if hasattr(os, "WIFEXITED"):
            # On POSIX the value from close() is encoded like the result of
            # wait(): the exit code sits in the high byte.  Handing it to
            # sys.exit() unchanged would turn e.g. 256 (exit code 1) into
            # process exit status 0, i.e. report success.
            if os.WIFEXITED(exit_status):
                exit_code = os.WEXITSTATUS(exit_status)
            elif os.WIFSIGNALED(exit_status):
                exit_code = 128 + os.WTERMSIG(exit_status)
            else:
                exit_code = 1
        else:
            exit_code = exit_status
        # Never exit with 0 after a failure.
        sys.exit(exit_code or 1)
    return output_text
# svn_auto_props: the text printed by "svn propget svn:auto-props" for the
# ICU repository URL below.  parse_auto_props() splits this text into lines
# and parses them.
# NOTE: this is a module-level statement, so the svn command runs as soon as
# the file is loaded; runCommand() exits the script if the command fails.
# Presumably this needs the svn client and network access - confirm.
svn_auto_props = runCommand("svn propget svn:auto-props http://source.icu-project.org/repos/icu")
def usage():
    """Print the one-line command synopsis for this script on stdout."""
    script_name = sys.argv[0]
    prefix = "usage: "
    options = " [-f | --fix] [-h | --help]"
    print(prefix + script_name + options)
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """Check a text file's encoding against its svn:mime-type value.

    file_name:        name of a text file.
    base_mime_type:   svn:mime-type value from the auto-props definitions.
    actual_mime_type: svn:mime-type value currently set on the file.
    return:           the svn:mime-type value the file should have.  This is
                      actual_mime_type when it already contains "charset="
                      (or names utf-8 for a file that is not valid UTF-8),
                      otherwise base_mime_type.

    Problems are reported on stdout:
      - the file is not valid UTF-8 but its mime type mentions utf-8;
      - the file is valid UTF-8 but its mime type names another charset;
      - the file starts with a UTF-8 BOM and is not a ".txt" file.
    """
    # Read the raw bytes; the "with" block closes the file even if the read
    # fails.
    with open(file_name, 'rb') as f:
        contents = f.read()

    try:
        contents.decode("UTF-8")
        file_is_utf8 = True
    except UnicodeDecodeError:
        file_is_utf8 = False

    declares_utf8 = "utf-8" in actual_mime_type

    if not file_is_utf8 and declares_utf8:
        print("Error: %s is not valid utf-8, but has a utf-8 mime type." % file_name)
        return actual_mime_type

    if file_is_utf8 and "charset" in actual_mime_type and not declares_utf8:
        print("Warning: %s is valid utf-8, but has a mime-type of %s." % (file_name, actual_mime_type))

    # Compare against the complete three-byte BOM.  startswith() is also safe
    # for an empty file, where indexing the first byte would raise IndexError.
    if contents.startswith(b"\xef\xbb\xbf"):
        if not file_name.endswith(".txt"):
            print("Warning: file %s contains a UTF-8 BOM: " % file_name)

    # If the file already has a charset in its mime-type, don't change it.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    return base_mime_type
def _shell_quote(text, force=False):
    """Return text as a single, safely quoted word for a POSIX shell command.

    Text made only of harmless characters is returned unchanged unless
    force is true; anything else is wrapped in single quotes.
    """
    if not force and re.match(r"[A-Za-z0-9@%_+=:,./-]+\Z", text):
        return text
    # Inside single quotes only the quote itself needs care: close the
    # quoted run, emit an escaped quote, then reopen.
    return "'" + text.replace("'", "'\\''") + "'"


def _check_file_props(path, props, fix_problems):
    """Compare one file's svn properties with the expected ones.

    path:          file name, as listed by "svn ls -R".
    props:         list of (property name, expected value) tuples.
    fix_problems:  when true, run the corrective "svn propset" commands.

    For every mismatch the corrective command is printed on stdout.
    """
    quoted_path = _shell_quote(path)
    for propname, propval in props:
        actual_propval = runCommand(
            "svn propget --strict " + _shell_quote(propname) + " " + quoted_path)
        if propname == "svn:mime-type" and propval.startswith("text/"):
            # Text files: let check_utf8 decide whether the existing value
            # (possibly carrying a charset) should be kept.
            propval = check_utf8(path, propval, actual_propval)
        # An actual value of "*" is accepted for an expected empty value
        # (presumably how svn reports value-less properties - confirm).
        if propval == actual_propval or (propval == "" and actual_propval == "*"):
            continue
        fix_command = "svn propset %s %s %s" % (
            _shell_quote(propname), _shell_quote(propval, force=True), quoted_path)
        print(fix_command)
        if fix_problems and os.system(fix_command) != 0:
            sys.stderr.write("Command failed: " + fix_command + "\n")


def main(argv):
    """Check, and optionally fix, the svn properties of the files listed by
    "svn ls -R" in the current directory.

    argv:  command line arguments without the program name.
             -f | --fix    run the "svn propset" commands that are printed
             -h | --help   print the usage line and exit

    Exits with status 2 on a command line error.  runCommand() exits the
    script if an svn command fails.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Report what getopt actually rejected; argv[0] is not necessarily
        # the offending option (e.g. "-f -x").
        print(err.msg)
        usage()
        sys.exit(2)
    for opt, _ in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        # A usage error must not look like success to the caller.
        sys.exit(2)

    parse_auto_props()
    for f in runCommand("svn ls -R ").splitlines():
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # Auto-props patterns describe file names, so match the last path
        # component.  Matching the whole relative path would make patterns
        # without a leading wildcard apply to top-level files only.
        base_name = os.path.basename(f)
        for file_pattern, props in file_types:
            if re.match(file_pattern, base_name):
                _check_file_props(f, props, fix_problems)
# Script entry point: when this file is executed directly (not imported),
# hand the command line arguments, minus the program name, to main().
if __name__ == "__main__":
    command_line_args = sys.argv[1:]
    main(command_line_args)