# icu-file-utf8-check.py
from __future__ import print_function
import sys
import os
import os.path
import re
import getopt
def runCommand(cmd):
    """Run a shell command and return everything it wrote to stdout.

    Args:
        cmd: Command line string, executed through os.popen().

    Returns:
        The command's standard output as a single string.

    If the command reports failure, a message is printed to stderr and the
    script exits with the command's exit code (or 1 if no usable code is
    available).
    """
    output_file = os.popen(cmd)
    try:
        output_text = output_file.read()
    finally:
        # close() waits for the child and returns None on success.
        exit_status = output_file.close()
    if exit_status:
        print('"', cmd, '" failed. Exiting.', file=sys.stderr)
        # On POSIX, close() reports the exit code shifted left by one byte,
        # while sys.exit() keeps only the low byte. Passing the raw value
        # through would turn e.g. exit code 1 (reported as 256) into 0.
        if os.name != "nt":
            exit_status >>= 8
        sys.exit((exit_status & 0xFF) or 1)
    return output_text
def usage():
    """Print a one-line summary of the command line syntax to stdout."""
    script_name = sys.argv[0]
    print("".join(("usage: ", script_name, " [-h | --help]")))
def check_file(file_name, is_source):
    """Report encoding problems found in one file.

    Prints an error if a source code file is not valid UTF-8, and a warning
    if any file starts with a UTF-8 byte order mark, unless its name ends in
    .txt, .sln or .targets, or contains ".vcxproj".

    Args:
        file_name: Path of the file to inspect.
        is_source: Truthy if the file is source code and must therefore
            decode cleanly as UTF-8.
    """
    with open(file_name, 'rb') as f:
        contents = f.read()

    if is_source:
        try:
            contents.decode("UTF-8")
        except UnicodeDecodeError:
            print("Error: %s is a source code file but contains non-utf-8 bytes." % file_name)

    # Test for the complete three-byte BOM rather than indexing the first
    # byte: this is safe for empty files and does not misreport ordinary
    # UTF-8 text whose first character merely happens to start with 0xEF.
    if contents.startswith(b"\xef\xbb\xbf"):
        if not (file_name.endswith((".txt", ".sln", ".targets"))
                or ".vcxproj" in file_name):
            print("Warning: file %s contains a UTF-8 BOM: " % file_name)
def main(argv):
    """Check every file listed by "git ls-files" for encoding problems.

    Args:
        argv: Command line arguments without the program name. Only
            -h / --help is accepted; positional arguments are rejected.

    Each listed path that is a regular file is passed to check_file();
    paths ending in .c, .cpp, .h or .java are flagged as source code.
    Exits with status 2 on a command line error and 0 after printing help.
    """
    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError as err:
        # Report what getopt actually rejected; argv[0] may be a perfectly
        # valid option that simply preceded the offending one.
        print(err.msg)
        usage()
        sys.exit(2)
    for opt, _ in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
    if args:
        print("unexpected command line argument")
        usage()
        # A usage error must not look like success to the caller.
        sys.exit(2)

    output = runCommand("git ls-files ")
    file_list = output.splitlines()

    source_file_re = re.compile(r".*((?:\.c$)|(?:\.cpp$)|(?:\.h$)|(?:\.java$))")

    for f in file_list:
        if os.path.isdir(f):
            print("Skipping dir " + f)
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        is_source = source_file_re.match(f) is not None
        check_file(f, is_source)
# Script entry point: forward the command line (minus the program name).
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)