transcheck   [plain text]


#! @PYTHON@
#
# transcheck - (c) 2002 by Simone Piunno <pioppo@ferrara.linux.it>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the version 2.0 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

"""
Check a given Mailman translation, making sure that variables and
tags referenced in translation are the same variables and tags in
the original templates and catalog.

Usage:

cd $MAILMAN_DIR
%(program)s [-q] <lang>

Where <lang> is your country code (e.g. 'it' for Italy) and -q is
to ask for a brief summary.
"""

import sys
import re
import os
import getopt

import paths
from Mailman.i18n import _

# Name this script was invoked as.  Presumably substituted for %(program)s
# in the usage text by _() -- verify against Mailman.i18n.
program = sys.argv[0]



def usage(code, msg=''):
    """Print the module usage text, then exit with status `code`.

    A non-zero `code` sends the output to stderr, zero sends it to stdout.
    When `msg` is non-empty it is printed on its own line after the usage
    text.  This function never returns: it always calls sys.exit(code).
    """
    # Default to stdout and switch to stderr only for error exits.
    fd = sys.stdout
    if code:
        fd = sys.stderr
    print >> fd, _(__doc__)
    if msg:
        print >> fd, msg
    sys.exit(code)



class TransChecker:
    """Check a translation by comparing its placeholders with the original's.

    Typical use: checkin() every original string, checkout() every
    translated string, then call computeErrors() once.  Call reset()
    before starting on the next pair of strings.

    Attributes:
        dict  -- maps each key found in the originals to a balance:
                 +1 per occurrence checked in, -1 per occurrence checked out
        errs  -- list of error descriptions accumulated so far
    """
    def __init__(self, regexp, escaped=None):
        """Build a checker.

        regexp  -- pattern whose findall() results are the tracked keys
        escaped -- optional pattern; any key it matches is ignored
        """
        self.dict = {}
        self.errs = []
        self.regexp = re.compile(regexp)
        self.escaped = None
        if escaped:
            self.escaped = re.compile(escaped)

    def _keys(self, string):
        "return the keys found in string, minus the escaped ones"
        found = []
        for key in self.regexp.findall(string):
            if self.escaped and self.escaped.match(key):
                continue
            found.append(key)
        return found

    def checkin(self, string):
        "scan a string from the original file"
        for key in self._keys(string):
            self.dict[key] = self.dict.get(key, 0) + 1

    def checkout(self, string):
        "scan a translated string"
        for key in self._keys(string):
            if key in self.dict:
                self.dict[key] -= 1
            else:
                # the translation uses something the original never had
                self.errs.append(
                    "%(key)s was not found" %
                    { 'key' : key }
                )

    def computeErrors(self):
        """check for differences between checked in and checked out

        The findings are appended to self.errs, which is also the return
        value, so calling this twice without reset() reports them twice.
        """
        for key in self.dict.keys():
            balance = self.dict[key]
            if balance < 0:
                # checked out more often than checked in
                self.errs.append(
                    "Too much %(key)s" %
                    { 'key'  : key }
                )
            if balance > 0:
                # checked in more often than checked out
                self.errs.append(
                    "Too few %(key)s" %
                    { 'key'  : key }
                )
        return self.errs

    def status(self):
        "return 'FAILED' if any error has been recorded, else 'OK'"
        if self.errs:
            return "FAILED"
        else:
            return "OK"

    def errorsAsString(self):
        "return every recorded error, each prefixed with ' - ', as one string"
        return "".join([" - %(err)s" % { 'err': err } for err in self.errs])

    def reset(self):
        "forget all the counters and errors collected so far"
        self.dict = {}
        self.errs = []



class POParser:
    """Parse a .po file, extracting msgids and msgstrs one entry at a time.

    After each successful parse() call the entry just read is available as:
        files   -- words collected from the '#:' comment lines of the entry
        msgid   -- the original string, with backslash escapes decoded
        msgstr  -- the translated string, with backslash escapes decoded
        line    -- running count of the lines consumed so far (starts at 1;
                   newlines inside quoted strings are not counted)

    Malformed input is reported through failing assert statements.
    """
    def __init__(self, filename=""):
        "create the parser; open `filename` right away when it is given"
        self.status = 0
        self.files = []
        self.msgid = ""
        self.msgstr = ""
        self.line = 1
        self.f = None
        # escapes with a special meaning; any other escaped char is
        # taken literally (so \" gives " and \\ gives \)
        self.esc = { "n": "\n", "r": "\r", "t": "\t" }
        if filename:
            self.f = open(filename)

    def open(self, filename):
        "open `filename` as the file to be parsed"
        self.f = open(filename)

    def close(self):
        "close the file being parsed, if one was opened"
        if self.f is not None:
            self.f.close()

    def _read_quoted(self):
        """read the rest of a quoted string, the opening '"' being consumed

        Returns (text, complete): text holds the decoded characters read,
        complete is 0 when EOF was hit before the closing quote, else 1.
        """
        chars = []
        while 1:
            c = self.f.read(1)
            if not c:
                # EOF, bail out
                return "".join(chars), 0
            if c == "\\":
                # a quoted char...
                c = self.f.read(1)
                chars.append(self.esc.get(c, c))
                continue
            if c == "\"":
                # end of string found
                return "".join(chars), 1
            # a normal char, add it
            chars.append(c)

    def parse(self):
        """Read the next entry; return 1 if one was read, 0 at end of file.

        States table for the finite-states-machine parser:
            0  idle
            1  filename-or-comment
            2  msgid
            3  msgstr
            4  end
        """
        # each time we can safely re-initialize those vars
        self.files = []
        self.msgid = ""
        self.msgstr = ""

        # can't continue if status == 4, this is a dead status
        if self.status == 4:
            return 0

        while 1:
            # continue scanning, char-by-char
            c = self.f.read(1)
            if not c:
                # EOF -> maybe we have a msgstr to save?
                self.status = 4
                if self.msgstr:
                    return 1
                else:
                    return 0

            # keep the line count up-to-date
            if c == "\n":
                self.line += 1

            # a pound was detected the previous char...
            if self.status == 1:
                if c == ":":
                    # was a line of filenames
                    row = self.f.readline()
                    self.files += row.split()
                    self.line += 1
                elif c == "\n":
                    # was a single pound on the line
                    pass
                else:
                    # was a comment... discard
                    self.f.readline()
                    self.line += 1
                # in every case, we switch to idle status
                self.status = 0
                continue

            # in idle status we search for a '#' or for a 'm'
            if self.status == 0:
                if c == "#":
                    # this could be a comment or a filename
                    self.status = 1
                    continue
                elif c == "m":
                    # this should be a msgid start...
                    s = self.f.read(4)
                    assert s == "sgid"
                    # so now we search for a '"'
                    self.status = 2
                    continue
                # in idle only those other chars are possible
                assert c in [ "\n", " ", "\t" ]

            # searching for the msgid string
            if self.status == 2:
                if c == "\n":
                    # a double LF is not possible here
                    c = self.f.read(1)
                    assert c != "\n"
                if c == "\"":
                    # ok, this is the start of the string,
                    # now read up to its end
                    text, complete = self._read_quoted()
                    self.msgid += text
                    if not complete:
                        # EOF inside the msgid: nothing usable to return
                        self.status = 4
                        return 0
                elif c == "m":
                    # this should be a msgstr identifier
                    s = self.f.read(5)
                    assert s == "sgstr"
                    # ok, now search for the msgstr string
                    self.status = 3

            # searching for the msgstr string
            if self.status == 3:
                if c == "\n":
                    # a double LF is the end of the msgstr!
                    c = self.f.read(1)
                    if c == "\n":
                        # ok, time to go idle and return
                        self.status = 0
                        self.line += 1
                        return 1
                if c == "\"":
                    # start of string found; the decoded text (escaped
                    # chars included) belongs to the msgstr, not the msgid
                    text, complete = self._read_quoted()
                    self.msgstr += text
                    if not complete:
                        # EOF inside the msgstr: return what we have
                        self.status = 4
                        return 1




def check_file(translatedFile, originalFile, html=0, quiet=0):
    """check a translated template against the original one
       search also <MM-*> tags if html is not zero"""

    if html:
        c = TransChecker("(%%|%\([^)]+\)[0-9]*[sd]|</?MM-[^>]+>)", "^%%$")
    else:
        c = TransChecker("(%%|%\([^)]+\)[0-9]*[sd])", "^%%$")

    try:
        f = open(originalFile)
    except IOError:
        if not quiet:
            print " - Can'open original file " + originalFile
        return 1

    while 1:
        line = f.readline()
        if not line: break
        c.checkin(line)

    f.close()

    try:
        f = open(translatedFile)
    except IOError:
        if not quiet:
            print " - Can'open translated file " + translatedFile
        return 1

    while 1:
        line = f.readline()
        if not line: break
        c.checkout(line)

    f.close()

    n = 0
    msg = ""
    for desc in c.computeErrors():
        n +=1
        if not quiet:
            print " - %(desc)s" % { 'desc': desc }
    return n



def check_po(file, quiet=0):
    "scan the po file comparing msgids with msgstrs"
    n = 0
    p = POParser(file)
    c = TransChecker("(%%|%\([^)]+\)[0-9]*[sdu]|%[0-9]*[sdu])", "^%%$")
    while p.parse():
        if p.msgstr:
            c.reset()
            c.checkin(p.msgid)
            c.checkout(p.msgstr)
            for desc in c.computeErrors():
                n += 1
                if not quiet:
                    print " - near line %(line)d %(file)s: %(desc)s" % {
                        'line': p.line,
                        'file': p.files,
                        'desc': desc
                    }
    p.close()
    return n


def main():
    """Command line entry point: check templates and catalog of one language.

    Parses the options, then compares every .html and .txt file found in
    templates/<lang>/ with the file of the same name in templates/en/, and
    finally checks messages/<lang>/LC_MESSAGES/mailman.po.  Paths are
    relative to the current directory.  With -q/--quiet only a one line
    summary is printed.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'qh', ['quiet', 'help'])
    except getopt.error, msg:
        usage(1, msg)

    quiet = 0
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-q', '--quiet'):
            quiet = 1

    if len(args) != 1:
        usage(1)

    lang = args[0]

    isHtml = re.compile(r"\.html$")
    isTxt = re.compile(r"\.txt$")

    numerrors = 0
    numfiles = 0
    try:
        files = os.listdir("templates/" + lang + "/")
    except OSError:
        print "can't open templates/%s/" % lang
        # no templates to check; carry on with the catalog instead of
        # failing later on an undefined name
        files = []
    for name in files:
        if isHtml.search(name):
            label = "HTML"
            html = 1
        elif isTxt.search(name):
            # padded so that the progress lines stay aligned
            label = "TXT "
            html = 0
        else:
            continue
        fileEN = "templates/en/" + name
        fileIT = "templates/" + lang + "/" + name
        if not quiet:
            print label + " checking " + fileIT + "... "
        n = check_file(fileIT, fileEN, html=html, quiet=quiet)
        if n:
            numerrors += n
            numfiles += 1

    pofile = "messages/" + lang + "/LC_MESSAGES/mailman.po"
    if not quiet:
        print "PO   checking " + pofile + "... "
    n = check_po(pofile, quiet=quiet)
    if n:
        numerrors += n
        numfiles += 1

    if quiet:
        print "%(errs)u warnings in %(files)u files" % {
            'errs':  numerrors,
            'files': numfiles
        }


# Run the checker only when executed as a script, not when imported.
if __name__ == '__main__':
    main()