nightly_gzip   [plain text]


#! @PYTHON@
# 
# Copyright (C) 1998,1999,2000,2001,2002 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software 
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
"""Re-generate the Pipermail gzip'd archive flat files.

This script should be run nightly from cron.  When run from the command line,
the following usage is understood:

Usage: %(program)s [-v] [-h] [listnames]

Where:
    --verbose
    -v
        print each file as it's being gzip'd

    --help
    -h
        print this message and exit

    listnames
        Optionally, only compress the .txt files for the named lists.  Without 
        this, all archivable lists are processed.

"""

import sys
import os
import time
from stat import *
import getopt

try:
    import gzip
except ImportError:
    gzip = None

import paths
# mm_cfg must be imported before the other Mailman modules because, as a side
# effect, it modifies sys.path to include site-packages.  Without this,
# running this script from cron with python -S will fail.
from Mailman import mm_cfg
from Mailman import Utils
from Mailman import MailList



# Name this script was invoked as; it fills in %(program)s when usage()
# interpolates the module docstring with globals().
program = sys.argv[0]
# Verbosity flag: main() sets it to 1 when -v/--verbose is given, and
# compress() and main() consult it before printing progress messages.
VERBOSE = 0

def usage(code, msg=''):
    """Print the usage text, plus an optional message, and exit.

    code is used as the process exit status.  A non-zero code sends all
    output to stderr; zero sends it to stdout.  msg, when non-empty, is
    printed on its own line after the usage text.

    This function never returns normally: it always ends in sys.exit(code).
    """
    # `_' is not bound anywhere at module level, so without this import every
    # call (-h, or any unrecognized option) died with a NameError instead of
    # printing the help text.  It is imported here, rather than at the top of
    # the file, to keep the fix local to the one function that needs it.
    from Mailman.i18n import _
    if code:
        fd = sys.stderr
    else:
        fd = sys.stdout
    # The module docstring doubles as the help text; %(program)s in it is
    # filled in from the module globals.
    print >> fd, _(__doc__) % globals()
    if msg:
        print >> fd, msg
    sys.exit(code)



def compress(txtfile):
    if VERBOSE:
        print "gzip'ing:", txtfile
    infp = open(txtfile)
    outfp = gzip.open(txtfile+'.gz', 'wb', 6)
    outfp.write(infp.read())
    outfp.close()
    infp.close()



def main():
    """Parse the command line and gzip every out-of-date .txt archive file.

    Lists are taken from the positional arguments, or from
    Utils.list_names() when none are given.  For each list that has
    archiving enabled, every *.txt file in its archive directory is
    compressed if it is newer than its .txt.gz companion, or if that
    companion does not exist.
    """
    global VERBOSE
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'vh', ['verbose', 'help'])
    except getopt.error, msg:
        usage(1, msg)

    # Neither option takes a value, so only the flag itself matters.
    for flag, unused in opts:
        if flag in ('-v', '--verbose'):
            VERBOSE = 1
        elif flag in ('-h', '--help'):
            usage(0)

    # Explicitly named lists win; otherwise fall back to every known list.
    listnames = args or Utils.list_names()

    for listname in listnames:
        mlist = MailList.MailList(listname, lock=0)
        if not mlist.archive:
            continue
        archdir = mlist.archive_dir()
        try:
            entries = os.listdir(archdir)
        except os.error:
            # A list that has never received a post (last_post_time of zero)
            # legitimately has no archive directory yet; only complain when
            # messages have actually been posted.
            if mlist.last_post_time > 0:
                print 'List', listname, \
                      'has a bogus archive_directory:', archdir
            continue
        if VERBOSE:
            print 'Processing list:', listname
        # First pass: collect the .txt files whose .gz copy is missing or
        # older than the text file itself.
        stale = []
        for entry in entries:
            if not entry.endswith('.txt'):
                continue
            txtfile = os.path.join(archdir, entry)
            txt_mtime = os.stat(txtfile)[ST_MTIME]
            try:
                gzp_mtime = os.stat(txtfile + '.gz')[ST_MTIME]
            except os.error:
                # No .gz yet: use a time older than any real mtime.
                gzp_mtime = -1
            if gzp_mtime < txt_mtime:
                stale.append(txtfile)
        # Second pass: compress everything that was found to be stale.
        for txtfile in stale:
            compress(txtfile)

        

if __name__ == '__main__' and \
   gzip is not None and \
   mm_cfg.ARCHIVE_TO_MBOX in (1, 2) and \
   not mm_cfg.GZIP_ARCHIVE_TXT_FILES:
    # we're only going to run the nightly archiver if messages are archived to
    # the mbox, and the gzip file is not created on demand (i.e. for every
    # individual post).  This is the normal mode of operation.  Also, be sure
    # we can actually import the gzip module!
    omask = os.umask(002)
    try:
        main()
    finally:
        os.umask(omask)