rb-archfix   [plain text]


#! @PYTHON@
#
# Copyright (C) 2003 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

# Author: Richard Barrett

"""Reduce disk space usage for Pipermail archives.

Usage: %(PROGRAM)s [options] file ...

Where options are:
    -h / --help
        Print this help message and exit.

Only use this to 'fix' archive -article database files that have been written
with Mailman 2.1.3 or earlier and have html_body attributes in them.  These
attributes can cause huge amounts of memory bloat and impact performance for
high activity lists, particularly those where large text postings are made to
them.

Example:

%% ls -1 archives/private/*/database/*-article | xargs %(PROGRAM)s

You should run `bin/check_perms -f' after running this script.

You will probably want to delete the -article.bak files created by this script
when you are satisfied with the results.

This script is provided for convenience purposes only.  It isn't supported.
"""

import os
import sys
import getopt
import marshal
import cPickle as pickle

# Required to get the right classes for unpickling
import paths
from Mailman.i18n import _

# Name the script was invoked as; the module docstring refers to it through
# the %(PROGRAM)s placeholder in its usage text.
PROGRAM = sys.argv[0]



def usage(code, msg=''):
    """Print the module help text plus an optional message, then exit.

    code is used as the process exit status.  When it is non-zero the
    output is written to stderr, otherwise to stdout.  msg, if non-empty,
    is printed on its own line after the help text.
    """
    # Default to stdout for a plain help request; errors go to stderr.
    fd = sys.stdout
    if code:
        fd = sys.stderr
    # Keep the _() call directly in this function: the help text carries a
    # %(PROGRAM)s placeholder that is expected to be filled in from here.
    print >> fd, _(__doc__)
    if msg:
        print >> fd, msg
    sys.exit(code)



def main():
    """Strip html_body attributes from every article file named on argv.

    Each file argument is read as a marshalled dictionary whose values are
    pickled article objects.  Every article is unpickled, has its html_body
    attribute removed if present, and is pickled again.  The result is
    written to '<file>.tmp'; the original is then renamed to '<file>.bak'
    and the temporary file is renamed into its place.

    -h / --help prints the usage text and exits with status 0; an invalid
    option prints the usage text to stderr and exits with status 1.
    """
    # get command line arguments
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'h', ['help'])
    except getopt.error, msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)

    for filename in args:
        print 'processing:', filename
        # Close the handle even when the file turns out not to be a valid
        # marshal dump and marshal.load() raises.
        fp = open(filename, 'rb')
        try:
            d = marshal.load(fp)
        finally:
            fp.close()
        newd = {}
        for key, pckstr in d.items():
            # NOTE: unpickling can execute arbitrary code; only run this
            # script against archive files you trust.
            article = pickle.loads(pckstr)
            try:
                del article.html_body
            except AttributeError:
                # Article never had the attribute; nothing to strip.
                pass
            newd[key] = pickle.dumps(article)
        # Write the new database beside the original so the original stays
        # untouched until the dump has completed.
        tmpname = filename + '.tmp'
        fp = open(tmpname, 'wb')
        try:
            marshal.dump(newd, fp)
        finally:
            fp.close()
        # Keep the original as a backup, then move the new file into place.
        os.rename(filename, filename + '.bak')
        os.rename(tmpname, filename)

    print 'You should now run "bin/check_perms -f"'



# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()