SpamDetect.py [plain text]

# Copyright (C) 1998-2007 by the Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
# USA.

"""Do more detailed spam detection.

This module hard codes site wide spam detection.  By hacking the
KNOWN_SPAMMERS variable, you can set up more regular expression matches
against message headers.  If spam is detected the message is discarded
immediately.

TBD: This needs to be made more configurable and robust.
"""

import re
from cStringIO import StringIO

from email.Generator import Generator

from Mailman import mm_cfg
from Mailman import Errors
from Mailman import i18n
from Mailman.Handlers.Hold import hold_for_approval

try:
    True, False
except NameError:
    True = 1
    False = 0

# First, play footsie with _ so that the following are marked as translated,
# but aren't actually translated until we need the text later on.
def _(s):
    return s



class SpamDetected(Errors.DiscardMessage):
    """The message contains known spam"""

class HeaderMatchHold(Errors.HoldMessage):
    reason = _('The message headers matched a filter rule')


# And reset the translator
_ = i18n._



class Tee:
    def __init__(self, outfp_a, outfp_b):
        self._outfp_a = outfp_a
        self._outfp_b = outfp_b

    def write(self, s):
        self._outfp_a.write(s)
        self._outfp_b.write(s)


# Class to capture the headers separate from the message body
class HeaderGenerator(Generator):
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._headertxt = ''

    def _write_headers(self, msg):
        sfp = StringIO()
        oldfp = self._fp
        self._fp = Tee(oldfp, sfp)
        try:
            Generator._write_headers(self, msg)
        finally:
            self._fp = oldfp
        self._headertxt = sfp.getvalue()

    def header_text(self):
        return self._headertxt



def process(mlist, msg, msgdata):
    if msgdata.get('approved'):
        return
    # First do site hard coded header spam checks
    for header, regex in mm_cfg.KNOWN_SPAMMERS:
        cre = re.compile(regex, re.IGNORECASE)
        for value in msg.get_all(header, []):
            mo = cre.search(value)
            if mo:
                # we've detected spam, so throw the message away
                raise SpamDetected
    # Now do header_filter_rules
    # TK: Collect headers in sub-parts because attachment filename
    # extension may be a clue to possible virus/spam.
    headers = ''
    for p in msg.walk():
        g = HeaderGenerator(StringIO())
        g.flatten(p)
        headers += g.header_text()
    # Now reshape headers (remove extra CR and connect multiline).
    headers = re.sub('\n+', '\n', headers)
    headers = re.sub('\n\s', ' ', headers)
    for patterns, action, empty in mlist.header_filter_rules:
        if action == mm_cfg.DEFER:
            continue
        for pattern in patterns.splitlines():
            if pattern.startswith('#'):
                continue
            # ignore 'empty' patterns
            if not pattern.strip():
                continue
            if re.search(pattern, headers, re.IGNORECASE|re.MULTILINE):
                if action == mm_cfg.DISCARD:
                    raise Errors.DiscardMessage
                if action == mm_cfg.REJECT:
                    if msgdata.get('toowner'):
                        # Don't send rejection notice if addressed to '-owner'
                        # because it may trigger a loop of notices if the
                        # sender address is forged.  We just discard it here.
                        raise Errors.DiscardMessage
                    raise Errors.RejectMessage(
                        _('Message rejected by filter rule match'))
                if action == mm_cfg.HOLD:
                    if msgdata.get('toowner'):
                        # Don't hold '-owner' addressed message.  We just
                        # pass it here but list-owner can set this to be
                        # discarded on the GUI if he wants.
                        return
                    hold_for_approval(mlist, msg, msgdata, HeaderMatchHold)
                if action == mm_cfg.ACCEPT:
                    return