"""MIME-stripping filter for Mailman.
This module scans a message for MIME content, removing those sections whose
MIME types match one of a list of matches. multipart/alternative sections are
replaced by the first non-empty component, and multipart/mixed sections
wrapping only single sections after other processing are replaced by their
contents.
"""
import os
import errno
import tempfile
from os.path import splitext
from email.Iterators import typed_subpart_iterator
from Mailman import mm_cfg
from Mailman import Errors
from Mailman.Message import UserNotification
from Mailman.Queue.sbcache import get_switchboard
from Mailman.Logging.Syslog import syslog
from Mailman.Version import VERSION
from Mailman.i18n import _
from Mailman.Utils import oneline
def process(mlist, msg, msgdata):
    """Apply the list's content-filtering settings to msg, in place.

    Nothing is done when mlist.filter_content is false or when
    msgdata['isdigest'] is set.  Otherwise the outer content type and
    filename extension are checked against the list's filter/pass settings,
    subparts are filtered recursively, multipart/alternative parts are
    optionally collapsed, text/html parts are optionally converted to plain
    text, and an X-Content-Filtered-By header is added if anything changed.

    Arguments:
        mlist   -- the mailing list object; its filter_* / pass_* /
                   collapse_alternatives / convert_html_to_plaintext
                   attributes drive the filtering.
        msg     -- the message object to filter (modified in place).
        msgdata -- the message metadata dictionary.

    Whenever the whole message is rejected by the rules, dispose() is called;
    it always raises, so processing stops there.
    """
    # Short-circuits: filtering disabled, or this is a digest.
    if not mlist.filter_content:
        return
    if msgdata.get('isdigest'):
        return
    # These describe the *outer* message as it arrived; ctype is reused below
    # after the payload may already have been rewritten.
    ctype = msg.get_content_type()
    mtype = msg.get_content_maintype()
    # Check the outer type against the filter list and the pass list.
    filtertypes = mlist.filter_mime_types
    passtypes = mlist.pass_mime_types
    if ctype in filtertypes or mtype in filtertypes:
        dispose(mlist, msg, msgdata,
                _("The message's content type was explicitly disallowed"))
    if passtypes and not (ctype in passtypes or mtype in passtypes):
        dispose(mlist, msg, msgdata,
                _("The message's content type was not explicitly allowed"))
    # Same checks for the outer message's filename extension, if it has one.
    filterexts = mlist.filter_filename_extensions
    passexts = mlist.pass_filename_extensions
    fext = get_file_ext(msg)
    if fext:
        if fext in filterexts:
            dispose(mlist, msg, msgdata,
                 _("The message's file extension was explicitly disallowed"))
        if passexts and not (fext in passexts):
            dispose(mlist, msg, msgdata,
                 _("The message's file extension was not explicitly allowed"))
    # Remember how many parts there were so we can tell later whether any
    # were removed.
    numparts = len(list(msg.walk()))
    if msg.is_multipart():
        # Recursively drop subparts that match the filters.  A multipart
        # that had parts before and has none afterwards is disposed of.
        prelen = len(msg.get_payload())
        filter_parts(msg, filtertypes, passtypes, filterexts, passexts)
        postlen = len(msg.get_payload())
        if postlen == 0 and prelen > 0:
            dispose(mlist, msg, msgdata,
                    _("After content filtering, the message was empty"))
    if mlist.collapse_alternatives:
        collapse_multipart_alternatives(msg)
        if ctype == 'multipart/alternative':
            # The outer part itself is the alternative: pull its first
            # subpart's payload and content headers up into the outer
            # message.  A message that claims multipart/alternative but has
            # a string payload (TypeError) or no subparts at all (IndexError)
            # is left untouched instead of crashing, mirroring what
            # collapse_multipart_alternatives() does for nested parts.
            try:
                firstalt = msg.get_payload(0)
            except (IndexError, TypeError):
                firstalt = None
            if firstalt is not None:
                reset_payload(msg, firstalt)
    # If the number of parts changed, something was removed.
    changedp = 0
    if numparts != len(list(msg.walk())):
        changedp = 1
    # Perhaps convert all text/html parts to text/plain.
    if mlist.convert_html_to_plaintext and mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND:
        changedp += to_plaintext(msg)
    # If exactly two parts are left and the first one has an empty payload,
    # recast the message as just the second part.
    if msg.is_multipart() and len(msg.get_payload()) == 2:
        if msg.get_payload(0).get_payload() == '':
            useful = msg.get_payload(1)
            reset_payload(msg, useful)
            changedp = 1
    if changedp:
        msg['X-Content-Filtered-By'] = 'Mailman/MimeDel %s' % VERSION
def reset_payload(msg, subpart):
    """Make msg carry subpart's payload and content headers.

    msg's payload is replaced by subpart's payload.  msg's Content-Type,
    Content-Transfer-Encoding, Content-Disposition and Content-Description
    headers are removed; Content-Type is then set from subpart (falling back
    to 'text/plain'), and each of the other three is copied over only when
    subpart has a non-empty value for it.  All other headers of msg are left
    alone.
    """
    msg.set_payload(subpart.get_payload())
    # Drop every content header of the outer part before copying new ones.
    for stale in ('content-type', 'content-transfer-encoding',
                  'content-disposition', 'content-description'):
        del msg[stale]
    msg['Content-Type'] = subpart.get('content-type', 'text/plain')
    # Header lookups are case-insensitive, so the canonical spelling can be
    # used both for reading from subpart and for writing to msg.
    for header in ('Content-Transfer-Encoding', 'Content-Disposition',
                   'Content-Description'):
        value = subpart.get(header)
        if value:
            msg[header] = value
def filter_parts(msg, filtertypes, passtypes, filterexts, passexts):
    """Recursively remove disallowed subparts from msg, in place.

    A subpart is dropped when any of the following holds:
      - it is itself a multipart that lost all of its subparts to filtering;
      - its content type or main type is listed in filtertypes;
      - passtypes is non-empty and lists neither its content type nor its
        main type;
      - it has a filename extension that is listed in filterexts, or that is
        missing from a non-empty passexts.

    Returns 1 if msg is not a multipart or still has something left (or was
    empty to begin with), and 0 if msg had subparts and none survived.
    """
    if not msg.is_multipart():
        return 1
    parts = msg.get_payload()
    had_parts = len(parts) > 0
    survivors = []
    for part in parts:
        # Filter the children first; a nested multipart that ends up empty
        # is discarded as a whole.
        if not filter_parts(part, filtertypes, passtypes,
                            filterexts, passexts):
            continue
        fulltype = part.get_content_type()
        maintype = part.get_content_maintype()
        if fulltype in filtertypes or maintype in filtertypes:
            continue
        if passtypes and fulltype not in passtypes \
               and maintype not in passtypes:
            continue
        ext = get_file_ext(part)
        if ext and (ext in filterexts or
                    (passexts and ext not in passexts)):
            continue
        survivors.append(part)
    msg.set_payload(survivors)
    if had_parts and not survivors:
        return 0
    return 1
def collapse_multipart_alternatives(msg):
    """Replace multipart/alternative subparts of msg by their first part.

    msg is modified in place; nothing happens if it is not a multipart.
    Each multipart/alternative subpart is replaced by its first alternative.
    An alternative with no usable first subpart (empty list, or a payload
    that is not a list) is dropped.

    Other multipart subparts are descended into, so that alternatives nested
    deeper in the tree (e.g. multipart/mixed -> multipart/related ->
    multipart/alternative) are collapsed too.  message/* wrappers are not
    descended into: replacing the sole child of such a wrapper by its first
    alternative would throw away the attached message's own headers.
    """
    if not msg.is_multipart():
        return
    newpayload = []
    for subpart in msg.get_payload():
        if subpart.get_content_type() == 'multipart/alternative':
            try:
                firstalt = subpart.get_payload(0)
            except (IndexError, TypeError):
                # No first alternative to keep; drop the part entirely.
                continue
            newpayload.append(firstalt)
        else:
            if subpart.is_multipart() and \
                   subpart.get_content_maintype() != 'message':
                collapse_multipart_alternatives(subpart)
            newpayload.append(subpart)
    msg.set_payload(newpayload)
def to_plaintext(msg):
changedp = 0
for subpart in typed_subpart_iterator(msg, 'text', 'html'):
filename = tempfile.mktemp('.html')
fp = open(filename, 'w')
try:
fp.write(subpart.get_payload(decode=1))
fp.close()
cmd = os.popen(mm_cfg.HTML_TO_PLAIN_TEXT_COMMAND %
{'filename': filename})
plaintext = cmd.read()
rtn = cmd.close()
if rtn:
syslog('error', 'HTML->text/plain error: %s', rtn)
finally:
try:
os.unlink(filename)
except OSError, e:
if e.errno <> errno.ENOENT: raise
del subpart['content-transfer-encoding']
subpart.set_payload(plaintext)
subpart.set_type('text/plain')
changedp = 1
return changedp
def dispose(mlist, msg, msgdata, why):
    """Get rid of a message that failed content filtering; always raises.

    What happens depends on mlist.filter_action:
        1 -- raise Errors.RejectMessage with why as its argument.
        2 -- forward the message via mlist.ForwardMessage(), then discard.
        3 -- if mm_cfg.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES is true, enqueue
             msg and msgdata in the switchboard for mm_cfg.BADQUEUE_DIR,
             then discard.
    Any other value simply discards.  Discarding means raising
    Errors.DiscardMessage.
    """
    action = mlist.filter_action
    if action == 1:
        raise Errors.RejectMessage(why)
    elif action == 2:
        # NOTE(review): listname is not referenced explicitly below; the
        # %(listname)s placeholder in the translated text presumably gets
        # filled in from this local by _() -- confirm before renaming or
        # removing it.
        listname = mlist.internal_name()
        mlist.ForwardMessage(
            msg,
            text=_("""\
The attached message matched the %(listname)s mailing list's content filtering
rules and was prevented from being forwarded on to the list membership. You
are receiving the only remaining copy of the discarded message.
"""),
            subject=_('Content filtered message notification'))
    elif action == 3 and mm_cfg.OWNERS_CAN_PRESERVE_FILTERED_MESSAGES:
        get_switchboard(mm_cfg.BADQUEUE_DIR).enqueue(msg, msgdata)
    # Every path that did not reject ends by discarding the message.
    raise Errors.DiscardMessage
def get_file_ext(m):
    """Return the lower-cased filename extension of message part m.

    The filename is taken from m.get_filename(), falling back to the 'name'
    parameter of the Content-Type header, because not every sender (some
    viruses in particular) puts the filename in Content-Disposition.  The
    leading dot is not included.  An empty string is returned when there is
    no filename or the filename has no extension.
    """
    filename = m.get_filename('') or m.get_param('name', '')
    if not filename:
        return ''
    suffix = splitext(oneline(filename,'utf-8'))[1]
    # splitext() keeps the leading dot, so anything of length <= 1 carries no
    # real extension.
    if len(suffix) <= 1:
        return ''
    return suffix[1:].lower()