package Mail::SpamAssassin::Plugin::MIMEEval;
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Locales;
use Mail::SpamAssassin::Constants qw(:sa CHARSETS_LIKELY_TO_FP_AS_CAPS);
use strict;
use warnings;
use bytes;
use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
sub new {
my $class = shift;
my $mailsaobject = shift;
$class = ref($class) || $class;
my $self = $class->SUPER::new($mailsaobject);
bless ($self, $class);
$self->register_eval_rule("check_for_mime");
$self->register_eval_rule("check_for_mime_html");
$self->register_eval_rule("check_for_mime_html_only");
$self->register_eval_rule("check_mime_multipart_ratio");
$self->register_eval_rule("check_msg_parse_flags");
$self->register_eval_rule("check_for_faraway_charset");
$self->register_eval_rule("check_for_uppercase");
$self->register_eval_rule("check_ma_non_text");
$self->register_eval_rule("check_base64_length");
return $self;
}
sub are_more_high_bits_set {
my ($self, $str) = @_;
my $numhis = () = ($str =~ /[\200-\377]/g);
my $numlos = length($str) - $numhis;
($numlos <= $numhis && $numhis > 3);
}
sub check_for_faraway_charset {
my ($self, $pms, $body) = @_;
my $type = $pms->get('Content-Type');
my @locales = Mail::SpamAssassin::Util::get_my_locales($self->{main}->{conf}->{ok_locales});
return 0 if grep { $_ eq "all" } @locales;
$type = get_charset_from_ct_line ($type);
if (defined $type &&
!Mail::SpamAssassin::Locales::is_charset_ok_for_locales
($type, @locales))
{
$body = join("\n", @$body);
if ($self->are_more_high_bits_set ($body)) {
return 1;
}
}
0;
}
sub check_for_mime {
my ($self, $pms, undef, $test) = @_;
$self->_check_attachments($pms) unless exists $pms->{$test};
return $pms->{$test};
}
sub check_for_mime_html {
my ($self, $pms) = @_;
my $ctype = $pms->get('Content-Type');
return 1 if (defined($ctype) && $ctype =~ m@^text/html@i);
$self->_check_attachments($pms) unless exists $pms->{mime_body_html_count};
return ($pms->{mime_body_html_count} > 0);
}
sub check_for_mime_html_only {
my ($self, $pms) = @_;
my $ctype = $pms->get('Content-Type');
return 1 if (defined($ctype) && $ctype =~ m@^text/html@i);
$self->_check_attachments($pms) unless exists $pms->{mime_body_html_count};
return ($pms->{mime_body_html_count} > 0 &&
$pms->{mime_body_text_count} == 0);
}
sub check_mime_multipart_ratio {
my ($self, $pms, undef, $min, $max) = @_;
$self->_check_attachments($pms) unless exists $pms->{mime_multipart_alternative};
return ($pms->{mime_multipart_ratio} >= $min &&
$pms->{mime_multipart_ratio} < $max);
}
sub _check_mime_header {
my ($self, $pms, $ctype, $cte, $cd, $charset, $name) = @_;
$charset ||= '';
if ($ctype eq 'text/html') {
$pms->{mime_body_html_count}++;
}
elsif ($ctype =~ m@^text@i) {
$pms->{mime_body_text_count}++;
}
if ($cte =~ /base64/) {
$pms->{mime_base64_count}++;
}
elsif ($cte =~ /quoted-printable/) {
$pms->{mime_qp_count}++;
}
if ($cd && $cd =~ /attachment/) {
$pms->{mime_attachment}++;
}
if ($ctype =~ /^text/ &&
$cte =~ /base64/ &&
$charset !~ /(?:utf-8|big5)/ && !($cd && $cd =~ /^(?:attachment|inline)/))
{
$pms->{mime_base64_encoded_text} = 1;
}
if ($charset =~ /iso-\S+-\S+\b/i &&
$charset !~ /iso-(?:8859-\d{1,2}|2022-(?:jp|kr))\b/)
{
$pms->{mime_bad_iso_charset} = 1;
}
if ($charset =~ /[a-z]/i) {
if (defined $pms->{mime_html_charsets}) {
$pms->{mime_html_charsets} .= " ".$charset;
} else {
$pms->{mime_html_charsets} = $charset;
}
if (! $pms->{mime_faraway_charset}) {
my @l = Mail::SpamAssassin::Util::get_my_locales($self->{main}->{conf}->{ok_locales});
if (!(grep { $_ eq "all" } @l) &&
!Mail::SpamAssassin::Locales::is_charset_ok_for_locales($charset, @l))
{
$pms->{mime_faraway_charset} = 1;
}
}
}
}
sub _check_attachments {
my ($self, $pms) = @_;
my $where = -1; my $qp_bytes = 0; my $qp_count = 0; my @part_bytes; my @part_type;
my $part = -1;
$pms->{mime_checked_attachments} = 1;
$pms->{mime_base64_blanks} = 0;
$pms->{mime_base64_count} = 0;
$pms->{mime_base64_encoded_text} = 0;
$pms->{mime_body_html_count} = 0;
$pms->{mime_body_text_count} = 0;
$pms->{mime_faraway_charset} = 0;
$pms->{mime_missing_boundary} = 0;
$pms->{mime_multipart_alternative} = 0;
$pms->{mime_multipart_ratio} = 1.0;
$pms->{mime_qp_count} = 0;
$pms->{mime_qp_long_line} = 0;
$pms->{mime_qp_ratio} = 0;
foreach my $p ($pms->{msg}->find_parts(qr/./)) {
my ($ctype, $boundary, $charset, $name) = Mail::SpamAssassin::Util::parse_content_type($p->get_header("content-type"));
if ($ctype eq 'multipart/alternative') {
$pms->{mime_multipart_alternative} = 1;
}
my $cte = $p->get_header('Content-Transfer-Encoding') || '';
chomp($cte = defined($cte) ? lc $cte : "");
my $cd = $p->get_header('Content-Disposition') || '';
chomp($cd = defined($cd) ? lc $cd : "");
$charset = lc $charset if ($charset);
$name = lc $name if ($name);
$self->_check_mime_header($pms, $ctype, $cte, $cd, $charset, $name);
if (! $p->is_leaf()) {
next;
}
$part++;
$part_type[$part] = $ctype;
$part_bytes[$part] = 0 if $cd !~ /attachment/;
my $previous = '';
foreach (@{$p->raw()}) {
if ($cte =~ /base64/i) {
if ($previous =~ /^\s*$/ && /^\s*$/) {
$pms->{mime_base64_blanks} = 1;
}
}
if ($pms->{mime_multipart_alternative} && $cd !~ /attachment/ &&
($ctype eq 'text/plain' || $ctype eq 'text/html')) {
$part_bytes[$part] += length;
}
if ($where != 1 && $cte eq "quoted-printable" && ! /^SPAM: /) {
if (length > 77) {
$pms->{mime_qp_long_line} = 1;
}
$qp_bytes += length;
if (index($_, '=') != -1) {
my $qp = () = m/=(?:09|3[0-9ABCEF]|[2456][0-9A-F]|7[0-9A-E])/g;
if ($qp) {
$qp_count += $qp;
my ($trailing) = m/((?:=09|=20)+)\s*$/g;
if ($trailing) {
$qp_count -= (length($trailing) / 3);
}
}
}
}
$previous = $_;
}
}
if ($qp_bytes) {
$pms->{mime_qp_ratio} = $qp_count / $qp_bytes;
}
if ($pms->{mime_multipart_alternative}) {
my $text;
my $html;
for (my $i = $part; $i >= 0; $i--) {
next if !defined $part_bytes[$i];
if (!defined($html) && $part_type[$i] eq 'text/html') {
$html = $part_bytes[$i];
}
elsif (!defined($text) && $part_type[$i] eq 'text/plain') {
$text = $part_bytes[$i];
}
last if (defined($html) && defined($text));
}
if (defined($text) && defined($html) && $html > 0) {
$pms->{mime_multipart_ratio} = ($text / $html);
}
}
foreach my $val (values %{$pms->{msg}->{mime_boundary_state}}) {
if ($val != 0) {
$pms->{mime_missing_boundary} = 1;
last;
}
}
}
sub check_msg_parse_flags {
my($self, $pms, $type, $type2) = @_;
$type = $type2 if ref($type);
return defined $pms->{msg}->{$type};
}
sub check_for_uppercase {
my ($self, $pms, $body, $min, $max) = @_;
local ($_);
if (exists $pms->{uppercase}) {
return ($pms->{uppercase} > $min && $pms->{uppercase} <= $max);
}
if ($self->body_charset_is_likely_to_fp($pms)) {
$pms->{uppercase} = 0; return 0;
}
my $len = 0;
my $lower = 0;
my $upper = 0;
foreach (@{$body}) {
next unless /\S\s+\S/;
next if /^(?:[A-Za-z0-9+\/=]{60,76} ){2}/;
my $line = $_;
$line =~ s/\x1b\$B.*\x1b\(B//gs;
$len += length($line);
$lower += ($line =~ tr/a-z0-9//d);
$upper += ($line =~ tr/A-Z//);
}
if ($len < 200) {
$pms->{uppercase} = 0;
return 0;
}
if (($upper + $lower) == 0) {
$pms->{uppercase} = 0;
} else {
$pms->{uppercase} = ($upper / ($upper + $lower)) * 100;
}
return ($pms->{uppercase} > $min && $pms->{uppercase} <= $max);
}
sub body_charset_is_likely_to_fp {
my ($self, $pms) = @_;
$self->_check_attachments($pms) unless exists $pms->{mime_checked_attachments};
my @charsets = ();
my $type = $pms->get('Content-Type');
$type = get_charset_from_ct_line ($type);
if (defined $type) {
push (@charsets, $type);
}
if (defined $pms->{mime_html_charsets}) {
push (@charsets, split(' ', $pms->{mime_html_charsets}));
}
my $CHARSETS_LIKELY_TO_FP_AS_CAPS = CHARSETS_LIKELY_TO_FP_AS_CAPS;
foreach my $charset (@charsets) {
if ($charset =~ /^${CHARSETS_LIKELY_TO_FP_AS_CAPS}$/) {
return 1;
}
}
return 0;
}
sub get_charset_from_ct_line {
my $type = shift;
if ($type =~ /charset="([^"]+)"/i) { return $1; }
if ($type =~ /charset='([^']+)'/i) { return $1; }
if ($type =~ /charset=(\S+)/i) { return $1; }
return undef;
}
# came up on the users@ list, look for multipart/alternative parts which
# include non-text parts -- skip certain types which occur normally in ham
sub check_ma_non_text {
my($self, $pms) = @_;
foreach my $map ($pms->{msg}->find_parts(qr@^multipart/alternative$@i)) {
foreach my $p ($map->find_parts(qr/./, 1, 0)) {
next if (lc $p->{'type'} eq 'multipart/related');
next if (lc $p->{'type'} eq 'application/rtf');
next if ($p->{'type'} =~ m@^text/@i);
return 1;
}
}
return 0;
}
sub check_base64_length {
my $self = shift;
my $pms = shift;
shift; # body array, unnecessary
my $min = shift;
my $max = shift;
if (!defined $pms->{base64_length}) {
$pms->{base64_length} = $self->_check_base64_length($pms->{msg});
}
return 0 if (defined $max && $pms->{base64_length} > $max);
return $pms->{base64_length} >= $min;
}
sub _check_base64_length {
my $self = shift;
my $msg = shift;
my $result = 0;
foreach my $p ($msg->find_parts(qr@.@, 1)) {
my $ctype=
Mail::SpamAssassin::Util::parse_content_type($p->get_header('content-type'));
# FPs from Google Calendar invites, etc.
# perhaps just limit to test, and image?
next if ($ctype eq 'application/ics');
my $cte = lc $p->get_header('content-transfer-encoding') || '';
next if ($cte !~ /^base64$/);
foreach my $l ( @{$p->raw()} ) {
my $len = length $l;
$result = $len if ($len > $result);
}
}
return $result;
}
1;