=head1 NAME
Mail::SpamAssassin::PerMsgStatus - per-message status (spam or not-spam)
=head1 SYNOPSIS
my $spamtest = new Mail::SpamAssassin ({
'rules_filename' => '/etc/spamassassin.rules',
'userprefs_filename' => $ENV{HOME}.'/.spamassassin/user_prefs'
});
my $mail = $spamtest->parse();
my $status = $spamtest->check ($mail);
if ($status->is_spam()) {
$status->rewrite_mail ();
}
...
=head1 DESCRIPTION
The Mail::SpamAssassin C<check()> method returns an object of this
class. This object encapsulates all the per-message state.
=head1 METHODS
=over 4
=cut
package Mail::SpamAssassin::PerMsgStatus;
use strict;
use bytes;
use Carp;
use Text::Wrap ();
use Mail::SpamAssassin::Constants qw(:sa);
use Mail::SpamAssassin::EvalTests;
use Mail::SpamAssassin::AutoWhitelist;
use Mail::SpamAssassin::Conf;
use Mail::SpamAssassin::Util;
use vars qw{
@ISA
};
@ISA = qw();
sub new {
my $class = shift;
$class = ref($class) || $class;
my ($main, $msg, $opts) = @_;
my $self = {
'main' => $main,
'msg' => $msg,
'score' => 0,
'test_logs' => '',
'test_names_hit' => [ ],
'subtest_names_hit' => [ ],
'tests_already_hit' => { },
'hdr_cache' => { },
'rule_errors' => 0,
'disable_auto_learning' => 0,
'auto_learn_status' => undef,
'conf' => $main->{conf},
};
if (defined $opts && $opts->{disable_auto_learning}) {
$self->{disable_auto_learning} = 1;
}
if ($self->{main}->{save_pattern_hits}) {
$self->{save_pattern_hits} = 1;
$self->{pattern_hits} = { };
}
bless ($self, $class);
$self;
}
=item $status->check ()
Runs the SpamAssassin rules against the message pointed to by the object.
=cut
sub check {
my ($self) = @_;
local ($_);
$self->{learned_points} = 0;
$self->{body_only_points} = 0;
$self->{head_only_points} = 0;
$self->{score} = 0;
$self->{main}->call_plugins ("check_start", { permsgstatus => $self });
$self->{msg}->delete_header('X-Spam-.*');
my $set = $self->{conf}->get_score_set();
if (($set & 2) == 0 && $self->{main}->{bayes_scanner}->is_scan_available()) {
dbg("debug: Scoreset $set but Bayes is available, switching scoresets");
$self->{conf}->set_score_set ($set|2);
}
$self->extract_message_metadata();
{
$self->run_rbl_eval_tests ($self->{conf}->{rbl_evals});
my $needs_dnsbl_harvest_p = 1;
my $decoded = $self->get_decoded_stripped_body_text_array();
$self->{html} = $self->{msg}->{metadata}->{html};
my $bodytext = $self->get_decoded_body_text_array();
my $fulltext = $self->{msg}->get_pristine();
my @uris = $self->get_uri_list();
foreach my $priority (sort { $a <=> $b } keys %{$self->{conf}->{priorities}}) {
next unless ($self->{conf}->{priorities}->{$priority} > 0);
dbg("Running tests for priority: $priority");
if ($priority >= HARVEST_DNSBL_PRIORITY && $needs_dnsbl_harvest_p) {
$self->harvest_dnsbl_queries();
$needs_dnsbl_harvest_p = 0;
$self->rbl_finish();
$self->{main}->call_plugins ("check_post_dnsbl", { permsgstatus => $self });
}
if ($priority >= META_TEST_MIN_PRIORITY) {
$self->do_meta_tests($priority);
}
$self->do_head_tests($priority);
$self->do_head_eval_tests($priority);
$self->do_body_tests($priority, $decoded);
$self->do_body_uri_tests($priority, @uris);
$self->do_body_eval_tests($priority, $decoded);
$self->{main}->call_plugins ("check_tick", { permsgstatus => $self });
$self->do_rawbody_tests($priority, $bodytext);
$self->do_rawbody_eval_tests($priority, $bodytext);
$self->do_full_tests($priority, \$fulltext);
$self->do_full_eval_tests($priority, \$fulltext);
}
if ($needs_dnsbl_harvest_p) {
$self->harvest_dnsbl_queries();
$self->rbl_finish();
$self->{main}->call_plugins ("check_post_dnsbl", { permsgstatus => $self });
}
delete $self->{current_rule_name};
undef $decoded;
undef $bodytext;
undef $fulltext;
$self->learn();
$self->{main}->call_plugins ("check_post_learn", { permsgstatus => $self });
}
$self->delete_fulltext_tmpfile();
$self->{hdr_cache} = { };
$self->{score} = (sprintf "%0.3f", $self->{score}) + 0;
dbg ("is spam? score=".$self->{score}.
" required=".$self->{conf}->{required_score});
dbg ("tests=".$self->get_names_of_tests_hit());
dbg ("subtests=".$self->get_names_of_subtests_hit());
$self->{is_spam} = $self->is_spam();
$self->{main}->call_plugins ("check_end", { permsgstatus => $self });
1;
}
=item $status->learn()
After a mail message has been checked, this method can be called. If the score
is outside a certain range around the threshold, ie. if the message is judged
more-or-less definitely spam or definitely non-spam, it will be fed into
SpamAssassin's learning systems (currently the naive Bayesian classifier),
so that future similar mails will be caught.
=cut
sub learn {
my ($self) = @_;
if (!$self->{conf}->{bayes_auto_learn} ||
!$self->{conf}->{use_bayes} ||
$self->{disable_auto_learning})
{
$self->{auto_learn_status} = "disabled";
return;
}
my $min = $self->{conf}->{bayes_auto_learn_threshold_nonspam};
my $max = $self->{conf}->{bayes_auto_learn_threshold_spam};
my $score = $self->_get_autolearn_points();
dbg ("auto-learn? ham=$min, spam=$max, ".
"body-points=".$self->{body_only_points}.", ".
"head-points=".$self->{head_only_points}.", ".
"learned-points=".$self->{learned_points});
my $isspam;
if ($score < $min) {
$isspam = 0;
} elsif ($score >= $max) {
$isspam = 1;
} else {
dbg ("auto-learn? no: inside auto-learn thresholds, not considered ham or spam");
$self->{auto_learn_status} = "no";
return;
}
my $learner_said_ham_points = -1.0;
my $learner_said_spam_points = 1.0;
if ($isspam) {
my $required_body_points = 3;
my $required_head_points = 3;
if ($self->{body_only_points} < $required_body_points) {
$self->{auto_learn_status} = "no";
dbg ("auto-learn? no: scored as spam but too few body points (".
$self->{body_only_points}." < ".$required_body_points.")");
return;
}
if ($self->{head_only_points} < $required_head_points) {
$self->{auto_learn_status} = "no";
dbg ("auto-learn? no: scored as spam but too few head points (".
$self->{head_only_points}." < ".$required_head_points.")");
return;
}
if ($self->{learned_points} < $learner_said_ham_points) {
$self->{auto_learn_status} = "no";
dbg ("auto-learn? no: scored as spam but learner indicated ham (".
$self->{learned_points}." < ".$learner_said_ham_points.")");
return;
}
} else {
if ($self->{learned_points} > $learner_said_spam_points) {
$self->{auto_learn_status} = "no";
dbg ("auto-learn? no: scored as ham but learner indicated spam (".
$self->{learned_points}." > ".$learner_said_spam_points.")");
return;
}
}
dbg ("auto-learn? yes, ".($isspam?"spam ($score > $max)":"ham ($score < $min)"));
$self->{main}->call_plugins ("autolearn", {
permsgstatus => $self,
isspam => $isspam
});
eval {
my $learnstatus = $self->{main}->learn ($self->{msg}, undef, $isspam, 0);
$learnstatus->finish();
if ($learnstatus->did_learn()) {
$self->{auto_learn_status} = $isspam ? "spam" : "ham";
}
$self->{main}->finish_learner();
if (exists $self->{main}->{bayes_scanner}) {
$self->{main}->{bayes_scanner}->sanity_check_is_untied();
}
};
if ($@) {
dbg ("auto-learning failed: $@");
$self->{auto_learn_status} = "failed";
}
}
sub _get_autowhitelist_points {
my ($self) = @_;
my $scores = $self->{conf}->{scores};
my $tflags = $self->{conf}->{tflags};
my $points = 0;
foreach my $test (@{$self->{test_names_hit}})
{
next if ($scores->{$test} == 0);
next if (exists $tflags->{$test} && $tflags->{$test} =~ /\bnoautolearn\b/);
$points += $scores->{$test};
}
return (sprintf "%0.3f", $points) + 0;
}
sub _get_autolearn_points {
my ($self) = @_;
my $orig_scoreset = $self->{conf}->get_score_set();
my $new_scoreset = $orig_scoreset;
my $scores = $self->{conf}->{scores};
if (($orig_scoreset & 2) == 0) { dbg ("auto-learn: currently using scoreset $orig_scoreset.");
}
else {
$new_scoreset = $orig_scoreset & ~2;
dbg ("auto-learn: currently using scoreset $orig_scoreset, recomputing score based on scoreset $new_scoreset.");
$scores = $self->{conf}->{scoreset}->[$new_scoreset];
}
my $tflags = $self->{conf}->{tflags};
my $points = 0;
$self->{learned_points} = 0;
$self->{body_only_points} = 0;
$self->{head_only_points} = 0;
foreach my $test (@{$self->{test_names_hit}}) {
if (exists $tflags->{$test}) {
next if $tflags->{$test} =~ /\bnoautolearn\b/;
next if $tflags->{$test} =~ /\buserconf\b/;
if ($tflags->{$test} =~ /\blearn\b/) {
$self->{learned_points} += $self->{conf}->{scoreset}->[$orig_scoreset]->{$test};
next;
}
}
next if ($scores->{$test} == 0);
if (!$self->{conf}->maybe_header_only ($test)) {
$self->{body_only_points} += $scores->{$test};
}
if (!$self->{conf}->maybe_body_only ($test)) {
$self->{head_only_points} += $scores->{$test};
}
$points += $scores->{$test};
}
$points = (sprintf "%0.3f", $points) + 0;
dbg ("auto-learn: message score: ".$self->{score}.", computed score for autolearn: $points");
return $points;
}
=item $isspam = $status->is_spam ()
After a mail message has been checked, this method can be called. It will
return 1 for mail determined likely to be spam, 0 if it does not seem
spam-like.
=cut
sub is_spam {
my ($self) = @_;
return ($self->{score} >= $self->{conf}->{required_score});
}
=item $list = $status->get_names_of_tests_hit ()
After a mail message has been checked, this method can be called. It will
return a comma-separated string, listing all the symbolic test names
of the tests which were trigged by the mail.
=cut
sub get_names_of_tests_hit {
my ($self) = @_;
return join(',', sort(@{$self->{test_names_hit}}));
}
=item $list = $status->get_names_of_subtests_hit ()
After a mail message has been checked, this method can be called. It will
return a comma-separated string, listing all the symbolic test names of the
meta-rule sub-tests which were trigged by the mail. Sub-tests are the
normally-hidden rules, which score 0 and have names beginning with two
underscores, used in meta rules.
=cut
sub get_names_of_subtests_hit {
my ($self) = @_;
return join(',', sort(@{$self->{subtest_names_hit}}));
}
=item $num = $status->get_score ()
After a mail message has been checked, this method can be called. It will
return the message's score.
=cut
sub get_score {
my ($self) = @_;
return $self->{score};
}
sub get_hits {
my ($self) = @_;
return $self->{score};
}
=item $num = $status->get_required_score ()
After a mail message has been checked, this method can be called. It will
return the score required for a mail to be considered spam.
=cut
sub get_required_score {
my ($self) = @_;
return $self->{conf}->{required_score};
}
sub get_required_hits {
my ($self) = @_;
return $self->{conf}->{required_score};
}
=item $num = $status->get_autolearn_status ()
After a mail message has been checked, this method can be called. It will
return one of the following strings depending on whether the mail was
auto-learned or not: "ham", "no", "spam", "disabled", "failed", "unavailable".
=cut
sub get_autolearn_status {
my ($self) = @_;
return ($self->{auto_learn_status} || "unavailable");
}
=item $report = $status->get_report ()
Deliver a "spam report" on the checked mail message. This contains details of
how many spam detection rules it triggered.
The report is returned as a multi-line string, with the lines separated by
C<\n> characters.
=cut
sub get_report {
my ($self) = @_;
if (!exists $self->{'report'}) {
my $report;
$report = $self->{conf}->{report_template};
$report ||= '(no report template found)';
$report = $self->_replace_tags($report);
$report =~ s/\n*$/\n\n/s;
$self->{report} = $report;
}
return $self->{report};
}
=item $preview = $status->get_content_preview ()
Give a "preview" of the content.
This is returned as a multi-line string, with the lines separated by C<\n>
characters, containing a fully-decoded, safe, plain-text sample of the first
few lines of the message body.
=cut
sub get_content_preview {
my ($self) = @_;
$Text::Wrap::columns = 74;
$Text::Wrap::huge = 'overflow';
my $str = '';
my $ary = $self->get_decoded_stripped_body_text_array();
shift @{$ary};
my $numlines = 3;
while (length ($str) < 200 && @{$ary} && $numlines-- > 0) {
$str .= shift @{$ary};
}
undef $ary;
chomp ($str); $str .= " [...]\n";
$str =~ s/^(.{,200}).*$/$1/gs;
$str =~ s/-----Original Message-----.*$//gs;
$str =~ s/This is a multi-part message in MIME format\.//gs;
$str =~ s/[-_\*\.]{10,}//gs;
$str =~ s/\s+/ /gs;
my $wrapped;
eval {
$wrapped = Text::Wrap::wrap ("Content preview: ", " ", $str);
if (defined $wrapped) {
$wrapped =~ s/^Content preview:\s+//gs;
$str = $wrapped;
}
};
$str;
}
=item $msg = $status->get_message()
Return the object representing the message being scanned.
=cut
sub get_message {
my ($self) = @_;
return $self->{msg};
}
=item $status->rewrite_mail ()
Rewrite the mail message. This will at minimum add headers, and at
maximum MIME-encapsulate the message text, to reflect its spam or not-spam
status. The function will return a scalar of the rewritten message.
The actual modifications depend on the configuration (see
C<Mail::SpamAssassin::Conf> for more information).
The possible modifications are as follows:
=over 4
=item To:, From: and Subject: modification on spam mails
Depending on the configuration, the To: and From: lines can have a
user-defined RFC 2822 comment appended for spam mail. The subject line
may have a user-defined string prepended to it for spam mail.
=item X-Spam-* headers for all mails
Depending on the configuration, zero or more headers with names
beginning with C<X-Spam-> will be added to mail depending on whether
it is spam or ham.
=item spam message with report_safe
If report_safe is set to true (1), then spam messages are encapsulated
into their own message/rfc822 MIME attachment without any modifications
being made.
If report_safe is set to false (0), then the message will only have the
above headers added/modified.
=back
=cut
sub rewrite_mail {
my ($self) = @_;
my $mbox = $self->{msg}->get_mbox_separator() || '';
if ($self->{is_spam} && $self->{conf}->{report_safe}) {
return $mbox.$self->rewrite_report_safe();
}
else {
return $mbox.$self->rewrite_no_report_safe();
}
}
sub rewrite_report_safe {
my ($self) = @_;
my $original = $self->{msg}->get_pristine();
my $newmsg = '';
my $report_charset = "";
if ($self->{conf}->{report_charset}) {
$report_charset = "; charset=" . $self->{conf}->{report_charset};
}
my $report = $self->get_report();
my $from = $self->{msg}->get_pristine_header("From");
my $to = $self->{msg}->get_pristine_header("To");
my $cc = $self->{msg}->get_pristine_header("Cc");
my $subject = $self->{msg}->get_pristine_header("Subject");
my $msgid = $self->{msg}->get_pristine_header('Message-Id');
my $date = $self->{msg}->get_pristine_header("Date");
if ($self->{conf}->{rewrite_header}->{Subject}) {
$subject ||= "\n";
my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{Subject});
$tag =~ s/\n/ /gs; $subject =~ s/^(?:\Q${tag}\E )?/${tag} /g; }
if ($self->{conf}->{rewrite_header}->{To}) {
$to ||= "\n";
my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{To});
$tag =~ s/\n/ /gs; $to =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/
}
if ($self->{conf}->{rewrite_header}->{From}) {
$from ||= "\n";
my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{From});
$tag =~ s/\n+//gs; # strip tag's newlines
$from =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/
}
$newmsg .= "From: $from" if $from;
$newmsg .= "To: $to" if $to;
$newmsg .= "Cc: $cc" if $cc;
$newmsg .= "Subject: $subject" if $subject;
$newmsg .= "Date: $date" if $date;
$newmsg .= "Message-Id: $msgid" if $msgid;
foreach my $header (keys %{$self->{conf}->{headers_spam}}) {
my $data = $self->{conf}->{headers_spam}->{$header};
my $line = $self->_process_header($header,$data) || "";
$line = $self->qp_encode_header($line);
$newmsg .= "X-Spam-$header: $line\n" }
if (defined $self->{conf}->{report_safe_copy_headers}) {
my %already_added = map { $_ => 1 } qw/from to cc subject date message-id/;
foreach my $hdr (@{$self->{conf}->{report_safe_copy_headers}}) {
next if exists $already_added{lc $hdr};
my @hdrtext = $self->{msg}->get_pristine_header($hdr);
$already_added{lc $hdr}++;
if (lc $hdr eq "received") { my $rhdr = "";
foreach (@hdrtext) {
$rhdr .= "$hdr: $_";
}
$newmsg = "$rhdr$newmsg";
}
else {
foreach (@hdrtext) {
$newmsg .= "$hdr: $_";
}
}
}
}
$newmsg = "Received: from localhost by " .
Mail::SpamAssassin::Util::fq_hostname() . "\n" .
"\twith SpamAssassin (version " .
Mail::SpamAssassin::Version() . ");\n" .
"\t" . Mail::SpamAssassin::Util::time_to_rfc822_date() . "\n" .
$newmsg;
my $boundary = "----------=_" . sprintf("%08X.%08X",time,int(rand(2 ** 32)));
while ($original =~ /^\Q${boundary}\E$/m) {
$boundary .= "/".sprintf("%08X",int(rand(2 ** 32)));
}
my $disposition;
my $ct = $self->{msg}->get_header("Content-Type");
if (defined $ct && $ct ne '' && $ct !~ m{text/plain}i) {
$disposition = "attachment";
$report .= $self->_replace_tags($self->{conf}->{unsafe_report_template});
}
else {
$disposition = "inline";
}
my $type = "message/rfc822";
$type = "text/plain" if $self->{conf}->{report_safe} > 1;
my $description = $self->{main}->{'encapsulated_content_description'};
$newmsg .= <<"EOM";
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="$boundary"
This is a multi-part message in MIME format.
--$boundary
Content-Type: text/plain$report_charset
Content-Disposition: inline
Content-Transfer-Encoding: 8bit
$report
--$boundary
Content-Type: $type; x-spam-type=original
Content-Description: $description
Content-Disposition: $disposition
Content-Transfer-Encoding: 8bit
$original
--$boundary--
EOM
return $newmsg;
}
sub rewrite_no_report_safe {
my ($self) = @_;
my(@pristine_headers) = grep(!/^X-Spam-(?!Prev-)/i, $self->{msg}->get_pristine_header() =~ /^([^:]+:[ \t]*(?:.*\n(?:\s+\S.*\n)*))/mig);
my $addition = 'headers_ham';
if($self->{is_spam}) {
foreach (@pristine_headers) {
next if (!/^(From|Subject|To):/i);
my $hdr = ucfirst(lc($1));
next if (!exists $self->{conf}->{rewrite_header}->{$hdr});
push(@pristine_headers, "X-Spam-Prev-$_");
my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{$hdr});
$tag =~ s/\n/ /gs;
$tag = "($tag)" if ($hdr =~ /^(?:From|To)$/);
s/^([^:]+:[ \t]*)(?:\Q${tag}\E )?/$1${tag} /i;
}
$addition = 'headers_spam';
}
while (my ($header, $data) = each %{$self->{conf}->{$addition}}) {
my $line = $self->_process_header($header,$data) || "";
$line = $self->qp_encode_header($line);
push(@pristine_headers, "X-Spam-$header: $line\n");
}
return join('', @pristine_headers, "\n", $self->{msg}->get_pristine_body());
}
sub qp_encode_header {
my ($self, $text) = @_;
return $text unless ($text =~ /[\x80-\xff]/);
my $cs = 'ISO-8859-1';
if ($self->{report_charset}) {
$cs = $self->{report_charset};
}
my @hexchars = split('', '0123456789abcdef');
my $ord;
$text =~ s{([\x80-\xff])}{
$ord = ord $1;
'='.$hexchars[($ord & 0xf0) >> 4].$hexchars[$ord & 0x0f]
}ges;
$text = '=?'.$cs.'?Q?'.$text.'?=';
dbg ("encoding header in $cs: $text");
return $text;
}
sub _process_header {
my ($self, $hdr_name, $hdr_data) = @_;
$hdr_data = $self->_replace_tags($hdr_data);
$hdr_data =~ s/(?:\r?\n)+$//; # make sure there are no trailing newlines ...
if ($self->{conf}->{fold_headers}) {
if ($hdr_data =~ /\n/) {
$hdr_data =~ s/\s*\n\s*/\n\t/g;
return $hdr_data;
}
else {
my $hdr = "X-Spam-$hdr_name!!$hdr_data";
$Text::Wrap::columns = 79;
$Text::Wrap::huge = 'wrap';
$Text::Wrap::break = '(?<=[\s,])';
$hdr = Text::Wrap::wrap('',"\t",$hdr);
$hdr =~ s/^\t\n//gm;
return (split (/!!/, $hdr, 2))[1]; }
}
else {
$hdr_data =~ s/\n/ /g; return $hdr_data;
}
}
sub _replace_tags {
my $self = shift;
my $text = shift;
$text =~ s/_(\w+?)(?:\((.*?)\))?_/${\($self->_get_tag($1,$2))}/g;
return $text;
}
sub bayes_report_make_list {
my $self = shift;
my $info = shift;
my $param = shift || "5";
my ($limit,$fmt_arg,$more) = split /,/, $param;
return "Tokens not available." unless defined $info;
my %formats =
( short => '$t',
Short => 'Token: \"$t\"',
compact => '$p-$D--$t',
Compact => 'Probability $p -declassification distance $D (\"+\" means > 9) --token: \"$t\"',
medium => '$p-$D-$N--$t',
long => '$p-$d--${h}h-${s}s--${a}d--$t',
Long => 'Probability $p -declassification distance $D --in ${h} ham messages -and ${s} spam messages --$a} days old--token:\"$t\"'
);
my $allow_user_defined = 0;
my $raw_fmt = !$fmt_arg ? '$p-$D--$t'
: $allow_user_defined && $fmt_arg =~ m/^\"([^"]+)\"/ ? $1
: $formats{$fmt_arg};
return "Invalid format, must be one of: ".join(",",keys %formats)
unless defined $raw_fmt;
my $fmt = '"'.$raw_fmt.'"';
my $amt = $limit < @$info ? $limit : @$info;
return "" unless $amt;
my $Bayes = $self->{main}{bayes_scanner};
my $ns = $self->{bayes_nspam};
my $nh = $self->{bayes_nham};
my $digit = sub { $_[0] > 9 ? "+" : $_[0] };
my $now = time;
join ', ', map {
my($t,$prob,$s,$h,$u) = @$_;
my $a = int(($now - $u)/(3600 * 24));
my $d = $Bayes->compute_declassification_distance($ns,$nh,$s,$h,$prob);
my $p = sprintf "%.3f", $prob;
my $n = $s + $h;
my ($c,$o) = $prob < 0.5 ? ($h,$s) : ($s,$h);
my ($D,$S,$H,$C,$O,$N) = map &$digit($_), ($d,$s,$h,$c,$o,$n);
eval $fmt;
} @{$info}[0..$amt-1];
}
sub set_tag {
my $self = shift;
my $tag = uc shift;
my $val = shift;
$self->{tag_data}->{$tag} = $val;
}
sub _get_tag_value_for_yesno {
my $self = shift;
return $self->{is_spam} ? "Yes" : "No";
}
sub _get_tag_value_for_score {
my ($self, $pad) = @_;
my $score = sprintf("%2.1f", $self->{score});
my $rscore = $self->_get_tag_value_for_required_score();
# padding
if (defined $pad && $pad =~ /^(0+| +)$/) {
my $count = length($1) + 3 - length($score);
$score = (substr($pad, 0, $count) . $score) if $count > 0;
}
# Do some rounding tricks to avoid the 5.0!=5.0-phenomenon,
# see <http://bugzilla.spamassassin.org/show_bug.cgi?id=2607>
return $score if $self->{is_spam} or $score < $rscore;
return $rscore - 0.1;
}
sub _get_tag_value_for_required_score {
my $self = shift;
return sprintf("%2.1f", $self->{conf}->{required_score});
}
sub _get_tag {
my $self = shift;
my $tag = shift;
my %tags;
# tag data also comes from $self->{tag_data}->{TAG}
$tag = "" unless defined $tag; # can be "0", so use defined test
%tags = ( YESNO => sub { $self->_get_tag_value_for_yesno() },
YESNOCAPS => sub { uc $self->_get_tag_value_for_yesno() },
SCORE => sub { $self->_get_tag_value_for_score(shift) },
HITS => sub { $self->_get_tag_value_for_score(shift) },
REQD => sub { $self->_get_tag_value_for_required_score() },
VERSION => \&Mail::SpamAssassin::Version,
SUBVERSION => sub { $Mail::SpamAssassin::SUB_VERSION },
HOSTNAME => sub {
$self->{conf}->{report_hostname} ||
Mail::SpamAssassin::Util::fq_hostname();
},
REMOTEHOSTNAME => sub {
$self->{tag_data}->{'REMOTEHOSTNAME'} ||
"localhost";
},
REMOTEHOSTADDR => sub {
$self->{tag_data}->{'REMOTEHOSTADDR'} ||
"127.0.0.1";
},
CONTACTADDRESS => sub { $self->{conf}->{report_contact}; },
BAYES => sub {
defined($self->{bayes_score}) ?
sprintf("%3.4f", $self->{bayes_score}) : "0.5"
},
HAMMYTOKENS => sub {
$self->bayes_report_make_list
( $self->{bayes_token_info_hammy}, shift );
},
SPAMMYTOKENS => sub {
$self->bayes_report_make_list
( $self->{bayes_token_info_spammy}, shift );
},
TOKENSUMMARY => sub {
if( defined $self->{tag_data}{BAYESTC} )
{
my $tcount_neutral = $self->{tag_data}{BAYESTCLEARNED}
- $self->{tag_data}{BAYESTCSPAMMY}
- $self->{tag_data}{BAYESTCHAMMY};
my $tcount_new = $self->{tag_data}{BAYESTC}
- $self->{tag_data}{BAYESTCLEARNED};
"Tokens: new, $tcount_new; "
."hammy, $self->{tag_data}{BAYESTCHAMMY}; "
."neutral, $tcount_neutral; "
."spammy, $self->{tag_data}{BAYESTCSPAMMY}."
} else {
"Bayes not run.";
}
},
DATE => \&Mail::SpamAssassin::Util::time_to_rfc822_date,
STARS => sub {
my $arg = (shift || "*");
my $length = int($self->{score});
$length = 50 if $length > 50;
return $arg x $length;
},
AUTOLEARN => sub { return $self->get_autolearn_status(); },
TESTS => sub {
my $arg = (shift || ',');
return (join($arg, sort(@{$self->{test_names_hit}})) || "none");
},
TESTSSCORES => sub {
my $arg = (shift || ",");
my $line = '';
foreach my $test (sort @{$self->{test_names_hit}}) {
if (!$line) {
$line .= $test . "=" . $self->{conf}->{scores}->{$test};
} else {
$line .= $arg . $test . "=" . $self->{conf}->{scores}->{$test};
}
}
return $line ? $line : 'none';
},
PREVIEW => sub { $self->get_content_preview() },
REPORT => sub {
return "\n" . ($self->{tag_data}->{REPORT} || "");
},
);
if (exists $tags{$tag}) {
return $tags{$tag}->(@_);
} elsif ($self->{tag_data}->{$tag}) {
return $self->{tag_data}->{$tag};
} else {
return "";
}
}
###########################################################################
=item $status->finish ()
Indicate that this C<$status> object is finished with, and can be destroyed.
If you are using SpamAssassin in a persistent environment, or checking many
mail messages from one C<Mail::SpamAssassin> factory, this method should be
called to ensure Perl's garbage collection will clean up old status objects.
=cut
sub finish {
my ($self) = @_;
$self->{main}->call_plugins ("per_msg_finish", {
permsgstatus => $self
});
foreach(keys %{$self}) {
delete $self->{$_};
}
}
=item $name = $status->get_current_eval_rule_name()
Return the name of the currently-running eval rule. C<undef> is
returned if no eval rule is currently being run. Useful for plugins
to determine the current rule name while inside an eval test function
call.
=cut
sub get_current_eval_rule_name {
my ($self) = @_;
return $self->{current_rule_name};
}
###########################################################################
sub extract_message_metadata {
my ($self) = @_;
$self->{msg}->extract_message_metadata($self->{main});
foreach my $item (qw(
relays_trusted relays_trusted_str num_relays_trusted
relays_untrusted relays_untrusted_str num_relays_untrusted
))
{
$self->{$item} = $self->{msg}->{metadata}->{$item};
}
$self->{tag_data}->{RELAYSTRUSTED} = $self->{relays_trusted_str};
$self->{tag_data}->{RELAYSUNTRUSTED} = $self->{relays_untrusted_str};
$self->{tag_data}->{LANGUAGES} = $self->{msg}->get_metadata("X-Languages");
# allow plugins to add more metadata, read the stuff that's there, etc.
$self->{main}->call_plugins ("parsed_metadata", { permsgstatus => $self });
}
###########################################################################
sub get_decoded_body_text_array {
return $_[0]->{msg}->get_decoded_body_text_array();
}
sub get_decoded_stripped_body_text_array {
return $_[0]->{msg}->get_rendered_body_text_array();
}
###########################################################################
=item $status->get (header_name [, default_value])
Returns a message header, pseudo-header, real name or address.
C<header_name> is the name of a mail header, such as 'Subject', 'To',
etc. If C<default_value> is given, it will be used if the requested
C<header_name> does not exist.
Appending C<:raw> to the header name will inhibit decoding of quoted-printable
or base-64 encoded strings.
Appending C<:addr> to the header name will cause everything except
the first email address to be removed from the header. For example,
all of the following will result in "example@foo":
=over 4
=item example@foo
=item example@foo (Foo Blah)
=item example@foo, example@bar
=item display: example@foo (Foo Blah), example@bar ;
=item Foo Blah <example@foo>
=item "Foo Blah" <example@foo>
=item "'Foo Blah'" <example@foo>
=back
Appending C<:name> to the header name will cause everything except
the first real name to be removed from the header. For example,
all of the following will result in "Foo Blah"
=over 4
=item example@foo (Foo Blah)
=item example@foo (Foo Blah), example@bar
=item display: example@foo (Foo Blah), example@bar ;
=item Foo Blah <example@foo>
=item "Foo Blah" <example@foo>
=item "'Foo Blah'" <example@foo>
=back
There are several special pseudo-headers that can be specified:
=over 4
=item C<ALL> can be used to mean the text of all the message's headers.
=item C<ToCc> can be used to mean the contents of both the 'To' and 'Cc'
headers.
=item C<EnvelopeFrom> is the address used in the 'MAIL FROM:' phase of the SMTP
transaction that delivered this message, if this data has been made available
by the SMTP server.
=item C<MESSAGEID> is a symbol meaning all Message-Id's found in the message;
some mailing list software moves the real 'Message-Id' to 'Resent-Message-Id'
or 'X-Message-Id', then uses its own one in the 'Message-Id' header. The value
returned for this symbol is the text from all 3 headers, separated by newlines.
=item C<X-Spam-Relays-Untrusted> is the generated metadata of untrusted relays
the message has passed through
=item C<X-Spam-Relays-Trusted> is the generated metadata of trusted relays
the message has passed through
=back
=cut
sub get {
my ($self, $request, $defval) = @_;
local ($_);
if (exists $self->{hdr_cache}->{$request}) {
$_ = $self->{hdr_cache}->{$request};
}
else {
my $hdrname = $request;
my $getaddr = ($hdrname =~ s/:addr$//);
my $getname = ($hdrname =~ s/:name$//);
my $getraw = ($hdrname eq 'ALL' || $hdrname =~ s/:raw$//);
if ($hdrname eq 'ALL') {
$_ = $self->{msg}->get_all_headers($getraw);
}
# EnvelopeFrom: the SMTP MAIL FROM: addr
elsif ($hdrname eq 'EnvelopeFrom') {
$getraw = 1; # this will *not* be encoded unless it's a trick
$getname = 0; # avoid other tricks
$getaddr = 0;
$_ = $self->get_envelope_from();
}
# ToCc: the combined recipients list
elsif ($hdrname eq 'ToCc') {
$_ = join ("\n", $self->{msg}->get_header ('To', $getraw));
if ($_ ne '') {
chop $_;
$_ .= ", " if /\S/;
}
$_ .= join ("\n", $self->{msg}->get_header ('Cc', $getraw));
undef $_ if $_ eq '';
}
# MESSAGEID: handle lists which move the real message-id to another
# header for resending.
elsif ($hdrname eq 'MESSAGEID') {
$_ = join ("\n", grep { defined($_) && length($_) > 0 }
$self->{msg}->get_header ('X-Message-Id', $getraw),
$self->{msg}->get_header ('Resent-Message-Id', $getraw),
$self->{msg}->get_header ('X-Original-Message-ID', $getraw), # bug 2122
$self->{msg}->get_header ('Message-Id', $getraw));
}
# untrusted relays list, as string
elsif ($hdrname eq 'X-Spam-Relays-Untrusted') {
$_ = $self->{relays_untrusted_str};
}
# trusted relays list, as string
elsif ($hdrname eq 'X-Spam-Relays-Trusted') {
$_ = $self->{relays_trusted_str};
}
# a conventional header
else {
my @hdrs = $self->{msg}->get_header ($hdrname, $getraw);
if ($#hdrs >= 0) {
$_ = join ('', @hdrs);
}
else {
$_ = undef;
}
}
if (defined) {
if ($getaddr || $getname) {
s/^[^:]+:(.*);\s*$/$1/gs; # 'undisclosed-recipients: ;'
s/\s+/ /g; # reduce whitespace to single space
s/^\s+//; # leading wsp
s/\s+$//; # trailing wsp
if ($getaddr) {
# Get the email address out of the header
# All of these should result in "jm@foo":
#
# jm@foo
# jm@foo (Foo Blah)
# jm@foo, jm@bar
# display: jm@foo (Foo Blah), jm@bar ;
# Foo Blah <jm@foo>
# "Foo Blah" <jm@foo>
# "'Foo Blah'" <jm@foo>
#
s/\s*\(.*?\)//g; # strip out the (comments)
s/^[^<]*?<(.*?)>.*$/$1/; # "Foo Blah" <jm@foo> or <jm@foo>
s/,.*$//; # multiple addrs on one line? remove all but first
}
elsif ($getname) {
# Get the real name out of the header
# All of these should result in "Foo Blah":
#
# jm@foo (Foo Blah)
# jm@foo (Foo Blah), jm@bar
# display: jm@foo (Foo Blah), jm@bar ;
# Foo Blah <jm@foo>
# "Foo Blah" <jm@foo>
# "'Foo Blah'" <jm@foo>
#
s/^[\'\"]*(.*?)[\'\"]*\s*<.+>\s*$/$1/g
or s/^.+\s\((.*?)\)\s*$/$1/g; # jm@foo (Foo Blah)
}
}
}
$self->{hdr_cache}->{$request} = $_;
}
# If the requested header wasn't found, we should return either
# a default value as specified by the caller, or the blank string ''.
if (!defined) {
$defval ||= '';
$_ = $defval;
}
return $_;
}
###########################################################################
sub ran_rule_debug_code {
my ($self, $rulename, $ruletype, $bit) = @_;
return '' if (!$Mail::SpamAssassin::DEBUG->{enabled}
&& !$self->{save_pattern_hits});
my $log_hits_code = '';
my $save_hits_code = '';
if ($Mail::SpamAssassin::DEBUG->{enabled} &&
($Mail::SpamAssassin::DEBUG->{rulesrun} & $bit) != 0)
{
# note: keep this in 'single quotes' to avoid the $ & performance hit,
# unless specifically requested by the caller.
$log_hits_code = ': match=\'$&\'';
}
if ($self->{save_pattern_hits}) {
$save_hits_code = '
$self->{pattern_hits}->{q{'.$rulename.'}} = $&;
';
}
return '
dbg ("Ran '.$ruletype.' rule '.$rulename.' ======> got hit'.
$log_hits_code.'", "rulesrun", '.$bit.');
'.$save_hits_code.'
';
# do we really need to see when we *don't* get a hit? If so, it should be a
# separate level as it's *very* noisy.
#} else {
# dbg ("Ran '.$ruletype.' rule '.$rulename.' but did not get hit", "rulesrun", '.
# $bit.');
}
sub hash_line_for_rule {
my ($self, $rulename) = @_;
return "\n".'#line 1 "'.
$self->{conf}->{source_file}->{$rulename}.
', rule '.$rulename.',"';
}
###########################################################################
sub do_head_tests {
my ($self, $priority) = @_;
local ($_);
# note: we do this only once for all head pattern tests. Only
# eval tests need to use stuff in here.
$self->{test_log_msgs} = (); # clear test state
dbg ("running header regexp tests; score so far=".$self->{score});
my $doing_user_rules =
$self->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS};
# clean up priority value so it can be used in a subroutine name
my $clean_priority;
($clean_priority = $priority) =~ s/-/neg/;
# speedup code provided by Matt Sergeant
if (defined &{'Mail::SpamAssassin::PerMsgStatus::_head_tests_'.$clean_priority}
&& !$doing_user_rules) {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_head_tests_'.$clean_priority}($self);
use strict "refs";
return;
}
my $evalstr = '';
my $evalstr2 = '';
while (my($rulename, $rule) = each %{$self->{conf}{head_tests}->{$priority}}) {
my $def = '';
my ($hdrname, $testtype, $pat) =
$rule =~ /^\s*(\S+)\s*(\=|\!)\~\s*(\S.*?\S)\s*$/;
if (!defined $pat) {
warn "invalid rule: $rulename\n";
$self->{rule_errors}++;
next;
}
if ($pat =~ s/\s+\[if-unset:\s+(.+)\]\s*$//) { $def = $1; }
$hdrname =~ s/#/[HASH]/g; # avoid probs with eval below
$def =~ s/#/[HASH]/g;
$evalstr .= '
if ($self->{conf}->{scores}->{q#'.$rulename.'#}) {
'.$rulename.'_head_test($self, $_); # no need for OO calling here (its faster this way)
}
';
if ($doing_user_rules) {
next if (!$self->is_user_rule_sub ($rulename.'_head_test'));
}
$evalstr2 .= '
sub '.$rulename.'_head_test {
my $self = shift;
$_ = shift;
'.$self->hash_line_for_rule($rulename).'
if ($self->get(q#'.$hdrname.'#, q#'.$def.'#) '.$testtype.'~ '.$pat.') {
$self->got_hit (q#'.$rulename.'#, q{});
'. $self->ran_rule_debug_code ($rulename,"header regex", 1) . '
}
}';
}
# clear out a previous version of this fn, if already defined
if (defined &{'_head_tests_'.$clean_priority}) {
undef &{'_head_tests_'.$clean_priority};
}
return unless ($evalstr);
$evalstr = <<"EOT";
{
package Mail::SpamAssassin::PerMsgStatus;
$evalstr2
sub _head_tests_$clean_priority {
my (\$self) = \@_;
$evalstr;
}
1;
}
EOT
eval $evalstr;
if ($@) {
warn "Failed to run header SpamAssassin tests, skipping some: $@\n";
$self->{rule_errors}++;
}
else {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_head_tests_'.$clean_priority}($self);
use strict "refs";
}
}
sub do_body_tests {
my ($self, $priority, $textary) = @_;
local ($_);
dbg ("running body-text per-line regexp tests; score so far=".$self->{score});
my $doing_user_rules =
$self->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_BODY_TESTS};
# clean up priority value so it can be used in a subroutine name
my $clean_priority;
($clean_priority = $priority) =~ s/-/neg/;
$self->{test_log_msgs} = (); # clear test state
if (defined &{'Mail::SpamAssassin::PerMsgStatus::_body_tests_'.$clean_priority}
&& !$doing_user_rules) {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_body_tests_'.$clean_priority}($self, @$textary);
use strict "refs";
return;
}
# build up the eval string...
my $evalstr = '';
my $evalstr2 = '';
while (my($rulename, $pat) = each %{$self->{conf}{body_tests}->{$priority}}) {
$evalstr .= '
if ($self->{conf}->{scores}->{q{'.$rulename.'}}) {
# call procedurally as it is faster.
'.$rulename.'_body_test($self,@_);
}
';
if ($doing_user_rules) {
next if (!$self->is_user_rule_sub ($rulename.'_body_test'));
}
$evalstr2 .= '
sub '.$rulename.'_body_test {
my $self = shift;
foreach (@_) {
'.$self->hash_line_for_rule($rulename).'
if ('.$pat.') {
$self->got_pattern_hit (q{'.$rulename.'}, "BODY: ");
'. $self->ran_rule_debug_code ($rulename,"body-text regex", 2) . '
# Ok, we hit, stop now.
last;
}
}
}
';
}
# clear out a previous version of this fn, if already defined
if (defined &{'_body_tests_'.$clean_priority}) {
undef &{'_body_tests_'.$clean_priority};
}
return unless ($evalstr);
# generate the loop that goes through each line...
$evalstr = <<"EOT";
{
package Mail::SpamAssassin::PerMsgStatus;
$evalstr2
sub _body_tests_$clean_priority {
my \$self = shift;
$evalstr;
}
1;
}
EOT
# and run it.
eval $evalstr;
if ($@) {
warn("Failed to compile body SpamAssassin tests, skipping:\n".
"\t($@)\n");
$self->{rule_errors}++;
}
else {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_body_tests_'.$clean_priority}($self, @$textary);
use strict "refs";
}
}
sub is_user_rule_sub {
my ($self, $subname) = @_;
return 0 if (eval 'defined &Mail::SpamAssassin::PerMsgStatus::'.$subname);
1;
}
# Taken from URI and URI::Find
my $reserved = q(;/?:@&=+$,[]\#|);
my $mark = q(-_.!~*'()); #'; emacs
my $unreserved = "A-Za-z0-9\Q$mark\E\x00-\x08\x0b\x0c\x0e-\x1f";
my $uricSet = quotemeta($reserved) . $unreserved . "%";
my $schemeRE = qr/(?:https?|ftp|mailto|javascript|file)/;
my $uricCheat = $uricSet;
$uricCheat =~ tr/://d;
my $schemelessRE = qr/(?<![.=])(?:www\.|ftp\.)/;
my $uriRe = qr/\b(?:$schemeRE:[$uricCheat]|$schemelessRE)[$uricSet#]*/o;
# Taken from Email::Find (thanks Tatso!)
# This is the BNF from RFC 822
my $esc = '\\\\';
my $period = '\.';
my $space = '\040';
my $open_br = '\[';
my $close_br = '\]';
my $nonASCII = '\x80-\xff';
my $ctrl = '\000-\037';
my $cr_list = '\n\015';
my $qtext = qq/[^$esc$nonASCII$cr_list\"]/; #"
my $dtext = qq/[^$esc$nonASCII$cr_list$open_br$close_br]/;
my $quoted_pair = qq<$esc>.qq<[^$nonASCII]>;
my $atom_char = qq/[^($space)<>\@,;:\".$esc$open_br$close_br$ctrl$nonASCII]/;
my $atom = qq{(?>$atom_char+)};
my $quoted_str = qq<\"$qtext*(?:$quoted_pair$qtext*)*\">; my $word = qq<(?:$atom|$quoted_str)>;
my $local_part = qq<$word(?:$period$word)*>;
my $label = q/[A-Za-z\d](?:[A-Za-z\d-]*[A-Za-z\d])?/;
my $domain_ref = qq<$label(?:$period$label)*>;
my $domain_lit = qq<$open_br(?:$dtext|$quoted_pair)*$close_br>;
my $domain = qq<(?:$domain_ref|$domain_lit)>;
my $Addr_spec_re = qr<$local_part\s*\@\s*$domain>o;
=item $status->get_uri_list ()
Returns an array of all unique URIs found in the message. It takes
a combination of the URIs found in the rendered (decoded and HTML
stripped) body and the URIs found when parsing the HTML in the message.
Will also set $status->{uri_domain_count} (count of unique domains)
and $status->{uri_list} (the array as returned by this function).
The returned array will include the "raw" URI as well as
"slightly cooked" versions. For example, the single URI
'http://%77w%77.example.com/' will get turned into:
( 'http://%77w%77.example.com/', 'http://www.example.com/' )
=cut
sub get_uri_list {
my ($self) = @_;
if (defined $self->{uri_list}) {
return @{$self->{uri_list}};
}
my $textary = $self->get_decoded_stripped_body_text_array();
my ($rulename, $pat, @uris);
local ($_);
my $text;
for (@$textary) {
while (/($uriRe)/go) {
my $uri = $1;
$uri =~ s/^<(.*)>$/$1/;
$uri =~ s/[\]\)>
if ($uri !~ /^${schemeRE}:/io) {
if ($uri =~ /^www\d*\./i) {
push (@uris, $uri);
$uri = "http://$uri";
}
elsif ($uri =~ /^ftp\./i) {
push (@uris, $uri);
$uri = "ftp://$uri";
}
}
push @uris, $uri;
}
while (/($Addr_spec_re)/go) {
my $uri = $1;
$uri = "mailto:$uri";
push @uris, $uri;
}
}
if (defined $self->{msg}->{metadata}->{html}->{uri}) {
push @uris, @{ $self->{msg}->{metadata}->{html}->{uri} };
}
@uris = Mail::SpamAssassin::Util::uri_list_canonify(@uris);
my %domains;
for (@uris) {
my $domain = Mail::SpamAssassin::Util::uri_to_domain($_);
$domains{$domain} = 1 if $domain;
}
$self->{uri_domain_count} = keys %domains;
$self->{uri_list} = \@uris;
if ($Mail::SpamAssassin::DEBUG->{enabled}) {
foreach my $nuri (@uris) {
dbg("uri found: $nuri");
}
}
return @uris;
}
sub do_body_uri_tests {
my ($self, $priority, @uris) = @_;
local ($_);
dbg ("running uri tests; score so far=".$self->{score});
my $doing_user_rules =
$self->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_URI_TESTS};
my $clean_priority;
($clean_priority = $priority) =~ s/-/neg/;
$self->{test_log_msgs} = (); if (defined &{'Mail::SpamAssassin::PerMsgStatus::_body_uri_tests_'.$clean_priority}
&& !$doing_user_rules) {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_body_uri_tests_'.$clean_priority}($self, @uris);
use strict "refs";
return;
}
my $evalstr = '';
my $evalstr2 = '';
while (my($rulename, $pat) = each %{$self->{conf}{uri_tests}->{$priority}}) {
$evalstr .= '
if ($self->{conf}->{scores}->{q{'.$rulename.'}}) {
'.$rulename.'_uri_test($self, @_); # call procedurally for speed
}
';
if ($doing_user_rules) {
next if (!$self->is_user_rule_sub ($rulename.'_uri_test'));
}
$evalstr2 .= '
sub '.$rulename.'_uri_test {
my $self = shift;
foreach (@_) {
'.$self->hash_line_for_rule($rulename).'
if ('.$pat.') {
$self->got_pattern_hit (q{'.$rulename.'}, "URI: ");
'. $self->ran_rule_debug_code ($rulename,"uri test", 4) . '
# Ok, we hit, stop now.
last;
}
}
}
';
}
if (defined &{'_body_uri_tests_'.$clean_priority}) {
undef &{'_body_uri_tests_'.$clean_priority};
}
return unless ($evalstr);
$evalstr = <<"EOT";
{
package Mail::SpamAssassin::PerMsgStatus;
$evalstr2
sub _body_uri_tests_$clean_priority {
my \$self = shift;
$evalstr;
}
1;
}
EOT
eval $evalstr;
if ($@) {
warn("Failed to compile URI SpamAssassin tests, skipping:\n".
"\t($@)\n");
$self->{rule_errors}++;
}
else {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_body_uri_tests_'.$clean_priority}($self, @uris);
use strict "refs";
}
}
sub do_rawbody_tests {
my ($self, $priority, $textary) = @_;
local ($_);
dbg ("running raw-body-text per-line regexp tests; score so far=".$self->{score});
my $doing_user_rules =
$self->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS};
my $clean_priority;
($clean_priority = $priority) =~ s/-/neg/;
$self->{test_log_msgs} = (); if (defined &{'Mail::SpamAssassin::PerMsgStatus::_rawbody_tests_'.$clean_priority}
&& !$doing_user_rules) {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_rawbody_tests_'.$clean_priority}($self, @$textary);
use strict "refs";
return;
}
my $evalstr = '';
my $evalstr2 = '';
while (my($rulename, $pat) = each %{$self->{conf}{rawbody_tests}->{$priority}}) {
$evalstr .= '
if ($self->{conf}->{scores}->{q{'.$rulename.'}}) {
'.$rulename.'_rawbody_test($self, @_); # call procedurally for speed
}
';
if ($doing_user_rules) {
next if (!$self->is_user_rule_sub ($rulename.'_rawbody_test'));
}
$evalstr2 .= '
sub '.$rulename.'_rawbody_test {
my $self = shift;
foreach (@_) {
'.$self->hash_line_for_rule($rulename).'
if ('.$pat.') {
$self->got_pattern_hit (q{'.$rulename.'}, "RAW: ");
'. $self->ran_rule_debug_code ($rulename,"body_pattern_hit", 8) . '
# Ok, we hit, stop now.
last;
}
}
}
';
}
if (defined &{'_rawbody_tests_'.$clean_priority}) {
undef &{'_rawbody_tests_'.$clean_priority};
}
return unless ($evalstr);
$evalstr = <<"EOT";
{
package Mail::SpamAssassin::PerMsgStatus;
$evalstr2
sub _rawbody_tests_$clean_priority {
my \$self = shift;
$evalstr;
}
1;
}
EOT
eval $evalstr;
if ($@) {
warn("Failed to compile body SpamAssassin tests, skipping:\n".
"\t($@)\n");
$self->{rule_errors}++;
}
else {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_rawbody_tests_'.$clean_priority}($self, @$textary);
use strict "refs";
}
}
sub do_full_tests {
my ($self, $priority, $fullmsgref) = @_;
local ($_);
dbg ("running full-text regexp tests; score so far=".$self->{score});
my $doing_user_rules =
$self->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_FULL_TESTS};
my $clean_priority;
($clean_priority = $priority) =~ s/-/neg/;
$self->{test_log_msgs} = ();
if (defined &{'Mail::SpamAssassin::PerMsgStatus::_full_tests_'.$clean_priority}
&& !$doing_user_rules) {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_full_tests_'.$clean_priority}($self, $fullmsgref);
use strict "refs";
return;
}
my $evalstr = '';
while (my($rulename, $pat) = each %{$self->{conf}{full_tests}->{$priority}}) {
$evalstr .= '
if ($self->{conf}->{scores}->{q{'.$rulename.'}}) {
'.$self->hash_line_for_rule($rulename).'
if ($$fullmsgref =~ '.$pat.') {
$self->got_pattern_hit (q{'.$rulename.'}, "FULL: ");
'. $self->ran_rule_debug_code ($rulename,"full-text regex", 16) . '
}
}
';
}
if (defined &{'_full_tests_'.$clean_priority}) {
undef &{'_full_tests_'.$clean_priority};
}
return unless ($evalstr);
$evalstr = <<"EOT";
{
package Mail::SpamAssassin::PerMsgStatus;
sub _full_tests_$clean_priority {
my (\$self, \$fullmsgref) = \@_;
study \$\$fullmsgref;
$evalstr
}
1;
}
EOT
eval $evalstr;
if ($@) {
warn "Failed to compile full SpamAssassin tests, skipping:\n".
"\t($@)\n";
$self->{rule_errors}++;
} else {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_full_tests_'.$clean_priority}($self, $fullmsgref);
use strict "refs";
}
}
sub do_head_eval_tests {
my ($self, $priority) = @_;
return unless (defined($self->{conf}->{head_evals}->{$priority}));
$self->run_eval_tests ($self->{conf}->{head_evals}->{$priority}, '');
}
sub do_body_eval_tests {
my ($self, $priority, $bodystring) = @_;
return unless (defined($self->{conf}->{body_evals}->{$priority}));
$self->run_eval_tests ($self->{conf}->{body_evals}->{$priority}, 'BODY: ', $bodystring);
}
sub do_rawbody_eval_tests {
my ($self, $priority, $bodystring) = @_;
return unless (defined($self->{conf}->{rawbody_evals}->{$priority}));
$self->run_eval_tests ($self->{conf}->{rawbody_evals}->{$priority}, 'RAW: ', $bodystring);
}
sub do_full_eval_tests {
my ($self, $priority, $fullmsgref) = @_;
return unless (defined($self->{conf}->{full_evals}->{$priority}));
$self->run_eval_tests ($self->{conf}->{full_evals}->{$priority}, '', $fullmsgref);
}
sub do_meta_tests {
my ($self, $priority) = @_;
local ($_);
dbg( "running meta tests; score so far=" . $self->{score} );
my $doing_user_rules =
$self->{conf}->{user_rules_to_compile}->{$Mail::SpamAssassin::Conf::TYPE_META_TESTS};
my $clean_priority;
($clean_priority = $priority) =~ s/-/neg/;
if (defined &{'Mail::SpamAssassin::PerMsgStatus::_meta_tests_'.$clean_priority}
&& !$doing_user_rules) {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_meta_tests_'.$clean_priority}($self);
use strict "refs";
return;
}
my (%rule_deps, %setup_rules, %meta, $rulename);
my $evalstr = '';
my @metas = keys %{ $self->{conf}{meta_tests}->{$priority} };
foreach $rulename (@metas) {
my $rule = $self->{conf}->{meta_tests}->{$priority}->{$rulename};
my $token;
my $lexer = ARITH_EXPRESSION_LEXER;
my @tokens = ($rule =~ m/$lexer/g);
$meta{$rulename} = "";
@{ $rule_deps{$rulename} } = ();
foreach $token (@tokens) {
if ($token =~ /^(?:\W+|\d+)$/) {
$meta{$rulename} .= "$token ";
}
else {
$meta{$rulename} .= "\$self->{'tests_already_hit'}->{'$token'} ";
$setup_rules{$token}=1;
push (@{ $rule_deps{$rulename} }, $token)
if (exists $self->{conf}{meta_tests}->{$priority}->{$token});
}
}
}
$evalstr .= join("\n", (map { "\$self->{'tests_already_hit'}->{'$_'} ||= 0;" } keys %setup_rules), "");
@metas = sort { @{ $rule_deps{$a} } <=> @{ $rule_deps{$b} } } @metas;
my $count;
do {
$count = $ my %metas = map { $_ => 1 } @metas;
for (my $i = 0 ; $i <= $
next if (grep( $metas{$_}, @{ $rule_deps{ $metas[$i] } }));
$evalstr .= ' if ('.$meta{$metas[$i]}.') { $self->got_hit (q#'.$metas[$i].'#, ""); }'."\n";
splice @metas, $i--, 1; }
} while ($
my %metas = map { $_ => 1 } @metas; foreach $rulename (@metas) {
$self->{rule_errors}++; dbg( "Excluding meta test $rulename; unsolved meta dependencies: "
. join(", ", grep($metas{$_},@{ $rule_deps{$rulename} })));
}
if (defined &{'_meta_tests_'.$clean_priority}) {
undef &{'_meta_tests_'.$clean_priority};
}
return unless ($evalstr);
$evalstr = <<"EOT";
{
package Mail::SpamAssassin::PerMsgStatus;
sub _meta_tests_$clean_priority {
my (\$self) = \@_;
$evalstr;
}
1;
}
EOT
eval $evalstr;
if ($@) {
warn "Failed to run meta SpamAssassin tests, skipping some: $@\n";
$self->{rule_errors}++;
}
else {
no strict "refs";
&{'Mail::SpamAssassin::PerMsgStatus::_meta_tests_'.$clean_priority}($self);
use strict "refs";
}
}
sub run_eval_tests {
my ($self, $evalhash, $prepend2desc, @extraevalargs) = @_;
local ($_);
my $debugenabled = $Mail::SpamAssassin::DEBUG->{enabled};
my $scoreset = $self->{conf}->get_score_set();
while (my ($rulename, $test) = each %{$evalhash}) {
next unless ($self->{conf}->{scores}->{$rulename});
next if (exists $self->{conf}->{tflags}->{$rulename} &&
(($scoreset & 1) == 0) &&
$self->{conf}->{tflags}->{$rulename} =~ /\bnet\b/);
next if (exists $self->{conf}->{tflags}->{$rulename} &&
(($scoreset & 2) == 0) &&
$self->{conf}->{tflags}->{$rulename} =~ /\bbayes\b/);
my $score = $self->{conf}{scores}{$rulename};
my $result;
$self->{test_log_msgs} = ();
my ($function, @args) = @{$test};
unshift(@args, @extraevalargs);
if (!$self->can ($function)) {
my $pluginobj = $self->{conf}->{eval_plugins}->{$function};
if ($pluginobj) {
$self->register_plugin_eval_glue ($pluginobj, $function);
} else {
dbg ("no method found for eval test $function");
}
}
$self->{current_rule_name} = $rulename;
eval {
$result = $self->$function(@args);
};
if ($@) {
warn "Failed to run $rulename SpamAssassin test, skipping:\n".
"\t($@)\n";
$self->{rule_errors}++;
next;
}
if ($result) {
$self->got_hit ($rulename, $prepend2desc);
dbg("Ran run_eval_test rule $rulename ======> got hit", "rulesrun", 32) if $debugenabled;
} else {
}
}
}
sub register_plugin_eval_glue {
my ($self, $pluginobj, $function) = @_;
dbg ("registering glue method for $function ($pluginobj)");
my $evalstr = <<"ENDOFEVAL";
{
package Mail::SpamAssassin::PerMsgStatus;
sub $function {
my (\$self) = shift;
my \$plugin = \$self->{conf}->{eval_plugins}->{$function};
return \$plugin->$function (\$self, \@_);
}
1;
}
ENDOFEVAL
eval $evalstr;
if ($@) {
warn "Failed to run header SpamAssassin tests, skipping some: $@\n";
$self->{rule_errors}++;
}
}
sub run_rbl_eval_tests {
my ($self, $evalhash) = @_;
my ($rulename, $pat, @args);
local ($_);
if ($self->{main}->{local_tests_only}) {
dbg ("local tests only, ignoring RBL eval", "rulesrun", 32);
return 0;
}
my $debugenabled = $Mail::SpamAssassin::DEBUG->{enabled};
while (my ($rulename, $test) = each %{$evalhash}) {
my $score = $self->{conf}->{scores}->{$rulename};
next unless $score;
$self->{test_log_msgs} = ();
my ($function, @args) = @{$test};
my $result;
eval {
$result = $self->$function($rulename, @args);
};
if ($@) {
warn "Failed to run $rulename RBL SpamAssassin test, skipping:\n".
"\t($@)\n";
$self->{rule_errors}++;
next;
}
}
}
sub got_pattern_hit {
my ($self, $rulename, $prefix) = @_;
return if (defined $self->{tests_already_hit}->{$rulename});
$self->got_hit ($rulename, $prefix);
}
=item $status->clear_test_state()
Clear test state, including test log messages from C<$status-E<gt>test_log()>.
=cut
sub clear_test_state {
my ($self) = @_;
$self->{test_log_msgs} = ();
}
sub _handle_hit {
my ($self, $rule, $score, $area, $desc) = @_;
if ($rule =~ /^__/) { push(@{$self->{subtest_names_hit}}, $rule); return; }
$self->{score} += $score;
push(@{$self->{test_names_hit}}, $rule);
$area ||= '';
if ($score >= 10 || $score <= -10) {
$score = sprintf("%4.0f", $score);
}
else {
$score = sprintf("%4.1f", $score);
}
$self->{tag_data}->{REPORT} .= sprintf ("* %s %s %s%s\n%s",
$score, $rule, $area, $desc,
($self->{test_log_msgs}->{TERSE} ?
"* " . $self->{test_log_msgs}->{TERSE} : '')
);
$self->{tag_data}->{SUMMARY} .= sprintf ("%s %-22s %s%s\n%s",
$score, $rule, $area, $desc,
($self->{test_log_msgs}->{LONG} || ''));
$self->{test_log_msgs} = (); }
sub handle_hit {
my ($self, $rule, $area, $deffallbackdesc) = @_;
my $desc = $self->{conf}->{descriptions}->{$rule};
$desc ||= $deffallbackdesc;
$desc ||= $rule;
my $score = $self->{conf}->{scores}->{$rule};
$self->_handle_hit($rule, $score, $area, $desc);
}
sub got_hit {
my ($self, $rule, $prepend2desc) = @_;
$self->{tests_already_hit}->{$rule} = 1;
my $txt = $self->{conf}->{full_tests}->{$rule};
$txt ||= $self->{conf}->{full_evals}->{$rule};
$txt ||= $self->{conf}->{head_tests}->{$rule};
$txt ||= $self->{conf}->{body_tests}->{$rule};
$self->handle_hit ($rule, $prepend2desc, $txt);
}
sub test_log {
my ($self, $msg) = @_;
while ($msg =~ s/^(.{30,48})\s//) {
$self->_test_log_line ($1);
}
$self->_test_log_line ($msg);
}
sub _test_log_line {
my ($self, $msg) = @_;
$self->{test_log_msgs}->{TERSE} .= sprintf ("[%s]\n", $msg);
if (length($msg) > 47) {
$self->{test_log_msgs}->{LONG} .= sprintf ("%78s\n", "[$msg]");
} else {
$self->{test_log_msgs}->{LONG} .= sprintf ("%27s [%s]\n", "", $msg);
}
}
sub get_envelope_from {
my ($self) = @_;
my $envf;
$envf = $self->{conf}->{envelope_sender_header};
if ((defined $envf) && ($envf = $self->get($envf)) && ($envf =~ /\@/)) {
goto ok;
}
if ($self->get ("X-Sender") =~ /\@/) {
my $rcvd = join (' ', $self->get ("Received"));
if ($rcvd =~ /\(fetchmail/) {
dbg ("X-Sender and fetchmail signatures found, cannot trust envelope-from");
return undef;
}
}
if ($envf = $self->get ("X-Envelope-From")) {
if ($self->get ("ALL") =~ /(?:^|\n)Received:\s.*\nX-Envelope-From:\s/s) {
dbg ("X-Envelope-From header found after 1 or more Received lines, cannot trust envelope-from");
} else {
goto ok;
}
}
if ($envf = $self->get ("Envelope-Sender")) {
if ($self->get ("ALL") =~ /(?:^|\n)Received:\s.*\nEnvelope-Sender:\s/s) {
dbg ("Envelope-Sender header found after 1 or more Received lines, cannot trust envelope-from");
} else {
goto ok;
}
}
if ($envf = $self->get ("Return-Path")) {
if ($self->get ("ALL") =~ /(?:^|\n)Received:\s.*\nReturn-Path:\s/s) {
dbg ("Return-Path header found after 1 or more Received lines, cannot trust envelope-from");
} else {
goto ok;
}
}
return undef;
ok:
$envf =~ s/^<*//gs; # remove <
$envf =~ s/>*\s*$//gs; # remove >, whitespace, newlines
return $envf;
}
sub dbg { Mail::SpamAssassin::dbg (@_); }
sub sa_die { Mail::SpamAssassin::sa_die (@_); }
=item $status->create_fulltext_tmpfile (fulltext_ref)
This function creates a temporary file containing the passed scalar
reference data (typically the full/pristine text of the message).
This is typically used by external programs like pyzor and dccproc, to
avoid hangs due to buffering issues. Methods that need this, should
call $self->create_fulltext_tmpfile($fulltext) to retrieve the temporary
filename; it will be created if it has not already been.
Note: This can only be called once until $status->delete_fulltext_tmpfile() is
called.
=cut
sub create_fulltext_tmpfile {
my ($self, $fulltext) = @_;
if (defined $self->{fulltext_tmpfile}) {
return $self->{fulltext_tmpfile};
}
my ($tmpf, $tmpfh) = Mail::SpamAssassin::Util::secure_tmpfile();
print $tmpfh $$fulltext;
close $tmpfh;
$self->{fulltext_tmpfile} = $tmpf;
return $self->{fulltext_tmpfile};
}
=item $status->delete_fulltext_tmpfile ()
Will cleanup after a $status->create_fulltext_tmpfile() call. Deletes the
temporary file and uncaches the filename.
=cut
sub delete_fulltext_tmpfile {
my ($self) = @_;
if (defined $self->{fulltext_tmpfile}) {
unlink $self->{fulltext_tmpfile};
$self->{fulltext_tmpfile} = undef;
}
}
1;
__END__
=back
=head1 SEE ALSO
C<Mail::SpamAssassin>
C<spamassassin>