# <@LICENSE> # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to you under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =head1 NAME Mail::SpamAssassin::PerMsgStatus - per-message status (spam or not-spam) =head1 SYNOPSIS my $spamtest = new Mail::SpamAssassin ({ 'rules_filename' => '/etc/spamassassin.rules', 'userprefs_filename' => $ENV{HOME}.'/.spamassassin/user_prefs' }); my $mail = $spamtest->parse(); my $status = $spamtest->check ($mail); my $rewritten_mail; if ($status->is_spam()) { $rewritten_mail = $status->rewrite_mail (); } ... =head1 DESCRIPTION The Mail::SpamAssassin C method returns an object of this class. This object encapsulates all the per-message state. =head1 METHODS =over 4 =cut package Mail::SpamAssassin::PerMsgStatus; use strict; use warnings; use Mail::SpamAssassin::Constants qw(:sa); use Mail::SpamAssassin::AsyncLoop; use Mail::SpamAssassin::Conf; use Mail::SpamAssassin::Util; use Mail::SpamAssassin::Logger; use vars qw{ @ISA @TEMPORARY_METHODS %TEMPORARY_EVAL_GLUE_METHODS }; @ISA = qw(); # methods defined by the compiled ruleset; deleted in finish_tests() @TEMPORARY_METHODS = (); # methods defined by register_plugin_eval_glue(); deleted in finish_tests() %TEMPORARY_EVAL_GLUE_METHODS = (); ########################################################################### sub new { my $class = shift; $class = ref($class) || $class; my ($main, $msg, $opts) = @_; my $self = { 'main' => $main, 'msg' => $msg, 'score' => 0, 'test_logs' => '', 'test_names_hit' => [ ], 'subtest_names_hit' => [ ], 'spamd_result_log_items' => [ ], 'tests_already_hit' => { }, 'c' => { }, 'rule_errors' => 0, 'disable_auto_learning' => 0, 'auto_learn_status' => undef, 'conf' => $main->{conf}, 'async' => Mail::SpamAssassin::AsyncLoop->new($main) }; #$self->{main}->{use_rule_subs} = 1; if (defined $opts && $opts->{disable_auto_learning}) { $self->{disable_auto_learning} = 1; } # used with "mass-check --loghits" if ($self->{main}->{save_pattern_hits}) { $self->{save_pattern_hits} = 1; $self->{pattern_hits} = { }; } delete $self->{should_log_rule_hits}; my $dbgcache = would_log('dbg', 'rules'); if ($dbgcache || $self->{save_pattern_hits}) { $self->{should_log_rule_hits} = 1; } bless ($self, $class); $self; } ########################################################################### =item $status->check () Runs the SpamAssassin rules against the message pointed to by the object. =cut sub check { my ($self) = @_; local ($_); $self->{learned_points} = 0; $self->{body_only_points} = 0; $self->{head_only_points} = 0; $self->{score} = 0; $self->{main}->call_plugins ("check_start", { permsgstatus => $self }); # in order of slowness; fastest first, slowest last. # we do ALL the tests, even if a spam triggers lots of them early on. # this lets us see ludicrously spammish mails (score: 40) etc., which # we can then immediately submit to spamblocking services. # # TODO: change this to do whitelist/blacklists first? probably a plan # NOTE: definitely need AWL stuff last, for regression-to-mean of score # TVD: we may want to do more than just clearing out the headers, but ... $self->{msg}->delete_header('X-Spam-.*'); # Resident Mail::SpamAssassin code will possibly never change score # sets, even if bayes becomes available. So we should do a quick check # to see if we should go from {0,1} to {2,3}. We of course don't need # to do this switch if we're already using bayes ... ;) my $set = $self->{conf}->get_score_set(); if (($set & 2) == 0 && $self->{main}->{bayes_scanner} && $self->{main}->{bayes_scanner}->is_scan_available()) { dbg("check: scoreset $set but bayes is available, switching scoresets"); $self->{conf}->set_score_set ($set|2); } # The primary check functionality occurs via a plugin call. For more # information, please see: Mail::SpamAssassin::Plugin::Check if (!$self->{main}->call_plugins ("check_main", { permsgstatus => $self })) { # did anything happen? if not, this is fatal if (!$self->{main}->have_plugin("check_main")) { die "check: no loaded plugin implements 'check_main': cannot scan!"; } } # delete temporary storage and memory allocation used during checking $self->delete_fulltext_tmpfile(); # now that we've finished checking the mail, clear out this cache # to avoid unforeseen side-effects. $self->{c} = { }; # Round the score to 3 decimal places to avoid rounding issues # We assume required_score to be properly rounded already. # add 0 to force it back to numeric representation instead of string. $self->{score} = (sprintf "%0.3f", $self->{score}) + 0; dbg("check: is spam? score=".$self->{score}. " required=".$self->{conf}->{required_score}); dbg("check: tests=".$self->get_names_of_tests_hit()); dbg("check: subtests=".$self->get_names_of_subtests_hit()); $self->{is_spam} = $self->is_spam(); $self->{main}->{resolver}->bgabort(); $self->{main}->call_plugins ("check_end", { permsgstatus => $self }); 1; } ########################################################################### =item $status->learn() After a mail message has been checked, this method can be called. If the score is outside a certain range around the threshold, ie. if the message is judged more-or-less definitely spam or definitely non-spam, it will be fed into SpamAssassin's learning systems (currently the naive Bayesian classifier), so that future similar mails will be caught. =cut sub learn { my ($self) = @_; if (!$self->{conf}->{bayes_auto_learn} || !$self->{conf}->{use_bayes} || $self->{disable_auto_learning}) { $self->{auto_learn_status} = "disabled"; return; } my $isspam = $self->{main}->call_plugins ("autolearn_discriminator", { permsgstatus => $self }); if (!defined $isspam) { $self->{auto_learn_status} = 'no'; return; } $self->{main}->call_plugins ("autolearn", { permsgstatus => $self, isspam => $isspam }); # bug 3704: temporarily override learn's ability to re-learn a message my $orig_learner = $self->{main}->init_learner({ "no_relearn" => 1 }); eval { my $learnstatus = $self->{main}->learn ($self->{msg}, undef, $isspam, 0); if ($learnstatus->did_learn()) { $self->{auto_learn_status} = $isspam ? "spam" : "ham"; } # This must wait until the did_learn call. $learnstatus->finish(); $self->{main}->finish_learner(); # for now if (exists $self->{main}->{bayes_scanner}) { $self->{main}->{bayes_scanner}->sanity_check_is_untied(); } }; # reset learner options to their original values $self->{main}->init_learner($orig_learner); if ($@) { dbg("learn: auto-learning failed: $@"); $self->{auto_learn_status} = "failed"; } } =item $score = $status->get_autolearn_points() Return the message's score as computed for auto-learning. Certain tests are ignored: - rules with tflags set to 'learn' (the Bayesian rules) - rules with tflags set to 'userconf' (user white/black-listing rules, etc) - rules with tflags set to 'noautolearn' Also note that auto-learning occurs using scores from either scoreset 0 or 1, depending on what scoreset is used during message check. It is likely that the message check and auto-learn scores will be different. =cut sub get_autolearn_points { my ($self) = @_; $self->_get_autolearn_points(); return $self->{autolearn_points}; } =item $score = $status->get_head_only_points() Return the message's score as computed for auto-learning, ignoring all rules except for header-based ones. =cut sub get_head_only_points { my ($self) = @_; $self->_get_autolearn_points(); return $self->{head_only_points}; } =item $score = $status->get_learned_points() Return the message's score as computed for auto-learning, ignoring all rules except for learning-based ones. =cut sub get_learned_points { my ($self) = @_; $self->_get_autolearn_points(); return $self->{learned_points}; } =item $score = $status->get_body_only_points() Return the message's score as computed for auto-learning, ignoring all rules except for body-based ones. =cut sub get_body_only_points { my ($self) = @_; $self->_get_autolearn_points(); return $self->{body_only_points}; } sub _get_autolearn_points { my ($self) = @_; return if (exists $self->{autolearn_points}); # ensure it only gets computed once, even if we return early $self->{autolearn_points} = 0; # This function needs to use use sum($score[scoreset % 2]) not just {score}. # otherwise we shift what we autolearn on and it gets really wierd. - tvd my $orig_scoreset = $self->{conf}->get_score_set(); my $new_scoreset = $orig_scoreset; my $scores = $self->{conf}->{scores}; if (($orig_scoreset & 2) == 0) { # we don't need to recompute dbg("learn: auto-learn: currently using scoreset $orig_scoreset"); } else { $new_scoreset = $orig_scoreset & ~2; dbg("learn: auto-learn: currently using scoreset $orig_scoreset, recomputing score based on scoreset $new_scoreset"); $scores = $self->{conf}->{scoreset}->[$new_scoreset]; } my $tflags = $self->{conf}->{tflags}; my $points = 0; # Just in case this function is called multiple times, clear out the # previous calculated values $self->{learned_points} = 0; $self->{body_only_points} = 0; $self->{head_only_points} = 0; foreach my $test (@{$self->{test_names_hit}}) { # According to the documentation, noautolearn, userconf, and learn # rules are ignored for autolearning. if (exists $tflags->{$test}) { next if $tflags->{$test} =~ /\bnoautolearn\b/; next if $tflags->{$test} =~ /\buserconf\b/; # Keep track of the learn points for an additional autolearn check. # Use the original scoreset since it'll be 0 in sets 0 and 1. if ($tflags->{$test} =~ /\blearn\b/) { # we're guaranteed that the score will be defined $self->{learned_points} += $self->{conf}->{scoreset}->[$orig_scoreset]->{$test}; next; } } # ignore tests with 0 score in this scoreset next if ($scores->{$test} == 0); # Go ahead and add points to the proper locations if (!$self->{conf}->maybe_header_only ($test)) { $self->{body_only_points} += $scores->{$test}; } if (!$self->{conf}->maybe_body_only ($test)) { $self->{head_only_points} += $scores->{$test}; } $points += $scores->{$test}; } # Figure out the final value we'll use for autolearning $points = (sprintf "%0.3f", $points) + 0; dbg("learn: auto-learn: message score: ".$self->{score}.", computed score for autolearn: $points"); $self->{autolearn_points} = $points; } ########################################################################### =item $isspam = $status->is_spam () After a mail message has been checked, this method can be called. It will return 1 for mail determined likely to be spam, 0 if it does not seem spam-like. =cut sub is_spam { my ($self) = @_; # changed to test this so sub-tests can ask "is_spam" during a run return ($self->{score} >= $self->{conf}->{required_score}); } ########################################################################### =item $list = $status->get_names_of_tests_hit () After a mail message has been checked, this method can be called. It will return a comma-separated string, listing all the symbolic test names of the tests which were trigged by the mail. =cut sub get_names_of_tests_hit { my ($self) = @_; return join(',', sort(@{$self->{test_names_hit}})); } ########################################################################### =item $list = $status->get_names_of_subtests_hit () After a mail message has been checked, this method can be called. It will return a comma-separated string, listing all the symbolic test names of the meta-rule sub-tests which were trigged by the mail. Sub-tests are the normally-hidden rules, which score 0 and have names beginning with two underscores, used in meta rules. =cut sub get_names_of_subtests_hit { my ($self) = @_; return join(',', sort(@{$self->{subtest_names_hit}})); } ########################################################################### =item $num = $status->get_score () After a mail message has been checked, this method can be called. It will return the message's score. =cut sub get_score { my ($self) = @_; return $self->{score}; } # left as backward compatibility sub get_hits { my ($self) = @_; return $self->{score}; } ########################################################################### =item $num = $status->get_required_score () After a mail message has been checked, this method can be called. It will return the score required for a mail to be considered spam. =cut sub get_required_score { my ($self) = @_; return $self->{conf}->{required_score}; } # left as backward compatibility sub get_required_hits { my ($self) = @_; return $self->{conf}->{required_score}; } ########################################################################### =item $num = $status->get_autolearn_status () After a mail message has been checked, this method can be called. It will return one of the following strings depending on whether the mail was auto-learned or not: "ham", "no", "spam", "disabled", "failed", "unavailable". =cut sub get_autolearn_status { my ($self) = @_; return ($self->{auto_learn_status} || "unavailable"); } ########################################################################### =item $report = $status->get_report () Deliver a "spam report" on the checked mail message. This contains details of how many spam detection rules it triggered. The report is returned as a multi-line string, with the lines separated by C<\n> characters. =cut sub get_report { my ($self) = @_; if (!exists $self->{'report'}) { my $report; $report = $self->{conf}->{report_template}; $report ||= '(no report template found)'; $report = $self->_replace_tags($report); $report =~ s/\n*$/\n\n/s; $self->{report} = $report; } return $self->{report}; } ########################################################################### =item $preview = $status->get_content_preview () Give a "preview" of the content. This is returned as a multi-line string, with the lines separated by C<\n> characters, containing a fully-decoded, safe, plain-text sample of the first few lines of the message body. =cut sub get_content_preview { my ($self) = @_; my $str = ''; my $ary = $self->get_decoded_stripped_body_text_array(); shift @{$ary}; # drop the subject line my $numlines = 3; while (length ($str) < 200 && @{$ary} && $numlines-- > 0) { $str .= shift @{$ary}; } undef $ary; chomp ($str); $str .= " [...]\n"; # in case the last line was huge, trim it back to around 200 chars $str =~ s/^(.{,200}).*$/$1/gs; # now, some tidy-ups that make things look a bit prettier $str =~ s/-----Original Message-----.*$//gs; $str =~ s/This is a multi-part message in MIME format\.//gs; $str =~ s/[-_\*\.]{10,}//gs; $str =~ s/\s+/ /gs; # add "Content preview:" ourselves, so that the text aligns # correctly with the template -- then trim it off. We don't # have to get this *exactly* right, but it's nicer if we # make a bit of an effort ;) $str = Mail::SpamAssassin::Util::wrap($str, " ", "Content preview: ", 75, 1); $str =~ s/^Content preview:\s+//gs; return $str; } ########################################################################### =item $msg = $status->get_message() Return the object representing the message being scanned. =cut sub get_message { my ($self) = @_; return $self->{msg}; } ########################################################################### =item $status->rewrite_mail () Rewrite the mail message. This will at minimum add headers, and at maximum MIME-encapsulate the message text, to reflect its spam or not-spam status. The function will return a scalar of the rewritten message. The actual modifications depend on the configuration (see C for more information). The possible modifications are as follows: =over 4 =item To:, From: and Subject: modification on spam mails Depending on the configuration, the To: and From: lines can have a user-defined RFC 2822 comment appended for spam mail. The subject line may have a user-defined string prepended to it for spam mail. =item X-Spam-* headers for all mails Depending on the configuration, zero or more headers with names beginning with C will be added to mail depending on whether it is spam or ham. =item spam message with report_safe If report_safe is set to true (1), then spam messages are encapsulated into their own message/rfc822 MIME attachment without any modifications being made. If report_safe is set to false (0), then the message will only have the above headers added/modified. =back =cut sub rewrite_mail { my ($self) = @_; my $msg = $self->{msg}->get_mbox_separator() || ''; if ($self->{is_spam} && $self->{conf}->{report_safe}) { $msg .= $self->rewrite_report_safe(); } else { $msg .= $self->rewrite_no_report_safe(); } return $msg; } # Make the line endings in the passed string reference appropriate # for the original mail. Callers must note bug 5250: don't rewrite # the message body, since that will corrupt 8bit attachments/MIME parts. # sub _fixup_report_line_endings { my ($self, $strref) = @_; if ($self->{msg}->{line_ending} ne "\n") { $$strref =~ s/\r?\n/$self->{msg}->{line_ending}/gs; } } # rewrite the message in report_safe mode # should not be called directly, use rewrite_mail instead # sub rewrite_report_safe { my ($self) = @_; # This is the original message. We do not want to make any modifications so # we may recover it if necessary. It will be put into the new message as a # message/rfc822 MIME part. my $original = $self->{msg}->get_pristine(); # This is the new message. my $newmsg = ''; # the report charset my $report_charset = "; charset=iso-8859-1"; if ($self->{conf}->{report_charset}) { $report_charset = "; charset=" . $self->{conf}->{report_charset}; } # the SpamAssassin report my $report = $self->get_report(); # If there are any wide characters, need to MIME-encode in UTF-8 # TODO: If $report_charset is something other than iso-8859-1/us-ascii, then # we could try converting to that charset if possible unless ($] < 5.008 || utf8::downgrade($report, 1)) { $report_charset = "; charset=utf-8"; utf8::encode($report); } # get original headers, "pristine" if we can do it my $from = $self->{msg}->get_pristine_header("From"); my $to = $self->{msg}->get_pristine_header("To"); my $cc = $self->{msg}->get_pristine_header("Cc"); my $subject = $self->{msg}->get_pristine_header("Subject"); my $msgid = $self->{msg}->get_pristine_header('Message-Id'); my $date = $self->{msg}->get_pristine_header("Date"); # It'd be nice to do this with a foreach loop, but with only three # possibilities right now, it's easier not to... if ($self->{conf}->{rewrite_header}->{Subject}) { $subject ||= "\n"; my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{Subject}); $tag =~ s/\n/ /gs; # strip tag's newlines $subject =~ s/^(?:\Q${tag}\E )?/${tag} /g; # For some reason the tag may already be there!? } if ($self->{conf}->{rewrite_header}->{To}) { $to ||= "\n"; my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{To}); $tag =~ s/\n/ /gs; # strip tag's newlines $to =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/; } if ($self->{conf}->{rewrite_header}->{From}) { $from ||= "\n"; my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{From}); $tag =~ s/\n+//gs; # strip tag's newlines $from =~ s/(?:\t\Q(${tag})\E)?$/\t(${tag})/; } # add report headers to message $newmsg .= "From: $from" if $from; $newmsg .= "To: $to" if $to; $newmsg .= "Cc: $cc" if $cc; $newmsg .= "Subject: $subject" if $subject; $newmsg .= "Date: $date" if $date; $newmsg .= "Message-Id: $msgid" if $msgid; foreach my $header (keys %{$self->{conf}->{headers_spam}}) { my $data = $self->{conf}->{headers_spam}->{$header}; my $line = $self->_process_header($header,$data); $line = $self->qp_encode_header($line); $newmsg .= "X-Spam-$header: $line\n" # add even if empty } if (defined $self->{conf}->{report_safe_copy_headers}) { my %already_added = map { $_ => 1 } qw/from to cc subject date message-id/; foreach my $hdr (@{$self->{conf}->{report_safe_copy_headers}}) { next if exists $already_added{lc $hdr}; my @hdrtext = $self->{msg}->get_pristine_header($hdr); $already_added{lc $hdr}++; if (lc $hdr eq "received") { # add Received at the top ... my $rhdr = ""; foreach (@hdrtext) { $rhdr .= "$hdr: $_"; } $newmsg = "$rhdr$newmsg"; } else { foreach (@hdrtext) { $newmsg .= "$hdr: $_"; } } } } # jm: add a SpamAssassin Received header to note markup time etc. # emulates the fetchmail style. # tvd: do this after report_safe_copy_headers so Received will be done correctly $newmsg = "Received: from localhost by " . Mail::SpamAssassin::Util::fq_hostname() . "\n" . "\twith SpamAssassin (version " . Mail::SpamAssassin::Version() . ");\n" . "\t" . Mail::SpamAssassin::Util::time_to_rfc822_date() . "\n" . $newmsg; # MIME boundary my $boundary = "----------=_" . sprintf("%08X.%08X",time,int(rand(2 ** 32))); # ensure it's unique, so we can't be attacked this way while ($original =~ /^\Q${boundary}\E(?:--)?$/m) { $boundary .= "/".sprintf("%08X",int(rand(2 ** 32))); } # determine whether Content-Disposition should be "attachment" or "inline" my $disposition; my $ct = $self->{msg}->get_header("Content-Type"); if (defined $ct && $ct ne '' && $ct !~ m{text/plain}i) { $disposition = "attachment"; $report .= $self->_replace_tags($self->{conf}->{unsafe_report_template}); # if we wanted to defang the attachment, this would be the place } else { $disposition = "inline"; } my $type = "message/rfc822"; $type = "text/plain" if $self->{conf}->{report_safe} > 1; my $description = $self->{conf}->{'encapsulated_content_description'}; # Note: the message should end in blank line since mbox format wants # blank line at end and messages may be concatenated! In addition, the # x-spam-type parameter is fixed since we will use it later to recognize # original messages that can be extracted. $newmsg .= <<"EOM"; MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="$boundary" This is a multi-part message in MIME format. --$boundary Content-Type: text/plain$report_charset Content-Disposition: inline Content-Transfer-Encoding: 8bit $report --$boundary Content-Type: $type; x-spam-type=original Content-Description: $description Content-Disposition: $disposition Content-Transfer-Encoding: 8bit EOM my $newmsgtrailer = "\n--$boundary--\n\n"; # now fix line endings in both headers, report_safe body parts, # and new MIME boundaries and structure $self->_fixup_report_line_endings(\$newmsg); $self->_fixup_report_line_endings(\$newmsgtrailer); $newmsg .= $original.$newmsgtrailer; return $newmsg; } # rewrite the message in non-report_safe mode (just headers) # should not be called directly, use rewrite_mail instead # sub rewrite_no_report_safe { my ($self) = @_; # put the pristine headers into an array # skip the X-Spam- headers, but allow the X-Spam-Prev headers to remain. # since there may be a missing header/body # my @pristine_headers = split(/^/m, $self->{msg}->get_pristine_header()); for (my $line = 0; $line <= $#pristine_headers; $line++) { next unless ($pristine_headers[$line] =~ /^X-Spam-(?!Prev-)/i); splice @pristine_headers, $line, 1 while ($pristine_headers[$line] =~ /^(?:X-Spam-(?!Prev-)|\s+\S)/i); $line--; } my $separator = ''; if ($pristine_headers[$#pristine_headers] =~ /^\s*$/) { $separator = pop @pristine_headers; } my $addition = 'headers_ham'; if($self->{is_spam}) { # special-case: Subject lines. ensure one exists, if we're # supposed to mark it up. my $created_subject = 0; my $subject = $self->{msg}->get_pristine_header('Subject'); if (!defined($subject) && $self->{is_spam} && exists $self->{conf}->{rewrite_header}->{'Subject'}) { push(@pristine_headers, "Subject: \n"); $created_subject = 1; } # Deal with header rewriting foreach (@pristine_headers) { # if we're not going to do a rewrite, skip this header! next if (!/^(From|Subject|To):/i); my $hdr = ucfirst(lc($1)); next if (!defined $self->{conf}->{rewrite_header}->{$hdr}); # pop the original version onto the end of the header array if ($created_subject) { push(@pristine_headers, "X-Spam-Prev-Subject: (nonexistent)\n"); } else { push(@pristine_headers, "X-Spam-Prev-$_"); } # Figure out the rewrite piece my $tag = $self->_replace_tags($self->{conf}->{rewrite_header}->{$hdr}); $tag =~ s/\n/ /gs; # The tag should be a comment for this header ... $tag = "($tag)" if ($hdr =~ /^(?:From|To)$/); s/^([^:]+:)[ \t]*(?:\Q${tag}\E )?/$1 ${tag} /i; } $addition = 'headers_spam'; } # Break the pristine header set up into two blocks; "pre" is the stuff that # we want to ensure comes before any SpamAssassin markup headers, like the # Return-Path header (see bug 3409). # # "post" is all the rest of the message headers, placed after the # SpamAssassin markup hdrs. Once one of those headers is seen, all further # headers go into that set; it's assumed that it's an old copy of the # header, or attempted spoofing, if it crops up halfway through the # headers. my $new_hdrs_pre = ''; my $new_hdrs_post = ''; foreach my $hdr (@pristine_headers) { if ($new_hdrs_post eq '' && $hdr =~ /^Return-Path:/i) { $new_hdrs_pre .= $hdr; } else { $new_hdrs_post .= $hdr; } } # use string appends to put this back together -- I finally benchmarked it. # join() is 56% of the speed of just using string appends. ;) while (my ($header, $data) = each %{$self->{conf}->{$addition}}) { my $line = $self->_process_header($header,$data); $line = $self->qp_encode_header($line); $new_hdrs_pre .= "X-Spam-$header: $line\n"; } # fix up line endings appropriately my $newmsg = $new_hdrs_pre.$new_hdrs_post.$separator; $self->_fixup_report_line_endings(\$newmsg); return $newmsg.$self->{msg}->get_pristine_body(); } sub qp_encode_header { my ($self, $text) = @_; # do nothing unless there's an 8-bit char return $text unless ($text =~ /[\x80-\xff]/); my $cs = 'ISO-8859-1'; if ($self->{report_charset}) { $cs = $self->{report_charset}; } my @hexchars = split('', '0123456789abcdef'); my $ord; $text =~ s{([\x80-\xff])}{ $ord = ord $1; '='.$hexchars[($ord & 0xf0) >> 4].$hexchars[$ord & 0x0f] }ges; $text = '=?'.$cs.'?Q?'.$text.'?='; dbg("markup: encoding header in $cs: $text"); return $text; } sub _process_header { my ($self, $hdr_name, $hdr_data) = @_; $hdr_data = $self->_replace_tags($hdr_data); $hdr_data =~ s/(?:\r?\n)+$//; # make sure there are no trailing newlines ... if ($self->{conf}->{fold_headers}) { if ($hdr_data =~ /\n/) { $hdr_data =~ s/\s*\n\s*/\n\t/g; return $hdr_data; } else { # use '!!' instead of ': ' so it doesn't wrap on the space my $hdr = "X-Spam-$hdr_name!!$hdr_data"; $hdr = Mail::SpamAssassin::Util::wrap($hdr, "\t", "", 79, 0, '(?<=[\s,])'); $hdr =~ s/^\t\n//gm; return (split (/!!/, $hdr, 2))[1]; # just return the data part } } else { $hdr_data =~ s/\n/ /g; # Can't have newlines in headers, unless folded return $hdr_data; } } sub _replace_tags { my $self = shift; my $text = shift; # default to leaving the original string in place, if we cannot find # a tag for it (bug 4793) my $t; my $v; $text =~ s{(_(\w+?)(?:\((.*?)\))?_)}{ my $full = $1; my $tag = $2; my $result = $self->_get_tag($tag,$3); (defined $result) ? $result : $full; }ge; return $text; } sub bayes_report_make_list { my $self = shift; my $info = shift; my $param = shift || "5"; my ($limit,$fmt_arg,$more) = split /,/, $param; return "Tokens not available." unless defined $info; my %formats = ( short => '$t', Short => 'Token: \"$t\"', compact => '$p-$D--$t', Compact => 'Probability $p -declassification distance $D (\"+\" means > 9) --token: \"$t\"', medium => '$p-$D-$N--$t', long => '$p-$d--${h}h-${s}s--${a}d--$t', Long => 'Probability $p -declassification distance $D --in ${h} ham messages -and ${s} spam messages --$a} days old--token:\"$t\"' ); my $raw_fmt = (!$fmt_arg ? '$p-$D--$t' : $formats{$fmt_arg}); return "Invalid format, must be one of: ".join(",",keys %formats) unless defined $raw_fmt; my $fmt = '"'.$raw_fmt.'"'; my $amt = $limit < @$info ? $limit : @$info; return "" unless $amt; my $Bayes = $self->{main}{bayes_scanner}; return "Bayes not available" unless defined $Bayes; my $ns = $self->{bayes_nspam}; my $nh = $self->{bayes_nham}; my $digit = sub { $_[0] > 9 ? "+" : $_[0] }; my $now = time; join ', ', map { my($t,$prob,$s,$h,$u) = @$_; my $a = int(($now - $u)/(3600 * 24)); my $d = $Bayes->compute_declassification_distance($ns,$nh,$s,$h,$prob); my $p = sprintf "%.3f", $prob; my $n = $s + $h; my ($c,$o) = $prob < 0.5 ? ($h,$s) : ($s,$h); my ($D,$S,$H,$C,$O,$N) = map &$digit($_), ($d,$s,$h,$c,$o,$n); eval $fmt; ## no critic } @{$info}[0..$amt-1]; } ########################################################################### # public API for plugins =item $status->set_tag($tagname, $value) Set a template tag, as used in C, report templates, etc. This API is intended for use by plugins. Tag names will be converted to an all-uppercase representation internally. C<$value> can be a subroutine reference, which will be evaluated each time the template is expanded. Note that perl supports closures, which means that variables set in the caller's scope can be accessed inside this C. For example: my $text = "hello world!"; $status->set_tag("FOO", sub { return $text; }); See C's C