=head1 NAME

Mail::SpamAssassin::Plugin::Rule2XSBody - speed up SpamAssassin by compiling regexps

=head1 SYNOPSIS

  loadplugin Mail::SpamAssassin::Plugin::Rule2XSBody

=head1 DESCRIPTION

This plugin will use native-code object files representing the ruleset,
in order to provide significant speedups in rule evaluation.

Note that C<sa-compile> must be run in advance, in order to compile the
ruleset using C<re2c> and the C compiler. See the C<sa-compile>
documentation for more details.

=cut
package Mail::SpamAssassin::Plugin::Rule2XSBody;
# Base class for this plugin (see @ISA below).
use Mail::SpamAssassin::Plugin;
# Presumably the source of the dbg/info/would_log helpers called throughout
# this file -- confirm against the Logger module's exports.
use Mail::SpamAssassin::Logger;
# new() stores an instance of this class in $self->{one_line_body};
# check_start and check_rules_at_priority delegate to it.
use Mail::SpamAssassin::Plugin::OneLineBodyRuleType;
use strict;
use warnings;
# Byte (rather than character) semantics for string built-ins in this file.
use bytes;
use vars qw(@ISA);
# Inherit from the generic plugin class; new() calls SUPER::new on it.
@ISA = qw(Mail::SpamAssassin::Plugin);
# Constructor.
#
# Builds the object through the parent plugin class, re-blesses it into the
# requested class and attaches a OneLineBodyRuleType helper under
# $self->{one_line_body}.
#
# Arguments: the class (or an existing instance) and the object handed on
# to the parent constructor.
# Returns the new plugin object.
sub new {
    my ($class, $mailsaobject) = @_;

    # Allow both Class->new(...) and $instance->new(...).
    $class = ref($class) || $class;

    my $self = bless $class->SUPER::new($mailsaobject), $class;
    $self->{one_line_body} =
        Mail::SpamAssassin::Plugin::OneLineBodyRuleType->new();

    return $self;
}
# Plugin hook: called with a params hash containing the configuration
# object under {conf}.
#
# Puts the compiled-ruleset directory (and its "auto" subdirectory) at the
# front of @INC, then tries to set up the compiled body rules for every
# priority in $conf->{body_tests}.
#
# Returns whatever setup_test_set() returns.
sub finish_parsing_end {
    my ($self, $params) = @_;

    my $conf = $params->{conf};
    my $main = $conf->{main};

    # sed_path() expands the __local_state_dir__ / __version__ placeholders.
    my $instdir =
        $main->sed_path('__local_state_dir__/compiled/__version__');

    unshift(@INC, $instdir, "$instdir/auto");
    dbg("zoom: loading compiled ruleset from $instdir");

    return $self->setup_test_set($conf, $conf->{body_tests}, 'body');
}
# Plugin hook: emit the summary recorded by setup_test_set_pri() once.
#
# If a summary message is pending it is logged at "info" level, but only
# when 'zoom' debug logging is off (with debugging on, the same text was
# already written via dbg when it was recorded).  The pending message is
# then discarded so it is not reported again.
sub compile_now_start {
    my ($self) = @_;

    if (exists $self->{compiled_rules_log_msg_text}) {
        my $dbg_is_off = !would_log('dbg', 'zoom');
        if ($dbg_is_off) {
            info("zoom: $self->{compiled_rules_log_msg_text}");
        }
        delete $self->{compiled_rules_log_msg_text};
    }
}
# Run setup_test_set_pri() once for every priority present in a test set.
#
# Arguments:
#   $conf     - configuration object, passed through unchanged.
#   $test_set - hashref keyed by priority; each value is the rule hash for
#               that priority.
#   $ruletype - base rule type name (e.g. 'body').
#
# The per-priority type name is "<ruletype>_<priority>", with every '-' in
# the priority replaced by "neg" (so -100 becomes "neg100").
sub setup_test_set {
    my ($self, $conf, $test_set, $ruletype) = @_;

    foreach my $priority (keys %{$test_set}) {
        (my $suffix = $priority) =~ s/-/neg/g;
        my $typed_name = join('_', $ruletype, $suffix);
        $self->setup_test_set_pri($conf, $test_set->{$priority}, $typed_name);
    }
}
# Try to load the compiled-regexp module for one rule type / priority and
# flag every rule that the compiled module can serve.
#
# Arguments:
#   $conf     - configuration hash.  skip_body_rules, need_one_line_sub,
#               generate_body_one_line_sub and zoom_ruletypes_available
#               are written here.
#   $rules    - hashref of rule name => rule definition for this priority.
#   $ruletype - type-and-priority name such as "body_0" or "body_neg100";
#               it selects Mail::SpamAssassin::CompiledRegexps::<ruletype>.
#
# Returns 1 if at least one compiled rule is usable, 0 otherwise (including
# when the compiled module cannot be loaded).
sub setup_test_set_pri {
    my ($self, $conf, $rules, $ruletype) = @_;

    my $modname = "Mail::SpamAssassin::CompiledRegexps::".$ruletype;
    my $modpath = "Mail/SpamAssassin/CompiledRegexps/".$ruletype.".pm";
    my $hasrules;

    # The module name is only known at run time, so it has to be loaded via
    # a string eval; the module's $HAS_RULES hashref is fetched in the same
    # eval.
    if (!eval qq{ use $modname; \$hasrules = \$${modname}::HAS_RULES; 1; }) {
        dbg "zoom: no compiled ruleset found for $modname";
        return 0;
    }

    # Work out which file was loaded, purely for the debug message.
    my $file;
    foreach my $dir (@INC) {
        my $try = $dir."/".$modpath;
        if (-f $try && -r $try) { $file = $try; last; }
    }
    # Fall back to Perl's own record of the loaded file, and never
    # interpolate undef into the log line.
    $file = $INC{$modpath} unless defined $file;
    $file = '(unknown location)' unless defined $file;
    dbg "zoom: using compiled ruleset in $file for $modname";

    $conf->{skip_body_rules} ||= { };
    $conf->{need_one_line_sub} ||= { };

    my $found = 0;
    foreach my $name (keys %{$rules}) {
        my $rule = $rules->{$name};
        my $comprule = $hasrules->{$name};

        # NOTE(review): in a Perl pattern "\#" matches a bare '#', so this
        # substitution leaves the text unchanged as written.  If the intent
        # was to un-escape "\#" the pattern would need to be "\\#" --
        # confirm against what sa-compile records before changing it, since
        # the comparison below depends on the exact text.
        $rule =~ s/\#/#/g;

        # Not in the compiled ruleset: common, so stay silent.
        if (!$comprule) {
            next;
        }
        # The rule was changed after the ruleset was compiled.
        if ($comprule ne $rule) {
            dbg "zoom: skipping rule $name, code differs in compiled ruleset";
            next;
        }
        if ($conf->{rules_to_replace}->{$name}) {
            dbg "zoom: skipping rule $name, ReplaceTags";
            next;
        }

        $conf->{skip_body_rules}->{$name} = 1;
        $conf->{generate_body_one_line_sub}->{$name} = 1;
        $found++;
    }

    if ($found) {
        my $totalhasrules = scalar keys %{$hasrules};
        # "|| .001" guards against dividing by zero; the result is
        # truncated to three decimal places.
        my $pc_zoomed = ($found / ($totalhasrules || .001)) * 100;
        $pc_zoomed = int($pc_zoomed * 1000) / 1000;

        # Kept on $self so compile_now_start() can report it later.
        $self->{compiled_rules_log_msg_text} = "able to use $found/".
            "$totalhasrules '$ruletype' compiled rules ($pc_zoomed\%)";
        dbg("zoom: $self->{compiled_rules_log_msg_text}");

        $conf->{zoom_ruletypes_available} ||= { };
        $conf->{zoom_ruletypes_available}->{$ruletype} = 1;
        return 1;
    }
    else {
        dbg("zoom: no usable compiled rules for type $ruletype");
    }

    return 0;
}
# Plugin hook: forwards $params unchanged to the OneLineBodyRuleType helper
# created in new(), and returns that helper's result.
sub check_start {
    my ($self, $params) = @_;

    my $helper = $self->{one_line_body};
    return $helper->check_start($params);
}
# Plugin hook: forwards $params unchanged to the OneLineBodyRuleType helper
# created in new(), and returns that helper's result.
sub check_rules_at_priority {
    my ($self, $params) = @_;

    my $helper = $self->{one_line_body};
    return $helper->check_rules_at_priority($params);
}
# Plugin hook: run the compiled scanner over the message lines and invoke
# the per-rule one-line body test for every rule the scanner reports.
#
# $params is a hashref; this sub reads {ruletype}, {priority},
# {permsgstatus} and {lines}.  It does nothing unless the rule type is
# 'body' and setup_test_set_pri() marked this type/priority as available.
#
# When 'zoom' debug logging is above level 1, rules reported by the scanner
# whose one-line test then returns false are counted in
# $self->{rule2xs_misses}.
sub run_body_fast_scan {
    my ($self, $params) = @_;

    return if $params->{ruletype} ne 'body';

    # Same "<type>_<priority>" naming as setup_test_set(): '-' => "neg".
    (my $pri_label = $params->{priority}) =~ s/-/neg/g;
    my $ruletype = $params->{ruletype} . '_' . $pri_label;

    my $scanner = $params->{permsgstatus};
    my $conf    = $scanner->{conf};
    return unless $conf->{zoom_ruletypes_available}->{$ruletype};

    dbg("zoom: run_body_fast_scan for $ruletype start");

    my $count_misses = (would_log('dbg', 'zoom') > 1);
    my $scores       = $conf->{scores};
    my $scan_name    =
        "Mail::SpamAssassin::CompiledRegexps::" . $ruletype . '::scan';

    {
        # Both the scanner and the per-rule tests are called by name.
        no strict "refs";

        foreach my $line (@{$params->{lines}}) {
            # The scanner is given a lower-cased copy of the line.
            my $matched = &{$scan_name}(lc $line);

            # Each rule is tried at most once per line.
            my %seen;
            foreach my $rulename (@{$matched}) {
                next if $seen{$rulename}++;

                # Skip rules with no (or a zero) score.
                next if !$scores->{$rulename};

                my $test_name = 'Mail::SpamAssassin::Plugin::Check::'
                    . $rulename . '_one_line_body_test';
                next unless defined &{$test_name};

                # The real test receives the original (not lower-cased) line.
                my $hit = &{$test_name}($scanner, $line);
                if ($count_misses && !$hit) {
                    $self->{rule2xs_misses}->{$rulename}++;
                }
            }
        }
    }

    dbg("zoom: run_body_fast_scan for $ruletype done");
}
# Plugin hook: when 'zoom' debug logging is above level 1, log the miss
# counters gathered by run_body_fast_scan(), in ascending order of count.
sub finish {
    my ($self) = @_;

    my $verbose = (would_log('dbg', 'zoom') > 1);
    return unless $verbose;

    my $misses = $self->{rule2xs_misses};
    my @by_count = sort { $misses->{$a} <=> $misses->{$b} } keys %{$misses};
    foreach my $rulename (@by_count) {
        dbg("zoom: $misses->{$rulename} misses for rule2xs rule $rulename\n");
    }
}
1;