# parse-rules-for-masses [plain text]
use strict;
# Abort the program with the command-line synopsis.
# The message ends in a newline, so die() does not append the
# "at FILE line N" suffix to it.
sub usage {
    my @text = (
        '',
        'parse-rules-for-masses: parse the SpamAssassin rules files for mass-checks,',
        'evolving, and frequency analysis',
        'usage: ./parse-rules-for-masses [-d rulesdir] [-o outputfile] [-s scoreset]',
        'rulesdir defaults to ../rules',
        'outputfile defaults to ./tmp/rules.pl',
        'scoreset default to 0',
    );
    die join("\n", @text) . "\n";
}
use Getopt::Long;
use Data::Dumper;
use vars qw(@rulesdirs $outputfile $scoreset);
# ---- main program ----------------------------------------------------------
# Command-line options:
#   -d DIR   rules directory to read; may be given more than once
#   -o FILE  file the dump is written to
#   -s N     index of the value to take from a multi-valued score line
#   -h, -?   show the usage text and abort
GetOptions(
    "d=s"      => \@rulesdirs,
    "o=s"      => \$outputfile,
    "s=i"      => \$scoreset,
    "help|h|?" => sub { usage(); },
);

# No -d on the command line: fall back to the stock rules directory.
if (!@rulesdirs) {
    @rulesdirs = ("../rules");
}

if (!defined $outputfile) {
    $outputfile = "./tmp/rules.pl";
    # Best effort only; the result is not checked (the directory may
    # already exist).
    mkdir("tmp", 0755);
}

$scoreset = 0 if !defined $scoreset;

# Rule name => hashref of attributes; filled in by readrules().
my $rules = { };
readrules(@rulesdirs);

# Flat rule name => score map, dumped next to $rules by writerules().
my $scores = { };
foreach my $key (keys %{$rules}) {
    $scores->{$key} = $rules->{$key}->{score};
}

writerules($outputfile);
exit;
# readrules(@dirs)
#
# Parse every file matching "[0-9]*.cf" in each directory of @dirs (sorted
# by name within a directory) and record what is found in the file-level
# $rules hashref, keyed by rule name.  Fields set per rule:
#   type      - header, rawbody, body, full, uri or meta
#   lang      - language from a "lang XX" prefix on the defining line, else ''
#   issubrule - '1' when the rule name starts with "__", otherwise '0'
#   tflags    - text of the rule's tflags line ('' when defined without one)
#   describe  - description text
#   score     - score value; for a multi-valued score line, the value at
#               index $scoreset (a package global)
#   mutable   - state of the <gen:mutable> markers when the score line
#               was read
#
# After all directories are read, entries that never received a type (e.g.
# a score line naming a rule that is not defined) are removed.  Rules that
# have no defined score get a default: 1.0, or 0.01 for names starting with
# "T_", negated when tflags contains "nice".  Defaulted scores are marked
# mutable.
#
# A file that cannot be opened is reported with warn() and skipped.
# Returns nothing.
sub readrules {
    foreach my $indir (@_) {
        my @files = glob("$indir/[0-9]*.cf");

        # Starts out "on" for each directory, then carries over from one
        # file to the next within that directory.
        my $scores_mutable = 1;

        foreach my $file (sort @files) {
            my $in;
            if (!open($in, '<', $file)) {
                warn "cannot read $file: $!\n";
                next;
            }

            while (my $line = <$in>) {
                # The markers are looked for before comments are removed,
                # so a marker written inside a comment still counts.
                if ($line =~ /<\/gen:mutable>/i) {
                    $scores_mutable = 0;
                }
                elsif ($line =~ /<gen:mutable>/i) {
                    $scores_mutable = 1;
                }

                # Drop comments and surrounding whitespace, then skip the
                # line if nothing is left.
                $line =~ s/#.*$//;
                $line =~ s/^\s+//;
                $line =~ s/\s+$//;
                next if $line eq '';

                # An optional "lang XX" prefix is removed and remembered.
                my $lang = '';
                if ($line =~ s/^lang\s+(\S+)\s+//) {
                    $lang = $1;
                }

                if ($line =~ /^(header|rawbody|body|full|uri|meta)\s+(\S+)\s+/) {
                    my ($type, $name) = ($1, $2);
                    my $rule = ($rules->{$name} ||= { });
                    $rule->{type}      = $type;
                    $rule->{lang}      = $lang;
                    $rule->{issubrule} = ($name =~ /^__/) ? '1' : '0';
                    $rule->{tflags}    = '';
                }
                elsif ($line =~ /^describe\s+(\S+)\s+(.+)$/) {
                    my ($name, $text) = ($1, $2);
                    my $rule = ($rules->{$name} ||= { });
                    if ($lang) {
                        # A language-specific description never replaces
                        # one that is already recorded.
                        $rule->{describe} ||= $text;
                    }
                    else {
                        $rule->{describe} = $text;
                    }
                }
                elsif ($line =~ /^tflags\s+(\S+)\s+(.+)$/) {
                    my ($name, $flags) = ($1, $2);
                    my $rule = ($rules->{$name} ||= { });
                    $rule->{tflags} = $flags;
                }
                elsif ($line =~ /^score\s+(\S+)\s+(.+)$/) {
                    my ($name, $score) = ($1, $2);
                    my $rule = ($rules->{$name} ||= { });
                    if ($score =~ /\s/) {
                        # Several values on the line: take the one at index
                        # $scoreset.  If the line has fewer values this is
                        # undef and the default below applies.
                        ($score) = (split(/\s+/, $score))[$scoreset];
                    }
                    $rule->{score}   = $score;
                    $rule->{mutable} = $scores_mutable;
                }
            }
            close $in;
        }
    }

    # keys() returns a copy of the key list, so deleting inside the loop
    # is safe.
    foreach my $name (keys %{$rules}) {
        my $rule = $rules->{$name};

        if (!defined $rule->{type}) {
            delete $rules->{$name};
            next;
        }
        next if defined $rule->{score};

        my $def = ($name =~ /^T_/) ? 0.01 : 1.0;
        $rule->{score}   = ($rule->{tflags} =~ /nice/) ? -$def : $def;
        $rule->{mutable} = 1;
    }
    return;
}
# writerules($outfile)
#
# Dump the file-level $rules and $scores hashrefs to $outfile as Perl source
# that defines %rules and %scores, preceded by a "# dumped at <time>" comment
# and followed by "1;" so the file yields a true value when loaded.
#
# Missing parent directories of $outfile are created first.  Dies if the
# file cannot be opened or fully written.
sub writerules {
    my $outfile = shift;

    # Loaded here so the sub does not depend on the file's top-level
    # "use" lines.
    require File::Basename;
    require File::Path;

    # Create the parent directories without going through a shell, so any
    # characters in the path are safe.  Failure is ignored on purpose: the
    # open() below reports the problem.
    my $dir = File::Basename::dirname($outfile);
    if (!-d $dir) {
        eval { File::Path::mkpath($dir) };
    }

    open(my $out, '>', $outfile) or die "cannot write to $outfile: $!";

    # Purity makes Data::Dumper emit extra statements where needed so the
    # dump can be evaluated back into the same structure; localized so the
    # setting does not leak to other Data::Dumper users.
    local $Data::Dumper::Purity = 1;

    print {$out} "# dumped at " . scalar(localtime) . "\n\n";
    print {$out} Data::Dumper->Dump([$rules, $scores], ['*rules', '*scores']);
    print {$out} "1;";

    # Buffered write errors only show up at close time.
    close($out) or die "cannot write to $outfile: $!";
    return;
}