# bayes_dump_to_trusted_networks
=head1 NAME
bayes_dump_to_trusted_networks - generate configuration from Bayes database
=head1 SYNOPSIS
sa-learn --dump | bayes_dump_to_trusted_networks [opts] > trust.cf
bayes_dump_to_trusted_networks bayes.dump [opts] > trust.cf
options:
--minham n
--rdns
=head1 DESCRIPTION
This tool uses a dump of your Bayes database to determine which
IP addresses are 'trustworthy', and therefore should be listed in
'trusted_networks' lines in your configuration.
This will reduce unnecessary DNSBL lookups, will whitelist mails from
trustworthy sources, and allows several SpamAssassin rules to operate more
effectively.
A 'trustworthy' IP is one that is trusted not to B<forge> emails; in other
words, it's not a subverted machine running a proxy, or one under spammer
control.
As such, any IP that has relayed 3 or more ham mails (see C<--minham>) is considered
trustworthy. It doesn't matter if it has ever relayed spam mails to you, since
large ISP smarthost relays will have done so -- relaying both ham and spam from
their customer pool. (The important thing is that it relayed the mail without
forging sender address information.)
=head1 OPTIONS
=over 4
=item --minham n
Require C<n> or more ham messages before considering an IP a candidate
for trust. Default: 3.
=item --rdns
Annotate with reverse-DNS for that IP address. Slows things down,
but easier to read.
=back
=head1 PREREQUISITES
C<Net::CIDR::Lite>
=cut
# Net::CIDR::Lite is pulled in at run time, inside an eval, so that a missing
# module produces the explanatory message below rather than a bare
# "Can't locate ..." failure.
unless (eval { require Net::CIDR::Lite; 1 }) {
  die "Net::CIDR::Lite module is required to use this script. Aborting.\n".
      "(error was: $@)\n";
}
use strict;
# Package globals holding the two precompiled address patterns used by the
# dump-reading loop further down.
use vars qw{
$IP_ADDRESS $IP_IN_RESERVED_RANGE
};
# Matches one IP address, case-insensitively, between word boundaries:
#   - an optional literal "IPv6:" prefix, followed by either
#   - a dotted quad (each octet 0-255), optionally preceded by an
#     IPv4-mapped style prefix of the form "0*:0*:ffff:" (with an optional
#     further "0*:" group), or
#   - six or more colon-separated groups of up to four hex digits each
#     (groups may be empty).
# The pattern is compiled with /x, so the embedded whitespace and line
# breaks are not part of what is matched.
$IP_ADDRESS = qr/\b (?:IPv6:)? (?: (?:0*:0*:ffff:(?:0*:)?)? (?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
(?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
(?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)\.
(?:1\d\d|2[0-4]\d|25[0-5]|\d\d|\d)
| [a-f0-9]{0,4} \:[a-f0-9]{0,4}
\:[a-f0-9]{0,4} \:[a-f0-9]{0,4}
\:[a-f0-9]{0,4} \:[a-f0-9]{0,4} (?:\:[a-f0-9]{0,4})*
)\b/ix;
# Matches, anchored at the start of the string, the leading octet(s) of an
# IPv4 address that should never be emitted as trusted: 192.168, 10,
# 172.16-172.31, 169.254, 127, plus a fixed list of individual first octets.
# The alternation must be followed by a literal dot, so "10" matches
# "10.1.2.3" but not "100.1.2.3".
# NOTE(review): the list of individual first octets looks like a snapshot of
# blocks that were unallocated when this was written; presumably several are
# in ordinary use by now -- verify against the current IANA IPv4 registry.
$IP_IN_RESERVED_RANGE = qr{^(?:
192\.168| 10| 172\.(?:1[6-9]|2[0-9]|3[01])| 169\.254| 127| [01257]| 2[37]| 3[179]| 4[12]| 5[89]| 60| 7[0-9]| 9[0-9]| 1[01][0-9]| 12[0-6]| 197| 22[23]| 24[0-9]| 25[0-5] )\.}x;
use Getopt::Long;
# Abort the program with a one-line usage summary.  The text ends in a
# newline, so die() does not append the script name and line number.
sub usage {
  my $summary = "
usage: bayes_dump_to_trusted_networks [--minham n] [--rdns] [file]
";
  die $summary;
}
# Command-line switches, kept as package globals.
use vars qw($opt_minham $opt_rdns $opt_help);

# 'minham:i' takes an optional integer argument; 'rdns' and 'help' are plain
# flags.  An unparseable command line, or --help, ends in usage().
my $options_ok = GetOptions(
  'minham:i' => \$opt_minham,
  'rdns'     => \$opt_rdns,
  'help'     => \$opt_help,
);
usage() unless $options_ok;
usage() if $opt_help;

# Defaults: no reverse-DNS annotation, and a threshold of 3 ham messages.
# A false --minham value (absent, or 0) also falls back to 3.
$opt_rdns   = 0 unless $opt_rdns;
$opt_minham = 3 unless $opt_minham;
# Maps the part of an address before its last dot (for IPv4: the "a.b.c"
# class-C prefix) to a space-separated list of the final octets of every
# qualifying address seen under that prefix.
my %class_cs = ();

# Read the dump from the files named on the command line, or from STDIN.
# Each line is split on whitespace into five leading fields; only the ham
# count and the token are used here.
while (<>) {
  my ($prob, $nspam, $nham, $atime, $tok) = split;

  # A line with fewer than five fields has no token to inspect.
  next unless defined $tok;

  # Only tokens of the form "H*r:ip*<address>" are of interest.
  next if ($tok !~ /^H\*r:ip\*(${IP_ADDRESS})/o);
  my $ip = $1;

  # Too few ham messages to be a trust candidate.
  next unless ($nham >= $opt_minham);

  # Never emit reserved / private ranges.
  next if ($ip =~ /$IP_IN_RESERVED_RANGE/o);

  # Neutralise every character that is not a digit, a dot or a colon.
  $ip =~ s/[^0-9\.\:]/_/gs;

  # Split off the final octet.  The result of the match has to be checked:
  # when it fails (an address without any dot, e.g. pure IPv6), $1 and $2
  # would still hold the captures of the token match above, and a bogus
  # entry would be recorded under the full address.
  next unless ($ip =~ /^(.*)\.(\d+)/);
  my ($class_c, $octet) = ($1, $2);

  if (defined $class_cs{$class_c}) {
    $class_cs{$class_c} .= " ".$octet;
  } else {
    $class_cs{$class_c} = $octet;
  }
}
# Walk the collected prefixes in ascending numeric order (prefixes that
# classc2long() cannot parse sort first, as -1) and print one
# "trusted_networks" line per address or merged range.
my @ordered_prefixes =
    sort { classc2long($a) <=> classc2long($b) } keys %class_cs;

foreach my $class_c (@ordered_prefixes) {
  # Rebuild the full addresses under this prefix and let Net::CIDR::Lite
  # merge them.
  my $cidr = Net::CIDR::Lite->new;
  for my $octet (split (' ', $class_cs{$class_c})) {
    $cidr->add_ip ("$class_c.$octet");
  }
  $cidr->clean();

  my @networks = $cidr->list();
  foreach my $network (@networks) {
    # A "/32" entry is a single host; print it without the mask.
    my $is_single_host = ($network =~ s/\/32$//);
    my $annotation = '';

    if ($opt_rdns && $is_single_host) {
      # Ask host(1), with a one-second wait, and reduce its output to either
      # the pointer name(s) or "(no rdns)", collapsed onto a single line.
      my $lookup = `host -W 1 $network 2>&1`;
      $lookup =~ s/^.* domain name pointer (\S+)\s*$/$1/gm;
      $lookup =~ s/^.*not found: .*$/(no rdns)/gs;
      $lookup =~ s/\s+/ /gs;
      $lookup =~ s/\.$//;
      $annotation = "\t# $lookup";
    }
    elsif ($opt_rdns) {
      # Ranges are not looked up.
      $annotation = "\t# (range)";
    }

    print "trusted_networks $network$annotation\n";
  }
}
# Convert a three-octet prefix "a.b.c" into the integer (a<<16 | b<<8 | c),
# for use as a numeric sort key.  Anything that is not exactly three
# dot-separated runs of digits yields -1.
sub classc2long {
  my ($prefix) = @_;

  my @octets = ($prefix =~ /^(\d+)\.(\d+)\.(\d+)$/);
  return -1 unless @octets;

  my ($high, $mid, $low) = @octets;
  return ($high << 16) | ($mid << 8) | ($low);
}