package Mail::SpamAssassin::BayesStore::DBM;
use strict;
use warnings;
use bytes;
use Fcntl;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Util;
use Mail::SpamAssassin::BayesStore;
use Mail::SpamAssassin::Logger;
use Digest::SHA1 qw(sha1);
use File::Basename;
use File::Spec;
use File::Path;
# Matches the five-byte control-character prefix that starts every
# bookkeeping ("magic") key defined below, so that such keys can be told
# apart from real token keys when walking the toks database.
use constant MAGIC_RE => qr/^\015\001\007\011\003/;
# Package globals, declared through "use vars" so they are usable under strict.
use vars qw{
@ISA
@DBNAMES
$NSPAM_MAGIC_TOKEN $NHAM_MAGIC_TOKEN $LAST_EXPIRE_MAGIC_TOKEN $LAST_JOURNAL_SYNC_MAGIC_TOKEN
$NTOKENS_MAGIC_TOKEN $OLDEST_TOKEN_AGE_MAGIC_TOKEN $LAST_EXPIRE_REDUCE_MAGIC_TOKEN
$RUNNING_EXPIRE_MAGIC_TOKEN $DB_VERSION_MAGIC_TOKEN $LAST_ATIME_DELTA_MAGIC_TOKEN
$NEWEST_TOKEN_AGE_MAGIC_TOKEN
};
@ISA = qw( Mail::SpamAssassin::BayesStore );
# Logical database names; each one is stored at "<bayes_path>_<name>" and
# tied into $self->{"db_<name>"}.
@DBNAMES = qw(toks seen);
# Keys under which bookkeeping values are kept inside the toks database.
# All of them share the prefix recognised by MAGIC_RE.
$DB_VERSION_MAGIC_TOKEN = "\015\001\007\011\003DBVERSION";
$LAST_ATIME_DELTA_MAGIC_TOKEN = "\015\001\007\011\003LASTATIMEDELTA";
$LAST_EXPIRE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIRE";
$LAST_EXPIRE_REDUCE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIREREDUCE";
$LAST_JOURNAL_SYNC_MAGIC_TOKEN = "\015\001\007\011\003LASTJOURNALSYNC";
$NEWEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003NEWESTAGE";
$NHAM_MAGIC_TOKEN = "\015\001\007\011\003NHAM";
$NSPAM_MAGIC_TOKEN = "\015\001\007\011\003NSPAM";
$NTOKENS_MAGIC_TOKEN = "\015\001\007\011\003NTOKENS";
$OLDEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003OLDESTAGE";
$RUNNING_EXPIRE_MAGIC_TOKEN = "\015\001\007\011\003RUNNINGEXPIRE";
# Report whether DB_File can be loaded.  The outcome of the first probe is
# cached in $self->{has_dbm_module}, so the require is attempted at most
# once per object.  Returns the (possibly undef) result of the require.
sub HAS_DBM_MODULE {
  my ($self) = @_;

  return $self->{has_dbm_module} if exists $self->{has_dbm_module};

  my $loaded = eval { require DB_File; };
  return $self->{has_dbm_module} = $loaded;
}
# Name of the class that is handed to tie() for every database file.
sub DBM_MODULE {
  my $module = 'DB_File';
  return $module;
}
# File-name suffixes under which a database file may exist on disk: the
# bare name first, then the name with ".db" appended.
sub DB_EXTENSIONS {
  # Returned as a literal list (not an array) so the value seen by a
  # scalar-context caller stays the last element.
  return (q{}, q{.db});
}
# Constructor.  Delegates object creation to the parent class, then
# initialises the per-object state used by this backend:
#   supported_db_version - on-disk format version (3)
#   already_tied         - true while the databases are tied
#   is_locked            - true while the write lock is held
#   string_to_journal    - buffer of journal lines awaiting cleanup()
sub new {
  my $proto = shift;
  my $class = ref($proto) || $proto;

  my $self = $class->SUPER::new(@_);

  $self->{supported_db_version} = 3;
  $self->{already_tied}         = 0;
  $self->{is_locked}            = 0;
  $self->{string_to_journal}    = '';

  return $self;
}
# Tie the Bayes databases for reading, without taking the write lock.
#
# Returns 1 when the databases are tied (or were already tied in unlocked
# mode) and their format version equals $self->DB_VERSION.  Returns 0 when
# the DBM module is unavailable, bayes_path is unset, no toks file exists,
# a tie fails, or the version does not match.
sub tie_db_readonly {
  my ($self) = @_;

  if (!$self->HAS_DBM_MODULE) {
    dbg("bayes: " . $self->DBM_MODULE . " module not installed, cannot use bayes");
    return 0;
  }

  # Nothing to do if an earlier call left us tied in unlocked mode.
  return 1 if ($self->{already_tied} && $self->{is_locked} == 0);

  my $main = $self->{bayes}->{main};
  if (!defined($main->{conf}->{bayes_path})) {
    dbg("bayes: bayes_path not defined");
    return 0;
  }

  $self->read_db_configs();

  my $path = $main->sed_path($main->{conf}->{bayes_path});

  # The toks file must already exist under one of the known extensions.
  my $found = 0;
  for my $ext ($self->DB_EXTENSIONS) {
    if (-f $path.'_toks'.$ext) {
      $found = 1;
      last;
    }
  }
  if (!$found) {
    dbg("bayes: no dbs present, cannot tie DB R/O: ${path}_toks");
    return 0;
  }

  my $tie_failed = 0;
  foreach my $dbname (@DBNAMES) {
    my $name = $path.'_'.$dbname;
    my $db_var = 'db_'.$dbname;
    dbg("bayes: tie-ing to DB file R/O $name");

    next if tie %{$self->{$db_var}}, $self->DBM_MODULE, $name, O_RDONLY,
                (oct($main->{conf}->{bayes_file_mode}) & 0666);

    # A "seen" database that cannot be opened read-only is tolerated:
    # fall back to opening (and if necessary creating) it read/write.
    if ($dbname eq 'seen') {
      next if tie %{$self->{$db_var}}, $self->DBM_MODULE, $name, O_RDWR|O_CREAT,
                  (oct($main->{conf}->{bayes_file_mode}) & 0666);
    }

    # Nothing between the failed tie and the warn below touches $!.
    $tie_failed = 1;
    last;
  }

  if ($tie_failed) {
    warn "bayes: cannot open bayes databases ${path}_* R/O: tie failed: $!\n";
    foreach my $dbname (@DBNAMES) {
      my $db_var = 'db_'.$dbname;
      next unless exists $self->{$db_var};
      dbg("bayes: untie-ing DB file $dbname");
      untie %{$self->{$db_var}};
    }
    return 0;
  }

  $self->{db_version} = ($self->get_storage_variables())[6];
  dbg("bayes: found bayes db version ".$self->{db_version});

  if ($self->_check_db_version() != 0) {
    warn("bayes: bayes db version ".$self->{db_version}." is not able to be used, aborting!");
    # untie_db() returns immediately unless already_tied is set, which
    # previously left the handles tied on this path.  Mark the object as
    # tied first so the cleanup really happens (untie_db resets the flag).
    $self->{already_tied} = 1;
    $self->untie_db();
    return 0;
  }

  $self->{already_tied} = 1;
  return 1;
}
# Tie the Bayes databases read/write, holding the lock on bayes_path.
#
# If the object is already tied with the lock held, the lock is refreshed
# and 1 is returned.  Otherwise the parent directory is created when
# missing, the lock is taken (waiting up to 300s when learn_wait_for_lock
# is set, 10s otherwise), every database is tied with O_RDWR|O_CREAT, and
# an existing database is upgraded via _upgrade_db(); a brand-new one gets
# its version and a zero token count recorded.
#
# Returns 1 on success, 0 on any failure.  On failure the databases are
# untied and the lock released.
sub tie_db_writable {
  my ($self) = @_;

  if (!$self->HAS_DBM_MODULE) {
    dbg("bayes: " . $self->DBM_MODULE . " module not installed, cannot use bayes");
    return 0;
  }

  my $main = $self->{bayes}->{main};

  # Already tied in locked mode: just keep the lock fresh.
  if ($self->{already_tied} && $self->{is_locked} == 1) {
    $main->{locker}->refresh_lock($self->{locked_file});
    return 1;
  }

  if (!defined($main->{conf}->{bayes_path})) {
    dbg("bayes: bayes_path not defined");
    return 0;
  }

  $self->read_db_configs();

  my $path = $main->sed_path($main->{conf}->{bayes_path});

  # Remember whether a toks file existed before tie() gets a chance to
  # create one; this decides between "upgrade" and "initialise" below.
  my $found = 0;
  for my $ext ($self->DB_EXTENSIONS) {
    if (-f $path.'_toks'.$ext) {
      $found = 1;
      last;
    }
  }

  my $parentdir = dirname($path);
  if (!-d $parentdir) {
    # Best effort: a failure in mkpath is swallowed here and surfaces
    # later as a lock or tie failure.
    eval {
      mkpath($parentdir, 0, (oct($main->{conf}->{bayes_file_mode}) & 0777));
    };
  }

  my $tout = $main->{learn_wait_for_lock} ? 300 : 10;
  if ($main->{locker}->safe_lock($path, $tout, $main->{conf}->{bayes_file_mode})) {
    $self->{locked_file} = $path;
    $self->{is_locked} = 1;
  }
  else {
    warn "bayes: cannot open bayes databases ${path}_* R/W: lock failed: $!\n";
    return 0;
  }

  # Tie with umask 0 so the requested file mode is applied as given.
  my $saved_umask = umask 0;
  my $tie_error;
  foreach my $dbname (@DBNAMES) {
    my $name = $path.'_'.$dbname;
    my $db_var = 'db_'.$dbname;
    dbg("bayes: tie-ing to DB file R/W $name");

    next if tie %{$self->{$db_var}}, $self->DBM_MODULE, $name, O_RDWR|O_CREAT,
                (oct($main->{conf}->{bayes_file_mode}) & 0666);

    # Capture errno before anything else can overwrite it.
    $tie_error = "$!";
    last;
  }
  umask $saved_umask;

  if (defined $tie_error) {
    foreach my $dbname (@DBNAMES) {
      my $db_var = 'db_'.$dbname;
      next unless exists $self->{$db_var};
      dbg("bayes: untie-ing DB file $dbname");
      untie %{$self->{$db_var}};
    }
    if ($self->{is_locked}) {
      $self->{bayes}->{main}->{locker}->safe_unlock($self->{locked_file});
      $self->{is_locked} = 0;
    }
    warn "bayes: cannot open bayes databases ${path}_* R/W: tie failed: $tie_error\n";
    return 0;
  }

  $self->{db_version} = ($self->get_storage_variables())[6];
  # Only meaningful for a pre-existing database.
  dbg("bayes: found bayes db version ".$self->{db_version}) if ($found);

  if ($found && !$self->_upgrade_db()) {
    # untie_db() returns immediately unless already_tied is set, which
    # previously left the handles tied and the lock held on this path.
    # Mark the object as tied first so untie_db() really unties and
    # unlocks (it resets the flag itself).
    $self->{already_tied} = 1;
    $self->untie_db();
    return 0;
  }
  elsif (!$found) {
    # Fresh database: record the current format version and zero tokens.
    $self->{db_version} = $self->{db_toks}->{$DB_VERSION_MAGIC_TOKEN} = $self->DB_VERSION;
    $self->{db_toks}->{$NTOKENS_MAGIC_TOKEN} = 0;
    dbg("bayes: new db, set db version ".$self->{db_version}." and 0 tokens");
  }

  $self->{already_tied} = 1;
  return 1;
}
# Compare the version of the tied database with the version this code
# writes.  Returns -1 when the database is older, 0 when equal, and 1 when
# the database is newer.
sub _check_db_version {
  my ($self) = @_;

  my $on_disk = $self->{db_version};
  my $current = $self->DB_VERSION;

  return $on_disk <=> $current;
}
# Bring the tied toks database up to the current on-disk format.
#
# Returns 1 when the database already has the current version or was
# upgraded successfully; returns 0 when the database is newer than this
# code understands or when any tie/rename step of an upgrade fails.
#
# Each upgrade step writes a complete replacement database to
# "<bayes_path>_toks.new", renames it over the live file and re-ties
# $self->{db_toks}.  The steps chain: a v0/v1 database is first taken to
# v2 and then, by the second block, to v3.
#
# NOTE(review): on the "return 0" paths after the untie calls,
# $self->{db_toks} is left untied and the ".new" file is not removed; since
# the ".new" file is created with O_EXCL, a leftover would presumably make
# the next upgrade attempt fail too -- confirm.
sub _upgrade_db {
my ($self) = @_;
my $verschk = $self->_check_db_version();
# $res holds tie() results; $umask holds the saved process umask.
my $res = 0; my $umask;
# Already current: nothing to do.
return 1 if ($verschk == 0);
# Database is newer than this code: refuse to touch it.
if ($verschk == 1) {
warn("bayes: bayes db version ".$self->{db_version}." is newer than we understand, aborting!");
return 0;
}
dbg("bayes: detected bayes db format ".$self->{db_version}.", upgrading");
my $main = $self->{bayes}->{main};
my $path = $main->sed_path($main->{conf}->{bayes_path});
my $name = $path.'_toks';
# A journal written against the old format is discarded, not replayed.
my $jpath = $self->_get_journal_filename();
if (-f $jpath) {
dbg("bayes: old journal file found, removing");
warn "bayes: couldn't remove $jpath: $!" if (!unlink $jpath);
}
# ---- Step 1: v0 or v1 -> v2 ----
if ($self->{db_version} < 2) {
dbg("bayes: upgrading database format from v".$self->{db_version}." to v2");
$self->set_running_expire_tok();
my ($DB_NSPAM_MAGIC_TOKEN, $DB_NHAM_MAGIC_TOKEN, $DB_NTOKENS_MAGIC_TOKEN);
# These two are declared but never used in this branch.
my ($DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN, $DB_LAST_EXPIRE_MAGIC_TOKEN);
# v0 stored its counters under "**NAME" keys; v1 already used the
# control-character prefix.
if ($self->{db_version} == 0) {
$DB_NSPAM_MAGIC_TOKEN = '**NSPAM';
$DB_NHAM_MAGIC_TOKEN = '**NHAM';
$DB_NTOKENS_MAGIC_TOKEN = '**NTOKENS';
}
else {
$DB_NSPAM_MAGIC_TOKEN = "\015\001\007\011\003NSPAM";
$DB_NHAM_MAGIC_TOKEN = "\015\001\007\011\003NHAM";
$DB_NTOKENS_MAGIC_TOKEN = "\015\001\007\011\003NTOKENS";
}
my $started = time;
# Every token copied below gets this single access time.
my $newatime = $started;
my %new_toks;
# O_EXCL: creation fails if "<name>.new" already exists.
$umask = umask 0;
$res = tie %new_toks, $self->DBM_MODULE, "${name}.new", O_RDWR|O_CREAT|O_EXCL,
(oct($main->{conf}->{bayes_file_mode}) & 0666);
umask $umask;
return 0 unless $res;
# Drop our reference to the tie object before untie is called later.
undef $res;
# Carry the message/token counters over; all time-related bookkeeping
# is reset to the upgrade time or zero.
$new_toks{$NSPAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NSPAM_MAGIC_TOKEN};
$new_toks{$NHAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NHAM_MAGIC_TOKEN};
$new_toks{$NTOKENS_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NTOKENS_MAGIC_TOKEN};
$new_toks{$DB_VERSION_MAGIC_TOKEN} = 2; $new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $newatime;
$new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = $newatime;
$new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $newatime;
$new_toks{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = $newatime;
$new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = 0;
$new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = 0;
my ($tok, $packed);
my $count = 0;
while (($tok, $packed) = each %{$self->{db_toks}}) {
# Skip bookkeeping keys of either naming scheme.
next if ($tok =~ /^(?:\*\*[A-Z]+$|\015\001\007\011\003)/);
# Re-pack each token with its old counts and the new access time.
my ($ts, $th, $atime) = $self->tok_unpack($packed);
$new_toks{$tok} = $self->tok_pack($ts, $th, $newatime);
# Refresh the lock on the first token and every 1000 thereafter.
if (($count++ % 1000) == 0) {
$self->set_running_expire_tok();
}
}
untie %{$self->{db_toks}};
untie %new_toks;
# Ignore interrupting signals while files are swapped.  "local" keeps
# these settings until the enclosing if-block is left.
local $SIG{'INT'} = 'IGNORE';
local $SIG{'TERM'} = 'IGNORE';
local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());
# Remove the "<bayes_path>_msgcount" file if one exists.
my $msgc = $path.'_msgcount';
if (-f $msgc) {
dbg("bayes: old msgcount file found, removing");
if (!unlink $msgc) {
warn "bayes: couldn't remove $msgc: $!";
}
}
# Move whichever ".new" file(s) were created over the live database.
for my $ext ($self->DB_EXTENSIONS) {
my $newf = $name.'.new'.$ext;
my $oldf = $name.$ext;
next unless (-f $newf);
if (!rename ($newf, $oldf)) {
warn "bayes: rename $newf to $oldf failed: $!\n";
return 0;
}
}
# Re-tie the (now upgraded) live database.
$umask = umask 0;
$res = tie %{$self->{db_toks}},$self->DBM_MODULE, $name, O_RDWR|O_CREAT,
(oct($main->{conf}->{bayes_file_mode}) & 0666);
umask $umask;
return 0 unless $res;
undef $res;
# db_version still holds the pre-upgrade value when this is logged.
dbg("bayes: upgraded database format from v".$self->{db_version}." to v2 in ".(time - $started)." seconds");
$self->{db_version} = 2; }
# ---- Step 2: v2 -> v3 ----
if ($self->{db_version} == 2) {
dbg("bayes: upgrading database format from v".$self->{db_version}." to v3");
$self->set_running_expire_tok();
# Key names as stored in a v2 database.
my $DB_NSPAM_MAGIC_TOKEN = "\015\001\007\011\003NSPAM";
my $DB_NHAM_MAGIC_TOKEN = "\015\001\007\011\003NHAM";
my $DB_NTOKENS_MAGIC_TOKEN = "\015\001\007\011\003NTOKENS";
my $DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003OLDESTAGE";
my $DB_LAST_EXPIRE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIRE";
my $DB_NEWEST_TOKEN_AGE_MAGIC_TOKEN = "\015\001\007\011\003NEWESTAGE";
my $DB_LAST_JOURNAL_SYNC_MAGIC_TOKEN = "\015\001\007\011\003LASTJOURNALSYNC";
my $DB_LAST_ATIME_DELTA_MAGIC_TOKEN = "\015\001\007\011\003LASTATIMEDELTA";
my $DB_LAST_EXPIRE_REDUCE_MAGIC_TOKEN = "\015\001\007\011\003LASTEXPIREREDUCE";
my $started = time;
my %new_toks;
$umask = umask 0;
$res = tie %new_toks, $self->DBM_MODULE, "${name}.new", O_RDWR|O_CREAT|O_EXCL,
(oct($main->{conf}->{bayes_file_mode}) & 0666);
umask $umask;
return 0 unless $res;
undef $res;
# All bookkeeping values are carried over unchanged; only the version
# number is bumped.
$new_toks{$NSPAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NSPAM_MAGIC_TOKEN};
$new_toks{$NHAM_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NHAM_MAGIC_TOKEN};
$new_toks{$NTOKENS_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NTOKENS_MAGIC_TOKEN};
$new_toks{$DB_VERSION_MAGIC_TOKEN} = 3; $new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $self->{db_toks}->{$DB_OLDEST_TOKEN_AGE_MAGIC_TOKEN};
$new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = $self->{db_toks}->{$DB_LAST_EXPIRE_MAGIC_TOKEN};
$new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $self->{db_toks}->{$DB_NEWEST_TOKEN_AGE_MAGIC_TOKEN};
$new_toks{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = $self->{db_toks}->{$DB_LAST_JOURNAL_SYNC_MAGIC_TOKEN};
$new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = $self->{db_toks}->{$DB_LAST_ATIME_DELTA_MAGIC_TOKEN};
$new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} =$self->{db_toks}->{$DB_LAST_EXPIRE_REDUCE_MAGIC_TOKEN};
my $count = 0;
while (my ($tok, $packed) = each %{$self->{db_toks}}) {
# Skip bookkeeping keys.  Each token key is replaced by the last five
# bytes of its SHA1 digest; the packed value is copied as is.
next if ($tok =~ /^\015\001\007\011\003/); my $tok_hash = substr(sha1($tok), -5);
$new_toks{$tok_hash} = $packed;
if (($count++ % 1000) == 0) {
$self->set_running_expire_tok();
}
}
untie %{$self->{db_toks}};
untie %new_toks;
# Ignore interrupting signals while files are swapped (until the
# enclosing if-block is left).
local $SIG{'INT'} = 'IGNORE';
local $SIG{'TERM'} = 'IGNORE';
local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());
for my $ext ($self->DB_EXTENSIONS) {
my $newf = $name.'.new'.$ext;
my $oldf = $name.$ext;
next unless (-f $newf);
if (!rename($newf, $oldf)) {
warn "bayes: rename $newf to $oldf failed: $!\n";
return 0;
}
}
$umask = umask 0;
$res = tie %{$self->{db_toks}},$self->DBM_MODULE, $name, O_RDWR|O_CREAT,
(oct ($main->{conf}->{bayes_file_mode}) & 0666);
umask $umask;
return 0 unless $res;
undef $res;
dbg("bayes: upgraded database format from v".$self->{db_version}." to v3 in ".(time - $started)." seconds");
$self->{db_version} = 3; }
return 1;
}
# Release everything acquired by tie_db_readonly()/tie_db_writable():
# untie and forget each database hash, give back the write lock if it is
# held, and clear the tied/version state.  Does nothing unless the object
# is marked as tied.
sub untie_db {
  my ($self) = @_;

  return unless $self->{already_tied};

  dbg("bayes: untie-ing");

  for my $dbname (@DBNAMES) {
    my $slot = 'db_'.$dbname;
    next unless exists $self->{$slot};
    untie %{$self->{$slot}};
    delete $self->{$slot};
  }

  if ($self->{is_locked}) {
    dbg("bayes: files locked, now unlocking lock");
    my $locker = $self->{bayes}->{main}->{locker};
    $locker->safe_unlock($self->{locked_file});
    $self->{is_locked} = 0;
  }

  $self->{already_tied} = 0;
  $self->{db_version} = undef;
}
# Count how many tokens would be expired for a series of candidate atime
# deltas.
#
# The candidates are $start * 1, $start * 2, $start * 4, ... for every
# power-of-two multiplier up to $max_expire_mult.  A token counts towards a
# multiplier when its age ($newest_atime minus its atime) is at least
# $start times that multiplier.
#
# Returns a hash (as a flat list) mapping multiplier => token count; a
# multiplier that matched no token is absent.
sub calculate_expire_delta {
  my ($self, $newest_atime, $start, $max_expire_mult) = @_;

  my %expired_by_mult;

  while (my ($key, $value) = each %{$self->{db_toks}}) {
    next if ($key =~ MAGIC_RE);    # bookkeeping entry, not a token

    my ($ts, $th, $atime) = $self->tok_unpack($value);
    my $age = $newest_atime - $atime;

    # Once the age falls short of one multiplier it falls short of all
    # larger ones, so the walk stops at the first miss.
    my $mult = 1;
    while ($mult <= $max_expire_mult && $age >= $start * $mult) {
      $expired_by_mult{$mult}++;
      $mult <<= 1;
    }
  }

  return %expired_by_mult;
}
# Expire old tokens by copying the survivors into a fresh database file and
# renaming that file over the live toks database.
#
# Parameters:
#   $opts     - hashref; only {showdots} is read here (progress dots on STDERR)
#   $newdelta - atime delta: tokens whose atime is more than this far behind
#               the newest atime are dropped
#   @vars     - list in the layout returned by get_storage_variables();
#               elements [1] (nspam), [2] (nham), [3] (ntokens) and
#               [10] (newest token atime) are used
#
# Returns ($kept, $deleted, $num_hapaxes, $num_lowfreq).  When fewer than
# 100000 tokens would remain, the expiry is abandoned and the result is
# ($vars[3], 0, 0, 0).
#
# The databases are untied (and unlocked) via untie_db() before returning.
#
# NOTE(review): the result of the tie on the temporary database is not
# checked; if it fails, %new_toks is an ordinary in-memory hash and no new
# file exists to rename, yet the counts are returned as if the expiry had
# happened.  A failed rename is likewise only warned about.
sub token_expiration {
my ($self, $opts, $newdelta, @vars) = @_;
my $deleted = 0;
my $kept = 0;
my $num_hapaxes = 0;
my $num_lowfreq = 0;
my $main = $self->{bayes}->{main};
my $path = $main->sed_path($main->{conf}->{bayes_path});
# Temporary database name includes the process id.
my $tmpsuffix = "expire$$";
my $tmpdbname = $path.'_toks.'.$tmpsuffix;
# Remove any file already present under the temporary name.
for my $ext ($self->DB_EXTENSIONS) { unlink ($tmpdbname.$ext); }
my %new_toks;
my $umask = umask 0;
tie %new_toks, $self->DBM_MODULE, $tmpdbname, O_RDWR|O_CREAT|O_EXCL,
(oct ($main->{conf}->{bayes_file_mode}) & 0666);
umask $umask;
# Smallest atime among the tokens that are kept.
my $oldest;
my $showdots = $opts->{showdots};
if ($showdots) { print STDERR "\n"; }
# Record the delta used for this run.
$new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = $newdelta;
# Cut-off: newest atime minus the delta.
my $too_old = $vars[10] - $newdelta;
my ($tok, $packed);
while (($tok, $packed) = each %{$self->{db_toks}}) {
# Bookkeeping keys are written separately below.
next if ($tok =~ MAGIC_RE);
my ($ts, $th, $atime) = $self->tok_unpack ($packed);
if ($atime < $too_old) {
$deleted++;
}
else {
# Clamp an atime that is later than the recorded newest atime.
if ($atime > $vars[10]) {
$atime = $vars[10];
}
$new_toks{$tok} = $self->tok_pack ($ts, $th, $atime); $kept++;
if (!defined($oldest) || $atime < $oldest) { $oldest = $atime; }
# Hapax: seen exactly once in total.  Low frequency: both counts
# below 8 (and not a hapax).
if ($ts + $th == 1) {
$num_hapaxes++;
} elsif ($ts < 8 && $th < 8) {
$num_lowfreq++;
}
}
# Every 1000 processed tokens: progress dot and lock refresh.
if ((($kept + $deleted) % 1000) == 0) {
if ($showdots) { print STDERR "."; }
$self->set_running_expire_tok();
}
}
# Bookkeeping entries for the new database.  The running-expire key is
# not written to it.
$new_toks{$DB_VERSION_MAGIC_TOKEN} = $self->DB_VERSION;
$new_toks{$NSPAM_MAGIC_TOKEN} = $vars[1];
$new_toks{$NHAM_MAGIC_TOKEN} = $vars[2];
$new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $vars[10];
$new_toks{$NTOKENS_MAGIC_TOKEN} = $kept;
$new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = time();
$new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $oldest;
$new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = $deleted;
# Safety net: refuse to shrink the database below 100000 tokens.
if ($kept < 100000) {
dbg("bayes: token expiration would expire too many tokens, aborting");
# Update the live database's bookkeeping instead: expiry time is now,
# nothing was removed, and the stored delta is cleared.
$self->{db_toks}->{$LAST_EXPIRE_MAGIC_TOKEN} = time();
$self->{db_toks}->{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = 0;
$self->{db_toks}->{$LAST_ATIME_DELTA_MAGIC_TOKEN} = 0;
# Discard the temporary database.
untie %new_toks;
for my $ext ($self->DB_EXTENSIONS) { unlink ($tmpdbname.$ext); }
# Report the unchanged token count and no removals.
$kept = $vars[3];
$deleted = 0;
$num_hapaxes = 0;
$num_lowfreq = 0;
}
else {
untie %{$self->{db_toks}};
untie %new_toks;
{
# Ignore interrupting signals while the files are swapped.
local $SIG{'INT'} = 'IGNORE';
local $SIG{'TERM'} = 'IGNORE';
local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());
for my $ext ($self->DB_EXTENSIONS) {
my $newf = $tmpdbname.$ext;
my $oldf = $path.'_toks'.$ext;
next unless (-f $newf);
if (!rename ($newf, $oldf)) {
warn "bayes: rename $newf to $oldf failed: $!\n";
}
}
}
}
# untie_db() also releases the lock if it is held.
$self->untie_db();
return ($kept, $deleted, $num_hapaxes, $num_lowfreq);
}
# Decide whether the journal should be merged into the database now.
#
# Returns 0 when the database is older than the current format, when
# bayes_journal_max_size is 0, or when the journal does not exist as a
# regular file.  Otherwise returns 1 when the journal is larger than
# bayes_journal_max_size, or when a previous sync is recorded and lies more
# than 86400 seconds back; 0 in every other case.
sub sync_due {
  my ($self) = @_;

  return 0 if ($self->{db_version} < $self->DB_VERSION);

  my $conf = $self->{bayes}->{main}->{conf};
  return 0 if ($conf->{bayes_journal_max_size} == 0);

  my @vars = $self->get_storage_variables();
  my $last_sync = $vars[7];
  dbg("bayes: DB journal sync: last sync: ".$last_sync,'bayes','-1');

  # stat() once; the "_" tests below reuse its result.
  my $journal = $self->_get_journal_filename();
  return 0 unless (stat($journal) && -f _);

  if (-s _ > $conf->{bayes_journal_max_size}) {
    return 1;
  }

  if (($last_sync > 0) && (time() - $last_sync > 86400)) {
    return 1;
  }

  return 0;
}
# Look up the flag stored for message id $msgid in the "seen" database.
# Returns undef when there is no entry.
sub seen_get {
  my ($self, $msgid) = @_;
  return $self->{db_seen}{$msgid};
}
# Record flag $seen for message id $msgid.  With learn_to_journal set the
# change is queued as an "m <flag> <msgid>" journal line; otherwise it is
# written to the "seen" database straight away.
sub seen_put {
  my ($self, $msgid, $seen) = @_;

  my $journal_mode = $self->{bayes}->{main}->{learn_to_journal};

  return $self->defer_update ("m $seen $msgid") if $journal_mode;
  return $self->_seen_put_direct($msgid, $seen);
}
# Store flag $seen for message id $msgid directly in the "seen" database.
sub _seen_put_direct {
  my ($self, $msgid, $seen) = @_;
  return $self->{db_seen}{$msgid} = $seen;
}
# Forget message id $msgid.  With learn_to_journal set the removal is
# queued as an "m f <msgid>" journal line; otherwise the entry is deleted
# from the "seen" database straight away.
sub seen_delete {
  my ($self, $msgid) = @_;

  my $journal_mode = $self->{bayes}->{main}->{learn_to_journal};

  return $self->defer_update ("m f $msgid") if $journal_mode;
  return $self->_seen_delete_direct($msgid);
}
# Remove the entry for message id $msgid from the "seen" database.
sub _seen_delete_direct {
  my ($self, $msgid) = @_;
  return delete $self->{db_seen}{$msgid};
}
# Fetch the stored record for token $tok and return whatever tok_unpack()
# makes of it (callers in this file read it as spam count, ham count, atime).
sub tok_get {
  my ($self, $tok) = @_;

  my $packed = $self->{db_toks}->{$tok};
  return $self->tok_unpack($packed);
}
# Fetch the records for several tokens at once.
# Returns an array reference holding one [token, spam_count, ham_count,
# atime] row per requested token, in the order the tokens were given.
sub tok_get_all {
  my ($self, @tokens) = @_;

  my @rows = map {
    my $token = $_;
    my ($spam, $ham, $atime) = $self->tok_unpack($self->{db_toks}->{$token});
    [ $token, $spam, $ham, $atime ];
  } @tokens;

  return \@rows;
}
# Read the bookkeeping values out of the toks database.
#
# Returns an 11-element list:
#    0  scan-count base (always 0 for format v2 and later)
#    1  number of spam messages learned
#    2  number of ham messages learned
#    3  number of tokens
#    4  time of last expiry
#    5  oldest token atime
#    6  database format version
#    7  time of last journal sync
#    8  last atime delta
#    9  number of tokens removed by the last expiry
#   10  newest token atime
# Elements 7-10 are always 0 for format v0/v1.  Any value that is false or
# contains a non-digit character is returned as 0.
sub get_storage_variables {
  my ($self) = @_;

  my $db_ver = $self->{db_toks}->{$DB_VERSION_MAGIC_TOKEN};
  $db_ver = 0 if (!$db_ver || $db_ver =~ /\D/);

  # Key prefix used from format v1 onward; v0 used "**" instead.
  my $ctrl_prefix = "\015\001\007\011\003";

  # Fetch the values stored under "<prefix><name>" for each name, in order.
  my $lookup = sub {
    my ($prefix, @names) = @_;
    return map { $self->{db_toks}->{$prefix.$_} } @names;
  };

  my @values;
  if ($db_ver >= 2) {
    @values = (
      0,
      $lookup->($ctrl_prefix, qw(NSPAM NHAM NTOKENS LASTEXPIRE OLDESTAGE)),
      $db_ver,
      $lookup->($ctrl_prefix,
                qw(LASTJOURNALSYNC LASTATIMEDELTA LASTEXPIREREDUCE NEWESTAGE)),
    );
  }
  elsif ($db_ver == 0 || $db_ver == 1) {
    my $is_v0 = ($db_ver == 0);
    my $prefix = $is_v0 ? '**' : $ctrl_prefix;
    @values = (
      $lookup->($prefix, qw(SCANBASE NSPAM NHAM NTOKENS LASTEXPIRE OLDESTAGE)),
      ($is_v0 ? 0 : 1),
      0, 0, 0, 0,
    );
  }

  # Normalise anything missing or non-numeric to 0.
  @values = map { (!$_ || $_ =~ /\D/) ? 0 : $_ } @values;

  return @values;
}
# Print one line per token in the toks database.
#
# Parameters:
#   $template - printf format; it is given, in order: probability, spam
#               count, ham count, atime, and the token as a hex string
#   $regex    - optional pattern; when defined, only tokens matching it
#               are printed
#   @vars     - list in the layout returned by get_storage_variables();
#               elements [1] (nspam) and [2] (nham) are passed on to
#               compute_prob_for_token()
#
# A false probability is printed as 0.5.
sub dump_db_toks {
  my ($self, $template, $regex, @vars) = @_;

  # Compile the filter once per call.  The previous /$regex/o form
  # compiled it once per *process*, so a later call with a different
  # $regex silently kept using the first pattern.
  my $filter = defined $regex ? qr/$regex/ : undef;

  while (my ($tok, $tokvalue) = each %{$self->{db_toks}}) {
    next if ($tok =~ MAGIC_RE);    # bookkeeping entry, not a token
    next if (defined $filter && $tok !~ $filter);

    my ($ts, $th, $atime) = $self->tok_unpack ($tokvalue);

    my $prob = $self->{bayes}->compute_prob_for_token($tok, $vars[1], $vars[2], $ts, $th);
    $prob ||= 0.5;

    # Tokens may be binary; print them hex-encoded.
    my $encoded_tok = unpack("H*",$tok);
    printf $template,$prob,$ts,$th,$atime,$encoded_tok;
  }
}
# Record the time of the last expiry run in the toks database.
#
# Parameters:
#   $time - timestamp to record; when omitted (undef) the current time is
#           used.  Previously the argument was accepted but ignored and the
#           current time was always stored.
sub set_last_expire {
  my ($self, $time) = @_;

  $time = time() unless defined $time;
  $self->{db_toks}->{$LAST_EXPIRE_MAGIC_TOKEN} = $time;
}
# Return the timestamp stored under the running-expire key, or undef when
# the key is missing, false, or contains a non-digit character.
sub get_running_expire_tok {
  my ($self) = @_;

  my $stamp = $self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN};

  return $stamp if ($stamp && $stamp !~ /\D/);

  # Explicit undef (a one-element list in list context) is kept on purpose
  # so existing callers see exactly the same value.
  return undef;
}
# Refresh the lock on the locked file and stamp the running-expire key in
# the toks database with the current time.
sub set_running_expire_tok {
  my ($self) = @_;

  my $locker = $self->{bayes}->{main}->{locker};
  $locker->refresh_lock ($self->{locked_file});

  return $self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN} = time();
}
# Delete the running-expire key from the toks database.
sub remove_running_expire_tok {
  my ($self) = @_;
  return delete $self->{db_toks}->{$RUNNING_EXPIRE_MAGIC_TOKEN};
}
# Adjust the spam/ham counts of one token by $ds / $dh.
#
# With learn_to_journal set, the change is queued as a
# "c <ds> <dh> <atime> <hex token>" journal line (the token is hex-encoded
# because it may be binary); otherwise it is applied to the database
# immediately.  A missing $atime is treated as 0.
sub tok_count_change {
  my ($self, $ds, $dh, $tok, $atime) = @_;

  $atime = 0 if !defined $atime;

  if (!$self->{bayes}->{main}->{learn_to_journal}) {
    return $self->tok_sync_counters ($ds, $dh, $atime, $tok);
  }

  my $hex_tok = unpack("H*",$tok);
  return $self->defer_update ("c $ds $dh $atime $hex_tok");
}
# Adjust the spam/ham counts by $ds / $dh for every token that is a key of
# the hash reference $tokens.  Each token is handled as in
# tok_count_change(): journalled when learn_to_journal is set, applied
# directly otherwise.  A missing $atime is treated as 0.
sub multi_tok_count_change {
  my ($self, $ds, $dh, $tokens, $atime) = @_;

  $atime = 0 if !defined $atime;

  for my $tok (keys %{$tokens}) {
    if (!$self->{bayes}->{main}->{learn_to_journal}) {
      $self->tok_sync_counters ($ds, $dh, $atime, $tok);
      next;
    }

    # Hex-encode: the token may be binary.
    my $hex_tok = unpack("H*",$tok);
    $self->defer_update ("c $ds $dh $atime $hex_tok");
  }
}
# Return (number of spam messages, number of ham messages) as recorded in
# the database bookkeeping.
sub nspam_nham_get {
  my ($self) = @_;

  my (undef, $nspam, $nham) = $self->get_storage_variables();
  return ($nspam, $nham);
}
# Adjust the learned-message counters by $ds (spam) and $dh (ham).  With
# learn_to_journal set the change is queued as an "n <ds> <dh>" journal
# line; otherwise it is applied to the database immediately.
sub nspam_nham_change {
  my ($self, $ds, $dh) = @_;

  my $journal_mode = $self->{bayes}->{main}->{learn_to_journal};

  return $self->defer_update ("n $ds $dh") if $journal_mode;
  return $self->tok_sync_nspam_nham ($ds, $dh);
}
# Queue an access-time update for one token as a "t <atime> <hex token>"
# journal line.  This always goes through the journal buffer.
sub tok_touch {
  my ($self, $tok, $atime) = @_;
  return $self->defer_update ("t $atime " . unpack("H*", $tok));
}
# Queue an access-time update ("t <atime> <hex token>") for every token in
# the array reference $tokens, in order.
sub tok_touch_all {
  my ($self, $tokens, $atime) = @_;

  for my $tok (@{$tokens}) {
    my $hex_tok = unpack("H*", $tok);
    $self->defer_update ("t $atime $hex_tok");
  }
}
# Append one line (newline added here) to the in-memory journal buffer.
# The buffer is written out by cleanup().
sub defer_update {
  my ($self, $str) = @_;
  return $self->{string_to_journal} .= $str . "\n";
}
# Flush the in-memory journal buffer to the journal file.
#
# The buffer is appended with a single syswrite so that the whole batch
# lands in the file in one piece.  On a short write the file is truncated
# back to where it was and the write retried after a one-second pause; the
# attempt is abandoned when truncation fails or after repeated short
# writes.  The buffer is emptied afterwards in every case except when the
# journal cannot be opened at all.
#
# Returns nothing useful.
sub cleanup {
  my ($self) = @_;

  my $nbytes = length ($self->{string_to_journal});
  return if ($nbytes == 0);

  my $path = $self->_get_journal_filename();
  my $conf = $self->{bayes}->{main}->{conf};

  # Open with a umask that is the inverse of the wanted file mode, and
  # restore the previous umask straight away.  errno is captured first so
  # the umask call cannot disturb the message.
  my $umask = umask(0777 - (oct ($conf->{bayes_file_mode}) & 0666));
  my $opened = open (my $out, '>>', $path);
  my $open_err = "$!";
  umask $umask;
  if (!$opened) {
    warn "bayes: cannot write to $path, bayes db update ignored: $open_err\n";
    return;
  }

  my $write_failure = 0;
  my $original_point = tell $out;

  # A real loop block is required here: "last" cannot be used to leave a
  # "do { } while" construct, which is what this used to be.
  WRITE_ATTEMPT:
  while (1) {
    my $len = syswrite ($out, $self->{string_to_journal}, $nbytes);

    # Hard write failure: give up.
    if (!defined $len || $len < 0) {
      my $err = '';
      if (!defined $len) {
        $len = 0;
        $err = " ($!)";
      }
      warn "bayes: write failed to Bayes journal $path ($len of $nbytes)!$err\n";
      last WRITE_ATTEMPT;
    }

    last WRITE_ATTEMPT if ($len == $nbytes);

    # Short write: roll the file back so no partial batch is left behind,
    # then retry.
    warn "bayes: partial write to bayes journal $path ($len of $nbytes), recovering\n";
    if (!truncate($out, $original_point) || ($write_failure++ > 4)) {
      warn "bayes: cannot write to bayes journal $path, aborting!\n";
      last WRITE_ATTEMPT;
    }
    sleep 1;
  }

  if (!close $out) {
    warn "bayes: cannot write to $path, bayes db update ignored\n";
  }

  $self->{string_to_journal} = '';
}
# Return the pattern that recognises bookkeeping keys for the database
# format in use: the "**NAME" form for a known version below 1, and
# MAGIC_RE (the control-character prefix) otherwise, including when the
# version is not known.
sub get_magic_re {
  my ($self) = @_;

  my $version = $self->{db_version};

  return qr/^\*\*[A-Z]+$/ if (defined $version && !($version >= 1));
  return MAGIC_RE;
}
# Public entry point for merging the journal into the database; hands
# $opts on to _sync_journal() and returns its result.
sub sync {
  my $self = shift;
  my $opts = shift;
  return $self->_sync_journal($opts);
}
# Merge the journal into the database, if there is a non-empty journal.
#
# Ties the database read/write and runs _sync_journal_trapped() inside an
# eval so that a die in there is turned into a warning.  Unless
# learn_caller_will_untie is set, the database is untied afterwards.
#
# Returns 0 when there is nothing to sync or an error was trapped,
# otherwise the result of _sync_journal_trapped() (0 if the tie failed).
sub _sync_journal {
  my ($self, $opts) = @_;

  my $journal = $self->_get_journal_filename();

  # Nothing to do unless the journal is an existing, non-empty plain file.
  return 0 unless (stat($journal) && -f _ && !-z _);

  my $synced = 0;
  eval {
    local $SIG{'__DIE__'};    # keep caller-installed die handlers out of this
    $synced = $self->_sync_journal_trapped($opts, $journal)
      if $self->tie_db_writable();
  };
  my $failure = $@;

  $self->untie_db()
    unless $self->{bayes}->{main}->{learn_caller_will_untie};

  if ($failure) {
    warn "bayes: $failure\n";
    return 0;
  }

  return $synced;
}
# Apply the journal file $path to the (already tied, locked) databases.
#
# The journal is renamed to "<path>.old", read line by line and removed
# once applied.  Recognised lines:
#   t <atime> <hex token>            token access-time update
#   c <ds> <dh> <atime> <hex token>  token count change
#   n <ds> <dh>                      learned-message counter change
#   m <h|s|f> <msgid>                "seen" entry: store flag h/s, or forget (f)
# Anything else produces a warning.  Access-time updates are collapsed so
# that each token is touched once, with the highest atime seen.
#
# $opts->{showdots} prints progress dots to STDERR; $opts->{verbose}
# prints the summary to STDOUT instead of the debug log.
#
# Returns 1 on success, 0 when the journal is missing/empty/unreadable or
# cannot be renamed or opened.
sub _sync_journal_trapped {
  my ($self, $opts, $path) = @_;

  # Refresh the lock and mark that a long-running operation is under way.
  $self->set_running_expire_tok();

  my $started = time();
  my $count = 0;
  my $total_count = 0;
  my %tokens = ();
  my $showdots = $opts->{showdots};
  my $retirepath = $path.".old";

  # Re-check the journal: it may have gone away since the caller looked.
  if (!stat($path) || !-f _ || -z _) {
    return 0;
  }

  if (!-r $path) {
    warn "bayes: bad permissions on journal, can't read: $path\n";
    return 0;
  }

  # Ignore interrupting signals while files are moved and applied.
  {
    local $SIG{'INT'} = 'IGNORE';
    local $SIG{'TERM'} = 'IGNORE';
    local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());

    # Retire the journal so new entries go to a fresh file.
    if (!rename ($path, $retirepath)) {
      warn "bayes: failed rename $path to $retirepath\n";
      return 0;
    }

    # Three-argument open with a lexical handle: the file name is used
    # exactly as given (as rename() used it) and no package-global handle
    # is involved.
    my $journal_fh;
    if (!open ($journal_fh, '<', $retirepath)) {
      warn "bayes: cannot open read $retirepath\n";
      return 0;
    }

    # A named loop variable keeps the caller's $_ untouched.
    while (my $line = <$journal_fh>) {
      $total_count++;

      if ($line =~ /^t (\d+) (.+)$/) {
        # Access-time update: remember only the highest atime per token.
        my ($atime, $tok) = ($1 + 0, pack("H*", $2));
        $tokens{$tok} = $atime
          if (!exists $tokens{$tok} || $atime > $tokens{$tok});
      }
      elsif ($line =~ /^c (-?\d+) (-?\d+) (\d+) (.+)$/) {
        my ($ds, $dh, $atime, $tok) = ($1 + 0, $2 + 0, $3 + 0, pack("H*", $4));
        $self->tok_sync_counters ($ds, $dh, $atime, $tok);
        $count++;
      }
      elsif ($line =~ /^n (-?\d+) (-?\d+)$/) {
        $self->tok_sync_nspam_nham ($1 + 0, $2 + 0);
        $count++;
      }
      elsif ($line =~ /^m ([hsf]) (.+)$/) {
        my ($flag, $msgid) = ($1, $2);
        if ($flag eq "f") {
          $self->_seen_delete_direct($msgid);
        }
        else {
          $self->_seen_put_direct($msgid, $flag);
        }
        $count++;
      }
      else {
        warn "bayes: gibberish entry found in journal: $line";
      }
    }
    close $journal_fh;

    # Apply the collapsed access-time updates, refreshing the lock every
    # 1000 entries.
    while (my ($k,$v) = each %tokens) {
      $self->tok_touch_token ($v, $k);

      if ((++$count % 1000) == 0) {
        if ($showdots) { print STDERR "."; }
        $self->set_running_expire_tok();
      }
    }

    if ($showdots) { print STDERR "\n"; }

    unlink ($retirepath) || warn "bayes: can't unlink $retirepath: $!\n";

    $self->{db_toks}->{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = $started;

    my $done = time();
    my $msg = ("bayes: synced databases from journal in " .
               ($done - $started) .
               " seconds: $count unique entries ($total_count total entries)");

    if ($opts->{verbose}) {
      print $msg,"\n";
    } else {
      dbg($msg);
    }
  }

  return 1;
}
# Raise the access time of token $tok to $atime.  The token is left alone
# when its stored atime is already the same or later.
sub tok_touch_token {
  my ($self, $atime, $tok) = @_;

  my ($spam, $ham, $stored_atime) = $self->tok_get ($tok);

  # Never move an access time backwards.
  if ($stored_atime >= $atime) {
    return;
  }

  return $self->tok_put ($tok, $spam, $ham, $atime);
}
# Apply a count change to token $tok directly in the database: add $ds to
# its spam count and $dh to its ham count (neither may drop below 0) and
# store it with the later of $atime and the atime already on record.
sub tok_sync_counters {
  my ($self, $ds, $dh, $atime, $tok) = @_;

  my ($spam, $ham, $stored_atime) = $self->tok_get ($tok);

  $spam += $ds;
  $spam = 0 if ($spam < 0);

  $ham += $dh;
  $ham = 0 if ($ham < 0);

  # Never move an access time backwards.
  if ($stored_atime > $atime) {
    $atime = $stored_atime;
  }

  return $self->tok_put ($tok, $spam, $ham, $atime);
}
# Write one token record to the toks database and keep the bookkeeping
# entries in step.
#
#   * Bookkeeping keys themselves are never written through here.
#   * Zero spam and zero ham counts mean "remove": an existing token is
#     deleted and the token counter decremented; an unknown one is ignored.
#   * Otherwise the record is stored (incrementing the token counter for a
#     new token) and the newest/oldest atime entries are widened to
#     include $atime when necessary.
sub tok_put {
  my ($self, $tok, $ts, $th, $atime) = @_;

  $ts = 0 unless $ts;
  $th = 0 unless $th;

  return if ($tok =~ MAGIC_RE);

  # defined() rather than exists() is what the original code uses to test
  # for presence in the tied hash.
  my $known = defined $self->{db_toks}->{$tok};

  if ($ts == 0 && $th == 0) {
    return unless $known;
    $self->{db_toks}->{$NTOKENS_MAGIC_TOKEN}--;
    delete $self->{db_toks}->{$tok};
  }
  else {
    $self->{db_toks}->{$NTOKENS_MAGIC_TOKEN}++ unless $known;

    $self->{db_toks}->{$tok} = $self->tok_pack ($ts, $th, $atime);

    my $newest = $self->{db_toks}->{$NEWEST_TOKEN_AGE_MAGIC_TOKEN};
    if (!defined ($newest) || $atime > $newest) {
      $self->{db_toks}->{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $atime;
    }

    # An empty string is treated the same as a missing value here.
    my $oldest = $self->{db_toks}->{$OLDEST_TOKEN_AGE_MAGIC_TOKEN};
    if (!defined ($oldest) || $oldest eq "" || $atime < $oldest) {
      $self->{db_toks}->{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $atime;
    }
  }
}
# Apply a change to the learned-message counters directly in the database:
# add $ds to the spam counter and $dh to the ham counter (neither may drop
# below 0) and store both.
sub tok_sync_nspam_nham {
  my ($self, $ds, $dh) = @_;

  my @vars = $self->get_storage_variables();
  my ($nspam, $nham) = @vars[1, 2];

  $nspam += $ds if $ds;
  $nspam = 0 if ($nspam < 0);

  $nham += $dh if $dh;
  $nham = 0 if ($nham < 0);

  $self->{db_toks}->{$NSPAM_MAGIC_TOKEN} = $nspam;
  $self->{db_toks}->{$NHAM_MAGIC_TOKEN} = $nham;
}
# Path of the journal file: the configured bayes_path with "_journal"
# appended, passed through sed_path().
sub _get_journal_filename {
  my ($self) = @_;

  my $main = $self->{bayes}->{main};
  my $template = $main->{conf}->{bayes_path}."_journal";

  return $main->sed_path($template);
}
# Import old-format database files into fresh DB_File databases.
#
# Looks in the directory of bayes_path for files named bayes_seen /
# bayes_toks (optionally with a ".suffix"), copies each to "old_<name>",
# removes the originals, ties new databases read/write and fills them from
# the "old_" copies via upgrade_old_dbm_files_trapped().
#
# Prints the outcome to STDOUT.  Returns 2 when both databases were
# imported, a smaller number when an import failed, and 0 (after a
# warning) when an error was trapped.  The databases are untied before
# returning.
#
# NOTE(review): the file names are matched against the literal prefix
# "bayes_", independent of the basename configured in bayes_path.
sub perform_upgrade {
  my ($self, $opts) = @_;
  my $ret = 0;

  eval {
    local $SIG{'__DIE__'};    # keep caller-installed die handlers out of this

    use File::Basename;
    use File::Copy;

    my $main = $self->{bayes}->{main};
    my $path = $main->sed_path($main->{conf}->{bayes_path});
    my $dir = dirname($path);

    opendir(my $dirh, $dir) || die "bayes: can't opendir $dir: $!";
    my @files = grep { /^bayes_(?:seen|toks)(?:\.\w+)?$/ } readdir($dirh);
    closedir($dirh);

    if (@files < 2 || !grep(/bayes_seen/,@files) || !grep(/bayes_toks/,@files))
    {
      die "bayes: unable to find bayes_toks and bayes_seen, stopping\n";
    }

    # Pass each name through a capture (this untaints it under taint
    # mode).  The earlier form, map { /(.*)/, $1 }, returned both the
    # match result and $1 and therefore listed every file twice, so each
    # file was copied and unlinked twice.
    @files = map { /(.*)/ ? $1 : () } @files;

    # Keep a copy of every original under an "old_" prefix ...
    for my $file (@files) {
      my $src = "$dir/$file";
      my $dst = "$dir/old_$file";
      copy($src, $dst) || die "bayes: can't copy $src to $dst: $!\n";
    }

    # ... then remove the originals so fresh databases can be created.
    for my $file (@files) {
      unlink("$dir/$file");
    }

    if ($self->tie_db_writable()) {
      $ret += $self->upgrade_old_dbm_files_trapped("$dir/old_bayes_seen",
                                                   $self->{db_seen});
      $ret += $self->upgrade_old_dbm_files_trapped("$dir/old_bayes_toks",
                                                   $self->{db_toks});
    }

    if ($ret == 2) {
      print "import successful, original files saved with \"old\" prefix\n";
    }
    else {
      print "import failed, original files saved with \"old\" prefix\n";
    }
  };
  my $err = $@;

  $self->untie_db();

  if ($err) {
    warn "bayes: perform_upgrade: $err\n";
    return 0;
  }

  $ret;
}
# Copy the contents of an old database file into an already tied hash.
#
# Parameters:
#   $filename - path of the old database file
#   $output   - reference to the (tied) hash that receives the entries
#
# The file is opened read-only with each of DB_File, GDBM_File, NDBM_File
# and SDBM_File in turn; the first module that yields at least one entry
# wins.  Progress is printed to STDOUT.
#
# Returns 1 as soon as one module copied entries, 0 if none did.
sub upgrade_old_dbm_files_trapped {
my ($self, $filename, $output) = @_;
my $count;
my %in;
print "upgrading to DB_File, please be patient: $filename\n";
for my $dbm ('DB_File', 'GDBM_File', 'NDBM_File', 'SDBM_File') {
$count = 0;
# String eval: the module name is only known at run time, and a module
# that is not installed must not abort the whole import.  The evaluated
# code sees the lexicals %in, $filename, $output and $count of this sub.
eval 'use ' . $dbm . ';
tie %in, "' . $dbm . '", $filename, O_RDONLY, 0600;
%{ $output } = %in;
$count = scalar keys %{ $output };
untie %in;
';
# Any error from the eval is reported as "module not installed"; the
# actual error text goes to the debug log.
if ($@) {
print "$dbm: $dbm module not installed, nothing copied\n";
dbg("bayes: error was: $@");
}
elsif ($count == 0) {
print "$dbm: no database of that kind found, nothing copied\n";
}
else {
print "$dbm: copied $count entries\n";
return 1;
}
}
return 0;
}
# Delete the Bayes database files and the journal.
#
# Takes the write lock by tying read/write, unties the database hashes,
# then unlinks "<bayes_path>_<name><ext>" for the toks, seen and journal
# names under every known extension, followed by one more attempt on the
# bare journal name.  Each attempt is logged.  Finally untie_db() is
# called.
#
# Returns 1, or 0 when the databases could not be tied read/write.
sub clear_database {
  my ($self) = @_;

  return 0 unless ($self->tie_db_writable());

  dbg("bayes: untie-ing in preparation for removal.");

  for my $dbname (@DBNAMES) {
    my $slot = 'db_'.$dbname;
    next unless exists $self->{$slot};
    untie %{$self->{$slot}};
    delete $self->{$slot};
  }

  my $main = $self->{bayes}->{main};
  my $path = $main->sed_path($main->{conf}->{bayes_path});

  # Collect the candidate file names in the order they are to be removed.
  my @candidates;
  for my $dbname (@DBNAMES, 'journal') {
    for my $ext ($self->DB_EXTENSIONS) {
      push @candidates, $path.'_'.$dbname.$ext;
    }
  }
  # The bare journal name is attempted once more at the end.
  push @candidates, $path.'_'.'journal';

  for my $name (@candidates) {
    my $removed = unlink $name;
    dbg("bayes: clear_database: " . ($removed ? 'removed' : 'tried to remove') . " $name");
  }

  $self->untie_db();

  return 1;
}
# Dump the databases to STDOUT in a tab-separated text format:
#   v <value> <name>                       version and message counters
#   t <spam> <ham> <atime> <hex token>     one line per token
#   s <flag> <msgid>                       one line per "seen" entry
# The db_version line is always written first.
#
# Returns 1, or 0 when the databases could not be tied read/write.  The
# databases are untied before returning.
sub backup_database {
  my ($self) = @_;

  return 0 unless ($self->tie_db_writable());

  my @vars = $self->get_storage_variables();
  my ($nspam, $nham, $version) = @vars[1, 2, 6];

  print "v\t$version\tdb_version # this must be the first line!!!\n";
  print "v\t$nspam\tnum_spam\n";
  print "v\t$nham\tnum_nonspam\n";

  while (my ($tok, $packed) = each %{$self->{db_toks}}) {
    next if ($tok =~ MAGIC_RE);    # bookkeeping entry, not a token

    my ($ts, $th, $atime) = $self->tok_unpack($packed);
    # Tokens may be binary; write them hex-encoded.
    my $hex_tok = unpack("H*",$tok);
    print "t\t$ts\t$th\t$atime\t$hex_tok\n";
  }

  while (my ($msgid, $flag) = each %{$self->{db_seen}}) {
    print "s\t$flag\t$msgid\n";
  }

  $self->untie_db();

  return 1;
}
# restore_database($filename, $showdots)
#
# Rebuilds the toks and seen databases from a text dump file.  The dump is
# read line by line; fields are separated by whitespace:
#
#   v <value> <name>                  variable (db_version, num_spam, num_nonspam)
#   t <spam> <ham> <atime> <token>    token record
#   s <flag> <msgid>                  seen record, flag is 'h' or 's'
#
# The first line must be the db_version line, and only versions 2 and 3 are
# accepted.  Data is loaded into temporary DBM files (suffix "convert<pid>")
# which are renamed over the real files only after the whole dump has been
# parsed and both num_spam and num_nonspam were found.
#
# If $showdots is true, a "." is printed to STDERR every 1000 lines.
#
# Returns 1 on success, 0 on any failure.
sub restore_database {
  my ($self, $filename, $showdots) = @_;

  # A lexical handle is closed automatically when it goes out of scope, so
  # none of the early-return paths below can leak the open dump file.
  my $dumpfile;
  if (!open($dumpfile, '<', $filename)) {
    dbg("bayes: unable to open backup file $filename: $!");
    return 0;
  }

  if (!$self->tie_db_writable()) {
    dbg("bayes: failed to tie db writable");
    return 0;
  }

  my $main = $self->{bayes}->{main};
  my $path = $main->sed_path($main->{conf}->{bayes_path});

  # The new databases are built under temporary names and swapped in at the end.
  my $tmpsuffix = "convert$$";
  my $tmptoksdbname = $path.'_toks.'.$tmpsuffix;
  my $tmpseendbname = $path.'_seen.'.$tmpsuffix;
  my $toksdbname = $path.'_toks';
  my $seendbname = $path.'_seen';

  my %new_toks;
  my %new_seen;

  # Clear the umask while creating the files so bayes_file_mode applies as given.
  my $umask = umask 0;
  unless (tie %new_toks, $self->DBM_MODULE, $tmptoksdbname, O_RDWR|O_CREAT|O_EXCL,
          (oct ($main->{conf}->{bayes_file_mode}) & 0666)) {
    dbg("bayes: failed to tie temp toks db: $!");
    $self->untie_db();
    umask $umask;
    return 0;
  }
  unless (tie %new_seen, $self->DBM_MODULE, $tmpseendbname, O_RDWR|O_CREAT|O_EXCL,
          (oct ($main->{conf}->{bayes_file_mode}) & 0666)) {
    dbg("bayes: failed to tie temp seen db: $!");
    untie %new_toks;
    $self->_unlink_file($tmptoksdbname);
    $self->untie_db();
    umask $umask;
    return 0;
  }
  umask $umask;

  # Shared failure path once both temp databases exist: drop them, release
  # the real database and report failure.
  my $discard_temp_dbs = sub {
    untie %new_toks;
    untie %new_seen;
    $self->_unlink_file($tmptoksdbname);
    $self->_unlink_file($tmpseendbname);
    $self->untie_db();
    return 0;
  };

  my $line_count = 0;
  my $db_version;
  my $token_count = 0;
  my $num_spam;
  my $num_ham;
  my $error_p = 0;
  my $newest_token_age = 0;
  # Start above any atime that can be stored: token atimes are clamped to
  # time() below, so the first token always lowers this value.
  my $oldest_token_age = time() + 100000;

  my $first_line = <$dumpfile>;
  $line_count++;

  # An empty file yields undef here; treat it like a missing version line.
  if (defined($first_line) && $first_line =~ m/^v\s+(\d+)\s+db_version/) {
    $db_version = $1;
  }
  else {
    dbg("bayes: database version must be the first line in the backup file, correct and re-run");
    return $discard_temp_dbs->();
  }

  unless ($db_version == 2 || $db_version == 3) {
    warn("bayes: database version $db_version is unsupported, must be version 2 or 3");
    return $discard_temp_dbs->();
  }

  while (my $line = <$dumpfile>) {
    chomp($line);
    $line_count++;

    if ($line_count % 1000 == 0) {
      print STDERR "." if ($showdots);
    }

    if ($line =~ /^v\s+/) {
      my @parsed_line = split(/\s+/, $line, 3);
      my $value = $parsed_line[1] + 0;
      # A line without a name falls through to the "unknown line" branch.
      my $name = defined($parsed_line[2]) ? $parsed_line[2] : '';
      if ($name eq 'num_spam') {
        $num_spam = $value;
      }
      elsif ($name eq 'num_nonspam') {
        $num_ham = $value;
      }
      else {
        dbg("bayes: restore_database: skipping unknown line: $line");
      }
    }
    elsif ($line =~ /^t\s+/) {
      my @parsed_line = split(/\s+/, $line, 5);
      # A truncated line has no token to store; skip it rather than
      # recording counts under an empty key.
      if (@parsed_line < 5) {
        dbg("bayes: token line is missing fields, skipping: $line");
        next;
      }
      my $spam_count = $parsed_line[1] + 0;
      my $ham_count = $parsed_line[2] + 0;
      my $atime = $parsed_line[3] + 0;
      my $token = $parsed_line[4];

      my $token_warn_p = 0;
      my @warnings;

      if ($spam_count < 0) {
        $spam_count = 0;
        push(@warnings, 'spam count < 0, resetting');
        $token_warn_p = 1;
      }
      if ($ham_count < 0) {
        $ham_count = 0;
        push(@warnings, 'ham count < 0, resetting');
        $token_warn_p = 1;
      }

      if ($spam_count == 0 && $ham_count == 0) {
        dbg("bayes: token has zero spam and ham count, skipping");
        next;
      }

      if ($atime > time()) {
        $atime = time();
        push(@warnings, 'atime > current time, resetting');
        $token_warn_p = 1;
      }

      if ($token_warn_p) {
        dbg("bayes: token ($token) has the following warnings:\n".join("\n",@warnings));
      }

      if ($db_version < 3) {
        # Older dumps hold the raw token text: reduce it to the last
        # 5 bytes of its SHA1 digest.
        $token = substr(sha1($token), -5);
      }
      else {
        # Version 3 dumps hold the token as a hex string.
        $token = pack("H*",$token);
      }

      $new_toks{$token} = $self->tok_pack($spam_count, $ham_count, $atime);
      if ($atime < $oldest_token_age) {
        $oldest_token_age = $atime;
      }
      if ($atime > $newest_token_age) {
        $newest_token_age = $atime;
      }
      $token_count++;
    }
    elsif ($line =~ /^s\s+/) {
      my @parsed_line = split(/\s+/, $line, 3);
      my $flag = $parsed_line[1];
      my $msgid = $parsed_line[2];

      unless ($flag eq 'h' || $flag eq 's') {
        dbg("bayes: unknown seen flag ($flag) for line: $line, skipping");
        next;
      }

      unless ($msgid) {
        dbg("bayes: blank msgid for line: $line, skipping");
        next;
      }

      $new_seen{$msgid} = $flag;
    }
    else {
      dbg("bayes: skipping unknown line: $line");
      next;
    }
  }
  close($dumpfile);

  print STDERR "\n" if ($showdots);

  unless (defined($num_spam)) {
    dbg("bayes: unable to find num spam, please check file");
    $error_p = 1;
  }

  unless (defined($num_ham)) {
    dbg("bayes: unable to find num ham, please check file");
    $error_p = 1;
  }

  if ($error_p) {
    dbg("bayes: error(s) while attempting to load $filename, correct and re-run");
    return $discard_temp_dbs->();
  }

  # Write the bookkeeping entries that live alongside the tokens.
  $new_toks{$DB_VERSION_MAGIC_TOKEN} = $self->DB_VERSION();
  $new_toks{$NTOKENS_MAGIC_TOKEN} = $token_count;
  $new_toks{$NSPAM_MAGIC_TOKEN} = $num_spam;
  $new_toks{$NHAM_MAGIC_TOKEN} = $num_ham;
  $new_toks{$NEWEST_TOKEN_AGE_MAGIC_TOKEN} = $newest_token_age;
  $new_toks{$OLDEST_TOKEN_AGE_MAGIC_TOKEN} = $oldest_token_age;

  # Expiry and journal state start from scratch in the restored database.
  $new_toks{$LAST_EXPIRE_MAGIC_TOKEN} = 0;
  $new_toks{$LAST_JOURNAL_SYNC_MAGIC_TOKEN} = 0;
  $new_toks{$LAST_ATIME_DELTA_MAGIC_TOKEN} = 0;
  $new_toks{$LAST_EXPIRE_REDUCE_MAGIC_TOKEN} = 0;

  # Ignore INT/TERM (and HUP off Windows) while the files are swapped in.
  local $SIG{'INT'} = 'IGNORE';
  local $SIG{'TERM'} = 'IGNORE';
  local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());

  untie %new_toks;
  untie %new_seen;
  $self->untie_db();

  unless ($self->_rename_file($tmptoksdbname, $toksdbname)) {
    dbg("bayes: error while renaming $tmptoksdbname to $toksdbname: $!");
    return 0;
  }
  unless ($self->_rename_file($tmpseendbname, $seendbname)) {
    dbg("bayes: error while renaming $tmpseendbname to $seendbname: $!");
    dbg("bayes: database now in inconsistent state");
    return 0;
  }

  dbg("bayes: parsed $line_count lines");
  dbg("bayes: created database with $token_count tokens based on $num_spam spam messages and $num_ham ham messages");

  return 1;
}
# Layout of the first byte of a packed token value, as used by tok_pack
# and tok_unpack.

# Mask selecting the two high bits, which identify the record format.
use constant FORMAT_FLAG      => 0xc0;

# Both high bits set: spam and ham counts are held in the first byte itself.
use constant ONE_BYTE_FORMAT  => 0xc0;

# Both high bits clear: spam and ham counts follow as two 32-bit values.
use constant TWO_LONGS_FORMAT => 0x00;

# In the one-byte format: bits 3-5 hold the spam count ...
use constant ONE_BYTE_SSS_BITS => 0x38;

# ... and bits 0-2 hold the ham count.
use constant ONE_BYTE_HHH_BITS => 0x07;
# tok_unpack($value)
#
# Decodes a packed token value into (spam count, ham count, atime).
# The pack templates depend on $self->{db_version}: version 1 and later
# use "V" fields throughout, version 0 uses "S" for the atime and "L" for
# the counts.  An unrecognised format byte produces a warning and (0, 0, 0).
sub tok_unpack {
  my ($self, $value) = @_;
  $value ||= 0;

  # First pass: read the leading format byte plus the atime that directly
  # follows it in the one-byte layout.
  my ($packed, $atime) =
      $self->{db_version} >= 1 ? unpack("CV", $value)
    : $self->{db_version} == 0 ? unpack("CS", $value)
    :                            ();

  # Compact layout: both counts are bit fields of the leading byte.
  if (($packed & FORMAT_FLAG) == ONE_BYTE_FORMAT) {
    my $spam_bits = ($packed & ONE_BYTE_SSS_BITS) >> 3;
    my $ham_bits = $packed & ONE_BYTE_HHH_BITS;
    return ($spam_bits, $ham_bits, $atime || 0);
  }

  # Long layout: decode the value again with the full template.
  if (($packed & FORMAT_FLAG) == TWO_LONGS_FORMAT) {
    my (undef, $ts, $th, $long_atime) =
        $self->{db_version} >= 1 ? unpack("CVVV", $value)
      : $self->{db_version} == 0 ? unpack("CLLS", $value)
      :                            ();
    return ($ts || 0, $th || 0, $long_atime || 0);
  }

  warn "bayes: unknown packing format for bayes db, please re-learn: $packed";
  return (0, 0, 0);
}
# tok_pack($ts, $th, $atime)
#
# Encodes a spam count, ham count and atime into a packed token value.
# When both counts are below 8 they are folded into the leading format
# byte ("CV"); otherwise they are stored as separate fields ("CVVV").
# False or missing arguments are treated as 0.
sub tok_pack {
  my ($self, $ts, $th, $atime) = @_;

  # The loop variable aliases each argument, so this defaults them in place.
  for my $field ($ts, $th, $atime) {
    $field ||= 0;
  }

  my $fits_in_one_byte = ($ts < 8 && $th < 8);
  unless ($fits_in_one_byte) {
    return pack("CVVV", TWO_LONGS_FORMAT, $ts, $th, $atime);
  }

  my $lead_byte = ONE_BYTE_FORMAT | ($ts << 3) | $th;
  return pack("CV", $lead_byte, $atime);
}
# db_readable()
#
# Returns the object's already_tied flag.
sub db_readable {
  my $self = shift;
  my $tied = $self->{already_tied};
  return $tied;
}
# db_writable()
#
# Returns a true value only when both the already_tied and is_locked flags
# are set: the already_tied value if that is false, else the is_locked value.
sub db_writable {
  my $self = shift;

  my $tied = $self->{already_tied};
  return $tied unless $tied;

  return $self->{is_locked};
}
# _unlink_file($filename)
#
# Removes $filename and returns the result of unlink().
sub _unlink_file {
  my $self = shift;
  my ($filename) = @_;
  return unlink($filename);
}
# _rename_file($sourcefilename, $targetfilename)
#
# Renames the source file to the target name.
# Returns 1 when rename() succeeds, 0 otherwise.
sub _rename_file {
  my ($self, $sourcefilename, $targetfilename) = @_;
  my $renamed = rename($sourcefilename, $targetfilename);
  return $renamed ? 1 : 0;
}
# sa_die(@args)
#
# Passes all of its arguments straight through to Mail::SpamAssassin::sa_die.
sub sa_die {
  return Mail::SpamAssassin::sa_die(@_);
}
1;