=head1 NAME
Mail::SpamAssassin::BayesStore::SQL - SQL Bayesian Storage Module Implementation
=head1 SYNOPSIS
=head1 DESCRIPTION
This module implements an SQL-based Bayesian storage module.
=cut
package Mail::SpamAssassin::BayesStore::SQL;
use strict;
use warnings;
use bytes;
use Mail::SpamAssassin::BayesStore;
use Mail::SpamAssassin::Logger;
use Digest::SHA1 qw(sha1);
use vars qw( @ISA );
@ISA = qw( Mail::SpamAssassin::BayesStore );
use constant HAS_DBI => eval { require DBI; };
=head1 METHODS
=head2 new
public class (Mail::SpamAssassin::BayesStore::SQL) new (Mail::SpamAssassin::Bayes $bayes)
Description:
This method creates a new instance of the Mail::SpamAssassin::BayesStore::SQL
object. It expects to be passed an instance of the Mail::SpamAssassin::Bayes
object, which is passed into the Mail::SpamAssassin::BayesStore parent object.
This method sets up the database connection and determines the username to
use in queries.
=cut
sub new {
  my $class = shift;
  $class = ref($class) || $class;

  # Let the parent BayesStore constructor build the object first.
  my $self = $class->SUPER::new(@_);

  $self->{supported_db_version} = 3;
  $self->{db_writable_p} = 0;

  my $conf = $self->{bayes}->{conf};

  # A DSN is mandatory; without one the object is not created.
  unless ($conf->{bayes_sql_dsn}) {
    dbg("bayes: invalid config, must set bayes_sql_dsn config variable\n");
    return undef;
  }

  # Remember the connection settings; the handle itself is opened lazily
  # by the tie_db_* methods.
  $self->{_dsn}    = $conf->{bayes_sql_dsn};
  $self->{_dbuser} = $conf->{bayes_sql_username};
  $self->{_dbpass} = $conf->{bayes_sql_password};
  $self->{_dbh}    = undef;

  # Only logged here; the tie_db_* methods are what refuse to run without DBI.
  dbg("bayes: unable to connect to database: DBI module not available: $!")
    unless (HAS_DBI);

  # Username precedence: configured override, then the main object's
  # username, then the shared fallback name.
  $self->{_username} = $conf->{bayes_sql_override_username}
                    || $self->{bayes}->{main}->{username}
                    || "GLOBALBAYES";

  dbg("bayes: using username: ".$self->{_username});

  return $self;
}
=head2 tie_db_readonly
public instance (Boolean) tie_db_readonly ();
Description:
This method ensures that the database connection is properly setup
and working. If necessary it will initialize a user's bayes variables
so that they can begin using the database immediately.
=cut
sub tie_db_readonly {
  my ($self) = @_;

  return 0 unless (HAS_DBI);

  # An already open handle is reused, but is no longer flagged writable.
  if ($self->{_dbh}) {
    $self->{db_writable_p} = 0;
    return 1;
  }

  $self->read_db_configs();

  $self->_connect_db() or return 0;

  my $found_version = $self->_get_db_version();
  $self->{db_version} = $found_version;
  dbg("bayes: found bayes db version ".$self->{db_version});

  # Refuse to operate on a schema version other than the one we understand.
  unless ($found_version == $self->DB_VERSION) {
    warn("bayes: database version $found_version is different than we understand (".$self->DB_VERSION."), aborting!");
    $self->untie_db();
    return 0;
  }

  # Read-only access: the create-entry flag passed to _initialize_db is 0.
  return 1 if ($self->_initialize_db(0));

  dbg("bayes: unable to initialize database for ".$self->{_username}." user, aborting!");
  $self->untie_db();
  return 0;
}
=head2 tie_db_writable
public instance (Boolean) tie_db_writable ()
Description:
This method ensures that the database connection is properly setup
and working. If necessary it will initialize a user's bayes variables
so that they can begin using the database immediately.
=cut
sub tie_db_writable {
  my ($self) = @_;

  return 0 unless (HAS_DBI);

  # An already open handle is reused and simply flagged as writable.
  if ($self->{_dbh}) {
    $self->{db_writable_p} = 1;
    return 1;
  }

  $self->read_db_configs();

  $self->_connect_db() or return 0;

  my $found_version = $self->_get_db_version();
  $self->{db_version} = $found_version;
  dbg("bayes: found bayes db version ".$self->{db_version});

  # Refuse to operate on a schema version other than the one we understand.
  unless ($found_version == $self->DB_VERSION) {
    warn("bayes: database version $found_version is different than we understand (".$self->DB_VERSION."), aborting!");
    $self->untie_db();
    return 0;
  }

  # Writable access: the create-entry flag passed to _initialize_db is 1.
  if ($self->_initialize_db(1)) {
    $self->{db_writable_p} = 1;
    return 1;
  }

  dbg("bayes: unable to initialize database for ".$self->{_username}." user, aborting!");
  $self->untie_db();
  return 0;
}
=head2 untie_db
public instance () untie_db ()
Description:
This method disconnects from the database, if a connection is open, and
marks the store as no longer writable.
=cut
sub untie_db {
  my ($self) = @_;

  # Nothing to do when no connection is open.
  my $dbh = $self->{_dbh};
  return unless (defined($dbh));

  $self->{db_writable_p} = 0;
  $dbh->disconnect();
  $self->{_dbh} = undef;
}
=head2 calculate_expire_delta
public instance (%) calculate_expire_delta (Integer $newest_atime,
Integer $start,
Integer $max_expire_mult)
Description:
This method performs a calculation on the data to determine the optimum
atime for token expiration.
=cut
sub calculate_expire_delta {
  my ($self, $newest_atime, $start, $max_expire_mult) = @_;

  # Maps each multiplier (1, 2, 4, ...) to the number of this user's tokens
  # whose age relative to $newest_atime exceeds $start * multiplier.
  my %delta = ();

  return %delta unless (defined($self->{_dbh}));

  my $sql = join("\n",
    "SELECT count(*)",
    "FROM bayes_token",
    "WHERE id = ?",
    "AND (? - atime) > ?");

  my $sth = $self->{_dbh}->prepare_cached($sql);

  unless (defined($sth)) {
    dbg("bayes: calculate_expire_delta: SQL Error: ".$self->{_dbh}->errstr());
    return %delta;
  }

  # The multiplier doubles on every pass until it exceeds $max_expire_mult.
  for (my $i = 1; $i <= $max_expire_mult; $i<<=1) {
    my $rc = $sth->execute($self->{_userid}, $newest_atime, $start * $i);

    unless ($rc) {
      dbg("bayes: calculate_expire_delta: SQL error: ".$self->{_dbh}->errstr());
      # This sub returns a hash. Returning undef here would hand the caller a
      # one-element list, so return an empty list like the other failure
      # paths; partial results are discarded.
      $sth->finish();
      return ();
    }

    my ($count) = $sth->fetchrow_array();

    $delta{$i} = $count;
  }
  $sth->finish();

  return %delta;
}
=head2 token_expiration
public instance (Integer, Integer,
Integer, Integer) token_expiration(\% $opts,
Integer $newdelta,
@ @vars)
Description:
This method performs the database specific expiration of tokens based on
the passed in C<$newdelta> and C<@vars>.
=cut
sub token_expiration {
# Expires this user's tokens whose atime is older than (newest atime -
# $newdelta) and updates the bookkeeping columns in bayes_vars.
# Returns ($kept, $deleted, $num_hapaxes, $num_lowfreq); the last two are
# only computed when $opts->{verbose} is set.
# NOTE(review): @vars appears to follow the get_storage_variables layout
# ($vars[3] = token count, $vars[10] = newest token atime) -- confirm with caller.
# NOTE(review): unlike most methods here there is no guard for an undefined
# $self->{_dbh}; the caller is presumably expected to have tied the db.
my ($self, $opts, $newdelta, @vars) = @_;
my $num_hapaxes;
my $num_lowfreq;
my $deleted;
# Tokens with an atime below this cutoff are candidates for deletion.
my $too_old = $vars[10] - $newdelta;
# First clamp any token atime greater than $vars[10] down to $vars[10].
my $sql = "UPDATE bayes_token SET atime = ?
WHERE id = ?
AND atime > ?";
my $rows = $self->{_dbh}->do($sql, undef, $vars[10], $self->{_userid}, $vars[10]);
unless (defined($rows)) {
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
$deleted = 0;
goto token_expiration_final;
}
# Count how many tokens the delete would remove before actually doing it.
$sql = "SELECT count(token) FROM bayes_token
WHERE id = ?
AND atime < ?";
my $sth = $self->{_dbh}->prepare_cached($sql);
unless (defined($sth)) {
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
$deleted = 0;
goto token_expiration_final;
}
my $rc = $sth->execute($self->{_userid}, $too_old);
unless ($rc) {
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
$deleted = 0;
goto token_expiration_final;
}
my ($count) = $sth->fetchrow_array();
$sth->finish();
# Sanity check: skip the delete if fewer than 100000 tokens would remain.
if ($vars[3] - $count < 100000) {
dbg("bayes: token expiration would expire too many tokens, aborting");
# Nothing is deleted; bayes_vars is still updated below with a zero delta.
$deleted = 0;
$newdelta = 0;
}
else {
$sql = "DELETE from bayes_token
WHERE id = ?
AND atime < ?";
$rows = $self->{_dbh}->do($sql, undef, $self->{_userid}, $too_old);
unless (defined($rows)) {
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
$deleted = 0;
goto token_expiration_final;
}
$deleted = $rows;
}
# Record the outcome of this expiry run in the user's bayes_vars row.
$sql = "UPDATE bayes_vars SET token_count = token_count - ?,
last_expire = ?,
last_atime_delta = ?,
last_expire_reduce = ?
WHERE id = ?";
$rows = $self->{_dbh}->do($sql, undef, $deleted, time(), $newdelta, $deleted, $self->{_userid});
unless (defined($rows)) {
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
dbg("bayes: bayes database now in inconsistent state, suggest a backup/restore");
goto token_expiration_final;
}
# Deleting tokens may have removed the oldest one, so refresh that value.
if ($deleted) {
my $oldest_token_age = $self->_get_oldest_token_age();
$sql = "UPDATE bayes_vars SET oldest_token_age = ? WHERE id = ?";
$rows = $self->{_dbh}->do($sql, undef, $oldest_token_age, $self->{_userid});
unless (defined($rows)) {
dbg("bayes: token_expiration: SQL error: ".$self->{_dbh}->errstr());
goto token_expiration_final;
}
}
# Common exit point for success and every error path above.
token_expiration_final:
my $kept = $vars[3] - $deleted;
$num_hapaxes = $self->_get_num_hapaxes() if ($opts->{verbose});
$num_lowfreq = $self->_get_num_lowfreq() if ($opts->{verbose});
# The connection is always closed once expiry has finished.
$self->untie_db();
return ($kept, $deleted, $num_hapaxes, $num_lowfreq);
}
=head2 sync_due
public instance (Boolean) sync_due ()
Description:
This method determines if a database sync is currently required.
Unused for SQL based implementation.
=cut
sub sync_due {
  # This implementation never reports a sync as due; arguments are ignored.
  return 0;
}
=head2 seen_get
public instance (String) seen_get (string $msgid)
Description:
This method retrieves the stored value, if any, for C<$msgid>. The return value
is the stored string ('s' for spam and 'h' for ham) or undef if C<$msgid> is not
found.
=cut
sub seen_get {
  my ($self, $msgid) = @_;

  my $dbh = $self->{_dbh};
  return undef unless (defined($dbh));

  my $query = join("\n",
    "SELECT flag FROM bayes_seen",
    "WHERE id = ?",
    "AND msgid = ?");

  my $sth = $dbh->prepare_cached($query);
  if (!defined($sth)) {
    dbg("bayes: seen_get: SQL Error: ".$dbh->errstr());
    return undef;
  }

  if (!$sth->execute($self->{_userid}, $msgid)) {
    dbg("bayes: seen_get: SQL error: ".$dbh->errstr());
    return undef;
  }

  # Only the first matching row is read; $flag stays undef if none matched.
  my ($flag) = $sth->fetchrow_array();
  $sth->finish();

  return $flag;
}
=head2 seen_put
public (Boolean) seen_put (string $msgid, char $flag)
Description:
This method records C<$msgid> as the type given by C<$flag>. C<$flag> is one of
two values 's' for spam and 'h' for ham.
=cut
sub seen_put {
  my ($self, $msgid, $flag) = @_;

  # Both the message id and the flag must be true values.
  return 0 unless ($msgid);
  return 0 unless ($flag);

  my $dbh = $self->{_dbh};
  return 0 unless (defined($dbh));

  my $insert = join("\n",
    "INSERT INTO bayes_seen (id, msgid, flag)",
    "VALUES (?,?,?)");

  my $rows = $dbh->do($insert, undef, $self->{_userid}, $msgid, $flag);
  if (!defined($rows)) {
    dbg("bayes: seen_put: SQL error: ".$dbh->errstr());
    return 0;
  }

  dbg("bayes: seen ($msgid) put");
  return 1;
}
=head2 seen_delete
public instance (Boolean) seen_delete (string $msgid)
Description:
This method removes C<$msgid> from the database.
=cut
sub seen_delete {
  my ($self, $msgid) = @_;

  return 0 unless ($msgid);

  my $dbh = $self->{_dbh};
  return 0 unless (defined($dbh));

  my $delete = join("\n",
    "DELETE FROM bayes_seen",
    "WHERE id = ?",
    "AND msgid = ?");

  my $rows = $dbh->do($delete, undef, $self->{_userid}, $msgid);
  if (!defined($rows)) {
    dbg("bayes: seen_delete: SQL error: ".$dbh->errstr());
    return 0;
  }

  return 1;
}
=head2 get_storage_variables
public instance (@) get_storage_variables ()
Description:
This method retrieves the various administrative variables used by
the Bayes process and database.
The values returned in the array are in the following order:
0: scan count base
1: number of spam
2: number of ham
3: number of tokens in db
4: last expire atime
5: oldest token in db atime
6: db version value
7: last journal sync
8: last atime delta
9: last expire reduction count
10: newest token in db atime
=cut
sub get_storage_variables {
  my ($self) = @_;

  # Without a connection every slot of the 11-element result is 0.
  return (0,0,0,0,0,0,0,0,0,0,0) unless (defined($self->{_dbh}));

  my $sql = join("\n",
    "SELECT spam_count, ham_count, token_count, last_expire,",
    "last_atime_delta, last_expire_reduce, oldest_token_age,",
    "newest_token_age",
    "FROM bayes_vars",
    "WHERE id = ?");

  my $sth = $self->{_dbh}->prepare_cached($sql);
  if (!defined($sth)) {
    dbg("bayes: get_storage_variables: SQL error: ".$self->{_dbh}->errstr());
    return (0,0,0,0,0,0,0,0,0,0,0);
  }

  if (!$sth->execute($self->{_userid})) {
    dbg("bayes: get_storage_variables: SQL error: ".$self->{_dbh}->errstr());
    return (0,0,0,0,0,0,0,0,0,0,0);
  }

  # Name the selected columns in query order.
  my %col;
  @col{qw(spam ham tokens last_expire atime_delta expire_reduce oldest newest)}
    = $sth->fetchrow_array();
  $sth->finish();

  # Re-order into the documented result layout; slots 0 (scan count base)
  # and 7 (last journal sync) are always 0 in this implementation.
  my @values = (
    0,
    $col{spam},
    $col{ham},
    $col{tokens},
    $col{last_expire},
    $col{oldest},
    $self->DB_VERSION,
    0,
    $col{atime_delta},
    $col{expire_reduce},
    $col{newest},
  );

  return @values;
}
=head2 dump_db_toks
public instance () dump_db_toks (String $template, String $regex, Array @vars)
Description:
This method loops over all tokens, computing the probability for the token and then
printing it out according to the passed in template.
=cut
sub dump_db_toks {
  my ($self, $template, $regex, @vars) = @_;

  return unless (defined($self->{_dbh}));

  my $token_select = $self->_token_select_string();

  my $sql = join("\n",
    "SELECT $token_select, spam_count, ham_count, atime",
    "FROM bayes_token",
    "WHERE id = ?",
    "AND (spam_count > 0 OR ham_count > 0)");

  my $sth = $self->{_dbh}->prepare($sql);
  if (!defined($sth)) {
    dbg("bayes: dump_db_toks: SQL error: ".$self->{_dbh}->errstr());
    return;
  }

  if (!$sth->execute($self->{_userid})) {
    dbg("bayes: dump_db_toks: SQL error: ".$self->{_dbh}->errstr());
    return;
  }

  # $vars[1] and $vars[2] are passed to compute_prob_for_token for every token.
  my ($total_spam, $total_ham) = @vars[1, 2];

  while (my @row = $sth->fetchrow_array()) {
    my ($token, $spam_count, $ham_count, $atime) = @row;

    my $prob = $self->{bayes}->compute_prob_for_token($token, $total_spam, $total_ham,
                                                      $spam_count, $ham_count);
    # A false probability is printed as the neutral 0.5.
    $prob = 0.5 unless ($prob);

    # Tokens are printed hex encoded.
    my $encoded_token = unpack("H*", $token);

    printf $template, $prob, $spam_count, $ham_count, $atime, $encoded_token;
  }

  $sth->finish();

  return;
}
=head2 set_last_expire
public instance (Boolean) set_last_expire (Integer $time)
Description:
This method sets the last expire time.
=cut
sub set_last_expire {
  my ($self, $time) = @_;

  return 0 if (!defined($time));

  my $dbh = $self->{_dbh};
  return 0 if (!defined($dbh));

  my $rows = $dbh->do("UPDATE bayes_vars SET last_expire = ? WHERE id = ?",
                      undef, $time, $self->{_userid});
  if (!defined($rows)) {
    dbg("bayes: set_last_expire: SQL error: ".$dbh->errstr());
    return 0;
  }

  return 1;
}
=head2 get_running_expire_tok
public instance (String $time) get_running_expire_tok ()
Description:
This method determines if an expire is currently running and returns
the last time set.
There can be multiple times, so we just pull the greatest (most recent)
value.
=cut
sub get_running_expire_tok {
  my ($self) = @_;

  my $dbh = $self->{_dbh};
  return 0 if (!defined($dbh));

  # Several rows may exist for the user; only the greatest runtime is wanted.
  my $sth = $dbh->prepare_cached("SELECT max(runtime) from bayes_expire WHERE id = ?");
  if (!defined($sth)) {
    dbg("bayes: get_running_expire_tok: SQL error: ".$dbh->errstr());
    return 0;
  }

  if (!$sth->execute($self->{_userid})) {
    dbg("bayes: get_running_expire_tok: SQL error: ".$dbh->errstr());
    return 0;
  }

  my ($runtime) = $sth->fetchrow_array();
  $sth->finish();

  return $runtime;
}
=head2 set_running_expire_tok
public instance (String $time) set_running_expire_tok ()
Description:
This method sets the time that an expire starts running.
=cut
sub set_running_expire_tok {
  my ($self) = @_;

  my $dbh = $self->{_dbh};
  return 0 if (!defined($dbh));

  # The current time is stored as the marker and returned on success.
  my $now = time();

  my $rows = $dbh->do("INSERT INTO bayes_expire (id,runtime) VALUES (?,?)",
                      undef, $self->{_userid}, $now);
  if (!defined($rows)) {
    dbg("bayes: set_running_expire_tok: SQL error: ".$dbh->errstr());
    return undef;
  }

  return $now;
}
=head2 remove_running_expire_tok
public instance (Boolean) remove_running_expire_tok ()
Description:
This method removes the row in the database that indicates that
an expire is currently running.
=cut
sub remove_running_expire_tok {
  my ($self) = @_;

  my $dbh = $self->{_dbh};
  return 0 if (!defined($dbh));

  my $delete = join("\n",
    "DELETE from bayes_expire",
    "WHERE id = ?");

  my $rows = $dbh->do($delete, undef, $self->{_userid});
  if (!defined($rows)) {
    dbg("bayes: remove_running_expire_tok: SQL error: ".$dbh->errstr());
    return 0;
  }

  return 1;
}
=head2 tok_get
public instance (Integer, Integer, Integer) tok_get (String $token)
Description:
This method retrieves a specified token (C<$token>) from the database
and returns its spam_count, ham_count and last access time.
=cut
sub tok_get {
  my ($self, $token) = @_;

  my $dbh = $self->{_dbh};
  return (0,0,0) unless (defined($dbh));

  my $query = join("\n",
    "SELECT spam_count, ham_count, atime",
    "FROM bayes_token",
    "WHERE id = ?",
    "AND token = ?");

  my $sth = $dbh->prepare_cached($query);
  if (!defined($sth)) {
    dbg("bayes: tok_get: SQL error: ".$dbh->errstr());
    return (0,0,0);
  }

  if (!$sth->execute($self->{_userid}, $token)) {
    dbg("bayes: tok_get: SQL error: ".$dbh->errstr());
    return (0,0,0);
  }

  my ($spam_count, $ham_count, $atime) = $sth->fetchrow_array();
  $sth->finish();

  # Missing or negative counts are reported as 0, as is a missing atime.
  if (!$spam_count || $spam_count < 0) {
    $spam_count = 0;
  }
  if (!$ham_count || $ham_count < 0) {
    $ham_count = 0;
  }
  if (!$atime) {
    $atime = 0;
  }

  return ($spam_count, $ham_count, $atime);
}
=head2 tok_get_all
public instance (\@) tok_get_all (@ $tokens)
Description:
This method retrieves the specified tokens (C<$tokens>) from storage and returns
an array ref of arrays containing the spam count, ham count and last access time.
=cut
sub tok_get_all {
  my ($self, @tokens) = @_;

  return [] unless (defined($self->{_dbh}));

  my $total = scalar(@tokens);
  dbg("bayes: tok_get_all: token count: $total");

  my @found;

  my $token_select = $self->_token_select_string();

  # Common query prefix; the placeholder list for IN (...) is appended per batch.
  my $sql_prefix = join("\n",
    "SELECT $token_select, spam_count, ham_count, atime",
    "FROM bayes_token",
    "WHERE id = ?",
    "AND token IN ");

  # Tokens are looked up in batches of at most 100; the final batch holds
  # whatever remains.
  my $max_batch = 100;

  for (my $offset = 0; $offset < $total; $offset += $max_batch) {
    my $last = $offset + $max_batch - 1;
    $last = $total - 1 if ($last > $total - 1);

    my @batch = @tokens[$offset .. $last];

    my $placeholders = '(' . join(',', ('?') x scalar(@batch)) . ')';

    my $sth = $self->{_dbh}->prepare($sql_prefix . $placeholders);
    if (!defined($sth)) {
      dbg("bayes: tok_get_all: SQL error: ".$self->{_dbh}->errstr());
      return [];
    }

    if (!$sth->execute($self->{_userid}, @batch)) {
      dbg("bayes: tok_get_all: SQL error: ".$self->{_dbh}->errstr());
      return [];
    }

    my $rows = $sth->fetchall_arrayref();
    $sth->finish();

    foreach my $row (@{$rows}) {
      # Columns 1 and 2 are the spam/ham counts and column 3 the atime;
      # false or negative counts and a false atime become 0.
      foreach my $col (1, 2) {
        $row->[$col] = 0 if (!$row->[$col] || $row->[$col] < 0);
      }
      $row->[3] = 0 if (!$row->[3]);

      push(@found, $row);
    }
  }

  return \@found;
}
=head2 tok_count_change
public instance (Boolean) tok_count_change (Integer $spam_count,
Integer $ham_count,
String $token,
String $atime)
Description:
This method takes a C<$spam_count> and C<$ham_count> and adds it to
C<$tok> along with updating C<$tok>s atime with C<$atime>.
=cut
sub tok_count_change {
  my ($self, $spam_count, $ham_count, $token, $atime) = @_;

  # An unspecified access time is passed on as 0.
  $atime = 0 if (!defined($atime));

  # The result of _put_token is returned to the caller unchanged.
  return $self->_put_token($token, $spam_count, $ham_count, $atime);
}
=head2 multi_tok_count_change
public instance (Boolean) multi_tok_count_change (Integer $spam_count,
Integer $ham_count,
\% $tokens,
String $atime)
Description:
This method takes a C<$spam_count> and C<$ham_count> and adds it to all
of the tokens in the C<$tokens> hash ref along with updating each tokens
atime with C<$atime>.
=cut
sub multi_tok_count_change {
  my ($self, $spam_count, $ham_count, $tokens, $atime) = @_;

  # An unspecified access time is passed on as 0.
  $atime = 0 if (!defined($atime));

  # The result of _put_tokens is returned to the caller unchanged.
  return $self->_put_tokens($tokens, $spam_count, $ham_count, $atime);
}
=head2 nspam_nham_get
public instance ($spam_count, $ham_count) nspam_nham_get ()
Description:
This method retrieves the total number of spam and the total number of
ham learned.
=cut
sub nspam_nham_get {
  my ($self) = @_;

  return (0,0) unless (defined($self->{_dbh}));

  # Slots 1 and 2 of the storage variables are the spam and ham totals.
  my (undef, $nspam, $nham) = $self->get_storage_variables();

  return ($nspam || 0, $nham || 0);
}
=head2 nspam_nham_change
public instance (Boolean) nspam_nham_change (Integer $num_spam,
Integer $num_ham)
Description:
This method updates the number of spam and the number of ham in the database.
=cut
sub nspam_nham_change {
  my ($self, $num_spam, $num_ham) = @_;

  my $dbh = $self->{_dbh};
  return 0 unless (defined($dbh));

  my $spam_changed = ($num_spam != 0);
  my $ham_changed  = ($num_ham != 0);

  # Nothing to write when both deltas are zero; this still counts as success.
  unless ($spam_changed || $ham_changed) {
    dbg("bayes: nspam_nham_change: Called with no delta on spam or ham");
    return 1;
  }

  # Only the counters that actually change appear in the UPDATE statement.
  my ($sql, @bindings);
  if ($spam_changed && $ham_changed) {
    $sql = join("\n",
      "UPDATE bayes_vars",
      "SET spam_count = spam_count + ?,",
      "ham_count = ham_count + ?",
      "WHERE id = ?");
    @bindings = ($num_spam, $num_ham, $self->{_userid});
  }
  elsif ($spam_changed) {
    $sql = join("\n",
      "UPDATE bayes_vars",
      "SET spam_count = spam_count + ?",
      "WHERE id = ?");
    @bindings = ($num_spam, $self->{_userid});
  }
  else {
    $sql = join("\n",
      "UPDATE bayes_vars",
      "SET ham_count = ham_count + ?",
      "WHERE id = ?");
    @bindings = ($num_ham, $self->{_userid});
  }

  my $rows = $dbh->do($sql, undef, @bindings);
  if (!defined($rows)) {
    dbg("bayes: nspam_nham_change: SQL error: ".$dbh->errstr());
    return 0;
  }

  return 1;
}
=head2 tok_touch
public instance (Boolean) tok_touch (String $token,
String $atime)
Description:
This method updates the given tokens (C<$token>) atime.
The assumption is that the token already exists in the database.
=cut
sub tok_touch {
  my ($self, $token, $atime) = @_;

  my $dbh = $self->{_dbh};
  return 0 unless (defined($dbh));

  # Move the token's atime forward only; an equal or newer atime is left alone.
  my $token_sql = join("\n",
    "UPDATE bayes_token",
    "SET atime = ?",
    "WHERE id = ?",
    "AND token = ?",
    "AND atime < ?");

  my $touched = $dbh->do($token_sql, undef, $atime, $self->{_userid}, $token, $atime);
  if (!defined($touched)) {
    dbg("bayes: tok_touch: SQL error: ".$dbh->errstr());
    return 0;
  }

  # '0E0' is a true zero row count: nothing changed, so bayes_vars is left alone.
  return 1 if ($touched eq '0E0');

  # Advance the user's newest_token_age if this atime is newer.
  my $vars_sql = join("\n",
    "UPDATE bayes_vars",
    "SET newest_token_age = ?",
    "WHERE id = ?",
    "AND newest_token_age < ?");

  my $updated = $dbh->do($vars_sql, undef, $atime, $self->{_userid}, $atime);
  if (!defined($updated)) {
    dbg("bayes: tok_touch: SQL error: ".$dbh->errstr());
    return 0;
  }

  return 1;
}
=head2 tok_touch_all
public instance (Boolean) tok_touch_all (\@ $tokens
String $atime)
Description:
This method does a mass update of the given list of tokens C<$tokens>, if the existing token
atime is < C<$atime>.
The assumption is that the tokens already exist in the database.
We should never be touching more than N_SIGNIFICANT_TOKENS, so we can make
some assumptions about how to handle the data (ie no need to batch like we
do in tok_get_all)
=cut
sub tok_touch_all {
  my ($self, $tokens, $atime) = @_;

  my $dbh = $self->{_dbh};
  return 0 unless (defined($dbh));

  # An empty token list is trivially successful.
  my $token_total = scalar(@{$tokens});
  return 1 unless ($token_total);

  # One placeholder per token inside the IN (...) list.
  my $token_sql = "UPDATE bayes_token SET atime = ? WHERE id = ? AND token IN ("
                . join(',', ('?') x $token_total)
                . ") AND atime < ?";

  my @bindings = ($atime, $self->{_userid}, @{$tokens}, $atime);

  my $touched = $dbh->do($token_sql, undef, @bindings);
  if (!defined($touched)) {
    dbg("bayes: tok_touch_all: SQL error: ".$dbh->errstr());
    return 0;
  }

  # '0E0' is a true zero row count: nothing changed, so bayes_vars is left alone.
  return 1 if ($touched eq '0E0');

  # Advance the user's newest_token_age if this atime is newer.
  my $vars_sql = join("\n",
    "UPDATE bayes_vars",
    "SET newest_token_age = ?",
    "WHERE id = ?",
    "AND newest_token_age < ?");

  my $updated = $dbh->do($vars_sql, undef, $atime, $self->{_userid}, $atime);
  if (!defined($updated)) {
    dbg("bayes: tok_touch_all: SQL error: ".$dbh->errstr());
    return 0;
  }

  return 1;
}
=head2 cleanup
public instance (Boolean) cleanup ()
Description:
This method performs any cleanup necessary before moving onto the next
operation.
=cut
sub cleanup {
  my ($self) = @_;

  # Nothing to do unless a previous operation flagged that cleanup is needed.
  return 1 unless ($self->{needs_cleanup});

  # Like every other method here, refuse to run without a connection rather
  # than dying on an undefined handle. The flag is left set so that the
  # cleanup can still happen once a connection exists.
  return 0 unless (defined($self->{_dbh}));

  # cleanup was needed, go ahead and clear the cleanup flag
  $self->{needs_cleanup} = 0;

  # Remove this user's tokens whose spam and ham counts are both zero.
  my $sql = join("\n",
    "DELETE from bayes_token",
    "WHERE id = ?",
    "AND spam_count = 0",
    "AND ham_count = 0");

  my $toks_deleted = $self->{_dbh}->do($sql, undef, $self->{_userid});

  unless (defined($toks_deleted)) {
    dbg("bayes: cleanup: SQL error: ".$self->{_dbh}->errstr());
    return 0;
  }

  # '0E0' is a true zero row count: no rows removed, so token_count is unchanged.
  return 1 if ($toks_deleted eq '0E0');

  # Keep the cached token_count in step with the rows just removed. The count
  # is bound as a parameter instead of being interpolated into the SQL.
  $sql = join("\n",
    "UPDATE bayes_vars SET token_count = token_count - ?",
    "WHERE id = ?");

  my $rows = $self->{_dbh}->do($sql, undef, $toks_deleted, $self->{_userid});

  unless (defined($rows)) {
    dbg("bayes: cleanup: SQL error: ".$self->{_dbh}->errstr());
    return 0;
  }

  return 1;
}
=head2 get_magic_re
public instance get_magic_re (String)
Description:
This method returns a regexp which indicates a magic token.
Unused in SQL implementation.
=cut
sub get_magic_re {
  # This implementation has no magic-token regexp; arguments are ignored
  # and undef is always returned.
  return undef;
}
=head2 sync
public instance (Boolean) sync (\% $opts)
Description:
This method performs a sync of the database
=cut
sub sync {
  # No work is performed here; arguments are ignored and success is reported.
  return 1;
}
=head2 perform_upgrade
public instance (Boolean) perform_upgrade (\% $opts);
Description:
Performs an upgrade of the database from one version to another, not
currently used in this implementation.
=cut
sub perform_upgrade {
  # No upgrade work is performed here; arguments are ignored and success
  # is reported.
  return 1;
}
=head2 clear_database
public instance (Boolean) clear_database ()
Description:
This method deletes all records for a particular user.
Callers should be aware that any errors returned by this method
could cause the database to be inconsistent for the given user.
=cut
sub clear_database {
  my ($self) = @_;

  # If the read-only tie fails there is nothing to clear, which is reported
  # as success. Otherwise switch to a writable tie before deleting.
  return 1 unless ($self->tie_db_readonly());
  $self->tie_db_writable();

  return 0 unless (defined($self->{_dbh}));

  # Tables are emptied for this user in this order; each entry pairs the
  # statement with the prefix of its error message.
  my @steps = (
    [ "DELETE FROM bayes_vars WHERE id = ?",
      "bayes: SQL error removing user (bayes_vars) data: " ],
    [ "DELETE FROM bayes_seen WHERE id = ?",
      "bayes: SQL error removing seen data: " ],
    [ "DELETE FROM bayes_token WHERE id = ?",
      "bayes: SQL error removing token data: " ],
  );

  foreach my $step (@steps) {
    my ($sql, $error_prefix) = @{$step};

    my $rows = $self->{_dbh}->do($sql, undef, $self->{_userid});

    # Stop at the first failure; tables already emptied stay emptied.
    unless (defined($rows)) {
      dbg($error_prefix.$self->{_dbh}->errstr());
      return 0;
    }
  }

  return 1;
}
=head2 backup_database
public instance (Boolean) backup_database ()
Description:
This method will dump the user's database in a machine readable format.
=cut
sub backup_database {
  my ($self) = @_;

  return 0 unless ($self->tie_db_readonly());

  return 0 unless (defined($self->{_dbh}));

  # Header: db version first, then the learned spam and ham totals.
  my @vars = $self->get_storage_variables();

  my $num_spam = $vars[1] || 0;
  my $num_ham = $vars[2] || 0;

  print "v\t$vars[6]\tdb_version # this must be the first line!!!\n";
  print "v\t$num_spam\tnum_spam\n";
  print "v\t$num_ham\tnum_nonspam\n";

  my $token_select = $self->_token_select_string();

  my $token_sql = join("\n",
    "SELECT spam_count, ham_count, atime, $token_select",
    "FROM bayes_token",
    "WHERE id = ?",
    "AND (spam_count > 0 OR ham_count > 0)");

  my $seen_sql = join("\n",
    "SELECT flag, msgid",
    "FROM bayes_seen",
    "WHERE id = ?");

  # Tokens are dumped first ('t' lines, with the token column hex encoded),
  # followed by the seen entries ('s' lines, printed as fetched).
  my @sections = (
    { tag => 't', sql => $token_sql, hex_column => 3 },
    { tag => 's', sql => $seen_sql },
  );

  foreach my $section (@sections) {
    my $sth = $self->{_dbh}->prepare_cached($section->{sql});
    if (!defined($sth)) {
      dbg("bayes: backup_database: SQL error: ".$self->{_dbh}->errstr());
      return 0;
    }

    if (!$sth->execute($self->{_userid})) {
      dbg("bayes: backup_database: SQL error: ".$self->{_dbh}->errstr());
      return 0;
    }

    my $hex_column = $section->{hex_column};
    while (my @values = $sth->fetchrow_array()) {
      if (defined($hex_column)) {
        $values[$hex_column] = unpack("H*", $values[$hex_column]);
      }
      print $section->{tag} . "\t" . join("\t", @values) . "\n";
    }

    $sth->finish();
  }

  $self->untie_db();

  return 1;
}
=head2 restore_database
public instance (Boolean) restore_database (String $filename, Boolean $showdots)
Description:
This method restores a database from the given filename, C<$filename>.
Callers should be aware that any errors returned by this method
could cause the database to be inconsistent for the given user.
=cut
sub restore_database {
  my ($self, $filename, $showdots) = @_;

  # A lexical filehandle is closed automatically when it goes out of scope,
  # so the dump file no longer stays open on the early error returns below.
  my $dumpfile;
  if (!open($dumpfile, '<', $filename)) {
    dbg("bayes: unable to open backup file $filename: $!");
    return 0;
  }

  # Ignore interrupting signals for the duration of the restore.
  local $SIG{'INT'} = 'IGNORE';
  local $SIG{'HUP'} = 'IGNORE' if (!Mail::SpamAssassin::Util::am_running_on_windows());
  local $SIG{'TERM'} = 'IGNORE';

  # Start from an empty database for this user.
  unless ($self->clear_database()) {
    return 0;
  }

  # Drop the connection and re-tie writable.
  $self->untie_db();

  unless ($self->tie_db_writable()) {
    return 0;
  }

  my $token_count = 0;
  my $db_version;
  my $num_spam;
  my $num_ham;
  my $error_p = 0;
  my $line_count = 0;

  # The first line must carry the dump's db version. An empty file yields
  # undef here, which is treated the same as a missing version line.
  my $line = <$dumpfile>;
  $line_count++;

  if (defined($line) && $line =~ m/^v\s+(\d+)\s+db_version/) {
    $db_version = $1;
  }
  else {
    dbg("bayes: database version must be the first line in the backup file, correct and re-run");
    return 0;
  }

  unless ($db_version == 2 || $db_version == 3) {
    warn("bayes: database version $db_version is unsupported, must be version 2 or 3");
    return 0;
  }

  my $token_error_count = 0;
  my $seen_error_count = 0;

  while (my $line = <$dumpfile>) {
    chomp($line);
    $line_count++;

    # Progress indicator: one dot per 1000 lines.
    if ($line_count % 1000 == 0) {
      print STDERR "." if ($showdots);
    }

    if ($line =~ /^v\s+/) {
      # Variable line: v <value> <name>
      my @parsed_line = split(/\s+/, $line, 3);
      my $value = $parsed_line[1] + 0;
      if ($parsed_line[2] eq 'num_spam') {
        $num_spam = $value;
      }
      elsif ($parsed_line[2] eq 'num_nonspam') {
        $num_ham = $value;
      }
      else {
        dbg("bayes: restore_database: skipping unknown line: $line");
      }
    }
    elsif ($line =~ /^t\s+/) {
      # Token line: t <spam_count> <ham_count> <atime> <token>
      my @parsed_line = split(/\s+/, $line, 5);
      my $spam_count = $parsed_line[1] + 0;
      my $ham_count = $parsed_line[2] + 0;
      my $atime = $parsed_line[3] + 0;
      my $token = $parsed_line[4];

      my $token_warn_p = 0;
      my @warnings;

      if ($spam_count < 0) {
        $spam_count = 0;
        push(@warnings, 'spam count < 0, resetting');
        $token_warn_p = 1;
      }
      if ($ham_count < 0) {
        $ham_count = 0;
        push(@warnings, 'ham count < 0, resetting');
        $token_warn_p = 1;
      }

      if ($spam_count == 0 && $ham_count == 0) {
        dbg("bayes: token has zero spam and ham count, skipping");
        next;
      }

      # An atime in the future is reset to the current time.
      if ($atime > time()) {
        $atime = time();
        push(@warnings, 'atime > current time, resetting');
        $token_warn_p = 1;
      }

      if ($token_warn_p) {
        dbg("bayes: token ($token) has the following warnings:\n".join("\n",@warnings));
      }

      if ($db_version < 3) {
        # Version 2 dumps hold the token text itself; store the last five
        # bytes of its SHA1 digest instead.
        $token = substr(sha1($token), -5);
      }
      else {
        # Version 3 dumps hold the stored token hex encoded; decode it.
        $token = pack("H*",$token);
      }

      unless ($self->_put_token($token, $spam_count, $ham_count, $atime)) {
        dbg("bayes: error inserting token for line: $line");
        $token_error_count++;
      }
      $token_count++;
    }
    elsif ($line =~ /^s\s+/) {
      # Seen line: s <flag> <msgid>
      my @parsed_line = split(/\s+/, $line, 3);
      my $flag = $parsed_line[1];
      my $msgid = $parsed_line[2];

      unless ($flag eq 'h' || $flag eq 's') {
        dbg("bayes: unknown seen flag ($flag) for line: $line, skipping");
        next;
      }

      unless ($msgid) {
        dbg("bayes: blank msgid for line: $line, skipping");
        next;
      }

      unless ($self->seen_put($msgid, $flag)) {
        dbg("bayes: error inserting msgid in seen table for line: $line");
        $seen_error_count++;
      }
    }
    else {
      dbg("bayes: skipping unknown line: $line");
      next;
    }

    # Give up, leaving an empty database, once either error count reaches 20.
    if ($token_error_count >= 20) {
      warn "bayes: encountered too many errors (20) while parsing token line, reverting to empty database and exiting\n";
      $self->clear_database();
      return 0;
    }

    if ($seen_error_count >= 20) {
      warn "bayes: encountered too many errors (20) while parsing seen lines, reverting to empty database and exiting\n";
      $self->clear_database();
      return 0;
    }
  }
  close($dumpfile);

  print STDERR "\n" if ($showdots);

  # Both totals must have been present in the dump.
  unless (defined($num_spam)) {
    dbg("bayes: unable to find num spam, please check file");
    $error_p = 1;
  }

  unless (defined($num_ham)) {
    dbg("bayes: unable to find num ham, please check file");
    $error_p = 1;
  }

  if ($error_p) {
    dbg("bayes: error(s) while attempting to load $filename, clearing database, correct and re-run");
    $self->clear_database();
    return 0;
  }

  # The counters only need updating when at least one total is non-zero.
  if ($num_spam || $num_ham) {
    unless ($self->nspam_nham_change($num_spam, $num_ham)) {
      dbg("bayes: error updating num spam and num ham, clearing database");
      $self->clear_database();
      return 0;
    }
  }

  dbg("bayes: parsed $line_count lines");
  dbg("bayes: created database with $token_count tokens based on $num_spam spam messages and $num_ham ham messages");

  $self->untie_db();

  return 1;
}
=head2 db_readable
public instance (Boolean) db_readable()
Description:
This method returns a boolean value indicating if the database is in a
readable state.
=cut
sub db_readable {
  my ($self) = @_;

  # Readable simply means that a database handle is currently held.
  my $connected = defined($self->{_dbh});

  return $connected;
}
=head2 db_writable
public instance (Boolean) db_writable()
Description:
This method returns a boolean value indicating if the database is in a
writable state.
=cut
sub db_writable {
  my ($self) = @_;

  # Writable requires both a held database handle and the writable flag.
  my $connected = defined($self->{_dbh});

  return ($connected && $self->{db_writable_p});
}
=head1 Private Methods
=head2 _connect_db
private instance (Boolean) _connect_db ()
Description:
This method connects to the SQL database.
=cut
sub _connect_db {
  my ($self) = @_;

  # Forget any previous handle before attempting a new connection.
  $self->{_dbh} = undef;

  # PrintError is switched off; failures are detected through return values
  # and logged by this module. AutoCommit is switched on.
  my %attributes = ('PrintError' => 0, 'AutoCommit' => 1);

  my $dbh = DBI->connect($self->{_dsn}, $self->{_dbuser}, $self->{_dbpass},
                         \%attributes);

  unless ($dbh) {
    dbg("bayes: unable to connect to database: ".DBI->errstr());
    return 0;
  }

  dbg("bayes: database connection established");

  $self->{_dbh} = $dbh;

  return 1;
}
=head2 _get_db_version
private instance (Integer) _get_db_version ()
Description:
Gets the current version of the database from the special global vars
table.
=cut
sub _get_db_version {
  my $self = shift;

  my $dbh = $self->{_dbh};
  return 0 if (!defined($dbh));

  # Repeat calls are answered from the per-object cache.
  my $cached = $self->{_db_version_cache};
  return $cached if (defined($cached));

  my $sth = $dbh->prepare_cached(
    "SELECT value FROM bayes_global_vars WHERE variable = 'VERSION'");
  if (!defined($sth)) {
    dbg("bayes: _get_db_version: SQL error: ".$dbh->errstr());
    return 0;
  }

  if (!$sth->execute()) {
    dbg("bayes: _get_db_version: SQL error: ".$dbh->errstr());
    return 0;
  }

  my @row = $sth->fetchrow_array();
  $sth->finish();

  # Only the first column is of interest; it is undef when no row came back.
  my $version = $row[0];
  $self->{_db_version_cache} = $version;
  return $version;
}
=head2 _initialize_db
private instance (Boolean) _initialize_db (Boolean $create_entry_p)
Description:
This method will check to see if a user has had their bayes variables
initialized. If not, and $create_entry_p is true, it will perform this
initialization.
=cut
sub _initialize_db {
  # Resolve the bayes_vars id for $self->{_username} and store it in
  # $self->{_userid}, optionally creating the bayes_vars row first.
  #
  # Parameters:
  #   $create_entry_p - when true, a missing bayes_vars row is inserted
  #
  # Returns 1 once $self->{_userid} has been set, 0 in every other case
  # (no handle, no username, username vetoed by a plugin, SQL error, row
  # missing and not created, or row created but its id cannot be read back).
  my ($self, $create_entry_p) = @_;

  return 0 unless (defined($self->{_dbh}));

  return 0 if (!$self->{_username});

  # When configured, plugins must explicitly allow the 'bayessql' service
  # for this username; the flag starts out as denied.
  if ($self->{bayes}->{conf}->{bayes_sql_username_authorized}) {
    my $services = { 'bayessql' => 0 };
    $self->{bayes}->{main}->call_plugins("services_allowed_for_username",
                                         { services => $services,
                                           username => $self->{_username},
                                           conf => $self->{bayes}->{conf},
                                         });

    unless ($services->{bayessql}) {
      dbg("bayes: username not allowed by services_allowed_for_username plugin call");
      return 0;
    }
  }

  my $sqlselect = "SELECT id FROM bayes_vars WHERE username = ?";

  my $sthselect = $self->{_dbh}->prepare_cached($sqlselect);

  unless (defined($sthselect)) {
    dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
    return 0;
  }

  my $rc = $sthselect->execute($self->{_username});

  unless ($rc) {
    dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
    return 0;
  }

  my ($id) = $sthselect->fetchrow_array();

  if ($id) {
    $self->{_userid} = $id;
    dbg("bayes: Using userid: ".$self->{_userid});
    $sthselect->finish();
    return 1;
  }

  # No row for this user yet; only create one when the caller asked for it.
  return 0 unless ($create_entry_p);

  my $sqlinsert = "INSERT INTO bayes_vars (username) VALUES (?)";

  my $rows = $self->{_dbh}->do($sqlinsert,
                               undef,
                               $self->{_username});
  unless (defined($rows)) {
    dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
    return 0;
  }

  # Re-run the select to learn which id the freshly inserted row received.
  $rc = $sthselect->execute($self->{_username});

  unless ($rc) {
    dbg("bayes: _initialize_db: SQL error: ".$self->{_dbh}->errstr());
    return 0;
  }

  ($id) = $sthselect->fetchrow_array();

  $sthselect->finish();

  if ($id) {
    $self->{_userid} = $id;
    dbg("bayes: using userid: ".$self->{_userid});
    return 1;
  }

  # The insert reported success but the id could not be read back.  Claiming
  # success here would leave _userid unset for every later query, so fail.
  dbg("bayes: _initialize_db: unable to determine userid for username: ".$self->{_username});
  return 0;
}
=head2 _put_token
private instance (Boolean) _put_token (string $token,
integer $spam_count,
integer $ham_count,
string $atime)
Description:
This method performs the work of either inserting or updating a token in
the database.
=cut
sub _put_token {
# Insert a new bayes_token row, or adjust the counts of an existing one, for
# the user identified by $self->{_userid}, keeping the bookkeeping columns
# in bayes_vars in step.
#
# Parameters:
#   $token      - token value stored in bayes_token.token
#   $spam_count - amount to add to the spam count (may be negative)
#   $ham_count  - amount to add to the ham count (may be negative)
#   $atime      - access time to record for the token
#
# Returns 0 when there is no database handle or any SQL statement fails,
# 1 otherwise (including the cases where nothing needed to be done).
my ($self, $token, $spam_count, $ham_count, $atime) = @_;
return 0 unless (defined($self->{_dbh}));
# Undefined (or otherwise false) counts are treated as zero.
$spam_count ||= 0;
$ham_count ||= 0;
# Nothing to add or subtract: report success without touching the database.
if ($spam_count == 0 && $ham_count == 0) {
return 1;
}
# tok_get is defined outside this block; presumably it yields a false atime
# when the token has no row yet -- verify against tok_get.
my ($existing_spam_count,
$existing_ham_count,
$existing_atime) = $self->tok_get($token);
if (!$existing_atime) {
# No existing entry to decrement from, so a negative delta is a no-op.
return 1 if ($spam_count < 0 || $ham_count < 0);
my $sql = "INSERT INTO bayes_token
(id, token, spam_count, ham_count, atime)
VALUES (?,?,?,?,?)";
my $sth = $self->{_dbh}->prepare_cached($sql);
unless (defined($sth)) {
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
return 0;
}
my $rc = $sth->execute($self->{_userid},
$token,
$spam_count,
$ham_count,
$atime);
unless ($rc) {
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
return 0;
}
$sth->finish();
# A row was added, so bump the per-user token counter.
$sql = "UPDATE bayes_vars SET token_count = token_count + 1
WHERE id = ?";
my $rows = $self->{_dbh}->do($sql, undef, $self->{_userid});
unless (defined($rows)) {
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
return 0;
}
# Move newest_token_age forward only if this token is newer than the
# value currently stored.
$sql = "UPDATE bayes_vars SET newest_token_age = ?
WHERE id = ? AND newest_token_age < ?";
$rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
unless (defined($rows)) {
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
return 0;
}
# DBI's do() reports "zero rows affected" as the true-but-zero string '0E0'.
# Only when the newest age did not move is the oldest age tried instead.
if ($rows eq '0E0') {
$sql = "UPDATE bayes_vars SET oldest_token_age = ?
WHERE id = ? AND oldest_token_age > ?";
$rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
unless (defined($rows)) {
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
return 0;
}
}
}
else {
# A decrement on an existing token sets the needs_cleanup flag; the flag is
# consumed outside this block.
if ($spam_count < 0 || $ham_count < 0) {
$self->{needs_cleanup} = 1;
}
# $update_atime_p:  the stored atime is older than $atime and should be replaced.
# $updated_atime_p: an UPDATE that writes atime has already been chosen.
my $update_atime_p = 1;
my $updated_atime_p = 0;
$update_atime_p = 0 if ($existing_atime >= $atime);
# Spam and ham counts are changed by separate statements.  Each WHERE clause
# refuses the change if it would take the count below zero.
if ($spam_count) {
my $sql;
my @args;
if ($update_atime_p) {
$sql = "UPDATE bayes_token
SET spam_count = spam_count + ?,
atime = ?
WHERE id = ?
AND token = ?
AND spam_count + ? >= 0";
@args = ($spam_count, $atime, $self->{_userid}, $token, $spam_count);
$updated_atime_p = 1; }
else {
$sql = "UPDATE bayes_token
SET spam_count = spam_count + ?
WHERE id = ?
AND token = ?
AND spam_count + ? >= 0";
@args = ($spam_count, $self->{_userid}, $token, $spam_count);
}
my $rows = $self->{_dbh}->do($sql, undef, @args);
unless (defined($rows)) {
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
return 0;
}
}
if ($ham_count) {
my $sql;
my @args;
# Write atime here only if the spam statement above did not already do so.
if ($update_atime_p && !$updated_atime_p) {
$sql = "UPDATE bayes_token
SET ham_count = ham_count + ?,
atime = ?
WHERE id = ?
AND token = ?
AND ham_count + ? >= 0";
@args = ($ham_count, $atime, $self->{_userid}, $token, $ham_count);
$updated_atime_p = 1; }
else {
$sql = "UPDATE bayes_token
SET ham_count = ham_count + ?
WHERE id = ?
AND token = ?
AND ham_count + ? >= 0";
@args = ($ham_count, $self->{_userid}, $token, $ham_count);
}
my $rows = $self->{_dbh}->do($sql, undef, @args);
unless (defined($rows)) {
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
return 0;
}
}
# If a token atime was written, the per-user newest age may need to follow.
if ($updated_atime_p) {
my $sql = "UPDATE bayes_vars SET newest_token_age = ?
WHERE id = ? AND newest_token_age < ?";
my $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
unless (defined($rows)) {
dbg("bayes: _put_token: SQL error: ".$self->{_dbh}->errstr());
return 0;
}
}
}
return 1;
}
=head2 _put_tokens
private instance (Boolean) _put_tokens (\% $tokens,
integer $spam_count,
integer $ham_count,
string $atime)
Description:
This method performs the work of either inserting or updating tokens in
the database.
=cut
sub _put_tokens {
  # Bulk insert/update: applies the same $spam_count / $ham_count delta and
  # $atime to every token that is a key of %$tokens, then updates the
  # bookkeeping columns in bayes_vars once for the whole batch.
  #
  # Parameters:
  #   $tokens     - hash reference; only its keys (the tokens) are used
  #   $spam_count - amount to add to each spam count (may be negative)
  #   $ham_count  - amount to add to each ham count (may be negative)
  #   $atime      - access time to record for the tokens
  #
  # Returns 0 when there is no database handle or the INSERT statement cannot
  # be prepared, 1 otherwise.  SQL failures after that point are logged with
  # dbg() and do not abort the batch.
  my ($self, $tokens, $spam_count, $ham_count, $atime) = @_;

  return 0 unless (defined($self->{_dbh}));

  # Undefined (or otherwise false) counts are treated as zero.
  $spam_count ||= 0;
  $ham_count ||= 0;

  if ($spam_count == 0 && $ham_count == 0) {
    return 1;
  }

  my $atime_updated_p = 0;   # some existing token had its atime replaced
  my $atime_inserted_p = 0;  # some new token row was inserted with $atime
  my $new_tokens = 0;        # number of rows inserted in this batch

  my $insertsql = "INSERT INTO bayes_token\n"
                . "(id, token, spam_count, ham_count, atime)\n"
                . "VALUES (?,?,?,?,?)";

  my $insertsth = $self->{_dbh}->prepare_cached($insertsql);

  unless (defined($insertsth)) {
    dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
    return 0;
  }

  foreach my $token (keys %{$tokens}) {
    # Only the stored atime is needed; the existing counts are not used.
    my (undef, undef, $existing_atime) = $self->tok_get($token);

    if (!$existing_atime) {
      # No existing entry to decrement from, so a negative delta is skipped.
      next if ($spam_count < 0 || $ham_count < 0);

      my $rc = $insertsth->execute($self->{_userid},
                                   $token,
                                   $spam_count,
                                   $ham_count,
                                   $atime);
      unless ($rc) {
        dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
        next;
      }

      $insertsth->finish();
      $atime_inserted_p = 1;
      $new_tokens++;
      next;
    }

    # Existing token: a decrement sets the needs_cleanup flag.
    if ($spam_count < 0 || $ham_count < 0) {
      $self->{needs_cleanup} = 1;
    }

    # Replace the stored atime only when the new one is more recent.
    my $update_atime_p = 1;
    $update_atime_p = 0 if ($existing_atime >= $atime);

    # Spam is handled before ham.  The ham statement writes atime only when
    # there is no spam statement to do it.
    my @updates = (
      [ 'spam_count', $spam_count, $update_atime_p ],
      [ 'ham_count',  $ham_count,  $update_atime_p && !$spam_count ],
    );

    foreach my $update (@updates) {
      my ($column, $delta, $set_atime_p) = @{$update};
      next unless ($delta);

      my $sql;
      my @args;
      if ($set_atime_p) {
        $sql = "UPDATE bayes_token\n"
             . "SET $column = $column + ?,\n"
             . "atime = ?\n"
             . "WHERE id = ?\n"
             . "AND token = ?\n"
             . "AND $column + ? >= 0";
        @args = ($delta, $atime, $self->{_userid}, $token, $delta);
        $atime_updated_p = 1;
      }
      else {
        $sql = "UPDATE bayes_token\n"
             . "SET $column = $column + ?\n"
             . "WHERE id = ?\n"
             . "AND token = ?\n"
             . "AND $column + ? >= 0";
        @args = ($delta, $self->{_userid}, $token, $delta);
      }

      # The WHERE clause refuses any change that would take the count below 0.
      my $rows = $self->{_dbh}->do($sql, undef, @args);
      unless (defined($rows)) {
        dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
      }
    }
  }

  if ($new_tokens) {
    my $sql = "UPDATE bayes_vars SET token_count = token_count + ?\n"
            . "WHERE id = ?";
    my $rows = $self->{_dbh}->do($sql, undef, $new_tokens, $self->{_userid});
    unless (defined($rows)) {
      dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
    }
  }

  if ($atime_updated_p || $atime_inserted_p) {
    # Move the newest age forward only if $atime is newer than the stored one.
    my $sql = "UPDATE bayes_vars SET newest_token_age = ?\n"
            . "WHERE id = ? AND newest_token_age < ?";
    my $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
    unless (defined($rows)) {
      dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
    }
  }

  if ($atime_inserted_p) {
    # Move the oldest age back only if $atime is older than the stored one.
    my $sql = "UPDATE bayes_vars SET oldest_token_age = ?\n"
            . "WHERE id = ? AND oldest_token_age > ?";
    my $rows = $self->{_dbh}->do($sql, undef, $atime, $self->{_userid}, $atime);
    unless (defined($rows)) {
      dbg("bayes: _put_tokens: SQL error: ".$self->{_dbh}->errstr());
    }
  }

  return 1;
}
=head2 _get_oldest_token_age
private instance (Integer) _get_oldest_token_age ()
Description:
This method finds the atime of the oldest token in the database.
The use of min(atime) in the SQL is ugly, but it is really the most efficient
way of getting the oldest_token_age after we've done a mass expire. It should
only be called at expire time.
=cut
sub _get_oldest_token_age {
  my $self = shift;

  my $dbh = $self->{_dbh};
  return 0 if (!defined($dbh));

  # Smallest atime among this user's tokens.
  my $query = "SELECT min(atime) FROM bayes_token\nWHERE id = ?";

  my $sth = $dbh->prepare_cached($query);
  if (!defined($sth)) {
    dbg("bayes: _get_oldest_token_age: SQL error: ".$dbh->errstr());
    return 0;
  }

  if (!$sth->execute($self->{_userid})) {
    dbg("bayes: _get_oldest_token_age: SQL error: ".$dbh->errstr());
    return 0;
  }

  my @row = $sth->fetchrow_array();
  $sth->finish();

  # First (and only) selected column; undef when nothing was fetched.
  return $row[0];
}
=head2 _get_num_hapaxes
private instance (Integer) _get_num_hapaxes ()
Description:
This method gets the total number of hapaxes (spam_count + ham_count == 1) in
the token database for a user.
=cut
sub _get_num_hapaxes {
  my $self = shift;

  my $dbh = $self->{_dbh};
  return 0 unless (defined($dbh));

  # Count this user's tokens whose combined spam + ham count is exactly one.
  my $query = join("\n",
                   "SELECT count(*)",
                   "FROM bayes_token",
                   "WHERE id = ?",
                   "AND spam_count + ham_count = 1");

  my $sth = $dbh->prepare_cached($query);
  if (!defined($sth)) {
    dbg("bayes: _get_num_hapaxes: SQL error: ".$dbh->errstr());
    return 0;
  }

  my $executed = $sth->execute($self->{_userid});
  if (!$executed) {
    dbg("bayes: _get_num_hapaxes: SQL error: ".$dbh->errstr());
    return 0;
  }

  my @row = $sth->fetchrow_array();
  $sth->finish();

  return $row[0];
}
=head2 _get_num_lowfreq
private instance (Integer) _get_num_lowfreq ()
Description:
This method gets the total number of lowfreq tokens (spam_count < 8 and
ham_count < 8, excluding hapaxes) in the token database for a user.
=cut
sub _get_num_lowfreq {
  my $self = shift;

  my $dbh = $self->{_dbh};
  return 0 unless (defined($dbh));

  # Count this user's tokens with both counts in 0..7, leaving out those
  # whose combined count is exactly one.
  my @clauses = (
    "SELECT count(*)",
    "FROM bayes_token",
    "WHERE id = ?",
    "AND (spam_count >= 0 AND spam_count < 8)",
    "AND (ham_count >= 0 AND ham_count < 8)",
    "AND spam_count + ham_count != 1",
  );
  my $query = join("\n", @clauses);

  my $sth = $dbh->prepare_cached($query);
  if (!defined($sth)) {
    dbg("bayes: _get_num_lowfreq: SQL error: ".$dbh->errstr());
    return 0;
  }

  my $executed = $sth->execute($self->{_userid});
  if (!$executed) {
    dbg("bayes: _get_num_lowfreq: SQL error: ".$dbh->errstr());
    return 0;
  }

  my @row = $sth->fetchrow_array();
  $sth->finish();

  return $row[0];
}
=head2 _token_select_string
private instance (String) _token_select_string
Description:
This method returns the string to be used in SELECT statements to represent
the token column.
The default is to use the RPAD function to pad the token out to 5 characters.
=cut
sub _token_select_string {
  # SQL expression that right-pads the token column with spaces to 5 characters.
  my $select_expr = "RPAD(token, 5, ' ')";
  return $select_expr;
}
sub sa_die {
  # Hand every argument, untouched, to Mail::SpamAssassin::sa_die.
  return Mail::SpamAssassin::sa_die(@_);
}
1;