# <@LICENSE> # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to you under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =head1 NAME Mail::SpamAssassin::Conf - SpamAssassin configuration file =head1 SYNOPSIS # a comment rewrite_header Subject *****SPAM***** full PARA_A_2_C_OF_1618 /Paragraph .a.{0,10}2.{0,10}C. of S. 1618/i describe PARA_A_2_C_OF_1618 Claims compliance with senate bill 1618 header FROM_HAS_MIXED_NUMS From =~ /\d+[a-z]+\d+\S*@/i describe FROM_HAS_MIXED_NUMS From: contains numbers mixed in with letters score A_HREF_TO_REMOVE 2.0 lang es describe FROM_FORGED_HOTMAIL Forzado From: simula ser de hotmail.com lang pt_BR report O programa detetor de Spam ZOE [...] =head1 DESCRIPTION SpamAssassin is configured using traditional UNIX-style configuration files, loaded from the C and C directories. The following web page lists the most important configuration settings used to configure SpamAssassin; novices are encouraged to read it first: http://wiki.apache.org/spamassassin/ImportantInitialConfigItems =head1 FILE FORMAT The C<#> character starts a comment, which continues until end of line. B if the C<#> character is to be used as part of a rule or configuration option, it must be escaped with a backslash. 
i.e.: C<\#> Whitespace in the files is not significant, but please note that starting a line with whitespace is deprecated, as we reserve its use for multi-line rule definitions, at some point in the future. Currently, each rule or configuration setting must fit on one line; multi-line settings are not supported yet. File and directory paths can use C<~> to refer to the user's home directory, but no other shell-style path extensions such as globbing or C<~user/> are supported. Where appropriate below, default values are listed in parentheses. =head1 USER PREFERENCES The following options can be used in both site-wide (C</etc/mail/spamassassin/local.cf>) and user-specific (C<~/.spamassassin/user_prefs>) configuration files to customize how SpamAssassin handles incoming email messages. =cut package Mail::SpamAssassin::Conf; use Mail::SpamAssassin::Util; use Mail::SpamAssassin::NetSet; use Mail::SpamAssassin::Constants qw(:sa); use Mail::SpamAssassin::Conf::Parser; use Mail::SpamAssassin::Logger; use Mail::SpamAssassin::Util::TieOneStringHash; use File::Spec; use strict; use warnings; use bytes; use vars qw{ @ISA $VERSION $CONF_TYPE_STRING $CONF_TYPE_BOOL $CONF_TYPE_NUMERIC $CONF_TYPE_HASH_KEY_VALUE $CONF_TYPE_ADDRLIST $CONF_TYPE_TEMPLATE $INVALID_VALUE $MISSING_REQUIRED_VALUE @MIGRATED_SETTINGS $TYPE_HEAD_TESTS $TYPE_HEAD_EVALS $TYPE_BODY_TESTS $TYPE_BODY_EVALS $TYPE_FULL_TESTS $TYPE_FULL_EVALS $TYPE_RAWBODY_TESTS $TYPE_RAWBODY_EVALS $TYPE_URI_TESTS $TYPE_URI_EVALS $TYPE_META_TESTS $TYPE_RBL_EVALS }; @ISA = qw(); # odd => eval test. Not constants so they can be shared with Parser # TODO: move to Constants.pm? 
$TYPE_HEAD_TESTS = 0x0008; $TYPE_HEAD_EVALS = 0x0009; $TYPE_BODY_TESTS = 0x000a; $TYPE_BODY_EVALS = 0x000b; $TYPE_FULL_TESTS = 0x000c; $TYPE_FULL_EVALS = 0x000d; $TYPE_RAWBODY_TESTS = 0x000e; $TYPE_RAWBODY_EVALS = 0x000f; $TYPE_URI_TESTS = 0x0010; $TYPE_URI_EVALS = 0x0011; $TYPE_META_TESTS = 0x0012; $TYPE_RBL_EVALS = 0x0013; my @rule_types = ("body_tests", "uri_tests", "uri_evals", "head_tests", "head_evals", "body_evals", "full_tests", "full_evals", "rawbody_tests", "rawbody_evals", "rbl_evals", "meta_tests"); $VERSION = 'bogus'; # avoid CPAN.pm picking up version strings later # these are variables instead of constants so that other classes can # access them; if they're constants, they'd have to go in Constants.pm # TODO: move to Constants.pm? $CONF_TYPE_STRING = 1; $CONF_TYPE_BOOL = 2; $CONF_TYPE_NUMERIC = 3; $CONF_TYPE_HASH_KEY_VALUE = 4; $CONF_TYPE_ADDRLIST = 5; $CONF_TYPE_TEMPLATE = 6; $MISSING_REQUIRED_VALUE = -99999999999999; $INVALID_VALUE = -99999999999998; # set to "1" by the test suite code, to record regression tests # $Mail::SpamAssassin::Conf::COLLECT_REGRESSION_TESTS = 1; # search for "sub new {" to find the start of the code ########################################################################### sub set_default_commands { my($self) = @_; # see "perldoc Mail::SpamAssassin::Conf::Parser" for details on this fmt. # push each config item like this, to avoid a POD bug; it can't just accept # ( { ... }, { ... }, { ...} ) otherwise POD parsing dies. my @cmds = (); =head2 SCORING OPTIONS =over 4 =item required_score n.nn (default: 5) Set the score required before a mail is considered spam. C can be an integer or a real number. 5.0 is the default setting, and is quite aggressive; it would be suitable for a single-user setup, but if you're an ISP installing SpamAssassin, you should probably set the default to be more conservative, like 8.0 or 10.0. 
It is not recommended to automatically delete or discard messages marked as spam, as your users B<will> complain, but if you choose to do so, only delete messages with an exceptionally high score such as 15.0 or higher. This option was previously known as C<required_hits> and that name is still accepted, but is deprecated. =cut push (@cmds, { setting => 'required_score', aliases => ['required_hits'], # backwards compat default => 5, type => $CONF_TYPE_NUMERIC }); =item score SYMBOLIC_TEST_NAME n.nn [ n.nn n.nn n.nn ] Assign scores (the number of points for a hit) to a given test. Scores can be positive or negative real numbers or integers. C<SYMBOLIC_TEST_NAME> is the symbolic name used by SpamAssassin for that test; for example, 'FROM_ENDS_IN_NUMS'. If only one valid score is listed, then that score is always used for a test. If four valid scores are listed, then the score that is used depends on how SpamAssassin is being used. The first score is used when both Bayes and network tests are disabled (score set 0). The second score is used when Bayes is disabled, but network tests are enabled (score set 1). The third score is used when Bayes is enabled and network tests are disabled (score set 2). The fourth score is used when Bayes is enabled and network tests are enabled (score set 3). Setting a rule's score to 0 will disable that rule from running. If any of the score values are surrounded by parentheses '()', then all of the scores in the line are considered to be relative to the already set score. i.e.: '(3)' means increase the score for this rule by 3 points in all score sets. '(3) (0) (3) (0)' means increase the score for this rule by 3 in score sets 0 and 2 only. If no score is given for a test by the end of the configuration, a default score is assigned: a score of 1.0 is used for all tests, except those whose names begin with 'T_' (this is used to indicate a rule in testing) which receive 0.01. 
Note that test names which begin with '__' are indirect rules used to compose meta-match rules and can also act as prerequisites to other rules. They are not scored or listed in the 'tests hit' reports, but assigning a score of 0 to an indirect rule will disable it from running. =cut push (@cmds, { setting => 'score', is_frequent => 1, code => sub { my ($self, $key, $value, $line) = @_; my($rule, @scores) = split(/\s+/, $value); unless (defined $value && $value !~ /^$/ && (scalar @scores == 1 || scalar @scores == 4)) { info("config: score: requires a symbolic rule name and 1 or 4 scores"); return $MISSING_REQUIRED_VALUE; } # Figure out if we're doing relative scores, remove the parens if we are my $relative = 0; foreach (@scores) { if (s/^\((-?\d+(?:\.\d+)?)\)$/$1/) { $relative = 1; } unless (/^-?\d+(?:\.\d+)?$/) { info("config: score: the non-numeric score ($_) is not valid, " . "a numeric score is required"); return $INVALID_VALUE; } } if ($relative && !exists $self->{scoreset}->[0]->{$rule}) { info("config: score: relative score without previous setting in " . "configuration"); return $INVALID_VALUE; } # If we're only passed 1 score, copy it to the other scoresets if (@scores) { if (@scores != 4) { @scores = ( $scores[0], $scores[0], $scores[0], $scores[0] ); } # Set the actual scoreset values appropriately for my $index (0..3) { my $score = $relative ? $self->{scoreset}->[$index]->{$rule} + $scores[$index] : $scores[$index]; $self->{scoreset}->[$index]->{$rule} = $score + 0.0; } } } }); =back =head2 WHITELIST AND BLACKLIST OPTIONS =over 4 =item whitelist_from add@ress.com Used to whitelist sender addresses which send mail that is often tagged (incorrectly) as spam. Use of this setting is not recommended, since it blindly trusts the message, which is routinely and easily forged by spammers and phish senders. The recommended solution is to instead use C<whitelist_auth> or other authenticated whitelisting methods, or C<whitelist_from_rcvd>. 
Whitelist and blacklist addresses are now file-glob-style patterns, so C, C<*@isp.com>, or C<*.domain.net> will all work. Specifically, C<*> and C are allowed, but all other metacharacters are not. Regular expressions are not used for security reasons. Multiple addresses per line, separated by spaces, is OK. Multiple C lines is also OK. The headers checked for whitelist addresses are as follows: if C is set, use that; otherwise check all addresses taken from the following set of headers: Envelope-Sender Resent-Sender X-Envelope-From From In addition, the "envelope sender" data, taken from the SMTP envelope data where this is available, is looked up. See C. e.g. whitelist_from joe@example.com fred@example.com whitelist_from *@example.com =cut push (@cmds, { setting => 'whitelist_from', type => $Mail::SpamAssassin::Conf::CONF_TYPE_ADDRLIST }); =item unwhitelist_from add@ress.com Used to override a default whitelist_from entry, so for example a distribution whitelist_from can be overridden in a local.cf file, or an individual user can override a whitelist_from entry in their own C file. The specified email address has to match exactly the address previously used in a whitelist_from line. e.g. unwhitelist_from joe@example.com fred@example.com unwhitelist_from *@example.com =cut push (@cmds, { command => 'unwhitelist_from', setting => 'whitelist_from', code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value }); =item whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net Use this to supplement the whitelist_from addresses with a check against the Received headers. The first parameter is the address to whitelist, and the second is a string to match the relay's rDNS. This string is matched against the reverse DNS lookup used during the handover from the internet to your internal network's mail exchangers. It can either be the full hostname, or the domain component of that hostname. 
In other words, if the host that connected to your MX had an IP address that mapped to 'sendinghost.spamassassin.org', you should specify C or just C here. Note that this requires that C be correct. For simple cases, it will be, but for a complex network you may get better results by setting that parameter. e.g. whitelist_from_rcvd joe@example.com example.com whitelist_from_rcvd *@axkit.org sergeant.org =item def_whitelist_from_rcvd addr@lists.sourceforge.net sourceforge.net Same as C, but used for the default whitelist entries in the SpamAssassin distribution. The whitelist score is lower, because these are often targets for spammer spoofing. =cut push (@cmds, { setting => 'whitelist_from_rcvd', code => sub { my ($self, $key, $value, $line) = @_; unless (defined $value && $value !~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } unless ($value =~ /^\S+\s+\S+$/) { return $Mail::SpamAssassin::Conf::INVALID_VALUE; } $self->{parser}->add_to_addrlist_rcvd ('whitelist_from_rcvd', split(/\s+/, $value)); } }); push (@cmds, { setting => 'def_whitelist_from_rcvd', code => sub { my ($self, $key, $value, $line) = @_; unless (defined $value && $value !~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } unless ($value =~ /^\S+\s+\S+$/) { return $Mail::SpamAssassin::Conf::INVALID_VALUE; } $self->{parser}->add_to_addrlist_rcvd ('def_whitelist_from_rcvd', split(/\s+/, $value)); } }); =item whitelist_allows_relays add@ress.com Specify addresses which are in C that sometimes send through a mail relay other than the listed ones. By default mail with a From address that is in C that does not match the relay will trigger a forgery rule. Including the address in C prevents that. Whitelist and blacklist addresses are now file-glob-style patterns, so C, C<*@isp.com>, or C<*.domain.net> will all work. Specifically, C<*> and C are allowed, but all other metacharacters are not. Regular expressions are not used for security reasons. 
Multiple addresses per line, separated by spaces, is OK. Multiple C lines is also OK. The specified email address does not have to match exactly the address previously used in a whitelist_from_rcvd line as it is compared to the address in the header. e.g. whitelist_allows_relays joe@example.com fred@example.com whitelist_allows_relays *@example.com =cut push (@cmds, { setting => 'whitelist_allows_relays', type => $Mail::SpamAssassin::Conf::CONF_TYPE_ADDRLIST }); =item unwhitelist_from_rcvd add@ress.com Used to override a default whitelist_from_rcvd entry, so for example a distribution whitelist_from_rcvd can be overridden in a local.cf file, or an individual user can override a whitelist_from_rcvd entry in their own C file. The specified email address has to match exactly the address previously used in a whitelist_from_rcvd line. e.g. unwhitelist_from_rcvd joe@example.com fred@example.com unwhitelist_from_rcvd *@axkit.org =cut push (@cmds, { setting => 'unwhitelist_from_rcvd', code => sub { my ($self, $key, $value, $line) = @_; unless (defined $value && $value !~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } unless ($value =~ /^(?:\S+(?:\s+\S+)*)$/) { return $Mail::SpamAssassin::Conf::INVALID_VALUE; } $self->{parser}->remove_from_addrlist_rcvd('whitelist_from_rcvd', split (/\s+/, $value)); $self->{parser}->remove_from_addrlist_rcvd('def_whitelist_from_rcvd', split (/\s+/, $value)); } }); =item blacklist_from add@ress.com Used to specify addresses which send mail that is often tagged (incorrectly) as non-spam, but which the user doesn't want. Same format as C. =cut push (@cmds, { setting => 'blacklist_from', type => $Mail::SpamAssassin::Conf::CONF_TYPE_ADDRLIST }); =item unblacklist_from add@ress.com Used to override a default blacklist_from entry, so for example a distribution blacklist_from can be overridden in a local.cf file, or an individual user can override a blacklist_from entry in their own C file. 
The specified email address has to match exactly the address previously used in a blacklist_from line. e.g. unblacklist_from joe@example.com fred@example.com unblacklist_from *@spammer.com =cut push (@cmds, { command => 'unblacklist_from', setting => 'blacklist_from', code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value }); =item whitelist_to add@ress.com If the given address appears as a recipient in the message headers (Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will be whitelisted. Useful if you're deploying SpamAssassin system-wide, and don't want some users to have their mail filtered. Same format as C. There are three levels of To-whitelisting, C, C and C. Users in the first level may still get some spammish mails blocked, but users in C should never get mail blocked. The headers checked for whitelist addresses are as follows: if C or C are set, use those; otherwise check all addresses taken from the following set of headers: To Cc Apparently-To Delivered-To Envelope-Recipients Apparently-Resent-To X-Envelope-To Envelope-To X-Delivered-To X-Original-To X-Rcpt-To X-Real-To =item more_spam_to add@ress.com See above. =item all_spam_to add@ress.com See above. =cut push (@cmds, { setting => 'whitelist_to', type => $Mail::SpamAssassin::Conf::CONF_TYPE_ADDRLIST }); push (@cmds, { setting => 'more_spam_to', type => $Mail::SpamAssassin::Conf::CONF_TYPE_ADDRLIST }); push (@cmds, { setting => 'all_spam_to', type => $Mail::SpamAssassin::Conf::CONF_TYPE_ADDRLIST }); =item blacklist_to add@ress.com If the given address appears as a recipient in the message headers (Resent-To, To, Cc, obvious envelope recipient, etc.) the mail will be blacklisted. Same format as C. =cut push (@cmds, { setting => 'blacklist_to', type => $Mail::SpamAssassin::Conf::CONF_TYPE_ADDRLIST }); =item whitelist_auth add@ress.com Used to specify addresses which send mail that is often tagged (incorrectly) as spam. 
This is different from C<whitelist_from> and C<whitelist_from_rcvd> in that it first verifies that the message was sent by an authorized sender for the address, before whitelisting. Authorization is performed using one of the installed sender-authorization schemes: SPF (using C<Mail::SpamAssassin::Plugin::SPF>), Domain Keys (using C<Mail::SpamAssassin::Plugin::DomainKeys>), or DKIM (using C<Mail::SpamAssassin::Plugin::DKIM>). Note that those plugins must be active, and working, for this to operate. Using C<whitelist_auth> is roughly equivalent to specifying duplicate C<whitelist_from_spf>, C<whitelist_from_dk>, and C<whitelist_from_dkim> lines for each of the addresses specified. e.g. whitelist_auth joe@example.com fred@example.com whitelist_auth *@example.com =item def_whitelist_auth add@ress.com Same as C<whitelist_auth>, but used for the default whitelist entries in the SpamAssassin distribution. The whitelist score is lower, because these are often targets for spammer spoofing. =cut push (@cmds, { setting => 'whitelist_auth', type => $Mail::SpamAssassin::Conf::CONF_TYPE_ADDRLIST }); push (@cmds, { setting => 'def_whitelist_auth', type => $Mail::SpamAssassin::Conf::CONF_TYPE_ADDRLIST }); =item unwhitelist_auth add@ress.com Used to override a C<whitelist_auth> entry. The specified email address has to match exactly the address previously used in a C<whitelist_auth> line. e.g. unwhitelist_auth joe@example.com fred@example.com unwhitelist_auth *@example.com =cut push (@cmds, { command => 'unwhitelist_auth', setting => 'whitelist_auth', code => \&Mail::SpamAssassin::Conf::Parser::remove_addrlist_value }); =back =head2 BASIC MESSAGE TAGGING OPTIONS =over 4 =item rewrite_header { subject | from | to } STRING By default, suspected spam messages will not have the C<Subject>, C<From> or C<To> lines tagged to indicate spam. By setting this option, the header will be tagged with C<STRING> to indicate that a message is spam. For the From or To headers, this will take the form of an RFC 2822 comment following the address in parentheses. For the Subject header, this will be prepended to the original subject. Note that you should only use the _REQD_ and _SCORE_ tags when rewriting the Subject header if C<report_safe> is 0. 
Otherwise, you may not be able to remove the SpamAssassin markup via the normal methods. More information about tags is explained below in the B