# <@LICENSE> # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to you under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at: # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # =head1 NAME Mail::SpamAssassin::Conf::Parser - parse SpamAssassin configuration =head1 SYNOPSIS (see Mail::SpamAssassin) =head1 DESCRIPTION Mail::SpamAssassin is a module to identify spam using text analysis and several internet-based realtime blacklists. This class is used internally by SpamAssassin to parse its configuration files. Please refer to the C documentation for public interfaces. =head1 STRUCTURE OF A CONFIG BLOCK This is the structure of a config-setting block. Each is a hashref which may contain these keys: =over 4 =item setting the name of the setting it modifies, e.g. "required_score". this also doubles as the default for 'command' (below). THIS IS REQUIRED. =item command The command string used in the config file for this setting. Optional; 'setting' will be used for the command if this is omitted. =item aliases An [aryref] of other aliases for the same command. optional. =item type The type of this setting: - $CONF_TYPE_STRING: string - $CONF_TYPE_NUMERIC: numeric value (float or int) - $CONF_TYPE_BOOL: boolean (0/no or 1/yes) - $CONF_TYPE_TEMPLATE: template, like "report" - $CONF_TYPE_ADDRLIST: address list, like "whitelist_from" - $CONF_TYPE_HASH_KEY_VALUE: hash key/value pair, like "describe" or tflags If this is set, a 'code' block is assigned based on the type. Note that C<$CONF_TYPE_HASH_KEY_VALUE>-type settings require that the value be non-empty, otherwise they'll produce a warning message. =item code A subroutine to deal with the setting. Only used if B is not set. ONE OF B OR B IS REQUIRED. The arguments passed to the function are C<($self, $key, $value, $line)>, where $key is the setting (*not* the command), $value is the value string, and $line is the entire line. There are two special return values that the B subroutine may return to signal that there is an error in the configuration: C<$Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE> -- this setting requires that a value be set, but one was not provided. C<$Mail::SpamAssassin::Conf::INVALID_VALUE> -- this setting requires a value from a set of 'valid' values, but the user provided an invalid one. Any other values -- including C -- returned from the subroutine are considered to mean 'success'. =item default The default value for the setting. may be omitted if the default value is a non-scalar type, which should be set in the Conf ctor. note for path types: using "__userstate__" is recommended for defaults, as it allows Mail::SpamAssassin module users who set that configuration setting, to receive the correct values. =item is_priv Set to 1 if this setting requires 'allow_user_rules' when run from spamd. =item is_admin Set to 1 if this setting can only be set in the system-wide config when run from spamd. (All settings can be used by local programs run directly by the user.) =item is_frequent Set to 1 if this value occurs frequently in the config. this means it's looked up first for speed. =back =head1 METHODS =over 4 =cut package Mail::SpamAssassin::Conf::Parser; use Mail::SpamAssassin::Conf; use Mail::SpamAssassin::Constants qw(:sa); use Mail::SpamAssassin::Logger; use strict; use warnings; use bytes; use vars qw{ @ISA }; @ISA = qw(); ########################################################################### sub new { my $class = shift; $class = ref($class) || $class; my ($conf) = @_; my $self = { 'conf' => $conf }; $self->{command_luts} = { }; $self->{command_luts}->{frequent} = { }; $self->{command_luts}->{remaining} = { }; bless ($self, $class); $self; } ########################################################################### sub register_commands { my($self, $arrref) = @_; my $conf = $self->{conf}; $self->set_defaults_from_command_list($arrref); $self->build_command_luts($arrref); push(@{$conf->{registered_commands}}, @{$arrref}); } sub set_defaults_from_command_list { my ($self, $arrref) = @_; my $conf = $self->{conf}; foreach my $cmd (@{$arrref}) { # note! exists, not defined -- we want to be able to set # "undef" default values. if (exists($cmd->{default})) { $conf->{$cmd->{setting}} = $cmd->{default}; } } } sub build_command_luts { my ($self, $arrref) = @_; my $conf = $self->{conf}; my $set; foreach my $cmd (@{$arrref}) { # first off, decide what set this is in. if ($cmd->{is_frequent}) { $set = 'frequent'; } else { $set = 'remaining'; } # next, its priority (used to ensure frequently-used params # are parsed first) my $cmdname = $cmd->{command} || $cmd->{setting}; $self->{command_luts}->{$set}->{$cmdname} = $cmd; if ($cmd->{aliases} && scalar @{$cmd->{aliases}} > 0) { foreach my $name (@{$cmd->{aliases}}) { $self->{command_luts}->{$set}->{$name} = $cmd; } } } } ########################################################################### sub parse { my ($self, undef, $scoresonly) = @_; # leave $rules in $_[1] $self->{scoresonly} = $scoresonly; my $conf = $self->{conf}; # Language selection: # See http://www.gnu.org/manual/glibc-2.2.5/html_node/Locale-Categories.html # and http://www.gnu.org/manual/glibc-2.2.5/html_node/Using-gettextized-software.html my $lang = $ENV{'LANGUAGE'}; # LANGUAGE has the highest precedence but has a if ($lang) { # special format: The user may specify more than $lang =~ s/:.*$//; # one language here, colon separated. We use the } # first one only (lazy bums we are :o) $lang ||= $ENV{'LC_ALL'}; $lang ||= $ENV{'LC_MESSAGES'}; $lang ||= $ENV{'LANG'}; $lang ||= 'C'; # Nothing set means C/POSIX if ($lang =~ /^(C|POSIX)$/) { $lang = 'en_US'; # Our default language } else { $lang =~ s/[@.+,].*$//; # Strip codeset, modifier/audience, etc. } # (eg. .utf8 or @euro) # get fast-access handles on the command lookup tables my $lut_frequent = $self->{command_luts}->{frequent}; my $lut_remaining = $self->{command_luts}->{remaining}; my %migrated_keys = map { $_ => 1 } @Mail::SpamAssassin::Conf::MIGRATED_SETTINGS; $self->{currentfile} = '(no file)'; my $skip_parsing = 0; my @curfile_stack = (); my @if_stack = (); my @conf_lines = split (/\n/, $_[1]); my $line; $self->{if_stack} = \@if_stack; while (defined ($line = shift @conf_lines)) { local ($1); # bug 3838: prevent random taint flagging of $1 $line =~ s/(?{currentfile}); $self->{currentfile} = $1; next; } if ($value =~ /^end\s/) { if (scalar @if_stack > 0) { my $cond = pop @if_stack; if ($cond->{type} eq 'if') { my $msg = "config: unclosed 'if' in ". $self->{currentfile}.": if ".$cond->{conditional}."\n"; warn $msg; $self->lint_warn($msg, undef); } else { # die seems a bit excessive here, but this shouldn't be possible # so I suppose it's okay. die "config: unknown 'if' type: ".$cond->{type}."\n"; } @if_stack = (); } $skip_parsing = 0; my $curfile = pop @curfile_stack; if (defined $curfile) { $self->{currentfile} = $curfile; } else { $self->{currentfile} = '(no file)'; } next; } } # now handle the commands. if ($key eq 'include') { $value = $self->fix_path_relative_to_current_file($value); my $text = $conf->{main}->read_cf($value, 'included file'); unshift (@conf_lines, split (/\n/, $text)); next; } if ($key eq 'ifplugin') { $self->handle_conditional ($key, "plugin ($value)", \@if_stack, \$skip_parsing); next; } if ($key eq 'if') { $self->handle_conditional ($key, $value, \@if_stack, \$skip_parsing); next; } if ($key eq 'else') { # TODO: if/else/else won't get flagged here :( if (!@if_stack) { $parse_error = "config: found else without matching conditional"; goto failed_line; } $skip_parsing = !$skip_parsing; next; } # and the endif statement: if ($key eq 'endif') { my $lastcond = pop @if_stack; if (!defined $lastcond) { $parse_error = "config: found endif without matching conditional"; goto failed_line; } $skip_parsing = $lastcond->{skip_parsing}; next; } # preprocessing? skip all other commands next if $skip_parsing; if ($key eq 'require_version') { # if it wasn't replaced during install, assume current version ... next if ($value eq "\@\@VERSION\@\@"); my $ver = $Mail::SpamAssassin::VERSION; # if we want to allow "require_version 3.0" be good for all # "3.0.x" versions: ## make sure it's a numeric value #$value += 0.0; ## convert 3.000000 -> 3.0, stay backwards compatible ... #$ver =~ s/^(\d+)\.(\d{1,3}).*$/sprintf "%d.%d", $1, $2/e; #$value =~ s/^(\d+)\.(\d{1,3}).*$/sprintf "%d.%d", $1, $2/e; if ($ver ne $value) { my $msg = "config: configuration file \"$self->{currentfile}\" requires ". "version $value of SpamAssassin, but this is code version ". "$ver. Maybe you need to use ". "the -C switch, or remove the old config files? ". "Skipping this file"; warn $msg; $self->lint_warn($msg, undef); $skip_parsing = 1; } next; } my $cmd = $lut_frequent->{$key}; # check the frequent command set if (!$cmd) { $cmd = $lut_remaining->{$key}; # no? try the rest } # we've either fallen through with no match, in which case this # if() will fail, or we have a match. if ($cmd) { if ($self->{scoresonly}) { # reading user config from spamd if ($cmd->{is_priv} && !$conf->{allow_user_rules}) { info("config: not parsing, 'allow_user_rules' is 0: $line"); goto failed_line; } if ($cmd->{is_admin}) { info("config: not parsing, administrator setting: $line"); goto failed_line; } } if (!$cmd->{code}) { if (! $self->setup_default_code_cb($cmd)) { goto failed_line; } } my $ret = &{$cmd->{code}} ($conf, $cmd->{setting}, $value, $line); if ($ret && $ret eq $Mail::SpamAssassin::Conf::INVALID_VALUE) { $parse_error = "config: SpamAssassin failed to parse line, ". "\"$value\" is not valid for \"$key\", ". "skipping: $line"; goto failed_line; } elsif ($ret && $ret eq $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE) { $parse_error = "config: SpamAssassin failed to parse line, ". "no value provided for \"$key\", ". "skipping: $line"; goto failed_line; } else { next; } } # last ditch: try to see if the plugins know what to do with it if ($conf->{main}->call_plugins("parse_config", { key => $key, value => $value, line => $line, conf => $conf, user_config => $self->{scoresonly} })) { # a plugin dealt with it successfully. next; } failed_line: my $msg = $parse_error; my $is_error = 1; if (!$msg) { # use a default warning, if a more specific one wasn't output if ($migrated_keys{$key}) { # this key was moved into a plugin; non-fatal for lint $is_error = 0; $msg = "config: failed to parse, now a plugin, skipping, in \"$self->{currentfile}\": $line"; } else { # a real syntax error; this is fatal for --lint $msg = "config: failed to parse line, skipping, in \"$self->{currentfile}\": $line"; } } $self->lint_warn($msg, undef, $is_error); } delete $self->{if_stack}; $self->lint_check(); $self->set_default_scores(); delete $self->{scoresonly}; } sub handle_conditional { my ($self, $key, $value, $if_stack_ref, $skip_parsing_ref) = @_; my $conf = $self->{conf}; my $lexer = ARITH_EXPRESSION_LEXER; my @tokens = ($value =~ m/($lexer)/g); my $eval = ''; my $bad = 0; foreach my $token (@tokens) { if ($token =~ /^(\W+|[+-]?\d+(?:\.\d+)?)$/) { $eval .= $1." "; # note: untaints! } elsif ($token eq 'plugin') { # replace with method call $eval .= "\$self->cond_clause_plugin_loaded"; } elsif ($token eq 'version') { $eval .= $Mail::SpamAssassin::VERSION." "; } elsif ($token =~ /^(\w[\w\:]+)$/) { # class name $eval .= "\"$1\" "; # note: untaints! } else { $bad++; warn "config: unparseable chars in 'if $value': '$token'\n"; } } if ($bad) { $self->lint_warn("bad 'if' line, in \"$self->{currentfile}\"", undef); return -1; } push (@{$if_stack_ref}, { type => 'if', conditional => $value, skip_parsing => $$skip_parsing_ref }); if (eval $eval) { # leave $skip_parsing as-is; we may not be parsing anyway in this block. # in other words, support nested 'if's and 'require_version's } else { $$skip_parsing_ref = 1; } } # functions supported in the "if" eval: sub cond_clause_plugin_loaded { return $_[0]->{conf}->{plugins_loaded}->{$_[1]}; } # Let's do some linting here ... # This is called from _parse(), BTW, so we can check for $conf->{tests} # easily before finish_parsing() is called and deletes it. # sub lint_check { my ($self) = @_; my $conf = $self->{conf}; if ($conf->{lint_rules}) { # Check for description and score issues in lint fashion while ( my $k = each %{$conf->{descriptions}} ) { if (!exists $conf->{tests}->{$k}) { $self->lint_warn("config: warning: description exists for non-existent rule $k\n", $k); } } while ( my($sk) = each %{$conf->{scores}} ) { if (!exists $conf->{tests}->{$sk}) { $self->lint_warn("config: warning: score set for non-existent rule $sk\n", $sk); } } } } # we should set a default score for all valid rules... Do this here # instead of add_test because mostly 'score' occurs after the rule is # specified, so why set the scores to default, then set them again at # 'score'? # sub set_default_scores { my ($self) = @_; my $conf = $self->{conf}; while ( my $k = each %{$conf->{tests}} ) { if ( ! exists $conf->{scores}->{$k} ) { # T_ rules (in a testing probationary period) get low, low scores my $set_score = ($k =~/^T_/) ? 0.01 : 1.0; $set_score = -$set_score if ( ($conf->{tflags}->{$k}||'') =~ /\bnice\b/ ); for my $index (0..3) { $conf->{scoreset}->[$index]->{$k} = $set_score; } } } } ########################################################################### sub setup_default_code_cb { my ($self, $cmd) = @_; my $type = $cmd->{type}; if ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_STRING) { $cmd->{code} = \&set_string_value; } elsif ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL) { $cmd->{code} = \&set_bool_value; } elsif ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC) { $cmd->{code} = \&set_numeric_value; } elsif ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_HASH_KEY_VALUE) { $cmd->{code} = \&set_hash_key_value; } elsif ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_ADDRLIST) { $cmd->{code} = \&set_addrlist_value; } elsif ($type == $Mail::SpamAssassin::Conf::CONF_TYPE_TEMPLATE) { $cmd->{code} = \&set_template_append; } else { warn "config: unknown conf type $type!"; return 0; } return 1; } sub set_numeric_value { my ($conf, $key, $value, $line) = @_; unless (defined $value && $value !~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } unless ($value =~ /^-?\d+(?:\.\d+)?$/) { return $Mail::SpamAssassin::Conf::INVALID_VALUE; } $conf->{$key} = $value+0.0; } sub set_bool_value { my ($conf, $key, $value, $line) = @_; unless (defined $value && $value !~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } # bug 4462: allow yes/1 and no/0 for boolean values $value = lc $value; if ($value eq 'yes') { $value = 1; } elsif ($value eq 'no') { $value = 0; } unless ($value =~ /^[01]$/) { return $Mail::SpamAssassin::Conf::INVALID_VALUE; } $conf->{$key} = $value+0; } sub set_string_value { my ($conf, $key, $value, $line) = @_; unless (defined $value && $value !~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } $conf->{$key} = $value; } sub set_hash_key_value { my ($conf, $key, $value, $line) = @_; my($k,$v) = split(/\s+/, $value, 2); unless (defined $v && $v ne '') { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } $conf->{$key}->{$k} = $v; } sub set_addrlist_value { my ($conf, $key, $value, $line) = @_; unless (defined $value && $value !~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } $conf->{parser}->add_to_addrlist ($key, split (' ', $value)); } sub remove_addrlist_value { my ($conf, $key, $value, $line) = @_; unless (defined $value && $value !~ /^$/) { return $Mail::SpamAssassin::Conf::MISSING_REQUIRED_VALUE; } $conf->{parser}->remove_from_addrlist ($key, split (' ', $value)); } sub set_template_append { my ($conf, $key, $value, $line) = @_; if ( $value =~ /^"(.*?)"$/ ) { $value = $1; } $conf->{$key} .= $value."\n"; } sub set_template_clear { my ($conf, $key, $value, $line) = @_; $conf->{$key} = ''; } ########################################################################### sub finish_parsing { my ($self) = @_; my $conf = $self->{conf}; $self->trace_meta_dependencies(); $self->fix_priorities(); $self->find_dup_rules(); # must be after fix_priorities() dbg("conf: finish parsing"); while (my ($name, $text) = each %{$conf->{tests}}) { my $type = $conf->{test_types}->{$name}; my $priority = $conf->{priority}->{$name} || 0; $conf->{priorities}->{$priority}++; # eval type handling if (($type & 1) == 1) { if (my ($function, $args) = ($text =~ m/(.*?)\s*\((.*?)\)\s*$/)) { my ($packed, $argsref) = $self->pack_eval_method($function, $args, $name, $text); if (!$packed) { # we've already warned about this } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_BODY_EVALS) { $conf->{body_evals}->{$priority}->{$name} = $packed; } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_EVALS) { $conf->{head_evals}->{$priority}->{$name} = $packed; } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_RBL_EVALS) { # We don't do priorities for $Mail::SpamAssassin::Conf::TYPE_RBL_EVALS # we also use the arrayref instead of the packed string $conf->{rbl_evals}->{$name} = [ $function, @$argsref ]; } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_EVALS) { $conf->{rawbody_evals}->{$priority}->{$name} = $packed; } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_FULL_EVALS) { $conf->{full_evals}->{$priority}->{$name} = $packed; } #elsif ($type == $Mail::SpamAssassin::Conf::TYPE_URI_EVALS) { # $conf->{uri_evals}->{$priority}->{$name} = $packed; #} else { $self->lint_warn("unknown type $type for $name: $text", $name); } } else { $self->lint_warn("syntax error for eval function $name: $text", $name); } } # non-eval tests else { if ($type == $Mail::SpamAssassin::Conf::TYPE_BODY_TESTS) { $conf->{body_tests}->{$priority}->{$name} = $text; } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS) { $conf->{head_tests}->{$priority}->{$name} = $text; } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS) { $conf->{meta_tests}->{$priority}->{$name} = $text; } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_URI_TESTS) { $conf->{uri_tests}->{$priority}->{$name} = $text; } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS) { $conf->{rawbody_tests}->{$priority}->{$name} = $text; } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_FULL_TESTS) { $conf->{full_tests}->{$priority}->{$name} = $text; } else { $self->lint_warn("unknown type $type for $name: $text", $name); } } } $self->lint_trusted_networks(); # named this way just in case we ever want a "finish_parsing_start" $conf->{main}->call_plugins("finish_parsing_end", { conf => $conf }); # free up stuff we no longer need delete $conf->{tests}; delete $conf->{priority}; delete $conf->{test_types}; } sub trace_meta_dependencies { my ($self) = @_; my $conf = $self->{conf}; $conf->{meta_dependencies} = { }; foreach my $name (keys %{$conf->{tests}}) { next unless ($conf->{test_types}->{$name} == $Mail::SpamAssassin::Conf::TYPE_META_TESTS); my $deps = [ ]; my $alreadydone = { }; $self->_meta_deps_recurse($conf, $name, $name, $deps, $alreadydone); $conf->{meta_dependencies}->{$name} = join (' ', @{$deps}); } } sub _meta_deps_recurse { my ($self, $conf, $toprule, $name, $deps, $alreadydone) = @_; # Only do each rule once per top-level meta; avoid infinite recursion return if $alreadydone->{$name}; $alreadydone->{$name} = 1; # Obviously, don't trace empty or nonexistent rules my $rule = $conf->{tests}->{$name}; return unless $rule; # Lex the rule into tokens using a rather simple RE method ... my $lexer = ARITH_EXPRESSION_LEXER; my @tokens = ($rule =~ m/$lexer/g); # Go through each token in the meta rule foreach my $token (@tokens) { # has to be an alpha+numeric token next if ($token =~ /^(?:\W+|[+-]?\d+(?:\.\d+)?)$/); # and has to be a rule name next unless exists $conf->{tests}->{$token}; # add and recurse push @{$deps}, $token; $self->_meta_deps_recurse($conf, $toprule, $token, $deps, $alreadydone); } } sub fix_priorities { my ($self) = @_; my $conf = $self->{conf}; die unless $conf->{meta_dependencies}; # order requirement my $pri = $conf->{priority}; # sort into priority order, lowest first -- this way we ensure that if we # rearrange the pri of a rule early on, we cannot accidentally increase its # priority later. foreach my $rule (sort { $pri->{$a} <=> $pri->{$b} } keys %{$pri}) { # we only need to worry about meta rules -- they are the # only type of rules which depend on other rules my $deps = $conf->{meta_dependencies}->{$rule}; next unless (defined $deps); my $basepri = $pri->{$rule}; foreach my $dep (split ' ', $deps) { my $deppri = $pri->{$dep}; if ($deppri > $basepri) { dbg("rules: $rule (pri $basepri) requires $dep (pri $deppri): fixed"); $pri->{$dep} = $basepri; } } } } sub find_dup_rules { my ($self) = @_; my $conf = $self->{conf}; my %names_for_text = (); my %dups = (); while (my ($name, $text) = each %{$conf->{tests}}) { my $type = $conf->{test_types}->{$name}; my $tf = ($conf->{tflags}->{$name}||''); $tf =~ s/\s+/ /gs; # ensure similar, but differently-typed, rules are not marked as dups; # take tflags into account too due to "tflags multiple" $text = "$type\t$text\t$tf"; if (defined $names_for_text{$text}) { $names_for_text{$text} .= " ".$name; $dups{$text} = undef; # found (at least) one } else { $names_for_text{$text} = $name; } } foreach my $text (keys %dups) { my $first; my $first_pri; my @names = sort {$a cmp $b} split(' ', $names_for_text{$text}); foreach my $name (@names) { my $priority = $conf->{priority}->{$name} || 0; if (!defined $first || $priority < $first_pri) { $first_pri = $priority; $first = $name; } } # $first is now the earliest-occurring rule. mark others as dups my @dups = (); foreach my $name (@names) { next if $name eq $first; push @dups, $name; delete $conf->{tests}->{$name}; } dbg("rules: $first merged duplicates: ".join(' ', @dups)); $conf->{duplicate_rules}->{$first} = \@dups; } } sub pack_eval_method { my ($self, $function, $args, $name, $text) = @_; my @args; if ($args) { # bug 4419: Parse quoted strings, unquoted alphanumerics/floats and # both unquoted IPv4 and IPv6 addresses. s// is used so that we can # determine whether or not we successfully parsed ALL arguments. while ($args =~ s/^\s*(?:['"](.*?)['"]|([\d\.:A-Za-z]+?))\s*(?:,\s*|$)//) { if (defined $1) { push @args, $1; } else { push @args, $2; } } } if ($args) { $self->lint_warn("syntax error (unparsable argument: $args) for eval function: $name: $text", $name); return; } my $argstr = $function; $argstr =~ s/\s+//gs; if (scalar @args > 0) { $argstr .= ',' . join (', ', map { s/\#/[HASH]/gs; "q#".$_."#" } @args); } return ($argstr, \@args); } ########################################################################### sub lint_trusted_networks { my ($self) = @_; my $conf = $self->{conf}; # validate trusted_networks and internal_networks, bug 4760. # check that all internal_networks are listed in trusted_networks # too. do the same for msa_networks, but check msa_networks against # internal_networks if trusted_networks aren't defined my ($nt, $matching_against); if ($conf->{trusted_networks_configured}) { $nt = $conf->{trusted_networks}; $matching_against = 'trusted_networks'; } elsif ($conf->{internal_networks_configured}) { $nt = $conf->{internal_networks}; $matching_against = 'internal_networks'; } else { return; } foreach my $net_type ('internal_networks', 'msa_networks') { next unless $conf->{"${net_type}_configured"}; next if $net_type eq $matching_against; my $replace_nets; my @valid_net_list = (); my $net_list = $conf->{$net_type}; foreach my $net (@{$net_list->{nets}}) { # don't check to see if an excluded network is included - that's senseless if (!$net->{exclude} && !$nt->contains_net($net)) { my $msg = "$matching_against doesn't contain $net_type entry '". ($net->{as_string})."'"; $self->lint_warn($msg, undef); # complain $replace_nets = 1; # and omit it from the new internal set } else { push @valid_net_list, $net; } } if ($replace_nets) { # something was invalid. replace the old nets list with a fixed version # (which may be empty) $net_list->{nets} = \@valid_net_list; } } } ########################################################################### sub add_test { my ($self, $name, $text, $type) = @_; my $conf = $self->{conf}; # Don't allow invalid names ... if ($name !~ /^\D\w*$/) { $self->lint_warn("config: error: rule '$name' has invalid characters ". "(not Alphanumeric + Underscore + starting with a non-digit)\n", $name); return; } # Also set a hard limit for ALL rules (rule names longer than 242 # characters throw warnings). Check this separately from the above # pattern to avoid vague error messages. if (length $name > 200) { $self->lint_warn("config: error: rule '$name' is way too long ". "(recommended maximum length is 22 characters)\n", $name); return; } # Warn about, but use, long rule names during --lint if ($conf->{lint_rules}) { if (length($name) > 50 && $name !~ /^__/ && $name !~ /^T_/) { $self->lint_warn("config: warning: rule name '$name' is over 50 chars ". "(recommended maximum length is 22 characters)\n", $name); } } # all of these rule types are regexps if ($type == $Mail::SpamAssassin::Conf::TYPE_BODY_TESTS || $type == $Mail::SpamAssassin::Conf::TYPE_FULL_TESTS || $type == $Mail::SpamAssassin::Conf::TYPE_RAWBODY_TESTS || $type == $Mail::SpamAssassin::Conf::TYPE_URI_TESTS) { return unless $self->is_delimited_regexp_valid($name, $text); } if ($type == $Mail::SpamAssassin::Conf::TYPE_HEAD_TESTS) { my ($pat) = ($text =~ /^\s*\S+\s*(?:\=|\!)\~\s*(\S.*?\S)\s*$/); if ($pat) { $pat =~ s/\s+\[if-unset:\s+(.+)\]\s*$//; } return unless $self->is_delimited_regexp_valid($name, $pat); } elsif ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS) { return unless $self->is_meta_valid($name, $text); } $conf->{tests}->{$name} = $text; $conf->{test_types}->{$name} = $type; if ($type == $Mail::SpamAssassin::Conf::TYPE_META_TESTS) { $conf->{priority}->{$name} ||= 500; } else { $conf->{priority}->{$name} ||= 0; } $conf->{priority}->{$name} ||= 0; $conf->{source_file}->{$name} = $self->{currentfile}; if ($self->{main}->{keep_config_parsing_metadata}) { $conf->{if_stack}->{$name} = $self->get_if_stack_as_string(); } if ($self->{scoresonly}) { $conf->{user_rules_to_compile}->{$type} = 1; $conf->{user_defined_rules}->{$name} = 1; } } sub add_regression_test { my ($self, $name, $ok_or_fail, $string) = @_; my $conf = $self->{conf}; if ($conf->{regression_tests}->{$name}) { push @{$conf->{regression_tests}->{$name}}, [$ok_or_fail, $string]; } else { # initialize the array, and create one element $conf->{regression_tests}->{$name} = [ [$ok_or_fail, $string] ]; } } sub is_meta_valid { my ($self, $name, $rule) = @_; my $meta = ''; # Lex the rule into tokens using a rather simple RE method ... my $lexer = ARITH_EXPRESSION_LEXER; my @tokens = ($rule =~ m/$lexer/g); if (length($name) == 1) { print "$name $_\n " for @tokens; } # Go through each token in the meta rule foreach my $token (@tokens) { # Numbers can't be rule names if ($token =~ /^(?:\W+|[+-]?\d+(?:\.\d+)?)$/) { $meta .= "$token "; } # Zero will probably cause more errors else { $meta .= "0 "; } } my $evalstr = 'my $x = ' . $meta . '; 1;'; if (eval $evalstr) { return 1; } if ($@) { my $err = $@; $err =~ s/\s+(?:at|near)\b.*//s; $err =~ s/Illegal division by zero/division by zero possible/i; $self->lint_warn("config: invalid expression for rule $name: \"$rule\": $err\n", $name); return 0; } } sub is_delimited_regexp_valid { my ($self, $name, $re) = @_; if (!$re || $re !~ /^\s*m?(\W).*(?:\1|>|}|\)|\])[a-z]*\s*$/) { $re ||= ''; $self->lint_warn("config: invalid regexp for rule $name: $re: missing or invalid delimiters\n", $name); return 0; } return $self->is_regexp_valid($name, $re); } sub is_regexp_valid { my ($self, $name, $re) = @_; # OK, try to remove any normal perl-style regexp delimiters at # the start and end, and modifiers at the end if present, # so we can validate those too. my $origre = $re; my $safere = $re; my $mods = ''; if ($re =~ s/^m{//) { $re =~ s/}([a-z]*)$//; $mods = $1; } elsif ($re =~ s/^m\(//) { $re =~ s/\)([a-z]*)$//; $mods = $1; } elsif ($re =~ s/^m([a-z]*)$//; $mods = $1; } elsif ($re =~ s/^m(\W)//) { $re =~ s/\Q$1\E([a-z]*)$//; $mods = $1; } elsif ($re =~ s/^\/(.*)\/([a-z]*)$/$1/) { $mods = $2; } else { $safere = "m#".$re."#"; } # now prepend the modifiers, in order to check if they're valid if ($mods) { $re = "(?".$mods.")".$re; } # note: this MUST use m/...${re}.../ in some form or another, ie. # interpolation of the $re variable into a code regexp, in order to test the # security of the regexp. simply using ("" =~ $re) will NOT do that, and # will therefore open a hole! if (eval { ("" =~ m#${re}#); 1; }) { return 1; } my $err = $@; $err =~ s/ at .*? line \d.*$//; $self->lint_warn("config: invalid regexp for rule $name: $origre: $err\n", $name); return 0; } ########################################################################### sub add_to_addrlist { my ($self, $singlelist, @addrs) = @_; my $conf = $self->{conf}; foreach my $addr (@addrs) { $addr = lc $addr; my $re = $addr; $re =~ s/[\000\\\(]/_/gs; # paranoia $re =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g; # escape any possible metachars $re =~ tr/?/./; # "?" -> "." $re =~ s/\*+/\.\*/g; # "*" -> "any string" $conf->{$singlelist}->{$addr} = "^${re}\$"; } } sub add_to_addrlist_rcvd { my ($self, $listname, $addr, $domain) = @_; my $conf = $self->{conf}; $addr = lc $addr; if ($conf->{$listname}->{$addr}) { push @{$conf->{$listname}->{$addr}{domain}}, $domain; } else { my $re = $addr; $re =~ s/[\000\\\(]/_/gs; # paranoia $re =~ s/([^\*\?_a-zA-Z0-9])/\\$1/g; # escape any possible metachars $re =~ tr/?/./; # "?" -> "." $re =~ s/\*+/\.\*/g; # "*" -> "any string" $conf->{$listname}->{$addr}{re} = "^${re}\$"; $conf->{$listname}->{$addr}{domain} = [ $domain ]; } } sub remove_from_addrlist { my ($self, $singlelist, @addrs) = @_; my $conf = $self->{conf}; foreach my $addr (@addrs) { delete($conf->{$singlelist}->{$addr}); } } sub remove_from_addrlist_rcvd { my ($self, $listname, @addrs) = @_; my $conf = $self->{conf}; foreach my $addr (@addrs) { delete($conf->{$listname}->{$addr}); } } ########################################################################### sub fix_path_relative_to_current_file { my ($self, $path) = @_; # the path may be specified as "~/foo", so deal with that $path = $self->{conf}->{main}->sed_path($path); if (!File::Spec->file_name_is_absolute ($path)) { my ($vol, $dirs, $file) = File::Spec->splitpath ($self->{currentfile}); $path = File::Spec->catpath ($vol, $dirs, $path); dbg("config: fixed relative path: $path"); } return $path; } ########################################################################### sub lint_warn { my ($self, $msg, $rule, $iserror) = @_; if (!defined $iserror) { $iserror = 1; } if ($self->{conf}->{main}->{lint_callback}) { $self->{conf}->{main}->{lint_callback}->( msg => $msg, rule => $rule, iserror => $iserror ); } elsif ($self->{conf}->{lint_rules}) { warn $msg."\n"; } else { info($msg); } if ($iserror) { $self->{conf}->{errors}++; } } ########################################################################### sub get_if_stack_as_string { my ($self) = @_; return join ' ', map { $_->{conditional} } @{$self->{if_stack}}; } ########################################################################### 1;