amavisd-nanny   [plain text]


#!/usr/bin/perl -T

#------------------------------------------------------------------------------
# This is amavisd-nanny, a program to show the status
# and keep an eye on the health of child processes in amavisd-new.
#
# Author: Mark Martinec <mark.martinec@ijs.si>
# Copyright (C) 2004-2009  Mark Martinec,  All Rights Reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the author, nor the name of the "Jozef Stefan"
#   Institute, nor the names of contributors may be used to endorse or
#   promote products derived from this software without specific prior
#   written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#(the license above is the new BSD license, and pertains to this program only)
#
# Patches and problem reports are welcome.
# The latest version of this program is available at:
#   http://www.ijs.si/software/amavisd/
#------------------------------------------------------------------------------

use strict;
use re 'taint';
use warnings;
no warnings 'uninitialized';

use Errno qw(ESRCH ENOENT);
use POSIX qw(strftime);
use Time::HiRes ();
use BerkeleyDB;

# Program version, kept in a package variable.
use vars qw($VERSION);  $VERSION = 1.400;

# Time-to-live limits, in seconds. A process whose last database timestamp
# is older than the applicable limit is sent a SIGTERM by the main loop.
my($idlettl) = 3*60*60; # idle children are sent a SIGTERM
                        #   after this many seconds
my($activettl) = 10*60; # stuck active children are sent a SIGTERM
                        #   after this many seconds

# Location of the nanny database: file name $dbfile inside directory $db_home.
# The directory may be overridden through the AMAVISD_DB_HOME environment
# variable; otherwise the built-in default below is used.
my($dbfile) = 'nanny.db';
my($db_home) =  # DB databases directory
  defined $ENV{'AMAVISD_DB_HOME'} ? $ENV{'AMAVISD_DB_HOME'} : '/var/amavis/db';

# Command line tunables; both may be changed by the option parser
# in the main program.
my($wakeuptime) = 2;  # -w, sleep time in seconds, may be fractional
my($repeatcount);     # -c, repeat count (when defined)

# Format an elapsed time together with a progress bar.
#
# Arguments:
#   $t          elapsed time in seconds (truncated to an integer)
#   $state_bar  string of state characters, one per bar position; only
#               consulted when the process is not idling
#   $idling     true if the process is idle
#
# Returns a string of the form "[Nd|  ]H:MM:SS <bar>". The bar is cut from
# a 35-character ruler with a ':' at every tenth position. For an idle
# process the ruler is made of dots and its length follows the elapsed time;
# for a busy process the ruler is made of '=' and its length follows the
# length of $state_bar, with non-blank state characters overlaid on the '='
# positions. A state bar longer than the ruler is shortened so that its last
# character is retained and followed by a '>' marker.
#
sub fmt_age($$$) {
  my($t,$state_bar,$idling) = @_;
  my $elapsed = int($t);

  # build the ruler: three groups of nine fill characters, each followed
  # by a colon, then five more fill characters
  my $fill = $idling ? '.' : '=';
  my $ruler = join(':', ($fill x 9) x 3, $fill x 5);
  my $wanted = $idling ? $elapsed : length($state_bar);
  my $bar = substr($ruler, 0, $wanted);

  if (!$idling) {
    my $width = length($bar);
    if (length($state_bar) > $width) {  # too long, keep the final state
      $state_bar = substr($state_bar, 0, $width-2)
                   . substr($state_bar, -1, 1) . '>';
    }
    # overlay state characters onto '=' positions, keeping the colons
    $bar = join('', map {
      my $ruler_ch = substr($bar, $_, 1);
      my $state_ch = substr($state_bar, $_, 1);
      ($ruler_ch eq '=' && $state_ch ne ' ') ? $state_ch : $ruler_ch;
    } 0 .. $width-1);
  }

  # split the elapsed time into days, hours, minutes and seconds
  my $seconds = $elapsed % 60;  $elapsed = int($elapsed/60);
  my $minutes = $elapsed % 60;  $elapsed = int($elapsed/60);
  my $hours   = $elapsed % 24;
  my $days    = int($elapsed/24);

  my $day_prefix = $days ? sprintf("%dd",$days) : "  ";
  return $day_prefix . sprintf("%d:%02d:%02d", $hours,$minutes,$seconds)
         . ' ' . $bar;
};

# main program starts here
#
# After parsing the command line, each pass of the monitoring loop:
#   1. (re)opens the nanny database when it exists or its inode has changed;
#   2. reads all records through a cursor while signals are deferred;
#   3. reports every process on STDERR and sends a SIGTERM to those whose
#      last timestamp is older than the applicable time-to-live;
#   4. deletes database records of processes that no longer exist;
#   5. waits for the SIGTERMed processes to go away, escalating to SIGKILL.
#
  my($normal_termination) = 0;
  $SIG{INT} = sub { die "\n" };  # do the END code block when interrupted

  # options always come as pairs: an option name followed by its value;
  # anything unrecognized prints the legend and a usage message
  while (@ARGV) {
    my($opt) = shift @ARGV;
    my($val) = shift @ARGV;
    if ($opt eq '-w' && $val =~ /^\+?\d+(?:\.\d*)?\z/) { $wakeuptime = $val }
    elsif ($opt eq '-c' && $val =~ /^[+-]?\d+\z/) { $repeatcount = $val }
    else {
      print <<'EOD';
States legend:
  A  accepted a connection
  b  begin with a protocol for accepting a request
  m  'MAIL FROM' smtp command started a new transaction in the same session
  d  transferring data from MTA to amavisd
  =  content checking just started
  G  generating and verifying unique mail_id
  D  decoding of mail parts
  V  virus scanning
  S  spam scanning
  P  pen pals database lookup and updates
  r  preparing results
  Q  quarantining and preparing/sending notifications
  F  forwarding mail to MTA
  .  content checking just finished
  sp space indicates idle (elapsed bar is showing dots)

EOD
      die "Usage: $0 [-c <count>] [-w <wait-interval>]\n";
    }
  }
  print <<'EOD';
process-id task-id     elapsed in    elapsed-bar (dots indicate idle)
           or state   idle or busy
EOD

  my(%waittime); # associative array on pid
  my($env,$db,$old_db_inode,@dbstat,$cursor);
  # state carried over between passes, keyed by pid:
  #   %proc_last_timestamp  timestamp seen for the pid on the previous pass
  #   %proc_state_bars      accumulated state characters for the elapsed bar
  my(%proc_last_timestamp, %proc_state_bars);
  for (;;) {
    last  if defined $repeatcount && $repeatcount <= 0;

    # a missing database file is tolerated (we wait for it to appear),
    # any other stat failure is fatal
    @dbstat = stat("$db_home/$dbfile");
    my($errn) = @dbstat ? 0 : 0+$!;
    $errn==0 || $errn==ENOENT  or die "stat $db_home/$dbfile: $!";
    if (defined $db && $old_db_inode != $dbstat[1]) {
      # the file was replaced or removed since it was opened
      $db->db_close==0 or die "BDB db_close error: $BerkeleyDB::Error $!";
      undef $db;
      printf STDERR ("Reopening nanny database %s/%s\n", $db_home,$dbfile);
    }
    if (!defined $db && $errn==0) {
      $old_db_inode = $dbstat[1];
      $env = BerkeleyDB::Env->new(
        -Home => $db_home, -Flags => DB_INIT_CDB | DB_INIT_MPOOL,
        -ErrFile => \*STDOUT, -Verbose => 1);
      defined $env or die "BDB no env: $BerkeleyDB::Error $!";
      $db = BerkeleyDB::Hash->new(-Filename => $dbfile, -Env => $env);
      defined $db or die "BDB no dbN 1: $BerkeleyDB::Error $!";
    }
    $| = 0;

    # per-pass snapshot of the database, keyed by pid
    my(%proc_timestamp, %proc_state, %proc_task_id);
    my($stat,$key,$val); my($now);
    my($eval_stat,$interrupt); $interrupt = '';
    if (!defined $db) {
      printf STDERR ("No nanny database %s/%s; waiting...\n",
                     $db_home,$dbfile);
    } else {
      $repeatcount--  if defined $repeatcount && $repeatcount > 0;
      print "\n";
      # while the cursor is open only remember the name of a signal;
      # it is re-sent to ourselves once the cursor has been closed
      my($h1) = sub { $interrupt = $_[0] };
      local(@SIG{qw(INT HUP TERM TSTP QUIT ALRM USR1 USR2)}) = ($h1) x 8;
      # The failure reason must be stored in the $eval_stat declared above
      # so that it is still visible to the check which follows this block.
      eval {
        $cursor = $db->db_cursor;  # obtain read lock
        defined $cursor or die "db_cursor error: $BerkeleyDB::Error";
        $now = Time::HiRes::time; local($1,$2);
        my($now_utc_iso8601) = strftime("%Y%m%dT%H%M%S",gmtime(int($now)));
        while ( ($stat=$cursor->c_get($key,$val,DB_NEXT)) == 0 ) {
          # a value is a timestamp, a space, and a task id which may be
          # preceded by a single non-digit state character
          if ($val !~ /^(\d+(?:\.\d*)?) (.*?) *\z/s) {
            print STDERR "Bad db entry: $key, $val\n";
          } else {
            $proc_timestamp{$key} = $1; my($task_id) = $2;
            $proc_state{$key} = $1  if $task_id =~ s/^([^0-9])//;
            $proc_task_id{$key} = $task_id;
            if (!exists $proc_state_bars{$key}) {  # new process appeared
              $proc_last_timestamp{$key} = $proc_timestamp{$key};
              $proc_state_bars{$key} = '';
            }
          }
        }
        $stat==DB_NOTFOUND  or die "c_get: $BerkeleyDB::Error $!";
        $cursor->c_close==0 or die "c_close error: $BerkeleyDB::Error";
        $cursor = undef;
        1;
      } or do {
        $eval_stat = $@ ne '' ? $@ : "unknown error, errno=$!";
      };
      if (defined $db) {
        $cursor->c_close  if defined $cursor;  # unlock, ignoring status
        $cursor = undef;
      }
    }
    if ($interrupt ne '') { kill($interrupt,$$) }  # resignal, ignoring status
    elsif ($eval_stat ne '') { chomp($eval_stat); die "BDB $eval_stat\n" }

    # forget the carried-over state of processes which are no longer
    # present in the database
    for my $key (keys(%proc_state_bars)) {  # remove old entries
      if (!exists($proc_timestamp{$key})) {
        delete $proc_last_timestamp{$key};
        delete $proc_state_bars{$key};
      }
    }

    my(@to_be_removed,@killed);
    for my $pid (sort {$a<=>$b} keys %proc_timestamp) {
      $proc_state{$pid} = ' '  if $proc_state{$pid} eq '';
      # idle means: no task id, and a state of '.', ' ' or nothing
      my($idling) = $proc_task_id{$pid} eq '' &&
                    $proc_state{$pid} =~ /^[. ]?\z/s;
      my($age) = $now - $proc_timestamp{$pid};
      if ($idling) { $proc_state_bars{$pid} = '' }
      else {
        # a changed timestamp starts a fresh state bar
        $proc_state_bars{$pid} = ''
          if $proc_timestamp{$pid} ne $proc_last_timestamp{$pid};
        # one bar position per second of age, at least one position
        my($len) = int($age+0.5);
        $len = 1  if $len < 1;
        my($str) = $proc_state_bars{$pid};
        if ($len > length($str)) {  # replicate last character to desired size
          my($ch) = $str eq '' ? '=' : substr($str,-1,1);
          $str .= $ch x ($len - length($str));
        }
        substr($str,$len-1,1) = $proc_state{$pid};
        $proc_state_bars{$pid} = $str;
      }
      $proc_last_timestamp{$pid} = $proc_timestamp{$pid};
      my($ttl) = $idling ? $idlettl : $activettl;
      my($n) = kill(0,$pid);  # test if the process is still there
      if ($n == 0 && $! != ESRCH) {
        die "Can't check the process $pid: $!";
      } elsif ($n == 0) {  # ESRCH means there is no such process
        printf STDERR ("PID %s: %-11s went away %s\n",
                       $pid, $proc_task_id{$pid} || $proc_state{$pid},
                       fmt_age($age, $proc_state_bars{$pid}, $idling) );
        push(@to_be_removed, $pid);
      } elsif ($age <= $ttl) {     # all ok
        printf STDERR ("PID %s: %-11s %s\n",
                       $pid, $proc_task_id{$pid} || $proc_state{$pid},
                       fmt_age($age, $proc_state_bars{$pid}, $idling) );
      } else {                                            # send a SIGTERM
        printf STDERR ("PID %s: %-11s terminated %s\n",
                       $pid, $proc_task_id{$pid} || $proc_state{$pid},
                       fmt_age($age, $proc_state_bars{$pid}, $idling) );
        if (kill('TERM',$pid) || $! == ESRCH) { push(@killed,$pid) }
        else { warn "Can't SIGTERM $pid: $!" }
      }
    }

    # delete database records of processes which went away;
    # an interrupt is ignored while the write cursor is open
    if (@to_be_removed) {
      local($SIG{'INT'}) = 'IGNORE';
      $cursor = $db->db_cursor(DB_WRITECURSOR);  # obtain a write lock
      defined $cursor or die "BDB db_cursor error: $BerkeleyDB::Error";
      for my $key (@to_be_removed) {
        my($val); my($stat) = $cursor->c_get($key,$val,DB_SET);
        $stat==0 || $stat==DB_NOTFOUND
          or die "BDB c_get: $BerkeleyDB::Error, $!.";
        if ($stat==0) {  # remove existing entry
          $cursor->c_del==0 or die "BDB c_del: $BerkeleyDB::Error, $!.";
        }
      }
      $cursor->c_close==0 or die "BDB c_close error: $BerkeleyDB::Error";
      $cursor = undef;
    }

    # wait for SIGTERMed processes to exit; %waittime counts the passes of
    # this loop per pid, and a SIGKILL is sent once the count exceeds 30
    my($delay) = 1;  # seconds
    while (@killed) {
      Time::HiRes::sleep($delay); $delay = 2;
      for my $pid (@killed) {
        $waittime{$pid}++;
        printf STDERR ("PID %s: sending SIGKILL in %d s\n",
                       $pid, 30-$waittime{$pid});
        if ($waittime{$pid} > 30) {  # send a SIGKILL
          kill('KILL',$pid) or $! == ESRCH or warn "Can't SIGKILL $pid: $!";
          $waittime{$pid} = 0;
        } elsif (kill(0,$pid)) {  # process is still there
        } elsif ($! != ESRCH) {   # problem?
          warn "Can't check process $pid: $!";
        } else {                  # no longer around
          printf STDERR ("PID %s: %-11s successfully killed\n",
                         $pid, $proc_task_id{$pid} || $proc_state{$pid});
          delete($waittime{$pid});
          $pid = undef;  # $pid aliases the list element, dropped below
        }
      }
      @killed = grep {defined} @killed;
      printf STDERR ("Waiting for the process to terminate: %s\n",
                     join(', ',@killed))  if @killed;
    }
    $| = 1;
    last  if defined $repeatcount && $repeatcount <= 0;
    Time::HiRes::sleep($wakeuptime)  if $wakeuptime > 0;
  } # forever
  $normal_termination = 1;

# Final cleanup, run when the program exits: release a cursor which may
# still be open, close the database, and report an abnormal exit.
END {
  if (defined $db) {
    if (defined $cursor) { $cursor->c_close }  # ignoring status
    my $close_status = $db->db_close;
    die "BDB db_close error: $BerkeleyDB::Error $!"  unless $close_status == 0;
  }
  if (!$normal_termination) { print STDERR "exited\n" }
}