amavisd-nanny   [plain text]


#!/usr/bin/perl -T

#------------------------------------------------------------------------------
# This is amavisd-nanny, a program to show the status
# and keep an eye on the health of child processes in amavisd-new.
#
# Author: Mark Martinec <mark.martinec@ijs.si>
# Copyright (C) 2004  Mark Martinec,  All Rights Reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of the author, nor the name of the "Jozef Stefan"
#   Institute, nor the names of contributors may be used to endorse or
#   promote products derived from this software without specific prior
#   written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#(the license above is the new BSD license, and pertains to this program only)
#
# Patches and problem reports are welcome.
# The latest version of this program is available at:
#   http://www.ijs.si/software/amavisd/
#------------------------------------------------------------------------------

use strict;
use re 'taint';

use POSIX qw(strftime);
use BerkeleyDB;

use vars qw($VERSION);  $VERSION = 1.02;

# Time-to-live limits in seconds; a child whose database record is older
# than the applicable limit is sent a SIGTERM.
my $idlettl   = 60 * 60;  # limit for idle children
my $activettl = 10 * 60;  # limit for stuck active children

my $db_home = '/var/amavis/db';  # DB databases directory
my $dbfile  = 'nanny.db';        # database file within the DB environment
my $wakeuptime = 2;              # seconds to sleep between two scans

# Format an age given in seconds as "[Nd]H:MM:SS", followed by a space
# and a textual bar whose length grows by one character per second.
#
#   $t    - age in seconds; a negative age (a timestamp lying in the
#           future, e.g. after a clock adjustment) is treated as 0
#   $char - character used to draw the bar; an empty string gives no bar
#
# For a single-character $char the bar carries a ':' at every tenth
# position and stops growing at 36 characters, the last one being '>'.
# The day count is shown only when nonzero; otherwise two spaces take
# its place so that the columns stay aligned.
# Returns the formatted string.
sub fmt_age($$) {
  my($t,$char) = @_;
  # a negative length would make substr() chop from the END of the scale,
  # and a negative operand to % would yield a misleading positive remainder
  $t = 0  if $t < 0;
  my($bar) = '';
  if ($char ne '') {
    my($decade) = ($char x 9) . ':';
    $bar = substr(($decade x 3) . ($char x 5) . '>', 0, $t);
  }
  my($s) = $t % 60;  $t = int($t/60);
  my($m) = $t % 60;  $t = int($t/60);
  my($h) = $t % 24;  $t = int($t/24);
  my($d) = $t;
  my($str) = sprintf("%d:%02d:%02d", $h,$m,$s);
  $str = (!$d ? "  " : sprintf("%dd",$d)) . $str;
  return $str . ' ' . $bar;
}

# main program starts here
#
# Opens the nanny database, then loops forever.  Each pass:
#   1. reads all records (key: pid, value: "timestamp task_id") under a
#      read lock, with signals deferred while the lock is held;
#   2. prints the state of every process, sends a SIGTERM to those whose
#      record is older than the applicable ttl, and collects the pids of
#      processes that no longer exist;
#   3. deletes the records of vanished processes under a write lock;
#   4. waits for the SIGTERMed processes to go away, escalating to SIGKILL;
#   5. flushes the output and sleeps for $wakeuptime seconds.
  $SIG{INT} = sub { die "\n" };  # do the END code block
  my($env) = BerkeleyDB::Env->new(
    '-Home'=>$db_home, '-Flags'=> DB_INIT_CDB | DB_INIT_MPOOL);
  defined $env or die "BDB no env: $BerkeleyDB::Error $!";
  my($db) = BerkeleyDB::Hash->new(
    '-Filename'=>$dbfile, '-Env'=>$env );
  defined $db or die "BDB no dbN 1: $BerkeleyDB::Error $!";
  my($cursor);  # declared at file scope so the END block can close it

  my(%waittime); # associative array on pid; counts polling rounds spent
                 # waiting for a SIGTERMed process to disappear

  for (;;) {
    $| = 0;  # buffer the report of this pass; flushed at the end of the pass
    print "\n";
    my(%proc_timestamp, %proc_task_id);
    my($stat,$key,$val); my($now);
    my($eval_stat,$interrupt); $interrupt = '';
    { # while the database is locked, signals are only noted in $interrupt
      # and get re-sent once the lock has been released
      my($h1) = sub { $interrupt = $_[0] };
      local(@SIG{qw(INT HUP TERM TSTP QUIT ALRM USR1 USR2)}) = ($h1) x 8;
      eval {
        $cursor = $db->db_cursor;  # obtain read lock
        defined $cursor or die "db_cursor error: $BerkeleyDB::Error";
        $now = time;
        while ( ($stat=$cursor->c_get($key,$val,DB_NEXT)) == 0 ) {
          if ($val !~ /^(\d+) (.*?) *\z/s) {
            print STDERR "Bad db entry: $key, $val\n";
          } else {
            ($proc_timestamp{$key}, $proc_task_id{$key}) = ($1,$2);
          }
        }
        $stat==DB_NOTFOUND  or die "c_get: $BerkeleyDB::Error $!";
        $cursor->c_close==0 or die "c_close error: $BerkeleyDB::Error";
        $cursor = undef;
      };
      # assign to the $eval_stat declared above; a fresh 'my' here would
      # shadow it and the failure would never be reported after this block
      $eval_stat = $@;
      if (defined $db) {
        $cursor->c_close  if defined $cursor;  # unlock, ignoring status
        $cursor = undef;
      }
    }
    if ($interrupt ne '') { kill($interrupt,$$) }  # resignal
    elsif ($eval_stat ne '') { chomp($eval_stat); die "BDB $eval_stat\n" }
    my(@to_be_removed,@killed);
    for my $pid (sort {$a<=>$b} keys %proc_timestamp) {
      # an empty task id, or a lone dot, marks an idle child
      my($idling) = $proc_task_id{$pid} =~ /^\.?\z/s;
      my($ttl) = $idling ? $idlettl : $activettl;
      if (!kill(0,$pid)) {  # test if the process is still there
        printf("PID %s: %-11s went away %s\n",
               $pid, $proc_task_id{$pid},
               fmt_age($now-$proc_timestamp{$pid}, $idling?'.':'=') );
        push(@to_be_removed, $pid);
      } elsif ($now <= $proc_timestamp{$pid}+$ttl) {     # all ok
        printf("PID %s: %-11s %s\n",
               $pid, $proc_task_id{$pid},
               fmt_age($now-$proc_timestamp{$pid}, $idling?'.':'=') );
      } else {                                            # send a SIGTERM
        printf("PID %s: %-11s terminated %s\n",
               $pid, $proc_task_id{$pid},
               fmt_age($now-$proc_timestamp{$pid}, $idling?'.':'=') );

        if (kill('TERM',$pid)) { push(@killed,$pid) }
        else { warn "Can't SIGTERM $pid: $!" }
      }
    }
    if (@to_be_removed) {
      local($SIG{'INT'}) = 'IGNORE';  # no interruption while write-locked
      $cursor = $db->db_cursor(DB_WRITECURSOR);  # obtain a write lock
      defined $cursor or die "BDB db_cursor error: $BerkeleyDB::Error";
      for my $key (@to_be_removed) {
        my($val); my($stat) = $cursor->c_get($key,$val,DB_SET);
        $stat==0 || $stat==DB_NOTFOUND
          or die "BDB c_get: $BerkeleyDB::Error, $!.";
        if ($stat==0) {  # remove existing entry
          $cursor->c_del==0 or die "BDB c_del: $BerkeleyDB::Error, $!.";
        }
      }
      $cursor->c_close==0 or die "BDB c_close error: $BerkeleyDB::Error";
      $cursor = undef;
    }
    my($delay) = 1;  # seconds; the first poll comes quickly, later ones
                     # are 2 seconds apart
    while (@killed) {
      sleep $delay; $delay = 2;
      for my $pid (@killed) {
        $waittime{$pid}++;  # one count per polling round
        printf("PID %s: sending SIGKILL in %d s\n", $pid, 30 - $waittime{$pid});
        if ($waittime{$pid} > 30) {  # send a SIGKILL
          kill('KILL',$pid) or warn "Can't SIGKILL $pid: $!";
          $waittime{$pid} = 0;
        } elsif (!kill(0,$pid)) {    # no longer around
          printf("PID %s: %-11s successfully killed\n",
                 $pid, $proc_task_id{$pid});
          delete($waittime{$pid});
          $pid = undef;  # $pid aliases the array element; dropped by grep below
        }
      }
      @killed = grep {defined} @killed;
      printf("Waiting for the process to terminate: %s\n",
             join(', ',@killed))  if @killed;
    }
    $| = 1;  # turning autoflush on flushes what this pass has printed
    sleep $wakeuptime;
  } # forever

# Runs at program exit: releases a cursor that may still hold a lock,
# closes the database, and reports the exit on STDERR.
END {
  if (defined $db) {
    if (defined $cursor) { $cursor->c_close }  # ignoring status
    my $close_status = $db->db_close;
    $close_status == 0 or die "BDB db_close error: $BerkeleyDB::Error $!";
  }
  print STDERR "exited\n";
}