extract-message-from-mbox   [plain text]


#!/usr/bin/perl
# <@LICENSE>
# Copyright 2004 Apache Software Foundation
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

use strict;
use bytes;

use Getopt::Std;

# Package variables filled in by getopts():
#   $opt_f  argument of -f: file to read the message from
#   $opt_h  set by -h: show the usage text
#   $opt_m  set by -m: input is in "mass-check" format
#   $opt_H  set by -H: output only the headers
use vars qw($opt_f $opt_h $opt_m $opt_H);

# "f:" takes a value; h, m and H are plain switches.
getopts('f:hmH');

# usage()
#
# Abort the program by die()ing with the command-line synopsis.  The text
# ends in a newline, so Perl does not append file/line information.
#
# The synopsis shows "-f file" (not "-f=file"): the options are parsed with
# Getopt::Std, which would make a leading "=" part of the file name.
sub usage {
  die "extract-message-from-mbox [-f file] [-m] [-H] offset

  Extracts the message starting at offset from file (or stdin). Very
  useful in combination with mass-check logs and mboxes. If the -m
  option is used, the input should be in \"mass-check\" format (as
  output by mass-check). Use the -H option to just output headers.
";
}

# Print the usage text (and exit) when help was requested, or when no offset
# was supplied outside of mass-check (-m) mode.
usage() if $opt_h;
usage() unless $opt_m || defined $ARGV[0];

my $offset = $ARGV[0];

if (!$opt_m) {
  # Without -f, hand extract() the '&STDIN' name so that it reads the
  # mailbox from standard input.
  $opt_f = '&STDIN' unless $opt_f;
  extract($opt_f, $offset);
} else {
  # Mass-check mode: file names and offsets come from the log on stdin.
  masscheck();
}

# extract($file, $offset)
#
# Print to the currently selected output handle the message that starts
# $offset bytes into $file.
#
#   $file    path of the mbox (or single-message file) to read; the special
#            name '&STDIN' reads from a duplicate of standard input.
#   $offset  byte position at which the wanted message begins.  Input is
#            skipped line by line until at least that many bytes have been
#            consumed; an offset of 0 (or less) starts at the first line.
#
# Once the start has been reached, lines are printed until the second
# "From " separator is seen (the first one normally being the wanted
# message's own separator line) or the input ends.  When the global $opt_H
# is true, printing stops after the first empty line, i.e. after the headers.
#
# Dies if the file cannot be opened.
sub extract {
  my $file = shift;
  my $offset = shift;

  # Three-argument open with an undefined name would silently open an
  # anonymous temporary file, so reject a missing name explicitly.
  die "Could not open file: no file name given\n"
    unless defined $file && length $file;

  my $in;
  if ($file eq '&STDIN') {
    open($in, '<&', \*STDIN) or die "Could not open $file: $!\n";
  } else {
    open($in, '<', $file) or die "Could not open $file: $!\n";
  }

  # 0 = still skipping towards the offset, 1 = start reached,
  # 2 = first "From " line seen, 3 = second "From " line seen (stop).
  # With a non-positive offset the very first line already belongs to the
  # message, so no line may be consumed by the skipping phase.
  my $found = (($offset || 0) <= 0) ? 1 : 0;

  while (defined(my $line = <$in>)) {
    if (!$found) {
      $offset -= length $line;
      $found = 1 if $offset <= 0;
      next;
    }

    $found++ if $line =~ /^From /;
    last if $found == 3;
    print $line;
    last if $opt_H && $line =~ /^$/;  # empty line? end of headers
  }

  close $in;
}

# masscheck()
#
# Read "mass-check" formatted lines from STDIN and call extract() for the
# message named on each line.  The message identifier is taken from the third
# whitespace-separated field; an identifier of the form "<file>.<digits>" is
# split into a file name and a byte offset, anything else is treated as a
# plain file name and extracted from offset 0.
#
# Blank lines, lines starting with '#' and lines with fewer than three fields
# carry no message identifier and are skipped, so they do not end up as bogus
# file names that make extract() die.
sub masscheck {
  while (defined(my $line = <STDIN>)) {
    next if $line =~ /^\s*(?:#|$)/;

    # split ' ' ignores leading whitespace, so the field index is stable.
    my $mail = (split(' ', $line))[2];
    next unless defined $mail;

    # Underscores in the identifier are turned into spaces.
    # NOTE(review): presumably this reverses mass-check's encoding of
    # whitespace in path names; it also rewrites genuine underscores.
    $mail =~ tr/_/ /;

    if ($mail =~ /^(.*)\.(\d+)$/) {
      my ($file, $offset) = ($1, $2);
      extract($file, $offset);
    } else { # could just be a filename. Lets print it anyway.
      extract($mail, 0);
    }
  }
}