use File::Find;
# Autoflush the currently selected output handle (STDOUT unless changed) so
# the generated commands are emitted promptly alongside the STDERR progress.
$| = 1;

# Optional "-m FILE": preload the message-id table from FILE before scanning.
# Each useful line of FILE has the form
#     <path>:Message-I?: <id>
# (presumably the output of a recursive grep for the Message-ID header --
# TODO confirm with whoever generates it). Lines that do not match are
# skipped. A FILE of "-" reads from standard input.
if (defined $ARGV[0] && $ARGV[0] eq '-m') {
    shift @ARGV;
    my $msgidsfile = shift @ARGV;
    defined $msgidsfile
        or die "-m requires a file of message-ids as its argument\n";

    %file = ();
    $count = 0;

    # Three-argument open never interprets characters in the file name, so
    # the "-" convention for standard input has to be handled explicitly.
    my $from_stdin = ($msgidsfile eq '-');
    my $in;
    if ($from_stdin) {
        $in = \*STDIN;
    } else {
        open($in, '<', $msgidsfile)
            or die "cannot read msgids from $msgidsfile: $!";
    }

    while (my $line = <$in>) {
        # The path part is matched non-greedily up to the first
        # ":Message-I?: " so that paths which themselves contain colons
        # (e.g. Maildir names ending in ":2,S") are not skipped.
        $line =~ /^(.+?):Message-I.: (\S+)/ or next;
        my ($f, $m) = ($1, $2);

        # Store the id without its surrounding angle brackets.
        $m =~ s/^<//;
        $m =~ s/>$//;

        $file{$m} = $f;
        $count++;
        progress($count, "m");
    }
    close $in unless $from_stdin;

    warn "\nfound $count message-ids.\n";
}
# Start the scan with fresh tallies: $count is incremented by the wanted()
# callback for what it visits, $dups for every duplicate it reports.
($count, $dups) = (0, 0);

# Walk every directory named on the command line. no_chdir keeps the
# process in its starting directory while File::Find traverses the tree.
my $visit = \&wanted;
for my $top (@ARGV) {
    File::Find::find({ no_chdir => 1, wanted => $visit }, $top);
}

# Final summary on STDERR.
warn "\nscanned $count mails, $dups dups.\n";
# File::Find callback: examine one path found during the directory walk.
#
# Reads the header section of the file (up to the first blank line), takes
# the first Message-ID header, strips its angle brackets and looks it up in
# the global %file table:
#   * id already recorded for a different path -> print a "# DUP:" comment
#     and an "rm -f" command for this path on STDOUT, increment $dups;
#   * otherwise -> record this path as the holder of that id.
# $count is incremented for every regular file visited. Files without a
# Message-ID header are counted but otherwise ignored.
sub wanted {
    my $path = $File::Find::name;

    # File::Find also reports directories and other non-file entries; only
    # regular files can be mails, so do not count or open anything else.
    return unless -f $path;
    $count++;

    # Lexical handle and three-argument open: nothing in the file name is
    # interpreted as an open mode, and no global handle is shared.
    my $fh;
    unless (open($fh, '<', $path)) {
        warn "cannot read $path: $!";
        return;    # nothing to read; do not fall through to the loop
    }

    while (my $line = <$fh>) {
        # A blank line (LF or CRLF terminated) ends the header section.
        last if $line =~ /^\r?$/;

        # Header field names are case-insensitive and the whitespace after
        # the colon is optional.
        $line =~ /^Message-ID:[ \t]*(\S+)/i or next;
        my $m = $1;
        $m =~ s/^<//;
        $m =~ s/>$//;

        # A path is never a duplicate of itself (it may already be in the
        # table via -m, or be reached twice through overlapping arguments).
        if (exists $file{$m} && $file{$m} ne $path) {
            # Keep the comment on one line even for odd file names, so no
            # part of a name can end up as a separate shell command.
            (my $note = "# DUP: $path dup of $file{$m}") =~ tr/\r\n/??/;
            print "\n$note\n";
            print "rm -f " . _shell_quote($path) . "\n";
            $dups++;
            progress($count, "*");
        } else {
            $file{$m} = $path;
            progress($count, ".");
        }
        last;    # only the first Message-ID header is considered
    }
    close $fh;
}

# Return $text in a form that is safe as a single word in a POSIX shell
# command. Names made only of harmless characters are returned unchanged
# (so typical output stays as before); anything else is single-quoted.
sub _shell_quote {
    my ($text) = @_;
    return $text if $text =~ m{\A[A-Za-z0-9_./,:=+\@%-]+\z};
    $text =~ s/'/'\\''/g;
    return "'$text'";
}
# Progress indicator on STDERR.
#   $seen - running item count supplied by the caller
#   $mark - the character(s) to print for this tick
# One mark is printed for every 500th item, and a newline follows after
# every 70 marks' worth of items so the output wraps into rows.
sub progress {
    my ($seen, $mark) = @_;
    my $per_mark = 500;
    my $per_row  = $per_mark * 70;

    print STDERR $mark unless $seen % $per_mark;
    if ($seen % $per_row == 0) {
        print STDERR "\n";
    }
}