package Mail::Address;
use strict;
use Carp;
use vars qw($VERSION);
use locale;
$VERSION = "1.65";
sub Version { $VERSION }
sub _extract_name
{
my $self = @_ && ref $_[0] ? shift : undef;
local $_ = shift or return '';
if($] eq 5.008)
{ require utf8;
eval 'utf8::downgrade($_)';
}
s/^\s+//;
s/\s+$//;
s/\s+/ /;
return "" if /^[\d ]+$/;
s/^\((.*)\)$/$1/;
s/^"(.*)"$/$1/;
s/\(.*?\)//g;
s/\\//g;
s/^"(.*)"$/$1/;
s/^([^\s]+) ?, ?(.*)$/$2 $1/;
s/,.*//;
unless( m/[A-Z]/ && m/[a-z]/ )
{ s/\b(\w+)/\L\u$1/igo;
s/\bMc(\w)/Mc\u$1/igo;
s/\bo'(\w)/O'\u$1/igo;
s/\b(x*(ix)?v*(iv)?i*)\b/\U$1/igo;
}
s/\[[^\]]*\]//g;
s/(^[\s'"]+|[\s'"]+$)//g;
s/\s{2,}/ /g;
return $_;
}
sub _tokenise {
local($_) = join(',', @_);
my(@words,$snippet,$field);
s/\A\s+//;
s/[\r\n]+/ /g;
while ($_ ne '')
{
$field = '';
if( s/^\s*\(/(/ ) # (...)
{
my $depth = 0;
PAREN: while(s/^(\(([^\(\)\\]|\\.)*)//)
{
$field .= $1;
$depth++;
while(s/^(([^\(\)\\]|\\.)*\)\s*)//)
{
$field .= $1;
last PAREN unless --$depth;
$field .= $1 if s/^(([^\(\)\\]|\\.)+)//;
}
}
carp "Unmatched () '$field' '$_'"
if $depth;
$field =~ s/\s+\Z//;
push(@words, $field);
next;
}
s/^("([^"\\]|\\.)*")\s*// # "..."
|| s/^(\[([^\]\\]|\\.)*\])\s*// # [...]
|| s/^([^\s\Q()<>\@,;:\\".[]\E]+)\s*//
|| s/^([\Q()<>\@,;:\\".[]\E])\s*//
and do { push(@words, $1); next; };
croak "Unrecognised line: $_";
}
push(@words, ",");
\@words;
}
sub _find_next {
my $idx = shift;
my $tokens = shift;
my $len = shift;
while($idx < $len) {
my $c = $tokens->[$idx];
return $c if $c eq ',' || $c eq ';' || $c eq '<';
$idx++;
}
return "";
}
sub _complete {
my $pkg = shift;
my $phrase = shift;
my $address = shift;
my $comment = shift;
my $o = undef;
if(@{$phrase} || @{$comment} || @{$address}) {
$o = $pkg->new(join(" ",@{$phrase}),
join("", @{$address}),
join(" ",@{$comment}));
@{$phrase} = ();
@{$address} = ();
@{$comment} = ();
}
return $o;
}
sub new {
my $pkg = shift;
my $me = bless [@_], $pkg;
return $me;
}
sub parse {
my $pkg = shift;
my @line = grep { defined $_} @_;
my $line = join '', @line;
local $_;
my @phrase = ();
my @comment = ();
my @address = ();
my @objs = ();
my $depth = 0;
my $idx = 0;
my $tokens = _tokenise(@line);
my $len = @$tokens;
my $next = _find_next($idx,$tokens,$len);
for( ; $idx < $len ; $idx++) {
$_ = $tokens->[$idx];
if(substr($_,0,1) eq "(") {
push(@comment,$_);
}
elsif($_ eq '<') {
$depth++;
}
elsif($_ eq '>') {
$depth-- if $depth;
}
elsif($_ eq ',' || $_ eq ';') {
warn "Unmatched '<>' in $line" if($depth);
my $o = _complete($pkg,\@phrase, \@address, \@comment);
push(@objs, $o) if(defined $o);
$depth = 0;
$next = _find_next($idx+1,$tokens,$len);
}
elsif($depth) {
push(@address,$_);
}
elsif($next eq "<") {
push(@phrase,$_);
}
elsif( /\A[\Q.\@:;\E]\Z/ || !@address || $address[-1] =~ /\A[\Q.\@:;\E]\Z/) {
push(@address,$_);
}
else {
warn "Unmatched '<>' in $line" if($depth);
my $o = _complete($pkg,\@phrase, \@address, \@comment);
push(@objs, $o) if(defined $o);
$depth = 0;
push(@address,$_);
}
}
@objs;
}
sub set_or_get {
my $me = shift;
my $i = shift;
my $val = $me->[$i];
$me->[$i] = shift if(@_);
$val;
}
sub phrase { set_or_get(shift,0,@_) }
sub address { set_or_get(shift,1,@_) }
sub comment { set_or_get(shift,2,@_) }
sub format {
my @fmts = ();
my $me;
my $atext = '[\-\w !#$%&\'*+/=?^`{|}~]';
foreach $me (@_) {
my($phrase,$addr,$comment) = @{$me};
my @tmp = ();
if(defined $phrase && length($phrase)) {
push @tmp, $phrase =~ /^(?:\s*$atext\s*)+$/ ? $phrase
: $phrase =~ /(?<!\\)"/ ? $phrase
: qq("$phrase");
push(@tmp, "<" . $addr . ">") if(defined $addr && length($addr));
}
else {
push(@tmp, $addr) if(defined $addr && length($addr));
}
if(defined($comment) && $comment =~ /\S/) {
$comment =~ s/^\s*\(?/(/;
$comment =~ s/\)?\s*$/)/;
}
push(@tmp, $comment) if(defined $comment && length($comment));
push(@fmts, join(" ", @tmp)) if(scalar(@tmp));
}
return join(", ", @fmts);
}
sub name
{
my $me = shift;
my $phrase = $me->phrase;
my $addr = $me->address;
$phrase = $me->comment unless(defined($phrase) && length($phrase));
my $name = $me->_extract_name($phrase);
# first.last@domain address
if($name eq '' && $addr =~ /([^\%\.\@_]+([\._][^\%\.\@_]+)+)[\@\%]/o)
{
($name = $1) =~ s/[\._]+/ /go;
$name = _extract_name($name);
}
if($name eq '' && $addr =~ m#/g=#oi)
# X400 style address
{
my ($f) = $addr =~ m#g=([^/]*)#oi;
my ($l) = $addr =~ m#s=([^/]*)#io;
$name = _extract_name($f . " " . $l);
}
return length($name) ? $name : undef;
}
sub host {
my $me = shift;
my $addr = $me->address || '';
my $i = rindex($addr,'@');
my $host = ($i >= 0) ? substr($addr,$i+1) : undef;
return $host;
}
sub user {
my $me = shift;
my $addr = $me->address;
my $i = index($addr,'@');
my $user = ($i >= 0) ? substr($addr,0,$i) : $addr;
return $user;
}
sub path {
return ();
}
sub canon {
my $me = shift;
return ($me->host, $me->user, $me->path);
}
1;
__END__
=head1 NAME
Mail::Address - Parse mail addresses
=head1 SYNOPSIS
use Mail::Address;
my @addrs = Mail::Address->parse($line);
foreach $addr (@addrs) {
print $addr->format,"\n";
}
=head1 DESCRIPTION
C<Mail::Address> extracts and manipulates RFC822 compilant email
addresses. As well as being able to create C<Mail::Address> objects
in the normal manner, C<Mail::Address> can extract addresses from
the To and Cc lines found in an email message.
=head1 CONSTRUCTORS
=over 4
=item new( PHRASE, ADDRESS, [ COMMENT ])
Mail::Address->new("Perl5 Porters", "perl5-porters@africa.nicoh.com");
Create a new C<Mail::Address> object which represents an address with the
elements given. In a message these 3 elements would be seen like:
PHRASE <ADDRESS> (COMMENT)
ADDRESS (COMMENT)
=item parse( LINE )
Mail::Address->parse($line);
Parse the given line a return a list of extracted C<Mail::Address> objects.
The line would normally be one taken from a To,Cc or Bcc line in a message
=back
=head1 METHODS
=over 4
=item phrase ()
Return the phrase part of the object.
=item address ()
Return the address part of the object.
=item comment ()
Return the comment part of the object
=item format ()
Return a string representing the address in a suitable form to be placed
on a To,Cc or Bcc line of a message
=item name ()
Using the information contained within the object attempt to identify what
the person or groups name is
=item host ()
Return the address excluding the user id and '@'
=item user ()
Return the address excluding the '@' and the mail domain
=item path ()
Unimplemented yet but should return the UUCP path for the message
=item canon ()
Unimplemented yet but should return the UUCP canon for the message
=back
=head1 AUTHOR
Graham Barr. Maintained by Mark Overmeer <mailtools@overmeer.net>
=head1 COPYRIGHT
Copyright (c) 2002-2003 Mark Overmeer, 1995-2001 Graham Barr. All rights
reserved. This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut