package Convert::UUlib;
use Carp;
require Exporter;
require DynaLoader;
$VERSION = "1.03";
@ISA = qw(Exporter DynaLoader);
@_consts = qw(
ACT_COPYING ACT_DECODING ACT_ENCODING ACT_IDLE ACT_SCANNING
FILE_DECODED FILE_ERROR FILE_MISPART FILE_NOBEGIN FILE_NODATA
FILE_NOEND FILE_OK FILE_READ FILE_TMPFILE
MSG_ERROR MSG_FATAL MSG_MESSAGE MSG_NOTE MSG_PANIC MSG_WARNING
OPT_BRACKPOL OPT_DEBUG OPT_DESPERATE OPT_DUMBNESS OPT_ENCEXT
OPT_ERRNO OPT_FAST OPT_IGNMODE OPT_IGNREPLY OPT_OVERWRITE OPT_PREAMB
OPT_PROGRESS OPT_SAVEPATH OPT_TINYB64 OPT_USETEXT OPT_VERBOSE
OPT_VERSION OPT_REMOVE OPT_MOREMIME OPT_DOTDOT
RET_CANCEL RET_CONT RET_EXISTS RET_ILLVAL RET_IOERR RET_NODATA
RET_NOEND RET_NOMEM RET_OK RET_UNSUP
B64_ENCODED BH_ENCODED PT_ENCODED QP_ENCODED
XX_ENCODED UU_ENCODED YENC_ENCODED
);
@_funcs = qw(
Initialize CleanUp GetOption SetOption strerror SetMsgCallback
SetBusyCallback SetFileCallback SetFNameFilter SetFileNameCallback
FNameFilter LoadFile GetFileListItem RenameFile DecodeToTemp
RemoveTemp DecodeFile InfoFile Smerge QuickDecode EncodeMulti
EncodePartial EncodeToStream EncodeToFile E_PrepSingle
E_PrepPartial
straction strencoding strmsglevel
);
@EXPORT = @_consts;
@EXPORT_OK = @_funcs;
%EXPORT_TAGS = (all => [@_consts,@_funcs], constants => \@_consts);
bootstrap Convert::UUlib $VERSION;
Initialize();
for (@_consts) {
my $constant = constant($_);
*$_ = sub () { $constant };
}
sub straction($) {
return 'copying' if $_[0] == &ACT_COPYING;
return 'decoding' if $_[0] == &ACT_DECODING;
return 'encoding' if $_[0] == &ACT_ENCODING;
return 'idle' if $_[0] == &ACT_IDLE;
return 'scanning' if $_[0] == &ACT_SCANNING;
'unknown';
}
sub strencoding($) {
return 'uuencode' if $_[0] == &UU_ENCODED;
return 'base64' if $_[0] == &B64_ENCODED;
return 'yenc' if $_[0] == &YENC_ENCODED;
return 'binhex' if $_[0] == &BH_ENCODED;
return 'plaintext' if $_[0] == &PT_ENCODED;
return 'quoted-printable' if $_[0] == &QP_ENCODED;
return 'xxencode' if $_[0] == &XX_ENCODED;
'unknown';
}
sub strmsglevel($) {
return 'message' if $_[0] == &MSG_MESSAGE;
return 'note' if $_[0] == &MSG_NOTE;
return 'warning' if $_[0] == &MSG_WARNING;
return 'error' if $_[0] == &MSG_ERROR;
return 'panic' if $_[0] == &MSG_PANIC;
return 'fatal' if $_[0] == &MSG_FATAL;
'unknown';
}
1;
__END__
=head1 NAME
Convert::UUlib - Perl interface to the uulib library (a.k.a. uudeview/uuenview).
=head1 SYNOPSIS
use Convert::UUlib ':all';
# read all the files named on the commandline and decode them
# into the CURRENT directory. See below for a longer example.
LoadFile $_ for @ARGV;
for (my $i = 0; my $uu = GetFileListItem $i; $i++) {
if ($uu->state & FILE_OK) {
$uu->decode;
print $uu->filename, "\n";
}
}
=head1 DESCRIPTION
Read the file doc/library.pdf from the distribution for in-depth
information about the C-library used in this interface, and the rest of
this document and especially the non-trivial decoder program at the end.
=head1 EXPORTED CONSTANTS
=head2 Action code constants
ACT_IDLE we don't do anything
ACT_SCANNING scanning an input file
ACT_DECODING decoding into a temp file
ACT_COPYING copying temp to target
ACT_ENCODING encoding a file
=head2 Message severity levels
MSG_MESSAGE just a message, nothing important
MSG_NOTE something that should be noticed
MSG_WARNING important msg, processing continues
MSG_ERROR processing has been terminated
MSG_FATAL decoder cannot process further requests
MSG_PANIC recovery impossible, app must terminate
=head2 Options
OPT_VERSION version number MAJOR.MINORplPATCH (ro)
OPT_FAST assumes only one part per file
OPT_DUMBNESS switch off the program's intelligence
OPT_BRACKPOL give numbers in [] higher precendence
OPT_VERBOSE generate informative messages
OPT_DESPERATE try to decode incomplete files
OPT_IGNREPLY ignore RE:plies (off by default)
OPT_OVERWRITE whether it's OK to overwrite ex. files
OPT_SAVEPATH prefix to save-files on disk
OPT_IGNMODE ignore the original file mode
OPT_DEBUG print messages with FILE/LINE info
OPT_ERRNO get last error code for RET_IOERR (ro)
OPT_PROGRESS retrieve progress information
OPT_USETEXT handle text messages
OPT_PREAMB handle Mime preambles/epilogues
OPT_TINYB64 detect short B64 outside of Mime
OPT_ENCEXT extension for single-part encoded files
OPT_REMOVE remove input files after decoding (dangerous)
OPT_MOREMIME strict MIME adherence
OPT_DOTDOT ".."-unescaping has not yet been done on input files
=head2 Result/Error codes
RET_OK everything went fine
RET_IOERR I/O Error - examine errno
RET_NOMEM not enough memory
RET_ILLVAL illegal value for operation
RET_NODATA decoder didn't find any data
RET_NOEND encoded data wasn't ended properly
RET_UNSUP unsupported function (encoding)
RET_EXISTS file exists (decoding)
RET_CONT continue -- special from ScanPart
RET_CANCEL operation canceled
=head2 File States
This code is zero, i.e. "false":
UUFILE_READ Read in, but not further processed
The following state codes are or'ed together:
FILE_MISPART Missing Part(s) detected
FILE_NOBEGIN No 'begin' found
FILE_NOEND No 'end' found
FILE_NODATA File does not contain valid uudata
FILE_OK All Parts found, ready to decode
FILE_ERROR Error while decoding
FILE_DECODED Successfully decoded
FILE_TMPFILE Temporary decoded file exists
=head2 Encoding types
UU_ENCODED UUencoded data
B64_ENCODED Mime-Base64 data
XX_ENCODED XXencoded data
BH_ENCODED Binhex encoded
PT_ENCODED Plain-Text encoded (MIME)
QP_ENCODED Quoted-Printable (MIME)
YENC_ENCODED yEnc encoded (non-MIME)
=head1 EXPORTED FUNCTIONS
=head2 Initializing and cleanup
Initialize is automatically called when the module is loaded and allocates
quite a small amount of memory for todays machines ;) CleanUp releases that
again.
On my machine, a fairly complete decode with DBI backend needs about 10MB
RSS to decode 20000 files.
=over 4
=item Initialize
Not normally necessary, (re-)initializes the library.
=item CleanUp
Not normally necessary, could be called at the end to release memory
before starting a new decoding round.
=back
=head2 Setting and querying options
=over 4
=item $option = GetOption OPT_xxx
=item SetOption OPT_xxx, opt-value
=back
See the C<OPT_xxx> constants above to see which options exist.
=head2 Setting various callbacks
=over 4
=item SetMsgCallback [callback-function]
=item SetBusyCallback [callback-function]
=item SetFileCallback [callback-function]
=item SetFNameFilter [callback-function]
=back
=head2 Call the currently selected FNameFilter
=over 4
=item $file = FNameFilter $file
=back
=head2 Loading sourcefiles, optionally fuzzy merge and start decoding
=over 4
=item ($retval, $count) = LoadFile $fname, [$id, [$delflag, [$partno]]]
Load the given file and scan it for encoded contents. Optionally tag it
with the given id, and if C<$delflag> is true, delete the file after it
is no longer necessary. If you are certain of the part number, you can
specify it as the last argument.
A better (usually faster) way of doing this is using the C<SetFNameFilter>
functionality.
=item $retval = Smerge $pass
If you are desperate, try to call C<Smerge> with increasing C<$pass>
values, beginning at C<0>, to try to merge parts that usually would not
have been merged.
Most probably this will result in garbled files, so never do this by
default.
=item $item = GetFileListItem $item_number
Return the C<$item> structure for the C<$item_number>'th found file, or
C<undef> of no file with that number exists.
The first file has number C<0>, and the series has no holes, so you can
iterate over all files by starting with zero and incrementing until you
hit C<undef>.
=back
=head2 Decoding files
=over 4
=item $retval = $item->rename($newname)
Change the ondisk filename where the decoded file will be saved.
=item $retval = $item->decode_temp
Decode the file into a temporary location, use C<< $item->infile >> to
retrieve the temporary filename.
=item $retval = $item->remove_temp
Remove the temporarily decoded file again.
=item $retval = $item->decode([$target_path])
Decode the file to it's destination, or the given target path.
=item $retval = $item->info(callback-function)
=back
=head2 Querying (and setting) item attributes
=over 4
=item $state = $item->state
=item $mode = $item->mode([newmode])
=item $uudet = $item->uudet
=item $size = $item->size
=item $filename = $item->filename([newfilename})
=item $subfname = $item->subfname
=item $mimeid = $item->mimeid
=item $mimetype = $item->mimetype
=item $binfile = $item->binfile
=back
=head2 Information about source parts
=over 4
=item $parts = $item->parts
Return information about all parts (source files) used to decode the file
as a list of hashrefs with the following structure:
{
partno => <integer describing the part number, starting with 1>,
# the following member sonly exist when they contain useful information
sfname => <local pathname of the file where this part is from>,
filename => <the ondisk filename of the decoded file>,
subfname => <used to cluster postings, possibly the posting filename>,
subject => <the subject of the posting/mail>,
origin => <the possible source (From) address>,
mimetype => <the possible mimetype of the decoded file>,
mimeid => <the id part of the Content-Type>,
}
Usually you are interested mostly the C<sfname> and possibly the C<partno>
and C<filename> members.
=back
=head2 Functions below not documented and not very well tested
QuickDecode
EncodeMulti
EncodePartial
EncodeToStream
EncodeToFile
E_PrepSingle
E_PrepPartial
=head2 EXTENSION FUNCTIONS
Functions found in this module but not documented in the uulib documentation:
=over 4
=item $msg = straction ACT_xxx
Return a human readable string representing the given action code.
=item $msg = strerror RET_xxx
Return a human readable string representing the given error code.
=item $str = strencoding xxx_ENCODED
Return the name of the encoding type as a string.
=item $str = strmsglevel MSG_xxx
Returns the message level as a string.
=item SetFileNameCallback $cb
Sets (or queries) the FileNameCallback, which is called whenever the
decoding library can't find a filename and wants to extract a filename
from the subject line of a posting. The callback will be called with
two arguments, the subject line and the current candidate for the
filename. The latter argument can be C<undef>, which means that no
filename could be found (and likely no one exists, so it is safe to also
return C<undef> in this case). If it doesn't return anything (not even
C<undef>!), then nothing happens, so this is a no-op callback:
sub cb {
return ();
}
If it returns C<undef>, then this indicates that no filename could be
found. In all other cases, the return value is taken to be the filename.
This is a slightly more useful callback:
sub cb {
return unless $_[1]; # skip "Re:"-plies et al.
my ($subject, $filename) = @_;
# if we find some *.rar, take it
return $1 if $subject =~ /(\w+\.rar)/;
# otherwise just pass what we have
return ();
}
=back
=head1 LARGE EXAMPLE DECODER
This is the file C<example-decoder> from the distribution, put here
instead of more thorough documentation.
# decode all the files in the directory uusrc/ and copy
# the resulting files to uudst/
use Convert::UUlib ':all';
sub namefilter {
my($path)=@_;
$path=~s/^.*[\/\\]//;
$path;
}
sub busycb {
my ($action, $curfile, $partno, $numparts, $percent, $fsize) = @_;
$_[0]=straction($action);
print "busy_callback(", (join ",",@_), ")\n";
0;
}
SetOption OPT_IGNMODE, 1;
SetOption OPT_VERBOSE, 1;
# show the three ways you can set callback functions. I normally
# prefer the one with the sub inplace.
SetFNameFilter \&namefilter;
SetBusyCallback "busycb", 333;
SetMsgCallback sub {
my ($msg, $level) = @_;
print uc strmsglevel $_[1], ": $msg\n";
};
# the following non-trivial FileNameCallback takes care
# of some subject lines not detected properly by uulib:
SetFileNameCallback sub {
return unless $_[1]; # skip "Re:"-plies et al.
local $_ = $_[0];
# the following rules are rather effective on some newsgroups,
# like alt.binaries.games.anime, where non-mime, uuencoded data
# is very common
# if we find some *.rar, take it as the filename
return $1 if /(\S{3,}\.(?:[rstuvwxyz]\d\d|rar))\s/i;
# one common subject format
return $1 if /- "(.{2,}?\..+?)" (?:yenc )?\(\d+\/\d+\)/i;
# - filename.par (04/55)
return $1 if /- "?(\S{3,}\.\S+?)"? (?:yenc )?\(\d+\/\d+\)/i;
# - (xxx) No. 1 sayuri81.jpg 756565 bytes
# - (20 files) No.17 Roseanne.jpg [2/2]
return $1 if /No\.[ 0-9]+ (\S+\....) (?:\d+ bytes )?\[/;
# otherwise just pass what we have
return ();
};
# now read all files in the directory uusrc/*
for(<uusrc/*>) {
my($retval,$count)=LoadFile ($_, $_, 1);
print "file($_), status(", strerror $retval, ") parts($count)\n";
}
SetOption OPT_SAVEPATH, "uudst/";
# now wade through all files and their source parts
$i = 0;
while ($uu = GetFileListItem($i)) {
$i++;
print "file nr. $i";
print " state ", $uu->state;
print " mode ", $uu->mode;
print " uudet ", strencoding $uu->uudet;
print " size ", $uu->size;
print " filename ", $uu->filename;
print " subfname ", $uu->subfname;
print " mimeid ", $uu->mimeid;
print " mimetype ", $uu->mimetype;
print "\n";
# print additional info about all parts
for ($uu->parts) {
while (my ($k, $v) = each %$_) {
print "$k > $v, ";
}
print "\n";
}
$uu->decode_temp;
print " temporarily decoded to ", $uu->binfile, "\n";
$uu->remove_temp;
print strerror $uu->decode;
print " saved as uudst/", $uu->filename, "\n";
}
print "cleanup...\n";
CleanUp();
=head1 AUTHOR
Marc Lehmann <pcg@goof.com>, the original uulib library was written
by Frank Pilhofer <fp@informatik.uni-frankfurt.de>, and later heavily
bugfixed by Marc Lehmann.
=head1 SEE ALSO
perl(1), uudeview homepage at http://www.uni-frankfurt.de/~fp/uudeview/.
=cut