package Razor2::String;
use Digest::SHA1 qw(sha1_hex);
use URI::Escape;
use Razor2::Preproc::enBase64;
require Exporter;
use vars qw ( @ISA $VERSION @EXPORT );
@ISA = qw(Exporter);
@EXPORT = qw( hmac_sha1 xor_key
from_batched_query
to_batched_query findsimilar debugobj
makesis parsesis makesis_nue parsesis_nue
hextobase64 base64tohex
randstr round
hex_dump prep_mail
prehash printb64table
hexbits2hash hmac2_sha1
fisher_yates_shuffle
);
my %b64table;
BEGIN {
foreach (0..25) { $b64table{$_} = chr($_ + 65); }
foreach (26..51) { $b64table{$_} = chr($_ + 71); }
foreach (52..61) { $b64table{$_} = chr($_ - 4 ); }
$b64table{62} = "-";
$b64table{63} = "_";
}
sub printb64table {
foreach (0..63) {
print "$_ = $b64table{$_}\n";
}
}
sub hmac_sha1 {
my $text = shift;
my $iv1 = shift;
my $iv2 = shift;
my ($b64, $hex) = hmac2_sha1($text, $iv1, $iv2);
return $b64;
}
sub hmac2_sha1 {
my $text = shift;
my $iv1 = shift;
my $iv2 = shift;
return unless $text && $iv1 && $iv2;
die "no ref's allowed" if ref($text);
my $ctx = Digest::SHA1->new;
$ctx->add($iv2);
$ctx->add($text);
my $digest = $ctx->hexdigest;
$ctx = Digest::SHA1->new;
$ctx->add($iv1);
$ctx->add($digest);
$digest = $ctx->hexdigest;
return (hextobase64($digest), $digest);
}
sub hmac3_sha1 {
my $text = shift;
my $iv1 = shift;
my $iv2 = shift;
return unless $text && $iv1 && $iv2;
die "no ref's allowed" if ref($text);
my $digest = $text;
$digest = sha1_hex($iv1 . $digest);
$digest = sha1_hex($iv2 . $digest);
return (hextobase64($digest), $digest);
}
sub xor_key {
my $key = shift;
my $iv1 = "\x36" x 64 ^ $key;
my $iv2 = "\x5C" x 64 ^ $key;
return ($iv1, $iv2);
}
sub hextobase64 {
my $hs = shift;
my @b64s;
my $i = 0;
while ($i < length($hs)) {
my $hex3 = substr $hs, $i, 3;
$i += 3;
my $bv = pack "h3", $hex3;
my $cur = 0;
foreach (0..5) { my $bt = vec($bv,$_,1); $cur += $bt; $cur *= 2; }
push @b64s, $cur/2; $cur = 0;
foreach (6..11) { my $bt = vec($bv,$_,1); $cur += $bt; $cur *= 2; }
push @b64s, $cur/2;
}
my $bs = "";
foreach (@b64s) { $bs .= $b64table{$_}; }
return $bs;
}
sub base64tohex {
my $bs = shift;
my @b64s;
my $hexstr;
foreach my $chr (split '', $bs) {
foreach (keys %b64table) {
push @b64s, $_ if $b64table{$_} eq $chr;
}
}
while (@b64s) {
my $bv = ""; vec($bv,0,16) = 0;
my $a = shift @b64s;
foreach (0..5) {my $i=5-$_; my $bt=$a%2; vec($bv,$i,1) = $bt; $a = int($a/2); }
$a = shift @b64s;
foreach (6..11) {my $i=17-$_;my $bt=$a%2; vec($bv,$i,1) = $bt; $a = int($a/2); }
$hexstr .= unpack "h3", $bv;
}
$hexstr = substr($hexstr, 0, 40) if (length($hexstr) == 42);
$hexstr = substr($hexstr, 0, 64) if (length($hexstr) == 66);
return $hexstr;
}
sub makesis {
my $first = shift;
my $data;
if (ref($first) eq 'HASH') {
$data = $first;
} else {
$data = {$first, @_};
}
my $sis = '';
foreach (sort keys %$data) {
$sis .= "$_=" . (exists $data->{$_} ? uri_escape($data->{$_}) : '') . '&';
}
return substr($sis, 0, length($sis)-1) . "\r\n";
}
sub parsesis {
my $query = $_[1] || {};
my $wantref = 1 if $_[1];
$_[0] =~ s/\n$//; # SIS shouldn't have this!
$_[0] =~ s/\r$//; # SIS shouldn't have this!
my @pairs = split /\&/, $_[0];
for (@pairs) {
my ($key, $value) = split /=/, $_;
$query->{$key} = defined $value ? uri_unescape($value) : '';
}
return $query if $wantref;
return %$query;
}
sub makesis_nue {
my $first = shift;
my $data;
if (ref($first) eq 'HASH') {
$data = $first;
} else {
$data = {$first, @_};
}
my $sis = '';
foreach (sort keys %$data) {
$sis .= "$_=";
$sis .= $data->{$_} if exists($data->{$_});
$sis .= '&';
}
return substr($sis, 0, length($sis)-1) . "\r\n";
}
sub parsesis_nue {
my $query = $_[1] || {};
my $wantref = 1 if $_[1];
$_[0] =~ s/\r\n$//;
my @pairs = split /\&/, $_[0];
for (@pairs) {
my ($key, $value) = split /=/, $_;
$query->{$key} = $value;
}
return $query if $wantref;
return %$query;
}
sub to_batched_query {
my ($queries, $bql, $bqs, $novar) = @_;
my @bqueries;
return unless ref($queries->[0]) eq 'HASH';
my $last;
my $line;
my $linecnt = 0;
my $batchmode = 0;
foreach my $cur (@$queries) {
if (exists $cur->{message}) {
my $msg = $cur->{message};
delete $cur->{message};
$line = "-". makesis($cur);
$cur->{message} = $msg;
$line =~ s/\r\n$//s;
$line .= "&message=*\r\n$msg\r\n.\r\n";
push @bqueries, $line;
next;
}
unless ($last) {
$last = $cur;
next;
}
unless ($batchmode) {
my ($both, $diff) = findsimilar($last, $cur);
if ($diff && !$novar) {
$batchmode = 2;
$line = "-". makesis_nue($both);
$line .= join(",", map "$last->{$_}", @$diff) ."\r\n";
$line .= join(",", map "$cur->{$_}", @$diff) ."\r\n";
$last = $both; $linecnt = 2;
} else {
$batchmode = 1;
$line = "-". makesis($last);
$line .= makesis_nue($cur);
$linecnt = 2;
}
next;
} else {
my ($both, $diff) = findsimilar($last, $cur) if ($batchmode == 2);
if ( ($bqs && (length($line) > ($bqs*1024))) ||
($bql && ($linecnt >= $bql)) ||
($batchmode == 2 && !$diff) ) {
$batchmode = 0;
$line .= ".\r\n";
push @bqueries, $line;
$last = $cur;
} else {
if ($batchmode == 2) {
$line .= join(",", map "$cur->{$_}", @$diff) ."\r\n";
} else {
$line .= makesis_nue($cur);
}
$linecnt++;
}
}
}
if ($batchmode) {
$line .= ".\r\n";
push @bqueries, $line;
} elsif ($last) {
$line = makesis($last);
push @bqueries, $line;
}
return \@bqueries;
}
sub findsimilar {
my ($a, $b) = @_;
my @diffvalues = ();
my %samevalues = ();
foreach (sort keys %$a) {
return unless exists $b->{$_};
if ($b->{$_} eq $a->{$_}) {
$samevalues{$_} = $a->{$_};
} else {
$samevalues{$_} = "?";
push @diffvalues, $_;
}
}
foreach (sort keys %$b) {
return unless exists $a->{$_};
}
return (1) unless scalar(@diffvalues) > 0;
return (\%samevalues, \@diffvalues);
}
sub from_batched_query {
my ($queries) = @_;
my @queries;
my ($fq, $rq) = $queries =~ m:^\-(.*?)\r\n(.*)$:sm;
unless ($fq && $rq) {
$fq = $queries;
$rq = "";
}
if ($fq =~ m:\?:) {
my %template_query = ();
my @seq = ();
my @pairs = split /\&/, $fq;
for (@pairs) {
my ($key, $value) = split /=/, $_;
if ($value eq "?") {
push @seq, $key;
} else {
$template_query{$key} = $value ? uri_unescape($value) : '';
}
}
for (split /\r\n/, $rq) {
my @values = split /,/, $_;
my %foo = %template_query;
@foo{@seq} = @values;
push @queries, \%foo;
}
return undef unless @queries;
} elsif ($fq =~ m:\*:) {
my %query = parsesis($fq);
for (keys %query) {
if ($query{$_} eq "*") {
$query{$_} = $rq;
last;
}
}
push @queries, \%query;
} else {
my %q = parsesis($fq);
push @queries, \%q;
for (split /\r\n/, $rq) {
my %q = parsesis($_);
push @queries, \%q;
}
}
return \@queries;
}
sub randstr {
my $size = shift;
my $alphanum = shift;
my $str;
$alphanum = 1 if !defined($alphanum);
while ($size--) {
if ($alphanum) {
$str .= $b64table{ int(rand 64) };
} else {
$str .= chr(int(rand 94) + 33);
}
}
return $str;
}
sub escape_smtp_terminator {
my ($textref) = @_;
$$textref =~ s/\r\n\./\r\n\.\./gm
}
sub unescape_smtp_terminator {
my ($textref) = @_;
$$textref =~ s/\r\n\.\./\r\n\./gm;
}
sub hex_dump {
my $string = shift;
for (split //, $string) {
print ord($_) . " ";
}
print "\n";
}
sub hash2str {
my $href = shift;
my %hash = %$href;
my ($str, $key);
for $key ( keys %hash ) {
my $tstr;
if ( ref $hash{$key} eq 'ARRAY' ) {
for ( @{ $hash{ $key }} ) { $tstr .= escape( $_ ) . "," } $str =~ s/,$//;
} elsif ( !(ref $hash{$key}) ) {
$tstr .= escape ( $hash{$key} );
}
if ( $tstr ) { $str .= "$key:$tstr&" }
}
$str =~ s/&$//; return $str;
}
sub str2hash {
my $str = shift;
my %hash;
my @pairs = split /(?<!\\)&/, $str;
for ( @pairs ) {
my ( $key, $data ) = split /(?<!\\):/, $_, 2;
if ( $data =~ /(?<!\\),/ ) {
my @list = split /(?<!\\),/, $data;
for ( @list ) { $_ = unescape ( $_ ) };
$hash{$key} = [@list];
} else { $hash{$key} = unescape ( $data ) }
}
return \%hash;
}
sub split_mime {
my ($mailref, $ver, $recursive, $debug ) = @_;
return unless ref($mailref);
my ($hdr, $body) = split /\n\r*\n/, $$mailref, 2;
my $no_valid_mime_hdr = 0;
unless ($body) {
print "split_mime: no blank lines\n" if $debug > 1;
$no_valid_mime_hdr = 1;
}
if ($recursive && ($hdr !~ /^Content-Type:/i)) {
$no_valid_mime_hdr = 1;
print "uh-oh, bad mime-body len=". length($$mailref) .":\n$$mailref\n" if $debug;
}
if ($no_valid_mime_hdr) {
my $mimepart = "X-Razor2-Agent: $ver\n";
my $hrdlen = length($mimepart);
if ($$mailref =~ /^\n/) {
$mimepart .= $$mailref;
} else {
$mimepart .= "\n". $$mailref;
}
print "split_mime: returning total_len=". length($mimepart) ."; hdrs=".
$hdrlen .", body=". length($$mailref) ."\n" if $debug;
return (\$mimepart);
}
my $orig_hdr = $hdr;
$hdr =~ s/\n\s+//sg; # merge multi-line headers
my $trimmed_hdr = "";
foreach (split '\n',$hdr) {
/^Content-/i and $trimmed_hdr .= "$_\n";
/^X-Razor2/i and $trimmed_hdr .= "$_\n";
}
my $boundary = "";
if ($trimmed_hdr =~ /Content-Type: multipart.+boundary=("[^"]+"|\S+)/ig) {
$boundary = $1;
}
if ($boundary eq "") {
#
# valid mime hdr, but no nested mime.
# add razor hdr and return.
#
print "split_mime: valid_mime_hdr [len=". length($orig_hdr)
."], but no nested mime\n$orig_hdr\n" if $debug > 1;
$trimmed_hdr = "X-Razor2-Agent: $ver\n" . $trimmed_hdr;
my $mimepart = "$trimmed_hdr\n$body";
print "split_mime: returning total=". length($mimepart) ."; hdrs=".
length($trimmed_hdr) .", body=". length($body) ."\n" if $debug;
return (\$mimepart);
}
$boundary = $1 if $boundary =~ /^"(.*)"$/;
# At this point, we know body has mime parts.
#
my @mimeparts;
#
# According to RFC 1341
# http://www.w3.org/Protocols/rfc1341/7_2_Multipart.html
#
# mimes are separated by \n--boundary\n
# and are followed immediately by header, blank line, body;
# or blank line and body.
#
# if no header in mime part, default content type for mime body is
# based on header where 'Content-Type: multipart*' was defined, where
# multipart/digest --> message/rfc822
# multipart/* --> text/plain
# perhaps we should add a header if none present?
#
# if a body contains mimes, the 'preable', or stuff before
# the first boundary, and the 'epilogue', the stuff after the
# last boudary, are to be ignored.
#
# NOTE: We split up multiparts, but content-type's can also be
# nested. i.e, a header of 'Content-Type: message' can have a body
# of 'Content-Type: image'
#
$body =~ s/\n\Q--$boundary--\E.*$//sg; # trash last boundary and epilogue
if ($body =~ /^\Q--$boundary\E\r*\n/) {
# bug in some mails, make it RFC compliant
# now our split will work correctly
print "bad mime body [len=". length($body) ."], not doing \\n--boundary, fixed tho.\n" if $debug > 1;
$body = "garbage\n$body";
}
my @tmpparts = split /\n\Q--$boundary\E\r*\n/, $body;
shift @tmpparts; # trash everything up to the first boundary;
foreach (@tmpparts) {
# perhaps we should add a header based on default content-type?
unless (/\S/s) {
print "skipping body part containing only whitespace [len=". length($_) ."]\n" if $debug;
next;
}
print "boundary: ". $recursive . "$boundary\n" if $debug > 1;
push @mimeparts, split_mime(\$_, $ver, " ". $recursive, $debug);
}
print "Saweeet!!! Boundary (". scalar(@mimeparts) ."): $boundary\n" if defined($boundary) && ($debug > 1);
return @mimeparts;
}
# mailref is not modified by this sub
#
sub prep_part {
my ($mailref, $maxheader, $maxbody) = @_;
#print "[". length($$mailref) ."] maxsize=$maxheader + $maxbody\n";
my ($hdr, $body) = split /\n\r*\n/, $$mailref, 2;
$hdr .= "\n"; # put newline back on last header line
unless ($body) {
#
# fixme - this should not happen.
# if it does, split_mime needs work
#
# print "prep_part got F**KED-up mimepart [len=". length($$mailref) ."]\n$$mailref\n";
return; # body is empty
}
# fixme - are these the best chars to check for binary?
my $is_binary = ($hdr =~ /^Content-Type-Encoding: 8-bit/) ||
($body =~ /([\x00-\x1f|\x7f-\xff])/ and $1 !~ /[\r\n\t]/);
my $enBase64 = new Razor2::Preproc::enBase64;
$is_binary = $enBase64->isit($mailref);
$enBase64->doit(\$body) if $is_binary;
$body =~ s/\r+\n/\n/sg; # outlook sometimes does \r\r\n
$hdr =~ s/\r+\n/\n/sg;
if ((my $len = length($body)) > $maxbody) {
$body = substr $body, 0, $maxbody;
substr($body, -2) = "==" if $is_binary;
$hdr = "X-Razor2-Origlen-Body: $len\n" . $hdr;
#print "maxbody=$maxbody body went from $len to ". length($body) ."\n";
}
if ((my $len = length($hdr)) > $maxheader) {
$hdr = "X-Razor2-Origlen-Header: $len\n" . $hdr;
while (length($hdr) > $maxheader) {
$hdr =~ s/.*\n$//; # remove last line of headers
}
#print "maxhdr=$maxheader header went from $len to ". length($hdr) ."\n";
}
my $dude = "$hdr\n$body";
return $mailref if $dude eq $$mailref; # this happens majority of the time
return \$dude;
}
# NOTE: Important function!
# *must* be kept in sync with server and all clients
# same holds true for prep_part()
#
# This is the preprocessing done on a mail before sent over network
#
sub prep_mail {
my ($mailref, $report_headers, $maxheader, $maxbody, $maxorighdr, $versionstring, $debug) = @_;
return unless ref($mailref);
print " prep_mail: orig=". length($$mailref) ."\n" if ($debug > 1);
my ($orig_hdr) = split /\n\r*\n/, $$mailref, 2;
$orig_hdr .= "\n"; # put newline back on last header line
my $ver = $versionstring || 1;
my @mimeparts = split_mime($mailref, $ver, 0, $debug);
my @mimeparts_prep;
foreach (@mimeparts) {
push @mimeparts_prep, prep_part($_, $maxheader, $maxbody);
}
unless ($report_headers) {
my $hdr = "X-Razor2-Headers-Suppressed: 1\n";
foreach (split '\n',$orig_hdr) {
/^Content-/i and $hdr .= "$_\n";
/^X-Razor2/i and $hdr .= "$_\n";
}
$orig_hdr = $hdr;
}
if ((my $len = length($orig_hdr)) > $maxorighdr) {
$hdr = "X-Razor2-Origlen-Header: $len\n" . $orig_hdr;
while (length($hdr) > $maxorighdr) {
$hdr =~ s/.*\n$//; # remove last line of headers
}
#print "max=$maxorighdr orig_header went from $len to ". length($hdr) ."\n";
$orig_hdr = $hdr;
}
if ($debug > 1) {
print "**** prep_mail done: headers=". length($orig_hdr);
foreach (0..$#mimeparts_prep) {
print "\n**** mail $_ [". length(${$mimeparts_prep[$_]}) ."] ". substr(${$mimeparts_prep[$_]} ,0,40);
}
print "\n\n";
}
return (\$orig_hdr, @mimeparts_prep);
}
# from MIME::Parser
#my $parser = new MIME::Parser;
#my $entity = $parser->parse($body);
# foreach (dump_entity($entity))
sub dump_entity {
my $ent = shift;
my @parts = $ent->parts;
if (@parts) { # multipart...
map { dump_entity($_) } @parts;
} else { # single part...
return ( $ent->body ); # return text blob
print " Part: ", $ent->bodyhandle->path, " (", scalar($ent->head->mime_type), ")\n";
}
}
# input: hex string ("2D")
# output: hash ref or array containg bits that are set
# 2D == (1, 3, 4, 6)
sub hexbits2hash {
my $hex = shift;
my %h;
my $i = 0;
foreach (reverse split '', unpack "B*", pack "H*", $hex) {
$i++;
$h{$i} = 1 if $_ eq 1;
}
return wantarray ? (sort keys %h) : \%h;
}
# input: hash ref, array ref, or array containg bits that are set
# output: hex string ("2D")
# 2D == (4, 8, 32)
sub hash2hexbits {
my @bits = @_;
@bits = @{$bits[0]} if ref($bits[0]) eq 'ARRAY';
@bits = (sort keys %{$bits[0]}) if ref($bits[0]) eq 'HASH';
my @all;
my $i = 1;
foreach (sort {$a <=> $b} @bits) {
while (1) {
push @all, 1 if $_ == $i;
last if $_ == $i;
push @all, 0;
$i++;
}
}
my $bs = join '', reverse @all;
# fixme needs testing
my $hex = (unpack "H*", pack "B*", join '', reverse @all);
return $hex
}
# for debugging - dumps a obj to a string
sub debugobj {
my ($obj, $prefix, $maxwidth) = @_;
$maxwidth ||= 70;
return if (defined($prefix) && length($prefix) > $maxwidth);
my $line = "";
$prefix .= " "x4;
if (my $r = ref($obj)) {
if ($r eq 'HASH') {
$line = "$r - $obj,". scalar(keys %$obj) ." keys\n";
foreach (sort keys %$obj) {
$line .= "$prefix$_ => ". debugobj($obj->{$_}, $prefix);
}
$line .= $prefix ."[empty]\n" unless (keys %$obj);
} elsif ($r eq 'ARRAY') {
$line = "$r - $obj,". scalar(@$obj) ." items\n";
foreach (@$obj) {
$line .= $prefix . debugobj($_, $prefix);
}
$line .= $prefix ."[empty]\n" unless (@$obj);
} elsif ($r eq 'REF') {
$line = "$r - $obj\n";
$line .= $prefix . debugobj($$obj, $prefix);
} elsif ($r eq 'SCALAR') {
$line = "$r - $obj\n";
$line .= $prefix . debugobj($$obj, $prefix);
}
} else {
if (defined $obj) {
$line = $1 if substr($obj, 0, $maxwidth-length($prefix)) =~ /^([^\n]+)/;
$line = "[length=". length($obj) ."] ". $line
if (length($line) ne length($obj));
} else {
$line = "[empty]";
}
$line .= "\n";
}
return $line;
}
sub clean_body {
my ($self, $bodyref) = @_;
my $hasheaders = 1;
if ($self->{preprocs}->{deBase64}->isit($bodyref)) {
$self->{preprocs}->{deBase64}->doit($bodyref);
$hasheaders = 0;
}
if ($self->{preprocs}->{deQP}->isit($bodyref)) {
$self->{preprocs}->{deQP}->doit($bodyref);
$hasheaders = 0;
}
if ($self->{preprocs}->{deHTML}->isit($bodyref)) {
$self->{preprocs}->{deHTML}->doit($bodyref);
}
if ($hasheaders) {
$$bodyref =~ s/^.*?\n\n//s;
}
}
sub round {
my $float = shift;
return sprintf("%.0f", $float);
}
sub fisher_yates_shuffle {
my $deck = shift; # $deck is a reference to an array
my $i = @$deck;
while ($i--) {
my $j = int rand ($i+1);
@$deck[$i,$j] = @$deck[$j,$i];
}
}
1;