extract-localizable-strings.pl [plain text]
use strict;
use Getopt::Long;
no warnings 'deprecated';
sub UnescapeHexSequence($);
my %isDebugMacro = ( ASSERT_WITH_MESSAGE => 1, LOG_ERROR => 1, ERROR => 1, NSURL_ERROR => 1, FATAL => 1, LOG => 1, LOG_WARNING => 1, UI_STRING_LOCALIZE_LATER => 1, UI_STRING_LOCALIZE_LATER_KEY => 1, LPCTSTR_UI_STRING_LOCALIZE_LATER => 1, UNLOCALIZED_STRING => 1, UNLOCALIZED_LPCTSTR => 1, dprintf => 1, NSException => 1, NSLog => 1, printf => 1 );
my $verify;
my $exceptionsFile;
my @directoriesToSkip = ();
my %options = (
'verify' => \$verify,
'exceptions=s' => \$exceptionsFile,
'skip=s' => \@directoriesToSkip,
);
GetOptions(%options);
@ARGV >= 2 or die "Usage: extract-localizable-strings [--verify] [--exceptions <exceptions file>] <file to update> [--skip directory | directory]...\nDid you mean to run update-webkit-localizable-strings instead?\n";
-f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n" unless !defined $exceptionsFile;
my $fileToUpdate = shift @ARGV;
-f $fileToUpdate or die "Couldn't find file to update $fileToUpdate\n";
my $warnAboutUnlocalizedStrings = defined $exceptionsFile;
my @directories = ();
if (@ARGV < 1) {
push(@directories, ".");
} else {
for my $dir (@ARGV) {
push @directories, $dir;
}
}
my $sawError = 0;
my $localizedCount = 0;
my $keyCollisionCount = 0;
my $notLocalizedCount = 0;
my $NSLocalizeCount = 0;
my %exception;
my %usedException;
if (defined $exceptionsFile && open EXCEPTIONS, $exceptionsFile) {
while (<EXCEPTIONS>) {
chomp;
if (/^"([^\\"]|\\.)*"$/ or /^[-_\/\w\s.]+.(h|m|mm|c|cpp)$/ or /^[-_\/\w\s.]+.(h|m|mm|c|cpp):"([^\\"]|\\.)*"$/) {
if ($exception{$_}) {
print "$exceptionsFile:$.: warning: exception for $_ appears twice\n";
print "$exceptionsFile:$exception{$_}: warning: first appearance\n";
} else {
$exception{$_} = $.;
}
} else {
print "$exceptionsFile:$.: warning: syntax error\n";
}
}
close EXCEPTIONS;
}
my $quotedDirectoriesString = '"' . join('" "', @directories) . '"';
for my $dir (@directoriesToSkip) {
$quotedDirectoriesString .= ' -path "' . $dir . '" -prune -o';
}
my @files = ( split "\n", `find $quotedDirectoriesString \\( -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.c" -o -name "*.cpp" \\)` );
for my $file (sort @files) {
next if $file =~ /\/\w+LocalizableStrings\w*\.h$/ || $file =~ /\/LocalizedStrings\.h$/;
$file =~ s-^./--;
open SOURCE, $file or die "can't open $file\n";
my $inComment = 0;
my $expected = "";
my $macroLine;
my $macro;
my $UIString;
my $key;
my $comment;
my $string;
my $stringLine;
my $nestingLevel;
my $previousToken = "";
while (<SOURCE>) {
chomp;
# Handle continued multi-line comment.
if ($inComment) {
next unless s-.*\*/--;
$inComment = 0;
}
next unless defined $nestingLevel or /(\"|\/\*)/;
# Handle all the tokens in the line.
while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
my $token = $1;
if ($token eq "\"") {
if ($expected and $expected ne "a quoted string") {
print "$file:$.: found a quoted string but expected $expected\n";
$sawError = 1;
$expected = "";
}
if (s-^(([^\\$token]|\\.)*?)$token--) {
if (!defined $string) {
$stringLine = $.;
$string = $1;
} else {
$string .= $1;
}
} else {
print "$file:$.: mismatched quotes\n";
$sawError = 1;
$_ = "";
}
next;
}
if (defined $string) {
handleString:
if ($expected) {
if (!defined $UIString) {
# FIXME: Validate UTF-8 here?
$UIString = $string;
$expected = ",";
} elsif (($macro =~ /(WEB_)?UI_STRING_KEY(_INTERNAL)?$/) and !defined $key) {
# FIXME: Validate UTF-8 here?
$key = $string;
$expected = ",";
} elsif (!defined $comment) {
# FIXME: Validate UTF-8 here?
$comment = $string;
$expected = ")";
}
} else {
if (defined $nestingLevel) {
# In a debug macro, no need to localize.
} elsif ($previousToken eq " } elsif ($previousToken eq "extern" and $string eq "C") {
} elsif ($string eq "") {
} elsif ($exception{$file}) {
$usedException{$file} = 1;
} elsif ($exception{"\"$string\""}) {
$usedException{"\"$string\""} = 1;
} elsif ($exception{"$file:\"$string\""}) {
$usedException{"$file:\"$string\""} = 1;
} else {
print "$file:$stringLine: warning: \"$string\" is not marked for localization\n" if $warnAboutUnlocalizedStrings;
$notLocalizedCount++;
}
}
$string = undef;
last if !defined $token;
}
$previousToken = $token;
if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/ && $token !~ /NSLocalizedFileSizeDescription/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedRecoverySuggestionErrorKey/) {
print "$file:$.: found a use of an NSLocalized macro ($token); not supported\n";
$nestingLevel = 0 if !defined $nestingLevel;
$sawError = 1;
$NSLocalizeCount++;
} elsif ($token eq "/*") {
if (!s-^.*?\*/--) {
$_ = ""; $inComment = 1;
}
} elsif ($token eq "//") {
$_ = ""; } elsif ($token eq "'") {
if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
print "$file:$.: mismatched single quote\n";
$sawError = 1;
$_ = "";
}
} else {
if ($expected and $expected ne $token) {
print "$file:$.: found $token but expected $expected\n";
$sawError = 1;
$expected = "";
}
if ($token =~ /(WEB_)?UI_STRING(_KEY)?(_INTERNAL)?$/) {
$expected = "(";
$macro = $token;
$UIString = undef;
$key = undef;
$comment = undef;
$macroLine = $.;
} elsif ($token eq "(" or $token eq "[") {
++$nestingLevel if defined $nestingLevel;
$expected = "a quoted string" if $expected;
} elsif ($token eq ",") {
$expected = "a quoted string" if $expected;
} elsif ($token eq ")" or $token eq "]") {
$nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
if ($expected) {
$key = $UIString if !defined $key;
HandleUIString($UIString, $key, $comment, $file, $macroLine);
$macro = "";
$expected = "";
$localizedCount++;
}
} elsif ($isDebugMacro{$token}) {
$nestingLevel = 0 if !defined $nestingLevel;
}
}
}
}
goto handleString if defined $string;
if ($expected) {
print "$file: reached end of file but expected $expected\n";
$sawError = 1;
}
close SOURCE;
}
sub UnescapeHexSequence($)
{
my ($originalStr) = @_;
my $escapedStr = $originalStr;
my $unescapedStr = "";
for (;;) {
if ($escapedStr =~ s-^\\x([[:xdigit:]]+)--) {
if (256 <= hex($1)) {
print "Hexadecimal escape sequence out of range: \\x$1\n";
return undef;
}
$unescapedStr .= pack("H*", $1);
} elsif ($escapedStr =~ s-^(.)--) {
$unescapedStr .= $1;
} else {
return $unescapedStr;
}
}
}
my %stringByKey;
my %commentByKey;
my %fileByKey;
my %lineByKey;
sub HandleUIString
{
my ($string, $key, $comment, $file, $line) = @_;
my $bad = 0;
$string = UnescapeHexSequence($string);
if (!defined($string)) {
print "$file:$line: string has an illegal hexadecimal escape sequence\n";
$bad = 1;
}
$key = UnescapeHexSequence($key);
if (!defined($key)) {
print "$file:$line: key has an illegal hexadecimal escape sequence\n";
$bad = 1;
}
$comment = UnescapeHexSequence($comment);
if (!defined($comment)) {
print "$file:$line: comment has an illegal hexadecimal escape sequence\n";
$bad = 1;
}
if (grep { $_ == 0xFFFD } unpack "U*", $string) {
print "$file:$line: string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
$bad = 1;
}
if ($string ne $key && grep { $_ == 0xFFFD } unpack "U*", $key) {
print "$file:$line: key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
$bad = 1;
}
if (grep { $_ == 0xFFFD } unpack "U*", $comment) {
print "$file:$line: comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
$bad = 1;
}
if ($bad) {
$sawError = 1;
return;
}
if ($stringByKey{$key} && $stringByKey{$key} ne $string) {
print "$file:$line: warning: encountered the same key, \"$key\", twice, with different strings\n";
print "$fileByKey{$key}:$lineByKey{$key}: warning: previous occurrence\n";
$keyCollisionCount++;
return;
}
if ($commentByKey{$key} && $commentByKey{$key} ne $comment) {
print "$file:$line: warning: encountered the same key, \"$key\", twice, with different comments\n";
print "$fileByKey{$key}:$lineByKey{$key}: warning: previous occurrence\n";
$keyCollisionCount++;
return;
}
$fileByKey{$key} = $file;
$lineByKey{$key} = $line;
$stringByKey{$key} = $string;
$commentByKey{$key} = $comment;
}
print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount;
my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
if (@unusedExceptions) {
for my $unused (@unusedExceptions) {
print "$exceptionsFile:$exception{$unused}: warning: exception $unused not used\n";
}
print "\n";
}
print "$localizedCount localizable strings\n" if $localizedCount;
print "$keyCollisionCount key collisions\n" if $keyCollisionCount;
print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount;
print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount;
print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions;
if ($sawError) {
print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n";
exit 1;
}
my $localizedStrings = "";
for my $key (sort keys %commentByKey) {
$localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
}
if (-e "$fileToUpdate") {
if (!$verify) {
open STRINGS, ">", "$fileToUpdate" or die;
print STRINGS $localizedStrings;
close STRINGS;
} else {
open STRINGS, $fileToUpdate or die;
my $lastComment;
my $line;
while (<STRINGS>) {
chomp;
next if (/^\s*$/);
if (/^\/\* (.*) \*\/$/) {
$lastComment = $1;
} elsif (/^"((?:[^\\]|\\[^"])*)"\s*=\s*"((?:[^\\]|\\[^"])*)";$/) {
my $string = delete $stringByKey{$1};
if (!defined $string) {
print "$fileToUpdate:$.: unused key \"$1\"\n";
$sawError = 1;
} else {
if (!($string eq $2)) {
print "$fileToUpdate:$.: unexpected value \"$2\" for key \"$1\"\n";
print "$fileByKey{$1}:$lineByKey{$1}: expected value \"$string\" defined here\n";
$sawError = 1;
}
if (!($lastComment eq $commentByKey{$1})) {
print "$fileToUpdate:$.: unexpected comment /* $lastComment */ for key \"$1\"\n";
print "$fileByKey{$1}:$lineByKey{$1}: expected comment /* $commentByKey{$1} */ defined here\n";
$sawError = 1;
}
}
} else {
print "$fileToUpdate:$.: line with unexpected format: $_\n";
$sawError = 1;
}
}
for my $missing (keys %stringByKey) {
print "$fileByKey{$missing}:$lineByKey{$missing}: missing key \"$missing\"\n";
$sawError = 1;
}
if ($sawError) {
print "\n$fileToUpdate:0: file is not up to date.\n";
exit 1;
}
}
} else {
print "error: $fileToUpdate does not exist\n";
exit 1;
}