create_hash_table   [plain text]


#! /usr/bin/perl -w
#
# Static Hashtable Generator
#
# (c) 2000-2002 by Harri Porten <porten@kde.org> and
#                  David Faure <faure@kde.org>
# Modified (c) 2004 by Nikolas Zimmermann <wildfox@kde.org>
# Copyright (C) 2007, 2008, 2009 Apple Inc. All rights reserved.
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
#

use strict;

my $file = $ARGV[0];
shift;
my $includelookup = 0;

# Use -i as second argument to make it include "Lookup.h"
$includelookup = 1 if (defined($ARGV[0]) && $ARGV[0] eq "-i");

# Use -n as second argument to make it use the third argument as namespace parameter ie. -n KDOM
my $useNameSpace = $ARGV[1] if (defined($ARGV[0]) && $ARGV[0] eq "-n");

print STDERR "Creating hashtable for $file\n";
open(IN, $file) or die "No such file $file";

my @keys = ();
my @attrs = ();
my @values = ();
my @hashes = ();

my $inside = 0;
my $name;
my $pefectHashSize;
my $compactSize;
my $compactHashSizeMask;
my $banner = 0;
sub calcPerfectHashSize();
sub calcCompactHashSize();
sub output();
sub jsc_ucfirst($);
sub hashValue($);

while (<IN>) {
    chomp;
    s/^\s+//;
    next if /^\#|^$/; # Comment or blank line. Do nothing.
    if (/^\@begin/ && !$inside) {
        if (/^\@begin\s*([:_\w]+)\s*\d*\s*$/) {
            $inside = 1;
            $name = $1;
        } else {
            print STDERR "WARNING: \@begin without table name, skipping $_\n";
        }
    } elsif (/^\@end\s*$/ && $inside) {
        calcPerfectHashSize();
        calcCompactHashSize();
        output();

        @keys = ();
        @attrs = ();
        @values = ();
        @hashes = ();

        $inside = 0;
    } elsif (/^(\S+)\s*(\S+)\s*([\w\|]*)\s*(\w*)\s*$/ && $inside) {
        my $key = $1;
        my $val = $2;
        my $att = $3;
        my $param = $4;

        push(@keys, $key);
        push(@attrs, length($att) > 0 ? $att : "0");

        if ($att =~ m/Function/) {
            push(@values, { "type" => "Function", "function" => $val, "params" => (length($param) ? $param : "") });
            #printf STDERR "WARNING: Number of arguments missing for $key/$val\n" if (length($param) == 0);
        } elsif (length($att)) {
            my $get = $val;
            my $put = !($att =~ m/ReadOnly/) ? "set" . jsc_ucfirst($val) : "0";
            push(@values, { "type" => "Property", "get" => $get, "put" => $put });
        } else {
            push(@values, { "type" => "Lexer", "value" => $val });        
        }
        push(@hashes, hashValue($key));
    } elsif ($inside) {
        die "invalid data {" . $_ . "}";
    }
}

die "missing closing \@end" if ($inside);

sub jsc_ucfirst($)
{
    my ($value) = @_;

    if ($value =~ /js/) {
        $value =~ s/js/JS/;
        return $value;
    }

    return ucfirst($value);
}


sub ceilingToPowerOf2
{
    my ($pefectHashSize) = @_;

    my $powerOf2 = 1;
    while ($pefectHashSize > $powerOf2) {
        $powerOf2 <<= 1;
    }

    return $powerOf2;
}

sub calcPerfectHashSize()
{
tableSizeLoop:
    for ($pefectHashSize = ceilingToPowerOf2(scalar @keys); ; $pefectHashSize += $pefectHashSize) {
        my @table = ();
        foreach my $key (@keys) {
            my $h = hashValue($key) % $pefectHashSize;
            next tableSizeLoop if $table[$h];
            $table[$h] = 1;
        }
        last;
    }
}

sub leftShift($$) {
    my ($value, $distance) = @_;
    return (($value << $distance) & 0xFFFFFFFF);
}

sub calcCompactHashSize()
{
    my @table = ();
    my @links = ();
    my $compactHashSize = ceilingToPowerOf2(2 * @keys);
    $compactHashSizeMask = $compactHashSize - 1;
    $compactSize = $compactHashSize;
    my $collisions = 0;
    my $maxdepth = 0;
    my $i = 0;
    foreach my $key (@keys) {
        my $depth = 0;
        my $h = hashValue($key) % $compactHashSize;
        while (defined($table[$h])) {
            if (defined($links[$h])) {
                $h = $links[$h];
                $depth++;
            } else {
                $collisions++;
                $links[$h] = $compactSize;
                $h = $compactSize;
                $compactSize++;
            }
        }
        $table[$h] = $i;
        $i++;
        $maxdepth = $depth if ( $depth > $maxdepth);
    }
}

# Paul Hsieh's SuperFastHash
# http://www.azillionmonkeys.com/qed/hash.html
# Ported from UString..
sub hashValue($) {
  my @chars = split(/ */, $_[0]);

  # This hash is designed to work on 16-bit chunks at a time. But since the normal case
  # (above) is to hash UTF-16 characters, we just treat the 8-bit chars as if they
  # were 16-bit chunks, which should give matching results

  my $EXP2_32 = 4294967296;

  my $hash = 0x9e3779b9;
  my $l    = scalar @chars; #I wish this was in Ruby --- Maks
  my $rem  = $l & 1;
  $l = $l >> 1;

  my $s = 0;

  # Main loop
  for (; $l > 0; $l--) {
    $hash   += ord($chars[$s]);
    my $tmp = leftShift(ord($chars[$s+1]), 11) ^ $hash;
    $hash   = (leftShift($hash, 16)% $EXP2_32) ^ $tmp;
    $s += 2;
    $hash += $hash >> 11;
    $hash %= $EXP2_32;
  }

  # Handle end case
  if ($rem !=0) {
    $hash += ord($chars[$s]);
    $hash ^= (leftShift($hash, 11)% $EXP2_32);
    $hash += $hash >> 17;
  }

  # Force "avalanching" of final 127 bits
  $hash ^= leftShift($hash, 3);
  $hash += ($hash >> 5);
  $hash = ($hash% $EXP2_32);
  $hash ^= (leftShift($hash, 2)% $EXP2_32);
  $hash += ($hash >> 15);
  $hash = $hash% $EXP2_32;
  $hash ^= (leftShift($hash, 10)% $EXP2_32);
  
  # this avoids ever returning a hash code of 0, since that is used to
  # signal "hash not computed yet", using a value that is likely to be
  # effectively the same as 0 when the low bits are masked
  $hash = 0x80000000  if ($hash == 0);

  return $hash;
}

sub output() {
    if (!$banner) {
        $banner = 1;
        print "// Automatically generated from $file using $0. DO NOT EDIT!\n";
    }

    my $nameEntries = "${name}Values";
    $nameEntries =~ s/:/_/g;

    print "\n#include \"Lookup.h\"\n" if ($includelookup);
    if ($useNameSpace) {
        print "\nnamespace ${useNameSpace} {\n";
        print "\nusing namespace JSC;\n";
    } else {
        print "\nnamespace JSC {\n";
    }
    my $count = scalar @keys + 1;
    print "\nstatic const struct HashTableValue ${nameEntries}\[$count\] = {\n";
    my $i = 0;
    foreach my $key (@keys) {
        my $firstValue = "";
        my $secondValue = "";

        if ($values[$i]{"type"} eq "Function") {
            $firstValue = $values[$i]{"function"};
            $secondValue = $values[$i]{"params"};
        } elsif ($values[$i]{"type"} eq "Property") {
            $firstValue = $values[$i]{"get"};
            $secondValue = $values[$i]{"put"};
        } elsif ($values[$i]{"type"} eq "Lexer") {
            $firstValue = $values[$i]{"value"};
            $secondValue = "0";
        }
        print "   { \"$key\", $attrs[$i], (intptr_t)$firstValue, (intptr_t)$secondValue },\n";
        $i++;
    }
    print "   { 0, 0, 0, 0 }\n";
    print "};\n\n";
    print "extern const struct HashTable $name =\n";
    print "#if ENABLE(PERFECT_HASH_SIZE)\n";
    print "    \{ ", $pefectHashSize - 1, ", $nameEntries, 0 \};\n";
    print "#else\n";
    print "    \{ $compactSize, $compactHashSizeMask, $nameEntries, 0 \};\n";
    print "#endif\n\n";
    print "} // namespace\n";
}