# git-common-hash   [plain text]


#!/bin/sh

# This script displays the distribution of longest common hash prefixes.
# This can be used to determine the minimum prefix length to use
# for object names to be unique.

git rev-list --objects --all | sort | perl -lne '
  substr($_, 40) = "";
  # uncomment next line for a distribution of bits instead of hex chars
  # $_ = unpack("B*",pack("H*",$_));
  if (defined $p) {
    ($p ^ $_) =~ /^(\0*)/;
    $common = length $1;
    if (defined $pcommon) {
      $count[$pcommon > $common ? $pcommon : $common]++;
    } else {
      $count[$common]++; # first item
    }
  }
  $p = $_;
  $pcommon = $common;
  END {
    $count[$common]++; # last item
    print "$_: $count[$_]" for 0..$#count;
  }
'