svn-bdb-view.py [plain text]

#!/usr/bin/env python
#
# This is a pretty-printer for subversion BDB repository databases.
#

import sys, os, re, codecs, textwrap
import skel, svnfs

# Parse arguments
if len(sys.argv) == 2:
  dbhome = os.path.join(sys.argv[1], 'db')
  if not os.path.exists(dbhome):
    sys.stderr.write("%s: '%s' is not a valid svn repository\n" %
        (sys.argv[0], dbhome))
    sys.exit(1)
else:
  sys.stderr.write("Usage: %s <svn-repository>\n" % sys.argv[0])
  sys.exit(1)

# Helper Classes
class RepositoryProblem(Exception):
  pass

# Helper Functions
def ok(bool, comment):
  if not bool:
    raise RepositoryProblem(text)

# Helper Data
opmap = {
  'add': 'A',
  'modify': 'M',
  'delete': 'D',
  'replace': 'R',
  'reset': 'X',
}

# Analysis Modules
def am_uuid(ctx):
  "uuids"
  db = ctx.uuids_db
  ok(list(db.keys()) == [1], 'uuid Table Structure')
  ok(re.match(r'^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$',
    db[1]), 'UUID format')
  print("Repos UUID: %s" % db[1])

def am_revisions(ctx):
  "revisions"
  cur = ctx.revs_db.cursor()
  try:
    rec = cur.first()
    ctx.txn2rev = txn2rev = {}
    prevrevnum = -1
    while rec:
      rev = skel.Rev(rec[1])
      revnum = rec[0] - 1
      print("r%d: txn %s%s" % (revnum, rev.txn,
          (rev.txn not in ctx.txns_db) and "*** MISSING TXN ***" or ""))
      ok(rev.txn not in txn2rev, 'Multiple revs bound to same txn')
      txn2rev[rev.txn] = revnum
      rec = cur.next()
  finally:
    cur.close()

def am_changes(ctx):
  "changes"
  cur = ctx.changes_db.cursor()
  try:
    current_txnid_len = 0
    maximum_txnid_len = 0
    while current_txnid_len <= maximum_txnid_len:
      current_txnid_len += 1
      rec = cur.first()
      prevtxn = None
      while rec:
        if len(rec[0]) != current_txnid_len:
          rec = cur.next()
          continue
        ch = skel.Change(rec[1])
        lead = "txn %s:" % rec[0]
        if prevtxn == rec[0]:
          lead = " " * len(lead)
        print("%s %s %s %s %s %s%s" % (lead, opmap[ch.kind], ch.path, ch.node,
            ch.textmod and "T" or "-", ch.propmod and "P" or "-",
            (ch.node not in ctx.nodes_db) \
                and "*** MISSING NODE ***" or ""))
        prevtxn = rec[0]
        if len(rec[0]) > maximum_txnid_len:
          maximum_txnid_len = len(rec[0])
        rec = cur.next()
  finally:
    cur.close()

def am_copies(ctx):
  "copies"
  cur = ctx.copies_db.cursor()
  try:
    print("next-key: %s" % ctx.copies_db['next-key'])
    rec = cur.first()
    while rec:
      if rec[0] != 'next-key':
        cp = skel.Copy(rec[1])
        destnode = ctx.nodes_db.get(cp.destnode)
        if not destnode:
          destpath = "*** MISSING NODE ***"
        else:
          destpath = skel.Node(destnode).createpath
        print("cpy %s: %s %s @txn %s to %s (%s)" % (rec[0],
            {'copy':'C','soft-copy':'S'}[cp.kind], cp.srcpath or "-",
            cp.srctxn or "-", cp.destnode, destpath))
      rec = cur.next()
  finally:
    cur.close()

def am_txns(ctx):
  "transactions"
  cur = ctx.txns_db.cursor()
  try:
    print("next-key: %s" % ctx.txns_db['next-key'])
    length = 1
    found_some = True
    while found_some:
      found_some = False
      rec = cur.first()
      while rec:
        if rec[0] != 'next-key' and len(rec[0]) == length:
          found_some = True
          txn = skel.Txn(rec[1])
          if txn.kind == "committed":
            label = "r%s" % txn.rev
            ok(ctx.txn2rev[rec[0]] == int(txn.rev), 'Txn->rev not <-txn')
          else:
            label = "%s based-on %s" % (txn.kind, txn.basenode)
          print("txn %s: %s root-node %s props %d copies %s" % (rec[0],
              label, txn.rootnode, len(txn.proplist) / 2, ",".join(txn.copies)))
        rec = cur.next()
      length += 1
  finally:
    cur.close()

def am_nodes(ctx):
  "nodes"
  cur = ctx.nodes_db.cursor()
  try:
    print("next-key: %s" % ctx.txns_db['next-key'])
    rec = cur.first()
    data = {}
    while rec:
      if rec[0] == 'next-key':
        rec = cur.next()
        continue
      nd = skel.Node(rec[1])
      nid,cid,tid = rec[0].split(".")
      data[tid.rjust(20)+nd.createpath] = (rec[0], nd)
      rec = cur.next()
    k = sorted(data.keys())
    reptype = {"fulltext":"F", "delta":"D"}
    for i in k:
      nd = data[i][1]
      prkind = drkind = " "
      if nd.proprep:
        try:
          rep = skel.Rep(ctx.reps_db[nd.proprep])
          prkind = reptype[rep.kind]
          if nd.proprep in ctx.bad_reps:
            prkind += " *** BAD ***"
        except KeyError:
          prkind = "*** MISSING ***"
      if nd.datarep:
        try:
          rep = skel.Rep(ctx.reps_db[nd.datarep])
          drkind = reptype[rep.kind]
          if nd.datarep in ctx.bad_reps:
            drkind += " *** BAD ***"
        except KeyError:
          drkind = "*** MISSING ***"
      stringdata = "%s: %s %s pred %s count %s prop %s %s data %s %s edit %s" \
          % ( data[i][0], {"file":"F", "dir":"D"}[nd.kind], nd.createpath,
          nd.prednode or "-", nd.predcount, prkind, nd.proprep or "-",
          drkind, nd.datarep or "-", nd.editrep or "-")
      if nd.createpath == "/":
        print("")
      print(stringdata)
  finally:
    cur.close()

def get_string(ctx, id):
  try:
    return ctx.get_whole_string(id)
  except DbNotFoundError:
    return "*** MISSING STRING ***"

def am_reps(ctx):
  "representations"
  ctx.bad_reps = {}
  cur = ctx.reps_db.cursor()
  try:
    print("next-key: %s" % ctx.txns_db['next-key'])
    rec = cur.first()
    while rec:
      if rec[0] != 'next-key':
        rep = skel.Rep(rec[1])
        lead = "rep %s: txn %s: %s %s " % (rec[0], rep.txn, rep.cksumtype,
            codecs.getencoder('hex_codec')(rep.cksum)[0])
        if rep.kind == "fulltext":
          note = ""
          if rep.str not in ctx.strings_db:
            note = " *MISS*"
            ctx.bad_reps[rec[0]] = None
          print(lead+("fulltext str %s%s" % (rep.str, note)))
          if ctx.verbose:
            print(textwrap.fill(get_string(ctx, rep.str), initial_indent="  ",
                subsequent_indent="  ", width=78))
        elif rep.kind == "delta":
          print(lead+("delta of %s window%s" % (len(rep.windows),
            len(rep.windows) != 1 and "s" or "")))
          for window in rep.windows:
            noterep = notestr = ""
            if window.vs_rep not in ctx.reps_db:
              noterep = " *MISS*"
              ctx.bad_reps[rec[0]] = None
            if window.str not in ctx.strings_db:
              notestr = " *MISS*"
              ctx.bad_reps[rec[0]] = None
            print("\toff %s len %s vs-rep %s%s str %s%s" % (window.offset,
                window.size, window.vs_rep, noterep, window.str, notestr))
        else:
          print(lead+"*** UNKNOWN REPRESENTATION TYPE ***")
      rec = cur.next()
  finally:
    cur.close()


def am_stringsize(ctx):
  "string size"
  if not ctx.verbose:
    return
  cur = ctx.strings_db.cursor()
  try:
    rec = cur.first()
    size = 0
    while rec:
      size = size + len(rec[1] or "")
      rec = cur.next()
    print("%s %s %s" % (size, size/1024.0, size/1024.0/1024.0))
  finally:
    cur.close()

modules = (
    am_uuid,
    am_revisions,
    am_changes,
    am_copies,
    am_txns,
    am_reps,
    am_nodes,
    # Takes too long: am_stringsize,
    )

def main():
  print("Repository View for '%s'" % dbhome)
  print("")
  ctx = svnfs.Ctx(dbhome, readonly=1)
  # Stash process state in a library data structure. Yuck!
  ctx.verbose = 0
  try:
    for am in modules:
      print("MODULE: %s" % am.__doc__)
      am(ctx)
      print("")
  finally:
    ctx.close()

if __name__ == '__main__':
  main()