svn_export_empty_files.py   [plain text]


#!/usr/bin/env python

# Copyright (c) 2005 Sony Pictures Imageworks Inc.  All rights reserved.
#
# This software/script is free software; you may redistribute it
# and/or modify it under the terms of Version 2 or later of the GNU
# General Public License ("GPL") as published by the Free Software
# Foundation.
#
# This software/script is distributed "AS IS," WITHOUT ANY EXPRESS OR
# IMPLIED WARRANTIES OR REPRESENTATIONS OF ANY KIND WHATSOEVER,
# including without any implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE.  See the GNU GPL (Version 2 or later) for
# details and license obligations.

"""
Script to "export" from a Subversion repository a clean directory tree
of empty files instead of the content contained in those files in the
repository.  The directory tree will also omit the .svn directories.

The export is done from the repository specified by URL at HEAD into
PATH.  If PATH is omitted, the last component of the URL is used for
the local directory name.  If the --delete command line option is
given, then files and directories in PATH that do not exist in the
Subversion repository are deleted.

As Subversion does not have any built-in tools to help locate files
and directories, in extremely large repositories it can be hard to
find what you are looking for.  This script was written to create a
smaller non-working working copy that can be crawled with find or
find's locate utility to make it easier to find files.

$HeadURL: http://svn.collab.net/repos/svn/branches/1.6.x/contrib/client-side/svn_export_empty_files.py $
$LastChangedRevision: 28522 $
$LastChangedDate: 2007-12-17 22:48:44 +0000 (Mon, 17 Dec 2007) $
$LastChangedBy: epg $
"""

import getopt
try:
    my_getopt = getopt.gnu_getopt
except AttributeError:
    my_getopt = getopt.getopt
import os
import sys

import svn.client
import svn.core

class context:
    """A container for holding process context.

    The class defines no attributes itself.  The script's entry point
    creates one instance and attaches these attributes to it:

      delete_local_paths  True if --delete was given
      ignore_names        list of file or dir names given with -n/--name
      ignore_paths        list of file or dir paths given with -p/--path
      verbose             True if -v/--verbose was given
      delete_needed       set to True during the sync when a local path
                          should be removed but deletes are not enabled
    """

def recursive_delete(dirname):
    """Recursively delete the given directory name.

    Symbolic links are never followed.  A link found inside the tree is
    unlinked, and if dirname itself is a symbolic link then only the
    link is removed; in both cases the link's target is left untouched.

    Raises OSError if a path cannot be listed or removed.
    """

    # os.path.isdir() follows symbolic links, so a caller that selected
    # dirname with it may hand us a link to a directory.  Listing and
    # emptying it would delete the contents of the link's target and
    # the final os.rmdir() would then fail on the link itself.
    if os.path.islink(dirname):
        os.unlink(dirname)
        return

    for filename in os.listdir(dirname):
        file_or_dir = os.path.join(dirname, filename)
        if os.path.isdir(file_or_dir) and not os.path.islink(file_or_dir):
            recursive_delete(file_or_dir)
        else:
            os.unlink(file_or_dir)
    os.rmdir(dirname)

def check_url_for_export(ctx, url, revision, client_ctx):
    """Given a URL to a Subversion repository, check that the URL is
    in the repository and that it refers to a directory and not a
    non-directory."""

    # Try to do a listing on the URL to see if the repository can be
    # contacted.  Do not catch failures here, as they imply that there
    # is something wrong with the given URL.
    try:
        if ctx.verbose:
            print "Trying to list '%s'" % url
        svn.client.ls(url, revision, 0, client_ctx)

        # Given a URL, the ls command does not tell you if
        # you have a directory or a non-directory, so try doing a
        # listing on the parent URL.  If the listing on the parent URL
        # fails, then assume that the given URL was the top of the
        # repository and hence a directory.
        try:
            last_slash_index = url.rindex('/')
        except ValueError:
            print "Cannot find a / in the URL '%s'" % url
            return False

        parent_url = url[:last_slash_index]
        path_name = url[last_slash_index+1:]

        try:
            if ctx.verbose:
                print "Trying to list '%s'" % parent_url
            remote_ls = svn.client.ls(parent_url,
                                      revision,
                                      0,
                                      client_ctx)
        except svn.core.SubversionException:
            if ctx.verbose:
                print "Listing of '%s' failed, assuming URL is top of repos" \
                      % parent_url
            return True

        try:
            path_info = remote_ls[path_name]
        except ValueError:
            print "Able to ls '%s' but '%s' not in ls of '%s'" \
                  % (url, path_name, parent_url)
            return False

        if svn.core.svn_node_dir != path_info.kind:
            if ctx.verbose:
                print "The URL '%s' is not a directory" % url
            return False
        else:
            if ctx.verbose:
                print "The URL '%s' is a directory" % url
            return True
    finally:
        pass

# The possible results of get_local_path_kind().
LOCAL_PATH_DIR = 'Directory'
LOCAL_PATH_NON_DIR = 'Non-directory'
LOCAL_PATH_NONE = 'Nonexistent'
def get_local_path_kind(pathname):
    """Determine if there is a path in the filesystem and if the path
    is a directory or non-directory.

    Returns LOCAL_PATH_DIR if pathname is a directory or a symbolic
    link to one, LOCAL_PATH_NON_DIR if it is anything else that exists,
    including a dangling symbolic link, and LOCAL_PATH_NONE if nothing
    can be found at pathname.
    """

    # Use lstat() rather than stat() for the existence check.  stat()
    # follows symbolic links and so fails on a dangling link, which
    # would report a name that is already taken as nonexistent.
    try:
        os.lstat(pathname)
    except OSError:
        return LOCAL_PATH_NONE

    if os.path.isdir(pathname):
        return LOCAL_PATH_DIR
    return LOCAL_PATH_NON_DIR

def synchronize_dir(ctx, url, dir_name, revision, client_ctx):
    """Synchronize a directory given by a URL to a Subversion
    repository with a local directory located by the dir_name
    argument."""

    status = True

    # Determine if there is a path in the filesystem and if the path
    # is a directory or non-directory.
    local_path_kind = get_local_path_kind(dir_name)

    # If the path on the local filesystem is not a directory, then
    # delete it if deletes are enabled, otherwise return.
    if LOCAL_PATH_NON_DIR == local_path_kind:
        msg = ("'%s' which is a local non-directory but remotely a " +
               "directory") % dir_name
        if ctx.delete_local_paths:
            print "Removing", msg
            os.unlink(dir_name)
            local_path_kind = LOCAL_PATH_NONE
        else:
            print "Need to remove", msg
            ctx.delete_needed = True
            return False

    if LOCAL_PATH_NONE == local_path_kind:
        print "Creating directory '%s'" % dir_name
        os.mkdir(dir_name)

    remote_ls = svn.client.ls(url,
                              revision,
                              0,
                              client_ctx)

    if ctx.verbose:
        print "Syncing '%s' to '%s'" % (url, dir_name)

    remote_pathnames = remote_ls.keys()
    remote_pathnames.sort()

    local_pathnames = os.listdir(dir_name)

    for remote_pathname in remote_pathnames:
        # For each name in the remote list, remove it from the local
        # list so that the remaining names may be deleted.
        try:
            local_pathnames.remove(remote_pathname)
        except ValueError:
            pass

        full_remote_pathname = os.path.join(dir_name, remote_pathname)

        if remote_pathname in ctx.ignore_names or \
               full_remote_pathname in ctx.ignore_paths:
            print "Skipping '%s'" % full_remote_pathname
            continue

        # Get the remote path kind.
        remote_path_kind = remote_ls[remote_pathname].kind

        # If the remote path is a directory, then recursively handle
        # that here.
        if svn.core.svn_node_dir == remote_path_kind:
            s = synchronize_dir(ctx,
                                os.path.join(url, remote_pathname),
                                full_remote_pathname,
                                revision,
                                client_ctx)
            status &= s

        else:
            # Determine if there is a path in the filesystem and if
            # the path is a directory or non-directory.
            local_path_kind = get_local_path_kind(full_remote_pathname)

            # If the path exists on the local filesystem but its kind
            # does not match the kind in the Subversion repository,
            # then either remove it if the local paths should be
            # deleted or continue to the next path if deletes should
            # not be done.
            if LOCAL_PATH_DIR == local_path_kind:
                msg = ("'%s' which is a local directory but remotely a " +
                       "non-directory") % full_remote_pathname
                if ctx.delete_local_paths:
                    print "Removing", msg
                    recursive_delete(full_remote_pathname)
                    local_path_kind = LOCAL_PATH_NONE
                else:
                    print "Need to remove", msg
                    ctx.delete_needed = True
                    continue

            if LOCAL_PATH_NONE == local_path_kind:
                print "Creating file '%s'" % full_remote_pathname
                f = file(full_remote_pathname, 'w')
                f.close()

    # Any remaining local paths should be removed.
    local_pathnames.sort()
    for local_pathname in local_pathnames:
        full_local_pathname = os.path.join(dir_name, local_pathname)
        if os.path.isdir(full_local_pathname):
            if ctx.delete_local_paths:
                print "Removing directory '%s'" % full_local_pathname
                recursive_delete(full_local_pathname)
            else:
                print "Need to remove directory '%s'" % full_local_pathname
                ctx.delete_needed = True
        else:
            if ctx.delete_local_paths:
                print "Removing file '%s'" % full_local_pathname
                os.unlink(full_local_pathname)
            else:
                print "Need to remove file '%s'" % full_local_pathname
                ctx.delete_needed = True

    return status

def main(ctx, url, export_pathname):
    # Create a client context to run all Subversion client commands
    # with.
    client_ctx = svn.client.create_context()

    # Give the client context baton a suite of authentication
    # providers.
    providers = [
        svn.client.get_simple_provider(),
        svn.client.get_ssl_client_cert_file_provider(),
        svn.client.get_ssl_client_cert_pw_file_provider(),
        svn.client.get_ssl_server_trust_file_provider(),
        svn.client.get_username_provider(),
        ]
    client_ctx.auth_baton = svn.core.svn_auth_open(providers)

    # Load the configuration information from the configuration files.
    client_ctx.config = svn.core.svn_config_get_config(None)

    # Use the HEAD revision to check out.
    head_revision = svn.core.svn_opt_revision_t()
    head_revision.kind = svn.core.svn_opt_revision_head

    # Check that the URL refers to a directory in the repository and
    # not non-directory (file, special, etc).
    status = check_url_for_export(ctx, url, head_revision, client_ctx)
    if not status:
        return 1

    # Synchronize the current working directory with the given URL and
    # descend recursively into the repository.
    status = synchronize_dir(ctx,
                             url,
                             export_pathname,
                             head_revision,
                             client_ctx)

    if ctx.delete_needed:
        print "There are files and directories in the local filesystem"
        print "that do not exist in the Subversion repository that were"
        print "not deleted.  ",
        if ctx.delete_needed:
            print "Please pass the --delete command line option"
            print "to have this script delete those files and directories."
        else:
            print ""

    if status:
        return 0
    else:
        return 1

def usage(verbose_usage):
    """Print the command line usage to standard error and exit the
    process with status 1.

    If verbose_usage is true, the short option summary is followed by
    a longer description of what the script does.
    """

    message1 = \
"""usage: %s [options] URL [PATH]
Options include
    --delete           delete files and directories that don't exist in repos
    -h (--help)        show this message
    -n (--name) arg    add arg to the list of file or dir names to ignore
    -p (--path) arg    add arg to the list of file or dir paths to ignore
    -v (--verbose)     be verbose in output"""

    message2 = \
"""Script to "export" from a Subversion repository a clean directory tree
of empty files instead of the content contained in those files in the
repository.  The directory tree will also omit the .svn directories.

The export is done from the repository specified by URL at HEAD into
PATH.  If PATH is omitted, the last components of the URL is used for
the local directory name.  If the --delete command line option is
given, then files and directories in PATH that do not exist in the
Subversion repository are deleted.

As Subversion does not have any built-in tools to help locate files and
directories, in extremely large repositories it can be hard to find
what you are looking for.  This script was written to create a smaller
non-working working copy that can be crawled with find or find's
locate utility to make it easier to find files."""

    # Each message is written followed by a newline.
    sys.stderr.write(message1 % sys.argv[0] + "\n")
    if verbose_usage:
        sys.stderr.write(message2 + "\n")
    sys.exit(1)

# Script entry point: parse the command line into a context object,
# work out the URL and the local export path, and run main().
if __name__ == '__main__':
    ctx = context()

    # Context storing command line options settings.
    ctx.delete_local_paths = False
    ctx.ignore_names = []
    ctx.ignore_paths = []
    ctx.verbose = False

    # Context storing state from running the sync.
    ctx.delete_needed = False

    try:
        opts, args = my_getopt(sys.argv[1:],
                               'hn:p:v',
                               ['delete',
                                'help',
                                'name=',
                                'path=',
                                'verbose'
                                ])
    except getopt.GetoptError:
        usage(False)

    # Process the options before checking the number of arguments so
    # that -h/--help shows the full help even when no URL is given.
    for o, a in opts:
        if o == '--delete':
            ctx.delete_local_paths = True
        elif o in ('-h', '--help'):
            # usage() exits the process.
            usage(True)
        elif o in ('-n', '--name'):
            ctx.ignore_names.append(a)
        elif o in ('-p', '--path'):
            ctx.ignore_paths.append(a)
        elif o in ('-v', '--verbose'):
            ctx.verbose = True

    if len(args) < 1 or len(args) > 2:
        print >>sys.stderr, "Incorrect number of arguments"
        usage(False)

    # Get the URL to export and remove any trailing /'s from it.  An
    # empty URL, or one consisting only of /'s, is rejected here rather
    # than failing later with an IndexError.
    url = args[0].rstrip('/')
    if not url:
        print >>sys.stderr, "The URL '%s' is not valid" % args[0]
        usage(False)
    args = args[1:]

    # Get the local path to export into.
    if args:
        export_pathname = args[0]
        args = args[1:]
    else:
        # Default to the last component of the URL.
        try:
            last_slash_index = url.rindex('/')
        except ValueError:
            print >>sys.stderr, "Cannot find a / in the URL '%s'" % url
            usage(False)
        export_pathname = url[last_slash_index+1:]

    sys.exit(main(ctx, url, export_pathname))