# svn_export_empty_files.py
"""
Script to "export" from a Subversion repository a clean directory tree
of empty files instead of the content contained in those files in the
repository. The directory tree will also omit the .svn directories.
The export is done from the repository specified by URL at HEAD into
PATH. If PATH is omitted, the last component of the URL is used for
the local directory name. If the --delete command line option is
given, then files and directories in PATH that do not exist in the
Subversion repository are deleted.
As Subversion does not have any built-in tools to help locate files
and directories, in extremely large repositories it can be hard to
find what you are looking for. This script was written to create a
smaller non-working working copy that can be crawled with find or
find's locate utility to make it easier to find files.
$HeadURL: http://svn.collab.net/repos/svn/branches/1.6.x/contrib/client-side/svn_export_empty_files.py $
$LastChangedRevision: 28522 $
$LastChangedDate: 2007-12-17 22:48:44 +0000 (Mon, 17 Dec 2007) $
$LastChangedBy: epg $
"""
import getopt
# Use getopt.gnu_getopt when this Python's getopt module provides it,
# and fall back to the classic getopt.getopt otherwise.
my_getopt = getattr(getopt, 'gnu_getopt', getopt.getopt)
import os
import sys
import svn.client
import svn.core
class context:
    """Plain attribute holder for the state of one run of the script.

    The class defines nothing itself; the script entry point attaches
    the option flags and ignore lists to an instance as attributes.
    """
    pass
def recursive_delete(dirname):
    """Remove the directory DIRNAME together with everything below it.

    Symbolic links are never followed: a link found inside the tree is
    unlinked, and if DIRNAME itself is a symbolic link only the link is
    removed, leaving the directory it points at untouched.

    Errors from os.listdir, os.unlink and os.rmdir are not caught here.
    """
    if os.path.islink(dirname):
        # os.listdir() would follow the link and empty the directory it
        # points at, after which os.rmdir() fails on the link itself.
        os.unlink(dirname)
        return

    for filename in os.listdir(dirname):
        file_or_dir = os.path.join(dirname, filename)
        if os.path.isdir(file_or_dir) and not os.path.islink(file_or_dir):
            recursive_delete(file_or_dir)
        else:
            # Regular files and symlinks (including links to directories).
            os.unlink(file_or_dir)

    # The directory is empty now, so it can be removed itself.
    os.rmdir(dirname)
def check_url_for_export(ctx, url, revision, client_ctx):
    """Check that URL can be listed in the repository and is a directory.

    URL is listed first; its kind is then looked up in a listing of its
    parent URL (everything before the last '/').

    Arguments:
      ctx         process context; only ctx.verbose is read here
      url         repository URL to check
      revision    revision passed through to svn.client.ls
      client_ctx  client context passed through to svn.client.ls

    Returns True when URL is a directory, or when the parent URL cannot
    be listed (URL is then assumed to be the top of the repository).
    Returns False when URL contains no '/', when URL is missing from
    its parent's listing, or when URL is not a directory.

    An exception raised by the first listing of URL is not caught here.
    """
    if ctx.verbose:
        print("Trying to list '%s'" % url)
    # The result is discarded; the call is made only so that a URL
    # which cannot be listed fails at this point.
    svn.client.ls(url, revision, 0, client_ctx)

    try:
        last_slash_index = url.rindex('/')
    except ValueError:
        print("Cannot find a / in the URL '%s'" % url)
        return False

    parent_url = url[:last_slash_index]
    path_name = url[last_slash_index+1:]

    try:
        if ctx.verbose:
            print("Trying to list '%s'" % parent_url)
        remote_ls = svn.client.ls(parent_url,
                                  revision,
                                  0,
                                  client_ctx)
    except svn.core.SubversionException:
        if ctx.verbose:
            print("Listing of '%s' failed, assuming URL is top of repos"
                  % parent_url)
        return True

    try:
        path_info = remote_ls[path_name]
    except KeyError:
        # A missing mapping key raises KeyError, not ValueError.
        print("Able to ls '%s' but '%s' not in ls of '%s'"
              % (url, path_name, parent_url))
        return False

    if svn.core.svn_node_dir != path_info.kind:
        if ctx.verbose:
            print("The URL '%s' is not a directory" % url)
        return False

    if ctx.verbose:
        print("The URL '%s' is a directory" % url)
    return True
# The three possible results of get_local_path_kind().
LOCAL_PATH_DIR = 'Directory'
LOCAL_PATH_NON_DIR = 'Non-directory'
LOCAL_PATH_NONE = 'Nonexistent'


def get_local_path_kind(pathname):
    """Classify PATHNAME in the local filesystem.

    Returns LOCAL_PATH_NONE when os.stat() fails with OSError,
    LOCAL_PATH_DIR when the path is a directory, and
    LOCAL_PATH_NON_DIR for anything else that exists.
    """
    try:
        os.stat(pathname)
    except OSError:
        return LOCAL_PATH_NONE

    if os.path.isdir(pathname):
        return LOCAL_PATH_DIR
    return LOCAL_PATH_NON_DIR
def synchronize_dir(ctx, url, dir_name, revision, client_ctx):
    """Make the local directory DIR_NAME mirror the repository
    directory at URL, using empty files in place of file contents.

    Arguments:
      ctx         process context; reads delete_local_paths, verbose,
                  ignore_names and ignore_paths, and sets delete_needed
                  when something would have to be deleted but deletion
                  was not requested
      url         repository URL of the directory
      dir_name    local directory to create or update
      revision    revision passed through to svn.client.ls
      client_ctx  client context passed through to svn.client.ls

    Remote subdirectories are synchronized recursively and an empty
    file is created for every remote non-directory that does not exist
    locally. Local entries that are not in the remote listing are
    deleted when ctx.delete_local_paths is set and reported otherwise.

    Returns False when DIR_NAME, or the local counterpart of a remote
    subdirectory, is a non-directory that was not removed; True
    otherwise. A local directory standing where the repository has a
    file only sets ctx.delete_needed and does not affect the result.
    """
    status = True
    local_path_kind = get_local_path_kind(dir_name)

    if LOCAL_PATH_NON_DIR == local_path_kind:
        msg = ("'%s' which is a local non-directory but remotely a " +
               "directory") % dir_name
        if ctx.delete_local_paths:
            print("Removing %s" % msg)
            os.unlink(dir_name)
            local_path_kind = LOCAL_PATH_NONE
        else:
            print("Need to remove %s" % msg)
            ctx.delete_needed = True
            return False

    if LOCAL_PATH_NONE == local_path_kind:
        print("Creating directory '%s'" % dir_name)
        os.mkdir(dir_name)

    remote_ls = svn.client.ls(url,
                              revision,
                              0,
                              client_ctx)

    if ctx.verbose:
        print("Syncing '%s' to '%s'" % (url, dir_name))

    remote_pathnames = sorted(remote_ls.keys())

    # Local entries with no remote counterpart. Ignored remote names
    # are excluded too, so they are never treated as stale.
    local_pathnames = sorted(set(os.listdir(dir_name)) -
                             set(remote_pathnames))

    # URLs always use '/', so child URLs are not built with
    # os.path.join(), which would use the OS path separator.
    url_prefix = url
    if not url_prefix.endswith('/'):
        url_prefix += '/'

    for remote_pathname in remote_pathnames:
        full_remote_pathname = os.path.join(dir_name, remote_pathname)

        if remote_pathname in ctx.ignore_names or \
           full_remote_pathname in ctx.ignore_paths:
            print("Skipping '%s'" % full_remote_pathname)
            continue

        remote_path_kind = remote_ls[remote_pathname].kind

        if svn.core.svn_node_dir == remote_path_kind:
            if not synchronize_dir(ctx,
                                   url_prefix + remote_pathname,
                                   full_remote_pathname,
                                   revision,
                                   client_ctx):
                status = False
            continue

        # Remote entry is a non-directory.
        local_path_kind = get_local_path_kind(full_remote_pathname)
        if LOCAL_PATH_DIR == local_path_kind:
            msg = ("'%s' which is a local directory but remotely a " +
                   "non-directory") % full_remote_pathname
            if ctx.delete_local_paths:
                print("Removing %s" % msg)
                _remove_local_dir(full_remote_pathname)
                local_path_kind = LOCAL_PATH_NONE
            else:
                print("Need to remove %s" % msg)
                ctx.delete_needed = True
                continue

        if LOCAL_PATH_NONE == local_path_kind:
            print("Creating file '%s'" % full_remote_pathname)
            open(full_remote_pathname, 'w').close()

    # Whatever is left exists locally but not in the repository.
    for local_pathname in local_pathnames:
        full_local_pathname = os.path.join(dir_name, local_pathname)
        if os.path.isdir(full_local_pathname):
            if ctx.delete_local_paths:
                print("Removing directory '%s'" % full_local_pathname)
                _remove_local_dir(full_local_pathname)
            else:
                print("Need to remove directory '%s'" % full_local_pathname)
                ctx.delete_needed = True
        else:
            if ctx.delete_local_paths:
                print("Removing file '%s'" % full_local_pathname)
                os.unlink(full_local_pathname)
            else:
                print("Need to remove file '%s'" % full_local_pathname)
                ctx.delete_needed = True

    return status


def _remove_local_dir(pathname):
    """Delete the local directory PATHNAME without following a symlink."""
    if os.path.islink(pathname):
        # Only drop the link; the directory it points at lies outside
        # the exported tree and must not be emptied.
        os.unlink(pathname)
    else:
        recursive_delete(pathname)
def main(ctx, url, export_pathname):
    """Export the directory tree at URL into EXPORT_PATHNAME.

    Builds a Subversion client context with the authentication
    providers listed below, checks that URL can be exported with
    check_url_for_export(), and then synchronizes the local tree with
    synchronize_dir() at the HEAD revision.

    Returns the process exit status: 0 when the synchronization
    reported success, 1 when the URL check failed or the
    synchronization reported failure.
    """
    client_ctx = svn.client.create_context()

    providers = [
        svn.client.get_simple_provider(),
        svn.client.get_ssl_client_cert_file_provider(),
        svn.client.get_ssl_client_cert_pw_file_provider(),
        svn.client.get_ssl_server_trust_file_provider(),
        svn.client.get_username_provider(),
    ]
    client_ctx.auth_baton = svn.core.svn_auth_open(providers)
    client_ctx.config = svn.core.svn_config_get_config(None)

    head_revision = svn.core.svn_opt_revision_t()
    head_revision.kind = svn.core.svn_opt_revision_head

    if not check_url_for_export(ctx, url, head_revision, client_ctx):
        return 1

    status = synchronize_dir(ctx,
                             url,
                             export_pathname,
                             head_revision,
                             client_ctx)

    if ctx.delete_needed:
        # The original re-tested ctx.delete_needed here, leaving an
        # unreachable else branch; the text printed is unchanged.
        print("There are files and directories in the local filesystem")
        print("that do not exist in the Subversion repository that were")
        print("not deleted.  Please pass the --delete command line option")
        print("to have this script delete those files and directories.")

    if status:
        return 0
    return 1
def usage(verbose_usage):
    """Print the usage message to standard error and exit with status 1.

    The option summary is always printed. When VERBOSE_USAGE is true
    the long description of the script follows it. This function does
    not return: it always ends with sys.exit(1).
    """
    message1 = \
"""usage: %s [options] URL [PATH]
Options include
--delete delete files and directories that don't exist in repos
-h (--help) show this message
-n (--name) arg add arg to the list of file or dir names to ignore
-p (--path) arg add arg to the list of file or dir paths to ignore
-v (--verbose) be verbose in output"""

    # The last paragraph used to read "does have any built-in tools",
    # which said the opposite of what was meant.
    message2 = \
"""Script to "export" from a Subversion repository a clean directory tree
of empty files instead of the content contained in those files in the
repository. The directory tree will also omit the .svn directories.
The export is done from the repository specified by URL at HEAD into
PATH. If PATH is omitted, the last components of the URL is used for
the local directory name. If the --delete command line option is
given, then files and directories in PATH that do not exist in the
Subversion repository are deleted.
As Subversion does not have any built-in tools to help locate files and
directories, in extremely large repositories it can be hard to find
what you are looking for. This script was written to create a smaller
non-working working copy that can be crawled with find or find's
locate utility to make it easier to find files."""

    sys.stderr.write(message1 % sys.argv[0] + "\n")
    if verbose_usage:
        sys.stderr.write(message2 + "\n")
    sys.exit(1)
if __name__ == '__main__':
    # Defaults; the command line options below override them.
    ctx = context()
    ctx.delete_local_paths = False
    ctx.ignore_names = []
    ctx.ignore_paths = []
    ctx.verbose = False
    ctx.delete_needed = False

    try:
        opts, args = my_getopt(sys.argv[1:],
                               'hn:p:v',
                               ['delete',
                                'help',
                                'name=',
                                'path=',
                                'verbose'
                                ])
    except getopt.GetoptError:
        usage(False)

    # Options are handled before the argument count is checked so that
    # -h/--help shows the full help even when no URL is given.
    # usage() never returns; it exits the process.
    for o, a in opts:
        if o == '--delete':
            ctx.delete_local_paths = True
        elif o in ('-h', '--help'):
            usage(True)
        elif o in ('-n', '--name'):
            ctx.ignore_names.append(a)
        elif o in ('-p', '--path'):
            ctx.ignore_paths.append(a)
        elif o in ('-v', '--verbose'):
            ctx.verbose = True

    if len(args) < 1 or len(args) > 2:
        sys.stderr.write("Incorrect number of arguments\n")
        usage(False)

    # Drop trailing slashes. rstrip() also copes with an empty or
    # all-slash argument, which indexing url[-1] could not.
    url = args[0].rstrip('/')
    if not url:
        sys.stderr.write("Invalid URL '%s'\n" % args[0])
        usage(False)

    if len(args) == 2:
        export_pathname = args[1]
    else:
        # No PATH given: use the last component of the URL.
        try:
            last_slash_index = url.rindex('/')
        except ValueError:
            sys.stderr.write("Cannot find a / in the URL '%s'\n" % url)
            usage(False)
        export_pathname = url[last_slash_index+1:]

    sys.exit(main(ctx, url, export_pathname))