# ren [plain text]

#!/bin/bash
#@ This program came from: ftp://ftp.armory.com/pub/scripts/ren
#@ Look there for the latest version.
#@ If you don't find it, look through http://www.armory.com/~ftp/
#
# @(#) ren 2.1.1 2002-03-17
# 1990-06-01 John H. DuBois III (john@armory.com)
# 1991-02-25 Improved help info
# 1992-06-07 Remove quotes from around shell pattern as required by new ksh
# 1994-05-10 Exit if no globbing chars given.
# 1995-01-23 Allow filename set to be given on command line.
# 1997-09-24 1.4 Let [] be used for globbing.  Added x option.
# 1997-11-26 1.4.1 Notice if the sequences of globbing chars aren't the same.
# 1999-05-13 Changed name to ren to avoid conflict with /etc/rename
# 2000-01-01 1.4.2 Let input patterns that contain whitespace be used.
# 2001-02-14 1.5 Better test for whether old & new globbing seqs are identical.
# 2001-02-20 1.6 Added pP options.
# 2001-02-27 1.7 Added qf options.  Improved interpretation of rename patterns.
# 2001-05-10 1.8 Allow multiple pP options.  Added Qr options.
# 2001-07-25 2.0 Added mz options.
# 2001-11-25 2.1 Allow segment ranges to be given with -m.  Work under ksh93.
# 2002-03-17 2.1.1 Fixed bug in test for legal expressions.

# todo: It would be nice to be able to escape metacharacters with '\'
# todo: Should enhance patterns to make ] in a pair of brackets work ([]])
# todo: Allow use of all ksh globbing patterns.
# todo: Allow use of extended regexps, with () to enumerate pieces and \num to
# todo: select them.
#
# Modifications for bash made by Chet Ramey <chet@po.cwru.edu>

# Basename of this script; used as a prefix in diagnostics and in the usage text.
name="${0##*/}"
Usage="Usage:
$name [-fhqtv] [-m<segstart[:segend]=operation>] [-z<len>] [-[pP]<pattern>] 
    oldpattern [newpattern [filename ...]]
or
$name -r [same options as above] oldpattern newpattern directory ..."

# Boolean switches.  Each one holds the name of the command (true or false)
# that is run when the switch is tested, e.g. "$debug && ...".
tell=false verbose=false debug=false recurse=false
warn=true warnNoFiles=true check=true

# Include/exclude pattern bookkeeping for -p and -P.
inclPat= exclPat=
typeset -i inclCt=0 exclCt=0

# j receives the result of a -m operation; op_end_seg[] holds the ending
# segment number of each -m range.  Both carry the integer attribute.
typeset -i j op_end_seg

# Begin bash additions
shopt -s extglob

#
# ksh print emulation
#
#	print [-Rnprsu[n]] [-f format] [arg ...]
#
#	-	end of options
#	-R	BSD-style -- only accept -n, no escapes
#	-n	do not add trailing newline
#	-p	no-op (no coprocesses)
#	-r	no escapes
#	-s	print to the history file
#	-u n	redirect output to fd n
#	-f format	printf "$format" "$@"
#
# Without -r/-R the arguments are written with "echo -e", so backslash
# escapes in them are interpreted.  The return status is that of the
# underlying printf, history or echo builtin.
#

print()
{
	local eflag=-e
	# sflag must be local: were it global, one "print -s" would leave it
	# set and every later call would go to the history list instead of
	# being written to the requested descriptor.
	local nflag= fflag= sflag= c
	local fd=1
	# Keep the option-parsing state private so that calling print never
	# disturbs a getopts loop in the caller.
	local OPTIND=1 OPTARG

	while getopts "fRnprsu:" c
	do
		case $c in
		R|r)	eflag= ;;
		n)	nflag=-n ;;
		s)	sflag=y ;;
		f)	fflag=y ;;
		u)	fd=$OPTARG ;;
		p)	;;
		esac
	done
	shift $(( OPTIND - 1 ))

	if [ -n "$fflag" ]; then
		# -f: the first remaining argument is the printf format.
		builtin printf "$@" >&"$fd"
		return
	fi

	case "$sflag" in
	y)	builtin history -s "$*" ;;
	# $eflag and $nflag are deliberately unquoted so that an empty flag
	# disappears instead of being passed to echo as an empty argument.
	*)	builtin echo $eflag $nflag "$@" >&"$fd"
	esac
}

# End bash additions

# Parse the command-line options.  The leading ':' in the option string puts
# getopts into silent error reporting: a missing option value is delivered as
# opt=':' and an unknown option as opt='?', in both cases with the offending
# option letter in OPTARG (handled by the last two branches below).
while getopts :htvxp:P:fqQrm:z: opt; do
    case $opt in
    # -h: print the complete help text on stdout and exit successfully.
    h)
	print -r -- \
"$name: rename files by changing parts of filenames that match a pattern.
$Usage
oldpattern and newpattern are subsets of sh filename patterns; the only
globbing operators (wildcards) allowed are ?, *, and [].  All filenames that
match oldpattern will be renamed with the filename characters that match the
constant (non-globbing) characters of oldpattern changed to the corresponding
constant characters of newpattern.  The characters of the filename that match
the globbing operators of oldpattern will be preserved.  Globbing operators
in oldpattern must occur in the same order in newpattern; for every globbing
operators in newpattern there must be an identical globbing operators in
oldpattern in the same sequence.  Both arguments should be quoted since
globbing operators are special to the shell.  If filenames are given, only
those named are acted on; if not, all filenames that match oldpattern are acted
on.  newpattern is required in all cases except when -m is given and no further
arguments are given.
If you are unsure whether a $name command will do what you intend, issue it
with the -t option first to be sure.
Examples:
$name \"/tmp/foo*.ba.?\" \"/tmp/new*x?\"
    All filenames in /tmp that match foo*.ba.? will have the \"foo\" part
    replaced by \"new\" and the \".ba.\" part replaced by \"x\".
    For example, /tmp/fooblah.ba.baz would be renamed to /tmp/newblahxbaz.
$name \* \*- foo bar baz
    foo, bar, and baz will be renamed to foo-, bar-, and baz-.
$name '????????' '????-??-??'
    All filenames that are 8 characters long will be changed such that dashes
    are inserted after the 4th and 6th characters.
Options:
-h: Print this help.
-r: Recursive operation.  Filenames given on the command line after oldpattern
    and newpattern are taken to be directories to traverse recursively.  For
    each subdirectory found, the specified renaming is applied to any matching
    filenames.  oldpattern and newpattern should not include any directory
    components.
-p<pattern>, -P<pattern>: Act only on filenames that do (if -p is given) or do
    not (if -P is given) match the sh-style filename globbing pattern
    <pattern>.  This further restricts the filenames that are acted on, beyond
    the filename selection produced by oldpattern and the filename list (if
    any).  <pattern> must be quoted to prevent it from being interpreted by the
    shell.  Multiple instances of these options may be given.  In this case,
    filenames are acted on only if they match at least one of the patterns
    given with -p and do not match any of the patterns given with -P.
-m<segstart[:segend]=operation>: For each file being renamed, perform a
    mathematical operation on the string that results from concatenating
    together the filename segments that matched globbing operator numbers
    segstart through segend, where operators are numbered in order of
    occurrence from the left.  For example, in the pattern a?b*c[0-9]f, segment
    1 consists of the character that matched ?, segment 2 consists of the
    character(s) that matched *, and segment 3 consists of the character that
    matched [0-9].  The selected segments are replaced with the result of the
    mathematical operation.
    The concatenated string must consist of characters that can be interpreted
    as a decimal integer; if it does not, the filename is not acted on.  This
    number is assigned to the variable 'i', which can be referenced by the
    operation.  The operations available are those understood by the ksh
    interpreter, which includes most of the operators and syntax of the C
    language.  The original filename segment is replaced by the result of the
    operation.  If -m is used, newpattern may be an empty string or not given
    at all (if no directory/file names are given).  In this case, it is taken
    to be the same as oldpattern.
    If segend is given, any fixed text that occurs in the pattern between the
    starting and ending globbing segments is discarded.  If there are fewer
    globbing segments than segend, no complaint is issued; the string is formed
    from segment segstart through the last segment that does exist.
    If segend is not given, the only segment acted on is startseg.
    Examples:
    $name -m3=i+6 '??*.ppm'
	This is equivalent to:
	$name -m3=i+6 '??*.ppm' '??*.ppm'
	Since the old pattern and new pattern are identical, this would
	normally be a no-op.  But in this case, if a filename of ab079.ppm is
	given, it is changed to ab85.ppm.
    $name '-m1:2=i*2' 'foo??bar'
	This will change a file named foo12bar to foo24bar
    $name '-m1:2=i*2' 'foo?xyz?bar'
	This will also change a file named foo1xyz2bar to foo24bar
-z<len>: Set the size of the number fields that result when -m is used.  The
    field is truncated to the trailing <len> digits or filled out to <len>
    digits with leading zeroes.  In the above example, if -z3 is given, the
    output filename will be ab085.ppm.  
-f: Force rename.  By default, $name will not rename files if a file with the
    new filename already exists.  If -f is given, $name will carry out the
    rename anyway.
-q: Quiet operation.  By default, if -f is given, $name will still notify the
    user if a rename results in replacement of an already-existing filename. 
    If -q is given, no notification is issued.
-Q: Suppress other warnings.  By default, a warning is issued if no files are
    selected for acting upon.  If -Q is given, no warning is issued.
-v: Show the rename commands being executed.
-t: Show what rename commands would be done, but do not carry them out."
	exit 0
	;;
    # -f: turn off the "destination already exists" check.
    f)
	check=false
	;;
    # -q: turn off the notice about replacing an existing destination.
    q)
	warn=false
	;;
    # -Q: turn off the "no files selected" warning.
    Q)
	warnNoFiles=false
	;;
    # -r: recursive mode; this also turns off the "no files selected" warning.
    r)
	warnNoFiles=false
	recurse=true
	;;
    # -t: only report what would be renamed.
    t)
	tell=true
	;;
    # -v: report each rename as it happens.
    v)
	verbose=true
	;;
    # -x: debugging output; it also switches on verbose reporting.  This
    # option is accepted by the option string but not listed in the help text.
    x)
	verbose=true
	debug=true
	;;
    # -p / -P: append the pattern to the include / exclude list.
    p)
	inclPats[inclCt]=$OPTARG
	((inclCt+=1))
	;;
    P)
	exclPats[exclCt]=$OPTARG
	((exclCt+=1))
	;;
    # -m<segstart[:segend]=operation>
    m)
	# Store operation for each segment number in ops[num]
	# Store ending segment number in op_end_seg[num]
	# range is everything before the first '=', op everything after it.
	range=${OPTARG%%=*}
	op=${OPTARG#*=}
	# When range contains no ':', both expansions leave it unchanged, so
	# end is the same as start.
	start=${range%%:*}
	end=${range#*:}
	if [[ "$start" != +([0-9]) || "$start" -eq 0 ]]; then
	    print -ru2 -- "$name: Bad starting segment number given with -m: $start"
	    exit 1
	fi
	if [[ "$end" != +([0-9]) || "$end" -eq 0 ]]; then
	    print -ru2 -- "$name: Bad ending segment number given with -m: $end"
	    exit 1
	fi
	# Inside [[ ]], the operands of -gt are evaluated arithmetically, so the
	# bare names start and end stand for the values of those variables.
	if [[ start -gt end ]]; then
	    print -ru2 -- "$name: Ending segment ($end) is less than starting segment ($start)"
	    exit 1
	fi
	# The operation must mention i as a whole word: either at the very
	# start/end or bounded by characters that cannot be part of a name.
	if [[ "$op" != @(|*[!_a-zA-Z0-9])i@(|[!_a-zA-Z0-9]*) ]]; then
	    print -ru2 -- \
	    "$name: Operation given with -m does not reference 'i': $op"
	    exit 1
	fi
	# Test whether operation is legal.  let returns 1 both for error
	# indication and when last expression evaluates to 0, so evaluate 1
	# after test expression.
	i=1
	let "$op" 1 2>/dev/null || {
	    print -ru2 -- \
	    "$name: Bad operation given with -m: $op"
	    exit 1
	}
	# Array subscripts are arithmetic, so [start] indexes by the value of
	# $start.
	ops[start]=$op
	op_end_seg[start]=$end
	;;
    # -z<len>: give j (the result of a -m operation) a fixed field width.
    z)
	if [[ "$OPTARG" != +([0-9]) || "$OPTARG" -eq 0 ]]; then
	    print -ru2 -- "$name: Bad length given with -z: $OPTARG"
	    exit 1
	fi
	# NOTE(review): -Z (zero-filled fixed width) is a ksh typeset
	# attribute; confirm that the bash in use accepts it.  If typeset
	# rejects the option, the script exits here through "|| exit 1".
	typeset -Z$OPTARG j || exit 1
	;;
    +?)	# no way to tell getopts to not treat +x as an option
	print -r -u2 "$name: Do not prefix options with '+'."
	exit 1
	;;
    # An option that needs a value was given without one.
    :) 
	print -r -u2 \
"$name: Option -$OPTARG requires a value.
$Usage
Use -h for help."
	exit 1
	;;
    # An option letter that is not in the option string.
    \?) 
	print -r -u2 \
"$name: -$OPTARG: no such option.
$Usage
Use -h for help."
	exit 1
	;;
    esac
done
 
# Drop the words that getopts consumed so that only the operands remain.
(( OPTIND -= 1 ))
shift "$OPTIND"

oldpat=$1 newpat=$2

# When at least one -m operation was registered, a missing or empty
# newpattern defaults to oldpattern.  The positional parameters are rebuilt
# so that both pattern slots are filled and any further operands (file or
# directory names) keep their place behind them.
if (( ${#ops[@]} > 0 && $# > 0 )) && [[ -z "$newpat" ]]; then
    set -- "$oldpat" "$oldpat" "${@:3}"
    newpat=$oldpat
    $debug && print -ru2 -- "Set new pattern to: $newpat"
fi

# With an empty IFS, unquoted expansions are not split into words, so a
# pattern that contains whitespace still expands as one glob.
IFS=

# Keep the untouched old pattern; oldpat itself is consumed piece by piece
# when the patterns are split into segments.
origPat=$oldpat

# Leave the list of filenames to act on in the positional parameters.
if (( $# < 2 )); then
    # Both patterns are required at this point.
    print -u2 "$Usage\nUse -h for help."
    exit 1
elif (( $# > 2 )); then
    # Names were given explicitly: drop the two patterns, keep the rest.
    shift 2
else
    if $recurse; then
	print -r -u2 "$name: No directory names given with -r.  Use -h for help."
	exit 1
    fi
    # Only the patterns were given: every filename matching oldpat is used.
    set -- $oldpat
    # If nothing matched, the glob is left unexpanded and names no file.
    if [[ ! -e $1 ]]; then
	$warnNoFiles && print -r -- "$name: No filenames match this pattern: $oldpat"
	exit
    fi
fi

# "integer" is a ksh alias that bash does not provide; declare -i gives the
# same integer attribute.
declare -i patSegNum=1 numPatSegs

# For old ksh
# while [[ "$oldpat" = *'[\*\?]'* ]]; do

# Example oldpat: foo*.a
# Example newpat: bar*.b

# Build list of non-pattern segments and globbing segments found in arguments.
# Note the patterns given are used to get the list of filenames to act on,
# to delimit constant segments, and to determine which parts of filenames are
# to be replaced.
#
# Results, indexed by segment number starting at 1:
#     oldpre[n]  constant text of the old pattern in front of globbing segment n
#     oldsuf[n]  everything in the old pattern after globbing segment n
#     newpre[n]  constant text of the new pattern in front of globbing segment n
#     pats[n]    * or ? (a [] segment is stored as ?)
# One extra entry past the last globbing segment holds the trailing constant
# text; numPatSegs is the index of that entry.
#
# Examples given for first iteration (in the example, the only iteration)
# The || newpat  is to ensure that new pattern does not have more globbing
# segments than old pattern
while [[ "$oldpat" = *@([\*\?]|\[+([!\]])\])* ||
         "$newpat" = *@([\*\?]|\[+([!\]])\])* ]]; do
    ## Get leftmost globbing pattern in oldpat

    # Make r be oldpat with smallest left piece that includes a globbing
    # pattern removed from it
    r=${oldpat#*@([\*\?]|\[+([!\]])\])}	# r=.a
    # Make pat be oldpat with the above removed from it, leaving smallest
    # left piece that includes a globbing pattern
    pat=${oldpat%%"$r"}			# pat=foo*
    # Make l be pat with the globbing pattern removed from the right,
    # leaving a constant string
    l=${pat%@([\*\?]|\[+([!\]])\])}	# l=foo
    # Remove the constant part of pat from the left, leaving the globbing
    # pattern
    pat=${pat#"$l"}			# pat=*

    # Do the same thing for newpat, solely to provide a reliable test that
    # both oldpat & newpat contain exactly the same sequence of globbing
    # patterns.
    r=${newpat#*@([\*\?]|\[+([!\]])\])}	# r=.b
    npat=${newpat%%"$r"}		# npat=bar*
    l=${npat%@([\*\?]|\[+([!\]])\])}	# l=bar
    npat=${npat#"$l"}			# npat=*

    if [[ "$pat" != "$npat" ]]; then
	print -ru2 -- \
"$name: Old-pattern and new-pattern do not have the same sequence of globbing chars.
Pattern segment $patSegNum: Old pattern: $pat  New pattern: $npat"
	exit 1
    fi

    ## Find parts before & after pattern
    # oldpre[] stores the old constant part before the pattern,
    # so that it can be removed and replaced with the new constant part.
    oldpre[patSegNum]=${oldpat%%"$pat"*}	# oldpre[1]=foo
    # oldsuf stores the part that follows the globbing pattern,
    # so that it too can be removed.
    # After oldpre[] & oldsuf[] have been removed from a filename, what remains
    # is the part matched by the globbing pattern, which is to be retained.
    oldsuf[patSegNum]=${oldpat#*"$pat"}		# oldsuf[1]=.a
    # newpre[] stores the new constant part before the pattern,
    # so that it can be used to replace the old constant part.
    newpre[patSegNum]=${newpat%%"$pat"*}	# newpre[1]=bar
    # Get rid of processed part of patterns.  The prefixes are constant text,
    # so they are quoted to be removed literally rather than re-read as
    # patterns.
    oldpat=${oldpat#"${oldpre[patSegNum]}$pat"}	# oldpat=.a
    newpat=${newpat#"${newpre[patSegNum]}$pat"}	# newpat=.b
    # Store either * or ? in pats[], depending on whether this segment matches 1
    # or any number of characters.
    [[ "$pat" = \[* ]] && pat=?
    pats[patSegNum]=$pat
    ((patSegNum+=1))
done

# Arithmetic test: the [ builtin of bash does not accept a variable name as
# an operand of -eq, so "[ patSegNum -eq 1 ]" could never succeed there.
if (( patSegNum == 1 )); then
    print -u2 "No globbing chars in pattern."
    exit 1
fi

# Record the constant text that follows the last globbing segment.
oldpre[patSegNum]=${oldpat%%"$pat"*}	# oldpre[2]=.a
oldsuf[patSegNum]=${oldpat#*"$pat"}	# oldsuf[2]=.a
newpre[patSegNum]=${newpat%%"$pat"*}	# newpre[2]=.b

# Store the value, not the name: renameFile compares its own counter
# against numPatSegs.
numPatSegs=$patSegNum

if $debug; then
    for (( patSegNum = 1; patSegNum <= numPatSegs; patSegNum++ )); do
	print -ru2 -- \
"Old prefix: <${oldpre[patSegNum]}>   Old suffix: <${oldsuf[patSegNum]}>   New prefix: <${newpre[patSegNum]}>   Pattern: <${pats[patSegNum]}>"
    done
fi

# Example filename: foox.a
# Example oldpat: foo*.a
# Example newpat: bar*.b

# Number of files that passed the -p/-P filters.  ("integer" is a ksh alias
# that bash does not provide; declare -i gives the same attribute.)
declare -i numFiles=0

# Usage: renameFile filename [dirname]
# Rename one file according to the segment tables built from the patterns.
# [dirname] is a directory name to prefix filenames with when they are printed
# for informational purposes.
# Uses globals:
#     inclCt exclCt inclPats[] exclPats[] ops[] op_end_seg[]
#     numPatSegs oldpre[] oldsuf[] newpre[] pats[]
#     check warn tell verbose debug name
# Modifies globals: numFiles, and i and j (operand and result of a -m
#     operation; they stay global because the operation text refers to i)
# Returns:
#     1 if the file is filtered out by -p/-P patterns
#     2 if the file is skipped (segment is not an integer, operation failed,
#       or the destination exists and -f was not given)
#     otherwise the status of the last command run (mv when renaming)
function renameFile {
    local file=$1 subdir=$2
    local -i patSegNum patnum startseg endseg lastseg
    local origname porigname newfile matchtext pnewfile sign digits
    local -a matchsegs=()

    origname=$file	# origname=foox.a
    porigname=$subdir$file

    # The stored -p/-P patterns are expanded unquoted on the right-hand side
    # of = so that they are matched as patterns rather than literal strings.
    # Counters are compared with (( )) because the [ builtin of bash does not
    # accept variable names as operands of -gt/-lt/-eq.
    if (( inclCt > 0 )); then
	for (( patnum = 0; patnum < inclCt; patnum++ )); do
	    [[ "$file" = ${inclPats[patnum]} ]] && break
	done
	if (( patnum == inclCt )); then
	    $debug && print -ru2 -- "Skipping not-included filename '$porigname'"
	    return 1
	fi
    fi
    for (( patnum = 0; patnum < exclCt; patnum++ )); do
	if [[ "$file" = ${exclPats[patnum]} ]]; then
	    $debug && print -ru2 -- "Skipping excluded filename '$porigname'"
	    return 1
	fi
    done

    # Extract matching segments from filename
    ((numFiles+=1))
    for (( patSegNum = 1; patSegNum <= numPatSegs; patSegNum++ )); do
	# Remove a fixed prefix		iteration:	1		2
	# (quoted: the prefix is constant text, not a pattern)
	file=${file#"${oldpre[patSegNum]}"}		# file=x.a	file=
	# Save the part of this suffix that is to be retained.  To do this, we
	# need to know what part of the suffix matched the current globbing
	# segment.  If the globbing segment is a *, this is done by removing
	# the minimum part of the suffix that matches oldsuf (since * matches
	# the longest segment possible).  If the globbing segment is ? or []
	# (the latter has already been converted to ?), it is done by taking
	# the next character.
	if [[ "${pats[patSegNum]}" == '?' ]]; then
	    matchtext=${file:0:1}
	else
	    # oldsuf holds the rest of the old pattern and must stay unquoted
	    # so that it is applied as a pattern.
	    matchtext=${file%${oldsuf[patSegNum]}}	# matchtext=x	matchtext=
	fi
	$debug && print -ru2 -- "Matching segment $patSegNum: $matchtext"
	# Quoted: the matched text comes from the filename and may itself
	# contain globbing characters.
	file=${file#"$matchtext"}			# file=.a	file=.a

	matchsegs[patSegNum]=$matchtext
    done

    # Paste fixed and matching segments together to form new filename.
    # Index 0 has no prefix and no match text, so it contributes nothing.
    newfile=
    patSegNum=0
    while (( patSegNum <= numPatSegs )); do
	matchtext=${matchsegs[patSegNum]}
	startseg=patSegNum
	if [[ -n "${ops[startseg]}" ]]; then
	    endseg=${op_end_seg[startseg]}
	    # Concatenate through the last globbing segment that exists.  The
	    # entry at numPatSegs only carries the trailing constant text and
	    # must still be appended below, so the range never runs past
	    # numPatSegs-1.
	    lastseg=endseg
	    if (( lastseg > numPatSegs - 1 )); then
		lastseg=$(( numPatSegs - 1 ))
	    fi
	    while (( patSegNum < lastseg )); do
		((patSegNum+=1))
		matchtext=$matchtext${matchsegs[patSegNum]}
	    done
	    if [[ ! "$matchtext" =~ ^[-0-9]+$ ]]; then
		print -ru2 -- \
"Segment(s) $startseg - $endseg ($matchtext) of file '$porigname' do not form an integer; skipping this file."
		return 2
	    fi
	    # Shell arithmetic reads a leading 0 as octal, so a zero-padded
	    # field such as 079 would be rejected and 010 would become 8.
	    # Force an optionally signed digit string to base 10.
	    sign=
	    digits=$matchtext
	    if [[ "$digits" == -* ]]; then
		sign=-
		digits=${digits#-}
	    fi
	    if [[ "$digits" =~ ^[0-9]+$ ]]; then
		i=$sign$(( 10#$digits ))
	    else
		i=$matchtext
	    fi
	    # let returns 1 both on error and when the last expression is 0;
	    # the trailing 1 keeps a legitimate result of 0 from being
	    # reported as a failure.
	    let "j=${ops[startseg]}" 1 || {
		print -ru2 -- \
"Operation failed on segment(s) $startseg - $endseg ($matchtext) of file '$porigname'; skipping this file."
		return 2
	    }
	    $debug && print -ru2 -- "Converted $matchtext to $j"
	    matchtext=$j
	fi
	newfile=$newfile${newpre[startseg]}$matchtext	# newfile=barx	newfile=barx.b
	((patSegNum+=1))
    done

    pnewfile=$subdir$newfile
    if $check && [[ -e "$newfile" ]]; then
	$warn &&
	print -ru2 -- "$name: Not renaming \"$porigname\"; destination filename \"$pnewfile\" already exists."
	return 2
    fi
    if $tell; then
	print -n -r -- "Would move: $porigname -> $pnewfile"
	$warn && [[ -e "$newfile" ]] && print -n -r " (destination filename already exists; would replace it)"
	print ""
    else
	if $verbose; then
	    print -n -r -- "Moving: $porigname -> $pnewfile"
	    $warn && [[ -e "$newfile" ]] && print -n -r -- " (replacing old destination filename \"$pnewfile\")"
	    print ""
	elif $warn && [[ -e "$newfile" ]]; then
	    print -r -- "$name: Note: Replacing old file \"$pnewfile\""
	fi
	mv -f -- "$origname" "$newfile"
    fi
}

# Apply the rename either to every matching file below the given directories
# (-r) or to the filenames left in the positional parameters.
if $recurse; then
    oPWD=$PWD
    # Directory names that contain a newline cannot be passed through the
    # line-oriented read below, so find leaves them out.  -depth lists a
    # directory's contents before the directory itself.
    find "$@" -depth -type d ! -name '*
*' -print | while IFS= read -r dir; do
	# read -r keeps backslashes in directory names intact.
	# Return to the starting directory first so that relative names
	# printed by find resolve correctly.
	cd -- "$oPWD"
	if cd -- "$dir"; then
	    for file in $origPat; do
		# A glob without matches is left as the literal pattern;
		# skip it instead of trying to rename a nonexistent file.
		[[ -e "$file" || -L "$file" ]] || continue
		renameFile "$file" "$dir/"
	    done
	else
	    print -ru2 -- "$name: Could not access directory '$dir' - skipped."
	fi
    done
else
    for file; do
	renameFile "$file"
    done
fi

# Arithmetic test: the [ builtin of bash does not accept a variable name as
# an operand of -eq.
if (( numFiles == 0 )) && $warnNoFiles; then
    print -ru2 -- \
    "$name: All filenames were excluded by patterns given with -p or -P."
fi