pattern.c   [plain text]


/*
 * pattern.c - pattern matching
 *
 * This file is part of zsh, the Z shell.
 *
 * Copyright (c) 1999 Peter Stephenson
 * All rights reserved.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and to distribute modified versions of this software for any
 * purpose, provided that the above copyright notice and the following
 * two paragraphs appear in all copies of this software.
 *
 * In no event shall Peter Stephenson or the Zsh Development Group be liable
 * to any party for direct, indirect, special, incidental, or consequential
 * damages arising out of the use of this software and its documentation,
 * even if Peter Stephenson and the Zsh Development Group have been advised of
 * the possibility of such damage.
 *
 * Peter Stephenson and the Zsh Development Group specifically disclaim any
 * warranties, including, but not limited to, the implied warranties of
 * merchantability and fitness for a particular purpose.  The software
 * provided hereunder is on an "as is" basis, and Peter Stephenson and the
 * Zsh Development Group have no obligation to provide maintenance,
 * support, updates, enhancements, or modifications.
 *
 * Pattern matching code derived from the regexp library by Henry
 * Spencer, which has the following copyright.
 *
 *	Copyright (c) 1986 by University of Toronto.
 *	Written by Henry Spencer.  Not derived from licensed software.
 *
 *	Permission is granted to anyone to use this software for any
 *	purpose on any computer system, and to redistribute it freely,
 *	subject to the following restrictions:
 *
 *	1. The author is not responsible for the consequences of use of
 *		this software, no matter how awful, even if they arise
 *		from defects in it.
 *
 *	2. The origin of this software must not be misrepresented, either
 *		by explicit claim or by omission.
 *
 *	3. Altered versions must be plainly marked as such, and must not
 *		be misrepresented as being the original software.
 *
 * Eagle-eyed readers will notice this is an altered version.  Incredibly
 * sharp-eyed readers might even find bits that weren't altered.
 *
 *
 *      And I experienced a sense that, like certain regular
 *      expressions, seemed to match the day from beginning to end, so
 *      that I did not need to identify the parenthesised subexpression
 *      that told of dawn, nor the group of characters that indicated
 *      the moment when my grandfather returned home with news of
 *      Swann's departure for Paris; and the whole length of the month
 *      of May, as if matched by a closure, fitted into the buffer of my
 *      life with no sign of overflowing, turning the days, like a
 *      procession of insects that could consist of this or that
 *      species, into a random and unstructured repetition of different
 *      sequences, anchored from the first day of the month to the last
 *      in the same fashion as the weeks when I knew I would not see
 *      Gilberte and would search in vain for any occurrences of the
 *      string in the avenue of hawthorns by Tansonville, without my
 *      having to delimit explicitly the start or finish of the pattern.
 *
 *                                 M. Proust, "In Search of Lost Files",
 *                                 bk I, "The Walk by Bourne's Place".
 */

#include "zsh.mdh"

/*
 * The following union is used mostly for alignment purposes.
 * Normal nodes are longs, while certain nodes take a char * as an argument;
 * here we make sure that they both work out to the same length.
 * The compiled regexp we construct consists of upats stuck together;
 * anything else to be added (strings, numbers) is stuck after and
 * then aligned to a whole number of upat units.
 *
 * Note also that offsets are in terms of the sizes of these things.
 */
union upat {
    long l;		/* ordinary node word; see P_OP() and P_NEXT() */
    unsigned char *p;	/* pointer argument, for nodes that take one */
};

/* Pointer to one unit of compiled pattern. */
typedef union upat *Upat;

#include "pattern.pro"

/* Number of active parenthesized expressions allowed in backreferencing */
#define NSUBEXP  9

/*
 * Opcodes for the nodes of a compiled pattern.  The opcode is kept in
 * the low eight bits of a node's first word; see P_OP().  The "opnd?"
 * column uses the abbreviations explained after the table.
 */
/* definition	number	opnd?	meaning */
#define	P_END	  0x00	/* no	End of program. */
#define P_EXCSYNC 0x01	/* no   Test if following exclude already failed */
#define P_EXCEND  0x02	/* no   Test if exclude matched orig branch */
#define	P_BACK	  0x03	/* no	Match "", "next" ptr points backward. */
#define	P_EXACTLY 0x04	/* lstr	Match this string. */
#define	P_NOTHING 0x05	/* no	Match empty string. */
#define	P_ONEHASH 0x06	/* node	Match this (simple) thing 0 or more times. */
#define	P_TWOHASH 0x07	/* node	Match this (simple) thing 1 or more times. */
#define P_GFLAGS  0x08	/* long Match nothing and set globbing flags */
#define P_ISSTART 0x09  /* no   Match start of string. */
#define P_ISEND   0x0a  /* no   Match end of string. */
#define P_COUNTSTART 0x0b /* no Initialise P_COUNT */
#define P_COUNT   0x0c  /* 3*long uc* node Match a number of repetitions */
/* numbered so we can test bit 5 (0x20) for a branch; see P_ISBRANCH() */
#define	P_BRANCH  0x20	/* node	Match this alternative, or the next... */
#define	P_WBRANCH 0x21	/* uc* node P_BRANCH, but match at least 1 char */
/* excludes are also branches, but have bit 4 (0x10) set, too; see P_ISEXCLUDE() */
#define P_EXCLUDE 0x30	/* uc* node Exclude this from previous branch */
#define P_EXCLUDP 0x31	/* uc* node Exclude, using full file path so far */
/* numbered so we can test bit 6 (0x40) so as not to match initial '.'; see P_NOTDOT() */
#define	P_ANY	  0x40	/* no	Match any one character. */
#define	P_ANYOF	  0x41	/* str  Match any character in this string. */
#define	P_ANYBUT  0x42	/* str  Match any character not in this string. */
#define P_STAR    0x43	/* no   Match any set of characters. */
#define P_NUMRNG  0x44	/* zr, zr Match a numeric range. */
#define P_NUMFROM 0x45	/* zr   Match a number >= X */
#define P_NUMTO   0x46	/* zr   Match a number <= X */
#define P_NUMANY  0x47	/* no   Match any set of decimal digits */
/* spaces left for P_OPEN+n,... for backreferences */
#define	P_OPEN	  0x80	/* no	Mark this point in input as start of n. */
#define	P_CLOSE	  0x90	/* no	Analogous to OPEN. */
/*
 * Operand abbreviations used in the table above:
 * no    no argument
 * zr    the range type zrange_t:  may be zlong or unsigned long
 * char  a single char
 * long  a single long
 * node  the operand is itself a (sub)pattern node
 * uc*   a pointer to unsigned char, used at run time and initialised
 *       to NULL.
 * str   null-terminated, metafied string
 * lstr  length as long then string, not null-terminated, unmetafied.
 */

/*
 * Notes on usage:
 * P_WBRANCH:  This works like a branch and is used in complex closures,
 *    to ensure we don't succeed on a zero-length match of the pattern,
 *    since that would cause an infinite loop.  We do this by recording
 *    the positions where we have already tried to match.   See the
 *    P_WBRANCH test in patmatch().
 *
 *  P_ANYOF, P_ANYBUT:  the operand is a null terminated
 *    string.  Normal characters match as expected.  Characters
 *    in the range Meta+PP_ALPHA..Meta+PP_UNKWN do the appropriate
 *    Posix range tests.  This relies on imeta returning true for these
 *    characters.  We treat unknown POSIX ranges as never matching.
 *    PP_RANGE means the next two (possibly metafied) characters form
 *    the limits of a range to test; it's too much like hard work to
 *    expand the range.
 *
 *  P_EXCLUDE, P_EXCSYNC, P_EXCEND:  P_EXCLUDE appears in the pattern like
 *    P_BRANCH, but applies to the immediately preceding branch.  The code in
 *    the corresponding branch is followed by a P_EXCSYNC, which simply
 *    acts as a marker that a P_EXCLUDE comes next.  The P_EXCLUDE
 *    has a pointer to char embedded in it, which works
 *    like P_WBRANCH:  if we get to the P_EXCSYNC, and we already matched
 *    up to the same position, fail.  Thus we are forced to backtrack
 *    on closures in the P_BRANCH if the first attempt was excluded.
 *    Corresponding to P_EXCSYNC in the original branch, there is a
 *    P_EXCEND in the exclusion.  If we get to this point, and we did
 *    *not* match in the original branch, the exclusion itself fails,
 *    otherwise it succeeds since we know the tail already matches,
 *    so P_EXCEND is the end of the exclusion test.
 *    The whole sorry mess looks like this, where the upper lines
 *    show the linkage of the branches, and the lower shows the linkage
 *    of their pattern arguments.
 *
 *     	        ---------------------      ----------------------
 *              ^      	       	     v    ^      	         v
 *      ( <BRANCH>:apat-><EXCSYNC> <EXCLUDE>:excpat-><EXCEND> ) tail
 *                               	                         ^
 *		       	  |                                      |
 *			   --------------------------------------
 *
 * P_EXCLUDP: this behaves exactly like P_EXCLUDE, with the sole exception
 *   that we prepend the path so far to the exclude pattern.   This is
 *   for top level file globs, e.g. ** / *.c~*foo.c
 *                                    ^ I had to leave this space
 * P_NUM*: zr is a zlong if that is 64-bit, else an unsigned long.
 *
 * P_COUNTSTART, P_COUNT: a P_COUNTSTART flags the start of a quantified
 * closure (#cN,M) and is used to initialise the count.  Executing
 * the pattern leads back to the P_COUNT, while the next links of the
 * P_COUNTSTART and P_COUNT lead to the tail of the pattern:
 *
 *	       	        ----------------
 *     	       	       v       	        ^
 *        <COUNTSTART><COUNT>pattern<BACK> tail
 *	     	    v      v  	  	    ^
 *	            ------------------------
 */

/* Opcode of a node: the low eight bits of its first word. */
#define	P_OP(p)		((p)->l & 0xff)
/* Offset to the next node, kept above the opcode; 0 means none (see PATNEXT). */
#define	P_NEXT(p)	((p)->l >> 8)
/* First operand: the upat unit directly after the node. */
#define	P_OPERAND(p)	((p) + 1)
/* Non-zero for branch-type opcodes (those with 0x20 set). */
#define P_ISBRANCH(p)   ((p)->l & 0x20)
/* True for the exclusion opcodes (both 0x20 and 0x10 set). */
#define P_ISEXCLUDE(p)	(((p)->l & 0x30) == 0x30)
/* Non-zero for the opcodes numbered from 0x40, which must not match an initial '.'. */
#define P_NOTDOT(p)	((p)->l & 0x40)

/* Specific to lstr type, i.e. P_EXACTLY. */
#define P_LS_LEN(p)	((p)[1].l) /* can be used as lvalue */
#define P_LS_STR(p)	((char *)((p) + 2)) /* string starts after the length word */

/* Specific to P_COUNT: arguments as offset in nodes from operator */
#define P_CT_CURRENT	(1)	/* Current count */
#define P_CT_MIN	(2)     /* Minimum count */
#define P_CT_MAX	(3)	/* Maximum count, -1 for none */
#define P_CT_PTR	(4)	/* Pointer to last match start */
#define P_CT_OPERAND	(5)	/* Operand of P_COUNT */

/*
 * Flags needed when pattern is executed.
 * These are the values passed back through the flagp arguments of
 * the compilation routines.
 */
#define P_SIMPLE        0x01	/* Simple enough to be #/## operand. */
#define P_HSTART        0x02	/* Starts with # or ##'d pattern. */
#define P_PURESTR	0x04	/* Can be matched with a strcmp */

/*
 * Type used for the limits of numeric ranges (the "zr" operands):
 * zlong if a 64-bit type is available, else unsigned long.
 * ZRANGE_T_IS_SIGNED is only defined in the zlong case.
 */
#if defined(ZSH_64_BIT_TYPE) || defined(LONG_IS_64_BIT)
typedef zlong zrange_t;
#define ZRANGE_T_IS_SIGNED	(1)
#else
typedef unsigned long zrange_t;
#endif

/*
 * Characters which terminate a pattern segment.  We actually use
 * a pointer patendseg which skips the first character if we are not
 * parsing a file pattern.
 * Note that the size of this and the next array are hard-wired
 * via the definitions.
 */

static char endseg[] = {
    '/',			/* file only */
    '\0', Bar, Outpar,		/* all patterns */
    Tilde			/* extended glob only */
};

/*
 * Number of entries of endseg in use without and with EXTENDEDGLOB.
 * Both counts include the leading '/'.
 */
#define PATENDSEGLEN_NORM 4
#define PATENDSEGLEN_EXT  5

/* Characters which terminate a simple string */

static char endstr[] = {
    '/',			/* file only */
    '\0', Bar, Outpar, Quest, Star, Inbrack, Inpar, Inang, Bnullkeep,
				/* all patterns */
    Tilde, Hat, Pound		/* extended glob only */
};

/*
 * Number of entries of endstr in use without and with EXTENDEDGLOB.
 * Both counts include the leading '/'.
 */
#define PATENDSTRLEN_NORM 10
#define PATENDSTRLEN_EXT  13


/* Default size for pattern buffer */
#define P_DEF_ALLOC 256

/* State used in compilation */
static char *patstart, *patparse;	/* input pointers: start of pattern, current position */
static int patnpar;		/* () count */
static char *patcode;		/* point of code emission */
static long patsize;		/* size of code */
static char *patout;		/* start of code emission string */
static long patalloc;		/* size allocated for same */
static char *patendseg;		/* characters ending segment */
static int patendseglen;	/* length of same */
static char *patendstr;		/* characters ending plain string */
static int patendstrlen;	/* length of same */

/* Flags used in both compilation and execution */
static int patflags;		    /* flags passed down to patcompile */
/*
 * Globbing flags (GF_*); the low eight bits hold the number of
 * errors allowed in approximate matching.
 */
static int patglobflags;  /* globbing flags & approx */

/*
 * Increment pointer to metafied multibyte string.
 */
#ifdef MULTIBYTE_SUPPORT
typedef wint_t patint_t;

#define PEOF WEOF

#define METACHARINC(x) ((void)metacharinc(&x))

/*
 * TODO: the shiftstate isn't well handled; we don't guarantee
 * to maintain it properly between characters.  If we don't
 * need it we should use mbtowc() instead.
 */
static mbstate_t shiftstate;

/*
 * Multibyte version: it's (almost) as easy to return the
 * value as not, so do so since we sometimes need it.
 *
 * *x points into a metafied, possibly tokenized string.  The
 * pointer is advanced past one character and that character is
 * returned, unmetafied.  Without GF_MULTIBYTE, or if the first byte
 * has the top bit clear, exactly one (possibly metafied) byte is
 * consumed.  Otherwise bytes are fed to mbrtowc() one at a time
 * until a complete character has been seen.
 *
 * If the bytes do not form a valid character, the shift state is
 * reset and a single byte is consumed.  That byte is taken as a
 * whole: if it was stored as a Meta pair both bytes of the pair are
 * skipped, so that the pointer is never left between a Meta and the
 * byte it qualifies.
 */
static wchar_t
metacharinc(char **x)
{
    char *inptr = *x;
    char inchar;
    size_t ret = MB_INVALID;
    wchar_t wc;

    /*
     * Cheat if the top bit isn't set.  This is second-guessing
     * the library, but we know for sure that if the character
     * set doesn't have the property that all bytes with the 8th
     * bit clear are single characters then we are stuffed.
     */
    if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(*inptr) & 0x80))
    {
	if (itok(*inptr))
	    inchar = ztokens[*inptr++ - Pound];
	else if (*inptr == Meta) {
	    inptr++;
	    inchar = *inptr++ ^ 32;
	} else {
	    inchar = *inptr++;
	}
	*x = inptr;
	return (wchar_t)STOUC(inchar);
    }

    while (*inptr) {
	/* Fetch the next byte in its untokenized, unmetafied form. */
	if (itok(*inptr))
	    inchar = ztokens[*inptr++ - Pound];
	else if (*inptr == Meta) {
	    inptr++;
	    inchar = *inptr++ ^ 32;
	} else {
	    inchar = *inptr++;
	}
	ret = mbrtowc(&wc, &inchar, 1, &shiftstate);

	if (ret == MB_INVALID)
	    break;
	if (ret == MB_INCOMPLETE)
	    continue;
	*x = inptr;
	return wc;
    }

    /* Error.  Treat as single byte. */
    /* Reset the shift state for next time. */
    memset(&shiftstate, 0, sizeof(shiftstate));
    /*
     * Go back to where we started; the loop above may have looked
     * at several bytes.  A Meta pair counts as one byte here.
     */
    inptr = *x;
    if (*inptr == Meta) {
	inchar = inptr[1] ^ 32;
	inptr += 2;
    } else {
	inchar = *inptr++;
    }
    *x = inptr;
    return (wchar_t) STOUC(inchar);
}

#else
typedef int patint_t;

#define PEOF EOF

#define METACHARINC(x)	((void)((x) += (*(x) == Meta) ? 2 : 1))
#endif

/*
 * Return unmetafied char from string (x is any char *).
 * Used with MULTIBYTE_SUPPORT if the GF_MULTIBYTE is not
 * in effect.
 */
#define UNMETA(x)	(*(x) == Meta ? (x)[1] ^ 32 : *(x))

/* Add n more characters, ensuring there is enough space. */

/* Bits for the paflags argument of patadd() */
enum {
    PA_NOALIGN = 1,	/* don't round the new size up to a whole upat */
    PA_UNMETA  = 2	/* unmetafy and untokenize the string being added */
};

/**/
static void
patadd(char *add, int ch, long n, int paflags)
{
    /* Make sure everything gets aligned unless we get PA_NOALIGN. */
    long newpatsize = patsize + n;
    if (!(paflags & PA_NOALIGN))
	newpatsize = (newpatsize + sizeof(union upat) - 1) &
		      ~(sizeof(union upat) - 1);
    if (patalloc < newpatsize) {
	long newpatalloc =
	    2*(newpatsize > patalloc ? newpatsize : patalloc);
	patout = (char *)zrealloc((char *)patout, newpatalloc);
	patcode = patout + patsize;
	patalloc = newpatalloc;
    }
    patsize = newpatsize;
    if (add) {
	if (paflags & PA_UNMETA) {
	    /*
	     * Unmetafy and untokenize the string as we go.
	     * The Meta characters in add aren't counted in n.
	     */
	    while (n--) {
		if (itok(*add))
		    *patcode++ = ztokens[*add++ - Pound];
		else if (*add == Meta) {
		    add++;
		    *patcode++ = *add++ ^ 32;
		} else {
		    *patcode++ = *add++;
		}
	    }
	} else {
	    while (n--)
		*patcode++ = *add++;
	}
    } else
	*patcode++ = ch;
    patcode = patout + patsize;
}

/* Scratch variable for PATNEXT: holds the offset just read with P_NEXT() */
static long rn_offs;
/*
 * operates on pointers to union upat, returns a pointer:
 * NULL if the node has no next offset, else the node that many
 * units after p, or before p if p is a P_BACK node.
 * The argument is evaluated more than once.
 */
#define PATNEXT(p) ((rn_offs = P_NEXT(p)) ? \
		    (P_OP(p) == P_BACK) ? \
		    ((p)-rn_offs) : ((p)+rn_offs) : NULL)

/*
 * Called before parsing a set of file matches to initialize flags:
 * matching ignores case unless CASEGLOB is set, and is multibyte
 * if MULTIBYTE is set.
 */

/**/
void
patcompstart(void)
{
    patglobflags = isset(CASEGLOB) ? 0 : GF_IGNCASE;
    if (isset(MULTIBYTE))
	patglobflags |= GF_MULTIBYTE;
}

/*
 * Top level pattern compilation subroutine
 * exp is a null-terminated, metafied string.
 * inflags is an or of some PAT_* flags.
 * endexp, if non-null, is set to a pointer to the end of the
 *   part of exp which was compiled.  This is used when
 *   compiling patterns for directories which must be
 *   matched recursively.
 *
 * Returns the compiled program, or NULL if the pattern could not
 * be compiled.  The program is built in the shared buffer patout.
 * With PAT_ZDUP it is then copied to zalloc()ed memory, otherwise
 * unless PAT_STATIC is given it is copied to the heap.
 */

/**/
mod_export Patprog
patcompile(char *exp, int inflags, char **endexp)
{
    int flags = 0;
    long len = 0;
    long startoff;
    Upat pscan;
    char *lng, *strp = NULL;
    Patprog p;

    startoff = sizeof(struct patprog);
    /* Ensure alignment of start of program string */
    startoff = (startoff + sizeof(union upat) - 1) & ~(sizeof(union upat) - 1);

    /* Allocate reasonable sized chunk if none, reduce size if too big */
    if (patalloc != P_DEF_ALLOC)
	patout = (char *)zrealloc(patout, patalloc = P_DEF_ALLOC);
    patcode = patout + startoff;
    patsize = patcode - patout;
    patstart = patparse = exp;
    /*
     * Note global patnpar numbers parentheses 1..9, while patnpar
     * in struct is actual count of parentheses.
     */
    patnpar = 1;
    patflags = inflags & ~(PAT_PURES|PAT_HAS_EXCLUDP);

    patendseg = endseg;
    patendseglen = isset(EXTENDEDGLOB) ? PATENDSEGLEN_EXT : PATENDSEGLEN_NORM;
    patendstr = endstr;
    patendstrlen = isset(EXTENDEDGLOB) ? PATENDSTRLEN_EXT : PATENDSTRLEN_NORM;

    if (!(patflags & PAT_FILE)) {
	/* Not a file pattern: '/' is not special, so skip it in the tables. */
	patendseg++;
	patendstr++;
	patendseglen--;
	patendstrlen--;
	remnulargs(patparse);
	if (isset(MULTIBYTE))
	    patglobflags = GF_MULTIBYTE;
	else
	    patglobflags = 0;
    }
    if (patflags & PAT_LCMATCHUC)
	patglobflags |= GF_LCMATCHUC;
    /*
     * Have to be set now, since they get updated during compilation.
     */
    ((Patprog)patout)->globflags = patglobflags;

    if (!(patflags & PAT_ANY)) {
	/* Look for a really pure string, with no tokens at all. */
	if (!(patglobflags & ~GF_MULTIBYTE)
#ifdef __CYGWIN__
	    /*
	     * If the OS treats files case-insensitively and we
	     * are looking at files, we don't need to use pattern
	     * matching to find the file.
	     */
	    || (!(patglobflags & ~GF_IGNCASE) && (patflags & PAT_FILE))
#endif
	    )
	{
	    /*
	     * Waah!  I wish I understood this.
	     * Empty metafied strings have an initial Nularg.
	     * This never corresponds to a real character in
	     * a glob pattern or string, so skip it.
	     */
	    if (*exp == Nularg)
		exp++;
	    for (strp = exp; *strp &&
		     (!(patflags & PAT_FILE) || *strp != '/') && !itok(*strp);
		 strp++)
		;
	}
	if (!strp || (*strp && *strp != '/')) {
	    /* No, do normal compilation. */
	    strp = NULL;
	    if (patcompswitch(0, &flags) == 0)
		return NULL;
	} else {
	    /*
	     * Yes, copy the string, and skip compilation altogether.
	     * Null terminate for the benefit of globbing.
	     * Leave metafied both for globbing and for our own
	     * efficiency.
	     */
	    patparse = strp;
	    len = strp - exp;
	    patadd(exp, 0, len + 1, 0);
	    patout[startoff + len] = '\0';
	    patflags |= PAT_PURES;
	}
    }

    /* end of compilation: safe to use pointers */
    p = (Patprog)patout;
    p->startoff = startoff;
    p->patstartch = '\0';
    p->globend = patglobflags;
    p->flags = patflags;
    p->mustoff = 0;
    p->size = patsize;
    p->patmlen = len;
    p->patnpar = patnpar-1;

    if (!strp) {
	pscan = (Upat)(patout + startoff);

	if (!(patflags & PAT_ANY) && P_OP(PATNEXT(pscan)) == P_END) {
	    /* only one top level choice */
	    pscan = P_OPERAND(pscan);

	    if (flags & P_PURESTR) {
		/*
		 * The pattern can be matched with a simple strncmp/strcmp.
		 *
		 * Unfortunately we unmetafied the string
		 * and we need to put any metacharacters
		 * back now we know it's a pure string.
		 * This shouldn't happen too often, it's
		 * just that there are some cases such
		 * as . and .. in files where we really
		 * need a pure string even if there are
		 * pattern characters flying around.
		 *
		 * The result replaces the nodes at the start of the
		 * program.  As it can be longer than the text it is
		 * made from, building it in place could overwrite
		 * text and nodes which have not been read yet.  So
		 * assemble it in a temporary buffer first and only
		 * copy it over the nodes when they are finished with.
		 */
		Upat scan;
		char *remeta, *dst;
		long mlen = 0;

		p->flags |= PAT_PURES;

		/* First pass: find the length of the metafied string. */
		for (scan = pscan; scan; scan = PATNEXT(scan)) {
		    if (P_OP(scan) == P_EXACTLY) {
			char *opnd = P_LS_STR(scan);
			long ilen = P_LS_LEN(scan);

			for (; ilen; opnd++, ilen--)
			    mlen += imeta(*opnd) ? 2 : 1;
		    }
		}

		/* Second pass: metafy into the temporary buffer. */
		remeta = dst = (char *)zhalloc(mlen + 1);
		for (scan = pscan; scan; scan = PATNEXT(scan)) {
		    if (P_OP(scan) == P_EXACTLY) {
			char *opnd = P_LS_STR(scan);
			long oplen = P_LS_LEN(scan);

			while (oplen--) {
			    if (imeta(*opnd)) {
				*dst++ = Meta;
				*dst++ = *opnd++ ^ 32;
			    } else {
				*dst++ = *opnd++;
			    }
			}
		    }
		}

		/*
		 * The nodes are no longer needed.  Make sure there
		 * is room, remembering the buffer can move.
		 */
		if (startoff + mlen > patsize) {
		    patadd(NULL, 0, startoff + mlen - patsize, 0);
		    p = (Patprog)patout;
		}
		memcpy(patout + startoff, remeta, mlen);
		p->size = startoff + mlen;
		/* patmlen is really strlen.  We don't need a null. */
		p->patmlen = mlen;
	    } else {
		/* starting point info */
		if (P_OP(pscan) == P_EXACTLY && !p->globflags &&
		    P_LS_LEN(pscan))
		    p->patstartch = *P_LS_STR(pscan);
		/*
		 * Find the longest literal string in something expensive.
		 * This is itself not all that cheap if we have
		 * case-insensitive matching or approximation, so don't.
		 */
		if ((flags & P_HSTART) && !p->globflags) {
		    lng = NULL;
		    len = 0;
		    for (; pscan; pscan = PATNEXT(pscan))
			if (P_OP(pscan) == P_EXACTLY &&
			    P_LS_LEN(pscan) >= len) {
			    lng = P_LS_STR(pscan);
			    len = P_LS_LEN(pscan);
			}
		    if (lng) {
			p->mustoff = lng - patout;
			p->patmlen = len;
		    }
		}
	    }
	}
    }

    /*
     * The pattern was compiled in a fixed buffer:  unless told otherwise,
     * we stick the compiled pattern on the heap.  This is necessary
     * for files where we will often be compiling multiple segments at once.
     * But if we get the ZDUP flag we always put it in zalloc()ed memory.
     */
    if (patflags & PAT_ZDUP) {
	Patprog newp = (Patprog)zalloc(patsize);
	memcpy((char *)newp, (char *)p, patsize);
	p = newp;
    } else if (!(patflags & PAT_STATIC)) {
	Patprog newp = (Patprog)zhalloc(patsize);
	memcpy((char *)newp, (char *)p, patsize);
	p = newp;
    }

    if (endexp)
	*endexp = patparse;
    return p;
}

/*
 * Main body or parenthesized subexpression in pattern
 * Parenthesis (and any ksh_glob gubbins) will have been removed.
 *
 * paren is non-zero if we are inside parentheses, in which case
 * the expression must be ended by Outpar; otherwise it must be
 * ended by the end of the string, or by '/' in a file pattern.
 * *flagp is set to those of P_HSTART and P_PURESTR which apply.
 *
 * Returns the offset of the first node, in upat units from the
 * start of patout, or 0 on failure.
 */

/**/
static long
patcompswitch(int paren, int *flagp)
{
    long starter, br, ender, excsync = 0;
    int parno = 0;
    int flags, gfchanged = 0, savglobflags = patglobflags;
    Upat ptr;

    *flagp = 0;

    if (paren && (patglobflags & GF_BACKREF) && patnpar <= NSUBEXP) {
	/*
	 * parenthesized:  make an open node.
	 * We can only refer to the first nine parentheses.
	 * For any others, we just use P_OPEN on its own; there's
	 * no gain in arbitrarily limiting the number of parentheses.
	 */
	parno = patnpar++;
	starter = patnode(P_OPEN + parno);
    } else
	starter = 0;

    br = patnode(P_BRANCH);
    if (!patcompbranch(&flags))
	return 0;
    if (patglobflags != savglobflags)
	gfchanged++;
    if (starter)
	pattail(starter, br);
    else
	starter = br;

    *flagp |= flags & (P_HSTART|P_PURESTR);

    /*
     * Loop over the alternatives and exclusions.  A Tilde only
     * introduces an exclusion if something other than the end of
     * the segment follows it.
     */
    while (*patparse == Bar ||
	   (isset(EXTENDEDGLOB) && *patparse == Tilde &&
	    (patparse[1] == '/' ||
	     !memchr(patendseg, patparse[1], patendseglen)))) {
	/* tilde becomes 2 below if '/' has been made non-special */
	int tilde = *patparse++ == Tilde;
	long gfnode = 0, newbr;

	*flagp &= ~P_PURESTR;

	if (tilde) {
	    union upat up;
	    /* excsync remembers the P_EXCSYNC node before a chain of
	     * exclusions:  all point back to this.  only the
	     * original (non-excluded) branch gets a trailing P_EXCSYNC.
	     */
	    if (!excsync) {
		excsync = patnode(P_EXCSYNC);
		patoptail(br, excsync);
	    }
	    /*
	     * By default, approximations are turned off in exclusions:
	     * we need to do this here as otherwise the code compiling
	     * the exclusion doesn't know if the flags have really
	     * changed if the error count gets restored.
	     */
	    patglobflags &= ~0xff;
	    if (!(patflags & PAT_FILET) || paren) {
		br = patnode(P_EXCLUDE);
	    } else {
		/*
		 * At top level (paren == 0) in a file glob (patflags
		 * & PAT_FILET) do the exclusion prepending the file path
		 * so far.  We need to flag this to avoid unnecessarily
		 * copying the path.
		 */
		br = patnode(P_EXCLUDP);
		patflags |= PAT_HAS_EXCLUDP;
	    }
	    /* the run-time pointer argument, initially NULL */
	    up.p = NULL;
	    patadd((char *)&up, 0, sizeof(up), 0);
	    /* / is not treated as special if we are at top level */
	    if (!paren && *patendseg == '/') {
		tilde++;
		patendseg++;
		patendseglen--;
		patendstr++;
		patendstrlen--;
	    }
	} else {
	    excsync = 0;
	    br = patnode(P_BRANCH);
	    /*
	     * The position of the following statements means globflags
	     * set in the main branch carry over to the exclusion.
	     */
	    if (!paren) {
		patglobflags = 0;
		if (((Patprog)patout)->globflags) {
		    /*
		     * If at top level, we need to reinitialize flags to zero,
		     * since (#i)foo|bar only applies to foo and we stuck
		     * the #i into the global flags.
		     * We could have done it so that they only got set in the
		     * first branch, but it's quite convenient having any
		     * global flags set in the header and not buried in the
		     * pattern.  (Or maybe it isn't and we should
		     * forget this bit and always stick in an explicit GFLAGS
		     * statement instead of using the header.)
		     * Also, this can't happen for file globs where there are
		     * no top-level |'s.
		     *
		     * No gfchanged, as nothing to follow branch at top
		     * level.
		     */
		    union upat up;
		    gfnode = patnode(P_GFLAGS);
		    up.l = patglobflags;
		    patadd((char *)&up, 0, sizeof(union upat), 0);
		}
	    } else {
		patglobflags = savglobflags;
	    }
	}
	newbr = patcompbranch(&flags);
	if (tilde == 2) {
	    /* restore special treatment of / */
	    patendseg--;
	    patendseglen++;
	    patendstr--;
	    patendstrlen++;
	}
	if (!newbr)
	    return 0;
	if (gfnode)
	    pattail(gfnode, newbr);
	if (!tilde && patglobflags != savglobflags)
	    gfchanged++;
	pattail(starter, br);
	if (excsync)
	    patoptail(br, patnode(P_EXCEND));
	*flagp |= flags & P_HSTART;
    }

    /*
     * Make a closing node, hooking it to the end.
     * Note that we can't optimize P_NOTHING out here, since another
     * branch at that point would indicate the current choices continue,
     * which they don't.
     */
    ender = patnode(paren ? parno ? P_CLOSE+parno : P_NOTHING : P_END);
    pattail(starter, ender);

    /*
     * Hook the tails of the branches to the closing node,
     * except for exclusions which terminate where they are.
     */
    for (ptr = (Upat)patout + starter; ptr; ptr = PATNEXT(ptr))
	if (!P_ISEXCLUDE(ptr))
	    patoptail(ptr-(Upat)patout, ender);

    /* check for proper termination */
    if ((paren && *patparse++ != Outpar) ||
	(!paren && *patparse &&
	 !((patflags & PAT_FILE) && *patparse == '/')))
	return 0;

    if (paren && gfchanged) {
	/*
	 * Restore old values of flags when leaving parentheses.
	 * gfchanged detects a change in any branch (except exclusions
	 * which are separate), since we need to emit this even if
	 * a later branch happened to put the flags back.
	 *
	 * The argument of P_GFLAGS is a long, while savglobflags
	 * is an int, so it must be widened before it is emitted:
	 * copying sizeof(long) bytes straight out of the int would
	 * read beyond the end of it.
	 */
	union upat up;
	pattail(ender, patnode(P_GFLAGS));
	patglobflags = savglobflags;
	up.l = savglobflags;
	patadd((char *)&up, 0, sizeof(union upat), 0);
    }

    return starter;
}

/*
 * Compile something ended by Bar, Outpar, Tilde, or end of string.
 * Note the BRANCH or EXCLUDE tag must already have been omitted:
 * this returns the position of the operand of that.
 */

/**/
static long
patcompbranch(int *flagp)
{
    long chain, latest = 0, starter;
    int flags = 0;

    *flagp = P_PURESTR;

    starter = chain = 0;
    while (!memchr(patendseg, *patparse, patendseglen) ||
	   (*patparse == Tilde && patparse[1] != '/' &&
	    memchr(patendseg, patparse[1], patendseglen))) {
	if (isset(EXTENDEDGLOB) &&
	    ((!isset(SHGLOB) &&
	      (*patparse == Inpar && patparse[1] == Pound)) ||
	     (isset(KSHGLOB) && *patparse == '@' && patparse[1] == Inpar &&
	      patparse[2] == Pound))) {
	    /* Globbing flags. */
	    char *pp1 = patparse;
	    int oldglobflags = patglobflags, ignore;
	    long assert;
	    patparse += (*patparse == '@') ? 3 : 2;
	    if (!patgetglobflags(&patparse, &assert, &ignore))
		return 0;
	    if (!ignore) {
		if (assert) {
		    /*
		     * Start/end assertion looking like flags, but
		     * actually handled as a normal node
		     */
		    latest = patnode(assert);
		    flags = 0;
		} else {
		    if (pp1 == patstart) {
			/* Right at start of pattern, the simplest case.
			 * Put them into the flags and don't emit anything.
			 */
			((Patprog)patout)->globflags = patglobflags;
			continue;
		    } else if (!*patparse) {
			/* Right at the end, so just leave the flags for
			 * the next Patprog in the chain to pick up.
			 */
			break;
		    }
		    /*
		     * Otherwise, we have to stick them in as a pattern
		     * matching nothing.
		     */
		    if (oldglobflags != patglobflags) {
			/* Flags changed */
			union upat up;
			latest = patnode(P_GFLAGS);
			up.l = patglobflags;
			patadd((char *)&up, 0, sizeof(union upat), 0);
		    } else {
			/* No effect. */
			continue;
		    }
		}
	    } else if (!*patparse)
		break;
	    else
		continue;
	} else if (isset(EXTENDEDGLOB) && *patparse == Hat) {
	    /*
	     * ^pat:  anything but pat.  For proper backtracking,
	     * etc., we turn this into (*~pat), except without the
	     * parentheses.
	     */
	    patparse++;
	    latest = patcompnot(0, &flags);
	} else
	    latest = patcomppiece(&flags);
	if (!latest)
	    return 0;
	if (!starter)
	    starter = latest;
	if (!(flags & P_PURESTR))
	    *flagp &= ~P_PURESTR;
	if (!chain)
	    *flagp |= flags & P_HSTART;
	else
	    pattail(chain, latest);
	chain = latest;
    }
    /* check if there was nothing in the loop, i.e. () */
    if (!chain)
	starter = patnode(P_NOTHING);

    return starter;
}

/*
 * Parse globbing flags, updating patglobflags accordingly.
 * On entry *strp points just after the "(#"; on success it is
 * moved past the closing parenthesis.
 *
 * *assertp is set to P_ISSTART or P_ISEND if the flags were a
 * start or end assertion, else to 0.  *ignore is set to 1 if
 * there was nothing except possibly glob qualifiers, else to 0.
 *
 * Returns 1 for success, 0 for failure.
 */

/**/
int
patgetglobflags(char **strp, long *assertp, int *ignore)
{
    char *scan, *numend;
    zlong nerrs;

    *assertp = 0;
    *ignore = 1;
    /* (#X): assumes we are still positioned on the first X */
    for (scan = *strp; *scan && *scan != Outpar; scan++) {
	if (*scan == 'q') {
	    /*
	     * Glob qualifiers are nothing to do with the pattern
	     * code: skip the rest, up to the closing parenthesis.
	     */
	    do {
		scan++;
	    } while (*scan && *scan != Outpar);
	    break;
	}

	*ignore = 0;
	switch (*scan) {
	case 'a':
	    /* Approximate matching: the maximum error count follows */
	    scan++;
	    nerrs = zstrtol(scan, &numend, 10);
	    /*
	     * The limit is 254 as the value 255 is needed to
	     * record 254 errors in wbranch and exclude sync strings
	     * (hypothetically --- hope no-one tries it).
	     */
	    if (scan == numend || nerrs < 0 || nerrs > 254)
		return 0;
	    patglobflags = (patglobflags & ~0xff) | (nerrs & 0xff);
	    /* leave the last digit for the loop to step over */
	    scan = numend - 1;
	    break;

	case 'l':
	    /* Lower case in pattern matches either case in target */
	    patglobflags &= ~GF_IGNCASE;
	    patglobflags |= GF_LCMATCHUC;
	    break;

	case 'i':
	    /* Case insensitive throughout */
	    patglobflags &= ~GF_LCMATCHUC;
	    patglobflags |= GF_IGNCASE;
	    break;

	case 'I':
	    /* Back to case sensitive matching */
	    patglobflags &= ~(GF_LCMATCHUC|GF_IGNCASE);
	    break;

	case 'b':
	    /* Turn backreferences on */
	    patglobflags |= GF_BACKREF;
	    break;

	case 'B':
	    /* Turn backreferences off */
	    patglobflags &= ~GF_BACKREF;
	    break;

	case 'm':
	    /* Turn references to the whole match on */
	    patglobflags |= GF_MATCHREF;
	    break;

	case 'M':
	    /* Turn references to the whole match off */
	    patglobflags &= ~GF_MATCHREF;
	    break;

	case 's':
	    *assertp = P_ISSTART;
	    break;

	case 'e':
	    *assertp = P_ISEND;
	    break;

	case 'u':
	    patglobflags |= GF_MULTIBYTE;
	    break;

	case 'U':
	    patglobflags &= ~GF_MULTIBYTE;
	    break;

	default:
	    return 0;
	}
    }

    /*
     * There must be a closing parenthesis, and a start or end
     * assertion must be the only thing inside the parentheses.
     */
    if (*scan != Outpar || (*assertp && (*strp)[1] != Outpar))
	return 0;

    *strp = scan + 1;
    return 1;
}


/*
 * Names accepted inside [:...:] in a character class, terminated by
 * NULL.  The position of a name in this table is significant:
 * range_type() returns the index plus PP_FIRST, and
 * pattern_range_to_string() indexes the table with a code minus
 * PP_FIRST to recover the name.
 */
static const char *colon_stuffs[]  = {
    "alpha", "alnum", "ascii", "blank", "cntrl", "digit", "graph", 
    "lower", "print", "punct", "space", "upper", "xdigit", "IDENT",
    "IFS", "IFSSPACE", "WORD", NULL
};

/*
 * Handle the guts of a [:stuff:] character class element.
 * start is the beginning of "stuff" and len is its length;
 * start need not be null-terminated at start + len.
 * This code is exported for the benefit of completion matching.
 *
 * Returns the PP_ code for the class (table index plus PP_FIRST),
 * or PP_UNKWN if "stuff" is not exactly one of the known names.
 * The whole name must match: a mere prefix such as "al", or an
 * empty string, is not accepted.
 */

/**/
mod_export int
range_type(char *start, int len)
{
    const char **csp;

    for (csp = colon_stuffs; *csp; csp++) {
	/*
	 * Compare lengths first: strncmp() on its own would accept
	 * any prefix of a class name (and anything when len is 0).
	 */
	if ((int)strlen(*csp) == len && !strncmp(start, *csp, len))
	    return (csp - colon_stuffs) + PP_FIRST;
    }

    return PP_UNKWN;
}


/*
 * Convert the contents of a [...] or [^...] expression (just the
 * ... part) back into a string.  This is used by compfiles -p/-P
 * for some reason.  The compiled form (a metafied string) is
 * passed in rangestr.
 *
 * If outstr is non-NULL the string form is placed there.  It
 * must be sufficiently long.  A terminating null byte is appended.
 * Calling with outstr NULL just computes the length.
 *
 * Return the length required, not including the terminating null byte.
 *
 * TODO: this is non-multibyte for now.  It will need to be defined
 * appropriately with MULTIBYTE_SUPPORT when the completion matching
 * code catches up.
 */

/**/
mod_export int
pattern_range_to_string(char *rangestr, char *outstr)
{
    int len = 0;

    while (*rangestr) {
	if (imeta(STOUC(*rangestr))) {
	    /* Distance from Meta: 0 for Meta itself, else a PP_ code */
	    int swtype = STOUC(*rangestr) - STOUC(Meta);

	    if (swtype == 0) {
		/* Ordinary metafied character */
		/*
		 * NOTE(review): this writes Meta followed by the
		 * unmetafied byte, whereas the range case below copies
		 * the byte after Meta unchanged --- confirm which form
		 * the callers expect.
		 */
		if (outstr)
		{
		    *outstr++ = Meta;
		    *outstr++ = rangestr[1] ^ 32;
		}
		len += 2;
		rangestr += 2;
	    } else if (swtype == PP_RANGE) {
		/* X-Y range */
		int i;

		/*
		 * Step over the range marker itself; the two end
		 * points follow it.  Without this the marker byte
		 * would be emitted as if it were the first end point.
		 */
		rangestr++;
		for (i = 0; i < 2; i++) {
		    if (*rangestr == Meta) {
			if (outstr) {
			    *outstr++ = Meta;
			    *outstr++ = rangestr[1];
			}
			len += 2;
			rangestr += 2;
		    } else {
			if (outstr)
			    *outstr++ = *rangestr;
			len++;
			rangestr++;
		    }

		    /* Separator goes between the two end points. */
		    if (i == 0) {
			if (outstr)
			    *outstr++ = '-';
			len++;
		    }
		}
	    } else if (swtype >= PP_FIRST && swtype <= PP_LAST) {
		/* [:stuff:]; we need to output [: and :] */
		const char *found = colon_stuffs[swtype - PP_FIRST];
		int newlen = strlen(found);
		if (outstr) {
		    strcpy(outstr, "[:");
		    outstr += 2;
		    memcpy(outstr, found, newlen);
		    outstr += newlen;
		    strcpy(outstr, ":]");
		    outstr += 2;
		}
		len += newlen + 4;
		rangestr++;
	    } else {
		/* shouldn't happen */
		DPUTS(1, "BUG: unknown PP_ code in pattern range");
		rangestr++;
	    }
	} else {
	    /* ordinary character, guaranteed no Meta handling needed */
	    if (outstr)
		*outstr++ = *rangestr;
	    len++;
	    rangestr++;
	}
    }

    if (outstr)
	*outstr = '\0';
    return len;
}

/*
 * compile a chunk such as a literal string or a [...] followed
 * by a possible hash operator
 *
 * The text to compile is read from, and consumed via, the file-level
 * pointer patparse.
 *
 * flagp receives flags describing the compiled piece: the P_SIMPLE,
 * P_PURESTR and P_HSTART bits collected here when no closure
 * operator follows, otherwise P_HSTART or 0 depending on the
 * closure.
 *
 * Returns the offset of the first node of the piece, as given by
 * patnode(), or 0 if the piece could not be compiled.
 */

/**/
static long
patcomppiece(int *flagp)
{
    long starter = 0, next, op, opnd;
    int flags, flags2, kshchar, len, ch, patch, nmeta;
    int pound, count;
    union upat up;
    char *nptr, *str0, *ptr, *patprev;
    zrange_t from, to;
    char *charstart;

    flags = 0;
    str0 = patprev = patparse;
    for (;;) {
	/*
	 * Check if we have a string. First, we need to make sure
	 * the string doesn't introduce a ksh-like parenthesized expression.
	 */
	kshchar = '\0';
	if (isset(KSHGLOB) && *patparse && patparse[1] == Inpar) {
	    if (strchr("?*+!@", *patparse))
		kshchar = STOUC(*patparse);
	    else if (*patparse == Star || *patparse == Quest)
		kshchar = STOUC(ztokens[*patparse - Pound]);
	}

	/*
	 * End of string (or no string at all) if ksh-type parentheses,
	 * or special character, unless that character is a tilde and
	 * the character following is an end-of-segment character.  Thus
	 * tildes are not special if there is nothing following to
	 * be excluded.
	 */
	if (kshchar || (memchr(patendstr, *patparse, patendstrlen) &&
			(*patparse != Tilde ||
			 patparse[1] == '/' ||
			 !memchr(patendseg, patparse[1], patendseglen))))
	    break;

	/* Remember the previous character for backtracking */
	patprev = patparse;
	METACHARINC(patparse);
    }

    if (patparse > str0) {
	/* We scanned over at least one literal character. */
	long slen = patparse - str0;
	int morelen;

	/* Ordinary string: cancel kshchar lookahead */
	kshchar = '\0';
	/*
	 * Assume it matches a simple string until we find otherwise.
	 */
	flags |= P_PURESTR;
	DPUTS(patparse == str0, "BUG: matched nothing in patcomppiece.");
	/* more than one character matched? */
	morelen = (patprev > str0);
	/*
	 * If we have more than one character, a following hash
	 * or (#c...) only applies to the last, so backtrack one character.
	 */
	if (isset(EXTENDEDGLOB) &&
	    (*patparse == Pound ||
	     (*patparse == Inpar && patparse[1] == Pound &&
	      patparse[2] == 'c')) && morelen)
	    patparse = patprev;
	/*
	 * If len is 1, we can't have an active # following, so doesn't
	 * matter that we don't make X in `XX#' simple.
	 */
	if (!morelen)
	    flags |= P_SIMPLE;
	starter = patnode(P_EXACTLY);

	/* Get length of string without metafication. */
	nmeta = 0;
	/* inherited from domatch, but why, exactly? */
	if (*str0 == Nularg)
	    str0++;
	/* Each Meta byte introduces a two-byte pair: count it once. */
	for (ptr = str0; ptr < patparse; ptr++) {
	    if (*ptr == Meta) {
		nmeta++;
		ptr++;
	    }
	}
	slen = (patparse - str0) - nmeta;
	/* First add length, which is a long */
	patadd((char *)&slen, 0, sizeof(long), 0);
	/*
	 * Then the string, not null terminated.
	 * Unmetafy and untokenize; pass the final length,
	 * which is what we need to allocate, i.e. not including
	 * a count for each Meta in the string.
	 */
	patadd(str0, 0, slen, PA_UNMETA);
	nptr = P_LS_STR((Upat)patout + starter);
	/*
	 * It's much simpler to turn off pure string mode for
	 * any case-insensitive or approximate matching; usually,
	 * that is correct, or they wouldn't have been turned on.
	 * However, we need to make sure we match a "." or ".."
	 * in a file name as a pure string.  There's a minor bug
	 * that this will also apply to something like
	 * ..(#a1).. (i.e. the (#a1) has no effect), but if you're
	 * going to write funny patterns, you get no sympathy from me.
	 */
	if (patglobflags &
#ifdef __CYGWIN__
	    /*
	     * As above: don't use pattern matching for files
	     * just because of case insensitivity if file system
	     * is known to be case insensitive.
	     *
	     * This is known to be necessary in at least one case:
	     * if "mount -c /" is in effect, so that drives appear
	     * directly under / instead of the usual /cygdrive, they
	     * aren't shown by readdir().  So it's vital we don't use
	     * globbing to find "/c", since that'll fail.
	     */
	    ((patflags & PAT_FILE) ?
	    (0xFF|GF_LCMATCHUC) :
	    (0xFF|GF_LCMATCHUC|GF_IGNCASE))
#else
	    (0xFF|GF_LCMATCHUC|GF_IGNCASE)
#endif
	    ) {
	    if (!(patflags & PAT_FILE))
		flags &= ~P_PURESTR;
	    else if (!(nptr[0] == '.' &&
		       (slen == 1 || (nptr[1] == '.' && slen == 2))))
		flags &= ~P_PURESTR;
	}
    } else {
	/* No literal string: we are sitting on a special character. */
	if (kshchar)
	    patparse++;

	patch = *patparse;
	METACHARINC(patparse);
	switch(patch) {
	case Quest:
	    flags |= P_SIMPLE;
	    starter = patnode(P_ANY);
	    break;
	case Star:
	    /* kshchar is used as a sign that we can't have #'s. */
	    kshchar = -1;
	    starter = patnode(P_STAR);
	    break;
	case Inbrack:
	    flags |= P_SIMPLE;
	    if (*patparse == Hat || *patparse == '^' || *patparse == '!') {
		patparse++;
		starter = patnode(P_ANYBUT);
	    } else
		starter = patnode(P_ANYOF);
	    /* A ] straight after the opening is a literal member. */
	    if (*patparse == Outbrack) {
		patparse++;
		patadd(NULL, ']', 1, PA_NOALIGN);
	    }
	    while (*patparse && *patparse != Outbrack) {
		/* Meta is not a token */
		if (*patparse == Inbrack && patparse[1] == ':' &&
			(nptr = strchr(patparse+2, ':')) &&
			nptr[1] == Outbrack) {
			/* Posix range. */
			patparse += 2;
			len = nptr - patparse;
			ch = range_type(patparse, len);
			patparse = nptr + 2;
			/* Unknown class names are silently dropped. */
			if (ch != PP_UNKWN)
			    patadd(NULL, STOUC(Meta) + ch, 1, PA_NOALIGN);
			continue;
		}
		charstart = patparse;
		METACHARINC(patparse);

		if (*patparse == '-' && patparse[1] &&
		    patparse[1] != Outbrack) {
		    /* X-Y: emit the range marker, then the first end point */
		    patadd(NULL, STOUC(Meta)+PP_RANGE, 1, PA_NOALIGN);
		    if (itok(*charstart)) {
			patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
			       PA_NOALIGN);
		    } else {
			patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
		    }
		    charstart = ++patparse;	/* skip ASCII '-' */
		    METACHARINC(patparse);
		}
		/* Single character, or the second end point of a range */
		if (itok(*charstart)) {
		    patadd(0, STOUC(ztokens[*charstart - Pound]), 1,
			   PA_NOALIGN);
		} else {
		    patadd(charstart, 0, patparse-charstart, PA_NOALIGN);
		}
	    }
	    /* Ran off the end of the pattern without a closing bracket */
	    if (*patparse != Outbrack)
		return 0;
	    patparse++;
	    /* terminate null string and fix alignment */
	    patadd(NULL, 0, 1, 0);
	    break;
	case Inpar:
	    /* is this how to treat parentheses in SHGLOB? */
	    if (isset(SHGLOB) && !kshchar)
		return 0;
	    if (kshchar == '!') {
		/* This is nasty, we should really either handle all
		 * kshglobbing below or here.  But most of the
		 * others look like non-ksh patterns, while this one
		 * doesn't, so we handle it here and leave the rest.
		 * We treat it like an extendedglob ^, except that
		 * it goes into parentheses.
		 *
		 * If we did do kshglob here, we could support
		 * the old behaviour that things like !(foo)##
		 * work, but it makes the code more complicated at
		 * the expense of allowing the user to do things
		 * they shouldn't.
		 */
		if (!(starter = patcompnot(1, &flags2)))
		    return 0;
	    } else if (!(starter = patcompswitch(1, &flags2)))
		return 0;
	    flags |= flags2 & P_HSTART;
	    break;
	case Inang:
	    /* Numeric glob */
	    len = 0;		/* beginning present 1, end present 2 */
	    if (idigit(*patparse)) {
		from = (zrange_t) zstrtol((char *)patparse,
					 (char **)&nptr, 10);
		patparse = nptr;
		len |= 1;
	    }
	    DPUTS(*patparse != '-', "BUG: - missing from numeric glob");
	    patparse++;
	    if (idigit(*patparse)) {
		to = (zrange_t) zstrtol((char *)patparse,
					  (char **)&nptr, 10);
		patparse = nptr;
		len |= 2;
	    }
	    if (*patparse != Outang)
		return 0;
	    patparse++;
	    /* Only the limits that were actually given are emitted. */
	    switch(len) {
	    case 3:
		starter = patnode(P_NUMRNG);
		patadd((char *)&from, 0, sizeof(from), 0);
		patadd((char *)&to, 0, sizeof(to), 0);
		break;
	    case 2:
		starter = patnode(P_NUMTO);
		patadd((char *)&to, 0, sizeof(to), 0);
		break;
	    case 1:
		starter = patnode(P_NUMFROM);
		patadd((char *)&from, 0, sizeof(from), 0);
		break;
	    case 0:
		starter = patnode(P_NUMANY);
		break;
	    }
	    /* This can't be simple, because it isn't.
	     * Mention in manual that matching digits with [...]
	     * is more efficient.
	     */
	    break;
	case Pound:
	    DPUTS(!isset(EXTENDEDGLOB), "BUG: # not treated as string");
	    /*
	     * A hash here is an error; it should follow something
	     * repeatable.
	     */
	    return 0;
	    break;
	case Bnullkeep:
	    /*
	     * Marker for restoring a backslash in output:
	     * does not match a character.
	     */
	    next = patcomppiece(flagp);
	    /*
	     * Can't match a pure string since we need to do this
	     * as multiple chunks.
	     */
	    *flagp &= ~P_PURESTR;
	    return next;
	    break;
#ifdef DEBUG
	default:
	    dputs("BUG: character not handled in patcomppiece");
	    return 0;
	    break;
#endif
	}
    }

    /*
     * If no closure follows (no #, no (#c...), and no ksh-style
     * repeat operator), the piece is complete as it stands.
     */
    count = 0;
    if (!(pound = (*patparse == Pound && isset(EXTENDEDGLOB))) &&
	!(count = (isset(EXTENDEDGLOB) && *patparse == Inpar &&
		   patparse[1] == Pound && patparse[2] == 'c')) &&
	(kshchar <= 0 || kshchar == '@' || kshchar == '!')) {
	*flagp = flags;
	return starter;
    }

    /* too much at once doesn't currently work */
    if (kshchar && pound)
	return 0;

    /* Work out which closure operator applies. */
    if (kshchar == '*') {
	op = P_ONEHASH;
	*flagp = P_HSTART;
    } else if (kshchar == '+') {
	op = P_TWOHASH;
	*flagp = P_HSTART;
    } else if (kshchar == '?') {
	op = 0;
	*flagp = 0;
    } else if (count) {
	op = P_COUNT;
	/* skip the three characters introducing the count */
	patparse += 3;
	*flagp = P_HSTART;
    } else if (*++patparse == Pound) {
	op = P_TWOHASH;
	patparse++;
	*flagp = P_HSTART;
    } else {
	op = P_ONEHASH;
	*flagp = P_HSTART;
    }

    /*
     * Note optimizations with pointers into P_NOTHING branches:  some
     * should logically point to next node after current piece.
     *
     * Backtracking is also encoded in a slightly obscure way:  the
     * code emitted ensures we test the non-empty branch of complex
     * patterns before the empty branch on each repetition.  Hence
     * each time we fail on a non-empty branch, we try the empty branch,
     * which is equivalent to backtracking.
     */
    if (op == P_COUNT) {
	/* (#cN,M) */
	union upat countargs[P_CT_OPERAND];
	char *opp = patparse;

	countargs[0].l = P_COUNT;
	countargs[P_CT_CURRENT].l = 0L;
	countargs[P_CT_MIN].l = (long)zstrtol(patparse, &patparse, 10);
	if (patparse == opp) {
	    /* missing number treated as zero */
	    countargs[P_CT_MIN].l = 0L;
	}
	if (*patparse != ',' && *patparse != Comma) {
	    /* either max = min or error */
	    if (*patparse != Outpar)
		return 0;
	    countargs[P_CT_MAX].l = countargs[P_CT_MIN].l;
	} else {
	    opp = ++patparse;
	    countargs[P_CT_MAX].l = (long)zstrtol(patparse, &patparse, 10);
	    if (*patparse != Outpar)
		return 0;
	    if (patparse == opp) {
		/* missing number treated as infinity: record as -1 */
		countargs[P_CT_MAX].l = -1L;
	    }
	}
	patparse++;
	countargs[P_CT_PTR].p = NULL;
	/* Mark this chain as a min/max count... */
	patinsert(P_COUNTSTART, starter, (char *)countargs, sizeof(countargs));
	/*
	 * The next of the operand is a loop back to the P_COUNT.  This is
	 * how we get recursion for the count.  We don't loop back to
	 * the P_COUNTSTART; that's used for initialising the count
	 * and saving and restoring the count for any enclosing use
	 * of the match.
	 */
	opnd = P_OPERAND(starter) + P_CT_OPERAND;
	pattail(opnd, patnode(P_BACK));
	pattail(opnd, P_OPERAND(starter));
	/*
	 * The next of the counter operators is what follows the
	 * closure.
	 * This handles matching of the tail.
	 */
	next = patnode(P_NOTHING);
	pattail(starter, next);
	pattail(P_OPERAND(starter), next);
    } else if ((flags & P_SIMPLE) && (op == P_ONEHASH || op == P_TWOHASH) &&
	P_OP((Upat)patout+starter) == P_ANY) {
	/* Optimize ?# to *.  Silly thing to do, since who would
	 * use ?# ? But it makes the later code shorter.
	 */
	Upat uptr = (Upat)patout + starter;
	if (op == P_TWOHASH) {
	    /* ?## becomes ?* */
	    uptr->l = (uptr->l & ~0xff) | P_ANY;
	    pattail(starter, patnode(P_STAR));
	} else {
	    uptr->l = (uptr->l & ~0xff) | P_STAR;
	}
    } else if ((flags & P_SIMPLE) && op && !(patglobflags & 0xff)) {
	/* Simplify, but not if we need to look for approximations. */
	patinsert(op, starter, NULL, 0);
    } else if (op == P_ONEHASH) {
	/* Emit x# as (x&|), where & means "self". */
	up.p = NULL;
	patinsert(P_WBRANCH, starter, (char *)&up, sizeof(up));
	                                      /* Either x */
	patoptail(starter, patnode(P_BACK));  /* and loop */
	patoptail(starter, starter);	      /* back */
	pattail(starter, patnode(P_BRANCH));  /* or */
	pattail(starter, patnode(P_NOTHING)); /* null. */
    } else if (op == P_TWOHASH) {
	/* Emit x## as x(&|) where & means "self". */
	next = patnode(P_WBRANCH);	      /* Either */
	up.p = NULL;
	patadd((char *)&up, 0, sizeof(up), 0);
	pattail(starter, next);
	pattail(patnode(P_BACK), starter);    /* loop back */
	pattail(next, patnode(P_BRANCH));     /* or */
	pattail(starter, patnode(P_NOTHING)); /* null. */
    } else if (kshchar == '?') {
	/* Emit ?(x) as (x|) */
	patinsert(P_BRANCH, starter, NULL, 0); /* Either x */
	pattail(starter, patnode(P_BRANCH));   /* or */
	next = patnode(P_NOTHING);	       /* null */
	pattail(starter, next);
	patoptail(starter, next);
    }
    /* A further hash after a completed closure is an error. */
    if (*patparse == Pound)
	return 0;

    return starter;
}

/*
 * Turn a ^foo (paren = 0) or !(foo) (paren = 1) into *~foo with
 * parentheses if necessary.   As you see, that's really quite easy.
 */

/**/
static long
patcompnot(int paren, int *flagsp)
{
    union upat up;
    long excsync, br, excl, n, starter;
    int dummy;

    /* Here, we're matching a star at the start. */
    *flagsp = P_HSTART;

    starter = patnode(P_BRANCH);
    br = patnode(P_STAR);
    excsync = patnode(P_EXCSYNC);
    pattail(br, excsync);
    pattail(starter, excl = patnode(P_EXCLUDE));
    up.p = NULL;
    patadd((char *)&up, 0, sizeof(up), 0);
    if (!(br = (paren ? patcompswitch(1, &dummy) : patcompbranch(&dummy))))
	return 0;
    pattail(br, patnode(P_EXCEND));
    n = patnode(P_NOTHING); /* just so much easier */
    pattail(excsync, n);
    pattail(excl, n);

    return starter;
}

/* Emit a node */

/**/
static long
patnode(long op)
{
    long starter = (Upat)patcode - (Upat)patout;
    union upat up;

    up.l = op;
    patadd((char *)&up, 0, sizeof(union upat), 0);
    return starter;
}

/*
 * insert an operator in front of an already emitted operand:
 * we relocate the operand.  there had better be nothing else after.
 */

/**/
static void
patinsert(long op, int opnd, char *xtra, int sz)
{
    char *src, *dst, *opdst;
    union upat buf, *lptr;

    buf.l = 0;
    patadd((char *)&buf, 0, sizeof(buf), 0);
    if (sz)
	patadd(xtra, 0, sz, 0);
    src = patcode - sizeof(union upat) - sz;
    dst = patcode;
    opdst = patout + opnd * sizeof(union upat);
    while (src > opdst)
	*--dst = *--src;

    /* A cast can't be an lvalue */
    lptr = (Upat)opdst;
    lptr->l = op;
    opdst += sizeof(union upat);
    while (sz--)
	*opdst++ = *xtra++;
}

/* set the 'next' pointer at the end of a node chain */

/**/
static void
pattail(long p, long val)
{
    Upat scan, temp;
    long offset;

    scan = (Upat)patout + p;
    for (;;) {
	if (!(temp = PATNEXT(scan)))
	    break;
	scan = temp;
    }

    offset = (P_OP(scan) == P_BACK)
	? (scan - (Upat)patout) - val : val - (scan - (Upat)patout);

    scan->l |= offset << 8;
}

/*
 * do pattail, but on operand of first argument; nop if operandless
 *
 * Nothing is done either if p is zero.
 */

/**/
static void patoptail(long p, long val)
{
    Upat node = (Upat)patout + p;

    if (p && P_ISBRANCH(node)) {
	/*
	 * For a plain P_BRANCH the operand follows directly; any other
	 * branch type has one extra unit before its operand.
	 */
	long skip = (P_OP(node) == P_BRANCH) ? 0 : 1;

	pattail(P_OPERAND(p) + skip, val);
    }
}


/*
 * Run a pattern.
 *
 * The variables below hold the state of the match being attempted;
 * they are set up by pattryrefs().
 */
static char *patinstart;	/* Start of input string */
static char *patinend;		/* End of input string */
static char *patinput;		/* String input pointer */
static char *patinpath;		/* Full path for use with ~ exclusions */
static int   patinlen;		/* Length of last successful match.
				 * Includes count of Meta characters.
				 */

static char *patbeginp[NSUBEXP];	/* Pointer to backref beginnings */
static char *patendp[NSUBEXP];		/* Pointer to backref ends */
static int parsfound;			/* parentheses (with backrefs) found;
					 * bit i is tested to see whether
					 * entry i of the arrays above is
					 * valid */

static int globdots;			/* Glob initial dots?  Cleared
					 * when PAT_NOGLD is set */

/*
 * Character functions operating on unmetafied strings.
 */
#ifdef MULTIBYTE_SUPPORT

/*
 * Get a character from the start point in a string.
 * x is the start point and y the end of the string.
 */
#define CHARREF(x, y)	charref((x), (y))
static wchar_t
charref(char *x, char *y)
{
    wchar_t wide;
    size_t nbytes;

    /* Only bytes with the top bit set need multibyte conversion. */
    if ((patglobflags & GF_MULTIBYTE) && (STOUC(*x) & 0x80)) {
	nbytes = mbrtowc(&wide, x, y-x, &shiftstate);
	if (nbytes != MB_INVALID && nbytes != MB_INCOMPLETE)
	    return wide;

	/* Error.  Treat as single byte. */
	/* Reset the shift state for next time. */
	memset(&shiftstate, 0, sizeof(shiftstate));
    }

    return (wchar_t) STOUC(*x);
}

/*
 * Get a pointer to the next character.
 * x is the current position and y the end of the string.
 */
#define CHARNEXT(x, y)	charnext((x), (y))
static char *
charnext(char *x, char *y)
{
    wchar_t wide;
    size_t nbytes;

    /* Only bytes with the top bit set need multibyte conversion. */
    if ((patglobflags & GF_MULTIBYTE) && (STOUC(*x) & 0x80)) {
	nbytes = mbrtowc(&wide, x, y-x, &shiftstate);
	if (nbytes != MB_INVALID && nbytes != MB_INCOMPLETE) {
	    /* Nulls here are normal characters */
	    return x + (nbytes ? nbytes : 1);
	}

	/* Error.  Treat as single byte. */
	/* Reset the shift state for next time. */
	memset(&shiftstate, 0, sizeof(shiftstate));
    }

    return x + 1;
}

/* Increment a pointer past the current character. */
#define CHARINC(x, y)	((x) = charnext((x), (y)))


/*
 * Get a character and increment.
 * *x is the current position, updated to point past the character
 * returned; y is the end of the string.
 */
#define CHARREFINC(x, y)	charrefinc(&(x), (y))
static wchar_t
charrefinc(char **x, char *y)
{
    wchar_t wide;
    size_t nbytes;

    /* Only bytes with the top bit set need multibyte conversion. */
    if ((patglobflags & GF_MULTIBYTE) && (STOUC(**x) & 0x80)) {
	nbytes = mbrtowc(&wide, *x, y-*x, &shiftstate);
	if (nbytes != MB_INVALID && nbytes != MB_INCOMPLETE) {
	    /* Nulls here are normal characters */
	    *x += nbytes ? nbytes : 1;
	    return wide;
	}

	/* Error.  Treat as single byte. */
	/* Reset the shift state for next time. */
	memset(&shiftstate, 0, sizeof(shiftstate));
    }

    return (wchar_t) STOUC(*(*x)++);
}


/*
 * Count the number of characters between two pointers, smaller first
 *
 * This is used when setting values in parameters, so we obey
 * the MULTIBYTE option (even if it's been overridden locally).
 *
 * If the MULTIBYTE option is unset the result is simply the byte
 * difference.  If a conversion error occurs part way through, every
 * remaining byte is counted as one character.
 */
#define CHARSUB(x,y)	charsub(x, y)
static ptrdiff_t
charsub(char *x, char *y)
{
    ptrdiff_t res = 0;
    size_t ret;
    wchar_t wc;

    if (!isset(MULTIBYTE))
	return y - x;

    while (x < y) {
	ret = mbrtowc(&wc, x, y-x, &shiftstate);

	if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
	    /* Error.  Treat remainder as single characters */
	    /*
	     * Reset the shift state for next time, as the other
	     * character helpers do, so that the failure here does not
	     * affect a later conversion using the same state.
	     */
	    memset(&shiftstate, 0, sizeof(shiftstate));
	    return res + (y - x);
	}

	/* Treat nulls as normal characters */
	if (!ret)
	    ret = 1;
	res++;
	x += ret;
    }

    return res;
}

#else /* no MULTIBYTE_SUPPORT */

/*
 * Single-byte versions: every byte is one character, so the end
 * pointer y is not used by any of these.
 */
/* Get a character from the start point in a string */
#define CHARREF(x, y)	(STOUC(*(x)))
/* Get a pointer to the next character */
#define CHARNEXT(x, y)	((x)+1)
/* Increment a pointer past the current character. */
#define CHARINC(x, y)	((x)++)
/* Get a character and increment */
#define CHARREFINC(x, y)	(STOUC(*(x)++))
/* Count the number of characters between two pointers, smaller first */
#define CHARSUB(x,y)	((y) - (x))

#endif /* MULTIBYTE_SUPPORT */

/*
 * The following need to be accessed in the globbing scanner for
 * a multi-component file path.  See horror story in glob.c.
 */
/**/
int errsfound;				/* Total error count so far */

/**/
int forceerrs;				/* Forced maximum error count */

/*
 * Reset the error counts above to their starting values:
 * no errors found so far, and -1 for the forced count.
 */

/**/
void
pattrystart(void)
{
    errsfound = 0;
    forceerrs = -1;
}

/*
 * Test prog against null-terminated, metafied string.
 *
 * Both lengths are passed on as -1, so pattryrefs() counts the
 * string itself; no offset and no position reporting is requested.
 * Returns whatever pattryrefs() returns.
 */

/**/
mod_export int
pattry(Patprog prog, char *string)
{
    int matched;

    matched = pattryrefs(prog, string, -1, -1, 0, NULL, NULL, NULL);

    return matched;
}

/*
 * Test prog against string of given length, no null termination
 * but still metafied at this point.  offset gives an offset
 * to include in reported match indices.
 *
 * len and unmetalen are handed to pattryrefs() as the raw and
 * unmetafied lengths; no position reporting is requested.
 * Returns whatever pattryrefs() returns.
 */

/**/
mod_export int
pattrylen(Patprog prog, char *string, int len, int unmetalen, int offset)
{
    int matched;

    matched = pattryrefs(prog, string, len, unmetalen, offset,
			 NULL, NULL, NULL);

    return matched;
}

/*
 * Test prog against string with given lengths.  The input
 * string is metafied; stringlen is the raw string length, and
 * unmetalen the number of characters in the original string (some
 * of which may now be metafied).  Either value may be -1
 * to indicate a null-terminated string which will be counted.  Note
 * there may be a severe penalty for this if a lot of matching is done
 * on one string.
 *
 * If the string starts with a Nularg it is skipped, and any lengths
 * that were supplied are reduced to allow for that.
 *
 * offset is the position in the original string (not seen by
 * the pattern module) at which we are trying to match.
 * This is added in to the positions recorded in patbeginp and patendp
 * when we are looking for substrings.  Currently this only happens
 * in the parameter substitution code.
 *
 * Note this is a character offset, i.e. a metafied character
 * counts as 1.
 *
 * The last three arguments are used to report the positions for the
 * backreferences. On entry, *nump should contain the maximum number
 * of positions to report.  In this case the match, mbegin, mend
 * arrays are not altered.
 *
 * If nump is NULL but endp is not NULL, then *endp is set to the
 * end position of the match, taking into account patinstart.
 *
 * Returns 1 if the pattern matched and 0 otherwise.
 */

/**/
mod_export int
pattryrefs(Patprog prog, char *string, int stringlen, int unmetalen,
	   int patoffset,
	   int *nump, int *begp, int *endp)
{
    int i, maxnpos = 0, ret, needfullpath, unmetalenp;
    int origlen;
    char **sp, **ep, *tryalloced, *ptr;
    char *progstr = (char *)prog + prog->startoff;

    if (nump) {
	maxnpos = *nump;
	*nump = 0;
    }
    /* inherited from domatch, but why, exactly? */
    if (*string == Nularg) {
	string++;
	/*
	 * Both lengths, if supplied, included the Nularg: adjust
	 * both of them, or the end of the input would be taken to be
	 * one byte beyond the real end.  Negative values mean "count
	 * it yourself" and are left alone.
	 */
	if (unmetalen > 0)
	    unmetalen--;
	if (stringlen > 0)
	    stringlen--;
    }

    if (stringlen < 0)
	stringlen = strlen(string);
    origlen = stringlen;

    patflags = prog->flags;
    /*
     * For a top-level ~-exclusion, we will need the full
     * path to exclude, so copy the path so far and append the
     * current test string.
     */
    needfullpath = (patflags & PAT_HAS_EXCLUDP) && pathpos;

    /* Get the length of the full string when unmetafied. */
    if (unmetalen < 0)
	unmetalen = ztrsub(string + stringlen, string);
    if (needfullpath)
	unmetalenp = ztrsub(pathbuf + pathpos, pathbuf);
    else
	unmetalenp = 0;

    DPUTS(needfullpath && (patflags & (PAT_PURES|PAT_ANY)),
	  "rum sort of file exclusion");
    /*
     * Partly for efficiency, and partly for the convenience of
     * globbing, we don't unmetafy pure string patterns, and
     * there's no reason to if the pattern is just a *.
     */
    if (!(patflags & (PAT_PURES|PAT_ANY))
	&& (needfullpath || unmetalen != stringlen)) {
	/*
	 * We need to copy if we need to prepend the path so far
	 * (in which case we copy both chunks), or if we have
	 * Meta characters.
	 */
	char *dst;
	int icopy, ncopy;

	dst = tryalloced = zalloc(unmetalen + unmetalenp);

	if (needfullpath) {
	    /* loop twice, copy path buffer first time */
	    ptr = pathbuf;
	    ncopy = unmetalenp;
	} else {
	    /* just loop once, copy string with unmetafication */
	    ptr = string;
	    ncopy = unmetalen;
	}
	for (icopy = 0; icopy < 2; icopy++) {
	    for (i = 0; i < ncopy; i++) {
		if (*ptr == Meta) {
		    ptr++;
		    *dst++ = *ptr++ ^ 32;
		} else {
		    *dst++ = *ptr++;
		}
	    }
	    if (!needfullpath)
		break;
	    /* next time append test string to path so far */
	    ptr = string;
	    ncopy = unmetalen;
	}

	if (needfullpath) {
	    patinstart = tryalloced + unmetalenp;
	    patinpath = tryalloced;
	} else {
	    patinstart = tryalloced;
	    patinpath = NULL;
	}
	stringlen = unmetalen;
    } else {
	patinstart = string;
	tryalloced = patinpath = NULL;
    }

    patinend = patinstart + stringlen;
    /*
     * From now on we do not require NULL termination of
     * the test string.  There should also be no more references
     * to the variable string.
     */

    if (prog->flags & (PAT_PURES|PAT_ANY)) {
	/*
	 * Either we are testing against a pure string,
	 * or we can match anything at all.
	 */
	if (prog->flags & PAT_ANY) {
	    /*
	     * Optimisation for a single "*": always matches
	     * (except for no_glob_dots, see below).
	     */
	    ret = 1;
	} else {
	    /*
	     * Testing a pure string.  See if initial
	     * components match.
	     */
	    int lendiff = stringlen - prog->patmlen;
	    if (lendiff < 0) {
		/* No, the pattern string is too long. */
		ret = 0;
	    } else if (!memcmp(progstr, patinstart, prog->patmlen)) {
		/*
		 * Initial component matches.  Matches either
		 * if lengths are the same or we are not anchored
		 * to the end of the string.
		 */
		ret = !lendiff || (prog->flags & PAT_NOANCH);
	    } else {
		/* No match. */
		ret = 0;
	    }
	}
	if (ret) {
	    /*
	     * For files, we won't match initial "."s unless
	     * glob_dots is set.
	     */
	    if ((prog->flags & PAT_NOGLD) && *patinstart == '.') {
		ret = 0;
	    } else {
		/*
		 * Remember the length in case used for ${..#..} etc.
		 * In this case, we didn't unmetafy the string.
		 */
		patinlen = (int)prog->patmlen;
		/* if matching files, must update globbing flags */
		patglobflags = prog->globend;

		if ((patglobflags & GF_MATCHREF) &&
		    !(patflags & PAT_FILE)) {
		    char *str = ztrduppfx(patinstart, patinlen);
		    char *mptr = patinstart;
		    int mlen = 0;

		    /*
		     * Count the characters.  We're not using CHARSUB()
		     * because the string is still metafied.  We're
		     * not using mb_metastrlen() because that expects
		     * the string to be null terminated.
		     */
		    MB_METACHARINIT();
		    while (mptr < patinstart + patinlen) {
			mlen++;
			mptr += MB_METACHARLEN(mptr);
		    }

		    setsparam("MATCH", str);
		    setiparam("MBEGIN",
			      (zlong)(patoffset + !isset(KSHARRAYS)));
		    setiparam("MEND",
			      (zlong)(mlen + patoffset +
				      !isset(KSHARRAYS) - 1));
		}
	    }
	}

	if (tryalloced)
	    zfree(tryalloced, unmetalen + unmetalenp);

	return ret;
    } else {
	/*
	 * Test for a `must match' string, unless we're scanning for a match
	 * in which case we don't need to do this each time.
	 */
	ret = 1;
	if (!(prog->flags & PAT_SCAN) && prog->mustoff)
	{
	    char *testptr;	/* start pointer into test string */
	    char *teststop;	/* last point from which we can match */
	    char *patptr = (char *)prog + prog->mustoff;
	    int patlen = prog->patmlen;
	    int found = 0;

	    if (patlen > stringlen) {
		/* Too long, can't match. */
		ret = 0;
	    } else {
		teststop = patinend - patlen;

		for (testptr = patinstart; testptr <= teststop; testptr++)
		{
		    if (!memcmp(testptr, patptr, patlen)) {
			found = 1;
			break;
		    }
		}

		if (!found)
		    ret = 0;
	    }
	}
	if (!ret) {
	    if (tryalloced)
		zfree(tryalloced, unmetalen + unmetalenp);
	    return 0;
	}

	patglobflags = prog->globflags;
	if (!(patflags & PAT_FILE)) {
	    forceerrs = -1;
	    errsfound = 0;
	}
	globdots = !(patflags & PAT_NOGLD);
	parsfound = 0;

	patinput = patinstart;

	if (patmatch((Upat)progstr)) {
	    /*
	     * we were lazy and didn't save the globflags if an exclusion
	     * failed, so set it now
	     */
	    patglobflags = prog->globend;

	    /*
	     * Record length of successful match, including Meta
	     * characters.  Do it here so that patmatchlen() can return
	     * it even if we delete the pattern strings.
	     */
	    patinlen = patinput - patinstart;
	    /*
	     * Optimization: if we didn't find any Meta characters
	     * to begin with, we don't need to look for them now.
	     */
	    if (unmetalen != origlen) {
		for (ptr = patinstart; ptr < patinput; ptr++)
		    if (imeta(*ptr))
			patinlen++;
	    }

	    /*
	     * Should we clear backreferences and matches on a failed
	     * match?
	     */
	    if ((patglobflags & GF_MATCHREF) && !(patflags & PAT_FILE)) {
		/*
		 * m flag: for global match.  This carries no overhead
		 * in the pattern matching part.
		 *
		 * Remember the test pattern is already unmetafied.
		 */
		char *str;
		int mlen = CHARSUB(patinstart, patinput);

		str = metafy(patinstart, patinput - patinstart, META_DUP);
		setsparam("MATCH", str);
		setiparam("MBEGIN", (zlong)(patoffset + !isset(KSHARRAYS)));
		setiparam("MEND",
			  (zlong)(mlen + patoffset +
				  !isset(KSHARRAYS) - 1));
	    }
	    if (prog->patnpar && nump) {
		/*
		 * b flag: for backreferences using parentheses. Reported
		 * directly.
		 */
		*nump = prog->patnpar;

		sp = patbeginp;
		ep = patendp;

		for (i = 0; i < prog->patnpar && i < maxnpos; i++) {
		    if (parsfound & (1 << i)) {
			if (begp)
			    *begp++ = CHARSUB(patinstart, *sp) + patoffset;
			if (endp)
			    *endp++ = CHARSUB(patinstart, *ep) + patoffset
				- 1;
		    } else {
			/* Group took no part in the match */
			if (begp)
			    *begp++ = -1;
			if (endp)
			    *endp++ = -1;
		    }

		    sp++;
		    ep++;
		}
	    } else if (prog->patnpar && !(patflags & PAT_FILE)) {
		/*
		 * b flag: for backreferences using parentheses.
		 */
		int palen = prog->patnpar+1;
		char **matcharr, **mbeginarr, **mendarr;
		char numbuf[DIGBUFSIZE];

		matcharr = zshcalloc(palen*sizeof(char *));
		mbeginarr = zshcalloc(palen*sizeof(char *));
		mendarr = zshcalloc(palen*sizeof(char *));

		sp = patbeginp;
		ep = patendp;

		for (i = 0; i < prog->patnpar; i++) {
		    if (parsfound & (1 << i)) {
			matcharr[i] = metafy(*sp, *ep - *sp, META_DUP);
			/*
			 * mbegin and mend give indexes into the string
			 * in the standard notation, i.e. respecting
			 * KSHARRAYS, and with the end index giving
			 * the last character, not one beyond.
			 * For example, foo=foo; [[ $foo = (f)oo ]] gives
			 * (without KSHARRAYS) indexes 1 and 1, which
			 * corresponds to indexing as ${foo[1,1]}.
			 */
			sprintf(numbuf, "%ld",
				(long)(CHARSUB(patinstart, *sp) +
				       patoffset +
				       !isset(KSHARRAYS)));
			mbeginarr[i] = ztrdup(numbuf);
			sprintf(numbuf, "%ld",
				(long)(CHARSUB(patinstart, *ep) +
				       patoffset +
				       !isset(KSHARRAYS) - 1));
			mendarr[i] = ztrdup(numbuf);
		    } else {
			/* Pattern wasn't set: either it was in an
			 * unmatched branch, or a hashed parenthesis
			 * that didn't match at all.
			 */
			matcharr[i] = ztrdup("");
			mbeginarr[i] = ztrdup("-1");
			mendarr[i] = ztrdup("-1");
		    }
		    sp++;
		    ep++;
		}
		setaparam("match", matcharr);
		setaparam("mbegin", mbeginarr);
		setaparam("mend", mendarr);
	    }

	    if (!nump && endp) {
		/*
		 * We just need the overall end position.
		 */
		*endp = CHARSUB(patinstart, patinput) + patoffset;
	    }

	    ret = 1;
	} else
	    ret = 0;

	if (tryalloced)
	    zfree(tryalloced, unmetalen + unmetalenp);

	return ret;
    }
}

/*
 * Return length of previous successful match.  This is
 * in metafied bytes, i.e. includes a count of Meta characters.
 * Unusual and futile attempt at modular encapsulation.
 */

/**/
int
patmatchlen(void)
{
    int matchlen = patinlen;

    return matchlen;
}

/*
 * Match literal characters with case insensitivity test:  the first
 * comes from the input string, the second the current pattern.
 *
 * The character classification helpers come in wide and narrow
 * versions depending on MULTIBYTE_SUPPORT.
 */
#ifdef MULTIBYTE_SUPPORT
#define ISUPPER(x)	iswupper(x)
#define ISLOWER(x)	iswlower(x)
#define TOUPPER(x)	towupper(x)
#define TOLOWER(x)	towlower(x)
#define ISDIGIT(x)	iswdigit(x)
#else
#define ISUPPER(x)	isupper(x)
#define ISLOWER(x)	islower(x)
#define TOUPPER(x)	toupper(x)
#define TOLOWER(x)	tolower(x)
#define ISDIGIT(x)	idigit(x)
#endif
/*
 * True if chin matches chpa exactly; or, with GF_IGNCASE, if they are
 * equal once upper case is folded to lower; or, with GF_LCMATCHUC, if
 * chpa is lower case and chin is its upper case form.
 *
 * Both arguments are parenthesised so that expressions may be passed,
 * but each may be evaluated more than once: do not pass arguments
 * with side effects (use CHARMATCH_EXPR for the first one).
 */
#define CHARMATCH(chin, chpa) ((chin) == (chpa) || \
        ((patglobflags & GF_IGNCASE) ? \
	 ((ISUPPER(chin) ? TOLOWER(chin) : (chin)) == \
	  (ISUPPER(chpa) ? TOLOWER(chpa) : (chpa))) : \
	 (patglobflags & GF_LCMATCHUC) ? \
	 (ISLOWER(chpa) && TOUPPER(chpa) == (chin)) : 0))

/*
 * The same but caching an expression from the first argument,
 * so that it is only evaluated once.
 * Requires local charmatch_cache definition.
 */
#define CHARMATCH_EXPR(expr, chpa) \
	(charmatch_cache = (expr), CHARMATCH(charmatch_cache, chpa))

/*
 * exactpos is used to remember how far down an exact string we have
 * matched, if we are doing approximation and can therefore redo from
 * the same point; we never need to otherwise.
 *
 * exactend is a pointer to the end of the string, which isn't
 * null-terminated.
 *
 * As used by patmatch():  the P_EXACTLY code sets both pointers when
 * it stops before the end of its string.  If exactpos is non-NULL on
 * entry to P_EXACTLY, matching resumes from there instead of from the
 * start of the node's string, and exactpos is reset to NULL.  It is
 * also reset to NULL when a failure is finally returned.
 */
static char *exactpos, *exactend;

/*
 * Main matching routine.
 *
 * Testing the tail end of a match is usually done by recursion, but
 * we try to eliminate that in favour of looping for simple cases.
 *
 * prog is the node of the compiled pattern from which to start
 * matching.  The test string is not an argument:  the routine reads
 * and advances the file-level pointer patinput, which is bounded by
 * patinstart and patinend.
 *
 * Returns 1 if the pattern from prog onwards matched, 0 if not.
 *
 * Other file-level state read or modified here includes patglobflags,
 * patflags, errsfound, forceerrs, parsfound, patbeginp, patendp,
 * globdots, exactpos and exactend.
 */

/**/
static int
patmatch(Upat prog)
{
    /* Current and next nodes */
    Upat scan = prog, next, opnd;
    char *start, *save, *chrop, *chrend, *compend;
    int savglobflags, op, no, min, fail = 0, saverrsfound;
    zrange_t from, to, comp;
    patint_t nextch;

    while  (scan) {
	next = PATNEXT(scan);

	/*
	 * A node flagged P_NOTDOT may not match a `.' at the very
	 * start of the test string unless globdots is set.
	 */
	if (!globdots && P_NOTDOT(scan) && patinput == patinstart &&
	    patinput < patinend && *patinput == '.')
	    return 0;

	switch (P_OP(scan)) {
	case P_ANY:
	    /* Any single character:  fails only at end of input. */
	    if (patinput == patinend)
		fail = 1;
	    else
		CHARINC(patinput, patinend);
	    break;
	case P_EXACTLY:
	    /*
	     * acts as nothing if *chrop is null:  this is used by
	     * approx code.
	     */
	    if (exactpos) {
		/* Resume part way through, as left by approximation. */
		chrop = exactpos;
		chrend = exactend;
	    } else {
		chrop = P_LS_STR(scan);
		chrend = chrop + P_LS_LEN(scan);
	    }
	    exactpos = NULL;
	    while (chrop < chrend && patinput < patinend) {
		char *savpatinput = patinput;
		char *savchrop = chrop;
		patint_t chin = CHARREFINC(patinput, patinend);
		patint_t chpa = CHARREFINC(chrop, chrend);
		if (!CHARMATCH(chin, chpa)) {
		    /* Back up both pointers to the mismatched character. */
		    fail = 1;
		    patinput = savpatinput;
		    chrop = savchrop;
		    break;
		}
	    }
	    if (chrop < chrend) {
		/*
		 * Stopped before the end of the pattern string, either
		 * on a mismatch or because the input ran out:  record
		 * where we got to for the approximation code below.
		 */
		exactpos = chrop;
		exactend = chrend;
		fail = 1;
	    }
	    break;
	case P_ANYOF:
	case P_ANYBUT:
	    /*
	     * The result of the range test is XORed with "this is
	     * P_ANYOF", so P_ANYOF fails if the character is not in
	     * the range and P_ANYBUT fails if it is.
	     */
	    if (patinput == patinend)
		fail = 1;
	    else {
#ifdef MULTIBYTE_SUPPORT
		wchar_t cr = CHARREF(patinput, patinend);
		char *scanop = (char *)P_OPERAND(scan);
		if (patglobflags & GF_MULTIBYTE) {
		    if (mb_patmatchrange(scanop, cr, NULL, NULL) ^
			(P_OP(scan) == P_ANYOF))
			fail = 1;
		    else
			CHARINC(patinput, patinend);
		} else if (patmatchrange(scanop, (int)cr, NULL, NULL) ^
			   (P_OP(scan) == P_ANYOF))
		    fail = 1;
		else
		    CHARINC(patinput, patinend);
#else
		if (patmatchrange((char *)P_OPERAND(scan),
				  CHARREF(patinput, patinend), NULL, NULL) ^
		    (P_OP(scan) == P_ANYOF))
		    fail = 1;
		else
		    CHARINC(patinput, patinend);
#endif
	    }
	    break;
	case P_NUMRNG:
	case P_NUMFROM:
	case P_NUMTO:
	    /*
	     * To do this properly, we really have to treat numbers as
	     * closures:  that's so things like <1-1000>33 will
	     * match 633 (they didn't up to 3.1.6).  To avoid making this
	     * too inefficient, we see if there's an exact match next:
	     * if there is, and it's not a digit, we return 1 after
	     * the first attempt.
	     */
	    op = P_OP(scan);
	    start = (char *)P_OPERAND(scan);
	    from = to = 0;
	    /*
	     * The operand holds the lower limit (absent for P_NUMTO)
	     * followed by the upper limit (absent for P_NUMFROM).
	     */
	    if (op != P_NUMTO) {
#ifdef ZSH_64_BIT_TYPE
		/* We can't rely on pointer alignment being good enough. */
		memcpy((char *)&from, start, sizeof(zrange_t));
#else
		from = *((zrange_t *) start);
#endif
		start += sizeof(zrange_t);
	    }
	    if (op != P_NUMFROM) {
#ifdef ZSH_64_BIT_TYPE
		memcpy((char *)&to, start, sizeof(zrange_t));
#else
		to = *((zrange_t *) start);
#endif
	    }
	    /* start is reused from here on for the start of the digits */
	    start = compend = patinput;
	    comp = 0;
	    while (patinput < patinend && idigit(*patinput)) {
		if (comp)
		    comp *= 10;
		comp += *patinput - '0';
		patinput++;
		compend++;

		if (comp & ((zrange_t)1 << (sizeof(comp)*8 -
#ifdef ZRANGE_T_IS_SIGNED
					    2
#else
					    1
#endif
				))) {
		    /*
		     * Out of range (allowing for signedness, which
		     * we need if we are using zlongs).
		     * This is as far as we can go.
		     * If we're doing a range "from", skip all the
		     * remaining numbers.  Otherwise, we can't
		     * match beyond the previous point anyway.
		     * Leave the pointer to the last calculated
		     * position (compend) where it was before.
		     */
		    if (op == P_NUMFROM) {
			while (patinput < patinend && idigit(*patinput))
			    patinput++;
		    }
		}
	    }
	    save = patinput;
	    no = 0;
	    /* Try the longest run of digits first, then back off. */
	    while (patinput > start) {
		/* if already too small, no power on earth can save it */
		if (comp < from && patinput <= compend)
		    break;
		if ((op == P_NUMFROM || comp <= to) && patmatch(next)) {
		    return 1;
		}
		if (!no && P_OP(next) == P_EXACTLY &&
		    (!P_LS_LEN(next) ||
		     !idigit(STOUC(*P_LS_STR(next)))) &&
		    !(patglobflags & 0xff))
		    return 0;
		patinput = --save;
		no++;
		/*
		 * With a range start and an unrepresentable test
		 * number, we just back down the test string without
		 * changing the number until we get to a representable
		 * one.
		 */
		if (patinput < compend)
		    comp /= 10;
	    }
	    patinput = start;
	    fail = 1;
	    break;
	case P_NUMANY:
	    /* This is <->: any old set of digits, don't bother comparing */
	    start = patinput;
	    while (patinput < patinend && idigit(*patinput))
		patinput++;
	    save = patinput;
	    no = 0;
	    while (patinput > start) {
		if (patmatch(next))
		    return 1;
		/* Same short cut as for the numeric ranges above. */
		if (!no && P_OP(next) == P_EXACTLY &&
		    (!P_LS_LEN(next) ||
		     !idigit(*P_LS_STR(next))) &&
		    !(patglobflags & 0xff))
		    return 0;
		patinput = --save;
		no++;
	    }
	    patinput = start;
	    fail = 1;
	    break;
	case P_NOTHING:
	    break;
	case P_BACK:
	    break;
	case P_GFLAGS:
	    /* Replace the glob flags with those stored in the node. */
	    patglobflags = P_OPERAND(scan)->l;
	    break;
	case P_OPEN:
	case P_OPEN+1:
	case P_OPEN+2:
	case P_OPEN+3:
	case P_OPEN+4:
	case P_OPEN+5:
	case P_OPEN+6:
	case P_OPEN+7:
	case P_OPEN+8:
	case P_OPEN+9:
	    /*
	     * no is the parenthesis number; nothing is recorded for 0.
	     * The start of group no is flagged by bit (no - 1) of
	     * parsfound and only recorded once the rest has matched.
	     */
	    no = P_OP(scan) - P_OPEN;
	    save = patinput;

	    if (patmatch(next)) {
		/*
		 * Don't set patbeginp if some later invocation of
		 * the same parentheses already has.
		 */
		if (no && !(parsfound & (1 << (no - 1)))) {
		    patbeginp[no-1] = save;
		    parsfound |= 1 << (no - 1);
		}
		return 1;
	    } else
		return 0;
	    break;
	case P_CLOSE:
	case P_CLOSE+1:
	case P_CLOSE+2:
	case P_CLOSE+3:
	case P_CLOSE+4:
	case P_CLOSE+5:
	case P_CLOSE+6:
	case P_CLOSE+7:
	case P_CLOSE+8:
	case P_CLOSE+9:
	    /*
	     * As for P_OPEN, but the end of group no is flagged
	     * by bit (no + 15) of parsfound.
	     */
	    no = P_OP(scan) - P_CLOSE;
	    save = patinput;

	    if (patmatch(next)) {
		if (no && !(parsfound & (1 << (no + 15)))) {
		    patendp[no-1] = save;
		    parsfound |= 1 << (no + 15);
		}
		return 1;
	    } else
		return 0;
	    break;
	case P_EXCSYNC:
	    /* See the P_EXCLUDE code below for where syncptr comes from */
	    {
		unsigned char *syncptr;
		Upat after;
		after = P_OPERAND(scan);
		DPUTS(!P_ISEXCLUDE(after),
		      "BUG: EXCSYNC not followed by EXCLUDE.");
		DPUTS(!P_OPERAND(after)->p,
		      "BUG: EXCSYNC not handled by EXCLUDE");
		/* One byte of the sync string per position in the input. */
		syncptr = P_OPERAND(after)->p + (patinput - patinstart);
		/*
		 * If we already matched from here, this time we fail.
		 * See WBRANCH code for story about error count.
		 */
		if (*syncptr && errsfound + 1 >= *syncptr)
		    return 0;
		/*
		 * Else record that we (possibly) matched this time.
		 * No harm if we don't:  then the previous test will just
		 * short cut the attempted match that is bound to fail.
		 * We never try to exclude something that has already
		 * failed anyway.
		 */
		*syncptr = errsfound + 1;
	    }
	    break;
	case P_EXCEND:
	    /*
	     * This is followed by a P_EXCSYNC, but only in the P_EXCLUDE
	     * branch.  Actually, we don't bother following it:  all we
	     * need to know is that we successfully matched so far up
	     * to the end of the asserted pattern; the endpoint
	     * in the target string is nulled out.
	     */
	    if (!(fail = (patinput < patinend)))
		return 1;
	    break;
	case P_BRANCH:
	case P_WBRANCH:
	    /* P_EXCLUDE shouldn't occur without a P_BRANCH */
	    if (!P_ISBRANCH(next)) {
		/* no choice, avoid recursion */
		DPUTS(P_OP(scan) == P_WBRANCH,
		      "BUG: WBRANCH with no alternative.");
		next = P_OPERAND(scan);
	    } else {
		do {
		    /* State to restore before trying the next alternative. */
		    save = patinput;
		    savglobflags = patglobflags;
		    saverrsfound = errsfound;
		    if (P_ISEXCLUDE(next)) {
			/*
			 * The strategy is to test the asserted pattern,
			 * recording via P_EXCSYNC how far the part to
			 * be excluded matched.  We then set the
			 * length of the test string to that
			 * point and see if the exclusion as far as
			 * P_EXCEND also matches that string.
			 * We need to keep testing the asserted pattern
			 * by backtracking, since the first attempt
			 * may be excluded while a later attempt may not.
			 * For this we keep a pointer just after
			 * the P_EXCLUDE which is tested by the P_EXCSYNC
			 * to see if we matched there last time, in which
			 * case we fail.  If there is nothing to backtrack
			 * over, that doesn't matter:  we should fail anyway.
			 * The pointer also tells us where the asserted
			 * pattern matched for use by the exclusion.
			 *
			 * It's hard to allocate space for this
			 * beforehand since we may need to do it
			 * recursively.
			 *
			 * P.S. in case you were wondering, this code
			 * is horrible.
			 */
			Upat syncstrp;
			char *origpatinend;
			unsigned char *oldsyncstr;
			char *matchpt = NULL;
			int ret, savglobdots, matchederrs = 0;
			int savparsfound = parsfound;
			DPUTS(P_OP(scan) == P_WBRANCH,
			      "BUG: excluded WBRANCH");
			syncstrp = P_OPERAND(next);
			/*
			 * Unlike WBRANCH, each test at the same exclude
			 * sync point (due to an external loop) is separate,
			 * i.e testing (foo~bar)# is no different from
			 * (foo~bar)(foo~bar)... from the exclusion point
			 * of view, so we use a different sync string.
			 */
			oldsyncstr = syncstrp->p;
			syncstrp->p = (unsigned char *)
			    zshcalloc((patinend - patinstart) + 1);
			origpatinend = patinend;
			while ((ret = patmatch(P_OPERAND(scan)))) {
			    unsigned char *syncpt;
			    char *savpatinstart;
			    int savforce = forceerrs;
			    int savpatflags = patflags, synclen;
			    forceerrs = -1;
			    savglobdots = globdots;
			    matchederrs = errsfound;
			    matchpt = patinput;    /* may not be end */
			    globdots = 1;	   /* OK to match . first */
			    /* Find the point where the scan
			     * matched the part to be excluded: because
			     * of backtracking, the one
			     * most recently matched will be the first.
			     * (Luckily, backtracking is done after all
			     * possibilities for approximation have been
			     * checked.)
			     */
			    for (syncpt = syncstrp->p; !*syncpt; syncpt++)
				;
			    synclen = syncpt - syncstrp->p;
			    if (patinstart + synclen != patinend) {
				/*
				 * Temporarily mark the string as
				 * ending at this point.
				 */
				DPUTS(patinstart + synclen > matchpt,
				      "BUG: EXCSYNC failed");

				patinend = patinstart + synclen;
				/*
				 * If this isn't really the end of the string,
				 * remember this for the (#e) assertion.
				 */
				patflags |= PAT_NOTEND;
			    }
			    savpatinstart = patinstart;
			    next = PATNEXT(scan);
			    /* Try each exclusion in turn against the match. */
			    while (next && P_ISEXCLUDE(next)) {
				patinput = save;
				/*
				 * turn off approximations in exclusions:
				 * note we keep remaining patglobflags
				 * set by asserted branch (or previous
				 * excluded branches, for consistency).
				 */
				patglobflags &= ~0xff;
				errsfound = 0;
				opnd = P_OPERAND(next) + 1;
				if (P_OP(next) == P_EXCLUDP && patinpath) {
				    /*
				     * Top level exclusion with a file,
				     * applies to whole path so add the
				     * segments already matched.
				     * We copied these in front of the
				     * test pattern, so patinend doesn't
				     * need moving.
				     */
				    DPUTS(patinput != patinstart,
					  "BUG: not at start excluding path");
				    patinput = patinstart = patinpath;
				}
				if (patmatch(opnd)) {
				    ret = 0;
				    /*
				     * Another subtlety: if we exclude the
				     * match, any parentheses just found
				     * become invalidated.
				     */
				    parsfound = savparsfound;
				}
				if (patinpath) {
				    patinput = savpatinstart +
					(patinput - patinstart);
				    patinstart = savpatinstart;
				}
				if (!ret)
				    break;
				next = PATNEXT(next);
			    }
			    /*
			     * Restore original end position.
			     */
			    patinend = origpatinend;
			    patflags = savpatflags;
			    globdots = savglobdots;
			    forceerrs = savforce;
			    if (ret)
				break;
			    /* Excluded:  backtrack the asserted pattern. */
			    patinput = save;
			    patglobflags = savglobflags;
			    errsfound = saverrsfound;
			}
			zfree((char *)syncstrp->p,
			      (patinend - patinstart) + 1);
			syncstrp->p = oldsyncstr;
			if (ret) {
			    patinput = matchpt;
			    errsfound = matchederrs;
			    return 1;
			}
			/* Skip over the exclusions to the next real branch. */
			while ((scan = PATNEXT(scan)) &&
			       P_ISEXCLUDE(scan))
			    ;
		    } else {
			int ret = 1, pfree = 0;
			Upat ptrp = NULL;
			unsigned char *ptr;
			if (P_OP(scan) == P_WBRANCH) {
			    /*
			     * This is where we make sure that we are not
			     * repeatedly matching zero-length strings in
			     * a closure, which would cause an infinite loop,
			     * and also remove exponential behaviour in
			     * backtracking nested closures.
			     * The P_WBRANCH operator leaves a space for a
			     * uchar *, initialized to NULL, which is
			     * turned into a string the same length as the
			     * target string.  Every time we match from a
			     * particular point in the target string, we
			     * stick a 1 at the corresponding point here.
			     * If we come round to the same branch again, and
			     * there is already a 1, then the test fails.
			     */
			    opnd = P_OPERAND(scan);
			    ptrp = opnd++;
			    if (!ptrp->p) {
				/* We allocated it, so we free it below. */
				ptrp->p = (unsigned char *)
				    zshcalloc((patinend - patinstart) + 1);
				pfree = 1;
			    }
			    ptr = ptrp->p + (patinput - patinstart);

			    /*
			     * Without approximation, this is just a
			     * single bit test.  With approximation, we
			     * need to know how many errors there were
			     * last time we made the test.  If errsfound
			     * is now smaller than it was, hence we can
			     * make more approximations in the remaining
			     * code, we continue with the test.
			     * (This is why the max number of errors is
			     * 254, not 255.)
			     */
			    if (*ptr && errsfound + 1 >= *ptr)
				ret = 0;
			    *ptr = errsfound + 1;
			} else
			    opnd = P_OPERAND(scan);
			if (ret)
			    ret = patmatch(opnd);
			if (pfree) {
			    zfree((char *)ptrp->p,
				  (patinend - patinstart) + 1);
			    ptrp->p = NULL;
			}
			if (ret)
			    return 1;
			scan = PATNEXT(scan);
		    }
		    /* This alternative failed:  restore and try the next. */
		    patinput = save;
		    patglobflags = savglobflags;
		    errsfound = saverrsfound;
		    DPUTS(P_OP(scan) == P_WBRANCH,
			  "BUG: WBRANCH not first choice.");
		    next = PATNEXT(scan);
		} while (scan && P_ISBRANCH(scan));
		return 0;
	    }
	    break;
	case P_STAR:
	    /* Handle specially for speed, although really P_ONEHASH+P_ANY */
	case P_ONEHASH:
	case P_TWOHASH:
	    /*
	     * This is just simple cases, matching one character.
	     * With approximations, we still handle * this way, since
	     * no approximation is ever necessary, but other closures
	     * are handled by the more complicated branching method
	     */
	    op = P_OP(scan);
	    /* Note that no counts possibly metafied characters */
	    start = patinput;
	    {
		char *lastcharstart;
		/*
		 * Array to record the start of characters for
		 * backtracking.
		 */
		VARARR(char, charstart, patinend-patinput);
		memset(charstart, 0, patinend-patinput);

		if (op == P_STAR) {
		    /* Consume everything, counting characters as we go. */
		    for (no = 0; patinput < patinend;
			 CHARINC(patinput, patinend))
		    {
			charstart[patinput-start] = 1;
			no++;
		    }
		    /* simple optimization for reasonably common case */
		    if (P_OP(next) == P_END)
			return 1;
		} else {
		    DPUTS(patglobflags & 0xff,
			  "BUG: wrong backtracking with approximation.");
		    if (!globdots && P_NOTDOT(P_OPERAND(scan)) &&
			patinput == patinstart && patinput < patinend &&
			CHARREF(patinput, patinend) == ZWC('.'))
			return 0;
		    no = patrepeat(P_OPERAND(scan), charstart);
		}
		/* P_TWOHASH needs at least one repetition. */
		min = (op == P_TWOHASH) ? 1 : 0;
		/*
		 * Lookahead to avoid useless matches. This is not possible
		 * with approximation.
		 */
		if (P_OP(next) == P_EXACTLY && P_LS_LEN(next) &&
		    !(patglobflags & 0xff)) {
		    char *nextop = P_LS_STR(next);
#ifdef MULTIBYTE_SUPPORT
		    /* else second argument of CHARREF isn't used */
		    int nextlen = P_LS_LEN(next);
#endif
		    /*
		     * If that P_EXACTLY is last (common in simple patterns,
		     * such as *.c), then it can only be matched at one
		     * point in the test string, so record that.
		     */
		    if (P_OP(PATNEXT(next)) == P_END &&
			!(patflags & PAT_NOANCH)) {
			int ptlen = patinend - patinput;
			int lenmatch = patinend -
			    (min ? CHARNEXT(start, patinend) : start);
			/* Are we in the right range? */
			if (P_LS_LEN(next) > lenmatch ||
			    P_LS_LEN(next) < ptlen)
			    return 0;
			/* Yes, just position appropriately and test. */
			patinput += ptlen - P_LS_LEN(next);
			/*
			 * Here we will need to be careful that patinput is not
			 * in the middle of a multibyte character.
			 */
			/* Continue loop with P_EXACTLY test. */
			break;
		    }
		    nextch = CHARREF(nextop, nextop + nextlen);
		} else
		    nextch = PEOF;
		savglobflags = patglobflags;
		saverrsfound = errsfound;
		lastcharstart = charstart + (patinput - start);
		if (no >= min) {
		    /*
		     * Try the rest of the pattern at the longest match
		     * first, then give back one character at a time.
		     */
		    for (;;) {
			patint_t charmatch_cache;
			if (nextch == PEOF ||
			    (patinput < patinend &&
			     CHARMATCH_EXPR(CHARREF(patinput, patinend),
					    nextch))) {
			    if (patmatch(next))
				return 1;
			}
			if (--no < min)
			    break;
			/* find start of previous full character */
			while (!*--lastcharstart)
			    DPUTS(lastcharstart < charstart,
				  "lastcharstart invalid");
			patinput = start + (lastcharstart-charstart);
			patglobflags = savglobflags;
			errsfound = saverrsfound;
		    }
		}
	    }
	    /*
	     * As with branches, the patmatch(next) stuff for *
	     * handles approximation, so we don't need to try
	     * anything here.
	     */
	    return 0;
	case P_ISSTART:
	    if (patinput != patinstart || (patflags & PAT_NOTSTART))
		fail = 1;
	    break;
	case P_ISEND:
	    if (patinput < patinend || (patflags & PAT_NOTEND))
		fail = 1;
	    break;
	case P_COUNTSTART:
	    {
		/*
		 * Save and restore the current count and the
		 * start pointer in case the pattern has been
		 * executed by a previous repetition of a
		 * closure.
		 */
		long *curptr = &P_OPERAND(scan)[P_CT_CURRENT].l;
		long savecount = *curptr;
		unsigned char *saveptr = scan[P_CT_PTR].p;
		int ret;

		*curptr = 0L;
		ret = patmatch(P_OPERAND(scan));
		*curptr = savecount;
		scan[P_CT_PTR].p = saveptr;
		return ret;
	    }
	case P_COUNT:
	    {
		/* (#cN,M): execution is relatively straightforward */
		long cur = scan[P_CT_CURRENT].l;
		long min = scan[P_CT_MIN].l;
		long max = scan[P_CT_MAX].l;

		if (cur && cur >= min &&
		    (unsigned char *)patinput == scan[P_CT_PTR].p) {
		    /*
		     * Not at the first attempt to match so
		     * the previous attempt managed zero length.
		     * We can do this indefinitely so there's
		     * no point in going on.  Simply try to
		     * match the remainder of the pattern.
		     */
		    return patmatch(next);
		}
		scan[P_CT_PTR].p = (unsigned char *)patinput;

		/* A negative max is treated as no upper limit. */
		if (max < 0 || cur < max) {
		    char *patinput_thistime = patinput;
		    scan[P_CT_CURRENT].l = cur + 1;
		    if (patmatch(scan + P_CT_OPERAND))
			return 1;
		    patinput = patinput_thistime;
		}
		if (cur < min)
		    return 0;
		return patmatch(next);
	    }
	case P_END:
	    /* Unless PAT_NOANCH is set, all the input must be used up. */
	    if (!(fail = (patinput < patinend && !(patflags & PAT_NOANCH))))
		return 1;
	    break;
#ifdef DEBUG
	default:
	    dputs("BUG: bad operand in patmatch.");
	    return 0;
	    break;
#endif
	}

	if (fail) {
	    /*
	     * The low byte of patglobflags is the number of errors
	     * allowed; forceerrs, unless -1, is a further limit.
	     */
	    if (errsfound < (patglobflags & 0xff) &&
		(forceerrs == -1 || errsfound < forceerrs)) {
		/*
		 * Approximation code.  There are four possibilities
		 *
		 * 1. omit character from input string
		 * 2. transpose characters in input and pattern strings
		 * 3. omit character in both input and pattern strings
		 * 4. omit character from pattern string.
		 *
		 * which we try in that order.
		 *
		 * Of these, 2, 3 and 4 require an exact match string
		 * (P_EXACTLY) while 1, 2 and 3 require that we not
		 * have reached the end of the input string.
		 *
		 * Note in each case after making the approximation we
		 * need to retry the *same* pattern; this is what
		 * requires exactpos, a slightly doleful way of
		 * communicating with the exact character matcher.
		 */
		char *savexact = exactpos;
		save = patinput;
		savglobflags = patglobflags;
		saverrsfound = ++errsfound;
		fail = 0;

		DPUTS(P_OP(scan) != P_EXACTLY && exactpos,
		      "BUG: non-exact match has set exactpos");

		/* Try omitting a character from the input string */
		if (patinput < patinend) {
		    CHARINC(patinput, patinend);
		    /* If we are not on an exact match, then this is
		     * our last gasp effort, so we can optimize out
		     * the recursive call.
		     */
		    if (P_OP(scan) != P_EXACTLY)
			continue;
		    if (patmatch(scan))
			return 1;
		}

		if (P_OP(scan) == P_EXACTLY) {
		    char *nextexact = savexact;
		    DPUTS(!savexact,
			  "BUG: exact match has not set exactpos");
		    CHARINC(nextexact, exactend);

		    if (save < patinend) {
			char *nextin = save;
			CHARINC(nextin, patinend);
			patglobflags = savglobflags;
			errsfound = saverrsfound;
			exactpos = savexact;

			/*
			 * Try swapping two characters in patinput and
			 * exactpos
			 */
			if (save < patinend && nextin < patinend &&
			    nextexact < exactend) {
			    patint_t cin0 = CHARREF(save, patinend);
			    patint_t cpa0 = CHARREF(exactpos, exactend);
			    patint_t cin1 = CHARREF(nextin, patinend);
			    patint_t cpa1 = CHARREF(nextexact, exactend);

			    if (CHARMATCH(cin0, cpa1) &&
				CHARMATCH(cin1, cpa0)) {
				patinput = nextin;
				CHARINC(patinput, patinend);
				exactpos = nextexact;
				CHARINC(exactpos, exactend);
				if (patmatch(scan))
				    return 1;

				patglobflags = savglobflags;
				errsfound = saverrsfound;
			    }
			}

			/*
			 * Try moving up both strings.
			 */
			patinput = nextin;
			exactpos = nextexact;
			if (patmatch(scan))
			    return 1;

			patinput = save;
			patglobflags = savglobflags;
			errsfound = saverrsfound;
			exactpos = savexact;
		    }

		    DPUTS(exactpos == exactend, "approximating too far");
		    /*
		     * Try moving up the exact match pattern.
		     * This must be the last attempt, so just loop
		     * instead of calling recursively.
		     */
		    CHARINC(exactpos, exactend);
		    continue;
		}
	    }
	    /* No approximation possible (or allowed):  definite failure. */
	    exactpos = NULL;
	    return 0;
	}

	/* This node matched:  carry on with the next without recursing. */
	scan = next;
    }

    return 0;
}


/**/
#ifdef MULTIBYTE_SUPPORT

/*
 * Test the wide character ch against a pattern range specification.
 * range holds the specification; it is null-terminated and metafied.
 * Returns 1 if ch is matched by some element of the range, else 0.
 *
 * indptr is used by completion matching, which is why this
 * function is exported.  If indptr is not NULL, *indptr is set
 * to the index of the matching element in the range string.  An
 * "A-B" element is counted as though each of its characters were
 * listed: A has the element's normal index (say IA), B has
 * IA + (B-A), and characters in between fall in between.  We're
 * not strictly guaranteed this fits within a wint_t, but if this is
 * Unicode in 32 bits we have a fair amount of distance left over.
 *
 * mtp is used in the same circumstances.  If not NULL, *mtp is set
 * to the match type: 0 for a standard character, else the PP_
 * index.  It's not useful if the match failed.
 */

/**/
mod_export int
mb_patmatchrange(char *range, wchar_t ch, wint_t *indptr, int *mtp)
{
    wchar_t lo, hi;

    if (indptr)
	*indptr = 0;
    /*
     * N.B. range is unusual in being null-terminated and metafied:
     * the Posix classes and hyphenated ranges are encoded with
     * bytes above Meta and need to be recognised here.
     */
    while (*range) {
	if (!imeta(STOUC(*range))) {
	    /* ordinary character standing for itself */
	    if (metacharinc(&range) == ch) {
		if (mtp)
		    *mtp = 0;
		return 1;
	    }
	} else {
	    /* set if a single character or a class test succeeds */
	    int hit = 0;
	    int swtype = STOUC(*range++) - STOUC(Meta);

	    if (mtp)
		*mtp = swtype;
	    switch (swtype) {
	    case 0:
		/* ordinary metafied character: go back to the Meta */
		range--;
		hit = (metacharinc(&range) == ch);
		break;
	    case PP_ALPHA:
		hit = (iswalpha(ch) != 0);
		break;
	    case PP_ALNUM:
		hit = (iswalnum(ch) != 0);
		break;
	    case PP_ASCII:
		hit = ((ch & ~0x7f) == 0);
		break;
	    case PP_BLANK:
		hit = (ch == L' ' || ch == L'\t');
		break;
	    case PP_CNTRL:
		hit = (iswcntrl(ch) != 0);
		break;
	    case PP_DIGIT:
		hit = (iswdigit(ch) != 0);
		break;
	    case PP_GRAPH:
		hit = (iswgraph(ch) != 0);
		break;
	    case PP_LOWER:
		hit = (iswlower(ch) != 0);
		break;
	    case PP_PRINT:
		hit = (iswprint(ch) != 0);
		break;
	    case PP_PUNCT:
		hit = (iswpunct(ch) != 0);
		break;
	    case PP_SPACE:
		hit = (iswspace(ch) != 0);
		break;
	    case PP_UPPER:
		hit = (iswupper(ch) != 0);
		break;
	    case PP_XDIGIT:
		hit = (iswxdigit(ch) != 0);
		break;
	    case PP_IDENT:
		hit = ((wcsitype(ch, IIDENT)) != 0);
		break;
	    case PP_IFS:
		hit = ((wcsitype(ch, ISEP)) != 0);
		break;
	    case PP_IFSSPACE:
		/* must be ASCII space character */
		hit = (ch < 128 && iwsep((int)ch));
		break;
	    case PP_WORD:
		hit = ((wcsitype(ch, IWORD)) != 0);
		break;
	    case PP_RANGE:
		/* the two end points follow the marker */
		lo = metacharinc(&range);
		hi = metacharinc(&range);
		if (lo <= ch && ch <= hi) {
		    if (indptr)
			*indptr += ch - lo;
		    return 1;
		}
		/*
		 * Not in the range, so step the index over it,
		 * ignoring a bogus (reversed) range.  The common
		 * increment at the bottom of the loop then takes
		 * us past the range end, which is what we want.
		 */
		if (indptr && lo < hi)
		    *indptr += hi - lo;
		break;
	    case PP_UNKWN:
		DPUTS(1, "BUG: unknown posix range passed through.\n");
		break;
	    default:
		DPUTS(1, "BUG: unknown metacharacter in range.");
		break;
	    }
	    if (hit)
		return 1;
	}
	/* no match on this element: on to the next index */
	if (indptr)
	    (*indptr)++;
    }
    return 0;
}


#if 0
/*
 * N.B. this function is currently compiled out by the "#if 0" above.
 *
 * This is effectively the reverse of mb_patmatchrange().
 * Given a range descriptor of the same form, and an index into it,
 * try to determine the character that is matched.  If the index
 * points to a [:...:] generic style match, set chr to WEOF and
 * return the type in mtp instead.  Return 1 if successful, 0 if
 * there was no corresponding index.  Note all pointer arguments
 * must be non-null.
 *
 * TODO: for now the completion matching code does not handle
 * multibyte.  When it does, we will need either this, or
 * patmatchindex(), but not both---unlike user-initiated pattern
 * matching, multibyte mode in the line editor is always on when available.
 */

/**/
mod_export int
mb_patmatchindex(char *range, wint_t ind, wint_t *chr, int *mtp)
{
    wchar_t r1, r2, rchr;
    wint_t rdiff;

    /* Defaults, overridden by whichever element ind selects. */
    *chr = WEOF;
    *mtp = 0;

    while (*range) {
	if (imeta(STOUC(*range))) {
	    int swtype = STOUC(*range++) - STOUC(Meta);
	    switch (swtype) {
	    case 0:
		/* ordinary metafied character: go back to the Meta */
		range--;
		rchr = metacharinc(&range);
		if (!ind) {
		    *chr = (wint_t) rchr;
		    return 1;
		}
		break;

	    case PP_ALPHA:
	    case PP_ALNUM:
	    case PP_ASCII:
	    case PP_BLANK:
	    case PP_CNTRL:
	    case PP_DIGIT:
	    case PP_GRAPH:
	    case PP_LOWER:
	    case PP_PRINT:
	    case PP_PUNCT:
	    case PP_SPACE:
	    case PP_UPPER:
	    case PP_XDIGIT:
	    case PP_IDENT:
	    case PP_IFS:
	    case PP_IFSSPACE:
	    case PP_WORD:
		/* A class has no single character: report the type only. */
		if (!ind) {
		    *mtp = swtype;
		    return 1;
		}
		break;

	    case PP_RANGE:
		r1 = metacharinc(&range);
		r2 = metacharinc(&range);
		rdiff = (wint_t)r2 - (wint_t)r1; 
		/* The range covers indices 0 to rdiff from this point. */
		if (rdiff >= ind) {
		    *chr = (wint_t)r1 + ind;
		    return 1;
		}
		/* note the extra decrement to ind below */
		ind -= rdiff;
		break;
	    case PP_UNKWN:
		DPUTS(1, "BUG: unknown posix range passed through.\n");
		break;
	    default:
		DPUTS(1, "BUG: unknown metacharacter in range.");
		break;
	    }
	} else {
	    rchr = metacharinc(&range);
	    if (!ind) {
		*chr = (wint_t)rchr;
		return 1;
	    }
	}
	/* One element passed; give up if that was the one wanted. */
	if (!ind--)
	    break;
    }

    /* No corresponding index. */
    return 0;
}
#endif

/**/
#endif

/*
 * Single-byte counterpart of mb_patmatchrange().
 *
 * Test the character ch against the null-terminated, metafied range
 * specification in range.  Returns 1 if it matches, else 0.  If
 * indptr is not NULL, *indptr is set to the index within the range
 * of the element that matched, with hyphenated ranges counted as
 * if expanded.  If mtp is not NULL, *mtp is set to 0 for a match
 * against an ordinary character, else to the PP_ index.
 */

/**/
mod_export int
patmatchrange(char *range, int ch, int *indptr, int *mtp)
{
    int lo, hi;

    if (indptr)
	*indptr = 0;
    /*
     * N.B. range is unusual in being null-terminated and metafied:
     * the Posix classes and hyphenated ranges are encoded with
     * bytes above Meta and need to be recognised here.
     */
    while (*range) {
	if (!imeta(STOUC(*range))) {
	    /* ordinary character standing for itself */
	    if (STOUC(*range) == ch) {
		if (mtp)
		    *mtp = 0;
		return 1;
	    }
	} else {
	    /* set if a single character or a class test succeeds */
	    int hit = 0;
	    int swtype = STOUC(*range) - STOUC(Meta);

	    if (mtp)
		*mtp = swtype;
	    switch (swtype) {
	    case 0:
		/* ordinary metafied character: decode the next byte */
		range++;
		hit = (STOUC(*range ^ 32) == ch);
		break;
	    case PP_ALPHA:
		hit = ((isalpha(ch)) != 0);
		break;
	    case PP_ALNUM:
		hit = ((isalnum(ch)) != 0);
		break;
	    case PP_ASCII:
		hit = ((ch & ~0x7f) == 0);
		break;
	    case PP_BLANK:
		hit = (ch == ' ' || ch == '\t');
		break;
	    case PP_CNTRL:
		hit = ((iscntrl(ch)) != 0);
		break;
	    case PP_DIGIT:
		hit = ((isdigit(ch)) != 0);
		break;
	    case PP_GRAPH:
		hit = ((isgraph(ch)) != 0);
		break;
	    case PP_LOWER:
		hit = ((islower(ch)) != 0);
		break;
	    case PP_PRINT:
		hit = ((isprint(ch)) != 0);
		break;
	    case PP_PUNCT:
		hit = ((ispunct(ch)) != 0);
		break;
	    case PP_SPACE:
		hit = ((isspace(ch)) != 0);
		break;
	    case PP_UPPER:
		hit = ((isupper(ch)) != 0);
		break;
	    case PP_XDIGIT:
		hit = ((isxdigit(ch)) != 0);
		break;
	    case PP_IDENT:
		hit = ((iident(ch)) != 0);
		break;
	    case PP_IFS:
		hit = ((isep(ch)) != 0);
		break;
	    case PP_IFSSPACE:
		hit = ((iwsep(ch)) != 0);
		break;
	    case PP_WORD:
		hit = ((iword(ch)) != 0);
		break;
	    case PP_RANGE:
		/* the two end points follow the marker */
		range++;
		lo = STOUC(UNMETA(range));
		METACHARINC(range);
		hi = STOUC(UNMETA(range));
		/*
		 * Leave range on the last byte of the end point;
		 * the increment at the bottom of the loop moves on.
		 */
		if (*range == Meta)
		    range++;
		if (lo <= ch && ch <= hi) {
		    if (indptr)
			*indptr += ch - lo;
		    return 1;
		}
		/* step the index over a (non-bogus) range */
		if (indptr && lo < hi)
		    *indptr += hi - lo;
		break;
	    case PP_UNKWN:
		DPUTS(1, "BUG: unknown posix range passed through.\n");
		break;
	    default:
		DPUTS(1, "BUG: unknown metacharacter in range.");
		break;
	    }
	    if (hit)
		return 1;
	}
	/* no match on this element: on to the next index */
	if (indptr)
	    (*indptr)++;
	range++;
    }
    return 0;
}

/*
 * Single-byte counterpart of mb_patmatchindex(): the reverse
 * operation to patmatchrange().
 *
 * range is a range specification of the same form and ind an index
 * into it.  If ind selects an ordinary character or a character
 * inside a hyphenated range, *chr is set to that character.  If it
 * selects a [:...:] style class, *chr is left as -1 and the PP_ index
 * is stored in *mtp.  Returns 1 in either case, or 0 if there was no
 * element with that index.  chr and mtp must not be NULL.
 */

/**/
mod_export int
patmatchindex(char *range, int ind, int *chr, int *mtp)
{
    int lo, hi, span;

    /* defaults, overridden by whichever element ind selects */
    *chr = -1;
    *mtp = 0;

    while (*range) {
	if (!imeta(STOUC(*range))) {
	    /* ordinary character standing for itself */
	    if (ind == 0) {
		*chr = STOUC(*range);
		return 1;
	    }
	} else {
	    int swtype = STOUC(*range) - STOUC(Meta);

	    switch (swtype) {
	    case 0:
		/* ordinary metafied character: always skip the Meta */
		range++;
		if (ind == 0) {
		    *chr = STOUC(*range) ^ 32;
		    return 1;
		}
		break;

	    case PP_ALPHA:
	    case PP_ALNUM:
	    case PP_ASCII:
	    case PP_BLANK:
	    case PP_CNTRL:
	    case PP_DIGIT:
	    case PP_GRAPH:
	    case PP_LOWER:
	    case PP_PRINT:
	    case PP_PUNCT:
	    case PP_SPACE:
	    case PP_UPPER:
	    case PP_XDIGIT:
	    case PP_IDENT:
	    case PP_IFS:
	    case PP_IFSSPACE:
	    case PP_WORD:
		/* a class has no single character: report the type */
		if (ind == 0) {
		    *mtp = swtype;
		    return 1;
		}
		break;

	    case PP_RANGE:
		/* the two end points follow the marker */
		range++;
		lo = STOUC(UNMETA(range));
		METACHARINC(range);
		hi = STOUC(UNMETA(range));
		if (*range == Meta)
		    range++;
		/* the range covers indices 0 to span from here */
		span = hi - lo;
		if (ind <= span) {
		    *chr = lo + ind;
		    return 1;
		}
		/* one more is taken off by the decrement below */
		ind -= span;
		break;
	    case PP_UNKWN:
		DPUTS(1, "BUG: unknown posix range passed through.\n");
		break;
	    default:
		DPUTS(1, "BUG: unknown metacharacter in range.");
		break;
	    }
	}
	/* that was the element wanted, but it gave us nothing */
	if (ind == 0)
	    break;
	ind--;
	range++;
    }

    /* No corresponding index. */
    return 0;
}

/*
 * Match the simple node p as many times in succession as possible,
 * starting at patinput, and return the number of times it matched.
 * On return patinput has been moved past everything matched.
 *
 * charstart runs parallel to the string starting at the original
 * patinput:  a 1 is stored at the offset of the start of each
 * (possibly multibyte) character matched, for use by the caller
 * when backtracking.
 */

/**/
static int patrepeat(Upat p, char *charstart)
{
    int nmatched = 0;
    patint_t want, charmatch_cache;
    char *cur = patinput;
    char *set = (char *)P_OPERAND(p);

    switch(P_OP(p)) {
#ifdef DEBUG
    case P_ANY:
	dputs("BUG: ?# did not get optimized to *");
	return 0;
	break;
#endif
    case P_EXACTLY:
	/* a single literal character, repeated */
	DPUTS(P_LS_LEN(p) != 1, "closure following more than one character");
	want = CHARREF(P_LS_STR(p), P_LS_STR(p) + P_LS_LEN(p));
	for (; cur < patinend &&
		 CHARMATCH_EXPR(CHARREF(cur, patinend), want);
	     CHARINC(cur, patinend)) {
	    charstart[cur-patinput] = 1;
	    nmatched++;
	}
	break;
    case P_ANYOF:
    case P_ANYBUT:
	{
	    /* P_ANYOF wants characters in the set, P_ANYBUT ones not */
	    int wantinset = (P_OP(p) == P_ANYOF);

	    while (cur < patinend) {
		int inset;
#ifdef MULTIBYTE_SUPPORT
		wchar_t cr = CHARREF(cur, patinend);

		if (patglobflags & GF_MULTIBYTE)
		    inset = mb_patmatchrange(set, cr, NULL, NULL);
		else
		    inset = patmatchrange(set, (int)cr, NULL, NULL);
#else
		inset = patmatchrange(set, CHARREF(cur, patinend),
				      NULL, NULL);
#endif
		if (inset ^ wantinset)
		    break;
		charstart[cur-patinput] = 1;
		nmatched++;
		CHARINC(cur, patinend);
	    }
	}
	break;
#ifdef DEBUG
    default:
	dputs("BUG: something very strange is happening in patrepeat");
	return 0;
	break;
#endif
    }

    patinput = cur;
    return nmatched;
}

/*
 * Free the compiled pattern prog.  Nothing is done if prog is NULL
 * or is one of the two dummy_patprog placeholders.
 */

/**/
mod_export void
freepatprog(Patprog prog)
{
    if (!prog || prog == dummy_patprog1 || prog == dummy_patprog2)
	return;

    zfree(prog, prog->size);
}