--- engine.c.orig 2004-11-25 11:38:32.000000000 -0800 +++ engine.c 2005-04-18 16:52:09.000000000 -0700 @@ -270,7 +270,7 @@ int eflags; break; assert(m->coldp < m->endp); m->coldp += XMBRTOWC(NULL, m->coldp, - m->endp - m->coldp, &m->mbs, 0); + m->endp - m->coldp, &m->mbs, 0, g->loc); } if (nmatch == 1 && !g->backrefs) break; /* no further info needed */ @@ -331,7 +331,7 @@ int eflags; NOTE("false alarm"); /* recycle starting later */ start = m->coldp + XMBRTOWC(NULL, m->coldp, - m->endp - m->coldp, &m->mbs, 0); + m->endp - m->coldp, &m->mbs, 0, g->loc); assert(start <= stop); } @@ -409,7 +409,7 @@ sopno stopst; assert(nope); break; case OCHAR: - sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); + sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0, m->g->loc); break; case OBOL: case OEOL: @@ -418,7 +418,7 @@ sopno stopst; break; case OANY: case OANYOF: - sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); + sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0, m->g->loc); break; case OBACK_: case O_BACK: @@ -479,6 +479,10 @@ sopno stopst; sep = ssp; ssp = oldssp; } + else if (tail==rest) { + /* Fix for test expr 105 */ + ssp = oldssp; + } assert(sep == rest); /* must exhaust substring */ assert(slow(m, ssp, sep, ssub, esub) == rest); dp = dissect(m, ssp, sep, ssub, esub); @@ -531,6 +535,14 @@ sopno stopst; i = OPND(m->g->strip[ss]); assert(0 < i && i <= m->g->nsub); m->pmatch[i].rm_so = sp - m->offp; + /* fix for T.regcomp 43: don't remember previous + subexpression matches beyond the current one (i) */ + i++; + while (i<= m->g->nsub) { + m->pmatch[i].rm_so = -1; + m->pmatch[i].rm_eo = -1; + i++; + } break; case ORPAREN: i = OPND(m->g->strip[ss]); @@ -585,14 +597,14 @@ sopno lev; /* PLUS nesting level */ case OCHAR: if (sp == stop) return(NULL); - sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc); if (wc != OPND(s)) return(NULL); break; case OANY: if (sp == stop) return(NULL); - sp += XMBRTOWC(&wc, 
sp, stop - sp, &m->mbs, BADCHAR); + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc); if (wc == BADCHAR) return (NULL); break; @@ -600,8 +612,8 @@ sopno lev; /* PLUS nesting level */ if (sp == stop) return (NULL); cs = &m->g->sets[OPND(s)]; - sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); - if (wc == BADCHAR || !CHIN(cs, wc)) + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc); + if (wc == BADCHAR || !CHIN(cs, wc, m->g->loc)) return(NULL); break; case OBOL: @@ -625,8 +637,8 @@ sopno lev; /* PLUS nesting level */ (sp < m->endp && *(sp-1) == '\n' && (m->g->cflags&REG_NEWLINE)) || (sp > m->beginp && - !ISWORD(*(sp-1))) ) && - (sp < m->endp && ISWORD(*sp)) ) + !ISWORD(*(sp-1), m->g->loc)) ) && + (sp < m->endp && ISWORD(*sp, m->g->loc)) ) { /* yes */ } else return(NULL); @@ -635,8 +647,8 @@ sopno lev; /* PLUS nesting level */ if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || (sp < m->endp && *sp == '\n' && (m->g->cflags&REG_NEWLINE)) || - (sp < m->endp && !ISWORD(*sp)) ) && - (sp > m->beginp && ISWORD(*(sp-1))) ) + (sp < m->endp && !ISWORD(*sp, m->g->loc)) ) && + (sp > m->beginp && ISWORD(*(sp-1), m->g->loc)) ) { /* yes */ } else return(NULL); @@ -807,7 +819,7 @@ sopno stopst; if (p == m->endp) c = OUT; else - clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); + clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR, m->g->loc); if (EQ(st, fresh)) coldp = p; @@ -831,12 +843,12 @@ sopno stopst; } /* how about a word boundary? 
*/ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc, m->g->loc))) && + (c != OUT && ISWORD(c, m->g->loc)) ) { flagch = BOW; } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + if ( (lastc != OUT && ISWORD(lastc, m->g->loc)) && + (flagch == EOL || (c != OUT && !ISWORD(c, m->g->loc))) ) { flagch = EOW; } if (flagch == BOW || flagch == EOW) { @@ -861,7 +873,7 @@ sopno stopst; assert(coldp != NULL); m->coldp = coldp; if (ISSET(st, stopst)) - return(p+XMBRTOWC(NULL, p, m->endp - p, &m->mbs, 0)); + return(p+XMBRTOWC(NULL, p, m->endp - p, &m->mbs, 0, m->g->loc)); else return(NULL); } @@ -913,7 +925,7 @@ sopno stopst; c = OUT; clen = 0; } else - clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); + clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR, m->g->loc); /* is there an EOL and/or BOL between lastc and c? */ flagch = '\0'; @@ -935,12 +947,12 @@ sopno stopst; } /* how about a word boundary? */ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc, m->g->loc))) && + (c != OUT && ISWORD(c, m->g->loc)) ) { flagch = BOW; } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + if ( (lastc != OUT && ISWORD(lastc, m->g->loc)) && + (flagch == EOL || (c != OUT && !ISWORD(c, m->g->loc))) ) { flagch = EOW; } if (flagch == BOW || flagch == EOW) { @@ -1031,7 +1043,7 @@ states aft; /* states already known re break; case OANYOF: cs = &g->sets[OPND(s)]; - if (!NONCHAR(ch) && CHIN(cs, ch)) + if (!NONCHAR(ch) && CHIN(cs, ch, g->loc)) FWD(aft, bef, 1); break; case OBACK_: /* ignored here */