--- engine.c.bsdnew 2009-11-11 11:29:04.000000000 -0800 +++ engine.c 2009-11-11 11:30:28.000000000 -0800 @@ -272,7 +272,7 @@ matcher(struct re_guts *g, break; assert(m->coldp < m->endp); m->coldp += XMBRTOWC(NULL, m->coldp, - m->endp - m->coldp, &m->mbs, 0); + m->endp - m->coldp, &m->mbs, 0, g->loc); } if (nmatch == 1 && !g->backrefs) break; /* no further info needed */ @@ -333,7 +333,7 @@ matcher(struct re_guts *g, NOTE("false alarm"); /* recycle starting later */ start = m->coldp + XMBRTOWC(NULL, m->coldp, - stop - m->coldp, &m->mbs, 0); + stop - m->coldp, &m->mbs, 0, g->loc); assert(start <= stop); } @@ -410,7 +410,7 @@ dissect(struct match *m, assert(nope); break; case OCHAR: - sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); + sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0, m->g->loc); break; case OBOL: case OEOL: @@ -419,7 +419,7 @@ dissect(struct match *m, break; case OANY: case OANYOF: - sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0); + sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0, m->g->loc); break; case OBACK_: case O_BACK: @@ -480,6 +480,10 @@ dissect(struct match *m, sep = ssp; ssp = oldssp; } + else if (tail==rest) { + /* Fix for test expr 105 */ + ssp = oldssp; + } assert(sep == rest); /* must exhaust substring */ assert(slow(m, ssp, sep, ssub, esub) == rest); dp = dissect(m, ssp, sep, ssub, esub); @@ -532,6 +536,14 @@ dissect(struct match *m, i = OPND(m->g->strip[ss]); assert(0 < i && i <= m->g->nsub); m->pmatch[i].rm_so = sp - m->offp; + /* fix for T.regcomp 43: don't remember previous + subexpression matches beyond the current one (i) */ + i++; + while (i<= m->g->nsub) { + m->pmatch[i].rm_so = -1; + m->pmatch[i].rm_eo = -1; + i++; + } break; case ORPAREN: i = OPND(m->g->strip[ss]); @@ -586,14 +598,14 @@ backref(struct match *m, case OCHAR: if (sp == stop) return(NULL); - sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc); if (wc != OPND(s)) return(NULL); break; case 
OANY: if (sp == stop) return(NULL); - sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc); if (wc == BADCHAR) return (NULL); break; @@ -601,8 +613,8 @@ backref(struct match *m, if (sp == stop) return (NULL); cs = &m->g->sets[OPND(s)]; - sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR); - if (wc == BADCHAR || !CHIN(cs, wc)) + sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR, m->g->loc); + if (wc == BADCHAR || !CHIN(cs, wc, m->g->loc)) return(NULL); break; case OBOL: @@ -626,8 +638,8 @@ backref(struct match *m, (sp < m->endp && *(sp-1) == '\n' && (m->g->cflags&REG_NEWLINE)) || (sp > m->beginp && - !ISWORD(*(sp-1))) ) && - (sp < m->endp && ISWORD(*sp)) ) + !ISWORD(*(sp-1), m->g->loc)) ) && + (sp < m->endp && ISWORD(*sp, m->g->loc)) ) { /* yes */ } else return(NULL); @@ -636,8 +648,8 @@ backref(struct match *m, if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) || (sp < m->endp && *sp == '\n' && (m->g->cflags&REG_NEWLINE)) || - (sp < m->endp && !ISWORD(*sp)) ) && - (sp > m->beginp && ISWORD(*(sp-1))) ) + (sp < m->endp && !ISWORD(*sp, m->g->loc)) ) && + (sp > m->beginp && ISWORD(*(sp-1), m->g->loc)) ) { /* yes */ } else return(NULL); @@ -811,7 +823,7 @@ fast( struct match *m, clen = 0; c = OUT; } else - clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); + clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR, m->g->loc); if (EQ(st, fresh)) coldp = p; @@ -835,12 +847,12 @@ fast( struct match *m, } /* how about a word boundary? 
*/ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc, m->g->loc))) && + (c != OUT && ISWORD(c, m->g->loc)) ) { flagch = BOW; } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + if ( (lastc != OUT && ISWORD(lastc, m->g->loc)) && + (flagch == EOL || (c != OUT && !ISWORD(c, m->g->loc))) ) { flagch = EOW; } if (flagch == BOW || flagch == EOW) { @@ -865,7 +877,7 @@ fast( struct match *m, assert(coldp != NULL); m->coldp = coldp; if (ISSET(st, stopst)) - return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0)); + return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0, m->g->loc)); else return(NULL); } @@ -916,7 +928,7 @@ slow( struct match *m, c = OUT; clen = 0; } else - clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR); + clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR, m->g->loc); /* is there an EOL and/or BOL between lastc and c? */ flagch = '\0'; @@ -938,12 +950,12 @@ slow( struct match *m, } /* how about a word boundary? */ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { + if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc, m->g->loc))) && + (c != OUT && ISWORD(c, m->g->loc)) ) { flagch = BOW; } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { + if ( (lastc != OUT && ISWORD(lastc, m->g->loc)) && + (flagch == EOL || (c != OUT && !ISWORD(c, m->g->loc))) ) { flagch = EOW; } if (flagch == BOW || flagch == EOW) { @@ -1033,7 +1045,7 @@ step(struct re_guts *g, break; case OANYOF: cs = &g->sets[OPND(s)]; - if (!NONCHAR(ch) && CHIN(cs, ch)) + if (!NONCHAR(ch) && CHIN(cs, ch, g->loc)) FWD(aft, bef, 1); break; case OBACK_: /* ignored here */