#include "config.h"
#include "pcre_internal.h"
#define SET_BIT(c) start_bits[c/8] |= (1 << (c&7))
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN };
static int
find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
const pcre_uchar *startcode, int options, recurse_check *recurses,
int *countptr)
{
int length = -1;
BOOL utf = (options & PCRE_UTF8) != 0;
BOOL had_recurse = FALSE;
recurse_check this_recurse;
register int branchlength = 0;
register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
if ((*countptr)++ > 1000) return -1;
if (*code == OP_CBRA || *code == OP_SCBRA ||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
for (;;)
{
int d, min;
pcre_uchar *cs, *ce;
register pcre_uchar op = *cc;
switch (op)
{
case OP_COND:
case OP_SCOND:
cs = cc + GET(cc, 1);
if (*cs != OP_ALT)
{
cc = cs + 1 + LINK_SIZE;
break;
}
case OP_CBRA:
case OP_SCBRA:
case OP_BRA:
case OP_SBRA:
case OP_CBRAPOS:
case OP_SCBRAPOS:
case OP_BRAPOS:
case OP_SBRAPOS:
case OP_ONCE:
case OP_ONCE_NC:
d = find_minlength(re, cc, startcode, options, recurses, countptr);
if (d < 0) return d;
branchlength += d;
do cc += GET(cc, 1); while (*cc == OP_ALT);
cc += 1 + LINK_SIZE;
break;
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
return -1;
case OP_ALT:
case OP_KET:
case OP_KETRMAX:
case OP_KETRMIN:
case OP_KETRPOS:
case OP_END:
if (length < 0 || (!had_recurse && branchlength < length))
length = branchlength;
if (op != OP_ALT) return length;
cc += 1 + LINK_SIZE;
branchlength = 0;
had_recurse = FALSE;
break;
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
do cc += GET(cc, 1); while (*cc == OP_ALT);
case OP_REVERSE:
case OP_CREF:
case OP_DNCREF:
case OP_RREF:
case OP_DNRREF:
case OP_DEF:
case OP_CALLOUT:
case OP_SOD:
case OP_SOM:
case OP_EOD:
case OP_EODN:
case OP_CIRC:
case OP_CIRCM:
case OP_DOLL:
case OP_DOLLM:
case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY:
cc += PRIV(OP_lengths)[*cc];
break;
case OP_BRAZERO:
case OP_BRAMINZERO:
case OP_BRAPOSZERO:
case OP_SKIPZERO:
cc += PRIV(OP_lengths)[*cc];
do cc += GET(cc, 1); while (*cc == OP_ALT);
cc += 1 + LINK_SIZE;
break;
case OP_CHAR:
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
case OP_PLUS:
case OP_PLUSI:
case OP_MINPLUS:
case OP_MINPLUSI:
case OP_POSPLUS:
case OP_POSPLUSI:
case OP_NOTPLUS:
case OP_NOTPLUSI:
case OP_NOTMINPLUS:
case OP_NOTMINPLUSI:
case OP_NOTPOSPLUS:
case OP_NOTPOSPLUSI:
branchlength++;
cc += 2;
#ifdef SUPPORT_UTF
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEPOSPLUS:
branchlength++;
cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
break;
case OP_EXACT:
case OP_EXACTI:
case OP_NOTEXACT:
case OP_NOTEXACTI:
branchlength += GET2(cc,1);
cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
case OP_TYPEEXACT:
branchlength += GET2(cc,1);
cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
|| cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
break;
case OP_PROP:
case OP_NOTPROP:
cc += 2;
case OP_NOT_DIGIT:
case OP_DIGIT:
case OP_NOT_WHITESPACE:
case OP_WHITESPACE:
case OP_NOT_WORDCHAR:
case OP_WORDCHAR:
case OP_ANY:
case OP_ALLANY:
case OP_EXTUNI:
case OP_HSPACE:
case OP_NOT_HSPACE:
case OP_VSPACE:
case OP_NOT_VSPACE:
branchlength++;
cc++;
break;
case OP_ANYNL:
branchlength += 1;
cc++;
break;
case OP_ANYBYTE:
#ifdef SUPPORT_UTF
if (utf) return -1;
#endif
branchlength++;
cc++;
break;
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSSTAR:
case OP_TYPEPOSQUERY:
if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
cc += PRIV(OP_lengths)[op];
break;
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEPOSUPTO:
if (cc[1 + IMM2_SIZE] == OP_PROP
|| cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
cc += PRIV(OP_lengths)[op];
break;
case OP_CLASS:
case OP_NCLASS:
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case OP_XCLASS:
if (op == OP_XCLASS)
cc += GET(cc, 1);
else
cc += PRIV(OP_lengths)[OP_CLASS];
#else
cc += PRIV(OP_lengths)[OP_CLASS];
#endif
switch (*cc)
{
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRPOSPLUS:
branchlength++;
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSQUERY:
cc++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
branchlength += GET2(cc,1);
cc += 1 + 2 * IMM2_SIZE;
break;
default:
branchlength++;
break;
}
break;
case OP_DNREF:
case OP_DNREFI:
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
{
int count = GET2(cc, 1+IMM2_SIZE);
pcre_uchar *slot = (pcre_uchar *)re +
re->name_table_offset + GET2(cc, 1) * re->name_entry_size;
d = INT_MAX;
while (count-- > 0)
{
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT);
if (cc > cs && cc < ce)
{
d = 0;
had_recurse = TRUE;
break;
}
else
{
recurse_check *r = recurses;
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
if (r != NULL)
{
d = 0;
had_recurse = TRUE;
break;
}
else
{
int dd;
this_recurse.prev = recurses;
this_recurse.group = cs;
dd = find_minlength(re, cs, startcode, options, &this_recurse,
countptr);
if (dd < d) d = dd;
}
}
slot += re->name_entry_size;
}
}
else d = 0;
cc += 1 + 2*IMM2_SIZE;
goto REPEAT_BACK_REFERENCE;
case OP_REF:
case OP_REFI:
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
{
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
if (cs == NULL) return -2;
do ce += GET(ce, 1); while (*ce == OP_ALT);
if (cc > cs && cc < ce)
{
d = 0;
had_recurse = TRUE;
}
else
{
recurse_check *r = recurses;
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
if (r != NULL)
{
d = 0;
had_recurse = TRUE;
}
else
{
this_recurse.prev = recurses;
this_recurse.group = cs;
d = find_minlength(re, cs, startcode, options, &this_recurse,
countptr);
}
}
}
else d = 0;
cc += 1 + IMM2_SIZE;
REPEAT_BACK_REFERENCE:
switch (*cc)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSQUERY:
min = 0;
cc++;
break;
case OP_CRPLUS:
case OP_CRMINPLUS:
case OP_CRPOSPLUS:
min = 1;
cc++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
min = GET2(cc, 1);
cc += 1 + 2 * IMM2_SIZE;
break;
default:
min = 1;
break;
}
branchlength += min * d;
break;
case OP_RECURSE:
cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
do ce += GET(ce, 1); while (*ce == OP_ALT);
if (cc > cs && cc < ce)
had_recurse = TRUE;
else
{
recurse_check *r = recurses;
for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
if (r != NULL)
had_recurse = TRUE;
else
{
this_recurse.prev = recurses;
this_recurse.group = cs;
branchlength += find_minlength(re, cs, startcode, options,
&this_recurse, countptr);
}
}
cc += 1 + LINK_SIZE;
break;
case OP_UPTO:
case OP_UPTOI:
case OP_NOTUPTO:
case OP_NOTUPTOI:
case OP_MINUPTO:
case OP_MINUPTOI:
case OP_NOTMINUPTO:
case OP_NOTMINUPTOI:
case OP_POSUPTO:
case OP_POSUPTOI:
case OP_NOTPOSUPTO:
case OP_NOTPOSUPTOI:
case OP_STAR:
case OP_STARI:
case OP_NOTSTAR:
case OP_NOTSTARI:
case OP_MINSTAR:
case OP_MINSTARI:
case OP_NOTMINSTAR:
case OP_NOTMINSTARI:
case OP_POSSTAR:
case OP_POSSTARI:
case OP_NOTPOSSTAR:
case OP_NOTPOSSTARI:
case OP_QUERY:
case OP_QUERYI:
case OP_NOTQUERY:
case OP_NOTQUERYI:
case OP_MINQUERY:
case OP_MINQUERYI:
case OP_NOTMINQUERY:
case OP_NOTMINQUERYI:
case OP_POSQUERY:
case OP_POSQUERYI:
case OP_NOTPOSQUERY:
case OP_NOTPOSQUERYI:
cc += PRIV(OP_lengths)[op];
#ifdef SUPPORT_UTF
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
break;
case OP_MARK:
case OP_PRUNE_ARG:
case OP_SKIP_ARG:
case OP_THEN_ARG:
cc += PRIV(OP_lengths)[op] + cc[1];
break;
case OP_CLOSE:
case OP_COMMIT:
case OP_FAIL:
case OP_PRUNE:
case OP_SET_SOM:
case OP_SKIP:
case OP_THEN:
cc += PRIV(OP_lengths)[op];
break;
default:
return -3;
}
}
}
static const pcre_uchar *
set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,
compile_data *cd, BOOL utf)
{
pcre_uint32 c = *p;
#ifdef COMPILE_PCRE8
SET_BIT(c);
#ifdef SUPPORT_UTF
if (utf && c > 127)
{
GETCHARINC(c, p);
#ifdef SUPPORT_UCP
if (caseless)
{
pcre_uchar buff[6];
c = UCD_OTHERCASE(c);
(void)PRIV(ord2utf)(c, buff);
SET_BIT(buff[0]);
}
#endif
return p;
}
#else
(void)(utf);
#endif
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
return p + 1;
#endif
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
if (c > 0xff)
{
c = 0xff;
caseless = FALSE;
}
SET_BIT(c);
#ifdef SUPPORT_UTF
if (utf && c > 127)
{
GETCHARINC(c, p);
#ifdef SUPPORT_UCP
if (caseless)
{
c = UCD_OTHERCASE(c);
if (c > 0xff)
c = 0xff;
SET_BIT(c);
}
#endif
return p;
}
#else
(void)(utf);
#endif
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
return p + 1;
#endif
}
static void
set_type_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
compile_data *cd)
{
register pcre_uint32 c;
for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (table_limit == 32) return;
for (c = 128; c < 256; c++)
{
if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
{
pcre_uchar buff[6];
(void)PRIV(ord2utf)(c, buff);
SET_BIT(buff[0]);
}
}
#endif
}
static void
set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
compile_data *cd)
{
register pcre_uint32 c;
for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
#endif
}
static int
set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,
compile_data *cd)
{
register pcre_uint32 c;
int yield = SSB_DONE;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int table_limit = utf? 16:32;
#else
int table_limit = 32;
#endif
#if 0
volatile int dummy;
#endif
do
{
BOOL try_next = TRUE;
const pcre_uchar *tcode = code + 1 + LINK_SIZE;
if (*code == OP_CBRA || *code == OP_SCBRA ||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
while (try_next)
{
int rc;
switch(*tcode)
{
default:
return SSB_UNKNOWN;
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
case OP_ALLANY:
case OP_ANY:
case OP_ANYBYTE:
case OP_CIRC:
case OP_CIRCM:
case OP_CLOSE:
case OP_COMMIT:
case OP_COND:
case OP_CREF:
case OP_DEF:
case OP_DNCREF:
case OP_DNREF:
case OP_DNREFI:
case OP_DNRREF:
case OP_DOLL:
case OP_DOLLM:
case OP_END:
case OP_EOD:
case OP_EODN:
case OP_EXTUNI:
case OP_FAIL:
case OP_MARK:
case OP_NOT:
case OP_NOTEXACT:
case OP_NOTEXACTI:
case OP_NOTI:
case OP_NOTMINPLUS:
case OP_NOTMINPLUSI:
case OP_NOTMINQUERY:
case OP_NOTMINQUERYI:
case OP_NOTMINSTAR:
case OP_NOTMINSTARI:
case OP_NOTMINUPTO:
case OP_NOTMINUPTOI:
case OP_NOTPLUS:
case OP_NOTPLUSI:
case OP_NOTPOSPLUS:
case OP_NOTPOSPLUSI:
case OP_NOTPOSQUERY:
case OP_NOTPOSQUERYI:
case OP_NOTPOSSTAR:
case OP_NOTPOSSTARI:
case OP_NOTPOSUPTO:
case OP_NOTPOSUPTOI:
case OP_NOTPROP:
case OP_NOTQUERY:
case OP_NOTQUERYI:
case OP_NOTSTAR:
case OP_NOTSTARI:
case OP_NOTUPTO:
case OP_NOTUPTOI:
case OP_NOT_HSPACE:
case OP_NOT_VSPACE:
case OP_PRUNE:
case OP_PRUNE_ARG:
case OP_RECURSE:
case OP_REF:
case OP_REFI:
case OP_REVERSE:
case OP_RREF:
case OP_SCOND:
case OP_SET_SOM:
case OP_SKIP:
case OP_SKIP_ARG:
case OP_SOD:
case OP_SOM:
case OP_THEN:
case OP_THEN_ARG:
return SSB_FAIL;
case OP_PROP:
if (tcode[1] != PT_CLIST) return SSB_FAIL;
{
const pcre_uint32 *p = PRIV(ucd_caseless_sets) + tcode[2];
while ((c = *p++) < NOTACHAR)
{
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (utf)
{
pcre_uchar buff[6];
(void)PRIV(ord2utf)(c, buff);
c = buff[0];
}
#endif
if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
}
}
try_next = FALSE;
break;
case OP_WORD_BOUNDARY:
case OP_NOT_WORD_BOUNDARY:
tcode++;
break;
case OP_BRA:
case OP_SBRA:
case OP_CBRA:
case OP_SCBRA:
case OP_BRAPOS:
case OP_SBRAPOS:
case OP_CBRAPOS:
case OP_SCBRAPOS:
case OP_ONCE:
case OP_ONCE_NC:
case OP_ASSERT:
rc = set_start_bits(tcode, start_bits, utf, cd);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
if (rc == SSB_DONE) try_next = FALSE; else
{
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
}
break;
case OP_ALT:
yield = SSB_CONTINUE;
try_next = FALSE;
break;
case OP_KET:
case OP_KETRMAX:
case OP_KETRMIN:
case OP_KETRPOS:
return SSB_CONTINUE;
case OP_CALLOUT:
tcode += 2 + 2*LINK_SIZE;
break;
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
case OP_BRAZERO:
case OP_BRAMINZERO:
case OP_BRAPOSZERO:
rc = set_start_bits(++tcode, start_bits, utf, cd);
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
case OP_SKIPZERO:
tcode++;
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
case OP_STAR:
case OP_MINSTAR:
case OP_POSSTAR:
case OP_QUERY:
case OP_MINQUERY:
case OP_POSQUERY:
tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
break;
case OP_STARI:
case OP_MINSTARI:
case OP_POSSTARI:
case OP_QUERYI:
case OP_MINQUERYI:
case OP_POSQUERYI:
tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
break;
case OP_UPTO:
case OP_MINUPTO:
case OP_POSUPTO:
tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf);
break;
case OP_UPTOI:
case OP_MINUPTOI:
case OP_POSUPTOI:
tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf);
break;
case OP_EXACT:
tcode += IMM2_SIZE;
case OP_CHAR:
case OP_PLUS:
case OP_MINPLUS:
case OP_POSPLUS:
(void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
try_next = FALSE;
break;
case OP_EXACTI:
tcode += IMM2_SIZE;
case OP_CHARI:
case OP_PLUSI:
case OP_MINPLUSI:
case OP_POSPLUSI:
(void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
try_next = FALSE;
break;
case OP_HSPACE:
SET_BIT(CHAR_HT);
SET_BIT(CHAR_SPACE);
#ifdef SUPPORT_UTF
if (utf)
{
#ifdef COMPILE_PCRE8
SET_BIT(0xC2);
SET_BIT(0xE1);
SET_BIT(0xE2);
SET_BIT(0xE3);
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(0xA0);
SET_BIT(0xFF);
#endif
}
else
#endif
{
#ifndef EBCDIC
SET_BIT(0xA0);
#endif
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(0xFF);
#endif
}
try_next = FALSE;
break;
case OP_ANYNL:
case OP_VSPACE:
SET_BIT(CHAR_LF);
SET_BIT(CHAR_VT);
SET_BIT(CHAR_FF);
SET_BIT(CHAR_CR);
#ifdef SUPPORT_UTF
if (utf)
{
#ifdef COMPILE_PCRE8
SET_BIT(0xC2);
SET_BIT(0xE2);
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(CHAR_NEL);
SET_BIT(0xFF);
#endif
}
else
#endif
{
SET_BIT(CHAR_NEL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(0xFF);
#endif
}
try_next = FALSE;
break;
case OP_NOT_DIGIT:
set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
try_next = FALSE;
break;
case OP_DIGIT:
set_type_bits(start_bits, cbit_digit, table_limit, cd);
try_next = FALSE;
break;
case OP_NOT_WHITESPACE:
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
try_next = FALSE;
break;
case OP_WHITESPACE:
set_type_bits(start_bits, cbit_space, table_limit, cd);
try_next = FALSE;
break;
case OP_NOT_WORDCHAR:
set_nottype_bits(start_bits, cbit_word, table_limit, cd);
try_next = FALSE;
break;
case OP_WORDCHAR:
set_type_bits(start_bits, cbit_word, table_limit, cd);
try_next = FALSE;
break;
case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
case OP_TYPEPOSPLUS:
tcode++;
break;
case OP_TYPEEXACT:
tcode += 1 + IMM2_SIZE;
break;
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
case OP_TYPEPOSUPTO:
tcode += IMM2_SIZE;
case OP_TYPESTAR:
case OP_TYPEMINSTAR:
case OP_TYPEPOSSTAR:
case OP_TYPEQUERY:
case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY:
switch(tcode[1])
{
default:
case OP_ANY:
case OP_ALLANY:
return SSB_FAIL;
case OP_HSPACE:
SET_BIT(CHAR_HT);
SET_BIT(CHAR_SPACE);
#ifdef SUPPORT_UTF
if (utf)
{
#ifdef COMPILE_PCRE8
SET_BIT(0xC2);
SET_BIT(0xE1);
SET_BIT(0xE2);
SET_BIT(0xE3);
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(0xA0);
SET_BIT(0xFF);
#endif
}
else
#endif
#ifndef EBCDIC
SET_BIT(0xA0);
#endif
break;
case OP_ANYNL:
case OP_VSPACE:
SET_BIT(CHAR_LF);
SET_BIT(CHAR_VT);
SET_BIT(CHAR_FF);
SET_BIT(CHAR_CR);
#ifdef SUPPORT_UTF
if (utf)
{
#ifdef COMPILE_PCRE8
SET_BIT(0xC2);
SET_BIT(0xE2);
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(CHAR_NEL);
SET_BIT(0xFF);
#endif
}
else
#endif
SET_BIT(CHAR_NEL);
break;
case OP_NOT_DIGIT:
set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
break;
case OP_DIGIT:
set_type_bits(start_bits, cbit_digit, table_limit, cd);
break;
case OP_NOT_WHITESPACE:
set_nottype_bits(start_bits, cbit_space, table_limit, cd);
break;
case OP_WHITESPACE:
set_type_bits(start_bits, cbit_space, table_limit, cd);
break;
case OP_NOT_WORDCHAR:
set_nottype_bits(start_bits, cbit_word, table_limit, cd);
break;
case OP_WORDCHAR:
set_type_bits(start_bits, cbit_word, table_limit, cd);
break;
}
tcode += 2;
break;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:
if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0)
return SSB_FAIL;
if ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0 && (tcode[1 + LINK_SIZE] & XCL_NOT) != 0)
return SSB_FAIL;
#endif
case OP_NCLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (utf)
{
start_bits[24] |= 0xf0;
memset(start_bits+25, 0xff, 7);
}
#endif
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
SET_BIT(0xFF);
#endif
case OP_CLASS:
{
pcre_uint8 *map;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
map = NULL;
if (*tcode == OP_XCLASS)
{
if ((tcode[1 + LINK_SIZE] & XCL_MAP) != 0)
map = (pcre_uint8 *)(tcode + 1 + LINK_SIZE + 1);
tcode += GET(tcode, 1);
}
else
#endif
{
tcode++;
map = (pcre_uint8 *)tcode;
tcode += 32 / sizeof(pcre_uchar);
}
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
if (map != NULL)
#endif
{
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (utf)
{
for (c = 0; c < 16; c++) start_bits[c] |= map[c];
for (c = 128; c < 256; c++)
{
if ((map[c/8] && (1 << (c&7))) != 0)
{
int d = (c >> 6) | 0xc0;
start_bits[d/8] |= (1 << (d&7));
c = (c & 0xc0) + 0x40 - 1;
}
}
}
else
#endif
{
for (c = 0; c < 32; c++) start_bits[c] |= map[c];
}
}
switch (*tcode)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSSTAR:
case OP_CRPOSQUERY:
tcode++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
else try_next = FALSE;
break;
default:
try_next = FALSE;
break;
}
}
break;
}
}
code += GET(code, 1);
}
while (*code == OP_ALT);
return yield;
}
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
pcre_study(const pcre *external_re, int options, const char **errorptr)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION
pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
#endif
{
int min;
int count = 0;
BOOL bits_set = FALSE;
pcre_uint8 start_bits[32];
PUBL(extra) *extra = NULL;
pcre_study_data *study;
const pcre_uint8 *tables;
pcre_uchar *code;
compile_data compile_block;
const REAL_PCRE *re = (const REAL_PCRE *)external_re;
*errorptr = NULL;
if (re == NULL || re->magic_number != MAGIC_NUMBER)
{
*errorptr = "argument is not a compiled regular expression";
return NULL;
}
if ((re->flags & PCRE_MODE) == 0)
{
#if defined COMPILE_PCRE8
*errorptr = "argument not compiled in 8 bit mode";
#elif defined COMPILE_PCRE16
*errorptr = "argument not compiled in 16 bit mode";
#elif defined COMPILE_PCRE32
*errorptr = "argument not compiled in 32 bit mode";
#endif
return NULL;
}
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
{
*errorptr = "unknown or incorrect option bit(s) set";
return NULL;
}
code = (pcre_uchar *)re + re->name_table_offset +
(re->name_count * re->name_entry_size);
if ((re->options & PCRE_ANCHORED) == 0 &&
(re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)
{
int rc;
tables = re->tables;
#if defined COMPILE_PCRE8
if (tables == NULL)
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
(void *)(&tables));
#elif defined COMPILE_PCRE16
if (tables == NULL)
(void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
(void *)(&tables));
#elif defined COMPILE_PCRE32
if (tables == NULL)
(void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
(void *)(&tables));
#endif
compile_block.lcc = tables + lcc_offset;
compile_block.fcc = tables + fcc_offset;
compile_block.cbits = tables + cbits_offset;
compile_block.ctypes = tables + ctypes_offset;
memset(start_bits, 0, 32 * sizeof(pcre_uint8));
rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
&compile_block);
bits_set = rc == SSB_DONE;
if (rc == SSB_UNKNOWN)
{
*errorptr = "internal error: opcode not recognized";
return NULL;
}
}
switch(min = find_minlength(re, code, code, re->options, NULL, &count))
{
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
default: break;
}
if (bits_set || min > 0 || (options & (
#ifdef SUPPORT_JIT
PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |
PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE |
#endif
PCRE_STUDY_EXTRA_NEEDED)) != 0)
{
extra = (PUBL(extra) *)(PUBL(malloc))
(sizeof(PUBL(extra)) + sizeof(pcre_study_data));
if (extra == NULL)
{
*errorptr = "failed to get memory";
return NULL;
}
study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra)));
extra->flags = PCRE_EXTRA_STUDY_DATA;
extra->study_data = study;
study->size = sizeof(pcre_study_data);
study->flags = 0;
if (bits_set)
{
study->flags |= PCRE_STUDY_MAPPED;
memcpy(study->start_bits, start_bits, sizeof(start_bits));
}
else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));
#ifdef PCRE_DEBUG
if (bits_set)
{
pcre_uint8 *ptr = start_bits;
int i;
printf("Start bits:\n");
for (i = 0; i < 32; i++)
printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n");
}
#endif
if (min > 0)
{
study->flags |= PCRE_STUDY_MINLEN;
study->minlength = min;
}
else study->minlength = 0;
#ifdef SUPPORT_JIT
extra->executable_jit = NULL;
if ((options & PCRE_STUDY_JIT_COMPILE) != 0)
PRIV(jit_compile)(re, extra, JIT_COMPILE);
if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0)
PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE);
if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0 &&
(options & PCRE_STUDY_EXTRA_NEEDED) == 0)
{
#if defined COMPILE_PCRE8
pcre_free_study(extra);
#elif defined COMPILE_PCRE16
pcre16_free_study(extra);
#elif defined COMPILE_PCRE32
pcre32_free_study(extra);
#endif
extra = NULL;
}
#endif
}
return extra;
}
#if defined COMPILE_PCRE8
PCRE_EXP_DEFN void
pcre_free_study(pcre_extra *extra)
#elif defined COMPILE_PCRE16
PCRE_EXP_DEFN void
pcre16_free_study(pcre16_extra *extra)
#elif defined COMPILE_PCRE32
PCRE_EXP_DEFN void
pcre32_free_study(pcre32_extra *extra)
#endif
{
if (extra == NULL)
return;
#ifdef SUPPORT_JIT
if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
extra->executable_jit != NULL)
PRIV(jit_free)(extra->executable_jit);
#endif
PUBL(free)(extra);
}