#ifndef TRE_INTERNAL_H
#define TRE_INTERNAL_H 1
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
#ifdef HAVE_WCTYPE_H
#include <wctype.h>
#endif
#include <ctype.h>
#ifdef __LIBC__
#include <xlocale_private.h>
#else
#include <xlocale.h>
#endif
#include "tre.h"
#include "tre-last-matched.h"
/*
 * DPRINT((fmt, args...)) -- debug trace helper.
 *
 * The single macro argument is a complete, parenthesized printf argument
 * list, which is why call sites use double parentheses.  With TRE_DEBUG
 * defined the message is printed and stdout is flushed; without it the macro
 * expands to an empty statement and the argument is discarded unevaluated.
 */
#ifndef TRE_DEBUG
#define DPRINT(msg) do { } while (0)
#else
#include <stdio.h>
#define DPRINT(msg) do { printf msg; fflush(stdout); } while (0)
#endif
/*
 * Number of elements in the array `x`.
 *
 * Only meaningful for true arrays: applied to a pointer (including an array
 * function parameter, which has already decayed) it silently yields
 * sizeof(pointer) / sizeof(element).  The argument is parenthesized before it
 * is subscripted so any array-valued expression can be passed.
 */
#define elementsof(x) ( sizeof(x) / sizeof((x)[0]) )
/*
 * Multibyte -> wide character conversion wrappers.
 *
 *  - HAVE_MBRTOWC: tre_mbrtowc() and tre_mbrtowc_l() forward to mbrtowc()
 *    and mbrtowc_l() respectively.
 *  - otherwise, HAVE_MBTOWC: tre_mbrtowc() forwards to mbtowc() and its `ps`
 *    argument is dropped; no tre_mbrtowc_l() is provided in this case.
 *  - neither: no wrapper is defined.
 */
#if defined(HAVE_MBRTOWC)
#define tre_mbrtowc(pwc, s, n, ps) (mbrtowc((pwc), (s), (n), (ps)))
#define tre_mbrtowc_l(pwc, s, n, ps, l) (mbrtowc_l((pwc), (s), (n), (ps), (l)))
#elif defined(HAVE_MBTOWC)
#define tre_mbrtowc(pwc, s, n, ps) (mbtowc((pwc), (s), (n)))
#endif
/*
 * TRE_MBSTATE is defined only when multibyte support is enabled
 * (TRE_MULTIBYTE) and the configuration reports mbstate_t as available
 * (HAVE_MBSTATE_T).
 */
#if defined(TRE_MULTIBYTE) && defined(HAVE_MBSTATE_T)
#define TRE_MBSTATE
#endif
/*
 * Character abstraction layer.
 *
 * Depending on TRE_WCHAR, the tre_* names below map either onto the wide
 * character API (isw*, tow*, wcslen) or onto the single-byte <ctype.h> API.
 * tre_cint_t is the integer type used to carry one character code.
 */
#ifdef TRE_WCHAR

/* Wide character build: characters are carried as wint_t. */
typedef wint_t tre_cint_t;
#define TRE_CHAR_MAX WCHAR_MAX

#ifdef TRE_MULTIBYTE
#define TRE_MB_CUR_MAX MB_CUR_MAX
#define TRE_MB_CUR_MAX_L MB_CUR_MAX_L
#else /* !TRE_MULTIBYTE */
#define TRE_MB_CUR_MAX 1
#endif /* !TRE_MULTIBYTE */

#define tre_isalnum iswalnum
#define tre_isalpha iswalpha
#ifdef HAVE_ISWBLANK
#define tre_isblank iswblank
#endif /* HAVE_ISWBLANK */
#define tre_iscntrl iswcntrl
#define tre_isdigit iswdigit
#define tre_isgraph iswgraph
#define tre_islower iswlower
#define tre_isprint iswprint
#define tre_ispunct iswpunct
#define tre_isspace iswspace
#define tre_isupper iswupper
#define tre_isxdigit iswxdigit

#define tre_tolower towlower
#define tre_toupper towupper
#define tre_strlen wcslen

/* Locale-taking (_l) variants; only this branch defines them. */
#define tre_isalnum_l iswalnum_l
#define tre_isdigit_l iswdigit_l
#define tre_islower_l iswlower_l
#define tre_isupper_l iswupper_l
#define tre_isxdigit_l iswxdigit_l
#define tre_tolower_l towlower_l
#define tre_toupper_l towupper_l

#else /* !TRE_WCHAR */

/* Single-byte build: characters are carried as short, codes 0..255. */
typedef short tre_cint_t;
#define TRE_CHAR_MAX 255
#define TRE_MB_CUR_MAX 1

#define tre_isalnum isalnum
#define tre_isalpha isalpha
#ifdef HAVE_ISASCII
#define tre_isascii isascii
#endif /* HAVE_ISASCII */
#ifdef HAVE_ISBLANK
#define tre_isblank isblank
#endif /* HAVE_ISBLANK */
#define tre_iscntrl iscntrl
#define tre_isdigit isdigit
#define tre_isgraph isgraph
#define tre_islower islower
#define tre_isprint isprint
#define tre_ispunct ispunct
#define tre_isspace isspace
#define tre_isupper isupper
#define tre_isxdigit isxdigit

/*
 * The function-like wrappers are fully parenthesized (whole expansion and
 * the cast operand) so that they behave like a single primary expression at
 * every call site, e.g. under sizeof or next to a postfix operator.
 */
#define tre_tolower(c) ((tre_cint_t)(tolower(c)))
#define tre_toupper(c) ((tre_cint_t)(toupper(c)))
#define tre_strlen(s) (strlen((const char *)(s)))

#endif /* !TRE_WCHAR */
/*
 * Character class abstraction.
 *
 * TRE_USE_SYSTEM_WCTYPE is switched on when the wide build is active and
 * both iswctype() and wctype() are reported available.
 */
#if defined(TRE_WCHAR) && defined(HAVE_ISWCTYPE) && defined(HAVE_WCTYPE)
#define TRE_USE_SYSTEM_WCTYPE 1
#endif

#ifndef TRE_USE_SYSTEM_WCTYPE

/*
 * Fallback: a character class is a pointer to a predicate function taking a
 * tre_cint_t, and testing a character is just calling that predicate.
 * tre_ctype() is declared here and defined elsewhere; NOTE(review): it
 * presumably looks a class up by name -- confirm against its definition.
 */
typedef int (*tre_ctype_t)(tre_cint_t);
#define tre_isctype(c, type) ( (type)(c) )
tre_ctype_t tre_ctype(const char *name);

#else /* TRE_USE_SYSTEM_WCTYPE */

/* Use the system wctype facility directly, including the _l variants. */
typedef wctype_t tre_ctype_t;
#define tre_isctype iswctype
#define tre_ctype wctype
#define tre_isctype_l iswctype_l
#define tre_ctype_l wctype_l

#endif /* TRE_USE_SYSTEM_WCTYPE */
/*
 * Kind of input string handed to the matchers (see the `type` parameter of
 * the tre_tnfa_run_* prototypes).  NOTE(review): by their names these are
 * presumably wide, single-byte and multibyte strings respectively -- confirm
 * against the matcher sources.  STR_USER exists only when TRE_STR_USER is
 * defined.
 */
typedef enum {
  STR_WIDE = 0,
  STR_BYTE = 1,
  STR_MBS = 2,
#ifdef TRE_STR_USER
  STR_USER = 3
#endif
} tre_str_type_t;
/*
 * ALIGN(ptr, type) -- number of bytes that must be added to `ptr` to reach
 * the next address that is a multiple of sizeof(type); 0 if `ptr` is already
 * such a multiple.
 *
 * `ptr` is parenthesized before the cast so that expression arguments such
 * as `p + 1` are converted as a whole (a bare `(long)p + 1` would add one
 * byte instead of one element).  `ptr` appears twice in the expansion, so it
 * must not have side effects.  The address is converted through `long`,
 * which assumes `long` is wide enough to hold a pointer.
 */
#define ALIGN(ptr, type) \
  ((((long)(ptr)) % sizeof(type)) \
   ? (sizeof(type) - (((long)(ptr)) % sizeof(type))) \
   : 0)
/*
 * Local MIN/MAX.  Any earlier definitions are dropped first so that these
 * are the ones in effect.  Each argument may be evaluated twice, so callers
 * must not pass expressions with side effects.  On ties the first argument
 * is the one returned.
 */
#undef MAX
#undef MIN
#define MIN(a, b) ((a) <= (b) ? (a) : (b))
#define MAX(a, b) ((a) >= (b) ? (a) : (b))
/*
 * printf conversion (without the leading '%') for printing a string in the
 * library's character type: "ls" in the wide build, "s" otherwise.
 */
#ifndef TRE_WCHAR
#define STRF "s"
#else
#define STRF "ls"
#endif
/*
 * Discriminator for one tre_bracket_match_t entry.  The numeric values are
 * consecutive starting at 0, with UNUSED being 0.
 * NOTE(review): the names suggest single characters, range endpoints,
 * character classes and equivalence classes of a bracket expression --
 * confirm against the parser.
 */
typedef enum {
  TRE_BRACKET_MATCH_TYPE_UNUSED = 0,
  TRE_BRACKET_MATCH_TYPE_CHAR = 1,
  TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN = 2,
  TRE_BRACKET_MATCH_TYPE_RANGE_END = 3,
  TRE_BRACKET_MATCH_TYPE_CLASS = 4,
  TRE_BRACKET_MATCH_TYPE_EQUIVALENCE = 5
} tre_bracket_match_type_t;
/*
 * One entry of a bracket match list: a type discriminator plus a value of
 * the library's character integer type.
 * NOTE(review): how `value` is interpreted for each `type` is not visible
 * in this header -- confirm against the code that builds and walks the list.
 */
typedef struct {
  tre_bracket_match_type_t type;  /* what kind of entry this is */
  tre_cint_t value;               /* payload for that kind of entry */
} tre_bracket_match_t;
/*
 * Flag bit.  NOTE(review): presumably stored in
 * tre_bracket_match_list_t.flags to mark a negated bracket expression --
 * confirm against the users of the list.
 */
#define TRE_BRACKET_MATCH_FLAG_NEGATE 1

/*
 * Variable-length list of bracket match entries.  The header is followed
 * directly by the entries; `num_bracket_matches` is the entry count used by
 * SIZEOF_BRACKET_MATCH_LIST() to size the object.
 *
 * The trailing array is a C99 flexible array member (rather than the GNU
 * zero-length array extension), so the object must always be allocated
 * dynamically with one of the SIZEOF_BRACKET_MATCH_LIST* sizes.
 */
typedef struct {
  int num_bracket_matches;
  int flags;
  tre_bracket_match_t bracket_matches[];
} tre_bracket_match_list_t;

/* Bytes needed for a list holding `n` entries. */
#define SIZEOF_BRACKET_MATCH_LIST_N(n) (sizeof(tre_bracket_match_list_t) + \
					sizeof(tre_bracket_match_t) * (n))

/* Bytes occupied by the existing list `l`, derived from its entry count. */
#define SIZEOF_BRACKET_MATCH_LIST(l) SIZEOF_BRACKET_MATCH_LIST_N( \
					(l)->num_bracket_matches)
/*
 * Per-tag record: four plain int fields.  Arrays of these are what the
 * matchers receive as `match_tags` and what tre_fill_pmatch() takes as
 * `tags` (see the prototypes in this header).
 * NOTE(review): the meaning of the individual fields is defined by the
 * matcher implementations, which are not visible here.
 */
typedef struct {
  int count;
  int first;
  int value;
  int touch;
} tre_tag_t;
/*
 * One transition of the automaton.  The typedef is introduced first so the
 * structure can point at its own type.
 */
typedef struct tnfa_transition tre_tnfa_transition_t;

struct tnfa_transition {
  /* NOTE(review): presumably the range of character codes this transition
     accepts -- confirm whether both ends are inclusive. */
  tre_cint_t code_min;
  tre_cint_t code_max;

  /* Transition record this one points at, and an integer id kept with it. */
  tre_tnfa_transition_t *state;
  int state_id;

  /* NOTE(review): presumably terminated int arrays; the terminator and
     ownership are not visible in this header. */
  int *tags;
  int *params;

  /* NOTE(review): presumably a bitwise OR of the ASSERT_* flags. */
  int assertions;

  /* Extra payload.  NOTE(review): presumably selected by
     ASSERT_BRACKET_MATCH / ASSERT_BACKREF in `assertions` -- confirm. */
  union {
    tre_bracket_match_list_t *bracket_match_list;
    int backref;
  } u;
};
/*
 * Assertion flags.  Each is a distinct single bit, so they can be combined
 * with bitwise OR.  ASSERT_LAST has the same value as the highest flag,
 * ASSERT_BACKREF.
 */
#define ASSERT_AT_BOL        0x01
#define ASSERT_AT_EOL        0x02
#define ASSERT_BRACKET_MATCH 0x04
#define ASSERT_AT_BOW        0x08
#define ASSERT_AT_EOW        0x10
#define ASSERT_AT_WB         0x20
#define ASSERT_AT_WB_NEG     0x40
#define ASSERT_BACKREF       0x80
#define ASSERT_LAST          0x80
/*
 * Direction attached to a tag (struct tnfa keeps an array of these in
 * `tag_directions`).  Values are consecutive starting at 0.
 * NOTE(review): presumably states whether a smaller or larger tag value is
 * preferred when choosing between matches -- confirm against the matchers.
 */
typedef enum {
  TRE_TAG_MINIMIZE = 0,
  TRE_TAG_MAXIMIZE = 1,
  TRE_TAG_LEFT_MAXIMIZE = 2
} tre_tag_direction_t;
/*
 * Parameter identifiers, numbered 0..8, with TRE_PARAM_LAST (9) equal to
 * the number of identifiers.
 * NOTE(review): presumably indices into the `params` int arrays carried by
 * transitions (approximate matching costs and limits) -- confirm against
 * the code that reads those arrays.
 */
typedef enum {
  TRE_PARAM_COST_INS   = 0,
  TRE_PARAM_COST_DEL   = 1,
  TRE_PARAM_COST_SUBST = 2,
  TRE_PARAM_COST_MAX   = 3,
  TRE_PARAM_MAX_INS    = 4,
  TRE_PARAM_MAX_DEL    = 5,
  TRE_PARAM_MAX_SUBST  = 6,
  TRE_PARAM_MAX_ERR    = 7,
  TRE_PARAM_DEPTH      = 8,
  TRE_PARAM_LAST       = 9
} tre_param_t;
/*
 * Sentinel values, both negative.  NOTE(review): presumably stored in a
 * parameter slot in place of a real value -- confirm against the code that
 * reads the slots.  The replacement lists are parenthesized so the minus
 * sign can never combine with neighbouring tokens at the point of use.
 */
#define TRE_PARAM_UNSET (-1)
#define TRE_PARAM_DEFAULT (-2)
/*
 * Per-submatch record holding two int tag fields.
 * NOTE(review): by their names `so_tag` / `eo_tag` are presumably the tags
 * for the submatch's start and end offsets -- confirm against
 * tre_fill_pmatch().
 */
typedef struct tre_submatch_data {
  int so_tag;
  int eo_tag;
} tre_submatch_data_t;
/*
 * Top-level automaton record.  The matchers and tre_fill_pmatch() declared
 * in this header all take a `const tre_tnfa_t *`.
 * NOTE(review): ownership and lifetime of the pointed-to arrays are not
 * visible in this header; the field notes below are limited to what the
 * declarations show.
 */
typedef struct tnfa tre_tnfa_t;

struct tnfa {
  /* Pointers into transition storage. */
  tre_tnfa_transition_t *transitions;
  tre_tnfa_transition_t *initial;
  tre_tnfa_transition_t *final;

  /* Submatch records (see tre_submatch_data_t). */
  tre_submatch_data_t *submatch_data;

#ifdef USE_FIRSTPOS_CHARS
  /* Present only when USE_FIRSTPOS_CHARS is defined. */
  char *firstpos_chars;
#endif

  /* Per-tag data. */
  tre_tag_direction_t *tag_directions;
  int *minimal_tags;

  /* Type comes from tre-last-matched.h. */
  tre_last_matched_branch_t *last_matched_branch;

  /* Locale handle kept with the automaton. */
  locale_t loc;

  /* Counters, ids and flags. */
  unsigned int num_transitions;
  int first_char;
  unsigned int num_submatches;
  unsigned int num_submatches_invisible;
  int num_tags;
  int num_minimals;
  int end_tag;
  int num_states;
  int cflags;
  int have_backrefs;
  int num_reorder_tags;
  int have_approx;
  int params_depth;
};
/*
 * tre_compile -- declaration only; the definition lives in another
 * translation unit.  __private_extern__ keeps the symbol internal to the
 * library.
 *
 *   preg    regex_t object the function operates on.
 *   regex   pattern text in tre_char_t units.
 *   n       size value passed with `regex`; NOTE(review): presumably the
 *           pattern length -- confirm against the definition.
 *   cflags  compilation flags.
 *   loc     locale handle.
 *
 * Returns an int.  NOTE(review): presumably a regex error code with 0
 * meaning success -- confirm against the definition.
 */
__private_extern__ int
tre_compile(regex_t * __restrict preg,
            const tre_char_t * __restrict regex,
            size_t n,
            int cflags,
            locale_t __restrict loc);
/*
 * tre_free -- declaration only; library-internal (__private_extern__).
 * Takes the regex_t to operate on and returns nothing.
 * NOTE(review): presumably releases what tre_compile() attached to `preg`
 * -- confirm against the definition.
 */
__private_extern__ void
tre_free(regex_t *preg);
/*
 * tre_fill_pmatch -- declaration only; library-internal
 * (__private_extern__).
 *
 *   nmatch    size value passed with `pmatch`; NOTE(review): presumably the
 *             number of elements in `pmatch` -- confirm.
 *   pmatch    array of regmatch_t.
 *   cflags    compilation flags.
 *   tnfa      automaton the tags belong to (read-only).
 *   tags      tag records (read-only).
 *   match_eo  int passed by value; NOTE(review): presumably the end offset
 *             of the overall match -- confirm.
 *
 * Returns a reg_errcode_t.
 */
__private_extern__ reg_errcode_t
tre_fill_pmatch(size_t nmatch,
                regmatch_t pmatch[ __restrict ],
                int cflags,
                const tre_tnfa_t * __restrict tnfa,
                const tre_tag_t * __restrict tags,
                int match_eo);
/*
 * tre_tnfa_run_parallel -- declaration only; library-internal
 * (__private_extern__).  Takes the same parameter list as
 * tre_tnfa_run_backtrack().
 *
 *   tnfa           automaton to run (read-only).
 *   string         input, passed as untyped read-only data; `type` says how
 *                  it is to be interpreted.
 *   len            int passed with `string`; NOTE(review): presumably the
 *                  input length, possibly with a negative "terminated"
 *                  convention -- confirm.
 *   type           one of the tre_str_type_t values.
 *   match_tags     tag records supplied by the caller; not const.
 *   eflags         execution flags.
 *   match_end_ofs  pointer to an int; not const.
 *
 * Returns a reg_errcode_t.
 */
__private_extern__ reg_errcode_t
tre_tnfa_run_parallel(const tre_tnfa_t * __restrict tnfa,
                      const void * __restrict string,
                      int len,
                      tre_str_type_t type,
                      tre_tag_t * __restrict match_tags,
                      int eflags,
                      int * __restrict match_end_ofs);
/*
 * tre_tnfa_run_backtrack -- declaration only; library-internal
 * (__private_extern__).  Takes the same parameter list as
 * tre_tnfa_run_parallel().
 * NOTE(review): struct tnfa carries a `have_backrefs` field, so this is
 * presumably the matcher used for patterns with back references -- confirm
 * against the caller that picks a matcher.
 *
 *   tnfa           automaton to run (read-only).
 *   string         input, passed as untyped read-only data; `type` says how
 *                  it is to be interpreted.
 *   len            int passed with `string`; NOTE(review): presumably the
 *                  input length -- confirm.
 *   type           one of the tre_str_type_t values.
 *   match_tags     tag records supplied by the caller; not const.
 *   eflags         execution flags.
 *   match_end_ofs  pointer to an int; not const.
 *
 * Returns a reg_errcode_t.
 */
__private_extern__ reg_errcode_t
tre_tnfa_run_backtrack(const tre_tnfa_t * __restrict tnfa,
                       const void * __restrict string,
                       int len,
                       tre_str_type_t type,
                       tre_tag_t * __restrict match_tags,
                       int eflags,
                       int * __restrict match_end_ofs);
#ifdef TRE_APPROX
/*
 * tre_tnfa_run_approx -- declaration only; library-internal
 * (__private_extern__).  Declared only when TRE_APPROX is defined.  Takes
 * the parameters of the other tre_tnfa_run_* functions plus `match` and
 * `params`.
 *
 *   tnfa           automaton to run (read-only).
 *   string         input, passed as untyped read-only data; `type` says how
 *                  it is to be interpreted.
 *   len            int passed with `string`; NOTE(review): presumably the
 *                  input length -- confirm.
 *   type           one of the tre_str_type_t values.
 *   match_tags     tag records supplied by the caller; not const.
 *   match          regamatch_t supplied by the caller; not const.
 *   params         regaparams_t passed by value.
 *   eflags         execution flags.
 *   match_end_ofs  pointer to an int; not const.
 *
 * Returns a reg_errcode_t.
 */
__private_extern__ reg_errcode_t
tre_tnfa_run_approx(const tre_tnfa_t * __restrict tnfa,
                    const void * __restrict string,
                    int len,
                    tre_str_type_t type,
                    tre_tag_t * __restrict match_tags,
                    regamatch_t * __restrict match,
                    regaparams_t params,
                    int eflags,
                    int * __restrict match_end_ofs);
#endif /* TRE_APPROX */
#endif