#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/locale/collate.c,v 1.33 2004/09/22 16:56:48 stefanf Exp $");
#include "xlocale_private.h"
/*
 * Shorthand accessors for the collation data attached to a locale.
 * Every macro below expands to an expression that mentions `loc', so it can
 * only be used where a variable of that name is in scope, and it
 * dereferences loc->__lc_collate, which is set to NULL for the "C"/"POSIX"
 * locale by __collate_load_tables().
 */
#define __collate_chain_equiv_table (loc->__lc_collate->__chain_equiv_table)
#define __collate_chain_pri_table (loc->__lc_collate->__chain_pri_table)
#define __collate_char_pri_table (loc->__lc_collate->__char_pri_table)
/* Unlike the other accessors, this one yields the ADDRESS of the info struct. */
#define __collate_info (&loc->__lc_collate->__info)
#define __collate_large_char_pri_table (loc->__lc_collate->__large_char_pri_table)
#define __collate_substitute_table (loc->__lc_collate->__substitute_table)
#include "namespace.h"
#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <wchar.h>
#include <errno.h>
#include <unistd.h>
#include <sysexits.h>
#include <ctype.h>
#include "un-namespace.h"
#include "collate.h"
#include "setlocale.h"
#include "ldpart.h"
#include "libc_private.h"
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
/* Byte-order conversion helper for wide strings; defined near the end of this file. */
static void wntohl(wchar_t *, int);
#endif
/* Writes a diagnostic to stderr and calls exit(ex); it never returns. */
void __collate_err(int ex, const char *f) __dead2;
/* Drop any macro of this name so that the next line defines a real object. */
#undef __collate_load_error
/*
 * Load state for the global locale: 1 until tables have been loaded (and
 * again after switching to "C"/"POSIX"), 0 once __collate_load_tables()
 * has installed tables into __global_locale.
 */
int __collate_load_error = 1;
/*
 * Load the LC_COLLATE tables for `encoding' and attach them to `loc'.
 *
 * Returns:
 *   _LDP_CACHE   - `encoding' is "C"/"POSIX" (tables are dropped and the
 *                  load-error flag is set), or it matches the single-entry
 *                  static cache, which is then shared with `loc'.
 *   _LDP_LOADED  - the tables were read from
 *                  <_PathLocale>/<encoding>/LC_COLLATE and installed.
 *   _LDP_ERROR   - the file could not be opened or read, has an unknown
 *                  version string or implausible counts (errno = EFTYPE),
 *                  the path does not fit in PATH_MAX (errno = ENAMETOOLONG),
 *                  or memory could not be allocated.
 *
 * The file stores 32-bit values in network byte order; on little-endian
 * targets everything is converted in place after reading.
 *
 * _PathLocale is used without a NULL check, exactly as before; callers are
 * expected to have set it up.
 */
__private_extern__ int
__collate_load_tables(const char *encoding, locale_t loc)
{
	FILE *fp;
	int i, saverr, chains, z;
	size_t total;
	char strbuf[STR_LEN], buf[PATH_MAX];
	struct __xlocale_st_collate *TMP;
	static struct __xlocale_st_collate *cache = NULL;
	struct __collate_st_info info;
	char *vp;

	/* The "C" and "POSIX" locales have no tables at all. */
	if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
		loc->__collate_load_error = 1;
		if (loc == &__global_locale)
			__collate_load_error = 1;
		XL_RELEASE(loc->__lc_collate);
		loc->__lc_collate = NULL;
		return (_LDP_CACHE);
	}

	/* If the locale name is the same as our cache, share the cached tables. */
	if (cache && strcmp(encoding, cache->__encoding) == 0) {
		loc->__collate_load_error = 0;
		if (loc == &__global_locale)
			__collate_load_error = 0;
		XL_RELEASE(loc->__lc_collate);
		loc->__lc_collate = cache;
		XL_RETAIN(loc->__lc_collate);
		/*
		 * Keep the per-locale flag in sync with the tables now attached;
		 * it is derived the same way as on the freshly-loaded path below.
		 */
		loc->__collate_substitute_nontrivial =
		    (cache->__info.subst_count[0] > 0 ||
		    cache->__info.subst_count[1] > 0);
		return (_LDP_CACHE);
	}

	/* Build the file name with a bounded formatter so it cannot overrun buf. */
	i = snprintf(buf, sizeof(buf), "%s/%s/LC_COLLATE", _PathLocale, encoding);
	if (i < 0 || (size_t)i >= sizeof(buf)) {
		errno = ENAMETOOLONG;
		return (_LDP_ERROR);
	}
	if ((fp = fopen(buf, "r")) == NULL)
		return (_LDP_ERROR);

	/* The file starts with a fixed-size version string. */
	if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
		saverr = errno;
		(void)fclose(fp);
		errno = saverr;
		return (_LDP_ERROR);
	}
	if (strcmp(strbuf, COLLATE_VERSION1_1A) != 0) {
		(void)fclose(fp);
		errno = EFTYPE;
		return (_LDP_ERROR);
	}

	/* Next comes the info header that describes the table sizes. */
	if (fread(&info, sizeof(info), 1, fp) != 1) {
		saverr = errno;
		(void)fclose(fp);
		errno = saverr;
		return (_LDP_ERROR);
	}
	/*
	 * directive_count indexes the per-directive arrays below and bounds the
	 * xf[] writes in __collate_xfrm(), which only clears COLL_WEIGHTS_MAX
	 * slots, so refuse anything larger before using it as a loop bound.
	 * NOTE(review): assumes the per-directive arrays in the info struct
	 * hold COLL_WEIGHTS_MAX entries - confirm against collate.h.
	 */
	if (info.directive_count > COLL_WEIGHTS_MAX) {
		(void)fclose(fp);
		errno = EFTYPE;
		return (_LDP_ERROR);
	}
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
	for (z = 0; z < info.directive_count; z++) {
		info.undef_pri[z] = ntohl(info.undef_pri[z]);
		info.subst_count[z] = ntohl(info.subst_count[z]);
	}
	info.chain_count = ntohl(info.chain_count);
	info.large_pri_count = ntohl(info.large_pri_count);
#endif
	/* Negative counts would corrupt the size computation below. */
	if ((chains = info.chain_count) < 0 || info.large_pri_count < 0) {
		(void)fclose(fp);
		errno = EFTYPE;
		return (_LDP_ERROR);
	}
	for (z = 0; z < info.directive_count; z++) {
		if (info.subst_count[z] < 0) {
			(void)fclose(fp);
			errno = EFTYPE;
			return (_LDP_ERROR);
		}
	}

	/*
	 * One allocation holds the fixed structure followed by the
	 * variable-length tables (substitutions, chains, large characters).
	 */
	total = sizeof(struct __xlocale_st_collate)
	    + sizeof(struct __collate_st_chain_pri) * (size_t)chains
	    + sizeof(struct __collate_st_large_char_pri) * (size_t)info.large_pri_count;
	for (z = 0; z < info.directive_count; z++)
		total += sizeof(struct __collate_st_subst) * (size_t)info.subst_count[z];
	if ((TMP = (struct __xlocale_st_collate *)malloc(total)) == NULL) {
		saverr = errno;
		(void)fclose(fp);
		errno = saverr;
		return (_LDP_ERROR);
	}
	/* NOTE(review): presumably one reference for the cache and one for loc. */
	TMP->__refcount = 2;
	TMP->__free_extra = NULL;

	/*
	 * Everything from __char_pri_table to the end of the allocation is
	 * read from the file in a single request.
	 */
	if (fread(TMP->__char_pri_table,
	    total - offsetof(struct __xlocale_st_collate, __char_pri_table),
	    1, fp) != 1) {
		saverr = errno;
		free(TMP);
		(void)fclose(fp);
		errno = saverr;
		return (_LDP_ERROR);
	}
	(void)fclose(fp);

	/* Carve the trailing bytes up into the individual tables. */
	vp = (char *)(TMP + 1);

	/* the COLLATE_SUBST_DUP optimization relies on the first table being used */
	if (info.subst_count[0] > 0) {
		TMP->__substitute_table[0] = (struct __collate_st_subst *)vp;
		vp += info.subst_count[0] * sizeof(struct __collate_st_subst);
	} else
		TMP->__substitute_table[0] = NULL;
	if (info.flags & COLLATE_SUBST_DUP)
		TMP->__substitute_table[1] = TMP->__substitute_table[0];
	else if (info.subst_count[1] > 0) {
		TMP->__substitute_table[1] = (struct __collate_st_subst *)vp;
		vp += info.subst_count[1] * sizeof(struct __collate_st_subst);
	} else
		TMP->__substitute_table[1] = NULL;

	if (chains > 0) {
		TMP->__chain_pri_table = (struct __collate_st_chain_pri *)vp;
		vp += chains * sizeof(struct __collate_st_chain_pri);
	} else
		TMP->__chain_pri_table = NULL;
	if (info.large_pri_count > 0)
		TMP->__large_char_pri_table = (struct __collate_st_large_char_pri *)vp;
	else
		TMP->__large_char_pri_table = NULL;

#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
	/* Convert every multi-byte value from network to host byte order. */
	{
		struct __collate_st_char_pri *p = TMP->__char_pri_table;
		for (i = UCHAR_MAX + 1; i-- > 0; p++) {
			for (z = 0; z < info.directive_count; z++)
				p->pri[z] = ntohl(p->pri[z]);
		}
	}
	for (z = 0; z < info.directive_count; z++)
		if (info.subst_count[z] > 0) {
			struct __collate_st_subst *p = TMP->__substitute_table[z];
			for (i = info.subst_count[z]; i-- > 0; p++) {
				p->val = ntohl(p->val);
				wntohl(p->str, STR_LEN);
			}
		}
	{
		struct __collate_st_chain_pri *p = TMP->__chain_pri_table;
		for (i = chains; i-- > 0; p++) {
			wntohl(p->str, STR_LEN);
			for (z = 0; z < info.directive_count; z++)
				p->pri[z] = ntohl(p->pri[z]);
		}
	}
	if (info.large_pri_count > 0) {
		struct __collate_st_large_char_pri *p = TMP->__large_char_pri_table;
		for (i = info.large_pri_count; i-- > 0; p++) {
			p->val = ntohl(p->val);
			for (z = 0; z < info.directive_count; z++)
				p->pri.pri[z] = ntohl(p->pri.pri[z]);
		}
	}
#endif

	/*
	 * NOTE(review): the size of TMP->__encoding is not visible here; this
	 * copy relies on the caller having bounded the length of `encoding'.
	 */
	(void)strcpy(TMP->__encoding, encoding);
	(void)memcpy(&TMP->__info, &info, sizeof(info));

	/* Replace the cache entry and hand the new tables to the locale. */
	XL_RELEASE(cache);
	cache = TMP;
	XL_RELEASE(loc->__lc_collate);
	loc->__lc_collate = cache;

	loc->__collate_substitute_nontrivial = (info.subst_count[0] > 0 || info.subst_count[1] > 0);
	loc->__collate_load_error = 0;
	if (loc == &__global_locale)
		__collate_load_error = 0;

	return (_LDP_LOADED);
}
/*
 * Bounded wide-string length: the number of characters before the first
 * NUL in `s', but never more than `len'.
 *
 * The bound is tested BEFORE the element is read, so a buffer of exactly
 * `len' characters with no terminator (a completely filled fixed-size
 * field) is never read past its end.
 */
static int
__collate_wcsnlen(const wchar_t *s, int len)
{
	int n = 0;

	while (n < len && s[n] != 0)
		n++;
	return n;
}
/*
 * Binary search of a substitution table for the entry whose `val' equals
 * `key'.  `tab' holds `n' entries; the search is only meaningful if they
 * are ordered by ascending `val'.  Returns the matching entry, or NULL if
 * there is none (including when n <= 0).
 */
static struct __collate_st_subst *
substsearch(const wchar_t key, struct __collate_st_subst *tab, int n)
{
	int lo, hi;

	for (lo = 0, hi = n - 1; lo <= hi; ) {
		int mid = (lo + hi) / 2;
		struct __collate_st_subst *entry = &tab[mid];
		int diff = key - entry->val;

		if (diff == 0)
			return entry;
		if (diff > 0)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return NULL;
}
/*
 * Return a newly allocated copy of `s' in which every character that has
 * an entry in substitution table `which' is replaced by that entry's
 * string.  A NULL or empty `s' yields a fresh empty string, and a locale
 * without substitutions for `which' yields a plain duplicate.
 *
 * The result comes from malloc(); allocation failure does not return but
 * goes through __collate_err().
 */
__private_extern__ wchar_t *
__collate_substitute(const wchar_t *s, int which, locale_t loc)
{
	int cap, used, need;
	int piece_len, slack, nsubst;
	wchar_t *out = NULL;
	const wchar_t *piece;
	struct __collate_st_subst *table, *hit;

	if (s == NULL || *s == '\0')
		return (__collate_wcsdup(L""));

	cap = wcslen(s);
	nsubst = __collate_info->subst_count[which];
	if (nsubst <= 0)
		return __collate_wcsdup(s);
	table = __collate_substitute_table[which];

	/* Start with 25% headroom (at least two characters). */
	slack = cap / 4;
	if (slack < 2)
		slack = 2;
	cap += slack;
	out = (wchar_t *)malloc(cap * sizeof(wchar_t));
	if (out == NULL)
		__collate_err(EX_OSERR, __func__);

	used = 0;
	for (; *s; s++) {
		hit = substsearch(*s, table, nsubst);
		if (hit != NULL) {
			/* Replacement strings live in fixed STR_LEN fields. */
			piece = hit->str;
			piece_len = __collate_wcsnlen(piece, STR_LEN);
		} else {
			/* No substitution: copy the character itself. */
			piece = s;
			piece_len = 1;
		}

		need = used + piece_len;
		if (cap <= need) {
			/* Grow so that the piece and the final NUL both fit. */
			cap = need + slack;
			out = reallocf(out, cap * sizeof(wchar_t));
			if (out == NULL)
				__collate_err(EX_OSERR, __func__);
		}
		wcsncpy(out + used, piece, piece_len);
		used = need;
	}
	out[used] = 0;
	return (out);
}
/*
 * Binary search of the locale's chain table for an entry whose string is a
 * prefix of `key'.  Entries are first compared on their first character;
 * on a tie the entry's whole string (at most STR_LEN characters) is
 * compared against the same number of characters of `key'.
 *
 * On a match the entry is returned and its string length is stored in
 * *len.  When nothing matches NULL is returned and *len is left untouched.
 */
static struct __collate_st_chain_pri *
chainsearch(const wchar_t *key, int *len, locale_t loc)
{
	struct __collate_st_chain_pri *table = __collate_chain_pri_table;
	int lo = 0;
	int hi = __collate_info->chain_count - 1;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;
		struct __collate_st_chain_pri *entry = table + mid;
		int diff = *key - *entry->str;

		if (diff == 0) {
			int chain_len = __collate_wcsnlen(entry->str, STR_LEN);

			diff = wcsncmp(key, entry->str, chain_len);
			if (diff == 0) {
				*len = chain_len;
				return entry;
			}
		}
		if (diff > 0)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return NULL;
}
/*
 * Binary search of the locale's large-character priority table for the
 * entry whose `val' equals `key'.  Returns the entry, or NULL when the
 * character is not listed or the table is empty.
 */
static struct __collate_st_large_char_pri *
largesearch(const wchar_t key, locale_t loc)
{
	struct __collate_st_large_char_pri *table = __collate_large_char_pri_table;
	int lo = 0;
	int hi = __collate_info->large_pri_count - 1;

	while (lo <= hi) {
		int mid = (lo + hi) / 2;
		struct __collate_st_large_char_pri *entry = table + mid;
		int diff = key - entry->val;

		if (diff == 0)
			return entry;
		if (diff > 0)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return NULL;
}
/*
 * Look up the primary and secondary weights of the collating element at
 * the start of `t'.
 *
 * *len receives the number of wide characters consumed (1 unless a chain
 * matched), *prim and *sec the two weights.  The sources are tried in this
 * order: chain table (only if its primary weight is non-negative), the
 * table for characters up to UCHAR_MAX, the large-character table, and
 * finally the locale's "undefined" priorities.
 */
__private_extern__ void
__collate_lookup_l(const wchar_t *t, int *len, int *prim, int *sec, locale_t loc)
{
	struct __collate_st_chain_pri *chain;
	struct __collate_st_large_char_pri *large;
	int n;

	*len = 1;
	*prim = 0;
	*sec = 0;

	chain = chainsearch(t, &n, loc);
	if (chain != NULL && chain->pri[0] >= 0) {
		*len = n;
		*prim = chain->pri[0];
		*sec = chain->pri[1];
		return;
	}

	if (*t <= UCHAR_MAX) {
		*prim = __collate_char_pri_table[*t].pri[0];
		*sec = __collate_char_pri_table[*t].pri[1];
		return;
	}

	if (__collate_info->large_pri_count > 0 &&
	    (large = largesearch(*t, loc)) != NULL) {
		*prim = large->pri.pri[0];
		*sec = large->pri.pri[1];
		return;
	}

	/*
	 * Undefined character: a non-negative undef_pri is used as is, a
	 * negative one is subtracted from the character value.
	 */
	n = __collate_info->undef_pri[0];
	*prim = (n >= 0) ? n : *t - n;
	n = __collate_info->undef_pri[1];
	*sec = (n >= 0) ? n : *t - n;
}
/*
 * Multibyte front end to __collate_lookup_l() for the current locale:
 * converts `t' to a wide string, looks up the first collating element and
 * frees the temporary, preserving errno across free().
 *
 * __collate_mbstowcs() returns NULL when `t' cannot be converted.  In that
 * case no lookup is attempted: *len is set to 1 and both weights to 0 (the
 * same defaults __collate_lookup_l() starts from), and errno is left as
 * set by the failed conversion.
 */
void
__collate_lookup(const unsigned char *t, int *len, int *prim, int *sec)
{
	locale_t loc = __current_locale();
	wchar_t *w = __collate_mbstowcs((const char *)t, loc);
	int sverrno;

	if (w == NULL) {
		/* Invalid multibyte input: do not dereference the NULL result. */
		*len = 1;
		*prim = *sec = 0;
		return;
	}
	__collate_lookup_l(w, len, prim, sec, loc);
	sverrno = errno;
	free(w);
	errno = sverrno;
}
/*
 * Look up a single weight, selected by `which', for the collating element
 * at the start of `t'.
 *
 * *len receives the number of wide characters consumed (1 unless a chain
 * with a non-negative weight matched) and *pri the weight.  The lookup
 * order is the same as in __collate_lookup_l(): chain table, table for
 * characters up to UCHAR_MAX, large-character table, undefined priority.
 */
__private_extern__ void
__collate_lookup_which(const wchar_t *t, int *len, int *pri, int which, locale_t loc)
{
	struct __collate_st_chain_pri *chain;
	struct __collate_st_large_char_pri *large;
	int weight, n;

	*len = 1;
	*pri = 0;

	chain = chainsearch(t, &n, loc);
	if (chain != NULL) {
		weight = chain->pri[which];
		/* A negative chain weight means: fall back to the single character. */
		if (weight >= 0) {
			*len = n;
			*pri = weight;
			return;
		}
	}

	if (*t <= UCHAR_MAX) {
		*pri = __collate_char_pri_table[*t].pri[which];
		return;
	}

	if (__collate_info->large_pri_count > 0 &&
	    (large = largesearch(*t, loc)) != NULL) {
		*pri = large->pri.pri[which];
		return;
	}

	/* Undefined character: see the matching code in __collate_lookup_l(). */
	n = __collate_info->undef_pri[which];
	*pri = (n >= 0) ? n : *t - n;
}
/*
 * Convert the multibyte string `s' to a newly allocated, NUL-terminated
 * wide string using locale `loc'.
 *
 * Returns NULL if `s' contains an invalid multibyte sequence.  Allocation
 * failure does not return but goes through __collate_err().
 */
__private_extern__ wchar_t *
__collate_mbstowcs(const char *s, locale_t loc)
{
	static const mbstate_t initial;
	mbstate_t state;
	const char *probe = s;
	size_t nchars;
	wchar_t *out;

	/* First pass: count the wide characters without storing anything. */
	state = initial;
	nchars = mbsrtowcs_l(NULL, &probe, 0, &state, loc);
	if (nchars == (size_t)-1)
		return NULL;

	out = (wchar_t *)malloc((nchars + 1) * sizeof(wchar_t));
	if (out == NULL)
		__collate_err(EX_OSERR, __func__);

	/* Second pass: convert for real, starting from a fresh shift state. */
	state = initial;
	mbsrtowcs_l(out, &s, nchars, &state, loc);
	out[nchars] = 0;
	return (out);
}
/*
 * Duplicate the wide string `s' into freshly malloc()ed storage.
 * Never returns NULL: allocation failure goes through __collate_err().
 */
__private_extern__ wchar_t *
__collate_wcsdup(const wchar_t *s)
{
	wchar_t *copy;
	size_t nchars = wcslen(s) + 1;	/* includes the terminating NUL */

	copy = (wchar_t *)malloc(nchars * sizeof(wchar_t));
	if (copy == NULL)
		__collate_err(EX_OSERR, __func__);
	wmemcpy(copy, s, nchars);
	return (copy);
}
/*
 * Transform `src' into one weight string per collation directive.
 *
 * All COLL_WEIGHTS_MAX slots of `xf' are first set to NULL.  Then, for
 * each directive of the locale, xf[pass] receives a malloc()ed,
 * 0-terminated array holding the weights of the collating elements of the
 * (substituted and, for DIRECTIVE_BACKWARD, reversed) string:
 *   - a weight of 0 is skipped, unless the directive has
 *     DIRECTIVE_POSITION, in which case COLLATE_MAX_PRIORITY is stored;
 *   - a negative weight stops the transformation with errno = EINVAL.
 *
 * If malloc() fails, xf[pass] stays NULL and processing stops; errno is
 * restored to its value from before the malloc() call.  The function
 * returns nothing, so callers have to inspect xf[] and errno.
 * NOTE(review): the xf[] arrays are not freed here - presumably the
 * caller owns them; confirm at the call sites.
 */
__private_extern__ void
__collate_xfrm(const wchar_t *src, wchar_t **xf, locale_t loc)
{
	int pri, len;
	const wchar_t *t;
	wchar_t *tt = NULL, *tr = NULL;
	int direc, pass, positional;
	wchar_t *xfp;
	struct __collate_st_info *info = __collate_info;
	int sverrno;

	for (pass = 0; pass < COLL_WEIGHTS_MAX; pass++)
		xf[pass] = NULL;

	for (pass = 0; pass < info->directive_count; pass++) {
		direc = info->directive[pass];

		/*
		 * With COLLATE_SUBST_DUP every pass shares the substitution
		 * done for pass 0; otherwise redo it for each pass.
		 */
		if (pass == 0 || !(info->flags & COLLATE_SUBST_DUP)) {
			sverrno = errno;
			free(tt);
			errno = sverrno;
			tt = __collate_substitute(src, pass, loc);
		}

		if (direc & DIRECTIVE_BACKWARD) {
			size_t n;

			sverrno = errno;
			free(tr);
			errno = sverrno;
			tr = __collate_wcsdup(tt ? tt : src);

			/*
			 * Reverse in place.  Strings shorter than two characters
			 * are skipped so that no pointer before the start of the
			 * buffer is ever formed.
			 */
			n = wcslen(tr);
			if (n > 1) {
				wchar_t *bp = tr;
				wchar_t *fp = tr + n - 1;
				wchar_t c;

				while (bp < fp) {
					c = *bp;
					*bp++ = *fp;
					*fp-- = c;
				}
			}
			t = (const wchar_t *)tr;
		} else if (tt)
			t = (const wchar_t *)tt;
		else
			t = (const wchar_t *)src;

		/*
		 * Each lookup consumes at least one character and emits at most
		 * one weight, so wcslen(t) + 1 elements always suffice.
		 */
		sverrno = errno;
		if ((xf[pass] = (wchar_t *)malloc(sizeof(wchar_t) * (wcslen(t) + 1))) == NULL) {
			errno = sverrno;
			goto end;
		}
		errno = sverrno;

		xfp = xf[pass];
		positional = (direc & DIRECTIVE_POSITION) != 0;
		while (*t) {
			__collate_lookup_which(t, &len, &pri, pass, loc);
			t += len;
			if (pri < 0) {
				/* Terminate what was produced so far before bailing out. */
				*xfp = 0;
				errno = EINVAL;
				goto end;
			}
			if (pri == 0) {
				if (!positional)
					continue;	/* ignored element */
				pri = COLLATE_MAX_PRIORITY;
			}
			*xfp++ = pri;
		}
		*xfp = 0;
	}

end:
	sverrno = errno;
	free(tt);
	free(tr);
	errno = sverrno;
}
/* Write `text' followed by `trailer_len' bytes of `trailer' to stderr. */
static void
__collate_err_emit(const char *text, const char *trailer, size_t trailer_len)
{
	_write(STDERR_FILENO, text, strlen(text));
	_write(STDERR_FILENO, trailer, trailer_len);
}

/*
 * Fatal error exit: prints "<progname>: <f>: <strerror(errno)>" followed
 * by a newline on stderr, using the errno value seen on entry, and then
 * terminates the process with exit status `ex'.  Does not return.
 */
__private_extern__ void
__collate_err(int ex, const char *f)
{
	int serrno = errno;	/* captured before any call can clobber it */

	__collate_err_emit(_getprogname(), ": ", 2);
	__collate_err_emit(f, ": ", 2);
	__collate_err_emit(strerror(serrno), "\n", 1);
	exit(ex);
}
/*
 * Resolve the collating symbol named by the `slen' bytes at `src' and
 * store the corresponding wide characters in `dst' (room for `dlen').
 *
 * Returns:
 *   (size_t)-1  - invalid/incomplete multibyte input, a name longer than
 *                 STR_LEN characters, an empty name, or a name that does
 *                 not fit in `dlen';
 *   0           - the name is not a collating element of the locale;
 *   otherwise   - the number of wide characters of the element.
 *
 * When no collation tables are loaded, only a single ASCII byte is
 * accepted and it is copied to `dst' unchanged.
 */
__private_extern__ size_t
__collate_collating_symbol(wchar_t *dst, size_t dlen, const char *src, size_t slen, mbstate_t *ps, locale_t loc)
{
	/*
	 * One extra element: the loop below may store STR_LEN characters and
	 * the terminator is written after them.
	 */
	wchar_t wname[STR_LEN + 1];
	wchar_t w, *wp;
	size_t len, l;

	/* POSIX locale */
	if (loc->__collate_load_error) {
		if (dlen < 1)
			return (size_t)-1;
		if (slen != 1 || !isascii(*src))
			return 0;
		*dst = *src;
		return 1;
	}

	/* Decode the name into wname, at most STR_LEN wide characters. */
	for (wp = wname, len = 0; slen > 0; len++) {
		l = mbrtowc_l(&w, src, slen, ps, loc);
		if (l == (size_t)-1 || l == (size_t)-2)
			return (size_t)-1;
		if (l == 0)
			break;
		if (len >= STR_LEN)
			return (size_t)-1;
		*wp++ = w;
		src += l;
		slen -= l;
	}
	if (len == 0 || len > dlen)
		return (size_t)-1;

	if (len == 1) {
		/* A single character is a collating element if its primary weight is >= 0. */
		if (*wname <= UCHAR_MAX) {
			if (__collate_char_pri_table[*wname].pri[0] >= 0) {
				if (dlen > 0)
					*dst = *wname;
				return 1;
			}
			return 0;
		} else if (__collate_info->large_pri_count > 0) {
			struct __collate_st_large_char_pri *match;

			match = largesearch(*wname, loc);
			if (match && match->pri.pri[0] >= 0) {
				if (dlen > 0)
					*dst = *wname;
				return 1;
			}
		}
		return 0;
	}

	/* Multi-character names must be found in the chain table. */
	*wp = 0;
	if (__collate_info->chain_count > 0) {
		struct __collate_st_chain_pri *match;
		int ll;

		match = chainsearch(wname, &ll, loc);
		if (match) {
			if ((size_t)ll < dlen)
				dlen = ll;
			wcsncpy(dst, wname, dlen);
			return ll;
		}
	}
	return 0;
}
/*
 * Map the collating element named by the `slen' bytes at `src' to its
 * equivalence class, which is derived from the primary weight.
 *
 * Returns:
 *   -1                  - invalid/incomplete multibyte input, a name longer
 *                         than STR_LEN characters, or an empty name;
 *   0                   - no tables are loaded, or the element has no
 *                         usable class (not found, or a single character
 *                         with a negative primary weight);
 *   IGNORE_EQUIV_CLASS  - the element's primary weight is 0;
 *   otherwise           - the primary weight (for chains its absolute
 *                         value).
 */
__private_extern__ int
__collate_equiv_class(const char *src, size_t slen, mbstate_t *ps, locale_t loc)
{
	/*
	 * One extra element: the loop below may store STR_LEN characters and
	 * the terminator is written after them.
	 */
	wchar_t wname[STR_LEN + 1];
	wchar_t w, *wp;
	size_t len, l;
	int e;

	/* POSIX locale */
	if (loc->__collate_load_error)
		return 0;

	/* Decode the name into wname, at most STR_LEN wide characters. */
	for (wp = wname, len = 0; slen > 0; len++) {
		l = mbrtowc_l(&w, src, slen, ps, loc);
		if (l == (size_t)-1 || l == (size_t)-2)
			return -1;
		if (l == 0)
			break;
		if (len >= STR_LEN)
			return -1;
		*wp++ = w;
		src += l;
		slen -= l;
	}
	if (len == 0)
		return -1;

	if (len == 1) {
		e = -1;
		if (*wname <= UCHAR_MAX)
			e = __collate_char_pri_table[*wname].pri[0];
		else if (__collate_info->large_pri_count > 0) {
			struct __collate_st_large_char_pri *match;

			match = largesearch(*wname, loc);
			if (match)
				e = match->pri.pri[0];
		}
		if (e == 0)
			return IGNORE_EQUIV_CLASS;
		return e > 0 ? e : 0;
	}

	/* Multi-character names must be found in the chain table. */
	*wp = 0;
	if (__collate_info->chain_count > 0) {
		struct __collate_st_chain_pri *match;
		int ll;

		match = chainsearch(wname, &ll, loc);
		if (match) {
			e = match->pri[0];
			if (e == 0)
				return IGNORE_EQUIV_CLASS;
			return e < 0 ? -e : e;
		}
	}
	return 0;
}
/*
 * Test whether the text starting with the optional wide character `start'
 * and continuing with the multibyte bytes at `src' begins with a collating
 * element that belongs to `equiv_class'.
 *
 * Returns:
 *   (size_t)-1  - no tables are loaded, or `dst' is given but `dlen' is
 *                 too small for the element (*ps is restored);
 *   0           - no match (*ps is restored);
 *   otherwise   - the length of the matching element in wide characters.
 *                 If `dst' is non-NULL the element is copied there; if
 *                 `rlen' is non-NULL it receives the number of bytes of
 *                 `src' that belong to the element.
 *
 * The look-ahead buffer holds at most STR_LEN characters, so the number of
 * characters decoded is bounded by STR_LEN as well as by the locale's
 * chain_max_len; a table announcing a larger chain_max_len can therefore
 * not overrun it.
 */
size_t
__collate_equiv_match(int equiv_class, wchar_t *dst, size_t dlen, wchar_t start, const char *src, size_t slen, mbstate_t *ps, size_t *rlen, locale_t loc)
{
	wchar_t w;
	size_t len, l, clen;
	int i;
	wchar_t buf[STR_LEN + 1], *wp;	/* + 1 for the terminator */
	mbstate_t save;
	const char *s = src;
	size_t sl = slen;
	struct __collate_st_chain_pri *ch = NULL;

	/* POSIX locale */
	if (loc->__collate_load_error)
		return (size_t)-1;
	if (equiv_class == IGNORE_EQUIV_CLASS)
		equiv_class = 0;
	if (ps)
		save = *ps;

	/* Collect up to a full chain's worth of characters. */
	wp = buf;
	len = clen = 0;
	if (start) {
		*wp++ = start;
		len = 1;
	}
	while (sl > 0 && len < STR_LEN && len < __collate_info->chain_max_len) {
		l = mbrtowc_l(&w, s, sl, ps, loc);
		if (l == (size_t)-1 || l == (size_t)-2 || l == 0)
			break;
		*wp++ = w;
		s += l;
		clen += l;
		sl -= l;
		len++;
	}
	*wp = 0;

	/* Try a multi-character chain first ... */
	if (len > 1 && (ch = chainsearch(buf, &i, loc)) != NULL) {
		int e = ch->pri[0];

		if (e < 0)
			e = -e;
		if (e == equiv_class)
			goto found;
	}

	/* ... then the first character on its own. */
	i = 1;
	if (*buf <= UCHAR_MAX) {
		if (equiv_class == __collate_char_pri_table[*buf].pri[0])
			goto found;
	} else if (__collate_info->large_pri_count > 0) {
		struct __collate_st_large_char_pri *match;

		match = largesearch(*buf, loc);
		if (match && equiv_class == match->pri.pri[0])
			goto found;
	}
	if (ps)
		*ps = save;
	return 0;

found:
	/*
	 * The match may be shorter than what was decoded.  In that case rewind
	 * the conversion state and decode again, this time only the characters
	 * that belong to the match, to learn how many bytes they occupy.
	 */
	if ((size_t)i < len) {
		len = i;
		if (ps)
			*ps = save;
		if (start)
			i--;	/* `start' did not come from src */
		clen = 0;
		while (i-- > 0) {
			l = mbrtowc_l(&w, src, slen, ps, loc);
			src += l;
			clen += l;
			slen -= l;
		}
	}
	if (dst) {
		if (dlen < len) {
			if (ps)
				*ps = save;
			return (size_t)-1;
		}
		for (wp = buf; len > 0; len--)
			*dst++ = *wp++;
	}
	if (rlen)
		*rlen = clen;
	return len;
}
#if __DARWIN_BYTE_ORDER == __DARWIN_LITTLE_ENDIAN
/*
 * Convert a wide string from network to host byte order in place.
 * Conversion stops at the first NUL element or after `len' elements,
 * whichever comes first.
 *
 * The remaining length is checked BEFORE the element is read, so a field
 * of exactly `len' characters without a terminator is never read past its
 * end.
 */
static void
wntohl(wchar_t *str, int len)
{
	for (; len > 0 && *str; str++, len--)
		*str = ntohl(*str);
}
#endif
#ifdef COLLATE_DEBUG
/*
 * Debug helper: format a character code for printing.  Printable ASCII
 * (32..126) is shown as 'c' followed by a space, everything else as
 * \x{hex}.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.  The buffer is large enough for the longest escape of a 32-bit
 * value ("\x{ffffffff}"), and snprintf() bounds the write regardless.
 */
static char *
show(int c)
{
	static char buf[16];

	if (c >= 32 && c <= 126)
		snprintf(buf, sizeof(buf), "'%c' ", c);
	else
		snprintf(buf, sizeof(buf), "\\x{%02x}", (unsigned int)c);
	return buf;
}
/*
 * Debug helper: format at most `len' characters of the wide string `t'
 * (stopping early at a NUL).  Printable ASCII is copied as is, everything
 * else is written as \x{hex}.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.  Output that does not fit is cut off at a character boundary
 * instead of overrunning the buffer.
 */
static char *
showwcs(const wchar_t *t, int len)
{
	static char buf[64];
	char *cp = buf;
	char *const end = buf + sizeof(buf);	/* one past the last byte */

	for (; len > 0 && *t; len--, t++) {
		if (*t >= 32 && *t <= 126) {
			/* Need room for this character and the final NUL. */
			if (end - cp < 2)
				break;
			*cp++ = (char)*t;
		} else {
			int n = snprintf(cp, (size_t)(end - cp), "\\x{%02x}",
			    (unsigned int)*t);

			if (n < 0 || n >= (int)(end - cp)) {
				/* Did not fit: drop the partial escape. */
				break;
			}
			cp += n;
		}
	}
	*cp = 0;
	return buf;
}
void
__collate_print_tables()
{
int i, z;
locale_t loc = __current_locale();
printf("Info: p=%d s=%d f=0x%02x m=%d dc=%d up=%d us=%d pc=%d sc=%d cc=%d lc=%d\n",
__collate_info->directive[0], __collate_info->directive[1],
__collate_info->flags, __collate_info->chain_max_len,
__collate_info->directive_count,
__collate_info->undef_pri[0], __collate_info->undef_pri[1],
__collate_info->subst_count[0], __collate_info->subst_count[1],
__collate_info->chain_count, __collate_info->large_pri_count);
for(z = 0; z < __collate_info->directive_count; z++) {
if (__collate_info->subst_count[z] > 0) {
struct __collate_st_subst *p2 = __collate_substitute_table[z];
if (z == 0 && (__collate_info->flags & COLLATE_SUBST_DUP))
printf("Both substitute tables:\n");
else
printf("Substitute table %d:\n", z);
for (i = __collate_info->subst_count[z]; i-- > 0; p2++)
printf("\t%s --> \"%s\"\n",
show(p2->val),
showwcs(p2->str, STR_LEN));
}
}
if (__collate_info->chain_count > 0) {
printf("Chain priority table:\n");
struct __collate_st_chain_pri *p2 = __collate_chain_pri_table;
for (i = __collate_info->chain_count; i-- > 0; p2++) {
printf("\t\"%s\" :", showwcs(p2->str, STR_LEN));
for(z = 0; z < __collate_info->directive_count; z++)
printf(" %d", p2->pri[z]);
putchar('\n');
}
}
printf("Char priority table:\n");
{
struct __collate_st_char_pri *p2 = __collate_char_pri_table;
for (i = 0; i < UCHAR_MAX + 1; i++, p2++) {
printf("\t%s :", show(i));
for(z = 0; z < __collate_info->directive_count; z++)
printf(" %d", p2->pri[z]);
putchar('\n');
}
}
if (__collate_info->large_pri_count > 0) {
struct __collate_st_large_char_pri *p2 = __collate_large_char_pri_table;
printf("Large priority table:\n");
for (i = __collate_info->large_pri_count; i-- > 0; p2++) {
printf("\t%s :", show(p2->val));
for(z = 0; z < __collate_info->directive_count; z++)
printf(" %d", p2->pri.pri[z]);
putchar('\n');
}
}
}
#endif