/* Revision identification string; compiled out when 'lint' is defined. */
#ifndef lint
static char *rcsid = "$Id: race.c,v 1.1 2003/06/04 00:26:07 marka Exp $";
#endif
#include <config.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <idn/result.h>
#include <idn/assert.h>
#include <idn/logmacro.h>
#include <idn/converter.h>
#include <idn/ucs4.h>
#include <idn/debug.h>
#include <idn/race.h>
#include <idn/util.h>
/*
 * ACE prefix that the decoder requires on its input and the encoder
 * writes in front of its output.  May be overridden at compile time.
 */
#ifndef IDN_RACE_PREFIX
#define IDN_RACE_PREFIX "bq--"
#endif
/*
 * Value of the leading (mode) octet that marks a payload stored as
 * uncompressed 16-bit units.
 */
#define RACE_2OCTET_MODE 0xd8
/*
 * Escape octet used inside a compressed payload, and the second octet of
 * the pair <RACE_ESCAPE, RACE_ESCAPE_2ND>, which stands for a unit whose
 * lower octet is 0xff.
 */
#define RACE_ESCAPE 0xff
#define RACE_ESCAPE_2ND 0x99
/* NOTE(review): not referenced by the code in this part of the file. */
#define RACE_BUF_SIZE 128
/*
 * UTF-16 surrogate helpers: range tests, splitting a value >= 0x10000
 * into a high/low surrogate, and recombining a pair.
 * NOTE(review): none of these are referenced by the code in this part of
 * the file; the UTF-16 conversions are done by the idn_ucs4_* helpers.
 */
#define IS_SURROGATE_HIGH(v) (0xd800 <= (v) && (v) <= 0xdbff)
#define IS_SURROGATE_LOW(v) (0xdc00 <= (v) && (v) <= 0xdfff)
#define SURROGATE_HIGH(v) (SURROGATE_H_OFF + (((v) - 0x10000) >> 10))
#define SURROGATE_LOW(v) (SURROGATE_L_OFF + ((v) & 0x3ff))
#define SURROGATE_BASE 0x10000
#define SURROGATE_H_OFF 0xd800
#define SURROGATE_L_OFF 0xdc00
#define COMBINE_SURROGATE(h, l) \
(SURROGATE_BASE + (((h)-SURROGATE_H_OFF)<<10) + ((l)-SURROGATE_L_OFF))
/*
 * Compression modes as returned by get_compress_mode() and consumed by
 * race_compress_encode():
 *   compress_one  - at most one distinct upper octet occurs (possibly
 *                   zero); only lower octets are emitted.
 *   compress_two  - one non-zero upper octet occurs together with units
 *                   whose upper octet is zero; the latter are escaped.
 *   compress_none - two or more distinct non-zero upper octets occur;
 *                   every unit is emitted as 16 bits.
 */
enum {
compress_one,
compress_two,
compress_none
};
/*
 * Forward declarations of the file-local helpers.
 */

/* Base32-decode 'from' into 'buf' and expand it in place to 16-bit units. */
static idn_result_t race_decode_decompress(const char *from,
unsigned short *buf,
size_t buflen);
/*
 * Emit the mode octet at p[0] and the NUL-terminated units that follow it
 * as base32 text in 'to', using the given compression mode.
 */
static idn_result_t race_compress_encode(const unsigned short *p,
int compress_mode,
char *to, size_t tolen);
/*
 * Choose the compression mode for the NUL-terminated units at 'p' and
 * store the corresponding mode octet at p[-1].
 */
static int get_compress_mode(unsigned short *p);
/*
 * Decode the RACE-encoded ASCII string 'from' into the UCS-4 buffer 'to',
 * which has room for 'tolen' elements.
 *
 * 'ctx' must not be NULL (asserted); 'privdata' is not used.
 *
 * An empty input is handed straight to idn_ucs4_utf8toucs4().  Any other
 * input that does not start with IDN_RACE_PREFIX yields
 * idn_invalid_encoding.  Otherwise the text after the prefix is
 * base32-decoded and decompressed into a temporary UTF-16 buffer, which
 * is then converted to UCS-4.
 *
 * Returns idn_success, idn_invalid_encoding, idn_nomemory, or whatever
 * error the conversion helpers report.
 */
idn_result_t
idn__race_decode(idn_converter_t ctx, void *privdata,
		 const char *from, unsigned long *to, size_t tolen) {
	unsigned short *utf16 = NULL;
	idn_result_t r;

	assert(ctx != NULL);

	TRACE(("idn__race_decode(from=\"%s\", tolen=%d)\n",
	       idn__debug_xstring(from, 50), (int)tolen));

	if (idn__util_asciihaveaceprefix(from, IDN_RACE_PREFIX)) {
		/* Skip the prefix; what remains is the base32 payload. */
		const char *payload = from + strlen(IDN_RACE_PREFIX);
		/* One unit per input character plus a terminator is ample. */
		size_t nunits = strlen(payload) + 1;

		utf16 = malloc(sizeof(*utf16) * nunits);
		if (utf16 == NULL) {
			r = idn_nomemory;
		} else {
			r = race_decode_decompress(payload, utf16, nunits);
			if (r == idn_success)
				r = idn_ucs4_utf16toucs4(utf16, to, tolen);
		}
	} else if (*from == '\0') {
		/* Empty input: let the plain converter produce the result. */
		r = idn_ucs4_utf8toucs4(from, to, tolen);
	} else {
		r = idn_invalid_encoding;
	}

	free(utf16);

	if (r != idn_success) {
		TRACE(("idn__race_decode(): %s\n", idn_result_tostring(r)));
	} else {
		TRACE(("idn__race_decode(): succcess (to=\"%s\")\n",
		       idn__debug_ucs4xstring(to, 50)));
	}
	return (r);
}
static idn_result_t
race_decode_decompress(const char *from, unsigned short *buf, size_t buflen)
{
unsigned short *p = buf;
unsigned int bitbuf = 0;
int bitlen = 0;
int i, j;
size_t len;
while (*from != '\0') {
int c = *from++;
int x;
if ('a' <= c && c <= 'z')
x = c - 'a';
else if ('A' <= c && c <= 'Z')
x = c - 'A';
else if ('2' <= c && c <= '7')
x = c - '2' + 26;
else
return (idn_invalid_encoding);
bitbuf = (bitbuf << 5) + x;
bitlen += 5;
if (bitlen >= 8) {
*p++ = (bitbuf >> (bitlen - 8)) & 0xff;
bitlen -= 8;
}
}
len = p - buf;
if (buf[0] == RACE_2OCTET_MODE) {
if ((len - 1) % 2 != 0)
return (idn_invalid_encoding);
for (i = 1, j = 0; i < len; i += 2, j++)
buf[j] = (buf[i] << 8) + buf[i + 1];
len = j;
} else {
unsigned short c = buf[0] << 8;
for (i = 1, j = 0; i < len; j++) {
if (buf[i] == RACE_ESCAPE) {
if (i + 1 >= len)
return (idn_invalid_encoding);
else if (buf[i + 1] == RACE_ESCAPE_2ND)
buf[j] = c | 0xff;
else
buf[j] = buf[i + 1];
i += 2;
} else if (buf[i] == 0x99 && c == 0x00) {
return (idn_invalid_encoding);
} else {
buf[j] = c | buf[i++];
}
}
len = j;
}
buf[len] = '\0';
return (idn_success);
}
/*
 * Encode the UCS-4 string 'from' as a RACE ASCII string in 'to', which
 * has room for 'tolen' bytes.
 *
 * 'ctx' must not be NULL (asserted); 'privdata' is not used.
 *
 * An empty input is handed straight to idn_ucs4_ucs4toutf8().  Input
 * that already begins with IDN_RACE_PREFIX is refused with
 * idn_prohibited.  Otherwise the prefix is written, the input is
 * converted to UTF-16, a compression mode is chosen, and the result is
 * base32-encoded after the prefix.
 *
 * Returns idn_success, idn_prohibited, idn_buffer_overflow,
 * idn_nomemory, idn_invalid_encoding (input contains U+0099), or
 * whatever error the conversion helpers report.  On failure the contents
 * of 'to' are unspecified (the prefix may already have been written).
 */
idn_result_t
idn__race_encode(idn_converter_t ctx, void *privdata,
		 const unsigned long *from, char *to, size_t tolen) {
	char *to_org = to;
	unsigned short *p, *buf = NULL;
	size_t prefixlen = strlen(IDN_RACE_PREFIX);
	size_t buflen;
	size_t fromlen;
	idn_result_t r;
	int compress_mode;

	assert(ctx != NULL);

	TRACE(("idn__race_encode(from=\"%s\", tolen=%d)\n",
	       idn__debug_ucs4xstring(from, 50), (int)tolen));

	if (*from == '\0') {
		r = idn_ucs4_ucs4toutf8(from, to, tolen);
		goto ret;
	} else if (idn__util_ucs4haveaceprefix(from, IDN_RACE_PREFIX)) {
		r = idn_prohibited;
		goto ret;
	}

	if (tolen < prefixlen) {
		r = idn_buffer_overflow;
		goto ret;
	}
	memcpy(to, IDN_RACE_PREFIX, prefixlen);
	to += prefixlen;
	tolen -= prefixlen;

	fromlen = idn_ucs4_strlen(from);
	buflen = fromlen * 2 + 2;

	/*
	 * Convert to UTF-16, leaving buf[0] free for the mode octet that
	 * get_compress_mode() fills in later.
	 */
	for (;;) {
		unsigned short *new_buf;

		/* Refuse sizes whose byte count would wrap around. */
		if (buflen > (size_t)-1 / sizeof(*buf)) {
			r = idn_nomemory;
			goto ret;
		}
		new_buf = realloc(buf, sizeof(*buf) * buflen);
		if (new_buf == NULL) {
			r = idn_nomemory;
			goto ret;
		}
		buf = new_buf;

		r = idn_ucs4_ucs4toutf16(from, buf + 1, buflen - 1);
		if (r == idn_success)
			break;
		else if (r != idn_buffer_overflow)
			goto ret;

		/*
		 * Grow before retrying; retrying with the same size could
		 * never succeed and would loop forever.
		 */
		buflen *= 2;
	}

	/*
	 * 0x99 doubles as the second octet of the escape sequence, so the
	 * code point U+0099 cannot be represented.
	 */
	for (p = buf + 1; *p != '\0'; p++) {
		if (*p == 0x0099) {
			r = idn_invalid_encoding;
			goto ret;
		}
	}

	/* Pick the mode (this also stores the mode octet in buf[0]). */
	compress_mode = get_compress_mode(buf + 1);
	r = race_compress_encode(buf, compress_mode, to, tolen);

ret:
	free(buf);
	if (r == idn_success) {
		TRACE(("idn__race_encode(): succcess (to=\"%s\")\n",
		       idn__debug_xstring(to_org, 50)));
	} else {
		TRACE(("idn__race_encode(): %s\n", idn_result_tostring(r)));
	}
	return (r);
}
/*
 * Compress and base32-encode a prepared buffer.
 *
 * 'p' points at the mode octet; the NUL-terminated 16-bit units to
 * encode follow it.  'compress_mode' selects how each unit is pushed
 * into the bit stream:
 *   compress_none - all 16 bits of every unit;
 *   compress_one  - the lower octet only, with a lower octet of 0xff
 *                   written as <RACE_ESCAPE, RACE_ESCAPE_2ND>;
 *   compress_two  - as compress_one, except that a unit whose upper
 *                   octet is zero is written as <0xff, lower octet>.
 * The bit stream is emitted 5 bits at a time using 'a'-'z' and '2'-'7';
 * a trailing partial group is padded with zero bits.
 *
 * The NUL-terminated result is stored in 'to' (capacity 'tolen' bytes).
 * Returns idn_success, or idn_buffer_overflow if 'to' is too small.
 */
static idn_result_t
race_compress_encode(const unsigned short *p, int compress_mode,
		     char *to, size_t tolen)
{
	unsigned long bitbuf = *p++;	/* bit stream, seeded with mode octet */
	int bitlen = 8;			/* number of pending bits in 'bitbuf' */

	while (*p != '\0' || bitlen > 0) {
		unsigned int c = *p;

		if (c == '\0') {
			/*
			 * End of data: pad the final partial group to 5
			 * bits.  If 5 or more bits are still pending (only
			 * possible when no unit follows the mode octet) do
			 * not pad yet -- '5 - bitlen' would be a negative
			 * shift count.  The drain loop below reduces
			 * 'bitlen' and the next pass pads what is left.
			 */
			if (bitlen < 5) {
				bitbuf <<= (5 - bitlen);
				bitlen = 5;
			}
		} else if (compress_mode == compress_none) {
			/* Push the whole 16-bit unit. */
			bitbuf = (bitbuf << 16) | c;
			bitlen += 16;
			p++;
		} else {
			/* compress_one or compress_two */
			if (compress_mode == compress_two &&
			    (c & 0xff00) == 0) {
				/* Upper octet is zero: escape it. */
				bitbuf = (bitbuf << 16) | 0xff00 | c;
				bitlen += 16;
			} else if ((c & 0xff) == 0xff) {
				/* Lower octet collides with the escape. */
				bitbuf = (bitbuf << 16) |
					(RACE_ESCAPE << 8) | RACE_ESCAPE_2ND;
				bitlen += 16;
			} else {
				/* Only the lower octet is needed. */
				bitbuf = (bitbuf << 8) | (c & 0xff);
				bitlen += 8;
			}
			p++;
		}

		/*
		 * Drain every complete 5-bit group, most significant first.
		 */
		while (bitlen >= 5) {
			int x;

			x = (bitbuf >> (bitlen - 5)) & 0x1f;
			bitlen -= 5;

			if (x < 26)
				x += 'a';
			else
				x = (x - 26) + '2';

			if (tolen < 1)
				return (idn_buffer_overflow);
			*to++ = x;
			tolen--;
		}
	}

	/* Room for the terminating NUL is required as well. */
	if (tolen < 1)
		return (idn_buffer_overflow);
	*to = '\0';
	return (idn_success);
}
/*
 * Inspect the NUL-terminated 16-bit units at 'p', decide how they can be
 * compressed, and store the matching mode octet in the element just
 * before 'p' (the caller must have reserved it).
 *
 * Returns:
 *   compress_none - two different non-zero upper octets were seen; the
 *                   mode octet is set to RACE_2OCTET_MODE.
 *   compress_two  - one non-zero upper octet plus at least one unit with
 *                   a zero upper octet; the mode octet is the non-zero
 *                   upper octet.
 *   compress_one  - every unit shares one upper octet (possibly zero);
 *                   the mode octet is that upper octet.
 */
static int
get_compress_mode(unsigned short *p) {
	unsigned short *mode_slot = p - 1;
	unsigned int common_hi = 0;	/* non-zero upper octet seen so far */
	int saw_zero_hi = 0;		/* any unit with upper octet zero? */
	const unsigned short *cur;

	for (cur = p; *cur != '\0'; cur++) {
		unsigned int hi = *cur & 0xff00;

		if (hi == 0) {
			saw_zero_hi = 1;
			continue;
		}
		if (hi == common_hi)
			continue;
		if (common_hi != 0) {
			/* A second distinct non-zero upper octet. */
			*mode_slot = RACE_2OCTET_MODE;
			return (compress_none);
		}
		common_hi = hi;
	}

	*mode_slot = common_hi >> 8;
	if (common_hi > 0 && saw_zero_hi)
		return (compress_two);
	return (compress_one);
}