#include <sys_defs.h>
#include <string.h>
#include <ctype.h>
#ifndef NO_EAI
#include <unicode/ucasemap.h>
#include <unicode/ustring.h>
#include <unicode/uchar.h>
#endif
#include <msg.h>
#include <stringops.h>
#define STR(x) vstring_str(x)
#define LEN(x) VSTRING_LEN(x)
#ifndef NO_EAI

/*
 * casefold_ascii_only - true if the first len bytes of src (or the bytes
 * up to an earlier null terminator) are all 7-bit ASCII. Unlike a scan
 * for the terminator alone, this never looks beyond the caller's range.
 */
static int casefold_ascii_only(const char *src, ssize_t len)
{
    const unsigned char *cp = (const unsigned char *) src;
    const unsigned char *end = cp + len;

    for (; cp < end && *cp != 0; cp++)
        if (*cp >= 0x80)
            return (0);
    return (1);
}

#endif

/*
 * casefoldx - fold text to a canonical case for caseless comparison
 *
 * flags: CASEF_FLAG_APPEND appends the result to dest instead of replacing
 * its content; CASEF_FLAG_UTF8 requests Unicode case folding for input
 * that is not plain ASCII (ignored when built with NO_EAI).
 *
 * dest: output buffer, or a null pointer to use a private buffer that is
 * allocated on first use and overwritten (or appended to) by later calls.
 *
 * src, len: input text and its length in bytes; a negative len means
 * "use strlen(src)".
 *
 * Returns a pointer to the start of the null-terminated content of dest.
 * In append mode this includes whatever dest already contained. ICU
 * failures other than "buffer too small" are fatal.
 */
char   *casefoldx(int flags, VSTRING *dest, const char *src, ssize_t len)
{
    static VSTRING *fold_buf = 0;
    size_t  old_len;

#ifndef NO_EAI
    const char myname[] = "casefold";
    static UCaseMap *csm = 0;
    UErrorCode error;
    ssize_t space_needed;
    int     n;

#endif

    /*
     * Argument handling shared by the ASCII-only and the Unicode builds, so
     * that a null dest selects the private buffer in both configurations.
     */
    if (len < 0)
        len = strlen(src);
    if (dest == 0)
        dest = (fold_buf != 0 ? fold_buf : (fold_buf = vstring_alloc(100)));
    if ((flags & CASEF_FLAG_APPEND) == 0)
        VSTRING_RESET(dest);
    old_len = VSTRING_LEN(dest);

#ifdef NO_EAI

    /*
     * ASCII mode only: copy, then lowercase only the newly appended part.
     */
    vstring_strncat(dest, src, len);
    lowercase(STR(dest) + old_len);
    return (STR(dest));
#else

    /*
     * UTF-8 folding not requested, or nothing but ASCII in the requested
     * range: skip ICU and lowercase the appended part in place.
     */
    if ((flags & CASEF_FLAG_UTF8) == 0 || casefold_ascii_only(src, len)) {
        vstring_strncat(dest, src, len);
        lowercase(STR(dest) + old_len);
        return (STR(dest));
    }

    /*
     * Note: the input is handed to ICU without validation; callers are
     * expected to pass well-formed UTF-8.
     */

    /*
     * One-time initialization of the ICU case map.
     */
    if (csm == 0) {
        error = U_ZERO_ERROR;
        csm = ucasemap_open("en_US", U_FOLD_CASE_DEFAULT, &error);
        if (U_SUCCESS(error) == 0)
            msg_fatal("ucasemap_open error: %s", u_errorName(error));
    }

    /*
     * Fold the input directly into the free space after the existing
     * content, growing the buffer when ICU reports that it is too small.
     * The attempt count is bounded so that a misbehaving library cannot
     * make us loop forever.
     * 
     * The length that ICU reports does not count a null terminator, hence the
     * extra byte when growing the buffer, and the explicit terminator when
     * the result fills the available space exactly.
     */
    for (n = 0; n < 3; n++) {
        error = U_ZERO_ERROR;
        space_needed = ucasemap_utf8FoldCase(csm, STR(dest) + old_len,
                                             vstring_avail(dest), src, len,
                                             &error);
        if (U_SUCCESS(error)) {
            VSTRING_AT_OFFSET(dest, old_len + space_needed);
            if (vstring_avail(dest) == 0)       /* exact fit */
                VSTRING_TERMINATE(dest);
            break;
        } else if (error == U_BUFFER_OVERFLOW_ERROR) {
            VSTRING_SPACE(dest, space_needed + 1);
        } else {
            msg_fatal("%s: conversion error for \"%s\": %s",
                      myname, src, u_errorName(error));
        }
    }

    /*
     * Never hand back a buffer that may hold partial, unterminated output.
     */
    if (n >= 3)
        msg_panic("%s: output buffer still too small after %d attempts",
                  myname, n);
    return (STR(dest));
#endif
}
#ifdef TEST
/*
 * encode_utf8 - replace the buffer content with the UTF-8 encoding of one
 * code point, null-terminated.
 *
 * Code points outside 0..0x10FFFF trigger a panic. Surrogates are not
 * filtered here; that is left to the caller.
 */
static void encode_utf8(VSTRING *buffer, int codepoint)
{
    const char myname[] = "encode_utf8";

    VSTRING_RESET(buffer);

    /*
     * Reject negative values up front. Without this, a negative code point
     * would satisfy the "< 0x80" test below and be stored as a single
     * garbage byte instead of being reported.
     */
    if (codepoint < 0 || codepoint > 0x10FFFF)
        msg_panic("%s: out-of-range codepoint U+%X", myname, codepoint);

    if (codepoint < 0x80) {
        /* One byte: 0xxxxxxx. */
        VSTRING_ADDCH(buffer, codepoint);
    } else if (codepoint < 0x800) {
        /* Two bytes: 110xxxxx 10xxxxxx. */
        VSTRING_ADDCH(buffer, 0xc0 | (codepoint >> 6));
        VSTRING_ADDCH(buffer, 0x80 | (codepoint & 0x3f));
    } else if (codepoint < 0x10000) {
        /* Three bytes: 1110xxxx 10xxxxxx 10xxxxxx. */
        VSTRING_ADDCH(buffer, 0xe0 | (codepoint >> 12));
        VSTRING_ADDCH(buffer, 0x80 | ((codepoint >> 6) & 0x3f));
        VSTRING_ADDCH(buffer, 0x80 | (codepoint & 0x3f));
    } else {
        /* Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. */
        VSTRING_ADDCH(buffer, 0xf0 | (codepoint >> 18));
        VSTRING_ADDCH(buffer, 0x80 | ((codepoint >> 12) & 0x3f));
        VSTRING_ADDCH(buffer, 0x80 | ((codepoint >> 6) & 0x3f));
        VSTRING_ADDCH(buffer, 0x80 | (codepoint & 0x3f));
    }
    VSTRING_TERMINATE(buffer);
}
#include <stdlib.h>
#include <stdio.h>
#include <locale.h>
#include <vstream.h>
#include <vstring_vstream.h>
#include <msg_vstream.h>
/*
 * main - interactive test driver for casefold()
 *
 * Reads commands from standard input, one per line, echoes each line, and
 * writes results to standard output:
 *
 *   fold <text>           case-fold the text and show the result
 *   range <first> <last>  encode and fold every code point in the range,
 *                         skipping surrogates
 *   chroot <path>         chdir+chroot to the path (only when running as
 *                         root; otherwise silently ignored)
 *   file <path>           fold every line of the named file
 *   verbose <int>         set msg_verbose
 *
 * Lines that are empty or whose first word starts with '#' are skipped.
 */
int     main(int argc, char **argv)
{
    VSTRING *buffer = vstring_alloc(1);
    VSTRING *dest = vstring_alloc(1);
    char    path[256];                  /* chroot/file pathname */
    char   *bp;
    char   *conv_res;
    char   *cmd;
    int     codepoint, first, last;
    VSTREAM *fp;

    if (setlocale(LC_ALL, "C") == 0)
        msg_fatal("setlocale(LC_ALL, C) failed: %m");

    msg_vstream_init(argv[0], VSTREAM_ERR);
    util_utf8_enable = 1;

    while (vstring_fgets_nonl(buffer, VSTREAM_IN)) {
        bp = STR(buffer);
        vstream_printf("> %s\n", bp);
        cmd = mystrtok(&bp, CHARS_SPACE);
        if (cmd == 0 || *cmd == '#')
            continue;
        while (ISSPACE(*bp))
            bp++;

        /*
         * Null-terminated string.
         */
        if (strcmp(cmd, "fold") == 0) {
            conv_res = casefold(dest, bp);
            vstream_printf("\"%s\" ->fold \"%s\"\n", bp, conv_res);
        }

        /*
         * Codepoint range. The input line is no longer needed at this point,
         * so the line buffer doubles as the encoder's output buffer.
         */
        else if (strcmp(cmd, "range") == 0
                 && sscanf(bp, "%i %i", &first, &last) == 2
                 && first <= last) {
            for (codepoint = first; codepoint <= last; codepoint++) {
                if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
                    vstream_printf("skipping surrogate range\n");
                    codepoint = 0xDFFF;
                } else {
                    encode_utf8(buffer, codepoint);
                    if (msg_verbose)
                        vstream_printf("U+%X -> %s\n", codepoint, STR(buffer));
                    if (valid_utf8_string(STR(buffer), LEN(buffer)) == 0)
                        msg_fatal("bad utf-8 encoding for U+%X\n", codepoint);
                    casefold(dest, STR(buffer));
                }
            }
            vstream_printf("range completed: 0x%x..0x%x\n", first, last);
        }

        /*
         * Chroot directory. The pathname is scanned into a separate array:
         * bp points into the line buffer, and sscanf() must not be given
         * overlapping input and output objects.
         */
        else if (strcmp(cmd, "chroot") == 0
                 && sscanf(bp, "%255s", path) == 1) {
            if (geteuid() == 0) {
                if (chdir(path) < 0)
                    msg_fatal("chdir(%s): %m", path);
                if (chroot(path) < 0)
                    msg_fatal("chroot(%s): %m", path);
                vstream_printf("chroot %s completed\n", path);
            }
        }

        /*
         * File: fold each line. The line buffer is reused for the file's
         * lines, which is safe because the pathname lives in path[].
         */
        else if (strcmp(cmd, "file") == 0
                 && sscanf(bp, "%255s", path) == 1) {
            if ((fp = vstream_fopen(path, O_RDONLY, 0)) == 0)
                msg_fatal("open(%s): %m", path);
            while (vstring_fgets_nonl(buffer, fp))
                vstream_printf("%s\n", casefold(dest, STR(buffer)));
            vstream_fclose(fp);
        }

        /*
         * Verbose level; sscanf() stores directly into msg_verbose.
         */
        else if (strcmp(cmd, "verbose") == 0
                 && sscanf(bp, "%i", &msg_verbose) == 1) {
             /* void */ ;
        }

        /*
         * Usage.
         */
        else {
            vstream_printf("Usage: %s chroot <path> | file <path> | fold <text> | range <first> <last> | verbose <int>\n",
                           argv[0]);
        }
        vstream_fflush(VSTREAM_OUT);
    }
    vstring_free(buffer);
    vstring_free(dest);
    exit(0);
}
#endif