/* chartable.h */

/* Definitions internal to charset.c and chartable.c */

/*
 * Copyright (c) 1998-2003 Carnegie Mellon University.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer. 
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The name "Carnegie Mellon University" must not be used to
 *    endorse or promote products derived from this software without
 *    prior written permission. For permission or any other legal
 *    details, please contact  
 *      Office of Technology Transfer
 *      Carnegie Mellon University
 *      5000 Forbes Avenue
 *      Pittsburgh, PA  15213-3890
 *      (412) 268-4387, fax: (412) 268-7395
 *      tech-transfer@andrew.cmu.edu
 *
 * 4. Redistributions of any form whatsoever must retain the following
 *    acknowledgment:
 *    "This product includes software developed by Computing Services
 *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
 *
 * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 */

/* $Id: chartable.h,v 1.5 2003/02/13 20:15:39 rjs3 Exp $ */

/* note that these are all uppercase letters. since the translation
   tables canonicalize to lower case letters, we never see these bytes
   in the output UTF-8 and they're safely used as control codes to the
   character decoder. */

/* note that currently we never return a character that is represented
 * by more than 3 octets in UTF-8, since we only deal with characters
 * in UCS-2. this means that 11110xxx, 111110xx, and 1111110x never
 * appear in our outgoing tables, and could be used instead of the following.
 */

/*
 * Control codes embedded in the translation tables and interpreted by the
 * character decoder.  Each code is a distinct uppercase ASCII letter in the
 * range 'N'..'W'.
 */
#define XLT 'N'			/* Long translation */
#define U7F 'O'			/* UTF-7 first base64 character */
#define U7N 'P'			/* UTF-7 subsequent base64 character */
#define U83 'Q'			/* UTF-8 3-char sequence */
#define U83_2 'R'		/* second char of same */
#define U83_3 'S'		/* third char of same */
/*
 * NOTE(review): the four codes below carry no description in this header.
 * Their names suggest table-switching operations (jump to subroutine, jump,
 * return, end) -- confirm against the decoder in charset.c.
 */
#define JSR 'T'
#define JMP 'U'
#define RET 'V'
#define END 'W'

/*
 * Associates a character set name with its translation table(s).
 */
struct charset {
    /* Name of the character set. */
    char *name;
    /*
     * Pointer to read-only table data; each element pointed to is a
     * 256-row array of 4-byte entries.
     * NOTE(review): presumably rows are indexed by input byte and
     * table[n] selects the n-th table -- confirm in charset.c.
     */
    const unsigned char (*table)[256][4];
};