gencmn.c   [plain text]


/*
*******************************************************************************
*
*   Copyright (C) 1999-2001, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  gencmn.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999nov01
*   created by: Markus W. Scherer
*
*   This program reads a list of data files and combines them
*   into one common, memory-mappable file.
*/

#include <stdio.h>
#include <stdlib.h>
#include "unicode/utypes.h"
#include "unicode/putil.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
#include "toolutil.h"
#include "unewdata.h"
#include "uoptions.h"

#define STRING_STORE_SIZE 100000
#define MAX_FILE_COUNT 2000

#define COMMON_DATA_NAME U_ICUDATA_NAME
#define DATA_TYPE "dat"

/* UDataInfo cf. udata.h */
static const UDataInfo dataInfo={
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};

static uint32_t maxSize;

static char stringStore[STRING_STORE_SIZE];
static uint32_t stringTop=0, basenameTotal=0;

typedef struct {
    char *pathname, *basename;
    uint32_t basenameLength, basenameOffset, fileSize, fileOffset;
} File;

static File files[MAX_FILE_COUNT];
static uint32_t fileCount=0;

/* prototypes --------------------------------------------------------------- */

static void
addFile(const char *filename, UBool sourceTOC, UBool verbose);

static char *
allocString(uint32_t length);

static int
compareFiles(const void *file1, const void *file2);

/* -------------------------------------------------------------------------- */

static UOption options[]={
/*0*/ UOPTION_HELP_H,
/*1*/ UOPTION_HELP_QUESTION_MARK,
/*2*/ UOPTION_VERBOSE,
/*3*/ UOPTION_COPYRIGHT,
/*4*/ UOPTION_DESTDIR,
/*5*/ UOPTION_DEF( "comment", 'C', UOPT_REQUIRES_ARG),
/*6*/ UOPTION_DEF( "name", 'n', UOPT_REQUIRES_ARG),
/*7*/ UOPTION_DEF( "type", 't', UOPT_REQUIRES_ARG),
/*8*/ UOPTION_DEF( "source", 'S', UOPT_NO_ARG),
/*9*/ UOPTION_DEF( "entrypoint", 'e', UOPT_REQUIRES_ARG)
};

static char *symPrefix = NULL;

extern int
main(int argc, char* argv[]) {
    static char buffer[4096];
    char line[512];
    FileStream *in, *file;
    char *s;
    UErrorCode errorCode=U_ZERO_ERROR;
    uint32_t i, fileOffset, basenameOffset, length, nread;
    UBool sourceTOC, verbose;
    const char *entrypointName = NULL;

    U_MAIN_INIT_ARGS(argc, argv);

    /* preset then read command line options */
    options[4].value=u_getDataDirectory();
    options[6].value=COMMON_DATA_NAME;
    options[7].value=DATA_TYPE;
    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);

    /* error handling, printing usage message */
    if(argc<0) {
        fprintf(stderr,
            "error in command line argument \"%s\"\n",
            argv[-argc]);
    } else if(argc<2) {
        argc=-1;
    }
    if(argc<0 || options[0].doesOccur || options[1].doesOccur) {
        FILE *where = argc < 0 ? stderr : stdout;
        
        /*
         * Broken into chucks because the C89 standard says the minimum
         * required supported string length is 509 bytes.
         */
        fprintf(where,
                "%csage: %s [ -h, -?, --help ] [ -v, --verbose ] [ -c, --copyright ] [ -C, --comment comment ] [ -d, --destdir dir ] [ -n, --name filename ] [ -t, --type filetype ] [ -S, --source tocfile ] [ -e, --entrypoint name ] [ maxsize ] [ [ -f ] filename ]\n", argc < 0 ? 'u' : 'U', *argv);
        if (options[0].doesOccur || options[1].doesOccur) {
            fprintf(where, "\n"
            "Read the list file (default: standard input) and create a common data\n"
                    "file from specified files; omit any larger than maxsize.\n");
            fprintf(where, "\n"
            "Options:\n"
            "\t-h, -?, --help              this usage text\n"
            "\t-v, --verbose               verbose output\n"
            "\t-c, --copyright             include the ICU copyright notice\n"
            "\t-C, --comment comment       include a comment string\n"
            "\t-d, --destdir dir           destination directory\n");
            fprintf(where,
            "\t-n, --name filename         output filename, without .type extension\n"
            "\t                            (default: " COMMON_DATA_NAME ")\n"
            "\t-t, --type filetype         type of the destination file\n"
            "\t                            (default: \"" DATA_TYPE "\")\n"
            "\t-S, --source tocfile        write a .c source file with the table of\n"
            "\t                            contents\n"
            "\t-e, --entrypoint name       override the c entrypoint name\n"
            "\t                            (default: \"<name>_<type>\")\n");
        }
        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
    }

    sourceTOC=options[8].doesOccur;

    verbose = options[2].doesOccur;

    maxSize=(uint32_t)uprv_strtoul(argv[1], NULL, 0);

    if(argc==2) {
        in=T_FileStream_stdin();
    } else {
        in=T_FileStream_open(argv[2], "r");
        if(in==NULL) {
            fprintf(stderr, "gencmn: unable to open input file %s\n", argv[2]);
            exit(U_FILE_ACCESS_ERROR);
        }
    }

    if (verbose) {
        if(sourceTOC) {
            printf("generating %s_%s.c (table of contents source file)\n", options[6].value, options[7].value);
        } else {
            printf("generating %s.%s (common data file with table of contents)\n", options[6].value, options[7].value);
        }
    }

    /* read the list of files and get their lengths */
    while(T_FileStream_readLine(in, line, sizeof(line))!=NULL) {
        /* remove trailing newline characters */
        s=line;
        while(*s!=0) {
            if(*s=='\r' || *s=='\n') {
                *s=0;
                break;
            }
            ++s;
        }

        /* check for comment */

        if (*line == '#') {
            continue;
        }

        /* add the file */

        addFile(getLongPathname(line), sourceTOC, verbose);
    }

    if(in!=T_FileStream_stdin()) {
        T_FileStream_close(in);
    }

    if(fileCount==0) {
        fprintf(stderr, "gencmn: no files listed in %s\n", argc==2 ? "<stdin>" : argv[2]);
        return 0;
    }

    /* sort the files by basename */
    qsort(files, fileCount, sizeof(File), compareFiles);

    if(!sourceTOC) {
        UNewDataMemory *out;

        /* determine the offsets of all basenames and files in this common one */
        basenameOffset=4+8*fileCount;
        fileOffset=(basenameOffset+(basenameTotal+15))&~0xf;
        for(i=0; i<fileCount; ++i) {
            files[i].fileOffset=fileOffset;
            fileOffset+=(files[i].fileSize+15)&~0xf;
            files[i].basenameOffset=basenameOffset;
            basenameOffset+=files[i].basenameLength;
        }

        /* create the output file */
        out=udata_create(options[4].value, options[7].value, options[6].value,
                         &dataInfo,
                         options[3].doesOccur ? U_COPYRIGHT_STRING : options[5].value,
                         &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencmn: udata_create(-d %s -n %s -t %s) failed - %s\n",
                options[4].value, options[6].value, options[7].value,
                u_errorName(errorCode));
            exit(errorCode);
        }

        /* write the table of contents */
        udata_write32(out, fileCount);
        for(i=0; i<fileCount; ++i) {
            udata_write32(out, files[i].basenameOffset);
            udata_write32(out, files[i].fileOffset);
        }

        /* write the basenames */
        for(i=0; i<fileCount; ++i) {
            udata_writeString(out, files[i].basename, files[i].basenameLength);
        }
        length=4+8*fileCount+basenameTotal;

        /* copy the files */
        for(i=0; i<fileCount; ++i) {
            /* pad to 16-align the next file */
            length&=0xf;
            if(length!=0) {
                udata_writePadding(out, 16-length);
            }

            if (verbose) {
                printf("adding %s (%ld byte%s)\n", files[i].pathname, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
            }

            /* copy the next file */
            file=T_FileStream_open(files[i].pathname, "rb");
            if(file==NULL) {
                fprintf(stderr, "gencmn: unable to open listed file %s\n", files[i].pathname);
                exit(U_FILE_ACCESS_ERROR);
            }
            for(nread = 0;;) {
                length=T_FileStream_read(file, buffer, sizeof(buffer));
                if(length <= 0) {
                    break;
                }
                nread += length;
                udata_writeBlock(out, buffer, length);
            }
            T_FileStream_close(file);
            length=files[i].fileSize;

            if (nread != files[i].fileSize) {
                fprintf(stderr, "gencmn: unable to read %s properly (got %ld/%ld byte%s)\n", files[i].pathname, (long)nread, (long)files[i].fileSize, files[i].fileSize == 1 ? "" : "s");
                exit(U_FILE_ACCESS_ERROR);
            }
        }

        /* finish */
        udata_finish(out, &errorCode);
        if(U_FAILURE(errorCode)) {
            fprintf(stderr, "gencmn: udata_finish() failed - %s\n", u_errorName(errorCode));
            exit(errorCode);
        }
    } else {
        /* write a .c source file with the table of contents */
        char *filename;
        FileStream *out;

        /* create the output filename */
        filename=s=buffer;
        uprv_strcpy(filename, options[4].value);
        s=filename+uprv_strlen(filename);
        if(s>filename && *(s-1)!=U_FILE_SEP_CHAR) {
            *s++=U_FILE_SEP_CHAR;
        }
        uprv_strcpy(s, options[6].value);
        if(*(options[7].value)!=0) {
            s+=uprv_strlen(s);
            *s++='_';
            uprv_strcpy(s, options[7].value);
        }
        s+=uprv_strlen(s);
        uprv_strcpy(s, ".c");

        /* open the output file */
        out=T_FileStream_open(filename, "w");
        if(out==NULL) {
            fprintf(stderr, "gencmn: unable to open .c output file %s\n", filename);
            exit(U_FILE_ACCESS_ERROR);
        }

        /* If an entrypoint is specified, use it. */
        if(options[9].doesOccur) {
            entrypointName = options[9].value;
        } else {
            entrypointName = options[6].value;
        }


#if 0
        symPrefix = (char *) uprv_malloc(uprv_strlen(entrypointName) + 2);

        /* test for NULL */
	if (symPrefix == NULL) {
	    sprintf(buffer, "U_MEMORY_ALLOCATION_ERROR");
	    exit(U_MEMORY_ALLOCATION_ERROR);
	}

        uprv_strcpy(symPrefix, entrypointName);
        uprv_strcat(symPrefix, "_");
#endif

        /* write the source file */
        sprintf(buffer,
            "/*\n"
            " * ICU common data table of contents for %s.%s ,\n"
            " * Automatically generated by icu/source/tools/gencmn/gencmn .\n"
            " */\n\n"
            "#include \"unicode/utypes.h\"\n"
            "#include \"unicode/udata.h\"\n"
            "\n"
            "/* external symbol declarations for data */\n",
            options[6].value, options[7].value);
        T_FileStream_writeLine(out, buffer);

        sprintf(buffer, "extern const char\n    %s%s[]", symPrefix?symPrefix:"", files[0].pathname);
        T_FileStream_writeLine(out, buffer);
        for(i=1; i<fileCount; ++i) {
            sprintf(buffer, ",\n    %s%s[]", symPrefix?symPrefix:"", files[i].pathname);
            T_FileStream_writeLine(out, buffer);
        }
        T_FileStream_writeLine(out, ";\n\n");

        sprintf(
            buffer,
            "U_EXPORT const struct {\n"
            "    uint16_t headerSize;\n"
            "    uint8_t magic1, magic2;\n"
            "    UDataInfo info;\n"
            "    char padding[%lu];\n"
            "    uint32_t count, reserved;\n"
            "    struct {\n"
            "        const char *name;\n"
            "        const void *data;\n"
            "    } toc[%lu];\n"
            "} U_EXPORT2 %s_dat = {\n"
            "    32, 0xda, 0x27, {\n"
            "        %lu, 0,\n"
            "        %u, %u, %u, 0,\n"
            "        {0x54, 0x6f, 0x43, 0x50},\n"
            "        {1, 0, 0, 0},\n"
            "        {0, 0, 0, 0}\n"
            "    },\n"
            "    \"\", %lu, 0, {\n",
            (unsigned long)32-4-sizeof(UDataInfo),
            (unsigned long)fileCount,
            entrypointName,
            (unsigned long)sizeof(UDataInfo),
            U_IS_BIG_ENDIAN,
            U_CHARSET_FAMILY,
            U_SIZEOF_UCHAR,
            (unsigned long)fileCount
        );
        T_FileStream_writeLine(out, buffer);

        sprintf(buffer, "        { \"%s\", %s%s }", files[0].basename, symPrefix?symPrefix:"", files[0].pathname);
        T_FileStream_writeLine(out, buffer);
        for(i=1; i<fileCount; ++i) {
            sprintf(buffer, ",\n        { \"%s\", %s%s }", files[i].basename, symPrefix?symPrefix:"", files[i].pathname);
            T_FileStream_writeLine(out, buffer);
        }

        T_FileStream_writeLine(out, "\n    }\n};\n");
        T_FileStream_close(out);

        uprv_free(symPrefix);
    }

    return 0;
}

static void
addFile(const char *filename, UBool sourceTOC, UBool verbose) {
    char *s;
    uint32_t length;

    if(fileCount==MAX_FILE_COUNT) {
        fprintf(stderr, "gencmn: too many files, maximum is %d\n", MAX_FILE_COUNT);
        exit(U_BUFFER_OVERFLOW_ERROR);
    }

    if(!sourceTOC) {
        FileStream *file;

        /* store the pathname */
        length = (uint32_t)(uprv_strlen(filename) + 1);
        s=allocString(length);
        uprv_memcpy(s, filename, length);
        files[fileCount].pathname=s;

        /* get the basename */
        s=(char *)findBasename(s);
        files[fileCount].basename=s;
        length = (uint32_t)(uprv_strlen(s) + 1);
        files[fileCount].basenameLength=length;
        basenameTotal+=length;

        /* try to open the file */
        file=T_FileStream_open(filename, "rb");
        if(file==NULL) {
            fprintf(stderr, "gencmn: unable to open listed file %s\n", filename);
            exit(U_FILE_ACCESS_ERROR);
        }

        /* get the file length */
        length=T_FileStream_size(file);
        if(T_FileStream_error(file) || length<=20) {
            fprintf(stderr, "gencmn: unable to get length of listed file %s\n", filename);
            exit(U_FILE_ACCESS_ERROR);
        }
        T_FileStream_close(file);

        /* do not add files that are longer than maxSize */
        if(maxSize && length>maxSize) {
            if (verbose) {
                printf("%s ignored (size %ld > %ld)\n", filename, (long)length, (long)maxSize);
            }
            return;
        }
        files[fileCount].fileSize=length;
    } else {
        char *t;

        /* get and store the basename */
        filename=findBasename(filename);
        length = (uint32_t)(uprv_strlen(filename) + 1);
        s=allocString(length);
        uprv_memcpy(s, filename, length);
        files[fileCount].basename=s;

        /* turn the basename into an entry point name and store in the pathname field */
        t=files[fileCount].pathname=allocString(length);
        while(--length>0) {
            if(*s=='.' || *s=='-') {
                *t='_';
            } else {
                *t=*s;
            }
            ++s;
            ++t;
        }
        *t=0;
    }

    ++fileCount;
}

static char *
allocString(uint32_t length) {
    uint32_t top=stringTop+length;
    char *p;

    if(top>STRING_STORE_SIZE) {
        fprintf(stderr, "gencmn: out of memory\n");
        exit(U_MEMORY_ALLOCATION_ERROR);
    }
    p=stringStore+stringTop;
    stringTop=top;
    return p;
}

static int
compareFiles(const void *file1, const void *file2) {
    /* sort by basename */
    return uprv_strcmp(((File *)file1)->basename, ((File *)file2)->basename);
}

/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */