rle.c   [plain text]


/*
*******************************************************************************
*
*   Copyright (C) 2000-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* File writejava.c
*
* Modification History:
*
*   Date        Name        Description
*   01/11/02    Ram        Creation.
*******************************************************************************
*/
#include "rle.h"
/**
 * The ESCAPE character is used during run-length encoding.  It signals
 * a run of identical chars.
 */
static const uint16_t ESCAPE = 0xA5A5;

/**
 * The ESCAPE_BYTE character is used during run-length encoding.  It signals
 * a run of identical bytes.
 */
static const uint8_t ESCAPE_BYTE = (uint8_t)0xA5;

/**
 * Append a byte to the given StringBuffer, packing two bytes into each
 * character.  The state parameter maintains intermediary data between
 * calls.
 * @param state A two-element array, with state[0] == 0 if this is the
 * first byte of a pair, or state[0] != 0 if this is the second byte
 * of a pair, in which case state[1] is the first byte.
 */
static uint16_t*
appendEncodedByte(uint16_t* buffer, uint16_t* buffLimit, uint8_t value, uint8_t state[],UErrorCode* status) {
    if(!status || U_FAILURE(*status)){
        return NULL;
    }
    if (state[0] != 0) {
        uint16_t c = (uint16_t) ((state[1] << 8) | (((int32_t) value) & 0xFF));
        if(buffer < buffLimit){
            *buffer++ = c;
        }else{
            *status = U_BUFFER_OVERFLOW_ERROR;
        }
        state[0] = 0;
        return buffer;
    }
    else {
        state[0] = 1;
        state[1] = value;
        return buffer;
    }
}
/**
 * Encode a run, possibly a degenerate run (of < 4 values).
 * @param length The length of the run; must be > 0 && <= 0xFF.
 */
static uint16_t*
encodeRunByte(uint16_t* buffer,uint16_t* bufLimit, uint8_t value, int32_t length, uint8_t state[], UErrorCode* status) {
    if(!status || U_FAILURE(*status)){
        return NULL;
    }
    if (length < 4) {
        int32_t j=0;
        for (; j<length; ++j) {
            if (value == ESCAPE_BYTE) {
                buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
            }
            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
        }
    }
    else {
        if (length == ESCAPE_BYTE) {
            if (value == ESCAPE_BYTE){
               buffer =  appendEncodedByte(buffer, bufLimit,ESCAPE_BYTE, state,status);
            }
            buffer = appendEncodedByte(buffer,bufLimit, value, state, status);
            --length;
        }
        buffer = appendEncodedByte(buffer,bufLimit, ESCAPE_BYTE, state,status);
        buffer = appendEncodedByte(buffer,bufLimit, (char)length, state, status);
        buffer = appendEncodedByte(buffer,bufLimit, value, state, status); /* Don't need to escape this value*/
    }
    return buffer;
}

#define APPEND( buffer, bufLimit, value, num, status){  \
    if(buffer<bufLimit){                    \
        *buffer++=(value);                  \
    }else{                                  \
        *status = U_BUFFER_OVERFLOW_ERROR;  \
    }                                       \
    num++;                                  \
}

/**
 * Encode a run, possibly a degenerate run (of < 4 values).
 * @param length The length of the run; must be > 0 && <= 0xFFFF.
 */
static uint16_t*
encodeRunShort(uint16_t* buffer,uint16_t* bufLimit, uint16_t value, int32_t length,UErrorCode* status) {
    int32_t num=0;
    if (length < 4) {
        int j=0;
        for (; j<length; ++j) {
            if (value == (int32_t) ESCAPE){
                APPEND(buffer,bufLimit,ESCAPE, num, status);

            }
            APPEND(buffer,bufLimit,value,num, status);
        }
    }
    else {
        if (length == (int32_t) ESCAPE) {
            if (value == (int32_t) ESCAPE){
                APPEND(buffer,bufLimit,ESCAPE,num,status);

            }
            APPEND(buffer,bufLimit,value,num,status);
            --length;
        }
        APPEND(buffer,bufLimit,ESCAPE,num,status);
        APPEND(buffer,bufLimit,(uint16_t) length, num,status);
        APPEND(buffer,bufLimit,(uint16_t)value, num, status); /* Don't need to escape this value */
    }
    return buffer;
}

/**
 * Construct a string representing a char array.  Use run-length encoding.
 * A character represents itself, unless it is the ESCAPE character.  Then
 * the following notations are possible:
 *   ESCAPE ESCAPE   ESCAPE literal
 *   ESCAPE n c      n instances of character c
 * Since an encoded run occupies 3 characters, we only encode runs of 4 or
 * more characters.  Thus we have n > 0 and n != ESCAPE and n <= 0xFFFF.
 * If we encounter a run where n == ESCAPE, we represent this as:
 *   c ESCAPE n-1 c
 * The ESCAPE value is chosen so as not to collide with commonly
 * seen values.
 */
int32_t
usArrayToRLEString(const uint16_t* src,int32_t srcLen,uint16_t* buffer, int32_t bufLen,UErrorCode* status) {
    uint16_t* bufLimit =  buffer+bufLen;
    uint16_t* saveBuffer = buffer;
    if(buffer < bufLimit){
        *buffer++ =  (uint16_t)(srcLen>>16);
        if(buffer<bufLimit){
            uint16_t runValue = src[0];
            int32_t runLength = 1;
            int i=1;
            *buffer++ = (uint16_t) srcLen;

            for (; i<srcLen; ++i) {
                uint16_t s = src[i];
                if (s == runValue && runLength < 0xFFFF){
                    ++runLength;
                }else {
                    buffer = encodeRunShort(buffer,bufLimit, (uint16_t)runValue, runLength,status);
                    runValue = s;
                    runLength = 1;
                }
            }
            buffer= encodeRunShort(buffer,bufLimit,(uint16_t)runValue, runLength,status);
        }else{
            *status = U_BUFFER_OVERFLOW_ERROR;
        }
    }else{
        *status = U_BUFFER_OVERFLOW_ERROR;
    }
    return (int32_t)(buffer - saveBuffer);
}

/**
 * Construct a string representing a byte array.  Use run-length encoding.
 * Two bytes are packed into a single char, with a single extra zero byte at
 * the end if needed.  A byte represents itself, unless it is the
 * ESCAPE_BYTE.  Then the following notations are possible:
 *   ESCAPE_BYTE ESCAPE_BYTE   ESCAPE_BYTE literal
 *   ESCAPE_BYTE n b           n instances of byte b
 * Since an encoded run occupies 3 bytes, we only encode runs of 4 or
 * more bytes.  Thus we have n > 0 and n != ESCAPE_BYTE and n <= 0xFF.
 * If we encounter a run where n == ESCAPE_BYTE, we represent this as:
 *   b ESCAPE_BYTE n-1 b
 * The ESCAPE_BYTE value is chosen so as not to collide with commonly
 * seen values.
 */
int32_t
byteArrayToRLEString(const uint8_t* src,int32_t srcLen, uint16_t* buffer,int32_t bufLen, UErrorCode* status) {
    const uint16_t* saveBuf = buffer;
    uint16_t* bufLimit =  buffer+bufLen;
    if(buffer < bufLimit){
        *buffer++ = ((uint16_t) (srcLen >> 16));

        if(buffer<bufLimit){
            uint8_t runValue = src[0];
            int runLength = 1;
            uint8_t state[2]= {0};
            int i=1;
            *buffer++=((uint16_t) srcLen);
            for (; i<srcLen; ++i) {
                uint8_t b = src[i];
                if (b == runValue && runLength < 0xFF){
                    ++runLength;
                }
                else {
                    buffer = encodeRunByte(buffer, bufLimit,runValue, runLength, state,status);
                    runValue = b;
                    runLength = 1;
                }
            }
            buffer = encodeRunByte(buffer,bufLimit, runValue, runLength, state, status);

            /* We must save the final byte, if there is one, by padding
             * an extra zero.
             */
            if (state[0] != 0) {
                buffer = appendEncodedByte(buffer,bufLimit, 0, state ,status);
            }
        }else{
            *status = U_BUFFER_OVERFLOW_ERROR;
        }
    }else{
        *status = U_BUFFER_OVERFLOW_ERROR;
    }
    return (int32_t) (buffer - saveBuf);
}


/**
 * Construct an array of shorts from a run-length encoded string.
 */
int32_t
rleStringToUCharArray(uint16_t* src, int32_t srcLen, uint16_t* target, int32_t tgtLen, UErrorCode* status) {
    int32_t length = 0;
    int32_t ai = 0;
    int i=2;

    if(!status || U_FAILURE(*status)){
        return 0;
    }
    /* the source is null terminated */
    if(srcLen == -1){
        srcLen = u_strlen(src);
    }
    if(srcLen <= 2){
        return 2;
    }
    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);

    if(target == NULL){
        return length;
    }
    if(tgtLen < length){
        *status = U_BUFFER_OVERFLOW_ERROR;
        return length;
    }

    for (; i<srcLen; ++i) {
        uint16_t c = src[i];
        if (c == ESCAPE) {
            c = src[++i];
            if (c == ESCAPE) {
                target[ai++] = c;
            } else {
                int32_t runLength = (int32_t) c;
                uint16_t runValue = src[++i];
                int j=0;
                for (; j<runLength; ++j) {
                    target[ai++] = runValue;
                }
            }
        }
        else {
            target[ai++] = c;
        }
    }

    if (ai != length){
        *status = U_INTERNAL_PROGRAM_ERROR;
    }

    return length;
}

/**
 * Construct an array of bytes from a run-length encoded string.
 */
int32_t
rleStringToByteArray(uint16_t* src, int32_t srcLen, uint8_t* target, int32_t tgtLen, UErrorCode* status) {

    int32_t length = 0;
    UBool nextChar = TRUE;
    uint16_t c = 0;
    int32_t node = 0;
    int32_t runLength = 0;
    int32_t i = 2;
    int32_t ai=0;

    if(!status || U_FAILURE(*status)){
        return 0;
    }
    /* the source is null terminated */
    if(srcLen == -1){
        srcLen = u_strlen(src);
    }
    if(srcLen <= 2){
        return 2;
    }
    length = (((int32_t) src[0]) << 16) | ((int32_t) src[1]);

    if(target == NULL){
        return length;
    }
    if(tgtLen < length){
        *status = U_BUFFER_OVERFLOW_ERROR;
        return length;
    }

    for (; ai<tgtLen; ) {
       /* This part of the loop places the next byte into the local
        * variable 'b' each time through the loop.  It keeps the
        * current character in 'c' and uses the boolean 'nextChar'
        * to see if we've taken both bytes out of 'c' yet.
        */
        uint8_t b;
        if (nextChar) {
            c = src[i++];
            b = (uint8_t) (c >> 8);
            nextChar = FALSE;
        }
        else {
            b = (uint8_t) (c & 0xFF);
            nextChar = TRUE;
        }

       /* This part of the loop is a tiny state machine which handles
        * the parsing of the run-length encoding.  This would be simpler
        * if we could look ahead, but we can't, so we use 'node' to
        * move between three nodes in the state machine.
        */
        switch (node) {
        case 0:
            /* Normal idle node */
            if (b == ESCAPE_BYTE) {
                node = 1;
            }
            else {
                target[ai++] = b;
            }
            break;
        case 1:
           /* We have seen one ESCAPE_BYTE; we expect either a second
            * one, or a run length and value.
            */
            if (b == ESCAPE_BYTE) {
                target[ai++] = ESCAPE_BYTE;
                node = 0;
            }
            else {
                runLength = b;
                node = 2;
            }
            break;
        case 2:
            {
                int j=0;
               /* We have seen an ESCAPE_BYTE and length byte.  We interpret
                * the next byte as the value to be repeated.
                */
                for (; j<runLength; ++j){
                    if(ai<tgtLen){
                        target[ai++] = b;
                    }else{
                        *status = U_BUFFER_OVERFLOW_ERROR;
                        return ai;
                    }
                }
                node = 0;
                break;
            }
        }
    }

    if (node != 0){
        *status = U_INTERNAL_PROGRAM_ERROR;
        /*("Bad run-length encoded byte array")*/
        return 0;
    }


    if (i != srcLen){
        /*("Excess data in RLE byte array string");*/
        *status = U_INTERNAL_PROGRAM_ERROR;
        return ai;
    }

    return ai;
}