/* normalizer_normalize.c [plain text] */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php_intl.h"
#include "unicode/unorm.h"
#include "normalizer.h"
#include "normalizer_class.h"
#include "normalizer_normalize.h"
#include "intl_convert.h"
/*
 * normalizer_normalize( string $input [, int $form = NORMALIZER_DEFAULT ] )
 *
 * Normalizes $input according to the requested Unicode normalization form.
 *
 * Steps:
 *   1. parse the arguments (a string and an optional integer form);
 *   2. validate the form and pick an initial output-size multiplier;
 *   3. convert the input to UTF-16;
 *   4. call unorm_normalize(), retrying once with the exact size it reports
 *      when the first buffer turns out to be too small;
 *   5. convert the result back to UTF-8 and return it.
 *
 * Returns the normalized string on success. Returns FALSE on argument,
 * conversion or retry failures, and NULL when the first unorm_normalize()
 * call fails with an unexpected error (kept as NULL for backward
 * compatibility). Every failure path records an intl error.
 */
PHP_FUNCTION( normalizer_normalize )
{
	char*      input            = NULL;
	long       form             = NORMALIZER_DEFAULT;
	int        input_len        = 0;

	UChar*     uinput           = NULL;
	int        uinput_len       = 0;
	int        expansion_factor = 1;
	UErrorCode status           = U_ZERO_ERROR;

	UChar*     uret_buf         = NULL;
	int        uret_len         = 0;

	char*      ret_buf          = NULL;
	int32_t    ret_len          = 0;

	int32_t    size_needed;

	intl_error_reset( NULL TSRMLS_CC );

	/* Parse parameters: the input string and an optional form. */
	if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l",
				&input, &input_len, &form ) == FAILURE )
	{
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
				"normalizer_normalize: unable to parse input params", 0 TSRMLS_CC );
		RETURN_FALSE;
	}

	/*
	 * Validate the form. The decomposing forms get a larger first guess for
	 * the output buffer (3x the input length); all other accepted forms start
	 * from the input length itself.
	 */
	switch( form ) {
		case NORMALIZER_NONE:
		case NORMALIZER_FORM_C:
		case NORMALIZER_FORM_KC:
			break;
		case NORMALIZER_FORM_D:
		case NORMALIZER_FORM_KD:
			expansion_factor = 3;
			break;
		default:
			intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
					"normalizer_normalize: illegal normalization form", 0 TSRMLS_CC );
			RETURN_FALSE;
	}

	/* Convert the input string to UTF-16 for ICU. */
	intl_convert_utf8_to_utf16( &uinput, &uinput_len, input, input_len, &status );
	if( U_FAILURE( status ) )
	{
		/* Set global error code. */
		intl_error_set_code( NULL, status TSRMLS_CC );

		/* Set error message. */
		intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
		if( uinput ) {
			efree( uinput );
		}
		RETURN_FALSE;
	}

	/*
	 * Guard the size computation below: both the multiplication and the
	 * "+ 1" passed to eumalloc() must stay within a signed 32-bit int.
	 * expansion_factor is always >= 1 here, so the division is safe.
	 */
	if( uinput_len > ( INT32_MAX - 1 ) / expansion_factor ) {
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
				"normalizer_normalize: input string is too long", 0 TSRMLS_CC );
		efree( uinput );
		RETURN_FALSE;
	}

	/* Allocate the first-guess output buffer. */
	uret_len = uinput_len * expansion_factor;
	uret_buf = eumalloc( uret_len + 1 );

	/* First attempt; size_needed receives the length ICU actually requires. */
	size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0, uret_buf, uret_len, &status );

	/*
	 * Bail out on an unexpected error. A buffer overflow is handled by the
	 * retry below, and a "not terminated" status is tolerated.
	 */
	if( U_FAILURE( status ) && status != U_BUFFER_OVERFLOW_ERROR && status != U_STRING_NOT_TERMINATED_WARNING ) {
		/* Record the failure so callers can inspect it via the intl error API. */
		intl_error_set_code( NULL, status TSRMLS_CC );
		intl_error_set_custom_msg( NULL, "Error normalizing string", 0 TSRMLS_CC );
		efree( uret_buf );
		efree( uinput );
		RETURN_NULL();
	}

	/* The first buffer was too small: reallocate with the exact size and retry. */
	if( size_needed > uret_len ) {
		efree( uret_buf );
		uret_buf = eumalloc( size_needed + 1 );
		uret_len = size_needed;

		status = U_ZERO_ERROR;

		size_needed = unorm_normalize( uinput, uinput_len, form, (int32_t) 0, uret_buf, uret_len, &status );

		/* With an exactly sized buffer, any failure is final. */
		if( U_FAILURE( status ) ) {
			intl_error_set_code( NULL, status TSRMLS_CC );
			intl_error_set_custom_msg( NULL, "Error normalizing string", 0 TSRMLS_CC );
			efree( uret_buf );
			efree( uinput );
			RETURN_FALSE;
		}
	}

	efree( uinput );

	/* The real output length is whatever ICU reported. */
	uret_len = size_needed;

	/* Convert the normalized text back to UTF-8. */
	intl_convert_utf16_to_utf8( &ret_buf, &ret_len, uret_buf, uret_len, &status );
	efree( uret_buf );
	if( U_FAILURE( status ) )
	{
		intl_error_set( NULL, status,
				"normalizer_normalize: error converting normalized text UTF-8", 0 TSRMLS_CC );
		RETURN_FALSE;
	}

	/* Return the converted buffer without duplicating it (dup flag is FALSE). */
	RETVAL_STRINGL( ret_buf, ret_len, FALSE );
}
/*
 * normalizer_is_normalized( string $input [, int $form = NORMALIZER_DEFAULT ] )
 *
 * Tests whether $input is already in the given normalization form.
 *
 * Only the D, KD, C and KC forms are accepted; any other value is rejected
 * as an illegal argument. The input is converted to UTF-16 and checked with
 * unorm_isNormalizedWithOptions().
 *
 * Returns TRUE when the string is normalized. Returns FALSE when it is not,
 * and also on every error; each error path records an intl error.
 */
PHP_FUNCTION( normalizer_is_normalized )
{
	char*      input      = NULL;
	long       form       = NORMALIZER_DEFAULT;
	int        input_len  = 0;

	UChar*     uinput     = NULL;
	int        uinput_len = 0;
	UErrorCode status     = U_ZERO_ERROR;

	UBool      uret       = FALSE;

	intl_error_reset( NULL TSRMLS_CC );

	/* Parse parameters: the input string and an optional form. */
	if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "s|l",
				&input, &input_len, &form ) == FAILURE )
	{
		intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
				"normalizer_is_normalized: unable to parse input params", 0 TSRMLS_CC );
		RETURN_FALSE;
	}

	/* Validate the requested form. */
	switch( form ) {
		case NORMALIZER_FORM_D:
		case NORMALIZER_FORM_KD:
		case NORMALIZER_FORM_C:
		case NORMALIZER_FORM_KC:
			break;
		default:
			/* The message names this function rather than normalizer_normalize. */
			intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
					"normalizer_is_normalized: illegal normalization form", 0 TSRMLS_CC );
			RETURN_FALSE;
	}

	/* Convert the input string to UTF-16 for ICU. */
	intl_convert_utf8_to_utf16( &uinput, &uinput_len, input, input_len, &status );
	if( U_FAILURE( status ) )
	{
		/* Set global error code. */
		intl_error_set_code( NULL, status TSRMLS_CC );

		/* Set error message. */
		intl_error_set_custom_msg( NULL, "Error converting string to UTF-16.", 0 TSRMLS_CC );
		if( uinput ) {
			efree( uinput );
		}
		RETURN_FALSE;
	}

	/* Ask ICU whether the text is already in the requested form. */
	uret = unorm_isNormalizedWithOptions( uinput, uinput_len, form, (int32_t) 0, &status );

	efree( uinput );

	/* Bail out if an error occurred, recording both the code and a message. */
	if( U_FAILURE( status ) ) {
		intl_error_set_code( NULL, status TSRMLS_CC );
		intl_error_set_custom_msg( NULL, "Error testing if string is the given normalization form.", 0 TSRMLS_CC );
		RETURN_FALSE;
	}

	if( uret ) {
		RETURN_TRUE;
	}

	RETURN_FALSE;
}