#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "php_intl.h"
#include "collator.h"
#include "collator_class.h"
#include "collator_sort.h"
#include "collator_convert.h"
#include "intl_convert.h"
#if !defined(HAVE_PTRDIFF_T) && !defined(_PTRDIFF_T_DEFINED)
typedef long ptrdiff_t;
#endif
typedef struct _collator_sort_key_index {
char* key;
zval** zstr;
} collator_sort_key_index_t;
ZEND_EXTERN_MODULE_GLOBALS( intl )
static const size_t DEF_SORT_KEYS_BUF_SIZE = 1048576;
static const size_t DEF_SORT_KEYS_BUF_INCREMENT = 1048576;
static const size_t DEF_SORT_KEYS_INDX_BUF_SIZE = 1048576;
static const size_t DEF_SORT_KEYS_INDX_BUF_INCREMENT = 1048576;
static const size_t DEF_UTF16_BUF_SIZE = 1024;
static int collator_regular_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{
Collator_object* co = NULL;
int rc = SUCCESS;
zval* str1 = collator_convert_object_to_string( op1 TSRMLS_CC );
zval* str2 = collator_convert_object_to_string( op2 TSRMLS_CC );
zval* num1 = NULL;
zval* num2 = NULL;
zval* norm1 = NULL;
zval* norm2 = NULL;
if( Z_TYPE_P(str1) == IS_STRING && Z_TYPE_P(str2) == IS_STRING &&
( str1 == ( num1 = collator_convert_string_to_number_if_possible( str1 ) ) ||
str2 == ( num2 = collator_convert_string_to_number_if_possible( str2 ) ) ) )
{
co = (Collator_object *) zend_object_store_get_object( INTL_G(current_collator) TSRMLS_CC );
if (!co || !co->ucoll) {
intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC );
intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
"Object not initialized", 0 TSRMLS_CC );
php_error_docref(NULL TSRMLS_CC, E_RECOVERABLE_ERROR, "Object not initialized");
}
result->value.lval = ucol_strcoll(
co->ucoll,
INTL_Z_STRVAL_P(str1), INTL_Z_STRLEN_P(str1),
INTL_Z_STRVAL_P(str2), INTL_Z_STRLEN_P(str2) );
result->type = IS_LONG;
}
else
{
if( num1 )
{
if( num1 == str1 )
{
norm1 = collator_convert_zstr_utf16_to_utf8( str1 );
norm2 = collator_normalize_sort_argument( str2 );
}
else
{
zval_add_ref( &num1 );
norm1 = num1;
zval_add_ref( &num2 );
norm2 = num2;
}
}
else
{
norm1 = collator_normalize_sort_argument( str1 );
norm2 = collator_normalize_sort_argument( str2 );
}
rc = compare_function( result, norm1, norm2 TSRMLS_CC );
zval_ptr_dtor( &norm1 );
zval_ptr_dtor( &norm2 );
}
if( num1 )
zval_ptr_dtor( &num1 );
if( num2 )
zval_ptr_dtor( &num2 );
zval_ptr_dtor( &str1 );
zval_ptr_dtor( &str2 );
return rc;
}
static int collator_numeric_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{
int rc = SUCCESS;
zval* num1 = NULL;
zval* num2 = NULL;
if( Z_TYPE_P(op1) == IS_STRING )
{
num1 = collator_convert_string_to_double( op1 );
op1 = num1;
}
if( Z_TYPE_P(op2) == IS_STRING )
{
num2 = collator_convert_string_to_double( op2 );
op2 = num2;
}
rc = numeric_compare_function( result, op1, op2 TSRMLS_CC);
if( num1 )
zval_ptr_dtor( &num1 );
if( num2 )
zval_ptr_dtor( &num2 );
return rc;
}
static int collator_icu_compare_function(zval *result, zval *op1, zval *op2 TSRMLS_DC)
{
int rc = SUCCESS;
Collator_object* co = NULL;
zval* str1 = NULL;
zval* str2 = NULL;
str1 = collator_make_printable_zval( op1 );
str2 = collator_make_printable_zval( op2 );
co = (Collator_object *) zend_object_store_get_object( INTL_G(current_collator) TSRMLS_CC );
result->value.lval = ucol_strcoll(
co->ucoll,
INTL_Z_STRVAL_P(str1), INTL_Z_STRLEN_P(str1),
INTL_Z_STRVAL_P(str2), INTL_Z_STRLEN_P(str2) );
result->type = IS_LONG;
zval_ptr_dtor( &str1 );
zval_ptr_dtor( &str2 );
return rc;
}
static int collator_compare_func( const void* a, const void* b TSRMLS_DC )
{
Bucket *f;
Bucket *s;
zval result;
zval *first;
zval *second;
f = *((Bucket **) a);
s = *((Bucket **) b);
first = *((zval **) f->pData);
second = *((zval **) s->pData);
if( INTL_G(compare_func)( &result, first, second TSRMLS_CC) == FAILURE )
return 0;
if( Z_TYPE(result) == IS_DOUBLE )
{
if( Z_DVAL(result) < 0 )
return -1;
else if( Z_DVAL(result) > 0 )
return 1;
else
return 0;
}
convert_to_long(&result);
if( Z_LVAL(result) < 0 )
return -1;
else if( Z_LVAL(result) > 0 )
return 1;
return 0;
}
static int collator_cmp_sort_keys( const void *p1, const void *p2 TSRMLS_DC )
{
char* key1 = ((collator_sort_key_index_t*)p1)->key;
char* key2 = ((collator_sort_key_index_t*)p2)->key;
return strcmp( key1, key2 );
}
static collator_compare_func_t collator_get_compare_function( const long sort_flags )
{
collator_compare_func_t func;
switch( sort_flags )
{
case COLLATOR_SORT_NUMERIC:
func = collator_numeric_compare_function;
break;
case COLLATOR_SORT_STRING:
func = collator_icu_compare_function;
break;
case COLLATOR_SORT_REGULAR:
default:
func = collator_regular_compare_function;
break;
}
return func;
}
static void collator_sort_internal( int renumber, INTERNAL_FUNCTION_PARAMETERS )
{
zval* array = NULL;
HashTable* hash = NULL;
zval* saved_collator = NULL;
long sort_flags = COLLATOR_SORT_REGULAR;
COLLATOR_METHOD_INIT_VARS
if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa|l",
&object, Collator_ce_ptr, &array, &sort_flags ) == FAILURE )
{
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
"collator_sort_internal: unable to parse input params", 0 TSRMLS_CC );
RETURN_FALSE;
}
COLLATOR_METHOD_FETCH_OBJECT;
INTL_G(compare_func) = collator_get_compare_function( sort_flags );
hash = HASH_OF( array );
collator_convert_hash_from_utf8_to_utf16( hash, COLLATOR_ERROR_CODE_P( co ) );
COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-8 to UTF-16" );
saved_collator = INTL_G( current_collator );
INTL_G( current_collator ) = object;
zend_hash_sort( hash, zend_qsort, collator_compare_func, renumber TSRMLS_CC );
INTL_G( current_collator ) = saved_collator;
collator_convert_hash_from_utf16_to_utf8( hash, COLLATOR_ERROR_CODE_P( co ) );
COLLATOR_CHECK_STATUS( co, "Error converting hash from UTF-16 to UTF-8" );
RETURN_TRUE;
}
PHP_FUNCTION( collator_sort )
{
collator_sort_internal( TRUE, INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
PHP_FUNCTION( collator_sort_with_sort_keys )
{
zval* array = NULL;
HashTable* hash = NULL;
zval** hashData = NULL;
char* sortKeyBuf = NULL;
uint32_t sortKeyBufSize = DEF_SORT_KEYS_BUF_SIZE;
ptrdiff_t sortKeyBufOffset = 0;
int32_t sortKeyLen = 0;
uint32_t bufLeft = 0;
uint32_t bufIncrement = 0;
collator_sort_key_index_t* sortKeyIndxBuf = NULL;
uint32_t sortKeyIndxBufSize = DEF_SORT_KEYS_INDX_BUF_SIZE;
uint32_t sortKeyIndxSize = sizeof( collator_sort_key_index_t );
uint32_t sortKeyCount = 0;
uint32_t j = 0;
UChar* utf16_buf = NULL;
int utf16_buf_size = DEF_UTF16_BUF_SIZE;
int utf16_len = 0;
HashTable* sortedHash = NULL;
COLLATOR_METHOD_INIT_VARS
if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Oa",
&object, Collator_ce_ptr, &array ) == FAILURE )
{
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
"collator_sort_with_sort_keys: unable to parse input params", 0 TSRMLS_CC );
RETURN_FALSE;
}
COLLATOR_METHOD_FETCH_OBJECT;
if (!co || !co->ucoll) {
intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC );
intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
"Object not initialized", 0 TSRMLS_CC );
php_error_docref(NULL TSRMLS_CC, E_RECOVERABLE_ERROR, "Object not initialized");
RETURN_FALSE;
}
hash = HASH_OF( array );
if( !hash || zend_hash_num_elements( hash ) == 0 )
RETURN_TRUE;
sortKeyBuf = ecalloc( sortKeyBufSize, sizeof( char ) );
sortKeyIndxBuf = ecalloc( sortKeyIndxBufSize, sizeof( uint8_t ) );
utf16_buf = eumalloc( utf16_buf_size );
zend_hash_internal_pointer_reset( hash );
while( zend_hash_get_current_data( hash, (void**) &hashData ) == SUCCESS )
{
utf16_len = utf16_buf_size;
if( Z_TYPE_PP( hashData ) == IS_STRING )
{
intl_convert_utf8_to_utf16( &utf16_buf, &utf16_len, Z_STRVAL_PP( hashData ), Z_STRLEN_PP( hashData ), COLLATOR_ERROR_CODE_P( co ) );
if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
{
intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC );
intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ), "Sort with sort keys failed", 0 TSRMLS_CC );
if( utf16_buf )
efree( utf16_buf );
efree( sortKeyIndxBuf );
efree( sortKeyBuf );
RETURN_FALSE;
}
}
else
{
utf16_len = 0;
utf16_buf[utf16_len] = 0;
}
if( (utf16_len + 1) > utf16_buf_size )
utf16_buf_size = utf16_len + 1;
bufLeft = sortKeyBufSize - sortKeyBufOffset;
sortKeyLen = ucol_getSortKey( co->ucoll,
utf16_buf,
utf16_len,
(uint8_t*)sortKeyBuf + sortKeyBufOffset,
bufLeft );
if( sortKeyLen > bufLeft )
{
bufIncrement = ( sortKeyLen > DEF_SORT_KEYS_BUF_INCREMENT ) ? sortKeyLen : DEF_SORT_KEYS_BUF_INCREMENT;
sortKeyBufSize += bufIncrement;
bufLeft += bufIncrement;
sortKeyBuf = erealloc( sortKeyBuf, sortKeyBufSize );
sortKeyLen = ucol_getSortKey( co->ucoll, utf16_buf, utf16_len, (uint8_t*)sortKeyBuf + sortKeyBufOffset, bufLeft );
}
if( ( sortKeyCount + 1 ) * sortKeyIndxSize > sortKeyIndxBufSize )
{
bufIncrement = ( sortKeyIndxSize > DEF_SORT_KEYS_INDX_BUF_INCREMENT ) ? sortKeyIndxSize : DEF_SORT_KEYS_INDX_BUF_INCREMENT;
sortKeyIndxBufSize += bufIncrement;
sortKeyIndxBuf = erealloc( sortKeyIndxBuf, sortKeyIndxBufSize );
}
sortKeyIndxBuf[sortKeyCount].key = (char*)sortKeyBufOffset;
sortKeyIndxBuf[sortKeyCount].zstr = hashData;
sortKeyBufOffset += sortKeyLen;
++sortKeyCount;
zend_hash_move_forward( hash );
}
for( j = 0; j < sortKeyCount; j++ )
sortKeyIndxBuf[j].key = sortKeyBuf + (ptrdiff_t)sortKeyIndxBuf[j].key;
zend_qsort( sortKeyIndxBuf, sortKeyCount, sortKeyIndxSize, collator_cmp_sort_keys TSRMLS_CC );
ALLOC_HASHTABLE( sortedHash );
zend_hash_init( sortedHash, 0, NULL, ZVAL_PTR_DTOR, 0 );
for( j = 0; j < sortKeyCount; j++ )
{
zval_add_ref( sortKeyIndxBuf[j].zstr );
zend_hash_next_index_insert( sortedHash, sortKeyIndxBuf[j].zstr, sizeof(zval **), NULL );
}
zval_dtor( array );
(array)->value.ht = sortedHash;
(array)->type = IS_ARRAY;
if( utf16_buf )
efree( utf16_buf );
efree( sortKeyIndxBuf );
efree( sortKeyBuf );
RETURN_TRUE;
}
PHP_FUNCTION( collator_asort )
{
collator_sort_internal( FALSE, INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
PHP_FUNCTION( collator_get_sort_key )
{
char* str = NULL;
int str_len = 0;
UChar* ustr = NULL;
int ustr_len = 0;
uint8_t* key = NULL;
int key_len = 0;
COLLATOR_METHOD_INIT_VARS
if( zend_parse_method_parameters( ZEND_NUM_ARGS() TSRMLS_CC, getThis(), "Os",
&object, Collator_ce_ptr, &str, &str_len ) == FAILURE )
{
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
"collator_get_sort_key: unable to parse input params", 0 TSRMLS_CC );
RETURN_FALSE;
}
COLLATOR_METHOD_FETCH_OBJECT;
if (!co || !co->ucoll) {
intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC );
intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
"Object not initialized", 0 TSRMLS_CC );
php_error_docref(NULL TSRMLS_CC, E_RECOVERABLE_ERROR, "Object not initialized");
RETURN_FALSE;
}
intl_convert_utf8_to_utf16(
&ustr, &ustr_len, str, str_len, COLLATOR_ERROR_CODE_P( co ) );
if( U_FAILURE( COLLATOR_ERROR_CODE( co ) ) )
{
intl_error_set_code( NULL, COLLATOR_ERROR_CODE( co ) TSRMLS_CC );
intl_errors_set_custom_msg( COLLATOR_ERROR_P( co ),
"Error converting first argument to UTF-16", 0 TSRMLS_CC );
efree( ustr );
RETURN_FALSE;
}
key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, key, 0);
if(!key_len) {
efree( ustr );
RETURN_FALSE;
}
key = emalloc(key_len);
key_len = ucol_getSortKey(co->ucoll, ustr, ustr_len, key, key_len);
efree( ustr );
if(!key_len) {
RETURN_FALSE;
}
RETURN_STRINGL((char *)key, key_len - 1, 0);
}