#include "php.h"
#include "php_metaphone.h"
static int metaphone(unsigned char *word, size_t word_len, zend_long max_phonemes, zend_string **phoned_word, int traditional);
/*
 * Userland entry point for metaphone().
 *
 * Arguments are parsed with the "S|l" spec: a string followed by an optional
 * integer phoneme limit, which defaults to 0 when omitted. The internal
 * metaphone() routine is always called with traditional = 1.
 *
 * Returns the phonetic key as a string when the internal routine reports 0,
 * and boolean false otherwise.
 */
PHP_FUNCTION(metaphone)
{
	zend_string *input;
	zend_string *phoned = NULL;
	zend_long max_phonemes = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|l", &input, &max_phonemes) == FAILURE) {
		return;
	}

	/* Failure path first: drop any buffer the routine may have handed back. */
	if (metaphone((unsigned char *)ZSTR_VAL(input), ZSTR_LEN(input), max_phonemes, &phoned, 1) != 0) {
		if (phoned != NULL) {
			zend_string_free(phoned);
		}
		RETURN_FALSE;
	}

	RETVAL_STR(phoned);
}
/* Output characters used for the two phonemes that have no letter of their own. */
#define SH 'X'
#define TH '0'

/*
 * Per-letter flag table, indexed by (uppercase letter - 'A').
 * Bit meanings (see the test macros below):
 *    1  vowel                  (A E I O U)
 *    2  emitted unchanged      (F J L M N R)
 *    4  affects a following H  (C G P S T)
 *    8  softens a preceding C/G (E I Y)
 *   16  blocks GH -> F         (B D H)
 *
 * NOTE(review): the table has external linkage and is never written in the
 * code visible here; it could probably become `static const`, but that is
 * left alone in case another translation unit refers to it.
 */
char _codes[26] =
{
	1, 16, 4, 16, 9, 2, 4, 16, 9, 2, 0, 2, 2, 2, 1, 4, 0, 2, 4, 4, 1, 0, 0, 0, 8, 0
/*	A   B  C   D  E  F  G   H  I  J  K  L  M  N  O  P  Q  R  S  T  U  V  W  X  Y  Z */
};

/*
 * Look up the flags for character c; anything that is not a letter yields 0.
 *
 * isalpha() is locale dependent, so under a non-"C" locale it may accept
 * letters whose uppercase form lies outside 'A'..'Z'. The explicit range
 * check keeps the index inside the 26-entry table in that case.
 * The argument is evaluated more than once; pass side-effect-free expressions.
 */
#define ENCODE(c) ((isalpha(c) && toupper(c) >= 'A' && toupper(c) <= 'Z') \
	? _codes[toupper(c) - 'A'] : 0)

/* Flag tests built on ENCODE(); each is non-zero when the flag is set. */
#define isvowel(c) (ENCODE(c) & 1)
#define NOCHANGE(c) (ENCODE(c) & 2)
#define AFFECTH(c) (ENCODE(c) & 4)
#define MAKESOFT(c) (ENCODE(c) & 8)
#define NOGHTOF(c) (ENCODE(c) & 16)

/*
 * Letter accessors. They rely on two names being in scope at the point of
 * use: `word` (the input buffer) and `w_idx` (the current index into it).
 * All of them return the uppercased character.
 */
#define Next_Letter (toupper(word[w_idx+1]))
#define Curr_Letter (toupper(word[w_idx]))
/* Letter n positions back, or '\0' when that would be before the start. */
#define Look_Back_Letter(n) (w_idx >= (n) ? toupper(word[w_idx-(n)]) : '\0')
#define Prev_Letter (Look_Back_Letter(1))
/* Letter two positions ahead; only read when the next letter is not the terminator. */
#define After_Next_Letter (Next_Letter != '\0' ? toupper(word[w_idx+2]) \
	: '\0')
/*
 * Letter n positions ahead, via Lookahead() so the terminator is never passed.
 * Lookahead() returns plain char, which may be negative for bytes >= 0x80;
 * the cast keeps the toupper() argument in the range the C standard allows.
 */
#define Look_Ahead_Letter(n) (toupper((unsigned char) Lookahead((char *) word+w_idx, n)))
/*
 * Return the character how_far positions after word[0], without ever reading
 * beyond the string's terminating NUL. If the string ends first, the NUL
 * itself is returned. A how_far of zero or less returns word[0].
 */
static char Lookahead(char *word, int how_far)
{
	const char *cursor = word;
	int remaining = how_far;

	/* Step forward until the requested distance is covered or the string ends. */
	while (*cursor != '\0' && remaining > 0) {
		cursor++;
		remaining--;
	}

	return *cursor;
}
/*
 * Append one phoneme character c to *phoned_word.
 *
 * Uses three names from the scope where it is expanded: `phoned_word`
 * (zend_string **), `p_idx` (next write position) and `max_buffer_len`
 * (capacity being tracked). When p_idx has reached max_buffer_len the string
 * is extended by two characters first. After the write, the string's length
 * field is set to p_idx.
 *
 * The expansion is a bare brace block, not do { } while (0): a trailing
 * semicolon at the use site becomes an empty statement, so do not use it as
 * the unbraced body of an `if` that has an `else`.
 */
#define Phonize(c) { \
if (p_idx >= max_buffer_len) { \
*phoned_word = zend_string_extend(*phoned_word, 2 * sizeof(char) + max_buffer_len, 0); \
max_buffer_len += 2; \
} \
ZSTR_VAL(*phoned_word)[p_idx++] = c; \
ZSTR_LEN(*phoned_word) = p_idx; \
}
/*
 * Terminate the phoned word: write a NUL at p_idx and set the string's length
 * field to p_idx. The string is extended by one character first when p_idx is
 * exactly max_buffer_len. Like Phonize, this is a bare brace block that uses
 * `phoned_word`, `p_idx` and `max_buffer_len` from the enclosing scope.
 */
#define End_Phoned_Word { \
if (p_idx == max_buffer_len) { \
*phoned_word = zend_string_extend(*phoned_word, 1 * sizeof(char) + max_buffer_len, 0); \
max_buffer_len += 1; \
} \
ZSTR_VAL(*phoned_word)[p_idx] = '\0'; \
ZSTR_LEN(*phoned_word) = p_idx; \
}
/* Number of phoneme characters written so far. */
#define Phone_Len (p_idx)
/* True for any character isalpha() rejects, which includes the terminating NUL. */
#define Isbreak(c) (!isalpha(c))
/*
 * Compute the Metaphone key of `word`.
 *
 * word         NUL-terminated input; scanning stops at the first NUL byte.
 * word_len     length of the input, used only to size the output buffer.
 * max_phonemes 0 for no limit. Otherwise the main loop stops once at least
 *              this many characters have been produced; because X emits two
 *              characters ("KS") the result can exceed the limit by one.
 * phoned_word  out: receives a newly allocated zend_string holding the key.
 *              It is not written on the failure paths.
 * traditional  when zero, two extra rules are enabled: CH becomes K when it
 *              is followed by R or preceded by S, and SCHW becomes SH.
 *
 * Returns 0 on success, -1 when max_phonemes is negative or word is NULL.
 */
static int metaphone(unsigned char *word, size_t word_len, zend_long max_phonemes, zend_string **phoned_word, int traditional)
{
	/* size_t rather than int, so indexing cannot overflow on very long input. */
	size_t w_idx = 0;			/* current position in word */
	size_t p_idx = 0;			/* characters written to the output so far */
	size_t max_buffer_len = 0;	/* output capacity tracked by Phonize / End_Phoned_Word */

	/* A negative phoneme limit is meaningless. */
	if (max_phonemes < 0) {
		return -1;
	}

	if (word == NULL) {
		return -1;
	}

	/*
	 * Size the initial buffer. A caller-supplied limit larger than the word
	 * is capped at the word length so a huge limit cannot force a huge
	 * allocation; Phonize grows the buffer on demand, so the output is the
	 * same either way.
	 */
	if (max_phonemes == 0 || (size_t)max_phonemes > word_len) {
		max_buffer_len = word_len;
	} else {
		max_buffer_len = (size_t)max_phonemes;
	}
	*phoned_word = zend_string_alloc(sizeof(char) * max_buffer_len + 1, 0);

	/* Skip leading non-letters; input with no letters yields an empty key. */
	for (; !isalpha(Curr_Letter); w_idx++) {
		if (Curr_Letter == '\0') {
			End_Phoned_Word;
			return 0;
		}
	}

	/* The first letter is handled by its own set of rules. */
	switch (Curr_Letter) {
	/* AE becomes E; any other leading A is kept. */
	case 'A':
		if (Next_Letter == 'E') {
			Phonize('E');
			w_idx += 2;
		} else {
			Phonize('A');
			w_idx++;
		}
		break;
	/* [GKP]N becomes N; otherwise the letter is left for the main loop. */
	case 'G':
	case 'K':
	case 'P':
		if (Next_Letter == 'N') {
			Phonize('N');
			w_idx += 2;
		}
		break;
	/* WR becomes R; WH or W+vowel becomes W; otherwise left for the main loop. */
	case 'W':
		if (Next_Letter == 'R') {
			Phonize(Next_Letter);
			w_idx += 2;
		} else if (Next_Letter == 'H' || isvowel(Next_Letter)) {
			Phonize('W');
			w_idx += 2;
		}
		break;
	/* Leading X becomes S. */
	case 'X':
		Phonize('S');
		w_idx++;
		break;
	/* Leading vowels are kept (A was handled above). */
	case 'E':
	case 'I':
	case 'O':
	case 'U':
		Phonize(Curr_Letter);
		w_idx++;
		break;
	default:
		break;
	}

	/* Main pass over the remaining letters. */
	for (; Curr_Letter != '\0' &&
		 (max_phonemes == 0 || Phone_Len < (size_t)max_phonemes);
		 w_idx++) {
		/* Extra input letters consumed by a multi-letter rule in this iteration. */
		unsigned short int skip_letter = 0;

		/* Ignore non-letters. */
		if (!isalpha(Curr_Letter)) {
			continue;
		}

		/* Drop doubled letters, except CC. */
		if (Curr_Letter == Prev_Letter &&
			Curr_Letter != 'C') {
			continue;
		}

		switch (Curr_Letter) {
		/* B is kept unless the previous letter is M. */
		case 'B':
			if (Prev_Letter != 'M') {
				Phonize('B');
			}
			break;
		case 'C':
			if (MAKESOFT(Next_Letter)) {	/* C followed by E, I or Y */
				if (After_Next_Letter == 'A' &&
					Next_Letter == 'I') {	/* CIA */
					Phonize(SH);
				} else if (Prev_Letter == 'S') {
					/* SC[EIY]: the C is dropped */
				} else {
					Phonize('S');
				}
			} else if (Next_Letter == 'H') {
				/* Non-traditional mode only: CHR and SCH give K. */
				if ((!traditional) && (After_Next_Letter == 'R' || Prev_Letter == 'S')) {
					Phonize('K');
				} else {
					Phonize(SH);
				}
				skip_letter++;
			} else {
				Phonize('K');
			}
			break;
		/* DG followed by E, I or Y becomes J; any other D becomes T. */
		case 'D':
			if (Next_Letter == 'G' &&
				MAKESOFT(After_Next_Letter)) {
				Phonize('J');
				skip_letter++;
			} else {
				Phonize('T');
			}
			break;
		case 'G':
			if (Next_Letter == 'H') {
				/*
				 * GH becomes F unless the letter three back is B, D or H,
				 * or the letter four back is H. In that case nothing is
				 * emitted here and the H is not skipped.
				 */
				if (!(NOGHTOF(Look_Back_Letter(3)) ||
					  Look_Back_Letter(4) == 'H')) {
					Phonize('F');
					skip_letter++;
				} else {
					/* silent */
				}
			} else if (Next_Letter == 'N') {
				/* GN before a non-letter, and GNED, drop the G. */
				if (Isbreak(After_Next_Letter) ||
					(After_Next_Letter == 'E' &&
					 Look_Ahead_Letter(3) == 'D')) {
					/* dropped */
				} else {
					Phonize('K');
				}
			} else if (MAKESOFT(Next_Letter) &&
					   Prev_Letter != 'G') {
				Phonize('J');
			} else {
				Phonize('K');
			}
			break;
		/* H is kept only before a vowel and not after C, G, P, S or T. */
		case 'H':
			if (isvowel(Next_Letter) &&
				!AFFECTH(Prev_Letter)) {
				Phonize('H');
			}
			break;
		/* K is dropped after C. */
		case 'K':
			if (Prev_Letter != 'C') {
				Phonize('K');
			}
			break;
		/* PH becomes F (the H is left for the H rule); any other P is kept. */
		case 'P':
			if (Next_Letter == 'H') {
				Phonize('F');
			} else {
				Phonize('P');
			}
			break;
		case 'Q':
			Phonize('K');
			break;
		case 'S':
			if (Next_Letter == 'I' &&
				(After_Next_Letter == 'O' ||
				 After_Next_Letter == 'A')) {	/* SIO, SIA */
				Phonize(SH);
			} else if (Next_Letter == 'H') {
				Phonize(SH);
				skip_letter++;
			} else if ((!traditional) && (Next_Letter == 'C' && Look_Ahead_Letter(2) == 'H' && Look_Ahead_Letter(3) == 'W')) {
				/* Non-traditional mode only: SCHW */
				Phonize(SH);
				skip_letter += 2;
			} else {
				Phonize('S');
			}
			break;
		case 'T':
			if (Next_Letter == 'I' &&
				(After_Next_Letter == 'O' ||
				 After_Next_Letter == 'A')) {	/* TIO, TIA */
				Phonize(SH);
			} else if (Next_Letter == 'H') {
				Phonize(TH);
				skip_letter++;
			} else if (!(Next_Letter == 'C' && After_Next_Letter == 'H')) {
				/* T is dropped in TCH, kept otherwise. */
				Phonize('T');
			}
			break;
		case 'V':
			Phonize('F');
			break;
		/* W and Y are kept only before a vowel. */
		case 'W':
			if (isvowel(Next_Letter)) {
				Phonize('W');
			}
			break;
		/* X emits two characters, so the output can pass max_phonemes by one. */
		case 'X':
			Phonize('K');
			Phonize('S');
			break;
		case 'Y':
			if (isvowel(Next_Letter)) {
				Phonize('Y');
			}
			break;
		case 'Z':
			Phonize('S');
			break;
		/* These letters are emitted unchanged. */
		case 'F':
		case 'J':
		case 'L':
		case 'M':
		case 'N':
		case 'R':
			Phonize(Curr_Letter);
			break;
		/* Everything else (vowels after the first letter) is dropped. */
		default:
			break;
		}

		w_idx += skip_letter;
	}

	End_Phoned_Word;

	return 0;
}