/******************************************************************** * COPYRIGHT: * Copyright (c) 1997-2013,2015, International Business Machines Corporation and * others. All Rights Reserved. ********************************************************************/ /******************************************************************************** * * File CBIAPTS.C * * Modification History: * Name Description * Madhu Katragadda Creation *********************************************************************************/ /*C API TEST FOR BREAKITERATOR */ /** * This is an API test. It doesn't test very many cases, and doesn't * try to test the full functionality. It just calls each function in the class and * verifies that it works on a basic level. **/ #include "unicode/utypes.h" #if !UCONFIG_NO_BREAK_ITERATION #include #include #include "unicode/uloc.h" #include "unicode/ubrk.h" #include "unicode/ustring.h" #include "unicode/ucnv.h" #include "unicode/utext.h" #include "cintltst.h" #include "cbiapts.h" #define TEST_ASSERT_SUCCESS(status) {if (U_FAILURE(status)) { \ log_data_err("Failure at file %s, line %d, error = %s (Are you missing data?)\n", __FILE__, __LINE__, u_errorName(status));}} #define TEST_ASSERT(expr) {if ((expr)==FALSE) { \ log_data_err("Test Failure at file %s, line %d (Are you missing data?)\n", __FILE__, __LINE__);}} #if !UCONFIG_NO_FILE_IO static void TestBreakIteratorSafeClone(void); #endif static void TestBreakIteratorRules(void); static void TestBreakIteratorRuleError(void); static void TestBreakIteratorStatusVec(void); static void TestBreakIteratorUText(void); static void TestBreakIteratorTailoring(void); static void TestBreakIteratorRefresh(void); static void TestBreakIteratorSuppressions(void); /* Apple-specific for now */ void addBrkIterAPITest(TestNode** root); void addBrkIterAPITest(TestNode** root) { #if !UCONFIG_NO_FILE_IO addTest(root, &TestBreakIteratorCAPI, "tstxtbd/cbiapts/TestBreakIteratorCAPI"); addTest(root, &TestBreakIteratorSafeClone, "tstxtbd/cbiapts/TestBreakIteratorSafeClone"); addTest(root, &TestBreakIteratorUText, "tstxtbd/cbiapts/TestBreakIteratorUText"); #endif addTest(root, &TestBreakIteratorRules, "tstxtbd/cbiapts/TestBreakIteratorRules"); addTest(root, &TestBreakIteratorRuleError, "tstxtbd/cbiapts/TestBreakIteratorRuleError"); addTest(root, &TestBreakIteratorStatusVec, "tstxtbd/cbiapts/TestBreakIteratorStatusVec"); addTest(root, &TestBreakIteratorTailoring, "tstxtbd/cbiapts/TestBreakIteratorTailoring"); addTest(root, &TestBreakIteratorRefresh, "tstxtbd/cbiapts/TestBreakIteratorRefresh"); addTest(root, &TestBreakIteratorSuppressions, "tstxtbd/cbiapts/TestBreakIteratorSuppressions"); } #define CLONETEST_ITERATOR_COUNT 2 /* * Utility function for converting char * to UChar * strings, to * simplify the test code. Converted strings are put in heap allocated * storage. A hook (probably a local in the caller's code) allows all * strings converted with that hook to be freed with a single call. */ typedef struct StringStruct { struct StringStruct *link; UChar str[1]; } StringStruct; static UChar* toUChar(const char *src, void **freeHook) { /* Structure of the memory that we allocate on the heap */ int32_t numUChars; int32_t destSize; UChar stackBuf[2000 + sizeof(void *)/sizeof(UChar)]; StringStruct *dest; UConverter *cnv; UErrorCode status = U_ZERO_ERROR; if (src == NULL) { return NULL; }; cnv = ucnv_open(NULL, &status); if(U_FAILURE(status) || cnv == NULL) { return NULL; } ucnv_reset(cnv); numUChars = ucnv_toUChars(cnv, stackBuf, 2000, src, -1, &status); destSize = (numUChars+1) * sizeof(UChar) + sizeof(struct StringStruct); dest = (StringStruct *)malloc(destSize); if (dest != NULL) { if (status == U_BUFFER_OVERFLOW_ERROR || status == U_STRING_NOT_TERMINATED_WARNING) { ucnv_toUChars(cnv, dest->str, numUChars+1, src, -1, &status); } else if (status == U_ZERO_ERROR) { u_strcpy(dest->str, stackBuf); } else { free(dest); dest = NULL; } } ucnv_reset(cnv); /* be good citizens */ ucnv_close(cnv); if (dest == NULL) { return NULL; } dest->link = (StringStruct*)(*freeHook); *freeHook = dest; return dest->str; } static void freeToUCharStrings(void **hook) { StringStruct *s = *(StringStruct **)hook; while (s != NULL) { StringStruct *next = s->link; free(s); s = next; } } #if !UCONFIG_NO_FILE_IO static void TestBreakIteratorCAPI() { UErrorCode status = U_ZERO_ERROR; UBreakIterator *word, *sentence, *line, *character, *b, *bogus; int32_t start,pos,end,to; int32_t i; int32_t count = 0; UChar text[50]; /* Note: the adjacent "" are concatenating strings, not adding a \" to the string, which is probably what whoever wrote this intended. Don't fix, because it would throw off the hard coded break positions in the following tests. */ u_uastrcpy(text, "He's from Africa. ""Mr. Livingston, I presume?"" Yeah"); /*test ubrk_open()*/ log_verbose("\nTesting BreakIterator open functions\n"); /* Use french for fun */ word = ubrk_open(UBRK_WORD, "en_US", text, u_strlen(text), &status); if(status == U_FILE_ACCESS_ERROR) { log_data_err("Check your data - it doesn't seem to be around\n"); return; } else if(U_FAILURE(status)){ log_err_status(status, "FAIL: Error in ubrk_open() for word breakiterator: %s\n", myErrorName(status)); } else{ log_verbose("PASS: Successfully opened word breakiterator\n"); } sentence = ubrk_open(UBRK_SENTENCE, "en_US", text, u_strlen(text), &status); if(U_FAILURE(status)){ log_err_status(status, "FAIL: Error in ubrk_open() for sentence breakiterator: %s\n", myErrorName(status)); return; } else{ log_verbose("PASS: Successfully opened sentence breakiterator\n"); } line = ubrk_open(UBRK_LINE, "en_US", text, u_strlen(text), &status); if(U_FAILURE(status)){ log_err("FAIL: Error in ubrk_open() for line breakiterator: %s\n", myErrorName(status)); return; } else{ log_verbose("PASS: Successfully opened line breakiterator\n"); } character = ubrk_open(UBRK_CHARACTER, "en_US", text, u_strlen(text), &status); if(U_FAILURE(status)){ log_err("FAIL: Error in ubrk_open() for character breakiterator: %s\n", myErrorName(status)); return; } else{ log_verbose("PASS: Successfully opened character breakiterator\n"); } /*trying to open an illegal iterator*/ bogus = ubrk_open((UBreakIteratorType)5, "en_US", text, u_strlen(text), &status); if(bogus != NULL) { log_err("FAIL: expected NULL from opening an invalid break iterator.\n"); } if(U_SUCCESS(status)){ log_err("FAIL: Error in ubrk_open() for BOGUS breakiterator. Expected U_ILLEGAL_ARGUMENT_ERROR\n"); } if(U_FAILURE(status)){ if(status != U_ILLEGAL_ARGUMENT_ERROR){ log_err("FAIL: Error in ubrk_open() for BOGUS breakiterator. Expected U_ILLEGAL_ARGUMENT_ERROR\n Got %s\n", myErrorName(status)); } } status=U_ZERO_ERROR; /* ======= Test ubrk_countAvialable() and ubrk_getAvialable() */ log_verbose("\nTesting ubrk_countAvailable() and ubrk_getAvailable()\n"); count=ubrk_countAvailable(); /* use something sensible w/o hardcoding the count */ if(count < 0){ log_err("FAIL: Error in ubrk_countAvialable() returned %d\n", count); } else{ log_verbose("PASS: ubrk_countAvialable() successful returned %d\n", count); } for(i=0;i 1) { --bufferSize; } if (NULL == (brk = ubrk_safeClone(someIterators[i], NULL, &bufferSize, &status)) || status != U_SAFECLONE_ALLOCATED_WARNING) { log_err("FAIL: Cloned Iterator failed to deal correctly with too-small buffer size\n"); } if (brk) ubrk_close(brk); status = U_ZERO_ERROR; bufferSize = U_BRK_SAFECLONE_BUFFERSIZE; /* Null buffer pointer - return Iterator & set error to U_SAFECLONE_ALLOCATED_ERROR */ if (NULL == (brk = ubrk_safeClone(someIterators[i], NULL, &bufferSize, &status)) || status != U_SAFECLONE_ALLOCATED_WARNING) { log_err("FAIL: Cloned Iterator failed to deal correctly with null buffer pointer\n"); } if (brk) ubrk_close(brk); status = U_ZERO_ERROR; /* Mis-aligned buffer pointer. */ { char stackBuf[U_BRK_SAFECLONE_BUFFERSIZE+sizeof(void *)]; brk = ubrk_safeClone(someIterators[i], &stackBuf[1], &bufferSize, &status); if (U_FAILURE(status) || brk == NULL) { log_err("FAIL: Cloned Iterator failed with misaligned buffer pointer\n"); } if (status == U_SAFECLONE_ALLOCATED_WARNING) { log_verbose("Cloned Iterator allocated when using a mis-aligned buffer.\n"); } if (brk) ubrk_close(brk); } /* Null Iterator - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */ if (NULL != ubrk_safeClone(NULL, buffer[i], &bufferSize, &status) || status != U_ILLEGAL_ARGUMENT_ERROR) { log_err("FAIL: Cloned Iterator failed to deal correctly with null Iterator pointer\n"); } status = U_ZERO_ERROR; /* Do these cloned Iterators work at all - make a first & next call */ bufferSize = U_BRK_SAFECLONE_BUFFERSIZE; someClonedIterators[i] = ubrk_safeClone(someIterators[i], buffer[i], &bufferSize, &status); start = ubrk_first(someClonedIterators[i]); if(start!=0) log_err("error ubrk_start(clone) did not return 0\n"); pos=ubrk_next(someClonedIterators[i]); if(pos!=4) log_err("error ubrk_next(clone) did not return 4\n"); ubrk_close(someClonedIterators[i]); ubrk_close(someIterators[i]); } } #endif /* // Open a break iterator from char * rules. Take care of conversion // of the rules and error checking. */ static UBreakIterator * testOpenRules(char *rules) { UErrorCode status = U_ZERO_ERROR; UChar *ruleSourceU = NULL; void *strCleanUp = NULL; UParseError parseErr; UBreakIterator *bi; ruleSourceU = toUChar(rules, &strCleanUp); bi = ubrk_openRules(ruleSourceU, -1, /* The rules */ NULL, -1, /* The text to be iterated over. */ &parseErr, &status); if (U_FAILURE(status)) { log_data_err("FAIL: ubrk_openRules: ICU Error \"%s\" (Are you missing data?)\n", u_errorName(status)); bi = 0; }; freeToUCharStrings(&strCleanUp); return bi; } /* * TestBreakIteratorRules - Verify that a break iterator can be created from * a set of source rules. */ static void TestBreakIteratorRules() { /* Rules will keep together any run of letters not including 'a', OR * keep together 'abc', but only when followed by 'def', OTHERWISE * just return one char at a time. */ char rules[] = "abc{666}/def;\n [\\p{L} - [a]]* {2}; . {1};"; /* 0123456789012345678 */ char data[] = "abcdex abcdefgh-def"; /* the test data string */ char breaks[] = "** ** * ** *"; /* * the expected break positions */ char tags[] = "01 21 6 21 2"; /* expected tag values at break positions */ int32_t tagMap[] = {0, 1, 2, 3, 4, 5, 666}; UChar *uData; void *freeHook = NULL; UErrorCode status = U_ZERO_ERROR; int32_t pos; int i; UBreakIterator *bi = testOpenRules(rules); if (bi == NULL) {return;} uData = toUChar(data, &freeHook); ubrk_setText(bi, uData, -1, &status); pos = ubrk_first(bi); for (i=0; ilocale != NULL; ++testPtr) { UErrorCode status = U_ZERO_ERROR; UBreakIterator* ubrkiter = ubrk_open(testPtr->type, testPtr->locale, testPtr->test, -1, &status); if ( U_SUCCESS(status) ) { int32_t offset, offsindx; UBool foundError; foundError = FALSE; for (offsindx = 0; (offset = ubrk_next(ubrkiter)) != UBRK_DONE; ++offsindx) { if (!foundError && offsindx >= testPtr->numOffsets) { log_err("FAIL: locale %s, break type %d, ubrk_next expected UBRK_DONE, got %d\n", testPtr->locale, testPtr->type, offset); foundError = TRUE; } else if (!foundError && offset != testPtr->offsFwd[offsindx]) { log_err("FAIL: locale %s, break type %d, ubrk_next expected %d, got %d\n", testPtr->locale, testPtr->type, testPtr->offsFwd[offsindx], offset); foundError = TRUE; } } if (!foundError && offsindx < testPtr->numOffsets) { log_err("FAIL: locale %s, break type %d, ubrk_next expected %d, got UBRK_DONE\n", testPtr->locale, testPtr->type, testPtr->offsFwd[offsindx]); } foundError = FALSE; for (offsindx = 0; (offset = ubrk_previous(ubrkiter)) != UBRK_DONE; ++offsindx) { if (!foundError && offsindx >= testPtr->numOffsets) { log_err("FAIL: locale %s, break type %d, ubrk_previous expected UBRK_DONE, got %d\n", testPtr->locale, testPtr->type, offset); foundError = TRUE; } else if (!foundError && offset != testPtr->offsRev[offsindx]) { log_err("FAIL: locale %s, break type %d, ubrk_previous expected %d, got %d\n", testPtr->locale, testPtr->type, testPtr->offsRev[offsindx], offset); foundError = TRUE; } } if (!foundError && offsindx < testPtr->numOffsets) { log_err("FAIL: locale %s, break type %d, ubrk_previous expected %d, got UBRK_DONE\n", testPtr->locale, testPtr->type, testPtr->offsRev[offsindx]); } ubrk_close(ubrkiter); } else { log_err_status(status, "FAIL: locale %s, break type %d, ubrk_open status: %s\n", testPtr->locale, testPtr->type, u_errorName(status)); } } } static void TestBreakIteratorRefresh(void) { /* * RefreshInput changes out the input of a Break Iterator without * changing anything else in the iterator's state. Used with Java JNI, * when Java moves the underlying string storage. This test * runs a ubrk_next() repeatedly, moving the text in the middle of the sequence. * The right set of boundaries should still be found. */ UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */ UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0}; UErrorCode status = U_ZERO_ERROR; UBreakIterator *bi; UText ut1 = UTEXT_INITIALIZER; UText ut2 = UTEXT_INITIALIZER; bi = ubrk_open(UBRK_LINE, "en_US", NULL, 0, &status); TEST_ASSERT_SUCCESS(status); utext_openUChars(&ut1, testStr, -1, &status); TEST_ASSERT_SUCCESS(status); ubrk_setUText(bi, &ut1, &status); TEST_ASSERT_SUCCESS(status); if (U_SUCCESS(status)) { /* Line boundaries will occur before each letter in the original string */ TEST_ASSERT(1 == ubrk_next(bi)); TEST_ASSERT(3 == ubrk_next(bi)); /* Move the string, kill the original string. */ u_strcpy(movedStr, testStr); u_memset(testStr, 0x20, u_strlen(testStr)); utext_openUChars(&ut2, movedStr, -1, &status); TEST_ASSERT_SUCCESS(status); ubrk_refreshUText(bi, &ut2, &status); TEST_ASSERT_SUCCESS(status); /* Find the following matches, now working in the moved string. */ TEST_ASSERT(5 == ubrk_next(bi)); TEST_ASSERT(7 == ubrk_next(bi)); TEST_ASSERT(8 == ubrk_next(bi)); TEST_ASSERT(UBRK_DONE == ubrk_next(bi)); TEST_ASSERT_SUCCESS(status); utext_close(&ut1); utext_close(&ut2); } ubrk_close(bi); } static const char testSentenceSuppressionsEn[] = "Mr. Jones comes home. Dr. Smith Ph.D. is out. In the U.S.A. it is hot."; static const int32_t testSentSuppFwdOffsetsEn[] = { 22, 46, 70, -1 }; /* With suppressions */ static const int32_t testSentFwdOffsetsEn[] = { 4, 22, 26, 35, 46, 70, -1 }; /* Without suppressions */ static const int32_t testSentSuppRevOffsetsEn[] = { 46, 22, 0, -1 }; /* With suppressions */ static const int32_t testSentRevOffsetsEn[] = { 46, 35, 26, 22, 4, 0, -1 }; /* Without suppressions */ static const char testSentenceSuppressionsDe[] = "Wenn ich schon h\\u00F6re zu Guttenberg kommt evtl. zur\\u00FCck."; // "Wenn ich schon höre zu Guttenberg kommt evtl. zurück." static const int32_t testSentSuppFwdOffsetsDe[] = { 53, -1 }; /* With suppressions */ static const int32_t testSentFwdOffsetsDe[] = { 53, -1 }; /* Without suppressions; no break in evtl. zur due to casing */ static const int32_t testSentSuppRevOffsetsDe[] = { 0, -1 }; /* With suppressions */ static const int32_t testSentRevOffsetsDe[] = { 0, -1 }; /* Without suppressions */ static const char testSentenceSuppressionsEs[] = "Te esperamos todos los miercoles en Bravo 416, Col. El Pueblo a las 7 PM."; static const int32_t testSentSuppFwdOffsetsEs[] = { 73, -1 }; /* With suppressions */ static const int32_t testSentFwdOffsetsEs[] = { 52, 73, -1 }; /* Without suppressions */ static const int32_t testSentSuppRevOffsetsEs[] = { 0, -1 }; /* With suppressions */ static const int32_t testSentRevOffsetsEs[] = { 52, 0, -1 }; /* Without suppressions */ enum { kTextULenMax = 128 }; typedef struct { const char * locale; const char * text; const int32_t * expFwdOffsets; const int32_t * expRevOffsets; } TestBISuppressionsItem; static const TestBISuppressionsItem testBISuppressionsItems[] = { { "en@ss=standard", testSentenceSuppressionsEn, testSentSuppFwdOffsetsEn, testSentSuppRevOffsetsEn }, { "en", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, { "fr@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, { "af@ss=standard", testSentenceSuppressionsEn, testSentSuppFwdOffsetsEn, testSentSuppRevOffsetsEn }, /* no brkiter data => en suppressions? */ { "zh@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */ { "zh_Hant@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */ { "fi@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */ { "ja@ss=standard", testSentenceSuppressionsEn, testSentFwdOffsetsEn, testSentRevOffsetsEn }, /* brkiter data, no suppressions data => no suppressions */ { "de@ss=standard", testSentenceSuppressionsDe, testSentSuppFwdOffsetsDe, testSentSuppRevOffsetsDe }, { "de", testSentenceSuppressionsDe, testSentFwdOffsetsDe, testSentRevOffsetsDe }, { "es@ss=standard", testSentenceSuppressionsEs, testSentSuppFwdOffsetsEs, testSentSuppRevOffsetsEs }, { "es", testSentenceSuppressionsEs, testSentFwdOffsetsEs, testSentRevOffsetsEs }, { NULL, NULL, NULL } }; static void TestBreakIteratorSuppressions(void) { const TestBISuppressionsItem * itemPtr; for (itemPtr = testBISuppressionsItems; itemPtr->locale != NULL; itemPtr++) { UChar textU[kTextULenMax]; int32_t textULen = u_unescape(itemPtr->text, textU, kTextULenMax); UErrorCode status = U_ZERO_ERROR; UBreakIterator *bi = ubrk_open(UBRK_SENTENCE, itemPtr->locale, textU, textULen, &status); if (U_SUCCESS(status)) { int32_t offset, start; const int32_t * expOffsetPtr; expOffsetPtr = itemPtr->expFwdOffsets; ubrk_first(bi); for (; (offset = ubrk_next(bi)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { if (offset != *expOffsetPtr) { log_err("FAIL: ubrk_next loc \"%s\", expected %d, got %d\n", itemPtr->locale, *expOffsetPtr, offset); } } if (offset != UBRK_DONE || *expOffsetPtr >= 0) { log_err("FAIL: ubrk_next loc \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", itemPtr->locale, offset, *expOffsetPtr); } expOffsetPtr = itemPtr->expFwdOffsets; start = ubrk_first(bi) + 1; for (; (offset = ubrk_following(bi, start)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { if (offset != *expOffsetPtr) { log_err("FAIL: ubrk_following(%d) loc \"%s\", expected %d, got %d\n", start, itemPtr->locale, *expOffsetPtr, offset); } start = *expOffsetPtr + 1; } if (offset != UBRK_DONE || *expOffsetPtr >= 0) { log_err("FAIL: ubrk_following(%d) loc \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr); } expOffsetPtr = itemPtr->expRevOffsets; ubrk_last(bi); for (; (offset = ubrk_previous(bi)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { if (offset != *expOffsetPtr) { log_err("FAIL: ubrk_previous loc \"%s\", expected %d, got %d\n", itemPtr->locale, *expOffsetPtr, offset); } } if (offset != UBRK_DONE || *expOffsetPtr >= 0) { log_err("FAIL: ubrk_previous loc \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", itemPtr->locale, offset, *expOffsetPtr); } expOffsetPtr = itemPtr->expRevOffsets; start = ubrk_last(bi) - 1; for (; (offset = ubrk_preceding(bi, start)) != UBRK_DONE && *expOffsetPtr >= 0; expOffsetPtr++) { if (offset != *expOffsetPtr) { log_err("FAIL: ubrk_preceding(%d) loc \"%s\", expected %d, got %d\n", start, itemPtr->locale, *expOffsetPtr, offset); } start = *expOffsetPtr - 1; } if (start >=0 && (offset != UBRK_DONE || *expOffsetPtr >= 0)) { log_err("FAIL: ubrk_preceding loc(%d) \"%s\", expected UBRK_DONE & expOffset -1, got %d and %d\n", start, itemPtr->locale, offset, *expOffsetPtr); } ubrk_close(bi); } else { log_data_err("FAIL: ubrk_open(UBRK_SENTENCE, \"%s\", ...) status %s (Are you missing data?)\n", itemPtr->locale, u_errorName(status)); } } } #endif /* #if !UCONFIG_NO_BREAK_ITERATION */