// numparse_decimal.cpp [plain text]
#include "unicode/utypes.h"
#if !UCONFIG_NO_FORMATTING
#define UNISTR_FROM_STRING_EXPLICIT
#include "numparse_types.h"
#include "numparse_decimal.h"
#include "static_unicode_sets.h"
#include "numparse_utils.h"
#include "unicode/uchar.h"
#include "putilimp.h"
#include "number_decimalquantity.h"
using namespace icu;
using namespace icu::numparse;
using namespace icu::numparse::impl;
/**
 * Configures a decimal matcher from locale symbols, a grouping strategy and parse flags.
 *
 * @param symbols    Source of the grouping/decimal separator strings and the digit symbols.
 * @param grouper    Supplies the primary and secondary grouping sizes.
 * @param parseFlags Bit set; the PARSE_FLAG_* bits tested below select monetary separators,
 *                   strict separator sets, strict grouping sizes, disabled grouping and
 *                   integer-only parsing.
 */
DecimalMatcher::DecimalMatcher(const DecimalFormatSymbols& symbols, const Grouper& grouper,
                               parse_flags_t parseFlags) {
    const bool monetary = (parseFlags & PARSE_FLAG_MONETARY_SEPARATORS) != 0;
    const bool strictSeparators = (parseFlags & PARSE_FLAG_STRICT_SEPARATORS) != 0;

    // Literal separator strings: monetary or regular, depending on the flag.
    groupingSeparator = symbols.getConstSymbol(
            monetary ? DecimalFormatSymbols::kMonetaryGroupingSeparatorSymbol
                     : DecimalFormatSymbols::kGroupingSeparatorSymbol);
    decimalSeparator = symbols.getConstSymbol(
            monetary ? DecimalFormatSymbols::kMonetarySeparatorSymbol
                     : DecimalFormatSymbols::kDecimalSeparatorSymbol);

    // Grouping equivalence set always comes from the shared static sets.
    unisets::Key groupingKey = unisets::ALL_SEPARATORS;
    if (strictSeparators) {
        groupingKey = unisets::STRICT_ALL_SEPARATORS;
    }
    groupingUniSet = unisets::get(groupingKey);

    // Decimal equivalence set: a shared comma/period set if chooseFrom() finds one for the
    // decimal separator, otherwise a private one-code-point set, otherwise the empty set.
    const unisets::Key commaKey = strictSeparators ? unisets::STRICT_COMMA : unisets::COMMA;
    const unisets::Key periodKey = strictSeparators ? unisets::STRICT_PERIOD : unisets::PERIOD;
    unisets::Key decimalKey = unisets::chooseFrom(decimalSeparator, commaKey, periodKey);
    if (decimalKey >= 0) {
        decimalUniSet = unisets::get(decimalKey);
    } else if (decimalSeparator.isEmpty()) {
        decimalUniSet = unisets::get(unisets::EMPTY);
    } else {
        auto* ownDecimalSet = new UnicodeSet();
        ownDecimalSet->add(decimalSeparator.char32At(0));
        ownDecimalSet->freeze();
        fLocalDecimalUniSet.adoptInstead(ownDecimalSet);  // this object owns the set
        decimalUniSet = ownDecimalSet;
    }

    // Separator and lead sets used by smokeTest().
    if (groupingKey >= 0 && decimalKey >= 0) {
        // Both sets are shared static sets; no private union is needed.
        separatorSet = groupingUniSet;
        // NOTE(review): the strict flag picks the non-STRICT lead set and vice versa, the
        // opposite pairing from groupingKey above - confirm this is intentional.
        leadSet = unisets::get(
                strictSeparators ? unisets::DIGITS_OR_ALL_SEPARATORS
                                 : unisets::DIGITS_OR_STRICT_ALL_SEPARATORS);
    } else {
        // Build and own the union of the grouping and decimal sets; no lead set in this case.
        auto* ownSeparatorSet = new UnicodeSet();
        ownSeparatorSet->addAll(*groupingUniSet).addAll(*decimalUniSet);
        ownSeparatorSet->freeze();
        fLocalSeparatorSet.adoptInstead(ownSeparatorSet);
        separatorSet = ownSeparatorSet;
        leadSet = nullptr;
    }

    // Keep explicit digit strings unless the symbols' zero code point is a Unicode decimal
    // digit whose value is 0 (in which case u_digit() alone is used by match()).
    const UChar32 cpZero = symbols.getCodePointZero();
    const bool zeroIsPlainDigit = cpZero != -1 && u_isdigit(cpZero) && u_digit(cpZero, 10) == 0;
    if (!zeroIsPlainDigit) {
        auto* ownDigitStrings = new UnicodeString[10];
        fLocalDigitStrings.adoptInstead(ownDigitStrings);
        for (int32_t value = 0; value < 10; ++value) {
            ownDigitStrings[value] = symbols.getConstDigitSymbol(value);
        }
    }

    // Remaining behavior switches and grouping sizes.
    integerOnly = (parseFlags & PARSE_FLAG_INTEGER_ONLY) != 0;
    groupingDisabled = (parseFlags & PARSE_FLAG_GROUPING_DISABLED) != 0;
    requireGroupingMatch = (parseFlags & PARSE_FLAG_STRICT_GROUPING_SIZE) != 0;
    grouping1 = grouper.getPrimary();
    grouping2 = grouper.getSecondary();
}
/**
 * Matches a regular (non-exponent) number by forwarding to the four-argument overload with an
 * exponent sign of 0.
 */
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
    const int8_t noExponentSign = 0;
    return match(segment, result, noExponentSign, status);
}
// Core matcher: consumes digits, grouping separators and at most one decimal separator from
// the front of `segment`.
//
//   segment      - input; its offset is advanced past the accepted text, or restored to its
//                  starting value when the digits end up rejected (bogus).
//   result       - when exponentSign == 0 its quantity is replaced by the parsed digits; when
//                  exponentSign != 0 the parsed digits are applied as a magnitude adjustment
//                  to the quantity already stored in result.
//   exponentSign - 0 for a regular number; nonzero when parsing an exponent, in which case it
//                  multiplies the parsed exponent value.
//   (UErrorCode) - unnamed and unused here.
//
// Returns true when the segment was fully consumed or a partial digit-string/separator match
// ran to the end of the segment (tracked in maybeMore); presumably callers use this to decide
// whether more input could change the outcome.
bool DecimalMatcher::match(StringSegment& segment, ParsedNumber& result, int8_t exponentSign,
UErrorCode&) const {
if (result.seenNumber() && exponentSign == 0) {
// A number was already recorded in result and this is not an exponent: nothing to do.
return false;
} else if (exponentSign != 0) {
// Exponent digits are applied to an existing quantity, so one must be present.
U_ASSERT(!result.quantity.bogus);
}
// Offset to rewind to if the match is rejected.
int32_t initialOffset = segment.getOffset();
bool maybeMore = false;
// Digits accepted so far; stays bogus until the first digit is seen.
number::impl::DecimalQuantity digitsConsumed;
digitsConsumed.bogus = true;
// Number of digits seen after the decimal separator; used to scale the result at the end.
int32_t digitsAfterDecimalPlace = 0;
// The separators actually used in the input; bogus means "not seen yet".
UnicodeString actualGroupingString;
UnicodeString actualDecimalString;
actualGroupingString.setToBogus();
actualDecimalString.setToBogus();
// Bookkeeping for the current and previous digit group:
//   Offset  - segment offset recorded for the group (used for rewinding).
//   SepType - separator that introduced the group: 0 = none (start of input),
//             1 = grouping separator, 2 = decimal separator, -1 = no group / skip validation.
//   Count   - number of digits in the group.
int32_t currGroupOffset = 0;
int32_t currGroupSepType = 0;
int32_t currGroupCount = 0;
int32_t prevGroupOffset = -1;
int32_t prevGroupSepType = -1;
int32_t prevGroupCount = -1;
while (segment.length() > 0) {
maybeMore = false;
// --- Try to read one digit ---
int8_t digit = -1;
// First by Unicode digit value of the leading code point.
UChar32 cp = segment.getCodePoint();
if (u_isdigit(cp)) {
segment.adjustOffset(U16_LENGTH(cp));
digit = static_cast<int8_t>(u_digit(cp, 10));
}
// Then by the locale's explicit digit strings, if the constructor stored any.
if (digit == -1 && !fLocalDigitStrings.isNull()) {
for (int32_t i = 0; i < 10; i++) {
const UnicodeString& str = fLocalDigitStrings[i];
if (str.isEmpty()) {
continue;
}
// Special case: when the input starts with U+96F6 and the digit-zero string starts with
// U+3007, the overlap is forced to 1 instead of being measured.
// NOTE(review): the forced overlap is applied for every i, not only i == 0; it yields
// digit 0 only when fLocalDigitStrings[0] has length 1 - confirm behavior for longer
// digit-zero strings.
int32_t overlap = (segment.startsWith(0x96F6) && fLocalDigitStrings[0].charAt(0)==0x3007)?
1: segment.getCommonPrefixLength(str);
if (overlap == str.length()) {
segment.adjustOffset(overlap);
digit = static_cast<int8_t>(i);
break;
}
// The rest of the segment is a prefix of this digit string: more input might complete it.
maybeMore = maybeMore || (overlap == segment.length());
}
}
if (digit >= 0) {
// Got a digit: un-bogus the accumulator on first use, append, and count it.
if (digitsConsumed.bogus) {
digitsConsumed.bogus = false;
digitsConsumed.clear();
}
digitsConsumed.appendDigit(digit, 0, true);
currGroupCount++;
if (!actualDecimalString.isBogus()) {
digitsAfterDecimalPlace++;
}
continue;
}
// --- Not a digit: try to read a separator ---
bool isDecimal = false;
bool isGrouping = false;
// 1) Literal decimal separator string, if no decimal separator has been seen yet.
if (actualDecimalString.isBogus() && !decimalSeparator.isEmpty()) {
int32_t overlap = segment.getCommonPrefixLength(decimalSeparator);
maybeMore = maybeMore || (overlap == segment.length());
if (overlap == decimalSeparator.length()) {
isDecimal = true;
actualDecimalString = decimalSeparator;
}
}
// 2) The grouping separator already used earlier in this input.
if (!actualGroupingString.isBogus()) {
int32_t overlap = segment.getCommonPrefixLength(actualGroupingString);
maybeMore = maybeMore || (overlap == segment.length());
if (overlap == actualGroupingString.length()) {
isGrouping = true;
}
}
// 2.5) Literal grouping separator string, only while no separator of either kind was seen.
if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus() &&
!groupingSeparator.isEmpty()) {
int32_t overlap = segment.getCommonPrefixLength(groupingSeparator);
maybeMore = maybeMore || (overlap == segment.length());
if (overlap == groupingSeparator.length()) {
isGrouping = true;
actualGroupingString = groupingSeparator;
}
}
// 3) Decimal separator from the equivalence set; skipped if this position already matched
//    as a grouping separator.
if (!isGrouping && actualDecimalString.isBogus()) {
if (decimalUniSet->contains(cp)) {
isDecimal = true;
actualDecimalString = UnicodeString(cp);
}
}
// 4) Grouping separator from the equivalence set, only while no separator was seen.
if (!groupingDisabled && actualGroupingString.isBogus() && actualDecimalString.isBogus()) {
if (groupingUniSet->contains(cp)) {
isGrouping = true;
actualGroupingString = UnicodeString(cp);
}
}
// Neither digit nor separator: stop scanning.
if (!isDecimal && !isGrouping) {
break;
}
// Separators that are recognized but not acceptable here.
if (isDecimal && integerOnly) {
break;
} else if (currGroupSepType == 2 && isGrouping) {
// Grouping separator inside the fraction part.
break;
}
// Validate the group sizes collected so far before accepting the separator.
bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
if (!prevValidSecondary || (isDecimal && !currValidPrimary)) {
if (isGrouping && currGroupCount == 0) {
// Empty group followed by another grouping separator; handled after the loop.
U_ASSERT(currGroupSepType == 1);
} else if (requireGroupingMatch) {
// Strict grouping: discard everything parsed so far.
digitsConsumed.clear();
digitsConsumed.bogus = true;
}
break;
} else if (requireGroupingMatch && currGroupCount == 0 && currGroupSepType == 1) {
// Strict grouping: two grouping separators in a row.
break;
} else {
// Sizes are fine so far: shift current group into "previous".
prevGroupOffset = currGroupOffset;
prevGroupCount = currGroupCount;
if (isDecimal) {
// -1 makes validateGroup() accept this group unconditionally from now on.
prevGroupSepType = -1;
} else {
prevGroupSepType = currGroupSepType;
}
}
// Accept the separator. The offset is only refreshed when the finished group had digits, so
// consecutive separators keep the rewind point at the first of them.
if (currGroupCount != 0) {
currGroupOffset = segment.getOffset();
}
currGroupSepType = isGrouping ? 1 : 2;
currGroupCount = 0;
if (isGrouping) {
segment.adjustOffset(actualGroupingString.length());
} else {
segment.adjustOffset(actualDecimalString.length());
}
}
// --- After the scan ---
// Trailing separator that is not a decimal separator (or no digits at all in the current
// group): rewind to the group's recorded offset and shift prev -> curr so the previous group
// is validated as the final one. prev is replaced by a placeholder (type 0, count 1).
if (currGroupSepType != 2 && currGroupCount == 0) {
maybeMore = true;
segment.setOffset(currGroupOffset);
currGroupOffset = prevGroupOffset;
currGroupSepType = prevGroupSepType;
currGroupCount = prevGroupCount;
prevGroupOffset = -1;
prevGroupSepType = 0;
prevGroupCount = 1;
}
// Final validation of the last two groups.
bool prevValidSecondary = validateGroup(prevGroupSepType, prevGroupCount, false);
bool currValidPrimary = validateGroup(currGroupSepType, currGroupCount, true);
if (!requireGroupingMatch) {
// Lenient grouping: instead of failing, rewind past the invalid group(s) and drop their
// digits from the accumulated quantity.
int32_t digitsToRemove = 0;
if (!prevValidSecondary) {
segment.setOffset(prevGroupOffset);
digitsToRemove += prevGroupCount;
digitsToRemove += currGroupCount;
} else if (!currValidPrimary && (prevGroupSepType != 0 || prevGroupCount != 0)) {
// The second condition excludes the case where the previous group is the empty start of
// input (type 0, count 0), i.e. the input began with a grouping separator.
maybeMore = true;
segment.setOffset(currGroupOffset);
digitsToRemove += currGroupCount;
}
if (digitsToRemove != 0) {
digitsConsumed.adjustMagnitude(-digitsToRemove);
digitsConsumed.truncate();
}
prevValidSecondary = true;
currValidPrimary = true;
}
if (currGroupSepType != 2 && (!prevValidSecondary || !currValidPrimary)) {
// Strict grouping failure (only reachable when requireGroupingMatch is set).
digitsConsumed.bogus = true;
}
// No digits, or the digits were rejected: restore the offset and report only maybeMore.
if (digitsConsumed.bogus) {
maybeMore = maybeMore || (segment.length() == 0);
segment.setOffset(initialOffset);
return maybeMore;
}
// Accepted. Scale by the number of fraction digits.
digitsConsumed.adjustMagnitude(-digitsAfterDecimalPlace);
if (exponentSign != 0 && segment.getOffset() != initialOffset) {
// Exponent mode: apply the parsed value as a signed magnitude shift on result.quantity.
// Overflow is flagged when the value does not fit a long, exceeds INT32_MAX, or
// adjustMagnitude() returns true.
bool overflow = false;
if (digitsConsumed.fitsInLong()) {
int64_t exponentLong = digitsConsumed.toLong(false);
U_ASSERT(exponentLong >= 0);
if (exponentLong <= INT32_MAX) {
auto exponentInt = static_cast<int32_t>(exponentLong);
if (result.quantity.adjustMagnitude(exponentSign * exponentInt)) {
overflow = true;
}
} else {
overflow = true;
}
} else {
overflow = true;
}
if (overflow) {
if (exponentSign == -1) {
// Negative exponent overflow: clear the quantity (presumably leaving zero).
result.quantity.clear();
} else {
// Positive exponent overflow: mark the quantity bogus and flag infinity.
result.quantity.bogus = true;
result.flags |= FLAG_INFINITY;
}
}
} else {
// Regular number: the parsed digits become the result.
result.quantity = digitsConsumed;
}
// Record whether a decimal separator was seen and how far the segment was consumed.
if (!actualDecimalString.isBogus()) {
result.flags |= FLAG_HAS_DECIMAL_SEPARATOR;
}
result.setCharsConsumed(segment);
return segment.length() == 0 || maybeMore;
}
/**
 * Checks whether a digit group has an acceptable size.
 *
 * @param sepType   Separator that introduced the group, as tracked by match():
 *                  -1 = no group / skip validation, 0 = none (first group of the input),
 *                  1 = grouping separator, 2 = decimal separator.
 * @param count     Number of digits in the group.
 * @param isPrimary true when the group is checked as the last integer group (compared with
 *                  grouping1); false when it is checked as an earlier group (compared with
 *                  grouping2).
 * @return true if the group is acceptable under the current grouping mode.
 */
bool DecimalMatcher::validateGroup(int32_t sepType, int32_t count, bool isPrimary) const {
    if (requireGroupingMatch) {
        // Strict grouping sizes.
        if (sepType == -1) {
            // Nothing to validate.
            return true;
        } else if (sepType == 0) {
            // First group of the input.
            if (isPrimary) {
                // A number without any grouping separator is fine at any length.
                return true;
            } else {
                // As a leading group it needs at least one digit and no more than the secondary
                // size. An empty first group means the input began with a grouping separator,
                // which strict mode must reject.
                return count != 0 && count <= grouping2;
            }
        } else if (sepType == 1) {
            // Group introduced by a grouping separator: size must match exactly.
            if (isPrimary) {
                return count == grouping1;
            } else {
                return count == grouping2;
            }
        } else {
            // Fraction digits are not subject to grouping validation.
            U_ASSERT(sepType == 2);
            return true;
        }
    } else {
        // Lenient grouping: only reject a grouping-separated group of exactly one digit.
        if (sepType == 1) {
            return count != 1;
        } else {
            return true;
        }
    }
}
bool DecimalMatcher::smokeTest(const StringSegment& segment) const {
if (fLocalDigitStrings.isNull() && leadSet != nullptr) {
return segment.startsWith(*leadSet);
}
if (segment.startsWith(*separatorSet) || u_isdigit(segment.getCodePoint())) {
return true;
}
if (fLocalDigitStrings.isNull()) {
return false;
}
if (segment.startsWith(0x96F6) && fLocalDigitStrings[0].length()==1 && fLocalDigitStrings[0].charAt(0)==0x3007) {
return true;
}
for (int32_t i = 0; i < 10; i++) {
if (segment.startsWith(fLocalDigitStrings[i])) {
return true;
}
}
return false;
}
/** Returns the fixed label identifying this matcher. */
UnicodeString DecimalMatcher::toString() const {
    UnicodeString label = u"<Decimal>";
    return label;
}
#endif