#pragma once
#include <unicode/ubrk.h>
#include <wtf/ASCIICType.h>
#include <wtf/StdLibExtras.h>
#include <wtf/text/TextBreakIterator.h>
#include <wtf/unicode/CharacterNames.h>
namespace WebCore {
// Inclusive range of characters covered by the pair lookup table consulted in shouldBreakAfter().
static const UChar lineBreakTableFirstCharacter = '!';
static const UChar lineBreakTableLastCharacter = 127;
// Number of bytes in one table row: one bit per covered "next" character, packed eight bits to a byte.
static const unsigned lineBreakTableColumnCount = (lineBreakTableLastCharacter - lineBreakTableFirstCharacter) / 8 + 1;
// Declared here and defined elsewhere. The row is selected by (character - lineBreakTableFirstCharacter);
// within the row, bit (n % 8) of byte (n / 8), with n = nextCharacter - lineBreakTableFirstCharacter,
// is the value shouldBreakAfter() returns for that character pair.
WEBCORE_EXPORT extern const unsigned char lineBreakTable[][lineBreakTableColumnCount];
// Compile-time switch selecting how the helpers in this file treat the noBreakSpace character.
enum class NonBreakingSpaceBehavior {
// noBreakSpace is not a breakable space, and on its own does not require the line break iterator.
IgnoreNonBreakingSpace,
// noBreakSpace is reported as a breakable space, like ' ', '\n' and '\t'.
TreatNonBreakingSpaceAsBreak,
};
// Returns true when |character| is a space at which a line may always be broken: ' ', '\n', '\t',
// and additionally noBreakSpace when the behavior is TreatNonBreakingSpaceAsBreak.
template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
static inline bool isBreakableSpace(UChar character)
{
    if (character == ' ' || character == '\n' || character == '\t')
        return true;

    // The non-breaking space only counts when the caller opted in at compile time.
    if (character == noBreakSpace)
        return nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;

    return false;
}
// Decides, without consulting the line break iterator, whether a break is allowed between |character|
// and |nextCharacter|. |lastCharacter| is the character preceding |character|.
// Returns false for any pair that is not fully covered by lineBreakTable.
inline bool shouldBreakAfter(UChar lastCharacter, UChar character, UChar nextCharacter)
{
    // A '-' directly followed by a digit is only breakable when it follows an ASCII letter or digit
    // (presumably so that a '-' acting as a minus sign stays attached to its number).
    if (character == '-' && isASCIIDigit(nextCharacter))
        return isASCIIAlphanumeric(lastCharacter);

    auto isCoveredByTable = [](UChar candidate) {
        return candidate >= lineBreakTableFirstCharacter && candidate <= lineBreakTableLastCharacter;
    };
    if (!isCoveredByTable(character) || !isCoveredByTable(nextCharacter))
        return false;

    // The row is chosen by the current character; the column bit by the next character.
    const unsigned char* row = lineBreakTable[character - lineBreakTableFirstCharacter];
    unsigned bitIndex = nextCharacter - lineBreakTableFirstCharacter;
    unsigned char packedBits = row[bitIndex / 8];
    return packedBits & (1 << (bitIndex % 8));
}
// Returns true when |character| lies beyond the range covered by lineBreakTable, so the line break
// iterator has to be consulted. With IgnoreNonBreakingSpace, noBreakSpace is exempt and yields false.
template<NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
inline bool needsLineBreakIterator(UChar character)
{
    if (character <= lineBreakTableLastCharacter)
        return false;

    return nonBreakingSpaceBehavior == NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak
        || character != noBreakSpace;
}
// Finds the next position in |string| at or after |startPosition| where a line may be broken under the
// non-loose rules, or returns |length| when there is none.
//
// A position i is reported when:
//  - string[i] is a breakable space, or
//  - shouldBreakAfter() allows a break between string[i - 1] and string[i], or
//  - either of those two characters needs the line break iterator, ICU reports a boundary at i,
//    and string[i - 1] is not a breakable space.
//
// |lazyBreakIterator| supplies the characters that precede string[0] as well as the ICU iterator,
// whose offsets are shifted by priorContextLength() relative to indices into |string|.
template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
inline unsigned nextBreakablePositionNonLoosely(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* string, unsigned length, unsigned startPosition)
{
    // Most recent boundary obtained from ICU (as an index into |string|); reused until the scan passes it.
    std::optional<unsigned> nextBreak;

    // Character two positions before startPosition. The character just before string[0] is the prior
    // context's last character and the one before that its second-to-last character, so for
    // startPosition == 1 the correct source is lastCharacter(), not secondToLastCharacter().
    CharacterType lastLastCharacter;
    if (startPosition > 1)
        lastLastCharacter = string[startPosition - 2];
    else if (startPosition == 1)
        lastLastCharacter = static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
    else
        lastLastCharacter = static_cast<CharacterType>(lazyBreakIterator.secondToLastCharacter());

    CharacterType lastCharacter = startPosition > 0 ? string[startPosition - 1] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
    unsigned priorContextLength = lazyBreakIterator.priorContextLength();

    for (unsigned i = startPosition; i < length; i++) {
        CharacterType character = string[i];

        if (isBreakableSpace<nonBreakingSpaceBehavior>(character) || shouldBreakAfter(lastLastCharacter, lastCharacter, character))
            return i;

        if (needsLineBreakIterator<nonBreakingSpaceBehavior>(character) || needsLineBreakIterator<nonBreakingSpaceBehavior>(lastCharacter)) {
            if (!nextBreak || nextBreak.value() < i) {
                // With no preceding text at all (i == 0 and no prior context) there is nothing to break after.
                if (i || priorContextLength) {
                    UBreakIterator* breakIterator = lazyBreakIterator.get(priorContextLength);
                    if (breakIterator) {
                        // ubrk_following() returns the first boundary strictly after the given offset, so
                        // start one before i to allow i itself to be reported. The guard above keeps
                        // i + priorContextLength >= 1.
                        int candidate = ubrk_following(breakIterator, i + priorContextLength - 1);
                        if (candidate == UBRK_DONE)
                            nextBreak = std::nullopt;
                        else {
                            unsigned result = candidate;
                            ASSERT(result >= priorContextLength);
                            // Translate the iterator offset back into an index into |string|.
                            nextBreak = result - priorContextLength;
                        }
                    }
                }
            }
            if (i == nextBreak && !isBreakableSpace<nonBreakingSpaceBehavior>(lastCharacter))
                return i;
        }

        lastLastCharacter = lastCharacter;
        lastCharacter = character;
    }

    return length;
}
// Finds the next position in |string| at or after |startPosition| where a line may be broken in loose
// mode, or returns |length| when there is none. Unlike the non-loose variant, every position that is not
// a breakable space is checked against the ICU iterator; the ASCII table shortcut is not used.
template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
static inline unsigned nextBreakablePositionLoosely(LazyLineBreakIterator& lazyBreakIterator, const CharacterType* string, unsigned length, unsigned startPosition)
{
    // Most recent ICU boundary, expressed as an index into |string|.
    std::optional<unsigned> cachedBreak;
    CharacterType previous = startPosition ? string[startPosition - 1] : static_cast<CharacterType>(lazyBreakIterator.lastCharacter());
    // Iterator offsets are shifted by this amount relative to indices into |string|.
    unsigned contextOffset = lazyBreakIterator.priorContextLength();

    for (unsigned index = startPosition; index < length; ++index) {
        CharacterType current = string[index];
        if (isBreakableSpace<nonBreakingSpaceBehavior>(current))
            return index;

        bool cacheIsStale = !cachedBreak || *cachedBreak < index;
        // At index 0 with no prior context there is no preceding text to break after.
        bool hasPrecedingText = index || contextOffset;
        if (cacheIsStale && hasPrecedingText) {
            if (UBreakIterator* icuIterator = lazyBreakIterator.get(contextOffset)) {
                ASSERT(index + contextOffset >= 1);
                // Ask for the first boundary after the previous offset so |index| itself can be returned.
                int following = ubrk_following(icuIterator, index + contextOffset - 1);
                if (following == UBRK_DONE)
                    cachedBreak = std::nullopt;
                else {
                    unsigned boundary = following;
                    ASSERT(boundary > contextOffset);
                    cachedBreak = boundary - contextOffset;
                }
            }
        }

        if (cachedBreak == index && !isBreakableSpace<nonBreakingSpaceBehavior>(previous))
            return index;

        previous = current;
    }

    return length;
}
// Returns the index of the first breakable space in |string| at or after |startPosition|, or |length|
// when there is none. No other break opportunities are considered.
template<typename CharacterType, NonBreakingSpaceBehavior nonBreakingSpaceBehavior>
inline unsigned nextBreakablePositionKeepingAllWords(const CharacterType* string, unsigned length, unsigned startPosition)
{
    unsigned position = startPosition;
    while (position < length) {
        if (isBreakableSpace<nonBreakingSpaceBehavior>(string[position]))
            return position;
        ++position;
    }
    // Also reached when startPosition is already at or past the end.
    return length;
}
// Keep-all-words search over the iterator's text, treating noBreakSpace as a breakable space.
// Dispatches to the 8-bit or 16-bit instantiation depending on how the text is stored.
inline unsigned nextBreakablePositionKeepingAllWords(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
    auto text = lazyBreakIterator.stringView();
    if (!text.is8Bit())
        return nextBreakablePositionKeepingAllWords<UChar, nbspBehavior>(text.characters16(), text.length(), startPosition);
    return nextBreakablePositionKeepingAllWords<LChar, nbspBehavior>(text.characters8(), text.length(), startPosition);
}
// Keep-all-words search over the iterator's text in which noBreakSpace is not a breakable space.
// Dispatches to the 8-bit or 16-bit instantiation depending on how the text is stored.
inline unsigned nextBreakablePositionKeepingAllWordsIgnoringNBSP(LazyLineBreakIterator& iterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;
    auto text = iterator.stringView();
    if (!text.is8Bit())
        return nextBreakablePositionKeepingAllWords<UChar, nbspBehavior>(text.characters16(), text.length(), startPosition);
    return nextBreakablePositionKeepingAllWords<LChar, nbspBehavior>(text.characters8(), text.length(), startPosition);
}
// Non-loose search over the iterator's text, treating noBreakSpace as a breakable space.
// Dispatches to the 8-bit or 16-bit instantiation depending on how the text is stored.
inline unsigned nextBreakablePosition(LazyLineBreakIterator& iterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
    auto text = iterator.stringView();
    if (!text.is8Bit())
        return nextBreakablePositionNonLoosely<UChar, nbspBehavior>(iterator, text.characters16(), text.length(), startPosition);
    return nextBreakablePositionNonLoosely<LChar, nbspBehavior>(iterator, text.characters8(), text.length(), startPosition);
}
// Non-loose search over the iterator's text in which noBreakSpace is not a breakable space.
// Dispatches to the 8-bit or 16-bit instantiation depending on how the text is stored.
inline unsigned nextBreakablePositionIgnoringNBSP(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;
    auto text = lazyBreakIterator.stringView();
    if (!text.is8Bit())
        return nextBreakablePositionNonLoosely<UChar, nbspBehavior>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
    return nextBreakablePositionNonLoosely<LChar, nbspBehavior>(lazyBreakIterator, text.characters8(), text.length(), startPosition);
}
// Loose-mode search over the iterator's text, treating noBreakSpace as a breakable space.
// Dispatches to the 8-bit or 16-bit instantiation depending on how the text is stored.
inline unsigned nextBreakablePositionLoose(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::TreatNonBreakingSpaceAsBreak;
    auto text = lazyBreakIterator.stringView();
    if (!text.is8Bit())
        return nextBreakablePositionLoosely<UChar, nbspBehavior>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
    return nextBreakablePositionLoosely<LChar, nbspBehavior>(lazyBreakIterator, text.characters8(), text.length(), startPosition);
}
// Loose-mode search over the iterator's text in which noBreakSpace is not a breakable space.
// Dispatches to the 8-bit or 16-bit instantiation depending on how the text is stored.
inline unsigned nextBreakablePositionIgnoringNBSPLoose(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition)
{
    constexpr auto nbspBehavior = NonBreakingSpaceBehavior::IgnoreNonBreakingSpace;
    auto text = lazyBreakIterator.stringView();
    if (!text.is8Bit())
        return nextBreakablePositionLoosely<UChar, nbspBehavior>(lazyBreakIterator, text.characters16(), text.length(), startPosition);
    return nextBreakablePositionLoosely<LChar, nbspBehavior>(lazyBreakIterator, text.characters8(), text.length(), startPosition);
}
// Returns true when |startPosition| is itself the next breakable position in the iterator's text.
//
// |nextBreakable| is an in/out cache: a stored position at or beyond |startPosition| is reused as is;
// otherwise it is recomputed from |startPosition| and stored back. The flags select the search:
// |keepAllWords| takes precedence over |isLooseMode|, and |breakNBSP| chooses whether noBreakSpace
// is treated as a breakable space.
inline bool isBreakable(LazyLineBreakIterator& lazyBreakIterator, unsigned startPosition, std::optional<unsigned>& nextBreakable, bool breakNBSP, bool isLooseMode, bool keepAllWords)
{
    const bool cacheIsUsable = nextBreakable.has_value() && *nextBreakable >= startPosition;
    if (!cacheIsUsable) {
        unsigned position;
        if (keepAllWords) {
            position = breakNBSP
                ? nextBreakablePositionKeepingAllWords(lazyBreakIterator, startPosition)
                : nextBreakablePositionKeepingAllWordsIgnoringNBSP(lazyBreakIterator, startPosition);
        } else if (isLooseMode) {
            position = breakNBSP
                ? nextBreakablePositionLoose(lazyBreakIterator, startPosition)
                : nextBreakablePositionIgnoringNBSPLoose(lazyBreakIterator, startPosition);
        } else {
            position = breakNBSP
                ? nextBreakablePosition(lazyBreakIterator, startPosition)
                : nextBreakablePositionIgnoringNBSP(lazyBreakIterator, startPosition);
        }
        nextBreakable = position;
    }
    return startPosition == nextBreakable;
}
}