// TextCheckingHelper.cpp [plain text]
#include "config.h"
#include "TextCheckingHelper.h"
#include "Document.h"
#include "DocumentMarkerController.h"
#include "EditorClient.h"
#include "Frame.h"
#include "FrameSelection.h"
#include "Range.h"
#include "Settings.h"
#include "TextCheckerClient.h"
#include "TextIterator.h"
#include "VisibleUnits.h"
#include <unicode/ubrk.h>
#include <wtf/text/StringView.h>
#include <wtf/text/TextBreakIterator.h>
namespace WebCore {
#if !USE(UNIFIED_TEXT_CHECKING)
// Repeatedly asks |client| to grammar-check the not-yet-consumed tail of |text| and
// appends one TextCheckingResult of type Grammar to |results| per reported phrase.
// The client reports a location relative to the substring it was handed, so the
// location is rebased onto |text| (by adding checkLocation) before being stored.
// Stops at the first call that reports a zero-length phrase.
static void findGrammaticalErrors(TextCheckerClient& client, StringView text, Vector<TextCheckingResult>& results)
{
    for (unsigned checkLocation = 0; checkLocation < text.length(); ) {
        int badGrammarLocation = -1;
        int badGrammarLength = 0;
        Vector<GrammarDetail> badGrammarDetails;
        client.checkGrammarOfString(text.substring(checkLocation), badGrammarDetails, &badGrammarLocation, &badGrammarLength);
        if (!badGrammarLength)
            break;

        ASSERT(badGrammarLocation >= 0);
        ASSERT(static_cast<unsigned>(badGrammarLocation) <= text.length() - checkLocation);
        ASSERT(badGrammarLength > 0);
        ASSERT(static_cast<unsigned>(badGrammarLength) <= text.length() - checkLocation - badGrammarLocation);

        // The ASSERTs above only protect builds where assertions are enabled. A negative
        // location or length would make checkLocation wrap around or stop advancing, so
        // treat such a report as "nothing more to find" instead of looping on it.
        if (badGrammarLocation < 0 || badGrammarLength < 0)
            break;

        TextCheckingResult badGrammar;
        badGrammar.type = TextCheckingType::Grammar;
        badGrammar.range = CharacterRange(checkLocation + badGrammarLocation, badGrammarLength);
        badGrammar.details = WTFMove(badGrammarDetails);
        // badGrammar is not used again; move it to avoid copying the details vector.
        results.append(WTFMove(badGrammar));

        // Resume checking right after the phrase just reported.
        checkLocation += badGrammarLocation + badGrammarLength;
    }
}
// Walks |text| one word-break segment at a time, spell-checks each segment with
// |client|, and appends a Spelling result (with the client's autocorrect suggestion
// as its replacement) to |results| for every segment reported as misspelled.
// Result ranges are expressed relative to the start of |text|.
static void findMisspellings(TextCheckerClient& client, StringView text, Vector<TextCheckingResult>& results)
{
    UBreakIterator* wordBoundaries = wordBreakIterator(text);
    if (!wordBoundaries)
        return;

    int wordStart = ubrk_current(wordBoundaries);
    while (wordStart >= 0) {
        int wordEnd = ubrk_next(wordBoundaries);
        // A negative boundary means the iterator has no further segments.
        if (wordEnd < 0)
            break;

        int wordLength = wordEnd - wordStart;
        int misspellingLocation = -1;
        int misspellingLength = 0;
        client.checkSpellingOfString(text.substring(wordStart, wordLength), &misspellingLocation, &misspellingLength);

        if (misspellingLength > 0) {
            ASSERT(misspellingLocation >= 0);
            ASSERT(misspellingLocation <= wordLength);
            ASSERT(misspellingLength > 0);
            ASSERT(misspellingLocation + misspellingLength <= wordLength);

            TextCheckingResult misspelling;
            misspelling.type = TextCheckingType::Spelling;
            // The client's location is relative to the segment; rebase it onto |text|.
            misspelling.range = CharacterRange(wordStart + misspellingLocation, misspellingLength);
            auto misspelledText = text.substring(misspelling.range.location, misspelling.range.length);
            misspelling.replacement = client.getAutoCorrectSuggestionForMisspelledWord(misspelledText.toStringWithoutCopying());
            results.append(misspelling);
        }

        wordStart = wordEnd;
    }
}
#endif
// Returns a range running from the start of the paragraph containing |range.start|
// to the end of the paragraph containing |range.end|. If either boundary point
// cannot be produced, |range| is returned unchanged.
static SimpleRange expandToParagraphBoundary(const SimpleRange& range)
{
    auto paragraphStart = makeBoundaryPoint(startOfParagraph(createLegacyEditingPosition(range.start)));
    auto paragraphEnd = makeBoundaryPoint(endOfParagraph(createLegacyEditingPosition(range.end)));
    if (paragraphStart && paragraphEnd)
        return { *paragraphStart, *paragraphEnd };
    return range;
}
// Constructs a paragraph whose checking range and automatic replacement range are
// both |range|. m_paragraphRange is not initialized here.
TextCheckingParagraph::TextCheckingParagraph(const SimpleRange& range)
: m_checkingRange(range)
, m_automaticReplacementRange(range)
{
}
// Constructs a paragraph with separate checking and automatic replacement ranges.
// |paragraphRange| is stored as given; it may be empty (no value).
TextCheckingParagraph::TextCheckingParagraph(const SimpleRange& checkingRange, const SimpleRange& replacementRange, const Optional<SimpleRange>& paragraphRange)
: m_checkingRange(checkingRange)
, m_automaticReplacementRange(replacementRange)
, m_paragraphRange(paragraphRange)
{
}
// Moves the end of the paragraph range to the end of the paragraph that follows the
// one at the current paragraph start, then drops every value cached from the old range.
void TextCheckingParagraph::expandRangeToNextEnd()
{
    // Called for its side effect only: it makes sure m_paragraphRange holds a value.
    paragraphRange();

    auto followingParagraphStart = startOfNextParagraph(createLegacyEditingPosition(m_paragraphRange->start));
    if (auto followingParagraphEnd = makeBoundaryPoint(endOfParagraph(followingParagraphStart)))
        m_paragraphRange->end = WTFMove(*followingParagraphEnd);

    invalidateParagraphRangeValues();
}
// Clears the lazily computed values so that they are recomputed on next access:
// the paragraph text, the offset range, the checking start, and the automatic
// replacement start and length. m_checkingLength is not touched here.
void TextCheckingParagraph::invalidateParagraphRangeValues()
{
    m_text = String();
    m_offsetAsRange = WTF::nullopt;

    m_checkingStart.reset();
    m_automaticReplacementStart.reset();
    m_automaticReplacementLength.reset();
}
uint64_t TextCheckingParagraph::rangeLength() const
{
return characterCount(paragraphRange());
}
// Returns the paragraph range, computing it on first use by expanding the checking
// range to paragraph boundaries.
const SimpleRange& TextCheckingParagraph::paragraphRange() const
{
    if (m_paragraphRange)
        return *m_paragraphRange;

    m_paragraphRange = expandToParagraphBoundary(m_checkingRange);
    return *m_paragraphRange;
}
// Resolves the character range |range| against the paragraph range and returns the
// resulting SimpleRange.
SimpleRange TextCheckingParagraph::subrange(CharacterRange range) const
{
    const SimpleRange& scope = paragraphRange();
    return resolveCharacterRange(scope, range);
}
// Returns the character count from the start of the paragraph range to |position|,
// or a TypeError exception when no range can be made between the two.
ExceptionOr<uint64_t> TextCheckingParagraph::offsetTo(const Position& position) const
{
    auto rangeUpToPosition = makeSimpleRange(paragraphRange().start, position);
    if (rangeUpToPosition)
        return characterCount(*rangeUpToPosition);
    return Exception { TypeError };
}
// True when the checking span has no extent (start is not before end) or when the
// paragraph text is empty. The text is only consulted if the span has extent.
bool TextCheckingParagraph::isEmpty() const
{
    if (checkingStart() >= checkingEnd())
        return true;
    return text().isEmpty();
}
// Returns the range from the start of the paragraph range to the start of the
// checking range, computing and caching it on first use.
const SimpleRange& TextCheckingParagraph::offsetAsRange() const
{
    if (m_offsetAsRange)
        return *m_offsetAsRange;

    m_offsetAsRange = { { paragraphRange().start, m_checkingRange.start } };
    return *m_offsetAsRange;
}
// Returns a view of the plain text of the paragraph range. The text is extracted
// the first time it is needed (while m_text is still null) and cached in m_text.
StringView TextCheckingParagraph::text() const
{
    bool needsExtraction = m_text.isNull();
    if (needsExtraction)
        m_text = plainText(paragraphRange());
    return m_text;
}
// Returns the character count of offsetAsRange(), i.e. the offset of the checking
// range's start from the paragraph start. Computed once and cached.
uint64_t TextCheckingParagraph::checkingStart() const
{
    if (m_checkingStart)
        return *m_checkingStart;

    m_checkingStart = characterCount(offsetAsRange());
    return *m_checkingStart;
}
// Returns checkingStart() + checkingLength().
uint64_t TextCheckingParagraph::checkingEnd() const
{
    uint64_t start = checkingStart();
    uint64_t length = checkingLength();
    return start + length;
}
// Returns the character count of the checking range. Computed once and cached.
uint64_t TextCheckingParagraph::checkingLength() const
{
    if (m_checkingLength)
        return *m_checkingLength;

    m_checkingLength = characterCount(m_checkingRange);
    return *m_checkingLength;
}
// Returns the character offset of the automatic replacement range's start from the
// paragraph start (computed once and cached).
// With |oldBehaviour| set, returns 0 without touching the cache when the paragraph
// start is ordered after the replacement range's start in the document.
uint64_t TextCheckingParagraph::automaticReplacementStart(bool oldBehaviour) const
{
    if (oldBehaviour) {
        bool paragraphStartsAfterReplacementStart = is_gt(documentOrder(paragraphRange().start, m_automaticReplacementRange.start));
        if (paragraphStartsAfterReplacementStart)
            return 0;
    }

    if (m_automaticReplacementStart)
        return *m_automaticReplacementStart;

    m_automaticReplacementStart = characterCount({ paragraphRange().start, m_automaticReplacementRange.start });
    return *m_automaticReplacementStart;
}
// Returns the character count of the automatic replacement range (computed once and
// cached).
// With |oldBehaviour| set and the paragraph start ordered after the replacement
// range's start, returns instead the uncached character count from the paragraph
// start to the replacement range's end.
uint64_t TextCheckingParagraph::automaticReplacementLength(bool oldBehaviour) const
{
    if (oldBehaviour) {
        bool paragraphStartsAfterReplacementStart = is_gt(documentOrder(paragraphRange().start, m_automaticReplacementRange.start));
        if (paragraphStartsAfterReplacementStart)
            return characterCount({ paragraphRange().start, m_automaticReplacementRange.end });
    }

    if (m_automaticReplacementLength)
        return *m_automaticReplacementLength;

    m_automaticReplacementLength = characterCount(m_automaticReplacementRange);
    return *m_automaticReplacementLength;
}
// Constructs a helper that checks |range| using |client|. Both are stored in
// members (m_client, m_range) for use by the find/mark/guess operations.
TextCheckingHelper::TextCheckingHelper(EditorClient& client, const SimpleRange& range)
: m_client(client)
, m_range(range)
{
}
// Spell-checks m_range chunk by chunk (as produced by WordAwareIterator).
// Returns the first misspelling found, paired with its range; both halves are left
// default-constructed when nothing is found.
// With Operation::MarkAll every misspelling gets a Spelling marker; with
// Operation::FindFirst the scan stops at the first misspelling.
// The recorded offset is relative to the start of m_range.
auto TextCheckingHelper::findMisspelledWords(Operation operation) const -> std::pair<MisspelledWord, Optional<SimpleRange>>
{
    std::pair<MisspelledWord, Optional<SimpleRange>> firstMisspelling;
    uint64_t chunkOffset = 0;

    // The increment expression also runs after each `continue`, so chunkOffset always
    // advances by the length of the chunk that was just visited.
    for (WordAwareIterator chunks(m_range); !chunks.atEnd(); chunkOffset += chunks.text().length(), chunks.advance()) {
        StringView chunk = chunks.text();
        if (chunk == " "_s)
            continue;

        int misspellingLocation = -1;
        int misspellingLength = 0;
        m_client.textChecker()->checkSpellingOfString(chunk, &misspellingLocation, &misspellingLength);

        int chunkLength = chunk.length();
        ASSERT(misspellingLength >= 0);
        ASSERT(misspellingLocation >= -1);
        ASSERT(!misspellingLength || misspellingLocation >= 0);
        ASSERT(misspellingLocation < chunkLength);
        ASSERT(misspellingLength <= chunkLength);
        ASSERT(misspellingLocation + misspellingLength <= chunkLength);

        // Repeat the checks as a real runtime test: ignore any report that is empty
        // or does not fit inside the chunk.
        bool locationIsUsable = misspellingLocation >= 0 && misspellingLocation < chunkLength;
        bool lengthIsUsable = misspellingLength > 0 && misspellingLength <= chunkLength;
        if (!locationIsUsable || !lengthIsUsable || misspellingLocation + misspellingLength > chunkLength)
            continue;

        auto misspellingRange = resolveCharacterRange(m_range, CharacterRange(chunkOffset + misspellingLocation, misspellingLength));

        // Mark before the range is possibly moved into the return value below.
        if (operation == Operation::MarkAll)
            addMarker(misspellingRange, DocumentMarker::Spelling);

        if (firstMisspelling.first.word.isNull()) {
            firstMisspelling = {
                {
                    chunk.substring(misspellingLocation, misspellingLength).toString(),
                    chunkOffset + misspellingLocation
                },
                WTFMove(misspellingRange)
            };
        }

        if (operation == Operation::FindFirst)
            break;
    }

    return firstMisspelling;
}
// Returns the first misspelled word in m_range (the word half of
// findMisspelledWords(Operation::FindFirst)); no markers are added.
auto TextCheckingHelper::findFirstMisspelledWord() const -> MisspelledWord
{
    auto wordAndRange = findMisspelledWords(Operation::FindFirst);
    return WTFMove(wordAndRange.first);
}
// Searches m_range one paragraph at a time, using checkTextOfParagraph(), for the
// first misspelled word or (when checkGrammar is true) the first ungrammatical phrase.
// Returns a default-constructed Variant when unified text checking is disabled, when
// platform-driven text checking is enabled, or when nothing is found.
// The offset stored in the returned item is relative to the start of m_range.
auto TextCheckingHelper::findFirstMisspelledWordOrUngrammaticalPhrase(bool checkGrammar) const -> Variant<MisspelledWord, UngrammaticalPhrase>
{
if (!unifiedTextCheckerEnabled())
return { };
if (platformDrivenTextCheckerEnabled())
return { };
Variant<MisspelledWord, UngrammaticalPhrase> firstFoundItem;
GrammarDetail grammarDetail;
String misspelledWord;
Optional<SimpleRange> misspelledWordRange;
String badGrammarPhrase;
// Start with a range from the beginning of the paragraph containing m_range.start to
// m_range.end; its character count is the total amount of text to get through.
// NOTE(review): the Optional returned by makeSimpleRange is dereferenced without a check — confirm it cannot be empty here.
auto paragraphRange = *makeSimpleRange(startOfParagraph(createLegacyEditingPosition(m_range.start)), m_range.end);
auto totalRangeLength = characterCount(paragraphRange);
// Then shrink the range to just the first paragraph.
// NOTE(review): the Optional returned by makeBoundaryPoint is dereferenced without a check — confirm it cannot be empty here.
paragraphRange.end = *makeBoundaryPoint(endOfParagraph(createLegacyEditingPosition(m_range.start)));
// Offset of m_range.start within the first paragraph; results before it are ignored.
auto rangeStartOffset = characterCount({ paragraphRange.start, m_range.start });
uint64_t totalLengthProcessed = 0;
bool firstIteration = true;
bool lastIteration = false;
while (totalLengthProcessed < totalRangeLength) {
auto currentLength = characterCount(paragraphRange);
// Only the first paragraph has a non-zero start offset.
uint64_t currentStartOffset = firstIteration ? rangeStartOffset : 0;
uint64_t currentEndOffset = currentLength;
// If m_range ends inside this paragraph, stop considering results past m_range.end
// and make this the final pass.
if (inSameParagraph(createLegacyEditingPosition(paragraphRange.start), createLegacyEditingPosition(m_range.end))) {
currentEndOffset = characterCount({ paragraphRange.start, m_range.end });
lastIteration = true;
}
if (currentStartOffset < currentEndOffset) {
String paragraphString = plainText(paragraphRange);
if (paragraphString.length() > 0) {
bool foundGrammar = false;
uint64_t spellingLocation = 0;
uint64_t grammarPhraseLocation = 0;
uint64_t grammarDetailLocation = 0;
unsigned grammarDetailIndex = 0;
Vector<TextCheckingResult> results;
OptionSet<TextCheckingType> checkingTypes { TextCheckingType::Spelling };
if (checkGrammar)
checkingTypes.add(TextCheckingType::Grammar);
VisibleSelection currentSelection;
if (Frame* frame = paragraphRange.start.container->document().frame())
currentSelection = frame->selection().selection();
checkTextOfParagraph(*m_client.textChecker(), paragraphString, checkingTypes, results, currentSelection);
for (auto& result : results) {
// A spelling result lying entirely inside [currentStartOffset, currentEndOffset] ends the scan.
if (result.type == TextCheckingType::Spelling && result.range.location >= currentStartOffset && result.range.location + result.range.length <= currentEndOffset) {
ASSERT(result.range.length > 0);
spellingLocation = result.range.location;
misspelledWord = paragraphString.substring(result.range.location, result.range.length);
ASSERT(misspelledWord.length());
break;
}
// A grammar result is considered when it overlaps the window at all.
if (checkGrammar && result.type == TextCheckingType::Grammar && result.range.location < currentEndOffset && result.range.location + result.range.length > currentStartOffset) {
ASSERT(result.range.length > 0);
// A grammar detail was already recorded from an earlier result; stop at this one.
if (foundGrammar)
break;
// Pick the detail with the smallest paragraph-relative location among those lying
// entirely inside the window.
for (unsigned j = 0; j < result.details.size(); j++) {
const GrammarDetail* detail = &result.details[j];
ASSERT(detail->range.length > 0);
if (result.range.location + detail->range.location >= currentStartOffset && result.range.location + detail->range.location + detail->range.length <= currentEndOffset && (!foundGrammar || result.range.location + detail->range.location < grammarDetailLocation)) {
grammarDetailIndex = j;
grammarDetailLocation = result.range.location + detail->range.location;
foundGrammar = true;
}
}
if (foundGrammar) {
grammarPhraseLocation = result.range.location;
grammarDetail = result.details[grammarDetailIndex];
badGrammarPhrase = paragraphString.substring(result.range.location, result.range.length);
ASSERT(badGrammarPhrase.length());
}
}
}
// Report the misspelling unless a grammar detail was found that starts before it.
if (!misspelledWord.isEmpty() && (!checkGrammar || badGrammarPhrase.isEmpty() || spellingLocation <= grammarDetailLocation)) {
uint64_t spellingOffset = spellingLocation - currentStartOffset;
// For later paragraphs, add the distance from m_range.start to this paragraph's start.
if (!firstIteration)
spellingOffset += characterCount({ m_range.start, paragraphRange.start });
firstFoundItem = MisspelledWord {
misspelledWord,
spellingOffset
};
break;
}
if (checkGrammar && !badGrammarPhrase.isEmpty()) {
uint64_t grammarPhraseOffset = grammarPhraseLocation - currentStartOffset;
// For later paragraphs, add the distance from m_range.start to this paragraph's start.
if (!firstIteration)
grammarPhraseOffset += characterCount({ m_range.start, paragraphRange.start });
firstFoundItem = UngrammaticalPhrase {
badGrammarPhrase,
grammarPhraseOffset,
grammarDetail
};
break;
}
}
}
// Nothing found in this paragraph: stop if it was the last one or all text has been covered.
if (lastIteration || totalLengthProcessed + currentLength >= totalRangeLength)
break;
// Otherwise move on to the next paragraph; give up if no range can be made for it.
auto nextStart = startOfNextParagraph(createLegacyEditingPosition(paragraphRange.end));
auto nextParagraphRange = makeSimpleRange(nextStart, endOfParagraph(nextStart));
if (!nextParagraphRange)
break;
paragraphRange = WTFMove(*nextParagraphRange);
firstIteration = false;
totalLengthProcessed += currentLength;
}
return firstFoundItem;
}
// Scans |grammarDetails| (whose ranges are relative to |badGrammarPhraseLocation|)
// for details that start inside [startOffset, endOffset), all in paragraph offsets.
// With Operation::MarkAll, each such detail gets a Grammar marker carrying its user
// description. Returns the index of the in-range detail with the smallest
// location, or -1 when no detail starts inside the range.
int TextCheckingHelper::findUngrammaticalPhrases(Operation operation, const Vector<GrammarDetail>& grammarDetails, uint64_t badGrammarPhraseLocation, uint64_t startOffset, uint64_t endOffset) const
{
    int earliestIndex = -1;
    uint64_t earliestLocation = 0;

    for (unsigned index = 0; index < grammarDetails.size(); ++index) {
        const GrammarDetail& detail = grammarDetails[index];
        ASSERT(detail.range.length > 0);

        uint64_t locationInParagraph = badGrammarPhraseLocation + detail.range.location;
        bool startsInsideSearchRange = locationInParagraph >= startOffset && locationInParagraph < endOffset;
        if (!startsInsideSearchRange)
            continue;

        if (operation == Operation::MarkAll) {
            // m_range begins at |startOffset| within the paragraph, hence the rebasing.
            auto badGrammarRange = resolveCharacterRange(m_range, { badGrammarPhraseLocation - startOffset + detail.range.location, detail.range.length });
            addMarker(badGrammarRange, DocumentMarker::Grammar, detail.userDescription);
        }

        // Details are compared by location because this code does not rely on their order.
        if (earliestIndex < 0 || detail.range.location < earliestLocation) {
            earliestIndex = index;
            earliestLocation = detail.range.location;
        }
    }

    return earliestIndex;
}
// Grammar-checks the text of the paragraph containing m_range, starting at the
// paragraph start, and returns the first ungrammatical phrase that has a detail
// starting inside the checking range. The returned offset is relative to the start
// of the checking range. Returns a default-constructed UngrammaticalPhrase when
// nothing is found.
// With Operation::FindFirst the scan stops at the first such phrase. With
// Operation::MarkAll it continues so that every in-range detail gets a marker, and
// still returns the first phrase that was found.
auto TextCheckingHelper::findUngrammaticalPhrases(Operation operation) const -> UngrammaticalPhrase
{
    UngrammaticalPhrase result;
    TextCheckingParagraph paragraph(m_range);

    for (uint64_t startOffset = 0; startOffset < paragraph.checkingEnd(); ) {
        Vector<GrammarDetail> grammarDetails;
        int badGrammarPhraseLocation = -1;
        int badGrammarPhraseLength = 0;
        m_client.textChecker()->checkGrammarOfString(paragraph.text().substring(startOffset), grammarDetails, &badGrammarPhraseLocation, &badGrammarPhraseLength);

        if (!badGrammarPhraseLength) {
            ASSERT(badGrammarPhraseLocation == -1);
            // No more bad grammar. Leave the loop rather than returning an empty value,
            // so that a phrase recorded earlier during MarkAll is not thrown away.
            // In FindFirst mode |result| is always still empty at this point.
            break;
        }

        ASSERT(badGrammarPhraseLocation >= 0);
        // The client's location is relative to the substring it was given.
        badGrammarPhraseLocation += startOffset;

        int badGrammarIndex = findUngrammaticalPhrases(operation, grammarDetails, badGrammarPhraseLocation, paragraph.checkingStart(), paragraph.checkingEnd());
        if (badGrammarIndex >= 0 && result.phrase.isEmpty()) {
            result.offset = badGrammarPhraseLocation - paragraph.checkingStart();
            result.phrase = paragraph.text().substring(badGrammarPhraseLocation, badGrammarPhraseLength).toString();
            ASSERT(static_cast<unsigned>(badGrammarIndex) < grammarDetails.size());
            result.detail = grammarDetails[badGrammarIndex];
            if (operation == Operation::FindFirst)
                break;
        }

        // Continue looking after the phrase just reported.
        startOffset = badGrammarPhraseLocation + badGrammarPhraseLength;
    }

    return result;
}
// Returns the result of findUngrammaticalPhrases(Operation::FindFirst).
auto TextCheckingHelper::findFirstUngrammaticalPhrase() const -> UngrammaticalPhrase
{
    auto firstPhrase = findUngrammaticalPhrases(Operation::FindFirst);
    return firstPhrase;
}
// Produces guesses for m_range when it exactly matches a spelling result or a
// grammar detail reported by checkTextOfParagraph() for its paragraph.
// A spelling match is looked for first; on a match the client is asked for guesses
// and the spelling UI is updated with the misspelled word. Grammar results are only
// examined when |checkGrammar| is true; on a match the spelling UI is updated with
// the bad phrase and the detail's own guesses are returned.
// Returns an empty value when unified text checking is disabled, platform-driven
// checking is enabled, m_range is collapsed, the paragraph is empty, or nothing matches.
TextCheckingGuesses TextCheckingHelper::guessesForMisspelledWordOrUngrammaticalPhrase(bool checkGrammar) const
{
    if (!unifiedTextCheckerEnabled() || platformDrivenTextCheckerEnabled() || m_range.collapsed())
        return { };

    TextCheckingParagraph paragraph(m_range);
    if (paragraph.isEmpty())
        return { };

    OptionSet<TextCheckingType> checkingTypes { TextCheckingType::Spelling };
    if (checkGrammar)
        checkingTypes.add(TextCheckingType::Grammar);

    VisibleSelection currentSelection;
    if (auto frame = m_range.start.document().frame())
        currentSelection = frame->selection().selection();

    Vector<TextCheckingResult> results;
    checkTextOfParagraph(*m_client.textChecker(), paragraph.text(), checkingTypes, results, currentSelection);

    // Pass 1: a spelling result that coincides with the checking range.
    for (auto& result : results) {
        if (result.type != TextCheckingType::Spelling || !paragraph.checkingRangeMatches(result.range))
            continue;

        String misspelledWord = paragraph.checkingSubstring().toString();
        ASSERT(misspelledWord.length());
        Vector<String> guesses;
        m_client.textChecker()->getGuessesForWord(misspelledWord, String(), currentSelection, guesses);
        m_client.updateSpellingUIWithMisspelledWord(misspelledWord);
        return { WTFMove(guesses), true, false };
    }

    if (!checkGrammar)
        return { };

    // Pass 2: a grammar result covering the checking range, with a detail that
    // coincides with it.
    for (auto& result : results) {
        if (result.type != TextCheckingType::Grammar || !paragraph.isCheckingRangeCoveredBy(result.range))
            continue;

        for (auto& detail : result.details) {
            ASSERT(detail.range.length > 0);
            // Detail locations are relative to the enclosing grammar result.
            if (!paragraph.checkingRangeMatches({ result.range.location + detail.range.location, detail.range.length }))
                continue;

            String badGrammarPhrase = paragraph.text().substring(result.range.location, result.range.length).toString();
            ASSERT(badGrammarPhrase.length());
            m_client.updateSpellingUIWithGrammarString(badGrammarPhrase, detail);
            return { WTFMove(detail.guesses), false, true };
        }
    }

    return { };
}
// Adds a Spelling marker for every misspelling in m_range and returns the range of
// the first one (empty when there is none).
Optional<SimpleRange> TextCheckingHelper::markAllMisspelledWords() const
{
    auto wordAndRange = findMisspelledWords(Operation::MarkAll);
    return WTFMove(wordAndRange.second);
}
// Runs findUngrammaticalPhrases in MarkAll mode so that grammar markers are added;
// the returned phrase is discarded.
void TextCheckingHelper::markAllUngrammaticalPhrases() const
{
findUngrammaticalPhrases(Operation::MarkAll);
}
// Reports whether unified text checking is enabled for the frame of the document
// containing m_range.start, by forwarding to the free function of the same name.
bool TextCheckingHelper::unifiedTextCheckerEnabled() const
{
    auto frame = m_range.start.document().frame();
    return WebCore::unifiedTextCheckerEnabled(frame);
}
// Checks |text| for the kinds of errors listed in |checkingTypes| and stores the
// outcome in |results|, replacing its previous contents.
// With USE(UNIFIED_TEXT_CHECKING) the whole job is delegated to the client.
// Otherwise spelling is checked first, grammar is checked only on the text that
// precedes the earliest misspelling, and |results| receives the grammar results
// followed by the spelling results. |currentSelection| is unused on that path.
void checkTextOfParagraph(TextCheckerClient& client, StringView text, OptionSet<TextCheckingType> checkingTypes, Vector<TextCheckingResult>& results, const VisibleSelection& currentSelection)
{
#if USE(UNIFIED_TEXT_CHECKING)
    results = client.checkTextOfParagraph(text, checkingTypes, currentSelection);
#else
    UNUSED_PARAM(currentSelection);

    Vector<TextCheckingResult> spellingResults;
    if (checkingTypes.contains(TextCheckingType::Spelling))
        findMisspellings(client, text, spellingResults);

    Vector<TextCheckingResult> grammarResults;
    if (checkingTypes.contains(TextCheckingType::Grammar)) {
        // Limit grammar checking to the text before the earliest misspelling.
        unsigned earliestMisspelling = text.length();
        for (auto& spellingResult : spellingResults)
            earliestMisspelling = std::min<unsigned>(earliestMisspelling, spellingResult.range.location);
        findGrammaticalErrors(client, text.substring(0, earliestMisspelling), grammarResults);
    }

    if (grammarResults.isEmpty()) {
        results = WTFMove(spellingResults);
        return;
    }

    results = WTFMove(grammarResults);
    results.appendVector(spellingResults);
#endif // USE(UNIFIED_TEXT_CHECKING)
}
// Returns the frame's unifiedTextCheckerEnabled setting, or false when |frame| is null.
bool unifiedTextCheckerEnabled(const Frame* frame)
{
    return frame && frame->settings().unifiedTextCheckerEnabled();
}
// Compile-time switch: true exactly when the build has
// ENABLE(PLATFORM_DRIVEN_TEXT_CHECKING).
bool platformDrivenTextCheckerEnabled()
{
#if ENABLE(PLATFORM_DRIVEN_TEXT_CHECKING)
    constexpr bool enabled = true;
#else
    constexpr bool enabled = false;
#endif
    return enabled;
}
}