Pchen0/electron

Merge pull request #11819 from kwonoj/feat-worditerator

feat(worditerator): bump up worditerator into latest

Samuel Attard 7 years ago

parent

b3234f634b e869c27041

commit

01dcddef35

3 changed files with 111 additions and 67 deletions

Split View Show Diff Stats

						
							+ 22
							
							- 10
						
atom/renderer/api/atom_api_spell_check_client.cc
							 
								View File
							
				@@ -9,6 +9,7 @@
			
				 #include "atom/common/native_mate_converters/string16_converter.h"
			
				 #include "base/logging.h"
			
				+#include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
			
				 #include "native_mate/converter.h"
			
				 #include "native_mate/dictionary.h"
			
				 #include "third_party/icu/source/common/unicode/uscript.h"
			
				@@ -83,16 +84,14 @@ void SpellCheckClient::RequestCheckingOfText(
			
				   completionCallback->DidFinishCheckingText(results);
			
				 }
			
				-void SpellCheckClient::ShowSpellingUI(bool show) {
			
				-}
			
				+void SpellCheckClient::ShowSpellingUI(bool show) {}
			
				 bool SpellCheckClient::IsShowingSpellingUI() {
			
				   return false;
			
				 }
			
				 void SpellCheckClient::UpdateSpellingUIWithMisspelledWord(
			
				-    const blink::WebString& word) {
			
				-}
			
				+    const blink::WebString& word) {}
			
				 void SpellCheckClient::SpellCheckText(
			
				     const base::string16& text,
			
				@@ -103,9 +102,9 @@ void SpellCheckClient::SpellCheckText(
			
				   if (!text_iterator_.IsInitialized() &&
			
				       !text_iterator_.Initialize(&character_attributes_, true)) {
			
				-      // We failed to initialize text_iterator_, return as spelled correctly.
			
				-      VLOG(1) << "Failed to initialize SpellcheckWordIterator";
			
				-      return;
			
				+    // We failed to initialize text_iterator_, return as spelled correctly.
			
				+    VLOG(1) << "Failed to initialize SpellcheckWordIterator";
			
				+    return;
			
				   }
			
				   if (!contraction_iterator_.IsInitialized() &&
			
				@@ -121,7 +120,13 @@ void SpellCheckClient::SpellCheckText(
			
				   base::string16 word;
			
				   int word_start;
			
				   int word_length;
			
				-  while (text_iterator_.GetNextWord(&word, &word_start, &word_length)) {
			
				+  for (auto status =
			
				+           text_iterator_.GetNextWord(&word, &word_start, &word_length);
			
				+       status != SpellcheckWordIterator::IS_END_OF_TEXT;
			
				+       status = text_iterator_.GetNextWord(&word, &word_start, &word_length)) {
			
				+    if (status == SpellcheckWordIterator::IS_SKIPPABLE)
			
				+      continue;
			
				+
			
				     // Found a word (or a contraction) that the spellchecker can check the
			
				     // spelling of.
			
				     if (SpellCheckWord(scope, word))
			
				@@ -145,7 +150,7 @@ void SpellCheckClient::SpellCheckText(
			
				 bool SpellCheckClient::SpellCheckWord(
			
				     const SpellCheckScope& scope,
			
				     const base::string16& word_to_check) const {
			
				-    DCHECK(!scope.spell_check_.IsEmpty());
			
				+  DCHECK(!scope.spell_check_.IsEmpty());
			
				   v8::Local<v8::Value> word = mate::ConvertToV8(isolate_, word_to_check);
			
				   v8::Local<v8::Value> result =
			
				@@ -171,7 +176,14 @@ bool SpellCheckClient::IsValidContraction(const SpellCheckScope& scope,
			
				   int word_start;
			
				   int word_length;
			
				-  while (contraction_iterator_.GetNextWord(&word, &word_start, &word_length)) {
			
				+  for (auto status =
			
				+           contraction_iterator_.GetNextWord(&word, &word_start, &word_length);
			
				+       status != SpellcheckWordIterator::IS_END_OF_TEXT;
			
				+       status = contraction_iterator_.GetNextWord(&word, &word_start,
			
				+                                                  &word_length)) {
			
				+    if (status == SpellcheckWordIterator::IS_SKIPPABLE)
			
				+      continue;
			
				+
			
				     if (!SpellCheckWord(scope, word))
			
				       return false;
			
				   }

						
							+ 52
							
							- 44
						
chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.cc
							 
								View File
							
				@@ -7,10 +7,13 @@
			
				 #include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
			
				 #include <map>
			
				+#include <memory>
			
				 #include <string>
			
				+#include <utility>
			
				 #include "base/i18n/break_iterator.h"
			
				 #include "base/logging.h"
			
				+#include "base/macros.h"
			
				 #include "base/strings/stringprintf.h"
			
				 #include "base/strings/utf_string_conversions.h"
			
				 #include "third_party/icu/source/common/unicode/normlzr.h"
			
				@@ -21,11 +24,9 @@
			
				 // SpellcheckCharAttribute implementation:
			
				 SpellcheckCharAttribute::SpellcheckCharAttribute()
			
				-    : script_code_(USCRIPT_LATIN) {
			
				-}
			
				+    : script_code_(USCRIPT_LATIN) {}
			
				-SpellcheckCharAttribute::~SpellcheckCharAttribute() {
			
				-}
			
				+SpellcheckCharAttribute::~SpellcheckCharAttribute() {}
			
				 void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) {
			
				   CreateRuleSets(language);
			
				@@ -33,8 +34,8 @@ void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) {
			
				 base::string16 SpellcheckCharAttribute::GetRuleSet(
			
				     bool allow_contraction) const {
			
				-  return allow_contraction ?
			
				-      ruleset_allow_contraction_ : ruleset_disallow_contraction_;
			
				+  return allow_contraction ? ruleset_allow_contraction_
			
				+                           : ruleset_disallow_contraction_;
			
				 }
			
				 void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
			
				@@ -160,8 +161,13 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
			
				   // Treat numbers as word characters except for Arabic and Hebrew.
			
				   const char* aletter_extra = " [0123456789]";
			
				-  if (script_code_ == USCRIPT_HEBREW || script_code_ == USCRIPT_ARABIC)
			
				+  if (script_code_ == USCRIPT_HEBREW)
			
				     aletter_extra = "";
			
				+  else if (script_code_ == USCRIPT_ARABIC)
			
				+    // When "script=Arabic", it does not include tatweel, which is
			
				+    // "script=Common" so add it back. Otherwise, it creates unwanted
			
				+    // word breaks.
			
				+    aletter_extra = " [\\u0640]";
			
				   const char kMidLetterExtra[] = "";
			
				   // For Hebrew, treat single/double quotation marks as MidLetter.
			
				@@ -178,19 +184,11 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
			
				   const char kDisallowContraction[] = "";
			
				   ruleset_allow_contraction_ = base::ASCIIToUTF16(
			
				-      base::StringPrintf(kRuleTemplate,
			
				-                         aletter,
			
				-                         aletter_extra,
			
				-                         midletter_extra,
			
				-                         aletter_plus,
			
				-                         kAllowContraction));
			
				+      base::StringPrintf(kRuleTemplate, aletter, aletter_extra, midletter_extra,
			
				+                         aletter_plus, kAllowContraction));
			
				   ruleset_disallow_contraction_ = base::ASCIIToUTF16(
			
				-      base::StringPrintf(kRuleTemplate,
			
				-                         aletter,
			
				-                         aletter_extra,
			
				-                         midletter_extra,
			
				-                         aletter_plus,
			
				-                         kDisallowContraction));
			
				+      base::StringPrintf(kRuleTemplate, aletter, aletter_extra, midletter_extra,
			
				+                         aletter_plus, kDisallowContraction));
			
				 }
			
				 bool SpellcheckCharAttribute::OutputChar(UChar c,
			
				@@ -214,12 +212,11 @@ bool SpellcheckCharAttribute::OutputChar(UChar c,
			
				 bool SpellcheckCharAttribute::OutputArabic(UChar c,
			
				                                            base::string16* output) const {
			
				-  // Discard characters not from Arabic alphabets. We also discard vowel marks
			
				-  // of Arabic (Damma, Fatha, Kasra, etc.) to prevent our Arabic dictionary from
			
				-  // marking an Arabic word including vowel marks as misspelled. (We need to
			
				-  // check these vowel marks manually and filter them out since their script
			
				-  // codes are USCRIPT_ARABIC.)
			
				-  if (0x0621 <= c && c <= 0x064D)
			
				+  // Include non-Arabic characters (which should trigger a spelling error)
			
				+  // and Arabic characters excluding vowel marks and class "Lm".
			
				+  // We filter the latter because, while they are "letters", they are
			
				+  // optional and so don't affect the correctness of the rest of the word.
			
				+  if (!(0x0600 <= c && c <= 0x06FF) || (u_isalpha(c) && c != 0x0640))
			
				     output->push_back(c);
			
				   return true;
			
				 }
			
				@@ -281,8 +278,8 @@ bool SpellcheckCharAttribute::OutputHebrew(UChar c,
			
				   // USCRIPT_HEBREW.)
			
				   // Pass through ASCII single/double quotation marks and Hebrew Geresh and
			
				   // Gershayim.
			
				-  if ((0x05D0 <= c && c <= 0x05EA) || c == 0x22 || c == 0x27 ||
			
				-      c == 0x05F4 || c == 0x05F3)
			
				+  if ((0x05D0 <= c && c <= 0x05EA) || c == 0x22 || c == 0x27 || c == 0x05F4 ||
			
				+      c == 0x05F3)
			
				     output->push_back(c);
			
				   return true;
			
				 }
			
				@@ -301,10 +298,7 @@ bool SpellcheckCharAttribute::OutputDefault(UChar c,
			
				 // SpellcheckWordIterator implementation:
			
				 SpellcheckWordIterator::SpellcheckWordIterator()
			
				-    : text_(NULL),
			
				-      attribute_(NULL),
			
				-      iterator_() {
			
				-}
			
				+    : text_(nullptr), attribute_(nullptr), iterator_() {}
			
				 SpellcheckWordIterator::~SpellcheckWordIterator() {
			
				   Reset();
			
				@@ -357,9 +351,10 @@ bool SpellcheckWordIterator::SetText(const base::char16* text, size_t length) {
			
				   return true;
			
				 }
			
				-bool SpellcheckWordIterator::GetNextWord(base::string16* word_string,
			
				-                                         int* word_start,
			
				-                                         int* word_length) {
			
				+SpellcheckWordIterator::WordIteratorStatus SpellcheckWordIterator::GetNextWord(
			
				+    base::string16* word_string,
			
				+    int* word_start,
			
				+    int* word_length) {
			
				   DCHECK(!!text_);
			
				   word_string->clear();
			
				@@ -367,28 +362,41 @@ bool SpellcheckWordIterator::GetNextWord(base::string16* word_string,
			
				   *word_length = 0;
			
				   if (!text_) {
			
				-    return false;
			
				+    return IS_END_OF_TEXT;
			
				   }
			
				-  // Find a word that can be checked for spelling. Our rule sets filter out
			
				-  // invalid words (e.g. numbers and characters not supported by the
			
				-  // spellchecker language) so this ubrk_getRuleStatus() call returns
			
				-  // UBRK_WORD_NONE when this iterator finds an invalid word. So, we skip such
			
				-  // words until we can find a valid word or reach the end of the input string.
			
				+  // Find a word that can be checked for spelling or a character that can be
			
				+  // skipped over. Rather than moving past a skippable character this returns
			
				+  // IS_SKIPPABLE and defers handling the character to the calling function.
			
				   while (iterator_->Advance()) {
			
				     const size_t start = iterator_->prev();
			
				     const size_t length = iterator_->pos() - start;
			
				-    if (iterator_->IsWord()) {
			
				-      if (Normalize(start, length, word_string)) {
			
				+    switch (iterator_->GetWordBreakStatus()) {
			
				+      case base::i18n::BreakIterator::IS_WORD_BREAK: {
			
				+        if (Normalize(start, length, word_string)) {
			
				+          *word_start = start;
			
				+          *word_length = length;
			
				+          return IS_WORD;
			
				+        }
			
				+        break;
			
				+      }
			
				+      case base::i18n::BreakIterator::IS_SKIPPABLE_WORD: {
			
				+        *word_string = iterator_->GetString();
			
				         *word_start = start;
			
				         *word_length = length;
			
				-        return true;
			
				+        return IS_SKIPPABLE;
			
				+      }
			
				+      // |iterator_| is RULE_BASED so the break status should never be
			
				+      // IS_LINE_OR_CHAR_BREAK.
			
				+      case base::i18n::BreakIterator::IS_LINE_OR_CHAR_BREAK: {
			
				+        NOTREACHED();
			
				+        break;
			
				       }
			
				     }
			
				   }
			
				   // There aren't any more words in the given text.
			
				-  return false;
			
				+  return IS_END_OF_TEXT;
			
				 }
			
				 void SpellcheckWordIterator::Reset() {

						
							+ 37
							
							- 13
						
chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.h
							 
								View File
							
				@@ -9,6 +9,8 @@
			
				 #ifndef CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_
			
				 #define CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_
			
				+#include <stddef.h>
			
				+
			
				 #include <memory>
			
				 #include <string>
			
				@@ -19,8 +21,8 @@
			
				 namespace base {
			
				 namespace i18n {
			
				 class BreakIterator;
			
				-} // namespace i18n
			
				-} // namespace base
			
				+}  // namespace i18n
			
				+}  // namespace base
			
				 // A class which encapsulates language-specific operations used by
			
				 // SpellcheckWordIterator. When we set the spellchecker language, this class
			
				@@ -112,6 +114,17 @@ class SpellcheckCharAttribute {
			
				 //
			
				 class SpellcheckWordIterator {
			
				  public:
			
				+  enum WordIteratorStatus {
			
				+    // The end of a sequence of text that the iterator recognizes as characters
			
				+    // that can form a word.
			
				+    IS_WORD,
			
				+    // Non-word characters that the iterator can skip past, such as punctuation,
			
				+    // whitespace, and characters from another character set.
			
				+    IS_SKIPPABLE,
			
				+    // The end of the text that the iterator is going over.
			
				+    IS_END_OF_TEXT
			
				+  };
			
				+
			
				   SpellcheckWordIterator();
			
				   ~SpellcheckWordIterator();
			
				@@ -130,19 +143,30 @@ class SpellcheckWordIterator {
			
				   // without calling Initialize().
			
				   bool SetText(const base::char16* text, size_t length);
			
				-  // Retrieves a word (or a contraction), stores its copy to 'word_string', and
			
				-  // stores the position and the length for input word to 'word_start'. Since
			
				-  // this function normalizes the output word, the length of 'word_string' may
			
				-  // be different from the 'word_length'. Therefore, when we call functions that
			
				-  // changes the input text, such as string16::replace(), we need to use
			
				-  // 'word_start' and 'word_length' as listed in the following snippet.
			
				+  // Advances |iterator_| through |text_| and gets the current status of the
			
				+  // word iterator within |text_|:
			
				+  //
			
				+  //  - Returns IS_WORD if the iterator just found the end of a sequence of word
			
				+  //    characters and it was able to normalize the sequence. This stores the
			
				+  //    normalized string into |word_string| and stores the position and length
			
				+  //    into |word_start| and |word_length| respectively. Keep in mind that
			
				+  //    since this function normalizes the output word, the length of
			
				+  //    |word_string| may be different from the |word_length|. Therefore, when
			
				+  //    we call functions that change the input text, such as
			
				+  //    string16::replace(), we need to use |word_start| and |word_length| as
			
				+  //    listed in the following snippet:
			
				+  //
			
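				+  //      for (auto status = iterator.GetNextWord(&word, &offset, &length);
			
				+  //           status != IS_END_OF_TEXT;
			
				+  //           status = iterator.GetNextWord(&word, &offset, &length)) {
			
				+  //        if (status == IS_WORD)
			
				+  //          text.replace(offset, length, word);
			
				+  //      }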
			
				   //
			
				-  //   while(iterator.GetNextWord(&word, &offset, &length))
			
				-  //     text.replace(offset, length, word);
			
				+  //  - Returns IS_SKIPPABLE if the iterator just found a character that the
			
				+  //    iterator can skip past such as punctuation, whitespace, and characters
			
				+  //    from another character set. This stores the character, position, and
			
				+  //    length into |word_string|, |word_start|, and |word_length| respectively.
			
				   //
			
				-  bool GetNextWord(base::string16* word_string,
			
				-                   int* word_start,
			
				-                   int* word_length);
			
				+  //  - Returns IS_END_OF_TEXT if the iterator has reached the end of |text_|.
			
				+  SpellcheckWordIterator::WordIteratorStatus
			
				+  GetNextWord(base::string16* word_string, int* word_start, int* word_length);
			
				   // Releases all the resources attached to this object.
			
				   void Reset();

Merge pull request #11819 from kwonoj/feat-worditerator

+ 22 - 10 atom/renderer/api/atom_api_spell_check_client.cc View File

+ 52 - 44 chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.cc View File

+ 37 - 13 chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.h View File

+ 22 - 10
atom/renderer/api/atom_api_spell_check_client.cc
View File

+ 52 - 44
chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.cc
View File

+ 37 - 13
chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.h
View File