// tesseract 3.03
00001 00002 // File: resultiterator.h 00003 // Description: Iterator for tesseract results that is capable of 00004 // iterating in proper reading order over Bi Directional 00005 // (e.g. mixed Hebrew and English) text. 00006 // Author: David Eger 00007 // Created: Fri May 27 13:58:06 PST 2011 00008 // 00009 // (C) Copyright 2011, Google Inc. 00010 // Licensed under the Apache License, Version 2.0 (the "License"); 00011 // you may not use this file except in compliance with the License. 00012 // You may obtain a copy of the License at 00013 // http://www.apache.org/licenses/LICENSE-2.0 00014 // Unless required by applicable law or agreed to in writing, software 00015 // distributed under the License is distributed on an "AS IS" BASIS, 00016 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00017 // See the License for the specific language governing permissions and 00018 // limitations under the License. 00019 // 00021 00022 #ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H__ 00023 #define TESSERACT_CCMAIN_RESULT_ITERATOR_H__ 00024 00025 #include "platform.h" 00026 #include "ltrresultiterator.h" 00027 00028 template <typename T> class GenericVector; 00029 template <typename T> class GenericVectorEqEq; 00030 class BLOB_CHOICE_IT; 00031 class WERD_RES; 00032 class STRING; 00033 00034 namespace tesseract { 00035 00036 class Tesseract; 00037 00038 class TESS_API ResultIterator : public LTRResultIterator { 00039 public: 00040 static ResultIterator *StartOfParagraph(const LTRResultIterator &resit); 00041 00046 virtual ~ResultIterator() {} 00047 00048 // ============= Moving around within the page ============. 00053 virtual void Begin(); 00054 00067 virtual bool Next(PageIteratorLevel level); 00068 00075 virtual bool IsAtBeginningOf(PageIteratorLevel level) const; 00076 00082 virtual bool IsAtFinalElement(PageIteratorLevel level, 00083 PageIteratorLevel element) const; 00084 00085 // ============= Accessing data ==============. 
00086 00091 virtual char* GetUTF8Text(PageIteratorLevel level) const; 00092 00097 bool ParagraphIsLtr() const; 00098 00099 // ============= Exposed only for testing =============. 00100 00123 static void CalculateTextlineOrder( 00124 bool paragraph_is_ltr, 00125 const GenericVector<StrongScriptDirection> &word_dirs, 00126 GenericVectorEqEq<int> *reading_order); 00127 00128 static const int kMinorRunStart; 00129 static const int kMinorRunEnd; 00130 static const int kComplexWord; 00131 00132 protected: 00139 TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit); 00140 00141 private: 00146 bool CurrentParagraphIsLtr() const; 00147 00159 void CalculateTextlineOrder(bool paragraph_is_ltr, 00160 const LTRResultIterator &resit, 00161 GenericVectorEqEq<int> *indices) const; 00163 void CalculateTextlineOrder(bool paragraph_is_ltr, 00164 const LTRResultIterator &resit, 00165 GenericVector<StrongScriptDirection> *ssd, 00166 GenericVectorEqEq<int> *indices) const; 00167 00172 int LTRWordIndex() const; 00173 00178 void CalculateBlobOrder(GenericVector<int> *blob_indices) const; 00179 00181 void MoveToLogicalStartOfTextline(); 00182 00187 void MoveToLogicalStartOfWord(); 00188 00190 bool IsAtFinalSymbolOfWord() const; 00191 00193 bool IsAtFirstSymbolOfWord() const; 00194 00199 void AppendSuffixMarks(STRING *text) const; 00200 00202 void AppendUTF8WordText(STRING *text) const; 00203 00211 void IterateAndAppendUTF8TextlineText(STRING *text); 00212 00219 void AppendUTF8ParagraphText(STRING *text) const; 00220 00222 bool BidiDebug(int min_level) const; 00223 00224 bool current_paragraph_is_ltr_; 00225 00230 bool at_beginning_of_minor_run_; 00231 00233 bool in_minor_direction_; 00234 }; 00235 00236 } // namespace tesseract. 00237 00238 #endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H__