///////////////////////////////////////////////////////////////////////
// File:        publictypes.h
// Description: Types used in both the API and internally
// Author:      Ray Smith
// Created:     Wed Mar 03 09:22:53 PST 2010
//
// (C) Copyright 2010, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H__
#define TESSERACT_CCSTRUCT_PUBLICTYPES_H__

// This file contains types that are used both by the API and internally
// to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
// dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
// Restated: It is OK for low-level Tesseract files to include publictypes.h,
// but not for the low-level tesseract code to include top-level API code.
// This file should not use other Tesseract types, as that would drag
// their includes into the API-level.
// API-level code should include apitypes.h in preference to this file.

// Number of printers' points in an inch. The unit of the pointsize return.
const int kPointsPerInch = 72;

// Possible types for a POLY_BLOCK or ColPartition.
// Must be kept in sync with kPBColors in polyblk.cpp and PTIs*Type functions
// below, as well as kPolyBlockNames in publictypes.cpp.
// Used extensively by ColPartition, and POLY_BLOCK.
enum PolyBlockType {
  PT_UNKNOWN,          // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,     // Text that lives inside a column.
  PT_HEADING_TEXT,     // Text that spans more than one column.
  PT_PULLOUT_TEXT,     // Text that is in a cross-column pull-out region.
  PT_EQUATION,         // Partition belonging to an equation region.
  PT_INLINE_EQUATION,  // Partition has inline equation.
  PT_TABLE,            // Partition belonging to a table region.
  PT_VERTICAL_TEXT,    // Text-line runs vertically.
  PT_CAPTION_TEXT,     // Text that belongs to an image.
  PT_FLOWING_IMAGE,    // Image that lives inside a column.
  PT_HEADING_IMAGE,    // Image that spans more than one column.
  PT_PULLOUT_IMAGE,    // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,        // Horizontal Line.
  PT_VERT_LINE,        // Vertical Line.
  PT_NOISE,            // Lies outside of any column.
  PT_COUNT             // Number of real types. Keep as the last element.
};

// Returns true if PolyBlockType is a line type, i.e. either a horizontal
// or a vertical line.
inline bool PTIsLineType(PolyBlockType type) {
  return type == PT_HORZ_LINE || type == PT_VERT_LINE;
}
// Returns true if PolyBlockType is of image type (flowing, heading or
// pull-out image).
inline bool PTIsImageType(PolyBlockType type) {
  return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
         type == PT_PULLOUT_IMAGE;
}
// Returns true if PolyBlockType is of text type. Note that PT_TABLE and
// PT_INLINE_EQUATION count as text here, whereas PT_EQUATION does not.
inline bool PTIsTextType(PolyBlockType type) {
  return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
         type == PT_PULLOUT_TEXT || type == PT_TABLE ||
         type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
         type == PT_INLINE_EQUATION;
}

// String name for each block type. Keep in sync with PolyBlockType.
extern const char* kPolyBlockNames[];

namespace tesseract {
// +------------------+  Orientation Example:
// | 1 Aaaa Aaaa Aaaa |  ====================
// | Aaa aa aaa aa    |  To left is a diagram of some (1) English and
// | aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
// |                2 |
// |   #######  c c C |  Upright Latin characters are represented as A and a.
// |   #######  c c c |  '<' represents a latin character rotated
// | < #######  c c c |      anti-clockwise 90 degrees.
// | < #######  c   c |
// | < #######  .   c |  Upright Chinese characters are represented C and c.
// | 3 #######      c |
// +------------------+  NOTA BENE: enum values here should match goodoc.proto

// If you orient your head so that "up" aligns with Orientation,
// then the characters will appear "right side up" and readable.
//
// In the example above, both the English and Chinese paragraphs are oriented
// so their "up" is the top of the page (page up).  The photo credit is read
// with one's head turned leftward ("up" is to page left).
//
// The values of this enum match the convention of Tesseract's osdetect.h
enum Orientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3
};

// The grapheme clusters within a line of text are laid out logically
// in this direction, judged when looking at the text line rotated so that
// its Orientation is "page up".
//
// For English text, the writing direction is left-to-right.  For the
// Chinese text in the above example, the writing direction is top-to-bottom.
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
};

// The text lines are read in the given sequence.
//
// In English, the order is top-to-bottom.
// In Chinese, vertical text lines are read right-to-left.  Mongolian is
// written in vertical columns top to bottom like Chinese, but the lines
// are ordered left-to-right.
//
// Note that only some combinations make sense.  For example,
// WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
};

// Possible modes for page layout analysis. These *must* be kept in order
// of decreasing amount of layout analysis to be done, except for OSD_ONLY,
// so that the inequality test macros below work.
enum PageSegMode {
  PSM_OSD_ONLY,
  PSM_AUTO_OSD,

  PSM_AUTO_ONLY,
  PSM_AUTO,
  PSM_SINGLE_COLUMN,
  PSM_SINGLE_BLOCK_VERT_TEXT,

  PSM_SINGLE_BLOCK,
  PSM_SINGLE_LINE,
  PSM_SINGLE_WORD,
  PSM_CIRCLE_WORD,
  PSM_SINGLE_CHAR,

  PSM_COUNT  // Number of modes. Keep as the last element.
};

// Macros that act on a PageSegMode to determine whether components of
// layout analysis are enabled.
// *Depend critically on the order of elements of PageSegMode.*
//
// Resulting truth table (derived from the comparisons below):
//   PSM_OSD_ENABLED:        PSM_OSD_ONLY, PSM_AUTO_OSD
//   PSM_COL_FIND_ENABLED:   PSM_AUTO_OSD .. PSM_AUTO
//   PSM_BLOCK_FIND_ENABLED: PSM_AUTO_OSD .. PSM_SINGLE_COLUMN
//   PSM_LINE_FIND_ENABLED:  PSM_AUTO_OSD .. PSM_SINGLE_BLOCK
//   PSM_WORD_FIND_ENABLED:  PSM_AUTO_OSD .. PSM_SINGLE_LINE
//
// The enumerators are fully qualified because macros ignore namespaces: the
// expansion must resolve no matter which scope the macro is used from.
// CAUTION: the range macros evaluate their argument twice, so do not pass
// an expression with side effects.
#define PSM_OSD_ENABLED(pageseg_mode) \
  ((pageseg_mode) <= ::tesseract::PSM_AUTO_OSD)
#define PSM_COL_FIND_ENABLED(pageseg_mode) \
  ((pageseg_mode) >= ::tesseract::PSM_AUTO_OSD && \
   (pageseg_mode) <= ::tesseract::PSM_AUTO)
#define PSM_BLOCK_FIND_ENABLED(pageseg_mode) \
  ((pageseg_mode) >= ::tesseract::PSM_AUTO_OSD && \
   (pageseg_mode) <= ::tesseract::PSM_SINGLE_COLUMN)
#define PSM_LINE_FIND_ENABLED(pageseg_mode) \
  ((pageseg_mode) >= ::tesseract::PSM_AUTO_OSD && \
   (pageseg_mode) <= ::tesseract::PSM_SINGLE_BLOCK)
#define PSM_WORD_FIND_ENABLED(pageseg_mode) \
  ((pageseg_mode) >= ::tesseract::PSM_AUTO_OSD && \
   (pageseg_mode) <= ::tesseract::PSM_SINGLE_LINE)

// enum of the elements of the page hierarchy, used in ResultIterator
// to provide functions that operate on each level without having to
// have 5x as many functions.
enum PageIteratorLevel {
  RIL_BLOCK,     // Block of text/image/separator line.
  RIL_PARA,      // Paragraph within a block.
  RIL_TEXTLINE,  // Line within a paragraph.
  RIL_WORD,      // Word within a textline.
  RIL_SYMBOL     // Symbol/character within a word.
};

// JUSTIFICATION_UNKNOWN
//   The alignment is not clearly one of the other options.  This could happen
//   for example if there are only one or two lines of text or the text looks
//   like source code or poetry.
//
// NOTA BENE: Fully justified paragraphs (text aligned to both left and right
//    margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text
//    is written with a left-to-right script and with JUSTIFICATION_RIGHT if
//    their text is written in a right-to-left script.
//
// Interpretation for text read in vertical lines:
//   "Left" is wherever the starting reading position is.
//
// JUSTIFICATION_LEFT
//   Each line, except possibly the first, is flush to the same left tab stop.
//
// JUSTIFICATION_CENTER
//   The text lines of the paragraph are centered about a line going
//   down through the middle of the text lines.
//
// JUSTIFICATION_RIGHT
//   Each line, except possibly the first, is flush to the same right tab stop.
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN,
  JUSTIFICATION_LEFT,
  JUSTIFICATION_CENTER,
  JUSTIFICATION_RIGHT
};

// When Tesseract/Cube is initialized we can choose to instantiate/load/run
// only the Tesseract part, only the Cube part or both along with the combiner.
// The preference of which engine to use is stored in tessedit_ocr_engine_mode.
//
// ATTENTION: When modifying this enum, please make sure to make the
// appropriate changes to all the enums mirroring it (e.g. OCREngine in
// cityblock/workflow/detection/detection_storage.proto). Such enums will
// mention the connection to OcrEngineMode in the comments.
enum OcrEngineMode {
  OEM_TESSERACT_ONLY,           // Run Tesseract only - fastest
  OEM_CUBE_ONLY,                // Run Cube only - better accuracy, but slower
  OEM_TESSERACT_CUBE_COMBINED,  // Run both and combine results - best accuracy
  OEM_DEFAULT                   // Specify this mode when calling init_*(),
                                // to indicate that any of the above modes
                                // should be automatically inferred from the
                                // variables in the language-specific config,
                                // command-line configs, or if not specified
                                // in any of the above should be set to the
                                // default OEM_TESSERACT_ONLY.
};

}  // namespace tesseract.

#endif  // TESSERACT_CCSTRUCT_PUBLICTYPES_H__