tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/publictypes.h
Go to the documentation of this file.
00001 
00002 // File:        publictypes.h
00003 // Description: Types used in both the API and internally
00004 // Author:      Ray Smith
00005 // Created:     Wed Mar 03 09:22:53 PST 2010
00006 //
00007 // (C) Copyright 2010, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_CCSTRUCT_PUBLICTYPES_H__
00021 #define TESSERACT_CCSTRUCT_PUBLICTYPES_H__
00022 
00023 // This file contains types that are used both by the API and internally
00024 // to Tesseract. In order to decouple the API from Tesseract and prevent cyclic
00025 // dependencies, THIS FILE SHOULD NOT DEPEND ON ANY OTHER PART OF TESSERACT.
00026 // Restated: It is OK for low-level Tesseract files to include publictypes.h,
00027 // but not for the low-level tesseract code to include top-level API code.
00028 // This file should not use other Tesseract types, as that would drag
00029 // their includes into the API-level.
00030 // API-level code should include apitypes.h in preference to this file.
00031 
/** Number of typographic points in one inch. */
const int kPointsPerInch = 72;
00034 
/**
 * Kind of content held by a page-layout block/partition.
 *
 * Enumerators take consecutive values starting at 0. PT_UNKNOWN must remain
 * the first entry and PT_COUNT the last, so that PT_COUNT always equals the
 * number of real block types.
 * NOTE(review): kPolyBlockNames is presumably indexed by these values; if so,
 * it must be updated whenever an enumerator is added or reordered - confirm.
 */
enum PolyBlockType {
  PT_UNKNOWN,          // Type is not yet known. Keep as the first element.
  PT_FLOWING_TEXT,     // Text that lives inside a column.
  PT_HEADING_TEXT,     // Text that spans more than one column.
  PT_PULLOUT_TEXT,     // Text that is in a cross-column pull-out region.
  PT_EQUATION,         // Partition belonging to an equation region.
  PT_INLINE_EQUATION,  // Partition has inline equation.
  PT_TABLE,            // Partition belonging to a table region.
  PT_VERTICAL_TEXT,    // Text-line runs vertically.
  PT_CAPTION_TEXT,     // Text that belongs to an image.
  PT_FLOWING_IMAGE,    // Image that lives inside a column.
  PT_HEADING_IMAGE,    // Image that spans more than one column.
  PT_PULLOUT_IMAGE,    // Image that is in a cross-column pull-out region.
  PT_HORZ_LINE,        // Horizontal Line.
  PT_VERT_LINE,        // Vertical Line.
  PT_NOISE,            // Lies outside of any column.
  PT_COUNT             // Number of block types. Keep as the last element.
};
00059 
00061 inline bool PTIsLineType(PolyBlockType type) {
00062   return type == PT_HORZ_LINE || type == PT_VERT_LINE;
00063 }
00065 inline bool PTIsImageType(PolyBlockType type) {
00066   return type == PT_FLOWING_IMAGE || type == PT_HEADING_IMAGE ||
00067          type == PT_PULLOUT_IMAGE;
00068 }
00070 inline bool PTIsTextType(PolyBlockType type) {
00071   return type == PT_FLOWING_TEXT || type == PT_HEADING_TEXT ||
00072          type == PT_PULLOUT_TEXT || type == PT_TABLE ||
00073          type == PT_VERTICAL_TEXT || type == PT_CAPTION_TEXT ||
00074          type == PT_INLINE_EQUATION;
00075 }
00076 // Returns true if PolyBlockType is of pullout(inter-column) type
00077 inline bool PTIsPulloutType(PolyBlockType type) {
00078   return type == PT_PULLOUT_IMAGE || type == PT_PULLOUT_TEXT;
00079 }
00080 
/**
 * Array of C strings naming block types. Only declared here; the definition
 * (and therefore the array length) lives in another translation unit.
 * NOTE(review): presumably holds one entry per PolyBlockType value, in enum
 * order - confirm against the definition before indexing with a PolyBlockType.
 */
extern const char* kPolyBlockNames[];
00083 
namespace tesseract {

/**
 * Page orientation, one of four values numbered 0..3.
 * NOTE(review): presumably names the direction in which the top of the page
 * content points (UP meaning an upright page) - confirm against the API docs.
 */
enum Orientation {
  ORIENTATION_PAGE_UP = 0,
  ORIENTATION_PAGE_RIGHT = 1,
  ORIENTATION_PAGE_DOWN = 2,
  ORIENTATION_PAGE_LEFT = 3
};

/**
 * Direction in which text is written.
 * NOTE(review): presumably the direction of characters within one text line,
 * relative to an upright page - confirm against the API docs.
 */
enum WritingDirection {
  WRITING_DIRECTION_LEFT_TO_RIGHT = 0,
  WRITING_DIRECTION_RIGHT_TO_LEFT = 1,
  WRITING_DIRECTION_TOP_TO_BOTTOM = 2
};

/**
 * Order in which text lines follow one another.
 * NOTE(review): presumably the direction in which successive lines are
 * stacked on the page - confirm against the API docs.
 */
enum TextlineOrder {
  TEXTLINE_ORDER_LEFT_TO_RIGHT = 0,
  TEXTLINE_ORDER_RIGHT_TO_LEFT = 1,
  TEXTLINE_ORDER_TOP_TO_BOTTOM = 2
};

/**
 * Page segmentation modes.
 *
 * The order of the enumerators is significant: the PSM_*_ENABLED macros
 * below classify a mode with range comparisons (<=, >=) on these values, so
 * inserting or reordering entries changes what those macros report.
 * NOTE(review): the per-enumerator notes are inferred from the names and from
 * the macros below - confirm against the API docs.
 */
enum PageSegMode {
  PSM_OSD_ONLY,                // Orientation and script detection (OSD) only.
  PSM_AUTO_OSD,                // Automatic page segmentation with OSD.

  PSM_AUTO_ONLY,               // Automatic page segmentation, no OSD.
  PSM_AUTO,                    // Automatic page segmentation, no OSD.
  PSM_SINGLE_COLUMN,           // Single column of text.
  PSM_SINGLE_BLOCK_VERT_TEXT,  // Single block of vertical text.

  PSM_SINGLE_BLOCK,            // Single block of text.
  PSM_SINGLE_LINE,             // Single text line.
  PSM_SINGLE_WORD,             // Single word.
  PSM_CIRCLE_WORD,             // Single word in a circle.
  PSM_SINGLE_CHAR,             // Single character.
  PSM_SPARSE_TEXT,             // Sparse text, no OSD.
  PSM_SPARSE_TEXT_OSD,         // Sparse text with OSD.

  PSM_COUNT                    // Number of modes. Keep as the last element.
};

/*
 * Predicates on a PageSegMode value.
 *
 * These are preprocessor macros, so they are not scoped by this namespace.
 * The enumerators are therefore spelled with a leading ::tesseract:: so that
 * the macros also compile where tesseract's names are not in scope.
 * Each macro may evaluate its argument more than once: do not pass an
 * expression with side effects.
 */

// True for the modes that run orientation and script detection.
#define PSM_OSD_ENABLED(pageseg_mode) \
  ((pageseg_mode) <= ::tesseract::PSM_AUTO_OSD || \
   (pageseg_mode) == ::tesseract::PSM_SPARSE_TEXT_OSD)
// True for PSM_AUTO_OSD through PSM_AUTO.
#define PSM_COL_FIND_ENABLED(pageseg_mode) \
  ((pageseg_mode) >= ::tesseract::PSM_AUTO_OSD && \
   (pageseg_mode) <= ::tesseract::PSM_AUTO)
// True for the two sparse-text modes.
#define PSM_SPARSE(pageseg_mode) \
  ((pageseg_mode) == ::tesseract::PSM_SPARSE_TEXT || \
   (pageseg_mode) == ::tesseract::PSM_SPARSE_TEXT_OSD)
// True for PSM_AUTO_OSD through PSM_SINGLE_COLUMN.
#define PSM_BLOCK_FIND_ENABLED(pageseg_mode) \
  ((pageseg_mode) >= ::tesseract::PSM_AUTO_OSD && \
   (pageseg_mode) <= ::tesseract::PSM_SINGLE_COLUMN)
// True for PSM_AUTO_OSD through PSM_SINGLE_BLOCK.
#define PSM_LINE_FIND_ENABLED(pageseg_mode) \
  ((pageseg_mode) >= ::tesseract::PSM_AUTO_OSD && \
   (pageseg_mode) <= ::tesseract::PSM_SINGLE_BLOCK)
// True for PSM_AUTO_OSD through PSM_SINGLE_LINE and for both sparse modes.
#define PSM_WORD_FIND_ENABLED(pageseg_mode) \
  (((pageseg_mode) >= ::tesseract::PSM_AUTO_OSD && \
    (pageseg_mode) <= ::tesseract::PSM_SINGLE_LINE) || \
   (pageseg_mode) == ::tesseract::PSM_SPARSE_TEXT || \
   (pageseg_mode) == ::tesseract::PSM_SPARSE_TEXT_OSD)

/**
 * Levels of the page hierarchy, ordered from the coarsest element (block)
 * to the finest (symbol).
 */
enum PageIteratorLevel {
  RIL_BLOCK,     // Block of text/image/separator line.
  RIL_PARA,      // Paragraph within a block.
  RIL_TEXTLINE,  // Line within a paragraph.
  RIL_WORD,      // Word within a textline.
  RIL_SYMBOL     // Symbol/character within a word.
};

/** Justification (alignment) of a paragraph; UNKNOWN is the zero value. */
enum ParagraphJustification {
  JUSTIFICATION_UNKNOWN,
  JUSTIFICATION_LEFT,
  JUSTIFICATION_CENTER,
  JUSTIFICATION_RIGHT
};

/** Selects which recognition engine(s) to run. */
enum OcrEngineMode {
  OEM_TESSERACT_ONLY,           // Run Tesseract only - fastest
  OEM_CUBE_ONLY,                // Run Cube only - better accuracy, but slower
  OEM_TESSERACT_CUBE_COMBINED,  // Run both and combine results - best accuracy
  OEM_DEFAULT                   // Specify this mode when calling init_*(),
                                // to indicate that any of the above modes
                                // should be automatically inferred from the
                                // variables in the language-specific config,
                                // command-line configs, or if not specified
                                // in any of the above should be set to the
                                // default OEM_TESSERACT_ONLY.
};

}  // namespace tesseract.
00258 
00259 #endif  // TESSERACT_CCSTRUCT_PUBLICTYPES_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines