// tesseract 3.03
00001 00002 // File: baseapi.h 00003 // Description: Simple API for calling tesseract. 00004 // Author: Ray Smith 00005 // Created: Fri Oct 06 15:35:01 PDT 2006 00006 // 00007 // (C) Copyright 2006, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_API_BASEAPI_H__ 00021 #define TESSERACT_API_BASEAPI_H__ 00022 00023 #include <stdio.h> 00024 // To avoid collision with other typenames include the ABSOLUTE MINIMUM 00025 // complexity of includes here. Use forward declarations wherever possible 00026 // and hide includes of complex types in baseapi.cpp. 
00027 #include "platform.h" 00028 #include "apitypes.h" 00029 #include "thresholder.h" 00030 #include "unichar.h" 00031 #include "tesscallback.h" 00032 #include "publictypes.h" 00033 #include "pageiterator.h" 00034 #include "resultiterator.h" 00035 00036 template <typename T> class GenericVector; 00037 class PAGE_RES; 00038 class PAGE_RES_IT; 00039 class ParagraphModel; 00040 struct BlamerBundle; 00041 class BLOCK_LIST; 00042 class DENORM; 00043 class IMAGE; 00044 class MATRIX; 00045 class ROW; 00046 class STRING; 00047 class WERD; 00048 struct Pix; 00049 struct Box; 00050 struct Pixa; 00051 struct Boxa; 00052 class ETEXT_DESC; 00053 struct OSResults; 00054 class TBOX; 00055 class UNICHARSET; 00056 class WERD_CHOICE_LIST; 00057 00058 struct INT_FEATURE_STRUCT; 00059 typedef INT_FEATURE_STRUCT *INT_FEATURE; 00060 struct TBLOB; 00061 00062 namespace tesseract { 00063 00064 class CubeRecoContext; 00065 class Dawg; 00066 class Dict; 00067 class EquationDetect; 00068 class PageIterator; 00069 class LTRResultIterator; 00070 class ResultIterator; 00071 class MutableIterator; 00072 class TessResultRenderer; 00073 class Tesseract; 00074 class Trie; 00075 class Wordrec; 00076 00077 typedef int (Dict::*DictFunc)(void* void_dawg_args, 00078 UNICHAR_ID unichar_id, bool word_end) const; 00079 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang, 00080 const char* context, 00081 int context_bytes, 00082 const char* character, 00083 int character_bytes); 00084 typedef float (Dict::*ParamsModelClassifyFunc)( 00085 const char *lang, void *path); 00086 typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings, 00087 const WERD_CHOICE_LIST &best_choices, 00088 const UNICHARSET &unicharset, 00089 BlamerBundle *blamer_bundle); 00090 typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *> 00091 TruthCallback; 00092 00101 class TESS_API TessBaseAPI { 00102 public: 00103 TessBaseAPI(); 00104 virtual ~TessBaseAPI(); 00105 00109 static const char* Version(); 
00110 00118 static size_t getOpenCLDevice(void **device); 00119 00124 static void CatchSignals(); 00125 00130 void SetInputName(const char* name); 00138 const char* GetInputName(); 00139 void SetInputImage(Pix *pix); 00140 Pix* GetInputImage(); 00141 int GetSourceYResolution(); 00142 const char* GetDatapath(); 00143 00145 void SetOutputName(const char* name); 00146 00160 bool SetVariable(const char* name, const char* value); 00161 bool SetDebugVariable(const char* name, const char* value); 00162 00167 bool GetIntVariable(const char *name, int *value) const; 00168 bool GetBoolVariable(const char *name, bool *value) const; 00169 bool GetDoubleVariable(const char *name, double *value) const; 00170 00175 const char *GetStringVariable(const char *name) const; 00176 00180 void PrintVariables(FILE *fp) const; 00181 00185 bool GetVariableAsString(const char *name, STRING *val); 00186 00225 int Init(const char* datapath, const char* language, OcrEngineMode mode, 00226 char **configs, int configs_size, 00227 const GenericVector<STRING> *vars_vec, 00228 const GenericVector<STRING> *vars_values, 00229 bool set_only_non_debug_params); 00230 int Init(const char* datapath, const char* language, OcrEngineMode oem) { 00231 return Init(datapath, language, oem, NULL, 0, NULL, NULL, false); 00232 } 00233 int Init(const char* datapath, const char* language) { 00234 return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false); 00235 } 00236 00245 const char* GetInitLanguagesAsString() const; 00246 00252 void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const; 00253 00257 void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const; 00258 00265 int InitLangMod(const char* datapath, const char* language); 00266 00271 void InitForAnalysePage(); 00272 00279 void ReadConfigFile(const char* filename); 00281 void ReadDebugConfigFile(const char* filename); 00282 00288 void SetPageSegMode(PageSegMode mode); 00289 00291 PageSegMode GetPageSegMode() const; 00292 
00310 char* TesseractRect(const unsigned char* imagedata, 00311 int bytes_per_pixel, int bytes_per_line, 00312 int left, int top, int width, int height); 00313 00318 void ClearAdaptiveClassifier(); 00319 00326 /* @{ */ 00327 00337 void SetImage(const unsigned char* imagedata, int width, int height, 00338 int bytes_per_pixel, int bytes_per_line); 00339 00350 void SetImage(const Pix* pix); 00351 00356 void SetSourceResolution(int ppi); 00357 00363 void SetRectangle(int left, int top, int width, int height); 00364 00372 void SetThresholder(ImageThresholder* thresholder) { 00373 if (thresholder_ != NULL) 00374 delete thresholder_; 00375 thresholder_ = thresholder; 00376 ClearResults(); 00377 } 00378 00384 Pix* GetThresholdedImage(); 00385 00391 Boxa* GetRegions(Pixa** pixa); 00392 00404 Boxa* GetTextlines(const bool raw_image, const int raw_padding, 00405 Pixa** pixa, int** blockids, int** paraids); 00406 /* 00407 Helper method to extract from the thresholded image. (most common usage) 00408 */ 00409 Boxa* GetTextlines(Pixa** pixa, int** blockids) { 00410 return GetTextlines(false, 0, pixa, blockids, NULL); 00411 } 00412 00421 Boxa* GetStrips(Pixa** pixa, int** blockids); 00422 00428 Boxa* GetWords(Pixa** pixa); 00429 00438 Boxa* GetConnectedComponents(Pixa** cc); 00439 00453 Boxa* GetComponentImages(const PageIteratorLevel level, 00454 const bool text_only, const bool raw_image, 00455 const int raw_padding, 00456 Pixa** pixa, int** blockids, int** paraids); 00457 // Helper function to get binary images with no padding (most common usage). 
00458 Boxa* GetComponentImages(const PageIteratorLevel level, 00459 const bool text_only, 00460 Pixa** pixa, int** blockids) { 00461 return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL); 00462 } 00463 00470 int GetThresholdedImageScaleFactor() const; 00471 00477 void DumpPGM(const char* filename); 00478 00490 PageIterator* AnalyseLayout(); 00491 00498 int Recognize(ETEXT_DESC* monitor); 00499 00506 int RecognizeForChopTest(ETEXT_DESC* monitor); 00507 00524 bool ProcessPages(const char* filename, 00525 const char* retry_config, int timeout_millisec, 00526 STRING* text_out); 00527 00528 bool ProcessPages(const char* filename, 00529 const char* retry_config, int timeout_millisec, 00530 TessResultRenderer* renderer); 00531 00543 bool ProcessPage(Pix* pix, int page_index, const char* filename, 00544 const char* retry_config, int timeout_millisec, 00545 STRING* text_out); 00546 00547 bool ProcessPage(Pix* pix, int page_index, const char* filename, 00548 const char* retry_config, int timeout_millisec, 00549 TessResultRenderer* renderer); 00550 00559 ResultIterator* GetIterator(); 00560 00569 MutableIterator* GetMutableIterator(); 00570 00575 char* GetUTF8Text(); 00576 00582 char* GetHOCRText(int page_number); 00583 00591 char* GetBoxText(int page_number); 00597 char* GetUNLVText(); 00599 int MeanTextConf(); 00606 int* AllWordConfidences(); 00607 00618 bool AdaptToWordStr(PageSegMode mode, const char* wordstr); 00619 00626 void Clear(); 00627 00634 void End(); 00635 00643 static void ClearPersistentCache(); 00644 00651 int IsValidWord(const char *word); 00652 00653 bool GetTextDirection(int* out_offset, float* out_slope); 00654 00656 void SetDictFunc(DictFunc f); 00657 00661 void SetProbabilityInContextFunc(ProbabilityInContextFunc f); 00662 00663 00667 void SetParamsModelClassifyFunc(ParamsModelClassifyFunc f); 00668 00670 void SetFillLatticeFunc(FillLatticeFunc f); 00671 00676 bool DetectOS(OSResults*); 00677 00679 void GetFeaturesForBlob(TBLOB* 
blob, INT_FEATURE_STRUCT* int_features, 00680 int* num_features, int* feature_outline_index); 00681 00686 static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top, 00687 int right, int bottom); 00688 00693 void RunAdaptiveClassifier(TBLOB* blob, 00694 int num_max_matches, 00695 int* unichar_ids, 00696 float* ratings, 00697 int* num_matches_returned); 00698 00700 const char* GetUnichar(int unichar_id); 00701 00703 const Dawg *GetDawg(int i) const; 00704 00706 int NumDawgs() const; 00707 00709 const char* GetLastInitLanguage() const; 00710 00712 static ROW *MakeTessOCRRow(float baseline, float xheight, 00713 float descender, float ascender); 00714 00716 static TBLOB *MakeTBLOB(Pix *pix); 00717 00723 static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode); 00724 00725 Tesseract* const tesseract() const { 00726 return tesseract_; 00727 } 00728 00729 OcrEngineMode const oem() const { 00730 return last_oem_requested_; 00731 } 00732 00733 void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; } 00734 00736 CubeRecoContext *GetCubeRecoContext() const; 00737 00738 void set_min_orientation_margin(double margin); 00739 00744 void GetBlockTextOrientations(int** block_orientation, 00745 bool** vertical_writing); 00746 00748 BLOCK_LIST* FindLinesCreateBlockList(); 00749 00755 static void DeleteBlockList(BLOCK_LIST* block_list); 00756 /* @} */ 00757 00758 protected: 00759 00761 TESS_LOCAL bool InternalSetImage(); 00762 00767 TESS_LOCAL virtual void Threshold(Pix** pix); 00768 00773 TESS_LOCAL int FindLines(); 00774 00776 void ClearResults(); 00777 00783 TESS_LOCAL LTRResultIterator* GetLTRIterator(); 00784 00791 TESS_LOCAL int TextLength(int* blob_count); 00792 00794 /* @{ */ 00795 00800 TESS_LOCAL void AdaptToCharacter(const char *unichar_repr, 00801 int length, 00802 float baseline, 00803 float xheight, 00804 float descender, 00805 float ascender); 00806 00808 TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list); 00809 TESS_LOCAL PAGE_RES* 
RecognitionPass2(BLOCK_LIST* block_list, 00810 PAGE_RES* pass1_result); 00811 00813 TESS_LOCAL void DetectParagraphs(bool after_text_recognition); 00814 00819 TESS_LOCAL static int TesseractExtractResult(char** text, 00820 int** lengths, 00821 float** costs, 00822 int** x0, 00823 int** y0, 00824 int** x1, 00825 int** y1, 00826 PAGE_RES* page_res); 00827 00828 TESS_LOCAL const PAGE_RES* GetPageRes() const { 00829 return page_res_; 00830 }; 00831 /* @} */ 00832 00833 00834 protected: 00835 Tesseract* tesseract_; 00836 Tesseract* osd_tesseract_; 00837 EquationDetect* equ_detect_; 00838 ImageThresholder* thresholder_; 00839 GenericVector<ParagraphModel *>* paragraph_models_; 00840 BLOCK_LIST* block_list_; 00841 PAGE_RES* page_res_; 00842 STRING* input_file_; 00843 Pix* input_image_; 00844 STRING* output_file_; 00845 STRING* datapath_; 00846 STRING* language_; 00847 OcrEngineMode last_oem_requested_; 00848 bool recognition_done_; 00849 TruthCallback *truth_cb_; 00850 00855 /* @{ */ 00856 int rect_left_; 00857 int rect_top_; 00858 int rect_width_; 00859 int rect_height_; 00860 int image_width_; 00861 int image_height_; 00862 /* @} */ 00863 00864 private: 00874 TessResultRenderer* NewRenderer(); 00875 }; 00876 00877 } // namespace tesseract. 00878 00879 #endif // TESSERACT_API_BASEAPI_H__