tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/api/baseapi.h
Go to the documentation of this file.
00001 
00002 // File:        baseapi.h
00003 // Description: Simple API for calling tesseract.
00004 // Author:      Ray Smith
00005 // Created:     Fri Oct 06 15:35:01 PDT 2006
00006 //
00007 // (C) Copyright 2006, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_API_BASEAPI_H__
00021 #define TESSERACT_API_BASEAPI_H__
00022 
00023 #include <stdio.h>
00024 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
00025 // complexity of includes here. Use forward declarations wherever possible
00026 // and hide includes of complex types in baseapi.cpp.
00027 #include "platform.h"
00028 #include "apitypes.h"
00029 #include "thresholder.h"
00030 #include "unichar.h"
00031 #include "tesscallback.h"
00032 #include "publictypes.h"
00033 #include "pageiterator.h"
00034 #include "resultiterator.h"
00035 // Forward declarations: they stand in for the defining headers, which this file avoids including.
00036 template <typename T> class GenericVector;
00037 class PAGE_RES;
00038 class PAGE_RES_IT;
00039 class ParagraphModel;
00040 struct BlamerBundle;
00041 class BLOCK_LIST;
00042 class DENORM;
00043 class IMAGE;
00044 class MATRIX;
00045 class ROW;
00046 class STRING;
00047 class WERD;
00048 struct Pix;
00049 struct Box;
00050 struct Pixa;
00051 struct Boxa;
00052 class ETEXT_DESC;
00053 struct OSResults;
00054 class TBOX;
00055 class UNICHARSET;
00056 class WERD_CHOICE_LIST;
00057 // INT_FEATURE is declared as a pointer to INT_FEATURE_STRUCT, which is left incomplete here.
00058 struct INT_FEATURE_STRUCT;
00059 typedef INT_FEATURE_STRUCT *INT_FEATURE;
00060 struct TBLOB;
00061 
00062 namespace tesseract {
00063 // Forward declarations of classes that live in namespace tesseract.
00064 class CubeRecoContext;
00065 class Dawg;
00066 class Dict;
00067 class EquationDetect;
00068 class PageIterator;
00069 class LTRResultIterator;
00070 class ResultIterator;
00071 class MutableIterator;
00072 class TessResultRenderer;
00073 class Tesseract;
00074 class Trie;
00075 class Wordrec;
00076 // Pointer-to-member types (on Dict and Wordrec) taken by TessBaseAPI's Set*Func setters, plus the callback type taken by InitTruthCallback.
00077 typedef int (Dict::*DictFunc)(void* void_dawg_args,
00078                               UNICHAR_ID unichar_id, bool word_end) const;
00079 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
00080                                                  const char* context,
00081                                                  int context_bytes,
00082                                                  const char* character,
00083                                                  int character_bytes);
00084 typedef float (Dict::*ParamsModelClassifyFunc)(
00085     const char *lang, void *path);
00086 typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
00087                                          const WERD_CHOICE_LIST &best_choices,
00088                                          const UNICHARSET &unicharset,
00089                                          BlamerBundle *blamer_bundle);
00090 typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *>
00091     TruthCallback;
00092 
00093 /**
00094  * TessBaseAPI is the class behind this header's "simple API for calling
00095  * tesseract". Nearly every member is only declared here, so the notes below
00096  * describe what the signatures and the few inline bodies show; behaviour of
00097  * the out-of-line members must be checked against their definitions.
00098  * The destructor and Threshold() are virtual, so the class can be subclassed.
00099  */
00101 class TESS_API TessBaseAPI {
00102  public:
00103   TessBaseAPI();
00104   virtual ~TessBaseAPI();
00105 
00109   static const char* Version();
00110 
00118   static size_t getOpenCLDevice(void **device);
00119 
00124   static void CatchSignals();
00125 
00130   void SetInputName(const char* name);
00138   const char* GetInputName();
00139   void SetInputImage(Pix *pix);
00140   Pix* GetInputImage();
00141   int GetSourceYResolution();
00142   const char* GetDatapath();
00143 
00145   void SetOutputName(const char* name);
00146 
00158   // Parameter setters: name and value are both passed as C strings.
00159   // NOTE(review): the bool presumably reports success - confirm in baseapi.cpp.
00160   bool SetVariable(const char* name, const char* value);
00161   bool SetDebugVariable(const char* name, const char* value);
00162 
00165   // Typed parameter getters: each takes an out-pointer for the value.
00166   // NOTE(review): the bool presumably reports whether name was found - confirm.
00167   bool GetIntVariable(const char *name, int *value) const;
00168   bool GetBoolVariable(const char *name, bool *value) const;
00169   bool GetDoubleVariable(const char *name, double *value) const;
00170 
00175   const char *GetStringVariable(const char *name) const;
00176 
00180   void PrintVariables(FILE *fp) const;
00181 
00185   bool GetVariableAsString(const char *name, STRING *val);
00186 
00219   // Init comes in three overloads. The two short forms are defined inline and
00220   // forward to the full form with no config files (NULL, 0), no variable
00221   // vectors (NULL, NULL) and set_only_non_debug_params == false; the
00222   // two-argument form additionally selects OEM_DEFAULT.
00223   // NOTE(review): the meaning of the int result is not visible in this
00224   // header - confirm against the definition before relying on it.
00225   int Init(const char* datapath, const char* language, OcrEngineMode mode,
00226            char **configs, int configs_size,
00227            const GenericVector<STRING> *vars_vec,
00228            const GenericVector<STRING> *vars_values,
00229            bool set_only_non_debug_params);
00230   int Init(const char* datapath, const char* language, OcrEngineMode oem) {
00231     return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
00232   }
00233   int Init(const char* datapath, const char* language) {
00234     return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
00235   }
00236 
00245   const char* GetInitLanguagesAsString() const;
00246 
00252   void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
00253 
00257   void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
00258 
00265   int InitLangMod(const char* datapath, const char* language);
00266 
00271   void InitForAnalysePage();
00272 
00279   void ReadConfigFile(const char* filename);
00281   void ReadDebugConfigFile(const char* filename);
00282 
00288   void SetPageSegMode(PageSegMode mode);
00289 
00291   PageSegMode GetPageSegMode() const;
00292 
00310   char* TesseractRect(const unsigned char* imagedata,
00311                       int bytes_per_pixel, int bytes_per_line,
00312                       int left, int top, int width, int height);
00313 
00318   void ClearAdaptiveClassifier();
00319 
00326    /* @{ */
00327 
00337   void SetImage(const unsigned char* imagedata, int width, int height,
00338                 int bytes_per_pixel, int bytes_per_line);
00339 
00350   void SetImage(const Pix* pix);
00351 
00356   void SetSourceResolution(int ppi);
00357 
00363   void SetRectangle(int left, int top, int width, int height);
00364 
00366   // Replaces the thresholder. The previously held object is deleted, the new
00367   // pointer is stored and ClearResults() is called, so this object takes
00368   // ownership of whatever is passed in. Passing the pointer that is already
00369   // held is safe: it is kept rather than deleted (deleting it would leave
00370   // thresholder_ dangling). No null check is needed before delete because
00371   // deleting a null pointer does nothing.
00372   void SetThresholder(ImageThresholder* thresholder) {
00373     if (thresholder_ != thresholder)
00374       delete thresholder_;
00375     thresholder_ = thresholder;
00376     ClearResults();
00377   }
00378 
00384   Pix* GetThresholdedImage();
00385 
00391   Boxa* GetRegions(Pixa** pixa);
00392 
00404   Boxa* GetTextlines(const bool raw_image, const int raw_padding,
00405                      Pixa** pixa, int** blockids, int** paraids);
00406   /*
00407      Helper method to extract from the thresholded image. (most common usage)
00408   */
00409   Boxa* GetTextlines(Pixa** pixa, int** blockids) {
00410     return GetTextlines(false, 0, pixa, blockids, NULL);
00411   }
00412 
00421   Boxa* GetStrips(Pixa** pixa, int** blockids);
00422 
00428   Boxa* GetWords(Pixa** pixa);
00429 
00438   Boxa* GetConnectedComponents(Pixa** cc);
00439 
00453   Boxa* GetComponentImages(const PageIteratorLevel level,
00454                            const bool text_only, const bool raw_image,
00455                            const int raw_padding,
00456                            Pixa** pixa, int** blockids, int** paraids);
00457   // Helper function to get binary images with no padding (most common usage).
00458   Boxa* GetComponentImages(const PageIteratorLevel level,
00459                            const bool text_only,
00460                            Pixa** pixa, int** blockids) {
00461     return GetComponentImages(level, text_only, false, 0, pixa, blockids, NULL);
00462   }
00463 
00470   int GetThresholdedImageScaleFactor() const;
00471 
00477   void DumpPGM(const char* filename);
00478 
00490   PageIterator* AnalyseLayout();
00491 
00498   int Recognize(ETEXT_DESC* monitor);
00499 
00506   int RecognizeForChopTest(ETEXT_DESC* monitor);
00507 
00521   // ProcessPages and ProcessPage each have two overloads with identical
00522   // leading arguments: one takes a STRING* (text_out) as its last argument,
00523   // the other a TessResultRenderer*.
00524   bool ProcessPages(const char* filename,
00525                     const char* retry_config, int timeout_millisec,
00526                     STRING* text_out);
00527 
00528   bool ProcessPages(const char* filename,
00529                     const char* retry_config, int timeout_millisec,
00530                     TessResultRenderer* renderer);
00531 
00543   bool ProcessPage(Pix* pix, int page_index, const char* filename,
00544                    const char* retry_config, int timeout_millisec,
00545                    STRING* text_out);
00546 
00547   bool ProcessPage(Pix* pix, int page_index, const char* filename,
00548                    const char* retry_config, int timeout_millisec,
00549                    TessResultRenderer* renderer);
00550 
00559   ResultIterator* GetIterator();
00560 
00569   MutableIterator* GetMutableIterator();
00570 
00575   char* GetUTF8Text();
00576 
00582   char* GetHOCRText(int page_number);
00583 
00591   char* GetBoxText(int page_number);
00597   char* GetUNLVText();
00599   int MeanTextConf();
00606   int* AllWordConfidences();
00607 
00618   bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
00619 
00626   void Clear();
00627 
00634   void End();
00635 
00643   static void ClearPersistentCache();
00644 
00651   int IsValidWord(const char *word);
00652 
00653   bool GetTextDirection(int* out_offset, float* out_slope);
00654 
00656   void SetDictFunc(DictFunc f);
00657 
00661   void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
00662 
00663 
00667   void SetParamsModelClassifyFunc(ParamsModelClassifyFunc f);
00668 
00670   void SetFillLatticeFunc(FillLatticeFunc f);
00671 
00676   bool DetectOS(OSResults*);
00677 
00679   void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
00680                           int* num_features, int* feature_outline_index);
00681 
00686   static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
00687                             int right, int bottom);
00688 
00693   void RunAdaptiveClassifier(TBLOB* blob,
00694                              int num_max_matches,
00695                              int* unichar_ids,
00696                              float* ratings,
00697                              int* num_matches_returned);
00698 
00700   const char* GetUnichar(int unichar_id);
00701 
00703   const Dawg *GetDawg(int i) const;
00704 
00706   int NumDawgs() const;
00707 
00709   const char* GetLastInitLanguage() const;
00710 
00712   static ROW *MakeTessOCRRow(float baseline, float xheight,
00713                              float descender, float ascender);
00714 
00716   static TBLOB *MakeTBLOB(Pix *pix);
00717 
00723   static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
00724   // Returns the tesseract_ pointer (by value, so no top-level const is written).
00725   Tesseract* tesseract() const {
00726     return tesseract_;
00727   }
00728   // Returns last_oem_requested_ (by value, so no top-level const is written).
00729   OcrEngineMode oem() const {
00730     return last_oem_requested_;
00731   }
00732   // Stores cb in truth_cb_; nothing else happens here.
00733   void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
00734 
00736   CubeRecoContext *GetCubeRecoContext() const;
00737 
00738   void set_min_orientation_margin(double margin);
00739 
00744   void GetBlockTextOrientations(int** block_orientation,
00745                                 bool** vertical_writing);
00746 
00748   BLOCK_LIST* FindLinesCreateBlockList();
00749 
00755   static void DeleteBlockList(BLOCK_LIST* block_list);
00756  /* @} */
00757 
00758  protected:
00759 
00761   TESS_LOCAL bool InternalSetImage();
00762 
00767   TESS_LOCAL virtual void Threshold(Pix** pix);
00768 
00773   TESS_LOCAL int FindLines();
00774 
00776   void ClearResults();
00777 
00783   TESS_LOCAL LTRResultIterator* GetLTRIterator();
00784 
00791   TESS_LOCAL int TextLength(int* blob_count);
00792 
00794   /* @{ */
00795 
00800   TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
00801                                    int length,
00802                                    float baseline,
00803                                    float xheight,
00804                                    float descender,
00805                                    float ascender);
00806 
00808   TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
00809   TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
00810                                         PAGE_RES* pass1_result);
00811 
00813   TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
00814 
00819   TESS_LOCAL static int TesseractExtractResult(char** text,
00820                                     int** lengths,
00821                                     float** costs,
00822                                     int** x0,
00823                                     int** y0,
00824                                     int** x1,
00825                                     int** y1,
00826                                     PAGE_RES* page_res);
00827   // Read-only access to page_res_.
00828   TESS_LOCAL const PAGE_RES* GetPageRes() const {
00829     return page_res_;
00830   }
00831   /* @} */
00832 
00833 
00834  protected:
00835   Tesseract*        tesseract_;       // Returned by tesseract().
00836   Tesseract*        osd_tesseract_;   
00837   EquationDetect*   equ_detect_;      
00838   ImageThresholder* thresholder_;     // Owned: deleted when replaced via SetThresholder().
00839   GenericVector<ParagraphModel *>* paragraph_models_;
00840   BLOCK_LIST*       block_list_;      
00841   PAGE_RES*         page_res_;        // Exposed read-only by GetPageRes().
00842   STRING*           input_file_;      
00843   Pix*              input_image_;     
00844   STRING*           output_file_;     
00845   STRING*           datapath_;        
00846   STRING*           language_;        
00847   OcrEngineMode last_oem_requested_;  // Returned by oem().
00848   bool          recognition_done_;   
00849   TruthCallback *truth_cb_;           // Set by InitTruthCallback().
00850 
00855   /* @{ */
00856   int rect_left_;
00857   int rect_top_;
00858   int rect_width_;
00859   int rect_height_;
00860   int image_width_;
00861   int image_height_;
00862   /* @} */
00863 
00864  private:
00874   TessResultRenderer* NewRenderer();
00875 };
00876 
00877 }  // namespace tesseract.
00878 
00879 #endif  // TESSERACT_API_BASEAPI_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines