tesseract
3.03
|
/**********************************************************************
 * File:        baseapi.cpp
 * Description: Simple API for calling tesseract.
 * Author:      Ray Smith
 * Created:     Fri Oct 06 15:35:01 PDT 2006
 *
 * (C) Copyright 2006, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

// Include automatically generated configuration file if running autoconf.
00021 #ifdef HAVE_CONFIG_H 00022 #include "config_auto.h" 00023 #endif 00024 00025 #ifdef __linux__ 00026 #include <signal.h> 00027 #endif 00028 00029 #if defined(_WIN32) 00030 #ifdef _MSC_VER 00031 #include "mathfix.h" 00032 #elif MINGW 00033 // workaround for stdlib.h with -std=c++11 for _splitpath and _MAX_FNAME 00034 #undef __STRICT_ANSI__ 00035 #endif // _MSC_VER 00036 #include <stdlib.h> 00037 #include <windows.h> 00038 #else 00039 #include <dirent.h> 00040 #include <libgen.h> 00041 #include <string.h> 00042 #endif // _WIN32 00043 00044 #if !defined(VERSION) 00045 #include "version.h" 00046 #endif 00047 00048 #include "allheaders.h" 00049 00050 #include "baseapi.h" 00051 #include "resultiterator.h" 00052 #include "mutableiterator.h" 00053 #include "thresholder.h" 00054 #include "tesseractclass.h" 00055 #include "pageres.h" 00056 #include "paragraphs.h" 00057 #include "tessvars.h" 00058 #include "control.h" 00059 #include "dict.h" 00060 #include "pgedit.h" 00061 #include "paramsd.h" 00062 #include "output.h" 00063 #include "globaloc.h" 00064 #include "globals.h" 00065 #include "edgblob.h" 00066 #include "equationdetect.h" 00067 #include "tessbox.h" 00068 #include "makerow.h" 00069 #include "otsuthr.h" 00070 #include "osdetect.h" 00071 #include "params.h" 00072 #include "renderer.h" 00073 #include "strngs.h" 00074 #include "openclwrapper.h" 00075 00076 namespace tesseract { 00077 00079 const int kMinRectSize = 10; 00081 const char kTesseractReject = '~'; 00083 const char kUNLVReject = '~'; 00085 const char kUNLVSuspect = '^'; 00090 const char* kInputFile = "noname.tif"; 00094 const char* kOldVarsFile = "failed_vars.txt"; 00096 const int kMaxIntSize = 22; 00101 const int kMinCredibleResolution = 70; 00103 const int kMaxCredibleResolution = 2400; 00104 00105 TessBaseAPI::TessBaseAPI() 00106 : tesseract_(NULL), 00107 osd_tesseract_(NULL), 00108 equ_detect_(NULL), 00109 // Thresholder is initialized to NULL here, but will be set before use by: 00110 // A 
constructor of a derived API, SetThresholder(), or 00111 // created implicitly when used in InternalSetImage. 00112 thresholder_(NULL), 00113 paragraph_models_(NULL), 00114 block_list_(NULL), 00115 page_res_(NULL), 00116 input_file_(NULL), 00117 input_image_(NULL), 00118 output_file_(NULL), 00119 datapath_(NULL), 00120 language_(NULL), 00121 last_oem_requested_(OEM_DEFAULT), 00122 recognition_done_(false), 00123 truth_cb_(NULL), 00124 rect_left_(0), rect_top_(0), rect_width_(0), rect_height_(0), 00125 image_width_(0), image_height_(0) { 00126 } 00127 00128 TessBaseAPI::~TessBaseAPI() { 00129 End(); 00130 } 00131 00135 const char* TessBaseAPI::Version() { 00136 return VERSION; 00137 } 00138 00146 #ifdef USE_OPENCL 00147 #if USE_DEVICE_SELECTION 00148 #include "opencl_device_selection.h" 00149 #endif 00150 #endif 00151 size_t TessBaseAPI::getOpenCLDevice(void **data) { 00152 #ifdef USE_OPENCL 00153 #if USE_DEVICE_SELECTION 00154 ds_device device = OpenclDevice::getDeviceSelection(); 00155 if (device.type == DS_DEVICE_OPENCL_DEVICE) { 00156 *data = reinterpret_cast<void*>(new cl_device_id); 00157 memcpy(*data, &device.oclDeviceID, sizeof(cl_device_id)); 00158 return sizeof(cl_device_id); 00159 } 00160 #endif 00161 #endif 00162 00163 *data = NULL; 00164 return 0; 00165 } 00166 00171 void TessBaseAPI::CatchSignals() { 00172 #ifdef __linux__ 00173 struct sigaction action; 00174 memset(&action, 0, sizeof(action)); 00175 action.sa_handler = &signal_exit; 00176 action.sa_flags = SA_RESETHAND; 00177 sigaction(SIGSEGV, &action, NULL); 00178 sigaction(SIGFPE, &action, NULL); 00179 sigaction(SIGBUS, &action, NULL); 00180 #else 00181 // Warn API users that an implementation is needed. 
00182 tprintf("CatchSignals has no non-linux implementation!\n"); 00183 #endif 00184 } 00185 00190 void TessBaseAPI::SetInputName(const char* name) { 00191 if (input_file_ == NULL) 00192 input_file_ = new STRING(name); 00193 else 00194 *input_file_ = name; 00195 } 00196 00198 void TessBaseAPI::SetOutputName(const char* name) { 00199 if (output_file_ == NULL) 00200 output_file_ = new STRING(name); 00201 else 00202 *output_file_ = name; 00203 } 00204 00205 bool TessBaseAPI::SetVariable(const char* name, const char* value) { 00206 if (tesseract_ == NULL) tesseract_ = new Tesseract; 00207 return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_NON_INIT_ONLY, 00208 tesseract_->params()); 00209 } 00210 00211 bool TessBaseAPI::SetDebugVariable(const char* name, const char* value) { 00212 if (tesseract_ == NULL) tesseract_ = new Tesseract; 00213 return ParamUtils::SetParam(name, value, SET_PARAM_CONSTRAINT_DEBUG_ONLY, 00214 tesseract_->params()); 00215 } 00216 00217 bool TessBaseAPI::GetIntVariable(const char *name, int *value) const { 00218 IntParam *p = ParamUtils::FindParam<IntParam>( 00219 name, GlobalParams()->int_params, tesseract_->params()->int_params); 00220 if (p == NULL) return false; 00221 *value = (inT32)(*p); 00222 return true; 00223 } 00224 00225 bool TessBaseAPI::GetBoolVariable(const char *name, bool *value) const { 00226 BoolParam *p = ParamUtils::FindParam<BoolParam>( 00227 name, GlobalParams()->bool_params, tesseract_->params()->bool_params); 00228 if (p == NULL) return false; 00229 *value = (BOOL8)(*p); 00230 return true; 00231 } 00232 00233 const char *TessBaseAPI::GetStringVariable(const char *name) const { 00234 StringParam *p = ParamUtils::FindParam<StringParam>( 00235 name, GlobalParams()->string_params, tesseract_->params()->string_params); 00236 return (p != NULL) ? 
p->string() : NULL; 00237 } 00238 00239 bool TessBaseAPI::GetDoubleVariable(const char *name, double *value) const { 00240 DoubleParam *p = ParamUtils::FindParam<DoubleParam>( 00241 name, GlobalParams()->double_params, tesseract_->params()->double_params); 00242 if (p == NULL) return false; 00243 *value = (double)(*p); 00244 return true; 00245 } 00246 00248 bool TessBaseAPI::GetVariableAsString(const char *name, STRING *val) { 00249 return ParamUtils::GetParamAsString(name, tesseract_->params(), val); 00250 } 00251 00253 void TessBaseAPI::PrintVariables(FILE *fp) const { 00254 ParamUtils::PrintParams(fp, tesseract_->params()); 00255 } 00256 00265 int TessBaseAPI::Init(const char* datapath, const char* language, 00266 OcrEngineMode oem, char **configs, int configs_size, 00267 const GenericVector<STRING> *vars_vec, 00268 const GenericVector<STRING> *vars_values, 00269 bool set_only_non_debug_params) { 00270 PERF_COUNT_START("TessBaseAPI::Init") 00271 // Default language is "eng". 00272 if (language == NULL) language = "eng"; 00273 // If the datapath, OcrEngineMode or the language have changed - start again. 00274 // Note that the language_ field stores the last requested language that was 00275 // initialized successfully, while tesseract_->lang stores the language 00276 // actually used. They differ only if the requested language was NULL, in 00277 // which case tesseract_->lang is set to the Tesseract default ("eng"). 
00278 if (tesseract_ != NULL && 00279 (datapath_ == NULL || language_ == NULL || 00280 *datapath_ != datapath || last_oem_requested_ != oem || 00281 (*language_ != language && tesseract_->lang != language))) { 00282 delete tesseract_; 00283 tesseract_ = NULL; 00284 } 00285 // PERF_COUNT_SUB("delete tesseract_") 00286 #ifdef USE_OPENCL 00287 OpenclDevice od; 00288 od.InitEnv(); 00289 #endif 00290 PERF_COUNT_SUB("OD::InitEnv()") 00291 bool reset_classifier = true; 00292 if (tesseract_ == NULL) { 00293 reset_classifier = false; 00294 tesseract_ = new Tesseract; 00295 if (tesseract_->init_tesseract( 00296 datapath, output_file_ != NULL ? output_file_->string() : NULL, 00297 language, oem, configs, configs_size, vars_vec, vars_values, 00298 set_only_non_debug_params) != 0) { 00299 return -1; 00300 } 00301 } 00302 PERF_COUNT_SUB("update tesseract_") 00303 // Update datapath and language requested for the last valid initialization. 00304 if (datapath_ == NULL) 00305 datapath_ = new STRING(datapath); 00306 else 00307 *datapath_ = datapath; 00308 if ((strcmp(datapath_->string(), "") == 0) && 00309 (strcmp(tesseract_->datadir.string(), "") != 0)) 00310 *datapath_ = tesseract_->datadir; 00311 00312 if (language_ == NULL) 00313 language_ = new STRING(language); 00314 else 00315 *language_ = language; 00316 last_oem_requested_ = oem; 00317 // PERF_COUNT_SUB("update last_oem_requested_") 00318 // For same language and datapath, just reset the adaptive classifier. 00319 if (reset_classifier) { 00320 tesseract_->ResetAdaptiveClassifier(); 00321 PERF_COUNT_SUB("tesseract_->ResetAdaptiveClassifier()") 00322 } 00323 PERF_COUNT_END 00324 return 0; 00325 } 00326 00335 const char* TessBaseAPI::GetInitLanguagesAsString() const { 00336 return (language_ == NULL || language_->string() == NULL) ? 
00337 "" : language_->string(); 00338 } 00339 00345 void TessBaseAPI::GetLoadedLanguagesAsVector( 00346 GenericVector<STRING>* langs) const { 00347 langs->clear(); 00348 if (tesseract_ != NULL) { 00349 langs->push_back(tesseract_->lang); 00350 int num_subs = tesseract_->num_sub_langs(); 00351 for (int i = 0; i < num_subs; ++i) 00352 langs->push_back(tesseract_->get_sub_lang(i)->lang); 00353 } 00354 } 00355 00359 void TessBaseAPI::GetAvailableLanguagesAsVector( 00360 GenericVector<STRING>* langs) const { 00361 langs->clear(); 00362 if (tesseract_ != NULL) { 00363 #ifdef _WIN32 00364 STRING pattern = tesseract_->datadir + "/*." + kTrainedDataSuffix; 00365 char fname[_MAX_FNAME]; 00366 WIN32_FIND_DATA data; 00367 BOOL result = TRUE; 00368 HANDLE handle = FindFirstFile(pattern.string(), &data); 00369 if (handle != INVALID_HANDLE_VALUE) { 00370 for (; result; result = FindNextFile(handle, &data)) { 00371 _splitpath(data.cFileName, NULL, NULL, fname, NULL); 00372 langs->push_back(STRING(fname)); 00373 } 00374 FindClose(handle); 00375 } 00376 #else // _WIN32 00377 DIR *dir; 00378 struct dirent *dirent; 00379 char *dot; 00380 00381 STRING extension = STRING(".") + kTrainedDataSuffix; 00382 00383 dir = opendir(tesseract_->datadir.string()); 00384 if (dir != NULL) { 00385 while ((dirent = readdir(dir))) { 00386 // Skip '.', '..', and hidden files 00387 if (dirent->d_name[0] != '.') { 00388 if (strstr(dirent->d_name, extension.string()) != NULL) { 00389 dot = strrchr(dirent->d_name, '.'); 00390 // This ensures that .traineddata is at the end of the file name 00391 if (strncmp(dot, extension.string(), 00392 strlen(extension.string())) == 0) { 00393 *dot = '\0'; 00394 langs->push_back(STRING(dirent->d_name)); 00395 } 00396 } 00397 } 00398 } 00399 closedir(dir); 00400 } 00401 #endif 00402 } 00403 } 00404 00411 int TessBaseAPI::InitLangMod(const char* datapath, const char* language) { 00412 if (tesseract_ == NULL) 00413 tesseract_ = new Tesseract; 00414 else 00415 
ParamUtils::ResetToDefaults(tesseract_->params()); 00416 return tesseract_->init_tesseract_lm(datapath, NULL, language); 00417 } 00418 00423 void TessBaseAPI::InitForAnalysePage() { 00424 if (tesseract_ == NULL) { 00425 tesseract_ = new Tesseract; 00426 tesseract_->InitAdaptiveClassifier(false); 00427 } 00428 } 00429 00435 void TessBaseAPI::ReadConfigFile(const char* filename) { 00436 tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_NON_INIT_ONLY); 00437 } 00438 00440 void TessBaseAPI::ReadDebugConfigFile(const char* filename) { 00441 tesseract_->read_config_file(filename, SET_PARAM_CONSTRAINT_DEBUG_ONLY); 00442 } 00443 00449 void TessBaseAPI::SetPageSegMode(PageSegMode mode) { 00450 if (tesseract_ == NULL) 00451 tesseract_ = new Tesseract; 00452 tesseract_->tessedit_pageseg_mode.set_value(mode); 00453 } 00454 00456 PageSegMode TessBaseAPI::GetPageSegMode() const { 00457 if (tesseract_ == NULL) 00458 return PSM_SINGLE_BLOCK; 00459 return static_cast<PageSegMode>( 00460 static_cast<int>(tesseract_->tessedit_pageseg_mode)); 00461 } 00462 00476 char* TessBaseAPI::TesseractRect(const unsigned char* imagedata, 00477 int bytes_per_pixel, 00478 int bytes_per_line, 00479 int left, int top, 00480 int width, int height) { 00481 if (tesseract_ == NULL || width < kMinRectSize || height < kMinRectSize) 00482 return NULL; // Nothing worth doing. 00483 00484 // Since this original api didn't give the exact size of the image, 00485 // we have to invent a reasonable value. 00486 int bits_per_pixel = bytes_per_pixel == 0 ? 
1 : bytes_per_pixel * 8; 00487 SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top, 00488 bytes_per_pixel, bytes_per_line); 00489 SetRectangle(left, top, width, height); 00490 00491 return GetUTF8Text(); 00492 } 00493 00498 void TessBaseAPI::ClearAdaptiveClassifier() { 00499 if (tesseract_ == NULL) 00500 return; 00501 tesseract_->ResetAdaptiveClassifier(); 00502 tesseract_->ResetDocumentDictionary(); 00503 } 00504 00514 void TessBaseAPI::SetImage(const unsigned char* imagedata, 00515 int width, int height, 00516 int bytes_per_pixel, int bytes_per_line) { 00517 if (InternalSetImage()) 00518 thresholder_->SetImage(imagedata, width, height, 00519 bytes_per_pixel, bytes_per_line); 00520 } 00521 00522 void TessBaseAPI::SetSourceResolution(int ppi) { 00523 if (thresholder_) 00524 thresholder_->SetSourceYResolution(ppi); 00525 else 00526 tprintf("Please call SetImage before SetSourceResolution.\n"); 00527 } 00528 00539 void TessBaseAPI::SetImage(const Pix* pix) { 00540 if (InternalSetImage()) 00541 thresholder_->SetImage(pix); 00542 } 00543 00549 void TessBaseAPI::SetRectangle(int left, int top, int width, int height) { 00550 if (thresholder_ == NULL) 00551 return; 00552 thresholder_->SetRectangle(left, top, width, height); 00553 ClearResults(); 00554 } 00555 00560 Pix* TessBaseAPI::GetThresholdedImage() { 00561 if (tesseract_ == NULL) 00562 return NULL; 00563 if (tesseract_->pix_binary() == NULL) 00564 Threshold(tesseract_->mutable_pix_binary()); 00565 return pixClone(tesseract_->pix_binary()); 00566 } 00567 00573 Boxa* TessBaseAPI::GetRegions(Pixa** pixa) { 00574 return GetComponentImages(RIL_BLOCK, false, pixa, NULL); 00575 } 00576 00585 Boxa* TessBaseAPI::GetTextlines(const bool raw_image, const int raw_padding, 00586 Pixa** pixa, int** blockids, int** paraids) { 00587 return GetComponentImages(RIL_TEXTLINE, true, raw_image, raw_padding, 00588 pixa, blockids, paraids); 00589 } 00590 00599 Boxa* TessBaseAPI::GetStrips(Pixa** pixa, int** blockids) { 
00600 return GetComponentImages(RIL_TEXTLINE, false, pixa, blockids); 00601 } 00602 00608 Boxa* TessBaseAPI::GetWords(Pixa** pixa) { 00609 return GetComponentImages(RIL_WORD, true, pixa, NULL); 00610 } 00611 00618 Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) { 00619 return GetComponentImages(RIL_SYMBOL, true, pixa, NULL); 00620 } 00621 00630 Boxa* TessBaseAPI::GetComponentImages(PageIteratorLevel level, 00631 bool text_only, bool raw_image, 00632 const int raw_padding, 00633 Pixa** pixa, int** blockids, 00634 int** paraids) { 00635 PageIterator* page_it = GetIterator(); 00636 if (page_it == NULL) 00637 page_it = AnalyseLayout(); 00638 if (page_it == NULL) 00639 return NULL; // Failed. 00640 00641 // Count the components to get a size for the arrays. 00642 int component_count = 0; 00643 int left, top, right, bottom; 00644 00645 TessResultCallback<bool>* get_bbox = NULL; 00646 if (raw_image) { 00647 // Get bounding box in original raw image with padding. 00648 get_bbox = NewPermanentTessCallback(page_it, &PageIterator::BoundingBox, 00649 level, raw_padding, 00650 &left, &top, &right, &bottom); 00651 } else { 00652 // Get bounding box from binarized imaged. Note that this could be 00653 // differently scaled from the original image. 
00654 get_bbox = NewPermanentTessCallback(page_it, 00655 &PageIterator::BoundingBoxInternal, 00656 level, &left, &top, &right, &bottom); 00657 } 00658 do { 00659 if (get_bbox->Run() && 00660 (!text_only || PTIsTextType(page_it->BlockType()))) 00661 ++component_count; 00662 } while (page_it->Next(level)); 00663 00664 Boxa* boxa = boxaCreate(component_count); 00665 if (pixa != NULL) 00666 *pixa = pixaCreate(component_count); 00667 if (blockids != NULL) 00668 *blockids = new int[component_count]; 00669 if (paraids != NULL) 00670 *paraids = new int[component_count]; 00671 00672 int blockid = 0; 00673 int paraid = 0; 00674 int component_index = 0; 00675 page_it->Begin(); 00676 do { 00677 if (get_bbox->Run() && 00678 (!text_only || PTIsTextType(page_it->BlockType()))) { 00679 Box* lbox = boxCreate(left, top, right - left, bottom - top); 00680 boxaAddBox(boxa, lbox, L_INSERT); 00681 if (pixa != NULL) { 00682 Pix* pix = NULL; 00683 if (raw_image) { 00684 pix = page_it->GetImage(level, raw_padding, &left, &top); 00685 } else { 00686 pix = page_it->GetBinaryImage(level); 00687 } 00688 pixaAddPix(*pixa, pix, L_INSERT); 00689 pixaAddBox(*pixa, lbox, L_CLONE); 00690 } 00691 if (paraids != NULL) { 00692 (*paraids)[component_index] = paraid; 00693 if (page_it->IsAtFinalElement(RIL_PARA, level)) 00694 ++paraid; 00695 } 00696 if (blockids != NULL) { 00697 (*blockids)[component_index] = blockid; 00698 if (page_it->IsAtFinalElement(RIL_BLOCK, level)) { 00699 ++blockid; 00700 paraid = 0; 00701 } 00702 } 00703 ++component_index; 00704 } 00705 } while (page_it->Next(level)); 00706 delete page_it; 00707 delete get_bbox; 00708 return boxa; 00709 } 00710 00711 int TessBaseAPI::GetThresholdedImageScaleFactor() const { 00712 if (thresholder_ == NULL) { 00713 return 0; 00714 } 00715 return thresholder_->GetScaleFactor(); 00716 } 00717 00719 void TessBaseAPI::DumpPGM(const char* filename) { 00720 if (tesseract_ == NULL) 00721 return; 00722 FILE *fp = fopen(filename, "wb"); 00723 Pix* pix = 
tesseract_->pix_binary(); 00724 int width = pixGetWidth(pix); 00725 int height = pixGetHeight(pix); 00726 l_uint32* data = pixGetData(pix); 00727 fprintf(fp, "P5 %d %d 255\n", width, height); 00728 for (int y = 0; y < height; ++y, data += pixGetWpl(pix)) { 00729 for (int x = 0; x < width; ++x) { 00730 uinT8 b = GET_DATA_BIT(data, x) ? 0 : 255; 00731 fwrite(&b, 1, 1, fp); 00732 } 00733 } 00734 fclose(fp); 00735 } 00736 00743 int CubeAPITest(Boxa* boxa_blocks, Pixa* pixa_blocks, 00744 Boxa* boxa_words, Pixa* pixa_words, 00745 const FCOORD& reskew, Pix* page_pix, 00746 PAGE_RES* page_res) { 00747 int block_count = boxaGetCount(boxa_blocks); 00748 ASSERT_HOST(block_count == pixaGetCount(pixa_blocks)); 00749 // Write each block to the current directory as junk_write_display.nnn.png. 00750 for (int i = 0; i < block_count; ++i) { 00751 Pix* pix = pixaGetPix(pixa_blocks, i, L_CLONE); 00752 pixDisplayWrite(pix, 1); 00753 } 00754 int word_count = boxaGetCount(boxa_words); 00755 ASSERT_HOST(word_count == pixaGetCount(pixa_words)); 00756 int pr_word = 0; 00757 PAGE_RES_IT page_res_it(page_res); 00758 for (page_res_it.restart_page(); page_res_it.word () != NULL; 00759 page_res_it.forward(), ++pr_word) { 00760 WERD_RES *word = page_res_it.word(); 00761 WERD_CHOICE* choice = word->best_choice; 00762 // Write the first 100 words to files names wordims/<wordstring>.tif. 
00763 if (pr_word < 100) { 00764 STRING filename("wordims/"); 00765 if (choice != NULL) { 00766 filename += choice->unichar_string(); 00767 } else { 00768 char numbuf[32]; 00769 filename += "unclassified"; 00770 snprintf(numbuf, 32, "%03d", pr_word); 00771 filename += numbuf; 00772 } 00773 filename += ".tif"; 00774 Pix* pix = pixaGetPix(pixa_words, pr_word, L_CLONE); 00775 pixWrite(filename.string(), pix, IFF_TIFF_G4); 00776 } 00777 } 00778 ASSERT_HOST(pr_word == word_count); 00779 return 0; 00780 } 00781 00793 PageIterator* TessBaseAPI::AnalyseLayout() { 00794 if (FindLines() == 0) { 00795 if (block_list_->empty()) 00796 return NULL; // The page was empty. 00797 page_res_ = new PAGE_RES(block_list_, NULL); 00798 DetectParagraphs(false); 00799 return new PageIterator( 00800 page_res_, tesseract_, thresholder_->GetScaleFactor(), 00801 thresholder_->GetScaledYResolution(), 00802 rect_left_, rect_top_, rect_width_, rect_height_); 00803 } 00804 return NULL; 00805 } 00806 00811 int TessBaseAPI::Recognize(ETEXT_DESC* monitor) { 00812 if (tesseract_ == NULL) 00813 return -1; 00814 if (FindLines() != 0) 00815 return -1; 00816 if (page_res_ != NULL) 00817 delete page_res_; 00818 if (block_list_->empty()) { 00819 page_res_ = new PAGE_RES(block_list_, &tesseract_->prev_word_best_choice_); 00820 return 0; // Empty page. 
00821 } 00822 00823 tesseract_->SetBlackAndWhitelist(); 00824 recognition_done_ = true; 00825 if (tesseract_->tessedit_resegment_from_line_boxes) 00826 page_res_ = tesseract_->ApplyBoxes(*input_file_, true, block_list_); 00827 else if (tesseract_->tessedit_resegment_from_boxes) 00828 page_res_ = tesseract_->ApplyBoxes(*input_file_, false, block_list_); 00829 else 00830 page_res_ = new PAGE_RES(block_list_, &tesseract_->prev_word_best_choice_); 00831 if (tesseract_->tessedit_make_boxes_from_boxes) { 00832 tesseract_->CorrectClassifyWords(page_res_); 00833 return 0; 00834 } 00835 00836 if (truth_cb_ != NULL) { 00837 tesseract_->wordrec_run_blamer.set_value(true); 00838 PageIterator *page_it = new PageIterator( 00839 page_res_, tesseract_, thresholder_->GetScaleFactor(), 00840 thresholder_->GetScaledYResolution(), 00841 rect_left_, rect_top_, rect_width_, rect_height_); 00842 truth_cb_->Run(tesseract_->getDict().getUnicharset(), 00843 image_height_, page_it, this->tesseract()->pix_grey()); 00844 delete page_it; 00845 } 00846 00847 int result = 0; 00848 if (tesseract_->interactive_display_mode) { 00849 #ifndef GRAPHICS_DISABLED 00850 tesseract_->pgeditor_main(rect_width_, rect_height_, page_res_); 00851 #endif // GRAPHICS_DISABLED 00852 // The page_res is invalid after an interactive session, so cleanup 00853 // in a way that lets us continue to the next page without crashing. 00854 delete page_res_; 00855 page_res_ = NULL; 00856 return -1; 00857 } else if (tesseract_->tessedit_train_from_boxes) { 00858 tesseract_->ApplyBoxTraining(*output_file_, page_res_); 00859 } else if (tesseract_->tessedit_ambigs_training) { 00860 FILE *training_output_file = tesseract_->init_recog_training(*input_file_); 00861 // OCR the page segmented into words by tesseract. 00862 tesseract_->recog_training_segmented( 00863 *input_file_, page_res_, monitor, training_output_file); 00864 fclose(training_output_file); 00865 } else { 00866 // Now run the main recognition. 
00867 bool wait_for_text = true; 00868 GetBoolVariable("paragraph_text_based", &wait_for_text); 00869 if (!wait_for_text) DetectParagraphs(false); 00870 if (tesseract_->recog_all_words(page_res_, monitor, NULL, NULL, 0)) { 00871 if (wait_for_text) DetectParagraphs(true); 00872 } else { 00873 result = -1; 00874 } 00875 } 00876 return result; 00877 } 00878 00880 int TessBaseAPI::RecognizeForChopTest(ETEXT_DESC* monitor) { 00881 if (tesseract_ == NULL) 00882 return -1; 00883 if (thresholder_ == NULL || thresholder_->IsEmpty()) { 00884 tprintf("Please call SetImage before attempting recognition."); 00885 return -1; 00886 } 00887 if (page_res_ != NULL) 00888 ClearResults(); 00889 if (FindLines() != 0) 00890 return -1; 00891 // Additional conditions under which chopper test cannot be run 00892 if (tesseract_->interactive_display_mode) return -1; 00893 00894 recognition_done_ = true; 00895 00896 page_res_ = new PAGE_RES(block_list_, &(tesseract_->prev_word_best_choice_)); 00897 00898 PAGE_RES_IT page_res_it(page_res_); 00899 00900 while (page_res_it.word() != NULL) { 00901 WERD_RES *word_res = page_res_it.word(); 00902 GenericVector<TBOX> boxes; 00903 tesseract_->MaximallyChopWord(boxes, page_res_it.block()->block, 00904 page_res_it.row()->row, word_res); 00905 page_res_it.forward(); 00906 } 00907 return 0; 00908 } 00909 00926 bool TessBaseAPI::ProcessPages(const char* filename, 00927 const char* retry_config, int timeout_millisec, 00928 STRING* text_out) { 00929 TessResultRenderer* renderer = NewRenderer(); 00930 00931 if (!ProcessPages(filename, retry_config, timeout_millisec, renderer)) { 00932 delete renderer; 00933 return false; 00934 } 00935 00936 const char* out_data; 00937 inT32 out_len; 00938 bool success = renderer->GetOutput(&out_data, &out_len); 00939 if (success) { 00940 // TODO(ewiseblatt): 20111103 00941 // if text_out->size() != out_len then we have binary data which STRING wont 00942 // support so this should fail. 
Really want to eliminate this interface 00943 // alltogether so not worrying about at this time. 00944 text_out->assign(out_data, out_len); 00945 } 00946 delete renderer; 00947 return success; 00948 } 00949 00950 void TessBaseAPI::SetInputImage(Pix *pix) { 00951 if (input_image_) 00952 pixDestroy(&input_image_); 00953 input_image_ = pixClone(pix); 00954 } 00955 00956 Pix* TessBaseAPI::GetInputImage() { 00957 return input_image_; 00958 } 00959 00960 const char * TessBaseAPI::GetInputName() { 00961 if (input_file_) 00962 return input_file_->c_str(); 00963 return NULL; 00964 } 00965 00966 const char * TessBaseAPI::GetDatapath() { 00967 return tesseract_->datadir.c_str(); 00968 } 00969 00970 int TessBaseAPI::GetSourceYResolution() { 00971 return thresholder_->GetSourceYResolution(); 00972 } 00973 00974 bool TessBaseAPI::ProcessPages(const char* filename, 00975 const char* retry_config, int timeout_millisec, 00976 TessResultRenderer* renderer) { 00977 PERF_COUNT_START("ProcessPages") 00978 int page = tesseract_->tessedit_page_number; 00979 if (page < 0) 00980 page = 0; 00981 FILE* fp = fopen(filename, "rb"); 00982 if (fp == NULL) { 00983 tprintf("Image file %s cannot be opened!\n", filename); 00984 return false; 00985 } 00986 // Find the number of pages if a tiff file, or zero otherwise. 
00987 int npages = 0; 00988 int format; 00989 Pix *pix; 00990 pix = pixRead(filename); 00991 format = pixGetInputFormat(pix); 00992 if (format == IFF_TIFF || format == IFF_TIFF_PACKBITS || 00993 format == IFF_TIFF_RLE || format == IFF_TIFF_G3 || 00994 format == IFF_TIFF_G4 || format == IFF_TIFF_LZW || 00995 format == IFF_TIFF_ZIP) 00996 tiffGetCount(fp, &npages); 00997 fclose(fp); 00998 00999 bool success = true; 01000 const char* kUnknownTitle = ""; 01001 if (renderer && !renderer->BeginDocument(kUnknownTitle)) { 01002 success = false; 01003 } 01004 01005 #ifdef USE_OPENCL 01006 OpenclDevice od; 01007 #endif 01008 01009 if (npages > 0) { 01010 pixDestroy(&pix); 01011 for (; page < npages; ++page) { 01012 // only use opencl if compiled w/ OpenCL and selected device is opencl 01013 #ifdef USE_OPENCL 01014 if ( od.selectedDeviceIsOpenCL() ) { 01015 pix = od.pixReadTiffCl(filename, page); 01016 } else { 01017 #endif 01018 pix = pixReadTiff(filename, page); 01019 #ifdef USE_OPENCL 01020 } 01021 #endif 01022 01023 if (pix == NULL) break; 01024 01025 if ((page >= 0) && (npages > 1)) 01026 tprintf("Page %d of %d\n", page + 1, npages); 01027 char page_str[kMaxIntSize]; 01028 snprintf(page_str, kMaxIntSize - 1, "%d", page); 01029 SetVariable("applybox_page", page_str); 01030 success &= ProcessPage(pix, page, filename, retry_config, 01031 timeout_millisec, renderer); 01032 pixDestroy(&pix); 01033 if (tesseract_->tessedit_page_number >= 0 || npages == 1) { 01034 break; 01035 } 01036 } 01037 } else { 01038 // The file is not a tiff file. 01039 if (pix != NULL) { 01040 success &= ProcessPage(pix, 0, filename, retry_config, 01041 timeout_millisec, renderer); 01042 pixDestroy(&pix); 01043 } else { 01044 // The file is not an image file, so try it as a list of filenames. 
01045 FILE* fimg = fopen(filename, "rb"); 01046 if (fimg == NULL) { 01047 tprintf("File %s cannot be opened!\n", filename); 01048 return false; 01049 } 01050 tprintf("Reading %s as a list of filenames...\n", filename); 01051 char pagename[MAX_PATH]; 01052 // Skip to the requested page number. 01053 for (int i = 0; i < page && 01054 fgets(pagename, sizeof(pagename), fimg) != NULL; 01055 ++i); 01056 while (fgets(pagename, sizeof(pagename), fimg) != NULL) { 01057 chomp_string(pagename); 01058 pix = pixRead(pagename); 01059 if (pix == NULL) { 01060 tprintf("Image file %s cannot be read!\n", pagename); 01061 fclose(fimg); 01062 return false; 01063 } 01064 tprintf("Page %d : %s\n", page, pagename); 01065 success &= ProcessPage(pix, page, pagename, retry_config, 01066 timeout_millisec, renderer); 01067 pixDestroy(&pix); 01068 ++page; 01069 } 01070 fclose(fimg); 01071 } 01072 } 01073 01074 bool all_ok = success; 01075 if (renderer && !renderer->EndDocument()) { 01076 all_ok = false; 01077 } 01078 PERF_COUNT_END 01079 return all_ok; 01080 } 01081 01093 bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, 01094 const char* retry_config, int timeout_millisec, 01095 STRING* text_out) { 01096 TessResultRenderer* renderer = NewRenderer(); 01097 01098 if (!ProcessPage(pix, page_index, filename, retry_config, timeout_millisec, 01099 renderer)) { 01100 return false; 01101 } 01102 01103 const char* out_data; 01104 inT32 out_len; 01105 if (!renderer->GetOutput(&out_data, &out_len)) { 01106 return false; 01107 } 01108 01109 // TODO(ewiseblatt): 20111103 01110 // if text_out->size() != out_len then we have binary data which STRING wont 01111 // support so this should fail. Really want to eliminate this interface 01112 // alltogether so not worrying about at this time. 
01113 text_out->assign(out_data, out_len); 01114 01115 return true; 01116 } 01117 01129 bool TessBaseAPI::ProcessPage(Pix* pix, int page_index, const char* filename, 01130 const char* retry_config, int timeout_millisec, 01131 TessResultRenderer* renderer) { 01132 PERF_COUNT_START("ProcessPage") 01133 SetInputName(filename); 01134 SetImage(pix); 01135 SetInputImage(pix); 01136 bool failed = false; 01137 if (timeout_millisec > 0) { 01138 // Running with a timeout. 01139 ETEXT_DESC monitor; 01140 monitor.cancel = NULL; 01141 monitor.cancel_this = NULL; 01142 monitor.set_deadline_msecs(timeout_millisec); 01143 // Now run the main recognition. 01144 failed = Recognize(&monitor) < 0; 01145 } else if (tesseract_->tessedit_pageseg_mode == PSM_OSD_ONLY || 01146 tesseract_->tessedit_pageseg_mode == PSM_AUTO_ONLY) { 01147 // Disabled character recognition. 01148 PageIterator* it = AnalyseLayout(); 01149 if (it == NULL) { 01150 failed = true; 01151 } else { 01152 delete it; 01153 PERF_COUNT_END 01154 return true; 01155 } 01156 } else { 01157 // Normal layout and character recognition with no timeout. 01158 failed = Recognize(NULL) < 0; 01159 } 01160 if (tesseract_->tessedit_write_images) { 01161 Pix* page_pix = GetThresholdedImage(); 01162 pixWrite("tessinput.tif", page_pix, IFF_TIFF_G4); 01163 } 01164 if (failed && retry_config != NULL && retry_config[0] != '\0') { 01165 // Save current config variables before switching modes. 01166 FILE* fp = fopen(kOldVarsFile, "wb"); 01167 PrintVariables(fp); 01168 fclose(fp); 01169 // Switch to alternate mode for retry. 01170 ReadConfigFile(retry_config); 01171 SetImage(pix); 01172 Recognize(NULL); 01173 // Restore saved config variables. 
01174 ReadConfigFile(kOldVarsFile); 01175 } 01176 01177 if (renderer) { 01178 if (failed) { 01179 renderer->AddError(this); 01180 } else { 01181 failed = !renderer->AddImage(this); 01182 } 01183 } 01184 PERF_COUNT_END 01185 return !failed; 01186 } 01187 01192 LTRResultIterator* TessBaseAPI::GetLTRIterator() { 01193 if (tesseract_ == NULL || page_res_ == NULL) 01194 return NULL; 01195 return new LTRResultIterator( 01196 page_res_, tesseract_, 01197 thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), 01198 rect_left_, rect_top_, rect_width_, rect_height_); 01199 } 01200 01209 ResultIterator* TessBaseAPI::GetIterator() { 01210 if (tesseract_ == NULL || page_res_ == NULL) 01211 return NULL; 01212 return ResultIterator::StartOfParagraph(LTRResultIterator( 01213 page_res_, tesseract_, 01214 thresholder_->GetScaleFactor(), thresholder_->GetScaledYResolution(), 01215 rect_left_, rect_top_, rect_width_, rect_height_)); 01216 } 01217 01226 MutableIterator* TessBaseAPI::GetMutableIterator() { 01227 if (tesseract_ == NULL || page_res_ == NULL) 01228 return NULL; 01229 return new MutableIterator(page_res_, tesseract_, 01230 thresholder_->GetScaleFactor(), 01231 thresholder_->GetScaledYResolution(), 01232 rect_left_, rect_top_, rect_width_, rect_height_); 01233 } 01234 01236 char* TessBaseAPI::GetUTF8Text() { 01237 if (tesseract_ == NULL || 01238 (!recognition_done_ && Recognize(NULL) < 0)) 01239 return NULL; 01240 STRING text(""); 01241 ResultIterator *it = GetIterator(); 01242 do { 01243 if (it->Empty(RIL_PARA)) continue; 01244 char *para_text = it->GetUTF8Text(RIL_PARA); 01245 text += para_text; 01246 delete []para_text; 01247 } while (it->Next(RIL_PARA)); 01248 char* result = new char[text.length() + 1]; 01249 strncpy(result, text.string(), text.length() + 1); 01250 delete it; 01251 return result; 01252 } 01253 01257 static tesseract::Orientation GetBlockTextOrientation(const PageIterator *it) { 01258 tesseract::Orientation orientation; 01259 
tesseract::WritingDirection writing_direction; 01260 tesseract::TextlineOrder textline_order; 01261 float deskew_angle; 01262 it->Orientation(&orientation, &writing_direction, &textline_order, 01263 &deskew_angle); 01264 return orientation; 01265 } 01266 01275 static void AddBaselineCoordsTohOCR(const PageIterator *it, 01276 PageIteratorLevel level, 01277 STRING* hocr_str) { 01278 tesseract::Orientation orientation = GetBlockTextOrientation(it); 01279 if (orientation != ORIENTATION_PAGE_UP) { 01280 hocr_str->add_str_int("; textangle ", 360 - orientation * 90); 01281 return; 01282 } 01283 01284 int left, top, right, bottom; 01285 it->BoundingBox(level, &left, &top, &right, &bottom); 01286 01287 // Try to get the baseline coordinates at this level. 01288 int x1, y1, x2, y2; 01289 if (!it->Baseline(level, &x1, &y1, &x2, &y2)) 01290 return; 01291 // Following the description of this field of the hOCR spec, we convert the 01292 // baseline coordinates so that "the bottom left of the bounding box is the 01293 // origin". 01294 x1 -= left; 01295 x2 -= left; 01296 y1 -= bottom; 01297 y2 -= bottom; 01298 01299 // Now fit a line through the points so we can extract coefficients for the 01300 // equation: y = p1 x + p0 01301 double p1 = 0; 01302 double p0 = 0; 01303 if (x1 == x2) { 01304 // Problem computing the polynomial coefficients. 
01305 return; 01306 } 01307 p1 = (y2 - y1) / static_cast<double>(x2 - x1); 01308 p0 = y1 - static_cast<double>(p1 * x1); 01309 01310 hocr_str->add_str_double("; baseline ", round(p1 * 1000.0) / 1000.0); 01311 hocr_str->add_str_double(" ", round(p0 * 1000.0) / 1000.0); 01312 } 01313 01314 static void AddBoxTohOCR(const PageIterator *it, 01315 PageIteratorLevel level, 01316 STRING* hocr_str) { 01317 int left, top, right, bottom; 01318 it->BoundingBox(level, &left, &top, &right, &bottom); 01319 hocr_str->add_str_int("' title=\"bbox ", left); 01320 hocr_str->add_str_int(" ", top); 01321 hocr_str->add_str_int(" ", right); 01322 hocr_str->add_str_int(" ", bottom); 01323 // Add baseline coordinates for textlines only. 01324 if (level == RIL_TEXTLINE) 01325 AddBaselineCoordsTohOCR(it, level, hocr_str); 01326 *hocr_str += "\">"; 01327 } 01328 01337 char* TessBaseAPI::GetHOCRText(int page_number) { 01338 if (tesseract_ == NULL || 01339 (page_res_ == NULL && Recognize(NULL) < 0)) 01340 return NULL; 01341 01342 int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1; 01343 int page_id = page_number + 1; // hOCR uses 1-based page numbers. 
01344 01345 STRING hocr_str(""); 01346 01347 if (input_file_ == NULL) 01348 SetInputName(NULL); 01349 01350 #ifdef _WIN32 01351 // convert input name from ANSI encoding to utf-8 01352 int str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, 01353 NULL, NULL); 01354 wchar_t *uni16_str = new WCHAR[str16_len]; 01355 str16_len = MultiByteToWideChar(CP_ACP, 0, input_file_->string(), -1, 01356 uni16_str, str16_len); 01357 int utf8_len = WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, NULL, 01358 NULL, NULL, NULL); 01359 char *utf8_str = new char[utf8_len]; 01360 WideCharToMultiByte(CP_UTF8, 0, uni16_str, str16_len, utf8_str, 01361 utf8_len, NULL, NULL); 01362 *input_file_ = utf8_str; 01363 delete[] uni16_str; 01364 delete[] utf8_str; 01365 #endif 01366 01367 hocr_str.add_str_int(" <div class='ocr_page' id='page_", page_id); 01368 hocr_str += "' title='image \""; 01369 hocr_str += input_file_ ? *input_file_ : "unknown"; 01370 hocr_str.add_str_int("\"; bbox ", rect_left_); 01371 hocr_str.add_str_int(" ", rect_top_); 01372 hocr_str.add_str_int(" ", rect_width_); 01373 hocr_str.add_str_int(" ", rect_height_); 01374 hocr_str.add_str_int("; ppageno ", page_number); 01375 hocr_str += "'>\n"; 01376 01377 ResultIterator *res_it = GetIterator(); 01378 while (!res_it->Empty(RIL_BLOCK)) { 01379 if (res_it->Empty(RIL_WORD)) { 01380 res_it->Next(RIL_WORD); 01381 continue; 01382 } 01383 01384 // Open any new block/paragraph/textline. 
01385 if (res_it->IsAtBeginningOf(RIL_BLOCK)) { 01386 hocr_str.add_str_int(" <div class='ocr_carea' id='block_", page_id); 01387 hocr_str.add_str_int("_", bcnt); 01388 AddBoxTohOCR(res_it, RIL_BLOCK, &hocr_str); 01389 } 01390 if (res_it->IsAtBeginningOf(RIL_PARA)) { 01391 if (res_it->ParagraphIsLtr()) { 01392 hocr_str.add_str_int("\n <p class='ocr_par' dir='ltr' id='par_", 01393 page_id); 01394 hocr_str.add_str_int("_", pcnt); 01395 } else { 01396 hocr_str.add_str_int("\n <p class='ocr_par' dir='rtl' id='par_", 01397 page_id); 01398 hocr_str.add_str_int("_", pcnt); 01399 } 01400 AddBoxTohOCR(res_it, RIL_PARA, &hocr_str); 01401 } 01402 if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) { 01403 hocr_str.add_str_int("\n <span class='ocr_line' id='line_", page_id); 01404 hocr_str.add_str_int("_", lcnt); 01405 AddBoxTohOCR(res_it, RIL_TEXTLINE, &hocr_str); 01406 } 01407 01408 // Now, process the word... 01409 hocr_str.add_str_int("<span class='ocrx_word' id='word_", page_id); 01410 hocr_str.add_str_int("_", wcnt); 01411 int left, top, right, bottom; 01412 res_it->BoundingBox(RIL_WORD, &left, &top, &right, &bottom); 01413 hocr_str.add_str_int("' title='bbox ", left); 01414 hocr_str.add_str_int(" ", top); 01415 hocr_str.add_str_int(" ", right); 01416 hocr_str.add_str_int(" ", bottom); 01417 hocr_str.add_str_int("; x_wconf ", res_it->Confidence(RIL_WORD)); 01418 hocr_str += "'"; 01419 if (res_it->WordRecognitionLanguage()) { 01420 hocr_str += " lang='"; 01421 hocr_str += res_it->WordRecognitionLanguage(); 01422 hocr_str += "'"; 01423 } 01424 switch (res_it->WordDirection()) { 01425 case DIR_LEFT_TO_RIGHT: hocr_str += " dir='ltr'"; break; 01426 case DIR_RIGHT_TO_LEFT: hocr_str += " dir='rtl'"; break; 01427 default: // Do nothing. 
01428 break; 01429 } 01430 hocr_str += ">"; 01431 const char *font_name; 01432 bool bold, italic, underlined, monospace, serif, smallcaps; 01433 int pointsize, font_id; 01434 font_name = res_it->WordFontAttributes(&bold, &italic, &underlined, 01435 &monospace, &serif, &smallcaps, 01436 &pointsize, &font_id); 01437 bool last_word_in_line = res_it->IsAtFinalElement(RIL_TEXTLINE, RIL_WORD); 01438 bool last_word_in_para = res_it->IsAtFinalElement(RIL_PARA, RIL_WORD); 01439 bool last_word_in_block = res_it->IsAtFinalElement(RIL_BLOCK, RIL_WORD); 01440 if (bold) hocr_str += "<strong>"; 01441 if (italic) hocr_str += "<em>"; 01442 do { 01443 const char *grapheme = res_it->GetUTF8Text(RIL_SYMBOL); 01444 if (grapheme && grapheme[0] != 0) { 01445 if (grapheme[1] == 0) { 01446 switch (grapheme[0]) { 01447 case '<': hocr_str += "&lt;"; break; 01448 case '>': hocr_str += "&gt;"; break; 01449 case '&': hocr_str += "&amp;"; break; 01450 case '"': hocr_str += "&quot;"; break; 01451 case '\'': hocr_str += "&#39;"; break; 01452 default: hocr_str += grapheme; 01453 } 01454 } else { 01455 hocr_str += grapheme; 01456 } 01457 } 01458 delete []grapheme; 01459 res_it->Next(RIL_SYMBOL); 01460 } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD)); 01461 if (italic) hocr_str += "</em>"; 01462 if (bold) hocr_str += "</strong>"; 01463 hocr_str += "</span> "; 01464 wcnt++; 01465 // Close any ending block/paragraph/textline. 
01466 if (last_word_in_line) { 01467 hocr_str += "\n </span>"; 01468 lcnt++; 01469 } 01470 if (last_word_in_para) { 01471 hocr_str += "\n </p>\n"; 01472 pcnt++; 01473 } 01474 if (last_word_in_block) { 01475 hocr_str += " </div>\n"; 01476 bcnt++; 01477 } 01478 } 01479 hocr_str += " </div>\n"; 01480 01481 char *ret = new char[hocr_str.length() + 1]; 01482 strcpy(ret, hocr_str.string()); 01483 delete res_it; 01484 return ret; 01485 } 01486 01488 const int kNumbersPerBlob = 5; 01493 const int kBytesPerNumber = 5; 01499 const int kBytesPerBlob = kNumbersPerBlob * (kBytesPerNumber + 1) + 1; 01500 const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1; 01502 const int kBytesPer64BitNumber = 20; 01509 const int kMaxBytesPerLine = kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 + 01510 UNICHAR_LEN; 01511 01517 char* TessBaseAPI::GetBoxText(int page_number) { 01518 if (tesseract_ == NULL || 01519 (!recognition_done_ && Recognize(NULL) < 0)) 01520 return NULL; 01521 int blob_count; 01522 int utf8_length = TextLength(&blob_count); 01523 int total_length = blob_count * kBytesPerBoxFileLine + utf8_length + 01524 kMaxBytesPerLine; 01525 char* result = new char[total_length]; 01526 strcpy(result, "\0"); 01527 int output_length = 0; 01528 LTRResultIterator* it = GetLTRIterator(); 01529 do { 01530 int left, top, right, bottom; 01531 if (it->BoundingBox(RIL_SYMBOL, &left, &top, &right, &bottom)) { 01532 char* text = it->GetUTF8Text(RIL_SYMBOL); 01533 // Tesseract uses space for recognition failure. Fix to a reject 01534 // character, kTesseractReject so we don't create illegal box files. 
01535 for (int i = 0; text[i] != '\0'; ++i) { 01536 if (text[i] == ' ') 01537 text[i] = kTesseractReject; 01538 } 01539 snprintf(result + output_length, total_length - output_length, 01540 "%s %d %d %d %d %d\n", 01541 text, left, image_height_ - bottom, 01542 right, image_height_ - top, page_number); 01543 output_length += strlen(result + output_length); 01544 delete [] text; 01545 // Just in case... 01546 if (output_length + kMaxBytesPerLine > total_length) 01547 break; 01548 } 01549 } while (it->Next(RIL_SYMBOL)); 01550 delete it; 01551 return result; 01552 } 01553 01559 const int kUniChs[] = { 01560 0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0 01561 }; 01563 const int kLatinChs[] = { 01564 0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0 01565 }; 01566 01572 char* TessBaseAPI::GetUNLVText() { 01573 if (tesseract_ == NULL || 01574 (!recognition_done_ && Recognize(NULL) < 0)) 01575 return NULL; 01576 bool tilde_crunch_written = false; 01577 bool last_char_was_newline = true; 01578 bool last_char_was_tilde = false; 01579 01580 int total_length = TextLength(NULL); 01581 PAGE_RES_IT page_res_it(page_res_); 01582 char* result = new char[total_length]; 01583 char* ptr = result; 01584 for (page_res_it.restart_page(); page_res_it.word () != NULL; 01585 page_res_it.forward()) { 01586 WERD_RES *word = page_res_it.word(); 01587 // Process the current word. 
01588 if (word->unlv_crunch_mode != CR_NONE) { 01589 if (word->unlv_crunch_mode != CR_DELETE && 01590 (!tilde_crunch_written || 01591 (word->unlv_crunch_mode == CR_KEEP_SPACE && 01592 word->word->space() > 0 && 01593 !word->word->flag(W_FUZZY_NON) && 01594 !word->word->flag(W_FUZZY_SP)))) { 01595 if (!word->word->flag(W_BOL) && 01596 word->word->space() > 0 && 01597 !word->word->flag(W_FUZZY_NON) && 01598 !word->word->flag(W_FUZZY_SP)) { 01599 /* Write a space to separate from preceeding good text */ 01600 *ptr++ = ' '; 01601 last_char_was_tilde = false; 01602 } 01603 if (!last_char_was_tilde) { 01604 // Write a reject char. 01605 last_char_was_tilde = true; 01606 *ptr++ = kUNLVReject; 01607 tilde_crunch_written = true; 01608 last_char_was_newline = false; 01609 } 01610 } 01611 } else { 01612 // NORMAL PROCESSING of non tilde crunched words. 01613 tilde_crunch_written = false; 01614 tesseract_->set_unlv_suspects(word); 01615 const char* wordstr = word->best_choice->unichar_string().string(); 01616 const STRING& lengths = word->best_choice->unichar_lengths(); 01617 int length = lengths.length(); 01618 int i = 0; 01619 int offset = 0; 01620 01621 if (last_char_was_tilde && 01622 word->word->space() == 0 && wordstr[offset] == ' ') { 01623 // Prevent adjacent tilde across words - we know that adjacent tildes 01624 // within words have been removed. 01625 // Skip the first character. 
01626 offset = lengths[i++]; 01627 } 01628 if (i < length && wordstr[offset] != 0) { 01629 if (!last_char_was_newline) 01630 *ptr++ = ' '; 01631 else 01632 last_char_was_newline = false; 01633 for (; i < length; offset += lengths[i++]) { 01634 if (wordstr[offset] == ' ' || 01635 wordstr[offset] == kTesseractReject) { 01636 *ptr++ = kUNLVReject; 01637 last_char_was_tilde = true; 01638 } else { 01639 if (word->reject_map[i].rejected()) 01640 *ptr++ = kUNLVSuspect; 01641 UNICHAR ch(wordstr + offset, lengths[i]); 01642 int uni_ch = ch.first_uni(); 01643 for (int j = 0; kUniChs[j] != 0; ++j) { 01644 if (kUniChs[j] == uni_ch) { 01645 uni_ch = kLatinChs[j]; 01646 break; 01647 } 01648 } 01649 if (uni_ch <= 0xff) { 01650 *ptr++ = static_cast<char>(uni_ch); 01651 last_char_was_tilde = false; 01652 } else { 01653 *ptr++ = kUNLVReject; 01654 last_char_was_tilde = true; 01655 } 01656 } 01657 } 01658 } 01659 } 01660 if (word->word->flag(W_EOL) && !last_char_was_newline) { 01661 /* Add a new line output */ 01662 *ptr++ = '\n'; 01663 tilde_crunch_written = false; 01664 last_char_was_newline = true; 01665 last_char_was_tilde = false; 01666 } 01667 } 01668 *ptr++ = '\n'; 01669 *ptr = '\0'; 01670 return result; 01671 } 01672 01674 int TessBaseAPI::MeanTextConf() { 01675 int* conf = AllWordConfidences(); 01676 if (!conf) return 0; 01677 int sum = 0; 01678 int *pt = conf; 01679 while (*pt >= 0) sum += *pt++; 01680 if (pt != conf) sum /= pt - conf; 01681 delete [] conf; 01682 return sum; 01683 } 01684 01686 int* TessBaseAPI::AllWordConfidences() { 01687 if (tesseract_ == NULL || 01688 (!recognition_done_ && Recognize(NULL) < 0)) 01689 return NULL; 01690 int n_word = 0; 01691 PAGE_RES_IT res_it(page_res_); 01692 for (res_it.restart_page(); res_it.word() != NULL; res_it.forward()) 01693 n_word++; 01694 01695 int* conf = new int[n_word+1]; 01696 n_word = 0; 01697 for (res_it.restart_page(); res_it.word() != NULL; res_it.forward()) { 01698 WERD_RES *word = res_it.word(); 01699 WERD_CHOICE* 
choice = word->best_choice; 01700 int w_conf = static_cast<int>(100 + 5 * choice->certainty()); 01701 // This is the eq for converting Tesseract confidence to 1..100 01702 if (w_conf < 0) w_conf = 0; 01703 if (w_conf > 100) w_conf = 100; 01704 conf[n_word++] = w_conf; 01705 } 01706 conf[n_word] = -1; 01707 return conf; 01708 } 01709 01720 bool TessBaseAPI::AdaptToWordStr(PageSegMode mode, const char* wordstr) { 01721 int debug = 0; 01722 GetIntVariable("applybox_debug", &debug); 01723 bool success = true; 01724 PageSegMode current_psm = GetPageSegMode(); 01725 SetPageSegMode(mode); 01726 SetVariable("classify_enable_learning", "0"); 01727 char* text = GetUTF8Text(); 01728 if (debug) { 01729 tprintf("Trying to adapt \"%s\" to \"%s\"\n", text, wordstr); 01730 } 01731 if (text != NULL) { 01732 PAGE_RES_IT it(page_res_); 01733 WERD_RES* word_res = it.word(); 01734 if (word_res != NULL) { 01735 word_res->word->set_text(wordstr); 01736 } else { 01737 success = false; 01738 } 01739 // Check to see if text matches wordstr. 01740 int w = 0; 01741 int t = 0; 01742 for (t = 0; text[t] != '\0'; ++t) { 01743 if (text[t] == '\n' || text[t] == ' ') 01744 continue; 01745 while (wordstr[w] != '\0' && wordstr[w] == ' ') 01746 ++w; 01747 if (text[t] != wordstr[w]) 01748 break; 01749 ++w; 01750 } 01751 if (text[t] != '\0' || wordstr[w] != '\0') { 01752 // No match. 
01753 delete page_res_; 01754 GenericVector<TBOX> boxes; 01755 page_res_ = tesseract_->SetupApplyBoxes(boxes, block_list_); 01756 tesseract_->ReSegmentByClassification(page_res_); 01757 tesseract_->TidyUp(page_res_); 01758 PAGE_RES_IT pr_it(page_res_); 01759 if (pr_it.word() == NULL) 01760 success = false; 01761 else 01762 word_res = pr_it.word(); 01763 } else { 01764 word_res->BestChoiceToCorrectText(); 01765 } 01766 if (success) { 01767 tesseract_->EnableLearning = true; 01768 tesseract_->LearnWord(NULL, word_res); 01769 } 01770 delete [] text; 01771 } else { 01772 success = false; 01773 } 01774 SetPageSegMode(current_psm); 01775 return success; 01776 } 01777 01784 void TessBaseAPI::Clear() { 01785 if (thresholder_ != NULL) 01786 thresholder_->Clear(); 01787 ClearResults(); 01788 } 01789 01796 void TessBaseAPI::End() { 01797 if (thresholder_ != NULL) { 01798 delete thresholder_; 01799 thresholder_ = NULL; 01800 } 01801 if (page_res_ != NULL) { 01802 delete page_res_; 01803 page_res_ = NULL; 01804 } 01805 if (block_list_ != NULL) { 01806 delete block_list_; 01807 block_list_ = NULL; 01808 } 01809 if (paragraph_models_ != NULL) { 01810 paragraph_models_->delete_data_pointers(); 01811 delete paragraph_models_; 01812 paragraph_models_ = NULL; 01813 } 01814 if (tesseract_ != NULL) { 01815 delete tesseract_; 01816 if (osd_tesseract_ == tesseract_) 01817 osd_tesseract_ = NULL; 01818 tesseract_ = NULL; 01819 } 01820 if (osd_tesseract_ != NULL) { 01821 delete osd_tesseract_; 01822 osd_tesseract_ = NULL; 01823 } 01824 if (equ_detect_ != NULL) { 01825 delete equ_detect_; 01826 equ_detect_ = NULL; 01827 } 01828 if (input_file_ != NULL) { 01829 delete input_file_; 01830 input_file_ = NULL; 01831 } 01832 if (output_file_ != NULL) { 01833 delete output_file_; 01834 output_file_ = NULL; 01835 } 01836 if (datapath_ != NULL) { 01837 delete datapath_; 01838 datapath_ = NULL; 01839 } 01840 if (language_ != NULL) { 01841 delete language_; 01842 language_ = NULL; 01843 } 01844 } 01845 
01846 // Clear any library-level memory caches. 01847 // There are a variety of expensive-to-load constant data structures (mostly 01848 // language dictionaries) that are cached globally -- surviving the Init() 01849 // and End() of individual TessBaseAPI's. This function allows the clearing 01850 // of these caches. 01851 void TessBaseAPI::ClearPersistentCache() { 01852 Dict::GlobalDawgCache()->DeleteUnusedDawgs(); 01853 } 01854 01859 int TessBaseAPI::IsValidWord(const char *word) { 01860 return tesseract_->getDict().valid_word(word); 01861 } 01862 01863 01864 // TODO(rays) Obsolete this function and replace with a more aptly named 01865 // function that returns image coordinates rather than tesseract coordinates. 01866 bool TessBaseAPI::GetTextDirection(int* out_offset, float* out_slope) { 01867 PageIterator* it = AnalyseLayout(); 01868 if (it == NULL) { 01869 return false; 01870 } 01871 int x1, x2, y1, y2; 01872 it->Baseline(RIL_TEXTLINE, &x1, &y1, &x2, &y2); 01873 // Calculate offset and slope (NOTE: Kind of ugly) 01874 if (x2 <= x1) x2 = x1 + 1; 01875 // Convert the point pair to slope/offset of the baseline (in image coords.) 01876 *out_slope = static_cast<float>(y2 - y1) / (x2 - x1); 01877 *out_offset = static_cast<int>(y1 - *out_slope * x1); 01878 // Get the y-coord of the baseline at the left and right edges of the 01879 // textline's bounding box. 01880 int left, top, right, bottom; 01881 if (!it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom)) 01882 return false; 01883 int left_y = IntCastRounded(*out_slope * left + *out_offset); 01884 int right_y = IntCastRounded(*out_slope * right + *out_offset); 01885 // Shift the baseline down so it passes through the nearest bottom-corner 01886 // of the textline's bounding box. This is the difference between the y 01887 // at the lowest (max) edge of the box and the actual box bottom. 01888 *out_offset += bottom - MAX(left_y, right_y); 01889 // Switch back to bottom-up tesseract coordinates. 
Requires negation of 01890 // the slope and height - offset for the offset. 01891 *out_slope = -*out_slope; 01892 *out_offset = rect_height_ - *out_offset; 01893 delete it; 01894 01895 return true; 01896 } 01897 01899 void TessBaseAPI::SetDictFunc(DictFunc f) { 01900 if (tesseract_ != NULL) { 01901 tesseract_->getDict().letter_is_okay_ = f; 01902 } 01903 } 01904 01909 void TessBaseAPI::SetProbabilityInContextFunc(ProbabilityInContextFunc f) { 01910 if (tesseract_ != NULL) { 01911 tesseract_->getDict().probability_in_context_ = f; 01912 // Set it for the sublangs too. 01913 int num_subs = tesseract_->num_sub_langs(); 01914 for (int i = 0; i < num_subs; ++i) { 01915 tesseract_->get_sub_lang(i)->getDict().probability_in_context_ = f; 01916 } 01917 } 01918 } 01919 01921 void TessBaseAPI::SetFillLatticeFunc(FillLatticeFunc f) { 01922 if (tesseract_ != NULL) tesseract_->fill_lattice_ = f; 01923 } 01924 01926 bool TessBaseAPI::InternalSetImage() { 01927 if (tesseract_ == NULL) { 01928 tprintf("Please call Init before attempting to send an image."); 01929 return false; 01930 } 01931 if (thresholder_ == NULL) 01932 thresholder_ = new ImageThresholder; 01933 ClearResults(); 01934 return true; 01935 } 01936 01943 void TessBaseAPI::Threshold(Pix** pix) { 01944 ASSERT_HOST(pix != NULL); 01945 if (*pix != NULL) 01946 pixDestroy(pix); 01947 // Zero resolution messes up the algorithms, so make sure it is credible. 01948 int y_res = thresholder_->GetScaledYResolution(); 01949 if (y_res < kMinCredibleResolution || y_res > kMaxCredibleResolution) { 01950 // Use the minimum default resolution, as it is safer to under-estimate 01951 // than over-estimate resolution. 
01952 thresholder_->SetSourceYResolution(kMinCredibleResolution); 01953 } 01954 thresholder_->ThresholdToPix(pix); 01955 thresholder_->GetImageSizes(&rect_left_, &rect_top_, 01956 &rect_width_, &rect_height_, 01957 &image_width_, &image_height_); 01958 if (!thresholder_->IsBinary()) { 01959 tesseract_->set_pix_thresholds(thresholder_->GetPixRectThresholds()); 01960 tesseract_->set_pix_grey(thresholder_->GetPixRectGrey()); 01961 } else { 01962 tesseract_->set_pix_thresholds(NULL); 01963 tesseract_->set_pix_grey(NULL); 01964 } 01965 // Set the internal resolution that is used for layout parameters from the 01966 // estimated resolution, rather than the image resolution, which may be 01967 // fabricated, but we will use the image resolution, if there is one, to 01968 // report output point sizes. 01969 int estimated_res = ClipToRange(thresholder_->GetScaledEstimatedResolution(), 01970 kMinCredibleResolution, 01971 kMaxCredibleResolution); 01972 if (estimated_res != thresholder_->GetScaledEstimatedResolution()) { 01973 tprintf("Estimated resolution %d out of range! 
Corrected to %d\n", 01974 thresholder_->GetScaledEstimatedResolution(), estimated_res); 01975 } 01976 tesseract_->set_source_resolution(estimated_res); 01977 SavePixForCrash(estimated_res, *pix); 01978 } 01979 01981 int TessBaseAPI::FindLines() { 01982 if (thresholder_ == NULL || thresholder_->IsEmpty()) { 01983 tprintf("Please call SetImage before attempting recognition."); 01984 return -1; 01985 } 01986 if (recognition_done_) 01987 ClearResults(); 01988 if (!block_list_->empty()) { 01989 return 0; 01990 } 01991 if (tesseract_ == NULL) { 01992 tesseract_ = new Tesseract; 01993 tesseract_->InitAdaptiveClassifier(false); 01994 } 01995 if (tesseract_->pix_binary() == NULL) 01996 Threshold(tesseract_->mutable_pix_binary()); 01997 if (tesseract_->ImageWidth() > MAX_INT16 || 01998 tesseract_->ImageHeight() > MAX_INT16) { 01999 tprintf("Image too large: (%d, %d)\n", 02000 tesseract_->ImageWidth(), tesseract_->ImageHeight()); 02001 return -1; 02002 } 02003 02004 tesseract_->PrepareForPageseg(); 02005 02006 if (tesseract_->textord_equation_detect) { 02007 if (equ_detect_ == NULL && datapath_ != NULL) { 02008 equ_detect_ = new EquationDetect(datapath_->string(), NULL); 02009 } 02010 tesseract_->SetEquationDetect(equ_detect_); 02011 } 02012 02013 Tesseract* osd_tess = osd_tesseract_; 02014 OSResults osr; 02015 if (PSM_OSD_ENABLED(tesseract_->tessedit_pageseg_mode) && osd_tess == NULL) { 02016 if (strcmp(language_->string(), "osd") == 0) { 02017 osd_tess = tesseract_; 02018 } else { 02019 osd_tesseract_ = new Tesseract; 02020 if (osd_tesseract_->init_tesseract( 02021 datapath_->string(), NULL, "osd", OEM_TESSERACT_ONLY, 02022 NULL, 0, NULL, NULL, false) == 0) { 02023 osd_tess = osd_tesseract_; 02024 osd_tesseract_->set_source_resolution( 02025 thresholder_->GetSourceYResolution()); 02026 } else { 02027 tprintf("Warning: Auto orientation and script detection requested," 02028 " but osd language failed to load\n"); 02029 delete osd_tesseract_; 02030 osd_tesseract_ = NULL; 02031 
} 02032 } 02033 } 02034 02035 if (tesseract_->SegmentPage(input_file_, block_list_, osd_tess, &osr) < 0) 02036 return -1; 02037 // If Devanagari is being recognized, we use different images for page seg 02038 // and for OCR. 02039 tesseract_->PrepareForTessOCR(block_list_, osd_tess, &osr); 02040 return 0; 02041 } 02042 02044 void TessBaseAPI::ClearResults() { 02045 if (tesseract_ != NULL) { 02046 tesseract_->Clear(); 02047 } 02048 if (page_res_ != NULL) { 02049 delete page_res_; 02050 page_res_ = NULL; 02051 } 02052 recognition_done_ = false; 02053 if (block_list_ == NULL) 02054 block_list_ = new BLOCK_LIST; 02055 else 02056 block_list_->clear(); 02057 if (paragraph_models_ != NULL) { 02058 paragraph_models_->delete_data_pointers(); 02059 delete paragraph_models_; 02060 paragraph_models_ = NULL; 02061 } 02062 SavePixForCrash(0, NULL); 02063 } 02064 02072 int TessBaseAPI::TextLength(int* blob_count) { 02073 if (tesseract_ == NULL || page_res_ == NULL) 02074 return 0; 02075 02076 PAGE_RES_IT page_res_it(page_res_); 02077 int total_length = 2; 02078 int total_blobs = 0; 02079 // Iterate over the data structures to extract the recognition result. 
02080 for (page_res_it.restart_page(); page_res_it.word () != NULL; 02081 page_res_it.forward()) { 02082 WERD_RES *word = page_res_it.word(); 02083 WERD_CHOICE* choice = word->best_choice; 02084 if (choice != NULL) { 02085 total_blobs += choice->length() + 2; 02086 total_length += choice->unichar_string().length() + 2; 02087 for (int i = 0; i < word->reject_map.length(); ++i) { 02088 if (word->reject_map[i].rejected()) 02089 ++total_length; 02090 } 02091 } 02092 } 02093 if (blob_count != NULL) 02094 *blob_count = total_blobs; 02095 return total_length; 02096 } 02097 02102 bool TessBaseAPI::DetectOS(OSResults* osr) { 02103 if (tesseract_ == NULL) 02104 return false; 02105 ClearResults(); 02106 if (tesseract_->pix_binary() == NULL) 02107 Threshold(tesseract_->mutable_pix_binary()); 02108 if (input_file_ == NULL) 02109 input_file_ = new STRING(kInputFile); 02110 return orientation_and_script_detection(*input_file_, osr, tesseract_); 02111 } 02112 02113 void TessBaseAPI::set_min_orientation_margin(double margin) { 02114 tesseract_->min_orientation_margin.set_value(margin); 02115 } 02116 02131 void TessBaseAPI::GetBlockTextOrientations(int** block_orientation, 02132 bool** vertical_writing) { 02133 delete[] *block_orientation; 02134 *block_orientation = NULL; 02135 delete[] *vertical_writing; 02136 *vertical_writing = NULL; 02137 BLOCK_IT block_it(block_list_); 02138 02139 block_it.move_to_first(); 02140 int num_blocks = 0; 02141 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { 02142 if (!block_it.data()->poly_block()->IsText()) { 02143 continue; 02144 } 02145 ++num_blocks; 02146 } 02147 if (!num_blocks) { 02148 tprintf("WARNING: Found no blocks\n"); 02149 return; 02150 } 02151 *block_orientation = new int[num_blocks]; 02152 *vertical_writing = new bool[num_blocks]; 02153 block_it.move_to_first(); 02154 int i = 0; 02155 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); 02156 block_it.forward()) { 02157 if 
(!block_it.data()->poly_block()->IsText()) { 02158 continue; 02159 } 02160 FCOORD re_rotation = block_it.data()->re_rotation(); 02161 float re_theta = re_rotation.angle(); 02162 FCOORD classify_rotation = block_it.data()->classify_rotation(); 02163 float classify_theta = classify_rotation.angle(); 02164 double rot_theta = - (re_theta - classify_theta) * 2.0 / PI; 02165 if (rot_theta < 0) rot_theta += 4; 02166 int num_rotations = static_cast<int>(rot_theta + 0.5); 02167 (*block_orientation)[i] = num_rotations; 02168 // The classify_rotation is non-zero only if the text has vertical 02169 // writing direction. 02170 (*vertical_writing)[i] = classify_rotation.y() != 0.0f; 02171 ++i; 02172 } 02173 } 02174 02175 // ____________________________________________________________________________ 02176 // Ocropus add-ons. 02177 02179 BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() { 02180 FindLines(); 02181 BLOCK_LIST* result = block_list_; 02182 block_list_ = NULL; 02183 return result; 02184 } 02185 02191 void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) { 02192 delete block_list; 02193 } 02194 02195 02196 ROW *TessBaseAPI::MakeTessOCRRow(float baseline, 02197 float xheight, 02198 float descender, 02199 float ascender) { 02200 inT32 xstarts[] = {-32000}; 02201 double quad_coeffs[] = {0, 0, baseline}; 02202 return new ROW(1, 02203 xstarts, 02204 quad_coeffs, 02205 xheight, 02206 ascender - (baseline + xheight), 02207 descender - baseline, 02208 0, 02209 0); 02210 } 02211 02213 TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) { 02214 int width = pixGetWidth(pix); 02215 int height = pixGetHeight(pix); 02216 BLOCK block("a character", TRUE, 0, 0, 0, 0, width, height); 02217 02218 // Create C_BLOBs from the page 02219 extract_edges(pix, &block); 02220 02221 // Merge all C_BLOBs 02222 C_BLOB_LIST *list = block.blob_list(); 02223 C_BLOB_IT c_blob_it(list); 02224 if (c_blob_it.empty()) 02225 return NULL; 02226 // Move all the outlines to the first blob. 
02227 C_OUTLINE_IT ol_it(c_blob_it.data()->out_list()); 02228 for (c_blob_it.forward(); 02229 !c_blob_it.at_first(); 02230 c_blob_it.forward()) { 02231 C_BLOB *c_blob = c_blob_it.data(); 02232 ol_it.add_list_after(c_blob->out_list()); 02233 } 02234 // Convert the first blob to the output TBLOB. 02235 return TBLOB::PolygonalCopy(false, c_blob_it.data()); 02236 } 02237 02243 void TessBaseAPI::NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode) { 02244 TBOX box = tblob->bounding_box(); 02245 float x_center = (box.left() + box.right()) / 2.0f; 02246 float baseline = row->base_line(x_center); 02247 float scale = kBlnXHeight / row->x_height(); 02248 tblob->Normalize(NULL, NULL, NULL, x_center, baseline, scale, scale, 02249 0.0f, static_cast<float>(kBlnBaselineOffset), false, NULL); 02250 } 02251 02256 TBLOB *make_tesseract_blob(float baseline, float xheight, 02257 float descender, float ascender, 02258 bool numeric_mode, Pix* pix) { 02259 TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix); 02260 02261 // Normalize TBLOB 02262 ROW *row = 02263 TessBaseAPI::MakeTessOCRRow(baseline, xheight, descender, ascender); 02264 TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode); 02265 delete row; 02266 return tblob; 02267 } 02268 02274 void TessBaseAPI::AdaptToCharacter(const char *unichar_repr, 02275 int length, 02276 float baseline, 02277 float xheight, 02278 float descender, 02279 float ascender) { 02280 UNICHAR_ID id = tesseract_->unicharset.unichar_to_id(unichar_repr, length); 02281 TBLOB *blob = make_tesseract_blob(baseline, xheight, descender, ascender, 02282 tesseract_->classify_bln_numeric_mode, 02283 tesseract_->pix_binary()); 02284 float threshold; 02285 float best_rating = -100; 02286 02287 02288 // Classify to get a raw choice. 
02289 BLOB_CHOICE_LIST choices; 02290 tesseract_->AdaptiveClassifier(blob, &choices); 02291 BLOB_CHOICE_IT choice_it; 02292 choice_it.set_to_list(&choices); 02293 for (choice_it.mark_cycle_pt(); !choice_it.cycled_list(); 02294 choice_it.forward()) { 02295 if (choice_it.data()->rating() > best_rating) { 02296 best_rating = choice_it.data()->rating(); 02297 } 02298 } 02299 02300 threshold = tesseract_->matcher_good_threshold; 02301 02302 if (blob->outlines) 02303 tesseract_->AdaptToChar(blob, id, kUnknownFontinfoId, threshold); 02304 delete blob; 02305 } 02306 02307 02308 PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) { 02309 PAGE_RES *page_res = new PAGE_RES(block_list, 02310 &(tesseract_->prev_word_best_choice_)); 02311 tesseract_->recog_all_words(page_res, NULL, NULL, NULL, 1); 02312 return page_res; 02313 } 02314 02315 PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list, 02316 PAGE_RES* pass1_result) { 02317 if (!pass1_result) 02318 pass1_result = new PAGE_RES(block_list, 02319 &(tesseract_->prev_word_best_choice_)); 02320 tesseract_->recog_all_words(pass1_result, NULL, NULL, NULL, 2); 02321 return pass1_result; 02322 } 02323 02324 void TessBaseAPI::DetectParagraphs(bool after_text_recognition) { 02325 int debug_level = 0; 02326 GetIntVariable("paragraph_debug_level", &debug_level); 02327 if (paragraph_models_ == NULL) 02328 paragraph_models_ = new GenericVector<ParagraphModel*>; 02329 MutableIterator *result_it = GetMutableIterator(); 02330 do { // Detect paragraphs for this block 02331 GenericVector<ParagraphModel *> models; 02332 ::tesseract::DetectParagraphs(debug_level, after_text_recognition, 02333 result_it, &models); 02334 *paragraph_models_ += models; 02335 } while (result_it->Next(RIL_BLOCK)); 02336 delete result_it; 02337 } 02338 02339 struct TESS_CHAR : ELIST_LINK { 02340 char *unicode_repr; 02341 int length; // of unicode_repr 02342 float cost; 02343 TBOX box; 02344 02345 TESS_CHAR(float _cost, const char *repr, int len = 
-1) : cost(_cost) { 02346 length = (len == -1 ? strlen(repr) : len); 02347 unicode_repr = new char[length + 1]; 02348 strncpy(unicode_repr, repr, length); 02349 } 02350 02351 TESS_CHAR() { // Satisfies ELISTIZE. 02352 } 02353 ~TESS_CHAR() { 02354 delete [] unicode_repr; 02355 } 02356 }; 02357 02358 ELISTIZEH(TESS_CHAR) 02359 ELISTIZE(TESS_CHAR) 02360 02361 static void add_space(TESS_CHAR_IT* it) { 02362 TESS_CHAR *t = new TESS_CHAR(0, " "); 02363 it->add_after_then_move(t); 02364 } 02365 02366 02367 static float rating_to_cost(float rating) { 02368 rating = 100 + rating; 02369 // cuddled that to save from coverage profiler 02370 // (I have never seen ratings worse than -100, 02371 // but the check won't hurt) 02372 if (rating < 0) rating = 0; 02373 return rating; 02374 } 02375 02380 static void extract_result(TESS_CHAR_IT* out, 02381 PAGE_RES* page_res) { 02382 PAGE_RES_IT page_res_it(page_res); 02383 int word_count = 0; 02384 while (page_res_it.word() != NULL) { 02385 WERD_RES *word = page_res_it.word(); 02386 const char *str = word->best_choice->unichar_string().string(); 02387 const char *len = word->best_choice->unichar_lengths().string(); 02388 TBOX real_rect = word->word->bounding_box(); 02389 02390 if (word_count) 02391 add_space(out); 02392 int n = strlen(len); 02393 for (int i = 0; i < n; i++) { 02394 TESS_CHAR *tc = new TESS_CHAR(rating_to_cost(word->best_choice->rating()), 02395 str, *len); 02396 tc->box = real_rect.intersection(word->box_word->BlobBox(i)); 02397 out->add_after_then_move(tc); 02398 str += *len; 02399 len++; 02400 } 02401 page_res_it.forward(); 02402 word_count++; 02403 } 02404 } 02405 02410 int TessBaseAPI::TesseractExtractResult(char** text, 02411 int** lengths, 02412 float** costs, 02413 int** x0, 02414 int** y0, 02415 int** x1, 02416 int** y1, 02417 PAGE_RES* page_res) { 02418 TESS_CHAR_LIST tess_chars; 02419 TESS_CHAR_IT tess_chars_it(&tess_chars); 02420 extract_result(&tess_chars_it, page_res); 02421 tess_chars_it.move_to_first(); 
02422 int n = tess_chars.length(); 02423 int text_len = 0; 02424 *lengths = new int[n]; 02425 *costs = new float[n]; 02426 *x0 = new int[n]; 02427 *y0 = new int[n]; 02428 *x1 = new int[n]; 02429 *y1 = new int[n]; 02430 int i = 0; 02431 for (tess_chars_it.mark_cycle_pt(); 02432 !tess_chars_it.cycled_list(); 02433 tess_chars_it.forward(), i++) { 02434 TESS_CHAR *tc = tess_chars_it.data(); 02435 text_len += (*lengths)[i] = tc->length; 02436 (*costs)[i] = tc->cost; 02437 (*x0)[i] = tc->box.left(); 02438 (*y0)[i] = tc->box.bottom(); 02439 (*x1)[i] = tc->box.right(); 02440 (*y1)[i] = tc->box.top(); 02441 } 02442 char *p = *text = new char[text_len]; 02443 02444 tess_chars_it.move_to_first(); 02445 for (tess_chars_it.mark_cycle_pt(); 02446 !tess_chars_it.cycled_list(); 02447 tess_chars_it.forward()) { 02448 TESS_CHAR *tc = tess_chars_it.data(); 02449 strncpy(p, tc->unicode_repr, tc->length); 02450 p += tc->length; 02451 } 02452 return n; 02453 } 02454 02456 // The resulting features are returned in int_features, which must be 02457 // of size MAX_NUM_INT_FEATURES. The number of features is returned in 02458 // num_features (or 0 if there was a failure). 02459 // On return feature_outline_index is filled with an index of the outline 02460 // corresponding to each feature in int_features. 02461 // TODO(rays) Fix the caller to out outline_counts instead. 02462 void TessBaseAPI::GetFeaturesForBlob(TBLOB* blob, 02463 INT_FEATURE_STRUCT* int_features, 02464 int* num_features, 02465 int* feature_outline_index) { 02466 GenericVector<int> outline_counts; 02467 GenericVector<INT_FEATURE_STRUCT> bl_features; 02468 GenericVector<INT_FEATURE_STRUCT> cn_features; 02469 INT_FX_RESULT_STRUCT fx_info; 02470 tesseract_->ExtractFeatures(*blob, false, &bl_features, 02471 &cn_features, &fx_info, &outline_counts); 02472 if (cn_features.size() == 0 || cn_features.size() > MAX_NUM_INT_FEATURES) { 02473 *num_features = 0; 02474 return; // Feature extraction failed. 
02475 } 02476 *num_features = cn_features.size(); 02477 memcpy(int_features, &cn_features[0], *num_features * sizeof(cn_features[0])); 02478 // TODO(rays) Pass outline_counts back and simplify the calling code. 02479 if (feature_outline_index != NULL) { 02480 int f = 0; 02481 for (int i = 0; i < outline_counts.size(); ++i) { 02482 while (f < outline_counts[i]) 02483 feature_outline_index[f++] = i; 02484 } 02485 } 02486 } 02487 02488 // This method returns the row to which a box of specified dimensions would 02489 // belong. If no good match is found, it returns NULL. 02490 ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks, 02491 int left, int top, int right, int bottom) { 02492 TBOX box(left, bottom, right, top); 02493 BLOCK_IT b_it(blocks); 02494 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { 02495 BLOCK* block = b_it.data(); 02496 if (!box.major_overlap(block->bounding_box())) 02497 continue; 02498 ROW_IT r_it(block->row_list()); 02499 for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { 02500 ROW* row = r_it.data(); 02501 if (!box.major_overlap(row->bounding_box())) 02502 continue; 02503 WERD_IT w_it(row->word_list()); 02504 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { 02505 WERD* word = w_it.data(); 02506 if (box.major_overlap(word->bounding_box())) 02507 return row; 02508 } 02509 } 02510 } 02511 return NULL; 02512 } 02513 02515 void TessBaseAPI::RunAdaptiveClassifier(TBLOB* blob, 02516 int num_max_matches, 02517 int* unichar_ids, 02518 float* ratings, 02519 int* num_matches_returned) { 02520 BLOB_CHOICE_LIST* choices = new BLOB_CHOICE_LIST; 02521 tesseract_->AdaptiveClassifier(blob, choices); 02522 BLOB_CHOICE_IT choices_it(choices); 02523 int& index = *num_matches_returned; 02524 index = 0; 02525 for (choices_it.mark_cycle_pt(); 02526 !choices_it.cycled_list() && index < num_max_matches; 02527 choices_it.forward()) { 02528 BLOB_CHOICE* choice = choices_it.data(); 02529 unichar_ids[index] = 
choice->unichar_id(); 02530 ratings[index] = choice->rating(); 02531 ++index; 02532 } 02533 *num_matches_returned = index; 02534 delete choices; 02535 } 02536 02538 const char* TessBaseAPI::GetUnichar(int unichar_id) { 02539 return tesseract_->unicharset.id_to_unichar(unichar_id); 02540 } 02541 02543 const Dawg *TessBaseAPI::GetDawg(int i) const { 02544 if (tesseract_ == NULL || i >= NumDawgs()) return NULL; 02545 return tesseract_->getDict().GetDawg(i); 02546 } 02547 02549 int TessBaseAPI::NumDawgs() const { 02550 return tesseract_ == NULL ? 0 : tesseract_->getDict().NumDawgs(); 02551 } 02552 02554 CubeRecoContext *TessBaseAPI::GetCubeRecoContext() const { 02555 return (tesseract_ == NULL) ? NULL : tesseract_->GetCubeRecoContext(); 02556 } 02557 02558 TessResultRenderer* TessBaseAPI::NewRenderer() { 02559 if (tesseract_->tessedit_create_boxfile 02560 || tesseract_->tessedit_make_boxes_from_boxes) { 02561 return new TessBoxTextRenderer(); 02562 } else if (tesseract_->tessedit_create_hocr) { 02563 return new TessHOcrRenderer(); 02564 } else if (tesseract_->tessedit_create_pdf) { 02565 return new TessPDFRenderer(tesseract_->datadir.c_str()); 02566 } else if (tesseract_->tessedit_write_unlv) { 02567 return new TessUnlvRenderer(); 02568 } else if (tesseract_->tessedit_create_boxfile) { 02569 return new TessBoxTextRenderer(); 02570 } else { 02571 return new TessTextRenderer(); 02572 } 02573 } 02574 } // namespace tesseract.