tesseract
3.03
|
00001 // Copyright 2011 Google Inc. All Rights Reserved. 00002 // Author: rays@google.com (Ray Smith) 00004 // File: cubeclassifier.cpp 00005 // Description: Cube implementation of a ShapeClassifier. 00006 // Author: Ray Smith 00007 // Created: Wed Nov 23 10:39:45 PST 2011 00008 // 00009 // (C) Copyright 2011, Google Inc. 00010 // Licensed under the Apache License, Version 2.0 (the "License"); 00011 // you may not use this file except in compliance with the License. 00012 // You may obtain a copy of the License at 00013 // http://www.apache.org/licenses/LICENSE-2.0 00014 // Unless required by applicable law or agreed to in writing, software 00015 // distributed under the License is distributed on an "AS IS" BASIS, 00016 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00017 // See the License for the specific language governing permissions and 00018 // limitations under the License. 00019 // 00021 00022 #include "cubeclassifier.h" 00023 00024 #include "char_altlist.h" 00025 #include "char_set.h" 00026 #include "cube_object.h" 00027 #include "cube_reco_context.h" 00028 #include "tessclassifier.h" 00029 #include "tesseractclass.h" 00030 #include "trainingsample.h" 00031 #include "unicharset.h" 00032 00033 namespace tesseract { 00034 00035 CubeClassifier::CubeClassifier(tesseract::Tesseract* tesseract) 00036 : cube_cntxt_(tesseract->GetCubeRecoContext()), 00037 shape_table_(*tesseract->shape_table()) { 00038 } 00039 CubeClassifier::~CubeClassifier() { 00040 } 00041 00042 // Classifies the given [training] sample, writing to results. 00043 // See ShapeClassifier for a full description. 00044 int CubeClassifier::UnicharClassifySample( 00045 const TrainingSample& sample, Pix* page_pix, int debug, 00046 UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) { 00047 results->clear(); 00048 if (page_pix == NULL) return 0; 00049 00050 ASSERT_HOST(cube_cntxt_ != NULL); 00051 const TBOX& char_box = sample.bounding_box(); 00052 CubeObject* cube_obj = new tesseract::CubeObject( 00053 cube_cntxt_, page_pix, char_box.left(), 00054 pixGetHeight(page_pix) - char_box.top(), 00055 char_box.width(), char_box.height()); 00056 CharAltList* alt_list = cube_obj->RecognizeChar(); 00057 alt_list->Sort(); 00058 CharSet* char_set = cube_cntxt_->CharacterSet(); 00059 if (alt_list != NULL) { 00060 for (int i = 0; i < alt_list->AltCount(); ++i) { 00061 // Convert cube representation to a shape_id. 00062 int alt_id = alt_list->Alt(i); 00063 int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id)); 00064 if (unichar_id >= 0) 00065 results->push_back(UnicharRating(unichar_id, alt_list->AltProb(i))); 00066 } 00067 delete alt_list; 00068 } 00069 delete cube_obj; 00070 return results->size(); 00071 } 00072 00073 // Provides access to the ShapeTable that this classifier works with. 00074 const ShapeTable* CubeClassifier::GetShapeTable() const { 00075 return &shape_table_; 00076 } 00077 00078 CubeTessClassifier::CubeTessClassifier(tesseract::Tesseract* tesseract) 00079 : cube_cntxt_(tesseract->GetCubeRecoContext()), 00080 shape_table_(*tesseract->shape_table()), 00081 pruner_(new TessClassifier(true, tesseract)) { 00082 } 00083 CubeTessClassifier::~CubeTessClassifier() { 00084 delete pruner_; 00085 } 00086 00087 // Classifies the given [training] sample, writing to results. 00088 // See ShapeClassifier for a full description. 00089 int CubeTessClassifier::UnicharClassifySample( 00090 const TrainingSample& sample, Pix* page_pix, int debug, 00091 UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) { 00092 int num_results = pruner_->UnicharClassifySample(sample, page_pix, debug, 00093 keep_this, results); 00094 if (page_pix == NULL) return num_results; 00095 00096 ASSERT_HOST(cube_cntxt_ != NULL); 00097 const TBOX& char_box = sample.bounding_box(); 00098 CubeObject* cube_obj = new tesseract::CubeObject( 00099 cube_cntxt_, page_pix, char_box.left(), 00100 pixGetHeight(page_pix) - char_box.top(), 00101 char_box.width(), char_box.height()); 00102 CharAltList* alt_list = cube_obj->RecognizeChar(); 00103 CharSet* char_set = cube_cntxt_->CharacterSet(); 00104 if (alt_list != NULL) { 00105 for (int r = 0; r < num_results; ++r) { 00106 // Get the best cube probability of the unichar in the result. 00107 double best_prob = 0.0; 00108 for (int i = 0; i < alt_list->AltCount(); ++i) { 00109 int alt_id = alt_list->Alt(i); 00110 int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id)); 00111 if (unichar_id == (*results)[r].unichar_id && 00112 alt_list->AltProb(i) > best_prob) { 00113 best_prob = alt_list->AltProb(i); 00114 } 00115 } 00116 (*results)[r].rating = best_prob; 00117 } 00118 delete alt_list; 00119 // Re-sort by rating. 00120 results->sort(&UnicharRating::SortDescendingRating); 00121 } 00122 delete cube_obj; 00123 return results->size(); 00124 } 00125 00126 // Provides access to the ShapeTable that this classifier works with. 00127 const ShapeTable* CubeTessClassifier::GetShapeTable() const { 00128 return &shape_table_; 00129 } 00130 00131 } // namespace tesseract 00132 00133 00134