tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccmain/cubeclassifier.cpp
Go to the documentation of this file.
00001 // Copyright 2011 Google Inc. All Rights Reserved.
00002 // Author: rays@google.com (Ray Smith)
00004 // File:        cubeclassifier.cpp
00005 // Description: Cube implementation of a ShapeClassifier.
00006 // Author:      Ray Smith
00007 // Created:     Wed Nov 23 10:39:45 PST 2011
00008 //
00009 // (C) Copyright 2011, Google Inc.
00010 // Licensed under the Apache License, Version 2.0 (the "License");
00011 // you may not use this file except in compliance with the License.
00012 // You may obtain a copy of the License at
00013 // http://www.apache.org/licenses/LICENSE-2.0
00014 // Unless required by applicable law or agreed to in writing, software
00015 // distributed under the License is distributed on an "AS IS" BASIS,
00016 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00017 // See the License for the specific language governing permissions and
00018 // limitations under the License.
00019 //
00021 
00022 #include "cubeclassifier.h"
00023 
00024 #include "char_altlist.h"
00025 #include "char_set.h"
00026 #include "cube_object.h"
00027 #include "cube_reco_context.h"
00028 #include "tessclassifier.h"
00029 #include "tesseractclass.h"
00030 #include "trainingsample.h"
00031 #include "unicharset.h"
00032 
00033 namespace tesseract {
00034 
00035 CubeClassifier::CubeClassifier(tesseract::Tesseract* tesseract)
00036     : cube_cntxt_(tesseract->GetCubeRecoContext()),
00037       shape_table_(*tesseract->shape_table()) {
00038 }
00039 CubeClassifier::~CubeClassifier() {
00040 }
00041 
00042 // Classifies the given [training] sample, writing to results.
00043 // See ShapeClassifier for a full description.
00044 int CubeClassifier::UnicharClassifySample(
00045     const TrainingSample& sample, Pix* page_pix, int debug,
00046     UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
00047   results->clear();
00048   if (page_pix == NULL) return 0;
00049 
00050   ASSERT_HOST(cube_cntxt_ != NULL);
00051   const TBOX& char_box = sample.bounding_box();
00052   CubeObject* cube_obj = new tesseract::CubeObject(
00053       cube_cntxt_, page_pix, char_box.left(),
00054       pixGetHeight(page_pix) - char_box.top(),
00055       char_box.width(), char_box.height());
00056   CharAltList* alt_list = cube_obj->RecognizeChar();
00057   alt_list->Sort();
00058   CharSet* char_set = cube_cntxt_->CharacterSet();
00059   if (alt_list != NULL) {
00060     for (int i = 0; i < alt_list->AltCount(); ++i) {
00061       // Convert cube representation to a shape_id.
00062       int alt_id = alt_list->Alt(i);
00063       int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
00064       if (unichar_id >= 0)
00065         results->push_back(UnicharRating(unichar_id, alt_list->AltProb(i)));
00066     }
00067     delete alt_list;
00068   }
00069   delete cube_obj;
00070   return results->size();
00071 }
00072 
00073 // Provides access to the ShapeTable that this classifier works with.
00074 const ShapeTable* CubeClassifier::GetShapeTable() const {
00075   return &shape_table_;
00076 }
00077 
00078 CubeTessClassifier::CubeTessClassifier(tesseract::Tesseract* tesseract)
00079     : cube_cntxt_(tesseract->GetCubeRecoContext()),
00080       shape_table_(*tesseract->shape_table()),
00081       pruner_(new TessClassifier(true, tesseract)) {
00082 }
00083 CubeTessClassifier::~CubeTessClassifier() {
00084   delete pruner_;
00085 }
00086 
00087 // Classifies the given [training] sample, writing to results.
00088 // See ShapeClassifier for a full description.
00089 int CubeTessClassifier::UnicharClassifySample(
00090     const TrainingSample& sample, Pix* page_pix, int debug,
00091     UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
00092   int num_results = pruner_->UnicharClassifySample(sample, page_pix, debug,
00093                                                    keep_this, results);
00094   if (page_pix == NULL) return num_results;
00095 
00096   ASSERT_HOST(cube_cntxt_ != NULL);
00097   const TBOX& char_box = sample.bounding_box();
00098   CubeObject* cube_obj = new tesseract::CubeObject(
00099       cube_cntxt_, page_pix, char_box.left(),
00100       pixGetHeight(page_pix) - char_box.top(),
00101       char_box.width(), char_box.height());
00102   CharAltList* alt_list = cube_obj->RecognizeChar();
00103   CharSet* char_set = cube_cntxt_->CharacterSet();
00104   if (alt_list != NULL) {
00105     for (int r = 0; r < num_results; ++r) {
00106       // Get the best cube probability of the unichar in the result.
00107       double best_prob = 0.0;
00108       for (int i = 0; i < alt_list->AltCount(); ++i) {
00109         int alt_id = alt_list->Alt(i);
00110         int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
00111         if (unichar_id == (*results)[r].unichar_id &&
00112             alt_list->AltProb(i) > best_prob) {
00113           best_prob = alt_list->AltProb(i);
00114         }
00115       }
00116       (*results)[r].rating = best_prob;
00117     }
00118     delete alt_list;
00119     // Re-sort by rating.
00120     results->sort(&UnicharRating::SortDescendingRating);
00121   }
00122   delete cube_obj;
00123   return results->size();
00124 }
00125 
00126 // Provides access to the ShapeTable that this classifier works with.
00127 const ShapeTable* CubeTessClassifier::GetShapeTable() const {
00128   return &shape_table_;
00129 }
00130 
00131 }  // namespace tesseract
00132 
00133 
00134 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines