tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/matrix.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  *
00004  * File:        matrix.cpp  (Formerly matrix.c)
00005  * Description:  Ratings matrix code. (Used by associator)
00006  * Author:       Mark Seaman, OCR Technology
00007  * Created:      Wed May 16 13:18:47 1990
00008  * Modified:     Wed Mar 20 09:44:47 1991 (Mark Seaman) marks@hpgrlt
00009  * Language:     C
00010  * Package:      N/A
00011  * Status:       Experimental (Do Not Distribute)
00012  *
00013  * (c) Copyright 1990, Hewlett-Packard Company.
00014  ** Licensed under the Apache License, Version 2.0 (the "License");
00015  ** you may not use this file except in compliance with the License.
00016  ** You may obtain a copy of the License at
00017  ** http://www.apache.org/licenses/LICENSE-2.0
00018  ** Unless required by applicable law or agreed to in writing, software
00019  ** distributed under the License is distributed on an "AS IS" BASIS,
00020  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00021  ** See the License for the specific language governing permissions and
00022  ** limitations under the License.
00023  *
00024  *********************************************************************************/
00025 /*----------------------------------------------------------------------
00026               I n c l u d e s
00027 ----------------------------------------------------------------------*/
00028 #include "matrix.h"
00029 
00030 #include "callcpp.h"
00031 #include "ratngs.h"
00032 #include "tprintf.h"
00033 #include "unicharset.h"
00034 
00035 // Returns true if there are any real classification results.
00036 bool MATRIX::Classified(int col, int row, int wildcard_id) const {
00037   if (get(col, row) == NOT_CLASSIFIED) return false;
00038   BLOB_CHOICE_IT b_it(get(col, row));
00039   for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
00040     BLOB_CHOICE* choice = b_it.data();
00041     if (choice->IsClassified())
00042       return true;
00043   }
00044   return false;
00045 }
00046 
00047 // Expands the existing matrix in-place to make the band wider, without
00048 // losing any existing data.
00049 void MATRIX::IncreaseBandSize(int bandwidth) {
00050   ResizeWithCopy(dimension(), bandwidth);
00051 }
00052 
00053 // Returns a bigger MATRIX with a new column and row in the matrix in order
00054 // to split the blob at the given (ind,ind) diagonal location.
00055 // Entries are relocated to the new MATRIX using the transformation defined
00056 // by MATRIX_COORD::MapForSplit.
00057 // Transfers the pointer data to the new MATRIX and deletes *this.
00058 MATRIX* MATRIX::ConsumeAndMakeBigger(int ind) {
00059   int dim = dimension();
00060   int band_width = bandwidth();
00061   // Check to see if bandwidth needs expanding.
00062   for (int col = ind; col >= 0 && col > ind - band_width; --col) {
00063     if (array_[col * band_width + band_width - 1] != empty_) {
00064       ++band_width;
00065       break;
00066     }
00067   }
00068   MATRIX* result = new MATRIX(dim + 1, band_width);
00069 
00070   for (int col = 0; col < dim; ++col) {
00071     for (int row = col; row < dim && row < col + bandwidth(); ++row) {
00072       MATRIX_COORD coord(col, row);
00073       coord.MapForSplit(ind);
00074       BLOB_CHOICE_LIST* choices = get(col, row);
00075       if (choices != NULL) {
00076         // Correct matrix location on each choice.
00077         BLOB_CHOICE_IT bc_it(choices);
00078         for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
00079           BLOB_CHOICE* choice = bc_it.data();
00080           choice->set_matrix_cell(coord.col, coord.row);
00081         }
00082         ASSERT_HOST(coord.Valid(*result));
00083         result->put(coord.col, coord.row, choices);
00084       }
00085     }
00086   }
00087   delete this;
00088   return result;
00089 }
00090 
00091 // Makes and returns a deep copy of *this, including all the BLOB_CHOICEs
00092 // on the lists, but not any LanguageModelState that may be attached to the
00093 // BLOB_CHOICEs.
00094 MATRIX* MATRIX::DeepCopy() const {
00095   int dim = dimension();
00096   int band_width = bandwidth();
00097   MATRIX* result = new MATRIX(dim, band_width);
00098   for (int col = 0; col < dim; ++col) {
00099     for (int row = col; row < col + band_width; ++row) {
00100       BLOB_CHOICE_LIST* choices = get(col, row);
00101       if (choices != NULL) {
00102         BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST;
00103         choices->deep_copy(copy_choices, &BLOB_CHOICE::deep_copy);
00104         result->put(col, row, copy_choices);
00105       }
00106     }
00107   }
00108   return result;
00109 }
00110 
00111 // Print the best guesses out of the match rating matrix.
00112 void MATRIX::print(const UNICHARSET &unicharset) const {
00113   tprintf("Ratings Matrix (top 3 choices)\n");
00114   int dim = dimension();
00115   int band_width = bandwidth();
00116   int row, col;
00117   for (col = 0; col < dim; ++col) {
00118     for (row = col; row < dim && row < col + band_width; ++row) {
00119       BLOB_CHOICE_LIST *rating = this->get(col, row);
00120       if (rating == NOT_CLASSIFIED) continue;
00121       BLOB_CHOICE_IT b_it(rating);
00122       tprintf("col=%d row=%d ", col, row);
00123       for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
00124         tprintf("%s rat=%g cert=%g " ,
00125                 unicharset.id_to_unichar(b_it.data()->unichar_id()),
00126                 b_it.data()->rating(), b_it.data()->certainty());
00127       }
00128       tprintf("\n");
00129     }
00130     tprintf("\n");
00131   }
00132   tprintf("\n");
00133   for (col = 0; col < dim; ++col) tprintf("\t%d", col);
00134   tprintf("\n");
00135   for (row = 0; row < dim; ++row) {
00136     for (col = 0; col <= row; ++col) {
00137       if (col == 0) tprintf("%d\t", row);
00138       if (row >= col + band_width) {
00139         tprintf(" \t");
00140         continue;
00141       }
00142       BLOB_CHOICE_LIST *rating = this->get(col, row);
00143       if (rating != NOT_CLASSIFIED) {
00144         BLOB_CHOICE_IT b_it(rating);
00145         int counter = 0;
00146         for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
00147           tprintf("%s ",
00148                   unicharset.id_to_unichar(b_it.data()->unichar_id()));
00149           ++counter;
00150           if (counter == 3) break;
00151         }
00152         tprintf("\t");
00153       } else {
00154         tprintf(" \t");
00155       }
00156     }
00157     tprintf("\n");
00158   }
00159 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines