tesseract
3.03
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * 00004 * File: matrix.c (Formerly matrix.c) 00005 * Description: Ratings matrix code. (Used by associator) 00006 * Author: Mark Seaman, OCR Technology 00007 * Created: Wed May 16 13:18:47 1990 00008 * Modified: Wed Mar 20 09:44:47 1991 (Mark Seaman) marks@hpgrlt 00009 * Language: C 00010 * Package: N/A 00011 * Status: Experimental (Do Not Distribute) 00012 * 00013 * (c) Copyright 1990, Hewlett-Packard Company. 00014 ** Licensed under the Apache License, Version 2.0 (the "License"); 00015 ** you may not use this file except in compliance with the License. 00016 ** You may obtain a copy of the License at 00017 ** http://www.apache.org/licenses/LICENSE-2.0 00018 ** Unless required by applicable law or agreed to in writing, software 00019 ** distributed under the License is distributed on an "AS IS" BASIS, 00020 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00021 ** See the License for the specific language governing permissions and 00022 ** limitations under the License. 00023 * 00024 *********************************************************************************/ 00025 /*---------------------------------------------------------------------- 00026 I n c l u d e s 00027 ----------------------------------------------------------------------*/ 00028 #include "matrix.h" 00029 00030 #include "callcpp.h" 00031 #include "ratngs.h" 00032 #include "tprintf.h" 00033 #include "unicharset.h" 00034 00035 // Returns true if there are any real classification results. 00036 bool MATRIX::Classified(int col, int row, int wildcard_id) const { 00037 if (get(col, row) == NOT_CLASSIFIED) return false; 00038 BLOB_CHOICE_IT b_it(get(col, row)); 00039 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { 00040 BLOB_CHOICE* choice = b_it.data(); 00041 if (choice->IsClassified()) 00042 return true; 00043 } 00044 return false; 00045 } 00046 00047 // Expands the existing matrix in-place to make the band wider, without 00048 // losing any existing data. 00049 void MATRIX::IncreaseBandSize(int bandwidth) { 00050 ResizeWithCopy(dimension(), bandwidth); 00051 } 00052 00053 // Returns a bigger MATRIX with a new column and row in the matrix in order 00054 // to split the blob at the given (ind,ind) diagonal location. 00055 // Entries are relocated to the new MATRIX using the transformation defined 00056 // by MATRIX_COORD::MapForSplit. 00057 // Transfers the pointer data to the new MATRIX and deletes *this. 00058 MATRIX* MATRIX::ConsumeAndMakeBigger(int ind) { 00059 int dim = dimension(); 00060 int band_width = bandwidth(); 00061 // Check to see if bandwidth needs expanding. 00062 for (int col = ind; col >= 0 && col > ind - band_width; --col) { 00063 if (array_[col * band_width + band_width - 1] != empty_) { 00064 ++band_width; 00065 break; 00066 } 00067 } 00068 MATRIX* result = new MATRIX(dim + 1, band_width); 00069 00070 for (int col = 0; col < dim; ++col) { 00071 for (int row = col; row < dim && row < col + bandwidth(); ++row) { 00072 MATRIX_COORD coord(col, row); 00073 coord.MapForSplit(ind); 00074 BLOB_CHOICE_LIST* choices = get(col, row); 00075 if (choices != NULL) { 00076 // Correct matrix location on each choice. 00077 BLOB_CHOICE_IT bc_it(choices); 00078 for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) { 00079 BLOB_CHOICE* choice = bc_it.data(); 00080 choice->set_matrix_cell(coord.col, coord.row); 00081 } 00082 ASSERT_HOST(coord.Valid(*result)); 00083 result->put(coord.col, coord.row, choices); 00084 } 00085 } 00086 } 00087 delete this; 00088 return result; 00089 } 00090 00091 // Makes and returns a deep copy of *this, including all the BLOB_CHOICEs 00092 // on the lists, but not any LanguageModelState that may be attached to the 00093 // BLOB_CHOICEs. 00094 MATRIX* MATRIX::DeepCopy() const { 00095 int dim = dimension(); 00096 int band_width = bandwidth(); 00097 MATRIX* result = new MATRIX(dim, band_width); 00098 for (int col = 0; col < dim; ++col) { 00099 for (int row = col; row < col + band_width; ++row) { 00100 BLOB_CHOICE_LIST* choices = get(col, row); 00101 if (choices != NULL) { 00102 BLOB_CHOICE_LIST* copy_choices = new BLOB_CHOICE_LIST; 00103 choices->deep_copy(copy_choices, &BLOB_CHOICE::deep_copy); 00104 result->put(col, row, copy_choices); 00105 } 00106 } 00107 } 00108 return result; 00109 } 00110 00111 // Print the best guesses out of the match rating matrix. 00112 void MATRIX::print(const UNICHARSET &unicharset) const { 00113 tprintf("Ratings Matrix (top 3 choices)\n"); 00114 int dim = dimension(); 00115 int band_width = bandwidth(); 00116 int row, col; 00117 for (col = 0; col < dim; ++col) { 00118 for (row = col; row < dim && row < col + band_width; ++row) { 00119 BLOB_CHOICE_LIST *rating = this->get(col, row); 00120 if (rating == NOT_CLASSIFIED) continue; 00121 BLOB_CHOICE_IT b_it(rating); 00122 tprintf("col=%d row=%d ", col, row); 00123 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { 00124 tprintf("%s rat=%g cert=%g " , 00125 unicharset.id_to_unichar(b_it.data()->unichar_id()), 00126 b_it.data()->rating(), b_it.data()->certainty()); 00127 } 00128 tprintf("\n"); 00129 } 00130 tprintf("\n"); 00131 } 00132 tprintf("\n"); 00133 for (col = 0; col < dim; ++col) tprintf("\t%d", col); 00134 tprintf("\n"); 00135 for (row = 0; row < dim; ++row) { 00136 for (col = 0; col <= row; ++col) { 00137 if (col == 0) tprintf("%d\t", row); 00138 if (row >= col + band_width) { 00139 tprintf(" \t"); 00140 continue; 00141 } 00142 BLOB_CHOICE_LIST *rating = this->get(col, row); 00143 if (rating != NOT_CLASSIFIED) { 00144 BLOB_CHOICE_IT b_it(rating); 00145 int counter = 0; 00146 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { 00147 tprintf("%s ", 00148 unicharset.id_to_unichar(b_it.data()->unichar_id())); 00149 ++counter; 00150 if (counter == 3) break; 00151 } 00152 tprintf("\t"); 00153 } else { 00154 tprintf(" \t"); 00155 } 00156 } 00157 tprintf("\n"); 00158 } 00159 }