tesseract
3.03
|
00001 /********************************************************************** 00002 * File: rejctmap.cpp (Formerly rejmap.c) 00003 * Description: REJ and REJMAP class functions. 00004 * Author: Phil Cheatle 00005 * Created: Thu Jun 9 13:46:38 BST 1994 00006 * 00007 * (C) Copyright 1994, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "host.h" 00021 #include "rejctmap.h" 00022 #include "secname.h" 00023 #include "params.h" 00024 00025 BOOL8 REJ::perm_rejected() { //Is char perm reject? 00026 return (flag (R_TESS_FAILURE) || 00027 flag (R_SMALL_XHT) || 00028 flag (R_EDGE_CHAR) || 00029 flag (R_1IL_CONFLICT) || 00030 flag (R_POSTNN_1IL) || 00031 flag (R_REJ_CBLOB) || 00032 flag (R_BAD_REPETITION) || flag (R_MM_REJECT)); 00033 } 00034 00035 00036 BOOL8 REJ::rej_before_nn_accept() { 00037 return flag (R_POOR_MATCH) || 00038 flag (R_NOT_TESS_ACCEPTED) || 00039 flag (R_CONTAINS_BLANKS) || flag (R_BAD_PERMUTER); 00040 } 00041 00042 00043 BOOL8 REJ::rej_between_nn_and_mm() { 00044 return flag (R_HYPHEN) || 00045 flag (R_DUBIOUS) || 00046 flag (R_NO_ALPHANUMS) || flag (R_MOSTLY_REJ) || flag (R_XHT_FIXUP); 00047 } 00048 00049 00050 BOOL8 REJ::rej_between_mm_and_quality_accept() { 00051 return flag (R_BAD_QUALITY); 00052 } 00053 00054 00055 BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() { 00056 return flag (R_DOC_REJ) || 00057 flag (R_BLOCK_REJ) || flag (R_ROW_REJ) || flag (R_UNLV_REJ); 00058 } 00059 00060 00061 BOOL8 REJ::rej_before_mm_accept() { 00062 return rej_between_nn_and_mm () || 00063 (rej_before_nn_accept () && 00064 !flag (R_NN_ACCEPT) && !flag (R_HYPHEN_ACCEPT)); 00065 } 00066 00067 00068 BOOL8 REJ::rej_before_quality_accept() { 00069 return rej_between_mm_and_quality_accept () || 00070 (!flag (R_MM_ACCEPT) && rej_before_mm_accept ()); 00071 } 00072 00073 00074 BOOL8 REJ::rejected() { //Is char rejected? 00075 if (flag (R_MINIMAL_REJ_ACCEPT)) 00076 return FALSE; 00077 else 00078 return (perm_rejected () || 00079 rej_between_quality_and_minimal_rej_accept () || 00080 (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ())); 00081 } 00082 00083 00084 BOOL8 REJ::accept_if_good_quality() { //potential rej? 00085 return (rejected () && 00086 !perm_rejected () && 00087 flag (R_BAD_PERMUTER) && 00088 !flag (R_POOR_MATCH) && 00089 !flag (R_NOT_TESS_ACCEPTED) && 00090 !flag (R_CONTAINS_BLANKS) && 00091 (!rej_between_nn_and_mm () && 00092 !rej_between_mm_and_quality_accept () && 00093 !rej_between_quality_and_minimal_rej_accept ())); 00094 } 00095 00096 00097 void REJ::setrej_tess_failure() { //Tess generated blank 00098 set_flag(R_TESS_FAILURE); 00099 } 00100 00101 00102 void REJ::setrej_small_xht() { //Small xht char/wd 00103 set_flag(R_SMALL_XHT); 00104 } 00105 00106 00107 void REJ::setrej_edge_char() { //Close to image edge 00108 set_flag(R_EDGE_CHAR); 00109 } 00110 00111 00112 void REJ::setrej_1Il_conflict() { //Initial reject map 00113 set_flag(R_1IL_CONFLICT); 00114 } 00115 00116 00117 void REJ::setrej_postNN_1Il() { //1Il after NN 00118 set_flag(R_POSTNN_1IL); 00119 } 00120 00121 00122 void REJ::setrej_rej_cblob() { //Insert duff blob 00123 set_flag(R_REJ_CBLOB); 00124 } 00125 00126 00127 void REJ::setrej_mm_reject() { //Matrix matcher 00128 set_flag(R_MM_REJECT); 00129 } 00130 00131 00132 void REJ::setrej_bad_repetition() { //Odd repeated char 00133 set_flag(R_BAD_REPETITION); 00134 } 00135 00136 00137 void REJ::setrej_poor_match() { //Failed Rays heuristic 00138 set_flag(R_POOR_MATCH); 00139 } 00140 00141 00142 void REJ::setrej_not_tess_accepted() { 00143 //TEMP reject_word 00144 set_flag(R_NOT_TESS_ACCEPTED); 00145 } 00146 00147 00148 void REJ::setrej_contains_blanks() { 00149 //TEMP reject_word 00150 set_flag(R_CONTAINS_BLANKS); 00151 } 00152 00153 00154 void REJ::setrej_bad_permuter() { //POTENTIAL reject_word 00155 set_flag(R_BAD_PERMUTER); 00156 } 00157 00158 00159 void REJ::setrej_hyphen() { //PostNN dubious hyphen or . 00160 set_flag(R_HYPHEN); 00161 } 00162 00163 00164 void REJ::setrej_dubious() { //PostNN dubious limit 00165 set_flag(R_DUBIOUS); 00166 } 00167 00168 00169 void REJ::setrej_no_alphanums() { //TEMP reject_word 00170 set_flag(R_NO_ALPHANUMS); 00171 } 00172 00173 00174 void REJ::setrej_mostly_rej() { //TEMP reject_word 00175 set_flag(R_MOSTLY_REJ); 00176 } 00177 00178 00179 void REJ::setrej_xht_fixup() { //xht fixup 00180 set_flag(R_XHT_FIXUP); 00181 } 00182 00183 00184 void REJ::setrej_bad_quality() { //TEMP reject_word 00185 set_flag(R_BAD_QUALITY); 00186 } 00187 00188 00189 void REJ::setrej_doc_rej() { //TEMP reject_word 00190 set_flag(R_DOC_REJ); 00191 } 00192 00193 00194 void REJ::setrej_block_rej() { //TEMP reject_word 00195 set_flag(R_BLOCK_REJ); 00196 } 00197 00198 00199 void REJ::setrej_row_rej() { //TEMP reject_word 00200 set_flag(R_ROW_REJ); 00201 } 00202 00203 00204 void REJ::setrej_unlv_rej() { //TEMP reject_word 00205 set_flag(R_UNLV_REJ); 00206 } 00207 00208 00209 void REJ::setrej_hyphen_accept() { //NN Flipped a char 00210 set_flag(R_HYPHEN_ACCEPT); 00211 } 00212 00213 00214 void REJ::setrej_nn_accept() { //NN Flipped a char 00215 set_flag(R_NN_ACCEPT); 00216 } 00217 00218 00219 void REJ::setrej_mm_accept() { //Matrix matcher 00220 set_flag(R_MM_ACCEPT); 00221 } 00222 00223 00224 void REJ::setrej_quality_accept() { //Quality flip a char 00225 set_flag(R_QUALITY_ACCEPT); 00226 } 00227 00228 00229 void REJ::setrej_minimal_rej_accept() { 00230 //Accept all except blank 00231 set_flag(R_MINIMAL_REJ_ACCEPT); 00232 } 00233 00234 00235 void REJ::full_print(FILE *fp) { 00236 #ifndef SECURE_NAMES 00237 00238 fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F"); 00239 fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F"); 00240 fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F"); 00241 fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F"); 00242 fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F"); 00243 fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F"); 00244 fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F"); 00245 fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F"); 00246 fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F"); 00247 fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n", 00248 flag (R_NOT_TESS_ACCEPTED) ? "T" : "F"); 00249 fprintf (fp, "R_CONTAINS_BLANKS: %s\n", 00250 flag (R_CONTAINS_BLANKS) ? "T" : "F"); 00251 fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F"); 00252 fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F"); 00253 fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F"); 00254 fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F"); 00255 fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F"); 00256 fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F"); 00257 fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F"); 00258 fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F"); 00259 fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F"); 00260 fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F"); 00261 fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F"); 00262 fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F"); 00263 fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F"); 00264 fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F"); 00265 fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F"); 00266 fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n", 00267 flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F"); 00268 #endif 00269 } 00270 00271 00272 //The REJMAP class has been hacked to use alloc_struct instead of new []. 00273 //This is to reduce memory fragmentation only as it is rather kludgy. 00274 //alloc_struct by-passes the call to the contsructor of REJ on each 00275 //array element. Although the constructor is empty, the BITS16 members 00276 //do have a constructor which sets all the flags to 0. The memset 00277 //replaces this functionality. 00278 00279 REJMAP::REJMAP( //classwise copy 00280 const REJMAP &source) { 00281 REJ *to; 00282 REJ *from = source.ptr; 00283 int i; 00284 00285 len = source.length (); 00286 00287 if (len > 0) { 00288 ptr = (REJ *) alloc_struct (len * sizeof (REJ), "REJ"); 00289 to = ptr; 00290 for (i = 0; i < len; i++) { 00291 *to = *from; 00292 to++; 00293 from++; 00294 } 00295 } 00296 else 00297 ptr = NULL; 00298 } 00299 00300 00301 REJMAP & REJMAP::operator= ( //assign REJMAP 00302 const REJMAP & source //from this 00303 ) { 00304 REJ * 00305 to; 00306 REJ * 00307 from = source.ptr; 00308 int 00309 i; 00310 00311 initialise (source.len); 00312 to = ptr; 00313 for (i = 0; i < len; i++) { 00314 *to = *from; 00315 to++; 00316 from++; 00317 } 00318 return *this; 00319 } 00320 00321 00322 void REJMAP::initialise( //Redefine map 00323 inT16 length) { 00324 if (ptr != NULL) 00325 free_struct (ptr, len * sizeof (REJ), "REJ"); 00326 len = length; 00327 if (len > 0) 00328 ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"), 00329 0, len * sizeof (REJ)); 00330 else 00331 ptr = NULL; 00332 } 00333 00334 00335 inT16 REJMAP::accept_count() { //How many accepted? 00336 int i; 00337 inT16 count = 0; 00338 00339 for (i = 0; i < len; i++) { 00340 if (ptr[i].accepted ()) 00341 count++; 00342 } 00343 return count; 00344 } 00345 00346 00347 BOOL8 REJMAP::recoverable_rejects() { //Any non perm rejs? 00348 int i; 00349 00350 for (i = 0; i < len; i++) { 00351 if (ptr[i].recoverable ()) 00352 return TRUE; 00353 } 00354 return FALSE; 00355 } 00356 00357 00358 BOOL8 REJMAP::quality_recoverable_rejects() { //Any potential rejs? 00359 int i; 00360 00361 for (i = 0; i < len; i++) { 00362 if (ptr[i].accept_if_good_quality ()) 00363 return TRUE; 00364 } 00365 return FALSE; 00366 } 00367 00368 00369 void REJMAP::remove_pos( //Cut out an element 00370 inT16 pos //element to remove 00371 ) { 00372 REJ *new_ptr; //new, smaller map 00373 int i; 00374 00375 ASSERT_HOST (pos >= 0); 00376 ASSERT_HOST (pos < len); 00377 ASSERT_HOST (len > 0); 00378 00379 len--; 00380 if (len > 0) 00381 new_ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"), 00382 0, len * sizeof (REJ)); 00383 else 00384 new_ptr = NULL; 00385 00386 for (i = 0; i < pos; i++) 00387 new_ptr[i] = ptr[i]; //copy pre pos 00388 00389 for (; pos < len; pos++) 00390 new_ptr[pos] = ptr[pos + 1]; //copy post pos 00391 00392 //delete old map 00393 free_struct (ptr, (len + 1) * sizeof (REJ), "REJ"); 00394 ptr = new_ptr; 00395 } 00396 00397 00398 void REJMAP::print(FILE *fp) { 00399 int i; 00400 char buff[512]; 00401 00402 for (i = 0; i < len; i++) { 00403 buff[i] = ptr[i].display_char (); 00404 } 00405 buff[i] = '\0'; 00406 fprintf (fp, "\"%s\"", buff); 00407 } 00408 00409 00410 void REJMAP::full_print(FILE *fp) { 00411 int i; 00412 00413 for (i = 0; i < len; i++) { 00414 ptr[i].full_print (fp); 00415 fprintf (fp, "\n"); 00416 } 00417 } 00418 00419 00420 void REJMAP::rej_word_small_xht() { //Reject whole word 00421 int i; 00422 00423 for (i = 0; i < len; i++) { 00424 ptr[i].setrej_small_xht (); 00425 } 00426 } 00427 00428 00429 void REJMAP::rej_word_tess_failure() { //Reject whole word 00430 int i; 00431 00432 for (i = 0; i < len; i++) { 00433 ptr[i].setrej_tess_failure (); 00434 } 00435 } 00436 00437 00438 void REJMAP::rej_word_not_tess_accepted() { //Reject whole word 00439 int i; 00440 00441 for (i = 0; i < len; i++) { 00442 if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted(); 00443 } 00444 } 00445 00446 00447 void REJMAP::rej_word_contains_blanks() { //Reject whole word 00448 int i; 00449 00450 for (i = 0; i < len; i++) { 00451 if (ptr[i].accepted()) ptr[i].setrej_contains_blanks(); 00452 } 00453 } 00454 00455 00456 void REJMAP::rej_word_bad_permuter() { //Reject whole word 00457 int i; 00458 00459 for (i = 0; i < len; i++) { 00460 if (ptr[i].accepted()) ptr[i].setrej_bad_permuter (); 00461 } 00462 } 00463 00464 00465 void REJMAP::rej_word_xht_fixup() { //Reject whole word 00466 int i; 00467 00468 for (i = 0; i < len; i++) { 00469 if (ptr[i].accepted()) ptr[i].setrej_xht_fixup(); 00470 } 00471 } 00472 00473 00474 void REJMAP::rej_word_no_alphanums() { //Reject whole word 00475 int i; 00476 00477 for (i = 0; i < len; i++) { 00478 if (ptr[i].accepted()) ptr[i].setrej_no_alphanums(); 00479 } 00480 } 00481 00482 00483 void REJMAP::rej_word_mostly_rej() { //Reject whole word 00484 int i; 00485 00486 for (i = 0; i < len; i++) { 00487 if (ptr[i].accepted()) ptr[i].setrej_mostly_rej(); 00488 } 00489 } 00490 00491 00492 void REJMAP::rej_word_bad_quality() { //Reject whole word 00493 int i; 00494 00495 for (i = 0; i < len; i++) { 00496 if (ptr[i].accepted()) ptr[i].setrej_bad_quality(); 00497 } 00498 } 00499 00500 00501 void REJMAP::rej_word_doc_rej() { //Reject whole word 00502 int i; 00503 00504 for (i = 0; i < len; i++) { 00505 if (ptr[i].accepted()) ptr[i].setrej_doc_rej(); 00506 } 00507 } 00508 00509 00510 void REJMAP::rej_word_block_rej() { //Reject whole word 00511 int i; 00512 00513 for (i = 0; i < len; i++) { 00514 if (ptr[i].accepted()) ptr[i].setrej_block_rej(); 00515 } 00516 } 00517 00518 00519 void REJMAP::rej_word_row_rej() { //Reject whole word 00520 int i; 00521 00522 for (i = 0; i < len; i++) { 00523 if (ptr[i].accepted()) ptr[i].setrej_row_rej(); 00524 } 00525 }