tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/rejctmap.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        rejctmap.cpp  (Formerly rejmap.c)
00003  * Description: REJ and REJMAP class functions.
00004  * Author:              Phil Cheatle
00005  * Created:             Thu Jun  9 13:46:38 BST 1994
00006  *
00007  * (C) Copyright 1994, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include          "host.h"
00021 #include          "rejctmap.h"
00022 #include          "secname.h"
00023 #include          "params.h"
00024 
00025 BOOL8 REJ::perm_rejected() {  //Is char perm reject?
00026   return (flag (R_TESS_FAILURE) ||
00027     flag (R_SMALL_XHT) ||
00028     flag (R_EDGE_CHAR) ||
00029     flag (R_1IL_CONFLICT) ||
00030     flag (R_POSTNN_1IL) ||
00031     flag (R_REJ_CBLOB) ||
00032     flag (R_BAD_REPETITION) || flag (R_MM_REJECT));
00033 }
00034 
00035 
00036 BOOL8 REJ::rej_before_nn_accept() {
00037   return flag (R_POOR_MATCH) ||
00038     flag (R_NOT_TESS_ACCEPTED) ||
00039     flag (R_CONTAINS_BLANKS) || flag (R_BAD_PERMUTER);
00040 }
00041 
00042 
00043 BOOL8 REJ::rej_between_nn_and_mm() {
00044   return flag (R_HYPHEN) ||
00045     flag (R_DUBIOUS) ||
00046     flag (R_NO_ALPHANUMS) || flag (R_MOSTLY_REJ) || flag (R_XHT_FIXUP);
00047 }
00048 
00049 
00050 BOOL8 REJ::rej_between_mm_and_quality_accept() {
00051   return flag (R_BAD_QUALITY);
00052 }
00053 
00054 
00055 BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() {
00056   return flag (R_DOC_REJ) ||
00057     flag (R_BLOCK_REJ) || flag (R_ROW_REJ) || flag (R_UNLV_REJ);
00058 }
00059 
00060 
00061 BOOL8 REJ::rej_before_mm_accept() {
00062   return rej_between_nn_and_mm () ||
00063     (rej_before_nn_accept () &&
00064     !flag (R_NN_ACCEPT) && !flag (R_HYPHEN_ACCEPT));
00065 }
00066 
00067 
00068 BOOL8 REJ::rej_before_quality_accept() {
00069   return rej_between_mm_and_quality_accept () ||
00070     (!flag (R_MM_ACCEPT) && rej_before_mm_accept ());
00071 }
00072 
00073 
00074 BOOL8 REJ::rejected() {  //Is char rejected?
00075   if (flag (R_MINIMAL_REJ_ACCEPT))
00076     return FALSE;
00077   else
00078     return (perm_rejected () ||
00079       rej_between_quality_and_minimal_rej_accept () ||
00080       (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ()));
00081 }
00082 
00083 
00084 BOOL8 REJ::accept_if_good_quality() {  //potential rej?
00085   return (rejected () &&
00086     !perm_rejected () &&
00087     flag (R_BAD_PERMUTER) &&
00088     !flag (R_POOR_MATCH) &&
00089     !flag (R_NOT_TESS_ACCEPTED) &&
00090     !flag (R_CONTAINS_BLANKS) &&
00091     (!rej_between_nn_and_mm () &&
00092      !rej_between_mm_and_quality_accept () &&
00093      !rej_between_quality_and_minimal_rej_accept ()));
00094 }
00095 
00096 
00097 void REJ::setrej_tess_failure() {  //Tess generated blank
00098   set_flag(R_TESS_FAILURE);
00099 }
00100 
00101 
00102 void REJ::setrej_small_xht() {  //Small xht char/wd
00103   set_flag(R_SMALL_XHT);
00104 }
00105 
00106 
00107 void REJ::setrej_edge_char() {  //Close to image edge
00108   set_flag(R_EDGE_CHAR);
00109 }
00110 
00111 
00112 void REJ::setrej_1Il_conflict() {  //Initial reject map
00113   set_flag(R_1IL_CONFLICT);
00114 }
00115 
00116 
00117 void REJ::setrej_postNN_1Il() {  //1Il after NN
00118   set_flag(R_POSTNN_1IL);
00119 }
00120 
00121 
00122 void REJ::setrej_rej_cblob() {  //Insert duff blob
00123   set_flag(R_REJ_CBLOB);
00124 }
00125 
00126 
00127 void REJ::setrej_mm_reject() {  //Matrix matcher
00128   set_flag(R_MM_REJECT);
00129 }
00130 
00131 
00132 void REJ::setrej_bad_repetition() {  //Odd repeated char
00133   set_flag(R_BAD_REPETITION);
00134 }
00135 
00136 
00137 void REJ::setrej_poor_match() {  //Failed Rays heuristic
00138   set_flag(R_POOR_MATCH);
00139 }
00140 
00141 
00142 void REJ::setrej_not_tess_accepted() {
00143                                  //TEMP reject_word
00144   set_flag(R_NOT_TESS_ACCEPTED);
00145 }
00146 
00147 
00148 void REJ::setrej_contains_blanks() {
00149                                  //TEMP reject_word
00150   set_flag(R_CONTAINS_BLANKS);
00151 }
00152 
00153 
00154 void REJ::setrej_bad_permuter() {  //POTENTIAL reject_word
00155   set_flag(R_BAD_PERMUTER);
00156 }
00157 
00158 
00159 void REJ::setrej_hyphen() {  //PostNN dubious hyphen or .
00160   set_flag(R_HYPHEN);
00161 }
00162 
00163 
00164 void REJ::setrej_dubious() {  //PostNN dubious limit
00165   set_flag(R_DUBIOUS);
00166 }
00167 
00168 
00169 void REJ::setrej_no_alphanums() {  //TEMP reject_word
00170   set_flag(R_NO_ALPHANUMS);
00171 }
00172 
00173 
00174 void REJ::setrej_mostly_rej() {  //TEMP reject_word
00175   set_flag(R_MOSTLY_REJ);
00176 }
00177 
00178 
00179 void REJ::setrej_xht_fixup() {  //xht fixup
00180   set_flag(R_XHT_FIXUP);
00181 }
00182 
00183 
00184 void REJ::setrej_bad_quality() {  //TEMP reject_word
00185   set_flag(R_BAD_QUALITY);
00186 }
00187 
00188 
00189 void REJ::setrej_doc_rej() {  //TEMP reject_word
00190   set_flag(R_DOC_REJ);
00191 }
00192 
00193 
00194 void REJ::setrej_block_rej() {  //TEMP reject_word
00195   set_flag(R_BLOCK_REJ);
00196 }
00197 
00198 
00199 void REJ::setrej_row_rej() {  //TEMP reject_word
00200   set_flag(R_ROW_REJ);
00201 }
00202 
00203 
00204 void REJ::setrej_unlv_rej() {  //TEMP reject_word
00205   set_flag(R_UNLV_REJ);
00206 }
00207 
00208 
00209 void REJ::setrej_hyphen_accept() {  //NN Flipped a char
00210   set_flag(R_HYPHEN_ACCEPT);
00211 }
00212 
00213 
00214 void REJ::setrej_nn_accept() {  //NN Flipped a char
00215   set_flag(R_NN_ACCEPT);
00216 }
00217 
00218 
00219 void REJ::setrej_mm_accept() {  //Matrix matcher
00220   set_flag(R_MM_ACCEPT);
00221 }
00222 
00223 
00224 void REJ::setrej_quality_accept() {  //Quality flip a char
00225   set_flag(R_QUALITY_ACCEPT);
00226 }
00227 
00228 
00229 void REJ::setrej_minimal_rej_accept() {
00230                                  //Accept all except blank
00231   set_flag(R_MINIMAL_REJ_ACCEPT);
00232 }
00233 
00234 
00235 void REJ::full_print(FILE *fp) {
00236   #ifndef SECURE_NAMES
00237 
00238   fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F");
00239   fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F");
00240   fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F");
00241   fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F");
00242   fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F");
00243   fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F");
00244   fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F");
00245   fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F");
00246   fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F");
00247   fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n",
00248     flag (R_NOT_TESS_ACCEPTED) ? "T" : "F");
00249   fprintf (fp, "R_CONTAINS_BLANKS: %s\n",
00250     flag (R_CONTAINS_BLANKS) ? "T" : "F");
00251   fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F");
00252   fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F");
00253   fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F");
00254   fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F");
00255   fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F");
00256   fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F");
00257   fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F");
00258   fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F");
00259   fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F");
00260   fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F");
00261   fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F");
00262   fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F");
00263   fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F");
00264   fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F");
00265   fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F");
00266   fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
00267     flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
00268   #endif
00269 }
00270 
00271 
00272 //The REJMAP class has been hacked to use alloc_struct instead of new [].
00273 //This is to reduce memory fragmentation only as it is rather kludgy.
00274 //alloc_struct by-passes the call to the constructor of REJ on each
00275 //array element. Although the constructor is empty, the BITS16 members
00276 //do have a constructor which sets all the flags to 0. The memset
00277 //replaces this functionality.
00278 
00279 REJMAP::REJMAP(  //classwise copy
00280                const REJMAP &source) {
00281   REJ *to;
00282   REJ *from = source.ptr;
00283   int i;
00284 
00285   len = source.length ();
00286 
00287   if (len > 0) {
00288     ptr = (REJ *) alloc_struct (len * sizeof (REJ), "REJ");
00289     to = ptr;
00290     for (i = 0; i < len; i++) {
00291       *to = *from;
00292       to++;
00293       from++;
00294     }
00295   }
00296   else
00297     ptr = NULL;
00298 }
00299 
00300 
00301 REJMAP & REJMAP::operator= (     //assign REJMAP
00302 const REJMAP & source            //from this
00303 ) {
00304   REJ *
00305     to;
00306   REJ *
00307     from = source.ptr;
00308   int
00309     i;
00310 
00311   initialise (source.len);
00312   to = ptr;
00313   for (i = 0; i < len; i++) {
00314     *to = *from;
00315     to++;
00316     from++;
00317   }
00318   return *this;
00319 }
00320 
00321 
00322 void REJMAP::initialise(  //Redefine map
00323                         inT16 length) {
00324   if (ptr != NULL)
00325     free_struct (ptr, len * sizeof (REJ), "REJ");
00326   len = length;
00327   if (len > 0)
00328     ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
00329       0, len * sizeof (REJ));
00330   else
00331     ptr = NULL;
00332 }
00333 
00334 
00335 inT16 REJMAP::accept_count() {  //How many accepted?
00336   int i;
00337   inT16 count = 0;
00338 
00339   for (i = 0; i < len; i++) {
00340     if (ptr[i].accepted ())
00341       count++;
00342   }
00343   return count;
00344 }
00345 
00346 
00347 BOOL8 REJMAP::recoverable_rejects() {  //Any non perm rejs?
00348   int i;
00349 
00350   for (i = 0; i < len; i++) {
00351     if (ptr[i].recoverable ())
00352       return TRUE;
00353   }
00354   return FALSE;
00355 }
00356 
00357 
00358 BOOL8 REJMAP::quality_recoverable_rejects() {  //Any potential rejs?
00359   int i;
00360 
00361   for (i = 0; i < len; i++) {
00362     if (ptr[i].accept_if_good_quality ())
00363       return TRUE;
00364   }
00365   return FALSE;
00366 }
00367 
00368 
00369 void REJMAP::remove_pos(           //Cut out an element
00370                         inT16 pos  //element to remove
00371                        ) {
00372   REJ *new_ptr;                  //new, smaller map
00373   int i;
00374 
00375   ASSERT_HOST (pos >= 0);
00376   ASSERT_HOST (pos < len);
00377   ASSERT_HOST (len > 0);
00378 
00379   len--;
00380   if (len > 0)
00381     new_ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
00382       0, len * sizeof (REJ));
00383   else
00384     new_ptr = NULL;
00385 
00386   for (i = 0; i < pos; i++)
00387     new_ptr[i] = ptr[i];         //copy pre pos
00388 
00389   for (; pos < len; pos++)
00390     new_ptr[pos] = ptr[pos + 1]; //copy post pos
00391 
00392                                  //delete old map
00393   free_struct (ptr, (len + 1) * sizeof (REJ), "REJ");
00394   ptr = new_ptr;
00395 }
00396 
00397 
00398 void REJMAP::print(FILE *fp) {
00399   int i;
00400   char buff[512];
00401 
00402   for (i = 0; i < len; i++) {
00403     buff[i] = ptr[i].display_char ();
00404   }
00405   buff[i] = '\0';
00406   fprintf (fp, "\"%s\"", buff);
00407 }
00408 
00409 
00410 void REJMAP::full_print(FILE *fp) {
00411   int i;
00412 
00413   for (i = 0; i < len; i++) {
00414     ptr[i].full_print (fp);
00415     fprintf (fp, "\n");
00416   }
00417 }
00418 
00419 
00420 void REJMAP::rej_word_small_xht() {  //Reject whole word
00421   int i;
00422 
00423   for (i = 0; i < len; i++) {
00424     ptr[i].setrej_small_xht ();
00425   }
00426 }
00427 
00428 
00429 void REJMAP::rej_word_tess_failure() {  //Reject whole word
00430   int i;
00431 
00432   for (i = 0; i < len; i++) {
00433     ptr[i].setrej_tess_failure ();
00434   }
00435 }
00436 
00437 
00438 void REJMAP::rej_word_not_tess_accepted() {  //Reject whole word
00439   int i;
00440 
00441   for (i = 0; i < len; i++) {
00442     if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted();
00443   }
00444 }
00445 
00446 
00447 void REJMAP::rej_word_contains_blanks() {  //Reject whole word
00448   int i;
00449 
00450   for (i = 0; i < len; i++) {
00451     if (ptr[i].accepted()) ptr[i].setrej_contains_blanks();
00452   }
00453 }
00454 
00455 
00456 void REJMAP::rej_word_bad_permuter() {  //Reject whole word
00457   int i;
00458 
00459   for (i = 0; i < len; i++) {
00460     if (ptr[i].accepted()) ptr[i].setrej_bad_permuter ();
00461   }
00462 }
00463 
00464 
00465 void REJMAP::rej_word_xht_fixup() {  //Reject whole word
00466   int i;
00467 
00468   for (i = 0; i < len; i++) {
00469     if (ptr[i].accepted()) ptr[i].setrej_xht_fixup();
00470   }
00471 }
00472 
00473 
00474 void REJMAP::rej_word_no_alphanums() {  //Reject whole word
00475   int i;
00476 
00477   for (i = 0; i < len; i++) {
00478     if (ptr[i].accepted()) ptr[i].setrej_no_alphanums();
00479   }
00480 }
00481 
00482 
00483 void REJMAP::rej_word_mostly_rej() {  //Reject whole word
00484   int i;
00485 
00486   for (i = 0; i < len; i++) {
00487     if (ptr[i].accepted()) ptr[i].setrej_mostly_rej();
00488   }
00489 }
00490 
00491 
00492 void REJMAP::rej_word_bad_quality() {  //Reject whole word
00493   int i;
00494 
00495   for (i = 0; i < len; i++) {
00496     if (ptr[i].accepted()) ptr[i].setrej_bad_quality();
00497   }
00498 }
00499 
00500 
00501 void REJMAP::rej_word_doc_rej() {  //Reject whole word
00502   int i;
00503 
00504   for (i = 0; i < len; i++) {
00505     if (ptr[i].accepted()) ptr[i].setrej_doc_rej();
00506   }
00507 }
00508 
00509 
00510 void REJMAP::rej_word_block_rej() {  //Reject whole word
00511   int i;
00512 
00513   for (i = 0; i < len; i++) {
00514     if (ptr[i].accepted()) ptr[i].setrej_block_rej();
00515   }
00516 }
00517 
00518 
00519 void REJMAP::rej_word_row_rej() {  //Reject whole word
00520   int i;
00521 
00522   for (i = 0; i < len; i++) {
00523     if (ptr[i].accepted()) ptr[i].setrej_row_rej();
00524   }
00525 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines