tesseract
3.03
|
00001 /********************************************************************** 00002 * File: adaptions.cpp (Formerly adaptions.c) 00003 * Description: Functions used to adapt to blobs already confidently 00004 * identified 00005 * Author: Chris Newton 00006 * Created: Thu Oct 7 10:17:28 BST 1993 00007 * 00008 * (C) Copyright 1992, Hewlett-Packard Ltd. 00009 ** Licensed under the Apache License, Version 2.0 (the "License"); 00010 ** you may not use this file except in compliance with the License. 00011 ** You may obtain a copy of the License at 00012 ** http://www.apache.org/licenses/LICENSE-2.0 00013 ** Unless required by applicable law or agreed to in writing, software 00014 ** distributed under the License is distributed on an "AS IS" BASIS, 00015 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 ** See the License for the specific language governing permissions and 00017 ** limitations under the License. 00018 * 00019 **********************************************************************/ 00020 00021 #ifdef _MSC_VER 00022 #pragma warning(disable:4244) // Conversion warnings 00023 #pragma warning(disable:4305) // int/float warnings 00024 #endif 00025 00026 #ifdef __UNIX__ 00027 #include <assert.h> 00028 #endif 00029 #include <ctype.h> 00030 #include <string.h> 00031 #include "tessbox.h" 00032 #include "tessvars.h" 00033 #include "memry.h" 00034 #include "reject.h" 00035 #include "control.h" 00036 #include "stopper.h" 00037 #include "secname.h" 00038 #include "tesseractclass.h" 00039 00040 // Include automatically generated configuration file if running autoconf. 00041 #ifdef HAVE_CONFIG_H 00042 #include "config_auto.h" 00043 #endif 00044 00045 namespace tesseract { 00046 BOOL8 Tesseract::word_adaptable( //should we adapt? 00047 WERD_RES *word, 00048 uinT16 mode) { 00049 if (tessedit_adaption_debug) { 00050 tprintf("Running word_adaptable() for %s rating %.4f certainty %.4f\n", 00051 word->best_choice == NULL ? "" : 00052 word->best_choice->unichar_string().string(), 00053 word->best_choice->rating(), word->best_choice->certainty()); 00054 } 00055 00056 BOOL8 status = FALSE; 00057 BITS16 flags(mode); 00058 00059 enum MODES 00060 { 00061 ADAPTABLE_WERD, 00062 ACCEPTABLE_WERD, 00063 CHECK_DAWGS, 00064 CHECK_SPACES, 00065 CHECK_ONE_ELL_CONFLICT, 00066 CHECK_AMBIG_WERD 00067 }; 00068 00069 /* 00070 0: NO adaption 00071 */ 00072 if (mode == 0) { 00073 if (tessedit_adaption_debug) tprintf("adaption disabled\n"); 00074 return FALSE; 00075 } 00076 00077 if (flags.bit (ADAPTABLE_WERD)) { 00078 status |= word->tess_would_adapt; // result of Classify::AdaptableWord() 00079 if (tessedit_adaption_debug && !status) { 00080 tprintf("tess_would_adapt bit is false\n"); 00081 } 00082 } 00083 00084 if (flags.bit (ACCEPTABLE_WERD)) { 00085 status |= word->tess_accepted; 00086 if (tessedit_adaption_debug && !status) { 00087 tprintf("tess_accepted bit is false\n"); 00088 } 00089 } 00090 00091 if (!status) { // If not set then 00092 return FALSE; // ignore other checks 00093 } 00094 00095 if (flags.bit (CHECK_DAWGS) && 00096 (word->best_choice->permuter () != SYSTEM_DAWG_PERM) && 00097 (word->best_choice->permuter () != FREQ_DAWG_PERM) && 00098 (word->best_choice->permuter () != USER_DAWG_PERM) && 00099 (word->best_choice->permuter () != NUMBER_PERM)) { 00100 if (tessedit_adaption_debug) tprintf("word not in dawgs\n"); 00101 return FALSE; 00102 } 00103 00104 if (flags.bit (CHECK_ONE_ELL_CONFLICT) && one_ell_conflict (word, FALSE)) { 00105 if (tessedit_adaption_debug) tprintf("word has ell conflict\n"); 00106 return FALSE; 00107 } 00108 00109 if (flags.bit (CHECK_SPACES) && 00110 (strchr(word->best_choice->unichar_string().string(), ' ') != NULL)) { 00111 if (tessedit_adaption_debug) tprintf("word contains spaces\n"); 00112 return FALSE; 00113 } 00114 00115 if (flags.bit (CHECK_AMBIG_WERD) && 00116 word->best_choice->dangerous_ambig_found()) { 00117 if (tessedit_adaption_debug) tprintf("word is ambiguous\n"); 00118 return FALSE; 00119 } 00120 00121 if (tessedit_adaption_debug) { 00122 tprintf("returning status %d\n", status); 00123 } 00124 return status; 00125 } 00126 00127 } // namespace tesseract