tesseract
3.03
|
00001 00002 // File: dawg_cache.h 00003 // Description: A class that knows about loading and caching dawgs. 00004 // Author: David Eger 00005 // Created: Fri Jan 27 12:08:00 PST 2012 00006 // 00007 // (C) Copyright 2012, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #include "dawg_cache.h" 00021 00022 #include "dawg.h" 00023 #include "object_cache.h" 00024 #include "strngs.h" 00025 #include "tessdatamanager.h" 00026 00027 namespace tesseract { 00028 00029 struct DawgLoader { 00030 DawgLoader(const STRING &lang, 00031 const char *data_file_name, 00032 TessdataType tessdata_dawg_type, 00033 int dawg_debug_level) 00034 : lang_(lang), 00035 data_file_name_(data_file_name), 00036 tessdata_dawg_type_(tessdata_dawg_type), 00037 dawg_debug_level_(dawg_debug_level) {} 00038 00039 Dawg *Load(); 00040 00041 STRING lang_; 00042 const char *data_file_name_; 00043 TessdataType tessdata_dawg_type_; 00044 int dawg_debug_level_; 00045 }; 00046 00047 Dawg *DawgCache::GetSquishedDawg( 00048 const STRING &lang, 00049 const char *data_file_name, 00050 TessdataType tessdata_dawg_type, 00051 int debug_level) { 00052 STRING data_id = data_file_name; 00053 data_id += kTessdataFileSuffixes[tessdata_dawg_type]; 00054 DawgLoader loader(lang, data_file_name, tessdata_dawg_type, debug_level); 00055 return dawgs_.Get(data_id, NewTessCallback(&loader, &DawgLoader::Load)); 00056 } 00057 00058 Dawg *DawgLoader::Load() { 00059 TessdataManager data_loader; 00060 if (!data_loader.Init(data_file_name_, dawg_debug_level_)) { 00061 return NULL; 00062 } 00063 if (!data_loader.SeekToStart(tessdata_dawg_type_)) return NULL; 00064 FILE *fp = data_loader.GetDataFilePtr(); 00065 DawgType dawg_type; 00066 PermuterType perm_type; 00067 switch (tessdata_dawg_type_) { 00068 case TESSDATA_PUNC_DAWG: 00069 dawg_type = DAWG_TYPE_PUNCTUATION; 00070 perm_type = PUNC_PERM; 00071 break; 00072 case TESSDATA_SYSTEM_DAWG: 00073 dawg_type = DAWG_TYPE_WORD; 00074 perm_type = SYSTEM_DAWG_PERM; 00075 break; 00076 case TESSDATA_NUMBER_DAWG: 00077 dawg_type = DAWG_TYPE_NUMBER; 00078 perm_type = NUMBER_PERM; 00079 break; 00080 case TESSDATA_BIGRAM_DAWG: 00081 dawg_type = DAWG_TYPE_WORD; // doesn't actually matter 00082 perm_type = COMPOUND_PERM; // doesn't actually matter 00083 break; 00084 case TESSDATA_UNAMBIG_DAWG: 00085 dawg_type = DAWG_TYPE_WORD; 00086 perm_type = SYSTEM_DAWG_PERM; 00087 break; 00088 case TESSDATA_FREQ_DAWG: 00089 dawg_type = DAWG_TYPE_WORD; 00090 perm_type = FREQ_DAWG_PERM; 00091 break; 00092 default: 00093 data_loader.End(); 00094 return NULL; 00095 } 00096 SquishedDawg *retval = 00097 new SquishedDawg(fp, dawg_type, lang_, perm_type, dawg_debug_level_); 00098 data_loader.End(); 00099 return retval; 00100 } 00101 00102 } // namespace tesseract