tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/dict/dawg_cache.cpp
Go to the documentation of this file.
00001 
00002 // File:        dawg_cache.cpp
00003 // Description: A class that knows about loading and caching dawgs.
00004 // Author:      David Eger
00005 // Created:     Fri Jan 27 12:08:00 PST 2012
00006 //
00007 // (C) Copyright 2012, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #include "dawg_cache.h"
00021 
00022 #include "dawg.h"
00023 #include "object_cache.h"
00024 #include "strngs.h"
00025 #include "tessdatamanager.h"
00026 
00027 namespace tesseract {
00028 
00029 struct DawgLoader {
00030   DawgLoader(const STRING &lang,
00031              const char *data_file_name,
00032              TessdataType tessdata_dawg_type,
00033              int dawg_debug_level)
00034       : lang_(lang),
00035         data_file_name_(data_file_name),
00036         tessdata_dawg_type_(tessdata_dawg_type),
00037         dawg_debug_level_(dawg_debug_level) {}
00038 
00039   Dawg *Load();
00040 
00041   STRING lang_;
00042   const char *data_file_name_;
00043   TessdataType tessdata_dawg_type_;
00044   int dawg_debug_level_;
00045 };
00046 
00047 Dawg *DawgCache::GetSquishedDawg(
00048     const STRING &lang,
00049     const char *data_file_name,
00050     TessdataType tessdata_dawg_type,
00051     int debug_level) {
00052   STRING data_id = data_file_name;
00053   data_id += kTessdataFileSuffixes[tessdata_dawg_type];
00054   DawgLoader loader(lang, data_file_name, tessdata_dawg_type, debug_level);
00055   return dawgs_.Get(data_id, NewTessCallback(&loader, &DawgLoader::Load));
00056 }
00057 
00058 Dawg *DawgLoader::Load() {
00059   TessdataManager data_loader;
00060   if (!data_loader.Init(data_file_name_, dawg_debug_level_)) {
00061     return NULL;
00062   }
00063   if (!data_loader.SeekToStart(tessdata_dawg_type_)) return NULL;
00064   FILE *fp = data_loader.GetDataFilePtr();
00065   DawgType dawg_type;
00066   PermuterType perm_type;
00067   switch (tessdata_dawg_type_) {
00068     case TESSDATA_PUNC_DAWG:
00069       dawg_type = DAWG_TYPE_PUNCTUATION;
00070       perm_type = PUNC_PERM;
00071       break;
00072     case TESSDATA_SYSTEM_DAWG:
00073       dawg_type = DAWG_TYPE_WORD;
00074       perm_type = SYSTEM_DAWG_PERM;
00075       break;
00076     case TESSDATA_NUMBER_DAWG:
00077       dawg_type = DAWG_TYPE_NUMBER;
00078       perm_type = NUMBER_PERM;
00079       break;
00080     case TESSDATA_BIGRAM_DAWG:
00081       dawg_type = DAWG_TYPE_WORD;  // doesn't actually matter
00082       perm_type = COMPOUND_PERM;   // doesn't actually matter
00083       break;
00084     case TESSDATA_UNAMBIG_DAWG:
00085       dawg_type = DAWG_TYPE_WORD;
00086       perm_type = SYSTEM_DAWG_PERM;
00087       break;
00088     case TESSDATA_FREQ_DAWG:
00089       dawg_type = DAWG_TYPE_WORD;
00090       perm_type = FREQ_DAWG_PERM;
00091       break;
00092     default:
00093       data_loader.End();
00094       return NULL;
00095   }
00096   SquishedDawg *retval =
00097       new SquishedDawg(fp, dawg_type, lang_, perm_type, dawg_debug_level_);
00098   data_loader.End();
00099   return retval;
00100 }
00101 
00102 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines