tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/cube/char_samp_set.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        char_samp_set.cpp
00003  * Description: Implementation of a Character Sample Set Class
00004  * Author:    Ahmad Abdulkader
00005  * Created:   2007
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
#include <stdlib.h>
#include <string.h>
#include <new>
#include <string>
#include "char_samp_set.h"
#include "cached_file.h"
00024 
00025 namespace tesseract {
00026 
00027 CharSampSet::CharSampSet() {
00028   cnt_ = 0;
00029   samp_buff_ = NULL;
00030   own_samples_ = false;
00031 }
00032 
00033 CharSampSet::~CharSampSet() {
00034   Cleanup();
00035 }
00036 
00037 // free buffers and init vars
00038 void CharSampSet::Cleanup() {
00039   if (samp_buff_ != NULL) {
00040     // only free samples if owned by class
00041     if (own_samples_ == true) {
00042       for (int samp_idx = 0; samp_idx < cnt_; samp_idx++) {
00043         if (samp_buff_[samp_idx] != NULL) {
00044           delete samp_buff_[samp_idx];
00045         }
00046       }
00047     }
00048     delete []samp_buff_;
00049   }
00050   cnt_ = 0;
00051   samp_buff_ = NULL;
00052 }
00053 
00054 // add a new sample
00055 bool CharSampSet::Add(CharSamp *char_samp) {
00056   if ((cnt_ % SAMP_ALLOC_BLOCK) == 0) {
00057       // create an extended buffer
00058     CharSamp **new_samp_buff =
00059         reinterpret_cast<CharSamp **>(new CharSamp *[cnt_ + SAMP_ALLOC_BLOCK]);
00060     if (new_samp_buff == NULL) {
00061       return false;
00062     }
00063     // copy old contents
00064     if (cnt_ > 0) {
00065       memcpy(new_samp_buff, samp_buff_, cnt_ * sizeof(*samp_buff_));
00066       delete []samp_buff_;
00067     }
00068     samp_buff_ = new_samp_buff;
00069   }
00070   samp_buff_[cnt_++] = char_samp;
00071   return true;
00072 }
00073 
00074 // load char samples from file
00075 bool CharSampSet::LoadCharSamples(FILE *fp) {
00076   // free existing
00077   Cleanup();
00078   // samples are created here and owned by the class
00079   own_samples_ = true;
00080   // start loading char samples
00081   while (feof(fp) == 0) {
00082     CharSamp *new_samp = CharSamp::FromCharDumpFile(fp);
00083     if (new_samp != NULL) {
00084       if (Add(new_samp) == false) {
00085         return false;
00086       }
00087     }
00088   }
00089   return true;
00090 }
00091 
00092 // creates a CharSampSet object from file
00093 CharSampSet * CharSampSet::FromCharDumpFile(string file_name) {
00094   FILE *fp;
00095   unsigned int val32;
00096   // open the file
00097   fp = fopen(file_name.c_str(), "rb");
00098   if (fp == NULL) {
00099     return NULL;
00100   }
00101   // read and verify marker
00102   if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00103     fclose(fp);
00104     return NULL;
00105   }
00106   if (val32 != 0xfefeabd0) {
00107     fclose(fp);
00108     return NULL;
00109   }
00110   // create an object
00111   CharSampSet *samp_set = new CharSampSet();
00112   if (samp_set == NULL) {
00113     fclose(fp);
00114     return NULL;
00115   }
00116   if (samp_set->LoadCharSamples(fp) == false) {
00117     delete samp_set;
00118     samp_set = NULL;
00119   }
00120   fclose(fp);
00121   return samp_set;
00122 }
00123 
00124 // Create a new Char Dump file
00125 FILE *CharSampSet::CreateCharDumpFile(string file_name) {
00126   FILE *fp;
00127   unsigned int val32;
00128   // create the file
00129   fp =  fopen(file_name.c_str(), "wb");
00130   if (!fp) {
00131     return NULL;
00132   }
00133   // read and verify marker
00134   val32 = 0xfefeabd0;
00135   if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
00136     fclose(fp);
00137     return NULL;
00138   }
00139   return fp;
00140 }
00141 
00142 // Enumerate the Samples in the set one-by-one calling the enumertor's
00143   // EnumCharSamp method for each sample
00144 bool CharSampSet::EnumSamples(string file_name, CharSampEnum *enum_obj) {
00145   CachedFile *fp_in;
00146   unsigned int val32;
00147   long i64_size,
00148     i64_pos;
00149   // open the file
00150   fp_in = new CachedFile(file_name);
00151   if (fp_in == NULL) {
00152     return false;
00153   }
00154   i64_size = fp_in->Size();
00155   if (i64_size < 1) {
00156     return false;
00157   }
00158   // read and verify marker
00159   if (fp_in->Read(&val32, sizeof(val32)) != sizeof(val32)) {
00160     return false;
00161   }
00162   if (val32 != 0xfefeabd0) {
00163     return false;
00164   }
00165   // start loading char samples
00166   while (fp_in->eof() == false) {
00167     CharSamp *new_samp = CharSamp::FromCharDumpFile(fp_in);
00168     i64_pos = fp_in->Tell();
00169     if (new_samp != NULL) {
00170       bool ret_flag = (enum_obj)->EnumCharSamp(new_samp,
00171                                                (100.0f * i64_pos / i64_size));
00172       delete new_samp;
00173       if (ret_flag == false) {
00174         break;
00175       }
00176     }
00177   }
00178   delete fp_in;
00179   return true;
00180 }
00181 
00182 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines