tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/dict/hyphen.cpp
Go to the documentation of this file.
00001 /* -*-C-*-
00002  ********************************************************************************
00003  * File:        hyphen.cpp  (Formerly hyphen.c)
00004  * Description: Functions for maintaining information about hyphenated words.
00005  * Author:       Mark Seaman, OCR Technology
00006  * Created:      Fri Oct 16 14:37:00 1987
00007  * Modified:     Thu Mar 14 11:09:43 1991 (Mark Seaman) marks@hpgrlt
00008  * Language:     C
00009  * Package:      N/A
00010  * Status:       Reusable Software Component
00011  *
00012  * (c) Copyright 1987, Hewlett-Packard Company.
00013  ** Licensed under the Apache License, Version 2.0 (the "License");
00014  ** you may not use this file except in compliance with the License.
00015  ** You may obtain a copy of the License at
00016  ** http://www.apache.org/licenses/LICENSE-2.0
00017  ** Unless required by applicable law or agreed to in writing, software
00018  ** distributed under the License is distributed on an "AS IS" BASIS,
00019  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020  ** See the License for the specific language governing permissions and
00021  ** limitations under the License.
00022  *
00023  *********************************************************************************/
00024 
00025 #include "dict.h"
00026 
00027 namespace tesseract {
00028 
00029 // Unless the previous word was the last one on the line, and the current
00030 // one is not (thus it is the first one on the line), erase hyphen_word_,
00031 // clear hyphen_active_dawgs_, hyphen_constraints_ update last_word_on_line_.
00032 void Dict::reset_hyphen_vars(bool last_word_on_line) {
00033   if (!(last_word_on_line_ == true && last_word_on_line == false)) {
00034     if (hyphen_word_ != NULL) {
00035       delete hyphen_word_;
00036       hyphen_word_ = NULL;
00037       hyphen_active_dawgs_.clear();
00038     }
00039   }
00040   if (hyphen_debug_level) {
00041     tprintf("reset_hyphen_vars: last_word_on_line %d -> %d\n",
00042             last_word_on_line_, last_word_on_line);
00043   }
00044   last_word_on_line_ = last_word_on_line;
00045 }
00046 
00047 // Update hyphen_word_, and copy the given DawgPositionVectors into
00048 // hyphen_active_dawgs_.
00049 void Dict::set_hyphen_word(const WERD_CHOICE &word,
00050                            const DawgPositionVector &active_dawgs) {
00051   if (hyphen_word_ == NULL) {
00052     hyphen_word_ = new WERD_CHOICE(word.unicharset());
00053     hyphen_word_->make_bad();
00054   }
00055   if (hyphen_word_->rating() > word.rating()) {
00056     *hyphen_word_ = word;
00057     // Remove the last unichar id as it is a hyphen, and remove
00058     // any unichar_string/lengths that are present.
00059     hyphen_word_->remove_last_unichar_id();
00060     hyphen_active_dawgs_ = active_dawgs;
00061   }
00062   if (hyphen_debug_level) {
00063     hyphen_word_->print("set_hyphen_word: ");
00064   }
00065 }
00066 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines