tesseract
3.03
|
00001 /* -*-C-*- 00002 ******************************************************************************** 00003 * File: hyphen.c (Formerly hyphen.c) 00004 * Description: Functions for maintaining information about hyphenated words. 00005 * Author: Mark Seaman, OCR Technology 00006 * Created: Fri Oct 16 14:37:00 1987 00007 * Modified: Thu Mar 14 11:09:43 1991 (Mark Seaman) marks@hpgrlt 00008 * Language: C 00009 * Package: N/A 00010 * Status: Reusable Software Component 00011 * 00012 * (c) Copyright 1987, Hewlett-Packard Company. 00013 ** Licensed under the Apache License, Version 2.0 (the "License"); 00014 ** you may not use this file except in compliance with the License. 00015 ** You may obtain a copy of the License at 00016 ** http://www.apache.org/licenses/LICENSE-2.0 00017 ** Unless required by applicable law or agreed to in writing, software 00018 ** distributed under the License is distributed on an "AS IS" BASIS, 00019 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00020 ** See the License for the specific language governing permissions and 00021 ** limitations under the License. 00022 * 00023 *********************************************************************************/ 00024 00025 #include "dict.h" 00026 00027 namespace tesseract { 00028 00029 // Unless the previous word was the last one on the line, and the current 00030 // one is not (thus it is the first one on the line), erase hyphen_word_, 00031 // clear hyphen_active_dawgs_, hyphen_constraints_ update last_word_on_line_. 00032 void Dict::reset_hyphen_vars(bool last_word_on_line) { 00033 if (!(last_word_on_line_ == true && last_word_on_line == false)) { 00034 if (hyphen_word_ != NULL) { 00035 delete hyphen_word_; 00036 hyphen_word_ = NULL; 00037 hyphen_active_dawgs_.clear(); 00038 } 00039 } 00040 if (hyphen_debug_level) { 00041 tprintf("reset_hyphen_vars: last_word_on_line %d -> %d\n", 00042 last_word_on_line_, last_word_on_line); 00043 } 00044 last_word_on_line_ = last_word_on_line; 00045 } 00046 00047 // Update hyphen_word_, and copy the given DawgPositionVectors into 00048 // hyphen_active_dawgs_. 00049 void Dict::set_hyphen_word(const WERD_CHOICE &word, 00050 const DawgPositionVector &active_dawgs) { 00051 if (hyphen_word_ == NULL) { 00052 hyphen_word_ = new WERD_CHOICE(word.unicharset()); 00053 hyphen_word_->make_bad(); 00054 } 00055 if (hyphen_word_->rating() > word.rating()) { 00056 *hyphen_word_ = word; 00057 // Remove the last unichar id as it is a hyphen, and remove 00058 // any unichar_string/lengths that are present. 00059 hyphen_word_->remove_last_unichar_id(); 00060 hyphen_active_dawgs_ = active_dawgs; 00061 } 00062 if (hyphen_debug_level) { 00063 hyphen_word_->print("set_hyphen_word: "); 00064 } 00065 } 00066 } // namespace tesseract