tesseract
3.03
|
00001 00002 // File: fontinfo.h 00003 // Description: Font information classes abstracted from intproto.h/cpp. 00004 // Author: rays@google.com (Ray Smith) 00005 // Created: Tue May 17 17:08:01 PDT 2011 00006 // 00007 // (C) Copyright 2011, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 00021 #ifndef TESSERACT_CCSTRUCT_FONTINFO_H_ 00022 #define TESSERACT_CCSTRUCT_FONTINFO_H_ 00023 00024 #include "genericvector.h" 00025 #include "host.h" 00026 #include "unichar.h" 00027 00028 template <typename T> class UnicityTable; 00029 00030 namespace tesseract { 00031 00032 class BitVector; 00033 00034 // Struct for information about spacing between characters in a particular font. 00035 struct FontSpacingInfo { 00036 inT16 x_gap_before; 00037 inT16 x_gap_after; 00038 GenericVector<UNICHAR_ID> kerned_unichar_ids; 00039 GenericVector<inT16> kerned_x_gaps; 00040 }; 00041 00042 /* 00043 * font_properties contains properties about boldness, italicness, fixed pitch, 00044 * serif, fraktur 00045 */ 00046 struct FontInfo { 00047 FontInfo() : name(NULL), properties(0), universal_id(0), spacing_vec(NULL) {} 00048 ~FontInfo() {} 00049 00050 // Writes to the given file. Returns false in case of error. 00051 bool Serialize(FILE* fp) const; 00052 // Reads from the given file. Returns false in case of error. 00053 // If swap is true, assumes a big/little-endian swap is needed. 00054 bool DeSerialize(bool swap, FILE* fp); 00055 00056 // Reserves unicharset_size spots in spacing_vec. 00057 void init_spacing(int unicharset_size) { 00058 spacing_vec = new GenericVector<FontSpacingInfo *>(); 00059 spacing_vec->init_to_size(unicharset_size, NULL); 00060 } 00061 // Adds the given pointer to FontSpacingInfo to spacing_vec member 00062 // (FontInfo class takes ownership of the pointer). 00063 // Note: init_spacing should be called before calling this function. 00064 void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) { 00065 ASSERT_HOST(spacing_vec != NULL && spacing_vec->size() > uch_id); 00066 (*spacing_vec)[uch_id] = spacing_info; 00067 } 00068 00069 // Returns the pointer to FontSpacingInfo for the given UNICHAR_ID. 00070 const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const { 00071 return (spacing_vec == NULL || spacing_vec->size() <= uch_id) ? 00072 NULL : (*spacing_vec)[uch_id]; 00073 } 00074 00075 // Fills spacing with the value of the x gap expected between the two given 00076 // UNICHAR_IDs. Returns true on success. 00077 bool get_spacing(UNICHAR_ID prev_uch_id, 00078 UNICHAR_ID uch_id, 00079 int *spacing) const { 00080 const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id); 00081 const FontSpacingInfo *fsi = this->get_spacing(uch_id); 00082 if (prev_fsi == NULL || fsi == NULL) return false; 00083 int i = 0; 00084 for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) { 00085 if (prev_fsi->kerned_unichar_ids[i] == uch_id) break; 00086 } 00087 if (i < prev_fsi->kerned_unichar_ids.size()) { 00088 *spacing = prev_fsi->kerned_x_gaps[i]; 00089 } else { 00090 *spacing = prev_fsi->x_gap_after + fsi->x_gap_before; 00091 } 00092 return true; 00093 } 00094 00095 bool is_italic() const { return properties & 1; } 00096 bool is_bold() const { return (properties & 2) != 0; } 00097 bool is_fixed_pitch() const { return (properties & 4) != 0; } 00098 bool is_serif() const { return (properties & 8) != 0; } 00099 bool is_fraktur() const { return (properties & 16) != 0; } 00100 00101 char* name; 00102 uinT32 properties; 00103 // The universal_id is a field reserved for the initialization process 00104 // to assign a unique id number to all fonts loaded for the current 00105 // combination of languages. This id will then be returned by 00106 // ResultIterator::WordFontAttributes. 00107 inT32 universal_id; 00108 // Horizontal spacing between characters (indexed by UNICHAR_ID). 00109 GenericVector<FontSpacingInfo *> *spacing_vec; 00110 }; 00111 00112 // Every class (character) owns a FontSet that represents all the fonts that can 00113 // render this character. 00114 // Since almost all the characters from the same script share the same set of 00115 // fonts, the sets are shared over multiple classes (see 00116 // Classify::fontset_table_). Thus, a class only store an id to a set. 00117 // Because some fonts cannot render just one character of a set, there are a 00118 // lot of FontSet that differ only by one font. Rather than storing directly 00119 // the FontInfo in the FontSet structure, it's better to share FontInfos among 00120 // FontSets (Classify::fontinfo_table_). 00121 struct FontSet { 00122 int size; 00123 int* configs; // FontInfo ids 00124 }; 00125 00126 // Class that adds a bit of functionality on top of GenericVector to 00127 // implement a table of FontInfo that replaces UniCityTable<FontInfo>. 00128 // TODO(rays) change all references once all existing traineddata files 00129 // are replaced. 00130 class FontInfoTable : public GenericVector<FontInfo> { 00131 public: 00132 FontInfoTable(); 00133 ~FontInfoTable(); 00134 00135 // Writes to the given file. Returns false in case of error. 00136 bool Serialize(FILE* fp) const; 00137 // Reads from the given file. Returns false in case of error. 00138 // If swap is true, assumes a big/little-endian swap is needed. 00139 bool DeSerialize(bool swap, FILE* fp); 00140 00141 // Returns true if the given set of fonts includes one with the same 00142 // properties as font_id. 00143 bool SetContainsFontProperties(int font_id, 00144 const GenericVector<int>& font_set) const; 00145 // Returns true if the given set of fonts includes multiple properties. 00146 bool SetContainsMultipleFontProperties( 00147 const GenericVector<int>& font_set) const; 00148 00149 // Moves any non-empty FontSpacingInfo entries from other to this. 00150 void MoveSpacingInfoFrom(FontInfoTable* other); 00151 // Moves this to the target unicity table. 00152 void MoveTo(UnicityTable<FontInfo>* target); 00153 }; 00154 00155 // Compare FontInfo structures. 00156 bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2); 00157 // Compare FontSet structures. 00158 bool CompareFontSet(const FontSet& fs1, const FontSet& fs2); 00159 // Deletion callbacks for GenericVector. 00160 void FontInfoDeleteCallback(FontInfo f); 00161 void FontSetDeleteCallback(FontSet fs); 00162 00163 // Callbacks used by UnicityTable to read/write FontInfo/FontSet structures. 00164 bool read_info(FILE* f, FontInfo* fi, bool swap); 00165 bool write_info(FILE* f, const FontInfo& fi); 00166 bool read_spacing_info(FILE *f, FontInfo* fi, bool swap); 00167 bool write_spacing_info(FILE* f, const FontInfo& fi); 00168 bool read_set(FILE* f, FontSet* fs, bool swap); 00169 bool write_set(FILE* f, const FontSet& fs); 00170 00171 } // namespace tesseract. 00172 00173 #endif /* THIRD_PARTY_TESSERACT_CCSTRUCT_FONTINFO_H_ */