tesseract
3.03
|
00001 /********************************************************************** 00002 * File: stringrenderer.h 00003 * Description: Class for rendering UTF-8 text to an image, and retrieving 00004 * bounding boxes around each grapheme cluster. 00005 * 00006 * Instances are created using a font description string 00007 * (eg. "Arial Italic 12"; see pango_font_info.h for the format) 00008 * and the page dimensions. Other renderer properties such as 00009 * spacing, ligaturization, as well a preprocessing behavior such 00010 * as removal of unrenderable words and a special n-gram mode may 00011 * be set using respective set_* methods. 00012 * 00013 * Author: Ranjith Unnikrishnan 00014 * Created: Mon Nov 18 2013 00015 * 00016 * (C) Copyright 2013, Google Inc. 00017 * Licensed under the Apache License, Version 2.0 (the "License"); 00018 * you may not use this file except in compliance with the License. 00019 * You may obtain a copy of the License at 00020 * http://www.apache.org/licenses/LICENSE-2.0 00021 * Unless required by applicable law or agreed to in writing, software 00022 * distributed under the License is distributed on an "AS IS" BASIS, 00023 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00024 * See the License for the specific language governing permissions and 00025 * limitations under the License. 00026 * 00027 **********************************************************************/ 00028 00029 #ifndef TESSERACT_TRAINING_STRINGRENDERER_H_ 00030 #define TESSERACT_TRAINING_STRINGRENDERER_H_ 00031 00032 #include <string> 00033 #include <vector> 00034 00035 #include "hashfn.h" 00036 #include "host.h" 00037 #include "pango_font_info.h" 00038 #include "pango/pango-layout.h" 00039 #include "pango/pangocairo.h" 00040 00041 struct Boxa; 00042 struct Pix; 00043 00044 namespace tesseract { 00045 00046 class BoxChar; 00047 00048 class StringRenderer { 00049 public: 00050 StringRenderer(const string& font_desc, int page_width, int page_height); 00051 ~StringRenderer(); 00052 00053 // Renders the text with the chosen font and returns the byte offset upto 00054 // which the text could be rendered so as to fit the specified page 00055 // dimensions. 00056 int RenderToImage(const char* text, int text_length, Pix** pix); 00057 int RenderToBinaryImage(const char* text, int text_length, int threshold, 00058 Pix** pix); 00059 // Renders a line of text with all available fonts that were able to render 00060 // the text. 00061 int RenderAllFontsToImage(const char* text, int text_length, 00062 string* font_used, Pix** pix); 00063 00064 bool set_font(const string& desc); 00065 void set_char_spacing(double char_spacing) { 00066 char_spacing_ = char_spacing; 00067 } 00068 void set_leading(int leading) { 00069 leading_ = leading; 00070 } 00071 void set_resolution(const int resolution); 00072 void set_vertical_text(bool vertical_text) { 00073 vertical_text_ = vertical_text; 00074 } 00075 void set_gravity_hint_strong(bool gravity_hint_strong) { 00076 gravity_hint_strong_ = gravity_hint_strong; 00077 } 00078 void set_render_fullwidth_latin(bool render_fullwidth_latin) { 00079 render_fullwidth_latin_ = render_fullwidth_latin; 00080 } 00081 void set_page(int page) { 00082 page_ = page; 00083 } 00084 void set_box_padding(int val) { 00085 box_padding_ = val; 00086 } 00087 void set_drop_uncovered_chars(bool val) { 00088 drop_uncovered_chars_ = val; 00089 } 00090 void set_strip_unrenderable_words(bool val) { 00091 strip_unrenderable_words_ = val; 00092 } 00093 void set_output_word_boxes(bool val) { 00094 output_word_boxes_ = val; 00095 } 00096 // Before rendering the string, replace latin characters with their optional 00097 // ligatured forms (such as "fi", "ffi" etc.) if the font_ covers those 00098 // unicodes. 00099 void set_add_ligatures(bool add_ligatures) { 00100 add_ligatures_ = add_ligatures; 00101 } 00102 // Set the rgb value of the text ink. Values range in [0, 1.0] 00103 void set_pen_color(double r, double g, double b) { 00104 pen_color_[0] = r; 00105 pen_color_[1] = g; 00106 pen_color_[2] = b; 00107 } 00108 void set_h_margin(const int h_margin) { 00109 h_margin_ = h_margin; 00110 } 00111 void set_v_margin(const int v_margin) { 00112 v_margin_ = v_margin; 00113 } 00114 const PangoFontInfo& font() const { 00115 return font_; 00116 } 00117 const int h_margin() const { 00118 return h_margin_; 00119 } 00120 const int v_margin() const { 00121 return v_margin_; 00122 } 00123 00124 // Get the boxchars of all clusters rendered thus far (or since the last call 00125 // to ClearBoxes()). 00126 const vector<BoxChar*>& GetBoxes() const; 00127 // Get the rendered page bounding boxes of all pages created thus far (or 00128 // since last call to ClearBoxes()). 00129 Boxa* GetPageBoxes() const; 00130 00131 // Rotate the boxes on the most recent page by the given rotation. 00132 void RotatePageBoxes(float rotation); 00133 // Delete all boxes. 00134 void ClearBoxes(); 00135 void WriteAllBoxes(const string& filename) const; 00136 // Removes space-delimited words from the string that are not renderable by 00137 // the current font and returns the count of such words. 00138 int StripUnrenderableWords(string* utf8_text) const; 00139 00140 // Insert a Word Joiner symbol (U+2060) between adjacent characters, excluding 00141 // spaces and combining types, in each word before rendering to ensure words 00142 // are not broken across lines. The output boxchars will not contain the 00143 // joiner. 00144 static string InsertWordJoiners(const string& text); 00145 00146 // Helper functions to convert fullwidth Latin and halfwidth Basic Latin. 00147 static string ConvertBasicLatinToFullwidthLatin(const string& text); 00148 static string ConvertFullwidthLatinToBasicLatin(const string& text); 00149 00150 protected: 00151 // Init and free local renderer objects. 00152 void InitPangoCairo(); 00153 void SetLayoutProperties(); 00154 void FreePangoCairo(); 00155 // Compute bounding boxes around grapheme clusters. 00156 void ComputeClusterBoxes(); 00157 void CorrectBoxPositionsToLayout(vector<BoxChar*>* boxchars); 00158 bool GetClusterStrings(vector<string>* cluster_text); 00159 int FindFirstPageBreakOffset(const char* text, int text_length); 00160 00161 PangoFontInfo font_; 00162 // Page properties 00163 int page_width_, page_height_, h_margin_, v_margin_; 00164 // Text rendering properties 00165 int pen_color_[3]; 00166 double char_spacing_; 00167 int leading_, resolution_; 00168 bool vertical_text_; 00169 bool gravity_hint_strong_; 00170 bool render_fullwidth_latin_; 00171 // Text filtering options 00172 bool drop_uncovered_chars_; 00173 bool strip_unrenderable_words_; 00174 bool add_ligatures_; 00175 bool output_word_boxes_; 00176 // Pango and cairo specific objects 00177 cairo_surface_t* surface_; 00178 cairo_t* cr_; 00179 PangoLayout* layout_; 00180 // Internal state of current page number, updated on successive calls to 00181 // RenderToImage() 00182 int start_box_; 00183 int page_; 00184 // Boxes and associated text for all pages rendered with RenderToImage() since 00185 // the last call to ClearBoxes(). 00186 vector<BoxChar*> boxchars_; 00187 int box_padding_; 00188 // Bounding boxes for pages since the last call to ClearBoxes(). 00189 Boxa* page_boxes_; 00190 00191 // Objects cached for subsequent calls to RenderAllFontsToImage() 00192 hash_map<char32, inT64> char_map_; // Time-saving char histogram. 00193 int total_chars_; // Number in the string to be rendered. 00194 int font_index_; // Index of next font to use in font list. 00195 int last_offset_; // Offset returned from last successful rendering 00196 00197 private: 00198 StringRenderer(const StringRenderer&); 00199 void operator=(const StringRenderer&); 00200 }; 00201 } // namespace tesseract 00202 00203 #endif // THIRD_PARTY_TESSERACT_TRAINING_STRINGRENDERER_H_