tesseract
3.03
|
00001 /********************************************************************** 00002 * File: stringrenderer.cpp 00003 * Description: Class for rendering UTF-8 text to an image, and retrieving 00004 * bounding boxes around each grapheme cluster. 00005 * Author: Ranjith Unnikrishnan 00006 * Created: Mon Nov 18 2013 00007 * 00008 * (C) Copyright 2013, Google Inc. 00009 * Licensed under the Apache License, Version 2.0 (the "License"); 00010 * you may not use this file except in compliance with the License. 00011 * You may obtain a copy of the License at 00012 * http://www.apache.org/licenses/LICENSE-2.0 00013 * Unless required by applicable law or agreed to in writing, software 00014 * distributed under the License is distributed on an "AS IS" BASIS, 00015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 * See the License for the specific language governing permissions and 00017 * limitations under the License. 00018 * 00019 **********************************************************************/ 00020 00021 #include "stringrenderer.h" 00022 00023 #include <stdio.h> 00024 #include <string.h> 00025 #include <algorithm> 00026 #include <map> 00027 #include <utility> 00028 #include <vector> 00029 00030 #include "allheaders.h" // from leptonica 00031 #include "boxchar.h" 00032 #include "ligature_table.h" 00033 #include "normstrngs.h" 00034 #include "pango/pango-font.h" 00035 #include "pango/pango-glyph-item.h" 00036 #include "tlog.h" 00037 #include "unichar.h" 00038 #include "unicode/uchar.h" // from libicu 00039 #include "util.h" 00040 00041 #ifndef USE_STD_NAMESPACE 00042 // Compatability with pango 1.20. 00043 #include "pango/pango-glyph-item-private.h" 00044 #define pango_glyph_item_iter_init_start _pango_glyph_item_iter_init_start 00045 #define pango_glyph_item_iter_next_cluster _pango_glyph_item_iter_next_cluster 00046 #else 00047 using std::map; 00048 using std::max; 00049 using std::min; 00050 using std::swap; 00051 #endif 00052 00053 namespace tesseract { 00054 00055 static const int kDefaultOutputResolution = 300; 00056 00057 // Word joiner (U+2060) inserted after letters in ngram mode, as per 00058 // recommendation in http://unicode.org/reports/tr14/ to avoid line-breaks at 00059 // hyphens and other non-alpha characters. 00060 static const char* kWordJoinerUTF8 = "\u2060"; 00061 static const char32 kWordJoiner = 0x2060; 00062 00063 static bool IsCombiner(int ch) { 00064 const int char_type = u_charType(ch); 00065 return ((char_type == U_NON_SPACING_MARK) || 00066 (char_type == U_ENCLOSING_MARK) || 00067 (char_type == U_COMBINING_SPACING_MARK)); 00068 } 00069 00070 static string EncodeAsUTF8(const char32 ch32) { 00071 UNICHAR uni_ch(ch32); 00072 return string(uni_ch.utf8(), uni_ch.utf8_len()); 00073 } 00074 00075 00076 /* static */ 00077 Pix* CairoARGB32ToPixFormat(cairo_surface_t *surface) { 00078 if (cairo_image_surface_get_format(surface) != CAIRO_FORMAT_ARGB32) { 00079 printf("Unexpected surface format %d\n", 00080 cairo_image_surface_get_format(surface)); 00081 return NULL; 00082 } 00083 const int width = cairo_image_surface_get_width(surface); 00084 const int height = cairo_image_surface_get_height(surface); 00085 Pix* pix = pixCreate(width, height, 32); 00086 int byte_stride = cairo_image_surface_get_stride(surface); 00087 00088 for (int i = 0; i < height; ++i) { 00089 memcpy(reinterpret_cast<unsigned char*>(pix->data + i * pix->wpl) + 1, 00090 cairo_image_surface_get_data(surface) + i * byte_stride, 00091 byte_stride - ((i == height - 1) ? 1 : 0)); 00092 } 00093 return pix; 00094 } 00095 00096 StringRenderer::StringRenderer(const string& font_desc, int page_width, 00097 int page_height) : 00098 page_width_(page_width), page_height_(page_height), 00099 h_margin_(50), v_margin_(50), char_spacing_(0), leading_(0), 00100 vertical_text_(false), gravity_hint_strong_(false), 00101 render_fullwidth_latin_(false) ,drop_uncovered_chars_(true), 00102 strip_unrenderable_words_(false), add_ligatures_(false), 00103 output_word_boxes_(false), surface_(NULL), cr_(NULL), 00104 layout_(NULL), start_box_(0), page_(0), box_padding_(0), 00105 total_chars_(0), font_index_(0), last_offset_(0) { 00106 pen_color_[0] = 0.0; 00107 pen_color_[1] = 0.0; 00108 pen_color_[2] = 0.0; 00109 set_font(font_desc); 00110 set_resolution(kDefaultOutputResolution); 00111 page_boxes_ = NULL; 00112 } 00113 00114 bool StringRenderer::set_font(const string& desc) { 00115 bool success = font_.ParseFontDescriptionName(desc); 00116 font_.set_resolution(resolution_); 00117 return success; 00118 } 00119 00120 void StringRenderer::set_resolution(const int resolution) { 00121 resolution_ = resolution; 00122 font_.set_resolution(resolution); 00123 } 00124 00125 StringRenderer::~StringRenderer() { 00126 ClearBoxes(); 00127 FreePangoCairo(); 00128 } 00129 00130 void StringRenderer::InitPangoCairo() { 00131 FreePangoCairo(); 00132 surface_ = cairo_image_surface_create(CAIRO_FORMAT_ARGB32, page_width_, 00133 page_height_); 00134 cr_ = cairo_create(surface_); 00135 layout_ = pango_cairo_create_layout(cr_); 00136 00137 if (vertical_text_) { 00138 PangoContext* context = pango_layout_get_context(layout_); 00139 pango_context_set_base_gravity(context, PANGO_GRAVITY_EAST); 00140 if (gravity_hint_strong_) { 00141 pango_context_set_gravity_hint(context, PANGO_GRAVITY_HINT_STRONG); 00142 } 00143 pango_layout_context_changed(layout_); 00144 } 00145 00146 SetLayoutProperties(); 00147 } 00148 00149 void StringRenderer::SetLayoutProperties() { 00150 string font_desc = font_.DescriptionName(); 00151 // Specify the font via a description name 00152 PangoFontDescription *desc = 00153 pango_font_description_from_string(font_desc.c_str()); 00154 // Assign the font description to the layout 00155 pango_layout_set_font_description(layout_, desc); 00156 pango_font_description_free(desc); // free the description 00157 pango_cairo_context_set_resolution(pango_layout_get_context(layout_), 00158 resolution_); 00159 00160 int max_width = page_width_ - 2 * h_margin_; 00161 int max_height = page_height_ - 2 * v_margin_; 00162 tlog(3, "max_width = %d, max_height = %d\n", max_width, max_height); 00163 if (vertical_text_) { 00164 swap(max_width, max_height); 00165 } 00166 pango_layout_set_width(layout_, max_width * PANGO_SCALE); 00167 pango_layout_set_wrap(layout_, PANGO_WRAP_WORD); 00168 00169 // Adjust character spacing 00170 PangoAttrList* attr_list = pango_attr_list_new(); 00171 if (char_spacing_) { 00172 PangoAttribute* spacing_attr = pango_attr_letter_spacing_new( 00173 static_cast<int>(char_spacing_ * PANGO_SCALE + 0.5)); 00174 spacing_attr->start_index = 0; 00175 spacing_attr->end_index = static_cast<guint>(-1); 00176 pango_attr_list_change(attr_list, spacing_attr); 00177 } 00178 pango_layout_set_attributes(layout_, attr_list); 00179 pango_attr_list_unref(attr_list); 00180 // Adjust line spacing 00181 if (leading_) { 00182 pango_layout_set_spacing(layout_, leading_ * PANGO_SCALE); 00183 } 00184 } 00185 00186 void StringRenderer::FreePangoCairo() { 00187 if (layout_) { 00188 g_object_unref(layout_); 00189 layout_ = NULL; 00190 } 00191 if (cr_) { 00192 cairo_destroy(cr_); 00193 cr_ = NULL; 00194 } 00195 if (surface_) { 00196 cairo_surface_destroy(surface_); 00197 surface_ = NULL; 00198 } 00199 } 00200 00201 00202 // Returns offset in utf8 bytes to first page. 00203 int StringRenderer::FindFirstPageBreakOffset(const char* text, 00204 int text_length) { 00205 if (!text_length) return 0; 00206 const int max_height = (page_height_ - 2 * v_margin_); 00207 const int max_width = (page_width_ - 2 * h_margin_); 00208 const int max_layout_height = vertical_text_ ? max_width : max_height; 00209 00210 UNICHAR::const_iterator it = UNICHAR::begin(text, text_length); 00211 const UNICHAR::const_iterator it_end = UNICHAR::end(text, text_length); 00212 const int kMaxUnicodeBufLength = 15000; 00213 for (int i = 0; i < kMaxUnicodeBufLength && it != it_end; ++it, ++i); 00214 int buf_length = it.utf8_data() - text; 00215 tlog(1, "len = %d buf_len = %d\n", text_length, buf_length); 00216 pango_layout_set_text(layout_, text, buf_length); 00217 00218 PangoLayoutIter* line_iter = NULL; 00219 { // Fontconfig caches some info here that is not freed before exit. 00220 DISABLE_HEAP_LEAK_CHECK; 00221 line_iter = pango_layout_get_iter(layout_); 00222 } 00223 bool first_page = true; 00224 int page_top = 0; 00225 int offset = buf_length; 00226 do { 00227 // Get bounding box of the current line 00228 PangoRectangle line_ink_rect; 00229 pango_layout_iter_get_line_extents(line_iter, &line_ink_rect, NULL); 00230 pango_extents_to_pixels(&line_ink_rect, NULL); 00231 PangoLayoutLine* line = pango_layout_iter_get_line_readonly(line_iter); 00232 if (first_page) { 00233 page_top = line_ink_rect.y; 00234 first_page = false; 00235 } 00236 int line_bottom = line_ink_rect.y + line_ink_rect.height; 00237 if (line_bottom - page_top > max_layout_height) { 00238 offset = line->start_index; 00239 tlog(1, "Found offset = %d\n", offset); 00240 break; 00241 } 00242 } while (pango_layout_iter_next_line(line_iter)); 00243 pango_layout_iter_free(line_iter); 00244 return offset; 00245 } 00246 00247 const vector<BoxChar*>& StringRenderer::GetBoxes() const { 00248 return boxchars_; 00249 } 00250 00251 Boxa* StringRenderer::GetPageBoxes() const { 00252 return page_boxes_; 00253 } 00254 00255 void StringRenderer::RotatePageBoxes(float rotation) { 00256 BoxChar::RotateBoxes(rotation, page_width_ / 2, page_height_ / 2, 00257 start_box_, boxchars_.size(), &boxchars_); 00258 } 00259 00260 00261 void StringRenderer::ClearBoxes() { 00262 for (int i = 0; i < boxchars_.size(); ++i) 00263 delete boxchars_[i]; 00264 boxchars_.clear(); 00265 boxaDestroy(&page_boxes_); 00266 } 00267 00268 void StringRenderer::WriteAllBoxes(const string& filename) const { 00269 BoxChar::WriteTesseractBoxFile(filename, page_height_, boxchars_); 00270 } 00271 00272 // Returns cluster strings in logical order. 00273 bool StringRenderer::GetClusterStrings(vector<string>* cluster_text) { 00274 map<int, string> start_byte_to_text; 00275 PangoLayoutIter* run_iter = pango_layout_get_iter(layout_); 00276 const char* full_text = pango_layout_get_text(layout_); 00277 do { 00278 PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter); 00279 if (!run) { 00280 // End of line NULL run marker 00281 tlog(2, "Found end of line marker\n"); 00282 continue; 00283 } 00284 PangoGlyphItemIter cluster_iter; 00285 gboolean have_cluster; 00286 for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter, 00287 run, full_text); 00288 have_cluster; 00289 have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) { 00290 const int start_byte_index = cluster_iter.start_index; 00291 const int end_byte_index = cluster_iter.end_index; 00292 string text = string(full_text + start_byte_index, 00293 end_byte_index - start_byte_index); 00294 if (IsUTF8Whitespace(text.c_str())) { 00295 tlog(2, "Found whitespace\n"); 00296 text = " "; 00297 } 00298 tlog(2, "start_byte=%d end_byte=%d : '%s'\n", start_byte_index, 00299 end_byte_index, text.c_str()); 00300 if (add_ligatures_) { 00301 // Make sure the output box files have ligatured text in case the font 00302 // decided to use an unmapped glyph. 00303 text = LigatureTable::Get()->AddLigatures(text, NULL); 00304 } 00305 start_byte_to_text[start_byte_index] = text; 00306 } 00307 } while (pango_layout_iter_next_run(run_iter)); 00308 pango_layout_iter_free(run_iter); 00309 00310 cluster_text->clear(); 00311 for (map<int, string>::const_iterator it = start_byte_to_text.begin(); 00312 it != start_byte_to_text.end(); ++it) { 00313 cluster_text->push_back(it->second); 00314 } 00315 return cluster_text->size(); 00316 } 00317 00318 // Merges an array of BoxChars into words based on the identification of 00319 // BoxChars containing the space character as inter-word separators. 00320 // 00321 // Sometime two adjacent characters in the sequence may be detected as lying on 00322 // different lines based on their spatial positions. This may be the result of a 00323 // newline character at end of the last word on a line in the source text, or of 00324 // a discretionary line-break created by Pango at intra-word locations like 00325 // hyphens. When this is detected the word is split at that location into 00326 // multiple BoxChars. Otherwise, each resulting BoxChar will contain a word and 00327 // its bounding box. 00328 static void MergeBoxCharsToWords(vector<BoxChar*>* boxchars) { 00329 vector<BoxChar*> result; 00330 bool started_word = false; 00331 for (int i = 0; i < boxchars->size(); ++i) { 00332 if (boxchars->at(i)->ch() == " " || 00333 boxchars->at(i)->box() == NULL) { 00334 result.push_back(boxchars->at(i)); 00335 boxchars->at(i) = NULL; 00336 started_word = false; 00337 continue; 00338 } 00339 00340 if (!started_word) { 00341 // Begin new word 00342 started_word = true; 00343 result.push_back(boxchars->at(i)); 00344 boxchars->at(i) = NULL; 00345 } else { 00346 BoxChar* last_boxchar = result.back(); 00347 // Compute bounding box union 00348 const Box* box = boxchars->at(i)->box(); 00349 Box* last_box = last_boxchar->mutable_box(); 00350 int left = min(last_box->x, box->x); 00351 int right = max(last_box->x + last_box->w, box->x + box->w); 00352 int top = min(last_box->y, box->y); 00353 int bottom = max(last_box->y + last_box->h, box->y + box->h); 00354 // Conclude that the word was broken to span multiple lines based on the 00355 // size of the merged bounding box in relation to those of the individual 00356 // characters seen so far. 00357 if (right - left > last_box->w + 5 * box->w) { 00358 tlog(1, "Found line break after '%s'", last_boxchar->ch().c_str()); 00359 // Insert a fake interword space and start a new word with the current 00360 // boxchar. 00361 result.push_back(new BoxChar(" ", 1)); 00362 result.push_back(boxchars->at(i)); 00363 boxchars->at(i) = NULL; 00364 continue; 00365 } 00366 // Append to last word 00367 last_boxchar->mutable_ch()->append(boxchars->at(i)->ch()); 00368 last_box->x = left; 00369 last_box->w = right - left; 00370 last_box->y = top; 00371 last_box->h = bottom - top; 00372 delete boxchars->at(i); 00373 boxchars->at(i) = NULL; 00374 } 00375 } 00376 boxchars->swap(result); 00377 } 00378 00379 00380 void StringRenderer::ComputeClusterBoxes() { 00381 const char* text = pango_layout_get_text(layout_); 00382 PangoLayoutIter* cluster_iter = pango_layout_get_iter(layout_); 00383 00384 // Do a first pass to store cluster start indexes. 00385 vector<int> cluster_start_indices; 00386 do { 00387 cluster_start_indices.push_back(pango_layout_iter_get_index(cluster_iter)); 00388 tlog(3, "Added %d\n", cluster_start_indices.back()); 00389 } while (pango_layout_iter_next_cluster(cluster_iter)); 00390 pango_layout_iter_free(cluster_iter); 00391 cluster_start_indices.push_back(strlen(text)); 00392 tlog(3, "Added last index %d\n", cluster_start_indices.back()); 00393 // Sort the indices and create a map from start to end indices. 00394 sort(cluster_start_indices.begin(), cluster_start_indices.end()); 00395 map<int, int> cluster_start_to_end_index; 00396 for (int i = 0; i < cluster_start_indices.size() - 1; ++i) { 00397 cluster_start_to_end_index[cluster_start_indices[i]] 00398 = cluster_start_indices[i + 1]; 00399 } 00400 00401 // Iterate again to compute cluster boxes and their text with the obtained 00402 // cluster extent information. 00403 cluster_iter = pango_layout_get_iter(layout_); 00404 // Store BoxChars* sorted by their byte start positions 00405 map<int, BoxChar*> start_byte_to_box; 00406 do { 00407 PangoRectangle cluster_rect; 00408 pango_layout_iter_get_cluster_extents(cluster_iter, &cluster_rect, 00409 NULL); 00410 pango_extents_to_pixels(&cluster_rect, NULL); 00411 const int start_byte_index = pango_layout_iter_get_index(cluster_iter); 00412 const int end_byte_index = cluster_start_to_end_index[start_byte_index]; 00413 string cluster_text = string(text + start_byte_index, 00414 end_byte_index - start_byte_index); 00415 if (cluster_text.size() && cluster_text[0] == '\n') { 00416 tlog(2, "Skipping newlines at start of text.\n"); 00417 continue; 00418 } 00419 if (!cluster_rect.width || !cluster_rect.height || 00420 IsUTF8Whitespace(cluster_text.c_str())) { 00421 tlog(2, "Skipping whitespace with boxdim (%d,%d) '%s'\n", 00422 cluster_rect.width, cluster_rect.height, cluster_text.c_str()); 00423 BoxChar* boxchar = new BoxChar(" ", 1); 00424 boxchar->set_page(page_); 00425 start_byte_to_box[start_byte_index] = boxchar; 00426 continue; 00427 } 00428 // Prepare a boxchar for addition at this byte position. 00429 tlog(2, "[%d %d], %d, %d : start_byte=%d end_byte=%d : '%s'\n", 00430 cluster_rect.x, cluster_rect.y, 00431 cluster_rect.width, cluster_rect.height, 00432 start_byte_index, end_byte_index, 00433 cluster_text.c_str()); 00434 ASSERT_HOST_MSG(cluster_rect.width, 00435 "cluster_text:%s start_byte_index:%d\n", 00436 cluster_text.c_str(), start_byte_index); 00437 ASSERT_HOST_MSG(cluster_rect.height, 00438 "cluster_text:%s start_byte_index:%d\n", 00439 cluster_text.c_str(), start_byte_index); 00440 if (box_padding_) { 00441 cluster_rect.x = max(0, cluster_rect.x - box_padding_); 00442 cluster_rect.width += 2 * box_padding_; 00443 cluster_rect.y = max(0, cluster_rect.y - box_padding_); 00444 cluster_rect.height += 2 * box_padding_; 00445 } 00446 if (add_ligatures_) { 00447 // Make sure the output box files have ligatured text in case the font 00448 // decided to use an unmapped glyph. 00449 cluster_text = LigatureTable::Get()->AddLigatures(cluster_text, NULL); 00450 } 00451 BoxChar* boxchar = new BoxChar(cluster_text.c_str(), cluster_text.size()); 00452 boxchar->set_page(page_); 00453 boxchar->AddBox(cluster_rect.x, cluster_rect.y, 00454 cluster_rect.width, cluster_rect.height); 00455 start_byte_to_box[start_byte_index] = boxchar; 00456 } while (pango_layout_iter_next_cluster(cluster_iter)); 00457 pango_layout_iter_free(cluster_iter); 00458 00459 // There is a subtle bug in the cluster text reported by the PangoLayoutIter 00460 // on ligatured characters (eg. The word "Lam-Aliph" in arabic). To work 00461 // around this, we use text reported using the PangoGlyphIter which is 00462 // accurate. 00463 // TODO(ranjith): Revisit whether this is still needed in newer versions of 00464 // pango. 00465 vector<string> cluster_text; 00466 if (GetClusterStrings(&cluster_text)) { 00467 ASSERT_HOST(cluster_text.size() == start_byte_to_box.size()); 00468 int ind = 0; 00469 for (map<int, BoxChar*>::iterator it = start_byte_to_box.begin(); 00470 it != start_byte_to_box.end(); ++it, ++ind) { 00471 it->second->mutable_ch()->swap(cluster_text[ind]); 00472 } 00473 } 00474 00475 // Append to the boxchars list in byte order. 00476 vector<BoxChar*> page_boxchars; 00477 page_boxchars.reserve(start_byte_to_box.size()); 00478 string last_ch; 00479 for (map<int, BoxChar*>::const_iterator it = start_byte_to_box.begin(); 00480 it != start_byte_to_box.end(); ++it) { 00481 if (it->second->ch() == kWordJoinerUTF8) { 00482 // Skip zero-width joiner characters (ZWJs) here. 00483 delete it->second; 00484 } else { 00485 page_boxchars.push_back(it->second); 00486 } 00487 } 00488 CorrectBoxPositionsToLayout(&page_boxchars); 00489 00490 if (render_fullwidth_latin_) { 00491 for (map<int, BoxChar*>::iterator it = start_byte_to_box.begin(); 00492 it != start_byte_to_box.end(); ++it) { 00493 // Convert fullwidth Latin characters to their halfwidth forms. 00494 string half(ConvertFullwidthLatinToBasicLatin(it->second->ch())); 00495 it->second->mutable_ch()->swap(half); 00496 } 00497 } 00498 00499 // Merge the character boxes into word boxes if we are rendering n-grams. 00500 if (output_word_boxes_) { 00501 MergeBoxCharsToWords(&page_boxchars); 00502 } 00503 00504 boxchars_.insert(boxchars_.end(), page_boxchars.begin(), page_boxchars.end()); 00505 00506 // Compute the page bounding box 00507 Box* page_box = NULL; 00508 Boxa* all_boxes = NULL; 00509 for (int i = 0; i < page_boxchars.size(); ++i) { 00510 if (page_boxchars[i]->box() == NULL) continue; 00511 if (all_boxes == NULL) 00512 all_boxes = boxaCreate(0); 00513 boxaAddBox(all_boxes, page_boxchars[i]->mutable_box(), L_CLONE); 00514 } 00515 boxaGetExtent(all_boxes, NULL, NULL, &page_box); 00516 boxaDestroy(&all_boxes); 00517 if (page_boxes_ == NULL) 00518 page_boxes_ = boxaCreate(0); 00519 boxaAddBox(page_boxes_, page_box, L_INSERT); 00520 } 00521 00522 00523 void StringRenderer::CorrectBoxPositionsToLayout(vector<BoxChar*>* boxchars) { 00524 if (vertical_text_) { 00525 const double rotation = - pango_gravity_to_rotation( 00526 pango_context_get_base_gravity(pango_layout_get_context(layout_))); 00527 BoxChar::TranslateBoxes(page_width_ - h_margin_, v_margin_, boxchars); 00528 BoxChar::RotateBoxes(rotation, page_width_ - h_margin_, v_margin_, 00529 0, boxchars->size(), boxchars); 00530 } else { 00531 BoxChar::TranslateBoxes(h_margin_, v_margin_, boxchars); 00532 } 00533 } 00534 00535 int StringRenderer::StripUnrenderableWords(string* utf8_text) const { 00536 string output_text; 00537 const char* text = utf8_text->c_str(); 00538 int offset = 0; 00539 int num_dropped = 0; 00540 while (offset < utf8_text->length()) { 00541 int space_len = SpanUTF8Whitespace(text + offset); 00542 output_text.append(text + offset, space_len); 00543 offset += space_len; 00544 if (offset == utf8_text->length()) break; 00545 00546 int word_len = SpanUTF8NotWhitespace(text + offset); 00547 if (font_.CanRenderString(text + offset, word_len)) { 00548 output_text.append(text + offset, word_len); 00549 } else { 00550 ++num_dropped; 00551 } 00552 offset += word_len; 00553 } 00554 utf8_text->swap(output_text); 00555 00556 if (num_dropped > 0) { 00557 tprintf("Stripped %d unrenderable words\n", num_dropped); 00558 } 00559 return num_dropped; 00560 } 00561 00562 int StringRenderer::RenderToBinaryImage(const char* text, int text_length, 00563 int threshold, Pix** pix) { 00564 Pix *orig_pix = NULL; 00565 int offset = RenderToImage(text, text_length, &orig_pix); 00566 if (orig_pix) { 00567 Pix* gray_pix = pixConvertTo8(orig_pix, false); 00568 pixDestroy(&orig_pix); 00569 *pix = pixThresholdToBinary(gray_pix, threshold); 00570 pixDestroy(&gray_pix); 00571 } else { 00572 *pix = orig_pix; 00573 } 00574 return offset; 00575 } 00576 00577 // Add word joiner (WJ) characters between adjacent non-space characters except 00578 // immediately before a combiner. 00579 /* static */ 00580 string StringRenderer::InsertWordJoiners(const string& text) { 00581 string out_str; 00582 const UNICHAR::const_iterator it_end = UNICHAR::end(text.c_str(), 00583 text.length()); 00584 for (UNICHAR::const_iterator it = UNICHAR::begin(text.c_str(), text.length()); 00585 it != it_end; ++it) { 00586 // Add the symbol to the output string. 00587 out_str.append(it.utf8_data(), it.utf8_len()); 00588 // Check the next symbol. 00589 UNICHAR::const_iterator next_it = it; 00590 ++next_it; 00591 bool next_char_is_boundary = (next_it == it_end || *next_it == ' '); 00592 bool next_char_is_combiner = (next_it == it_end) ? 00593 false : IsCombiner(*next_it); 00594 if (*it != ' ' && *it != '\n' && !next_char_is_boundary && 00595 !next_char_is_combiner) { 00596 out_str += kWordJoinerUTF8; 00597 } 00598 } 00599 return out_str; 00600 } 00601 00602 // Convert halfwidth Basic Latin characters to their fullwidth forms. 00603 string StringRenderer::ConvertBasicLatinToFullwidthLatin(const string& str) { 00604 string full_str; 00605 const UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), 00606 str.length()); 00607 for (UNICHAR::const_iterator it = UNICHAR::begin(str.c_str(), str.length()); 00608 it != it_end; ++it) { 00609 // Convert printable and non-space 7-bit ASCII characters to 00610 // their fullwidth forms. 00611 if (IsInterchangeValid7BitAscii(*it) && isprint(*it) && !isspace(*it)) { 00612 // Convert by adding 0xFEE0 to the codepoint of 7-bit ASCII. 00613 char32 full_char = *it + 0xFEE0; 00614 full_str.append(EncodeAsUTF8(full_char)); 00615 } else { 00616 full_str.append(it.utf8_data(), it.utf8_len()); 00617 } 00618 } 00619 return full_str; 00620 } 00621 00622 // Convert fullwidth Latin characters to their halfwidth forms. 00623 string StringRenderer::ConvertFullwidthLatinToBasicLatin(const string& str) { 00624 string half_str; 00625 UNICHAR::const_iterator it_end = UNICHAR::end(str.c_str(), str.length()); 00626 for (UNICHAR::const_iterator it = UNICHAR::begin(str.c_str(), str.length()); 00627 it != it_end; ++it) { 00628 char32 half_char = FullwidthToHalfwidth(*it); 00629 // Convert fullwidth Latin characters to their halfwidth forms 00630 // only if halfwidth forms are printable and non-space 7-bit ASCII. 00631 if (IsInterchangeValid7BitAscii(half_char) && 00632 isprint(half_char) && !isspace(half_char)) { 00633 half_str.append(EncodeAsUTF8(half_char)); 00634 } else { 00635 half_str.append(it.utf8_data(), it.utf8_len()); 00636 } 00637 } 00638 return half_str; 00639 } 00640 00641 // Returns offset to end of text substring rendered in this method. 00642 int StringRenderer::RenderToImage(const char* text, int text_length, 00643 Pix** pix) { 00644 if (pix && *pix) pixDestroy(pix); 00645 InitPangoCairo(); 00646 00647 const int page_offset = FindFirstPageBreakOffset(text, text_length); 00648 if (!page_offset) { 00649 return 0; 00650 } 00651 start_box_ = boxchars_.size(); 00652 00653 if (!vertical_text_) { 00654 // Translate by the specified margin 00655 cairo_translate(cr_, h_margin_, v_margin_); 00656 } else { 00657 // Vertical text rendering is achieved by a two-step process of first 00658 // performing regular horizontal layout with character orientation set to 00659 // EAST, and then translating and rotating the layout before rendering onto 00660 // the desired image surface. The settings required for the former step are 00661 // done within InitPangoCairo(). 00662 // 00663 // Translate to the top-right margin of page 00664 cairo_translate(cr_, page_width_ - h_margin_, v_margin_); 00665 // Rotate the layout 00666 double rotation = - pango_gravity_to_rotation( 00667 pango_context_get_base_gravity(pango_layout_get_context(layout_))); 00668 tlog(2, "Rotating by %f radians\n", rotation); 00669 cairo_rotate(cr_, rotation); 00670 pango_cairo_update_layout(cr_, layout_); 00671 } 00672 string page_text(text, page_offset); 00673 if (render_fullwidth_latin_) { 00674 // Convert Basic Latin to their fullwidth forms. 00675 page_text = ConvertBasicLatinToFullwidthLatin(page_text); 00676 } 00677 if (strip_unrenderable_words_) { 00678 StripUnrenderableWords(&page_text); 00679 } 00680 if (drop_uncovered_chars_ && 00681 !font_.CoversUTF8Text(page_text.c_str(), page_text.length())) { 00682 int num_dropped = font_.DropUncoveredChars(&page_text); 00683 if (num_dropped) { 00684 tprintf("WARNING: Dropped %d uncovered characters\n", num_dropped); 00685 } 00686 } 00687 if (add_ligatures_) { 00688 // Add ligatures wherever possible, including custom ligatures. 00689 page_text = LigatureTable::Get()->AddLigatures(page_text, &font_); 00690 } 00691 00692 pango_layout_set_text(layout_, page_text.c_str(), page_text.length()); 00693 00694 if (pix) { 00695 // Set a white background for the target image surface. 00696 cairo_set_source_rgb(cr_, 1.0, 1.0, 1.0); // sets drawing colour to white 00697 // Fill the surface with the active colour (if you don't do this, you will 00698 // be given a surface with a transparent background to draw on) 00699 cairo_paint(cr_); 00700 // Set the ink color to black 00701 cairo_set_source_rgb(cr_, pen_color_[0], pen_color_[1], pen_color_[2]); 00702 // If the target surface or transformation properties of the cairo instance 00703 // have changed, update the pango layout to reflect this 00704 pango_cairo_update_layout(cr_, layout_); 00705 // Draw the pango layout onto the cairo surface 00706 pango_cairo_show_layout(cr_, layout_); 00707 *pix = CairoARGB32ToPixFormat(surface_); 00708 } 00709 ComputeClusterBoxes(); 00710 FreePangoCairo(); 00711 // Update internal state variables. 00712 ++page_; 00713 return page_offset; 00714 } 00715 00716 // Render a string to an image, returning it as an 8 bit pix. Behaves as 00717 // RenderString, except that it ignores the font set at construction and works 00718 // through all the fonts, returning 0 until they are exhausted, at which point 00719 // it returns the value it should have returned all along, but no pix this time. 00720 // Fonts that don't contain a large proportion of the characters in the string 00721 // get skipped. 00722 // Fonts that work each get rendered and the font name gets added 00723 // to the image. 00724 // NOTE that no boxes are produced by this function. 00725 // 00726 // Example usage: To render a null terminated char-array "txt" 00727 // 00728 // int offset = 0; 00729 // do { 00730 // Pix *pix; 00731 // offset += renderer.RenderAllFontsToImage(txt + offset, 00732 // strlen(txt + offset), NULL, &pix); 00733 // ... 00734 // } while (offset < strlen(text)); 00735 // 00736 int StringRenderer::RenderAllFontsToImage(const char* text, int text_length, 00737 string* font_used, Pix** image) { 00738 // Select a suitable font to render the title with. 00739 const char kTitleTemplate[] = "%s : %d hits = %.2f%%, raw = %d = %.2f%%"; 00740 string title_font; 00741 if (!FontUtils::SelectFont(kTitleTemplate, strlen(kTitleTemplate), 00742 &title_font, NULL)) { 00743 tprintf("WARNING: Could not find a font to render image title with!\n"); 00744 title_font = "Arial"; 00745 } 00746 title_font += " 8"; 00747 tlog(1, "Selected title font: %s\n", title_font.c_str()); 00748 if (font_used) font_used->clear(); 00749 00750 string orig_font = font_.DescriptionName(); 00751 if (char_map_.empty()) { 00752 total_chars_ = 0; 00753 // Fill the hash table and use that for computing which fonts to use. 00754 for (UNICHAR::const_iterator it = UNICHAR::begin(text, text_length); 00755 it != UNICHAR::end(text, text_length); ++it) { 00756 ++total_chars_; 00757 ++char_map_[*it]; 00758 } 00759 tprintf("Total chars = %d\n", total_chars_); 00760 } 00761 const vector<string>& all_fonts = FontUtils::ListAvailableFonts(); 00762 for (int i = font_index_; i < all_fonts.size(); ++i) { 00763 ++font_index_; 00764 int raw_score = 0; 00765 int ok_chars = FontUtils::FontScore(char_map_, all_fonts[i], &raw_score, 00766 NULL); 00767 if (ok_chars > 0 && ok_chars == total_chars_) { 00768 set_font(all_fonts[i]); 00769 int offset = RenderToBinaryImage(text, text_length, 128, image); 00770 ClearBoxes(); // Get rid of them as they are garbage. 00771 const int kMaxTitleLength = 1024; 00772 char title[kMaxTitleLength]; 00773 snprintf(title, kMaxTitleLength, kTitleTemplate, 00774 all_fonts[i].c_str(), ok_chars, 00775 100.0 * ok_chars / total_chars_, raw_score, 00776 100.0 * raw_score / char_map_.size()); 00777 tprintf("%s\n", title); 00778 // This is a good font! Store the offset to return once we've tried all 00779 // the fonts. 00780 if (offset) { 00781 last_offset_ = offset; 00782 if (font_used) *font_used = all_fonts[i]; 00783 } 00784 // Add the font to the image. 00785 set_font(title_font); 00786 v_margin_ /= 8; 00787 Pix* title_image = NULL; 00788 RenderToBinaryImage(title, strlen(title), 128, &title_image); 00789 pixOr(*image, *image, title_image); 00790 pixDestroy(&title_image); 00791 00792 v_margin_ *= 8; 00793 set_font(orig_font); 00794 // We return the real offset only after cycling through the list of fonts. 00795 return 0; 00796 } else { 00797 tprintf("Font %s failed with %d hits = %.2f%%\n", 00798 all_fonts[i].c_str(), ok_chars, 100.0 * ok_chars / total_chars_); 00799 } 00800 } 00801 *image = NULL; 00802 font_index_ = 0; 00803 char_map_.clear(); 00804 return last_offset_; 00805 } 00806 00807 } // namespace tesseract