tesseract  3.03
tesseract::StringRenderer Class Reference

#include <stringrenderer.h>

List of all members.

Public Member Functions

 StringRenderer (const string &font_desc, int page_width, int page_height)
 ~StringRenderer ()
int RenderToImage (const char *text, int text_length, Pix **pix)
int RenderToBinaryImage (const char *text, int text_length, int threshold, Pix **pix)
int RenderAllFontsToImage (const char *text, int text_length, string *font_used, Pix **pix)
bool set_font (const string &desc)
void set_char_spacing (double char_spacing)
void set_leading (int leading)
void set_resolution (const int resolution)
void set_vertical_text (bool vertical_text)
void set_gravity_hint_strong (bool gravity_hint_strong)
void set_render_fullwidth_latin (bool render_fullwidth_latin)
void set_page (int page)
void set_box_padding (int val)
void set_drop_uncovered_chars (bool val)
void set_strip_unrenderable_words (bool val)
void set_output_word_boxes (bool val)
void set_add_ligatures (bool add_ligatures)
void set_pen_color (double r, double g, double b)
void set_h_margin (const int h_margin)
void set_v_margin (const int v_margin)
const PangoFontInfo & font () const
const int h_margin () const
const int v_margin () const
const vector< BoxChar * > & GetBoxes () const
Boxa * GetPageBoxes () const
void RotatePageBoxes (float rotation)
void ClearBoxes ()
void WriteAllBoxes (const string &filename) const
int StripUnrenderableWords (string *utf8_text) const

Static Public Member Functions

static string InsertWordJoiners (const string &text)
static string ConvertBasicLatinToFullwidthLatin (const string &text)
static string ConvertFullwidthLatinToBasicLatin (const string &text)

Protected Member Functions

void InitPangoCairo ()
void SetLayoutProperties ()
void FreePangoCairo ()
void ComputeClusterBoxes ()
void CorrectBoxPositionsToLayout (vector< BoxChar * > *boxchars)
bool GetClusterStrings (vector< string > *cluster_text)
int FindFirstPageBreakOffset (const char *text, int text_length)

Protected Attributes

PangoFontInfo font_
int page_width_
int page_height_
int h_margin_
int v_margin_
int pen_color_ [3]
double char_spacing_
int leading_
int resolution_
bool vertical_text_
bool gravity_hint_strong_
bool render_fullwidth_latin_
bool drop_uncovered_chars_
bool strip_unrenderable_words_
bool add_ligatures_
bool output_word_boxes_
cairo_surface_t * surface_
cairo_t * cr_
PangoLayout * layout_
int start_box_
int page_
vector< BoxChar * > boxchars_
int box_padding_
Boxa * page_boxes_
hash_map< char32, inT64 > char_map_
int total_chars_
int font_index_
int last_offset_

Detailed Description

Definition at line 48 of file stringrenderer.h.


Constructor & Destructor Documentation

tesseract::StringRenderer::StringRenderer ( const string &  font_desc,
int  page_width,
int  page_height 
)

Definition at line 96 of file stringrenderer.cpp.

                                                :
    page_width_(page_width), page_height_(page_height),
    h_margin_(50), v_margin_(50), char_spacing_(0), leading_(0),
    // resolution_ must hold a defined value before set_font() below reads it
    // and forwards it to font_.
    resolution_(kDefaultOutputResolution),
    vertical_text_(false), gravity_hint_strong_(false),
    render_fullwidth_latin_(false) ,drop_uncovered_chars_(true),
    strip_unrenderable_words_(false), add_ligatures_(false),
    output_word_boxes_(false), surface_(NULL), cr_(NULL),
    layout_(NULL), start_box_(0), page_(0), box_padding_(0),
    total_chars_(0), font_index_(0), last_offset_(0) {
  // Constructs a renderer for pages of page_width x page_height using the
  // font described by font_desc, with 50-unit margins and every optional
  // rendering feature except drop_uncovered_chars_ switched off.
  //
  // All pen colour components start at zero (the ink colour handed to cairo
  // in RenderToImage()).
  pen_color_[0] = 0.0;
  pen_color_[1] = 0.0;
  pen_color_[2] = 0.0;
  set_font(font_desc);
  // Kept so that font_ is (re)synchronised with the default resolution after
  // the font description has been parsed.
  set_resolution(kDefaultOutputResolution);
  page_boxes_ = NULL;
}

Member Function Documentation

void tesseract::StringRenderer::ClearBoxes ( )

Definition at line 261 of file stringrenderer.cpp.

                                {
  // Releases every BoxChar accumulated so far, empties the list that held
  // them, and destroys the per-page bounding boxes.
  for (vector<BoxChar*>::iterator box_it = boxchars_.begin();
       box_it != boxchars_.end(); ++box_it) {
    delete *box_it;
  }
  boxchars_.clear();
  boxaDestroy(&page_boxes_);
}

void tesseract::StringRenderer::ComputeClusterBoxes ( ) [protected]

Definition at line 380 of file stringrenderer.cpp.

                                         {
  // Builds one BoxChar per glyph cluster of the text currently set on
  // layout_, appends the surviving boxes to boxchars_ and adds the bounding
  // box of this page to page_boxes_.
  const char* text = pango_layout_get_text(layout_);
  PangoLayoutIter* cluster_iter = pango_layout_get_iter(layout_);

  // Do a first pass to store cluster start indexes.
  vector<int> cluster_start_indices;
  do {
    cluster_start_indices.push_back(pango_layout_iter_get_index(cluster_iter));
    tlog(3, "Added %d\n", cluster_start_indices.back());
  } while (pango_layout_iter_next_cluster(cluster_iter));
  pango_layout_iter_free(cluster_iter);
  // The end of the text terminates the last cluster.
  cluster_start_indices.push_back(strlen(text));
  tlog(3, "Added last index %d\n", cluster_start_indices.back());
  // Sort the indices and create a map from start to end indices.
  sort(cluster_start_indices.begin(), cluster_start_indices.end());
  map<int, int> cluster_start_to_end_index;
  for (int i = 0; i < cluster_start_indices.size() - 1; ++i) {
    cluster_start_to_end_index[cluster_start_indices[i]]
        = cluster_start_indices[i + 1];
  }

  // Iterate again to compute cluster boxes and their text with the obtained
  // cluster extent information.
  cluster_iter = pango_layout_get_iter(layout_);
  // Store BoxChars* sorted by their byte start positions
  map<int, BoxChar*> start_byte_to_box;
  do {
    PangoRectangle cluster_rect;
    pango_layout_iter_get_cluster_extents(cluster_iter, &cluster_rect,
                                          NULL);
    pango_extents_to_pixels(&cluster_rect, NULL);
    const int start_byte_index = pango_layout_iter_get_index(cluster_iter);
    const int end_byte_index = cluster_start_to_end_index[start_byte_index];
    string cluster_text = string(text + start_byte_index,
                                 end_byte_index - start_byte_index);
    // Note: "continue" inside a do-while jumps to the loop condition, so the
    // iterator is still advanced to the next cluster.
    if (cluster_text.size() && cluster_text[0] == '\n') {
      tlog(2, "Skipping newlines at start of text.\n");
      continue;
    }
    if (!cluster_rect.width || !cluster_rect.height ||
        IsUTF8Whitespace(cluster_text.c_str())) {
      // Whitespace and zero-sized clusters are recorded as a single space
      // without a bounding box.
      tlog(2, "Skipping whitespace with boxdim (%d,%d) '%s'\n",
           cluster_rect.width, cluster_rect.height, cluster_text.c_str());
      BoxChar* boxchar = new BoxChar(" ", 1);
      boxchar->set_page(page_);
      start_byte_to_box[start_byte_index] = boxchar;
      continue;
    }
    // Prepare a boxchar for addition at this byte position.
    tlog(2, "[%d %d], %d, %d : start_byte=%d end_byte=%d : '%s'\n",
         cluster_rect.x, cluster_rect.y,
         cluster_rect.width, cluster_rect.height,
         start_byte_index, end_byte_index,
         cluster_text.c_str());
    ASSERT_HOST_MSG(cluster_rect.width,
                    "cluster_text:%s  start_byte_index:%d\n",
                    cluster_text.c_str(), start_byte_index);
    ASSERT_HOST_MSG(cluster_rect.height,
                    "cluster_text:%s  start_byte_index:%d\n",
                    cluster_text.c_str(), start_byte_index);
    if (box_padding_) {
      // Grow the box by box_padding_ on every side, clamping the origin at 0.
      cluster_rect.x = max(0, cluster_rect.x - box_padding_);
      cluster_rect.width += 2 * box_padding_;
      cluster_rect.y = max(0, cluster_rect.y - box_padding_);
      cluster_rect.height += 2 * box_padding_;
    }
    if (add_ligatures_) {
      // Make sure the output box files have ligatured text in case the font
      // decided to use an unmapped glyph.
      cluster_text = LigatureTable::Get()->AddLigatures(cluster_text, NULL);
    }
    BoxChar* boxchar = new BoxChar(cluster_text.c_str(), cluster_text.size());
    boxchar->set_page(page_);
    boxchar->AddBox(cluster_rect.x, cluster_rect.y,
                    cluster_rect.width, cluster_rect.height);
    start_byte_to_box[start_byte_index] = boxchar;
  } while (pango_layout_iter_next_cluster(cluster_iter));
  pango_layout_iter_free(cluster_iter);

  // There is a subtle bug in the cluster text reported by the PangoLayoutIter
  // on ligatured characters (eg. The word "Lam-Aliph" in arabic). To work
  // around this, we use text reported using the PangoGlyphIter which is
  // accurate.
  // TODO(ranjith): Revisit whether this is still needed in newer versions of
  // pango.
  vector<string> cluster_text;
  if (GetClusterStrings(&cluster_text)) {
    ASSERT_HOST(cluster_text.size() == start_byte_to_box.size());
    int ind = 0;
    for (map<int, BoxChar*>::iterator it = start_byte_to_box.begin();
         it != start_byte_to_box.end(); ++it, ++ind) {
      it->second->mutable_ch()->swap(cluster_text[ind]);
    }
  }

  // Append to the boxchars list in byte order.
  vector<BoxChar*> page_boxchars;
  page_boxchars.reserve(start_byte_to_box.size());
  for (map<int, BoxChar*>::const_iterator it = start_byte_to_box.begin();
       it != start_byte_to_box.end(); ++it) {
    if (it->second->ch() == kWordJoinerUTF8) {
      // Skip zero-width joiner characters (ZWJs) here. The deleted pointer
      // stays in start_byte_to_box, so the map must not be dereferenced
      // again after this loop.
      delete it->second;
    } else {
      page_boxchars.push_back(it->second);
    }
  }
  CorrectBoxPositionsToLayout(&page_boxchars);

  if (render_fullwidth_latin_) {
    // Walk page_boxchars rather than start_byte_to_box: the map still holds
    // the word-joiner boxes that were deleted above.
    for (int i = 0; i < page_boxchars.size(); ++i) {
      // Convert fullwidth Latin characters to their halfwidth forms.
      string half(ConvertFullwidthLatinToBasicLatin(page_boxchars[i]->ch()));
      page_boxchars[i]->mutable_ch()->swap(half);
    }
  }

  // Merge the character boxes into word boxes if we are rendering n-grams.
  if (output_word_boxes_) {
    MergeBoxCharsToWords(&page_boxchars);
  }

  boxchars_.insert(boxchars_.end(), page_boxchars.begin(), page_boxchars.end());

  // Compute the page bounding box
  Box* page_box = NULL;
  Boxa* all_boxes = NULL;
  for (int i = 0; i < page_boxchars.size(); ++i) {
    // Space placeholders carry no box.
    if (page_boxchars[i]->box() == NULL) continue;
    if (all_boxes == NULL)
      all_boxes = boxaCreate(0);
    boxaAddBox(all_boxes, page_boxchars[i]->mutable_box(), L_CLONE);
  }
  boxaGetExtent(all_boxes, NULL, NULL, &page_box);
  boxaDestroy(&all_boxes);
  if (page_boxes_ == NULL)
    page_boxes_ = boxaCreate(0);
  boxaAddBox(page_boxes_, page_box, L_INSERT);
}
string tesseract::StringRenderer::ConvertBasicLatinToFullwidthLatin ( const string &  text) [static]

Definition at line 603 of file stringrenderer.cpp.

                                                                          {
  // Returns a copy of str in which every printable, non-space 7-bit ASCII
  // character is replaced by its fullwidth form (code point + 0xFEE0); all
  // other characters are copied through unchanged.
  string result;
  UNICHAR::const_iterator cur = UNICHAR::begin(str.c_str(), str.length());
  const UNICHAR::const_iterator last = UNICHAR::end(str.c_str(),
                                                    str.length());
  while (cur != last) {
    const bool convertible =
        IsInterchangeValid7BitAscii(*cur) && isprint(*cur) && !isspace(*cur);
    if (!convertible) {
      // Keep the original UTF-8 bytes of this character.
      result.append(cur.utf8_data(), cur.utf8_len());
    } else {
      const char32 wide_char = *cur + 0xFEE0;
      result.append(EncodeAsUTF8(wide_char));
    }
    ++cur;
  }
  return result;
}
string tesseract::StringRenderer::ConvertFullwidthLatinToBasicLatin ( const string &  text) [static]

Definition at line 623 of file stringrenderer.cpp.

                                                                          {
  // Returns a copy of str in which each character is replaced by its
  // halfwidth form when that form is a printable, non-space 7-bit ASCII
  // character; every other character keeps its original UTF-8 bytes.
  string result;
  UNICHAR::const_iterator cur = UNICHAR::begin(str.c_str(), str.length());
  UNICHAR::const_iterator last = UNICHAR::end(str.c_str(), str.length());
  while (cur != last) {
    const char32 narrow_char = FullwidthToHalfwidth(*cur);
    const bool usable = IsInterchangeValid7BitAscii(narrow_char) &&
        isprint(narrow_char) && !isspace(narrow_char);
    if (!usable) {
      result.append(cur.utf8_data(), cur.utf8_len());
    } else {
      result.append(EncodeAsUTF8(narrow_char));
    }
    ++cur;
  }
  return result;
}
void tesseract::StringRenderer::CorrectBoxPositionsToLayout ( vector< BoxChar * > *  boxchars) [protected]

Definition at line 523 of file stringrenderer.cpp.

                                                                           {
  // Moves the boxes from layout coordinates to page coordinates, mirroring
  // the translate/rotate transform applied to the cairo context in
  // RenderToImage().
  if (!vertical_text_) {
    // Horizontal text only needs the margin offset.
    BoxChar::TranslateBoxes(h_margin_, v_margin_, boxchars);
    return;
  }
  // Vertical text is anchored at the top-right margin and rotated by the
  // negated rotation of the layout's base gravity.
  const PangoGravity base_gravity =
      pango_context_get_base_gravity(pango_layout_get_context(layout_));
  const double rotation = - pango_gravity_to_rotation(base_gravity);
  const int anchor_x = page_width_ - h_margin_;
  BoxChar::TranslateBoxes(anchor_x, v_margin_, boxchars);
  BoxChar::RotateBoxes(rotation, anchor_x, v_margin_,
                       0, boxchars->size(), boxchars);
}
int tesseract::StringRenderer::FindFirstPageBreakOffset ( const char *  text,
int  text_length 
) [protected]

Definition at line 203 of file stringrenderer.cpp.

                                                              {
  // Lays out a prefix of text on layout_ and returns the byte offset at
  // which the first page ends: the start index of the first line whose
  // bottom lies more than the usable page height below the top of the first
  // line, or the length of the laid-out prefix when everything fits.
  // Returns 0 for empty input.
  if (!text_length) return 0;
  const int max_height = (page_height_ - 2 * v_margin_);
  const int max_width = (page_width_ - 2 * h_margin_);
  // For vertical text the limit is the usable page width instead.
  const int max_layout_height = vertical_text_ ? max_width : max_height;

  UNICHAR::const_iterator it = UNICHAR::begin(text, text_length);
  const UNICHAR::const_iterator it_end = UNICHAR::end(text, text_length);
  // Only the first kMaxUnicodeBufLength characters are handed to pango; the
  // empty-bodied loop below just advances the iterator that far.
  const int kMaxUnicodeBufLength = 15000;
  for (int i = 0; i < kMaxUnicodeBufLength && it != it_end; ++it, ++i);
  // Byte length of the prefix covered by the iterator.
  int buf_length = it.utf8_data() - text;
  tlog(1, "len = %d  buf_len = %d\n", text_length, buf_length);
  pango_layout_set_text(layout_, text, buf_length);

  PangoLayoutIter* line_iter = NULL;
  { // Fontconfig caches some info here that is not freed before exit.
    DISABLE_HEAP_LEAK_CHECK;
    line_iter = pango_layout_get_iter(layout_);
  }
  // True until the first line has been seen; its top becomes the reference
  // against which line bottoms are measured.
  bool first_page = true;
  int page_top = 0;
  // Default result: the whole laid-out prefix fits on the page.
  int offset = buf_length;
  do {
    // Get bounding box of the current line
    PangoRectangle line_ink_rect;
    pango_layout_iter_get_line_extents(line_iter, &line_ink_rect, NULL);
    pango_extents_to_pixels(&line_ink_rect, NULL);
    PangoLayoutLine* line = pango_layout_iter_get_line_readonly(line_iter);
    if (first_page) {
      page_top = line_ink_rect.y;
      first_page = false;
    }
    int line_bottom = line_ink_rect.y + line_ink_rect.height;
    if (line_bottom - page_top > max_layout_height) {
      // This line overflows the page: the page ends where the line starts.
      offset = line->start_index;
      tlog(1, "Found offset = %d\n", offset);
      break;
    }
  } while (pango_layout_iter_next_line(line_iter));
  pango_layout_iter_free(line_iter);
  return offset;
}

const PangoFontInfo & tesseract::StringRenderer::font ( ) const [inline]

Definition at line 114 of file stringrenderer.h.

                                    {
    // Accessor for the font currently configured on this renderer.
    return font_;
  }

void tesseract::StringRenderer::FreePangoCairo ( ) [protected]

Definition at line 186 of file stringrenderer.cpp.

                                    {
  // Releases the pango layout, the cairo context and the cairo surface, in
  // that order, and resets each pointer to NULL so the function is safe to
  // call repeatedly.
  if (layout_) {
    g_object_unref(layout_);
    layout_ = NULL;
  }
  if (cr_) {
    cairo_destroy(cr_);
    cr_ = NULL;
  }
  if (surface_) {
    cairo_surface_destroy(surface_);
    surface_ = NULL;
  }
}
const vector< BoxChar * > & tesseract::StringRenderer::GetBoxes ( ) const

Definition at line 247 of file stringrenderer.cpp.

                                                       {
    // Read-only view of all BoxChars accumulated since the last ClearBoxes().
    return boxchars_;
}
bool tesseract::StringRenderer::GetClusterStrings ( vector< string > *  cluster_text) [protected]

Definition at line 273 of file stringrenderer.cpp.

                                                                   {
  // Fills cluster_text with the text of every glyph cluster in layout_,
  // ordered by starting byte offset. Returns true if at least one cluster
  // was found.
  map<int, string> start_byte_to_text;
  PangoLayoutIter* run_iter = pango_layout_get_iter(layout_);
  const char* full_text = pango_layout_get_text(layout_);
  do {
    PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
    if (!run) {
      // End of line NULL run marker
      tlog(2, "Found end of line marker\n");
      // "continue" in a do-while jumps to the condition, so the iterator
      // still advances to the next run.
      continue;
    }
    PangoGlyphItemIter cluster_iter;
    gboolean have_cluster;
    for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
                                                          run, full_text);
         have_cluster;
         have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
      const int start_byte_index = cluster_iter.start_index;
      const int end_byte_index = cluster_iter.end_index;
      string text = string(full_text + start_byte_index,
                           end_byte_index - start_byte_index);
      if (IsUTF8Whitespace(text.c_str())) {
        // Any whitespace cluster is normalised to a single space.
        tlog(2, "Found whitespace\n");
        text = " ";
      }
      tlog(2, "start_byte=%d end_byte=%d : '%s'\n", start_byte_index,
           end_byte_index, text.c_str());
      if (add_ligatures_) {
        // Make sure the output box files have ligatured text in case the font
        // decided to use an unmapped glyph.
        text = LigatureTable::Get()->AddLigatures(text, NULL);
      }
      start_byte_to_text[start_byte_index] = text;
    }
  } while (pango_layout_iter_next_run(run_iter));
  pango_layout_iter_free(run_iter);

  // Emit the strings in ascending byte order (the iteration order of the map).
  cluster_text->clear();
  for (map<int, string>::const_iterator it = start_byte_to_text.begin();
       it != start_byte_to_text.end(); ++it) {
    cluster_text->push_back(it->second);
  }
  // Implicit size-to-bool conversion: false only when no cluster was found.
  return cluster_text->size();
}

Boxa * tesseract::StringRenderer::GetPageBoxes ( ) const

Definition at line 251 of file stringrenderer.cpp.

                                         {
    // Returns the internal Boxa of per-page bounding boxes; it is NULL until
    // ComputeClusterBoxes() has created it.
    return page_boxes_;
}
const int tesseract::StringRenderer::h_margin ( ) const [inline]

Definition at line 117 of file stringrenderer.h.

                             {
    // Current horizontal page margin.
    return h_margin_;
  }

void tesseract::StringRenderer::InitPangoCairo ( ) [protected]

Definition at line 130 of file stringrenderer.cpp.

                                    {
  // (Re)creates the cairo surface, the cairo context and the pango layout
  // for a page of page_width_ x page_height_, then applies the current
  // layout settings.
  // Drop any objects left over from a previous page first.
  FreePangoCairo();
  surface_ = cairo_image_surface_create(CAIRO_FORMAT_ARGB32, page_width_,
                                        page_height_);
  cr_ = cairo_create(surface_);
  layout_ = pango_cairo_create_layout(cr_);

  if (vertical_text_) {
    // Vertical text is laid out horizontally with EAST base gravity and
    // rotated later, when it is rendered.
    PangoContext* context = pango_layout_get_context(layout_);
    pango_context_set_base_gravity(context, PANGO_GRAVITY_EAST);
    if (gravity_hint_strong_) {
      pango_context_set_gravity_hint(context, PANGO_GRAVITY_HINT_STRONG);
    }
    // Tell the layout that its context was modified.
    pango_layout_context_changed(layout_);
  }

  SetLayoutProperties();
}
string tesseract::StringRenderer::InsertWordJoiners ( const string &  text) [static]

Definition at line 580 of file stringrenderer.cpp.

                                                           {
  // Returns a copy of text with kWordJoinerUTF8 inserted after every
  // character that is neither a space nor a newline, unless the following
  // character is a space, a combiner, or the end of the text.
  string joined;
  const UNICHAR::const_iterator last = UNICHAR::end(text.c_str(),
                                                    text.length());
  UNICHAR::const_iterator cur = UNICHAR::begin(text.c_str(), text.length());
  while (cur != last) {
    // Copy the current symbol verbatim.
    joined.append(cur.utf8_data(), cur.utf8_len());
    // Peek at the symbol that follows it.
    UNICHAR::const_iterator peek = cur;
    ++peek;
    const bool followed_by_boundary = (peek == last || *peek == ' ');
    const bool followed_by_combiner = (peek != last) && IsCombiner(*peek);
    const bool cur_is_break = (*cur == ' ' || *cur == '\n');
    if (!cur_is_break && !followed_by_boundary && !followed_by_combiner) {
      joined += kWordJoinerUTF8;
    }
    ++cur;
  }
  return joined;
}
int tesseract::StringRenderer::RenderAllFontsToImage ( const char *  text,
int  text_length,
string *  font_used,
Pix **  pix 
)

Definition at line 736 of file stringrenderer.cpp.

                                                                          {
  // Renders text with the next available font that covers every character
  // of it, stamping a small title line with the font name and coverage
  // statistics onto the image. Each call resumes the font scan where the
  // previous call stopped (font_index_) and returns 0 after a successful
  // render. Once the font list is exhausted, *image is set to NULL, the
  // scan state is reset and the last non-zero offset returned by
  // RenderToBinaryImage() is returned.
  //
  // Select a suitable font to render the title with.
  const char kTitleTemplate[] = "%s : %d hits = %.2f%%, raw = %d = %.2f%%";
  string title_font;
  if (!FontUtils::SelectFont(kTitleTemplate, strlen(kTitleTemplate),
                             &title_font, NULL)) {
    tprintf("WARNING: Could not find a font to render image title with!\n");
    title_font = "Arial";
  }
  title_font += " 8";
  tlog(1, "Selected title font: %s\n", title_font.c_str());
  if (font_used) font_used->clear();

  // Remembered so the caller's font can be restored after the title is drawn.
  string orig_font = font_.DescriptionName();
  if (char_map_.empty()) {
    total_chars_ = 0;
    // Fill the hash table and use that for computing which fonts to use.
    for (UNICHAR::const_iterator it = UNICHAR::begin(text, text_length);
         it != UNICHAR::end(text, text_length); ++it) {
      ++total_chars_;
      ++char_map_[*it];
    }
    tprintf("Total chars = %d\n", total_chars_);
  }
  const vector<string>& all_fonts = FontUtils::ListAvailableFonts();
  for (int i = font_index_; i < all_fonts.size(); ++i) {
    // Advance the persistent cursor so the next call starts after this font.
    ++font_index_;
    int raw_score = 0;
    int ok_chars = FontUtils::FontScore(char_map_, all_fonts[i], &raw_score,
                                        NULL);
    if (ok_chars > 0 && ok_chars == total_chars_) {
      set_font(all_fonts[i]);
      int offset = RenderToBinaryImage(text, text_length, 128, image);
      ClearBoxes();  // Get rid of them as they are garbage.
      const int kMaxTitleLength = 1024;
      char title[kMaxTitleLength];
      snprintf(title, kMaxTitleLength, kTitleTemplate,
               all_fonts[i].c_str(), ok_chars,
               100.0 * ok_chars / total_chars_, raw_score,
               100.0 * raw_score / char_map_.size());
      tprintf("%s\n", title);
      // This is a good font! Store the offset to return once we've tried all
      // the fonts.
      if (offset) {
        last_offset_ = offset;
        if (font_used) *font_used = all_fonts[i];
      }
      // Add the font to the image.
      set_font(title_font);
      // Draw the title with a reduced top margin. The original margin is
      // saved and restored exactly: dividing and then multiplying by 8 does
      // not round-trip with integer arithmetic (e.g. 50 -> 6 -> 48).
      const int orig_v_margin = v_margin_;
      v_margin_ /= 8;
      Pix* title_image = NULL;
      RenderToBinaryImage(title, strlen(title), 128, &title_image);
      pixOr(*image, *image, title_image);
      pixDestroy(&title_image);

      v_margin_ = orig_v_margin;
      set_font(orig_font);
      // We return the real offset only after cycling through the list of fonts.
      return 0;
    } else {
      tprintf("Font %s failed with %d hits = %.2f%%\n",
              all_fonts[i].c_str(), ok_chars, 100.0 * ok_chars / total_chars_);
    }
  }
  // Every font has been tried: reset the scan state for the next text.
  *image = NULL;
  font_index_ = 0;
  char_map_.clear();
  return last_offset_;
}
int tesseract::StringRenderer::RenderToBinaryImage ( const char *  text,
int  text_length,
int  threshold,
Pix **  pix 
)

Definition at line 562 of file stringrenderer.cpp.

                                                                  {
  // Renders text via RenderToImage(), then converts the result to 8-bit
  // grey and thresholds it into *pix. When nothing was rendered, *pix
  // receives the NULL image. Returns RenderToImage()'s offset unchanged.
  Pix* rendered = NULL;
  const int offset = RenderToImage(text, text_length, &rendered);
  if (!rendered) {
    *pix = rendered;
    return offset;
  }
  Pix* gray = pixConvertTo8(rendered, false);
  pixDestroy(&rendered);
  *pix = pixThresholdToBinary(gray, threshold);
  pixDestroy(&gray);
  return offset;
}
int tesseract::StringRenderer::RenderToImage ( const char *  text,
int  text_length,
Pix **  pix 
)

Definition at line 642 of file stringrenderer.cpp.

                                             {
  // Renders as much of text as fits on one page, optionally into *pix, and
  // records the character boxes of that page. Returns the number of bytes
  // of text consumed by the page, or 0 if FindFirstPageBreakOffset()
  // reports that nothing can be laid out.
  // Any image the caller passed in is destroyed before rendering.
  if (pix && *pix) pixDestroy(pix);
  InitPangoCairo();

  const int page_offset = FindFirstPageBreakOffset(text, text_length);
  if (!page_offset) {
    return 0;
  }
  // Index in boxchars_ at which this page's boxes will start.
  start_box_ = boxchars_.size();

  if (!vertical_text_) {
    // Translate by the specified margin
    cairo_translate(cr_, h_margin_, v_margin_);
  } else {
    // Vertical text rendering is achieved by a two-step process of first
    // performing regular horizontal layout with character orientation set to
    // EAST, and then translating and rotating the layout before rendering onto
    // the desired image surface. The settings required for the former step are
    // done within InitPangoCairo().
    //
    // Translate to the top-right margin of page
    cairo_translate(cr_, page_width_ - h_margin_, v_margin_);
    // Rotate the layout
    double rotation = - pango_gravity_to_rotation(
        pango_context_get_base_gravity(pango_layout_get_context(layout_)));
    tlog(2, "Rotating by %f radians\n", rotation);
    cairo_rotate(cr_, rotation);
    pango_cairo_update_layout(cr_, layout_);
  }
  // From here on only the text of the current page is processed.
  string page_text(text, page_offset);
  if (render_fullwidth_latin_) {
    // Convert Basic Latin to their fullwidth forms.
    page_text = ConvertBasicLatinToFullwidthLatin(page_text);
  }
  if (strip_unrenderable_words_) {
    StripUnrenderableWords(&page_text);
  }
  if (drop_uncovered_chars_ &&
      !font_.CoversUTF8Text(page_text.c_str(), page_text.length())) {
    int num_dropped = font_.DropUncoveredChars(&page_text);
    if (num_dropped) {
      tprintf("WARNING: Dropped %d uncovered characters\n", num_dropped);
    }
  }
  if (add_ligatures_) {
    // Add ligatures wherever possible, including custom ligatures.
    page_text = LigatureTable::Get()->AddLigatures(page_text, &font_);
  }

  // Replace the text set by FindFirstPageBreakOffset() with the processed
  // page text.
  pango_layout_set_text(layout_, page_text.c_str(), page_text.length());

  // The image is optional: with a NULL pix only the boxes are computed.
  if (pix) {
    // Set a white background for the target image surface.
    cairo_set_source_rgb(cr_, 1.0, 1.0, 1.0);  // sets drawing colour to white
    // Fill the surface with the active colour (if you don't do this, you will
    // be given a surface with a transparent background to draw on)
    cairo_paint(cr_);
    // Set the ink color to the configured pen color
    cairo_set_source_rgb(cr_, pen_color_[0], pen_color_[1], pen_color_[2]);
    // If the target surface or transformation properties of the cairo instance
    // have changed, update the pango layout to reflect this
    pango_cairo_update_layout(cr_, layout_);
    // Draw the pango layout onto the cairo surface
    pango_cairo_show_layout(cr_, layout_);
    *pix = CairoARGB32ToPixFormat(surface_);
  }
  ComputeClusterBoxes();
  FreePangoCairo();
  // Update internal state variables.
  ++page_;
  return page_offset;
}

void tesseract::StringRenderer::RotatePageBoxes ( float  rotation)

Definition at line 255 of file stringrenderer.cpp.

void tesseract::StringRenderer::set_add_ligatures ( bool  add_ligatures) [inline]

Definition at line 99 of file stringrenderer.h.

                                             {
    // Controls whether LigatureTable::AddLigatures() is applied to the text.
    add_ligatures_ = add_ligatures;
  }
void tesseract::StringRenderer::set_box_padding ( int  val) [inline]

Definition at line 84 of file stringrenderer.h.

                                {
    // Padding added on every side of each cluster box in
    // ComputeClusterBoxes().
    box_padding_ = val;
  }
void tesseract::StringRenderer::set_char_spacing ( double  char_spacing) [inline]

Definition at line 65 of file stringrenderer.h.

                                             {
    // Letter spacing applied in SetLayoutProperties(), where it is scaled by
    // PANGO_SCALE.
    char_spacing_ = char_spacing;
  }

void tesseract::StringRenderer::set_drop_uncovered_chars ( bool  val) [inline]

Definition at line 87 of file stringrenderer.h.

bool tesseract::StringRenderer::set_font ( const string &  desc)

Definition at line 114 of file stringrenderer.cpp.

                                                {
  // Parses desc into font_ and re-applies the current resolution to it.
  // Returns the result of ParseFontDescriptionName().
  bool success = font_.ParseFontDescriptionName(desc);
  font_.set_resolution(resolution_);
  return success;
}
void tesseract::StringRenderer::set_gravity_hint_strong ( bool  gravity_hint_strong) [inline]

Definition at line 75 of file stringrenderer.h.

                                                         {
    // When set, InitPangoCairo() requests PANGO_GRAVITY_HINT_STRONG for
    // vertical text.
    gravity_hint_strong_ = gravity_hint_strong;
  }
void tesseract::StringRenderer::set_h_margin ( const int  h_margin) [inline]

Definition at line 108 of file stringrenderer.h.

void tesseract::StringRenderer::set_leading ( int  leading) [inline]

Definition at line 68 of file stringrenderer.h.

                                {
    // Line spacing applied in SetLayoutProperties(), where it is scaled by
    // PANGO_SCALE.
    leading_ = leading;
  }

void tesseract::StringRenderer::set_output_word_boxes ( bool  val) [inline]

Definition at line 93 of file stringrenderer.h.

                                       {
    // When set, ComputeClusterBoxes() merges character boxes into word boxes.
    output_word_boxes_ = val;
  }
void tesseract::StringRenderer::set_page ( int  page) [inline]

Definition at line 81 of file stringrenderer.h.

                          {
    // Page number stamped on new BoxChars; RenderToImage() increments it
    // after each rendered page.
    page_ = page;
  }
void tesseract::StringRenderer::set_pen_color ( double  r,
double  g,
double  b 
) [inline]

Definition at line 103 of file stringrenderer.h.

                                                   {
    // Ink colour components, passed to cairo_set_source_rgb() in
    // RenderToImage().
    pen_color_[0] = r;
    pen_color_[1] = g;
    pen_color_[2] = b;
  }
void tesseract::StringRenderer::set_render_fullwidth_latin ( bool  render_fullwidth_latin) [inline]

Definition at line 78 of file stringrenderer.h.

                                                               {
    // When set, RenderToImage() converts Basic Latin to fullwidth forms
    // before rendering.
    render_fullwidth_latin_ = render_fullwidth_latin;
  }
void tesseract::StringRenderer::set_resolution ( const int  resolution)

Definition at line 120 of file stringrenderer.cpp.

                                                        {
  // Stores the output resolution and forwards it to the current font.
  resolution_ = resolution;
  font_.set_resolution(resolution);
}

void tesseract::StringRenderer::set_strip_unrenderable_words ( bool  val) [inline]

Definition at line 90 of file stringrenderer.h.

void tesseract::StringRenderer::set_v_margin ( const int  v_margin) [inline]

Definition at line 111 of file stringrenderer.h.

void tesseract::StringRenderer::set_vertical_text ( bool  vertical_text) [inline]

Definition at line 72 of file stringrenderer.h.

                                             {
    // Selects vertical layout: affects gravity, the page-break limit and the
    // render/box transform.
    vertical_text_ = vertical_text;
  }

void tesseract::StringRenderer::SetLayoutProperties ( ) [protected]

Definition at line 149 of file stringrenderer.cpp.

                                         {
  // Applies the current font, resolution, wrap width, letter spacing and
  // line spacing to layout_.
  string font_desc = font_.DescriptionName();
  // Specify the font via a description name
  PangoFontDescription *desc =
      pango_font_description_from_string(font_desc.c_str());
  // Assign the font description to the layout
  pango_layout_set_font_description(layout_, desc);
  pango_font_description_free(desc);  // free the description
  pango_cairo_context_set_resolution(pango_layout_get_context(layout_),
                                     resolution_);

  // Usable page area inside the margins.
  int max_width = page_width_ - 2 * h_margin_;
  int max_height = page_height_ - 2 * v_margin_;
  tlog(3, "max_width = %d, max_height = %d\n", max_width, max_height);
  if (vertical_text_) {
    // For vertical text the usable page height becomes the wrap width.
    swap(max_width, max_height);
  }
  pango_layout_set_width(layout_, max_width * PANGO_SCALE);
  pango_layout_set_wrap(layout_, PANGO_WRAP_WORD);

  // Adjust character spacing
  PangoAttrList* attr_list = pango_attr_list_new();
  if (char_spacing_) {
    PangoAttribute* spacing_attr = pango_attr_letter_spacing_new(
        static_cast<int>(char_spacing_ * PANGO_SCALE + 0.5));
    // Cover the whole text: from index 0 to the largest guint value.
    spacing_attr->start_index = 0;
    spacing_attr->end_index = static_cast<guint>(-1);
    pango_attr_list_change(attr_list, spacing_attr);
  }
  // An empty attribute list is set when no spacing was requested.
  pango_layout_set_attributes(layout_, attr_list);
  pango_attr_list_unref(attr_list);
  // Adjust line spacing
  if (leading_) {
    pango_layout_set_spacing(layout_, leading_ * PANGO_SCALE);
  }
}
int tesseract::StringRenderer::StripUnrenderableWords ( string *  utf8_text) const

Definition at line 535 of file stringrenderer.cpp.

                                                                  {
  // Rewrites *utf8_text in place, keeping all whitespace and dropping every
  // whitespace-delimited word that font_ reports it cannot render. Returns
  // the number of words dropped.
  const char* src = utf8_text->c_str();
  string kept_text;
  int dropped_words = 0;
  int pos = 0;
  while (pos < utf8_text->length()) {
    // Copy the run of whitespace that precedes the next word.
    const int gap_len = SpanUTF8Whitespace(src + pos);
    kept_text.append(src + pos, gap_len);
    pos += gap_len;
    if (pos == utf8_text->length()) break;

    // Keep or drop the word that follows.
    const int word_len = SpanUTF8NotWhitespace(src + pos);
    if (!font_.CanRenderString(src + pos, word_len)) {
      ++dropped_words;
    } else {
      kept_text.append(src + pos, word_len);
    }
    pos += word_len;
  }
  utf8_text->swap(kept_text);

  if (dropped_words > 0) {
    tprintf("Stripped %d unrenderable words\n", dropped_words);
  }
  return dropped_words;
}
const int tesseract::StringRenderer::v_margin ( ) const [inline]

Definition at line 120 of file stringrenderer.h.

                             {
    // Current vertical page margin.
    return v_margin_;
  }
void tesseract::StringRenderer::WriteAllBoxes ( const string &  filename) const

Member Data Documentation

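bool tesseract::StringRenderer::add_ligatures_ [protected]

Definition at line 174 of file stringrenderer.h.

int tesseract::StringRenderer::box_padding_ [protected]

Definition at line 187 of file stringrenderer.h.

vector<BoxChar*> tesseract::StringRenderer::boxchars_ [protected]

Definition at line 186 of file stringrenderer.h.

hash_map<char32, inT64> tesseract::StringRenderer::char_map_ [protected]

Definition at line 192 of file stringrenderer.h.

double tesseract::StringRenderer::char_spacing_ [protected]

Definition at line 166 of file stringrenderer.h.

cairo_t* tesseract::StringRenderer::cr_ [protected]

Definition at line 178 of file stringrenderer.h.

bool tesseract::StringRenderer::drop_uncovered_chars_ [protected]

Definition at line 172 of file stringrenderer.h.

PangoFontInfo tesseract::StringRenderer::font_ [protected]

Definition at line 161 of file stringrenderer.h.

int tesseract::StringRenderer::font_index_ [protected]

Definition at line 194 of file stringrenderer.h.

bool tesseract::StringRenderer::gravity_hint_strong_ [protected]

Definition at line 169 of file stringrenderer.h.

int tesseract::StringRenderer::h_margin_ [protected]

Definition at line 163 of file stringrenderer.h.

int tesseract::StringRenderer::last_offset_ [protected]

Definition at line 195 of file stringrenderer.h.

PangoLayout* tesseract::StringRenderer::layout_ [protected]

Definition at line 179 of file stringrenderer.h.

int tesseract::StringRenderer::leading_ [protected]

Definition at line 167 of file stringrenderer.h.

bool tesseract::StringRenderer::output_word_boxes_ [protected]

Definition at line 175 of file stringrenderer.h.

int tesseract::StringRenderer::page_ [protected]

Definition at line 183 of file stringrenderer.h.

Boxa* tesseract::StringRenderer::page_boxes_ [protected]

Definition at line 189 of file stringrenderer.h.

int tesseract::StringRenderer::page_height_ [protected]

Definition at line 163 of file stringrenderer.h.

int tesseract::StringRenderer::page_width_ [protected]

Definition at line 163 of file stringrenderer.h.

int tesseract::StringRenderer::pen_color_[3] [protected]

Definition at line 165 of file stringrenderer.h.

bool tesseract::StringRenderer::render_fullwidth_latin_ [protected]

Definition at line 170 of file stringrenderer.h.

int tesseract::StringRenderer::resolution_ [protected]

Definition at line 167 of file stringrenderer.h.

int tesseract::StringRenderer::start_box_ [protected]

Definition at line 182 of file stringrenderer.h.

bool tesseract::StringRenderer::strip_unrenderable_words_ [protected]

Definition at line 173 of file stringrenderer.h.

cairo_surface_t* tesseract::StringRenderer::surface_ [protected]

Definition at line 177 of file stringrenderer.h.

int tesseract::StringRenderer::total_chars_ [protected]

Definition at line 193 of file stringrenderer.h.

int tesseract::StringRenderer::v_margin_ [protected]

Definition at line 163 of file stringrenderer.h.

bool tesseract::StringRenderer::vertical_text_ [protected]

Definition at line 168 of file stringrenderer.h.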


The documentation for this class was generated from the following files:
stringrenderer.h
stringrenderer.cpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines