tesseract  3.03
tesseract::PangoFontInfo Class Reference

#include <pango_font_info.h>

List of all members.

Public Types

enum  FontTypeEnum { UNKNOWN, SERIF, SANS_SERIF, DECORATIVE }

Public Member Functions

 PangoFontInfo ()
 PangoFontInfo (const string &name)
bool ParseFontDescriptionName (const string &name)
bool CoversUTF8Text (const char *utf8_text, int byte_length) const
int DropUncoveredChars (string *utf8_text) const
bool CanRenderString (const char *utf8_word, int len, vector< string > *graphemes) const
bool CanRenderString (const char *utf8_word, int len) const
bool GetSpacingProperties (const string &utf8_char, int *x_bearing, int *x_advance) const
string DescriptionName () const
const string & family_name () const
const int font_size () const
const bool is_bold () const
const bool is_italic () const
const bool is_smallcaps () const
const bool is_monospace () const
const bool is_fraktur () const
const FontTypeEnum font_type () const
const int resolution () const
void set_resolution (const int resolution)

Friends

class FontUtils

Detailed Description

Definition at line 38 of file pango_font_info.h.


Member Enumeration Documentation

enum tesseract::PangoFontInfo::FontTypeEnum

Enumerator:
UNKNOWN 
SERIF 
SANS_SERIF 
DECORATIVE 

Definition at line 40 of file pango_font_info.h.


Constructor & Destructor Documentation

tesseract::PangoFontInfo::PangoFontInfo ( )

Definition at line 79 of file pango_font_info.cpp.

                             : desc_(NULL), resolution_(kDefaultResolution) {
  // Starts with no Pango description (desc_ is NULL) and the default
  // resolution, then calls Clear() (defined outside this excerpt; presumably
  // it resets the remaining fields -- confirm).
  Clear();
}
tesseract::PangoFontInfo::PangoFontInfo ( const string &  name) [explicit]

Definition at line 83 of file pango_font_info.cpp.

    : desc_(NULL), resolution_(kDefaultResolution) {
  // The argument is spelled `desc` in this definition, whereas the signature
  // shown above labels it `name`.
  if (!ParseFontDescriptionName(desc)) {
    // Parsing failed: log the offending string, then call Clear().
    tprintf("ERROR: Could not parse %s\n", desc.c_str());
    Clear();
  }
}

Member Function Documentation

bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len,
vector< string > *  graphemes 
) const

Definition at line 341 of file pango_font_info.cpp.

                                                                     {
  // Decides whether utf8_word (len bytes) can be rendered: the result is true
  // unless the text is not covered by the font or a laid-out glyph is flagged
  // unknown or equals the font's dotted-circle glyph. When graphemes is
  // non-NULL it receives the text of every cluster visited, and it is emptied
  // again if the function returns false.
  if (graphemes) graphemes->clear();
  // We check for font coverage of the text first, as otherwise Pango could
  // (undesirably) fall back to another font that does have the required
  // coverage.
  if (!CoversUTF8Text(utf8_word, len)) {
    return false;
  }
  // U+25CC dotted circle character that often (but not always) gets rendered
  // when there is an illegal grapheme sequence.
  const char32 kDottedCircleGlyph = 9676;
  bool bad_glyph = false;
  // Build a temporary Pango context and layout on the default cairo font map;
  // both are unreferenced at the end of the function.
  PangoFontMap* font_map = pango_cairo_font_map_get_default();
  PangoContext* context = pango_context_new();
  pango_context_set_font_map(context, font_map);
  PangoLayout* layout = pango_layout_new(context);
  if (desc_) {
    pango_layout_set_font_description(layout, desc_);
  } else {
    // No stored description: build a temporary one from DescriptionName() and
    // free it once it has been handed to the layout.
    PangoFontDescription *desc = pango_font_description_from_string(
        DescriptionName().c_str());
    pango_layout_set_font_description(layout, desc);
    pango_font_description_free(desc);
  }
  pango_layout_set_text(layout, utf8_word, len);
  PangoLayoutIter* run_iter = NULL;
  { // Fontconfig caches some information here that is not freed before exit.
    DISABLE_HEAP_LEAK_CHECK;
    run_iter = pango_layout_get_iter(layout);
  }
  // Outer loop: one iteration per layout run, stopping at the first bad glyph.
  do {
    PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
    if (!run) {
      tlog(2, "Found end of line NULL run marker\n");
      // `continue` inside a do-while jumps to the loop condition, so the
      // iterator is still advanced to the next run.
      continue;
    }
    // Look up the dotted-circle glyph id in the font actually used by this
    // run, so glyphs can be compared against it below.
    PangoGlyph dotted_circle_glyph;
    PangoFont* font = run->item->analysis.font;
    dotted_circle_glyph = pango_fc_font_get_glyph(
        reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
    if (TLOG_IS_ON(2)) {
      PangoFontDescription* desc = pango_font_describe(font);
      char* desc_str = pango_font_description_to_string(desc);
      tlog(2, "Desc of font in run: %s\n", desc_str);
      g_free(desc_str);
      pango_font_description_free(desc);
    }

    // Middle loop: one iteration per cluster of the run.
    PangoGlyphItemIter cluster_iter;
    gboolean have_cluster;
    for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
                                                         run, utf8_word);
         have_cluster && !bad_glyph;
         have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
      const int start_byte_index = cluster_iter.start_index;
      const int end_byte_index = cluster_iter.end_index;
      int start_glyph_index = cluster_iter.start_glyph;
      int end_glyph_index = cluster_iter.end_glyph;
      // The cluster's bytes within utf8_word form one grapheme entry.
      string cluster_text = string(utf8_word + start_byte_index,
                                   end_byte_index - start_byte_index);
      if (graphemes) graphemes->push_back(cluster_text);
      if (IsUTF8Whitespace(cluster_text.c_str())) {
        tlog(2, "Skipping whitespace\n");
        continue;
      }
      if (TLOG_IS_ON(2)) {
        printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
               start_byte_index, end_byte_index,
               start_glyph_index, end_glyph_index);
      }
      // Inner loop: walk the cluster's glyphs towards end_glyph_index, stepping
      // backwards when the end index is not greater than the start index.
      for (int i = start_glyph_index,
               step = (end_glyph_index > start_glyph_index) ? 1 : -1;
           !bad_glyph && i != end_glyph_index; i+= step) {
        const bool unknown_glyph =
            (cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
             PANGO_GLYPH_UNKNOWN_FLAG);
        const bool illegal_glyph =
            (cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
             dotted_circle_glyph);
        bad_glyph = unknown_glyph || illegal_glyph;
        if (TLOG_IS_ON(2)) {
          printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
                 bad_glyph ? 1 : 0);
        }
      }
      if (TLOG_IS_ON(2)) {
        printf("  '%s'\n", cluster_text.c_str());
      }
      if (bad_glyph)
        tlog(1, "Found illegal glyph!\n");
    }
  } while (!bad_glyph && pango_layout_iter_next_run(run_iter));

  // Release the iterator, context and layout created above.
  pango_layout_iter_free(run_iter);
  g_object_unref(context);
  g_object_unref(layout);
  // A failed check must not leave a partial grapheme list behind.
  if (bad_glyph && graphemes) graphemes->clear();
  return !bad_glyph;
}
bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len 
) const

Definition at line 336 of file pango_font_info.cpp.

                                                                        {
  // Delegates to the three-argument overload; the grapheme list it fills is
  // discarded when this function returns.
  vector<string> discarded_graphemes;
  const bool renderable =
      CanRenderString(utf8_word, len, &discarded_graphemes);
  return renderable;
}
bool tesseract::PangoFontInfo::CoversUTF8Text ( const char *  utf8_text,
int  byte_length 
) const

Definition at line 248 of file pango_font_info.cpp.

                                                                               {
  // Reports whether every character of utf8_text (byte_length bytes) that is
  // neither whitespace nor zero-width has PANGO_COVERAGE_EXACT coverage in the
  // font returned by ToPangoFont(). Returns false at the first character that
  // does not.
  PangoFont* font = ToPangoFont();
  // pango_font_get_coverage() hands back a reference owned by the caller, so
  // it must be released on every path out of this function; hence the single
  // exit point below instead of an early return.
  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
  bool all_covered = true;
  for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
       it != UNICHAR::end(utf8_text, byte_length);
       ++it) {
    if (IsWhitespace(*it) || pango_is_zero_width(*it))
      continue;
    if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
      // Log the offending character both as UTF-8 and as a code point.
      char tmp[5];
      int len = it.get_utf8(tmp);
      tmp[len] = '\0';
      tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
      all_covered = false;
      break;
    }
  }
  if (coverage) pango_coverage_unref(coverage);
  // NOTE(review): ToPangoFont() is defined outside this excerpt; if it returns
  // a new reference, `font` needs a g_object_unref() here as well -- confirm.
  return all_covered;
}

string tesseract::PangoFontInfo::DescriptionName ( ) const

Definition at line 105 of file pango_font_info.cpp.

                                            {
  // Yields the textual form of desc_ as produced by Pango, or an empty string
  // when no description is stored.
  string description;
  if (desc_ != NULL) {
    // The buffer from Pango is copied into the result and then released with
    // g_free().
    char* raw_text = pango_font_description_to_string(desc_);
    description.assign(raw_text);
    g_free(raw_text);
  }
  return description;
}
int tesseract::PangoFontInfo::DropUncoveredChars ( string *  utf8_text) const

Definition at line 267 of file pango_font_info.cpp.

                                                             {
  // Removes from *utf8_text, in place, every character that is neither
  // whitespace nor zero-width and lacks PANGO_COVERAGE_EXACT coverage in the
  // font returned by ToPangoFont(). Returns the number of characters removed.
  PangoFont* font = ToPangoFont();
  // The coverage object is a reference owned by the caller; it is released
  // once the scan is finished.
  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
  int num_dropped_chars = 0;
  // Maintain two iterators that point into the string. For space efficiency, we
  // will repeatedly copy one covered UTF8 character from one to the other, and
  // at the end resize the string to the right length.
  char* out = const_cast<char*>(utf8_text->c_str());
  const UNICHAR::const_iterator it_begin =
      UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
  const UNICHAR::const_iterator it_end =
      UNICHAR::end(utf8_text->c_str(), utf8_text->length());
  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
    if (!IsWhitespace(*it) && !pango_is_zero_width(*it) &&
        pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
      if (TLOG_IS_ON(2)) {
        char tmp[5];
        int len = it.get_utf8(tmp);
        tmp[len] = '\0';
        tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
      }
      ++num_dropped_chars;
      continue;
    }
    // Source and destination live in the same buffer and coincide until the
    // first character has been dropped, so the copy must tolerate overlap:
    // memmove does, strncpy does not.
    memmove(out, it.utf8_data(), it.utf8_len());
    out += it.utf8_len();
  }
  if (coverage) pango_coverage_unref(coverage);
  // NOTE(review): ToPangoFont() is defined outside this excerpt; if it returns
  // a new reference, `font` needs a g_object_unref() here as well -- confirm.
  // `out` now marks the end of the kept bytes; truncate the string there.
  utf8_text->resize(out - utf8_text->c_str());
  return num_dropped_chars;
}
const string& tesseract::PangoFontInfo::family_name ( ) const [inline]

Definition at line 89 of file pango_font_info.h.

// Accessor: exposes the family_name_ member by const reference.
{ return family_name_; }
const int tesseract::PangoFontInfo::font_size ( ) const [inline]

Definition at line 91 of file pango_font_info.h.

// Accessor: returns the font_size_ member by value.
{ return font_size_; }

const FontTypeEnum tesseract::PangoFontInfo::font_type ( ) const [inline]

Definition at line 97 of file pango_font_info.h.

// Accessor: returns the font_type_ member by value.
{ return font_type_; }
bool tesseract::PangoFontInfo::GetSpacingProperties ( const string &  utf8_char,
int *  x_bearing,
int *  x_advance 
) const

Definition at line 298 of file pango_font_info.cpp.

                                                                               {
  // Computes, in pixels, the left-most ink bearing and the accumulated
  // advance of utf8_char when its code points are placed one after another in
  // this font. On success writes *x_bearing and *x_advance and returns true;
  // returns false, leaving both outputs untouched, as soon as a code point has
  // no glyph in the font.
  // Convert to equivalent PangoFont structure
  // NOTE(review): ToPangoFont() is defined outside this excerpt; if it returns
  // a new reference, `font` is never released here -- confirm.
  PangoFont* font = ToPangoFont();
  // Find the glyph index in the font for the supplied utf8 character.
  int total_advance = 0;
  int min_bearing = 0;
  // Handle multi-unicode strings by reporting the left-most position of the
  // x-bearing, and right-most position of the x-advance if the string were to
  // be rendered.
  const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(),
                                                          utf8_char.length());
  const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(),
                                                      utf8_char.length());
  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
    PangoGlyph glyph_index = pango_fc_font_get_glyph(
        reinterpret_cast<PangoFcFont*>(font), *it);
    if (!glyph_index) {
      // Glyph for given unicode character doesn't exist in font.
      return false;
    }
    // Find the ink glyph extents for the glyph
    PangoRectangle ink_rect, logical_rect;
    pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
    // Convert both rectangles from Pango units to pixels.
    pango_extents_to_pixels(&ink_rect, NULL);
    pango_extents_to_pixels(&logical_rect, NULL);

    // The bearing is measured from the start of the string, so it is offset by
    // the advance accumulated from the preceding glyphs.
    int bearing = total_advance + PANGO_LBEARING(ink_rect);
    // The first glyph always seeds min_bearing, so the initial 0 cannot mask a
    // positive left-most bearing.
    if (it == it_begin || bearing < min_bearing) {
      min_bearing = bearing;
    }
    total_advance += PANGO_RBEARING(logical_rect);
  }
  *x_bearing = min_bearing;
  *x_advance = total_advance;
  return true;
}
const bool tesseract::PangoFontInfo::is_bold ( ) const [inline]

Definition at line 92 of file pango_font_info.h.

// Accessor: returns the is_bold_ flag by value.
{ return is_bold_; }
const bool tesseract::PangoFontInfo::is_fraktur ( ) const [inline]

Definition at line 96 of file pango_font_info.h.

// Accessor: returns the is_fraktur_ flag by value.
{ return is_fraktur_; }
const bool tesseract::PangoFontInfo::is_italic ( ) const [inline]

Definition at line 93 of file pango_font_info.h.

// Accessor: returns the is_italic_ flag by value.
{ return is_italic_; }
const bool tesseract::PangoFontInfo::is_monospace ( ) const [inline]

Definition at line 95 of file pango_font_info.h.

// Accessor: returns the is_monospace_ flag by value.
{ return is_monospace_; }
const bool tesseract::PangoFontInfo::is_smallcaps ( ) const [inline]

Definition at line 94 of file pango_font_info.h.

// Accessor: returns the is_smallcaps_ flag by value.
{ return is_smallcaps_; }

bool tesseract::PangoFontInfo::ParseFontDescriptionName ( const string &  name)

Definition at line 223 of file pango_font_info.cpp.

                                                               {
  // Has Pango turn the textual name into a description object, passes that to
  // ParseFontDescription(), and frees the temporary before reporting the
  // outcome.
  PangoFontDescription* parsed_desc =
      pango_font_description_from_string(name.c_str());
  const bool parsed_ok = ParseFontDescription(parsed_desc);
  pango_font_description_free(parsed_desc);
  return parsed_ok;
}
const int tesseract::PangoFontInfo::resolution ( ) const [inline]

Definition at line 99 of file pango_font_info.h.

// Accessor: returns the resolution_ member by value.
{ return resolution_; }
void tesseract::PangoFontInfo::set_resolution ( const int  resolution) [inline]

Definition at line 100 of file pango_font_info.h.

                                            {
    // Overwrites the stored resolution_ with the caller-supplied value.
    resolution_ = resolution;
  }

Friends And Related Function Documentation

friend class FontUtils [friend]

Definition at line 105 of file pango_font_info.h.


The documentation for this class was generated from the following files:
- pango_font_info.h
- pango_font_info.cpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines