tesseract
3.03
|
#include <pango_font_info.h>
Public Types | |
enum | FontTypeEnum { UNKNOWN, SERIF, SANS_SERIF, DECORATIVE } |
Public Member Functions | |
PangoFontInfo () | |
PangoFontInfo (const string &name) | |
bool | ParseFontDescriptionName (const string &name) |
bool | CoversUTF8Text (const char *utf8_text, int byte_length) const |
int | DropUncoveredChars (string *utf8_text) const |
bool | CanRenderString (const char *utf8_word, int len, vector< string > *graphemes) const |
bool | CanRenderString (const char *utf8_word, int len) const |
bool | GetSpacingProperties (const string &utf8_char, int *x_bearing, int *x_advance) const |
string | DescriptionName () const |
const string & | family_name () const |
const int | font_size () const |
const bool | is_bold () const |
const bool | is_italic () const |
const bool | is_smallcaps () const |
const bool | is_monospace () const |
const bool | is_fraktur () const |
const FontTypeEnum | font_type () const |
const int | resolution () const |
void | set_resolution (const int resolution) |
Friends | |
class | FontUtils |
Definition at line 38 of file pango_font_info.h.
Definition at line 40 of file pango_font_info.h.
// Enumerators of PangoFontInfo::FontTypeEnum, the type returned by
// font_type().
{ UNKNOWN, SERIF, SANS_SERIF, DECORATIVE, };
Definition at line 79 of file pango_font_info.cpp.
// Default constructor: starts with a NULL desc_ and kDefaultResolution, then
// calls Clear().
: desc_(NULL), resolution_(kDefaultResolution) { Clear(); }
tesseract::PangoFontInfo::PangoFontInfo | ( | const string & | name | ) | [explicit] |
Definition at line 83 of file pango_font_info.cpp.
// Constructs from a font description string. Starts with a NULL desc_ and
// kDefaultResolution, then runs ParseFontDescriptionName() on the argument;
// if that returns false, an error is reported through tprintf() and Clear()
// is called.
// NOTE(review): the rendered signature above names the parameter `name`,
// while this body refers to it as `desc` -- presumably the definition in
// pango_font_info.cpp spells it `desc`; confirm before renaming either side.
: desc_(NULL), resolution_(kDefaultResolution) { if (!ParseFontDescriptionName(desc)) { tprintf("ERROR: Could not parse %s\n", desc.c_str()); Clear(); } }
bool tesseract::PangoFontInfo::CanRenderString | ( | const char * | utf8_word, |
int | len, | ||
vector< string > * | graphemes | ||
) | const |
Definition at line 341 of file pango_font_info.cpp.
{
  // Returns true if the first `len` bytes of utf8_word can be laid out with
  // this font without producing an unknown glyph or the dotted-circle glyph.
  // When `graphemes` is non-NULL it is cleared on entry, receives the text of
  // each cluster visited, and is cleared again if a bad glyph is found.
  if (graphemes) graphemes->clear();
  // We check for font coverage of the text first, as otherwise Pango could
  // (undesirably) fall back to another font that does have the required
  // coverage.
  if (!CoversUTF8Text(utf8_word, len)) {
    return false;
  }
  // U+25CC dotted circle character that often (but not always) gets rendered
  // when there is an illegal grapheme sequence.
  const char32 kDottedCircleGlyph = 9676;
  bool bad_glyph = false;
  // Lay the text out with a fresh context/layout pair; both are unreferenced
  // at the end of this function.
  PangoFontMap* font_map = pango_cairo_font_map_get_default();
  PangoContext* context = pango_context_new();
  pango_context_set_font_map(context, font_map);
  PangoLayout* layout = pango_layout_new(context);
  if (desc_) {
    pango_layout_set_font_description(layout, desc_);
  } else {
    // No stored description: build a temporary one from DescriptionName()
    // and free it once the layout has been given it.
    PangoFontDescription *desc = pango_font_description_from_string(
        DescriptionName().c_str());
    pango_layout_set_font_description(layout, desc);
    pango_font_description_free(desc);
  }
  pango_layout_set_text(layout, utf8_word, len);
  PangoLayoutIter* run_iter = NULL;
  {  // Fontconfig caches some information here that is not freed before exit.
    DISABLE_HEAP_LEAK_CHECK;
    run_iter = pango_layout_get_iter(layout);
  }
  // Walk every run of the layout; stop as soon as a bad glyph is seen.
  do {
    PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
    if (!run) {
      tlog(2, "Found end of line NULL run marker\n");
      // `continue` in a do/while jumps to the loop condition, so the iterator
      // is still advanced.
      continue;
    }
    // Look up the glyph id this run's font uses for the dotted circle so it
    // can be compared against the shaped glyphs below.
    PangoGlyph dotted_circle_glyph;
    PangoFont* font = run->item->analysis.font;
    dotted_circle_glyph = pango_fc_font_get_glyph(
        reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
    if (TLOG_IS_ON(2)) {
      PangoFontDescription* desc = pango_font_describe(font);
      char* desc_str = pango_font_description_to_string(desc);
      tlog(2, "Desc of font in run: %s\n", desc_str);
      g_free(desc_str);
      pango_font_description_free(desc);
    }

    // Visit the run cluster by cluster.
    PangoGlyphItemIter cluster_iter;
    gboolean have_cluster;
    for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
                                                         run, utf8_word);
         have_cluster && !bad_glyph;
         have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
      const int start_byte_index = cluster_iter.start_index;
      const int end_byte_index = cluster_iter.end_index;
      int start_glyph_index = cluster_iter.start_glyph;
      int end_glyph_index = cluster_iter.end_glyph;
      string cluster_text = string(utf8_word + start_byte_index,
                                   end_byte_index - start_byte_index);
      // The cluster text is recorded before the whitespace check, so
      // whitespace clusters are reported to the caller too.
      if (graphemes) graphemes->push_back(cluster_text);
      if (IsUTF8Whitespace(cluster_text.c_str())) {
        tlog(2, "Skipping whitespace\n");
        continue;
      }
      if (TLOG_IS_ON(2)) {
        printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
               start_byte_index, end_byte_index,
               start_glyph_index, end_glyph_index);
      }
      // The glyph range may run in either direction, hence the signed step.
      for (int i = start_glyph_index,
               step = (end_glyph_index > start_glyph_index) ? 1 : -1;
           !bad_glyph && i != end_glyph_index; i+= step) {
        const bool unknown_glyph =
            (cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
             PANGO_GLYPH_UNKNOWN_FLAG);
        const bool illegal_glyph =
            (cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
             dotted_circle_glyph);
        bad_glyph = unknown_glyph || illegal_glyph;
        if (TLOG_IS_ON(2)) {
          printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
                 bad_glyph ? 1 : 0);
        }
      }
      if (TLOG_IS_ON(2)) {
        printf(" '%s'\n", cluster_text.c_str());
      }
      if (bad_glyph)
        tlog(1, "Found illegal glyph!\n");
    }
  } while (!bad_glyph && pango_layout_iter_next_run(run_iter));

  pango_layout_iter_free(run_iter);
  g_object_unref(context);
  g_object_unref(layout);
  // A failed check must not leave partial cluster output behind.
  if (bad_glyph && graphemes) graphemes->clear();
  return !bad_glyph;
}
bool tesseract::PangoFontInfo::CanRenderString | ( | const char * | utf8_word, |
int | len | ||
) | const |
Definition at line 336 of file pango_font_info.cpp.
{
  // Convenience overload for callers that only need the yes/no answer.
  // The three-argument overload guards every use of its `graphemes` pointer
  // against NULL, so forward NULL instead of filling a throwaway vector with
  // a copy of every cluster. The returned value is unchanged.
  return CanRenderString(utf8_word, len, static_cast<vector<string>*>(NULL));
}
bool tesseract::PangoFontInfo::CoversUTF8Text | ( | const char * | utf8_text, |
int | byte_length | ||
) | const |
Definition at line 248 of file pango_font_info.cpp.
{
  // Returns true if every code point in the first byte_length bytes of
  // utf8_text that is neither whitespace nor zero-width has exact coverage
  // (PANGO_COVERAGE_EXACT) in this font; returns false at the first code
  // point that does not.
  PangoFont* font = ToPangoFont();
  // pango_font_get_coverage() returns a new reference owned by the caller, so
  // it has to be released on every exit path (see the single unref below).
  // NOTE(review): `font` is not released here either; whether it should be
  // depends on the ownership contract of ToPangoFont(), which is not visible
  // in this chunk -- confirm.
  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
  bool all_covered = true;
  const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_text, byte_length);
  for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
       it != it_end; ++it) {
    if (IsWhitespace(*it) || pango_is_zero_width(*it))
      continue;
    if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
      char tmp[5];
      int len = it.get_utf8(tmp);
      tmp[len] = '\0';
      tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
      all_covered = false;
      break;
    }
  }
  if (coverage != NULL) {
    pango_coverage_unref(coverage);
  }
  return all_covered;
}
string tesseract::PangoFontInfo::DescriptionName | ( | ) | const |
Definition at line 105 of file pango_font_info.cpp.
{
  // Serializes desc_ to its Pango string form. With no description stored
  // the result is the empty string.
  if (desc_ == NULL) {
    return string();
  }
  // Pango allocates the C string; copy it into a string and release the
  // original with g_free() before returning.
  char* const raw_name = pango_font_description_to_string(desc_);
  string result(raw_name);
  g_free(raw_name);
  return result;
}
int tesseract::PangoFontInfo::DropUncoveredChars | ( | string * | utf8_text | ) | const |
Definition at line 267 of file pango_font_info.cpp.
{
  // Removes, in place, every code point of *utf8_text that is neither
  // whitespace nor zero-width and does not have exact coverage
  // (PANGO_COVERAGE_EXACT) in this font. Returns the number of code points
  // removed.
  PangoFont* font = ToPangoFont();
  // pango_font_get_coverage() returns a new reference owned by the caller; it
  // is released once the scan is finished.
  // NOTE(review): `font` is not released here either; whether it should be
  // depends on the ownership contract of ToPangoFont(), which is not visible
  // in this chunk -- confirm.
  PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
  int num_dropped_chars = 0;
  // Maintain two iterators that point into the string. For space efficiency, we
  // will repeatedly copy one covered UTF8 character from one to the other, and
  // at the end resize the string to the right length.
  char* out = const_cast<char*>(utf8_text->c_str());
  const UNICHAR::const_iterator it_begin =
      UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
  const UNICHAR::const_iterator it_end =
      UNICHAR::end(utf8_text->c_str(), utf8_text->length());
  for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
    if (!IsWhitespace(*it) && !pango_is_zero_width(*it) &&
        pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
      if (TLOG_IS_ON(2)) {
        char tmp[5];
        int len = it.get_utf8(tmp);
        tmp[len] = '\0';
        tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
      }
      ++num_dropped_chars;
      continue;
    }
    // Source and destination live in the same buffer: they coincide until the
    // first drop and can partially overlap afterwards, so an overlap-safe
    // copy is required here.
    memmove(out, it.utf8_data(), it.utf8_len());
    out += it.utf8_len();
  }
  if (coverage != NULL) {
    pango_coverage_unref(coverage);
  }
  utf8_text->resize(out - utf8_text->c_str());
  return num_dropped_chars;
}
const string& tesseract::PangoFontInfo::family_name | ( | ) | const [inline] |
Definition at line 89 of file pango_font_info.h.
// Returns the family_name_ member by const reference.
{ return family_name_; }
const int tesseract::PangoFontInfo::font_size | ( | ) | const [inline] |
Definition at line 91 of file pango_font_info.h.
// Returns the font_size_ member.
{ return font_size_; }
const FontTypeEnum tesseract::PangoFontInfo::font_type | ( | ) | const [inline] |
Definition at line 97 of file pango_font_info.h.
// Returns the font_type_ member (a FontTypeEnum value).
{ return font_type_; }
bool tesseract::PangoFontInfo::GetSpacingProperties | ( | const string & | utf8_char, |
int * | x_bearing, | ||
int * | x_advance | ||
) | const |
Definition at line 298 of file pango_font_info.cpp.
{
  // Computes, in pixels, the spacing of utf8_char as if its code points were
  // rendered one after another: *x_bearing receives the left-most ink
  // bearing and *x_advance the accumulated advance. Returns false, leaving
  // both outputs untouched, as soon as a code point has no glyph in the font.
  PangoFont* const pango_font = ToPangoFont();
  PangoFcFont* const fc_font = reinterpret_cast<PangoFcFont*>(pango_font);

  const UNICHAR::const_iterator first =
      UNICHAR::begin(utf8_char.c_str(), utf8_char.length());
  const UNICHAR::const_iterator last =
      UNICHAR::end(utf8_char.c_str(), utf8_char.length());

  int advance_so_far = 0;
  int leftmost_bearing = 0;
  for (UNICHAR::const_iterator cur = first; cur != last; ++cur) {
    // Map the code point to the font's glyph index; zero means "no glyph".
    const PangoGlyph glyph = pango_fc_font_get_glyph(fc_font, *cur);
    if (glyph == 0) {
      return false;
    }

    // Fetch both extents for the glyph and convert them to pixel units.
    PangoRectangle ink_extents;
    PangoRectangle logical_extents;
    pango_font_get_glyph_extents(pango_font, glyph,
                                 &ink_extents, &logical_extents);
    pango_extents_to_pixels(&ink_extents, NULL);
    pango_extents_to_pixels(&logical_extents, NULL);

    // The first glyph always seeds the minimum; later glyphs only lower it.
    const int candidate = advance_so_far + PANGO_LBEARING(ink_extents);
    if (cur == first || candidate < leftmost_bearing) {
      leftmost_bearing = candidate;
    }
    advance_so_far += PANGO_RBEARING(logical_extents);
  }

  *x_bearing = leftmost_bearing;
  *x_advance = advance_so_far;
  return true;
}
const bool tesseract::PangoFontInfo::is_bold | ( | ) | const [inline] |
Definition at line 92 of file pango_font_info.h.
// Returns the is_bold_ member.
{ return is_bold_; }
const bool tesseract::PangoFontInfo::is_fraktur | ( | ) | const [inline] |
Definition at line 96 of file pango_font_info.h.
// Returns the is_fraktur_ member.
{ return is_fraktur_; }
const bool tesseract::PangoFontInfo::is_italic | ( | ) | const [inline] |
Definition at line 93 of file pango_font_info.h.
// Returns the is_italic_ member.
{ return is_italic_; }
const bool tesseract::PangoFontInfo::is_monospace | ( | ) | const [inline] |
Definition at line 95 of file pango_font_info.h.
// Returns the is_monospace_ member.
{ return is_monospace_; }
const bool tesseract::PangoFontInfo::is_smallcaps | ( | ) | const [inline] |
Definition at line 94 of file pango_font_info.h.
// Returns the is_smallcaps_ member.
{ return is_smallcaps_; }
bool tesseract::PangoFontInfo::ParseFontDescriptionName | ( | const string & | name | ) |
Definition at line 223 of file pango_font_info.cpp.
{
  // Builds a temporary PangoFontDescription from `name`, passes it to
  // ParseFontDescription(), and frees the temporary before handing back that
  // call's result.
  PangoFontDescription* const parsed =
      pango_font_description_from_string(name.c_str());
  const bool ok = ParseFontDescription(parsed);
  pango_font_description_free(parsed);
  return ok;
}
const int tesseract::PangoFontInfo::resolution | ( | ) | const [inline] |
Definition at line 99 of file pango_font_info.h.
// Returns the resolution_ member.
{ return resolution_; }
void tesseract::PangoFontInfo::set_resolution | ( | const int | resolution | ) | [inline] |
Definition at line 100 of file pango_font_info.h.
// Stores the argument in the resolution_ member.
{ resolution_ = resolution; }
friend class FontUtils [friend] |
Definition at line 105 of file pango_font_info.h.