tesseract
3.03
|
#include <pango_font_info.h>
Static Public Member Functions | |
static bool | IsAvailableFont (const char *font_desc) |
static const vector< string > & | ListAvailableFonts () |
static bool | SelectFont (const char *utf8_word, const int utf8_len, string *font_name, vector< string > *graphemes) |
static bool | SelectFont (const char *utf8_word, const int utf8_len, const vector< string > &all_fonts, string *font_name, vector< string > *graphemes) |
static void | GetAllRenderableCharacters (vector< bool > *unichar_bitmap) |
static void | GetAllRenderableCharacters (const vector< string > &font_names, vector< bool > *unichar_bitmap) |
static void | GetAllRenderableCharacters (const string &font_name, vector< bool > *unichar_bitmap) |
static string | BestFonts (const unordered_map< char32, inT64 > &ch_map, vector< std::pair< const char *, vector< bool > > > *font_flag) |
static int | FontScore (const unordered_map< char32, inT64 > &ch_map, const string &fontname, int *raw_score, vector< bool > *ch_flags) |
Definition at line 134 of file pango_font_info.h.
string tesseract::FontUtils::BestFonts | ( | const unordered_map< char32, inT64 > & | ch_map, |
vector< std::pair< const char *, vector< bool > > > * | font_flag | ||
) | [static] |
Definition at line 631 of file pango_font_info.cpp.
{
  // Ranks every available font by how well it covers the characters in
  // ch_map, appends a (font name, per-character coverage flags) pair to
  // *fonts for each accepted font, and returns the accepted font names as a
  // single string with one name per line.

  // Fraction of the best weighted score and of the best raw score that a
  // font must reach to be accepted.
  const double kMinOKFraction = 0.99;
  // Weighted fraction of characters that must be renderable in a font to make
  // it OK even if the raw count is not good.
  const double kMinWeightedFraction = 0.99995;

  fonts->clear();
  const vector<string>& candidates = FontUtils::ListAvailableFonts();
  const size_t num_candidates = candidates.size();

  // Pass 1: score every candidate and remember the best scores seen.
  // FontScore clears the flag vector and zeroes the raw count itself, so the
  // pre-sized slots can be handed to it directly.
  vector<vector<bool> > coverage_flags(num_candidates);
  vector<int> weighted_scores(num_candidates, 0);
  vector<int> raw_counts(num_candidates, 0);
  int best_weighted = 0;
  int best_raw = 0;
  for (size_t f = 0; f < num_candidates; ++f) {
    weighted_scores[f] = FontScore(ch_map, candidates[f],
                                   &raw_counts[f], &coverage_flags[f]);
    if (weighted_scores[f] > best_weighted) best_weighted = weighted_scores[f];
    if (raw_counts[f] > best_raw) best_raw = raw_counts[f];
  }

  // Pass 2: select the fonts with a score above a threshold fraction of both
  // the raw and weighted best scores. To prevent bogus fonts being selected
  // for CJK, a high fraction (kMinOKFraction = 0.99) of BOTH weighted and raw
  // scores is required.
  // In low character-count scripts, the issue is more getting enough fonts,
  // when only 1 or 2 might have all those rare dingbats etc in them, so a
  // font with a very high weighted (coverage) score
  // (kMinWeightedFraction = 0.99995) is allowed even if its raw score is poor.
  const int least_good_enough =
      static_cast<int>(best_weighted * kMinOKFraction);
  const int least_raw_enough = static_cast<int>(best_raw * kMinOKFraction);
  const int override_enough =
      static_cast<int>(best_weighted * kMinWeightedFraction);

  string accepted_names;
  for (size_t f = 0; f < num_candidates; ++f) {
    const int weighted = weighted_scores[f];
    const int raw = raw_counts[f];
    const bool weighted_ok = weighted >= least_good_enough;
    const bool raw_ok = raw >= least_raw_enough;

    if ((weighted_ok && raw_ok) || weighted >= override_enough) {
      fonts->push_back(make_pair(candidates[f].c_str(), coverage_flags[f]));
      tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n",
           candidates[f].c_str(),
           100.0 * weighted / best_weighted,
           raw,
           100.0 * raw / best_raw);
      accepted_names.append(candidates[f]);
      accepted_names.append("\n");
      continue;
    }

    // Not accepted, but worth logging if it passed one of the two thresholds.
    if (weighted_ok || raw_ok) {
      tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n",
           candidates[f].c_str(),
           100.0 * weighted / best_weighted,
           raw,
           100.0 * raw / best_raw);
    }
  }
  return accepted_names;
}
int tesseract::FontUtils::FontScore | ( | const unordered_map< char32, inT64 > & | ch_map, |
const string & | fontname, | ||
int * | raw_score, | ||
vector< bool > * | ch_flags | ||
) | [static] |
Definition at line 596 of file pango_font_info.cpp.
{
  // Scores how well the font described by fontname covers the characters in
  // ch_map.
  //   ch_map:    maps each character to a weight (its it->second value).
  //   fontname:  font description string handed to PangoFontInfo.
  //   raw_score: out; set to the number of distinct covered characters.
  //   ch_flags:  optional out; receives one covered/not-covered flag per
  //              ch_map entry, in ch_map iteration order.
  // Returns the sum of the weights of the covered characters.
  PangoFontInfo font_info;
  if (!font_info.ParseFontDescriptionName(fontname)) {
    // Best effort: report the problem and carry on with whatever font_info
    // holds, as callers expect a score for every font.
    tprintf("ERROR: Could not parse %s\n", fontname.c_str());
  }
  PangoFont* font = font_info.ToPangoFont();
  // Only query Pango when a font was actually loaded; a missing font is
  // treated as covering nothing instead of passing NULL into Pango.
  PangoCoverage* coverage =
      (font != NULL) ? pango_font_get_coverage(font, NULL) : NULL;
  // NOTE(review): ToPangoFont() presumably returns a reference the caller
  // owns; if so `font` should also be g_object_unref'd below - confirm
  // against PangoFontInfo::ToPangoFont.

  if (ch_flags) {
    ch_flags->clear();
    ch_flags->reserve(ch_map.size());
  }
  *raw_score = 0;
  int ok_chars = 0;
  for (unordered_map<char32, inT64>::const_iterator it = ch_map.begin();
       it != ch_map.end(); ++it) {
    // Whitespace always counts as covered; anything else needs exact
    // coverage from the font.
    bool covered = (IsWhitespace(it->first) ||
                    (coverage != NULL &&
                     pango_coverage_get(coverage, it->first)
                         == PANGO_COVERAGE_EXACT));
    if (covered) {
      ++(*raw_score);
      // NOTE(review): the weight is inT64 but the accumulator is int, so a
      // very large corpus could overflow here.
      ok_chars += it->second;
    }
    if (ch_flags) {
      ch_flags->push_back(covered);
    }
  }

  // pango_font_get_coverage hands back a reference owned by the caller;
  // release it so scoring many fonts does not leak one map per font.
  if (coverage != NULL) {
    pango_coverage_unref(coverage);
  }
  return ok_chars;
}
void tesseract::FontUtils::GetAllRenderableCharacters | ( | vector< bool > * | unichar_bitmap | ) | [static] |
Definition at line 561 of file pango_font_info.cpp.
{
  // Convenience overload: computes the renderable-character bitmap over
  // every font reported by ListAvailableFonts().
  GetAllRenderableCharacters(ListAvailableFonts(), unichar_bitmap);
}
void tesseract::FontUtils::GetAllRenderableCharacters | ( | const vector< string > & | font_names, |
vector< bool > * | unichar_bitmap | ||
) | [static] |
Definition at line 576 of file pango_font_info.cpp.
{
  // Builds in *unichar_bitmap the set of characters that at least one of the
  // given fonts can render, by forming the union of the fonts' Pango
  // coverage maps and converting it with CharCoverageMapToBitmap.
  PangoCoverage* all_coverage = pango_coverage_new();
  // size() is size_t; cast so the argument matches the %d conversion.
  tlog(1, "Processing %d fonts\n", static_cast<int>(fonts.size()));
  for (size_t i = 0; i < fonts.size(); ++i) {
    PangoFontInfo font_info(fonts[i]);
    PangoFont* font = font_info.ToPangoFont();
    if (font == NULL) {
      // Font could not be loaded: it contributes nothing to the union.
      continue;
    }
    // NOTE(review): `font` is presumably a reference owned by this caller
    // and should be g_object_unref'd - confirm against
    // PangoFontInfo::ToPangoFont.
    PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
    if (coverage == NULL) {
      continue;
    }
    // Mark off characters that any font can render.
    // NOTE(review): pango_coverage_max is documented as deprecated (and a
    // no-op) from Pango 1.44 - verify against the Pango version in use.
    pango_coverage_max(all_coverage, coverage);
    // The coverage map is a reference owned by this caller; release it so
    // one map per font is not leaked.
    pango_coverage_unref(coverage);
  }
  CharCoverageMapToBitmap(all_coverage, unichar_bitmap);
  pango_coverage_unref(all_coverage);
}
void tesseract::FontUtils::GetAllRenderableCharacters | ( | const string & | font_name, |
vector< bool > * | unichar_bitmap | ||
) | [static] |
Definition at line 567 of file pango_font_info.cpp.
{
  // Builds in *unichar_bitmap the set of characters renderable by the single
  // font described by font_name.
  PangoFontInfo font_info(font_name);
  PangoCoverage* coverage = pango_font_get_coverage(
      font_info.ToPangoFont(), NULL);
  CharCoverageMapToBitmap(coverage, unichar_bitmap);
  // The coverage map returned by Pango is owned by this caller, and
  // CharCoverageMapToBitmap only reads it (the multi-font overload unrefs
  // its map after the same call), so release it here to avoid a leak.
  if (coverage != NULL) {
    pango_coverage_unref(coverage);
  }
}
bool tesseract::FontUtils::IsAvailableFont | ( | const char * | font_desc | ) | [static] |
Definition at line 455 of file pango_font_info.cpp.
{
  // Returns true if the font that Pango actually loads for query_desc has a
  // description equal to the one requested, i.e. the request was not
  // silently substituted by a different font.
  PangoFontDescription *desc = pango_font_description_from_string(query_desc);
  PangoFont* selected_font = NULL;
  {
    InitFontconfig();
    PangoFontMap* font_map = pango_cairo_font_map_get_default();
    PangoContext* context = pango_context_new();
    pango_context_set_font_map(context, font_map);
    {
      DISABLE_HEAP_LEAK_CHECK;
      selected_font = pango_font_map_load_font(font_map, context, desc);
    }
    g_object_unref(context);
  }
  if (selected_font == NULL) {
    // Nothing could be loaded, so the font is not available. Bail out here
    // rather than passing NULL on to pango_font_describe.
    tlog(2, "query_desc: '%s' could not be loaded\n", query_desc);
    pango_font_description_free(desc);
    return false;
  }
  PangoFontDescription* selected_desc = pango_font_describe(selected_font);

  bool equal = pango_font_description_equal(desc, selected_desc);
  tlog(3, "query weight = %d \t selected weight =%d\n",
       pango_font_description_get_weight(desc),
       pango_font_description_get_weight(selected_desc));

  char* selected_desc_str = pango_font_description_to_string(selected_desc);
  // Both descriptions are printed; the second conversion previously lacked
  // its '%' and so never showed the selected description.
  tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc, selected_desc_str);

  g_free(selected_desc_str);
  pango_font_description_free(selected_desc);
  // pango_font_map_load_font returns a reference owned by the caller.
  g_object_unref(selected_font);
  pango_font_description_free(desc);
  return equal;
}
const vector< string > & tesseract::FontUtils::ListAvailableFonts | ( | ) | [static] |
Definition at line 498 of file pango_font_info.cpp.
{
  // Returns the list of usable font description strings. The list is built
  // on the first call and kept in a function-local static; later calls
  // return the cached list as long as it is non-empty.
  static vector<string> available_fonts_;  // cache list
  if (!available_fonts_.empty()) {
    return available_fonts_;
  }

#ifndef USE_STD_NAMESPACE
  if (FLAGS_use_only_legacy_fonts) {
    // Restrict view to list of fonts in legacy_fonts.h
    tprintf("Using list of legacy fonts only\n");
    const int kNumFontLists = 4;
    for (int list = 0; list < kNumFontLists; ++list) {
      // Each list is terminated by a NULL entry.
      int entry = 0;
      while (kFontlists[list][entry] != NULL) {
        available_fonts_.push_back(kFontlists[list][entry]);
        ++entry;
      }
    }
    return available_fonts_;
  }
#endif

  // Enumerate every face of every font family known to Pango and keep the
  // ones that IsAvailableFont accepts.
  PangoFontFamily** families = 0;
  int n_families = 0;
  ListFontFamilies(&families, &n_families);
  for (int fam = 0; fam < n_families; ++fam) {
    const char* family_name = pango_font_family_get_name(families[fam]);
    tlog(2, "Listing family %s\n", family_name);
    if (ShouldIgnoreFontFamilyName(family_name)) {
      continue;
    }

    PangoFontFace** faces = NULL;
    int n_faces = 0;
    pango_font_family_list_faces(families[fam], &faces, &n_faces);
    for (int face = 0; face < n_faces; ++face) {
      PangoFontDescription* face_desc = pango_font_face_describe(faces[face]);
      char* face_desc_str = pango_font_description_to_string(face_desc);
      const bool usable = IsAvailableFont(face_desc_str);
      if (usable) {
        available_fonts_.push_back(face_desc_str);
      }
      g_free(face_desc_str);
      pango_font_description_free(face_desc);
    }
    g_free(faces);
  }
  g_free(families);

  sort(available_fonts_.begin(), available_fonts_.end());
  return available_fonts_;
}
bool tesseract::FontUtils::SelectFont | ( | const char * | utf8_word, |
const int | utf8_len, | ||
string * | font_name, | ||
vector< string > * | graphemes | ||
) | [static] |
Definition at line 693 of file pango_font_info.cpp.
{
  // Convenience overload: searches every font reported by
  // ListAvailableFonts() for one that can render the word.
  const vector<string>& every_font = ListAvailableFonts();
  return SelectFont(utf8_word, utf8_len, every_font, font_name, graphemes);
}
bool tesseract::FontUtils::SelectFont | ( | const char * | utf8_word, |
const int | utf8_len, | ||
const vector< string > & | all_fonts, | ||
string * | font_name, | ||
vector< string > * | graphemes | ||
) | [static] |
Definition at line 700 of file pango_font_info.cpp.
{
  // Finds the first font in all_fonts, in list order, that can render the
  // UTF-8 word. On success stores the font's name in *font_name and the
  // graphemes reported by CanRenderString in *graphemes (each only when the
  // pointer is non-NULL) and returns true. Returns false, with both outputs
  // cleared, when no font in the list can render the word.
  if (font_name != NULL) {
    font_name->clear();
  }
  if (graphemes != NULL) {
    graphemes->clear();
  }

  for (size_t idx = 0; idx < all_fonts.size(); ++idx) {
    PangoFontInfo font;
    vector<string> found_graphemes;
    // A description that cannot be parsed is treated as a fatal error.
    ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_fonts[i]),
                    "Could not parse font desc name %s\n",
                    all_fonts[i].c_str());
    if (!font.CanRenderString(utf8_word, utf8_len, &found_graphemes)) {
      continue;  // Try the next font.
    }
    if (graphemes != NULL) {
      graphemes->swap(found_graphemes);
    }
    if (font_name != NULL) {
      *font_name = all_fonts[idx];
    }
    return true;
  }
  return false;
}