tesseract  3.03
tesseract::FontUtils Class Reference

#include <pango_font_info.h>

List of all members.

Static Public Member Functions

static bool IsAvailableFont (const char *font_desc)
static const vector< string > & ListAvailableFonts ()
static bool SelectFont (const char *utf8_word, const int utf8_len, string *font_name, vector< string > *graphemes)
static bool SelectFont (const char *utf8_word, const int utf8_len, const vector< string > &all_fonts, string *font_name, vector< string > *graphemes)
static void GetAllRenderableCharacters (vector< bool > *unichar_bitmap)
static void GetAllRenderableCharacters (const vector< string > &font_names, vector< bool > *unichar_bitmap)
static void GetAllRenderableCharacters (const string &font_name, vector< bool > *unichar_bitmap)
static string BestFonts (const unordered_map< char32, inT64 > &ch_map, vector< std::pair< const char *, vector< bool > > > *font_flag)
static int FontScore (const unordered_map< char32, inT64 > &ch_map, const string &fontname, int *raw_score, vector< bool > *ch_flags)

Detailed Description

Definition at line 134 of file pango_font_info.h.


Member Function Documentation

string tesseract::FontUtils::BestFonts ( const unordered_map< char32, inT64 > &  ch_map,
vector< std::pair< const char *, vector< bool > > > *  font_flag 
) [static]

Definition at line 631 of file pango_font_info.cpp.

                                                                              {
  // Scores every font returned by ListAvailableFonts() against ch_map using
  // FontScore(), stores the accepted fonts in *fonts (name pointer into the
  // available-font list plus that font's per-character coverage flags) and
  // returns the accepted names, one per line.

  // Fraction of the best weighted score and of the best raw score that a
  // font has to reach on BOTH measures to be accepted.
  const double kMinOKFraction = 0.99;
  // Weighted fraction of characters that must be renderable in a font to make
  // it OK even if the raw count is not good.
  const double kMinWeightedFraction = 0.99995;

  fonts->clear();

  // Pass 1: score each candidate and track the best of each measure.
  const vector<string>& candidates = FontUtils::ListAvailableFonts();
  const int num_candidates = candidates.size();
  vector<vector<bool> > coverage_flags;
  vector<int> weighted_scores;
  vector<int> raw_counts;
  int top_weighted = 0;
  int top_raw = 0;
  for (int f = 0; f < num_candidates; ++f) {
    vector<bool> flags;
    int raw = 0;
    const int weighted = FontScore(ch_map, candidates[f], &raw, &flags);
    if (weighted > top_weighted) top_weighted = weighted;
    if (raw > top_raw) top_raw = raw;

    coverage_flags.push_back(flags);
    weighted_scores.push_back(weighted);
    raw_counts.push_back(raw);
  }

  // Pass 2: keep the fonts whose scores are above a threshold fraction of
  // both the best weighted and the best raw score. Demanding a high fraction
  // (kMinOKFraction) of BOTH keeps bogus fonts out for CJK. For scripts with
  // few characters the problem is rather finding enough fonts, since only one
  // or two may contain the rare dingbats etc., so a font whose weighted
  // (coverage) score is very high (kMinWeightedFraction) is accepted even if
  // its raw score is poor.
  const int weighted_floor = static_cast<int>(top_weighted * kMinOKFraction);
  const int raw_floor = static_cast<int>(top_raw * kMinOKFraction);
  const int override_floor =
      static_cast<int>(top_weighted * kMinWeightedFraction);

  string accepted_names;
  for (int f = 0; f < num_candidates; ++f) {
    const string& name = candidates[f];
    const int weighted = weighted_scores[f];
    const int raw = raw_counts[f];
    const bool weighted_ok = weighted >= weighted_floor;
    const bool raw_ok = raw >= raw_floor;
    const bool overrides = weighted >= override_floor;

    if ((weighted_ok && raw_ok) || overrides) {
      fonts->push_back(make_pair(name.c_str(), coverage_flags[f]));
      tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n",
           name.c_str(),
           100.0 * weighted / top_weighted,
           raw, 100.0 * raw / top_raw);
      accepted_names += name;
      accepted_names += "\n";
    } else if (weighted_ok || raw_ok) {
      // Good on exactly one measure: only worth a log line.
      tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n",
           name.c_str(),
           100.0 * weighted / top_weighted,
           raw, 100.0 * raw / top_raw);
    }
  }
  return accepted_names;
}
int tesseract::FontUtils::FontScore ( const unordered_map< char32, inT64 > &  ch_map,
const string &  fontname,
int *  raw_score,
vector< bool > *  ch_flags 
) [static]

Definition at line 596 of file pango_font_info.cpp.

                                                 {
  // Scores the font named fontname against the characters in ch_map.
  //   *raw_score - set to the number of distinct characters in ch_map that
  //                the font covers exactly (whitespace always counts).
  //   ch_flags   - optional; receives one covered/not-covered flag per entry
  //                of ch_map, in ch_map iteration order.
  // Returns the sum of the ch_map values (it->second) of the covered
  // characters.
  PangoFontInfo font_info;
  if (!font_info.ParseFontDescriptionName(fontname)) {
    // Best effort: report the problem and carry on with whatever font (if
    // any) the unparsed description yields.
    tprintf("ERROR: Could not parse %s\n", fontname.c_str());
  }
  PangoFont* font = font_info.ToPangoFont();
  // Do not hand a NULL font to Pango; treat it as a font with no coverage.
  PangoCoverage* coverage =
      (font != NULL) ? pango_font_get_coverage(font, NULL) : NULL;

  if (ch_flags) {
    ch_flags->clear();
    ch_flags->reserve(ch_map.size());
  }
  *raw_score = 0;
  int ok_chars = 0;
  for (unordered_map<char32, inT64>::const_iterator it = ch_map.begin();
       it != ch_map.end(); ++it) {
    // Whitespace counts as covered regardless of the font.
    bool covered = (IsWhitespace(it->first) ||
                    (coverage != NULL &&
                     pango_coverage_get(coverage, it->first)
                     == PANGO_COVERAGE_EXACT));
    if (covered) {
      ++(*raw_score);
      ok_chars += it->second;
    }
    if (ch_flags) {
      ch_flags->push_back(covered);
    }
  }
  // pango_font_get_coverage() returns a new reference that the caller must
  // release; without this every scored font leaks its coverage map.
  if (coverage != NULL) {
    pango_coverage_unref(coverage);
  }
  // NOTE(review): the PangoFont from ToPangoFont() is presumably owned by the
  // caller as well and should be g_object_unref()'d here - confirm against
  // PangoFontInfo::ToPangoFont() before adding the unref.
  return ok_chars;
}
void tesseract::FontUtils::GetAllRenderableCharacters ( vector< bool > *  unichar_bitmap) [static]

Definition at line 561 of file pango_font_info.cpp.

                                                                       {
  // Convenience overload: builds the bitmap over every font reported by
  // ListAvailableFonts() by forwarding to the font-list overload.
  GetAllRenderableCharacters(ListAvailableFonts(), unichar_bitmap);
}
void tesseract::FontUtils::GetAllRenderableCharacters ( const vector< string > &  font_names,
vector< bool > *  unichar_bitmap 
) [static]

Definition at line 576 of file pango_font_info.cpp.

                                                                         {
  // Builds *unichar_bitmap from the union of the coverage maps of all the
  // fonts in `fonts` (via CharCoverageMapToBitmap).
  PangoCoverage* all_coverage = pango_coverage_new();
  const int num_fonts = static_cast<int>(fonts.size());
  // size() is a size_t; cast so the argument matches the %d conversion.
  tlog(1, "Processing %d fonts\n", num_fonts);
  for (int i = 0; i < num_fonts; ++i) {
    PangoFontInfo font_info(fonts[i]);
    PangoFont* font = font_info.ToPangoFont();
    if (font == NULL) {
      // A font that cannot be loaded contributes nothing to the union.
      continue;
    }
    PangoCoverage* coverage = pango_font_get_coverage(font, NULL);
    if (coverage == NULL) {
      continue;
    }
    // Mark off characters that any font can render.
    pango_coverage_max(all_coverage, coverage);
    // pango_font_get_coverage() hands back a new reference; release it once
    // it has been merged so the per-font maps do not leak.
    pango_coverage_unref(coverage);
    // NOTE(review): the PangoFont from ToPangoFont() is presumably owned by
    // the caller too and should be g_object_unref()'d - confirm against
    // PangoFontInfo::ToPangoFont() before adding the unref.
  }
  CharCoverageMapToBitmap(all_coverage, unichar_bitmap);
  pango_coverage_unref(all_coverage);
}
void tesseract::FontUtils::GetAllRenderableCharacters ( const string &  font_name,
vector< bool > *  unichar_bitmap 
) [static]

Definition at line 567 of file pango_font_info.cpp.

                                                                         {
  // Builds *unichar_bitmap from the coverage map of the single font named
  // font_name (via CharCoverageMapToBitmap).
  PangoFontInfo font_info(font_name);
  PangoFont* font = font_info.ToPangoFont();
  // Do not hand a NULL font to Pango. The bitmap conversion is still invoked
  // with a NULL coverage in that case, exactly as before, so the contents of
  // *unichar_bitmap for an unloadable font are unchanged by this fix.
  PangoCoverage* coverage =
      (font != NULL) ? pango_font_get_coverage(font, NULL) : NULL;
  CharCoverageMapToBitmap(coverage, unichar_bitmap);
  // pango_font_get_coverage() returns a new reference owned by the caller.
  if (coverage != NULL) {
    pango_coverage_unref(coverage);
  }
  // NOTE(review): the PangoFont from ToPangoFont() is presumably owned by the
  // caller too and should be g_object_unref()'d - confirm against
  // PangoFontInfo::ToPangoFont() before adding the unref.
}
bool tesseract::FontUtils::IsAvailableFont ( const char *  font_desc) [static]

Definition at line 455 of file pango_font_info.cpp.

                                                      {
  // Returns true if the font Pango loads for query_desc has a description
  // equal to the requested one. Returns false if the descriptions differ or
  // if no font could be loaded at all.
  PangoFontDescription *desc = pango_font_description_from_string(query_desc);
  PangoFont* selected_font = NULL;
  {
    InitFontconfig();
    PangoFontMap* font_map = pango_cairo_font_map_get_default();
    PangoContext* context = pango_context_new();
    pango_context_set_font_map(context, font_map);
    {
      DISABLE_HEAP_LEAK_CHECK;
      selected_font = pango_font_map_load_font(font_map, context, desc);
    }
    g_object_unref(context);
  }
  if (selected_font == NULL) {
    // pango_font_map_load_font() returns NULL when nothing matches; describing
    // a NULL font is invalid, so report the font as unavailable instead.
    tprintf("Could not load font: %s\n", query_desc);
    pango_font_description_free(desc);
    return false;
  }
  PangoFontDescription* selected_desc = pango_font_describe(selected_font);

  bool equal = pango_font_description_equal(desc, selected_desc);
  tlog(3, "query weight = %d \t selected weight =%d\n",
       pango_font_description_get_weight(desc),
       pango_font_description_get_weight(selected_desc));

  char* selected_desc_str = pango_font_description_to_string(selected_desc);
  // The second conversion was missing its '%', so the selected description
  // was never printed.
  tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc, selected_desc_str);

  g_free(selected_desc_str);
  pango_font_description_free(selected_desc);
  // The loaded font is a new reference owned by this function.
  g_object_unref(selected_font);
  pango_font_description_free(desc);
  return equal;
}
const vector< string > & tesseract::FontUtils::ListAvailableFonts ( ) [static]

Definition at line 498 of file pango_font_info.cpp.

                                                    {
  // Returns the list of usable font description strings. The list is built
  // on the first call that yields a non-empty result and is served from the
  // function-local static on later calls.
  static vector<string> font_cache;
  if (!font_cache.empty()) {
    return font_cache;
  }
#ifndef USE_STD_NAMESPACE
  if (FLAGS_use_only_legacy_fonts) {
    // Restrict view to list of fonts in legacy_fonts.h
    tprintf("Using list of legacy fonts only\n");
    const int kNumFontLists = 4;
    for (int list = 0; list < kNumFontLists; ++list) {
      // Each list is terminated by a NULL entry.
      int entry = 0;
      while (kFontlists[list][entry] != NULL) {
        font_cache.push_back(kFontlists[list][entry]);
        ++entry;
      }
    }
    return font_cache;
  }
#endif

  PangoFontFamily** families = 0;
  int n_families = 0;
  ListFontFamilies(&families, &n_families);
  for (int fam = 0; fam < n_families; ++fam) {
    const char* family_name = pango_font_family_get_name(families[fam]);
    tlog(2, "Listing family %s\n", family_name);
    if (!ShouldIgnoreFontFamilyName(family_name)) {
      int n_faces = 0;
      PangoFontFace** faces = NULL;
      pango_font_family_list_faces(families[fam], &faces, &n_faces);
      for (int face = 0; face < n_faces; ++face) {
        PangoFontDescription* face_desc = pango_font_face_describe(faces[face]);
        char* face_desc_str = pango_font_description_to_string(face_desc);
        // Keep only the faces that IsAvailableFont() accepts.
        if (IsAvailableFont(face_desc_str)) {
          font_cache.push_back(face_desc_str);
        }
        g_free(face_desc_str);
        pango_font_description_free(face_desc);
      }
      g_free(faces);
    }
  }
  g_free(families);
  sort(font_cache.begin(), font_cache.end());
  return font_cache;
}
bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
string *  font_name,
vector< string > *  graphemes 
) [static]

Definition at line 693 of file pango_font_info.cpp.

                                                                         {
  // Convenience overload: searches every font from ListAvailableFonts().
  const vector<string>& every_font = ListAvailableFonts();
  return SelectFont(utf8_word, utf8_len, every_font, font_name, graphemes);
}
bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
const vector< string > &  all_fonts,
string *  font_name,
vector< string > *  graphemes 
) [static]

Definition at line 700 of file pango_font_info.cpp.

                                                                         {
  // Finds the first font in all_fonts for which CanRenderString() succeeds
  // on the utf8_len bytes at utf8_word. On success stores that font's name
  // in *font_name and the graphemes reported by CanRenderString() in
  // *graphemes (each output is optional and may be NULL) and returns true.
  // Returns false, with both outputs left cleared, if no font qualifies.
  if (font_name) font_name->clear();
  if (graphemes) graphemes->clear();

  const int num_fonts = all_fonts.size();
  for (int idx = 0; idx < num_fonts; ++idx) {
    const string& candidate = all_fonts[idx];
    PangoFontInfo font;
    vector<string> candidate_graphemes;
    ASSERT_HOST_MSG(font.ParseFontDescriptionName(candidate),
                    "Could not parse font desc name %s\n",
                    candidate.c_str());
    if (!font.CanRenderString(utf8_word, utf8_len, &candidate_graphemes)) {
      continue;
    }
    if (graphemes) graphemes->swap(candidate_graphemes);
    if (font_name) *font_name = candidate;
    return true;
  }
  return false;
}

The documentation for this class was generated from the following files:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines