tesseract
3.03
|
#include <lm_consistency.h>
Definition at line 38 of file lm_consistency.h.
tesseract::LMConsistencyInfo::LMConsistencyInfo | ( | const LMConsistencyInfo * | parent_info | ) | [inline, explicit] |
Definition at line 53 of file lm_consistency.h.
{
  // A non-null parent supplies the complete starting state: copy it wholesale.
  if (parent_info != NULL) {
    *this = *parent_info;
    return;
  }

  // No parent: start every counter and flag from a clean slate.
  // Character-class tallies.
  num_alphas = 0;
  num_digits = 0;
  num_punc = 0;
  num_other = 0;
  chartype = CT_NONE;

  // Punctuation tracking.
  punc_ref = NO_EDGE;
  invalid_punc = false;

  // Case tallies.
  num_non_first_upper = 0;
  num_lower = 0;

  // Script, spacing and font tracking.
  script_id = 0;
  inconsistent_script = false;
  num_inconsistent_spaces = 0;
  inconsistent_font = false;

  // Per-position x-height statistics: zero counts and the widest range.
  for (int pos = 0; pos < kNumPos; ++pos) {
    xht_count[pos] = 0;
    xht_count_punc[pos] = 0;
    xht_lo[pos] = 0;
    xht_hi[pos] = 256;  // kBlnCellHeight
  }
  xht_sp = -1;  // Invalid on purpose: marks that there was no parent.
  xpos_entropy = 0;
  xht_decision = XH_GOOD;
}
float tesseract::LMConsistencyInfo::BodyMaxXHeight | ( | ) | const [inline] |
Definition at line 111 of file lm_consistency.h.
{
  // With an inconsistent x-height the upper bound is meaningless, so report
  // MAX_INT16; otherwise report the upper bound kept for the normal position.
  return InconsistentXHeight() ? static_cast<float>(MAX_INT16)
                               : xht_hi[kNORM];
}
float tesseract::LMConsistencyInfo::BodyMinXHeight | ( | ) | const [inline] |
Definition at line 106 of file lm_consistency.h.
{
  // With an inconsistent x-height report 0; otherwise report the lower bound
  // kept for the normal position.
  if (InconsistentXHeight()) {
    return 0.0f;
  } else {
    return xht_lo[kNORM];
  }
}
void tesseract::LMConsistencyInfo::ComputeXheightConsistency | ( | const BLOB_CHOICE * | b, |
bool | is_punc | ||
) |
Definition at line 29 of file lm_consistency.cpp.
{
  // Once a path has been flagged inconsistent, nothing later can improve it.
  if (xht_decision == XH_INCONSISTENT) return;

  // A negative xht_sp is the "no parent" marker set by the constructor.
  // Remember the previous position before it is overwritten below.
  const bool is_first = xht_sp < 0;
  const int prev_sp = xht_sp;

  // Classify the blob as superscript, subscript or normal from its y-shift.
  xht_sp = kNORM;
  if (b->yshift() > kShiftThresh) {
    xht_sp = kSUP;
  } else if (b->yshift() < -kShiftThresh) {
    xht_sp = kSUB;
  }

  // Update the per-position tallies.
  ++xht_count[xht_sp];
  if (is_punc) {
    ++xht_count_punc[xht_sp];
  }
  // Every change of position relative to the parent adds to the entropy.
  if (!is_first) {
    xpos_entropy += abs(prev_sp - xht_sp);
  }

  // TODO(eger): Figure out a better way to account for small caps.
  // For the first character not y-shifted, we only care if it is too small.
  // Too large is common in drop caps and small caps.
  // inT16 small_xht = b->min_xheight();
  // if (parent_vse == NULL && sp == LanguageModelConsistencyInfo::kNORM) {
  //   small_xht = 0;
  // }

  // Narrow the x-height range stored for this position by the blob's range.
  IntersectRange(b->min_xheight(), b->max_xheight(),
                 &xht_lo[xht_sp], &xht_hi[xht_sp]);

  // With no parent there is nothing to compare against: the result is good
  // only if the normal-position count is exactly one.
  if (is_first) {
    xht_decision = (xht_count[kNORM] == 1) ? XH_GOOD : XH_SUBNORMAL;
    return;
  }

  // When we intersect the ranges of xheights in pixels for all characters in
  // each position (subscript, normal, superscript),
  // How much range must be left?  0? [exactly one pixel height for xheight] 1?
  // TODO(eger): Extend this code to take a prior for the rest of the line.
  const int kMinIntersectedXHeightRange = 0;
  for (int pos = 0; pos < kNumPos; ++pos) {
    if (xht_lo[pos] > xht_hi[pos] - kMinIntersectedXHeightRange) {
      xht_decision = XH_INCONSISTENT;
      return;
    }
  }

  // Treat as improbable any path where punctuation makes up more than 40% of
  // the subscript or of the superscript blobs.
  const bool sub_mostly_punc = xht_count_punc[kSUB] > xht_count[kSUB] * 0.4;
  const bool sup_mostly_punc = xht_count_punc[kSUP] > xht_count[kSUP] * 0.4;
  if (sub_mostly_punc || sup_mostly_punc) {
    xht_decision = XH_INCONSISTENT;
    return;
  }

  // The subscript and superscript must not be too small relative to the
  // mainline; the check is skipped when the mainline lower bound is not
  // positive.
  double mainline_xht = static_cast<double>(xht_lo[kNORM]);
  double kMinSizeRatio = 0.4;
  if (mainline_xht > 0.0) {
    const double sub_ratio = static_cast<double>(xht_hi[kSUB]) / mainline_xht;
    const double sup_ratio = static_cast<double>(xht_hi[kSUP]) / mainline_xht;
    if (sub_ratio < kMinSizeRatio || sup_ratio < kMinSizeRatio) {
      xht_decision = XH_INCONSISTENT;
      return;
    }
  }

  // TODO(eger): Check into inconsistency of super/subscript y offsets.
  if (xpos_entropy > kMaxEntropy) {
    xht_decision = XH_INCONSISTENT;
    return;
  }

  // Everything on the mainline is good; any sub/superscript is subnormal.
  const bool all_on_mainline = xht_count[kSUB] == 0 && xht_count[kSUP] == 0;
  xht_decision = all_on_mainline ? XH_GOOD : XH_SUBNORMAL;
}
bool tesseract::LMConsistencyInfo::Consistent | ( | ) | const [inline] |
Definition at line 94 of file lm_consistency.h.
{
  // Consistent only when every tracked inconsistency measure is clear; the
  // checks run in the same order as a short-circuiting && chain would.
  if (NumInconsistentPunc() != 0) return false;
  if (NumInconsistentCase() != 0) return false;
  if (NumInconsistentChartype() != 0) return false;
  if (inconsistent_script) return false;
  if (inconsistent_font) return false;
  return !InconsistentXHeight();
}
int tesseract::LMConsistencyInfo::InconsistentXHeight | ( | ) | const [inline] |
Definition at line 102 of file lm_consistency.h.
{
  // True exactly when the x-height decision has been set to XH_INCONSISTENT.
  const bool flagged = (xht_decision == XH_INCONSISTENT);
  return flagged;
}
int tesseract::LMConsistencyInfo::NumInconsistentCase | ( | ) | const [inline] |
Definition at line 87 of file lm_consistency.h.
{
  // The smaller of the non-first-uppercase and lowercase tallies.
  if (num_non_first_upper > num_lower) {
    return num_lower;
  }
  return num_non_first_upper;
}
int tesseract::LMConsistencyInfo::NumInconsistentChartype | ( | ) | const [inline] |
Definition at line 90 of file lm_consistency.h.
{
  // Sum of: inconsistent punctuation, "other" characters, and the smaller of
  // the alpha and digit tallies.
  const int minority_alnum =
      (num_alphas > num_digits) ? num_digits : num_alphas;
  const int punc_and_other = NumInconsistentPunc() + num_other;
  return punc_and_other + minority_alnum;
}
int tesseract::LMConsistencyInfo::NumInconsistentPunc | ( | ) | const [inline] |
Definition at line 84 of file lm_consistency.h.
{
  // Punctuation counts as inconsistent only when invalid_punc is set; in that
  // case the whole punctuation tally is reported.
  if (!invalid_punc) {
    return 0;
  }
  return num_punc;
}
int tesseract::LMConsistencyInfo::NumInconsistentSpaces | ( | ) | const [inline] |
Definition at line 99 of file lm_consistency.h.
// Accessor: returns the stored num_inconsistent_spaces tally unchanged.
{ return num_inconsistent_spaces; }
Definition at line 121 of file lm_consistency.h.
Definition at line 129 of file lm_consistency.h.
Definition at line 127 of file lm_consistency.h.
Definition at line 123 of file lm_consistency.h.
const int tesseract::LMConsistencyInfo::kMaxEntropy = 1 [static] |
Definition at line 47 of file lm_consistency.h.
const int tesseract::LMConsistencyInfo::kNORM = 1 [static] |
Definition at line 50 of file lm_consistency.h.
const int tesseract::LMConsistencyInfo::kNumPos = 3 [static] |
Definition at line 51 of file lm_consistency.h.
const int tesseract::LMConsistencyInfo::kShiftThresh = 1 [static] |
Definition at line 43 of file lm_consistency.h.
const int tesseract::LMConsistencyInfo::kSUB = 0 [static] |
Definition at line 50 of file lm_consistency.h.
const int tesseract::LMConsistencyInfo::kSUP = 2 [static] |
Definition at line 50 of file lm_consistency.h.
Definition at line 117 of file lm_consistency.h.
Definition at line 118 of file lm_consistency.h.
Definition at line 128 of file lm_consistency.h.
Definition at line 125 of file lm_consistency.h.
Definition at line 124 of file lm_consistency.h.
Definition at line 120 of file lm_consistency.h.
Definition at line 119 of file lm_consistency.h.
Definition at line 122 of file lm_consistency.h.
Definition at line 126 of file lm_consistency.h.
Definition at line 133 of file lm_consistency.h.
Definition at line 134 of file lm_consistency.h.
Definition at line 137 of file lm_consistency.h.
Definition at line 132 of file lm_consistency.h.
Definition at line 131 of file lm_consistency.h.
Definition at line 135 of file lm_consistency.h.
Definition at line 136 of file lm_consistency.h.