tesseract  3.03
tesseract::LMConsistencyInfo Struct Reference

#include <lm_consistency.h>

List of all members.

Public Types

enum  ChartypeEnum { CT_NONE, CT_ALPHA, CT_DIGIT, CT_OTHER }

Public Member Functions

 LMConsistencyInfo (const LMConsistencyInfo *parent_info)
int NumInconsistentPunc () const
int NumInconsistentCase () const
int NumInconsistentChartype () const
bool Consistent () const
int NumInconsistentSpaces () const
int InconsistentXHeight () const
void ComputeXheightConsistency (const BLOB_CHOICE *b, bool is_punc)
float BodyMinXHeight () const
float BodyMaxXHeight () const

Public Attributes

int num_alphas
int num_digits
int num_punc
int num_other
ChartypeEnum chartype
EDGE_REF punc_ref
bool invalid_punc
int num_non_first_upper
int num_lower
int script_id
bool inconsistent_script
int num_inconsistent_spaces
bool inconsistent_font
float xht_lo [kNumPos]
float xht_hi [kNumPos]
inT16 xht_count [kNumPos]
inT16 xht_count_punc [kNumPos]
inT16 xht_sp
inT16 xpos_entropy
XHeightConsistencyEnum xht_decision

Static Public Attributes

static const int kShiftThresh = 1
static const int kMaxEntropy = 1
static const int kSUB = 0
static const int kNORM = 1
static const int kSUP = 2
static const int kNumPos = 3

Detailed Description

Definition at line 38 of file lm_consistency.h.


Member Enumeration Documentation

enum tesseract::LMConsistencyInfo::ChartypeEnum

Enumerator:
CT_NONE 
CT_ALPHA 
CT_DIGIT 
CT_OTHER 

Definition at line 39 of file lm_consistency.h.


Constructor & Destructor Documentation

tesseract::LMConsistencyInfo::LMConsistencyInfo ( const LMConsistencyInfo * parent_info ) [inline, explicit]

Definition at line 53 of file lm_consistency.h.

                                                                   {
    // Two construction modes: a NULL parent_info resets every counter, flag
    // and x-height statistic to its starting value; a non-NULL parent_info
    // makes this object a member-wise copy of the parent's state.
    if (parent_info == NULL) {
      // Initialize from scratch.
      num_alphas = 0;
      num_digits = 0;
      num_punc = 0;
      num_other = 0;
      chartype = CT_NONE;
      punc_ref = NO_EDGE;
      invalid_punc = false;
      num_non_first_upper = 0;
      num_lower = 0;
      script_id = 0;
      inconsistent_script = false;
      num_inconsistent_spaces = 0;
      inconsistent_font = false;
      // Initialize XHeight stats.
      // Every position (kSUB, kNORM, kSUP) starts with zero counts and the
      // range [0, 256].
      for (int i = 0; i < kNumPos; i++) {
        xht_count[i] = 0;
        xht_count_punc[i] = 0;
        xht_lo[i] = 0;
        xht_hi[i] = 256;  // kBlnCellHeight
      }
      xht_sp = -1;  // This invalid value indicates that there was no parent.
      xpos_entropy = 0;
      xht_decision = XH_GOOD;
    } else {
      // Copy parent info
      *this = *parent_info;
    }
  }

Member Function Documentation

float tesseract::LMConsistencyInfo::BodyMaxXHeight ( ) const [inline]

Definition at line 111 of file lm_consistency.h.

                               {
    // Returns the upper bound of the x-height range stored for the kNORM
    // position. If the x-height evidence has been judged inconsistent,
    // MAX_INT16 (converted to float) is returned instead of the stored bound.
    if (InconsistentXHeight())
      return static_cast<float>(MAX_INT16);
    return xht_hi[kNORM];
  }

float tesseract::LMConsistencyInfo::BodyMinXHeight ( ) const [inline]

Definition at line 106 of file lm_consistency.h.

                               {
    // Returns the lower bound of the x-height range stored for the kNORM
    // position. If the x-height evidence has been judged inconsistent,
    // 0.0f is returned instead of the stored bound.
    if (InconsistentXHeight())
      return 0.0f;
    return xht_lo[kNORM];
  }

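void tesseract::LMConsistencyInfo::ComputeXheightConsistency ( const BLOB_CHOICE * b, bool is_punc )

Definition at line 29 of file lm_consistency.cpp.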

                                        {
  // Folds one more blob choice into the x-height statistics and updates
  // xht_decision to XH_GOOD, XH_SUBNORMAL or XH_INCONSISTENT.
  //   b       - blob choice whose yshift(), min_xheight() and max_xheight()
  //             are read.
  //   is_punc - when true, the blob is also counted in xht_count_punc.
  // Once the decision is XH_INCONSISTENT the call is a no-op.
  if (xht_decision == XH_INCONSISTENT)
    return;  // It isn't going to get any better.

  // Compute xheight consistency.
  // A negative xht_sp means no position has been recorded yet; remember the
  // previous position before it is overwritten below.
  bool parent_null = xht_sp < 0;
  int parent_sp = xht_sp;
  // Classify this blob as superscript, subscript or normal by comparing its
  // y shift against +/- kShiftThresh.
  if (b->yshift() > LMConsistencyInfo::kShiftThresh) {
    xht_sp = LMConsistencyInfo::kSUP;
  } else if (b->yshift() < -LMConsistencyInfo::kShiftThresh) {
    xht_sp = LMConsistencyInfo::kSUB;
  } else {
    xht_sp = LMConsistencyInfo::kNORM;
  }
  xht_count[xht_sp]++;
  if (is_punc) xht_count_punc[xht_sp]++;
  // Accumulate the size of the position change relative to the previous
  // blob; it is compared against kMaxEntropy further down.
  if (!parent_null) {
    xpos_entropy += abs(parent_sp - xht_sp);
  }
  // TODO(eger): Figure out a better way to account for small caps.
  // For the first character not y-shifted, we only care if it is too small.
  // Too large is common in drop caps and small caps.
  // inT16 small_xht = b->min_xheight();
  //  if (parent_vse == NULL && sp == LanguageModelConsistencyInfo::kNORM) {
  //  small_xht = 0;
  // }
  // NOTE(review): IntersectRange is defined elsewhere; presumably it narrows
  // [xht_lo, xht_hi] for this position to its overlap with the blob's
  // [min_xheight, max_xheight] - confirm against its definition.
  IntersectRange(b->min_xheight(), b->max_xheight(),
                 &(xht_lo[xht_sp]), &(xht_hi[xht_sp]));


  // Compute xheight inconsistency kinds.
  // With no previous position recorded, the decision depends only on the
  // kNORM count: exactly 1 gives XH_GOOD, anything else XH_SUBNORMAL.
  if (parent_null) {
    if (xht_count[kNORM] == 1) {
      xht_decision = XH_GOOD;
    } else {
      xht_decision = XH_SUBNORMAL;
    }
    return;
  }

  // When we intersect the ranges of xheights in pixels for all characters in
  // each position (subscript, normal, superscript),
  // How much range must be left?  0? [exactly one pixel height for xheight] 1?
  // TODO(eger): Extend this code to take a prior for the rest of the line.
  // With a minimum range of 0, a position is inconsistent only when its lower
  // bound exceeds its upper bound.
  const int kMinIntersectedXHeightRange = 0;
  for (int i = 0; i < kNumPos; i++) {
    if (xht_lo[i] > xht_hi[i] - kMinIntersectedXHeightRange) {
      xht_decision = XH_INCONSISTENT;
      return;
    }
  }

  // Reject as improbable anything where there's much punctuation in subscript
  // or superscript regions.
  if (xht_count_punc[kSUB] > xht_count[kSUB] * 0.4 ||
      xht_count_punc[kSUP] > xht_count[kSUP] * 0.4) {
    xht_decision = XH_INCONSISTENT;
    return;
  }

  // Now check that the subscript and superscript aren't too small relative to
  // the mainline.
  // The ratio uses the upper bound of the sub/superscript range against the
  // lower bound of the normal range; the check is skipped when that lower
  // bound is not positive, which also avoids dividing by zero.
  double mainline_xht = static_cast<double>(xht_lo[kNORM]);
  double kMinSizeRatio = 0.4;
  if (mainline_xht > 0.0 &&
      (static_cast<double>(xht_hi[kSUB]) / mainline_xht < kMinSizeRatio ||
       static_cast<double>(xht_hi[kSUP]) / mainline_xht < kMinSizeRatio)) {
    xht_decision = XH_INCONSISTENT;
    return;
  }
  // TODO(eger): Check into inconsistency of super/subscript y offsets.
  // Too much accumulated position change is treated as inconsistent.
  if (xpos_entropy > kMaxEntropy) {
    xht_decision = XH_INCONSISTENT;
    return;
  }
  // No sub/superscript blobs seen so far: good. Otherwise the result is
  // acceptable but flagged as XH_SUBNORMAL.
  if (xht_count[kSUB] == 0 && xht_count[kSUP] == 0) {
    xht_decision = XH_GOOD;
    return;
  }
  xht_decision = XH_SUBNORMAL;
}

bool tesseract::LMConsistencyInfo::Consistent ( ) const [inline]

Definition at line 94 of file lm_consistency.h.

int tesseract::LMConsistencyInfo::InconsistentXHeight ( ) const [inline]

Definition at line 102 of file lm_consistency.h.

                                         {
    // Yields a true (non-zero) value exactly when the stored x-height
    // decision is XH_INCONSISTENT.
    return xht_decision == XH_INCONSISTENT;
  }

int tesseract::LMConsistencyInfo::NumInconsistentPunc ( ) const [inline]

Definition at line 84 of file lm_consistency.h.

                                         {
    // When invalid_punc is set, the whole punctuation count (num_punc) is
    // reported as inconsistent; otherwise the result is 0.
    return invalid_punc ? num_punc : 0;
  }

int tesseract::LMConsistencyInfo::NumInconsistentSpaces ( ) const [inline]

Definition at line 99 of file lm_consistency.h.


Member Data Documentation

const int tesseract::LMConsistencyInfo::kMaxEntropy = 1 [static]

Definition at line 47 of file lm_consistency.h.

const int tesseract::LMConsistencyInfo::kNORM = 1 [static]

Definition at line 50 of file lm_consistency.h.

const int tesseract::LMConsistencyInfo::kNumPos = 3 [static]

Definition at line 51 of file lm_consistency.h.

const int tesseract::LMConsistencyInfo::kShiftThresh = 1 [static]

Definition at line 43 of file lm_consistency.h.

const int tesseract::LMConsistencyInfo::kSUB = 0 [static]

Definition at line 50 of file lm_consistency.h.

const int tesseract::LMConsistencyInfo::kSUP = 2 [static]

Definition at line 50 of file lm_consistency.h.


The documentation for this struct was generated from the following files:

lm_consistency.h
lm_consistency.cpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines