tesseract  3.03
tesseract::BaselineRow Class Reference

#include <baselinedetect.h>

List of all members.

Public Member Functions

 BaselineRow (double line_size, TO_ROW *to_row)
const TBOX & bounding_box () const
void SetupOldLineParameters (TO_ROW *row) const
void Print () const
double BaselineAngle () const
double SpaceBetween (const BaselineRow &other) const
double PerpDisp (const FCOORD &direction) const
double StraightYAtX (double x) const
bool FitBaseline (bool use_box_bottoms)
void AdjustBaselineToParallel (int debug, const FCOORD &direction)
double AdjustBaselineToGrid (int debug, const FCOORD &direction, double line_spacing, double line_offset)

Detailed Description

Definition at line 40 of file baselinedetect.h.


Constructor & Destructor Documentation

tesseract::BaselineRow::BaselineRow ( double  line_size,
TO_ROW * to_row 
)

Definition at line 65 of file baselinedetect.cpp.

  // Constructor: binds blobs_ to the blob list of to_row, starts with both
  // baseline end points at the origin, zero error and good_baseline_ false,
  // then computes the bounding box and the per-row tuning values.
  : blobs_(to_row->blob_list()),
    baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f),
    baseline_error_(0.0), good_baseline_(false) {
  ComputeBoundingBox();
  // Compute a scale factor for rounding to ints.
  // The three values below each scale linearly with line_spacing.
  // NOTE(review): the rendered signature names the first parameter line_size,
  // but this body reads line_spacing -- presumably the definition in
  // baselinedetect.cpp uses the latter name; confirm against the source.
  disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
  fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
  max_baseline_error_ = kMaxBaselineError * line_spacing;
}

Member Function Documentation

double tesseract::BaselineRow::AdjustBaselineToGrid ( int  debug,
const FCOORD & direction,
double  line_spacing,
double  line_offset 
)

Definition at line 229 of file baselinedetect.cpp.

                                                             {
  // Picks the displacement_modes_ entry with the smallest SpacingModelError
  // against (line_spacing, line_offset) and, when it qualifies, asks
  // FitConstrainedIfBetter to refit the baseline against that mode.
  // Returns line_offset unchanged for a row with no blobs, otherwise
  // fmod(PerpDisp(direction), line_spacing).
  // All tracing in this function is gated on debug > 1.
  if (blobs_->empty()) {
    if (debug > 1) {
      tprintf("Row empty at:");
      bounding_box_.print();
    }
    return line_offset;
  }
  // Find the displacement_modes_ entry nearest to the grid.
  // best_index stays -1 when displacement_modes_ is empty; best_error is only
  // meaningful once best_index >= 0.
  double best_error = 0.0;
  int best_index = -1;
  for (int i = 0; i < displacement_modes_.size(); ++i) {
    double blob_y = displacement_modes_[i];
    double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
                                                    line_offset);
    if (debug > 1) {
      tprintf("Mode at %g has error %g from model \n", blob_y, error);
    }
    // Strict '<' keeps the earliest mode when several share the same error.
    if (best_index < 0 || error < best_error) {
      best_error = error;
      best_index = i;
    }
  }
  // We will move the baseline only if the chosen mode is close enough to the
  // model.
  // model_margin is how far the best mode's error is below
  // max_baseline_error_; it is passed on to FitConstrainedIfBetter.
  double model_margin = max_baseline_error_ - best_error;
  if (best_index >= 0 && model_margin > 0.0) {
    // But if the current baseline is already close to the mode there is no
    // point, and only the potential to damage accuracy by changing its angle.
    double perp_disp = PerpDisp(direction);
    double shift = displacement_modes_[best_index] - perp_disp;
    if (fabs(shift) > max_baseline_error_) {
      if (debug > 1) {
        tprintf("Attempting linespacing model fit with mode %g to row at:",
                displacement_modes_[best_index]);
        bounding_box_.print();
      }
      FitConstrainedIfBetter(debug, direction, model_margin,
                             displacement_modes_[best_index]);
    } else if (debug > 1) {
      tprintf("Linespacing model only moves current line by %g for row at:",
              shift);
      bounding_box_.print();
    }
  } else if (debug > 1) {
    tprintf("Linespacing model not close enough to any mode for row at:");
    bounding_box_.print();
  }
  // PerpDisp is re-evaluated here because the refit above may have changed
  // the baseline. fmod keeps the sign of its first argument, so the returned
  // offset is negative when the displacement is negative.
  return fmod(PerpDisp(direction), line_spacing);
}
void tesseract::BaselineRow::AdjustBaselineToParallel ( int  debug,
const FCOORD & direction 
)

Definition at line 214 of file baselinedetect.cpp.

                                                                    {
  // Rebuilds the blob displacements for the given direction and, if any
  // displacement mode results, calls FitConstrainedIfBetter with the first
  // mode (displacement_modes_[0]) as the target and 0.0 as the third
  // argument. Does nothing further when displacement_modes_ is empty.
  SetupBlobDisplacements(direction);
  if (displacement_modes_.empty())
    return;
#ifdef kDebugYCoord
  // Debug builds only: raise the local debug level to 3 for a row whose
  // bounding box spans the y coordinate kDebugYCoord.
  if (bounding_box_.bottom() < kDebugYCoord &&
      bounding_box_.top() > kDebugYCoord && debug < 3)
    debug = 3;
#endif
  FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
}

double tesseract::BaselineRow::BaselineAngle ( ) const

Definition at line 98 of file baselinedetect.cpp.

                                        {
  // Returns the angle of the vector from baseline_pt1_ to baseline_pt2_,
  // folded into a half-turn range so that opposite directions map to the
  // same value.
  FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
  double angle = baseline_dir.angle();
  // Baseline directions are only unique in a range of pi so constrain to
  // [-pi/2, pi/2].
  // Adding 1.5*pi before fmod keeps its argument positive (assuming angle()
  // returns a value in [-pi, pi] -- TODO confirm), so fmod yields [0, pi)
  // and the final subtraction shifts that to [-pi/2, pi/2).
  return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
}
const TBOX& tesseract::BaselineRow::bounding_box ( ) const [inline]

Definition at line 44 of file baselinedetect.h.

                                   {
    // Read-only accessor: returns a reference to the bounding_box_ member.
    return bounding_box_;
  }
bool tesseract::BaselineRow::FitBaseline ( bool  use_box_bottoms)

Definition at line 142 of file baselinedetect.cpp.

                                                  {
  // Fits baseline_pt1_/baseline_pt2_ to the baseline positions of the row's
  // blobs and records the fit error in baseline_error_.
  // When use_box_bottoms is false, EstimateBaselinePosition() is called on
  // each blob before its baseline_position() is read.
  // Returns good_baseline_. This body itself only ever assigns false to it,
  // so a true result must come from FitConstrainedIfBetter (not visible
  // here -- verify).
  // Deterministic fitting is used wherever possible.
  fitter_.Clear();
  // Linear least squares is a backup if the DetLineFit produces a bad line.
  LLSQ llsq;
  BLOBNBOX_IT blob_it(blobs_);

  // Feed one point per blob, (x centre of its box, baseline_position()), to
  // both fitters; fitter_ additionally receives half the box width.
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    if (!use_box_bottoms) blob->EstimateBaselinePosition();
    const TBOX& box = blob->bounding_box();
    int x_middle = (box.left() + box.right()) / 2;
#ifdef kDebugYCoord
    // Debug builds only: trace blobs whose box spans y == kDebugYCoord.
    if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
      tprintf("Box bottom = %d, baseline pos=%d for box at:",
              box.bottom(), blob->baseline_position());
      box.print();
    }
#endif
    fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
    llsq.add(x_middle, blob->baseline_position());
  }
  // Fit the line.
  ICOORD pt1, pt2;
  baseline_error_ = fitter_.Fit(&pt1, &pt2);
  baseline_pt1_ = pt1;
  baseline_pt2_ = pt2;
  if (baseline_error_ > max_baseline_error_ &&
      fitter_.SufficientPointsForIndependentFit()) {
    // The fit was bad but there were plenty of points, so try skipping
    // the first and last few, and use the new line if it dramatically improves
    // the error of fit.
    // "Dramatically" here means the new error is under half the old one.
    double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
    if (error < baseline_error_ / 2.0) {
      baseline_error_ = error;
      baseline_pt1_ = pt1;
      baseline_pt2_ = pt2;
    }
  }
  // debug stays 0 unless kDebugYCoord is defined, in which case it is 3 for a
  // row spanning that y coordinate and 2 otherwise.
  int debug = 0;
#ifdef kDebugYCoord
  Print();
  debug = bounding_box_.bottom() < kDebugYCoord &&
      bounding_box_.top() > kDebugYCoord
            ? 3 : 2;
#endif
  // Now we obtained a direction from that fit, see if we can improve the
  // fit using the same direction and some other start point.
  // NOTE(review): pt1/pt2 are overwritten by the skip-points Fit above even
  // when that fit is rejected, so direction and target_offset may come from
  // a line other than the one stored in baseline_pt1_/baseline_pt2_ --
  // confirm this is intended.
  FCOORD direction(pt2 - pt1);
  double target_offset = direction * pt1;
  good_baseline_ = false;
  FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
  // Wild lines can be produced because DetLineFit allows vertical lines, but
  // vertical text has been rotated so angles over pi/4 should be disallowed.
  // Near vertical lines can still be produced by vertically aligned components
  // on very short lines.
  double angle = BaselineAngle();
  if (fabs(angle) > M_PI * 0.25) {
    // Use the llsq fit as a backup.
    // The replacement line runs from the llsq mean point one unit in x with
    // slope llsq.m(); the row is then marked as not having a good baseline.
    baseline_pt1_ = llsq.mean_point();
    baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
    // TODO(rays) get rid of this when m and c are no longer used.
    double m = llsq.m();
    double c = llsq.c(m);
    baseline_error_ = llsq.rms(m, c);
    good_baseline_ = false;
  }
  return good_baseline_;
}
double tesseract::BaselineRow::PerpDisp ( const FCOORD & direction) const

Definition at line 121 of file baselinedetect.cpp.

                                                          {
  // Returns (direction * middle_pos) / |direction|, where middle_pos is the
  // point on the straight baseline at the horizontal centre of bounding_box_.
  // NOTE(review): operator* on two FCOORDs is defined elsewhere (presumably a
  // cross product -- verify). There is no guard for a zero-length direction.
  float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
  FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
  return direction * middle_pos / direction.length();
}

void tesseract::BaselineRow::Print ( ) const

Definition at line 87 of file baselinedetect.cpp.

                              {
  // Debug dump via tprintf: first the two baseline end points, the baseline
  // angle and the y value of the straight baseline at x = 0; then the
  // quantization factor, fit error and good_baseline_ flag, followed by the
  // bounding box printed by TBOX::print().
  tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
          baseline_pt1_.x(), baseline_pt1_.y(),
          baseline_pt2_.x(), baseline_pt2_.y(),
          BaselineAngle(), StraightYAtX(0.0));
  tprintf("Quant factor=%g, error=%g, good=%d, box:",
          disp_quant_factor_, baseline_error_, good_baseline_);
  bounding_box_.print();
}

void tesseract::BaselineRow::SetupOldLineParameters ( TO_ROW * row) const

Definition at line 77 of file baselinedetect.cpp.

                                                          {
  // Writes this baseline into row in gradient/intercept form: the same
  // (gradient, intercept, baseline_error_) triple is passed to both
  // set_line and set_parallel_line.
  // TODO(rays) get rid of this when m and c are no longer used.
  // The gradient is recovered from the folded baseline angle.
  double gradient = tan(BaselineAngle());
  // para_c is the actual intercept of the baseline on the y-axis.
  // StraightYAtX returns double; the value is narrowed to float here.
  float para_c = StraightYAtX(0.0);
  row->set_line(gradient, para_c, baseline_error_);
  row->set_parallel_line(gradient, para_c, baseline_error_);
}
double tesseract::BaselineRow::SpaceBetween ( const BaselineRow & other) const

Definition at line 108 of file baselinedetect.cpp.

                                                               {
  // Returns the sum of the perpendicular distances from a point midway
  // between the two baselines to this row's baseline and to other's.
  // Find the x-centre of overlap of the lines.
  // NOTE(review): the "/ 2" below is an integer division if left()/right()
  // and MAX/MIN yield integers (their types are not visible here), which
  // would truncate before the conversion to float -- confirm.
  float x = (MAX(bounding_box_.left(), other.bounding_box_.left()) +
      MIN(bounding_box_.right(), other.bounding_box_.right())) / 2;
  // Find the vertical centre between them.
  float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
  // Find the perpendicular distance of (x,y) from each line.
  FCOORD pt(x, y);
  return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
}
double tesseract::BaselineRow::StraightYAtX ( double  x) const

Definition at line 129 of file baselinedetect.cpp.

                                               {
  // Returns the y value at x of the straight line through baseline_pt1_ and
  // baseline_pt2_ (linear interpolation/extrapolation).
  double denominator = baseline_pt2_.x() - baseline_pt1_.x();
  // Both end points share the same x (vertical or degenerate line): there is
  // no unique y, so return the mean of the two end-point y values.
  if (denominator == 0.0)
    return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
  return baseline_pt1_.y() +
      (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) /
          denominator;
}

The documentation for this class was generated from the following files:
baselinedetect.h
baselinedetect.cpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines