tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/baselinedetect.cpp
Go to the documentation of this file.
00001 
00002 // File:        baselinedetect.cpp
00003 // Description: Initial Baseline Determination.
00004 // Copyright 2012 Google Inc. All Rights Reserved.
00005 // Author:      rays@google.com (Ray Smith)
00006 // Created:     Mon Apr 30 10:15:31 PDT 2012
00007 //
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifdef _MSC_VER
00021 #define _USE_MATH_DEFINES
00022 #endif  // _MSC_VER
00023 
00024 #ifdef HAVE_CONFIG_H
00025 #include "config_auto.h"
00026 #endif
00027 
00028 #include "baselinedetect.h"
00029 
00030 #include <math.h>
00031 #include "allheaders.h"
00032 #include "blobbox.h"
00033 #include "detlinefit.h"
00034 #include "drawtord.h"
00035 #include "helpers.h"
00036 #include "linlsq.h"
00037 #include "makerow.h"
00038 #include "textord.h"
00039 #include "tprintf.h"
00040 #include "underlin.h"
00041 
// Tuning constants for initial baseline determination.

// How many of the strongest displacement modes are retained in
// displacement_modes_.
const int kMaxDisplacementsModes = 3;
// How many points are skipped at each end when the initial fit is retried.
const int kNumSkipPoints = 3;
// Largest angle deviation (in radians) for which the independent baseline
// is kept.
const double kMaxSkewDeviation = 1.0 / 64.0;
// Blob displacements are quantized in units of this fraction of the line
// spacing estimate.
const double kOffsetQuantizationFactor = 3.0 / 64.0;
// Blob fit error is computed over a half-range of this fraction of the line
// spacing estimate.
const double kFitHalfrangeFactor = 6.0 / 64.0;
// A baseline whose error exceeds this fraction of the line spacing counts as
// badly fitting.
const double kMaxBaselineError = 3.0 / 64.0;
// max_blob_size in TO_BLOCK is set to this multiple of the linespacing.
// Copied from textord_excess_blobsize.
const double kMaxBlobSizeMultiple = 1.3;
// The linespacing model is only forced on all the lines if at least this
// fraction of the linespacing gaps is close to the model.
const double kMinFittingLinespacings = 0.25;
00060 // A y-coordinate within a textline that is to be debugged.
00061 //#define kDebugYCoord 1525
00062 
00063 namespace tesseract {
00064 
00065 BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row)
00066   : blobs_(to_row->blob_list()),
00067     baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f),
00068     baseline_error_(0.0), good_baseline_(false) {
00069   ComputeBoundingBox();
00070   // Compute a scale factor for rounding to ints.
00071   disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
00072   fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
00073   max_baseline_error_ = kMaxBaselineError * line_spacing;
00074 }
00075 
00076 // Sets the TO_ROW with the output straight line.
00077 void BaselineRow::SetupOldLineParameters(TO_ROW* row) const {
00078   // TODO(rays) get rid of this when m and c are no longer used.
00079   double gradient = tan(BaselineAngle());
00080   // para_c is the actual intercept of the baseline on the y-axis.
00081   float para_c = StraightYAtX(0.0);
00082   row->set_line(gradient, para_c, baseline_error_);
00083   row->set_parallel_line(gradient, para_c, baseline_error_);
00084 }
00085 
00086 // Outputs diagnostic information.
00087 void BaselineRow::Print() const {
00088   tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
00089           baseline_pt1_.x(), baseline_pt1_.y(),
00090           baseline_pt2_.x(), baseline_pt2_.y(),
00091           BaselineAngle(), StraightYAtX(0.0));
00092   tprintf("Quant factor=%g, error=%g, good=%d, box:",
00093           disp_quant_factor_, baseline_error_, good_baseline_);
00094   bounding_box_.print();
00095 }
00096 
00097 // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
00098 double BaselineRow::BaselineAngle() const {
00099   FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
00100   double angle = baseline_dir.angle();
00101   // Baseline directions are only unique in a range of pi so constrain to
00102   // [-pi/2, pi/2].
00103   return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
00104 }
00105 
00106 // Computes and returns the linespacing at the middle of the overlap
00107 // between this and other.
00108 double BaselineRow::SpaceBetween(const BaselineRow& other) const {
00109   // Find the x-centre of overlap of the lines.
00110   float x = (MAX(bounding_box_.left(), other.bounding_box_.left()) +
00111       MIN(bounding_box_.right(), other.bounding_box_.right())) / 2;
00112   // Find the vertical centre between them.
00113   float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
00114   // Find the perpendicular distance of (x,y) from each line.
00115   FCOORD pt(x, y);
00116   return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
00117 }
00118 
00119 // Computes and returns the displacement of the center of the line
00120 // perpendicular to the given direction.
00121 double BaselineRow::PerpDisp(const FCOORD& direction) const {
00122   float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
00123   FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
00124   return direction * middle_pos / direction.length();
00125 }
00126 
00127 // Computes the y coordinate at the given x using the straight baseline
00128 // defined by baseline_pt1_ and baseline_pt2__.
00129 double BaselineRow::StraightYAtX(double x) const {
00130   double denominator = baseline_pt2_.x() - baseline_pt1_.x();
00131   if (denominator == 0.0)
00132     return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
00133   return baseline_pt1_.y() +
00134       (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) /
00135           denominator;
00136 }
00137 
00138 // Fits a straight baseline to the points. Returns true if it had enough
00139 // points to be reasonably sure of the fitted baseline.
00140 // If use_box_bottoms is false, baselines positions are formed by
00141 // considering the outlines of the blobs.
00142 bool BaselineRow::FitBaseline(bool use_box_bottoms) {
00143   // Deterministic fitting is used wherever possible.
00144   fitter_.Clear();
00145   // Linear least squares is a backup if the DetLineFit produces a bad line.
00146   LLSQ llsq;
00147   BLOBNBOX_IT blob_it(blobs_);
00148 
00149   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00150     BLOBNBOX* blob = blob_it.data();
00151     if (!use_box_bottoms) blob->EstimateBaselinePosition();
00152     const TBOX& box = blob->bounding_box();
00153     int x_middle = (box.left() + box.right()) / 2;
00154 #ifdef kDebugYCoord
00155     if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
00156       tprintf("Box bottom = %d, baseline pos=%d for box at:",
00157               box.bottom(), blob->baseline_position());
00158       box.print();
00159     }
00160 #endif
00161     fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
00162     llsq.add(x_middle, blob->baseline_position());
00163   }
00164   // Fit the line.
00165   ICOORD pt1, pt2;
00166   baseline_error_ = fitter_.Fit(&pt1, &pt2);
00167   baseline_pt1_ = pt1;
00168   baseline_pt2_ = pt2;
00169   if (baseline_error_ > max_baseline_error_ &&
00170       fitter_.SufficientPointsForIndependentFit()) {
00171     // The fit was bad but there were plenty of points, so try skipping
00172     // the first and last few, and use the new line if it dramatically improves
00173     // the error of fit.
00174     double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
00175     if (error < baseline_error_ / 2.0) {
00176       baseline_error_ = error;
00177       baseline_pt1_ = pt1;
00178       baseline_pt2_ = pt2;
00179     }
00180   }
00181   int debug = 0;
00182 #ifdef kDebugYCoord
00183   Print();
00184   debug = bounding_box_.bottom() < kDebugYCoord &&
00185       bounding_box_.top() > kDebugYCoord
00186             ? 3 : 2;
00187 #endif
00188   // Now we obtained a direction from that fit, see if we can improve the
00189   // fit using the same direction and some other start point.
00190   FCOORD direction(pt2 - pt1);
00191   double target_offset = direction * pt1;
00192   good_baseline_ = false;
00193   FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
00194   // Wild lines can be produced because DetLineFit allows vertical lines, but
00195   // vertical text has been rotated so angles over pi/4 should be disallowed.
00196   // Near vertical lines can still be produced by vertically aligned components
00197   // on very short lines.
00198   double angle = BaselineAngle();
00199   if (fabs(angle) > M_PI * 0.25) {
00200     // Use the llsq fit as a backup.
00201     baseline_pt1_ = llsq.mean_point();
00202     baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
00203     // TODO(rays) get rid of this when m and c are no longer used.
00204     double m = llsq.m();
00205     double c = llsq.c(m);
00206     baseline_error_ = llsq.rms(m, c);
00207     good_baseline_ = false;
00208   }
00209   return good_baseline_;
00210 }
00211 
00212 // Modifies an existing result of FitBaseline to be parallel to the given
00213 // direction vector if that produces a better result.
00214 void BaselineRow::AdjustBaselineToParallel(int debug,
00215                                            const FCOORD& direction) {
00216   SetupBlobDisplacements(direction);
00217   if (displacement_modes_.empty())
00218     return;
00219 #ifdef kDebugYCoord
00220   if (bounding_box_.bottom() < kDebugYCoord &&
00221       bounding_box_.top() > kDebugYCoord && debug < 3)
00222     debug = 3;
00223 #endif
00224   FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
00225 }
00226 
00227 // Modifies the baseline to snap to the textline grid if the existing
00228 // result is not good enough.
00229 double BaselineRow::AdjustBaselineToGrid(int debug,
00230                                          const FCOORD& direction,
00231                                          double line_spacing,
00232                                          double line_offset) {
00233   if (blobs_->empty()) {
00234     if (debug > 1) {
00235       tprintf("Row empty at:");
00236       bounding_box_.print();
00237     }
00238     return line_offset;
00239   }
00240   // Find the displacement_modes_ entry nearest to the grid.
00241   double best_error = 0.0;
00242   int best_index = -1;
00243   for (int i = 0; i < displacement_modes_.size(); ++i) {
00244     double blob_y = displacement_modes_[i];
00245     double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
00246                                                     line_offset);
00247     if (debug > 1) {
00248       tprintf("Mode at %g has error %g from model \n", blob_y, error);
00249     }
00250     if (best_index < 0 || error < best_error) {
00251       best_error = error;
00252       best_index = i;
00253     }
00254   }
00255   // We will move the baseline only if the chosen mode is close enough to the
00256   // model.
00257   double model_margin = max_baseline_error_ - best_error;
00258   if (best_index >= 0 && model_margin > 0.0) {
00259     // But if the current baseline is already close to the mode there is no
00260     // point, and only the potential to damage accuracy by changing its angle.
00261     double perp_disp = PerpDisp(direction);
00262     double shift = displacement_modes_[best_index] - perp_disp;
00263     if (fabs(shift) > max_baseline_error_) {
00264       if (debug > 1) {
00265         tprintf("Attempting linespacing model fit with mode %g to row at:",
00266                 displacement_modes_[best_index]);
00267         bounding_box_.print();
00268       }
00269       FitConstrainedIfBetter(debug, direction, model_margin,
00270                              displacement_modes_[best_index]);
00271     } else if (debug > 1) {
00272       tprintf("Linespacing model only moves current line by %g for row at:",
00273               shift);
00274       bounding_box_.print();
00275     }
00276   } else if (debug > 1) {
00277     tprintf("Linespacing model not close enough to any mode for row at:");
00278     bounding_box_.print();
00279   }
00280   return fmod(PerpDisp(direction), line_spacing);
00281 }
00282 
00283 // Sets up displacement_modes_ with the top few modes of the perpendicular
00284 // distance of each blob from the given direction vector, after rounding.
00285 void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) {
00286   // Set of perpendicular displacements of the blob bottoms from the required
00287   // baseline direction.
00288   GenericVector<double> perp_blob_dists;
00289   displacement_modes_.truncate(0);
00290   // Gather the skew-corrected position of every blob.
00291   double min_dist = MAX_FLOAT32;
00292   double max_dist = -MAX_FLOAT32;
00293   BLOBNBOX_IT blob_it(blobs_);
00294   bool debug = false;
00295   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00296     BLOBNBOX* blob = blob_it.data();
00297     const TBOX& box = blob->bounding_box();
00298 #ifdef kDebugYCoord
00299     if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true;
00300 #endif
00301     FCOORD blob_pos((box.left() + box.right()) / 2.0f,
00302                     blob->baseline_position());
00303     double offset = direction * blob_pos;
00304     perp_blob_dists.push_back(offset);
00305     if (debug) {
00306       tprintf("Displacement %g for blob at:", offset);
00307       box.print();
00308     }
00309     UpdateRange(offset, &min_dist, &max_dist);
00310   }
00311   // Set up a histogram using disp_quant_factor_ as the bucket size.
00312   STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
00313                    IntCastRounded(max_dist / disp_quant_factor_) + 1);
00314   for (int i = 0; i < perp_blob_dists.size(); ++i) {
00315     dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1);
00316   }
00317   GenericVector<KDPairInc<float, int> > scaled_modes;
00318   dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes);
00319   if (debug) {
00320     for (int i = 0; i < scaled_modes.size(); ++i) {
00321       tprintf("Top mode = %g * %d\n",
00322               scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data);
00323     }
00324   }
00325   for (int i = 0; i < scaled_modes.size(); ++i)
00326     displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key);
00327 }
00328 
00329 // Fits a line in the given direction to blobs that are close to the given
00330 // target_offset perpendicular displacement from the direction. The fit
00331 // error is allowed to be cheat_allowance worse than the existing fit, and
00332 // will still be used.
00333 // If cheat_allowance > 0, the new fit will be good and replace the current
00334 // fit if it has better fit (with cheat) OR its error is below
00335 // max_baseline_error_ and the old fit is marked bad.
00336 // Otherwise the new fit will only replace the old if it is really better,
00337 // or the old fit is marked bad and the new fit has sufficient points, as
00338 // well as being within the max_baseline_error_.
00339 void BaselineRow::FitConstrainedIfBetter(int debug,
00340                                          const FCOORD& direction,
00341                                          double cheat_allowance,
00342                                          double target_offset) {
00343   double halfrange = fit_halfrange_ * direction.length();
00344   double min_dist = target_offset - halfrange;
00345   double max_dist = target_offset + halfrange;
00346   ICOORD line_pt;
00347   double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
00348                                             debug > 2, &line_pt);
00349   // Allow cheat_allowance off the new error
00350   new_error -= cheat_allowance;
00351   double old_angle = BaselineAngle();
00352   double new_angle = direction.angle();
00353   if (debug > 1) {
00354     tprintf("Constrained error = %g, original = %g",
00355             new_error, baseline_error_);
00356     tprintf(" angles = %g, %g, delta=%g vs threshold %g\n",
00357             old_angle, new_angle,
00358             new_angle - old_angle, kMaxSkewDeviation);
00359   }
00360   bool new_good_baseline = new_error <= max_baseline_error_ &&
00361       (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
00362   // The new will replace the old if any are true:
00363   // 1. the new error is better
00364   // 2. the old is NOT good, but the new is
00365   // 3. there is a wild angular difference between them (assuming that the new
00366   //    is a better guess at the angle.)
00367   if (new_error <= baseline_error_ ||
00368       (!good_baseline_ && new_good_baseline) ||
00369       fabs(new_angle - old_angle) > kMaxSkewDeviation) {
00370     baseline_error_ = new_error;
00371     baseline_pt1_ = line_pt;
00372     baseline_pt2_ = baseline_pt1_ + direction;
00373     good_baseline_ = new_good_baseline;
00374     if (debug > 1) {
00375       tprintf("Replacing with constrained baseline, good = %d\n",
00376               good_baseline_);
00377     }
00378   } else if (debug > 1) {
00379     tprintf("Keeping old baseline\n");
00380   }
00381 }
00382 
00383 // Returns the perpendicular distance of the point from the straight
00384 // baseline.
00385 double BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const {
00386   FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_);
00387   FCOORD offset_vector(pt - baseline_pt1_);
00388   double distance = baseline_vector * offset_vector;
00389   return sqrt(distance * distance / baseline_vector.sqlength());
00390 }
00391 
00392 // Computes the bounding box of the row.
00393 void BaselineRow::ComputeBoundingBox() {
00394   BLOBNBOX_IT it(blobs_);
00395   TBOX box;
00396   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00397     box += it.data()->bounding_box();
00398   }
00399   bounding_box_ = box;
00400 }
00401 
00402 
00403 BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block)
00404   : block_(block), debug_level_(debug_level), non_text_block_(non_text),
00405     good_skew_angle_(false), skew_angle_(0.0),
00406     line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
00407   TO_ROW_IT row_it(block_->get_rows());
00408   for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
00409     // Sort the blobs on the rows.
00410     row_it.data()->blob_list()->sort(blob_x_order);
00411     rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
00412   }
00413 }
00414 
00415 // Computes and returns the absolute error of the given perp_disp from the
00416 // given linespacing model.
00417 double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
00418                                         double line_offset) {
00419   // Round to the nearest multiple of line_spacing + line offset.
00420   int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
00421   double model_y = line_spacing * multiple + line_offset;
00422   return fabs(perp_disp - model_y);
00423 }
00424 
00425 // Fits straight line baselines and computes the skew angle from the
00426 // median angle. Returns true if a good angle is found.
00427 // If use_box_bottoms is false, baseline positions are formed by
00428 // considering the outlines of the blobs.
00429 bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
00430   if (non_text_block_) return false;
00431   GenericVector<double> angles;
00432   for (int r = 0; r < rows_.size(); ++r) {
00433     BaselineRow* row = rows_[r];
00434     if (row->FitBaseline(use_box_bottoms)) {
00435       double angle = row->BaselineAngle();
00436       angles.push_back(angle);
00437     }
00438     if (debug_level_ > 1)
00439       row->Print();
00440   }
00441 
00442   if (!angles.empty()) {
00443     skew_angle_ = MedianOfCircularValues(M_PI, &angles);
00444     good_skew_angle_ = true;
00445   } else {
00446     skew_angle_ = 0.0f;
00447     good_skew_angle_ = false;
00448   }
00449   if (debug_level_ > 0) {
00450     tprintf("Initial block skew angle = %g, good = %d\n",
00451             skew_angle_, good_skew_angle_);
00452   }
00453   return good_skew_angle_;
00454 }
00455 
00456 // Refits the baseline to a constrained angle, using the stored block
00457 // skew if good enough, otherwise the supplied default skew.
00458 void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
00459   if (non_text_block_) return;
00460   if (!good_skew_angle_) skew_angle_ = default_block_skew;
00461   if (debug_level_ > 0)
00462     tprintf("Adjusting block to skew angle %g\n", skew_angle_);
00463   FCOORD direction(cos(skew_angle_), sin(skew_angle_));
00464   for (int r = 0; r < rows_.size(); ++r) {
00465     BaselineRow* row = rows_[r];
00466     row->AdjustBaselineToParallel(debug_level_, direction);
00467     if (debug_level_ > 1)
00468       row->Print();
00469   }
00470   if (rows_.size() < 3 || !ComputeLineSpacing())
00471     return;
00472   // Enforce the line spacing model on all lines that don't yet have a good
00473   // baseline.
00474   // Start by finding the row that is best fitted to the model.
00475   int best_row = 0;
00476   double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
00477                                         line_spacing_, line_offset_);
00478   for (int r = 1; r < rows_.size(); ++r) {
00479     double error = SpacingModelError(rows_[r]->PerpDisp(direction),
00480                                      line_spacing_, line_offset_);
00481     if (error < best_error) {
00482       best_error = error;
00483       best_row = r;
00484     }
00485   }
00486   // Starting at the best fitting row, work outwards, syncing the offset.
00487   double offset = line_offset_;
00488   for (int r = best_row + 1; r < rows_.size(); ++r) {
00489     offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
00490                                             line_spacing_, offset);
00491   }
00492   offset = line_offset_;
00493   for (int r = best_row - 1; r >= 0; --r) {
00494     offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
00495                                             line_spacing_, offset);
00496   }
00497 }
00498 
00499 // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
00500 void BaselineBlock::SetupBlockParameters() const {
00501   if (line_spacing_ > 0.0) {
00502     // Where was block_line_spacing set before?
00503     float min_spacing = MIN(block_->line_spacing, line_spacing_);
00504     if (min_spacing < block_->line_size)
00505       block_->line_size = min_spacing;
00506     block_->line_spacing = line_spacing_;
00507     block_->baseline_offset = line_offset_;
00508     block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
00509   }
00510   // Setup the parameters on all the rows.
00511   TO_ROW_IT row_it(block_->get_rows());
00512   for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {
00513     BaselineRow* row = rows_[r];
00514     TO_ROW* to_row = row_it.data();
00515     row->SetupOldLineParameters(to_row);
00516   }
00517 }
00518 
00519 // Processing that is required before fitting baseline splines, but requires
00520 // linear baselines in order to be successful:
00521 //   Removes noise if required
00522 //   Separates out underlines
00523 //   Pre-associates blob fragments.
00524 // TODO(rays/joeliu) This entire section of code is inherited from the past
00525 // and could be improved/eliminated.
00526 // page_tr is used to size a debug window.
00527 void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
00528   if (non_text_block_) return;
00529   if (remove_noise) {
00530     vigorous_noise_removal(block_);
00531   }
00532   FCOORD rotation(1.0f, 0.0f);
00533   double gradient = tan(skew_angle_);
00534   separate_underlines(block_, gradient, rotation, true);
00535   pre_associate_blobs(page_tr, block_, rotation, true);
00536 }
00537 
00538 // Fits splines to the textlines, or creates fake QSPLINES from the straight
00539 // baselines that are already on the TO_ROWs.
00540 // As a side-effect, computes the xheights of the rows and the block.
00541 // Although x-height estimation is conceptually separate, it is part of
00542 // detecting perspective distortion and therefore baseline fitting.
00543 void BaselineBlock::FitBaselineSplines(bool enable_splines,
00544                                        bool show_final_rows,
00545                                        Textord* textord) {
00546   double gradient = tan(skew_angle_);
00547   FCOORD rotation(1.0f, 0.0f);
00548 
00549   if (enable_splines) {
00550     textord->make_spline_rows(block_, gradient, show_final_rows);
00551   } else {
00552     // Make a fake spline from the existing line.
00553     TBOX block_box= block_->block->bounding_box();
00554     TO_ROW_IT row_it = block_->get_rows();
00555     for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
00556       TO_ROW* row = row_it.data();
00557       inT32 xstarts[2] = { block_box.left(), block_box.right() };
00558       double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
00559       row->baseline = QSPLINE(1, xstarts, coeffs);
00560       textord->compute_row_xheight(row, block_->block->classify_rotation(),
00561                                    row->line_m(), block_->line_size);
00562     }
00563   }
00564   textord->compute_block_xheight(block_, gradient);
00565   block_->block->set_xheight(block_->xheight);
00566   if (textord_restore_underlines)  // fix underlines
00567     restore_underlined_blobs(block_);
00568 }
00569 
00570 // Draws the (straight) baselines and final blobs colored according to
00571 // what was discarded as noise and what is associated with each row.
00572 void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) {
00573 #ifndef GRAPHICS_DISABLED
00574   if (non_text_block_) return;
00575   double gradient = tan(skew_angle_);
00576   FCOORD rotation(1.0f, 0.0f);
00577   int left_edge = block_->block->bounding_box().left();
00578   ScrollView* win = create_to_win(page_tr);
00579   ScrollView::Color colour = ScrollView::RED;
00580   TO_ROW_IT row_it = block_->get_rows();
00581   for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
00582     plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
00583     colour = static_cast<ScrollView::Color>(colour + 1);
00584     if (colour > ScrollView::MAGENTA)
00585       colour = ScrollView::RED;
00586   }
00587   plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
00588   // Show discarded blobs.
00589   plot_blob_list(win, &block_->underlines,
00590                  ScrollView::YELLOW, ScrollView::CORAL);
00591   if (block_->blobs.length() > 0)
00592     tprintf("%d blobs discarded as noise\n", block_->blobs.length());
00593   draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
00594 #endif
00595 }
00596 
00597 void BaselineBlock::DrawPixSpline(Pix* pix_in) {
00598   if (non_text_block_) return;
00599   TO_ROW_IT row_it = block_->get_rows();
00600   for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
00601     row_it.data()->baseline.plot(pix_in);
00602   }
00603 }
00604 
00605 // Top-level line-spacing calculation. Computes an estimate of the line-
00606 // spacing, using the current baselines in the TO_ROWS of the block, and
00607 // then refines it by fitting a regression line to the baseline positions
00608 // as a function of their integer index.
00609 // Returns true if it seems that the model is a reasonable fit to the
00610 // observations.
00611 bool BaselineBlock::ComputeLineSpacing() {
00612   FCOORD direction(cos(skew_angle_), sin(skew_angle_));
00613   GenericVector<double> row_positions;
00614   ComputeBaselinePositions(direction, &row_positions);
00615   if (row_positions.size() < 2) return false;
00616   EstimateLineSpacing();
00617   RefineLineSpacing(row_positions);
00618   // Verify that the model is reasonable.
00619   double max_baseline_error = kMaxBaselineError * line_spacing_;
00620   int non_trivial_gaps = 0;
00621   int fitting_gaps = 0;
00622   for (int i = 1; i < row_positions.size(); ++i) {
00623     double row_gap = fabs(row_positions[i - 1] - row_positions[i]);
00624     if (row_gap > max_baseline_error) {
00625       ++non_trivial_gaps;
00626       if (fabs(row_gap - line_spacing_) <= max_baseline_error)
00627         ++fitting_gaps;
00628     }
00629   }
00630   if (debug_level_ > 0) {
00631     tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n",
00632             line_spacing_, row_positions.size(), fitting_gaps,
00633             non_trivial_gaps);
00634   }
00635   return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
00636 }
00637 
00638 // Computes the deskewed vertical position of each baseline in the block and
00639 // stores them in the given vector.
00640 // This is calculated as the perpendicular distance of the middle of each
00641 // baseline (in case it has a different skew angle) from the line passing
00642 // through the origin parallel to the block baseline angle.
00643 // NOTE that "distance" above is a signed quantity so we can tell which side
00644 // of the block baseline a line sits, hence the function and argument name
00645 // positions not distances.
00646 void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction,
00647                                              GenericVector<double>* positions) {
00648   positions->clear();
00649   for (int r = 0; r < rows_.size(); ++r) {
00650     BaselineRow* row = rows_[r];
00651     const TBOX& row_box = row->bounding_box();
00652     float x_middle = (row_box.left() + row_box.right()) / 2.0f;
00653     FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle)));
00654     float offset = direction * row_pos;
00655     positions->push_back(offset);
00656   }
00657 }
00658 
00659 // Computes an estimate of the line spacing of the block from the median
00660 // of the spacings between adjacent overlapping textlines.
00661 void BaselineBlock::EstimateLineSpacing() {
00662   GenericVector<float> spacings;
00663   for (int r = 0; r < rows_.size(); ++r) {
00664     BaselineRow* row = rows_[r];
00665     // Exclude silly lines.
00666     if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue;
00667     // Find the first row after row that overlaps it significantly.
00668     const TBOX& row_box = row->bounding_box();
00669     int r2;
00670     for (r2 = r + 1; r2 < rows_.size() &&
00671          !row_box.major_x_overlap(rows_[r2]->bounding_box());
00672          ++r2);
00673     if (r2 < rows_.size()) {
00674       BaselineRow* row2 = rows_[r2];
00675       // Exclude silly lines.
00676       if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue;
00677       float spacing = row->SpaceBetween(*row2);
00678       spacings.push_back(spacing);
00679     }
00680   }
00681   // If we have at least one value, use it, otherwise leave the previous
00682   // value unchanged.
00683   if (!spacings.empty()) {
00684     line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)];
00685     if (debug_level_ > 1)
00686       tprintf("Estimate of linespacing = %g\n", line_spacing_);
00687   }
00688 }
00689 
00690 // Refines the line spacing of the block by fitting a regression
00691 // line to the deskewed y-position of each baseline as a function of its
00692 // estimated line index, allowing for a small error in the initial linespacing
00693 // and choosing the best available model.
00694 void BaselineBlock::RefineLineSpacing(const GenericVector<double>& positions) {
00695   double spacings[3], offsets[3], errors[3];
00696   int index_range;
00697   errors[0] = FitLineSpacingModel(positions, line_spacing_,
00698                                   &spacings[0], &offsets[0], &index_range);
00699   if (index_range > 1) {
00700     double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);
00701     // Try the hypotheses that there might be index_range +/- 1 line spaces.
00702     errors[1] = FitLineSpacingModel(positions, spacing_plus,
00703                                     &spacings[1], &offsets[1], NULL);
00704     double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);
00705     errors[2] = FitLineSpacingModel(positions, spacing_minus,
00706                                     &spacings[2], &offsets[2], NULL);
00707     for (int i = 1; i <= 2; ++i) {
00708       if (errors[i] < errors[0]) {
00709         spacings[0] = spacings[i];
00710         offsets[0] = offsets[i];
00711         errors[0] = errors[i];
00712       }
00713     }
00714   }
00715   if (spacings[0] > 0.0) {
00716     line_spacing_ = spacings[0];
00717     line_offset_ = offsets[0];
00718     model_error_ = errors[0];
00719     if (debug_level_ > 0) {
00720       tprintf("Final linespacing model = %g + offset %g, error %g\n",
00721               line_spacing_, line_offset_, model_error_);
00722     }
00723   }
00724 }
00725 
00726 // Given an initial estimate of line spacing (m_in) and the positions of each
00727 // baseline, computes the line spacing of the block more accurately in m_out,
00728 // and the corresponding intercept in c_out, and the number of spacings seen
00729 // in index_delta. Returns the error of fit to the line spacing model.
00730 // Uses a simple linear regression, but optimized the offset using the median.
00731 double BaselineBlock::FitLineSpacingModel(
00732     const GenericVector<double>& positions, double m_in,
00733     double* m_out, double* c_out, int* index_delta) {
00734   if (m_in == 0.0f || positions.size() < 2) {
00735     *m_out = m_in;
00736     *c_out = 0.0;
00737     if (index_delta != NULL) *index_delta = 0;
00738     return 0.0;
00739   }
00740   GenericVector<double> offsets;
00741   // Get the offset (remainder) linespacing for each line and choose the median.
00742   for (int i = 0; i < positions.size(); ++i)
00743     offsets.push_back(fmod(positions[i], m_in));
00744   // Get the median offset.
00745   double median_offset = MedianOfCircularValues(m_in, &offsets);
00746   // Now fit a line to quantized line number and offset.
00747   LLSQ llsq;
00748   int min_index = MAX_INT32;
00749   int max_index = -MAX_INT32;
00750   for (int i = 0; i < positions.size(); ++i) {
00751     double y_pos = positions[i];
00752     int row_index = IntCastRounded((y_pos - median_offset) / m_in);
00753     UpdateRange(row_index, &min_index, &max_index);
00754     llsq.add(row_index, y_pos);
00755   }
00756   // Get the refined line spacing.
00757   *m_out = llsq.m();
00758   // Use the median offset rather than the mean.
00759   offsets.truncate(0);
00760   for (int i = 0; i < positions.size(); ++i)
00761     offsets.push_back(fmod(positions[i], *m_out));
00762   // Get the median offset.
00763   if (debug_level_ > 2) {
00764     for (int i = 0; i < offsets.size(); ++i)
00765       tprintf("%d: %g\n", i, offsets[i]);
00766   }
00767   *c_out = MedianOfCircularValues(*m_out, &offsets);
00768   if (debug_level_ > 1) {
00769     tprintf("Median offset = %g, compared to mean of %g.\n",
00770             *c_out, llsq.c(*m_out));
00771   }
00772   // Index_delta is the number of hypothesized line gaps present.
00773   if (index_delta != NULL)
00774     *index_delta = max_index - min_index;
00775   // Use the regression model's intercept to compute the error, as it may be
00776   // a full line-spacing in disagreement with the median.
00777   double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
00778   if (debug_level_ > 1) {
00779     tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n",
00780             m_in, median_offset, *m_out, *c_out, rms_error);
00781   }
00782   return rms_error;
00783 }
00784 
00785 
00786 BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
00787                                TO_BLOCK_LIST* blocks)
00788     : page_skew_(page_skew), debug_level_(debug_level), pix_debug_(NULL),
00789       debug_file_prefix_("") {
00790   TO_BLOCK_IT it(blocks);
00791   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00792     TO_BLOCK* to_block = it.data();
00793     BLOCK* block = to_block->block;
00794     POLY_BLOCK* pb = block->poly_block();
00795     // A note about non-text blocks.
00796     // On output, non-text blocks are supposed to contain a single empty word
00797     // in each incoming text line. These mark out the polygonal bounds of the
00798     // block. Ideally no baselines should be required, but currently
00799     // make_words crashes if a baseline and xheight are not provided, so we
00800     // include non-text blocks here, but flag them for special treatment.
00801     bool non_text = pb != NULL && !pb->IsText();
00802     blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block));
00803   }
00804 }
00805 
00806 BaselineDetect::~BaselineDetect() {
00807   pixDestroy(&pix_debug_);
00808 }
00809 
00810 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
00811 // block-wise and page-wise data to smooth small blocks/rows, and applies
00812 // smoothing based on block/page-level skew and block-level linespacing.
00813 void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
00814   GenericVector<double> block_skew_angles;
00815   for (int i = 0; i < blocks_.size(); ++i) {
00816     BaselineBlock* bl_block = blocks_[i];
00817     if (debug_level_ > 0)
00818       tprintf("Fitting initial baselines...\n");
00819     if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) {
00820       block_skew_angles.push_back(bl_block->skew_angle());
00821     }
00822   }
00823   // Compute a page-wide default skew for blocks with too little information.
00824   double default_block_skew = page_skew_.angle();
00825   if (!block_skew_angles.empty()) {
00826     default_block_skew = MedianOfCircularValues(M_PI, &block_skew_angles);
00827   }
00828   if (debug_level_ > 0) {
00829     tprintf("Page skew angle = %g\n", default_block_skew);
00830   }
00831   // Set bad lines in each block to the default block skew and then force fit
00832   // a linespacing model where it makes sense to do so.
00833   for (int i = 0; i < blocks_.size(); ++i) {
00834     BaselineBlock* bl_block = blocks_[i];
00835     bl_block->ParallelizeBaselines(default_block_skew);
00836     bl_block->SetupBlockParameters();  // This replaced compute_row_stats.
00837   }
00838 }
00839 
00840 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
00841 // other associated side-effects, including pre-associating blobs, computing
00842 // x-heights and displaying debug information.
00843 // NOTE that ComputeStraightBaselines must have been called first as this
00844 // sets up data in the TO_ROWs upon which this function depends.
00845 void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr,
00846                                                        bool enable_splines,
00847                                                        bool remove_noise,
00848                                                        bool show_final_rows,
00849                                                       Textord* textord) {
00850   Pix* pix_spline = pix_debug_ ? pixConvertTo32(pix_debug_) : NULL;
00851   for (int i = 0; i < blocks_.size(); ++i) {
00852     BaselineBlock* bl_block = blocks_[i];
00853     bl_block->PrepareForSplineFitting(page_tr, remove_noise);
00854     bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
00855     if (pix_spline) {
00856       bl_block->DrawPixSpline(pix_spline);
00857     }
00858     if (show_final_rows) {
00859       bl_block->DrawFinalRows(page_tr);
00860     }
00861   }
00862 
00863   if (pix_spline) {
00864     STRING outfile_name = debug_file_prefix_ + "_spline.png";
00865     pixWrite(outfile_name.string(), pix_spline, IFF_PNG);
00866     pixDestroy(&pix_spline);
00867   }
00868 }
00869 
00870 void BaselineDetect::SetDebugImage(Pix* pixIn, const STRING& output_path) {
00871   pixDestroy(&pix_debug_);
00872   pix_debug_ = pixClone(pixIn);
00873   debug_file_prefix_ = output_path;
00874 }
00875 
00876 }  // namespace tesseract.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines