tesseract
3.03
|
00001 00002 // File: baselinedetect.cpp 00003 // Description: Initial Baseline Determination. 00004 // Copyright 2012 Google Inc. All Rights Reserved. 00005 // Author: rays@google.com (Ray Smith) 00006 // Created: Mon Apr 30 10:15:31 PDT 2012 00007 // 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifdef _MSC_VER 00021 #define _USE_MATH_DEFINES 00022 #endif // _MSC_VER 00023 00024 #ifdef HAVE_CONFIG_H 00025 #include "config_auto.h" 00026 #endif 00027 00028 #include "baselinedetect.h" 00029 00030 #include <math.h> 00031 #include "allheaders.h" 00032 #include "blobbox.h" 00033 #include "detlinefit.h" 00034 #include "drawtord.h" 00035 #include "helpers.h" 00036 #include "linlsq.h" 00037 #include "makerow.h" 00038 #include "textord.h" 00039 #include "tprintf.h" 00040 #include "underlin.h" 00041 00042 // Number of displacement modes kept in displacement_modes_; 00043 const int kMaxDisplacementsModes = 3; 00044 // Number of points to skip when retrying initial fit. 00045 const int kNumSkipPoints = 3; 00046 // Max angle deviation (in radians) allowed to keep the independent baseline. 00047 const double kMaxSkewDeviation = 1.0 / 64; 00048 // Fraction of line spacing estimate for quantization of blob displacements. 00049 const double kOffsetQuantizationFactor = 3.0 / 64; 00050 // Fraction of line spacing estimate for computing blob fit error. 00051 const double kFitHalfrangeFactor = 6.0 / 64; 00052 // Max fraction of line spacing allowed before a baseline counts as badly fitting. 00053 const double kMaxBaselineError = 3.0 / 64; 00054 // Multiple of linespacing that sets max_blob_size in TO_BLOCK. 00055 // Copied from textord_excess_blobsize. 00056 const double kMaxBlobSizeMultiple = 1.3; 00057 // Min fraction of linespacing gaps that should be close to the model before 00058 // we will force the linespacing model on all the lines. 00059 const double kMinFittingLinespacings = 0.25; 00060 // A y-coordinate within a textline that is to be debugged. 00061 //#define kDebugYCoord 1525 00062 00063 namespace tesseract { 00064 00065 BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row) 00066 : blobs_(to_row->blob_list()), 00067 baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f), 00068 baseline_error_(0.0), good_baseline_(false) { 00069 ComputeBoundingBox(); 00070 // Compute a scale factor for rounding to ints. 00071 disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing; 00072 fit_halfrange_ = kFitHalfrangeFactor * line_spacing; 00073 max_baseline_error_ = kMaxBaselineError * line_spacing; 00074 } 00075 00076 // Sets the TO_ROW with the output straight line. 00077 void BaselineRow::SetupOldLineParameters(TO_ROW* row) const { 00078 // TODO(rays) get rid of this when m and c are no longer used. 00079 double gradient = tan(BaselineAngle()); 00080 // para_c is the actual intercept of the baseline on the y-axis. 00081 float para_c = StraightYAtX(0.0); 00082 row->set_line(gradient, para_c, baseline_error_); 00083 row->set_parallel_line(gradient, para_c, baseline_error_); 00084 } 00085 00086 // Outputs diagnostic information. 00087 void BaselineRow::Print() const { 00088 tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n", 00089 baseline_pt1_.x(), baseline_pt1_.y(), 00090 baseline_pt2_.x(), baseline_pt2_.y(), 00091 BaselineAngle(), StraightYAtX(0.0)); 00092 tprintf("Quant factor=%g, error=%g, good=%d, box:", 00093 disp_quant_factor_, baseline_error_, good_baseline_); 00094 bounding_box_.print(); 00095 } 00096 00097 // Returns the skew angle (in radians) of the current baseline in [-pi,pi]. 00098 double BaselineRow::BaselineAngle() const { 00099 FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_); 00100 double angle = baseline_dir.angle(); 00101 // Baseline directions are only unique in a range of pi so constrain to 00102 // [-pi/2, pi/2]. 00103 return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5; 00104 } 00105 00106 // Computes and returns the linespacing at the middle of the overlap 00107 // between this and other. 00108 double BaselineRow::SpaceBetween(const BaselineRow& other) const { 00109 // Find the x-centre of overlap of the lines. 00110 float x = (MAX(bounding_box_.left(), other.bounding_box_.left()) + 00111 MIN(bounding_box_.right(), other.bounding_box_.right())) / 2; 00112 // Find the vertical centre between them. 00113 float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f; 00114 // Find the perpendicular distance of (x,y) from each line. 00115 FCOORD pt(x, y); 00116 return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt); 00117 } 00118 00119 // Computes and returns the displacement of the center of the line 00120 // perpendicular to the given direction. 00121 double BaselineRow::PerpDisp(const FCOORD& direction) const { 00122 float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f; 00123 FCOORD middle_pos(middle_x, StraightYAtX(middle_x)); 00124 return direction * middle_pos / direction.length(); 00125 } 00126 00127 // Computes the y coordinate at the given x using the straight baseline 00128 // defined by baseline_pt1_ and baseline_pt2__. 00129 double BaselineRow::StraightYAtX(double x) const { 00130 double denominator = baseline_pt2_.x() - baseline_pt1_.x(); 00131 if (denominator == 0.0) 00132 return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0; 00133 return baseline_pt1_.y() + 00134 (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) / 00135 denominator; 00136 } 00137 00138 // Fits a straight baseline to the points. Returns true if it had enough 00139 // points to be reasonably sure of the fitted baseline. 00140 // If use_box_bottoms is false, baselines positions are formed by 00141 // considering the outlines of the blobs. 00142 bool BaselineRow::FitBaseline(bool use_box_bottoms) { 00143 // Deterministic fitting is used wherever possible. 00144 fitter_.Clear(); 00145 // Linear least squares is a backup if the DetLineFit produces a bad line. 00146 LLSQ llsq; 00147 BLOBNBOX_IT blob_it(blobs_); 00148 00149 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00150 BLOBNBOX* blob = blob_it.data(); 00151 if (!use_box_bottoms) blob->EstimateBaselinePosition(); 00152 const TBOX& box = blob->bounding_box(); 00153 int x_middle = (box.left() + box.right()) / 2; 00154 #ifdef kDebugYCoord 00155 if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) { 00156 tprintf("Box bottom = %d, baseline pos=%d for box at:", 00157 box.bottom(), blob->baseline_position()); 00158 box.print(); 00159 } 00160 #endif 00161 fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2); 00162 llsq.add(x_middle, blob->baseline_position()); 00163 } 00164 // Fit the line. 00165 ICOORD pt1, pt2; 00166 baseline_error_ = fitter_.Fit(&pt1, &pt2); 00167 baseline_pt1_ = pt1; 00168 baseline_pt2_ = pt2; 00169 if (baseline_error_ > max_baseline_error_ && 00170 fitter_.SufficientPointsForIndependentFit()) { 00171 // The fit was bad but there were plenty of points, so try skipping 00172 // the first and last few, and use the new line if it dramatically improves 00173 // the error of fit. 00174 double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2); 00175 if (error < baseline_error_ / 2.0) { 00176 baseline_error_ = error; 00177 baseline_pt1_ = pt1; 00178 baseline_pt2_ = pt2; 00179 } 00180 } 00181 int debug = 0; 00182 #ifdef kDebugYCoord 00183 Print(); 00184 debug = bounding_box_.bottom() < kDebugYCoord && 00185 bounding_box_.top() > kDebugYCoord 00186 ? 3 : 2; 00187 #endif 00188 // Now we obtained a direction from that fit, see if we can improve the 00189 // fit using the same direction and some other start point. 00190 FCOORD direction(pt2 - pt1); 00191 double target_offset = direction * pt1; 00192 good_baseline_ = false; 00193 FitConstrainedIfBetter(debug, direction, 0.0, target_offset); 00194 // Wild lines can be produced because DetLineFit allows vertical lines, but 00195 // vertical text has been rotated so angles over pi/4 should be disallowed. 00196 // Near vertical lines can still be produced by vertically aligned components 00197 // on very short lines. 00198 double angle = BaselineAngle(); 00199 if (fabs(angle) > M_PI * 0.25) { 00200 // Use the llsq fit as a backup. 00201 baseline_pt1_ = llsq.mean_point(); 00202 baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m()); 00203 // TODO(rays) get rid of this when m and c are no longer used. 00204 double m = llsq.m(); 00205 double c = llsq.c(m); 00206 baseline_error_ = llsq.rms(m, c); 00207 good_baseline_ = false; 00208 } 00209 return good_baseline_; 00210 } 00211 00212 // Modifies an existing result of FitBaseline to be parallel to the given 00213 // direction vector if that produces a better result. 00214 void BaselineRow::AdjustBaselineToParallel(int debug, 00215 const FCOORD& direction) { 00216 SetupBlobDisplacements(direction); 00217 if (displacement_modes_.empty()) 00218 return; 00219 #ifdef kDebugYCoord 00220 if (bounding_box_.bottom() < kDebugYCoord && 00221 bounding_box_.top() > kDebugYCoord && debug < 3) 00222 debug = 3; 00223 #endif 00224 FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]); 00225 } 00226 00227 // Modifies the baseline to snap to the textline grid if the existing 00228 // result is not good enough. 00229 double BaselineRow::AdjustBaselineToGrid(int debug, 00230 const FCOORD& direction, 00231 double line_spacing, 00232 double line_offset) { 00233 if (blobs_->empty()) { 00234 if (debug > 1) { 00235 tprintf("Row empty at:"); 00236 bounding_box_.print(); 00237 } 00238 return line_offset; 00239 } 00240 // Find the displacement_modes_ entry nearest to the grid. 00241 double best_error = 0.0; 00242 int best_index = -1; 00243 for (int i = 0; i < displacement_modes_.size(); ++i) { 00244 double blob_y = displacement_modes_[i]; 00245 double error = BaselineBlock::SpacingModelError(blob_y, line_spacing, 00246 line_offset); 00247 if (debug > 1) { 00248 tprintf("Mode at %g has error %g from model \n", blob_y, error); 00249 } 00250 if (best_index < 0 || error < best_error) { 00251 best_error = error; 00252 best_index = i; 00253 } 00254 } 00255 // We will move the baseline only if the chosen mode is close enough to the 00256 // model. 00257 double model_margin = max_baseline_error_ - best_error; 00258 if (best_index >= 0 && model_margin > 0.0) { 00259 // But if the current baseline is already close to the mode there is no 00260 // point, and only the potential to damage accuracy by changing its angle. 00261 double perp_disp = PerpDisp(direction); 00262 double shift = displacement_modes_[best_index] - perp_disp; 00263 if (fabs(shift) > max_baseline_error_) { 00264 if (debug > 1) { 00265 tprintf("Attempting linespacing model fit with mode %g to row at:", 00266 displacement_modes_[best_index]); 00267 bounding_box_.print(); 00268 } 00269 FitConstrainedIfBetter(debug, direction, model_margin, 00270 displacement_modes_[best_index]); 00271 } else if (debug > 1) { 00272 tprintf("Linespacing model only moves current line by %g for row at:", 00273 shift); 00274 bounding_box_.print(); 00275 } 00276 } else if (debug > 1) { 00277 tprintf("Linespacing model not close enough to any mode for row at:"); 00278 bounding_box_.print(); 00279 } 00280 return fmod(PerpDisp(direction), line_spacing); 00281 } 00282 00283 // Sets up displacement_modes_ with the top few modes of the perpendicular 00284 // distance of each blob from the given direction vector, after rounding. 00285 void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) { 00286 // Set of perpendicular displacements of the blob bottoms from the required 00287 // baseline direction. 00288 GenericVector<double> perp_blob_dists; 00289 displacement_modes_.truncate(0); 00290 // Gather the skew-corrected position of every blob. 00291 double min_dist = MAX_FLOAT32; 00292 double max_dist = -MAX_FLOAT32; 00293 BLOBNBOX_IT blob_it(blobs_); 00294 bool debug = false; 00295 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00296 BLOBNBOX* blob = blob_it.data(); 00297 const TBOX& box = blob->bounding_box(); 00298 #ifdef kDebugYCoord 00299 if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true; 00300 #endif 00301 FCOORD blob_pos((box.left() + box.right()) / 2.0f, 00302 blob->baseline_position()); 00303 double offset = direction * blob_pos; 00304 perp_blob_dists.push_back(offset); 00305 if (debug) { 00306 tprintf("Displacement %g for blob at:", offset); 00307 box.print(); 00308 } 00309 UpdateRange(offset, &min_dist, &max_dist); 00310 } 00311 // Set up a histogram using disp_quant_factor_ as the bucket size. 00312 STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_), 00313 IntCastRounded(max_dist / disp_quant_factor_) + 1); 00314 for (int i = 0; i < perp_blob_dists.size(); ++i) { 00315 dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1); 00316 } 00317 GenericVector<KDPairInc<float, int> > scaled_modes; 00318 dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes); 00319 if (debug) { 00320 for (int i = 0; i < scaled_modes.size(); ++i) { 00321 tprintf("Top mode = %g * %d\n", 00322 scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data); 00323 } 00324 } 00325 for (int i = 0; i < scaled_modes.size(); ++i) 00326 displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key); 00327 } 00328 00329 // Fits a line in the given direction to blobs that are close to the given 00330 // target_offset perpendicular displacement from the direction. The fit 00331 // error is allowed to be cheat_allowance worse than the existing fit, and 00332 // will still be used. 00333 // If cheat_allowance > 0, the new fit will be good and replace the current 00334 // fit if it has better fit (with cheat) OR its error is below 00335 // max_baseline_error_ and the old fit is marked bad. 00336 // Otherwise the new fit will only replace the old if it is really better, 00337 // or the old fit is marked bad and the new fit has sufficient points, as 00338 // well as being within the max_baseline_error_. 00339 void BaselineRow::FitConstrainedIfBetter(int debug, 00340 const FCOORD& direction, 00341 double cheat_allowance, 00342 double target_offset) { 00343 double halfrange = fit_halfrange_ * direction.length(); 00344 double min_dist = target_offset - halfrange; 00345 double max_dist = target_offset + halfrange; 00346 ICOORD line_pt; 00347 double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist, 00348 debug > 2, &line_pt); 00349 // Allow cheat_allowance off the new error 00350 new_error -= cheat_allowance; 00351 double old_angle = BaselineAngle(); 00352 double new_angle = direction.angle(); 00353 if (debug > 1) { 00354 tprintf("Constrained error = %g, original = %g", 00355 new_error, baseline_error_); 00356 tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", 00357 old_angle, new_angle, 00358 new_angle - old_angle, kMaxSkewDeviation); 00359 } 00360 bool new_good_baseline = new_error <= max_baseline_error_ && 00361 (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit()); 00362 // The new will replace the old if any are true: 00363 // 1. the new error is better 00364 // 2. the old is NOT good, but the new is 00365 // 3. there is a wild angular difference between them (assuming that the new 00366 // is a better guess at the angle.) 00367 if (new_error <= baseline_error_ || 00368 (!good_baseline_ && new_good_baseline) || 00369 fabs(new_angle - old_angle) > kMaxSkewDeviation) { 00370 baseline_error_ = new_error; 00371 baseline_pt1_ = line_pt; 00372 baseline_pt2_ = baseline_pt1_ + direction; 00373 good_baseline_ = new_good_baseline; 00374 if (debug > 1) { 00375 tprintf("Replacing with constrained baseline, good = %d\n", 00376 good_baseline_); 00377 } 00378 } else if (debug > 1) { 00379 tprintf("Keeping old baseline\n"); 00380 } 00381 } 00382 00383 // Returns the perpendicular distance of the point from the straight 00384 // baseline. 00385 double BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const { 00386 FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_); 00387 FCOORD offset_vector(pt - baseline_pt1_); 00388 double distance = baseline_vector * offset_vector; 00389 return sqrt(distance * distance / baseline_vector.sqlength()); 00390 } 00391 00392 // Computes the bounding box of the row. 00393 void BaselineRow::ComputeBoundingBox() { 00394 BLOBNBOX_IT it(blobs_); 00395 TBOX box; 00396 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00397 box += it.data()->bounding_box(); 00398 } 00399 bounding_box_ = box; 00400 } 00401 00402 00403 BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block) 00404 : block_(block), debug_level_(debug_level), non_text_block_(non_text), 00405 good_skew_angle_(false), skew_angle_(0.0), 00406 line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) { 00407 TO_ROW_IT row_it(block_->get_rows()); 00408 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { 00409 // Sort the blobs on the rows. 00410 row_it.data()->blob_list()->sort(blob_x_order); 00411 rows_.push_back(new BaselineRow(block->line_spacing, row_it.data())); 00412 } 00413 } 00414 00415 // Computes and returns the absolute error of the given perp_disp from the 00416 // given linespacing model. 00417 double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing, 00418 double line_offset) { 00419 // Round to the nearest multiple of line_spacing + line offset. 00420 int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing); 00421 double model_y = line_spacing * multiple + line_offset; 00422 return fabs(perp_disp - model_y); 00423 } 00424 00425 // Fits straight line baselines and computes the skew angle from the 00426 // median angle. Returns true if a good angle is found. 00427 // If use_box_bottoms is false, baseline positions are formed by 00428 // considering the outlines of the blobs. 00429 bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) { 00430 if (non_text_block_) return false; 00431 GenericVector<double> angles; 00432 for (int r = 0; r < rows_.size(); ++r) { 00433 BaselineRow* row = rows_[r]; 00434 if (row->FitBaseline(use_box_bottoms)) { 00435 double angle = row->BaselineAngle(); 00436 angles.push_back(angle); 00437 } 00438 if (debug_level_ > 1) 00439 row->Print(); 00440 } 00441 00442 if (!angles.empty()) { 00443 skew_angle_ = MedianOfCircularValues(M_PI, &angles); 00444 good_skew_angle_ = true; 00445 } else { 00446 skew_angle_ = 0.0f; 00447 good_skew_angle_ = false; 00448 } 00449 if (debug_level_ > 0) { 00450 tprintf("Initial block skew angle = %g, good = %d\n", 00451 skew_angle_, good_skew_angle_); 00452 } 00453 return good_skew_angle_; 00454 } 00455 00456 // Refits the baseline to a constrained angle, using the stored block 00457 // skew if good enough, otherwise the supplied default skew. 00458 void BaselineBlock::ParallelizeBaselines(double default_block_skew) { 00459 if (non_text_block_) return; 00460 if (!good_skew_angle_) skew_angle_ = default_block_skew; 00461 if (debug_level_ > 0) 00462 tprintf("Adjusting block to skew angle %g\n", skew_angle_); 00463 FCOORD direction(cos(skew_angle_), sin(skew_angle_)); 00464 for (int r = 0; r < rows_.size(); ++r) { 00465 BaselineRow* row = rows_[r]; 00466 row->AdjustBaselineToParallel(debug_level_, direction); 00467 if (debug_level_ > 1) 00468 row->Print(); 00469 } 00470 if (rows_.size() < 3 || !ComputeLineSpacing()) 00471 return; 00472 // Enforce the line spacing model on all lines that don't yet have a good 00473 // baseline. 00474 // Start by finding the row that is best fitted to the model. 00475 int best_row = 0; 00476 double best_error = SpacingModelError(rows_[0]->PerpDisp(direction), 00477 line_spacing_, line_offset_); 00478 for (int r = 1; r < rows_.size(); ++r) { 00479 double error = SpacingModelError(rows_[r]->PerpDisp(direction), 00480 line_spacing_, line_offset_); 00481 if (error < best_error) { 00482 best_error = error; 00483 best_row = r; 00484 } 00485 } 00486 // Starting at the best fitting row, work outwards, syncing the offset. 00487 double offset = line_offset_; 00488 for (int r = best_row + 1; r < rows_.size(); ++r) { 00489 offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, 00490 line_spacing_, offset); 00491 } 00492 offset = line_offset_; 00493 for (int r = best_row - 1; r >= 0; --r) { 00494 offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, 00495 line_spacing_, offset); 00496 } 00497 } 00498 00499 // Sets the parameters in TO_BLOCK that are needed by subsequent processes. 00500 void BaselineBlock::SetupBlockParameters() const { 00501 if (line_spacing_ > 0.0) { 00502 // Where was block_line_spacing set before? 00503 float min_spacing = MIN(block_->line_spacing, line_spacing_); 00504 if (min_spacing < block_->line_size) 00505 block_->line_size = min_spacing; 00506 block_->line_spacing = line_spacing_; 00507 block_->baseline_offset = line_offset_; 00508 block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple; 00509 } 00510 // Setup the parameters on all the rows. 00511 TO_ROW_IT row_it(block_->get_rows()); 00512 for (int r = 0; r < rows_.size(); ++r, row_it.forward()) { 00513 BaselineRow* row = rows_[r]; 00514 TO_ROW* to_row = row_it.data(); 00515 row->SetupOldLineParameters(to_row); 00516 } 00517 } 00518 00519 // Processing that is required before fitting baseline splines, but requires 00520 // linear baselines in order to be successful: 00521 // Removes noise if required 00522 // Separates out underlines 00523 // Pre-associates blob fragments. 00524 // TODO(rays/joeliu) This entire section of code is inherited from the past 00525 // and could be improved/eliminated. 00526 // page_tr is used to size a debug window. 00527 void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) { 00528 if (non_text_block_) return; 00529 if (remove_noise) { 00530 vigorous_noise_removal(block_); 00531 } 00532 FCOORD rotation(1.0f, 0.0f); 00533 double gradient = tan(skew_angle_); 00534 separate_underlines(block_, gradient, rotation, true); 00535 pre_associate_blobs(page_tr, block_, rotation, true); 00536 } 00537 00538 // Fits splines to the textlines, or creates fake QSPLINES from the straight 00539 // baselines that are already on the TO_ROWs. 00540 // As a side-effect, computes the xheights of the rows and the block. 00541 // Although x-height estimation is conceptually separate, it is part of 00542 // detecting perspective distortion and therefore baseline fitting. 00543 void BaselineBlock::FitBaselineSplines(bool enable_splines, 00544 bool show_final_rows, 00545 Textord* textord) { 00546 double gradient = tan(skew_angle_); 00547 FCOORD rotation(1.0f, 0.0f); 00548 00549 if (enable_splines) { 00550 textord->make_spline_rows(block_, gradient, show_final_rows); 00551 } else { 00552 // Make a fake spline from the existing line. 00553 TBOX block_box= block_->block->bounding_box(); 00554 TO_ROW_IT row_it = block_->get_rows(); 00555 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { 00556 TO_ROW* row = row_it.data(); 00557 inT32 xstarts[2] = { block_box.left(), block_box.right() }; 00558 double coeffs[3] = { 0.0, row->line_m(), row->line_c() }; 00559 row->baseline = QSPLINE(1, xstarts, coeffs); 00560 textord->compute_row_xheight(row, block_->block->classify_rotation(), 00561 row->line_m(), block_->line_size); 00562 } 00563 } 00564 textord->compute_block_xheight(block_, gradient); 00565 block_->block->set_xheight(block_->xheight); 00566 if (textord_restore_underlines) // fix underlines 00567 restore_underlined_blobs(block_); 00568 } 00569 00570 // Draws the (straight) baselines and final blobs colored according to 00571 // what was discarded as noise and what is associated with each row. 00572 void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) { 00573 #ifndef GRAPHICS_DISABLED 00574 if (non_text_block_) return; 00575 double gradient = tan(skew_angle_); 00576 FCOORD rotation(1.0f, 0.0f); 00577 int left_edge = block_->block->bounding_box().left(); 00578 ScrollView* win = create_to_win(page_tr); 00579 ScrollView::Color colour = ScrollView::RED; 00580 TO_ROW_IT row_it = block_->get_rows(); 00581 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { 00582 plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation); 00583 colour = static_cast<ScrollView::Color>(colour + 1); 00584 if (colour > ScrollView::MAGENTA) 00585 colour = ScrollView::RED; 00586 } 00587 plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE); 00588 // Show discarded blobs. 00589 plot_blob_list(win, &block_->underlines, 00590 ScrollView::YELLOW, ScrollView::CORAL); 00591 if (block_->blobs.length() > 0) 00592 tprintf("%d blobs discarded as noise\n", block_->blobs.length()); 00593 draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation); 00594 #endif 00595 } 00596 00597 void BaselineBlock::DrawPixSpline(Pix* pix_in) { 00598 if (non_text_block_) return; 00599 TO_ROW_IT row_it = block_->get_rows(); 00600 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { 00601 row_it.data()->baseline.plot(pix_in); 00602 } 00603 } 00604 00605 // Top-level line-spacing calculation. Computes an estimate of the line- 00606 // spacing, using the current baselines in the TO_ROWS of the block, and 00607 // then refines it by fitting a regression line to the baseline positions 00608 // as a function of their integer index. 00609 // Returns true if it seems that the model is a reasonable fit to the 00610 // observations. 00611 bool BaselineBlock::ComputeLineSpacing() { 00612 FCOORD direction(cos(skew_angle_), sin(skew_angle_)); 00613 GenericVector<double> row_positions; 00614 ComputeBaselinePositions(direction, &row_positions); 00615 if (row_positions.size() < 2) return false; 00616 EstimateLineSpacing(); 00617 RefineLineSpacing(row_positions); 00618 // Verify that the model is reasonable. 00619 double max_baseline_error = kMaxBaselineError * line_spacing_; 00620 int non_trivial_gaps = 0; 00621 int fitting_gaps = 0; 00622 for (int i = 1; i < row_positions.size(); ++i) { 00623 double row_gap = fabs(row_positions[i - 1] - row_positions[i]); 00624 if (row_gap > max_baseline_error) { 00625 ++non_trivial_gaps; 00626 if (fabs(row_gap - line_spacing_) <= max_baseline_error) 00627 ++fitting_gaps; 00628 } 00629 } 00630 if (debug_level_ > 0) { 00631 tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n", 00632 line_spacing_, row_positions.size(), fitting_gaps, 00633 non_trivial_gaps); 00634 } 00635 return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings; 00636 } 00637 00638 // Computes the deskewed vertical position of each baseline in the block and 00639 // stores them in the given vector. 00640 // This is calculated as the perpendicular distance of the middle of each 00641 // baseline (in case it has a different skew angle) from the line passing 00642 // through the origin parallel to the block baseline angle. 00643 // NOTE that "distance" above is a signed quantity so we can tell which side 00644 // of the block baseline a line sits, hence the function and argument name 00645 // positions not distances. 00646 void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction, 00647 GenericVector<double>* positions) { 00648 positions->clear(); 00649 for (int r = 0; r < rows_.size(); ++r) { 00650 BaselineRow* row = rows_[r]; 00651 const TBOX& row_box = row->bounding_box(); 00652 float x_middle = (row_box.left() + row_box.right()) / 2.0f; 00653 FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle))); 00654 float offset = direction * row_pos; 00655 positions->push_back(offset); 00656 } 00657 } 00658 00659 // Computes an estimate of the line spacing of the block from the median 00660 // of the spacings between adjacent overlapping textlines. 00661 void BaselineBlock::EstimateLineSpacing() { 00662 GenericVector<float> spacings; 00663 for (int r = 0; r < rows_.size(); ++r) { 00664 BaselineRow* row = rows_[r]; 00665 // Exclude silly lines. 00666 if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue; 00667 // Find the first row after row that overlaps it significantly. 00668 const TBOX& row_box = row->bounding_box(); 00669 int r2; 00670 for (r2 = r + 1; r2 < rows_.size() && 00671 !row_box.major_x_overlap(rows_[r2]->bounding_box()); 00672 ++r2); 00673 if (r2 < rows_.size()) { 00674 BaselineRow* row2 = rows_[r2]; 00675 // Exclude silly lines. 00676 if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue; 00677 float spacing = row->SpaceBetween(*row2); 00678 spacings.push_back(spacing); 00679 } 00680 } 00681 // If we have at least one value, use it, otherwise leave the previous 00682 // value unchanged. 00683 if (!spacings.empty()) { 00684 line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)]; 00685 if (debug_level_ > 1) 00686 tprintf("Estimate of linespacing = %g\n", line_spacing_); 00687 } 00688 } 00689 00690 // Refines the line spacing of the block by fitting a regression 00691 // line to the deskewed y-position of each baseline as a function of its 00692 // estimated line index, allowing for a small error in the initial linespacing 00693 // and choosing the best available model. 00694 void BaselineBlock::RefineLineSpacing(const GenericVector<double>& positions) { 00695 double spacings[3], offsets[3], errors[3]; 00696 int index_range; 00697 errors[0] = FitLineSpacingModel(positions, line_spacing_, 00698 &spacings[0], &offsets[0], &index_range); 00699 if (index_range > 1) { 00700 double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range); 00701 // Try the hypotheses that there might be index_range +/- 1 line spaces. 00702 errors[1] = FitLineSpacingModel(positions, spacing_plus, 00703 &spacings[1], &offsets[1], NULL); 00704 double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range); 00705 errors[2] = FitLineSpacingModel(positions, spacing_minus, 00706 &spacings[2], &offsets[2], NULL); 00707 for (int i = 1; i <= 2; ++i) { 00708 if (errors[i] < errors[0]) { 00709 spacings[0] = spacings[i]; 00710 offsets[0] = offsets[i]; 00711 errors[0] = errors[i]; 00712 } 00713 } 00714 } 00715 if (spacings[0] > 0.0) { 00716 line_spacing_ = spacings[0]; 00717 line_offset_ = offsets[0]; 00718 model_error_ = errors[0]; 00719 if (debug_level_ > 0) { 00720 tprintf("Final linespacing model = %g + offset %g, error %g\n", 00721 line_spacing_, line_offset_, model_error_); 00722 } 00723 } 00724 } 00725 00726 // Given an initial estimate of line spacing (m_in) and the positions of each 00727 // baseline, computes the line spacing of the block more accurately in m_out, 00728 // and the corresponding intercept in c_out, and the number of spacings seen 00729 // in index_delta. Returns the error of fit to the line spacing model. 00730 // Uses a simple linear regression, but optimized the offset using the median. 00731 double BaselineBlock::FitLineSpacingModel( 00732 const GenericVector<double>& positions, double m_in, 00733 double* m_out, double* c_out, int* index_delta) { 00734 if (m_in == 0.0f || positions.size() < 2) { 00735 *m_out = m_in; 00736 *c_out = 0.0; 00737 if (index_delta != NULL) *index_delta = 0; 00738 return 0.0; 00739 } 00740 GenericVector<double> offsets; 00741 // Get the offset (remainder) linespacing for each line and choose the median. 00742 for (int i = 0; i < positions.size(); ++i) 00743 offsets.push_back(fmod(positions[i], m_in)); 00744 // Get the median offset. 00745 double median_offset = MedianOfCircularValues(m_in, &offsets); 00746 // Now fit a line to quantized line number and offset. 00747 LLSQ llsq; 00748 int min_index = MAX_INT32; 00749 int max_index = -MAX_INT32; 00750 for (int i = 0; i < positions.size(); ++i) { 00751 double y_pos = positions[i]; 00752 int row_index = IntCastRounded((y_pos - median_offset) / m_in); 00753 UpdateRange(row_index, &min_index, &max_index); 00754 llsq.add(row_index, y_pos); 00755 } 00756 // Get the refined line spacing. 00757 *m_out = llsq.m(); 00758 // Use the median offset rather than the mean. 00759 offsets.truncate(0); 00760 for (int i = 0; i < positions.size(); ++i) 00761 offsets.push_back(fmod(positions[i], *m_out)); 00762 // Get the median offset. 00763 if (debug_level_ > 2) { 00764 for (int i = 0; i < offsets.size(); ++i) 00765 tprintf("%d: %g\n", i, offsets[i]); 00766 } 00767 *c_out = MedianOfCircularValues(*m_out, &offsets); 00768 if (debug_level_ > 1) { 00769 tprintf("Median offset = %g, compared to mean of %g.\n", 00770 *c_out, llsq.c(*m_out)); 00771 } 00772 // Index_delta is the number of hypothesized line gaps present. 00773 if (index_delta != NULL) 00774 *index_delta = max_index - min_index; 00775 // Use the regression model's intercept to compute the error, as it may be 00776 // a full line-spacing in disagreement with the median. 00777 double rms_error = llsq.rms(*m_out, llsq.c(*m_out)); 00778 if (debug_level_ > 1) { 00779 tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", 00780 m_in, median_offset, *m_out, *c_out, rms_error); 00781 } 00782 return rms_error; 00783 } 00784 00785 00786 BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew, 00787 TO_BLOCK_LIST* blocks) 00788 : page_skew_(page_skew), debug_level_(debug_level), pix_debug_(NULL), 00789 debug_file_prefix_("") { 00790 TO_BLOCK_IT it(blocks); 00791 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00792 TO_BLOCK* to_block = it.data(); 00793 BLOCK* block = to_block->block; 00794 POLY_BLOCK* pb = block->poly_block(); 00795 // A note about non-text blocks. 00796 // On output, non-text blocks are supposed to contain a single empty word 00797 // in each incoming text line. These mark out the polygonal bounds of the 00798 // block. Ideally no baselines should be required, but currently 00799 // make_words crashes if a baseline and xheight are not provided, so we 00800 // include non-text blocks here, but flag them for special treatment. 00801 bool non_text = pb != NULL && !pb->IsText(); 00802 blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block)); 00803 } 00804 } 00805 00806 BaselineDetect::~BaselineDetect() { 00807 pixDestroy(&pix_debug_); 00808 } 00809 00810 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers 00811 // block-wise and page-wise data to smooth small blocks/rows, and applies 00812 // smoothing based on block/page-level skew and block-level linespacing. 00813 void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) { 00814 GenericVector<double> block_skew_angles; 00815 for (int i = 0; i < blocks_.size(); ++i) { 00816 BaselineBlock* bl_block = blocks_[i]; 00817 if (debug_level_ > 0) 00818 tprintf("Fitting initial baselines...\n"); 00819 if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) { 00820 block_skew_angles.push_back(bl_block->skew_angle()); 00821 } 00822 } 00823 // Compute a page-wide default skew for blocks with too little information. 00824 double default_block_skew = page_skew_.angle(); 00825 if (!block_skew_angles.empty()) { 00826 default_block_skew = MedianOfCircularValues(M_PI, &block_skew_angles); 00827 } 00828 if (debug_level_ > 0) { 00829 tprintf("Page skew angle = %g\n", default_block_skew); 00830 } 00831 // Set bad lines in each block to the default block skew and then force fit 00832 // a linespacing model where it makes sense to do so. 00833 for (int i = 0; i < blocks_.size(); ++i) { 00834 BaselineBlock* bl_block = blocks_[i]; 00835 bl_block->ParallelizeBaselines(default_block_skew); 00836 bl_block->SetupBlockParameters(); // This replaced compute_row_stats. 00837 } 00838 } 00839 00840 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and 00841 // other associated side-effects, including pre-associating blobs, computing 00842 // x-heights and displaying debug information. 00843 // NOTE that ComputeStraightBaselines must have been called first as this 00844 // sets up data in the TO_ROWs upon which this function depends. 00845 void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr, 00846 bool enable_splines, 00847 bool remove_noise, 00848 bool show_final_rows, 00849 Textord* textord) { 00850 Pix* pix_spline = pix_debug_ ? pixConvertTo32(pix_debug_) : NULL; 00851 for (int i = 0; i < blocks_.size(); ++i) { 00852 BaselineBlock* bl_block = blocks_[i]; 00853 bl_block->PrepareForSplineFitting(page_tr, remove_noise); 00854 bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord); 00855 if (pix_spline) { 00856 bl_block->DrawPixSpline(pix_spline); 00857 } 00858 if (show_final_rows) { 00859 bl_block->DrawFinalRows(page_tr); 00860 } 00861 } 00862 00863 if (pix_spline) { 00864 STRING outfile_name = debug_file_prefix_ + "_spline.png"; 00865 pixWrite(outfile_name.string(), pix_spline, IFF_PNG); 00866 pixDestroy(&pix_spline); 00867 } 00868 } 00869 00870 void BaselineDetect::SetDebugImage(Pix* pixIn, const STRING& output_path) { 00871 pixDestroy(&pix_debug_); 00872 pix_debug_ = pixClone(pixIn); 00873 debug_file_prefix_ = output_path; 00874 } 00875 00876 } // namespace tesseract.