tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/baselinedetect.h
Go to the documentation of this file.
00001 
00002 // File:        baselinedetect.h
00003 // Description: Initial Baseline Determination.
00004 // Copyright 2012 Google Inc. All Rights Reserved.
00005 // Author:      rays@google.com (Ray Smith)
00006 // Created:     Mon Apr 30 10:03:19 PDT 2012
00007 //
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_
00021 #define TESSERACT_TEXTORD_BASELINEDETECT_H_
00022 
00023 #include "detlinefit.h"
00024 #include "genericvector.h"
00025 #include "points.h"
00026 #include "rect.h"
00027 #include "strngs.h"
00028 
00029 class BLOBNBOX_LIST;
00030 class TO_BLOCK;
00031 class TO_BLOCK_LIST;
00032 class TO_ROW;
00033 struct Pix;
00034 
00035 namespace tesseract {
00036 
00037 class Textord;
00038 
00039 // Class to compute and hold baseline data for a TO_ROW.
00040 class BaselineRow {
00041  public:
00042   BaselineRow(double line_size, TO_ROW* to_row);
00043   // Returns the bounding box of all the blobs in the row.
00044   const TBOX& bounding_box() const {
00045     return bounding_box_;
00046   }
00047   // Sets the TO_ROW with the output straight line.
00048   void SetupOldLineParameters(TO_ROW* row) const;
00049 
00050   // Outputs diagnostic information.
00051   void Print() const;
00052 
00053   // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
00054   double BaselineAngle() const;
00055   // Computes and returns the linespacing at the middle of the overlap
00056   // between this and other.
00057   double SpaceBetween(const BaselineRow& other) const;
00058   // Computes and returns the displacement of the center of the line
00059   // perpendicular to the given direction.
00060   double PerpDisp(const FCOORD& direction) const;
00061   // Computes the y coordinate at the given x using the straight baseline
00062   // defined by baseline_pt1_ and baseline_pt2_.
00063   double StraightYAtX(double x) const;
00064 
00065   // Fits a straight baseline to the points. Returns true if it had enough
00066   // points to be reasonably sure of the fitted baseline.
00067   // If use_box_bottoms is false, baseline positions are formed by
00068   // considering the outlines of the blobs.
00069   bool FitBaseline(bool use_box_bottoms);
00070   // Modifies an existing result of FitBaseline to be parallel to the given
00071   // vector if that produces a better result.
00072   void AdjustBaselineToParallel(int debug, const FCOORD& direction);
00073   // Modifies the baseline to snap to the textline grid if the existing result
00074   // is not good enough. NOTE(review): the returned double is undocumented here.
00075   double AdjustBaselineToGrid(int debug, const FCOORD& direction,
00076                               double line_spacing, double line_offset);
00077 
00078  private:
00079   // Sets up displacement_modes_ with the top few modes of the perpendicular
00080   // distance of each blob from the given direction vector, after rounding.
00081   void SetupBlobDisplacements(const FCOORD& direction);
00082 
00083   // Fits a line in the given direction to blobs that are close to the given
00084   // target_offset perpendicular displacement from the direction. The fit
00085   // error is allowed to be cheat_allowance worse than the existing fit, and
00086   // will still be used.
00087   // If cheat_allowance > 0, the new fit will be good and replace the current
00088   // fit if it has better fit (with cheat) OR its error is below
00089   // max_baseline_error_ and the old fit is marked bad.
00090   // Otherwise the new fit will only replace the old if it is really better,
00091   // or the old fit is marked bad and the new fit has sufficient points, as
00092   // well as being within the max_baseline_error_.
00093   void FitConstrainedIfBetter(int debug, const FCOORD& direction,
00094                               double cheat_allowance,
00095                               double target_offset);
00096   // Returns the perpendicular distance of the point from the straight
00097   // baseline.
00098   double PerpDistanceFromBaseline(const FCOORD& pt) const;
00099   // Computes the bounding box of the row.
00100   void ComputeBoundingBox();
00101 
00102   // The blobs of the row to which this BaselineRow adds extra information
00103   // during baseline fitting. Note that blobs_ could easily come from either
00104   // a TO_ROW or a ColPartition.
00105   BLOBNBOX_LIST* blobs_;
00106   // Bounding box of all the blobs.
00107   TBOX bounding_box_;
00108   // Fitter used to fit lines to the blobs.
00109   DetLineFit fitter_;
00110   // 2 points on the straight baseline.
00111   FCOORD baseline_pt1_;
00112   FCOORD baseline_pt2_;
00113   // Set of modes of displacements. They indicate preferable baseline positions.
00114   GenericVector<double> displacement_modes_;
00115   // Quantization factor used for displacement_modes_.
00116   double disp_quant_factor_;
00117   // Half the acceptance range of blob displacements for computing the
00118   // error during a constrained fit.
00119   double fit_halfrange_;
00120   // Max baseline error before a line is regarded as fitting badly.
00121   double max_baseline_error_;
00122   // The error of fit of the baseline.
00123   double baseline_error_;
00124   // True if this row seems to have a good baseline.
00125   bool good_baseline_;
00126 };
00127 
00128 // Class to compute and hold baseline data for a TO_BLOCK.
00129 class BaselineBlock {
00130  public:
00131   BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block);
00132   // Accessors. skew_angle_ is only valid when good_skew_angle_ is true.
00133   TO_BLOCK* block() const {
00134     return block_;
00135   }
00136   double skew_angle() const {
00137     return skew_angle_;
00138   }
00139 
00140   // Computes and returns the absolute error of the given perp_disp from the
00141   // given linespacing model.
00142   static double SpacingModelError(double perp_disp, double line_spacing,
00143                                   double line_offset);
00144 
00145   // Fits straight line baselines and computes the skew angle from the
00146   // median angle. Returns true if a good angle is found.
00147   // If use_box_bottoms is false, baseline positions are formed by
00148   // considering the outlines of the blobs.
00149   bool FitBaselinesAndFindSkew(bool use_box_bottoms);
00150 
00151   // Refits the baseline to a constrained angle, using the stored block
00152   // skew if good enough, otherwise the supplied default skew.
00153   void ParallelizeBaselines(double default_block_skew);
00154 
00155   // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
00156   void SetupBlockParameters() const;
00157 
00158   // Processing that is required before fitting baseline splines, but requires
00159   // linear baselines in order to be successful:
00160   //   Removes noise if required
00161   //   Separates out underlines
00162   //   Pre-associates blob fragments.
00163   // TODO(rays/joeliu) This entire section of code is inherited from the past
00164   // and could be improved/eliminated.
00165   // page_tr is used to size a debug window.
00166   void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise);
00167 
00168   // Fits splines to the textlines, or creates fake QSPLINES from the straight
00169   // baselines that are already on the TO_ROWs.
00170   // As a side-effect, computes the xheights of the rows and the block.
00171   // Although x-height estimation is conceptually separate, it is part of
00172   // detecting perspective distortion and therefore baseline fitting.
00173   void FitBaselineSplines(bool enable_splines, bool show_final_rows,
00174                           Textord* textord);
00175 
00176   // Draws the (straight) baselines and final blobs colored according to
00177   // what was discarded as noise and what is associated with each row.
00178   void DrawFinalRows(const ICOORD& page_tr);
00179 
00180   // Render the generated spline baselines for this block on pix_in.
00181   void DrawPixSpline(Pix* pix_in);
00182 
00183  private:
00184   // Top-level line-spacing calculation. Computes an estimate of the line-
00185   // spacing, using the current baselines in the TO_ROWs of the block, and
00186   // then refines it by fitting a regression line to the baseline positions
00187   // as a function of their integer index.
00188   // Returns true if it seems that the model is a reasonable fit to the
00189   // observations.
00190   bool ComputeLineSpacing();
00191 
00192   // Computes the deskewed vertical position of each baseline in the block and
00193   // stores them in the given vector.
00194   void ComputeBaselinePositions(const FCOORD& direction,
00195                                 GenericVector<double>* positions);
00196 
00197   // Computes an estimate of the line spacing of the block from the median
00198   // of the spacings between adjacent overlapping textlines.
00199   void EstimateLineSpacing();
00200 
00201   // Refines the line spacing of the block by fitting a regression
00202   // line to the deskewed y-position of each baseline as a function of its
00203   // estimated line index, allowing for a small error in the initial linespacing
00204   // and choosing the best available model.
00205   void RefineLineSpacing(const GenericVector<double>& positions);
00206 
00207   // Given an initial estimate of line spacing (m_in) and the positions of each
00208   // baseline, computes the line spacing of the block more accurately in m_out,
00209   // and the corresponding intercept in c_out, and the number of spacings seen
00210   // in index_delta. Returns the error of fit to the line spacing model.
00211   double FitLineSpacingModel(const GenericVector<double>& positions,
00212                              double m_in, double* m_out, double* c_out,
00213                              int* index_delta);
00214 
00215 
00216   // The block to which this class adds extra information used during baseline
00217   // calculation.
00218   TO_BLOCK* block_;
00219   // The rows in the block that we will be working with.
00220   PointerVector<BaselineRow> rows_;
00221   // Amount of debugging output to provide.
00222   int debug_level_;
00223   // True if the block is non-text (graphic).
00224   bool non_text_block_;
00225   // True if the block has at least one good enough baseline to compute the
00226   // skew angle and therefore skew_angle_ is valid.
00227   bool good_skew_angle_;
00228   // Angle of skew in radians using the conventional anticlockwise from x-axis.
00229   double skew_angle_;
00230   // Current best estimate line spacing in pixels perpendicular to skew_angle_.
00231   double line_spacing_;
00232   // Offset for baseline positions, in pixels. Each baseline is at
00233   // line_spacing_ * n + line_offset_ for integer n, which represents
00234   // [textline] line number in a line numbering system that has line 0 on or
00235   // at least near the x-axis. Not equal to the actual line number of a line
00236   // within a block as most blocks are not near the x-axis.
00237   double line_offset_;
00238   // The error of the line spacing model.
00239   double model_error_;
00240 };
00241 // Class to compute baselines for the TO_ROWs in each TO_BLOCK of a TO_BLOCK_LIST.
00242 class BaselineDetect {
00243  public:
00244   BaselineDetect(int debug_level, const FCOORD& page_skew,
00245                  TO_BLOCK_LIST* blocks);
00246 
00247   ~BaselineDetect();
00248 
00249   // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
00250   // block-wise and page-wise data to smooth small blocks/rows, and applies
00251   // smoothing based on block/page-level skew and block-level linespacing.
00252   void ComputeStraightBaselines(bool use_box_bottoms);
00253 
00254   // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
00255   // other associated side-effects, including pre-associating blobs, computing
00256   // x-heights and displaying debug information.
00257   // NOTE that ComputeStraightBaselines must have been called first as this
00258   // sets up data in the TO_ROWs upon which this function depends.
00259   void ComputeBaselineSplinesAndXheights(const ICOORD& page_tr,
00260                                          bool enable_splines,
00261                                          bool remove_noise,
00262                                          bool show_final_rows,
00263                                          Textord* textord);
00264 
00265   // Set up the image and filename, so that a debug image with the detected
00266   // baseline rendered will be saved.
00267   void SetDebugImage(Pix* pixIn, const STRING& output_path);
00268 
00269  private:
00270   // Average (median) skew of the blocks on the page among those that have
00271   // a good angle of their own.
00272   FCOORD page_skew_;
00273   // Amount of debug output to produce.
00274   int debug_level_;
00275   // The blocks that we are working with.
00276   PointerVector<BaselineBlock> blocks_;
00277   // Debug image and output file prefix; presumably set by SetDebugImage.
00278   Pix* pix_debug_;
00279   STRING debug_file_prefix_;
00280 };
00281 
00282 }  // namespace tesseract
00283 
00284 #endif  // TESSERACT_TEXTORD_BASELINEDETECT_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines