// tesseract 3.03
00001 00002 // File: baselinedetect.h 00003 // Description: Initial Baseline Determination. 00004 // Copyright 2012 Google Inc. All Rights Reserved. 00005 // Author: rays@google.com (Ray Smith) 00006 // Created: Mon Apr 30 10:03:19 PDT 2012 00007 // 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_ 00021 #define TESSERACT_TEXTORD_BASELINEDETECT_H_ 00022 00023 #include "detlinefit.h" 00024 #include "genericvector.h" 00025 #include "points.h" 00026 #include "rect.h" 00027 #include "strngs.h" 00028 00029 class BLOBNBOX_LIST; 00030 class TO_BLOCK; 00031 class TO_BLOCK_LIST; 00032 class TO_ROW; 00033 struct Pix; 00034 00035 namespace tesseract { 00036 00037 class Textord; 00038 00039 // Class to compute and hold baseline data for a TO_ROW. 00040 class BaselineRow { 00041 public: 00042 BaselineRow(double line_size, TO_ROW* to_row); 00043 00044 const TBOX& bounding_box() const { 00045 return bounding_box_; 00046 } 00047 // Sets the TO_ROW with the output straight line. 00048 void SetupOldLineParameters(TO_ROW* row) const; 00049 00050 // Outputs diagnostic information. 00051 void Print() const; 00052 00053 // Returns the skew angle (in radians) of the current baseline in [-pi,pi]. 00054 double BaselineAngle() const; 00055 // Computes and returns the linespacing at the middle of the overlap 00056 // between this and other. 
00057 double SpaceBetween(const BaselineRow& other) const; 00058 // Computes and returns the displacement of the center of the line 00059 // perpendicular to the given direction. 00060 double PerpDisp(const FCOORD& direction) const; 00061 // Computes the y coordinate at the given x using the straight baseline 00062 // defined by baseline1_ and baseline2_. 00063 double StraightYAtX(double x) const; 00064 00065 // Fits a straight baseline to the points. Returns true if it had enough 00066 // points to be reasonably sure of the fitted baseline. 00067 // If use_box_bottoms is false, baselines positions are formed by 00068 // considering the outlines of the blobs. 00069 bool FitBaseline(bool use_box_bottoms); 00070 // Modifies an existing result of FitBaseline to be parallel to the given 00071 // vector if that produces a better result. 00072 void AdjustBaselineToParallel(int debug, const FCOORD& direction); 00073 // Modifies the baseline to snap to the textline grid if the existing 00074 // result is not good enough. 00075 double AdjustBaselineToGrid(int debug, const FCOORD& direction, 00076 double line_spacing, double line_offset); 00077 00078 private: 00079 // Sets up displacement_modes_ with the top few modes of the perpendicular 00080 // distance of each blob from the given direction vector, after rounding. 00081 void SetupBlobDisplacements(const FCOORD& direction); 00082 00083 // Fits a line in the given direction to blobs that are close to the given 00084 // target_offset perpendicular displacement from the direction. The fit 00085 // error is allowed to be cheat_allowance worse than the existing fit, and 00086 // will still be used. 00087 // If cheat_allowance > 0, the new fit will be good and replace the current 00088 // fit if it has better fit (with cheat) OR its error is below 00089 // max_baseline_error_ and the old fit is marked bad. 
00090 // Otherwise the new fit will only replace the old if it is really better, 00091 // or the old fit is marked bad and the new fit has sufficient points, as 00092 // well as being within the max_baseline_error_. 00093 void FitConstrainedIfBetter(int debug, const FCOORD& direction, 00094 double cheat_allowance, 00095 double target_offset); 00096 // Returns the perpendicular distance of the point from the straight 00097 // baseline. 00098 double PerpDistanceFromBaseline(const FCOORD& pt) const; 00099 // Computes the bounding box of the row. 00100 void ComputeBoundingBox(); 00101 00102 // The blobs of the row to which this BaselineRow adds extra information 00103 // during baseline fitting. Note that blobs_ could easily come from either 00104 // a TO_ROW or a ColPartition. 00105 BLOBNBOX_LIST* blobs_; 00106 // Bounding box of all the blobs. 00107 TBOX bounding_box_; 00108 // Fitter used to fit lines to the blobs. 00109 DetLineFit fitter_; 00110 // 2 points on the straight baseline. 00111 FCOORD baseline_pt1_; 00112 FCOORD baseline_pt2_; 00113 // Set of modes of displacements. They indicate preferable baseline positions. 00114 GenericVector<double> displacement_modes_; 00115 // Quantization factor used for displacement_modes_. 00116 double disp_quant_factor_; 00117 // Half the acceptance range of blob displacements for computing the 00118 // error during a constrained fit. 00119 double fit_halfrange_; 00120 // Max baseline error before a line is regarded as fitting badly. 00121 double max_baseline_error_; 00122 // The error of fit of the baseline. 00123 double baseline_error_; 00124 // True if this row seems to have a good baseline. 00125 bool good_baseline_; 00126 }; 00127 00128 // Class to compute and hold baseline data for a TO_BLOCK. 
00129 class BaselineBlock { 00130 public: 00131 BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block); 00132 00133 TO_BLOCK* block() const { 00134 return block_; 00135 } 00136 double skew_angle() const { 00137 return skew_angle_; 00138 } 00139 00140 // Computes and returns the absolute error of the given perp_disp from the 00141 // given linespacing model. 00142 static double SpacingModelError(double perp_disp, double line_spacing, 00143 double line_offset); 00144 00145 // Fits straight line baselines and computes the skew angle from the 00146 // median angle. Returns true if a good angle is found. 00147 // If use_box_bottoms is false, baseline positions are formed by 00148 // considering the outlines of the blobs. 00149 bool FitBaselinesAndFindSkew(bool use_box_bottoms); 00150 00151 // Refits the baseline to a constrained angle, using the stored block 00152 // skew if good enough, otherwise the supplied default skew. 00153 void ParallelizeBaselines(double default_block_skew); 00154 00155 // Sets the parameters in TO_BLOCK that are needed by subsequent processes. 00156 void SetupBlockParameters() const; 00157 00158 // Processing that is required before fitting baseline splines, but requires 00159 // linear baselines in order to be successful: 00160 // Removes noise if required 00161 // Separates out underlines 00162 // Pre-associates blob fragments. 00163 // TODO(rays/joeliu) This entire section of code is inherited from the past 00164 // and could be improved/eliminated. 00165 // page_tr is used to size a debug window. 00166 void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise); 00167 00168 // Fits splines to the textlines, or creates fake QSPLINES from the straight 00169 // baselines that are already on the TO_ROWs. 00170 // As a side-effect, computes the xheights of the rows and the block. 00171 // Although x-height estimation is conceptually separate, it is part of 00172 // detecting perspective distortion and therefore baseline fitting. 
00173 void FitBaselineSplines(bool enable_splines, bool show_final_rows, 00174 Textord* textord); 00175 00176 // Draws the (straight) baselines and final blobs colored according to 00177 // what was discarded as noise and what is associated with each row. 00178 void DrawFinalRows(const ICOORD& page_tr); 00179 00180 // Render the generated spline baselines for this block on pix_in. 00181 void DrawPixSpline(Pix* pix_in); 00182 00183 private: 00184 // Top-level line-spacing calculation. Computes an estimate of the line- 00185 // spacing, using the current baselines in the TO_ROWS of the block, and 00186 // then refines it by fitting a regression line to the baseline positions 00187 // as a function of their integer index. 00188 // Returns true if it seems that the model is a reasonable fit to the 00189 // observations. 00190 bool ComputeLineSpacing(); 00191 00192 // Computes the deskewed vertical position of each baseline in the block and 00193 // stores them in the given vector. 00194 void ComputeBaselinePositions(const FCOORD& direction, 00195 GenericVector<double>* positions); 00196 00197 // Computes an estimate of the line spacing of the block from the median 00198 // of the spacings between adjacent overlapping textlines. 00199 void EstimateLineSpacing(); 00200 00201 // Refines the line spacing of the block by fitting a regression 00202 // line to the deskewed y-position of each baseline as a function of its 00203 // estimated line index, allowing for a small error in the initial linespacing 00204 // and choosing the best available model. 00205 void RefineLineSpacing(const GenericVector<double>& positions); 00206 00207 // Given an initial estimate of line spacing (m_in) and the positions of each 00208 // baseline, computes the line spacing of the block more accurately in m_out, 00209 // and the corresponding intercept in c_out, and the number of spacings seen 00210 // in index_delta. Returns the error of fit to the line spacing model. 
00211 double FitLineSpacingModel(const GenericVector<double>& positions, 00212 double m_in, double* m_out, double* c_out, 00213 int* index_delta); 00214 00215 00216 // The block to which this class adds extra information used during baseline 00217 // calculation. 00218 TO_BLOCK* block_; 00219 // The rows in the block that we will be working with. 00220 PointerVector<BaselineRow> rows_; 00221 // Amount of debugging output to provide. 00222 int debug_level_; 00223 // True if the block is non-text (graphic). 00224 bool non_text_block_; 00225 // True if the block has at least one good enough baseline to compute the 00226 // skew angle and therefore skew_angle_ is valid. 00227 bool good_skew_angle_; 00228 // Angle of skew in radians using the conventional anticlockwise from x-axis. 00229 double skew_angle_; 00230 // Current best estimate line spacing in pixels perpendicular to skew_angle_. 00231 double line_spacing_; 00232 // Offset for baseline positions, in pixels. Each baseline is at 00233 // line_spacing_ * n + line_offset_ for integer n, which represents 00234 // [textline] line number in a line numbering system that has line 0 on or 00235 // at least near the x-axis. Not equal to the actual line number of a line 00236 // within a block as most blocks are not near the x-axis. 00237 double line_offset_; 00238 // The error of the line spacing model. 00239 double model_error_; 00240 }; 00241 00242 class BaselineDetect { 00243 public: 00244 BaselineDetect(int debug_level, const FCOORD& page_skew, 00245 TO_BLOCK_LIST* blocks); 00246 00247 ~BaselineDetect(); 00248 00249 // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers 00250 // block-wise and page-wise data to smooth small blocks/rows, and applies 00251 // smoothing based on block/page-level skew and block-level linespacing. 
00252 void ComputeStraightBaselines(bool use_box_bottoms); 00253 00254 // Computes the baseline splines for each TO_ROW in each TO_BLOCK and 00255 // other associated side-effects, including pre-associating blobs, computing 00256 // x-heights and displaying debug information. 00257 // NOTE that ComputeStraightBaselines must have been called first as this 00258 // sets up data in the TO_ROWs upon which this function depends. 00259 void ComputeBaselineSplinesAndXheights(const ICOORD& page_tr, 00260 bool enable_splines, 00261 bool remove_noise, 00262 bool show_final_rows, 00263 Textord* textord); 00264 00265 // Set up the image and filename, so that a debug image with the detected 00266 // baseline rendered will be saved. 00267 void SetDebugImage(Pix* pixIn, const STRING& output_path); 00268 00269 private: 00270 // Average (median) skew of the blocks on the page among those that have 00271 // a good angle of their own. 00272 FCOORD page_skew_; 00273 // Amount of debug output to produce. 00274 int debug_level_; 00275 // The blocks that we are working with. 00276 PointerVector<BaselineBlock> blocks_; 00277 00278 Pix* pix_debug_; 00279 STRING debug_file_prefix_; 00280 }; 00281 00282 } // namespace tesseract 00283 00284 #endif // TESSERACT_TEXTORD_BASELINEDETECT_H_