tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/textlineprojection.cpp
Go to the documentation of this file.
00001 // Copyright 2011 Google Inc. All Rights Reserved.
00002 // Author: rays@google.com (Ray Smith)
00003 //
00004 // Licensed under the Apache License, Version 2.0 (the "License");
00005 // you may not use this file except in compliance with the License.
00006 // You may obtain a copy of the License at
00007 // http://www.apache.org/licenses/LICENSE-2.0
00008 // Unless required by applicable law or agreed to in writing, software
00009 // distributed under the License is distributed on an "AS IS" BASIS,
00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00011 // See the License for the specific language governing permissions and
00012 // limitations under the License.
00013 
00014 #ifdef HAVE_CONFIG_H
00015 #include "config_auto.h"
00016 #endif
00017 
00018 #include "textlineprojection.h"
00019 #include "allheaders.h"
00020 #include "bbgrid.h"         // Base class.
00021 #include "blobbox.h"        // BlobNeighourDir.
00022 #include "blobs.h"
00023 #include "colpartition.h"
00024 #include "normalis.h"
00025 
00026 // Padding factor to use on definitely oriented blobs.
00027 const int kOrientedPadFactor = 8;
00028 // Padding factor to use on not definitely oriented blobs.
00029 const int kDefaultPadFactor = 2;
00030 // Penalty factor for going away from the line center.
00031 const int kWrongWayPenalty = 4;
00032 // Ratio between parallel gap and perpendicular gap used to measure total
00033 // distance of a box from a target box in curved textline space.
00034 // The parallel gap is treated more favorably by this factor to allow
00035 // catching quotes and ellipses at the end of textlines.
00036 const int kParaPerpDistRatio = 4;
00037 // Multiple of scale_factor_ that the inter-line gap must be before we start
00038 // padding the increment box perpendicular to the text line.
00039 const int kMinLineSpacingFactor = 4;
00040 // Maximum tab-stop overrun for horizontal padding, in projection pixels.
00041 const int kMaxTabStopOverrun = 6;
00042 
00043 namespace tesseract {
00044 
00045 TextlineProjection::TextlineProjection(int resolution)
00046   : x_origin_(0), y_origin_(0), pix_(NULL) {
00047   // The projection map should be about 100 ppi, whatever the input.
00048   scale_factor_ = IntCastRounded(resolution / 100.0);
00049   if (scale_factor_ < 1) scale_factor_ = 1;
00050 }
00051 TextlineProjection::~TextlineProjection() {
00052   pixDestroy(&pix_);
00053 }
00054 
00055 // Build the projection profile given the input_block containing lists of
00056 // blobs, a rotation to convert to image coords,
00057 // and a full-resolution nontext_map, marking out areas to avoid.
00058 // During construction, we have the following assumptions:
00059 // The rotation is a multiple of 90 degrees, ie no deskew yet.
00060 // The blobs have had their left and right rules set to also limit
00061 // the range of projection.
00062 void TextlineProjection::ConstructProjection(TO_BLOCK* input_block,
00063                                              const FCOORD& rotation,
00064                                              Pix* nontext_map) {
00065   pixDestroy(&pix_);
00066   TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
00067   x_origin_ = 0;
00068   y_origin_ = image_box.height();
00069   int width = (image_box.width() + scale_factor_ - 1) / scale_factor_;
00070   int height = (image_box.height() + scale_factor_ - 1) / scale_factor_;
00071 
00072   pix_ = pixCreate(width, height, 8);
00073   ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
00074   ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
00075   Pix* final_pix = pixBlockconv(pix_, 1, 1);
00076 //  Pix* final_pix = pixBlockconv(pix_, 2, 2);
00077   pixDestroy(&pix_);
00078   pix_ = final_pix;
00079 }
00080 
00081 // Display the blobs in the window colored according to textline quality.
00082 void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs,
00083                                          ScrollView* win) {
00084 #ifndef GRAPHICS_DISABLED
00085   BLOBNBOX_IT it(blobs);
00086   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00087     BLOBNBOX* blob = it.data();
00088     const TBOX& box = blob->bounding_box();
00089     bool bad_box = BoxOutOfHTextline(box, NULL, false);
00090     if (blob->UniquelyVertical())
00091       win->Pen(ScrollView::YELLOW);
00092     else
00093       win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE);
00094     win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
00095   }
00096   win->Update();
00097 #endif  // GRAPHICS_DISABLED
00098 }
00099 
00100 // Moves blobs that look like they don't sit well on a textline from the
00101 // input blobs list to the output small_blobs list.
00102 // This gets them away from initial textline finding to stop diacritics
00103 // from forming incorrect textlines. (Introduced mainly to fix Thai.)
00104 void TextlineProjection::MoveNonTextlineBlobs(
00105     BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const {
00106   BLOBNBOX_IT it(blobs);
00107   BLOBNBOX_IT small_it(small_blobs);
00108   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00109     BLOBNBOX* blob = it.data();
00110     const TBOX& box = blob->bounding_box();
00111     bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
00112                                                box.bottom());
00113     if (BoxOutOfHTextline(box, NULL, debug) && !blob->UniquelyVertical()) {
00114       blob->ClearNeighbours();
00115       small_it.add_to_end(it.extract());
00116     }
00117   }
00118 }
00119 
00120 // Create a window and display the projection in it.
00121 void TextlineProjection::DisplayProjection() const {
00122   int width = pixGetWidth(pix_);
00123   int height = pixGetHeight(pix_);
00124   Pix* pixc = pixCreate(width, height, 32);
00125   int src_wpl = pixGetWpl(pix_);
00126   int col_wpl = pixGetWpl(pixc);
00127   uinT32* src_data = pixGetData(pix_);
00128   uinT32* col_data = pixGetData(pixc);
00129   for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) {
00130     for (int x = 0; x < width; ++x) {
00131       int pixel = GET_DATA_BYTE(src_data, x);
00132       l_uint32 result;
00133       if (pixel <= 17)
00134         composeRGBPixel(0, 0, pixel * 15, &result);
00135       else if (pixel <= 145)
00136         composeRGBPixel(0, (pixel - 17) * 2, 255, &result);
00137       else
00138         composeRGBPixel((pixel - 145) * 2, 255, 255, &result);
00139       col_data[x] = result;
00140     }
00141   }
00142 #if 0
00143   // TODO(rays) uncomment when scrollview can display non-binary images.
00144   ScrollView* win = new ScrollView("Projection", 0, 0,
00145                                    width, height, width, height);
00146   win->Image(pixc, 0, 0);
00147   win->Update();
00148 #else
00149   pixWrite("projection.png", pixc, IFF_PNG);
00150 #endif
00151   pixDestroy(&pixc);
00152 }
00153 
00154 // Compute the distance of the box from the partition using curved projection
00155 // space. As DistanceOfBoxFromBox, except that the direction is taken from
00156 // the ColPartition and the median bounds of the ColPartition are used as
00157 // the to_box.
00158 int TextlineProjection::DistanceOfBoxFromPartition(const TBOX& box,
00159                                                    const ColPartition& part,
00160                                                    const DENORM* denorm,
00161                                                    bool debug) const {
00162   // Compute a partition box that uses the median top/bottom of the blobs
00163   // within and median left/right for vertical.
00164   TBOX part_box = part.bounding_box();
00165   if (part.IsHorizontalType()) {
00166     part_box.set_top(part.median_top());
00167     part_box.set_bottom(part.median_bottom());
00168   } else {
00169     part_box.set_left(part.median_left());
00170     part_box.set_right(part.median_right());
00171   }
00172   // Now use DistanceOfBoxFromBox to make the actual calculation.
00173   return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(),
00174                               denorm, debug);
00175 }
00176 
00177 // Compute the distance from the from_box to the to_box using curved
00178 // projection space. Separation that involves a decrease in projection
00179 // density (moving from the from_box to the to_box) is weighted more heavily
00180 // than constant density, and an increase is weighted less.
00181 // If horizontal_textline is true, then curved space is used vertically,
00182 // as for a diacritic on the edge of a textline.
00183 // The projection uses original image coords, so denorm is used to get
00184 // back to the image coords from box/part space.
00185 // How the calculation works: Think of a diacritic near a textline.
00186 // Distance is measured from the far side of the from_box to the near side of
00187 // the to_box. Shown is the horizontal textline case.
00188 //          |------^-----|
00189 //          | from | box |
00190 //          |------|-----|
00191 //   perpendicular |
00192 //          <------v-------->|--------------------|
00193 //                  parallel |     to box         |
00194 //                           |--------------------|
00195 // Perpendicular distance uses "curved space" See VerticalDistance below.
00196 // Parallel distance is linear.
00197 // Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
00198 int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box,
00199                                              const TBOX& to_box,
00200                                              bool horizontal_textline,
00201                                              const DENORM* denorm,
00202                                              bool debug) const {
00203   // The parallel_gap is the horizontal gap between a horizontal textline and
00204   // the box. Analogous for vertical.
00205   int parallel_gap = 0;
00206   // start_pt is the box end of the line to be modified for curved space.
00207   TPOINT start_pt;
00208   // end_pt is the partition end of the line to be modified for curved space.
00209   TPOINT end_pt;
00210   if (horizontal_textline) {
00211     parallel_gap = from_box.x_gap(to_box) + from_box.width();
00212     start_pt.x = (from_box.left() + from_box.right()) / 2;
00213     end_pt.x = start_pt.x;
00214     if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
00215       start_pt.y = from_box.top();
00216       end_pt.y = MIN(to_box.top(), start_pt.y);
00217     } else {
00218       start_pt.y = from_box.bottom();
00219       end_pt.y = MAX(to_box.bottom(), start_pt.y);
00220     }
00221   } else {
00222     parallel_gap = from_box.y_gap(to_box) + from_box.height();
00223     if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
00224       start_pt.x = from_box.right();
00225       end_pt.x = MIN(to_box.right(), start_pt.x);
00226     } else {
00227       start_pt.x = from_box.left();
00228       end_pt.x = MAX(to_box.left(), start_pt.x);
00229     }
00230     start_pt.y = (from_box.bottom() + from_box.top()) / 2;
00231     end_pt.y = start_pt.y;
00232   }
00233   // The perpendicular gap is the max vertical distance gap out of:
00234   // top of from_box to to_box top and bottom of from_box to to_box bottom.
00235   // This value is then modified for curved projection space.
00236   // Analogous for vertical.
00237   int perpendicular_gap = 0;
00238   // If start_pt == end_pt, then the from_box lies entirely within the to_box
00239   // (in the perpendicular direction), so we don't need to calculate the
00240   // perpendicular_gap.
00241   if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
00242     if (denorm != NULL) {
00243       // Denormalize the start and end.
00244       denorm->DenormTransform(NULL, start_pt, &start_pt);
00245       denorm->DenormTransform(NULL, end_pt, &end_pt);
00246     }
00247     if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
00248       perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
00249                                            end_pt.y);
00250     } else {
00251       perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x,
00252                                              start_pt.y);
00253     }
00254   }
00255   // The parallel_gap weighs less than the perpendicular_gap.
00256   return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
00257 }
00258 
00259 // Compute the distance between (x, y1) and (x, y2) using the rule that
00260 // a decrease in textline density is weighted more heavily than an increase.
00261 // The coordinates are in source image space, ie processed by any denorm
00262 // already, but not yet scaled by scale_factor_.
00263 // Going from the outside of a textline to the inside should measure much
00264 // less distance than going from the inside of a textline to the outside.
00265 // How it works:
00266 // An increase is cheap (getting closer to a textline).
00267 // Constant costs unity.
00268 // A decrease is expensive (getting further from a textline).
00269 // Pixels in projection map Counted distance
00270 //              2
00271 //              3              1/x
00272 //              3               1
00273 //              2               x
00274 //              5              1/x
00275 //              7              1/x
00276 // Total: 1 + x + 3/x where x = kWrongWayPenalty.
00277 int TextlineProjection::VerticalDistance(bool debug, int x,
00278                                          int y1, int y2) const {
00279   x = ImageXToProjectionX(x);
00280   y1 = ImageYToProjectionY(y1);
00281   y2 = ImageYToProjectionY(y2);
00282   if (y1 == y2) return 0;
00283   int wpl = pixGetWpl(pix_);
00284   int step = y1 < y2 ? 1 : -1;
00285   uinT32* data = pixGetData(pix_) + y1 * wpl;
00286   wpl *= step;
00287   int prev_pixel = GET_DATA_BYTE(data, x);
00288   int distance = 0;
00289   int right_way_steps = 0;
00290   for (int y = y1; y != y2; y += step) {
00291     data += wpl;
00292     int pixel = GET_DATA_BYTE(data, x);
00293     if (debug)
00294       tprintf("At (%d,%d), pix = %d, prev=%d\n",
00295               x, y + step, pixel, prev_pixel);
00296     if (pixel < prev_pixel)
00297       distance += kWrongWayPenalty;
00298     else if (pixel > prev_pixel)
00299       ++right_way_steps;
00300     else
00301       ++distance;
00302     prev_pixel = pixel;
00303   }
00304   return distance * scale_factor_ +
00305       right_way_steps * scale_factor_ / kWrongWayPenalty;
00306 }
00307 
00308 // Compute the distance between (x1, y) and (x2, y) using the rule that
00309 // a decrease in textline density is weighted more heavily than an increase.
00310 int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2,
00311                                            int y) const {
00312   x1 = ImageXToProjectionX(x1);
00313   x2 = ImageXToProjectionX(x2);
00314   y = ImageYToProjectionY(y);
00315   if (x1 == x2) return 0;
00316   int wpl = pixGetWpl(pix_);
00317   int step = x1 < x2 ? 1 : -1;
00318   uinT32* data = pixGetData(pix_) + y * wpl;
00319   int prev_pixel = GET_DATA_BYTE(data, x1);
00320   int distance = 0;
00321   int right_way_steps = 0;
00322   for (int x = x1; x != x2; x += step) {
00323     int pixel = GET_DATA_BYTE(data, x + step);
00324     if (debug)
00325       tprintf("At (%d,%d), pix = %d, prev=%d\n",
00326               x + step, y, pixel, prev_pixel);
00327     if (pixel < prev_pixel)
00328       distance += kWrongWayPenalty;
00329     else if (pixel > prev_pixel)
00330       ++right_way_steps;
00331     else
00332       ++distance;
00333     prev_pixel = pixel;
00334   }
00335   return distance * scale_factor_ +
00336       right_way_steps * scale_factor_ / kWrongWayPenalty;
00337 }
00338 
00339 // Returns true if the blob appears to be outside of a textline.
00340 // Such blobs are potentially diacritics (even if large in Thai) and should
00341 // be kept away from initial textline finding.
00342 bool TextlineProjection::BoxOutOfHTextline(const TBOX& box,
00343                                           const DENORM* denorm,
00344                                           bool debug) const {
00345   int grad1 = 0;
00346   int grad2 = 0;
00347   EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, NULL, NULL);
00348   int worst_result = MIN(grad1, grad2);
00349   int total_result = grad1 + grad2;
00350   if (total_result >= 6) return false;  // Strongly in textline.
00351   // Medium strength: if either gradient is negative, it is likely outside
00352   // the body of the textline.
00353   if (worst_result < 0)
00354     return true;
00355   return false;
00356 }
00357 
00358 // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
00359 // but uses the median top/bottom for horizontal and median left/right for
00360 // vertical instead of the bounding box edges.
00361 // Evaluates for both horizontal and vertical and returns the best result,
00362 // with a positive value for horizontal and a negative value for vertical.
00363 int TextlineProjection::EvaluateColPartition(const ColPartition& part,
00364                                              const DENORM* denorm,
00365                                              bool debug) const {
00366   if (part.IsSingleton())
00367     return EvaluateBox(part.bounding_box(), denorm, debug);
00368   // Test vertical orientation.
00369   TBOX box = part.bounding_box();
00370   // Use the partition median for left/right.
00371   box.set_left(part.median_left());
00372   box.set_right(part.median_right());
00373   int vresult = EvaluateBox(box, denorm, debug);
00374 
00375   // Test horizontal orientation.
00376   box = part.bounding_box();
00377   // Use the partition median for top/bottom.
00378   box.set_top(part.median_top());
00379   box.set_bottom(part.median_bottom());
00380   int hresult = EvaluateBox(box, denorm, debug);
00381   if (debug) {
00382     tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
00383     part.bounding_box().print();
00384     part.Print();
00385   }
00386   return hresult >= -vresult ? hresult : vresult;
00387 }
00388 
00389 // Computes the mean projection gradients over the horizontal and vertical
00390 // edges of the box:
00391 //   -h-h-h-h-h-h
00392 //  |------------| mean=htop   -v|+v--------+v|-v
00393 //  |+h+h+h+h+h+h|             -v|+v        +v|-v
00394 //  |            |             -v|+v        +v|-v
00395 //  |    box     |             -v|+v  box   +v|-v
00396 //  |            |             -v|+v        +v|-v
00397 //  |+h+h+h+h+h+h|             -v|+v        +v|-v
00398 //  |------------| mean=hbot   -v|+v--------+v|-v
00399 //   -h-h-h-h-h-h
00400 //                           mean=vleft  mean=vright
00401 //
00402 // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
00403 // for a horizontal textline, a negative number for a vertical textline,
00404 // and near zero for undecided. Undecided is most likely non-text.
00405 // All the gradients are truncated to remain non-negative, since negative
00406 // horizontal gradients don't give any indication of being vertical and
00407 // vice versa.
00408 // Additional complexity: The coordinates have to be transformed to original
00409 // image coordinates with denorm (if not null), scaled to match the projection
00410 // pix, and THEN step out 2 pixels each way from the edge to compute the
00411 // gradient, and tries 3 positions, each measuring the gradient over a
00412 // 4-pixel spread: (+3/-1), (+2/-2), (+1/-3).  This complexity is handled by
00413 // several layers of helpers below.
00414 int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm,
00415                                     bool debug) const {
00416   return EvaluateBoxInternal(box, denorm, debug, NULL, NULL, NULL, NULL);
00417 }
00418 
00419 // Internal version of EvaluateBox returns the unclipped gradients as well
00420 // as the result of EvaluateBox.
00421 // hgrad1 and hgrad2 are the gradients for the horizontal textline.
00422 int TextlineProjection::EvaluateBoxInternal(const TBOX& box,
00423                                             const DENORM* denorm, bool debug,
00424                                             int* hgrad1, int* hgrad2,
00425                                             int* vgrad1, int* vgrad2) const {
00426   int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(),
00427                                            box.top(), true);
00428   int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(),
00429                                                box.bottom(), false);
00430   int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(),
00431                                                box.top(), true);
00432   int right_gradient = -BestMeanGradientInColumn(denorm, box.right(),
00433                                                  box.bottom(), box.top(),
00434                                                  false);
00435   int top_clipped = MAX(top_gradient, 0);
00436   int bottom_clipped = MAX(bottom_gradient, 0);
00437   int left_clipped = MAX(left_gradient, 0);
00438   int right_clipped = MAX(right_gradient, 0);
00439   if (debug) {
00440     tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:",
00441             top_gradient, bottom_gradient, left_gradient, right_gradient);
00442     box.print();
00443   }
00444   int result = MAX(top_clipped, bottom_clipped) -
00445       MAX(left_clipped, right_clipped);
00446   if (hgrad1 != NULL && hgrad2 != NULL) {
00447     *hgrad1 = top_gradient;
00448     *hgrad2 = bottom_gradient;
00449   }
00450   if (vgrad1 != NULL && vgrad2 != NULL) {
00451     *vgrad1 = left_gradient;
00452     *vgrad2 = right_gradient;
00453   }
00454   return result;
00455 }
00456 
00457 // Helper returns the mean gradient value for the horizontal row at the given
00458 // y, (in the external coordinates) by subtracting the mean of the transformed
00459 // row 2 pixels above from the mean of the transformed row 2 pixels below.
00460 // This gives a positive value for a good top edge and negative for bottom.
00461 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
00462 int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm,
00463                                               inT16 min_x, inT16 max_x, inT16 y,
00464                                               bool best_is_max) const {
00465   TPOINT start_pt(min_x, y);
00466   TPOINT end_pt(max_x, y);
00467   int upper = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
00468   int lower = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
00469   int best_gradient = lower - upper;
00470   upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
00471   lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
00472   int gradient = lower - upper;
00473   if ((gradient > best_gradient) == best_is_max)
00474     best_gradient = gradient;
00475   upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
00476   lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
00477   gradient = lower - upper;
00478   if ((gradient > best_gradient) == best_is_max)
00479     best_gradient = gradient;
00480   return best_gradient;
00481 }
00482 
00483 // Helper returns the mean gradient value for the vertical column at the
00484 // given x, (in the external coordinates) by subtracting the mean of the
00485 // transformed column 2 pixels left from the mean of the transformed column
00486 // 2 pixels to the right.
00487 // This gives a positive value for a good left edge and negative for right.
00488 // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
00489 int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, inT16 x,
00490                                                  inT16 min_y, inT16 max_y,
00491                                                  bool best_is_max) const {
00492   TPOINT start_pt(x, min_y);
00493   TPOINT end_pt(x, max_y);
00494   int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
00495   int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
00496   int best_gradient = right - left;
00497   left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
00498   right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
00499   int gradient = right - left;
00500   if ((gradient > best_gradient) == best_is_max)
00501     best_gradient = gradient;
00502   left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
00503   right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
00504   gradient = right - left;
00505   if ((gradient > best_gradient) == best_is_max)
00506     best_gradient = gradient;
00507   return best_gradient;
00508 }
00509 
00510 // Helper returns the mean pixel value over the line between the start_pt and
00511 // end_pt (inclusive), but shifted perpendicular to the line in the projection
00512 // image by offset pixels. For simplicity, it is assumed that the vector is
00513 // either nearly horizontal or nearly vertical. It works on skewed textlines!
00514 // The end points are in external coordinates, and will be denormalized with
00515 // the denorm if not NULL before further conversion to pix coordinates.
00516 // After all the conversions, the offset is added to the direction
00517 // perpendicular to the line direction. The offset is thus in projection image
00518 // coordinates, which allows the caller to get a guaranteed displacement
00519 // between pixels used to calculate gradients.
00520 int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm,
00521                                                 int offset,
00522                                                 TPOINT start_pt,
00523                                                 TPOINT end_pt) const {
00524   TransformToPixCoords(denorm, &start_pt);
00525   TransformToPixCoords(denorm, &end_pt);
00526   TruncateToImageBounds(&start_pt);
00527   TruncateToImageBounds(&end_pt);
00528   int wpl = pixGetWpl(pix_);
00529   uinT32* data = pixGetData(pix_);
00530   int total = 0;
00531   int count = 0;
00532   int x_delta = end_pt.x - start_pt.x;
00533   int y_delta = end_pt.y - start_pt.y;
00534   if (abs(x_delta) >= abs(y_delta)) {
00535     if (x_delta == 0)
00536       return 0;
00537     // Horizontal line. Add the offset vertically.
00538     int x_step = x_delta > 0 ? 1 : -1;
00539     // Correct offset for rotation, keeping it anti-clockwise of the delta.
00540     offset *= x_step;
00541     start_pt.y += offset;
00542     end_pt.y += offset;
00543     TruncateToImageBounds(&start_pt);
00544     TruncateToImageBounds(&end_pt);
00545     x_delta = end_pt.x - start_pt.x;
00546     y_delta = end_pt.y - start_pt.y;
00547     count = x_delta * x_step + 1;
00548     for (int x = start_pt.x; x != end_pt.x; x += x_step) {
00549       int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
00550       total += GET_DATA_BYTE(data + wpl * y, x);
00551     }
00552   } else {
00553     // Vertical line. Add the offset horizontally.
00554     int y_step = y_delta > 0 ? 1 : -1;
00555     // Correct offset for rotation, keeping it anti-clockwise of the delta.
00556     // Pix holds the image with y=0 at the top, so the offset is negated.
00557     offset *= -y_step;
00558     start_pt.x += offset;
00559     end_pt.x += offset;
00560     TruncateToImageBounds(&start_pt);
00561     TruncateToImageBounds(&end_pt);
00562     x_delta = end_pt.x - start_pt.x;
00563     y_delta = end_pt.y - start_pt.y;
00564     count = y_delta * y_step + 1;
00565     for (int y = start_pt.y; y != end_pt.y; y += y_step) {
00566       int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
00567       total += GET_DATA_BYTE(data + wpl * y, x);
00568     }
00569   }
00570   return DivRounded(total, count);
00571 }
00572 
00573 // Given an input pix, and a box, the sides of the box are shrunk inwards until
00574 // they bound any black pixels found within the original box.
00575 // The function converts between tesseract coords and the pix coords assuming
00576 // that this pix is full resolution equal in size to the original image.
00577 // Returns an empty box if there are no black pixels in the source box.
00578 static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) {
00579   int im_height = pixGetHeight(pix);
00580   Box* input_box = boxCreate(box.left(), im_height - box.top(),
00581                              box.width(), box.height());
00582   Box* output_box = NULL;
00583   pixClipBoxToForeground(pix, input_box, NULL, &output_box);
00584   TBOX result_box;
00585   if (output_box != NULL) {
00586     l_int32 x, y, width, height;
00587     boxGetGeometry(output_box, &x, &y, &width, &height);
00588     result_box.set_left(x);
00589     result_box.set_right(x + width);
00590     result_box.set_top(im_height - y);
00591     result_box.set_bottom(result_box.top() - height);
00592     boxDestroy(&output_box);
00593   }
00594   boxDestroy(&input_box);
00595   return result_box;
00596 }
00597 
00598 // Splits the given box in half at x_middle or y_middle according to split_on_x
00599 // and checks for nontext_map pixels in each half. Reduces the bbox so that it
00600 // still includes the middle point, but does not touch any fg pixels in
00601 // nontext_map. An empty box may be returned if there is no such box.
00602 static void TruncateBoxToMissNonText(int x_middle, int y_middle,
00603                                      bool split_on_x, Pix* nontext_map,
00604                                      TBOX* bbox) {
00605   TBOX box1(*bbox);
00606   TBOX box2(*bbox);
00607   TBOX im_box;
00608   if (split_on_x) {
00609     box1.set_right(x_middle);
00610     im_box = BoundsWithinBox(nontext_map, box1);
00611     if (!im_box.null_box()) box1.set_left(im_box.right());
00612     box2.set_left(x_middle);
00613     im_box = BoundsWithinBox(nontext_map, box2);
00614     if (!im_box.null_box()) box2.set_right(im_box.left());
00615   } else {
00616     box1.set_bottom(y_middle);
00617     im_box = BoundsWithinBox(nontext_map, box1);
00618     if (!im_box.null_box()) box1.set_top(im_box.bottom());
00619     box2.set_top(y_middle);
00620     im_box = BoundsWithinBox(nontext_map, box2);
00621     if (!im_box.null_box()) box2.set_bottom(im_box.top());
00622   }
00623   box1 += box2;
00624   *bbox = box1;
00625 }
00626 
00627 
00628 // Helper function to add 1 to a rectangle in source image coords to the
00629 // internal projection pix_.
00630 void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) {
00631   int scaled_left = ImageXToProjectionX(box.left());
00632   int scaled_top = ImageYToProjectionY(box.top());
00633   int scaled_right = ImageXToProjectionX(box.right());
00634   int scaled_bottom = ImageYToProjectionY(box.bottom());
00635   int wpl = pixGetWpl(pix_);
00636   uinT32* data = pixGetData(pix_) + scaled_top * wpl;
00637   for (int y = scaled_top; y <= scaled_bottom; ++y) {
00638     for (int x = scaled_left; x <= scaled_right; ++x) {
00639       int pixel = GET_DATA_BYTE(data, x);
00640       if (pixel < 255)
00641         SET_DATA_BYTE(data, x, pixel + 1);
00642     }
00643     data += wpl;
00644   }
00645 }
00646 
00647 // Inserts a list of blobs into the projection.
00648 // Rotation is a multiple of 90 degrees to get from blob coords to
00649 // nontext_map coords, nontext_map_box is the bounds of the nontext_map.
00650 // Blobs are spread horizontally or vertically according to their internal
00651 // flags, but the spreading is truncated by set pixels in the nontext_map
00652 // and also by the horizontal rule line limits on the blobs.
00653 void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
00654                                       const FCOORD& rotation,
00655                                       const TBOX& nontext_map_box,
00656                                       Pix* nontext_map) {
00657   BLOBNBOX_IT blob_it(blobs);
00658   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00659     BLOBNBOX* blob = blob_it.data();
00660     TBOX bbox = blob->bounding_box();
00661     ICOORD middle((bbox.left() + bbox.right()) / 2,
00662                   (bbox.bottom() + bbox.top()) / 2);
00663     bool spreading_horizontally = PadBlobBox(blob, &bbox);
00664     // Rotate to match the nontext_map.
00665     bbox.rotate(rotation);
00666     middle.rotate(rotation);
00667     if (rotation.x() == 0.0f)
00668       spreading_horizontally = !spreading_horizontally;
00669     // Clip to the image before applying the increments.
00670     bbox &= nontext_map_box;  // This is in-place box intersection.
00671     // Check for image pixels before spreading.
00672     TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally,
00673                              nontext_map, &bbox);
00674     if (bbox.area() > 0) {
00675       IncrementRectangle8Bit(bbox);
00676     }
00677   }
00678 }
00679 
00680 // Pads the bounding box of the given blob according to whether it is on
00681 // a horizontal or vertical text line, taking into account tab-stops near
00682 // the blob. Returns true if padding was in the horizontal direction.
00683 bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) {
00684   // Determine which direction to spread.
00685   // If text is well spaced out, it can be useful to pad perpendicular to
00686   // the textline direction, so as to ensure diacritics get absorbed
00687   // correctly, but if the text is tightly spaced, this will destroy the
00688   // blank space between textlines in the projection map, and that would
00689   // be very bad.
00690   int pad_limit = scale_factor_ * kMinLineSpacingFactor;
00691   int xpad = 0;
00692   int ypad = 0;
00693   bool padding_horizontally = false;
00694   if (blob->UniquelyHorizontal()) {
00695     xpad = bbox->height() * kOrientedPadFactor;
00696     padding_horizontally = true;
00697     // If the text appears to be very well spaced, pad the other direction by a
00698     // single pixel in the projection profile space to help join diacritics to
00699     // the textline.
00700     if ((blob->neighbour(BND_ABOVE) == NULL ||
00701         bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) &&
00702         (blob->neighbour(BND_BELOW) == NULL ||
00703         bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) {
00704       ypad = scale_factor_;
00705     }
00706   } else if (blob->UniquelyVertical()) {
00707     ypad = bbox->width() * kOrientedPadFactor;
00708     if ((blob->neighbour(BND_LEFT) == NULL ||
00709         bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) &&
00710         (blob->neighbour(BND_RIGHT) == NULL ||
00711         bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) {
00712       xpad = scale_factor_;
00713     }
00714   } else {
00715     if ((blob->neighbour(BND_ABOVE) != NULL &&
00716          blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) ||
00717         (blob->neighbour(BND_BELOW) != NULL &&
00718             blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) {
00719       ypad = bbox->width() * kDefaultPadFactor;
00720     }
00721     if ((blob->neighbour(BND_RIGHT) != NULL &&
00722          blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) ||
00723         (blob->neighbour(BND_LEFT) != NULL &&
00724             blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) {
00725       xpad = bbox->height() * kDefaultPadFactor;
00726       padding_horizontally = true;
00727     }
00728   }
00729   bbox->pad(xpad, ypad);
00730   pad_limit = scale_factor_ * kMaxTabStopOverrun;
00731   // Now shrink horizontally to avoid stepping more than pad_limit over a
00732   // tab-stop.
00733   if (bbox->left() < blob->left_rule() - pad_limit) {
00734     bbox->set_left(blob->left_rule() - pad_limit);
00735   }
00736   if (bbox->right() > blob->right_rule() + pad_limit) {
00737     bbox->set_right(blob->right_rule() + pad_limit);
00738   }
00739   return padding_horizontally;
00740 }
00741 
00742 // Helper denormalizes the TPOINT with the denorm if not NULL, then
00743 // converts to pix_ coordinates.
00744 void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
00745                                               TPOINT* pt) const {
00746   if (denorm != NULL) {
00747     // Denormalize the point.
00748     denorm->DenormTransform(NULL, *pt, pt);
00749   }
00750   pt->x = ImageXToProjectionX(pt->x);
00751   pt->y = ImageYToProjectionY(pt->y);
00752 }
00753 
00754 // Helper truncates the TPOINT to be within the pix_.
00755 void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const {
00756   pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1);
00757   pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
00758 }
00759 
00760 // Transform tesseract image coordinates to coordinates used in the projection.
00761 int TextlineProjection::ImageXToProjectionX(int x) const {
00762   x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1);
00763   return x;
00764 }
00765 int TextlineProjection::ImageYToProjectionY(int y) const {
00766   y = ClipToRange((y_origin_ - y) / scale_factor_, 0, pixGetHeight(pix_) - 1);
00767   return y;
00768 }
00769 
00770 }  // namespace tesseract.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines