tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/imagefind.cpp
Go to the documentation of this file.
00001 
00002 // File:        imagefind.cpp
00003 // Description: Function to find image and drawing regions in an image
00004 //              and create a corresponding list of empty blobs.
00005 // Author:      Ray Smith
00006 // Created:     Thu Mar 20 09:49:01 PDT 2008
00007 //
00008 // (C) Copyright 2008, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #ifdef _MSC_VER
00022 #pragma warning(disable:4244)  // Conversion warnings
00023 #endif
00024 
00025 #ifdef HAVE_CONFIG_H
00026 #include "config_auto.h"
00027 #endif
00028 
00029 #include "imagefind.h"
00030 #include "colpartitiongrid.h"
00031 #include "linlsq.h"
00032 #include "ndminx.h"
00033 #include "statistc.h"
00034 #include "params.h"
00035 
00036 #include "allheaders.h"
00037 
00038 INT_VAR(textord_tabfind_show_images, false, "Show image blobs");
00039 
00040 namespace tesseract {
00041 
// Fraction of the current width or height: a row or column with fewer on
// pixels than this is treated as discardable material outside a roughly
// rectangular image.
const double kMinRectangularFraction = 0.125;
// Fraction of the current width or height: a row or column with more on
// pixels than this is treated as lying fully inside the image rectangle.
const double kMaxRectangularFraction = 0.75;
// Fraction of width or height to allow transition from kMinRectangularFraction
// to kMaxRectangularFraction, equivalent to a dy/dx skew.
const double kMaxRectangularGradient = 0.1;  // About 6 degrees.
// Minimum width AND height (in pixels) of a page for FindImages to bother
// searching it for images.
const int kMinImageFindSize = 100;
// Scale factor applied to the summed rms error of the two color line fits
// before it is stored in the alpha channel.
const double kRMSFitScaling = 8.0;
// Minimum 8th-ile range of the most varying color channel for a rectangle to
// be treated as having two colors rather than one.
const int kMinColorDifference = 16;
// Pixel padding for noise blobs and partitions when rendering on the image
// mask to encourage them to join together. Make it too big and images
// will fatten out too much and have to be clipped to text.
const int kNoisePadding = 4;
00060 
00061 // Finds image regions within the BINARY source pix (page image) and returns
00062 // the image regions as a mask image.
00063 // The returned pix may be NULL, meaning no images found.
00064 // If not NULL, it must be PixDestroyed by the caller.
00065 Pix* ImageFind::FindImages(Pix* pix) {
00066   // Not worth looking at small images.
00067   if (pixGetWidth(pix) < kMinImageFindSize ||
00068       pixGetHeight(pix) < kMinImageFindSize)
00069     return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
00070   // Reduce by factor 2.
00071   Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
00072   pixDisplayWrite(pixr, textord_tabfind_show_images);
00073 
00074   // Get the halftone mask directly from Leptonica.
00075   l_int32 ht_found = 0;
00076   Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, &ht_found,
00077                                    textord_tabfind_show_images);
00078   pixDestroy(&pixr);
00079   if (!ht_found && pixht2 != NULL)
00080     pixDestroy(&pixht2);
00081   if (pixht2 == NULL)
00082     return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
00083 
00084   // Expand back up again.
00085   Pix *pixht = pixExpandReplicate(pixht2, 2);
00086   pixDisplayWrite(pixht, textord_tabfind_show_images);
00087   pixDestroy(&pixht2);
00088 
00089   // Fill to capture pixels near the mask edges that were missed
00090   Pix *pixt = pixSeedfillBinary(NULL, pixht, pix, 8);
00091   pixOr(pixht, pixht, pixt);
00092   pixDestroy(&pixt);
00093 
00094   // Eliminate lines and bars that may be joined to images.
00095   Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
00096   pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
00097   pixDisplayWrite(pixfinemask, textord_tabfind_show_images);
00098   Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
00099   Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
00100   pixDestroy(&pixreduced);
00101   pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
00102   Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
00103   pixDestroy(&pixreduced2);
00104   pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images);
00105   // Combine the coarse and fine image masks.
00106   pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
00107   pixDestroy(&pixfinemask);
00108   // Dilate a bit to make sure we get everything.
00109   pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
00110   Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
00111   pixDestroy(&pixcoarsemask);
00112   if (textord_tabfind_show_images)
00113     pixWrite("junkexpandedcoarsemask.png", pixmask, IFF_PNG);
00114   // And the image mask with the line and bar remover.
00115   pixAnd(pixht, pixht, pixmask);
00116   pixDestroy(&pixmask);
00117   if (textord_tabfind_show_images)
00118     pixWrite("junkfinalimagemask.png", pixht, IFF_PNG);
00119   // Make the result image the same size as the input.
00120   Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
00121   pixOr(result, result, pixht);
00122   pixDestroy(&pixht);
00123   return result;
00124 }
00125 
00126 // Generates a Boxa, Pixa pair from the input binary (image mask) pix,
00127 // analgous to pixConnComp, except that connected components which are nearly
00128 // rectangular are replaced with solid rectangles.
00129 // The returned boxa, pixa may be NULL, meaning no images found.
00130 // If not NULL, they must be destroyed by the caller.
00131 // Resolution of pix should match the source image (Tesseract::pix_binary_)
00132 // so the output coordinate systems match.
00133 void ImageFind::ConnCompAndRectangularize(Pix* pix, Boxa** boxa, Pixa** pixa) {
00134   *boxa = NULL;
00135   *pixa = NULL;
00136 
00137   if (textord_tabfind_show_images)
00138     pixWrite("junkconncompimage.png", pix, IFF_PNG);
00139   // Find the individual image regions in the mask image.
00140   *boxa = pixConnComp(pix, pixa, 8);
00141   // Rectangularize the individual images. If a sharp edge in vertical and/or
00142   // horizontal occupancy can be found, it indicates a probably rectangular
00143   // image with unwanted bits merged on, so clip to the approximate rectangle.
00144   int npixes = pixaGetCount(*pixa);
00145   for (int i = 0; i < npixes; ++i) {
00146     int x_start, x_end, y_start, y_end;
00147     Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE);
00148     pixDisplayWrite(img_pix, textord_tabfind_show_images);
00149     if (pixNearlyRectangular(img_pix, kMinRectangularFraction,
00150                              kMaxRectangularFraction,
00151                              kMaxRectangularGradient,
00152                              &x_start, &y_start, &x_end, &y_end)) {
00153       Pix* simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
00154       pixSetAll(simple_pix);
00155       pixDestroy(&img_pix);
00156       // pixaReplacePix takes ownership of the simple_pix.
00157       pixaReplacePix(*pixa, i, simple_pix, NULL);
00158       img_pix = pixaGetPix(*pixa, i, L_CLONE);
00159       // Fix the box to match the new pix.
00160       l_int32 x, y, width, height;
00161       boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
00162       Box* simple_box = boxCreate(x + x_start, y + y_start,
00163                                   x_end - x_start, y_end - y_start);
00164       boxaReplaceBox(*boxa, i, simple_box);
00165     }
00166     pixDestroy(&img_pix);
00167   }
00168 }
00169 
00170 // Scans horizontally on x=[x_start,x_end), starting with y=*y_start,
00171 // stepping y+=y_step, until y=y_end. *ystart is input/output.
00172 // If the number of black pixels in a row, pix_count fits this pattern:
00173 // 0 or more rows with pix_count < min_count then
00174 // <= mid_width rows with min_count <= pix_count <= max_count then
00175 // a row with pix_count > max_count then
00176 // true is returned, and *y_start = the first y with pix_count >= min_count.
00177 static bool HScanForEdge(uinT32* data, int wpl, int x_start, int x_end,
00178                          int min_count, int mid_width, int max_count,
00179                          int y_end, int y_step, int* y_start) {
00180   int mid_rows = 0;
00181   for (int y = *y_start; y != y_end; y += y_step) {
00182     // Need pixCountPixelsInRow(pix, y, &pix_count, NULL) to count in a subset.
00183     int pix_count = 0;
00184     uinT32* line = data + wpl * y;
00185     for (int x = x_start; x < x_end; ++x) {
00186       if (GET_DATA_BIT(line, x))
00187         ++pix_count;
00188     }
00189     if (mid_rows == 0 && pix_count < min_count)
00190       continue;      // In the min phase.
00191     if (mid_rows == 0)
00192       *y_start = y;  // Save the y_start where we came out of the min phase.
00193     if (pix_count > max_count)
00194       return true;   // Found the pattern.
00195     ++mid_rows;
00196     if (mid_rows > mid_width)
00197       break;         // Middle too big.
00198   }
00199   return false;      // Never found max_count.
00200 }
00201 
00202 // Scans vertically on y=[y_start,y_end), starting with x=*x_start,
00203 // stepping x+=x_step, until x=x_end. *x_start is input/output.
00204 // If the number of black pixels in a column, pix_count fits this pattern:
00205 // 0 or more cols with pix_count < min_count then
00206 // <= mid_width cols with min_count <= pix_count <= max_count then
00207 // a column with pix_count > max_count then
00208 // true is returned, and *x_start = the first x with pix_count >= min_count.
00209 static bool VScanForEdge(uinT32* data, int wpl, int y_start, int y_end,
00210                          int min_count, int mid_width, int max_count,
00211                          int x_end, int x_step, int* x_start) {
00212   int mid_cols = 0;
00213   for (int x = *x_start; x != x_end; x += x_step) {
00214     int pix_count = 0;
00215     uinT32* line = data + y_start * wpl;
00216     for (int y = y_start; y < y_end; ++y, line += wpl) {
00217       if (GET_DATA_BIT(line, x))
00218         ++pix_count;
00219     }
00220     if (mid_cols == 0 && pix_count < min_count)
00221       continue;      // In the min phase.
00222     if (mid_cols == 0)
00223       *x_start = x;  // Save the place where we came out of the min phase.
00224     if (pix_count > max_count)
00225       return true;   // found the pattern.
00226     ++mid_cols;
00227     if (mid_cols > mid_width)
00228       break;         // Middle too big.
00229   }
00230   return false;      // Never found max_count.
00231 }
00232 
00233 // Returns true if there is a rectangle in the source pix, such that all
00234 // pixel rows and column slices outside of it have less than
00235 // min_fraction of the pixels black, and within max_skew_gradient fraction
00236 // of the pixels on the inside, there are at least max_fraction of the
00237 // pixels black. In other words, the inside of the rectangle looks roughly
00238 // rectangular, and the outside of it looks like extra bits.
00239 // On return, the rectangle is defined by x_start, y_start, x_end and y_end.
00240 // Note: the algorithm is iterative, allowing it to slice off pixels from
00241 // one edge, allowing it to then slice off more pixels from another edge.
00242 bool ImageFind::pixNearlyRectangular(Pix* pix,
00243                                      double min_fraction, double max_fraction,
00244                                      double max_skew_gradient,
00245                                      int* x_start, int* y_start,
00246                                      int* x_end, int* y_end) {
00247   ASSERT_HOST(pix != NULL);
00248   *x_start = 0;
00249   *x_end = pixGetWidth(pix);
00250   *y_start = 0;
00251   *y_end = pixGetHeight(pix);
00252 
00253   uinT32* data = pixGetData(pix);
00254   int wpl = pixGetWpl(pix);
00255   bool any_cut = false;
00256   bool left_done = false;
00257   bool right_done = false;
00258   bool top_done = false;
00259   bool bottom_done = false;
00260   do {
00261     any_cut = false;
00262     // Find the top/bottom edges.
00263     int width = *x_end - *x_start;
00264     int min_count = static_cast<int>(width * min_fraction);
00265     int max_count = static_cast<int>(width * max_fraction);
00266     int edge_width = static_cast<int>(width * max_skew_gradient);
00267     if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
00268                      max_count, *y_end, 1, y_start) && !top_done) {
00269       top_done = true;
00270       any_cut = true;
00271     }
00272     --(*y_end);
00273     if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
00274                      max_count, *y_start, -1, y_end) && !bottom_done) {
00275       bottom_done = true;
00276       any_cut = true;
00277     }
00278     ++(*y_end);
00279 
00280     // Find the left/right edges.
00281     int height = *y_end - *y_start;
00282     min_count = static_cast<int>(height * min_fraction);
00283     max_count = static_cast<int>(height * max_fraction);
00284     edge_width = static_cast<int>(height * max_skew_gradient);
00285     if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
00286                      max_count, *x_end, 1, x_start) && !left_done) {
00287       left_done = true;
00288       any_cut = true;
00289     }
00290     --(*x_end);
00291     if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
00292                      max_count, *x_start, -1, x_end) && !right_done) {
00293       right_done = true;
00294       any_cut = true;
00295     }
00296     ++(*x_end);
00297   } while (any_cut);
00298 
00299   // All edges must satisfy the condition of sharp gradient in pixel density
00300   // in order for the full rectangle to be present.
00301   return left_done && right_done && top_done && bottom_done;
00302 }
00303 
00304 // Given an input pix, and a bounding rectangle, the sides of the rectangle
00305 // are shrunk inwards until they bound any black pixels found within the
00306 // original rectangle. Returns false if the rectangle contains no black
00307 // pixels at all.
00308 bool ImageFind::BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
00309                                  int* x_end, int* y_end) {
00310   Box* input_box = boxCreate(*x_start, *y_start, *x_end - *x_start,
00311                              *y_end - *y_start);
00312   Box* output_box = NULL;
00313   pixClipBoxToForeground(pix, input_box, NULL, &output_box);
00314   bool result = output_box != NULL;
00315   if (result) {
00316     l_int32 x, y, width, height;
00317     boxGetGeometry(output_box, &x, &y, &width, &height);
00318     *x_start = x;
00319     *y_start = y;
00320     *x_end = x + width;
00321     *y_end = y + height;
00322     boxDestroy(&output_box);
00323   }
00324   boxDestroy(&input_box);
00325   return result;
00326 }
00327 
00328 // Given a point in 3-D (RGB) space, returns the squared Euclidean distance
00329 // of the point from the given line, defined by a pair of points in the 3-D
00330 // (RGB) space, line1 and line2.
00331 double ImageFind::ColorDistanceFromLine(const uinT8* line1,
00332                                         const uinT8* line2,
00333                                         const uinT8* point) {
00334   int line_vector[kRGBRMSColors];
00335   int point_vector[kRGBRMSColors];
00336   for (int i = 0; i < kRGBRMSColors; ++i) {
00337     line_vector[i] = static_cast<int>(line2[i]) - static_cast<int>(line1[i]);
00338     point_vector[i] = static_cast<int>(point[i]) - static_cast<int>(line1[i]);
00339   }
00340   line_vector[L_ALPHA_CHANNEL] = 0;
00341   // Now the cross product in 3d.
00342   int cross[kRGBRMSColors];
00343   cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE]
00344                    - line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN];
00345   cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED]
00346                    - line_vector[COLOR_RED] * point_vector[COLOR_BLUE];
00347   cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN]
00348                    - line_vector[COLOR_GREEN] * point_vector[COLOR_RED];
00349   cross[L_ALPHA_CHANNEL] = 0;
00350   // Now the sums of the squares.
00351   double cross_sq = 0.0;
00352   double line_sq = 0.0;
00353   for (int j = 0; j < kRGBRMSColors; ++j) {
00354     cross_sq += static_cast<double>(cross[j]) * cross[j];
00355     line_sq += static_cast<double>(line_vector[j]) * line_vector[j];
00356   }
00357   if (line_sq == 0.0) {
00358     return 0.0;
00359   }
00360   return cross_sq / line_sq;  // This is the squared distance.
00361 }
00362 
00363 
00364 // Returns the leptonica combined code for the given RGB triplet.
00365 uinT32 ImageFind::ComposeRGB(uinT32 r, uinT32 g, uinT32 b) {
00366   l_uint32 result;
00367   composeRGBPixel(r, g, b, &result);
00368   return result;
00369 }
00370 
00371 // Returns the input value clipped to a uinT8.
00372 uinT8 ImageFind::ClipToByte(double pixel) {
00373   if (pixel < 0.0)
00374     return 0;
00375   else if (pixel >= 255.0)
00376     return 255;
00377   return static_cast<uinT8>(pixel);
00378 }
00379 
00380 // Computes the light and dark extremes of color in the given rectangle of
00381 // the given pix, which is factor smaller than the coordinate system in rect.
00382 // The light and dark points are taken to be the upper and lower 8th-ile of
00383 // the most deviant of R, G and B. The value of the other 2 channels are
00384 // computed by linear fit against the most deviant.
00385 // The colors of the two points are returned in color1 and color2, with the
00386 // alpha channel set to a scaled mean rms of the fits.
00387 // If color_map1 is not null then it and color_map2 get rect pasted in them
00388 // with the two calculated colors, and rms map gets a pasted rect of the rms.
00389 // color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
00390 void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
00391                                        Pix* color_map1, Pix* color_map2,
00392                                        Pix* rms_map,
00393                                        uinT8* color1, uinT8* color2) {
00394   ASSERT_HOST(pix != NULL && pixGetDepth(pix) == 32);
00395   // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
00396   // background.
00397   int width = pixGetWidth(pix);
00398   int height = pixGetHeight(pix);
00399   int left_pad = MAX(rect.left() - 2 * factor, 0) / factor;
00400   int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
00401   top_pad = MIN(height, top_pad);
00402   int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
00403   right_pad = MIN(width, right_pad);
00404   int bottom_pad = MAX(rect.bottom() - 2 * factor, 0) / factor;
00405   int width_pad = right_pad - left_pad;
00406   int height_pad = top_pad - bottom_pad;
00407   if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
00408     return;
00409   // Now crop the pix to the rectangle.
00410   Box* scaled_box = boxCreate(left_pad, height - top_pad,
00411                               width_pad, height_pad);
00412   Pix* scaled = pixClipRectangle(pix, scaled_box, NULL);
00413 
00414   // Compute stats over the whole image.
00415   STATS red_stats(0, 256);
00416   STATS green_stats(0, 256);
00417   STATS blue_stats(0, 256);
00418   uinT32* data = pixGetData(scaled);
00419   ASSERT_HOST(pixGetWpl(scaled) == width_pad);
00420   for (int y = 0; y < height_pad; ++y) {
00421     for (int x = 0; x < width_pad; ++x, ++data) {
00422       int r = GET_DATA_BYTE(data, COLOR_RED);
00423       int g = GET_DATA_BYTE(data, COLOR_GREEN);
00424       int b = GET_DATA_BYTE(data, COLOR_BLUE);
00425       red_stats.add(r, 1);
00426       green_stats.add(g, 1);
00427       blue_stats.add(b, 1);
00428     }
00429   }
00430   // Find the RGB component with the greatest 8th-ile-range.
00431   // 8th-iles are used instead of quartiles to get closer to the true
00432   // foreground color, which is going to be faint at best because of the
00433   // pre-scaling of the input image.
00434   int best_l8 = static_cast<int>(red_stats.ile(0.125f));
00435   int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
00436   int best_i8r = best_u8 - best_l8;
00437   int x_color = COLOR_RED;
00438   int y1_color = COLOR_GREEN;
00439   int y2_color = COLOR_BLUE;
00440   int l8 = static_cast<int>(green_stats.ile(0.125f));
00441   int u8 = static_cast<int>(ceil(green_stats.ile(0.875f)));
00442   if (u8 - l8 > best_i8r) {
00443     best_i8r = u8 - l8;
00444     best_l8 = l8;
00445     best_u8 = u8;
00446     x_color = COLOR_GREEN;
00447     y1_color = COLOR_RED;
00448   }
00449   l8 = static_cast<int>(blue_stats.ile(0.125f));
00450   u8 = static_cast<int>(ceil(blue_stats.ile(0.875f)));
00451   if (u8 - l8 > best_i8r) {
00452     best_i8r = u8 - l8;
00453     best_l8 = l8;
00454     best_u8 = u8;
00455     x_color = COLOR_BLUE;
00456     y1_color = COLOR_GREEN;
00457     y2_color = COLOR_RED;
00458   }
00459   if (best_i8r >= kMinColorDifference) {
00460     LLSQ line1;
00461     LLSQ line2;
00462     uinT32* data = pixGetData(scaled);
00463     for (int im_y = 0; im_y < height_pad; ++im_y) {
00464       for (int im_x = 0; im_x < width_pad; ++im_x, ++data) {
00465         int x = GET_DATA_BYTE(data, x_color);
00466         int y1 = GET_DATA_BYTE(data, y1_color);
00467         int y2 = GET_DATA_BYTE(data, y2_color);
00468         line1.add(x, y1);
00469         line2.add(x, y2);
00470       }
00471     }
00472     double m1 = line1.m();
00473     double c1 = line1.c(m1);
00474     double m2 = line2.m();
00475     double c2 = line2.c(m2);
00476     double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
00477     rms *= kRMSFitScaling;
00478     // Save the results.
00479     color1[x_color] = ClipToByte(best_l8);
00480     color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
00481     color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
00482     color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
00483     color2[x_color] = ClipToByte(best_u8);
00484     color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
00485     color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
00486     color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
00487   } else {
00488     // There is only one color.
00489     color1[COLOR_RED] = ClipToByte(red_stats.median());
00490     color1[COLOR_GREEN] = ClipToByte(green_stats.median());
00491     color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
00492     color1[L_ALPHA_CHANNEL] = 0;
00493     memcpy(color2, color1, 4);
00494   }
00495   if (color_map1 != NULL) {
00496     pixSetInRectArbitrary(color_map1, scaled_box,
00497                           ComposeRGB(color1[COLOR_RED],
00498                               color1[COLOR_GREEN],
00499                               color1[COLOR_BLUE]));
00500     pixSetInRectArbitrary(color_map2, scaled_box,
00501                           ComposeRGB(color2[COLOR_RED],
00502                               color2[COLOR_GREEN],
00503                               color2[COLOR_BLUE]));
00504     pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
00505   }
00506   pixDestroy(&scaled);
00507   boxDestroy(&scaled_box);
00508 }
00509 
00510 // ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================
00511 // The following functions are responsible for cutting a polygonal image from
00512 // a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts
00513 // with DivideImageIntoParts as the master.
00514 // Problem statement:
00515 // We start with a single connected component from the image mask: we get
00516 // a Pix of the component, and its location on the page (im_box).
00517 // The objective of cutting a polygonal image from its rectangle is to avoid
00518 // interfering text, but not text that completely overlaps the image.
00519 //     ------------------------------      ------------------------------
00520 //     |   Single input partition   |      | 1 Cut up output partitions |
00521 //     |                            |      ------------------------------
00522 //   Av|oid                         |    Avoid |                        |
00523 //     |                            |          |________________________|
00524 //  Int|erfering                    |   Interfering  |                  |
00525 //     |                            |           _____|__________________|
00526 //    T|ext                         |     Text |                        |
00527 //     |        Text-on-image       |          |     Text-on-image      |
00528 //     ------------------------------          --------------------------
00529 // DivideImageIntoParts does this by building a ColPartition_LIST (not in the
00530 // grid) with each ColPartition representing one of the rectangles needed,
00531 // starting with a single rectangle for the whole image component, and cutting
00532 // bits out of it with CutChunkFromParts as needed to avoid text. The output
00533 // ColPartitions are supposed to be ordered from top to bottom.
00534 
00535 // The problem is complicated by the fact that we have rotated the coordinate
00536 // system to make text lines horizontal, so if we need to look at the component
00537 // image, we have to rotate the coordinates. Throughout the functions in this
00538 // section im_box is the rectangle representing the image component in the
00539 // rotated page coordinates (where we are building our output ColPartitions),
00540 // rotation is the rotation that we used to get there, and rerotation is the
00541 // rotation required to get back to original page image coordinates.
00542 // To get to coordinates in the component image, pix, we rotate the im_box,
00543 // the point we want to locate, and subtract the rotated point from the top-left
00544 // of the rotated im_box.
00545 // im_box is therefore essential to calculating coordinates within the pix.
00546 
00547 // Returns true if there are no black pixels in between the boxes.
00548 // The im_box must represent the bounding box of the pix in tesseract
00549 // coordinates, which may be negative, due to rotations to make the textlines
00550 // horizontal. The boxes are rotated by rotation, which should undo such
00551 // rotations, before mapping them onto the pix.
00552 bool ImageFind::BlankImageInBetween(const TBOX& box1, const TBOX& box2,
00553                                     const TBOX& im_box, const FCOORD& rotation,
00554                                     Pix* pix) {
00555   TBOX search_box(box1);
00556   search_box += box2;
00557   if (box1.x_gap(box2) >= box1.y_gap(box2)) {
00558     if (box1.x_gap(box2) <= 0)
00559       return true;
00560     search_box.set_left(MIN(box1.right(), box2.right()));
00561     search_box.set_right(MAX(box1.left(), box2.left()));
00562   } else {
00563     if (box1.y_gap(box2) <= 0)
00564       return true;
00565     search_box.set_top(MAX(box1.bottom(), box2.bottom()));
00566     search_box.set_bottom(MIN(box1.top(), box2.top()));
00567   }
00568   return CountPixelsInRotatedBox(search_box, im_box, rotation, pix) == 0;
00569 }
00570 
00571 // Returns the number of pixels in box in the pix.
00572 // rotation, pix and im_box are defined in the large comment above.
00573 int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX& im_box,
00574                                        const FCOORD& rotation, Pix* pix) {
00575   // Intersect it with the image box.
00576   box &= im_box;  // This is in-place box intersection.
00577   if (box.null_box())
00578     return 0;
00579   box.rotate(rotation);
00580   TBOX rotated_im_box(im_box);
00581   rotated_im_box.rotate(rotation);
00582   Pix* rect_pix = pixCreate(box.width(), box.height(), 1);
00583   pixRasterop(rect_pix, 0, 0, box.width(), box.height(),
00584               PIX_SRC, pix, box.left() - rotated_im_box.left(),
00585               rotated_im_box.top() - box.top());
00586   l_int32 result;
00587   pixCountPixels(rect_pix, &result, NULL);
00588   pixDestroy(&rect_pix);
00589   return result;
00590 }
00591 
00592 // The box given by slice contains some black pixels, but not necessarily
00593 // over the whole box. Shrink the x bounds of slice, but not the y bounds
00594 // until there is at least one black pixel in the outermost columns.
00595 // rotation, rerotation, pix and im_box are defined in the large comment above.
00596 static void AttemptToShrinkBox(const FCOORD& rotation, const FCOORD& rerotation,
00597                                const TBOX& im_box, Pix* pix, TBOX* slice) {
00598   TBOX rotated_box(*slice);
00599   rotated_box.rotate(rerotation);
00600   TBOX rotated_im_box(im_box);
00601   rotated_im_box.rotate(rerotation);
00602   int left = rotated_box.left() - rotated_im_box.left();
00603   int right = rotated_box.right() - rotated_im_box.left();
00604   int top = rotated_im_box.top() - rotated_box.top();
00605   int bottom = rotated_im_box.top() - rotated_box.bottom();
00606   ImageFind::BoundsWithinRect(pix, &left, &top, &right, &bottom);
00607   top = rotated_im_box.top() - top;
00608   bottom = rotated_im_box.top() - bottom;
00609   left += rotated_im_box.left();
00610   right += rotated_im_box.left();
00611   rotated_box.set_to_given_coords(left, bottom, right, top);
00612   rotated_box.rotate(rotation);
00613   slice->set_left(rotated_box.left());
00614   slice->set_right(rotated_box.right());
00615 }
00616 
00617 // The meat of cutting a polygonal image around text.
00618 // This function covers the general case of cutting a box out of a box
00619 // as shown:
00620 // Input                               Output
00621 // ------------------------------      ------------------------------
00622 // |   Single input partition   |      | 1 Cut up output partitions |
00623 // |                            |      ------------------------------
00624 // |         ----------         |      ---------           ----------
00625 // |         |  box   |         |      |   2   |   box     |    3   |
00626 // |         |        |         |      |       |  is cut   |        |
00627 // |         ----------         |      ---------   out     ----------
00628 // |                            |      ------------------------------
00629 // |                            |      |   4                        |
00630 // ------------------------------      ------------------------------
00631 // In the context that this function is used, at most 3 of the above output
00632 // boxes will be created, as the overlapping box is never contained by the
00633 // input.
00634 // The above cutting operation is executed for each element of part_list that
00635 // is overlapped by the input box. Each modified ColPartition is replaced
00636 // in place in the list by the output of the cutting operation in the order
00637 // shown above, so iff no holes are ever created, the output will be in
00638 // top-to-bottom order, but in extreme cases, hole creation is possible.
00639 // In such cases, the output order may cause strange block polygons.
00640 // rotation, rerotation, pix and im_box are defined in the large comment above.
00641 static void CutChunkFromParts(const TBOX& box, const TBOX& im_box,
00642                               const FCOORD& rotation, const FCOORD& rerotation,
00643                               Pix* pix, ColPartition_LIST* part_list) {
00644   ASSERT_HOST(!part_list->empty());
00645   ColPartition_IT part_it(part_list);
00646   do {
00647     ColPartition* part = part_it.data();
00648     TBOX part_box = part->bounding_box();
00649     if (part_box.overlap(box)) {
00650       // This part must be cut and replaced with the remains. There are
00651       // upto 4 pieces to be made. Start with the first one and use
00652       // add_before_stay_put. For each piece if it has no black pixels
00653       // left, just don't make the box.
00654       // Above box.
00655       if (box.top() < part_box.top()) {
00656         TBOX slice(part_box);
00657         slice.set_bottom(box.top());
00658         if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
00659                                                pix) > 0) {
00660           AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
00661           part_it.add_before_stay_put(
00662               ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
00663                                           BTFT_NONTEXT));
00664         }
00665       }
00666       // Left of box.
00667       if (box.left() > part_box.left()) {
00668         TBOX slice(part_box);
00669         slice.set_right(box.left());
00670         if (box.top() < part_box.top())
00671           slice.set_top(box.top());
00672         if (box.bottom() > part_box.bottom())
00673           slice.set_bottom(box.bottom());
00674         if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
00675                                                pix) > 0) {
00676           AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
00677           part_it.add_before_stay_put(
00678               ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
00679                                           BTFT_NONTEXT));
00680         }
00681       }
00682       // Right of box.
00683       if (box.right() < part_box.right()) {
00684         TBOX slice(part_box);
00685         slice.set_left(box.right());
00686         if (box.top() < part_box.top())
00687           slice.set_top(box.top());
00688         if (box.bottom() > part_box.bottom())
00689           slice.set_bottom(box.bottom());
00690         if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
00691                                                pix) > 0) {
00692           AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
00693           part_it.add_before_stay_put(
00694               ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
00695                                           BTFT_NONTEXT));
00696         }
00697       }
00698       // Below box.
00699       if (box.bottom() > part_box.bottom()) {
00700         TBOX slice(part_box);
00701         slice.set_top(box.bottom());
00702         if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
00703                                                pix) > 0) {
00704           AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
00705           part_it.add_before_stay_put(
00706               ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
00707                                           BTFT_NONTEXT));
00708         }
00709       }
00710       part->DeleteBoxes();
00711       delete part_it.extract();
00712     }
00713     part_it.forward();
00714   } while (!part_it.at_first());
00715 }
00716 
00717 // Starts with the bounding box of the image component and cuts it up
00718 // so that it doesn't intersect text where possible.
00719 // Strong fully contained horizontal text is marked as text on image,
00720 // and does not cause a division of the image.
00721 // For more detail see the large comment above on cutting polygonal images
00722 // from a rectangle.
00723 // rotation, rerotation, pix and im_box are defined in the large comment above.
00724 static void DivideImageIntoParts(const TBOX& im_box, const FCOORD& rotation,
00725                                  const FCOORD& rerotation, Pix* pix,
00726                                  ColPartitionGridSearch* rectsearch,
00727                                  ColPartition_LIST* part_list) {
00728   // Add the full im_box partition to the list to begin with.
00729   ColPartition* pix_part = ColPartition::FakePartition(im_box, PT_UNKNOWN,
00730                                                        BRT_RECTIMAGE,
00731                                                        BTFT_NONTEXT);
00732   ColPartition_IT part_it(part_list);
00733   part_it.add_after_then_move(pix_part);
00734 
00735   rectsearch->StartRectSearch(im_box);
00736   ColPartition* part;
00737   while ((part = rectsearch->NextRectSearch()) != NULL) {
00738     TBOX part_box = part->bounding_box();
00739     if (part_box.contains(im_box) && part->flow() >= BTFT_CHAIN) {
00740       // This image is completely covered by an existing text partition.
00741       for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
00742         ColPartition* pix_part = part_it.extract();
00743         pix_part->DeleteBoxes();
00744         delete pix_part;
00745       }
00746     } else if (part->flow() == BTFT_STRONG_CHAIN) {
00747       // Text intersects the box.
00748       TBOX overlap_box = part_box.intersection(im_box);
00749       // Intersect it with the image box.
00750       int black_area = ImageFind::CountPixelsInRotatedBox(overlap_box, im_box,
00751                                                           rerotation, pix);
00752       if (black_area * 2 < part_box.area() || !im_box.contains(part_box)) {
00753         // Eat a piece out of the image.
00754         // Pad it so that pieces eaten out look decent.
00755         int padding = part->blob_type() == BRT_VERT_TEXT
00756                     ? part_box.width() : part_box.height();
00757         part_box.set_top(part_box.top() + padding / 2);
00758         part_box.set_bottom(part_box.bottom() - padding / 2);
00759         CutChunkFromParts(part_box, im_box, rotation, rerotation,
00760                           pix, part_list);
00761       } else {
00762         // Strong overlap with the black area, so call it text on image.
00763         part->set_flow(BTFT_TEXT_ON_IMAGE);
00764       }
00765     }
00766     if (part_list->empty()) {
00767       break;
00768     }
00769   }
00770 }
00771 
00772 // Search for the rightmost text that overlaps vertically and is to the left
00773 // of the given box, but within the given left limit.
00774 static int ExpandImageLeft(const TBOX& box, int left_limit,
00775                            ColPartitionGrid* part_grid) {
00776   ColPartitionGridSearch search(part_grid);
00777   ColPartition* part;
00778   // Search right to left for any text that overlaps.
00779   search.StartSideSearch(box.left(), box.bottom(), box.top());
00780   while ((part = search.NextSideSearch(true)) != NULL) {
00781     if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
00782       const TBOX& part_box(part->bounding_box());
00783       if (part_box.y_gap(box) < 0) {
00784         if (part_box.right() > left_limit && part_box.right() < box.left())
00785           left_limit = part_box.right();
00786         break;
00787       }
00788     }
00789   }
00790   if (part != NULL) {
00791     // Search for the nearest text up to the one we already found.
00792     TBOX search_box(left_limit, box.bottom(), box.left(), box.top());
00793     search.StartRectSearch(search_box);
00794     while ((part = search.NextRectSearch()) != NULL) {
00795       if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
00796         const TBOX& part_box(part->bounding_box());
00797         if (part_box.y_gap(box) < 0) {
00798           if (part_box.right() > left_limit && part_box.right() < box.left()) {
00799             left_limit = part_box.right();
00800           }
00801         }
00802       }
00803     }
00804   }
00805   return left_limit;
00806 }
00807 
00808 // Search for the leftmost text that overlaps vertically and is to the right
00809 // of the given box, but within the given right limit.
00810 static int ExpandImageRight(const TBOX& box, int right_limit,
00811                             ColPartitionGrid* part_grid) {
00812   ColPartitionGridSearch search(part_grid);
00813   ColPartition* part;
00814   // Search left to right for any text that overlaps.
00815   search.StartSideSearch(box.right(), box.bottom(), box.top());
00816   while ((part = search.NextSideSearch(false)) != NULL) {
00817     if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
00818       const TBOX& part_box(part->bounding_box());
00819       if (part_box.y_gap(box) < 0) {
00820         if (part_box.left() < right_limit && part_box.left() > box.right())
00821           right_limit = part_box.left();
00822         break;
00823       }
00824     }
00825   }
00826   if (part != NULL) {
00827     // Search for the nearest text up to the one we already found.
00828     TBOX search_box(box.left(), box.bottom(), right_limit, box.top());
00829     search.StartRectSearch(search_box);
00830     while ((part = search.NextRectSearch()) != NULL) {
00831       if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
00832         const TBOX& part_box(part->bounding_box());
00833         if (part_box.y_gap(box) < 0) {
00834           if (part_box.left() < right_limit && part_box.left() > box.right())
00835             right_limit = part_box.left();
00836         }
00837       }
00838     }
00839   }
00840   return right_limit;
00841 }
00842 
00843 // Search for the topmost text that overlaps horizontally and is below
00844 // the given box, but within the given bottom limit.
00845 static int ExpandImageBottom(const TBOX& box, int bottom_limit,
00846                              ColPartitionGrid* part_grid) {
00847   ColPartitionGridSearch search(part_grid);
00848   ColPartition* part;
00849   // Search right to left for any text that overlaps.
00850   search.StartVerticalSearch(box.left(), box.right(), box.bottom());
00851   while ((part = search.NextVerticalSearch(true)) != NULL) {
00852     if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
00853       const TBOX& part_box(part->bounding_box());
00854       if (part_box.x_gap(box) < 0) {
00855         if (part_box.top() > bottom_limit && part_box.top() < box.bottom())
00856           bottom_limit = part_box.top();
00857         break;
00858       }
00859     }
00860   }
00861   if (part != NULL) {
00862     // Search for the nearest text up to the one we already found.
00863     TBOX search_box(box.left(), bottom_limit, box.right(), box.bottom());
00864     search.StartRectSearch(search_box);
00865     while ((part = search.NextRectSearch()) != NULL) {
00866       if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
00867         const TBOX& part_box(part->bounding_box());
00868         if (part_box.x_gap(box) < 0) {
00869           if (part_box.top() > bottom_limit && part_box.top() < box.bottom())
00870             bottom_limit = part_box.top();
00871         }
00872       }
00873     }
00874   }
00875   return bottom_limit;
00876 }
00877 
00878 // Search for the bottommost text that overlaps horizontally and is above
00879 // the given box, but within the given top limit.
00880 static int ExpandImageTop(const TBOX& box, int top_limit,
00881                           ColPartitionGrid* part_grid) {
00882   ColPartitionGridSearch search(part_grid);
00883   ColPartition* part;
00884   // Search right to left for any text that overlaps.
00885   search.StartVerticalSearch(box.left(), box.right(), box.top());
00886   while ((part = search.NextVerticalSearch(false)) != NULL) {
00887     if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
00888       const TBOX& part_box(part->bounding_box());
00889       if (part_box.x_gap(box) < 0) {
00890         if (part_box.bottom() < top_limit && part_box.bottom() > box.top())
00891           top_limit = part_box.bottom();
00892         break;
00893       }
00894     }
00895   }
00896   if (part != NULL) {
00897     // Search for the nearest text up to the one we already found.
00898     TBOX search_box(box.left(), box.top(), box.right(), top_limit);
00899     search.StartRectSearch(search_box);
00900     while ((part = search.NextRectSearch()) != NULL) {
00901       if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
00902         const TBOX& part_box(part->bounding_box());
00903         if (part_box.x_gap(box) < 0) {
00904           if (part_box.bottom() < top_limit && part_box.bottom() > box.top())
00905             top_limit = part_box.bottom();
00906         }
00907       }
00908     }
00909   }
00910   return top_limit;
00911 }
00912 
00913 // Expands the image box in the given direction until it hits text,
00914 // limiting the expansion to the given limit box, returning the result
00915 // in the expanded box, and
00916 // returning the increase in area resulting from the expansion.
00917 static int ExpandImageDir(BlobNeighbourDir dir, const TBOX& im_box,
00918                           const TBOX& limit_box,
00919                           ColPartitionGrid* part_grid, TBOX* expanded_box) {
00920   *expanded_box = im_box;
00921   switch (dir) {
00922     case BND_LEFT:
00923       expanded_box->set_left(ExpandImageLeft(im_box, limit_box.left(),
00924                                              part_grid));
00925       break;
00926     case BND_RIGHT:
00927       expanded_box->set_right(ExpandImageRight(im_box, limit_box.right(),
00928                                                part_grid));
00929       break;
00930     case BND_ABOVE:
00931       expanded_box->set_top(ExpandImageTop(im_box, limit_box.top(), part_grid));
00932       break;
00933     case BND_BELOW:
00934       expanded_box->set_bottom(ExpandImageBottom(im_box, limit_box.bottom(),
00935                                                  part_grid));
00936       break;
00937     default:
00938       return 0;
00939   }
00940   return expanded_box->area() - im_box.area();
00941 }
00942 
00943 // Expands the image partition into any non-text until it touches text.
00944 // The expansion proceeds in the order of increasing increase in area
00945 // as a heuristic to find the best rectangle by expanding in the most
00946 // constrained direction first.
00947 static void MaximalImageBoundingBox(ColPartitionGrid* part_grid, TBOX* im_box) {
00948   bool dunnit[BND_COUNT];
00949   memset(dunnit, 0, sizeof(dunnit));
00950   TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(),
00951                  part_grid->tright().x(), part_grid->tright().y());
00952   TBOX text_box(*im_box);
00953   for (int iteration = 0; iteration < BND_COUNT; ++iteration) {
00954     // Find the direction with least area increase.
00955     int best_delta = -1;
00956     BlobNeighbourDir best_dir = BND_LEFT;
00957     TBOX expanded_boxes[BND_COUNT];
00958     for (int dir = 0; dir < BND_COUNT; ++dir) {
00959       BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
00960       if (!dunnit[bnd]) {
00961         TBOX expanded_box;
00962         int area_delta = ExpandImageDir(bnd, text_box, limit_box, part_grid,
00963                                         &expanded_boxes[bnd]);
00964         if (best_delta < 0 || area_delta < best_delta) {
00965           best_delta = area_delta;
00966           best_dir = bnd;
00967         }
00968       }
00969     }
00970     // Run the best and remember the direction.
00971     dunnit[best_dir] = true;
00972     text_box = expanded_boxes[best_dir];
00973   }
00974   *im_box = text_box;
00975 }
00976 
00977 // Helper deletes the given partition but first marks up all the blobs as
00978 // noise, so they get deleted later, and disowns them.
00979 // If the initial type of the partition is image, then it actually deletes
00980 // the blobs, as the partition owns them in that case.
00981 static void DeletePartition(ColPartition* part) {
00982   BlobRegionType type = part->blob_type();
00983   if (type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
00984     // The partition owns the boxes of these types, so just delete them.
00985     part->DeleteBoxes();  // From a previous iteration.
00986   } else {
00987     // Once marked, the blobs will be swept up by TidyBlobs.
00988     part->set_flow(BTFT_NONTEXT);
00989     part->set_blob_type(BRT_NOISE);
00990     part->SetBlobTypes();
00991     part->DisownBoxes();  // Created before FindImagePartitions.
00992   }
00993   delete part;
00994 }
00995 
00996 // The meat of joining fragmented images and consuming ColPartitions of
00997 // uncertain type.
00998 // *part_ptr is an input/output BRT_RECTIMAGE ColPartition that is to be
00999 // expanded to consume overlapping and nearby ColPartitions of uncertain type
01000 // and other BRT_RECTIMAGE partitions, but NOT to be expanded beyond
01001 // max_image_box. *part_ptr is NOT in the part_grid.
01002 // rectsearch is already constructed on the part_grid, and is used for
01003 // searching for overlapping and nearby ColPartitions.
01004 // ExpandImageIntoParts is called iteratively until it returns false. Each
01005 // time it absorbs the nearest non-contained candidate, and everything that
01006 // is fully contained within part_ptr's bounding box.
01007 // TODO(rays) what if it just eats everything inside max_image_box in one go?
01008 static bool ExpandImageIntoParts(const TBOX& max_image_box,
01009                                  ColPartitionGridSearch* rectsearch,
01010                                  ColPartitionGrid* part_grid,
01011                                  ColPartition** part_ptr) {
01012   ColPartition* image_part = *part_ptr;
01013   TBOX im_part_box = image_part->bounding_box();
01014   if (textord_tabfind_show_images > 1) {
01015     tprintf("Searching for merge with image part:");
01016     im_part_box.print();
01017     tprintf("Text box=");
01018     max_image_box.print();
01019   }
01020   rectsearch->StartRectSearch(max_image_box);
01021   ColPartition* part;
01022   ColPartition* best_part = NULL;
01023   int best_dist = 0;
01024   while ((part = rectsearch->NextRectSearch()) != NULL) {
01025     if (textord_tabfind_show_images > 1) {
01026       tprintf("Considering merge with part:");
01027       part->Print();
01028       if (im_part_box.contains(part->bounding_box()))
01029         tprintf("Fully contained\n");
01030       else if (!max_image_box.contains(part->bounding_box()))
01031         tprintf("Not within text box\n");
01032       else if (part->flow() == BTFT_STRONG_CHAIN)
01033         tprintf("Too strong text\n");
01034       else
01035         tprintf("Real candidate\n");
01036     }
01037     if (part->flow() == BTFT_STRONG_CHAIN ||
01038         part->flow() == BTFT_TEXT_ON_IMAGE ||
01039         part->blob_type() == BRT_POLYIMAGE)
01040       continue;
01041     TBOX box = part->bounding_box();
01042     if (max_image_box.contains(box) && part->blob_type() != BRT_NOISE) {
01043       if (im_part_box.contains(box)) {
01044         // Eat it completely.
01045         rectsearch->RemoveBBox();
01046         DeletePartition(part);
01047         continue;
01048       }
01049       int x_dist = MAX(0, box.x_gap(im_part_box));
01050       int y_dist = MAX(0, box.y_gap(im_part_box));
01051       int dist = x_dist * x_dist + y_dist * y_dist;
01052       if (dist > box.area() || dist > im_part_box.area())
01053         continue;  // Not close enough.
01054       if (best_part == NULL || dist < best_dist) {
01055         // We keep the nearest qualifier, which is not necessarily the nearest.
01056         best_part = part;
01057         best_dist = dist;
01058       }
01059     }
01060   }
01061   if (best_part != NULL) {
01062     // It needs expanding. We can do it without touching text.
01063     TBOX box = best_part->bounding_box();
01064     if (textord_tabfind_show_images > 1) {
01065       tprintf("Merging image part:");
01066       im_part_box.print();
01067       tprintf("with part:");
01068       box.print();
01069     }
01070     im_part_box += box;
01071     *part_ptr = ColPartition::FakePartition(im_part_box, PT_UNKNOWN,
01072                                             BRT_RECTIMAGE,
01073                                             BTFT_NONTEXT);
01074     DeletePartition(image_part);
01075     part_grid->RemoveBBox(best_part);
01076     DeletePartition(best_part);
01077     rectsearch->RepositionIterator();
01078     return true;
01079   }
01080   return false;
01081 }
01082 
01083 // Helper function to compute the overlap area between the box and the
01084 // given list of partitions.
01085 static int IntersectArea(const TBOX& box, ColPartition_LIST* part_list) {
01086   int intersect_area = 0;
01087   ColPartition_IT part_it(part_list);
01088   // Iterate the parts and subtract intersecting area.
01089   for (part_it.mark_cycle_pt(); !part_it.cycled_list();
01090        part_it.forward()) {
01091     ColPartition* image_part = part_it.data();
01092     TBOX intersect = box.intersection(image_part->bounding_box());
01093     intersect_area += intersect.area();
01094   }
01095   return intersect_area;
01096 }
01097 
01098 // part_list is a set of ColPartitions representing a polygonal image, and
01099 // im_box is the union of the bounding boxes of all the parts in part_list.
01100 // Tests whether part is to be consumed by the polygonal image.
01101 // Returns true if part is weak text and more than half of its area is
01102 // intersected by parts from the part_list, and it is contained within im_box.
01103 static bool TestWeakIntersectedPart(const TBOX& im_box,
01104                                     ColPartition_LIST* part_list,
01105                                     ColPartition* part) {
01106   if (part->flow() < BTFT_STRONG_CHAIN) {
01107     // A weak partition intersects the box.
01108     TBOX part_box = part->bounding_box();
01109     if (im_box.contains(part_box)) {
01110       int area = part_box.area();
01111       int intersect_area = IntersectArea(part_box, part_list);
01112       if (area < 2 * intersect_area) {
01113         return true;
01114       }
01115     }
01116   }
01117   return false;
01118 }
01119 
01120 // A rectangular or polygonal image has been completed, in part_list, bounding
01121 // box in im_box. We want to eliminate weak text or other uncertain partitions
01122 // (basically anything that is not BRT_STRONG_CHAIN or better) from both the
01123 // part_grid and the big_parts list that are contained within im_box and
01124 // overlapped enough by the possibly polygonal image.
01125 static void EliminateWeakParts(const TBOX& im_box,
01126                                ColPartitionGrid* part_grid,
01127                                ColPartition_LIST* big_parts,
01128                                ColPartition_LIST* part_list) {
01129   ColPartitionGridSearch rectsearch(part_grid);
01130   ColPartition* part;
01131   rectsearch.StartRectSearch(im_box);
01132   while ((part = rectsearch.NextRectSearch()) != NULL) {
01133     if (TestWeakIntersectedPart(im_box, part_list, part)) {
01134       BlobRegionType type = part->blob_type();
01135       if (type == BRT_POLYIMAGE || type == BRT_RECTIMAGE) {
01136         rectsearch.RemoveBBox();
01137         DeletePartition(part);
01138       } else {
01139         // The part is mostly covered, so mark it. Non-image partitions are
01140         // kept hanging around to mark the image for pass2
01141         part->set_flow(BTFT_NONTEXT);
01142         part->set_blob_type(BRT_NOISE);
01143         part->SetBlobTypes();
01144       }
01145     }
01146   }
01147   ColPartition_IT big_it(big_parts);
01148   for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) {
01149     part = big_it.data();
01150     if (TestWeakIntersectedPart(im_box, part_list, part)) {
01151       // Once marked, the blobs will be swept up by TidyBlobs.
01152       DeletePartition(big_it.extract());
01153     }
01154   }
01155 }
01156 
01157 // Helper scans for good text partitions overlapping the given box.
01158 // If there are no good text partitions overlapping an expanded box, then
01159 // the box is expanded, otherwise, the original box is returned.
01160 // If good text overlaps the box, true is returned.
01161 static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) {
01162   ColPartitionGridSearch rectsearch(part_grid);
01163   TBOX padded_box(*box);
01164   padded_box.pad(kNoisePadding, kNoisePadding);
01165   rectsearch.StartRectSearch(padded_box);
01166   ColPartition* part;
01167   bool any_text_in_padded_rect = false;
01168   while ((part = rectsearch.NextRectSearch()) != NULL) {
01169     if (part->flow() == BTFT_CHAIN ||
01170         part->flow() == BTFT_STRONG_CHAIN) {
01171       // Text intersects the box.
01172       any_text_in_padded_rect = true;
01173       TBOX part_box = part->bounding_box();
01174       if (box->overlap(part_box)) {
01175         return true;
01176       }
01177     }
01178   }
01179   if (!any_text_in_padded_rect)
01180     *box = padded_box;
01181   return false;
01182 }
01183 
01184 // Renders the boxes of image parts from the supplied list onto the image_pix,
01185 // except where they interfere with existing strong text in the part_grid,
01186 // and then deletes them.
01187 // Box coordinates are rotated by rerotate to match the image.
01188 static void MarkAndDeleteImageParts(const FCOORD& rerotate,
01189                                     ColPartitionGrid* part_grid,
01190                                     ColPartition_LIST* image_parts,
01191                                     Pix* image_pix) {
01192   if (image_pix == NULL)
01193     return;
01194   int imageheight = pixGetHeight(image_pix);
01195   ColPartition_IT part_it(image_parts);
01196   for (; !part_it.empty(); part_it.forward()) {
01197     ColPartition* part = part_it.extract();
01198     TBOX part_box = part->bounding_box();
01199     BlobRegionType type = part->blob_type();
01200     if (!ScanForOverlappingText(part_grid, &part_box) ||
01201         type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
01202       // Mark the box on the image.
01203       // All coords need to be rotated to match the image.
01204       part_box.rotate(rerotate);
01205       int left = part_box.left();
01206       int top = part_box.top();
01207       pixRasterop(image_pix, left, imageheight - top,
01208                   part_box.width(), part_box.height(), PIX_SET, NULL, 0, 0);
01209     }
01210     DeletePartition(part);
01211   }
01212 }
01213 
01214 // Locates all the image partitions in the part_grid, that were found by a
01215 // previous call to FindImagePartitions, marks them in the image_mask,
01216 // removes them from the grid, and deletes them. This makes it possble to
01217 // call FindImagePartitions again to produce less broken-up and less
01218 // overlapping image partitions.
01219 // rerotation specifies how to rotate the partition coords to match
01220 // the image_mask, since this function is used after orientation correction.
01221 void ImageFind::TransferImagePartsToImageMask(const FCOORD& rerotation,
01222                                               ColPartitionGrid* part_grid,
01223                                               Pix* image_mask) {
01224   // Extract the noise parts from the grid and put them on a temporary list.
01225   ColPartition_LIST parts_list;
01226   ColPartition_IT part_it(&parts_list);
01227   ColPartitionGridSearch gsearch(part_grid);
01228   gsearch.StartFullSearch();
01229   ColPartition* part;
01230   while ((part = gsearch.NextFullSearch()) != NULL) {
01231     BlobRegionType type = part->blob_type();
01232     if (type  == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
01233       part_it.add_after_then_move(part);
01234       gsearch.RemoveBBox();
01235     }
01236   }
01237   // Render listed noise partitions to the image mask.
01238   MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask);
01239 }
01240 
01241 // Removes and deletes all image partitions that are too small to be worth
01242 // keeping. We have to do this as a separate phase after creating the image
01243 // partitions as the small images are needed to join the larger ones together.
01244 static void DeleteSmallImages(ColPartitionGrid* part_grid) {
01245   if (part_grid != NULL) return;
01246   ColPartitionGridSearch gsearch(part_grid);
01247   gsearch.StartFullSearch();
01248   ColPartition* part;
01249   while ((part = gsearch.NextFullSearch()) != NULL) {
01250     // Only delete rectangular images, since if it became a poly image, it
01251     // is more evidence that it is somehow important.
01252     if (part->blob_type() == BRT_RECTIMAGE) {
01253       const TBOX& part_box = part->bounding_box();
01254       if (part_box.width() < kMinImageFindSize ||
01255           part_box.height() < kMinImageFindSize) {
01256         // It is too small to keep. Just make it disappear.
01257         gsearch.RemoveBBox();
01258         DeletePartition(part);
01259       }
01260     }
01261   }
01262 }
01263 
01264 // Runs a CC analysis on the image_pix mask image, and creates
01265 // image partitions from them, cutting out strong text, and merging with
01266 // nearby image regions such that they don't interfere with text.
01267 // Rotation and rerotation specify how to rotate image coords to match
01268 // the blob and partition coords and back again.
01269 // The input/output part_grid owns all the created partitions, and
01270 // the partitions own all the fake blobs that belong in the partitions.
01271 // Since the other blobs in the other partitions will be owned by the block,
01272 // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
01273 // situation and collect the image blobs.
01274 void ImageFind::FindImagePartitions(Pix* image_pix,
01275                                    const FCOORD& rotation,
01276                                    const FCOORD& rerotation,
01277                                    TO_BLOCK* block,
01278                                    TabFind* tab_grid,
01279                                    ColPartitionGrid* part_grid,
01280                                    ColPartition_LIST* big_parts) {
01281   int imageheight = pixGetHeight(image_pix);
01282   Boxa* boxa;
01283   Pixa* pixa;
01284   ConnCompAndRectangularize(image_pix, &boxa, &pixa);
01285   // Iterate the connected components in the image regions mask.
01286   int nboxes = boxaGetCount(boxa);
01287   for (int i = 0; i < nboxes; ++i) {
01288     l_int32 x, y, width, height;
01289     boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
01290     Pix* pix = pixaGetPix(pixa, i, L_CLONE);
01291     TBOX im_box(x, imageheight -y - height, x + width, imageheight - y);
01292     im_box.rotate(rotation);  // Now matches all partitions and blobs.
01293     ColPartitionGridSearch rectsearch(part_grid);
01294     rectsearch.SetUniqueMode(true);
01295     ColPartition_LIST part_list;
01296     DivideImageIntoParts(im_box, rotation, rerotation, pix,
01297                          &rectsearch, &part_list);
01298     if (textord_tabfind_show_images) {
01299       pixWrite("junkimagecomponent.png", pix, IFF_PNG);
01300       tprintf("Component has %d parts\n", part_list.length());
01301     }
01302     pixDestroy(&pix);
01303     if (!part_list.empty()) {
01304       ColPartition_IT part_it(&part_list);
01305       if (part_list.singleton()) {
01306         // We didn't have to chop it into a polygon to fit around text, so
01307         // try expanding it to merge fragmented image parts, as long as it
01308         // doesn't touch strong text.
01309         ColPartition* part = part_it.extract();
01310         TBOX text_box(im_box);
01311         MaximalImageBoundingBox(part_grid, &text_box);
01312         while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part));
01313         part_it.set_to_list(&part_list);
01314         part_it.add_after_then_move(part);
01315         im_box = part->bounding_box();
01316       }
01317       EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
01318       // Iterate the part_list and put the parts into the grid.
01319       for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
01320         ColPartition* image_part = part_it.extract();
01321         im_box = image_part->bounding_box();
01322         part_grid->InsertBBox(true, true, image_part);
01323         if (!part_it.at_last()) {
01324           ColPartition* neighbour = part_it.data_relative(1);
01325           image_part->AddPartner(false, neighbour);
01326           neighbour->AddPartner(true, image_part);
01327         }
01328       }
01329     }
01330   }
01331   boxaDestroy(&boxa);
01332   pixaDestroy(&pixa);
01333   DeleteSmallImages(part_grid);
01334   if (textord_tabfind_show_images) {
01335     ScrollView* images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
01336     part_grid->DisplayBoxes(images_win_);
01337   }
01338 }
01339 
01340 
01341 }  // namespace tesseract.
01342 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines