tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/linefind.cpp
Go to the documentation of this file.
00001 
00002 // File:        linefind.cpp
00003 // Description: Class to find vertical lines in an image and create
00004 //              a corresponding list of empty blobs.
00005 // Author:      Ray Smith
00006 // Created:     Thu Mar 20 09:49:01 PDT 2008
00007 //
00008 // (C) Copyright 2008, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #ifdef _MSC_VER
00022 #pragma warning(disable:4244)  // Conversion warnings
00023 #endif
00024 
00025 #ifdef HAVE_CONFIG_H
00026 #include "config_auto.h"
00027 #endif
00028 
00029 #include "linefind.h"
00030 #include "alignedblob.h"
00031 #include "tabvector.h"
00032 #include "blobbox.h"
00033 #include "edgblob.h"
00034 #include "openclwrapper.h"
00035 
00036 #include "allheaders.h"
00037 
00038 namespace tesseract {
00039 
00041 const int kThinLineFraction = 20;  // resolution / this = max line width in pixels (see GetLineMasks).
00043 const int kMinLineLengthFraction = 4;  // resolution / this = min line length in pixels (see GetLineMasks).
00045 const int kCrackSpacing = 100;  // NOTE(review): not referenced in the visible code; presumably a pixel spacing - confirm.
00047 const int kLineFindGridSize = 50;  // Grid size given to the AlignedBlob grid in FindLineVectors.
00048 // Min width of a line in pixels to be considered thick.
00049 const int kMinThickLineWidth = 12;
00050 // Max size of line residue. (The pixels that fail the long thin opening, and
00051 // therefore don't make it to the candidate line mask, but are nevertheless
00052 // part of the line.)
00053 const int kMaxLineResidue = 6;
00054 // Min length in inches of a line segment that exceeds kMinThickLineWidth in
00055 // thickness. (Such lines shouldn't break by simple image degradation.)
00056 const double kThickLengthMultiple = 0.75;
00057 // Max fraction of line box area that can be occupied by non-line pixels.
00058 const double kMaxNonLineDensity = 0.25;
00059 // Max height of a music stave in inches.
00060 const double kMaxStaveHeight = 1.0;
00061 // Minimum fraction of pixels in a music rectangle connected to the staves.
00062 const double kMinMusicPixelFraction = 0.75;
00063 
00064 // Erases the unused blobs from the line_pix image, taking into account
00065 // whether this was a horizontal or vertical line set.
00066 static void RemoveUnusedLineSegments(bool horizontal_lines,
00067                                      BLOBNBOX_LIST* line_bblobs,
00068                                      Pix* line_pix) {
00069   int height = pixGetHeight(line_pix);
00070   BLOBNBOX_IT bbox_it(line_bblobs);
00071   for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
00072     BLOBNBOX* blob = bbox_it.data();
00073     if (blob->left_tab_type() != TT_VLINE) {
00074       const TBOX& box = blob->bounding_box();
00075       Box* pixbox = NULL;
00076       if (horizontal_lines) {
00077         // Horizontal lines are in tess format and also have x and y flipped
00078         // (to use FindVerticalAlignment) so we have to flip x and y and then
00079         // convert to Leptonica by height - flipped x (ie the right edge).
00080         // See GetLineBoxes for more explanation.
00081         pixbox = boxCreate(box.bottom(), height - box.right(),
00082                            box.height(), box.width());
00083       } else {
00084         // For vertical lines, just flip upside-down to convert to Leptonica.
00085         // The y position of the box in Leptonica terms is the distance from
00086         // the top of the image to the top of the box.
00087         pixbox = boxCreate(box.left(), height - box.top(),
00088                            box.width(), box.height());
00089       }
00090       pixClearInRect(line_pix, pixbox);
00091       boxDestroy(&pixbox);
00092     }
00093   }
00094 }
00095 
00096 // Helper subtracts the line_pix image from the src_pix, and removes residue
00097 // as well by removing components that touch the line, but are not in the
00098 // non_line_pix mask. It is assumed that the non_line_pix mask has already
00099 // been prepared to required accuracy.
00100 static void SubtractLinesAndResidue(Pix* line_pix, Pix* non_line_pix,
00101                                     int resolution, Pix* src_pix) {
00102   // First remove the lines themselves.
00103   pixSubtract(src_pix, src_pix, line_pix);
00104   // Subtract the non-lines from the image to get the residue.
00105   Pix* residue_pix = pixSubtract(NULL, src_pix, non_line_pix);
00106   // Dilate the lines so they touch the residue.
00107   Pix* fat_line_pix = pixDilateBrick(NULL, line_pix, 3, 3);
00108   // Seed fill the fat lines to get all the residue.
00109   pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8);
00110   // Subtract the residue from the original image.
00111   pixSubtract(src_pix, src_pix, fat_line_pix);
00112   pixDestroy(&fat_line_pix);
00113   pixDestroy(&residue_pix);
00114 }
00115 
00116 // Returns the maximum strokewidth in the given binary image by doubling
00117 // the maximum of the distance function.
00118 static int MaxStrokeWidth(Pix* pix) {
00119   Pix* dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
00120   int width = pixGetWidth(dist_pix);
00121   int height = pixGetHeight(dist_pix);
00122   int wpl = pixGetWpl(dist_pix);
00123   l_uint32* data = pixGetData(dist_pix);
00124   // Find the maximum value in the distance image.
00125   int max_dist = 0;
00126   for (int y = 0; y < height; ++y) {
00127     for (int x = 0; x < width; ++x) {
00128       int pixel = GET_DATA_BYTE(data, x);
00129       if (pixel > max_dist)
00130         max_dist = pixel;
00131     }
00132     data += wpl;
00133   }
00134   pixDestroy(&dist_pix);
00135   return max_dist * 2;
00136 }
00137 
00138 // Returns the number of components in the intersection_pix touched by line_box.
00139 static int NumTouchingIntersections(Box* line_box, Pix* intersection_pix) {
00140   if (intersection_pix == NULL) return 0;
00141   Pix* rect_pix = pixClipRectangle(intersection_pix, line_box, NULL);
00142   Boxa* boxa = pixConnComp(rect_pix, NULL, 8);
00143   pixDestroy(&rect_pix);
00144   if (boxa == NULL) return false;
00145   int result = boxaGetCount(boxa);
00146   boxaDestroy(&boxa);
00147   return result;
00148 }
00149 
00150 // Returns the number of black pixels found in the box made by adding the line
00151 // width to both sides of the line bounding box. (Increasing the smallest
00152 // dimension of the bounding box.)
00153 static int CountPixelsAdjacentToLine(int line_width, Box* line_box,
00154                                      Pix* nonline_pix) {
00155   l_int32 x, y, box_width, box_height;
00156   boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
00157   if (box_width > box_height) {
00158     // horizontal line.
00159     int bottom = MIN(pixGetHeight(nonline_pix), y + box_height + line_width);
00160     y = MAX(0, y - line_width);
00161     box_height = bottom - y;
00162   } else {
00163     // Vertical line.
00164     int right = MIN(pixGetWidth(nonline_pix), x + box_width + line_width);
00165     x = MAX(0, x - line_width);
00166     box_width = right - x;
00167   }
00168   Box* box = boxCreate(x, y, box_width, box_height);
00169   Pix* rect_pix = pixClipRectangle(nonline_pix, box, NULL);
00170   boxDestroy(&box);
00171   l_int32 result;
00172   pixCountPixels(rect_pix, &result, NULL);
00173   pixDestroy(&rect_pix);
00174   return result;
00175 }
00176 
00177 // Helper erases false-positive line segments from the input/output line_pix.
00178 // 1. Since thick lines shouldn't really break up, we can eliminate some false
00179 //    positives by marking segments that are at least kMinThickLineWidth
00180 //    thickness, yet have a length less than min_thick_length.
00181 // 2. Lines that don't have at least 2 intersections with other lines and have
00182 //    a lot of neighbouring non-lines are probably not lines (perhaps arabic
00183 //    or Hindi words, or underlines.)
00184 // Bad line components are erased from line_pix.
00185 // Returns the number of remaining connected components.
00186 static int FilterFalsePositives(int resolution, Pix* nonline_pix,
00187                                 Pix* intersection_pix, Pix* line_pix) {
00188   int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
00189   Pixa* pixa = NULL;
00190   Boxa* boxa = pixConnComp(line_pix, &pixa, 8);
00191   // Iterate over the boxes to remove false positives.
00192   int nboxes = boxaGetCount(boxa);
00193   int remaining_boxes = nboxes;
00194   for (int i = 0; i < nboxes; ++i) {
00195     Box* box = boxaGetBox(boxa, i, L_CLONE);
00196     l_int32 x, y, box_width, box_height;
00197     boxGetGeometry(box, &x, &y, &box_width, &box_height);
00198     Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE);
00199     int max_width = MaxStrokeWidth(comp_pix);
00200     pixDestroy(&comp_pix);
00201     bool bad_line = false;
00202     // If the length is too short to stand-alone as a line, and the box width
00203     // is thick enough, and the stroke width is thick enough it is bad.
00204     if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth &&
00205         box_width < min_thick_length && box_height < min_thick_length &&
00206         max_width > kMinThickLineWidth) {
00207       // Too thick for the length.
00208       bad_line = true;
00209     }
00210     if (!bad_line &&
00211         (intersection_pix == NULL ||
00212         NumTouchingIntersections(box, intersection_pix) < 2)) {
00213       // Test non-line density near the line.
00214       int nonline_count = CountPixelsAdjacentToLine(max_width, box,
00215                                                     nonline_pix);
00216       if (nonline_count > box_height * box_width * kMaxNonLineDensity)
00217         bad_line = true;
00218     }
00219     if (bad_line) {
00220       // Not a good line.
00221       pixClearInRect(line_pix, box);
00222       --remaining_boxes;
00223     }
00224     boxDestroy(&box);
00225   }
00226   pixaDestroy(&pixa);
00227   boxaDestroy(&boxa);
00228   return remaining_boxes;
00229 }
00230 
00231 // Finds vertical and horizontal line objects in the given pix.
00232 // Uses the given resolution to determine size thresholds instead of any
00233 // that may be present in the pix.
00234 // The output vertical_x and vertical_y contain a sum of the output vectors,
00235 // thereby giving the mean vertical direction.
00236 // If pix_music_mask != NULL, and music is detected, a mask of the staves
00237 // and anything that is connected (bars, notes etc.) will be returned in
00238 // pix_music_mask, the mask subtracted from pix, and the lines will not
00239 // appear in v_lines or h_lines.
00240 // The output vectors are owned by the list and Frozen (cannot refit) by
00241 // having no boxes, as there is no need to refit or merge separator lines.
00242 // The detected lines are removed from the pix.
00243 void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix,
00244                                     int* vertical_x, int* vertical_y,
00245                                     Pix** pix_music_mask,
00246                                     TabVector_LIST* v_lines,
00247                                     TabVector_LIST* h_lines) {
00248   PERF_COUNT_START("FindAndRemoveLines")
00249   if (pix == NULL || vertical_x == NULL || vertical_y == NULL) {
00250     tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
00251     return;
00252   }
00253   Pix* pix_vline = NULL;
00254   Pix* pix_non_vline = NULL;
00255   Pix* pix_hline = NULL;
00256   Pix* pix_non_hline = NULL;
00257   Pix* pix_intersections = NULL;
00258   Pixa* pixa_display = debug ? pixaCreate(0) : NULL;
00259   GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline,
00260                &pix_non_hline, &pix_intersections, pix_music_mask,
00261                pixa_display);
00262   // Find lines, convert to TabVector_LIST and remove those that are used.
00263   FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y,
00264                       &pix_vline, pix_non_vline, pix, v_lines);
00265   if (pix_hline != NULL) {
00266     // Recompute intersections and re-filter false positive h-lines.
00267     if (pix_vline != NULL)
00268       pixAnd(pix_intersections, pix_vline, pix_hline);
00269     else
00270       pixDestroy(&pix_intersections);
00271     if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections,
00272                               pix_hline)) {
00273       pixDestroy(&pix_hline);
00274     }
00275   }
00276   FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y,
00277                       &pix_hline, pix_non_hline, pix, h_lines);
00278   if (pixa_display != NULL && pix_vline != NULL)
00279     pixaAddPix(pixa_display, pix_vline, L_CLONE);
00280   if (pixa_display != NULL && pix_hline != NULL)
00281     pixaAddPix(pixa_display, pix_hline, L_CLONE);
00282   if (pix_vline != NULL && pix_hline != NULL) {
00283     // Remove joins (intersections) where lines cross, and the residue.
00284     // Recalculate the intersections, since some lines have been deleted.
00285     pixAnd(pix_intersections, pix_vline, pix_hline);
00286     // Fatten up the intersections and seed-fill to get the intersection
00287     // residue.
00288     Pix* pix_join_residue = pixDilateBrick(NULL, pix_intersections, 5, 5);
00289     pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
00290     // Now remove the intersection residue.
00291     pixSubtract(pix, pix, pix_join_residue);
00292     pixDestroy(&pix_join_residue);
00293   }
00294   // Remove any detected music.
00295   if (pix_music_mask != NULL && *pix_music_mask != NULL) {
00296     if (pixa_display != NULL)
00297       pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
00298     pixSubtract(pix, pix, *pix_music_mask);
00299   }
00300   if (pixa_display != NULL)
00301     pixaAddPix(pixa_display, pix, L_CLONE);
00302 
00303   pixDestroy(&pix_vline);
00304   pixDestroy(&pix_non_vline);
00305   pixDestroy(&pix_hline);
00306   pixDestroy(&pix_non_hline);
00307   pixDestroy(&pix_intersections);
00308   if (pixa_display != NULL) {
00309 #if LIBLEPT_MINOR_VERSION >= 69 || LIBLEPT_MAJOR_VERSION > 1
00310     pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding",
00311                      "vhlinefinding.pdf");
00312 #endif
00313     pixaDestroy(&pixa_display);
00314   }
00315   PERF_COUNT_END
00316 }
00317 
00318 // Converts the Boxa array to a list of C_BLOB, getting rid of severely
00319 // overlapping outlines and those that are children of a bigger one.
00320 // The output is a list of C_BLOBs that are owned by the list.
00321 // The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
00322 // bounding boxes. The Boxa is consumed and destroyed.
00323 void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
00324                                     Boxa** boxes, C_BLOB_LIST* blobs) {
00325   C_OUTLINE_LIST outlines;
00326   C_OUTLINE_IT ol_it = &outlines;
00327   // Iterate the boxes to convert to outlines.
00328   int nboxes = boxaGetCount(*boxes);
00329   for (int i = 0; i < nboxes; ++i) {
00330     l_int32 x, y, width, height;
00331     boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height);
00332     // Make a C_OUTLINE from the leptonica box. This is a bit of a hack,
00333     // as there is no outline, just a bounding box, but with some very
00334     // small changes to coutln.cpp, it works nicely.
00335     ICOORD top_left(x, y);
00336     ICOORD bot_right(x + width, y + height);
00337     CRACKEDGE startpt;
00338     startpt.pos = top_left;
00339     C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0);
00340     ol_it.add_after_then_move(outline);
00341   }
00342   // Use outlines_to_blobs to convert the outlines to blobs and find
00343   // overlapping and contained objects. The output list of blobs in the block
00344   // has all the bad ones filtered out and deleted.
00345   BLOCK block;
00346   ICOORD page_tl(0, 0);
00347   ICOORD page_br(image_width, image_height);
00348   outlines_to_blobs(&block, page_tl, page_br, &outlines);
00349   // Transfer the created blobs to the output list.
00350   C_BLOB_IT blob_it(blobs);
00351   blob_it.add_list_after(block.blob_list());
00352   // The boxes aren't needed any more.
00353   boxaDestroy(boxes);
00354 }
00355 
00356 // Finds vertical line objects in pix_vline and removes the from src_pix.
00357 // Uses the given resolution to determine size thresholds instead of any
00358 // that may be present in the pix.
00359 // The output vertical_x and vertical_y contain a sum of the output vectors,
00360 // thereby giving the mean vertical direction.
00361 // The output vectors are owned by the list and Frozen (cannot refit) by
00362 // having no boxes, as there is no need to refit or merge separator lines.
00363 // If no good lines are found, pix_vline is destroyed.
00364 // None of the input pointers may be NULL, and if *pix_vline is NULL then
00365 // the function does nothing.
00366 void LineFinder::FindAndRemoveVLines(int resolution,
00367                                      Pix* pix_intersections,
00368                                      int* vertical_x, int* vertical_y,
00369                                      Pix** pix_vline, Pix* pix_non_vline,
00370                                      Pix* src_pix, TabVector_LIST* vectors) {
00371   if (pix_vline == NULL || *pix_vline == NULL) return;
00372   C_BLOB_LIST line_cblobs;
00373   BLOBNBOX_LIST line_bblobs;
00374   GetLineBoxes(false, *pix_vline, pix_intersections,
00375                &line_cblobs, &line_bblobs);
00376   int width = pixGetWidth(src_pix);
00377   int height = pixGetHeight(src_pix);
00378   ICOORD bleft(0, 0);
00379   ICOORD tright(width, height);
00380   FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
00381   if (!vectors->empty()) {
00382     RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline);
00383     SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix);
00384     ICOORD vertical;
00385     vertical.set_with_shrink(*vertical_x, *vertical_y);
00386     TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
00387   } else {
00388     pixDestroy(pix_vline);
00389   }
00390 }
00391 
00392 // Finds horizontal line objects in pix_hline and removes them from src_pix.
00393 // Uses the given resolution to determine size thresholds instead of any
00394 // that may be present in the pix.
00395 // The output vertical_x and vertical_y contain a sum of the output vectors,
00396 // thereby giving the mean vertical direction.
00397 // The output vectors are owned by the list and Frozen (cannot refit) by
00398 // having no boxes, as there is no need to refit or merge separator lines.
00399 // If no good lines are found, pix_hline is destroyed.
00400 // None of the input pointers may be NULL, and if *pix_hline is NULL then
00401 // the function does nothing.
00402 void LineFinder::FindAndRemoveHLines(int resolution,
00403                                      Pix* pix_intersections,
00404                                      int vertical_x, int vertical_y,
00405                                      Pix** pix_hline, Pix* pix_non_hline,
00406                                      Pix* src_pix, TabVector_LIST* vectors) {
00407   if (pix_hline == NULL || *pix_hline == NULL) return;
00408   C_BLOB_LIST line_cblobs;
00409   BLOBNBOX_LIST line_bblobs;
00410   GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs);
00411   int width = pixGetWidth(src_pix);
00412   int height = pixGetHeight(src_pix);
00413   ICOORD bleft(0, 0);
00414   ICOORD tright(height, width);
00415   FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y,
00416                   vectors);
00417   if (!vectors->empty()) {
00418     RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline);
00419     SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix);
00420     ICOORD vertical;
00421     vertical.set_with_shrink(vertical_x, vertical_y);
00422     TabVector::MergeSimilarTabVectors(vertical, vectors, NULL);
00423     // Iterate the vectors to flip them. x and y were flipped for horizontal
00424     // lines, so FindLineVectors can work just with the vertical case.
00425     // See GetLineBoxes for more on the flip.
00426     TabVector_IT h_it(vectors);
00427     for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
00428       h_it.data()->XYFlip();
00429     }
00430   } else {
00431     pixDestroy(pix_hline);
00432   }
00433 }
00434 
00435 // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
00436 // are the bounds of the image on which the input line_bblobs were found.
00437 // The input line_bblobs list is const really.
00438 // The output vertical_x and vertical_y are the total of all the vectors.
00439 // The output list of TabVector makes no reference to the input BLOBNBOXes.
00440 void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
00441                                  BLOBNBOX_LIST* line_bblobs,
00442                                  int* vertical_x, int* vertical_y,
00443                                  TabVector_LIST* vectors) {
00444   BLOBNBOX_IT bbox_it(line_bblobs);
00445   int b_count = 0;
00446   // Put all the blobs into the grid to find the lines, and move the blobs
00447   // to the output lists.
00448   AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
00449   for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) {
00450     BLOBNBOX* bblob = bbox_it.data();
00451     bblob->set_left_tab_type(TT_MAYBE_ALIGNED);
00452     bblob->set_left_rule(bleft.x());
00453     bblob->set_right_rule(tright.x());
00454     bblob->set_left_crossing_rule(bleft.x());
00455     bblob->set_right_crossing_rule(tright.x());
00456     blob_grid.InsertBBox(false, true, bblob);
00457     ++b_count;
00458   }
00459   if (b_count == 0)
00460     return;
00461 
00462   // Search the entire grid, looking for vertical line vectors.
00463   BlobGridSearch lsearch(&blob_grid);
00464   BLOBNBOX* bbox;
00465   TabVector_IT vector_it(vectors);
00466   *vertical_x = 0;
00467   *vertical_y = 1;
00468   lsearch.StartFullSearch();
00469   while ((bbox = lsearch.NextFullSearch()) != NULL) {
00470     if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) {
00471       const TBOX& box = bbox->bounding_box();
00472       if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()))
00473         tprintf("Finding line vector starting at bbox (%d,%d)\n",
00474                 box.left(), box.bottom());
00475       AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width());
00476       TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox,
00477                                                           vertical_x,
00478                                                           vertical_y);
00479       if (vector != NULL) {
00480         vector->Freeze();
00481         vector_it.add_to_end(vector);
00482       }
00483     }
00484   }
00485 }
00486 
00487 // Returns a Pix music mask if music is detected.
00488 // Any vertical line that has at least 5 intersections in sufficient density
00489 // is taken to be a bar. Bars are used as a seed and the entire touching
00490 // component is added to the output music mask and subtracted from the lines.
00491 // Returns NULL and does minimal work if no music is found.
00492 static Pix* FilterMusic(int resolution, Pix* pix_closed,
00493                         Pix* pix_vline, Pix* pix_hline,
00494                         l_int32* v_empty, l_int32* h_empty) {
00495   int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight);
00496   Pix* intersection_pix = pixAnd(NULL, pix_vline, pix_hline);
00497   Boxa* boxa = pixConnComp(pix_vline, NULL, 8);
00498   // Iterate over the boxes to find music bars.
00499   int nboxes = boxaGetCount(boxa);
00500   Pix* music_mask = NULL;
00501   for (int i = 0; i < nboxes; ++i) {
00502     Box* box = boxaGetBox(boxa, i, L_CLONE);
00503     l_int32 x, y, box_width, box_height;
00504     boxGetGeometry(box, &x, &y, &box_width, &box_height);
00505     int joins = NumTouchingIntersections(box, intersection_pix);
00506     // Test for the join density being at least 5 per max_stave_height,
00507     // ie (joins-1)/box_height >= (5-1)/max_stave_height.
00508     if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) {
00509       // This is a music bar. Add to the mask.
00510       if (music_mask == NULL)
00511         music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline),
00512                                1);
00513       pixSetInRect(music_mask, box);
00514     }
00515     boxDestroy(&box);
00516   }
00517   boxaDestroy(&boxa);
00518   pixDestroy(&intersection_pix);
00519   if (music_mask != NULL) {
00520     // The mask currently contains just the bars. Use the mask as a seed
00521     // and the pix_closed as the mask for a seedfill to get all the
00522     // intersecting staves.
00523     pixSeedfillBinary(music_mask, music_mask, pix_closed, 8);
00524     // Filter out false positives. CCs in the music_mask should be the vast
00525     // majority of the pixels in their bounding boxes, as we expect just a
00526     // tiny amount of text, a few phrase marks, and crescendo etc left.
00527     Boxa* boxa = pixConnComp(music_mask, NULL, 8);
00528     // Iterate over the boxes to find music components.
00529     int nboxes = boxaGetCount(boxa);
00530     for (int i = 0; i < nboxes; ++i) {
00531       Box* box = boxaGetBox(boxa, i, L_CLONE);
00532       Pix* rect_pix = pixClipRectangle(music_mask, box, NULL);
00533       l_int32 music_pixels;
00534       pixCountPixels(rect_pix, &music_pixels, NULL);
00535       pixDestroy(&rect_pix);
00536       rect_pix = pixClipRectangle(pix_closed, box, NULL);
00537       l_int32 all_pixels;
00538       pixCountPixels(rect_pix, &all_pixels, NULL);
00539       pixDestroy(&rect_pix);
00540       if (music_pixels < kMinMusicPixelFraction * all_pixels) {
00541         // False positive. Delete from the music mask.
00542         pixClearInRect(music_mask, box);
00543       }
00544       boxDestroy(&box);
00545     }
00546     l_int32 no_remaining_music;
00547     boxaDestroy(&boxa);
00548     pixZero(music_mask, &no_remaining_music);
00549     if (no_remaining_music) {
00550       pixDestroy(&music_mask);
00551     } else {
00552       pixSubtract(pix_vline, pix_vline, music_mask);
00553       pixSubtract(pix_hline, pix_hline, music_mask);
00554       // We may have deleted all the lines
00555       pixZero(pix_vline, v_empty);
00556       pixZero(pix_hline, h_empty);
00557     }
00558   }
00559   return music_mask;
00560 }
00561 
00562 // Most of the heavy lifting of line finding. Given src_pix and its separate
00563 // resolution, returns image masks:
00564 // pix_vline           candidate vertical lines.
00565 // pix_non_vline       pixels that didn't look like vertical lines.
00566 // pix_hline           candidate horizontal lines.
00567 // pix_non_hline       pixels that didn't look like horizontal lines.
00568 // pix_intersections   pixels where vertical and horizontal lines meet.
00569 // pix_music_mask      candidate music staves.
00570 // This function promises to initialize all the output (2nd level) pointers,
00571 // but any of the returns that are empty will be NULL on output.
00572 // None of the input (1st level) pointers may be NULL except pix_music_mask,
00573 // which will disable music detection, and pixa_display.
00574 void LineFinder::GetLineMasks(int resolution, Pix* src_pix,
00575                               Pix** pix_vline, Pix** pix_non_vline,
00576                               Pix** pix_hline, Pix** pix_non_hline,
00577                               Pix** pix_intersections, Pix** pix_music_mask,
00578                               Pixa* pixa_display) {
00579   Pix* pix_closed = NULL;
00580   Pix* pix_hollow = NULL;
00581 
00582   int max_line_width = resolution / kThinLineFraction;
00583   int min_line_length = resolution / kMinLineLengthFraction;
00584   if (pixa_display != NULL) {
00585     tprintf("Image resolution = %d, max line width = %d, min length=%d\n",
00586             resolution, max_line_width, min_line_length);
00587   }
00588   int closing_brick = max_line_width / 3;
00589 
00590   PERF_COUNT_START("GetLineMasksMorph")
00591 // only use opencl if compiled w/ OpenCL and selected device is opencl
00592 #ifdef USE_OPENCL
00593   if (OpenclDevice::selectedDeviceIsOpenCL()) {
00594     // OpenCL pixGetLines Operation
00595     int clStatus = OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix),
00596                                                         pixGetHeight(src_pix),
00597                                                         src_pix);
00598     bool getpixclosed = pix_music_mask != NULL ? true : false;
00599     OpenclDevice::pixGetLinesCL(NULL, src_pix, pix_vline, pix_hline,
00600                                 &pix_closed, getpixclosed, closing_brick,
00601                                 closing_brick, max_line_width, max_line_width,
00602                                 min_line_length, min_line_length);
00603   } else {
00604 #endif
00605   // Close up small holes, making it less likely that false alarms are found
00606   // in thickened text (as it will become more solid) and also smoothing over
00607   // some line breaks and nicks in the edges of the lines.
00608   pix_closed = pixCloseBrick(NULL, src_pix, closing_brick, closing_brick);
00609   if (pixa_display != NULL)
00610     pixaAddPix(pixa_display, pix_closed, L_CLONE);
00611   // Open up with a big box to detect solid areas, which can then be subtracted.
00612   // This is very generous and will leave in even quite wide lines.
00613   Pix* pix_solid = pixOpenBrick(NULL, pix_closed, max_line_width,
00614                                 max_line_width);
00615   if (pixa_display != NULL)
00616     pixaAddPix(pixa_display, pix_solid, L_CLONE);
00617   pix_hollow = pixSubtract(NULL, pix_closed, pix_solid);
00618 
00619   pixDestroy(&pix_solid);
00620 
00621   // Now open up in both directions independently to find lines of at least
00622   // 1 inch/kMinLineLengthFraction in length.
00623   if (pixa_display != NULL)
00624     pixaAddPix(pixa_display, pix_hollow, L_CLONE);
00625   *pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length);
00626   *pix_hline = pixOpenBrick(NULL, pix_hollow, min_line_length, 1);
00627 
00628   pixDestroy(&pix_hollow);
00629 #ifdef USE_OPENCL
00630   }
00631 #endif
00632   PERF_COUNT_END
00633 
00634   // Lines are sufficiently rare, that it is worth checking for a zero image.
00635   l_int32 v_empty = 0;
00636   l_int32 h_empty = 0;
00637   pixZero(*pix_vline, &v_empty);
00638   pixZero(*pix_hline, &h_empty);
00639   if (pix_music_mask != NULL) {
00640     if (!v_empty && !h_empty) {
00641       *pix_music_mask = FilterMusic(resolution, pix_closed,
00642                                     *pix_vline, *pix_hline,
00643                                     &v_empty, &h_empty);
00644     } else {
00645       *pix_music_mask = NULL;
00646     }
00647   }
00648   pixDestroy(&pix_closed);
00649   Pix* pix_nonlines = NULL;
00650   *pix_intersections = NULL;
00651   Pix* extra_non_hlines = NULL;
00652   if (!v_empty) {
00653     // Subtract both line candidates from the source to get definite non-lines.
00654     pix_nonlines = pixSubtract(NULL, src_pix, *pix_vline);
00655     if (!h_empty) {
00656       pixSubtract(pix_nonlines, pix_nonlines, *pix_hline);
00657       // Intersections are a useful indicator for likelihood of being a line.
00658       *pix_intersections = pixAnd(NULL, *pix_vline, *pix_hline);
00659       // Candidate vlines are not hlines (apart from the intersections)
00660       // and vice versa.
00661       extra_non_hlines = pixSubtract(NULL, *pix_vline, *pix_intersections);
00662     }
00663     *pix_non_vline = pixErodeBrick(NULL, pix_nonlines, kMaxLineResidue, 1);
00664     pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8);
00665     if (!h_empty) {
00666       // Candidate hlines are not vlines.
00667       pixOr(*pix_non_vline, *pix_non_vline, *pix_hline);
00668       pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
00669     }
00670     if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections,
00671                               *pix_vline))
00672       pixDestroy(pix_vline);  // No candidates left.
00673   } else {
00674     // No vertical lines.
00675     pixDestroy(pix_vline);
00676     *pix_non_vline = NULL;
00677     if (!h_empty) {
00678       pix_nonlines = pixSubtract(NULL, src_pix, *pix_hline);
00679     }
00680   }
00681   if (h_empty) {
00682     pixDestroy(pix_hline);
00683     *pix_non_hline = NULL;
00684     if (v_empty) {
00685       return;
00686     }
00687   } else {
00688     *pix_non_hline = pixErodeBrick(NULL, pix_nonlines, 1, kMaxLineResidue);
00689     pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);
00690     if (extra_non_hlines != NULL) {
00691       pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines);
00692       pixDestroy(&extra_non_hlines);
00693     }
00694     if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections,
00695                               *pix_hline))
00696       pixDestroy(pix_hline);  // No candidates left.
00697   }
00698   if (pixa_display != NULL) {
00699     if (*pix_vline != NULL) pixaAddPix(pixa_display, *pix_vline, L_CLONE);
00700     if (*pix_hline != NULL) pixaAddPix(pixa_display, *pix_hline, L_CLONE);
00701     if (pix_nonlines != NULL) pixaAddPix(pixa_display, pix_nonlines, L_CLONE);
00702     if (*pix_non_vline != NULL)
00703       pixaAddPix(pixa_display, *pix_non_vline, L_CLONE);
00704     if (*pix_non_hline != NULL)
00705       pixaAddPix(pixa_display, *pix_non_hline, L_CLONE);
00706     if (*pix_intersections != NULL)
00707       pixaAddPix(pixa_display, *pix_intersections, L_CLONE);
00708     if (pix_music_mask != NULL && *pix_music_mask != NULL)
00709       pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
00710   }
00711   pixDestroy(&pix_nonlines);
00712 }
00713 
00714 // Returns a list of boxes corresponding to the candidate line segments. Sets
00715 // the line_crossings member of the boxes so we can later determin the number
00716 // of intersections touched by a full line.
00717 void LineFinder::GetLineBoxes(bool horizontal_lines,
00718                               Pix* pix_lines, Pix* pix_intersections,
00719                               C_BLOB_LIST* line_cblobs,
00720                               BLOBNBOX_LIST* line_bblobs) {
00721   // Put a single pixel crack in every line at an arbitrary spacing,
00722   // so they break up and the bounding boxes can be used to get the
00723   // direction accurately enough without needing outlines.
00724   int wpl = pixGetWpl(pix_lines);
00725   int width = pixGetWidth(pix_lines);
00726   int height = pixGetHeight(pix_lines);
00727   l_uint32* data = pixGetData(pix_lines);
00728   if (horizontal_lines) {
00729     for (int y = 0; y < height; ++y, data += wpl) {
00730       for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
00731         CLEAR_DATA_BIT(data, x);
00732       }
00733     }
00734   } else {
00735     for (int y = kCrackSpacing; y < height; y += kCrackSpacing) {
00736       memset(data + wpl * y, 0, wpl * sizeof(*data));
00737     }
00738   }
00739   // Get the individual connected components
00740   Boxa* boxa = pixConnComp(pix_lines, NULL, 8);
00741   ConvertBoxaToBlobs(width, height, &boxa, line_cblobs);
00742   // Make the BLOBNBOXes from the C_BLOBs.
00743   C_BLOB_IT blob_it(line_cblobs);
00744   BLOBNBOX_IT bbox_it(line_bblobs);
00745   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00746     C_BLOB* cblob = blob_it.data();
00747     BLOBNBOX* bblob = new BLOBNBOX(cblob);
00748     bbox_it.add_to_end(bblob);
00749     // Determine whether the line segment touches two intersections.
00750     const TBOX& bbox = bblob->bounding_box();
00751     Box* box = boxCreate(bbox.left(), bbox.bottom(),
00752                          bbox.width(), bbox.height());
00753     bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections));
00754     boxDestroy(&box);
00755     // Transform the bounding box prior to finding lines. To save writing
00756     // two line finders, flip x and y for horizontal lines and re-use the
00757     // tab-stop detection code. For vertical lines we still have to flip the
00758     // y-coordinates to switch from leptonica coords to tesseract coords.
00759     if (horizontal_lines) {
00760       // Note that we have Leptonica coords stored in a Tesseract box, so that
00761       // bbox.bottom(), being the MIN y coord, is actually the top, so to get
00762       // back to Leptonica coords in RemoveUnusedLineSegments, we have to
00763       // use height - box.right() as the top, which looks very odd.
00764       TBOX new_box(height - bbox.top(), bbox.left(),
00765                    height - bbox.bottom(), bbox.right());
00766       bblob->set_bounding_box(new_box);
00767     } else {
00768       TBOX new_box(bbox.left(), height - bbox.top(),
00769                    bbox.right(), height - bbox.bottom());
00770       bblob->set_bounding_box(new_box);
00771     }
00772   }
00773 }
00774 
00775 }  // namespace tesseract.
00776 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines