tesseract
3.03
|
00001 00002 // File: linefind.cpp 00003 // Description: Class to find vertical lines in an image and create 00004 // a corresponding list of empty blobs. 00005 // Author: Ray Smith 00006 // Created: Thu Mar 20 09:49:01 PDT 2008 00007 // 00008 // (C) Copyright 2008, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifdef _MSC_VER 00022 #pragma warning(disable:4244) // Conversion warnings 00023 #endif 00024 00025 #ifdef HAVE_CONFIG_H 00026 #include "config_auto.h" 00027 #endif 00028 00029 #include "linefind.h" 00030 #include "alignedblob.h" 00031 #include "tabvector.h" 00032 #include "blobbox.h" 00033 #include "edgblob.h" 00034 #include "openclwrapper.h" 00035 00036 #include "allheaders.h" 00037 00038 namespace tesseract { 00039 00041 const int kThinLineFraction = 20; 00043 const int kMinLineLengthFraction = 4; 00045 const int kCrackSpacing = 100; 00047 const int kLineFindGridSize = 50; 00048 // Min width of a line in pixels to be considered thick. 00049 const int kMinThickLineWidth = 12; 00050 // Max size of line residue. (The pixels that fail the long thin opening, and 00051 // therefore don't make it to the candidate line mask, but are nevertheless 00052 // part of the line.) 00053 const int kMaxLineResidue = 6; 00054 // Min length in inches of a line segment that exceeds kMinThickLineWidth in 00055 // thickness. (Such lines shouldn't break by simple image degradation.) 00056 const double kThickLengthMultiple = 0.75; 00057 // Max fraction of line box area that can be occupied by non-line pixels. 00058 const double kMaxNonLineDensity = 0.25; 00059 // Max height of a music stave in inches. 00060 const double kMaxStaveHeight = 1.0; 00061 // Minimum fraction of pixels in a music rectangle connected to the staves. 00062 const double kMinMusicPixelFraction = 0.75; 00063 00064 // Erases the unused blobs from the line_pix image, taking into account 00065 // whether this was a horizontal or vertical line set. 00066 static void RemoveUnusedLineSegments(bool horizontal_lines, 00067 BLOBNBOX_LIST* line_bblobs, 00068 Pix* line_pix) { 00069 int height = pixGetHeight(line_pix); 00070 BLOBNBOX_IT bbox_it(line_bblobs); 00071 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { 00072 BLOBNBOX* blob = bbox_it.data(); 00073 if (blob->left_tab_type() != TT_VLINE) { 00074 const TBOX& box = blob->bounding_box(); 00075 Box* pixbox = NULL; 00076 if (horizontal_lines) { 00077 // Horizontal lines are in tess format and also have x and y flipped 00078 // (to use FindVerticalAlignment) so we have to flip x and y and then 00079 // convert to Leptonica by height - flipped x (ie the right edge). 00080 // See GetLineBoxes for more explanation. 00081 pixbox = boxCreate(box.bottom(), height - box.right(), 00082 box.height(), box.width()); 00083 } else { 00084 // For vertical lines, just flip upside-down to convert to Leptonica. 00085 // The y position of the box in Leptonica terms is the distance from 00086 // the top of the image to the top of the box. 00087 pixbox = boxCreate(box.left(), height - box.top(), 00088 box.width(), box.height()); 00089 } 00090 pixClearInRect(line_pix, pixbox); 00091 boxDestroy(&pixbox); 00092 } 00093 } 00094 } 00095 00096 // Helper subtracts the line_pix image from the src_pix, and removes residue 00097 // as well by removing components that touch the line, but are not in the 00098 // non_line_pix mask. It is assumed that the non_line_pix mask has already 00099 // been prepared to required accuracy. 00100 static void SubtractLinesAndResidue(Pix* line_pix, Pix* non_line_pix, 00101 int resolution, Pix* src_pix) { 00102 // First remove the lines themselves. 00103 pixSubtract(src_pix, src_pix, line_pix); 00104 // Subtract the non-lines from the image to get the residue. 00105 Pix* residue_pix = pixSubtract(NULL, src_pix, non_line_pix); 00106 // Dilate the lines so they touch the residue. 00107 Pix* fat_line_pix = pixDilateBrick(NULL, line_pix, 3, 3); 00108 // Seed fill the fat lines to get all the residue. 00109 pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8); 00110 // Subtract the residue from the original image. 00111 pixSubtract(src_pix, src_pix, fat_line_pix); 00112 pixDestroy(&fat_line_pix); 00113 pixDestroy(&residue_pix); 00114 } 00115 00116 // Returns the maximum strokewidth in the given binary image by doubling 00117 // the maximum of the distance function. 00118 static int MaxStrokeWidth(Pix* pix) { 00119 Pix* dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG); 00120 int width = pixGetWidth(dist_pix); 00121 int height = pixGetHeight(dist_pix); 00122 int wpl = pixGetWpl(dist_pix); 00123 l_uint32* data = pixGetData(dist_pix); 00124 // Find the maximum value in the distance image. 00125 int max_dist = 0; 00126 for (int y = 0; y < height; ++y) { 00127 for (int x = 0; x < width; ++x) { 00128 int pixel = GET_DATA_BYTE(data, x); 00129 if (pixel > max_dist) 00130 max_dist = pixel; 00131 } 00132 data += wpl; 00133 } 00134 pixDestroy(&dist_pix); 00135 return max_dist * 2; 00136 } 00137 00138 // Returns the number of components in the intersection_pix touched by line_box. 00139 static int NumTouchingIntersections(Box* line_box, Pix* intersection_pix) { 00140 if (intersection_pix == NULL) return 0; 00141 Pix* rect_pix = pixClipRectangle(intersection_pix, line_box, NULL); 00142 Boxa* boxa = pixConnComp(rect_pix, NULL, 8); 00143 pixDestroy(&rect_pix); 00144 if (boxa == NULL) return false; 00145 int result = boxaGetCount(boxa); 00146 boxaDestroy(&boxa); 00147 return result; 00148 } 00149 00150 // Returns the number of black pixels found in the box made by adding the line 00151 // width to both sides of the line bounding box. (Increasing the smallest 00152 // dimension of the bounding box.) 00153 static int CountPixelsAdjacentToLine(int line_width, Box* line_box, 00154 Pix* nonline_pix) { 00155 l_int32 x, y, box_width, box_height; 00156 boxGetGeometry(line_box, &x, &y, &box_width, &box_height); 00157 if (box_width > box_height) { 00158 // horizontal line. 00159 int bottom = MIN(pixGetHeight(nonline_pix), y + box_height + line_width); 00160 y = MAX(0, y - line_width); 00161 box_height = bottom - y; 00162 } else { 00163 // Vertical line. 00164 int right = MIN(pixGetWidth(nonline_pix), x + box_width + line_width); 00165 x = MAX(0, x - line_width); 00166 box_width = right - x; 00167 } 00168 Box* box = boxCreate(x, y, box_width, box_height); 00169 Pix* rect_pix = pixClipRectangle(nonline_pix, box, NULL); 00170 boxDestroy(&box); 00171 l_int32 result; 00172 pixCountPixels(rect_pix, &result, NULL); 00173 pixDestroy(&rect_pix); 00174 return result; 00175 } 00176 00177 // Helper erases false-positive line segments from the input/output line_pix. 00178 // 1. Since thick lines shouldn't really break up, we can eliminate some false 00179 // positives by marking segments that are at least kMinThickLineWidth 00180 // thickness, yet have a length less than min_thick_length. 00181 // 2. Lines that don't have at least 2 intersections with other lines and have 00182 // a lot of neighbouring non-lines are probably not lines (perhaps arabic 00183 // or Hindi words, or underlines.) 00184 // Bad line components are erased from line_pix. 00185 // Returns the number of remaining connected components. 00186 static int FilterFalsePositives(int resolution, Pix* nonline_pix, 00187 Pix* intersection_pix, Pix* line_pix) { 00188 int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple); 00189 Pixa* pixa = NULL; 00190 Boxa* boxa = pixConnComp(line_pix, &pixa, 8); 00191 // Iterate over the boxes to remove false positives. 00192 int nboxes = boxaGetCount(boxa); 00193 int remaining_boxes = nboxes; 00194 for (int i = 0; i < nboxes; ++i) { 00195 Box* box = boxaGetBox(boxa, i, L_CLONE); 00196 l_int32 x, y, box_width, box_height; 00197 boxGetGeometry(box, &x, &y, &box_width, &box_height); 00198 Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE); 00199 int max_width = MaxStrokeWidth(comp_pix); 00200 pixDestroy(&comp_pix); 00201 bool bad_line = false; 00202 // If the length is too short to stand-alone as a line, and the box width 00203 // is thick enough, and the stroke width is thick enough it is bad. 00204 if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth && 00205 box_width < min_thick_length && box_height < min_thick_length && 00206 max_width > kMinThickLineWidth) { 00207 // Too thick for the length. 00208 bad_line = true; 00209 } 00210 if (!bad_line && 00211 (intersection_pix == NULL || 00212 NumTouchingIntersections(box, intersection_pix) < 2)) { 00213 // Test non-line density near the line. 00214 int nonline_count = CountPixelsAdjacentToLine(max_width, box, 00215 nonline_pix); 00216 if (nonline_count > box_height * box_width * kMaxNonLineDensity) 00217 bad_line = true; 00218 } 00219 if (bad_line) { 00220 // Not a good line. 00221 pixClearInRect(line_pix, box); 00222 --remaining_boxes; 00223 } 00224 boxDestroy(&box); 00225 } 00226 pixaDestroy(&pixa); 00227 boxaDestroy(&boxa); 00228 return remaining_boxes; 00229 } 00230 00231 // Finds vertical and horizontal line objects in the given pix. 00232 // Uses the given resolution to determine size thresholds instead of any 00233 // that may be present in the pix. 00234 // The output vertical_x and vertical_y contain a sum of the output vectors, 00235 // thereby giving the mean vertical direction. 00236 // If pix_music_mask != NULL, and music is detected, a mask of the staves 00237 // and anything that is connected (bars, notes etc.) will be returned in 00238 // pix_music_mask, the mask subtracted from pix, and the lines will not 00239 // appear in v_lines or h_lines. 00240 // The output vectors are owned by the list and Frozen (cannot refit) by 00241 // having no boxes, as there is no need to refit or merge separator lines. 00242 // The detected lines are removed from the pix. 00243 void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix, 00244 int* vertical_x, int* vertical_y, 00245 Pix** pix_music_mask, 00246 TabVector_LIST* v_lines, 00247 TabVector_LIST* h_lines) { 00248 PERF_COUNT_START("FindAndRemoveLines") 00249 if (pix == NULL || vertical_x == NULL || vertical_y == NULL) { 00250 tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n"); 00251 return; 00252 } 00253 Pix* pix_vline = NULL; 00254 Pix* pix_non_vline = NULL; 00255 Pix* pix_hline = NULL; 00256 Pix* pix_non_hline = NULL; 00257 Pix* pix_intersections = NULL; 00258 Pixa* pixa_display = debug ? pixaCreate(0) : NULL; 00259 GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, 00260 &pix_non_hline, &pix_intersections, pix_music_mask, 00261 pixa_display); 00262 // Find lines, convert to TabVector_LIST and remove those that are used. 00263 FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y, 00264 &pix_vline, pix_non_vline, pix, v_lines); 00265 if (pix_hline != NULL) { 00266 // Recompute intersections and re-filter false positive h-lines. 00267 if (pix_vline != NULL) 00268 pixAnd(pix_intersections, pix_vline, pix_hline); 00269 else 00270 pixDestroy(&pix_intersections); 00271 if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, 00272 pix_hline)) { 00273 pixDestroy(&pix_hline); 00274 } 00275 } 00276 FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y, 00277 &pix_hline, pix_non_hline, pix, h_lines); 00278 if (pixa_display != NULL && pix_vline != NULL) 00279 pixaAddPix(pixa_display, pix_vline, L_CLONE); 00280 if (pixa_display != NULL && pix_hline != NULL) 00281 pixaAddPix(pixa_display, pix_hline, L_CLONE); 00282 if (pix_vline != NULL && pix_hline != NULL) { 00283 // Remove joins (intersections) where lines cross, and the residue. 00284 // Recalculate the intersections, since some lines have been deleted. 00285 pixAnd(pix_intersections, pix_vline, pix_hline); 00286 // Fatten up the intersections and seed-fill to get the intersection 00287 // residue. 00288 Pix* pix_join_residue = pixDilateBrick(NULL, pix_intersections, 5, 5); 00289 pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8); 00290 // Now remove the intersection residue. 00291 pixSubtract(pix, pix, pix_join_residue); 00292 pixDestroy(&pix_join_residue); 00293 } 00294 // Remove any detected music. 00295 if (pix_music_mask != NULL && *pix_music_mask != NULL) { 00296 if (pixa_display != NULL) 00297 pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); 00298 pixSubtract(pix, pix, *pix_music_mask); 00299 } 00300 if (pixa_display != NULL) 00301 pixaAddPix(pixa_display, pix, L_CLONE); 00302 00303 pixDestroy(&pix_vline); 00304 pixDestroy(&pix_non_vline); 00305 pixDestroy(&pix_hline); 00306 pixDestroy(&pix_non_hline); 00307 pixDestroy(&pix_intersections); 00308 if (pixa_display != NULL) { 00309 #if LIBLEPT_MINOR_VERSION >= 69 || LIBLEPT_MAJOR_VERSION > 1 00310 pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", 00311 "vhlinefinding.pdf"); 00312 #endif 00313 pixaDestroy(&pixa_display); 00314 } 00315 PERF_COUNT_END 00316 } 00317 00318 // Converts the Boxa array to a list of C_BLOB, getting rid of severely 00319 // overlapping outlines and those that are children of a bigger one. 00320 // The output is a list of C_BLOBs that are owned by the list. 00321 // The C_OUTLINEs in the C_BLOBs contain no outline data - just empty 00322 // bounding boxes. The Boxa is consumed and destroyed. 00323 void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, 00324 Boxa** boxes, C_BLOB_LIST* blobs) { 00325 C_OUTLINE_LIST outlines; 00326 C_OUTLINE_IT ol_it = &outlines; 00327 // Iterate the boxes to convert to outlines. 00328 int nboxes = boxaGetCount(*boxes); 00329 for (int i = 0; i < nboxes; ++i) { 00330 l_int32 x, y, width, height; 00331 boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height); 00332 // Make a C_OUTLINE from the leptonica box. This is a bit of a hack, 00333 // as there is no outline, just a bounding box, but with some very 00334 // small changes to coutln.cpp, it works nicely. 00335 ICOORD top_left(x, y); 00336 ICOORD bot_right(x + width, y + height); 00337 CRACKEDGE startpt; 00338 startpt.pos = top_left; 00339 C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0); 00340 ol_it.add_after_then_move(outline); 00341 } 00342 // Use outlines_to_blobs to convert the outlines to blobs and find 00343 // overlapping and contained objects. The output list of blobs in the block 00344 // has all the bad ones filtered out and deleted. 00345 BLOCK block; 00346 ICOORD page_tl(0, 0); 00347 ICOORD page_br(image_width, image_height); 00348 outlines_to_blobs(&block, page_tl, page_br, &outlines); 00349 // Transfer the created blobs to the output list. 00350 C_BLOB_IT blob_it(blobs); 00351 blob_it.add_list_after(block.blob_list()); 00352 // The boxes aren't needed any more. 00353 boxaDestroy(boxes); 00354 } 00355 00356 // Finds vertical line objects in pix_vline and removes the from src_pix. 00357 // Uses the given resolution to determine size thresholds instead of any 00358 // that may be present in the pix. 00359 // The output vertical_x and vertical_y contain a sum of the output vectors, 00360 // thereby giving the mean vertical direction. 00361 // The output vectors are owned by the list and Frozen (cannot refit) by 00362 // having no boxes, as there is no need to refit or merge separator lines. 00363 // If no good lines are found, pix_vline is destroyed. 00364 // None of the input pointers may be NULL, and if *pix_vline is NULL then 00365 // the function does nothing. 00366 void LineFinder::FindAndRemoveVLines(int resolution, 00367 Pix* pix_intersections, 00368 int* vertical_x, int* vertical_y, 00369 Pix** pix_vline, Pix* pix_non_vline, 00370 Pix* src_pix, TabVector_LIST* vectors) { 00371 if (pix_vline == NULL || *pix_vline == NULL) return; 00372 C_BLOB_LIST line_cblobs; 00373 BLOBNBOX_LIST line_bblobs; 00374 GetLineBoxes(false, *pix_vline, pix_intersections, 00375 &line_cblobs, &line_bblobs); 00376 int width = pixGetWidth(src_pix); 00377 int height = pixGetHeight(src_pix); 00378 ICOORD bleft(0, 0); 00379 ICOORD tright(width, height); 00380 FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors); 00381 if (!vectors->empty()) { 00382 RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline); 00383 SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix); 00384 ICOORD vertical; 00385 vertical.set_with_shrink(*vertical_x, *vertical_y); 00386 TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); 00387 } else { 00388 pixDestroy(pix_vline); 00389 } 00390 } 00391 00392 // Finds horizontal line objects in pix_hline and removes them from src_pix. 00393 // Uses the given resolution to determine size thresholds instead of any 00394 // that may be present in the pix. 00395 // The output vertical_x and vertical_y contain a sum of the output vectors, 00396 // thereby giving the mean vertical direction. 00397 // The output vectors are owned by the list and Frozen (cannot refit) by 00398 // having no boxes, as there is no need to refit or merge separator lines. 00399 // If no good lines are found, pix_hline is destroyed. 00400 // None of the input pointers may be NULL, and if *pix_hline is NULL then 00401 // the function does nothing. 00402 void LineFinder::FindAndRemoveHLines(int resolution, 00403 Pix* pix_intersections, 00404 int vertical_x, int vertical_y, 00405 Pix** pix_hline, Pix* pix_non_hline, 00406 Pix* src_pix, TabVector_LIST* vectors) { 00407 if (pix_hline == NULL || *pix_hline == NULL) return; 00408 C_BLOB_LIST line_cblobs; 00409 BLOBNBOX_LIST line_bblobs; 00410 GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs); 00411 int width = pixGetWidth(src_pix); 00412 int height = pixGetHeight(src_pix); 00413 ICOORD bleft(0, 0); 00414 ICOORD tright(height, width); 00415 FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y, 00416 vectors); 00417 if (!vectors->empty()) { 00418 RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline); 00419 SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix); 00420 ICOORD vertical; 00421 vertical.set_with_shrink(vertical_x, vertical_y); 00422 TabVector::MergeSimilarTabVectors(vertical, vectors, NULL); 00423 // Iterate the vectors to flip them. x and y were flipped for horizontal 00424 // lines, so FindLineVectors can work just with the vertical case. 00425 // See GetLineBoxes for more on the flip. 00426 TabVector_IT h_it(vectors); 00427 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { 00428 h_it.data()->XYFlip(); 00429 } 00430 } else { 00431 pixDestroy(pix_hline); 00432 } 00433 } 00434 00435 // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright 00436 // are the bounds of the image on which the input line_bblobs were found. 00437 // The input line_bblobs list is const really. 00438 // The output vertical_x and vertical_y are the total of all the vectors. 00439 // The output list of TabVector makes no reference to the input BLOBNBOXes. 00440 void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, 00441 BLOBNBOX_LIST* line_bblobs, 00442 int* vertical_x, int* vertical_y, 00443 TabVector_LIST* vectors) { 00444 BLOBNBOX_IT bbox_it(line_bblobs); 00445 int b_count = 0; 00446 // Put all the blobs into the grid to find the lines, and move the blobs 00447 // to the output lists. 00448 AlignedBlob blob_grid(kLineFindGridSize, bleft, tright); 00449 for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { 00450 BLOBNBOX* bblob = bbox_it.data(); 00451 bblob->set_left_tab_type(TT_MAYBE_ALIGNED); 00452 bblob->set_left_rule(bleft.x()); 00453 bblob->set_right_rule(tright.x()); 00454 bblob->set_left_crossing_rule(bleft.x()); 00455 bblob->set_right_crossing_rule(tright.x()); 00456 blob_grid.InsertBBox(false, true, bblob); 00457 ++b_count; 00458 } 00459 if (b_count == 0) 00460 return; 00461 00462 // Search the entire grid, looking for vertical line vectors. 00463 BlobGridSearch lsearch(&blob_grid); 00464 BLOBNBOX* bbox; 00465 TabVector_IT vector_it(vectors); 00466 *vertical_x = 0; 00467 *vertical_y = 1; 00468 lsearch.StartFullSearch(); 00469 while ((bbox = lsearch.NextFullSearch()) != NULL) { 00470 if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) { 00471 const TBOX& box = bbox->bounding_box(); 00472 if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) 00473 tprintf("Finding line vector starting at bbox (%d,%d)\n", 00474 box.left(), box.bottom()); 00475 AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width()); 00476 TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox, 00477 vertical_x, 00478 vertical_y); 00479 if (vector != NULL) { 00480 vector->Freeze(); 00481 vector_it.add_to_end(vector); 00482 } 00483 } 00484 } 00485 } 00486 00487 // Returns a Pix music mask if music is detected. 00488 // Any vertical line that has at least 5 intersections in sufficient density 00489 // is taken to be a bar. Bars are used as a seed and the entire touching 00490 // component is added to the output music mask and subtracted from the lines. 00491 // Returns NULL and does minimal work if no music is found. 00492 static Pix* FilterMusic(int resolution, Pix* pix_closed, 00493 Pix* pix_vline, Pix* pix_hline, 00494 l_int32* v_empty, l_int32* h_empty) { 00495 int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight); 00496 Pix* intersection_pix = pixAnd(NULL, pix_vline, pix_hline); 00497 Boxa* boxa = pixConnComp(pix_vline, NULL, 8); 00498 // Iterate over the boxes to find music bars. 00499 int nboxes = boxaGetCount(boxa); 00500 Pix* music_mask = NULL; 00501 for (int i = 0; i < nboxes; ++i) { 00502 Box* box = boxaGetBox(boxa, i, L_CLONE); 00503 l_int32 x, y, box_width, box_height; 00504 boxGetGeometry(box, &x, &y, &box_width, &box_height); 00505 int joins = NumTouchingIntersections(box, intersection_pix); 00506 // Test for the join density being at least 5 per max_stave_height, 00507 // ie (joins-1)/box_height >= (5-1)/max_stave_height. 00508 if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) { 00509 // This is a music bar. Add to the mask. 00510 if (music_mask == NULL) 00511 music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline), 00512 1); 00513 pixSetInRect(music_mask, box); 00514 } 00515 boxDestroy(&box); 00516 } 00517 boxaDestroy(&boxa); 00518 pixDestroy(&intersection_pix); 00519 if (music_mask != NULL) { 00520 // The mask currently contains just the bars. Use the mask as a seed 00521 // and the pix_closed as the mask for a seedfill to get all the 00522 // intersecting staves. 00523 pixSeedfillBinary(music_mask, music_mask, pix_closed, 8); 00524 // Filter out false positives. CCs in the music_mask should be the vast 00525 // majority of the pixels in their bounding boxes, as we expect just a 00526 // tiny amount of text, a few phrase marks, and crescendo etc left. 00527 Boxa* boxa = pixConnComp(music_mask, NULL, 8); 00528 // Iterate over the boxes to find music components. 00529 int nboxes = boxaGetCount(boxa); 00530 for (int i = 0; i < nboxes; ++i) { 00531 Box* box = boxaGetBox(boxa, i, L_CLONE); 00532 Pix* rect_pix = pixClipRectangle(music_mask, box, NULL); 00533 l_int32 music_pixels; 00534 pixCountPixels(rect_pix, &music_pixels, NULL); 00535 pixDestroy(&rect_pix); 00536 rect_pix = pixClipRectangle(pix_closed, box, NULL); 00537 l_int32 all_pixels; 00538 pixCountPixels(rect_pix, &all_pixels, NULL); 00539 pixDestroy(&rect_pix); 00540 if (music_pixels < kMinMusicPixelFraction * all_pixels) { 00541 // False positive. Delete from the music mask. 00542 pixClearInRect(music_mask, box); 00543 } 00544 boxDestroy(&box); 00545 } 00546 l_int32 no_remaining_music; 00547 boxaDestroy(&boxa); 00548 pixZero(music_mask, &no_remaining_music); 00549 if (no_remaining_music) { 00550 pixDestroy(&music_mask); 00551 } else { 00552 pixSubtract(pix_vline, pix_vline, music_mask); 00553 pixSubtract(pix_hline, pix_hline, music_mask); 00554 // We may have deleted all the lines 00555 pixZero(pix_vline, v_empty); 00556 pixZero(pix_hline, h_empty); 00557 } 00558 } 00559 return music_mask; 00560 } 00561 00562 // Most of the heavy lifting of line finding. Given src_pix and its separate 00563 // resolution, returns image masks: 00564 // pix_vline candidate vertical lines. 00565 // pix_non_vline pixels that didn't look like vertical lines. 00566 // pix_hline candidate horizontal lines. 00567 // pix_non_hline pixels that didn't look like horizontal lines. 00568 // pix_intersections pixels where vertical and horizontal lines meet. 00569 // pix_music_mask candidate music staves. 00570 // This function promises to initialize all the output (2nd level) pointers, 00571 // but any of the returns that are empty will be NULL on output. 00572 // None of the input (1st level) pointers may be NULL except pix_music_mask, 00573 // which will disable music detection, and pixa_display. 00574 void LineFinder::GetLineMasks(int resolution, Pix* src_pix, 00575 Pix** pix_vline, Pix** pix_non_vline, 00576 Pix** pix_hline, Pix** pix_non_hline, 00577 Pix** pix_intersections, Pix** pix_music_mask, 00578 Pixa* pixa_display) { 00579 Pix* pix_closed = NULL; 00580 Pix* pix_hollow = NULL; 00581 00582 int max_line_width = resolution / kThinLineFraction; 00583 int min_line_length = resolution / kMinLineLengthFraction; 00584 if (pixa_display != NULL) { 00585 tprintf("Image resolution = %d, max line width = %d, min length=%d\n", 00586 resolution, max_line_width, min_line_length); 00587 } 00588 int closing_brick = max_line_width / 3; 00589 00590 PERF_COUNT_START("GetLineMasksMorph") 00591 // only use opencl if compiled w/ OpenCL and selected device is opencl 00592 #ifdef USE_OPENCL 00593 if (OpenclDevice::selectedDeviceIsOpenCL()) { 00594 // OpenCL pixGetLines Operation 00595 int clStatus = OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix), 00596 pixGetHeight(src_pix), 00597 src_pix); 00598 bool getpixclosed = pix_music_mask != NULL ? true : false; 00599 OpenclDevice::pixGetLinesCL(NULL, src_pix, pix_vline, pix_hline, 00600 &pix_closed, getpixclosed, closing_brick, 00601 closing_brick, max_line_width, max_line_width, 00602 min_line_length, min_line_length); 00603 } else { 00604 #endif 00605 // Close up small holes, making it less likely that false alarms are found 00606 // in thickened text (as it will become more solid) and also smoothing over 00607 // some line breaks and nicks in the edges of the lines. 00608 pix_closed = pixCloseBrick(NULL, src_pix, closing_brick, closing_brick); 00609 if (pixa_display != NULL) 00610 pixaAddPix(pixa_display, pix_closed, L_CLONE); 00611 // Open up with a big box to detect solid areas, which can then be subtracted. 00612 // This is very generous and will leave in even quite wide lines. 00613 Pix* pix_solid = pixOpenBrick(NULL, pix_closed, max_line_width, 00614 max_line_width); 00615 if (pixa_display != NULL) 00616 pixaAddPix(pixa_display, pix_solid, L_CLONE); 00617 pix_hollow = pixSubtract(NULL, pix_closed, pix_solid); 00618 00619 pixDestroy(&pix_solid); 00620 00621 // Now open up in both directions independently to find lines of at least 00622 // 1 inch/kMinLineLengthFraction in length. 00623 if (pixa_display != NULL) 00624 pixaAddPix(pixa_display, pix_hollow, L_CLONE); 00625 *pix_vline = pixOpenBrick(NULL, pix_hollow, 1, min_line_length); 00626 *pix_hline = pixOpenBrick(NULL, pix_hollow, min_line_length, 1); 00627 00628 pixDestroy(&pix_hollow); 00629 #ifdef USE_OPENCL 00630 } 00631 #endif 00632 PERF_COUNT_END 00633 00634 // Lines are sufficiently rare, that it is worth checking for a zero image. 00635 l_int32 v_empty = 0; 00636 l_int32 h_empty = 0; 00637 pixZero(*pix_vline, &v_empty); 00638 pixZero(*pix_hline, &h_empty); 00639 if (pix_music_mask != NULL) { 00640 if (!v_empty && !h_empty) { 00641 *pix_music_mask = FilterMusic(resolution, pix_closed, 00642 *pix_vline, *pix_hline, 00643 &v_empty, &h_empty); 00644 } else { 00645 *pix_music_mask = NULL; 00646 } 00647 } 00648 pixDestroy(&pix_closed); 00649 Pix* pix_nonlines = NULL; 00650 *pix_intersections = NULL; 00651 Pix* extra_non_hlines = NULL; 00652 if (!v_empty) { 00653 // Subtract both line candidates from the source to get definite non-lines. 00654 pix_nonlines = pixSubtract(NULL, src_pix, *pix_vline); 00655 if (!h_empty) { 00656 pixSubtract(pix_nonlines, pix_nonlines, *pix_hline); 00657 // Intersections are a useful indicator for likelihood of being a line. 00658 *pix_intersections = pixAnd(NULL, *pix_vline, *pix_hline); 00659 // Candidate vlines are not hlines (apart from the intersections) 00660 // and vice versa. 00661 extra_non_hlines = pixSubtract(NULL, *pix_vline, *pix_intersections); 00662 } 00663 *pix_non_vline = pixErodeBrick(NULL, pix_nonlines, kMaxLineResidue, 1); 00664 pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8); 00665 if (!h_empty) { 00666 // Candidate hlines are not vlines. 00667 pixOr(*pix_non_vline, *pix_non_vline, *pix_hline); 00668 pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections); 00669 } 00670 if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections, 00671 *pix_vline)) 00672 pixDestroy(pix_vline); // No candidates left. 00673 } else { 00674 // No vertical lines. 00675 pixDestroy(pix_vline); 00676 *pix_non_vline = NULL; 00677 if (!h_empty) { 00678 pix_nonlines = pixSubtract(NULL, src_pix, *pix_hline); 00679 } 00680 } 00681 if (h_empty) { 00682 pixDestroy(pix_hline); 00683 *pix_non_hline = NULL; 00684 if (v_empty) { 00685 return; 00686 } 00687 } else { 00688 *pix_non_hline = pixErodeBrick(NULL, pix_nonlines, 1, kMaxLineResidue); 00689 pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8); 00690 if (extra_non_hlines != NULL) { 00691 pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines); 00692 pixDestroy(&extra_non_hlines); 00693 } 00694 if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections, 00695 *pix_hline)) 00696 pixDestroy(pix_hline); // No candidates left. 00697 } 00698 if (pixa_display != NULL) { 00699 if (*pix_vline != NULL) pixaAddPix(pixa_display, *pix_vline, L_CLONE); 00700 if (*pix_hline != NULL) pixaAddPix(pixa_display, *pix_hline, L_CLONE); 00701 if (pix_nonlines != NULL) pixaAddPix(pixa_display, pix_nonlines, L_CLONE); 00702 if (*pix_non_vline != NULL) 00703 pixaAddPix(pixa_display, *pix_non_vline, L_CLONE); 00704 if (*pix_non_hline != NULL) 00705 pixaAddPix(pixa_display, *pix_non_hline, L_CLONE); 00706 if (*pix_intersections != NULL) 00707 pixaAddPix(pixa_display, *pix_intersections, L_CLONE); 00708 if (pix_music_mask != NULL && *pix_music_mask != NULL) 00709 pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); 00710 } 00711 pixDestroy(&pix_nonlines); 00712 } 00713 00714 // Returns a list of boxes corresponding to the candidate line segments. Sets 00715 // the line_crossings member of the boxes so we can later determin the number 00716 // of intersections touched by a full line. 00717 void LineFinder::GetLineBoxes(bool horizontal_lines, 00718 Pix* pix_lines, Pix* pix_intersections, 00719 C_BLOB_LIST* line_cblobs, 00720 BLOBNBOX_LIST* line_bblobs) { 00721 // Put a single pixel crack in every line at an arbitrary spacing, 00722 // so they break up and the bounding boxes can be used to get the 00723 // direction accurately enough without needing outlines. 00724 int wpl = pixGetWpl(pix_lines); 00725 int width = pixGetWidth(pix_lines); 00726 int height = pixGetHeight(pix_lines); 00727 l_uint32* data = pixGetData(pix_lines); 00728 if (horizontal_lines) { 00729 for (int y = 0; y < height; ++y, data += wpl) { 00730 for (int x = kCrackSpacing; x < width; x += kCrackSpacing) { 00731 CLEAR_DATA_BIT(data, x); 00732 } 00733 } 00734 } else { 00735 for (int y = kCrackSpacing; y < height; y += kCrackSpacing) { 00736 memset(data + wpl * y, 0, wpl * sizeof(*data)); 00737 } 00738 } 00739 // Get the individual connected components 00740 Boxa* boxa = pixConnComp(pix_lines, NULL, 8); 00741 ConvertBoxaToBlobs(width, height, &boxa, line_cblobs); 00742 // Make the BLOBNBOXes from the C_BLOBs. 00743 C_BLOB_IT blob_it(line_cblobs); 00744 BLOBNBOX_IT bbox_it(line_bblobs); 00745 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00746 C_BLOB* cblob = blob_it.data(); 00747 BLOBNBOX* bblob = new BLOBNBOX(cblob); 00748 bbox_it.add_to_end(bblob); 00749 // Determine whether the line segment touches two intersections. 00750 const TBOX& bbox = bblob->bounding_box(); 00751 Box* box = boxCreate(bbox.left(), bbox.bottom(), 00752 bbox.width(), bbox.height()); 00753 bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections)); 00754 boxDestroy(&box); 00755 // Transform the bounding box prior to finding lines. To save writing 00756 // two line finders, flip x and y for horizontal lines and re-use the 00757 // tab-stop detection code. For vertical lines we still have to flip the 00758 // y-coordinates to switch from leptonica coords to tesseract coords. 00759 if (horizontal_lines) { 00760 // Note that we have Leptonica coords stored in a Tesseract box, so that 00761 // bbox.bottom(), being the MIN y coord, is actually the top, so to get 00762 // back to Leptonica coords in RemoveUnusedLineSegments, we have to 00763 // use height - box.right() as the top, which looks very odd. 00764 TBOX new_box(height - bbox.top(), bbox.left(), 00765 height - bbox.bottom(), bbox.right()); 00766 bblob->set_bounding_box(new_box); 00767 } else { 00768 TBOX new_box(bbox.left(), height - bbox.top(), 00769 bbox.right(), height - bbox.bottom()); 00770 bblob->set_bounding_box(new_box); 00771 } 00772 } 00773 } 00774 00775 } // namespace tesseract. 00776