tesseract
3.03
|
00001 00002 // File: alignedblob.cpp 00003 // Description: Subclass of BBGrid to find vertically aligned blobs. 00004 // Author: Ray Smith 00005 // Created: Fri Mar 21 15:03:01 PST 2008 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifdef HAVE_CONFIG_H 00021 #include "config_auto.h" 00022 #endif 00023 00024 #include "alignedblob.h" 00025 #include "ndminx.h" 00026 00027 INT_VAR(textord_debug_tabfind, 0, "Debug tab finding"); 00028 INT_VAR(textord_debug_bugs, 0, "Turn on output related to bugs in tab finding"); 00029 INT_VAR(textord_testregion_left, -1, "Left edge of debug reporting rectangle"); 00030 INT_VAR(textord_testregion_top, -1, "Top edge of debug reporting rectangle"); 00031 INT_VAR(textord_testregion_right, MAX_INT32, "Right edge of debug rectangle"); 00032 INT_VAR(textord_testregion_bottom, MAX_INT32, "Bottom edge of debug rectangle"); 00033 BOOL_VAR(textord_debug_images, false, "Use greyed image background for debug"); 00034 BOOL_VAR(textord_debug_printable, false, "Make debug windows printable"); 00035 00036 namespace tesseract { 00037 00038 // Fraction of resolution used as alignment tolerance for aligned tabs. 00039 const double kAlignedFraction = 0.03125; 00040 // Fraction of resolution used as alignment tolerance for ragged tabs. 00041 const double kRaggedFraction = 2.5; 00042 // Fraction of height used as a minimum gutter gap for aligned blobs. 00043 const double kAlignedGapFraction = 0.75; 00044 // Fraction of height used as a minimum gutter gap for ragged tabs. 00045 const double kRaggedGapFraction = 1.0; 00046 // Constant number of pixels used as alignment tolerance for line finding. 00047 const int kVLineAlignment = 3; 00048 // Constant number of pixels used as gutter gap tolerance for line finding. 00049 const int kVLineGutter = 1; 00050 // Constant number of pixels used as the search size for line finding. 00051 const int kVLineSearchSize = 150; 00052 // Min number of points to accept for a ragged tab stop. 00053 const int kMinRaggedTabs = 5; 00054 // Min number of points to accept for an aligned tab stop. 00055 const int kMinAlignedTabs = 4; 00056 // Constant number of pixels minimum height of a vertical line. 00057 const int kVLineMinLength = 500; 00058 // Minimum gradient for a vertical tab vector. Used to prune away junk 00059 // tab vectors with what would be a ridiculously large skew angle. 00060 // Value corresponds to tan(90 - max allowed skew angle) 00061 const double kMinTabGradient = 4.0; 00062 // Tolerance to skew on top of current estimate of skew. Divide x or y length 00063 // by kMaxSkewFactor to get the y or x skew distance. 00064 // If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor. 00065 const int kMaxSkewFactor = 15; 00066 00067 // Constant part of textord_debug_pix_. 00068 const char* kTextordDebugPix = "psdebug_pix"; 00069 00070 // Name of image file to use if textord_debug_images is true. 00071 STRING AlignedBlob::textord_debug_pix_ = kTextordDebugPix; 00072 // Index to image file to use if textord_debug_images is true. 00073 int AlignedBlob::debug_pix_index_ = 0; 00074 00075 // Increment the serial number counter and set the string to use 00076 // for a filename if textord_debug_images is true. 00077 void AlignedBlob::IncrementDebugPix() { 00078 ++debug_pix_index_; 00079 textord_debug_pix_ = kTextordDebugPix; 00080 char numbuf[32]; 00081 snprintf(numbuf, sizeof(numbuf), "%d", debug_pix_index_); 00082 textord_debug_pix_ += numbuf; 00083 textord_debug_pix_ += ".pix"; 00084 } 00085 00086 // Constructor to set the parameters for finding aligned and ragged tabs. 00087 // Vertical_x and vertical_y are the current estimates of the true vertical 00088 // direction (up) in the image. Height is the height of the starter blob. 00089 // v_gap_multiple is the multiple of height that will be used as a limit 00090 // on vertical gap before giving up and calling the line ended. 00091 // resolution is the original image resolution, and align0 indicates the 00092 // type of tab stop to be found. 00093 AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, 00094 int height, int v_gap_multiple, 00095 int min_gutter_width, 00096 int resolution, TabAlignment align0) 00097 : right_tab(align0 == TA_RIGHT_RAGGED || align0 == TA_RIGHT_ALIGNED), 00098 ragged(align0 == TA_LEFT_RAGGED || align0 == TA_RIGHT_RAGGED), 00099 alignment(align0), 00100 confirmed_type(TT_CONFIRMED), 00101 min_length(0) { 00102 // Set the tolerances according to the type of line sought. 00103 // For tab search, these are based on the image resolution for most, or 00104 // the height of the starting blob for the maximum vertical gap. 00105 max_v_gap = height * v_gap_multiple; 00106 if (ragged) { 00107 // In the case of a ragged edge, we are much more generous with the 00108 // inside alignment fraction, but also require a much bigger gutter. 00109 gutter_fraction = kRaggedGapFraction; 00110 if (alignment == TA_RIGHT_RAGGED) { 00111 l_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); 00112 r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); 00113 } else { 00114 l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); 00115 r_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); 00116 } 00117 min_points = kMinRaggedTabs; 00118 } else { 00119 gutter_fraction = kAlignedGapFraction; 00120 l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); 00121 r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); 00122 min_points = kMinAlignedTabs; 00123 } 00124 min_gutter = static_cast<int>(height * gutter_fraction + 0.5); 00125 if (min_gutter < min_gutter_width) 00126 min_gutter = min_gutter_width; 00127 // Fit the vertical vector into an ICOORD, which is 16 bit. 00128 set_vertical(vertical_x, vertical_y); 00129 } 00130 00131 // Constructor to set the parameters for finding vertical lines. 00132 // Vertical_x and vertical_y are the current estimates of the true vertical 00133 // direction (up) in the image. Width is the width of the starter blob. 00134 AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, 00135 int width) 00136 : gutter_fraction(0.0), 00137 right_tab(false), 00138 ragged(false), 00139 alignment(TA_SEPARATOR), 00140 confirmed_type(TT_VLINE), 00141 max_v_gap(kVLineSearchSize), 00142 min_gutter(kVLineGutter), 00143 min_points(1), 00144 min_length(kVLineMinLength) { 00145 // Compute threshold for left and right alignment. 00146 l_align_tolerance = MAX(kVLineAlignment, width); 00147 r_align_tolerance = MAX(kVLineAlignment, width); 00148 00149 // Fit the vertical vector into an ICOORD, which is 16 bit. 00150 set_vertical(vertical_x, vertical_y); 00151 } 00152 00153 // Fit the vertical vector into an ICOORD, which is 16 bit. 00154 void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) { 00155 int factor = 1; 00156 if (vertical_y > MAX_INT16) 00157 factor = vertical_y / MAX_INT16 + 1; 00158 vertical.set_x(vertical_x / factor); 00159 vertical.set_y(vertical_y / factor); 00160 } 00161 00162 00163 AlignedBlob::AlignedBlob(int gridsize, 00164 const ICOORD& bleft, const ICOORD& tright) 00165 : BlobGrid(gridsize, bleft, tright) { 00166 } 00167 00168 AlignedBlob::~AlignedBlob() { 00169 } 00170 00171 // Return true if the given coordinates are within the test rectangle 00172 // and the debug level is at least the given detail level. 00173 bool AlignedBlob::WithinTestRegion(int detail_level, int x, int y) { 00174 if (textord_debug_tabfind < detail_level) 00175 return false; 00176 return x >= textord_testregion_left && x <= textord_testregion_right && 00177 y <= textord_testregion_top && y >= textord_testregion_bottom; 00178 } 00179 00180 // Display the tab codes of the BLOBNBOXes in this grid. 00181 ScrollView* AlignedBlob::DisplayTabs(const char* window_name, 00182 ScrollView* tab_win) { 00183 #ifndef GRAPHICS_DISABLED 00184 if (tab_win == NULL) 00185 tab_win = MakeWindow(0, 50, window_name); 00186 // For every tab in the grid, display it. 00187 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this); 00188 gsearch.StartFullSearch(); 00189 BLOBNBOX* bbox; 00190 while ((bbox = gsearch.NextFullSearch()) != NULL) { 00191 TBOX box = bbox->bounding_box(); 00192 int left_x = box.left(); 00193 int right_x = box.right(); 00194 int top_y = box.top(); 00195 int bottom_y = box.bottom(); 00196 TabType tabtype = bbox->left_tab_type(); 00197 if (tabtype != TT_NONE) { 00198 if (tabtype == TT_MAYBE_ALIGNED) 00199 tab_win->Pen(ScrollView::BLUE); 00200 else if (tabtype == TT_MAYBE_RAGGED) 00201 tab_win->Pen(ScrollView::YELLOW); 00202 else if (tabtype == TT_CONFIRMED) 00203 tab_win->Pen(ScrollView::GREEN); 00204 else 00205 tab_win->Pen(ScrollView::GREY); 00206 tab_win->Line(left_x, top_y, left_x, bottom_y); 00207 } 00208 tabtype = bbox->right_tab_type(); 00209 if (tabtype != TT_NONE) { 00210 if (tabtype == TT_MAYBE_ALIGNED) 00211 tab_win->Pen(ScrollView::MAGENTA); 00212 else if (tabtype == TT_MAYBE_RAGGED) 00213 tab_win->Pen(ScrollView::ORANGE); 00214 else if (tabtype == TT_CONFIRMED) 00215 tab_win->Pen(ScrollView::RED); 00216 else 00217 tab_win->Pen(ScrollView::GREY); 00218 tab_win->Line(right_x, top_y, right_x, bottom_y); 00219 } 00220 } 00221 tab_win->Update(); 00222 #endif 00223 return tab_win; 00224 } 00225 00226 // Helper returns true if the total number of line_crossings of all the blobs 00227 // in the list is at least 2. 00228 static bool AtLeast2LineCrossings(BLOBNBOX_CLIST* blobs) { 00229 BLOBNBOX_C_IT it(blobs); 00230 int total_crossings = 0; 00231 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00232 total_crossings += it.data()->line_crossings(); 00233 } 00234 return total_crossings >= 2; 00235 } 00236 00237 // Finds a vector corresponding to a set of vertically aligned blob edges 00238 // running through the given box. The type of vector returned and the 00239 // search parameters are determined by the AlignedBlobParams. 00240 // vertical_x and y are updated with an estimate of the real 00241 // vertical direction. (skew finding.) 00242 // Returns NULL if no decent vector can be found. 00243 TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, 00244 BLOBNBOX* bbox, 00245 int* vertical_x, 00246 int* vertical_y) { 00247 int ext_start_y, ext_end_y; 00248 BLOBNBOX_CLIST good_points; 00249 // Search up and then down from the starting bbox. 00250 TBOX box = bbox->bounding_box(); 00251 bool debug = WithinTestRegion(2, box.left(), box.bottom()); 00252 int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y); 00253 pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y); 00254 BLOBNBOX_C_IT it(&good_points); 00255 it.move_to_last(); 00256 box = it.data()->bounding_box(); 00257 int end_y = box.top(); 00258 int end_x = align_params.right_tab ? box.right() : box.left(); 00259 it.move_to_first(); 00260 box = it.data()->bounding_box(); 00261 int start_x = align_params.right_tab ? box.right() : box.left(); 00262 int start_y = box.bottom(); 00263 // Acceptable tab vectors must have a mininum number of points, 00264 // have a minimum acceptable length, and have a minimum gradient. 00265 // The gradient corresponds to the skew angle. 00266 // Ragged tabs don't need to satisfy the gradient condition, as they 00267 // will always end up parallel to the vertical direction. 00268 bool at_least_2_crossings = AtLeast2LineCrossings(&good_points); 00269 if ((pt_count >= align_params.min_points && 00270 end_y - start_y >= align_params.min_length && 00271 (align_params.ragged || 00272 end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) || 00273 at_least_2_crossings) { 00274 int confirmed_points = 0; 00275 // Count existing confirmed points to see if vector is acceptable. 00276 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00277 bbox = it.data(); 00278 if (align_params.right_tab) { 00279 if (bbox->right_tab_type() == align_params.confirmed_type) 00280 ++confirmed_points; 00281 } else { 00282 if (bbox->left_tab_type() == align_params.confirmed_type) 00283 ++confirmed_points; 00284 } 00285 } 00286 // Ragged vectors are not allowed to use too many already used points. 00287 if (!align_params.ragged || 00288 confirmed_points + confirmed_points < pt_count) { 00289 const TBOX& box = bbox->bounding_box(); 00290 if (debug) { 00291 tprintf("Confirming tab vector of %d pts starting at %d,%d\n", 00292 pt_count, box.left(), box.bottom()); 00293 } 00294 // Flag all the aligned neighbours as confirmed . 00295 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00296 bbox = it.data(); 00297 if (align_params.right_tab) { 00298 bbox->set_right_tab_type(align_params.confirmed_type); 00299 } else { 00300 bbox->set_left_tab_type(align_params.confirmed_type); 00301 } 00302 if (debug) { 00303 bbox->bounding_box().print(); 00304 } 00305 } 00306 // Now make the vector and return it. 00307 TabVector* result = TabVector::FitVector(align_params.alignment, 00308 align_params.vertical, 00309 ext_start_y, ext_end_y, 00310 &good_points, 00311 vertical_x, vertical_y); 00312 result->set_intersects_other_lines(at_least_2_crossings); 00313 if (debug) { 00314 tprintf("Box was %d, %d\n", box.left(), box.bottom()); 00315 result->Print("After fitting"); 00316 } 00317 return result; 00318 } else if (debug) { 00319 tprintf("Ragged tab used too many used points: %d out of %d\n", 00320 confirmed_points, pt_count); 00321 } 00322 } else if (debug) { 00323 tprintf("Tab vector failed basic tests: pt count %d vs min %d, " 00324 "length %d vs min %d, min grad %g\n", 00325 pt_count, align_params.min_points, end_y - start_y, 00326 align_params.min_length, abs(end_x - start_x) * kMinTabGradient); 00327 } 00328 return NULL; 00329 } 00330 00331 // Find a set of blobs that are aligned in the given vertical 00332 // direction with the given blob. Returns a list of aligned 00333 // blobs and the number in the list. 00334 // For other parameters see FindAlignedBlob below. 00335 int AlignedBlob::AlignTabs(const AlignedBlobParams& params, 00336 bool top_to_bottom, BLOBNBOX* bbox, 00337 BLOBNBOX_CLIST* good_points, int* end_y) { 00338 int ptcount = 0; 00339 BLOBNBOX_C_IT it(good_points); 00340 00341 TBOX box = bbox->bounding_box(); 00342 bool debug = WithinTestRegion(2, box.left(), box.bottom()); 00343 if (debug) { 00344 tprintf("Starting alignment run at blob:"); 00345 box.print(); 00346 } 00347 int x_start = params.right_tab ? box.right() : box.left(); 00348 while (bbox != NULL) { 00349 // Add the blob to the list if the appropriate side is a tab candidate, 00350 // or if we are working on a ragged tab. 00351 TabType type = params.right_tab ? bbox->right_tab_type() 00352 : bbox->left_tab_type(); 00353 if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) && 00354 (it.empty() || it.data() != bbox)) { 00355 if (top_to_bottom) 00356 it.add_before_then_move(bbox); 00357 else 00358 it.add_after_then_move(bbox); 00359 ++ptcount; 00360 } 00361 // Find the next blob that is aligned with the current one. 00362 // FindAlignedBlob guarantees that forward progress will be made in the 00363 // top_to_bottom direction, and therefore eventually it will return NULL, 00364 // making this while (bbox != NULL) loop safe. 00365 bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); 00366 if (bbox != NULL) { 00367 box = bbox->bounding_box(); 00368 if (!params.ragged) 00369 x_start = params.right_tab ? box.right() : box.left(); 00370 } 00371 } 00372 if (debug) { 00373 tprintf("Alignment run ended with %d pts at blob:", ptcount); 00374 box.print(); 00375 } 00376 return ptcount; 00377 } 00378 00379 // Search vertically for a blob that is aligned with the input bbox. 00380 // The search parameters are determined by AlignedBlobParams. 00381 // top_to_bottom tells whether to search down or up. 00382 // The return value is NULL if nothing was found in the search box 00383 // or if a blob was found in the gutter. On a NULL return, end_y 00384 // is set to the edge of the search box or the leading edge of the 00385 // gutter blob if one was found. 00386 BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, 00387 bool top_to_bottom, BLOBNBOX* bbox, 00388 int x_start, int* end_y) { 00389 TBOX box = bbox->bounding_box(); 00390 // If there are separator lines, get the column edges. 00391 int left_column_edge = bbox->left_rule(); 00392 int right_column_edge = bbox->right_rule(); 00393 // start_y is used to guarantee that forward progress is made and the 00394 // search does not go into an infinite loop. New blobs must extend the 00395 // line beyond start_y. 00396 int start_y = top_to_bottom ? box.bottom() : box.top(); 00397 if (WithinTestRegion(2, x_start, start_y)) { 00398 tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", 00399 box.left(), box.top(), box.right(), box.bottom(), 00400 left_column_edge, right_column_edge); 00401 } 00402 // Compute skew tolerance. 00403 int skew_tolerance = p.max_v_gap / kMaxSkewFactor; 00404 // Calculate xmin and xmax of the search box so that it contains 00405 // all possibly relevant boxes upto p.max_v_gap above or below accoording 00406 // to top_to_bottom. 00407 // Start with a notion of vertical with the current estimate. 00408 int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y(); 00409 if (top_to_bottom) { 00410 x2 = x_start - x2; 00411 *end_y = start_y - p.max_v_gap; 00412 } else { 00413 x2 = x_start + x2; 00414 *end_y = start_y + p.max_v_gap; 00415 } 00416 // Expand the box by an additional skew tolerance 00417 int xmin = MIN(x_start, x2) - skew_tolerance; 00418 int xmax = MAX(x_start, x2) + skew_tolerance; 00419 // Now add direction-specific tolerances. 00420 if (p.right_tab) { 00421 xmax += p.min_gutter; 00422 xmin -= p.l_align_tolerance; 00423 } else { 00424 xmax += p.r_align_tolerance; 00425 xmin -= p.min_gutter; 00426 } 00427 // Setup a vertical search for an aligned blob. 00428 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(this); 00429 if (WithinTestRegion(2, x_start, start_y)) 00430 tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n", 00431 p.ragged ? "Ragged" : "Aligned", p.right_tab ? "Right" : "Left", 00432 xmin, xmax, start_y, p.max_v_gap, p.min_gutter); 00433 vsearch.StartVerticalSearch(xmin, xmax, start_y); 00434 // result stores the best real return value. 00435 BLOBNBOX* result = NULL; 00436 // The backup_result is not a tab candidate and can be used if no 00437 // real tab candidate result is found. 00438 BLOBNBOX* backup_result = NULL; 00439 // neighbour is the blob that is currently being investigated. 00440 BLOBNBOX* neighbour = NULL; 00441 while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != NULL) { 00442 if (neighbour == bbox) 00443 continue; 00444 TBOX nbox = neighbour->bounding_box(); 00445 int n_y = (nbox.top() + nbox.bottom()) / 2; 00446 if ((!top_to_bottom && n_y > start_y + p.max_v_gap) || 00447 (top_to_bottom && n_y < start_y - p.max_v_gap)) { 00448 if (WithinTestRegion(2, x_start, start_y)) 00449 tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n", 00450 nbox.left(), nbox.bottom(), nbox.right(), nbox.top()); 00451 break; // Gone far enough. 00452 } 00453 // It is CRITICAL to ensure that forward progress is made, (strictly 00454 // in/decreasing n_y) or the caller could loop infinitely, while 00455 // waiting for a sequence of blobs in a line to end. 00456 // NextVerticalSearch alone does not guarantee this, as there may be 00457 // more than one blob in a grid cell. See comment in AlignTabs. 00458 if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box)) 00459 continue; // Only look in the required direction. 00460 if (result != NULL && result->bounding_box().y_gap(nbox) > gridsize()) 00461 return result; // This result is clear. 00462 if (backup_result != NULL && p.ragged && result == NULL && 00463 backup_result->bounding_box().y_gap(nbox) > gridsize()) 00464 return backup_result; // This result is clear. 00465 00466 // If the neighbouring blob is the wrong side of a separator line, then it 00467 // "doesn't exist" as far as we are concerned. 00468 int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() / p.vertical.y(); 00469 if (x_at_n_y < neighbour->left_crossing_rule() || 00470 x_at_n_y > neighbour->right_crossing_rule()) 00471 continue; // Separator line in the way. 00472 int n_left = nbox.left(); 00473 int n_right = nbox.right(); 00474 int n_x = p.right_tab ? n_right : n_left; 00475 if (WithinTestRegion(2, x_start, start_y)) 00476 tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n", 00477 nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), 00478 n_x, n_y, x_at_n_y); 00479 if (p.right_tab && 00480 n_left < x_at_n_y + p.min_gutter && 00481 n_right > x_at_n_y + p.r_align_tolerance && 00482 (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) { 00483 // In the gutter so end of line. 00484 if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED) 00485 bbox->set_right_tab_type(TT_DELETED); 00486 *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); 00487 if (WithinTestRegion(2, x_start, start_y)) 00488 tprintf("gutter\n"); 00489 return NULL; 00490 } 00491 if (!p.right_tab && 00492 n_left < x_at_n_y - p.l_align_tolerance && 00493 n_right > x_at_n_y - p.min_gutter && 00494 (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) { 00495 // In the gutter so end of line. 00496 if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED) 00497 bbox->set_left_tab_type(TT_DELETED); 00498 *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); 00499 if (WithinTestRegion(2, x_start, start_y)) 00500 tprintf("gutter\n"); 00501 return NULL; 00502 } 00503 if ((p.right_tab && neighbour->leader_on_right()) || 00504 (!p.right_tab && neighbour->leader_on_left())) 00505 continue; // Neigbours of leaders are not allowed to be used. 00506 if (n_x <= x_at_n_y + p.r_align_tolerance && 00507 n_x >= x_at_n_y - p.l_align_tolerance) { 00508 // Aligned so keep it. If it is a marked tab save it as result, 00509 // otherwise keep it as backup_result to return in case of later failure. 00510 if (WithinTestRegion(2, x_start, start_y)) 00511 tprintf("aligned, seeking%d, l=%d, r=%d\n", 00512 p.right_tab, neighbour->left_tab_type(), 00513 neighbour->right_tab_type()); 00514 TabType n_type = p.right_tab ? neighbour->right_tab_type() 00515 : neighbour->left_tab_type(); 00516 if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) { 00517 if (result == NULL) { 00518 result = neighbour; 00519 } else { 00520 // Keep the closest neighbour by Euclidean distance. 00521 // This prevents it from picking a tab blob in another column. 00522 const TBOX& old_box = result->bounding_box(); 00523 int x_diff = p.right_tab ? old_box.right() : old_box.left(); 00524 x_diff -= x_at_n_y; 00525 int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y; 00526 int old_dist = x_diff * x_diff + y_diff * y_diff; 00527 x_diff = n_x - x_at_n_y; 00528 y_diff = n_y - start_y; 00529 int new_dist = x_diff * x_diff + y_diff * y_diff; 00530 if (new_dist < old_dist) 00531 result = neighbour; 00532 } 00533 } else if (backup_result == NULL) { 00534 if (WithinTestRegion(2, x_start, start_y)) 00535 tprintf("Backup\n"); 00536 backup_result = neighbour; 00537 } else { 00538 TBOX backup_box = backup_result->bounding_box(); 00539 if ((p.right_tab && backup_box.right() < nbox.right()) || 00540 (!p.right_tab && backup_box.left() > nbox.left())) { 00541 if (WithinTestRegion(2, x_start, start_y)) 00542 tprintf("Better backup\n"); 00543 backup_result = neighbour; 00544 } 00545 } 00546 } 00547 } 00548 return result != NULL ? result : backup_result; 00549 } 00550 00551 } // namespace tesseract. 00552