tesseract
3.03
|
00001 00002 // File: colpartitionrid.h 00003 // Description: Class collecting code that acts on a BBGrid of ColPartitions. 00004 // Author: Ray Smith 00005 // Created: Mon Oct 05 08:42:01 PDT 2009 00006 // 00007 // (C) Copyright 2009, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifdef HAVE_CONFIG_H 00021 #include "config_auto.h" 00022 #endif 00023 00024 #include "colpartitiongrid.h" 00025 #include "colpartitionset.h" 00026 #include "imagefind.h" 00027 00028 namespace tesseract { 00029 00030 BOOL_VAR(textord_tabfind_show_color_fit, false, "Show stroke widths"); 00031 00032 // Max pad factor used to search the neighbourhood of a partition to smooth 00033 // partition types. 00034 const int kMaxPadFactor = 6; 00035 // Max multiple of size (min(height, width)) for the distance of the nearest 00036 // neighbour for the change of type to be used. 00037 const int kMaxNeighbourDistFactor = 4; 00038 // Max RMS color noise to compare colors. 00039 const int kMaxRMSColorNoise = 128; 00040 // Minimum number of blobs in text to make a strong text partition. 00041 const int kHorzStrongTextlineCount = 10; 00042 // Maximum number of lines in a credible figure caption. 00043 const int kMaxCaptionLines = 7; 00044 // Min ratio between biggest and smallest gap to bound a caption. 00045 const double kMinCaptionGapRatio = 2.0; 00046 // Min ratio between biggest gap and mean line height to bound a caption. 00047 const double kMinCaptionGapHeightRatio = 0.5; 00048 // Min fraction of ColPartition height to be overlapping for margin purposes. 00049 const double kMarginOverlapFraction = 0.25; 00050 // Size ratio required to consider an unmerged overlapping partition to be big. 00051 const double kBigPartSizeRatio = 1.75; 00052 // Allowed proportional change in stroke width to match for smoothing. 00053 const double kStrokeWidthFractionTolerance = 0.25; 00054 // Allowed constant change in stroke width to match for smoothing. 00055 const double kStrokeWidthConstantTolerance = 2.0; 00056 // Fraction of gridsize to allow arbitrary overlap between partitions. 00057 const double kTinyEnoughTextlineOverlapFraction = 0.25; 00058 // Max vertical distance of neighbouring ColPartition as a multiple of 00059 // partition height for it to be a partner. 00060 // TODO(rays) fix the problem that causes a larger number to not work well. 00061 // The value needs to be larger as sparse text blocks in a page that gets 00062 // marked as single column will not find adjacent lines as partners, and 00063 // will merge horizontally distant, but aligned lines. See rep.4B3 p5. 00064 // The value needs to be small because double-spaced legal docs written 00065 // in a single column, but justified courier have widely spaced lines 00066 // that need to get merged before they partner-up with the lines above 00067 // and below. See legal.3B5 p13/17. Neither of these should depend on 00068 // the value of kMaxPartitionSpacing to be successful, and ColPartition 00069 // merging needs attention to fix this problem. 00070 const double kMaxPartitionSpacing = 1.75; 00071 // Margin by which text has to beat image or vice-versa to make a firm 00072 // decision in GridSmoothNeighbour. 00073 const int kSmoothDecisionMargin = 4; 00074 00075 ColPartitionGrid::ColPartitionGrid() { 00076 } 00077 ColPartitionGrid::ColPartitionGrid(int gridsize, 00078 const ICOORD& bleft, const ICOORD& tright) 00079 : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize, 00080 bleft, tright) { 00081 } 00082 00083 ColPartitionGrid::~ColPartitionGrid() { 00084 } 00085 00086 // Handles a click event in a display window. 00087 void ColPartitionGrid::HandleClick(int x, int y) { 00088 BBGrid<ColPartition, 00089 ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y); 00090 // Run a radial search for partitions that overlap. 00091 ColPartitionGridSearch radsearch(this); 00092 radsearch.SetUniqueMode(true); 00093 radsearch.StartRadSearch(x, y, 1); 00094 ColPartition* neighbour; 00095 FCOORD click(x, y); 00096 while ((neighbour = radsearch.NextRadSearch()) != NULL) { 00097 TBOX nbox = neighbour->bounding_box(); 00098 if (nbox.contains(click)) { 00099 tprintf("Block box:"); 00100 neighbour->bounding_box().print(); 00101 neighbour->Print(); 00102 } 00103 } 00104 } 00105 00106 // Merges ColPartitions in the grid that look like they belong in the same 00107 // textline. 00108 // For all partitions in the grid, calls the box_cb permanent callback 00109 // to compute the search box, seaches the box, and if a candidate is found, 00110 // calls the confirm_cb to check any more rules. If the confirm_cb returns 00111 // true, then the partitions are merged. 00112 // Both callbacks are deleted before returning. 00113 void ColPartitionGrid::Merges( 00114 TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb, 00115 TessResultCallback2<bool, const ColPartition*, 00116 const ColPartition*>* confirm_cb) { 00117 // Iterate the ColPartitions in the grid. 00118 ColPartitionGridSearch gsearch(this); 00119 gsearch.StartFullSearch(); 00120 ColPartition* part; 00121 while ((part = gsearch.NextFullSearch()) != NULL) { 00122 if (MergePart(box_cb, confirm_cb, part)) 00123 gsearch.RepositionIterator(); 00124 } 00125 delete box_cb; 00126 delete confirm_cb; 00127 } 00128 00129 // For the given partition, calls the box_cb permanent callback 00130 // to compute the search box, searches the box, and if a candidate is found, 00131 // calls the confirm_cb to check any more rules. If the confirm_cb returns 00132 // true, then the partitions are merged. 00133 // Returns true if the partition is consumed by one or more merges. 00134 bool ColPartitionGrid::MergePart( 00135 TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb, 00136 TessResultCallback2<bool, const ColPartition*, 00137 const ColPartition*>* confirm_cb, 00138 ColPartition* part) { 00139 if (part->IsUnMergeableType()) 00140 return false; 00141 bool any_done = false; 00142 // Repeatedly merge part while we find a best merge candidate that works. 00143 bool merge_done = false; 00144 do { 00145 merge_done = false; 00146 TBOX box = part->bounding_box(); 00147 bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); 00148 if (debug) { 00149 tprintf("Merge candidate:"); 00150 box.print(); 00151 } 00152 // Set up a rectangle search bounded by the part. 00153 if (!box_cb->Run(part, &box)) 00154 continue; 00155 // Create a list of merge candidates. 00156 ColPartition_CLIST merge_candidates; 00157 FindMergeCandidates(part, box, debug, &merge_candidates); 00158 // Find the best merge candidate based on minimal overlap increase. 00159 int overlap_increase; 00160 ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug, 00161 confirm_cb, 00162 &overlap_increase); 00163 if (neighbour != NULL && overlap_increase <= 0) { 00164 if (debug) { 00165 tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", 00166 part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour), 00167 overlap_increase); 00168 } 00169 // Looks like a good candidate so merge it. 00170 RemoveBBox(neighbour); 00171 // We will modify the box of part, so remove it from the grid, merge 00172 // it and then re-insert it into the grid. 00173 RemoveBBox(part); 00174 part->Absorb(neighbour, NULL); 00175 InsertBBox(true, true, part); 00176 merge_done = true; 00177 any_done = true; 00178 } else if (neighbour != NULL) { 00179 if (debug) { 00180 tprintf("Overlapped when merged with increase %d: ", overlap_increase); 00181 neighbour->bounding_box().print(); 00182 } 00183 } else if (debug) { 00184 tprintf("No candidate neighbour returned\n"); 00185 } 00186 } while (merge_done); 00187 return any_done; 00188 } 00189 00190 // Returns true if the given part and merge candidate might believably 00191 // be part of a single text line according to the default rules. 00192 // In general we only want to merge partitions that look like they 00193 // are on the same text line, ie their median limits overlap, but we have 00194 // to make exceptions for diacritics and stray punctuation. 00195 static bool OKMergeCandidate(const ColPartition* part, 00196 const ColPartition* candidate, 00197 bool debug) { 00198 const TBOX& part_box = part->bounding_box(); 00199 if (candidate == part) 00200 return false; // Ignore itself. 00201 if (!part->TypesMatch(*candidate) || candidate->IsUnMergeableType()) 00202 return false; // Don't mix inappropriate types. 00203 00204 const TBOX& c_box = candidate->bounding_box(); 00205 if (debug) { 00206 tprintf("Examining merge candidate:"); 00207 c_box.print(); 00208 } 00209 // Candidates must be within a reasonable distance. 00210 if (candidate->IsVerticalType() || part->IsVerticalType()) { 00211 int h_dist = -part->HCoreOverlap(*candidate); 00212 if (h_dist >= MAX(part_box.width(), c_box.width()) / 2) { 00213 if (debug) 00214 tprintf("Too far away: h_dist = %d\n", h_dist); 00215 return false; 00216 } 00217 } else { 00218 // Coarse filter by vertical distance between partitions. 00219 int v_dist = -part->VCoreOverlap(*candidate); 00220 if (v_dist >= MAX(part_box.height(), c_box.height()) / 2) { 00221 if (debug) 00222 tprintf("Too far away: v_dist = %d\n", v_dist); 00223 return false; 00224 } 00225 // Candidates must either overlap in median y, 00226 // or part or candidate must be an acceptable diacritic. 00227 if (!part->VSignificantCoreOverlap(*candidate) && 00228 !part->OKDiacriticMerge(*candidate, debug) && 00229 !candidate->OKDiacriticMerge(*part, debug)) { 00230 if (debug) 00231 tprintf("Candidate fails overlap and diacritic tests!\n"); 00232 return false; 00233 } 00234 } 00235 return true; 00236 } 00237 00238 // Helper function to compute the increase in overlap of the parts list of 00239 // Colpartitions with the combination of merge1 and merge2, compared to 00240 // the overlap with them uncombined. 00241 // An overlap is not counted if passes the OKMergeOverlap test with ok_overlap 00242 // as the pixel overlap limit. merge1 and merge2 must both be non-NULL. 00243 static int IncreaseInOverlap(const ColPartition* merge1, 00244 const ColPartition* merge2, 00245 int ok_overlap, 00246 ColPartition_CLIST* parts) { 00247 ASSERT_HOST(merge1 != NULL && merge2 != NULL); 00248 int total_area = 0; 00249 ColPartition_C_IT it(parts); 00250 TBOX merged_box(merge1->bounding_box()); 00251 merged_box += merge2->bounding_box(); 00252 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00253 ColPartition* part = it.data(); 00254 if (part == merge1 || part == merge2) 00255 continue; 00256 TBOX part_box = part->bounding_box(); 00257 // Compute the overlap of the merged box with part. 00258 int overlap_area = part_box.intersection(merged_box).area(); 00259 if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2, 00260 ok_overlap, false)) { 00261 total_area += overlap_area; 00262 // Subtract the overlap of merge1 and merge2 individually. 00263 overlap_area = part_box.intersection(merge1->bounding_box()).area(); 00264 if (overlap_area > 0) 00265 total_area -= overlap_area; 00266 TBOX intersection_box = part_box.intersection(merge2->bounding_box()); 00267 overlap_area = intersection_box.area(); 00268 if (overlap_area > 0) { 00269 total_area -= overlap_area; 00270 // Add back the 3-way area. 00271 intersection_box &= merge1->bounding_box(); // In-place intersection. 00272 overlap_area = intersection_box.area(); 00273 if (overlap_area > 0) 00274 total_area += overlap_area; 00275 } 00276 } 00277 } 00278 return total_area; 00279 } 00280 00281 // Helper function to test that each partition in candidates is either a 00282 // good diacritic merge with part or an OK merge candidate with all others 00283 // in the candidates list. 00284 // ASCII Art Scenario: 00285 // We sometimes get text such as "join-this" where the - is actually a long 00286 // dash culled from a standard set of extra characters that don't match the 00287 // font of the text. This makes its strokewidth not match and forms a broken 00288 // set of 3 partitions for "join", "-" and "this" and the dash may slightly 00289 // overlap BOTH words. 00290 // ------- ------- 00291 // | ==== | 00292 // ------- ------- 00293 // The standard merge rule: "you can merge 2 partitions as long as there is 00294 // no increase in overlap elsewhere" fails miserably here. Merge any pair 00295 // of partitions and the combined box overlaps more with the third than 00296 // before. To allow the merge, we need to consider whether it is safe to 00297 // merge everything, without merging separate text lines. For that we need 00298 // everything to be an OKMergeCandidate (which is supposed to prevent 00299 // separate text lines merging), but this is hard for diacritics to satisfy, 00300 // so an alternative to being OKMergeCandidate with everything is to be an 00301 // OKDiacriticMerge with part as the base character. 00302 static bool TestCompatibleCandidates(const ColPartition& part, bool debug, 00303 ColPartition_CLIST* candidates) { 00304 ColPartition_C_IT it(candidates); 00305 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00306 ColPartition* candidate = it.data(); 00307 if (!candidate->OKDiacriticMerge(part, false)) { 00308 ColPartition_C_IT it2(it); 00309 for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) { 00310 ColPartition* candidate2 = it2.data(); 00311 if (candidate2 != candidate && 00312 !OKMergeCandidate(candidate, candidate2, false)) { 00313 if (debug) { 00314 tprintf("NC overlap failed:Candidate:"); 00315 candidate2->bounding_box().print(); 00316 tprintf("fails to be a good merge with:"); 00317 candidate->bounding_box().print(); 00318 } 00319 return false; 00320 } 00321 } 00322 } 00323 } 00324 return true; 00325 } 00326 00327 // Finds all the ColPartitions in the grid that overlap with the given 00328 // box and returns them SortByBoxLeft(ed) and uniqued in the given list. 00329 // Any partition equal to not_this (may be NULL) is excluded. 00330 void ColPartitionGrid::FindOverlappingPartitions(const TBOX& box, 00331 const ColPartition* not_this, 00332 ColPartition_CLIST* parts) { 00333 ColPartitionGridSearch rsearch(this); 00334 rsearch.StartRectSearch(box); 00335 ColPartition* part; 00336 while ((part = rsearch.NextRectSearch()) != NULL) { 00337 if (part != not_this) 00338 parts->add_sorted(SortByBoxLeft<ColPartition>, true, part); 00339 } 00340 } 00341 00342 // Finds and returns the best candidate ColPartition to merge with part, 00343 // selected from the candidates list, based on the minimum increase in 00344 // pairwise overlap among all the partitions overlapped by the combined box. 00345 // If overlap_increase is not NULL then it returns the increase in overlap 00346 // that would result from the merge. 00347 // confirm_cb is a permanent callback that (if non-null) will be used to 00348 // confirm the validity of a proposed merge candidate before selecting it. 00349 // 00350 // ======HOW MERGING WORKS====== 00351 // The problem: 00352 // We want to merge all the parts of a textline together, but avoid merging 00353 // separate textlines. Diacritics, i dots, punctuation, and broken characters 00354 // are examples of small bits that need merging with the main textline. 00355 // Drop-caps and descenders in one line that touch ascenders in the one below 00356 // are examples of cases where we don't want to merge. 00357 // 00358 // The solution: 00359 // Merges that increase overlap among other partitions are generally bad. 00360 // Those that don't increase overlap (much) and minimize the total area 00361 // seem to be good. 00362 // 00363 // Ascii art example: 00364 // The text: 00365 // groggy descenders 00366 // minimum ascenders 00367 // The boxes: The === represents a small box near or overlapping the lower box. 00368 // ----------------- 00369 // | | 00370 // ----------------- 00371 // -===------------- 00372 // | | 00373 // ----------------- 00374 // In considering what to do with the small === box, we find the 2 larger 00375 // boxes as neighbours and possible merge candidates, but merging with the 00376 // upper box increases overlap with the lower box, whereas merging with the 00377 // lower box does not increase overlap. 00378 // If the small === box didn't overlap either to start with, total area 00379 // would be minimized by merging with the nearer (lower) box. 00380 // 00381 // This is a simple example. In reality, we have to allow some increase 00382 // in overlap, or tightly spaced text would end up in bits. 00383 ColPartition* ColPartitionGrid::BestMergeCandidate( 00384 const ColPartition* part, ColPartition_CLIST* candidates, bool debug, 00385 TessResultCallback2<bool, const ColPartition*, const ColPartition*>* confirm_cb, 00386 int* overlap_increase) { 00387 if (overlap_increase != NULL) 00388 *overlap_increase = 0; 00389 if (candidates->empty()) 00390 return NULL; 00391 int ok_overlap = 00392 static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); 00393 // The best neighbour to merge with is the one that causes least 00394 // total pairwise overlap among all the neighbours. 00395 // If more than one offers the same total overlap, choose the one 00396 // with the least total area. 00397 const TBOX& part_box = part->bounding_box(); 00398 ColPartition_C_IT it(candidates); 00399 ColPartition* best_candidate = NULL; 00400 // Find the total combined box of all candidates and the original. 00401 TBOX full_box(part_box); 00402 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00403 ColPartition* candidate = it.data(); 00404 full_box += candidate->bounding_box(); 00405 } 00406 // Keep valid neighbours in a list. 00407 ColPartition_CLIST neighbours; 00408 // Now run a rect search of the merged box for overlapping neighbours, as 00409 // we need anything that might be overlapped by the merged box. 00410 FindOverlappingPartitions(full_box, part, &neighbours); 00411 if (debug) { 00412 tprintf("Finding best merge candidate from %d, %d neighbours for box:", 00413 candidates->length(), neighbours.length()); 00414 part_box.print(); 00415 } 00416 // If the best increase in overlap is positive, then we also check the 00417 // worst non-candidate overlap. This catches the case of multiple good 00418 // candidates that overlap each other when merged. If the worst 00419 // non-candidate overlap is better than the best overlap, then return 00420 // the worst non-candidate overlap instead. 00421 ColPartition_CLIST non_candidate_neighbours; 00422 non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true, 00423 &neighbours, candidates); 00424 int worst_nc_increase = 0; 00425 int best_increase = MAX_INT32; 00426 int best_area = 0; 00427 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00428 ColPartition* candidate = it.data(); 00429 if (confirm_cb != NULL && !confirm_cb->Run(part, candidate)) { 00430 if (debug) { 00431 tprintf("Candidate not confirmed:"); 00432 candidate->bounding_box().print(); 00433 } 00434 continue; 00435 } 00436 int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours); 00437 const TBOX& cand_box = candidate->bounding_box(); 00438 if (best_candidate == NULL || increase < best_increase) { 00439 best_candidate = candidate; 00440 best_increase = increase; 00441 best_area = cand_box.bounding_union(part_box).area() - cand_box.area(); 00442 if (debug) { 00443 tprintf("New best merge candidate has increase %d, area %d, over box:", 00444 increase, best_area); 00445 full_box.print(); 00446 candidate->Print(); 00447 } 00448 } else if (increase == best_increase) { 00449 int area = cand_box.bounding_union(part_box).area() - cand_box.area(); 00450 if (area < best_area) { 00451 best_area = area; 00452 best_candidate = candidate; 00453 } 00454 } 00455 increase = IncreaseInOverlap(part, candidate, ok_overlap, 00456 &non_candidate_neighbours); 00457 if (increase > worst_nc_increase) 00458 worst_nc_increase = increase; 00459 } 00460 if (best_increase > 0) { 00461 // If the worst non-candidate increase is less than the best increase 00462 // including the candidates, then all the candidates can merge together 00463 // and the increase in outside overlap would be less, so use that result, 00464 // but only if each candidate is either a good diacritic merge with part, 00465 // or an ok merge candidate with all the others. 00466 // See TestCompatibleCandidates for more explanation and a picture. 00467 if (worst_nc_increase < best_increase && 00468 TestCompatibleCandidates(*part, debug, candidates)) { 00469 best_increase = worst_nc_increase; 00470 } 00471 } 00472 if (overlap_increase != NULL) 00473 *overlap_increase = best_increase; 00474 return best_candidate; 00475 } 00476 00477 // Helper to remove the given box from the given partition, put it in its 00478 // own partition, and add to the partition list. 00479 static void RemoveBadBox(BLOBNBOX* box, ColPartition* part, 00480 ColPartition_LIST* part_list) { 00481 part->RemoveBox(box); 00482 ColPartition::MakeBigPartition(box, part_list); 00483 } 00484 00485 00486 // Split partitions where it reduces overlap between their bounding boxes. 00487 // ColPartitions are after all supposed to be a partitioning of the blobs 00488 // AND of the space on the page! 00489 // Blobs that cause overlaps get removed, put in individual partitions 00490 // and added to the big_parts list. They are most likely characters on 00491 // 2 textlines that touch, or something big like a dropcap. 00492 void ColPartitionGrid::SplitOverlappingPartitions( 00493 ColPartition_LIST* big_parts) { 00494 int ok_overlap = 00495 static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); 00496 // Iterate the ColPartitions in the grid. 00497 ColPartitionGridSearch gsearch(this); 00498 gsearch.StartFullSearch(); 00499 ColPartition* part; 00500 while ((part = gsearch.NextFullSearch()) != NULL) { 00501 // Set up a rectangle search bounded by the part. 00502 const TBOX& box = part->bounding_box(); 00503 ColPartitionGridSearch rsearch(this); 00504 rsearch.SetUniqueMode(true); 00505 rsearch.StartRectSearch(box); 00506 int unresolved_overlaps = 0; 00507 00508 ColPartition* neighbour; 00509 while ((neighbour = rsearch.NextRectSearch()) != NULL) { 00510 if (neighbour == part) 00511 continue; 00512 const TBOX& neighbour_box = neighbour->bounding_box(); 00513 if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) && 00514 part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false)) 00515 continue; // The overlap is OK both ways. 00516 00517 // If removal of the biggest box from either partition eliminates the 00518 // overlap, and it is much bigger than the box left behind, then 00519 // it is either a drop-cap, an inter-line join, or some junk that 00520 // we don't want anyway, so put it in the big_parts list. 00521 if (!part->IsSingleton()) { 00522 BLOBNBOX* excluded = part->BiggestBox(); 00523 TBOX shrunken = part->BoundsWithoutBox(excluded); 00524 if (!shrunken.overlap(neighbour_box) && 00525 excluded->bounding_box().height() > 00526 kBigPartSizeRatio * shrunken.height()) { 00527 // Removing the biggest box fixes the overlap, so do it! 00528 gsearch.RemoveBBox(); 00529 RemoveBadBox(excluded, part, big_parts); 00530 InsertBBox(true, true, part); 00531 gsearch.RepositionIterator(); 00532 break; 00533 } 00534 } else if (box.contains(neighbour_box)) { 00535 ++unresolved_overlaps; 00536 continue; // No amount of splitting will fix it. 00537 } 00538 if (!neighbour->IsSingleton()) { 00539 BLOBNBOX* excluded = neighbour->BiggestBox(); 00540 TBOX shrunken = neighbour->BoundsWithoutBox(excluded); 00541 if (!shrunken.overlap(box) && 00542 excluded->bounding_box().height() > 00543 kBigPartSizeRatio * shrunken.height()) { 00544 // Removing the biggest box fixes the overlap, so do it! 00545 rsearch.RemoveBBox(); 00546 RemoveBadBox(excluded, neighbour, big_parts); 00547 InsertBBox(true, true, neighbour); 00548 gsearch.RepositionIterator(); 00549 break; 00550 } 00551 } 00552 int part_overlap_count = part->CountOverlappingBoxes(neighbour_box); 00553 int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box); 00554 ColPartition* right_part = NULL; 00555 if (neighbour_overlap_count <= part_overlap_count || 00556 part->IsSingleton()) { 00557 // Try to split the neighbour to reduce overlap. 00558 BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box); 00559 if (split_blob != NULL) { 00560 rsearch.RemoveBBox(); 00561 right_part = neighbour->SplitAtBlob(split_blob); 00562 InsertBBox(true, true, neighbour); 00563 ASSERT_HOST(right_part != NULL); 00564 } 00565 } else { 00566 // Try to split part to reduce overlap. 00567 BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box); 00568 if (split_blob != NULL) { 00569 gsearch.RemoveBBox(); 00570 right_part = part->SplitAtBlob(split_blob); 00571 InsertBBox(true, true, part); 00572 ASSERT_HOST(right_part != NULL); 00573 } 00574 } 00575 if (right_part != NULL) { 00576 InsertBBox(true, true, right_part); 00577 gsearch.RepositionIterator(); 00578 rsearch.RepositionIterator(); 00579 break; 00580 } 00581 } 00582 if (unresolved_overlaps > 2 && part->IsSingleton()) { 00583 // This part is no good so just add to big_parts. 00584 RemoveBBox(part); 00585 ColPartition_IT big_it(big_parts); 00586 part->set_block_owned(true); 00587 big_it.add_to_end(part); 00588 gsearch.RepositionIterator(); 00589 } 00590 } 00591 } 00592 00593 // Filters partitions of source_type by looking at local neighbours. 00594 // Where a majority of neighbours have a text type, the partitions are 00595 // changed to text, where the neighbours have image type, they are changed 00596 // to image, and partitions that have no definite neighbourhood type are 00597 // left unchanged. 00598 // im_box and rerotation are used to map blob coordinates onto the 00599 // nontext_map, which is used to prevent the spread of text neighbourhoods 00600 // into images. 00601 // Returns true if anything was changed. 00602 bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type, 00603 Pix* nontext_map, 00604 const TBOX& im_box, 00605 const FCOORD& rotation) { 00606 // Iterate the ColPartitions in the grid. 00607 ColPartitionGridSearch gsearch(this); 00608 gsearch.StartFullSearch(); 00609 ColPartition* part; 00610 bool any_changed = false; 00611 while ((part = gsearch.NextFullSearch()) != NULL) { 00612 if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type())) 00613 continue; 00614 const TBOX& box = part->bounding_box(); 00615 bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); 00616 if (SmoothRegionType(nontext_map, im_box, rotation, debug, part)) 00617 any_changed = true; 00618 } 00619 return any_changed; 00620 } 00621 00622 // Compute the mean RGB of the light and dark pixels in each ColPartition 00623 // and also the rms error in the linearity of color. 00624 void ColPartitionGrid::ComputePartitionColors(Pix* scaled_color, 00625 int scaled_factor, 00626 const FCOORD& rerotation) { 00627 if (scaled_color == NULL) 00628 return; 00629 Pix* color_map1 = NULL; 00630 Pix* color_map2 = NULL; 00631 Pix* rms_map = NULL; 00632 if (textord_tabfind_show_color_fit) { 00633 int width = pixGetWidth(scaled_color); 00634 int height = pixGetHeight(scaled_color); 00635 color_map1 = pixCreate(width, height, 32); 00636 color_map2 = pixCreate(width, height, 32); 00637 rms_map = pixCreate(width, height, 8); 00638 } 00639 // Iterate the ColPartitions in the grid. 00640 ColPartitionGridSearch gsearch(this); 00641 gsearch.StartFullSearch(); 00642 ColPartition* part; 00643 while ((part = gsearch.NextFullSearch()) != NULL) { 00644 TBOX part_box = part->bounding_box(); 00645 part_box.rotate_large(rerotation); 00646 ImageFind::ComputeRectangleColors(part_box, scaled_color, 00647 scaled_factor, 00648 color_map1, color_map2, rms_map, 00649 part->color1(), part->color2()); 00650 } 00651 if (color_map1 != NULL) { 00652 pixWrite("swcolorinput.png", scaled_color, IFF_PNG); 00653 pixWrite("swcolor1.png", color_map1, IFF_PNG); 00654 pixWrite("swcolor2.png", color_map2, IFF_PNG); 00655 pixWrite("swrms.png", rms_map, IFF_PNG); 00656 pixDestroy(&color_map1); 00657 pixDestroy(&color_map2); 00658 pixDestroy(&rms_map); 00659 } 00660 } 00661 00662 // Reflects the grid and its colpartitions in the y-axis, assuming that 00663 // all blob boxes have already been done. 00664 void ColPartitionGrid::ReflectInYAxis() { 00665 ColPartition_LIST parts; 00666 ColPartition_IT part_it(&parts); 00667 // Iterate the ColPartitions in the grid to extract them. 00668 ColPartitionGridSearch gsearch(this); 00669 gsearch.StartFullSearch(); 00670 ColPartition* part; 00671 while ((part = gsearch.NextFullSearch()) != NULL) { 00672 part_it.add_after_then_move(part); 00673 } 00674 ICOORD bot_left(-tright().x(), bleft().y()); 00675 ICOORD top_right(-bleft().x(), tright().y()); 00676 // Reinitializing the grid with reflected coords also clears all the 00677 // pointers, so parts will now own the ColPartitions. (Briefly). 00678 Init(gridsize(), bot_left, top_right); 00679 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { 00680 part = part_it.extract(); 00681 part->ReflectInYAxis(); 00682 InsertBBox(true, true, part); 00683 } 00684 } 00685 00686 // Transforms the grid of partitions to the output blocks, putting each 00687 // partition into a separate block. We don't really care about the order, 00688 // as we just want to get as much text as possible without trying to organize 00689 // it into proper blocks or columns. 00690 // TODO(rays) some kind of sort function would be useful and probably better 00691 // than the default here, which is to sort by order of the grid search. 00692 void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, 00693 TO_BLOCK_LIST* to_blocks) { 00694 TO_BLOCK_IT to_block_it(to_blocks); 00695 BLOCK_IT block_it(blocks); 00696 // All partitions will be put on this list and deleted on return. 00697 ColPartition_LIST parts; 00698 ColPartition_IT part_it(&parts); 00699 // Iterate the ColPartitions in the grid to extract them. 00700 ColPartitionGridSearch gsearch(this); 00701 gsearch.StartFullSearch(); 00702 ColPartition* part; 00703 while ((part = gsearch.NextFullSearch()) != NULL) { 00704 part_it.add_after_then_move(part); 00705 // The partition has to be at least vaguely like text. 00706 BlobRegionType blob_type = part->blob_type(); 00707 if (BLOBNBOX::IsTextType(blob_type) || 00708 (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) { 00709 PolyBlockType type = blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT 00710 : PT_FLOWING_TEXT; 00711 // Get metrics from the row that will be used for the block. 00712 TBOX box = part->bounding_box(); 00713 int median_width = part->median_width(); 00714 int median_height = part->median_size(); 00715 // Turn the partition into a TO_ROW. 00716 TO_ROW* row = part->MakeToRow(); 00717 if (row == NULL) { 00718 // This partition is dead. 00719 part->DeleteBoxes(); 00720 continue; 00721 } 00722 BLOCK* block = new BLOCK("", true, 0, 0, box.left(), box.bottom(), 00723 box.right(), box.top()); 00724 block->set_poly_block(new POLY_BLOCK(box, type)); 00725 TO_BLOCK* to_block = new TO_BLOCK(block); 00726 TO_ROW_IT row_it(to_block->get_rows()); 00727 row_it.add_after_then_move(row); 00728 // We haven't differentially rotated vertical and horizontal text at 00729 // this point, so use width or height as appropriate. 00730 if (blob_type == BRT_VERT_TEXT) { 00731 to_block->line_size = static_cast<float>(median_width); 00732 to_block->line_spacing = static_cast<float>(box.width()); 00733 to_block->max_blob_size = static_cast<float>(box.width() + 1); 00734 } else { 00735 to_block->line_size = static_cast<float>(median_height); 00736 to_block->line_spacing = static_cast<float>(box.height()); 00737 to_block->max_blob_size = static_cast<float>(box.height() + 1); 00738 } 00739 block_it.add_to_end(block); 00740 to_block_it.add_to_end(to_block); 00741 } else { 00742 // This partition is dead. 00743 part->DeleteBoxes(); 00744 } 00745 } 00746 Clear(); 00747 // Now it is safe to delete the ColPartitions as parts goes out of scope. 00748 } 00749 00750 // Rotates the grid and its colpartitions by the given angle, assuming that 00751 // all blob boxes have already been done. 00752 void ColPartitionGrid::Deskew(const FCOORD& deskew) { 00753 ColPartition_LIST parts; 00754 ColPartition_IT part_it(&parts); 00755 // Iterate the ColPartitions in the grid to extract them. 00756 ColPartitionGridSearch gsearch(this); 00757 gsearch.StartFullSearch(); 00758 ColPartition* part; 00759 while ((part = gsearch.NextFullSearch()) != NULL) { 00760 part_it.add_after_then_move(part); 00761 } 00762 // Rebuild the grid to the new size. 00763 TBOX grid_box(bleft_, tright_); 00764 grid_box.rotate_large(deskew); 00765 Init(gridsize(), grid_box.botleft(), grid_box.topright()); 00766 // Reinitializing the grid with rotated coords also clears all the 00767 // pointers, so parts will now own the ColPartitions. (Briefly). 00768 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { 00769 part = part_it.extract(); 00770 part->ComputeLimits(); 00771 InsertBBox(true, true, part); 00772 } 00773 } 00774 00775 // Sets the left and right tabs of the partitions in the grid. 00776 void ColPartitionGrid::SetTabStops(TabFind* tabgrid) { 00777 // Iterate the ColPartitions in the grid. 00778 ColPartitionGridSearch gsearch(this); 00779 gsearch.StartFullSearch(); 00780 ColPartition* part; 00781 while ((part = gsearch.NextFullSearch()) != NULL) { 00782 const TBOX& part_box = part->bounding_box(); 00783 TabVector* left_line = tabgrid->LeftTabForBox(part_box, true, false); 00784 // If the overlapping line is not a left tab, try for non-overlapping. 00785 if (left_line != NULL && !left_line->IsLeftTab()) 00786 left_line = tabgrid->LeftTabForBox(part_box, false, false); 00787 if (left_line != NULL && left_line->IsLeftTab()) 00788 part->SetLeftTab(left_line); 00789 TabVector* right_line = tabgrid->RightTabForBox(part_box, true, false); 00790 if (right_line != NULL && !right_line->IsRightTab()) 00791 right_line = tabgrid->RightTabForBox(part_box, false, false); 00792 if (right_line != NULL && right_line->IsRightTab()) 00793 part->SetRightTab(right_line); 00794 part->SetColumnGoodness(tabgrid->WidthCB()); 00795 } 00796 } 00797 00798 // Makes the ColPartSets and puts them in the PartSetVector ready 00799 // for finding column bounds. Returns false if no partitions were found. 00800 bool ColPartitionGrid::MakeColPartSets(PartSetVector* part_sets) { 00801 ColPartition_LIST* part_lists = new ColPartition_LIST[gridheight()]; 00802 part_sets->reserve(gridheight()); 00803 // Iterate the ColPartitions in the grid to get parts onto lists for the 00804 // y bottom of each. 00805 ColPartitionGridSearch gsearch(this); 00806 gsearch.StartFullSearch(); 00807 ColPartition* part; 00808 bool any_parts_found = false; 00809 while ((part = gsearch.NextFullSearch()) != NULL) { 00810 BlobRegionType blob_type = part->blob_type(); 00811 if (blob_type != BRT_NOISE && 00812 (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { 00813 int grid_x, grid_y; 00814 const TBOX& part_box = part->bounding_box(); 00815 GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); 00816 ColPartition_IT part_it(&part_lists[grid_y]); 00817 part_it.add_to_end(part); 00818 any_parts_found = true; 00819 } 00820 } 00821 if (any_parts_found) { 00822 for (int grid_y = 0; grid_y < gridheight(); ++grid_y) { 00823 ColPartitionSet* line_set = NULL; 00824 if (!part_lists[grid_y].empty()) { 00825 line_set = new ColPartitionSet(&part_lists[grid_y]); 00826 } 00827 part_sets->push_back(line_set); 00828 } 00829 } 00830 delete [] part_lists; 00831 return any_parts_found; 00832 } 00833 00834 // Makes a single ColPartitionSet consisting of a single ColPartition that 00835 // represents the total horizontal extent of the significant content on the 00836 // page. Used for the single column setting in place of automatic detection. 00837 // Returns NULL if the page is empty of significant content. 00838 ColPartitionSet* ColPartitionGrid::MakeSingleColumnSet(WidthCallback* cb) { 00839 ColPartition* single_column_part = NULL; 00840 // Iterate the ColPartitions in the grid to get parts onto lists for the 00841 // y bottom of each. 00842 ColPartitionGridSearch gsearch(this); 00843 gsearch.StartFullSearch(); 00844 ColPartition* part; 00845 while ((part = gsearch.NextFullSearch()) != NULL) { 00846 BlobRegionType blob_type = part->blob_type(); 00847 if (blob_type != BRT_NOISE && 00848 (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { 00849 // Consider for single column. 00850 BlobTextFlowType flow = part->flow(); 00851 if ((blob_type == BRT_TEXT && 00852 (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN || 00853 flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) || 00854 blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) { 00855 if (single_column_part == NULL) { 00856 single_column_part = part->ShallowCopy(); 00857 single_column_part->set_blob_type(BRT_TEXT); 00858 // Copy the tabs from itself to properly setup the margins. 00859 single_column_part->CopyLeftTab(*single_column_part, false); 00860 single_column_part->CopyRightTab(*single_column_part, false); 00861 } else { 00862 if (part->left_key() < single_column_part->left_key()) 00863 single_column_part->CopyLeftTab(*part, false); 00864 if (part->right_key() > single_column_part->right_key()) 00865 single_column_part->CopyRightTab(*part, false); 00866 } 00867 } 00868 } 00869 } 00870 if (single_column_part != NULL) { 00871 // Make a ColPartitionSet out of the single_column_part as a candidate 00872 // for the single column case. 00873 single_column_part->SetColumnGoodness(cb); 00874 return new ColPartitionSet(single_column_part); 00875 } 00876 return NULL; 00877 } 00878 00879 // Mark the BLOBNBOXes in each partition as being owned by that partition. 00880 void ColPartitionGrid::ClaimBoxes() { 00881 // Iterate the ColPartitions in the grid. 00882 ColPartitionGridSearch gsearch(this); 00883 gsearch.StartFullSearch(); 00884 ColPartition* part; 00885 while ((part = gsearch.NextFullSearch()) != NULL) { 00886 part->ClaimBoxes(); 00887 } 00888 } 00889 00890 // Retypes all the blobs referenced by the partitions in the grid. 00891 // Image blobs are found and returned in the im_blobs list, as they are not 00892 // owned by the block. 00893 void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs) { 00894 BLOBNBOX_IT im_blob_it(im_blobs); 00895 ColPartition_LIST dead_parts; 00896 ColPartition_IT dead_part_it(&dead_parts); 00897 // Iterate the ColPartitions in the grid. 00898 ColPartitionGridSearch gsearch(this); 00899 gsearch.StartFullSearch(); 00900 ColPartition* part; 00901 while ((part = gsearch.NextFullSearch()) != NULL) { 00902 BlobRegionType blob_type = part->blob_type(); 00903 BlobTextFlowType flow = part->flow(); 00904 if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) { 00905 BLOBNBOX_C_IT blob_it(part->boxes()); 00906 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00907 BLOBNBOX* blob = blob_it.data(); 00908 im_blob_it.add_after_then_move(blob); 00909 } 00910 } else if (blob_type != BRT_NOISE) { 00911 // Make sure the blobs are marked with the correct type and flow. 00912 BLOBNBOX_C_IT blob_it(part->boxes()); 00913 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00914 BLOBNBOX* blob = blob_it.data(); 00915 if (blob->region_type() == BRT_NOISE) { 00916 // TODO(rays) Deprecated. Change this section to an assert to verify 00917 // and then delete. 00918 ASSERT_HOST(blob->cblob()->area() != 0); 00919 blob->set_owner(NULL); 00920 blob_it.extract(); 00921 } else { 00922 blob->set_region_type(blob_type); 00923 if (blob->flow() != BTFT_LEADER) 00924 blob->set_flow(flow); 00925 } 00926 } 00927 } 00928 if (blob_type == BRT_NOISE || part->boxes()->empty()) { 00929 BLOBNBOX_C_IT blob_it(part->boxes()); 00930 part->DisownBoxes(); 00931 dead_part_it.add_to_end(part); 00932 gsearch.RemoveBBox(); 00933 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00934 BLOBNBOX* blob = blob_it.data(); 00935 if (blob->cblob()->area() == 0) { 00936 // Any blob with zero area is a fake image blob and should be deleted. 00937 delete blob->cblob(); 00938 delete blob; 00939 } 00940 } 00941 } 00942 } 00943 } 00944 00945 // The boxes within the partitions have changed (by deskew) so recompute 00946 // the bounds of all the partitions and reinsert them into the grid. 00947 void ColPartitionGrid::RecomputeBounds(int gridsize, 00948 const ICOORD& bleft, 00949 const ICOORD& tright, 00950 const ICOORD& vertical) { 00951 ColPartition_LIST saved_parts; 00952 ColPartition_IT part_it(&saved_parts); 00953 // Iterate the ColPartitions in the grid to get parts onto a list. 00954 ColPartitionGridSearch gsearch(this); 00955 gsearch.StartFullSearch(); 00956 ColPartition* part; 00957 while ((part = gsearch.NextFullSearch()) != NULL) { 00958 part_it.add_to_end(part); 00959 } 00960 // Reinitialize grid to the new size. 00961 Init(gridsize, bleft, tright); 00962 // Recompute the bounds of the parts and put them back in the new grid. 00963 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { 00964 part = part_it.extract(); 00965 part->set_vertical(vertical); 00966 part->ComputeLimits(); 00967 InsertBBox(true, true, part); 00968 } 00969 } 00970 00971 // Improves the margins of the ColPartitions in the grid by calling 00972 // FindPartitionMargins on each. 00973 // best_columns, which may be NULL, is an array of pointers indicating the 00974 // column set at each y-coordinate in the grid. 00975 // best_columns is usually the best_columns_ member of ColumnFinder. 00976 void ColPartitionGrid::GridFindMargins(ColPartitionSet** best_columns) { 00977 // Iterate the ColPartitions in the grid. 00978 ColPartitionGridSearch gsearch(this); 00979 gsearch.StartFullSearch(); 00980 ColPartition* part; 00981 while ((part = gsearch.NextFullSearch()) != NULL) { 00982 // Set up a rectangle search x-bounded by the column and y by the part. 00983 ColPartitionSet* columns = best_columns != NULL 00984 ? best_columns[gsearch.GridY()] 00985 : NULL; 00986 FindPartitionMargins(columns, part); 00987 const TBOX& box = part->bounding_box(); 00988 if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) { 00989 tprintf("Computed margins for part:"); 00990 part->Print(); 00991 } 00992 } 00993 } 00994 00995 // Improves the margins of the ColPartitions in the list by calling 00996 // FindPartitionMargins on each. 00997 // best_columns, which may be NULL, is an array of pointers indicating the 00998 // column set at each y-coordinate in the grid. 00999 // best_columns is usually the best_columns_ member of ColumnFinder. 01000 void ColPartitionGrid::ListFindMargins(ColPartitionSet** best_columns, 01001 ColPartition_LIST* parts) { 01002 ColPartition_IT part_it(parts); 01003 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { 01004 ColPartition* part = part_it.data(); 01005 ColPartitionSet* columns = NULL; 01006 if (best_columns != NULL) { 01007 TBOX part_box = part->bounding_box(); 01008 // Get the columns from the y grid coord. 01009 int grid_x, grid_y; 01010 GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); 01011 columns = best_columns[grid_y]; 01012 } 01013 FindPartitionMargins(columns, part); 01014 } 01015 } 01016 01017 // Deletes all the partitions in the grid after disowning all the blobs. 01018 void ColPartitionGrid::DeleteParts() { 01019 ColPartition_LIST dead_parts; 01020 ColPartition_IT dead_it(&dead_parts); 01021 ColPartitionGridSearch gsearch(this); 01022 gsearch.StartFullSearch(); 01023 ColPartition* part; 01024 while ((part = gsearch.NextFullSearch()) != NULL) { 01025 part->DisownBoxes(); 01026 dead_it.add_to_end(part); // Parts will be deleted on return. 01027 } 01028 Clear(); 01029 } 01030 01031 // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and 01032 // all the blobs in them. 01033 void ColPartitionGrid::DeleteUnknownParts(TO_BLOCK* block) { 01034 ColPartitionGridSearch gsearch(this); 01035 gsearch.StartFullSearch(); 01036 ColPartition* part; 01037 while ((part = gsearch.NextFullSearch()) != NULL) { 01038 if (part->blob_type() == BRT_UNKNOWN) { 01039 gsearch.RemoveBBox(); 01040 // Once marked, the blobs will be swept up by DeleteUnownedNoise. 01041 part->set_flow(BTFT_NONTEXT); 01042 part->set_blob_type(BRT_NOISE); 01043 part->SetBlobTypes(); 01044 part->DisownBoxes(); 01045 delete part; 01046 } 01047 } 01048 block->DeleteUnownedNoise(); 01049 } 01050 01051 // Finds and marks text partitions that represent figure captions. 01052 void ColPartitionGrid::FindFigureCaptions() { 01053 // For each image region find its best candidate text caption region, 01054 // if any and mark it as such. 01055 ColPartitionGridSearch gsearch(this); 01056 gsearch.StartFullSearch(); 01057 ColPartition* part; 01058 while ((part = gsearch.NextFullSearch()) != NULL) { 01059 if (part->IsImageType()) { 01060 const TBOX& part_box = part->bounding_box(); 01061 bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(), 01062 part_box.bottom()); 01063 ColPartition* best_caption = NULL; 01064 int best_dist = 0; // Distance to best_caption. 01065 int best_upper = 0; // Direction of best_caption. 01066 // Handle both lower and upper directions. 01067 for (int upper = 0; upper < 2; ++upper) { 01068 ColPartition_C_IT partner_it(upper ? part->upper_partners() 01069 : part->lower_partners()); 01070 // If there are no image partners, then this direction is ok. 01071 for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); 01072 partner_it.forward()) { 01073 ColPartition* partner = partner_it.data(); 01074 if (partner->IsImageType()) { 01075 break; 01076 } 01077 } 01078 if (!partner_it.cycled_list()) continue; 01079 // Find the nearest totally overlapping text partner. 01080 for (partner_it.mark_cycle_pt(); !partner_it.cycled_list(); 01081 partner_it.forward()) { 01082 ColPartition* partner = partner_it.data(); 01083 if (!partner->IsTextType()) continue; 01084 const TBOX& partner_box = partner->bounding_box(); 01085 if (debug) { 01086 tprintf("Finding figure captions for image part:"); 01087 part_box.print(); 01088 tprintf("Considering partner:"); 01089 partner_box.print(); 01090 } 01091 if (partner_box.left() >= part_box.left() && 01092 partner_box.right() <= part_box.right()) { 01093 int dist = partner_box.y_gap(part_box); 01094 if (best_caption == NULL || dist < best_dist) { 01095 best_dist = dist; 01096 best_caption = partner; 01097 best_upper = upper; 01098 } 01099 } 01100 } 01101 } 01102 if (best_caption != NULL) { 01103 if (debug) { 01104 tprintf("Best caption candidate:"); 01105 best_caption->bounding_box().print(); 01106 } 01107 // We have a candidate caption. Qualify it as being separable from 01108 // any body text. We are looking for either a small number of lines 01109 // or a big gap that indicates a separation from the body text. 01110 int line_count = 0; 01111 int biggest_gap = 0; 01112 int smallest_gap = MAX_INT16; 01113 int total_height = 0; 01114 int mean_height = 0; 01115 ColPartition* end_partner = NULL; 01116 ColPartition* next_partner = NULL; 01117 for (ColPartition* partner = best_caption; partner != NULL && 01118 line_count <= kMaxCaptionLines; 01119 partner = next_partner) { 01120 if (!partner->IsTextType()) { 01121 end_partner = partner; 01122 break; 01123 } 01124 ++line_count; 01125 total_height += partner->bounding_box().height(); 01126 next_partner = partner->SingletonPartner(best_upper); 01127 if (next_partner != NULL) { 01128 int gap = partner->bounding_box().y_gap( 01129 next_partner->bounding_box()); 01130 if (gap > biggest_gap) { 01131 biggest_gap = gap; 01132 end_partner = next_partner; 01133 mean_height = total_height / line_count; 01134 } else if (gap < smallest_gap) { 01135 smallest_gap = gap; 01136 } 01137 // If the gap looks big compared to the text size and the smallest 01138 // gap seen so far, then we can stop. 01139 if (biggest_gap > mean_height * kMinCaptionGapHeightRatio && 01140 biggest_gap > smallest_gap * kMinCaptionGapRatio) 01141 break; 01142 } 01143 } 01144 if (debug) { 01145 tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n", 01146 line_count, biggest_gap, smallest_gap, mean_height); 01147 if (end_partner != NULL) { 01148 tprintf("End partner:"); 01149 end_partner->bounding_box().print(); 01150 } 01151 } 01152 if (next_partner == NULL && line_count <= kMaxCaptionLines) 01153 end_partner = NULL; // No gap, but line count is small. 01154 if (line_count <= kMaxCaptionLines) { 01155 // This is a qualified caption. Mark the text as caption. 01156 for (ColPartition* partner = best_caption; partner != NULL && 01157 partner != end_partner; 01158 partner = next_partner) { 01159 partner->set_type(PT_CAPTION_TEXT); 01160 partner->SetBlobTypes(); 01161 if (debug) { 01162 tprintf("Set caption type for partition:"); 01163 partner->bounding_box().print(); 01164 } 01165 next_partner = partner->SingletonPartner(best_upper); 01166 } 01167 } 01168 } 01169 } 01170 } 01171 } 01172 01175 01176 // For every ColPartition in the grid, finds its upper and lower neighbours. 01177 void ColPartitionGrid::FindPartitionPartners() { 01178 ColPartitionGridSearch gsearch(this); 01179 gsearch.StartFullSearch(); 01180 ColPartition* part; 01181 while ((part = gsearch.NextFullSearch()) != NULL) { 01182 if (part->IsVerticalType()) { 01183 FindVPartitionPartners(true, part); 01184 FindVPartitionPartners(false, part); 01185 } else { 01186 FindPartitionPartners(true, part); 01187 FindPartitionPartners(false, part); 01188 } 01189 } 01190 } 01191 01192 // Finds the best partner in the given direction for the given partition. 01193 // Stores the result with AddPartner. 01194 void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition* part) { 01195 if (part->type() == PT_NOISE) 01196 return; // Noise is not allowed to partner anything. 01197 const TBOX& box = part->bounding_box(); 01198 int top = part->median_top(); 01199 int bottom = part->median_bottom(); 01200 int height = top - bottom; 01201 int mid_y = (bottom + top) / 2; 01202 ColPartitionGridSearch vsearch(this); 01203 // Search down for neighbour below 01204 vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY()); 01205 ColPartition* neighbour; 01206 ColPartition* best_neighbour = NULL; 01207 int best_dist = MAX_INT32; 01208 while ((neighbour = vsearch.NextVerticalSearch(!upper)) != NULL) { 01209 if (neighbour == part || neighbour->type() == PT_NOISE) 01210 continue; // Noise is not allowed to partner anything. 01211 int neighbour_bottom = neighbour->median_bottom(); 01212 int neighbour_top = neighbour->median_top(); 01213 int neighbour_y = (neighbour_bottom + neighbour_top) / 2; 01214 if (upper != (neighbour_y > mid_y)) 01215 continue; 01216 if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour)) 01217 continue; 01218 if (!part->TypesMatch(*neighbour)) { 01219 if (best_neighbour == NULL) 01220 best_neighbour = neighbour; 01221 continue; 01222 } 01223 int dist = upper ? neighbour_bottom - top : bottom - neighbour_top; 01224 if (dist <= kMaxPartitionSpacing * height) { 01225 if (dist < best_dist) { 01226 best_dist = dist; 01227 best_neighbour = neighbour; 01228 } 01229 } else { 01230 break; 01231 } 01232 } 01233 if (best_neighbour != NULL) 01234 part->AddPartner(upper, best_neighbour); 01235 } 01236 01237 // Finds the best partner in the given direction for the given partition. 01238 // Stores the result with AddPartner. 01239 void ColPartitionGrid::FindVPartitionPartners(bool to_the_left, 01240 ColPartition* part) { 01241 if (part->type() == PT_NOISE) 01242 return; // Noise is not allowed to partner anything. 01243 const TBOX& box = part->bounding_box(); 01244 int left = part->median_left(); 01245 int right = part->median_right(); 01246 int width = right - left; 01247 int mid_x = (left + right) / 2; 01248 ColPartitionGridSearch hsearch(this); 01249 // Search left for neighbour to_the_left 01250 hsearch.StartSideSearch(mid_x, box.bottom(), box.top()); 01251 ColPartition* neighbour; 01252 ColPartition* best_neighbour = NULL; 01253 int best_dist = MAX_INT32; 01254 while ((neighbour = hsearch.NextSideSearch(to_the_left)) != NULL) { 01255 if (neighbour == part || neighbour->type() == PT_NOISE) 01256 continue; // Noise is not allowed to partner anything. 01257 int neighbour_left = neighbour->median_left(); 01258 int neighbour_right = neighbour->median_right(); 01259 int neighbour_x = (neighbour_left + neighbour_right) / 2; 01260 if (to_the_left != (neighbour_x < mid_x)) 01261 continue; 01262 if (!part->VOverlaps(*neighbour)) 01263 continue; 01264 if (!part->TypesMatch(*neighbour)) 01265 continue; // Only match to other vertical text. 01266 int dist = to_the_left ? left - neighbour_right : neighbour_left - right; 01267 if (dist <= kMaxPartitionSpacing * width) { 01268 if (dist < best_dist || best_neighbour == NULL) { 01269 best_dist = dist; 01270 best_neighbour = neighbour; 01271 } 01272 } else { 01273 break; 01274 } 01275 } 01276 // For vertical partitions, the upper partner is to the left, and lower is 01277 // to the right. 01278 if (best_neighbour != NULL) 01279 part->AddPartner(to_the_left, best_neighbour); 01280 } 01281 01282 // For every ColPartition with multiple partners in the grid, reduces the 01283 // number of partners to 0 or 1. If get_desperate is true, goes to more 01284 // desperate merge methods to merge flowing text before breaking partnerships. 01285 void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) { 01286 ColPartitionGridSearch gsearch(this); 01287 // Refine in type order so that chasing multiple partners can be done 01288 // before eliminating type mis-matching partners. 01289 for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) { 01290 // Iterate the ColPartitions in the grid. 01291 gsearch.StartFullSearch(); 01292 ColPartition* part; 01293 while ((part = gsearch.NextFullSearch()) != NULL) { 01294 part->RefinePartners(static_cast<PolyBlockType>(type), 01295 get_desperate, this); 01296 // Iterator may have been messed up by a merge. 01297 gsearch.RepositionIterator(); 01298 } 01299 } 01300 } 01301 01302 01303 // ========================== PRIVATE CODE ======================== 01304 01305 // Finds and returns a list of candidate ColPartitions to merge with part. 01306 // The candidates must overlap search_box, and when merged must not 01307 // overlap any other partitions that are not overlapped by each individually. 01308 void ColPartitionGrid::FindMergeCandidates(const ColPartition* part, 01309 const TBOX& search_box, bool debug, 01310 ColPartition_CLIST* candidates) { 01311 int ok_overlap = 01312 static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); 01313 const TBOX& part_box = part->bounding_box(); 01314 // Now run the rect search. 01315 ColPartitionGridSearch rsearch(this); 01316 rsearch.SetUniqueMode(true); 01317 rsearch.StartRectSearch(search_box); 01318 ColPartition* candidate; 01319 while ((candidate = rsearch.NextRectSearch()) != NULL) { 01320 if (!OKMergeCandidate(part, candidate, debug)) 01321 continue; 01322 const TBOX& c_box = candidate->bounding_box(); 01323 // Candidate seems to be a potential merge with part. If one contains 01324 // the other, then the merge is a no-brainer. Otherwise, search the 01325 // combined box to see if anything else is inappropriately overlapped. 01326 if (!part_box.contains(c_box) && !c_box.contains(part_box)) { 01327 // Search the combined rectangle to see if anything new is overlapped. 01328 // This is a preliminary test designed to quickly weed-out stupid 01329 // merge candidates that would create a big list of overlapped objects 01330 // for the squared-order overlap analysis. Eg. vertical and horizontal 01331 // line-like objects that overlap real text when merged: 01332 // || ========================== 01333 // || 01334 // || r e a l t e x t 01335 // || 01336 // || 01337 TBOX merged_box(part_box); 01338 merged_box += c_box; 01339 ColPartitionGridSearch msearch(this); 01340 msearch.SetUniqueMode(true); 01341 msearch.StartRectSearch(merged_box); 01342 ColPartition* neighbour; 01343 while ((neighbour = msearch.NextRectSearch()) != NULL) { 01344 if (neighbour == part || neighbour == candidate) 01345 continue; // Ignore itself. 01346 if (neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, false)) 01347 continue; // This kind of merge overlap is OK. 01348 TBOX n_box = neighbour->bounding_box(); 01349 // The overlap is OK if: 01350 // * the n_box already overlapped the part or the candidate OR 01351 // * the n_box is a suitable merge with either part or candidate 01352 if (!n_box.overlap(part_box) && !n_box.overlap(c_box) && 01353 !OKMergeCandidate(part, neighbour, false) && 01354 !OKMergeCandidate(candidate, neighbour, false)) 01355 break; 01356 } 01357 if (neighbour != NULL) { 01358 if (debug) { 01359 tprintf("Combined box overlaps another that is not OK despite" 01360 " allowance of %d:", ok_overlap); 01361 neighbour->bounding_box().print(); 01362 tprintf("Reason:"); 01363 OKMergeCandidate(part, neighbour, true); 01364 tprintf("...and:"); 01365 OKMergeCandidate(candidate, neighbour, true); 01366 tprintf("Overlap:"); 01367 neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, true); 01368 } 01369 continue; 01370 } 01371 } 01372 if (debug) { 01373 tprintf("Adding candidate:"); 01374 candidate->bounding_box().print(); 01375 } 01376 // Unique elements as they arrive. 01377 candidates->add_sorted(SortByBoxLeft<ColPartition>, true, candidate); 01378 } 01379 } 01380 01381 // Smoothes the region type/flow type of the given part by looking at local 01382 // neigbours and the given image mask. Searches a padded rectangle with the 01383 // padding truncated on one size of the part's box in turn for each side, 01384 // using the result (if any) that has the least distance to all neighbours 01385 // that contribute to the decision. This biases in favor of rectangular 01386 // regions without completely enforcing them. 01387 // If a good decision cannot be reached, the part is left unchanged. 01388 // im_box and rerotation are used to map blob coordinates onto the 01389 // nontext_map, which is used to prevent the spread of text neighbourhoods 01390 // into images. 01391 // Returns true if the partition was changed. 01392 bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map, 01393 const TBOX& im_box, 01394 const FCOORD& rerotation, 01395 bool debug, 01396 ColPartition* part) { 01397 const TBOX& part_box = part->bounding_box(); 01398 if (debug) { 01399 tprintf("Smooothing part at:"); 01400 part_box.print(); 01401 } 01402 BlobRegionType best_type = BRT_UNKNOWN; 01403 int best_dist = MAX_INT32; 01404 int max_dist = MIN(part_box.width(), part_box.height()); 01405 max_dist = MAX(max_dist * kMaxNeighbourDistFactor, gridsize() * 2); 01406 // Search with the pad truncated on each side of the box in turn. 01407 bool any_image = false; 01408 bool all_image = true; 01409 for (int d = 0; d < BND_COUNT; ++d) { 01410 int dist; 01411 BlobNeighbourDir dir = static_cast<BlobNeighbourDir>(d); 01412 BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box, 01413 rerotation, debug, *part, 01414 &dist); 01415 if (debug) { 01416 tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist); 01417 } 01418 if (type != BRT_UNKNOWN && dist < best_dist) { 01419 best_dist = dist; 01420 best_type = type; 01421 } 01422 if (type == BRT_POLYIMAGE) 01423 any_image = true; 01424 else 01425 all_image = false; 01426 } 01427 if (best_dist > max_dist) 01428 return false; // Too far away to set the type with it. 01429 if (part->flow() == BTFT_STRONG_CHAIN && !all_image) { 01430 return false; // We are not modifying it. 01431 } 01432 BlobRegionType new_type = part->blob_type(); 01433 BlobTextFlowType new_flow = part->flow(); 01434 if (best_type == BRT_TEXT && !any_image) { 01435 new_flow = BTFT_STRONG_CHAIN; 01436 new_type = BRT_TEXT; 01437 } else if (best_type == BRT_VERT_TEXT && !any_image) { 01438 new_flow = BTFT_STRONG_CHAIN; 01439 new_type = BRT_VERT_TEXT; 01440 } else if (best_type == BRT_POLYIMAGE) { 01441 new_flow = BTFT_NONTEXT; 01442 new_type = BRT_UNKNOWN; 01443 } 01444 if (new_type != part->blob_type() || new_flow != part->flow()) { 01445 part->set_flow(new_flow); 01446 part->set_blob_type(new_type); 01447 part->SetBlobTypes(); 01448 if (debug) { 01449 tprintf("Modified part:"); 01450 part->Print(); 01451 } 01452 return true; 01453 } else { 01454 return false; 01455 } 01456 } 01457 01458 // Sets up a search box based on the part_box, padded in all directions 01459 // except direction. Also setup dist_scaling to weight x,y distances according 01460 // to the given direction. 01461 static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction, 01462 const TBOX& part_box, 01463 int min_padding, 01464 TBOX* search_box, 01465 ICOORD* dist_scaling) { 01466 *search_box = part_box; 01467 // Generate a pad value based on the min dimension of part_box, but at least 01468 // min_padding and then scaled by kMaxPadFactor. 01469 int padding = MIN(part_box.height(), part_box.width()); 01470 padding = MAX(padding, min_padding); 01471 padding *= kMaxPadFactor; 01472 search_box->pad(padding, padding); 01473 // Truncate the box in the appropriate direction and make the distance 01474 // metric slightly biased in the truncated direction. 01475 switch (direction) { 01476 case BND_LEFT: 01477 search_box->set_left(part_box.left()); 01478 *dist_scaling = ICOORD(2, 1); 01479 break; 01480 case BND_BELOW: 01481 search_box->set_bottom(part_box.bottom()); 01482 *dist_scaling = ICOORD(1, 2); 01483 break; 01484 case BND_RIGHT: 01485 search_box->set_right(part_box.right()); 01486 *dist_scaling = ICOORD(2, 1); 01487 break; 01488 case BND_ABOVE: 01489 search_box->set_top(part_box.top()); 01490 *dist_scaling = ICOORD(1, 2); 01491 break; 01492 default: 01493 ASSERT_HOST(false); 01494 } 01495 } 01496 01497 // Local enum used by SmoothInOneDirection and AccumulatePartDistances 01498 // for the different types of partition neighbour. 01499 enum NeighbourPartitionType { 01500 NPT_HTEXT, // Definite horizontal text. 01501 NPT_VTEXT, // Definite vertical text. 01502 NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but 01503 // image for image and VTEXT. 01504 NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but 01505 // image for image and HTEXT. 01506 NPT_IMAGE, // Defininte non-text. 01507 NPT_COUNT // Number of array elements. 01508 }; 01509 01510 // Executes the search for SmoothRegionType in a single direction. 01511 // Creates a bounding box that is padded in all directions except direction, 01512 // and searches it for other partitions. Finds the nearest collection of 01513 // partitions that makes a decisive result (if any) and returns the type 01514 // and the distance of the collection. If there are any pixels in the 01515 // nontext_map, then the decision is biased towards image. 01516 BlobRegionType ColPartitionGrid::SmoothInOneDirection( 01517 BlobNeighbourDir direction, Pix* nontext_map, 01518 const TBOX& im_box, const FCOORD& rerotation, 01519 bool debug, const ColPartition& part, int* best_distance) { 01520 // Set up a rectangle search bounded by the part. 01521 TBOX part_box = part.bounding_box(); 01522 TBOX search_box; 01523 ICOORD dist_scaling; 01524 ComputeSearchBoxAndScaling(direction, part_box, gridsize(), 01525 &search_box, &dist_scaling); 01526 bool image_region = ImageFind::CountPixelsInRotatedBox(search_box, im_box, 01527 rerotation, 01528 nontext_map) > 0; 01529 GenericVector<int> dists[NPT_COUNT]; 01530 AccumulatePartDistances(part, dist_scaling, search_box, 01531 nontext_map, im_box, rerotation, debug, dists); 01532 // By iteratively including the next smallest distance across the vectors, 01533 // (as in a merge sort) we can use the vector indices as counts of each type 01534 // and find the nearest set of objects that give us a definite decision. 01535 int counts[NPT_COUNT]; 01536 memset(counts, 0, sizeof(counts[0]) * NPT_COUNT); 01537 // If there is image in the search box, tip the balance in image's favor. 01538 int image_bias = image_region ? kSmoothDecisionMargin / 2 : 0; 01539 BlobRegionType text_dir = part.blob_type(); 01540 BlobTextFlowType flow_type = part.flow(); 01541 int min_dist = 0; 01542 do { 01543 // Find the minimum new entry across the vectors 01544 min_dist = MAX_INT32; 01545 for (int i = 0; i < NPT_COUNT; ++i) { 01546 if (counts[i] < dists[i].size() && dists[i][counts[i]] < min_dist) 01547 min_dist = dists[i][counts[i]]; 01548 } 01549 // Step all the indices/counts forward to include min_dist. 01550 for (int i = 0; i < NPT_COUNT; ++i) { 01551 while (counts[i] < dists[i].size() && dists[i][counts[i]] <= min_dist) 01552 ++counts[i]; 01553 } 01554 *best_distance = min_dist; 01555 if (debug) { 01556 tprintf("Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n", 01557 counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT], 01558 counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT], 01559 counts[NPT_IMAGE], image_bias, min_dist); 01560 } 01561 // See if we have a decision yet. 01562 int image_count = counts[NPT_IMAGE]; 01563 int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] - 01564 (image_count + counts[NPT_WEAK_VTEXT]); 01565 int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] - 01566 (image_count + counts[NPT_WEAK_HTEXT]); 01567 if (image_count > 0 && 01568 image_bias - htext_score >= kSmoothDecisionMargin && 01569 image_bias - vtext_score >= kSmoothDecisionMargin) { 01570 *best_distance = dists[NPT_IMAGE][0]; 01571 if (dists[NPT_WEAK_VTEXT].size() > 0 && 01572 *best_distance > dists[NPT_WEAK_VTEXT][0]) 01573 *best_distance = dists[NPT_WEAK_VTEXT][0]; 01574 if (dists[NPT_WEAK_HTEXT].size() > 0 && 01575 *best_distance > dists[NPT_WEAK_HTEXT][0]) 01576 *best_distance = dists[NPT_WEAK_HTEXT][0]; 01577 return BRT_POLYIMAGE; 01578 } 01579 if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) && 01580 counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) { 01581 *best_distance = dists[NPT_HTEXT][0]; 01582 return BRT_TEXT; 01583 } else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) && 01584 counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) { 01585 *best_distance = dists[NPT_VTEXT][0]; 01586 return BRT_VERT_TEXT; 01587 } 01588 } while (min_dist < MAX_INT32); 01589 return BRT_UNKNOWN; 01590 } 01591 01592 // Counts the partitions in the given search_box by appending the gap 01593 // distance (scaled by dist_scaling) of the part from the base_part to the 01594 // vector of the appropriate type for the partition. Prior to return, the 01595 // vectors in the dists array are sorted in increasing order. 01596 // The nontext_map (+im_box, rerotation) is used to make text invisible if 01597 // there is non-text in between. 01598 // dists must be an array of GenericVectors of size NPT_COUNT. 01599 void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part, 01600 const ICOORD& dist_scaling, 01601 const TBOX& search_box, 01602 Pix* nontext_map, 01603 const TBOX& im_box, 01604 const FCOORD& rerotation, 01605 bool debug, 01606 GenericVector<int>* dists) { 01607 const TBOX& part_box = base_part.bounding_box(); 01608 ColPartitionGridSearch rsearch(this); 01609 rsearch.SetUniqueMode(true); 01610 rsearch.StartRectSearch(search_box); 01611 ColPartition* neighbour; 01612 // Search for compatible neighbours with a similar strokewidth, but not 01613 // on the other side of a tab vector. 01614 while ((neighbour = rsearch.NextRectSearch()) != NULL) { 01615 if (neighbour->IsUnMergeableType() || 01616 !base_part.ConfirmNoTabViolation(*neighbour) || 01617 neighbour == &base_part) 01618 continue; 01619 TBOX nbox = neighbour->bounding_box(); 01620 BlobRegionType n_type = neighbour->blob_type(); 01621 if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) && 01622 !ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation, 01623 nontext_map)) 01624 continue; // Text not visible the other side of image. 01625 if (BLOBNBOX::IsLineType(n_type)) 01626 continue; // Don't use horizontal lines as neighbours. 01627 int x_gap = MAX(part_box.x_gap(nbox), 0); 01628 int y_gap = MAX(part_box.y_gap(nbox), 0); 01629 int n_dist = x_gap * dist_scaling.x() + y_gap* dist_scaling.y(); 01630 if (debug) { 01631 tprintf("Part has x-gap=%d, y=%d, dist=%d at:", 01632 x_gap, y_gap, n_dist); 01633 nbox.print(); 01634 } 01635 // Truncate the number of boxes, so text doesn't get too much advantage. 01636 int n_boxes = MIN(neighbour->boxes_count(), kSmoothDecisionMargin); 01637 BlobTextFlowType n_flow = neighbour->flow(); 01638 GenericVector<int>* count_vector = NULL; 01639 if (n_flow == BTFT_STRONG_CHAIN) { 01640 if (n_type == BRT_TEXT) 01641 count_vector = &dists[NPT_HTEXT]; 01642 else 01643 count_vector = &dists[NPT_VTEXT]; 01644 if (debug) { 01645 tprintf("%s %d\n", n_type == BRT_TEXT ? "Htext" : "Vtext", n_boxes); 01646 } 01647 } else if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) && 01648 (n_flow == BTFT_CHAIN || n_flow == BTFT_NEIGHBOURS)) { 01649 // Medium text counts as weak, and all else counts as image. 01650 if (n_type == BRT_TEXT) 01651 count_vector = &dists[NPT_WEAK_HTEXT]; 01652 else 01653 count_vector = &dists[NPT_WEAK_VTEXT]; 01654 if (debug) tprintf("Weak %d\n", n_boxes); 01655 } else { 01656 count_vector = &dists[NPT_IMAGE]; 01657 if (debug) tprintf("Image %d\n", n_boxes); 01658 } 01659 if (count_vector != NULL) { 01660 for (int i = 0; i < n_boxes; ++i) 01661 count_vector->push_back(n_dist); 01662 } 01663 if (debug) { 01664 neighbour->Print(); 01665 } 01666 } 01667 for (int i = 0; i < NPT_COUNT; ++i) 01668 dists[i].sort(); 01669 } 01670 01671 // Improves the margins of the part ColPartition by searching for 01672 // neighbours that vertically overlap significantly. 01673 // columns may be NULL, and indicates the assigned column structure this 01674 // is applicable to part. 01675 void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns, 01676 ColPartition* part) { 01677 // Set up a rectangle search x-bounded by the column and y by the part. 01678 TBOX box = part->bounding_box(); 01679 int y = part->MidY(); 01680 // Initial left margin is based on the column, if there is one. 01681 int left_margin = bleft().x(); 01682 int right_margin = tright().x(); 01683 if (columns != NULL) { 01684 ColPartition* column = columns->ColumnContaining(box.left(), y); 01685 if (column != NULL) 01686 left_margin = column->LeftAtY(y); 01687 column = columns->ColumnContaining(box.right(), y); 01688 if (column != NULL) 01689 right_margin = column->RightAtY(y); 01690 } 01691 left_margin -= kColumnWidthFactor; 01692 right_margin += kColumnWidthFactor; 01693 // Search for ColPartitions that reduce the margin. 01694 left_margin = FindMargin(box.left() + box.height(), true, left_margin, 01695 box.bottom(), box.top(), part); 01696 part->set_left_margin(left_margin); 01697 // Search for ColPartitions that reduce the margin. 01698 right_margin = FindMargin(box.right() - box.height(), false, right_margin, 01699 box.bottom(), box.top(), part); 01700 part->set_right_margin(right_margin); 01701 } 01702 01703 // Starting at x, and going in the specified direction, upto x_limit, finds 01704 // the margin for the given y range by searching sideways, 01705 // and ignoring not_this. 01706 int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit, 01707 int y_bottom, int y_top, 01708 const ColPartition* not_this) { 01709 int height = y_top - y_bottom; 01710 // Iterate the ColPartitions in the grid. 01711 ColPartitionGridSearch side_search(this); 01712 side_search.SetUniqueMode(true); 01713 side_search.StartSideSearch(x, y_bottom, y_top); 01714 ColPartition* part; 01715 while ((part = side_search.NextSideSearch(right_to_left)) != NULL) { 01716 // Ignore itself. 01717 if (part == not_this) // || part->IsLineType()) 01718 continue; 01719 // Must overlap by enough, based on the min of the heights, so 01720 // large partitions can't smash through small ones. 01721 TBOX box = part->bounding_box(); 01722 int min_overlap = MIN(height, box.height()); 01723 min_overlap = static_cast<int>(min_overlap * kMarginOverlapFraction + 0.5); 01724 int y_overlap = MIN(y_top, box.top()) - MAX(y_bottom, box.bottom()); 01725 if (y_overlap < min_overlap) 01726 continue; 01727 // Must be going the right way. 01728 int x_edge = right_to_left ? box.right() : box.left(); 01729 if ((x_edge < x) != right_to_left) 01730 continue; 01731 // If we have gone past x_limit, then x_limit will do. 01732 if ((x_edge < x_limit) == right_to_left) 01733 break; 01734 // It reduces x limit, so save the new one. 01735 x_limit = x_edge; 01736 } 01737 return x_limit; 01738 } 01739 01740 01741 } // namespace tesseract.