tesseract
3.03
|
00001 /********************************************************************** 00002 * File: blobbox.cpp (Formerly blobnbox.c) 00003 * Description: Code for the textord blob class. 00004 * Author: Ray Smith 00005 * Created: Thu Jul 30 09:08:51 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 // Include automatically generated configuration file if running autoconf. 00021 #ifdef HAVE_CONFIG_H 00022 #include "config_auto.h" 00023 #endif 00024 00025 #include "blobbox.h" 00026 #include "allheaders.h" 00027 #include "blobs.h" 00028 #include "helpers.h" 00029 #include "normalis.h" 00030 00031 #define PROJECTION_MARGIN 10 //arbitrary 00032 #define EXTERN 00033 00034 ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK) 00035 00036 // Upto 30 degrees is allowed for rotations of diacritic blobs. 00037 const double kCosSmallAngle = 0.866; 00038 // Min aspect ratio for a joined word to indicate an obvious flow direction. 00039 const double kDefiniteAspectRatio = 2.0; 00040 // Multiple of short length in perimeter to make a joined word. 00041 const double kComplexShapePerimeterRatio = 1.5; 00042 // Min multiple of linesize for medium-sized blobs in ReFilterBlobs. 00043 const double kMinMediumSizeRatio = 0.25; 00044 // Max multiple of linesize for medium-sized blobs in ReFilterBlobs. 00045 const double kMaxMediumSizeRatio = 4.0; 00046 00047 // Rotates the box and the underlying blob. 00048 void BLOBNBOX::rotate(FCOORD rotation) { 00049 cblob_ptr->rotate(rotation); 00050 rotate_box(rotation); 00051 compute_bounding_box(); 00052 } 00053 00054 // Reflect the box in the y-axis, leaving the underlying blob untouched. 00055 void BLOBNBOX::reflect_box_in_y_axis() { 00056 int left = -box.right(); 00057 box.set_right(-box.left()); 00058 box.set_left(left); 00059 } 00060 00061 // Rotates the box by the angle given by rotation. 00062 // If the blob is a diacritic, then only small rotations for skew 00063 // correction can be applied. 00064 void BLOBNBOX::rotate_box(FCOORD rotation) { 00065 if (IsDiacritic()) { 00066 ASSERT_HOST(rotation.x() >= kCosSmallAngle) 00067 ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_); 00068 ICOORD bottom_pt(top_pt.x(), base_char_bottom_); 00069 top_pt.rotate(rotation); 00070 base_char_top_ = top_pt.y(); 00071 bottom_pt.rotate(rotation); 00072 base_char_bottom_ = bottom_pt.y(); 00073 box.rotate(rotation); 00074 } else { 00075 box.rotate(rotation); 00076 set_diacritic_box(box); 00077 } 00078 } 00079 00080 /********************************************************************** 00081 * BLOBNBOX::merge 00082 * 00083 * Merge this blob with the given blob, which should be after this. 00084 **********************************************************************/ 00085 void BLOBNBOX::merge( //merge blobs 00086 BLOBNBOX *nextblob //blob to join with 00087 ) { 00088 box += nextblob->box; //merge boxes 00089 set_diacritic_box(box); 00090 nextblob->joined = TRUE; 00091 } 00092 00093 00094 // Merge this with other, taking the outlines from other. 00095 // Other is not deleted, but left for the caller to handle. 00096 void BLOBNBOX::really_merge(BLOBNBOX* other) { 00097 if (cblob_ptr != NULL && other->cblob_ptr != NULL) { 00098 C_OUTLINE_IT ol_it(cblob_ptr->out_list()); 00099 ol_it.add_list_after(other->cblob_ptr->out_list()); 00100 } 00101 compute_bounding_box(); 00102 } 00103 00104 00105 /********************************************************************** 00106 * BLOBNBOX::chop 00107 * 00108 * Chop this blob into equal sized pieces using the x height as a guide. 00109 * The blob is not actually chopped. Instead, fake blobs are inserted 00110 * with the relevant bounding boxes. 00111 **********************************************************************/ 00112 00113 void BLOBNBOX::chop( //chop blobs 00114 BLOBNBOX_IT *start_it, //location of this 00115 BLOBNBOX_IT *end_it, //iterator 00116 FCOORD rotation, //for landscape 00117 float xheight //of line 00118 ) { 00119 inT16 blobcount; //no of blobs 00120 BLOBNBOX *newblob; //fake blob 00121 BLOBNBOX *blob; //current blob 00122 inT16 blobindex; //number of chop 00123 inT16 leftx; //left edge of blob 00124 float blobwidth; //width of each 00125 float rightx; //right edge to scan 00126 float ymin, ymax; //limits of new blob 00127 float test_ymin, test_ymax; //limits of part blob 00128 ICOORD bl, tr; //corners of box 00129 BLOBNBOX_IT blob_it; //blob iterator 00130 00131 //get no of chops 00132 blobcount = (inT16) floor (box.width () / xheight); 00133 if (blobcount > 1 && cblob_ptr != NULL) { 00134 //width of each 00135 blobwidth = (float) (box.width () + 1) / blobcount; 00136 for (blobindex = blobcount - 1, rightx = box.right (); 00137 blobindex >= 0; blobindex--, rightx -= blobwidth) { 00138 ymin = (float) MAX_INT32; 00139 ymax = (float) -MAX_INT32; 00140 blob_it = *start_it; 00141 do { 00142 blob = blob_it.data (); 00143 find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth, 00144 rightx, 00145 /*rotation, */ test_ymin, test_ymax); 00146 blob_it.forward (); 00147 UpdateRange(test_ymin, test_ymax, &ymin, &ymax); 00148 } 00149 while (blob != end_it->data ()); 00150 if (ymin < ymax) { 00151 leftx = (inT16) floor (rightx - blobwidth); 00152 if (leftx < box.left ()) 00153 leftx = box.left (); //clip to real box 00154 bl = ICOORD (leftx, (inT16) floor (ymin)); 00155 tr = ICOORD ((inT16) ceil (rightx), (inT16) ceil (ymax)); 00156 if (blobindex == 0) 00157 box = TBOX (bl, tr); //change box 00158 else { 00159 newblob = new BLOBNBOX; 00160 //box is all it has 00161 newblob->box = TBOX (bl, tr); 00162 //stay on current 00163 newblob->base_char_top_ = tr.y(); 00164 newblob->base_char_bottom_ = bl.y(); 00165 end_it->add_after_stay_put (newblob); 00166 } 00167 } 00168 } 00169 } 00170 } 00171 00172 // Returns the box gaps between this and its neighbours_ in an array 00173 // indexed by BlobNeighbourDir. 00174 void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const { 00175 for (int dir = 0; dir < BND_COUNT; ++dir) { 00176 gaps[dir] = MAX_INT16; 00177 BLOBNBOX* neighbour = neighbours_[dir]; 00178 if (neighbour != NULL) { 00179 TBOX n_box = neighbour->bounding_box(); 00180 if (dir == BND_LEFT || dir == BND_RIGHT) { 00181 gaps[dir] = box.x_gap(n_box); 00182 } else { 00183 gaps[dir] = box.y_gap(n_box); 00184 } 00185 } 00186 } 00187 } 00188 // Returns the min and max horizontal and vertical gaps (from NeighbourGaps) 00189 // modified so that if the max exceeds the max dimension of the blob, and 00190 // the min is less, the max is replaced with the min. 00191 // The objective is to catch cases where there is only a single neighbour 00192 // and avoid reporting the other gap as a ridiculously large number 00193 void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max, 00194 int* v_min, int* v_max) const { 00195 int max_dimension = MAX(box.width(), box.height()); 00196 int gaps[BND_COUNT]; 00197 NeighbourGaps(gaps); 00198 *h_min = MIN(gaps[BND_LEFT], gaps[BND_RIGHT]); 00199 *h_max = MAX(gaps[BND_LEFT], gaps[BND_RIGHT]); 00200 if (*h_max > max_dimension && *h_min < max_dimension) *h_max = *h_min; 00201 *v_min = MIN(gaps[BND_ABOVE], gaps[BND_BELOW]); 00202 *v_max = MAX(gaps[BND_ABOVE], gaps[BND_BELOW]); 00203 if (*v_max > max_dimension && *v_min < max_dimension) *v_max = *v_min; 00204 } 00205 00206 // NULLs out any neighbours that are DeletableNoise to remove references. 00207 void BLOBNBOX::CleanNeighbours() { 00208 for (int dir = 0; dir < BND_COUNT; ++dir) { 00209 BLOBNBOX* neighbour = neighbours_[dir]; 00210 if (neighbour != NULL && neighbour->DeletableNoise()) { 00211 neighbours_[dir] = NULL; 00212 good_stroke_neighbours_[dir] = false; 00213 } 00214 } 00215 } 00216 00217 // Returns positive if there is at least one side neighbour that has a similar 00218 // stroke width and is not on the other side of a rule line. 00219 int BLOBNBOX::GoodTextBlob() const { 00220 int score = 0; 00221 for (int dir = 0; dir < BND_COUNT; ++dir) { 00222 BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir); 00223 if (good_stroke_neighbour(bnd)) 00224 ++score; 00225 } 00226 return score; 00227 } 00228 00229 // Returns the number of side neighbours that are of type BRT_NOISE. 00230 int BLOBNBOX::NoisyNeighbours() const { 00231 int count = 0; 00232 for (int dir = 0; dir < BND_COUNT; ++dir) { 00233 BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir); 00234 BLOBNBOX* blob = neighbour(bnd); 00235 if (blob != NULL && blob->region_type() == BRT_NOISE) 00236 ++count; 00237 } 00238 return count; 00239 } 00240 00241 // Returns true, and sets vert_possible/horz_possible if the blob has some 00242 // feature that makes it individually appear to flow one way. 00243 // eg if it has a high aspect ratio, yet has a complex shape, such as a 00244 // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc. 00245 bool BLOBNBOX::DefiniteIndividualFlow() { 00246 if (cblob() == NULL) return false; 00247 int box_perimeter = 2 * (box.height() + box.width()); 00248 if (box.width() > box.height() * kDefiniteAspectRatio) { 00249 // Attempt to distinguish a wide joined word from a dash. 00250 // If it is a dash, then its perimeter is approximately 00251 // 2 * (box width + stroke width), but more if the outline is noisy, 00252 // so perimeter - 2*(box width + stroke width) should be close to zero. 00253 // A complex shape such as a joined word should have a much larger value. 00254 int perimeter = cblob()->perimeter(); 00255 if (vert_stroke_width() > 0 || perimeter <= 0) 00256 perimeter -= 2 * vert_stroke_width(); 00257 else 00258 perimeter -= 4 * cblob()->area() / perimeter; 00259 perimeter -= 2 * box.width(); 00260 // Use a multiple of the box perimeter as a threshold. 00261 if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { 00262 set_vert_possible(false); 00263 set_horz_possible(true); 00264 return true; 00265 } 00266 } 00267 if (box.height() > box.width() * kDefiniteAspectRatio) { 00268 // As above, but for a putative vertical word vs a I/1/l. 00269 int perimeter = cblob()->perimeter(); 00270 if (horz_stroke_width() > 0 || perimeter <= 0) 00271 perimeter -= 2 * horz_stroke_width(); 00272 else 00273 perimeter -= 4 * cblob()->area() / perimeter; 00274 perimeter -= 2 * box.height(); 00275 if (perimeter > kComplexShapePerimeterRatio * box_perimeter) { 00276 set_vert_possible(true); 00277 set_horz_possible(false); 00278 return true; 00279 } 00280 } 00281 return false; 00282 } 00283 00284 // Returns true if there is no tabstop violation in merging this and other. 00285 bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX& other) const { 00286 if (box.left() < other.box.left() && box.left() < other.left_rule_) 00287 return false; 00288 if (other.box.left() < box.left() && other.box.left() < left_rule_) 00289 return false; 00290 if (box.right() > other.box.right() && box.right() > other.right_rule_) 00291 return false; 00292 if (other.box.right() > box.right() && other.box.right() > right_rule_) 00293 return false; 00294 return true; 00295 } 00296 00297 // Returns true if other has a similar stroke width to this. 00298 bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX& other, 00299 double fractional_tolerance, 00300 double constant_tolerance) const { 00301 // The perimeter-based width is used as a backup in case there is 00302 // no information in the blob. 00303 double p_width = area_stroke_width(); 00304 double n_p_width = other.area_stroke_width(); 00305 float h_tolerance = horz_stroke_width_ * fractional_tolerance 00306 + constant_tolerance; 00307 float v_tolerance = vert_stroke_width_ * fractional_tolerance 00308 + constant_tolerance; 00309 double p_tolerance = p_width * fractional_tolerance 00310 + constant_tolerance; 00311 bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f; 00312 bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f; 00313 bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_, 00314 other.horz_stroke_width_, h_tolerance); 00315 bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_, 00316 other.vert_stroke_width_, v_tolerance); 00317 bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance); 00318 // For a match, at least one of the horizontal and vertical widths 00319 // must match, and the other one must either match or be zero. 00320 // Only if both are zero will we look at the perimeter metric. 00321 return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero)); 00322 } 00323 00324 // Returns a bounding box of the outline contained within the 00325 // given horizontal range. 00326 TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) { 00327 FCOORD no_rotation(1.0f, 0.0f); 00328 float top = box.top(); 00329 float bottom = box.bottom(); 00330 if (cblob_ptr != NULL) { 00331 find_cblob_limits(cblob_ptr, static_cast<float>(left), 00332 static_cast<float>(right), no_rotation, 00333 bottom, top); 00334 } 00335 00336 if (top < bottom) { 00337 top = box.top(); 00338 bottom = box.bottom(); 00339 } 00340 FCOORD bot_left(left, bottom); 00341 FCOORD top_right(right, top); 00342 TBOX shrunken_box(bot_left); 00343 TBOX shrunken_box2(top_right); 00344 shrunken_box += shrunken_box2; 00345 return shrunken_box; 00346 } 00347 00348 // Estimates and stores the baseline position based on the shape of the 00349 // outline. 00350 void BLOBNBOX::EstimateBaselinePosition() { 00351 baseline_y_ = box.bottom(); // The default. 00352 if (cblob_ptr == NULL) return; 00353 baseline_y_ = cblob_ptr->EstimateBaselinePosition(); 00354 } 00355 00356 // Helper to call CleanNeighbours on all blobs on the list. 00357 void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST* blobs) { 00358 BLOBNBOX_IT blob_it(blobs); 00359 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00360 blob_it.data()->CleanNeighbours(); 00361 } 00362 } 00363 00364 // Helper to delete all the deletable blobs on the list. 00365 void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST* blobs) { 00366 BLOBNBOX_IT blob_it(blobs); 00367 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00368 BLOBNBOX* blob = blob_it.data(); 00369 if (blob->DeletableNoise()) { 00370 delete blob->cblob(); 00371 delete blob_it.extract(); 00372 } 00373 } 00374 } 00375 00376 // Helper to compute edge offsets for all the blobs on the list. 00377 // See coutln.h for an explanation of edge offsets. 00378 void BLOBNBOX::ComputeEdgeOffsets(Pix* thresholds, Pix* grey, 00379 BLOBNBOX_LIST* blobs) { 00380 int grey_height = 0; 00381 int thr_height = 0; 00382 int scale_factor = 1; 00383 if (thresholds != NULL && grey != NULL) { 00384 grey_height = pixGetHeight(grey); 00385 thr_height = pixGetHeight(thresholds); 00386 scale_factor = 00387 IntCastRounded(static_cast<double>(grey_height) / thr_height); 00388 } 00389 BLOBNBOX_IT blob_it(blobs); 00390 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00391 BLOBNBOX* blob = blob_it.data(); 00392 if (blob->cblob() != NULL) { 00393 // Get the threshold that applies to this blob. 00394 l_uint32 threshold = 128; 00395 if (thresholds != NULL && grey != NULL) { 00396 const TBOX& box = blob->cblob()->bounding_box(); 00397 // Transform the coordinates if required. 00398 TPOINT pt((box.left() + box.right()) / 2, 00399 (box.top() + box.bottom()) / 2); 00400 pixGetPixel(thresholds, pt.x / scale_factor, 00401 thr_height - 1 - pt.y / scale_factor, &threshold); 00402 } 00403 blob->cblob()->ComputeEdgeOffsets(threshold, grey); 00404 } 00405 } 00406 } 00407 00408 00409 #ifndef GRAPHICS_DISABLED 00410 // Helper to draw all the blobs on the list in the given body_colour, 00411 // with child outlines in the child_colour. 00412 void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list, 00413 ScrollView::Color body_colour, 00414 ScrollView::Color child_colour, 00415 ScrollView* win) { 00416 BLOBNBOX_IT it(list); 00417 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00418 it.data()->plot(win, body_colour, child_colour); 00419 } 00420 } 00421 00422 // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the 00423 // given list in the given body_colour, with child outlines in the 00424 // child_colour. 00425 void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list, 00426 ScrollView::Color body_colour, 00427 ScrollView::Color child_colour, 00428 ScrollView* win) { 00429 BLOBNBOX_IT it(list); 00430 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00431 BLOBNBOX* blob = it.data(); 00432 if (blob->DeletableNoise()) 00433 blob->plot(win, body_colour, child_colour); 00434 } 00435 } 00436 00437 ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type, 00438 BlobTextFlowType flow_type) { 00439 switch (region_type) { 00440 case BRT_HLINE: 00441 return ScrollView::BROWN; 00442 case BRT_VLINE: 00443 return ScrollView::DARK_GREEN; 00444 case BRT_RECTIMAGE: 00445 return ScrollView::RED; 00446 case BRT_POLYIMAGE: 00447 return ScrollView::ORANGE; 00448 case BRT_UNKNOWN: 00449 return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE; 00450 case BRT_VERT_TEXT: 00451 if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE) 00452 return ScrollView::GREEN; 00453 if (flow_type == BTFT_CHAIN) 00454 return ScrollView::LIME_GREEN; 00455 return ScrollView::YELLOW; 00456 case BRT_TEXT: 00457 if (flow_type == BTFT_STRONG_CHAIN) 00458 return ScrollView::BLUE; 00459 if (flow_type == BTFT_TEXT_ON_IMAGE) 00460 return ScrollView::LIGHT_BLUE; 00461 if (flow_type == BTFT_CHAIN) 00462 return ScrollView::MEDIUM_BLUE; 00463 if (flow_type == BTFT_LEADER) 00464 return ScrollView::WHEAT; 00465 if (flow_type == BTFT_NONTEXT) 00466 return ScrollView::PINK; 00467 return ScrollView::MAGENTA; 00468 default: 00469 return ScrollView::GREY; 00470 } 00471 } 00472 00473 // Keep in sync with BlobRegionType. 00474 ScrollView::Color BLOBNBOX::BoxColor() const { 00475 return TextlineColor(region_type_, flow_); 00476 } 00477 00478 void BLOBNBOX::plot(ScrollView* window, // window to draw in 00479 ScrollView::Color blob_colour, // for outer bits 00480 ScrollView::Color child_colour) { // for holes 00481 if (cblob_ptr != NULL) 00482 cblob_ptr->plot(window, blob_colour, child_colour); 00483 } 00484 #endif 00485 /********************************************************************** 00486 * find_cblob_limits 00487 * 00488 * Scan the outlines of the cblob to locate the y min and max 00489 * between the given x limits. 00490 **********************************************************************/ 00491 00492 void find_cblob_limits( //get y limits 00493 C_BLOB *blob, //blob to search 00494 float leftx, //x limits 00495 float rightx, 00496 FCOORD rotation, //for landscape 00497 float &ymin, //output y limits 00498 float &ymax) { 00499 inT16 stepindex; //current point 00500 ICOORD pos; //current coords 00501 ICOORD vec; //rotated step 00502 C_OUTLINE *outline; //current outline 00503 //outlines 00504 C_OUTLINE_IT out_it = blob->out_list (); 00505 00506 ymin = (float) MAX_INT32; 00507 ymax = (float) -MAX_INT32; 00508 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00509 outline = out_it.data (); 00510 pos = outline->start_pos (); //get coords 00511 pos.rotate (rotation); 00512 for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { 00513 //inside 00514 if (pos.x () >= leftx && pos.x () <= rightx) { 00515 UpdateRange(pos.y(), &ymin, &ymax); 00516 } 00517 vec = outline->step (stepindex); 00518 vec.rotate (rotation); 00519 pos += vec; //move to next 00520 } 00521 } 00522 } 00523 00524 00525 /********************************************************************** 00526 * find_cblob_vlimits 00527 * 00528 * Scan the outlines of the cblob to locate the y min and max 00529 * between the given x limits. 00530 **********************************************************************/ 00531 00532 void find_cblob_vlimits( //get y limits 00533 C_BLOB *blob, //blob to search 00534 float leftx, //x limits 00535 float rightx, 00536 float &ymin, //output y limits 00537 float &ymax) { 00538 inT16 stepindex; //current point 00539 ICOORD pos; //current coords 00540 ICOORD vec; //rotated step 00541 C_OUTLINE *outline; //current outline 00542 //outlines 00543 C_OUTLINE_IT out_it = blob->out_list (); 00544 00545 ymin = (float) MAX_INT32; 00546 ymax = (float) -MAX_INT32; 00547 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00548 outline = out_it.data (); 00549 pos = outline->start_pos (); //get coords 00550 for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { 00551 //inside 00552 if (pos.x () >= leftx && pos.x () <= rightx) { 00553 UpdateRange(pos.y(), &ymin, &ymax); 00554 } 00555 vec = outline->step (stepindex); 00556 pos += vec; //move to next 00557 } 00558 } 00559 } 00560 00561 00562 /********************************************************************** 00563 * find_cblob_hlimits 00564 * 00565 * Scan the outlines of the cblob to locate the x min and max 00566 * between the given y limits. 00567 **********************************************************************/ 00568 00569 void find_cblob_hlimits( //get x limits 00570 C_BLOB *blob, //blob to search 00571 float bottomy, //y limits 00572 float topy, 00573 float &xmin, //output x limits 00574 float &xmax) { 00575 inT16 stepindex; //current point 00576 ICOORD pos; //current coords 00577 ICOORD vec; //rotated step 00578 C_OUTLINE *outline; //current outline 00579 //outlines 00580 C_OUTLINE_IT out_it = blob->out_list (); 00581 00582 xmin = (float) MAX_INT32; 00583 xmax = (float) -MAX_INT32; 00584 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00585 outline = out_it.data (); 00586 pos = outline->start_pos (); //get coords 00587 for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) { 00588 //inside 00589 if (pos.y () >= bottomy && pos.y () <= topy) { 00590 UpdateRange(pos.x(), &xmin, &xmax); 00591 } 00592 vec = outline->step (stepindex); 00593 pos += vec; //move to next 00594 } 00595 } 00596 } 00597 00598 /********************************************************************** 00599 * crotate_cblob 00600 * 00601 * Rotate the copy by the given vector and return a C_BLOB. 00602 **********************************************************************/ 00603 00604 C_BLOB *crotate_cblob( //rotate it 00605 C_BLOB *blob, //blob to search 00606 FCOORD rotation //for landscape 00607 ) { 00608 C_OUTLINE_LIST out_list; //output outlines 00609 //input outlines 00610 C_OUTLINE_IT in_it = blob->out_list (); 00611 //output outlines 00612 C_OUTLINE_IT out_it = &out_list; 00613 00614 for (in_it.mark_cycle_pt (); !in_it.cycled_list (); in_it.forward ()) { 00615 out_it.add_after_then_move (new C_OUTLINE (in_it.data (), rotation)); 00616 } 00617 return new C_BLOB (&out_list); 00618 } 00619 00620 00621 /********************************************************************** 00622 * box_next 00623 * 00624 * Compute the bounding box of this blob with merging of x overlaps 00625 * but no pre-chopping. 00626 * Then move the iterator on to the start of the next blob. 00627 **********************************************************************/ 00628 00629 TBOX box_next( //get bounding box 00630 BLOBNBOX_IT *it //iterator to blobds 00631 ) { 00632 BLOBNBOX *blob; //current blob 00633 TBOX result; //total box 00634 00635 blob = it->data (); 00636 result = blob->bounding_box (); 00637 do { 00638 it->forward (); 00639 blob = it->data (); 00640 if (blob->cblob() == NULL) 00641 //was pre-chopped 00642 result += blob->bounding_box (); 00643 } 00644 //until next real blob 00645 while ((blob->cblob() == NULL) || blob->joined_to_prev()); 00646 return result; 00647 } 00648 00649 00650 /********************************************************************** 00651 * box_next_pre_chopped 00652 * 00653 * Compute the bounding box of this blob with merging of x overlaps 00654 * but WITH pre-chopping. 00655 * Then move the iterator on to the start of the next pre-chopped blob. 00656 **********************************************************************/ 00657 00658 TBOX box_next_pre_chopped( //get bounding box 00659 BLOBNBOX_IT *it //iterator to blobds 00660 ) { 00661 BLOBNBOX *blob; //current blob 00662 TBOX result; //total box 00663 00664 blob = it->data (); 00665 result = blob->bounding_box (); 00666 do { 00667 it->forward (); 00668 blob = it->data (); 00669 } 00670 //until next real blob 00671 while (blob->joined_to_prev ()); 00672 return result; 00673 } 00674 00675 00676 /********************************************************************** 00677 * TO_ROW::TO_ROW 00678 * 00679 * Constructor to make a row from a blob. 00680 **********************************************************************/ 00681 00682 TO_ROW::TO_ROW ( //constructor 00683 BLOBNBOX * blob, //first blob 00684 float top, //corrected top 00685 float bottom, //of row 00686 float row_size //ideal 00687 ) { 00688 clear(); 00689 y_min = bottom; 00690 y_max = top; 00691 initial_y_min = bottom; 00692 00693 float diff; //in size 00694 BLOBNBOX_IT it = &blobs; //list of blobs 00695 00696 it.add_to_end (blob); 00697 diff = top - bottom - row_size; 00698 if (diff > 0) { 00699 y_max -= diff / 2; 00700 y_min += diff / 2; 00701 } 00702 //very small object 00703 else if ((top - bottom) * 3 < row_size) { 00704 diff = row_size / 3 + bottom - top; 00705 y_max += diff / 2; 00706 y_min -= diff / 2; 00707 } 00708 } 00709 00710 void TO_ROW::print() const { 00711 tprintf("pitch=%d, fp=%g, fps=%g, fpns=%g, prs=%g, prns=%g," 00712 " spacing=%g xh=%g y_origin=%g xev=%d, asc=%g, desc=%g," 00713 " body=%g, minsp=%d maxnsp=%d, thr=%d kern=%g sp=%g\n", 00714 pitch_decision, fixed_pitch, fp_space, fp_nonsp, pr_space, pr_nonsp, 00715 spacing, xheight, y_origin, xheight_evidence, ascrise, descdrop, 00716 body_size, min_space, max_nonspace, space_threshold, kern_size, 00717 space_size); 00718 } 00719 00720 /********************************************************************** 00721 * TO_ROW:add_blob 00722 * 00723 * Add the blob to the end of the row. 00724 **********************************************************************/ 00725 00726 void TO_ROW::add_blob( //constructor 00727 BLOBNBOX *blob, //first blob 00728 float top, //corrected top 00729 float bottom, //of row 00730 float row_size //ideal 00731 ) { 00732 float allowed; //allowed expansion 00733 float available; //expansion 00734 BLOBNBOX_IT it = &blobs; //list of blobs 00735 00736 it.add_to_end (blob); 00737 allowed = row_size + y_min - y_max; 00738 if (allowed > 0) { 00739 available = top > y_max ? top - y_max : 0; 00740 if (bottom < y_min) 00741 //total available 00742 available += y_min - bottom; 00743 if (available > 0) { 00744 available += available; //do it gradually 00745 if (available < allowed) 00746 available = allowed; 00747 if (bottom < y_min) 00748 y_min -= (y_min - bottom) * allowed / available; 00749 if (top > y_max) 00750 y_max += (top - y_max) * allowed / available; 00751 } 00752 } 00753 } 00754 00755 00756 /********************************************************************** 00757 * TO_ROW:insert_blob 00758 * 00759 * Add the blob to the row in the correct position. 00760 **********************************************************************/ 00761 00762 void TO_ROW::insert_blob( //constructor 00763 BLOBNBOX *blob //first blob 00764 ) { 00765 BLOBNBOX_IT it = &blobs; //list of blobs 00766 00767 if (it.empty ()) 00768 it.add_before_then_move (blob); 00769 else { 00770 it.mark_cycle_pt (); 00771 while (!it.cycled_list () 00772 && it.data ()->bounding_box ().left () <= 00773 blob->bounding_box ().left ()) 00774 it.forward (); 00775 if (it.cycled_list ()) 00776 it.add_to_end (blob); 00777 else 00778 it.add_before_stay_put (blob); 00779 } 00780 } 00781 00782 00783 /********************************************************************** 00784 * TO_ROW::compute_vertical_projection 00785 * 00786 * Compute the vertical projection of a TO_ROW from its blobs. 00787 **********************************************************************/ 00788 00789 void TO_ROW::compute_vertical_projection() { //project whole row 00790 TBOX row_box; //bound of row 00791 BLOBNBOX *blob; //current blob 00792 TBOX blob_box; //bounding box 00793 BLOBNBOX_IT blob_it = blob_list (); 00794 00795 if (blob_it.empty ()) 00796 return; 00797 row_box = blob_it.data ()->bounding_box (); 00798 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) 00799 row_box += blob_it.data ()->bounding_box (); 00800 00801 projection.set_range (row_box.left () - PROJECTION_MARGIN, 00802 row_box.right () + PROJECTION_MARGIN); 00803 projection_left = row_box.left () - PROJECTION_MARGIN; 00804 projection_right = row_box.right () + PROJECTION_MARGIN; 00805 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { 00806 blob = blob_it.data(); 00807 if (blob->cblob() != NULL) 00808 vertical_cblob_projection(blob->cblob(), &projection); 00809 } 00810 } 00811 00812 00813 /********************************************************************** 00814 * TO_ROW::clear 00815 * 00816 * Zero out all scalar members. 00817 **********************************************************************/ 00818 void TO_ROW::clear() { 00819 all_caps = 0; 00820 used_dm_model = 0; 00821 projection_left = 0; 00822 projection_right = 0; 00823 pitch_decision = PITCH_DUNNO; 00824 fixed_pitch = 0.0; 00825 fp_space = 0.0; 00826 fp_nonsp = 0.0; 00827 pr_space = 0.0; 00828 pr_nonsp = 0.0; 00829 spacing = 0.0; 00830 xheight = 0.0; 00831 xheight_evidence = 0; 00832 body_size = 0.0; 00833 ascrise = 0.0; 00834 descdrop = 0.0; 00835 min_space = 0; 00836 max_nonspace = 0; 00837 space_threshold = 0; 00838 kern_size = 0.0; 00839 space_size = 0.0; 00840 y_min = 0.0; 00841 y_max = 0.0; 00842 initial_y_min = 0.0; 00843 m = 0.0; 00844 c = 0.0; 00845 error = 0.0; 00846 para_c = 0.0; 00847 para_error = 0.0; 00848 y_origin = 0.0; 00849 credibility = 0.0; 00850 num_repeated_sets_ = -1; 00851 } 00852 00853 00854 /********************************************************************** 00855 * vertical_cblob_projection 00856 * 00857 * Compute the vertical projection of a cblob from its outlines 00858 * and add to the given STATS. 00859 **********************************************************************/ 00860 00861 void vertical_cblob_projection( //project outlines 00862 C_BLOB *blob, //blob to project 00863 STATS *stats //output 00864 ) { 00865 //outlines of blob 00866 C_OUTLINE_IT out_it = blob->out_list (); 00867 00868 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00869 vertical_coutline_projection (out_it.data (), stats); 00870 } 00871 } 00872 00873 00874 /********************************************************************** 00875 * vertical_coutline_projection 00876 * 00877 * Compute the vertical projection of a outline from its outlines 00878 * and add to the given STATS. 00879 **********************************************************************/ 00880 00881 void vertical_coutline_projection( //project outlines 00882 C_OUTLINE *outline, //outline to project 00883 STATS *stats //output 00884 ) { 00885 ICOORD pos; //current point 00886 ICOORD step; //edge step 00887 inT32 length; //of outline 00888 inT16 stepindex; //current step 00889 C_OUTLINE_IT out_it = outline->child (); 00890 00891 pos = outline->start_pos (); 00892 length = outline->pathlength (); 00893 for (stepindex = 0; stepindex < length; stepindex++) { 00894 step = outline->step (stepindex); 00895 if (step.x () > 0) { 00896 stats->add (pos.x (), -pos.y ()); 00897 } else if (step.x () < 0) { 00898 stats->add (pos.x () - 1, pos.y ()); 00899 } 00900 pos += step; 00901 } 00902 00903 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00904 vertical_coutline_projection (out_it.data (), stats); 00905 } 00906 } 00907 00908 00909 /********************************************************************** 00910 * TO_BLOCK::TO_BLOCK 00911 * 00912 * Constructor to make a TO_BLOCK from a real block. 00913 **********************************************************************/ 00914 00915 TO_BLOCK::TO_BLOCK( //make a block 00916 BLOCK *src_block //real block 00917 ) { 00918 clear(); 00919 block = src_block; 00920 } 00921 00922 static void clear_blobnboxes(BLOBNBOX_LIST* boxes) { 00923 BLOBNBOX_IT it = boxes; 00924 // A BLOBNBOX generally doesn't own its blobs, so if they do, you 00925 // have to delete them explicitly. 00926 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 00927 BLOBNBOX* box = it.data(); 00928 if (box->cblob() != NULL) 00929 delete box->cblob(); 00930 } 00931 } 00932 00933 /********************************************************************** 00934 * TO_BLOCK::clear 00935 * 00936 * Zero out all scalar members. 00937 **********************************************************************/ 00938 void TO_BLOCK::clear() { 00939 block = NULL; 00940 pitch_decision = PITCH_DUNNO; 00941 line_spacing = 0.0; 00942 line_size = 0.0; 00943 max_blob_size = 0.0; 00944 baseline_offset = 0.0; 00945 xheight = 0.0; 00946 fixed_pitch = 0.0; 00947 kern_size = 0.0; 00948 space_size = 0.0; 00949 min_space = 0; 00950 max_nonspace = 0; 00951 fp_space = 0.0; 00952 fp_nonsp = 0.0; 00953 pr_space = 0.0; 00954 pr_nonsp = 0.0; 00955 key_row = NULL; 00956 } 00957 00958 00959 TO_BLOCK::~TO_BLOCK() { 00960 // Any residual BLOBNBOXes at this stage own their blobs, so delete them. 00961 clear_blobnboxes(&blobs); 00962 clear_blobnboxes(&underlines); 00963 clear_blobnboxes(&noise_blobs); 00964 clear_blobnboxes(&small_blobs); 00965 clear_blobnboxes(&large_blobs); 00966 } 00967 00968 // Helper function to divide the input blobs over noise, small, medium 00969 // and large lists. Blobs small in height and (small in width or large in width) 00970 // go in the noise list. Dash (-) candidates go in the small list, and 00971 // medium and large are by height. 00972 // SIDE-EFFECT: reset all blobs to initial state by calling Init(). 00973 static void SizeFilterBlobs(int min_height, int max_height, 00974 BLOBNBOX_LIST* src_list, 00975 BLOBNBOX_LIST* noise_list, 00976 BLOBNBOX_LIST* small_list, 00977 BLOBNBOX_LIST* medium_list, 00978 BLOBNBOX_LIST* large_list) { 00979 BLOBNBOX_IT noise_it(noise_list); 00980 BLOBNBOX_IT small_it(small_list); 00981 BLOBNBOX_IT medium_it(medium_list); 00982 BLOBNBOX_IT large_it(large_list); 00983 for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) { 00984 BLOBNBOX* blob = src_it.extract(); 00985 blob->ReInit(); 00986 int width = blob->bounding_box().width(); 00987 int height = blob->bounding_box().height(); 00988 if (height < min_height && 00989 (width < min_height || width > max_height)) 00990 noise_it.add_after_then_move(blob); 00991 else if (height > max_height) 00992 large_it.add_after_then_move(blob); 00993 else if (height < min_height) 00994 small_it.add_after_then_move(blob); 00995 else 00996 medium_it.add_after_then_move(blob); 00997 } 00998 } 00999 01000 // Reorganize the blob lists with a different definition of small, medium 01001 // and large, compared to the original definition. 01002 // Height is still the primary filter key, but medium width blobs of small 01003 // height become small, and very wide blobs of small height stay noise, along 01004 // with small dot-shaped blobs. 01005 void TO_BLOCK::ReSetAndReFilterBlobs() { 01006 int min_height = IntCastRounded(kMinMediumSizeRatio * line_size); 01007 int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size); 01008 BLOBNBOX_LIST noise_list; 01009 BLOBNBOX_LIST small_list; 01010 BLOBNBOX_LIST medium_list; 01011 BLOBNBOX_LIST large_list; 01012 SizeFilterBlobs(min_height, max_height, &blobs, 01013 &noise_list, &small_list, &medium_list, &large_list); 01014 SizeFilterBlobs(min_height, max_height, &large_blobs, 01015 &noise_list, &small_list, &medium_list, &large_list); 01016 SizeFilterBlobs(min_height, max_height, &small_blobs, 01017 &noise_list, &small_list, &medium_list, &large_list); 01018 SizeFilterBlobs(min_height, max_height, &noise_blobs, 01019 &noise_list, &small_list, &medium_list, &large_list); 01020 BLOBNBOX_IT blob_it(&blobs); 01021 blob_it.add_list_after(&medium_list); 01022 blob_it.set_to_list(&large_blobs); 01023 blob_it.add_list_after(&large_list); 01024 blob_it.set_to_list(&small_blobs); 01025 blob_it.add_list_after(&small_list); 01026 blob_it.set_to_list(&noise_blobs); 01027 blob_it.add_list_after(&noise_list); 01028 } 01029 01030 // Deletes noise blobs from all lists where not owned by a ColPartition. 01031 void TO_BLOCK::DeleteUnownedNoise() { 01032 BLOBNBOX::CleanNeighbours(&blobs); 01033 BLOBNBOX::CleanNeighbours(&small_blobs); 01034 BLOBNBOX::CleanNeighbours(&noise_blobs); 01035 BLOBNBOX::CleanNeighbours(&large_blobs); 01036 BLOBNBOX::DeleteNoiseBlobs(&blobs); 01037 BLOBNBOX::DeleteNoiseBlobs(&small_blobs); 01038 BLOBNBOX::DeleteNoiseBlobs(&noise_blobs); 01039 BLOBNBOX::DeleteNoiseBlobs(&large_blobs); 01040 } 01041 01042 // Computes and stores the edge offsets on each blob for use in feature 01043 // extraction, using greyscale if the supplied grey and thresholds pixes 01044 // are 8-bit or otherwise (if NULL or not 8 bit) the original binary 01045 // edge step outlines. 01046 // Thresholds must either be the same size as grey or an integer down-scale 01047 // of grey. 01048 // See coutln.h for an explanation of edge offsets. 01049 void TO_BLOCK::ComputeEdgeOffsets(Pix* thresholds, Pix* grey) { 01050 BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs); 01051 BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs); 01052 BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs); 01053 } 01054 01055 #ifndef GRAPHICS_DISABLED 01056 // Draw the noise blobs from all lists in red. 01057 void TO_BLOCK::plot_noise_blobs(ScrollView* win) { 01058 BLOBNBOX::PlotNoiseBlobs(&noise_blobs, ScrollView::RED, ScrollView::RED, win); 01059 BLOBNBOX::PlotNoiseBlobs(&small_blobs, ScrollView::RED, ScrollView::RED, win); 01060 BLOBNBOX::PlotNoiseBlobs(&large_blobs, ScrollView::RED, ScrollView::RED, win); 01061 BLOBNBOX::PlotNoiseBlobs(&blobs, ScrollView::RED, ScrollView::RED, win); 01062 } 01063 01064 // Draw the blobs on the various lists in the block in different colors. 01065 void TO_BLOCK::plot_graded_blobs(ScrollView* win) { 01066 BLOBNBOX::PlotBlobs(&noise_blobs, ScrollView::CORAL, ScrollView::BLUE, win); 01067 BLOBNBOX::PlotBlobs(&small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW, 01068 win); 01069 BLOBNBOX::PlotBlobs(&large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW, 01070 win); 01071 BLOBNBOX::PlotBlobs(&blobs, ScrollView::WHITE, ScrollView::BROWN, win); 01072 } 01073 01074 /********************************************************************** 01075 * plot_blob_list 01076 * 01077 * Draw a list of blobs. 01078 **********************************************************************/ 01079 01080 void plot_blob_list(ScrollView* win, // window to draw in 01081 BLOBNBOX_LIST *list, // blob list 01082 ScrollView::Color body_colour, // colour to draw 01083 ScrollView::Color child_colour) { // colour of child 01084 BLOBNBOX_IT it = list; 01085 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { 01086 it.data()->plot(win, body_colour, child_colour); 01087 } 01088 } 01089 #endif // GRAPHICS_DISABLED