tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/blobbox.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        blobbox.cpp  (Formerly blobnbox.c)
00003  * Description: Code for the textord blob class.
00004  * Author:                                      Ray Smith
00005  * Created:                                     Thu Jul 30 09:08:51 BST 1992
00006  *
00007  * (C) Copyright 1992, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 // Include automatically generated configuration file if running autoconf.
00021 #ifdef HAVE_CONFIG_H
00022 #include "config_auto.h"
00023 #endif
00024 
00025 #include "blobbox.h"
00026 #include "allheaders.h"
00027 #include "blobs.h"
00028 #include "helpers.h"
00029 #include "normalis.h"
00030 
00031 #define PROJECTION_MARGIN 10     //arbitrary
00032 #define EXTERN
00033 
00034 ELISTIZE (BLOBNBOX) ELIST2IZE (TO_ROW) ELISTIZE (TO_BLOCK)
00035 
// Lower bound on the x component of a rotation that may be applied to a
// diacritic blob: 0.866 ~= cos(30 degrees), i.e. up to 30 degrees allowed.
const double kCosSmallAngle = 0.866;
// Minimum aspect ratio at which a joined word indicates an obvious flow
// direction.
const double kDefiniteAspectRatio = 2.0;
// Multiple of the box perimeter used as the threshold for deciding that a
// shape is complex enough to be a joined word.
const double kComplexShapePerimeterRatio = 1.5;
// Smallest multiple of linesize for medium-sized blobs in ReFilterBlobs.
const double kMinMediumSizeRatio = 0.25;
// Largest multiple of linesize for medium-sized blobs in ReFilterBlobs.
const double kMaxMediumSizeRatio = 4.0;
00046 
00047 // Rotates the box and the underlying blob.
00048 void BLOBNBOX::rotate(FCOORD rotation) {
00049   cblob_ptr->rotate(rotation);
00050   rotate_box(rotation);
00051   compute_bounding_box();
00052 }
00053 
00054 // Reflect the box in the y-axis, leaving the underlying blob untouched.
00055 void BLOBNBOX::reflect_box_in_y_axis() {
00056   int left = -box.right();
00057   box.set_right(-box.left());
00058   box.set_left(left);
00059 }
00060 
00061 // Rotates the box by the angle given by rotation.
00062 // If the blob is a diacritic, then only small rotations for skew
00063 // correction can be applied.
00064 void BLOBNBOX::rotate_box(FCOORD rotation) {
00065   if (IsDiacritic()) {
00066     ASSERT_HOST(rotation.x() >= kCosSmallAngle)
00067     ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_);
00068     ICOORD bottom_pt(top_pt.x(), base_char_bottom_);
00069     top_pt.rotate(rotation);
00070     base_char_top_ = top_pt.y();
00071     bottom_pt.rotate(rotation);
00072     base_char_bottom_ = bottom_pt.y();
00073     box.rotate(rotation);
00074   } else {
00075     box.rotate(rotation);
00076     set_diacritic_box(box);
00077   }
00078 }
00079 
00080 /**********************************************************************
00081  * BLOBNBOX::merge
00082  *
00083  * Merge this blob with the given blob, which should be after this.
00084  **********************************************************************/
00085 void BLOBNBOX::merge(                    //merge blobs
00086                      BLOBNBOX *nextblob  //blob to join with
00087                     ) {
00088   box += nextblob->box;          //merge boxes
00089   set_diacritic_box(box);
00090   nextblob->joined = TRUE;
00091 }
00092 
00093 
00094 // Merge this with other, taking the outlines from other.
00095 // Other is not deleted, but left for the caller to handle.
00096 void BLOBNBOX::really_merge(BLOBNBOX* other) {
00097   if (cblob_ptr != NULL && other->cblob_ptr != NULL) {
00098     C_OUTLINE_IT ol_it(cblob_ptr->out_list());
00099     ol_it.add_list_after(other->cblob_ptr->out_list());
00100   }
00101   compute_bounding_box();
00102 }
00103 
00104 
00105 /**********************************************************************
00106  * BLOBNBOX::chop
00107  *
00108  * Chop this blob into equal sized pieces using the x height as a guide.
00109  * The blob is not actually chopped. Instead, fake blobs are inserted
00110  * with the relevant bounding boxes.
00111  **********************************************************************/
00112 
00113 void BLOBNBOX::chop(                        //chop blobs
00114                     BLOBNBOX_IT *start_it,  //location of this
00115                     BLOBNBOX_IT *end_it,    //iterator
00116                     FCOORD rotation,        //for landscape
00117                     float xheight           //of line
00118                    ) {
00119   inT16 blobcount;               //no of blobs
00120   BLOBNBOX *newblob;             //fake blob
00121   BLOBNBOX *blob;                //current blob
00122   inT16 blobindex;               //number of chop
00123   inT16 leftx;                   //left edge of blob
00124   float blobwidth;               //width of each
00125   float rightx;                  //right edge to scan
00126   float ymin, ymax;              //limits of new blob
00127   float test_ymin, test_ymax;    //limits of part blob
00128   ICOORD bl, tr;                 //corners of box
00129   BLOBNBOX_IT blob_it;           //blob iterator
00130 
00131                                  //get no of chops
00132   blobcount = (inT16) floor (box.width () / xheight);
00133   if (blobcount > 1 && cblob_ptr != NULL) {
00134                                  //width of each
00135     blobwidth = (float) (box.width () + 1) / blobcount;
00136     for (blobindex = blobcount - 1, rightx = box.right ();
00137     blobindex >= 0; blobindex--, rightx -= blobwidth) {
00138       ymin = (float) MAX_INT32;
00139       ymax = (float) -MAX_INT32;
00140       blob_it = *start_it;
00141       do {
00142         blob = blob_it.data ();
00143         find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth,
00144                            rightx,
00145             /*rotation, */ test_ymin, test_ymax);
00146         blob_it.forward ();
00147         UpdateRange(test_ymin, test_ymax, &ymin, &ymax);
00148       }
00149       while (blob != end_it->data ());
00150       if (ymin < ymax) {
00151         leftx = (inT16) floor (rightx - blobwidth);
00152         if (leftx < box.left ())
00153           leftx = box.left ();   //clip to real box
00154         bl = ICOORD (leftx, (inT16) floor (ymin));
00155         tr = ICOORD ((inT16) ceil (rightx), (inT16) ceil (ymax));
00156         if (blobindex == 0)
00157           box = TBOX (bl, tr);    //change box
00158         else {
00159           newblob = new BLOBNBOX;
00160                                  //box is all it has
00161           newblob->box = TBOX (bl, tr);
00162                                  //stay on current
00163           newblob->base_char_top_ = tr.y();
00164           newblob->base_char_bottom_ = bl.y();
00165           end_it->add_after_stay_put (newblob);
00166         }
00167       }
00168     }
00169   }
00170 }
00171 
00172 // Returns the box gaps between this and its neighbours_ in an array
00173 // indexed by BlobNeighbourDir.
00174 void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const {
00175   for (int dir = 0; dir < BND_COUNT; ++dir) {
00176     gaps[dir] = MAX_INT16;
00177     BLOBNBOX* neighbour = neighbours_[dir];
00178     if (neighbour != NULL) {
00179       TBOX n_box = neighbour->bounding_box();
00180       if (dir == BND_LEFT || dir == BND_RIGHT) {
00181         gaps[dir] = box.x_gap(n_box);
00182       } else {
00183         gaps[dir] = box.y_gap(n_box);
00184       }
00185     }
00186   }
00187 }
00188 // Returns the min and max horizontal and vertical gaps (from NeighbourGaps)
00189 // modified so that if the max exceeds the max dimension of the blob, and
00190 // the min is less, the max is replaced with the min.
00191 // The objective is to catch cases where there is only a single neighbour
00192 // and avoid reporting the other gap as a ridiculously large number
00193 void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max,
00194                                  int* v_min, int* v_max) const {
00195   int max_dimension = MAX(box.width(), box.height());
00196   int gaps[BND_COUNT];
00197   NeighbourGaps(gaps);
00198   *h_min = MIN(gaps[BND_LEFT], gaps[BND_RIGHT]);
00199   *h_max = MAX(gaps[BND_LEFT], gaps[BND_RIGHT]);
00200   if (*h_max > max_dimension && *h_min < max_dimension) *h_max = *h_min;
00201   *v_min = MIN(gaps[BND_ABOVE], gaps[BND_BELOW]);
00202   *v_max = MAX(gaps[BND_ABOVE], gaps[BND_BELOW]);
00203   if (*v_max > max_dimension && *v_min < max_dimension) *v_max = *v_min;
00204 }
00205 
00206 // NULLs out any neighbours that are DeletableNoise to remove references.
00207 void BLOBNBOX::CleanNeighbours() {
00208   for (int dir = 0; dir < BND_COUNT; ++dir) {
00209     BLOBNBOX* neighbour = neighbours_[dir];
00210     if (neighbour != NULL && neighbour->DeletableNoise()) {
00211       neighbours_[dir] = NULL;
00212       good_stroke_neighbours_[dir] = false;
00213     }
00214   }
00215 }
00216 
00217 // Returns positive if there is at least one side neighbour that has a similar
00218 // stroke width and is not on the other side of a rule line.
00219 int BLOBNBOX::GoodTextBlob() const {
00220   int score = 0;
00221   for (int dir = 0; dir < BND_COUNT; ++dir) {
00222     BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
00223     if (good_stroke_neighbour(bnd))
00224       ++score;
00225   }
00226   return score;
00227 }
00228 
00229 // Returns the number of side neighbours that are of type BRT_NOISE.
00230 int BLOBNBOX::NoisyNeighbours() const {
00231   int count = 0;
00232   for (int dir = 0; dir < BND_COUNT; ++dir) {
00233     BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
00234     BLOBNBOX* blob = neighbour(bnd);
00235     if (blob != NULL && blob->region_type() == BRT_NOISE)
00236       ++count;
00237   }
00238   return count;
00239 }
00240 
00241 // Returns true, and sets vert_possible/horz_possible if the blob has some
00242 // feature that makes it individually appear to flow one way.
00243 // eg if it has a high aspect ratio, yet has a complex shape, such as a
00244 // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc.
00245 bool BLOBNBOX::DefiniteIndividualFlow() {
00246   if (cblob() == NULL) return false;
00247   int box_perimeter = 2 * (box.height() + box.width());
00248   if (box.width() > box.height() * kDefiniteAspectRatio) {
00249     // Attempt to distinguish a wide joined word from a dash.
00250     // If it is a dash, then its perimeter is approximately
00251     // 2 * (box width + stroke width), but more if the outline is noisy,
00252     // so perimeter - 2*(box width + stroke width) should be close to zero.
00253     // A complex shape such as a joined word should have a much larger value.
00254     int perimeter = cblob()->perimeter();
00255     if (vert_stroke_width() > 0 || perimeter <= 0)
00256       perimeter -= 2 * vert_stroke_width();
00257     else
00258       perimeter -= 4 * cblob()->area() / perimeter;
00259     perimeter -= 2 * box.width();
00260     // Use a multiple of the box perimeter as a threshold.
00261     if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
00262       set_vert_possible(false);
00263       set_horz_possible(true);
00264       return true;
00265     }
00266   }
00267   if (box.height() > box.width() * kDefiniteAspectRatio) {
00268     // As above, but for a putative vertical word vs a I/1/l.
00269     int perimeter = cblob()->perimeter();
00270     if (horz_stroke_width() > 0 || perimeter <= 0)
00271       perimeter -= 2 * horz_stroke_width();
00272     else
00273       perimeter -= 4 * cblob()->area() / perimeter;
00274     perimeter -= 2 * box.height();
00275     if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
00276       set_vert_possible(true);
00277       set_horz_possible(false);
00278       return true;
00279     }
00280   }
00281   return false;
00282 }
00283 
00284 // Returns true if there is no tabstop violation in merging this and other.
00285 bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX& other) const {
00286   if (box.left() < other.box.left() && box.left() < other.left_rule_)
00287     return false;
00288   if (other.box.left() < box.left() && other.box.left() < left_rule_)
00289     return false;
00290   if (box.right() > other.box.right() && box.right() > other.right_rule_)
00291     return false;
00292   if (other.box.right() > box.right() && other.box.right() > right_rule_)
00293     return false;
00294   return true;
00295 }
00296 
00297 // Returns true if other has a similar stroke width to this.
00298 bool BLOBNBOX::MatchingStrokeWidth(const BLOBNBOX& other,
00299                                    double fractional_tolerance,
00300                                    double constant_tolerance) const {
00301   // The perimeter-based width is used as a backup in case there is
00302   // no information in the blob.
00303   double p_width = area_stroke_width();
00304   double n_p_width = other.area_stroke_width();
00305   float h_tolerance = horz_stroke_width_ * fractional_tolerance
00306                      + constant_tolerance;
00307   float v_tolerance = vert_stroke_width_ * fractional_tolerance
00308                      + constant_tolerance;
00309   double p_tolerance = p_width * fractional_tolerance
00310                      + constant_tolerance;
00311   bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f;
00312   bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f;
00313   bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_,
00314                                      other.horz_stroke_width_, h_tolerance);
00315   bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_,
00316                                      other.vert_stroke_width_, v_tolerance);
00317   bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance);
00318   // For a match, at least one of the horizontal and vertical widths
00319   // must match, and the other one must either match or be zero.
00320   // Only if both are zero will we look at the perimeter metric.
00321   return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero));
00322 }
00323 
00324 // Returns a bounding box of the outline contained within the
00325 // given horizontal range.
00326 TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) {
00327   FCOORD no_rotation(1.0f, 0.0f);
00328   float top = box.top();
00329   float bottom = box.bottom();
00330   if (cblob_ptr != NULL) {
00331     find_cblob_limits(cblob_ptr, static_cast<float>(left),
00332                       static_cast<float>(right), no_rotation,
00333                       bottom, top);
00334   }
00335 
00336   if (top < bottom) {
00337     top = box.top();
00338     bottom = box.bottom();
00339   }
00340   FCOORD bot_left(left, bottom);
00341   FCOORD top_right(right, top);
00342   TBOX shrunken_box(bot_left);
00343   TBOX shrunken_box2(top_right);
00344   shrunken_box += shrunken_box2;
00345   return shrunken_box;
00346 }
00347 
00348 // Estimates and stores the baseline position based on the shape of the
00349 // outline.
00350 void BLOBNBOX::EstimateBaselinePosition() {
00351   baseline_y_ = box.bottom();  // The default.
00352   if (cblob_ptr == NULL) return;
00353   baseline_y_ = cblob_ptr->EstimateBaselinePosition();
00354 }
00355 
00356 // Helper to call CleanNeighbours on all blobs on the list.
00357 void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST* blobs) {
00358   BLOBNBOX_IT blob_it(blobs);
00359   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00360     blob_it.data()->CleanNeighbours();
00361   }
00362 }
00363 
00364 // Helper to delete all the deletable blobs on the list.
00365 void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST* blobs) {
00366   BLOBNBOX_IT blob_it(blobs);
00367   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00368     BLOBNBOX* blob = blob_it.data();
00369     if (blob->DeletableNoise()) {
00370       delete blob->cblob();
00371       delete blob_it.extract();
00372     }
00373   }
00374 }
00375 
00376 // Helper to compute edge offsets for  all the blobs on the list.
00377 // See coutln.h for an explanation of edge offsets.
00378 void BLOBNBOX::ComputeEdgeOffsets(Pix* thresholds, Pix* grey,
00379                                   BLOBNBOX_LIST* blobs) {
00380   int grey_height = 0;
00381   int thr_height = 0;
00382   int scale_factor = 1;
00383   if (thresholds != NULL && grey != NULL) {
00384     grey_height = pixGetHeight(grey);
00385     thr_height = pixGetHeight(thresholds);
00386     scale_factor =
00387         IntCastRounded(static_cast<double>(grey_height) / thr_height);
00388   }
00389   BLOBNBOX_IT blob_it(blobs);
00390   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00391     BLOBNBOX* blob = blob_it.data();
00392     if (blob->cblob() != NULL) {
00393       // Get the threshold that applies to this blob.
00394       l_uint32 threshold = 128;
00395       if (thresholds != NULL && grey != NULL) {
00396         const TBOX& box = blob->cblob()->bounding_box();
00397         // Transform the coordinates if required.
00398         TPOINT pt((box.left() + box.right()) / 2,
00399                   (box.top() + box.bottom()) / 2);
00400         pixGetPixel(thresholds, pt.x / scale_factor,
00401                     thr_height - 1 - pt.y / scale_factor, &threshold);
00402       }
00403       blob->cblob()->ComputeEdgeOffsets(threshold, grey);
00404     }
00405   }
00406 }
00407 
00408 
00409 #ifndef GRAPHICS_DISABLED
00410 // Helper to draw all the blobs on the list in the given body_colour,
00411 // with child outlines in the child_colour.
00412 void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list,
00413                          ScrollView::Color body_colour,
00414                          ScrollView::Color child_colour,
00415                          ScrollView* win) {
00416   BLOBNBOX_IT it(list);
00417   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00418     it.data()->plot(win, body_colour, child_colour);
00419   }
00420 }
00421 
00422 // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
00423 // given list in the given body_colour, with child outlines in the
00424 // child_colour.
00425 void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list,
00426                               ScrollView::Color body_colour,
00427                               ScrollView::Color child_colour,
00428                               ScrollView* win) {
00429   BLOBNBOX_IT it(list);
00430   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00431     BLOBNBOX* blob = it.data();
00432     if (blob->DeletableNoise())
00433       blob->plot(win, body_colour, child_colour);
00434   }
00435 }
00436 
00437 ScrollView::Color BLOBNBOX::TextlineColor(BlobRegionType region_type,
00438                                           BlobTextFlowType flow_type) {
00439   switch (region_type) {
00440     case BRT_HLINE:
00441       return ScrollView::BROWN;
00442     case BRT_VLINE:
00443       return ScrollView::DARK_GREEN;
00444     case BRT_RECTIMAGE:
00445       return ScrollView::RED;
00446     case BRT_POLYIMAGE:
00447       return ScrollView::ORANGE;
00448     case BRT_UNKNOWN:
00449       return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE;
00450     case BRT_VERT_TEXT:
00451       if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE)
00452         return ScrollView::GREEN;
00453       if (flow_type == BTFT_CHAIN)
00454         return ScrollView::LIME_GREEN;
00455       return ScrollView::YELLOW;
00456     case BRT_TEXT:
00457       if (flow_type == BTFT_STRONG_CHAIN)
00458         return ScrollView::BLUE;
00459       if (flow_type == BTFT_TEXT_ON_IMAGE)
00460         return ScrollView::LIGHT_BLUE;
00461       if (flow_type == BTFT_CHAIN)
00462         return ScrollView::MEDIUM_BLUE;
00463       if (flow_type == BTFT_LEADER)
00464         return ScrollView::WHEAT;
00465       if (flow_type == BTFT_NONTEXT)
00466         return ScrollView::PINK;
00467       return ScrollView::MAGENTA;
00468     default:
00469       return ScrollView::GREY;
00470   }
00471 }
00472 
00473 // Keep in sync with BlobRegionType.
00474 ScrollView::Color BLOBNBOX::BoxColor() const {
00475   return TextlineColor(region_type_, flow_);
00476 }
00477 
00478 void BLOBNBOX::plot(ScrollView* window,                // window to draw in
00479                     ScrollView::Color blob_colour,     // for outer bits
00480                     ScrollView::Color child_colour) {  // for holes
00481   if (cblob_ptr != NULL)
00482     cblob_ptr->plot(window, blob_colour, child_colour);
00483 }
00484 #endif
00485 /**********************************************************************
00486  * find_cblob_limits
00487  *
00488  * Scan the outlines of the cblob to locate the y min and max
00489  * between the given x limits.
00490  **********************************************************************/
00491 
00492 void find_cblob_limits(                  //get y limits
00493                        C_BLOB *blob,     //blob to search
00494                        float leftx,      //x limits
00495                        float rightx,
00496                        FCOORD rotation,  //for landscape
00497                        float &ymin,      //output y limits
00498                        float &ymax) {
00499   inT16 stepindex;               //current point
00500   ICOORD pos;                    //current coords
00501   ICOORD vec;                    //rotated step
00502   C_OUTLINE *outline;            //current outline
00503                                  //outlines
00504   C_OUTLINE_IT out_it = blob->out_list ();
00505 
00506   ymin = (float) MAX_INT32;
00507   ymax = (float) -MAX_INT32;
00508   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00509     outline = out_it.data ();
00510     pos = outline->start_pos (); //get coords
00511     pos.rotate (rotation);
00512     for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
00513                                  //inside
00514       if (pos.x () >= leftx && pos.x () <= rightx) {
00515         UpdateRange(pos.y(), &ymin, &ymax);
00516       }
00517       vec = outline->step (stepindex);
00518       vec.rotate (rotation);
00519       pos += vec;                //move to next
00520     }
00521   }
00522 }
00523 
00524 
00525 /**********************************************************************
00526  * find_cblob_vlimits
00527  *
00528  * Scan the outlines of the cblob to locate the y min and max
00529  * between the given x limits.
00530  **********************************************************************/
00531 
00532 void find_cblob_vlimits(               //get y limits
00533                         C_BLOB *blob,  //blob to search
00534                         float leftx,   //x limits
00535                         float rightx,
00536                         float &ymin,   //output y limits
00537                         float &ymax) {
00538   inT16 stepindex;               //current point
00539   ICOORD pos;                    //current coords
00540   ICOORD vec;                    //rotated step
00541   C_OUTLINE *outline;            //current outline
00542                                  //outlines
00543   C_OUTLINE_IT out_it = blob->out_list ();
00544 
00545   ymin = (float) MAX_INT32;
00546   ymax = (float) -MAX_INT32;
00547   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00548     outline = out_it.data ();
00549     pos = outline->start_pos (); //get coords
00550     for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
00551                                  //inside
00552       if (pos.x () >= leftx && pos.x () <= rightx) {
00553         UpdateRange(pos.y(), &ymin, &ymax);
00554       }
00555       vec = outline->step (stepindex);
00556       pos += vec;                //move to next
00557     }
00558   }
00559 }
00560 
00561 
00562 /**********************************************************************
00563  * find_cblob_hlimits
00564  *
00565  * Scan the outlines of the cblob to locate the x min and max
00566  * between the given y limits.
00567  **********************************************************************/
00568 
00569 void find_cblob_hlimits(                //get x limits
00570                         C_BLOB *blob,   //blob to search
00571                         float bottomy,  //y limits
00572                         float topy,
00573                         float &xmin,    //output x limits
00574                         float &xmax) {
00575   inT16 stepindex;               //current point
00576   ICOORD pos;                    //current coords
00577   ICOORD vec;                    //rotated step
00578   C_OUTLINE *outline;            //current outline
00579                                  //outlines
00580   C_OUTLINE_IT out_it = blob->out_list ();
00581 
00582   xmin = (float) MAX_INT32;
00583   xmax = (float) -MAX_INT32;
00584   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00585     outline = out_it.data ();
00586     pos = outline->start_pos (); //get coords
00587     for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
00588                                  //inside
00589       if (pos.y () >= bottomy && pos.y () <= topy) {
00590         UpdateRange(pos.x(), &xmin, &xmax);
00591       }
00592       vec = outline->step (stepindex);
00593       pos += vec;                //move to next
00594     }
00595   }
00596 }
00597 
00598 /**********************************************************************
00599  * crotate_cblob
00600  *
00601  * Rotate the copy by the given vector and return a C_BLOB.
00602  **********************************************************************/
00603 
00604 C_BLOB *crotate_cblob(                 //rotate it
00605                       C_BLOB *blob,    //blob to search
00606                       FCOORD rotation  //for landscape
00607                      ) {
00608   C_OUTLINE_LIST out_list;       //output outlines
00609                                  //input outlines
00610   C_OUTLINE_IT in_it = blob->out_list ();
00611                                  //output outlines
00612   C_OUTLINE_IT out_it = &out_list;
00613 
00614   for (in_it.mark_cycle_pt (); !in_it.cycled_list (); in_it.forward ()) {
00615     out_it.add_after_then_move (new C_OUTLINE (in_it.data (), rotation));
00616   }
00617   return new C_BLOB (&out_list);
00618 }
00619 
00620 
00621 /**********************************************************************
00622  * box_next
00623  *
00624  * Compute the bounding box of this blob with merging of x overlaps
00625  * but no pre-chopping.
00626  * Then move the iterator on to the start of the next blob.
00627  **********************************************************************/
00628 
00629 TBOX box_next(                 //get bounding box
00630              BLOBNBOX_IT *it  //iterator to blobds
00631             ) {
00632   BLOBNBOX *blob;                //current blob
00633   TBOX result;                    //total box
00634 
00635   blob = it->data ();
00636   result = blob->bounding_box ();
00637   do {
00638     it->forward ();
00639     blob = it->data ();
00640     if (blob->cblob() == NULL)
00641                                  //was pre-chopped
00642       result += blob->bounding_box ();
00643   }
00644                                  //until next real blob
00645   while ((blob->cblob() == NULL) || blob->joined_to_prev());
00646   return result;
00647 }
00648 
00649 
00650 /**********************************************************************
00651  * box_next_pre_chopped
00652  *
00653  * Compute the bounding box of this blob with merging of x overlaps
00654  * but WITH pre-chopping.
00655  * Then move the iterator on to the start of the next pre-chopped blob.
00656  **********************************************************************/
00657 
00658 TBOX box_next_pre_chopped(                 //get bounding box
00659                          BLOBNBOX_IT *it  //iterator to blobds
00660                         ) {
00661   BLOBNBOX *blob;                //current blob
00662   TBOX result;                    //total box
00663 
00664   blob = it->data ();
00665   result = blob->bounding_box ();
00666   do {
00667     it->forward ();
00668     blob = it->data ();
00669   }
00670                                  //until next real blob
00671   while (blob->joined_to_prev ());
00672   return result;
00673 }
00674 
00675 
00676 /**********************************************************************
00677  * TO_ROW::TO_ROW
00678  *
00679  * Constructor to make a row from a blob.
00680  **********************************************************************/
00681 
00682 TO_ROW::TO_ROW (                 //constructor
00683 BLOBNBOX * blob,                 //first blob
00684 float top,                       //corrected top
00685 float bottom,                    //of row
00686 float row_size                   //ideal
00687 ) {
00688   clear();
00689   y_min = bottom;
00690   y_max = top;
00691   initial_y_min = bottom;
00692 
00693   float diff;                    //in size
00694   BLOBNBOX_IT it = &blobs;       //list of blobs
00695 
00696   it.add_to_end (blob);
00697   diff = top - bottom - row_size;
00698   if (diff > 0) {
00699     y_max -= diff / 2;
00700     y_min += diff / 2;
00701   }
00702                                  //very small object
00703   else if ((top - bottom) * 3 < row_size) {
00704     diff = row_size / 3 + bottom - top;
00705     y_max += diff / 2;
00706     y_min -= diff / 2;
00707   }
00708 }
00709 
00710 void TO_ROW::print() const {
00711   tprintf("pitch=%d, fp=%g, fps=%g, fpns=%g, prs=%g, prns=%g,"
00712           " spacing=%g xh=%g y_origin=%g xev=%d, asc=%g, desc=%g,"
00713           " body=%g, minsp=%d maxnsp=%d, thr=%d kern=%g sp=%g\n",
00714           pitch_decision, fixed_pitch, fp_space, fp_nonsp, pr_space, pr_nonsp,
00715           spacing, xheight, y_origin, xheight_evidence, ascrise, descdrop,
00716           body_size, min_space, max_nonspace, space_threshold, kern_size,
00717           space_size);
00718 }
00719 
00720 /**********************************************************************
00721  * TO_ROW:add_blob
00722  *
00723  * Add the blob to the end of the row.
00724  **********************************************************************/
00725 
00726 void TO_ROW::add_blob(                 //constructor
00727                       BLOBNBOX *blob,  //first blob
00728                       float top,       //corrected top
00729                       float bottom,    //of row
00730                       float row_size   //ideal
00731                      ) {
00732   float allowed;                 //allowed expansion
00733   float available;               //expansion
00734   BLOBNBOX_IT it = &blobs;       //list of blobs
00735 
00736   it.add_to_end (blob);
00737   allowed = row_size + y_min - y_max;
00738   if (allowed > 0) {
00739     available = top > y_max ? top - y_max : 0;
00740     if (bottom < y_min)
00741                                  //total available
00742         available += y_min - bottom;
00743     if (available > 0) {
00744       available += available;    //do it gradually
00745       if (available < allowed)
00746         available = allowed;
00747       if (bottom < y_min)
00748         y_min -= (y_min - bottom) * allowed / available;
00749       if (top > y_max)
00750         y_max += (top - y_max) * allowed / available;
00751     }
00752   }
00753 }
00754 
00755 
00756 /**********************************************************************
00757  * TO_ROW:insert_blob
00758  *
00759  * Add the blob to the row in the correct position.
00760  **********************************************************************/
00761 
00762 void TO_ROW::insert_blob(                //constructor
00763                          BLOBNBOX *blob  //first blob
00764                         ) {
00765   BLOBNBOX_IT it = &blobs;       //list of blobs
00766 
00767   if (it.empty ())
00768     it.add_before_then_move (blob);
00769   else {
00770     it.mark_cycle_pt ();
00771     while (!it.cycled_list ()
00772       && it.data ()->bounding_box ().left () <=
00773       blob->bounding_box ().left ())
00774       it.forward ();
00775     if (it.cycled_list ())
00776       it.add_to_end (blob);
00777     else
00778       it.add_before_stay_put (blob);
00779   }
00780 }
00781 
00782 
00783 /**********************************************************************
00784  * TO_ROW::compute_vertical_projection
00785  *
00786  * Compute the vertical projection of a TO_ROW from its blobs.
00787  **********************************************************************/
00788 
00789 void TO_ROW::compute_vertical_projection() {  //project whole row
00790   TBOX row_box;                   //bound of row
00791   BLOBNBOX *blob;                //current blob
00792   TBOX blob_box;                  //bounding box
00793   BLOBNBOX_IT blob_it = blob_list ();
00794 
00795   if (blob_it.empty ())
00796     return;
00797   row_box = blob_it.data ()->bounding_box ();
00798   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ())
00799     row_box += blob_it.data ()->bounding_box ();
00800 
00801   projection.set_range (row_box.left () - PROJECTION_MARGIN,
00802     row_box.right () + PROJECTION_MARGIN);
00803   projection_left = row_box.left () - PROJECTION_MARGIN;
00804   projection_right = row_box.right () + PROJECTION_MARGIN;
00805   for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
00806     blob = blob_it.data();
00807     if (blob->cblob() != NULL)
00808       vertical_cblob_projection(blob->cblob(), &projection);
00809   }
00810 }
00811 
00812 
00813 /**********************************************************************
00814  * TO_ROW::clear
00815  *
00816  * Zero out all scalar members.
00817  **********************************************************************/
00818 void TO_ROW::clear() {
00819   all_caps = 0;
00820   used_dm_model = 0;
00821   projection_left = 0;
00822   projection_right = 0;
00823   pitch_decision = PITCH_DUNNO;
00824   fixed_pitch = 0.0;
00825   fp_space = 0.0;
00826   fp_nonsp = 0.0;
00827   pr_space = 0.0;
00828   pr_nonsp = 0.0;
00829   spacing = 0.0;
00830   xheight = 0.0;
00831   xheight_evidence = 0;
00832   body_size = 0.0;
00833   ascrise = 0.0;
00834   descdrop = 0.0;
00835   min_space = 0;
00836   max_nonspace = 0;
00837   space_threshold = 0;
00838   kern_size = 0.0;
00839   space_size = 0.0;
00840   y_min = 0.0;
00841   y_max = 0.0;
00842   initial_y_min = 0.0;
00843   m = 0.0;
00844   c = 0.0;
00845   error = 0.0;
00846   para_c = 0.0;
00847   para_error = 0.0;
00848   y_origin = 0.0;
00849   credibility = 0.0;
00850   num_repeated_sets_ = -1;
00851 }
00852 
00853 
00854 /**********************************************************************
00855  * vertical_cblob_projection
00856  *
00857  * Compute the vertical projection of a cblob from its outlines
00858  * and add to the given STATS.
00859  **********************************************************************/
00860 
00861 void vertical_cblob_projection(               //project outlines
00862                                C_BLOB *blob,  //blob to project
00863                                STATS *stats   //output
00864                               ) {
00865                                  //outlines of blob
00866   C_OUTLINE_IT out_it = blob->out_list ();
00867 
00868   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00869     vertical_coutline_projection (out_it.data (), stats);
00870   }
00871 }
00872 
00873 
00874 /**********************************************************************
00875  * vertical_coutline_projection
00876  *
00877  * Compute the vertical projection of a outline from its outlines
00878  * and add to the given STATS.
00879  **********************************************************************/
00880 
00881 void vertical_coutline_projection(                     //project outlines
00882                                   C_OUTLINE *outline,  //outline to project
00883                                   STATS *stats         //output
00884                                  ) {
00885   ICOORD pos;                    //current point
00886   ICOORD step;                   //edge step
00887   inT32 length;                  //of outline
00888   inT16 stepindex;               //current step
00889   C_OUTLINE_IT out_it = outline->child ();
00890 
00891   pos = outline->start_pos ();
00892   length = outline->pathlength ();
00893   for (stepindex = 0; stepindex < length; stepindex++) {
00894     step = outline->step (stepindex);
00895     if (step.x () > 0) {
00896      stats->add (pos.x (), -pos.y ());
00897     } else if (step.x () < 0) {
00898       stats->add (pos.x () - 1, pos.y ());
00899     }
00900     pos += step;
00901   }
00902 
00903   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00904     vertical_coutline_projection (out_it.data (), stats);
00905   }
00906 }
00907 
00908 
00909 /**********************************************************************
00910  * TO_BLOCK::TO_BLOCK
00911  *
00912  * Constructor to make a TO_BLOCK from a real block.
00913  **********************************************************************/
00914 
00915 TO_BLOCK::TO_BLOCK(                  //make a block
00916                    BLOCK *src_block  //real block
00917                   ) {
00918   clear();
00919   block = src_block;
00920 }
00921 
00922 static void clear_blobnboxes(BLOBNBOX_LIST* boxes) {
00923   BLOBNBOX_IT it = boxes;
00924   // A BLOBNBOX generally doesn't own its blobs, so if they do, you
00925   // have to delete them explicitly.
00926   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00927     BLOBNBOX* box = it.data();
00928     if (box->cblob() != NULL)
00929       delete box->cblob();
00930   }
00931 }
00932 
00933 /**********************************************************************
00934  * TO_BLOCK::clear
00935  *
00936  * Zero out all scalar members.
00937  **********************************************************************/
00938 void TO_BLOCK::clear() {
00939   block = NULL;
00940   pitch_decision = PITCH_DUNNO;
00941   line_spacing = 0.0;
00942   line_size = 0.0;
00943   max_blob_size = 0.0;
00944   baseline_offset = 0.0;
00945   xheight = 0.0;
00946   fixed_pitch = 0.0;
00947   kern_size = 0.0;
00948   space_size = 0.0;
00949   min_space = 0;
00950   max_nonspace = 0;
00951   fp_space = 0.0;
00952   fp_nonsp = 0.0;
00953   pr_space = 0.0;
00954   pr_nonsp = 0.0;
00955   key_row = NULL;
00956 }
00957 
00958 
00959 TO_BLOCK::~TO_BLOCK() {
00960   // Any residual BLOBNBOXes at this stage own their blobs, so delete them.
00961   clear_blobnboxes(&blobs);
00962   clear_blobnboxes(&underlines);
00963   clear_blobnboxes(&noise_blobs);
00964   clear_blobnboxes(&small_blobs);
00965   clear_blobnboxes(&large_blobs);
00966 }
00967 
00968 // Helper function to divide the input blobs over noise, small, medium
00969 // and large lists. Blobs small in height and (small in width or large in width)
00970 // go in the noise list. Dash (-) candidates go in the small list, and
00971 // medium and large are by height.
00972 // SIDE-EFFECT: reset all blobs to initial state by calling Init().
00973 static void SizeFilterBlobs(int min_height, int max_height,
00974                             BLOBNBOX_LIST* src_list,
00975                             BLOBNBOX_LIST* noise_list,
00976                             BLOBNBOX_LIST* small_list,
00977                             BLOBNBOX_LIST* medium_list,
00978                             BLOBNBOX_LIST* large_list) {
00979   BLOBNBOX_IT noise_it(noise_list);
00980   BLOBNBOX_IT small_it(small_list);
00981   BLOBNBOX_IT medium_it(medium_list);
00982   BLOBNBOX_IT large_it(large_list);
00983   for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) {
00984     BLOBNBOX* blob = src_it.extract();
00985     blob->ReInit();
00986     int width = blob->bounding_box().width();
00987     int height = blob->bounding_box().height();
00988     if (height < min_height  &&
00989         (width < min_height || width > max_height))
00990       noise_it.add_after_then_move(blob);
00991     else if (height > max_height)
00992       large_it.add_after_then_move(blob);
00993     else if (height < min_height)
00994       small_it.add_after_then_move(blob);
00995     else
00996       medium_it.add_after_then_move(blob);
00997   }
00998 }
00999 
01000 // Reorganize the blob lists with a different definition of small, medium
01001 // and large, compared to the original definition.
01002 // Height is still the primary filter key, but medium width blobs of small
01003 // height become small, and very wide blobs of small height stay noise, along
01004 // with small dot-shaped blobs.
01005 void TO_BLOCK::ReSetAndReFilterBlobs() {
01006   int min_height = IntCastRounded(kMinMediumSizeRatio * line_size);
01007   int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size);
01008   BLOBNBOX_LIST noise_list;
01009   BLOBNBOX_LIST small_list;
01010   BLOBNBOX_LIST medium_list;
01011   BLOBNBOX_LIST large_list;
01012   SizeFilterBlobs(min_height, max_height, &blobs,
01013                   &noise_list, &small_list, &medium_list, &large_list);
01014   SizeFilterBlobs(min_height, max_height, &large_blobs,
01015                   &noise_list, &small_list, &medium_list, &large_list);
01016   SizeFilterBlobs(min_height, max_height, &small_blobs,
01017                   &noise_list, &small_list, &medium_list, &large_list);
01018   SizeFilterBlobs(min_height, max_height, &noise_blobs,
01019                   &noise_list, &small_list, &medium_list, &large_list);
01020   BLOBNBOX_IT blob_it(&blobs);
01021   blob_it.add_list_after(&medium_list);
01022   blob_it.set_to_list(&large_blobs);
01023   blob_it.add_list_after(&large_list);
01024   blob_it.set_to_list(&small_blobs);
01025   blob_it.add_list_after(&small_list);
01026   blob_it.set_to_list(&noise_blobs);
01027   blob_it.add_list_after(&noise_list);
01028 }
01029 
01030 // Deletes noise blobs from all lists where not owned by a ColPartition.
01031 void TO_BLOCK::DeleteUnownedNoise() {
01032   BLOBNBOX::CleanNeighbours(&blobs);
01033   BLOBNBOX::CleanNeighbours(&small_blobs);
01034   BLOBNBOX::CleanNeighbours(&noise_blobs);
01035   BLOBNBOX::CleanNeighbours(&large_blobs);
01036   BLOBNBOX::DeleteNoiseBlobs(&blobs);
01037   BLOBNBOX::DeleteNoiseBlobs(&small_blobs);
01038   BLOBNBOX::DeleteNoiseBlobs(&noise_blobs);
01039   BLOBNBOX::DeleteNoiseBlobs(&large_blobs);
01040 }
01041 
01042 // Computes and stores the edge offsets on each blob for use in feature
01043 // extraction, using greyscale if the supplied grey and thresholds pixes
01044 // are 8-bit or otherwise (if NULL or not 8 bit) the original binary
01045 // edge step outlines.
01046 // Thresholds must either be the same size as grey or an integer down-scale
01047 // of grey.
01048 // See coutln.h for an explanation of edge offsets.
01049 void TO_BLOCK::ComputeEdgeOffsets(Pix* thresholds, Pix* grey) {
01050   BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &blobs);
01051   BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &small_blobs);
01052   BLOBNBOX::ComputeEdgeOffsets(thresholds, grey, &noise_blobs);
01053 }
01054 
01055 #ifndef GRAPHICS_DISABLED
01056 // Draw the noise blobs from all lists in red.
01057 void TO_BLOCK::plot_noise_blobs(ScrollView* win) {
01058   BLOBNBOX::PlotNoiseBlobs(&noise_blobs, ScrollView::RED, ScrollView::RED, win);
01059   BLOBNBOX::PlotNoiseBlobs(&small_blobs, ScrollView::RED, ScrollView::RED, win);
01060   BLOBNBOX::PlotNoiseBlobs(&large_blobs, ScrollView::RED, ScrollView::RED, win);
01061   BLOBNBOX::PlotNoiseBlobs(&blobs, ScrollView::RED, ScrollView::RED, win);
01062 }
01063 
01064 // Draw the blobs on the various lists in the block in different colors.
01065 void TO_BLOCK::plot_graded_blobs(ScrollView* win) {
01066   BLOBNBOX::PlotBlobs(&noise_blobs, ScrollView::CORAL, ScrollView::BLUE, win);
01067   BLOBNBOX::PlotBlobs(&small_blobs, ScrollView::GOLDENROD, ScrollView::YELLOW,
01068                       win);
01069   BLOBNBOX::PlotBlobs(&large_blobs, ScrollView::DARK_GREEN, ScrollView::YELLOW,
01070                       win);
01071   BLOBNBOX::PlotBlobs(&blobs, ScrollView::WHITE, ScrollView::BROWN, win);
01072 }
01073 
01074 /**********************************************************************
01075  * plot_blob_list
01076  *
01077  * Draw a list of blobs.
01078  **********************************************************************/
01079 
01080 void plot_blob_list(ScrollView* win,                   // window to draw in
01081                     BLOBNBOX_LIST *list,               // blob list
01082                     ScrollView::Color body_colour,     // colour to draw
01083                     ScrollView::Color child_colour) {  // colour of child
01084   BLOBNBOX_IT it = list;
01085   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
01086     it.data()->plot(win, body_colour, child_colour);
01087   }
01088 }
01089 #endif  // GRAPHICS_DISABLED
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines