tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/stepblob.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        stepblob.cpp  (Formerly cblob.c)
00003  * Description: Code for C_BLOB class.
00004  * Author:              Ray Smith
00005  * Created:             Tue Oct 08 10:41:13 BST 1991
00006  *
00007  * (C) Copyright 1991, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "stepblob.h"
00021 #include "allheaders.h"
00022 
00023 // Include automatically generated configuration file if running autoconf.
00024 #ifdef HAVE_CONFIG_H
00025 #include "config_auto.h"
00026 #endif
00027 
00028 // Max perimeter to width ratio for a baseline position above box bottom.
00029 const double kMaxPerimeterWidthRatio = 8.0;
00030 
00031 ELISTIZE (C_BLOB)
00032 /**********************************************************************
00033  * position_outline
00034  *
00035  * Position the outline in the given list at the relevant place
00036  * according to its nesting.
00037  **********************************************************************/
00038 static void position_outline(                          //put in place
00039                              C_OUTLINE *outline,       //thing to place
00040                              C_OUTLINE_LIST *destlist  //desstination list
00041                             ) {
00042   C_OUTLINE *dest_outline;       //outline from dest list
00043   C_OUTLINE_IT it = destlist;    //iterator
00044                                  //iterator on children
00045   C_OUTLINE_IT child_it = outline->child ();
00046 
00047   if (!it.empty ()) {
00048     do {
00049       dest_outline = it.data (); //get destination
00050                                  //encloses dest
00051       if (*dest_outline < *outline) {
00052                                  //take off list
00053         dest_outline = it.extract ();
00054                                  //put this in place
00055         it.add_after_then_move (outline);
00056                                  //make it a child
00057         child_it.add_to_end (dest_outline);
00058         while (!it.at_last ()) {
00059           it.forward ();         //do rest of list
00060                                  //check for other children
00061           dest_outline = it.data ();
00062           if (*dest_outline < *outline) {
00063                                  //take off list
00064             dest_outline = it.extract ();
00065             child_it.add_to_end (dest_outline);
00066             //make it a child
00067             if (it.empty ())
00068               break;
00069           }
00070         }
00071         return;                  //finished
00072       }
00073                                  //enclosed by dest
00074       else if (*outline < *dest_outline) {
00075         position_outline (outline, dest_outline->child ());
00076         //place in child list
00077         return;                  //finished
00078       }
00079       it.forward ();
00080     }
00081     while (!it.at_first ());
00082   }
00083   it.add_to_end (outline);       //at outer level
00084 }
00085 
00086 
00087 /**********************************************************************
00088  * plot_outline_list
00089  *
00090  * Draw a list of outlines in the given colour and their children
00091  * in the child colour.
00092  **********************************************************************/
00093 
00094 #ifndef GRAPHICS_DISABLED
00095 static void plot_outline_list(                       //draw outlines
00096                               C_OUTLINE_LIST *list,  //outline to draw
00097                               ScrollView* window,         //window to draw in
00098                               ScrollView::Color colour,         //colour to use
00099                               ScrollView::Color child_colour    //colour of children
00100                              ) {
00101   C_OUTLINE *outline;            //current outline
00102   C_OUTLINE_IT it = list;        //iterator
00103 
00104   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00105     outline = it.data ();
00106                                  //draw it
00107     outline->plot (window, colour);
00108     if (!outline->child ()->empty ())
00109       plot_outline_list (outline->child (), window,
00110         child_colour, child_colour);
00111   }
00112 }
00113 // Draws the outlines in the given colour, and child_colour, normalized
00114 // using the given denorm, making use of sub-pixel accurate information
00115 // if available.
00116 static void plot_normed_outline_list(const DENORM& denorm,
00117                                      C_OUTLINE_LIST *list,
00118                                      ScrollView::Color colour,
00119                                      ScrollView::Color child_colour,
00120                                      ScrollView* window) {
00121   C_OUTLINE_IT it(list);
00122   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00123     C_OUTLINE* outline = it.data();
00124     outline->plot_normed(denorm, colour, window);
00125     if (!outline->child()->empty())
00126       plot_normed_outline_list(denorm, outline->child(), child_colour,
00127                                child_colour, window);
00128   }
00129 }
00130 #endif
00131 
00132 
00133 /**********************************************************************
00134  * reverse_outline_list
00135  *
00136  * Reverse a list of outlines and their children.
00137  **********************************************************************/
00138 
00139 static void reverse_outline_list(C_OUTLINE_LIST *list) {
00140   C_OUTLINE_IT it = list;        // iterator
00141 
00142   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00143     C_OUTLINE* outline = it.data();
00144     outline->reverse();         // reverse it
00145     outline->set_flag(COUT_INVERSE, TRUE);
00146     if (!outline->child()->empty())
00147       reverse_outline_list(outline->child());
00148   }
00149 }
00150 
00151 
00152 /**********************************************************************
00153  * C_BLOB::C_BLOB
00154  *
00155  * Constructor to build a C_BLOB from a list of C_OUTLINEs.
00156  * The C_OUTLINEs are not copied so the source list is emptied.
00157  * The C_OUTLINEs are nested correctly in the blob.
00158  **********************************************************************/
00159 
00160 C_BLOB::C_BLOB(C_OUTLINE_LIST *outline_list) {
00161   for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
00162     C_OUTLINE* outline = ol_it.extract();
00163     // Position this outline in appropriate position in the hierarchy.
00164     position_outline(outline, &outlines);
00165   }
00166   CheckInverseFlagAndDirection();
00167 }
00168 
00169 // Simpler constructor to build a blob from a single outline that has
00170 // already been fully initialized.
00171 C_BLOB::C_BLOB(C_OUTLINE* outline) {
00172   C_OUTLINE_IT it(&outlines);
00173   it.add_to_end(outline);
00174 }
00175 
00176 // Builds a set of one or more blobs from a list of outlines.
00177 // Input: one outline on outline_list contains all the others, but the
00178 // nesting and order are undefined.
00179 // If good_blob is true, the blob is added to good_blobs_it, unless
00180 // an illegal (generation-skipping) parent-child relationship is found.
00181 // If so, the parent blob goes to bad_blobs_it, and the immediate children
00182 // are promoted to the top level, recursively being sent to good_blobs_it.
00183 // If good_blob is false, all created blobs will go to the bad_blobs_it.
00184 // Output: outline_list is empty. One or more blobs are added to
00185 // good_blobs_it and/or bad_blobs_it.
00186 void C_BLOB::ConstructBlobsFromOutlines(bool good_blob,
00187                                         C_OUTLINE_LIST* outline_list,
00188                                         C_BLOB_IT* good_blobs_it,
00189                                         C_BLOB_IT* bad_blobs_it) {
00190   // List of top-level outlines with correctly nested children.
00191   C_OUTLINE_LIST nested_outlines;
00192   for (C_OUTLINE_IT ol_it(outline_list); !ol_it.empty(); ol_it.forward()) {
00193     C_OUTLINE* outline = ol_it.extract();
00194     // Position this outline in appropriate position in the hierarchy.
00195     position_outline(outline, &nested_outlines);
00196   }
00197   // Check for legal nesting and reassign as required.
00198   for (C_OUTLINE_IT ol_it(&nested_outlines); !ol_it.empty(); ol_it.forward()) {
00199     C_OUTLINE* outline = ol_it.extract();
00200     bool blob_is_good = good_blob;
00201     if (!outline->IsLegallyNested()) {
00202       // The blob is illegally nested.
00203       // Mark it bad, and add all its children to the top-level list.
00204       blob_is_good = false;
00205       ol_it.add_list_after(outline->child());
00206     }
00207     C_BLOB* blob = new C_BLOB(outline);
00208     // Set inverse flag and reverse if needed.
00209     blob->CheckInverseFlagAndDirection();
00210     // Put on appropriate list.
00211     if (!blob_is_good && bad_blobs_it != NULL)
00212       bad_blobs_it->add_after_then_move(blob);
00213     else
00214       good_blobs_it->add_after_then_move(blob);
00215   }
00216 }
00217 
00218 // Sets the COUT_INVERSE flag appropriately on the outlines and their
00219 // children recursively, reversing the outlines if needed so that
00220 // everything has an anticlockwise top-level.
00221 void C_BLOB::CheckInverseFlagAndDirection() {
00222   C_OUTLINE_IT ol_it(&outlines);
00223   for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
00224     C_OUTLINE* outline = ol_it.data();
00225     if (outline->turn_direction() < 0) {
00226       outline->reverse();
00227       reverse_outline_list(outline->child());
00228       outline->set_flag(COUT_INVERSE, TRUE);
00229     } else {
00230       outline->set_flag(COUT_INVERSE, FALSE);
00231     }
00232   }
00233 }
00234 
00235 
00236 // Build and return a fake blob containing a single fake outline with no
00237 // steps.
00238 C_BLOB* C_BLOB::FakeBlob(const TBOX& box) {
00239   C_OUTLINE_LIST outlines;
00240   C_OUTLINE::FakeOutline(box, &outlines);
00241   return new C_BLOB(&outlines);
00242 }
00243 
00244 /**********************************************************************
00245  * C_BLOB::bounding_box
00246  *
00247  * Return the bounding box of the blob.
00248  **********************************************************************/
00249 
00250 TBOX C_BLOB::bounding_box() {  //bounding box
00251   C_OUTLINE *outline;            //current outline
00252   C_OUTLINE_IT it = &outlines;   //outlines of blob
00253   TBOX box;                       //bounding box
00254 
00255   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00256     outline = it.data ();
00257     box += outline->bounding_box ();
00258   }
00259   return box;
00260 }
00261 
00262 
00263 /**********************************************************************
00264  * C_BLOB::area
00265  *
00266  * Return the area of the blob.
00267  **********************************************************************/
00268 
00269 inT32 C_BLOB::area() {  //area
00270   C_OUTLINE *outline;            //current outline
00271   C_OUTLINE_IT it = &outlines;   //outlines of blob
00272   inT32 total;                   //total area
00273 
00274   total = 0;
00275   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00276     outline = it.data ();
00277     total += outline->area ();
00278   }
00279   return total;
00280 }
00281 
00282 /**********************************************************************
00283  * C_BLOB::perimeter
00284  *
00285  * Return the perimeter of the top and 2nd level outlines.
00286  **********************************************************************/
00287 
00288 inT32 C_BLOB::perimeter() {
00289   C_OUTLINE *outline;            // current outline
00290   C_OUTLINE_IT it = &outlines;   // outlines of blob
00291   inT32 total;                   // total perimeter
00292 
00293   total = 0;
00294   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00295     outline = it.data();
00296     total += outline->perimeter();
00297   }
00298   return total;
00299 }
00300 
00301 
00302 /**********************************************************************
00303  * C_BLOB::outer_area
00304  *
00305  * Return the area of the blob.
00306  **********************************************************************/
00307 
00308 inT32 C_BLOB::outer_area() {  //area
00309   C_OUTLINE *outline;            //current outline
00310   C_OUTLINE_IT it = &outlines;   //outlines of blob
00311   inT32 total;                   //total area
00312 
00313   total = 0;
00314   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00315     outline = it.data ();
00316     total += outline->outer_area ();
00317   }
00318   return total;
00319 }
00320 
00321 
00322 /**********************************************************************
00323  * C_BLOB::count_transitions
00324  *
00325  * Return the total x and y maxes and mins in the blob.
00326  * Chlid outlines are not counted.
00327  **********************************************************************/
00328 
00329 inT32 C_BLOB::count_transitions(                 //area
00330                                 inT32 threshold  //on size
00331                                ) {
00332   C_OUTLINE *outline;            //current outline
00333   C_OUTLINE_IT it = &outlines;   //outlines of blob
00334   inT32 total;                   //total area
00335 
00336   total = 0;
00337   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) {
00338     outline = it.data ();
00339     total += outline->count_transitions (threshold);
00340   }
00341   return total;
00342 }
00343 
00344 
00345 /**********************************************************************
00346  * C_BLOB::move
00347  *
00348  * Move C_BLOB by vector
00349  **********************************************************************/
00350 
00351 void C_BLOB::move(                  // reposition blob
00352                   const ICOORD vec  // by vector
00353                  ) {
00354   C_OUTLINE_IT it(&outlines);  // iterator
00355 
00356   for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ())
00357     it.data ()->move (vec);      // move each outline
00358 }
00359 
00360 // Static helper for C_BLOB::rotate to allow recursion of child outlines.
00361 void RotateOutlineList(const FCOORD& rotation, C_OUTLINE_LIST* outlines) {
00362   C_OUTLINE_LIST new_outlines;
00363   C_OUTLINE_IT src_it(outlines);
00364   C_OUTLINE_IT dest_it(&new_outlines);
00365   while (!src_it.empty()) {
00366     C_OUTLINE* old_outline = src_it.extract();
00367     src_it.forward();
00368     C_OUTLINE* new_outline = new C_OUTLINE(old_outline, rotation);
00369     if (!old_outline->child()->empty()) {
00370       RotateOutlineList(rotation, old_outline->child());
00371       C_OUTLINE_IT child_it(new_outline->child());
00372       child_it.add_list_after(old_outline->child());
00373     }
00374     delete old_outline;
00375     dest_it.add_to_end(new_outline);
00376   }
00377   src_it.add_list_after(&new_outlines);
00378 }
00379 
00380 /**********************************************************************
00381  * C_BLOB::rotate
00382  *
00383  * Rotate C_BLOB by rotation.
00384  * Warning! has to rebuild all the C_OUTLINEs.
00385  **********************************************************************/
00386 void C_BLOB::rotate(const FCOORD& rotation) {
00387   RotateOutlineList(rotation, &outlines);
00388 }
00389 
00390 // Helper calls ComputeEdgeOffsets or ComputeBinaryOffsets recursively on the
00391 // outline list and its children.
00392 static void ComputeEdgeOffsetsOutlineList(int threshold, Pix* pix,
00393                                           C_OUTLINE_LIST *list) {
00394   C_OUTLINE_IT it(list);
00395   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00396     C_OUTLINE* outline = it.data();
00397     if (pix != NULL && pixGetDepth(pix) == 8)
00398       outline->ComputeEdgeOffsets(threshold, pix);
00399     else
00400       outline->ComputeBinaryOffsets();
00401     if (!outline->child()->empty())
00402       ComputeEdgeOffsetsOutlineList(threshold, pix, outline->child());
00403   }
00404 }
00405 
00406 // Adds sub-pixel resolution EdgeOffsets for the outlines using greyscale
00407 // if the supplied pix is 8-bit or the binary edges if NULL.
00408 void C_BLOB::ComputeEdgeOffsets(int threshold, Pix* pix) {
00409   ComputeEdgeOffsetsOutlineList(threshold, pix, &outlines);
00410 }
00411 
00412 // Estimates and returns the baseline position based on the shape of the
00413 // outlines.
00414 // We first find the minimum y-coord (y_mins) at each x-coord within the blob.
00415 // If there is a run of some y or y+1 in y_mins that is longer than the total
00416 // number of positions at bottom or bottom+1, subject to the additional
00417 // condition that at least one side of the y/y+1 run is higher than y+1, so it
00418 // is not a local minimum, then y, not the bottom, makes a good candidate
00419 // baseline position for this blob. Eg
00420 //   |                  ---|
00421 //   |                  |
00422 //   |-      -----------|        <=  Good candidate baseline position.
00423 //    |-    -|
00424 //     |   -|
00425 //     |---|                     <=  Bottom of blob
00426 inT16 C_BLOB::EstimateBaselinePosition() {
00427   TBOX box = bounding_box();
00428   int left = box.left();
00429   int width = box.width();
00430   int bottom = box.bottom();
00431   if (outlines.empty() || perimeter() > width * kMaxPerimeterWidthRatio)
00432     return bottom;  // This is only for non-CJK blobs.
00433   // Get the minimum y coordinate at each x-coordinate.
00434   GenericVector<int> y_mins;
00435   y_mins.init_to_size(width + 1, box.top());
00436   C_OUTLINE_IT it(&outlines);
00437   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00438     C_OUTLINE* outline = it.data();
00439     ICOORD pos = outline->start_pos();
00440     for (int s = 0; s < outline->pathlength(); ++s) {
00441       if (pos.y() < y_mins[pos.x() - left])
00442         y_mins[pos.x() - left] = pos.y();
00443       pos += outline->step(s);
00444     }
00445   }
00446   // Find the total extent of the bottom or bottom + 1.
00447   int bottom_extent = 0;
00448   for (int x = 0; x <= width; ++x) {
00449     if (y_mins[x] == bottom || y_mins[x] == bottom + 1)
00450       ++bottom_extent;
00451   }
00452   // Find the lowest run longer than the bottom extent that is not the bottom.
00453   int best_min = box.top();
00454   int prev_run = 0;
00455   int prev_y = box.top();
00456   int prev_prev_y = box.top();
00457   for (int x = 0; x < width; x += prev_run) {
00458     // Find the length of the current run.
00459     int y_at_x = y_mins[x];
00460     int run = 1;
00461     while (x + run <= width && y_mins[x + run] == y_at_x) ++run;
00462     if (y_at_x > bottom + 1) {
00463       // Possible contender.
00464       int total_run = run;
00465       // Find extent of current value or +1 to the right of x.
00466       while (x + total_run <= width &&
00467           (y_mins[x + total_run] == y_at_x ||
00468               y_mins[x + total_run] == y_at_x + 1)) ++total_run;
00469       // At least one end has to be higher so it is not a local max.
00470       if (prev_prev_y > y_at_x + 1 || x + total_run > width ||
00471           y_mins[x + total_run] > y_at_x + 1) {
00472         // If the prev_run is at y + 1, then we can add that too. There cannot
00473         // be a suitable run at y before that or we would have found it already.
00474         if (prev_run > 0 && prev_y == y_at_x + 1) total_run += prev_run;
00475         if (total_run > bottom_extent && y_at_x < best_min) {
00476           best_min = y_at_x;
00477         }
00478       }
00479     }
00480     prev_run = run;
00481     prev_prev_y = prev_y;
00482     prev_y = y_at_x;
00483   }
00484   return best_min == box.top() ? bottom : best_min;
00485 }
00486 
00487 static void render_outline_list(C_OUTLINE_LIST *list,
00488                                 int left, int top, Pix* pix) {
00489   C_OUTLINE_IT it(list);
00490   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00491     C_OUTLINE* outline = it.data();
00492     outline->render(left, top, pix);
00493     if (!outline->child()->empty())
00494       render_outline_list(outline->child(), left, top, pix);
00495   }
00496 }
00497 
00498 static void render_outline_list_outline(C_OUTLINE_LIST *list,
00499                                         int left, int top, Pix* pix) {
00500   C_OUTLINE_IT it(list);
00501   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00502     C_OUTLINE* outline = it.data();
00503     outline->render_outline(left, top, pix);
00504   }
00505 }
00506 
00507 // Returns a Pix rendering of the blob. pixDestroy after use.
00508 Pix* C_BLOB::render() {
00509   TBOX box = bounding_box();
00510   Pix* pix = pixCreate(box.width(), box.height(), 1);
00511   render_outline_list(&outlines, box.left(), box.top(), pix);
00512   return pix;
00513 }
00514 
00515 // Returns a Pix rendering of the outline of the blob. (no fill).
00516 // pixDestroy after use.
00517 Pix* C_BLOB::render_outline() {
00518   TBOX box = bounding_box();
00519   Pix* pix = pixCreate(box.width(), box.height(), 1);
00520   render_outline_list_outline(&outlines, box.left(), box.top(), pix);
00521   return pix;
00522 }
00523 
00524 /**********************************************************************
00525  * C_BLOB::plot
00526  *
00527  * Draw the C_BLOB in the given colour.
00528  **********************************************************************/
00529 
00530 #ifndef GRAPHICS_DISABLED
00531 void C_BLOB::plot(ScrollView* window,                // window to draw in
00532                   ScrollView::Color blob_colour,     // main colour
00533                   ScrollView::Color child_colour) {  // for holes
00534   plot_outline_list(&outlines, window, blob_colour, child_colour);
00535 }
00536 // Draws the blob in the given colour, and child_colour, normalized
00537 // using the given denorm, making use of sub-pixel accurate information
00538 // if available.
00539 void C_BLOB::plot_normed(const DENORM& denorm,
00540                          ScrollView::Color blob_colour,
00541                          ScrollView::Color child_colour,
00542                          ScrollView* window) {
00543   plot_normed_outline_list(denorm, &outlines, blob_colour, child_colour,
00544                            window);
00545 }
00546 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines