tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/underlin.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        underlin.cpp  (Formerly undrline.c)
00003  * Description: Code to chop blobs apart from underlines.
00004  * Author:              Ray Smith
00005  * Created:             Mon Aug  8 11:14:00 BST 1994
00006  *
00007  * (C) Copyright 1994, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifdef __UNIX__
00021 #include          <assert.h>
00022 #endif
00023 #include          "underlin.h"
00024 
00025 #define PROJECTION_MARGIN 10     //arbitrary
00026 #define EXTERN
00027 
00028 EXTERN double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore");
00029 EXTERN BOOL_VAR (textord_restore_underlines, TRUE,
00030 "Chop underlines & put back");
00031 
00032 /**********************************************************************
00033  * restore_underlined_blobs
00034  *
00035  * Find underlined blobs and put them back in the row.
00036  **********************************************************************/
00037 
00038 void restore_underlined_blobs(                 //get chop points
00039                               TO_BLOCK *block  //block to do
00040                              ) {
00041   inT16 chop_coord;              //chop boundary
00042   TBOX blob_box;                  //of underline
00043   BLOBNBOX *u_line;              //underline bit
00044   TO_ROW *row;                   //best row for blob
00045   ICOORDELT_LIST chop_cells;     //blobs to cut out
00046                                  //real underlines
00047   BLOBNBOX_LIST residual_underlines;
00048   C_OUTLINE_LIST left_coutlines;
00049   C_OUTLINE_LIST right_coutlines;
00050   ICOORDELT_IT cell_it = &chop_cells;
00051                                  //under lines
00052   BLOBNBOX_IT under_it = &block->underlines;
00053   BLOBNBOX_IT ru_it = &residual_underlines;
00054 
00055   if (block->get_rows()->empty())
00056     return;  // Don't crash if there are no rows.
00057   for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
00058   under_it.forward ()) {
00059     u_line = under_it.extract ();
00060     blob_box = u_line->bounding_box ();
00061     row = most_overlapping_row (block->get_rows (), u_line);
00062     if (row == NULL)
00063       return;  // Don't crash if there is no row.
00064     find_underlined_blobs (u_line, &row->baseline, row->xheight,
00065       row->xheight * textord_underline_offset,
00066       &chop_cells);
00067     cell_it.set_to_list (&chop_cells);
00068     for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
00069     cell_it.forward ()) {
00070       chop_coord = cell_it.data ()->x ();
00071       if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
00072         split_to_blob (u_line, chop_coord,
00073           textord_fp_chop_error + 0.5,
00074           &left_coutlines,
00075           &right_coutlines);
00076         if (!left_coutlines.empty()) {
00077           ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
00078         }
00079         chop_coord = cell_it.data ()->y ();
00080         split_to_blob(NULL, chop_coord, textord_fp_chop_error + 0.5,
00081                       &left_coutlines, &right_coutlines);
00082         if (!left_coutlines.empty()) {
00083           row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
00084         }
00085         u_line = NULL;           //no more blobs to add
00086       }
00087       delete cell_it.extract();
00088     }
00089     if (!right_coutlines.empty ()) {
00090       split_to_blob(NULL, blob_box.right(), textord_fp_chop_error + 0.5,
00091                     &left_coutlines, &right_coutlines);
00092       if (!left_coutlines.empty())
00093         ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
00094     }
00095     if (u_line != NULL) {
00096       if (u_line->cblob() != NULL)
00097         delete u_line->cblob();
00098       delete u_line;
00099     }
00100   }
00101   if (!ru_it.empty()) {
00102     ru_it.move_to_first();
00103     for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
00104       under_it.add_after_then_move(ru_it.extract());
00105     }
00106   }
00107 }
00108 
00109 
00110 /**********************************************************************
00111  * most_overlapping_row
00112  *
00113  * Return the row which most overlaps the blob.
00114  **********************************************************************/
00115 
00116 TO_ROW *most_overlapping_row(                    //find best row
00117                              TO_ROW_LIST *rows,  //list of rows
00118                              BLOBNBOX *blob      //blob to place
00119                             ) {
00120   inT16 x = (blob->bounding_box ().left ()
00121     + blob->bounding_box ().right ()) / 2;
00122   TO_ROW_IT row_it = rows;       //row iterator
00123   TO_ROW *row;                   //current row
00124   TO_ROW *best_row;              //output row
00125   float overlap;                 //of blob & row
00126   float bestover;                //best overlap
00127 
00128   best_row = NULL;
00129   bestover = (float) -MAX_INT32;
00130   if (row_it.empty ())
00131     return NULL;
00132   row = row_it.data ();
00133   row_it.mark_cycle_pt ();
00134   while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top ()
00135   && !row_it.cycled_list ()) {
00136     best_row = row;
00137     bestover =
00138       blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop;
00139     row_it.forward ();
00140     row = row_it.data ();
00141   }
00142   while (row->baseline.y (x) + row->xheight + row->ascrise
00143   >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) {
00144     overlap = row->baseline.y (x) + row->xheight + row->ascrise;
00145     if (blob->bounding_box ().top () < overlap)
00146       overlap = blob->bounding_box ().top ();
00147     if (blob->bounding_box ().bottom () >
00148       row->baseline.y (x) + row->descdrop)
00149       overlap -= blob->bounding_box ().bottom ();
00150     else
00151       overlap -= row->baseline.y (x) + row->descdrop;
00152     if (overlap > bestover) {
00153       bestover = overlap;
00154       best_row = row;
00155     }
00156     row_it.forward ();
00157     row = row_it.data ();
00158   }
00159   if (bestover < 0
00160     && row->baseline.y (x) + row->xheight + row->ascrise
00161     - blob->bounding_box ().bottom () > bestover)
00162     best_row = row;
00163   return best_row;
00164 }
00165 
00166 
00167 /**********************************************************************
00168  * find_underlined_blobs
00169  *
00170  * Find the start and end coords of blobs in the underline.
00171  **********************************************************************/
00172 
00173 void find_underlined_blobs(                            //get chop points
00174                            BLOBNBOX *u_line,           //underlined unit
00175                            QSPLINE *baseline,          //actual baseline
00176                            float xheight,              //height of line
00177                            float baseline_offset,      //amount to shrinke it
00178                            ICOORDELT_LIST *chop_cells  //places to chop
00179                           ) {
00180   inT16 x, y;                    //sides of blob
00181   ICOORD blob_chop;              //sides of blob
00182   TBOX blob_box = u_line->bounding_box ();
00183                                  //cell iterator
00184   ICOORDELT_IT cell_it = chop_cells;
00185   STATS upper_proj (blob_box.left (), blob_box.right () + 1);
00186   STATS middle_proj (blob_box.left (), blob_box.right () + 1);
00187   STATS lower_proj (blob_box.left (), blob_box.right () + 1);
00188   C_OUTLINE_IT out_it;           //outlines of blob
00189 
00190   ASSERT_HOST (u_line->cblob () != NULL);
00191 
00192   out_it.set_to_list (u_line->cblob ()->out_list ());
00193   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00194     vertical_cunderline_projection (out_it.data (),
00195       baseline, xheight, baseline_offset,
00196       &lower_proj, &middle_proj, &upper_proj);
00197   }
00198 
00199   for (x = blob_box.left (); x < blob_box.right (); x++) {
00200     if (middle_proj.pile_count (x) > 0) {
00201       for (y = x + 1;
00202         y < blob_box.right () && middle_proj.pile_count (y) > 0; y++);
00203       blob_chop = ICOORD (x, y);
00204       cell_it.add_after_then_move (new ICOORDELT (blob_chop));
00205       x = y;
00206     }
00207   }
00208 }
00209 
00210 
00211 /**********************************************************************
00212  * vertical_cunderline_projection
00213  *
00214  * Compute the vertical projection of a outline from its outlines
00215  * and add to the given STATS.
00216  **********************************************************************/
00217 
00218 void vertical_cunderline_projection(                        //project outlines
00219                                     C_OUTLINE *outline,     //outline to project
00220                                     QSPLINE *baseline,      //actual baseline
00221                                     float xheight,          //height of line
00222                                     float baseline_offset,  //amount to shrinke it
00223                                     STATS *lower_proj,      //below baseline
00224                                     STATS *middle_proj,     //centre region
00225                                     STATS *upper_proj       //top region
00226                                    ) {
00227   ICOORD pos;                    //current point
00228   ICOORD step;                   //edge step
00229   inT16 lower_y, upper_y;        //region limits
00230   inT32 length;                  //of outline
00231   inT16 stepindex;               //current step
00232   C_OUTLINE_IT out_it = outline->child ();
00233 
00234   pos = outline->start_pos ();
00235   length = outline->pathlength ();
00236   for (stepindex = 0; stepindex < length; stepindex++) {
00237     step = outline->step (stepindex);
00238     if (step.x () > 0) {
00239       lower_y =
00240         (inT16) floor (baseline->y (pos.x ()) + baseline_offset + 0.5);
00241       upper_y =
00242         (inT16) floor (baseline->y (pos.x ()) + baseline_offset +
00243         xheight + 0.5);
00244       if (pos.y () >= lower_y) {
00245         lower_proj->add (pos.x (), -lower_y);
00246         if (pos.y () >= upper_y) {
00247           middle_proj->add (pos.x (), lower_y - upper_y);
00248           upper_proj->add (pos.x (), upper_y - pos.y ());
00249         }
00250         else
00251           middle_proj->add (pos.x (), lower_y - pos.y ());
00252       }
00253       else
00254         lower_proj->add (pos.x (), -pos.y ());
00255     }
00256     else if (step.x () < 0) {
00257       lower_y =
00258         (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
00259         0.5);
00260       upper_y =
00261         (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
00262         xheight + 0.5);
00263       if (pos.y () >= lower_y) {
00264         lower_proj->add (pos.x () - 1, lower_y);
00265         if (pos.y () >= upper_y) {
00266           middle_proj->add (pos.x () - 1, upper_y - lower_y);
00267           upper_proj->add (pos.x () - 1, pos.y () - upper_y);
00268         }
00269         else
00270           middle_proj->add (pos.x () - 1, pos.y () - lower_y);
00271       }
00272       else
00273         lower_proj->add (pos.x () - 1, pos.y ());
00274     }
00275     pos += step;
00276   }
00277 
00278   for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
00279     vertical_cunderline_projection (out_it.data (),
00280       baseline, xheight, baseline_offset,
00281       lower_proj, middle_proj, upper_proj);
00282   }
00283 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines