tesseract
3.03
|
00001 /********************************************************************** 00002 * File: underlin.cpp (Formerly undrline.c) 00003 * Description: Code to chop blobs apart from underlines. 00004 * Author: Ray Smith 00005 * Created: Mon Aug 8 11:14:00 BST 1994 00006 * 00007 * (C) Copyright 1994, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #ifdef __UNIX__ 00021 #include <assert.h> 00022 #endif 00023 #include "underlin.h" 00024 00025 #define PROJECTION_MARGIN 10 //arbitrary 00026 #define EXTERN 00027 00028 EXTERN double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore"); 00029 EXTERN BOOL_VAR (textord_restore_underlines, TRUE, 00030 "Chop underlines & put back"); 00031 00032 /********************************************************************** 00033 * restore_underlined_blobs 00034 * 00035 * Find underlined blobs and put them back in the row. 00036 **********************************************************************/ 00037 00038 void restore_underlined_blobs( //get chop points 00039 TO_BLOCK *block //block to do 00040 ) { 00041 inT16 chop_coord; //chop boundary 00042 TBOX blob_box; //of underline 00043 BLOBNBOX *u_line; //underline bit 00044 TO_ROW *row; //best row for blob 00045 ICOORDELT_LIST chop_cells; //blobs to cut out 00046 //real underlines 00047 BLOBNBOX_LIST residual_underlines; 00048 C_OUTLINE_LIST left_coutlines; 00049 C_OUTLINE_LIST right_coutlines; 00050 ICOORDELT_IT cell_it = &chop_cells; 00051 //under lines 00052 BLOBNBOX_IT under_it = &block->underlines; 00053 BLOBNBOX_IT ru_it = &residual_underlines; 00054 00055 if (block->get_rows()->empty()) 00056 return; // Don't crash if there are no rows. 00057 for (under_it.mark_cycle_pt (); !under_it.cycled_list (); 00058 under_it.forward ()) { 00059 u_line = under_it.extract (); 00060 blob_box = u_line->bounding_box (); 00061 row = most_overlapping_row (block->get_rows (), u_line); 00062 if (row == NULL) 00063 return; // Don't crash if there is no row. 00064 find_underlined_blobs (u_line, &row->baseline, row->xheight, 00065 row->xheight * textord_underline_offset, 00066 &chop_cells); 00067 cell_it.set_to_list (&chop_cells); 00068 for (cell_it.mark_cycle_pt (); !cell_it.cycled_list (); 00069 cell_it.forward ()) { 00070 chop_coord = cell_it.data ()->x (); 00071 if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) { 00072 split_to_blob (u_line, chop_coord, 00073 textord_fp_chop_error + 0.5, 00074 &left_coutlines, 00075 &right_coutlines); 00076 if (!left_coutlines.empty()) { 00077 ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines))); 00078 } 00079 chop_coord = cell_it.data ()->y (); 00080 split_to_blob(NULL, chop_coord, textord_fp_chop_error + 0.5, 00081 &left_coutlines, &right_coutlines); 00082 if (!left_coutlines.empty()) { 00083 row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines))); 00084 } 00085 u_line = NULL; //no more blobs to add 00086 } 00087 delete cell_it.extract(); 00088 } 00089 if (!right_coutlines.empty ()) { 00090 split_to_blob(NULL, blob_box.right(), textord_fp_chop_error + 0.5, 00091 &left_coutlines, &right_coutlines); 00092 if (!left_coutlines.empty()) 00093 ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines))); 00094 } 00095 if (u_line != NULL) { 00096 if (u_line->cblob() != NULL) 00097 delete u_line->cblob(); 00098 delete u_line; 00099 } 00100 } 00101 if (!ru_it.empty()) { 00102 ru_it.move_to_first(); 00103 for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) { 00104 under_it.add_after_then_move(ru_it.extract()); 00105 } 00106 } 00107 } 00108 00109 00110 /********************************************************************** 00111 * most_overlapping_row 00112 * 00113 * Return the row which most overlaps the blob. 00114 **********************************************************************/ 00115 00116 TO_ROW *most_overlapping_row( //find best row 00117 TO_ROW_LIST *rows, //list of rows 00118 BLOBNBOX *blob //blob to place 00119 ) { 00120 inT16 x = (blob->bounding_box ().left () 00121 + blob->bounding_box ().right ()) / 2; 00122 TO_ROW_IT row_it = rows; //row iterator 00123 TO_ROW *row; //current row 00124 TO_ROW *best_row; //output row 00125 float overlap; //of blob & row 00126 float bestover; //best overlap 00127 00128 best_row = NULL; 00129 bestover = (float) -MAX_INT32; 00130 if (row_it.empty ()) 00131 return NULL; 00132 row = row_it.data (); 00133 row_it.mark_cycle_pt (); 00134 while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top () 00135 && !row_it.cycled_list ()) { 00136 best_row = row; 00137 bestover = 00138 blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop; 00139 row_it.forward (); 00140 row = row_it.data (); 00141 } 00142 while (row->baseline.y (x) + row->xheight + row->ascrise 00143 >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) { 00144 overlap = row->baseline.y (x) + row->xheight + row->ascrise; 00145 if (blob->bounding_box ().top () < overlap) 00146 overlap = blob->bounding_box ().top (); 00147 if (blob->bounding_box ().bottom () > 00148 row->baseline.y (x) + row->descdrop) 00149 overlap -= blob->bounding_box ().bottom (); 00150 else 00151 overlap -= row->baseline.y (x) + row->descdrop; 00152 if (overlap > bestover) { 00153 bestover = overlap; 00154 best_row = row; 00155 } 00156 row_it.forward (); 00157 row = row_it.data (); 00158 } 00159 if (bestover < 0 00160 && row->baseline.y (x) + row->xheight + row->ascrise 00161 - blob->bounding_box ().bottom () > bestover) 00162 best_row = row; 00163 return best_row; 00164 } 00165 00166 00167 /********************************************************************** 00168 * find_underlined_blobs 00169 * 00170 * Find the start and end coords of blobs in the underline. 00171 **********************************************************************/ 00172 00173 void find_underlined_blobs( //get chop points 00174 BLOBNBOX *u_line, //underlined unit 00175 QSPLINE *baseline, //actual baseline 00176 float xheight, //height of line 00177 float baseline_offset, //amount to shrinke it 00178 ICOORDELT_LIST *chop_cells //places to chop 00179 ) { 00180 inT16 x, y; //sides of blob 00181 ICOORD blob_chop; //sides of blob 00182 TBOX blob_box = u_line->bounding_box (); 00183 //cell iterator 00184 ICOORDELT_IT cell_it = chop_cells; 00185 STATS upper_proj (blob_box.left (), blob_box.right () + 1); 00186 STATS middle_proj (blob_box.left (), blob_box.right () + 1); 00187 STATS lower_proj (blob_box.left (), blob_box.right () + 1); 00188 C_OUTLINE_IT out_it; //outlines of blob 00189 00190 ASSERT_HOST (u_line->cblob () != NULL); 00191 00192 out_it.set_to_list (u_line->cblob ()->out_list ()); 00193 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00194 vertical_cunderline_projection (out_it.data (), 00195 baseline, xheight, baseline_offset, 00196 &lower_proj, &middle_proj, &upper_proj); 00197 } 00198 00199 for (x = blob_box.left (); x < blob_box.right (); x++) { 00200 if (middle_proj.pile_count (x) > 0) { 00201 for (y = x + 1; 00202 y < blob_box.right () && middle_proj.pile_count (y) > 0; y++); 00203 blob_chop = ICOORD (x, y); 00204 cell_it.add_after_then_move (new ICOORDELT (blob_chop)); 00205 x = y; 00206 } 00207 } 00208 } 00209 00210 00211 /********************************************************************** 00212 * vertical_cunderline_projection 00213 * 00214 * Compute the vertical projection of a outline from its outlines 00215 * and add to the given STATS. 00216 **********************************************************************/ 00217 00218 void vertical_cunderline_projection( //project outlines 00219 C_OUTLINE *outline, //outline to project 00220 QSPLINE *baseline, //actual baseline 00221 float xheight, //height of line 00222 float baseline_offset, //amount to shrinke it 00223 STATS *lower_proj, //below baseline 00224 STATS *middle_proj, //centre region 00225 STATS *upper_proj //top region 00226 ) { 00227 ICOORD pos; //current point 00228 ICOORD step; //edge step 00229 inT16 lower_y, upper_y; //region limits 00230 inT32 length; //of outline 00231 inT16 stepindex; //current step 00232 C_OUTLINE_IT out_it = outline->child (); 00233 00234 pos = outline->start_pos (); 00235 length = outline->pathlength (); 00236 for (stepindex = 0; stepindex < length; stepindex++) { 00237 step = outline->step (stepindex); 00238 if (step.x () > 0) { 00239 lower_y = 00240 (inT16) floor (baseline->y (pos.x ()) + baseline_offset + 0.5); 00241 upper_y = 00242 (inT16) floor (baseline->y (pos.x ()) + baseline_offset + 00243 xheight + 0.5); 00244 if (pos.y () >= lower_y) { 00245 lower_proj->add (pos.x (), -lower_y); 00246 if (pos.y () >= upper_y) { 00247 middle_proj->add (pos.x (), lower_y - upper_y); 00248 upper_proj->add (pos.x (), upper_y - pos.y ()); 00249 } 00250 else 00251 middle_proj->add (pos.x (), lower_y - pos.y ()); 00252 } 00253 else 00254 lower_proj->add (pos.x (), -pos.y ()); 00255 } 00256 else if (step.x () < 0) { 00257 lower_y = 00258 (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset + 00259 0.5); 00260 upper_y = 00261 (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset + 00262 xheight + 0.5); 00263 if (pos.y () >= lower_y) { 00264 lower_proj->add (pos.x () - 1, lower_y); 00265 if (pos.y () >= upper_y) { 00266 middle_proj->add (pos.x () - 1, upper_y - lower_y); 00267 upper_proj->add (pos.x () - 1, pos.y () - upper_y); 00268 } 00269 else 00270 middle_proj->add (pos.x () - 1, pos.y () - lower_y); 00271 } 00272 else 00273 lower_proj->add (pos.x () - 1, pos.y ()); 00274 } 00275 pos += step; 00276 } 00277 00278 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { 00279 vertical_cunderline_projection (out_it.data (), 00280 baseline, xheight, baseline_offset, 00281 lower_proj, middle_proj, upper_proj); 00282 } 00283 }