// tesseract 3.03
00001 /********************************************************************** 00002 * File: ocrblock.cpp (Formerly block.c) 00003 * Description: BLOCK member functions and iterator functions. 00004 * Author: Ray Smith 00005 * Created: Fri Mar 15 09:41:28 GMT 1991 00006 * 00007 * (C) Copyright 1991, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include <stdlib.h> 00021 #include "blckerr.h" 00022 #include "ocrblock.h" 00023 #include "stepblob.h" 00024 #include "tprintf.h" 00025 00026 #define BLOCK_LABEL_HEIGHT 150 //char height of block id 00027 00028 ELISTIZE (BLOCK) 00034 BLOCK::BLOCK(const char *name, //< filename 00035 BOOL8 prop, //< proportional 00036 inT16 kern, //< kerning 00037 inT16 space, //< spacing 00038 inT16 xmin, //< bottom left 00039 inT16 ymin, inT16 xmax, //< top right 00040 inT16 ymax) 00041 : PDBLK (xmin, ymin, xmax, ymax), 00042 filename(name), 00043 re_rotation_(1.0f, 0.0f), 00044 classify_rotation_(1.0f, 0.0f), 00045 skew_(1.0f, 0.0f) { 00046 ICOORDELT_IT left_it = &leftside; 00047 ICOORDELT_IT right_it = &rightside; 00048 00049 proportional = prop; 00050 right_to_left_ = false; 00051 kerning = kern; 00052 spacing = space; 00053 font_class = -1; //not assigned 00054 cell_over_xheight_ = 2.0f; 00055 hand_poly = NULL; 00056 left_it.set_to_list (&leftside); 00057 right_it.set_to_list 
(&rightside); 00058 //make default box 00059 left_it.add_to_end (new ICOORDELT (xmin, ymin)); 00060 left_it.add_to_end (new ICOORDELT (xmin, ymax)); 00061 right_it.add_to_end (new ICOORDELT (xmax, ymin)); 00062 right_it.add_to_end (new ICOORDELT (xmax, ymax)); 00063 } 00064 00071 int decreasing_top_order( // 00072 const void *row1, 00073 const void *row2) { 00074 return (*(ROW **) row2)->bounding_box ().top () - 00075 (*(ROW **) row1)->bounding_box ().top (); 00076 } 00077 00078 00084 void BLOCK::rotate(const FCOORD& rotation) { 00085 poly_block()->rotate(rotation); 00086 box = *poly_block()->bounding_box(); 00087 } 00088 00095 void BLOCK::reflect_polygon_in_y_axis() { 00096 poly_block()->reflect_in_y_axis(); 00097 box = *poly_block()->bounding_box(); 00098 } 00099 00106 void BLOCK::sort_rows() { // order on "top" 00107 ROW_IT row_it(&rows); 00108 00109 row_it.sort (decreasing_top_order); 00110 } 00111 00112 00120 void BLOCK::compress() { // squash it up 00121 #define ROW_SPACING 5 00122 00123 ROW_IT row_it(&rows); 00124 ROW *row; 00125 ICOORD row_spacing (0, ROW_SPACING); 00126 00127 ICOORDELT_IT icoordelt_it; 00128 00129 sort_rows(); 00130 00131 box = TBOX (box.topleft (), box.topleft ()); 00132 box.move_bottom_edge (ROW_SPACING); 00133 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { 00134 row = row_it.data (); 00135 row->move (box.botleft () - row_spacing - 00136 row->bounding_box ().topleft ()); 00137 box += row->bounding_box (); 00138 } 00139 00140 leftside.clear (); 00141 icoordelt_it.set_to_list (&leftside); 00142 icoordelt_it.add_to_end (new ICOORDELT (box.left (), box.bottom ())); 00143 icoordelt_it.add_to_end (new ICOORDELT (box.left (), box.top ())); 00144 rightside.clear (); 00145 icoordelt_it.set_to_list (&rightside); 00146 icoordelt_it.add_to_end (new ICOORDELT (box.right (), box.bottom ())); 00147 icoordelt_it.add_to_end (new ICOORDELT (box.right (), box.top ())); 00148 } 00149 00150 00158 void BLOCK::check_pitch() { // 
check prop 00159 // tprintf("Missing FFT fixed pitch stuff!\n"); 00160 pitch = -1; 00161 } 00162 00163 00170 void BLOCK::compress( // squash it up 00171 const ICOORD vec // and move 00172 ) { 00173 box.move (vec); 00174 compress(); 00175 } 00176 00177 00184 void BLOCK::print( //print list of sides 00185 FILE *, //< file to print on 00186 BOOL8 dump //< print full detail 00187 ) { 00188 ICOORDELT_IT it = &leftside; //iterator 00189 00190 box.print (); 00191 tprintf ("Proportional= %s\n", proportional ? "TRUE" : "FALSE"); 00192 tprintf ("Kerning= %d\n", kerning); 00193 tprintf ("Spacing= %d\n", spacing); 00194 tprintf ("Fixed_pitch=%d\n", pitch); 00195 tprintf ("Filename= %s\n", filename.string ()); 00196 00197 if (dump) { 00198 tprintf ("Left side coords are:\n"); 00199 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) 00200 tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ()); 00201 tprintf ("\n"); 00202 tprintf ("Right side coords are:\n"); 00203 it.set_to_list (&rightside); 00204 for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) 00205 tprintf ("(%d,%d) ", it.data ()->x (), it.data ()->y ()); 00206 tprintf ("\n"); 00207 } 00208 } 00209 00216 BLOCK & BLOCK::operator= ( //assignment 00217 const BLOCK & source //from this 00218 ) { 00219 this->ELIST_LINK::operator= (source); 00220 this->PDBLK::operator= (source); 00221 proportional = source.proportional; 00222 kerning = source.kerning; 00223 spacing = source.spacing; 00224 filename = source.filename; //STRINGs assign ok 00225 if (!rows.empty ()) 00226 rows.clear (); 00227 re_rotation_ = source.re_rotation_; 00228 classify_rotation_ = source.classify_rotation_; 00229 skew_ = source.skew_; 00230 return *this; 00231 } 00232 00233 // This function is for finding the approximate (horizontal) distance from 00234 // the x-coordinate of the left edge of a symbol to the left edge of the 00235 // text block which contains it. 
We are passed: 00236 // segments - output of PB_LINE_IT::get_line() which contains x-coordinate 00237 // intervals for the scan line going through the symbol's y-coordinate. 00238 // Each element of segments is of the form (x()=start_x, y()=length). 00239 // x - the x coordinate of the symbol we're interested in. 00240 // margin - return value, the distance from x,y to the left margin of the 00241 // block containing it. 00242 // If all segments were to the right of x, we return false and 0. 00243 bool LeftMargin(ICOORDELT_LIST *segments, int x, int *margin) { 00244 bool found = false; 00245 *margin = 0; 00246 if (segments->empty()) 00247 return found; 00248 ICOORDELT_IT seg_it(segments); 00249 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { 00250 int cur_margin = x - seg_it.data()->x(); 00251 if (cur_margin >= 0) { 00252 if (!found) { 00253 *margin = cur_margin; 00254 } else if (cur_margin < *margin) { 00255 *margin = cur_margin; 00256 } 00257 found = true; 00258 } 00259 } 00260 return found; 00261 } 00262 00263 // This function is for finding the approximate (horizontal) distance from 00264 // the x-coordinate of the right edge of a symbol to the right edge of the 00265 // text block which contains it. We are passed: 00266 // segments - output of PB_LINE_IT::get_line() which contains x-coordinate 00267 // intervals for the scan line going through the symbol's y-coordinate. 00268 // Each element of segments is of the form (x()=start_x, y()=length). 00269 // x - the x coordinate of the symbol we're interested in. 00270 // margin - return value, the distance from x,y to the right margin of the 00271 // block containing it. 00272 // If all segments were to the left of x, we return false and 0. 
00273 bool RightMargin(ICOORDELT_LIST *segments, int x, int *margin) { 00274 bool found = false; 00275 *margin = 0; 00276 if (segments->empty()) 00277 return found; 00278 ICOORDELT_IT seg_it(segments); 00279 for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) { 00280 int cur_margin = seg_it.data()->x() + seg_it.data()->y() - x; 00281 if (cur_margin >= 0) { 00282 if (!found) { 00283 *margin = cur_margin; 00284 } else if (cur_margin < *margin) { 00285 *margin = cur_margin; 00286 } 00287 found = true; 00288 } 00289 } 00290 return found; 00291 } 00292 00293 // Compute the distance from the left and right ends of each row to the 00294 // left and right edges of the block's polyblock. Illustration: 00295 // ____________________________ _______________________ 00296 // | Howdy neighbor! | |rectangular blocks look| 00297 // | This text is written to| |more like stacked pizza| 00298 // |illustrate how useful poly- |boxes. | 00299 // |blobs are in ----------- ------ The polyblob| 00300 // |dealing with| _________ |for a BLOCK rec-| 00301 // |harder layout| /===========\ |ords the possibly| 00302 // |issues. | | _ _ | |skewed pseudo-| 00303 // | You see this| | |_| \|_| | |rectangular | 00304 // |text is flowed| | } | |boundary that| 00305 // |around a mid-| \ ____ | |forms the ideal-| 00306 // |cloumn portrait._____ \ / __|ized text margin| 00307 // | Polyblobs exist| \ / |from which we should| 00308 // |to account for insets| | | |measure paragraph| 00309 // |which make otherwise| ----- |indentation. | 00310 // ----------------------- ---------------------- 00311 // 00312 // If we identify a drop-cap, we measure the left margin for the lines 00313 // below the first line relative to one space past the drop cap. The 00314 // first line's margin and those past the drop cap area are measured 00315 // relative to the enclosing polyblock. 
00316 // 00317 // TODO(rays): Before this will work well, we'll need to adjust the 00318 // polyblob tighter around the text near images, as in: 00319 // UNLV_AUTO:mag.3G0 page 2 00320 // UNLV_AUTO:mag.3G4 page 16 00321 void BLOCK::compute_row_margins() { 00322 if (row_list()->empty() || row_list()->singleton()) { 00323 return; 00324 } 00325 00326 // If Layout analysis was not called, default to this. 00327 POLY_BLOCK rect_block(bounding_box(), PT_FLOWING_TEXT); 00328 POLY_BLOCK *pblock = &rect_block; 00329 if (poly_block() != NULL) { 00330 pblock = poly_block(); 00331 } 00332 00333 // Step One: Determine if there is a drop-cap. 00334 // TODO(eger): Fix up drop cap code for RTL languages. 00335 ROW_IT r_it(row_list()); 00336 ROW *first_row = r_it.data(); 00337 ROW *second_row = r_it.data_relative(1); 00338 00339 // initialize the bottom of a fictitious drop cap far above the first line. 00340 int drop_cap_bottom = first_row->bounding_box().top() + 00341 first_row->bounding_box().height(); 00342 int drop_cap_right = first_row->bounding_box().left(); 00343 int mid_second_line = second_row->bounding_box().top() - 00344 second_row->bounding_box().height() / 2; 00345 WERD_IT werd_it(r_it.data()->word_list()); // words of line one 00346 if (!werd_it.empty()) { 00347 C_BLOB_IT cblob_it(werd_it.data()->cblob_list()); 00348 for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); 00349 cblob_it.forward()) { 00350 TBOX bbox = cblob_it.data()->bounding_box(); 00351 if (bbox.bottom() <= mid_second_line) { 00352 // we found a real drop cap 00353 first_row->set_has_drop_cap(true); 00354 if (drop_cap_bottom > bbox.bottom()) 00355 drop_cap_bottom = bbox.bottom(); 00356 if (drop_cap_right < bbox.right()) 00357 drop_cap_right = bbox.right(); 00358 } 00359 } 00360 } 00361 00362 // Step Two: Calculate the margin from the text of each row to the block 00363 // (or drop-cap) boundaries. 
00364 PB_LINE_IT lines(pblock); 00365 r_it.set_to_list(row_list()); 00366 for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) { 00367 ROW *row = r_it.data(); 00368 TBOX row_box = row->bounding_box(); 00369 int left_y = row->base_line(row_box.left()) + row->x_height(); 00370 int left_margin; 00371 ICOORDELT_LIST *segments = lines.get_line(left_y); 00372 LeftMargin(segments, row_box.left(), &left_margin); 00373 delete segments; 00374 00375 if (row_box.top() >= drop_cap_bottom) { 00376 int drop_cap_distance = row_box.left() - row->space() - drop_cap_right; 00377 if (drop_cap_distance < 0) 00378 drop_cap_distance = 0; 00379 if (drop_cap_distance < left_margin) 00380 left_margin = drop_cap_distance; 00381 } 00382 00383 int right_y = row->base_line(row_box.right()) + row->x_height(); 00384 int right_margin; 00385 segments = lines.get_line(right_y); 00386 RightMargin(segments, row_box.right(), &right_margin); 00387 delete segments; 00388 row->set_lmargin(left_margin); 00389 row->set_rmargin(right_margin); 00390 } 00391 } 00392 00393 /********************************************************************** 00394 * PrintSegmentationStats 00395 * 00396 * Prints segmentation stats for the given block list. 00397 **********************************************************************/ 00398 00399 void PrintSegmentationStats(BLOCK_LIST* block_list) { 00400 int num_blocks = 0; 00401 int num_rows = 0; 00402 int num_words = 0; 00403 int num_blobs = 0; 00404 BLOCK_IT block_it(block_list); 00405 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { 00406 BLOCK* block = block_it.data(); 00407 ++num_blocks; 00408 ROW_IT row_it(block->row_list()); 00409 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { 00410 ++num_rows; 00411 ROW* row = row_it.data(); 00412 // Iterate over all werds in the row. 
00413 WERD_IT werd_it(row->word_list()); 00414 for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { 00415 WERD* werd = werd_it.data(); 00416 ++num_words; 00417 num_blobs += werd->cblob_list()->length(); 00418 } 00419 } 00420 } 00421 tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n", 00422 num_blocks, num_rows, num_words, num_blobs); 00423 } 00424 00425 /********************************************************************** 00426 * ExtractBlobsFromSegmentation 00427 * 00428 * Extracts blobs from the given block list and adds them to the output list. 00429 * The block list must have been created by performing a page segmentation. 00430 **********************************************************************/ 00431 00432 void ExtractBlobsFromSegmentation(BLOCK_LIST* blocks, 00433 C_BLOB_LIST* output_blob_list) { 00434 C_BLOB_IT return_list_it(output_blob_list); 00435 BLOCK_IT block_it(blocks); 00436 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { 00437 BLOCK* block = block_it.data(); 00438 ROW_IT row_it(block->row_list()); 00439 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { 00440 ROW* row = row_it.data(); 00441 // Iterate over all werds in the row. 00442 WERD_IT werd_it(row->word_list()); 00443 for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { 00444 WERD* werd = werd_it.data(); 00445 return_list_it.move_to_last(); 00446 return_list_it.add_list_after(werd->cblob_list()); 00447 return_list_it.move_to_last(); 00448 return_list_it.add_list_after(werd->rej_cblob_list()); 00449 } 00450 } 00451 } 00452 } 00453 00454 /********************************************************************** 00455 * RefreshWordBlobsFromNewBlobs() 00456 * 00457 * Refreshes the words in the block_list by using blobs in the 00458 * new_blobs list. 00459 * Block list must have word segmentation in it. 00460 * It consumes the blobs provided in the new_blobs list. 
The blobs leftover in 00461 * the new_blobs list after the call weren't matched to any blobs of the words 00462 * in block list. 00463 * The output not_found_blobs is a list of blobs from the original segmentation 00464 * in the block_list for which no corresponding new blobs were found. 00465 **********************************************************************/ 00466 00467 void RefreshWordBlobsFromNewBlobs(BLOCK_LIST* block_list, 00468 C_BLOB_LIST* new_blobs, 00469 C_BLOB_LIST* not_found_blobs) { 00470 // Now iterate over all the blobs in the segmentation_block_list_, and just 00471 // replace the corresponding c-blobs inside the werds. 00472 BLOCK_IT block_it(block_list); 00473 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { 00474 BLOCK* block = block_it.data(); 00475 if (block->poly_block() != NULL && !block->poly_block()->IsText()) 00476 continue; // Don't touch non-text blocks. 00477 // Iterate over all rows in the block. 00478 ROW_IT row_it(block->row_list()); 00479 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { 00480 ROW* row = row_it.data(); 00481 // Iterate over all werds in the row. 00482 WERD_IT werd_it(row->word_list()); 00483 WERD_LIST new_words; 00484 WERD_IT new_words_it(&new_words); 00485 for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) { 00486 WERD* werd = werd_it.extract(); 00487 WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs, 00488 not_found_blobs); 00489 if (new_werd) { 00490 // Insert this new werd into the actual row's werd-list. Remove the 00491 // existing one. 00492 new_words_it.add_after_then_move(new_werd); 00493 delete werd; 00494 } else { 00495 // Reinsert the older word back, for lack of better options. 00496 // This is critical since dropping the words messes up segmentation: 00497 // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on. 
00498 new_words_it.add_after_then_move(werd); 00499 } 00500 } 00501 // Get rid of the old word list & replace it with the new one. 00502 row->word_list()->clear(); 00503 werd_it.move_to_first(); 00504 werd_it.add_list_after(&new_words); 00505 } 00506 } 00507 }