tesseract
3.03
|
00001 /********************************************************************** 00002 * File: cube_page_segmenter.cpp 00003 * Description: Implementation of the Cube Page Segmenter Class 00004 * Author: Ahmad Abdulkader 00005 * Created: 2007 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "cube_line_segmenter.h" 00021 #include "ndminx.h" 00022 00023 namespace tesseract { 00024 // constants that worked for Arabic page segmenter 00025 const int CubeLineSegmenter::kLineSepMorphMinHgt = 20; 00026 const int CubeLineSegmenter::kHgtBins = 20; 00027 const double CubeLineSegmenter::kMaxValidLineRatio = 3.2; 00028 const int CubeLineSegmenter::kMaxConnCompHgt = 150; 00029 const int CubeLineSegmenter::kMaxConnCompWid = 500; 00030 const int CubeLineSegmenter::kMaxHorzAspectRatio = 50; 00031 const int CubeLineSegmenter::kMaxVertAspectRatio = 20; 00032 const int CubeLineSegmenter::kMinWid = 2; 00033 const int CubeLineSegmenter::kMinHgt = 2; 00034 const float CubeLineSegmenter::kMinValidLineHgtRatio = 2.5; 00035 00036 CubeLineSegmenter::CubeLineSegmenter(CubeRecoContext *cntxt, Pix *img) { 00037 cntxt_ = cntxt; 00038 orig_img_ = img; 00039 img_ = NULL; 00040 lines_pixa_ = NULL; 00041 init_ = false; 00042 line_cnt_ = 0; 00043 columns_ = NULL; 00044 con_comps_ = NULL; 00045 est_alef_hgt_ = 0.0; 00046 est_dot_hgt_ = 0.0; 00047 } 00048 00049 CubeLineSegmenter::~CubeLineSegmenter() { 00050 if (img_ != NULL) { 00051 pixDestroy(&img_); 00052 img_ = NULL; 00053 } 00054 00055 if (lines_pixa_ != NULL) { 00056 pixaDestroy(&lines_pixa_); 00057 lines_pixa_ = NULL; 00058 } 00059 00060 if (con_comps_ != NULL) { 00061 pixaDestroy(&con_comps_); 00062 con_comps_ = NULL; 00063 } 00064 00065 if (columns_ != NULL) { 00066 pixaaDestroy(&columns_); 00067 columns_ = NULL; 00068 } 00069 } 00070 00071 // compute validity ratio for a line 00072 double CubeLineSegmenter::ValidityRatio(Pix *line_mask_pix, Box *line_box) { 00073 return line_box->h / est_alef_hgt_; 00074 } 00075 00076 // validate line 00077 bool CubeLineSegmenter::ValidLine(Pix *line_mask_pix, Box *line_box) { 00078 double validity_ratio = ValidityRatio(line_mask_pix, line_box); 00079 00080 return validity_ratio < kMaxValidLineRatio; 00081 } 00082 00083 // perform a vertical Closing with the specified threshold 00084 // returning the resulting conn comps as a pixa 00085 Pixa *CubeLineSegmenter::VerticalClosing(Pix *pix, 00086 int threshold, Boxa **boxa) { 00087 char sequence_str[16]; 00088 00089 // do the morphology 00090 sprintf(sequence_str, "c100.%d", threshold); 00091 Pix *morphed_pix = pixMorphCompSequence(pix, sequence_str, 0); 00092 if (morphed_pix == NULL) { 00093 return NULL; 00094 } 00095 00096 // get the resulting lines by computing concomps 00097 Pixa *pixac; 00098 (*boxa) = pixConnComp(morphed_pix, &pixac, 8); 00099 00100 pixDestroy(&morphed_pix); 00101 00102 if ((*boxa) == NULL) { 00103 return NULL; 00104 } 00105 00106 return pixac; 00107 } 00108 00109 // do a desperate attempt at cracking lines 00110 Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix, 00111 Box *cracked_line_box, int line_cnt) { 00112 // create lines pixa array 00113 Pixa **lines_pixa = new Pixa*[line_cnt]; 00114 if (lines_pixa == NULL) { 00115 return NULL; 00116 } 00117 00118 memset(lines_pixa, 0, line_cnt * sizeof(*lines_pixa)); 00119 00120 // compute line conn comps 00121 Pixa *line_con_comps_pix; 00122 Boxa *line_con_comps = ComputeLineConComps(cracked_line_pix, 00123 cracked_line_box, &line_con_comps_pix); 00124 00125 if (line_con_comps == NULL) { 00126 delete []lines_pixa; 00127 return NULL; 00128 } 00129 00130 // assign each conn comp to the a line based on its centroid 00131 for (int con = 0; con < line_con_comps->n; con++) { 00132 Box *con_box = line_con_comps->box[con]; 00133 Pix *con_pix = line_con_comps_pix->pix[con]; 00134 int mid_y = (con_box->y - cracked_line_box->y) + (con_box->h / 2), 00135 line_idx = MIN(line_cnt - 1, 00136 (mid_y * line_cnt / cracked_line_box->h)); 00137 00138 // create the line if it has not been created? 00139 if (lines_pixa[line_idx] == NULL) { 00140 lines_pixa[line_idx] = pixaCreate(line_con_comps->n); 00141 if (lines_pixa[line_idx] == NULL) { 00142 delete []lines_pixa; 00143 boxaDestroy(&line_con_comps); 00144 pixaDestroy(&line_con_comps_pix); 00145 return NULL; 00146 } 00147 } 00148 00149 // add the concomp to the line 00150 if (pixaAddPix(lines_pixa[line_idx], con_pix, L_CLONE) != 0 || 00151 pixaAddBox(lines_pixa[line_idx], con_box, L_CLONE)) { 00152 delete []lines_pixa; 00153 boxaDestroy(&line_con_comps); 00154 pixaDestroy(&line_con_comps_pix); 00155 } 00156 } 00157 00158 // create the lines pixa 00159 Pixa *lines = pixaCreate(line_cnt); 00160 bool success = true; 00161 00162 // create and check the validity of the lines 00163 for (int line = 0; line < line_cnt; line++) { 00164 Pixa *line_pixa = lines_pixa[line]; 00165 00166 // skip invalid lines 00167 if (line_pixa == NULL) { 00168 continue; 00169 } 00170 00171 // merge the pix, check the validity of the line 00172 // and add it to the lines pixa 00173 Box *line_box; 00174 Pix *line_pix = Pixa2Pix(line_pixa, &line_box); 00175 if (line_pix == NULL || 00176 line_box == NULL || 00177 ValidLine(line_pix, line_box) == false || 00178 pixaAddPix(lines, line_pix, L_INSERT) != 0 || 00179 pixaAddBox(lines, line_box, L_INSERT) != 0) { 00180 if (line_pix != NULL) { 00181 pixDestroy(&line_pix); 00182 } 00183 00184 if (line_box != NULL) { 00185 boxDestroy(&line_box); 00186 } 00187 00188 success = false; 00189 00190 break; 00191 } 00192 } 00193 00194 // cleanup 00195 for (int line = 0; line < line_cnt; line++) { 00196 if (lines_pixa[line] != NULL) { 00197 pixaDestroy(&lines_pixa[line]); 00198 } 00199 } 00200 00201 delete []lines_pixa; 00202 boxaDestroy(&line_con_comps); 00203 pixaDestroy(&line_con_comps_pix); 00204 00205 if (success == false) { 00206 pixaDestroy(&lines); 00207 lines = NULL; 00208 } 00209 00210 return lines; 00211 } 00212 00213 // do a desperate attempt at cracking lines 00214 Pixa *CubeLineSegmenter::CrackLine(Pix *cracked_line_pix, 00215 Box *cracked_line_box) { 00216 // estimate max line count 00217 int max_line_cnt = static_cast<int>((cracked_line_box->h / 00218 est_alef_hgt_) + 0.5); 00219 if (max_line_cnt < 2) { 00220 return NULL; 00221 } 00222 00223 for (int line_cnt = 2; line_cnt < max_line_cnt; line_cnt++) { 00224 Pixa *lines = CrackLine(cracked_line_pix, cracked_line_box, line_cnt); 00225 if (lines != NULL) { 00226 return lines; 00227 } 00228 } 00229 00230 return NULL; 00231 } 00232 00233 // split a line continously until valid or fail 00234 Pixa *CubeLineSegmenter::SplitLine(Pix *line_mask_pix, Box *line_box) { 00235 // clone the line mask 00236 Pix *line_pix = pixClone(line_mask_pix); 00237 00238 if (line_pix == NULL) { 00239 return NULL; 00240 } 00241 00242 // AND with the image to get the actual line 00243 pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h, 00244 PIX_SRC & PIX_DST, img_, line_box->x, line_box->y); 00245 00246 // continue to do rasterop morphology on the line until 00247 // it splits to valid lines or we fail 00248 int morph_hgt = kLineSepMorphMinHgt - 1, 00249 best_threshold = kLineSepMorphMinHgt - 1, 00250 max_valid_portion = 0; 00251 00252 Boxa *boxa; 00253 Pixa *pixac; 00254 00255 do { 00256 pixac = VerticalClosing(line_pix, morph_hgt, &boxa); 00257 00258 // add the box offset to all the lines 00259 // and check for the validity of each 00260 int line, 00261 valid_line_cnt = 0, 00262 valid_portion = 0; 00263 00264 for (line = 0; line < pixac->n; line++) { 00265 boxa->box[line]->x += line_box->x; 00266 boxa->box[line]->y += line_box->y; 00267 00268 if (ValidLine(pixac->pix[line], boxa->box[line]) == true) { 00269 // count valid lines 00270 valid_line_cnt++; 00271 00272 // and the valid portions 00273 valid_portion += boxa->box[line]->h; 00274 } 00275 } 00276 00277 // all the lines are valid 00278 if (valid_line_cnt == pixac->n) { 00279 boxaDestroy(&boxa); 00280 pixDestroy(&line_pix); 00281 return pixac; 00282 } 00283 00284 // a larger valid portion 00285 if (valid_portion > max_valid_portion) { 00286 max_valid_portion = valid_portion; 00287 best_threshold = morph_hgt; 00288 } 00289 00290 boxaDestroy(&boxa); 00291 pixaDestroy(&pixac); 00292 00293 morph_hgt--; 00294 } 00295 while (morph_hgt > 0); 00296 00297 // failed to break into valid lines 00298 // attempt to crack the line 00299 pixac = CrackLine(line_pix, line_box); 00300 if (pixac != NULL) { 00301 pixDestroy(&line_pix); 00302 return pixac; 00303 } 00304 00305 // try to leverage any of the lines 00306 // did the best threshold yield a non zero valid portion 00307 if (max_valid_portion > 0) { 00308 // use this threshold to break lines 00309 pixac = VerticalClosing(line_pix, best_threshold, &boxa); 00310 00311 // add the box offset to all the lines 00312 // and check for the validity of each 00313 for (int line = 0; line < pixac->n; line++) { 00314 boxa->box[line]->x += line_box->x; 00315 boxa->box[line]->y += line_box->y; 00316 00317 // remove invalid lines from the pixa 00318 if (ValidLine(pixac->pix[line], boxa->box[line]) == false) { 00319 pixaRemovePix(pixac, line); 00320 line--; 00321 } 00322 } 00323 00324 boxaDestroy(&boxa); 00325 pixDestroy(&line_pix); 00326 return pixac; 00327 } 00328 00329 // last resort: attempt to crack the line 00330 pixDestroy(&line_pix); 00331 00332 return NULL; 00333 } 00334 00335 // Checks of a line is too small 00336 bool CubeLineSegmenter::SmallLine(Box *line_box) { 00337 return line_box->h <= (kMinValidLineHgtRatio * est_dot_hgt_); 00338 } 00339 00340 // Compute the connected components in a line 00341 Boxa * CubeLineSegmenter::ComputeLineConComps(Pix *line_mask_pix, 00342 Box *line_box, 00343 Pixa **con_comps_pixa) { 00344 // clone the line mask 00345 Pix *line_pix = pixClone(line_mask_pix); 00346 00347 if (line_pix == NULL) { 00348 return NULL; 00349 } 00350 00351 // AND with the image to get the actual line 00352 pixRasterop(line_pix, 0, 0, line_pix->w, line_pix->h, 00353 PIX_SRC & PIX_DST, img_, line_box->x, line_box->y); 00354 00355 // compute the connected components of the line to be merged 00356 Boxa *line_con_comps = pixConnComp(line_pix, con_comps_pixa, 8); 00357 00358 pixDestroy(&line_pix); 00359 00360 // offset boxes by the bbox of the line 00361 for (int con = 0; con < line_con_comps->n; con++) { 00362 line_con_comps->box[con]->x += line_box->x; 00363 line_con_comps->box[con]->y += line_box->y; 00364 } 00365 00366 return line_con_comps; 00367 } 00368 00369 // create a union of two arbitrary pix 00370 Pix *CubeLineSegmenter::PixUnion(Pix *dest_pix, Box *dest_box, 00371 Pix *src_pix, Box *src_box) { 00372 // compute dimensions of union rect 00373 BOX *union_box = boxBoundingRegion(src_box, dest_box); 00374 00375 // create the union pix 00376 Pix *union_pix = pixCreate(union_box->w, union_box->h, src_pix->d); 00377 if (union_pix == NULL) { 00378 return NULL; 00379 } 00380 00381 // blt the src and dest pix 00382 pixRasterop(union_pix, 00383 src_box->x - union_box->x, src_box->y - union_box->y, 00384 src_box->w, src_box->h, PIX_SRC | PIX_DST, src_pix, 0, 0); 00385 00386 pixRasterop(union_pix, 00387 dest_box->x - union_box->x, dest_box->y - union_box->y, 00388 dest_box->w, dest_box->h, PIX_SRC | PIX_DST, dest_pix, 0, 0); 00389 00390 // replace the dest_box 00391 *dest_box = *union_box; 00392 00393 boxDestroy(&union_box); 00394 00395 return union_pix; 00396 } 00397 00398 // create a union of a number of arbitrary pix 00399 Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box, 00400 int start_pix, int pix_cnt) { 00401 // compute union_box 00402 int min_x = INT_MAX, 00403 max_x = INT_MIN, 00404 min_y = INT_MAX, 00405 max_y = INT_MIN; 00406 00407 for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) { 00408 Box *pix_box = pixa->boxa->box[pix_idx]; 00409 00410 UpdateRange(pix_box->x, pix_box->x + pix_box->w, &min_x, &max_x); 00411 UpdateRange(pix_box->y, pix_box->y + pix_box->h, &min_y, &max_y); 00412 } 00413 00414 (*dest_box) = boxCreate(min_x, min_y, max_x - min_x, max_y - min_y); 00415 if ((*dest_box) == NULL) { 00416 return NULL; 00417 } 00418 00419 // create the union pix 00420 Pix *union_pix = pixCreate((*dest_box)->w, (*dest_box)->h, img_->d); 00421 if (union_pix == NULL) { 00422 boxDestroy(dest_box); 00423 return NULL; 00424 } 00425 00426 // create a pix corresponding to the union of all pixs 00427 // blt the src and dest pix 00428 for (int pix_idx = start_pix; pix_idx < (start_pix + pix_cnt); pix_idx++) { 00429 Box *pix_box = pixa->boxa->box[pix_idx]; 00430 Pix *con_pix = pixa->pix[pix_idx]; 00431 00432 pixRasterop(union_pix, 00433 pix_box->x - (*dest_box)->x, pix_box->y - (*dest_box)->y, 00434 pix_box->w, pix_box->h, PIX_SRC | PIX_DST, con_pix, 0, 0); 00435 } 00436 00437 return union_pix; 00438 } 00439 00440 // create a union of a number of arbitrary pix 00441 Pix *CubeLineSegmenter::Pixa2Pix(Pixa *pixa, Box **dest_box) { 00442 return Pixa2Pix(pixa, dest_box, 0, pixa->n); 00443 } 00444 00445 // merges a number of lines into one line given a bounding box and a mask 00446 bool CubeLineSegmenter::MergeLine(Pix *line_mask_pix, Box *line_box, 00447 Pixa *lines, Boxaa *lines_con_comps) { 00448 // compute the connected components of the lines to be merged 00449 Pixa *small_con_comps_pix; 00450 Boxa *small_line_con_comps = ComputeLineConComps(line_mask_pix, 00451 line_box, &small_con_comps_pix); 00452 00453 if (small_line_con_comps == NULL) { 00454 return false; 00455 } 00456 00457 // for each connected component 00458 for (int con = 0; con < small_line_con_comps->n; con++) { 00459 Box *small_con_comp_box = small_line_con_comps->box[con]; 00460 int best_line = -1, 00461 best_dist = INT_MAX, 00462 small_box_right = small_con_comp_box->x + small_con_comp_box->w, 00463 small_box_bottom = small_con_comp_box->y + small_con_comp_box->h; 00464 00465 // for each valid line 00466 for (int line = 0; line < lines->n; line++) { 00467 if (SmallLine(lines->boxa->box[line]) == true) { 00468 continue; 00469 } 00470 00471 // for all the connected components in the line 00472 Boxa *line_con_comps = lines_con_comps->boxa[line]; 00473 00474 for (int lcon = 0; lcon < line_con_comps->n; lcon++) { 00475 Box *con_comp_box = line_con_comps->box[lcon]; 00476 int xdist, 00477 ydist, 00478 box_right = con_comp_box->x + con_comp_box->w, 00479 box_bottom = con_comp_box->y + con_comp_box->h; 00480 00481 xdist = MAX(small_con_comp_box->x, con_comp_box->x) - 00482 MIN(small_box_right, box_right); 00483 00484 ydist = MAX(small_con_comp_box->y, con_comp_box->y) - 00485 MIN(small_box_bottom, box_bottom); 00486 00487 // if there is an overlap in x-direction 00488 if (xdist <= 0) { 00489 if (best_line == -1 || ydist < best_dist) { 00490 best_dist = ydist; 00491 best_line = line; 00492 } 00493 } 00494 } 00495 } 00496 00497 // if the distance is too big, do not merged 00498 if (best_line != -1 && best_dist < est_alef_hgt_) { 00499 // add the pix to the best line 00500 Pix *new_line = PixUnion(lines->pix[best_line], 00501 lines->boxa->box[best_line], 00502 small_con_comps_pix->pix[con], small_con_comp_box); 00503 00504 if (new_line == NULL) { 00505 return false; 00506 } 00507 00508 pixDestroy(&lines->pix[best_line]); 00509 lines->pix[best_line] = new_line; 00510 } 00511 } 00512 00513 pixaDestroy(&small_con_comps_pix); 00514 boxaDestroy(&small_line_con_comps); 00515 00516 return true; 00517 } 00518 00519 // Creates new set of lines from the computed columns 00520 bool CubeLineSegmenter::AddLines(Pixa *lines) { 00521 // create an array that will hold the bounding boxes 00522 // of the concomps belonging to each line 00523 Boxaa *lines_con_comps = boxaaCreate(lines->n); 00524 if (lines_con_comps == NULL) { 00525 return false; 00526 } 00527 00528 for (int line = 0; line < lines->n; line++) { 00529 // if the line is not valid 00530 if (ValidLine(lines->pix[line], lines->boxa->box[line]) == false) { 00531 // split it 00532 Pixa *split_lines = SplitLine(lines->pix[line], 00533 lines->boxa->box[line]); 00534 00535 // remove the old line 00536 if (pixaRemovePix(lines, line) != 0) { 00537 return false; 00538 } 00539 00540 line--; 00541 00542 if (split_lines == NULL) { 00543 continue; 00544 } 00545 00546 // add the split lines instead and move the pointer 00547 for (int s_line = 0; s_line < split_lines->n; s_line++) { 00548 Pix *sp_line = pixaGetPix(split_lines, s_line, L_CLONE); 00549 Box *sp_box = boxaGetBox(split_lines->boxa, s_line, L_CLONE); 00550 00551 if (sp_line == NULL || sp_box == NULL) { 00552 return false; 00553 } 00554 00555 // insert the new line 00556 if (pixaInsertPix(lines, ++line, sp_line, sp_box) != 0) { 00557 return false; 00558 } 00559 } 00560 00561 // remove the split lines 00562 pixaDestroy(&split_lines); 00563 } 00564 } 00565 00566 // compute the concomps bboxes of each line 00567 for (int line = 0; line < lines->n; line++) { 00568 Boxa *line_con_comps = ComputeLineConComps(lines->pix[line], 00569 lines->boxa->box[line], NULL); 00570 00571 if (line_con_comps == NULL) { 00572 return false; 00573 } 00574 00575 // insert it into the boxaa array 00576 if (boxaaAddBoxa(lines_con_comps, line_con_comps, L_INSERT) != 0) { 00577 return false; 00578 } 00579 } 00580 00581 // post process the lines: 00582 // merge the contents of "small" lines info legitimate lines 00583 for (int line = 0; line < lines->n; line++) { 00584 // a small line detected 00585 if (SmallLine(lines->boxa->box[line]) == true) { 00586 // merge its components to one of the valid lines 00587 if (MergeLine(lines->pix[line], lines->boxa->box[line], 00588 lines, lines_con_comps) == true) { 00589 // remove the small line 00590 if (pixaRemovePix(lines, line) != 0) { 00591 return false; 00592 } 00593 00594 if (boxaaRemoveBoxa(lines_con_comps, line) != 0) { 00595 return false; 00596 } 00597 00598 line--; 00599 } 00600 } 00601 } 00602 00603 boxaaDestroy(&lines_con_comps); 00604 00605 // add the pix masks 00606 if (pixaaAddPixa(columns_, lines, L_INSERT) != 0) { 00607 return false; 00608 } 00609 00610 return true; 00611 } 00612 00613 // Index the specific pixa using RTL reading order 00614 int *CubeLineSegmenter::IndexRTL(Pixa *pixa) { 00615 int *pix_index = new int[pixa->n]; 00616 if (pix_index == NULL) { 00617 return NULL; 00618 } 00619 00620 for (int pix = 0; pix < pixa->n; pix++) { 00621 pix_index[pix] = pix; 00622 } 00623 00624 for (int ipix = 0; ipix < pixa->n; ipix++) { 00625 for (int jpix = ipix + 1; jpix < pixa->n; jpix++) { 00626 Box *ipix_box = pixa->boxa->box[pix_index[ipix]], 00627 *jpix_box = pixa->boxa->box[pix_index[jpix]]; 00628 00629 // swap? 00630 if ((ipix_box->x + ipix_box->w) < (jpix_box->x + jpix_box->w)) { 00631 int temp = pix_index[ipix]; 00632 pix_index[ipix] = pix_index[jpix]; 00633 pix_index[jpix] = temp; 00634 } 00635 } 00636 } 00637 00638 return pix_index; 00639 } 00640 00641 // Performs line segmentation 00642 bool CubeLineSegmenter::LineSegment() { 00643 // Use full image morphology to find columns 00644 // This only works for simple layouts where each column 00645 // of text extends the full height of the input image. 00646 Pix *pix_temp1 = pixMorphCompSequence(img_, "c5.500", 0); 00647 if (pix_temp1 == NULL) { 00648 return false; 00649 } 00650 00651 // Mask with a single component over each column 00652 Pixa *pixam; 00653 Boxa *boxa = pixConnComp(pix_temp1, &pixam, 8); 00654 00655 if (boxa == NULL) { 00656 return false; 00657 } 00658 00659 int init_morph_min_hgt = kLineSepMorphMinHgt; 00660 char sequence_str[16]; 00661 sprintf(sequence_str, "c100.%d", init_morph_min_hgt); 00662 00663 // Use selective region-based morphology to get the textline mask. 00664 Pixa *pixad = pixaMorphSequenceByRegion(img_, pixam, sequence_str, 0, 0); 00665 if (pixad == NULL) { 00666 return false; 00667 } 00668 00669 // for all columns 00670 int col_cnt = boxaGetCount(boxa); 00671 00672 // create columns 00673 columns_ = pixaaCreate(col_cnt); 00674 if (columns_ == NULL) { 00675 return false; 00676 } 00677 00678 // index columns based on readind order (RTL) 00679 int *col_order = IndexRTL(pixad); 00680 if (col_order == NULL) { 00681 return false; 00682 } 00683 00684 line_cnt_ = 0; 00685 00686 for (int col_idx = 0; col_idx < col_cnt; col_idx++) { 00687 int col = col_order[col_idx]; 00688 00689 // get the pix and box corresponding to the column 00690 Pix *pixt3 = pixaGetPix(pixad, col, L_CLONE); 00691 if (pixt3 == NULL) { 00692 delete []col_order; 00693 return false; 00694 } 00695 00696 Box *col_box = pixad->boxa->box[col]; 00697 00698 Pixa *pixac; 00699 Boxa *boxa2 = pixConnComp(pixt3, &pixac, 8); 00700 if (boxa2 == NULL) { 00701 delete []col_order; 00702 return false; 00703 } 00704 00705 // offset the boxes by the column box 00706 for (int line = 0; line < pixac->n; line++) { 00707 pixac->boxa->box[line]->x += col_box->x; 00708 pixac->boxa->box[line]->y += col_box->y; 00709 } 00710 00711 // add the lines 00712 if (AddLines(pixac) == true) { 00713 if (pixaaAddBox(columns_, col_box, L_CLONE) != 0) { 00714 delete []col_order; 00715 return false; 00716 } 00717 } 00718 00719 pixDestroy(&pixt3); 00720 boxaDestroy(&boxa2); 00721 00722 line_cnt_ += columns_->pixa[col_idx]->n; 00723 } 00724 00725 pixaDestroy(&pixam); 00726 pixaDestroy(&pixad); 00727 boxaDestroy(&boxa); 00728 00729 delete []col_order; 00730 pixDestroy(&pix_temp1); 00731 00732 return true; 00733 } 00734 00735 // Estimate the paramters of the font(s) used in the page 00736 bool CubeLineSegmenter::EstimateFontParams() { 00737 int hgt_hist[kHgtBins]; 00738 int max_hgt; 00739 double mean_hgt; 00740 00741 // init hgt histogram of concomps 00742 memset(hgt_hist, 0, sizeof(hgt_hist)); 00743 00744 // compute max hgt 00745 max_hgt = 0; 00746 00747 for (int con = 0; con < con_comps_->n; con++) { 00748 // skip conn comps that are too long or too wide 00749 if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt || 00750 con_comps_->boxa->box[con]->w > kMaxConnCompWid) { 00751 continue; 00752 } 00753 00754 max_hgt = MAX(max_hgt, con_comps_->boxa->box[con]->h); 00755 } 00756 00757 if (max_hgt <= 0) { 00758 return false; 00759 } 00760 00761 // init hgt histogram of concomps 00762 memset(hgt_hist, 0, sizeof(hgt_hist)); 00763 00764 // compute histogram 00765 mean_hgt = 0.0; 00766 for (int con = 0; con < con_comps_->n; con++) { 00767 // skip conn comps that are too long or too wide 00768 if (con_comps_->boxa->box[con]->h > kMaxConnCompHgt || 00769 con_comps_->boxa->box[con]->w > kMaxConnCompWid) { 00770 continue; 00771 } 00772 00773 int bin = static_cast<int>(kHgtBins * con_comps_->boxa->box[con]->h / 00774 max_hgt); 00775 bin = MIN(bin, kHgtBins - 1); 00776 hgt_hist[bin]++; 00777 mean_hgt += con_comps_->boxa->box[con]->h; 00778 } 00779 00780 mean_hgt /= con_comps_->n; 00781 00782 // find the top 2 bins 00783 int idx[kHgtBins]; 00784 00785 for (int bin = 0; bin < kHgtBins; bin++) { 00786 idx[bin] = bin; 00787 } 00788 00789 for (int ibin = 0; ibin < 2; ibin++) { 00790 for (int jbin = ibin + 1; jbin < kHgtBins; jbin++) { 00791 if (hgt_hist[idx[ibin]] < hgt_hist[idx[jbin]]) { 00792 int swap = idx[ibin]; 00793 idx[ibin] = idx[jbin]; 00794 idx[jbin] = swap; 00795 } 00796 } 00797 } 00798 00799 // emperically, we found out that the 2 highest freq bins correspond 00800 // respectively to the dot and alef 00801 est_dot_hgt_ = (1.0 * (idx[0] + 1) * max_hgt / kHgtBins); 00802 est_alef_hgt_ = (1.0 * (idx[1] + 1) * max_hgt / kHgtBins); 00803 00804 // as a sanity check the dot hgt must be significanly lower than alef 00805 if (est_alef_hgt_ < (est_dot_hgt_ * 2)) { 00806 // use max_hgt to estimate instead 00807 est_alef_hgt_ = mean_hgt * 1.5; 00808 est_dot_hgt_ = est_alef_hgt_ / 5.0; 00809 } 00810 00811 est_alef_hgt_ = MAX(est_alef_hgt_, est_dot_hgt_ * 4.0); 00812 00813 return true; 00814 } 00815 00816 // clean up the image 00817 Pix *CubeLineSegmenter::CleanUp(Pix *orig_img) { 00818 // get rid of long horizontal lines 00819 Pix *pix_temp0 = pixMorphCompSequence(orig_img, "o300.2", 0); 00820 pixXor(pix_temp0, pix_temp0, orig_img); 00821 00822 // get rid of long vertical lines 00823 Pix *pix_temp1 = pixMorphCompSequence(pix_temp0, "o2.300", 0); 00824 pixXor(pix_temp1, pix_temp1, pix_temp0); 00825 00826 pixDestroy(&pix_temp0); 00827 00828 // detect connected components 00829 Pixa *con_comps; 00830 Boxa *boxa = pixConnComp(pix_temp1, &con_comps, 8); 00831 if (boxa == NULL) { 00832 return NULL; 00833 } 00834 00835 // detect and remove suspicious conn comps 00836 for (int con = 0; con < con_comps->n; con++) { 00837 Box *box = boxa->box[con]; 00838 00839 // remove if suspc. conn comp 00840 if ((box->w > (box->h * kMaxHorzAspectRatio)) || 00841 (box->h > (box->w * kMaxVertAspectRatio)) || 00842 (box->w < kMinWid && box->h < kMinHgt)) { 00843 pixRasterop(pix_temp1, box->x, box->y, box->w, box->h, 00844 PIX_SRC ^ PIX_DST, con_comps->pix[con], 0, 0); 00845 } 00846 } 00847 00848 pixaDestroy(&con_comps); 00849 boxaDestroy(&boxa); 00850 00851 return pix_temp1; 00852 } 00853 00854 // Init the page segmenter 00855 bool CubeLineSegmenter::Init() { 00856 if (init_ == true) { 00857 return true; 00858 } 00859 00860 if (orig_img_ == NULL) { 00861 return false; 00862 } 00863 00864 // call the internal line segmentation 00865 return FindLines(); 00866 } 00867 00868 // return the pix mask and box of a specific line 00869 Pix *CubeLineSegmenter::Line(int line, Box **line_box) { 00870 if (init_ == false && Init() == false) { 00871 return NULL; 00872 } 00873 00874 if (line < 0 || line >= line_cnt_) { 00875 return NULL; 00876 } 00877 00878 (*line_box) = lines_pixa_->boxa->box[line]; 00879 return lines_pixa_->pix[line]; 00880 } 00881 00882 // Implements a basic rudimentary layout analysis based on Leptonica 00883 // works OK for Arabic. For other languages, the function TesseractPageAnalysis 00884 // should be called instead. 00885 bool CubeLineSegmenter::FindLines() { 00886 // convert the image to gray scale if necessary 00887 Pix *gray_scale_img = NULL; 00888 if (orig_img_->d != 2 && orig_img_->d != 8) { 00889 gray_scale_img = pixConvertTo8(orig_img_, false); 00890 if (gray_scale_img == NULL) { 00891 return false; 00892 } 00893 } else { 00894 gray_scale_img = orig_img_; 00895 } 00896 00897 // threshold image 00898 Pix *thresholded_img; 00899 thresholded_img = pixThresholdToBinary(gray_scale_img, 128); 00900 // free the gray scale image if necessary 00901 if (gray_scale_img != orig_img_) { 00902 pixDestroy(&gray_scale_img); 00903 } 00904 // bail-out if thresholding failed 00905 if (thresholded_img == NULL) { 00906 return false; 00907 } 00908 00909 // deskew 00910 Pix *deskew_img = pixDeskew(thresholded_img, 2); 00911 if (deskew_img == NULL) { 00912 return false; 00913 } 00914 00915 pixDestroy(&thresholded_img); 00916 00917 img_ = CleanUp(deskew_img); 00918 pixDestroy(&deskew_img); 00919 if (img_ == NULL) { 00920 return false; 00921 } 00922 00923 pixDestroy(&deskew_img); 00924 00925 // compute connected components 00926 Boxa *boxa = pixConnComp(img_, &con_comps_, 8); 00927 if (boxa == NULL) { 00928 return false; 00929 } 00930 00931 boxaDestroy(&boxa); 00932 00933 // estimate dot and alef hgts 00934 if (EstimateFontParams() == false) { 00935 return false; 00936 } 00937 00938 // perform line segmentation 00939 if (LineSegment() == false) { 00940 return false; 00941 } 00942 00943 // success 00944 init_ = true; 00945 return true; 00946 } 00947 00948 }