tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/colpartitionset.cpp
Go to the documentation of this file.
00001 
00002 // File:        colpartitionset.cpp
00003 // Description: Class to hold a list of ColPartitions of the page that
00004 //              correspond roughly to columns.
00005 // Author:      Ray Smith
00006 // Created:     Thu Aug 14 10:54:01 PDT 2008
00007 //
00008 // (C) Copyright 2008, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #ifdef HAVE_CONFIG_H
00022 #include "config_auto.h"
00023 #endif
00024 
00025 #include "colpartitionset.h"
00026 #include "ndminx.h"
00027 #include "workingpartset.h"
00028 #include "tablefind.h"
00029 
00030 namespace tesseract {
00031 
// Smallest width, expressed as a fraction of the image resolution (ppi),
// that a partition must reach to be interesting as a column.
const double kMinColumnWidth = 2.0 / 3.0;
00034 
// Expands the ELISTIZE macro for ColPartitionSet.
// NOTE(review): presumably this emits the out-of-line support code for the
// ColPartitionSet list/iterator classes declared in the header - confirm
// against the macro definition.
ELISTIZE(ColPartitionSet)
00036 
00037 ColPartitionSet::ColPartitionSet(ColPartition_LIST* partitions) {
00038   ColPartition_IT it(&parts_);
00039   it.add_list_after(partitions);
00040   ComputeCoverage();
00041 }
00042 
00043 ColPartitionSet::ColPartitionSet(ColPartition* part) {
00044   ColPartition_IT it(&parts_);
00045   it.add_after_then_move(part);
00046   ComputeCoverage();
00047 }
00048 
00049 ColPartitionSet::~ColPartitionSet() {
00050 }
00051 
00052 // Return an element of the parts_ list from its index.
00053 ColPartition* ColPartitionSet::GetColumnByIndex(int index) {
00054   ColPartition_IT it(&parts_);
00055   it.mark_cycle_pt();
00056   for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward());
00057   if (it.cycled_list())
00058     return NULL;
00059   return it.data();
00060 }
00061 
00062 // Return the ColPartition that contains the given coords, if any, else NULL.
00063 ColPartition* ColPartitionSet::ColumnContaining(int x, int y) {
00064   ColPartition_IT it(&parts_);
00065   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00066     ColPartition* part = it.data();
00067     if (part->ColumnContains(x, y))
00068       return part;
00069   }
00070   return NULL;
00071 }
00072 
00073 // Extract all the parts from the list, relinquishing ownership.
00074 void ColPartitionSet::RelinquishParts() {
00075   ColPartition_IT it(&parts_);
00076   while (!it.empty()) {
00077     it.extract();
00078     it.forward();
00079   }
00080 }
00081 
// Attempt to improve this by adding partitions or expanding partitions.
// Every non-NULL set in src_sets is visited. For each of its non-image
// partitions (col_part), the partition in this set that it lines up with
// (part) is located; if the two do not overlap, a shallow copy of col_part is
// inserted into parts_, otherwise the left and right edges of part may be
// moved out to col_part's tab or box edges. The coverage statistics are
// recomputed from scratch at the end.
//   cb:       width callback; cb->Run(width) is used as the test of whether
//             a candidate key-width is acceptable.
//   src_sets: column sets to draw improvements from; NULL entries are
//             skipped.
// parts_ must be non-empty on entry (checked with ASSERT_HOST).
void ColPartitionSet::ImproveColumnCandidate(WidthCallback* cb,
                                             PartSetVector* src_sets) {
  int set_size = src_sets->size();
  // Iterate over the provided column sets, as each one may have something
  // to improve this.
  for (int i = 0; i < set_size; ++i) {
    ColPartitionSet* column_set = src_sets->get(i);
    if (column_set == NULL)
      continue;
    // Iterate over the parts in this and column_set, adding bigger or
    // new parts in column_set to this.
    ColPartition_IT part_it(&parts_);
    ASSERT_HOST(!part_it.empty());
    // Right key of the partition most recently stepped past by part_it.
    // MIN_INT32 means nothing has been stepped past yet for this source set.
    int prev_right = MIN_INT32;
    part_it.mark_cycle_pt();
    ColPartition_IT col_it(&column_set->parts_);
    for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
      ColPartition* col_part = col_it.data();
      if (col_part->blob_type() < BRT_UNKNOWN)
        continue;  // Ignore image partitions.
      int col_left = col_part->left_key();
      int col_right = col_part->right_key();
      // Sync-up part_it (in this) so it matches the col_part in column_set.
      // part_it is never advanced beyond the last element, so part is always
      // a valid partition after this loop.
      ColPartition* part = part_it.data();
      while (!part_it.at_last() && part->right_key() < col_left) {
        prev_right = part->right_key();
        part_it.forward();
        part = part_it.data();
      }
      int part_left = part->left_key();
      int part_right = part->right_key();
      if (part_right < col_left || col_right < part_left) {
        // There is no overlap so this is a new partition.
        AddPartition(col_part->ShallowCopy(), &part_it);
        continue;
      }
      // Check the edges of col_part to see if they can improve part.
      // part_width_ok records whether part's current width passes cb before
      // any edge is changed; it is reused for both the left and right tests.
      bool part_width_ok = cb->Run(part->KeyWidth(part_left, part_right));
      if (col_left < part_left && col_left > prev_right) {
        // The left edge of the column is better and it doesn't overlap,
        // so we can potentially expand it.
        int col_box_left = col_part->BoxLeftKey();
        bool tab_width_ok = cb->Run(part->KeyWidth(col_left, part_right));
        bool box_width_ok = cb->Run(part->KeyWidth(col_box_left, part_right));
        if (tab_width_ok || (!part_width_ok )) {
          // The tab is leaving the good column metric at least as good as
          // it was before, so use the tab.
          part->CopyLeftTab(*col_part, false);
          part->SetColumnGoodness(cb);
        } else if (col_box_left < part_left &&
                   (box_width_ok || !part_width_ok)) {
          // The box is leaving the good column metric at least as good as
          // it was before, so use the box.
          // NOTE(review): !part_width_ok is always false in this branch,
          // because that case is already taken by the branch above.
          part->CopyLeftTab(*col_part, true);
          part->SetColumnGoodness(cb);
        }
        // Re-read the left key for the right-edge width tests below
        // (presumably CopyLeftTab can change it - confirm).
        part_left = part->left_key();
      }
      // Only expand to the right if that cannot run into the next partition
      // in this set.
      if (col_right > part_right &&
          (part_it.at_last() ||
           part_it.data_relative(1)->left_key() > col_right)) {
        // The right edge is better, so we can possibly expand it.
        int col_box_right = col_part->BoxRightKey();
        bool tab_width_ok = cb->Run(part->KeyWidth(part_left, col_right));
        bool box_width_ok = cb->Run(part->KeyWidth(part_left, col_box_right));
        if (tab_width_ok || (!part_width_ok )) {
          // The tab is leaving the good column metric at least as good as
          // it was before, so use the tab.
          part->CopyRightTab(*col_part, false);
          part->SetColumnGoodness(cb);
        } else if (col_box_right > part_right &&
                   (box_width_ok || !part_width_ok)) {
          // The box is leaving the good column metric at least as good as
          // it was before, so use the box.
          // NOTE(review): as on the left side, !part_width_ok cannot be true
          // here.
          part->CopyRightTab(*col_part, true);
          part->SetColumnGoodness(cb);
        }
      }
    }
  }
  // Edges may have moved, so rebuild the coverage totals and bounding box.
  ComputeCoverage();
}
00165 
00166 // If this set is good enough to represent a new partitioning into columns,
00167 // add it to the vector of sets, otherwise delete it.
00168 void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets,
00169                                               WidthCallback* cb) {
00170   bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
00171                                          bounding_box_.bottom());
00172   if (debug) {
00173     tprintf("Considering new column candidate:\n");
00174     Print();
00175   }
00176   if (!LegalColumnCandidate()) {
00177     if (debug) {
00178       tprintf("Not a legal column candidate:\n");
00179       Print();
00180     }
00181     delete this;
00182     return;
00183   }
00184   for (int i = 0; i < column_sets->size(); ++i) {
00185     ColPartitionSet* columns = column_sets->get(i);
00186     // In ordering the column set candidates, good_coverage_ is king,
00187     // followed by good_column_count_ and then bad_coverage_.
00188     bool better = good_coverage_ > columns->good_coverage_;
00189     if (good_coverage_ == columns->good_coverage_) {
00190       better = good_column_count_ > columns->good_column_count_;
00191       if (good_column_count_ == columns->good_column_count_) {
00192           better = bad_coverage_ > columns->bad_coverage_;
00193       }
00194     }
00195     if (better) {
00196       // The new one is better so add it.
00197       if (debug)
00198         tprintf("Good one\n");
00199       column_sets->insert(this, i);
00200       return;
00201     }
00202     if (columns->CompatibleColumns(false, this, cb)) {
00203       if (debug)
00204         tprintf("Duplicate\n");
00205       delete this;
00206       return;  // It is not unique.
00207     }
00208   }
00209   if (debug)
00210     tprintf("Added to end\n");
00211   column_sets->push_back(this);
00212 }
00213 
00214 // Return true if the partitions in other are all compatible with the columns
00215 // in this.
00216 bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other,
00217                                         WidthCallback* cb) {
00218   if (debug) {
00219     tprintf("CompatibleColumns testing compatibility\n");
00220     Print();
00221     other->Print();
00222   }
00223   if (other->parts_.empty()) {
00224     if (debug)
00225       tprintf("CompatibleColumns true due to empty other\n");
00226     return true;
00227   }
00228   ColPartition_IT it(&other->parts_);
00229   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00230     ColPartition* part = it.data();
00231     if (part->blob_type() < BRT_UNKNOWN) {
00232       if (debug) {
00233         tprintf("CompatibleColumns ignoring image partition\n");
00234         part->Print();
00235       }
00236       continue;  // Image partitions are irrelevant to column compatibility.
00237     }
00238     int y = part->MidY();
00239     int left = part->bounding_box().left();
00240     int right = part->bounding_box().right();
00241     ColPartition* left_col = ColumnContaining(left, y);
00242     ColPartition* right_col = ColumnContaining(right, y);
00243     if (right_col == NULL || left_col == NULL) {
00244       if (debug) {
00245         tprintf("CompatibleColumns false due to partition edge outside\n");
00246         part->Print();
00247       }
00248       return false;  // A partition edge lies outside of all columns
00249     }
00250     if (right_col != left_col && cb->Run(right - left)) {
00251       if (debug) {
00252         tprintf("CompatibleColumns false due to good width in multiple cols\n");
00253         part->Print();
00254       }
00255       return false;  // Partition with a good width must be in a single column.
00256     }
00257 
00258     ColPartition_IT it2= it;
00259     while (!it2.at_last()) {
00260       it2.forward();
00261       ColPartition* next_part = it2.data();
00262       if (!BLOBNBOX::IsTextType(next_part->blob_type()))
00263         continue;  // Non-text partitions are irrelevant.
00264       int next_left = next_part->bounding_box().left();
00265       if (next_left == right) {
00266         break;  // They share the same edge, so one must be a pull-out.
00267       }
00268       // Search to see if right and next_left fall within a single column.
00269       ColPartition* next_left_col = ColumnContaining(next_left, y);
00270       if (right_col == next_left_col) {
00271         // There is a column break in this column.
00272         // This can be due to a figure caption within a column, a pull-out
00273         // block, or a simple broken textline that remains to be merged:
00274         // all allowed, or a change in column layout: not allowed.
00275         // If both partitions are of good width, then it is likely
00276         // a change in column layout, otherwise probably an allowed situation.
00277         if (part->good_width() && next_part->good_width()) {
00278           if (debug) {
00279             int next_right = next_part->bounding_box().right();
00280             tprintf("CompatibleColumns false due to 2 parts of good width\n");
00281             tprintf("part1 %d-%d, part2 %d-%d\n",
00282                     left, right, next_left, next_right);
00283             right_col->Print();
00284           }
00285           return false;
00286         }
00287       }
00288       break;
00289     }
00290   }
00291   if (debug)
00292     tprintf("CompatibleColumns true!\n");
00293   return true;
00294 }
00295 
00296 // Returns the total width of all blobs in the part_set that do not lie
00297 // within an approved column. Used as a cost measure for using this
00298 // column set over another that might be compatible.
00299 int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) {
00300   int total_width = 0;
00301   ColPartition_IT it(&part_set->parts_);
00302   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00303     ColPartition* part = it.data();
00304     if (!BLOBNBOX::IsTextType(part->blob_type())) {
00305       continue;  // Non-text partitions are irrelevant to column compatibility.
00306     }
00307     int y = part->MidY();
00308     BLOBNBOX_C_IT box_it(part->boxes());
00309     for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
00310       const TBOX& box = it.data()->bounding_box();
00311       // Assume that the whole blob is outside any column iff its x-middle
00312       // is outside.
00313       int x = (box.left() + box.right()) / 2;
00314       ColPartition* col = ColumnContaining(x, y);
00315       if (col == NULL)
00316         total_width += box.width();
00317     }
00318   }
00319   return total_width;
00320 }
00321 
00322 // Return true if this ColPartitionSet makes a legal column candidate by
00323 // having legal individual partitions and non-overlapping adjacent pairs.
00324 bool ColPartitionSet::LegalColumnCandidate() {
00325   ColPartition_IT it(&parts_);
00326   if (it.empty())
00327     return false;
00328   bool any_text_parts = false;
00329   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00330     ColPartition* part = it.data();
00331     if (BLOBNBOX::IsTextType(part->blob_type())) {
00332       if (!part->IsLegal())
00333         return false;  // Individual partition is illegal.
00334       any_text_parts = true;
00335     }
00336     if (!it.at_last()) {
00337       ColPartition* next_part = it.data_relative(1);
00338       if (next_part->left_key() < part->right_key()) {
00339         return false;
00340       }
00341     }
00342   }
00343   return any_text_parts;
00344 }
00345 
00346 // Return a copy of this. If good_only will only copy the Good ColPartitions.
00347 ColPartitionSet* ColPartitionSet::Copy(bool good_only) {
00348   ColPartition_LIST copy_parts;
00349   ColPartition_IT src_it(&parts_);
00350   ColPartition_IT dest_it(&copy_parts);
00351   for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
00352     ColPartition* part = src_it.data();
00353     if (BLOBNBOX::IsTextType(part->blob_type()) &&
00354         (!good_only || part->good_width() || part->good_column()))
00355       dest_it.add_after_then_move(part->ShallowCopy());
00356   }
00357   if (dest_it.empty())
00358     return NULL;
00359   return new ColPartitionSet(&copy_parts);
00360 }
00361 
00362 // Return the bounding boxes of columns at the given y-range
00363 void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top,
00364                                      ColSegment_LIST *segments) {
00365   ColPartition_IT it(&parts_);
00366   ColSegment_IT col_it(segments);
00367   col_it.move_to_last();
00368   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00369     ColPartition* part = it.data();
00370     ICOORD bot_left(part->LeftAtY(y_top), y_bottom);
00371     ICOORD top_right(part->RightAtY(y_bottom), y_top);
00372     ColSegment *col_seg = new ColSegment();
00373     col_seg->InsertBox(TBOX(bot_left, top_right));
00374     col_it.add_after_then_move(col_seg);
00375   }
00376 }
00377 
00378 // Display the edges of the columns at the given y coords.
00379 void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top,
00380                                          ScrollView* win) {
00381 #ifndef GRAPHICS_DISABLED
00382   ColPartition_IT it(&parts_);
00383   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00384     ColPartition* part = it.data();
00385     win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom);
00386     win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom);
00387   }
00388 #endif  // GRAPHICS_DISABLED
00389 }
00390 
// Return the ColumnSpanningType that best explains the columns overlapped
// by the given coords(left,right,y), with the given margins.
// Also return the first and last column index touched by the coords and
// the leftmost spanned column.
// Column indices are 2n + 1 for real columns (0 based) and even values
// represent the gaps in between columns, with 0 being left of the leftmost.
// resolution refers to the ppi resolution of the image.
//   left, right, y: horizontal extent and y position being classified.
//   height: tolerance applied at the outer columns only: left + height is
//           also tried against the first column and right - height against
//           the last.
//   left_margin, right_margin: compared with the column edges at y to decide
//           whether a column counts towards margin_columns.
//   first_col, last_col, first_spanned_col: outputs. All start at -1;
//           first_col and last_col are always >= 0 on return (asserted),
//           first_spanned_col stays -1 if no column was recorded as spanned.
// Returns:
//   CST_FLOWING if left and right are both in the same column,
//   CST_NOISE   if first_col == last_col and the width is under
//               kMinColumnWidth * resolution,
//   CST_HEADING if more than one column counted towards margin_columns, or
//               exactly one did and this set has a single column,
//   CST_PULLOUT otherwise.
ColumnSpanningType ColPartitionSet::SpanningType(int resolution,
                                                 int left, int right,
                                                 int height, int y,
                                                 int left_margin,
                                                 int right_margin,
                                                 int* first_col,
                                                 int* last_col,
                                                 int* first_spanned_col) {
  *first_col = -1;
  *last_col = -1;
  *first_spanned_col = -1;
  // Number of columns counted as reached by the margins; only the left-end
  // and right-end branches below change it.
  int margin_columns = 0;
  ColPartition_IT it(&parts_);
  // Odd index of the current real column; col_index - 1 is the gap to its
  // left.
  int col_index = 1;
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {
    ColPartition* part = it.data();
    if (part->ColumnContains(left, y) ||
        (it.at_first() && part->ColumnContains(left + height, y))) {
      // In the default case, first_col is set, but margin_columns remains
      // zero, so first_col will get reset in the first column genuinely
      // spanned, but we can tell the difference from a noise partition
      // that touches no column.
      *first_col = col_index;
      if (part->ColumnContains(right, y) ||
          (it.at_last() && part->ColumnContains(right - height, y))) {
        // Both within a single column.
        *last_col = col_index;
        return CST_FLOWING;
      }
      if (left_margin <= part->LeftAtY(y)) {
        // It completely spans this column.
        *first_spanned_col = col_index;
        margin_columns = 1;
      }
    } else if (part->ColumnContains(right, y) ||
               (it.at_last() && part->ColumnContains(right - height, y))) {
      // The right end is in this column, so this is the last one to look at.
      if (*first_col < 0) {
        // It started in-between.
        *first_col = col_index - 1;
      }
      if (right_margin >= part->RightAtY(y)) {
        // It completely spans this column.
        if (margin_columns == 0)
          *first_spanned_col = col_index;
        ++margin_columns;
      }
      *last_col = col_index;
      break;
    } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {
      // Neither left nor right are contained within, so it spans this
      // column.
      // Note that margin_columns is not incremented in this branch.
      if (*first_col < 0) {
        // It started in between the previous column and the current column.
        *first_col = col_index - 1;
      }
      if (margin_columns == 0)
        *first_spanned_col = col_index;
      *last_col = col_index;
    } else if (right < part->LeftAtY(y)) {
      // We have gone past the end.
      *last_col = col_index - 1;
      if (*first_col < 0) {
        // It must lie completely between columns =>noise.
        *first_col = col_index - 1;
      }
      break;
    }
  }
  // If the loop ran off the end of the list, col_index - 1 is the gap to the
  // right of the last column.
  if (*first_col < 0)
    *first_col = col_index - 1;  // The last in-between.
  if (*last_col < 0)
    *last_col = col_index - 1;  // The last in-between.
  ASSERT_HOST(*first_col >= 0 && *last_col >= 0);
  ASSERT_HOST(*first_col <= *last_col);
  if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {
    // Neither end was in a column, and it didn't span any, so it lies
    // entirely between columns, therefore noise.
    return CST_NOISE;
  } else if (margin_columns <= 1) {
    // An exception for headings that stick outside of single-column text.
    if (margin_columns == 1 && parts_.singleton()) {
      return CST_HEADING;
    }
    // It is a pullout, as left and right were not in the same column, but
    // it doesn't go to the edge of its start and end.
    return CST_PULLOUT;
  }
  // Its margins went to the edges of first and last columns => heading.
  return CST_HEADING;
}
00488 
// The column_set has changed. Close down all in-progress WorkingPartSets in
// columns that do not match and start new ones for the new columns in this.
// As ColPartitions are turned into BLOCKs, the used ones are put in
// used_parts, as they still need to be referenced in the grid.
//   bleft, tright, resolution: passed through unchanged to
//       WorkingPartSet::ExtractCompletedBlocks.
//   used_parts: receives the partitions consumed while extracting blocks.
//   working_set_list: in/out. Emptied on entry and rebuilt as: a
//       between-column set (column NULL), then for each column of this
//       either the carried-over matching set or a new one, and a final
//       between-column set.
// Old working sets that are not carried over are deleted here after their
// completed blocks have been extracted.
void ColPartitionSet::ChangeWorkColumns(const ICOORD& bleft,
                                        const ICOORD& tright,
                                        int resolution,
                                        ColPartition_LIST* used_parts,
                                        WorkingPartSet_LIST* working_set_list) {
  // Move the input list to a temporary location so we can delete its elements
  // as we add them to the output working_set.
  WorkingPartSet_LIST work_src;
  WorkingPartSet_IT src_it(&work_src);
  src_it.add_list_after(working_set_list);
  src_it.move_to_first();
  WorkingPartSet_IT dest_it(working_set_list);
  // Completed blocks and to_blocks are accumulated and given to the first new
  // one  whenever we keep a column, or at the end.
  BLOCK_LIST completed_blocks;
  TO_BLOCK_LIST to_blocks;
  // First set created since the last carried-over column (or since the
  // start); it is the one that receives the accumulated blocks.
  WorkingPartSet* first_new_set = NULL;
  WorkingPartSet* working_set = NULL;
  ColPartition_IT col_it(&parts_);
  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
    ColPartition* column = col_it.data();
    // Any existing column to the left of column is completed.
    // Old between-column sets (column() == NULL) are completed here too.
    while (!src_it.empty() &&
           ((working_set = src_it.data())->column() == NULL ||
            working_set->column()->right_key() <= column->left_key())) {
      src_it.extract();
      working_set->ExtractCompletedBlocks(bleft, tright, resolution,
                                          used_parts, &completed_blocks,
                                          &to_blocks);
      delete working_set;
      src_it.forward();
    }
    // Make a new between-column WorkingSet for before the current column.
    working_set = new WorkingPartSet(NULL);
    dest_it.add_after_then_move(working_set);
    if (first_new_set == NULL)
      first_new_set = working_set;
    // A matching column gets to stay, and first_new_set gets all the
    // completed_sets.
    // If src_it is non-empty here, the loop above stopped on a set whose
    // column() is non-NULL, so the dereference below is safe.
    working_set = src_it.empty() ? NULL : src_it.data();
    if (working_set != NULL &&
        working_set->column()->MatchingColumns(*column)) {
      working_set->set_column(column);
      dest_it.add_after_then_move(src_it.extract());
      src_it.forward();
      first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
      first_new_set = NULL;
    } else {
      // Just make a new working set for the current column.
      working_set = new WorkingPartSet(column);
      dest_it.add_after_then_move(working_set);
    }
  }
  // Complete any remaining src working sets.
  while (!src_it.empty()) {
    working_set = src_it.extract();
    working_set->ExtractCompletedBlocks(bleft, tright, resolution,
                                        used_parts, &completed_blocks,
                                        &to_blocks);
    delete working_set;
    src_it.forward();
  }
  // Make a new between-column WorkingSet for after the last column.
  working_set = new WorkingPartSet(NULL);
  dest_it.add_after_then_move(working_set);
  if (first_new_set == NULL)
    first_new_set = working_set;
  // The first_new_set now gets any accumulated completed_parts/blocks.
  first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
}
00563 
00564 // Accumulate the widths and gaps into the given variables.
00565 void ColPartitionSet::AccumulateColumnWidthsAndGaps(int* total_width,
00566                                                     int* width_samples,
00567                                                     int* total_gap,
00568                                                     int* gap_samples) {
00569   ColPartition_IT it(&parts_);
00570   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00571     ColPartition* part = it.data();
00572     *total_width += part->ColumnWidth();
00573     ++*width_samples;
00574     if (!it.at_last()) {
00575       ColPartition* next_part = it.data_relative(1);
00576       int gap = part->KeyWidth(part->right_key(), next_part->left_key());
00577       *total_gap += gap;
00578       ++*gap_samples;
00579     }
00580   }
00581 }
00582 
00583 // Provide debug output for this ColPartitionSet and all the ColPartitions.
00584 void ColPartitionSet::Print() {
00585   ColPartition_IT it(&parts_);
00586   tprintf("Partition set of %d parts, %d good, coverage=%d+%d"
00587           " (%d,%d)->(%d,%d)\n",
00588           it.length(), good_column_count_, good_coverage_, bad_coverage_,
00589           bounding_box_.left(), bounding_box_.bottom(),
00590           bounding_box_.right(), bounding_box_.top());
00591   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00592     ColPartition* part = it.data();
00593     part->Print();
00594   }
00595 }
00596 
00597 // PRIVATE CODE.
00598 
00599 // Add the given partition to the list in the appropriate place.
00600 void ColPartitionSet::AddPartition(ColPartition* new_part,
00601                                    ColPartition_IT* it) {
00602   AddPartitionCoverageAndBox(*new_part);
00603   int new_right = new_part->right_key();
00604   if (it->data()->left_key() >= new_right)
00605     it->add_before_stay_put(new_part);
00606   else
00607     it->add_after_stay_put(new_part);
00608 }
00609 
00610 // Compute the coverage and good column count. Coverage is the amount of the
00611 // width of the page (in pixels) that is covered by ColPartitions, which are
00612 // used to provide candidate column layouts.
00613 // Coverage is split into good and bad. Good coverage is provided by
00614 // ColPartitions of a frequent width (according to the callback function
00615 // provided by TabFinder::WidthCB, which accesses stored statistics on the
00616 // widths of ColParititions) and bad coverage is provided by all other
00617 // ColPartitions, even if they have tab vectors at both sides. Thus:
00618 // |-----------------------------------------------------------------|
00619 // |        Double     width    heading                              |
00620 // |-----------------------------------------------------------------|
00621 // |-------------------------------| |-------------------------------|
00622 // |   Common width ColParition    | |  Common width ColPartition    |
00623 // |-------------------------------| |-------------------------------|
00624 // the layout with two common-width columns has better coverage than the
00625 // double width heading, because the coverage is "good," even though less in
00626 // total coverage than the heading, because the heading coverage is "bad."
00627 void ColPartitionSet::ComputeCoverage() {
00628   // Count the number of good columns and sum their width.
00629   ColPartition_IT it(&parts_);
00630   good_column_count_ = 0;
00631   good_coverage_ = 0;
00632   bad_coverage_ = 0;
00633   bounding_box_ = TBOX();
00634   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00635     ColPartition* part = it.data();
00636     AddPartitionCoverageAndBox(*part);
00637   }
00638 }
00639 
00640 // Adds the coverage, column count and box for a single partition,
00641 // without adding it to the list. (Helper factored from ComputeCoverage.)
00642 void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) {
00643   bounding_box_ += part.bounding_box();
00644   int coverage = part.ColumnWidth();
00645   if (part.good_width()) {
00646     good_coverage_ += coverage;
00647     good_column_count_ += 2;
00648   } else {
00649     if (part.blob_type() < BRT_UNKNOWN)
00650       coverage /= 2;
00651     if (part.good_column())
00652       ++good_column_count_;
00653     bad_coverage_ += coverage;
00654   }
00655 }
00656 
00657 }  // namespace tesseract.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines