// tesseract 3.03
00001 00002 // File: colpartitionrid.h 00003 // Description: Class collecting code that acts on a BBGrid of ColPartitions. 00004 // Author: Ray Smith 00005 // Created: Mon Oct 05 08:42:01 PDT 2009 00006 // 00007 // (C) Copyright 2009, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_TEXTORD_COLPARTITIONGRID_H__ 00021 #define TESSERACT_TEXTORD_COLPARTITIONGRID_H__ 00022 00023 #include "bbgrid.h" 00024 #include "colpartition.h" 00025 #include "colpartitionset.h" 00026 00027 namespace tesseract { 00028 00029 class TabFind; 00030 00031 // ColPartitionGrid is a BBGrid of ColPartition. 00032 // It collects functions that work on the grid. 00033 class ColPartitionGrid : public BBGrid<ColPartition, 00034 ColPartition_CLIST, 00035 ColPartition_C_IT> { 00036 public: 00037 ColPartitionGrid(); 00038 ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); 00039 00040 virtual ~ColPartitionGrid(); 00041 00042 // Handles a click event in a display window. 00043 void HandleClick(int x, int y); 00044 00045 // Merges ColPartitions in the grid that look like they belong in the same 00046 // textline. 00047 // For all partitions in the grid, calls the box_cb permanent callback 00048 // to compute the search box, seaches the box, and if a candidate is found, 00049 // calls the confirm_cb to check any more rules. 
If the confirm_cb returns 00050 // true, then the partitions are merged. 00051 // Both callbacks are deleted before returning. 00052 void Merges(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb, 00053 TessResultCallback2<bool, const ColPartition*, 00054 const ColPartition*>* confirm_cb); 00055 00056 // For the given partition, calls the box_cb permanent callback 00057 // to compute the search box, searches the box, and if a candidate is found, 00058 // calls the confirm_cb to check any more rules. If the confirm_cb returns 00059 // true, then the partitions are merged. 00060 // Returns true if the partition is consumed by one or more merges. 00061 bool MergePart(TessResultCallback2<bool, ColPartition*, TBOX*>* box_cb, 00062 TessResultCallback2<bool, const ColPartition*, 00063 const ColPartition*>* confirm_cb, 00064 ColPartition* part); 00065 00066 // Finds all the ColPartitions in the grid that overlap with the given 00067 // box and returns them SortByBoxLeft(ed) and uniqued in the given list. 00068 // Any partition equal to not_this (may be NULL) is excluded. 00069 void FindOverlappingPartitions(const TBOX& box, const ColPartition* not_this, 00070 ColPartition_CLIST* parts); 00071 00072 // Finds and returns the best candidate ColPartition to merge with part, 00073 // selected from the candidates list, based on the minimum increase in 00074 // pairwise overlap among all the partitions overlapped by the combined box. 00075 // If overlap_increase is not NULL then it returns the increase in overlap 00076 // that would result from the merge. 00077 // See colpartitiongrid.cpp for a diagram. 00078 ColPartition* BestMergeCandidate( 00079 const ColPartition* part, ColPartition_CLIST* candidates, bool debug, 00080 TessResultCallback2<bool, const ColPartition*, 00081 const ColPartition*>* confirm_cb, 00082 int* overlap_increase); 00083 00084 // Split partitions where it reduces overlap between their bounding boxes. 
00085 // ColPartitions are after all supposed to be a partitioning of the blobs 00086 // AND of the space on the page! 00087 // Blobs that cause overlaps get removed, put in individual partitions 00088 // and added to the big_parts list. They are most likely characters on 00089 // 2 textlines that touch, or something big like a dropcap. 00090 void SplitOverlappingPartitions(ColPartition_LIST* big_parts); 00091 00092 // Filters partitions of source_type by looking at local neighbours. 00093 // Where a majority of neighbours have a text type, the partitions are 00094 // changed to text, where the neighbours have image type, they are changed 00095 // to image, and partitions that have no definite neighbourhood type are 00096 // left unchanged. 00097 // im_box and rerotation are used to map blob coordinates onto the 00098 // nontext_map, which is used to prevent the spread of text neighbourhoods 00099 // into images. 00100 // Returns true if anything was changed. 00101 bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix* nontext_map, 00102 const TBOX& im_box, const FCOORD& rerotation); 00103 00104 // Compute the mean RGB of the light and dark pixels in each ColPartition 00105 // and also the rms error in the linearity of color. 00106 void ComputePartitionColors(Pix* scaled_color, int scaled_factor, 00107 const FCOORD& rerotation); 00108 00109 // Reflects the grid and its colpartitions in the y-axis, assuming that 00110 // all blob boxes have already been done. 00111 void ReflectInYAxis(); 00112 00113 // Rotates the grid and its colpartitions by the given angle, assuming that 00114 // all blob boxes have already been done. 00115 void Deskew(const FCOORD& deskew); 00116 00117 // Transforms the grid of partitions to the output blocks, putting each 00118 // partition into a separate block. We don't really care about the order, 00119 // as we just want to get as much text as possible without trying to organize 00120 // it into proper blocks or columns. 
00121 void ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); 00122 00123 // Sets the left and right tabs of the partitions in the grid. 00124 void SetTabStops(TabFind* tabgrid); 00125 00126 // Makes the ColPartSets and puts them in the PartSetVector ready 00127 // for finding column bounds. Returns false if no partitions were found. 00128 // Each ColPartition in the grid is placed in a single ColPartSet based 00129 // on the bottom-left of its bounding box. 00130 bool MakeColPartSets(PartSetVector* part_sets); 00131 00132 // Makes a single ColPartitionSet consisting of a single ColPartition that 00133 // represents the total horizontal extent of the significant content on the 00134 // page. Used for the single column setting in place of automatic detection. 00135 // Returns NULL if the page is empty of significant content. 00136 ColPartitionSet* MakeSingleColumnSet(WidthCallback* cb); 00137 00138 // Mark the BLOBNBOXes in each partition as being owned by that partition. 00139 void ClaimBoxes(); 00140 00141 // Retypes all the blobs referenced by the partitions in the grid. 00142 // Image blobs are sliced on the grid boundaries to give the tab finder 00143 // a better handle on the edges of the images, and the actual blobs are 00144 // returned in the im_blobs list, as they are not owned by the block. 00145 void ReTypeBlobs(BLOBNBOX_LIST* im_blobs); 00146 00147 // The boxes within the partitions have changed (by deskew) so recompute 00148 // the bounds of all the partitions and reinsert them into the grid. 00149 void RecomputeBounds(int gridsize, const ICOORD& bleft, 00150 const ICOORD& tright, const ICOORD& vertical); 00151 00152 // Improves the margins of the ColPartitions in the grid by calling 00153 // FindPartitionMargins on each. 00154 void GridFindMargins(ColPartitionSet** best_columns); 00155 00156 // Improves the margins of the ColPartitions in the list by calling 00157 // FindPartitionMargins on each. 
00158 void ListFindMargins(ColPartitionSet** best_columns, 00159 ColPartition_LIST* parts); 00160 00161 // Deletes all the partitions in the grid after disowning all the blobs. 00162 void DeleteParts(); 00163 00164 // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and 00165 // all the blobs in them. 00166 void DeleteUnknownParts(TO_BLOCK* block); 00167 00168 // Finds and marks text partitions that represent figure captions. 00169 void FindFigureCaptions(); 00170 00173 // For every ColPartition in the grid, finds its upper and lower neighbours. 00174 void FindPartitionPartners(); 00175 // Finds the best partner in the given direction for the given partition. 00176 // Stores the result with AddPartner. 00177 void FindPartitionPartners(bool upper, ColPartition* part); 00178 // Finds the best partner in the given direction for the given partition. 00179 // Stores the result with AddPartner. 00180 void FindVPartitionPartners(bool to_the_left, ColPartition* part); 00181 // For every ColPartition with multiple partners in the grid, reduces the 00182 // number of partners to 0 or 1. If get_desperate is true, goes to more 00183 // desperate merge methods to merge flowing text before breaking partnerships. 00184 void RefinePartitionPartners(bool get_desperate); 00185 00186 private: 00187 // Finds and returns a list of candidate ColPartitions to merge with part. 00188 // The candidates must overlap search_box, and when merged must not 00189 // overlap any other partitions that are not overlapped by each individually. 00190 void FindMergeCandidates(const ColPartition* part, const TBOX& search_box, 00191 bool debug, ColPartition_CLIST* candidates); 00192 00193 // Smoothes the region type/flow type of the given part by looking at local 00194 // neigbours and the given image mask. 
Searches a padded rectangle with the 00195 // padding truncated on one size of the part's box in turn for each side, 00196 // using the result (if any) that has the least distance to all neighbours 00197 // that contribute to the decision. This biases in favor of rectangular 00198 // regions without completely enforcing them. 00199 // If a good decision cannot be reached, the part is left unchanged. 00200 // im_box and rerotation are used to map blob coordinates onto the 00201 // nontext_map, which is used to prevent the spread of text neighbourhoods 00202 // into images. 00203 // Returns true if the partition was changed. 00204 bool SmoothRegionType(Pix* nontext_map, 00205 const TBOX& im_box, 00206 const FCOORD& rerotation, 00207 bool debug, 00208 ColPartition* part); 00209 // Executes the search for SmoothRegionType in a single direction. 00210 // Creates a bounding box that is padded in all directions except direction, 00211 // and searches it for other partitions. Finds the nearest collection of 00212 // partitions that makes a decisive result (if any) and returns the type 00213 // and the distance of the collection. If there are any pixels in the 00214 // nontext_map, then the decision is biased towards image. 00215 BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, 00216 Pix* nontext_map, 00217 const TBOX& im_box, 00218 const FCOORD& rerotation, 00219 bool debug, 00220 const ColPartition& part, 00221 int* best_distance); 00222 // Counts the partitions in the given search_box by appending the gap 00223 // distance (scaled by dist_scaling) of the part from the base_part to the 00224 // vector of the appropriate type for the partition. Prior to return, the 00225 // vectors in the dists array are sorted in increasing order. 00226 // dists must be an array of GenericVectors of size NPT_COUNT. 
00227 void AccumulatePartDistances(const ColPartition& base_part, 00228 const ICOORD& dist_scaling, 00229 const TBOX& search_box, 00230 Pix* nontext_map, 00231 const TBOX& im_box, 00232 const FCOORD& rerotation, 00233 bool debug, 00234 GenericVector<int>* dists); 00235 00236 // Improves the margins of the ColPartition by searching for 00237 // neighbours that vertically overlap significantly. 00238 void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part); 00239 00240 // Starting at x, and going in the specified direction, upto x_limit, finds 00241 // the margin for the given y range by searching sideways, 00242 // and ignoring not_this. 00243 int FindMargin(int x, bool right_to_left, int x_limit, 00244 int y_bottom, int y_top, const ColPartition* not_this); 00245 }; 00246 00247 } // namespace tesseract. 00248 00249 #endif // TESSERACT_TEXTORD_COLPARTITIONGRID_H__