tesseract
3.03
|
00001 00002 // File: ccnontextdetect.cpp 00003 // Description: Connected-Component-based photo (non-text) detection. 00004 // Copyright 2011 Google Inc. All Rights Reserved. 00005 // Author: rays@google.com (Ray Smith) 00006 // Created: Sat Jun 11 10:12:01 PST 2011 00007 // 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifdef HAVE_CONFIG_H 00021 #include "config_auto.h" 00022 #endif 00023 00024 #include "ccnontextdetect.h" 00025 #include "imagefind.h" 00026 #include "strokewidth.h" 00027 00028 namespace tesseract { 00029 00030 // Max number of neighbour small objects per squared gridsize before a grid 00031 // cell becomes image. 00032 const double kMaxSmallNeighboursPerPix = 1.0 / 32; 00033 // Max number of small blobs a large blob may overlap before it is rejected 00034 // and determined to be image. 00035 const int kMaxLargeOverlapsWithSmall = 3; 00036 // Max number of small blobs a medium blob may overlap before it is rejected 00037 // and determined to be image. Larger than for large blobs as medium blobs 00038 // may be complex Chinese characters. Very large Chinese characters are going 00039 // to overlap more medium blobs than small. 00040 const int kMaxMediumOverlapsWithSmall = 12; 00041 // Max number of normal blobs a large blob may overlap before it is rejected 00042 // and determined to be image. This is set higher to allow for drop caps, which 00043 // may overlap a lot of good text blobs. 00044 const int kMaxLargeOverlapsWithMedium = 12; 00045 // Multiplier of original noise_count used to test for the case of spreading 00046 // noise beyond where it should really be. 00047 const int kOriginalNoiseMultiple = 8; 00048 // Pixel padding for noise blobs when rendering on the image 00049 // mask to encourage them to join together. Make it too big and images 00050 // will fatten out too much and have to be clipped to text. 00051 const int kNoisePadding = 4; 00052 // Fraction of max_noise_count_ to be added to the noise count if there is 00053 // photo mask in the background. 00054 const double kPhotoOffsetFraction = 0.375; 00055 // Min ratio of perimeter^2/16area for a "good" blob in estimating noise 00056 // density. Good blobs are supposed to be highly likely real text. 00057 // We consider a square to have unit ratio, where A=(p/4)^2, hence the factor 00058 // of 16. Digital circles are weird and have a minimum ratio of pi/64, not 00059 // the 1/(4pi) that you would expect. 00060 const double kMinGoodTextPARatio = 1.5; 00061 00062 CCNonTextDetect::CCNonTextDetect(int gridsize, 00063 const ICOORD& bleft, const ICOORD& tright) 00064 : BlobGrid(gridsize, bleft, tright), 00065 max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix * 00066 gridsize * gridsize)), 00067 noise_density_(NULL) { 00068 // TODO(rays) break max_noise_count_ out into an area-proportional 00069 // value, as now plus an additive constant for the number of text blobs 00070 // in the 3x3 neigbourhood - maybe 9. 00071 } 00072 00073 CCNonTextDetect::~CCNonTextDetect() { 00074 delete noise_density_; 00075 } 00076 00077 // Creates and returns a Pix with the same resolution as the original 00078 // in which 1 (black) pixels represent likely non text (photo, line drawing) 00079 // areas of the page, deleting from the blob_block the blobs that were 00080 // determined to be non-text. 00081 // The photo_map is used to bias the decision towards non-text, rather than 00082 // supplying definite decision. 00083 // The blob_block is the usual result of connected component analysis, 00084 // holding the detected blobs. 00085 // The returned Pix should be PixDestroyed after use. 00086 Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map, 00087 TO_BLOCK* blob_block) { 00088 // Insert the smallest blobs into the grid. 00089 InsertBlobList(&blob_block->small_blobs); 00090 InsertBlobList(&blob_block->noise_blobs); 00091 // Add the medium blobs that don't have a good strokewidth neighbour. 00092 // Those that do go into good_grid as an antidote to spreading beyond the 00093 // real reaches of a noise region. 00094 BlobGrid good_grid(gridsize(), bleft(), tright()); 00095 BLOBNBOX_IT blob_it(&blob_block->blobs); 00096 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00097 BLOBNBOX* blob = blob_it.data(); 00098 double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0; 00099 perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area(); 00100 if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio) 00101 InsertBBox(true, true, blob); 00102 else 00103 good_grid.InsertBBox(true, true, blob); 00104 } 00105 noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid); 00106 good_grid.Clear(); // Not needed any more. 00107 Pix* pix = noise_density_->ThresholdToPix(max_noise_count_); 00108 if (debug) { 00109 pixWrite("junknoisemask.png", pix, IFF_PNG); 00110 } 00111 ScrollView* win = NULL; 00112 #ifndef GRAPHICS_DISABLED 00113 if (debug) { 00114 win = MakeWindow(0, 400, "Photo Mask Blobs"); 00115 } 00116 #endif // GRAPHICS_DISABLED 00117 // Large and medium blobs are not text if they overlap with "a lot" of small 00118 // blobs. 00119 MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, 00120 kMaxLargeOverlapsWithSmall, 00121 win, ScrollView::DARK_GREEN, pix); 00122 MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall, 00123 win, ScrollView::WHITE, pix); 00124 // Clear the grid of small blobs and insert the medium blobs. 00125 Clear(); 00126 InsertBlobList(&blob_block->blobs); 00127 MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, 00128 kMaxLargeOverlapsWithMedium, 00129 win, ScrollView::DARK_GREEN, pix); 00130 // Clear again before we start deleting the blobs in the grid. 00131 Clear(); 00132 MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1, 00133 win, ScrollView::CORAL, pix); 00134 MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1, 00135 win, ScrollView::GOLDENROD, pix); 00136 MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1, 00137 win, ScrollView::WHITE, pix); 00138 if (debug) { 00139 #ifndef GRAPHICS_DISABLED 00140 win->Update(); 00141 #endif // GRAPHICS_DISABLED 00142 pixWrite("junkccphotomask.png", pix, IFF_PNG); 00143 #ifndef GRAPHICS_DISABLED 00144 delete win->AwaitEvent(SVET_DESTROY); 00145 delete win; 00146 #endif // GRAPHICS_DISABLED 00147 } 00148 return pix; 00149 } 00150 00151 // Computes and returns the noise_density IntGrid, at the same gridsize as 00152 // this by summing the number of small elements in a 3x3 neighbourhood of 00153 // each grid cell. good_grid is filled with blobs that are considered most 00154 // likely good text, and this is filled with small and medium blobs that are 00155 // more likely non-text. 00156 // The photo_map is used to bias the decision towards non-text, rather than 00157 // supplying definite decision. 00158 IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map, 00159 BlobGrid* good_grid) { 00160 IntGrid* noise_counts = CountCellElements(); 00161 IntGrid* noise_density = noise_counts->NeighbourhoodSum(); 00162 IntGrid* good_counts = good_grid->CountCellElements(); 00163 // Now increase noise density in photo areas, to bias the decision and 00164 // minimize hallucinated text on image, but trim the noise_density where 00165 // there are good blobs and the original count is low in non-photo areas, 00166 // indicating that most of the result came from neighbouring cells. 00167 int height = pixGetHeight(photo_map); 00168 int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction); 00169 for (int y = 0; y < gridheight(); ++y) { 00170 for (int x = 0; x < gridwidth(); ++x) { 00171 int noise = noise_density->GridCellValue(x, y); 00172 if (max_noise_count_ < noise + photo_offset && 00173 noise <= max_noise_count_) { 00174 // Test for photo. 00175 int left = x * gridsize(); 00176 int right = left + gridsize(); 00177 int bottom = height - y * gridsize(); 00178 int top = bottom - gridsize(); 00179 if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right, 00180 &bottom)) { 00181 noise_density->SetGridCell(x, y, noise + photo_offset); 00182 } 00183 } 00184 if (debug && noise > max_noise_count_ && 00185 good_counts->GridCellValue(x, y) > 0) { 00186 tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n", 00187 x * gridsize(), y * gridsize(), 00188 noise_density->GridCellValue(x, y), 00189 good_counts->GridCellValue(x, y), 00190 noise_counts->GridCellValue(x, y), max_noise_count_); 00191 } 00192 if (noise > max_noise_count_ && 00193 good_counts->GridCellValue(x, y) > 0 && 00194 noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <= 00195 max_noise_count_) { 00196 noise_density->SetGridCell(x, y, 0); 00197 } 00198 } 00199 } 00200 delete noise_counts; 00201 delete good_counts; 00202 return noise_density; 00203 } 00204 00205 // Helper to expand a box in one of the 4 directions by the given pad, 00206 // provided it does not expand into any cell with a zero noise density. 00207 // If that is not possible, try expanding all round by a small constant. 00208 static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density, 00209 int pad) { 00210 TBOX expanded_box(box); 00211 expanded_box.set_right(box.right() + pad); 00212 if (!noise_density.AnyZeroInRect(expanded_box)) 00213 return expanded_box; 00214 expanded_box = box; 00215 expanded_box.set_left(box.left() - pad); 00216 if (!noise_density.AnyZeroInRect(expanded_box)) 00217 return expanded_box; 00218 expanded_box = box; 00219 expanded_box.set_top(box.top() + pad); 00220 if (!noise_density.AnyZeroInRect(expanded_box)) 00221 return expanded_box; 00222 expanded_box = box; 00223 expanded_box.set_bottom(box.bottom() + pad); 00224 if (!noise_density.AnyZeroInRect(expanded_box)) 00225 return expanded_box; 00226 expanded_box = box; 00227 expanded_box.pad(kNoisePadding, kNoisePadding); 00228 if (!noise_density.AnyZeroInRect(expanded_box)) 00229 return expanded_box; 00230 return box; 00231 } 00232 00233 // Tests each blob in the list to see if it is certain non-text using 2 00234 // conditions: 00235 // 1. blob overlaps a cell with high value in noise_density_ (previously set 00236 // by ComputeNoiseDensity). 00237 // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This 00238 // condition is disabled with max_blob_overlaps == -1. 00239 // If it does, the blob is declared non-text, and is used to mark up the 00240 // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their 00241 // neighbours reset, as they may now point to deleted data. 00242 // WARNING: The blobs list blobs may be in the *this grid, but they are 00243 // not removed. If any deleted blobs might be in *this, then this must be 00244 // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. 00245 // If the win is not NULL, deleted blobs are drawn on it in red, and kept 00246 // blobs are drawn on it in ok_color. 00247 void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, 00248 int max_blob_overlaps, 00249 ScrollView* win, 00250 ScrollView::Color ok_color, 00251 Pix* nontext_mask) { 00252 int imageheight = tright().y() - bleft().x(); 00253 BLOBNBOX_IT blob_it(blobs); 00254 BLOBNBOX_LIST dead_blobs; 00255 BLOBNBOX_IT dead_it(&dead_blobs); 00256 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { 00257 BLOBNBOX* blob = blob_it.data(); 00258 TBOX box = blob->bounding_box(); 00259 if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && 00260 (max_blob_overlaps < 0 || 00261 !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { 00262 blob->ClearNeighbours(); 00263 #ifndef GRAPHICS_DISABLED 00264 if (win != NULL) 00265 blob->plot(win, ok_color, ok_color); 00266 #endif // GRAPHICS_DISABLED 00267 } else { 00268 if (noise_density_->AnyZeroInRect(box)) { 00269 // There is a danger that the bounding box may overlap real text, so 00270 // we need to render the outline. 00271 Pix* blob_pix = blob->cblob()->render_outline(); 00272 pixRasterop(nontext_mask, box.left(), imageheight - box.top(), 00273 box.width(), box.height(), PIX_SRC | PIX_DST, 00274 blob_pix, 0, 0); 00275 pixDestroy(&blob_pix); 00276 } else { 00277 if (box.area() < gridsize() * gridsize()) { 00278 // It is a really bad idea to make lots of small components in the 00279 // photo mask, so try to join it to a bigger area by expanding the 00280 // box in a way that does not touch any zero noise density cell. 00281 box = AttemptBoxExpansion(box, *noise_density_, gridsize()); 00282 } 00283 // All overlapped cells are non-zero, so just mark the rectangle. 00284 pixRasterop(nontext_mask, box.left(), imageheight - box.top(), 00285 box.width(), box.height(), PIX_SET, NULL, 0, 0); 00286 } 00287 #ifndef GRAPHICS_DISABLED 00288 if (win != NULL) 00289 blob->plot(win, ScrollView::RED, ScrollView::RED); 00290 #endif // GRAPHICS_DISABLED 00291 // It is safe to delete the cblob now, as it isn't used by the grid 00292 // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the 00293 // dead_blobs list. 00294 // TODO(rays) delete the delete when the BLOBNBOX destructor deletes 00295 // the cblob. 00296 delete blob->cblob(); 00297 dead_it.add_to_end(blob_it.extract()); 00298 } 00299 } 00300 } 00301 00302 // Returns true if the given blob overlaps more than max_overlaps blobs 00303 // in the current grid. 00304 bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) { 00305 // Search the grid to see what intersects it. 00306 // Setup a Rectangle search for overlapping this blob. 00307 BlobGridSearch rsearch(this); 00308 TBOX box = blob->bounding_box(); 00309 rsearch.StartRectSearch(box); 00310 rsearch.SetUniqueMode(true); 00311 BLOBNBOX* neighbour; 00312 int overlap_count = 0; 00313 while (overlap_count <= max_overlaps && 00314 (neighbour = rsearch.NextRectSearch()) != NULL) { 00315 if (box.major_overlap(neighbour->bounding_box())) { 00316 ++overlap_count; 00317 if (overlap_count > max_overlaps) 00318 return true; 00319 } 00320 } 00321 return false; 00322 } 00323 00324 } // namespace tesseract.