tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/ccnontextdetect.cpp
Go to the documentation of this file.
00001 
00002 // File:        ccnontextdetect.cpp
00003 // Description: Connected-Component-based photo (non-text) detection.
00004 // Copyright 2011 Google Inc. All Rights Reserved.
00005 // Author: rays@google.com (Ray Smith)
00006 // Created:     Sat Jun 11 10:12:01 PST 2011
00007 //
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifdef HAVE_CONFIG_H
00021 #include "config_auto.h"
00022 #endif
00023 
00024 #include "ccnontextdetect.h"
00025 #include "imagefind.h"
00026 #include "strokewidth.h"
00027 
00028 namespace tesseract {
00029 
// Upper bound on the number of small neighbouring objects per squared
// gridsize; a grid cell above this density is treated as image.
const double kMaxSmallNeighboursPerPix = 1.0 / 32.0;

// A large blob overlapping more than this many small blobs is rejected and
// determined to be image.
const int kMaxLargeOverlapsWithSmall = 3;

// A medium blob overlapping more than this many small blobs is rejected and
// determined to be image. The limit is more generous than for large blobs
// because medium blobs may be complex Chinese characters. Very large Chinese
// characters are going to overlap more medium blobs than small.
const int kMaxMediumOverlapsWithSmall = 12;

// A large blob overlapping more than this many normal (medium) blobs is
// rejected and determined to be image. Set high to allow for drop caps, which
// may overlap a lot of good text blobs.
const int kMaxLargeOverlapsWithMedium = 12;

// Multiplier applied to a cell's original noise count when testing whether
// noise has been spread beyond where it should really be.
const int kOriginalNoiseMultiple = 8;

// Pixel padding given to noise blobs when they are rendered on the image
// mask, to encourage them to join together. Make it too big and images
// will fatten out too much and have to be clipped to text.
const int kNoisePadding = 4;

// Fraction of max_noise_count_ that is added to a cell's noise count when
// there is photo mask in the background.
const double kPhotoOffsetFraction = 0.375;

// Minimum value of perimeter^2 / (16 * area) for a blob to count as "good"
// when estimating noise density. Good blobs are supposed to be highly likely
// real text. A square has unit ratio, since A = (p/4)^2, hence the factor
// of 16.
// NOTE(review): the previous comment quoted pi/64 (digital) versus 1/(4pi)
// (expected) as the minimum ratio for circles. For an ideal circle
// p^2/(16A) works out to pi/4, so those figures look inconsistent with the
// definition above - confirm before relying on them.
const double kMinGoodTextPARatio = 1.5;
00061 
00062 CCNonTextDetect::CCNonTextDetect(int gridsize,
00063                              const ICOORD& bleft, const ICOORD& tright)
00064   : BlobGrid(gridsize, bleft, tright),
00065     max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix *
00066                                       gridsize * gridsize)),
00067     noise_density_(NULL) {
00068   // TODO(rays) break max_noise_count_ out into an area-proportional
00069   // value, as now plus an additive constant for the number of text blobs
00070   // in the 3x3 neigbourhood - maybe 9.
00071 }
00072 
00073 CCNonTextDetect::~CCNonTextDetect() {
00074   delete noise_density_;
00075 }
00076 
00077 // Creates and returns a Pix with the same resolution as the original
00078 // in which 1 (black) pixels represent likely non text (photo, line drawing)
00079 // areas of the page, deleting from the blob_block the blobs that were
00080 // determined to be non-text.
00081 // The photo_map is used to bias the decision towards non-text, rather than
00082 // supplying definite decision.
00083 // The blob_block is the usual result of connected component analysis,
00084 // holding the detected blobs.
00085 // The returned Pix should be PixDestroyed after use.
00086 Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map,
00087                                          TO_BLOCK* blob_block) {
00088   // Insert the smallest blobs into the grid.
00089   InsertBlobList(&blob_block->small_blobs);
00090   InsertBlobList(&blob_block->noise_blobs);
00091   // Add the medium blobs that don't have a good strokewidth neighbour.
00092   // Those that do go into good_grid as an antidote to spreading beyond the
00093   // real reaches of a noise region.
00094   BlobGrid good_grid(gridsize(), bleft(), tright());
00095   BLOBNBOX_IT blob_it(&blob_block->blobs);
00096   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00097     BLOBNBOX* blob = blob_it.data();
00098     double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0;
00099     perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area();
00100     if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio)
00101       InsertBBox(true, true, blob);
00102     else
00103       good_grid.InsertBBox(true, true, blob);
00104   }
00105   noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
00106   good_grid.Clear();  // Not needed any more.
00107   Pix* pix = noise_density_->ThresholdToPix(max_noise_count_);
00108   if (debug) {
00109     pixWrite("junknoisemask.png", pix, IFF_PNG);
00110   }
00111   ScrollView* win = NULL;
00112   #ifndef GRAPHICS_DISABLED
00113   if (debug) {
00114     win = MakeWindow(0, 400, "Photo Mask Blobs");
00115   }
00116   #endif  // GRAPHICS_DISABLED
00117   // Large and medium blobs are not text if they overlap with "a lot" of small
00118   // blobs.
00119   MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
00120                             kMaxLargeOverlapsWithSmall,
00121                             win, ScrollView::DARK_GREEN, pix);
00122   MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall,
00123                           win, ScrollView::WHITE, pix);
00124   // Clear the grid of small blobs and insert the medium blobs.
00125   Clear();
00126   InsertBlobList(&blob_block->blobs);
00127   MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
00128                             kMaxLargeOverlapsWithMedium,
00129                             win, ScrollView::DARK_GREEN, pix);
00130   // Clear again before we start deleting the blobs in the grid.
00131   Clear();
00132   MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1,
00133                             win, ScrollView::CORAL, pix);
00134   MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1,
00135                             win, ScrollView::GOLDENROD, pix);
00136   MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1,
00137                             win, ScrollView::WHITE, pix);
00138   if (debug) {
00139     #ifndef GRAPHICS_DISABLED
00140     win->Update();
00141     #endif  // GRAPHICS_DISABLED
00142     pixWrite("junkccphotomask.png", pix, IFF_PNG);
00143     #ifndef GRAPHICS_DISABLED
00144     delete win->AwaitEvent(SVET_DESTROY);
00145     delete win;
00146     #endif  // GRAPHICS_DISABLED
00147   }
00148   return pix;
00149 }
00150 
00151 // Computes and returns the noise_density IntGrid, at the same gridsize as
00152 // this by summing the number of small elements in a 3x3 neighbourhood of
00153 // each grid cell. good_grid is filled with blobs that are considered most
00154 // likely good text, and this is filled with small and medium blobs that are
00155 // more likely non-text.
00156 // The photo_map is used to bias the decision towards non-text, rather than
00157 // supplying definite decision.
00158 IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map,
00159                                               BlobGrid* good_grid) {
00160   IntGrid* noise_counts = CountCellElements();
00161   IntGrid* noise_density = noise_counts->NeighbourhoodSum();
00162   IntGrid* good_counts = good_grid->CountCellElements();
00163   // Now increase noise density in photo areas, to bias the decision and
00164   // minimize hallucinated text on image, but trim the noise_density where
00165   // there are good blobs and the original count is low in non-photo areas,
00166   // indicating that most of the result came from neighbouring cells.
00167   int height = pixGetHeight(photo_map);
00168   int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction);
00169   for (int y = 0; y < gridheight(); ++y) {
00170     for (int x = 0; x < gridwidth(); ++x) {
00171       int noise = noise_density->GridCellValue(x, y);
00172       if (max_noise_count_ < noise + photo_offset &&
00173           noise <= max_noise_count_) {
00174         // Test for photo.
00175         int left = x * gridsize();
00176         int right = left + gridsize();
00177         int bottom = height - y * gridsize();
00178         int top = bottom - gridsize();
00179         if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right,
00180                                         &bottom)) {
00181           noise_density->SetGridCell(x, y, noise + photo_offset);
00182         }
00183       }
00184       if (debug && noise > max_noise_count_ &&
00185           good_counts->GridCellValue(x, y) > 0) {
00186         tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n",
00187                 x * gridsize(), y * gridsize(),
00188                 noise_density->GridCellValue(x, y),
00189                 good_counts->GridCellValue(x, y),
00190                 noise_counts->GridCellValue(x, y), max_noise_count_);
00191       }
00192       if (noise > max_noise_count_ &&
00193           good_counts->GridCellValue(x, y) > 0 &&
00194           noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <=
00195               max_noise_count_) {
00196         noise_density->SetGridCell(x, y, 0);
00197       }
00198     }
00199   }
00200   delete noise_counts;
00201   delete good_counts;
00202   return noise_density;
00203 }
00204 
00205 // Helper to expand a box in one of the 4 directions by the given pad,
00206 // provided it does not expand into any cell with a zero noise density.
00207 // If that is not possible, try expanding all round by a small constant.
00208 static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density,
00209                                 int pad) {
00210   TBOX expanded_box(box);
00211   expanded_box.set_right(box.right() + pad);
00212   if (!noise_density.AnyZeroInRect(expanded_box))
00213     return expanded_box;
00214   expanded_box = box;
00215   expanded_box.set_left(box.left() - pad);
00216   if (!noise_density.AnyZeroInRect(expanded_box))
00217     return expanded_box;
00218   expanded_box = box;
00219   expanded_box.set_top(box.top() + pad);
00220   if (!noise_density.AnyZeroInRect(expanded_box))
00221     return expanded_box;
00222   expanded_box = box;
00223   expanded_box.set_bottom(box.bottom() + pad);
00224   if (!noise_density.AnyZeroInRect(expanded_box))
00225     return expanded_box;
00226   expanded_box = box;
00227   expanded_box.pad(kNoisePadding, kNoisePadding);
00228   if (!noise_density.AnyZeroInRect(expanded_box))
00229     return expanded_box;
00230   return box;
00231 }
00232 
00233 // Tests each blob in the list to see if it is certain non-text using 2
00234 // conditions:
00235 // 1. blob overlaps a cell with high value in noise_density_ (previously set
00236 // by ComputeNoiseDensity).
00237 // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
00238 // condition is disabled with max_blob_overlaps == -1.
00239 // If it does, the blob is declared non-text, and is used to mark up the
00240 // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
00241 // neighbours reset, as they may now point to deleted data.
00242 // WARNING: The blobs list blobs may be in the *this grid, but they are
00243 // not removed. If any deleted blobs might be in *this, then this must be
00244 // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
00245 // If the win is not NULL, deleted blobs are drawn on it in red, and kept
00246 // blobs are drawn on it in ok_color.
00247 void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
00248                                                 int max_blob_overlaps,
00249                                                 ScrollView* win,
00250                                                 ScrollView::Color ok_color,
00251                                                 Pix* nontext_mask) {
00252   int imageheight = tright().y() - bleft().x();
00253   BLOBNBOX_IT blob_it(blobs);
00254   BLOBNBOX_LIST dead_blobs;
00255   BLOBNBOX_IT dead_it(&dead_blobs);
00256   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
00257     BLOBNBOX* blob = blob_it.data();
00258     TBOX box = blob->bounding_box();
00259     if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
00260         (max_blob_overlaps < 0 ||
00261             !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
00262       blob->ClearNeighbours();
00263       #ifndef GRAPHICS_DISABLED
00264       if (win != NULL)
00265         blob->plot(win, ok_color, ok_color);
00266       #endif  // GRAPHICS_DISABLED
00267     } else {
00268       if (noise_density_->AnyZeroInRect(box)) {
00269         // There is a danger that the bounding box may overlap real text, so
00270         // we need to render the outline.
00271         Pix* blob_pix = blob->cblob()->render_outline();
00272         pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
00273                     box.width(), box.height(), PIX_SRC | PIX_DST,
00274                     blob_pix, 0, 0);
00275         pixDestroy(&blob_pix);
00276       } else {
00277         if (box.area() < gridsize() * gridsize()) {
00278           // It is a really bad idea to make lots of small components in the
00279           // photo mask, so try to join it to a bigger area by expanding the
00280           // box in a way that does not touch any zero noise density cell.
00281           box = AttemptBoxExpansion(box, *noise_density_, gridsize());
00282         }
00283         // All overlapped cells are non-zero, so just mark the rectangle.
00284         pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
00285                     box.width(), box.height(), PIX_SET, NULL, 0, 0);
00286       }
00287       #ifndef GRAPHICS_DISABLED
00288       if (win != NULL)
00289         blob->plot(win, ScrollView::RED, ScrollView::RED);
00290       #endif  // GRAPHICS_DISABLED
00291       // It is safe to delete the cblob now, as it isn't used by the grid
00292       // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
00293       // dead_blobs list.
00294       // TODO(rays) delete the delete when the BLOBNBOX destructor deletes
00295       // the cblob.
00296       delete blob->cblob();
00297       dead_it.add_to_end(blob_it.extract());
00298     }
00299   }
00300 }
00301 
00302 // Returns true if the given blob overlaps more than max_overlaps blobs
00303 // in the current grid.
00304 bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) {
00305   // Search the grid to see what intersects it.
00306   // Setup a Rectangle search for overlapping this blob.
00307   BlobGridSearch rsearch(this);
00308   TBOX box = blob->bounding_box();
00309   rsearch.StartRectSearch(box);
00310   rsearch.SetUniqueMode(true);
00311   BLOBNBOX* neighbour;
00312   int overlap_count = 0;
00313   while (overlap_count <= max_overlaps &&
00314          (neighbour = rsearch.NextRectSearch()) != NULL) {
00315     if (box.major_overlap(neighbour->bounding_box())) {
00316       ++overlap_count;
00317       if (overlap_count > max_overlaps)
00318         return true;
00319     }
00320   }
00321   return false;
00322 }
00323 
00324 }  // namespace tesseract.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines