tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccmain/thresholder.cpp
Go to the documentation of this file.
00001 
00002 // File:        thresholder.cpp
00003 // Description: Base API for thresholding images in tesseract.
00004 // Author:      Ray Smith
00005 // Created:     Mon May 12 11:28:15 PDT 2008
00006 //
00007 // (C) Copyright 2008, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #include "allheaders.h"
00021 
00022 #include "thresholder.h"
00023 
00024 #include <string.h>
00025 
00026 #include "otsuthr.h"
00027 
00028 #include "openclwrapper.h"
00029 
00030 namespace tesseract {
00031 
00032 ImageThresholder::ImageThresholder()
00033   : pix_(NULL),
00034     image_width_(0), image_height_(0),
00035     pix_channels_(0), pix_wpl_(0),
00036     scale_(1), yres_(300), estimated_res_(300) {
00037   SetRectangle(0, 0, 0, 0);
00038 }
00039 
00040 ImageThresholder::~ImageThresholder() {
00041   Clear();
00042 }
00043 
00044 // Destroy the Pix if there is one, freeing memory.
00045 void ImageThresholder::Clear() {
00046   pixDestroy(&pix_);
00047 }
00048 
00049 // Return true if no image has been set.
00050 bool ImageThresholder::IsEmpty() const {
00051   return pix_ == NULL;
00052 }
00053 
00054 // SetImage makes a copy of all the image data, so it may be deleted
00055 // immediately after this call.
00056 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
00057 // Palette color images will not work properly and must be converted to
00058 // 24 bit.
00059 // Binary images of 1 bit per pixel may also be given but they must be
00060 // byte packed with the MSB of the first byte being the first pixel, and a
00061 // one pixel is WHITE. For binary images set bytes_per_pixel=0.
00062 void ImageThresholder::SetImage(const unsigned char* imagedata,
00063                                 int width, int height,
00064                                 int bytes_per_pixel, int bytes_per_line) {
00065   int bpp = bytes_per_pixel * 8;
00066   if (bpp == 0) bpp = 1;
00067   Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
00068   l_uint32* data = pixGetData(pix);
00069   int wpl = pixGetWpl(pix);
00070   switch (bpp) {
00071   case 1:
00072     for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
00073       for (int x = 0; x < width; ++x) {
00074         if (imagedata[x / 8] & (0x80 >> (x % 8)))
00075           CLEAR_DATA_BIT(data, x);
00076         else
00077           SET_DATA_BIT(data, x);
00078       }
00079     }
00080     break;
00081 
00082   case 8:
00083     // Greyscale just copies the bytes in the right order.
00084     for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
00085       for (int x = 0; x < width; ++x)
00086         SET_DATA_BYTE(data, x, imagedata[x]);
00087     }
00088     break;
00089 
00090   case 24:
00091     // Put the colors in the correct places in the line buffer.
00092     for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
00093       for (int x = 0; x < width; ++x, ++data) {
00094         SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
00095         SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
00096         SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
00097       }
00098     }
00099     break;
00100 
00101   case 32:
00102     // Maintain byte order consistency across different endianness.
00103     for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
00104       for (int x = 0; x < width; ++x) {
00105         data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
00106                   (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
00107       }
00108     }
00109     break;
00110 
00111   default:
00112     tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
00113   }
00114   pixSetYRes(pix, 300);
00115   SetImage(pix);
00116   pixDestroy(&pix);
00117 }
00118 
00119 // Store the coordinates of the rectangle to process for later use.
00120 // Doesn't actually do any thresholding.
00121 void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
00122   rect_left_ = left;
00123   rect_top_ = top;
00124   rect_width_ = width;
00125   rect_height_ = height;
00126 }
00127 
00128 // Get enough parameters to be able to rebuild bounding boxes in the
00129 // original image (not just within the rectangle).
00130 // Left and top are enough with top-down coordinates, but
00131 // the height of the rectangle and the image are needed for bottom-up.
00132 void ImageThresholder::GetImageSizes(int* left, int* top,
00133                                      int* width, int* height,
00134                                      int* imagewidth, int* imageheight) {
00135   *left = rect_left_;
00136   *top = rect_top_;
00137   *width = rect_width_;
00138   *height = rect_height_;
00139   *imagewidth = image_width_;
00140   *imageheight = image_height_;
00141 }
00142 
00143 // Pix vs raw, which to use? Pix is the preferred input for efficiency,
00144 // since raw buffers are copied.
00145 // SetImage for Pix clones its input, so the source pix may be pixDestroyed
00146 // immediately after, but may not go away until after the Thresholder has
00147 // finished with it.
00148 void ImageThresholder::SetImage(const Pix* pix) {
00149   if (pix_ != NULL)
00150     pixDestroy(&pix_);
00151   Pix* src = const_cast<Pix*>(pix);
00152   int depth;
00153   pixGetDimensions(src, &image_width_, &image_height_, &depth);
00154   // Convert the image as necessary so it is one of binary, plain RGB, or
00155   // 8 bit with no colormap.
00156   if (depth > 1 && depth < 8) {
00157     pix_ = pixConvertTo8(src, false);
00158   } else if (pixGetColormap(src)) {
00159     pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
00160   } else {
00161     pix_ = pixClone(src);
00162   }
00163   depth = pixGetDepth(pix_);
00164   pix_channels_ = depth / 8;
00165   pix_wpl_ = pixGetWpl(pix_);
00166   scale_ = 1;
00167   estimated_res_ = yres_ = pixGetYRes(src);
00168   Init();
00169 }
00170 
00171 // Threshold the source image as efficiently as possible to the output Pix.
00172 // Creates a Pix and sets pix to point to the resulting pointer.
00173 // Caller must use pixDestroy to free the created Pix.
00174 void ImageThresholder::ThresholdToPix(Pix** pix) {
00175   if (pix_channels_ == 0) {
00176     // We have a binary image, so it just has to be cloned.
00177     *pix = GetPixRect();
00178   } else {
00179     OtsuThresholdRectToPix(pix_, pix);
00180   }
00181 }
00182 
00183 // Gets a pix that contains an 8 bit threshold value at each pixel. The
00184 // returned pix may be an integer reduction of the binary image such that
00185 // the scale factor may be inferred from the ratio of the sizes, even down
00186 // to the extreme of a 1x1 pixel thresholds image.
00187 // Ideally the 8 bit threshold should be the exact threshold used to generate
00188 // the binary image in ThresholdToPix, but this is not a hard constraint.
00189 // Returns NULL if the input is binary. PixDestroy after use.
00190 Pix* ImageThresholder::GetPixRectThresholds() {
00191   if (IsBinary()) return NULL;
00192   Pix* pix_grey = GetPixRectGrey();
00193   int width = pixGetWidth(pix_grey);
00194   int height = pixGetHeight(pix_grey);
00195   int* thresholds;
00196   int* hi_values;
00197   OtsuThreshold(pix_grey, 0, 0, width, height, &thresholds, &hi_values);
00198   pixDestroy(&pix_grey);
00199   Pix* pix_thresholds = pixCreate(width, height, 8);
00200   int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
00201   pixSetAllArbitrary(pix_thresholds, threshold);
00202   delete [] thresholds;
00203   delete [] hi_values;
00204   return pix_thresholds;
00205 }
00206 
00207 // Common initialization shared between SetImage methods.
00208 void ImageThresholder::Init() {
00209   SetRectangle(0, 0, image_width_, image_height_);
00210 }
00211 
00212 // Get a clone/copy of the source image rectangle.
00213 // The returned Pix must be pixDestroyed.
00214 // This function will be used in the future by the page layout analysis, and
00215 // the layout analysis that uses it will only be available with Leptonica,
00216 // so there is no raw equivalent.
00217 Pix* ImageThresholder::GetPixRect() {
00218   if (IsFullImage()) {
00219     // Just clone the whole thing.
00220     return pixClone(pix_);
00221   } else {
00222     // Crop to the given rectangle.
00223     Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
00224     Pix* cropped = pixClipRectangle(pix_, box, NULL);
00225     boxDestroy(&box);
00226     return cropped;
00227   }
00228 }
00229 
00230 // Get a clone/copy of the source image rectangle, reduced to greyscale,
00231 // and at the same resolution as the output binary.
00232 // The returned Pix must be pixDestroyed.
00233 // Provided to the classifier to extract features from the greyscale image.
00234 Pix* ImageThresholder::GetPixRectGrey() {
00235   Pix* pix = GetPixRect();  // May have to be reduced to grey.
00236   int depth = pixGetDepth(pix);
00237   if (depth != 8) {
00238     Pix* result = depth < 8 ? pixConvertTo8(pix, false)
00239                             : pixConvertRGBToLuminance(pix);
00240     pixDestroy(&pix);
00241     return result;
00242   }
00243   return pix;
00244 }
00245 
00246 // Otsu thresholds the rectangle, taking the rectangle from *this.
00247 void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix,
00248                                               Pix** out_pix) const {
00249   PERF_COUNT_START("OtsuThresholdRectToPix")
00250   int* thresholds;
00251   int* hi_values;
00252 
00253   int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_,
00254                                    rect_height_, &thresholds, &hi_values);
00255   // only use opencl if compiled w/ OpenCL and selected device is opencl
00256 #ifdef USE_OPENCL
00257   OpenclDevice od;
00258   if ((num_channels == 4 || num_channels == 1) &&
00259       od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0 ) {
00260     od.ThresholdRectToPixOCL((const unsigned char*)pixGetData(src_pix),
00261                              num_channels, pixGetWpl(src_pix) * 4,
00262                              thresholds, hi_values, out_pix /*pix_OCL*/,
00263                              rect_height_, rect_width_, rect_top_, rect_left_);
00264   } else {
00265 #endif
00266     ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
00267 #ifdef USE_OPENCL
00268   }
00269 #endif
00270   delete [] thresholds;
00271   delete [] hi_values;
00272 
00273   PERF_COUNT_END
00274 }
00275 
00279 // arrays and also the bytes per pixel in src_pix.
00280 void ImageThresholder::ThresholdRectToPix(Pix* src_pix,
00281                                           int num_channels,
00282                                           const int* thresholds,
00283                                           const int* hi_values,
00284                                           Pix** pix) const {
00285   PERF_COUNT_START("ThresholdRectToPix")
00286   *pix = pixCreate(rect_width_, rect_height_, 1);
00287   uinT32* pixdata = pixGetData(*pix);
00288   int wpl = pixGetWpl(*pix);
00289   int src_wpl = pixGetWpl(src_pix);
00290   uinT32* srcdata = pixGetData(src_pix);
00291   for (int y = 0; y < rect_height_; ++y) {
00292     const uinT32* linedata = srcdata + (y + rect_top_) * src_wpl;
00293     uinT32* pixline = pixdata + y * wpl;
00294     for (int x = 0; x < rect_width_; ++x) {
00295       bool white_result = true;
00296       for (int ch = 0; ch < num_channels; ++ch) {
00297         int pixel = GET_DATA_BYTE(const_cast<void*>(
00298                                   reinterpret_cast<const void *>(linedata)),
00299                                   (x + rect_left_) * num_channels + ch);
00300         if (hi_values[ch] >= 0 &&
00301             (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
00302           white_result = false;
00303           break;
00304         }
00305       }
00306       if (white_result)
00307         CLEAR_DATA_BIT(pixline, x);
00308       else
00309         SET_DATA_BIT(pixline, x);
00310     }
00311   }
00312 
00313   PERF_COUNT_END
00314 }
00315 
00316 }  // namespace tesseract.
00317 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines