tesseract
3.03
|
00001 00002 // File: thresholder.cpp 00003 // Description: Base API for thresolding images in tesseract. 00004 // Author: Ray Smith 00005 // Created: Mon May 12 11:28:15 PDT 2008 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #include "allheaders.h" 00021 00022 #include "thresholder.h" 00023 00024 #include <string.h> 00025 00026 #include "otsuthr.h" 00027 00028 #include "openclwrapper.h" 00029 00030 namespace tesseract { 00031 00032 ImageThresholder::ImageThresholder() 00033 : pix_(NULL), 00034 image_width_(0), image_height_(0), 00035 pix_channels_(0), pix_wpl_(0), 00036 scale_(1), yres_(300), estimated_res_(300) { 00037 SetRectangle(0, 0, 0, 0); 00038 } 00039 00040 ImageThresholder::~ImageThresholder() { 00041 Clear(); 00042 } 00043 00044 // Destroy the Pix if there is one, freeing memory. 00045 void ImageThresholder::Clear() { 00046 pixDestroy(&pix_); 00047 } 00048 00049 // Return true if no image has been set. 00050 bool ImageThresholder::IsEmpty() const { 00051 return pix_ == NULL; 00052 } 00053 00054 // SetImage makes a copy of all the image data, so it may be deleted 00055 // immediately after this call. 00056 // Greyscale of 8 and color of 24 or 32 bits per pixel may be given. 00057 // Palette color images will not work properly and must be converted to 00058 // 24 bit. 00059 // Binary images of 1 bit per pixel may also be given but they must be 00060 // byte packed with the MSB of the first byte being the first pixel, and a 00061 // one pixel is WHITE. For binary images set bytes_per_pixel=0. 00062 void ImageThresholder::SetImage(const unsigned char* imagedata, 00063 int width, int height, 00064 int bytes_per_pixel, int bytes_per_line) { 00065 int bpp = bytes_per_pixel * 8; 00066 if (bpp == 0) bpp = 1; 00067 Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp); 00068 l_uint32* data = pixGetData(pix); 00069 int wpl = pixGetWpl(pix); 00070 switch (bpp) { 00071 case 1: 00072 for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) { 00073 for (int x = 0; x < width; ++x) { 00074 if (imagedata[x / 8] & (0x80 >> (x % 8))) 00075 CLEAR_DATA_BIT(data, x); 00076 else 00077 SET_DATA_BIT(data, x); 00078 } 00079 } 00080 break; 00081 00082 case 8: 00083 // Greyscale just copies the bytes in the right order. 00084 for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) { 00085 for (int x = 0; x < width; ++x) 00086 SET_DATA_BYTE(data, x, imagedata[x]); 00087 } 00088 break; 00089 00090 case 24: 00091 // Put the colors in the correct places in the line buffer. 00092 for (int y = 0; y < height; ++y, imagedata += bytes_per_line) { 00093 for (int x = 0; x < width; ++x, ++data) { 00094 SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]); 00095 SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]); 00096 SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]); 00097 } 00098 } 00099 break; 00100 00101 case 32: 00102 // Maintain byte order consistency across different endianness. 00103 for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) { 00104 for (int x = 0; x < width; ++x) { 00105 data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) | 00106 (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3]; 00107 } 00108 } 00109 break; 00110 00111 default: 00112 tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp); 00113 } 00114 pixSetYRes(pix, 300); 00115 SetImage(pix); 00116 pixDestroy(&pix); 00117 } 00118 00119 // Store the coordinates of the rectangle to process for later use. 00120 // Doesn't actually do any thresholding. 00121 void ImageThresholder::SetRectangle(int left, int top, int width, int height) { 00122 rect_left_ = left; 00123 rect_top_ = top; 00124 rect_width_ = width; 00125 rect_height_ = height; 00126 } 00127 00128 // Get enough parameters to be able to rebuild bounding boxes in the 00129 // original image (not just within the rectangle). 00130 // Left and top are enough with top-down coordinates, but 00131 // the height of the rectangle and the image are needed for bottom-up. 00132 void ImageThresholder::GetImageSizes(int* left, int* top, 00133 int* width, int* height, 00134 int* imagewidth, int* imageheight) { 00135 *left = rect_left_; 00136 *top = rect_top_; 00137 *width = rect_width_; 00138 *height = rect_height_; 00139 *imagewidth = image_width_; 00140 *imageheight = image_height_; 00141 } 00142 00143 // Pix vs raw, which to use? Pix is the preferred input for efficiency, 00144 // since raw buffers are copied. 00145 // SetImage for Pix clones its input, so the source pix may be pixDestroyed 00146 // immediately after, but may not go away until after the Thresholder has 00147 // finished with it. 00148 void ImageThresholder::SetImage(const Pix* pix) { 00149 if (pix_ != NULL) 00150 pixDestroy(&pix_); 00151 Pix* src = const_cast<Pix*>(pix); 00152 int depth; 00153 pixGetDimensions(src, &image_width_, &image_height_, &depth); 00154 // Convert the image as necessary so it is one of binary, plain RGB, or 00155 // 8 bit with no colormap. 00156 if (depth > 1 && depth < 8) { 00157 pix_ = pixConvertTo8(src, false); 00158 } else if (pixGetColormap(src)) { 00159 pix_ = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC); 00160 } else { 00161 pix_ = pixClone(src); 00162 } 00163 depth = pixGetDepth(pix_); 00164 pix_channels_ = depth / 8; 00165 pix_wpl_ = pixGetWpl(pix_); 00166 scale_ = 1; 00167 estimated_res_ = yres_ = pixGetYRes(src); 00168 Init(); 00169 } 00170 00171 // Threshold the source image as efficiently as possible to the output Pix. 00172 // Creates a Pix and sets pix to point to the resulting pointer. 00173 // Caller must use pixDestroy to free the created Pix. 00174 void ImageThresholder::ThresholdToPix(Pix** pix) { 00175 if (pix_channels_ == 0) { 00176 // We have a binary image, so it just has to be cloned. 00177 *pix = GetPixRect(); 00178 } else { 00179 OtsuThresholdRectToPix(pix_, pix); 00180 } 00181 } 00182 00183 // Gets a pix that contains an 8 bit threshold value at each pixel. The 00184 // returned pix may be an integer reduction of the binary image such that 00185 // the scale factor may be inferred from the ratio of the sizes, even down 00186 // to the extreme of a 1x1 pixel thresholds image. 00187 // Ideally the 8 bit threshold should be the exact threshold used to generate 00188 // the binary image in ThresholdToPix, but this is not a hard constraint. 00189 // Returns NULL if the input is binary. PixDestroy after use. 00190 Pix* ImageThresholder::GetPixRectThresholds() { 00191 if (IsBinary()) return NULL; 00192 Pix* pix_grey = GetPixRectGrey(); 00193 int width = pixGetWidth(pix_grey); 00194 int height = pixGetHeight(pix_grey); 00195 int* thresholds; 00196 int* hi_values; 00197 OtsuThreshold(pix_grey, 0, 0, width, height, &thresholds, &hi_values); 00198 pixDestroy(&pix_grey); 00199 Pix* pix_thresholds = pixCreate(width, height, 8); 00200 int threshold = thresholds[0] > 0 ? thresholds[0] : 128; 00201 pixSetAllArbitrary(pix_thresholds, threshold); 00202 delete [] thresholds; 00203 delete [] hi_values; 00204 return pix_thresholds; 00205 } 00206 00207 // Common initialization shared between SetImage methods. 00208 void ImageThresholder::Init() { 00209 SetRectangle(0, 0, image_width_, image_height_); 00210 } 00211 00212 // Get a clone/copy of the source image rectangle. 00213 // The returned Pix must be pixDestroyed. 00214 // This function will be used in the future by the page layout analysis, and 00215 // the layout analysis that uses it will only be available with Leptonica, 00216 // so there is no raw equivalent. 00217 Pix* ImageThresholder::GetPixRect() { 00218 if (IsFullImage()) { 00219 // Just clone the whole thing. 00220 return pixClone(pix_); 00221 } else { 00222 // Crop to the given rectangle. 00223 Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_); 00224 Pix* cropped = pixClipRectangle(pix_, box, NULL); 00225 boxDestroy(&box); 00226 return cropped; 00227 } 00228 } 00229 00230 // Get a clone/copy of the source image rectangle, reduced to greyscale, 00231 // and at the same resolution as the output binary. 00232 // The returned Pix must be pixDestroyed. 00233 // Provided to the classifier to extract features from the greyscale image. 00234 Pix* ImageThresholder::GetPixRectGrey() { 00235 Pix* pix = GetPixRect(); // May have to be reduced to grey. 00236 int depth = pixGetDepth(pix); 00237 if (depth != 8) { 00238 Pix* result = depth < 8 ? pixConvertTo8(pix, false) 00239 : pixConvertRGBToLuminance(pix); 00240 pixDestroy(&pix); 00241 return result; 00242 } 00243 return pix; 00244 } 00245 00246 // Otsu thresholds the rectangle, taking the rectangle from *this. 00247 void ImageThresholder::OtsuThresholdRectToPix(Pix* src_pix, 00248 Pix** out_pix) const { 00249 PERF_COUNT_START("OtsuThresholdRectToPix") 00250 int* thresholds; 00251 int* hi_values; 00252 00253 int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, 00254 rect_height_, &thresholds, &hi_values); 00255 // only use opencl if compiled w/ OpenCL and selected device is opencl 00256 #ifdef USE_OPENCL 00257 OpenclDevice od; 00258 if ((num_channels == 4 || num_channels == 1) && 00259 od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0 ) { 00260 od.ThresholdRectToPixOCL((const unsigned char*)pixGetData(src_pix), 00261 num_channels, pixGetWpl(src_pix) * 4, 00262 thresholds, hi_values, out_pix /*pix_OCL*/, 00263 rect_height_, rect_width_, rect_top_, rect_left_); 00264 } else { 00265 #endif 00266 ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix); 00267 #ifdef USE_OPENCL 00268 } 00269 #endif 00270 delete [] thresholds; 00271 delete [] hi_values; 00272 00273 PERF_COUNT_END 00274 } 00275 00279 // arrays and also the bytes per pixel in src_pix. 00280 void ImageThresholder::ThresholdRectToPix(Pix* src_pix, 00281 int num_channels, 00282 const int* thresholds, 00283 const int* hi_values, 00284 Pix** pix) const { 00285 PERF_COUNT_START("ThresholdRectToPix") 00286 *pix = pixCreate(rect_width_, rect_height_, 1); 00287 uinT32* pixdata = pixGetData(*pix); 00288 int wpl = pixGetWpl(*pix); 00289 int src_wpl = pixGetWpl(src_pix); 00290 uinT32* srcdata = pixGetData(src_pix); 00291 for (int y = 0; y < rect_height_; ++y) { 00292 const uinT32* linedata = srcdata + (y + rect_top_) * src_wpl; 00293 uinT32* pixline = pixdata + y * wpl; 00294 for (int x = 0; x < rect_width_; ++x) { 00295 bool white_result = true; 00296 for (int ch = 0; ch < num_channels; ++ch) { 00297 int pixel = GET_DATA_BYTE(const_cast<void*>( 00298 reinterpret_cast<const void *>(linedata)), 00299 (x + rect_left_) * num_channels + ch); 00300 if (hi_values[ch] >= 0 && 00301 (pixel > thresholds[ch]) == (hi_values[ch] == 0)) { 00302 white_result = false; 00303 break; 00304 } 00305 } 00306 if (white_result) 00307 CLEAR_DATA_BIT(pixline, x); 00308 else 00309 SET_DATA_BIT(pixline, x); 00310 } 00311 } 00312 00313 PERF_COUNT_END 00314 } 00315 00316 } // namespace tesseract. 00317