tesseract
3.03
|
00001 00002 // File: thresholder.h 00003 // Description: Base API for thresolding images in tesseract. 00004 // Author: Ray Smith 00005 // Created: Mon May 12 11:00:15 PDT 2008 00006 // 00007 // (C) Copyright 2008, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_CCMAIN_THRESHOLDER_H__ 00021 #define TESSERACT_CCMAIN_THRESHOLDER_H__ 00022 00023 #include "platform.h" 00024 00025 class IMAGE; 00026 struct Pix; 00027 00028 namespace tesseract { 00029 00036 class TESS_API ImageThresholder { 00037 public: 00038 ImageThresholder(); 00039 virtual ~ImageThresholder(); 00040 00042 virtual void Clear(); 00043 00045 bool IsEmpty() const; 00046 00055 void SetImage(const unsigned char* imagedata, int width, int height, 00056 int bytes_per_pixel, int bytes_per_line); 00057 00060 void SetRectangle(int left, int top, int width, int height); 00061 00066 virtual void GetImageSizes(int* left, int* top, int* width, int* height, 00067 int* imagewidth, int* imageheight); 00068 00070 bool IsColor() const { 00071 return pix_channels_ >= 3; 00072 } 00073 00075 bool IsBinary() const { 00076 return pix_channels_ == 0; 00077 } 00078 00079 int GetScaleFactor() const { 00080 return scale_; 00081 } 00082 00083 // Set the resolution of the source image in pixels per inch. 00084 // This should be called right after SetImage(), and will let us return 00085 // appropriate font sizes for the text. 00086 void SetSourceYResolution(int ppi) { 00087 yres_ = ppi; 00088 estimated_res_ = ppi; 00089 } 00090 int GetSourceYResolution() const { 00091 return yres_; 00092 } 00093 int GetScaledYResolution() const { 00094 return scale_ * yres_; 00095 } 00096 // Set the resolution of the source image in pixels per inch, as estimated 00097 // by the thresholder from the text size found during thresholding. 00098 // This value will be used to set internal size thresholds during recognition 00099 // and will not influence the output "point size." The default value is 00100 // the same as the source resolution. (yres_) 00101 void SetEstimatedResolution(int ppi) { 00102 estimated_res_ = ppi; 00103 } 00104 // Returns the estimated resolution, including any active scaling. 00105 // This value will be used to set internal size thresholds during recognition. 00106 int GetScaledEstimatedResolution() const { 00107 return scale_ * estimated_res_; 00108 } 00109 00115 void SetImage(const Pix* pix); 00116 00120 virtual void ThresholdToPix(Pix** pix); 00121 00122 // Gets a pix that contains an 8 bit threshold value at each pixel. The 00123 // returned pix may be an integer reduction of the binary image such that 00124 // the scale factor may be inferred from the ratio of the sizes, even down 00125 // to the extreme of a 1x1 pixel thresholds image. 00126 // Ideally the 8 bit threshold should be the exact threshold used to generate 00127 // the binary image in ThresholdToPix, but this is not a hard constraint. 00128 // Returns NULL if the input is binary. PixDestroy after use. 00129 virtual Pix* GetPixRectThresholds(); 00130 00136 Pix* GetPixRect(); 00137 00138 // Get a clone/copy of the source image rectangle, reduced to greyscale, 00139 // and at the same resolution as the output binary. 00140 // The returned Pix must be pixDestroyed. 00141 // Provided to the classifier to extract features from the greyscale image. 00142 virtual Pix* GetPixRectGrey(); 00143 00144 protected: 00145 // ---------------------------------------------------------------------- 00146 // Utility functions that may be useful components for other thresholders. 00147 00149 virtual void Init(); 00150 00152 bool IsFullImage() const { 00153 return rect_left_ == 0 && rect_top_ == 0 && 00154 rect_width_ == image_width_ && rect_height_ == image_height_; 00155 } 00156 00157 // Otsu thresholds the rectangle, taking the rectangle from *this. 00158 void OtsuThresholdRectToPix(Pix* src_pix, Pix** out_pix) const; 00159 00163 // arrays and also the bytes per pixel in src_pix. 00164 void ThresholdRectToPix(Pix* src_pix, int num_channels, 00165 const int* thresholds, const int* hi_values, 00166 Pix** pix) const; 00167 00168 protected: 00171 Pix* pix_; 00172 00173 int image_width_; //< Width of source pix_. 00174 int image_height_; //< Height of source pix_. 00175 int pix_channels_; //< Number of 8-bit channels in pix_. 00176 int pix_wpl_; //< Words per line of pix_. 00177 // Limits of image rectangle to be processed. 00178 int scale_; //< Scale factor from original image. 00179 int yres_; //< y pixels/inch in source image. 00180 int estimated_res_; //< Resolution estimate from text size. 00181 int rect_left_; 00182 int rect_top_; 00183 int rect_width_; 00184 int rect_height_; 00185 }; 00186 00187 } // namespace tesseract. 00188 00189 #endif // TESSERACT_CCMAIN_THRESHOLDER_H__