tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccmain/thresholder.h
Go to the documentation of this file.
00001 
00002 // File:        thresholder.h
00003 // Description: Base API for thresholding images in tesseract.
00004 // Author:      Ray Smith
00005 // Created:     Mon May 12 11:00:15 PDT 2008
00006 //
00007 // (C) Copyright 2008, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_CCMAIN_THRESHOLDER_H__
00021 #define TESSERACT_CCMAIN_THRESHOLDER_H__
00022 
00023 #include          "platform.h"
00024 
00025 class IMAGE;
00026 struct Pix;
00027 
00028 namespace tesseract {
00029 
00036 class TESS_API ImageThresholder {
        // Base thresholding interface. Callers supply a source image (a raw byte
        // buffer or a Pix), may restrict processing to a rectangle, may set
        // resolution information, and then request output through the virtual
        // ThresholdToPix / GetPixRect* members, which derived classes can override.
        // Only the inline accessors have bodies in this header; everything else
        // is defined elsewhere.
00037  public:
        // Constructor and virtual destructor; both are defined outside this header.
00038   ImageThresholder();
00039   virtual ~ImageThresholder();
00040 
        // Virtual reset hook.
        // NOTE(review): presumably releases the held image; the body is not
        // visible here -- confirm against the implementation file.
00042   virtual void Clear();
00043 
        // NOTE(review): presumably returns true when no source image has been
        // set -- confirm against the implementation file.
00045   bool IsEmpty() const;
00046 
        // Supplies the source image as a raw byte buffer described by its width,
        // height, bytes per pixel and bytes per line.
        // NOTE(review): whether imagedata is copied or merely referenced, and
        // which bytes_per_pixel values are accepted, is not visible here -- confirm.
00055   void SetImage(const unsigned char* imagedata, int width, int height,
00056                 int bytes_per_pixel, int bytes_per_line);
00057 
        // Selects the rectangle (left, top, width, height) to be processed.
        // NOTE(review): presumably stored in rect_left_/rect_top_/rect_width_/
        // rect_height_, which IsFullImage() compares against the image size.
00060   void SetRectangle(int left, int top, int width, int height);
00061 
        // Reports sizes through the six output pointers.
        // NOTE(review): presumably left/top/width/height receive the current
        // rectangle and imagewidth/imageheight the full image size; null
        // handling is not visible here -- confirm.
00066   virtual void GetImageSizes(int* left, int* top, int* width, int* height,
00067                              int* imagewidth, int* imageheight);
00068 
        // Returns true when pix_channels_ is 3 or more.
00070   bool IsColor() const {
00071     return pix_channels_ >= 3;
00072   }
00073 
        // Returns true when pix_channels_ is 0, i.e. the source has no 8-bit
        // channels (pix_channels_ counts 8-bit channels in pix_).
00075   bool IsBinary() const {
00076     return pix_channels_ == 0;
00077   }
00078 
        // Returns scale_, the scale factor from the original image.
00079   int GetScaleFactor() const {
00080     return scale_;
00081   }
00082 
00083   // Set the resolution of the source image in pixels per inch.
00084   // This should be called right after SetImage(), and will let us return
00085   // appropriate font sizes for the text.
        // Also overwrites estimated_res_ with ppi, so any value given earlier
        // to SetEstimatedResolution() is replaced.
00086   void SetSourceYResolution(int ppi) {
00087     yres_ = ppi;
00088     estimated_res_ = ppi;
00089   }
        // Returns the unscaled source resolution (yres_).
00090   int GetSourceYResolution() const {
00091     return yres_;
00092   }
        // Returns the source resolution multiplied by the scale factor.
00093   int GetScaledYResolution() const {
00094     return scale_ * yres_;
00095   }
00096   // Set the resolution of the source image in pixels per inch, as estimated
00097   // by the thresholder from the text size found during thresholding.
00098   // This value will be used to set internal size thresholds during recognition
00099   // and will not influence the output "point size." The default value is
00100   // the same as the source resolution. (yres_)
00101   void SetEstimatedResolution(int ppi) {
00102     estimated_res_ = ppi;
00103   }
00104   // Returns the estimated resolution, including any active scaling.
00105   // This value will be used to set internal size thresholds during recognition.
00106   int GetScaledEstimatedResolution() const {
00107     return scale_ * estimated_res_;
00108   }
00109 
        // Supplies the source image as a Pix.
        // NOTE(review): whether the Pix is cloned, copied or just referenced is
        // not visible here -- confirm before relying on the lifetime of pix.
00115   void SetImage(const Pix* pix);
00116 
        // Virtual thresholding entry point; the result is delivered through *pix.
        // NOTE(review): ownership of the returned Pix and the handling of a
        // pre-existing *pix are not visible here -- confirm.
00120   virtual void ThresholdToPix(Pix** pix);
00121 
00122   // Gets a pix that contains an 8 bit threshold value at each pixel. The
00123   // returned pix may be an integer reduction of the binary image such that
00124   // the scale factor may be inferred from the ratio of the sizes, even down
00125   // to the extreme of a 1x1 pixel thresholds image.
00126   // Ideally the 8 bit threshold should be the exact threshold used to generate
00127   // the binary image in ThresholdToPix, but this is not a hard constraint.
00128   // Returns NULL if the input is binary. PixDestroy after use.
00129   virtual Pix* GetPixRectThresholds();
00130 
        // NOTE(review): presumably returns the source image restricted to the
        // current rectangle; whether the caller must destroy the result is not
        // visible here -- confirm.
00136   Pix* GetPixRect();
00137 
00138   // Get a clone/copy of the source image rectangle, reduced to greyscale,
00139   // and at the same resolution as the output binary.
00140   // The returned Pix must be pixDestroyed.
00141   // Provided to the classifier to extract features from the greyscale image.
00142   virtual Pix* GetPixRectGrey();
00143 
00144  protected:
00145   // ----------------------------------------------------------------------
00146   // Utility functions that may be useful components for other thresholders.
00147 
        // Virtual initialisation hook; the body is not visible in this header.
        // NOTE(review): presumably resets the rectangle/scale state -- confirm.
00149   virtual void Init();
00150 
        // Returns true when the rectangle starts at (0, 0) and its width and
        // height equal image_width_ and image_height_.
00152   bool IsFullImage() const {
00153     return rect_left_ == 0 && rect_top_ == 0 &&
00154            rect_width_ == image_width_ && rect_height_ == image_height_;
00155   }
00156 
00157   // Otsu thresholds the rectangle, taking the rectangle from *this.
        // Reads src_pix and delivers the result through *out_pix.
00158   void OtsuThresholdRectToPix(Pix* src_pix, Pix** out_pix) const;
00159 
        // NOTE(review): the opening lines of this comment are missing from the
        // listing. Presumably this thresholds the rectangle of src_pix into *pix,
        // where num_channels is the size of the thresholds and hi_values
00163   // arrays and also the bytes per pixel in src_pix.
00164   void ThresholdRectToPix(Pix* src_pix, int num_channels,
00165                           const int* thresholds, const int* hi_values,
00166                           Pix** pix) const;
00167 
00168  protected:
        // Source image; image_width_, image_height_, pix_channels_ and pix_wpl_
        // below describe it.
00171   Pix*                 pix_;
00172 
00173   int                  image_width_;    ///< Width of source pix_.
00174   int                  image_height_;   ///< Height of source pix_.
00175   int                  pix_channels_;   ///< Number of 8-bit channels in pix_.
00176   int                  pix_wpl_;        ///< Words per line of pix_.
00177   // Limits of image rectangle to be processed (see the rect_* members below).
00178   int                  scale_;          ///< Scale factor from original image.
00179   int                  yres_;           ///< y pixels/inch in source image.
00180   int                  estimated_res_;  ///< Resolution estimate from text size.
00181   int                  rect_left_;      ///< Left edge of the rectangle.
00182   int                  rect_top_;       ///< Top edge of the rectangle.
00183   int                  rect_width_;     ///< Width of the rectangle.
00184   int                  rect_height_;    ///< Height of the rectangle.
00185 };
00186 
00187 }  // namespace tesseract.
00188 
00189 #endif  // TESSERACT_CCMAIN_THRESHOLDER_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines