tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/otsuthr.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        otsuthr.cpp
00003  * Description: Simple Otsu thresholding for binarizing images.
00004  * Author:      Ray Smith
00005  * Created:     Fri Mar 07 12:31:01 PST 2008
00006  *
00007  * (C) Copyright 2008, Google Inc.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #include "otsuthr.h"
00021 
00022 #include <string.h>
00023 #include "allheaders.h"
00024 #include "helpers.h"
00025 #include "openclwrapper.h"
00026 
00027 
00028 namespace tesseract {
00029 
00030 // Computes the Otsu threshold(s) for the given image rectangle, making one
00031 // for each channel. Each channel is always one byte per pixel.
00032 // Returns an array of threshold values and an array of hi_values, such
00033 // that a pixel value >threshold[channel] is considered foreground if
00034 // hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates
00035 // that there is no apparent foreground. At least one hi_value will not be -1.
00036 // Delete thresholds and hi_values with delete [] after use.
00037 // The return value is the number of channels in the input image, being
00038 // the size of the output thresholds and hi_values arrays.
00039 int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height,
00040                   int** thresholds, int** hi_values) {
00041   int num_channels = pixGetDepth(src_pix) / 8;
00042   // Of all channels with no good hi_value, keep the best so we can always
00043   // produce at least one answer.
00044   PERF_COUNT_START("OtsuThreshold")
00045   int best_hi_value = 1;
00046   int best_hi_index = 0;
00047   bool any_good_hivalue = false;
00048   double best_hi_dist = 0.0;
00049   *thresholds = new int[num_channels];
00050   *hi_values = new int[num_channels];
00051   // all of channel 0 then all of channel 1...
00052   int *histogramAllChannels = new int[kHistogramSize * num_channels];
00053 
00054   // only use opencl if compiled w/ OpenCL and selected device is opencl
00055 #ifdef USE_OPENCL
00056     // Calculate Histogram on GPU
00057     OpenclDevice od;
00058     if (od.selectedDeviceIsOpenCL() &&
00059         (num_channels == 1 || num_channels == 4) && top == 0 && left == 0 ) {
00060       od.HistogramRectOCL(
00061           (const unsigned char*)pixGetData(src_pix),
00062           num_channels,
00063           pixGetWpl(src_pix) * 4,
00064           left,
00065           top,
00066           width,
00067           height,
00068           kHistogramSize,
00069           histogramAllChannels);
00070 
00071     // Calculate Threshold from Histogram on cpu
00072     for (int ch = 0; ch < num_channels; ++ch) {
00073       (*thresholds)[ch] = -1;
00074       (*hi_values)[ch] = -1;
00075       int *histogram = &histogramAllChannels[kHistogramSize * ch];
00076       int H;
00077       int best_omega_0;
00078       int best_t = OtsuStats(histogram, &H, &best_omega_0);
00079       if (best_omega_0 == 0 || best_omega_0 == H) {
00080          // This channel is empty.
00081          continue;
00082        }
00083       // To be a convincing foreground we must have a small fraction of H
00084       // or to be a convincing background we must have a large fraction of H.
00085       // In between we assume this channel contains no thresholding information.
00086       int hi_value = best_omega_0 < H * 0.5;
00087       (*thresholds)[ch] = best_t;
00088       if (best_omega_0 > H * 0.75) {
00089         any_good_hivalue = true;
00090         (*hi_values)[ch] = 0;
00091       } else if (best_omega_0 < H * 0.25) {
00092         any_good_hivalue = true;
00093         (*hi_values)[ch] = 1;
00094       } else {
00095         // In case all channels are like this, keep the best of the bad lot.
00096         double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
00097         if (hi_dist > best_hi_dist) {
00098           best_hi_dist = hi_dist;
00099           best_hi_value = hi_value;
00100           best_hi_index = ch;
00101         }
00102       }
00103     }
00104   } else {
00105 #endif
00106     for (int ch = 0; ch < num_channels; ++ch) {
00107       (*thresholds)[ch] = -1;
00108       (*hi_values)[ch] = -1;
00109       // Compute the histogram of the image rectangle.
00110       int histogram[kHistogramSize];
00111       HistogramRect(src_pix, ch, left, top, width, height, histogram);
00112       int H;
00113       int best_omega_0;
00114       int best_t = OtsuStats(histogram, &H, &best_omega_0);
00115       if (best_omega_0 == 0 || best_omega_0 == H) {
00116          // This channel is empty.
00117          continue;
00118        }
00119       // To be a convincing foreground we must have a small fraction of H
00120       // or to be a convincing background we must have a large fraction of H.
00121       // In between we assume this channel contains no thresholding information.
00122       int hi_value = best_omega_0 < H * 0.5;
00123       (*thresholds)[ch] = best_t;
00124       if (best_omega_0 > H * 0.75) {
00125         any_good_hivalue = true;
00126         (*hi_values)[ch] = 0;
00127       } else if (best_omega_0 < H * 0.25) {
00128         any_good_hivalue = true;
00129         (*hi_values)[ch] = 1;
00130       } else {
00131         // In case all channels are like this, keep the best of the bad lot.
00132         double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0;
00133         if (hi_dist > best_hi_dist) {
00134           best_hi_dist = hi_dist;
00135           best_hi_value = hi_value;
00136           best_hi_index = ch;
00137         }
00138       }
00139     }
00140 #ifdef USE_OPENCL
00141   }
00142 #endif  // USE_OPENCL
00143   delete[] histogramAllChannels;
00144 
00145   if (!any_good_hivalue) {
00146     // Use the best of the ones that were not good enough.
00147     (*hi_values)[best_hi_index] = best_hi_value;
00148   }
00149   PERF_COUNT_END
00150   return num_channels;
00151 }
00152 
00153 // Computes the histogram for the given image rectangle, and the given
00154 // single channel. Each channel is always one byte per pixel.
00155 // Histogram is always a kHistogramSize(256) element array to count
00156 // occurrences of each pixel value.
00157 void HistogramRect(Pix* src_pix, int channel,
00158                    int left, int top, int width, int height,
00159                    int* histogram) {
00160   PERF_COUNT_START("HistogramRect")
00161   int num_channels = pixGetDepth(src_pix) / 8;
00162   channel = ClipToRange(channel, 0, num_channels - 1);
00163   int bottom = top + height;
00164   memset(histogram, 0, sizeof(*histogram) * kHistogramSize);
00165   int src_wpl = pixGetWpl(src_pix);
00166   l_uint32* srcdata = pixGetData(src_pix);
00167   for (int y = top; y < bottom; ++y) {
00168     const l_uint32* linedata = srcdata + y * src_wpl;
00169     for (int x = 0; x < width; ++x) {
00170       int pixel = GET_DATA_BYTE(const_cast<void*>(
00171           reinterpret_cast<const void *>(linedata)),
00172           (x + left) * num_channels + channel);
00173       ++histogram[pixel];
00174     }
00175   }
00176   PERF_COUNT_END
00177 }
00178 
00179 // Computes the Otsu threshold(s) for the given histogram.
00180 // Also returns H = total count in histogram, and
00181 // omega0 = count of histogram below threshold.
00182 int OtsuStats(const int* histogram, int* H_out, int* omega0_out) {
00183   int H = 0;
00184   double mu_T = 0.0;
00185   for (int i = 0; i < kHistogramSize; ++i) {
00186     H += histogram[i];
00187     mu_T += static_cast<double>(i) * histogram[i];
00188   }
00189 
00190   // Now maximize sig_sq_B over t.
00191   // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf
00192   int best_t = -1;
00193   int omega_0, omega_1;
00194   int best_omega_0 = 0;
00195   double best_sig_sq_B = 0.0;
00196   double mu_0, mu_1, mu_t;
00197   omega_0 = 0;
00198   mu_t = 0.0;
00199   for (int t = 0; t < kHistogramSize - 1; ++t) {
00200     omega_0 += histogram[t];
00201     mu_t += t * static_cast<double>(histogram[t]);
00202     if (omega_0 == 0)
00203       continue;
00204     omega_1 = H - omega_0;
00205     if (omega_1 == 0)
00206       break;
00207     mu_0 = mu_t / omega_0;
00208     mu_1 = (mu_T - mu_t) / omega_1;
00209     double sig_sq_B = mu_1 - mu_0;
00210     sig_sq_B *= sig_sq_B * omega_0 * omega_1;
00211     if (best_t < 0 || sig_sq_B > best_sig_sq_B) {
00212       best_sig_sq_B = sig_sq_B;
00213       best_t = t;
00214       best_omega_0 = omega_0;
00215     }
00216   }
00217   if (H_out != NULL) *H_out = H;
00218   if (omega0_out != NULL) *omega0_out = best_omega_0;
00219   return best_t;
00220 }
00221 
00222 }  // namespace tesseract.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines