tesseract
3.03
|
00001 /********************************************************************** 00002 * File: otsuthr.cpp 00003 * Description: Simple Otsu thresholding for binarizing images. 00004 * Author: Ray Smith 00005 * Created: Fri Mar 07 12:31:01 PST 2008 00006 * 00007 * (C) Copyright 2008, Google Inc. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #include "otsuthr.h" 00021 00022 #include <string.h> 00023 #include "allheaders.h" 00024 #include "helpers.h" 00025 #include "openclwrapper.h" 00026 00027 00028 namespace tesseract { 00029 00030 // Computes the Otsu threshold(s) for the given image rectangle, making one 00031 // for each channel. Each channel is always one byte per pixel. 00032 // Returns an array of threshold values and an array of hi_values, such 00033 // that a pixel value >threshold[channel] is considered foreground if 00034 // hi_values[channel] is 0 or background if 1. A hi_value of -1 indicates 00035 // that there is no apparent foreground. At least one hi_value will not be -1. 00036 // Delete thresholds and hi_values with delete [] after use. 00037 // The return value is the number of channels in the input image, being 00038 // the size of the output thresholds and hi_values arrays. 00039 int OtsuThreshold(Pix* src_pix, int left, int top, int width, int height, 00040 int** thresholds, int** hi_values) { 00041 int num_channels = pixGetDepth(src_pix) / 8; 00042 // Of all channels with no good hi_value, keep the best so we can always 00043 // produce at least one answer. 00044 PERF_COUNT_START("OtsuThreshold") 00045 int best_hi_value = 1; 00046 int best_hi_index = 0; 00047 bool any_good_hivalue = false; 00048 double best_hi_dist = 0.0; 00049 *thresholds = new int[num_channels]; 00050 *hi_values = new int[num_channels]; 00051 // all of channel 0 then all of channel 1... 00052 int *histogramAllChannels = new int[kHistogramSize * num_channels]; 00053 00054 // only use opencl if compiled w/ OpenCL and selected device is opencl 00055 #ifdef USE_OPENCL 00056 // Calculate Histogram on GPU 00057 OpenclDevice od; 00058 if (od.selectedDeviceIsOpenCL() && 00059 (num_channels == 1 || num_channels == 4) && top == 0 && left == 0 ) { 00060 od.HistogramRectOCL( 00061 (const unsigned char*)pixGetData(src_pix), 00062 num_channels, 00063 pixGetWpl(src_pix) * 4, 00064 left, 00065 top, 00066 width, 00067 height, 00068 kHistogramSize, 00069 histogramAllChannels); 00070 00071 // Calculate Threshold from Histogram on cpu 00072 for (int ch = 0; ch < num_channels; ++ch) { 00073 (*thresholds)[ch] = -1; 00074 (*hi_values)[ch] = -1; 00075 int *histogram = &histogramAllChannels[kHistogramSize * ch]; 00076 int H; 00077 int best_omega_0; 00078 int best_t = OtsuStats(histogram, &H, &best_omega_0); 00079 if (best_omega_0 == 0 || best_omega_0 == H) { 00080 // This channel is empty. 00081 continue; 00082 } 00083 // To be a convincing foreground we must have a small fraction of H 00084 // or to be a convincing background we must have a large fraction of H. 00085 // In between we assume this channel contains no thresholding information. 00086 int hi_value = best_omega_0 < H * 0.5; 00087 (*thresholds)[ch] = best_t; 00088 if (best_omega_0 > H * 0.75) { 00089 any_good_hivalue = true; 00090 (*hi_values)[ch] = 0; 00091 } else if (best_omega_0 < H * 0.25) { 00092 any_good_hivalue = true; 00093 (*hi_values)[ch] = 1; 00094 } else { 00095 // In case all channels are like this, keep the best of the bad lot. 00096 double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; 00097 if (hi_dist > best_hi_dist) { 00098 best_hi_dist = hi_dist; 00099 best_hi_value = hi_value; 00100 best_hi_index = ch; 00101 } 00102 } 00103 } 00104 } else { 00105 #endif 00106 for (int ch = 0; ch < num_channels; ++ch) { 00107 (*thresholds)[ch] = -1; 00108 (*hi_values)[ch] = -1; 00109 // Compute the histogram of the image rectangle. 00110 int histogram[kHistogramSize]; 00111 HistogramRect(src_pix, ch, left, top, width, height, histogram); 00112 int H; 00113 int best_omega_0; 00114 int best_t = OtsuStats(histogram, &H, &best_omega_0); 00115 if (best_omega_0 == 0 || best_omega_0 == H) { 00116 // This channel is empty. 00117 continue; 00118 } 00119 // To be a convincing foreground we must have a small fraction of H 00120 // or to be a convincing background we must have a large fraction of H. 00121 // In between we assume this channel contains no thresholding information. 00122 int hi_value = best_omega_0 < H * 0.5; 00123 (*thresholds)[ch] = best_t; 00124 if (best_omega_0 > H * 0.75) { 00125 any_good_hivalue = true; 00126 (*hi_values)[ch] = 0; 00127 } else if (best_omega_0 < H * 0.25) { 00128 any_good_hivalue = true; 00129 (*hi_values)[ch] = 1; 00130 } else { 00131 // In case all channels are like this, keep the best of the bad lot. 00132 double hi_dist = hi_value ? (H - best_omega_0) : best_omega_0; 00133 if (hi_dist > best_hi_dist) { 00134 best_hi_dist = hi_dist; 00135 best_hi_value = hi_value; 00136 best_hi_index = ch; 00137 } 00138 } 00139 } 00140 #ifdef USE_OPENCL 00141 } 00142 #endif // USE_OPENCL 00143 delete[] histogramAllChannels; 00144 00145 if (!any_good_hivalue) { 00146 // Use the best of the ones that were not good enough. 00147 (*hi_values)[best_hi_index] = best_hi_value; 00148 } 00149 PERF_COUNT_END 00150 return num_channels; 00151 } 00152 00153 // Computes the histogram for the given image rectangle, and the given 00154 // single channel. Each channel is always one byte per pixel. 00155 // Histogram is always a kHistogramSize(256) element array to count 00156 // occurrences of each pixel value. 00157 void HistogramRect(Pix* src_pix, int channel, 00158 int left, int top, int width, int height, 00159 int* histogram) { 00160 PERF_COUNT_START("HistogramRect") 00161 int num_channels = pixGetDepth(src_pix) / 8; 00162 channel = ClipToRange(channel, 0, num_channels - 1); 00163 int bottom = top + height; 00164 memset(histogram, 0, sizeof(*histogram) * kHistogramSize); 00165 int src_wpl = pixGetWpl(src_pix); 00166 l_uint32* srcdata = pixGetData(src_pix); 00167 for (int y = top; y < bottom; ++y) { 00168 const l_uint32* linedata = srcdata + y * src_wpl; 00169 for (int x = 0; x < width; ++x) { 00170 int pixel = GET_DATA_BYTE(const_cast<void*>( 00171 reinterpret_cast<const void *>(linedata)), 00172 (x + left) * num_channels + channel); 00173 ++histogram[pixel]; 00174 } 00175 } 00176 PERF_COUNT_END 00177 } 00178 00179 // Computes the Otsu threshold(s) for the given histogram. 00180 // Also returns H = total count in histogram, and 00181 // omega0 = count of histogram below threshold. 00182 int OtsuStats(const int* histogram, int* H_out, int* omega0_out) { 00183 int H = 0; 00184 double mu_T = 0.0; 00185 for (int i = 0; i < kHistogramSize; ++i) { 00186 H += histogram[i]; 00187 mu_T += static_cast<double>(i) * histogram[i]; 00188 } 00189 00190 // Now maximize sig_sq_B over t. 00191 // http://www.ctie.monash.edu.au/hargreave/Cornall_Terry_328.pdf 00192 int best_t = -1; 00193 int omega_0, omega_1; 00194 int best_omega_0 = 0; 00195 double best_sig_sq_B = 0.0; 00196 double mu_0, mu_1, mu_t; 00197 omega_0 = 0; 00198 mu_t = 0.0; 00199 for (int t = 0; t < kHistogramSize - 1; ++t) { 00200 omega_0 += histogram[t]; 00201 mu_t += t * static_cast<double>(histogram[t]); 00202 if (omega_0 == 0) 00203 continue; 00204 omega_1 = H - omega_0; 00205 if (omega_1 == 0) 00206 break; 00207 mu_0 = mu_t / omega_0; 00208 mu_1 = (mu_T - mu_t) / omega_1; 00209 double sig_sq_B = mu_1 - mu_0; 00210 sig_sq_B *= sig_sq_B * omega_0 * omega_1; 00211 if (best_t < 0 || sig_sq_B > best_sig_sq_B) { 00212 best_sig_sq_B = sig_sq_B; 00213 best_t = t; 00214 best_omega_0 = omega_0; 00215 } 00216 } 00217 if (H_out != NULL) *H_out = H; 00218 if (omega0_out != NULL) *omega0_out = best_omega_0; 00219 return best_t; 00220 } 00221 00222 } // namespace tesseract.