tesseract
3.03
|
00001 /********************************************************************** 00002 * File: degradeimage.cpp 00003 * Description: Function to degrade an image (usually of text) as if it 00004 * has been printed and then scanned. 00005 * Authors: Ray Smith 00006 * Created: Tue Nov 19 2013 00007 * 00008 * (C) Copyright 2013, Google Inc. 00009 * Licensed under the Apache License, Version 2.0 (the "License"); 00010 * you may not use this file except in compliance with the License. 00011 * You may obtain a copy of the License at 00012 * http://www.apache.org/licenses/LICENSE-2.0 00013 * Unless required by applicable law or agreed to in writing, software 00014 * distributed under the License is distributed on an "AS IS" BASIS, 00015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 * See the License for the specific language governing permissions and 00017 * limitations under the License. 00018 * 00019 **********************************************************************/ 00020 00021 #include "degradeimage.h" 00022 00023 #include <stdlib.h> 00024 #include "allheaders.h" // from leptonica 00025 00026 namespace tesseract { 00027 00028 // Rotation is +/- kRotationRange radians. 00029 const float kRotationRange = 0.02f; 00030 // Number of grey levels to shift by for each exposure step. 00031 const int kExposureFactor = 16; 00032 // Salt and pepper noise is +/- kSaltnPepper. 00033 const int kSaltnPepper = 5; 00034 // Min sum of width + height on which to operate the ramp. 00035 const int kMinRampSize = 1000; 00036 00037 static unsigned int random_seed = 0x18273645; 00038 #ifndef rand_r // _MSC_VER, ANDROID 00039 #define rand_r(random_seed) rand() 00040 #endif // _MSC_VER 00041 00042 // Degrade the pix as if by a print/copy/scan cycle with exposure > 0 00043 // corresponding to darkening on the copier and <0 lighter and 0 not copied. 00044 // Exposures in [-2,2] are most useful, with -3 and 3 being extreme. 00045 // If rotation is NULL, rotation is skipped. If *rotation is non-zero, the pix 00046 // is rotated by *rotation else it is randomly rotated and *rotation is 00047 // modified. 00048 // HOW IT WORKS: 00049 // Most of the process is really dictated by the fact that the minimum 00050 // available convolution is 3X3, which is too big really to simulate a 00051 // good quality print/scan process. (2X2 would be better.) 00052 // 1 pixel wide inputs are heavily smeared by the 3X3 convolution, making the 00053 // images generally biased to being too light, so most of the work is to make 00054 // them darker. 3 levels of thickening/darkening are achieved with 2 dilations, 00055 // (using a greyscale erosion) one heavy (by being before convolution) and one 00056 // light (after convolution). 00057 // With no dilation, after covolution, the images are so light that a heavy 00058 // constant offset is required to make the 0 image look reasonable. A simple 00059 // constant offset multiple of exposure to undo this value is enough to achieve 00060 // all the required lightening. This gives the advantage that exposure level 1 00061 // with a single dilation gives a good impression of the broken-yet-too-dark 00062 // problem that is often seen in scans. 00063 // A small random rotation gives some varying greyscale values on the edges, 00064 // and some random salt and pepper noise on top helps to realistically jaggy-up 00065 // the edges. 00066 // Finally a greyscale ramp provides a continuum of effects between exposure 00067 // levels. 00068 Pix* DegradeImage(Pix* input, int exposure, float* rotation) { 00069 Pix* pix = pixConvertTo8(input, false); 00070 pixDestroy(&input); 00071 input = pix; 00072 int width = pixGetWidth(input); 00073 int height = pixGetHeight(input); 00074 if (exposure >= 2) { 00075 // An erosion simulates the spreading darkening of a dark copy. 00076 // This is backwards to binary morphology, 00077 // see http://www.leptonica.com/grayscale-morphology.html 00078 pix = input; 00079 input = pixErodeGray(pix, 3, 3); 00080 pixDestroy(&pix); 00081 } 00082 // A convolution is essential to any mode as no scanner produces an 00083 // image as sharp as the electronic image. 00084 pix = pixBlockconv(input, 1, 1); 00085 pixDestroy(&input); 00086 // A small random rotation helps to make the edges jaggy in a realistic way. 00087 if (rotation != NULL) { 00088 float radians_clockwise; 00089 if (*rotation) { 00090 radians_clockwise = *rotation; 00091 } else { 00092 radians_clockwise = (2.0*rand_r(&random_seed)/RAND_MAX - 1.0) * 00093 kRotationRange; 00094 } 00095 00096 input = pixRotate(pix, radians_clockwise, 00097 L_ROTATE_AREA_MAP, L_BRING_IN_WHITE, 00098 0, 0); 00099 // Rotate the boxes to match. 00100 *rotation = radians_clockwise; 00101 pixDestroy(&pix); 00102 } else { 00103 input = pix; 00104 } 00105 00106 if (exposure >= 3 || exposure == 1) { 00107 // Erosion after the convolution is not as heavy as before, so it is 00108 // good for level 1 and in addition as a level 3. 00109 // This is backwards to binary morphology, 00110 // see http://www.leptonica.com/grayscale-morphology.html 00111 pix = input; 00112 input = pixErodeGray(pix, 3, 3); 00113 pixDestroy(&pix); 00114 } 00115 // The convolution really needed to be 2x2 to be realistic enough, but 00116 // we only have 3x3, so we have to bias the image darker or lose thin 00117 // strokes. 00118 int erosion_offset = 0; 00119 // For light and 0 exposure, there is no dilation, so compensate for the 00120 // convolution with a big darkening bias which is undone for lighter 00121 // exposures. 00122 if (exposure <= 0) 00123 erosion_offset = -3 * kExposureFactor; 00124 // Add in a general offset of the greyscales for the exposure level so 00125 // a threshold of 128 gives a reasonable binary result. 00126 erosion_offset -= exposure * kExposureFactor; 00127 // Add a gradual fade over the page and a small amount of salt and pepper 00128 // noise to simulate noise in the sensor/paper fibres and varying 00129 // illumination. 00130 l_uint32* data = pixGetData(input); 00131 for (int y = 0; y < height; ++y) { 00132 for (int x = 0; x < width; ++x) { 00133 int pixel = GET_DATA_BYTE(data, x); 00134 pixel += rand_r(&random_seed) % (kSaltnPepper*2 + 1) - kSaltnPepper; 00135 if (height + width > kMinRampSize) 00136 pixel -= (2*x + y) * 32 / (height + width); 00137 pixel += erosion_offset; 00138 if (pixel < 0) 00139 pixel = 0; 00140 if (pixel > 255) 00141 pixel = 255; 00142 SET_DATA_BYTE(data, x, pixel); 00143 } 00144 data += input->wpl; 00145 } 00146 return input; 00147 } 00148 00149 } // namespace tesseract