tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/training/degradeimage.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        degradeimage.cpp
00003  * Description: Function to degrade an image (usually of text) as if it
00004  *              has been printed and then scanned.
00005  * Authors:     Ray Smith
00006  * Created:     Tue Nov 19 2013
00007  *
00008  * (C) Copyright 2013, Google Inc.
00009  * Licensed under the Apache License, Version 2.0 (the "License");
00010  * you may not use this file except in compliance with the License.
00011  * You may obtain a copy of the License at
00012  * http://www.apache.org/licenses/LICENSE-2.0
00013  * Unless required by applicable law or agreed to in writing, software
00014  * distributed under the License is distributed on an "AS IS" BASIS,
00015  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016  * See the License for the specific language governing permissions and
00017  * limitations under the License.
00018  *
00019  **********************************************************************/
00020 
00021 #include "degradeimage.h"
00022 
00023 #include <stdlib.h>
00024 #include "allheaders.h"   // from leptonica
00025 
00026 namespace tesseract {
00027 
00028 // Rotation is +/- kRotationRange radians.
00029 const float kRotationRange = 0.02f;
00030 // Number of grey levels to shift by for each exposure step.
00031 const int kExposureFactor = 16;
00032 // Salt and pepper noise is +/- kSaltnPepper.
00033 const int kSaltnPepper = 5;
00034 // Min sum of width + height on which to operate the ramp.
00035 const int kMinRampSize = 1000;
00036 
00037 static unsigned int random_seed = 0x18273645;
00038 #ifndef rand_r  // _MSC_VER, ANDROID
00039 #define rand_r(random_seed) rand()
00040 #endif  // _MSC_VER
00041 
00042 // Degrade the pix as if by a print/copy/scan cycle with exposure > 0
00043 // corresponding to darkening on the copier and <0 lighter and 0 not copied.
00044 // Exposures in [-2,2] are most useful, with -3 and 3 being extreme.
00045 // If rotation is NULL, rotation is skipped. If *rotation is non-zero, the pix
00046 // is rotated by *rotation else it is randomly rotated and *rotation is
00047 // modified.
00048 // HOW IT WORKS:
00049 // Most of the process is really dictated by the fact that the minimum
00050 // available convolution is 3X3, which is too big really to simulate a
00051 // good quality print/scan process. (2X2 would be better.)
00052 // 1 pixel wide inputs are heavily smeared by the 3X3 convolution, making the
00053 // images generally biased to being too light, so most of the work is to make
00054 // them darker. 3 levels of thickening/darkening are achieved with 2 dilations,
00055 // (using a greyscale erosion) one heavy (by being before convolution) and one
00056 // light (after convolution).
00057 // With no dilation, after covolution, the images are so light that a heavy
00058 // constant offset is required to make the 0 image look reasonable. A simple
00059 // constant offset multiple of exposure to undo this value is enough to achieve
00060 // all the required lightening. This gives the advantage that exposure level 1
00061 // with a single dilation gives a good impression of the broken-yet-too-dark
00062 // problem that is often seen in scans.
00063 // A small random rotation gives some varying greyscale values on the edges,
00064 // and some random salt and pepper noise on top helps to realistically jaggy-up
00065 // the edges.
00066 // Finally a greyscale ramp provides a continuum of effects between exposure
00067 // levels.
00068 Pix* DegradeImage(Pix* input, int exposure, float* rotation) {
00069   Pix* pix = pixConvertTo8(input, false);
00070   pixDestroy(&input);
00071   input = pix;
00072   int width = pixGetWidth(input);
00073   int height = pixGetHeight(input);
00074   if (exposure >= 2) {
00075     // An erosion simulates the spreading darkening of a dark copy.
00076     // This is backwards to binary morphology,
00077     // see http://www.leptonica.com/grayscale-morphology.html
00078     pix = input;
00079     input = pixErodeGray(pix, 3, 3);
00080     pixDestroy(&pix);
00081   }
00082   // A convolution is essential to any mode as no scanner produces an
00083   // image as sharp as the electronic image.
00084   pix = pixBlockconv(input, 1, 1);
00085   pixDestroy(&input);
00086   // A small random rotation helps to make the edges jaggy in a realistic way.
00087   if (rotation != NULL) {
00088     float radians_clockwise;
00089     if (*rotation) {
00090       radians_clockwise = *rotation;
00091     } else {
00092       radians_clockwise = (2.0*rand_r(&random_seed)/RAND_MAX - 1.0) *
00093           kRotationRange;
00094     }
00095 
00096     input = pixRotate(pix, radians_clockwise,
00097                       L_ROTATE_AREA_MAP, L_BRING_IN_WHITE,
00098                       0, 0);
00099     // Rotate the boxes to match.
00100     *rotation = radians_clockwise;
00101     pixDestroy(&pix);
00102   } else {
00103     input = pix;
00104   }
00105 
00106   if (exposure >= 3 || exposure == 1) {
00107     // Erosion after the convolution is not as heavy as before, so it is
00108     // good for level 1 and in addition as a level 3.
00109     // This is backwards to binary morphology,
00110     // see http://www.leptonica.com/grayscale-morphology.html
00111     pix = input;
00112     input = pixErodeGray(pix, 3, 3);
00113     pixDestroy(&pix);
00114   }
00115   // The convolution really needed to be 2x2 to be realistic enough, but
00116   // we only have 3x3, so we have to bias the image darker or lose thin
00117   // strokes.
00118   int erosion_offset = 0;
00119   // For light and 0 exposure, there is no dilation, so compensate for the
00120   // convolution with a big darkening bias which is undone for lighter
00121   // exposures.
00122   if (exposure <= 0)
00123     erosion_offset = -3 * kExposureFactor;
00124   // Add in a general offset of the greyscales for the exposure level so
00125   // a threshold of 128 gives a reasonable binary result.
00126   erosion_offset -= exposure * kExposureFactor;
00127   // Add a gradual fade over the page and a small amount of salt and pepper
00128   // noise to simulate noise in the sensor/paper fibres and varying
00129   // illumination.
00130   l_uint32* data = pixGetData(input);
00131   for (int y = 0; y < height; ++y) {
00132     for (int x = 0; x < width; ++x) {
00133       int pixel = GET_DATA_BYTE(data, x);
00134       pixel += rand_r(&random_seed) % (kSaltnPepper*2 + 1) - kSaltnPepper;
00135       if (height + width > kMinRampSize)
00136         pixel -= (2*x + y) * 32 / (height + width);
00137       pixel += erosion_offset;
00138       if (pixel < 0)
00139         pixel = 0;
00140       if (pixel > 255)
00141         pixel = 255;
00142       SET_DATA_BYTE(data, x, pixel);
00143     }
00144     data += input->wpl;
00145   }
00146   return input;
00147 }
00148 
00149 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines