tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/normalis.h
Go to the documentation of this file.
00001 /**********************************************************************
00002  * File:        normalis.h  (Formerly denorm.h)
00003  * Description: Code for the DENORM class.
00004  * Author:      Ray Smith
00005  * Created:     Thu Apr 23 09:22:43 BST 1992
00006  *
00007  * (C) Copyright 1992, Hewlett-Packard Ltd.
00008  ** Licensed under the Apache License, Version 2.0 (the "License");
00009  ** you may not use this file except in compliance with the License.
00010  ** You may obtain a copy of the License at
00011  ** http://www.apache.org/licenses/LICENSE-2.0
00012  ** Unless required by applicable law or agreed to in writing, software
00013  ** distributed under the License is distributed on an "AS IS" BASIS,
00014  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015  ** See the License for the specific language governing permissions and
00016  ** limitations under the License.
00017  *
00018  **********************************************************************/
00019 
00020 #ifndef           NORMALIS_H
00021 #define           NORMALIS_H
00022 
00023 #include <stdio.h>
00024 #include "genericvector.h"
00025 #include "host.h"
00026 
00027 const int kBlnCellHeight = 256;     // Full cell height in baseline-normalized space.
00028 const int kBlnXHeight = 128;        // Target x-height in baseline-normalized space.
00029 const int kBlnBaselineOffset = 64;  // y-offset of the baseline in baseline-normalized space.
00030 
00031 struct Pix;
00032 class ROW;                          // Forward decl
00033 class BLOCK;
00034 class FCOORD;
00035 struct TBLOB;
00036 class TBOX;
00037 struct TPOINT;
00038 class UNICHARSET;
00039 
00040 namespace tesseract {
00041 
00042 // Possible normalization methods. The values are deliberately NEGATIVE
00043 // because they also double up as markers for the last sub-classifier.
00044 enum NormalizationMode {
00045   NM_BASELINE = -3,         // The original BL (baseline) normalization mode.
00046   NM_CHAR_ISOTROPIC = -2,   // Character normalization but isotropic.
00047   NM_CHAR_ANISOTROPIC = -1  // The original CN (character) normalization mode.
00048 };
00049 
00050 }  // namespace tesseract.
00051 
00052 class DENORM {
00053  public:
00054   DENORM();
00055 
00056   // Copying a DENORM is allowed.
00057   DENORM(const DENORM &);
00058   DENORM& operator=(const DENORM&);
00059   ~DENORM();
00060 
00061   // Setup the normalization transformation parameters.
00062   // The normalizations applied to a blob are as follows:
00063   // 1. An optional block layout rotation that was applied during layout
00064   // analysis to make the textlines horizontal.
00065   // 2. A normalization transformation (LocalNormTransform):
00066   // Subtract the "origin"
00067   // Apply an x,y scaling.
00068   // Apply an optional rotation.
00069   // Add back a final translation.
00070   // The origin is in the block-rotated space, and is usually something like
00071   // the x-middle of the word at the baseline.
00072   // 3. Zero or more further normalization transformations that are applied
00073   // in sequence, with a similar pattern to the first normalization transform.
00074   //
00075   // A DENORM holds the parameters of a single normalization, and can execute
00076   // both the LocalNormTransform (a forwards normalization), and the
00077   // LocalDenormTransform which is an inverse transform or de-normalization.
00078   // A DENORM may point to a predecessor DENORM, which is actually the earlier
00079   // normalization, so the full normalization sequence involves executing all
00080   // predecessors first and then the transform in "this".
00081   // Let x be image co-ordinates and suppose we have normalization classes A, B, C
00082   // where we first apply A then B then C to get normalized x':
00083   // x' = CBAx
00084   // Then the backwards (to original coordinates) would be:
00085   // x = A^-1 B^-1 C^-1 x'
00086   // and A = B->predecessor_ and B = C->predecessor_
00087   // NormTransform executes all predecessors recursively, and then this.
00088   // NormTransform would be used to transform an image-based feature to
00089   // normalized space for use in a classifier
00090   // DenormTransform inverts this and then all predecessors. It can be
00091   // used to get back to the original image coordinates from normalized space.
00092   // The LocalNormTransform member executes just the transformation
00093   // in "this" without the layout rotation or any predecessors. It would be
00094   // used to run each successive normalization, eg the word normalization,
00095   // and later the character normalization.
00096 
00097   // Arguments:
00098   // block: if not NULL, then this is the first transformation, and
00099   //        block->re_rotation() needs to be used after the Denorm
00100   //        transformation to get back to the image coords.
00101   // rotation: if not NULL, apply this rotation after translation to the
00102   //           origin and scaling. (Usually a classify rotation.)
00103   // predecessor: if not NULL, then predecessor has been applied to the
00104   //              input space and needs to be undone to complete the inverse.
00105   // The above pointers are not owned by this DENORM and are assumed to live
00106   // longer than this denorm, except rotation, which is deep copied on input.
00107   //
00108   // x_origin: The x origin which will be mapped to final_xshift in the result.
00109   // y_origin: The y origin which will be mapped to final_yshift in the result.
00110   //           Added to result of row->baseline(x) if not NULL. (NOTE(review): no row argument exists below - possibly stale.)
00111   //
00112   // x_scale: scale factor for the x-coordinate.
00113   // y_scale: scale factor for the y-coordinate. Ignored if segs is given. (NOTE(review): no segs argument exists below - possibly stale.)
00114   // Note that these scale factors apply to the same x and y system as the
00115   // x-origin and y-origin apply, ie after any block rotation, but before
00116   // the rotation argument is applied.
00117   //
00118   // final_xshift: The x component of the final translation.
00119   // final_yshift: The y component of the final translation.
00120   //
00121   // In theory, any of the commonly used normalizations can be setup here:
00122   // * Traditional baseline normalization on a word:
00123   // SetupNormalization(block, NULL, NULL,
00124   //                    box.x_middle(), baseline,
00125   //                    kBlnXHeight / x_height, kBlnXHeight / x_height,
00126   //                    0, kBlnBaselineOffset);
00127   // * "Numeric mode" baseline normalization on a word, in which the blobs
00128   //   are positioned with the bottom as the baseline is achieved by making
00129   //   a separate DENORM for each blob.
00130   // SetupNormalization(block, NULL, NULL,
00131   //                    box.x_middle(), box.bottom(),
00132   //                    kBlnXHeight / x_height, kBlnXHeight / x_height,
00133   //                    0, kBlnBaselineOffset);
00134   // * Anisotropic character normalization used by IntFx.
00135   // SetupNormalization(NULL, NULL, denorm,
00136   //                    centroid_x, centroid_y,
00137   //                    51.2 / ry, 51.2 / rx, 128, 128);
00138   // * Normalize blob height to x-height (current OSD):
00139   // SetupNormalization(NULL, &rotation, NULL,
00140   //                    box.rotational_x_middle(rotation),
00141   //                    box.rotational_y_middle(rotation),
00142   //                    kBlnXHeight / box.rotational_height(rotation),
00143   //                    kBlnXHeight / box.rotational_height(rotation),
00144   //                    0, kBlnBaselineOffset);
00145   // * Secondary normalization for classification rotation (current):
00146   // FCOORD rotation = block->classify_rotation();
00147   // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
00148   // SetupNormalization(NULL, &rotation, denorm,
00149   //                    box.rotational_x_middle(rotation),
00150   //                    box.rotational_y_middle(rotation),
00151   //                    target_height / box.rotational_height(rotation),
00152   //                    target_height / box.rotational_height(rotation),
00153   //                    0, kBlnBaselineOffset);
00154   // * Proposed new normalizations for CJK: Between them there is then
00155   // no need for further normalization at all, and the character fills the cell.
00156   // ** Replacement for baseline normalization on a word:
00157   // Scales height and width independently so that modal height and pitch
00158   // fill the cell respectively.
00159   // float cap_height = x_height / CCStruct::kXHeightCapRatio;
00160   // SetupNormalization(block, NULL, NULL,
00161   //                    box.x_middle(), cap_height / 2.0f,
00162   //                    kBlnCellHeight / fixed_pitch,
00163   //                    kBlnCellHeight / cap_height,
00164   //                    0, 0);
00165   // ** Secondary normalization for classification (with rotation) (proposed):
00166   // Requires a simple translation to the center of the appropriate character
00167   // cell, no further scaling and a simple rotation (or nothing) about the
00168   // cell center.
00169   // FCOORD rotation = block->classify_rotation();
00170   // SetupNormalization(NULL, &rotation, denorm,
00171   //                    fixed_pitch_cell_center,
00172   //                    0.0f,
00173   //                    1.0f,
00174   //                    1.0f,
00175   //                    0, 0);
00176   void SetupNormalization(const BLOCK* block,
00177                           const FCOORD* rotation,
00178                           const DENORM* predecessor,
00179                           float x_origin, float y_origin,
00180                           float x_scale, float y_scale,
00181                           float final_xshift, float final_yshift);
00182 
00183   // Sets up the DENORM to execute a non-linear transformation based on
00184   // preserving an even distribution of stroke edges. The transformation
00185   // operates only within the given box, scaling input coords within the box
00186   // non-linearly to a box of target_width by target_height, with all other
00187   // coords being clipped to the box edge. As with SetupNormalization above,
00188   // final_xshift and final_yshift are applied after scaling, and the bottom-
00189   // left of box is used as a pre-scaling origin.
00190   // x_coords is a collection of the x-coords of vertical edges for each
00191   // y-coord starting at box.bottom().
00192   // y_coords is a collection of the y-coords of horizontal edges for each
00193   // x-coord starting at box.left().
00194   // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
00195   // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
00196   // The second-level vectors must all be sorted in ascending order.
00197   void SetupNonLinear(const DENORM* predecessor, const TBOX& box,
00198                       float target_width, float target_height,
00199                       float final_xshift, float final_yshift,
00200                       const GenericVector<GenericVector<int> >& x_coords,
00201                       const GenericVector<GenericVector<int> >& y_coords);
00202 
00203   // Transforms the given coords one step forward to normalized space, without
00204   // using any block rotation or predecessor.
00205   void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const;
00206   void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const;
00207   // Transforms the given coords forward to normalized space using the
00208   // full transformation sequence defined by the block rotation, the
00209   // predecessors, deepest first, and finally this. If first_norm is not NULL,
00210   // then the first and deepest transformation used is first_norm, ending
00211   // with this, and the block rotation will not be applied.
00212   void NormTransform(const DENORM* first_norm, const TPOINT& pt,
00213                      TPOINT* transformed) const;
00214   void NormTransform(const DENORM* first_norm, const FCOORD& pt,
00215                      FCOORD* transformed) const;
00216   // Transforms the given coords one step back to source space, without
00217   // using any block rotation or predecessor.
00218   void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const;
00219   void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const;
00220   // Transforms the given coords all the way back to source image space using
00221   // the full transformation sequence defined by this and its predecessors
00222   // recursively, shallowest first, and finally any block re_rotation.
00223   // If last_denorm is not NULL, then the last transformation used will
00224   // be last_denorm, and the block re_rotation will never be executed.
00225   void DenormTransform(const DENORM* last_denorm, const TPOINT& pt,
00226                        TPOINT* original) const;
00227   void DenormTransform(const DENORM* last_denorm, const FCOORD& pt,
00228                        FCOORD* original) const;
00229 
00230   // Normalize a blob using blob transformations. Less accurate, but
00231   // more accurately copies the old way.
00232   void LocalNormBlob(TBLOB* blob) const;
00233 
00234   // Fills in the x-height range accepted by the given unichar_id in blob
00235   // coordinates, given its bounding box in the usual baseline-normalized
00236   // coordinates, with some initial crude x-height estimate (such as word
00237   // size) and this denoting the transformation that was used.
00238   // Also returns the amount the character must have shifted up or down.
00239   void XHeightRange(int unichar_id, const UNICHARSET& unicharset,
00240                     const TBOX& bbox,
00241                     float* min_xht,
00242                     float* max_xht,
00243                     float* yshift) const;
00244 
00245   // Prints the content of the DENORM for debug purposes.
00246   void Print() const;
00247 
00248   Pix* pix() const {
00249     return pix_;
00250   }
00251   void set_pix(Pix* pix) {
00252     pix_ = pix;
00253   }
00254   bool inverse() const {
00255     return inverse_;
00256   }
00257   void set_inverse(bool value) {
00258     inverse_ = value;
00259   }
00260   const DENORM* RootDenorm() const {
00261     if (predecessor_ != NULL)
00262       return predecessor_->RootDenorm();
00263     return this;
00264   }
00265   const DENORM* predecessor() const {
00266     return predecessor_;
00267   }
00268   // Accessors - perhaps should not be needed.
00269   float x_scale() const {
00270     return x_scale_;
00271   }
00272   float y_scale() const {
00273     return y_scale_;
00274   }
00275   const BLOCK* block() const {
00276     return block_;
00277   }
00278   void set_block(const BLOCK* block) {
00279     block_ = block;
00280   }
00281 
00282  private:
00283   // Free allocated memory and clear pointers.
00284   void Clear();
00285   // Setup default values.
00286   void Init();
00287 
00288   // Best available image.
00289   Pix* pix_;
00290   // True if the source image is white-on-black.
00291   bool inverse_;
00292   // Block the word came from. If not null, block->re_rotation() takes the
00293   // "untransformed" coordinates even further back to the original image.
00294   // Used only on the first DENORM in a chain.
00295   const BLOCK* block_;
00296   // Rotation applied after translation to the origin and scaling (see the SetupNormalization comment, which also says it is deep copied on input).
00297   const FCOORD* rotation_;
00298   // Previous transformation in a chain.
00299   const DENORM* predecessor_;
00300   // Non-linear transformation maps directly from each integer offset from the
00301   // origin to the corresponding x-coord. Owned by the DENORM.
00302   GenericVector<float>* x_map_;
00303   // Non-linear transformation maps directly from each integer offset from the
00304   // origin to the corresponding y-coord. Owned by the DENORM.
00305   GenericVector<float>* y_map_;
00306   // x-coordinate to be mapped to final_xshift_ in the result.
00307   float x_origin_;
00308   // y-coordinate to be mapped to final_yshift_ in the result.
00309   float y_origin_;
00310   // Scale factors for x and y coords. Applied to pre-rotation system.
00311   float x_scale_;
00312   float y_scale_;
00313   // Destination coords of the x_origin_ and y_origin_.
00314   float final_xshift_;
00315   float final_yshift_;
00316 };
00317 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines