tesseract
3.03
|
00001 /********************************************************************** 00002 * File: normalis.h (Formerly denorm.h) 00003 * Description: Code for the DENORM class. 00004 * Author: Ray Smith 00005 * Created: Thu Apr 23 09:22:43 BST 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 #ifndef NORMALIS_H 00021 #define NORMALIS_H 00022 00023 #include <stdio.h> 00024 #include "genericvector.h" 00025 #include "host.h" 00026 00027 const int kBlnCellHeight = 256; // Full-height for baseline normalization. 00028 const int kBlnXHeight = 128; // x-height for baseline normalization. 00029 const int kBlnBaselineOffset = 64; // offset for baseline normalization. 00030 00031 struct Pix; 00032 class ROW; // Forward decl 00033 class BLOCK; 00034 class FCOORD; 00035 struct TBLOB; 00036 class TBOX; 00037 struct TPOINT; 00038 class UNICHARSET; 00039 00040 namespace tesseract { 00041 00042 // Possible normalization methods. Use NEGATIVE values as these also 00043 // double up as markers for the last sub-classifier. 00044 enum NormalizationMode { 00045 NM_BASELINE = -3, // The original BL normalization mode. 00046 NM_CHAR_ISOTROPIC = -2, // Character normalization but isotropic. 00047 NM_CHAR_ANISOTROPIC = -1 // The original CN normalization mode. 00048 }; 00049 00050 } // namespace tesseract. 00051 00052 class DENORM { 00053 public: 00054 DENORM(); 00055 00056 // Copying a DENORM is allowed. 00057 DENORM(const DENORM &); 00058 DENORM& operator=(const DENORM&); 00059 ~DENORM(); 00060 00061 // Setup the normalization transformation parameters. 00062 // The normalizations applied to a blob are as follows: 00063 // 1. An optional block layout rotation that was applied during layout 00064 // analysis to make the textlines horizontal. 00065 // 2. A normalization transformation (LocalNormTransform): 00066 // Subtract the "origin" 00067 // Apply an x,y scaling. 00068 // Apply an optional rotation. 00069 // Add back a final translation. 00070 // The origin is in the block-rotated space, and is usually something like 00071 // the x-middle of the word at the baseline. 00072 // 3. Zero or more further normalization transformations that are applied 00073 // in sequence, with a similar pattern to the first normalization transform. 00074 // 00075 // A DENORM holds the parameters of a single normalization, and can execute 00076 // both the LocalNormTransform (a forwards normalization), and the 00077 // LocalDenormTransform which is an inverse transform or de-normalization. 00078 // A DENORM may point to a predecessor DENORM, which is actually the earlier 00079 // normalization, so the full normalization sequence involves executing all 00080 // predecessors first and then the transform in "this". 00081 // Let x be image co-ordinates and that we have normalization classes A, B, C 00082 // where we first apply A then B then C to get normalized x': 00083 // x' = CBAx 00084 // Then the backwards (to original coordinates) would be: 00085 // x = A^-1 B^-1 C^-1 x' 00086 // and A = B->predecessor_ and B = C->predecessor_ 00087 // NormTransform executes all predecessors recursively, and then this. 00088 // NormTransform would be used to transform an image-based feature to 00089 // normalized space for use in a classifier 00090 // DenormTransform inverts this and then all predecessors. It can be 00091 // used to get back to the original image coordinates from normalized space. 00092 // The LocalNormTransform member executes just the transformation 00093 // in "this" without the layout rotation or any predecessors. It would be 00094 // used to run each successive normalization, eg the word normalization, 00095 // and later the character normalization. 00096 00097 // Arguments: 00098 // block: if not NULL, then this is the first transformation, and 00099 // block->re_rotation() needs to be used after the Denorm 00100 // transformation to get back to the image coords. 00101 // rotation: if not NULL, apply this rotation after translation to the 00102 // origin and scaling. (Usually a classify rotation.) 00103 // predecessor: if not NULL, then predecessor has been applied to the 00104 // input space and needs to be undone to complete the inverse. 00105 // The above pointers are not owned by this DENORM and are assumed to live 00106 // longer than this denorm, except rotation, which is deep copied on input. 00107 // 00108 // x_origin: The x origin which will be mapped to final_xshift in the result. 00109 // y_origin: The y origin which will be mapped to final_yshift in the result. 00110 // Added to result of row->baseline(x) if not NULL. 00111 // 00112 // x_scale: scale factor for the x-coordinate. 00113 // y_scale: scale factor for the y-coordinate. Ignored if segs is given. 00114 // Note that these scale factors apply to the same x and y system as the 00115 // x-origin and y-origin apply, ie after any block rotation, but before 00116 // the rotation argument is applied. 00117 // 00118 // final_xshift: The x component of the final translation. 00119 // final_yshift: The y component of the final translation. 00120 // 00121 // In theory, any of the commonly used normalizations can be setup here: 00122 // * Traditional baseline normalization on a word: 00123 // SetupNormalization(block, NULL, NULL, 00124 // box.x_middle(), baseline, 00125 // kBlnXHeight / x_height, kBlnXHeight / x_height, 00126 // 0, kBlnBaselineOffset); 00127 // * "Numeric mode" baseline normalization on a word, in which the blobs 00128 // are positioned with the bottom as the baseline is achieved by making 00129 // a separate DENORM for each blob. 00130 // SetupNormalization(block, NULL, NULL, 00131 // box.x_middle(), box.bottom(), 00132 // kBlnXHeight / x_height, kBlnXHeight / x_height, 00133 // 0, kBlnBaselineOffset); 00134 // * Anisotropic character normalization used by IntFx. 00135 // SetupNormalization(NULL, NULL, denorm, 00136 // centroid_x, centroid_y, 00137 // 51.2 / ry, 51.2 / rx, 128, 128); 00138 // * Normalize blob height to x-height (current OSD): 00139 // SetupNormalization(NULL, &rotation, NULL, 00140 // box.rotational_x_middle(rotation), 00141 // box.rotational_y_middle(rotation), 00142 // kBlnXHeight / box.rotational_height(rotation), 00143 // kBlnXHeight / box.rotational_height(rotation), 00144 // 0, kBlnBaselineOffset); 00145 // * Secondary normalization for classification rotation (current): 00146 // FCOORD rotation = block->classify_rotation(); 00147 // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio; 00148 // SetupNormalization(NULL, &rotation, denorm, 00149 // box.rotational_x_middle(rotation), 00150 // box.rotational_y_middle(rotation), 00151 // target_height / box.rotational_height(rotation), 00152 // target_height / box.rotational_height(rotation), 00153 // 0, kBlnBaselineOffset); 00154 // * Proposed new normalizations for CJK: Between them there is then 00155 // no need for further normalization at all, and the character fills the cell. 00156 // ** Replacement for baseline normalization on a word: 00157 // Scales height and width independently so that modal height and pitch 00158 // fill the cell respectively. 00159 // float cap_height = x_height / CCStruct::kXHeightCapRatio; 00160 // SetupNormalization(block, NULL, NULL, 00161 // box.x_middle(), cap_height / 2.0f, 00162 // kBlnCellHeight / fixed_pitch, 00163 // kBlnCellHeight / cap_height, 00164 // 0, 0); 00165 // ** Secondary normalization for classification (with rotation) (proposed): 00166 // Requires a simple translation to the center of the appropriate character 00167 // cell, no further scaling and a simple rotation (or nothing) about the 00168 // cell center. 00169 // FCOORD rotation = block->classify_rotation(); 00170 // SetupNormalization(NULL, &rotation, denorm, 00171 // fixed_pitch_cell_center, 00172 // 0.0f, 00173 // 1.0f, 00174 // 1.0f, 00175 // 0, 0); 00176 void SetupNormalization(const BLOCK* block, 00177 const FCOORD* rotation, 00178 const DENORM* predecessor, 00179 float x_origin, float y_origin, 00180 float x_scale, float y_scale, 00181 float final_xshift, float final_yshift); 00182 00183 // Sets up the DENORM to execute a non-linear transformation based on 00184 // preserving an even distribution of stroke edges. The transformation 00185 // operates only within the given box, scaling input coords within the box 00186 // non-linearly to a box of target_width by target_height, with all other 00187 // coords being clipped to the box edge. As with SetupNormalization above, 00188 // final_xshift and final_yshift are applied after scaling, and the bottom- 00189 // left of box is used as a pre-scaling origin. 00190 // x_coords is a collection of the x-coords of vertical edges for each 00191 // y-coord starting at box.bottom(). 00192 // y_coords is a collection of the y-coords of horizontal edges for each 00193 // x-coord starting at box.left(). 00194 // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom. 00195 // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1. 00196 // The second-level vectors must all be sorted in ascending order. 00197 void SetupNonLinear(const DENORM* predecessor, const TBOX& box, 00198 float target_width, float target_height, 00199 float final_xshift, float final_yshift, 00200 const GenericVector<GenericVector<int> >& x_coords, 00201 const GenericVector<GenericVector<int> >& y_coords); 00202 00203 // Transforms the given coords one step forward to normalized space, without 00204 // using any block rotation or predecessor. 00205 void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const; 00206 void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const; 00207 // Transforms the given coords forward to normalized space using the 00208 // full transformation sequence defined by the block rotation, the 00209 // predecessors, deepest first, and finally this. If first_norm is not NULL, 00210 // then the first and deepest transformation used is first_norm, ending 00211 // with this, and the block rotation will not be applied. 00212 void NormTransform(const DENORM* first_norm, const TPOINT& pt, 00213 TPOINT* transformed) const; 00214 void NormTransform(const DENORM* first_norm, const FCOORD& pt, 00215 FCOORD* transformed) const; 00216 // Transforms the given coords one step back to source space, without 00217 // using to any block rotation or predecessor. 00218 void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const; 00219 void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const; 00220 // Transforms the given coords all the way back to source image space using 00221 // the full transformation sequence defined by this and its predecesors 00222 // recursively, shallowest first, and finally any block re_rotation. 00223 // If last_denorm is not NULL, then the last transformation used will 00224 // be last_denorm, and the block re_rotation will never be executed. 00225 void DenormTransform(const DENORM* last_denorm, const TPOINT& pt, 00226 TPOINT* original) const; 00227 void DenormTransform(const DENORM* last_denorm, const FCOORD& pt, 00228 FCOORD* original) const; 00229 00230 // Normalize a blob using blob transformations. Less accurate, but 00231 // more accurately copies the old way. 00232 void LocalNormBlob(TBLOB* blob) const; 00233 00234 // Fills in the x-height range accepted by the given unichar_id in blob 00235 // coordinates, given its bounding box in the usual baseline-normalized 00236 // coordinates, with some initial crude x-height estimate (such as word 00237 // size) and this denoting the transformation that was used. 00238 // Also returns the amount the character must have shifted up or down. 00239 void XHeightRange(int unichar_id, const UNICHARSET& unicharset, 00240 const TBOX& bbox, 00241 float* min_xht, 00242 float* max_xht, 00243 float* yshift) const; 00244 00245 // Prints the content of the DENORM for debug purposes. 00246 void Print() const; 00247 00248 Pix* pix() const { 00249 return pix_; 00250 } 00251 void set_pix(Pix* pix) { 00252 pix_ = pix; 00253 } 00254 bool inverse() const { 00255 return inverse_; 00256 } 00257 void set_inverse(bool value) { 00258 inverse_ = value; 00259 } 00260 const DENORM* RootDenorm() const { 00261 if (predecessor_ != NULL) 00262 return predecessor_->RootDenorm(); 00263 return this; 00264 } 00265 const DENORM* predecessor() const { 00266 return predecessor_; 00267 } 00268 // Accessors - perhaps should not be needed. 00269 float x_scale() const { 00270 return x_scale_; 00271 } 00272 float y_scale() const { 00273 return y_scale_; 00274 } 00275 const BLOCK* block() const { 00276 return block_; 00277 } 00278 void set_block(const BLOCK* block) { 00279 block_ = block; 00280 } 00281 00282 private: 00283 // Free allocated memory and clear pointers. 00284 void Clear(); 00285 // Setup default values. 00286 void Init(); 00287 00288 // Best available image. 00289 Pix* pix_; 00290 // True if the source image is white-on-black. 00291 bool inverse_; 00292 // Block the word came from. If not null, block->re_rotation() takes the 00293 // "untransformed" coordinates even further back to the original image. 00294 // Used only on the first DENORM in a chain. 00295 const BLOCK* block_; 00296 // Rotation to apply between translation to the origin and scaling. 00297 const FCOORD* rotation_; 00298 // Previous transformation in a chain. 00299 const DENORM* predecessor_; 00300 // Non-linear transformation maps directly from each integer offset from the 00301 // origin to the corresponding x-coord. Owned by the DENORM. 00302 GenericVector<float>* x_map_; 00303 // Non-linear transformation maps directly from each integer offset from the 00304 // origin to the corresponding y-coord. Owned by the DENORM. 00305 GenericVector<float>* y_map_; 00306 // x-coordinate to be mapped to final_xshift_ in the result. 00307 float x_origin_; 00308 // y-coordinate to be mapped to final_yshift_ in the result. 00309 float y_origin_; 00310 // Scale factors for x and y coords. Applied to pre-rotation system. 00311 float x_scale_; 00312 float y_scale_; 00313 // Destination coords of the x_origin_ and y_origin_. 00314 float final_xshift_; 00315 float final_yshift_; 00316 }; 00317 #endif