tesseract
3.03
|
00001 /****************************************************************************** 00002 ** Filename: mfoutline.c 00003 ** Purpose: Interface to outline struct used for extracting features 00004 ** Author: Dan Johnson 00005 ** History: Thu May 17 08:14:18 1990, DSJ, Created. 00006 ** 00007 ** (c) Copyright Hewlett-Packard Company, 1988. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 ******************************************************************************/ 00018 /*---------------------------------------------------------------------------- 00019 Include Files and Type Defines 00020 ----------------------------------------------------------------------------*/ 00021 #include "clusttool.h" //If removed you get caught in a loop somewhere 00022 #include "emalloc.h" 00023 #include "mfoutline.h" 00024 #include "blobs.h" 00025 #include "const.h" 00026 #include "mfx.h" 00027 #include "params.h" 00028 #include "classify.h" 00029 00030 #include <math.h> 00031 #include <stdio.h> 00032 00033 #define MIN_INERTIA (0.00001) 00034 00035 /*---------------------------------------------------------------------------- 00036 Public Code 00037 ----------------------------------------------------------------------------*/ 00038 00039 /*---------------------------------------------------------------------------*/ 00040 // Convert a blob into a list of MFOUTLINEs (float-based microfeature format). 
00041 LIST ConvertBlob(TBLOB *blob) { 00042 LIST outlines = NIL_LIST; 00043 return (blob == NULL) 00044 ? NIL_LIST 00045 : ConvertOutlines(blob->outlines, outlines, outer); 00046 } 00047 00048 00049 /*---------------------------------------------------------------------------*/ 00050 // Convert a TESSLINE into the float-based MFOUTLINE micro-feature format. 00051 MFOUTLINE ConvertOutline(TESSLINE *outline) { 00052 MFEDGEPT *NewPoint; 00053 MFOUTLINE MFOutline = NIL_LIST; 00054 EDGEPT *EdgePoint; 00055 EDGEPT *StartPoint; 00056 EDGEPT *NextPoint; 00057 00058 if (outline == NULL || outline->loop == NULL) 00059 return MFOutline; 00060 00061 StartPoint = outline->loop; 00062 EdgePoint = StartPoint; 00063 do { 00064 NextPoint = EdgePoint->next; 00065 00066 /* filter out duplicate points */ 00067 if (EdgePoint->pos.x != NextPoint->pos.x || 00068 EdgePoint->pos.y != NextPoint->pos.y) { 00069 NewPoint = NewEdgePoint(); 00070 ClearMark(NewPoint); 00071 NewPoint->Hidden = EdgePoint->IsHidden(); 00072 NewPoint->Point.x = EdgePoint->pos.x; 00073 NewPoint->Point.y = EdgePoint->pos.y; 00074 MFOutline = push(MFOutline, NewPoint); 00075 } 00076 EdgePoint = NextPoint; 00077 } while (EdgePoint != StartPoint); 00078 00079 if (MFOutline != NULL) 00080 MakeOutlineCircular(MFOutline); 00081 return MFOutline; 00082 } 00083 00084 00085 /*---------------------------------------------------------------------------*/ 00086 // Convert a tree of outlines to a list of MFOUTLINEs (lists of MFEDGEPTs). 00087 // 00088 // Parameters: 00089 // outline first outline to be converted 00090 // mf_outlines list to add converted outlines to 00091 // outline_type are the outlines outer or holes? 
00092 LIST ConvertOutlines(TESSLINE *outline, 00093 LIST mf_outlines, 00094 OUTLINETYPE outline_type) { 00095 MFOUTLINE mf_outline; 00096 00097 while (outline != NULL) { 00098 mf_outline = ConvertOutline(outline); 00099 if (mf_outline != NULL) 00100 mf_outlines = push(mf_outlines, mf_outline); 00101 outline = outline->next; 00102 } 00103 return mf_outlines; 00104 } 00105 00106 /*---------------------------------------------------------------------------*/ 00107 void FindDirectionChanges(MFOUTLINE Outline, 00108 FLOAT32 MinSlope, 00109 FLOAT32 MaxSlope) { 00110 /* 00111 ** Parameters: 00112 ** Outline micro-feature outline to analyze 00113 ** MinSlope controls "snapping" of segments to horizontal 00114 ** MaxSlope controls "snapping" of segments to vertical 00115 ** Globals: none 00116 ** Operation: 00117 ** This routine searches thru the specified outline, computes 00118 ** a slope for each vector in the outline, and marks each 00119 ** vector as having one of the following directions: 00120 ** N, S, E, W, NE, NW, SE, SW 00121 ** This information is then stored in the outline and the 00122 ** outline is returned. 00123 ** Return: none 00124 ** Exceptions: none 00125 ** History: 7/21/89, DSJ, Created. 
00126 */ 00127 MFEDGEPT *Current; 00128 MFEDGEPT *Last; 00129 MFOUTLINE EdgePoint; 00130 00131 if (DegenerateOutline (Outline)) 00132 return; 00133 00134 Last = PointAt (Outline); 00135 Outline = NextPointAfter (Outline); 00136 EdgePoint = Outline; 00137 do { 00138 Current = PointAt (EdgePoint); 00139 ComputeDirection(Last, Current, MinSlope, MaxSlope); 00140 00141 Last = Current; 00142 EdgePoint = NextPointAfter (EdgePoint); 00143 } 00144 while (EdgePoint != Outline); 00145 00146 } /* FindDirectionChanges */ 00147 00148 00149 /*---------------------------------------------------------------------------*/ 00150 void FreeMFOutline(void *arg) { //MFOUTLINE Outline) 00151 /* 00152 ** Parameters: 00153 ** Outline micro-feature outline to be freed 00154 ** Globals: none 00155 ** Operation: 00156 ** This routine deallocates all of the memory consumed by 00157 ** a micro-feature outline. 00158 ** Return: none 00159 ** Exceptions: none 00160 ** History: 7/27/89, DSJ, Created. 00161 */ 00162 MFOUTLINE Start; 00163 MFOUTLINE Outline = (MFOUTLINE) arg; 00164 00165 /* break the circular outline so we can use std. techniques to deallocate */ 00166 Start = list_rest (Outline); 00167 set_rest(Outline, NIL_LIST); 00168 while (Start != NULL) { 00169 free_struct (first_node (Start), sizeof (MFEDGEPT), "MFEDGEPT"); 00170 Start = pop (Start); 00171 } 00172 00173 } /* FreeMFOutline */ 00174 00175 00176 /*---------------------------------------------------------------------------*/ 00177 void FreeOutlines(LIST Outlines) { 00178 /* 00179 ** Parameters: 00180 ** Outlines list of mf-outlines to be freed 00181 ** Globals: none 00182 ** Operation: Release all memory consumed by the specified list 00183 ** of outlines. 00184 ** Return: none 00185 ** Exceptions: none 00186 ** History: Thu Dec 13 16:14:50 1990, DSJ, Created. 
00187 */ 00188 destroy_nodes(Outlines, FreeMFOutline); 00189 } /* FreeOutlines */ 00190 00191 00192 /*---------------------------------------------------------------------------*/ 00193 void MarkDirectionChanges(MFOUTLINE Outline) { 00194 /* 00195 ** Parameters: 00196 ** Outline micro-feature outline to analyze 00197 ** Globals: none 00198 ** Operation: 00199 ** This routine searches thru the specified outline and finds 00200 ** the points at which the outline changes direction. These 00201 ** points are then marked as "extremities". This routine is 00202 ** used as an alternative to FindExtremities(). It forces the 00203 ** endpoints of the microfeatures to be at the direction 00204 ** changes rather than at the midpoint between direction 00205 ** changes. 00206 ** Return: none 00207 ** Exceptions: none 00208 ** History: 6/29/90, DSJ, Created. 00209 */ 00210 MFOUTLINE Current; 00211 MFOUTLINE Last; 00212 MFOUTLINE First; 00213 00214 if (DegenerateOutline (Outline)) 00215 return; 00216 00217 First = NextDirectionChange (Outline); 00218 Last = First; 00219 do { 00220 Current = NextDirectionChange (Last); 00221 MarkPoint (PointAt (Current)); 00222 Last = Current; 00223 } 00224 while (Last != First); 00225 00226 } /* MarkDirectionChanges */ 00227 00228 00229 /*---------------------------------------------------------------------------*/ 00230 // Return a new edge point for a micro-feature outline. 00231 MFEDGEPT *NewEdgePoint() { 00232 return ((MFEDGEPT *) alloc_struct(sizeof(MFEDGEPT), "MFEDGEPT")); 00233 } 00234 00235 00236 /*---------------------------------------------------------------------------*/ 00237 MFOUTLINE NextExtremity(MFOUTLINE EdgePoint) { 00238 /* 00239 ** Parameters: 00240 ** EdgePoint start search from this point 00241 ** Globals: none 00242 ** Operation: 00243 ** This routine returns the next point in the micro-feature 00244 ** outline that is an extremity. The search starts after 00245 ** EdgePoint. 
The routine assumes that the outline being 00246 ** searched is not a degenerate outline (i.e. it must have 00247 ** 2 or more edge points). 00248 ** Return: Next extremity in the outline after EdgePoint. 00249 ** Exceptions: none 00250 ** History: 7/26/89, DSJ, Created. 00251 */ 00252 EdgePoint = NextPointAfter(EdgePoint); 00253 while (!PointAt(EdgePoint)->ExtremityMark) 00254 EdgePoint = NextPointAfter(EdgePoint); 00255 00256 return (EdgePoint); 00257 00258 } /* NextExtremity */ 00259 00260 00261 /*---------------------------------------------------------------------------*/ 00262 void NormalizeOutline(MFOUTLINE Outline, 00263 FLOAT32 XOrigin) { 00264 /* 00265 ** Parameters: 00266 ** Outline outline to be normalized 00267 ** XOrigin x-origin of text 00268 ** Globals: none 00269 ** Operation: 00270 ** This routine normalizes the coordinates of the specified 00271 ** outline so that the outline is deskewed down to the 00272 ** baseline, translated so that x=0 is at XOrigin, and scaled 00273 ** so that the height of a character cell from descender to 00274 ** ascender is 1. Of this height, 0.25 is for the descender, 00275 ** 0.25 for the ascender, and 0.5 for the x-height. The 00276 ** y coordinate of the baseline is 0. 00277 ** Return: none 00278 ** Exceptions: none 00279 ** History: 8/2/89, DSJ, Created. 
00280 */ 00281 if (Outline == NIL_LIST) 00282 return; 00283 00284 MFOUTLINE EdgePoint = Outline; 00285 do { 00286 MFEDGEPT *Current = PointAt(EdgePoint); 00287 Current->Point.y = MF_SCALE_FACTOR * 00288 (Current->Point.y - kBlnBaselineOffset); 00289 Current->Point.x = MF_SCALE_FACTOR * (Current->Point.x - XOrigin); 00290 EdgePoint = NextPointAfter(EdgePoint); 00291 } while (EdgePoint != Outline); 00292 } /* NormalizeOutline */ 00293 00294 00295 /*---------------------------------------------------------------------------*/ 00296 namespace tesseract { 00297 void Classify::NormalizeOutlines(LIST Outlines, 00298 FLOAT32 *XScale, 00299 FLOAT32 *YScale) { 00300 /* 00301 ** Parameters: 00302 ** Outlines list of outlines to be normalized 00303 ** XScale x-direction scale factor used by routine 00304 ** YScale y-direction scale factor used by routine 00305 ** Globals: 00306 ** classify_norm_method method being used for normalization 00307 ** classify_char_norm_range map radius of gyration to this value 00308 ** Operation: This routine normalizes every outline in Outlines 00309 ** according to the currently selected normalization method. 00310 ** It also returns the scale factors that it used to do this 00311 ** scaling. The scale factors returned represent the x and 00312 ** y sizes in the normalized coordinate system that correspond 00313 ** to 1 pixel in the original coordinate system. 00314 ** Return: none (Outlines are changed and XScale and YScale are updated) 00315 ** Exceptions: none 00316 ** History: Fri Dec 14 08:14:55 1990, DSJ, Created. 
00317 */ 00318 MFOUTLINE Outline; 00319 00320 switch (classify_norm_method) { 00321 case character: 00322 ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?"); 00323 break; 00324 00325 case baseline: 00326 iterate(Outlines) { 00327 Outline = (MFOUTLINE) first_node(Outlines); 00328 NormalizeOutline(Outline, 0.0); 00329 } 00330 *XScale = *YScale = MF_SCALE_FACTOR; 00331 break; 00332 } 00333 } /* NormalizeOutlines */ 00334 } // namespace tesseract 00335 00339 /*---------------------------------------------------------------------------*/ 00340 void ChangeDirection(MFOUTLINE Start, MFOUTLINE End, DIRECTION Direction) { 00341 /* 00342 ** Parameters: 00343 ** Start, End defines segment of outline to be modified 00344 ** Direction new direction to assign to segment 00345 ** Globals: none 00346 ** Operation: Change the direction of every vector in the specified 00347 ** outline segment to Direction. The segment to be changed 00348 ** starts at Start and ends at End. Note that the previous 00349 ** direction of End must also be changed to reflect the 00350 ** change in direction of the point before it. 00351 ** Return: none 00352 ** Exceptions: none 00353 ** History: Fri May 4 10:42:04 1990, DSJ, Created. 
00354 */ 00355 MFOUTLINE Current; 00356 00357 for (Current = Start; Current != End; Current = NextPointAfter (Current)) 00358 PointAt (Current)->Direction = Direction; 00359 00360 PointAt (End)->PreviousDirection = Direction; 00361 00362 } /* ChangeDirection */ 00363 00364 00365 /*---------------------------------------------------------------------------*/ 00366 void CharNormalizeOutline(MFOUTLINE Outline, const DENORM& cn_denorm) { 00367 /* 00368 ** Parameters: 00369 ** Outline outline to be character normalized 00370 ** XCenter, YCenter center point for normalization 00371 ** XScale, YScale scale factors for normalization 00372 ** Globals: none 00373 ** Operation: This routine normalizes each point in Outline by 00374 ** translating it to the specified center and scaling it 00375 ** anisotropically according to the given scale factors. 00376 ** Return: none 00377 ** Exceptions: none 00378 ** History: Fri Dec 14 10:27:11 1990, DSJ, Created. 00379 */ 00380 MFOUTLINE First, Current; 00381 MFEDGEPT *CurrentPoint; 00382 00383 if (Outline == NIL_LIST) 00384 return; 00385 00386 First = Outline; 00387 Current = First; 00388 do { 00389 CurrentPoint = PointAt(Current); 00390 FCOORD pos(CurrentPoint->Point.x, CurrentPoint->Point.y); 00391 cn_denorm.LocalNormTransform(pos, &pos); 00392 CurrentPoint->Point.x = (pos.x() - MAX_UINT8 / 2) * MF_SCALE_FACTOR; 00393 CurrentPoint->Point.y = (pos.y() - MAX_UINT8 / 2) * MF_SCALE_FACTOR; 00394 00395 Current = NextPointAfter(Current); 00396 } 00397 while (Current != First); 00398 00399 } /* CharNormalizeOutline */ 00400 00401 00402 /*---------------------------------------------------------------------------*/ 00403 void ComputeDirection(MFEDGEPT *Start, 00404 MFEDGEPT *Finish, 00405 FLOAT32 MinSlope, 00406 FLOAT32 MaxSlope) { 00407 /* 00408 ** Parameters: 00409 ** Start starting point to compute direction from 00410 ** Finish finishing point to compute direction to 00411 ** MinSlope slope below which lines are horizontal 00412 ** 
MaxSlope slope above which lines are vertical 00413 ** Globals: none 00414 ** Operation: 00415 ** This routine computes the slope from Start to Finish and 00416 ** and then computes the approximate direction of the line 00417 ** segment from Start to Finish. The direction is quantized 00418 ** into 8 buckets: 00419 ** N, S, E, W, NE, NW, SE, SW 00420 ** Both the slope and the direction are then stored into 00421 ** the appropriate fields of the Start edge point. The 00422 ** direction is also stored into the PreviousDirection field 00423 ** of the Finish edge point. 00424 ** Return: none 00425 ** Exceptions: none 00426 ** History: 7/25/89, DSJ, Created. 00427 */ 00428 FVECTOR Delta; 00429 00430 Delta.x = Finish->Point.x - Start->Point.x; 00431 Delta.y = Finish->Point.y - Start->Point.y; 00432 if (Delta.x == 0) 00433 if (Delta.y < 0) { 00434 Start->Slope = -MAX_FLOAT32; 00435 Start->Direction = south; 00436 } 00437 else { 00438 Start->Slope = MAX_FLOAT32; 00439 Start->Direction = north; 00440 } 00441 else { 00442 Start->Slope = Delta.y / Delta.x; 00443 if (Delta.x > 0) 00444 if (Delta.y > 0) 00445 if (Start->Slope > MinSlope) 00446 if (Start->Slope < MaxSlope) 00447 Start->Direction = northeast; 00448 else 00449 Start->Direction = north; 00450 else 00451 Start->Direction = east; 00452 else if (Start->Slope < -MinSlope) 00453 if (Start->Slope > -MaxSlope) 00454 Start->Direction = southeast; 00455 else 00456 Start->Direction = south; 00457 else 00458 Start->Direction = east; 00459 else if (Delta.y > 0) 00460 if (Start->Slope < -MinSlope) 00461 if (Start->Slope > -MaxSlope) 00462 Start->Direction = northwest; 00463 else 00464 Start->Direction = north; 00465 else 00466 Start->Direction = west; 00467 else if (Start->Slope > MinSlope) 00468 if (Start->Slope < MaxSlope) 00469 Start->Direction = southwest; 00470 else 00471 Start->Direction = south; 00472 else 00473 Start->Direction = west; 00474 } 00475 Finish->PreviousDirection = Start->Direction; 00476 } /* 
ComputeDirection */ 00477 00478 00479 /*---------------------------------------------------------------------------*/ 00480 void FinishOutlineStats(register OUTLINE_STATS *OutlineStats) { 00481 /* 00482 ** Parameters: 00483 ** OutlineStats statistics about a set of outlines 00484 ** Globals: none 00485 ** Operation: Use the preliminary statistics accumulated in OutlineStats 00486 ** to compute the final statistics. 00487 ** (see Dan Johnson's Tesseract lab 00488 ** notebook #2, pgs. 74-78). 00489 ** Return: none 00490 ** Exceptions: none 00491 ** History: Fri Dec 14 10:13:36 1990, DSJ, Created. 00492 */ 00493 OutlineStats->x = 0.5 * OutlineStats->My / OutlineStats->L; 00494 OutlineStats->y = 0.5 * OutlineStats->Mx / OutlineStats->L; 00495 00496 OutlineStats->Ix = (OutlineStats->Ix / 3.0 - 00497 OutlineStats->y * OutlineStats->Mx + 00498 OutlineStats->y * OutlineStats->y * OutlineStats->L); 00499 00500 OutlineStats->Iy = (OutlineStats->Iy / 3.0 - 00501 OutlineStats->x * OutlineStats->My + 00502 OutlineStats->x * OutlineStats->x * OutlineStats->L); 00503 00504 /* Ix and/or Iy could possibly be negative due to roundoff error */ 00505 if (OutlineStats->Ix < 0.0) 00506 OutlineStats->Ix = MIN_INERTIA; 00507 if (OutlineStats->Iy < 0.0) 00508 OutlineStats->Iy = MIN_INERTIA; 00509 00510 OutlineStats->Rx = sqrt (OutlineStats->Ix / OutlineStats->L); 00511 OutlineStats->Ry = sqrt (OutlineStats->Iy / OutlineStats->L); 00512 00513 OutlineStats->Mx *= 0.5; 00514 OutlineStats->My *= 0.5; 00515 00516 } /* FinishOutlineStats */ 00517 00518 00519 /*---------------------------------------------------------------------------*/ 00520 void InitOutlineStats(OUTLINE_STATS *OutlineStats) { 00521 /* 00522 ** Parameters: 00523 ** OutlineStats stats data structure to be initialized 00524 ** Globals: none 00525 ** Operation: Initialize the outline statistics data structure so 00526 ** that it is ready to start accumulating statistics. 
00527 ** Return: none 00528 ** Exceptions: none 00529 ** History: Fri Dec 14 08:55:22 1990, DSJ, Created. 00530 */ 00531 OutlineStats->Mx = 0.0; 00532 OutlineStats->My = 0.0; 00533 OutlineStats->L = 0.0; 00534 OutlineStats->x = 0.0; 00535 OutlineStats->y = 0.0; 00536 OutlineStats->Ix = 0.0; 00537 OutlineStats->Iy = 0.0; 00538 OutlineStats->Rx = 0.0; 00539 OutlineStats->Ry = 0.0; 00540 } /* InitOutlineStats */ 00541 00542 00543 /*---------------------------------------------------------------------------*/ 00544 MFOUTLINE NextDirectionChange(MFOUTLINE EdgePoint) { 00545 /* 00546 ** Parameters: 00547 ** EdgePoint start search from this point 00548 ** Globals: none 00549 ** Operation: 00550 ** This routine returns the next point in the micro-feature 00551 ** outline that has a direction different than EdgePoint. The 00552 ** routine assumes that the outline being searched is not a 00553 ** degenerate outline (i.e. it must have 2 or more edge points). 00554 ** Return: Point of next direction change in micro-feature outline. 00555 ** Exceptions: none 00556 ** History: 7/25/89, DSJ, Created. 
00557 */ 00558 DIRECTION InitialDirection; 00559 00560 InitialDirection = PointAt (EdgePoint)->Direction; 00561 00562 MFOUTLINE next_pt = NULL; 00563 do { 00564 EdgePoint = NextPointAfter(EdgePoint); 00565 next_pt = NextPointAfter(EdgePoint); 00566 } while (PointAt(EdgePoint)->Direction == InitialDirection && 00567 !PointAt(EdgePoint)->Hidden && 00568 next_pt != NULL && !PointAt(next_pt)->Hidden); 00569 00570 return (EdgePoint); 00571 } /* NextDirectionChange */ 00572 00573 00574 /*---------------------------------------------------------------------------*/ 00575 void UpdateOutlineStats(register OUTLINE_STATS *OutlineStats, 00576 register FLOAT32 x1, 00577 register FLOAT32 x2, 00578 register FLOAT32 y1, 00579 register FLOAT32 y2) { 00580 /* 00581 ** Parameters: 00582 ** OutlineStats statistics to add this segment to 00583 ** x1, y1, x2, y2 segment to be added to statistics 00584 ** Globals: none 00585 ** Operation: This routine adds the statistics for the specified 00586 ** line segment to OutlineStats. The statistics that are 00587 ** kept are: 00588 ** sum of length of all segments 00589 ** sum of 2*Mx for all segments 00590 ** sum of 2*My for all segments 00591 ** sum of 2*Mx*(y1+y2) - L*y1*y2 for all segments 00592 ** sum of 2*My*(x1+x2) - L*x1*x2 for all segments 00593 ** These numbers, once collected can later be used to easily 00594 ** compute the center of mass, first and second moments, 00595 ** and radii of gyration. (see Dan Johnson's Tesseract lab 00596 ** notebook #2, pgs. 74-78). 00597 ** Return: none 00598 ** Exceptions: none 00599 ** History: Fri Dec 14 08:59:17 1990, DSJ, Created. 
00600 */ 00601 register FLOAT64 L; 00602 register FLOAT64 Mx2; 00603 register FLOAT64 My2; 00604 00605 /* compute length of segment */ 00606 L = sqrt ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)); 00607 OutlineStats->L += L; 00608 00609 /* compute 2Mx and 2My components */ 00610 Mx2 = L * (y1 + y2); 00611 My2 = L * (x1 + x2); 00612 OutlineStats->Mx += Mx2; 00613 OutlineStats->My += My2; 00614 00615 /* compute second moment component */ 00616 OutlineStats->Ix += Mx2 * (y1 + y2) - L * y1 * y2; 00617 OutlineStats->Iy += My2 * (x1 + x2) - L * x1 * x2; 00618 00619 } /* UpdateOutlineStats */