tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/wordrec/associate.cpp
Go to the documentation of this file.
00001 
00002 // File:        associate.cpp
00003 // Description: Functions for scoring segmentation paths according to
00004 //              their character widths, gap widths and seam cuts.
00005 // Author:      Daria Antonova
00006 // Created:     Mon Mar 8 11:26:43 PDT 2010
00007 //
00008 // (C) Copyright 2010, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 
00022 #include <stdio.h>
00023 #ifdef __UNIX__
00024 #include <assert.h>
00025 #endif
00026 #include <math.h>
00027 
00028 #include "associate.h"
00029 #include "normalis.h"
00030 #include "pageres.h"
00031 
00032 namespace tesseract {
00033 
// Normalized width (blob width divided by the normalizing height) above which
// FixedPitchWidthCost() adds an extra norm_width * norm_width penalty.
00034 const float AssociateUtils::kMaxFixedPitchCharAspectRatio = 2.0f;
// Smallest acceptable gap next to a blob, expressed as a fraction of the
// normalizing height. ComputeStats() marks the shape as bad in fixed pitch
// mode when a neighbouring gap falls below this value.
00035 const float AssociateUtils::kMinGap = 0.03f;
00036 
00037 void AssociateUtils::ComputeStats(int col, int row,
00038                                   const AssociateStats *parent_stats,
00039                                   int parent_path_length,
00040                                   bool fixed_pitch,
00041                                   float max_char_wh_ratio,
00042                                   WERD_RES *word_res,
00043                                   bool debug,
00044                                   AssociateStats *stats) {
00045   stats->Clear();
00046 
00047   ASSERT_HOST(word_res != NULL);
00048   if (word_res->blob_widths.empty()) {
00049     return;
00050   }
00051   if (debug) {
00052     tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s\n",
00053             col, row, fixed_pitch ? " (fixed pitch)" : "");
00054   }
00055   float normalizing_height = kBlnXHeight;
00056   ROW* blob_row = word_res->blob_row;
00057   // TODO(rays/daria) Can unicharset.script_has_xheight be useful here?
00058   if (fixed_pitch && blob_row != NULL) {
00059     // For fixed pitch language like CJK, we use the full text height
00060     // as the normalizing factor so we are not dependent on xheight
00061     // calculation.
00062     if (blob_row->body_size() > 0.0f) {
00063       normalizing_height = word_res->denorm.y_scale() * blob_row->body_size();
00064     } else {
00065       normalizing_height = word_res->denorm.y_scale() *
00066           (blob_row->x_height() + blob_row->ascenders());
00067     }
00068     if (debug) {
00069       tprintf("normalizing height = %g (scale %g xheight %g ascenders %g)\n",
00070               normalizing_height, word_res->denorm.y_scale(),
00071               blob_row->x_height(), blob_row->ascenders());
00072     }
00073   }
00074   float wh_ratio = word_res->GetBlobsWidth(col, row) / normalizing_height;
00075   if (wh_ratio > max_char_wh_ratio) stats->bad_shape = true;
00076   // Compute the gap sum for this shape. If there are only negative or only
00077   // positive gaps, record their sum in stats->gap_sum. However, if there is
00078   // a mixture, record only the sum of the positive gaps.
00079   // TODO(antonova): explain fragment.
00080   int negative_gap_sum = 0;
00081   for (int c = col; c < row; ++c) {
00082     int gap = word_res->GetBlobsGap(c);
00083     (gap > 0) ? stats->gap_sum += gap : negative_gap_sum += gap;
00084   }
00085   if (stats->gap_sum == 0) stats->gap_sum = negative_gap_sum;
00086   if (debug) {
00087     tprintf("wh_ratio=%g (max_char_wh_ratio=%g) gap_sum=%d %s\n",
00088             wh_ratio, max_char_wh_ratio, stats->gap_sum,
00089             stats->bad_shape ? "bad_shape" : "");
00090   }
00091   // Compute shape_cost (for fixed pitch mode).
00092   if (fixed_pitch) {
00093     bool end_row = (row == (word_res->ratings->dimension() - 1));
00094 
00095     // Ensure that the blob has gaps on the left and the right sides
00096     // (except for beginning and ending punctuation) and that there is
00097     // no cutting through ink at the blob boundaries.
00098     if (col > 0) {
00099       float left_gap = word_res->GetBlobsGap(col - 1) / normalizing_height;
00100       SEAM *left_seam = word_res->seam_array[col - 1];
00101       if ((!end_row && left_gap < kMinGap) || left_seam->priority > 0.0f) {
00102         stats->bad_shape = true;
00103       }
00104       if (debug) {
00105         tprintf("left_gap %g, left_seam %g %s\n", left_gap, left_seam->priority,
00106                 stats->bad_shape ? "bad_shape" : "");
00107       }
00108     }
00109     float right_gap = 0.0f;
00110     if (!end_row) {
00111       right_gap = word_res->GetBlobsGap(row) / normalizing_height;
00112       SEAM *right_seam = word_res->seam_array[row];
00113       if (right_gap < kMinGap || right_seam->priority > 0.0f) {
00114         stats->bad_shape = true;
00115         if (right_gap < kMinGap) stats->bad_fixed_pitch_right_gap = true;
00116       }
00117       if (debug) {
00118         tprintf("right_gap %g right_seam %g %s\n",
00119                 right_gap, right_seam->priority,
00120                 stats->bad_shape ? "bad_shape" : "");
00121       }
00122     }
00123 
00124     // Impose additional segmentation penalties if blob widths or gaps
00125     // distribution don't fit a fixed-pitch model.
00126     // Since we only know the widths and gaps of the path explored so far,
00127     // the means and variances are computed for the path so far (not
00128     // considering characters to the right of the last character on the path).
00129     stats->full_wh_ratio = wh_ratio + right_gap;
00130     if (parent_stats != NULL) {
00131       stats->full_wh_ratio_total =
00132         (parent_stats->full_wh_ratio_total + stats->full_wh_ratio);
00133       float mean =
00134         stats->full_wh_ratio_total / static_cast<float>(parent_path_length+1);
00135       stats->full_wh_ratio_var =
00136         parent_stats->full_wh_ratio_var + pow(mean-stats->full_wh_ratio, 2);
00137     } else {
00138       stats->full_wh_ratio_total = stats->full_wh_ratio;
00139     }
00140     if (debug) {
00141       tprintf("full_wh_ratio %g full_wh_ratio_total %g full_wh_ratio_var %g\n",
00142               stats->full_wh_ratio, stats->full_wh_ratio_total,
00143               stats->full_wh_ratio_var);
00144     }
00145 
00146     stats->shape_cost =
00147       FixedPitchWidthCost(wh_ratio, right_gap, end_row, max_char_wh_ratio);
00148 
00149     // For some reason Tesseract prefers to treat the whole CJ words
00150     // as one blob when the initial segmentation is particularly bad.
00151     // This hack is to avoid favoring such states.
00152     if (col == 0 && end_row && wh_ratio > max_char_wh_ratio) {
00153       stats->shape_cost += 10;
00154     }
00155     stats->shape_cost += stats->full_wh_ratio_var;
00156     if (debug) tprintf("shape_cost %g\n", stats->shape_cost);
00157   }
00158 }
00159 
00160 float AssociateUtils::FixedPitchWidthCost(float norm_width,
00161                                           float right_gap,
00162                                           bool end_pos,
00163                                           float max_char_wh_ratio) {
00164   float cost = 0.0f;
00165   if (norm_width > max_char_wh_ratio) cost += norm_width;
00166   if (norm_width > kMaxFixedPitchCharAspectRatio)
00167     cost += norm_width * norm_width;  // extra penalty for merging CJK chars
00168   // Penalize skinny blobs, except for punctuation in the last position.
00169   if (norm_width+right_gap < 0.5f && !end_pos) {
00170     cost += 1.0f - (norm_width + right_gap);
00171   }
00172   return cost;
00173 }
00174 
00175 }  // namespace tesseract
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines