#include "mfcpch.h"
#include "stderr.h"
#include "blobbox.h"
#include "ccstruct.h"
#include "detlinefit.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "elst.h"
#include "notdll.h"
#include "params.h"
#include "gap_map.h"
#include "publictypes.h"
#include "tordmain.h"
#include "fpchop.h"
#include "ocrblock.h"
#include "blobs.h"
#include "tprintf.h"
#include "tovars.h"
Namespaces | |
namespace | tesseract |
Defines | |
#define | MAX_HEIGHT_MODES 12 |
Functions | |
float | MakeRowFromSubBlobs (TO_BLOCK *block, C_BLOB *blob, TO_ROW_IT *row_it) |
make_single_row | |
float | make_single_row (ICOORD page_tr, TO_BLOCK *block, TO_BLOCK_LIST *blocks) |
make_rows | |
float | make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks) |
make_initial_textrows | |
void | make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on) |
fit_lms_line | |
void | fit_lms_line (TO_ROW *row) |
find_best_dropout_row | |
BOOL8 | find_best_dropout_row (TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on) |
deskew_block_coords | |
TBOX | deskew_block_coords (TO_BLOCK *block, float gradient) |
compute_line_occupation | |
void | compute_line_occupation (TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas) |
void | compute_occupation_threshold (inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds) |
compute_dropout_distances | |
void | compute_dropout_distances (inT32 *occupation, inT32 *thresholds, inT32 line_count) |
expand_rows | |
void | expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on) |
void | adjust_row_limits (TO_BLOCK *block) |
compute_row_stats | |
void | compute_row_stats (TO_BLOCK *block, BOOL8 testing_on) |
fill_heights | |
void | fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights) |
compute_xheight_from_modes | |
Given a STATS object heights, looks for the two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly; otherwise sets *xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight, the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, `, ", ', etc.). If cap_only, then force finding of only the top mode. | |
int | compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise) |
compute_row_descdrop | |
Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height; otherwise it returns 0. | |
inT32 | compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights) |
compute_height_modes | |
inT32 | compute_height_modes (STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes) |
correct_row_xheight | |
void | correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop) |
separate_underlines | |
void | separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, BOOL8 testing_on) |
pre_associate_blobs | |
void | pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on) |
fit_parallel_rows | |
void | fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on) |
fit_parallel_lms | |
void | fit_parallel_lms (float gradient, TO_ROW *row) |
make_baseline_spline | |
void | make_baseline_spline (TO_ROW *row, TO_BLOCK *block) |
segment_baseline | |
BOOL8 | segment_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[]) |
linear_spline_baseline | |
double * | linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[]) |
assign_blobs_to_rows | |
void | assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew) |
most_overlapping_row | |
OVERLAP_STATE | most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob) |
blob_x_order | |
int | blob_x_order (const void *item1, const void *item2) |
row_y_order | |
int | row_y_order (const void *item1, const void *item2) |
row_spacing_order | |
int | row_spacing_order (const void *item1, const void *item2) |
mark_repeated_chars | |
Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX. | |
void | mark_repeated_chars (TO_ROW *row) |
Variables | |
bool | textord_heavy_nr = FALSE |
bool | textord_show_initial_rows = FALSE |
bool | textord_show_parallel_rows = FALSE |
bool | textord_show_expanded_rows = FALSE |
bool | textord_show_final_rows = FALSE |
bool | textord_show_final_blobs = FALSE |
bool | textord_test_landscape = FALSE |
bool | textord_parallel_baselines = TRUE |
bool | textord_straight_baselines = FALSE |
bool | textord_old_baselines = TRUE |
bool | textord_old_xheight = FALSE |
bool | textord_fix_xheight_bug = TRUE |
bool | textord_fix_makerow_bug = TRUE |
bool | textord_debug_xheights = FALSE |
bool | textord_biased_skewcalc = TRUE |
bool | textord_interpolating_skew = TRUE |
int | textord_skewsmooth_offset = 2 |
int | textord_skewsmooth_offset2 = 1 |
int | textord_test_x = -1 |
int | textord_test_y = -1 |
int | textord_min_blobs_in_row = 4 |
int | textord_spline_minblobs = 8 |
int | textord_spline_medianwin = 6 |
int | textord_max_blob_overlaps = 4 |
int | textord_min_xheight = 10 |
double | textord_spline_shift_fraction = 0.02 |
double | textord_spline_outlier_fraction = 0.1 |
double | textord_skew_ile = 0.5 |
double | textord_skew_lag = 0.01 |
double | textord_linespace_iqrlimit = 0.2 |
double | textord_width_limit = 8 |
double | textord_chop_width = 1.5 |
double | textord_expansion_factor = 1.0 |
double | textord_overlap_x = 0.5 |
double | textord_minxh = 0.25 |
double | textord_min_linesize = 1.25 |
double | textord_excess_blobsize = 1.3 |
double | textord_occupancy_threshold = 0.4 |
double | textord_underline_width = 2.0 |
double | textord_min_blob_height_fraction = 0.75 |
double | textord_xheight_mode_fraction = 0.4 |
double | textord_ascheight_mode_fraction = 0.08 |
double | textord_descheight_mode_fraction = 0.08 |
double | textord_ascx_ratio_min = 1.25 |
double | textord_ascx_ratio_max = 1.8 |
double | textord_descx_ratio_min = 0.25 |
double | textord_descx_ratio_max = 0.6 |
double | textord_xheight_error_margin = 0.1 |
int | textord_lms_line_trials = 12 |
bool | textord_new_initial_xheight = TRUE |
const int | kMinLeaderCount = 5 |
compute_page_skew | |
Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row. | |
const double | kNoiseSize = 0.5 |
const int | kMinSize = 8 |
void | compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err) |
void | cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on) |
void | delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on) |
#define MAX_HEIGHT_MODES 12 |
void adjust_row_limits | ( | TO_BLOCK * | block | ) |
adjust_row_limits
Change the limits of rows to suit the default fractions.
void assign_blobs_to_rows | ( | TO_BLOCK * | block, | |
float * | gradient, | |||
int | pass, | |||
BOOL8 | reject_misses, | |||
BOOL8 | make_new_rows, | |||
BOOL8 | drawing_skew | |||
) |
int blob_x_order | ( | const void * | item1, | |
const void * | item2 | |||
) |
void cleanup_rows_making | ( | ICOORD | page_tr, | |
TO_BLOCK * | block, | |||
float | gradient, | |||
FCOORD | rotation, | |||
inT32 | block_edge, | |||
BOOL8 | testing_on | |||
) |
cleanup_rows_making
Remove overlapping rows and fit all the blobs to what's left.
inT32 compute_height_modes | ( | STATS * | heights, | |
inT32 | min_height, | |||
inT32 | max_height, | |||
inT32 * | modes, | |||
inT32 | maxmodes | |||
) |
void compute_line_occupation | ( | TO_BLOCK * | block, | |
float | gradient, | |||
inT32 | min_y, | |||
inT32 | max_y, | |||
inT32 * | occupation, | |||
inT32 * | deltas | |||
) |
void compute_occupation_threshold | ( | inT32 | low_window, | |
inT32 | high_window, | |||
inT32 | line_count, | |||
inT32 * | occupation, | |||
inT32 * | thresholds | |||
) |
compute_occupation_threshold
Compute thresholds for textline or not for the occupation array.
void compute_page_skew | ( | TO_BLOCK_LIST * | blocks, | |
float & | page_m, | |||
float & | page_err | |||
) |
inT32 compute_row_descdrop | ( | TO_ROW * | row, | |
float | gradient, | |||
int | xheight_blob_count, | |||
STATS * | asc_heights | |||
) |
int compute_xheight_from_modes | ( | STATS * | heights, | |
STATS * | floating_heights, | |||
bool | cap_only, | |||
int | min_height, | |||
int | max_height, | |||
float * | xheight, | |||
float * | ascrise | |||
) |
void correct_row_xheight | ( | TO_ROW * | row, | |
float | xheight, | |||
float | ascrise, | |||
float | descdrop | |||
) |
void delete_non_dropout_rows | ( | TO_BLOCK * | block, | |
float | gradient, | |||
FCOORD | rotation, | |||
inT32 | block_edge, | |||
BOOL8 | testing_on | |||
) |
delete_non_dropout_rows
Compute the linespacing and offset.
void expand_rows | ( | ICOORD | page_tr, | |
TO_BLOCK * | block, | |||
float | gradient, | |||
FCOORD | rotation, | |||
inT32 | block_edge, | |||
BOOL8 | testing_on | |||
) |
void fill_heights | ( | TO_ROW * | row, | |
float | gradient, | |||
int | min_height, | |||
int | max_height, | |||
STATS * | heights, | |||
STATS * | floating_heights | |||
) |
BOOL8 find_best_dropout_row | ( | TO_ROW * | row, | |
inT32 | distance, | |||
float | dist_limit, | |||
inT32 | line_index, | |||
TO_ROW_IT * | row_it, | |||
BOOL8 | testing_on | |||
) |
void fit_lms_line | ( | TO_ROW * | row | ) |
void fit_parallel_lms | ( | float | gradient, | |
TO_ROW * | row | |||
) |
void fit_parallel_rows | ( | TO_BLOCK * | block, | |
float | gradient, | |||
FCOORD | rotation, | |||
inT32 | block_edge, | |||
BOOL8 | testing_on | |||
) |
double* linear_spline_baseline | ( | TO_ROW * | row, | |
TO_BLOCK * | block, | |||
inT32 & | segments, | |||
inT32 | xstarts[] | |||
) |
float make_rows | ( | ICOORD | page_tr, | |
TO_BLOCK_LIST * | port_blocks | |||
) |
void mark_repeated_chars | ( | TO_ROW * | row | ) |
OVERLAP_STATE most_overlapping_row | ( | TO_ROW_IT * | row_it, | |
TO_ROW *& | best_row, | |||
float | top, | |||
float | bottom, | |||
float | rowsize, | |||
BOOL8 | testing_blob | |||
) |
int row_spacing_order | ( | const void * | item1, | |
const void * | item2 | |||
) |
int row_y_order | ( | const void * | item1, | |
const void * | item2 | |||
) |
const int kMinLeaderCount = 5 |
const int kMinSize = 8 |
const double kNoiseSize = 0.5 |
double textord_ascheight_mode_fraction = 0.08 |
"Min pile height to make ascheight"
double textord_ascx_ratio_max = 1.8 |
"Max cap/xheight"
double textord_ascx_ratio_min = 1.25 |
"Min cap/xheight"
bool textord_biased_skewcalc = TRUE |
"Bias skew estimates with line length"
double textord_chop_width = 1.5 |
"Max width before chopping"
bool textord_debug_xheights = FALSE |
"Test xheight algorithms"
double textord_descheight_mode_fraction = 0.08 |
"Min pile height to make descheight"
double textord_descx_ratio_max = 0.6 |
"Max desc/xheight"
double textord_descx_ratio_min = 0.25 |
"Min desc/xheight"
double textord_excess_blobsize = 1.3 |
"New row made if blob makes row this big"
double textord_expansion_factor = 1.0 |
"Factor to expand rows by in expand_rows"
bool textord_fix_makerow_bug = TRUE |
"Prevent multiple baselines"
bool textord_fix_xheight_bug = TRUE |
"Use spline baseline"
bool textord_heavy_nr = FALSE |
"Vigorously remove noise"
bool textord_interpolating_skew = TRUE |
"Interpolate across gaps"
double textord_linespace_iqrlimit = 0.2 |
"Max iqr/median for linespace"
int textord_lms_line_trials = 12 |
"Number of linew fits to do"
int textord_max_blob_overlaps = 4 |
"Max number of blobs a big blob can overlap"
double textord_min_blob_height_fraction = 0.75 |
"Min blob height/top to include blob top into xheight stats"
int textord_min_blobs_in_row = 4 |
"Min blobs before gradient counted"
double textord_min_linesize = 1.25 |
"* blob height for initial linesize"
int textord_min_xheight = 10 |
"Min credible pixel xheight"
double textord_minxh = 0.25 |
"fraction of linesize for min xheight"
bool textord_new_initial_xheight = TRUE |
"Use test xheight mechanism"
double textord_occupancy_threshold = 0.4 |
"Fraction of neighbourhood"
bool textord_old_baselines = TRUE |
"Use old baseline algorithm"
bool textord_old_xheight = FALSE |
"Use old xheight algorithm"
double textord_overlap_x = 0.5 |
"Fraction of linespace for good overlap"
bool textord_parallel_baselines = TRUE |
"Force parallel baselines"
bool textord_show_expanded_rows = FALSE |
"Display rows after expanding"
bool textord_show_final_blobs = FALSE |
"Display blob bounds after pre-ass"
bool textord_show_final_rows = FALSE |
"Display rows after final fitting"
bool textord_show_initial_rows = FALSE |
"Display row accumulation"
bool textord_show_parallel_rows = FALSE |
"Display page correlated rows"
double textord_skew_ile = 0.5 |
"Ile of gradients for page skew"
double textord_skew_lag = 0.01 |
"Lag for skew on row accumulation"
int textord_skewsmooth_offset = 2 |
"For smooth factor"
int textord_skewsmooth_offset2 = 1 |
"For smooth factor"
int textord_spline_medianwin = 6 |
"Size of window for spline segmentation"
int textord_spline_minblobs = 8 |
"Min blobs in each spline segment"
double textord_spline_outlier_fraction = 0.1 |
"Fraction of line spacing for outlier"
double textord_spline_shift_fraction = 0.02 |
"Fraction of line spacing for quad"
bool textord_straight_baselines = FALSE |
"Force straight baselines"
bool textord_test_landscape = FALSE |
"Tests refer to land/port"
int textord_test_x = -1 |
"coord of test pt"
int textord_test_y = -1 |
"coord of test pt"
double textord_underline_width = 2.0 |
"Multiple of line_size for underline"
double textord_width_limit = 8 |
"Max width of blobs to make rows"
double textord_xheight_error_margin = 0.1 |
"Accepted variation"
double textord_xheight_mode_fraction = 0.4 |
"Min pile height to make xheight"