tesseract::Tesseract Class Reference

#include <tesseractclass.h>

Inheritance diagram for tesseract::Tesseract:
tesseract::Wordrec tesseract::Classify tesseract::CCStruct tesseract::CUtil tesseract::CCUtil

List of all members.

Public Member Functions

 Tesseract ()
 ~Tesseract ()
void Clear ()
void ResetAdaptiveClassifier ()
void ResetDocumentDictionary ()
void SetEquationDetect (EquationDetect *detector)
const FCOORD & reskew () const
Pix ** mutable_pix_binary ()
Pix * pix_binary () const
Pix * pix_grey () const
void set_pix_grey (Pix *grey_pix)
Pix * BestPix () const
int source_resolution () const
void set_source_resolution (int ppi)
int ImageWidth () const
int ImageHeight () const
Pix * scaled_color () const
int scaled_factor () const
void SetScaledColor (int factor, Pix *color)
const Textord & textord () const
Textord * mutable_textord ()
bool right_to_left () const
int num_sub_langs () const
Tesseract * get_sub_lang (int index) const
void SetBlackAndWhitelist ()
void PrepareForPageseg ()
void PrepareForTessOCR (BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
int SegmentPage (const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
void SetupWordScripts (BLOCK_LIST *blocks)
int AutoPageSeg (bool single_column, bool osd, bool only_osd, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks, Tesseract *osd_tess, OSResults *osr)
ColumnFinder * SetupPageSegAndDetectOrientation (bool single_column, bool osd, bool only_osd, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr, TO_BLOCK_LIST *to_blocks, Pix **photo_mask_pix, Pix **music_mask_pix)
bool ProcessTargetWord (const TBOX &word_box, const TBOX &target_word_box, const char *word_config, int pass)
bool recog_all_words (PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
void rejection_passes (PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config)
void bigram_correction_pass (PAGE_RES *page_res)
void blamer_pass (PAGE_RES *page_res)
bool RetryWithLanguage (WERD_RES *word, BLOCK *block, ROW *row, WordRecognizer recognizer)
void classify_word_and_language (WordRecognizer recognizer, BLOCK *block, ROW *row, WERD_RES *word)
void classify_word_pass1 (BLOCK *block, ROW *row, WERD_RES *word)
void recog_pseudo_word (PAGE_RES *page_res, TBOX &selection_box)
void fix_rep_char (PAGE_RES_IT *page_res_it)
void ExplodeRepeatedWord (BLOB_CHOICE *best_choice, PAGE_RES_IT *page_res_it)
ACCEPTABLE_WERD_TYPE acceptable_word_string (const UNICHARSET &char_set, const char *s, const char *lengths)
void match_word_pass2 (WERD_RES *word, ROW *row, BLOCK *block)
void classify_word_pass2 (BLOCK *block, ROW *row, WERD_RES *word)
void ReportXhtFixResult (bool accept_new_word, float new_x_ht, WERD_RES *word, WERD_RES *new_word)
bool RunOldFixXht (WERD_RES *word, BLOCK *block, ROW *row)
bool TrainedXheightFix (WERD_RES *word, BLOCK *block, ROW *row)
BOOL8 recog_interactive (BLOCK *block, ROW *row, WERD_RES *word_res)
void set_word_fonts (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices)
void font_recognition_pass (PAGE_RES *page_res)
BOOL8 check_debug_pt (WERD_RES *word, int location)
bool init_cube_objects (bool load_combiner, TessdataManager *tessdata_manager)
void run_cube_combiner (PAGE_RES *page_res)
void cube_word_pass1 (BLOCK *block, ROW *row, WERD_RES *word)
CubeObject * cube_recognize_word (BLOCK *block, WERD_RES *word)
void cube_combine_word (CubeObject *cube_obj, WERD_RES *cube_word, WERD_RES *tess_word)
bool cube_recognize (CubeObject *cube_obj, BLOCK *block, WERD_RES *word)
void fill_werd_res (const BoxWord &cube_box_word, WERD_CHOICE *cube_werd_choice, const char *cube_best_str, WERD_RES *tess_werd_res)
bool extract_cube_state (CubeObject *cube_obj, int *num_chars, Boxa **char_boxes, CharSamp ***char_samples)
bool create_cube_box_word (Boxa *char_boxes, int num_chars, TBOX word_box, BoxWord *box_word)
void output_pass (PAGE_RES_IT &page_res_it, const TBOX *target_word_box)
void write_results (PAGE_RES_IT &page_res_it, char newline_type, BOOL8 force_eol)
void set_unlv_suspects (WERD_RES *word)
UNICHAR_ID get_rep_char (WERD_RES *word)
BOOL8 acceptable_number_string (const char *s, const char *lengths)
inT16 count_alphanums (const WERD_CHOICE &word)
inT16 count_alphas (const WERD_CHOICE &word)
void read_config_file (const char *filename, SetParamConstraint constraint)
int init_tesseract (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
int init_tesseract (const char *datapath, const char *language, OcrEngineMode oem)
int init_tesseract_internal (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
void SetupUniversalFontIds ()
int init_tesseract_lm (const char *arg0, const char *textbase, const char *language)
void recognize_page (STRING &image_name)
void end_tesseract ()
bool init_tesseract_lang_data (const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params)
void ParseLanguageString (const char *lang_str, GenericVector< STRING > *to_load, GenericVector< STRING > *not_to_load)
SVMenuNode * build_menu_new ()
void pgeditor_main (int width, int height, PAGE_RES *page_res)
void process_image_event (const SVEvent &event)
BOOL8 process_cmd_win_event (inT32 cmd_event, char *new_value)
void debug_word (PAGE_RES *page_res, const TBOX &selection_box)
void do_re_display (BOOL8(tesseract::Tesseract::*word_painter)(BLOCK *block, ROW *row, WERD_RES *word_res))
BOOL8 word_display (BLOCK *block, ROW *row, WERD_RES *word_res)
BOOL8 word_bln_display (BLOCK *block, ROW *row, WERD_RES *word_res)
BOOL8 word_blank_and_set_display (BLOCK *block, ROW *row, WERD_RES *word_res)
BOOL8 word_set_display (BLOCK *block, ROW *row, WERD_RES *word_res)
BOOL8 word_dumper (BLOCK *block, ROW *row, WERD_RES *word_res)
void make_reject_map (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices, ROW *row, inT16 pass)
BOOL8 one_ell_conflict (WERD_RES *word_res, BOOL8 update_map)
inT16 first_alphanum_index (const char *word, const char *word_lengths)
inT16 first_alphanum_offset (const char *word, const char *word_lengths)
inT16 alpha_count (const char *word, const char *word_lengths)
BOOL8 word_contains_non_1_digit (const char *word, const char *word_lengths)
void dont_allow_1Il (WERD_RES *word)
inT16 count_alphanums (WERD_RES *word)
void flip_0O (WERD_RES *word)
BOOL8 non_0_digit (const UNICHARSET &ch_set, UNICHAR_ID unichar_id)
BOOL8 non_O_upper (const UNICHARSET &ch_set, UNICHAR_ID unichar_id)
BOOL8 repeated_nonalphanum_wd (WERD_RES *word, ROW *row)
void nn_match_word (WERD_RES *word, ROW *row)
void nn_recover_rejects (WERD_RES *word, ROW *row)
BOOL8 test_ambig_word (WERD_RES *word)
void set_done (WERD_RES *word, inT16 pass)
inT16 safe_dict_word (const WERD_RES *werd_res)
void flip_hyphens (WERD_RES *word)
void reject_I_1_L (WERD_RES *word)
void reject_edge_blobs (WERD_RES *word)
void reject_mostly_rejects (WERD_RES *word)
BOOL8 word_adaptable (WERD_RES *word, uinT16 mode)
void recog_word_recursive (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices)
void recog_word (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices)
void split_and_recog_word (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices)
void match_current_words (WERD_RES_LIST &words, ROW *row, BLOCK *block)
inT16 fp_eval_word_spacing (WERD_RES_LIST &word_res_list)
void dump_words (WERD_RES_LIST &perm, inT16 score, inT16 mode, BOOL8 improved)
GARBAGE_LEVEL garbage_word (WERD_RES *word, BOOL8 ok_dict_word)
BOOL8 potential_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level, BOOL8 ok_dict_word)
void tilde_crunch (PAGE_RES_IT &page_res_it)
void unrej_good_quality_words (PAGE_RES_IT &page_res_it)
void doc_and_block_rejection (PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc)
void quality_based_rejection (PAGE_RES_IT &page_res_it, BOOL8 good_quality_doc)
void convert_bad_unlv_chs (WERD_RES *word_res)
void tilde_delete (PAGE_RES_IT &page_res_it)
inT16 word_blob_quality (WERD_RES *word, ROW *row)
void word_char_quality (WERD_RES *word, ROW *row, inT16 *match_count, inT16 *accepted_match_count)
void unrej_good_chs (WERD_RES *word, ROW *row)
inT16 count_outline_errs (char c, inT16 outline_count)
inT16 word_outline_errs (WERD_RES *word)
BOOL8 terrible_word_crunch (WERD_RES *word, GARBAGE_LEVEL garbage_level)
CRUNCH_MODE word_deletable (WERD_RES *word, inT16 &delete_mode)
inT16 failure_count (WERD_RES *word)
BOOL8 noise_outlines (TWERD *word)
void process_selected_words (PAGE_RES *page_res, TBOX &selection_box, BOOL8(tesseract::Tesseract::*word_processor)(BLOCK *block, ROW *row, WERD_RES *word_res))
void tess_segment_pass1 (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices)
PAGE_RES * ApplyBoxes (const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
PAGE_RES * SetupApplyBoxes (const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
void MaximallyChopWord (const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
bool ResegmentCharBox (PAGE_RES *page_res, const TBOX *prev_box, const TBOX &box, const TBOX &next_box, const char *correct_text)
bool ResegmentWordBox (BLOCK_LIST *block_list, const TBOX &box, const TBOX &next_box, const char *correct_text)
void ReSegmentByClassification (PAGE_RES *page_res)
bool ConvertStringToUnichars (const char *utf8, GenericVector< UNICHAR_ID > *class_ids)
bool FindSegmentation (const GenericVector< UNICHAR_ID > &target_text, WERD_RES *word_res)
void SearchForText (const GenericVector< BLOB_CHOICE_LIST * > *choices, int choices_pos, int choices_length, const GenericVector< UNICHAR_ID > &target_text, int text_index, float rating, GenericVector< int > *segmentation, float *best_rating, GenericVector< int > *best_segmentation)
void TidyUp (PAGE_RES *page_res)
void ReportFailedBox (int boxfile_lineno, TBOX box, const char *box_ch, const char *err_msg)
void CorrectClassifyWords (PAGE_RES *page_res)
void ApplyBoxTraining (const STRING &filename, PAGE_RES *page_res)
int CountMisfitTops (WERD_RES *word_res)
float ComputeCompatibleXheight (WERD_RES *word_res)
FILE * init_recog_training (const STRING &fname)
void recog_training_segmented (const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
void ambigs_classify_and_output (WERD_RES *werd_res, ROW_RES *row_res, BLOCK_RES *block_res, const char *label, FILE *output_file)
CubeRecoContext * GetCubeRecoContext ()
eval_word_spacing()

The basic measure is the number of characters in contextually confirmed words (i.e. the word is done). If all words are contextually confirmed the evaluation is deemed perfect.

Some fiddles are done to handle "1"s as these are VERY frequent causes of fuzzy spaces. The problem with the basic measure is that "561 63" would score the same as "56163", though given our knowledge that the space is fuzzy, and that there is a "1" next to the fuzzy space, we need to ensure that "56163" is preferred.

The solution is to NOT COUNT the score of any word which has a digit at one end and a "1Il" as the character the other side of the space.

Conversely, any character next to a "1" within a word is counted as a positive score. Thus "561 63" would score 4 (3 chars in a numeric word plus 1 side of the "1" joined). "56163" would score 7 - all chars in a numeric word + 2 sides of a "1" joined.

The joined 1 rule is applied to any word REGARDLESS of contextual confirmation. Thus "PS7a71 3/7a" scores 1 (neither word is contextually confirmed; the only score is from the joined 1). "PS7a713/7a" scores 2.

BOOL8 digit_or_numeric_punct (WERD_RES *word, int char_position)
inT16 eval_word_spacing (WERD_RES_LIST &word_res_list)
fix_sp_fp_word()

Test the current word to see if it can be split by deleting noise blobs. If so, do the business. Return with the iterator pointing to the same place if the word is unchanged, or the last of the replacement words.

void fix_noisy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block)
void fix_sp_fp_word (WERD_RES_IT &word_res_it, ROW *row, BLOCK *block)
inT16 worst_noise_blob (WERD_RES *word_res, float *worst_noise_score)
float blob_noise_score (TBLOB *blob)
void break_noisiest_blob_word (WERD_RES_LIST &words)
fix_fuzzy_spaces()

Walk over the page finding sequences of words joined by fuzzy spaces. Extract them as a sublist, process the sublist to find the optimal arrangement of spaces then replace the sublist in the ROW_RES.

Parameters:
monitor progress monitor
word_count count of words in doc
[out] page_res 
void fix_fuzzy_space_list (WERD_RES_LIST &best_perm, ROW *row, BLOCK *block)
void fix_fuzzy_spaces (ETEXT_DESC *monitor, inT32 word_count, PAGE_RES *page_res)
uniformly_spaced()

Return true if any one of the following is true:

  • All inter-char gaps are the same width
  • The largest gap is no larger than twice the mean/median of the others
  • The largest gap is < normalised_max_nonspace **** REMEMBER - WE'RE NOW WORKING WITH A BLN WERD !!!
BOOL8 uniformly_spaced (WERD_RES *word)
BOOL8 fixspace_thinks_word_done (WERD_RES *word)
tess_add_doc_word

Add the given word to the document dictionary

void tess_add_doc_word (WERD_CHOICE *word_choice)
tess_segment_pass2

Segment a word using the pass2 conditions of the tess segmenter.

Parameters:
word word to do
blob_choices list of blob lists
void tess_segment_pass2 (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices)
tess_acceptable_word

Returns:
true if the word is regarded as "good enough".
Parameters:
word_choice after context
raw_choice before context
BOOL8 tess_acceptable_word (WERD_CHOICE *word_choice, WERD_CHOICE *raw_choice)

Public Attributes

bool tessedit_resegment_from_boxes = false
bool tessedit_resegment_from_line_boxes = false
bool tessedit_train_from_boxes = false
bool tessedit_make_boxes_from_boxes = false
bool tessedit_dump_pageseg_images = false
int tessedit_pageseg_mode = PSM_SINGLE_BLOCK
int tessedit_ocr_engine_mode = tesseract::OEM_TESSERACT_ONLY
char * tessedit_char_blacklist = ""
char * tessedit_char_whitelist = ""
bool tessedit_ambigs_training = false
int pageseg_devanagari_split_strategy = tesseract::ShiroRekhaSplitter::NO_SPLIT
int ocr_devanagari_split_strategy = tesseract::ShiroRekhaSplitter::NO_SPLIT
char * tessedit_write_params_to_file = ""
bool tessedit_adapt_to_char_fragments = true
bool tessedit_adaption_debug = false
int bidi_debug = 0
int applybox_debug = 1
int applybox_page = 0
char * applybox_exposure_pattern = ".exp"
bool applybox_learn_chars_and_char_frags_mode = false
bool applybox_learn_ngrams_mode = false
bool tessedit_display_outwords = false
bool tessedit_training_tess = false
bool tessedit_dump_choices = false
bool tessedit_fix_fuzzy_spaces = true
bool tessedit_unrej_any_wd = false
bool tessedit_fix_hyphens = true
bool tessedit_redo_xheight = true
bool tessedit_enable_doc_dict = true
bool tessedit_debug_fonts = false
bool tessedit_debug_block_rejection = false
bool tessedit_enable_bigram_correction = false
int tessedit_bigram_debug = 0
int debug_x_ht_level = 0
bool debug_acceptable_wds = false
char * chs_leading_punct = "('`\""
char * chs_trailing_punct1 = ").,;:?!"
char * chs_trailing_punct2 = ")'`\""
double quality_rej_pc = 0.08
double quality_blob_pc = 0.0
double quality_outline_pc = 1.0
double quality_char_pc = 0.95
int quality_min_initial_alphas_reqd = 2
bool tessedit_tess_adapt_to_rejmap = false
int tessedit_tess_adaption_mode = 0x27
bool tessedit_minimal_rej_pass1 = false
bool tessedit_test_adaption = false
bool tessedit_matcher_log = false
int tessedit_test_adaption_mode = 3
bool save_blob_choices = false
bool test_pt = false
double test_pt_x = 99999.99
double test_pt_y = 99999.99
int paragraph_debug_level = 0
int cube_debug_level = 1
char * outlines_odd = "%| "
char * outlines_2 = "ij!?%\":;"
bool docqual_excuse_outline_errs = false
bool tessedit_good_quality_unrej = true
bool tessedit_use_reject_spaces = true
double tessedit_reject_doc_percent = 65.00
double tessedit_reject_block_percent = 45.00
double tessedit_reject_row_percent = 40.00
double tessedit_whole_wd_rej_row_percent = 70.00
bool tessedit_preserve_blk_rej_perfect_wds = true
bool tessedit_preserve_row_rej_perfect_wds = true
bool tessedit_dont_blkrej_good_wds = false
bool tessedit_dont_rowrej_good_wds = false
int tessedit_preserve_min_wd_len = 2
bool tessedit_row_rej_good_docs = true
double tessedit_good_doc_still_rowrej_wd = 1.1
bool tessedit_reject_bad_qual_wds = true
bool tessedit_debug_doc_rejection = false
bool tessedit_debug_quality_metrics = false
bool bland_unrej = false
double quality_rowrej_pc = 1.1
bool unlv_tilde_crunching = true
bool crunch_early_merge_tess_fails = true
bool crunch_early_convert_bad_unlv_chs = false
double crunch_terrible_rating = 80.0
bool crunch_terrible_garbage = true
double crunch_poor_garbage_cert = -9.0
double crunch_poor_garbage_rate = 60
double crunch_pot_poor_rate = 40
double crunch_pot_poor_cert = -8.0
bool crunch_pot_garbage = true
double crunch_del_rating = 60
double crunch_del_cert = -10.0
double crunch_del_min_ht = 0.7
double crunch_del_max_ht = 3.0
double crunch_del_min_width = 3.0
double crunch_del_high_word = 1.5
double crunch_del_low_word = 0.5
double crunch_small_outlines_size = 0.6
int crunch_rating_max = 10
int crunch_pot_indicators = 1
bool crunch_leave_ok_strings = true
bool crunch_accept_ok = true
bool crunch_leave_accept_strings = false
bool crunch_include_numerals = false
int crunch_leave_lc_strings = 4
int crunch_leave_uc_strings = 4
int crunch_long_repetitions = 3
int crunch_debug = 0
int fixsp_non_noise_limit = 1
double fixsp_small_outlines_size = 0.28
bool tessedit_prefer_joined_punct = false
int fixsp_done_mode = 1
int debug_fix_space_level = 0
char * numeric_punctuation = ".,"
int x_ht_acceptance_tolerance = 8
int x_ht_min_change = 8
bool tessedit_write_block_separators = false
bool tessedit_write_rep_codes = false
bool tessedit_write_unlv = false
bool tessedit_create_hocr = false
char * unrecognised_char = "|"
int suspect_level = 99
int suspect_space_level = 100
int suspect_short_words = 2
bool suspect_constrain_1Il = false
double suspect_rating_per_ch = 999.9
double suspect_accept_rating = -999.9
bool tessedit_minimal_rejection = false
bool tessedit_zero_rejection = false
bool tessedit_word_for_word = false
bool tessedit_zero_kelvin_rejection = false
bool tessedit_consistent_reps = true
int tessedit_reject_mode = 0
int tessedit_ok_mode = 5
bool tessedit_rejection_debug = false
bool tessedit_flip_0O = true
double tessedit_lower_flip_hyphen = 1.5
double tessedit_upper_flip_hyphen = 1.8
bool rej_trust_doc_dawg = false
bool rej_1Il_use_dict_word = false
bool rej_1Il_trust_permuter_type = true
bool rej_use_tess_accepted = true
bool rej_use_tess_blanks = true
bool rej_use_good_perm = true
bool rej_use_sensible_wd = false
bool rej_alphas_in_number_perm = false
double rej_whole_of_mostly_reject_word_fract = 0.85
int tessedit_image_border = 2
char * ok_repeated_ch_non_alphanum_wds = "-?*\075"
char * conflict_set_I_l_1 = "Il1[]"
int min_sane_x_ht_pixels = 8
bool tessedit_create_boxfile = false
int tessedit_page_number = -1
bool tessedit_write_images = false
bool interactive_display_mode = false
char * file_type = ".tif"
bool tessedit_override_permuter = true
int tessdata_manager_debug_level = 0
char * tessedit_load_sublangs = ""
double min_orientation_margin = 7.0
bool textord_tabfind_show_vlines = false
bool textord_use_cjk_fp_model = FALSE
bool tessedit_init_config_only = false
bool textord_equation_detect = false

Constructor & Destructor Documentation

tesseract::Tesseract::Tesseract (  ) 
tesseract::Tesseract::~Tesseract (  ) 

Member Function Documentation

BOOL8 tesseract::Tesseract::acceptable_number_string ( const char *  s,
const char *  lengths 
)
ACCEPTABLE_WERD_TYPE tesseract::Tesseract::acceptable_word_string ( const UNICHARSET &  char_set,
const char *  s,
const char *  lengths 
)
inT16 tesseract::Tesseract::alpha_count ( const char *  word,
const char *  word_lengths 
)
void tesseract::Tesseract::ambigs_classify_and_output ( WERD_RES *  werd_res,
ROW_RES *  row_res,
BLOCK_RES *  block_res,
const char *  label,
FILE *  output_file 
)
PAGE_RES * tesseract::Tesseract::ApplyBoxes ( const STRING &  fname,
bool  find_segmentation,
BLOCK_LIST *  block_list 
)
void tesseract::Tesseract::ApplyBoxTraining ( const STRING &  filename,
PAGE_RES *  page_res 
)
int tesseract::Tesseract::AutoPageSeg ( bool  single_column,
bool  osd,
bool  only_osd,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks,
Tesseract *  osd_tess,
OSResults *  osr 
)

Auto page segmentation. Divide the page image into blocks of uniform text linespacing and images.

Resolution (in ppi) is derived from the input image.

The output goes in the blocks list with corresponding TO_BLOCKs in the to_blocks list.

If single_column is true, then no attempt is made to divide the image into columns, but multiple blocks are still made if the text is of non-uniform linespacing.

If osd (orientation and script detection) is true then that is performed as well. If only_osd is true, then only orientation and script detection is performed. If osd is desired (osd or only_osd), then osd_tess must be another Tesseract that was initialized especially for osd, and the results will be output into osr (orientation and script result).

Pix* tesseract::Tesseract::BestPix (  )  const [inline]
void tesseract::Tesseract::bigram_correction_pass ( PAGE_RES *  page_res  ) 
void tesseract::Tesseract::blamer_pass ( PAGE_RES *  page_res  ) 
float tesseract::Tesseract::blob_noise_score ( TBLOB *  blob  ) 
void tesseract::Tesseract::break_noisiest_blob_word ( WERD_RES_LIST &  words  ) 

break_noisiest_blob_word() Find the word with the blob which looks like the worst noise. Break the word into two, deleting the noise blob.

SVMenuNode * tesseract::Tesseract::build_menu_new (  ) 
BOOL8 tesseract::Tesseract::check_debug_pt ( WERD_RES *  word,
int  location 
)
void tesseract::Tesseract::classify_word_and_language ( WordRecognizer  recognizer,
BLOCK *  block,
ROW *  row,
WERD_RES *  word 
)
void tesseract::Tesseract::classify_word_pass1 ( BLOCK *  block,
ROW *  row,
WERD_RES *  word 
)

classify_word_pass1

Baseline normalize the word and pass it to Tess.

void tesseract::Tesseract::classify_word_pass2 ( BLOCK *  block,
ROW *  row,
WERD_RES *  word 
)

classify_word_pass2

Control what to do with the word in pass 2

void tesseract::Tesseract::Clear (  ) 
float tesseract::Tesseract::ComputeCompatibleXheight ( WERD_RES *  word_res  ) 
void tesseract::Tesseract::convert_bad_unlv_chs ( WERD_RES *  word_res  ) 
bool tesseract::Tesseract::ConvertStringToUnichars ( const char *  utf8,
GenericVector< UNICHAR_ID > *  class_ids 
)
void tesseract::Tesseract::CorrectClassifyWords ( PAGE_RES *  page_res  ) 
inT16 tesseract::Tesseract::count_alphanums ( WERD_RES *  word  ) 
inT16 tesseract::Tesseract::count_alphanums ( const WERD_CHOICE &  word  ) 
inT16 tesseract::Tesseract::count_alphas ( const WERD_CHOICE &  word  ) 
inT16 tesseract::Tesseract::count_outline_errs ( char  c,
inT16  outline_count 
)
int tesseract::Tesseract::CountMisfitTops ( WERD_RES *  word_res  ) 
bool tesseract::Tesseract::create_cube_box_word ( Boxa *  char_boxes,
int  num_chars,
TBOX  word_box,
BoxWord *  box_word 
)
void tesseract::Tesseract::cube_combine_word ( CubeObject *  cube_obj,
WERD_RES *  cube_word,
WERD_RES *  tess_word 
)
bool tesseract::Tesseract::cube_recognize ( CubeObject *  cube_obj,
BLOCK *  block,
WERD_RES *  word 
)
CubeObject * tesseract::Tesseract::cube_recognize_word ( BLOCK *  block,
WERD_RES *  word 
)
void tesseract::Tesseract::cube_word_pass1 ( BLOCK *  block,
ROW *  row,
WERD_RES *  word 
)
void tesseract::Tesseract::debug_word ( PAGE_RES *  page_res,
const TBOX &  selection_box 
)

debug_word

Process the whole image, but load word_config_ for the selected word(s).

BOOL8 tesseract::Tesseract::digit_or_numeric_punct ( WERD_RES *  word,
int  char_position 
)
void tesseract::Tesseract::do_re_display ( BOOL8(tesseract::Tesseract::*)(BLOCK *block, ROW *row, WERD_RES *word_res)  word_painter  ) 

do_re_display()

Redisplay page

void tesseract::Tesseract::doc_and_block_rejection ( PAGE_RES_IT &  page_res_it,
BOOL8  good_quality_doc 
)
void tesseract::Tesseract::dont_allow_1Il ( WERD_RES *  word  ) 
void tesseract::Tesseract::dump_words ( WERD_RES_LIST &  perm,
inT16  score,
inT16  mode,
BOOL8  improved 
)
void tesseract::Tesseract::end_tesseract (  ) 
inT16 tesseract::Tesseract::eval_word_spacing ( WERD_RES_LIST &  word_res_list  ) 
void tesseract::Tesseract::ExplodeRepeatedWord ( BLOB_CHOICE *  best_choice,
PAGE_RES_IT *  page_res_it 
)
bool tesseract::Tesseract::extract_cube_state ( CubeObject *  cube_obj,
int *  num_chars,
Boxa **  char_boxes,
CharSamp ***  char_samples 
)
inT16 tesseract::Tesseract::failure_count ( WERD_RES *  word  ) 
void tesseract::Tesseract::fill_werd_res ( const BoxWord &  cube_box_word,
WERD_CHOICE *  cube_werd_choice,
const char *  cube_best_str,
WERD_RES *  tess_werd_res 
)
bool tesseract::Tesseract::FindSegmentation ( const GenericVector< UNICHAR_ID > &  target_text,
WERD_RES *  word_res 
)
inT16 tesseract::Tesseract::first_alphanum_index ( const char *  word,
const char *  word_lengths 
)
inT16 tesseract::Tesseract::first_alphanum_offset ( const char *  word,
const char *  word_lengths 
)
void tesseract::Tesseract::fix_fuzzy_space_list ( WERD_RES_LIST &  best_perm,
ROW *  row,
BLOCK *  block 
)
void tesseract::Tesseract::fix_fuzzy_spaces ( ETEXT_DESC *  monitor,
inT32  word_count,
PAGE_RES *  page_res 
)
void tesseract::Tesseract::fix_noisy_space_list ( WERD_RES_LIST &  best_perm,
ROW *  row,
BLOCK *  block 
)
void tesseract::Tesseract::fix_rep_char ( PAGE_RES_IT *  page_res_it  ) 

fix_rep_char() The word is a repeated char. (Leader.) Find the repeated char character. Create the appropriate single-word or multi-word sequence according to the size of spaces in between blobs, and correct the classifications where some of the characters disagree with the majority.

void tesseract::Tesseract::fix_sp_fp_word ( WERD_RES_IT &  word_res_it,
ROW *  row,
BLOCK *  block 
)
BOOL8 tesseract::Tesseract::fixspace_thinks_word_done ( WERD_RES *  word  ) 
void tesseract::Tesseract::flip_0O ( WERD_RES *  word  ) 
void tesseract::Tesseract::flip_hyphens ( WERD_RES *  word  ) 
void tesseract::Tesseract::font_recognition_pass ( PAGE_RES *  page_res  ) 

font_recognition_pass

Smooth the fonts for the document.

inT16 tesseract::Tesseract::fp_eval_word_spacing ( WERD_RES_LIST &  word_res_list  ) 
GARBAGE_LEVEL tesseract::Tesseract::garbage_word ( WERD_RES *  word,
BOOL8  ok_dict_word 
)
UNICHAR_ID tesseract::Tesseract::get_rep_char ( WERD_RES *  word  ) 
Tesseract* tesseract::Tesseract::get_sub_lang ( int  index  )  const [inline]
CubeRecoContext* tesseract::Tesseract::GetCubeRecoContext (  )  [inline]
int tesseract::Tesseract::ImageHeight (  )  const [inline]
int tesseract::Tesseract::ImageWidth (  )  const [inline]
bool tesseract::Tesseract::init_cube_objects ( bool  load_combiner,
TessdataManager *  tessdata_manager 
)
FILE * tesseract::Tesseract::init_recog_training ( const STRING &  fname  ) 
int tesseract::Tesseract::init_tesseract ( const char *  datapath,
const char *  language,
OcrEngineMode  oem 
) [inline]
int tesseract::Tesseract::init_tesseract ( const char *  arg0,
const char *  textbase,
const char *  language,
OcrEngineMode  oem,
char **  configs,
int  configs_size,
const GenericVector< STRING > *  vars_vec,
const GenericVector< STRING > *  vars_values,
bool  set_only_init_params 
)
int tesseract::Tesseract::init_tesseract_internal ( const char *  arg0,
const char *  textbase,
const char *  language,
OcrEngineMode  oem,
char **  configs,
int  configs_size,
const GenericVector< STRING > *  vars_vec,
const GenericVector< STRING > *  vars_values,
bool  set_only_init_params 
)
bool tesseract::Tesseract::init_tesseract_lang_data ( const char *  arg0,
const char *  textbase,
const char *  language,
OcrEngineMode  oem,
char **  configs,
int  configs_size,
const GenericVector< STRING > *  vars_vec,
const GenericVector< STRING > *  vars_values,
bool  set_only_init_params 
)
int tesseract::Tesseract::init_tesseract_lm ( const char *  arg0,
const char *  textbase,
const char *  language 
)
void tesseract::Tesseract::make_reject_map ( WERD_RES *  word,
BLOB_CHOICE_LIST_CLIST *  blob_choices,
ROW *  row,
inT16  pass 
)
void tesseract::Tesseract::match_current_words ( WERD_RES_LIST &  words,
ROW *  row,
BLOCK *  block 
)
void tesseract::Tesseract::match_word_pass2 ( WERD_RES *  word,
ROW *  row,
BLOCK *  block 
)

match_word_pass2

Baseline normalize the word and pass it to Tess.

void tesseract::Tesseract::MaximallyChopWord ( const GenericVector< TBOX > &  boxes,
BLOCK *  block,
ROW *  row,
WERD_RES *  word_res 
)
Pix** tesseract::Tesseract::mutable_pix_binary (  )  [inline]
Textord* tesseract::Tesseract::mutable_textord (  )  [inline]
void tesseract::Tesseract::nn_match_word ( WERD_RES *  word,
ROW *  row 
)
void tesseract::Tesseract::nn_recover_rejects ( WERD_RES *  word,
ROW *  row 
)
BOOL8 tesseract::Tesseract::noise_outlines ( TWERD *  word  ) 
BOOL8 tesseract::Tesseract::non_0_digit ( const UNICHARSET &  ch_set,
UNICHAR_ID  unichar_id 
)
BOOL8 tesseract::Tesseract::non_O_upper ( const UNICHARSET &  ch_set,
UNICHAR_ID  unichar_id 
)
int tesseract::Tesseract::num_sub_langs (  )  const [inline]
BOOL8 tesseract::Tesseract::one_ell_conflict ( WERD_RES *  word_res,
BOOL8  update_map 
)
void tesseract::Tesseract::output_pass ( PAGE_RES_IT &  page_res_it,
const TBOX *  target_word_box 
)
void tesseract::Tesseract::ParseLanguageString ( const char *  lang_str,
GenericVector< STRING > *  to_load,
GenericVector< STRING > *  not_to_load 
)
void tesseract::Tesseract::pgeditor_main ( int  width,
int  height,
PAGE_RES *  page_res 
)

pgeditor_main()

Top level editor operation: Set up a new window and a corresponding event handler.

Pix* tesseract::Tesseract::pix_binary (  )  const [inline]
Pix* tesseract::Tesseract::pix_grey (  )  const [inline]
BOOL8 tesseract::Tesseract::potential_word_crunch ( WERD_RES *  word,
GARBAGE_LEVEL  garbage_level,
BOOL8  ok_dict_word 
)
void tesseract::Tesseract::PrepareForPageseg (  ) 
void tesseract::Tesseract::PrepareForTessOCR ( BLOCK_LIST *  block_list,
Tesseract *  osd_tess,
OSResults *  osr 
)
BOOL8 tesseract::Tesseract::process_cmd_win_event ( inT32  cmd_event,
char *  new_value 
)
void tesseract::Tesseract::process_image_event ( const SVEvent &  event  ) 

process_image_event()

User has done something in the image window - mouse down or up. Work out what it is and do something with it. If DOWN - just remember where it was. If UP - for each word in the selected area do the operation defined by the current mode.

void tesseract::Tesseract::process_selected_words ( PAGE_RES *  page_res,
TBOX &  selection_box,
BOOL8(tesseract::Tesseract::*)(BLOCK *block, ROW *row, WERD_RES *word_res)  word_processor 
)
bool tesseract::Tesseract::ProcessTargetWord ( const TBOX &  word_box,
const TBOX &  target_word_box,
const char *  word_config,
int  pass 
)
void tesseract::Tesseract::quality_based_rejection ( PAGE_RES_IT &  page_res_it,
BOOL8  good_quality_doc 
)
void tesseract::Tesseract::read_config_file ( const char *  filename,
SetParamConstraint  constraint 
)
bool tesseract::Tesseract::recog_all_words ( PAGE_RES *  page_res,
ETEXT_DESC *  monitor,
const TBOX *  target_word_box,
const char *  word_config,
int  dopasses 
)

recog_all_words()

Walk the page_res, recognizing all the words. If monitor is not null, it is used as a progress monitor/timeout/cancel. If dopasses is 0, all recognition passes are run; if 1, just pass 1; if 2, passes 2 and higher. If target_word_box is not null, special things are done to words that overlap the target_word_box: if word_config is not null, the word config file is read for just the target word(s); otherwise, on pass 2 and beyond ONLY the target words are processed (Jetsoft modification). Returns false if we cancelled prematurely.

Parameters:
page_res page structure
monitor progress monitor
target_word_box if not null, the rectangle whose overlapping words receive special handling
dopasses 0 - all, 1 just pass 1, 2 passes 2 and higher
BOOL8 tesseract::Tesseract::recog_interactive ( BLOCK block,
ROW row,
WERD_RES word_res 
)

recog_interactive

Recognize a single word in interactive mode.

Parameters:
block block
row row of word
word_res word to recognise
void tesseract::Tesseract::recog_pseudo_word ( PAGE_RES page_res,
TBOX selection_box 
)
void tesseract::Tesseract::recog_training_segmented ( const STRING fname,
PAGE_RES page_res,
volatile ETEXT_DESC monitor,
FILE *  output_file 
)
void tesseract::Tesseract::recog_word ( WERD_RES word,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)
void tesseract::Tesseract::recog_word_recursive ( WERD_RES word,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)
void tesseract::Tesseract::recognize_page ( STRING image_name  ) 
void tesseract::Tesseract::reject_edge_blobs ( WERD_RES word  ) 
void tesseract::Tesseract::reject_I_1_L ( WERD_RES word  ) 
void tesseract::Tesseract::reject_mostly_rejects ( WERD_RES word  ) 
void tesseract::Tesseract::rejection_passes ( PAGE_RES *  page_res,
ETEXT_DESC *  monitor,
const TBOX *  target_word_box,
const char *  word_config 
)
BOOL8 tesseract::Tesseract::repeated_nonalphanum_wd ( WERD_RES word,
ROW row 
)
void tesseract::Tesseract::ReportFailedBox ( int  boxfile_lineno,
TBOX  box,
const char *  box_ch,
const char *  err_msg 
)
void tesseract::Tesseract::ReportXhtFixResult ( bool  accept_new_word,
float  new_x_ht,
WERD_RES word,
WERD_RES new_word 
)
void tesseract::Tesseract::ReSegmentByClassification ( PAGE_RES page_res  ) 
bool tesseract::Tesseract::ResegmentCharBox ( PAGE_RES page_res,
const TBOX prev_box,
const TBOX box,
const TBOX next_box,
const char *  correct_text 
)
bool tesseract::Tesseract::ResegmentWordBox ( BLOCK_LIST *  block_list,
const TBOX box,
const TBOX next_box,
const char *  correct_text 
)
void tesseract::Tesseract::ResetAdaptiveClassifier (  ) 
void tesseract::Tesseract::ResetDocumentDictionary (  ) 
const FCOORD& tesseract::Tesseract::reskew (  )  const [inline]
bool tesseract::Tesseract::RetryWithLanguage ( WERD_RES word,
BLOCK block,
ROW row,
WordRecognizer  recognizer 
)
bool tesseract::Tesseract::right_to_left (  )  const [inline]
void tesseract::Tesseract::run_cube_combiner ( PAGE_RES page_res  ) 
bool tesseract::Tesseract::RunOldFixXht ( WERD_RES word,
BLOCK block,
ROW row 
)
inT16 tesseract::Tesseract::safe_dict_word ( const WERD_RES werd_res  ) 
Pix* tesseract::Tesseract::scaled_color (  )  const [inline]
int tesseract::Tesseract::scaled_factor (  )  const [inline]
void tesseract::Tesseract::SearchForText ( const GenericVector< BLOB_CHOICE_LIST * > *  choices,
int  choices_pos,
int  choices_length,
const GenericVector< UNICHAR_ID > &  target_text,
int  text_index,
float  rating,
GenericVector< int > *  segmentation,
float *  best_rating,
GenericVector< int > *  best_segmentation 
)
int tesseract::Tesseract::SegmentPage ( const STRING *  input_file,
BLOCK_LIST *  blocks,
Tesseract *  osd_tess,
OSResults *  osr 
)

Segment the page according to the current value of tessedit_pageseg_mode. pix_binary_ is used as the source image and should not be NULL. On return the blocks list owns all the constructed page layout.

void tesseract::Tesseract::set_done ( WERD_RES word,
inT16  pass 
)
void tesseract::Tesseract::set_pix_grey ( Pix *  grey_pix  )  [inline]
void tesseract::Tesseract::set_source_resolution ( int  ppi  )  [inline]
void tesseract::Tesseract::set_unlv_suspects ( WERD_RES word  ) 
void tesseract::Tesseract::set_word_fonts ( WERD_RES word,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)

set_word_fonts

Get the fonts for the word.

void tesseract::Tesseract::SetBlackAndWhitelist (  ) 
void tesseract::Tesseract::SetEquationDetect ( EquationDetect *  detector  ) 
void tesseract::Tesseract::SetScaledColor ( int  factor,
Pix *  color 
) [inline]
PAGE_RES * tesseract::Tesseract::SetupApplyBoxes ( const GenericVector< TBOX > &  boxes,
BLOCK_LIST *  block_list 
)
ColumnFinder * tesseract::Tesseract::SetupPageSegAndDetectOrientation ( bool  single_column,
bool  osd,
bool  only_osd,
BLOCK_LIST *  blocks,
Tesseract *  osd_tess,
OSResults *  osr,
TO_BLOCK_LIST *  to_blocks,
Pix **  photo_mask_pix,
Pix **  music_mask_pix 
)

Sets up auto page segmentation, determines the orientation, and corrects it. Somewhat arbitrary chunk of functionality, factored out of AutoPageSeg to facilitate testing. photo_mask_pix is a pointer to a NULL pointer that will be filled on return with the leptonica photo mask, which must be pixDestroyed by the caller. to_blocks is an empty list that will be filled with (usually a single) block that is used during layout analysis. This ugly API is required because of the possibility of a UNLV zone file. TODO(rays) clean this up. See AutoPageSeg for other arguments. The returned ColumnFinder must be deleted after use.

void tesseract::Tesseract::SetupUniversalFontIds (  ) 
void tesseract::Tesseract::SetupWordScripts ( BLOCK_LIST *  blocks  ) 
int tesseract::Tesseract::source_resolution (  )  const [inline]
void tesseract::Tesseract::split_and_recog_word ( WERD_RES word,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)
BOOL8 tesseract::Tesseract::terrible_word_crunch ( WERD_RES word,
GARBAGE_LEVEL  garbage_level 
)
BOOL8 tesseract::Tesseract::tess_acceptable_word ( WERD_CHOICE word_choice,
WERD_CHOICE raw_choice 
)
void tesseract::Tesseract::tess_add_doc_word ( WERD_CHOICE word_choice  ) 
void tesseract::Tesseract::tess_segment_pass1 ( WERD_RES word,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)
void tesseract::Tesseract::tess_segment_pass2 ( WERD_RES word,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)
BOOL8 tesseract::Tesseract::test_ambig_word ( WERD_RES word  ) 
const Textord& tesseract::Tesseract::textord (  )  const [inline]
void tesseract::Tesseract::TidyUp ( PAGE_RES page_res  ) 
void tesseract::Tesseract::tilde_crunch ( PAGE_RES_IT page_res_it  ) 
void tesseract::Tesseract::tilde_delete ( PAGE_RES_IT page_res_it  ) 
bool tesseract::Tesseract::TrainedXheightFix ( WERD_RES word,
BLOCK block,
ROW row 
)
BOOL8 tesseract::Tesseract::uniformly_spaced ( WERD_RES word  ) 
void tesseract::Tesseract::unrej_good_chs ( WERD_RES word,
ROW row 
)
void tesseract::Tesseract::unrej_good_quality_words ( PAGE_RES_IT page_res_it  ) 
BOOL8 tesseract::Tesseract::word_adaptable ( WERD_RES word,
uinT16  mode 
)
BOOL8 tesseract::Tesseract::word_blank_and_set_display ( BLOCK block,
ROW row,
WERD_RES word_res 
)
BOOL8 tesseract::Tesseract::word_bln_display ( BLOCK block,
ROW row,
WERD_RES word_res 
)

word_bln_display()

Normalize word and display in word window

inT16 tesseract::Tesseract::word_blob_quality ( WERD_RES word,
ROW row 
)
void tesseract::Tesseract::word_char_quality ( WERD_RES word,
ROW row,
inT16 match_count,
inT16 accepted_match_count 
)
BOOL8 tesseract::Tesseract::word_contains_non_1_digit ( const char *  word,
const char *  word_lengths 
)
CRUNCH_MODE tesseract::Tesseract::word_deletable ( WERD_RES word,
inT16 delete_mode 
)
BOOL8 tesseract::Tesseract::word_display ( BLOCK block,
ROW row,
WERD_RES word_res 
)

word_display() Word Processor

Display a word according to its display modes

BOOL8 tesseract::Tesseract::word_dumper ( BLOCK block,
ROW row,
WERD_RES word_res 
)

word_dumper()

Dump members to the debug window

inT16 tesseract::Tesseract::word_outline_errs ( WERD_RES word  ) 
BOOL8 tesseract::Tesseract::word_set_display ( BLOCK block,
ROW row,
WERD_RES word_res 
)

word_set_display() Word processor

Display word according to current display mode settings

inT16 tesseract::Tesseract::worst_noise_blob ( WERD_RES word_res,
float *  worst_noise_score 
)
void tesseract::Tesseract::write_results ( PAGE_RES_IT page_res_it,
char  newline_type,
BOOL8  force_eol 
)

Member Data Documentation

"Debug level"

"Exposure value follows this pattern in the image filename. The names of the image files are expected to be in the form [lang].[fontname].exp[num].tif"

"Learn both character fragments (as is done in the" " special low exposure mode) as well as unfragmented" " characters."

"Each bounding box is assumed to contain ngrams. Only" " learn the ngrams whose outlines overlap horizontally."

"Page number to apply boxes from"

"Debug level for BiDi"

"unrej potential with no checks"

"Leading punctuation"

"1st Trailing punctuation"

"2nd Trailing punctuation"

"Il1 conflict set"

"Use acceptability in okstring"

"As it says"

"POTENTIAL crunch cert lt this"

"Del if word gt xht x this above bl"

"Del if word gt xht x this below bl"

"Del if word ht gt xht x this"

"Del if word ht lt xht x this"

"Del if word width lt xht x this"

"POTENTIAL crunch rating lt this"

"Take out ~^ early?"

"Before word crunch?"

"Fiddle alpha figures"

"Dont pot crunch sensible strings"

"Dont crunch words with long lower case strings"

"Dont touch sensible strings"

"Dont crunch words with long lower case strings"

"Crunch words with long repetitions"

"crunch garbage cert lt this"

"crunch garbage rating lt this"

"POTENTIAL crunch garbage"

"How many potential indicators needed"

"POTENTIAL crunch cert lt this"

"POTENTIAL crunch rating lt this"

"For adj length in rating per ch"

"Small if lt xht x this"

"As it says"

"crunch rating lt this"

"Print cube debug info."

"Dump word pass/fail chk"

"Contextual fixspace debug"

"Reestimate debug"

"Allow outline errs in unrejection?"

"Filename extension"

"What constitutes done for spacing"

"How many non-noise blbs either side?"

"Small if lt xht x this"

"Run interactively?"

"Min acceptable orientation margin"

"Reject any x-ht lt or eq than this"

"Punct. chs expected WITHIN numbers"

int tesseract::Tesseract::ocr_devanagari_split_strategy = tesseract::ShiroRekhaSplitter::NO_SPLIT

"Whether to use the top-line splitting process for Devanagari " "documents while performing ocr."

"Allow NN to unrej"

char* tesseract::Tesseract::outlines_2 = "ij!?%\":;"

"Non standard number of outlines"

"Non standard number of outlines"

int tesseract::Tesseract::pageseg_devanagari_split_strategy = tesseract::ShiroRekhaSplitter::NO_SPLIT

"Whether to use the top-line splitting process for Devanagari " "documents while performing page-segmentation."

"Print paragraph debug info."

"good_quality_doc gte good blobs limit"

"good_quality_doc gte good char limit"

"alphas in a good word"

"good_quality_doc lte outline error limit"

"good_quality_doc lte rejection limit"

"good_quality_doc gte good char limit"

"Dont double check"

"Use dictword test"

"Extend permuter check"

"Use DOC dawg in 11l conf. detector"

"Individual rejection control"

"Extend permuter check"

"Individual rejection control"

"Individual rejection control"

"if >this fract"

"Save the results of the recognition step" " (blob_choices) within the corresponding WERD_CHOICE"

"Accept good rating limit"

"UNLV keep 1Il chars rejected"

"Suspect marker level"

"Dont touch bad rating limit"

"Dont Suspect dict wds longer than this"

"Min suspect level for rejecting spaces"

"Debug level for TessdataManager functions."

"Adapt to words that contain a character composed from fragments"

"Generate and print debug information for adaption"

"Perform training for ambiguities"

"Amount of debug output for bigram " "correction."

"Blacklist of chars not to recognize"

"Whitelist of chars to recognize"

"Force all rep chars the same"

"Output text with boxes"

"Write .html hOCR output file"

"Block and Row stats"

"Page stats"

"Output font info per char"

"Output data to debug file"

"Draw output words"

"Use word segmentation quality metric"

"Use word segmentation quality metric"

"Dump char choices"

"Dump intermediate images made during page segmentation"

"Enable correction based on the word bigram dictionary."

"Add words to the document dictionary"

"Try to improve fuzzy spaces"

"Crunch double hyphens?"

"Contextual 0O O0 flips"

"rej good doc wd if more than this fraction rejected"

"Reduce rejection on good docs"

"Rej blbs near image edge limit"

"Only initialize with the config file. Useful if the instance is " "not going to be used for OCR but say only for layout analysis."

"List of languages to load with this one"

"Aspect ratio dot/hyphen test"

"Generate more boxes from boxed chars"

"Log matcher activity"

"Do minimal rejection on pass 1 output"

"Only reject tess failures"

int tesseract::Tesseract::tessedit_ocr_engine_mode = tesseract::OEM_TESSERACT_ONLY

"Which OCR engine(s) to run (Tesseract, Cube, both). Defaults" " to loading and running only Tesseract (no Cube, no combiner)." " (Values from OcrEngineMode enum in tesseractclass.h)"

"Acceptance decision algorithm"

"According to dict_word"

"-1 -> All pages, else specific page to process"

"Page seg mode: 0=osd only, 1=auto+osd, 2=auto, 3=col, 4=block," " 5=line, 6=word, 7=char" " (Values from PageSegMode enum in publictypes.h)"

"Reward punctuation joins"

"Only rej partially rejected words in block rejection"

"Only preserve wds longer than this"

"Only rej partially rejected words in row rejection"

"Check/Correct x-height"

"Reject all bad quality wds"

"%rej allowed before rej whole block"

"%rej allowed before rej whole doc"

"Rejection algorithm"

"%rej allowed before rej whole row"

"Adaption debug"

"Take segmentation and labeling from box file"

"Conversion of word/line box file to char box file"

"Apply row rejection to good docs"

"Use reject map to control Tesseract adaption"

"Adaptation decision algorithm for tess"

"Test adaption criteria"

"Adaptation decision algorithm for tess"

"Generate training data from boxed chars"

"Call Tess to learn blobs"

"Dont bother with word plausibility"

"Aspect ratio dot/hyphen test"

"Reject spaces?"

"Number of row rejects in whole word rejects which prevents whole row rejection"

"Make output have exactly one word per WERD"

"Write block separators in output"

"Capture the image from the IPE"

"Write all parameters to the given file."

"Write repetition char code"

"Write .unlv output file"

"Dont reject ANYTHING AT ALL"

"Dont reject ANYTHING"

"Test for point"

"xcoord"

"ycoord"

"Turn on equation detector"

"Debug line finding"

"Use CJK fixed pitch model"

"Mark v.bad words for tilde crunch"

"Output char for unidentified blobs"

"Max allowed deviation of blob top outside of font data"

"Min change in xht before actually trying it"


The documentation for this class was generated from the following files:
Generated on Thu Feb 2 08:19:26 2012 for Tesseract by  doxygen 1.6.3