tesseract 3.03
#include <baselinedetect.h>
Public Member Functions
BaselineBlock (int debug_level, bool non_text, TO_BLOCK *block)
TO_BLOCK * block () const
double skew_angle () const
bool FitBaselinesAndFindSkew (bool use_box_bottoms)
void ParallelizeBaselines (double default_block_skew)
void SetupBlockParameters () const
void PrepareForSplineFitting (ICOORD page_tr, bool remove_noise)
void FitBaselineSplines (bool enable_splines, bool show_final_rows, Textord *textord)
void DrawFinalRows (const ICOORD &page_tr)
void DrawPixSpline (Pix *pix_in)
Static Public Member Functions
static double SpacingModelError (double perp_disp, double line_spacing, double line_offset)
Definition at line 129 of file baselinedetect.h.
tesseract::BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block)
Definition at line 403 of file baselinedetect.cpp.
// Stores the constructor arguments, zeroes the skew/offset/error state, and
// takes the initial line spacing from block->line_spacing. The body then
// creates one BaselineRow per row of the block.
    : block_(block),
      debug_level_(debug_level),
      non_text_block_(non_text),
      good_skew_angle_(false),
      skew_angle_(0.0),
      line_spacing_(block->line_spacing),
      line_offset_(0.0),
      model_error_(0.0) {
  TO_ROW_IT row_it(block_->get_rows());
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    // Sort the blobs on the rows.
    row_it.data()->blob_list()->sort(blob_x_order);
    // rows_ is filled in the same order as the block's row list.
    // NOTE(review): the BaselineRow is allocated with new; presumably rows_
    // owns and frees it -- confirm against the declaration of rows_.
    rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
  }
}
TO_BLOCK* tesseract::BaselineBlock::block() const [inline]
Definition at line 133 of file baselinedetect.h.
{
// Returns the TO_BLOCK pointer held in block_, without modifying it.
return block_;
}
void tesseract::BaselineBlock::DrawFinalRows(const ICOORD& page_tr)
Definition at line 572 of file baselinedetect.cpp.
{
  // Debug display of the block's rows, blobs, underlines and mean lines in a
  // window created by create_to_win(page_tr). The whole body is compiled out
  // when GRAPHICS_DISABLED is defined, and nothing is drawn for non-text
  // blocks.
#ifndef GRAPHICS_DISABLED
  if (non_text_block_) return;
  double gradient = tan(skew_angle_);
  FCOORD rotation(1.0f, 0.0f);
  int left_edge = block_->block->bounding_box().left();
  ScrollView* win = create_to_win(page_tr);
  ScrollView::Color colour = ScrollView::RED;
  TO_ROW_IT row_it = block_->get_rows();
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
    // Step to the next colour for each row, wrapping back to RED once the
    // value passes MAGENTA.
    colour = static_cast<ScrollView::Color>(colour + 1);
    if (colour > ScrollView::MAGENTA)
      colour = ScrollView::RED;
  }
  plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
  // Show discarded blobs.
  plot_blob_list(win, &block_->underlines,
                 ScrollView::YELLOW, ScrollView::CORAL);
  if (block_->blobs.length() > 0)
    tprintf("%d blobs discarded as noise\n", block_->blobs.length());
  draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
#endif
}
void tesseract::BaselineBlock::DrawPixSpline(Pix* pix_in)
Definition at line 597 of file baselinedetect.cpp.
{
  // Plots the baseline of every row of the block onto pix_in.
  // Non-text blocks are skipped entirely.
  if (non_text_block_) {
    return;
  }
  TO_ROW_IT it(block_->get_rows());
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    TO_ROW* to_row = it.data();
    to_row->baseline.plot(pix_in);
  }
}
bool tesseract::BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms)
Definition at line 429 of file baselinedetect.cpp.
{
  // Fits a baseline to every row, then sets the block skew to the circular
  // median (modulus M_PI) of the angles of the rows whose fit succeeded.
  // Returns true if at least one row contributed an angle; returns false
  // immediately for non-text blocks.
  if (non_text_block_) {
    return false;
  }
  GenericVector<double> fitted_angles;
  for (int row_index = 0; row_index < rows_.size(); ++row_index) {
    BaselineRow* baseline_row = rows_[row_index];
    if (baseline_row->FitBaseline(use_box_bottoms)) {
      fitted_angles.push_back(baseline_row->BaselineAngle());
    }
    if (debug_level_ > 1) {
      baseline_row->Print();
    }
  }
  if (fitted_angles.empty()) {
    // No row produced a usable fit: reset to zero skew and flag it as bad.
    skew_angle_ = 0.0f;
    good_skew_angle_ = false;
  } else {
    skew_angle_ = MedianOfCircularValues(M_PI, &fitted_angles);
    good_skew_angle_ = true;
  }
  if (debug_level_ > 0) {
    tprintf("Initial block skew angle = %g, good = %d\n",
            skew_angle_, good_skew_angle_);
  }
  return good_skew_angle_;
}
void tesseract::BaselineBlock::FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord* textord)
Definition at line 543 of file baselinedetect.cpp.
{
  // Gives every row of the block a baseline spline, then computes the block
  // x-height. With enable_splines set, textord->make_spline_rows does the
  // fitting; otherwise each row gets a single-segment spline built from its
  // existing straight line (line_m, line_c) across the block's bounding box.
  // textord is dereferenced unconditionally, so it must not be NULL.
  double gradient = tan(skew_angle_);
  if (enable_splines) {
    textord->make_spline_rows(block_, gradient, show_final_rows);
  } else {
    // Make a fake spline from the existing line.
    TBOX block_box = block_->block->bounding_box();
    TO_ROW_IT row_it = block_->get_rows();
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      TO_ROW* row = row_it.data();
      inT32 xstarts[2] = { block_box.left(), block_box.right() };
      // Coefficients for one segment, in the order the original passes them:
      // zero, then the row's line_m and line_c.
      double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
      row->baseline = QSPLINE(1, xstarts, coeffs);
      textord->compute_row_xheight(row, block_->block->classify_rotation(),
                                   row->line_m(), block_->line_size);
    }
  }
  textord->compute_block_xheight(block_, gradient);
  block_->block->set_xheight(block_->xheight);
  if (textord_restore_underlines)  // fix underlines
    restore_underlined_blobs(block_);
}
void tesseract::BaselineBlock::ParallelizeBaselines(double default_block_skew)
Definition at line 458 of file baselinedetect.cpp.
{
  // Makes all row baselines parallel to the block skew direction and, when a
  // line-spacing model is available, adjusts the rows to that model.
  // Does nothing for non-text blocks.
  if (non_text_block_) return;
  // Use the caller's default skew when good_skew_angle_ is not set.
  if (!good_skew_angle_) skew_angle_ = default_block_skew;
  if (debug_level_ > 0)
    tprintf("Adjusting block to skew angle %g\n", skew_angle_);
  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
  for (int r = 0; r < rows_.size(); ++r) {
    BaselineRow* row = rows_[r];
    row->AdjustBaselineToParallel(debug_level_, direction);
    if (debug_level_ > 1)
      row->Print();
  }
  // The grid adjustment below is skipped for blocks with fewer than 3 rows or
  // when ComputeLineSpacing() fails. This guard also makes rows_[0] below a
  // valid access.
  if (rows_.size() < 3 || !ComputeLineSpacing())
    return;
  // Enforce the line spacing model on all lines that don't yet have a good
  // baseline.
  // Start by finding the row that is best fitted to the model.
  int best_row = 0;
  double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
                                        line_spacing_, line_offset_);
  for (int r = 1; r < rows_.size(); ++r) {
    double error = SpacingModelError(rows_[r]->PerpDisp(direction),
                                     line_spacing_, line_offset_);
    if (error < best_error) {
      best_error = error;
      best_row = r;
    }
  }
  // Starting at the best fitting row, work outwards, syncing the offset.
  // Each call returns the offset that is passed on to the next row.
  double offset = line_offset_;
  for (int r = best_row + 1; r < rows_.size(); ++r) {
    offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
                                            line_spacing_, offset);
  }
  // Restart from the model offset for the rows before the best row.
  offset = line_offset_;
  for (int r = best_row - 1; r >= 0; --r) {
    offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
                                            line_spacing_, offset);
  }
}
void tesseract::BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise)
Definition at line 527 of file baselinedetect.cpp.
{
  // Runs the pre-processing steps that precede spline fitting: optional noise
  // removal, then underline separation and blob pre-association, both using
  // the block's skew gradient. Does nothing for non-text blocks.
  if (non_text_block_) {
    return;
  }
  if (remove_noise)
    vigorous_noise_removal(block_);
  FCOORD unit_rotation(1.0f, 0.0f);
  double skew_gradient = tan(skew_angle_);
  separate_underlines(block_, skew_gradient, unit_rotation, true);
  pre_associate_blobs(page_tr, block_, unit_rotation, true);
}
void tesseract::BaselineBlock::SetupBlockParameters() const
Definition at line 500 of file baselinedetect.cpp.
{
  // Copies the computed spacing model into the underlying TO_BLOCK and then
  // pushes each BaselineRow's parameters into its matching TO_ROW.
  // The method is const but writes through the block_ pointer.
  if (line_spacing_ > 0.0) {
    // Where was block_line_spacing set before?
    float min_spacing = MIN(block_->line_spacing, line_spacing_);
    // line_size is only ever reduced here, never increased.
    if (min_spacing < block_->line_size)
      block_->line_size = min_spacing;
    block_->line_spacing = line_spacing_;
    block_->baseline_offset = line_offset_;
    block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
  }
  // Setup the parameters on all the rows.
  // rows_ and the block's row list are walked in lockstep.
  // NOTE(review): this assumes the row list still has the order and length it
  // had when rows_ was built -- confirm nothing reorders it in between.
  TO_ROW_IT row_it(block_->get_rows());
  for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {
    BaselineRow* row = rows_[r];
    TO_ROW* to_row = row_it.data();
    row->SetupOldLineParameters(to_row);
  }
}
double tesseract::BaselineBlock::skew_angle() const [inline]
Definition at line 136 of file baselinedetect.h.
{
// Returns the current value of skew_angle_, without modifying it.
return skew_angle_;
}
double tesseract::BaselineBlock::SpacingModelError(double perp_disp, double line_spacing, double line_offset) [static]
Definition at line 417 of file baselinedetect.cpp.
{
  // Returns the absolute distance between perp_disp and the closest position
  // predicted by the model line_spacing * k + line_offset, for integer k.
  // line_spacing is used as a divisor, so callers must pass a non-zero value.
  // Round to the nearest multiple of line_spacing + line offset.
  int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
  double model_y = line_spacing * multiple + line_offset;
  return fabs(perp_disp - model_y);
}