tesseract  3.03
tesseract::BaselineBlock Class Reference

#include <baselinedetect.h>

List of all members.

Public Member Functions

 BaselineBlock (int debug_level, bool non_text, TO_BLOCK *block)
TO_BLOCKblock () const
double skew_angle () const
bool FitBaselinesAndFindSkew (bool use_box_bottoms)
void ParallelizeBaselines (double default_block_skew)
void SetupBlockParameters () const
void PrepareForSplineFitting (ICOORD page_tr, bool remove_noise)
void FitBaselineSplines (bool enable_splines, bool show_final_rows, Textord *textord)
void DrawFinalRows (const ICOORD &page_tr)
void DrawPixSpline (Pix *pix_in)

Static Public Member Functions

static double SpacingModelError (double perp_disp, double line_spacing, double line_offset)

Detailed Description

Definition at line 129 of file baselinedetect.h.


Constructor & Destructor Documentation

tesseract::BaselineBlock::BaselineBlock ( int  debug_level,
bool  non_text,
TO_BLOCK block 
)

Definition at line 403 of file baselinedetect.cpp.

  : block_(block), debug_level_(debug_level), non_text_block_(non_text),
    good_skew_angle_(false), skew_angle_(0.0),
    line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
  TO_ROW_IT row_it(block_->get_rows());
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    // Sort the blobs on the rows.
    row_it.data()->blob_list()->sort(blob_x_order);
    rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
  }
}

Member Function Documentation

Definition at line 133 of file baselinedetect.h.

                          {
    return block_;
  }

Definition at line 572 of file baselinedetect.cpp.

                                                       {
#ifndef GRAPHICS_DISABLED
  if (non_text_block_) return;
  double gradient = tan(skew_angle_);
  FCOORD rotation(1.0f, 0.0f);
  int left_edge = block_->block->bounding_box().left();
  ScrollView* win = create_to_win(page_tr);
  ScrollView::Color colour = ScrollView::RED;
  TO_ROW_IT row_it = block_->get_rows();
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
    colour = static_cast<ScrollView::Color>(colour + 1);
    if (colour > ScrollView::MAGENTA)
      colour = ScrollView::RED;
  }
  plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
  // Show discarded blobs.
  plot_blob_list(win, &block_->underlines,
                 ScrollView::YELLOW, ScrollView::CORAL);
  if (block_->blobs.length() > 0)
    tprintf("%d blobs discarded as noise\n", block_->blobs.length());
  draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
#endif
}

Definition at line 597 of file baselinedetect.cpp.

                                             {
  if (non_text_block_) return;
  TO_ROW_IT row_it = block_->get_rows();
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    row_it.data()->baseline.plot(pix_in);
  }
}
bool tesseract::BaselineBlock::FitBaselinesAndFindSkew ( bool  use_box_bottoms)

Definition at line 429 of file baselinedetect.cpp.

                                                                {
  if (non_text_block_) return false;
  GenericVector<double> angles;
  for (int r = 0; r < rows_.size(); ++r) {
    BaselineRow* row = rows_[r];
    if (row->FitBaseline(use_box_bottoms)) {
      double angle = row->BaselineAngle();
      angles.push_back(angle);
    }
    if (debug_level_ > 1)
      row->Print();
  }

  if (!angles.empty()) {
    skew_angle_ = MedianOfCircularValues(M_PI, &angles);
    good_skew_angle_ = true;
  } else {
    skew_angle_ = 0.0f;
    good_skew_angle_ = false;
  }
  if (debug_level_ > 0) {
    tprintf("Initial block skew angle = %g, good = %d\n",
            skew_angle_, good_skew_angle_);
  }
  return good_skew_angle_;
}
void tesseract::BaselineBlock::FitBaselineSplines ( bool  enable_splines,
bool  show_final_rows,
Textord textord 
)

Definition at line 543 of file baselinedetect.cpp.

                                                         {
  double gradient = tan(skew_angle_);
  FCOORD rotation(1.0f, 0.0f);

  if (enable_splines) {
    textord->make_spline_rows(block_, gradient, show_final_rows);
  } else {
    // Make a fake spline from the existing line.
    TBOX block_box= block_->block->bounding_box();
    TO_ROW_IT row_it = block_->get_rows();
    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
      TO_ROW* row = row_it.data();
      inT32 xstarts[2] = { block_box.left(), block_box.right() };
      double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
      row->baseline = QSPLINE(1, xstarts, coeffs);
      textord->compute_row_xheight(row, block_->block->classify_rotation(),
                                   row->line_m(), block_->line_size);
    }
  }
  textord->compute_block_xheight(block_, gradient);
  block_->block->set_xheight(block_->xheight);
  if (textord_restore_underlines)  // fix underlines
    restore_underlined_blobs(block_);
}
void tesseract::BaselineBlock::ParallelizeBaselines ( double  default_block_skew)

Definition at line 458 of file baselinedetect.cpp.

                                                                  {
  if (non_text_block_) return;
  if (!good_skew_angle_) skew_angle_ = default_block_skew;
  if (debug_level_ > 0)
    tprintf("Adjusting block to skew angle %g\n", skew_angle_);
  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
  for (int r = 0; r < rows_.size(); ++r) {
    BaselineRow* row = rows_[r];
    row->AdjustBaselineToParallel(debug_level_, direction);
    if (debug_level_ > 1)
      row->Print();
  }
  if (rows_.size() < 3 || !ComputeLineSpacing())
    return;
  // Enforce the line spacing model on all lines that don't yet have a good
  // baseline.
  // Start by finding the row that is best fitted to the model.
  int best_row = 0;
  double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
                                        line_spacing_, line_offset_);
  for (int r = 1; r < rows_.size(); ++r) {
    double error = SpacingModelError(rows_[r]->PerpDisp(direction),
                                     line_spacing_, line_offset_);
    if (error < best_error) {
      best_error = error;
      best_row = r;
    }
  }
  // Starting at the best fitting row, work outwards, syncing the offset.
  double offset = line_offset_;
  for (int r = best_row + 1; r < rows_.size(); ++r) {
    offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
                                            line_spacing_, offset);
  }
  offset = line_offset_;
  for (int r = best_row - 1; r >= 0; --r) {
    offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
                                            line_spacing_, offset);
  }
}
void tesseract::BaselineBlock::PrepareForSplineFitting ( ICOORD  page_tr,
bool  remove_noise 
)

Definition at line 527 of file baselinedetect.cpp.

                                                                             {
  if (non_text_block_) return;
  if (remove_noise) {
    vigorous_noise_removal(block_);
  }
  FCOORD rotation(1.0f, 0.0f);
  double gradient = tan(skew_angle_);
  separate_underlines(block_, gradient, rotation, true);
  pre_associate_blobs(page_tr, block_, rotation, true);
}

Definition at line 500 of file baselinedetect.cpp.

                                               {
  if (line_spacing_ > 0.0) {
    // Where was block_line_spacing set before?
    float min_spacing = MIN(block_->line_spacing, line_spacing_);
    if (min_spacing < block_->line_size)
      block_->line_size = min_spacing;
    block_->line_spacing = line_spacing_;
    block_->baseline_offset = line_offset_;
    block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
  }
  // Setup the parameters on all the rows.
  TO_ROW_IT row_it(block_->get_rows());
  for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {
    BaselineRow* row = rows_[r];
    TO_ROW* to_row = row_it.data();
    row->SetupOldLineParameters(to_row);
  }
}
double tesseract::BaselineBlock::skew_angle ( ) const [inline]

Definition at line 136 of file baselinedetect.h.

                            {
    return skew_angle_;
  }
double tesseract::BaselineBlock::SpacingModelError ( double  perp_disp,
double  line_spacing,
double  line_offset 
) [static]

Definition at line 417 of file baselinedetect.cpp.

                                                            {
  // Round to the nearest multiple of line_spacing + line offset.
  int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
  double model_y = line_spacing * multiple + line_offset;
  return fabs(perp_disp - model_y);
}

The documentation for this class was generated from the following files:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines