tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/pitsync1.cpp File Reference
#include <math.h>
#include "memry.h"
#include "pitsync1.h"

Go to the source code of this file.

Defines

#define EXTERN

Functions

 ELISTIZE (FPSEGPT) CLISTIZE(FPSEGPT_LIST) EXTERN int pitsync_linear_version
double check_pitch_sync (BLOBNBOX_IT *blob_it, inT16 blob_count, inT16 pitch, inT16 pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
void make_illegal_segment (FPSEGPT_LIST *prev_list, TBOX blob_box, BLOBNBOX_IT blob_it, inT16 region_index, inT16 pitch, inT16 pitch_error, FPSEGPT_LIST *seg_list)

Variables

EXTERN double pitsync_joined_edge = 0.75
EXTERN double pitsync_offset_freecut_fraction = 0.25
EXTERN int pitsync_fake_depth = 1

Define Documentation

#define EXTERN

Function Documentation

double check_pitch_sync ( BLOBNBOX_IT *  blob_it,
inT16  blob_count,
inT16  pitch,
inT16  pitch_error,
STATS projection,
FPSEGPT_LIST *  seg_list 
)

Definition at line 148 of file pitsync1.cpp.

                         {
  inT16 x;                       //current coord
  inT16 min_index;               //blob number
  inT16 max_index;               //blob number
  inT16 left_edge;               //of word
  inT16 right_edge;              //of word
  inT16 right_max;               //max allowed x
  inT16 min_x;                   //in this region
  inT16 max_x;
  inT16 region_index;
  inT16 best_region_index = 0;   //for best result
  inT16 offset;                  //dist to legal area
  inT16 left_best_x;             //edge of good region
  inT16 right_best_x;            //right edge
  TBOX min_box;                   //bounding box
  TBOX max_box;                   //bounding box
  TBOX next_box;                  //box of next blob
  FPSEGPT *segpt;                //segment point
  FPSEGPT_LIST *segpts;          //points in a segment
  double best_cost;              //best path
  double mean_sum;               //computes result
  FPSEGPT *best_end;             //end of best path
  BLOBNBOX_IT min_it;            //copy iterator
  BLOBNBOX_IT max_it;            //copy iterator
  FPSEGPT_IT segpt_it;           //iterator
                                 //output segments
  FPSEGPT_IT outseg_it = seg_list;
  FPSEGPT_LIST_CLIST lattice;    //list of lists
                                 //region iterator
  FPSEGPT_LIST_C_IT lattice_it = &lattice;

  //      tprintf("Computing sync on word of %d blobs with pitch %d\n",
  //              blob_count, pitch);
  //      if (blob_count==8 && pitch==27)
  //              projection->print(stdout,TRUE);
  if (pitch < 3)
    pitch = 3;                   //nothing ludicrous
  if ((pitch - 3) / 2 < pitch_error)
    pitch_error = (pitch - 3) / 2;
  min_it = *blob_it;
  min_box = box_next (&min_it);  //get box
  //      if (blob_count==8 && pitch==27)
  //              tprintf("1st box at (%d,%d)->(%d,%d)\n",
  //                      min_box.left(),min_box.bottom(),
  //                      min_box.right(),min_box.top());
                                 //left of word
  left_edge = min_box.left () + pitch_error;
  for (min_index = 1; min_index < blob_count; min_index++) {
    min_box = box_next (&min_it);
    //              if (blob_count==8 && pitch==27)
    //                      tprintf("Box at (%d,%d)->(%d,%d)\n",
    //                              min_box.left(),min_box.bottom(),
    //                              min_box.right(),min_box.top());
  }
  right_edge = min_box.right (); //end of word
  max_x = left_edge;
                                 //min permissible
  min_x = max_x - pitch + pitch_error * 2 + 1;
  right_max = right_edge + pitch - pitch_error - 1;
  segpts = new FPSEGPT_LIST;     //list of points
  segpt_it.set_to_list (segpts);
  for (x = min_x; x <= max_x; x++) {
    segpt = new FPSEGPT (x);     //make a new one
                                 //put in list
    segpt_it.add_after_then_move (segpt);
  }
                                 //first segment
  lattice_it.add_before_then_move (segpts);
  min_index = 0;
  region_index = 1;
  best_cost = MAX_FLOAT32;
  best_end = NULL;
  min_it = *blob_it;
  min_box = box_next (&min_it);  //first box
  do {
    left_best_x = -1;
    right_best_x = -1;
    segpts = new FPSEGPT_LIST;   //list of points
    segpt_it.set_to_list (segpts);
    min_x += pitch - pitch_error;//next limits
    max_x += pitch + pitch_error;
    while (min_box.right () < min_x && min_index < blob_count) {
      min_index++;
      min_box = box_next (&min_it);
    }
    max_it = min_it;
    max_index = min_index;
    max_box = min_box;
    next_box = box_next (&max_it);
    for (x = min_x; x <= max_x && x <= right_max; x++) {
      while (x < right_edge && max_index < blob_count
      && x > max_box.right ()) {
        max_index++;
        max_box = next_box;
        next_box = box_next (&max_it);
      }
      if (x <= max_box.left () + pitch_error
        || x >= max_box.right () - pitch_error || x >= right_edge
        || (max_index < blob_count - 1 && x >= next_box.left ())
        || (x - max_box.left () > pitch * pitsync_joined_edge
      && max_box.right () - x > pitch * pitsync_joined_edge)) {
      //                      || projection->local_min(x))
        if (x - max_box.left () > 0
          && x - max_box.left () <= pitch_error)
                                 //dist to real break
          offset = x - max_box.left ();
        else if (max_box.right () - x > 0
          && max_box.right () - x <= pitch_error
          && (max_index >= blob_count - 1
          || x < next_box.left ()))
          offset = max_box.right () - x;
        else
          offset = 0;
        //                              offset=pitsync_offset_freecut_fraction*projection->pile_count(x);
        segpt = new FPSEGPT (x, FALSE, offset, region_index,
          pitch, pitch_error, lattice_it.data ());
      }
      else {
        offset = projection->pile_count (x);
        segpt = new FPSEGPT (x, TRUE, offset, region_index,
          pitch, pitch_error, lattice_it.data ());
      }
      if (segpt->previous () != NULL) {
        segpt_it.add_after_then_move (segpt);
        if (x >= right_edge - pitch_error) {
          segpt->terminal = TRUE;//no more wanted
          if (segpt->cost_function () < best_cost) {
            best_cost = segpt->cost_function ();
            //find least
            best_end = segpt;
            best_region_index = region_index;
            left_best_x = x;
            right_best_x = x;
          }
          else if (segpt->cost_function () == best_cost
            && right_best_x == x - 1)
            right_best_x = x;
        }
      }
      else {
        delete segpt;            //no good
      }
    }
    if (segpts->empty ()) {
      if (best_end != NULL)
        break;                   //already found one
      make_illegal_segment (lattice_it.data (), min_box, min_it,
        region_index, pitch, pitch_error, segpts);
    }
    else {
      if (right_best_x > left_best_x + 1) {
        left_best_x = (left_best_x + right_best_x + 1) / 2;
        for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
          && segpt_it.data ()->position () != left_best_x;
          segpt_it.forward ());
        if (segpt_it.data ()->position () == left_best_x)
                                 //middle of region
          best_end = segpt_it.data ();
      }
    }
                                 //new segment
    lattice_it.add_before_then_move (segpts);
    region_index++;
  }
  while (min_x < right_edge);
  ASSERT_HOST (best_end != NULL);//must always find some

  for (lattice_it.mark_cycle_pt (); !lattice_it.cycled_list ();
  lattice_it.forward ()) {
    segpts = lattice_it.data ();
    segpt_it.set_to_list (segpts);
    //              if (blob_count==8 && pitch==27)
    //              {
    //                      for (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward())
    //                      {
    //                              segpt=segpt_it.data();
    //                              tprintf("At %d, (%x) cost=%g, m=%g, sq=%g, pred=%x\n",
    //                                      segpt->position(),segpt,segpt->cost_function(),
    //                                      segpt->sum(),segpt->squares(),segpt->previous());
    //                      }
    //                      tprintf("\n");
    //              }
    for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
      && segpt_it.data () != best_end; segpt_it.forward ());
    if (segpt_it.data () == best_end) {
                                 //save good one
      segpt = segpt_it.extract ();
      outseg_it.add_before_then_move (segpt);
      best_end = segpt->previous ();
    }
  }
  ASSERT_HOST (best_end == NULL);
  ASSERT_HOST (!outseg_it.empty ());
  outseg_it.move_to_last ();
  mean_sum = outseg_it.data ()->sum ();
  mean_sum = mean_sum * mean_sum / best_region_index;
  if (outseg_it.data ()->squares () - mean_sum < 0)
    tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
      outseg_it.data ()->squares (), outseg_it.data ()->sum (),
      best_region_index);
  lattice.deep_clear ();         //shift the lot
  return outseg_it.data ()->squares () - mean_sum;
}

"Use new fast algorithm"

void make_illegal_segment ( FPSEGPT_LIST *  prev_list,
TBOX  blob_box,
BLOBNBOX_IT  blob_it,
inT16  region_index,
inT16  pitch,
inT16  pitch_error,
FPSEGPT_LIST *  seg_list 
)

Definition at line 366 of file pitsync1.cpp.

                           {
  inT16 x;                       //current coord
  inT16 min_x = 0;               //in this region
  inT16 max_x = 0;
  inT16 offset;                  //dist to edge
  FPSEGPT *segpt;                //segment point
  FPSEGPT *prevpt;               //previous point
  float best_cost;               //best path
  FPSEGPT_IT segpt_it = seg_list;//iterator
                                 //previous points
  FPSEGPT_IT prevpt_it = prev_list;

  best_cost = MAX_FLOAT32;
  for (prevpt_it.mark_cycle_pt (); !prevpt_it.cycled_list ();
  prevpt_it.forward ()) {
    prevpt = prevpt_it.data ();
    if (prevpt->cost_function () < best_cost) {
                                 //find least
      best_cost = prevpt->cost_function ();
      min_x = prevpt->position ();
      max_x = min_x;             //limits on coords
    }
    else if (prevpt->cost_function () == best_cost) {
      max_x = prevpt->position ();
    }
  }
  min_x += pitch - pitch_error;
  max_x += pitch + pitch_error;
  for (x = min_x; x <= max_x; x++) {
    while (x > blob_box.right ()) {
      blob_box = box_next (&blob_it);
    }
    offset = x - blob_box.left ();
    if (blob_box.right () - x < offset)
      offset = blob_box.right () - x;
    segpt = new FPSEGPT (x, FALSE, offset,
      region_index, pitch, pitch_error, prev_list);
    if (segpt->previous () != NULL) {
      ASSERT_HOST (offset >= 0);
      fprintf (stderr, "made fake at %d\n", x);
                                 //make one up
      segpt_it.add_after_then_move (segpt);
      segpt->faked = TRUE;
      segpt->fake_count++;
    }
    else
      delete segpt;
  }
}

Variable Documentation

"Max advance fake generation"

Definition at line 38 of file pitsync1.cpp.

EXTERN double pitsync_joined_edge = 0.75

"Dist inside big blob for chopping"

Definition at line 33 of file pitsync1.cpp.

"Fraction of cut for free cuts"

Definition at line 36 of file pitsync1.cpp.

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines