tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/fpchop.h File Reference
#include "params.h"
#include "blobbox.h"

Go to the source code of this file.

Classes

class  C_OUTLINE_FRAG

Functions

ROWfixed_pitch_words (TO_ROW *row, FCOORD rotation)
WERDadd_repeated_word (WERD_IT *rep_it, inT16 &rep_left, inT16 &prev_chop_coord, uinT8 &blanks, float pitch, WERD_IT *word_it)
void split_to_blob (BLOBNBOX *blob, inT16 chop_coord, float pitch_error, C_OUTLINE_LIST *left_coutlines, C_OUTLINE_LIST *right_coutlines)
void fixed_chop_cblob (C_BLOB *blob, inT16 chop_coord, float pitch_error, C_OUTLINE_LIST *left_outlines, C_OUTLINE_LIST *right_outlines)
void fixed_split_coutline (C_OUTLINE *srcline, inT16 chop_coord, float pitch_error, C_OUTLINE_IT *left_it, C_OUTLINE_IT *right_it)
BOOL8 fixed_chop_coutline (C_OUTLINE *srcline, inT16 chop_coord, float pitch_error, C_OUTLINE_FRAG_LIST *left_frags, C_OUTLINE_FRAG_LIST *right_frags)
void save_chop_cfragment (inT16 head_index, ICOORD head_pos, inT16 tail_index, ICOORD tail_pos, C_OUTLINE *srcline, C_OUTLINE_FRAG_LIST *frags)
void add_frag_to_list (C_OUTLINE_FRAG *frag, C_OUTLINE_FRAG_LIST *frags)
void close_chopped_cfragments (C_OUTLINE_FRAG_LIST *frags, C_OUTLINE_LIST *children, float pitch_error, C_OUTLINE_IT *dest_it)
C_OUTLINEjoin_chopped_fragments (C_OUTLINE_FRAG *bottom, C_OUTLINE_FRAG *top)
void join_segments (C_OUTLINE_FRAG *bottom, C_OUTLINE_FRAG *top)

Variables

int textord_fp_chop_error = 2
double textord_fp_chop_snap = 0.5

Function Documentation

void add_frag_to_list ( C_OUTLINE_FRAG frag,
C_OUTLINE_FRAG_LIST *  frags 
)

Definition at line 613 of file fpchop.cpp.

                       {
                                 //output list
  C_OUTLINE_FRAG_IT frag_it = frags;

  if (!frags->empty ()) {
    for (frag_it.mark_cycle_pt (); !frag_it.cycled_list ();
    frag_it.forward ()) {
      if (frag_it.data ()->ycoord > frag->ycoord
        || (frag_it.data ()->ycoord == frag->ycoord
         && frag->other_end->ycoord < frag->ycoord)) {
        frag_it.add_before_then_move (frag);
        return;
      }
    }
  }
  frag_it.add_to_end (frag);
}
WERD* add_repeated_word ( WERD_IT *  rep_it,
inT16 rep_left,
inT16 prev_chop_coord,
uinT8 blanks,
float  pitch,
WERD_IT *  word_it 
)

Definition at line 207 of file fpchop.cpp.

                         {
  WERD *word;                    //word to move
  inT16 new_blanks;              //extra blanks

  if (rep_left > prev_chop_coord) {
    new_blanks = (uinT8) floor ((rep_left - prev_chop_coord) / pitch + 0.5);
    blanks += new_blanks;
  }
  word = rep_it->extract ();
  prev_chop_coord = word->bounding_box ().right ();
  word_it->add_after_then_move (word);
  word->set_blanks (blanks);
  rep_it->forward ();
  if (rep_it->empty ())
    rep_left = MAX_INT16;
  else
    rep_left = rep_it->data ()->bounding_box ().left ();
  blanks = 0;
  return word;
}
void close_chopped_cfragments ( C_OUTLINE_FRAG_LIST *  frags,
C_OUTLINE_LIST *  children,
float  pitch_error,
C_OUTLINE_IT *  dest_it 
)

Definition at line 642 of file fpchop.cpp.

                               {
                                 //iterator
  C_OUTLINE_FRAG_IT frag_it = frags;
  C_OUTLINE_FRAG *bottom_frag;   //bottom of cut
  C_OUTLINE_FRAG *top_frag;      //top of cut
  C_OUTLINE *outline;            //new outline
  C_OUTLINE *child;              //current child
  C_OUTLINE_IT child_it = children;
  C_OUTLINE_IT olchild_it;       //children of outline

  while (!frag_it.empty()) {
    frag_it.move_to_first();
                                 // get bottom one
    bottom_frag = frag_it.extract();
    frag_it.forward();
    top_frag = frag_it.data();  // look at next
    if ((bottom_frag->steps == 0 && top_frag->steps == 0)
    || (bottom_frag->steps != 0 && top_frag->steps != 0)) {
      if (frag_it.data_relative(1)->ycoord == top_frag->ycoord)
        frag_it.forward();
    }
    top_frag = frag_it.extract();
    if (top_frag->other_end != bottom_frag) {
      outline = join_chopped_fragments(bottom_frag, top_frag);
      ASSERT_HOST(outline == NULL);
    } else {
      outline = join_chopped_fragments(bottom_frag, top_frag);
      if (outline != NULL) {
        olchild_it.set_to_list(outline->child());
        for (child_it.mark_cycle_pt(); !child_it.cycled_list();
             child_it.forward()) {
          child = child_it.data();
          if (*child < *outline)
            olchild_it.add_to_end(child_it.extract());
        }
        if (outline->bounding_box().width() > pitch_error)
          dest_it->add_after_then_move(outline);
        else
          delete outline;          // Make it disappear.
      }
    }
  }
  while (!child_it.empty ()) {
    dest_it->add_after_then_move (child_it.extract ());
    child_it.forward ();
  }
}
void fixed_chop_cblob ( C_BLOB blob,
inT16  chop_coord,
float  pitch_error,
C_OUTLINE_LIST *  left_outlines,
C_OUTLINE_LIST *  right_outlines 
)

Definition at line 273 of file fpchop.cpp.

                       {
  C_OUTLINE *old_right;          //already there
  C_OUTLINE_LIST new_outlines;   //new right ones
                                 //ouput iterator
  C_OUTLINE_IT left_it = left_outlines;
                                 //in/out iterator
  C_OUTLINE_IT right_it = right_outlines;
  C_OUTLINE_IT new_it = &new_outlines;
  C_OUTLINE_IT blob_it;          //outlines in blob

  if (!right_it.empty ()) {
    while (!right_it.empty ()) {
      old_right = right_it.extract ();
      right_it.forward ();
      fixed_split_coutline(old_right,
                           chop_coord,
                           pitch_error,
                           &left_it,
                           &new_it);
    }
    right_it.add_list_before (&new_outlines);
  }
  if (blob != NULL) {
    blob_it.set_to_list (blob->out_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
      blob_it.forward ())
    fixed_split_coutline (blob_it.extract (), chop_coord, pitch_error,
        &left_it, &right_it);
    delete blob;
  }
}
BOOL8 fixed_chop_coutline ( C_OUTLINE srcline,
inT16  chop_coord,
float  pitch_error,
C_OUTLINE_FRAG_LIST *  left_frags,
C_OUTLINE_FRAG_LIST *  right_frags 
)

Definition at line 401 of file fpchop.cpp.

                           {
  BOOL8 first_frag;              //fragment
  inT16 left_edge;               //of outline
  inT16 startindex;              //in first fragment
  inT32 length;                  //of outline
  inT16 stepindex;               //into outline
  inT16 head_index;              //start of fragment
  ICOORD head_pos;               //start of fragment
  inT16 tail_index;              //end of fragment
  ICOORD tail_pos;               //end of fragment
  ICOORD pos;                    //current point
  inT16 first_index = 0;         //first tail
  ICOORD first_pos;              //first tail

  length = srcline->pathlength ();
  pos = srcline->start_pos ();
  left_edge = pos.x ();
  tail_index = 0;
  tail_pos = pos;
  for (stepindex = 0; stepindex < length; stepindex++) {
    if (pos.x () < left_edge) {
      left_edge = pos.x ();
      tail_index = stepindex;
      tail_pos = pos;
    }
    pos += srcline->step (stepindex);
  }
  if (left_edge >= chop_coord - pitch_error)
    return FALSE;                //not worth it

  startindex = tail_index;
  first_frag = TRUE;
  head_index = tail_index;
  head_pos = tail_pos;
  do {
    do {
      tail_pos += srcline->step (tail_index);
      tail_index++;
      if (tail_index == length)
        tail_index = 0;
    }
    while (tail_pos.x () != chop_coord && tail_index != startindex);
    if (tail_index == startindex) {
      if (first_frag)
        return FALSE;            //doesn't cross line
      else
        break;
    }
    //#ifdef __UNIX__
    ASSERT_HOST (head_index != tail_index);
    //#endif
    if (!first_frag) {
      save_chop_cfragment(head_index,
                          head_pos,
                          tail_index,
                          tail_pos,
                          srcline,
                          left_frags);
    }
    else {
      first_index = tail_index;
      first_pos = tail_pos;
      first_frag = FALSE;
    }
    while (srcline->step (tail_index).x () == 0) {
      tail_pos += srcline->step (tail_index);
      tail_index++;
      if (tail_index == length)
        tail_index = 0;
    }
    head_index = tail_index;
    head_pos = tail_pos;
    while (srcline->step (tail_index).x () > 0) {
      do {
        tail_pos += srcline->step (tail_index);
        tail_index++;
        if (tail_index == length)
          tail_index = 0;
      }
      while (tail_pos.x () != chop_coord);
      //#ifdef __UNIX__
      ASSERT_HOST (head_index != tail_index);
      //#endif
      save_chop_cfragment(head_index,
                          head_pos,
                          tail_index,
                          tail_pos,
                          srcline,
                          right_frags);
      while (srcline->step (tail_index).x () == 0) {
        tail_pos += srcline->step (tail_index);
        tail_index++;
        if (tail_index == length)
          tail_index = 0;
      }
      head_index = tail_index;
      head_pos = tail_pos;
    }
  }
  while (tail_index != startindex);
  save_chop_cfragment(head_index,
                      head_pos,
                      first_index,
                      first_pos,
                      srcline,
                      left_frags);
  return TRUE;                   //did some chopping
}
ROW* fixed_pitch_words ( TO_ROW row,
FCOORD  rotation 
)

Definition at line 51 of file fpchop.cpp.

                        {
  BOOL8 bol;                     //start of line
  uinT8 blanks;                  //in front of word
  uinT8 new_blanks;              //blanks in empty cell
  inT16 chop_coord;              //chop boundary
  inT16 prev_chop_coord;         //start of cell
  inT16 rep_left;                //left edge of rep word
  ROW *real_row;                 //output row
  C_OUTLINE_LIST left_coutlines;
  C_OUTLINE_LIST right_coutlines;
  C_BLOB_LIST cblobs;
  C_BLOB_IT cblob_it = &cblobs;
  WERD_LIST words;
  WERD_IT word_it = &words;      //new words
                                 //repeated blobs
  WERD_IT rep_it = &row->rep_words;
  WERD *word;                    //new word
  inT32 xstarts[2];              //row ends
  inT32 prev_x;                  //end of prev blob
                                 //iterator
  BLOBNBOX_IT box_it = row->blob_list ();
                                 //boundaries
  ICOORDELT_IT cell_it = &row->char_cells;

#ifndef GRAPHICS_DISABLED
  if (textord_show_page_cuts && to_win != NULL) {
    plot_row_cells (to_win, ScrollView::RED, row, 0, &row->char_cells);
  }
#endif

  prev_x = -MAX_INT16;
  bol = TRUE;
  blanks = 0;
  if (rep_it.empty ())
    rep_left = MAX_INT16;
  else
    rep_left = rep_it.data ()->bounding_box ().left ();
  if (box_it.empty ())
    return NULL;                 //empty row
  xstarts[0] = box_it.data ()->bounding_box ().left ();
  if (rep_left < xstarts[0]) {
    xstarts[0] = rep_left;
  }
  if (cell_it.empty () || row->char_cells.singleton ()) {
    tprintf ("Row without enough char cells!\n");
    tprintf ("Leftmost blob is at (%d,%d)\n",
      box_it.data ()->bounding_box ().left (),
      box_it.data ()->bounding_box ().bottom ());
    return NULL;
  }
  ASSERT_HOST (!cell_it.empty () && !row->char_cells.singleton ());
  prev_chop_coord = cell_it.data ()->x ();
  word = NULL;
  while (rep_left < cell_it.data ()->x ()) {
    word = add_repeated_word (&rep_it, rep_left, prev_chop_coord,
      blanks, row->fixed_pitch, &word_it);
  }
  cell_it.mark_cycle_pt ();
  if (prev_chop_coord >= cell_it.data ()->x ())
    cell_it.forward ();
  for (; !cell_it.cycled_list (); cell_it.forward ()) {
    chop_coord = cell_it.data ()->x ();
    while (!box_it.empty ()
    && box_it.data ()->bounding_box ().left () <= chop_coord) {
      if (box_it.data ()->bounding_box ().right () > prev_x)
        prev_x = box_it.data ()->bounding_box ().right ();
      split_to_blob (box_it.extract (), chop_coord,
        textord_fp_chop_error + 0.5f,
        &left_coutlines,
        &right_coutlines);
      box_it.forward ();
      while (!box_it.empty() && box_it.data()->cblob() == NULL) {
        delete box_it.extract();
        box_it.forward();
      }
    }
    if (!right_coutlines.empty() && left_coutlines.empty())
      split_to_blob (NULL, chop_coord,
        textord_fp_chop_error + 0.5f,
        &left_coutlines,
        &right_coutlines);
    if (!left_coutlines.empty()) {
      cblob_it.add_after_then_move(new C_BLOB(&left_coutlines));
    } else {
      if (rep_left < chop_coord) {
        if (rep_left > prev_chop_coord)
          new_blanks = (uinT8) floor ((rep_left - prev_chop_coord)
            / row->fixed_pitch + 0.5);
        else
          new_blanks = 0;
      }
      else {
        if (chop_coord > prev_chop_coord)
          new_blanks = (uinT8) floor ((chop_coord - prev_chop_coord)
            / row->fixed_pitch + 0.5);
        else
          new_blanks = 0;
      }
      if (!cblob_it.empty()) {
        if (blanks < 1 && word != NULL && !word->flag (W_REP_CHAR))
          blanks = 1;
        word = new WERD (&cblobs, blanks, NULL);
        cblob_it.set_to_list (&cblobs);
        word->set_flag (W_DONT_CHOP, TRUE);
        word_it.add_after_then_move (word);
        if (bol) {
          word->set_flag (W_BOL, TRUE);
          bol = FALSE;
        }
        blanks = new_blanks;
      }
      else
        blanks += new_blanks;
      while (rep_left < chop_coord) {
        word = add_repeated_word (&rep_it, rep_left, prev_chop_coord,
          blanks, row->fixed_pitch, &word_it);
      }
    }
    if (prev_chop_coord < chop_coord)
      prev_chop_coord = chop_coord;
  }
  if (!cblob_it.empty()) {
    word = new WERD(&cblobs, blanks, NULL);
    word->set_flag (W_DONT_CHOP, TRUE);
    word_it.add_after_then_move (word);
    if (bol)
      word->set_flag (W_BOL, TRUE);
  }
  ASSERT_HOST (word != NULL);
  while (!rep_it.empty ()) {
    add_repeated_word (&rep_it, rep_left, prev_chop_coord,
      blanks, row->fixed_pitch, &word_it);
  }
                                 //at end of line
  word_it.data ()->set_flag (W_EOL, TRUE);
  if (prev_chop_coord > prev_x)
    prev_x = prev_chop_coord;
  xstarts[1] = prev_x + 1;
  real_row = new ROW (row, (inT16) row->kern_size, (inT16) row->space_size);
  word_it.set_to_list (real_row->word_list ());
                                 //put words in row
  word_it.add_list_after (&words);
  real_row->recalc_bounding_box ();
  return real_row;
}
void fixed_split_coutline ( C_OUTLINE srcline,
inT16  chop_coord,
float  pitch_error,
C_OUTLINE_IT *  left_it,
C_OUTLINE_IT *  right_it 
)

Definition at line 319 of file fpchop.cpp.

                           {
  C_OUTLINE *child;              //child outline
  TBOX srcbox;                    //box of outline
  C_OUTLINE_LIST left_ch;        //left children
  C_OUTLINE_LIST right_ch;       //right children
  C_OUTLINE_FRAG_LIST left_frags;//chopped fragments
  C_OUTLINE_FRAG_LIST right_frags;;
  C_OUTLINE_IT left_ch_it = &left_ch;
                                 //for whole children
  C_OUTLINE_IT right_ch_it = &right_ch;
                                 //for holes
  C_OUTLINE_IT child_it = srcline->child ();

  srcbox = srcline->bounding_box();
  if (srcbox.left() + srcbox.right() <= chop_coord * 2
      && srcbox.right() < chop_coord + pitch_error) {
    // Whole outline is in the left side or not far over the chop_coord,
    // so put the whole thing on the left.
    left_it->add_after_then_move(srcline);
  } else if (srcbox.left() + srcbox.right() > chop_coord * 2
             && srcbox.left () > chop_coord - pitch_error) {
    // Whole outline is in the right side or not far over the chop_coord,
    // so put the whole thing on the right.
   right_it->add_before_stay_put(srcline);
  } else {
    // Needs real chopping.
    if (fixed_chop_coutline(srcline, chop_coord, pitch_error,
        &left_frags, &right_frags)) {
      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
           child_it.forward()) {
        child = child_it.extract();
        srcbox = child->bounding_box();
        if (srcbox.right() < chop_coord) {
          // Whole child is on the left.
          left_ch_it.add_after_then_move(child);
        } else if (srcbox.left() > chop_coord) {
          // Whole child is on the right.
          right_ch_it.add_after_then_move (child);
        } else {
          // No pitch_error is allowed when chopping children to prevent
          // impossible outlines from being created.
          if (fixed_chop_coutline(child, chop_coord, 0.0f,
              &left_frags, &right_frags)) {
            delete child;
          } else {
            if (srcbox.left() + srcbox.right() <= chop_coord * 2)
              left_ch_it.add_after_then_move(child);
            else
              right_ch_it.add_after_then_move(child);
          }
        }
      }
      close_chopped_cfragments(&left_frags, &left_ch, pitch_error, left_it);
      close_chopped_cfragments(&right_frags, &right_ch, pitch_error, right_it);
      ASSERT_HOST(left_ch.empty() && right_ch.empty());
      // No children left.
      delete srcline;            // Smashed up.
    } else {
      // Chop failed. Just use middle coord.
      if (srcbox.left() + srcbox.right() <= chop_coord * 2)
        left_it->add_after_then_move(srcline);  // Stick whole in left.
      else
        right_it->add_before_stay_put(srcline);
    }
  }
}

Definition at line 703 of file fpchop.cpp.

                                   {
  C_OUTLINE *outline;            //closed loop

  if (bottom->other_end == top) {
    if (bottom->steps == 0)
      outline = top->close ();   //turn to outline
    else
      outline = bottom->close ();
    delete top;
    delete bottom;
    return outline;
  }
  if (bottom->steps == 0) {
    ASSERT_HOST (top->steps != 0);
    join_segments (bottom->other_end, top);
  }
  else {
    ASSERT_HOST (top->steps == 0);
    join_segments (top->other_end, bottom);
  }
  top->other_end->other_end = bottom->other_end;
  bottom->other_end->other_end = top->other_end;
  delete bottom;
  delete top;
  return NULL;
}
void join_segments ( C_OUTLINE_FRAG bottom,
C_OUTLINE_FRAG top 
)

Definition at line 741 of file fpchop.cpp.

                    {
  DIR128 *steps;                  //new steps
  inT32 stepcount;               //no of steps
  inT16 fake_count;              //fake steps
  DIR128 fake_step;               //step entry

  ASSERT_HOST (bottom->end.x () == top->start.x ());
  fake_count = top->start.y () - bottom->end.y ();
  if (fake_count < 0) {
    fake_count = -fake_count;
    fake_step = 32;
  }
  else
    fake_step = 96;

  stepcount = bottom->stepcount + fake_count + top->stepcount;
  steps = new DIR128[stepcount];
  memmove (steps, bottom->steps, bottom->stepcount);
  memset (steps + bottom->stepcount, fake_step.get_dir(), fake_count);
  memmove (steps + bottom->stepcount + fake_count, top->steps,
    top->stepcount);
  delete [] bottom->steps;
  bottom->steps = steps;
  bottom->stepcount = stepcount;
  bottom->end = top->end;
  bottom->other_end->end = top->end;
}
void save_chop_cfragment ( inT16  head_index,
ICOORD  head_pos,
inT16  tail_index,
ICOORD  tail_pos,
C_OUTLINE srcline,
C_OUTLINE_FRAG_LIST *  frags 
)

Definition at line 522 of file fpchop.cpp.

                          {
  inT16 jump;                    //gap across end
  inT16 stepcount;               //total steps
  C_OUTLINE_FRAG *head;          //head of fragment
  C_OUTLINE_FRAG *tail;          //tail of fragment
  inT16 tail_y;                  //ycoord of tail

  ASSERT_HOST (tail_pos.x () == head_pos.x ());
  ASSERT_HOST (tail_index != head_index);
  stepcount = tail_index - head_index;
  if (stepcount < 0)
    stepcount += srcline->pathlength ();
  jump = tail_pos.y () - head_pos.y ();
  if (jump < 0)
    jump = -jump;
  if (jump == stepcount)
    return;                      //its a nop
  tail_y = tail_pos.y ();
  head = new C_OUTLINE_FRAG (head_pos, tail_pos, srcline,
    head_index, tail_index);
  tail = new C_OUTLINE_FRAG (head, tail_y);
  head->other_end = tail;
  add_frag_to_list(head, frags);
  add_frag_to_list(tail, frags);
}
void split_to_blob ( BLOBNBOX blob,
inT16  chop_coord,
float  pitch_error,
C_OUTLINE_LIST *  left_coutlines,
C_OUTLINE_LIST *  right_coutlines 
)

Definition at line 243 of file fpchop.cpp.

                                                    {
  C_BLOB *real_cblob;            //cblob to chop

  if (blob != NULL) {
    real_cblob = blob->cblob();
  } else {
    real_cblob = NULL;
  }
  if (!right_coutlines->empty() || real_cblob != NULL)
    fixed_chop_cblob(real_cblob,
                     chop_coord,
                     pitch_error,
                     left_coutlines,
                     right_coutlines);
  if (blob != NULL)
    delete blob;                 //free it
}

Variable Documentation

"Max allowed bending of chop cells"

Definition at line 39 of file fpchop.cpp.

double textord_fp_chop_snap = 0.5

"Max distance of chop pt from vertex"

Definition at line 41 of file fpchop.cpp.

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines