tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/textord/edgblob.cpp File Reference
#include "scanedg.h"
#include "drawedg.h"
#include "edgloop.h"
#include "edgblob.h"

Go to the source code of this file.

Defines

#define EXTERN

Functions

extract_edges

Run the edge detector over the block and return a list of blobs.

void extract_edges (Pix *pix, BLOCK *block)
outlines_to_blobs

Gather together outlines into blobs using the usual bucket sort.

void outlines_to_blobs (BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)
fill_buckets

Move each outline from the list into the bucket selected by the left and bottom coordinates of its bounding box.

void fill_buckets (C_OUTLINE_LIST *outlines, OL_BUCKETS *buckets)
empty_buckets

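Empty the buckets: repeatedly take a parent outline from a bucket, capture its children, and construct blobs from them, adding the results to the block's blob list or reject list.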

void empty_buckets (BLOCK *block, OL_BUCKETS *buckets)
capture_children

Find all neighbouring outlines that are children of this outline and either move them to the output list or declare this outline illegal and return FALSE.

BOOL8 capture_children (OL_BUCKETS *buckets, C_BLOB_IT *reject_it, C_OUTLINE_IT *blob_it)

Variables

EXTERN bool edges_use_new_outline_complexity = FALSE
EXTERN int edges_max_children_per_outline = 10
EXTERN int edges_max_children_layers = 5
EXTERN bool edges_debug = FALSE
EXTERN int edges_children_per_grandchild = 10
EXTERN int edges_children_count_limit = 45
EXTERN bool edges_children_fix = FALSE
EXTERN int edges_min_nonhole = 12
EXTERN int edges_patharea_ratio = 40
EXTERN double edges_childarea = 0.5
EXTERN double edges_boxarea = 0.875

Define Documentation

#define EXTERN

Definition at line 30 of file edgblob.cpp.


Function Documentation

BOOL8 capture_children ( OL_BUCKETS *  buckets,
C_BLOB_IT *  reject_it,
C_OUTLINE_IT *  blob_it 
)

Definition at line 440 of file edgblob.cpp.

                        {
  // Counts the children of the outline currently under blob_it and, when the
  // count is within edges_children_count_limit, asks the buckets to extract
  // those children via blob_it. Returns FALSE if the count exceeds the
  // limit, TRUE otherwise. reject_it is not referenced in this body.
  C_OUTLINE *outline;            // master outline
  inT32 child_count;             // no of children

  outline = blob_it->data();
  // Two alternative counting routines, selected by the
  // edges_use_new_outline_complexity flag; both are given the same limit.
  if (edges_use_new_outline_complexity)
    child_count = buckets->outline_complexity(outline,
                                               edges_children_count_limit,
                                               0);
  else
    child_count = buckets->count_children(outline,
                                           edges_children_count_limit);
  // Over the limit: report failure without extracting anything.
  if (child_count > edges_children_count_limit)
    return FALSE;

  // Only call extract_children when there is at least one child.
  if (child_count > 0)
    buckets->extract_children(outline, blob_it);
  return TRUE;
}
void empty_buckets ( BLOCK *  block,
OL_BUCKETS *  buckets 
)

Definition at line 398 of file edgblob.cpp.

                    {
  // Drains the buckets one parent outline at a time: selects a parent from
  // the current bucket, moves it into a temporary list, lets
  // capture_children() deal with its children, then passes the temporary
  // list to C_BLOB::ConstructBlobsFromOutlines together with the block's
  // blob list and reject list.
  BOOL8 good_blob;               // healthy blob
  C_OUTLINE_LIST outlines;       // outlines in block
                                 // iterator
  C_OUTLINE_IT out_it = &outlines;
  C_OUTLINE_IT bucket_it = buckets->start_scan();
  C_OUTLINE_IT parent_it;        // parent outline
  C_BLOB_IT good_blobs = block->blob_list();
  C_BLOB_IT junk_blobs = block->reject_blobs();

  // Loop until the list bucket_it points at is empty.
  while (!bucket_it.empty()) {
    // Re-point the output iterator at the temporary list for this round.
    out_it.set_to_list(&outlines);
    // The inner loop advances until it wraps to the list start or reaches
    // an outline for which *parent < *candidate holds; in the latter case
    // the outer loop adopts that candidate as the new parent and searches
    // again. The search ends when a pass wraps without finding one.
    // NOTE(review): operator< on C_OUTLINE presumably tests containment -
    // confirm against its declaration.
    do {
      parent_it = bucket_it;     // find outermost
      do {
        bucket_it.forward();
      } while (!bucket_it.at_first() &&
               !(*parent_it.data() < *bucket_it.data()));
    } while (!bucket_it.at_first());

                                 // move to new list
    out_it.add_after_then_move(parent_it.extract());
    // The result of capture_children is forwarded as the first argument
    // of ConstructBlobsFromOutlines.
    good_blob = capture_children(buckets, &junk_blobs, &out_it);
    C_BLOB::ConstructBlobsFromOutlines(good_blob, &outlines, &good_blobs,
                                       &junk_blobs);

    // NOTE(review): scan_next() presumably yields the next bucket still
    // holding outlines - confirm against OL_BUCKETS.
    bucket_it.set_to_list(buckets->scan_next());
  }
}
void extract_edges ( Pix *  pix,
BLOCK *  block 
)

Definition at line 334 of file edgblob.cpp.

                                 {  // block to scan
  // Entry point: calls block_edges() on the image and block with an
  // iterator over a local outline list, then passes that list and the
  // block's bounding box to outlines_to_blobs().
  C_OUTLINE_LIST outlines;       // outlines in block
  C_OUTLINE_IT out_it = &outlines;

  // NOTE(review): block_edges presumably appends the detected outlines
  // through out_it - confirm against scanedg.h.
  block_edges(pix, block, &out_it);
  ICOORD bleft;                  // block box
  ICOORD tright;
  // bleft and tright are passed in to receive the block's box corners.
  block->bounding_box(bleft, tright);
                                 // make blobs
  outlines_to_blobs(block, bleft, tright, &outlines);
}
void fill_buckets ( C_OUTLINE_LIST *  outlines,
OL_BUCKETS *  buckets 
)

Definition at line 373 of file edgblob.cpp.

                   {
  // Moves every outline out of the input list and appends it to the bucket
  // list selected by the left and bottom coordinates of its bounding box.
  TBOX ol_box;                     // outline box
  C_OUTLINE_IT out_it = outlines;  // iterator
  C_OUTLINE_IT bucket_it;          // iterator in bucket
  C_OUTLINE *outline;              // current outline

  // One full cycle over the input list; each element is extracted as it
  // is visited.
  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
    outline = out_it.extract();  // take off list
                                 // get box
    ol_box = outline->bounding_box();
    // OL_BUCKETS::operator() maps the (left, bottom) pair to a bucket list.
    bucket_it.set_to_list((*buckets) (ol_box.left(), ol_box.bottom()));
    bucket_it.add_to_end(outline);
  }
}
void outlines_to_blobs ( BLOCK *  block,
ICOORD  bleft,
ICOORD  tright,
C_OUTLINE_LIST *  outlines 
)

Definition at line 354 of file edgblob.cpp.

                                                 {
  // Builds a bucket structure from the two given corners, distributes the
  // outlines into it with fill_buckets(), then turns the bucket contents
  // into blobs on the block with empty_buckets().
                                 // make buckets
  OL_BUCKETS buckets(bleft, tright);

  fill_buckets(outlines, &buckets);
  empty_buckets(block, &buckets);
}

Variable Documentation

EXTERN double edges_boxarea = 0.875

"Min area fraction of grandchild for box"

Definition at line 60 of file edgblob.cpp.

EXTERN double edges_childarea = 0.5

"Min area fraction of child outline"

Definition at line 58 of file edgblob.cpp.

EXTERN int edges_children_count_limit = 45

"Max holes allowed in blob"

Definition at line 50 of file edgblob.cpp.

EXTERN bool edges_children_fix = FALSE

"Remove boxy parents of char-like children"

Definition at line 52 of file edgblob.cpp.

EXTERN int edges_children_per_grandchild = 10

"Importance ratio for chucking outlines"

Definition at line 48 of file edgblob.cpp.

EXTERN bool edges_debug = FALSE

"turn on debugging for this module"

Definition at line 44 of file edgblob.cpp.

EXTERN int edges_max_children_layers = 5

"Max layers of nested children inside a character outline"

Definition at line 42 of file edgblob.cpp.

EXTERN int edges_max_children_per_outline = 10

"Max number of children inside a character outline"

Definition at line 40 of file edgblob.cpp.

EXTERN int edges_min_nonhole = 12

"Min pixels for potential char in box"

Definition at line 54 of file edgblob.cpp.

EXTERN int edges_patharea_ratio = 40

"Max lensq/area for acceptable child outline"

Definition at line 56 of file edgblob.cpp.

EXTERN bool edges_use_new_outline_complexity = FALSE

"Use the new outline complexity module"

Definition at line 38 of file edgblob.cpp.

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines