tesseract  3.03
tesseract::ShapeTable Class Reference

#include <shapetable.h>

List of all members.

Public Member Functions

 ShapeTable ()
 ShapeTable (const UNICHARSET &unicharset)
bool Serialize (FILE *fp) const
bool DeSerialize (bool swap, FILE *fp)
int NumShapes () const
const UNICHARSET & unicharset () const
int NumFonts () const
void set_unicharset (const UNICHARSET &unicharset)
void ReMapClassIds (const GenericVector< int > &unicharset_map)
STRING DebugStr (int shape_id) const
STRING SummaryStr () const
int AddShape (int unichar_id, int font_id)
int AddShape (const Shape &other)
void DeleteShape (int shape_id)
void AddToShape (int shape_id, int unichar_id, int font_id)
void AddShapeToShape (int shape_id, const Shape &other)
int FindShape (int unichar_id, int font_id) const
void GetFirstUnicharAndFont (int shape_id, int *unichar_id, int *font_id) const
const Shape & GetShape (int shape_id) const
Shape * MutableShape (int shape_id)
int BuildFromShape (const Shape &shape, const ShapeTable &master_shapes)
bool AlreadyMerged (int shape_id1, int shape_id2) const
bool AnyMultipleUnichars () const
int MaxNumUnichars () const
void ForceFontMerges (int start, int end)
int MasterUnicharCount (int shape_id) const
int MasterFontCount (int shape_id) const
int MergedUnicharCount (int shape_id1, int shape_id2) const
void MergeShapes (int shape_id1, int shape_id2)
void SwapShapes (int shape_id1, int shape_id2)
void AppendMasterShapes (const ShapeTable &other, GenericVector< int > *shape_map)
int NumMasterShapes () const
int MasterDestinationIndex (int shape_id) const
bool SubsetUnichar (int shape_id1, int shape_id2) const
bool MergeSubsetUnichar (int merge_id1, int merge_id2, int shape_id) const
bool EqualUnichars (int shape_id1, int shape_id2) const
bool MergeEqualUnichars (int merge_id1, int merge_id2, int shape_id) const
bool CommonUnichars (int shape_id1, int shape_id2) const
bool CommonFont (int shape_id1, int shape_id2) const
void AddShapeToResults (const ShapeRating &shape_rating, GenericVector< int > *unichar_map, GenericVector< UnicharRating > *results) const

Detailed Description

Definition at line 249 of file shapetable.h.


Constructor & Destructor Documentation

tesseract::ShapeTable::ShapeTable ( )

Definition at line 243 of file shapetable.cpp.

                       : unicharset_(NULL), num_fonts_(0) {
}
tesseract::ShapeTable::ShapeTable ( const UNICHARSET &  unicharset) [explicit]

Definition at line 245 of file shapetable.cpp.

  : unicharset_(&unicharset), num_fonts_(0) {
}

Member Function Documentation

int tesseract::ShapeTable::AddShape ( int  unichar_id,
int  font_id 
)

Definition at line 346 of file shapetable.cpp.

                                                    {
  int index = shape_table_.size();
  Shape* shape = new Shape;
  shape->AddToShape(unichar_id, font_id);
  shape_table_.push_back(shape);
  num_fonts_ = MAX(num_fonts_, font_id + 1);
  return index;
}
int tesseract::ShapeTable::AddShape ( const Shape &  other)

Definition at line 357 of file shapetable.cpp.

                                           {
  int index;
  for (index = 0; index < shape_table_.size() &&
       !(other == *shape_table_[index]); ++index)
    continue;
  if (index == shape_table_.size()) {
    Shape* shape = new Shape(other);
    shape_table_.push_back(shape);
  }
  num_fonts_ = 0;
  return index;
}
void tesseract::ShapeTable::AddShapeToResults ( const ShapeRating &  shape_rating,
GenericVector< int > *  unichar_map,
GenericVector< UnicharRating > *  results 
) const

Definition at line 697 of file shapetable.cpp.

                                                                               {
  if (shape_rating.joined) {
    AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map,
                        results);
  }
  if (shape_rating.broken) {
    AddUnicharToResults(UNICHAR_BROKEN, shape_rating.rating, unichar_map,
                        results);
  }
  const Shape& shape = GetShape(shape_rating.shape_id);
  for (int u = 0; u < shape.size(); ++u) {
    int result_index = AddUnicharToResults(shape[u].unichar_id,
                                           shape_rating.rating,
                                           unichar_map, results);
    (*results)[result_index].fonts += shape[u].font_ids;
  }
}
void tesseract::ShapeTable::AddShapeToShape ( int  shape_id,
const Shape &  other 
)

Definition at line 386 of file shapetable.cpp.

                                                                 {
  Shape& shape = *shape_table_[shape_id];
  shape.AddShape(other);
  num_fonts_ = 0;
}
void tesseract::ShapeTable::AddToShape ( int  shape_id,
int  unichar_id,
int  font_id 
)

Definition at line 379 of file shapetable.cpp.

                                                                     {
  Shape& shape = *shape_table_[shape_id];
  shape.AddToShape(unichar_id, font_id);
  num_fonts_ = MAX(num_fonts_, font_id + 1);
}
bool tesseract::ShapeTable::AlreadyMerged ( int  shape_id1,
int  shape_id2 
) const

Definition at line 449 of file shapetable.cpp.

                                                                 {
  return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2);
}

bool tesseract::ShapeTable::AnyMultipleUnichars ( ) const

Definition at line 454 of file shapetable.cpp.

                                           {
  int num_shapes = NumShapes();
  for (int s1 = 0; s1 < num_shapes; ++s1) {
    if (MasterDestinationIndex(s1) != s1) continue;
    if (GetShape(s1).size() > 1)
      return true;
  }
  return false;
}
void tesseract::ShapeTable::AppendMasterShapes ( const ShapeTable &  other,
GenericVector< int > *  shape_map 
)

Definition at line 666 of file shapetable.cpp.

                                                                   {
  if (shape_map != NULL)
    shape_map->init_to_size(other.NumShapes(), -1);
  for (int s = 0; s < other.shape_table_.size(); ++s) {
    if (other.shape_table_[s]->destination_index() < 0) {
      int index = AddShape(*other.shape_table_[s]);
      if (shape_map != NULL)
        (*shape_map)[s] = index;
    }
  }
}
int tesseract::ShapeTable::BuildFromShape ( const Shape &  shape,
const ShapeTable &  master_shapes 
)

Definition at line 423 of file shapetable.cpp.

                                                                {
  BitVector shape_map(master_shapes.NumShapes());
  for (int u_ind = 0; u_ind < shape.size(); ++u_ind) {
    for (int f_ind = 0; f_ind < shape[u_ind].font_ids.size(); ++f_ind) {
      int c = shape[u_ind].unichar_id;
      int f = shape[u_ind].font_ids[f_ind];
      int master_id = master_shapes.FindShape(c, f);
      if (master_id >= 0) {
        shape_map.SetBit(master_id);
      } else if (FindShape(c, f) < 0) {
        AddShape(c, f);
      }
    }
  }
  int num_masters = 0;
  for (int s = 0; s < master_shapes.NumShapes(); ++s) {
    if (shape_map[s]) {
      AddShape(master_shapes.GetShape(s));
      ++num_masters;
    }
  }
  return num_masters;
}
bool tesseract::ShapeTable::CommonFont ( int  shape_id1,
int  shape_id2 
) const

Definition at line 651 of file shapetable.cpp.

                                                              {
  const Shape& shape1 = GetShape(shape_id1);
  const Shape& shape2 = GetShape(shape_id2);
  for (int c1 = 0; c1 < shape1.size(); ++c1) {
    const GenericVector<int>& font_list1 = shape1[c1].font_ids;
    for (int f = 0; f < font_list1.size(); ++f) {
      if (shape2.ContainsFont(font_list1[f]))
        return true;
    }
  }
  return false;
}
bool tesseract::ShapeTable::CommonUnichars ( int  shape_id1,
int  shape_id2 
) const

Definition at line 639 of file shapetable.cpp.

                                                                  {
  const Shape& shape1 = GetShape(shape_id1);
  const Shape& shape2 = GetShape(shape_id2);
  for (int c1 = 0; c1 < shape1.size(); ++c1) {
    int unichar_id1 = shape1[c1].unichar_id;
    if (shape2.ContainsUnichar(unichar_id1))
      return true;
  }
  return false;
}
STRING tesseract::ShapeTable::DebugStr ( int  shape_id) const

Definition at line 291 of file shapetable.cpp.

                                              {
  if (shape_id < 0 || shape_id >= shape_table_.size())
    return STRING("INVALID_UNICHAR_ID");
  const Shape& shape = GetShape(shape_id);
  STRING result;
  result.add_str_int("Shape", shape_id);
  if (shape.size() > 100) {
    result.add_str_int(" Num unichars=", shape.size());
    return result;
  }
  for (int c = 0; c < shape.size(); ++c) {
    result.add_str_int(" c_id=", shape[c].unichar_id);
    result += "=";
    result += unicharset_->id_to_unichar(shape[c].unichar_id);
    if (shape.size() < 10) {
      result.add_str_int(", ", shape[c].font_ids.size());
      result += " fonts =";
      int num_fonts = shape[c].font_ids.size();
      if (num_fonts > 10) {
        result.add_str_int(" ", shape[c].font_ids[0]);
        result.add_str_int(" ... ", shape[c].font_ids[num_fonts - 1]);
      } else {
        for (int f = 0; f < num_fonts; ++f) {
          result.add_str_int(" ", shape[c].font_ids[f]);
        }
      }
    }
  }
  return result;
}
void tesseract::ShapeTable::DeleteShape ( int  shape_id)

Definition at line 371 of file shapetable.cpp.

                                         {
  delete shape_table_[shape_id];
  shape_table_[shape_id] = NULL;
  shape_table_.remove(shape_id);
}
bool tesseract::ShapeTable::DeSerialize ( bool  swap,
FILE *  fp 
)

Definition at line 256 of file shapetable.cpp.

                                                {
  if (!shape_table_.DeSerialize(swap, fp)) return false;
  num_fonts_ = 0;
  return true;
}
bool tesseract::ShapeTable::EqualUnichars ( int  shape_id1,
int  shape_id2 
) const

Definition at line 597 of file shapetable.cpp.

                                                                 {
  const Shape& shape1 = GetShape(shape_id1);
  const Shape& shape2 = GetShape(shape_id2);
  for (int c1 = 0; c1 < shape1.size(); ++c1) {
    int unichar_id1 = shape1[c1].unichar_id;
    if (!shape2.ContainsUnichar(unichar_id1))
      return false;
  }
  for (int c2 = 0; c2 < shape2.size(); ++c2) {
    int unichar_id2 = shape2[c2].unichar_id;
    if (!shape1.ContainsUnichar(unichar_id2))
      return false;
  }
  return true;
}
int tesseract::ShapeTable::FindShape ( int  unichar_id,
int  font_id 
) const

Definition at line 396 of file shapetable.cpp.

                                                           {
  for (int s = 0; s < shape_table_.size(); ++s) {
    const Shape& shape = GetShape(s);
    for (int c = 0; c < shape.size(); ++c) {
      if (shape[c].unichar_id == unichar_id) {
        if (font_id < 0)
          return s;  // We don't care about the font.
        for (int f = 0; f < shape[c].font_ids.size(); ++f) {
          if (shape[c].font_ids[f] == font_id)
            return s;
        }
      }
    }
  }
  return -1;
}
void tesseract::ShapeTable::ForceFontMerges ( int  start,
int  end 
)

Definition at line 478 of file shapetable.cpp.

                                                   {
  for (int s1 = start; s1 < end; ++s1) {
    if (MasterDestinationIndex(s1) == s1 && GetShape(s1).size() == 1) {
      int unichar_id = GetShape(s1)[0].unichar_id;
      for (int s2 = s1 + 1; s2 < end; ++s2) {
        if (MasterDestinationIndex(s2) == s2 && GetShape(s2).size() == 1 &&
            unichar_id == GetShape(s2)[0].unichar_id) {
          MergeShapes(s1, s2);
        }
      }
    }
  }
  ShapeTable compacted(*unicharset_);
  compacted.AppendMasterShapes(*this, NULL);
  *this = compacted;
}
void tesseract::ShapeTable::GetFirstUnicharAndFont ( int  shape_id,
int *  unichar_id,
int *  font_id 
) const

Definition at line 414 of file shapetable.cpp.

                                                                             {
  const UnicharAndFonts& unichar_and_fonts = (*shape_table_[shape_id])[0];
  *unichar_id = unichar_and_fonts.unichar_id;
  *font_id = unichar_and_fonts.font_ids[0];
}
const Shape& tesseract::ShapeTable::GetShape ( int  shape_id) const [inline]

Definition at line 308 of file shapetable.h.

                                            {
    return *shape_table_[shape_id];
  }
int tesseract::ShapeTable::MasterDestinationIndex ( int  shape_id) const

Definition at line 541 of file shapetable.cpp.

                                                         {
  int dest_id = shape_table_[shape_id]->destination_index();
  if (dest_id == shape_id || dest_id < 0)
    return shape_id;  // Is master already.
  int master_id = shape_table_[dest_id]->destination_index();
  if (master_id == dest_id || master_id < 0)
    return dest_id;  // Dest is the master and shape_id points to it.
  master_id = MasterDestinationIndex(master_id);
  return master_id;
}
int tesseract::ShapeTable::MasterFontCount ( int  shape_id) const

Definition at line 502 of file shapetable.cpp.

                                                  {
  int master_id = MasterDestinationIndex(shape_id);
  const Shape& shape = GetShape(master_id);
  int font_count = 0;
  for (int c = 0; c < shape.size(); ++c) {
    font_count += shape[c].font_ids.size();
  }
  return font_count;
}
int tesseract::ShapeTable::MasterUnicharCount ( int  shape_id) const

Definition at line 496 of file shapetable.cpp.

                                                     {
  int master_id = MasterDestinationIndex(shape_id);
  return GetShape(master_id).size();
}

int tesseract::ShapeTable::MaxNumUnichars ( ) const

Definition at line 465 of file shapetable.cpp.

                                     {
  int max_num_unichars = 0;
  int num_shapes = NumShapes();
  for (int s = 0; s < num_shapes; ++s) {
    if (GetShape(s).size() > max_num_unichars)
      max_num_unichars = GetShape(s).size();
  }
  return max_num_unichars;
}
int tesseract::ShapeTable::MergedUnicharCount ( int  shape_id1,
int  shape_id2 
) const

Definition at line 513 of file shapetable.cpp.

                                                                     {
  // Do it the easy way for now.
  int master_id1 = MasterDestinationIndex(shape_id1);
  int master_id2 = MasterDestinationIndex(shape_id2);
  Shape combined_shape(*shape_table_[master_id1]);
  combined_shape.AddShape(*shape_table_[master_id2]);
  return combined_shape.size();
}
bool tesseract::ShapeTable::MergeEqualUnichars ( int  merge_id1,
int  merge_id2,
int  shape_id 
) const

Definition at line 614 of file shapetable.cpp.

                                                        {
  const Shape& merge1 = GetShape(merge_id1);
  const Shape& merge2 = GetShape(merge_id2);
  const Shape& shape = GetShape(shape_id);
  for (int cs = 0; cs < shape.size(); ++cs) {
    int unichar_id = shape[cs].unichar_id;
    if (!merge1.ContainsUnichar(unichar_id) &&
        !merge2.ContainsUnichar(unichar_id))
      return false;  // Shape has a unichar that appears in neither merge.
  }
  for (int cm1 = 0; cm1 < merge1.size(); ++cm1) {
    int unichar_id1 = merge1[cm1].unichar_id;
    if (!shape.ContainsUnichar(unichar_id1))
      return false;  // Merge has a unichar that is not in shape.
  }
  for (int cm2 = 0; cm2 < merge2.size(); ++cm2) {
    int unichar_id2 = merge2[cm2].unichar_id;
    if (!shape.ContainsUnichar(unichar_id2))
      return false;  // Merge has a unichar that is not in shape.
  }
  return true;
}
void tesseract::ShapeTable::MergeShapes ( int  shape_id1,
int  shape_id2 
)

Definition at line 523 of file shapetable.cpp.

                                                         {
  int master_id1 = MasterDestinationIndex(shape_id1);
  int master_id2 = MasterDestinationIndex(shape_id2);
  // Point master_id2 (and all merged shapes) to master_id1.
  shape_table_[master_id2]->set_destination_index(master_id1);
  // Add all the shapes of master_id2 to master_id1.
  shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
}
bool tesseract::ShapeTable::MergeSubsetUnichar ( int  merge_id1,
int  merge_id2,
int  shape_id 
) const

Definition at line 571 of file shapetable.cpp.

                                                        {
  const Shape& merge1 = GetShape(merge_id1);
  const Shape& merge2 = GetShape(merge_id2);
  const Shape& shape = GetShape(shape_id);
  int cm1, cm2, cs;
  for (cs = 0; cs < shape.size(); ++cs) {
    int unichar_id = shape[cs].unichar_id;
    if (!merge1.ContainsUnichar(unichar_id) &&
        !merge2.ContainsUnichar(unichar_id))
      break;  // Shape is not a subset of the merge.
  }
  for (cm1 = 0; cm1 < merge1.size(); ++cm1) {
    int unichar_id1 = merge1[cm1].unichar_id;
    if (!shape.ContainsUnichar(unichar_id1))
      break;  // Merge is not a subset of shape
  }
  for (cm2 = 0; cm2 < merge2.size(); ++cm2) {
    int unichar_id2 = merge2[cm2].unichar_id;
    if (!shape.ContainsUnichar(unichar_id2))
      break;  // Merge is not a subset of shape
  }
  return cs == shape.size() || (cm1 == merge1.size() && cm2 == merge2.size());
}
Shape* tesseract::ShapeTable::MutableShape ( int  shape_id) [inline]

Definition at line 311 of file shapetable.h.

                                    {
    return shape_table_[shape_id];
  }

int tesseract::ShapeTable::NumFonts ( ) const

Definition at line 264 of file shapetable.cpp.

                               {
  if (num_fonts_ <= 0) {
    for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
      const Shape& shape = *shape_table_[shape_id];
      for (int c = 0; c < shape.size(); ++c) {
        for (int f = 0; f < shape[c].font_ids.size(); ++f) {
          if (shape[c].font_ids[f] >= num_fonts_)
            num_fonts_ = shape[c].font_ids[f] + 1;
        }
      }
    }
  }
  return num_fonts_;
}

int tesseract::ShapeTable::NumMasterShapes ( ) const

Definition at line 680 of file shapetable.cpp.

                                      {
  int num_shapes = 0;
  for (int s = 0; s < shape_table_.size(); ++s) {
    if (shape_table_[s]->destination_index() < 0)
      ++num_shapes;
  }
  return num_shapes;
}
int tesseract::ShapeTable::NumShapes ( ) const [inline]

Definition at line 263 of file shapetable.h.

                        {
    return shape_table_.size();
  }
void tesseract::ShapeTable::ReMapClassIds ( const GenericVector< int > &  unicharset_map)

Definition at line 281 of file shapetable.cpp.

                                                                       {
  for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
    Shape* shape = shape_table_[shape_id];
    for (int c = 0; c < shape->size(); ++c) {
      shape->SetUnicharId(c, unicharset_map[(*shape)[c].unichar_id]);
    }
  }
}
bool tesseract::ShapeTable::Serialize ( FILE *  fp) const

Definition at line 250 of file shapetable.cpp.

                                         {
  if (!shape_table_.Serialize(fp)) return false;
  return true;
}
void tesseract::ShapeTable::set_unicharset ( const UNICHARSET &  unicharset) [inline]

Definition at line 274 of file shapetable.h.

                                                    {
    unicharset_ = &unicharset;
  }
bool tesseract::ShapeTable::SubsetUnichar ( int  shape_id1,
int  shape_id2 
) const

Definition at line 553 of file shapetable.cpp.

                                                                 {
  const Shape& shape1 = GetShape(shape_id1);
  const Shape& shape2 = GetShape(shape_id2);
  int c1, c2;
  for (c1 = 0; c1 < shape1.size(); ++c1) {
    int unichar_id1 = shape1[c1].unichar_id;
    if (!shape2.ContainsUnichar(unichar_id1))
      break;
  }
  for (c2 = 0; c2 < shape2.size(); ++c2) {
    int unichar_id2 = shape2[c2].unichar_id;
    if (!shape1.ContainsUnichar(unichar_id2))
      break;
  }
  return c1 == shape1.size() || c2 == shape2.size();
}

STRING tesseract::ShapeTable::SummaryStr ( ) const

Definition at line 323 of file shapetable.cpp.

                                    {
  int max_unichars = 0;
  int num_multi_shapes = 0;
  int num_master_shapes = 0;
  for (int s = 0; s < shape_table_.size(); ++s) {
    if (MasterDestinationIndex(s) != s) continue;
    ++num_master_shapes;
    int shape_size = GetShape(s).size();
    if (shape_size > 1)
      ++num_multi_shapes;
    if (shape_size > max_unichars)
      max_unichars = shape_size;
  }
  STRING result;
  result.add_str_int("Number of shapes = ", num_master_shapes);
  result.add_str_int(" max unichars = ", max_unichars);
  result.add_str_int(" number with multiple unichars = ", num_multi_shapes);
  return result;
}
void tesseract::ShapeTable::SwapShapes ( int  shape_id1,
int  shape_id2 
)

Definition at line 533 of file shapetable.cpp.

                                                        {
  Shape* tmp = shape_table_[shape_id1];
  shape_table_[shape_id1] = shape_table_[shape_id2];
  shape_table_[shape_id2] = tmp;
}
const UNICHARSET& tesseract::ShapeTable::unicharset ( ) const [inline]

Definition at line 266 of file shapetable.h.

                                       {
    return *unicharset_;
  }

The documentation for this class was generated from the following files:
 shapetable.h
 shapetable.cpp
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines