tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/boxword.h
Go to the documentation of this file.
00001 
00002 // File:        boxword.h
00003 // Description: Class to represent the bounding boxes of the output.
00004 // Author:      Ray Smith
00005 // Created:     Tue May 25 14:18:14 PDT 2010
00006 //
00007 // (C) Copyright 2010, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #ifndef TESSERACT_CSTRUCT_BOXWORD_H__
00021 #define TESSERACT_CSTRUCT_BOXWORD_H__
00022 
00023 #include "genericvector.h"
00024 #include "rect.h"
00025 #include "unichar.h"
00026 
00027 class BLOCK;
00028 class DENORM;
00029 struct TWERD;
00030 class UNICHARSET;
00031 class WERD;
00032 class WERD_CHOICE;
00033 class WERD_RES;
00034 
00035 namespace tesseract {
00036 
00037 // Class to hold an array of bounding boxes for an output word and
00038 // the bounding box of the whole word.
00039 class BoxWord {
00040  public:
00041   BoxWord();
00042   explicit BoxWord(const BoxWord& src);
00043   ~BoxWord();
00044 
00045   BoxWord& operator=(const BoxWord& src);
00046 
00047   void CopyFrom(const BoxWord& src);
00048 
00049   // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
00050   // switch back to original image coordinates.
00051   static BoxWord* CopyFromNormalized(TWERD* tessword);
00052 
00053   // Clean up the bounding boxes from the polygonal approximation by
00054   // expanding slightly, then clipping to the blobs from the original_word
00055   // that overlap. If not null, the block provides the inverse rotation.
00056   void ClipToOriginalWord(const BLOCK* block, WERD* original_word);
00057 
00058   // Merges the boxes from start to end, not including end, and deletes
00059   // the boxes between start and end.
00060   void MergeBoxes(int start, int end);
00061 
00062   // Inserts a new box before the given index.
00063   // Recomputes the bounding box.
00064   void InsertBox(int index, const TBOX& box);
00065 
00066   // Deletes the box with the given index, and shuffles up the rest.
00067   // Recomputes the bounding box.
00068   void DeleteBox(int index);
00069 
00070   // Deletes all the boxes stored in BoxWord.
00071   void DeleteAllBoxes();
00072 
00073   // This and other putatively are the same, so call the (permanent) callback
00074   // for each blob index where the bounding boxes match.
00075   // The callback is deleted on completion.
00076   void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const;
00077 
00078   const TBOX& bounding_box() const {
00079     return bbox_;
00080   }
00081   const int length() const {
00082     return length_;
00083   }
00084   const TBOX& BlobBox(int index) const {
00085     return boxes_[index];
00086   }
00087 
00088  private:
00089   void ComputeBoundingBox();
00090 
00091   TBOX bbox_;
00092   int length_;
00093   GenericVector<TBOX> boxes_;
00094 };
00095 
00096 }  // namespace tesseract.
00097 
00098 
00099 #endif  // TESSERACT_CSTRUCT_BOXWORD_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines