tesseract
3.03
|
00001 00002 // File: boxword.h 00003 // Description: Class to represent the bounding boxes of the output. 00004 // Author: Ray Smith 00005 // Created: Tue May 25 14:18:14 PDT 2010 00006 // 00007 // (C) Copyright 2010, Google Inc. 00008 // Licensed under the Apache License, Version 2.0 (the "License"); 00009 // you may not use this file except in compliance with the License. 00010 // You may obtain a copy of the License at 00011 // http://www.apache.org/licenses/LICENSE-2.0 00012 // Unless required by applicable law or agreed to in writing, software 00013 // distributed under the License is distributed on an "AS IS" BASIS, 00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 // See the License for the specific language governing permissions and 00016 // limitations under the License. 00017 // 00019 00020 #ifndef TESSERACT_CSTRUCT_BOXWORD_H__ 00021 #define TESSERACT_CSTRUCT_BOXWORD_H__ 00022 00023 #include "genericvector.h" 00024 #include "rect.h" 00025 #include "unichar.h" 00026 00027 class BLOCK; 00028 class DENORM; 00029 struct TWERD; 00030 class UNICHARSET; 00031 class WERD; 00032 class WERD_CHOICE; 00033 class WERD_RES; 00034 00035 namespace tesseract { 00036 00037 // Class to hold an array of bounding boxes for an output word and 00038 // the bounding box of the whole word. 00039 class BoxWord { 00040 public: 00041 BoxWord(); 00042 explicit BoxWord(const BoxWord& src); 00043 ~BoxWord(); 00044 00045 BoxWord& operator=(const BoxWord& src); 00046 00047 void CopyFrom(const BoxWord& src); 00048 00049 // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to 00050 // switch back to original image coordinates. 00051 static BoxWord* CopyFromNormalized(TWERD* tessword); 00052 00053 // Clean up the bounding boxes from the polygonal approximation by 00054 // expanding slightly, then clipping to the blobs from the original_word 00055 // that overlap. If not null, the block provides the inverse rotation. 00056 void ClipToOriginalWord(const BLOCK* block, WERD* original_word); 00057 00058 // Merges the boxes from start to end, not including end, and deletes 00059 // the boxes between start and end. 00060 void MergeBoxes(int start, int end); 00061 00062 // Inserts a new box before the given index. 00063 // Recomputes the bounding box. 00064 void InsertBox(int index, const TBOX& box); 00065 00066 // Deletes the box with the given index, and shuffles up the rest. 00067 // Recomputes the bounding box. 00068 void DeleteBox(int index); 00069 00070 // Deletes all the boxes stored in BoxWord. 00071 void DeleteAllBoxes(); 00072 00073 // This and other putatively are the same, so call the (permanent) callback 00074 // for each blob index where the bounding boxes match. 00075 // The callback is deleted on completion. 00076 void ProcessMatchedBlobs(const TWERD& other, TessCallback1<int>* cb) const; 00077 00078 const TBOX& bounding_box() const { 00079 return bbox_; 00080 } 00081 const int length() const { 00082 return length_; 00083 } 00084 const TBOX& BlobBox(int index) const { 00085 return boxes_[index]; 00086 } 00087 00088 private: 00089 void ComputeBoundingBox(); 00090 00091 TBOX bbox_; 00092 int length_; 00093 GenericVector<TBOX> boxes_; 00094 }; 00095 00096 } // namespace tesseract. 00097 00098 00099 #endif // TESSERACT_CSTRUCT_BOXWORD_H__