tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccmain/pageiterator.cpp
Go to the documentation of this file.
00001 
00002 // File:        pageiterator.cpp
00003 // Description: Iterator for tesseract page structure that avoids using
00004 //              tesseract internal data structures.
00005 // Author:      Ray Smith
00006 // Created:     Fri Feb 26 14:32:09 PST 2010
00007 //
00008 // (C) Copyright 2010, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #include "pageiterator.h"
00022 #include "allheaders.h"
00023 #include "helpers.h"
00024 #include "pageres.h"
00025 #include "tesseractclass.h"
00026 
00027 namespace tesseract {
00028 
// Builds an iterator over page_res and positions it at the start of the page
// via Begin(). page_res and tesseract are stored as raw pointers; the only
// allocation made here is it_. scale, scaled_yres and the rect_* values are
// stored unchanged for later use by the coordinate-returning methods.
00029 PageIterator::PageIterator(PAGE_RES* page_res, Tesseract* tesseract,
00030                            int scale, int scaled_yres,
00031                            int rect_left, int rect_top,
00032                            int rect_width, int rect_height)
00033   : page_res_(page_res), tesseract_(tesseract),
00034     word_(NULL), word_length_(0), blob_index_(0), cblob_it_(NULL),
00035     scale_(scale), scaled_yres_(scaled_yres),
00036     rect_left_(rect_left), rect_top_(rect_top),
00037     rect_width_(rect_width), rect_height_(rect_height) {
00038   it_ = new PAGE_RES_IT(page_res);
        // Explicitly qualified, so PageIterator's own Begin() is the one called.
00039   PageIterator::Begin();
00040 }
00041 
// Frees the two heap objects held through it_ and cblob_it_. cblob_it_ may be
// NULL, in which case delete is a no-op. page_res_ and tesseract_ are not
// deleted here.
00042 PageIterator::~PageIterator() {
00043   delete it_;
00044   delete cblob_it_;
00045 }
00046 
// Copy constructor. Shares page_res_ and tesseract_ with src, takes its own
// copy of src's PAGE_RES_IT, and lets BeginWord() rebuild the per-word state
// (word_, word_length_, cblob_it_) at src's blob offset instead of copying
// src's pointers.
00052 PageIterator::PageIterator(const PageIterator& src)
00053   : page_res_(src.page_res_), tesseract_(src.tesseract_),
00054     word_(NULL), word_length_(src.word_length_),
00055     blob_index_(src.blob_index_), cblob_it_(NULL),
00056     scale_(src.scale_), scaled_yres_(src.scaled_yres_),
00057     rect_left_(src.rect_left_), rect_top_(src.rect_top_),
00058     rect_width_(src.rect_width_), rect_height_(src.rect_height_) {
00059   it_ = new PAGE_RES_IT(*src.it_);
00060   BeginWord(src.blob_index_);
00061 }
00062 
// Copy assignment. Makes this iterator refer to the same page, Tesseract
// instance, image rectangle and position as src. The PAGE_RES_IT is copied
// (not shared) and the per-word state is rebuilt by BeginWord() at src's blob
// offset. Returns *this.
00063 const PageIterator& PageIterator::operator=(const PageIterator& src) {
        // Self-assignment must be a no-op: deleting it_ below and then copying
        // from *src.it_ would otherwise read a freed object.
        if (this == &src) return *this;
00064   page_res_ = src.page_res_;
00065   tesseract_ = src.tesseract_;
00066   scale_ = src.scale_;
00067   scaled_yres_ = src.scaled_yres_;
00068   rect_left_ = src.rect_left_;
00069   rect_top_ = src.rect_top_;
00070   rect_width_ = src.rect_width_;
00071   rect_height_ = src.rect_height_;
00072   delete it_;  // delete on a null pointer is a no-op, no test needed.
00073   it_ = new PAGE_RES_IT(*src.it_);
00074   BeginWord(src.blob_index_);
00075   return *this;
00076 }
00077 
// Returns true when this iterator and other are positioned at the same word.
// Two missing (NULL) positions count as equal; a missing and a present
// position never do.
00078 bool PageIterator::PositionedAtSameWord(const PAGE_RES_IT* other) const {
00079   if (it_ == NULL || other == NULL) return it_ == NULL && other == NULL;
00080   return *it_ == *other;
00081 }
00082 
00083 // ============= Moving around within the page ============.
00084 
// Moves the iterator back to the start of the page and resets the symbol
// position to the first blob of the word found there.
// (restart_page_with_empties presumably keeps blocks without words in the
// sequence - confirm in PAGE_RES_IT.)
00086 void PageIterator::Begin() {
00087   it_->restart_page_with_empties();
00088   BeginWord(0);
00089 }
00090 
// Moves the iterator to the start of the paragraph containing the current
// position, then resets the symbol position. Does nothing when the iterator
// is already past the last block.
// The paragraph start is found by walking forward from the beginning of the
// page one paragraph at a time and keeping the last start whose cmp() against
// the current position is <= 0 (presumably "at or before" - confirm in
// PAGE_RES_IT::cmp). The work therefore grows with the number of paragraphs
// that precede the current one.
00091 void PageIterator::RestartParagraph() {
00092   if (it_->block() == NULL) return; // At end of the document.
00093   PAGE_RES_IT para(page_res_);
00094   PAGE_RES_IT next_para(para);
00095   next_para.forward_paragraph();
00096   while (next_para.cmp(*it_) <= 0) {
00097     para = next_para;
00098     next_para.forward_paragraph();
00099   }
00100   *it_ = para;
00101   BeginWord(0);
00102 }
00103 
// True when the current position is on the same row as the start of its
// paragraph. A scratch copy is rewound, so this iterator is not moved.
00104 bool PageIterator::IsWithinFirstTextlineOfParagraph() const {
00105   PageIterator para_start(*this);
00106   para_start.RestartParagraph();
00107   return it_->row() == para_start.it_->row();
00108 }
00109 
// Moves the iterator back to the start of the current row and resets the
// symbol position to the first blob of the word found there.
00110 void PageIterator::RestartRow() {
00111   it_->restart_row();
00112   BeginWord(0);
00113 }
00114 
// Advances the iterator to the start of the next element at the given level.
// Returns false if the iterator was already past the last block, or if the
// move takes it past the last block; returns true otherwise.
// When the current block has no word (it_->word() is NULL) the request is
// treated as RIL_BLOCK, whatever level was asked for.
00128 bool PageIterator::Next(PageIteratorLevel level) {
00129   if (it_->block() == NULL) return false;  // Already at the end!
00130   if (it_->word() == NULL)
00131     level = RIL_BLOCK;
00132 
00133   switch (level) {
00134     case RIL_BLOCK:
00135       it_->forward_block();
00136       break;
00137     case RIL_PARA:
00138       it_->forward_paragraph();
00139       break;
00140     case RIL_TEXTLINE:
            // Intentionally empty loop body: keep stepping one word at a time
            // until the row differs from the previous word's row.
00141       for (it_->forward_with_empties(); it_->row() == it_->prev_row();
00142            it_->forward_with_empties());
00143       break;
00144     case RIL_WORD:
00145       it_->forward_with_empties();
00146       break;
00147     case RIL_SYMBOL:
            // cblob_it_ is only non-NULL when symbols are cblobs (see
            // BeginWord); it is kept in step with blob_index_.
00148       if (cblob_it_ != NULL)
00149         cblob_it_->forward();
00150       ++blob_index_;
00151       if (blob_index_ >= word_length_)
00152         it_->forward_with_empties();
00153       else
              // Still inside the same word: return before the BeginWord(0)
              // below, which would reset the symbol position.
00154         return true;
00155       break;
00156   }
00157   BeginWord(0);
00158   return it_->block() != NULL;
00159 }
00160 
// Returns true if the current position is the first symbol of an element at
// the given level. Past the last block the answer is always false; in a block
// without words it is always true. Every position begins a symbol.
00166 bool PageIterator::IsAtBeginningOf(PageIteratorLevel level) const {
00167   if (it_->block() == NULL) return false;  // Past the last block.
00168   if (it_->word() == NULL) return true;    // Block without words.
00169   if (level == RIL_SYMBOL) return true;
00170   // Every coarser level requires being on the first symbol of a word.
00171   if (blob_index_ != 0) return false;
00172   if (level == RIL_WORD) return true;
00173   if (level == RIL_TEXTLINE) return it_->row() != it_->prev_row();
00174   if (level != RIL_BLOCK && level != RIL_PARA) return false;
00175   // A change of block starts both a block and a paragraph. The paragraph
00176   // pointers are only compared when the block is unchanged.
00177   if (it_->block() != it_->prev_block()) return true;
00178   return level == RIL_PARA &&
00179       it_->row()->row->para() != it_->prev_row()->row->para();
00180 }
00185 
// Returns true if the current element (of size `element`) is the last one
// inside the enclosing `level`, e.g. the last word of a text line. Also
// returns true when there is no element here at all (Empty(element)).
// The check is made on a copy, so this iterator is not moved.
00190 bool PageIterator::IsAtFinalElement(PageIteratorLevel level,
00191                                     PageIteratorLevel element) const {
00192   if (Empty(element)) return true;  // Already at the end!
00193   // The result is true if we step forward by element and find we are
00194   // at the end of the page or at beginning of *all* levels in:
00195   // [level, element).
00196   // When there is more than one level difference between element and level,
00197   // we could for instance move forward one symbol and still be at the first
00198   // word on a line, so we also have to be at the first symbol in a word.
00199   PageIterator next(*this);
00200   next.Next(element);
00201   if (next.Empty(element)) return true;  // Reached the end of the page.
        // Walk from element - 1 down to level; element is a by-value parameter,
        // so reusing it as the loop variable does not affect the caller.
00202   while (element > level) {
00203     element = static_cast<PageIteratorLevel>(element - 1);
00204     if (!next.IsAtBeginningOf(element))
00205       return false;
00206   }
00207   return true;
00208 }
00209 
// Three-way comparison of two iterator positions. The word positions are
// compared first (the result of PAGE_RES_IT::cmp is passed through when it is
// non-zero); within the same word the symbol index decides: -1, 0 or 1.
00216 int PageIterator::Cmp(const PageIterator &other) const {
00217   const int word_order = it_->cmp(*other.it_);
00218   if (word_order != 0) return word_order;
00219   if (blob_index_ == other.blob_index_) return 0;
00220   return blob_index_ < other.blob_index_ ? -1 : 1;
00221 }
00226 
00227 // ============= Accessing data ==============.
00228 // Coordinate system:
00229 // Integer coordinates are at the cracks between the pixels.
00230 // The top-left corner of the top-left pixel in the image is at (0,0).
00231 // The bottom-right corner of the bottom-right pixel in the image is at
00232 // (width, height).
00233 // Every bounding box goes from the top-left of the top-left contained
00234 // pixel to the bottom-right of the bottom-right contained pixel, so
00235 // the bounding box of the single top-left pixel in the image is:
00236 // (0,0)->(1,1).
00237 // If an image rectangle has been set in the API, then returned coordinates
00238 // relate to the original (full) image, rather than the rectangle.
00239 
// Computes the bounding box of the current element at the given level in
// top-down pixel coordinates of the binary image (tesseract_->pix_binary()),
// clipped to that image. scale_ and the rect_* offsets are NOT applied here;
// the public BoundingBox() overloads do that.
// Returns false, without writing the outputs, when there is no element at
// this level (see Empty()); returns true otherwise.
00246 bool PageIterator::BoundingBoxInternal(PageIteratorLevel level,
00247                                        int* left, int* top,
00248                                        int* right, int* bottom) const {
00249   if (Empty(level))
00250     return false;
00251   TBOX box;
00252   PARA *para = NULL;
00253   switch (level) {
00254     case RIL_BLOCK:
00255       box = it_->block()->block->bounding_box();
00256       break;
00257     case RIL_PARA:
            // Remember the paragraph, then seed the box with the current row;
            // the remaining rows are unioned in after the switch.
00258       para = it_->row()->row->para();
00259       // explicit fall-through.
00260     case RIL_TEXTLINE:
00261       box = it_->row()->row->bounding_box();
00262       break;
00263     case RIL_WORD:
00264       box = it_->word()->word->bounding_box();
00265       break;
00266     case RIL_SYMBOL:
            // Symbols come from box_word unless a cblob iterator is active
            // (BeginWord creates cblob_it_ only when there is no best_choice).
00267       if (cblob_it_ == NULL)
00268         box = it_->word()->box_word->BlobBox(blob_index_);
00269       else
00270         box = cblob_it_->data()->bounding_box();
00271   }
00272   if (level == RIL_PARA) {
          // Scan every text line of the page from the start and union in each
          // row that belongs to the same block and the same paragraph.
00273     PageIterator other = *this;
00274     other.Begin();
00275     do {
00276       if (other.it_->block() &&
00277           other.it_->block()->block == it_->block()->block &&
00278           other.it_->row() && other.it_->row()->row &&
00279           other.it_->row()->row->para() == para) {
00280         box = box.bounding_union(other.it_->row()->row->bounding_box());
00281       }
00282     } while (other.Next(RIL_TEXTLINE));
00283   }
        // Boxes taken from box_word are not rotated; every other box is rotated
        // by the block's re_rotation(). NOTE(review): presumably box_word is
        // already in image orientation - confirm.
00284   if (level != RIL_SYMBOL || cblob_it_ != NULL)
00285     box.rotate(it_->block()->block->re_rotation());
00286   // Now we have a box in tesseract coordinates relative to the image rectangle,
00287   // we have to convert the coords to a top-down system.
00288   const int pix_height = pixGetHeight(tesseract_->pix_binary());
00289   const int pix_width = pixGetWidth(tesseract_->pix_binary());
        // right and bottom are clipped from below by left and top, so the
        // resulting box never has negative width or height.
00290   *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width);
00291   *top = ClipToRange(pix_height - box.top(), 0, pix_height);
00292   *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width);
00293   *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height);
00294   return true;
00295 }
00296 
// Convenience overload: the bounding box of the current element with no
// padding. Forwards to the padded overload with padding = 0 and returns its
// result.
00303 bool PageIterator::BoundingBox(PageIteratorLevel level,
00304                                int* left, int* top,
00305                                int* right, int* bottom) const {
00306   return BoundingBox(level, 0, left, top, right, bottom);
00307 }
00308 
// Bounding box of the current element at the given level, expanded by padding
// on every side, in coordinates of the original image. The internal box is
// divided by scale_ (left/top rounded down, right/bottom rounded up), shifted
// by the rectangle origin and clipped to the image rectangle.
// Returns false if there is no element at this level.
00309 bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding,
00310                                int* left, int* top,
00311                                int* right, int* bottom) const {
00312   if (!BoundingBoxInternal(level, left, top, right, bottom))
00313     return false;
00314   // Far edges of the image rectangle, and the round-up term for division.
00315   const int x_limit = rect_left_ + rect_width_;
00316   const int y_limit = rect_top_ + rect_height_;
00317   const int round_up = scale_ - 1;
00318   *left = ClipToRange(*left / scale_ + rect_left_ - padding,
00319                       rect_left_, x_limit);
00320   *top = ClipToRange(*top / scale_ + rect_top_ - padding,
00321                      rect_top_, y_limit);
00322   *right = ClipToRange((*right + round_up) / scale_ + rect_left_ + padding,
00323                        *left, x_limit);
00324   *bottom = ClipToRange((*bottom + round_up) / scale_ + rect_top_ + padding,
                              *top, y_limit);
        return true;
      }
00325 
// Returns true if there is nothing to report at the given level: the iterator
// is past the last block, the block has no word (for any level below
// RIL_BLOCK), or - for RIL_SYMBOL - the word has no symbol left at the
// current index.
00327 bool PageIterator::Empty(PageIteratorLevel level) const {
00328   return it_->block() == NULL ||
00329       (level != RIL_BLOCK && it_->word() == NULL) ||
00330       (level == RIL_SYMBOL && blob_index_ >= word_length_);
00331 }
00334 
// Returns the type of the current block: PT_UNKNOWN when there is no block,
// PT_FLOWING_TEXT when the block has no polygon, otherwise whatever the
// polygon reports through isA().
00336 PolyBlockType PageIterator::BlockType() const {
00337   BLOCK* block = it_->block() == NULL ? NULL : it_->block()->block;
00338   if (block == NULL) return PT_UNKNOWN;  // Past the end, or no BLOCK set.
00339   POLY_BLOCK* outline = block->poly_block();
00340   // No layout analysis used - assume text.
00341   return outline == NULL ? PT_FLOWING_TEXT : outline->isA();
00342 }
00343 
// Returns the outline polygon of the current block as a Leptonica Pta, with
// points converted to top-down coordinates of the original image (divided by
// scale_ and offset by the image rectangle). Returns NULL when the iterator
// is past the last block or the block has no polygon.
// The Pta is created here with ptaCreate; presumably the caller must release
// it with ptaDestroy - confirm against the header documentation.
00346 Pta* PageIterator::BlockPolygon() const {
00347   if (it_->block() == NULL || it_->block()->block == NULL)
00348     return NULL;  // Already at the end!
00349   if (it_->block()->block->poly_block() == NULL)
00350     return NULL;  // No layout analysis used - no polygon.
00351   ICOORDELT_IT it(it_->block()->block->poly_block()->points());
00352   Pta* pta = ptaCreate(it.length());
00354   for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
00355     ICOORD* pt = it.data();
00356     // Convert to top-down coords within the input image.
00357     float x = static_cast<float>(pt->x()) / scale_ + rect_left_;
00358     float y = rect_top_ + rect_height_ - static_cast<float>(pt->y()) / scale_;
00359     ptaAddPt(pta, x, y);
00360   }
00361   return pta;
00362 }
00363 
// Returns a binary image of the current element at the given level, cut from
// tesseract_->pix_binary(), or NULL if there is no element at this level.
//  - RIL_BLOCK / RIL_PARA: the block's mask ANDed with the binary image; for
//    RIL_PARA the result is then cropped to the paragraph's bounding box.
//  - RIL_TEXTLINE / RIL_WORD / RIL_SYMBOL: a plain rectangle clipped from the
//    binary image, except that a symbol backed by a cblob with non-zero area
//    is rendered from the blob itself.
// Every path returns a Pix produced by a create/clip/render call; presumably
// the caller must release it with pixDestroy - confirm in the header.
00386 Pix* PageIterator::GetBinaryImage(PageIteratorLevel level) const {
00387   int left, top, right, bottom;
00388   if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
00389     return NULL;
00390   Pix* pix = NULL;
00391   switch (level) {
00392     case RIL_BLOCK:
00393     case RIL_PARA:
            // The block box gives the position of the mask inside the binary
            // image. The return value is ignored: the call at the top already
            // ruled out the past-the-end case.
00394       int bleft, btop, bright, bbottom;
00395       BoundingBoxInternal(RIL_BLOCK, &bleft, &btop, &bright, &bbottom);
00396       pix = it_->block()->block->render_mask();
00397       // AND the mask and the image.
00398       pixRasterop(pix, 0, 0, pixGetWidth(pix), pixGetHeight(pix),
00399                   PIX_SRC & PIX_DST, tesseract_->pix_binary(),
00400                   bleft, btop);
00401       if (level == RIL_PARA) {
00402         // RIL_PARA needs further attention:
00403         //   clip the paragraph from the block mask.
              // The box is expressed relative to the block image, hence the
              // subtraction of the block's top-left corner.
00404         Box* box = boxCreate(left - bleft, top - btop,
00405                              right - left, bottom - top);
00406         Pix* pix2 = pixClipRectangle(pix, box, NULL);
00407         boxDestroy(&box);
              // The block-sized image is no longer needed once clipped.
00408         pixDestroy(&pix);
00409         pix = pix2;
00410       }
00411       break;
00412     case RIL_TEXTLINE:
00413     case RIL_WORD:
00414     case RIL_SYMBOL:
00415       if (level == RIL_SYMBOL && cblob_it_ != NULL &&
00416           cblob_it_->data()->area() != 0)
00417         return cblob_it_->data()->render();
00418       // Just clip from the bounding box.
00419       Box* box = boxCreate(left, top, right - left, bottom - top);
00420       pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL);
00421       boxDestroy(&box);
00422       break;
00423   }
00424   return pix;
00425 }
00426 
// Returns an image of the current element at the given level, cut from the
// grey image tesseract_->pix_grey() with `padding` extra pixels on each side,
// or NULL if there is no element at this level. *left and *top receive the
// top-left corner of the returned image. For RIL_BLOCK, pixels outside the
// (dilated) block mask are set to 255.
// If no grey image is available, the result of GetBinaryImage(level) is
// returned instead; in that case padding is not applied and *left / *top hold
// the unpadded BoundingBox() values.
// NOTE(review): right/bottom come from BoundingBox(), which adds rect_left_ /
// rect_top_, yet they are clamped against rect_width_ / rect_height_ alone -
// this looks correct only for a rectangle at the image origin; confirm.
// NOTE(review): the mask is pasted at (padding, padding), which assumes
// *left / *top were not clamped to 0 above; confirm.
00437 Pix* PageIterator::GetImage(PageIteratorLevel level, int padding,
00438                             int* left, int* top) const {
00439   int right, bottom;
00440   if (!BoundingBox(level, left, top, &right, &bottom))
00441     return NULL;
        // pix is the Tesseract instance's grey image; it is only read here and
        // never destroyed by this function.
00442   Pix* pix = tesseract_->pix_grey();
00443   if (pix == NULL)
00444     return GetBinaryImage(level);
00445 
00446   // Expand the box.
00447   *left = MAX(*left - padding, 0);
00448   *top = MAX(*top - padding, 0);
00449   right = MIN(right + padding, rect_width_);
00450   bottom = MIN(bottom + padding, rect_height_);
00451   Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
00452   Pix* grey_pix = pixClipRectangle(pix, box, NULL);
00453   boxDestroy(&box);
00454   if (level == RIL_BLOCK) {
          // Build a 1 bpp mask the size of the padded box, grow it by the
          // padding, invert it, and use it to paint everything outside the
          // grown block shape with 255.
00455     Pix* mask = it_->block()->block->render_mask();
00456     Pix* expanded_mask = pixCreate(right - *left, bottom - *top, 1);
00457     pixRasterop(expanded_mask, padding, padding,
00458                 pixGetWidth(mask), pixGetHeight(mask),
00459                 PIX_SRC, mask, 0, 0);
00460     pixDestroy(&mask);
00461     pixDilateBrick(expanded_mask, expanded_mask, 2*padding + 1, 2*padding + 1);
00462     pixInvert(expanded_mask, expanded_mask);
00463     pixSetMasked(grey_pix, expanded_mask, 255);
00464     pixDestroy(&expanded_mask);
00465   }
00466   return grey_pix;
00467 }
00468 
// Computes the two end points of the baseline of the current element and
// writes them to (x1, y1) and (x2, y2). For RIL_WORD and RIL_SYMBOL the
// horizontal extent is that of the word's bounding box; for every other level
// it is that of the row. In both cases the y values come from the row's
// base_line() evaluated at the two x positions, rounded to nearest.
// Returns false (outputs untouched) when there is no current word.
// NOTE(review): y is computed as (rect_height_ - y) / scale_, while
// BoundingBoxInternal() flips against the binary image height; the two agree
// only when scale_ == 1 - confirm intended behavior for scaled images.
00474 bool PageIterator::Baseline(PageIteratorLevel level,
00475                             int* x1, int* y1, int* x2, int* y2) const {
00476   if (it_->word() == NULL) return false;  // Already at the end!
00477   ROW* row = it_->row()->row;
00478   WERD* word = it_->word()->word;
00479   TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
00480            ? word->bounding_box()
00481            : row->bounding_box();
00482   int left = box.left();
00483   ICOORD startpt(left, static_cast<inT16>(row->base_line(left) + 0.5));
00484   int right = box.right();
00485   ICOORD endpt(right, static_cast<inT16>(row->base_line(right) + 0.5));
00486   // Rotate to image coordinates and convert to global image coords.
00487   startpt.rotate(it_->block()->block->re_rotation());
00488   endpt.rotate(it_->block()->block->re_rotation());
00489   *x1 = startpt.x() / scale_ + rect_left_;
00490   *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_;
00491   *x2 = endpt.x() / scale_ + rect_left_;
00492   *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_;
00493   return true;
00494 }
00495 
// Reports layout properties of the current block:
//   orientation       - which way "up" points in the image, derived from the
//                       block's classify_rotation() and re_rotation().
//   writing_direction - top-to-bottom for vertical text, otherwise
//                       right-to-left or left-to-right per the block.
//   textline_order    - right-to-left for vertical text, otherwise
//                       top-to-bottom.
//   deskew_angle      - the negated angle of the block's skew vector.
// When there is no current block (e.g. the iterator is past the end of the
// page) the outputs are set to ORIENTATION_PAGE_UP,
// WRITING_DIRECTION_LEFT_TO_RIGHT, TEXTLINE_ORDER_TOP_TO_BOTTOM and 0.
00496 void PageIterator::Orientation(tesseract::Orientation *orientation,
00497                                tesseract::WritingDirection *writing_direction,
00498                                tesseract::TextlineOrder *textline_order,
00499                                float *deskew_angle) const {
        BLOCK_RES* block_res = it_->block();
        if (block_res == NULL || block_res->block == NULL) {
          // Nothing to inspect: report defaults rather than dereferencing a
          // null pointer.
          *orientation = ORIENTATION_PAGE_UP;
          *writing_direction = WRITING_DIRECTION_LEFT_TO_RIGHT;
          *textline_order = TEXTLINE_ORDER_TOP_TO_BOTTOM;
          *deskew_angle = 0.0f;
          return;
        }
00500   BLOCK* block = block_res->block;
00501 
00502   // Orientation
00503   FCOORD up_in_image(0.0, 1.0);
00504   up_in_image.unrotate(block->classify_rotation());
00505   up_in_image.rotate(block->re_rotation());
00506 
00507   if (up_in_image.x() == 0.0F) {
00508     if (up_in_image.y() > 0.0F) {
00509       *orientation = ORIENTATION_PAGE_UP;
00510     } else {
00511       *orientation = ORIENTATION_PAGE_DOWN;
00512     }
00513   } else if (up_in_image.x() > 0.0F) {
00514     *orientation = ORIENTATION_PAGE_RIGHT;
00515   } else {
00516     *orientation = ORIENTATION_PAGE_LEFT;
00517   }
00518 
00519   // Writing direction
00520   bool is_vertical_text = (block->classify_rotation().x() == 0.0);
00521   bool right_to_left = block->right_to_left();
00522   *writing_direction =
00523       is_vertical_text
00524           ? WRITING_DIRECTION_TOP_TO_BOTTOM
00525           : (right_to_left
00526                 ? WRITING_DIRECTION_RIGHT_TO_LEFT
00527                 : WRITING_DIRECTION_LEFT_TO_RIGHT);
00528 
00529   // Textline Order
00530   bool is_mongolian = false;  // TODO(eger): fix me
00531   *textline_order = is_vertical_text
00532       ? (is_mongolian
00533          ? TEXTLINE_ORDER_LEFT_TO_RIGHT
00534          : TEXTLINE_ORDER_RIGHT_TO_LEFT)
00535       : TEXTLINE_ORDER_TOP_TO_BOTTOM;
00536 
00537   // Deskew angle
00538   FCOORD skew = block->skew();  // true horizontal for textlines
00539   *deskew_angle = -skew.angle();
00540 }
00541 
// Reports properties of the paragraph containing the current position:
//   just              - the justification recorded in the paragraph's model.
//   is_list_item      - copied from the paragraph.
//   is_crown          - the paragraph's is_very_first_or_continuation flag.
//   first_line_indent - the model's first_indent() minus its body_indent().
// If there is no current row, paragraph or paragraph model, *just is set to
// JUSTIFICATION_UNKNOWN and the other three outputs are left unmodified, so
// callers should initialize them before the call.
00542 void PageIterator::ParagraphInfo(tesseract::ParagraphJustification *just,
00543                                  bool *is_list_item,
00544                                  bool *is_crown,
00545                                  int *first_line_indent) const {
00546   *just = tesseract::JUSTIFICATION_UNKNOWN;
00547   if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
00548       !it_->row()->row->para()->model)
00549     return;
00550 
00551   PARA *para = it_->row()->row->para();
        // Report the model's justification; without this assignment the output
        // would stay JUSTIFICATION_UNKNOWN even when a model is available.
        *just = para->model->justification();
00552   *is_list_item = para->is_list_item;
00553   *is_crown = para->is_very_first_or_continuation;
00554   *first_line_indent = para->model->first_indent() -
00555       para->model->body_indent();
00556 }
00557 
// Sets up the per-word state (word_, word_length_, blob_index_, cblob_it_) for
// the word at the current position of it_, and positions the symbol index at
// `offset`.
//  - No word at this position: the state is zeroed and cblob_it_ is left as
//    it was.
//  - Word with a best_choice: symbols are counted from best_choice and any
//    cblob iterator is discarded. Aborts via ASSERT_HOST if box_word exists
//    with a different length.
//  - Word without a best_choice: symbols are the word's cblobs, iterated
//    through cblob_it_.
00562 void PageIterator::BeginWord(int offset) {
00563   WERD_RES* word_res = it_->word();
00564   if (word_res == NULL) {
00565     // This is a non-text block, so there is no word.
00566     word_length_ = 0;
00567     blob_index_ = 0;
00568     word_ = NULL;
00569     return;
00570   }
00571   if (word_res->best_choice != NULL) {
00572     // Recognition has been done, so we are using the box_word, which
00573     // is already baseline denormalized.
00574     word_length_ = word_res->best_choice->length();
00575     if (word_res->box_word != NULL) {
            // Print diagnostics before the assertion below fires, so that the
            // offending word can be identified.
00576       if (word_res->box_word->length() != word_length_) {
00577         tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
00578                 word_length_, word_res->best_choice->unichar_string().string(),
00579                 word_res->box_word->length());
00580         word_res->box_word->bounding_box().print();
00581       }
00582       ASSERT_HOST(word_res->box_word->length() == word_length_);
00583     }
00584     word_ = NULL;
00585     // We will be iterating the box_word.
00586     if (cblob_it_ != NULL) {
00587       delete cblob_it_;
00588       cblob_it_ = NULL;
00589     }
00590   } else {
00591     // No recognition yet, so a "symbol" is a cblob.
00592     word_ = word_res->word;
00593     ASSERT_HOST(word_->cblob_list() != NULL);
00594     word_length_ = word_->cblob_list()->length();
          // Reuse an existing iterator object if there is one; it is re-pointed
          // at this word's list either way.
00595     if (cblob_it_ == NULL) cblob_it_ = new C_BLOB_IT;
00596     cblob_it_->set_to_list(word_->cblob_list());
00597   }
        // Advance to the requested symbol, keeping the cblob iterator (when
        // present) in step with blob_index_. offset is not checked against
        // word_length_ here.
00598   for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
00599     if (cblob_it_ != NULL)
00600       cblob_it_->forward();
00601   }
00602 }
00603 
// Stores blamer_bundle in the current word's blamer_bundle field.
// Returns true if there was a word to attach it to, false otherwise. Any
// value previously stored in the field is simply overwritten.
00604 bool PageIterator::SetWordBlamerBundle(BlamerBundle *blamer_bundle) {
00605   WERD_RES* current_word = it_->word();
00606   if (current_word == NULL) return false;  // No word at this position.
00607   current_word->blamer_bundle = blamer_bundle;
00608   return true;
00609 }
00612 
00613 }  // namespace tesseract.
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines