tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccmain/par_control.cpp
Go to the documentation of this file.
00001 
00002 // File:        par_control.cpp
00003 // Description: Control code for parallel implementation.
00004 // Author:      Ray Smith
00005 // Created:     Mon Nov 04 13:23:15 PST 2013
00006 //
00007 // (C) Copyright 2013, Google Inc.
00008 // Licensed under the Apache License, Version 2.0 (the "License");
00009 // you may not use this file except in compliance with the License.
00010 // You may obtain a copy of the License at
00011 // http://www.apache.org/licenses/LICENSE-2.0
00012 // Unless required by applicable law or agreed to in writing, software
00013 // distributed under the License is distributed on an "AS IS" BASIS,
00014 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 // See the License for the specific language governing permissions and
00016 // limitations under the License.
00017 //
00019 
00020 #include "tesseractclass.h"
00021 
00022 namespace tesseract {
00023 
00024 struct BlobData {
00025   BlobData() : blob(NULL), choices(NULL) {}
00026   BlobData(int index, Tesseract* tess, const WERD_RES& word)
00027     : blob(word.chopped_word->blobs[index]),
00028       tesseract(tess),
00029       choices(&(*word.ratings)(index, index)) {}
00030 
00031   TBLOB* blob;
00032   Tesseract* tesseract;
00033   BLOB_CHOICE_LIST** choices;
00034 };
00035 
00036 void Tesseract::PrerecAllWordsPar(const GenericVector<WordData>& words) {
00037   // Prepare all the blobs.
00038   GenericVector<BlobData> blobs;
00039   for (int w = 0; w < words.size(); ++w) {
00040     if (words[w].word->ratings != NULL &&
00041         words[w].word->ratings->get(0, 0) == NULL) {
00042       for (int b = 0; b < words[w].word->chopped_word->NumBlobs(); ++b) {
00043         blobs.push_back(BlobData(b, this, *words[w].word));
00044       }
00045       for (int s = 0; s < words[w].lang_words.size(); ++s) {
00046         const WERD_RES& word = words[w].lang_words[s];
00047         for (int b = 0; b < word.chopped_word->NumBlobs(); ++b) {
00048           blobs.push_back(BlobData(b, sub_langs_[s], word));
00049         }
00050       }
00051     }
00052   }
00053   // Pre-classify all the blobs.
00054   if (tessedit_parallelize > 1) {
00055     #pragma omp parallel for num_threads(10)
00056     for (int b = 0; b < blobs.size(); ++b) {
00057       *blobs[b].choices =
00058           blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL);
00059     }
00060   } else {
00061     // TODO(AMD) parallelize this.
00062     for (int b = 0; b < blobs.size(); ++b) {
00063       *blobs[b].choices =
00064           blobs[b].tesseract->classify_blob(blobs[b].blob, "par", White, NULL);
00065     }
00066   }
00067 }
00068 
00069 }  // namespace tesseract.
00070 
00071 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines