tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/ccstruct/blamer.cpp
Go to the documentation of this file.
00001 
00002 // File:        blamer.cpp
00003 // Description: Module allowing precise error causes to be allocated.
00004 // Author:      Rike Antonova
00005 // Refactored:  Ray Smith
00006 // Created:     Mon Feb 04 14:37:01 PST 2013
00007 //
00008 // (C) Copyright 2013, Google Inc.
00009 // Licensed under the Apache License, Version 2.0 (the "License");
00010 // you may not use this file except in compliance with the License.
00011 // You may obtain a copy of the License at
00012 // http://www.apache.org/licenses/LICENSE-2.0
00013 // Unless required by applicable law or agreed to in writing, software
00014 // distributed under the License is distributed on an "AS IS" BASIS,
00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 // See the License for the specific language governing permissions and
00017 // limitations under the License.
00018 //
00020 
00021 #include "blamer.h"
00022 #include "blobs.h"
00023 #include "matrix.h"
00024 #include "normalis.h"
00025 #include "pageres.h"
00026 
// Short printable tags, one for each value of the IncorrectResultReason enum.
// kIncorrectResultReasonNames is indexed directly by the enum value, so the
// order of its entries must be kept in sync with the enum declaration.
const char kBlameCorrect[] = "corr";
const char kBlameClassifier[] = "cl";
const char kBlameChopper[] = "chop";
const char kBlameClassLMTradeoff[] = "cl/LM";
const char kBlamePageLayout[] = "pglt";
const char kBlameSegsearchHeur[] = "ss_heur";
const char kBlameSegsearchPP[] = "ss_pp";
const char kBlameClassOldLMTradeoff[] = "cl/old_LM";
const char kBlameAdaption[] = "adapt";
const char kBlameNoTruthSplit[] = "no_tr_spl";
const char kBlameNoTruth[] = "no_tr";
const char kBlameUnknown[] = "unkn";

// Lookup table from reason value (used as the index) to its printable tag.
const char* const kIncorrectResultReasonNames[] = {
  kBlameCorrect,       kBlameClassifier,         kBlameChopper,
  kBlameClassLMTradeoff, kBlamePageLayout,       kBlameSegsearchHeur,
  kBlameSegsearchPP,   kBlameClassOldLMTradeoff, kBlameAdaption,
  kBlameNoTruthSplit,  kBlameNoTruth,            kBlameUnknown
};
00055 
00056 const char *BlamerBundle::IncorrectReasonName(IncorrectResultReason irr) {
00057   return kIncorrectResultReasonNames[irr];
00058 }
00059 
00060 const char *BlamerBundle::IncorrectReason() const {
00061   return kIncorrectResultReasonNames[incorrect_result_reason_];
00062 }
00063 
00064 // Functions to setup the blamer.
00065 // Whole word string, whole word bounding box.
00066 void BlamerBundle::SetWordTruth(const UNICHARSET& unicharset,
00067                                 const char* truth_str, const TBOX& word_box) {
00068   truth_word_.InsertBox(0, word_box);
00069   truth_has_char_boxes_ = false;
00070   // Encode the string as UNICHAR_IDs.
00071   GenericVector<UNICHAR_ID> encoding;
00072   GenericVector<char> lengths;
00073   unicharset.encode_string(truth_str, false, &encoding, &lengths, NULL);
00074   int total_length = 0;
00075   for (int i = 0; i < encoding.size(); total_length += lengths[i++]) {
00076     STRING uch(truth_str + total_length);
00077     uch.truncate_at(lengths[i] - total_length);
00078     UNICHAR_ID id = encoding[i];
00079     if (id != INVALID_UNICHAR_ID) uch = unicharset.get_normed_unichar(id);
00080     truth_text_.push_back(uch);
00081   }
00082 }
00083 
00084 // Single "character" string, "character" bounding box.
00085 // May be called multiple times to indicate the characters in a word.
00086 void BlamerBundle::SetSymbolTruth(const UNICHARSET& unicharset,
00087                                   const char* char_str, const TBOX& char_box) {
00088   STRING symbol_str(char_str);
00089   UNICHAR_ID id = unicharset.unichar_to_id(char_str);
00090   if (id != INVALID_UNICHAR_ID) {
00091     STRING normed_uch(unicharset.get_normed_unichar(id));
00092     if (normed_uch.length() > 0) symbol_str = normed_uch;
00093   }
00094   int length = truth_word_.length();
00095   truth_text_.push_back(symbol_str);
00096   truth_word_.InsertBox(length, char_box);
00097   if (length == 0)
00098     truth_has_char_boxes_ = true;
00099   else if (truth_word_.BlobBox(length - 1) == char_box)
00100     truth_has_char_boxes_ = false;
00101 }
00102 
00103 // Marks that there is something wrong with the truth text, like it contains
00104 // reject characters.
00105 void BlamerBundle::SetRejectedTruth() {
00106   incorrect_result_reason_ = IRR_NO_TRUTH;
00107   truth_has_char_boxes_ = false;
00108 }
00109 
00110 // Returns true if the provided word_choice is correct.
00111 bool BlamerBundle::ChoiceIsCorrect(const WERD_CHOICE* word_choice) const {
00112   if (word_choice == NULL) return false;
00113   const UNICHARSET* uni_set = word_choice->unicharset();
00114   STRING normed_choice_str;
00115   for (int i = 0; i < word_choice->length(); ++i) {
00116     normed_choice_str +=
00117         uni_set->get_normed_unichar(word_choice->unichar_id(i));
00118   }
00119   STRING truth_str = TruthString();
00120   return truth_str == normed_choice_str;
00121 }
00122 
00123 void BlamerBundle::FillDebugString(const STRING &msg,
00124                                    const WERD_CHOICE *choice,
00125                                    STRING *debug) {
00126   (*debug) += "Truth ";
00127   for (int i = 0; i < this->truth_text_.length(); ++i) {
00128     (*debug) += this->truth_text_[i];
00129   }
00130   if (!this->truth_has_char_boxes_) (*debug) += " (no char boxes)";
00131   if (choice != NULL) {
00132     (*debug) += " Choice ";
00133     STRING choice_str;
00134     choice->string_and_lengths(&choice_str, NULL);
00135     (*debug) += choice_str;
00136   }
00137   if (msg.length() > 0) {
00138     (*debug) += "\n";
00139     (*debug) += msg;
00140   }
00141   (*debug) += "\n";
00142 }
00143 
00144 // Sets up the norm_truth_word from truth_word using the given DENORM.
00145 void BlamerBundle::SetupNormTruthWord(const DENORM& denorm) {
00146   // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
00147   norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
00148   TPOINT topleft;
00149   TPOINT botright;
00150   TPOINT norm_topleft;
00151   TPOINT norm_botright;
00152   for (int b = 0; b < truth_word_.length(); ++b) {
00153     const TBOX &box = truth_word_.BlobBox(b);
00154     topleft.x = box.left();
00155     topleft.y = box.top();
00156     botright.x = box.right();
00157     botright.y = box.bottom();
00158     denorm.NormTransform(NULL, topleft, &norm_topleft);
00159     denorm.NormTransform(NULL, botright, &norm_botright);
00160     TBOX norm_box(norm_topleft.x, norm_botright.y,
00161                   norm_botright.x, norm_topleft.y);
00162     norm_truth_word_.InsertBox(b, norm_box);
00163   }
00164 }
00165 
00166 // Splits *this into two pieces in bundle1 and bundle2 (preallocated, empty
00167 // bundles) where the right edge/ of the left-hand word is word1_right,
00168 // and the left edge of the right-hand word is word2_left.
00169 void BlamerBundle::SplitBundle(int word1_right, int word2_left, bool debug,
00170                                BlamerBundle* bundle1,
00171                                BlamerBundle* bundle2) const {
00172   STRING debug_str;
00173   // Find truth boxes that correspond to the split in the blobs.
00174   int b;
00175   int begin2_truth_index = -1;
00176   if (incorrect_result_reason_ != IRR_NO_TRUTH &&
00177       truth_has_char_boxes_) {
00178     debug_str = "Looking for truth split at";
00179     debug_str.add_str_int(" end1_x ", word1_right);
00180     debug_str.add_str_int(" begin2_x ", word2_left);
00181     debug_str += "\nnorm_truth_word boxes:\n";
00182     if (norm_truth_word_.length() > 1) {
00183       norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
00184       for (b = 1; b < norm_truth_word_.length(); ++b) {
00185         norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
00186         if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) <
00187             norm_box_tolerance_) &&
00188             (abs(word2_left - norm_truth_word_.BlobBox(b).left()) <
00189             norm_box_tolerance_)) {
00190           begin2_truth_index = b;
00191           debug_str += "Split found";
00192           break;
00193         }
00194       }
00195       debug_str += '\n';
00196     }
00197   }
00198   // Populate truth information in word and word2 with the first and second
00199   // part of the original truth.
00200   if (begin2_truth_index > 0) {
00201     bundle1->truth_has_char_boxes_ = true;
00202     bundle1->norm_box_tolerance_ = norm_box_tolerance_;
00203     bundle2->truth_has_char_boxes_ = true;
00204     bundle2->norm_box_tolerance_ = norm_box_tolerance_;
00205     BlamerBundle *curr_bb = bundle1;
00206     for (b = 0; b < norm_truth_word_.length(); ++b) {
00207       if (b == begin2_truth_index) curr_bb = bundle2;
00208       curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
00209       curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
00210       curr_bb->truth_text_.push_back(truth_text_[b]);
00211     }
00212   } else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
00213     bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
00214     bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
00215   } else {
00216     debug_str += "Truth split not found";
00217     debug_str += truth_has_char_boxes_ ?
00218         "\n" : " (no truth char boxes)\n";
00219     bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, NULL, debug);
00220     bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, NULL, debug);
00221   }
00222 }
00223 
00224 // "Joins" the blames from bundle1 and bundle2 into *this.
00225 void BlamerBundle::JoinBlames(const BlamerBundle& bundle1,
00226                               const BlamerBundle& bundle2, bool debug) {
00227   STRING debug_str;
00228   IncorrectResultReason irr = incorrect_result_reason_;
00229   if (irr != IRR_NO_TRUTH_SPLIT) debug_str = "";
00230   if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
00231       bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
00232       bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
00233     debug_str += "Blame from part 1: ";
00234     debug_str += bundle1.debug_;
00235     irr = bundle1.incorrect_result_reason_;
00236   }
00237   if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
00238       bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
00239       bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
00240     debug_str += "Blame from part 2: ";
00241     debug_str += bundle2.debug_;
00242     if (irr == IRR_CORRECT) {
00243       irr = bundle2.incorrect_result_reason_;
00244     } else if (irr != bundle2.incorrect_result_reason_) {
00245       irr = IRR_UNKNOWN;
00246     }
00247   }
00248   incorrect_result_reason_ = irr;
00249   if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
00250     SetBlame(irr, debug_str, NULL, debug);
00251   }
00252 }
00253 
00254 // If a blob with the same bounding box as one of the truth character
00255 // bounding boxes is not classified as the corresponding truth character
00256 // blames character classifier for incorrect answer.
00257 void BlamerBundle::BlameClassifier(const UNICHARSET& unicharset,
00258                                    const TBOX& blob_box,
00259                                    const BLOB_CHOICE_LIST& choices,
00260                                    bool debug) {
00261   if (!truth_has_char_boxes_ ||
00262       incorrect_result_reason_ != IRR_CORRECT)
00263     return;  // Nothing to do here.
00264 
00265   for (int b = 0; b < norm_truth_word_.length(); ++b) {
00266     const TBOX &truth_box = norm_truth_word_.BlobBox(b);
00267     // Note that we are more strict on the bounding box boundaries here
00268     // than in other places (chopper, segmentation search), since we do
00269     // not have the ability to check the previous and next bounding box.
00270     if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_/2)) {
00271       bool found = false;
00272       bool incorrect_adapted = false;
00273       UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
00274       const char *truth_str = truth_text_[b].string();
00275       // We promise not to modify the list or its contents, using a
00276       // const BLOB_CHOICE* below.
00277       BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST*>(&choices));
00278       for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
00279           choices_it.forward()) {
00280         const BLOB_CHOICE* choice = choices_it.data();
00281         if (strcmp(truth_str, unicharset.get_normed_unichar(
00282             choice->unichar_id())) == 0) {
00283           found = true;
00284           break;
00285         } else if (choice->IsAdapted()) {
00286           incorrect_adapted = true;
00287           incorrect_adapted_id = choice->unichar_id();
00288         }
00289       }  // end choices_it for loop
00290       if (!found) {
00291         STRING debug_str = "unichar ";
00292         debug_str += truth_str;
00293         debug_str += " not found in classification list";
00294         SetBlame(IRR_CLASSIFIER, debug_str, NULL, debug);
00295       } else if (incorrect_adapted) {
00296         STRING debug_str = "better rating for adapted ";
00297         debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
00298         debug_str += " than for correct ";
00299         debug_str += truth_str;
00300         SetBlame(IRR_ADAPTION, debug_str, NULL, debug);
00301       }
00302       break;
00303     }
00304   }  // end iterating over blamer_bundle->norm_truth_word
00305 }
00306 
00307 // Checks whether chops were made at all the character bounding box
00308 // boundaries in word->truth_word. If not - blames the chopper for an
00309 // incorrect answer.
00310 void BlamerBundle::SetChopperBlame(const WERD_RES* word, bool debug) {
00311   if (NoTruth() || !truth_has_char_boxes_ ||
00312       word->chopped_word->blobs.empty()) {
00313     return;
00314   }
00315   STRING debug_str;
00316   bool missing_chop = false;
00317   int num_blobs = word->chopped_word->blobs.size();
00318   int box_index = 0;
00319   int blob_index = 0;
00320   inT16 truth_x;
00321   while (box_index < truth_word_.length() && blob_index < num_blobs) {
00322     truth_x = norm_truth_word_.BlobBox(box_index).right();
00323     TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
00324     if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
00325       ++blob_index;
00326       continue;  // encountered an extra chop, keep looking
00327     } else if (curr_blob->bounding_box().right() >
00328                truth_x + norm_box_tolerance_) {
00329       missing_chop = true;
00330       break;
00331     } else {
00332       ++blob_index;
00333     }
00334   }
00335   if (missing_chop || box_index < norm_truth_word_.length()) {
00336     STRING debug_str;
00337     if (missing_chop) {
00338       debug_str.add_str_int("Detected missing chop (tolerance=",
00339                             norm_box_tolerance_);
00340       debug_str += ") at Bounding Box=";
00341       TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
00342       curr_blob->bounding_box().print_to_str(&debug_str);
00343       debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
00344     } else {
00345       debug_str.add_str_int("Missing chops for last ",
00346                             norm_truth_word_.length() - box_index);
00347       debug_str += " truth box(es)";
00348     }
00349     debug_str += "\nMaximally chopped word boxes:\n";
00350     for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
00351       TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
00352       curr_blob->bounding_box().print_to_str(&debug_str);
00353       debug_str += '\n';
00354     }
00355     debug_str += "Truth  bounding  boxes:\n";
00356     for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
00357       norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
00358       debug_str += '\n';
00359     }
00360     SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
00361   }
00362 }
00363 
00364 // Blames the classifier or the language model if, after running only the
00365 // chopper, best_choice is incorrect and no blame has been yet set.
00366 // Blames the classifier if best_choice is classifier's top choice and is a
00367 // dictionary word (i.e. language model could not have helped).
00368 // Otherwise, blames the language model (formerly permuter word adjustment).
00369 void BlamerBundle::BlameClassifierOrLangModel(
00370     const WERD_RES* word,
00371     const UNICHARSET& unicharset, bool valid_permuter, bool debug) {
00372   if (valid_permuter) {
00373     // Find out whether best choice is a top choice.
00374     best_choice_is_dict_and_top_choice_ = true;
00375     for (int i = 0; i < word->best_choice->length(); ++i) {
00376       BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
00377       ASSERT_HOST(!blob_choice_it.empty());
00378       BLOB_CHOICE *first_choice = NULL;
00379       for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
00380            blob_choice_it.forward()) {  // find first non-fragment choice
00381         if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
00382           first_choice = blob_choice_it.data();
00383           break;
00384         }
00385       }
00386       ASSERT_HOST(first_choice != NULL);
00387       if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
00388         best_choice_is_dict_and_top_choice_ = false;
00389         break;
00390       }
00391     }
00392   }
00393   STRING debug_str;
00394   if (best_choice_is_dict_and_top_choice_) {
00395     debug_str = "Best choice is: incorrect, top choice, dictionary word";
00396     debug_str += " with permuter ";
00397     debug_str += word->best_choice->permuter_name();
00398   } else {
00399     debug_str = "Classifier/Old LM tradeoff is to blame";
00400   }
00401   SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER
00402                                               : IRR_CLASS_OLD_LM_TRADEOFF,
00403            debug_str, word->best_choice, debug);
00404 }
00405 
00406 // Sets up the correct_segmentation_* to mark the correct bounding boxes.
00407 void BlamerBundle::SetupCorrectSegmentation(const TWERD* word, bool debug) {
00408   params_training_bundle_.StartHypothesisList();
00409   if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_)
00410     return;  // Nothing to do here.
00411 
00412   STRING debug_str;
00413   debug_str += "Blamer computing correct_segmentation_cols\n";
00414   int curr_box_col = 0;
00415   int next_box_col = 0;
00416   int num_blobs = word->NumBlobs();
00417   if (num_blobs == 0) return;  // No blobs to play with.
00418   int blob_index = 0;
00419   inT16 next_box_x = word->blobs[blob_index]->bounding_box().right();
00420   for (int truth_idx = 0; blob_index < num_blobs &&
00421        truth_idx < norm_truth_word_.length();
00422        ++blob_index) {
00423     ++next_box_col;
00424     inT16 curr_box_x = next_box_x;
00425     if (blob_index + 1 < num_blobs)
00426       next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
00427     inT16 truth_x = norm_truth_word_.BlobBox(truth_idx).right();
00428     debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
00429     debug_str.add_str_int(" ", truth_x);
00430     debug_str += "\n";
00431     if (curr_box_x > (truth_x + norm_box_tolerance_)) {
00432       break;  // failed to find a matching box
00433     } else if (curr_box_x >= truth_x - norm_box_tolerance_ &&  // matched
00434                (blob_index + 1 >= num_blobs ||  // next box can't be included
00435                 next_box_x > truth_x + norm_box_tolerance_)) {
00436       correct_segmentation_cols_.push_back(curr_box_col);
00437       correct_segmentation_rows_.push_back(next_box_col-1);
00438       ++truth_idx;
00439       debug_str.add_str_int("col=", curr_box_col);
00440       debug_str.add_str_int(" row=", next_box_col-1);
00441       debug_str += "\n";
00442       curr_box_col = next_box_col;
00443     }
00444   }
00445   if (blob_index < num_blobs ||  // trailing blobs
00446       correct_segmentation_cols_.length() != norm_truth_word_.length()) {
00447     debug_str.add_str_int("Blamer failed to find correct segmentation"
00448                           " (tolerance=", norm_box_tolerance_);
00449     if (blob_index >= num_blobs) debug_str += " blob == NULL";
00450     debug_str += ")\n";
00451     debug_str.add_str_int(" path length ", correct_segmentation_cols_.length());
00452     debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
00453     debug_str += "\n";
00454     SetBlame(IRR_UNKNOWN, debug_str, NULL, debug);
00455     correct_segmentation_cols_.clear();
00456     correct_segmentation_rows_.clear();
00457   }
00458 }
00459 
00460 // Returns true if a guided segmentation search is needed.
00461 bool BlamerBundle::GuidedSegsearchNeeded(const WERD_CHOICE *best_choice) const {
00462   return incorrect_result_reason_ == IRR_CORRECT &&
00463       !segsearch_is_looking_for_blame_ &&
00464       truth_has_char_boxes_ &&
00465       !ChoiceIsCorrect(best_choice);
00466 }
00467 
00468 // Setup ready to guide the segmentation search to the correct segmentation.
00469 // The callback pp_cb is used to avoid a cyclic dependency.
00470 // It calls into LMPainPoints::GenerateForBlamer by pre-binding the
00471 // WERD_RES, and the LMPainPoints itself.
00472 // pp_cb must be a permanent callback, and should be deleted by the caller.
00473 void BlamerBundle::InitForSegSearch(const WERD_CHOICE *best_choice,
00474                                     MATRIX* ratings, UNICHAR_ID wildcard_id,
00475                                     bool debug, STRING *debug_str,
00476                                     TessResultCallback2<bool, int, int>* cb) {
00477   segsearch_is_looking_for_blame_ = true;
00478   if (debug) {
00479     tprintf("segsearch starting to look for blame\n");
00480   }
00481   // Fill pain points for any unclassifed blob corresponding to the
00482   // correct segmentation state.
00483   *debug_str += "Correct segmentation:\n";
00484   for (int idx = 0; idx < correct_segmentation_cols_.length(); ++idx) {
00485     debug_str->add_str_int("col=", correct_segmentation_cols_[idx]);
00486     debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]);
00487     *debug_str += "\n";
00488     if (!ratings->Classified(correct_segmentation_cols_[idx],
00489                              correct_segmentation_rows_[idx],
00490                              wildcard_id) &&
00491         !cb->Run(correct_segmentation_cols_[idx],
00492                  correct_segmentation_rows_[idx])) {
00493       segsearch_is_looking_for_blame_ = false;
00494       *debug_str += "\nFailed to insert pain point\n";
00495       SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
00496       break;
00497     }
00498   }  // end for blamer_bundle->correct_segmentation_cols/rows
00499 }
00500 // Returns true if the guided segsearch is in progress.
00501 bool BlamerBundle::GuidedSegsearchStillGoing() const {
00502   return segsearch_is_looking_for_blame_;
00503 }
00504 
00505 // The segmentation search has ended. Sets the blame appropriately.
00506 void BlamerBundle::FinishSegSearch(const WERD_CHOICE *best_choice,
00507                                    bool debug, STRING *debug_str) {
00508   // If we are still looking for blame (i.e. best_choice is incorrect, but a
00509   // path representing the correct segmentation could be constructed), we can
00510   // blame segmentation search pain point prioritization if the rating of the
00511   // path corresponding to the correct segmentation is better than that of
00512   // best_choice (i.e. language model would have done the correct thing, but
00513   // because of poor pain point prioritization the correct segmentation was
00514   // never explored). Otherwise we blame the tradeoff between the language model
00515   // and the classifier, since even after exploring the path corresponding to
00516   // the correct segmentation incorrect best_choice would have been chosen.
00517   // One special case when we blame the classifier instead is when best choice
00518   // is incorrect, but it is a dictionary word and it classifier's top choice.
00519   if (segsearch_is_looking_for_blame_) {
00520     segsearch_is_looking_for_blame_ = false;
00521     if (best_choice_is_dict_and_top_choice_) {
00522       *debug_str = "Best choice is: incorrect, top choice, dictionary word";
00523       *debug_str += " with permuter ";
00524       *debug_str += best_choice->permuter_name();
00525       SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
00526     } else if (best_correctly_segmented_rating_ <
00527         best_choice->rating()) {
00528       *debug_str += "Correct segmentation state was not explored";
00529       SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
00530     } else {
00531       if (best_correctly_segmented_rating_ >=
00532           WERD_CHOICE::kBadRating) {
00533         *debug_str += "Correct segmentation paths were pruned by LM\n";
00534       } else {
00535         debug_str->add_str_double("Best correct segmentation rating ",
00536                                   best_correctly_segmented_rating_);
00537         debug_str->add_str_double(" vs. best choice rating ",
00538                                   best_choice->rating());
00539       }
00540       SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
00541     }
00542   }
00543 }
00544 
00545 // If the bundle is null or still does not indicate the correct result,
00546 // fix it and use some backup reason for the blame.
00547 void BlamerBundle::LastChanceBlame(bool debug, WERD_RES* word) {
00548   if (word->blamer_bundle == NULL) {
00549     word->blamer_bundle = new BlamerBundle();
00550     word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame",
00551                                   word->best_choice, debug);
00552   } else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
00553     word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth",
00554                                   word->best_choice, debug);
00555   } else {
00556     bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice);
00557     IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
00558     if (irr == IRR_CORRECT && !correct) {
00559       STRING debug_str = "Choice is incorrect after recognition";
00560       word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice,
00561                                     debug);
00562     } else if (irr != IRR_CORRECT && correct) {
00563       if (debug) {
00564         tprintf("Corrected %s\n", word->blamer_bundle->debug_.string());
00565       }
00566       word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
00567       word->blamer_bundle->debug_ = "";
00568     }
00569   }
00570 }
00571 
00572 // Sets the misadaption debug if this word is incorrect, as this word is
00573 // being adapted to.
00574 void BlamerBundle::SetMisAdaptionDebug(const WERD_CHOICE *best_choice,
00575                                        bool debug) {
00576   if (incorrect_result_reason_ != IRR_NO_TRUTH &&
00577       !ChoiceIsCorrect(best_choice)) {
00578     misadaption_debug_ ="misadapt to word (";
00579     misadaption_debug_ += best_choice->permuter_name();
00580     misadaption_debug_ += "): ";
00581     FillDebugString("", best_choice, &misadaption_debug_);
00582     if (debug) {
00583       tprintf("%s\n", misadaption_debug_.string());
00584     }
00585   }
00586 }
00587 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines