tesseract
3.03
|
#include <errorcounter.h>
Classes | |
struct | Counts |
Static Public Member Functions | |
static double | ComputeErrorRate (ShapeClassifier *classifier, int report_level, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it, double *unichar_error, double *scaled_error, STRING *fonts_report) |
static void | DebugNewErrors (ShapeClassifier *new_classifier, ShapeClassifier *old_classifier, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it) |
Definition at line 94 of file errorcounter.h.
double tesseract::ErrorCounter::ComputeErrorRate(
    ShapeClassifier* classifier,
    int report_level,
    CountTypes boosting_mode,
    const FontInfoTable& fontinfo_table,
    const GenericVector<Pix*>& page_images,
    SampleIterator* it,
    double* unichar_error,
    double* scaled_error,
    STRING* fonts_report) [static]
Definition at line 39 of file errorcounter.cpp.
{
  // Runs |classifier| over every sample produced by |it|, accumulates the
  // outcome of each classification in a local ErrorCounter, and returns the
  // value produced by ErrorCounter::ReportErrors.
  //   report_level:  values > 3 enable per-sample debug output for up to
  //                  report_level^2 samples; values > 1 print timing.
  //   unichar_error, fonts_report: forwarded unchanged to ReportErrors.
  //   scaled_error:  if non-NULL, receives the counter's scaled_error_.
  const int fontsize = it->sample_set()->NumFonts();
  ErrorCounter counter(classifier->GetUnicharset(), fontsize);
  GenericVector<UnicharRating> results;

  const clock_t start = clock();
  int total_samples = 0;
  // Set a number of samples on which to run the classify debug mode.
  int error_samples = report_level > 3 ? report_level * report_level : 0;

  // Iterate over all the samples, accumulating errors.
  for (it->Begin(); !it->AtEnd(); it->Next()) {
    TrainingSample* mutable_sample = it->MutableSample();
    const int page_index = mutable_sample->page_num();
    // Only use a page image when the sample's page number indexes one.
    Pix* page_pix = 0 <= page_index && page_index < page_images.size()
                        ? page_images[page_index]
                        : NULL;
    // Classify with debug off (0) and no unichar id to keep
    // (INVALID_UNICHAR_ID).
    classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
                                      INVALID_UNICHAR_ID, &results);

    bool debug_it = false;
    const int correct_id = mutable_sample->class_id();
    if (counter.unicharset_.has_special_codes() &&
        (correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED ||
         correct_id == UNICHAR_BROKEN)) {
      // This is junk so use the special counter.
      debug_it = counter.AccumulateJunk(report_level > 3, results,
                                        mutable_sample);
    } else {
      debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode,
                                          fontinfo_table, results,
                                          mutable_sample);
    }

    if (debug_it && error_samples > 0) {
      // Running debug, keep the correct answer, and debug the classifier.
      tprintf("Error on sample %d: %s Classifier debug output:\n",
              it->GlobalSampleIndex(),
              it->sample_set()->SampleToString(*mutable_sample).string());
      classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
      --error_samples;
    }
    ++total_samples;
  }
  const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;

  // Create the appropriate error report.
  const double unscaled_error =
      counter.ReportErrors(report_level, boosting_mode, fontinfo_table, *it,
                           unichar_error, fonts_report);
  if (scaled_error != NULL) *scaled_error = counter.scaled_error_;

  if (report_level > 1) {
    // It is useful to know the time in microseconds/char.
    // Guard the division: an empty sample set would otherwise divide by zero
    // and print nan/inf.
    const double us_per_char =
        total_samples > 0 ? 1000000.0 * total_time / total_samples : 0.0;
    tprintf("Errors computed in %.2fs at %.1f μs/char\n",
            total_time, us_per_char);
  }
  return unscaled_error;
}
void tesseract::ErrorCounter::DebugNewErrors(
    ShapeClassifier* new_classifier,
    ShapeClassifier* old_classifier,
    CountTypes boosting_mode,
    const FontInfoTable& fontinfo_table,
    const GenericVector<Pix*>& page_images,
    SampleIterator* it) [static]
Definition at line 106 of file errorcounter.cpp.
{
  // Reports samples for which old_classifier's result is not counted as an
  // error but new_classifier's result is. Each such sample is announced via
  // tprintf and re-classified with debug enabled; DebugDisplay is invoked for
  // at most 25 of them. The total count of new errors is printed at the end.
  const int fontsize = it->sample_set()->NumFonts();
  ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);
  ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);
  GenericVector<UnicharRating> results;
  // Remaining budget of samples for which DebugDisplay may still be called.
  int error_samples = 25;
  int total_new_errors = 0;

  // Iterate over all the samples, accumulating errors.
  for (it->Begin(); !it->AtEnd(); it->Next()) {
    TrainingSample* mutable_sample = it->MutableSample();
    const int page_index = mutable_sample->page_num();
    // Only use a page image when the sample's page number indexes one.
    Pix* page_pix = 0 <= page_index && page_index < page_images.size()
                        ? page_images[page_index]
                        : NULL;
    // Classify with debug off (0) and no unichar id to keep
    // (INVALID_UNICHAR_ID).
    old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
                                          INVALID_UNICHAR_ID, &results);

    const int correct_id = mutable_sample->class_id();
    // Samples whose class id is 0 are never counted by either counter.
    if (correct_id == 0) continue;
    // The old classifier already got this sample wrong: not a *new* error.
    if (old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
                                     results, mutable_sample)) {
      continue;
    }

    // old classifier was correct, check the new one.
    new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
                                          INVALID_UNICHAR_ID, &results);
    if (!new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
                                      results, mutable_sample)) {
      continue;
    }

    tprintf("New Error on sample %d: Classifier debug output:\n",
            it->GlobalSampleIndex());
    ++total_new_errors;
    // Re-run with debug on (1), keeping the correct answer.
    new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 1,
                                          correct_id, &results);
    if (results.size() > 0 && error_samples > 0) {
      new_classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
      --error_samples;
    }
  }
  tprintf("Total new errors = %d\n", total_new_errors);
}