tesseract  3.03
tesseract::ErrorCounter Class Reference

#include <errorcounter.h>

List of all members.

Classes

struct  Counts

Static Public Member Functions

static double ComputeErrorRate (ShapeClassifier *classifier, int report_level, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it, double *unichar_error, double *scaled_error, STRING *fonts_report)
static void DebugNewErrors (ShapeClassifier *new_classifier, ShapeClassifier *old_classifier, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it)

Detailed Description

Definition at line 94 of file errorcounter.h.


Member Function Documentation

double tesseract::ErrorCounter::ComputeErrorRate ( ShapeClassifier classifier,
int  report_level,
CountTypes  boosting_mode,
const FontInfoTable fontinfo_table,
const GenericVector< Pix * > &  page_images,
SampleIterator it,
double *  unichar_error,
double *  scaled_error,
STRING fonts_report 
) [static]

Definition at line 39 of file errorcounter.cpp.

                                                                        {
  int fontsize = it->sample_set()->NumFonts();
  ErrorCounter counter(classifier->GetUnicharset(), fontsize);
  GenericVector<UnicharRating> results;

  clock_t start = clock();
  int total_samples = 0;
  double unscaled_error = 0.0;
  // Set a number of samples on which to run the classify debug mode.
  int error_samples = report_level > 3 ? report_level * report_level : 0;
  // Iterate over all the samples, accumulating errors.
  for (it->Begin(); !it->AtEnd(); it->Next()) {
    TrainingSample* mutable_sample = it->MutableSample();
    int page_index = mutable_sample->page_num();
    Pix* page_pix = 0 <= page_index && page_index < page_images.size()
                  ? page_images[page_index] : NULL;
    // No debug, no keep this.
    classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
                                      INVALID_UNICHAR_ID, &results);
    bool debug_it = false;
    int correct_id = mutable_sample->class_id();
    if (counter.unicharset_.has_special_codes() &&
        (correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED ||
         correct_id == UNICHAR_BROKEN)) {
      // This is junk so use the special counter.
      debug_it = counter.AccumulateJunk(report_level > 3,
                                        results,
                                        mutable_sample);
    } else {
      debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode,
                                          fontinfo_table,
                                          results, mutable_sample);
    }
    if (debug_it && error_samples > 0) {
      // Running debug, keep the correct answer, and debug the classifier.
      tprintf("Error on sample %d: %s Classifier debug output:\n",
              it->GlobalSampleIndex(),
              it->sample_set()->SampleToString(*mutable_sample).string());
      classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
      --error_samples;
    }
    ++total_samples;
  }
  double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;
  // Create the appropriate error report.
  unscaled_error = counter.ReportErrors(report_level, boosting_mode,
                                        fontinfo_table,
                                        *it, unichar_error, fonts_report);
  if (scaled_error != NULL) *scaled_error = counter.scaled_error_;
  if (report_level > 1) {
    // It is useful to know the time in microseconds/char.
    tprintf("Errors computed in %.2fs at %.1f μs/char\n",
            total_time, 1000000.0 * total_time / total_samples);
  }
  return unscaled_error;
}
void tesseract::ErrorCounter::DebugNewErrors ( ShapeClassifier new_classifier,
ShapeClassifier old_classifier,
CountTypes  boosting_mode,
const FontInfoTable fontinfo_table,
const GenericVector< Pix * > &  page_images,
SampleIterator it 
) [static]

Definition at line 106 of file errorcounter.cpp.

                                                                {
  int fontsize = it->sample_set()->NumFonts();
  ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);
  ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);
  GenericVector<UnicharRating> results;

  int total_samples = 0;
  int error_samples = 25;
  int total_new_errors = 0;
  // Iterate over all the samples, accumulating errors.
  for (it->Begin(); !it->AtEnd(); it->Next()) {
    TrainingSample* mutable_sample = it->MutableSample();
    int page_index = mutable_sample->page_num();
    Pix* page_pix = 0 <= page_index && page_index < page_images.size()
                  ? page_images[page_index] : NULL;
    // No debug, no keep this.
    old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
                                          INVALID_UNICHAR_ID, &results);
    int correct_id = mutable_sample->class_id();
    if (correct_id != 0 &&
        !old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
                                      results, mutable_sample)) {
      // old classifier was correct, check the new one.
      new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
                                            INVALID_UNICHAR_ID, &results);
      if (correct_id != 0 &&
          new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
                                        results, mutable_sample)) {
        tprintf("New Error on sample %d: Classifier debug output:\n",
                it->GlobalSampleIndex());
        ++total_new_errors;
        new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 1,
                                              correct_id, &results);
        if (results.size() > 0 && error_samples > 0) {
          new_classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
          --error_samples;
        }
      }
    }
    ++total_samples;
  }
  tprintf("Total new errors = %d\n", total_new_errors);
}

The documentation for this class was generated from the following files:
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines