tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/training/shapeclustering.cpp
Go to the documentation of this file.
00001 // Copyright 2011 Google Inc. All Rights Reserved.
00002 // Author: rays@google.com (Ray Smith)
00003 
00004 // Licensed under the Apache License, Version 2.0 (the "License");
00005 // you may not use this file except in compliance with the License.
00006 // You may obtain a copy of the License at
00007 // http://www.apache.org/licenses/LICENSE-2.0
00008 // Unless required by applicable law or agreed to in writing, software
00009 // distributed under the License is distributed on an "AS IS" BASIS,
00010 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00011 // See the License for the specific language governing permissions and
00012 // limitations under the License.
00013 
00014 //  Filename: shapeclustering.cpp
00015 //  Purpose:  Generates a master shape table to merge similarly-shaped
00016 //            training data of whole, partial or multiple characters.
00017 //  Author:   Ray Smith
00018 
00019 #ifdef HAVE_CONFIG_H
00020 #include "config_auto.h"
00021 #endif
00022 
00023 #ifndef USE_STD_NAMESPACE
00024 #include "base/commandlineflags.h"
00025 #endif
00026 #include "commontraining.h"
00027 #include "mastertrainer.h"
00028 #include "params.h"
00029 #include "strngs.h"
00030 
00031 INT_PARAM_FLAG(display_cloud_font, -1,
00032                "Display cloud of this font, canonical_class1");
00033 INT_PARAM_FLAG(display_canonical_font, -1,
00034                "Display canonical sample of this font, canonical_class2");
00035 STRING_PARAM_FLAG(canonical_class1, "", "Class to show ambigs for");
00036 STRING_PARAM_FLAG(canonical_class2, "", "Class to show ambigs for");
00037 
00038 // Loads training data, if requested displays debug information, otherwise
00039 // creates the master shape table by shape clustering and writes it to a file.
00040 // If FLAGS_display_cloud_font is set, then the cloud features of
00041 // FLAGS_canonical_class1/FLAGS_display_cloud_font are shown in green ON TOP
00042 // OF the red canonical features of FLAGS_canonical_class2/
00043 // FLAGS_display_canonical_font, so as to show which canonical features are
00044 // NOT in the cloud.
00045 // Otherwise, if FLAGS_canonical_class1 is set, prints a table of font-wise
00046 // cluster distances between FLAGS_canonical_class1 and FLAGS_canonical_class2.
00047 int main(int argc, char **argv) {
00048   ParseArguments(&argc, &argv);
00049 
00050   STRING file_prefix;
00051   tesseract::MasterTrainer* trainer = tesseract::LoadTrainingData(
00052       argc, argv, false, NULL, &file_prefix);
00053 
00054   if (!trainer)
00055     return 1;
00056 
00057   if (FLAGS_display_cloud_font >= 0) {
00058 #ifndef GRAPHICS_DISABLED
00059     trainer->DisplaySamples(FLAGS_canonical_class1.c_str(),
00060                             FLAGS_display_cloud_font,
00061                             FLAGS_canonical_class2.c_str(),
00062                             FLAGS_display_canonical_font);
00063 #endif  // GRAPHICS_DISABLED
00064     return 0;
00065   } else if (!FLAGS_canonical_class1.empty()) {
00066     trainer->DebugCanonical(FLAGS_canonical_class1.c_str(),
00067                             FLAGS_canonical_class2.c_str());
00068     return 0;
00069   }
00070   trainer->SetupMasterShapes();
00071   WriteShapeTable(file_prefix, trainer->master_shapes());
00072   delete trainer;
00073 
00074   return 0;
00075 } /* main */
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines