tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/api/tesseractmain.cpp
Go to the documentation of this file.
00001 /**********************************************************************
00002 * File:        tessedit.cpp  (Formerly tessedit.c)
00003 * Description: Main program for merge of tess and editor.
00004 * Author:                  Ray Smith
00005 * Created:                 Tue Jan 07 15:21:46 GMT 1992
00006 *
00007 * (C) Copyright 1992, Hewlett-Packard Ltd.
00008 ** Licensed under the Apache License, Version 2.0 (the "License");
00009 ** you may not use this file except in compliance with the License.
00010 ** You may obtain a copy of the License at
00011 ** http://www.apache.org/licenses/LICENSE-2.0
00012 ** Unless required by applicable law or agreed to in writing, software
00013 ** distributed under the License is distributed on an "AS IS" BASIS,
00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00015 ** See the License for the specific language governing permissions and
00016 ** limitations under the License.
00017 *
00018 **********************************************************************/
00019 
00020 // Include automatically generated configuration file if running autoconf
00021 #ifdef HAVE_CONFIG_H
00022 #include "config_auto.h"
00023 #endif
00024 
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif  // _WIN32
#include <cerrno>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>

#include "allheaders.h"
#include "baseapi.h"
#include "basedir.h"
#include "renderer.h"
#include "strngs.h"
#include "tprintf.h"
#include "openclwrapper.h"
#include "osdetect.h"
00039 
00040 /**********************************************************************
00041  *  main()
00042  *
00043  **********************************************************************/
00044 
00045 int main(int argc, char **argv) {
00046   if ((argc == 2 && strcmp(argv[1], "-v") == 0) ||
00047       (argc == 2 && strcmp(argv[1], "--version") == 0)) {
00048     char *versionStrP;
00049 
00050     fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version());
00051 
00052     versionStrP = getLeptonicaVersion();
00053     fprintf(stderr, " %s\n", versionStrP);
00054     lept_free(versionStrP);
00055 
00056     versionStrP = getImagelibVersions();
00057     fprintf(stderr, "  %s\n", versionStrP);
00058     lept_free(versionStrP);
00059 
00060 #ifdef USE_OPENCL
00061     cl_platform_id platform;
00062     cl_uint num_platforms;
00063     cl_device_id devices[2];
00064     cl_uint num_devices;
00065     char info[256];
00066     int i;
00067 
00068     fprintf(stderr, " OpenCL info:\n");
00069     clGetPlatformIDs(1, &platform, &num_platforms);
00070     fprintf(stderr, "  Found %d platforms.\n", num_platforms);
00071     clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, info, 0);
00072     fprintf(stderr, "  Platform name: %s.\n", info);
00073     clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 256, info, 0);
00074     fprintf(stderr, "  Version: %s.\n", info);
00075     clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, devices, &num_devices);
00076     fprintf(stderr, "  Found %d devices.\n", num_devices);
00077     for (i = 0; i < num_devices; ++i) {
00078       clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0);
00079       fprintf(stderr, "    Device %d name: %s.\n", i+1, info);
00080     }
00081 #endif
00082     exit(0);
00083   }
00084 
00085   // Make the order of args a bit more forgiving than it used to be.
00086   const char* lang = "eng";
00087   const char* image = NULL;
00088   const char* output = NULL;
00089   const char* datapath = NULL;
00090   bool noocr = false;
00091   bool list_langs = false;
00092   bool print_parameters = false;
00093 
00094   tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
00095   int arg = 1;
00096   while (arg < argc && (output == NULL || argv[arg][0] == '-')) {
00097     if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) {
00098       lang = argv[arg + 1];
00099       ++arg;
00100     } else if (strcmp(argv[arg], "--tessdata-dir") == 0 && arg + 1 < argc) {
00101       datapath = argv[arg + 1];
00102       ++arg;
00103     } else if (strcmp(argv[arg], "--list-langs") == 0) {
00104       noocr = true;
00105       list_langs = true;
00106     } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) {
00107       pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1]));
00108       ++arg;
00109     } else if (strcmp(argv[arg], "--print-parameters") == 0) {
00110       noocr = true;
00111       print_parameters = true;
00112     } else if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
00113       // handled properly after api init
00114       ++arg;
00115     } else if (image == NULL) {
00116       image = argv[arg];
00117     } else if (output == NULL) {
00118       output = argv[arg];
00119     }
00120     ++arg;
00121   }
00122 
00123   if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
00124     list_langs = true;
00125     noocr = true;
00126   }
00127 
00128   if (output == NULL && noocr == false) {
00129     fprintf(stderr, "Usage:\n  %s imagename|stdin outputbase|stdout "
00130             "[options...] [configfile...]\n\n", argv[0]);
00131 
00132     fprintf(stderr, "OCR options:\n");
00133     fprintf(stderr, "  --tessdata-dir /path\tspecify location of tessdata"
00134                       " path\n");
00135     fprintf(stderr, "  -l lang[+lang]\tspecify language(s) used for OCR\n");
00136     fprintf(stderr, "  -c configvar=value\tset value for control parameter.\n"
00137                       "\t\t\tMultiple -c arguments are allowed.\n");
00138     fprintf(stderr, "  -psm pagesegmode\tspecify page segmentation mode.\n");
00139     fprintf(stderr, "These options must occur before any configfile.\n\n");
00140     fprintf(stderr,
00141             "pagesegmode values are:\n"
00142             "  0 = Orientation and script detection (OSD) only.\n"
00143             "  1 = Automatic page segmentation with OSD.\n"
00144             "  2 = Automatic page segmentation, but no OSD, or OCR\n"
00145             "  3 = Fully automatic page segmentation, but no OSD. (Default)\n"
00146             "  4 = Assume a single column of text of variable sizes.\n"
00147             "  5 = Assume a single uniform block of vertically aligned text.\n"
00148             "  6 = Assume a single uniform block of text.\n"
00149             "  7 = Treat the image as a single text line.\n"
00150             "  8 = Treat the image as a single word.\n"
00151             "  9 = Treat the image as a single word in a circle.\n"
00152             "  10 = Treat the image as a single character.\n\n");
00153     fprintf(stderr, "Single options:\n");
00154     fprintf(stderr, "  -v --version: version info\n");
00155     fprintf(stderr, "  --list-langs: list available languages for tesseract "
00156                       "engine. Can be used with --tessdata-dir.\n");
00157     fprintf(stderr, "  --print-parameters: print tesseract parameters to the "
00158                       "stdout.\n");
00159     exit(1);
00160   }
00161 
00162   if (output != NULL && strcmp(output, "-") && strcmp(output, "stdout")) {
00163     tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
00164            tesseract::TessBaseAPI::Version());
00165   }
00166   PERF_COUNT_START("Tesseract:main")
00167   tesseract::TessBaseAPI api;
00168 
00169   api.SetOutputName(output);
00170   int rc = api.Init(datapath, lang, tesseract::OEM_DEFAULT,
00171                 &(argv[arg]), argc - arg, NULL, NULL, false);
00172 
00173   if (rc) {
00174     fprintf(stderr, "Could not initialize tesseract.\n");
00175     exit(1);
00176   }
00177 
00178   char opt1[255], opt2[255];
00179   for (arg = 0; arg < argc; arg++) {
00180     if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) {
00181       strncpy(opt1, argv[arg + 1], 255);
00182       *(strchr(opt1, '=')) = 0;
00183       strncpy(opt2, strchr(argv[arg + 1], '=') + 1, 255);
00184       opt2[254] = 0;
00185       ++arg;
00186 
00187       if (!api.SetVariable(opt1, opt2)) {
00188         fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
00189       }
00190     }
00191   }
00192 
00193   if (list_langs) {
00194      GenericVector<STRING> languages;
00195      api.GetAvailableLanguagesAsVector(&languages);
00196      fprintf(stderr, "List of available languages (%d):\n",
00197              languages.size());
00198      for (int index = 0; index < languages.size(); ++index) {
00199        STRING& string = languages[index];
00200        fprintf(stderr, "%s\n", string.string());
00201      }
00202      api.End();
00203      exit(0);
00204   }
00205 
00206   if (print_parameters) {
00207      FILE* fout = stdout;
00208      fprintf(stdout, "Tesseract parameters:\n");
00209      api.PrintVariables(fout);
00210      api.End();
00211      exit(0);
00212   }
00213 
00214   // We have 2 possible sources of pagesegmode: a config file and
00215   // the command line. For backwards compatability reasons, the
00216   // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the
00217   // default for this program is tesseract::PSM_AUTO. We will let
00218   // the config file take priority, so the command-line default
00219   // can take priority over the tesseract default, so we use the
00220   // value from the command line only if the retrieved mode
00221   // is still tesseract::PSM_SINGLE_BLOCK, indicating no change
00222   // in any config file. Therefore the only way to force
00223   // tesseract::PSM_SINGLE_BLOCK is from the command line.
00224   // It would be simpler if we could set the value before Init,
00225   // but that doesn't work.
00226   if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
00227      api.SetPageSegMode(pagesegmode);
00228 
00229   bool stdInput = !strcmp(image, "stdin") || !strcmp(image, "-");
00230   Pix* pixs = NULL;
00231   if (stdInput) {
00232     char byt;
00233     GenericVector<l_uint8> ch_data;
00234     std::istream file(std::cin.rdbuf());
00235 
00236 #ifdef WIN32
00237     if (_setmode(_fileno(stdin), _O_BINARY) == -1)
00238       tprintf("ERROR: cin to binary: %s", strerror(errno));
00239 #endif  // WIN32
00240 
00241     while (file.get(byt)) {
00242       ch_data.push_back(byt);
00243     }
00244     std::cin.ignore(std::cin.rdbuf()->in_avail() + 1);
00245 
00246     pixs = pixReadMem(&ch_data[0], ch_data.size());
00247   }
00248 
00249   if (pagesegmode == tesseract::PSM_AUTO_ONLY ||
00250       pagesegmode == tesseract::PSM_OSD_ONLY) {
00251     int ret_val = 0;
00252 
00253     if (!pixs)
00254       pixs = pixRead(image);
00255     if (!pixs) {
00256       fprintf(stderr, "Cannot open input file: %s\n", image);
00257       exit(2);
00258     }
00259     api.SetImage(pixs);
00260 
00261     if (pagesegmode == tesseract::PSM_OSD_ONLY) {
00262        OSResults osr;
00263        if (api.DetectOS(&osr)) {
00264          int orient = osr.best_result.orientation_id;
00265          int script_id = osr.get_best_script(orient);
00266          float orient_oco = osr.best_result.oconfidence;
00267          float orient_sco = osr.best_result.sconfidence;
00268          tprintf("Orientation: %d\nOrientation in degrees: %d\n" \
00269                  "Orientation confidence: %.2f\n" \
00270                  "Script: %d\nScript confidence: %.2f\n",
00271                  orient, OrientationIdToValue(orient), orient_oco,
00272                  script_id, orient_sco);
00273        } else {
00274          ret_val = 1;
00275        }
00276     } else {
00277        tesseract::Orientation orientation;
00278        tesseract::WritingDirection direction;
00279        tesseract::TextlineOrder order;
00280        float deskew_angle;
00281        tesseract::PageIterator* it =  api.AnalyseLayout();
00282        if (it) {
00283          it->Orientation(&orientation, &direction, &order, &deskew_angle);
00284          tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \
00285                  "Deskew angle: %.4f\n",
00286                   orientation, direction, order, deskew_angle);
00287        } else {
00288          ret_val = 1;
00289        }
00290        delete it;
00291     }
00292     pixDestroy(&pixs);
00293     exit(ret_val);
00294   }
00295 
00296   tesseract::TessResultRenderer* renderer = NULL;
00297   bool b;
00298   api.GetBoolVariable("tessedit_create_hocr", &b);
00299   if (b && renderer == NULL) renderer = new tesseract::TessHOcrRenderer();
00300 
00301   api.GetBoolVariable("tessedit_create_pdf", &b);
00302   if (b && renderer == NULL)
00303     renderer = new tesseract::TessPDFRenderer(api.GetDatapath());
00304 
00305   api.GetBoolVariable("tessedit_create_boxfile", &b);
00306   if (b && renderer == NULL) renderer = new tesseract::TessBoxTextRenderer();
00307 
00308   if (renderer == NULL) renderer = new tesseract::TessTextRenderer();
00309 
00310   if (pixs) {
00311     api.ProcessPage(pixs, 0, NULL, NULL, 0, renderer);
00312     pixDestroy(&pixs);
00313   } else {
00314     FILE* fin = fopen(image, "rb");
00315     if (fin == NULL) {
00316       fprintf(stderr, "Cannot open input file: %s\n", image);
00317       exit(2);
00318     }
00319     fclose(fin);
00320     if (!api.ProcessPages(image, NULL, 0, renderer)) {
00321       fprintf(stderr, "Error during processing.\n");
00322       exit(1);
00323     }
00324   }
00325 
00326   FILE* fout = stdout;
00327   if (strcmp(output, "-") && strcmp(output, "stdout")) {
00328     STRING outfile = STRING(output)
00329         + STRING(".")
00330         + STRING(renderer->file_extension());
00331     fout = fopen(outfile.string(), "wb");
00332     if (fout == NULL) {
00333       fprintf(stderr, "Cannot create output file %s\n", outfile.string());
00334       exit(1);
00335     }
00336   }
00337 
00338   const char* data;
00339   inT32 data_len;
00340   if (renderer->GetOutput(&data, &data_len)) {
00341     fwrite(data, 1, data_len, fout);
00342     if (fout != stdout)
00343       fclose(fout);
00344     else
00345       clearerr(fout);
00346   }
00347   PERF_COUNT_END
00348   return 0;                      // Normal exit
00349 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines