tesseract
3.03
|
00001 /********************************************************************** 00002 * File: tessedit.cpp (Formerly tessedit.c) 00003 * Description: Main program for merge of tess and editor. 00004 * Author: Ray Smith 00005 * Created: Tue Jan 07 15:21:46 GMT 1992 00006 * 00007 * (C) Copyright 1992, Hewlett-Packard Ltd. 00008 ** Licensed under the Apache License, Version 2.0 (the "License"); 00009 ** you may not use this file except in compliance with the License. 00010 ** You may obtain a copy of the License at 00011 ** http://www.apache.org/licenses/LICENSE-2.0 00012 ** Unless required by applicable law or agreed to in writing, software 00013 ** distributed under the License is distributed on an "AS IS" BASIS, 00014 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00015 ** See the License for the specific language governing permissions and 00016 ** limitations under the License. 00017 * 00018 **********************************************************************/ 00019 00020 // Include automatically generated configuration file if running autoconf 00021 #ifdef HAVE_CONFIG_H 00022 #include "config_auto.h" 00023 #endif 00024 00025 #ifdef _WIN32 00026 #include <fcntl.h> 00027 #include <io.h> 00028 #endif // _WIN32 00029 #include <iostream> 00030 00031 #include "allheaders.h" 00032 #include "baseapi.h" 00033 #include "basedir.h" 00034 #include "renderer.h" 00035 #include "strngs.h" 00036 #include "tprintf.h" 00037 #include "openclwrapper.h" 00038 #include "osdetect.h" 00039 00040 /********************************************************************** 00041 * main() 00042 * 00043 **********************************************************************/ 00044 00045 int main(int argc, char **argv) { 00046 if ((argc == 2 && strcmp(argv[1], "-v") == 0) || 00047 (argc == 2 && strcmp(argv[1], "--version") == 0)) { 00048 char *versionStrP; 00049 00050 fprintf(stderr, "tesseract %s\n", tesseract::TessBaseAPI::Version()); 00051 00052 versionStrP = getLeptonicaVersion(); 00053 fprintf(stderr, " %s\n", versionStrP); 00054 lept_free(versionStrP); 00055 00056 versionStrP = getImagelibVersions(); 00057 fprintf(stderr, " %s\n", versionStrP); 00058 lept_free(versionStrP); 00059 00060 #ifdef USE_OPENCL 00061 cl_platform_id platform; 00062 cl_uint num_platforms; 00063 cl_device_id devices[2]; 00064 cl_uint num_devices; 00065 char info[256]; 00066 int i; 00067 00068 fprintf(stderr, " OpenCL info:\n"); 00069 clGetPlatformIDs(1, &platform, &num_platforms); 00070 fprintf(stderr, " Found %d platforms.\n", num_platforms); 00071 clGetPlatformInfo(platform, CL_PLATFORM_NAME, 256, info, 0); 00072 fprintf(stderr, " Platform name: %s.\n", info); 00073 clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 256, info, 0); 00074 fprintf(stderr, " Version: %s.\n", info); 00075 clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 2, devices, &num_devices); 00076 fprintf(stderr, " Found %d devices.\n", num_devices); 00077 for (i = 0; i < num_devices; ++i) { 00078 clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0); 00079 fprintf(stderr, " Device %d name: %s.\n", i+1, info); 00080 } 00081 #endif 00082 exit(0); 00083 } 00084 00085 // Make the order of args a bit more forgiving than it used to be. 00086 const char* lang = "eng"; 00087 const char* image = NULL; 00088 const char* output = NULL; 00089 const char* datapath = NULL; 00090 bool noocr = false; 00091 bool list_langs = false; 00092 bool print_parameters = false; 00093 00094 tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO; 00095 int arg = 1; 00096 while (arg < argc && (output == NULL || argv[arg][0] == '-')) { 00097 if (strcmp(argv[arg], "-l") == 0 && arg + 1 < argc) { 00098 lang = argv[arg + 1]; 00099 ++arg; 00100 } else if (strcmp(argv[arg], "--tessdata-dir") == 0 && arg + 1 < argc) { 00101 datapath = argv[arg + 1]; 00102 ++arg; 00103 } else if (strcmp(argv[arg], "--list-langs") == 0) { 00104 noocr = true; 00105 list_langs = true; 00106 } else if (strcmp(argv[arg], "-psm") == 0 && arg + 1 < argc) { 00107 pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[arg + 1])); 00108 ++arg; 00109 } else if (strcmp(argv[arg], "--print-parameters") == 0) { 00110 noocr = true; 00111 print_parameters = true; 00112 } else if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) { 00113 // handled properly after api init 00114 ++arg; 00115 } else if (image == NULL) { 00116 image = argv[arg]; 00117 } else if (output == NULL) { 00118 output = argv[arg]; 00119 } 00120 ++arg; 00121 } 00122 00123 if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) { 00124 list_langs = true; 00125 noocr = true; 00126 } 00127 00128 if (output == NULL && noocr == false) { 00129 fprintf(stderr, "Usage:\n %s imagename|stdin outputbase|stdout " 00130 "[options...] [configfile...]\n\n", argv[0]); 00131 00132 fprintf(stderr, "OCR options:\n"); 00133 fprintf(stderr, " --tessdata-dir /path\tspecify location of tessdata" 00134 " path\n"); 00135 fprintf(stderr, " -l lang[+lang]\tspecify language(s) used for OCR\n"); 00136 fprintf(stderr, " -c configvar=value\tset value for control parameter.\n" 00137 "\t\t\tMultiple -c arguments are allowed.\n"); 00138 fprintf(stderr, " -psm pagesegmode\tspecify page segmentation mode.\n"); 00139 fprintf(stderr, "These options must occur before any configfile.\n\n"); 00140 fprintf(stderr, 00141 "pagesegmode values are:\n" 00142 " 0 = Orientation and script detection (OSD) only.\n" 00143 " 1 = Automatic page segmentation with OSD.\n" 00144 " 2 = Automatic page segmentation, but no OSD, or OCR\n" 00145 " 3 = Fully automatic page segmentation, but no OSD. (Default)\n" 00146 " 4 = Assume a single column of text of variable sizes.\n" 00147 " 5 = Assume a single uniform block of vertically aligned text.\n" 00148 " 6 = Assume a single uniform block of text.\n" 00149 " 7 = Treat the image as a single text line.\n" 00150 " 8 = Treat the image as a single word.\n" 00151 " 9 = Treat the image as a single word in a circle.\n" 00152 " 10 = Treat the image as a single character.\n\n"); 00153 fprintf(stderr, "Single options:\n"); 00154 fprintf(stderr, " -v --version: version info\n"); 00155 fprintf(stderr, " --list-langs: list available languages for tesseract " 00156 "engine. Can be used with --tessdata-dir.\n"); 00157 fprintf(stderr, " --print-parameters: print tesseract parameters to the " 00158 "stdout.\n"); 00159 exit(1); 00160 } 00161 00162 if (output != NULL && strcmp(output, "-") && strcmp(output, "stdout")) { 00163 tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n", 00164 tesseract::TessBaseAPI::Version()); 00165 } 00166 PERF_COUNT_START("Tesseract:main") 00167 tesseract::TessBaseAPI api; 00168 00169 api.SetOutputName(output); 00170 int rc = api.Init(datapath, lang, tesseract::OEM_DEFAULT, 00171 &(argv[arg]), argc - arg, NULL, NULL, false); 00172 00173 if (rc) { 00174 fprintf(stderr, "Could not initialize tesseract.\n"); 00175 exit(1); 00176 } 00177 00178 char opt1[255], opt2[255]; 00179 for (arg = 0; arg < argc; arg++) { 00180 if (strcmp(argv[arg], "-c") == 0 && arg + 1 < argc) { 00181 strncpy(opt1, argv[arg + 1], 255); 00182 *(strchr(opt1, '=')) = 0; 00183 strncpy(opt2, strchr(argv[arg + 1], '=') + 1, 255); 00184 opt2[254] = 0; 00185 ++arg; 00186 00187 if (!api.SetVariable(opt1, opt2)) { 00188 fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2); 00189 } 00190 } 00191 } 00192 00193 if (list_langs) { 00194 GenericVector<STRING> languages; 00195 api.GetAvailableLanguagesAsVector(&languages); 00196 fprintf(stderr, "List of available languages (%d):\n", 00197 languages.size()); 00198 for (int index = 0; index < languages.size(); ++index) { 00199 STRING& string = languages[index]; 00200 fprintf(stderr, "%s\n", string.string()); 00201 } 00202 api.End(); 00203 exit(0); 00204 } 00205 00206 if (print_parameters) { 00207 FILE* fout = stdout; 00208 fprintf(stdout, "Tesseract parameters:\n"); 00209 api.PrintVariables(fout); 00210 api.End(); 00211 exit(0); 00212 } 00213 00214 // We have 2 possible sources of pagesegmode: a config file and 00215 // the command line. For backwards compatability reasons, the 00216 // default in tesseract is tesseract::PSM_SINGLE_BLOCK, but the 00217 // default for this program is tesseract::PSM_AUTO. We will let 00218 // the config file take priority, so the command-line default 00219 // can take priority over the tesseract default, so we use the 00220 // value from the command line only if the retrieved mode 00221 // is still tesseract::PSM_SINGLE_BLOCK, indicating no change 00222 // in any config file. Therefore the only way to force 00223 // tesseract::PSM_SINGLE_BLOCK is from the command line. 00224 // It would be simpler if we could set the value before Init, 00225 // but that doesn't work. 00226 if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) 00227 api.SetPageSegMode(pagesegmode); 00228 00229 bool stdInput = !strcmp(image, "stdin") || !strcmp(image, "-"); 00230 Pix* pixs = NULL; 00231 if (stdInput) { 00232 char byt; 00233 GenericVector<l_uint8> ch_data; 00234 std::istream file(std::cin.rdbuf()); 00235 00236 #ifdef WIN32 00237 if (_setmode(_fileno(stdin), _O_BINARY) == -1) 00238 tprintf("ERROR: cin to binary: %s", strerror(errno)); 00239 #endif // WIN32 00240 00241 while (file.get(byt)) { 00242 ch_data.push_back(byt); 00243 } 00244 std::cin.ignore(std::cin.rdbuf()->in_avail() + 1); 00245 00246 pixs = pixReadMem(&ch_data[0], ch_data.size()); 00247 } 00248 00249 if (pagesegmode == tesseract::PSM_AUTO_ONLY || 00250 pagesegmode == tesseract::PSM_OSD_ONLY) { 00251 int ret_val = 0; 00252 00253 if (!pixs) 00254 pixs = pixRead(image); 00255 if (!pixs) { 00256 fprintf(stderr, "Cannot open input file: %s\n", image); 00257 exit(2); 00258 } 00259 api.SetImage(pixs); 00260 00261 if (pagesegmode == tesseract::PSM_OSD_ONLY) { 00262 OSResults osr; 00263 if (api.DetectOS(&osr)) { 00264 int orient = osr.best_result.orientation_id; 00265 int script_id = osr.get_best_script(orient); 00266 float orient_oco = osr.best_result.oconfidence; 00267 float orient_sco = osr.best_result.sconfidence; 00268 tprintf("Orientation: %d\nOrientation in degrees: %d\n" \ 00269 "Orientation confidence: %.2f\n" \ 00270 "Script: %d\nScript confidence: %.2f\n", 00271 orient, OrientationIdToValue(orient), orient_oco, 00272 script_id, orient_sco); 00273 } else { 00274 ret_val = 1; 00275 } 00276 } else { 00277 tesseract::Orientation orientation; 00278 tesseract::WritingDirection direction; 00279 tesseract::TextlineOrder order; 00280 float deskew_angle; 00281 tesseract::PageIterator* it = api.AnalyseLayout(); 00282 if (it) { 00283 it->Orientation(&orientation, &direction, &order, &deskew_angle); 00284 tprintf("Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n" \ 00285 "Deskew angle: %.4f\n", 00286 orientation, direction, order, deskew_angle); 00287 } else { 00288 ret_val = 1; 00289 } 00290 delete it; 00291 } 00292 pixDestroy(&pixs); 00293 exit(ret_val); 00294 } 00295 00296 tesseract::TessResultRenderer* renderer = NULL; 00297 bool b; 00298 api.GetBoolVariable("tessedit_create_hocr", &b); 00299 if (b && renderer == NULL) renderer = new tesseract::TessHOcrRenderer(); 00300 00301 api.GetBoolVariable("tessedit_create_pdf", &b); 00302 if (b && renderer == NULL) 00303 renderer = new tesseract::TessPDFRenderer(api.GetDatapath()); 00304 00305 api.GetBoolVariable("tessedit_create_boxfile", &b); 00306 if (b && renderer == NULL) renderer = new tesseract::TessBoxTextRenderer(); 00307 00308 if (renderer == NULL) renderer = new tesseract::TessTextRenderer(); 00309 00310 if (pixs) { 00311 api.ProcessPage(pixs, 0, NULL, NULL, 0, renderer); 00312 pixDestroy(&pixs); 00313 } else { 00314 FILE* fin = fopen(image, "rb"); 00315 if (fin == NULL) { 00316 fprintf(stderr, "Cannot open input file: %s\n", image); 00317 exit(2); 00318 } 00319 fclose(fin); 00320 if (!api.ProcessPages(image, NULL, 0, renderer)) { 00321 fprintf(stderr, "Error during processing.\n"); 00322 exit(1); 00323 } 00324 } 00325 00326 FILE* fout = stdout; 00327 if (strcmp(output, "-") && strcmp(output, "stdout")) { 00328 STRING outfile = STRING(output) 00329 + STRING(".") 00330 + STRING(renderer->file_extension()); 00331 fout = fopen(outfile.string(), "wb"); 00332 if (fout == NULL) { 00333 fprintf(stderr, "Cannot create output file %s\n", outfile.string()); 00334 exit(1); 00335 } 00336 } 00337 00338 const char* data; 00339 inT32 data_len; 00340 if (renderer->GetOutput(&data, &data_len)) { 00341 fwrite(data, 1, data_len, fout); 00342 if (fout != stdout) 00343 fclose(fout); 00344 else 00345 clearerr(fout); 00346 } 00347 PERF_COUNT_END 00348 return 0; // Normal exit 00349 }