tesseract
3.03
|
#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright 2012 Zdenko Podobný
# Author: Zdenko Podobný
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Simple python demo script of tesseract-ocr 3.02 c-api

The script loads the tesseract shared library through ctypes, checks that
the library is at least version 3.02, initializes an API handle for the
configured language, runs OCR over ``filename`` and prints the recognized
text.

Exit codes:
    1 - the shared library could not be loaded
    2 - the library is older than 3.02
    3 - the API could not be initialized
    4 - page processing returned no text (NULL)
"""

import os
import sys
import ctypes

# Demo variables
lang = "eng"
filename = "../phototest.tif"
libpath = "/usr/local/lib64/"
libpath_w = "../vs2008/DLL_Release/"
TESSDATA_PREFIX = os.environ.get('TESSDATA_PREFIX')
if not TESSDATA_PREFIX:
    TESSDATA_PREFIX = "../"


def _to_bytes(value):
    """Return *value* as a byte string suitable for a C ``char*`` argument."""
    # On Python 2 ``str`` already is ``bytes``, so the value passes through
    # untouched; on Python 3 text must be encoded before ctypes accepts it.
    if isinstance(value, bytes):
        return value
    return value.encode(sys.getfilesystemencoding() or "utf-8")


def _to_native_str(value, encoding):
    """Return *value* as the interpreter's native ``str`` type."""
    # No-op on Python 2 (bytes is str); decodes on Python 3.
    if isinstance(value, str):
        return value
    return value.decode(encoding)


if sys.platform == "win32":
    libname = libpath_w + "libtesseract302.dll"
    libname_alt = "libtesseract302.dll"
    os.environ["PATH"] += os.pathsep + libpath_w
else:
    libname = libpath + "libtesseract.so.3.0.2"
    libname_alt = "libtesseract.so.3"

# ctypes raises OSError when a library cannot be loaded (WindowsError is a
# subclass/alias of OSError and does not exist on other platforms).
try:
    tesseract = ctypes.cdll.LoadLibrary(libname)
except OSError:
    try:
        tesseract = ctypes.cdll.LoadLibrary(libname_alt)
    except OSError as err:
        print("Trying to load '%s'..." % libname)
        print("Trying to load '%s'..." % libname_alt)
        print(err)
        sys.exit(1)

# Declare the C signatures explicitly.  Without them ctypes assumes every
# function returns a C int, which truncates pointers on 64-bit platforms.
tesseract.TessVersion.restype = ctypes.c_char_p
tesseract.TessVersion.argtypes = []
tesseract.TessBaseAPICreate.restype = ctypes.c_void_p
tesseract.TessBaseAPICreate.argtypes = []
tesseract.TessBaseAPIDelete.restype = None
tesseract.TessBaseAPIDelete.argtypes = [ctypes.c_void_p]
tesseract.TessBaseAPIInit3.restype = ctypes.c_int
tesseract.TessBaseAPIInit3.argtypes = [ctypes.c_void_p, ctypes.c_char_p,
                                       ctypes.c_char_p]
# The text pointer is kept as void* (not c_char_p) so the raw address stays
# available for TessDeleteText after the bytes have been copied out.
tesseract.TessBaseAPIProcessPages.restype = ctypes.c_void_p
tesseract.TessBaseAPIProcessPages.argtypes = [ctypes.c_void_p,
                                              ctypes.c_char_p,
                                              ctypes.c_char_p, ctypes.c_int]
tesseract.TessDeleteText.restype = None
tesseract.TessDeleteText.argtypes = [ctypes.c_void_p]

tesseract_version = _to_native_str(tesseract.TessVersion()[:4], "ascii")

# We need to check library version because libtesseract.so.3 is symlink
# and can point to other version than 3.02
if float(tesseract_version) < 3.02:
    print("Found tesseract-ocr library version %s." % tesseract_version)
    print("C-API is present only in version 3.02!")
    sys.exit(2)

api = tesseract.TessBaseAPICreate()
rc = tesseract.TessBaseAPIInit3(api, _to_bytes(TESSDATA_PREFIX),
                                _to_bytes(lang))
if rc:
    tesseract.TessBaseAPIDelete(api)
    print("Could not initialize tesseract.\n")
    sys.exit(3)

text_out = tesseract.TessBaseAPIProcessPages(api, _to_bytes(filename),
                                             None, 0)
if not text_out:
    # A NULL result must not reach string_at(); report it and clean up.
    tesseract.TessBaseAPIDelete(api)
    print("Could not process '%s'." % filename)
    sys.exit(4)

# Copy the recognized text into Python-owned memory, then release the
# library-owned buffer and the API handle.
result_text = ctypes.string_at(text_out)
tesseract.TessDeleteText(text_out)
tesseract.TessBaseAPIDelete(api)

# Tesseract emits UTF-8; Python 2 keeps printing the raw bytes as before.
print(_to_native_str(result_text, "utf-8"))