tesseract  3.03
/usr/local/google/home/jbreiden/tesseract-ocr-read-only/contrib/tesseract-c_api-demo.py
Go to the documentation of this file.
00001 #!/usr/bin/python
00002 # -*- coding: utf-8 -*-
00003 
00004 # Copyright 2012 Zdenko Podobný
00005 # Author: Zdenko Podobný
00006 #
00007 # Licensed under the Apache License, Version 2.0 (the "License");
00008 # you may not use this file except in compliance with the License.
00009 # You may obtain a copy of the License at
00010 #
00011 #      http://www.apache.org/licenses/LICENSE-2.0
00012 #
00013 # Unless required by applicable law or agreed to in writing, software
00014 # distributed under the License is distributed on an "AS IS" BASIS,
00015 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00016 # See the License for the specific language governing permissions and
00017 # limitations under the License.
00018 
00019 """
00020 Simple python demo script of tesseract-ocr 3.02 c-api
00021 """
00022 
00023 import os
00024 import sys
00025 import ctypes
00026 
# Demo configuration: recognition language, input image and the directories
# searched for the tesseract shared library on POSIX and Windows.
lang = "eng"
filename = "../phototest.tif"
libpath = "/usr/local/lib64/"
libpath_w = "../vs2008/DLL_Release/"

# An unset or empty TESSDATA_PREFIX falls back to the parent directory.
TESSDATA_PREFIX = os.environ.get('TESSDATA_PREFIX') or "../"

# Choose the preferred library path (libname) and the bare fallback name
# (libname_alt) that is left to the system loader to resolve.
if sys.platform != "win32":
    libname = libpath + "libtesseract.so.3.0.2"
    libname_alt = "libtesseract.so.3"
else:
    libname = libpath_w + "libtesseract302.dll"
    libname_alt = "libtesseract302.dll"
    # Extend PATH with the DLL directory for the Windows loader.
    os.environ["PATH"] += os.pathsep + libpath_w
00043 
# Load the tesseract shared library: first from the explicit path in
# `libname`, then by the bare name in `libname_alt`.
#
# ctypes reports a failed load as OSError on every platform (WindowsError is
# a subclass/alias of it and is not even defined outside Windows), so OSError
# is what must be caught here. The "except ... as" form is valid on both
# Python 2.6+ and Python 3.
try:
    tesseract = ctypes.cdll.LoadLibrary(libname)
except OSError:
    try:
        tesseract = ctypes.cdll.LoadLibrary(libname_alt)
    except OSError as err:
        # Both candidates failed: list what was tried, show the loader
        # error of the last attempt and stop with exit status 1.
        print("Trying to load '%s'..." % libname)
        print("Trying to load '%s'..." % libname_alt)
        print(err)
        sys.exit(1)
00054 
# Query the version string of the loaded library.
tesseract.TessVersion.restype = ctypes.c_char_p
version_text = tesseract.TessVersion()
if not isinstance(version_text, str):
    # Python 3: c_char_p results arrive as bytes.
    version_text = version_text.decode("ascii", "replace")

# Reduce the version to its leading "major.minor" part. A fixed-width slice
# such as [:4] turns "4.1.1" into "4.1.", which float() rejects.
major, _, rest = version_text.partition(".")
minor = rest[:len(rest) - len(rest.lstrip("0123456789"))]
tesseract_version = "%s.%s" % (major, minor) if minor else major
try:
    version_number = float(tesseract_version)
except ValueError:
    # Unparseable version string: treat it as too old rather than crash.
    version_number = 0.0

# We need to check library version because libtesseract.so.3 is symlink
# and can point to other version than 3.02
if version_number < 3.02:
    print("Found tesseract-ocr library version %s." % version_text)
    print("C-API is present only in version 3.02!")
    sys.exit(2)
00064 
# Declare the C signatures explicitly. Without a restype ctypes assumes the
# function returns a C int, which truncates 64-bit pointers such as the API
# handle and the returned text buffer.
# NOTE(review): these prototypes follow the 3.02 C API, where
# TessBaseAPIProcessPages takes four arguments and returns char*; confirm
# against the capi.h of the installed library.
tesseract.TessBaseAPICreate.restype = ctypes.c_void_p
tesseract.TessBaseAPICreate.argtypes = []
tesseract.TessBaseAPIDelete.restype = None
tesseract.TessBaseAPIDelete.argtypes = [ctypes.c_void_p]
tesseract.TessBaseAPIInit3.restype = ctypes.c_int
tesseract.TessBaseAPIInit3.argtypes = [
    ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p]
# c_void_p (not c_char_p) keeps the raw address so the buffer can be freed.
tesseract.TessBaseAPIProcessPages.restype = ctypes.c_void_p
tesseract.TessBaseAPIProcessPages.argtypes = [
    ctypes.c_void_p, ctypes.c_char_p, ctypes.c_char_p, ctypes.c_int]
tesseract.TessDeleteText.restype = None
tesseract.TessDeleteText.argtypes = [ctypes.c_void_p]


def _as_c_string(text):
    """Return `text` as bytes suitable for a `const char*` argument."""
    if isinstance(text, bytes):
        return text
    return text.encode(sys.getfilesystemencoding() or "utf-8")


api = tesseract.TessBaseAPICreate()
rc = tesseract.TessBaseAPIInit3(
    api, _as_c_string(TESSDATA_PREFIX), _as_c_string(lang))
if rc:
    tesseract.TessBaseAPIDelete(api)
    print("Could not initialize tesseract.\n")
    sys.exit(3)

try:
    text_out = tesseract.TessBaseAPIProcessPages(
        api, _as_c_string(filename), None, 0)
    if not text_out:
        # A NULL result must not be handed to string_at().
        print("Could not process '%s'." % filename)
        sys.exit(4)
    try:
        # Copy the NUL-terminated result into a Python bytes object ...
        result_text = ctypes.string_at(text_out)
    finally:
        # ... then hand the C buffer back to the library.
        tesseract.TessDeleteText(text_out)
finally:
    # Release the API handle on every path, including the early exit above.
    tesseract.TessBaseAPIDelete(api)

if not isinstance(result_text, str):
    # Python 3: decode the bytes for printing. NOTE(review): UTF-8 is assumed
    # for the recognized text — confirm.
    result_text = result_text.decode("utf-8", "replace")
print(result_text)
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines