Untitled

mail@pastecode.io avatar
unknown
plain_text
7 months ago
5.1 kB
1
Indexable
Never
Code-

import os
import shutil

from PIL import Image
from pytesseract import pytesseract

# Location where the Tesseract executable is expected to live.
# pytesseract hands ``tesseract_cmd`` directly to ``subprocess.Popen``, so it
# must name the executable file itself. A trailing slash turns the value into
# a directory-style path and the launch fails with ``FileNotFoundError`` /
# ``TesseractNotFoundError``.
configured_tesseract = '/Analytics/venv/CAPEANALYTICS/bin/tesseract/'

# Drop any trailing path separator so the value refers to the binary.
tesseract_cmd = configured_tesseract.rstrip('/')

# If the configured location is not an executable file, fall back to whatever
# ``tesseract`` is reachable through PATH. When neither resolves, keep the
# configured value so pytesseract raises its own TesseractNotFoundError.
if not (os.path.isfile(tesseract_cmd) and os.access(tesseract_cmd, os.X_OK)):
    tesseract_cmd = shutil.which('tesseract') or tesseract_cmd

pytesseract.tesseract_cmd = tesseract_cmd

# Path to the image you want to perform OCR on
image_path = '/Analytics/venv/Jup/CAPE_Case_Management_PDF_Invoicing/Data/images/Train_dataset_images/invoice/Email_28112023044602_ag_13048641_1001.png'

# Open the image with PIL; the context manager closes the underlying file
# handle once OCR has finished.
with Image.open(image_path) as img:
    # Perform OCR on the image
    text = pytesseract.image_to_string(img)

# Print the extracted text
print("Extracted Text:")
print(text)


Error-

---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/pytesseract/pytesseract.py:255, in run_tesseract(input_filename, output_filename_base, extension, lang, config, nice, timeout)
    254 try:
--> 255     proc = subprocess.Popen(cmd_args, **subprocess_args())
    256 except OSError as e:

File /Analytics/python3/lib/python3.8/subprocess.py:858, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)
    855             self.stderr = io.TextIOWrapper(self.stderr,
    856                     encoding=encoding, errors=errors)
--> 858     self._execute_child(args, executable, preexec_fn, close_fds,
    859                         pass_fds, cwd, env,
    860                         startupinfo, creationflags, shell,
    861                         p2cread, p2cwrite,
    862                         c2pread, c2pwrite,
    863                         errread, errwrite,
    864                         restore_signals, start_new_session)
    865 except:
    866     # Cleanup if the child failed starting.

File /Analytics/python3/lib/python3.8/subprocess.py:1704, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)
   1703         err_msg = os.strerror(errno_num)
-> 1704     raise child_exception_type(errno_num, err_msg, err_filename)
   1705 raise child_exception_type(err_msg)

FileNotFoundError: [Errno 2] No such file or directory: '/Analytics/venv/CAPEANALYTICS/bin/tesseract/'

During handling of the above exception, another exception occurred:

TesseractNotFoundError                    Traceback (most recent call last)
Input In [8], in <cell line: 14>()
     11 img = Image.open(image_path)
     13 # Perform OCR on the image
---> 14 text = pytesseract.image_to_string(img)
     16 # Print the extracted text
     17 print("Extracted Text:")

File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/pytesseract/pytesseract.py:423, in image_to_string(image, lang, config, nice, output_type, timeout)
    418 """
    419 Returns the result of a Tesseract OCR run on the provided image to string
    420 """
    421 args = [image, 'txt', lang, config, nice, timeout]
--> 423 return {
    424     Output.BYTES: lambda: run_and_get_output(*(args + [True])),
    425     Output.DICT: lambda: {'text': run_and_get_output(*args)},
    426     Output.STRING: lambda: run_and_get_output(*args),
    427 }[output_type]()

File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/pytesseract/pytesseract.py:426, in image_to_string.<locals>.<lambda>()
    418 """
    419 Returns the result of a Tesseract OCR run on the provided image to string
    420 """
    421 args = [image, 'txt', lang, config, nice, timeout]
    423 return {
    424     Output.BYTES: lambda: run_and_get_output(*(args + [True])),
    425     Output.DICT: lambda: {'text': run_and_get_output(*args)},
--> 426     Output.STRING: lambda: run_and_get_output(*args),
    427 }[output_type]()

File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/pytesseract/pytesseract.py:288, in run_and_get_output(image, extension, lang, config, nice, timeout, return_bytes)
    277 with save(image) as (temp_name, input_filename):
    278     kwargs = {
    279         'input_filename': input_filename,
    280         'output_filename_base': temp_name,
   (...)
    285         'timeout': timeout,
    286     }
--> 288     run_tesseract(**kwargs)
    289     filename = f"{kwargs['output_filename_base']}{extsep}{extension}"
    290     with open(filename, 'rb') as output_file:

File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/pytesseract/pytesseract.py:260, in run_tesseract(input_filename, output_filename_base, extension, lang, config, nice, timeout)
    258         raise
    259     else:
--> 260         raise TesseractNotFoundError()
    262 with timeout_manager(proc, timeout) as error_string:
    263     if proc.returncode:

TesseractNotFoundError: /Analytics/venv/CAPEANALYTICS/bin/tesseract/ is not installed or it's not in your PATH. See README file for more information.
Leave a Comment