Untitled
unknown
plain_text
2 years ago
5.1 kB
14
Indexable
Code-
from pytesseract import pytesseract
from PIL import Image
# Set the path to the Tesseract binary
pytesseract.tesseract_cmd = '/Analytics/venv/CAPEANALYTICS/bin/tesseract/'
# Path to the image you want to perform OCR on
image_path = '/Analytics/venv/Jup/CAPE_Case_Management_PDF_Invoicing/Data/images/Train_dataset_images/invoice/Email_28112023044602_ag_13048641_1001.png'
# Open the image using the PIL library
img = Image.open(image_path)
# Perform OCR on the image
text = pytesseract.image_to_string(img)
# Print the extracted text
print("Extracted Text:")
print(text)
Error-
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/pytesseract/pytesseract.py:255, in run_tesseract(input_filename, output_filename_base, extension, lang, config, nice, timeout)
254 try:
--> 255 proc = subprocess.Popen(cmd_args, **subprocess_args())
256 except OSError as e:
File /Analytics/python3/lib/python3.8/subprocess.py:858, in Popen.__init__(self, args, bufsize, executable, stdin, stdout, stderr, preexec_fn, close_fds, shell, cwd, env, universal_newlines, startupinfo, creationflags, restore_signals, start_new_session, pass_fds, encoding, errors, text)
855 self.stderr = io.TextIOWrapper(self.stderr,
856 encoding=encoding, errors=errors)
--> 858 self._execute_child(args, executable, preexec_fn, close_fds,
859 pass_fds, cwd, env,
860 startupinfo, creationflags, shell,
861 p2cread, p2cwrite,
862 c2pread, c2pwrite,
863 errread, errwrite,
864 restore_signals, start_new_session)
865 except:
866 # Cleanup if the child failed starting.
File /Analytics/python3/lib/python3.8/subprocess.py:1704, in Popen._execute_child(self, args, executable, preexec_fn, close_fds, pass_fds, cwd, env, startupinfo, creationflags, shell, p2cread, p2cwrite, c2pread, c2pwrite, errread, errwrite, restore_signals, start_new_session)
1703 err_msg = os.strerror(errno_num)
-> 1704 raise child_exception_type(errno_num, err_msg, err_filename)
1705 raise child_exception_type(err_msg)
FileNotFoundError: [Errno 2] No such file or directory: '/Analytics/venv/CAPEANALYTICS/bin/tesseract/'
During handling of the above exception, another exception occurred:
TesseractNotFoundError Traceback (most recent call last)
Input In [8], in <cell line: 14>()
11 img = Image.open(image_path)
13 # Perform OCR on the image
---> 14 text = pytesseract.image_to_string(img)
16 # Print the extracted text
17 print("Extracted Text:")
File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/pytesseract/pytesseract.py:423, in image_to_string(image, lang, config, nice, output_type, timeout)
418 """
419 Returns the result of a Tesseract OCR run on the provided image to string
420 """
421 args = [image, 'txt', lang, config, nice, timeout]
--> 423 return {
424 Output.BYTES: lambda: run_and_get_output(*(args + [True])),
425 Output.DICT: lambda: {'text': run_and_get_output(*args)},
426 Output.STRING: lambda: run_and_get_output(*args),
427 }[output_type]()
File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/pytesseract/pytesseract.py:426, in image_to_string.<locals>.<lambda>()
418 """
419 Returns the result of a Tesseract OCR run on the provided image to string
420 """
421 args = [image, 'txt', lang, config, nice, timeout]
423 return {
424 Output.BYTES: lambda: run_and_get_output(*(args + [True])),
425 Output.DICT: lambda: {'text': run_and_get_output(*args)},
--> 426 Output.STRING: lambda: run_and_get_output(*args),
427 }[output_type]()
File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/pytesseract/pytesseract.py:288, in run_and_get_output(image, extension, lang, config, nice, timeout, return_bytes)
277 with save(image) as (temp_name, input_filename):
278 kwargs = {
279 'input_filename': input_filename,
280 'output_filename_base': temp_name,
(...)
285 'timeout': timeout,
286 }
--> 288 run_tesseract(**kwargs)
289 filename = f"{kwargs['output_filename_base']}{extsep}{extension}"
290 with open(filename, 'rb') as output_file:
File /Analytics/venv/CAPEANALYTICS/lib/python3.8/site-packages/pytesseract/pytesseract.py:260, in run_tesseract(input_filename, output_filename_base, extension, lang, config, nice, timeout)
258 raise
259 else:
--> 260 raise TesseractNotFoundError()
262 with timeout_manager(proc, timeout) as error_string:
263 if proc.returncode:
TesseractNotFoundError: /Analytics/venv/CAPEANALYTICS/bin/tesseract/ is not installed or it's not in your PATH. See README file for more information.Editor is loading...
Leave a Comment