Untitled
unknown
plain_text
2 years ago
5.1 kB
5
Indexable
"""Run Tesseract OCR on a single invoice image and print the extracted text.

History of the failure this script originally produced (kept for context):

    FileNotFoundError: [Errno 2] No such file or directory:
        '/Analytics/venv/CAPEANALYTICS/bin/tesseract/'
    TesseractNotFoundError: /Analytics/venv/CAPEANALYTICS/bin/tesseract/ is
        not installed or it's not in your PATH. See README file for more
        information.

The configured command ended in a trailing slash, so the operating system was
asked to execute a directory-style path instead of the Tesseract executable.
``tesseract_cmd`` must be the path of the executable file itself.
"""

import os
import shutil

from PIL import Image
from pytesseract import pytesseract

# Location where the Tesseract executable is expected. No trailing slash:
# this has to name the binary, not a directory.
# NOTE(review): confirm the binary is really installed here; installing the
# `pytesseract` Python package does not install the Tesseract program itself.
configured_cmd = '/Analytics/venv/CAPEANALYTICS/bin/tesseract'

# Defensive normalisation in case the value above is edited and a trailing
# separator sneaks back in.
candidate_cmd = configured_cmd.rstrip(os.sep) or configured_cmd

if os.path.isfile(candidate_cmd) and os.access(candidate_cmd, os.X_OK):
    pytesseract.tesseract_cmd = candidate_cmd
else:
    # Fall back to whatever `tesseract` is on PATH. If none is found, keep the
    # configured value so pytesseract raises its usual TesseractNotFoundError
    # naming the path that was tried.
    pytesseract.tesseract_cmd = shutil.which('tesseract') or candidate_cmd

# Path to the image you want to perform OCR on
image_path = '/Analytics/venv/Jup/CAPE_Case_Management_PDF_Invoicing/Data/images/Train_dataset_images/invoice/Email_28112023044602_ag_13048641_1001.png'

# Open the image with PIL; the context manager closes the file handle once
# OCR has finished, even if Tesseract fails.
with Image.open(image_path) as img:
    # Perform OCR on the image
    text = pytesseract.image_to_string(img)

# Print the extracted text
print("Extracted Text:")
print(text)
Editor is loading...
Leave a Comment