Untitled
unknown
plain_text
a year ago
14 kB
10
Indexable
!pip install opencv-python==4.5.3.56 --user !pip install opencv-contrib-python==4.5.3.56 --user !pip install opencv-python-headless==4.5.3.56 --user import cv2 !pip install img2table from img2table.document import Image pip install Pillow from PIL import Image as PILImage pip install pdf2image conda install -c conda-forge poppler import easyocr import pathlib import pandas as pd cur_path = pathlib.Path().resolve() # текущий путь до папки name = '000586' img_path = f'{cur_path}/тест/{name}.pdf' img_path from pdf2image import convert_from_path pages = convert_from_path(img_path) pages for count, page in enumerate(pages): if count == 1: page.save(f'{cur_path}/{name}_1st_page.jpg', 'JPEG') # сохраняем 1-ую страницу в текущую папку instance_path = f'{cur_path}/{name}_1st_page.jpg' instance_path = f'{cur_path}/{name}_1st_page.jpg' TypingError Traceback (most recent call last) Input In [23], in <cell line: 4>() 1 img = Image(instance_path) 3 # Extract tables ----> 4 extracted_tables = img.extract_tables() File /opt/conda/lib/python3.8/site-packages/img2table/document/image.py:46, in Image.extract_tables(self, ocr, implicit_rows, borderless_tables, min_confidence) 36 def extract_tables(self, ocr: "OCRInstance" = None, implicit_rows: bool = False, borderless_tables: bool = False, 37 min_confidence: int = 50) -> List[ExtractedTable]: 38 """ 39 Extract tables from document 40 :param ocr: OCRInstance object used to extract table content (...) 44 :return: list of extracted tables 45 """ ---> 46 extracted_tables = super(Image, self).extract_tables(ocr=ocr, 47 implicit_rows=implicit_rows, 48 borderless_tables=borderless_tables, 49 min_confidence=min_confidence) 50 return extracted_tables.get(0) File /opt/conda/lib/python3.8/site-packages/img2table/document/base/__init__.py:125, in Document.extract_tables(self, ocr, implicit_rows, borderless_tables, min_confidence) 116 """ 117 Extract tables from document 118 :param ocr: OCRInstance object used to extract table content (...) 122 :return: dictionary with page number as key and list of extracted tables as values 123 """ 124 # Extract tables from document --> 125 from img2table.tables.image import TableImage 126 tables = {idx: TableImage(img=img, 127 min_confidence=min_confidence).extract_tables(implicit_rows=implicit_rows, 128 borderless_tables=borderless_tables) 129 for idx, img in enumerate(self.images)} 131 # Update table content with OCR if possible File /opt/conda/lib/python3.8/site-packages/img2table/tables/image.py:19, in <module> 17 from img2table.tables.processing.bordered_tables.tables import get_tables 18 from img2table.tables.processing.bordered_tables.tables.implicit_rows import handle_implicit_rows ---> 19 from img2table.tables.processing.borderless_tables import identify_borderless_tables 22 @dataclass 23 class TableImage: 24 img: np.ndarray File /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/__init__.py:10, in <module> 8 from img2table.tables.objects.table import Table 9 from img2table.tables.processing.borderless_tables.columns import identify_columns ---> 10 from img2table.tables.processing.borderless_tables.layout import segment_image 11 from img2table.tables.processing.borderless_tables.rows import identify_delimiter_group_rows 12 from img2table.tables.processing.borderless_tables.table import identify_table File /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/__init__.py:10, in <module> 8 from img2table.tables.processing.borderless_tables.layout.column_segments import segment_image_columns 9 from img2table.tables.processing.borderless_tables.layout.image_elements import get_image_elements ---> 10 from img2table.tables.processing.borderless_tables.layout.rlsa import identify_text_mask 11 from img2table.tables.processing.borderless_tables.layout.table_segments import get_table_segments 12 from img2table.tables.processing.borderless_tables.model import TableSegment, ImageSegment File /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py:156, in <module> 150 mask_obstacles[id_row][col + idx] = True 152 return mask_obstacles 155 @njit("boolean[:, :](uint8[:, :],int32[:, :], float64)", fastmath=True, cache=True, parallel=False) --> 156 def get_text_mask(thresh: np.ndarray, cc_stats_rlsa: np.ndarray, char_length: float) -> np.ndarray: 157 """ 158 Identify image text mask 159 :param thresh: thresholded image (...) 162 :return: text mask array 163 """ 164 text_mask = np.full(shape=thresh.shape, fill_value=False) File /opt/conda/lib/python3.8/site-packages/numba/core/decorators.py:218, in _jit.<locals>.wrapper(func) 216 with typeinfer.register_dispatcher(disp): 217 for sig in sigs: --> 218 disp.compile(sig) 219 disp.disable_compile() 220 return disp File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_lock.py:32, in _CompilerLock.__call__.<locals>._acquire_compile_lock(*args, **kwargs) 29 @functools.wraps(func) 30 def _acquire_compile_lock(*args, **kwargs): 31 with self: ---> 32 return func(*args, **kwargs) File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:819, in Dispatcher.compile(self, sig) 817 self._cache_misses[sig] += 1 818 try: --> 819 cres = self._compiler.compile(args, return_type) 820 except errors.ForceLiteralArg as e: 821 def folded(args, kws): File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:82, in _FunctionCompiler.compile(self, args, return_type) 80 return retval 81 else: ---> 82 raise retval File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:92, in _FunctionCompiler._compile_cached(self, args, return_type) 89 pass 91 try: ---> 92 retval = self._compile_core(args, return_type) 93 except errors.TypingError as e: 94 self._failed_cache[key] = e File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:105, in _FunctionCompiler._compile_core(self, args, return_type) 102 flags = self._customize_flags(flags) 104 impl = self._get_implementation(args, {}) --> 105 cres = compiler.compile_extra(self.targetdescr.typing_context, 106 self.targetdescr.target_context, 107 impl, 108 args=args, return_type=return_type, 109 flags=flags, locals=self.locals, 110 pipeline_class=self.pipeline_class) 111 # Check typing error if object mode is used 112 if cres.typing_error is not None and not flags.enable_pyobject: File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:627, in compile_extra(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class) 603 """Compiler entry point 604 605 Parameter (...) 623 compiler pipeline 624 """ 625 pipeline = pipeline_class(typingctx, targetctx, library, 626 args, return_type, flags, locals) --> 627 return pipeline.compile_extra(func) File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:363, in CompilerBase.compile_extra(self, func) 361 self.state.lifted = () 362 self.state.lifted_from = None --> 363 return self._compile_bytecode() File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:425, in CompilerBase._compile_bytecode(self) 421 """ 422 Populate and run pipeline for bytecode input 423 """ 424 assert self.state.func_ir is None --> 425 return self._compile_core() File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:405, in CompilerBase._compile_core(self) 403 self.state.status.fail_reason = e 404 if is_final_pipeline: --> 405 raise e 406 else: 407 raise CompilerError("All available pipelines exhausted") File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:396, in CompilerBase._compile_core(self) 394 res = None 395 try: --> 396 pm.run(self.state) 397 if self.state.cr is not None: 398 break File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:341, in PassManager.run(self, state) 338 msg = "Failed in %s mode pipeline (step: %s)" % \ 339 (self.pipeline_name, pass_desc) 340 patched_exception = self._patch_error(msg, e) --> 341 raise patched_exception File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:332, in PassManager.run(self, state) 330 pass_inst = _pass_registry.get(pss).pass_inst 331 if isinstance(pass_inst, CompilerPass): --> 332 self._runPass(idx, pass_inst, state) 333 else: 334 raise BaseException("Legacy pass in use") File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_lock.py:32, in _CompilerLock.__call__.<locals>._acquire_compile_lock(*args, **kwargs) 29 @functools.wraps(func) 30 def _acquire_compile_lock(*args, **kwargs): 31 with self: ---> 32 return func(*args, **kwargs) File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:291, in PassManager._runPass(self, index, pss, internal_state) 289 mutated |= check(pss.run_initialization, internal_state) 290 with SimpleTimer() as pass_time: --> 291 mutated |= check(pss.run_pass, internal_state) 292 with SimpleTimer() as finalize_time: 293 mutated |= check(pss.run_finalizer, internal_state) File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:264, in PassManager._runPass.<locals>.check(func, compiler_state) 263 def check(func, compiler_state): --> 264 mangled = func(compiler_state) 265 if mangled not in (True, False): 266 msg = ("CompilerPass implementations should return True/False. " 267 "CompilerPass with name '%s' did not.") File /opt/conda/lib/python3.8/site-packages/numba/core/typed_passes.py:92, in BaseTypeInference.run_pass(self, state) 86 """ 87 Type inference and legalization 88 """ 89 with fallback_context(state, 'Function "%s" failed type inference' 90 % (state.func_id.func_name,)): 91 # Type inference ---> 92 typemap, return_type, calltypes = type_inference_stage( 93 state.typingctx, 94 state.func_ir, 95 state.args, 96 state.return_type, 97 state.locals, 98 raise_errors=self._raise_errors) 99 state.typemap = typemap 100 if self._raise_errors: File /opt/conda/lib/python3.8/site-packages/numba/core/typed_passes.py:70, in type_inference_stage(typingctx, interp, args, return_type, locals, raise_errors) 67 infer.seed_type(k, v) 69 infer.build_constraint() ---> 70 infer.propagate(raise_errors=raise_errors) 71 typemap, restype, calltypes = infer.unify(raise_errors=raise_errors) 73 # Output all Numba warnings File /opt/conda/lib/python3.8/site-packages/numba/core/typeinfer.py:1071, in TypeInferer.propagate(self, raise_errors) 1068 force_lit_args = [e for e in errors 1069 if isinstance(e, ForceLiteralArg)] 1070 if not force_lit_args: -> 1071 raise errors[0] 1072 else: 1073 raise reduce(operator.or_, force_lit_args) TypingError: Failed in nopython mode pipeline (step: nopython frontend) Use of unsupported NumPy function 'numpy.average' or unsupported use of the function. File "../../../../../opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py", line 167: def get_text_mask(thresh: np.ndarray, cc_stats_rlsa: np.ndarray, char_length: float) -> np.ndarray: <source elided> # Get average height Hm = np.average(cc_stats_rlsa[1:, cv2.CC_STAT_HEIGHT], weights=cc_stats_rlsa[1:, cv2.CC_STAT_AREA]) ^ During: typing of get attribute at /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py (167) File "../../../../../opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py", line 167: def get_text_mask(thresh: np.ndarray, cc_stats_rlsa: np.ndarray, char_length: float) -> np.ndarray: <source elided> # Get average height Hm = np.average(cc_stats_rlsa[1:, cv2.CC_STAT_HEIGHT], weights=cc_stats_rlsa[1:, cv2.CC_STAT_AREA]) ^
Editor is loading...
Leave a Comment