Untitled
unknown
plain_text
2 years ago
14 kB
21
Indexable
# Environment setup for the notebook: OpenCV, img2table, Pillow, pdf2image
# (plus the poppler binaries) and the imports used by the cells that follow.
# NOTE(review): the next three commands install three different OpenCV wheels
# into the same environment. They all provide the `cv2` module, and the
# opencv-python packaging notes say to pick only one of them — confirm which
# variant is actually needed and drop the others.
!pip install opencv-python==4.5.3.56 --user
!pip install opencv-contrib-python==4.5.3.56 --user
!pip install opencv-python-headless==4.5.3.56 --user
import cv2
# NOTE(review): img2table is installed unpinned. The traceback recorded further
# down in this file fails inside numba while compiling img2table's
# rlsa.get_text_mask (np.average called with weights=), which suggests the
# installed numba and img2table versions do not match — presumably upgrading
# numba or pinning a compatible img2table release resolves it; TODO confirm.
!pip install img2table
from img2table.document import Image
# NOTE(review): unlike the installs above, the next commands have no leading
# `!`; they only run if IPython automagic is enabled — verify in the notebook.
pip install Pillow
# Aliased so that it does not shadow img2table's Image imported above.
from PIL import Image as PILImage
pip install pdf2image
conda install -c conda-forge poppler
import easyocr
import pathlib
import pandas as pd
# Render a PDF from the `тест` sub-folder and save its first page as a JPEG
# next to the notebook; `instance_path` is the path of that JPEG.
cur_path = pathlib.Path().resolve()  # current working directory
name = '000586'
# Kept as a plain string, as before, so later cells see the same type.
img_path = str(cur_path / 'тест' / f'{name}.pdf')
img_path
from pdf2image import convert_from_path
# One rendered image per PDF page.
pages = convert_from_path(img_path)
pages
# Fail loudly instead of leaving instance_path pointing at a file that was
# never written.
if not pages:
    raise ValueError(f'No pages were rendered from {img_path}')
instance_path = str(cur_path / f'{name}_1st_page.jpg')
# enumerate() is zero-based, so the first page is pages[0]; the previous
# `count == 1` check saved the second page and saved nothing at all for a
# single-page PDF.
pages[0].save(instance_path, 'JPEG')  # save the first page into the current folder
TypingError Traceback (most recent call last)
Input In [23], in <cell line: 4>()
1 img = Image(instance_path)
3 # Extract tables
----> 4 extracted_tables = img.extract_tables()
File /opt/conda/lib/python3.8/site-packages/img2table/document/image.py:46, in Image.extract_tables(self, ocr, implicit_rows, borderless_tables, min_confidence)
36 def extract_tables(self, ocr: "OCRInstance" = None, implicit_rows: bool = False, borderless_tables: bool = False,
37 min_confidence: int = 50) -> List[ExtractedTable]:
38 """
39 Extract tables from document
40 :param ocr: OCRInstance object used to extract table content
(...)
44 :return: list of extracted tables
45 """
---> 46 extracted_tables = super(Image, self).extract_tables(ocr=ocr,
47 implicit_rows=implicit_rows,
48 borderless_tables=borderless_tables,
49 min_confidence=min_confidence)
50 return extracted_tables.get(0)
File /opt/conda/lib/python3.8/site-packages/img2table/document/base/__init__.py:125, in Document.extract_tables(self, ocr, implicit_rows, borderless_tables, min_confidence)
116 """
117 Extract tables from document
118 :param ocr: OCRInstance object used to extract table content
(...)
122 :return: dictionary with page number as key and list of extracted tables as values
123 """
124 # Extract tables from document
--> 125 from img2table.tables.image import TableImage
126 tables = {idx: TableImage(img=img,
127 min_confidence=min_confidence).extract_tables(implicit_rows=implicit_rows,
128 borderless_tables=borderless_tables)
129 for idx, img in enumerate(self.images)}
131 # Update table content with OCR if possible
File /opt/conda/lib/python3.8/site-packages/img2table/tables/image.py:19, in <module>
17 from img2table.tables.processing.bordered_tables.tables import get_tables
18 from img2table.tables.processing.bordered_tables.tables.implicit_rows import handle_implicit_rows
---> 19 from img2table.tables.processing.borderless_tables import identify_borderless_tables
22 @dataclass
23 class TableImage:
24 img: np.ndarray
File /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/__init__.py:10, in <module>
8 from img2table.tables.objects.table import Table
9 from img2table.tables.processing.borderless_tables.columns import identify_columns
---> 10 from img2table.tables.processing.borderless_tables.layout import segment_image
11 from img2table.tables.processing.borderless_tables.rows import identify_delimiter_group_rows
12 from img2table.tables.processing.borderless_tables.table import identify_table
File /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/__init__.py:10, in <module>
8 from img2table.tables.processing.borderless_tables.layout.column_segments import segment_image_columns
9 from img2table.tables.processing.borderless_tables.layout.image_elements import get_image_elements
---> 10 from img2table.tables.processing.borderless_tables.layout.rlsa import identify_text_mask
11 from img2table.tables.processing.borderless_tables.layout.table_segments import get_table_segments
12 from img2table.tables.processing.borderless_tables.model import TableSegment, ImageSegment
File /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py:156, in <module>
150 mask_obstacles[id_row][col + idx] = True
152 return mask_obstacles
155 @njit("boolean[:, :](uint8[:, :],int32[:, :], float64)", fastmath=True, cache=True, parallel=False)
--> 156 def get_text_mask(thresh: np.ndarray, cc_stats_rlsa: np.ndarray, char_length: float) -> np.ndarray:
157 """
158 Identify image text mask
159 :param thresh: thresholded image
(...)
162 :return: text mask array
163 """
164 text_mask = np.full(shape=thresh.shape, fill_value=False)
File /opt/conda/lib/python3.8/site-packages/numba/core/decorators.py:218, in _jit.<locals>.wrapper(func)
216 with typeinfer.register_dispatcher(disp):
217 for sig in sigs:
--> 218 disp.compile(sig)
219 disp.disable_compile()
220 return disp
File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_lock.py:32, in _CompilerLock.__call__.<locals>._acquire_compile_lock(*args, **kwargs)
29 @functools.wraps(func)
30 def _acquire_compile_lock(*args, **kwargs):
31 with self:
---> 32 return func(*args, **kwargs)
File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:819, in Dispatcher.compile(self, sig)
817 self._cache_misses[sig] += 1
818 try:
--> 819 cres = self._compiler.compile(args, return_type)
820 except errors.ForceLiteralArg as e:
821 def folded(args, kws):
File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:82, in _FunctionCompiler.compile(self, args, return_type)
80 return retval
81 else:
---> 82 raise retval
File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:92, in _FunctionCompiler._compile_cached(self, args, return_type)
89 pass
91 try:
---> 92 retval = self._compile_core(args, return_type)
93 except errors.TypingError as e:
94 self._failed_cache[key] = e
File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:105, in _FunctionCompiler._compile_core(self, args, return_type)
102 flags = self._customize_flags(flags)
104 impl = self._get_implementation(args, {})
--> 105 cres = compiler.compile_extra(self.targetdescr.typing_context,
106 self.targetdescr.target_context,
107 impl,
108 args=args, return_type=return_type,
109 flags=flags, locals=self.locals,
110 pipeline_class=self.pipeline_class)
111 # Check typing error if object mode is used
112 if cres.typing_error is not None and not flags.enable_pyobject:
File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:627, in compile_extra(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class)
603 """Compiler entry point
604
605 Parameter
(...)
623 compiler pipeline
624 """
625 pipeline = pipeline_class(typingctx, targetctx, library,
626 args, return_type, flags, locals)
--> 627 return pipeline.compile_extra(func)
File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:363, in CompilerBase.compile_extra(self, func)
361 self.state.lifted = ()
362 self.state.lifted_from = None
--> 363 return self._compile_bytecode()
File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:425, in CompilerBase._compile_bytecode(self)
421 """
422 Populate and run pipeline for bytecode input
423 """
424 assert self.state.func_ir is None
--> 425 return self._compile_core()
File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:405, in CompilerBase._compile_core(self)
403 self.state.status.fail_reason = e
404 if is_final_pipeline:
--> 405 raise e
406 else:
407 raise CompilerError("All available pipelines exhausted")
File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:396, in CompilerBase._compile_core(self)
394 res = None
395 try:
--> 396 pm.run(self.state)
397 if self.state.cr is not None:
398 break
File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:341, in PassManager.run(self, state)
338 msg = "Failed in %s mode pipeline (step: %s)" % \
339 (self.pipeline_name, pass_desc)
340 patched_exception = self._patch_error(msg, e)
--> 341 raise patched_exception
File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:332, in PassManager.run(self, state)
330 pass_inst = _pass_registry.get(pss).pass_inst
331 if isinstance(pass_inst, CompilerPass):
--> 332 self._runPass(idx, pass_inst, state)
333 else:
334 raise BaseException("Legacy pass in use")
File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_lock.py:32, in _CompilerLock.__call__.<locals>._acquire_compile_lock(*args, **kwargs)
29 @functools.wraps(func)
30 def _acquire_compile_lock(*args, **kwargs):
31 with self:
---> 32 return func(*args, **kwargs)
File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:291, in PassManager._runPass(self, index, pss, internal_state)
289 mutated |= check(pss.run_initialization, internal_state)
290 with SimpleTimer() as pass_time:
--> 291 mutated |= check(pss.run_pass, internal_state)
292 with SimpleTimer() as finalize_time:
293 mutated |= check(pss.run_finalizer, internal_state)
File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:264, in PassManager._runPass.<locals>.check(func, compiler_state)
263 def check(func, compiler_state):
--> 264 mangled = func(compiler_state)
265 if mangled not in (True, False):
266 msg = ("CompilerPass implementations should return True/False. "
267 "CompilerPass with name '%s' did not.")
File /opt/conda/lib/python3.8/site-packages/numba/core/typed_passes.py:92, in BaseTypeInference.run_pass(self, state)
86 """
87 Type inference and legalization
88 """
89 with fallback_context(state, 'Function "%s" failed type inference'
90 % (state.func_id.func_name,)):
91 # Type inference
---> 92 typemap, return_type, calltypes = type_inference_stage(
93 state.typingctx,
94 state.func_ir,
95 state.args,
96 state.return_type,
97 state.locals,
98 raise_errors=self._raise_errors)
99 state.typemap = typemap
100 if self._raise_errors:
File /opt/conda/lib/python3.8/site-packages/numba/core/typed_passes.py:70, in type_inference_stage(typingctx, interp, args, return_type, locals, raise_errors)
67 infer.seed_type(k, v)
69 infer.build_constraint()
---> 70 infer.propagate(raise_errors=raise_errors)
71 typemap, restype, calltypes = infer.unify(raise_errors=raise_errors)
73 # Output all Numba warnings
File /opt/conda/lib/python3.8/site-packages/numba/core/typeinfer.py:1071, in TypeInferer.propagate(self, raise_errors)
1068 force_lit_args = [e for e in errors
1069 if isinstance(e, ForceLiteralArg)]
1070 if not force_lit_args:
-> 1071 raise errors[0]
1072 else:
1073 raise reduce(operator.or_, force_lit_args)
TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Use of unsupported NumPy function 'numpy.average' or unsupported use of the function.
File "../../../../../opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py", line 167:
def get_text_mask(thresh: np.ndarray, cc_stats_rlsa: np.ndarray, char_length: float) -> np.ndarray:
<source elided>
# Get average height
Hm = np.average(cc_stats_rlsa[1:, cv2.CC_STAT_HEIGHT], weights=cc_stats_rlsa[1:, cv2.CC_STAT_AREA])
^
During: typing of get attribute at /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py (167)
File "../../../../../opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py", line 167:
def get_text_mask(thresh: np.ndarray, cc_stats_rlsa: np.ndarray, char_length: float) -> np.ndarray:
<source elided>
# Get average height
Hm = np.average(cc_stats_rlsa[1:, cv2.CC_STAT_HEIGHT], weights=cc_stats_rlsa[1:, cv2.CC_STAT_AREA])
^
Editor is loading...
Leave a Comment