Untitled

 avatar
unknown
plain_text
a year ago
14 kB
10
Indexable
!pip install opencv-python==4.5.3.56 --user
!pip install opencv-contrib-python==4.5.3.56 --user
!pip install opencv-python-headless==4.5.3.56 --user

import cv2

!pip install img2table

from img2table.document import Image
pip install Pillow
from PIL import Image as PILImage
pip install pdf2image
conda install -c conda-forge poppler
import easyocr
import pathlib
import pandas as pd

# Notebook configuration: document identifier and location of its source PDF.
name = '000586'
cur_path = pathlib.Path().resolve()  # absolute path of the current working folder
img_path = f'{cur_path}/тест/{name}.pdf'  # the PDF is looked up in the "тест" subfolder
img_path  # notebook cell echo of the resulting path

# Render the PDF located at img_path into page images.
# NOTE(review): `pages` is iterated and each item saved as JPEG later in the
# notebook, so it is presumably a list of PIL images, one per page — confirm
# against the pdf2image documentation.
from pdf2image import convert_from_path
pages = convert_from_path(img_path)
pages  # notebook cell echo of the conversion result

# Save the first page of the PDF as a JPEG in the current folder.
# Index 0 is the first page: enumerate()-style counting starts at 0, so a
# comparison against 1 would pick the second page and would write nothing
# at all for a single-page PDF.
if pages:
    pages[0].save(f'{cur_path}/{name}_1st_page.jpg', 'JPEG')

# Location of the JPEG page image that is handed to img2table for table extraction.
instance_path = f'{cur_path}/{name}_1st_page.jpg'

TypingError                               Traceback (most recent call last)
Input In [23], in <cell line: 4>()
      1 img = Image(instance_path)
      3 # Extract tables
----> 4 extracted_tables = img.extract_tables()

File /opt/conda/lib/python3.8/site-packages/img2table/document/image.py:46, in Image.extract_tables(self, ocr, implicit_rows, borderless_tables, min_confidence)
     36 def extract_tables(self, ocr: "OCRInstance" = None, implicit_rows: bool = False, borderless_tables: bool = False,
     37                    min_confidence: int = 50) -> List[ExtractedTable]:
     38     """
     39     Extract tables from document
     40     :param ocr: OCRInstance object used to extract table content
   (...)
     44     :return: list of extracted tables
     45     """
---> 46     extracted_tables = super(Image, self).extract_tables(ocr=ocr,
     47                                                          implicit_rows=implicit_rows,
     48                                                          borderless_tables=borderless_tables,
     49                                                          min_confidence=min_confidence)
     50     return extracted_tables.get(0)

File /opt/conda/lib/python3.8/site-packages/img2table/document/base/__init__.py:125, in Document.extract_tables(self, ocr, implicit_rows, borderless_tables, min_confidence)
    116 """
    117 Extract tables from document
    118 :param ocr: OCRInstance object used to extract table content
   (...)
    122 :return: dictionary with page number as key and list of extracted tables as values
    123 """
    124 # Extract tables from document
--> 125 from img2table.tables.image import TableImage
    126 tables = {idx: TableImage(img=img,
    127                           min_confidence=min_confidence).extract_tables(implicit_rows=implicit_rows,
    128                                                                         borderless_tables=borderless_tables)
    129           for idx, img in enumerate(self.images)}
    131 # Update table content with OCR if possible

File /opt/conda/lib/python3.8/site-packages/img2table/tables/image.py:19, in <module>
     17 from img2table.tables.processing.bordered_tables.tables import get_tables
     18 from img2table.tables.processing.bordered_tables.tables.implicit_rows import handle_implicit_rows
---> 19 from img2table.tables.processing.borderless_tables import identify_borderless_tables
     22 @dataclass
     23 class TableImage:
     24     img: np.ndarray

File /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/__init__.py:10, in <module>
      8 from img2table.tables.objects.table import Table
      9 from img2table.tables.processing.borderless_tables.columns import identify_columns
---> 10 from img2table.tables.processing.borderless_tables.layout import segment_image
     11 from img2table.tables.processing.borderless_tables.rows import identify_delimiter_group_rows
     12 from img2table.tables.processing.borderless_tables.table import identify_table

File /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/__init__.py:10, in <module>
      8 from img2table.tables.processing.borderless_tables.layout.column_segments import segment_image_columns
      9 from img2table.tables.processing.borderless_tables.layout.image_elements import get_image_elements
---> 10 from img2table.tables.processing.borderless_tables.layout.rlsa import identify_text_mask
     11 from img2table.tables.processing.borderless_tables.layout.table_segments import get_table_segments
     12 from img2table.tables.processing.borderless_tables.model import TableSegment, ImageSegment

File /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py:156, in <module>
    150                     mask_obstacles[id_row][col + idx] = True
    152     return mask_obstacles
    155 @njit("boolean[:, :](uint8[:, :],int32[:, :], float64)", fastmath=True, cache=True, parallel=False)
--> 156 def get_text_mask(thresh: np.ndarray, cc_stats_rlsa: np.ndarray, char_length: float) -> np.ndarray:
    157     """
    158     Identify image text mask
    159     :param thresh: thresholded image
   (...)
    162     :return: text mask array
    163     """
    164     text_mask = np.full(shape=thresh.shape, fill_value=False)

File /opt/conda/lib/python3.8/site-packages/numba/core/decorators.py:218, in _jit.<locals>.wrapper(func)
    216     with typeinfer.register_dispatcher(disp):
    217         for sig in sigs:
--> 218             disp.compile(sig)
    219         disp.disable_compile()
    220 return disp

File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_lock.py:32, in _CompilerLock.__call__.<locals>._acquire_compile_lock(*args, **kwargs)
     29 @functools.wraps(func)
     30 def _acquire_compile_lock(*args, **kwargs):
     31     with self:
---> 32         return func(*args, **kwargs)

File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:819, in Dispatcher.compile(self, sig)
    817 self._cache_misses[sig] += 1
    818 try:
--> 819     cres = self._compiler.compile(args, return_type)
    820 except errors.ForceLiteralArg as e:
    821     def folded(args, kws):

File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:82, in _FunctionCompiler.compile(self, args, return_type)
     80     return retval
     81 else:
---> 82     raise retval

File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:92, in _FunctionCompiler._compile_cached(self, args, return_type)
     89     pass
     91 try:
---> 92     retval = self._compile_core(args, return_type)
     93 except errors.TypingError as e:
     94     self._failed_cache[key] = e

File /opt/conda/lib/python3.8/site-packages/numba/core/dispatcher.py:105, in _FunctionCompiler._compile_core(self, args, return_type)
    102 flags = self._customize_flags(flags)
    104 impl = self._get_implementation(args, {})
--> 105 cres = compiler.compile_extra(self.targetdescr.typing_context,
    106                               self.targetdescr.target_context,
    107                               impl,
    108                               args=args, return_type=return_type,
    109                               flags=flags, locals=self.locals,
    110                               pipeline_class=self.pipeline_class)
    111 # Check typing error if object mode is used
    112 if cres.typing_error is not None and not flags.enable_pyobject:

File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:627, in compile_extra(typingctx, targetctx, func, args, return_type, flags, locals, library, pipeline_class)
    603 """Compiler entry point
    604 
    605 Parameter
   (...)
    623     compiler pipeline
    624 """
    625 pipeline = pipeline_class(typingctx, targetctx, library,
    626                           args, return_type, flags, locals)
--> 627 return pipeline.compile_extra(func)

File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:363, in CompilerBase.compile_extra(self, func)
    361 self.state.lifted = ()
    362 self.state.lifted_from = None
--> 363 return self._compile_bytecode()

File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:425, in CompilerBase._compile_bytecode(self)
    421 """
    422 Populate and run pipeline for bytecode input
    423 """
    424 assert self.state.func_ir is None
--> 425 return self._compile_core()

File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:405, in CompilerBase._compile_core(self)
    403         self.state.status.fail_reason = e
    404         if is_final_pipeline:
--> 405             raise e
    406 else:
    407     raise CompilerError("All available pipelines exhausted")

File /opt/conda/lib/python3.8/site-packages/numba/core/compiler.py:396, in CompilerBase._compile_core(self)
    394 res = None
    395 try:
--> 396     pm.run(self.state)
    397     if self.state.cr is not None:
    398         break

File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:341, in PassManager.run(self, state)
    338 msg = "Failed in %s mode pipeline (step: %s)" % \
    339     (self.pipeline_name, pass_desc)
    340 patched_exception = self._patch_error(msg, e)
--> 341 raise patched_exception

File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:332, in PassManager.run(self, state)
    330 pass_inst = _pass_registry.get(pss).pass_inst
    331 if isinstance(pass_inst, CompilerPass):
--> 332     self._runPass(idx, pass_inst, state)
    333 else:
    334     raise BaseException("Legacy pass in use")

File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_lock.py:32, in _CompilerLock.__call__.<locals>._acquire_compile_lock(*args, **kwargs)
     29 @functools.wraps(func)
     30 def _acquire_compile_lock(*args, **kwargs):
     31     with self:
---> 32         return func(*args, **kwargs)

File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:291, in PassManager._runPass(self, index, pss, internal_state)
    289     mutated |= check(pss.run_initialization, internal_state)
    290 with SimpleTimer() as pass_time:
--> 291     mutated |= check(pss.run_pass, internal_state)
    292 with SimpleTimer() as finalize_time:
    293     mutated |= check(pss.run_finalizer, internal_state)

File /opt/conda/lib/python3.8/site-packages/numba/core/compiler_machinery.py:264, in PassManager._runPass.<locals>.check(func, compiler_state)
    263 def check(func, compiler_state):
--> 264     mangled = func(compiler_state)
    265     if mangled not in (True, False):
    266         msg = ("CompilerPass implementations should return True/False. "
    267                "CompilerPass with name '%s' did not.")

File /opt/conda/lib/python3.8/site-packages/numba/core/typed_passes.py:92, in BaseTypeInference.run_pass(self, state)
     86 """
     87 Type inference and legalization
     88 """
     89 with fallback_context(state, 'Function "%s" failed type inference'
     90                       % (state.func_id.func_name,)):
     91     # Type inference
---> 92     typemap, return_type, calltypes = type_inference_stage(
     93         state.typingctx,
     94         state.func_ir,
     95         state.args,
     96         state.return_type,
     97         state.locals,
     98         raise_errors=self._raise_errors)
     99     state.typemap = typemap
    100     if self._raise_errors:

File /opt/conda/lib/python3.8/site-packages/numba/core/typed_passes.py:70, in type_inference_stage(typingctx, interp, args, return_type, locals, raise_errors)
     67         infer.seed_type(k, v)
     69     infer.build_constraint()
---> 70     infer.propagate(raise_errors=raise_errors)
     71     typemap, restype, calltypes = infer.unify(raise_errors=raise_errors)
     73 # Output all Numba warnings

File /opt/conda/lib/python3.8/site-packages/numba/core/typeinfer.py:1071, in TypeInferer.propagate(self, raise_errors)
   1068 force_lit_args = [e for e in errors
   1069                   if isinstance(e, ForceLiteralArg)]
   1070 if not force_lit_args:
-> 1071     raise errors[0]
   1072 else:
   1073     raise reduce(operator.or_, force_lit_args)

TypingError: Failed in nopython mode pipeline (step: nopython frontend)
Use of unsupported NumPy function 'numpy.average' or unsupported use of the function.

File "../../../../../opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py", line 167:
def get_text_mask(thresh: np.ndarray, cc_stats_rlsa: np.ndarray, char_length: float) -> np.ndarray:
    <source elided>
    # Get average height
    Hm = np.average(cc_stats_rlsa[1:, cv2.CC_STAT_HEIGHT], weights=cc_stats_rlsa[1:, cv2.CC_STAT_AREA])
    ^

During: typing of get attribute at /opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py (167)

File "../../../../../opt/conda/lib/python3.8/site-packages/img2table/tables/processing/borderless_tables/layout/rlsa.py", line 167:
def get_text_mask(thresh: np.ndarray, cc_stats_rlsa: np.ndarray, char_length: float) -> np.ndarray:
    <source elided>
    # Get average height
    Hm = np.average(cc_stats_rlsa[1:, cv2.CC_STAT_HEIGHT], weights=cc_stats_rlsa[1:, cv2.CC_STAT_AREA])
    ^
Editor is loading...
Leave a Comment