Untitled
unknown
plain_text
5 months ago
418 B
3
Indexable
import re

import html2text

from src.ingestion.loaders.loaderBase import LoaderBase


class LoaderHTML(LoaderBase):
    """Loader that turns an HTML file on disk into plain/Markdown-style text.

    The conversion itself is delegated to ``html2text.HTML2Text``.
    """

    def __init__(self, filepath: str):
        """Remember the location of the HTML file to load.

        Args:
            filepath: Path to the HTML file. The file is not opened here;
                it is read lazily by ``extract_text``.
        """
        # NOTE(review): LoaderBase.__init__ is not invoked here, matching the
        # original code -- confirm the base class needs no initialisation.
        self.filepath = filepath

    def extract_metadata(self):
        """Metadata extraction is not supported for HTML yet.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    def extract_text(self) -> str:
        """Read the HTML file and return its content converted to text.

        Returns:
            The text produced by ``html2text.HTML2Text.handle`` for the
            file's markup.

        Raises:
            OSError: if the file cannot be opened or read.
            UnicodeDecodeError: if the file is not valid UTF-8.
        """
        # HTML2Text.handle() expects the HTML markup itself, not a path, so
        # the file has to be read first; passing the path would merely
        # "convert" the path string.
        with open(self.filepath, encoding="utf-8") as source:
            markup = source.read()

        converter = html2text.HTML2Text()
        return converter.handle(markup)
Editor is loading...
Leave a Comment