Untitled

 avatar
unknown
plain_text
5 months ago
418 B
3
Indexable
from src.ingestion.loaders.loaderBase import LoaderBase
import html2text
import re

class LoaderHTML(LoaderBase):
    """Loader that converts an HTML file on disk into plain/Markdown text."""

    def __init__(self, filepath: str):
        """Remember the location of the HTML file to load.

        Args:
            filepath: Path to the HTML file. The file is not opened here;
                it is read lazily by ``extract_text``.
        """
        # NOTE(review): LoaderBase.__init__ is not called because its
        # signature is not visible from this file -- confirm whether the
        # base class needs initialising.
        self.filepath = filepath

    def extract_metadata(self):
        """Metadata extraction is not implemented for HTML files.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError

    def extract_text(self) -> str:
        """Read the HTML file and return its content converted to text.

        Returns:
            The text produced by ``html2text`` from the file's markup.

        Raises:
            OSError: If the file cannot be opened or read.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        # HTML2Text.handle() expects the HTML markup itself, not a path,
        # so the file has to be read before conversion.
        # NOTE(review): UTF-8 is assumed; adjust if the ingested files
        # use another encoding.
        with open(self.filepath, encoding="utf-8") as source:
            markup = source.read()

        converter = html2text.HTML2Text()
        return converter.handle(markup)
            
Editor is loading...
Leave a Comment