Untitled

 avatar
unknown
plain_text
2 years ago
1.3 kB
5
Indexable
def process(self, element):
        """Parse one comma-separated log file into a one-element DataFrame list.

        Every non-blank line is split on commas into ``"<label>: <value>"``
        fields. Fields 0, 1, 3 and 4 supply the timestamp, hostname, process
        id and log text; the process name is taken from the text following
        ``Channel:`` in the line. Blank or whitespace-only lines are skipped.

        Args:
            element: Path of the text file to read.

        Returns:
            A list containing a single DataFrame with the columns
            ``timestamp``, ``hostname``, ``process_name``, ``process_id`` and
            ``log_text`` (all values are strings), one row per non-blank line.
            An empty file yields an empty DataFrame with those columns.

        Raises:
            IndexError: If a non-blank line has fewer than five fields, or a
                used field lacks the ``": "`` separator.
            AttributeError: If a non-blank line does not contain ``Channel:``.
        """
        file_path = element
        print(file_path)

        columns = ['timestamp', 'hostname', 'process_name', 'process_id', 'log_text']
        # Compiled once so the pattern lookup is not repeated for every line.
        channel_pattern = re.compile(r'Channel:\s*(.*)')

        rows = []
        with open(file_path, "r") as f:
            for line in f:
                stripped = line.strip()
                if not stripped:
                    # Blank lines (e.g. a trailing empty line) carry no record.
                    continue

                fields = stripped.split(",")
                # Touch the fifth field first so a too-short line fails with
                # IndexError before any other field is parsed.
                # The line was split on every comma, so this field can never
                # contain one: a message with a comma is cut at that comma.
                message_field = fields[4]

                rows.append((
                    fields[0].split(": ")[1],
                    # Keep only the short host name (text before the first dot).
                    fields[1].split(": ")[1].split(".")[0],
                    # NOTE(review): the capture runs to the end of the line, so
                    # any fields after "Channel:" are included - confirm intent.
                    channel_pattern.search(line).group(1),
                    fields[3].split(": ")[1],
                    # maxsplit=1 keeps any later ": " inside the message text.
                    message_field.split(": ", 1)[1].strip(),
                ))

        return [pd.DataFrame(rows, columns=columns)]
Editor is loading...