Untitled
unknown
plain_text
2 years ago
1.3 kB
5
Indexable
def process(self, element):
    """Parse a comma-separated log file into a one-element list holding a DataFrame.

    Each non-blank line is split on ``","`` and the resulting fields are read
    positionally; every positional field is expected to look like
    ``"<label>: <value>"``:

    * field 0 -> ``timestamp``  (text after the first ``": "``)
    * field 1 -> ``hostname``   (text after ``": "``, cut at the first ``"."``)
    * field 3 -> ``process_id`` (text after the first ``": "``)
    * field 4 -> ``log_text``   (everything after the first ``": "``, stripped)

    ``process_name`` is not positional: it is whatever follows ``"Channel:"``
    anywhere on the line, up to the end of that line.

    Because the line is split on every comma, a message that itself contains
    a comma is truncated at that comma in ``log_text``.

    Args:
        element: Path of the text file to read (passed straight to ``open``).

    Returns:
        A list containing a single ``pandas.DataFrame`` with the columns
        ``timestamp``, ``hostname``, ``process_name``, ``process_id`` and
        ``log_text``; one row per non-blank line of the file.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line has fewer than five comma-separated
            fields, or a required field has no ``": "`` separator.
        AttributeError: If a non-blank line contains no ``"Channel:"`` marker.
    """
    # NOTE(review): the (self, element) -> list shape looks like a pipeline
    # DoFn-style hook; confirm against the enclosing class.
    file_path = element
    print(file_path)

    columns = ['timestamp', 'hostname', 'process_name', 'process_id', 'log_text']
    # Compiled once instead of being looked up again for every line.
    channel_pattern = re.compile(r'Channel:\s*(.*)')

    rows = []
    with open(file_path, "r") as f:
        for line in f:
            # Blank / whitespace-only lines carry no record; skip them rather
            # than failing on the positional field lookups below.
            if not line.strip():
                continue

            fields = line.strip().split(",")

            timestamp = fields[0].split(": ")[1]
            # Keep only the short host name (drop any domain suffix).
            hostname = fields[1].split(": ")[1].split(".")[0]
            # Searched on the raw line so the capture runs to the end of the
            # line ('.' stops at the trailing newline).
            process_name = channel_pattern.search(line).group(1)
            process_id = fields[3].split(": ")[1]
            # maxsplit=1 keeps any later ": " inside the message text intact.
            # No comma handling is needed here: fields were produced by
            # splitting on "," so a single field can never contain one.
            log_text = fields[4].split(": ", 1)[1].strip()

            rows.append((timestamp, hostname, process_name, process_id, log_text))

    return [pd.DataFrame(rows, columns=columns)]
Editor is loading...