Untitled
unknown
plain_text
3 years ago
1.3 kB
6
Indexable
def process(self, element):
    """Parse one comma-separated log file into a single DataFrame.

    Every non-blank line is split on "," and read positionally: field 0
    supplies the timestamp, field 1 the hostname, field 3 the process id
    and field 4 the log text, each taken from the right-hand side of a
    ": " separator. The process name is taken from the text that follows
    a "Channel:" marker anywhere in the line.

    Args:
        element: Path of the text file to read.

    Returns:
        A one-element list containing a DataFrame with the columns
        ``timestamp``, ``hostname``, ``process_name``, ``process_id`` and
        ``log_text``, one row per non-blank line of the file.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If a non-blank line has fewer than five comma-separated
            fields, or a positional field lacks the ": " separator.
        AttributeError: If a non-blank line contains no "Channel:" marker.
    """
    file_path = element
    print(file_path)

    columns = ['timestamp', 'hostname', 'process_name', 'process_id', 'log_text']
    # Compiled once instead of being looked up again for every line.
    channel_pattern = re.compile(r'Channel:\s*(.*)')

    rows = []
    with open(file_path, "r") as f:
        # Stream the file line by line rather than loading it all up front.
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing empty line) carry no record;
                # indexing into their fields would raise IndexError.
                continue
            fields = stripped.split(",")

            timestamp = fields[0].split(": ")[1]
            # Keep only the first label of a dotted host name.
            hostname = fields[1].split(": ")[1].split(".")[0]
            # NOTE(review): the capture group is greedy and runs to the end
            # of the line, so any fields after "Channel:" are included in
            # process_name. Kept as-is; confirm whether that is intended.
            process_name = channel_pattern.search(line).group(1)
            process_id = fields[3].split(": ")[1]
            # NOTE(review): because the line is split on every comma, a log
            # text that itself contains commas is cut at the first one.
            # fields[4] can never contain "," so no further trimming applies.
            log_text = fields[4].split(": ", 1)[1].strip()

            rows.append((timestamp, hostname, process_name, process_id, log_text))

    # Build the frame once from the collected row tuples.
    df = pd.DataFrame(rows, columns=columns)
    return [df]
Editor is loading...