# importing pandas as pd
import pandas as pd
# importing re for regular expressions
import re
# importing csv for writing to csv
import csv
# importing zip_longest for iterating over rows
from itertools import zip_longest
# Read the whole CSV into a DataFrame; later steps use its "Index" and "Line" columns.
df = pd.read_csv("datafolder/data.csv")
# Show the complete table.
print(df)
# Pick out the "Line" column and the "Index" column.
df2, idx = df["Line"], df["Index"]
# Show just the "Line" column.
print(df2)
# Label the rows of df with the values from its "Index" column.
df.index = idx
# Names that are searched for at the start of each "Line" value.
# Order is kept as-is because it determines the order of the regex alternatives.
players = [
    "KING", "FERDINAND", "BEROWNE", "LONGAVILLE",
    "DUMAIN", "BOYET", "MARCADE", "DON ADRIANO DE ARMADO",
    "SIR NATHANIEL", "HOLOFERNES", "DULL", "COSTARD",
    "MOTH", "A FORESTER", "THE PRINCESS OF FRANCE", "ROSALINE",
    "MARIA", "KATHARINE", "JAQUENETTA", "LORDS",
    "ATTENDANTS",
]
# Build one regex that matches a name from `players` followed by a literal
# period at the very start of a line.
# Every alternative must carry the trailing period: joining the names with a
# "\.|" separator would leave the last name without one, so it would also
# match lines that merely begin with that word.
# re.escape keeps each name literal, and the raw f-string keeps "\." from
# being treated as a (invalid) string escape sequence.
speaker_names = "|".join(re.escape(name) for name in players)
speaker_pattern = rf"^((?:{speaker_names})\.)"
# The single capture group (the name including its period) becomes column 0
# of `result`; lines that do not start with one of the names get NaN.
result = df["Line"].str.extract(pat=speaker_pattern)
# Carry the most recent match forward onto the following unmatched lines.
result = result.ffill()
# print the result
print(result)
# Columns to export, in output order: the "Index" column, the extracted
# player (column 0 of `result`), and the "Line" column.
data = [df["Index"], result[0], df["Line"]]
# Turn the three columns into an iterator of (index, player, line) rows.
# zip_longest pads with None if the columns ever differ in length.
columns_data = zip_longest(*data)
# newline="" lets the csv module control line endings itself.
# An explicit UTF-8 encoding keeps the output independent of the platform's
# default locale encoding.
with open("file.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    # Header row first, then one row per input line.
    writer.writerow(["Index", "Player", "Line"])
    writer.writerows(columns_data)