players column

mail@pastecode.io avatar
unknown
python
a year ago
1.5 kB
10
Indexable
Never
# importing pandas as pd
import pandas as pd

# importing re for regular expressions
import re

# importing csv for writing to csv
import csv

# importing zip_longest for iterating over rows
from itertools import zip_longest


# Load the full table from disk and echo it for inspection.
df = pd.read_csv("datafolder/data.csv")
print(df)

# Pull out the two columns used below: the text in "Line" and the
# row labels in "Index".
df2, idx = df["Line"], df["Index"]
print(df2)

# Label the rows of the frame with the values of its "Index" column
# (the column itself stays in the frame).
df.index = idx


# Names searched for at the start of each "Line" value.  The list order
# is the order of the alternatives in the regex built from it.
players = [
    "KING", "FERDINAND", "BEROWNE", "LONGAVILLE", "DUMAIN",
    "BOYET", "MARCADE", "DON ADRIANO DE ARMADO", "SIR NATHANIEL",
    "HOLOFERNES", "DULL", "COSTARD", "MOTH", "A FORESTER",
    "THE PRINCESS OF FRANCE", "ROSALINE", "MARIA", "KATHARINE",
    "JAQUENETTA", "LORDS", "ATTENDANTS",
]

# Extract the leading "<PLAYER>." label from every "Line" value.
#
# The pattern is anchored at the start of the string and requires a literal
# period after the name.  The period is written once, outside the
# alternation, so it applies to every name -- joining the names with a
# "<period>|" separator left the last name without one, letting it match
# (and be captured) with no trailing period.
#
# Each name goes through re.escape so it is always matched literally, and a
# raw string keeps "\." from being read as a (invalid) string escape.
#
# The single capturing group wraps the name together with its period, so
# `result` has exactly one column (0) holding values like "KING."; rows
# whose line does not start with a listed name get a missing value.
player_alternatives = "|".join(re.escape(name) for name in players)
player_pattern = rf"^((?:{player_alternatives})\.)"
result = df["Line"].str.extract(pat=player_pattern)
# print the result
print(result)


# Line up the three output columns and turn them into row tuples.
# zip_longest would pad a shorter column with None; all three come from
# the same frame here.
data = [df["Index"], result[0], df["Line"]]
columns_data = zip_longest(*data)

# Write the header followed by every row to "file.csv".  newline="" lets
# the csv module control line endings itself.
# NOTE(review): rows with no extracted player carry a missing value, which
# csv.writer stringifies as-is -- confirm that is the desired output.
with open("file.csv", "w", newline="") as out_file:
    csv_out = csv.writer(out_file)
    csv_out.writerows([["Index", "Player", "Line"], *columns_data])