Untitled
Скрипт для лабораторной работы №3 (unknown)
python
a year ago
1.1 kB
8
Indexable
import csv
import re
from typing import Optional

import numpy as np
import pandas as pd
# Matches text such as "$12.5 billion" or "$3 billion (2020)". The string must
# start with "$", and exactly one whitespace character must separate the
# number from the word "billion". Group 1 captures the numeric amount.
money_regex = re.compile(r"^\$(\d+(\.\d+)?)\sbillion.*$")


def parse_money(money_str: str) -> Optional[float]:
    """Extract the amount, in billions, from text like ``"$12.5 billion"``.

    Args:
        money_str: Text to parse. The value is passed through ``str()``
            first, so non-string input (e.g. ``None``) is tolerated and
            simply fails to match.

    Returns:
        The number preceding "billion" as a float, or ``None`` when the text
        does not follow the ``$<number> billion`` format.
    """
    match = money_regex.match(str(money_str))
    if match is None:
        return None
    return float(match.group(1))
def validate_net_worth(value):
    """Coerce *value* to a float, mapping unconvertible input to NaN.

    Args:
        value: Any object; numbers and numeric strings convert normally.

    Returns:
        ``float(value)`` when the conversion succeeds, otherwise ``np.nan``.
    """
    try:
        return float(value)
    except (TypeError, ValueError):
        # ValueError covers non-numeric strings; TypeError covers objects
        # float() cannot accept at all, such as None.
        return np.nan
# Collected rows as [year, name, net worth in billions or None].
data = []
# NOTE(review): encoding="unicode_escape" and quotechar="|" are kept exactly
# as written because the input file is not visible here. Confirm they match
# how billion.csv is actually encoded and quoted.
with open("billion.csv", newline="", encoding="unicode_escape") as csvfile:
    spamreader = csv.reader(csvfile, delimiter=",", quotechar="|")
    for row in spamreader:
        # csv.reader yields [] for blank lines, and truncated lines lack the
        # columns read below; skip both instead of raising IndexError. Rows
        # whose first cell is not a decimal number (e.g. a header) are
        # skipped as well.
        if len(row) < 4 or not row[0].isdecimal():
            continue
        data.append([int(row[0]), row[2], parse_money(row[3])])

# Order by year, then by name. list.sort is stable, so rows that share both
# keep their file order.
data.sort(key=lambda x: [x[0], x[1]])

df = pd.DataFrame(data, columns=["Year", "Name", "Net worth"])
# Normalise the column to floats; values that cannot be converted become NaN.
df["Net worth"] = df["Net worth"].apply(validate_net_worth)

# One row per name, one column per year. The "first" aggregator picks a single
# value when a name occurs more than once within the same year.
pivot_df = df.pivot_table(
    index="Name", columns="Year", values="Net worth", aggfunc="first"
)
# Put the year columns in ascending order.
pivot_df = pivot_df.reindex(sorted(pivot_df.columns), axis=1)
# Turn the "Name" index back into an ordinary column so it is exported.
pivot_df.reset_index(inplace=True)
pivot_df.to_excel("processed_data.xlsx", index=False)
Editor is loading...
Leave a Comment