# Скрипт для лабораторной работы №3 (Script for lab assignment No. 3)
# Pasted from pastecode.io ("Untitled", author unknown, language: python).
import csv
import re
import pandas as pd
import numpy as np


# Matches text such as "$12.5 billion" or "$3 billion (2020)": a leading
# dollar sign, an integer or decimal amount (captured in group 1), exactly one
# whitespace character, the word "billion", then any trailing text.
money_regex = re.compile(r"^\$(\d+(\.\d+)?)\sbillion.*$")


def parse_money(money_str: str) -> "float | None":
    """Extract the numeric amount from a "$<number> billion" string.

    The argument is passed through ``str()`` first, so non-string values
    (``None``, numbers) are accepted and simply fail to match.

    Args:
        money_str: Text expected to start with "$<number> billion".

    Returns:
        The amount as written in the text, converted to ``float``, or
        ``None`` when the text does not match the expected format.
    """
    match = money_regex.match(str(money_str))
    if match is None:
        return None
    return float(match.group(1))


def validate_net_worth(value) -> float:
    """Coerce *value* to ``float``, mapping unconvertible input to NaN.

    Args:
        value: Anything ``float()`` may accept (number, numeric string) or
            reject (malformed string, ``None``, other objects).

    Returns:
        ``float(value)`` when the conversion succeeds, otherwise ``np.nan``.
    """
    try:
        return float(value)
    except (TypeError, ValueError):
        # ValueError covers malformed strings such as "n/a"; TypeError covers
        # values float() does not accept at all, e.g. None.
        return np.nan


# Load [year, name, net worth] records from the CSV, keeping only data rows.
# NOTE(review): encoding="unicode_escape" and quotechar="|" are kept exactly as
# in the original; both look unusual for a comma-separated file (standard CSV
# quotes fields with '"') -- confirm against the actual billion.csv.
data = []
with open("billion.csv", newline="", encoding="unicode_escape") as csvfile:
    reader = csv.reader(csvfile, delimiter=",", quotechar="|")
    for row in reader:
        # Blank lines come back as [] and short rows lack columns 2 and 3;
        # indexing them would raise IndexError, so skip them.
        if len(row) < 4:
            continue
        # Only rows whose first column is a plain decimal number are data
        # rows; anything else (e.g. a header line) is ignored.
        if not row[0].isdecimal():
            continue
        data.append([int(row[0]), row[2], parse_money(row[3])])
# Order the records by year, then by name.
data.sort(key=lambda record: (record[0], record[1]))

df = pd.DataFrame(data, columns=["Year", "Name", "Net worth"])
df["Net worth"] = df["Net worth"].apply(validate_net_worth)

# One row per name, one column per year. When a (name, year) pair occurs more
# than once, "first" keeps the first non-null value.
pivot_df = df.pivot_table(
    index="Name", columns="Year", values="Net worth", aggfunc="first"
)
# Make the ascending year order of the columns explicit.
pivot_df = pivot_df.reindex(sorted(pivot_df.columns), axis=1)
# Turn the "Name" index back into an ordinary column so it is written out
# even though to_excel is called with index=False.
pivot_df = pivot_df.reset_index()
pivot_df.to_excel("processed_data.xlsx", index=False)
# End of paste.