# --- Paste-site page metadata (not part of the script) ---
# Untitled
#
# mail@pastecode.io avatar
# unknown
# python
# a year ago
# 873 B
# 213
# Indexable
# Never
import pandas as pd
import polars as pl
import random
import datetime

def _daily_record(day_offset):
    """Build one synthetic record dated `day_offset` days after 2021-01-01."""
    return {
        'date': datetime.datetime(2021, 1, 1) + datetime.timedelta(days=day_offset),
        # random.random() is in [0, 1), so the profit falls in [-1, 1).
        'daily_profit': (random.random() - 0.5) * 2,
        # Uniform mileage between 0 and 500.
        'mileage': 500 * random.random(),
    }


# Despite its name this is a list of dicts: 3801 consecutive daily records
# (day offsets 0..3800), consumed by both DataFrame constructors below.
data_dict = list(map(_daily_record, range(3801)))
    
# pandas
df = pd.DataFrame(data_dict)
# For every row, attach the largest mileage seen in that row's calendar month.
# The month key is a monthly Period rather than pd.Grouper(freq='M'): the 'M'
# *offset* alias is deprecated in newer pandas (renamed 'ME'), whereas the 'M'
# *period* alias is accepted by old and new releases alike.
# The aggregation is passed by name ('max') because handing the builtin `max`
# to transform() triggers a FutureWarning in recent pandas versions.
df['max_monthly_mileage'] = (
    df.groupby(df['date'].dt.to_period('M'))['mileage'].transform('max')
)
# Daily profit scaled by that month's maximum mileage.
df['daily_profit_by_mileage'] = df['daily_profit'] / df['max_monthly_mileage']

# polars
polars_df = (
    pl.from_records(data_dict)
    # Temporary 'YYYY-MM' string key identifying each row's calendar month.
    .with_columns(pl.col('date').dt.strftime('%Y-%m').alias('month'))
    # Broadcast each month's largest mileage onto every row of that month.
    .with_columns(pl.col('mileage').max().over('month').alias('max_monthly_mileage'))
    # Daily profit scaled by that month's maximum mileage.
    .with_columns(
        (pl.col('daily_profit') / pl.col('max_monthly_mileage')).alias('daily_profit_by_mileage')
    )
    # The helper key was only needed for the window; remove it again.
    .drop('month')
)