# Untitled
# unknown
# python
# a year ago
# 3.3 kB
# 12
# Indexable
from sklearn.metrics import roc_auc_score, log_loss, mean_squared_error, ndcg_score
from tabulate import tabulate
from pandarallel import pandarallel
# Initialise pandarallel with 8 workers and the progress bar disabled; the
# metric code below relies on the `parallel_apply` method it provides.
pandarallel.initialize(nb_workers=8, progress_bar=False)
def calc_metrics(model, df, target_column, features_columns, group_column):
    """Compute overall and per-group quality metrics for scored rows.

    Args:
        model: Object with a ``predict`` method. Only used when ``df`` has no
            ``"prediction"`` column, in which case
            ``model.predict(df[features_columns])`` is stored as the score.
        df: DataFrame containing ``target_column``, ``group_column`` and
            normally a precomputed ``"prediction"`` column. It is never
            mutated; a copy is made if predictions have to be added.
        target_column: Name of the ground-truth column.
        features_columns: Feature columns passed to ``model.predict`` in the
            fallback case described above.
        group_column: Column whose values define the groups for the
            ``*__grouped`` metrics.

    Returns:
        A one-column DataFrame (column ``"metrics"``) indexed by metric name:
        ``roc_auc``, ``mse``, ``roc_auc__grouped``, ``mse__grouped`` and
        ``dcg@{1,3,5}__grouped``. Grouped metrics are the mean of the
        per-group values; groups where a metric is undefined contribute NaN
        and are therefore skipped by the mean.

    NOTE: the ``dcg@k__grouped`` entries hold *normalised* DCG (they are
    computed with ``ndcg_score``). The key names are kept unchanged so that
    existing consumers of the result keep working.
    """
    prediction_column = "prediction"

    def custom_roc_auc_score(y_true, y_pred):
        # ROC AUC needs at least two distinct target values. Return NaN
        # (not None) so results stay numeric and `.mean()` skips the group.
        if len(set(y_true)) < 2:
            return float("nan")
        return roc_auc_score(y_true, y_pred)

    def custom_ndcg_score(y_true, y_pred, k):
        # Same guard as for ROC AUC: a group with a single distinct target
        # value is reported as NaN instead of being scored.
        if len(set(y_true)) < 2:
            return float("nan")
        return ndcg_score([y_true], [y_pred], k=k)

    # Use precomputed predictions when present; otherwise score a copy of the
    # frame with the model so the caller's DataFrame is left untouched.
    if prediction_column not in df.columns:
        df = df.copy()
        df[prediction_column] = model.predict(df[features_columns])

    metrics = {
        "roc_auc": custom_roc_auc_score(df[target_column], df[prediction_column]),
        "mse": mean_squared_error(df[target_column], df[prediction_column]),
    }

    # One row per group; each cell holds that group's values as a list.
    grouped_df = df.groupby(group_column).agg(
        {
            prediction_column: list,
            target_column: list,
        }
    )

    def grouped_mean(row_metric):
        # Evaluate the metric per group in parallel, then average over groups.
        return grouped_df.parallel_apply(row_metric, axis=1).mean()

    metrics["roc_auc__grouped"] = grouped_mean(
        lambda rows: custom_roc_auc_score(rows[target_column], rows[prediction_column])
    )
    metrics["mse__grouped"] = grouped_mean(
        lambda rows: mean_squared_error(rows[target_column], rows[prediction_column])
    )
    for k in [1, 3, 5]:
        # Bind k as a default argument so the lambda does not depend on the
        # loop variable's value at call time.
        metrics[f"dcg@{k}__grouped"] = grouped_mean(
            lambda rows, k=k: custom_ndcg_score(
                rows[target_column], rows[prediction_column], k
            )
        )

    return pd.DataFrame.from_dict(metrics, orient="index", columns=["metrics"])
def make_pivot(features_res):
    """Build a comparison table of metrics relative to the ``'baseline'`` entry.

    Args:
        features_res: Mapping from a feature-set name to a sequence whose
            element ``[0]`` is the validation metrics frame and ``[1]`` the
            test metrics frame. Each frame has a ``"metrics"`` column and is
            indexed by metric name. The entry named ``'baseline'`` is the
            reference for the relative change.

    Returns:
        A DataFrame indexed by metric name (repeated once per feature set)
        with columns ``val``, ``test`` and ``feat``. ``val`` and ``test`` are
        strings of the form ``"<value>(<change>%)"``: the value rounded to 4
        decimals and its relative change versus the baseline, in percent,
        rounded to 2 decimals. If there is no baseline value for a metric the
        change is rendered as ``nan``. An empty mapping yields an empty frame
        with those three columns.
    """
    value_columns = ["val", "test"]

    frames = []
    for feat, metric_frames in features_res.items():
        combined = metric_frames[0].rename(columns={"metrics": "val"}).join(
            metric_frames[1].rename(columns={"metrics": "test"})
        )
        combined["feat"] = feat
        frames.append(combined)

    if not frames:
        return pd.DataFrame(columns=[*value_columns, "feat"])

    # Concatenate once instead of growing a frame inside the loop.
    final = pd.concat(frames)
    baseline = final[final["feat"] == "baseline"]

    pivoted = final.copy()
    for col in value_columns:
        # Coerce so that missing metrics (None) become NaN instead of
        # breaking the arithmetic below.
        values = pd.to_numeric(final[col], errors="coerce")
        # Repeat the baseline value of each metric for every feature set so
        # the division is a plain row-by-row operation.
        reference = pd.to_numeric(baseline[col], errors="coerce").reindex(values.index)
        # Round the percentage numerically; slicing its string form would
        # truncate digits (e.g. "-12.34" -> "-12.3").
        change_pct = ((values / reference - 1) * 100).round(2)
        formatted = values.round(4).astype(str) + "(" + change_pct.astype(str) + "%)"
        # Replace the whole column so strings are never written into a
        # float-typed column slice by slice.
        pivoted[col] = formatted.to_numpy()
    return pivoted
# Driver: compute validation/test metrics for each feature set and print a
# markdown table comparing every feature set against the baseline.
feature_names = get_feature_names(ES_URL, FEATURESET_1)
target_column = "is_listen"
group_column = "top_expansion_clean"

features_res = {}

val_metrics = calc_metrics(catboost, val, target_column, feature_names, group_column)
test_metrics = calc_metrics(catboost, test, target_column, feature_names, group_column)
features_res['baseline'] = [val_metrics, test_metrics]

# NOTE(review): these calls use exactly the same model, data and feature list
# as the baseline above, so 'something_new' will equal the baseline. This looks
# like a template slot; swap in the candidate model/features before relying on
# the comparison.
val_metrics = calc_metrics(catboost, val, target_column, feature_names, group_column)
test_metrics = calc_metrics(catboost, test, target_column, feature_names, group_column)
features_res['something_new'] = [val_metrics, test_metrics]

pivoted = make_pivot(features_res)
# reset_index() exposes the metric names as the "index" column, which then
# becomes the pivot's column level; the transpose puts feature sets in columns.
show = pd.pivot(pivoted.reset_index(), columns="index", index="feat").T
print(show.to_markdown())
# Leave a Comment