Untitled
unknown
python
10 months ago
3.3 kB
7
Indexable
import pandas as pd
from pandarallel import pandarallel
from sklearn.metrics import roc_auc_score, log_loss, mean_squared_error, ndcg_score
from tabulate import tabulate  # not called directly; presumably kept because DataFrame.to_markdown needs it

# Enables DataFrame.parallel_apply, which calc_metrics uses for per-group metrics.
pandarallel.initialize(nb_workers=8, progress_bar=False)


def calc_metrics(model, df, target_column, features_columns, group_column):
    """Compute global and per-group quality metrics for precomputed predictions.

    The predictions are read from the ``"prediction"`` column of ``df``; the
    step that would fill that column from ``model`` is commented out below, so
    the caller has to provide it.

    Args:
        model: Currently unused (see the commented-out prediction step).
        df: DataFrame holding ``target_column``, ``group_column`` and a
            ``"prediction"`` column.
        target_column: Name of the ground-truth column.
        features_columns: Currently unused (see the commented-out prediction
            step).
        group_column: Name of the column used to group rows for the
            ``*__grouped`` metrics.

    Returns:
        A DataFrame indexed by metric name with a single ``"metrics"`` column.

    Raises:
        KeyError: If ``df`` has no ``"prediction"`` column.
    """

    def custom_roc_auc_score(y_true, y_pred):
        # With a single distinct target value the score is skipped and None is
        # returned instead of calling roc_auc_score.
        if len(set(y_true)) == 1:
            return None
        return roc_auc_score(y_true, y_pred)

    def custom_ndcg_score(y_true, y_pred, k):
        # Same single-distinct-target guard as for ROC AUC.
        if len(set(y_true)) == 1:
            return None
        # ndcg_score is given one "query" per call, hence the wrapping lists.
        return ndcg_score([y_true], [y_pred], k=k)

    metrics = {}
    prediction_column = "prediction"

    # Prediction step is disabled; predictions must already be present in df.
    # df = df.copy()
    # df[prediction_column] = model.predict(df[features_columns])
    if prediction_column not in df.columns:
        raise KeyError(
            f"df must already contain a '{prediction_column}' column: "
            "calc_metrics does not run the model itself"
        )

    metrics["roc_auc"] = custom_roc_auc_score(df[target_column], df[prediction_column])
    metrics["mse"] = mean_squared_error(df[target_column], df[prediction_column])

    # One row per group; each cell holds that group's values as a plain list.
    grouped_df = df.groupby(group_column).agg(
        {
            prediction_column: list,
            target_column: list,
        }
    )

    def grouped_mean(row_metric):
        # Apply the metric to every group row in parallel and average the
        # results. Groups for which the metric returned None are presumably
        # skipped by mean() -- TODO confirm.
        return grouped_df.parallel_apply(row_metric, axis=1).mean()

    metrics["roc_auc__grouped"] = grouped_mean(
        lambda rows: custom_roc_auc_score(rows[target_column], rows[prediction_column])
    )
    metrics["mse__grouped"] = grouped_mean(
        lambda rows: mean_squared_error(rows[target_column], rows[prediction_column])
    )
    for k in [1, 3, 5]:
        # NOTE(review): the key says "dcg" but the value is NDCG@k. The key is
        # kept as-is because downstream output depends on it.
        metrics[f"dcg@{k}__grouped"] = grouped_mean(
            lambda rows: custom_ndcg_score(rows[target_column], rows[prediction_column], k)
        )

    return pd.DataFrame.from_dict(metrics, orient="index", columns=["metrics"])


def make_pivot(features_res):
    """Build a long table of val/test metrics annotated with change vs. baseline.

    Args:
        features_res: Mapping ``feature name -> [val_metrics, test_metrics]``
            where both entries are DataFrames with a ``"metrics"`` column (the
            shape returned by ``calc_metrics``). The relative change is
            computed against the entry named ``"baseline"``.

    Returns:
        A DataFrame indexed by metric name with columns ``"val"``, ``"test"``
        and ``"feat"``. The ``"val"`` and ``"test"`` cells are strings of the
        form ``"<value rounded to 4 digits>(<relative change in percent>%)"``.
        An empty ``features_res`` yields an empty frame with those columns.
    """
    frames = []
    for feat, dfs in features_res.items():
        combined = dfs[0].rename(columns={"metrics": "val"}).join(
            dfs[1].rename(columns={"metrics": "test"})
        )
        combined["feat"] = feat
        frames.append(combined)

    if not frames:
        return pd.DataFrame(columns=["val", "test", "feat"])

    # Concatenate once instead of growing a frame inside the loop.
    final = pd.concat(frames)

    pivoted = final.copy()
    baseline = final[final["feat"] == "baseline"]
    for col in ["val", "test"]:
        baseline_values = baseline[col]
        # The column is about to hold formatted strings; switch it to object
        # first so the assignments below do not mix strings into a float column.
        pivoted[col] = pivoted[col].astype(object)
        for feat in final["feat"].unique():
            mask = final["feat"] == feat
            # Numeric values are read from `final`, which is never overwritten.
            values = final.loc[mask, col]
            rounded = values.apply(lambda x: round(x, 4)).astype(str)
            # Division aligns on the metric-name index.
            relative = (values / baseline_values - 1).apply(lambda x: round(x, 4)) * 100
            # The percentage text is cut to 5 characters, as in the original.
            pivoted.loc[mask, col] = rounded + "(" + relative.astype(str).str[:5] + "%)"
    return pivoted


feature_names = get_feature_names(ES_URL, FEATURESET_1)
target_column = "is_listen"
group_column = "top_expansion_clean"

val_metrics = calc_metrics(catboost, val, target_column, feature_names, group_column)
test_metrics = calc_metrics(catboost, test, target_column, feature_names, group_column)

features_res = {}
features_res['baseline'] = [val_metrics, test_metrics]

# NOTE(review): these calls use exactly the same arguments as the baseline
# ones above, so 'something_new' will match 'baseline' unless `catboost`,
# `val` or `test` are changed in between -- looks like a placeholder.
val_metrics = calc_metrics(catboost, val, target_column, feature_names, group_column)
test_metrics = calc_metrics(catboost, test, target_column, feature_names, group_column)
features_res['something_new'] = [val_metrics, test_metrics]

pivoted = make_pivot(features_res)
# Metric names become columns per (val/test) level, then the table is transposed.
show = pd.pivot(pivoted.reset_index(), columns="index", index="feat").T
print(show.to_markdown())
Editor is loading...
Leave a Comment