Untitled
unknown
plain_text
2 years ago
4.7 kB
6
Indexable
# A/B test summary: per-day and cumulative visitors/orders/revenue for groups
# A and B, followed by a Mann-Whitney U test on orders per visitor.
# Relies on `visitors`, `orders`, `pd`, `np` and `stats` being defined earlier
# in the file.


def _cumulative_by_date(daily, value_columns):
    """Return running totals of ``value_columns`` up to each row's date.

    Every row of ``daily`` gets the sum of all rows whose 'date' is less than
    or equal to its own, i.e. the same values the previous row-wise
    ``apply``/filter produced, but computed with one grouped ``cumsum``
    instead of re-filtering the frame for each row.

    The result keeps the index and row order of ``daily`` and has the columns
    'date' followed by ``value_columns``.
    NOTE(review): assumes 'date' has no missing values; rows with a missing
    date would get NaN totals here.
    """
    # groupby sorts the dates ascending, so cumsum accumulates chronologically.
    totals_by_date = daily.groupby('date')[value_columns].sum().cumsum()
    cumulative = daily[['date']].copy()
    for column in value_columns:
        cumulative[column] = daily['date'].map(totals_by_date[column])
    return cumulative


def _daily_visitors(visitors_frame, group):
    """Return the 'date' and 'visitors' columns for one test group."""
    daily = visitors_frame[visitors_frame['group'] == group][['date', 'visitors']]
    daily.columns = ['date', 'visitorsPerDate' + group]
    return daily


def _daily_orders(orders_frame, group):
    """Return unique order count and revenue per date for one test group."""
    daily = (
        orders_frame[orders_frame['group'] == group]
        .groupby('date', as_index=False)
        .agg({'transactionId': 'nunique', 'revenue': 'sum'})
    )
    daily.columns = ['date', 'ordersPerDate' + group, 'revenuePerDate' + group]
    return daily


def _orders_by_user(orders_frame, group):
    """Return the number of unique orders per visitorId for one test group."""
    per_user = (
        orders_frame[orders_frame['group'] == group]
        .groupby('visitorId', as_index=False)
        .agg({'transactionId': 'nunique'})
    )
    per_user.columns = ['visitorId', 'orders']
    return per_user


def _orders_sample(orders_by_user, total_visitors):
    """Return orders per visitor, padded with zeros for visitors without orders.

    The sample holds one entry per buyer (their order count) plus one zero for
    each of the remaining ``total_visitors - len(orders_by_user)`` visitors.
    """
    zeros = pd.Series(
        0,
        index=np.arange(total_visitors - len(orders_by_user['orders'])),
        name='orders',
    )
    return pd.concat([orders_by_user['orders'], zeros], axis=0)


# --- Visitors: daily and cumulative, per group ---
visitorsADaily = _daily_visitors(visitors, 'A')
visitorsACummulative = _cumulative_by_date(visitorsADaily, ['visitorsPerDateA'])
visitorsACummulative.columns = ['date', 'visitorsCummulativeA']

visitorsBDaily = _daily_visitors(visitors, 'B')
visitorsBCummulative = _cumulative_by_date(visitorsBDaily, ['visitorsPerDateB'])
visitorsBCummulative.columns = ['date', 'visitorsCummulativeB']

# --- Orders and revenue: daily and cumulative, per group ---
ordersADaily = _daily_orders(orders, 'A')
ordersACummulative = _cumulative_by_date(
    ordersADaily, ['ordersPerDateA', 'revenuePerDateA']
).sort_values(by=['date'])
ordersACummulative.columns = [
    'date',
    'ordersCummulativeA',
    'revenueCummulativeA',
]

ordersBDaily = _daily_orders(orders, 'B')
ordersBCummulative = _cumulative_by_date(
    ordersBDaily, ['ordersPerDateB', 'revenuePerDateB']
).sort_values(by=['date'])
ordersBCummulative.columns = [
    'date',
    'ordersCummulativeB',
    'revenueCummulativeB',
]

# --- Combined per-date table ---
# Left-joined onto group A's order dates, so a date without any group-A order
# does not appear in `data`.
data = ordersADaily
for frame in (
    ordersBDaily,
    ordersACummulative,
    ordersBCummulative,
    visitorsADaily,
    visitorsBDaily,
    visitorsACummulative,
    visitorsBCummulative,
):
    data = data.merge(frame, on='date', how='left')

print(data.head(5))

# --- Orders per visitor and the significance test ---
ordersByUsersA = _orders_by_user(orders, 'A')
ordersByUsersB = _orders_by_user(orders, 'B')

# Visitor totals are taken from the per-group daily tables rather than from
# `data`: the left merge above drops dates that have no group-A order, which
# would undercount visitors for both groups.
sampleA = _orders_sample(ordersByUsersA, visitorsADaily['visitorsPerDateA'].sum())
sampleB = _orders_sample(ordersByUsersB, visitorsBDaily['visitorsPerDateB'].sum())

# p-value of the Mann-Whitney U test comparing the two samples.
print("P-value:{0:.3f}".format(stats.mannwhitneyu(sampleA, sampleB)[1]))
# Relative difference of group B's mean orders per visitor versus group A's.
print("Относительный прирост конверсии группы B по отношению к группе A:{0:.3f}".format(sampleB.mean() / sampleA.mean() - 1))
Editor is loading...