Untitled
unknown
plain_text
3 years ago
4.7 kB
14
Indexable
# Visitor counts for group A, one row per matching record in `visitors`,
# with the count column renamed to a group-specific name.
visitorsADaily = visitors[visitors['group'] == 'A'][['date', 'visitors']]
visitorsADaily.columns = ['date', 'visitorsPerDateA']

# Running total of group-A visitors up to and including each row's date.
# Per-date totals are accumulated once in date order and then looked up by
# date, instead of re-filtering and re-summing the whole frame for every row.
# Rows that share a date all receive that date's full running total, and the
# result keeps visitorsADaily's index and row order.
# NOTE(review): assumes every row has a non-null 'date' -- confirm.
visitorsACummulative = pd.DataFrame(
    {
        'date': visitorsADaily['date'],
        'visitorsCummulativeA': visitorsADaily['date'].map(
            visitorsADaily.groupby('date')['visitorsPerDateA'].sum().cumsum()
        ),
    }
)
# Visitor counts for group B, one row per matching record in `visitors`,
# with the count column renamed to a group-specific name.
visitorsBDaily = visitors[visitors['group'] == 'B'][['date', 'visitors']]
visitorsBDaily.columns = ['date', 'visitorsPerDateB']

# Running total of group-B visitors up to and including each row's date.
# Per-date totals are accumulated once in date order and then looked up by
# date, instead of re-filtering and re-summing the whole frame for every row.
# Rows that share a date all receive that date's full running total, and the
# result keeps visitorsBDaily's index and row order.
# NOTE(review): assumes every row has a non-null 'date' -- confirm.
visitorsBCummulative = pd.DataFrame(
    {
        'date': visitorsBDaily['date'],
        'visitorsCummulativeB': visitorsBDaily['date'].map(
            visitorsBDaily.groupby('date')['visitorsPerDateB'].sum().cumsum()
        ),
    }
)
# Group-A orders aggregated per date: number of distinct transactions and
# total revenue. groupby() returns one row per date, sorted by date.
ordersADaily = (
    orders[orders['group'] == 'A'][['date', 'transactionId', 'revenue']]
    .groupby('date', as_index=False)
    .agg({'transactionId': 'nunique', 'revenue': 'sum'})
)
ordersADaily.columns = ['date', 'ordersPerDateA', 'revenuePerDateA']

# Running totals of orders and revenue up to and including each date.
# Because ordersADaily already holds one date-sorted row per date, a plain
# cumulative sum gives the same values as summing "all rows with date <= this
# row's date" for every row, without the quadratic row-by-row re-aggregation.
ordersACummulative = pd.DataFrame(
    {
        'date': ordersADaily['date'],
        'ordersCummulativeA': ordersADaily['ordersPerDateA'].cumsum(),
        'revenueCummulativeA': ordersADaily['revenuePerDateA'].cumsum(),
    }
)
# Group-B orders aggregated per date: number of distinct transactions and
# total revenue. groupby() returns one row per date, sorted by date.
ordersBDaily = (
    orders[orders['group'] == 'B'][['date', 'transactionId', 'revenue']]
    .groupby('date', as_index=False)
    .agg({'transactionId': 'nunique', 'revenue': 'sum'})
)
ordersBDaily.columns = ['date', 'ordersPerDateB', 'revenuePerDateB']

# Running totals of orders and revenue up to and including each date.
# Because ordersBDaily already holds one date-sorted row per date, a plain
# cumulative sum gives the same values as summing "all rows with date <= this
# row's date" for every row, without the quadratic row-by-row re-aggregation.
ordersBCummulative = pd.DataFrame(
    {
        'date': ordersBDaily['date'],
        'ordersCummulativeB': ordersBDaily['ordersPerDateB'].cumsum(),
        'revenueCummulativeB': ordersBDaily['revenuePerDateB'].cumsum(),
    }
)
# Combine all daily and cumulative tables into one frame keyed by 'date'.
# Every join is a left join starting from ordersADaily, so each row of
# ordersADaily is kept and values missing from a right-hand table become NaN.
data = (
    ordersADaily
    .merge(ordersBDaily, how='left', on='date')
    .merge(ordersACummulative, how='left', on='date')
    .merge(ordersBCummulative, how='left', on='date')
    .merge(visitorsADaily, how='left', on='date')
    .merge(visitorsBDaily, how='left', on='date')
    .merge(visitorsACummulative, how='left', on='date')
    .merge(visitorsBCummulative, how='left', on='date')
)

# Show the first five rows of the combined table.
print(data.head(5))
# Number of distinct orders placed by each group-A visitor who ordered.
ordersByUsersA = (
    orders[orders['group'] == 'A']
    .groupby('visitorId', as_index=False)
    .agg({'transactionId': 'nunique'})
)
ordersByUsersA.columns = ['visitorId', 'orders']

# Number of distinct orders placed by each group-B visitor who ordered.
ordersByUsersB = (
    orders[orders['group'] == 'B']
    .groupby('visitorId', as_index=False)
    .agg({'transactionId': 'nunique'})
)
ordersByUsersB.columns = ['visitorId', 'orders']

# Sample of order counts for group A: the buyers' counts followed by zeros.
# The number of zeros is the sum of data['visitorsPerDateA'] minus the number
# of buyers, so the sample length equals that visitor total.
sampleA = pd.concat(
    [
        ordersByUsersA['orders'],
        pd.Series(
            0,
            index=np.arange(
                data['visitorsPerDateA'].sum() - len(ordersByUsersA['orders'])
            ),
            name='orders',
        ),
    ],
    axis=0,
)

# Same construction for group B, using data['visitorsPerDateB'].
sampleB = pd.concat(
    [
        ordersByUsersB['orders'],
        pd.Series(
            0,
            index=np.arange(
                data['visitorsPerDateB'].sum() - len(ordersByUsersB['orders'])
            ),
            name='orders',
        ),
    ],
    axis=0,
)
# Mann-Whitney U test comparing the two samples of per-visitor order counts;
# element [1] of the returned result is printed as the p-value.
print("P-value:{0:.3f}".format(stats.mannwhitneyu(sampleA, sampleB)[1]))

# Relative difference between the mean of sampleB and the mean of sampleA.
# The Russian label reads: "Relative conversion gain of group B with respect
# to group A".
print(
    "Относительный прирост конверсии группы B по отношению к группе A:{0:.3f}".format(
        sampleB.mean() / sampleA.mean() - 1
    )
)