def _rows_for_pairs(frame, pairs, keys):
    """Return the rows of *frame* whose *keys* values match a row of *pairs*.

    Rows come back grouped in the order the key combinations appear in
    *pairs*; within one combination they keep their original relative
    order and their original index labels.

    Args:
        frame: DataFrame to select rows from.
        pairs: DataFrame holding one row per key combination to keep;
            the combinations must be unique.
        keys: List of column names present in both frames.

    Returns:
        A new DataFrame with the selected rows of *frame*.
    """
    import pandas as pd

    pair_index = pd.MultiIndex.from_frame(pairs[keys])
    row_index = pd.MultiIndex.from_frame(frame[keys])

    # For every row of `frame`: position of its key combination inside
    # `pairs`, or -1 when the combination is not part of this split.
    rank = pair_index.get_indexer(row_index)

    positions = (rank >= 0).nonzero()[0]
    # The stable sort keeps original row order inside each combination,
    # matching what concatenating per-pair boolean-mask slices would give.
    ordered = positions[rank[positions].argsort(kind="stable")]
    return frame.iloc[ordered]


def train_test_ods(test_size=0.1):
    """Split the global ``data`` frame into train/test rows by OD pair.

    The split is made over distinct (origincode, destinationcode) pairs,
    not over individual rows, so every row of a given pair ends up on the
    same side. ``data`` is read from the enclosing scope and is not
    modified.

    Args:
        test_size: Forwarded to ``train_test_split`` as the test share of
            the distinct pairs.

    Returns:
        Tuple ``(train_data, test_data)`` of DataFrames holding the rows
        of ``data`` that belong to the train and test pairs.
    """
    from sklearn.model_selection import train_test_split

    keys = ["origincode", "destinationcode"]

    # One row per pair with its total passengers. The sort is kept because
    # train_test_split selects by position, so row order here influences
    # which pairs land in which split for the fixed random_state.
    volume = data.groupby(keys)["passengers"].sum()
    pairs = volume.reset_index().sort_values("passengers", ascending=False)

    train_pairs, test_pairs = train_test_split(
        pairs, test_size=test_size, random_state=42
    )

    train_data = _rows_for_pairs(data, train_pairs, keys)
    test_data = _rows_for_pairs(data, test_pairs, keys)
    return train_data, test_data