train_test_split
unknown
plain_text
6 months ago
949 B
2
Indexable
Never
def _rows_for_pairs(frame, pairs, key_cols):
    """Return the rows of *frame* whose *key_cols* values match a row of *pairs*.

    Rows come back grouped by pair, in the order the pairs appear in *pairs*;
    within one pair the rows keep their relative order from *frame*.  Index
    labels and columns of *frame* are preserved.

    Args:
        frame: DataFrame holding the individual records.
        pairs: DataFrame with one row per key combination to select
            (combinations are expected to be unique).
        key_cols: List of column names present in both frames.

    Returns:
        A new DataFrame with the selected rows of *frame*.
    """
    # Number the pairs so the requested pair order can be restored after the
    # merge, independently of whatever row order the merge itself produces.
    ranked_pairs = pairs[key_cols].reset_index(drop=True)
    ranked_pairs = ranked_pairs.assign(_pair_rank=range(len(ranked_pairs)))

    # A single inner merge replaces one full boolean scan of *frame* per pair.
    # Only the key columns plus a positional marker take part, so the helper
    # column names cannot collide with other columns of *frame*.
    located = (
        frame[key_cols]
        .assign(_row_pos=range(len(frame)))
        .merge(ranked_pairs, on=key_cols, how="inner")
        .sort_values(["_pair_rank", "_row_pos"])
    )

    # Positional lookup keeps the original index labels, columns and dtypes.
    return frame.iloc[located["_row_pos"].to_numpy()]


def train_test_ods(test_size=0.1):
    """Split the records of ``data`` into train and test sets by OD pair.

    Every (``origincode``, ``destinationcode``) combination is assigned as a
    whole to either the train or the test side, so no pair contributes rows to
    both returned frames.

    ``data`` is read from module scope; it must provide the columns
    ``origincode``, ``destinationcode`` and ``passengers``.

    Args:
        test_size: Forwarded unchanged to
            ``sklearn.model_selection.train_test_split``; it applies to the
            set of distinct pairs, not to the individual rows of ``data``.

    Returns:
        A ``(train_data, test_data)`` tuple of DataFrames containing the rows
        of ``data`` that belong to the train and test pairs respectively.
    """
    from sklearn.model_selection import train_test_split

    key_cols = ["origincode", "destinationcode"]

    # One row per pair with its summed passenger count.  The descending sort is
    # kept because the input order determines which pairs the seeded shuffle
    # places on each side of the split.
    groups_volume = data.groupby(key_cols)["passengers"].sum()
    result_df = groups_volume.reset_index().sort_values(
        "passengers", ascending=False
    )

    # Fixed seed so repeated calls produce the same partition of pairs.
    train_df, test_df = train_test_split(
        result_df, test_size=test_size, random_state=42
    )

    train_data = _rows_for_pairs(data, train_df, key_cols)
    test_data = _rows_for_pairs(data, test_df, key_cols)
    return train_data, test_data