Untitled
unknown
plain_text
2 years ago
1.5 kB
10
Indexable
# Build, for every interaction row, the interest list of the visitor it
# belongs to (rows are matched on the shared "u" column).
import ast

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Raw strings: the previous literals only worked because "\z", "\p", "\d" and
# "\i" are not recognised escape sequences, which Python flags as invalid
# escapes. The resulting path values are unchanged.
inter_train = pd.read_csv(r'D:\zuyd\python-zuyd\data_casus\inter_train.csv')
visitors = pd.read_csv(r'D:\zuyd\python-zuyd\data_casus\visitors.csv')

inter_train = inter_train.drop(columns='date')

# Add the columns interest0 .. interest57, all initialised to 0.
for i in range(58):
    inter_train[f'interest{i}'] = 0

# Index the visitors once so each interaction row needs a single dictionary
# lookup instead of a full scan of the visitor table. If a visitor id occurs
# more than once, the last occurrence wins.
raw_interests_by_u = dict(zip(visitors['u'], visitors['interests']))

# Parsed interest list for every visitor id already seen in inter_train.
processed_u = {}

# Rows are gathered in a plain list and converted to a DataFrame in one go:
# enlarging a DataFrame that has no columns through .loc raises ValueError.
interest_rows = []

for u in inter_train['u']:
    if u in processed_u:
        # Already parsed for an earlier interaction row; reuse the result.
        print("dit scheelt werk")
    elif u in raw_interests_by_u:
        # Drop the first and last character (presumably the enclosing
        # brackets -- confirm against the data) and split on ", ".
        processed_u[u] = raw_interests_by_u[u][1:-1].split(", ")
        print(processed_u)
    else:
        # No visitor with this id: the interaction contributes no row.
        continue
    # Add the interest list as the next row of the result.
    interest_rows.append(processed_u[u])

interests_df = pd.DataFrame(interest_rows)
Editor is loading...