# Untitled
# unknown
# plain_text
# 3 years ago
# 1.5 kB
# 13
# Indexable
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import ast
# Input files. Raw strings keep every backslash literal, so the Windows paths
# no longer rely on sequences such as "\z" or "\d" being passed through
# unchanged, and "\v" no longer needs to be escaped by hand.
INTER_TRAIN_CSV = r'D:\zuyd\python-zuyd\data_casus\inter_train.csv'
VISITORS_CSV = r'D:\zuyd\python-zuyd\data_casus\visitors.csv'

# Number of zero-initialised "interest<i>" columns added to inter_train.
INTEREST_COLUMN_COUNT = 58


def parse_interests(raw):
    """Split a bracketed, comma-separated string into its items.

    ``"[a, b, c]"`` becomes ``['a', 'b', 'c']``. The first and last
    characters are dropped without being inspected and the remainder is
    split on ``", "``; the items are returned as strings.

    An empty list (``"[]"``) yields ``[]`` rather than ``['']``.
    """
    inner = raw[1:-1]
    if not inner:
        return []
    return inner.split(", ")


def collect_interests(inter_train, visitors):
    """Look up the parsed interests for every ``u`` value in ``inter_train``.

    Args:
        inter_train: DataFrame with a ``u`` column.
        visitors: DataFrame with ``u`` and ``interests`` columns, where
            ``interests`` holds strings accepted by ``parse_interests``.
            If a ``u`` value occurs more than once, the last row wins.

    Returns:
        A tuple ``(interests_df, processed_u)``:

        * ``interests_df`` has one row per ``inter_train`` row whose ``u``
          was found in ``visitors``, in ``inter_train`` order, labelled
          0..k-1. Shorter interest lists are padded with missing values.
          Rows whose ``u`` is absent from ``visitors`` contribute nothing,
          so the frame can be shorter than ``inter_train``.
        * ``processed_u`` maps each matched ``u`` to its parsed list.
    """
    # Index visitors once instead of scanning the whole frame for every
    # inter_train row.
    raw_by_u = dict(zip(visitors['u'], visitors['interests']))

    processed_u = {}
    rows = []
    for u in inter_train['u']:
        if u in processed_u:
            # Already parsed for an earlier row: reuse the cached list.
            print("dit scheelt werk")
        elif u in raw_by_u:
            # First time this user is seen: parse and remember the result.
            processed_u[u] = parse_interests(raw_by_u[u])
            print(processed_u)
        else:
            # Unknown user: no interests to record.
            continue
        # Add the list to the rows that will make up the DataFrame.
        rows.append(processed_u[u])

    # Build the frame in one go. Appending to an empty, column-less DataFrame
    # via .loc raises ValueError, and a per-row append could not hold lists
    # of different lengths.
    return pd.DataFrame(rows), processed_u


if __name__ == "__main__":
    inter_train = pd.read_csv(INTER_TRAIN_CSV)
    visitors = pd.read_csv(VISITORS_CSV)
    inter_train = inter_train.drop(columns='date')

    # NOTE(review): these columns stay 0 in the code visible here; presumably
    # they are meant to be filled from interests_df later - confirm.
    for i in range(INTEREST_COLUMN_COUNT):
        inter_train[f'interest{i}'] = 0

    interests_df, processed_u = collect_interests(inter_train, visitors)
# Editor is loading...