Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
1.5 kB
7
Indexable
Never
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import ast

# Load the interaction log and the visitor table.
# Raw strings keep the Windows backslashes literal: sequences such as "\z",
# "\p", "\d" and "\i" are invalid escapes in a normal string literal (a
# SyntaxWarning on recent Python versions) and "\v" would silently become a
# vertical tab.
inter_train = pd.read_csv(r'D:\zuyd\python-zuyd\data_casus\inter_train.csv')
visitors = pd.read_csv(r'D:\zuyd\python-zuyd\data_casus\visitors.csv')

# The 'date' column is not used below, so it is dropped right after loading.
inter_train = inter_train.drop(columns='date')

# Append 58 placeholder columns (interest0 .. interest57) to inter_train,
# every one of them initialised to 0.
interest_columns = [f'interest{number}' for number in range(58)]
for column_name in interest_columns:
    inter_train[column_name] = 0

# Cache of users that were already resolved: user id -> parsed interest list.
processed_u = {}

# Raw (still unparsed) 'interests' value per visitor, built once so that every
# lookup below is a dict access instead of a full scan of `visitors`.
# If a user id occurs more than once in `visitors`, the last occurrence wins.
raw_interests_by_u = dict(zip(visitors['u'], visitors['interests']))

# One entry per row of `inter_train`, in the same order.  Rows are collected
# in a plain list and turned into a DataFrame once at the end: assigning with
# `.loc[len(df)]` to an empty DataFrame that has no columns raises
# "ValueError: cannot set a frame with no defined columns".
interest_rows = []

for u in inter_train['u']:
    if u in processed_u:
        # Seen before: reuse the cached list instead of parsing again.
        print("dit scheelt werk")
    elif u in raw_interests_by_u:
        # Drop the first and last character (presumably the surrounding
        # brackets -- TODO confirm against visitors.csv) and split on ", ".
        # The resulting items are strings; they are not converted to numbers.
        processed_u[u] = raw_interests_by_u[u][1:-1].split(", ")
        # Debug output: dumps the whole cache every time a new user is added.
        print(processed_u)
    # A user that does not occur in `visitors` gets an empty list (an all-NaN
    # row in the frame) so interests_df keeps one row per interaction.
    interest_rows.append(processed_u.get(u, []))

# Row i holds the interest list of the user in row i of inter_train; columns
# are positional (0, 1, 2, ...), shorter lists are padded with missing values.
interests_df = pd.DataFrame(interest_rows)