# Untitled
#
# avatar
# unknown
# python
# 2 years ago
# 2.5 kB
# 4
# Indexable
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import ast

# Load the three input tables in a fixed order: the training interactions,
# the visitors and the stands.
inter_train, visitors, stands = (
    pd.read_csv(csv_path)
    for csv_path in ('inter_train.csv', 'visitors.csv', 'stands.csv')
)

# The interaction date is discarded right after loading.
inter_train = inter_train.drop(columns=['date'])

# Empty feature table: 58 columns named 'interest1' .. 'interest58' followed
# by 58 columns named 'topics59' .. 'topics116' (116 columns, zero rows).
#
# All columns are created in a single constructor call. Inserting them one
# at a time leaves the frame fragmented and makes pandas emit a
# PerformanceWarning once more than 100 columns have been inserted.
feature_columns = (
    [f'interest{i}' for i in range(1, 59)]
    + [f'topics{i}' for i in range(59, 117)]
)
interests_df = pd.DataFrame(
    {name: pd.Series(dtype='int64') for name in feature_columns}
)

# Build one feature row per interaction in `inter_train`: the visitor's
# interest counts divided by that visitor's `n_stands`, followed by the
# stand's topic values. The rows become the contents of `interests_df`.

# Caches of already-parsed feature lists, keyed by visitor id ('u') and by
# stand id ('i'), so every id is parsed at most once.
processed_u = {}
processed_i = {}


def _parse_int_list(text):
    """Parse a stringified list such as "[1, 0, 2]" into a list of ints."""
    return [int(value) for value in ast.literal_eval(text)]


# Index both lookup tables once instead of rescanning them for every
# interaction. `setdefault` keeps the first occurrence of an id, which is the
# row a top-to-bottom scan that stops at the first match would pick.
_visitor_records = {}
for _visitor_id, _interests_text, _n_stands in zip(
        visitors['u'], visitors['interests'], visitors['n_stands']):
    _visitor_records.setdefault(_visitor_id, (_interests_text, _n_stands))

_stand_records = {}
for _stand_id, _topics_text in zip(stands['i'], stands['topics']):
    _stand_records.setdefault(_stand_id, _topics_text)

_n_features = len(interests_df.columns)
_feature_rows = []

for _visitor_id, _stand_id in zip(inter_train['u'], inter_train['i']):
    # Visitor part: parsed lazily on first use, then served from the cache.
    if _visitor_id not in processed_u and _visitor_id in _visitor_records:
        _interests_text, _n_stands = _visitor_records[_visitor_id]
        _n_stands = int(_n_stands)
        processed_u[_visitor_id] = [
            count / _n_stands for count in _parse_int_list(_interests_text)
        ]

    # Stand part.
    if _stand_id not in processed_i:
        if _stand_id not in _stand_records:
            # NOTE(review): an interaction whose stand is unknown is dropped
            # without any message (unchanged behaviour) - confirm this is
            # intended, since it shortens interests_df relative to inter_train.
            continue
        processed_i[_stand_id] = _parse_int_list(_stand_records[_stand_id])

    if _visitor_id not in processed_u:
        # Without the visitor part the row would be too short for the table;
        # fail with a message that names the offending id.
        raise ValueError(
            f"visitor {_visitor_id!r} from inter_train was not found in visitors"
        )

    _row = processed_u[_visitor_id] + processed_i[_stand_id]
    if len(_row) != _n_features:
        raise ValueError(
            f"cannot set a row with mismatched columns: got {len(_row)} values "
            f"for {_n_features} columns (u={_visitor_id!r}, i={_stand_id!r})"
        )
    _feature_rows.append(_row)

# interests_df is still the empty column template at this point, so the
# collected rows become its entire contents. Building the frame once avoids
# copying it on every appended row.
if _feature_rows:
    interests_df = pd.DataFrame(
        _feature_rows, columns=interests_df.columns, dtype=float
    )
# Editor is loading...