Untitled
unknown
python
2 years ago
2.5 kB
4
Indexable
import ast

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Builds `interests_df`: one feature row per interaction in inter_train.csv.
# Each row is the visitor's interest counts divided by that visitor's
# `n_stands`, followed by the topic values of the stand in the interaction.

# Column layout of the feature table: interest1..interest58, topics59..topics116.
INTEREST_COUNT = 58
TOPIC_COUNT = 58


def _parse_int_list(text):
    """Parse a bracketed, comma-separated string such as "[1, 0, 2]" into ints.

    Whitespace around the commas is optional and "[]" yields an empty list.

    Raises:
        ValueError: if an element is not an integer literal.
    """
    return [int(token) for token in text[1:-1].split(",") if token.strip()]


def _first_by_key(keys, values):
    """Return a dict mapping each key to the first value paired with it."""
    lookup = {}
    for key, value in zip(keys, values):
        # setdefault keeps the earliest entry when a key occurs more than once.
        lookup.setdefault(key, value)
    return lookup


inter_train = pd.read_csv('inter_train.csv')
visitors = pd.read_csv('visitors.csv')
stands = pd.read_csv('stands.csv')

inter_train = inter_train.drop(columns='date')

feature_columns = (
    [f'interest{n}' for n in range(1, INTEREST_COUNT + 1)]
    + [f'topics{n}'
       for n in range(INTEREST_COUNT + 1, INTEREST_COUNT + TOPIC_COUNT + 1)]
)

# Index the raw visitor/stand data once so each interaction is a dict lookup
# instead of a scan over the whole table.
visitor_raw = _first_by_key(
    visitors['u'], zip(visitors['interests'], visitors['n_stands'])
)
stand_raw = _first_by_key(stands['i'], stands['topics'])

# Parsed feature lists, filled lazily: only visitors and stands that actually
# occur in inter_train are parsed.
processed_u = {}
processed_i = {}
feature_rows = []

for visitor_id, stand_id in zip(inter_train['u'], inter_train['i']):
    if visitor_id not in processed_u and visitor_id in visitor_raw:
        raw_interests, n_stands = visitor_raw[visitor_id]
        stand_count = int(n_stands)  # same divisor for the whole list
        processed_u[visitor_id] = [
            count / stand_count for count in _parse_int_list(raw_interests)
        ]
    # A visitor without a row in visitors.csv contributes no values; the
    # length check below then rejects the interaction.
    visitor_part = processed_u.get(visitor_id, [])

    stand_is_new = stand_id not in processed_i
    if stand_is_new:
        if stand_id not in stand_raw:
            # No row in stands.csv for this stand: the interaction produces
            # no feature row.
            continue
        processed_i[stand_id] = _parse_int_list(stand_raw[stand_id])

    feature_row = visitor_part + processed_i[stand_id]
    if stand_is_new:
        # Trace output: the feature row of every stand seen for the first time.
        print(feature_row)
    if len(feature_row) != len(feature_columns):
        raise ValueError(
            f"interaction (u={visitor_id!r}, i={stand_id!r}) produced "
            f"{len(feature_row)} feature values, expected {len(feature_columns)}"
        )
    feature_rows.append(feature_row)

# Build the table in one step; growing a DataFrame row by row copies it on
# every insert. A single float dtype keeps ratios and topic values uniform.
interests_df = pd.DataFrame(feature_rows, columns=feature_columns, dtype=float)
Editor is loading...