# Untitled
#
# mail@pastecode.io avatar
# unknown
# plain_text
# a year ago
# 1.8 kB
# 1
# Indexable
# Never
# Prepare R ----
options(scipen = 999) # disable scientific notation

# Open data ----
# The file is read through an explicit path instead of setwd(), so running the
# script does not change the session's working directory as a side effect.
data_dir <- "C:/Users/vikto/Documents"

# The first column of the CSV holds the row labels; row.names = 1 turns that
# column into the row names and drops it from the data in one step.
data <- read.csv(file.path(data_dir, "data.csv"), row.names = 1)

# Prepare data ----
data <- as.matrix(data) # dataset to matrix (rows and columns keep their names)
data                    # print data as a visual check

## Calculate correlations ----
# cor() correlates the columns of its input, so the matrix is transposed first
# to put the customers into the columns. With "pairwise.complete.obs" every
# pair of customers is correlated on the entries that are non-missing for both.
correlations_customer <- cor(t(data), use = "pairwise.complete.obs")

# Calculate customer mean ratings
# One mean per row of `data`; missing entries are left out of each mean.
means_customer <- rowMeans(data, na.rm = TRUE)

# TRUE/FALSE matrix whether customer has rated name
# Same shape as `data`: TRUE where an entry is present, FALSE where it is NA.
rated_customer.name <- !is.na(data)

## Prepare required input ----
target_customer <- "Customer.2" # customer the prediction is made for
target_name     <- "Thomas"     # name whose rating is predicted

# Fail early with a clear message if either target is missing from the data.
stopifnot(
  target_customer %in% rownames(data),
  target_name %in% colnames(data)
)

# NOTE: `mean` holds a number here. Calls such as mean(x) still reach the base
# function, because R skips non-function objects when resolving a call.
mean       <- means_customer[target_customer]          # target's mean rating
similarity <- correlations_customer[, target_customer] # correlations with target
rating     <- data[, target_name]                      # ratings for the name
rated      <- rated_customer.name[, target_name]       # who rated the name

## Numerator and denominator of Equation (1) on p. 396 in Winston (2014) ----
# Only customers who rated the name contribute. Both sums drop customers whose
# similarity is NA, so numerator and denominator cover the same customers and
# one undefined correlation no longer turns the whole prediction into NA.
numerator <- sum(
  similarity[rated] * (rating[rated] - means_customer[rated]),
  na.rm = TRUE
) # similarity-weighted mean difference
denominator <- sum(abs(similarity[rated]), na.rm = TRUE) # sum of |similarity|

## User-based prediction of target_customer's rating for target_name ----
# Yields NaN when no customer with a defined similarity has rated the name,
# because both sums are then 0.
round(mean + numerator / denominator, 2) # predicted rating, two decimals