Untitled

mail@pastecode.io avatar
unknown
r
2 years ago
1.7 kB
22
Indexable
Never
#' k-medoids clustering by exhaustive swap search
#'
#' Starts from the medoids returned by `get_medoids()` and repeatedly sweeps
#' over every row of `X`, trying to substitute it for each current medoid.
#' A substitution is kept only when it lowers the total cost reported by
#' `get_costs()` by more than `tol`. The search stops after a full sweep
#' without any accepted swap, or after `max_iter` sweeps.
#'
#' `get_medoids()` and `get_costs()` are defined elsewhere. This function
#' relies on `get_costs(X, medoids)` returning a list whose elements 1, 2, 3
#' are used as cluster assignment, distances and cost respectively.
#' NOTE(review): element 3 is assumed to be a single number (the total cost);
#' confirm against `get_costs()`.
#'
#' @param X Two-dimensional data (rows are observations). Row membership in
#'   the medoid set is tested with `merge()`, so `X` and the medoids are
#'   presumably data frames with matching columns -- TODO confirm.
#' @param k Number of medoids, forwarded to `get_medoids()`.
#' @param max_iter Maximum number of full sweeps over the rows of `X`.
#' @param tol Minimum cost reduction required to accept a swap.
#' @return An unnamed list: medoids, cluster assignment, distances, cost
#'   (the last three exactly as produced by `get_costs()` for the final
#'   medoids).
kmedioids <- function(X, k, max_iter = 1000, tol = 0.001) {
  n_row <- nrow(X)

  # Initial medoids and the cost of the assignment they induce.
  medoids <- get_medoids(X, n_row, k)
  costs <- get_costs(X, medoids)
  clusters <- costs[[1]]
  distances <- costs[[2]]
  dj <- costs[[3]]

  converged <- FALSE
  sweeps <- 0

  while (!converged && sweeps < max_iter) {
    swapped <- FALSE

    for (i in seq_len(n_row)) {
      # drop = FALSE keeps the row two-dimensional even for one-column input,
      # so merge() and the row assignment below see matching shapes.
      candidate <- X[i, , drop = FALSE]

      # Rows that are already medoids are not swap candidates.
      if (nrow(merge(candidate, medoids)) > 0) {
        next
      }

      for (j in seq_len(nrow(medoids))) {
        medoids_ <- medoids
        medoids_[j, ] <- candidate

        costs_ <- get_costs(X, medoids_)
        di <- costs_[[3]]

        # Accept only a real improvement. Accepting ties (or slightly worse
        # costs) lets equal-cost configurations swap back and forth forever.
        if (dj - di > tol) {
          clusters <- costs_[[1]]
          distances <- costs_[[2]]
          dj <- di

          medoids <- medoids_
          swapped <- TRUE
          print(paste0('Changed medoids with new cost: ', dj))

          # This row is now a medoid; putting it into another slot as well
          # would only create a duplicate medoid.
          break
        }
      }
    }

    sweeps <- sweeps + 1
    converged <- !swapped
  }

  if (converged) {
    print('Ending search by convergence')
  } else {
    print('Max iterations reached')
  }

  list(medoids, clusters, distances, dj)
}