Untitled
unknown
python
3 years ago
4.4 kB
43
Indexable
import pickle
from operator import itemgetter

import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.manifold import TSNE

# Some pickled (serialized) files which contain match information. Get this
# using the Riot API.
# NOTE: unpickling can execute arbitrary code; only load files you produced
# yourself.
with open('./all_traits-trait_data_points.pkl', 'rb') as trait_file:
    pickled = pickle.load(trait_file)
with open('./participant_info_vec.pkl', 'rb') as participant_file:
    participant_info = pickle.load(participant_file)

# pickled[0] holds the trait names, pickled[1] the per-player trait records.
tdp = pickled[1]
all_traits = sorted(pickled[0])

# Create a dictionary from trait to coordinate, e.g. Chemtech -> 5,
# Bruiser -> 4 (not actual numbers). Sorting first keeps the mapping stable
# from run to run.
trait_to_index = {trait: i for i, trait in enumerate(all_traits)}

# Convert each datapoint into a vector [a b c d e ...] where a indicates the
# number of units with the trait corresponding to coordinate 1, b indicates
# the number of units with the trait at coordinate 2, etc.
tdp_vec = []
for data_point in tdp:
    vec = np.zeros(len(all_traits))
    for trait in data_point:
        vec[trait_to_index[trait['name']]] = float(trait['num_units'])
    tdp_vec.append(vec)
tdp_vec = np.array(tdp_vec)

# Get the placement of each player.
placement_vec = [part['placement'] for part in participant_info]

# Collect every unit that appears in any player's board, in sorted order so
# that each unit gets a stable coordinate.
all_units = sorted({
    unit['character_id']
    for part in participant_info
    for unit in part['units']
})
units_to_index = {unit: i for i, unit in enumerate(all_units)}

# Convert units into a vector. 0 indicates unit not present, 1 indicates unit
# present.
units_vec = []
for part in participant_info:
    vec = np.zeros(len(all_units))
    for unit in part['units']:
        vec[units_to_index[unit['character_id']]] = 1
    units_vec.append(vec)

# Run t-SNE on the trait vector datapoints.
# `cutoff` equals the full dataset length, so the slice below keeps every row.
cutoff = len(tdp_vec)
# NOTE(review): no random_state is passed to TSNE, so the embedding (and
# therefore the clusters found with the fixed eps below) may differ between
# runs -- confirm whether reproducibility matters here.
X_embedded = TSNE(n_components=2, init='pca').fit_transform(
    np.array(tdp_vec[0:cutoff])
)

# Run DBSCAN clustering on the 2-D embedding.
db = DBSCAN(eps=3.3, min_samples=10).fit(X_embedded)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_

# Label -1 is DBSCAN's noise bucket and does not count as a cluster.
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)

unique_labels = set(labels)
# One bucket per real cluster, indexed by cluster label (0 .. n_clusters_ - 1).
label_to_units = [[] for _ in range(n_clusters_)]
label_placements = [[] for _ in range(n_clusters_)]

# Colour map
colors = [plt.cm.tab20(each) for each in np.linspace(0, 1, len(unique_labels))]

# Colour/label each point by DBSCAN classification
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = [0, 0, 0, 1]

    class_member_mask = labels == k

    # Core samples carry the legend entry for the cluster.
    xy = X_embedded[class_member_mask & core_samples_mask]
    plt.plot(
        xy[:, 0],
        xy[:, 1],
        "o",
        markerfacecolor=tuple(col),
        markeredgecolor="k",
        label=k,
    )

    # Non-core members are drawn the same way but without a legend entry.
    xy = X_embedded[class_member_mask & ~core_samples_mask]
    plt.plot(
        xy[:, 0],
        xy[:, 1],
        "o",
        markerfacecolor=tuple(col),
        markeredgecolor="k",
    )
plt.legend()

# Group units/placements by cluster; noise points are left out.
for i, label in enumerate(labels):
    if label == -1:
        continue
    label_to_units[label].append(units_vec[i])
    label_placements[label].append(placement_vec[i])

# Print the most used units/placement for each cluster
N = 15
MIN_CLUSTER_SIZE = 20
# Iterate over the real cluster count: len(unique_labels) - 1 would drop the
# last cluster whenever DBSCAN reports no noise points.
for i in range(n_clusters_):
    members = label_to_units[i]
    # Skip clusters < 20 data points
    if len(members) < MIN_CLUSTER_SIZE:
        continue

    # Fraction of the cluster's players that fielded each unit.
    vec = np.mean(members, axis=0)

    # Indices of the (up to) N most played units, most played first. Names and
    # rates are both read through the same indices so they cannot get out of
    # step, and slicing copes with fewer than N distinct units.
    top_args = np.argsort(-vec, kind='stable')[:N]

    print(i, np.mean(label_placements[i]), len(label_placements[i]))
    for arg in top_args:
        # [5:] drops the first five characters of the character id --
        # presumably a set prefix such as "TFT6_"; confirm against the data.
        print(all_units[arg][5:], str(round(vec[arg] * 100, 2)) + '%')

print(len(tdp_vec))
plt.show()

# Optional: histogram of placements for each cluster.
#for i in range(n_clusters_):
#    plt.hist(label_placements[i], bins = 8)
#    plt.show()
Editor is loading...