Untitled
unknown
python
2 years ago
2.0 kB
9
Indexable
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.model_selection import ParameterGrid
# Hyper-parameter search space to explore with DBSCAN.
# NOTE(review): `algorithm` appears to only select the neighbour-search backend,
# so it probably does not change the resulting labels — confirm before keeping
# it in the grid, since it multiplies the number of fits by four.
param_grid = dict(
    eps=[0.1, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0],
    min_samples=[3, 5, 8, 10, 12, 15, 18, 20, 25, 30],
    metric=['euclidean', 'manhattan'],
    algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
)
# Single DBSCAN estimator, re-configured on every iteration via set_params.
dbscan = DBSCAN()
# One dict per parameter combination that yields the target number of clusters.
resultados_list = []
# Try every combination in the parameter grid.
for params in ParameterGrid(param_grid):
    dbscan.set_params(**params)
    # Cluster the EMBEDDING_MARIA data; DBSCAN labels noise points as -1.
    clusters_maria = dbscan.fit_predict(EMBEDDING_MARIA)
    # Count real clusters only: the noise label -1 is not a cluster.
    unique_clusters_maria = len(set(clusters_maria) - {-1})
    # Only combinations that produce exactly 7 clusters are scored and plotted.
    if unique_clusters_maria != 7:
        continue
    # NOTE(review): noise points (label -1) are still passed to the silhouette
    # computation as their own group — confirm this is the intended scoring.
    silhouette_score_maria = silhouette_score(EMBEDDING_MARIA, clusters_maria)
    # Explicit keys keep the column order of the results table stable.
    resultados_list.append({
        'eps': params['eps'],
        'min_samples': params['min_samples'],
        'metric': params['metric'],
        'algorithm': params['algorithm'],
        'clusters': unique_clusters_maria,
        'silhouette_score': silhouette_score_maria,
    })
    # Colour points by the DBSCAN labels of THIS run. The previous version
    # reused `labels`/`centroidsm` left over from a K-Means cell, which made
    # every figure identical; DBSCAN does not produce centroids.
    plt.scatter(
        EMBEDDING_MARIA[:, 0],
        EMBEDDING_MARIA[:, 1],
        c=clusters_maria,
        cmap='viridis',
        edgecolor='k',
        s=50,
    )
    plt.title(f'DBSCAN Clustering MARIA. params: {params}')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.show()
# Build the results table in one step from the list of row dicts.
# Passing the columns explicitly keeps the schema even when no parameter
# combination matched (pd.concat on an empty list raises ValueError).
new_results = pd.DataFrame(
    resultados_list,
    columns=['eps', 'min_samples', 'metric', 'algorithm', 'clusters', 'silhouette_score'],
)
# Print the results
print(new_results)
Editor is loading...
Leave a Comment