Untitled

mail@pastecode.io avatar
unknown
python
7 months ago
2.0 kB
4
Indexable
Never
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.model_selection import ParameterGrid

# Parameter space to explore: every combination of these values is tried.
param_grid = dict(
    eps=[0.1, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 4.0, 5.0],
    min_samples=[3, 5, 8, 10, 12, 15, 18, 20, 25, 30],
    metric=['euclidean', 'manhattan'],
    algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],
)

# Single DBSCAN estimator, reconfigured on every iteration via set_params.
dbscan = DBSCAN()

# One dict per accepted parameter combination; consumed after the loop.
resultados_list = []

# Number of real (non-noise) clusters a parameter combination must produce
# for it to be recorded and plotted.
TARGET_CLUSTERS = 7

# NOTE(review): EMBEDDING_MARIA and plt are not defined in this snippet;
# presumably they come from earlier cells (an array with at least two columns
# and matplotlib.pyplot) — confirm before running standalone.

# Iterate over every combination in the parameter space.
for params in ParameterGrid(param_grid):
    dbscan.set_params(**params)

    # Cluster the EMBEDDING_MARIA data with the current parameters.
    clusters_maria = dbscan.fit_predict(EMBEDDING_MARIA)

    # DBSCAN marks noise samples with the label -1. Noise is not a cluster,
    # so it must not be counted when comparing against TARGET_CLUSTERS.
    found_labels = set(clusters_maria)
    found_labels.discard(-1)
    unique_clusters_maria = len(found_labels)

    # Only combinations that yield exactly the target number of clusters
    # are scored, recorded and plotted.
    if unique_clusters_maria != TARGET_CLUSTERS:
        continue

    # The full label array is passed, so any noise samples are scored as
    # their own label group (-1).
    silhouette_score_maria = silhouette_score(EMBEDDING_MARIA, clusters_maria)

    resultados_list.append({
        'eps': params['eps'],
        'min_samples': params['min_samples'],
        'metric': params['metric'],
        'algorithm': params['algorithm'],
        'clusters': unique_clusters_maria,
        'silhouette_score': silhouette_score_maria,
    })

    # Colour the points by the DBSCAN labels computed in this iteration.
    # DBSCAN does not produce centroids, so no centroid overlay (and hence
    # no legend) is drawn.
    plt.scatter(
        EMBEDDING_MARIA[:, 0],
        EMBEDDING_MARIA[:, 1],
        c=clusters_maria,
        cmap='viridis',
        edgecolor='k',
        s=50,
    )
    plt.title(f'DBSCAN Clustering MARIA. params: {params}')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.show()

# Build the results table in one step. Passing the list of dicts straight to
# the DataFrame constructor avoids creating one single-row frame per result,
# and the explicit column list keeps the table well-formed (empty, but with
# headers) when no parameter combination was accepted — pd.concat on an empty
# list would raise ValueError instead.
new_results = pd.DataFrame(
    resultados_list,
    columns=['eps', 'min_samples', 'metric', 'algorithm', 'clusters', 'silhouette_score'],
)

# Print the results.
print(new_results)
Leave a Comment