sadece cnn
unknown
python
a year ago
4.6 kB
4
Indexable
# Train and evaluate a small CNN classifier on the HAM10000 image set.
#
# Pipeline: read the metadata CSV, plot a few column distributions, split the
# rows into train / validation / test partitions, oversample ONLY the training
# partition so every class has the same number of rows, load and resize the
# images, train a three-stage Conv2D network, and report test accuracy, the
# learning curves, and a confusion matrix.

import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import resample
from tensorflow.keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPool2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical

# Directory that contains the dataset.
base_dir = '/Users/emreceng/archive/'

# Read the metadata file.
metadata_path = os.path.join(base_dir, 'HAM10000_metadata.csv')
skin_df = pd.read_csv(metadata_path)

SIZE = 32  # every image is resized to SIZE x SIZE pixels


def load_image(path, size=SIZE):
    """Return the image at *path* as a (size, size, 3) uint8 array.

    The file is opened in a ``with`` block so the handle is closed right
    away, and the image is converted to RGB so that every array has three
    channels regardless of the mode stored in the file.
    """
    with Image.open(path) as img:
        return np.asarray(img.convert('RGB').resize((size, size)))


def frame_to_arrays(df, num_classes):
    """Turn the rows of *df* into ``(images, one_hot_labels)`` arrays.

    *df* must have a ``path`` column (image file path) and a ``label``
    column (integer class id). Each distinct file is decoded once even if
    it appears in several rows, which is the common case after oversampling.
    Pixel values are scaled to the range [0, 1].
    """
    decoded = {path: load_image(path) for path in df['path'].unique()}
    images = np.stack([decoded[path] for path in df['path']]) / 255.0
    labels = to_categorical(df['label'], num_classes=num_classes)
    return images, labels


# Label encoding: map each value of the 'dx' column to an integer class id.
le = LabelEncoder()
skin_df['label'] = le.fit_transform(skin_df['dx'])
# Derive the class count from the data instead of hard-coding it.
num_classes = len(le.classes_)

# Visualize the data distribution.
fig, axs = plt.subplots(2, 2, figsize=(15, 10))
skin_df['dx'].value_counts().plot(kind='bar', ax=axs[0, 0])
axs[0, 0].set_title('Cell Type Distribution')
axs[0, 0].set_ylabel('Count')
skin_df['sex'].value_counts().plot(kind='bar', ax=axs[0, 1])
axs[0, 1].set_title('Sex Distribution')
skin_df['localization'].value_counts().plot(kind='bar', ax=axs[1, 0])
axs[1, 0].set_title('Localization Distribution')
# histplot replaces the deprecated distplot; stat='density' keeps the
# histogram on the same scale as the KDE curve, as distplot did.
sns.histplot(skin_df['age'].dropna(), ax=axs[1, 1], kde=True, stat='density')
axs[1, 1].set_title('Age Distribution')
plt.tight_layout()
plt.show()

# Build the full path of every image, keyed by file name without extension.
image_paths = {}
for part_dir in ('HAM10000_images_part_1', 'HAM10000_images_part_2'):
    for x in glob(os.path.join(base_dir, part_dir, '*.jpg')):
        image_paths[os.path.splitext(os.path.basename(x))[0]] = x

# Attach the image paths and drop metadata rows whose image file is missing.
skin_df['path'] = skin_df['image_id'].map(image_paths)
available_df = skin_df.dropna(subset=['path'])
if available_df.empty:
    raise FileNotFoundError(
        f'No HAM10000 images were found under {base_dir!r}; check base_dir.'
    )

# Split BEFORE balancing. Oversampling with replacement duplicates rows, so
# balancing first would put copies of the same image in both the training
# and the evaluation sets and inflate the reported accuracy.
train_df, test_df = train_test_split(
    available_df,
    test_size=0.25,
    random_state=42,
    stratify=available_df['label'],
)
train_df, val_df = train_test_split(
    train_df,
    test_size=0.1,
    random_state=42,
    stratify=train_df['label'],
)

# Balance the training data only: n_samples rows per class, with replacement.
n_samples = 500
df_balanced = pd.concat([
    resample(class_rows, replace=True, n_samples=n_samples, random_state=42)
    for _, class_rows in train_df.groupby('label')
])

# Convert each partition to scaled image arrays and one-hot labels.
x_train, y_train = frame_to_arrays(df_balanced, num_classes)
x_val, y_val = frame_to_arrays(val_df, num_classes)
x_test, y_test = frame_to_arrays(test_df, num_classes)

# Model definition
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(SIZE, SIZE, 3)))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Model summary
model.summary()

# Train the model. The validation set is passed explicitly (instead of
# validation_split) so it contains no oversampled copies of training rows.
history = model.fit(
    x_train,
    y_train,
    batch_size=16,
    epochs=50,
    validation_data=(x_val, y_val),
    verbose=2,
)

# Model evaluation
score = model.evaluate(x_test, y_test, verbose=0)
print('Test accuracy:', score[1])

# Accuracy and loss plots
epochs = range(1, len(history.history['accuracy']) + 1)
plt.plot(epochs, history.history['loss'], 'y', label='Training loss')
plt.plot(epochs, history.history['val_loss'], 'r', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

plt.plot(epochs, history.history['accuracy'], 'y', label='Training accuracy')
plt.plot(epochs, history.history['val_accuracy'], 'r', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

# Prediction on test data
y_pred = model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Confusion matrix. Passing labels= fixes the shape at
# (num_classes, num_classes) even if a class never shows up in the test data.
class_ids = np.arange(num_classes)
cm = confusion_matrix(y_true, y_pred_classes, labels=class_ids)
sns.heatmap(cm, annot=True, fmt='d')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()

# Fraction of incorrect predictions per true class. Classes with no test
# samples are left as NaN instead of triggering a division by zero.
row_totals = cm.sum(axis=1)
correct_fraction = np.divide(
    np.diag(cm),
    row_totals,
    out=np.full(num_classes, np.nan),
    where=row_totals > 0,
)
incorr_fraction = 1 - correct_fraction
plt.bar(class_ids, incorr_fraction)
plt.xlabel('True Label')
plt.ylabel('Fraction of incorrect predictions')
plt.show()
Editor is loading...
Leave a Comment