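# sadece cnn  ("CNN only")
#
# Paste-site page header (pastecode.io), kept as a comment so the file parses:
#   mail@pastecode.io avatar · unknown · python · 2 months ago · 4.6 kB · 2 · Indexable · Never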
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
from PIL import Image
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D

# Side length, in pixels, that images are resized to and that the network expects
SIZE = 32

# Directory that contains the dataset
base_dir = '/Users/emreceng/archive/'

# Read the metadata file into a DataFrame
metadata_path = os.path.join(base_dir, 'HAM10000_metadata.csv')
skin_df = pd.read_csv(metadata_path)

# Label encoding: map each distinct 'dx' string to an integer id in 'label'
le = LabelEncoder()
skin_df['label'] = le.fit_transform(skin_df['dx'])

# Visualize the data distribution on a 2x2 grid of panels
fig, axs = plt.subplots(2, 2, figsize=(15, 10))

# Bar charts of value counts for the three categorical columns
skin_df['dx'].value_counts().plot(kind='bar', ax=axs[0, 0])
axs[0, 0].set_title('Cell Type Distribution')
axs[0, 0].set_ylabel('Count')

skin_df['sex'].value_counts().plot(kind='bar', ax=axs[0, 1])
axs[0, 1].set_title('Sex Distribution')

skin_df['localization'].value_counts().plot(kind='bar', ax=axs[1, 0])
axs[1, 0].set_title('Localization Distribution')

# sns.distplot is deprecated and scheduled for removal; histplot is its
# replacement. stat='density' keeps the density-normalized histogram with a
# KDE overlay that distplot drew by default.
sns.histplot(skin_df['age'].dropna(), ax=axs[1, 1], kde=True, stat='density')
axs[1, 1].set_title('Age Distribution')

plt.tight_layout()
plt.show()

# Balance data: draw the same number of rows (with replacement) from every class
n_samples = 500
per_class_frames = []
for class_id in skin_df['label'].unique():
    class_rows = skin_df[skin_df['label'] == class_id]
    per_class_frames.append(
        resample(class_rows, replace=True, n_samples=n_samples, random_state=42)
    )
df_balanced = pd.concat(per_class_frames)

# Build a lookup from image id (file name without extension) to full file path.
# Both image folders are scanned in order, so an id found in part_2 overrides
# the same id from part_1.
image_paths = {
    os.path.splitext(os.path.basename(jpg_path))[0]: jpg_path
    for part_dir in ('HAM10000_images_part_1', 'HAM10000_images_part_2')
    for jpg_path in glob(os.path.join(base_dir, part_dir, '*.jpg'))
}

# Add the image paths to the DataFrame
df_balanced['path'] = df_balanced['image_id'].map(image_paths.get)

# Filter out rows whose image file was not found *before* trying to open
# anything, instead of relying on the truthiness of a possibly-NaN path.
df_balanced = df_balanced.dropna(subset=['path']).copy()


def _load_image(path):
    """Open one image file and return it as a (SIZE, SIZE, 3) pixel array."""
    # The context manager closes the file handle deterministically;
    # convert('RGB') guarantees the 3 channels the network's input expects.
    with Image.open(path) as img:
        return np.asarray(img.convert('RGB').resize((SIZE, SIZE)))


# Rows were oversampled with replacement, so many rows share a path: decode
# each distinct file once and reuse the array for every duplicate row.
image_cache = {path: _load_image(path) for path in df_balanced['path'].unique()}
df_balanced['image'] = df_balanced['path'].map(image_cache.get)

# Convert images to one numpy array and scale pixel values to [0, 1]
X = np.asarray(df_balanced['image'].tolist())
X = X / 255.0
Y = df_balanced['label']
Y_cat = to_categorical(Y, num_classes=7)

# Split data.
# df_balanced was oversampled with replacement, so one image_id occurs in many
# rows. A random row-level split would place copies of the same image in both
# train and test and inflate the test score. Instead, split the *unique* image
# ids (stratified by class) and send every copy of an image to the same side.
unique_images = df_balanced.drop_duplicates(subset='image_id')
_, test_ids = train_test_split(
    unique_images['image_id'],
    test_size=0.25,
    random_state=42,
    stratify=unique_images['label'],
)
is_test = df_balanced['image_id'].isin(test_ids).to_numpy()

# Shuffle row order: df_balanced is grouped by class, and Keras'
# validation_split takes the tail of the training array, so unshuffled rows
# would yield a single-class validation set.
rng = np.random.default_rng(42)
train_idx = rng.permutation(np.flatnonzero(~is_test))
test_idx = rng.permutation(np.flatnonzero(is_test))

x_train, x_test = X[train_idx], X[test_idx]
y_train, y_test = Y_cat[train_idx], Y_cat[test_idx]

# Model definition: three Conv -> MaxPool -> Dropout stages followed by a
# small dense classifier head with a 7-way softmax output.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(SIZE, SIZE, 3)),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(7, activation='softmax'),
])

# Compile with Adam and categorical cross-entropy (labels are one-hot encoded)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()

# Train the model; the last 10% of the training data is held out for validation
fit_options = {
    'batch_size': 16,
    'epochs': 50,
    'validation_split': 0.1,
    'verbose': 2,
}
history = model.fit(x_train, y_train, **fit_options)

# Evaluate on the held-out test set; score is [loss, accuracy]
score = model.evaluate(x_test, y_test, verbose=0)
test_accuracy = score[1]
print('Test accuracy:', test_accuracy)

# Loss and accuracy curves: one figure per metric, training vs. validation
epochs = range(1, len(history.history['accuracy']) + 1)
for metric in ('loss', 'accuracy'):
    train_curve = history.history[metric]
    val_curve = history.history[f'val_{metric}']
    plt.plot(epochs, train_curve, 'y', label=f'Training {metric}')
    plt.plot(epochs, val_curve, 'r', label=f'Validation {metric}')
    plt.title(f'Training and validation {metric}')
    plt.xlabel('Epochs')
    plt.ylabel(metric.capitalize())
    plt.legend()
    plt.show()

# Prediction on test data: turn softmax outputs / one-hot targets into class ids
y_pred = model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Confusion matrix.
# Pass the full label set explicitly so the matrix always has one row/column
# per class, even if a class is absent from both the test targets and the
# predictions; otherwise the bar chart below could get a shape mismatch.
num_classes = y_test.shape[1]
class_ids = np.arange(num_classes)
cm = confusion_matrix(y_true, y_pred_classes, labels=class_ids)
sns.heatmap(cm, annot=True, fmt='d')
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()

# Fraction of incorrect predictions per true class.
# Classes with no test samples are left as NaN (no bar drawn) instead of
# triggering a division by zero.
support = cm.sum(axis=1)
incorr_fraction = np.full(num_classes, np.nan)
np.divide(support - np.diag(cm), support, out=incorr_fraction, where=support > 0)
plt.bar(class_ids, incorr_fraction)
plt.xlabel('True Label')
plt.ylabel('Fraction of incorrect predictions')
plt.show()
# Leave a Comment  (paste-site page footer, not part of the script)