# Speaker recognition on spoken-digit WAV recordings.
# Pipeline: librosa feature extraction -> CSV, preprocessing (filename ->
# speaker label), then a dense Keras classifier trained twice: once on the
# core speakers and once including the additional speakers.
# (Recovered from a paste-site export; original metadata header removed.)
# If true, the WAV files will be read and their features will be saved in the CSV files
# As this is the most time consuming task, only enable it if you don't have the CSV files yet
CREATE_CSV_FILES = False

# Defines the names of the CSV files
TRAIN_CSV_FILE = "train.csv"
TEST_CSV_FILE = "test.csv"
MORE_TRAIN_CSV_FILE = "more_train.csv"
MORE_TEST_CSV_FILE = "more_test.csv"

import csv
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import librosa

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix

from keras import models
from keras import layers
from keras.callbacks import EarlyStopping


def extractWavFeatures(soundFilesFolder, csvFileName):
    """Extract mean librosa features from every WAV file in a folder.

    Writes one CSV row per file to ``csvFileName``: the file name, six
    spectral features, and 20 MFCC coefficients (each averaged over time).
    Note the header also declares a trailing ``label`` column that the data
    rows never fill — kept for compatibility with the existing CSV layout.
    """
    print("The features of the files in the folder " + soundFilesFolder +
          " will be saved to " + csvFileName)
    header = ('filename chroma_stft rmse spectral_centroid '
              'spectral_bandwidth rolloff zero_crossing_rate')
    for i in range(1, 21):
        header += f' mfcc{i}'
    header += ' label'
    header = header.split()
    print('CSV Header: ', header)
    # BUG FIX: the file handle was opened without a context manager and only
    # closed on the happy path; `with` guarantees closure if librosa raises.
    with open(csvFileName, 'w', newline='') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        for filename in os.listdir(soundFilesFolder):
            # BUG FIX: the path must reference the current file; the original
            # text contained a garbled placeholder instead of {filename}.
            path = f'{soundFilesFolder}/{filename}'
            y, sr = librosa.load(path, mono=True, duration=30)
            # remove leading and trailing silence
            y, _ = librosa.effects.trim(y)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            rmse = librosa.feature.rms(y=y)
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            mfcc = librosa.feature.mfcc(y=y, sr=sr)
            # BUG FIX: the first field is the file name (the garbled
            # placeholder dropped it, breaking preProcessData downstream).
            to_append = (f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} '
                         f'{np.mean(spec_cent)} {np.mean(spec_bw)} '
                         f'{np.mean(rolloff)} {np.mean(zcr)}')
            for e in mfcc:
                to_append += f' {np.mean(e)}'
            writer.writerow(to_append.split())
    print("End of extractWavFeatures")


if CREATE_CSV_FILES:
    extractWavFeatures("../data/recordings/train", TRAIN_CSV_FILE)
    extractWavFeatures("../data/recordings/test", TEST_CSV_FILE)
    extractWavFeatures("../data/recordings/moreSpeakersTrain", MORE_TRAIN_CSV_FILE)
    extractWavFeatures("../data/recordings/moreSpeakersTest", MORE_TEST_CSV_FILE)
    print("CSV files are created")
else:
    print("CSV files creation is skipped")


# Reading a dataset and converting each file name to the corresponding
# speaker number.
def preProcessData(csvFileName):
    """Load a feature CSV and derive a numeric speaker label per row.

    We have six speakers — 0: Jackson, 1: Nicolas, 2: Theo, 3: Ankur,
    4: Caroline, 5: Rodolfo — identified by the third character of the file
    name; anything else maps to 6 ("Unknown"). Returns the DataFrame with
    the label in the last column and non-feature columns dropped.
    """
    print(csvFileName + " will be preprocessed")
    data = pd.read_csv(csvFileName)
    speaker_codes = {"j": "0", "n": "1", "t": "2", "a": "3", "c": "4", "r": "5"}
    # assumes file names look like "NN<speaker-initial>..." — the speaker's
    # initial is the third character (index 2); TODO confirm naming scheme
    speakerArray = [speaker_codes.get(name[2], "6") for name in data['filename']]
    # BUG FIX: store the label as an integer; Keras'
    # sparse_categorical_crossentropy needs numeric targets, not the string
    # digits the original appended.
    data['number'] = np.array(speakerArray, dtype=np.int64)
    # Dropping columns that are not model inputs (chroma_stft is excluded
    # from the feature set by design of the original script).
    data = data.drop(['filename'], axis=1)
    data = data.drop(['label'], axis=1)
    data = data.drop(['chroma_stft'], axis=1)
    print("Preprocessing is finished")
    print(data.head())
    return data


trainData = preProcessData(TRAIN_CSV_FILE)
testData = preProcessData(TEST_CSV_FILE)
moreTrainData = preProcessData(MORE_TRAIN_CSV_FILE)
moreTestData = preProcessData(MORE_TEST_CSV_FILE)


def buildModel(input_dim):
    """Build and compile the dense classifier (originally duplicated
    verbatim for both training runs).

    10 softmax outputs cover the 7 labels in use (0-6) with headroom;
    sparse_categorical_crossentropy matches the integer targets.
    """
    model = models.Sequential()
    model.add(layers.Dense(256, activation='relu', input_shape=(input_dim,)))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def plotTrainingHistory(history):
    """Plot train vs. validation loss from a Keras History object."""
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='test')
    plt.legend()
    plt.show()


# Splitting the dataset into training, validation and testing dataset
X = np.array(trainData.iloc[:, :-1], dtype=float)
y = trainData.iloc[:, -1]
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3,
                                                  random_state=42)
X_test = np.array(testData.iloc[:, :-1], dtype=float)
y_test = testData.iloc[:, -1]
print("Y from training data:", y_train.shape)
print("Y from validation data:", y_val.shape)
print("Y from test data:", y_test.shape)

# Normalizing the dataset: fit on training data only, re-use its statistics
# for validation and test to avoid leakage.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
print("X from training data", X_train.shape)
print("X from validation data", X_val.shape)
print("X from test data", X_test.shape)

# Creating a model and training with early stopping to avoid overfitting
model = buildModel(X_train.shape[1])
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                    epochs=50, batch_size=128, callbacks=[es])
plotTrainingHistory(history)


def getSpeaker(speaker):
    """Map a speaker label (int or string digit) to the speaker's name."""
    names = {"0": "Jackson", "1": "Nicola", "2": "Theo",
             "3": "Ankur", "4": "Caroline", "5": "Rodolfo"}
    # BUG FIX: the original's else-branch assigned "Unknown" without
    # returning it, so unknown labels silently produced None.
    return names.get(str(speaker), "Unknown")


def predictClasses(X_data):
    """Return the predicted class index for each sample.

    BUG FIX: Sequential.predict_classes() was removed from Keras
    (TensorFlow >= 2.6); argmax over predict() is the documented
    replacement. Batching one predict() call also avoids the original's
    per-sample model invocations.
    """
    return np.argmax(model.predict(X_data), axis=1)


def printPrediction(X_data, y_data, printDigit):
    """Print predicted vs. actual speaker per sample; with the sample index
    when printDigit is true."""
    print('\n# Generate predictions')
    predictions = predictClasses(X_data)
    for i, (actual, predicted) in enumerate(zip(np.asarray(y_data), predictions)):
        prediction = getSpeaker(predicted)
        speaker = getSpeaker(actual)
        if printDigit:
            print("Number={0:d}, y={1:10s}- prediction={2:10s}- match={3}".format(
                i, speaker, prediction, speaker == prediction))
        else:
            print("y={0:10s}- prediction={1:10s}- match={2}".format(
                speaker, prediction, speaker == prediction))


def report(X_data, y_data):
    """Print the confusion matrix (also shown as an image) and the
    classification report for the current model on X_data/y_data."""
    Y_pred = predictClasses(X_data)
    y_test_num = y_data.astype(np.int64)
    conf_mt = confusion_matrix(y_test_num, Y_pred)
    print(conf_mt)
    plt.matshow(conf_mt)
    plt.show()
    print('\nClassification Report')
    # NOTE(review): the original computed target_names ("Jackson"..."Unknown")
    # but never passed them; passing them requires the label set to match
    # exactly, so the numeric report is kept as-is.
    print(classification_report(y_test_num, Y_pred))


print('\n# TEST DATA #\n')
score = model.evaluate(X_test, y_test)
print("%s: %.2f%%" % (model.metrics_names[1], score[1] * 100))
# Prediction
printPrediction(X_test[0:10], y_test[0:10], False)
print("Classification Report for Test Data\n")
report(X_test, y_test)

# Second run: re-train on the full training set including the additional
# speakers, then evaluate on both test sets.
# BUG FIX: DataFrame.append() was removed in pandas 2.0; concat replaces it.
fullTrainData = pd.concat([trainData, moreTrainData], ignore_index=True)
X = np.array(fullTrainData.iloc[:, :-1], dtype=float)
y = fullTrainData.iloc[:, -1]
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3,
                                                  random_state=42)
X_test = np.array(testData.iloc[:, :-1], dtype=float)
y_test = testData.iloc[:, -1]
X_more_test = np.array(moreTestData.iloc[:, :-1], dtype=float)
y_more_test = moreTestData.iloc[:, -1]
print("Y from training data:", y_train.shape)
print("Y from validation data:", y_val.shape)
print("Y from test data:", y_test.shape)
print("Y from other speakers test data:", y_more_test.shape)

# Normalizing the dataset (fresh scaler fitted on the enlarged training set)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
X_more_test = scaler.transform(X_more_test)
print("X from training data", X_train.shape)
print("X from validation data", X_val.shape)
print("X from test data", X_test.shape)
print("X from other speakers test data", X_more_test.shape)

# Creating a model and training with early stopping to avoid overfitting
model = buildModel(X_train.shape[1])
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
history = model.fit(X_train, y_train, validation_data=(X_val, y_val),
                    epochs=50, batch_size=128, callbacks=[es])
plotTrainingHistory(history)

print('\n# TEST DATA #\n')
score = model.evaluate(X_test, y_test)
print("%s: %.2f%%" % (model.metrics_names[1], score[1] * 100))
# Prediction
printPrediction(X_test[0:10], y_test[0:10], False)

print('\n# OTHER SPEAKERS DATA #\n')
score = model.evaluate(X_more_test, y_more_test)
print("%s: %.2f%%" % (model.metrics_names[1], score[1] * 100))
# Prediction
printPrediction(X_more_test[0:10], y_more_test[0:10], False)

print("Classification Report for Test Data\n")
report(X_test, y_test)
print("Classification Report for Other Speakers\n")
report(X_more_test, y_more_test)