STFT: Conversion and Augmentation
unknown
python
2 years ago
2.3 kB
10
Indexable
"""Generate noise-augmented STFT spectrogram images from a folder of audio.

For every audio file in ``audio_folder`` the script:

1. loads the waveform at its native sample rate and drops the leading samples,
2. splits what is left into three equal-length parts (any remainder is dropped),
3. for each RMS value adds zero-mean Gaussian noise with that standard
   deviation to each part,
4. computes the STFT, converts the magnitude to a log scale, min-max
   normalises it to [0, 1], and
5. saves the result as an axis-free PNG in ``output_folder``.
"""
import os

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import stft

# Path to audio
audio_folder = "/Users/harshit/Desktop/SOP/data/normal"
output_folder = "/Users/harshit/Desktop/SOP/data/Augmented/normalSTFT"

# Standard deviations of the Gaussian noise added to each part (0 = no noise).
rms_values = [0, 0.01, 0.015, 0.02, 0.03, 0.04]
# Number of independent noise realisations generated per (file, part, rms).
num_iterations = 1

AUDIO_EXTENSIONS = (".wav", ".mp3", ".ogg")
LEADING_SAMPLES_TO_SKIP = 2000
NUM_PARTS = 3

STFT_NPERSEG = 400
STFT_NOVERLAP = 200
STFT_NFFT = 512
STFT_WINDOW = "hamming"

# Floor applied to the STFT magnitude before taking the logarithm, so that
# exactly-zero bins (digital silence with rms == 0) do not become -inf and
# turn the whole normalised image into NaN.
MAGNITUDE_FLOOR = 1e-10


def _normalized_db_spectrogram(signal, sr):
    """Return the log-magnitude STFT of *signal*, min-max scaled to [0, 1].

    Args:
        signal: 1-D array of audio samples.
        sr: Sampling rate passed to ``scipy.signal.stft`` as ``fs``.

    Returns:
        2-D float array with values in [0, 1]. A spectrogram with no dynamic
        range (all bins equal) is returned as all zeros.
    """
    _, _, stft_mat = stft(
        signal,
        fs=sr,
        nperseg=STFT_NPERSEG,
        noverlap=STFT_NOVERLAP,
        nfft=STFT_NFFT,
        window=STFT_WINDOW,
    )
    magnitude = np.maximum(np.abs(stft_mat), MAGNITUDE_FLOOR)
    # The 10x factor is kept from the original script; any positive scale
    # factor cancels out in the min-max normalisation below.
    spec_db = 10 * np.log10(magnitude)

    spec_db -= spec_db.min()
    peak = spec_db.max()
    if peak > 0:  # Guard against 0/0 for a constant spectrogram.
        spec_db /= peak
    return spec_db


def _save_spectrogram(spec, sr, path):
    """Render *spec* without axes or colorbar and write it to *path* as PNG."""
    plt.figure(figsize=(10, 5))
    try:
        librosa.display.specshow(spec, y_axis=None, x_axis=None, sr=sr)
        plt.axis("off")  # Remove axes
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Always release the figure, even if rendering or saving fails;
        # otherwise figures accumulate across the whole run.
        plt.close()


os.makedirs(output_folder, exist_ok=True)

for filename in os.listdir(audio_folder):
    if not filename.endswith(AUDIO_EXTENSIONS):
        continue

    filepath = os.path.join(audio_folder, filename)
    # Load once per file; the waveform does not depend on the noise level.
    y, sr = librosa.load(filepath, sr=None)

    # Ignore the first 2000 samples
    y = y[LEADING_SAMPLES_TO_SKIP:]

    part_size = len(y) // NUM_PARTS
    # scipy's stft shrinks nperseg to the input length for short inputs and
    # then requires noverlap < nperseg, so shorter parts cannot be processed.
    # This check also covers part_size == 0 (file shorter than the skip).
    if part_size <= STFT_NOVERLAP:
        print(f"Skipping {filename}: too short after trimming "
              f"({len(y)} samples)")
        continue

    y_parts = [
        y[k * part_size:(k + 1) * part_size] for k in range(NUM_PARTS)
    ]
    stem = os.path.splitext(filename)[0]

    for rms in rms_values:
        for i, y_part in enumerate(y_parts):
            for iteration in range(num_iterations):
                noise = np.random.normal(0, rms, len(y_part))
                y_with_noise = y_part + noise

                stft_mat_db = _normalized_db_spectrogram(y_with_noise, sr)

                # With a single iteration the file name is unchanged from the
                # original script; with more, tag each realisation so that
                # later iterations do not overwrite earlier ones.
                suffix = f"_iter{iteration + 1}" if num_iterations > 1 else ""
                output_filename = os.path.join(
                    output_folder,
                    f"rms_{rms:.3f}_{stem}_part{i+1}{suffix}.png")

                _save_spectrogram(stft_mat_db, sr, output_filename)
Editor is loading...
Leave a Comment