# STFT: Conversion and Augmentation
# Paste metadata: unknown | python | 2 years ago | 2.3 kB | 13 | Indexable
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import stft
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Folder holding the source recordings and folder receiving the PNG images.
audio_folder = "/Users/harshit/Desktop/SOP/data/normal"
output_folder = "/Users/harshit/Desktop/SOP/data/Augmented/normalSTFT"

# Standard deviation of the zero-mean Gaussian noise added to each segment
# (0 means "no noise", i.e. the plain spectrogram).
rms_values = [0, 0.01, 0.015, 0.02, 0.03, 0.04]

# Number of independent noise draws per (file, part, rms) combination.
num_iterations = 1

AUDIO_EXTENSIONS = (".wav", ".mp3", ".ogg")
LEADING_SAMPLES_TO_SKIP = 2000  # samples (not bits) dropped from the start
PARTS_PER_FILE = 3
LOG_FLOOR = 1e-10  # smallest magnitude passed to log10, avoids -inf


def is_audio_file(filename):
    """Return True if *filename* has one of the supported audio extensions.

    The comparison is case-insensitive so ``CLIP.WAV`` is accepted too.
    """
    return filename.lower().endswith(AUDIO_EXTENSIONS)


def split_into_parts(samples, num_parts=PARTS_PER_FILE):
    """Split *samples* into *num_parts* equal-length consecutive segments.

    Any remainder at the end (fewer than *num_parts* samples) is dropped.
    Returns an empty list when *samples* is too short to give every segment
    at least one sample, instead of failing on a zero step size.
    """
    part_size = len(samples) // num_parts
    if part_size == 0:
        return []
    return [
        samples[k * part_size:(k + 1) * part_size]
        for k in range(num_parts)
    ]


def normalized_db_spectrogram(stft_mat, floor=LOG_FLOOR):
    """Convert a complex STFT matrix to a log-magnitude image in [0, 1].

    Magnitudes are clamped to *floor* before the logarithm so that exact
    zeros (e.g. digital silence with rms == 0) do not become -inf and then
    NaN during normalisation. A constant spectrogram maps to all zeros
    rather than dividing by zero.
    """
    magnitude = np.maximum(np.abs(stft_mat), floor)
    # The 10x factor is irrelevant after min-max scaling; kept as in the
    # original pipeline.
    spec_db = 10.0 * np.log10(magnitude)
    spec_db -= spec_db.min()
    peak = spec_db.max()
    if peak > 0:
        spec_db /= peak
    return spec_db


def build_output_name(rms, filename, part_index, iteration=0,
                      total_iterations=1):
    """Build the PNG file name for one augmented segment.

    *part_index* and *iteration* are zero-based. The iteration suffix is only
    added when more than one iteration is requested, so the names produced
    with ``num_iterations == 1`` are unchanged, while larger values no longer
    overwrite the same file.
    """
    stem = os.path.splitext(filename)[0]
    name = f"rms_{rms:.3f}_{stem}_part{part_index + 1}"
    if total_iterations > 1:
        name += f"_iter{iteration + 1}"
    return name + ".png"


def save_spectrogram_png(spectrogram, sr, output_path):
    """Render *spectrogram* without axes or colorbar and save it as a PNG."""
    plt.figure(figsize=(10, 5))
    librosa.display.specshow(spectrogram, y_axis=None, x_axis=None, sr=sr)
    plt.axis('off')  # Remove axes
    plt.tight_layout()
    plt.savefig(output_path)
    plt.close()


def main():
    """Create noisy STFT images for every audio file in ``audio_folder``.

    Each recording is loaded once, its first ``LEADING_SAMPLES_TO_SKIP``
    samples are discarded, and the rest is cut into ``PARTS_PER_FILE`` equal
    segments. For every noise level in ``rms_values`` Gaussian noise is added
    to each segment, the STFT is computed and the normalised log-magnitude
    image is written to ``output_folder``.
    """
    os.makedirs(output_folder, exist_ok=True)

    for filename in sorted(os.listdir(audio_folder)):
        if not is_audio_file(filename):
            continue

        # Load once per file; decoding is independent of the noise level.
        filepath = os.path.join(audio_folder, filename)
        y, sr = librosa.load(filepath, sr=None)

        # Ignore the first 2000 samples
        y = y[LEADING_SAMPLES_TO_SKIP:]
        y_parts = split_into_parts(y)
        if not y_parts:
            print(f"Skipping {filename}: too short after trimming")
            continue

        for rms in rms_values:
            for i, y_part in enumerate(y_parts):
                for iteration in range(num_iterations):
                    noise = np.random.normal(0, rms, len(y_part))
                    y_with_noise = y_part + noise

                    # Compute the Short-Time Fourier Transform (STFT)
                    _, _, stft_mat = stft(
                        y_with_noise, fs=sr, nperseg=400, noverlap=200,
                        nfft=512, window='hamming')

                    # Log-magnitude image normalised to the [0, 1] range
                    stft_mat_db = normalized_db_spectrogram(stft_mat)

                    # Save the STFT as a PNG without axes and colorbar
                    output_filename = os.path.join(
                        output_folder,
                        build_output_name(
                            rms, filename, i, iteration, num_iterations))
                    save_spectrogram_png(stft_mat_db, sr, output_filename)


if __name__ == "__main__":
    main()
# Leave a Comment