import os
import subprocess
import numpy as np
import librosa
import matplotlib.pyplot as plt
from pydub import AudioSegment
from pydub.utils import mediainfo
from moviepy.editor import VideoFileClip
from scipy import signal
from scipy.signal import find_peaks
from scipy.interpolate import interp1d
def loadAudio(file_path, return_segment=False):
    audio_segment = AudioSegment.from_file(file_path)
    data = np.array(audio_segment.get_array_of_samples())
    if audio_segment.channels > 1:
        data = data.reshape((-1, audio_segment.channels)).mean(axis=1)  # Convert to mono if stereo
    rate = audio_segment.frame_rate
    if return_segment:
        return data, rate, audio_segment
    return data, rate
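# Usage sketch (hypothetical path): samples come back as a mono NumPy array
# together with the frame rate in Hz, e.g.
#   data, rate = loadAudio("voice.mp3")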
def findOffset2(audio1, audio2):
    # Cross-correlate the two audio signals
    correlation = signal.correlate(audio2, audio1, mode='full')
    # Index of the maximum correlation value
    max_index = np.argmax(correlation)
    # In 'full' mode the first lag is -(len(audio1) - 1), so subtract that
    # offset to convert the argmax index into a signed lag in samples
    lag = max_index - (len(audio1) - 1)
    return lag
def findOffset(audio1, audio2):
    correlation = signal.correlate(audio2, audio1, mode="full")
    lags = signal.correlation_lags(audio2.size, audio1.size, mode="full")
    lag = lags[np.argmax(correlation)]
    return lag
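# Minimal sanity check for both offset functions (illustrative, not part of
# the original pipeline and not called by default): a copy of a signal delayed
# by a known number of samples should recover exactly that lag.
def _selfTestOffset(delay_samples=100):
    a = np.random.randn(1000)
    b = np.concatenate([np.zeros(delay_samples), a])
    assert findOffset(a, b) == delay_samples
    assert findOffset2(a, b) == delay_samples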
def extractAudioFromAudioFile(inputFile, outputFile, startTimeInSeconds, endTimeInSeconds):
    audio = AudioSegment.from_file(inputFile)
    # Convert start and end times from seconds to milliseconds
    start_ms = int(startTimeInSeconds * 1000)
    end_ms = int(endTimeInSeconds * 1000)
    # Clamp end_ms to the actual duration of the audio
    end_ms = min(end_ms, len(audio))
    # Extract the segment and export it at the original bitrate
    seg = audio[start_ms:end_ms]
    bitrate = mediainfo(inputFile)['bit_rate']
    seg.export(outputFile, format="mp3", bitrate=bitrate)
def extractAudioFromVideoFile(inputFile, outputFile, startTimeInSeconds, endTimeInSeconds):
    video = VideoFileClip(inputFile)
    endTimeInSeconds = min(endTimeInSeconds, video.duration)
    clipped_audio = video.audio.subclip(startTimeInSeconds, endTimeInSeconds)
    clipped_audio.write_audiofile(outputFile, codec="pcm_s16le", bitrate="320k")
def extractAudioUsingFFmpeg(inputFile, outputFile, startTimeInSeconds, endTimeInSeconds, audio_delay_seconds=-0.002):  # Default: advance audio by 2 ms
    command = [
        'ffmpeg',
        '-y',                                    # Overwrite output files without asking
        '-v', 'error',                           # Show only errors
        '-itsoffset', str(audio_delay_seconds),  # Offset for audio, adjusts sync
        '-i', inputFile,                         # Input file
        '-ss', str(startTimeInSeconds),          # Start time for audio extraction
        '-to', str(endTimeInSeconds),            # End time for audio extraction
        '-vn',                                   # No video output
        '-acodec', 'pcm_s16le',                  # Audio codec for WAV output (or 'copy' for the original codec)
        '-ar', '44100',                          # Audio sample rate
        outputFile                               # Output file
    ]
    # Execute the FFmpeg command and report errors
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if process.returncode != 0:
        print(f"Error: {process.stderr}")
    else:
        print("Audio extraction completed successfully.")
def adjustAudio(audio_segment, lag, frame_rate):
    # Convert lag from samples to milliseconds, rounding at the last step
    ms_lag = round((lag / frame_rate) * 1000)
    if lag > 0:
        # Audio needs to start later: pad the beginning with silence
        silence = AudioSegment.silent(duration=ms_lag, frame_rate=frame_rate)
        adjusted_audio = silence + audio_segment
    else:
        # Audio needs to start earlier: trim from the beginning
        adjusted_audio = audio_segment[abs(ms_lag):]
    return adjusted_audio
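# Example (hypothetical numbers): at a frame rate of 44100 Hz, lag == 4410
# pads the track with 100 ms of silence, while lag == -4410 trims the first
# 100 ms instead.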
def alignAudioTrack(audioFile, newAudioFile, lag):
    audio, rate, audio_segment = loadAudio(audioFile, return_segment=True)
    # Shift the AudioSegment by the measured lag (in samples)
    adjusted_audio = adjustAudio(audio_segment, lag, rate)
    # Save the adjusted audio as 32-bit floating-point WAV
    adjusted_audio.export(newAudioFile, format="wav", codec="pcm_f32le")
def normalize_audio2(audio, amp_factor=1.0):
    return audio / np.max(np.abs(audio)) * amp_factor
def downsample(data, num_samples):
    indices = np.linspace(0, len(data) - 1, num_samples, dtype=int)
    return data[indices], indices
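# Example: downsample(np.arange(10), 5) keeps 5 evenly spaced samples and
# returns (array([0, 2, 4, 6, 9]), array([0, 2, 4, 6, 9])).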
def plotWaveforms(audioFile1, audioFile2, imageFile, startSegment=30, endSegment=40, clip_negative=True, num_samples=5000, barWidth=1):
    y1, sr1 = librosa.load(audioFile1, sr=None, offset=startSegment, duration=endSegment - startSegment)
    y2, sr2 = librosa.load(audioFile2, sr=None, offset=startSegment, duration=endSegment - startSegment)
    # Normalize the amplitude of the audio signals
    y1 = normalize_audio2(y1, 1)
    y2 = normalize_audio2(y2, 1)
    # Downsample the data, keeping the original sample indices so the time
    # axis still spans the full segment
    y1, indices1 = downsample(y1, num_samples)
    y2, indices2 = downsample(y2, num_samples)
    t1 = indices1 / sr1
    t2 = indices2 / sr2
    # Optionally discard negative amplitudes
    if clip_negative:
        y1 = np.clip(y1, 0, None)
        y2 = np.clip(y2, 0, None)
    # Create a figure with a black background
    fig, ax = plt.subplots(figsize=(14, 1), facecolor='black')
    # Set bar width based on the barWidth multiplier
    bar_width = (t1[1] - t1[0]) * barWidth
    # Plot amplitudes as bars centered on zero
    ax.bar(t1, y1, width=bar_width, color=(0.55, 0, 0.8), alpha=1, bottom=-y1/2)
    ax.bar(t2, y2, width=bar_width, color=(1, 0.6, 0), alpha=1, bottom=-y2/2)
    # Fit the axis limits to the data range, symmetric around zero
    ax.set_xlim(t1[0] - bar_width/2, t1[-1] + bar_width/2)
    max_abs_val = max(np.max(np.abs(y1)), np.max(np.abs(y2))) / 2
    ax.set_ylim(-max_abs_val, max_abs_val)
    # Add a center line
    ax.axhline(y=0, color=(1, 0.6, 0), linewidth=0.5)
    # Remove axes, labels, and title for a clean look
    ax.axis('off')
    # Adjust the subplot padding
    fig.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)
    # Save the figure at a fixed resolution
    plt.savefig(imageFile, format='png', dpi=300, bbox_inches='tight', pad_inches=0.05)
    plt.close()
def normalize_audio(audio):
    audio = audio / np.max(np.abs(audio))
    audio[audio < 0] = 0  # Clamp negative values to zero
    return audio
def sample_peaks(y, num_points=100):
    peaks, _ = find_peaks(y)
    if len(peaks) > num_points:
        selected_peaks = np.linspace(0, len(peaks) - 1, num_points, dtype=int)
        peaks = peaks[selected_peaks]
    return peaks
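# sample_peaks keeps at most num_points local maxima, spaced evenly across all
# detected peaks, so the interpolated envelope in plotWaveforms2 stays cheap
# to evaluate and draw.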
def plotWaveforms2(audioFile1, audioFile2, imageFile, startSegment=30, endSegment=31, num_points=441, line_thickness=1):
    y1, sr1 = librosa.load(audioFile1, sr=None, offset=startSegment, duration=endSegment - startSegment)
    y2, sr2 = librosa.load(audioFile2, sr=None, offset=startSegment, duration=endSegment - startSegment)
    y1, y2 = normalize_audio(y1), normalize_audio(y2)
    # Sample peaks and interpolate a smooth envelope through them
    peaks1 = sample_peaks(y1, num_points)
    peaks2 = sample_peaks(y2, num_points)
    t1 = np.linspace(0, len(y1) / sr1, len(y1))[peaks1]
    t2 = np.linspace(0, len(y2) / sr2, len(y2))[peaks2]
    f1 = interp1d(t1, y1[peaks1], kind='cubic', fill_value="extrapolate", bounds_error=False)
    f2 = interp1d(t2, y2[peaks2], kind='cubic', fill_value="extrapolate", bounds_error=False)
    fine_t1 = np.linspace(t1.min(), t1.max(), 500)
    fine_t2 = np.linspace(t2.min(), t2.max(), 500)
    plt.figure(figsize=(16, 1), facecolor='black')
    plt.plot(fine_t1, f1(fine_t1), color='green', alpha=1, linewidth=line_thickness)
    plt.plot(fine_t2, f2(fine_t2), color='orange', alpha=1, linewidth=line_thickness)
    plt.fill_between(fine_t1, 0, f1(fine_t1), color='green', alpha=0.5)
    plt.fill_between(fine_t2, 0, f2(fine_t2), color='orange', alpha=0.5)
    plt.axis('off')
    plt.savefig(imageFile, format='png', dpi=300, bbox_inches='tight', pad_inches=0.1)
    plt.close()
duration = 300  # seconds
limit = None  # Optional cap on the number of files to process
# Process all audio files in a directory
audioDir = "N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Poly Carve/1 original"
wavDir = "N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Poly Carve/_audio from video"
videoDir = "N:/RDX/Gumroad/Tutorials/VEX Volume 2/Chapters/Poly Carve/Video"
newAudioDir = "N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Poly Carve/2 synced"
imgDir = "N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Poly Carve/_img"
tempDir = os.path.join(os.path.dirname(audioDir), "_tracks")
processed_count = 0 # Counter for the number of files processed
# Create directories if not present
os.makedirs(newAudioDir, exist_ok=True)
#os.makedirs(imgDir, exist_ok=True)
#os.makedirs(tempDir, exist_ok=True)
for filename in os.listdir(videoDir):
    if filename.endswith(".trec"):
        if limit is not None and processed_count >= limit:
            print(f"Limit reached: processed {processed_count} files.")
            break
        baseName, _ = os.path.splitext(filename)
        origAudioFile = os.path.join(audioDir, f"{baseName}.mp3")
        refAudioFile = os.path.join(wavDir, f"{baseName}.wav")
        alignedAudioFile = os.path.join(newAudioDir, f"{baseName}.wav")
        if not os.path.exists(origAudioFile):
            print(f"Matching MP3 not found: {filename}.")
            continue
        audioFile1 = os.path.join(tempDir, f"{baseName}_audio.mp3")
        audioFile2 = os.path.join(tempDir, f"{baseName}_video.wav")
        if not os.path.exists(alignedAudioFile):
            # Extract audio tracks
            #if not os.path.exists(audioFile1):
            #    extractAudioFromAudioFile(origAudioFile, audioFile1, 0, 300)
            #if not os.path.exists(audioFile2):
            #    trecFile = os.path.join(videoDir, filename)
            #    extractAudioFromVideoFile(trecFile, audioFile2, 0, 300)
            #base_dir = os.path.dirname(trecFile)
            #temp_filename = os.path.join(base_dir, "temp_extract_audio.mp4")
            # Rename the original .trec file to .mp4 temporarily
            #os.rename(trecFile, temp_filename)
            #extractAudioUsingFFmpeg(temp_filename, audioFile2, 0, 300)
            #os.rename(temp_filename, trecFile)
            # Compute offset
            audio1, rate1 = loadAudio(origAudioFile)
            audio2, rate2 = loadAudio(refAudioFile)
            # moviepy introduces some delay, but it's fixed, so it can be hardcoded
            #delay = 1055
            lag = findOffset2(audio1, audio2)
            print(f"audio: {baseName} lag: {lag}")
            # Align the original audio track using the same lag (in samples)
            alignAudioTrack(origAudioFile, alignedAudioFile, lag)
        #else:
        #    print(f"Skipping {filename}, aligned file already exists.")
        # Plot waveforms
        #plotImageFile = os.path.join(imgDir, f"{baseName}.png")
        #plotWaveforms(audioFile2, alignedAudioFile, plotImageFile)
        processed_count += 1
#verifyAudioAlignment(newAudioDir, audioDir)
#audio1, rate1 = loadAudio("N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Curvature/_tracks/_manual.wav")
#audio2, rate2 = loadAudio("N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Curvature/_tracks/curvature 01_video.wav")
#lag = findOffset(audio1, audio2)
#print(lag)