Untitled
unknown
plain_text
5 months ago
12 kB
3
Indexable
"""Batch-align voice-over audio tracks to the audio embedded in TechSmith
.trec screen recordings.

Per file: load the original MP3 and the reference WAV (audio previously
extracted from the video), cross-correlate the two signals to find the lag in
samples, then pad or trim the original track by that lag and export it as a
32-bit float WAV. Commented-out helpers cover the extraction and waveform
plotting steps of the pipeline.
"""

import os
import logging
import subprocess  # hoisted: was imported mid-file in the original

import numpy as np
from scipy import signal
from scipy.signal import find_peaks
from scipy.interpolate import interp1d
import matplotlib.pyplot as plt
from matplotlib import cm
import librosa
import librosa.display
from pydub import AudioSegment
from pydub.utils import mediainfo
from moviepy.editor import VideoFileClip


def loadAudio(file_path, return_segment=False):
    """Load an audio file as a mono numpy array of samples.

    Returns (samples, frame_rate), or (samples, frame_rate, AudioSegment)
    when return_segment is True so the caller can re-export the audio later.
    """
    audio_segment = AudioSegment.from_file(file_path)
    data = np.array(audio_segment.get_array_of_samples())
    if audio_segment.channels > 1:
        # Samples are interleaved per channel; average them down to mono.
        data = data.reshape((-1, audio_segment.channels)).mean(axis=1)
    rate = audio_segment.frame_rate
    if return_segment:
        return data, rate, audio_segment
    return data, rate


def findOffset2(audio1, audio2):
    """Return the lag (in samples) of audio1 relative to audio2 via
    cross-correlation. Positive lag means audio1 starts later than audio2."""
    correlation = signal.correlate(audio2, audio1, mode='full')
    max_index = np.argmax(correlation)
    # With mode='full', zero lag sits at index len(audio1) - 1.
    return max_index - (len(audio1) - 1)


def findOffset(audio1, audio2):
    """Same contract as findOffset2, using scipy's correlation_lags helper."""
    correlation = signal.correlate(audio2, audio1, mode="full")
    lags = signal.correlation_lags(audio2.size, audio1.size, mode="full")
    return lags[np.argmax(correlation)]


def extractAudioFromAudioFile(inputFile, outputFile, startTimeInSeconds, endTimeInSeconds):
    """Export the [start, end] second range of an audio file as MP3,
    preserving the source bitrate reported by mediainfo."""
    audio = AudioSegment.from_file(inputFile)
    start_ms = int(startTimeInSeconds * 1000)
    # Clamp to the real duration so the slice never over-runs the file.
    end_ms = min(int(endTimeInSeconds * 1000), len(audio))
    seg = audio[start_ms:end_ms]
    bitrate = mediainfo(inputFile)['bit_rate']
    seg.export(outputFile, format="mp3", bitrate=bitrate)


def extractAudioFromVideoFile(inputFile, outputFile, startTimeInSeconds, endTimeInSeconds):
    """Extract [start, end] seconds of a video's audio track as 16-bit PCM WAV."""
    video = VideoFileClip(inputFile)
    if endTimeInSeconds > video.duration:
        endTimeInSeconds = video.duration
    clipped_audio = video.audio.subclip(startTimeInSeconds, endTimeInSeconds)
    clipped_audio.write_audiofile(outputFile, codec="pcm_s16le", bitrate="320k")


def extractAudioUsingFFmpeg(inputFile, outputFile, startTimeInSeconds, endTimeInSeconds,
                            audio_delay_seconds=-0.002):
    """Extract audio with ffmpeg, applying a small sync offset (default -2 ms)."""
    command = [
        'ffmpeg',
        '-y',                                    # overwrite output without asking
        '-v', 'error',                           # show only errors
        '-itsoffset', str(audio_delay_seconds),  # shift audio to adjust A/V sync
        '-i', inputFile,
        '-ss', str(startTimeInSeconds),          # extraction start time
        '-to', str(endTimeInSeconds),            # extraction end time
        '-vn',                                   # drop the video stream
        '-acodec', 'pcm_s16le',                  # 16-bit PCM WAV output
        '-ar', '44100',                          # resample to 44.1 kHz
        outputFile,
    ]
    process = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if process.returncode != 0:
        print(f"Error: {process.stderr}")
    else:
        print("Audio extraction completed successfully.")


def adjustAudio(audio_segment, lag, frame_rate):
    """Shift an AudioSegment by `lag` samples: pad with silence when the track
    must start later (lag > 0), or trim the head when it must start earlier."""
    # Convert lag from samples to milliseconds; round only at the final step.
    ms_lag = round((lag / frame_rate) * 1000)
    if lag > 0:
        silence = AudioSegment.silent(duration=ms_lag, frame_rate=frame_rate)
        return silence + audio_segment
    # lag <= 0: drop the first |ms_lag| milliseconds (zero lag trims nothing).
    return audio_segment[abs(ms_lag):]


def alignAudioTrack(audioFile, newAudioFile, lag):
    """Apply a sample lag to audioFile and save it as 32-bit float WAV."""
    audio, rate, audio_segment = loadAudio(audioFile, return_segment=True)
    adjusted_audio = adjustAudio(audio_segment, lag, rate)
    adjusted_audio.export(newAudioFile, format="wav", codec="pcm_f32le")


def normalize_audio2(audio, amp_factor=1.0):
    """Scale audio so its peak magnitude equals amp_factor.
    Silent input is returned unchanged instead of dividing by zero."""
    peak = np.max(np.abs(audio))
    if peak == 0:
        return audio
    return audio / peak * amp_factor


def downsample(data, num_samples):
    """Pick num_samples evenly spaced samples; returns (samples, indices)."""
    indices = np.linspace(0, len(data) - 1, num_samples, dtype=int)
    return data[indices], indices


def plotWaveforms(audioFile1, audioFile2, imageFile, startSegment=30, endSegment=40,
                  clip_negative=True, num_samples=5000, barWidth=1):
    """Render both tracks' amplitude envelopes as overlaid, zero-centered bar
    plots on a black background and save the figure to imageFile."""
    seg_duration = endSegment - startSegment
    y1, sr1 = librosa.load(audioFile1, sr=None, offset=startSegment, duration=seg_duration)
    y2, sr2 = librosa.load(audioFile2, sr=None, offset=startSegment, duration=seg_duration)

    # Normalize both tracks to a common peak amplitude.
    y1 = normalize_audio2(y1, 1)
    y2 = normalize_audio2(y2, 1)

    # Real segment durations, captured BEFORE downsampling. (The original
    # computed len(y1)/sr1 after reassigning y1 to the downsampled array,
    # shrinking the time axis by the downsample factor and giving the two
    # tracks mismatched spans whenever sr1 != sr2.)
    dur1 = len(y1) / sr1
    dur2 = len(y2) / sr2

    y1, indices1 = downsample(y1, num_samples)
    y2, indices2 = downsample(y2, num_samples)
    t1 = np.linspace(0, dur1, num_samples)
    t2 = np.linspace(0, dur2, num_samples)

    if clip_negative:
        y1 = np.clip(y1, 0, None)
        y2 = np.clip(y2, 0, None)

    fig, ax = plt.subplots(figsize=(14, 1), facecolor='black')
    bar_width = (t1[1] - t1[0]) * barWidth

    # Bars are bottom-shifted by -y/2 so each bar is centered on the zero line.
    ax.bar(t1, y1, width=bar_width, color=(0.55, 0, 0.8), alpha=1, bottom=-y1 / 2)
    ax.bar(t2, y2, width=bar_width, color=(1, 0.6, 0), alpha=1, bottom=-y2 / 2)

    # x-limits hug the data; y-limits are symmetric around zero
    # (y - y/2 is each bar's top edge).
    ax.set_xlim(t1[0] - bar_width / 2, t1[-1] + bar_width / 2)
    max_abs_val = max(np.max(np.abs(y1 - y1 / 2)), np.max(np.abs(y2 - y2 / 2)))
    ax.set_ylim(-max_abs_val, max_abs_val)

    ax.axhline(y=0, color=(1, 0.6, 0), linewidth=0.5)  # center line
    ax.axis('off')  # clean look: no axes, labels, or title
    fig.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)
    plt.savefig(imageFile, format='png', dpi=300, bbox_inches='tight', pad_inches=0.05)
    plt.close()


def normalize_audio(audio):
    """Peak-normalize audio and clamp negative samples to zero.
    Silent input is returned unchanged instead of dividing by zero."""
    peak = np.max(np.abs(audio))
    if peak == 0:
        return audio
    audio = audio / peak
    audio[audio < 0] = 0
    return audio


def sample_peaks(y, num_points=100):
    """Find local maxima in y and thin them to at most num_points indices."""
    peaks, _ = find_peaks(y)
    if len(peaks) > num_points:
        selected = np.linspace(0, len(peaks) - 1, num_points, dtype=int)
        peaks = peaks[selected]
    return peaks


def plotWaveforms2(audioFile1, audioFile2, imageFile, startSegment=30, endSegment=31,
                   num_points=441, line_thickness=1):
    """Plot smoothed (cubic-interpolated) peak envelopes of both tracks."""
    seg_duration = endSegment - startSegment
    y1, sr1 = librosa.load(audioFile1, sr=None, offset=startSegment, duration=seg_duration)
    y2, sr2 = librosa.load(audioFile2, sr=None, offset=startSegment, duration=seg_duration)
    y1, y2 = normalize_audio(y1), normalize_audio(y2)
    print(len(y1))
    print(len(y2))

    peaks1 = sample_peaks(y1, num_points)
    peaks2 = sample_peaks(y2, num_points)
    t1 = np.linspace(0, len(y1) / sr1, len(y1))[peaks1]
    t2 = np.linspace(0, len(y2) / sr2, len(y2))[peaks2]

    # Cubic interpolation through the peaks gives a smooth envelope curve.
    f1 = interp1d(t1, y1[peaks1], kind='cubic', fill_value="extrapolate", bounds_error=False)
    f2 = interp1d(t2, y2[peaks2], kind='cubic', fill_value="extrapolate", bounds_error=False)
    fine_t1 = np.linspace(t1.min(), t1.max(), 500)
    fine_t2 = np.linspace(t2.min(), t2.max(), 500)

    plt.figure(figsize=(16, 1), facecolor='black')
    plt.plot(fine_t1, f1(fine_t1), color='green', alpha=1, linewidth=line_thickness)
    plt.plot(fine_t2, f2(fine_t2), color='orange', alpha=1, linewidth=line_thickness)
    plt.fill_between(fine_t1, 0, f1(fine_t1), color='green', alpha=0.5)
    plt.fill_between(fine_t2, 0, f2(fine_t2), color='orange', alpha=0.5)
    plt.axis('off')
    plt.savefig(imageFile, format='png', dpi=300, bbox_inches='tight', pad_inches=0.1)
    plt.close()


def main():
    """Align every original MP3 against the audio of its matching .trec video."""
    duration = 300  # seconds
    limit = None    # None processes every file; set an int to stop early

    audioDir = "N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Poly Carve/1 original"
    wavDir = "N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Poly Carve/_audio from video"
    videoDir = "N:/RDX/Gumroad/Tutorials/VEX Volume 2/Chapters/Poly Carve/Video"
    newAudioDir = "N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Poly Carve/2 synced"
    imgDir = "N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Poly Carve/_img"
    tempDir = os.path.join(os.path.dirname(audioDir), "_tracks")

    processed_count = 0  # number of files processed so far

    # Create output directories if not present.
    os.makedirs(newAudioDir, exist_ok=True)
    # os.makedirs(imgDir, exist_ok=True)
    # os.makedirs(tempDir, exist_ok=True)

    for filename in os.listdir(videoDir):
        if not filename.endswith(".trec"):
            continue
        if limit is not None and processed_count >= limit:
            print(f"Limit reached: processed {processed_count} files.")
            break

        baseName, _ = os.path.splitext(filename)
        origAudioFile = os.path.join(audioDir, f"{baseName}.mp3")
        refAudioFile = os.path.join(wavDir, f"{baseName}.wav")
        alignedAudioFile = os.path.join(newAudioDir, f"{baseName}.wav")

        if not os.path.exists(origAudioFile):
            print(f"Matching MP3 not found: {origAudioFile}.")
            continue

        audioFile1 = os.path.join(tempDir, f"{baseName}_audio.mp3")
        audioFile2 = os.path.join(tempDir, f"{baseName}_video.wav")

        if not os.path.exists(alignedAudioFile):
            # --- Optional extraction steps, kept for reference ---
            # if not os.path.exists(audioFile1):
            #     extractAudioFromAudioFile(origAudioFile, audioFile1, 0, 300)
            # if not os.path.exists(audioFile2):
            #     trecFile = os.path.join(videoDir, filename)
            #     extractAudioFromVideoFile(trecFile, audioFile2, 0, 300)
            # ffmpeg cannot read .trec directly; rename to .mp4 temporarily:
            # base_dir = os.path.dirname(trecFile)
            # temp_filename = os.path.join(base_dir, "temp_extract_audio.mp4")
            # os.rename(trecFile, temp_filename)
            # extractAudioUsingFFmpeg(temp_filename, audioFile2, 0, 300)
            # os.rename(temp_filename, trecFile)

            # Compute the offset between the original track and the video audio.
            audio1, rate1 = loadAudio(origAudioFile)
            audio2, rate2 = loadAudio(refAudioFile)
            # moviepy introduces some delay but it's fixed so it could be
            # hardcoded (delay = 1055); cross-correlation finds it per file.
            lag = findOffset2(audio1, audio2)
            print(f"audio: {baseName} lag: {lag}")

            # Align the original audio track using the same lag in samples.
            alignAudioTrack(origAudioFile, alignedAudioFile, lag)
        # else:
        #     print(f"Skipping {alignedAudioFile}, aligned file already exists.")

        # Plot waveforms:
        # plotImageFile = os.path.join(imgDir, f"{baseName}.png")
        # plotWaveforms(audioFile2, alignedAudioFile, plotImageFile)

        processed_count += 1


if __name__ == "__main__":
    main()

# verifyAudioAlignment(newAudioDir, audioDir)
# audio1, rate1 = loadAudio("N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Curvature/_tracks/_manual.wav")
# audio2, rate2 = loadAudio("N:/RDX/Gumroad/Tutorials/VEX Volume 2/Audio/Curvature/_tracks/curvature 01_video.wav")
# lag = findOffset(audio1, audio2)
# print(lag)
Editor is loading...
Leave a Comment