vtt-concat.py
unknown
python
2 years ago
5.3 kB
7
Indexable
"""Concatenate numbered WebVTT subtitle segments into one subtitle file.

Segments are expected to be named ``0000.vtt``, ``0001.vtt``, ... inside a
single directory. Every segment after index 0 has its cue timestamps shifted
by ``index * timescale`` seconds, the segments are joined, identical cues that
were split across a segment boundary are merged, and the result is saved as
WebVTT or (optionally) converted to SRT.
"""
import os

try:
    import webvtt
except ImportError:  # Reported with an install hint by _require_webvtt().
    webvtt = None

# Module-level settings read by count_vtt_files() and concatenate_subtitles().
# They are filled in from the interactive prompts when run as a script.
directory = ""
output_directory_path = ""
output_file_name = ""


def _require_webvtt():
    """Raise ImportError with an install hint if ``webvtt`` is unavailable."""
    if webvtt is None:
        raise ImportError(
            "The 'webvtt' module (package 'webvtt-py') is required: "
            "pip install webvtt-py"
        )


def count_vtt_files():
    """Return how many entries in ``directory`` have a name ending in ".vtt"."""
    return sum(1 for name in os.listdir(directory) if name.endswith(".vtt"))


def _segment_indices(path):
    """Return the sorted indices N for which ``path`` contains ``{N:04d}.vtt``."""
    indices = []
    for name in os.listdir(path):
        stem, ext = os.path.splitext(name)
        if ext != ".vtt" or not (stem.isascii() and stem.isdigit()):
            continue
        index = int(stem)
        # Only accept the exact zero-padded spelling the segments use.
        if f"{index:04d}" == stem:
            indices.append(index)
    return sorted(indices)


def _split_timestamp(timestamp):
    """Split a WebVTT timestamp into (whole seconds, fraction digits).

    Accepts both ``HH:MM:SS.mmm`` and the short ``MM:SS.mmm`` form.

    Raises:
        ValueError: if the text is not in one of those two shapes.
    """
    clock, dot, fraction = timestamp.strip().partition(".")
    fields = clock.split(":")
    if len(fields) == 2:
        fields.insert(0, "0")  # hours omitted
    if not dot or len(fields) != 3:
        raise ValueError(f"Unrecognised WebVTT timestamp: {timestamp!r}")
    hours, minutes, seconds = (int(field) for field in fields)
    return hours * 3600 + minutes * 60 + seconds, fraction


def _shift_timing_line(line, seconds_to_add):
    """Return a cue timing line with both timestamps moved forward.

    Any cue settings that follow the end timestamp are kept after it.
    """
    start_part, _, end_part = line.partition("-->")
    end_fields = end_part.split(None, 1)
    if not end_fields:
        raise ValueError(f"Malformed cue timing line: {line!r}")
    start_time = adjust_timestamp(start_part, seconds_to_add)
    end_time = adjust_timestamp(end_fields[0], seconds_to_add)
    settings = end_fields[1].strip() if len(end_fields) > 1 else ""
    suffix = f" {settings}" if settings else ""
    return f"{start_time} --> {end_time}{suffix}\n"


def concatenate_subtitles(timescale):
    """Join all numbered segments in ``directory`` into one subtitle file.

    The output path is built from the module-level ``output_directory_path``
    and ``output_file_name``. After writing the joined file the user is asked
    whether the result should be kept as VTT or converted to SRT; for SRT the
    intermediate ``.vtt`` file is removed.

    Args:
        timescale: whole number of seconds each segment covers; segment N is
            shifted by ``N * timescale`` seconds.

    Raises:
        FileNotFoundError: if no non-empty numbered segment is found.
    """
    output_file = os.path.join(output_directory_path, output_file_name + ".vtt")
    output_lines = []

    # Walk the segments that actually exist so a gap in the numbering does
    # not cause the trailing segments to be dropped.
    for index in _segment_indices(directory):
        filename = os.path.join(directory, f"{index:04d}.vtt")
        with open(filename, "r", encoding="utf-8") as file:
            lines = file.readlines()
        if not lines:
            continue

        if index > 0:
            seconds_to_add = index * timescale
            lines = [
                _shift_timing_line(line, seconds_to_add) if "-->" in line else line
                for line in lines
            ]

        if output_lines:
            # Remove the trailing newline from the previous segment's last line.
            output_lines[-1] = output_lines[-1].strip()
        else:
            # Keep the header line of the first segment that is present.
            output_lines.append(lines[0])
        output_lines.extend(lines[1:])

    if not output_lines:
        raise FileNotFoundError(
            f"No numbered .vtt segments (0000.vtt, 0001.vtt, ...) in {directory!r}"
        )

    with open(output_file, "w", encoding="utf-8") as file:
        file.writelines(output_lines)

    merge_consecutive_subtitles(output_file)

    with open(output_file, "a", encoding="utf-8") as file:
        file.write("\n\n")

    choice = input("Enter \"0\" for VTT format or \"1\" for SRT format(Any invalid inputs would default to VTT format): ").strip()
    try:
        want_srt = int(choice) == 1
    except ValueError:
        want_srt = False  # the prompt promises VTT for invalid input

    if want_srt:
        vtt_to_srt(output_file, os.path.join(output_directory_path, output_file_name + ".srt"))
        os.remove(output_file)
    else:
        print(f"Concatenation completed. Result saved in {output_file}")


def adjust_timestamp(timestamp: str, seconds_to_add: int) -> str:
    """Return ``timestamp`` moved forward by ``seconds_to_add`` whole seconds.

    The fractional part is copied through unchanged and the result is always
    written as ``HH:MM:SS.fraction``, even if the input omitted the hours.

    Raises:
        ValueError: if ``timestamp`` is not ``HH:MM:SS.mmm`` or ``MM:SS.mmm``.
    """
    total_seconds, fraction = _split_timestamp(timestamp)
    hours, remainder = divmod(total_seconds + seconds_to_add, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{fraction}"


def timestamp_to_milliseconds(timestamp: str) -> int:
    """Convert ``HH:MM:SS.mmm`` (or ``MM:SS.mmm``) to a millisecond count.

    Raises:
        ValueError: if ``timestamp`` is not in one of those two shapes.
    """
    total_seconds, fraction = _split_timestamp(timestamp)
    return total_seconds * 1000 + int(fraction)


def _merge_adjacent_captions(captions):
    """Return ``captions`` with split-up duplicates folded into one cue.

    A cue is folded into the previous kept cue when the text is identical and
    it starts exactly 1 ms after that cue ends (presumably a cue that was cut
    at a segment boundary). The kept cue's ``end`` is extended in place.
    """
    merged = []
    for caption in captions:
        previous = merged[-1] if merged else None
        if (
            previous is not None
            and caption.text == previous.text
            and timestamp_to_milliseconds(caption.start)
            - timestamp_to_milliseconds(previous.end) == 1
        ):
            previous.end = caption.end
        else:
            merged.append(caption)
    return merged


def merge_consecutive_subtitles(file):
    """Merge identical back-to-back cues in the VTT ``file`` and save it.

    The merged list is built separately and then swapped in, so no cue is
    skipped and runs of three or more identical cues collapse into one.
    """
    _require_webvtt()
    vtt = webvtt.read(file)
    vtt.captions[:] = _merge_adjacent_captions(list(vtt.captions))
    vtt.save()


def vtt_to_srt(in_file, out_file):
    """Convert the WebVTT file ``in_file`` to an SRT file at ``out_file``.

    Blocks without a timing line (such as the ``WEBVTT`` header) are skipped.
    Lines of a cue's text are joined with single spaces, cues are numbered
    from 1, and cue settings after the end timestamp are not carried over.
    """
    with open(in_file, "r", encoding="utf-8") as f:
        vtt_content = f.read()

    entries = []
    for block in vtt_content.strip().split("\n\n"):
        lines = block.strip().split("\n")
        timing_index = next(
            (i for i, line in enumerate(lines) if "-->" in line), None
        )
        if timing_index is None:
            continue

        start_part, _, end_part = lines[timing_index].partition("-->")
        end_fields = end_part.split(None, 1)
        end_time = end_fields[0] if end_fields else ""
        # SRT uses a comma as the decimal separator in timestamps.
        timestamp = f"{start_part.strip()} --> {end_time}".replace(".", ",")
        text = " ".join(lines[timing_index + 1:])
        entries.append(f"{len(entries) + 1}\n{timestamp}\n{text}\n\n")

    with open(out_file, "w", encoding="utf-8") as f:
        f.write("".join(entries))

    print(f"Concatenation completed. Result saved in {out_file}")


if __name__ == "__main__":
    _require_webvtt()
    directory = input("Enter the directory path where your subtitle segments are located: ").strip()
    output_directory_path = input("Enter the output directory path for the consolidated subtitle: ").strip()
    output_file_name = input("Enter the output file name for the consolidated subtitle (without extension): ").strip()

    # Usage example
    timescale = 8  # Time scale in seconds
    concatenate_subtitles(timescale)
Editor is loading...