vtt-concat.py
unknown
python
2 years ago
5.3 kB
8
Indexable
import os
import webvtt
# Ask for the three locations up front, in a fixed order: the segment
# directory, the output directory, and the output base name. Each answer is
# stripped of surrounding whitespace and kept as a module-level name that the
# functions below read.
directory, output_directory_path, output_file_name = (
    input(prompt).strip()
    for prompt in (
        "Enter the directory path where your subtitle segments are located: ",
        "Enter the output directory path for the consolidated subtitle: ",
        "Enter the output file name for the consolidated subtitle (without extension): ",
    )
)
def count_vtt_files():
    """Return how many entries of the module-level ``directory`` end in ``.vtt``."""
    return sum(1 for entry in os.listdir(directory) if entry.endswith(".vtt"))
def concatenate_subtitles(timescale):
    """Stitch the numbered VTT segments into a single subtitle file.

    Segments are looked up in the module-level ``directory`` under the names
    ``0000.vtt``, ``0001.vtt``, ... for as many indices as there are ``.vtt``
    files in that directory; indices whose file does not exist are skipped.
    Every timing line of segment ``i`` (``i > 0``) is shifted forward by
    ``i * timescale`` seconds.  The combined text is written to
    ``<output_directory_path>/<output_file_name>.vtt``, passed through
    ``merge_consecutive_subtitles`` and finally, if the user asks for it,
    converted to SRT (the intermediate VTT file is then deleted).

    Args:
        timescale: Number of seconds by which each successive segment is
            offset.  Must be an ``int`` because the shifted timestamps are
            formatted with integer format codes.
    """
    segment_count = count_vtt_files()
    output_file = os.path.join(output_directory_path, output_file_name + ".vtt")
    output_lines = []  # Lines of the combined file, in output order

    for i in range(segment_count):
        filename = os.path.join(directory, f"{i:04d}.vtt")  # Zero-padded segment name
        if not os.path.exists(filename):
            continue
        # Read as UTF-8 so the text round-trips with the UTF-8 write below
        # instead of depending on the platform's default encoding.
        with open(filename, "r", encoding="utf-8") as file:
            lines = file.readlines()
        if not lines:
            # An empty segment has neither a header line nor cues to contribute.
            continue

        if i > 0:
            # Shift every timing line; the first segment already starts at zero.
            seconds_to_add = i * timescale
            for j, line in enumerate(lines):
                if "-->" in line:  # Timing line
                    start_time, end_time = line.strip().split(" --> ")
                    start_time = adjust_timestamp(start_time, seconds_to_add)
                    end_time = adjust_timestamp(end_time, seconds_to_add)
                    lines[j] = f"{start_time} --> {end_time}\n"

        if output_lines:
            # Drop the trailing newline/whitespace of the previous segment's
            # last line before appending this segment's body.
            output_lines[-1] = output_lines[-1].strip()
        else:
            # First segment actually read: keep its header line ("WEBVTT") so
            # the combined file stays parseable even if 0000.vtt is absent.
            output_lines.append(lines[0])
        output_lines.extend(lines[1:])  # Everything after the header line

    with open(output_file, "w", encoding="utf-8") as file:
        file.writelines(output_lines)
    merge_consecutive_subtitles(output_file)
    with open(output_file, "a", encoding="utf-8") as file:
        file.write("\n\n")

    answer = input("Enter \"0\" for VTT format or \"1\" for SRT format(Any invalid inputs would default to VTT format): ").strip()
    try:
        want_srt = int(answer) == 1
    except ValueError:
        # The prompt promises that invalid input falls back to VTT.
        want_srt = False

    if want_srt:
        vtt_to_srt(output_file, os.path.join(output_directory_path, output_file_name + ".srt"))
        os.remove(output_file)
    else:
        print(f"Concatenation completed. Result saved in {output_file}")
def adjust_timestamp(timestamp, seconds_to_add):
    """Return ``timestamp`` shifted forward by ``seconds_to_add`` seconds.

    Args:
        timestamp: A WebVTT timestamp, either ``HH:MM:SS.mmm`` or the short
            form ``MM:SS.mmm`` (WebVTT allows the hours field to be omitted).
        seconds_to_add: Whole number of seconds to add; must be an ``int``
            because the result is formatted with integer format codes.

    Returns:
        The shifted timestamp, always in the long ``HH:MM:SS.mmm`` form.  The
        part after the ``.`` is copied through unchanged.

    Raises:
        ValueError: If ``timestamp`` does not have exactly one ``.`` or does
            not have two or three ``:``-separated numeric fields before it.
    """
    clock, milliseconds = timestamp.split(".")
    fields = [int(part) for part in clock.split(":")]
    if len(fields) == 2:
        # Short form without hours: treat it as 0 hours.
        fields.insert(0, 0)
    hours, minutes, seconds = fields

    total_seconds = hours * 3600 + minutes * 60 + seconds + seconds_to_add
    adjusted_hours, remainder = divmod(total_seconds, 3600)
    adjusted_minutes, adjusted_seconds = divmod(remainder, 60)
    return f"{adjusted_hours:02d}:{adjusted_minutes:02d}:{adjusted_seconds:02d}.{milliseconds}"
def timestamp_to_milliseconds(timestamp):
    """Convert an ``HH:MM:SS.mmm`` timestamp to a whole number of milliseconds.

    The digits after the ``.`` are read as an integer count of milliseconds.
    """
    hh, mm, tail = timestamp.split(":")
    ss, ms = tail.split(".")
    return ((int(hh) * 60 + int(mm)) * 60 + int(ss)) * 1000 + int(ms)
def merge_consecutive_subtitles(file):
    """Merge adjacent captions that repeat the same text 1 ms apart.

    The file is parsed with ``webvtt.read``.  Whenever a caption has the same
    text as the caption kept just before it and starts exactly one
    millisecond after that caption ends, the earlier caption's end time is
    extended to cover it and the later caption is dropped.  The result is
    written back with ``vtt.save()``.

    Args:
        file: Path of the VTT file to rewrite.
    """
    vtt = webvtt.read(file)

    # Collect the surviving captions in a separate list.  Deleting from the
    # caption list while iterating over it skips the element that follows
    # every deletion, which leaves runs of three or more duplicates (and
    # back-to-back duplicate pairs) only partly merged.
    merged = []
    for caption in list(vtt.captions):
        previous = merged[-1] if merged else None
        if (
            previous is not None
            and caption.text == previous.text
            and timestamp_to_milliseconds(caption.start)
            - timestamp_to_milliseconds(previous.end) == 1
        ):
            previous.end = caption.end
        else:
            merged.append(caption)

    # Replace the contents in place so the object being saved sees the result.
    vtt.captions[:] = merged
    vtt.save()
def vtt_to_srt(in_file, out_file):
    """Convert the VTT file ``in_file`` into an SRT file at ``out_file``.

    The input is split into blocks on blank lines.  In each block the first
    line containing ``-->`` is taken as the timing line; the lines after it
    are joined with single spaces to form the caption text.  Blocks without
    a timing line (the ``WEBVTT`` header, empty blocks) are skipped, and any
    lines before the timing line (such as a cue identifier) are ignored.
    Captions are numbered from 1 and ``.`` in the timing line is replaced by
    ``,`` as SRT requires.  A completion message is printed at the end.

    Args:
        in_file: Path of the UTF-8 encoded VTT file to read.
        out_file: Path of the SRT file to write (UTF-8).
    """
    with open(in_file, "r", encoding="utf-8") as f:
        vtt_content = f.read()

    entries = []
    for block in vtt_content.strip().split("\n\n"):
        lines = block.strip().split("\n")
        timing_index = next(
            (index for index, line in enumerate(lines) if "-->" in line), None
        )
        if timing_index is None:
            # Header, note or empty block: nothing to emit.
            continue
        # SRT uses a comma as the decimal separator in timestamps.
        timestamp = lines[timing_index].strip().replace(".", ",")
        text = " ".join(lines[timing_index + 1:])
        entries.append(f"{len(entries) + 1}\n{timestamp}\n{text}\n\n")

    with open(out_file, "w", encoding="utf-8") as f:
        f.write("".join(entries))
    print(f"Concatenation completed. Result saved in {out_file}")
# Script entry point: these statements run as soon as the file is executed.
# concatenate_subtitles() shifts the timing lines of segment i forward by
# i * timescale seconds, so timescale is the per-segment offset.
timescale = 8 # Per-segment time offset, in seconds
concatenate_subtitles(timescale)
Editor is loading...