import os
import re
def convert_vtt_to_srt(vtt_file_path, srt_file_path):
    """Convert one WebVTT subtitle file into a numbered SRT file.

    Only cue timing lines (``start --> end``) open a new subtitle.  The
    ``WEBVTT`` header, header metadata, NOTE/STYLE blocks and cue
    identifiers (anything that is not attached to a timing line) are
    skipped.  Timestamps are rewritten to the SRT form ``HH:MM:SS,mmm``
    (a missing hour field becomes ``00``) and cue settings that follow the
    end time are dropped.  Runs of whitespace inside a text line are
    collapsed to a single space.

    Args:
        vtt_file_path: Path of the UTF-8 ``.vtt`` file to read.
        srt_file_path: Path of the ``.srt`` file to write (overwritten).

    Raises:
        OSError: Propagated from ``open()`` if a file cannot be accessed.
    """
    # A timing line is recognised by its full "start --> end" shape rather
    # than by "starts with a digit", so subtitle text such as "2 apples" or
    # a numeric cue identifier is never mistaken for a new cue.
    timing_re = re.compile(
        r'^\s*(?:(\d{2,}):)?(\d{2}):(\d{2})[.,](\d{3})'
        r'\s+-->\s+'
        r'(?:(\d{2,}):)?(\d{2}):(\d{2})[.,](\d{3})'
    )

    # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM if present.
    with open(vtt_file_path, 'r', encoding='utf-8-sig') as vtt_file:
        vtt_content = vtt_file.read()

    cues = []       # one list per cue: [timing_line, text_line, ...]
    current = None  # cue currently collecting text, or None outside a cue
    for line in vtt_content.split('\n'):
        timing = timing_re.match(line)
        if timing:
            h1, m1, s1, ms1, h2, m2, s2, ms2 = timing.groups()
            current = [
                f"{h1 or '00'}:{m1}:{s1},{ms1} --> {h2 or '00'}:{m2}:{s2},{ms2}"
            ]
            cues.append(current)
        elif not line.strip():
            # A blank line terminates the cue; whatever follows up to the
            # next timing line is an identifier or comment, not cue text.
            current = None
        elif current is not None:
            current.append(' '.join(line.split()))
        # Otherwise: header / metadata / identifier line -> ignored.

    numbered_cues = [
        f"{index}\n" + '\n'.join(cue) for index, cue in enumerate(cues, start=1)
    ]
    numbered_srt_content = '\n\n'.join(numbered_cues) + '\n\n'

    with open(srt_file_path, 'w', encoding='utf-8') as srt_file:
        srt_file.write(numbered_srt_content)
def batch_convert_vtt_to_srt(directory_path):
    """Convert every ``.vtt`` file directly inside *directory_path* to ``.srt``.

    Each output file is written next to its source, with the same base name
    and a ``.srt`` extension.  One "Converted: ..." line is printed per file.

    Args:
        directory_path: Directory to scan (not recursive).
    """
    for entry in os.listdir(directory_path):
        # Guard clause: anything that is not a .vtt file is left alone.
        if not entry.endswith(".vtt"):
            continue
        stem, _extension = os.path.splitext(entry)
        source = os.path.join(directory_path, entry)
        target = os.path.join(directory_path, stem + ".srt")
        convert_vtt_to_srt(source, target)
        print(f"Converted: {source} -> {target}")
# Folder whose .vtt files are converted when this script runs.
directory_path = r"D:\Work_Tools\chinaminjian\p1"
batch_convert_vtt_to_srt(directory_path=directory_path)
# Part 2 (p2): variant that removes spaces from the subtitles ("无空格" = "no spaces").
import os
import re
def convert_vtt_to_srt(vtt_file_path, srt_file_path):
    """Convert one WebVTT file into a numbered SRT file with space-free text.

    Only cue timing lines (``start --> end``) open a new subtitle.  The
    ``WEBVTT`` header, header metadata, NOTE/STYLE blocks and cue
    identifiers are skipped.  Timestamps are rewritten to the SRT form
    ``HH:MM:SS,mmm`` (a missing hour field becomes ``00``) and cue settings
    after the end time are dropped.  All whitespace is removed from the
    subtitle *text* lines only; the timing line keeps its `` --> ``
    separator.

    Args:
        vtt_file_path: Path of the UTF-8 ``.vtt`` file to read.
        srt_file_path: Path of the ``.srt`` file to write (overwritten).

    Raises:
        OSError: Propagated from ``open()`` if a file cannot be accessed.
    """
    # A timing line is recognised by its full "start --> end" shape rather
    # than by "starts with a digit", so text such as "3 个 人" or a numeric
    # cue identifier is never mistaken for a new cue.
    timing_re = re.compile(
        r'^\s*(?:(\d{2,}):)?(\d{2}):(\d{2})[.,](\d{3})'
        r'\s+-->\s+'
        r'(?:(\d{2,}):)?(\d{2}):(\d{2})[.,](\d{3})'
    )

    # "utf-8-sig" reads plain UTF-8 and also drops a leading BOM if present.
    with open(vtt_file_path, 'r', encoding='utf-8-sig') as vtt_file:
        vtt_content = vtt_file.read()

    cues = []       # one list per cue: [timing_line, text_line, ...]
    current = None  # cue currently collecting text, or None outside a cue
    for line in vtt_content.split('\n'):
        timing = timing_re.match(line)
        if timing:
            h1, m1, s1, ms1, h2, m2, s2, ms2 = timing.groups()
            current = [
                f"{h1 or '00'}:{m1}:{s1},{ms1} --> {h2 or '00'}:{m2}:{s2},{ms2}"
            ]
            cues.append(current)
        elif not line.strip():
            # A blank line terminates the cue; whatever follows up to the
            # next timing line is an identifier or comment, not cue text.
            current = None
        elif current is not None:
            # Strip whitespace from the text only, never from the timing
            # line, so the " --> " separator stays intact.
            current.append(''.join(line.split()))
        # Otherwise: header / metadata / identifier line -> ignored.

    numbered_cues = [
        f"{index}\n" + '\n'.join(cue) for index, cue in enumerate(cues, start=1)
    ]
    numbered_srt_content = '\n\n'.join(numbered_cues) + '\n\n'

    with open(srt_file_path, 'w', encoding='utf-8') as srt_file:
        srt_file.write(numbered_srt_content)
def batch_convert_vtt_to_srt(directory_path):
    """Run ``convert_vtt_to_srt`` on each ``.vtt`` file found in a directory.

    The directory is listed once (non-recursively); every name ending in
    ``.vtt`` is converted to a sibling file with the ``.srt`` extension and
    a "Converted: ..." line is printed for it.

    Args:
        directory_path: Directory containing the ``.vtt`` files.
    """
    vtt_names = [name for name in os.listdir(directory_path) if name.endswith(".vtt")]
    for vtt_name in vtt_names:
        srt_name = os.path.splitext(vtt_name)[0] + ".srt"
        vtt_path = os.path.join(directory_path, vtt_name)
        srt_path = os.path.join(directory_path, srt_name)
        convert_vtt_to_srt(vtt_path, srt_path)
        print(f"Converted: {vtt_path} -> {srt_path}")
# Placeholder: replace with the folder that holds the .vtt files.
directory_path = "your_directory_path"
batch_convert_vtt_to_srt(directory_path=directory_path)
# Requires the googletrans package; install with: pip install googletrans==4.0.0-rc1
import os
import time
from googletrans import Translator
def translate_srt(input_file, output_file, retry_count=3):
    """Translate the contents of an SRT file to English.

    The whole file is sent to googletrans in a single request.  If every
    attempt fails, the untranslated content is written to *output_file*
    instead, so an output file is always produced.

    Args:
        input_file: Path of the UTF-8 ``.srt`` file to read.
        output_file: Path of the file to write (overwritten).
        retry_count: Maximum number of translation attempts.

    Raises:
        OSError: Propagated from ``open()`` if a file cannot be accessed.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    for attempt in range(retry_count):
        try:
            translator = Translator()
            # Empty content is treated as English; detection is skipped.
            source_language = translator.detect(content).lang if content else 'en'
            # The source language is a per-call argument of translate();
            # the Translator constructor does not accept src/dest.  Chinese
            # is left on auto-detection, as before.
            # NOTE(review): detect() may report several languages as a list;
            # in that case fall back to auto-detection as well -- confirm.
            src = 'auto'
            if isinstance(source_language, str) and source_language != 'zh-CN':
                src = source_language
            translated_content = translator.translate(content, src=src, dest='en').text
            break  # Translation succeeded; stop retrying.
        except Exception as e:
            # Broad on purpose: this is the retry boundary around a network
            # call into a third-party library.
            print(f"Translation error for file {input_file} (Attempt {attempt + 1}): {str(e)}")
            if attempt + 1 < retry_count:
                time.sleep(5)  # Back off before the next attempt only.
    else:
        # Loop finished without a break: every attempt failed.
        translated_content = content
        print(f"Translation failed for file {input_file} after {retry_count} attempts.")

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(translated_content)
def translate_and_create_ensrt_directory(directory_path):
    """Translate every ``.srt`` file in a directory into an ``ensrt`` subfolder.

    For each name ending in ``.srt`` directly inside *directory_path*,
    ``translate_srt`` is called to produce ``ensrt/en_<name>``.

    Args:
        directory_path: Directory to scan (not recursive).
    """
    ensrt_directory = os.path.join(directory_path, 'ensrt')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(ensrt_directory, exist_ok=True)

    for filename in os.listdir(directory_path):
        if filename.endswith('.srt'):
            input_file_path = os.path.join(directory_path, filename)
            output_file_path = os.path.join(ensrt_directory, f'en_{filename}')
            translate_srt(input_file_path, output_file_path)
if __name__ == "__main__":
    # Folder whose .srt files get an English copy under <folder>/ensrt.
    target_directory = r"D:\Work_Tools\民间故事"
    translate_and_create_ensrt_directory(directory_path=target_directory)
    # Message reads: "Translation and creation of the ensrt directory finished!"
    print("翻译并创建ensrt目录完成!")
# Final version of the translation script.
import os
import time
from googletrans import Translator
def translate_srt(input_file, output_file, retry_count=3):
    """Translate the contents of an SRT file to English.

    The whole file is sent to googletrans in a single request.  Nothing is
    written when every attempt fails.

    Args:
        input_file: Path of the UTF-8 ``.srt`` file to read.
        output_file: Path of the file to write (overwritten).
        retry_count: Maximum number of translation attempts.

    Returns:
        *output_file* when the translation was written, or ``None`` when
        all attempts failed.

    Raises:
        OSError: Propagated from ``open()`` if a file cannot be accessed.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    for attempt in range(retry_count):
        try:
            translator = Translator()
            # Empty content is treated as English; detection is skipped.
            source_language = translator.detect(content).lang if content else 'en'
            # The source language is a per-call argument of translate();
            # the Translator constructor does not accept src/dest.  Chinese
            # is left on auto-detection, as before.
            # NOTE(review): detect() may report several languages as a list;
            # in that case fall back to auto-detection as well -- confirm.
            src = 'auto'
            if isinstance(source_language, str) and source_language != 'zh-CN':
                src = source_language
            translated_content = translator.translate(content, src=src, dest='en').text
            break  # Translation succeeded; stop retrying.
        except Exception as e:
            # Broad on purpose: this is the retry boundary around a network
            # call into a third-party library.
            print(f"Translation error for file {input_file} (Attempt {attempt + 1}): {str(e)}")
            if attempt + 1 < retry_count:
                time.sleep(5)  # Back off before the next attempt only.
    else:
        # Loop finished without a break: every attempt failed.
        print(f"Translation failed for file {input_file} after {retry_count} attempts.")
        return None

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(translated_content)
    return output_file
def translate_and_create_ensrt_directory(directory_path):
    """Translate every ``.srt`` file in a directory into an ``ensrt`` subfolder.

    For each name ending in ``.srt`` directly inside *directory_path*,
    ``translate_srt`` is called to produce ``ensrt/en_<name>``, and a
    success or failure line is printed depending on its return value.

    Args:
        directory_path: Directory to scan (not recursive).
    """
    ensrt_directory = os.path.join(directory_path, 'ensrt')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(ensrt_directory, exist_ok=True)

    for filename in os.listdir(directory_path):
        if filename.endswith('.srt'):
            input_file_path = os.path.join(directory_path, filename)
            output_file_path = os.path.join(ensrt_directory, f'en_{filename}')
            translated_file = translate_srt(input_file_path, output_file_path)
            # translate_srt's result is falsy when no output was produced.
            if translated_file:
                print(f"Translation successful for file {input_file_path}.")
            else:
                print(f"Translation failed for file {input_file_path}. File not added to 'ensrt' directory.")
if __name__ == "__main__":
    # Folder whose .srt files get an English copy under <folder>/ensrt.
    target_directory = r"D:\Work_Tools\民间故事"
    translate_and_create_ensrt_directory(directory_path=target_directory)
    # Message reads: "Translation and creation of the ensrt directory finished!"
    print("翻译并创建ensrt目录完成!")