import os
import re

# One WebVTT timestamp: an optional hours field, then MM:SS.mmm.
_VTT_TIMESTAMP = r"(?:(\d{2,}):)?(\d{2}):(\d{2})\.(\d{3})"
# A cue timing line: "<start> --> <end>", optionally followed by cue settings.
# Built by concatenation (not an f-string) because the pattern contains braces.
_CUE_TIMING_RE = re.compile(
    r"^\s*" + _VTT_TIMESTAMP + r"\s+-->\s+" + _VTT_TIMESTAMP
)


def _srt_timestamp(hours, minutes, seconds, millis):
    """Format timestamp fields as ``HH:MM:SS,mmm``; missing hours become ``00``."""
    return f"{hours or '00'}:{minutes}:{seconds},{millis}"


def _vtt_text_to_srt(vtt_content):
    """Return the SRT rendering of the WebVTT document *vtt_content*.

    Only lines matching a cue timing (``start --> end``) open a new cue, so
    caption text that happens to start with a digit stays caption text.
    Everything outside a cue (the ``WEBVTT`` header, metadata lines, cue
    identifiers, NOTE blocks) is dropped, and cue settings after the timing
    are discarded because SRT has no equivalent.
    """
    cues = []
    current_cue = None  # lines of the cue being collected, or None outside a cue
    for raw_line in vtt_content.split("\n"):
        timing = _CUE_TIMING_RE.match(raw_line)
        if timing:
            start = _srt_timestamp(*timing.group(1, 2, 3, 4))
            end = _srt_timestamp(*timing.group(5, 6, 7, 8))
            current_cue = [f"{start} --> {end}"]
            cues.append(current_cue)
        elif raw_line == "":
            # A truly empty line ends the cue; whatever follows up to the next
            # timing line is not caption text.
            current_cue = None
        elif current_cue is not None and raw_line.strip():
            # Collapse runs of whitespace inside the caption text.
            current_cue.append(" ".join(raw_line.split()))

    blocks = [
        f"{index}\n" + "\n".join(cue)
        for index, cue in enumerate(cues, start=1)
    ]
    return "\n\n".join(blocks) + "\n\n"


def convert_vtt_to_srt(vtt_file_path, srt_file_path):
    """Convert the WebVTT file at *vtt_file_path* into an SRT file.

    Args:
        vtt_file_path: Path of the UTF-8 ``.vtt`` file to read.
        srt_file_path: Path of the ``.srt`` file to write (overwritten).
    """
    # utf-8-sig transparently drops a leading byte-order mark if present.
    with open(vtt_file_path, 'r', encoding='utf-8-sig') as vtt_file:
        vtt_content = vtt_file.read()

    with open(srt_file_path, 'w', encoding='utf-8') as srt_file:
        srt_file.write(_vtt_text_to_srt(vtt_content))


def batch_convert_vtt_to_srt(directory_path):
    """Convert every ``.vtt`` file directly inside *directory_path*.

    Each output is written next to its input with the extension replaced by
    ``.srt``; one progress line is printed per converted file.
    """
    for filename in sorted(os.listdir(directory_path)):
        if not filename.lower().endswith(".vtt"):
            continue
        vtt_file_path = os.path.join(directory_path, filename)
        srt_file_path = os.path.join(
            directory_path, os.path.splitext(filename)[0] + ".srt"
        )
        convert_vtt_to_srt(vtt_file_path, srt_file_path)
        print(f"Converted: {vtt_file_path} -> {srt_file_path}")


if __name__ == "__main__":
    # Specify the directory
    directory_path = r"D:\Work_Tools\chinaminjian\p1"
    batch_convert_vtt_to_srt(directory_path)
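p2 无空格 (variant 2: same VTT-to-SRT converter, but spaces are removed from the subtitle text; "无空格" = "no spaces")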
import os
import re

# One WebVTT timestamp: an optional hours field, then MM:SS.mmm.
_VTT_TIMESTAMP = r"(?:(\d{2,}):)?(\d{2}):(\d{2})\.(\d{3})"
# A cue timing line: "<start> --> <end>", optionally followed by cue settings.
# Built by concatenation (not an f-string) because the pattern contains braces.
_CUE_TIMING_RE = re.compile(
    r"^\s*" + _VTT_TIMESTAMP + r"\s+-->\s+" + _VTT_TIMESTAMP
)


def _srt_timestamp(hours, minutes, seconds, millis):
    """Format timestamp fields as ``HH:MM:SS,mmm``; missing hours become ``00``."""
    return f"{hours or '00'}:{minutes}:{seconds},{millis}"


def _vtt_text_to_srt_without_spaces(vtt_content):
    """Return the SRT rendering of *vtt_content* with whitespace-free captions.

    Whitespace is removed from the caption text only. The timing line keeps
    its ``" --> "`` separator; stripping spaces from the whole cue would
    produce ``start-->end``, which is not a valid SRT timing line.

    Only lines matching a cue timing open a new cue; everything outside a
    cue (header, metadata, cue identifiers, NOTE blocks) and any cue
    settings after the timing are dropped.
    """
    cues = []
    current_cue = None  # lines of the cue being collected, or None outside a cue
    for raw_line in vtt_content.split("\n"):
        timing = _CUE_TIMING_RE.match(raw_line)
        if timing:
            start = _srt_timestamp(*timing.group(1, 2, 3, 4))
            end = _srt_timestamp(*timing.group(5, 6, 7, 8))
            current_cue = [f"{start} --> {end}"]
            cues.append(current_cue)
        elif raw_line == "":
            # A truly empty line ends the cue; whatever follows up to the next
            # timing line is not caption text.
            current_cue = None
        elif current_cue is not None and raw_line.strip():
            # Drop every whitespace character from the caption text.
            current_cue.append("".join(raw_line.split()))

    blocks = [
        f"{index}\n" + "\n".join(cue)
        for index, cue in enumerate(cues, start=1)
    ]
    return "\n\n".join(blocks) + "\n\n"


def convert_vtt_to_srt(vtt_file_path, srt_file_path):
    """Convert a WebVTT file into an SRT file whose captions contain no spaces.

    Args:
        vtt_file_path: Path of the UTF-8 ``.vtt`` file to read.
        srt_file_path: Path of the ``.srt`` file to write (overwritten).
    """
    # utf-8-sig transparently drops a leading byte-order mark if present.
    with open(vtt_file_path, 'r', encoding='utf-8-sig') as vtt_file:
        vtt_content = vtt_file.read()

    with open(srt_file_path, 'w', encoding='utf-8') as srt_file:
        srt_file.write(_vtt_text_to_srt_without_spaces(vtt_content))


def batch_convert_vtt_to_srt(directory_path):
    """Convert every ``.vtt`` file directly inside *directory_path*.

    Each output is written next to its input with the extension replaced by
    ``.srt``; one progress line is printed per converted file.
    """
    for filename in sorted(os.listdir(directory_path)):
        if not filename.lower().endswith(".vtt"):
            continue
        vtt_file_path = os.path.join(directory_path, filename)
        srt_file_path = os.path.join(
            directory_path, os.path.splitext(filename)[0] + ".srt"
        )
        convert_vtt_to_srt(vtt_file_path, srt_file_path)
        print(f"Converted: {vtt_file_path} -> {srt_file_path}")


if __name__ == "__main__":
    # Specify the directory
    directory_path = "your_directory_path"
    batch_convert_vtt_to_srt(directory_path)
pip install googletrans==4.0.0-rc1
import os
import time

from googletrans import Translator


def _translate_to_english(content, input_file, retry_count):
    """Translate *content* to English, retrying up to *retry_count* times.

    Args:
        content: Text to translate.
        input_file: Path used only to label the error messages.
        retry_count: Maximum number of attempts.

    Returns:
        The translated text, or ``None`` when every attempt failed.
    """
    for attempt in range(retry_count):
        try:
            translator = Translator()
            source_language = translator.detect(content).lang
            # Name the source language explicitly only when detection produced
            # a single non-Chinese language code; otherwise let translate()
            # auto-detect. The language goes to translate(), because the
            # Translator constructor takes no src/dest arguments.
            if (
                isinstance(source_language, str)
                and source_language
                and source_language != 'zh-CN'
            ):
                src = source_language
            else:
                src = 'auto'
            return translator.translate(content, src=src, dest='en').text
        except Exception as e:
            # Retry boundary: any failure of the remote call is reported and
            # retried rather than aborting the whole batch.
            print(f"Translation error for file {input_file} (Attempt {attempt + 1}): {str(e)}")
            if attempt + 1 < retry_count:
                time.sleep(5)  # Add a delay before retrying
    return None


def translate_srt(input_file, output_file, retry_count=3):
    """Translate the SRT file *input_file* to English and write *output_file*.

    If every attempt fails, the original content is written unchanged so an
    output file always exists. Empty input is copied through without
    contacting the translation service.

    Args:
        input_file: Path of the UTF-8 SRT file to read.
        output_file: Path of the file to write (overwritten).
        retry_count: Maximum number of translation attempts.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # NOTE(review): the whole file, cue numbers and timing lines included, is
    # sent as one string -- confirm the service returns those lines intact.
    translated_content = content
    if content.strip():
        result = _translate_to_english(content, input_file, retry_count)
        if result is None:
            # If all attempts fail, keep the original content
            print(f"Translation failed for file {input_file} after {retry_count} attempts.")
        else:
            translated_content = result

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(translated_content)


def translate_and_create_ensrt_directory(directory_path):
    """Translate every ``.srt`` file in *directory_path* into ``ensrt/``.

    The ``ensrt`` subdirectory is created if needed; each output is named
    ``en_<original filename>``.
    """
    ensrt_directory = os.path.join(directory_path, 'ensrt')
    os.makedirs(ensrt_directory, exist_ok=True)

    for filename in os.listdir(directory_path):
        if filename.endswith('.srt'):
            input_file_path = os.path.join(directory_path, filename)
            output_file_path = os.path.join(ensrt_directory, f'en_{filename}')
            translate_srt(input_file_path, output_file_path)


if __name__ == "__main__":
    target_directory = r"D:\Work_Tools\民间故事"
    translate_and_create_ensrt_directory(target_directory)
    print("翻译并创建ensrt目录完成!")
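final (final version of the translation script: a file whose translation fails is reported and not written to the ensrt directory)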
import os
import time

from googletrans import Translator


def _translate_to_english(content, input_file, retry_count):
    """Translate *content* to English, retrying up to *retry_count* times.

    Args:
        content: Text to translate.
        input_file: Path used only to label the error messages.
        retry_count: Maximum number of attempts.

    Returns:
        The translated text, or ``None`` when every attempt failed.
    """
    for attempt in range(retry_count):
        try:
            translator = Translator()
            source_language = translator.detect(content).lang
            # Name the source language explicitly only when detection produced
            # a single non-Chinese language code; otherwise let translate()
            # auto-detect. The language goes to translate(), because the
            # Translator constructor takes no src/dest arguments.
            if (
                isinstance(source_language, str)
                and source_language
                and source_language != 'zh-CN'
            ):
                src = source_language
            else:
                src = 'auto'
            return translator.translate(content, src=src, dest='en').text
        except Exception as e:
            # Retry boundary: any failure of the remote call is reported and
            # retried rather than aborting the whole batch.
            print(f"Translation error for file {input_file} (Attempt {attempt + 1}): {str(e)}")
            if attempt + 1 < retry_count:
                time.sleep(5)  # Add a delay before retrying
    return None


def translate_srt(input_file, output_file, retry_count=3):
    """Translate the SRT file *input_file* to English and write *output_file*.

    Nothing is written when every translation attempt fails. Empty input is
    copied through without contacting the translation service.

    Args:
        input_file: Path of the UTF-8 SRT file to read.
        output_file: Path of the file to write (overwritten).
        retry_count: Maximum number of translation attempts.

    Returns:
        *output_file* on success, or ``None`` if all attempts failed.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    # NOTE(review): the whole file, cue numbers and timing lines included, is
    # sent as one string -- confirm the service returns those lines intact.
    translated_content = content
    if content.strip():
        translated_content = _translate_to_english(content, input_file, retry_count)
        if translated_content is None:
            # If all attempts fail, return None
            print(f"Translation failed for file {input_file} after {retry_count} attempts.")
            return None

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(translated_content)

    return output_file


def translate_and_create_ensrt_directory(directory_path):
    """Translate every ``.srt`` file in *directory_path* into ``ensrt/``.

    The ``ensrt`` subdirectory is created if needed; each output is named
    ``en_<original filename>``. A success or failure line is printed per file.
    """
    ensrt_directory = os.path.join(directory_path, 'ensrt')
    os.makedirs(ensrt_directory, exist_ok=True)

    for filename in os.listdir(directory_path):
        if not filename.endswith('.srt'):
            continue
        input_file_path = os.path.join(directory_path, filename)
        output_file_path = os.path.join(ensrt_directory, f'en_{filename}')
        translated_file = translate_srt(input_file_path, output_file_path)

        if translated_file:
            print(f"Translation successful for file {input_file_path}.")
        else:
            print(f"Translation failed for file {input_file_path}. File not added to 'ensrt' directory.")


if __name__ == "__main__":
    target_directory = r"D:\Work_Tools\民间故事"
    translate_and_create_ensrt_directory(target_directory)
    print("翻译并创建ensrt目录完成!")