Share

VTT to SRT

import os
import re

def convert_vtt_to_srt(vtt_file_path, srt_file_path):
    """Convert one WebVTT subtitle file into a numbered SRT file.

    A line containing ``-->`` starts a new cue. Non-blank lines that follow it,
    up to the next blank line, are that cue's text; runs of whitespace inside
    a text line are collapsed to single spaces. Everything outside a cue
    (the ``WEBVTT`` header and its metadata lines, cue identifiers, NOTE or
    STYLE blocks) is dropped. Cues are renumbered from 1.

    Args:
        vtt_file_path: Path of the UTF-8 encoded ``.vtt`` file to read.
        srt_file_path: Path of the ``.srt`` file to write (overwritten).

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # 'utf-8-sig' reads plain UTF-8 as well, but also drops a leading BOM that
    # would otherwise be glued to the first line of the file.
    with open(vtt_file_path, 'r', encoding='utf-8-sig') as vtt_file:
        vtt_content = vtt_file.read()

    # The hours field is optional in WebVTT ("mm:ss.ttt") but mandatory in SRT.
    timestamp_pattern = re.compile(r"(?:(\d{2,}):)?(\d{2}):(\d{2})\.(\d{3})")

    def to_srt_timestamp(match):
        hours = match.group(1) or "00"
        return f"{hours}:{match.group(2)}:{match.group(3)},{match.group(4)}"

    # Each entry is the list of output lines for one cue: index, timing, text...
    entries = []
    inside_cue = False

    for line in vtt_content.split('\n'):
        if '-->' in line:
            # Only a timing line contains the arrow. Testing for a leading
            # digit instead would misread captions such as "3 little pigs".
            timing_line = timestamp_pattern.sub(to_srt_timestamp, line.strip())
            entries.append([str(len(entries) + 1), timing_line])
            inside_cue = True
        elif not line.strip():
            # A blank line terminates the current cue block.
            inside_cue = False
        elif inside_cue:
            entries[-1].append(' '.join(line.split()))
        # Any other line sits outside a cue (header, identifier, NOTE) and is
        # skipped rather than appended to a possibly non-existent entry.

    numbered_srt_content = '\n\n'.join('\n'.join(entry) for entry in entries) + '\n\n'

    with open(srt_file_path, 'w', encoding='utf-8') as srt_file:
        srt_file.write(numbered_srt_content)

def batch_convert_vtt_to_srt(directory_path):
    """Convert every ``.vtt`` entry directly inside *directory_path* to SRT.

    Each output file is written next to its source with the extension
    replaced by ``.srt``. Subdirectories are not searched. One "Converted"
    line is printed per file.
    """
    vtt_names = (name for name in os.listdir(directory_path) if name.endswith(".vtt"))
    for vtt_name in vtt_names:
        stem, _extension = os.path.splitext(vtt_name)
        vtt_file_path = os.path.join(directory_path, vtt_name)
        srt_file_path = os.path.join(directory_path, stem + ".srt")
        convert_vtt_to_srt(vtt_file_path, srt_file_path)
        print(f"Converted: {vtt_file_path} -> {srt_file_path}")

# Script entry point: convert every .vtt file in the directory below.
# NOTE: there is no `if __name__ == "__main__":` guard here, so the batch
# conversion runs as soon as this code is executed or imported.
directory_path = r"D:\Work_Tools\chinaminjian\p1"
batch_convert_vtt_to_srt(directory_path)

p2: no spaces (无空格)

import os
import re

def convert_vtt_to_srt(vtt_file_path, srt_file_path):
    """Convert one WebVTT file into a numbered SRT file with space-free text.

    A line containing ``-->`` starts a new cue. Non-blank lines that follow it,
    up to the next blank line, are that cue's text, and all whitespace inside
    each text line is removed. The timing line keeps its spaces, so the
    `` --> `` separator stays intact. Everything outside a cue (the ``WEBVTT``
    header and its metadata lines, cue identifiers, NOTE or STYLE blocks) is
    dropped. Cues are renumbered from 1.

    Args:
        vtt_file_path: Path of the UTF-8 encoded ``.vtt`` file to read.
        srt_file_path: Path of the ``.srt`` file to write (overwritten).

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
    """
    # 'utf-8-sig' reads plain UTF-8 as well, but also drops a leading BOM that
    # would otherwise be glued to the first line of the file.
    with open(vtt_file_path, 'r', encoding='utf-8-sig') as vtt_file:
        vtt_content = vtt_file.read()

    # The hours field is optional in WebVTT ("mm:ss.ttt") but mandatory in SRT.
    timestamp_pattern = re.compile(r"(?:(\d{2,}):)?(\d{2}):(\d{2})\.(\d{3})")

    def to_srt_timestamp(match):
        hours = match.group(1) or "00"
        return f"{hours}:{match.group(2)}:{match.group(3)},{match.group(4)}"

    # Each entry is the list of output lines for one cue: index, timing, text...
    entries = []
    inside_cue = False

    for line in vtt_content.split('\n'):
        if '-->' in line:
            # Only a timing line contains the arrow. Testing for a leading
            # digit instead would misread captions that start with a number.
            timing_line = timestamp_pattern.sub(to_srt_timestamp, line.strip())
            entries.append([str(len(entries) + 1), timing_line])
            inside_cue = True
        elif not line.strip():
            # A blank line terminates the current cue block.
            inside_cue = False
        elif inside_cue:
            # Strip whitespace from the caption text only; doing it on the
            # whole entry would also collapse the " --> " of the timing line.
            entries[-1].append(''.join(line.split()))
        # Any other line sits outside a cue (header, identifier, NOTE) and is
        # skipped rather than appended to a possibly non-existent entry.

    numbered_srt_content = '\n\n'.join('\n'.join(entry) for entry in entries) + '\n\n'

    with open(srt_file_path, 'w', encoding='utf-8') as srt_file:
        srt_file.write(numbered_srt_content)

def batch_convert_vtt_to_srt(directory_path):
    """Run ``convert_vtt_to_srt`` on each ``.vtt`` entry of *directory_path*.

    Only the directory's immediate entries are considered. The SRT file is
    written alongside the source, sharing its base name, and a "Converted"
    line is printed after each conversion.
    """
    for entry in os.listdir(directory_path):
        if not entry.endswith(".vtt"):
            continue
        base_name = os.path.splitext(entry)[0]
        source = os.path.join(directory_path, entry)
        target = os.path.join(directory_path, base_name + ".srt")
        convert_vtt_to_srt(source, target)
        print(f"Converted: {source} -> {target}")

# Script entry point: convert every .vtt file in the directory below.
# NOTE(review): "your_directory_path" looks like a placeholder; point it at
# the real folder holding the .vtt files before running.
directory_path = "your_directory_path"
batch_convert_vtt_to_srt(directory_path)
Install the translation dependency first (shell command, not Python): pip install googletrans==4.0.0-rc1
import os
import time
from googletrans import Translator

def translate_srt(input_file, output_file, retry_count=3):
    """Translate the contents of an SRT file to English with googletrans.

    The whole file is sent to the translator as a single string. If every
    attempt fails (or ``retry_count`` is 0), the original content is written
    to *output_file* unchanged, so an output file is always produced.

    Args:
        input_file: Path of the UTF-8 encoded file to translate.
        output_file: Path of the file to write (overwritten).
        retry_count: Maximum number of translation attempts.

    NOTE(review): indices and timing lines are translated along with the
    caption text; confirm the service returns them unmodified.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    for attempt in range(retry_count):
        try:
            translator = Translator()
            # Empty content cannot be detected; treat it as English.
            source_language = translator.detect(content).lang if content else 'en'

            # The source language is an argument of translate(), not of the
            # Translator constructor. Chinese keeps the default auto-detection;
            # any other detected language is passed explicitly.
            src = 'auto' if source_language == 'zh-CN' else source_language
            translated_content = translator.translate(content, src=src, dest='en').text
            break  # Translation succeeded
        except Exception as e:
            # Deliberately broad: any failure of the translation service is
            # treated as retryable, and the fallback below keeps this
            # best-effort.
            print(f"Translation error for file {input_file} (Attempt {attempt + 1}): {str(e)}")
            if attempt + 1 < retry_count:
                time.sleep(5)  # Back off only when another attempt follows
    else:
        # If all attempts fail, keep the original content
        translated_content = content
        print(f"Translation failed for file {input_file} after {retry_count} attempts.")

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(translated_content)

def translate_and_create_ensrt_directory(directory_path):
    """Translate each ``.srt`` entry of *directory_path* into ``ensrt/``.

    The ``ensrt`` subdirectory is created when it does not exist yet. Every
    entry whose name ends with ``.srt`` is passed to ``translate_srt`` and
    written there under the name ``en_<original name>``.
    """
    output_dir = os.path.join(directory_path, 'ensrt')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    srt_names = [name for name in os.listdir(directory_path) if name.endswith('.srt')]
    for srt_name in srt_names:
        translate_srt(
            os.path.join(directory_path, srt_name),
            os.path.join(output_dir, f'en_{srt_name}'),
        )

if __name__ == "__main__":
    # Script entry point: translate the .srt files found directly inside the
    # folder below.
    target_directory = r"D:\Work_Tools\民间故事"
    translate_and_create_ensrt_directory(target_directory)
    # The message reads: "Translation and creation of the ensrt directory complete!"
    print("翻译并创建ensrt目录完成!")

 

final

import os
import time
from googletrans import Translator

def translate_srt(input_file, output_file, retry_count=3):
    """Translate the contents of an SRT file to English with googletrans.

    The whole file is sent to the translator as a single string. Nothing is
    written when every attempt fails (or ``retry_count`` is 0).

    Args:
        input_file: Path of the UTF-8 encoded file to translate.
        output_file: Path of the file to write on success (overwritten).
        retry_count: Maximum number of translation attempts.

    Returns:
        *output_file* when the translation was written, otherwise ``None``.

    NOTE(review): indices and timing lines are translated along with the
    caption text; confirm the service returns them unmodified.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        content = file.read()

    for attempt in range(retry_count):
        try:
            translator = Translator()
            # Empty content cannot be detected; treat it as English.
            source_language = translator.detect(content).lang if content else 'en'

            # The source language is an argument of translate(), not of the
            # Translator constructor. Chinese keeps the default auto-detection;
            # any other detected language is passed explicitly.
            src = 'auto' if source_language == 'zh-CN' else source_language
            translated_content = translator.translate(content, src=src, dest='en').text
            break  # Translation succeeded
        except Exception as e:
            # Deliberately broad: any failure of the translation service is
            # treated as retryable, and the caller is told via the None return.
            print(f"Translation error for file {input_file} (Attempt {attempt + 1}): {str(e)}")
            if attempt + 1 < retry_count:
                time.sleep(5)  # Back off only when another attempt follows
    else:
        # If all attempts fail, return None
        print(f"Translation failed for file {input_file} after {retry_count} attempts.")
        return None

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(translated_content)

    return output_file

def translate_and_create_ensrt_directory(directory_path):
    """Translate each ``.srt`` entry of *directory_path* into ``ensrt/``.

    The ``ensrt`` subdirectory is created when it does not exist yet. Every
    entry whose name ends with ``.srt`` is passed to ``translate_srt`` with
    the target ``ensrt/en_<original name>``, and a success or failure line is
    printed depending on whether that call returned a truthy value.
    """
    output_dir = os.path.join(directory_path, 'ensrt')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    srt_names = [name for name in os.listdir(directory_path) if name.endswith('.srt')]
    for srt_name in srt_names:
        source_path = os.path.join(directory_path, srt_name)
        target_path = os.path.join(output_dir, f'en_{srt_name}')

        outcome = translate_srt(source_path, target_path)
        if not outcome:
            print(f"Translation failed for file {source_path}. File not added to 'ensrt' directory.")
            continue
        print(f"Translation successful for file {source_path}.")

if __name__ == "__main__":
    # Script entry point: translate the .srt files found directly inside the
    # folder below.
    target_directory = r"D:\Work_Tools\民间故事"
    translate_and_create_ensrt_directory(target_directory)
    # The message reads: "Translation and creation of the ensrt directory complete!"
    print("翻译并创建ensrt目录完成!")