Easy subtitle translation using the Google API
Table of contents
Breaking Language Barriers: Translating Subtitles with Google API
Some time ago, I wanted to watch a movie with my mother, but her English isn't very strong. I thought of machine translation and decided to use Google Translate to translate the subtitles into her preferred language. Here's how I did it.
Setting Up Google Translate API
To use the Google Translate API, you need to set up a Google Cloud project and authenticate your requests. Follow the instructions here.
The Script
Here's a simple Python script that uses the Google Translate API to translate subtitles.
from google.cloud import translate_v2 as translate
import re
def parse_srt_to_array(file_path):
    """Parse an .srt file into a list of subtitle dictionaries.

    A subtitle block is an index line, a time-range line and one or more
    text lines. Blocks with fewer than three lines are skipped.

    Args:
        file_path: Path to the UTF-8 encoded .srt file.

    Returns:
        A list of dicts with the string-valued keys 'index', 'time_range'
        and 'text'. Multi-line text keeps its line breaks.
    """
    # 'utf-8-sig' drops a leading byte-order mark, which would otherwise stay
    # glued to the first subtitle's index.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        content = file.read()

    subtitles = []
    # Stripping first keeps the file's trailing newline out of the last
    # subtitle's text; the pattern treats any run of blank or whitespace-only
    # lines as a single block separator.
    for block in re.split(r'\n\s*\n', content.strip()):
        lines = block.strip().split('\n')
        if len(lines) >= 3:
            subtitles.append({
                'index': lines[0],
                'time_range': lines[1],
                'text': '\n'.join(lines[2:]),
            })
    return subtitles
def parse_subtitle_block(block):
    """Build a subtitle dictionary from the lines of one subtitle block.

    Args:
        block: Sequence whose first item is the subtitle number, second item
            is the time range, and remaining items are the text lines.

    Returns:
        A dict with 'index' (converted to int), 'time_range', and 'text'
        (the text lines joined with single spaces).
    """
    return {
        'index': int(block[0]),
        'time_range': block[1],
        'text': " ".join(block[2:]),
    }
# Shared Translate client, created lazily so that translating many subtitles
# does not build a new client for every single request.
_translate_client = None


def _get_translate_client():
    """Return the shared Translate client, creating it on first use."""
    global _translate_client
    if _translate_client is None:
        _translate_client = translate.Client()
    return _translate_client


def translate_text_with_google(text, target_language, source_language):
    """Translate one piece of subtitle text with the Google Translate API.

    Args:
        text: The subtitle text to translate (a single string).
        target_language: Language code to translate into.
        source_language: Language code of the input text.

    Returns:
        The translated text. Whitespace-only input is returned unchanged
        without calling the API.
    """
    import html  # local import: only this function needs it

    if not text.strip():
        return text

    # The text is sent as HTML so that tags in the subtitle (e.g. <i>) are
    # passed through rather than translated.
    result = _get_translate_client().translate(
        text,
        source_language=source_language,
        target_language=target_language,
        format_='html',
    )
    # In HTML mode the service returns special characters as entities
    # (e.g. &#39; for an apostrophe); .srt files are plain text, so decode them.
    return html.unescape(result['translatedText'])
def reverse_text(text):
    """Strip HTML-like tags from *text* and mirror each line character by character.

    Line order is kept; only the characters inside each line are reversed.
    """
    stripped = clean_html_tags(text)
    mirrored_rows = [row[::-1] for row in stripped.split('\n')]
    return '\n'.join(mirrored_rows)
def clean_html_tags(text):
    """Return *text* with every ``<...>`` span (HTML-like tag) removed."""
    tag_pattern = re.compile(r'<[^>]*>')
    return tag_pattern.sub('', text)
# Module-level record of every translated text, in the order it was produced.
translated = []


def write_translated_subtitles(subtitles, output_path, target_language, source_language):
    """Translate each subtitle and write the result to *output_path* in .srt layout.

    For every subtitle the translated text is printed, appended to the
    module-level ``translated`` list, and written as an index line, a
    time-range line, the translated text, and a blank line.

    Args:
        subtitles: Iterable of dicts with 'index', 'time_range' and 'text' keys.
        output_path: Path of the file to create or overwrite (UTF-8).
        target_language: Language code to translate into.
        source_language: Language code of the subtitle text.
    """
    with open(output_path, 'w', encoding='utf-8') as out:
        for entry in subtitles:
            rendered = translate_text_with_google(
                entry['text'], target_language, source_language
            )
            print(rendered)
            translated.append(rendered)
            out.write(f"{entry['index']}\n")
            out.write(f"{entry['time_range']}\n")
            out.write(f"{rendered}\n\n")
# Example usage
# NOTE: these are top-level statements, so they run whenever the file is
# executed or imported, and they call the real translation API.
input_srt_path = './sub/input.srt' # Update this path
output_srt_path = './sub/output.srt' # Update this path
# Parse the .srt file into a list of subtitle dictionaries
subtitles = parse_srt_to_array(input_srt_path)
# Translate every subtitle from English ('en') to Persian ('fa') and write the result to a new file
write_translated_subtitles(subtitles, output_srt_path, target_language='fa', source_language='en')