bulk-text-translator/bulkTextTranslator.py

64 lines
2.1 KiB
Python

#!/usr/bin/env python3
import os
import argparse
import time
from googletrans import Translator
from chardet.universaldetector import UniversalDetector
import json
# Suffix appended to a source file's stem to name its English translation.
TRANSLATED_SUFFIX = '_eng'

# Pause after each successful translation request.
# NOTE(review): presumably rate limiting for the translation service - confirm.
REQUEST_DELAY_SECONDS = 5


def parse_args(argv=None):
    """Parse the command line.

    ``--directory`` is mandatory so that a missing option produces an
    argparse usage error instead of ``None`` being handed to ``os.walk``.

    :param argv: argument list to parse; ``None`` means ``sys.argv[1:]``.
    :return: namespace with a ``directory`` attribute.
    """
    parser = argparse.ArgumentParser(description="recursive translation of txt files", allow_abbrev=False)
    parser.add_argument("--directory", "-d", action="store", required=True, help="directory to scan")
    return parser.parse_args(argv)


def english_path_for(filename):
    """Return the path of the ``_eng.txt`` file that belongs to *filename*."""
    return os.path.splitext(filename)[0] + TRANSLATED_SUFFIX + '.txt'


def collect_text_files(directory):
    """Walk *directory* recursively and return the ``.txt`` files to translate.

    Side effect: a zero-byte ``<stem>_eng.txt`` sitting next to any visited
    file is deleted, so a previously failed (empty) translation is redone.

    Files whose stem already ends in ``_eng`` are translations themselves and
    are never returned.

    Every directory is scanned, including those without sub-directories; the
    earlier version only looked at directories that contained at least one
    sub-directory, which silently skipped leaf (and flat) directories.

    NOTE(review): a source whose non-empty ``_eng`` file already exists is
    still returned and therefore re-translated on every run, exactly as
    before. Confirm whether such files should be skipped instead.

    :param directory: root of the tree to scan.
    :return: list of paths, in ``os.walk`` order.
    """
    textfiles = []
    for dirpath, _dirnames, filenames in os.walk(directory):
        for file in filenames:
            stem, extension = os.path.splitext(file)
            english_file_path = os.path.join(dirpath, stem + TRANSLATED_SUFFIX + '.txt')
            if os.path.exists(english_file_path) and os.path.getsize(english_file_path) == 0:
                print(f"{english_file_path} is zero size, removing")
                os.remove(english_file_path)
            if stem.endswith(TRANSLATED_SUFFIX):
                print(f"{dirpath} already has translation")
                continue
            if extension == '.txt':
                textfiles.append(os.path.join(dirpath, file))
    return textfiles


def detect_encoding(filename):
    """Return chardet's guess for the encoding of *filename*.

    The file is streamed line by line and feeding stops as soon as the
    detector reports it is done, so large files are not loaded into memory.

    :return: the ``'encoding'`` entry of the detector result.
        NOTE(review): per the chardet documentation this can be ``None``
        when no encoding could be determined; ``open`` then falls back to
        the platform default.
    """
    detector = UniversalDetector()
    with open(filename, 'rb') as f:
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    detector.close()
    return detector.result['encoding']


def read_source_text(filename):
    """Decode *filename* with its detected encoding.

    :return: the decoded text, or ``None`` when the file cannot be decoded
        (wrong guess, or a codec name Python does not know).
    """
    encoding = detect_encoding(filename)
    try:
        with open(filename, 'r', encoding=encoding) as contents:
            return contents.read()
    except (UnicodeDecodeError, LookupError):
        # LookupError: the detector named a codec that Python does not ship.
        print(f"can't decode {filename}")
        return None


def translate_file(filename):
    """Translate one text file to English and write ``<stem>_eng.txt``.

    :param filename: path of the source ``.txt`` file.
    :return: ``True`` when a translation was written, ``False`` when the file
        was skipped (undecodable, empty, or the service reply was not valid
        JSON).
    """
    filecontents = read_source_text(filename)
    if filecontents is None:
        return False
    if not filecontents.strip():
        # An empty source would only yield an empty _eng file, which the next
        # run deletes and regenerates forever.
        print(f"{filename} is empty, skipping")
        return False
    print(filename)
    # A new client per file, as before.
    # NOTE(review): possibly a workaround for stale session state - confirm
    # before hoisting it out of the per-file path.
    translator = Translator()
    try:
        translation = translator.translate(filecontents, dest='en')
    except json.JSONDecodeError:
        print("jsonerror")
        return False
    with open(english_path_for(filename), 'w', encoding="utf8") as f:
        f.write(translation.text)
    return True


def main(argv=None):
    """Translate every eligible ``.txt`` file below the given directory."""
    args = parse_args(argv)
    for filename in collect_text_files(args.directory):
        if translate_file(filename):
            # Only successful requests are followed by a pause (unchanged).
            time.sleep(REQUEST_DELAY_SECONDS)


if __name__ == "__main__":
    main()