bulk-text-translator/bulkTextTranslator.py

#!/usr/bin/env python3
import os
import argparse
import time
from googletrans import Translator
from chardet.universaldetector import UniversalDetector
import json

# File-name suffix (before the extension) that marks a generated translation.
ENGLISH_SUFFIX = '_eng'

# Pause between translation requests, in seconds, to avoid hammering the service.
REQUEST_DELAY_SECONDS = 5


def collect_text_files(directory):
    """Return the ``.txt`` files under *directory* that still need translating.

    The tree is walked recursively, including leaf directories. For every
    file ``<name><ext>`` the sibling ``<name>_eng.txt`` is inspected:

    * a zero-size sibling is deleted (treated as a failed earlier run);
    * a non-empty sibling means the file is already translated, so it is
      not queued again.

    Files whose own name ends in ``_eng`` are translation outputs and are
    never queued.

    Args:
        directory: Root directory to scan.

    Returns:
        A list of paths (joined onto *directory*) of files to translate.
    """
    pending = []
    for dirpath, _dirnames, filenames in os.walk(directory):
        for name in filenames:
            stem, extension = os.path.splitext(name)
            english_path = os.path.join(dirpath, stem + ENGLISH_SUFFIX + '.txt')

            already_translated = False
            if os.path.exists(english_path):
                if os.path.getsize(english_path) == 0:
                    print(f"{english_path} is zero size, removing")
                    os.remove(english_path)
                else:
                    already_translated = True

            if stem.endswith(ENGLISH_SUFFIX):
                print(f"{dirpath} already has translation")
                continue
            if extension == '.txt' and not already_translated:
                pending.append(os.path.join(dirpath, name))
    return pending


def read_text(filename, detector):
    """Return the decoded contents of *filename*, or ``None`` if undecodable.

    Args:
        filename: Path of the file to read.
        detector: A chardet ``UniversalDetector``; it is reset before use so
            one instance can be shared across files.

    Returns:
        The file contents as ``str``, or ``None`` when the detected encoding
        cannot decode the file or is not a codec Python knows.
    """
    detector.reset()
    with open(filename, 'rb') as raw:
        # Feed line by line and stop as soon as the detector is confident,
        # so large files are not read in full just for detection.
        for line in raw:
            detector.feed(line)
            if detector.done:
                break
    detector.close()

    try:
        # A ``None`` encoding (nothing detected) falls back to open()'s default.
        with open(filename, 'r', encoding=detector.result['encoding']) as contents:
            return contents.read()
    except (UnicodeDecodeError, LookupError):
        # LookupError: the detector named an encoding Python has no codec for.
        print(f"can't decode {filename}")
        return None


def translate_file(filename, text, translator):
    """Translate *text* to English and write it next to *filename*.

    Args:
        filename: Path of the source file; output goes to ``<stem>_eng.txt``.
        text: Decoded contents of the source file.
        translator: A googletrans ``Translator`` instance.

    Returns:
        ``True`` if a translation was written, ``False`` if the service
        response could not be parsed (nothing is written in that case).
    """
    try:
        translation = translator.translate(text, dest='en')
    except json.JSONDecodeError:
        print("jsonerror")
        return False

    english_path = os.path.splitext(filename)[0] + ENGLISH_SUFFIX + '.txt'
    with open(english_path, 'w', encoding="utf8") as out:
        out.write(translation.text)
    return True


def main(argv=None):
    """Command-line entry point: translate every pending ``.txt`` file.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="recursive translation of txt files", allow_abbrev=False)
    # Required: without a directory there is nothing to walk.
    parser.add_argument("--directory", "-d", action="store", required=True, help="directory to scan")
    args = parser.parse_args(argv)

    textfiles = collect_text_files(args.directory)

    # Both helpers are reusable, so build them once rather than per file.
    detector = UniversalDetector()
    translator = Translator()

    for filename in textfiles:
        filecontents = read_text(filename, detector)
        if filecontents is None:
            continue
        if not filecontents.strip():
            # Nothing to translate; avoid a pointless request and an empty output.
            print(f"{filename} is empty, skipping")
            continue

        print(filename)
        if not translate_file(filename, filecontents, translator):
            continue

        time.sleep(REQUEST_DELAY_SECONDS)


if __name__ == "__main__":
    main()