#!/usr/bin/env python3
"""Recursively translate ``.txt`` files under a directory to English.

For every ``<name>.txt`` found, the translated text is written next to it as
``<name>_eng.txt``. Sources that already have a non-empty translation are
skipped; zero-size translations left behind by an earlier failed run are
removed so that the source is translated again.
"""
import argparse
import json
import os
import time

# Suffix appended to a source file's stem to name its English translation.
ENGLISH_SUFFIX = '_eng'
# Pause after each successful translation, in seconds (same value the script
# has always used between requests).
REQUEST_DELAY_SECONDS = 5


def find_text_files(directory):
    """Return the paths of ``.txt`` files under *directory* needing translation.

    Every directory level is scanned, including leaf directories. A file is
    returned when it ends in ``.txt``, is not itself a ``*_eng.txt``
    translation output, and has no non-empty ``<stem>_eng.txt`` beside it.

    Side effect: a zero-size ``<stem>_eng.txt`` is deleted so that its source
    gets translated again.
    """
    pending = []
    for dirpath, _dirnames, filenames in os.walk(directory):
        for name in filenames:
            stem, ext = os.path.splitext(name)
            if ext != '.txt' or stem.endswith(ENGLISH_SUFFIX):
                # Either not a text file or a translation output itself.
                continue

            source = os.path.join(dirpath, name)
            english = os.path.join(dirpath, stem + ENGLISH_SUFFIX + '.txt')
            if os.path.exists(english):
                if os.path.getsize(english) > 0:
                    print(f"{source} already has translation")
                    continue
                print(f"{english} is zero size, removing")
                os.remove(english)
            pending.append(source)
    return pending


def detect_encoding(filename, detector):
    """Feed *filename* to *detector* line by line and return its encoding guess.

    *detector* is a ``chardet`` ``UniversalDetector``; it is reset before use
    so a single instance can be shared across files. Returns whatever the
    detector stores under ``result['encoding']``.
    """
    detector.reset()
    with open(filename, 'rb') as raw:
        for line in raw:
            detector.feed(line)
            if detector.done:
                # The detector has seen enough; no need to read further.
                break
    detector.close()
    return detector.result['encoding']


def read_text(filename, encoding):
    """Return the text of *filename* decoded with *encoding*, or ``None``.

    ``None`` is returned (after printing a message) when the bytes cannot be
    decoded or when *encoding* is a codec name Python does not know.
    """
    try:
        with open(filename, 'r', encoding=encoding) as contents:
            return contents.read()
    except (UnicodeDecodeError, LookupError):
        # LookupError: the detector reported a codec name unknown to Python.
        print(f"can't decode {filename}")
        return None


def main():
    """Parse the command line and translate every pending text file."""
    # Third-party imports live here so the file-system helpers above can be
    # imported with only the standard library available; a missing package
    # still fails immediately, before any file is touched.
    from chardet.universaldetector import UniversalDetector
    from googletrans import Translator

    parser = argparse.ArgumentParser(
        description="recursive translation of txt files", allow_abbrev=False)
    # Required: without it os.walk() would be handed None and raise TypeError.
    parser.add_argument("--directory", "-d", action="store", required=True,
                        help="directory to scan")
    args = parser.parse_args()

    detector = UniversalDetector()
    translator = Translator()  # one instance is reused for every file

    for filename in find_text_files(args.directory):
        filecontents = read_text(filename, detect_encoding(filename, detector))
        if filecontents is None:
            continue
        if not filecontents.strip():
            # Nothing to translate; writing an empty translation would only be
            # deleted and retried by the next scan.
            print(f"{filename} is empty, skipping")
            continue

        print(filename)
        try:
            translation = translator.translate(filecontents, dest='en')
        except json.JSONDecodeError:
            # Treated as a per-file failure; move on to the next file.
            print("jsonerror")
            continue

        engfile = os.path.splitext(filename)[0] + ENGLISH_SUFFIX + '.txt'
        with open(engfile, 'w', encoding="utf8") as out:
            out.write(translation.text)

        time.sleep(REQUEST_DELAY_SECONDS)


if __name__ == "__main__":
    main()