#!/usr/bin/env python3
"""Recursively translate ``.txt`` files under a directory into English.

For every ``<name>.txt`` found below the given directory, the file's encoding
is detected with chardet, its contents are sent to googletrans, and the
English text is written next to the source as ``<name>_eng.txt`` (UTF-8).
"""
import argparse
import json
import os
import time
from typing import List, Optional

from chardet.universaldetector import UniversalDetector
from googletrans import Translator

SOURCE_EXTENSION = '.txt'
TRANSLATED_SUFFIX = '_eng'
# Pause after each successful translation; presumably this is here to avoid
# hammering the translation service -- confirm before lowering it.
PAUSE_SECONDS = 5


def translated_path(source_path: str) -> str:
    """Return the path of the English translation that belongs to *source_path*."""
    return os.path.splitext(source_path)[0] + TRANSLATED_SUFFIX + SOURCE_EXTENSION


def find_untranslated_files(directory: str) -> List[str]:
    """Collect every ``.txt`` file under *directory* that still needs translating.

    Files whose stem ends in ``_eng`` are treated as translation output and
    skipped. A source file is skipped when a non-empty translation already
    exists; a zero-size translation is deleted so the source is retried.

    Args:
        directory: Root of the tree to scan (every level is visited).

    Returns:
        Full paths of the source files to translate, in ``os.walk`` order.
    """
    pending = []
    # Every directory is scanned, including leaf directories without
    # subdirectories.
    for dirpath, _dirnames, filenames in os.walk(directory):
        for name in filenames:
            stem, extension = os.path.splitext(name)
            if extension != SOURCE_EXTENSION:
                continue
            if stem.endswith(TRANSLATED_SUFFIX):
                # This file is itself a translation; never translate it again.
                continue

            source = os.path.join(dirpath, name)
            english = translated_path(source)
            if os.path.exists(english):
                if os.path.getsize(english) > 0:
                    print(f"{source} already has translation")
                    continue
                # An empty translation is useless; drop it and translate again.
                print(f"{english} is zero size, removing")
                os.remove(english)
            pending.append(source)
    return pending


def detect_encoding(filename: str) -> Optional[str]:
    """Return chardet's encoding guess for *filename* (``None`` if undetected)."""
    detector = UniversalDetector()
    with open(filename, 'rb') as raw:
        # Stream line by line and stop as soon as the detector is confident.
        for line in raw:
            detector.feed(line)
            if detector.done:
                break
    detector.close()
    return detector.result['encoding']


def read_text(filename: str) -> Optional[str]:
    """Read *filename* as text using its detected encoding.

    Returns:
        The decoded contents, or ``None`` (after printing a message) when the
        file cannot be decoded.
    """
    encoding = detect_encoding(filename)
    try:
        # encoding=None makes open() fall back to the platform default.
        with open(filename, 'r', encoding=encoding) as contents:
            return contents.read()
    except (UnicodeDecodeError, LookupError):
        # LookupError: the detected encoding name has no codec in Python.
        print(f"can't decode {filename}")
        return None


def translate_file(filename: str) -> bool:
    """Translate one file into English and write the ``_eng.txt`` sibling.

    Returns:
        ``True`` when a translation was written, ``False`` when the file was
        skipped because it could not be decoded or the translation failed.
    """
    text = read_text(filename)
    if text is None:
        return False

    print(filename)
    translator = Translator()
    try:
        translation = translator.translate(text, dest='en')
    except json.JSONDecodeError:
        print("jsonerror")
        return False

    with open(translated_path(filename), 'w', encoding="utf8") as out:
        out.write(translation.text)
    return True


def main() -> None:
    """Parse the command line and translate every pending file."""
    parser = argparse.ArgumentParser(
        description="recursive translation of txt files",
        allow_abbrev=False,
    )
    parser.add_argument(
        "--directory", "-d",
        action="store",
        required=True,
        help="directory to scan",
    )
    args = parser.parse_args()
    if not os.path.isdir(args.directory):
        parser.error(f"{args.directory} is not a directory")

    for filename in find_untranslated_files(args.directory):
        if translate_file(filename):
            time.sleep(PAUSE_SECONDS)


if __name__ == "__main__":
    main()