sourcemod/tools/language_check/compare_translation_phrases.py
Kyle Sanderson 48150e0c7a
Bring languages into the tree (#1625)
* translations: bring languages into tree

* Update translation phrases changed since 2021

* Update packaging script to include all translations

* Update languages.cfg

* Add Latin American Spanish translations

This is a copy of Spanish for now.

* Ignore "en" when looking for translation folders

English is the default and doesn't use a subfolder.

* Only add each translation folder once

Korean "ko" is in there twice.

* Compare language coverage to English

All phrases are compared to the English baseline files and any differences
are reported. The differences are also pushed to a GitHub Project for
an easier overview.

Thank you to @nosoop for sharing the Python SMC parser!

* Add link to README

---------

Co-authored-by: Peace-Maker <peace-maker@wcfan.de>
2023-03-29 16:23:05 +02:00

284 lines
11 KiB
Python

#!/usr/bin/python3
# Copyright (c) 2023 Peace-Maker
from collections import defaultdict
from dataclasses import dataclass
import os
import pathlib
import re
from smc_parser import smc_string_to_dict
from typing import Dict, List, Union
from github_gql import GithubGQL
@dataclass
class Translation:
    """A single entry below a phrase: one language's text or the ``#format`` entry."""

    # Key of the entry inside the phrase section: a language id, or the
    # literal '#format' for the format entry.
    langid: str
    # Raw value of the entry as read from the phrase file.
    translation: str
    # Number of format parameters counted in `translation`.
    param_count: int
@dataclass
class Phrase:
    """One named phrase of a phrase file together with all of its entries."""

    # Identifier of the phrase (the section name inside "Phrases").
    key: str
    # The phrase's '#format' entry, or None when the phrase has none.
    format: Union[Translation, None]
    # Every entry of the phrase other than '#format'.
    translations: List[Translation]
@dataclass
class PhraseFile:
    """The parsed content of one translation file."""

    # File name without its directory.
    filename: str
    # Phrases read from the file; empty when nothing could be read.
    phrases: List[Phrase]
    # Message describing why the file could not be parsed, else None.
    error: Union[str, None] = None
@dataclass
class Language:
    """A language listed in languages.cfg and the phrase files found for it."""

    # Language id: the key of the entry in languages.cfg (e.g. 'en').
    langid: str
    # Value of the languages.cfg entry; used as the display name.
    name: str
    # Parsed phrase files of this language.
    files: List[PhraseFile]
@dataclass
class Report:
    """A single problem found while comparing a language with English.

    The comparison code fills in either `file_warning` (a problem with the
    whole file) or `phrase_key` plus `phrase_warning` (a problem with one
    phrase); the unused fields stay empty strings.
    """

    # Language the problem was found in.
    langid: str
    # Name of the affected translation file.
    filename: str
    # Problem concerning the file as a whole.
    file_warning: str = ''
    # Phrase the problem refers to.
    phrase_key: str = ''
    # Problem concerning `phrase_key`.
    phrase_warning: str = ''
def parse_translations(path: str) -> 'List[PhraseFile]':
    """Parse every ``*.txt`` phrase file located directly inside *path*.

    Args:
        path: Directory holding the phrase files of one language.

    Returns:
        One PhraseFile per regular ``*.txt`` file, ordered by file name.
        A file that cannot be parsed, or that has no top-level "Phrases"
        section, is still returned, with an empty phrase list and ``error``
        set, so callers can tell a broken file from a missing one.
    """
    param_regex = re.compile(r'\{[0-9]+\}')
    units = []
    # glob() yields entries in no particular order; sort them so the result,
    # and every report generated from it, is the same on each run.
    for file in sorted(pathlib.Path(path).glob('*.txt')):
        if not file.is_file():
            continue

        try:
            phrases = smc_string_to_dict(file.read_text('utf-8'))
        except Exception as ex:
            # Broad on purpose: any read, decode or parse failure marks just
            # this file as broken instead of aborting the whole run.
            print(f'Error parsing {file.name}: {ex}')
            units.append(PhraseFile(file.name, [], str(ex)))
            continue

        if 'Phrases' not in phrases:
            message = f'File {file.name} does not start with a "Phrases" section'
            print(message)
            # Record the file like a parse failure rather than dropping it,
            # otherwise it would later be reported as missing altogether.
            units.append(PhraseFile(file.name, [], message))
            continue

        parsed_phrases = []
        for section in phrases['Phrases']:
            for phrase_ident, raw_translations in section.items():
                translations = []
                format_special = None
                for child_langid, translation in raw_translations.items():
                    if child_langid == '#format':
                        # Parameter count = number of comma separated entries.
                        format_special = Translation(
                            child_langid, translation,
                            translation.count(',') + 1)
                    else:
                        # Parameter count = number of "{<digits>}" placeholders.
                        translations.append(
                            Translation(child_langid, translation,
                                        len(param_regex.findall(translation))))
                parsed_phrases.append(
                    Phrase(phrase_ident, format_special, translations))
        units.append(PhraseFile(file.name, parsed_phrases))
    return units
# Read languages.cfg to learn which languages may have translations
print('Parsing languages.cfg...')
languages_cfg_text = pathlib.Path('../../configs/languages.cfg').read_text(
    'utf-8')
languages_cfg = smc_string_to_dict(languages_cfg_text)
available_languages: Dict[str, Language] = {
    langid: Language(langid, lang, [])
    for langid, lang in languages_cfg['Languages'][0].items()
}
print(f'Available languages: {len(available_languages)}')

translations_root = '../../translations'

# English is the baseline for all other translations and is stored directly
# in the translations folder instead of a subdirectory, so parse it first
available_languages['en'].files = parse_translations(translations_root)

# Every other language is stored in a subdirectory named after its id
for langid, lang in available_languages.items():
    if langid != 'en':
        lang.files = parse_translations(f'{translations_root}/{langid}')
# Issues found, grouped per language and file:
#   reports[langid][filename] -> list of Report
# Only index into `reports` when adding an issue. It is a nested defaultdict,
# so a bare lookup creates an empty entry and the `langid in reports` checks
# would then treat a clean language as one with issues.
reports: Dict[str, Dict[str,
                        List[Report]]] = defaultdict(lambda: defaultdict(list))


def _add_file_issue(langid: str, filename: str, warning: str) -> None:
    """Record a problem that concerns a whole translation file."""
    reports[langid][filename].append(
        Report(langid, filename, file_warning=warning))


def _add_phrase_issue(langid: str, filename: str, key: str,
                      warning: str) -> None:
    """Record a problem that concerns a single phrase of a file."""
    reports[langid][filename].append(
        Report(langid, filename, phrase_key=key, phrase_warning=warning))


def _index_first(items, key_of):
    """Map key_of(item) -> item, keeping the first item for each key."""
    index = {}
    for item in items:
        index.setdefault(key_of(item), item)
    return index


# Compare the English translation with the other translations
english = available_languages['en']

# Index the English baseline once instead of scanning the lists again for
# every file and phrase of every language.
english_files: Dict[str, PhraseFile] = _index_first(english.files,
                                                    lambda f: f.filename)
english_phrases: Dict[str, Dict[str, Phrase]] = {
    filename: _index_first(english_file.phrases, lambda p: p.key)
    for filename, english_file in english_files.items()
}
# Phrase key -> name of the first English file that contains the phrase
english_phrase_files: Dict[str, str] = {}
for english_file in english.files:
    for english_phrase in english_file.phrases:
        english_phrase_files.setdefault(english_phrase.key,
                                        english_file.filename)

for langid, lang in available_languages.items():
    if langid == 'en':
        continue

    # See if this language has anything that English doesn't
    for file in lang.files:
        if file.filename not in english_files:
            _add_file_issue(langid, file.filename,
                            'File doesn\'t exist in English')
            continue

        if not file.phrases:
            _add_file_issue(langid, file.filename, 'File is empty')
            continue

        baseline_phrases = english_phrases[file.filename]
        for phrase in file.phrases:
            if phrase.format:
                _add_phrase_issue(langid, file.filename, phrase.key,
                                  'Includes a "#format" key')

            english_phrase = baseline_phrases.get(phrase.key)
            if english_phrase is None:
                # look for this phrase in a different English file
                other_filename = english_phrase_files.get(phrase.key)
                if other_filename is None:
                    warning = 'Phrase doesn\'t exist in English'
                else:
                    warning = f'Phrase exists in a different file in English: {other_filename}'
                _add_phrase_issue(langid, file.filename, phrase.key, warning)
                continue

            translation_found = False
            for translation in phrase.translations:
                if translation.langid == langid:
                    translation_found = True
                else:
                    _add_phrase_issue(
                        langid, file.filename, phrase.key,
                        f'Includes a translation for language "{translation.langid}"'
                    )

                # The parameter count is only checked when the English phrase
                # declares a "#format" to compare against.
                if english_phrase.format and translation.param_count != english_phrase.format.param_count:
                    _add_phrase_issue(
                        langid, file.filename, phrase.key,
                        f'Has {translation.param_count} format parameters, but English has {english_phrase.format.param_count}'
                    )

            if not translation_found:
                _add_phrase_issue(langid, file.filename, phrase.key,
                                  'Phrase available, but translation missing')

    # See if this language is missing anything that English has
    lang_files = _index_first(lang.files, lambda f: f.filename)
    for file in english.files:
        lang_file = lang_files.get(file.filename)
        if lang_file is None:
            _add_file_issue(langid, file.filename, 'File missing')
            continue

        # The file doesn't contain any phrases. We reported that already, so don't spam every single missing phrase
        if not lang_file.phrases:
            continue

        translated_keys = {p.key for p in lang_file.phrases}
        for phrase in file.phrases:
            if phrase.key not in translated_keys:
                _add_phrase_issue(langid, file.filename, phrase.key,
                                  'Phrase missing')

    if langid not in reports:
        print(f'No issues found for {lang.name} ({langid})')
    else:
        # Count the individual issues; len(reports[langid]) would only be the
        # number of files that have at least one issue.
        issue_count = sum(
            len(problems) for problems in reports[langid].values())
        print(f'Found {issue_count} issues for {lang.name} ({langid})')
def _require_env(name: str) -> str:
    """Return environment variable *name*; raise if it is unset or empty."""
    value = os.environ.get(name)
    if not value:
        raise Exception(f'{name} environment variable not set')
    return value


# Settings of the GitHub project the reports are pushed to
GITHUB_TOKEN = _require_env('GITHUB_TOKEN')
ORGANIZATION = _require_env('ORGANIZATION')
PROJECT_NUMBER = _require_env('PROJECT_NUMBER')

# Get the project and its draft issues
print('Getting project and draft issues...')
githubgql = GithubGQL(GITHUB_TOKEN)
project = githubgql.get_project(ORGANIZATION, int(PROJECT_NUMBER))
project_id = project['id']
field_ids = project['fields']['nodes']

# Locate the "Status" field and the ids of its options.
# .get() skips field nodes that carry no 'name' key instead of failing on them.
# NOTE(review): whether such nodes can occur depends on the query used by
# GithubGQL.get_project - confirm.
status_field = [field for field in field_ids if field.get('name') == 'Status']
# Raise explicitly like the other checks here; an assert would be skipped
# when Python runs with -O.
if len(status_field) != 1:
    raise Exception('Status field not found')
status_field_id = status_field[0]['id']
status_field_option_ids = {
    option['name']: option['id']
    for option in status_field[0]['options']
}
# Both options are needed further down to mark a language's status
for required_option in ('Incomplete', 'Complete'):
    if required_option not in status_field_option_ids:
        raise Exception(f'{required_option} status field option not found')

draft_issues = project['items']['nodes']
# Build one markdown report per language and push it to the language's draft
# issue in the project, creating the draft issue when it doesn't exist yet
for langid, lang in available_languages.items():
    if langid in reports:
        print(f'Generating report for {lang.name} ({langid})...')
        status = 'Incomplete'
        parts = []
        for filename, problems in reports[langid].items():
            # One section per file: heading, file warnings, then a table of
            # phrase issues whose header is emitted with the first phrase row
            parts.append(
                f'## [{filename}](https://github.com/alliedmodders/sourcemod/blob/master/translations/{langid}/{filename})\n'
            )
            table_started = False
            for report in problems:
                if report.file_warning:
                    parts.append(f'**{report.file_warning}**\n')
                    print(f' {report.file_warning} ({report.filename})')
                if not report.phrase_warning:
                    continue
                if not table_started:
                    parts.append(
                        '| Phrase | Issue |\n| ------- | --------- |\n')
                    table_started = True
                parts.append(
                    f'| `{report.phrase_key}` | {report.phrase_warning} |\n')
                print(
                    f' {report.filename}: "{report.phrase_key}" -> {report.phrase_warning}'
                )
            parts.append('\n')
        markdown = ''.join(parts)
    else:
        status = 'Complete'
        markdown = 'No issues found'

    print(f'Updating draft issue for {lang.name} ({langid})...')
    # The draft issue of a language is the first project item titled with the
    # language's name
    issue = None
    for candidate in draft_issues:
        if candidate['content']['title'] == lang.name:
            issue = candidate
            break

    if issue is not None:
        githubgql.update_draft_issue(issue['content']['id'], lang.name,
                                     markdown)
    else:
        issue = githubgql.add_draft_issue(project_id, lang.name, markdown)

    # Reflect the outcome in the project's "Status" column
    status_option_id = status_field_option_ids[status]
    githubgql.update_item_field_value_option(project_id, issue['id'],
                                             status_field_id,
                                             status_option_id)