48150e0c7a
* translations: bring languages into tree * Update translation phrases changed since 2021 * Update packaging script to include all translations * Update languages.cfg * Add Latin American Spanish translations This is a copy of spanish for now. * Ignore "en" when looking for translation folders English is the default and doesn't use a subfolder. * Only add each translation folder once Korean "ko" is in there twice. * Compare language coverage to english All phrases are compared to the english baseline files and any differences are reported. The differences are pushed to a Github Project as well for an easier overview. Thank you to @nosoop for sharing the Python SMC parser! * Add link to README --------- Co-authored-by: Peace-Maker <peace-maker@wcfan.de>
204 lines
5.9 KiB
Python
204 lines
5.9 KiB
Python
#!/usr/bin/python3
|
|
|
|
# BSD Zero Clause License
|
|
#
|
|
# Copyright (C) 2023 by nosoop
|
|
#
|
|
# Permission to use, copy, modify, and/or distribute this software for any purpose with or
|
|
# without fee is hereby granted.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
|
# SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
|
|
# THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY
|
|
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
|
|
# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
|
# OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
# https://gist.github.com/nosoop/8c6ccaec11b1d33340bec8dbc8096658
|
|
import collections
|
|
import enum
|
|
import itertools
|
|
|
|
|
|
class SMCOperation(enum.Enum):
|
|
STRING = 1
|
|
SUBSECTION_START = 2
|
|
SUBSECTION_END = 3
|
|
COMMENT = 4
|
|
COMMENT_MULTILINE = 5
|
|
KEYVALUE = 6
|
|
|
|
|
|
# https://stackoverflow.com/a/70762559
|
|
def takewhile_inclusive(predicate, it):
|
|
for x in it:
|
|
if predicate(x):
|
|
yield x
|
|
else:
|
|
yield x
|
|
break
|
|
|
|
|
|
def _is_whitespace(ch):
|
|
return ch in (' ', '\t', '\n', '\r')
|
|
|
|
|
|
def _smc_stream_skip_whitespace(stream):
|
|
# consumes whitespace and returns the first non-whitespace character if any, or None if EOS
|
|
values = tuple(takewhile_inclusive(_is_whitespace, stream))
|
|
if not values:
|
|
return None
|
|
*ws, last = values
|
|
if not ws and not _is_whitespace(last):
|
|
return last
|
|
return last if ws and not _is_whitespace(last) else None
|
|
|
|
|
|
def _smc_stream_extract_multiline_comment(stream):
|
|
while True:
|
|
yield from itertools.takewhile(lambda ch: ch != '*', stream)
|
|
ch = next(stream, None)
|
|
if ch == '/':
|
|
return
|
|
yield '*'
|
|
yield ch
|
|
|
|
|
|
_escape_mapping = str.maketrans({
|
|
'"': '"',
|
|
'n': '\n',
|
|
'r': '\r',
|
|
't': '\t',
|
|
'\\': '\\',
|
|
})
|
|
|
|
|
|
def _smc_stream_extract_string(stream):
|
|
for ch in stream:
|
|
if ch == "\\":
|
|
ch = next(stream).translate(_escape_mapping)
|
|
elif ch == '"':
|
|
return
|
|
yield ch
|
|
|
|
|
|
def parse_smc_string(data):
|
|
stream = iter(data)
|
|
while True:
|
|
ch = _smc_stream_skip_whitespace(stream)
|
|
if ch is None:
|
|
return
|
|
elif ch == '"':
|
|
# consume until the next quote, then determine if:
|
|
# - the string marks the subsection name '{'
|
|
# - we have another string to consume, making this a key / value pair
|
|
key = ''.join(_smc_stream_extract_string(stream))
|
|
|
|
ch = _smc_stream_skip_whitespace(stream)
|
|
if ch == '{':
|
|
yield SMCOperation.SUBSECTION_START, key
|
|
elif ch == '"':
|
|
value = ''.join(_smc_stream_extract_string(stream))
|
|
yield SMCOperation.KEYVALUE, key, value
|
|
else:
|
|
raise ValueError(
|
|
f"Unexpected character {ch.encode('ascii', 'backslashreplace')} after end of string"
|
|
)
|
|
elif ch == '}':
|
|
yield SMCOperation.SUBSECTION_END, None
|
|
elif ch == '/':
|
|
ch = next(stream)
|
|
if ch == '/':
|
|
# single line comment: consume until the end of the line
|
|
value = ''.join(
|
|
itertools.takewhile(lambda ch: ch != '\n', stream))
|
|
yield SMCOperation.COMMENT, value
|
|
elif ch == '*':
|
|
# multi line comment: consume until the sequence '*/' is reached
|
|
value = ''.join(_smc_stream_extract_multiline_comment(stream))
|
|
yield SMCOperation.COMMENT_MULTILINE, value
|
|
else:
|
|
raise ValueError(
|
|
f"Unexpected character {ch.encode('ascii', 'backslashreplace')} at start of comment"
|
|
)
|
|
else:
|
|
raise ValueError(
|
|
f"Unexpected character {ch.encode('ascii', 'backslashreplace')}"
|
|
)
|
|
|
|
|
|
class MultiKeyDict(collections.defaultdict):
|
|
# a dict that supports supports one-to-many mappings
|
|
# init by passing keys pointing to a list of values
|
|
def __init__(self, *args, **kwargs):
|
|
super().__init__(list, *args, **kwargs)
|
|
|
|
# yields a key, value pair for every array item associated with a key
|
|
def items(self):
|
|
yield from ((k, iv) for k, v in super().items() for iv in v)
|
|
|
|
|
|
def smc_string_to_dict(data):
|
|
# returns a multidict instance
|
|
root_node = MultiKeyDict()
|
|
contexts = [root_node]
|
|
for event, *info in parse_smc_string(data):
|
|
if event == SMCOperation.SUBSECTION_START:
|
|
key, *_ = info
|
|
subkey = MultiKeyDict()
|
|
contexts[-1][key].append(subkey)
|
|
contexts.append(subkey)
|
|
elif event == SMCOperation.SUBSECTION_END:
|
|
contexts.pop()
|
|
elif event == SMCOperation.KEYVALUE:
|
|
key, value = info
|
|
contexts[-1][key].append(value)
|
|
return root_node
|
|
|
|
|
|
def main():
|
|
SMC_STRING = """
|
|
"thing"
|
|
{
|
|
// this is a comment node
|
|
"key" "value"
|
|
|
|
"subthing"
|
|
{
|
|
// and another
|
|
"subthing key" "subthing value"
|
|
"subthing key" "duplicate key value"
|
|
}
|
|
"subthing"
|
|
{
|
|
"duplicate subthing" "yes"
|
|
}
|
|
|
|
/**
|
|
* this is a multiline comment node
|
|
*/
|
|
"another key" "another value"
|
|
}
|
|
"""
|
|
|
|
# sections = []
|
|
# for event, *data in parse_smc_string(SMC_STRING):
|
|
# print(event, data, tuple(sections))
|
|
# if event == SMCOperation.SUBSECTION_START:
|
|
# section, *_ = data
|
|
# sections.append(section)
|
|
# elif event == SMCOperation.SUBSECTION_END:
|
|
# sections.pop()
|
|
# assert(not sections)
|
|
|
|
import json
|
|
import pathlib
|
|
# print(json.dumps(smc_string_to_dict(SMC_STRING), indent=4))
|
|
for f in pathlib.Path('translations').rglob('*.txt'):
|
|
print(f)
|
|
print(json.dumps(smc_string_to_dict(f.read_text('utf8')), indent = 4))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|