* translations: bring languages into tree
* Update translation phrases changed since 2021
* Update packaging script to include all translations
* Update languages.cfg
* Add Latin American Spanish translations
  This is a copy of Spanish for now.
* Ignore "en" when looking for translation folders
  English is the default and doesn't use a subfolder.
* Only add each translation folder once
  Korean "ko" is in there twice.
* Compare language coverage to English
  All phrases are compared to the English baseline files and any differences are reported. The differences are pushed to a GitHub Project as well for an easier overview. Thank you to @nosoop for sharing the Python SMC parser!
* Add link to README
---------
Co-authored-by: Peace-Maker <peace-maker@wcfan.de>
		
			
				
	
	
		
			204 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			204 lines
		
	
	
		
			5.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/python3
 | |
| 
 | |
| # BSD Zero Clause License
 | |
| # 
 | |
| # Copyright (C) 2023 by nosoop
 | |
| # 
 | |
| # Permission to use, copy, modify, and/or distribute this software for any purpose with or
 | |
| # without fee is hereby granted.
 | |
| # 
 | |
| # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
 | |
| # SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
 | |
| # THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY
 | |
| # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
 | |
| # CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
 | |
| # OR PERFORMANCE OF THIS SOFTWARE.
 | |
| 
 | |
| # https://gist.github.com/nosoop/8c6ccaec11b1d33340bec8dbc8096658
 | |
| import collections
 | |
| import enum
 | |
| import itertools
 | |
| 
 | |
| 
 | |
class SMCOperation(enum.Enum):
    """Kinds of events identified while reading an SMC document."""

    # Declared for completeness; the parser visible in this file never emits it.
    STRING = 1
    # A quoted name followed by '{'.
    SUBSECTION_START = 2
    # A closing '}'.
    SUBSECTION_END = 3
    # A '//' comment running to the end of the line.
    COMMENT = 4
    # A '/* ... */' comment.
    COMMENT_MULTILINE = 5
    # Two consecutive quoted strings: a key and its value.
    KEYVALUE = 6
 | |
| 
 | |
| 
 | |
| # https://stackoverflow.com/a/70762559
 | |
def takewhile_inclusive(predicate, it):
    """Yield items from *it* up to and including the first that fails *predicate*.

    Unlike ``itertools.takewhile``, the item that ends the run is yielded too
    instead of being silently consumed.
    """
    for item in it:
        # Evaluate the predicate before handing the item out so that the call
        # order matches a plain "test, then yield" loop.
        keep_going = predicate(item)
        yield item
        if not keep_going:
            return
 | |
| 
 | |
| 
 | |
| def _is_whitespace(ch):
 | |
|     return ch in (' ', '\t', '\n', '\r')
 | |
| 
 | |
| 
 | |
def _smc_stream_skip_whitespace(stream):
    """Advance *stream* past whitespace and return the next significant character.

    The returned character is consumed from the stream. Returns None when the
    stream runs out before a non-whitespace character is seen.
    """
    for ch in stream:
        if not _is_whitespace(ch):
            return ch
    return None
 | |
| 
 | |
| 
 | |
| def _smc_stream_extract_multiline_comment(stream):
 | |
|     while True:
 | |
|         yield from itertools.takewhile(lambda ch: ch != '*', stream)
 | |
|         ch = next(stream, None)
 | |
|         if ch == '/':
 | |
|             return
 | |
|         yield '*'
 | |
|         yield ch
 | |
| 
 | |
| 
 | |
| _escape_mapping = str.maketrans({
 | |
|     '"': '"',
 | |
|     'n': '\n',
 | |
|     'r': '\r',
 | |
|     't': '\t',
 | |
|     '\\': '\\',
 | |
| })
 | |
| 
 | |
| 
 | |
| def _smc_stream_extract_string(stream):
 | |
|     for ch in stream:
 | |
|         if ch == "\\":
 | |
|             ch = next(stream).translate(_escape_mapping)
 | |
|         elif ch == '"':
 | |
|             return
 | |
|         yield ch
 | |
| 
 | |
| 
 | |
def parse_smc_string(data):
    """Parse SMC text and yield one event tuple per element.

    Each yielded tuple starts with an ``SMCOperation`` member:

    - ``(SUBSECTION_START, name)`` for a quoted string followed by '{'
    - ``(KEYVALUE, key, value)`` for two consecutive quoted strings
    - ``(SUBSECTION_END, None)`` for '}'
    - ``(COMMENT, text)`` for a '//' comment; *text* excludes the newline
    - ``(COMMENT_MULTILINE, text)`` for a '/* ... */' comment

    Args:
        data: an iterable of characters, typically a ``str``.

    Raises:
        ValueError: on a character that cannot start an element, on an
            unexpected character or end of input after a key string, or on
            a '/' that does not start a comment.
    """
    stream = iter(data)
    while True:
        ch = _smc_stream_skip_whitespace(stream)
        if ch is None:
            return
        elif ch == '"':
            # consume until the next quote, then determine if:
            # - the string marks the subsection name '{'
            # - we have another string to consume, making this a key / value pair
            key = ''.join(_smc_stream_extract_string(stream))

            ch = _smc_stream_skip_whitespace(stream)
            if ch == '{':
                yield SMCOperation.SUBSECTION_START, key
            elif ch == '"':
                value = ''.join(_smc_stream_extract_string(stream))
                yield SMCOperation.KEYVALUE, key, value
            elif ch is None:
                # End of input has no character to report; without this
                # branch ch.encode() below fails with AttributeError.
                raise ValueError("Unexpected end of input after end of string")
            else:
                raise ValueError(
                    f"Unexpected character {ch.encode('ascii', 'backslashreplace')} after end of string"
                )
        elif ch == '}':
            yield SMCOperation.SUBSECTION_END, None
        elif ch == '/':
            # A bare next() at end of input would surface as RuntimeError
            # from this generator, so use a default and check it explicitly.
            ch = next(stream, None)
            if ch == '/':
                # single line comment: consume until the end of the line
                value = ''.join(
                    itertools.takewhile(lambda ch: ch != '\n', stream))
                yield SMCOperation.COMMENT, value
            elif ch == '*':
                # multi line comment: consume until the sequence '*/' is reached
                value = ''.join(_smc_stream_extract_multiline_comment(stream))
                yield SMCOperation.COMMENT_MULTILINE, value
            elif ch is None:
                raise ValueError("Unexpected end of input at start of comment")
            else:
                raise ValueError(
                    f"Unexpected character {ch.encode('ascii', 'backslashreplace')} at start of comment"
                )
        else:
            raise ValueError(
                f"Unexpected character {ch.encode('ascii', 'backslashreplace')}"
            )
 | |
| 
 | |
| 
 | |
class MultiKeyDict(collections.defaultdict):
    """A dict that supports one-to-many mappings.

    Every key maps to a list of values; missing keys start out as an empty
    list. Initialise it by passing keys that point to lists of values.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(list, *args, **kwargs)

    def items(self):
        """Yield one ``(key, value)`` pair per list entry stored under each key."""
        for key, values in super().items():
            for value in values:
                yield key, value
 | |
| 
 | |
| 
 | |
def smc_string_to_dict(data):
    """Parse SMC text into a nested ``MultiKeyDict``.

    Subsections become nested ``MultiKeyDict`` instances and key / value pairs
    become strings, each appended to the list stored under its key, so
    repeated keys and repeated subsection names are all kept. Comments are
    ignored. Subsections still open at the end of input are accepted as-is.

    Raises:
        ValueError: if a '}' has no matching subsection start, or if
            ``parse_smc_string`` rejects the input.
    """
    root_node = MultiKeyDict()
    # Stack of currently open sections; the last entry receives new items.
    contexts = [root_node]
    for event, *info in parse_smc_string(data):
        if event == SMCOperation.SUBSECTION_START:
            key, *_ = info
            subkey = MultiKeyDict()
            contexts[-1][key].append(subkey)
            contexts.append(subkey)
        elif event == SMCOperation.SUBSECTION_END:
            # Never pop the root: a later entry would otherwise fail with an
            # IndexError or be attached to the wrong level.
            if len(contexts) == 1:
                raise ValueError(
                    "Unexpected '}' without a matching subsection start"
                )
            contexts.pop()
        elif event == SMCOperation.KEYVALUE:
            key, value = info
            contexts[-1][key].append(value)
    return root_node
 | |
| 
 | |
| 
 | |
def main():
    """Print every translation file as JSON.

    Searches the relative ``translations`` directory recursively for ``*.txt``
    files and, for each one, prints its path followed by the JSON rendering of
    its parsed content.
    """
    import json
    import pathlib

    # Sample document showing the supported syntax. It is not used by the loop
    # below and is kept for ad-hoc debugging.
    SMC_STRING = """
	"thing"
	{
		// this is a comment node
		"key"	"value"
		
		"subthing"
		{
			// and another
			"subthing key"		"subthing value"
			"subthing key"		"duplicate key value"
		}
		"subthing"
		{
			"duplicate subthing" "yes"
		}
		
		/**
		 * this is a multiline comment node
		 */
		"another key"	"another value"
	}
	"""

    # To trace the raw parser events for the sample:
    #
    #     sections = []
    #     for event, *data in parse_smc_string(SMC_STRING):
    #         print(event, data, tuple(sections))
    #         if event == SMCOperation.SUBSECTION_START:
    #             section, *_ = data
    #             sections.append(section)
    #         elif event == SMCOperation.SUBSECTION_END:
    #             sections.pop()
    #     assert(not sections)
    #
    # To dump the sample as JSON:
    #
    #     print(json.dumps(smc_string_to_dict(SMC_STRING), indent=4))

    translation_root = pathlib.Path('translations')
    for translation_file in translation_root.rglob('*.txt'):
        print(translation_file)
        parsed = smc_string_to_dict(translation_file.read_text('utf8'))
        print(json.dumps(parsed, indent=4))


if __name__ == "__main__":
    main()
 |