From f7349d8e98be3954b5c3dcb3b07d86285311cd6f Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Mon, 11 Aug 2025 20:57:50 -0300 Subject: [PATCH 01/13] Create check_langs_in_po.py --- scripts/check_langs_in_po.py | 82 ++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 scripts/check_langs_in_po.py diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py new file mode 100644 index 00000000000..8fdbe2e2620 --- /dev/null +++ b/scripts/check_langs_in_po.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python3 +import argparse +import re +import polib +from pathlib import Path + +# Russian letters (Cyrillic Unicode block) +RUSSIAN_PATTERN = r'[\u0400-\u04FF]' + +# Specific Polish letters +POLISH_PATTERN = r'[ĄĆĘŁŃÓŚŹŻąćęłńóśźż]' + + +def build_pattern(check_russian=True, check_polish=True): + """Build the combined regex based on selected languages.""" + parts = [] + if check_russian: + parts.append(RUSSIAN_PATTERN) + if check_polish: + parts.append(POLISH_PATTERN) + if not parts: + return None + return re.compile("|".join(parts)) + + +def find_matches_in_po(po_path, regex): + po = polib.pofile(po_path) + matches = [] + for entry in po: + texts = [entry.msgid, entry.msgstr] + if entry.msgid_plural: + texts.extend(entry.msgid_plural.values()) + + for text in texts: + if text and regex.search(text): + matches.append((po_path, entry.linenum, text)) + break # avoid multiple reports for the same entry + return matches + + +def main(): + parser = argparse.ArgumentParser( + description="Search for Russian and/or Polish patterns in PO files." + ) + parser.add_argument( + "paths", + nargs="+", + help="One or more PO files or directories to search" + ) + parser.add_argument( + "--no-russian", + action="store_true", + help="Disable Russian text detection" + ) + parser.add_argument( + "--no-polish", + action="store_true", + help="Disable Polish text detection" + ) + args = parser.parse_args() + + regex = build_pattern(not args.no_russian, not args.no_polish) + if regex is None: + parser.error("All checks are disabled. Enable at least one language.") + + po_files = [] + for arg in args.paths: + p = Path(arg) + if p.is_dir(): + po_files.extend(p.rglob("*.po")) + elif p.is_file(): + po_files.append(p) + else: + print(f"Warning: {p} not found.") + + for path in po_files: + for po_path, linenum, text in find_matches_in_po(path, regex): + print(f"{po_path}:{linenum}: {text}") + + +if __name__ == "__main__": + main() From 9b03e625b8137088d53e4835de26dc69f859d33f Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Mon, 11 Aug 2025 21:58:19 -0300 Subject: [PATCH 02/13] Add Ukrainian, fix lint issues, reorganize patterns parsing --- scripts/check_langs_in_po.py | 97 ++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 44 deletions(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index 8fdbe2e2620..f6d4e572a10 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -1,80 +1,89 @@ #!/usr/bin/env python3 +""" +Check .po files for presence of specific language patterns in translated strings. +Languages currently checked: Russian, Polish, Ukranian. +""" import argparse import re import polib from pathlib import Path -# Russian letters (Cyrillic Unicode block) -RUSSIAN_PATTERN = r'[\u0400-\u04FF]' +# Character patterns +RUSSIAN = r"\u0400-\u04FF" # Full Cyrillic block +POLISH = r"ĄĆĘŁŃŚŹŻąćęłńśźż" +UKRAINIAN = r"ҐЄІЇґєії" -# Specific Polish letters -POLISH_PATTERN = r'[ĄĆĘŁŃÓŚŹŻąćęłńóśźż]' - -def build_pattern(check_russian=True, check_polish=True): - """Build the combined regex based on selected languages.""" +def build_pattern(enable_russian=True, enable_polish=True, enable_ukrainian=True): + """ + Build a compiled regex pattern for the selected languages. + """ parts = [] - if check_russian: - parts.append(RUSSIAN_PATTERN) - if check_polish: - parts.append(POLISH_PATTERN) + if enable_russian: + parts.append(RUSSIAN) + if enable_polish: + parts.append(POLISH) + if enable_ukrainian: + parts.append(UKRAINIAN) if not parts: return None - return re.compile("|".join(parts)) + return re.compile(f"[{''.join(parts)}]") -def find_matches_in_po(po_path, regex): - po = polib.pofile(po_path) +def find_matches_in_po(po_path, pattern): + """ + Search for matches in translated strings of a PO file. + Skips entries with empty translations. + """ matches = [] + if not pattern: + return matches + + po = polib.pofile(po_path) for entry in po: - texts = [entry.msgid, entry.msgstr] - if entry.msgid_plural: - texts.extend(entry.msgid_plural.values()) + # Skip if there is no translation at all + if not entry.msgstr.strip() and not any(v.strip() for v in entry.msgstr_plural.values()): + continue + + texts = [entry.msgstr] for text in texts: - if text and regex.search(text): + if text and pattern.search(text): matches.append((po_path, entry.linenum, text)) break # avoid multiple reports for the same entry return matches def main(): - parser = argparse.ArgumentParser( - description="Search for Russian and/or Polish patterns in PO files." - ) - parser.add_argument( - "paths", - nargs="+", - help="One or more PO files or directories to search" - ) - parser.add_argument( - "--no-russian", - action="store_true", - help="Disable Russian text detection" - ) - parser.add_argument( - "--no-polish", - action="store_true", - help="Disable Polish text detection" - ) + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("paths", nargs="+", help="One or more PO files or directories to search") + parser.add_argument("--no-russian", action="store_true", help="Disable Russian pattern checking.") + parser.add_argument("--no-polish", action="store_true", help="Disable Polish pattern checking.") + parser.add_argument("--no-ukrainian", action="store_true", help="Disable Ukrainian pattern checking.") + args = parser.parse_args() - regex = build_pattern(not args.no_russian, not args.no_polish) - if regex is None: - parser.error("All checks are disabled. Enable at least one language.") + pattern = build_pattern( + enable_russian=not args.no_russian, + enable_polish=not args.no_polish, + enable_ukrainian=not args.no_ukrainian + ) + + if not pattern: + parser.error("All checks are disabled. Enable at least one language pattern.") - po_files = [] + paths = [] for arg in args.paths: p = Path(arg) if p.is_dir(): - po_files.extend(p.rglob("*.po")) + paths.extend(p.rglob("*.po")) elif p.is_file(): - po_files.append(p) + paths.append(p) else: print(f"Warning: {p} not found.") - for path in po_files: - for po_path, linenum, text in find_matches_in_po(path, regex): + for path in paths: + for po_path, linenum, text in find_matches_in_po(path, pattern): print(f"{po_path}:{linenum}: {text}") From 528a9000e2b19bc0347730acf09ba38ebe2a5e1e Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Mon, 11 Aug 2025 22:14:12 -0300 Subject: [PATCH 03/13] Remove leftovers of unnecessary plural-forms handling --- scripts/check_langs_in_po.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index f6d4e572a10..a82735664a9 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -42,7 +42,7 @@ def find_matches_in_po(po_path, pattern): po = polib.pofile(po_path) for entry in po: # Skip if there is no translation at all - if not entry.msgstr.strip() and not any(v.strip() for v in entry.msgstr_plural.values()): + if not entry.msgstr.strip(): continue texts = [entry.msgstr] From fd1d74c46c759f699cc930eabeaa2b8e11c8bef1 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Mon, 11 Aug 2025 22:14:28 -0300 Subject: [PATCH 04/13] Lint --- scripts/check_langs_in_po.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index a82735664a9..028e9035465 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -56,10 +56,18 @@ def find_matches_in_po(po_path, pattern): def main(): parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("paths", nargs="+", help="One or more PO files or directories to search") - parser.add_argument("--no-russian", action="store_true", help="Disable Russian pattern checking.") - parser.add_argument("--no-polish", action="store_true", help="Disable Polish pattern checking.") - parser.add_argument("--no-ukrainian", action="store_true", help="Disable Ukrainian pattern checking.") + parser.add_argument( + "paths", nargs="+", help="One or more PO files or directories to search" + ) + parser.add_argument( + "--no-russian", action="store_true", help="Disable Russian pattern checking." + ) + parser.add_argument( + "--no-polish", action="store_true", help="Disable Polish pattern checking." + ) + parser.add_argument( + "--no-ukrainian", action="store_true", help="Disable Ukrainian pattern checking." + ) args = parser.parse_args() From 098d61d50562b44391a7cc548541a37319846366 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Mon, 11 Aug 2025 22:17:03 -0300 Subject: [PATCH 05/13] Lint --- scripts/check_langs_in_po.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index 028e9035465..337b47f19af 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -66,7 +66,9 @@ def main(): "--no-polish", action="store_true", help="Disable Polish pattern checking." ) parser.add_argument( - "--no-ukrainian", action="store_true", help="Disable Ukrainian pattern checking." + "--no-ukrainian", + action="store_true", + help="Disable Ukrainian pattern checking." ) args = parser.parse_args() @@ -74,7 +76,7 @@ def main(): pattern = build_pattern( enable_russian=not args.no_russian, enable_polish=not args.no_polish, - enable_ukrainian=not args.no_ukrainian + enable_ukrainian=not args.no_ukrainian, ) if not pattern: From 6f07e3110ef03b14f027adfda2129621b3d8e1ca Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Mon, 11 Aug 2025 22:59:32 -0300 Subject: [PATCH 06/13] Add ignore words --- scripts/check_langs_in_po.py | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index 337b47f19af..33c4c68628a 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 """ Check .po files for presence of specific language patterns in translated strings. -Languages currently checked: Russian, Polish, Ukranian. +Languages currently checked: Russian, Polish, Ukrainian. """ import argparse import re @@ -13,6 +13,25 @@ POLISH = r"ĄĆĘŁŃŚŹŻąćęłńśźż" UKRAINIAN = r"ҐЄІЇґєії" +# Words to ignore if found in msgstr +IGNORE_WORDS = [ + "Charles-François", + "Gruszczyński", + "Jędrzejewski-Szmek", + "Kołodziej", + "Коренберг Марк", + "Łukasz", + "Łapkiewicz", + "Марк Коренберг", + "Michał", + "Ożarowski", + "Sławecki", + "Stanisław", + "Tvrtković", + "Wołodźko", + "Є", +] + def build_pattern(enable_russian=True, enable_polish=True, enable_ukrainian=True): """ @@ -30,10 +49,20 @@ def build_pattern(enable_russian=True, enable_polish=True, enable_ukrainian=True return re.compile(f"[{''.join(parts)}]") +def should_ignore(text): + """ + Return True if the text contains any of the ignore words. + """ + for word in IGNORE_WORDS: + if word in text: + return True + return False + + def find_matches_in_po(po_path, pattern): """ Search for matches in translated strings of a PO file. - Skips entries with empty translations. + Skips entries with empty translations or containing ignored words. """ matches = [] if not pattern: @@ -45,6 +74,9 @@ def find_matches_in_po(po_path, pattern): if not entry.msgstr.strip(): continue + if should_ignore(entry.msgstr): + continue + texts = [entry.msgstr] for text in texts: From cc01eda73344775d02458303f588ac102cb7d4b9 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Wed, 13 Aug 2025 15:37:50 -0300 Subject: [PATCH 07/13] Add delete command --- scripts/check_langs_in_po.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index 33c4c68628a..dd688fd73c5 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 """ Check .po files for presence of specific language patterns in translated strings. +Optionally delete matched translations. Languages currently checked: Russian, Polish, Ukrainian. """ import argparse @@ -59,30 +60,36 @@ def should_ignore(text): return False -def find_matches_in_po(po_path, pattern): +def find_matches_in_po(po_path, pattern, delete_matches=False): """ Search for matches in translated strings of a PO file. Skips entries with empty translations or containing ignored words. + Optionally delete matched translations. """ matches = [] if not pattern: return matches po = polib.pofile(po_path) + modified = False + for entry in po: # Skip if there is no translation at all if not entry.msgstr.strip(): continue + # Skip if contains ignored word if should_ignore(entry.msgstr): continue - texts = [entry.msgstr] + if pattern.search(entry.msgstr): + matches.append((po_path, entry.linenum, entry.msgstr)) + if delete_matches: + entry.msgstr = "" + modified = True - for text in texts: - if text and pattern.search(text): - matches.append((po_path, entry.linenum, text)) - break # avoid multiple reports for the same entry + if delete_matches and modified: + po.save() return matches @@ -100,7 +107,12 @@ def main(): parser.add_argument( "--no-ukrainian", action="store_true", - help="Disable Ukrainian pattern checking." + help="Disable Ukrainian pattern checking.", + ) + parser.add_argument( + "--delete-matches", + action="store_true", + help="Delete msgstr of matched entries.", ) args = parser.parse_args() @@ -125,7 +137,7 @@ def main(): print(f"Warning: {p} not found.") for path in paths: - for po_path, linenum, text in find_matches_in_po(path, pattern): + for po_path, linenum, text in find_matches_in_po(path, pattern, args.delete_matches): print(f"{po_path}:{linenum}: {text}") From c44b84ebda31861e40d4a9e18144e5e0699f81cf Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Wed, 13 Aug 2025 15:58:11 -0300 Subject: [PATCH 08/13] lint --- scripts/check_langs_in_po.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index dd688fd73c5..f4734f31ce0 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -137,7 +137,9 @@ def main(): print(f"Warning: {p} not found.") for path in paths: - for po_path, linenum, text in find_matches_in_po(path, pattern, args.delete_matches): + for po_path, linenum, text in find_matches_in_po( + path, pattern, args.delete_matches + ): print(f"{po_path}:{linenum}: {text}") From 85a0d549a330d569a18d31f46eceb00fd8694a9e Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Fri, 15 Aug 2025 23:38:46 -0300 Subject: [PATCH 09/13] Replace hardcoded pattern with pyfranc and iso639 libs --- scripts/check_langs_in_po.py | 149 +++++++++++++++++------------------ 1 file changed, 72 insertions(+), 77 deletions(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index f4734f31ce0..31bba98e5ed 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -1,95 +1,99 @@ #!/usr/bin/env python3 """ -Check .po files for presence of specific language patterns in translated strings. -Optionally delete matched translations. -Languages currently checked: Russian, Polish, Ukrainian. +Check PO files for translations from another language """ import argparse import re -import polib from pathlib import Path -# Character patterns -RUSSIAN = r"\u0400-\u04FF" # Full Cyrillic block -POLISH = r"ĄĆĘŁŃŚŹŻąćęłńśźż" -UKRAINIAN = r"ҐЄІЇґєії" - -# Words to ignore if found in msgstr -IGNORE_WORDS = [ - "Charles-François", - "Gruszczyński", - "Jędrzejewski-Szmek", - "Kołodziej", - "Коренберг Марк", - "Łukasz", - "Łapkiewicz", - "Марк Коренберг", - "Michał", - "Ożarowski", - "Sławecki", - "Stanisław", - "Tvrtković", - "Wołodźko", - "Є", +import polib +from iso639 import Lang +from pyfranc import franc + + +# $ ls -1 | grep -Poh '([a-z]{3}|[a-z]{2})' | grep -Ev '(pot|en)' | sort -u | tr '\n' ' ' +# TODO: de-hardcode this. +# Languages currently available in transifex-automations, without region +ALLOWED_LANGUAGES = [ + "ans", "ar", "az", "bn", "ca", "cmn", "cs", "da", "de", "el", "es", "fa", + "fi", "hi", "hu", "id", "it", "ja", "ka", "ko", "ky", "lt", "mr", "nb", + "ne", "nl", "pl", "ps", "pt", "ru", "si", "sq", "sv", "tr", "uk", "ur", + "vi", "zh" ] +LANGUAGE_PATTERN = r"([a-z]{3}|[a-z]{2})" + -def build_pattern(enable_russian=True, enable_polish=True, enable_ukrainian=True): +def get_lang_from_file(po: polib.POFile) -> str | None: """ - Build a compiled regex pattern for the selected languages. + Extract language from metadata['Language'], match the language pattern, + and return ISO 639-3 equivalent. + Returns None if language metadata is missing or invalid. """ - parts = [] - if enable_russian: - parts.append(RUSSIAN) - if enable_polish: - parts.append(POLISH) - if enable_ukrainian: - parts.append(UKRAINIAN) - if not parts: + lang = po.metadata.get('Language', '') + match = re.match(LANGUAGE_PATTERN, lang) + try: + lang_code_2 = match.group(0) + return Lang(lang_code_2).pt3 # ISO 639-3 code + except (AttributeError, KeyError): return None - return re.compile(f"[{''.join(parts)}]") -def should_ignore(text): +def convert_language_list_to_iso639_3(allowed_languages: list) -> list: """ - Return True if the text contains any of the ignore words. + Generate a ISO 639-3 list from the existing language list as downloaded + from Transifex. Handles lang nameas as "ru", "pt_BR", "cmn" and "es_419" """ - for word in IGNORE_WORDS: - if word in text: - return True - return False + converted = sorted([ + Lang(l).pt3 if len(l) == 2 else l + for l in allowed_languages + ]) + return converted -def find_matches_in_po(po_path, pattern, delete_matches=False): +def detect_language_from_text(text: str, allowed_languages: list) -> str | None: """ - Search for matches in translated strings of a PO file. - Skips entries with empty translations or containing ignored words. - Optionally delete matched translations. + Return the ISO 639-3 language code as detected by pyfranc's franc function, + or return None if matches nothing, if undefined or not a 100% match. """ - matches = [] - if not pattern: - return matches + found = franc.lang_detect(text, whitelist = allowed_languages) + if found and not found[0][0] == 'und': + return found[0][0] # returns ISO 639-3 + else: + return None + +def find_matches_in_po(po_path: str, delete_matches: bool = False, allowed_languages: list = []): + """ + Compare expected language from metadata with detected language from msgstr. + If different, record the entry. Optionally delete mismatched translations. + """ + matches = [] po = polib.pofile(po_path) + expected_lang = get_lang_from_file(po) modified = False + if not expected_lang: + return matches # skip if no valid expected language + for entry in po: # Skip if there is no translation at all if not entry.msgstr.strip(): continue - # Skip if contains ignored word - if should_ignore(entry.msgstr): - continue + detected_lang = detect_language_from_text( + entry.msgstr, allowed_languages + ) - if pattern.search(entry.msgstr): - matches.append((po_path, entry.linenum, entry.msgstr)) + if detected_lang != expected_lang and detected_lang: + matches.append((po_path, entry.linenum, detected_lang, entry.msgstr)) if delete_matches: entry.msgstr = "" modified = True if delete_matches and modified: po.save() + return matches @@ -98,33 +102,24 @@ def main(): parser.add_argument( "paths", nargs="+", help="One or more PO files or directories to search" ) - parser.add_argument( - "--no-russian", action="store_true", help="Disable Russian pattern checking." - ) - parser.add_argument( - "--no-polish", action="store_true", help="Disable Polish pattern checking." - ) - parser.add_argument( - "--no-ukrainian", - action="store_true", - help="Disable Ukrainian pattern checking.", - ) parser.add_argument( "--delete-matches", action="store_true", help="Delete msgstr of matched entries.", ) + parser.add_argument( + "--lang", + metavar="LANG", + help="Specific language (2- or 3-letter code) to compare translations against. " + "If not set, will check against all allowed languages." + ) args = parser.parse_args() - pattern = build_pattern( - enable_russian=not args.no_russian, - enable_polish=not args.no_polish, - enable_ukrainian=not args.no_ukrainian, - ) - - if not pattern: - parser.error("All checks are disabled. Enable at least one language pattern.") + if args.lang: + allowed_list = convert_language_list_to_iso639_3([args.lang]) + else: + allowed_list = convert_language_list_to_iso639_3(ALLOWED_LANGUAGES) paths = [] for arg in args.paths: @@ -137,10 +132,10 @@ def main(): print(f"Warning: {p} not found.") for path in paths: - for po_path, linenum, text in find_matches_in_po( - path, pattern, args.delete_matches + for po_path, linenum, detected_lang, text in find_matches_in_po( + path, args.delete_matches, allowed_list ): - print(f"{po_path}:{linenum}: {text}") + print(f"{po_path}:{linenum}: [{detected_lang}] {text}") if __name__ == "__main__": From 2b2020e174068f6409420ccc7108afe7d021a3a4 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Fri, 15 Aug 2025 23:44:09 -0300 Subject: [PATCH 10/13] lint --- scripts/check_langs_in_po.py | 66 ++++++++++++++++++++++++++---------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index 31bba98e5ed..cbe45a955e1 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -2,6 +2,7 @@ """ Check PO files for translations from another language """ + import argparse import re from pathlib import Path @@ -15,10 +16,44 @@ # TODO: de-hardcode this. # Languages currently available in transifex-automations, without region ALLOWED_LANGUAGES = [ - "ans", "ar", "az", "bn", "ca", "cmn", "cs", "da", "de", "el", "es", "fa", - "fi", "hi", "hu", "id", "it", "ja", "ka", "ko", "ky", "lt", "mr", "nb", - "ne", "nl", "pl", "ps", "pt", "ru", "si", "sq", "sv", "tr", "uk", "ur", - "vi", "zh" + "ans", + "ar", + "az", + "bn", + "ca", + "cmn", + "cs", + "da", + "de", + "el", + "es", + "fa", + "fi", + "hi", + "hu", + "id", + "it", + "ja", + "ka", + "ko", + "ky", + "lt", + "mr", + "nb", + "ne", + "nl", + "pl", + "ps", + "pt", + "ru", + "si", + "sq", + "sv", + "tr", + "uk", + "ur", + "vi", + "zh", ] LANGUAGE_PATTERN = r"([a-z]{3}|[a-z]{2})" @@ -30,7 +65,7 @@ def get_lang_from_file(po: polib.POFile) -> str | None: and return ISO 639-3 equivalent. Returns None if language metadata is missing or invalid. """ - lang = po.metadata.get('Language', '') + lang = po.metadata.get("Language", "") match = re.match(LANGUAGE_PATTERN, lang) try: lang_code_2 = match.group(0) @@ -44,10 +79,7 @@ def convert_language_list_to_iso639_3(allowed_languages: list) -> list: Generate a ISO 639-3 list from the existing language list as downloaded from Transifex. Handles lang nameas as "ru", "pt_BR", "cmn" and "es_419" """ - converted = sorted([ - Lang(l).pt3 if len(l) == 2 else l - for l in allowed_languages - ]) + converted = sorted([Lang(l).pt3 if len(l) == 2 else l for l in allowed_languages]) return converted @@ -56,14 +88,16 @@ def detect_language_from_text(text: str, allowed_languages: list) -> str | None: Return the ISO 639-3 language code as detected by pyfranc's franc function, or return None if matches nothing, if undefined or not a 100% match. """ - found = franc.lang_detect(text, whitelist = allowed_languages) - if found and not found[0][0] == 'und': + found = franc.lang_detect(text, whitelist=allowed_languages) + if found and not found[0][0] == "und": return found[0][0] # returns ISO 639-3 else: return None -def find_matches_in_po(po_path: str, delete_matches: bool = False, allowed_languages: list = []): +def find_matches_in_po( + po_path: str, delete_matches: bool = False, allowed_languages: list = [] +): """ Compare expected language from metadata with detected language from msgstr. If different, record the entry. Optionally delete mismatched translations. @@ -74,16 +108,14 @@ def find_matches_in_po(po_path: str, delete_matches: bool = False, allowed_langu modified = False if not expected_lang: - return matches # skip if no valid expected language + return matches # skip if no valid expected language for entry in po: # Skip if there is no translation at all if not entry.msgstr.strip(): continue - detected_lang = detect_language_from_text( - entry.msgstr, allowed_languages - ) + detected_lang = detect_language_from_text(entry.msgstr, allowed_languages) if detected_lang != expected_lang and detected_lang: matches.append((po_path, entry.linenum, detected_lang, entry.msgstr)) @@ -111,7 +143,7 @@ def main(): "--lang", metavar="LANG", help="Specific language (2- or 3-letter code) to compare translations against. " - "If not set, will check against all allowed languages." + "If not set, will check against all allowed languages." ) args = parser.parse_args() From 81275ea42494975aaf96f671068f0e527f2f360f Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Sat, 16 Aug 2025 00:33:10 -0300 Subject: [PATCH 11/13] Lint --- scripts/check_langs_in_po.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index cbe45a955e1..92a6a85f8a9 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -143,7 +143,7 @@ def main(): "--lang", metavar="LANG", help="Specific language (2- or 3-letter code) to compare translations against. " - "If not set, will check against all allowed languages." + "If not set, will check against all allowed languages." ) args = parser.parse_args() From fd596872a1f06e443b2eae902b22bc0449304604 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Sat, 16 Aug 2025 03:42:06 -0300 Subject: [PATCH 12/13] lint --- scripts/check_langs_in_po.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index 92a6a85f8a9..e4c725088a9 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -77,9 +77,9 @@ def get_lang_from_file(po: polib.POFile) -> str | None: def convert_language_list_to_iso639_3(allowed_languages: list) -> list: """ Generate a ISO 639-3 list from the existing language list as downloaded - from Transifex. Handles lang nameas as "ru", "pt_BR", "cmn" and "es_419" + from Transifex. Handles lang names as "ru", "pt_BR", "cmn" and "es_419" """ - converted = sorted([Lang(l).pt3 if len(l) == 2 else l for l in allowed_languages]) + converted = sorted([Lang(lang).pt3 if len(lang) == 2 else lang for lang in allowed_languages]) return converted @@ -143,7 +143,7 @@ def main(): "--lang", metavar="LANG", help="Specific language (2- or 3-letter code) to compare translations against. " - "If not set, will check against all allowed languages." + "If not set, will check against all allowed languages." ) args = parser.parse_args() From 30c28b59a1ebfd47d8bb90dda37b281609cd40d1 Mon Sep 17 00:00:00 2001 From: Rafael Fontenelle Date: Sat, 16 Aug 2025 03:58:20 -0300 Subject: [PATCH 13/13] Lint --- scripts/check_langs_in_po.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/check_langs_in_po.py b/scripts/check_langs_in_po.py index e4c725088a9..eb2dc4ea7ab 100644 --- a/scripts/check_langs_in_po.py +++ b/scripts/check_langs_in_po.py @@ -79,7 +79,9 @@ def convert_language_list_to_iso639_3(allowed_languages: list) -> list: Generate a ISO 639-3 list from the existing language list as downloaded from Transifex. Handles lang names as "ru", "pt_BR", "cmn" and "es_419" """ - converted = sorted([Lang(lang).pt3 if len(lang) == 2 else lang for lang in allowed_languages]) + converted = sorted([ + Lang(lang).pt3 if len(lang) == 2 else lang for lang in allowed_languages] + ) return converted @@ -143,7 +145,7 @@ def main(): "--lang", metavar="LANG", help="Specific language (2- or 3-letter code) to compare translations against. " - "If not set, will check against all allowed languages." + "If not set, will check against all allowed languages.", ) args = parser.parse_args()