Files
andors-trail/AndorsTrail/tools/check_format_specifiers.py
OMGeeky 149ad3ae1c Differentiate between missing and extra format specifiers
Missing format specifiers in translated strings are now reported as warnings,
while extra format specifiers are reported as errors.
This change allows the script to pass if only warnings (missing specifiers)
are present, but still fail if there are errors (extra specifiers).

The summary now also includes counts for both errors and warnings per file.
2025-07-28 21:57:29 +02:00

255 lines
11 KiB
Python

import re
import os
import argparse
from xml.etree import ElementTree as ET
def get_string_value_and_specifiers(filepath, key_name):
"""
Parses an XML strings file to find a specific key and its format specifiers.
Args:
filepath (str): Path to the strings.xml file.
key_name (str): The name of the string resource to find.
Returns:
tuple: (string_value, set_of_specifiers) or (None, None) if key not found.
Specifiers are returned as a set, e.g., {"%1$s", "%2$d"}.
"""
try:
tree = ET.parse(filepath)
root = tree.getroot()
for string_tag in root.findall('string'):
if string_tag.get('name') == key_name:
value = string_tag.text if string_tag.text else ""
# Regex to find format specifiers like %s, %d, %1$s, %2$d, etc.
# It handles optional positional arguments (e.g., 1$) and type characters.
specifiers = set(re.findall(r'%(?:(?:\d+\$)?(?:[sdfeoxXgGaAbhHc]|(?:\.\d[fd])))', value))
return value, specifiers
except ET.ParseError:
print(f"Warning: Could not parse XML file: {filepath}")
except FileNotFoundError:
print(f"Warning: File not found: {filepath}")
return None, None
def find_res_directories(project_root):
"""
Finds all 'res' directories within a project, typically in module roots.
"""
res_dirs = []
for root_dir, dirs, _ in os.walk(project_root):
if 'res' in dirs:
res_dirs.append(os.path.join(root_dir, 'res'))
if not res_dirs and os.path.basename(project_root) == 'res': # If project_root itself is a res dir
res_dirs.append(project_root)
return res_dirs
def find_strings_files(res_dir_path, base_filename="strings.xml"):
"""
Finds all strings.xml files (or variants like strings-es.xml)
within a given 'res' directory.
"""
strings_files = {} # lang_code -> filepath
for dirpath, _, filenames in os.walk(res_dir_path):
if "values" in os.path.basename(dirpath).lower(): # e.g., values, values-es, values-en-rGB
for filename in filenames:
if filename.startswith(os.path.splitext(base_filename)[0]) and filename.endswith(".xml"):
full_path = os.path.join(dirpath, filename)
# Determine language code from directory name (e.g., "values-es" -> "es")
# or default if it's just "values"
dir_name_parts = os.path.basename(dirpath).split('-')
lang_code = "default" # For the base "values" folder
if len(dir_name_parts) > 1:
lang_code = "-".join(dir_name_parts[1:]) # Handles values-en-rUS correctly
strings_files[lang_code] = full_path
return strings_files
def find_non_escaped_percent(value):
"""
Finds non-escaped % characters in a string (not part of %% or a valid format specifier).
Returns a list of indices where such % occur.
"""
# Find all % positions
percent_indices = [m.start() for m in re.finditer(r'%', value)]
# Find all valid format specifiers and %% positions
valid_specifier_pattern = r'%(?:%|(?:\d+\$)?(?:[sdfeoxXgGaAbhHc]|(?:\.\d[fd])))'
valid_matches = [m.span() for m in re.finditer(valid_specifier_pattern, value)]
# Mark all indices covered by valid specifiers or %%
covered_indices = set()
for start, end in valid_matches:
covered_indices.update(range(start, end))
# Only report % indices not covered by valid specifiers or %%
return [idx for idx in percent_indices if idx not in covered_indices]
def get_all_keys(filepath):
"""
Returns a list of all string resource keys in the given XML file.
"""
try:
tree = ET.parse(filepath)
root = tree.getroot()
return [string_tag.get('name') for string_tag in root.findall('string') if string_tag.get('name')]
except Exception:
return []
def find_used_keys_in_java(project_root):
"""
Scans all .java files under project_root for usages of string resource keys.
Returns a set of keys found.
"""
key_pattern = re.compile(r'R\.string\.([a-zA-Z0-9_]+)')
used_keys = set()
for root, _, files in os.walk(project_root):
for fname in files:
if fname.endswith('.java'):
try:
with open(os.path.join(root, fname), encoding='utf-8') as f:
content = f.read()
used_keys.update(key_pattern.findall(content))
except Exception:
pass
return used_keys
def main():
parser = argparse.ArgumentParser(
description="Check format specifier consistency across language files for a given string key."
)
parser.add_argument(
"project_root",
help="Path to the Android project's root directory (or a specific module's root directory)."
)
parser.add_argument(
"res_root",
help="Path to the some 'res' directory."
)
parser.add_argument(
"key_name",
nargs="?",
default=None,
help="The name of the string resource to check (e.g., 'skill_longdescription_evasion'). If omitted, checks all keys in the base language file."
)
parser.add_argument(
"--base_lang",
default="default",
help="The language code for the base/reference strings file (e.g., 'en', 'default' for values/strings.xml). Default is 'default'."
)
parser.add_argument(
"--strings_filename",
default="strings.xml",
help="The base name of your strings files (default: strings.xml)."
)
args = parser.parse_args()
print(f"Using base language: '{args.base_lang}' from file '{args.strings_filename}'")
print(f"Project root path: {args.project_root}\n")
print(f"Project res path: {args.res_root}\n")
res_directories = find_res_directories(args.res_root)
if not res_directories:
print(f"Error: No 'res' directory found under {args.res_root}")
return
all_strings_files = {}
for res_dir in res_directories:
all_strings_files.update(find_strings_files(res_dir, args.strings_filename))
if not all_strings_files:
print(f"Error: No '{args.strings_filename}' files found in any 'res/values-*' directories under {args.res_root}.")
return
base_file_path = all_strings_files.get(args.base_lang)
if not base_file_path:
if args.base_lang != "default" and all_strings_files.get("default"):
print(f"Warning: Base language '{args.base_lang}' not found. Using 'default' (values/{args.strings_filename}) as base.")
base_file_path = all_strings_files.get("default")
args.base_lang = "default"
else:
print(f"Error: Base strings file for language '{args.base_lang}' not found.")
print(f"Available language files found: {list(all_strings_files.keys())}")
return
# If no key_name is provided, check only keys used in .java files
if args.key_name is None:
used_keys = find_used_keys_in_java(args.project_root)
all_keys = set(get_all_keys(base_file_path))
keys_to_check = sorted(list(all_keys & used_keys))
if not keys_to_check:
print('no keys to check')
return
else:
keys_to_check = [args.key_name]
total_issues_found = 0
file_error_counts = {}
file_warning_counts = {}
for key_name in keys_to_check:
base_value, base_specifiers = get_string_value_and_specifiers(base_file_path, key_name)
if base_specifiers is None:
continue
for lang_code, file_path in all_strings_files.items():
if lang_code == args.base_lang:
continue
current_value, current_specifiers = get_string_value_and_specifiers(file_path, key_name)
if current_specifiers is None:
continue
error_count = 0
warning_count = 0
non_escaped_percent_indices = find_non_escaped_percent(current_value)
if non_escaped_percent_indices:
error_count += 1
print(f"--- Language: {lang_code} (NON-ESCAPED % FOUND) ---")
print(f"Key: {key_name}")
print(f"File: {file_path}")
print(f"Value: \"{current_value}\"")
print(f"Non-escaped % at positions: {non_escaped_percent_indices}")
print("-" * 20)
if current_specifiers != base_specifiers:
missing_in_current = base_specifiers - current_specifiers
extra_in_current = current_specifiers - base_specifiers
if extra_in_current:
error_count += 1
print(f"--- Language: {lang_code} (ISSUE FOUND) ---")
print(f"Key: {key_name}")
print(f"File: {file_path}")
print(f"Value: \"{current_value}\"")
print(f"Specifiers: {sorted(list(current_specifiers)) if current_specifiers else 'None'}")
print(f"Expected specifiers (from base): {sorted(list(base_specifiers)) if base_specifiers else 'None'}")
print(f" EXTRA in '{lang_code}': {sorted(list(extra_in_current))}")
print("-" * 20)
if missing_in_current:
warning_count += 1
print(f"--- Language: {lang_code} (WARNING: MISSING SPECIFIERS) ---")
print(f"Key: {key_name}")
print(f"File: {file_path}")
print(f"Value: \"{current_value}\"")
print(f"Specifiers: {sorted(list(current_specifiers)) if current_specifiers else 'None'}")
print(f"Expected specifiers (from base): {sorted(list(base_specifiers)) if base_specifiers else 'None'}")
print(f" MISSING in '{lang_code}': {sorted(list(missing_in_current))}")
print("-" * 20)
if error_count:
file_error_counts[file_path] = file_error_counts.get(file_path, 0) + error_count
if warning_count:
file_warning_counts[file_path] = file_warning_counts.get(file_path, 0) + warning_count
if file_error_counts or file_warning_counts:
print("\nSummary of errors and warnings per file:")
for file_path in set(list(file_error_counts.keys()) + list(file_warning_counts.keys())):
error_str = f"{file_error_counts.get(file_path, 0)} error(s)"
warning_str = f"{file_warning_counts.get(file_path, 0)} warning(s)"
print(f"{file_path}:\t {error_str:>5}, {warning_str:>5}")
if file_error_counts:
exit(1)
if __name__ == "__main__":
main()