Files
andors-trail/AndorsTrail/tools/check_format_specifiers.py

259 lines
11 KiB
Python

import re
import os
import argparse
from xml.etree import ElementTree as ET
def get_string_value_and_specifiers(filepath, key_name):
"""
Parses an XML strings file to find a specific key and its format specifiers.
Args:
filepath (str): Path to the strings.xml file.
key_name (str): The name of the string resource to find.
Returns:
tuple: (string_value, set_of_specifiers) or (None, None) if key not found.
Specifiers are returned as a set, e.g., {"%1$s", "%2$d"}.
"""
try:
tree = ET.parse(filepath)
root = tree.getroot()
for string_tag in root.findall('string'):
if string_tag.get('name') == key_name:
value = string_tag.text if string_tag.text else ""
# Regex to find format specifiers like %s, %d, %1$s, %2$d, etc.
# It handles optional positional arguments (e.g., 1$) and type characters.
specifiers = set(re.findall(r'%(?:(?:\d+\$)?(?:[sdfeoxXgGaAbhHc]|(?:\.\d[fd])))', value))
return value, specifiers
except ET.ParseError:
print(f"Warning: Could not parse XML file: {filepath}")
except FileNotFoundError:
print(f"Warning: File not found: {filepath}")
return None, None
def find_res_directories(project_root):
"""
Finds all 'res' directories within a project, typically in module roots.
"""
res_dirs = []
for root_dir, dirs, _ in os.walk(project_root):
if 'res' in dirs:
res_dirs.append(os.path.join(root_dir, 'res'))
if not res_dirs and os.path.basename(project_root) == 'res': # If project_root itself is a res dir
res_dirs.append(project_root)
return res_dirs
def find_strings_files(res_dir_path, base_filename="strings.xml"):
"""
Finds all strings.xml files (or variants like strings-es.xml)
within a given 'res' directory.
"""
strings_files = {} # lang_code -> filepath
for dirpath, _, filenames in os.walk(res_dir_path):
if "values" in os.path.basename(dirpath).lower(): # e.g., values, values-es, values-en-rGB
for filename in filenames:
if filename.startswith(os.path.splitext(base_filename)[0]) and filename.endswith(".xml"):
full_path = os.path.join(dirpath, filename)
# Determine language code from directory name (e.g., "values-es" -> "es")
# or default if it's just "values"
dir_name_parts = os.path.basename(dirpath).split('-')
lang_code = "default" # For the base "values" folder
if len(dir_name_parts) > 1:
lang_code = "-".join(dir_name_parts[1:]) # Handles values-en-rUS correctly
strings_files[lang_code] = full_path
return strings_files
def find_non_escaped_percent(value):
"""
Finds non-escaped % characters in a string (not part of %% or a valid format specifier).
Returns a list of indices where such % occur.
"""
# Find all % positions
percent_indices = [m.start() for m in re.finditer(r'%', value)]
# Find all valid format specifiers and %% positions
valid_specifier_pattern = r'%(?:%|(?:\d+\$)?(?:[sdfeoxXgGaAbhHc]|(?:\.\d[fd])))'
valid_matches = [m.span() for m in re.finditer(valid_specifier_pattern, value)]
# Mark all indices covered by valid specifiers or %%
covered_indices = set()
for start, end in valid_matches:
covered_indices.update(range(start, end))
# Only report % indices not covered by valid specifiers or %%
return [idx for idx in percent_indices if idx not in covered_indices]
def get_all_keys(filepath):
"""
Returns a list of all string resource keys in the given XML file.
"""
try:
tree = ET.parse(filepath)
root = tree.getroot()
return [string_tag.get('name') for string_tag in root.findall('string') if string_tag.get('name')]
except Exception:
return []
def find_used_keys_in_java(project_root):
"""
Scans all .java files under project_root for usages of string resource keys.
Returns a set of keys found.
"""
key_pattern = re.compile(r'R\.string\.([a-zA-Z0-9_]+)')
used_keys = set()
for root, _, files in os.walk(project_root):
for fname in files:
if fname.endswith('.java'):
try:
with open(os.path.join(root, fname), encoding='utf-8') as f:
content = f.read()
used_keys.update(key_pattern.findall(content))
except Exception:
pass
return used_keys
def main():
parser = argparse.ArgumentParser(
description="Check format specifier consistency across language files for a given string key."
)
parser.add_argument(
"project_root",
help="Path to the Android project's root directory (or a specific module's root directory)."
)
parser.add_argument(
"res_root",
help="Path to the some 'res' directory."
)
parser.add_argument(
"key_name",
nargs="?",
default=None,
help="The name of the string resource to check (e.g., 'skill_longdescription_evasion'). If omitted, checks all keys in the base language file."
)
parser.add_argument(
"--base_lang",
default="default",
help="The language code for the base/reference strings file (e.g., 'en', 'default' for values/strings.xml). Default is 'default'."
)
parser.add_argument(
"--strings_filename",
default="strings.xml",
help="The base name of your strings files (default: strings.xml)."
)
args = parser.parse_args()
print(f"Using base language: '{args.base_lang}' from file '{args.strings_filename}'")
print(f"Project root path: {args.project_root}\n")
print(f"Project res path: {args.res_root}\n")
res_directories = find_res_directories(args.res_root)
if not res_directories:
print(f"Error: No 'res' directory found under {args.res_root}")
return
all_strings_files = {}
for res_dir in res_directories:
all_strings_files.update(find_strings_files(res_dir, args.strings_filename))
if not all_strings_files:
print(f"Error: No '{args.strings_filename}' files found in any 'res/values-*' directories under {args.res_root}.")
return
base_file_path = all_strings_files.get(args.base_lang)
if not base_file_path:
if args.base_lang != "default" and all_strings_files.get("default"):
print(f"Warning: Base language '{args.base_lang}' not found. Using 'default' (values/{args.strings_filename}) as base.")
base_file_path = all_strings_files.get("default")
args.base_lang = "default"
else:
print(f"Error: Base strings file for language '{args.base_lang}' not found.")
print(f"Available language files found: {list(all_strings_files.keys())}")
return
# If no key_name is provided, check only keys used in .java files
if args.key_name is None:
used_keys = find_used_keys_in_java(args.project_root)
all_keys = set(get_all_keys(base_file_path))
keys_to_check = sorted(list(all_keys & used_keys))
if not keys_to_check:
print('no keys to check')
return
else:
keys_to_check = [args.key_name]
total_issues_found = 0
file_error_counts = {}
file_warning_counts = {}
for key_name in keys_to_check:
base_value, base_specifiers = get_string_value_and_specifiers(base_file_path, key_name)
if base_specifiers is None:
continue
for lang_code, file_path in all_strings_files.items():
if lang_code == args.base_lang:
continue
current_value, current_specifiers = get_string_value_and_specifiers(file_path, key_name)
if current_specifiers is None:
continue
error_count = 0
warning_count = 0
non_escaped_percent_indices = find_non_escaped_percent(current_value)
if non_escaped_percent_indices:
error_count += 1
print(f"--- Language: {lang_code} (NON-ESCAPED % FOUND) ---")
print(f"Key: {key_name}")
print(f"File: {file_path}")
print(f"Value: \"{current_value}\"")
print(f"Non-escaped % at positions: {non_escaped_percent_indices}")
print("-" * 20)
if current_specifiers != base_specifiers:
missing_in_current = base_specifiers - current_specifiers
extra_in_current = current_specifiers - base_specifiers
if extra_in_current:
error_count += 1
print(f"--- Language: {lang_code} (ISSUE FOUND) ---")
print(f"Key: {key_name}")
print(f"File: {file_path}")
print(f"Value: \"{current_value}\"")
print(f"Specifiers: {sorted(list(current_specifiers)) if current_specifiers else 'None'}")
print(f"Expected specifiers (from base): {sorted(list(base_specifiers)) if base_specifiers else 'None'}")
print(f" EXTRA in '{lang_code}': {sorted(list(extra_in_current))}")
print("-" * 20)
if missing_in_current:
warning_count += 1
print(f"--- Language: {lang_code} (WARNING: MISSING SPECIFIERS) ---")
print(f"Key: {key_name}")
print(f"File: {file_path}")
print(f"Value: \"{current_value}\"")
print(f"Specifiers: {sorted(list(current_specifiers)) if current_specifiers else 'None'}")
print(f"Expected specifiers (from base): {sorted(list(base_specifiers)) if base_specifiers else 'None'}")
print(f" MISSING in '{lang_code}': {sorted(list(missing_in_current))}")
print("-" * 20)
if error_count:
file_error_counts[file_path] = file_error_counts.get(file_path, 0) + error_count
if warning_count:
file_warning_counts[file_path] = file_warning_counts.get(file_path, 0) + warning_count
if file_error_counts or file_warning_counts:
print("\nSummary of errors and warnings per file:")
for file_path in sorted(set(list(file_error_counts.keys()) + list(file_warning_counts.keys()))):
error_str = f"{file_error_counts.get(file_path, 0)} error(s)"
warning_str = f"{file_warning_counts.get(file_path, 0)} warning(s)"
print(f"{file_path}:\t {error_str:>5}, {warning_str:>5}")
total_errors = sum(file_error_counts.values())
total_warnings = sum(file_warning_counts.values())
print(f"\nTOTAL: {total_errors} error(s), {total_warnings} warning(s)")
if file_error_counts:
exit(1)
if __name__ == "__main__":
main()