"""Remove duplicate tileset *usages* from Tiled ``.tmx`` map files.

When a ``.tmx`` file declares the same tileset more than once (same name,
different ``firstgid``), tile GIDs that reference the later copies are
rewritten to reference the first copy instead.  The duplicate ``<tileset>``
elements themselves are NOT removed -- a message asks the user to delete
them manually once they are unused.
"""

import base64
import gzip
import shutil
import struct
import sys
import xml.etree.ElementTree
import zlib
from typing import Any

# Sentinel used when no tileset starts after a given gid -- effectively
# "end of the gid space", so a half-open range check stays valid.
_NO_NEXT_GID = 9999999999


def get_min_gid_after(after: int, tilesets: list[tuple[Any, int]]) -> int:
    """Return the smallest ``firstgid`` strictly greater than ``after``.

    Falls back to a large sentinel when ``after`` belongs to the last
    tileset, so the caller's ``gid < result`` range check still works.
    """
    return min(
        (tileset[1] for tileset in tilesets if tileset[1] > after),
        default=_NO_NEXT_GID,
    )


def remove_duplicate_usages(xml_file_path: str) -> None:
    """Rewrite tile GIDs in ``xml_file_path`` so duplicate tilesets are unused.

    Parses the map, finds tilesets declared more than once, and for every
    ``<layer>`` re-points GIDs inside a duplicate's range at the first copy
    of that tileset.  The file is rewritten via text replacement so the rest
    of the document (formatting, attribute order) stays untouched.
    """
    print(f"checking {xml_file_path}")
    tree = xml.etree.ElementTree.parse(xml_file_path)
    root = tree.getroot()
    tileset_elements_list: list[Any] = root.findall("tileset")
    tilesets_list: list[tuple[str, int]] = [
        (tileset.get("name"), int(tileset.get("firstgid")))
        for tileset in tileset_elements_list
    ]
    duplicates = find_duplicates(tilesets_list)
    if not duplicates:
        return

    # Each entry maps a half-open gid range [start, end) belonging to a
    # duplicate tileset to the offset that must be subtracted to reach the
    # first copy: (range_start, range_end, offset, name, dup_index, target_gid).
    # Only the first three fields are used when applying the mapping; the
    # rest are kept for debugging.
    duplicate_usages_maps: list[tuple[int, int, int, str, int, int]] = []
    for name, duplicate_list in duplicates.items():
        for i, duplicate in enumerate(duplicate_list):
            gid_dup = max(duplicate)
            gid_not_dup = min(duplicate)
            duplicate_usages_maps.append(
                (
                    gid_dup,
                    get_min_gid_after(gid_dup, tilesets_list),
                    gid_dup - gid_not_dup,
                    name,
                    i,
                    gid_not_dup,
                )
            )

    # Go through each layer and remove all usages of these duplicates.
    for layer in root.findall("layer"):
        data_element = layer.find("data")
        if data_element is None:
            # Layer without a <data> child -- nothing to rewrite.
            continue
        encoding = data_element.get("encoding")
        compression = data_element.get("compression")

        data = data_element.text
        if not data:
            continue

        gid_data = read_data_from_str(data, encoding, compression)
        found_change_in_layer = False

        # NOTE(review): GIDs carrying flip flags (high bits, per the TMX
        # spec) will never fall inside a mapped range and are left alone --
        # confirm flipped tiles never reference a duplicate tileset.
        for range_start, range_end, offset, *_ in duplicate_usages_maps:
            for i, gid in enumerate(gid_data):
                if range_start <= gid < range_end:
                    # Reduce the duplicate usage by the offset so it points
                    # at the first copy of the tile-set.
                    gid_data[i] -= offset
                    found_change_in_layer = True

        if found_change_in_layer:
            modified_data_str = write_data_to_str(compression, encoding, gid_data)
            print(f"writing file: {xml_file_path}")
            # Re-read before each write so changes from earlier layers in
            # this same file are preserved.
            with open(xml_file_path, "r", encoding="utf-8") as f:
                original = f.read()
            with open(xml_file_path, "w", encoding="utf-8") as f:
                f.write(original.replace(data, modified_data_str))

    print(
        f"XML file has unused duplicate tilesets please remove them manually: '{xml_file_path}' "
    )


def find_duplicates(
    tilesets_list: list[tuple[str, int]]
) -> dict[str, list[tuple[int, int]]]:
    """Group repeated tileset names by their ``firstgid`` values.

    Returns a mapping ``name -> [(first_seen_gid, duplicate_gid), ...]`` for
    every tileset name that appears more than once; names seen only once are
    absent from the result.

    Raises:
        Exception: if a tileset entry has no ``firstgid``.
    """
    duplicates: dict[str, list[tuple[int, int]]] = {}
    first_seen: dict[str, int] = {}
    for current_name, first_gid in tilesets_list:
        if first_gid is None:
            raise Exception("This tileset had no first_gid")

        first_gid = int(first_gid)
        if current_name not in first_seen:
            first_seen[current_name] = first_gid
        else:
            print(f" - '{current_name}'")
            duplicates.setdefault(current_name, []).append(
                (first_seen[current_name], first_gid)
            )
    return duplicates


def read_data_from_str(data: str, encoding: str, compression: str) -> list[int]:
    """Decode a layer's ``<data>`` payload into a flat list of 32-bit GIDs."""
    gid_data_bytes = decode_decompress(data, encoding, compression)
    amount_of_ints = len(gid_data_bytes) // 4
    return list(struct.unpack(get_struct_format(amount_of_ints), gid_data_bytes))


def write_data_to_str(compression: str, encoding: str, gid_data: list[int]) -> str:
    """Inverse of :func:`read_data_from_str`: pack, compress and encode GIDs."""
    packed = struct.pack(get_struct_format(len(gid_data)), *gid_data)
    return encode_compress(packed, encoding, compression).decode("utf-8")


def get_struct_format(amount_of_ints: int) -> str:
    """The format required to use struct.pack(...) and struct.unpack(...).

    Little-endian unsigned 32-bit integers, matching the TMX layer format.
    """
    return "<" + "I" * amount_of_ints


def encode_compress(content: bytes, encoding: str, compression: str):
    """Compress then encode ``content`` (inverse order of decoding)."""
    data = do_zlib_gzip_fn(compression, content, gzip.compress, zlib.compress)
    return do_base64_csv_fn(encoding, data, base64.b64encode)


def decode_decompress(content: str, encoding: str, compression: str) -> bytes:
    """Decode then decompress a ``<data>`` payload back to raw GID bytes."""
    data = do_base64_csv_fn(encoding, content, base64.b64decode)
    return do_zlib_gzip_fn(compression, data, gzip.decompress, zlib.decompress)


def do_base64_csv_fn(encoding: str, data: Any, fn: callable):
    """Apply the base64 codec ``fn`` when the layer uses base64 encoding.

    Raises:
        Exception: for csv-encoded layers, which are not supported.
    """
    if encoding == "base64":
        return fn(data)
    if encoding == "csv":
        raise Exception("csv not supported")
    # NOTE(review): on the encode path this stringifies bytes (yielding
    # "b'...'") -- looks like a latent bug, but the branch is unreachable
    # for the base64-encoded files this tool targets; confirm before use
    # with unencoded layers.
    return str(data)


def do_zlib_gzip_fn(compression: str, data: Any, fn_gzip: callable, fn_zlib: callable):
    """Apply the matching (de)compression function; pass through when none."""
    if compression == "zlib":
        return fn_zlib(data)
    if compression == "gzip":
        return fn_gzip(data)
    return data


def main() -> None:
    """CLI entry point: copy each ``.tmx`` argument and de-duplicate the copy."""
    args = sys.argv[1:]
    print(f"checking {len(args)} files")
    for arg in args:
        if not arg.endswith(".tmx"):
            print(f"file is not a tmx: {arg}")
            continue

        # Work on a copy so the original map file is never modified.
        file = f"{arg}.altered.tmx"
        shutil.copy(arg, file)
        remove_duplicate_usages(file)


if __name__ == "__main__":
    main()