OMGeeky
2025-02-22 17:02:40 +01:00
commit c36f09aa38
9 changed files with 327 additions and 0 deletions

8
.idea/.gitignore generated vendored Normal file

@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

10
.idea/JsonSummary.iml generated Normal file

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/.venv" />
    </content>
    <orderEntry type="jdk" jdkName="Python 3.13 (JsonSummary)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>


@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>

8
.idea/modules.xml generated Normal file

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/JsonSummary.iml" filepath="$PROJECT_DIR$/.idea/JsonSummary.iml" />
    </modules>
  </component>
</project>

6
.idea/vcs.xml generated Normal file

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>

234
main.py Normal file

@@ -0,0 +1,234 @@
import argparse
import json
import os
from collections import defaultdict
from typing import Dict, Any, List, Set


def find_json_files(path: str) -> List[str]:
    """Find all JSON files in the given path."""
    json_files = []
    if os.path.isfile(path):
        if path.lower().endswith('.json'):
            json_files.append(path)
    else:
        for root, _, files in os.walk(path):
            for file in files:
                if file.lower().endswith('.json'):
                    json_files.append(os.path.join(root, file))
    return json_files


def read_json_file(file_path: str) -> Any:
    """Read and parse a JSON file."""
    try:
        with open(file_path, 'r') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error parsing {file_path}: {e}")
        return None
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None


def init_stats_dict() -> Dict:
    """Initialize a statistics dictionary with default values."""
    return {
        'types': set(),
        'fields': defaultdict(lambda: {
            'count': 0,
            'types': set(),
            'nested_fields': defaultdict(init_stats_dict),
            'examples': set()
        })
    }
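
# Shape sketch (illustrative, hand-traced for the two user files in test_data/):
# a scalar field entry built by analyze_value below looks roughly like
#     stats['fields']['age'] == {'count': 2, 'types': {'int'},
#                                'examples': {'25', '30'}, 'fields': {...}}
# Scalar fields collect up to three example strings; dict and list values are
# recursed into and fill the nested 'fields' / list statistics instead.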


def analyze_value(value: Any, stats: Dict, depth: int = 0, max_depth: int = 5) -> None:
    """Analyze a value and update statistics."""
    if depth >= max_depth:
        return

    # Ensure stats has the basic structure
    if 'types' not in stats:
        stats.update(init_stats_dict())

    value_type = type(value).__name__
    stats['types'].add(value_type)

    if isinstance(value, (int, float)):
        if 'min_value' not in stats:
            stats['min_value'] = value
            stats['max_value'] = value
        else:
            stats['min_value'] = min(stats['min_value'], value)
            stats['max_value'] = max(stats['max_value'], value)

    if isinstance(value, dict):
        for k, v in value.items():
            if k not in stats['fields']:
                stats['fields'][k] = init_stats_dict()
                stats['fields'][k]['count'] = 0
                stats['fields'][k]['examples'] = set()

            field_stats = stats['fields'][k]
            field_stats['count'] += 1
            field_stats['types'].add(type(v).__name__)

            # Store example values (limit to 3)
            if not isinstance(v, (dict, list)) and len(field_stats['examples']) < 3:
                field_stats['examples'].add(str(v))

            if isinstance(v, dict):
                analyze_value(v, field_stats, depth + 1, max_depth)
            elif isinstance(v, list):
                analyze_value(v, field_stats, depth + 1, max_depth)

    elif isinstance(value, list):
        if 'list_item_types' not in stats:
            stats['list_item_types'] = set()
            stats['list_item_stats'] = init_stats_dict()
            stats['min_length'] = len(value)
            stats['max_length'] = len(value)
        else:
            stats['min_length'] = min(stats['min_length'], len(value))
            stats['max_length'] = max(stats['max_length'], len(value))

        for item in value:
            item_type = type(item).__name__
            stats['list_item_types'].add(item_type)

            # Analyze list items in detail
            if isinstance(item, (dict, list)):
                analyze_value(item, stats['list_item_stats'], depth + 1, max_depth)
            elif isinstance(item, (int, float)):
                if 'min_value' not in stats['list_item_stats']:
                    stats['list_item_stats']['min_value'] = item
                    stats['list_item_stats']['max_value'] = item
                else:
                    stats['list_item_stats']['min_value'] = min(stats['list_item_stats']['min_value'], item)
                    stats['list_item_stats']['max_value'] = max(stats['list_item_stats']['max_value'], item)
            elif isinstance(item, str) and len(stats.get('examples', set())) < 3:
                if 'examples' not in stats:
                    stats['examples'] = set()
                stats['examples'].add(str(item))


def merge_objects(objects: List[Any]) -> Dict:
    """Merge multiple JSON objects and analyze their structure."""
    stats = init_stats_dict()
    stats['total_objects'] = len(objects)

    for obj in objects:
        if obj is not None:
            analyze_value(obj, stats)

    return stats


def format_value(value: Any) -> str:
    """Format a value for display."""
    if isinstance(value, float):
        return f"{value:.2f}"
    return str(value)


def print_field_stats(stats: Dict, prefix: str = "") -> None:
    """Helper function to print field statistics recursively."""
    # Print examples for non-container types
    if 'examples' in stats and stats['examples']:
        print(f"{prefix}Examples: {', '.join(sorted(stats['examples']))}")

    # Print numeric value ranges
    if 'min_value' in stats:
        min_val = format_value(stats['min_value'])
        max_val = format_value(stats['max_value'])
        if min_val != max_val:
            print(f"{prefix}Value range: {min_val} to {max_val}")
        else:
            print(f"{prefix}Value: {min_val}")

    # Print list properties
    if 'list_item_types' in stats:
        print(f"{prefix}List properties:")
        print(f"{prefix} Length range: {stats['min_length']} to {stats['max_length']}")
        item_types = sorted(stats['list_item_types'])
        print(f"{prefix} Item types: {', '.join(item_types)}")

    # Print list item statistics
    if 'list_item_stats' in stats:
        item_stats = stats['list_item_stats']
        if 'min_value' in item_stats:
            min_val = format_value(item_stats['min_value'])
            max_val = format_value(item_stats['max_value'])
            if min_val != max_val:
                print(f"{prefix} Item value range: {min_val} to {max_val}")
            else:
                print(f"{prefix} Item value: {min_val}")

        if 'examples' in item_stats and item_stats['examples']:
            print(f"{prefix} Item examples: {', '.join(sorted(item_stats['examples']))}")

        if 'fields' in item_stats and item_stats['fields']:
            print(f"{prefix} Item structure:")
            for field_name, field_stats in sorted(item_stats['fields'].items()):
                print(f"{prefix} {field_name}:")
                print(f"{prefix} Occurrences: {field_stats['count']}")
                print(f"{prefix} Types: {', '.join(sorted(field_stats['types']))}")
                print_field_stats(field_stats, prefix + " ")

    # Print nested fields from the fields dictionary
    if 'fields' in stats and stats['fields']:
        print(f"{prefix}Nested structure:")
        for field_name, field_stats in sorted(stats['fields'].items()):
            print(f"{prefix} {field_name}:")
            print(f"{prefix} Occurrences: {field_stats['count']}")
            print(f"{prefix} Types: {', '.join(sorted(field_stats['types']))}")
            print_field_stats(field_stats, prefix + " ")


def print_summary(stats: Dict) -> None:
    """Print a formatted summary of the JSON structure."""
    print("\n=== JSON Structure Summary ===")
    print(f"\nTotal objects processed: {stats['total_objects']}")
    print(f"Root level types found: {', '.join(stats['types'])}")
    print("\nField Analysis:")

    for field, field_stats in sorted(stats['fields'].items()):
        print(f"\n{field}:")
        print(f" Occurrences: {field_stats['count']}")
        print(f" Types: {', '.join(field_stats['types'])}")
        print_field_stats(field_stats, " ")


def main():
    parser = argparse.ArgumentParser(description='Analyze and merge JSON files')
    parser.add_argument('paths', nargs='+', help='Paths to JSON files or directories')
    args = parser.parse_args()

    # Find all JSON files
    json_files = []
    for path in args.paths:
        json_files.extend(find_json_files(path))

    if not json_files:
        print("No JSON files found in the specified paths.")
        return

    # Read and process all JSON files
    objects = []
    for file_path in json_files:
        obj = read_json_file(file_path)
        if obj is not None:
            objects.append(obj)

    if not objects:
        print("No valid JSON objects found in the specified files.")
        return

    # Analyze and print summary
    stats = merge_objects(objects)
    print_summary(stats)


if __name__ == '__main__':
    main()
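
For orientation, a minimal usage sketch, assuming it is run from the repository root so that main.py and test_data/ resolve: the CLI entry point is python main.py test_data, and the same pipeline can be driven programmatically:

# Hypothetical driver sketch; mirrors what main() does for the bundled test data.
from main import find_json_files, read_json_file, merge_objects, print_summary

files = find_json_files("test_data")  # product.json, user1.json, user2.json
objects = [obj for obj in (read_json_file(f) for f in files) if obj is not None]
print_summary(merge_objects(objects))  # prints the same summary as the CLI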

31
test_data/product.json Normal file

@@ -0,0 +1,31 @@
{
  "id": "PROD-123",
  "name": "Smart Watch",
  "price": 199.99,
  "in_stock": true,
  "categories": ["electronics", "wearables", "accessories"],
  "specifications": {
    "display": {
      "type": "OLED",
      "size": "1.5 inch",
      "resolution": "360x360"
    },
    "battery": {
      "capacity": "300mAh",
      "life": "48 hours"
    },
    "features": ["heart-rate", "gps", "waterproof"]
  },
  "variants": [
    {
      "color": "black",
      "sku": "SW-BLK-001",
      "price": 199.99
    },
    {
      "color": "silver",
      "sku": "SW-SLV-001",
      "price": 219.99
    }
  ]
}
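
The "variants" array above is the piece of test data that exercises the list-of-dicts branch of analyze_value. A small hand-run sketch of what it produces, assuming main.py is importable from the repository root:

# Hypothetical sketch: feed only the 'variants' list through analyze_value.
from main import analyze_value, init_stats_dict

variants = [
    {"color": "black", "sku": "SW-BLK-001", "price": 199.99},
    {"color": "silver", "sku": "SW-SLV-001", "price": 219.99},
]
stats = init_stats_dict()
analyze_value(variants, stats)
# stats['list_item_types'] == {'dict'}; the per-variant fields land in
# stats['list_item_stats']['fields'], e.g. 'price' with count 2 and types {'float'}.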

11
test_data/user1.json Normal file

@@ -0,0 +1,11 @@
{
  "name": "John Doe",
  "age": 30,
  "email": "john@example.com",
  "address": {
    "street": "123 Main St",
    "city": "New York",
    "country": "USA"
  },
  "hobbies": ["reading", "gaming", "hiking"]
}

13
test_data/user2.json Normal file

@@ -0,0 +1,13 @@
{
  "name": "Jane Smith",
  "age": 25,
  "email": "jane@example.com",
  "address": {
    "street": "456 Oak Ave",
    "city": "Los Angeles",
    "country": "USA",
    "zip": "90001"
  },
  "hobbies": ["painting", "music"],
  "occupation": "Software Engineer"
}
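
Together, user1.json and user2.json are what make the occurrence counts in the summary interesting: fields present in both files are counted twice, while "zip" and "occupation" appear only once. A hedged sketch of that behaviour, again assuming it runs from the repository root:

# Hypothetical sketch: merge only the two user files and inspect the counts.
from main import merge_objects, read_json_file

stats = merge_objects([read_json_file("test_data/user1.json"),
                       read_json_file("test_data/user2.json")])
assert stats['total_objects'] == 2
assert stats['fields']['age']['count'] == 2          # present in both users
assert stats['fields']['occupation']['count'] == 1   # only in user2.json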