"""Summarize the structure of one or more JSON documents.

The module collects every ``.json`` file under the given paths, merges the
parsed documents into a single statistics dictionary (field occurrence
counts, observed types, example values, numeric ranges, list lengths) and
prints a human-readable report.
"""

import argparse
import json
import os
from collections import defaultdict
from typing import Dict, Any, List, Set

# Maximum number of example values remembered per field.
_MAX_EXAMPLES = 3

# One level of indentation in the printed report.
_INDENT = "  "


def find_json_files(path: str) -> List[str]:
    """Find all JSON files in the given path.

    Args:
        path: A file or directory path.

    Returns:
        ``[path]`` when *path* is a file ending in ``.json`` (case
        insensitive); otherwise every such file found by walking *path*
        recursively. The result is empty when nothing matches.
    """
    if os.path.isfile(path):
        return [path] if path.lower().endswith('.json') else []

    json_files = []
    for root, dirs, files in os.walk(path):
        # os.walk yields entries in filesystem order; sorting in place makes
        # the traversal (and therefore the report) deterministic.
        dirs.sort()
        for file in sorted(files):
            if file.lower().endswith('.json'):
                json_files.append(os.path.join(root, file))
    return json_files


def read_json_file(file_path: str) -> Any:
    """Read and parse a JSON file.

    Args:
        file_path: Path of the file to load.

    Returns:
        The parsed JSON value, or ``None`` when the file cannot be read or
        parsed (a message describing the failure is printed).
    """
    try:
        # 'utf-8-sig' decodes plain UTF-8 and also strips a leading BOM,
        # which json.load would otherwise reject. Being explicit also avoids
        # depending on the platform's locale encoding.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error parsing {file_path}: {e}")
        return None
    except (OSError, UnicodeDecodeError, RecursionError) as e:
        print(f"Error reading {file_path}: {e}")
        return None


def init_stats_dict() -> Dict:
    """Initialize a statistics dictionary with default values.

    Returns:
        A dict with an empty ``'types'`` set and a ``'fields'`` mapping whose
        missing entries default to a fresh per-field record.
    """
    return {
        'types': set(),
        'fields': defaultdict(lambda: {
            'count': 0,
            'types': set(),
            'nested_fields': defaultdict(init_stats_dict),
            'examples': set()
        })
    }


def _update_numeric_range(stats: Dict, value: Any) -> None:
    """Widen ``stats['min_value']``/``['max_value']`` to include *value*."""
    # bool is a subclass of int, but True/False are flags, not measurements.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return
    if 'min_value' not in stats:
        stats['min_value'] = value
        stats['max_value'] = value
    else:
        stats['min_value'] = min(stats['min_value'], value)
        stats['max_value'] = max(stats['max_value'], value)


def analyze_value(value: Any, stats: Dict, depth: int = 0, max_depth: int = 5) -> None:
    """Analyze a value and update statistics.

    Args:
        value: The parsed JSON value to inspect.
        stats: Statistics dictionary updated in place. If it has no
            ``'types'`` key it is first populated from ``init_stats_dict()``.
        depth: Current nesting depth.
        max_depth: Values at ``depth >= max_depth`` are ignored.
    """
    if depth >= max_depth:
        return

    # Ensure stats has the basic structure
    if 'types' not in stats:
        stats.update(init_stats_dict())

    stats['types'].add(type(value).__name__)
    _update_numeric_range(stats, value)

    if isinstance(value, dict):
        for k, v in value.items():
            if k not in stats['fields']:
                new_field = init_stats_dict()
                new_field['count'] = 0
                new_field['examples'] = set()
                stats['fields'][k] = new_field

            field_stats = stats['fields'][k]
            field_stats['count'] += 1
            field_stats['types'].add(type(v).__name__)

            if isinstance(v, (dict, list)):
                analyze_value(v, field_stats, depth + 1, max_depth)
            else:
                # Scalars are not recursed into, so their examples and
                # numeric range are recorded here.
                if len(field_stats['examples']) < _MAX_EXAMPLES:
                    field_stats['examples'].add(str(v))
                _update_numeric_range(field_stats, v)

    elif isinstance(value, list):
        length = len(value)
        if 'list_item_types' not in stats:
            stats['list_item_types'] = set()
            stats['list_item_stats'] = init_stats_dict()
            stats['min_length'] = length
            stats['max_length'] = length
        else:
            stats['min_length'] = min(stats['min_length'], length)
            stats['max_length'] = max(stats['max_length'], length)

        item_stats = stats['list_item_stats']
        for item in value:
            stats['list_item_types'].add(type(item).__name__)

            # Analyze list items in detail
            if isinstance(item, (dict, list)):
                analyze_value(item, item_stats, depth + 1, max_depth)
            elif isinstance(item, str):
                # String items are kept as examples on the list's own stats.
                examples = stats.setdefault('examples', set())
                if len(examples) < _MAX_EXAMPLES:
                    examples.add(item)
            else:
                _update_numeric_range(item_stats, item)


def merge_objects(objects: List[Any]) -> Dict:
    """Merge multiple JSON objects and analyze their structure.

    Args:
        objects: Parsed JSON values; ``None`` entries are skipped during
            analysis.

    Returns:
        The combined statistics dictionary, including ``'total_objects'``
        set to ``len(objects)``.
    """
    stats = init_stats_dict()
    stats['total_objects'] = len(objects)

    for obj in objects:
        if obj is not None:
            analyze_value(obj, stats)

    return stats


def format_value(value: Any) -> str:
    """Format a value for display.

    Floats are rendered with two decimal places; everything else uses
    ``str()``.
    """
    if isinstance(value, float):
        return f"{value:.2f}"
    return str(value)


def _print_value_range(stats: Dict, prefix: str, range_label: str, single_label: str) -> None:
    """Print the numeric range stored in *stats*, if any."""
    if 'min_value' not in stats:
        return
    min_val = format_value(stats['min_value'])
    max_val = format_value(stats['max_value'])
    if min_val != max_val:
        print(f"{prefix}{range_label}: {min_val} to {max_val}")
    else:
        print(f"{prefix}{single_label}: {min_val}")


def _print_fields(fields: Dict, prefix: str) -> None:
    """Print each field in *fields* (sorted by name) with its details indented."""
    detail_prefix = prefix + _INDENT
    for field_name, field_stats in sorted(fields.items()):
        print(f"{prefix}{field_name}:")
        print(f"{detail_prefix}Occurrences: {field_stats['count']}")
        print(f"{detail_prefix}Types: {', '.join(sorted(field_stats['types']))}")
        print_field_stats(field_stats, detail_prefix)


def print_field_stats(stats: Dict, prefix: str = "") -> None:
    """Helper function to print field statistics recursively.

    Args:
        stats: Statistics dictionary for one field (or list item).
        prefix: Leading whitespace for every line printed at this level;
            nested levels are indented further relative to it.
    """
    # Print examples for non-container types
    if 'examples' in stats and stats['examples']:
        print(f"{prefix}Examples: {', '.join(sorted(stats['examples']))}")

    # Print numeric value ranges
    _print_value_range(stats, prefix, "Value range", "Value")

    # Print list properties
    if 'list_item_types' in stats:
        inner = prefix + _INDENT
        print(f"{prefix}List properties:")
        print(f"{inner}Length range: {stats['min_length']} to {stats['max_length']}")
        print(f"{inner}Item types: {', '.join(sorted(stats['list_item_types']))}")

        # Print list item statistics
        if 'list_item_stats' in stats:
            item_stats = stats['list_item_stats']
            _print_value_range(item_stats, inner, "Item value range", "Item value")

            if 'examples' in item_stats and item_stats['examples']:
                print(f"{inner}Item examples: {', '.join(sorted(item_stats['examples']))}")

            if 'fields' in item_stats and item_stats['fields']:
                print(f"{inner}Item structure:")
                _print_fields(item_stats['fields'], inner + _INDENT)

    # Print nested fields from the fields dictionary
    if 'fields' in stats and stats['fields']:
        print(f"{prefix}Nested structure:")
        _print_fields(stats['fields'], prefix + _INDENT)


def print_summary(stats: Dict) -> None:
    """Print a formatted summary of the JSON structure.

    Args:
        stats: Statistics dictionary as returned by ``merge_objects``.
    """
    print("\n=== JSON Structure Summary ===")
    print(f"\nTotal objects processed: {stats['total_objects']}")
    # Sets have no stable order; sort so repeated runs print the same text.
    print(f"Root level types found: {', '.join(sorted(stats['types']))}")

    print("\nField Analysis:")
    for field, field_stats in sorted(stats['fields'].items()):
        print(f"\n{field}:")
        print(f"{_INDENT}Occurrences: {field_stats['count']}")
        print(f"{_INDENT}Types: {', '.join(sorted(field_stats['types']))}")
        print_field_stats(field_stats, _INDENT)


def main():
    """Command-line entry point: collect JSON files, analyze them, print a report."""
    parser = argparse.ArgumentParser(description='Analyze and merge JSON files')
    parser.add_argument('paths', nargs='+', help='Paths to JSON files or directories')
    args = parser.parse_args()

    # Find all JSON files
    json_files = []
    for path in args.paths:
        json_files.extend(find_json_files(path))

    if not json_files:
        print("No JSON files found in the specified paths.")
        return

    # Read and process all JSON files; unreadable files yield None and are dropped
    objects = []
    for file_path in json_files:
        obj = read_json_file(file_path)
        if obj is not None:
            objects.append(obj)

    if not objects:
        print("No valid JSON objects found in the specified files.")
        return

    # Analyze and print summary
    stats = merge_objects(objects)
    print_summary(stats)


if __name__ == '__main__':
    main()
0000000..ebc38d1 --- /dev/null +++ b/test_data/user2.json @@ -0,0 +1,13 @@ +{ + "name": "Jane Smith", + "age": 25, + "email": "jane@example.com", + "address": { + "street": "456 Oak Ave", + "city": "Los Angeles", + "country": "USA", + "zip": "90001" + }, + "hobbies": ["painting", "music"], + "occupation": "Software Engineer" +} \ No newline at end of file