OMGeeky
2025-02-22 17:02:40 +01:00
commit c36f09aa38
9 changed files with 327 additions and 0 deletions

8
.idea/.gitignore generated vendored Normal file

@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

10
.idea/JsonSummary.iml generated Normal file

@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/.venv" />
    </content>
    <orderEntry type="jdk" jdkName="Python 3.13 (JsonSummary)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>


@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>

8
.idea/modules.xml generated Normal file

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/JsonSummary.iml" filepath="$PROJECT_DIR$/.idea/JsonSummary.iml" />
    </modules>
  </component>
</project>

6
.idea/vcs.xml generated Normal file

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>

234
main.py Normal file

@@ -0,0 +1,234 @@
import argparse
import json
import os
from collections import defaultdict
from typing import Dict, Any, List, Set


def find_json_files(path: str) -> List[str]:
    """Find all JSON files in the given path."""
    json_files = []
    if os.path.isfile(path):
        if path.lower().endswith('.json'):
            json_files.append(path)
    else:
        for root, _, files in os.walk(path):
            for file in files:
                if file.lower().endswith('.json'):
                    json_files.append(os.path.join(root, file))
    return json_files


def read_json_file(file_path: str) -> Any:
    """Read and parse a JSON file."""
    try:
        with open(file_path, 'r') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error parsing {file_path}: {e}")
        return None
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None


def init_stats_dict() -> Dict:
    """Initialize a statistics dictionary with default values."""
    return {
        'types': set(),
        'fields': defaultdict(lambda: {
            'count': 0,
            'types': set(),
            'nested_fields': defaultdict(init_stats_dict),
            'examples': set()
        })
    }
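
# Shape sketch (illustrative, hand-traced for the two user files in test_data/):
# a scalar field entry built by analyze_value below looks roughly like
#     stats['fields']['age'] == {'count': 2, 'types': {'int'},
#                                'examples': {'25', '30'}, 'fields': {...}}
# Scalar fields collect up to three example strings; dict and list values are
# recursed into and fill the nested 'fields' / list statistics instead.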


def analyze_value(value: Any, stats: Dict, depth: int = 0, max_depth: int = 5) -> None:
    """Analyze a value and update statistics."""
    if depth >= max_depth:
        return

    # Ensure stats has the basic structure
    if 'types' not in stats:
        stats.update(init_stats_dict())

    value_type = type(value).__name__
    stats['types'].add(value_type)

    if isinstance(value, (int, float)):
        if 'min_value' not in stats:
            stats['min_value'] = value
            stats['max_value'] = value
        else:
            stats['min_value'] = min(stats['min_value'], value)
            stats['max_value'] = max(stats['max_value'], value)

    if isinstance(value, dict):
        for k, v in value.items():
            if k not in stats['fields']:
                stats['fields'][k] = init_stats_dict()
                stats['fields'][k]['count'] = 0
                stats['fields'][k]['examples'] = set()

            field_stats = stats['fields'][k]
            field_stats['count'] += 1
            field_stats['types'].add(type(v).__name__)

            # Store example values (limit to 3)
            if not isinstance(v, (dict, list)) and len(field_stats['examples']) < 3:
                field_stats['examples'].add(str(v))

            if isinstance(v, dict):
                analyze_value(v, field_stats, depth + 1, max_depth)
            elif isinstance(v, list):
                analyze_value(v, field_stats, depth + 1, max_depth)

    elif isinstance(value, list):
        if 'list_item_types' not in stats:
            stats['list_item_types'] = set()
            stats['list_item_stats'] = init_stats_dict()
            stats['min_length'] = len(value)
            stats['max_length'] = len(value)
        else:
            stats['min_length'] = min(stats['min_length'], len(value))
            stats['max_length'] = max(stats['max_length'], len(value))

        for item in value:
            item_type = type(item).__name__
            stats['list_item_types'].add(item_type)

            # Analyze list items in detail
            if isinstance(item, (dict, list)):
                analyze_value(item, stats['list_item_stats'], depth + 1, max_depth)
            elif isinstance(item, (int, float)):
                if 'min_value' not in stats['list_item_stats']:
                    stats['list_item_stats']['min_value'] = item
                    stats['list_item_stats']['max_value'] = item
                else:
                    stats['list_item_stats']['min_value'] = min(stats['list_item_stats']['min_value'], item)
                    stats['list_item_stats']['max_value'] = max(stats['list_item_stats']['max_value'], item)
            elif isinstance(item, str) and len(stats.get('examples', set())) < 3:
                if 'examples' not in stats:
                    stats['examples'] = set()
                stats['examples'].add(str(item))


def merge_objects(objects: List[Any]) -> Dict:
    """Merge multiple JSON objects and analyze their structure."""
    stats = init_stats_dict()
    stats['total_objects'] = len(objects)

    for obj in objects:
        if obj is not None:
            analyze_value(obj, stats)

    return stats


def format_value(value: Any) -> str:
    """Format a value for display."""
    if isinstance(value, float):
        return f"{value:.2f}"
    return str(value)


def print_field_stats(stats: Dict, prefix: str = "") -> None:
    """Helper function to print field statistics recursively."""
    # Print examples for non-container types
    if 'examples' in stats and stats['examples']:
        print(f"{prefix}Examples: {', '.join(sorted(stats['examples']))}")

    # Print numeric value ranges
    if 'min_value' in stats:
        min_val = format_value(stats['min_value'])
        max_val = format_value(stats['max_value'])
        if min_val != max_val:
            print(f"{prefix}Value range: {min_val} to {max_val}")
        else:
            print(f"{prefix}Value: {min_val}")

    # Print list properties
    if 'list_item_types' in stats:
        print(f"{prefix}List properties:")
        print(f"{prefix} Length range: {stats['min_length']} to {stats['max_length']}")
        item_types = sorted(stats['list_item_types'])
        print(f"{prefix} Item types: {', '.join(item_types)}")

    # Print list item statistics
    if 'list_item_stats' in stats:
        item_stats = stats['list_item_stats']
        if 'min_value' in item_stats:
            min_val = format_value(item_stats['min_value'])
            max_val = format_value(item_stats['max_value'])
            if min_val != max_val:
                print(f"{prefix} Item value range: {min_val} to {max_val}")
            else:
                print(f"{prefix} Item value: {min_val}")

        if 'examples' in item_stats and item_stats['examples']:
            print(f"{prefix} Item examples: {', '.join(sorted(item_stats['examples']))}")

        if 'fields' in item_stats and item_stats['fields']:
            print(f"{prefix} Item structure:")
            for field_name, field_stats in sorted(item_stats['fields'].items()):
                print(f"{prefix} {field_name}:")
                print(f"{prefix} Occurrences: {field_stats['count']}")
                print(f"{prefix} Types: {', '.join(sorted(field_stats['types']))}")
                print_field_stats(field_stats, prefix + " ")

    # Print nested fields from the fields dictionary
    if 'fields' in stats and stats['fields']:
        print(f"{prefix}Nested structure:")
        for field_name, field_stats in sorted(stats['fields'].items()):
            print(f"{prefix} {field_name}:")
            print(f"{prefix} Occurrences: {field_stats['count']}")
            print(f"{prefix} Types: {', '.join(sorted(field_stats['types']))}")
            print_field_stats(field_stats, prefix + " ")


def print_summary(stats: Dict) -> None:
    """Print a formatted summary of the JSON structure."""
    print("\n=== JSON Structure Summary ===")
    print(f"\nTotal objects processed: {stats['total_objects']}")
    print(f"Root level types found: {', '.join(stats['types'])}")
    print("\nField Analysis:")

    for field, field_stats in sorted(stats['fields'].items()):
        print(f"\n{field}:")
        print(f" Occurrences: {field_stats['count']}")
        print(f" Types: {', '.join(field_stats['types'])}")
        print_field_stats(field_stats, " ")


def main():
    parser = argparse.ArgumentParser(description='Analyze and merge JSON files')
    parser.add_argument('paths', nargs='+', help='Paths to JSON files or directories')
    args = parser.parse_args()

    # Find all JSON files
    json_files = []
    for path in args.paths:
        json_files.extend(find_json_files(path))

    if not json_files:
        print("No JSON files found in the specified paths.")
        return

    # Read and process all JSON files
    objects = []
    for file_path in json_files:
        obj = read_json_file(file_path)
        if obj is not None:
            objects.append(obj)

    if not objects:
        print("No valid JSON objects found in the specified files.")
        return

    # Analyze and print summary
    stats = merge_objects(objects)
    print_summary(stats)


if __name__ == '__main__':
    main()
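
For orientation, a minimal usage sketch, assuming it is run from the repository root so that main.py and test_data/ resolve: the CLI entry point is python main.py test_data, and the same pipeline can be driven programmatically:

# Hypothetical driver sketch; mirrors what main() does for the bundled test data.
from main import find_json_files, read_json_file, merge_objects, print_summary

files = find_json_files("test_data")  # product.json, user1.json, user2.json
objects = [obj for obj in (read_json_file(f) for f in files) if obj is not None]
print_summary(merge_objects(objects))  # prints the same summary as the CLI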

31
test_data/product.json Normal file

@@ -0,0 +1,31 @@
{
  "id": "PROD-123",
  "name": "Smart Watch",
  "price": 199.99,
  "in_stock": true,
  "categories": ["electronics", "wearables", "accessories"],
  "specifications": {
    "display": {
      "type": "OLED",
      "size": "1.5 inch",
      "resolution": "360x360"
    },
    "battery": {
      "capacity": "300mAh",
      "life": "48 hours"
    },
    "features": ["heart-rate", "gps", "waterproof"]
  },
  "variants": [
    {
      "color": "black",
      "sku": "SW-BLK-001",
      "price": 199.99
    },
    {
      "color": "silver",
      "sku": "SW-SLV-001",
      "price": 219.99
    }
  ]
}
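
The "variants" array above is the piece of test data that exercises the list-of-dicts branch of analyze_value. A small hand-run sketch of what it produces, assuming main.py is importable from the repository root:

# Hypothetical sketch: feed only the 'variants' list through analyze_value.
from main import analyze_value, init_stats_dict

variants = [
    {"color": "black", "sku": "SW-BLK-001", "price": 199.99},
    {"color": "silver", "sku": "SW-SLV-001", "price": 219.99},
]
stats = init_stats_dict()
analyze_value(variants, stats)
# stats['list_item_types'] == {'dict'}; the per-variant fields land in
# stats['list_item_stats']['fields'], e.g. 'price' with count 2 and types {'float'}.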

11
test_data/user1.json Normal file

@@ -0,0 +1,11 @@
{
  "name": "John Doe",
  "age": 30,
  "email": "john@example.com",
  "address": {
    "street": "123 Main St",
    "city": "New York",
    "country": "USA"
  },
  "hobbies": ["reading", "gaming", "hiking"]
}

13
test_data/user2.json Normal file

@@ -0,0 +1,13 @@
{
  "name": "Jane Smith",
  "age": 25,
  "email": "jane@example.com",
  "address": {
    "street": "456 Oak Ave",
    "city": "Los Angeles",
    "country": "USA",
    "zip": "90001"
  },
  "hobbies": ["painting", "music"],
  "occupation": "Software Engineer"
}
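
Together, user1.json and user2.json are what make the occurrence counts in the summary interesting: fields present in both files are counted twice, while "zip" and "occupation" appear only once. A hedged sketch of that behaviour, again assuming it runs from the repository root:

# Hypothetical sketch: merge only the two user files and inspect the counts.
from main import merge_objects, read_json_file

stats = merge_objects([read_json_file("test_data/user1.json"),
                       read_json_file("test_data/user2.json")])
assert stats['total_objects'] == 2
assert stats['fields']['age']['count'] == 2          # present in both users
assert stats['fields']['occupation']['count'] == 1   # only in user2.json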