mirror of https://github.com/OMGeeky/json-summary.git (synced 2025-12-26 17:02:28 +01:00)

Commit: Init

.idea/.gitignore (new file, generated, vendored, +8)
@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

.idea/JsonSummary.iml (new file, generated, +10)
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$">
      <excludeFolder url="file://$MODULE_DIR$/.venv" />
    </content>
    <orderEntry type="jdk" jdkName="Python 3.13 (JsonSummary)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

.idea/inspectionProfiles/profiles_settings.xml (new file, generated, +6)
@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>

.idea/modules.xml (new file, generated, +8)
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/JsonSummary.iml" filepath="$PROJECT_DIR$/.idea/JsonSummary.iml" />
    </modules>
  </component>
</project>

.idea/vcs.xml (new file, generated, +6)
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>

main.py (new file, +234)
@@ -0,0 +1,234 @@
import argparse
import json
import os
from collections import defaultdict
from typing import Dict, Any, List, Set


def find_json_files(path: str) -> List[str]:
    """Find all JSON files in the given path."""
    json_files = []
    if os.path.isfile(path):
        if path.lower().endswith('.json'):
            json_files.append(path)
    else:
        for root, _, files in os.walk(path):
            for file in files:
                if file.lower().endswith('.json'):
                    json_files.append(os.path.join(root, file))
    return json_files


def read_json_file(file_path: str) -> Any:
    """Read and parse a JSON file."""
    try:
        with open(file_path, 'r') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error parsing {file_path}: {e}")
        return None
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return None


def init_stats_dict() -> Dict:
    """Initialize a statistics dictionary with default values."""
    return {
        'types': set(),
        'fields': defaultdict(lambda: {
            'count': 0,
            'types': set(),
            'nested_fields': defaultdict(init_stats_dict),
            'examples': set()
        })
    }


def analyze_value(value: Any, stats: Dict, depth: int = 0, max_depth: int = 5) -> None:
    """Analyze a value and update statistics."""
    if depth >= max_depth:
        return

    # Ensure stats has the basic structure
    if 'types' not in stats:
        stats.update(init_stats_dict())

    value_type = type(value).__name__
    stats['types'].add(value_type)

    if isinstance(value, (int, float)):
        if 'min_value' not in stats:
            stats['min_value'] = value
            stats['max_value'] = value
        else:
            stats['min_value'] = min(stats['min_value'], value)
            stats['max_value'] = max(stats['max_value'], value)

    if isinstance(value, dict):
        for k, v in value.items():
            if k not in stats['fields']:
                stats['fields'][k] = init_stats_dict()
                stats['fields'][k]['count'] = 0
                stats['fields'][k]['examples'] = set()

            field_stats = stats['fields'][k]
            field_stats['count'] += 1
            field_stats['types'].add(type(v).__name__)

            # Store example values (limit to 3)
            if not isinstance(v, (dict, list)) and len(field_stats['examples']) < 3:
                field_stats['examples'].add(str(v))

            if isinstance(v, dict):
                analyze_value(v, field_stats, depth + 1, max_depth)
            elif isinstance(v, list):
                analyze_value(v, field_stats, depth + 1, max_depth)

    elif isinstance(value, list):
        if 'list_item_types' not in stats:
            stats['list_item_types'] = set()
            stats['list_item_stats'] = init_stats_dict()
            stats['min_length'] = len(value)
            stats['max_length'] = len(value)
        else:
            stats['min_length'] = min(stats['min_length'], len(value))
            stats['max_length'] = max(stats['max_length'], len(value))

        for item in value:
            item_type = type(item).__name__
            stats['list_item_types'].add(item_type)

            # Analyze list items in detail
            if isinstance(item, (dict, list)):
                analyze_value(item, stats['list_item_stats'], depth + 1, max_depth)
            elif isinstance(item, (int, float)):
                if 'min_value' not in stats['list_item_stats']:
                    stats['list_item_stats']['min_value'] = item
                    stats['list_item_stats']['max_value'] = item
                else:
                    stats['list_item_stats']['min_value'] = min(stats['list_item_stats']['min_value'], item)
                    stats['list_item_stats']['max_value'] = max(stats['list_item_stats']['max_value'], item)
            elif isinstance(item, str) and len(stats.get('examples', set())) < 3:
                if 'examples' not in stats:
                    stats['examples'] = set()
                stats['examples'].add(str(item))


def merge_objects(objects: List[Any]) -> Dict:
    """Merge multiple JSON objects and analyze their structure."""
    stats = init_stats_dict()
    stats['total_objects'] = len(objects)

    for obj in objects:
        if obj is not None:
            analyze_value(obj, stats)

    return stats


def format_value(value: Any) -> str:
    """Format a value for display."""
    if isinstance(value, float):
        return f"{value:.2f}"
    return str(value)


def print_field_stats(stats: Dict, prefix: str = "") -> None:
    """Helper function to print field statistics recursively."""
    # Print examples for non-container types
    if 'examples' in stats and stats['examples']:
        print(f"{prefix}Examples: {', '.join(sorted(stats['examples']))}")

    # Print numeric value ranges
    if 'min_value' in stats:
        min_val = format_value(stats['min_value'])
        max_val = format_value(stats['max_value'])
        if min_val != max_val:
            print(f"{prefix}Value range: {min_val} to {max_val}")
        else:
            print(f"{prefix}Value: {min_val}")

    # Print list properties
    if 'list_item_types' in stats:
        print(f"{prefix}List properties:")
        print(f"{prefix}  Length range: {stats['min_length']} to {stats['max_length']}")
        item_types = sorted(stats['list_item_types'])
        print(f"{prefix}  Item types: {', '.join(item_types)}")

    # Print list item statistics
    if 'list_item_stats' in stats:
        item_stats = stats['list_item_stats']
        if 'min_value' in item_stats:
            min_val = format_value(item_stats['min_value'])
            max_val = format_value(item_stats['max_value'])
            if min_val != max_val:
                print(f"{prefix}  Item value range: {min_val} to {max_val}")
            else:
                print(f"{prefix}  Item value: {min_val}")

        if 'examples' in item_stats and item_stats['examples']:
            print(f"{prefix}  Item examples: {', '.join(sorted(item_stats['examples']))}")

        if 'fields' in item_stats and item_stats['fields']:
            print(f"{prefix}  Item structure:")
            for field_name, field_stats in sorted(item_stats['fields'].items()):
                print(f"{prefix}    {field_name}:")
                print(f"{prefix}      Occurrences: {field_stats['count']}")
                print(f"{prefix}      Types: {', '.join(sorted(field_stats['types']))}")
                print_field_stats(field_stats, prefix + "      ")

    # Print nested fields from the fields dictionary
    if 'fields' in stats and stats['fields']:
        print(f"{prefix}Nested structure:")
        for field_name, field_stats in sorted(stats['fields'].items()):
            print(f"{prefix}  {field_name}:")
            print(f"{prefix}    Occurrences: {field_stats['count']}")
            print(f"{prefix}    Types: {', '.join(sorted(field_stats['types']))}")
            print_field_stats(field_stats, prefix + "    ")


def print_summary(stats: Dict) -> None:
    """Print a formatted summary of the JSON structure."""
    print("\n=== JSON Structure Summary ===")
    print(f"\nTotal objects processed: {stats['total_objects']}")
    print(f"Root level types found: {', '.join(stats['types'])}")

    print("\nField Analysis:")
    for field, field_stats in sorted(stats['fields'].items()):
        print(f"\n{field}:")
        print(f"  Occurrences: {field_stats['count']}")
        print(f"  Types: {', '.join(field_stats['types'])}")
        print_field_stats(field_stats, "  ")


def main():
    parser = argparse.ArgumentParser(description='Analyze and merge JSON files')
    parser.add_argument('paths', nargs='+', help='Paths to JSON files or directories')
    args = parser.parse_args()

    # Find all JSON files
    json_files = []
    for path in args.paths:
        json_files.extend(find_json_files(path))

    if not json_files:
        print("No JSON files found in the specified paths.")
        return

    # Read and process all JSON files
    objects = []
    for file_path in json_files:
        obj = read_json_file(file_path)
        if obj is not None:
            objects.append(obj)

    if not objects:
        print("No valid JSON objects found in the specified files.")
        return

    # Analyze and print summary
    stats = merge_objects(objects)
    print_summary(stats)


if __name__ == '__main__':
    main()
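
Note (not part of the commit): the script is intended to be run from the command line, e.g. python main.py test_data/ or with several explicit file or directory paths, and the same functions can also be driven directly. Below is a minimal usage sketch, assuming main.py is importable as a module named main and that the test_data directory from this commit is present; the variable names are illustrative only.

    # Hypothetical driver: reuses the functions added above to summarize the
    # bundled test_data files without going through argparse.
    from main import find_json_files, read_json_file, merge_objects, print_summary

    files = find_json_files("test_data")          # recursively collects *.json paths
    objects = [o for o in (read_json_file(f) for f in files) if o is not None]
    print_summary(merge_objects(objects))         # prints the merged structure summary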

test_data/product.json (new file, +31)
@@ -0,0 +1,31 @@
{
  "id": "PROD-123",
  "name": "Smart Watch",
  "price": 199.99,
  "in_stock": true,
  "categories": ["electronics", "wearables", "accessories"],
  "specifications": {
    "display": {
      "type": "OLED",
      "size": "1.5 inch",
      "resolution": "360x360"
    },
    "battery": {
      "capacity": "300mAh",
      "life": "48 hours"
    },
    "features": ["heart-rate", "gps", "waterproof"]
  },
  "variants": [
    {
      "color": "black",
      "sku": "SW-BLK-001",
      "price": 199.99
    },
    {
      "color": "silver",
      "sku": "SW-SLV-001",
      "price": 219.99
    }
  ]
}

test_data/user1.json (new file, +11)
@@ -0,0 +1,11 @@
{
  "name": "John Doe",
  "age": 30,
  "email": "john@example.com",
  "address": {
    "street": "123 Main St",
    "city": "New York",
    "country": "USA"
  },
  "hobbies": ["reading", "gaming", "hiking"]
}

test_data/user2.json (new file, +13)
@@ -0,0 +1,13 @@
{
  "name": "Jane Smith",
  "age": 25,
  "email": "jane@example.com",
  "address": {
    "street": "456 Oak Ave",
    "city": "Los Angeles",
    "country": "USA",
    "zip": "90001"
  },
  "hobbies": ["painting", "music"],
  "occupation": "Software Engineer"
}