This is a rather old question, but the following is a solution that indents the JSON only up to a maximum nesting depth. Anything nested deeper than indent_max_depth is written flat (on a single line) inside its indented parent.
The code is a modification of the cpython/Lib/json/encoder.py file. Sorry, but it is a bit long.
import json
from collections.abc import Callable
from json.encoder import encode_basestring, encode_basestring_ascii, INFINITY
from typing import Any
class JSONMaxDepthEncoder(json.JSONEncoder):
def __init__(
self,
*,
skipkeys: bool=False,
ensure_ascii: bool=True,
check_circular: bool=True,
allow_nan: bool=True,
sort_keys: bool=False,
indent: int|str=None,
separators: tuple[str,str]=None,
default: callable=None,
indent_max_depth: int=3
) -> None:
"""
JSON encoder that indents upto indent_max_depth.
"""
super().__init__(
skipkeys=skipkeys,
ensure_ascii=ensure_ascii,
check_circular=check_circular,
allow_nan=allow_nan,
sort_keys=sort_keys,
indent=indent,
separators=separators,
default=default,
)
self.indent_max_depth = indent_max_depth
self._level = 0
def iterencode(self, o, _one_shot=False):
"""Encode the given object and yield each string
representation as available.
For example::
for chunk in JSONEncoder().iterencode(bigobject):
mysocket.write(chunk)
"""
if self.check_circular:
markers = {}
else:
markers = None
if self.ensure_ascii:
_encoder = encode_basestring_ascii
else:
_encoder = encode_basestring
def floatstr(o, allow_nan=self.allow_nan,
_repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
# Check for specials. Note that this type of test is processor
# and/or platform-specific, so do tests which don't depend on the
# internals.
if o != o:
text = 'NaN'
elif o == _inf:
text = 'Infinity'
elif o == _neginf:
text = '-Infinity'
else:
return _repr(o)
if not allow_nan:
raise ValueError(
"Out of range float values are not JSON compliant: " +
repr(o))
return text
_iterencode = _make_iterencode(
markers, self.default, _encoder, self.indent, floatstr,
self.key_separator, self.item_separator, self.sort_keys,
self.skipkeys, _one_shot, self.indent_max_depth)
return _iterencode(o, 0)
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        indent_max_depth,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__repr__,
    ):
    """Build the recursive generator that serialises an object to JSON text.

    A container is broken across lines (one item per line, indented by
    ``_indent`` per level) only while its nesting level is at most
    ``indent_max_depth``; deeper containers are written on a single line.

    Args:
        markers: dict keyed by ``id()`` used to detect circular references,
            or ``None`` to skip the check.
        _default: called for any object that is not a str, None, bool, int,
            float, list, tuple or dict; its return value is encoded instead.
        _encoder: turns a ``str`` into its quoted JSON representation.
        _indent: indentation unit, either a string or a number of spaces;
            ``None`` means no newlines are emitted at all.
        _floatstr: turns a ``float`` into its JSON text.
        _key_separator: text placed between a key and its value.
        _item_separator: text placed between two items of a container.
        _sort_keys: if true, dict items are emitted in sorted order.
        _skipkeys: if true, items whose key is not a str, int, float, bool
            or None are dropped instead of raising ``TypeError``.
        _one_shot: accepted for signature compatibility; not used here.
        indent_max_depth: deepest nesting level (outermost container is
            level 1) that is still broken across lines. ``None`` means
            "no limit".
        ValueError, dict, ..., _intstr: builtins bound as locals; not meant
            to be overridden by callers.

    Returns:
        A generator function ``_iterencode(o, current_indent_level,
        indent_max_depth=...)`` that yields the JSON text as string chunks.
        Iterating it raises ``ValueError`` on a circular reference (when
        ``markers`` is a dict) and ``TypeError`` on an unsupported dict key
        (when ``_skipkeys`` is false).
    """
    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _line_break(level, max_depth):
        # Text placed in front of every item of a container at *level*:
        # None when indentation is off entirely, '' when the container is
        # deeper than the limit (flat), otherwise newline + indentation.
        if _indent is None:
            return None
        if max_depth is None or level <= max_depth:
            return '\n' + _indent * level
        return ''

    def _scalar_text(value):
        # JSON text for a scalar, or None when *value* needs recursion.
        # The bool singletons are tested before int because bool is a
        # subclass of int.
        if isinstance(value, str):
            return _encoder(value)
        if value is None:
            return 'null'
        if value is True:
            return 'true'
        if value is False:
            return 'false'
        if isinstance(value, int):
            # Subclasses of int/float may override __repr__, but we still
            # want to encode them as integers/floats in JSON.
            return _intstr(value)
        if isinstance(value, float):
            return _floatstr(value)
        return None

    def _nested_chunks(value, level, max_depth):
        # Pick the generator matching the type of a non-scalar value.
        if isinstance(value, (list, tuple)):
            return _iterencode_list(value, level, max_depth)
        if isinstance(value, dict):
            return _iterencode_dict(value, level, max_depth)
        return _iterencode(value, level, max_depth)

    def _iterencode_list(lst, current_indent_level, indent_max_depth):
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        if _indent is not None:
            current_indent_level += 1
        newline_indent = _line_break(current_indent_level, indent_max_depth)
        buf = '['
        separator = _item_separator
        if newline_indent is not None:
            separator += newline_indent
            buf += newline_indent
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            text = _scalar_text(value)
            if text is not None:
                yield buf + text
            else:
                yield buf
                yield from _nested_chunks(
                    value, current_indent_level, indent_max_depth)
        # Close on its own line only if the items were put on their own
        # lines, so opening and closing layout can never disagree.
        if newline_indent:
            yield '\n' + _indent * (current_indent_level - 1)
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, current_indent_level, indent_max_depth):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            current_indent_level += 1
        newline_indent = _line_break(current_indent_level, indent_max_depth)
        item_separator = _item_separator
        if newline_indent is not None:
            item_separator += newline_indent
            yield newline_indent
        first = True
        if _sort_keys:
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them. Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            text = _scalar_text(value)
            if text is not None:
                yield text
            else:
                yield from _nested_chunks(
                    value, current_indent_level, indent_max_depth)
        if newline_indent:
            yield '\n' + _indent * (current_indent_level - 1)
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, current_indent_level, indent_max_depth=indent_max_depth):
        text = _scalar_text(o)
        if text is not None:
            yield text
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, current_indent_level, indent_max_depth)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, current_indent_level, indent_max_depth)
        else:
            # Unknown type: let the user-supplied hook convert it, guarding
            # against a hook that keeps returning the same object.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, current_indent_level, indent_max_depth)
            if markers is not None:
                del markers[markerid]

    return _iterencode
The usage is as follows:
# Sample document: three nested dicts, then a list of small records that
# sits one level below the indentation limit used here.
data = {
    'layer1': {
        'layer2': {
            'layer3_1': [
                {'x': x, 'y': y}
                for x, y in ((1, 7), (0, 4), (5, 3), (6, 9))
            ],
            'layer3_2': 'string',
        },
    },
}

# Indent with two spaces, but only for the three outermost levels.
encoder = JSONMaxDepthEncoder(indent=2, indent_max_depth=3)
print(encoder.encode(data))
# prints:
{
  "layer1": {
    "layer2": {
      "layer3_1": [{"x": 1,"y": 7},{"x": 0,"y": 4},{"x": 5,"y": 3},{"x": 6,"y": 9}],
      "layer3_2": "string"
    }
  }
}
To write directly to file:
# Stream the chunks to disk instead of building the whole string in memory.
# The explicit encoding keeps the file independent of the platform default.
with open('data.json', 'w', encoding='utf-8') as fp:
    fp.writelines(encoder.iterencode(data))
pprint module? json.dumps(data_structure, indent=2) - Added that as an example. sort_keys and does not have special case implementation for sort order and instead relies on (composition with) collections.OrderedDict.