diff --git a/src/datasets/io/json.py b/src/datasets/io/json.py index 23b3e646645..41abfa518cc 100644 --- a/src/datasets/io/json.py +++ b/src/datasets/io/json.py @@ -104,6 +104,11 @@ def write(self) -> int: if compression not in [None, "infer", "gzip", "bz2", "xz"]: raise NotImplementedError(f"`datasets` currently does not support {compression} compression") + if not lines and self.batch_size < self.dataset.num_rows: + raise NotImplementedError( + "Output JSON will not be formatted correctly when lines = False and batch_size < number of rows in the dataset. Use pandas.DataFrame.to_json() instead." + ) + if isinstance(self.path_or_buf, (str, bytes, os.PathLike)): with fsspec.open( self.path_or_buf, "wb", compression=compression, **(self.storage_options or {})