From c1f4ec6be82bf32d1252358ec43898be919c7892 Mon Sep 17 00:00:00 2001 From: Varad Bhatnagar Date: Fri, 1 Nov 2024 17:20:44 +0530 Subject: [PATCH] Raise error for incorrect JSON serialization --- src/datasets/io/json.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/datasets/io/json.py b/src/datasets/io/json.py index 23b3e646645..41abfa518cc 100644 --- a/src/datasets/io/json.py +++ b/src/datasets/io/json.py @@ -104,6 +104,11 @@ def write(self) -> int: if compression not in [None, "infer", "gzip", "bz2", "xz"]: raise NotImplementedError(f"`datasets` currently does not support {compression} compression") + if not lines and self.batch_size < self.dataset.num_rows: + raise NotImplementedError( + "Output JSON will not be formatted correctly when lines = False and batch_size < number of rows in the dataset. Use pandas.DataFrame.to_json() instead." + ) + if isinstance(self.path_or_buf, (str, bytes, os.PathLike)): with fsspec.open( self.path_or_buf, "wb", compression=compression, **(self.storage_options or {})