From 2d0b9d94a72dd1ccc8caf78ba7d7876b116f395a Mon Sep 17 00:00:00 2001 From: mferrera Date: Thu, 19 Sep 2024 08:11:07 +0200 Subject: [PATCH 1/3] ENH: Add `num_columns`, `num_rows` to table spec --- .../0.8.0/examples/summary_table.yml | 2 ++ .../0.8.0/examples/table_inplace.yml | 4 +++- .../0.8.0/examples/table_wellpicks.yml | 4 +++- .../definitions/0.8.0/schema/fmu_results.json | 18 ++++++++++++++++++ src/fmu/dataio/_model/specification.py | 8 +++++++- src/fmu/dataio/providers/objectdata/_tables.py | 5 +++++ tests/test_units/test_ert_context.py | 6 ++++++ tests/test_units/test_table.py | 8 ++++---- 8 files changed, 48 insertions(+), 7 deletions(-) diff --git a/schema/definitions/0.8.0/examples/summary_table.yml b/schema/definitions/0.8.0/examples/summary_table.yml index 4b1663863..ef187c9d0 100644 --- a/schema/definitions/0.8.0/examples/summary_table.yml +++ b/schema/definitions/0.8.0/examples/summary_table.yml @@ -67,6 +67,8 @@ data: - FWPRH - WWPT:R_A4 - WTIRWT1:A4 + num_columns: 10 + num_rows: 20 size: 200 table_index: - DATE diff --git a/schema/definitions/0.8.0/examples/table_inplace.yml b/schema/definitions/0.8.0/examples/table_inplace.yml index a4d50e8a4..e4e499e5d 100644 --- a/schema/definitions/0.8.0/examples/table_inplace.yml +++ b/schema/definitions/0.8.0/examples/table_inplace.yml @@ -112,11 +112,13 @@ data: # The data block describes the actual data (e.g. surface). Only present in grid_model: # Making this an object to allow for expanding in the future name: MyGrid # important for data identification, also important for other data types spec: # class/layout dependent, optional? Can spec be expanded to work for all data types? - size: 123921 columns: - BULK_OIL - NET_OIL - PORE_OIL + num_columns: 3 + num_rows: 41306 + size: 123918 is_prediction: true # A mechanism for separating pure QC output from actual predictions is_observation: false description: diff --git a/schema/definitions/0.8.0/examples/table_wellpicks.yml b/schema/definitions/0.8.0/examples/table_wellpicks.yml index 375b0cabd..9d837d6c9 100644 --- a/schema/definitions/0.8.0/examples/table_wellpicks.yml +++ b/schema/definitions/0.8.0/examples/table_wellpicks.yml @@ -77,7 +77,6 @@ data: # The data block describes the actual data (e.g. surface). Only present in vertical_domain: depth # / time / null depth_reference: msl # / seabed / etc spec: # class/layout dependent, optional? Can spec be expanded to work for all data types? - size: 123921 columns: - X_UTME - Y_UTMN @@ -85,6 +84,9 @@ data: # The data block describes the actual data (e.g. surface). Only present in - MD - WELL - HORIZON + num_columns: 6 + num_rows: 20653 + size: 123918 table_index: - WELL - HORIZON diff --git a/schema/definitions/0.8.0/schema/fmu_results.json b/schema/definitions/0.8.0/schema/fmu_results.json index d63e3c2b1..08092deb7 100644 --- a/schema/definitions/0.8.0/schema/fmu_results.json +++ b/schema/definitions/0.8.0/schema/fmu_results.json @@ -7783,6 +7783,22 @@ "title": "Columns", "type": "array" }, + "num_columns": { + "examples": [ + 1, + 9999 + ], + "title": "Num Columns", + "type": "integer" + }, + "num_rows": { + "examples": [ + 1, + 9999 + ], + "title": "Num Rows", + "type": "integer" + }, "size": { "examples": [ 1, @@ -7794,6 +7810,8 @@ }, "required": [ "columns", + "num_columns", + "num_rows", "size" ], "title": "TableSpecification", diff --git a/src/fmu/dataio/_model/specification.py b/src/fmu/dataio/_model/specification.py index 948831171..fcc8e8507 100644 --- a/src/fmu/dataio/_model/specification.py +++ b/src/fmu/dataio/_model/specification.py @@ -65,8 +65,14 @@ class TableSpecification(BaseModel): columns: List[str] """List of columns present in a table.""" + num_columns: int = Field(examples=[1, 9999]) + """The number of columns in a table.""" + + num_rows: int = Field(examples=[1, 9999]) + """The number of rows in a table..""" + size: int = Field(examples=[1, 9999]) - """Size of data object.""" + """The total Size of the table, i.e. `rows x cols`.""" class CPGridSpecification(RowColumnLayer): diff --git a/src/fmu/dataio/providers/objectdata/_tables.py b/src/fmu/dataio/providers/objectdata/_tables.py index d90894a39..0ad64cc16 100644 --- a/src/fmu/dataio/providers/objectdata/_tables.py +++ b/src/fmu/dataio/providers/objectdata/_tables.py @@ -96,8 +96,11 @@ def get_bbox(self) -> None: def get_spec(self) -> TableSpecification: """Derive data.spec for pd.DataFrame.""" logger.info("Get spec for pd.DataFrame (tables)") + num_rows, num_columns = self.obj.shape return TableSpecification( columns=list(self.obj.columns), + num_columns=num_columns, + num_rows=num_rows, size=int(self.obj.size), ) @@ -142,5 +145,7 @@ def get_spec(self) -> TableSpecification: logger.info("Get spec for pyarrow (tables)") return TableSpecification( columns=list(self.obj.column_names), + num_columns=self.obj.num_columns, + num_rows=self.obj.num_rows, size=self.obj.num_columns * self.obj.num_rows, ) diff --git a/tests/test_units/test_ert_context.py b/tests/test_units/test_ert_context.py index db0327a29..832a48473 100644 --- a/tests/test_units/test_ert_context.py +++ b/tests/test_units/test_ert_context.py @@ -416,6 +416,9 @@ def test_dataframe_export_file_set_name( metaout = dataio.read_metadata(output) assert metaout["data"]["spec"]["columns"] == ["COL1", "COL2"] + assert metaout["data"]["spec"]["num_columns"] == 2 + assert metaout["data"]["spec"]["num_rows"] == 4 + assert metaout["data"]["spec"]["size"] == 8 def test_pyarrow_export_file_set_name( @@ -443,3 +446,6 @@ def test_pyarrow_export_file_set_name( metaout = dataio.read_metadata(output) assert metaout["data"]["spec"]["columns"] == ["COL1", "COL2"] + assert metaout["data"]["spec"]["num_columns"] == 2 + assert metaout["data"]["spec"]["num_rows"] == 4 + assert metaout["data"]["spec"]["size"] == 8 diff --git a/tests/test_units/test_table.py b/tests/test_units/test_table.py index 2a63a1862..a91b98ff6 100644 --- a/tests/test_units/test_table.py +++ b/tests/test_units/test_table.py @@ -9,17 +9,17 @@ from fmu.dataio.providers.objectdata._provider import objectdata_provider_factory -def _read_dict(file_path): +def _read_dict(file_path: str) -> None: """Reads text file into dictionary Args: file_path (string): path to generated file Returns: dict: contents of file """ - file_path = Path(file_path) - meta_path = file_path.parent / f".{file_path.name}.yml" + path = Path(file_path) + meta_path = path.parent / f".{path.name}.yml" meta = yaml_load(meta_path) - file_path.unlink() + path.unlink() meta_path.unlink() return meta From c90db57f782f460b26507e1f36940f8bbe01b3c0 Mon Sep 17 00:00:00 2001 From: mferrera Date: Mon, 23 Sep 2024 08:39:50 +0200 Subject: [PATCH 2/3] Make fields optional --- .../definitions/0.8.0/schema/fmu_results.json | 22 +++++++++++++++---- src/fmu/dataio/_model/specification.py | 4 ++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/schema/definitions/0.8.0/schema/fmu_results.json b/schema/definitions/0.8.0/schema/fmu_results.json index 08092deb7..27b11b41e 100644 --- a/schema/definitions/0.8.0/schema/fmu_results.json +++ b/schema/definitions/0.8.0/schema/fmu_results.json @@ -7784,20 +7784,34 @@ "type": "array" }, "num_columns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], "examples": [ 1, 9999 ], - "title": "Num Columns", - "type": "integer" + "title": "Num Columns" }, "num_rows": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], "examples": [ 1, 9999 ], - "title": "Num Rows", - "type": "integer" + "title": "Num Rows" }, "size": { "examples": [ diff --git a/src/fmu/dataio/_model/specification.py b/src/fmu/dataio/_model/specification.py index fcc8e8507..054c7e794 100644 --- a/src/fmu/dataio/_model/specification.py +++ b/src/fmu/dataio/_model/specification.py @@ -65,10 +65,10 @@ class TableSpecification(BaseModel): columns: List[str] """List of columns present in a table.""" - num_columns: int = Field(examples=[1, 9999]) + num_columns: Optional[int] = Field(examples=[1, 9999]) """The number of columns in a table.""" - num_rows: int = Field(examples=[1, 9999]) + num_rows: Optional[int] = Field(examples=[1, 9999]) """The number of rows in a table..""" size: int = Field(examples=[1, 9999]) From 1f5ecf20214f82468e49c31d9078e6f39f054efd Mon Sep 17 00:00:00 2001 From: mferrera Date: Mon, 23 Sep 2024 11:36:00 +0200 Subject: [PATCH 3/3] Lowercase size --- src/fmu/dataio/_model/specification.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fmu/dataio/_model/specification.py b/src/fmu/dataio/_model/specification.py index 054c7e794..5d6511f20 100644 --- a/src/fmu/dataio/_model/specification.py +++ b/src/fmu/dataio/_model/specification.py @@ -72,7 +72,7 @@ class TableSpecification(BaseModel): """The number of rows in a table..""" size: int = Field(examples=[1, 9999]) - """The total Size of the table, i.e. `rows x cols`.""" + """The total size of the table, i.e. `rows x cols`.""" class CPGridSpecification(RowColumnLayer):