diff --git a/schema/definitions/0.8.0/examples/summary_table.yml b/schema/definitions/0.8.0/examples/summary_table.yml index 4b1663863..ef187c9d0 100644 --- a/schema/definitions/0.8.0/examples/summary_table.yml +++ b/schema/definitions/0.8.0/examples/summary_table.yml @@ -67,6 +67,8 @@ data: - FWPRH - WWPT:R_A4 - WTIRWT1:A4 + num_columns: 10 + num_rows: 20 size: 200 table_index: - DATE diff --git a/schema/definitions/0.8.0/examples/table_inplace.yml b/schema/definitions/0.8.0/examples/table_inplace.yml index a4d50e8a4..e4e499e5d 100644 --- a/schema/definitions/0.8.0/examples/table_inplace.yml +++ b/schema/definitions/0.8.0/examples/table_inplace.yml @@ -112,11 +112,13 @@ data: # The data block describes the actual data (e.g. surface). Only present in grid_model: # Making this an object to allow for expanding in the future name: MyGrid # important for data identification, also important for other data types spec: # class/layout dependent, optional? Can spec be expanded to work for all data types? - size: 123921 columns: - BULK_OIL - NET_OIL - PORE_OIL + num_columns: 3 + num_rows: 41306 + size: 123918 is_prediction: true # A mechanism for separating pure QC output from actual predictions is_observation: false description: diff --git a/schema/definitions/0.8.0/examples/table_wellpicks.yml b/schema/definitions/0.8.0/examples/table_wellpicks.yml index 375b0cabd..9d837d6c9 100644 --- a/schema/definitions/0.8.0/examples/table_wellpicks.yml +++ b/schema/definitions/0.8.0/examples/table_wellpicks.yml @@ -77,7 +77,6 @@ data: # The data block describes the actual data (e.g. surface). Only present in vertical_domain: depth # / time / null depth_reference: msl # / seabed / etc spec: # class/layout dependent, optional? Can spec be expanded to work for all data types? - size: 123921 columns: - X_UTME - Y_UTMN @@ -85,6 +84,9 @@ data: # The data block describes the actual data (e.g. surface). 
Only present in - MD - WELL - HORIZON + num_columns: 6 + num_rows: 20653 + size: 123918 table_index: - WELL - HORIZON diff --git a/schema/definitions/0.8.0/schema/fmu_results.json b/schema/definitions/0.8.0/schema/fmu_results.json index d63e3c2b1..27b11b41e 100644 --- a/schema/definitions/0.8.0/schema/fmu_results.json +++ b/schema/definitions/0.8.0/schema/fmu_results.json @@ -7783,6 +7783,36 @@ "title": "Columns", "type": "array" }, + "num_columns": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "examples": [ + 1, + 9999 + ], + "title": "Num Columns" + }, + "num_rows": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "examples": [ + 1, + 9999 + ], + "title": "Num Rows" + }, "size": { "examples": [ 1, @@ -7794,6 +7824,8 @@ }, "required": [ "columns", + "num_columns", + "num_rows", "size" ], "title": "TableSpecification", diff --git a/src/fmu/dataio/_model/specification.py b/src/fmu/dataio/_model/specification.py index 948831171..5d6511f20 100644 --- a/src/fmu/dataio/_model/specification.py +++ b/src/fmu/dataio/_model/specification.py @@ -65,8 +65,14 @@ class TableSpecification(BaseModel): columns: List[str] """List of columns present in a table.""" + num_columns: Optional[int] = Field(examples=[1, 9999]) + """The number of columns in a table.""" + + num_rows: Optional[int] = Field(examples=[1, 9999]) + """The number of rows in a table.""" + size: int = Field(examples=[1, 9999]) - """Size of data object.""" + """The total size of the table, i.e.
`rows x cols`.""" class CPGridSpecification(RowColumnLayer): diff --git a/src/fmu/dataio/providers/objectdata/_tables.py b/src/fmu/dataio/providers/objectdata/_tables.py index d90894a39..0ad64cc16 100644 --- a/src/fmu/dataio/providers/objectdata/_tables.py +++ b/src/fmu/dataio/providers/objectdata/_tables.py @@ -96,8 +96,11 @@ def get_bbox(self) -> None: def get_spec(self) -> TableSpecification: """Derive data.spec for pd.DataFrame.""" logger.info("Get spec for pd.DataFrame (tables)") + num_rows, num_columns = self.obj.shape return TableSpecification( columns=list(self.obj.columns), + num_columns=num_columns, + num_rows=num_rows, size=int(self.obj.size), ) @@ -142,5 +145,7 @@ def get_spec(self) -> TableSpecification: logger.info("Get spec for pyarrow (tables)") return TableSpecification( columns=list(self.obj.column_names), + num_columns=self.obj.num_columns, + num_rows=self.obj.num_rows, size=self.obj.num_columns * self.obj.num_rows, ) diff --git a/tests/test_units/test_ert_context.py b/tests/test_units/test_ert_context.py index db0327a29..832a48473 100644 --- a/tests/test_units/test_ert_context.py +++ b/tests/test_units/test_ert_context.py @@ -416,6 +416,9 @@ def test_dataframe_export_file_set_name( metaout = dataio.read_metadata(output) assert metaout["data"]["spec"]["columns"] == ["COL1", "COL2"] + assert metaout["data"]["spec"]["num_columns"] == 2 + assert metaout["data"]["spec"]["num_rows"] == 4 + assert metaout["data"]["spec"]["size"] == 8 def test_pyarrow_export_file_set_name( @@ -443,3 +446,6 @@ def test_pyarrow_export_file_set_name( metaout = dataio.read_metadata(output) assert metaout["data"]["spec"]["columns"] == ["COL1", "COL2"] + assert metaout["data"]["spec"]["num_columns"] == 2 + assert metaout["data"]["spec"]["num_rows"] == 4 + assert metaout["data"]["spec"]["size"] == 8 diff --git a/tests/test_units/test_table.py b/tests/test_units/test_table.py index 2a63a1862..a91b98ff6 100644 --- a/tests/test_units/test_table.py +++ 
b/tests/test_units/test_table.py @@ -9,17 +9,17 @@ from fmu.dataio.providers.objectdata._provider import objectdata_provider_factory -def _read_dict(file_path): +def _read_dict(file_path: str) -> dict: """Reads text file into dictionary Args: file_path (string): path to generated file Returns: dict: contents of file """ - file_path = Path(file_path) - meta_path = file_path.parent / f".{file_path.name}.yml" + path = Path(file_path) + meta_path = path.parent / f".{path.name}.yml" meta = yaml_load(meta_path) - file_path.unlink() + path.unlink() meta_path.unlink() return meta