Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add num_columns, num_rows to table spec #784

Merged
merged 3 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions schema/definitions/0.8.0/examples/summary_table.yml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ data:
- FWPRH
- WWPT:R_A4
- WTIRWT1:A4
num_columns: 10
num_rows: 20
size: 200
table_index:
- DATE
Expand Down
4 changes: 3 additions & 1 deletion schema/definitions/0.8.0/examples/table_inplace.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,13 @@ data: # The data block describes the actual data (e.g. surface). Only present in
grid_model: # Making this an object to allow for expanding in the future
name: MyGrid # important for data identification, also important for other data types
spec: # class/layout dependent, optional? Can spec be expanded to work for all data types?
size: 123921
columns:
- BULK_OIL
- NET_OIL
- PORE_OIL
num_columns: 3
num_rows: 41306
size: 123918
is_prediction: true # A mechanism for separating pure QC output from actual predictions
is_observation: false
description:
Expand Down
4 changes: 3 additions & 1 deletion schema/definitions/0.8.0/examples/table_wellpicks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,16 @@ data: # The data block describes the actual data (e.g. surface). Only present in
vertical_domain: depth # / time / null
depth_reference: msl # / seabed / etc
spec: # class/layout dependent, optional? Can spec be expanded to work for all data types?
size: 123921
columns:
- X_UTME
- Y_UTMN
- Z_TVDSS
- MD
- WELL
- HORIZON
num_columns: 6
num_rows: 20653
size: 123918
table_index:
- WELL
- HORIZON
Expand Down
32 changes: 32 additions & 0 deletions schema/definitions/0.8.0/schema/fmu_results.json
Original file line number Diff line number Diff line change
Expand Up @@ -7783,6 +7783,36 @@
"title": "Columns",
"type": "array"
},
"num_columns": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"examples": [
1,
9999
],
"title": "Num Columns"
},
"num_rows": {
"anyOf": [
{
"type": "integer"
},
{
"type": "null"
}
],
"examples": [
1,
9999
],
"title": "Num Rows"
},
"size": {
"examples": [
1,
Expand All @@ -7794,6 +7824,8 @@
},
"required": [
"columns",
"num_columns",
"num_rows",
"size"
],
"title": "TableSpecification",
Expand Down
8 changes: 7 additions & 1 deletion src/fmu/dataio/_model/specification.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,14 @@ class TableSpecification(BaseModel):
columns: List[str]
"""List of columns present in a table."""

num_columns: Optional[int] = Field(examples=[1, 9999])
"""The number of columns in a table."""

num_rows: Optional[int] = Field(examples=[1, 9999])
"""The number of rows in a table."""

size: int = Field(examples=[1, 9999])
"""Size of data object."""
"""The total size of the table, i.e. `rows x cols`."""


class CPGridSpecification(RowColumnLayer):
Expand Down
5 changes: 5 additions & 0 deletions src/fmu/dataio/providers/objectdata/_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,11 @@ def get_bbox(self) -> None:
def get_spec(self) -> TableSpecification:
"""Derive data.spec for pd.DataFrame."""
logger.info("Get spec for pd.DataFrame (tables)")
num_rows, num_columns = self.obj.shape
return TableSpecification(
columns=list(self.obj.columns),
num_columns=num_columns,
num_rows=num_rows,
size=int(self.obj.size),
)

Expand Down Expand Up @@ -142,5 +145,7 @@ def get_spec(self) -> TableSpecification:
logger.info("Get spec for pyarrow (tables)")
return TableSpecification(
columns=list(self.obj.column_names),
num_columns=self.obj.num_columns,
num_rows=self.obj.num_rows,
size=self.obj.num_columns * self.obj.num_rows,
)
6 changes: 6 additions & 0 deletions tests/test_units/test_ert_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,9 @@ def test_dataframe_export_file_set_name(

metaout = dataio.read_metadata(output)
assert metaout["data"]["spec"]["columns"] == ["COL1", "COL2"]
assert metaout["data"]["spec"]["num_columns"] == 2
assert metaout["data"]["spec"]["num_rows"] == 4
assert metaout["data"]["spec"]["size"] == 8


def test_pyarrow_export_file_set_name(
Expand Down Expand Up @@ -443,3 +446,6 @@ def test_pyarrow_export_file_set_name(

metaout = dataio.read_metadata(output)
assert metaout["data"]["spec"]["columns"] == ["COL1", "COL2"]
assert metaout["data"]["spec"]["num_columns"] == 2
assert metaout["data"]["spec"]["num_rows"] == 4
assert metaout["data"]["spec"]["size"] == 8
8 changes: 4 additions & 4 deletions tests/test_units/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,17 @@
from fmu.dataio.providers.objectdata._provider import objectdata_provider_factory


def _read_dict(file_path):
def _read_dict(file_path: str) -> dict:
"""Reads text file into dictionary
Args:
file_path (string): path to generated file
Returns:
dict: contents of file
"""
file_path = Path(file_path)
meta_path = file_path.parent / f".{file_path.name}.yml"
path = Path(file_path)
meta_path = path.parent / f".{path.name}.yml"
Comment on lines +19 to +20
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I forget why I type annotated this, and why it's in this PR, but let's call it a freebie

meta = yaml_load(meta_path)
file_path.unlink()
path.unlink()
meta_path.unlink()
return meta

Expand Down