Skip to content

Commit

Permalink
Merge pull request #1375 from danforthcenter/add_metadata_method
Browse files Browse the repository at this point in the history
Add metadata method
  • Loading branch information
nfahlgren authored Dec 23, 2023
2 parents 1caf381 + 463d1f9 commit 7c7b4ec
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 38 deletions.
30 changes: 15 additions & 15 deletions docs/outputs.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,32 +48,29 @@ Methods are accessed as plantcv.outputs.*method*.

* scale: Units of the measurement or a scale in which the observations are expressed; if possible, standard units and scales should be used and mapped to existing ontologies; in case of a non-standard scale a full explanation should be given.

* datatype: The type of data to be stored. In JSON, values must be one of the following data types:
- a string
- a number
- an array
- a boolean
- null
- a JSON object

They are equivalent to the following Python data types:
- 'str'
- 'int' or 'float'
- 'list' or 'tuple'
- 'bool'
- 'NoneType'
- 'dict'
* datatype: The type of data to be stored. See note below for supported data types.

* value: The data itself. Make sure the data type of value matches the data type stated in "datatype".

* label: The label for each value, which will be useful when the data is a frequency table (e.g. hues).

**add_metadata**(*term, datatype, value*): Add metadata about the image or other information

* term: Metadata term/name

* datatype: The type of data to be stored. See note below for supported data types.

* value: The data itself. Make sure the data type of value matches the data type stated in "datatype".

**save_results**(*filename, outformat="json"*): Save results to a file

* filename: Path and name of the output file

* outformat: Output file format (default = "json"). Supports "json" and "csv" formats

!!!note
Supported data types for JSON output are: int, float, str, list, bool, tuple, dict, NoneType, numpy.float64.

**Example use:**
- [Use In VIS/NIR Tutorial](tutorials/vis_nir_tutorial.md)

Expand Down Expand Up @@ -119,6 +116,9 @@ pcv.outputs.add_observation(sample='default', variable='percent_diseased',
method='ratio of pixels', scale='percent', datatype=float,
value=percent_diseased, label='percent')

# Add metadata
pcv.outputs.add_metadata(term="genotype", datatype=str, value="wildtype")

# Write custom data to results file
pcv.outputs.save_results(filename=args.result, outformat="json")

Expand Down
5 changes: 5 additions & 0 deletions docs/updating.md
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,11 @@ pages for more details on the input and output variable types.
* post v3.3: **plantcv.outputs.add_observation**(*variable, trait, method, scale, datatype, value, label*)
* post v3.11: **plantcv.outputs.add_observation**(*sample, variable, trait, method, scale, datatype, value, label*)

#### plantcv.outputs.add_metadata

* pre v4.1: NA
* post v4.1: **plantcv.outputs.add_metadata**(*term, datatype, value*)

#### plantcv.outputs.clear

* pre v3.2: NA
Expand Down
103 changes: 80 additions & 23 deletions plantcv/plantcv/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,14 @@ def __init__(self):
self.measurements = {}
self.images = []
self.observations = {}
self.metadata = {}

# Add a method to clear measurements
def clear(self):
    """Reset measurements, images, observations, and metadata to empty containers."""
    # Rebind every store to a fresh, empty container (same order as they are declared)
    self.measurements, self.images, self.observations, self.metadata = {}, [], {}, {}

# Method to add observation to outputs
def add_observation(self, sample, variable, trait, method, scale, datatype, value, label):
Expand Down Expand Up @@ -108,16 +110,8 @@ def add_observation(self, sample, variable, trait, method, scale, datatype, valu
if sample not in self.observations:
self.observations[sample] = {}

# Supported data types
supported_dtype = ["int", "float", "str", "list", "bool", "tuple", "dict", "NoneType", "numpy.float64"]
# Supported class types
class_list = [f"<class '{cls}'>" for cls in supported_dtype]

# Send an error message if datatype is not supported by json
if str(type(value)) not in class_list:
# String list of supported types
type_list = ', '.join(map(str, supported_dtype))
fatal_error(f"The Data type {type(value)} is not compatible with JSON! Please use only these: {type_list}!")
# Validate that the data type is supported by JSON
_ = _validate_data_type(value)

# Save the observation for the sample and variable
self.observations[sample][variable] = {
Expand All @@ -129,6 +123,32 @@ def add_observation(self, sample, variable, trait, method, scale, datatype, valu
"label": label
}

# Method to add metadata instance to outputs
def add_metadata(self, term, datatype, value):
    """Add a metadata term and value to outputs.

    If the term already exists, its previous entry is overwritten.

    Parameters
    ----------
    term : str
        Metadata term/name.
    datatype : type
        The type of data to be stored, e.g. 'int', 'float', 'str', 'list', 'bool', etc.
        It is stored as its string representation.
    value : any
        The data itself. Its type must be one of the JSON-compatible types
        accepted by _validate_data_type.
    """
    # Validate BEFORE touching self.metadata: if the value is rejected, no
    # empty placeholder entry is left behind for this term (a bare {} entry
    # has no "value" key and would break the CSV writer later on)
    _validate_data_type(value)

    # Save (or overwrite) the metadata entry for this term
    self.metadata[term] = {
        "datatype": str(datatype),
        "value": value
    }

# Method to save observations to a file
def save_results(self, filename, outformat="json"):
"""Save results to a file.
Expand All @@ -145,16 +165,26 @@ def save_results(self, filename, outformat="json"):
with open(filename, 'r') as f:
hierarchical_data = json.load(f)
hierarchical_data["observations"] = self.observations
existing_metadata = hierarchical_data["metadata"]
for term in self.metadata:
save_term = term
if term in existing_metadata:
save_term = f"{term}_1"
hierarchical_data["metadata"][save_term] = self.metadata[term]
else:
hierarchical_data = {"metadata": {}, "observations": self.observations}

hierarchical_data = {"metadata": self.metadata, "observations": self.observations}
with open(filename, mode='w') as f:
json.dump(hierarchical_data, f)

elif outformat.upper() == "CSV":
# Open output CSV file
csv_table = open(filename, "w")
# Gather any additional metadata
metadata_key_list = list(self.metadata.keys())
metadata_val_list = [val["value"] for val in self.metadata.values()]
# Write the header
csv_table.write(",".join(map(str, ["sample", "trait", "value", "label"])) + "\n")
header = metadata_key_list + ["sample", "trait", "value", "label"]
csv_table.write(",".join(map(str, header)) + "\n")
# Iterate over data samples
for sample in self.observations:
# Iterate over traits for each sample
Expand All @@ -168,23 +198,18 @@ def save_results(self, filename, outformat="json"):
# Skip list of tuple data types
if not isinstance(value, tuple):
# Save one row per value-label
row = [sample, var, value, label]
row = metadata_val_list + [sample, var, value, label]
csv_table.write(",".join(map(str, row)) + "\n")
# If the data type is Boolean, store as a numeric 1/0 instead of True/False
elif isinstance(val, bool):
row = [sample,
var,
int(self.observations[sample][var]["value"]),
self.observations[sample][var]["label"]]
row = metadata_val_list + [sample, var, int(self.observations[sample][var]["value"]),
self.observations[sample][var]["label"]]
csv_table.write(",".join(map(str, row)) + "\n")
# For all other supported data types, save one row per trait
# Assumes no unusual data types are present (possibly a bad assumption)
else:
row = [sample,
var,
self.observations[sample][var]["value"],
self.observations[sample][var]["label"]
]
row = metadata_val_list + [sample, var, self.observations[sample][var]["value"],
self.observations[sample][var]["label"]]
csv_table.write(",".join(map(str, row)) + "\n")

def plot_dists(self, variable):
Expand Down Expand Up @@ -233,6 +258,38 @@ def plot_dists(self, variable):
return chart


def _validate_data_type(data):
"""Validate that the data type is supported by JSON.
Parameters
----------
data : any
Data to be validated.
Returns
-------
bool
True if the data type is supported by JSON.
Raises
------
ValueError
If the data type is not supported by JSON.
"""
# Supported data types
supported_dtype = ["int", "float", "str", "list", "bool", "tuple", "dict", "NoneType", "numpy.float64"]
# Supported class types
class_list = [f"<class '{cls}'>" for cls in supported_dtype]

# Send an error message if datatype is not supported by json
if str(type(data)) not in class_list:
# String list of supported types
type_list = ', '.join(map(str, supported_dtype))
fatal_error(f"The Data type {type(data)} is not compatible with JSON! Please use only these: {type_list}!")

return True


class Spectral_data:
"""PlantCV Hyperspectral data class"""

Expand Down
27 changes: 27 additions & 0 deletions tests/plantcv/test_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def test_save_results_json_newfile(tmpdir):
outputs = Outputs()
outputs.add_observation(sample='default', variable='test', trait='test variable', method='test', scale='none',
datatype=str, value="test", label="none")
outputs.add_metadata(term="add_date", datatype="str", value="Nov-14-2023")
outputs.save_results(filename=outfile, outformat="json")
with open(outfile, "r") as fp:
results = json.load(fp)
Expand All @@ -51,6 +52,8 @@ def test_save_results_json_existing_file(test_data, tmpdir):
outputs = Outputs()
outputs.add_observation(sample='default', variable='test', trait='test variable', method='test', scale='none',
datatype=str, value="test", label="none")
outputs.add_metadata(term="add_date", datatype="str", value="Nov-14-2023")
outputs.add_metadata(term="camera", datatype="str", value="TV")
outputs.save_results(filename=outfile, outformat="json")
with open(outfile, "r") as fp:
results = json.load(fp)
Expand Down Expand Up @@ -81,6 +84,30 @@ def test_save_results_csv(test_data, tmpdir):
assert results == test_results


def test_save_results_csv_add_metadata(tmpdir):
    """Check that metadata terms lead the header of the CSV results file."""
    # Header expected when a single "add_date" metadata term is present
    expected_header = "add_date,sample,trait,value,label"
    # Results file inside a fresh temporary directory
    csv_path = tmpdir.mkdir("cache").join("results.csv")
    # Record one observation and one metadata term, then write them out as CSV
    outputs = Outputs()
    outputs.add_observation(sample='default', variable='string', trait='string variable', method='string',
                            scale='none', datatype=str, value="string", label="none")
    outputs.add_metadata(term="add_date", datatype="str", value="Nov-14-2023")
    outputs.save_results(filename=csv_path, outformat="csv")
    with open(csv_path, "r") as fp:
        contents = fp.read()
    # Only the leading header text is compared
    assert contents[:len(expected_header)] == expected_header


def test_add_metadata_invalid_type():
    """Check that add_metadata rejects a value whose type is not JSON-compatible."""
    # A numpy array is not among the supported data types
    unsupported_value = np.array([2])
    outputs = Outputs()
    with pytest.raises(RuntimeError):
        outputs.add_metadata(term="bad_dtype", datatype="str", value=unsupported_value)


def test_clear_outputs():
"""Test for PlantCV."""
# Create output instance
Expand Down

0 comments on commit 7c7b4ec

Please sign in to comment.