Merge pull request #1375 from danforthcenter/add_metadata_method

Add metadata method
danforthcenter · Dec 23, 2023 · 7c7b4ec · 7c7b4ec
2 parents 1caf381 + 463d1f9
commit 7c7b4ec
Show file tree

Hide file tree

Showing 4 changed files with 127 additions and 38 deletions.
diff --git a/docs/outputs.md b/docs/outputs.md
@@ -48,32 +48,29 @@ Methods are accessed as plantcv.outputs.*method*.
 
 * scale: Units of the measurement or a scale in which the observations are expressed; if possible, standard units and scales should be used and mapped to existing ontologies; in case of a non-standard scale a full explanation should be given.
 
-* datatype: The type of data to be stored. In JSON, values must be one of the following data types:
- - a string
- - a number
- - an array
- - a boolean
- - null
- - a JSON object
-
- They are equilvalent to python data types of the following:
- - 'str'
- - 'int' or 'float'
- - 'list' or 'tuple'
- - 'bool'
- - 'NoneType'
- - 'dict'
+* datatype: The type of data to be stored. See note below for supported data types.
 
 * value: The data itself. Make sure the data type of value matches the data type stated in "datatype". 
 
 * label: The label for each value, which will be useful when the data is a frequency table (e.g. hues). 
 
+**add_metadata**(*term, datatype, value*): Add metadata about the image or other information
+
+* term: Metadata term/name
+
+* datatype: The type of data to be stored. See note below for supported data types.
+
+* value: The data itself. Make sure the data type of value matches the data type stated in "datatype". 
+
 **save_results**(*filename, outformat="json"*): Save results to a file
 
 * filename: Path and name of the output file
 
 * outformat: Output file format (default = "json"). Supports "json" and "csv" formats
 
+!!!note
+ Supported data types for JSON output are: int, float, str, list, bool, tuple, dict, NoneType, numpy.float64.
+
 **Example use:**
  - [Use In VIS/NIR Tutorial](tutorials/vis_nir_tutorial.md)
 
@@ -119,6 +116,9 @@ pcv.outputs.add_observation(sample='default', variable='percent_diseased',
  method='ratio of pixels', scale='percent', datatype=float,
  value=percent_diseased, label='percent')
 
+# Add metadata 
+pcv.outputs.add_metadata(term="genotype", datatype=str, value="wildtype")
+
 # Write custom data to results file
 pcv.outputs.save_results(filename=args.result, outformat="json")
 

diff --git a/docs/updating.md b/docs/updating.md
@@ -731,6 +731,11 @@ pages for more details on the input and output variable types.
 * post v3.3: **plantcv.outputs.add_observation**(*variable, trait, method, scale, datatype, value, label*)
 * post v3.11: **plantcv.outputs.add_observation**(*sample, variable, trait, method, scale, datatype, value, label*)
 
+#### plantcv.outputs.add_metadata
+
+* pre v4.1: NA
+* post v4.1: **plantcv.outputs.add_metadata**(*term, datatype, value*)
+
 #### plantcv.outputs.clear
 
 * pre v3.2: NA

diff --git a/plantcv/plantcv/classes.py b/plantcv/plantcv/classes.py
@@ -70,12 +70,14 @@ def __init__(self):
  self.measurements = {}
  self.images = []
  self.observations = {}
+ self.metadata = {}
 
  # Add a method to clear measurements
  def clear(self):
  self.measurements = {}
  self.images = []
  self.observations = {}
+ self.metadata = {}
 
  # Method to add observation to outputs
  def add_observation(self, sample, variable, trait, method, scale, datatype, value, label):
@@ -108,16 +110,8 @@ def add_observation(self, sample, variable, trait, method, scale, datatype, valu
  if sample not in self.observations:
  self.observations[sample] = {}
 
- # Supported data types
- supported_dtype = ["int", "float", "str", "list", "bool", "tuple", "dict", "NoneType", "numpy.float64"]
- # Supported class types
- class_list = [f"<class '{cls}'>" for cls in supported_dtype]
-
- # Send an error message if datatype is not supported by json
- if str(type(value)) not in class_list:
- # String list of supported types
- type_list = ', '.join(map(str, supported_dtype))
- fatal_error(f"The Data type {type(value)} is not compatible with JSON! Please use only these: {type_list}!")
+ # Validate that the data type is supported by JSON
+ _ = _validate_data_type(value)
 
  # Save the observation for the sample and variable
  self.observations[sample][variable] = {
@@ -129,6 +123,32 @@ def add_observation(self, sample, variable, trait, method, scale, datatype, valu
  "label": label
  }
 
+ # Method to add metadata instance to outputs
+ def add_metadata(self, term, datatype, value):
+ """Add a metadata term and value to outputs.
+
+ Parameters
+ ----------
+ term : str
+ Metadata term/name.
+ datatype : type
+ The type of data to be stored, e.g. 'int', 'float', 'str', 'list', 'bool', etc.
+ value : any
+ The data itself.
+ """
+ # Create an empty dictionary for the term if it does not exist
+ if term not in self.metadata:
+ self.metadata[term] = {}
+
+ # Validate that the data type is supported by JSON
+ _ = _validate_data_type(value)
+
+ # Save the datatype and value for the metadata term
+ self.metadata[term] = {
+ "datatype": str(datatype),
+ "value": value
+ }
+
  # Method to save observations to a file
  def save_results(self, filename, outformat="json"):
  """Save results to a file.
@@ -145,16 +165,26 @@ def save_results(self, filename, outformat="json"):
  with open(filename, 'r') as f:
  hierarchical_data = json.load(f)
  hierarchical_data["observations"] = self.observations
+ existing_metadata = hierarchical_data["metadata"]
+ for term in self.metadata:
+ save_term = term
+ if term in existing_metadata:
+ save_term = f"{term}_1"
+ hierarchical_data["metadata"][save_term] = self.metadata[term]
  else:
- hierarchical_data = {"metadata": {}, "observations": self.observations}
-
+ hierarchical_data = {"metadata": self.metadata, "observations": self.observations}
  with open(filename, mode='w') as f:
  json.dump(hierarchical_data, f)
+
  elif outformat.upper() == "CSV":
  # Open output CSV file
  csv_table = open(filename, "w")
+ # Gather any additional metadata
+ metadata_key_list = list(self.metadata.keys())
+ metadata_val_list = [val["value"] for val in self.metadata.values()]
  # Write the header
- csv_table.write(",".join(map(str, ["sample", "trait", "value", "label"])) + "\n")
+ header = metadata_key_list + ["sample", "trait", "value", "label"]
+ csv_table.write(",".join(map(str, header)) + "\n")
  # Iterate over data samples
  for sample in self.observations:
  # Iterate over traits for each sample
@@ -168,23 +198,18 @@ def save_results(self, filename, outformat="json"):
  # Skip list of tuple data types
  if not isinstance(value, tuple):
  # Save one row per value-label
- row = [sample, var, value, label]
+ row = metadata_val_list + [sample, var, value, label]
  csv_table.write(",".join(map(str, row)) + "\n")
  # If the data type is Boolean, store as a numeric 1/0 instead of True/False
  elif isinstance(val, bool):
- row = [sample,
- var,
- int(self.observations[sample][var]["value"]),
- self.observations[sample][var]["label"]]
+ row = metadata_val_list + [sample, var, int(self.observations[sample][var]["value"]),
+ self.observations[sample][var]["label"]]
  csv_table.write(",".join(map(str, row)) + "\n")
  # For all other supported data types, save one row per trait
  # Assumes no unusual data types are present (possibly a bad assumption)
  else:
- row = [sample,
- var,
- self.observations[sample][var]["value"],
- self.observations[sample][var]["label"]
- ]
+ row = metadata_val_list + [sample, var, self.observations[sample][var]["value"],
+ self.observations[sample][var]["label"]]
  csv_table.write(",".join(map(str, row)) + "\n")
 
  def plot_dists(self, variable):
@@ -233,6 +258,38 @@ def plot_dists(self, variable):
  return chart
 
 
+def _validate_data_type(data):
+ """Validate that the data type is supported by JSON.
+
+ Parameters
+ ----------
+ data : any
+ Data to be validated.
+
+ Returns
+ -------
+ bool
+ True if the data type is supported by JSON.
+
+ Raises
+ ------
+ RuntimeError
+ If the data type is not supported by JSON.
+ """
+ # Supported data types
+ supported_dtype = ["int", "float", "str", "list", "bool", "tuple", "dict", "NoneType", "numpy.float64"]
+ # Supported class types
+ class_list = [f"<class '{cls}'>" for cls in supported_dtype]
+
+ # Send an error message if datatype is not supported by json
+ if str(type(data)) not in class_list:
+ # String list of supported types
+ type_list = ', '.join(map(str, supported_dtype))
+ fatal_error(f"The Data type {type(data)} is not compatible with JSON! Please use only these: {type_list}!")
+
+ return True
+
+
 class Spectral_data:
  """PlantCV Hyperspectral data class"""
 

diff --git a/tests/plantcv/test_outputs.py b/tests/plantcv/test_outputs.py
@@ -35,6 +35,7 @@ def test_save_results_json_newfile(tmpdir):
  outputs = Outputs()
  outputs.add_observation(sample='default', variable='test', trait='test variable', method='test', scale='none',
  datatype=str, value="test", label="none")
+ outputs.add_metadata(term="add_date", datatype="str", value="Nov-14-2023")
  outputs.save_results(filename=outfile, outformat="json")
  with open(outfile, "r") as fp:
  results = json.load(fp)
@@ -51,6 +52,8 @@ def test_save_results_json_existing_file(test_data, tmpdir):
  outputs = Outputs()
  outputs.add_observation(sample='default', variable='test', trait='test variable', method='test', scale='none',
  datatype=str, value="test", label="none")
+ outputs.add_metadata(term="add_date", datatype="str", value="Nov-14-2023")
+ outputs.add_metadata(term="camera", datatype="str", value="TV")
  outputs.save_results(filename=outfile, outformat="json")
  with open(outfile, "r") as fp:
  results = json.load(fp)
@@ -81,6 +84,30 @@ def test_save_results_csv(test_data, tmpdir):
  assert results == test_results
 
 
+def test_save_results_csv_add_metadata(tmpdir):
+ """Test for PlantCV."""
+ # Create a test tmp directory
+ outfile = tmpdir.mkdir("cache").join("results.csv")
+ # Create output instance
+ outputs = Outputs()
+ outputs.add_observation(sample='default', variable='string', trait='string variable', method='string', scale='none',
+ datatype=str, value="string", label="none")
+ outputs.add_metadata(term="add_date", datatype="str", value="Nov-14-2023")
+ outputs.save_results(filename=outfile, outformat="csv")
+ with open(outfile, "r") as fp:
+ results = fp.read()
+ x = slice(0, 33)
+ assert results[x] == "add_date,sample,trait,value,label"
+
+
+def test_add_metadata_invalid_type():
+ """Test for PlantCV."""
+ # Create output instance
+ outputs = Outputs()
+ with pytest.raises(RuntimeError):
+ outputs.add_metadata(term="bad_dtype", datatype="str", value=np.array([2]))
+
+
 def test_clear_outputs():
  """Test for PlantCV."""
  # Create output instance