Add FoodData_parser plugin (#172)

* Remove submodule: plugins/FoodData_parser * Add files from submodule. * Replace FOODON_ by empty in FoodData_parser. * 💅
biothings · Jan 18, 2024 · 5d6ee44 · 5d6ee44
1 parent 0b18e7e
commit 5d6ee44
Show file tree

Hide file tree

Showing 5 changed files with 440 additions and 7 deletions.
diff --git a/.gitmodules b/.gitmodules
@@ -88,12 +88,6 @@
 	url = https://github.com/yichuanma95/foodb_json
 	branch = main
 	ignore = dirty
-[submodule "plugins/FoodData_parser"]
-	path = plugins/FoodData_parser
-	url = https://github.com/rjawesome/FoodData_parser
-	ignore = dirty
-
-
 [submodule "plugins/geneset1"]
 	path = plugins/geneset1
 	url = https://github.com/kevinxin90/geneset1

diff --git a/plugins/FoodData_parser b/plugins/FoodData_parser
diff --git a/plugins/FoodData_parser/get_nutrient_stats.py b/plugins/FoodData_parser/get_nutrient_stats.py
@@ -0,0 +1,72 @@
+import ujson
+import os
+
+
+def load_data(data_folder: str):
+    """Yield one document per (food, nutrient) pair found in the data dump.
+
+    Reads ``FoodData_Central_foundation_food_json_2022-04-28.json`` from
+    ``data_folder``. Each document has ``subject`` (food), ``object``
+    (nutrient), ``relation`` (derivation codes/descriptions and min/max/median
+    amounts, each only when present) and ``_id`` = ``<fdcId>-<nutrientId>``.
+    """
+    path = os.path.join(data_folder, 'FoodData_Central_foundation_food_json_2022-04-28.json')
+    with open(path, encoding='utf-8') as f:  # JSON text: decode as UTF-8, not the locale default
+        data = ujson.load(f)['FoundationFoods']
+    stats = (('min', 'nutrientMinAmount'), ('max', 'nutrientMaxAmount'), ('median', 'nutrientMedianAmount'))
+    for food in data:
+        subject = {
+            'description': food['description'],
+            'ndbNumber': food['ndbNumber'],
+            'fdcId': food['fdcId'],
+            'foodCategory': food['foodCategory']['description']
+        }
+        for n in food['foodNutrients']:
+            nutrient = n['nutrient']
+            nutrient_info = {
+                'nutrientName': nutrient['name'],
+                'nutrientId': nutrient['id'],
+                'nutrientRank': nutrient['rank']
+            }
+            if 'amount' in n:
+                nutrient_info['nutrientAmount'] = n['amount']
+                nutrient_info['nutrientAmountUnits'] = nutrient['unitName']
+
+            # Derivation and its source are optional, just like their sub-fields.
+            derivation = n.get('foodNutrientDerivation') or {}
+            source = derivation.get('foodNutrientSource') or {}
+            relation = {}
+            for key, source_key in (('code', 'sourceCode'), ('description', 'sourceDescription')):
+                if key in derivation:
+                    relation[key] = derivation[key]
+                if key in source:
+                    relation[source_key] = source[key]
+            relation.update((name, n[key]) for key, name in stats if key in n)
+            # Copy the subject so documents of the same food do not share one dict.
+            yield {'subject': dict(subject), 'object': nutrient_info, 'relation': relation,
+                   '_id': f"{subject['fdcId']}-{nutrient['id']}"}
+
+
+def main():
+    """Report duplicate ``_id`` values and write per-nutrient counts to ./output.tsv.
+
+    The first data row, ``total``, is the number of distinct nutrient names.
+    """
+    seen_ids = set()  # ids are enough to spot duplicates; no need to keep the docs
+    nutrients = {}
+    for docs in load_data('./'):
+        if docs['_id'] in seen_ids:
+            print(docs['_id'])
+        seen_ids.add(docs['_id'])
+        name = docs['object']['nutrientName']
+        nutrients[name] = nutrients.get(name, 0) + 1
+
+    print('done')
+    with open('./output.tsv', 'w', encoding='utf-8') as f:
+        f.write("nutrient\tcount\n")
+        # Keep "total" out of the dict so it is not counted as a nutrient itself.
+        f.write(f"total\t{len(nutrients)}\n")
+        f.writelines(f"{name}\t{count}\n" for name, count in nutrients.items())
+
+if __name__ == '__main__':  # only run the stats report when executed as a script
+    main()
diff --git a/plugins/FoodData_parser/manifest.json b/plugins/FoodData_parser/manifest.json
@@ -0,0 +1,18 @@
+{
+  "version": "0.2",
+  "__metadata__": {
+      "url": "https://fdc.nal.usda.gov/index.html"
+  },
+  "author" : {
+    "name" : "Rohan Juneja",
+    "url" : "https://github.com/rjawesome"
+  },
+  "dumper" : {
+      "data_url" : ["https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_foundation_food_json_2022-04-28.zip", "https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_foundation_food_csv_2022-04-28.zip"],
+      "uncompress" : true
+  },
+  "uploader" : {
+      "parser" : "parser:load_data",
+      "on_duplicates": "error"
+  }
+}