Skip to content

Commit

Permalink
Add FoodData_parser plugin (#172)
Browse files Browse the repository at this point in the history
* Remove submodule: plugins/FoodData_parser

* Add files from submodule.

* Replace `FOODON_` with an empty string in FoodData_parser.

* 💅
  • Loading branch information
everaldorodrigo authored Jan 18, 2024
1 parent 0b18e7e commit 5d6ee44
Show file tree
Hide file tree
Showing 5 changed files with 440 additions and 7 deletions.
6 changes: 0 additions & 6 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -88,12 +88,6 @@
url = https://github.com/yichuanma95/foodb_json
branch = main
ignore = dirty
[submodule "plugins/FoodData_parser"]
path = plugins/FoodData_parser
url = https://github.com/rjawesome/FoodData_parser
ignore = dirty


[submodule "plugins/geneset1"]
path = plugins/geneset1
url = https://github.com/kevinxin90/geneset1
Expand Down
1 change: 0 additions & 1 deletion plugins/FoodData_parser
Submodule FoodData_parser deleted from 0789d8
72 changes: 72 additions & 0 deletions plugins/FoodData_parser/get_nutrient_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import ujson
import os


def load_data(data_folder: str):
    """Yield one food-nutrient document per nutrient entry of each food.

    Reads ``FoodData_Central_foundation_food_json_2022-04-28.json`` from
    *data_folder* and walks its ``FoundationFoods`` list.  Every element of
    a food's ``foodNutrients`` list becomes one document.

    Args:
        data_folder: Directory that contains the JSON file.

    Yields:
        dict: A document with the keys

        * ``subject`` - the food (``description``, ``ndbNumber``, ``fdcId``,
          ``foodCategory``),
        * ``object`` - the nutrient (``nutrientName``, ``nutrientId``,
          ``nutrientRank`` and, when the entry has an ``amount``,
          ``nutrientAmount`` / ``nutrientAmountUnits``),
        * ``relation`` - whichever of the derivation ``code`` /
          ``description``, source ``sourceCode`` / ``sourceDescription`` and
          ``nutrientMinAmount`` / ``nutrientMaxAmount`` /
          ``nutrientMedianAmount`` are present in the entry,
        * ``_id`` - ``"<fdcId>-<nutrientId>"``.

    Raises:
        OSError: If the JSON file cannot be opened.
        KeyError: If a food or nutrient lacks one of the mandatory fields
            copied into ``subject`` / ``object``.
    """
    path = os.path.join(data_folder, 'FoodData_Central_foundation_food_json_2022-04-28.json')
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(path, encoding='utf-8') as f:
        data = ujson.load(f)['FoundationFoods']

    for food in data:
        subject = {
            'description': food['description'],
            'ndbNumber': food['ndbNumber'],
            'fdcId': food['fdcId'],
            'foodCategory': food['foodCategory']['description']
        }
        for n in food['foodNutrients']:
            nutrient = n['nutrient']
            nutrient_info = {
                'nutrientName': nutrient['name'],
                'nutrientId': nutrient['id'],
                'nutrientRank': nutrient['rank']
            }
            if 'amount' in n:
                nutrient_info['nutrientAmount'] = n['amount']
                nutrient_info['nutrientAmountUnits'] = nutrient['unitName']

            # Every derivation/source field is optional, so a missing (or
            # null) container is treated like an empty one instead of
            # aborting the whole parse with a KeyError.
            derivation = n.get('foodNutrientDerivation') or {}
            source = derivation.get('foodNutrientSource') or {}

            relation = {}
            if 'code' in derivation:
                relation['code'] = derivation['code']
            if 'code' in source:
                relation['sourceCode'] = source['code']
            if 'description' in derivation:
                relation['description'] = derivation['description']
            if 'description' in source:
                relation['sourceDescription'] = source['description']

            if 'min' in n:
                relation['nutrientMinAmount'] = n['min']
            if 'max' in n:
                relation['nutrientMaxAmount'] = n['max']
            if 'median' in n:
                relation['nutrientMedianAmount'] = n['median']

            yield {
                # Fresh copy per document: consumers may mutate a yielded
                # document without affecting the other documents of the
                # same food.
                'subject': dict(subject),
                'object': nutrient_info,
                'relation': relation,
                '_id': f"{subject['fdcId']}-{nutrient_info['nutrientId']}",
            }


def main(data_folder: str = './', output_path: str = './output.tsv') -> None:
    """Write a per-nutrient document count for the parsed dataset.

    Iterates every document produced by ``load_data(data_folder)``, prints
    the ``_id`` of any document whose id was already seen, and writes a
    tab-separated file to *output_path*.  The file has a
    ``nutrient<TAB>count`` header, then a ``total`` row holding the number
    of distinct nutrient names, then one row per nutrient name in
    first-seen order.

    Args:
        data_folder: Directory passed on to ``load_data``.
        output_path: Destination of the TSV report.
    """
    from collections import Counter

    # Only the ids are needed for duplicate detection, so the full
    # documents are not retained.
    seen_ids = set()
    counts = Counter()
    for doc in load_data(data_folder):
        doc_id = doc['_id']
        if doc_id in seen_ids:
            print(doc_id)
        seen_ids.add(doc_id)
        counts[doc['object']['nutrientName']] += 1

    print('done')
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("nutrient\tcount\n")
        # The summary is kept out of the counter so it neither counts
        # itself nor collides with a nutrient literally named "total".
        f.write(f"total\t{len(counts)}\n")
        for name, count in counts.items():
            f.write(f"{name}\t{count}\n")


# Script entry point: generate the nutrient statistics only when this file
# is executed directly, not when it is imported.
if __name__ == "__main__":
    main()
18 changes: 18 additions & 0 deletions plugins/FoodData_parser/manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"version": "0.2",
"__metadata__": {
"url": "https://fdc.nal.usda.gov/index.html"
},
"author" : {
"name" : "Rohan Juneja",
"url" : "https://github.com/rjawesome"
},
"dumper" : {
"data_url" : ["https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_foundation_food_json_2022-04-28.zip", "https://fdc.nal.usda.gov/fdc-datasets/FoodData_Central_foundation_food_csv_2022-04-28.zip"],
"uncompress" : true
},
"uploader" : {
"parser" : "parser:load_data",
"on_duplicates": "error"
}
}
Loading

0 comments on commit 5d6ee44

Please sign in to comment.