
load and export methods added #40

Open · wants to merge 40 commits into master

Commits (40)
32550e9
load and export methods added
May 24, 2020
f21245e
csv added
May 24, 2020
173d259
graph overview and format description
May 27, 2020
c093cee
graph pre-processed by Charlie Hoyt
May 28, 2020
49030be
pre-processed graph and snippet for dealing with INDRA added
May 28, 2020
952029d
csv updated
May 28, 2020
46dd828
pre-processed graph and snippet for dealing with INDRA added
May 28, 2020
67a3de5
Merge branch 'master' into immunology_kg
dnsosa May 31, 2020
3357437
added skeleton code
IlanaL1 Jun 9, 2020
0d50a48
initial commit; frauenhofer sentence extraction and cord19 article te…
kaleidoescape Jun 10, 2020
e369463
updated steps for snorkel heuristic labelling
IlanaL1 Jun 12, 2020
d428db5
processed covid19-annovated.csv to training data
IlanaL1 Jun 13, 2020
21d8ce1
feat(frauenhofer,-spacey): add initial spacy nlp pipeline with RE
kaleidoescape Jun 14, 2020
b32a460
feat(spacy): store the char start/end of entities as well
kaleidoescape Jun 15, 2020
30c3a3b
feat(requirements): add my environment requirements
kaleidoescape Jun 15, 2020
169ce33
fix(spacy): correctly add start_char/end_char and change start/end to…
kaleidoescape Jun 15, 2020
0f7e282
updated with new pybel
IlanaL1 Jun 16, 2020
e7813a6
updated with new pybel
IlanaL1 Jun 16, 2020
a59b140
feat(frauenhofer): add all sentences (not just 0th), convert pmcid to…
kaleidoescape Jun 18, 2020
d4187c0
Merge remote-tracking branch 'origin/immunology_kg' into kaleidoescap…
kaleidoescape Jun 18, 2020
ad9994d
feat(frauenhofer): extract seemingly matching entities
kaleidoescape Jun 21, 2020
bf2f65a
feat(frauenhofer): update matched entities format; add more comments …
kaleidoescape Jun 21, 2020
9304f04
feat(frauenhofer): calculate entries where source AND target were mat…
kaleidoescape Jun 22, 2020
53d6f20
added file
IlanaL1 Jun 25, 2020
d9494a4
feat(frauenhofer,spacify): add Entity.namespace; convert sent tokens …
kaleidoescape Jun 26, 2020
b410259
fix(spacify): fix dict output key
kaleidoescape Jun 26, 2020
f9c5c15
feat(spacify): only include scientific entities
kaleidoescape Jun 28, 2020
3f42df4
added thesaurus and EDA notebooks
IlanaL1 Jun 29, 2020
fe85d3b
Merge remote-tracking branch 'origin/immunology_kg' into kaleidoescap…
kaleidoescape Jun 30, 2020
a4e6d43
added files for generating toy indra covid dataset
IlanaL1 Jul 19, 2020
ec9f877
cleaned script
IlanaL1 Jul 22, 2020
d16ec0c
toy dataset
IlanaL1 Jul 22, 2020
1d843bd
filtered toy graph for high belief statements
IlanaL1 Jul 28, 2020
0d2cb8e
added evidence annotations with start/stop location evidence to indra
IlanaL1 Aug 9, 2020
a319b18
added evidence annotation text nlp to indra statements
IlanaL1 Aug 9, 2020
2e92a32
generated start and stop positions as separate columns
IlanaL1 Aug 19, 2020
44fe040
updated to include src and target start and stop positions
IlanaL1 Aug 19, 2020
53ae2b6
included dask placeholder code - not working yet
IlanaL1 Aug 20, 2020
d9586aa
adding indra_df pickle file
IlanaL1 Aug 20, 2020
092eb73
included dask placeholder code - not working yet
IlanaL1 Aug 20, 2020
1,106 changes: 1,106 additions & 0 deletions immunology_kg/notebooks/1.1_data_pre-processing.ipynb

Large diffs are not rendered by default.

57 changes: 52 additions & 5 deletions immunology_kg/notebooks/1.2_INDRA_baseline.ipynb
@@ -9,19 +9,66 @@
"__Goal:__ Evaluate accuracy of INDRA models\n",
"\n",
"__Method:__ Test of INDRA Covid-19 model:\n",
"1. Use entities from covid-19 dataset as search query to INDRA,\n",
"2. get INDRA statements,\n",
"3. convert them to BEL format,\n",
"4. compare with relations from covid-19 dataset, calculate accuracy\n",
"1. Convert the Fraunhofer COVID19 knowledge graph to INDRA statements,\n",
"2. Compare its relations with those returned by INDRA,\n",
"3. Calculate accuracy,\n",
"4. Run error analysis\n",
"\n",
"\n",
"__Data:__ covid-19-kg dataset, [Covid-19 model from INDRA](https://emmaa.indra.bio/dashboard/covid19?tab=model)\n",
"\n",
"__Tools:__ [INDRA](http://www.indra.bio/), [PyBEL](https://github.com/pybel/pybel)\n",
"__Tools:__ [INDRA](http://www.indra.bio/), [PyBEL](https://github.com/pybel/pybel), [pyobo](https://github.com/pyobo/pyobo)\n",
"\n",
"__Result:__ INDRA models accuracy, results of error analysis"
]
},
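The accuracy comparison described in steps 2–3 above can be sketched with plain Python. This is a hypothetical illustration, not code from the notebook: `relation_accuracy` and the toy relation triples are invented names, and real statements would be compared by INDRA statement hashes rather than string triples.

```python
# Hypothetical sketch of the accuracy step: compare relations extracted
# from the Fraunhofer graph against relations returned by INDRA.
# (subject, relation, object) triples and all names are illustrative.

def relation_accuracy(kg_relations, indra_relations):
    """Fraction of knowledge-graph relations also found in INDRA."""
    kg = set(kg_relations)
    if not kg:
        return 0.0
    matched = kg & set(indra_relations)
    return len(matched) / len(kg)

kg_rels = [
    ("IL6", "increases", "CRP"),
    ("ACE2", "binds", "Spike"),
    ("TNF", "increases", "IL6"),
]
indra_rels = [
    ("IL6", "increases", "CRP"),
    ("TNF", "increases", "IL6"),
]
print(relation_accuracy(kg_rels, indra_rels))  # 2 of 3 matched
```

Error analysis (step 4) would then inspect the unmatched set `set(kg_rels) - set(indra_rels)`.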
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pybel\n",
"import requests\n",
"from indra.sources import bel\n",
"from indra.util import batch_iter\n",
"from indra.sources import indra_db_rest"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# load graph pre-processed by Charlie Hoyt: https://github.com/CoronaWhy/bel4corona/tree/master/data/covid19kg\n",
"url = 'https://github.com/CoronaWhy/bel4corona/raw/master/data/covid19kg/covid19-fraunhofer-grounded.bel.nodelink.json'\n",
"res = requests.get(url)\n",
"graph = pybel.from_nodelink(res.json())"
]
},
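The cell above fetches a node-link JSON export and hands it to `pybel.from_nodelink`. A stdlib-only sketch of that layout (a toy payload with invented node and edge attributes, standing in for the ~4k-statement file at the URL above):

```python
import json

# Toy node-link payload in the general shape pybel.from_nodelink consumes:
# top-level "nodes" and "links" arrays, with links referencing node ids.
# Field names here are illustrative, not the exact PyBEL schema.
payload = json.loads("""
{
  "directed": true,
  "multigraph": true,
  "graph": {"name": "covid19kg-toy"},
  "nodes": [
    {"id": 0, "function": "Protein", "name": "ACE2"},
    {"id": 1, "function": "Protein", "name": "TMPRSS2"}
  ],
  "links": [
    {"source": 0, "target": 1, "relation": "increases", "key": 0}
  ]
}
""")

# Basic sanity check before constructing the graph
print(len(payload["nodes"]), len(payload["links"]))
```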
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Process the PyBEL graph into INDRA Statements\n",
"pybel_proc = bel.process_pybel_graph(graph)\n",
"\n",
"# Note that of the ~4k statements in the PyBEL graph, only 831 are successfully\n",
"# converted to INDRA statements in large part because of issues with namespaces\n",
"covid_stmts = pybel_proc.statements\n",
"stmt_hashes = [s.get_hash() for s in covid_stmts]\n",
"\n",
"# Use the INDRA Database REST API client to search for corresponding evidences\n",
"# for 100 statements at a time\n",
"db_stmts = []\n",
"for hash_batch in batch_iter(stmt_hashes, 100):\n",
" idrp = indra_db_rest.get_statements_by_hash(hash_batch, ev_limit=1000)\n",
" db_stmts.extend(idrp.statements)"
]
}
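The REST query above walks the statement hashes 100 at a time via `indra.util.batch_iter`. The batching pattern itself can be sketched with the stdlib alone; `batched` below is an illustrative stand-in, not INDRA's implementation.

```python
from itertools import islice

def batched(iterable, size):
    """Yield lists of up to `size` items from `iterable`."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            return
        yield chunk

stmt_hashes = list(range(250))  # stand-in for INDRA statement hashes
batches = list(batched(stmt_hashes, 100))
print([len(b) for b in batches])  # [100, 100, 50]
```

Each batch, not the full hash list, is what gets passed to the API client, keeping individual requests bounded.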
],
"metadata": {