diff --git a/micom_2023.ipynb b/micom_2023.ipynb new file mode 100644 index 0000000..73d7e12 --- /dev/null +++ b/micom_2023.ipynb @@ -0,0 +1,8614 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C0vqP4LJ9y6K" + }, + "source": [ + "# 🧫🦠 Modeling microbiota-wide metabolism with MICOM\n", + "\n", + "This notebook will accompany the second session of the 2023 ISB Microbiome Course. The presentation slides can be [found here](https://gibbons-lab.github.io/isb_course_2023/micom).\n", + "\n", + "You can save your own local copy of this notebook by using `File > Save a copy in Drive`. You may be prompted to certify the notebook is safe. We promise that it is 🤞\n", + "\n", + "**Disclaimer:**\n", + "The linear and quadratic programming problems MICOM has to solve are very large and very complicated. There are some very good commercial solvers that are very expensive (even though they are often free for academic use). To make this tutorial as accessible as possible we will use the Open Source solver [OSQP](https://osqp.org/), which is installed along with MICOM. OSQP is amazing with quadratic programming problems (kudos!) but not as accurate for linear problems. Solvers usually only guarantee a solution within a certain numerical tolerance of the real solution. In order to make everything work with OSQP this tolerance has to be relaxed to about $10^{-3}$. This means that any result with an absolute value smaller than that might very well be zero so we should look at larger values only. 
Installing cost-free academic versions of commercial solvers like [IBM CPLEX](https://www.ibm.com/analytics/cplex-optimizer) or [Gurobi](https://www.gurobi.com/) would allow you to lower the tolerance to $10^{-6}$.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qgBBl4GtuTuX" + }, + "source": [ + "# 📝 Setup\n", + "\n", + "MICOM installation is usually pretty straight-forward and can be as easy as typing `pip install micom` into your Terminal.\n", + "\n", + "First let's start by downloading the materials again and switching to the folder." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ckON4xr3_bW5" + }, + "outputs": [], + "source": [ + "!git clone https://github.com/gibbons-lab/isb_course_2023 materials\n", + "%cd materials\n", + "!ls" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "is6fmBUeorwv" + }, + "source": [ + "## Basic Installation\n", + "\n", + "Installing MICOM is straight-forward in Python. OSQP itself will be installed automatically along with it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M_TeC5yrst3h", + "outputId": "c3cb59af-6caf-4eda-f0f4-b20f4d19ae49" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m832.5/832.5 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m31.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m37.4/37.4 MB\u001b[0m \u001b[31m22.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.7/75.7 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.3/138.3 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.0/8.0 MB\u001b[0m \u001b[31m71.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.9/112.9 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m49.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.0/76.0 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.7/526.7 kB\u001b[0m \u001b[31m28.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDone! πŸŽ‰ \n" + ] + } + ], + "source": [ + "!pip install -q micom\n", + "\n", + "print(\"Done! πŸŽ‰ \")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6oJrxxz6tV9T" + }, + "source": [ + "## Enable QIIME 2 interactions\n", + "\n", + "Before we start, we also need to install packages to read the \"biom\" file format used by QIIME 2 to save tables. This is only necessary if you want to read QIIME 2 FeatureTable artifacts (like the ones we constructed yesterday)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5rZX7SK_toLp", + "outputId": "9cc5af18-c4de-486e-cb03-8f01bacf92a5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.0/12.0 MB\u001b[0m \u001b[31m41.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDone! πŸŽ‰ \n" + ] + } + ], + "source": [ + "!pip install -q numpy Cython biom-format\n", + "\n", + "print(\"Done! πŸŽ‰ \")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oPy1f-WLI0lZ" + }, + "source": [ + "Okay, all done. So let's get started building some models πŸ¦ΊπŸ› d😁.\n", + "\n", + "# πŸ’» MICOM\n", + "\n", + "We will use the Python interface to MICOM since it plays nicely with Colaboratory. 
However, you could run the same steps within the QIIME 2 MICOM plugin ([q2-micom](https://library.qiime2.org/plugins/q2-micom/26/)).\n", + "\n", + "Here is an overview of all the steps and functions across both interfaces:\n", + "![micom overview](https://github.com/micom-dev/q2-micom/raw/706f583a060b91c12c0cec7acea2354fdd0dd320/docs/assets/overview.png)\n", + "\n", + "The process of building a metabolic model in MICOM begins with constructing a combined abundance/taxonomy table, referred to hereafter as a taxonomy table. Let's load a sample taxonomy table to see what it looks like:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "UV9SObSQkSZh", + "outputId": "2b3f71ac-8873-45fb-9352-961177f76c6a" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id genus species reactions \\\n", + "0 Escherichia_coli_1 Escherichia Escherichia coli 0 95 \n", + "1 Escherichia_coli_2 Escherichia Escherichia coli 1 95 \n", + "2 Escherichia_coli_3 Escherichia Escherichia coli 2 95 \n", + "3 Escherichia_coli_4 Escherichia Escherichia coli 3 95 \n", + "0 Escherichia_coli_1 Escherichia Escherichia coli 0 95 \n", + "\n", + " metabolites sample_id abundance \n", + "0 72 sample_1 96 \n", + "1 72 sample_1 48 \n", + "2 72 sample_1 708 \n", + "3 72 sample_1 622 \n", + "0 72 sample_2 120 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgenusspeciesreactionsmetabolitessample_idabundance
0Escherichia_coli_1EscherichiaEscherichia coli 09572sample_196
1Escherichia_coli_2EscherichiaEscherichia coli 19572sample_148
2Escherichia_coli_3EscherichiaEscherichia coli 29572sample_1708
3Escherichia_coli_4EscherichiaEscherichia coli 39572sample_1622
0Escherichia_coli_1EscherichiaEscherichia coli 09572sample_2120
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 51 + } + ], + "source": [ + "from micom.data import test_data\n", + "\n", + "test_data().head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AEk7yfd1lbYp" + }, + "source": [ + "In this taxonomy table, we see four identical strains of _E. coli_ (1 through 4), across two samples (sample_1 and sample_2). We can see that each row represents a single taxon in a single sample, and the `abundance` column identifies the abundance of that taxon in the sample.\n", + "\n", + "The `id` column specifies identifiers for the taxa and should be expressive and not include spaces or special characters. Since we are using a taxonomy database to build our models (more on that soon), we don't need a `file` column.\n", + "\n", + "You might notice that this dataframe looks very different from what we generated in yesterday's tutorial, where we ended up with separate QIIME 2 artifacts 😱\n", + "\n", + "No worries, we can deal with that.\n", + "\n", + "## Importing data from QIIME 2\n", + "\n", + "MICOM can read QIIME 2 artifacts. You don't even need to have QIIME 2 installed for that! But before we do so, let's resolve one issue. We discussed that MICOM summarizes genome-scale models into pangenome-scale models as a first step, but our data are on the ASV level...so how will we know what to summarize?\n", + "\n", + "Basically, a specific model database can be used to quickly summarize pangenome-scale models for use within MICOM. So, before we read our data we have to decide which model database to use. We will go with the [AGORA database](https://pubmed.ncbi.nlm.nih.gov/27893703/), which is a curated database of more than 800 bacterial strains that commonly live in the human gut. In particular, we will use a version of this database summarized on the genus rank which can be downloaded from the [MICOM data repository](https://doi.org/10.5281/zenodo.3755182), which contains a whole lot of prebuilt databases. 
This database is available from the materials folder that we previously cloned." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_57iya0D3L6-" + }, + "source": [ + "Now we're all set to start building models! The data we previously collected can be found in the `treasure_chest` folder, so we can use those files to build our taxonomy for MICOM." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "id": "o0vBAiiqqPLC" + }, + "outputs": [], + "source": [ + "from micom.taxonomy import qiime_to_micom\n", + "\n", + "tax = qiime_to_micom(\n", + " \"dada2/table.qza\",\n", + " \"taxa.qza\",\n", + " collapse_on=\"genus\"\n", + ")" + ] + }, + { + "cell_type": "code", + "source": [ + "tax.sample_id.unique()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TkOm89mQbwXa", + "outputId": "a6093f5e-a2c8-4bf3-9e3f-bb925b81bf98" + }, + "execution_count": 53, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['ERR1883195', 'ERR1883207', 'ERR1883214', 'ERR1883240',\n", + " 'ERR1883250', 'ERR1883212', 'ERR1883225', 'ERR1883294'],\n", + " dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 53 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-TQ6Zp7wouk4" + }, + "source": [ + "Notice the `collapse_on` argument. That will specify the rank on which to summarize and can be a list of several ranks. When matching taxonomy you can either match by the particular rank of interest (for example, just comparing genus names here), or you could compare the entire taxonomy, which will require all taxonomic ranks prior to the target rank to match. For that you could specify `collapse_on=[\"kingdom\", \"phylum\", \"class\", \"order\", \"family\", \"genus\"]`.\n", + "\n", + "Taxonomic names will often not match 100% between databases. For instance, the genus name \"Prevotella\" in one database may be \"Prevotella_6\" in another. 
The more ranks you use for matching the more likely you are to run into these issues. However, the more taxonomic ranks you use to match the more confident you can be that your observed taxon really is the same taxon as the one in the model database.\n", + "\n", + "The resulting table will contain the same abundances but it will include more ranks if `collapse_on` is a list. All ranks present in the taxonomy will be used when matching to the database. We will stick with the \"lax\" option of only matching on genus ranks.\n", + "\n", + "Let's now take a look at the taxonomy table we generated:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R5zwLVFcNTbq" + }, + "source": [ + "That looks more like the example! Again, we have a row for each taxon in each sample, so we're good to go.\n", + "\n", + "One helpful thing to do is to merge in our metadata, so we'll have it at hand for the following steps. In our case, the metadata includes the disease state (healthy donor or pre-FMT), collection date, and host characteristics (such as age, BMI, and sex) of each of the study participants." + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "X9hqoO4go0h1", + "outputId": "6d3ff6a9-7c37-4c52-ff4f-5ae39aef4cd7", + "collapsed": true + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " sample_id abundance genus id relative \\\n", + "0 ERR1883195 6.0 Akkermansia Akkermansia 0.000116 \n", + "1 ERR1883195 17971.0 Phocaeicola Phocaeicola 0.346522 \n", + "2 ERR1883195 5.0 Veillonella Veillonella 0.000096 \n", + "3 ERR1883195 4741.0 Faecalibacterium Faecalibacterium 0.091417 \n", + "4 ERR1883195 576.0 Mediterraneibacter Mediterraneibacter 0.011107 \n", + ".. ... ... ... ... ... 
\n", + "344 ERR1883294 5.0 Stomatobaculum Stomatobaculum 0.000926 \n", + "345 ERR1883294 3.0 Lancefieldella Lancefieldella 0.000556 \n", + "346 ERR1883294 3.0 Tumebacillus Tumebacillus 0.000556 \n", + "347 ERR1883294 3.0 Tropheryma Tropheryma 0.000556 \n", + "348 ERR1883294 2.0 Mogibacterium Mogibacterium 0.000370 \n", + "\n", + " Unnamed: 0 collection_timestamp day_relative_to_fmt description \\\n", + "0 12 2011-10-24 26 Donor 11 \n", + "1 12 2011-10-24 26 Donor 11 \n", + "2 12 2011-10-24 26 Donor 11 \n", + "3 12 2011-10-24 26 Donor 11 \n", + "4 12 2011-10-24 26 Donor 11 \n", + ".. ... ... ... ... \n", + "344 112 2011-09-29 0 Day 0 CD3 \n", + "345 112 2011-09-29 0 Day 0 CD3 \n", + "346 112 2011-09-29 0 Day 0 CD3 \n", + "347 112 2011-09-29 0 Day 0 CD3 \n", + "348 112 2011-09-29 0 Day 0 CD3 \n", + "\n", + " disease_state host_age host_age_units host_body_mass_index \\\n", + "0 healthy Restricted access years Restricted access \n", + "1 healthy Restricted access years Restricted access \n", + "2 healthy Restricted access years Restricted access \n", + "3 healthy Restricted access years Restricted access \n", + "4 healthy Restricted access years Restricted access \n", + ".. ... ... ... ... \n", + "344 Pre-FMT 61 years 32.5 \n", + "345 Pre-FMT 61 years 32.5 \n", + "346 Pre-FMT 61 years 32.5 \n", + "347 Pre-FMT 61 years 32.5 \n", + "348 Pre-FMT 61 years 32.5 \n", + "\n", + " host_height host_height_units host_subject_id host_weight \\\n", + "0 Restricted access m Donor Restricted access \n", + "1 Restricted access m Donor Restricted access \n", + "2 Restricted access m Donor Restricted access \n", + "3 Restricted access m Donor Restricted access \n", + "4 Restricted access m Donor Restricted access \n", + ".. ... ... ... ... 
\n", + "344 1.727 m CD3 97.3 \n", + "345 1.727 m CD3 97.3 \n", + "346 1.727 m CD3 97.3 \n", + "347 1.727 m CD3 97.3 \n", + "348 1.727 m CD3 97.3 \n", + "\n", + " host_weight_units race sex \n", + "0 kg Restricted access Restricted access \n", + "1 kg Restricted access Restricted access \n", + "2 kg Restricted access Restricted access \n", + "3 kg Restricted access Restricted access \n", + "4 kg Restricted access Restricted access \n", + ".. ... ... ... \n", + "344 kg white male \n", + "345 kg white male \n", + "346 kg white male \n", + "347 kg white male \n", + "348 kg white male \n", + "\n", + "[349 rows x 20 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sample_idabundancegenusidrelativeUnnamed: 0collection_timestampday_relative_to_fmtdescriptiondisease_statehost_agehost_age_unitshost_body_mass_indexhost_heighthost_height_unitshost_subject_idhost_weighthost_weight_unitsracesex
0ERR18831956.0AkkermansiaAkkermansia0.000116122011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
1ERR188319517971.0PhocaeicolaPhocaeicola0.346522122011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
2ERR18831955.0VeillonellaVeillonella0.000096122011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
3ERR18831954741.0FaecalibacteriumFaecalibacterium0.091417122011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
4ERR1883195576.0MediterraneibacterMediterraneibacter0.011107122011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
...............................................................
344ERR18832945.0StomatobaculumStomatobaculum0.0009261122011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
345ERR18832943.0LancefieldellaLancefieldella0.0005561122011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
346ERR18832943.0TumebacillusTumebacillus0.0005561122011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
347ERR18832943.0TropherymaTropheryma0.0005561122011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
348ERR18832942.0MogibacteriumMogibacterium0.0003701122011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
\n", + "

349 rows Γ— 20 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "metadata = pd.read_table(\"metadata.tsv\").rename(columns={\"id\": \"sample_id\"})\n", + "tax = pd.merge(tax, metadata, on=\"sample_id\")\n", + "tax" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Ok, now we want to invade our samples with C. diff. The goal is to predict susceptibility to invasion and see how disease context can influence predicted engraftment. To do this we will introduce 10% C. diff to all the samples! You can read more about this approach and its applications [here](https://www.biorxiv.org/content/10.1101/2023.04.28.538771v2) " + ], + "metadata": { + "id": "ZsFgo_CRjvsr" + } + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "vHyvSZJJ-iIi", + "outputId": "0f934995-8bd5-4c03-ddda-491fb5d5bd4f" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " sample_id abundance genus id \\\n", + "1 ERR1883195 17971.000000 Phocaeicola Phocaeicola \n", + "3 ERR1883195 4741.000000 Faecalibacterium Faecalibacterium \n", + "8 ERR1883195 6805.000000 Bacteroides Bacteroides \n", + "11 ERR1883195 3076.000000 Parabacteroides Parabacteroides \n", + "5 ERR1883195 3621.444444 Clostridioides Clostridioides \n", + "71 ERR1883207 16983.000000 Phocaeicola Phocaeicola \n", + "73 ERR1883207 3717.000000 Faecalibacterium Faecalibacterium \n", + "77 ERR1883207 2956.000000 Barnesiella Barnesiella \n", + "78 ERR1883207 6984.000000 Bacteroides Bacteroides \n", + "79 ERR1883207 2901.000000 Blautia Blautia \n", + "81 ERR1883207 3413.000000 Parabacteroides Parabacteroides \n", + "7 ERR1883207 4106.000000 Clostridioides Clostridioides \n", + "222 ERR1883212 8393.000000 Phocaeicola Phocaeicola \n", + "224 ERR1883212 10841.000000 Faecalibacterium Faecalibacterium \n", + "227 ERR1883212 4038.000000 Fusicatenibacter 
Fusicatenibacter \n", + "229 ERR1883212 3286.000000 Bacteroides Bacteroides \n", + "230 ERR1883212 3198.000000 Blautia Blautia \n", + "234 ERR1883212 5106.000000 Streptococcus Streptococcus \n", + "7 ERR1883212 3873.555556 Clostridioides Clostridioides \n", + "152 ERR1883214 54726.000000 Akkermansia Akkermansia \n", + "161 ERR1883214 3968.000000 Clostridium Clostridium \n", + "3 ERR1883214 6521.555556 Clostridioides Clostridioides \n", + "291 ERR1883225 3734.000000 Phocaeicola Phocaeicola \n", + "295 ERR1883225 492.000000 Barnesiella Barnesiella \n", + "296 ERR1883225 1475.000000 Bacteroides Bacteroides \n", + "299 ERR1883225 911.000000 Parabacteroides Parabacteroides \n", + "5 ERR1883225 734.666667 Clostridioides Clostridioides \n", + "174 ERR1883240 35554.000000 Akkermansia Akkermansia \n", + "176 ERR1883240 15517.000000 Veillonella Veillonella \n", + "180 ERR1883240 5555.000000 Fusobacterium Fusobacterium \n", + "4 ERR1883240 6291.777778 Clostridioides Clostridioides \n", + "188 ERR1883250 11467.000000 Akkermansia Akkermansia \n", + "190 ERR1883250 9955.000000 Mediterraneibacter Mediterraneibacter \n", + "191 ERR1883250 11923.000000 Erysipelatoclostridium Erysipelatoclostridium \n", + "193 ERR1883250 13975.000000 Blautia Blautia \n", + "196 ERR1883250 3282.000000 Ruthenibacterium Ruthenibacterium \n", + "6 ERR1883250 5622.444444 Clostridioides Clostridioides \n", + "321 ERR1883294 619.000000 Veillonella Veillonella \n", + "325 ERR1883294 591.000000 Fusobacterium Fusobacterium \n", + "329 ERR1883294 1866.000000 Anaerosinus Anaerosinus \n", + "330 ERR1883294 580.000000 Megasphaera Megasphaera \n", + "332 ERR1883294 734.000000 Phytobacter Phytobacter \n", + "333 ERR1883294 294.000000 Morganella Morganella \n", + "7 ERR1883294 520.444444 Clostridioides Clostridioides \n", + "\n", + " relative Unnamed: 0 collection_timestamp day_relative_to_fmt \\\n", + "1 0.496238 12.0 2011-10-24 26 \n", + "3 0.130915 12.0 2011-10-24 26 \n", + "8 0.187908 12.0 2011-10-24 26 \n", + 
"11 0.084938 12.0 2011-10-24 26 \n", + "5 0.100000 12.0 2011-10-24 26 \n", + "71 0.413614 24.0 2012-01-12 44 \n", + "73 0.090526 24.0 2012-01-12 44 \n", + "77 0.071992 24.0 2012-01-12 44 \n", + "78 0.170093 24.0 2012-01-12 44 \n", + "79 0.070653 24.0 2012-01-12 44 \n", + "81 0.083122 24.0 2012-01-12 44 \n", + "7 0.100000 24.0 2012-01-12 44 \n", + "222 0.216674 30.0 2012-10-10 135 \n", + "224 0.279872 30.0 2012-10-10 135 \n", + "227 0.104245 30.0 2012-10-10 135 \n", + "229 0.084832 30.0 2012-10-10 135 \n", + "230 0.082560 30.0 2012-10-10 135 \n", + "234 0.131817 30.0 2012-10-10 135 \n", + "7 0.100000 30.0 2012-10-10 135 \n", + "152 0.839156 32.0 2011-07-26 0 \n", + "161 0.060844 32.0 2011-07-26 0 \n", + "3 0.100000 32.0 2011-07-26 0 \n", + "291 0.508258 43.0 2011-07-26 54 \n", + "295 0.066969 43.0 2011-07-26 54 \n", + "296 0.200771 43.0 2011-07-26 54 \n", + "299 0.124002 43.0 2011-07-26 54 \n", + "5 0.100000 43.0 2011-07-26 54 \n", + "174 0.565087 58.0 2012-02-14 pre-FMT \n", + "176 0.246623 58.0 2012-02-14 pre-FMT \n", + "180 0.088290 58.0 2012-02-14 pre-FMT \n", + "4 0.100000 58.0 2012-02-14 pre-FMT \n", + "188 0.203950 68.0 2011-12-23 pre-FMT \n", + "190 0.177058 68.0 2011-12-23 pre-FMT \n", + "191 0.212061 68.0 2011-12-23 pre-FMT \n", + "193 0.248557 68.0 2011-12-23 pre-FMT \n", + "196 0.058373 68.0 2011-12-23 pre-FMT \n", + "6 0.100000 68.0 2011-12-23 pre-FMT \n", + "321 0.118937 112.0 2011-09-29 0 \n", + "325 0.113557 112.0 2011-09-29 0 \n", + "329 0.358540 112.0 2011-09-29 0 \n", + "330 0.111443 112.0 2011-09-29 0 \n", + "332 0.141033 112.0 2011-09-29 0 \n", + "333 0.056490 112.0 2011-09-29 0 \n", + "7 0.100000 112.0 2011-09-29 0 \n", + "\n", + " description disease_state host_age host_age_units \\\n", + "1 Donor 11 healthy Restricted access years \n", + "3 Donor 11 healthy Restricted access years \n", + "8 Donor 11 healthy Restricted access years \n", + "11 Donor 11 healthy Restricted access years \n", + "5 Donor 11 healthy Restricted access years \n", + "71 
Donor 12 healthy Restricted access years \n", + "73 Donor 12 healthy Restricted access years \n", + "77 Donor 12 healthy Restricted access years \n", + "78 Donor 12 healthy Restricted access years \n", + "79 Donor 12 healthy Restricted access years \n", + "81 Donor 12 healthy Restricted access years \n", + "7 Donor 12 healthy Restricted access years \n", + "222 Donor 14 healthy Restricted access years \n", + "224 Donor 14 healthy Restricted access years \n", + "227 Donor 14 healthy Restricted access years \n", + "229 Donor 14 healthy Restricted access years \n", + "230 Donor 14 healthy Restricted access years \n", + "234 Donor 14 healthy Restricted access years \n", + "7 Donor 14 healthy Restricted access years \n", + "152 Day 0 CD1 Pre-FMT 39 years \n", + "161 Day 0 CD1 Pre-FMT 39 years \n", + "3 Day 0 CD1 Pre-FMT 39 years \n", + "291 Donor CD1 healthy Restricted access years \n", + "295 Donor CD1 healthy Restricted access years \n", + "296 Donor CD1 healthy Restricted access years \n", + "299 Donor CD1 healthy Restricted access years \n", + "5 Donor CD1 healthy Restricted access years \n", + "174 CD9 pre-FMT Pre-FMT 47 years \n", + "176 CD9 pre-FMT Pre-FMT 47 years \n", + "180 CD9 pre-FMT Pre-FMT 47 years \n", + "4 CD9 pre-FMT Pre-FMT 47 years \n", + "188 CD13 pre-FMT Pre-FMT 53 years \n", + "190 CD13 pre-FMT Pre-FMT 53 years \n", + "191 CD13 pre-FMT Pre-FMT 53 years \n", + "193 CD13 pre-FMT Pre-FMT 53 years \n", + "196 CD13 pre-FMT Pre-FMT 53 years \n", + "6 CD13 pre-FMT Pre-FMT 53 years \n", + "321 Day 0 CD3 Pre-FMT 61 years \n", + "325 Day 0 CD3 Pre-FMT 61 years \n", + "329 Day 0 CD3 Pre-FMT 61 years \n", + "330 Day 0 CD3 Pre-FMT 61 years \n", + "332 Day 0 CD3 Pre-FMT 61 years \n", + "333 Day 0 CD3 Pre-FMT 61 years \n", + "7 Day 0 CD3 Pre-FMT 61 years \n", + "\n", + " host_body_mass_index host_height host_height_units host_subject_id \\\n", + "1 Restricted access Restricted access m Donor \n", + "3 Restricted access Restricted access m Donor \n", + "8 
Restricted access Restricted access m Donor \n", + "11 Restricted access Restricted access m Donor \n", + "5 Restricted access Restricted access m Donor \n", + "71 Restricted access Restricted access m Donor \n", + "73 Restricted access Restricted access m Donor \n", + "77 Restricted access Restricted access m Donor \n", + "78 Restricted access Restricted access m Donor \n", + "79 Restricted access Restricted access m Donor \n", + "81 Restricted access Restricted access m Donor \n", + "7 Restricted access Restricted access m Donor \n", + "222 Restricted access Restricted access m Donor \n", + "224 Restricted access Restricted access m Donor \n", + "227 Restricted access Restricted access m Donor \n", + "229 Restricted access Restricted access m Donor \n", + "230 Restricted access Restricted access m Donor \n", + "234 Restricted access Restricted access m Donor \n", + "7 Restricted access Restricted access m Donor \n", + "152 29.3 165.1 m CD1 \n", + "161 29.3 165.1 m CD1 \n", + "3 29.3 165.1 m CD1 \n", + "291 Restricted access Restricted access m Donor \n", + "295 Restricted access Restricted access m Donor \n", + "296 Restricted access Restricted access m Donor \n", + "299 Restricted access Restricted access m Donor \n", + "5 Restricted access Restricted access m Donor \n", + "174 35.5 1.55 m CD9 \n", + "176 35.5 1.55 m CD9 \n", + "180 35.5 1.55 m CD9 \n", + "4 35.5 1.55 m CD9 \n", + "188 34.4 1.56 m CD13 \n", + "190 34.4 1.56 m CD13 \n", + "191 34.4 1.56 m CD13 \n", + "193 34.4 1.56 m CD13 \n", + "196 34.4 1.56 m CD13 \n", + "6 34.4 1.56 m CD13 \n", + "321 32.5 1.727 m CD3 \n", + "325 32.5 1.727 m CD3 \n", + "329 32.5 1.727 m CD3 \n", + "330 32.5 1.727 m CD3 \n", + "332 32.5 1.727 m CD3 \n", + "333 32.5 1.727 m CD3 \n", + "7 32.5 1.727 m CD3 \n", + "\n", + " host_weight host_weight_units race sex \n", + "1 Restricted access kg Restricted access Restricted access \n", + "3 Restricted access kg Restricted access Restricted access \n", + "8 Restricted access kg 
Restricted access Restricted access \n", + "11 Restricted access kg Restricted access Restricted access \n", + "5 Restricted access kg Restricted access Restricted access \n", + "71 Restricted access kg Restricted access Restricted access \n", + "73 Restricted access kg Restricted access Restricted access \n", + "77 Restricted access kg Restricted access Restricted access \n", + "78 Restricted access kg Restricted access Restricted access \n", + "79 Restricted access kg Restricted access Restricted access \n", + "81 Restricted access kg Restricted access Restricted access \n", + "7 Restricted access kg Restricted access Restricted access \n", + "222 Restricted access kg Restricted access Restricted access \n", + "224 Restricted access kg Restricted access Restricted access \n", + "227 Restricted access kg Restricted access Restricted access \n", + "229 Restricted access kg Restricted access Restricted access \n", + "230 Restricted access kg Restricted access Restricted access \n", + "234 Restricted access kg Restricted access Restricted access \n", + "7 Restricted access kg Restricted access Restricted access \n", + "152 80.1 kg white female \n", + "161 80.1 kg white female \n", + "3 80.1 kg white female \n", + "291 Restricted access kg Restricted access Restricted access \n", + "295 Restricted access kg Restricted access Restricted access \n", + "296 Restricted access kg Restricted access Restricted access \n", + "299 Restricted access kg Restricted access Restricted access \n", + "5 Restricted access kg Restricted access Restricted access \n", + "174 85.1 kg white female \n", + "176 85.1 kg white female \n", + "180 85.1 kg white female \n", + "4 85.1 kg white female \n", + "188 83.9 kg white female \n", + "190 83.9 kg white female \n", + "191 83.9 kg white female \n", + "193 83.9 kg white female \n", + "196 83.9 kg white female \n", + "6 83.9 kg white female \n", + "321 97.3 kg white male \n", + "325 97.3 kg white male \n", + "329 97.3 kg white male \n", + "330 
97.3 kg white male \n", + "332 97.3 kg white male \n", + "333 97.3 kg white male \n", + "7 97.3 kg white male " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sample_idabundancegenusidrelativeUnnamed: 0collection_timestampday_relative_to_fmtdescriptiondisease_statehost_agehost_age_unitshost_body_mass_indexhost_heighthost_height_unitshost_subject_idhost_weighthost_weight_unitsracesex
1ERR188319517971.000000PhocaeicolaPhocaeicola0.49623812.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
3ERR18831954741.000000FaecalibacteriumFaecalibacterium0.13091512.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
8ERR18831956805.000000BacteroidesBacteroides0.18790812.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
11ERR18831953076.000000ParabacteroidesParabacteroides0.08493812.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
5ERR18831953621.444444ClostridioidesClostridioides0.10000012.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
71ERR188320716983.000000PhocaeicolaPhocaeicola0.41361424.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
73ERR18832073717.000000FaecalibacteriumFaecalibacterium0.09052624.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
77ERR18832072956.000000BarnesiellaBarnesiella0.07199224.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
78ERR18832076984.000000BacteroidesBacteroides0.17009324.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
79ERR18832072901.000000BlautiaBlautia0.07065324.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
81ERR18832073413.000000ParabacteroidesParabacteroides0.08312224.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
7ERR18832074106.000000ClostridioidesClostridioides0.10000024.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
222ERR18832128393.000000PhocaeicolaPhocaeicola0.21667430.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
224ERR188321210841.000000FaecalibacteriumFaecalibacterium0.27987230.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
227ERR18832124038.000000FusicatenibacterFusicatenibacter0.10424530.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
229ERR18832123286.000000BacteroidesBacteroides0.08483230.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
230ERR18832123198.000000BlautiaBlautia0.08256030.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
234ERR18832125106.000000StreptococcusStreptococcus0.13181730.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
7ERR18832123873.555556ClostridioidesClostridioides0.10000030.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
152ERR188321454726.000000AkkermansiaAkkermansia0.83915632.02011-07-260Day 0 CD1Pre-FMT39years29.3165.1mCD180.1kgwhitefemale
161ERR18832143968.000000ClostridiumClostridium0.06084432.02011-07-260Day 0 CD1Pre-FMT39years29.3165.1mCD180.1kgwhitefemale
3ERR18832146521.555556ClostridioidesClostridioides0.10000032.02011-07-260Day 0 CD1Pre-FMT39years29.3165.1mCD180.1kgwhitefemale
291ERR18832253734.000000PhocaeicolaPhocaeicola0.50825843.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
295ERR1883225492.000000BarnesiellaBarnesiella0.06696943.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
296ERR18832251475.000000BacteroidesBacteroides0.20077143.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
299ERR1883225911.000000ParabacteroidesParabacteroides0.12400243.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
5ERR1883225734.666667ClostridioidesClostridioides0.10000043.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
174ERR188324035554.000000AkkermansiaAkkermansia0.56508758.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55mCD985.1kgwhitefemale
176ERR188324015517.000000VeillonellaVeillonella0.24662358.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55mCD985.1kgwhitefemale
180ERR18832405555.000000FusobacteriumFusobacterium0.08829058.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55mCD985.1kgwhitefemale
4ERR18832406291.777778ClostridioidesClostridioides0.10000058.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55mCD985.1kgwhitefemale
188ERR188325011467.000000AkkermansiaAkkermansia0.20395068.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
190ERR18832509955.000000MediterraneibacterMediterraneibacter0.17705868.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
191ERR188325011923.000000ErysipelatoclostridiumErysipelatoclostridium0.21206168.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
193ERR188325013975.000000BlautiaBlautia0.24855768.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
196ERR18832503282.000000RuthenibacteriumRuthenibacterium0.05837368.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
6ERR18832505622.444444ClostridioidesClostridioides0.10000068.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
321ERR1883294619.000000VeillonellaVeillonella0.118937112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
325ERR1883294591.000000FusobacteriumFusobacterium0.113557112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
329ERR18832941866.000000AnaerosinusAnaerosinus0.358540112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
330ERR1883294580.000000MegasphaeraMegasphaera0.111443112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
332ERR1883294734.000000PhytobacterPhytobacter0.141033112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
333ERR1883294294.000000MorganellaMorganella0.056490112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
7ERR1883294520.444444ClostridioidesClostridioides0.100000112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 106 + } + ], + "source": [ + "invader='Clostridioides' # choose your favorite invader, here we have chosen C. diff\n", + "invader_rel=0.1 # set the fraction of total relative abundance you want it to invade with\n", + "invaded_samples=[] # one invaded table per sample; concatenated once after the loop\n", + "\n", + "for smp,df in tax.groupby(by='sample_id'): # loop through data, one sample at a time\n", + "    df=df[df.relative>0.05] # filter out genera below 5% (this is a high threshold, which will make simulation run faster)\n", + "    df=df[df.genus!=invader].copy() # some samples already contain the invader, so let's remove it and then re-introduce it\n", + "    if df.empty: # no resident genera left, so there is nothing to invade and no row to copy the sample info from\n", + "        print(f'Skipping {smp}: no genera other than {invader} above the 5% cutoff')\n", + "        continue\n", + "    abund=df.abundance.sum()*invader_rel/(1-invader_rel) # calculate the abundance needed to achieve desired relative abundance\n", + "\n", + "    invader_row=df.iloc[[0]].assign(genus=invader,id=invader,abundance=abund) # copy the sample info from the first row, then overwrite the taxon name and abundance\n", + "    df=pd.concat([df,invader_row]) # append the invader as a new row (concat never overwrites an existing row, whatever the index labels are)\n", + "    df['relative']=df.abundance/df.abundance.sum() # re-calculate relative abundance\n", + "    invaded_samples.append(df)\n", + "\n", + "# combine all samples into the output dataframe; row labels are renumbered so they are unique\n", + "invaded=pd.concat(invaded_samples,ignore_index=True) if invaded_samples else pd.DataFrame()\n", + "invaded\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RTbYBR8cJfup" + }, + "source": [ + "With our taxonomy table ready to go, and our metadata merged, it's finally time to get to the model building! 🎉\n", + "\n", + "## Building community models\n", + "\n", + "With the data we have now, building our models is pretty easy. We just pass our taxonomy table and model database to MICOM. We will remove all taxa that make up less than 5% of the community to keep the models small and speed up this tutorial. We will also have to specify where to write the models. For simplicity, we'll run this process in parallel over two threads. It should take around 10 minutes to finish."
+ ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51, + "referenced_widgets": [ + "37b0134de4bd4fe38c1afe7bc2bbdc59", + "bc2776a5195f4a8c8dda0e22e2fd9ea3" + ] + }, + "id": "kDbSN71SmCZr", + "outputId": "e51c2208-4536-4f90-804c-442d884549bf" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Output()" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "37b0134de4bd4fe38c1afe7bc2bbdc59" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:micom.logger:Less than 50% of the abundance could be matched to the model database. Model `ERR1883294` may not be representative of the sample\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [], + "text/html": [ + "
\n"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "\n"
+            ],
+            "text/html": [
+              "
\n",
+              "
\n" + ] + }, + "metadata": {} + } + ], + "source": [ + "from micom.workflows import build\n", + "from micom import Community\n", + "import pandas as pd\n", + "\n", + "manifest = build(invaded, \"agora103_genus.qza\", \"models\", solver=\"osqp\",\n", + " cutoff=0.05, threads=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Kwya6vbZZSUo" + }, + "source": [ + "You'll see a warning pop up indicating that less than 50% of the abundances can be matched to the database for one of the samples. This can happen with some data, and may indicate the models may not be completely representative of the samples. Typically a fraction of 80% or more is considered great. We'll continue, but remember to keep an eye out for this in future projects!\n", + "\n", + "In lower-biomass 16S amplicon sequencing samples from stool, many reads can match to food components or to host mitochondria and these hits probably do not contribute much to bacterial community metabolism. These hits will be excluded from MICOM.\n", + "\n", + "Let's take a look at what we got back from the `build` process."
+ ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 454 + }, + "id": "r9qwglr88Ise", + "outputId": "168ee50d-23c5-4dfe-e3fb-bc733ec82188" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " sample_id Unnamed: 0 collection_timestamp day_relative_to_fmt \\\n", + "0 ERR1883195 12.0 2011-10-24 26 \n", + "1 ERR1883207 24.0 2012-01-12 44 \n", + "2 ERR1883212 30.0 2012-10-10 135 \n", + "3 ERR1883214 32.0 2011-07-26 0 \n", + "4 ERR1883225 43.0 2011-07-26 54 \n", + "5 ERR1883240 58.0 2012-02-14 pre-FMT \n", + "6 ERR1883250 68.0 2011-12-23 pre-FMT \n", + "7 ERR1883294 112.0 2011-09-29 0 \n", + "\n", + " description disease_state host_age host_age_units \\\n", + "0 Donor 11 healthy Restricted access years \n", + "1 Donor 12 healthy Restricted access years \n", + "2 Donor 14 healthy Restricted access years \n", + "3 Day 0 CD1 Pre-FMT 39 years \n", + "4 Donor CD1 healthy Restricted access years \n", + "5 CD9 pre-FMT Pre-FMT 47 years \n", + "6 CD13 pre-FMT Pre-FMT 53 years \n", + "7 Day 0 CD3 Pre-FMT 61 years \n", + "\n", + " host_body_mass_index host_height ... host_subject_id \\\n", + "0 Restricted access Restricted access ... Donor \n", + "1 Restricted access Restricted access ... Donor \n", + "2 Restricted access Restricted access ... Donor \n", + "3 29.3 165.1 ... CD1 \n", + "4 Restricted access Restricted access ... Donor \n", + "5 35.5 1.55 ... CD9 \n", + "6 34.4 1.56 ... CD13 \n", + "7 32.5 1.727 ... 
CD3 \n", + "\n", + " host_weight host_weight_units race sex \\\n", + "0 Restricted access kg Restricted access Restricted access \n", + "1 Restricted access kg Restricted access Restricted access \n", + "2 Restricted access kg Restricted access Restricted access \n", + "3 80.1 kg white female \n", + "4 Restricted access kg Restricted access Restricted access \n", + "5 85.1 kg white female \n", + "6 83.9 kg white female \n", + "7 97.3 kg white male \n", + "\n", + " file found_taxa total_taxa found_fraction \\\n", + "0 ERR1883195.pickle 5.0 5.0 1.000000 \n", + "1 ERR1883207.pickle 7.0 7.0 1.000000 \n", + "2 ERR1883212.pickle 6.0 7.0 0.857143 \n", + "3 ERR1883214.pickle 6.0 6.0 1.000000 \n", + "4 ERR1883225.pickle 5.0 5.0 1.000000 \n", + "5 ERR1883240.pickle 4.0 4.0 1.000000 \n", + "6 ERR1883250.pickle 5.0 6.0 0.833333 \n", + "7 ERR1883294.pickle 5.0 7.0 0.714286 \n", + "\n", + " found_abundance_fraction \n", + "0 1.000000 \n", + "1 1.000000 \n", + "2 0.895755 \n", + "3 1.000000 \n", + "4 1.000000 \n", + "5 1.000000 \n", + "6 0.941627 \n", + "7 0.500427 \n", + "\n", + "[8 rows x 21 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
sample_idUnnamed: 0collection_timestampday_relative_to_fmtdescriptiondisease_statehost_agehost_age_unitshost_body_mass_indexhost_height...host_subject_idhost_weighthost_weight_unitsracesexfilefound_taxatotal_taxafound_fractionfound_abundance_fraction
0ERR188319512.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883195.pickle5.05.01.0000001.000000
1ERR188320724.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883207.pickle7.07.01.0000001.000000
2ERR188321230.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883212.pickle6.07.00.8571430.895755
3ERR188321432.02011-07-260Day 0 CD1Pre-FMT39years29.3165.1...CD180.1kgwhitefemaleERR1883214.pickle6.06.01.0000001.000000
4ERR188322543.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883225.pickle5.05.01.0000001.000000
5ERR188324058.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55...CD985.1kgwhitefemaleERR1883240.pickle4.04.01.0000001.000000
6ERR188325068.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56...CD1383.9kgwhitefemaleERR1883250.pickle5.06.00.8333330.941627
7ERR1883294112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727...CD397.3kgwhitemaleERR1883294.pickle5.07.00.7142860.500427
\n", + "

8 rows × 21 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 63 + } + ], + "source": [ + "manifest" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y4KAJkhIdspQ" + }, + "source": [ + "This will tell you how many taxa were found in the database and what fraction of the total abundance was represented by the database. For most samples, this looks okay (i.e., >70% of abundance represented).\n", + "\n", + "So we now have our community models and can leverage MICOM fully by simulating community growth - let's discuss what we want to look at.\n", + "\n", + "### Microbiome Context\n", + "\n", + "Now that our models are ready to go, let's think about some of the insights we might gain from these samples. First and foremost, we want to investigate the invasion potential of C. diff. How does its ability to invade vary in samples from healthy donors versus individuals with dysbiotic gut microbiomes (pre-FMT)?\n", + "\n", + "Additionally, we can use MICOM to take a mechanistic look at what metabolic strategies are leveraged by C. diff (e.g., what niche(s) does it occupy) in these different contexts.\n", + "\n", + "All that and more, coming up. Stay tuned!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "etrvjwBLkKdR" + }, + "source": [ + "First we need to import our dietary context. For simplicity we will be using a formulation that represents an average western diet, but if information about host diet is known other formulations can be used (e.g., vegetarian or vegan diet). 
Additional dietary formulations can be found [here]( https://github.com/micom-dev/media/tree/main/media)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 455 + }, + "id": "aJwXAR4PPAkA", + "outputId": "4397913a-b2b6-4059-e355-4226d29a0a90" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " reaction metabolite global_id flux\n", + "reaction \n", + "EX_4abz_m EX_4abz_m 4abz_m EX_4abz(e) 0.1\n", + "EX_4hbz_m EX_4hbz_m 4hbz_m EX_4hbz(e) 0.1\n", + "EX_ac_m EX_ac_m ac_m EX_ac(e) 0.1\n", + "EX_acgam_m EX_acgam_m acgam_m EX_acgam(e) 0.1\n", + "EX_ala_L_m EX_ala_L_m ala_L_m EX_ala_L(e) 0.1\n", + "... ... ... ... ...\n", + "EX_octa_m EX_octa_m octa_m EX_octa(e) 0.0\n", + "EX_adpcbl_m EX_adpcbl_m adpcbl_m EX_adpcbl(e) 0.0\n", + "EX_fe3dcit_m EX_fe3dcit_m fe3dcit_m EX_fe3dcit(e) 0.0\n", + "EX_pydx5p_m EX_pydx5p_m pydx5p_m EX_pydx5p(e) 0.0\n", + "EX_glu_D_m EX_glu_D_m glu_D_m EX_glu_D(e) 0.0\n", + "\n", + "[171 rows x 4 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
reactionmetaboliteglobal_idflux
reaction
EX_4abz_mEX_4abz_m4abz_mEX_4abz(e)0.1
EX_4hbz_mEX_4hbz_m4hbz_mEX_4hbz(e)0.1
EX_ac_mEX_ac_mac_mEX_ac(e)0.1
EX_acgam_mEX_acgam_macgam_mEX_acgam(e)0.1
EX_ala_L_mEX_ala_L_mala_L_mEX_ala_L(e)0.1
...............
EX_octa_mEX_octa_mocta_mEX_octa(e)0.0
EX_adpcbl_mEX_adpcbl_madpcbl_mEX_adpcbl(e)0.0
EX_fe3dcit_mEX_fe3dcit_mfe3dcit_mEX_fe3dcit(e)0.0
EX_pydx5p_mEX_pydx5p_mpydx5p_mEX_pydx5p(e)0.0
EX_glu_D_mEX_glu_D_mglu_D_mEX_glu_D(e)0.0
\n", + "

171 rows × 4 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 64 + } + ], + "source": [ + "from micom.qiime_formats import load_qiime_medium\n", + "medium = load_qiime_medium(\"western_diet_gut_agora.qza\")\n", + "medium" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4s8R4WYUez4g" + }, + "source": [ + "### Growing the models\n", + "Great, now we have our media & our models, it's time to get growing. This will take some time, so we'll use that time as an opportunity to discuss more in depth what these processes do, and what to look for in the results. First, let's run the `grow()` command. This will take the models we've built, and find an optimal solution to the fluxes based upon the medium that's been applied." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3WH8VVrVS4mv" + }, + "source": [ + "If that takes too long or was aborted, we can read it in from the treasure chest." + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34, + "referenced_widgets": [ + "59428de9a88b4204885d03de441cbde3", + "901fb7e8d8c844a8b2e86735ca2bead3" + ] + }, + "id": "IjDguZEcWGjG", + "outputId": "03508c5b-c7fc-46dd-c53b-bbd88dc5c236" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Output()" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "59428de9a88b4204885d03de441cbde3" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [], + "text/html": [ + "
\n"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "\n"
+            ],
+            "text/html": [
+              "
\n",
+              "
\n" + ] + }, + "metadata": {} + } + ], + "source": [ + "from micom.workflows import grow, save_results\n", + "\n", + "growth = grow(manifest, \"models\",medium, tradeoff=0.8, threads=2)\n", + "\n", + "# We'll save the results to a file\n", + "save_results(growth, \"growth.zip\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rHedHJxHWkjy" + }, + "source": [ + "Again, if that takes too long or was aborted, we can read it in from the treasure chest." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rcPNBkDpWGrQ" + }, + "outputs": [], + "source": [ + "from micom.workflows import load_results\n", + "\n", + "try: # Will only run if the previous step failed\n", + " growth\n", + "except NameError:\n", + " growth = load_results(\"treasure_chest/growth.zip\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "69gq9QfAzqxq" + }, + "source": [ + "What kind of results did we get? Well, `grow` returns a tuple of 3 data sets:\n", + "\n", + "1. The predicted growth rate for all taxa in all samples\n", + "2. The import and export fluxes for each taxon and the external environment\n", + "3. Annotations for the fluxes mapping to other databases\n", + "\n", + "### 📈 Growth Rates\n", + "\n", + "The growth rates are pretty straightforward." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "2r_XUm7U-HSm", + "outputId": "2ef89987-13a9-48a6-85b0-26dcdcf7ff92" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " abundance growth_rate reactions metabolites taxon \\\n", + "0 0.187908 0.147278 3293 1883 Bacteroides \n", + "1 0.100000 0.078512 1844 1401 Clostridioides \n", + "2 0.130915 0.102472 1986 1472 Faecalibacterium \n", + "3 0.084938 0.066644 2879 1747 Parabacteroides \n", + "4 0.496238 0.389606 2712 1656 Phocaeicola \n", + "5 0.170093 0.248553 3293 1883 Bacteroides \n", + "6 0.071992 0.108048 2235 1371 Barnesiella \n", + "7 0.070653 0.101473 2808 1732 Blautia \n", + "8 0.100000 0.143653 1844 1401 Clostridioides \n", + "9 0.090526 0.129872 1986 1472 Faecalibacterium \n", + "10 0.083122 0.122119 2879 1747 Parabacteroides \n", + "11 0.413614 0.607244 2712 1656 Phocaeicola \n", + "12 0.759954 0.300619 2274 1386 Akkermansia \n", + "13 0.100000 0.144461 1844 1401 Clostridioides \n", + "14 0.055102 0.041183 3532 2001 Clostridium \n", + "15 0.013039 0.009244 2826 1639 Erysipelatoclostridium \n", + "16 0.035175 0.025067 1274 1138 Paeniclostridium \n", + "17 0.036730 0.031724 1652 1308 Veillonella \n", + "18 0.094704 0.133863 3293 1883 Bacteroides \n", + "19 0.092168 0.130290 2808 1732 Blautia \n", + "20 0.111638 0.157821 1844 1401 Clostridioides \n", + "21 0.312443 0.441683 1986 1472 Faecalibacterium \n", + "22 0.241890 0.341913 2712 1656 Phocaeicola \n", + "23 0.147157 0.208032 2860 1805 Streptococcus \n", + "24 0.565087 0.128835 2274 1386 Akkermansia \n", + "25 0.100000 0.377698 1844 1401 Clostridioides \n", + "26 0.088290 0.323804 2197 1602 Fusobacterium \n", + "27 0.246623 0.928642 1652 1308 Veillonella \n", + "28 0.202085 0.267660 2274 1386 Akkermansia \n", + "29 0.246283 0.412257 2808 1732 Blautia \n", + "30 0.105784 0.176446 1844 1401 Clostridioides 
\n", + "31 0.020161 0.033840 1966 1392 Coprobacillus \n", + "32 0.012706 0.023217 1187 1056 Dialister \n", + "33 0.210121 0.353430 2826 1639 Erysipelatoclostridium \n", + "34 0.014204 0.022333 1207 1105 Flavonifractor \n", + "35 0.013217 0.021691 1369 1170 Lachnospira \n", + "36 0.175438 0.295404 2681 1578 Mediterraneibacter \n", + "37 0.045896 0.069084 2650 1613 Alistipes \n", + "38 0.012559 0.017830 2034 1491 Anaerostipes \n", + "39 0.168400 0.243903 3293 1883 Bacteroides \n", + "40 0.056172 0.083090 2235 1371 Barnesiella \n", + "41 0.027401 0.039184 2808 1732 Blautia \n", + "42 0.100000 0.141583 1844 1401 Clostridioides \n", + "43 0.021236 0.029938 2073 1577 Eubacterium \n", + "44 0.027287 0.038572 1986 1472 Faecalibacterium \n", + "45 0.104009 0.151023 2879 1747 Parabacteroides \n", + "46 0.010732 0.012497 1080 1019 Parasutterella \n", + "47 0.426310 0.619569 2712 1656 Phocaeicola \n", + "48 0.192591 0.360678 1844 1401 Clostridioides \n", + "49 0.197835 0.370503 2197 1602 Fusobacterium \n", + "50 0.194153 0.363601 1206 1068 Megasphaera \n", + "51 0.098416 0.184309 1577 1286 Morganella \n", + "52 0.025775 0.042924 1080 1019 Parasutterella \n", + "53 0.084021 0.156784 2860 1805 Streptococcus \n", + "54 0.207208 0.388049 1652 1308 Veillonella \n", + "\n", + " tradeoff sample_id Unnamed: 0 collection_timestamp day_relative_to_fmt \\\n", + "0 0.8 ERR1883195 12 2011-10-24 26 \n", + "1 0.8 ERR1883195 12 2011-10-24 26 \n", + "2 0.8 ERR1883195 12 2011-10-24 26 \n", + "3 0.8 ERR1883195 12 2011-10-24 26 \n", + "4 0.8 ERR1883195 12 2011-10-24 26 \n", + "5 0.8 ERR1883207 24 2012-01-12 44 \n", + "6 0.8 ERR1883207 24 2012-01-12 44 \n", + "7 0.8 ERR1883207 24 2012-01-12 44 \n", + "8 0.8 ERR1883207 24 2012-01-12 44 \n", + "9 0.8 ERR1883207 24 2012-01-12 44 \n", + "10 0.8 ERR1883207 24 2012-01-12 44 \n", + "11 0.8 ERR1883207 24 2012-01-12 44 \n", + "12 0.8 ERR1883214 32 2011-07-26 0 \n", + "13 0.8 ERR1883214 32 2011-07-26 0 \n", + "14 0.8 ERR1883214 32 2011-07-26 0 \n", + "15 
0.8 ERR1883214 32 2011-07-26 0 \n", + "16 0.8 ERR1883214 32 2011-07-26 0 \n", + "17 0.8 ERR1883214 32 2011-07-26 0 \n", + "18 0.8 ERR1883212 30 2012-10-10 135 \n", + "19 0.8 ERR1883212 30 2012-10-10 135 \n", + "20 0.8 ERR1883212 30 2012-10-10 135 \n", + "21 0.8 ERR1883212 30 2012-10-10 135 \n", + "22 0.8 ERR1883212 30 2012-10-10 135 \n", + "23 0.8 ERR1883212 30 2012-10-10 135 \n", + "24 0.8 ERR1883240 58 2012-02-14 pre-FMT \n", + "25 0.8 ERR1883240 58 2012-02-14 pre-FMT \n", + "26 0.8 ERR1883240 58 2012-02-14 pre-FMT \n", + "27 0.8 ERR1883240 58 2012-02-14 pre-FMT \n", + "28 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "29 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "30 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "31 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "32 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "33 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "34 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "35 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "36 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "37 0.8 ERR1883225 43 2011-07-26 54 \n", + "38 0.8 ERR1883225 43 2011-07-26 54 \n", + "39 0.8 ERR1883225 43 2011-07-26 54 \n", + "40 0.8 ERR1883225 43 2011-07-26 54 \n", + "41 0.8 ERR1883225 43 2011-07-26 54 \n", + "42 0.8 ERR1883225 43 2011-07-26 54 \n", + "43 0.8 ERR1883225 43 2011-07-26 54 \n", + "44 0.8 ERR1883225 43 2011-07-26 54 \n", + "45 0.8 ERR1883225 43 2011-07-26 54 \n", + "46 0.8 ERR1883225 43 2011-07-26 54 \n", + "47 0.8 ERR1883225 43 2011-07-26 54 \n", + "48 0.8 ERR1883294 112 2011-09-29 0 \n", + "49 0.8 ERR1883294 112 2011-09-29 0 \n", + "50 0.8 ERR1883294 112 2011-09-29 0 \n", + "51 0.8 ERR1883294 112 2011-09-29 0 \n", + "52 0.8 ERR1883294 112 2011-09-29 0 \n", + "53 0.8 ERR1883294 112 2011-09-29 0 \n", + "54 0.8 ERR1883294 112 2011-09-29 0 \n", + "\n", + " ... host_age host_age_units host_body_mass_index \\\n", + "0 ... Restricted access years Restricted access \n", + "1 ... Restricted access years Restricted access \n", + "2 ... 
Restricted access years Restricted access \n", + "3 ... Restricted access years Restricted access \n", + "4 ... Restricted access years Restricted access \n", + "5 ... Restricted access years Restricted access \n", + "6 ... Restricted access years Restricted access \n", + "7 ... Restricted access years Restricted access \n", + "8 ... Restricted access years Restricted access \n", + "9 ... Restricted access years Restricted access \n", + "10 ... Restricted access years Restricted access \n", + "11 ... Restricted access years Restricted access \n", + "12 ... 39 years 29.3 \n", + "13 ... 39 years 29.3 \n", + "14 ... 39 years 29.3 \n", + "15 ... 39 years 29.3 \n", + "16 ... 39 years 29.3 \n", + "17 ... 39 years 29.3 \n", + "18 ... Restricted access years Restricted access \n", + "19 ... Restricted access years Restricted access \n", + "20 ... Restricted access years Restricted access \n", + "21 ... Restricted access years Restricted access \n", + "22 ... Restricted access years Restricted access \n", + "23 ... Restricted access years Restricted access \n", + "24 ... 47 years 35.5 \n", + "25 ... 47 years 35.5 \n", + "26 ... 47 years 35.5 \n", + "27 ... 47 years 35.5 \n", + "28 ... 53 years 34.4 \n", + "29 ... 53 years 34.4 \n", + "30 ... 53 years 34.4 \n", + "31 ... 53 years 34.4 \n", + "32 ... 53 years 34.4 \n", + "33 ... 53 years 34.4 \n", + "34 ... 53 years 34.4 \n", + "35 ... 53 years 34.4 \n", + "36 ... 53 years 34.4 \n", + "37 ... Restricted access years Restricted access \n", + "38 ... Restricted access years Restricted access \n", + "39 ... Restricted access years Restricted access \n", + "40 ... Restricted access years Restricted access \n", + "41 ... Restricted access years Restricted access \n", + "42 ... Restricted access years Restricted access \n", + "43 ... Restricted access years Restricted access \n", + "44 ... Restricted access years Restricted access \n", + "45 ... Restricted access years Restricted access \n", + "46 ... 
Restricted access years Restricted access \n", + "47 ... Restricted access years Restricted access \n", + "48 ... 61 years 32.5 \n", + "49 ... 61 years 32.5 \n", + "50 ... 61 years 32.5 \n", + "51 ... 61 years 32.5 \n", + "52 ... 61 years 32.5 \n", + "53 ... 61 years 32.5 \n", + "54 ... 61 years 32.5 \n", + "\n", + " host_height host_height_units host_subject_id host_weight \\\n", + "0 Restricted access m Donor Restricted access \n", + "1 Restricted access m Donor Restricted access \n", + "2 Restricted access m Donor Restricted access \n", + "3 Restricted access m Donor Restricted access \n", + "4 Restricted access m Donor Restricted access \n", + "5 Restricted access m Donor Restricted access \n", + "6 Restricted access m Donor Restricted access \n", + "7 Restricted access m Donor Restricted access \n", + "8 Restricted access m Donor Restricted access \n", + "9 Restricted access m Donor Restricted access \n", + "10 Restricted access m Donor Restricted access \n", + "11 Restricted access m Donor Restricted access \n", + "12 165.1 m CD1 80.1 \n", + "13 165.1 m CD1 80.1 \n", + "14 165.1 m CD1 80.1 \n", + "15 165.1 m CD1 80.1 \n", + "16 165.1 m CD1 80.1 \n", + "17 165.1 m CD1 80.1 \n", + "18 Restricted access m Donor Restricted access \n", + "19 Restricted access m Donor Restricted access \n", + "20 Restricted access m Donor Restricted access \n", + "21 Restricted access m Donor Restricted access \n", + "22 Restricted access m Donor Restricted access \n", + "23 Restricted access m Donor Restricted access \n", + "24 1.55 m CD9 85.1 \n", + "25 1.55 m CD9 85.1 \n", + "26 1.55 m CD9 85.1 \n", + "27 1.55 m CD9 85.1 \n", + "28 1.56 m CD13 83.9 \n", + "29 1.56 m CD13 83.9 \n", + "30 1.56 m CD13 83.9 \n", + "31 1.56 m CD13 83.9 \n", + "32 1.56 m CD13 83.9 \n", + "33 1.56 m CD13 83.9 \n", + "34 1.56 m CD13 83.9 \n", + "35 1.56 m CD13 83.9 \n", + "36 1.56 m CD13 83.9 \n", + "37 Restricted access m Donor Restricted access \n", + "38 Restricted access m Donor Restricted access 
\n", + "39 Restricted access m Donor Restricted access \n", + "40 Restricted access m Donor Restricted access \n", + "41 Restricted access m Donor Restricted access \n", + "42 Restricted access m Donor Restricted access \n", + "43 Restricted access m Donor Restricted access \n", + "44 Restricted access m Donor Restricted access \n", + "45 Restricted access m Donor Restricted access \n", + "46 Restricted access m Donor Restricted access \n", + "47 Restricted access m Donor Restricted access \n", + "48 1.727 m CD3 97.3 \n", + "49 1.727 m CD3 97.3 \n", + "50 1.727 m CD3 97.3 \n", + "51 1.727 m CD3 97.3 \n", + "52 1.727 m CD3 97.3 \n", + "53 1.727 m CD3 97.3 \n", + "54 1.727 m CD3 97.3 \n", + "\n", + " host_weight_units race sex \n", + "0 kg Restricted access Restricted access \n", + "1 kg Restricted access Restricted access \n", + "2 kg Restricted access Restricted access \n", + "3 kg Restricted access Restricted access \n", + "4 kg Restricted access Restricted access \n", + "5 kg Restricted access Restricted access \n", + "6 kg Restricted access Restricted access \n", + "7 kg Restricted access Restricted access \n", + "8 kg Restricted access Restricted access \n", + "9 kg Restricted access Restricted access \n", + "10 kg Restricted access Restricted access \n", + "11 kg Restricted access Restricted access \n", + "12 kg white female \n", + "13 kg white female \n", + "14 kg white female \n", + "15 kg white female \n", + "16 kg white female \n", + "17 kg white female \n", + "18 kg Restricted access Restricted access \n", + "19 kg Restricted access Restricted access \n", + "20 kg Restricted access Restricted access \n", + "21 kg Restricted access Restricted access \n", + "22 kg Restricted access Restricted access \n", + "23 kg Restricted access Restricted access \n", + "24 kg white female \n", + "25 kg white female \n", + "26 kg white female \n", + "27 kg white female \n", + "28 kg white female \n", + "29 kg white female \n", + "30 kg white female \n", + "31 kg white 
female \n", + "32 kg white female \n", + "33 kg white female \n", + "34 kg white female \n", + "35 kg white female \n", + "36 kg white female \n", + "37 kg Restricted access Restricted access \n", + "38 kg Restricted access Restricted access \n", + "39 kg Restricted access Restricted access \n", + "40 kg Restricted access Restricted access \n", + "41 kg Restricted access Restricted access \n", + "42 kg Restricted access Restricted access \n", + "43 kg Restricted access Restricted access \n", + "44 kg Restricted access Restricted access \n", + "45 kg Restricted access Restricted access \n", + "46 kg Restricted access Restricted access \n", + "47 kg Restricted access Restricted access \n", + "48 kg white male \n", + "49 kg white male \n", + "50 kg white male \n", + "51 kg white male \n", + "52 kg white male \n", + "53 kg white male \n", + "54 kg white male \n", + "\n", + "[55 rows x 22 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abundancegrowth_ratereactionsmetabolitestaxontradeoffsample_idUnnamed: 0collection_timestampday_relative_to_fmt...host_agehost_age_unitshost_body_mass_indexhost_heighthost_height_unitshost_subject_idhost_weighthost_weight_unitsracesex
00.1879080.14727832931883Bacteroides0.8ERR1883195122011-10-2426...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
10.1000000.07851218441401Clostridioides0.8ERR1883195122011-10-2426...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
20.1309150.10247219861472Faecalibacterium0.8ERR1883195122011-10-2426...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
30.0849380.06664428791747Parabacteroides0.8ERR1883195122011-10-2426...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
40.4962380.38960627121656Phocaeicola0.8ERR1883195122011-10-2426...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
50.1700930.24855332931883Bacteroides0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
60.0719920.10804822351371Barnesiella0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
70.0706530.10147328081732Blautia0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
80.1000000.14365318441401Clostridioides0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
90.0905260.12987219861472Faecalibacterium0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
100.0831220.12211928791747Parabacteroides0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
110.4136140.60724427121656Phocaeicola0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
120.7599540.30061922741386Akkermansia0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
130.1000000.14446118441401Clostridioides0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
140.0551020.04118335322001Clostridium0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
150.0130390.00924428261639Erysipelatoclostridium0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
160.0351750.02506712741138Paeniclostridium0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
170.0367300.03172416521308Veillonella0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
180.0947040.13386332931883Bacteroides0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
190.0921680.13029028081732Blautia0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
200.1116380.15782118441401Clostridioides0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
210.3124430.44168319861472Faecalibacterium0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
220.2418900.34191327121656Phocaeicola0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
230.1471570.20803228601805Streptococcus0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
240.5650870.12883522741386Akkermansia0.8ERR1883240582012-02-14pre-FMT...47years35.51.55mCD985.1kgwhitefemale
250.1000000.37769818441401Clostridioides0.8ERR1883240582012-02-14pre-FMT...47years35.51.55mCD985.1kgwhitefemale
260.0882900.32380421971602Fusobacterium0.8ERR1883240582012-02-14pre-FMT...47years35.51.55mCD985.1kgwhitefemale
270.2466230.92864216521308Veillonella0.8ERR1883240582012-02-14pre-FMT...47years35.51.55mCD985.1kgwhitefemale
280.2020850.26766022741386Akkermansia0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
290.2462830.41225728081732Blautia0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
300.1057840.17644618441401Clostridioides0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
310.0201610.03384019661392Coprobacillus0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
320.0127060.02321711871056Dialister0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
330.2101210.35343028261639Erysipelatoclostridium0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
340.0142040.02233312071105Flavonifractor0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
350.0132170.02169113691170Lachnospira0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
360.1754380.29540426811578Mediterraneibacter0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
370.0458960.06908426501613Alistipes0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
380.0125590.01783020341491Anaerostipes0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
390.1684000.24390332931883Bacteroides0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
400.0561720.08309022351371Barnesiella0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
410.0274010.03918428081732Blautia0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
420.1000000.14158318441401Clostridioides0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
430.0212360.02993820731577Eubacterium0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
440.0272870.03857219861472Faecalibacterium0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
450.1040090.15102328791747Parabacteroides0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
460.0107320.01249710801019Parasutterella0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
470.4263100.61956927121656Phocaeicola0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
480.1925910.36067818441401Clostridioides0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
490.1978350.37050321971602Fusobacterium0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
500.1941530.36360112061068Megasphaera0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
510.0984160.18430915771286Morganella0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
520.0257750.04292410801019Parasutterella0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
530.0840210.15678428601805Streptococcus0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
540.2072080.38804916521308Veillonella0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
\n", + "

55 rows × 22 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 112 + } + ], + "source": [ + "growth_rates=pd.merge(growth.growth_rates,metadata,on='sample_id')\n", + "growth_rates" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F5BK7DDv0UfA" + }, + "source": [ + "### ↔️ Exchange Fluxes\n", + "\n", + "More interesting are the exchange fluxes. These reactions represent the import and export of metabolites from the system. Let's look at those now:" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "lQW2BBS10jdN", + "outputId": "d0ebb86e-549a-4727-8f9c-4461c25eeeca" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " taxon sample_id tolerance reaction flux \\\n", + "0 Bacteroides ERR1883195 0.0001 EX_MGlcn150(e) -0.001865 \n", + "1 Bacteroides ERR1883195 0.0001 EX_MGlcn180(e) -0.001879 \n", + "4 Bacteroides ERR1883195 0.0001 EX_MGlcn81_rl(e) 0.002123 \n", + "5 Bacteroides ERR1883195 0.0001 EX_MGlcn24(e) -0.002457 \n", + "6 Bacteroides ERR1883195 0.0001 EX_MGlcn165(e) 0.004506 \n", + "... ... ... ... ... ... \n", + "22046 medium ERR1883294 0.0001 EX_MGlcn170_rl_m -0.000192 \n", + "22050 medium ERR1883294 0.0001 EX_nmn_m -0.100040 \n", + "22056 medium ERR1883294 0.0001 EX_akg_m 0.000115 \n", + "22070 medium ERR1883294 0.0001 EX_oaa_m 0.000154 \n", + "22078 medium ERR1883294 0.0001 EX_xtsn_m 0.000457 \n", + "\n", + " abundance metabolite direction \n", + "0 0.208787 MGlcn150[e] import \n", + "1 0.208787 MGlcn180[e] import \n", + "4 0.208787 MGlcn81_rl[e] export \n", + "5 0.208787 MGlcn24[e] import \n", + "6 0.208787 MGlcn165[e] export \n", + "... ... ... ... \n", + "22046 NaN MGlcn170_rl_m import \n", + "22050 NaN nmn_m import \n", + "22056 NaN akg_m export \n", + "22070 NaN oaa_m export \n", + "22078 NaN xtsn_m export \n", + "\n", + "[10597 rows x 8 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
taxonsample_idtolerancereactionfluxabundancemetabolitedirection
0BacteroidesERR18831950.0001EX_MGlcn150(e)-0.0018650.208787MGlcn150[e]import
1BacteroidesERR18831950.0001EX_MGlcn180(e)-0.0018790.208787MGlcn180[e]import
4BacteroidesERR18831950.0001EX_MGlcn81_rl(e)0.0021230.208787MGlcn81_rl[e]export
5BacteroidesERR18831950.0001EX_MGlcn24(e)-0.0024570.208787MGlcn24[e]import
6BacteroidesERR18831950.0001EX_MGlcn165(e)0.0045060.208787MGlcn165[e]export
...........................
22046mediumERR18832940.0001EX_MGlcn170_rl_m-0.000192NaNMGlcn170_rl_mimport
22050mediumERR18832940.0001EX_nmn_m-0.100040NaNnmn_mimport
22056mediumERR18832940.0001EX_akg_m0.000115NaNakg_mexport
22070mediumERR18832940.0001EX_oaa_m0.000154NaNoaa_mexport
22078mediumERR18832940.0001EX_xtsn_m0.000457NaNxtsn_mexport
\n", + "

10597 rows × 8 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 102 + } + ], + "source": [ + "growth.exchanges" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pu5XtkUl1YG1" + }, + "source": [ + "So we see how much of each metabolite is either consumed or produced by each taxon in each sample. `tolerance` denotes the accuracy of the solver and tells you the smallest absolute flux that is likely different from zero (i.e., substantial flux). *All of the fluxes are normalized to 1g dry weight of bacteria*. So, you can directly compare fluxes between taxa, even if they are present at very different abundances.\n", + "\n", + "If you're curious what the abbreviation for each of these metabolites represents, that can be found in the annotations dataframe. For instance, let's find out what `\"tre[e]\"` represents." + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "id": "DphXa9hw1yxM", + "outputId": "4bf58c2c-2cc4-47b0-e4af-e7e7b827e54d" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " metabolite name molecular_weight C_number N_number \\\n", + "reaction \n", + "EX_tre(e) tre[e] Trehalose 342.29648 12 0 \n", + "\n", + " hmdb inchi kegg.compound pubchem.compound chebi reaction \n", + "reaction \n", + "EX_tre(e) HMDB00975 NaN C01083 7427 NaN EX_tre(e) " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
metabolitenamemolecular_weightC_numberN_numberhmdbinchikegg.compoundpubchem.compoundchebireaction
reaction
EX_tre(e)tre[e]Trehalose342.29648120HMDB00975NaNC010837427NaNEX_tre(e)
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 83 + } + ], + "source": [ + "anns = growth.annotations\n", + "anns[anns.metabolite == \"tre[e]\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GVHLD2dm4a6B" + }, + "source": [ + "Trehalose! Interesting, [that's an important metabolite](https://pubmed.ncbi.nlm.nih.gov/34277467/) in the context of CDI! All of these annotations and more information are also available at https://vmh.life, maintained by Dr. Ines Thiele's lab." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CImtzqRJbbGj" + }, + "source": [ + "\n", + "# 📊 Visualizations\n", + "\n", + "Let's visualize our results. Because of the rich output of these models, it can be overwhelming to represent it all, but don't worry! There are tools in place for this already.\n", + "\n", + "We will use the standard visualizations included in MICOM. These tools take in the growth results we obtained before and create visualizations in standalone HTML files that bundle the plots and raw data and can be viewed directly in your browser.\n", + "\n", + "First, let's look at the growth rates of each taxon across samples." + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "id": "EaplMHFLcMT7" + }, + "outputs": [], + "source": [ + "from micom.viz import *\n", + "\n", + "viz = plot_growth(growth, filename=\"growthrates.html\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Normally, we could call `viz.view()` afterwards and it would open it in our web browser. However, this will not work in Colab. Instead, the plot function creates the file `growthrates.html` (the `filename` we passed above) in your `materials` folder. To open it, simply download that file and view it in your web browser. We can see that there are many things going on, but it's not super clear. Let's continue."
+ ], + "metadata": { + "id": "_sjNIuCSXkyb" + } + }, + { + "cell_type": "markdown", + "source": [ + "We're interested in understanding the invasion potential of C. diff so lets extract the predicted C. diff growth rates. In addition to C. diff growth rate we can also look at what fraction of the community growth rate this represents.\n", + "\n" + ], + "metadata": { + "id": "qPTCyw-7RqFm" + } + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 436 + }, + "id": "9YlWytSp-yKI", + "outputId": "c77dda37-1dd1-4ccf-a99c-cdb1519c5e8a" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " abundance growth_rate reactions metabolites taxon tradeoff \\\n", + "1 0.100000 0.078512 1844 1401 Clostridioides 0.8 \n", + "8 0.100000 0.143653 1844 1401 Clostridioides 0.8 \n", + "13 0.100000 0.144461 1844 1401 Clostridioides 0.8 \n", + "20 0.111638 0.157821 1844 1401 Clostridioides 0.8 \n", + "25 0.100000 0.377698 1844 1401 Clostridioides 0.8 \n", + "30 0.105784 0.176446 1844 1401 Clostridioides 0.8 \n", + "42 0.100000 0.141583 1844 1401 Clostridioides 0.8 \n", + "48 0.192591 0.360678 1844 1401 Clostridioides 0.8 \n", + "\n", + " sample_id Unnamed: 0 collection_timestamp day_relative_to_fmt ... \\\n", + "1 ERR1883195 12 2011-10-24 26 ... \n", + "8 ERR1883207 24 2012-01-12 44 ... \n", + "13 ERR1883214 32 2011-07-26 0 ... \n", + "20 ERR1883212 30 2012-10-10 135 ... \n", + "25 ERR1883240 58 2012-02-14 pre-FMT ... \n", + "30 ERR1883250 68 2011-12-23 pre-FMT ... \n", + "42 ERR1883225 43 2011-07-26 54 ... \n", + "48 ERR1883294 112 2011-09-29 0 ... 
\n", + "\n", + " host_age_units host_body_mass_index host_height host_height_units \\\n", + "1 years Restricted access Restricted access m \n", + "8 years Restricted access Restricted access m \n", + "13 years 29.3 165.1 m \n", + "20 years Restricted access Restricted access m \n", + "25 years 35.5 1.55 m \n", + "30 years 34.4 1.56 m \n", + "42 years Restricted access Restricted access m \n", + "48 years 32.5 1.727 m \n", + "\n", + " host_subject_id host_weight host_weight_units race \\\n", + "1 Donor Restricted access kg Restricted access \n", + "8 Donor Restricted access kg Restricted access \n", + "13 CD1 80.1 kg white \n", + "20 Donor Restricted access kg Restricted access \n", + "25 CD9 85.1 kg white \n", + "30 CD13 83.9 kg white \n", + "42 Donor Restricted access kg Restricted access \n", + "48 CD3 97.3 kg white \n", + "\n", + " sex abundance_weighted_growth_rate \n", + "1 Restricted access 0.007851 \n", + "8 Restricted access 0.014365 \n", + "13 female 0.014446 \n", + "20 Restricted access 0.017619 \n", + "25 female 0.037770 \n", + "30 female 0.018665 \n", + "42 Restricted access 0.014158 \n", + "48 male 0.069463 \n", + "\n", + "[8 rows x 23 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
abundancegrowth_ratereactionsmetabolitestaxontradeoffsample_idUnnamed: 0collection_timestampday_relative_to_fmt...host_age_unitshost_body_mass_indexhost_heighthost_height_unitshost_subject_idhost_weighthost_weight_unitsracesexabundance_weighted_growth_rate
10.1000000.07851218441401Clostridioides0.8ERR1883195122011-10-2426...yearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access0.007851
80.1000000.14365318441401Clostridioides0.8ERR1883207242012-01-1244...yearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access0.014365
130.1000000.14446118441401Clostridioides0.8ERR1883214322011-07-260...years29.3165.1mCD180.1kgwhitefemale0.014446
200.1116380.15782118441401Clostridioides0.8ERR1883212302012-10-10135...yearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access0.017619
250.1000000.37769818441401Clostridioides0.8ERR1883240582012-02-14pre-FMT...years35.51.55mCD985.1kgwhitefemale0.037770
300.1057840.17644618441401Clostridioides0.8ERR1883250682011-12-23pre-FMT...years34.41.56mCD1383.9kgwhitefemale0.018665
420.1000000.14158318441401Clostridioides0.8ERR1883225432011-07-2654...yearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access0.014158
480.1925910.36067818441401Clostridioides0.8ERR18832941122011-09-290...years32.51.727mCD397.3kgwhitemale0.069463
\n", + "

8 rows × 23 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 121 + } + ], + "source": [ + "cdiff = growth_rates[growth_rates.taxon=='Clostridioides'].copy()\n", + "cdiff" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Now that we've extracted the C. diff specific growth rates let's take a look at how they compare between patients with healthy and dysbiotic gut microbiomes." + ], + "metadata": { + "id": "bQKYxSZTSPk1" + } + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 468 + }, + "id": "AWG3U094-_OI", + "outputId": "ef7e9ff0-9348-48ba-af7f-d05c65ad91d6" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 122 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "import seaborn as sns\n", + "sns.boxplot(x='disease_state',y='growth_rate',data=cdiff)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Looks like C. diff is predicted to grow in all samples but its predicted growth rate is ~2x higher in the Pre-FMT samples. You can see there is also a decent amount of variation in the Pre-FMT results." + ], + "metadata": { + "id": "MEuym_PVui-k" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G1JbbKrLcVye" + }, + "source": [ + "## Growth niches\n", + "\n", + "Another thing we can look at is whether individual taxa inhabit different growth niches across different disease contexts. Here we can use the `plot_exchanges_per_taxon` function to see how exchanges differ within and between taxa, within and across human populations." + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": { + "id": "NlZrfv38esj8", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6df836fd-71ac-4cfd-fd1c-883e9d31f311" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 89 + } + ], + "source": [ + "plot_exchanges_per_taxon(growth, perplexity=4, direction=\"import\", filename=\"niche.html\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qXnbUCCs2yVG" + }, + "source": [ + "\n", + "This function projects the full set of import or export fluxes onto a two dimensional plane, and arranges taxa so that more similar flux patterns lie nearer together. Taxa closer to one another compete for a more similar set of resources (and/or produce a more similar set of metabolites). 
The center of the plot signifies a more competitive nutrient space, whereas clusters on the outskirts denote more isolated niches.\n", + "\n", + "You can tune [TSNE parameters](https://distill.pub/2016/misread-tsne/), such as perplexity, to get a more meaningful grouping. We will lower the perplexity here since we don't have a lot of data points.\n" + ] + }, + { + "cell_type": "markdown", + "source": [ + "One small takeaway from this analysis is the separation between C. diff pre- and post-FMT samples, suggesting that C. diff may leverage different metabolic strategies in these contexts. Let's take a closer look at this..." + ], + "metadata": { + "id": "weoSaLDPZYAK" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4Y_XfHkB4sO8" + }, + "source": [ + "## Comparative Metabolomics\n", + "\n", + "Now let's compare the metabolomic imports between the two disease contexts. We're interested to see how the metabolomic profile of the microbiome changes when the disease state changes, as changes in microbiome context can lead to changes in host susceptibility to infection. To look into this deeper, we'll transform the microbiome import data and then plot the metabolite imports on a heatmap." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pAtec3I78DJJ" + }, + "source": [ + "We can use the `consumption_rates` function in MICOM to calculate consumption rates from the growth results. This will tell us what the patient microbiomes are consuming and provide additional insight into available niches. To visualize the results we'll run a centered log ratio transformation on the data, to account for the compositional nature of these data and compare all the fluxes against each other. Importantly, here we consider the consumption rates for samples with no C. diff present to understand how the initial state of the patient microbiomes may influence invasion potential."
+ ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": { + "id": "DJAsxjnFdajN", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "outputId": "59b69294-cdef-4fdf-a5fa-5f1d8cab1cde" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "disease_state Pre-FMT healthy \\\n", + "sample_id ERR1883214 ERR1883240 ERR1883250 ERR1883294 ERR1883195 \n", + "name \n", + "D-glucose -3.253101 -3.564477 -2.064038 -3.972573 4.450353 \n", + "acetate 2.572101 3.594739 4.306373 4.539524 -2.642172 \n", + "Water 2.966225 -3.584166 3.043172 -3.972573 4.497614 \n", + "D-Fructose 3.219760 -3.584166 -0.695070 -3.972573 3.268810 \n", + "pyruvate -3.404889 3.505223 -3.497278 3.022735 -2.642172 \n", + "... ... ... ... ... ... \n", + "Thiamin -3.404889 -2.773089 -2.982110 -3.403379 -2.388275 \n", + "Spermidine -3.404889 -2.445104 -2.992604 -3.250518 -2.374584 \n", + "Fe2+ -2.838896 -2.091063 -2.203376 -2.370595 -1.778976 \n", + "Putrescine -3.404889 -2.848543 -3.087577 -3.295817 -2.353273 \n", + "Menaquinone 8 -2.873638 -3.295815 -2.979719 -3.351557 -2.276149 \n", + "\n", + "disease_state \n", + "sample_id ERR1883207 ERR1883212 ERR1883225 \n", + "name \n", + "D-glucose 4.493531 3.416515 4.569671 \n", + "acetate -2.772490 -3.668369 -2.625105 \n", + "Water 4.280483 3.763363 4.354428 \n", + "D-Fructose 3.253561 2.612354 3.276231 \n", + "pyruvate -2.772490 -3.668369 -2.625105 \n", + "... ... ... ... \n", + "Thiamin -2.444364 -2.939826 -2.378889 \n", + "Spermidine -2.445049 -2.936479 -2.376260 \n", + "Fe2+ -1.820472 -2.611296 -1.668659 \n", + "Putrescine -2.438953 -2.912851 -2.400161 \n", + "Menaquinone 8 -2.471199 -3.185775 -2.625105 \n", + "\n", + "[647 rows x 8 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
disease_statePre-FMThealthy
sample_idERR1883214ERR1883240ERR1883250ERR1883294ERR1883195ERR1883207ERR1883212ERR1883225
name
D-glucose-3.253101-3.564477-2.064038-3.9725734.4503534.4935313.4165154.569671
acetate2.5721013.5947394.3063734.539524-2.642172-2.772490-3.668369-2.625105
Water2.966225-3.5841663.043172-3.9725734.4976144.2804833.7633634.354428
D-Fructose3.219760-3.584166-0.695070-3.9725733.2688103.2535612.6123543.276231
pyruvate-3.4048893.505223-3.4972783.022735-2.642172-2.772490-3.668369-2.625105
...........................
Thiamin-3.404889-2.773089-2.982110-3.403379-2.388275-2.444364-2.939826-2.378889
Spermidine-3.404889-2.445104-2.992604-3.250518-2.374584-2.445049-2.936479-2.376260
Fe2+-2.838896-2.091063-2.203376-2.370595-1.778976-1.820472-2.611296-1.668659
Putrescine-3.404889-2.848543-3.087577-3.295817-2.353273-2.438953-2.912851-2.400161
Menaquinone 8-2.873638-3.295815-2.979719-3.351557-2.276149-2.471199-3.185775-2.625105
\n", + "

647 rows × 8 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 103 + } + ], + "source": [ + "from micom.measures import consumption_rates\n", + "import numpy as np\n", + "no_cdiff_growth = load_results(\"treasure_chest/no-cdiff-growth.zip\") # Load growth results with no C. diff invasion\n", + "exchanges = consumption_rates(no_cdiff_growth) # extract consumption rates\n", + "exchanges=pd.merge(exchanges,metadata,on='sample_id') # add metadata\n", + "exchanges = pd.pivot_table( # convert to a matrix of samples vs. metabolites\n", + " exchanges, # that contains the consumption rates\n", + " index = ['disease_state','sample_id'],\n", + " columns = 'name',\n", + " values = 'flux'\n", + ")\n", + "exchanges = exchanges.T.fillna(0.0) # if a metabolite is not consumed its flux is zero\n", + "exchanges = exchanges.apply( # ...and a CLR transform normalizes the fluxes\n", + " lambda xs: np.log(xs + 0.001) - np.log(xs.mean() + 0.001),\n", + " axis=0)\n", + "exchanges = exchanges.reindex( # sort by variance, highest variance fluxes first\n", + " exchanges.var(axis = 1).sort_values(ascending=False).index\n", + ")\n", + "\n", + "exchanges" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nYuc7Wu38nYd" + }, + "source": [ + "We can use seaborn to plot our heatmap:" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": { + "id": "YHnKeFuF3qAt", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "06c5b995-06e9-48ca-fd7e-25e5998af6e0" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 104 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "sns.clustermap(\n", + " exchanges.head(50), # take the 50 highest-variance metabolites\n", + " cmap = 'viridis',\n", + " yticklabels = True, # show all metabolite names\n", + " figsize = (8, 12) # size of the heatmap\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b1i2LStZiUst" + }, + "source": [ + "We can see here that the disease context is important - there are significant differences in consumption rates between the healthy and pre-FMT microbiomes. These differences may explain why C. diff can exploit the pre-FMT microbiomes and achieve higher predicted growth rates." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l8maOr3w2bOo" + }, + "source": [ + "# 🏫 Exercises" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Time for you to try your hand at some analysis. Let's take a closer look at the metabolic strategies used by C. diff." + ], + "metadata": { + "id": "XZt3ojssnCzz" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VjUustQG2_bX" + }, + "source": [ + "## Metabolic strategies used by C. diff\n", + "We've already looked at the community-wide consumption fluxes in the absence of C. diff and found that they differ between disease contexts. What about the import fluxes of C. diff specifically? Can you develop a visualization to look at those?" + ] + }, + { + "cell_type": "code", + "source": [ + "# Your code here" + ], + "metadata": { + "id": "zrUZSSp62uUt" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CsqIRTbC7doD" + }, + "source": [ + "# 🔵 Addendum\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hycoXNTi5xsH" + }, + "source": [ + "## Choosing a tradeoff value\n", + "\n", + "Even if you don't have growth rates available you can still use your data to choose a decent tradeoff value.
This can be done by choosing the largest tradeoff value that still allows growth for the majority of the taxa that you observed in the sample (if they are present at an appreciable abundance, they should be able to grow). This can be done with the `tradeoff` workflow in MICOM that will run cooperative tradeoff with varying tradeoff values, which can be visualized with the `plot_tradeoff` function." + ] + }, + { + "cell_type": "code", + "source": [ + "manifest" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 454 + }, + "id": "I8NnkbnymxoE", + "outputId": "41feea98-0cfb-4fe5-c0d6-3fa11631782e" + }, + "execution_count": 123, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " sample_id Unnamed: 0 collection_timestamp day_relative_to_fmt \\\n", + "0 ERR1883195 12.0 2011-10-24 26 \n", + "1 ERR1883207 24.0 2012-01-12 44 \n", + "2 ERR1883212 30.0 2012-10-10 135 \n", + "3 ERR1883214 32.0 2011-07-26 0 \n", + "4 ERR1883225 43.0 2011-07-26 54 \n", + "5 ERR1883240 58.0 2012-02-14 pre-FMT \n", + "6 ERR1883250 68.0 2011-12-23 pre-FMT \n", + "7 ERR1883294 112.0 2011-09-29 0 \n", + "\n", + " description disease_state host_age host_age_units \\\n", + "0 Donor 11 healthy Restricted access years \n", + "1 Donor 12 healthy Restricted access years \n", + "2 Donor 14 healthy Restricted access years \n", + "3 Day 0 CD1 Pre-FMT 39 years \n", + "4 Donor CD1 healthy Restricted access years \n", + "5 CD9 pre-FMT Pre-FMT 47 years \n", + "6 CD13 pre-FMT Pre-FMT 53 years \n", + "7 Day 0 CD3 Pre-FMT 61 years \n", + "\n", + " host_body_mass_index host_height ... host_subject_id \\\n", + "0 Restricted access Restricted access ... Donor \n", + "1 Restricted access Restricted access ... Donor \n", + "2 Restricted access Restricted access ... Donor \n", + "3 29.3 165.1 ... CD1 \n", + "4 Restricted access Restricted access ... Donor \n", + "5 35.5 1.55 ... CD9 \n", + "6 34.4 1.56 ... CD13 \n", + "7 32.5 1.727 ... 
CD3 \n", + "\n", + " host_weight host_weight_units race sex \\\n", + "0 Restricted access kg Restricted access Restricted access \n", + "1 Restricted access kg Restricted access Restricted access \n", + "2 Restricted access kg Restricted access Restricted access \n", + "3 80.1 kg white female \n", + "4 Restricted access kg Restricted access Restricted access \n", + "5 85.1 kg white female \n", + "6 83.9 kg white female \n", + "7 97.3 kg white male \n", + "\n", + " file found_taxa total_taxa found_fraction \\\n", + "0 ERR1883195.pickle 4.0 4.0 1.000000 \n", + "1 ERR1883207.pickle 6.0 6.0 1.000000 \n", + "2 ERR1883212.pickle 5.0 6.0 0.833333 \n", + "3 ERR1883214.pickle 2.0 2.0 1.000000 \n", + "4 ERR1883225.pickle 4.0 4.0 1.000000 \n", + "5 ERR1883240.pickle 3.0 3.0 1.000000 \n", + "6 ERR1883250.pickle 4.0 5.0 0.800000 \n", + "7 ERR1883294.pickle 4.0 6.0 0.666667 \n", + "\n", + " found_abundance_fraction \n", + "0 1.000000 \n", + "1 1.000000 \n", + "2 0.884172 \n", + "3 1.000000 \n", + "4 1.000000 \n", + "5 1.000000 \n", + "6 0.935141 \n", + "7 0.444919 \n", + "\n", + "[8 rows x 21 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
sample_idUnnamed: 0collection_timestampday_relative_to_fmtdescriptiondisease_statehost_agehost_age_unitshost_body_mass_indexhost_height...host_subject_idhost_weighthost_weight_unitsracesexfilefound_taxatotal_taxafound_fractionfound_abundance_fraction
0ERR188319512.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883195.pickle4.04.01.0000001.000000
1ERR188320724.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883207.pickle6.06.01.0000001.000000
2ERR188321230.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883212.pickle5.06.00.8333330.884172
3ERR188321432.02011-07-260Day 0 CD1Pre-FMT39years29.3165.1...CD180.1kgwhitefemaleERR1883214.pickle2.02.01.0000001.000000
4ERR188322543.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883225.pickle4.04.01.0000001.000000
5ERR188324058.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55...CD985.1kgwhitefemaleERR1883240.pickle3.03.01.0000001.000000
6ERR188325068.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56...CD1383.9kgwhitefemaleERR1883250.pickle4.05.00.8000000.935141
7ERR1883294112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727...CD397.3kgwhitemaleERR1883294.pickle4.06.00.6666670.444919
\n", + "

8 rows Γ— 21 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 123 + } + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": { + "id": "8_1jesZTHYra", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51, + "referenced_widgets": [ + "d450a2f0b9de425990f12bc7a5631782", + "af7b0c394a064469a6f7e8216b197336" + ] + }, + "outputId": "b86ed405-c9b6-499c-b124-0915e21af8fa" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Output()" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "d450a2f0b9de425990f12bc7a5631782" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [], + "text/html": [ + "
\n"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "\n"
+            ],
+            "text/html": [
+              "
\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 149 + } + ], + "source": [ + "from micom.workflows import tradeoff\n", + "import micom\n", + "\n", + "tradeoff_results = tradeoff(manifest, \"models\", medium, threads=2)\n", + "tradeoff_results.to_csv(\"tradeoff.csv\", index=False)\n", + "\n", + "plot_tradeoff(tradeoff_results, tolerance=1e-4)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y9703vhK6d6c" + }, + "source": [ + "After opening `tradeoff_[DATE].html` you will see that, for our example here, all tradeoff values work great. This is because we modeled very few taxa, which keeps the competition down. If you allowed less abundant taxa into the models, this would change drastically. For instance, here is an example from a colorectal cancer data set:\n", + "\n", + "[![tradeoff example](https://micom-dev.github.io/micom/_images/tradeoff.png)](https://micom-dev.github.io/micom/_static/tradeoff.html)\n", + "\n", + "You can see how not using the cooperative tradeoff would give you nonsense results where only 10% of all observed taxa grew. A tradeoff value of 0.6-0.8 would probably be a good choice for this particular data set." 
+ ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "c991a7ed881363492957ff225bb30af9d5174cd8515a21cbef71fcaa303e4050" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "37b0134de4bd4fe38c1afe7bc2bbdc59": { + "model_module": "@jupyter-widgets/output", + "model_name": "OutputModel", + "model_module_version": "1.0.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_bc2776a5195f4a8c8dda0e22e2fd9ea3", + "msg_id": "", + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "Running \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[35m100%\u001b[0m \u001b[33m0:05:08\u001b[0m\n", + "text/html": "
Running ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:05:08\n
\n" + }, + "metadata": {} + } + ] + } + }, + "bc2776a5195f4a8c8dda0e22e2fd9ea3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "59428de9a88b4204885d03de441cbde3": { + "model_module": "@jupyter-widgets/output", + "model_name": "OutputModel", + "model_module_version": "1.0.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_901fb7e8d8c844a8b2e86735ca2bead3", + "msg_id": "", + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "Running \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[35m100%\u001b[0m 
\u001b[33m0:09:32\u001b[0m\n", + "text/html": "
Running ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:09:32\n
\n" + }, + "metadata": {} + } + ] + } + }, + "901fb7e8d8c844a8b2e86735ca2bead3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d450a2f0b9de425990f12bc7a5631782": { + "model_module": "@jupyter-widgets/output", + "model_name": "OutputModel", + "model_module_version": "1.0.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_af7b0c394a064469a6f7e8216b197336", + "msg_id": "", + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "Running \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[35m100%\u001b[0m 
\u001b[33m0:32:42\u001b[0m\n", + "text/html": "
Running ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:32:42\n
\n" + }, + "metadata": {} + } + ] + } + }, + "af7b0c394a064469a6f7e8216b197336": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/micom_2023_solutions.ipynb b/micom_2023_solutions.ipynb new file mode 100644 index 0000000..ec7efaa --- /dev/null +++ b/micom_2023_solutions.ipynb @@ -0,0 +1,9112 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C0vqP4LJ9y6K" + }, + "source": [ + "# 🧫🦠 Modeling microbiota-wide metabolism with MICOM\n", + "\n", + "This notebook will accompany the second session of the 2023 ISB Microbiome Course. 
The presentation slides can be [found here](https://gibbons-lab.github.io/isb_course_2023/micom).\n", + "\n", + "You can save your own local copy of this notebook by using `File > Save a copy in Drive`. You may be prompted to certify that the notebook is safe. We promise that it is 🀞\n", + "\n", + "**Disclaimer:**\n", + "The linear and quadratic programming problems MICOM has to solve are very large and very complicated. There are some very good commercial solvers that are very expensive (even though they are often free for academic use). To make this tutorial as accessible as possible we will use the Open Source solver [OSQP](https://osqp.org/), which is installed along with MICOM. OSQP is amazing with quadratic programming problems (kudos!) but not as accurate for linear problems. Solvers usually only guarantee a solution within a certain numerical tolerance of the real solution. In order to make everything work with OSQP this tolerance has to be relaxed to about 10<sup>-3</sup>. This means that any result with an absolute value smaller than that might very well be zero so we should look at larger values only. Installing cost-free academic versions of commercial solvers like [IBM CPLEX](https://www.ibm.com/analytics/cplex-optimizer) or [Gurobi](https://www.gurobi.com/) would allow you to lower the tolerance to 10<sup>-6</sup>.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qgBBl4GtuTuX" + }, + "source": [ + "# πŸ“ Setup\n", + "\n", + "MICOM installation is usually pretty straight-forward and can be as easy as typing `pip install micom` into your Terminal.\n", + "\n", + "First let's start by downloading the materials again and switching to the folder." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ckON4xr3_bW5" + }, + "outputs": [], + "source": [ + "!git clone https://github.com/gibbons-lab/isb_course_2023 materials\n", + "!cd materials\n", + "!ls" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "is6fmBUeorwv" + }, + "source": [ + "## Basic Installation\n", + "\n", + "Installing MICOM is straight-forward in Python. OSQP itself will be installed automatically along with it." + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "M_TeC5yrst3h", + "outputId": "c3cb59af-6caf-4eda-f0f4-b20f4d19ae49" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m832.5/832.5 kB\u001b[0m \u001b[31m9.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m31.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m37.4/37.4 MB\u001b[0m \u001b[31m22.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.7/75.7 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m138.3/138.3 kB\u001b[0m \u001b[31m12.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.0/8.0 MB\u001b[0m \u001b[31m71.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.9/112.9 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m49.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.0/76.0 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m526.7/526.7 kB\u001b[0m \u001b[31m28.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m4.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDone! πŸŽ‰ \n" + ] + } + ], + "source": [ + "!pip install -q micom\n", + "\n", + "print(\"Done! πŸŽ‰ \")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6oJrxxz6tV9T" + }, + "source": [ + "## Enable QIIME 2 interactions\n", + "\n", + "Before we start, we also need to install packages to read the \"biom\" file format used by QIIME 2 to save tables. This is only necessary if you want to read QIIME 2 FeatureTable artifacts (like the ones we constructed yesterday)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "5rZX7SK_toLp", + "outputId": "9cc5af18-c4de-486e-cb03-8f01bacf92a5" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.0/12.0 MB\u001b[0m \u001b[31m41.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDone! πŸŽ‰ \n" + ] + } + ], + "source": [ + "!pip install -q numpy Cython biom-format\n", + "\n", + "print(\"Done! 
πŸŽ‰ \")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oPy1f-WLI0lZ" + }, + "source": [ + "Okay, all done. So let's get started building some models πŸ¦ΊπŸ› d😁.\n", + "\n", + "# πŸ’» MICOM\n", + "\n", + "We will use the Python interface to MICOM since it plays nicely with Colaboratory. However, you could run the same steps within the QIIME 2 MICOM plugin ([q2-micom](https://library.qiime2.org/plugins/q2-micom/26/)).\n", + "\n", + "Here is an overview of all the steps and functions across both interfaces:\n", + "![micom overview](https://github.com/micom-dev/q2-micom/raw/706f583a060b91c12c0cec7acea2354fdd0dd320/docs/assets/overview.png)\n", + "\n", + "The process of building a metabolic model in MICOM begins with constructing a combined abundance/taxonomy table, referred to hereafter as a taxonomy table. Let's load a sample taxonomy table to see what it looks like:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "UV9SObSQkSZh", + "outputId": "2b3f71ac-8873-45fb-9352-961177f76c6a" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " id genus species reactions \\\n", + "0 Escherichia_coli_1 Escherichia Escherichia coli 0 95 \n", + "1 Escherichia_coli_2 Escherichia Escherichia coli 1 95 \n", + "2 Escherichia_coli_3 Escherichia Escherichia coli 2 95 \n", + "3 Escherichia_coli_4 Escherichia Escherichia coli 3 95 \n", + "0 Escherichia_coli_1 Escherichia Escherichia coli 0 95 \n", + "\n", + " metabolites sample_id abundance \n", + "0 72 sample_1 96 \n", + "1 72 sample_1 48 \n", + "2 72 sample_1 708 \n", + "3 72 sample_1 622 \n", + "0 72 sample_2 120 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgenusspeciesreactionsmetabolitessample_idabundance
0Escherichia_coli_1EscherichiaEscherichia coli 09572sample_196
1Escherichia_coli_2EscherichiaEscherichia coli 19572sample_148
2Escherichia_coli_3EscherichiaEscherichia coli 29572sample_1708
3Escherichia_coli_4EscherichiaEscherichia coli 39572sample_1622
0Escherichia_coli_1EscherichiaEscherichia coli 09572sample_2120
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 51 + } + ], + "source": [ + "from micom.data import test_data\n", + "\n", + "test_data().head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AEk7yfd1lbYp" + }, + "source": [ + "In this taxonomy table, we see four identical strains of _E. coli_ (1 through 4), across two samples (sample_1 and sample_2). We can see that each row represents a single taxon in a single sample, and the `abundance` column identifies the abundance of that taxon in the sample.\n", + "\n", + "The `id` column specifies identifiers for the taxa and should be expressive and not include spaces or special characters. Since we are using a taxonomy database to build our models (more on that soon), we don't need a `file` column.\n", + "\n", + "You might notice that this dataframe looks very different from what we generated in yesterday's tutorial, where we ended up with separate QIIME 2 artifacts 😱\n", + "\n", + "No worries, we can deal with that.\n", + "\n", + "## Importing data from QIIME 2\n", + "\n", + "MICOM can read QIIME 2 artifacts. You don't even need to have QIIME 2 installed for that! But before we do so, let's resolve one issue. We discussed that MICOM summarizes genome-scale models into pangenome-scale models as a first step, but our data are on the ASV level...so how will we know what to summarize?\n", + "\n", + "Basically, a specific model database can be used to quickly summarize pangenome-scale models for use within MICOM. So, before we read our data we have to decide which model database to use. We will go with the [AGORA database](https://pubmed.ncbi.nlm.nih.gov/27893703/), which is a curated database of more than 800 bacterial strains that commonly live in the human gut. In particular, we will use a version of this database summarized on the genus rank which can be downloaded from the [MICOM data repository](https://doi.org/10.5281/zenodo.3755182), which contains a whole lot of prebuilt databases. 
This database is available from the materials folder that we previously cloned." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_57iya0D3L6-" + }, + "source": [ + "Now we're all set to start building models! The data we previously collected can be found in the `treasure_chest` folder, so we can use those files to build our taxonomy for MICOM." + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "id": "o0vBAiiqqPLC" + }, + "outputs": [], + "source": [ + "from micom.taxonomy import qiime_to_micom\n", + "\n", + "tax = qiime_to_micom(\n", + " \"dada2/table.qza\",\n", + " \"taxa.qza\",\n", + " collapse_on=\"genus\"\n", + ")" + ] + }, + { + "cell_type": "code", + "source": [ + "tax.sample_id.unique()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "TkOm89mQbwXa", + "outputId": "a6093f5e-a2c8-4bf3-9e3f-bb925b81bf98" + }, + "execution_count": 53, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array(['ERR1883195', 'ERR1883207', 'ERR1883214', 'ERR1883240',\n", + " 'ERR1883250', 'ERR1883212', 'ERR1883225', 'ERR1883294'],\n", + " dtype=object)" + ] + }, + "metadata": {}, + "execution_count": 53 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-TQ6Zp7wouk4" + }, + "source": [ + "Notice the `collapse_on` argument. That will specify the rank on which to summarize and can be a list of several ranks. When matching taxonomy you can either match by the particular rank of interest (for example, just comparing genus names here), or you could compare the entire taxonomy, which will require all taxonomic ranks prior to the target rank to match. For that you could specify `collapse_on=[\"kingdom\", \"phylum\", \"class\", \"order\", \"family\", \"genus\"]`.\n", + "\n", + "Taxonomic names will often not match 100% between databases. For instance, the genus name \"Prevotella\" in one database may be \"Prevotella_6\" in another. 
The more ranks you use for matching the more likely you are to run into these issues. However, the more taxonomic ranks you use to match the more confident you can be that your observed taxon really is the same taxon as the one in the model database.\n", + "\n", + "The resulting table will contain the same abundances but it will include more ranks if `collapse_on` is a list. All ranks present in the taxonomy will be used when matching to the database. We will stick with the \"lax\" option of only matching on genus ranks.\n", + "\n", + "Let's now take a look at the taxonomy table we generated:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R5zwLVFcNTbq" + }, + "source": [ + "That looks more like the example! Again, we have a row for each taxon in each sample, so we're good to go.\n", + "\n", + "One helpful thing to do is to merge in our metadata, so we'll have it at hand for the following steps. In our case, the metadata will include the ethnic group, region, and subsistence type of each of the study participants." + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 548 + }, + "id": "X9hqoO4go0h1", + "outputId": "6d3ff6a9-7c37-4c52-ff4f-5ae39aef4cd7", + "collapsed": true + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " sample_id abundance genus id relative \\\n", + "0 ERR1883195 6.0 Akkermansia Akkermansia 0.000116 \n", + "1 ERR1883195 17971.0 Phocaeicola Phocaeicola 0.346522 \n", + "2 ERR1883195 5.0 Veillonella Veillonella 0.000096 \n", + "3 ERR1883195 4741.0 Faecalibacterium Faecalibacterium 0.091417 \n", + "4 ERR1883195 576.0 Mediterraneibacter Mediterraneibacter 0.011107 \n", + ".. ... ... ... ... ... 
\n", + "344 ERR1883294 5.0 Stomatobaculum Stomatobaculum 0.000926 \n", + "345 ERR1883294 3.0 Lancefieldella Lancefieldella 0.000556 \n", + "346 ERR1883294 3.0 Tumebacillus Tumebacillus 0.000556 \n", + "347 ERR1883294 3.0 Tropheryma Tropheryma 0.000556 \n", + "348 ERR1883294 2.0 Mogibacterium Mogibacterium 0.000370 \n", + "\n", + " Unnamed: 0 collection_timestamp day_relative_to_fmt description \\\n", + "0 12 2011-10-24 26 Donor 11 \n", + "1 12 2011-10-24 26 Donor 11 \n", + "2 12 2011-10-24 26 Donor 11 \n", + "3 12 2011-10-24 26 Donor 11 \n", + "4 12 2011-10-24 26 Donor 11 \n", + ".. ... ... ... ... \n", + "344 112 2011-09-29 0 Day 0 CD3 \n", + "345 112 2011-09-29 0 Day 0 CD3 \n", + "346 112 2011-09-29 0 Day 0 CD3 \n", + "347 112 2011-09-29 0 Day 0 CD3 \n", + "348 112 2011-09-29 0 Day 0 CD3 \n", + "\n", + " disease_state host_age host_age_units host_body_mass_index \\\n", + "0 healthy Restricted access years Restricted access \n", + "1 healthy Restricted access years Restricted access \n", + "2 healthy Restricted access years Restricted access \n", + "3 healthy Restricted access years Restricted access \n", + "4 healthy Restricted access years Restricted access \n", + ".. ... ... ... ... \n", + "344 Pre-FMT 61 years 32.5 \n", + "345 Pre-FMT 61 years 32.5 \n", + "346 Pre-FMT 61 years 32.5 \n", + "347 Pre-FMT 61 years 32.5 \n", + "348 Pre-FMT 61 years 32.5 \n", + "\n", + " host_height host_height_units host_subject_id host_weight \\\n", + "0 Restricted access m Donor Restricted access \n", + "1 Restricted access m Donor Restricted access \n", + "2 Restricted access m Donor Restricted access \n", + "3 Restricted access m Donor Restricted access \n", + "4 Restricted access m Donor Restricted access \n", + ".. ... ... ... ... 
\n", + "344 1.727 m CD3 97.3 \n", + "345 1.727 m CD3 97.3 \n", + "346 1.727 m CD3 97.3 \n", + "347 1.727 m CD3 97.3 \n", + "348 1.727 m CD3 97.3 \n", + "\n", + " host_weight_units race sex \n", + "0 kg Restricted access Restricted access \n", + "1 kg Restricted access Restricted access \n", + "2 kg Restricted access Restricted access \n", + "3 kg Restricted access Restricted access \n", + "4 kg Restricted access Restricted access \n", + ".. ... ... ... \n", + "344 kg white male \n", + "345 kg white male \n", + "346 kg white male \n", + "347 kg white male \n", + "348 kg white male \n", + "\n", + "[349 rows x 20 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sample_idabundancegenusidrelativeUnnamed: 0collection_timestampday_relative_to_fmtdescriptiondisease_statehost_agehost_age_unitshost_body_mass_indexhost_heighthost_height_unitshost_subject_idhost_weighthost_weight_unitsracesex
0ERR18831956.0AkkermansiaAkkermansia0.000116122011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
1ERR188319517971.0PhocaeicolaPhocaeicola0.346522122011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
2ERR18831955.0VeillonellaVeillonella0.000096122011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
3ERR18831954741.0FaecalibacteriumFaecalibacterium0.091417122011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
4ERR1883195576.0MediterraneibacterMediterraneibacter0.011107122011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
...............................................................
344ERR18832945.0StomatobaculumStomatobaculum0.0009261122011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
345ERR18832943.0LancefieldellaLancefieldella0.0005561122011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
346ERR18832943.0TumebacillusTumebacillus0.0005561122011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
347ERR18832943.0TropherymaTropheryma0.0005561122011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
348ERR18832942.0MogibacteriumMogibacterium0.0003701122011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
\n", + "

349 rows Γ— 20 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 55 + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "metadata = pd.read_table(\"metadata.tsv\").rename(columns={\"id\": \"sample_id\"})\n", + "tax = pd.merge(tax, metadata, on=\"sample_id\")\n", + "tax" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Ok, now we want to invade our samples with C. diff. The goal is to predict susceptibility to invasion and see how disease context can influence predicted engraftment. To do this we will introduce 10% C. diff to all the samples! You can read more about this approach and its applications [here](https://www.biorxiv.org/content/10.1101/2023.04.28.538771v2) " + ], + "metadata": { + "id": "ZsFgo_CRjvsr" + } + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "vHyvSZJJ-iIi", + "outputId": "0f934995-8bd5-4c03-ddda-491fb5d5bd4f" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " sample_id abundance genus id \\\n", + "1 ERR1883195 17971.000000 Phocaeicola Phocaeicola \n", + "3 ERR1883195 4741.000000 Faecalibacterium Faecalibacterium \n", + "8 ERR1883195 6805.000000 Bacteroides Bacteroides \n", + "11 ERR1883195 3076.000000 Parabacteroides Parabacteroides \n", + "5 ERR1883195 3621.444444 Clostridioides Clostridioides \n", + "71 ERR1883207 16983.000000 Phocaeicola Phocaeicola \n", + "73 ERR1883207 3717.000000 Faecalibacterium Faecalibacterium \n", + "77 ERR1883207 2956.000000 Barnesiella Barnesiella \n", + "78 ERR1883207 6984.000000 Bacteroides Bacteroides \n", + "79 ERR1883207 2901.000000 Blautia Blautia \n", + "81 ERR1883207 3413.000000 Parabacteroides Parabacteroides \n", + "7 ERR1883207 4106.000000 Clostridioides Clostridioides \n", + "222 ERR1883212 8393.000000 Phocaeicola Phocaeicola \n", + "224 ERR1883212 10841.000000 Faecalibacterium Faecalibacterium \n", + "227 ERR1883212 4038.000000 Fusicatenibacter 
Fusicatenibacter \n", + "229 ERR1883212 3286.000000 Bacteroides Bacteroides \n", + "230 ERR1883212 3198.000000 Blautia Blautia \n", + "234 ERR1883212 5106.000000 Streptococcus Streptococcus \n", + "7 ERR1883212 3873.555556 Clostridioides Clostridioides \n", + "152 ERR1883214 54726.000000 Akkermansia Akkermansia \n", + "161 ERR1883214 3968.000000 Clostridium Clostridium \n", + "3 ERR1883214 6521.555556 Clostridioides Clostridioides \n", + "291 ERR1883225 3734.000000 Phocaeicola Phocaeicola \n", + "295 ERR1883225 492.000000 Barnesiella Barnesiella \n", + "296 ERR1883225 1475.000000 Bacteroides Bacteroides \n", + "299 ERR1883225 911.000000 Parabacteroides Parabacteroides \n", + "5 ERR1883225 734.666667 Clostridioides Clostridioides \n", + "174 ERR1883240 35554.000000 Akkermansia Akkermansia \n", + "176 ERR1883240 15517.000000 Veillonella Veillonella \n", + "180 ERR1883240 5555.000000 Fusobacterium Fusobacterium \n", + "4 ERR1883240 6291.777778 Clostridioides Clostridioides \n", + "188 ERR1883250 11467.000000 Akkermansia Akkermansia \n", + "190 ERR1883250 9955.000000 Mediterraneibacter Mediterraneibacter \n", + "191 ERR1883250 11923.000000 Erysipelatoclostridium Erysipelatoclostridium \n", + "193 ERR1883250 13975.000000 Blautia Blautia \n", + "196 ERR1883250 3282.000000 Ruthenibacterium Ruthenibacterium \n", + "6 ERR1883250 5622.444444 Clostridioides Clostridioides \n", + "321 ERR1883294 619.000000 Veillonella Veillonella \n", + "325 ERR1883294 591.000000 Fusobacterium Fusobacterium \n", + "329 ERR1883294 1866.000000 Anaerosinus Anaerosinus \n", + "330 ERR1883294 580.000000 Megasphaera Megasphaera \n", + "332 ERR1883294 734.000000 Phytobacter Phytobacter \n", + "333 ERR1883294 294.000000 Morganella Morganella \n", + "7 ERR1883294 520.444444 Clostridioides Clostridioides \n", + "\n", + " relative Unnamed: 0 collection_timestamp day_relative_to_fmt \\\n", + "1 0.496238 12.0 2011-10-24 26 \n", + "3 0.130915 12.0 2011-10-24 26 \n", + "8 0.187908 12.0 2011-10-24 26 \n", + 
"11 0.084938 12.0 2011-10-24 26 \n", + "5 0.100000 12.0 2011-10-24 26 \n", + "71 0.413614 24.0 2012-01-12 44 \n", + "73 0.090526 24.0 2012-01-12 44 \n", + "77 0.071992 24.0 2012-01-12 44 \n", + "78 0.170093 24.0 2012-01-12 44 \n", + "79 0.070653 24.0 2012-01-12 44 \n", + "81 0.083122 24.0 2012-01-12 44 \n", + "7 0.100000 24.0 2012-01-12 44 \n", + "222 0.216674 30.0 2012-10-10 135 \n", + "224 0.279872 30.0 2012-10-10 135 \n", + "227 0.104245 30.0 2012-10-10 135 \n", + "229 0.084832 30.0 2012-10-10 135 \n", + "230 0.082560 30.0 2012-10-10 135 \n", + "234 0.131817 30.0 2012-10-10 135 \n", + "7 0.100000 30.0 2012-10-10 135 \n", + "152 0.839156 32.0 2011-07-26 0 \n", + "161 0.060844 32.0 2011-07-26 0 \n", + "3 0.100000 32.0 2011-07-26 0 \n", + "291 0.508258 43.0 2011-07-26 54 \n", + "295 0.066969 43.0 2011-07-26 54 \n", + "296 0.200771 43.0 2011-07-26 54 \n", + "299 0.124002 43.0 2011-07-26 54 \n", + "5 0.100000 43.0 2011-07-26 54 \n", + "174 0.565087 58.0 2012-02-14 pre-FMT \n", + "176 0.246623 58.0 2012-02-14 pre-FMT \n", + "180 0.088290 58.0 2012-02-14 pre-FMT \n", + "4 0.100000 58.0 2012-02-14 pre-FMT \n", + "188 0.203950 68.0 2011-12-23 pre-FMT \n", + "190 0.177058 68.0 2011-12-23 pre-FMT \n", + "191 0.212061 68.0 2011-12-23 pre-FMT \n", + "193 0.248557 68.0 2011-12-23 pre-FMT \n", + "196 0.058373 68.0 2011-12-23 pre-FMT \n", + "6 0.100000 68.0 2011-12-23 pre-FMT \n", + "321 0.118937 112.0 2011-09-29 0 \n", + "325 0.113557 112.0 2011-09-29 0 \n", + "329 0.358540 112.0 2011-09-29 0 \n", + "330 0.111443 112.0 2011-09-29 0 \n", + "332 0.141033 112.0 2011-09-29 0 \n", + "333 0.056490 112.0 2011-09-29 0 \n", + "7 0.100000 112.0 2011-09-29 0 \n", + "\n", + " description disease_state host_age host_age_units \\\n", + "1 Donor 11 healthy Restricted access years \n", + "3 Donor 11 healthy Restricted access years \n", + "8 Donor 11 healthy Restricted access years \n", + "11 Donor 11 healthy Restricted access years \n", + "5 Donor 11 healthy Restricted access years \n", + "71 
Donor 12 healthy Restricted access years \n", + "73 Donor 12 healthy Restricted access years \n", + "77 Donor 12 healthy Restricted access years \n", + "78 Donor 12 healthy Restricted access years \n", + "79 Donor 12 healthy Restricted access years \n", + "81 Donor 12 healthy Restricted access years \n", + "7 Donor 12 healthy Restricted access years \n", + "222 Donor 14 healthy Restricted access years \n", + "224 Donor 14 healthy Restricted access years \n", + "227 Donor 14 healthy Restricted access years \n", + "229 Donor 14 healthy Restricted access years \n", + "230 Donor 14 healthy Restricted access years \n", + "234 Donor 14 healthy Restricted access years \n", + "7 Donor 14 healthy Restricted access years \n", + "152 Day 0 CD1 Pre-FMT 39 years \n", + "161 Day 0 CD1 Pre-FMT 39 years \n", + "3 Day 0 CD1 Pre-FMT 39 years \n", + "291 Donor CD1 healthy Restricted access years \n", + "295 Donor CD1 healthy Restricted access years \n", + "296 Donor CD1 healthy Restricted access years \n", + "299 Donor CD1 healthy Restricted access years \n", + "5 Donor CD1 healthy Restricted access years \n", + "174 CD9 pre-FMT Pre-FMT 47 years \n", + "176 CD9 pre-FMT Pre-FMT 47 years \n", + "180 CD9 pre-FMT Pre-FMT 47 years \n", + "4 CD9 pre-FMT Pre-FMT 47 years \n", + "188 CD13 pre-FMT Pre-FMT 53 years \n", + "190 CD13 pre-FMT Pre-FMT 53 years \n", + "191 CD13 pre-FMT Pre-FMT 53 years \n", + "193 CD13 pre-FMT Pre-FMT 53 years \n", + "196 CD13 pre-FMT Pre-FMT 53 years \n", + "6 CD13 pre-FMT Pre-FMT 53 years \n", + "321 Day 0 CD3 Pre-FMT 61 years \n", + "325 Day 0 CD3 Pre-FMT 61 years \n", + "329 Day 0 CD3 Pre-FMT 61 years \n", + "330 Day 0 CD3 Pre-FMT 61 years \n", + "332 Day 0 CD3 Pre-FMT 61 years \n", + "333 Day 0 CD3 Pre-FMT 61 years \n", + "7 Day 0 CD3 Pre-FMT 61 years \n", + "\n", + " host_body_mass_index host_height host_height_units host_subject_id \\\n", + "1 Restricted access Restricted access m Donor \n", + "3 Restricted access Restricted access m Donor \n", + "8 
Restricted access Restricted access m Donor \n", + "11 Restricted access Restricted access m Donor \n", + "5 Restricted access Restricted access m Donor \n", + "71 Restricted access Restricted access m Donor \n", + "73 Restricted access Restricted access m Donor \n", + "77 Restricted access Restricted access m Donor \n", + "78 Restricted access Restricted access m Donor \n", + "79 Restricted access Restricted access m Donor \n", + "81 Restricted access Restricted access m Donor \n", + "7 Restricted access Restricted access m Donor \n", + "222 Restricted access Restricted access m Donor \n", + "224 Restricted access Restricted access m Donor \n", + "227 Restricted access Restricted access m Donor \n", + "229 Restricted access Restricted access m Donor \n", + "230 Restricted access Restricted access m Donor \n", + "234 Restricted access Restricted access m Donor \n", + "7 Restricted access Restricted access m Donor \n", + "152 29.3 165.1 m CD1 \n", + "161 29.3 165.1 m CD1 \n", + "3 29.3 165.1 m CD1 \n", + "291 Restricted access Restricted access m Donor \n", + "295 Restricted access Restricted access m Donor \n", + "296 Restricted access Restricted access m Donor \n", + "299 Restricted access Restricted access m Donor \n", + "5 Restricted access Restricted access m Donor \n", + "174 35.5 1.55 m CD9 \n", + "176 35.5 1.55 m CD9 \n", + "180 35.5 1.55 m CD9 \n", + "4 35.5 1.55 m CD9 \n", + "188 34.4 1.56 m CD13 \n", + "190 34.4 1.56 m CD13 \n", + "191 34.4 1.56 m CD13 \n", + "193 34.4 1.56 m CD13 \n", + "196 34.4 1.56 m CD13 \n", + "6 34.4 1.56 m CD13 \n", + "321 32.5 1.727 m CD3 \n", + "325 32.5 1.727 m CD3 \n", + "329 32.5 1.727 m CD3 \n", + "330 32.5 1.727 m CD3 \n", + "332 32.5 1.727 m CD3 \n", + "333 32.5 1.727 m CD3 \n", + "7 32.5 1.727 m CD3 \n", + "\n", + " host_weight host_weight_units race sex \n", + "1 Restricted access kg Restricted access Restricted access \n", + "3 Restricted access kg Restricted access Restricted access \n", + "8 Restricted access kg 
Restricted access Restricted access \n", + "11 Restricted access kg Restricted access Restricted access \n", + "5 Restricted access kg Restricted access Restricted access \n", + "71 Restricted access kg Restricted access Restricted access \n", + "73 Restricted access kg Restricted access Restricted access \n", + "77 Restricted access kg Restricted access Restricted access \n", + "78 Restricted access kg Restricted access Restricted access \n", + "79 Restricted access kg Restricted access Restricted access \n", + "81 Restricted access kg Restricted access Restricted access \n", + "7 Restricted access kg Restricted access Restricted access \n", + "222 Restricted access kg Restricted access Restricted access \n", + "224 Restricted access kg Restricted access Restricted access \n", + "227 Restricted access kg Restricted access Restricted access \n", + "229 Restricted access kg Restricted access Restricted access \n", + "230 Restricted access kg Restricted access Restricted access \n", + "234 Restricted access kg Restricted access Restricted access \n", + "7 Restricted access kg Restricted access Restricted access \n", + "152 80.1 kg white female \n", + "161 80.1 kg white female \n", + "3 80.1 kg white female \n", + "291 Restricted access kg Restricted access Restricted access \n", + "295 Restricted access kg Restricted access Restricted access \n", + "296 Restricted access kg Restricted access Restricted access \n", + "299 Restricted access kg Restricted access Restricted access \n", + "5 Restricted access kg Restricted access Restricted access \n", + "174 85.1 kg white female \n", + "176 85.1 kg white female \n", + "180 85.1 kg white female \n", + "4 85.1 kg white female \n", + "188 83.9 kg white female \n", + "190 83.9 kg white female \n", + "191 83.9 kg white female \n", + "193 83.9 kg white female \n", + "196 83.9 kg white female \n", + "6 83.9 kg white female \n", + "321 97.3 kg white male \n", + "325 97.3 kg white male \n", + "329 97.3 kg white male \n", + "330 
97.3 kg white male \n", + "332 97.3 kg white male \n", + "333 97.3 kg white male \n", + "7 97.3 kg white male " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sample_idabundancegenusidrelativeUnnamed: 0collection_timestampday_relative_to_fmtdescriptiondisease_statehost_agehost_age_unitshost_body_mass_indexhost_heighthost_height_unitshost_subject_idhost_weighthost_weight_unitsracesex
1ERR188319517971.000000PhocaeicolaPhocaeicola0.49623812.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
3ERR18831954741.000000FaecalibacteriumFaecalibacterium0.13091512.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
8ERR18831956805.000000BacteroidesBacteroides0.18790812.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
11ERR18831953076.000000ParabacteroidesParabacteroides0.08493812.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
5ERR18831953621.444444ClostridioidesClostridioides0.10000012.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
71ERR188320716983.000000PhocaeicolaPhocaeicola0.41361424.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
73ERR18832073717.000000FaecalibacteriumFaecalibacterium0.09052624.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
77ERR18832072956.000000BarnesiellaBarnesiella0.07199224.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
78ERR18832076984.000000BacteroidesBacteroides0.17009324.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
79ERR18832072901.000000BlautiaBlautia0.07065324.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
81ERR18832073413.000000ParabacteroidesParabacteroides0.08312224.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
7ERR18832074106.000000ClostridioidesClostridioides0.10000024.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
222ERR18832128393.000000PhocaeicolaPhocaeicola0.21667430.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
224ERR188321210841.000000FaecalibacteriumFaecalibacterium0.27987230.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
227ERR18832124038.000000FusicatenibacterFusicatenibacter0.10424530.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
229ERR18832123286.000000BacteroidesBacteroides0.08483230.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
230ERR18832123198.000000BlautiaBlautia0.08256030.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
234ERR18832125106.000000StreptococcusStreptococcus0.13181730.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
7ERR18832123873.555556ClostridioidesClostridioides0.10000030.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
152ERR188321454726.000000AkkermansiaAkkermansia0.83915632.02011-07-260Day 0 CD1Pre-FMT39years29.3165.1mCD180.1kgwhitefemale
161ERR18832143968.000000ClostridiumClostridium0.06084432.02011-07-260Day 0 CD1Pre-FMT39years29.3165.1mCD180.1kgwhitefemale
3ERR18832146521.555556ClostridioidesClostridioides0.10000032.02011-07-260Day 0 CD1Pre-FMT39years29.3165.1mCD180.1kgwhitefemale
291ERR18832253734.000000PhocaeicolaPhocaeicola0.50825843.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
295ERR1883225492.000000BarnesiellaBarnesiella0.06696943.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
296ERR18832251475.000000BacteroidesBacteroides0.20077143.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
299ERR1883225911.000000ParabacteroidesParabacteroides0.12400243.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
5ERR1883225734.666667ClostridioidesClostridioides0.10000043.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
174ERR188324035554.000000AkkermansiaAkkermansia0.56508758.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55mCD985.1kgwhitefemale
176ERR188324015517.000000VeillonellaVeillonella0.24662358.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55mCD985.1kgwhitefemale
180ERR18832405555.000000FusobacteriumFusobacterium0.08829058.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55mCD985.1kgwhitefemale
4ERR18832406291.777778ClostridioidesClostridioides0.10000058.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55mCD985.1kgwhitefemale
188ERR188325011467.000000AkkermansiaAkkermansia0.20395068.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
190ERR18832509955.000000MediterraneibacterMediterraneibacter0.17705868.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
191ERR188325011923.000000ErysipelatoclostridiumErysipelatoclostridium0.21206168.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
193ERR188325013975.000000BlautiaBlautia0.24855768.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
196ERR18832503282.000000RuthenibacteriumRuthenibacterium0.05837368.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
6ERR18832505622.444444ClostridioidesClostridioides0.10000068.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56mCD1383.9kgwhitefemale
321ERR1883294619.000000VeillonellaVeillonella0.118937112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
325ERR1883294591.000000FusobacteriumFusobacterium0.113557112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
329ERR18832941866.000000AnaerosinusAnaerosinus0.358540112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
330ERR1883294580.000000MegasphaeraMegasphaera0.111443112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
332ERR1883294734.000000PhytobacterPhytobacter0.141033112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
333ERR1883294294.000000MorganellaMorganella0.056490112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
7ERR1883294520.444444ClostridioidesClostridioides0.100000112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727mCD397.3kgwhitemale
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 106 + } + ], + "source": [ + "invader='Clostridioides' # choose your favorite invader, here we have chosen C. diff\n", + "invader_rel=0.1 # set the fraction of total relative abundance you want it to invade with\n", + "invaded=pd.DataFrame() # set up results dataframe\n", + "\n", + "for smp,df in tax.groupby(by='sample_id'): # loop through data, one sample at a time\n", + " df=df[df.relative>0.05].copy() # filter out genera below 5% (this is a high threshold, which will make simulation run faster)\n", + " df=df[df.genus!='Clostridioides'] # some samples already have C. diff, so lets remove it and then re-introduce\n", + " abund=df.abundance.sum()*invader_rel/(1-invader_rel) # calculate the abundance needed to achieve desired relative abundance\n", + "\n", + " info=df.iloc[0,:].copy() # get necessary sample info\n", + " info.genus=invader # add invader name\n", + " info.id=invader\n", + " info.abundance=abund # add invader abundance\n", + " df.loc[df.shape[0]+1,info.index]=info.values #append invader info\n", + " df.relative=df.abundance.apply(lambda x:x/df.abundance.sum()) # re-calculate relative abundance\n", + " invaded=pd.concat([invaded,df]) # append results to output dataframe\n", + "invaded\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RTbYBR8cJfup" + }, + "source": [ + "With our taxonomy table ready to go, and our metadata merged, it's finally time to get to the model building! 🎉\n", + "\n", + "## Building community models\n", + "\n", + "With the data we have now, building our models is pretty easy. We just pass our taxonomy table and model database to MICOM. We will remove all taxa that make up less than 5% of the community to keep the models small and speed up this tutorial. We will also have to specify where to write the models. For simplicity, we'll run this process in parallel over two threads. It should take around 10 minutes to finish." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51, + "referenced_widgets": [ + "37b0134de4bd4fe38c1afe7bc2bbdc59", + "bc2776a5195f4a8c8dda0e22e2fd9ea3" + ] + }, + "id": "kDbSN71SmCZr", + "outputId": "e51c2208-4536-4f90-804c-442d884549bf" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Output()" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "37b0134de4bd4fe38c1afe7bc2bbdc59" + } + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:micom.logger:Less than 50% of the abundance could be matched to the model database. Model `ERR1883294` may not be representative of the sample\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [], + "text/html": [ + "
\n"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "\n"
+            ],
+            "text/html": [
+              "
\n",
+              "
\n" + ] + }, + "metadata": {} + } + ], + "source": [ + "from micom.workflows import build\n", + "from micom import Community\n", + "import pandas as pd\n", + "\n", + "manifest = build(invaded, \"agora103_genus.qza\", \"models\", solver=\"osqp\",\n", + " cutoff=0.05, threads=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Kwya6vbZZSUo" + }, + "source": [ + "You'll see a warning pop up indicating that less than 50% of the abundances can be matched to the database for one of the samples. This can happen with some data, and may indicate the models may not be completely representative of the samples. Typically a fraction of 80% or more is considered great. We'll continue, but remember to keep an eye out for this in future projects!\n", + "\n", + "In lower-biomass 16S amplicon sequencing samples from stool, many reads can match to food components or to host mitochondria and these hits probably do not contribute much to bacterial community metabolism. These hits will be excluded from MICOM.\n", + "\n", + "Let's take a look at what we got back from the `build` process." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 454 + }, + "id": "r9qwglr88Ise", + "outputId": "168ee50d-23c5-4dfe-e3fb-bc733ec82188" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " sample_id Unnamed: 0 collection_timestamp day_relative_to_fmt \\\n", + "0 ERR1883195 12.0 2011-10-24 26 \n", + "1 ERR1883207 24.0 2012-01-12 44 \n", + "2 ERR1883212 30.0 2012-10-10 135 \n", + "3 ERR1883214 32.0 2011-07-26 0 \n", + "4 ERR1883225 43.0 2011-07-26 54 \n", + "5 ERR1883240 58.0 2012-02-14 pre-FMT \n", + "6 ERR1883250 68.0 2011-12-23 pre-FMT \n", + "7 ERR1883294 112.0 2011-09-29 0 \n", + "\n", + " description disease_state host_age host_age_units \\\n", + "0 Donor 11 healthy Restricted access years \n", + "1 Donor 12 healthy Restricted access years \n", + "2 Donor 14 healthy Restricted access years \n", + "3 Day 0 CD1 Pre-FMT 39 years \n", + "4 Donor CD1 healthy Restricted access years \n", + "5 CD9 pre-FMT Pre-FMT 47 years \n", + "6 CD13 pre-FMT Pre-FMT 53 years \n", + "7 Day 0 CD3 Pre-FMT 61 years \n", + "\n", + " host_body_mass_index host_height ... host_subject_id \\\n", + "0 Restricted access Restricted access ... Donor \n", + "1 Restricted access Restricted access ... Donor \n", + "2 Restricted access Restricted access ... Donor \n", + "3 29.3 165.1 ... CD1 \n", + "4 Restricted access Restricted access ... Donor \n", + "5 35.5 1.55 ... CD9 \n", + "6 34.4 1.56 ... CD13 \n", + "7 32.5 1.727 ... 
CD3 \n", + "\n", + " host_weight host_weight_units race sex \\\n", + "0 Restricted access kg Restricted access Restricted access \n", + "1 Restricted access kg Restricted access Restricted access \n", + "2 Restricted access kg Restricted access Restricted access \n", + "3 80.1 kg white female \n", + "4 Restricted access kg Restricted access Restricted access \n", + "5 85.1 kg white female \n", + "6 83.9 kg white female \n", + "7 97.3 kg white male \n", + "\n", + " file found_taxa total_taxa found_fraction \\\n", + "0 ERR1883195.pickle 5.0 5.0 1.000000 \n", + "1 ERR1883207.pickle 7.0 7.0 1.000000 \n", + "2 ERR1883212.pickle 6.0 7.0 0.857143 \n", + "3 ERR1883214.pickle 6.0 6.0 1.000000 \n", + "4 ERR1883225.pickle 5.0 5.0 1.000000 \n", + "5 ERR1883240.pickle 4.0 4.0 1.000000 \n", + "6 ERR1883250.pickle 5.0 6.0 0.833333 \n", + "7 ERR1883294.pickle 5.0 7.0 0.714286 \n", + "\n", + " found_abundance_fraction \n", + "0 1.000000 \n", + "1 1.000000 \n", + "2 0.895755 \n", + "3 1.000000 \n", + "4 1.000000 \n", + "5 1.000000 \n", + "6 0.941627 \n", + "7 0.500427 \n", + "\n", + "[8 rows x 21 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
sample_idUnnamed: 0collection_timestampday_relative_to_fmtdescriptiondisease_statehost_agehost_age_unitshost_body_mass_indexhost_height...host_subject_idhost_weighthost_weight_unitsracesexfilefound_taxatotal_taxafound_fractionfound_abundance_fraction
0ERR188319512.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883195.pickle5.05.01.0000001.000000
1ERR188320724.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883207.pickle7.07.01.0000001.000000
2ERR188321230.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883212.pickle6.07.00.8571430.895755
3ERR188321432.02011-07-260Day 0 CD1Pre-FMT39years29.3165.1...CD180.1kgwhitefemaleERR1883214.pickle6.06.01.0000001.000000
4ERR188322543.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883225.pickle5.05.01.0000001.000000
5ERR188324058.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55...CD985.1kgwhitefemaleERR1883240.pickle4.04.01.0000001.000000
6ERR188325068.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56...CD1383.9kgwhitefemaleERR1883250.pickle5.06.00.8333330.941627
7ERR1883294112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727...CD397.3kgwhitemaleERR1883294.pickle5.07.00.7142860.500427
\n", + "

8 rows × 21 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 63 + } + ], + "source": [ + "manifest" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y4KAJkhIdspQ" + }, + "source": [ + "This will tell you how many taxa were found in the database and what fraction of the total abundance was represented by the database. For most samples, this looks okay (i.e., >70% of abundance represented).\n", + "\n", + "So we now have our community models and can leverage MICOM fully by simulating community growth - let's discuss what we want to look at.\n", + "\n", + "### Microbiome Context\n", + "\n", + "Now that our models are ready to go, let's think about some of the insights we might gain from these samples. First and foremost, we want to investigate the invasion potential of C. diff. How does its ability to invade vary in samples from healthy donors versus individuals with dysbiotic gut microbiomes (pre-FMT)?\n", + "\n", + "Additionally, we can use MICOM to take a mechanistic look at what metabolic strategies are leveraged by C. diff (e.g., what niche(s) does it occupy) in these different contexts.\n", + "\n", + "All that and more, coming up. Stay tuned!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "etrvjwBLkKdR" + }, + "source": [ + "First we need to import our dietary context. For simplicity we will be using a formulation that represents an average western diet, but if information about host diet is known other formulations can be used (e.g., vegetarian or vegan diet). 
Additional dietary formulations can be found [here]( https://github.com/micom-dev/media/tree/main/media)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 455 + }, + "id": "aJwXAR4PPAkA", + "outputId": "4397913a-b2b6-4059-e355-4226d29a0a90" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " reaction metabolite global_id flux\n", + "reaction \n", + "EX_4abz_m EX_4abz_m 4abz_m EX_4abz(e) 0.1\n", + "EX_4hbz_m EX_4hbz_m 4hbz_m EX_4hbz(e) 0.1\n", + "EX_ac_m EX_ac_m ac_m EX_ac(e) 0.1\n", + "EX_acgam_m EX_acgam_m acgam_m EX_acgam(e) 0.1\n", + "EX_ala_L_m EX_ala_L_m ala_L_m EX_ala_L(e) 0.1\n", + "... ... ... ... ...\n", + "EX_octa_m EX_octa_m octa_m EX_octa(e) 0.0\n", + "EX_adpcbl_m EX_adpcbl_m adpcbl_m EX_adpcbl(e) 0.0\n", + "EX_fe3dcit_m EX_fe3dcit_m fe3dcit_m EX_fe3dcit(e) 0.0\n", + "EX_pydx5p_m EX_pydx5p_m pydx5p_m EX_pydx5p(e) 0.0\n", + "EX_glu_D_m EX_glu_D_m glu_D_m EX_glu_D(e) 0.0\n", + "\n", + "[171 rows x 4 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
reactionmetaboliteglobal_idflux
reaction
EX_4abz_mEX_4abz_m4abz_mEX_4abz(e)0.1
EX_4hbz_mEX_4hbz_m4hbz_mEX_4hbz(e)0.1
EX_ac_mEX_ac_mac_mEX_ac(e)0.1
EX_acgam_mEX_acgam_macgam_mEX_acgam(e)0.1
EX_ala_L_mEX_ala_L_mala_L_mEX_ala_L(e)0.1
...............
EX_octa_mEX_octa_mocta_mEX_octa(e)0.0
EX_adpcbl_mEX_adpcbl_madpcbl_mEX_adpcbl(e)0.0
EX_fe3dcit_mEX_fe3dcit_mfe3dcit_mEX_fe3dcit(e)0.0
EX_pydx5p_mEX_pydx5p_mpydx5p_mEX_pydx5p(e)0.0
EX_glu_D_mEX_glu_D_mglu_D_mEX_glu_D(e)0.0
\n", + "

171 rows × 4 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 64 + } + ], + "source": [ + "from micom.qiime_formats import load_qiime_medium\n", + "medium = load_qiime_medium(\"western_diet_gut_agora.qza\")\n", + "medium" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4s8R4WYUez4g" + }, + "source": [ + "### Growing the models\n", + "Great, now we have our media & our models, it's time to get growing. This will take some time, so we'll use that time as an opportunity to discuss more in depth what these processes do, and what to look for in the results. First, let's run the `grow()` command. This will take the models we've built, and find an optimal solution to the fluxes based upon the medium that's been applied." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3WH8VVrVS4mv" + }, + "source": [ + "If that takes too long or was aborted, we can read it in from the treasure chest." + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34, + "referenced_widgets": [ + "59428de9a88b4204885d03de441cbde3", + "901fb7e8d8c844a8b2e86735ca2bead3" + ] + }, + "id": "IjDguZEcWGjG", + "outputId": "03508c5b-c7fc-46dd-c53b-bbd88dc5c236" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Output()" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "59428de9a88b4204885d03de441cbde3" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [], + "text/html": [ + "
\n"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "\n"
+            ],
+            "text/html": [
+              "
\n",
+              "
\n" + ] + }, + "metadata": {} + } + ], + "source": [ + "from micom.workflows import grow, save_results\n", + "\n", + "growth = grow(manifest, \"models\",medium, tradeoff=0.8, threads=2)\n", + "\n", + "# We'll save the results to a file\n", + "save_results(growth, \"growth.zip\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rHedHJxHWkjy" + }, + "source": [ + "Again, if that takes too long or was aborted, we can read it in from the treasure chest." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rcPNBkDpWGrQ" + }, + "outputs": [], + "source": [ + "from micom.workflows import load_results\n", + "\n", + "try: # Will only run if the previous step failed\n", + " growth\n", + "except NameError:\n", + " growth = load_results(\"treasure_chest/growth.zip\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "69gq9QfAzqxq" + }, + "source": [ + "What kind of results did we get? Well, `grow` returns a tuple of 3 data sets:\n", + "\n", + "1. The predicted growth rate for all taxa in all samples\n", + "2. The import and export fluxes for each taxon and the external environment\n", + "3. Annotations for the fluxes mapping to other databases\n", + "\n", + "### 📈 Growth Rates\n", + "\n", + "The growth rates are pretty straightforward." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "2r_XUm7U-HSm", + "outputId": "2ef89987-13a9-48a6-85b0-26dcdcf7ff92" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " abundance growth_rate reactions metabolites taxon \\\n", + "0 0.187908 0.147278 3293 1883 Bacteroides \n", + "1 0.100000 0.078512 1844 1401 Clostridioides \n", + "2 0.130915 0.102472 1986 1472 Faecalibacterium \n", + "3 0.084938 0.066644 2879 1747 Parabacteroides \n", + "4 0.496238 0.389606 2712 1656 Phocaeicola \n", + "5 0.170093 0.248553 3293 1883 Bacteroides \n", + "6 0.071992 0.108048 2235 1371 Barnesiella \n", + "7 0.070653 0.101473 2808 1732 Blautia \n", + "8 0.100000 0.143653 1844 1401 Clostridioides \n", + "9 0.090526 0.129872 1986 1472 Faecalibacterium \n", + "10 0.083122 0.122119 2879 1747 Parabacteroides \n", + "11 0.413614 0.607244 2712 1656 Phocaeicola \n", + "12 0.759954 0.300619 2274 1386 Akkermansia \n", + "13 0.100000 0.144461 1844 1401 Clostridioides \n", + "14 0.055102 0.041183 3532 2001 Clostridium \n", + "15 0.013039 0.009244 2826 1639 Erysipelatoclostridium \n", + "16 0.035175 0.025067 1274 1138 Paeniclostridium \n", + "17 0.036730 0.031724 1652 1308 Veillonella \n", + "18 0.094704 0.133863 3293 1883 Bacteroides \n", + "19 0.092168 0.130290 2808 1732 Blautia \n", + "20 0.111638 0.157821 1844 1401 Clostridioides \n", + "21 0.312443 0.441683 1986 1472 Faecalibacterium \n", + "22 0.241890 0.341913 2712 1656 Phocaeicola \n", + "23 0.147157 0.208032 2860 1805 Streptococcus \n", + "24 0.565087 0.128835 2274 1386 Akkermansia \n", + "25 0.100000 0.377698 1844 1401 Clostridioides \n", + "26 0.088290 0.323804 2197 1602 Fusobacterium \n", + "27 0.246623 0.928642 1652 1308 Veillonella \n", + "28 0.202085 0.267660 2274 1386 Akkermansia \n", + "29 0.246283 0.412257 2808 1732 Blautia \n", + "30 0.105784 0.176446 1844 1401 Clostridioides 
\n", + "31 0.020161 0.033840 1966 1392 Coprobacillus \n", + "32 0.012706 0.023217 1187 1056 Dialister \n", + "33 0.210121 0.353430 2826 1639 Erysipelatoclostridium \n", + "34 0.014204 0.022333 1207 1105 Flavonifractor \n", + "35 0.013217 0.021691 1369 1170 Lachnospira \n", + "36 0.175438 0.295404 2681 1578 Mediterraneibacter \n", + "37 0.045896 0.069084 2650 1613 Alistipes \n", + "38 0.012559 0.017830 2034 1491 Anaerostipes \n", + "39 0.168400 0.243903 3293 1883 Bacteroides \n", + "40 0.056172 0.083090 2235 1371 Barnesiella \n", + "41 0.027401 0.039184 2808 1732 Blautia \n", + "42 0.100000 0.141583 1844 1401 Clostridioides \n", + "43 0.021236 0.029938 2073 1577 Eubacterium \n", + "44 0.027287 0.038572 1986 1472 Faecalibacterium \n", + "45 0.104009 0.151023 2879 1747 Parabacteroides \n", + "46 0.010732 0.012497 1080 1019 Parasutterella \n", + "47 0.426310 0.619569 2712 1656 Phocaeicola \n", + "48 0.192591 0.360678 1844 1401 Clostridioides \n", + "49 0.197835 0.370503 2197 1602 Fusobacterium \n", + "50 0.194153 0.363601 1206 1068 Megasphaera \n", + "51 0.098416 0.184309 1577 1286 Morganella \n", + "52 0.025775 0.042924 1080 1019 Parasutterella \n", + "53 0.084021 0.156784 2860 1805 Streptococcus \n", + "54 0.207208 0.388049 1652 1308 Veillonella \n", + "\n", + " tradeoff sample_id Unnamed: 0 collection_timestamp day_relative_to_fmt \\\n", + "0 0.8 ERR1883195 12 2011-10-24 26 \n", + "1 0.8 ERR1883195 12 2011-10-24 26 \n", + "2 0.8 ERR1883195 12 2011-10-24 26 \n", + "3 0.8 ERR1883195 12 2011-10-24 26 \n", + "4 0.8 ERR1883195 12 2011-10-24 26 \n", + "5 0.8 ERR1883207 24 2012-01-12 44 \n", + "6 0.8 ERR1883207 24 2012-01-12 44 \n", + "7 0.8 ERR1883207 24 2012-01-12 44 \n", + "8 0.8 ERR1883207 24 2012-01-12 44 \n", + "9 0.8 ERR1883207 24 2012-01-12 44 \n", + "10 0.8 ERR1883207 24 2012-01-12 44 \n", + "11 0.8 ERR1883207 24 2012-01-12 44 \n", + "12 0.8 ERR1883214 32 2011-07-26 0 \n", + "13 0.8 ERR1883214 32 2011-07-26 0 \n", + "14 0.8 ERR1883214 32 2011-07-26 0 \n", + "15 
0.8 ERR1883214 32 2011-07-26 0 \n", + "16 0.8 ERR1883214 32 2011-07-26 0 \n", + "17 0.8 ERR1883214 32 2011-07-26 0 \n", + "18 0.8 ERR1883212 30 2012-10-10 135 \n", + "19 0.8 ERR1883212 30 2012-10-10 135 \n", + "20 0.8 ERR1883212 30 2012-10-10 135 \n", + "21 0.8 ERR1883212 30 2012-10-10 135 \n", + "22 0.8 ERR1883212 30 2012-10-10 135 \n", + "23 0.8 ERR1883212 30 2012-10-10 135 \n", + "24 0.8 ERR1883240 58 2012-02-14 pre-FMT \n", + "25 0.8 ERR1883240 58 2012-02-14 pre-FMT \n", + "26 0.8 ERR1883240 58 2012-02-14 pre-FMT \n", + "27 0.8 ERR1883240 58 2012-02-14 pre-FMT \n", + "28 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "29 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "30 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "31 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "32 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "33 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "34 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "35 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "36 0.8 ERR1883250 68 2011-12-23 pre-FMT \n", + "37 0.8 ERR1883225 43 2011-07-26 54 \n", + "38 0.8 ERR1883225 43 2011-07-26 54 \n", + "39 0.8 ERR1883225 43 2011-07-26 54 \n", + "40 0.8 ERR1883225 43 2011-07-26 54 \n", + "41 0.8 ERR1883225 43 2011-07-26 54 \n", + "42 0.8 ERR1883225 43 2011-07-26 54 \n", + "43 0.8 ERR1883225 43 2011-07-26 54 \n", + "44 0.8 ERR1883225 43 2011-07-26 54 \n", + "45 0.8 ERR1883225 43 2011-07-26 54 \n", + "46 0.8 ERR1883225 43 2011-07-26 54 \n", + "47 0.8 ERR1883225 43 2011-07-26 54 \n", + "48 0.8 ERR1883294 112 2011-09-29 0 \n", + "49 0.8 ERR1883294 112 2011-09-29 0 \n", + "50 0.8 ERR1883294 112 2011-09-29 0 \n", + "51 0.8 ERR1883294 112 2011-09-29 0 \n", + "52 0.8 ERR1883294 112 2011-09-29 0 \n", + "53 0.8 ERR1883294 112 2011-09-29 0 \n", + "54 0.8 ERR1883294 112 2011-09-29 0 \n", + "\n", + " ... host_age host_age_units host_body_mass_index \\\n", + "0 ... Restricted access years Restricted access \n", + "1 ... Restricted access years Restricted access \n", + "2 ... 
Restricted access years Restricted access \n", + "3 ... Restricted access years Restricted access \n", + "4 ... Restricted access years Restricted access \n", + "5 ... Restricted access years Restricted access \n", + "6 ... Restricted access years Restricted access \n", + "7 ... Restricted access years Restricted access \n", + "8 ... Restricted access years Restricted access \n", + "9 ... Restricted access years Restricted access \n", + "10 ... Restricted access years Restricted access \n", + "11 ... Restricted access years Restricted access \n", + "12 ... 39 years 29.3 \n", + "13 ... 39 years 29.3 \n", + "14 ... 39 years 29.3 \n", + "15 ... 39 years 29.3 \n", + "16 ... 39 years 29.3 \n", + "17 ... 39 years 29.3 \n", + "18 ... Restricted access years Restricted access \n", + "19 ... Restricted access years Restricted access \n", + "20 ... Restricted access years Restricted access \n", + "21 ... Restricted access years Restricted access \n", + "22 ... Restricted access years Restricted access \n", + "23 ... Restricted access years Restricted access \n", + "24 ... 47 years 35.5 \n", + "25 ... 47 years 35.5 \n", + "26 ... 47 years 35.5 \n", + "27 ... 47 years 35.5 \n", + "28 ... 53 years 34.4 \n", + "29 ... 53 years 34.4 \n", + "30 ... 53 years 34.4 \n", + "31 ... 53 years 34.4 \n", + "32 ... 53 years 34.4 \n", + "33 ... 53 years 34.4 \n", + "34 ... 53 years 34.4 \n", + "35 ... 53 years 34.4 \n", + "36 ... 53 years 34.4 \n", + "37 ... Restricted access years Restricted access \n", + "38 ... Restricted access years Restricted access \n", + "39 ... Restricted access years Restricted access \n", + "40 ... Restricted access years Restricted access \n", + "41 ... Restricted access years Restricted access \n", + "42 ... Restricted access years Restricted access \n", + "43 ... Restricted access years Restricted access \n", + "44 ... Restricted access years Restricted access \n", + "45 ... Restricted access years Restricted access \n", + "46 ... 
Restricted access years Restricted access \n", + "47 ... Restricted access years Restricted access \n", + "48 ... 61 years 32.5 \n", + "49 ... 61 years 32.5 \n", + "50 ... 61 years 32.5 \n", + "51 ... 61 years 32.5 \n", + "52 ... 61 years 32.5 \n", + "53 ... 61 years 32.5 \n", + "54 ... 61 years 32.5 \n", + "\n", + " host_height host_height_units host_subject_id host_weight \\\n", + "0 Restricted access m Donor Restricted access \n", + "1 Restricted access m Donor Restricted access \n", + "2 Restricted access m Donor Restricted access \n", + "3 Restricted access m Donor Restricted access \n", + "4 Restricted access m Donor Restricted access \n", + "5 Restricted access m Donor Restricted access \n", + "6 Restricted access m Donor Restricted access \n", + "7 Restricted access m Donor Restricted access \n", + "8 Restricted access m Donor Restricted access \n", + "9 Restricted access m Donor Restricted access \n", + "10 Restricted access m Donor Restricted access \n", + "11 Restricted access m Donor Restricted access \n", + "12 165.1 m CD1 80.1 \n", + "13 165.1 m CD1 80.1 \n", + "14 165.1 m CD1 80.1 \n", + "15 165.1 m CD1 80.1 \n", + "16 165.1 m CD1 80.1 \n", + "17 165.1 m CD1 80.1 \n", + "18 Restricted access m Donor Restricted access \n", + "19 Restricted access m Donor Restricted access \n", + "20 Restricted access m Donor Restricted access \n", + "21 Restricted access m Donor Restricted access \n", + "22 Restricted access m Donor Restricted access \n", + "23 Restricted access m Donor Restricted access \n", + "24 1.55 m CD9 85.1 \n", + "25 1.55 m CD9 85.1 \n", + "26 1.55 m CD9 85.1 \n", + "27 1.55 m CD9 85.1 \n", + "28 1.56 m CD13 83.9 \n", + "29 1.56 m CD13 83.9 \n", + "30 1.56 m CD13 83.9 \n", + "31 1.56 m CD13 83.9 \n", + "32 1.56 m CD13 83.9 \n", + "33 1.56 m CD13 83.9 \n", + "34 1.56 m CD13 83.9 \n", + "35 1.56 m CD13 83.9 \n", + "36 1.56 m CD13 83.9 \n", + "37 Restricted access m Donor Restricted access \n", + "38 Restricted access m Donor Restricted access 
\n", + "39 Restricted access m Donor Restricted access \n", + "40 Restricted access m Donor Restricted access \n", + "41 Restricted access m Donor Restricted access \n", + "42 Restricted access m Donor Restricted access \n", + "43 Restricted access m Donor Restricted access \n", + "44 Restricted access m Donor Restricted access \n", + "45 Restricted access m Donor Restricted access \n", + "46 Restricted access m Donor Restricted access \n", + "47 Restricted access m Donor Restricted access \n", + "48 1.727 m CD3 97.3 \n", + "49 1.727 m CD3 97.3 \n", + "50 1.727 m CD3 97.3 \n", + "51 1.727 m CD3 97.3 \n", + "52 1.727 m CD3 97.3 \n", + "53 1.727 m CD3 97.3 \n", + "54 1.727 m CD3 97.3 \n", + "\n", + " host_weight_units race sex \n", + "0 kg Restricted access Restricted access \n", + "1 kg Restricted access Restricted access \n", + "2 kg Restricted access Restricted access \n", + "3 kg Restricted access Restricted access \n", + "4 kg Restricted access Restricted access \n", + "5 kg Restricted access Restricted access \n", + "6 kg Restricted access Restricted access \n", + "7 kg Restricted access Restricted access \n", + "8 kg Restricted access Restricted access \n", + "9 kg Restricted access Restricted access \n", + "10 kg Restricted access Restricted access \n", + "11 kg Restricted access Restricted access \n", + "12 kg white female \n", + "13 kg white female \n", + "14 kg white female \n", + "15 kg white female \n", + "16 kg white female \n", + "17 kg white female \n", + "18 kg Restricted access Restricted access \n", + "19 kg Restricted access Restricted access \n", + "20 kg Restricted access Restricted access \n", + "21 kg Restricted access Restricted access \n", + "22 kg Restricted access Restricted access \n", + "23 kg Restricted access Restricted access \n", + "24 kg white female \n", + "25 kg white female \n", + "26 kg white female \n", + "27 kg white female \n", + "28 kg white female \n", + "29 kg white female \n", + "30 kg white female \n", + "31 kg white 
female \n", + "32 kg white female \n", + "33 kg white female \n", + "34 kg white female \n", + "35 kg white female \n", + "36 kg white female \n", + "37 kg Restricted access Restricted access \n", + "38 kg Restricted access Restricted access \n", + "39 kg Restricted access Restricted access \n", + "40 kg Restricted access Restricted access \n", + "41 kg Restricted access Restricted access \n", + "42 kg Restricted access Restricted access \n", + "43 kg Restricted access Restricted access \n", + "44 kg Restricted access Restricted access \n", + "45 kg Restricted access Restricted access \n", + "46 kg Restricted access Restricted access \n", + "47 kg Restricted access Restricted access \n", + "48 kg white male \n", + "49 kg white male \n", + "50 kg white male \n", + "51 kg white male \n", + "52 kg white male \n", + "53 kg white male \n", + "54 kg white male \n", + "\n", + "[55 rows x 22 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
abundancegrowth_ratereactionsmetabolitestaxontradeoffsample_idUnnamed: 0collection_timestampday_relative_to_fmt...host_agehost_age_unitshost_body_mass_indexhost_heighthost_height_unitshost_subject_idhost_weighthost_weight_unitsracesex
00.1879080.14727832931883Bacteroides0.8ERR1883195122011-10-2426...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
10.1000000.07851218441401Clostridioides0.8ERR1883195122011-10-2426...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
20.1309150.10247219861472Faecalibacterium0.8ERR1883195122011-10-2426...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
30.0849380.06664428791747Parabacteroides0.8ERR1883195122011-10-2426...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
40.4962380.38960627121656Phocaeicola0.8ERR1883195122011-10-2426...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
50.1700930.24855332931883Bacteroides0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
60.0719920.10804822351371Barnesiella0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
70.0706530.10147328081732Blautia0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
80.1000000.14365318441401Clostridioides0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
90.0905260.12987219861472Faecalibacterium0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
100.0831220.12211928791747Parabacteroides0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
110.4136140.60724427121656Phocaeicola0.8ERR1883207242012-01-1244...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
120.7599540.30061922741386Akkermansia0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
130.1000000.14446118441401Clostridioides0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
140.0551020.04118335322001Clostridium0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
150.0130390.00924428261639Erysipelatoclostridium0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
160.0351750.02506712741138Paeniclostridium0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
170.0367300.03172416521308Veillonella0.8ERR1883214322011-07-260...39years29.3165.1mCD180.1kgwhitefemale
180.0947040.13386332931883Bacteroides0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
190.0921680.13029028081732Blautia0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
200.1116380.15782118441401Clostridioides0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
210.3124430.44168319861472Faecalibacterium0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
220.2418900.34191327121656Phocaeicola0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
230.1471570.20803228601805Streptococcus0.8ERR1883212302012-10-10135...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
240.5650870.12883522741386Akkermansia0.8ERR1883240582012-02-14pre-FMT...47years35.51.55mCD985.1kgwhitefemale
250.1000000.37769818441401Clostridioides0.8ERR1883240582012-02-14pre-FMT...47years35.51.55mCD985.1kgwhitefemale
260.0882900.32380421971602Fusobacterium0.8ERR1883240582012-02-14pre-FMT...47years35.51.55mCD985.1kgwhitefemale
270.2466230.92864216521308Veillonella0.8ERR1883240582012-02-14pre-FMT...47years35.51.55mCD985.1kgwhitefemale
280.2020850.26766022741386Akkermansia0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
290.2462830.41225728081732Blautia0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
300.1057840.17644618441401Clostridioides0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
310.0201610.03384019661392Coprobacillus0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
320.0127060.02321711871056Dialister0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
330.2101210.35343028261639Erysipelatoclostridium0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
340.0142040.02233312071105Flavonifractor0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
350.0132170.02169113691170Lachnospira0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
360.1754380.29540426811578Mediterraneibacter0.8ERR1883250682011-12-23pre-FMT...53years34.41.56mCD1383.9kgwhitefemale
370.0458960.06908426501613Alistipes0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
380.0125590.01783020341491Anaerostipes0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
390.1684000.24390332931883Bacteroides0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
400.0561720.08309022351371Barnesiella0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
410.0274010.03918428081732Blautia0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
420.1000000.14158318441401Clostridioides0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
430.0212360.02993820731577Eubacterium0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
440.0272870.03857219861472Faecalibacterium0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
450.1040090.15102328791747Parabacteroides0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
460.0107320.01249710801019Parasutterella0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
470.4263100.61956927121656Phocaeicola0.8ERR1883225432011-07-2654...Restricted accessyearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access
480.1925910.36067818441401Clostridioides0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
490.1978350.37050321971602Fusobacterium0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
500.1941530.36360112061068Megasphaera0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
510.0984160.18430915771286Morganella0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
520.0257750.04292410801019Parasutterella0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
530.0840210.15678428601805Streptococcus0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
540.2072080.38804916521308Veillonella0.8ERR18832941122011-09-290...61years32.51.727mCD397.3kgwhitemale
\n", + "

55 rows × 22 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 112 + } + ], + "source": [ + "growth_rates=pd.merge(growth.growth_rates,metadata,on='sample_id')\n", + "growth_rates" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F5BK7DDv0UfA" + }, + "source": [ + "### ↔️ Exchange Fluxes\n", + "\n", + "More interesting are the exchange fluxes. These reactions represent the import and export of metabolites into and out of the system. Let's look at those now:" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "lQW2BBS10jdN", + "outputId": "d0ebb86e-549a-4727-8f9c-4461c25eeeca" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " taxon sample_id tolerance reaction flux \\\n", + "0 Bacteroides ERR1883195 0.0001 EX_MGlcn150(e) -0.001865 \n", + "1 Bacteroides ERR1883195 0.0001 EX_MGlcn180(e) -0.001879 \n", + "4 Bacteroides ERR1883195 0.0001 EX_MGlcn81_rl(e) 0.002123 \n", + "5 Bacteroides ERR1883195 0.0001 EX_MGlcn24(e) -0.002457 \n", + "6 Bacteroides ERR1883195 0.0001 EX_MGlcn165(e) 0.004506 \n", + "... ... ... ... ... ... \n", + "22046 medium ERR1883294 0.0001 EX_MGlcn170_rl_m -0.000192 \n", + "22050 medium ERR1883294 0.0001 EX_nmn_m -0.100040 \n", + "22056 medium ERR1883294 0.0001 EX_akg_m 0.000115 \n", + "22070 medium ERR1883294 0.0001 EX_oaa_m 0.000154 \n", + "22078 medium ERR1883294 0.0001 EX_xtsn_m 0.000457 \n", + "\n", + " abundance metabolite direction \n", + "0 0.208787 MGlcn150[e] import \n", + "1 0.208787 MGlcn180[e] import \n", + "4 0.208787 MGlcn81_rl[e] export \n", + "5 0.208787 MGlcn24[e] import \n", + "6 0.208787 MGlcn165[e] export \n", + "... ... ... ... \n", + "22046 NaN MGlcn170_rl_m import \n", + "22050 NaN nmn_m import \n", + "22056 NaN akg_m export \n", + "22070 NaN oaa_m export \n", + "22078 NaN xtsn_m export \n", + "\n", + "[10597 rows x 8 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
taxonsample_idtolerancereactionfluxabundancemetabolitedirection
0BacteroidesERR18831950.0001EX_MGlcn150(e)-0.0018650.208787MGlcn150[e]import
1BacteroidesERR18831950.0001EX_MGlcn180(e)-0.0018790.208787MGlcn180[e]import
4BacteroidesERR18831950.0001EX_MGlcn81_rl(e)0.0021230.208787MGlcn81_rl[e]export
5BacteroidesERR18831950.0001EX_MGlcn24(e)-0.0024570.208787MGlcn24[e]import
6BacteroidesERR18831950.0001EX_MGlcn165(e)0.0045060.208787MGlcn165[e]export
...........................
22046mediumERR18832940.0001EX_MGlcn170_rl_m-0.000192NaNMGlcn170_rl_mimport
22050mediumERR18832940.0001EX_nmn_m-0.100040NaNnmn_mimport
22056mediumERR18832940.0001EX_akg_m0.000115NaNakg_mexport
22070mediumERR18832940.0001EX_oaa_m0.000154NaNoaa_mexport
22078mediumERR18832940.0001EX_xtsn_m0.000457NaNxtsn_mexport
\n", + "

10597 rows × 8 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 102 + } + ], + "source": [ + "growth.exchanges" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pu5XtkUl1YG1" + }, + "source": [ + "So we see how much of each metabolite is either consumed or produced by each taxon in each sample. `tolerance` denotes the accuracy of the solver and tells you the smallest absolute flux that is likely different from zero (i.e., substantial flux). *All of the fluxes are normalized to 1 g dry weight of bacteria*. So, you can directly compare fluxes between taxa, even if they are present at very different abundances.\n", + "\n", + "If you're curious what the abbreviation for each of these metabolites represents, that can be found in the annotations dataframe. For instance, let's find out what `\"tre[e]\"` represents." + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 112 + }, + "id": "DphXa9hw1yxM", + "outputId": "4bf58c2c-2cc4-47b0-e4af-e7e7b827e54d" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " metabolite name molecular_weight C_number N_number \\\n", + "reaction \n", + "EX_tre(e) tre[e] Trehalose 342.29648 12 0 \n", + "\n", + " hmdb inchi kegg.compound pubchem.compound chebi reaction \n", + "reaction \n", + "EX_tre(e) HMDB00975 NaN C01083 7427 NaN EX_tre(e) " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
metabolitenamemolecular_weightC_numberN_numberhmdbinchikegg.compoundpubchem.compoundchebireaction
reaction
EX_tre(e)tre[e]Trehalose342.29648120HMDB00975NaNC010837427NaNEX_tre(e)
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 83 + } + ], + "source": [ + "anns = growth.annotations\n", + "anns[anns.metabolite == \"tre[e]\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GVHLD2dm4a6B" + }, + "source": [ + "Trehalose! Interesting, [that's an important metabolite](https://pubmed.ncbi.nlm.nih.gov/34277467/) in the context of CDI! All of these annotations, and more information, are also available at https://vmh.life, maintained by Dr. Ines Thiele's lab." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CImtzqRJbbGj" + }, + "source": [ + "\n", + "# 📊 Visualizations\n", + "\n", + "Let's visualize our results. Because of the rich output of these models, it can be overwhelming to represent it all, but don't worry! There are tools in place for this already.\n", + "\n", + "We will use the standard visualizations included in MICOM. These tools take in the growth results we obtained before and create visualizations in standalone HTML files that bundle the plots and raw data and can be viewed directly in your browser.\n", + "\n", + "First, let's look at the growth rates of each taxon across samples." + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "id": "EaplMHFLcMT7" + }, + "outputs": [], + "source": [ + "from micom.viz import *\n", + "\n", + "viz = plot_growth(growth, filename=\"growthrates.html\")" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Normally, we could call `viz.view()` afterwards and it would open the plot in our web browser. However, this will not work in Colab. Instead, the plot function creates the file `growthrates.html` in your `materials` folder. To open it, simply download that file and view it in your web browser. We can see that there are many things going on, but it's not super clear. Let's continue."
+ ], + "metadata": { + "id": "_sjNIuCSXkyb" + } + }, + { + "cell_type": "markdown", + "source": [ + "We're interested in understanding the invasion potential of C. diff, so let's extract the predicted C. diff growth rates. In addition to the C. diff growth rate, we can also look at what fraction of the community growth rate this represents.\n", + "\n" + ], + "metadata": { + "id": "qPTCyw-7RqFm" + } + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 436 + }, + "id": "9YlWytSp-yKI", + "outputId": "c77dda37-1dd1-4ccf-a99c-cdb1519c5e8a" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " abundance growth_rate reactions metabolites taxon tradeoff \\\n", + "1 0.100000 0.078512 1844 1401 Clostridioides 0.8 \n", + "8 0.100000 0.143653 1844 1401 Clostridioides 0.8 \n", + "13 0.100000 0.144461 1844 1401 Clostridioides 0.8 \n", + "20 0.111638 0.157821 1844 1401 Clostridioides 0.8 \n", + "25 0.100000 0.377698 1844 1401 Clostridioides 0.8 \n", + "30 0.105784 0.176446 1844 1401 Clostridioides 0.8 \n", + "42 0.100000 0.141583 1844 1401 Clostridioides 0.8 \n", + "48 0.192591 0.360678 1844 1401 Clostridioides 0.8 \n", + "\n", + " sample_id Unnamed: 0 collection_timestamp day_relative_to_fmt ... \\\n", + "1 ERR1883195 12 2011-10-24 26 ... \n", + "8 ERR1883207 24 2012-01-12 44 ... \n", + "13 ERR1883214 32 2011-07-26 0 ... \n", + "20 ERR1883212 30 2012-10-10 135 ... \n", + "25 ERR1883240 58 2012-02-14 pre-FMT ... \n", + "30 ERR1883250 68 2011-12-23 pre-FMT ... \n", + "42 ERR1883225 43 2011-07-26 54 ... \n", + "48 ERR1883294 112 2011-09-29 0 ... 
\n", + "\n", + " host_age_units host_body_mass_index host_height host_height_units \\\n", + "1 years Restricted access Restricted access m \n", + "8 years Restricted access Restricted access m \n", + "13 years 29.3 165.1 m \n", + "20 years Restricted access Restricted access m \n", + "25 years 35.5 1.55 m \n", + "30 years 34.4 1.56 m \n", + "42 years Restricted access Restricted access m \n", + "48 years 32.5 1.727 m \n", + "\n", + " host_subject_id host_weight host_weight_units race \\\n", + "1 Donor Restricted access kg Restricted access \n", + "8 Donor Restricted access kg Restricted access \n", + "13 CD1 80.1 kg white \n", + "20 Donor Restricted access kg Restricted access \n", + "25 CD9 85.1 kg white \n", + "30 CD13 83.9 kg white \n", + "42 Donor Restricted access kg Restricted access \n", + "48 CD3 97.3 kg white \n", + "\n", + " sex abundance_weighted_growth_rate \n", + "1 Restricted access 0.007851 \n", + "8 Restricted access 0.014365 \n", + "13 female 0.014446 \n", + "20 Restricted access 0.017619 \n", + "25 female 0.037770 \n", + "30 female 0.018665 \n", + "42 Restricted access 0.014158 \n", + "48 male 0.069463 \n", + "\n", + "[8 rows x 23 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
abundancegrowth_ratereactionsmetabolitestaxontradeoffsample_idUnnamed: 0collection_timestampday_relative_to_fmt...host_age_unitshost_body_mass_indexhost_heighthost_height_unitshost_subject_idhost_weighthost_weight_unitsracesexabundance_weighted_growth_rate
10.1000000.07851218441401Clostridioides0.8ERR1883195122011-10-2426...yearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access0.007851
80.1000000.14365318441401Clostridioides0.8ERR1883207242012-01-1244...yearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access0.014365
130.1000000.14446118441401Clostridioides0.8ERR1883214322011-07-260...years29.3165.1mCD180.1kgwhitefemale0.014446
200.1116380.15782118441401Clostridioides0.8ERR1883212302012-10-10135...yearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access0.017619
250.1000000.37769818441401Clostridioides0.8ERR1883240582012-02-14pre-FMT...years35.51.55mCD985.1kgwhitefemale0.037770
300.1057840.17644618441401Clostridioides0.8ERR1883250682011-12-23pre-FMT...years34.41.56mCD1383.9kgwhitefemale0.018665
420.1000000.14158318441401Clostridioides0.8ERR1883225432011-07-2654...yearsRestricted accessRestricted accessmDonorRestricted accesskgRestricted accessRestricted access0.014158
480.1925910.36067818441401Clostridioides0.8ERR18832941122011-09-290...years32.51.727mCD397.3kgwhitemale0.069463
\n", + "

8 rows Γ— 23 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 121 + } + ], + "source": [ + "cdiff = growth_rates[growth_rates.taxon=='Clostridioides'].copy()\n", + "cdiff" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Now that we've extracted the C. diff-specific growth rates, let's take a look at how they compare between patients with healthy and dysbiotic gut microbiomes." + ], + "metadata": { + "id": "bQKYxSZTSPk1" + } + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 468 + }, + "id": "AWG3U094-_OI", + "outputId": "ef7e9ff0-9348-48ba-af7f-d05c65ad91d6" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 122 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "import seaborn as sns\n", + "sns.boxplot(x='disease_state',y='growth_rate',data=cdiff)" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Looks like C. diff is predicted to grow in all samples but its predicted growth rate is ~2x higher in the Pre-FMT samples. You can see there is also a decent amount of variation in the Pre-FMT results." + ], + "metadata": { + "id": "MEuym_PVui-k" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G1JbbKrLcVye" + }, + "source": [ + "## Growth niches\n", + "\n", + "Another thing we can look at is whether individual taxa inhabit different growth niches across different disease contexts. Here we can use the `plot_exchanges_per_taxon` function to see how exchanges differ within and between taxa, within and across human populations." + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": { + "id": "NlZrfv38esj8", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "6df836fd-71ac-4cfd-fd1c-883e9d31f311" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 89 + } + ], + "source": [ + "plot_exchanges_per_taxon(growth, perplexity=4, direction=\"import\", filename=\"niche.html\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qXnbUCCs2yVG" + }, + "source": [ + "\n", + "This function projects the full set of import or export fluxes onto a two dimensional plane, and arranges taxa so that more similar flux patterns lie nearer together. Taxa closer to one another compete for a more similar set of resources (and/or produce a more similar set of metabolites). 
The center of the plot signifies a more competitive nutrient space, whereas clusters on the outskirts denote more isolated niches.\n", + "\n", + "You can tune [TSNE parameters](https://distill.pub/2016/misread-tsne/), such as perplexity, to get a more meaningful grouping. We will lower the perplexity here since we don't have a lot of data points.\n" + ] + }, + { + "cell_type": "markdown", + "source": [ + "One small takeaway from this analysis is the separation between C. diff pre- and post-FMT samples, suggesting that C. diff may leverage different metabolic strategies in these contexts. Let's take a closer look at this..." + ], + "metadata": { + "id": "weoSaLDPZYAK" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4Y_XfHkB4sO8" + }, + "source": [ + "## Comparative Metabolomics\n", + "\n", + "Now let's compare the metabolomic imports between the two disease contexts. We're interested to see how the metabolomic profile of the microbiome changes when the disease state changes, as changes in microbiome context can lead to changes in host susceptibility to infection. To look into this more deeply, we'll transform the microbiome import data and then plot the metabolite imports on a heatmap." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pAtec3I78DJJ" + }, + "source": [ + "We can use the `consumption_rates` function in MICOM to calculate consumption rates from the growth results. This will tell us what the patient microbiomes are consuming and provide additional insight into available niches. To visualize the results we'll run a centered log ratio transformation on the data, to account for the compositional nature of these data and compare all the fluxes against each other. Importantly, here we consider the consumption rates for samples with no C. diff present to understand how the initial state of the patient microbiomes may influence invasion potential." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": { + "id": "DJAsxjnFdajN", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "outputId": "59b69294-cdef-4fdf-a5fa-5f1d8cab1cde" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "disease_state Pre-FMT healthy \\\n", + "sample_id ERR1883214 ERR1883240 ERR1883250 ERR1883294 ERR1883195 \n", + "name \n", + "D-glucose -3.253101 -3.564477 -2.064038 -3.972573 4.450353 \n", + "acetate 2.572101 3.594739 4.306373 4.539524 -2.642172 \n", + "Water 2.966225 -3.584166 3.043172 -3.972573 4.497614 \n", + "D-Fructose 3.219760 -3.584166 -0.695070 -3.972573 3.268810 \n", + "pyruvate -3.404889 3.505223 -3.497278 3.022735 -2.642172 \n", + "... ... ... ... ... ... \n", + "Thiamin -3.404889 -2.773089 -2.982110 -3.403379 -2.388275 \n", + "Spermidine -3.404889 -2.445104 -2.992604 -3.250518 -2.374584 \n", + "Fe2+ -2.838896 -2.091063 -2.203376 -2.370595 -1.778976 \n", + "Putrescine -3.404889 -2.848543 -3.087577 -3.295817 -2.353273 \n", + "Menaquinone 8 -2.873638 -3.295815 -2.979719 -3.351557 -2.276149 \n", + "\n", + "disease_state \n", + "sample_id ERR1883207 ERR1883212 ERR1883225 \n", + "name \n", + "D-glucose 4.493531 3.416515 4.569671 \n", + "acetate -2.772490 -3.668369 -2.625105 \n", + "Water 4.280483 3.763363 4.354428 \n", + "D-Fructose 3.253561 2.612354 3.276231 \n", + "pyruvate -2.772490 -3.668369 -2.625105 \n", + "... ... ... ... \n", + "Thiamin -2.444364 -2.939826 -2.378889 \n", + "Spermidine -2.445049 -2.936479 -2.376260 \n", + "Fe2+ -1.820472 -2.611296 -1.668659 \n", + "Putrescine -2.438953 -2.912851 -2.400161 \n", + "Menaquinone 8 -2.471199 -3.185775 -2.625105 \n", + "\n", + "[647 rows x 8 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
disease_statePre-FMThealthy
sample_idERR1883214ERR1883240ERR1883250ERR1883294ERR1883195ERR1883207ERR1883212ERR1883225
name
D-glucose-3.253101-3.564477-2.064038-3.9725734.4503534.4935313.4165154.569671
acetate2.5721013.5947394.3063734.539524-2.642172-2.772490-3.668369-2.625105
Water2.966225-3.5841663.043172-3.9725734.4976144.2804833.7633634.354428
D-Fructose3.219760-3.584166-0.695070-3.9725733.2688103.2535612.6123543.276231
pyruvate-3.4048893.505223-3.4972783.022735-2.642172-2.772490-3.668369-2.625105
...........................
Thiamin-3.404889-2.773089-2.982110-3.403379-2.388275-2.444364-2.939826-2.378889
Spermidine-3.404889-2.445104-2.992604-3.250518-2.374584-2.445049-2.936479-2.376260
Fe2+-2.838896-2.091063-2.203376-2.370595-1.778976-1.820472-2.611296-1.668659
Putrescine-3.404889-2.848543-3.087577-3.295817-2.353273-2.438953-2.912851-2.400161
Menaquinone 8-2.873638-3.295815-2.979719-3.351557-2.276149-2.471199-3.185775-2.625105
\n", + "

647 rows Γ— 8 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 103 + } + ], + "source": [ + "from micom.measures import consumption_rates\n", + "import numpy as np\n", + "no_cdiff_growth = load_results(\"treasure_chest/no-cdiff-growth.zip\") # Load growth results with no C. diff invasion\n", + "exchanges = consumption_rates(no_cdiff_growth) # extract consumption rates\n", + "exchanges=pd.merge(exchanges,metadata,on='sample_id') # add metadata\n", + "exchanges = pd.pivot_table( # convert to a matrix of samples vs. metabolites\n", + " exchanges, # that contains the consumption rates\n", + " index = ['disease_state','sample_id'],\n", + " columns = 'name',\n", + " values = 'flux'\n", + ")\n", + "exchanges = exchanges.T.fillna(0.0) # if a metabolite is not consumed its flux is zero\n", + "exchanges = exchanges.apply( # ...and a CLR transform again, normalizes the fluxes\n", + " lambda xs: np.log(xs + 0.001) - np.log(xs.mean() + 0.001),\n", + " axis=0)\n", + "exchanges = exchanges.reindex( # sort by variance, highest variance fluxes first\n", + " exchanges.var(axis = 1).sort_values(ascending=False).index\n", + ")\n", + "\n", + "exchanges" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nYuc7Wu38nYd" + }, + "source": [ + "We can use seaborn to plot our heatmap:" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": { + "id": "YHnKeFuF3qAt", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "outputId": "06c5b995-06e9-48ca-fd7e-25e5998af6e0" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 104 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "import seaborn as sns\n", + "import numpy as np\n", + "\n", + "sns.clustermap(\n", + " exchanges.head(50), # take the 50 highest-variance fluxes\n", + " cmap = 'viridis',\n", + " yticklabels = True, # show all metabolite names\n", + " figsize = (8, 12) # size of the heatmap\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b1i2LStZiUst" + }, + "source": [ + "We can see here that the disease context is important - there are significant differences in consumption rates between the healthy and pre-FMT microbiomes. These differences may explain why C. diff can exploit the pre-FMT microbiomes and achieve higher predicted growth rates." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l8maOr3w2bOo" + }, + "source": [ + "# 🏫 Exercises" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Time for you to try your hand at some analysis. Let's take a closer look at the metabolic strategies used by C. diff." + ], + "metadata": { + "id": "XZt3ojssnCzz" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VjUustQG2_bX" + }, + "source": [ + "## Metabolic strategies used by C. diff\n", + "We've already looked at the community-wide consumption fluxes in the absence of C. diff and found that they differ between disease contexts. What about the import fluxes of C. diff specifically? Can you develop a visualization to look at those?" + ] + }, + { + "cell_type": "code", + "source": [ + "exchanges=growth.exchanges # extract exchanges from growth data\n", + "cdiff_exchanges=exchanges[exchanges.taxon=='Clostridioides'] # get cdiff specific exchanges\n", + "cdiff_imports=cdiff_exchanges[cdiff_exchanges.direction=='import'] # specifically look at imports\n", + "cdiff_imports=pd.merge(cdiff_imports,metadata,on='sample_id') # add metadata\n", + "cdiff_imports = pd.pivot_table( # convert to a matrix of samples vs. 
metabolites\n", + " cdiff_imports, # that contains the production rates\n", + " index = ['disease_state','sample_id'],\n", + " columns = 'reaction',\n", + " values = 'flux'\n", + ")\n", + "cdiff_imports = abs(cdiff_imports.T.fillna(0.0)) # fill nans with 0s\n", + "\n", + "annot=growth.annotations # optionally map reactions to metabolite names\n", + "annot.index=annot.reaction\n", + "cdiff_imports.index=cdiff_imports.index.map(annot.name.to_dict()) # not necessary but makes results more human readible\n", + "\n", + "cdiff_imports = cdiff_imports.apply( # ...and a CLR transform again, normalizes the fluxes\n", + " lambda xs: np.log(xs + 0.001) - np.log(xs.mean() + 0.001),\n", + " axis=0)\n", + "cdiff_imports = cdiff_imports.reindex( # sort by variance, highest variance fluxes first\n", + " cdiff_imports.var(axis = 1).sort_values(ascending=False).index\n", + ")\n", + "\n", + "cdiff_imports\n", + "\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 486 + }, + "id": "eIyRahJbn4hc", + "outputId": "65c9debc-66ea-44bb-f725-dc18d3df66af" + }, + "execution_count": 147, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "disease_state Pre-FMT \\\n", + "sample_id ERR1883214 ERR1883240 ERR1883250 ERR1883294 \n", + "reaction \n", + "D-Mannitol -5.804770 -4.420214 -1.034209 -4.012576 \n", + "D-glucose -5.804770 -4.420214 -3.732151 -4.012576 \n", + "L-aspartate(1-) -5.804770 -4.420214 0.734590 3.142715 \n", + "Formate 3.603973 -4.420214 -3.965938 -4.012576 \n", + "(R)-3-hydroxybutyrate -5.804770 3.427428 -2.999371 2.600317 \n", + "... ... ... ... ... 
\n", + "Co2+ -5.032875 -3.060466 -3.113042 -2.668395 \n", + "O2 -5.566462 -4.232652 -3.799558 -3.812628 \n", + "Hexadecanoate (n-C16:0) -3.015255 -4.162366 -3.965938 -4.012576 \n", + "Hypoxanthine -0.457435 -0.330558 0.006263 0.048169 \n", + "L-valine 0.714780 0.474769 0.203827 0.859942 \n", + "\n", + "disease_state healthy \n", + "sample_id ERR1883195 ERR1883207 ERR1883212 ERR1883225 \n", + "reaction \n", + "D-Mannitol 3.467234 3.488661 3.020506 3.115883 \n", + "D-glucose 3.013143 3.070334 2.663872 2.791790 \n", + "L-aspartate(1-) 1.986893 1.851587 3.110281 -3.445003 \n", + "Formate 1.981723 1.819535 -3.348525 2.981770 \n", + "(R)-3-hydroxybutyrate -2.852466 -3.347560 1.757644 -3.445003 \n", + "... ... ... ... ... \n", + "Co2+ -2.374232 -2.584710 -2.540125 -2.703519 \n", + "O2 -2.852466 -3.347560 -3.348525 -3.445003 \n", + "Hexadecanoate (n-C16:0) -2.852466 -3.347560 -3.348525 -3.445003 \n", + "Hypoxanthine -0.257878 -0.179446 -0.093329 0.309034 \n", + "L-valine 0.526749 0.618198 0.705706 0.547314 \n", + "\n", + "[237 rows x 8 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
disease_statePre-FMThealthy
sample_idERR1883214ERR1883240ERR1883250ERR1883294ERR1883195ERR1883207ERR1883212ERR1883225
reaction
D-Mannitol-5.804770-4.420214-1.034209-4.0125763.4672343.4886613.0205063.115883
D-glucose-5.804770-4.420214-3.732151-4.0125763.0131433.0703342.6638722.791790
L-aspartate(1-)-5.804770-4.4202140.7345903.1427151.9868931.8515873.110281-3.445003
Formate3.603973-4.420214-3.965938-4.0125761.9817231.819535-3.3485252.981770
(R)-3-hydroxybutyrate-5.8047703.427428-2.9993712.600317-2.852466-3.3475601.757644-3.445003
...........................
Co2+-5.032875-3.060466-3.113042-2.668395-2.374232-2.584710-2.540125-2.703519
O2-5.566462-4.232652-3.799558-3.812628-2.852466-3.347560-3.348525-3.445003
Hexadecanoate (n-C16:0)-3.015255-4.162366-3.965938-4.012576-2.852466-3.347560-3.348525-3.445003
Hypoxanthine-0.457435-0.3305580.0062630.048169-0.257878-0.179446-0.0933290.309034
L-valine0.7147800.4747690.2038270.8599420.5267490.6181980.7057060.547314
\n", + "

237 rows Γ— 8 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 147 + } + ] + }, + { + "cell_type": "code", + "source": [ + "sns.clustermap(\n", + " cdiff_imports.head(50), # take 50 highest fluxes\n", + " cmap = 'viridis',\n", + " yticklabels = True, # show all metabolite names\n", + " figsize = (8, 12) # size of the heatmap\n", + ")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "vWVMA7o1pGGD", + "outputId": "05feed82-9fd4-49c6-f9b7-b1ca5e726446" + }, + "execution_count": 148, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 148 + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CsqIRTbC7doD" + }, + "source": [ + "# πŸ”΅ Addendum\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hycoXNTi5xsH" + }, + "source": [ + "## Choosing a tradeoff value\n", + "\n", + "Even if you don't have growth rates available you can still use your data to choose a decent tradeoff value. This can be done by choosing the largest tradeoff value that still allows growth for the majority of the taxa that you observed in the sample (if they are present at an appreciable abundance, they should be able to grow). This can be done with the `tradeoff` workflow in MICOM that will run cooperative tradeoff with varying tradeoff values, which can be visualized with the `plot_tradeoff` function." + ] + }, + { + "cell_type": "code", + "source": [ + "manifest" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 454 + }, + "id": "I8NnkbnymxoE", + "outputId": "41feea98-0cfb-4fe5-c0d6-3fa11631782e" + }, + "execution_count": 123, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " sample_id Unnamed: 0 collection_timestamp day_relative_to_fmt \\\n", + "0 ERR1883195 12.0 2011-10-24 26 \n", + "1 ERR1883207 24.0 2012-01-12 44 \n", + "2 ERR1883212 30.0 2012-10-10 135 \n", + "3 ERR1883214 32.0 2011-07-26 0 \n", + "4 ERR1883225 43.0 2011-07-26 54 \n", + "5 ERR1883240 58.0 2012-02-14 pre-FMT \n", + "6 ERR1883250 68.0 2011-12-23 pre-FMT \n", + "7 ERR1883294 112.0 2011-09-29 0 \n", + "\n", + " description disease_state host_age host_age_units \\\n", + "0 Donor 11 healthy Restricted access years \n", + "1 Donor 12 healthy Restricted access years \n", + "2 Donor 14 healthy Restricted access years \n", + "3 Day 0 CD1 Pre-FMT 39 years \n", + "4 Donor CD1 healthy Restricted access years \n", + "5 CD9 pre-FMT Pre-FMT 47 years \n", + "6 CD13 pre-FMT Pre-FMT 53 years \n", + "7 Day 0 CD3 Pre-FMT 61 
years \n", + "\n", + " host_body_mass_index host_height ... host_subject_id \\\n", + "0 Restricted access Restricted access ... Donor \n", + "1 Restricted access Restricted access ... Donor \n", + "2 Restricted access Restricted access ... Donor \n", + "3 29.3 165.1 ... CD1 \n", + "4 Restricted access Restricted access ... Donor \n", + "5 35.5 1.55 ... CD9 \n", + "6 34.4 1.56 ... CD13 \n", + "7 32.5 1.727 ... CD3 \n", + "\n", + " host_weight host_weight_units race sex \\\n", + "0 Restricted access kg Restricted access Restricted access \n", + "1 Restricted access kg Restricted access Restricted access \n", + "2 Restricted access kg Restricted access Restricted access \n", + "3 80.1 kg white female \n", + "4 Restricted access kg Restricted access Restricted access \n", + "5 85.1 kg white female \n", + "6 83.9 kg white female \n", + "7 97.3 kg white male \n", + "\n", + " file found_taxa total_taxa found_fraction \\\n", + "0 ERR1883195.pickle 4.0 4.0 1.000000 \n", + "1 ERR1883207.pickle 6.0 6.0 1.000000 \n", + "2 ERR1883212.pickle 5.0 6.0 0.833333 \n", + "3 ERR1883214.pickle 2.0 2.0 1.000000 \n", + "4 ERR1883225.pickle 4.0 4.0 1.000000 \n", + "5 ERR1883240.pickle 3.0 3.0 1.000000 \n", + "6 ERR1883250.pickle 4.0 5.0 0.800000 \n", + "7 ERR1883294.pickle 4.0 6.0 0.666667 \n", + "\n", + " found_abundance_fraction \n", + "0 1.000000 \n", + "1 1.000000 \n", + "2 0.884172 \n", + "3 1.000000 \n", + "4 1.000000 \n", + "5 1.000000 \n", + "6 0.935141 \n", + "7 0.444919 \n", + "\n", + "[8 rows x 21 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + "
sample_idUnnamed: 0collection_timestampday_relative_to_fmtdescriptiondisease_statehost_agehost_age_unitshost_body_mass_indexhost_height...host_subject_idhost_weighthost_weight_unitsracesexfilefound_taxatotal_taxafound_fractionfound_abundance_fraction
0ERR188319512.02011-10-2426Donor 11healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883195.pickle4.04.01.0000001.000000
1ERR188320724.02012-01-1244Donor 12healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883207.pickle6.06.01.0000001.000000
2ERR188321230.02012-10-10135Donor 14healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883212.pickle5.06.00.8333330.884172
3ERR188321432.02011-07-260Day 0 CD1Pre-FMT39years29.3165.1...CD180.1kgwhitefemaleERR1883214.pickle2.02.01.0000001.000000
4ERR188322543.02011-07-2654Donor CD1healthyRestricted accessyearsRestricted accessRestricted access...DonorRestricted accesskgRestricted accessRestricted accessERR1883225.pickle4.04.01.0000001.000000
5ERR188324058.02012-02-14pre-FMTCD9 pre-FMTPre-FMT47years35.51.55...CD985.1kgwhitefemaleERR1883240.pickle3.03.01.0000001.000000
6ERR188325068.02011-12-23pre-FMTCD13 pre-FMTPre-FMT53years34.41.56...CD1383.9kgwhitefemaleERR1883250.pickle4.05.00.8000000.935141
7ERR1883294112.02011-09-290Day 0 CD3Pre-FMT61years32.51.727...CD397.3kgwhitemaleERR1883294.pickle4.06.00.6666670.444919
\n", + "

8 rows Γ— 21 columns

\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "execution_count": 123 + } + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": { + "id": "8_1jesZTHYra", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51, + "referenced_widgets": [ + "d450a2f0b9de425990f12bc7a5631782", + "af7b0c394a064469a6f7e8216b197336" + ] + }, + "outputId": "b86ed405-c9b6-499c-b124-0915e21af8fa" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "Output()" + ], + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "d450a2f0b9de425990f12bc7a5631782" + } + }, + "metadata": {} + }, + { + "output_type": "display_data", + "data": { + "text/plain": [], + "text/html": [ + "
\n"
+            ]
+          },
+          "metadata": {}
+        },
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "\n"
+            ],
+            "text/html": [
+              "
\n",
+              "
\n" + ] + }, + "metadata": {} + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 149 + } + ], + "source": [ + "from micom.workflows import tradeoff\n", + "import micom\n", + "\n", + "tradeoff_results = tradeoff(manifest, \"models\", medium, threads=2)\n", + "tradeoff_results.to_csv(\"tradeoff.csv\", index=False)\n", + "\n", + "plot_tradeoff(tradeoff_results, tolerance=1e-4)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y9703vhK6d6c" + }, + "source": [ + "After opening `tradeoff_[DATE].html` you will see that, for our example here, all tradeoff values work great. This is because we modeled very few taxa, which keeps the competition down. If you allowed less-abundant taxa into the models, this would change drastically. For instance, here is an example from a colorectal cancer data set:\n", + "\n", + "[![tradeoff example](https://micom-dev.github.io/micom/_images/tradeoff.png)](https://micom-dev.github.io/micom/_static/tradeoff.html)\n", + "\n", + "You can see how not using the cooperative tradeoff would give you nonsense results where only 10% of all observed taxa grew. A tradeoff value of 0.6-0.8 would probably be a good choice for this particular data set." 
+ ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "vscode": { + "interpreter": { + "hash": "c991a7ed881363492957ff225bb30af9d5174cd8515a21cbef71fcaa303e4050" + } + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "37b0134de4bd4fe38c1afe7bc2bbdc59": { + "model_module": "@jupyter-widgets/output", + "model_name": "OutputModel", + "model_module_version": "1.0.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_bc2776a5195f4a8c8dda0e22e2fd9ea3", + "msg_id": "", + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "Running \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[35m100%\u001b[0m \u001b[33m0:05:08\u001b[0m\n", + "text/html": "
Running ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:05:08\n
\n" + }, + "metadata": {} + } + ] + } + }, + "bc2776a5195f4a8c8dda0e22e2fd9ea3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "59428de9a88b4204885d03de441cbde3": { + "model_module": "@jupyter-widgets/output", + "model_name": "OutputModel", + "model_module_version": "1.0.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_901fb7e8d8c844a8b2e86735ca2bead3", + "msg_id": "", + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "Running \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[35m100%\u001b[0m 
\u001b[33m0:09:32\u001b[0m\n", + "text/html": "
Running ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:09:32\n
\n" + }, + "metadata": {} + } + ] + } + }, + "901fb7e8d8c844a8b2e86735ca2bead3": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d450a2f0b9de425990f12bc7a5631782": { + "model_module": "@jupyter-widgets/output", + "model_name": "OutputModel", + "model_module_version": "1.0.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/output", + "_model_module_version": "1.0.0", + "_model_name": "OutputModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/output", + "_view_module_version": "1.0.0", + "_view_name": "OutputView", + "layout": "IPY_MODEL_af7b0c394a064469a6f7e8216b197336", + "msg_id": "", + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "Running \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[35m100%\u001b[0m 
\u001b[33m0:32:42\u001b[0m\n", + "text/html": "
Running ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 100% 0:32:42\n
\n" + }, + "metadata": {} + } + ] + } + }, + "af7b0c394a064469a6f7e8216b197336": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file