Updates for v0.8.0 release

pzivich · Jul 17, 2019 · b931022 · b931022
1 parent 21995e4
commit b931022
Show file tree

Hide file tree

Showing 14 changed files with 1,468 additions and 830 deletions.
diff --git a/3_Epidemiology_Analysis/b_missing_data/4_IPCW.ipynb b/3_Epidemiology_Analysis/b_missing_data/4_IPCW.ipynb
@@ -196,7 +196,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
@@ -296,7 +296,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
@@ -339,7 +339,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {

diff --git a/3_Epidemiology_Analysis/c_causal_inference/1_time-fixed-treatments/1_g-formula.ipynb b/3_Epidemiology_Analysis/c_causal_inference/1_time-fixed-treatments/1_g-formula.ipynb
diff --git a/...demiology_Analysis/c_causal_inference/1_time-fixed-treatments/2_gformula_stochastic.ipynb b/...demiology_Analysis/c_causal_inference/1_time-fixed-treatments/2_gformula_stochastic.ipynb
@@ -76,8 +76,8 @@
       "Model Family:                Binomial   Df Model:                            9\n",
       "Link Function:                  logit   Scale:                          1.0000\n",
       "Method:                          IRLS   Log-Likelihood:                -195.12\n",
-      "Date:                Wed, 24 Apr 2019   Deviance:                       390.24\n",
-      "Time:                        13:21:17   Pearson chi2:                     484.\n",
+      "Date:                Wed, 17 Jul 2019   Deviance:                       390.24\n",
+      "Time:                        12:30:49   Pearson chi2:                     484.\n",
       "No. Iterations:                     5   Covariance Type:             nonrobust\n",
       "==============================================================================\n",
       "                 coef    std err          z      P>|z|      [0.025      0.975]\n",

diff --git a/3_Epidemiology_Analysis/c_causal_inference/1_time-fixed-treatments/3_IPTW_intro.ipynb b/3_Epidemiology_Analysis/c_causal_inference/1_time-fixed-treatments/3_IPTW_intro.ipynb
diff --git a/3_Epidemiology_Analysis/c_causal_inference/1_time-fixed-treatments/4_IPTW_SMR.ipynb b/3_Epidemiology_Analysis/c_causal_inference/1_time-fixed-treatments/4_IPTW_SMR.ipynb
@@ -83,7 +83,7 @@
    "source": [
     "from zepid.causal.ipw import IPTW\n",
     "\n",
-    "iptw = IPTW(df, treatment='art', stabilized=False, standardize='exposed')"
+    "iptw = IPTW(df.drop(columns='cd4_wk45'), treatment='art', outcome='dead', standardize='exposed')"
    ]
   },
   {
@@ -97,47 +97,54 @@
    "cell_type": "code",
    "execution_count": 3,
    "metadata": {},
-   "outputs": [],
-   "source": [
-    "iptw.regression_models('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False)\n",
-    "iptw.fit()\n",
-    "df['uw'] = iptw.Weight"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "c:\\users\\zivic\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\statsmodels\\genmod\\generalized_estimating_equations.py:472: DomainWarning: The identity link function does not respect the domain of the Binomial family.\n",
-      "  DomainWarning)\n"
+      "c:\\users\\zivic\\python programs\\development\\zepid\\zepid\\causal\\ipw\\IPTW.py:353: UserWarning: All missing outcome data is assumed to be missing completely at random. To relax this assumption to outcome data is missing at random please use the `missing_model()` function\n",
+      "  \"function\", UserWarning)\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "RD =  -0.091\n",
-      "95% CL: -0.18 -0.002\n"
+      "======================================================================\n",
+      "              Inverse Probability of Treatment Weights                \n",
+      "======================================================================\n",
+      "Treatment:        art             No. Observations:     547                 \n",
+      "Outcome:          dead            No. Missing Outcome:  30                  \n",
+      "g-Model:          Logistic        Missing Model:        None                \n",
+      "======================================================================\n",
+      "Risk Difference\n",
+      "----------------------------------------------------------------------\n",
+      "              RD  SE(RD)  95%LCL  95%UCL\n",
+      "labels                                  \n",
+      "Intercept  0.221   0.025   0.172   0.269\n",
+      "art       -0.091   0.046  -0.180  -0.002\n",
+      "----------------------------------------------------------------------\n",
+      "Risk Ratio\n",
+      "              RR  SE(log(RR))  95%LCL  95%UCL\n",
+      "labels                                       \n",
+      "Intercept  0.221        0.112   0.177   0.275\n",
+      "art        0.588        0.315   0.317   1.092\n",
+      "----------------------------------------------------------------------\n",
+      "Odds Ratio\n",
+      "              OR  SE(log(OR))  95%LCL  95%UCL\n",
+      "labels                                       \n",
+      "Intercept  0.283        0.143   0.214   0.375\n",
+      "art        0.527        0.368   0.256   1.084\n",
+      "======================================================================\n"
      ]
     }
    ],
    "source": [
-    "import statsmodels.api as sm\n",
-    "import statsmodels.formula.api as smf\n",
-    "from statsmodels.genmod.families import family,links\n",
-    "\n",
-    "ind = sm.cov_struct.Independence()\n",
-    "f = sm.families.family.Binomial(sm.families.links.identity)\n",
-    "linrisk = smf.gee('dead ~ art', df['id'], df, cov_struct=ind, family=f, weights=df['uw']).fit()\n",
-    "\n",
-    "print('RD = ', np.round(linrisk.params[1], 3))\n",
-    "print('95% CL:', np.round(linrisk.conf_int().iloc[1][0], 3), \n",
-    "      np.round(linrisk.conf_int().iloc[1][1], 3))"
+    "iptw.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', \n",
+    "                     stabilized=False, print_results=False)\n",
+    "iptw.marginal_structural_model('art')\n",
+    "iptw.fit()\n",
+    "iptw.summary()"
    ]
   },
   {
@@ -150,37 +157,57 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "RD =  -0.091\n",
-      "95% CL: -0.18 -0.002\n"
+      "c:\\users\\zivic\\python programs\\development\\zepid\\zepid\\causal\\ipw\\IPTW.py:353: UserWarning: All missing outcome data is assumed to be missing completely at random. To relax this assumption to outcome data is missing at random please use the `missing_model()` function\n",
+      "  \"function\", UserWarning)\n"
      ]
     },
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
-      "c:\\users\\zivic\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\statsmodels\\genmod\\generalized_estimating_equations.py:472: DomainWarning: The identity link function does not respect the domain of the Binomial family.\n",
-      "  DomainWarning)\n"
+      "======================================================================\n",
+      "              Inverse Probability of Treatment Weights                \n",
+      "======================================================================\n",
+      "Treatment:        art             No. Observations:     547                 \n",
+      "Outcome:          dead            No. Missing Outcome:  30                  \n",
+      "g-Model:          Logistic        Missing Model:        None                \n",
+      "======================================================================\n",
+      "Risk Difference\n",
+      "----------------------------------------------------------------------\n",
+      "              RD  SE(RD)  95%LCL  95%UCL\n",
+      "labels                                  \n",
+      "Intercept  0.221   0.025   0.172   0.269\n",
+      "art       -0.091   0.046  -0.180  -0.002\n",
+      "----------------------------------------------------------------------\n",
+      "Risk Ratio\n",
+      "              RR  SE(log(RR))  95%LCL  95%UCL\n",
+      "labels                                       \n",
+      "Intercept  0.221        0.112   0.177   0.275\n",
+      "art        0.588        0.315   0.317   1.092\n",
+      "----------------------------------------------------------------------\n",
+      "Odds Ratio\n",
+      "              OR  SE(log(OR))  95%LCL  95%UCL\n",
+      "labels                                       \n",
+      "Intercept  0.283        0.143   0.214   0.375\n",
+      "art        0.527        0.368   0.256   1.084\n",
+      "======================================================================\n"
      ]
     }
    ],
    "source": [
-    "iptw = IPTW(df, treatment='art', stabilized=True, standardize='exposed')\n",
-    "iptw.regression_models('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False)\n",
+    "iptw = IPTW(df.drop(columns='cd4_wk45'), treatment='art', outcome='dead', standardize='exposed')\n",
+    "iptw.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', \n",
+    "                     print_results=False)\n",
+    "iptw.marginal_structural_model('art')\n",
     "iptw.fit()\n",
-    "df['sw'] = iptw.Weight\n",
-    "\n",
-    "linrisk = smf.gee('dead ~ art', df['id'], df, cov_struct=ind, family=f, weights=df['sw']).fit()\n",
-    "\n",
-    "print('RD = ', np.round(linrisk.params[1], 3))\n",
-    "print('95% CL:', np.round(linrisk.conf_int().iloc[1][0], 3), \n",
-    "      np.round(linrisk.conf_int().iloc[1][1], 3))"
+    "iptw.summary()"
    ]
   },
   {
@@ -190,42 +217,64 @@
     "The results, as expected, are the same between the unstabilized and stabilized weights. We can also use the same process to estimate the effect of ART on continuous treatments detailed in the IPTW tutorial. I leave that as a challenge for you.\n",
     "\n",
     "## Average Treatment Effect in the Untreated\n",
-    "We can also standardize to the untreated. Instead of setting `standardize` to exposed, we instead set `standardize='unexposed'`. Let's look at an example with unstabilized weights"
+    "We can also standardize to the untreated. Below is our estimand\n",
+    "$$E[Y^{a=1}|A=0] - E[Y|A=0]$$\n",
+    "Rather than setting `standardize='exposed'`, we set `standardize='unexposed'`. Let's look at an example with unstabilized weights."
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "RD =  -0.08\n",
-      "95% CL: -0.154 -0.007\n"
+      "c:\\users\\zivic\\python programs\\development\\zepid\\zepid\\causal\\ipw\\IPTW.py:353: UserWarning: All missing outcome data is assumed to be missing completely at random. To relax this assumption to outcome data is missing at random please use the `missing_model()` function\n",
+      "  \"function\", UserWarning)\n"
      ]
     },
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
-      "c:\\users\\zivic\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\statsmodels\\genmod\\generalized_estimating_equations.py:472: DomainWarning: The identity link function does not respect the domain of the Binomial family.\n",
-      "  DomainWarning)\n"
+      "======================================================================\n",
+      "              Inverse Probability of Treatment Weights                \n",
+      "======================================================================\n",
+      "Treatment:        art             No. Observations:     547                 \n",
+      "Outcome:          dead            No. Missing Outcome:  30                  \n",
+      "g-Model:          Logistic        Missing Model:        None                \n",
+      "======================================================================\n",
+      "Risk Difference\n",
+      "----------------------------------------------------------------------\n",
+      "              RD  SE(RD)  95%LCL  95%UCL\n",
+      "labels                                  \n",
+      "Intercept  0.175   0.018   0.139   0.211\n",
+      "art       -0.080   0.038  -0.154  -0.007\n",
+      "----------------------------------------------------------------------\n",
+      "Risk Ratio\n",
+      "              RR  SE(log(RR))  95%LCL  95%UCL\n",
+      "labels                                       \n",
+      "Intercept  0.175        0.104   0.143   0.214\n",
+      "art        0.543        0.361   0.267   1.101\n",
+      "----------------------------------------------------------------------\n",
+      "Odds Ratio\n",
+      "              OR  SE(log(OR))  95%LCL  95%UCL\n",
+      "labels                                       \n",
+      "Intercept  0.212        0.125   0.166   0.271\n",
+      "art        0.495        0.402   0.225   1.088\n",
+      "======================================================================\n"
      ]
     }
    ],
    "source": [
-    "iptw = IPTW(df, treatment='art', stabilized=False, standardize='unexposed')\n",
-    "iptw.regression_models('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False)\n",
+    "iptw = IPTW(df.drop(columns='cd4_wk45'), treatment='art', outcome='dead', standardize='unexposed')\n",
+    "iptw.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', \n",
+    "                     stabilized=False, print_results=False)\n",
+    "iptw.marginal_structural_model('art')\n",
     "iptw.fit()\n",
-    "df['sw'] = iptw.Weight\n",
-    "\n",
-    "linrisk = smf.gee('dead ~ art', df['id'], df, cov_struct=ind, family=f, weights=df['sw']).fit()\n",
-    "\n",
-    "print('RD = ', np.round(linrisk.params[1], 3))\n",
-    "print('95% CL:', np.round(linrisk.conf_int().iloc[1][0], 3), \n",
-    "      np.round(linrisk.conf_int().iloc[1][1], 3))"
+    "iptw.summary()"
    ]
   },
   {
@@ -237,37 +286,57 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "RD =  -0.08\n",
-      "95% CL: -0.154 -0.007\n"
+      "c:\\users\\zivic\\python programs\\development\\zepid\\zepid\\causal\\ipw\\IPTW.py:353: UserWarning: All missing outcome data is assumed to be missing completely at random. To relax this assumption to outcome data is missing at random please use the `missing_model()` function\n",
+      "  \"function\", UserWarning)\n"
      ]
     },
     {
-     "name": "stderr",
+     "name": "stdout",
      "output_type": "stream",
      "text": [
-      "c:\\users\\zivic\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\statsmodels\\genmod\\generalized_estimating_equations.py:472: DomainWarning: The identity link function does not respect the domain of the Binomial family.\n",
-      "  DomainWarning)\n"
+      "======================================================================\n",
+      "              Inverse Probability of Treatment Weights                \n",
+      "======================================================================\n",
+      "Treatment:        art             No. Observations:     547                 \n",
+      "Outcome:          dead            No. Missing Outcome:  30                  \n",
+      "g-Model:          Logistic        Missing Model:        None                \n",
+      "======================================================================\n",
+      "Risk Difference\n",
+      "----------------------------------------------------------------------\n",
+      "              RD  SE(RD)  95%LCL  95%UCL\n",
+      "labels                                  \n",
+      "Intercept  0.175   0.018   0.139   0.211\n",
+      "art       -0.080   0.038  -0.154  -0.007\n",
+      "----------------------------------------------------------------------\n",
+      "Risk Ratio\n",
+      "              RR  SE(log(RR))  95%LCL  95%UCL\n",
+      "labels                                       \n",
+      "Intercept  0.175        0.104   0.143   0.214\n",
+      "art        0.543        0.361   0.267   1.101\n",
+      "----------------------------------------------------------------------\n",
+      "Odds Ratio\n",
+      "              OR  SE(log(OR))  95%LCL  95%UCL\n",
+      "labels                                       \n",
+      "Intercept  0.212        0.125   0.166   0.271\n",
+      "art        0.495        0.402   0.225   1.088\n",
+      "======================================================================\n"
      ]
     }
    ],
    "source": [
-    "iptw = IPTW(df, treatment='art', stabilized=True, standardize='unexposed')\n",
-    "iptw.regression_models('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False)\n",
+    "iptw = IPTW(df.drop(columns='cd4_wk45'), treatment='art', outcome='dead', standardize='unexposed')\n",
+    "iptw.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', \n",
+    "                     stabilized=False, print_results=False)\n",
+    "iptw.marginal_structural_model('art')\n",
     "iptw.fit()\n",
-    "df['sw'] = iptw.Weight\n",
-    "\n",
-    "linrisk = smf.gee('dead ~ art', df['id'], df, cov_struct=ind, family=f, weights=df['sw']).fit()\n",
-    "\n",
-    "print('RD = ', np.round(linrisk.params[1], 3))\n",
-    "print('95% CL:', np.round(linrisk.conf_int().iloc[1][0], 3), \n",
-    "      np.round(linrisk.conf_int().iloc[1][1], 3))"
+    "iptw.summary()"
    ]
   },
   {