Skip to content

Commit

Permalink
Updates for v0.8.0 release
Browse files Browse the repository at this point in the history
  • Loading branch information
pzivich committed Jul 17, 2019
1 parent 21995e4 commit b931022
Show file tree
Hide file tree
Showing 14 changed files with 1,468 additions and 830 deletions.
6 changes: 3 additions & 3 deletions 3_Epidemiology_Analysis/b_missing_data/4_IPCW.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -296,7 +296,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -339,7 +339,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 12,
"metadata": {},
"outputs": [
{
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@
"Model Family: Binomial Df Model: 9\n",
"Link Function: logit Scale: 1.0000\n",
"Method: IRLS Log-Likelihood: -195.12\n",
"Date: Wed, 24 Apr 2019 Deviance: 390.24\n",
"Time: 13:21:17 Pearson chi2: 484.\n",
"Date: Wed, 17 Jul 2019 Deviance: 390.24\n",
"Time: 12:30:49 Pearson chi2: 484.\n",
"No. Iterations: 5 Covariance Type: nonrobust\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"source": [
"from zepid.causal.ipw import IPTW\n",
"\n",
"iptw = IPTW(df, treatment='art', stabilized=False, standardize='exposed')"
"iptw = IPTW(df.drop(columns='cd4_wk45'), treatment='art', outcome='dead', standardize='exposed')"
]
},
{
Expand All @@ -97,47 +97,54 @@
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"iptw.regression_models('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False)\n",
"iptw.fit()\n",
"df['uw'] = iptw.Weight"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\users\\zivic\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\statsmodels\\genmod\\generalized_estimating_equations.py:472: DomainWarning: The identity link function does not respect the domain of the Binomial family.\n",
" DomainWarning)\n"
"c:\\users\\zivic\\python programs\\development\\zepid\\zepid\\causal\\ipw\\IPTW.py:353: UserWarning: All missing outcome data is assumed to be missing completely at random. To relax this assumption to outcome data is missing at random please use the `missing_model()` function\n",
" \"function\", UserWarning)\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"RD = -0.091\n",
"95% CL: -0.18 -0.002\n"
"======================================================================\n",
" Inverse Probability of Treatment Weights \n",
"======================================================================\n",
"Treatment: art No. Observations: 547 \n",
"Outcome: dead No. Missing Outcome: 30 \n",
"g-Model: Logistic Missing Model: None \n",
"======================================================================\n",
"Risk Difference\n",
"----------------------------------------------------------------------\n",
" RD SE(RD) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.221 0.025 0.172 0.269\n",
"art -0.091 0.046 -0.180 -0.002\n",
"----------------------------------------------------------------------\n",
"Risk Ratio\n",
" RR SE(log(RR)) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.221 0.112 0.177 0.275\n",
"art 0.588 0.315 0.317 1.092\n",
"----------------------------------------------------------------------\n",
"Odds Ratio\n",
" OR SE(log(OR)) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.283 0.143 0.214 0.375\n",
"art 0.527 0.368 0.256 1.084\n",
"======================================================================\n"
]
}
],
"source": [
"import statsmodels.api as sm\n",
"import statsmodels.formula.api as smf\n",
"from statsmodels.genmod.families import family,links\n",
"\n",
"ind = sm.cov_struct.Independence()\n",
"f = sm.families.family.Binomial(sm.families.links.identity)\n",
"linrisk = smf.gee('dead ~ art', df['id'], df, cov_struct=ind, family=f, weights=df['uw']).fit()\n",
"\n",
"print('RD = ', np.round(linrisk.params[1], 3))\n",
"print('95% CL:', np.round(linrisk.conf_int().iloc[1][0], 3), \n",
" np.round(linrisk.conf_int().iloc[1][1], 3))"
"iptw.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', \n",
" stabilized=False, print_results=False)\n",
"iptw.marginal_structural_model('art')\n",
"iptw.fit()\n",
"iptw.summary()"
]
},
{
Expand All @@ -150,37 +157,57 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"RD = -0.091\n",
"95% CL: -0.18 -0.002\n"
"c:\\users\\zivic\\python programs\\development\\zepid\\zepid\\causal\\ipw\\IPTW.py:353: UserWarning: All missing outcome data is assumed to be missing completely at random. To relax this assumption to outcome data is missing at random please use the `missing_model()` function\n",
" \"function\", UserWarning)\n"
]
},
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"c:\\users\\zivic\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\statsmodels\\genmod\\generalized_estimating_equations.py:472: DomainWarning: The identity link function does not respect the domain of the Binomial family.\n",
" DomainWarning)\n"
"======================================================================\n",
" Inverse Probability of Treatment Weights \n",
"======================================================================\n",
"Treatment: art No. Observations: 547 \n",
"Outcome: dead No. Missing Outcome: 30 \n",
"g-Model: Logistic Missing Model: None \n",
"======================================================================\n",
"Risk Difference\n",
"----------------------------------------------------------------------\n",
" RD SE(RD) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.221 0.025 0.172 0.269\n",
"art -0.091 0.046 -0.180 -0.002\n",
"----------------------------------------------------------------------\n",
"Risk Ratio\n",
" RR SE(log(RR)) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.221 0.112 0.177 0.275\n",
"art 0.588 0.315 0.317 1.092\n",
"----------------------------------------------------------------------\n",
"Odds Ratio\n",
" OR SE(log(OR)) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.283 0.143 0.214 0.375\n",
"art 0.527 0.368 0.256 1.084\n",
"======================================================================\n"
]
}
],
"source": [
"iptw = IPTW(df, treatment='art', stabilized=True, standardize='exposed')\n",
"iptw.regression_models('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False)\n",
"iptw = IPTW(df.drop(columns='cd4_wk45'), treatment='art', outcome='dead', standardize='exposed')\n",
"iptw.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', \n",
" print_results=False)\n",
"iptw.marginal_structural_model('art')\n",
"iptw.fit()\n",
"df['sw'] = iptw.Weight\n",
"\n",
"linrisk = smf.gee('dead ~ art', df['id'], df, cov_struct=ind, family=f, weights=df['sw']).fit()\n",
"\n",
"print('RD = ', np.round(linrisk.params[1], 3))\n",
"print('95% CL:', np.round(linrisk.conf_int().iloc[1][0], 3), \n",
" np.round(linrisk.conf_int().iloc[1][1], 3))"
"iptw.summary()"
]
},
{
Expand All @@ -190,42 +217,64 @@
"The results, as expected, are the same between the unstabilized and stabilized weights. We can also use the same process to estimate the effect of ART on continuous outcomes, as detailed in the IPTW tutorial. I leave that as a challenge for you.\n",
"\n",
"## Average Treatment Effect in the Untreated\n",
"We can also standardize to the untreated. Rather than setting `standardize='exposed'`, we set `standardize='unexposed'`. Let's look at an example with unstabilized weights."
"We can also standardize to the untreated. Below is our estimand:\n",
"$$E[Y^{a=1}|A=0] - E[Y|A=0]$$\n",
"Rather than setting `standardize='exposed'`, we set `standardize='unexposed'`. Let's look at an example with unstabilized weights."
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"RD = -0.08\n",
"95% CL: -0.154 -0.007\n"
"c:\\users\\zivic\\python programs\\development\\zepid\\zepid\\causal\\ipw\\IPTW.py:353: UserWarning: All missing outcome data is assumed to be missing completely at random. To relax this assumption to outcome data is missing at random please use the `missing_model()` function\n",
" \"function\", UserWarning)\n"
]
},
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"c:\\users\\zivic\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\statsmodels\\genmod\\generalized_estimating_equations.py:472: DomainWarning: The identity link function does not respect the domain of the Binomial family.\n",
" DomainWarning)\n"
"======================================================================\n",
" Inverse Probability of Treatment Weights \n",
"======================================================================\n",
"Treatment: art No. Observations: 547 \n",
"Outcome: dead No. Missing Outcome: 30 \n",
"g-Model: Logistic Missing Model: None \n",
"======================================================================\n",
"Risk Difference\n",
"----------------------------------------------------------------------\n",
" RD SE(RD) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.175 0.018 0.139 0.211\n",
"art -0.080 0.038 -0.154 -0.007\n",
"----------------------------------------------------------------------\n",
"Risk Ratio\n",
" RR SE(log(RR)) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.175 0.104 0.143 0.214\n",
"art 0.543 0.361 0.267 1.101\n",
"----------------------------------------------------------------------\n",
"Odds Ratio\n",
" OR SE(log(OR)) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.212 0.125 0.166 0.271\n",
"art 0.495 0.402 0.225 1.088\n",
"======================================================================\n"
]
}
],
"source": [
"iptw = IPTW(df, treatment='art', stabilized=False, standardize='unexposed')\n",
"iptw.regression_models('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False)\n",
"iptw = IPTW(df.drop(columns='cd4_wk45'), treatment='art', outcome='dead', standardize='unexposed')\n",
"iptw.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', \n",
" stabilized=False, print_results=False)\n",
"iptw.marginal_structural_model('art')\n",
"iptw.fit()\n",
"df['sw'] = iptw.Weight\n",
"\n",
"linrisk = smf.gee('dead ~ art', df['id'], df, cov_struct=ind, family=f, weights=df['sw']).fit()\n",
"\n",
"print('RD = ', np.round(linrisk.params[1], 3))\n",
"print('95% CL:', np.round(linrisk.conf_int().iloc[1][0], 3), \n",
" np.round(linrisk.conf_int().iloc[1][1], 3))"
"iptw.summary()"
]
},
{
Expand All @@ -237,37 +286,57 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"name": "stderr",
"output_type": "stream",
"text": [
"RD = -0.08\n",
"95% CL: -0.154 -0.007\n"
"c:\\users\\zivic\\python programs\\development\\zepid\\zepid\\causal\\ipw\\IPTW.py:353: UserWarning: All missing outcome data is assumed to be missing completely at random. To relax this assumption to outcome data is missing at random please use the `missing_model()` function\n",
" \"function\", UserWarning)\n"
]
},
{
"name": "stderr",
"name": "stdout",
"output_type": "stream",
"text": [
"c:\\users\\zivic\\appdata\\local\\programs\\python\\python36\\lib\\site-packages\\statsmodels\\genmod\\generalized_estimating_equations.py:472: DomainWarning: The identity link function does not respect the domain of the Binomial family.\n",
" DomainWarning)\n"
"======================================================================\n",
" Inverse Probability of Treatment Weights \n",
"======================================================================\n",
"Treatment: art No. Observations: 547 \n",
"Outcome: dead No. Missing Outcome: 30 \n",
"g-Model: Logistic Missing Model: None \n",
"======================================================================\n",
"Risk Difference\n",
"----------------------------------------------------------------------\n",
" RD SE(RD) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.175 0.018 0.139 0.211\n",
"art -0.080 0.038 -0.154 -0.007\n",
"----------------------------------------------------------------------\n",
"Risk Ratio\n",
" RR SE(log(RR)) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.175 0.104 0.143 0.214\n",
"art 0.543 0.361 0.267 1.101\n",
"----------------------------------------------------------------------\n",
"Odds Ratio\n",
" OR SE(log(OR)) 95%LCL 95%UCL\n",
"labels \n",
"Intercept 0.212 0.125 0.166 0.271\n",
"art 0.495 0.402 0.225 1.088\n",
"======================================================================\n"
]
}
],
"source": [
"iptw = IPTW(df, treatment='art', stabilized=True, standardize='unexposed')\n",
"iptw.regression_models('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False)\n",
"iptw = IPTW(df.drop(columns='cd4_wk45'), treatment='art', outcome='dead', standardize='unexposed')\n",
"iptw.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', \n",
" stabilized=False, print_results=False)\n",
"iptw.marginal_structural_model('art')\n",
"iptw.fit()\n",
"df['sw'] = iptw.Weight\n",
"\n",
"linrisk = smf.gee('dead ~ art', df['id'], df, cov_struct=ind, family=f, weights=df['sw']).fit()\n",
"\n",
"print('RD = ', np.round(linrisk.params[1], 3))\n",
"print('95% CL:', np.round(linrisk.conf_int().iloc[1][0], 3), \n",
" np.round(linrisk.conf_int().iloc[1][1], 3))"
"iptw.summary()"
]
},
{
Expand Down
Loading

0 comments on commit b931022

Please sign in to comment.