Skip to content

Commit

Permalink
Merge pull request #73 from poldrack/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
poldrack authored Jan 25, 2020
2 parents f1fe7e7 + 8dd6f56 commit ba45508
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 29 deletions.
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
command: |
cat /root/project/ImageAnalyses/*.Rmd | grep library >| /root/project/ImageAnalyses/R_libraries.R
export NARPS_BASEDIR="/tmp/data"; Rscript -e 'library(knitr);library(rmarkdown);rmarkdown::render("/root/project/ImageAnalyses/DecisionAnalysis.Rmd","html_document", output_dir = "/tmp/data/figures")'
no_output_timeout: 10000
no_output_timeout: 90000
- run:
name: run PM analyses
command: |
Expand All @@ -62,6 +62,6 @@ jobs:
- run:
name: create results tarball
command: |
cd /tmp/data; tar zcvf results.tgz ./cached ./figures ./logs ./metadata ./image_diagnostics* ./output/unthresh_concat_zstat/hypo*_voxelmap.nii.gz ./output/overlap_binarized_thresh/hypo*.nii.gz ./output/consensus_analysis/hypo*_1-fdr.nii.gz ./output/ALE/hypo*_fdr_oneminusp.nii.gz ./output/cluster_maps ./output/correlation_unthresh ./PredictionMarkets
cd /tmp/data; tar zcvf results.tgz ./output/npboot_output.RData ./cached ./figures ./logs ./metadata ./image_diagnostics* ./output/unthresh_concat_zstat/hypo*_voxelmap.nii.gz ./output/overlap_binarized_thresh/hypo*.nii.gz ./output/consensus_analysis/hypo*_1-fdr.nii.gz ./output/ALE/hypo*_fdr_oneminusp.nii.gz ./output/cluster_maps ./output/correlation_unthresh ./PredictionMarkets
- store_artifacts:
path: /tmp/data/results.tgz
204 changes: 181 additions & 23 deletions ImageAnalyses/DecisionAnalysis.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ library(xtable)
library(psych)
library(knitr)
library(sessioninfo)
library(boot)
basedir = Sys.getenv('NARPS_BASEDIR')
Expand Down Expand Up @@ -218,39 +218,175 @@ r.squaredGLMM(m_hyp_full)
```

Run bootstrap to get confidence intervals
### Nonparametric bootstrap
The bootMer package performs resampling for lmer models, but it uses a parametric bootstrap which we did not feel adequately addressed concerns about non-normality. Thus, we implemented a nonparametric bootstrap, in which we resample over teams.

First we generate wide data, for resampling.

```{r}
decisions_wide <- hyp_df %>%
mutate(Hypothesis = str_c("Hyp", varnum, sep="")) %>%
dplyr::select(teamID, Decision, Hypothesis) %>%
spread(Hypothesis,Decision)
team_info <- hyp_df %>%
filter(varnum==1) %>%
dplyr::select(fwhm, used_fmriprep_data,package,
testing, movement_modeling,teamID)
decisions_wide <- join(decisions_wide,
team_info,
by='teamID')
```

mySumm <- function(.) {
c(beta=fixef(.))
Function to run analysis within bootstrap. For the regressors that are parametric or binary, we can simply compute a bootstrap confidence interval on their parameter estimate. For the two variables that are factorial, we can't do this because some bootstrap samples will not contain all of the same values for the factors, and thus the models are not equivalent. Instead, for those variables we perform a model comparison and save the model comparison statistics so that we can compute a bootstrap estimate on those (ala https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5482523/).

```{r message=FALSE,warning=FALSE}
bs_fun <- function(df, indices){
df <- df[indices,]
control = glmerControl(
optimizer ='optimx',
optCtrl=list(method='nlminb'))
# we take in a wide data frame, but must
# convert it back to long form for lmer
df_long <- df %>% gather(key='Hypothesis',
value='Decision',
-movement_modeling,-teamID,-fwhm,
-used_fmriprep_data,-package,-testing)
model_full <- glmer(Decision ~ fwhm +
used_fmriprep_data +
movement_modeling +
Hypothesis +
package +
testing + (1|teamID),
data = df_long,family=binomial,
control = control)
# get coefs for all but factorial effects
fixed_effect_coefs = fixef(model_full)[2:4]
# do model comparison for Hypothesis effect
model_nohyp <- glmer(Decision ~ fwhm +
used_fmriprep_data +
movement_modeling +
package +
testing + (1|teamID),
data = df_long,family=binomial,
control = control)
# perform model comparison and save statistics
anova_nohyp <- anova(model_nohyp,model_full)
fixed_effect_coefs['hyp_BICdiff'] =
diff(anova_nohyp$BIC)
fixed_effect_coefs['hyp_AICdiff'] =
diff(anova_nohyp$AIC)
# do model comparison for package effect
model_nopackage <- glmer(Decision ~ Hypothesis +
fwhm +
used_fmriprep_data +
movement_modeling +
testing + (1|teamID),
data = df_long,family=binomial,
control = control)
# perform model comparison and save statistics
anova_nopackage <- anova(model_nopackage,model_full)
fixed_effect_coefs['package_BICdiff'] =
diff(anova_nopackage$BIC)
fixed_effect_coefs['package_AICdiff'] =
diff(anova_nopackage$AIC)
# do model comparison for testing effect
model_notesting <- glmer(Decision ~ Hypothesis +
fwhm +
used_fmriprep_data +
movement_modeling +
package + (1|teamID),
data = df_long,family=binomial,
control = control)
# perform model comparison and save statistics
anova_notesting <- anova(model_notesting,model_full)
fixed_effect_coefs['testing_BICdiff'] =
diff(anova_notesting$BIC)
fixed_effect_coefs['testing_AICdiff'] =
diff(anova_notesting$AIC)
fixed_effect_coefs
}
boo01 <- bootMer(m_hyp_full, mySumm, nsim = 1000, seed=12345678)
```


```{r}
# perform bootstrap sampling with 1000 replications
Out <- boot(data=decisions_wide, statistic=bs_fun, R=1000)
# save bootstrap samples
save(Out,file=paste(basedir,
'output/npboot_output.RData',
sep='/'))
```

Get confidence intervals, first for betas for parametric regressors.

```{r}
model.coefs <- coef(summary_full_model)
percentile_bs_ci <- data.frame(lower=array(NA, 17),
mean=array(NA, 17),
upper=array(NA,17),
row.names=row.names(model.coefs))
boot_means = apply(boo01$t,2,mean)
for (i in 1:17){
ci_raw <- boot::boot.ci(boot.out = boo01, type = c("perc"), index = i)
percentile_bs_ci[i,] <- c(ci_raw$percent[4],boot_means[i],ci_raw$percent[5])
# set up data frame to save results
rownames = c('fwhm',
'used_fmriprep_data',
'movement_modeling',
'Hypothesis (deltaBIC)',
'Hypothesis (deltaAIC)',
'package (deltaBIC)',
'package (deltaAIC)',
'testing (deltaBIC)',
'testing (deltaAIC)')
percentile_np_ci <- data.frame(lower=array(NA,
length(rownames)),
mean=array(NA,
length(rownames)),
upper=array(NA,
length(rownames)),
exceedence=array(NA,
length(rownames)),
row.names=rownames)
# get means for parameters across amples
npboot_means = apply(Out$t,2,mean)
# loop over parametric regressors and get percentile CI
for (i in 1:3){
ci_raw <- boot::boot.ci(boot.out = Out, type = c("perc"), index = i)
percentile_np_ci[i,] <- c(ci_raw$percent[4],npboot_means[i],ci_raw$percent[5],NA)
}
```

Get CI for model comparisons for factorial regressors (package and testing).

```{r}
# get ci for BIC diff
for (i in 4:9){
ci_raw <- boot::boot.ci(boot.out = Out, type = c("perc"), index = i)
# use lower p because we want to know how often full model
# is better than reduced model
percentile_np_ci[i,] <- c(ci_raw$percent[4],npboot_means[i],ci_raw$percent[5],mean(Out$t[,i]<0))
}
kable(percentile_bs_ci)
write.table(percentile_bs_ci,
file=paste(basedir,"figures/full_model_bootstrap_CI.tsv",sep='/'),
kable(percentile_np_ci)
write.table(percentile_np_ci,
file=paste(basedir,"output/nonparametric_bootstrap_CI.tsv",sep='/'),
sep='\t')
```


Get odds ratios for factorial variables.

```{r}
Expand Down Expand Up @@ -406,12 +542,36 @@ summary_df['Chi-squared'] = c(anova_hyp$Chisq[2],
anova_package$Chisq[2],
anova_testing$Chisq[2],
anova_movement$Chisq[2])
summary_df['P value'] = c(anova_hyp$"Pr(>Chisq)"[2],
summary_df['P value (parametric)'] = c(anova_hyp$"Pr(>Chisq)"[2],
anova_smoothing$"Pr(>Chisq)"[2],
anova_fmriprep$"Pr(>Chisq)"[2],
anova_package$"Pr(>Chisq)"[2],
anova_package$"Pr(>Chisq)"[2],
anova_testing$"Pr(>Chisq)"[2],
anova_movement$"Pr(>Chisq)"[2])
summary_df['Bootstrap CI'] = c(NA,
sprintf('(%0.2f - %0.2f)',
percentile_np_ci['fwhm','lower'],
percentile_np_ci['fwhm','upper']),
sprintf('(%0.2f - %0.2f)',
percentile_np_ci['used_fmriprep_data','lower'],
percentile_np_ci['used_fmriprep_data','upper']),
NA,
NA,
sprintf('(%0.2f - %0.2f)',
percentile_np_ci['movement_modeling','lower'],
percentile_np_ci['movement_modeling','upper']))
summary_df['Model selection probability (BIC)'] = c(percentile_np_ci['Hypothesis (deltaBIC)','exceedence'],
NA,
NA,
percentile_np_ci['package (deltaBIC)','exceedence'],
percentile_np_ci['testing (deltaBIC)','exceedence'],
NA)
summary_df['Model selection probability (AIC)'] = c(percentile_np_ci['Hypothesis (deltaAIC)','exceedence'],
NA,
NA,
percentile_np_ci['package (deltaAIC)','exceedence'],
percentile_np_ci['testing (deltaAIC)','exceedence'],
NA)
summary_df['Delta R^2'] = c(delta_r2_hyp[2,1],
delta_r2_smoothing[2,1],
delta_r2_fmriprep[2,1],
Expand All @@ -421,8 +581,6 @@ summary_df['Delta R^2'] = c(delta_r2_hyp[2,1],
summary_df <- summary_df %>%
mutate(`Chi-squared` = formatC(summary_df$'Chi-squared',
digits=2,format='f'),
`P value` = formatC(summary_df$'P value',
digits=3,format='f'),
`Delta R^2` = formatC(summary_df$'Delta R^2',
digits=2,format='f')
)
Expand Down
21 changes: 19 additions & 2 deletions ImageAnalyses/MakeSupplementaryFigure1.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,17 +114,34 @@ def mk_supp_figure1(narps, metadata):
narps.dirs.dirs['figures'],
'SuppFigure1.png'))

# save data to file
metadata_merged.to_csv(
os.path.join(
narps.dirs.dirs['figures'],
'MethodsTableMetadataMerged.csv')
)
decision_wide.to_csv(
os.path.join(
narps.dirs.dirs['figures'],
'DecisionDataWide.csv')
)
confidence_wide.to_csv(
os.path.join(
narps.dirs.dirs['figures'],
'ConfidenceDataWide.csv')
)


if __name__ == "__main__":
# set an environment variable called NARPS_BASEDIR
# with location of base directory
if 'NARPS_BASEDIR' in os.environ:
basedir = os.environ['NARPS_BASEDIR']
else:
basedir = '/data'
basedir = '/tmp/data'

# setup main class
narps = Narps(basedir)
narps = Narps(basedir, dataurl=os.environ['DATA_URL'])
narps.load_data()

logfile = os.path.join(
Expand Down
4 changes: 2 additions & 2 deletions PredictionMarketAnalyses/PM_Analyses.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ supptable_df <- supptable_df %>%
TeamsOutside = recode(supptable_df$t_exceed, `0` = '', `1` = '*')) %>%
unite('CI', CI_lower:CI_upper, sep='-') %>%
mutate(CI = sapply(CI,bracket)) %>%
unite('Non-teams FV', NonTeams:NonTeamsOutside, sep=' ') %>%
unite('Teams FV', Teams:TeamsOutside, sep=' ') %>%
unite('Non-teams market prediction', NonTeams:NonTeamsOutside, sep=' ') %>%
unite('Teams market prediction', Teams:TeamsOutside, sep=' ') %>%
dplyr::select(-fv,-fv_95l, -fv_95u, -price.0, -price.1, -t_exceed, -nt_exceed, -hypothesis)
supptable_df <- supptable_df %>%
rename(`Hyp #` = hid)
Expand Down

0 comments on commit ba45508

Please sign in to comment.