From e83f55bee197a213ea65ad746a5b25ed24c55ae1 Mon Sep 17 00:00:00 2001
From: Etienne Roesch
Date: Fri, 23 Jul 2021 16:08:56 +0100
Subject: [PATCH] ENH: Initial commit.
---
Appendix_-_Glossary/Glossary.ipynb | 73 +
.../Ch0.1_Beginning.ipynb | 222 +++
.../Ch0_Introduction.ipynb | 223 +++
Chapter_0_-_Introduction/README.md | 4 +
Chapter_1_-_Z-test/Ch1_Bayesian Z-test.ipynb | 986 ++++++++++
.../4.5.0/models/23wo2jct/model_23wo2jct.o | Bin 0 -> 260568 bytes
.../Ch2_One-sample_t-test.ipynb | 691 +++++++
Chapter_2_-_One-sample_t-test/test.ipynb | 65 +
LICENSE.txt | 88 +
README.md | 10 +
environment.yml | 13 +
requirements_conda.txt | 153 ++
requirements_pip.txt | 106 +
...tegorical regression (one-way ANOVA).ipynb | 785 ++++++++
...n estimation of logistic regression.ipynb | 1262 ++++++++++++
wip/Bayesian Factor analysis.ipynb | 289 +++
wip/Bayesian Multiple regression .ipynb | 902 +++++++++
wip/Bayesian Poisson estimation.ipynb | 1304 +++++++++++++
wip/Bayesian Robust Regression.ipynb | 531 +++++
...an between subject t-test estimation.ipynb | 1044 ++++++++++
...mation of 2x2 Between subjects ANOVA.ipynb | 299 +++
...ian estimation of Linear mixed model.ipynb | 924 +++++++++
...yesian estimation of Welch's t-test .ipynb | 946 +++++++++
...timation of repeated measures t-test.ipynb | 990 ++++++++++
...timation of simple linear regression.ipynb | 1228 ++++++++++++
...yesian estimation ordinal regression.ipynb | 126 ++
wip/Bayesian fisher test.ipynb | 331 ++++
...sian multiple correlation estimation.ipynb | 665 +++++++
...ian one-way ANOVA (between subjects).ipynb | 1730 +++++++++++++++++
wip/Data/.Rhistory | 512 +++++
wip/Data/Atir Rosenzweig Dunning 2015.csv | 203 ++
wip/Data/Awards.csv | 201 ++
wip/Data/Birthweight_reduced_kg.csv | 43 +
wip/Data/Cholesterol_R.csv | 19 +
wip/Data/Crime.csv | 48 +
wip/Data/Dawtry Sutton and Sibley 2015.csv | 306 +++
wip/Data/Diet.csv | 79 +
wip/Data/Harvie et al. 2015.csv | 49 +
...James et al 2015 Experiment 2 Data Set.csv | 73 +
wip/Data/Maglio and Polman 2014.csv | 203 ++
...Mehr Song and Spelke 2016 Experiment 1.csv | 97 +
wip/Data/Schroeder and Epley 2015.csv | 40 +
wip/Data/Sleepstudy_data | 181 ++
wip/Data/Turning_Hands_Data_Final.csv | 103 +
wip/Data/Tworek and Cimpian 2016 Study 1.csv | 132 ++
wip/Paths to Bayes.drawio | 13 +
wip/Patsy contrast analysis tutorial.ipynb | 412 ++++
wip/Practicing Bayesian Statistics.md | 160 ++
wip/README.md | 2 +
wip/Within subject ANOVA.ipynb | 1427 ++++++++++++++
.../4.5.0/models/ceykbmxk/model_ceykbmxk.o | Bin 0 -> 283176 bytes
wip/debug.log | 5 +
52 files changed, 20298 insertions(+)
create mode 100644 Appendix_-_Glossary/Glossary.ipynb
create mode 100644 Chapter_0_-_Introduction/Ch0.1_Beginning.ipynb
create mode 100644 Chapter_0_-_Introduction/Ch0_Introduction.ipynb
create mode 100644 Chapter_0_-_Introduction/README.md
create mode 100644 Chapter_1_-_Z-test/Ch1_Bayesian Z-test.ipynb
create mode 100644 Chapter_1_-_Z-test/build/temp.macosx-10.9-x86_64-3.9/Users/eroesch/Library/Caches/httpstan/4.5.0/models/23wo2jct/model_23wo2jct.o
create mode 100644 Chapter_2_-_One-sample_t-test/Ch2_One-sample_t-test.ipynb
create mode 100644 Chapter_2_-_One-sample_t-test/test.ipynb
create mode 100644 LICENSE.txt
create mode 100644 README.md
create mode 100644 environment.yml
create mode 100644 requirements_conda.txt
create mode 100644 requirements_pip.txt
create mode 100644 wip/Bayesian categorical regression (one-way ANOVA).ipynb
create mode 100644 wip/Bayesian estimation of logistic regression.ipynb
create mode 100644 wip/Bayesian Factor analysis.ipynb
create mode 100644 wip/Bayesian Multiple regression .ipynb
create mode 100644 wip/Bayesian Poisson estimation.ipynb
create mode 100644 wip/Bayesian Robust Regression.ipynb
create mode 100644 wip/Bayesian between subject t-test estimation.ipynb
create mode 100644 wip/Bayesian estimation of 2x2 Between subjects ANOVA.ipynb
create mode 100644 wip/Bayesian estimation of Linear mixed model.ipynb
create mode 100644 wip/Bayesian estimation of Welch's t-test .ipynb
create mode 100644 wip/Bayesian estimation of repeated measures t-test.ipynb
create mode 100644 wip/Bayesian estimation of simple linear regression.ipynb
create mode 100644 wip/Bayesian estimation ordinal regression.ipynb
create mode 100644 wip/Bayesian fisher test.ipynb
create mode 100644 wip/Bayesian multiple correlation estimation.ipynb
create mode 100644 wip/Bayesian one-way ANOVA (between subjects).ipynb
create mode 100644 wip/Data/.Rhistory
create mode 100644 wip/Data/Atir Rosenzweig Dunning 2015.csv
create mode 100644 wip/Data/Awards.csv
create mode 100644 wip/Data/Birthweight_reduced_kg.csv
create mode 100644 wip/Data/Cholesterol_R.csv
create mode 100644 wip/Data/Crime.csv
create mode 100644 wip/Data/Dawtry Sutton and Sibley 2015.csv
create mode 100644 wip/Data/Diet.csv
create mode 100644 wip/Data/Harvie et al. 2015.csv
create mode 100644 wip/Data/James et al 2015 Experiment 2 Data Set.csv
create mode 100644 wip/Data/Maglio and Polman 2014.csv
create mode 100644 wip/Data/Mehr Song and Spelke 2016 Experiment 1.csv
create mode 100644 wip/Data/Schroeder and Epley 2015.csv
create mode 100644 wip/Data/Sleepstudy_data
create mode 100644 wip/Data/Turning_Hands_Data_Final.csv
create mode 100644 wip/Data/Tworek and Cimpian 2016 Study 1.csv
create mode 100644 wip/Paths to Bayes.drawio
create mode 100644 wip/Patsy contrast analysis tutorial.ipynb
create mode 100644 wip/Practicing Bayesian Statistics.md
create mode 100644 wip/README.md
create mode 100644 wip/Within subject ANOVA.ipynb
create mode 100644 wip/build/temp.macosx-10.9-x86_64-3.9/Users/eroesch/Library/Caches/httpstan/4.5.0/models/ceykbmxk/model_ceykbmxk.o
create mode 100644 wip/debug.log
diff --git a/Appendix_-_Glossary/Glossary.ipynb b/Appendix_-_Glossary/Glossary.ipynb
new file mode 100644
index 0000000..3a30687
--- /dev/null
+++ b/Appendix_-_Glossary/Glossary.ipynb
@@ -0,0 +1,73 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "ca7f37e5",
+ "metadata": {},
+ "source": [
+ "# Glossary"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a81e1836",
+ "metadata": {},
+ "source": [
+ "## M"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e1d11961",
+ "metadata": {},
+ "source": [
+ "* Model - A model is an expression about the state of the world, as we hypothesise it to be. It is always formulated in the form of a mathematical expression, even if your favourite statistical software hides it from you, and it is always based on"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Stan",
+ "language": "python",
+ "name": "stan"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.5"
+ },
+ "toc": {
+ "colors": {
+ "hover_highlight": "#DAA520",
+ "navigate_num": "#000000",
+ "navigate_text": "#333333",
+ "running_highlight": "#FF0000",
+ "selected_highlight": "#FFD700",
+ "sidebar_border": "#EEEEEE",
+ "wrapper_background": "#FFFFFF"
+ },
+ "moveMenuLeft": true,
+ "nav_menu": {
+ "height": "48px",
+ "width": "252px"
+ },
+ "navigate_menu": true,
+ "number_sections": true,
+ "sideBar": true,
+ "threshold": 4,
+ "toc_cell": false,
+ "toc_section_display": "block",
+ "toc_window_display": true,
+ "widenNotebook": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Chapter_0_-_Introduction/Ch0.1_Beginning.ipynb b/Chapter_0_-_Introduction/Ch0.1_Beginning.ipynb
new file mode 100644
index 0000000..02b2775
--- /dev/null
+++ b/Chapter_0_-_Introduction/Ch0.1_Beginning.ipynb
@@ -0,0 +1,222 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Practicing Bayesian statistics\n",
+ "\n",
+ "## Contents\n",
+ "- Introduction\n",
+ "- The two statistical philosophies\n",
+ " - Frequentist Statistics\n",
+ " - Bayesian Statistics\n",
+ "- Bayes Theorem\n",
+ " - prior\n",
+ " - Likelihood\n",
+ " - Posterior\n",
+ "- Steps of a Bayesian analysis \n",
+ "- Effect indices/ hypothesis testing\n",
+ "\n",
+ " - Maximum a posteriori estimates (posterior point estimates )\n",
+ " - Credible intervals\n",
+ "- Markov Chain Monte Carlo (MCMC)\n",
+ "\n",
+ "## Introduction\n",
+ "When Efron and Hastie (2016) identify that “Statistical inference is an unusually wide-ranging discipline, located as it is at the triple-point of mathematics, empirical science, and philosophy” (pp. xv) they make its multidisciplinary nature and complexity clear. This complexity has led to misinterpretation of the standardly taught (frequentist) analysis methods by applied researchers (Hoekstra, Morey, Rouder, & Wagenmakers, 2014; Lyu, Xu, Zhao, Zuo, & Hu, 2020), seemingly resulting primarily from the “philosophy” part of Efron and Hastie’s quote above. Researchers demonstrate a dominant use of frequentist methods, whilst seemingly wanting to interpret the results of their analyses under the Bayesian framework (Etz, Bartlema, Vanpaemel, Wagenmakers, & Morey, 2019). The question then is: what is Bayesian statistics?\n",
+ "\n",
+ "## The two statistical philosophies\n",
+ "“The numbers have no way of speaking for themselves. We speak for them. We imbue them with meaning.”\n",
+ "\n",
+ "Silver (2012).\n",
+ "\n",
+ "“Technical mathematical arguments and formula, though valid and of interest, must always assume, tacitly or explicitly, a philosophy.”\n",
+ "\n",
+ "Briggs (2019).\n",
+ "\n",
+ "“An important insight that would seem desirable for any statistical philosophy: Conclusions are only as plausible as the subjective foundations on which they are based.”\n",
+ "\n",
+ "Western (1999).\n",
+ "\n",
+ "As the quotes above point out, there is no free lunch when analysing data and conducting statistical inference, but this is often forgotten or ritualised to make it seem as such (Gigerenzer, 2004).\n",
+ "\n",
+ " Crucially, statistical inference is only possible at all because of the application of probability theory and its interpretation when the data are analysed. However, heated debates about what interpretation of probability to apply have occurred since the inception of statistics and have resulted in what has been termed “The statistics Wars” within the statistical literature (Gigerenzer et al., 1983; Mayo, 2018; Salsburg, 2002).\n",
+ "\n",
+ "Despite these worthwhile discussions, it is more important to remember that it is the act of applying an interpretation of probability that makes statistical inference possible at all; and, crucially, to understand that what separates the Frequentist and Bayesian frameworks is their differing interpretations of probability.\n",
+ "\n",
+ " The two interpretations of probability are objective and subjective probability. Lambert (2018), describes it as a difference in world view by the analyst. As a result, by applying either framework, the analyst is making assumptions about how to model the world through these different views of probability.\n",
+ "\n",
+ "Frequentist Statistics (objective probability).\n",
+ "\n",
+ "The philosophical grounding of the Frequentist statistics framework is objective probability. The meaning of objective probability in terms of data is expressed by the view that any set of observed data is a sample that has been generated from an infinite replication of the experiment from the data generating process. Therefore, any inference that is justifiable within this framework by the analyst treats the observed sample with a long-run view of repeated sampling; meaning the data are treated as random but the statistical models that are fit to the data are an attempt to estimate fixed population parameters.\n",
+ "This variation is expressed through attempts to estimate sampling error, appears as varying estimates between experiments or observations, and stipulates the necessity for replication and long-run error control to understand, from a statistical analysis view, the phenomena under study.\n",
+ "\n",
+ "Frequentist statistics dominates research with the use of Null hypothesis significance testing (NHST). NHST tools for inference are based on this repeated sampling paradigm, such as the p-value, which is the $P(D|H_0)$, and confidence intervals (CI), which are not probability statements but an interval estimate of the parameter estimates generated from the data. As such, a CI is again based on repeated sampling, such that 95% of the confidence intervals calculated will contain the true population parameter within their estimated intervals.\n",
+ "\n",
+ "Bayesian statistics (subjective probability).\n",
+ "\n",
+ "Bayesian statistics applies probability as an expression of the analyst’s belief and as an expression of their uncertainty around what they are analysing. This is expressed in the reversal of what the Frequentists do by treating the parameters of the statistical models that they fit as random and the data as fixed. In terms of parameters then a Bayesian does not have to deny the existence of a population parameter but can accept the uncertainty generated by each experiment/acquisition of a sample, that the estimates will vary due to sample variation. However, a Bayesian analysis can also specify that differences in parameters are due to our uncertainty based on our knowledge.\n",
+ "\n",
+ " Under this framework, Bayes theorem and data can be used to update beliefs about phenomena being studied through data analysis. Bayesian statistics allows for inference statements such as $P(H_0|D)$, which is of course the opposite of that above from frequentist methods. This type of inference is achieved by the application of Bayes rule.\n",
+ "\n",
+ "## Bayes Rule\n",
+ "$$P(A,B) = P(B,A)\\: \\:\\: \\: \\: \\:(1)$$\n",
+ " $$P(B|A)P(A) = P(A|B)P(B)\\: \\:\\: \\: \\: \\:(2) $$\n",
+ " $$P(A|B) = \\frac{P(B|A)P(A)}{P(B)}\\: \\:\\: \\: (3)$$\n",
+ "\n",
+ "Hopefully, by outlining Bayes rule above, it becomes explicit why we would want to use the statistical practices on which it is based. In case it is not explicit, it is because the posterior allows us to answer a question in which we are interested: what is the probability of the hypothesis we are testing, based on the assumptions of the statistical models we use to test the data.\n",
+ "\n",
+ "## Steps of a Bayesian analysis\n",
+ "Kruschke (2015) outlines general Bayesian analysis steps which include:\n",
+ "\n",
+ "1. Identify data relevant to the research question including the variables of interest.\n",
+ "2. Identify a descriptive model for the data that you are going to analyse. Meaning the mathematical model and the parameters should be appropriate to the data.\n",
+ "3. Specify priors for the parameter of the model, priors should be reasonable and will have to pass an audience of reviewers.\n",
+ "4. Use Bayes theorem and calculate the posterior.\n",
+ "5. Conduct posterior predictive checks.\n",
+ "\n",
+ "## Testing indices/hypothesis testing in the Bayesian framework\n",
+ "\n",
+ " Researchers are standardly trained to use NHST methods in analysing their data, and the ingrained practice of using p-values has created an illusory sense of ease in understanding data analysis and statistical inference, with decision-making rules such as the standard use of thresholds (i.e. ≤ .05) and the conclusion that a result or body of work is meaningful or significant in the everyday sense of the word when it is only significant in the statistical sense, with this culminating in publication.\n",
+ "\n",
+ " A result of the standard training in NHST is that the first question any researcher curious about Bayesian methods is likely to ask is: what is its p-value equivalent? A more general and helpful question, though, would be: what are the outputs that are used for making inferences from the data within this framework? Answering that question is likely to result in trepidation by interested parties in applying Bayesian tools in analysing data, due to the flexibility of the framework and the variety of the analysts’ options. This can either be seen as complicating or liberating.\n",
+ "\n",
+ " One way to simplify Bayesian methods of inference is to separate out Bayes factor analysis and Bayesian estimation. There is plenty of discussion in the literature of the advantages and disadvantages of each method (Dienes, 2020; Makowski, Ben-Shachar, Chen, & Lüdecke, 2019), with no general consensus.\n",
+ " The notebooks here focus on Bayesian estimation. For clarity, this is not to suggest a preference because when it comes to statistical tools more options are an advantage but to go over those methods is a project on its own.\n",
+ " Bayesian estimation test indices\n",
+ " Point estimates (Mean, median)/Maximum a posteriori estimation (MAP - the mode)\n",
+ " Posterior mean - minimises the expected square error\n",
+ " Posterior median - minimises expected absolute error\n",
+ " MAP - most probable value on the posterior distribution.\n",
+ "\n",
+ " Expression of the uncertainty in parameter estimates.\n",
+ " Credible intervals Kruschke (2015) (plausibility intervals, McElreath, 2020; uncertainty intervals, Gelman et al., 2013).\n",
+ "\n",
+ " 95% credible interval - Kruschke (2015) recommends 10000 effective samples for stable estimates\n",
+ "\n",
+ " 50% credible interval - (Gelman, 2016) suggest using the 50% credibility interval, which gives the quantile interval between 25% and 75% of the posterior distribution.\n",
+ " Gelman argues that using this interval increases the computational stability, gives the credibility interpretation that the true value is contained in this 50% interval, and finally this interval helps avoid conveying false certainty.\n",
+ "\n",
+ " ROPE (95%)\n",
+ "\n",
+ " ROPE (Full)\n",
+ "\n",
+ " Hopefully, the different recommendations of the interval size to use when summarising the posterior make the reader think this is arbitrary, like a p-value threshold of .05 — because it is, and it is down to the analyst to decide and make the argument for their choice, whether that choice is due to expert suggestions or to general research practices.\n",
+ "\n",
+ "## Markov chain Monte Carlo (MCMC)\n",
+ "In layman’s terms, MCMC methods are mathematical tools/algorithms for sampling from posterior distributions. MCMC underlies all modern applications of Bayesian statistics. This is because many of the statistical models used to answer the complex questions with which statisticians and researchers are concerned do not have analytical solutions for calculating the posterior. To overcome this, numerical methods such as MCMC must be applied.\n",
+ "\n",
+ "## Why Stan?\n",
+ "\n",
+ "\n",
+ "### References\n",
+ "\n",
+ "Betancourt, M. (2017). A conceptual introduction to Hamiltonian Monte Carlo. arXiv preprint\t \tarXiv:1701.02434.\n",
+ "\n",
+ "Briggs, W. M., 2019. Everything Wrong with P-Values Under One Roof. In Beyond Traditional Probabilistic Methods in Economics, Kreinovich, V., Thach, N. N., Trung, N. D., Thanh, D. V. (eds.), pp 22–44. DOI 978-3-030-04200-4_2\n",
+ "\n",
+ "Briggs, W. M. (2012). It is time to stop teaching frequentism to non-statisticians. arXiv preprint\t\t arXiv:1201.2590.\n",
+ "\n",
+ "Bürkner, P. C. (2017). brms: An R package for Bayesian multilevel models using Stan. Journal\t\t of statistical software, 80(1), 1-28.\n",
+ "\n",
+ "Carpenter, B., Gelman, A., Hoffman, M. D., Lee, D., Goodrich, B., Betancourt, M., ... & Riddell,\t\t A. (2017). Stan: A probabilistic programming language. Journal of statistical software,\t\t 76(1).\n",
+ "\n",
+ "Dienes, Z. (2020). How to use and report Bayesian hypothesis tests.\n",
+ "\n",
+ "Efron, B., & Hastie. (2016). Computer age Statistical Inference: Algorithms, evidence, and data science. New York, NY: Cambridge University Press.\n",
+ "\n",
+ "Etz, A., Bartlema, A., Vanpaemel, W., Wagenmakers, E. J., & Morey, R. D. (2019). An exploratory survey of student and researcher intuitions about statistical evidence. In\t\t Poster presented at the annual meeting of the Association for Psychological Science, Washington, DC.\n",
+ "\n",
+ "Gabry, J., & Goodrich, B. (2016). rstanarm: Bayesian applied regression modeling via stan\t[computer software manual]. Retrieved from http://CRAN.R‐project.org/\t\t\t\t package=rstanarm (R package version 2.9.0‐1)\n",
+ "\n",
+ "Gelman, A. (2016, November 5) Why I prefer 50% rather than 95% intervals [blog post]. Retrieved from https://statmodeling.stat.columbia.edu/2016/11/05/why-i-prefer-50-to-95-intervals/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+MyDataScienceBlogs+%28My+Data+Science+Blogs%29\n",
+ "\n",
+ "Gigerenzer, G., Swijtink, Z., Porter, T., Daston, L., Beatty, J., Kruger, L. (1989). The empire \t \tof chance: How probability changed science and everyday life. New York, NY:\tCambridge University Press.\n",
+ "\n",
+ "Hoekstra, R., Morey, R. D., Rouder, J. N., & Wagenmakers, E. J. (2014). Robust misinterpretation of confidence intervals. Psychonomic bulletin & review, 21(5), 1157-1164.\n",
+ "\n",
+ "Kruschke, J. K. (2015). Doing Bayesian data analysis: A tutorial with R, JAGS and Stan. London, England: Academic Press.\n",
+ "\n",
+ "Lambert, B. (2018). A student guide to Bayesian statistics. London, England: SAGE.\n",
+ "\n",
+ "Lynch, S. M., & Bartlett, B. (2019). Bayesian Statistics in Sociology: Past, Present, and Future.Annual Review of Sociology, 45, 47-68.\n",
+ "\n",
+ "Lyu, X. K., Xu, Y., Zhao, X. F., Zuo, X. N., & Hu, C. P. (2020). Beyond psychology: prevalence of\tp values and confidence interval misinterpretation across different fields. Journal of Pacific Rim Psychology, 14.\n",
+ "\n",
+ "Makowski, D., Ben-Shachar, M. S., Chen, S. H., & Lüdecke, D. (2019). Indices of effect existence and\t\t significance in the bayesian framework. Frontiers in psychology, 10, 2767.\n",
+ "\n",
+ "Mayo, D.G. (2018). Statistical inference as severe testing: How to get beyond the statistics\t\t wars. New York: NY. Cambridge University Press.\n",
+ "\n",
+ "McElreath, R. (2020). Statistical rethinking: A Bayesian course with examples in R and\t \tStan. London, England: CRC Press.\n",
+ "\n",
+ "Morey, R. D., Hoekstra, R., Rouder, J. N., Lee, M. D., & Wagenmakers, E. J. (2016). The fallacy of placing confidence in confidence intervals. Psychonomic bulletin & review, 23(1), 103-123.\n",
+ "\n",
+ "Morey, R. D., Rouder, J. N., Jamil, T., & Morey, M. R. D. (2015). Package ‘bayesfactor’. URLh\t\t http://cran/r-projectorg/web/packages/BayesFactor/BayesFactor pdf i (accessed 1006\t\t 15).\n",
+ "\n",
+ "Muth, C., Oravecz, Z., & Gabry, J. (2018). User-friendly Bayesian regression modeling: A tutorial with rstanarm and shinystan. Quantitative Methods for Psychology, 14(2),\t\t 99-119.\n",
+ "\n",
+ "Salsburg, D. (2002). The lady tasting tea: How statistics revolutionised science in the twentieth\t\t century. New York, NY: Henry Holt.\n",
+ "\n",
+ "Stan Development Team. (2017). Stan modeling language users guide and reference manual,\tversion 2.17.0. Retrieved from http://mc-stan.org/\n",
+ "\n",
+ "Vasishth, S., & Nicenboim, B. (2016). Statistical methods for linguistic research: Foundational ideas–Part I. Language and Linguistics Compass, 10(8), 349-369.\n",
+ "\n",
+ "Vasishth, S., & Nicenboim, B. (2016). Statistical methods for linguistic research: Foundational\t\t ideas–Part I. Language and Linguistics Compass, 10(8), 349-369.\n",
+ "\n",
+ "Western, B. (1999). Bayesian analysis for sociologists: An introduction. Sociological Methods &\t Research, 28(1), 7-34.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Stan",
+ "language": "python",
+ "name": "stan"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.5"
+ },
+ "toc": {
+ "colors": {
+ "hover_highlight": "#DAA520",
+ "navigate_num": "#000000",
+ "navigate_text": "#333333",
+ "running_highlight": "#FF0000",
+ "selected_highlight": "#FFD700",
+ "sidebar_border": "#EEEEEE",
+ "wrapper_background": "#FFFFFF"
+ },
+ "moveMenuLeft": true,
+ "nav_menu": {
+ "height": "243px",
+ "width": "252px"
+ },
+ "navigate_menu": true,
+ "number_sections": true,
+ "sideBar": true,
+ "threshold": 4,
+ "toc_cell": false,
+ "toc_section_display": "block",
+ "toc_window_display": true,
+ "widenNotebook": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/Chapter_0_-_Introduction/Ch0_Introduction.ipynb b/Chapter_0_-_Introduction/Ch0_Introduction.ipynb
new file mode 100644
index 0000000..2f16aba
--- /dev/null
+++ b/Chapter_0_-_Introduction/Ch0_Introduction.ipynb
@@ -0,0 +1,223 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Statistical Methods for Research Workers: Bayes for Psychologists & Neuroscientists\n",
+ "\n",
+ "##### Version 0.1 (EBRLab)\n",
+ "\n",
+ "The full Github repository is available at [github/ebrlab/Statistical-methods-for-research-workers-bayes-for-psychologists-and-neuroscientists](https://github.com/ebrlab/Statistical-methods-for-research-workers-bayes-for-psychologists-and-neuroscientists). We hope you enjoy this book, and encourage contributions!\n",
+ "\n",
+ "This work is shared under a [CC BY-NC 4.0 license](https://creativecommons.org/licenses/by-nc/4.0/), which means:\n",
+ "* You CAN share — copy and redistribute the material in any medium or format\n",
+ "* You CAN adapt — remix, transform, and build upon the material\n",
+ "* You MUST give proper appropriate credit - provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.\n",
+ "\n",
+ "EBRLab (in preparation). _Statistical methods for research workers: Bayes for psychologists and neuroscientists_. [https://github.com/ebrlab/Statistical-methods-for-research-workers-bayes-for-psychologists-and-neuroscientists](https://github.com/ebrlab/Statistical-methods-for-research-workers-bayes-for-psychologists-and-neuroscientists). doi: https://doi.org/10.5281/zenodo.8475.\n",
+ "\n",
+ "* You CANNOT use the material for commercial purposes."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**tl;dr**: In these pages, you will be shown how Bayesian statistics can work for the kinds of things that researchers in psychology and neuroscience might want to do, using Python notebooks. You can dive in the notebooks and copy-paste what's most relevant to you, or you can take your time and follow the table of content. We aim to explain what it means to run a Bayesian analysis, and introduce concepts as we need them. Therefore, it may be useful to review the earlier notebooks even if you are not interested in these simpler statistical tests."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "_\"[...] we may say that a phenomenon is experimentally demonstrable when we know how to conduct an experiment which will rarely fail to give us statistically significant results.\"_\n",
+ "[Ronald A. Fisher](https://en.wikipedia.org/wiki/Ronald_Fisher) (17 February 1890 - 29 July 1962). The design of experiments (1951). p. 14.\n",
+ "\n",
+ "_\"All models are wrong but some models are useful.\"_\n",
+ "[George E. P. Box](https://en.wikipedia.org/wiki/George_E._P._Box) (18 October 1919 – 28 March 2013). Robustness in the Strategy of Scientific Model Building (1979). p. 2.\n",
+ "\n",
+ "_\"The big problem in science is not cheaters or opportunists, but sincere researchers who have unfortunately been trained to think that every statistically 'significant' result is notable.\"_\n",
+ "[Andrew Gelman](http://www.stat.columbia.edu/~gelman/) (11 February 1965 – very much alive). Essay: The Experiments Are Fascinating. But Nobody Can Repeat Them. [The New York Times (19/10/2018)](https://www.nytimes.com/2018/11/19/science/science-research-fraud-reproducibility.html).\n",
+ "\n",
+ "_\"The purpose of models is not to fit the data but to sharpen the question.\"_\n",
+ "[Samuel Karlin](https://en.wikipedia.org/wiki/Samuel_Karlin) (8 June 1924 - very much alive). 11th R. A. Fisher Memorial Lecture (20 April 1983) Royal Society.\n",
+ "\n",
+ "_“But this long history of learning how to not fool ourselves of having utter scientific integrity is, I'm sorry to say, something that we haven't specifically included in any particular course that I know of. We just hope you've caught on by osmosis.\n",
+ "The first principle is that you must not fool yourself and you are the easiest person to fool. So you have to be very careful about that. After you've not fooled yourself, it's easy not to fool other scientists. You just have to be honest in a conventional way after that.”_\n",
+ "[Richard P Feynman](https://en.wikipedia.org/wiki/Richard_Feynman) (11 May 1918 - 15 February 1988). Surely You're Joking, Mr. Feynman! (1997), p. 198.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Introduction\n",
+ "\n",
+ "[Statistical Methods for Research Workers](https://en.wikipedia.org/wiki/Statistical_Methods_for_Research_Workers), written by [Ronald A. Fisher](https://en.wikipedia.org/wiki/Ronald_Fisher) in 1925, is a landmark publication that ended up not only defining how psychologists and neuroscientists analyse their data, but also influencing how they actually perceive the world.\n",
+ "Almost a hundred years ago, the field of statistics, i.e. mathematics applied to data, was not at all structured in the way it is today, and \"research workers\" had to refine, and often invent the methods that would best suit their needs (e.g., computing numbers without a computer) and constraints (e.g., not having a lot of data).\n",
+ "\n",
+ "In this seminal book, Fisher codified what it meant to analyse data for the purpose of creating knowledge about the world, paving the way for what came to be statistics as we now know it. Later, the advent of computers transformed the tools available to analyse data, widening the use of statistics to researchers who did not necessarily need or want to deepen their mathematical understanding of the methods.\n",
+ "\n",
+ "It is important to note that we, \"research workers\", do not all have the same experience of statistics. Most psychologists and neuroscientists, for instance, would barely have heard of the debate between Frequentist and Bayesian methods, which can be rocking the core of other scientific fields.\n",
+ "Indeed, in psychology and neuroscience, the teaching of statistics typically follows almost to the letter the table of content from Fisher's book, and it is not unusual for students to only \"know\" statistics through the menus of a Graphic User Interface on a proprietary piece of software, instead of having acquired enough intuition to understand how distributions of data are manipulated and evaluated.\n",
+ "\n",
+ "In this book, we are proposing an opinionated exploration of how Bayesian methods can be applied to the fields of psychology and neuroscience, using [Python](https://www.anaconda.com/products/individual), with the explicit intent of providing research workers with an opportunity to see and understand what they are doing to their data.\n",
+ "Our motivation stems from our experience of the so-called [Reproducibility Crisis](https://osf.io/qky8t), which has hit both fields.\n",
+ "The Bayesian approach requires commitment to all parts of a given [model](https://nbviewer.jupyter.org/github/ebrlab/Statistical-methods-for-research-workers-bayes-for-psychologists-and-neuroscientists/blob/master/Chapter_0_-_Introduction/Glossary.ipynb#M-11), and relies on explicit assumptions formulated by the researcher.\n",
+ "Doing good research is thus about justifying one's choices, and not about finding a test that will yield the required number of stars in a table.\n",
+ "\n",
+ "Importantly, we do not seek to engage in the debate opposing Frequentists and Bayesians.\n",
+ "Although our preference might show at time, we favour the more oecumenical view that frequentist shortcuts can be justified in precise contexts; a point, we posit even Fisher would concede.\n",
+ "We will do our best to only expect entry-level [Python](https://www.anaconda.com/products/individual) coding skills and statistics, and to favour explaining over writing optimised code, showing over assuming."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prerequisites\n",
+ "\n",
+ "The ultimate prerequisite to be able to follow these pages is the willingness to question your knowledge, and to deepen your understanding of the methods you may have heard about for many years.\n",
+ "\n",
+ "Although you might learn a thing or two, these pages are not about teaching and learning [Python](https://www.anaconda.com/products/individual). In fact, even though the code is documented extensively, we ambition for the notebooks to be understandable by anyone who has passing command of English and no Python; just skip the code and focus on the rest.\n",
+ "If you did want to brush up your Parseltongue skills, we recommend you start [here](https://swcarpentry.github.io/python-novice-inflammation/), and hopefully we will even meet you [there](https://aspp.school/).\n",
+ "\n",
+ "Perhaps the most difficult prerequisite, however, for a psychologist or a neuroscientist, may be the ability to see the world differently, to ask questions beyond evaluating differences between groups.\n",
+ "The Bayesian method affords many more questions to be asked than does the Frequentist method.\n",
+ "In these pages, we apply the Bayesian framework to the subset of questions that would feature in your typical frequentist textbook, like [this one](https://www.amazon.co.uk/Discovering-Statistics-Using-Andy-Field/dp/1446200469), which followed suite from Fisher's seminal work.\n",
+ "In so doing, we also point out a range of other questions that someone may be interested in.\n",
+ "\n",
+ "We are forever in debt to the authors who compiled academic books and articles about Bayesian methods, which we learned from and inspired many, many of our examples and explanations. If you want to go beyond the material we present, we recommend the following textbooks:\n",
+ "\n",
+ "* Kruschke, J.K. (2011). [Doing Bayesian Data Analysis: A tutorial with R, JAGS, and Stan (Second edition)](https://www.amazon.co.uk/Doing-Bayesian-Data-Analysis-Tutorial/dp/0124058884). Academic Press. ISBN: 978-0-12-405888-0.\n",
+ "* Gelman, A., Carlin, J.B., Stern, H.S., Dunson, D.B., Vehtari, A., & Rubin, D.B. (2014). [Bayesian data analysis (Third edition)](https://www.amazon.co.uk/Bayesian-Analysis-Chapman-Statistical-Science/dp/1439840954). CRC Press. ISBN: 978-1-4398-4095-5.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Running our notebooks\n",
+ "\n",
+ "The notebooks should be self-explicit, as we think that explanation is more important than code. Therefore, there is no need to run the notebooks yourself. We provide you with [everything you need to do so](https://github.com/ebrlab/Statistical-methods-for-research-workers-bayes-for-psychologists-and-neuroscientists), if you want to, but you don't have to.\n",
+ "\n",
+ "Each notebook is also meant to be self-contained, and will have been compiled to show the output of each cell. We aim for the notebooks to be interactive as well, using [Binder](https://mybinder.org), but please use this feature sparingly: running any model is always done at a cost (e.g. [MIT Tech Review | Training a single AI model can emit as much carbon as five cars in their lifetimes](https://www.technologyreview.com/s/613630/training-a-single-ai-model-can-emit-as-much-carbon-as-five-cars-in-their-lifetimes/), 6/6/2019), so think before you click.\n",
+ "\n",
+ "If you want to run these models at home or adapt the way of doing things to your own purposes, it helps to understand the workflow. Designing a Bayesian analysis is done iteratively, looping through the following stages:\n",
+ "1. Formulating a question: writing down a model/assumptions on paper.\n",
+ "2. Implementing the model in code.\n",
+ "3. Answering the question: Evaluating specific parts of the model, and going back to 1.\n",
+ "\n",
+ "Writing down a model means that you must make explicit every little assumption you have about the state of the work your model is describing, that includes information about the kind of data you intend to collect (collectively known as priors of the model), about the kind of apparatus you will be using that will have its own intrinsic biases (known as the likelihood of the model) and everything else you can think of, like how the variables relate to each other, etc.\n",
+ "\n",
+ "In your typical Frequentist framework, these decisions are made for you, in the form of the (infamous) \"assumptions of the test\", which you may remember from your UG Stats 101 module. These include assumptions about the sample of data you are collecting, like constancy of variance of residuals (homoscedasticity) and their independence and normality, but they also include assumptions about how the data is expected to relate to the \"population\" of all possible data.\n",
+ "\n",
+ "Implementing the model and running it will typically be done with dedicated tools. Because of the big number and wide range of questions that could be asked of any given data, there is no such a thing as a complete piece of software with a nice graphic user interface. In fact, because the aim is to be explicit about everything, it may not even be desirable to have such a magic tool, which may be incomplete and opaque. The closest approximations of such tools may currently be [JASP](https://jasp-stats.org), or the R packages [BRMS](https://cran.r-project.org/web/packages/brms/index.html) or [rethinking](https://github.com/rmcelreath/rethinking), but what you can do with these packages will be limited in some ways.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Table of Content\n",
+ "\n",
+ "### Bayesian analysis in practice\n",
+ "\n",
+ "* The beginning\n",
+ "* Diagrams, Distributions\n",
+ "\n",
+ "\n",
+ "### Tests of Goodness of Fit, independence and homogeneity\n",
+ "\n",
+ "* [One-sample Z-test](https://nbviewer.jupyter.org/github/ebrlab/Statistical-methods-for-research-workers-bayes-for-psychologists-and-neuroscientists/blob/master/Chapter_1_-_Z-test/Ch1_Bayesian%20Z-test.ipynb)\n",
+ "\n",
+ "* Simple linear regression (work in progress)\n",
+ "* Logistic regression (work in progress)\n",
+ "* Ordinal regression (work in progress)\n",
+ "* Multiple correlation estimates (work in progress)\n",
+ "* Poisson regression (work in progress)\n",
+ "* Robust regression (work in progress)\n",
+ "\n",
+ "### Tests of significance of means, differences of means, and regression coefficients\n",
+ "\n",
+ "* [One-sample t-test](https://nbviewer.jupyter.org/github/ebrlab/Statistical-methods-for-research-workers-bayes-for-psychologists-and-neuroscientists/blob/master/Chapter_2_-_One-sample_t-test/Ch2_One-sample_t-test.ipynb)\n",
+ "\n",
+ "* Between-subjects t-test (work in progress)\n",
+ "* Repeated measures t-test (work in progress)\n",
+ "\n",
+ "* Welch's unequal variance t-test (work in progress)\n",
+ "\n",
+ "* Categorical regression (one-way ANOVA) (work in progress)\n",
+ "* Within-subjects ANOVA (work in progress)\n",
+ "* One-way Between-subjects ANOVA (work in progress)\n",
+ "* 2x2 Between-subjects ANOVA (work in progress)\n",
+ "* Linear mixed model (work in progress)\n",
+ "* Patsy contrast analysis (categorical dummy variable) (work in progress)\n",
+ "\n",
+ "### Intraclass correlations and the analysis of variance\n",
+ "\n",
+ "* Fisher's exact test (work in progress)\n",
+ "* Factor analysis (work in progress)\n",
+ "\n",
+ "### Further applications fo the Analysis of variance\n",
+ "\n",
+ "### Appendices\n",
+ "\n",
+ "### Prologue"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.5"
+ },
+ "toc": {
+ "colors": {
+ "hover_highlight": "#DAA520",
+ "navigate_num": "#000000",
+ "navigate_text": "#333333",
+ "running_highlight": "#FF0000",
+ "selected_highlight": "#FFD700",
+ "sidebar_border": "#EEEEEE",
+ "wrapper_background": "#FFFFFF"
+ },
+ "moveMenuLeft": true,
+ "nav_menu": {
+ "height": "243px",
+ "width": "252px"
+ },
+ "navigate_menu": true,
+ "number_sections": true,
+ "sideBar": true,
+ "threshold": 4,
+ "toc_cell": false,
+ "toc_section_display": "block",
+ "toc_window_display": true,
+ "widenNotebook": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/Chapter_0_-_Introduction/README.md b/Chapter_0_-_Introduction/README.md
new file mode 100644
index 0000000..5757761
--- /dev/null
+++ b/Chapter_0_-_Introduction/README.md
@@ -0,0 +1,4 @@
+# Chapter 0: Introduction
+
+### [Read it online here](https://nbviewer.jupyter.org/github/ebrlab/Statistical-methods-for-research-workers-bayes-for-psychologists-and-neuroscientists/blob/master/Chapter_0_-_Introduction/Ch0_Introduction.ipynb)
+
diff --git a/Chapter_1_-_Z-test/Ch1_Bayesian Z-test.ipynb b/Chapter_1_-_Z-test/Ch1_Bayesian Z-test.ipynb
new file mode 100644
index 0000000..5f30079
--- /dev/null
+++ b/Chapter_1_-_Z-test/Ch1_Bayesian Z-test.ipynb
@@ -0,0 +1,986 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "toc": "true"
+ },
+ "source": [
+ "# One-sample Z-test"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This notebook will be slightly different from the other notebooks in this repository: here, we will simulate data, whereas in the others, we use data from the real world.\n",
+ "\n",
+ "Z-tests are a family of statistical tests that allow us to ask questions like \"Is this sample of data representative of the population, which we assume follows a normal distribution?\" Such tests will calculate a statistic (a number) to estimate whether the distribution of the data mimicks what we could be expecting to see in the [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution) of the population. For instance, assuming that [the IQ of the general population peaks at 100, with a standard deviation of 15](https://en.wikipedia.org/wiki/IQ_classification), you measure the IQ of your closest friends. Are you and your friends unusually intelligent?\n",
+ "\n",
+ "Demonstrating the technique is therefore more easily done when you can create as much or as little data as you want. Particularly, we can specify the mean and standard deviation of the grand population, and we can also draw as much sample data as we want and create all kinds of situations. Because we are the Masters of our \"data generation process\", we also remove the uncertainty on the fact that there may not be any statistical relationship in the data to begin with."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## The classic one sample z-test\n",
+ "\n",
+ "The one-sample Z-test calculates a Z statistic by comparing the mean of the data $\\bar{X}$ against that of a population with mean $\\mu_0$ and standard deviation $\\sigma$, and assumes that there is no difference between the two distributions (the \"Null hypothesis\").\n",
+ "\n",
+ "$$Z = \\frac{\\bar{X}-\\mu_0}{\\sigma}$$\n",
+ "\n",
+ "This Z-score, as it is sometimes called, is a quantity that represents how the mean should be in relation to a standard deviation, and we can then compare the score we obtained from the distribution of our data to that of what we would obtain in a normal distribution of data. If the scores are different, it means they are drawn from the same population. Alongside tables of such numbers calculated for a normal distribution, you will also find measures of probability that a given score can be found, which would be used to determine a p-value, against an acceptable $\\alpha$ level, as it is frequently used in a frequentist null hypothesis significance test.\n",
+ "\n",
+ "$$H_0:\\bar{X} = \\mu_0$$\n",
+ "$$H_1:\\bar{X}\\neq \\mu_0$$"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Bayesian inference\n",
+ "Following on from the quick description of the classic Z-test above its important to keep in mind that Bayesian analysis inference are all derived from the application of Bayes rule\n",
+ "\n",
+ "$$P(\\theta \\mid y) = \\frac{P(y \\mid \\theta) \\, P(\\theta)}{P(y)}$$\n",
+ "\n",
+ "and as such while the following description of the Bayesian model is an equivalent to the Z-test, it is fundamentally different, because its uses fully probabilistic modelling and the infernce is not based on sampling distributions.\n",
+ " \n",
+ "For a fuller description see the Practicing Bayesian statistics markdown file within the Github repository."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Steps of Bayesian data analysis\n",
+ "\n",
+ " Kruscke (2015) offers a step by step formulation for how to conduct a Bayesian analysis:\n",
+ "\n",
+ "1. Identify the relevant data for question under investigation.\n",
+ "\n",
+ "2. Define the descriptive (mathematical) model for the data.\n",
+ "\n",
+ "3. Specify the Priors for the model. If scientific research publication is the goal, the priors must be accepted by a skeptical audience. Much of this can be achieved using prior predcitve checks to ascertain if the priors are reasonable.\n",
+ "\n",
+ "4. Using Bayes rule estimate the posterior for the parameters of the model using the likelihood and priors. Then use the posterior for inference.\n",
+ "\n",
+ "5. Conduct model checks. i.e. Posterior predcitive checks. \n",
+ "\n",
+ "This notebook will follow this approach generally. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Step 1 - Identify the relevant data for question under investigation\n",
+ "\n",
+ "The example below is based on simulating and analysing IQ scores. A highly understood phenemomena in the general population, in terms of the standard deviation; due the standardisations of the test with an average IQ set at 100 and standard deviation of 15 (Warne, 2020). In additon to this using IQ data has become standard in Bayesian data analysis teaching (Kruschke, 2015) for pedagogical reasons and as such is used here."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Simulate the IQ scores"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import data analysis and visualisation packages.\n",
+ "import numpy\n",
+ "import pandas\n",
+ "import stan\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn\n",
+ "import scipy.stats as stats\n",
+ "import arviz\n",
+ "\n",
+ "# Importing nest_asyncio is only necessary to run pystan in Jupyter Notebooks.\n",
+ "import nest_asyncio\n",
+ "nest_asyncio.apply()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#from IPython.core.display import HTML as Center\n",
+ "\n",
+ "#Center(\"\"\" \"\"\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set random seed so the analysis to follow is reproducible.\n",
+ "np.random.seed(1)\n",
+ "\n",
+ "# Generating 100 random samples from a normal distribution with mean of 100 and standard deviation of 15, representing \n",
+ "# IQ scores, that are then analysed below.\n",
+ "IQ = np.round(np.random.normal(loc = 100, scale = 15, size = 100))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Visualise and explore the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "# Set Seaborn theme for data visualisations.\n",
+ "sns.set_style(\"white\");\n",
+ "\n",
+ "# Plot a histogram of the data with a normal distribution imposed.\n",
+ "sns.distplot(IQ, fit = stats.norm, kde= False);\n",
+ "plt.xlabel(\"IQ Scores\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "From eyeballing the data its looks normally distributed. This of course is unsurprising seeing as it has been randomly generated from a normal distribution. This point is brought up because even though the data visually looks normally distributed it is not a justification for selecting a normal likelihood to model the data. When deciding on how to model the data the likelihood should be based on attempting to model the Data Generating Process (DGP) this of course is unknown, but there is a level of pragmatic choice. As as analyst being open to critisism to your modelling choices can helpful oneto improve your modelling and two ultimately improve the inferences and understanding of the phenomena you are studying.\n",
+ " \n",
+ "However as this notebook is demonstrating a Bayesian Z-test equivalent a normal likelihood will be used as that is an assumption of the Z-test, and is an assumption of the Bayesian model specified below in section 5, however using full probability models and Bayes rule for our inference allows much greater flexibilty, which will be demonstrated in the other repository notebooks."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Step 2 - Define the descriptive statistical model \\begin{align*}\n",
+ "\\large y_i &\\sim Normal(\\mu,15)\n",
+ "\\\\ \\large \\mu &\\sim Normal(100, 20)\n",
+ "\\end{align*}\n",
+ "\n",
+ "The formulation for presenting statistical models here follows that used by McElreath (2020) for its intuitive nature. In plain english the model specifies that the dependent variable $y_i$ (IQ scores) is distributed normally in terms of the Likelihood with a known standard deviation of 15 but an unkown $\\mu$ that is to be estimated with a normally distributed prior probability on $\\mu$ that has a $ \\mu = 100$ and $ \\sigma = 20$."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Step 3 - Specifying priors"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Prior predictive checks"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Visualising priors\n",
+ "The first step in prior predictive check is to visualise the priors within the model for the parameters being modelled.\n",
+ "As the statistical model specified above shows their is only one prior in the analysis and the standard deviation\n",
+ "is assumed to be known at 15 and as such is not estimated and does not need a prior.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# PLot the prior of the model for describing possible mean values of the DGP for IQ scores. This is quite a broad prior\n",
+ "# and shouldnt be too controversialto a skeptical audience of IQ researchersas it assigns 95% probability of the mean for IQ to be estimated ranging from \n",
+ "# from 60 and 140.\n",
+ "\n",
+ "#Creat range of points to plot the pdf function on\n",
+ "Range_of_X_axis = np.arange(60, 140, 0.001)\n",
+ "\n",
+ "#Plot the normal pdf that shows our prior on the means of the probable mean values for IQ\n",
+ "plt.plot(Range_of_X_axis, stats.norm.pdf(Range_of_X_axis, loc = 100, scale = 20));\n",
+ "plt.xlabel(\"95% prior probability on the mean value of IQ\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Note that that the pdf is cut of at 2 standard deviations above as this is where 95% of the probability mass lies for the probable values of $mu$ and shows how broad the prior is in the case of probable IQ scores, follwing the 68–95–99.7 rule of normal distributions.<\\font>"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Simulating data based on the priors\n",
+ "\n",
+ "Following the visualisation of the priors for the parameters of the model to \n",
+ "check how they interact it is important to run prior predcitive check by \n",
+ "simulating data based on the model."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Following visualisng the prior for the only parameter that will be estimated within the model \n",
+ "# we can generate simulated data based on this prior and assumed known standard deviation of 15 for IQ scores.\n",
+ "\n",
+ "# Set seed to allow for the reprodcuiblity of notebook.\n",
+ "np.random.seed(1)\n",
+ "\n",
+ "# Simulate data from the prior for the mean of the model specified above.\n",
+ "sample_mu = np.random.normal(loc= 100, scale = 20, size = 1000)\n",
+ "\n",
+ "# Generate a simulated data set of IQ scores based on the prior for the means and the assumed known Standard deviation (15)\n",
+ "# and the normal likelihood.\n",
+ "prior_PC = np.random.normal(loc = sample_mu, scale = 15, size = 1000)\n",
+ "\n",
+ "# Plot the simulated data density\n",
+ "sns.distplot(prior_PC, hist=False);\n",
+ "plt.xlabel(\"Prior Predictive simulated IQ scores\");\n",
+ "\n",
+ "# Plot vertical line of the 2 standard deviatons either side of the simulated data.\n",
+ "plt.vlines((np.mean(prior_PC) + 2 * np.std(prior_PC)), ymin=0, ymax=0.018);\n",
+ "plt.vlines((np.mean(prior_PC) - 2 * np.std(prior_PC)), ymin=0, ymax=0.018);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As the prior predictive check of our priors shows that the simulated data is reasonable within the understanding of IQ, whilst sldo incoprortaing s reasonble uncertainty for a skeptical audience in that our priors enocde that it is 95% probable to observe individuals with IQ's as low as 50 and as high as 150."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Step 4 - Use Bayes rule.\n",
+ "\n",
+ "The software of choice to conduct Bayesian inference on the data here is Stan and the model is specified below."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Stan model of a Bayesian Z-test\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Below the statistical model defined above in section 5 of this notebook is coded in Stan code for compilation below"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Stan model to repliciate a bayesain estiamtion equivalnet of the classical one sample z-test.\n",
+ "\n",
+ "One_z_test_model = \"\"\"\n",
+ "\n",
+ "data{\n",
+ "// Number of IQ data points\n",
+ "int N;\n",
+ "\n",
+ "// Define a vector of dependent variable (IQ scores) values\n",
+ "vector[N] y;\n",
+ "real sigma;\n",
+ "\n",
+ "}\n",
+ "\n",
+ "parameters{\n",
+ "// Because in the traditional Z-test we assume we know the standard deviation of the DGP\n",
+ "// we do not estimate it and the only unknown is the mu (mean) parameter in the normal \n",
+ "// model specified below.\n",
+ "real mu;\n",
+ "}\n",
+ "\n",
+ "model{\n",
+ "// The priors\n",
+ "mu ~ normal(100, 20);\n",
+ "\n",
+ "// The likelihood\n",
+ " y ~ normal(mu, sigma);\n",
+ "}\n",
+ "\n",
+ "generated quantities{\n",
+ "\n",
+ "// Generated a real value for difference between the MCMC samples of mu and 100.\n",
+ "real diff = mu - 100;\n",
+ "\n",
+ "// Cohen D calculation of the MCMC samples to get the standardised effect size\n",
+ "// for the diffence between the mean estimates and 100.\n",
+ "real Cohen_D = (mu - 100) / sigma;\n",
+ "\n",
+ "// Generate posterior p-value variable \n",
+ "int mean_pv;\n",
+ "real yrep[N];\n",
+ "\n",
+ "{\n",
+ " // Generate data for posterior samples\n",
+ " for (i in 1:N) {\n",
+ " yrep[i] = normal_rng(mu, sigma); \n",
+ " }\n",
+ "}\n",
+ "\n",
+ "mean_pv = mean(yrep) > mean(y);\n",
+ "}\n",
+ "\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The Stan model above takes the data in the model block, the only parameter in this Z-test equivalent to be estimated is $\\large \\mu$ and as such is specified in the parameters block. The model itself is then defined in the model block, this is where most of the action happens, and the postersior is estimated using Stan NUTS HMC sampler by taking the prior on $\\large \\mu$ and the likelihood for the IQ scores data to estimate the posterior."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The Stan model is compiled into C++ code\n",
+ "#sm = ps.StanModel(model_code = One_z_test_model) # 2.19 (windows)\n",
+ "\n",
+ "# To use pystan the data must be stored in a python dictionary that coincides with what was declared in the data\n",
+ "# block of the stan model above, in order, that it can be passed to the compiled stan model for fitting below.\n",
+ "\n",
+ "data = {'N': len(IQ),\n",
+ " 'y': IQ,\n",
+ " 'sigma': 15\n",
+ " }\n",
+ "\n",
+ "sm = ps.build(One_z_test_model, data=data, random_seed=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fit the One_z_test_model specified above.\n",
+ "#fit = sm.sampling(data = data, iter = 2000, chains = 4, seed = 1, warmup = 1000) # 2.19\n",
+ "\n",
+ "\n",
+ "\n",
+ "# 2000 iterations of MCMC samples with half being used as warmup, within 4 independent chains, \n",
+ "# seed is set for reproducibility, however due to the stochastic nature of sampling there may \n",
+ "# be slight variation across machines."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Because of the Python print statement, a large number of model outputs do not scale and particular outputs cannot be selected,\n",
+ "# so it is easier to put outputs into a pandas dataframe.\n",
+ "summary = fit.summary()\n",
+ "fit_df = pd.DataFrame(summary['summary'], \n",
+ " columns=summary['summary_colnames'], \n",
+ " index=summary['summary_rownames'])\n",
+ "\n",
+ "#Output model results.\n",
+ "fit_df.head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Post model fit-visualisations - Bayesian one sample Z-test\n",
+ "The arviz package offers many useful functions for plotting MCMC samples of the posteriors produced by Bayesian data analysis with Stan."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Posterior distributions plots\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Using the arviz package the posteriors can be plotted from the MCMC samples\n",
+ "az.plot_posterior(fit, var_names=(\"mu\", \"diff\",\n",
+ " \"Cohen_D\"));"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The posterior above shows that the simulated $\\large \\mu$ of 100 is captured within the posterior with a posterior mean of 101, and there is no statistical difference between the estimated $\\large \\mu$ and 100. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Autocorrelation plots"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Using the arviz package the autocorrelation of the 4 MCMC chains can be plotted.\n",
+ "az.plot_autocorr(fit, var_names=(\"mu\"));"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The autocorrelation plots do not show any serious autocorrelation problems, as the values quickly decrease to 0."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## MCMC traceplots"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Using the arviz package the traceplots of the 4 MCMC chains can be plotted.\n",
+ "az.plot_trace(fit, var_names=(\"mu\", \"diff\", \"Cohen_D\"));"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The traceplots show good mixing of the chains and resemble a \"hairy caterpillar\"."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Step 5 - Posterior predictive checks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Create a pandas dataframe of simulated datasets generated from the posterior predictive check.\n",
+ "# The dataframe is a subset of the first 202 simulated data sets. (Stan generates as many datasets as iterations.)\n",
+ "yrep_df = pd.DataFrame(fit['yrep']).T.iloc[:,0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Convert the pystan object into Arviz inference object for use in plotting functions\n",
+ "data = az.from_pystan(\n",
+ " posterior=fit,\n",
+ " posterior_predictive='yrep',\n",
+ " observed_data=[\"y\"])\n",
+ " \n",
+ "az.plot_ppc(data, data_pairs = {\"y\" : \"yrep\"}, num_pp_samples= 100);"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ " figure 1"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The posterior samples show that the simulated datasets can capture the original data well."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Reporting the results of the Bayesian one sample Z-test equivalent\n",
+ "\n",
+ "As Kruschke (2015) correctly points out, there is no standard formula or presentation method for presenting the results from Bayesian data analysis in journal articles, like the APA guide for reporting frequentist analysis. It is likely there never will be, because as McElreath (2020) explains, Bayesian data analysis is more like an engineering approach to the problem and the resulting model that is fit will be analysis specific. In addition, as Gabry et al. (2019) argue, visualisations may be even more key, so all the visualisations above would have to be included with any write up. Anyway, the write up below generally follows the advice of Kruschke (2015) chapter 25. In any application, though, it comes down to the problem to be described and satisfying the audience of the work.
\n",
+ "\n",
+ "Write up of One sample Z-test
\n",
+ "\n",
+ " Four chains were run with 2000 MCMC samples each, discarding the first 1000 warm up samples. The data was analysed using the model described above with the $\\mu$ being the only model parameter estimated, with a normal prior with a $\\mu = 100$ and $\\sigma =20$. The standard deviation of the normal likelihood was modelled as known at a value of 15. Prior predictive checks revealed that these values were uninformative, giving high probability to a large range of values for the IQ scores.\n",
+ " \n",
+ "Convergence of the MCMC chains was examined using autocorrelation and traceplots (in a paper, referencing appropriate figures here will be of value). The posteriors showed that the most credible value of $\\mu$ = 100.88 with a 95% CrI [97.94, 103.78] for the IQ scores under the model used in the analysis. Posterior predictive checks revealed that the model could also reasonably recover the original data. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "=========================================================================================================================="
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Bayesian estimation equivalent of the two sample Z-test\n",
+ "Below the model will be extended to estimate two groups in a classical z-test Bayesian estimation equivalent"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Step 1 - Identify the relevant data for the question under investigation."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Simulate a second group of data\n",
+ "For the analysis below we need to simulate a second group of IQ scores."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set seed for reproducible notebook\n",
+ "np.random.seed(1)\n",
+ "\n",
+ "# Simulate a second group of 100 independent IQ scores from a normal distribution, but this\n",
+ "# time with a mean of 110 and standard deviation of 15\n",
+ "IQ_2 = np.round(np.random.normal(loc = 110, scale = 15, size = 100))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Visualise and explore the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Set Seaborn theme for data visualisations.\n",
+ "sns.set_style(\"whitegrid\")\n",
+ "\n",
+ "# Plot a histogram of the first group IQ data with a normal distribution imposed.\n",
+ "sns.distplot(IQ, fit = stats.norm, kde= False);\n",
+ "plt.xlabel(\"IQ Scores\");\n",
+ "\n",
+ "# Plot a histogram of the second group IQ data with a normal distribution imposed.\n",
+ "sns.distplot(IQ_2, fit = stats.norm, kde= False);\n",
+ "plt.xlabel(\"IQ Scores\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Step 2 - Define the descriptive statistical model \\begin{align*}\n",
+ "y_i{_k} &\\sim Normal(\\mu{_k},15)\n",
+ "\\\\\n",
+ "\\mu{_k} &\\sim Normal(100, 20)\n",
+ "\\\\\n",
+ "\\label{eq:1}\n",
+ "\\end{align*}\n",
+ "\n",
+ "The model above in plain English states that the dependent variable (IQ) for the k groups (two here) is distributed normally, with the prior specifying that the $\\mu$ for IQ is distributed normally, with a mean of 100 being the most probable $\\mu$ value, but with a standard deviation of 20. \n",
+ "\n",
+ " Note to reader: in this model the priors are built so that they are the same for both of the $\\mu$ estimations for the separate groups, but this is not a necessity and separate priors can be set."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Step 3 - Prior predictive checks\n",
+ "as the model priors are the same as above for both groups for PPC see section 6 above"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Step 4 - Use Bayes rule.\n",
+ "Below the statistical model defined above in section of this notebook is coded in Stan code for compilation below"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Two_z_test_model = \"\"\"\n",
+ "\n",
+ "data{\n",
+ "\n",
+ "// Number of IQ data points for the two groups which are the same.\n",
+ "int N;\n",
+ "\n",
+ "// Vector of dependent variable (IQ) values defined for the two groups\n",
+ "vector[N] y_1; \n",
+ "vector[N] y_2;\n",
+ "\n",
+ "real sigma;\n",
+ "}\n",
+ "\n",
+ "parameters{\n",
+ "\n",
+ "// Because in the traditional Z-test we assume we know the standard deviation of the data generating process\n",
+ "// we do not estimate it and the only unknown to be modelled is the mu parameter in the normal likelihood\n",
+ "// specified below in the model block.\n",
+ "real mu_1;\n",
+ "real mu_2;\n",
+ "\n",
+ "}\n",
+ "\n",
+ "model{\n",
+ "\n",
+ "//Priors\n",
+ "mu_1 ~ normal(100, 20);\n",
+ "mu_2 ~ normal(100, 20);\n",
+ "\n",
+ "//Likelihood\n",
+ "y_1 ~ normal(mu_1, sigma);\n",
+ "y_2 ~ normal(mu_2, sigma);\n",
+ "\n",
+ "}\n",
+ "\n",
+ "generated quantities{\n",
+ "\n",
+ "// Unstandardised difference between the MCMC samples of the two posteriors\n",
+ "// for the two groups modelled above\n",
+ "real diff = mu_1 - mu_2;\n",
+ "\n",
+ "\n",
+ "// Calculating a standardised Cohen D measure between the two groups\n",
+ "real Cohen_D = (mu_1 - mu_2) / sigma;\n",
+ "\n",
+ "\n",
+ "//Posterior predictive check code\n",
+ "real y1rep[N];\n",
+ "real y2rep[N];\n",
+ " \n",
+ " {\n",
+ " // Generate simulated data from posterior samples\n",
+ " for (i in 1:N) {\n",
+ " y1rep[i] = normal_rng(mu_1, sigma);\n",
+ " }\n",
+ "}\n",
+ " {\n",
+ " // Generate simulated data for posterior samples\n",
+ " for (i in 1:N) {\n",
+ " y2rep[i] = normal_rng(mu_2, sigma);\n",
+ " } \n",
+ " }\n",
+ "}\n",
+ "\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Compile stan model into C++ code\n",
+ "sm_2 = ps.StanModel(model_code = Two_z_test_model);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Generate a Python dictionary to pass to Stan for fitting the model above.\n",
+ "data_2 = {'N': len(IQ),\n",
+ " 'y_1': IQ,\n",
+ " 'y_2': IQ_2,\n",
+ " 'sigma': 15}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fit two sample z-test model to the data.\n",
+ "fit_2 = sm_2.sampling(data = data_2, iter = 2000, chains = 4, seed = 1, warmup = 1000)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Because of the Python print statement, a large number of model outputs do not scale and particular outputs cannot be selected,\n",
+ "# so it is easier to put outputs into a pandas dataframe.\n",
+ "summary_2 = fit_2.summary()\n",
+ "fit_2_df = pd.DataFrame(summary_2['summary'], \n",
+ " columns=summary_2['summary_colnames'], \n",
+ " index=summary_2['summary_rownames'])\n",
+ "\n",
+ "#Output model results.\n",
+ "fit_2_df.head(5)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Post model fit visualisations - Bayesian two sample Z-test"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Posterior distribution plots"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Using the arviz package the posteriors can be plotted from the MCMC samples \n",
+ "az.plot_posterior(fit_2, var_names=(\"mu_1\", \"mu_2\" ));\n",
+ "az.plot_posterior(fit_2, var_names=(\"diff\", \"Cohen_D\" ));"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Autocorrelation plots"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "az.plot_autocorr(fit_2, var_names=(\"mu_1\", \"mu_2\", \"diff\", \"Cohen_D\"));"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Autocorrelation plots show little issue in terms of autocorrelation, with the values centered around 0."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## MCMC Traceplots"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "az.plot_trace(fit_2, var_names=(\"mu_1\", \"mu_2\"));"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Traceplots show good mixing of the Markov chains and resemble a hairy caterpillar."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Step 5 - Posterior predictive check - two groups"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data = az.from_pystan(\n",
+ " posterior=fit_2,\n",
+ " posterior_predictive='y1rep',\n",
+ " observed_data=[\"y_1\"])\n",
+ " \n",
+ "az.plot_ppc(data, data_pairs = {\"y_1\" : \"y1rep\"}, num_pp_samples=100);\n",
+ "plt.xlabel(\"IQ Scores\");\n",
+ "\n",
+ "data = az.from_pystan(\n",
+ " posterior=fit_2,\n",
+ " posterior_predictive='y2rep',\n",
+ " observed_data=[\"y_2\"])\n",
+ " \n",
+ "az.plot_ppc(data, data_pairs = {\"y_2\" : \"y2rep\"}, num_pp_samples=100);\n",
+ "plt.xlabel(\"IQ Scores\");"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Reporting the results of the two sample Z-test\n",
+ "see section (9) of this notebook for brief discussion on the lack of clarity in reporting Bayesian analyses.\n",
+ "\n",
+ "## Write up of two sample Z-test\n",
+ "\n",
+ " The IQ data for the two groups was analysed using the model defined above with the $\\mu$ being the only model parameter to be estimated for each of the two groups, each with a normal prior with a $\\mu = 100$ and $\\sigma =20$. The standard deviation of the normal likelihood of each group was modelled as known at a value of 15. Prior predictive checks revealed that these values were uninformative, giving high probability to a large range of values for the IQ scores.\n",
+ " \n",
+ "The MCMC chains were run to acquire 2000 samples, with the first 1000 samples being for warm up. Convergence of the MCMC chains was examined using autocorrelation and traceplots (in a paper, referencing appropriate figures here will be of value). Both sets of plots showed no issues of autocorrelation or lack of mixing for the chains. The posteriors showed that the most credible value for group one was $\\mu$ = 101 with a 95% CrI [98, 104] and $\\mu$ = 111 with a 95% CrI [108, 114] for group two for the IQ scores under the model used in the analysis. Analysis of the unstandardised difference between the two groups showed credible values of $\\mu$ = -10 with a 95% CrI [-14, -6]. For the standardised Cohen D scores, credible values of $\\mu$ = -.67 with a 95% CrI [-.95, -.38] were found.\n",
+ "\n",
+ "Finally, the posterior predictive checks revealed that the model could also reasonably recover the original data. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# References \n",
+ "\n",
+ "Gabry, J., Simpson, D., Vehtari, A., Betancourt, M., & Gelman, A. (2019). Visualization in Bayesian workflow. Journal of the Royal Statistical Society: Series A (Statistics in Society), 182(2), 389-402.\n",
+ " \n",
+ "Kruschke, J. (2015). Doing Bayesian data analysis: A tutorial with R, JAGS and Stan. Oxford, England: Academic Press. \n",
+ " \n",
+ "McElreath, R. (2020). Statistical rethinking: A Bayesian course with examples in R and Stan. Boca Raton: CRC Press.\n",
+ "\n",
+ "Warne, R., T. (2020). In the know: Debunking 35 myths about human intelligence. New York, NY: Cambridge University Press."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Stan",
+ "language": "python",
+ "name": "stan"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.5"
+ },
+ "toc": {
+ "base_numbering": 1,
+ "colors": {
+ "hover_highlight": "#DAA520",
+ "navigate_num": "#000000",
+ "navigate_text": "#333333",
+ "running_highlight": "#FF0000",
+ "selected_highlight": "#FFD700",
+ "sidebar_border": "#EEEEEE",
+ "wrapper_background": "#FFFFFF"
+ },
+ "moveMenuLeft": true,
+ "nav_menu": {},
+ "navigate_menu": true,
+ "number_sections": true,
+ "sideBar": true,
+ "skip_h1_title": false,
+ "threshold": 4,
+ "title_cell": "Table of Contents",
+ "title_sidebar": "Contents",
+ "toc_cell": true,
+ "toc_position": {
+ "height": "calc(100% - 180px)",
+ "left": "10px",
+ "top": "150px",
+ "width": "383.917px"
+ },
+ "toc_section_display": true,
+ "toc_window_display": false,
+ "widenNotebook": false
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/Chapter_1_-_Z-test/build/temp.macosx-10.9-x86_64-3.9/Users/eroesch/Library/Caches/httpstan/4.5.0/models/23wo2jct/model_23wo2jct.o b/Chapter_1_-_Z-test/build/temp.macosx-10.9-x86_64-3.9/Users/eroesch/Library/Caches/httpstan/4.5.0/models/23wo2jct/model_23wo2jct.o
new file mode 100644
index 0000000000000000000000000000000000000000..ade60cc162bc593424f95420645154d0f1a3b0bc
GIT binary patch
literal 260568
zcmeEv4SZD9nRkXHGAeKf1&xXlT5J;)ny6T^#hOS4&XqfW6*ROUPz0eBq+~{M`3g=V
z++MF`yRW-+TX(a&wVQ2UTi)$zgNi1ANdU!w7K2zx{mgW%4WJ=_GVlL+&bf2%OeTQV
zRof-MU*?{B?)g5?dCqg5uk-xTKOXO!mF3IM!Vms)@t=PN{oo&e7vO(AMfURN|8^h#
zQc8L7S3gU>qs%P&A4rk-n>jN)cWGFL<@=Oi#T5753H4b)t;ouv@^bL(p4TU9-~a@2
zva0Z#{F^!R_BnHA&RsfZ?&9#w@a%8ToS9l)jw)x!`7%p+)R#pqt9q$V)}Ul5ZeZrj
zo5Hih3+CvH7H})m%gg^ivRw~Tr|>uHpM2cD{I)(>ODUp9`|el}p1WlB9aqyw>VInc
zT7E3c8zP4*%l~h(eKK`c7R`tEZ{|!@Uhy>?onBt&L$bW8e3`50r?P!c88n36zhrr%
zWo&wRi=UF^6{+@Z*y>AZ6U~kHZ|2NzS5|r=X3i`xn^NXRIB-!Qi{%gAK6;}0+sv7B
z?p(O=&P7fM>E*T0mJQ2u65x5_@@|_Qp6w=1FR!*iwxCKiq`gV@w|!rqth3x~$q=<~
zQ6+;4$EBB-zgZR(=qIzxYLVp?z0xOZw5lUn7y>=ZONE!Bohs86GS}i4Q|H%P7KKq>
zWq8SgMYpRD>GNB3RF>DN$pnjeb*RL6sA*3|VExWtGI#cE&NQd@w_D9eAekd|KF;yP
zQ!rWHU9*=E+@zP6f0k@Pk;*qKAm2OR?8EEsTovy;X3kuA=WR0=VD-+N7g;o?M|qP~
zd9^CTI+RE75_n7EgEO6i)RWm)&CDr!DOU)m}5WM&s1T)
zIU`1S=4nGQLi8@IyZtI|9le*`Qg(CMl$&&OPnmgGH$QlK7Ls>R9${_~X0u_wRc;tPgrrcTDP+6bN8`O*~>Rr_*%wwW?
zyq^-{$%BO_BMf124iRk#2(zzY-=2>z`6^|BdNSxCo-t(e_)=^h*BSOIfhh4Gzxer;
zc=AZ%S)sDbQqTG7xmZ1y%IBIYdg70fD|bgkm-(|I14O<2kX0qjv1rqNH-&dF3^O5f
zS<8woS8IbbI>nd|^j6I1?;t8nl{2Sd+q>(O{Sh
zTMR3*!7w{TV@Lk=cE9(zchN?_C~g#wFBma8?*GI7hlKX4?ZVtC8sGg2#TOr-2;F>6
zZ~Ws|^y23X?N@&YS<8nPgfLj&HOy|Yt3zy@s{X{VMv5wI5gV^kf5O}>hHXcceZ;U9
z;cFFUqiBrf2w$V9*VG?<+2~V$sILVnMvLeX-`xXr`<8?;><{t5v425x4f7A7k`I^o
z4YQ+ow|@Aru!0T3LjR_=3M;1-9S$!OR-UNY7p{yR>7zyWVIk`F_=GTn^`V-s$XRhA
ztc&RTO~M-MH%dA%C0`8Ii?0P+Bc;Mx7NF^V#W3X*qP*6afrX684rFQ7WfYY8&vo*@
z;p7j${B%CrsODpfVJ;4c=F&VGWy75B7tIvP(yj90qeY@&Y2QOayR0ETmN&}*L89&GeCYL7Kew69)zNV~O>TYN|h^f&*}
zm!%b;yy?48@D-cIMs6~eDL{c@I<=9FLK}NPFX_^?v0YTY1I7##G}L-cb0k+E_DnoL
zQ+2j8RcMZG4YbEiAt<&yG>mx?MiJNVld8FB0atx;cG#(G4b2S=j`?)!UFl
zSfzfa?KS^O5yM(yKf@DKs?`uZ8v8bWB)*jMqgCH|Jdtqb=zOE3dx>9yjM++)t<_B8
z?{dO-xD&2bKaofzVz=}tZ|F(NOMHm>2uvc+#fPNU*Vvw|XP+U=JUxNQe;P0;p+r{;
zzie$^!>Y_P%w>7#kzp-t313%M_4TZ9v9PX1;;h8^hBca2T)@Fs&NG=nftNEK4t5aH
ztp$YdX1M6zT9zl8$1+@caGSY5p64JU=NZRzG&wcJ&y42~5}_T$TG7Hl#<+83FqTVL0kaSEa3wAGKgwQ3^w+~)DF*Rw|oLnpVbc&O%&;r}wM
z%MDLgXmnmFtGX^LyfayXVdaNv-UvTpm~X4*%4ht?Zk=0rKfU=T+S^k0FcDql&kB!p
z5w`bp27LalvRh|t@uo4eWz>nVyV~EK{WbbdKH6c}cLTf2&6(Q`?A0QoTL`(gc#wN|
zrLb2Z;#J^Sna-NgBASCOJjB3pw^pra%L-3UA11&ShRF+CJWPPC)L}}FV-mK~XF|3S
zvzedVQ$c_@ZFidLLWefAlCHh`UCZQMm|}_N3L%&&m;dN>D45?
z0@%CLkL9XWV{6IM%`Ra94-y5C52Ofc5yVGJ3h}X70KYKt(ISdFK@4mLmed>n1mdF^
z#0QAlj-Dh($0tZ~oEfs`CPKqLh@TrWKM0lliMFtA+B)9X%@>8$7h706?@Z4SB4c7h
ztEP>T36Ad~peLS-U4o~a4`Wz|QPQr}`~xQ^+C!`UK8K0^=#K2yt!pIV(G@1S$X->5R=ta|UWBaL&?@|9
zf##80;rqgDMa#m!F5V~1HiiXvqj2ECd8V#MnqS84Pk4;}
zkhKzhJ9gK_vCA>-Lg6x?2K7N(b1vew=mF%`t#KgOPC^fXyt#E;KKF?Tl{X$Z38B(0
z36E_}u#-b^Fvv)y6+CoeGN82S{uM-3n5*Rcv%gRnV;-QWc=wF*@4qyZdTEWTv*M
zC3@hxuD$iN$BP%*MKsYzXxBH3k}Y_&VN8|`kt5BgG47E2t?~_^PQvGe5qZOw=RnW#
zOyW8GP@5=h6_2~ep(ZhGk30^w3STE3j~f2~7~AYU4z-G>JTQg>+q*ap
z9rxXRrm)8*n6}nSey3<_arh9^);Ys(F#B~F)^(tSwCcSW+30bcfL`Wi?}!nV%=j7f
zoJ?exhqUVdhjeJ8Zq4))m<-!ul*GfY=oV%wI;IUr9t-)1>=WjSR^8G<=7*wYTlfY%
zW5j_Xd~`VSH5?FybtZiQCBF)X;_vZ6>U9E)R~Z0+&j#yO2z!7^snzTTW}1ib4FX_S
z;9%I7tF?04aHP}CAm#|Iz^h@7CYm@FM0#P4!FxhCe;2!kUuU$$hEO|_YXo0I=KIb#
z?7}x{ddPe?gc(X7wou9YTJ?4mM0MQ|9n*mxuR#{w>T7@?qyvb7SA=7O@Fe<#`%7#V
zoIBAe)nA@
zrqz6fqN7{$JQOUCuY;(aXL3C_^sRG}s2ZIWKF=lXqIfMWZcx;#I&Cy%`JT=PWT=w2
z81`cE6h%0tKSi<)ht7+CHCfz8%P5+oc_@_r5Dd6MzZ*n7PJ(V3qO_fEklTn?#J0P!
zZBEeXHcnp~eB4Vuz%aKbf^ooMs3s9964uw~IWflT#TtwC2HGUr&@D=uBf!baM$55~
z>BC?*jLlC^lCP3bNm}*aQJcVHNJ$lscxnd=LDoJ()jmEx6FE*&^$RDfI(%V|YSDhb
zn#*+FU+Q^H@|V!xW6q{c?QI4&rvV=8S5HQvPt%(!HCxyM=x+kf5IDt80JB3KFq_Bb
zt2_nt^yV3h^Jb$U{*yVOfS}%-kdJ&tlrH2ujMMOF!>lYtl%Oa)C#z?Ldd^bM`Rch?
zJ(tR7w2Gei18A(3A>tT>HLM@+q1do-_O_Sh8}=OGHc&U8aiCqKY^%=EI(9!54b~H-
zw3aO;1duN*t~F)Sh51Akl6FuNgdN-}Ol)5W0H*@Mnft-5HOzxx_k>K~?@%iz1aGA!
zp3Trg@N}X{b=IMFwsU@R_vh8`2rM93HBb={R&LjpzLCMSFRk86-#my!*qiczUJTjY
zkiDqXFdwNSzK8wj24;Uqj;1Xzb@qCSi8ho9vxahs=9*f>P$y+|&B0oFX%5!mYeKg{
z=7CFz)Ra+=HqAH8N4Ywuj!jgD>ingYRgZ23Kt0S=H`jEk(os>j8_Yu4loqjm6>P2v
zAe2Xu_9ofH-flnQDHM(6qxJ1NTn`l7g7})NFI8BT;
z0dP0b1UgA*2Gx+QkdT`}o*>T)9u^BjkY?DwsiH2~V!kkcz(Izm@d3kLl_$J3==i*@
z@fFO=PIo$;<_1mb
zH0-+<%i)={Sgy6c!k+1OQn9pUyuWL_z|es3l0)N+gJ;s5(E_SS8=55k$9UK?PoaLa
zGIRaFfu}Mxy$nSvy~5{99Jayv?J(>yc=iXN08(xQ5*Y;f(tr15aItR$=p{!h`wDYqg>D`#
zH@E1`jldX*=<9h}^%hJiq&$%`qsQ>}DBeOfyTe-7xKdwah_G)h6^SkJ^Ml3vYPLl#
zEZ!!3yY3OO2M7+VTPKIi7xY4W3BLu#OZWmR=dQA@27d$)aztD{v4E0a5dXGe?xm0x
zuMpP6JbVb?b?F4XWXJMxbYRJ&Okpt9R_0}euf|ItoJ$*SA;)XNx)~WD61y4v`J40k
z``Kd7E|Gn60V0cZa~q^QisC3$6iDfOG!k1<=`8hB6icb1U`mCGFICUc>RF_o1?riv
zp0t@!c>(qG%P0MU3}0J?2M*|-`7Zan07=p!b#NNiU>`Ct0*jWLs
z`XwHov6xDbW7b%791=x)88FF;R>fd_>O4-fqP45957I*Gt0MZ=ago?ryt@M*Fu1`M
zy$}>&q(KxyG!kmXsMPr)eqt8v>WBkbjupb1Fj@Foh|+~PdbI9)&M?1QBz!xCSy`dm
zA)hcqpb~Dv3>;wUK)e2+u+9_aO%+{ZF$G`f8jCf7<3H%(L=!f3t8%jL1L08#0ttr*
z^QL_2>m{h5D7ZF~71SPs1_0PkDTHpB_81tjIDVGSm+Rj{1t;1aYL|NzZ*=%6e10T-
z16TgMM<+JbF%TIb=Oh_bwPFK~&cEdGpbCJshvT_CyP({YsyAzQcFtixr
z6)hlp)d0#J98v1sPqbD((u3Ao8+?fp^B3)@;7es@^>7K(D;h$+<7KPg+<=5-(GT+t
z^Qf+s?eK)y?O1^TTp8
z^rSFbL}90HiM(KOmndvBv`GhW_^1=+3!?B4xXLTfBTfnqC&GL=F?p6KY13*pV`xZ5
z`T&iXVb0y4M~|OhIdH6tFyX-4ZiRh;8?WAvw^M+3zazScoiwBDdrYVMHH(#+>{)O
z6k^5H>4k^Eu3CAH%7Q6Gdg6+oO*}rvdhzUF@gIrnAKg0IdsgXDsMGjf=ud`Ul1EzO
zX4M5E`E~xtxAo?60gOx*o__gc@nj4tYFO9fuz$_N{Yf!r$r9
z4?DH!Gf0Qfd3WDD)-E5BRkrpFQm;rf7$vVqz8m`?mvSpugY)vexqhH>X={kp)FY=>
zL&z#N1*t;zihv${yVD`xbaNcupZA5#uZPSDb
z$qwz_Qi?HSBKrGwVSf)R{IF>JQ?5ufn@6Lsfz5h+R^?sAT`^*kok~@WC=T64RsYeW
zss~S6l@@J6J*e&rNGhUle;^J+Rc4(Q$hivo3|9rTfjLFmK%*$21O?rMSryH;dvHQ`{_y1Cji0m54UyMCwG%Zms$<)F`4&0de?c2cJ={PA$;y
zD$3NU4eVEh1o`SYm!O^Xi#23=4FGo{!>%J;#
z5eCIA-fh2bK1H9oNys-VIXIAz&F4%^UnSe4n=9pc3yB3Mrv4)FWR+OTL%iuF?T_@t
z^pxX+^!dG}m*WGy4Ng8$7S9|_w*#ttXqthlLYw3mxo2D`E`aU6U#lkmjbwC&FurFzeyl-IJG)ne|
zF9ogXFH?nlBO!*h#`_y3ty0W-kj1Pm;Zef!5&sQ|OHZ2EKS4>bgmnRFtBnL0%S?Vf
zvE9g60Z3C2jAJ%a7IUa*2KWKk5)&by^M}j|e_2y$z-O4m>L*Q52Om1)o!lQ&m~HbP@)9MK=v51a^ntYM#Q?tyIzer$?1CeUDn;o8{b#zeOZ*?bZ8FbDLy!(}
z3|hv)l@7guD0g)1m2%CbCvw+I%6N;_?+_l;O(+*Ys}r0RS8Fy8A8_VN>e>4F-5py9
z%<#2Hr5epod%OUHl^CJ;B-E$L^eo=D@fV52s;5Ff0kzEHX$8*&=ZCv+?!MG)Hg}nw
zcso#rh2cA<{c1~@ufg0?vpe!~Sv}=0D{Lz_8^nSZDn&g0=HjfZpQCJ|O%K1u6e=%L
z2+SKhE)feFC=X~=oXxfBF-W-j2xZjPP^0J7hkjDLd*0y#+HYE(qJ&O+xCLm>0CSIQ
zo4E@u=s*YhQU?wJl4J+u{(P$Kr_9OrQz7E93_Dnh
zGXeO#0&j`QU|OxH1vV#nLa>eye{z4oxvY-yau?U-(`x=5)G$%*TFtL0#2sKJ(uyih
zSOyxs099AH?Ub$9M6EDv(hhx>&j3XvBQFpZe*oLev`dk~^a-nGDFSFij)RX7{(yyU
zkUbD{*)TT+D2cg=QpyI>nZrFz2s`)I9X`w=9cgmcage*vTr(e82oGYAYVpW2tcR)M
zpmoLYAdtonZlXAQ6D1S&L)7YZsE?kI&(vZsVN3Bb){Y0=juD$AQPm6zTzwYE5vw
zKIa%%K4moiLf9n#YOzqQGJ*
zfcki1GC0f(FAfBoHThgJuSzZ*Kv0EwlP6>sTFb06pjPm}-{T7yQigr)_H7WQ+mBGg
z;!~kdVFQd!S%L#}Dd6D99~BTlS7Oey!Wy(gSdaQqhJZSmIcR5DbZeiolC44Wy;YY7
z?eK6cHJf0~(kB~Wz?h63%FPJCuxq82Xe=)|uMVed1Ak4yhO?T3jg<>-Ux*=oX0}3Z9|a71Lv||&*@et*7tv|e7h|Ly=puT;
zJujr=yTLe0OMEwsLOcgveCODdbg$AO!Y%F5k6w^pUdX;X<*F}cP@=)AC?J}E6qGFdkp2T<(Gl4+1`(MQE3IQqiKlTi@mSGM!qo(1W
z%QIp;Kqj!=Kx%#}whK^`n^_kGVrwFqJcn0|>@0jwZVg)XjYnu5C}xwIN3EtDu`$7)
z(lcy0mMO!(BR2LQ{3$)de$w{uQNwP;#`fTcc7xb)RHd6Qfr}VBhTT908NR9?!+SD&
ztS=ZbCVK7Y{>hYO+XKA(04FVeH=!kS$RF$?3~B
zk=0^sN-~#fNaR9A3`BxdPUJ>sQOqH6%1H=_@UwAEb((?G7k(mrs=~iYj)GG%VM;|_
z$VNS#Y%w*#v+tK}A{ieW201J2(>G?itW0;n`T!g-@{qZ8VYR5Y22
zCFyLa)m(znb`}c2?$_TQ5?S(+HiXG8E^*Q1gB71g)s-WW;D|1(dTgLdxUZc^PwWjOf1~vU=z!!
zZ^{{h<@P8St2A}HxDu%kKxJ^kG_8hFWiZH19*cTWVW4g*seJrmyf_6zD6=V9Ff-G+
z3Ple=3Rl!hUQg
z)P&L{%cC9&Mfvq|15D8Njl#Do0n^d(I2iA`iEqxu#WBg8JIrV7nXaP(i6M
zLx)K8i+UmKQOCf+ef(@h$5(Fhs5Ln&y=ErNF@p4G-*@DE+#
z$XXbPpP?t75xf3?$p?Kei?UbL0fwlJYT~>qEvTV*n~aAjrZ0!$6Q0iZGz5F;IimjC
zFo&Lf#~pWKm>NMEY@36&gOf|g8ArvY7eK=jNy83Z!8)8e8!=@-C{n8@_G@}m))L$b
zhYL{W3#(@RcY3n|vRQLxSK@#cY-|c{JHNvrf;&&O4X5CnsjrYpz-e*@0^$SS%XI8b2s%i#(
z=(I2$kW^Lk|NN>hrK+Z%ys8gB__KTUIEsiJ04h95uYP#qs?t`ygzsCaY?2S1r0i=>
zTy}CRH~}0SB8u&@lUKC^>HbVOSW8tAZE})c&HD4Jx)?>oZ#hXl-A7N@rHnJP6?vGW
z`L>*?7Jiqz0LP@Ggv&&IWYKpQ-Ff$-e2JFsT(aQy1&e0ifwyg<9)Wy?E_OBr_na!C
zxE9sM{zkSXR*q-5C
z`6U0!P`=XzPd&tMhB{VL-yvcnJbKK%W9F4tLC3}?HePwv-FII3ojKUnCiQI^uVs-;
zz@v{ag}e=v>L%zcx%6`_0UVfW-7q*t{rm>e~Jza{iR?QE#pv)X9b0AFfH^S*k
zc*LB=i-jq!5gQlG$saLd@e;64MUmLJXvE^Vx6NLJK8OObaZVWD?w-8_Z~0;)^^|&u
zw>+^CjzaRO|9A_Cjki~#AtOd$JB8l*MsDZ4J7$A})l~X*r~^o*JDz?50lm2|J)SPY
zk7^!CjWtv+q9=z5=og6A-a%CZjpR{%(U+My9F!B}8kbW)L
z7J;IBH>RwV4z(rF>-y~s)N(}!Juj{!v&iVUc#8jym8a!Y4CpfIl
zM~EX+>kF^rNI)0^>Lu}c2txWDxyCutav#dL4>xchZgToC-RZ+z2d?OxhNH)EF4J2;
z_-QoCvmhQz?ZhKJI`PvUo%pf06K;jBKDg9xTm8=ZOz^AS(m?nP|LgEyr4=&Z;z5ij
zJeVUhXJ!FDZosjx0cCh>UD%WaALy(KA%$+vAWT}H3aXivI~@-qc;-`~cowK9VIz7U
zt)8XoDb#badRFk$u9$_ivsB!C^;}HPr~UZpppXJ>;5I-z+ve!zq{TS*p}jaEPMWV<
zmk_gKB>_EYm{pTbfD$gn$U0&kD0^;DM#^eyYDU>7Cfts7b}5f~6HtasQUn0%fhSKox!Pb&Ao
z+(husO-hAwD5tL8N^s#>Rh*2FE*p-8FVwB^^HYk`O{I?S0J5^?!H%YQ7SP!2t(BxD
zaN-uGXdrr3-qPjE)NXSJYiu%ySAjC1bOXMCL>ncZ03!#iMAdU_k7lSyRt+oo>G+MD;qzt-bV6x>_K_5L?ODgAZyjg*kL*hrA)&9
z2=E&8%cM~`gVs71wAKQ&s(J;j6;)4-L{SPXiQoqgwSrYx^OUg;@3nFwQ3mJ({DHht
z3d!1I4^l6{xKjFXmXxqs57Jf^3m~zQw}6plKRO$cF0%M2O90E;VfgV1_(~UV3cko_
zo{sXaV3jWt>f-T1TQ=dK5xWt!C*|GA$&vZOEkIU87ID8BZl6rf7!?-&HD2Yun=JKq
zl&WwXXN%oT5j~Jr{H*wp_!;rMc%S$n5Jk)}8<2z!!o7&AMJwykK&mt*)jK$4GLNE6W1p^5ky*EWMkN3*uko
zv={?3{(CN#U`!tP2=FARD7uYKaOLv+=H>UE93PH`bmkMnK#@NmA3pNvC)S#ef)Bs;
z*;M=Ne}7`V{k-DCoc}zH4^QJm+MImh&j=r~A4%xf%H6Xa0+x10od>~iX#?7I-U4_d
z6C(04g>7HOI|pIc-ki$AdAdh=skx}Eyc3jFjr~L7>(lL%Gw|0se(ru8eFt_mJIR3!
zJF=AkiPJ`~m0&?w--MsQec=%#TyN}vW1;6M*6UbE*!|i-2sYrK4l1p(gj)A_GqqOO
zp#~6!U@U~pPDvuL-_Q^&s^Gur2*{ow5tLB9ia@(h3eW-Vq=}(bU&8HRRR`!OmXiLE
z)K5(6ry*$rC;g&K+Cm3B5SJ33@#5GgC>92mK}GsWl2{+K!l>Ka>@4l3x0}zYA9=$gKGB@V*0A
zJzwVWhixh`1l;i_enSiK8%X}%Ec=$K|DKuuPA+|%`IpRgHUG|hF3WQWPE4ue!!Z5)
zJsKw@Vxrrd@`3VGd~4hZhNq5OCWzB*K9hkIIFv-DBn9~V3P>w31bL+CAp$(2F
zjaCRf$l-?aq``%ok8GgQ)a)A8r5F(QqJetoi($_3qfh}Lo->(#D(GicR`b~Tc}R)F
z+K-^O`kgO-(D0X=2$rH88VETTa+qa!^;Q~u&r$956LF5$Ca
zzSM)2E}@4;0HaSmb--ULqO7MFsffa99C%Sj;^yFbF=a>
zn1arPIFsQ*VHXd)Tm>}Pvdi|SWJ8^XNPwepc9J9)O(uSK3eIClf7
z$)O0Cp(zeU{BS6uoUX{xa3<6<8_oa`(DgPNuKWSxaRPrL-u93Kk>1(^9l2APonA)A
znHxtIB6#m2#kf31Aq?>_7RR{ETnD=e-fh%g3=o2ZfcGxQ59aI9+dHxTr7TB8=hkBH
zs`Kt$1gIV_P_Q${8hEIV(S(FxM-eaD4Dl*EaxzRvrs44)i$v0e53&$9qh>ea~hS3YaED5b?QloW^0^&
zoLpQQ?Q9KPzTDZi#zT~DCZ&@&MIvJMyu}2fStG}sC-akV1m(!Qz5F-wmkc+R$3@BA
z{_xch_sAiD*MfXS+DE>eO4hT#Nb`(%U*$5KWJ2t|?`c1F0cQlAiOoDl%C`wt`6NtW
z7Usm76tJLQ+kXN0*2oKuX+`R$`a|*V$3#M4KL(`|YAJ(B8oEKmT!0pYExn_;D*!s~
zyg|$DnA12KlGumQaJWOOMNVwTk~V|yd)kj1lEmCr?8gtwjPfAjY}`=r#pXbBbev`CZ7-%K%DotCm`aqxX^*-IUYglUT5??NQQxLG7&y^*sbOy}H`Nsv_NW`^
zGb=%>&rnI8f^~E~;xSBA)#C-o%~`P-3i45&TzuZ$SboGf@8`6sTXlQ|$~@mwCLPP1
z=D1gTAkjc-5RucK_Bzz+$F4q|QmAtFy8hZvFod5{
z&JKJ~?b|ip&*(EFPsTlcoWQ@`|DyaZeG`3WG+Q#g2Y+6S<5G688THW4OykMI9%RVy
zt}WPZbPG<~L)LA>VG$yB#q|Uf_s5<^HcVQy0I<$BO9RPfDcAsQf=04dx-?!4ce@wr
z(YIi#B-IP0c2uh#-3dFT2mpOX3z)EcqIWEm%AtdRt$j7_RRcNQ@HlB2acj|-^ObGN
zEojOrwoPd!+Y}^o_Gl;{1wg$)cK}o%tF&Oh4uM6CQc!LWm6lP%Wj(cOK;W#E&emKC
zyazg^-0Trqb<*Jwo%(C_KS(V@#5c856a8^Ge90_@YFWl}*
z@qeqt=!k`{3uXD(BfF#DdrIp^_CCc=q>~@|#I)YvkdD~P3ApAXTdXwwVdXFGF9^Pd
z*`E%4ZM~oCISpS#q7Do))7e`u>~LqEZk+*Vb+B1z)~d2Zyq>Fc`8wRfQNLXY-01M%Qv}aW0ep6JUgGDY-&`bVcgrGq0NiOf
zu?sk7e5h2(hDy#)Dt}ns|3;Ue8P?tKocVs`Ab1EGh|9a8&tM(gG29>f0Ha5Dui^4$
zQ3!vUFX@emY+UCH4i_$ZCZ#S~5~7~OBKRAs7j~f^f)X_z@1vLO1mZX#Hp;IF4Yor-
z9G!HL&nU7^A;h1P4>w5%dueW==&bMmA{Q|24OY(>&v*IffvX7hE+p&eNvk{(fjOOK$&C
ze2Q*JX3PCo&K5+`xjSUA!wSAfuF9T7B3TMb3iRK1oe#_LRt)@QQ#%ncW5Yph`pXBk
zX?yX%5C0c8a*7IWJ9;#uj{AzFp*kdOfg3j1mSMSqZD6CxFonaj9Z%qTn0SzSiX8-k
zh0x8tfG67ub;d3L6lBN)gq=B0Y-r{TV0UH6fd0nLMTQgL*VXUTVKjJGm10Hd)C%G|
zK{b)@wMXZ3s1K^BPnkgUs(&64Q)FIc_b-*f1_wvOK^xgH;7$syn!uQLKJvT1q7`~n
z8w}f?g|EOn;H5o6l^OPmDzc}u9;7#nZq8;5HXB+HO#JNtFlNw(4M3zzUXH`ZpL%rK5r3x{LpCj
zpFk9x?A&S@#4be8`HLK~wwaQ<_+lTPBXjGnCE$Ayu`*}wa86G1ojXwm3DiRMaw8kr
zqTTlsBv!6u{{}&P&HWBa0U!pEO4?&8_d#$#NCOFrLTaKc0!EwYRTZ<~9loGZvSKaS
z>83|pE7oH9SJD0Jjj`(t-x2svKMo(keU^`$r)!J09@Nf3>&{`nTg6@Tw7{T4^R(%$
z^KkPzS~3WRj!k_;$!bb29@KJ!ef2R>
za5Z{IIxd(XC+A~e
zM`zg6cN7VGN=uPp?&J$94!>HA>jq1+=D`8n^kc2`xb0Xw8u~K+UqfT};{W$>fjIpr
znm+KhqwO+fCq0IPgvgMv6A6bOg@1KuVKnuL{IsmA(HAZhzUMIrLy!eE1TthC(oGd~
zV|&<~9E|E8qko34vD`dfh+8nWi^IDOABuh}Tl%F=?l+j}eme+l0JC5e2KH;=TR3+{
z-V)KH*-LKe`5iIF4!>8^gAG%u$xm;DN`Q
zv6fVkzl2Q$ptKCP)9(|Sj!Q5mfLy_SrRSlzbI``!u?uOMwOe+_2GRsWyQJ~m>*#VZ
zTpNP!_rb6?i0H8#?fyAbHXPw2_6CgGv~7s?Yxk4U+eW}ROvK8mq=G>NL({iYlHK#P
z3Jl`3eR%Gm#)^_7%U{9gR{R6WS8Ns~$5-~pOTVL3B4Q9rUk3wxDq2O!A?<;CP`0_Z
z5cfwM7KdNLy%hj(3_D#BI&U4ik#i9J0Ei=bqDdsSMdG-}He0*53<;d?0plPBl~V!j
zOoKbX3d~TT{2+#vV;k`{+Jv?qiZoz}JXl2Q5{LsY-_HH+1%wo)=T`Ew
z0NcZ0lvIIdAulnM6O>$VbcC~_%)yZo$|e1I;vKb3V+Aeb1eDMwBXfiPsp9PiiP>ly
zR&j*FJzAl~34&v-=4x`cQ-L4sFrZ`!W02Yhbi-{%b_lLRR0E#t32Sh%K!&le!aqyT
zJiz{$G7vtJh^&V4#qPjZfLcqo(@ORz&(sk)Q;Qv8JqVR`y7?aNG70dxqFd44gRj;l3HVhxobU9FKoi0cM9j;VQwD^0q%AdL~C
zFEf0mDBeH0vzhlA@{ar`p)S>=B;il-Uk^{I*
z0$!jU3|^)$D1lZ+t%AZCb(xc)fDOHVL1!}+?VdgeFl{y)hhUkehihY{mq(W};L7~Y
ztnv7rOgr|=fYA7I{KPB_7oJ^guq?G-XpbClA~PSYY&>!#fD8Mx8PurK~U%W0vEvcAXbWeMI4
zyZ>T}G@HwADob3+!OYN=2z04$9PLA7V85FJ8M%{=0sr9uNrC8(tW#|fLb6qBImA+9vzQlUBFww!Ez
zpgnD>JyMliEfrbxM_MYqEC@h4aE*jVIzYN=op`e%FUl#lPOwmVSq>;TFLS|Z3uOg%
z44xUTN~(rcIXRu#I;ABtoR$dbj>pzwwrGiv;WzOI)^XVx*M~EQYEkg~$%{W}kP+)l
zRu|(kxjqPg$&s-)(L~B{%K9LpYrH3!QELTcr
zMNYPTAreTP?c6=FakuH+Aj
zFl7pncDhq2af(~wcv<2|S>hEvN_6CNL~oJ|_yr~50P~Hn$$`gG+SBm2s(80Jd<;zZ
z<%SN?gkEy1KYW!}?|i7EcYcLvd>!ny{Srk$?ffEKMw3)K52ajawB>0((6LMj%IJh-
zxMP2qy5C3NnXfeAC0T{T02ha+*LRcwuR_+h{B+6y_~)*>BIz<9SOo&Lj#$s2*OP2}
z+^7+(YYx`lMtghkr;2S~2PRG}*!DkFEhhd786!j#x8GhVV1ZGJ2p~Fnryt5mO(b(L
zu_sG$u0WJ$mJQ6IzYx{(u0!fT9{#*Mw$VsOx6b#RxF9NFS2OW*C`7`8I5@1$PY|P?
zdGu%TAwupn*!_g$(Hqv7HiupH`Qy1@Kpttf6Rt|=NzHU;>8Ej<$F8`kKMb$fouy1O7$@?PaYVw}TE9z+1Q
z=f^HVNjh%yHmtse`I=FDUPUz}Zt(vYYz_nUNVY^2j5UOb~EVS%PhBKiyM-hX1XEs
zr1`)wV@~9JG;3@AUCzgcc*y=I=K~W*Vi~&SbAoMr2#`8sf{A&VWQXOHT)-S2R6t&bYa}ZD5MraJmsLMZjqx~d~tO$W(p-cgp_QiKW|I(P(;qvli++f0?gcF7Hgo)$h3
zo*jIFn`RP%Qp&=zn#dWPo3iBo=u5;dhfqdk%YA<#eI#j5?ociv*lq>i;cI0mkquo}
z+$U^USCK-kEEMd#z~BlL1dchlfrV4S$h$1mf!G;f(@ZDX4P-b2AEHw|SY_b2PNV<+
zBRETi;;jt~Rq+r}-`65pa2?oLxqEY|*lB#GMA1PM4f;8W8&dEMw!c(-a}Kqb@XZ$y
zAPiH5=8!)IwIiosAWU~2SBo0P<^u~AU{0XIJGMwYN2_P4dJ6TNtezF>Ig6gL|H6-A
z@?l#{ilXW;whef1#}jf5rE-gzwL%@S>#<-nEQ6`EgW$A~_WA&$NmEii3LrJs|S87W=VS*9(he
zR=B_EQe=V17KV~M4*lnRm%t4`W%svaupUCIT1o}+5En*5E0Pnk&0s^aGEreF3
zwC3!o@P_*cRi`f7a+laT$gk)FW?ss#CYM2X5SKPH2aGLa~&KU@Km
zDC_+`Qj|3qV5>zB!$3|BR_-)TPC~TYs0`KqioA=G|BuPLzMKm*{}&|h;=%a*%e&ywwWgtrZ7B^c
z*olKPgebS{X59Q$Lej)`wQo{_uNRVnul@i~h!G(8lJc&-BsCSjy(BdSuEuIfm3O^H
z@-7h|>69z)dMXDHmy~yPBE|GhN|Gk;A{o-2ba~ey($}T`eAY7s=*!NO@N$@HQg>#^$uJ
zc0%&izwtOLHr3=Y%A3yb=sBdwgLk1s{09K)gl`HTB!gbm#3KHDgfT5h`XIp;VHU0W
z>Jv}VG>m4!_SgVQWqPLwLS+&!(lkLU<5%Us)Gsis}aKLsff@N)81iSrRI-q947k
zkzM%<=y}EJ`r3HERt?b&&VkUBRj1h$inSn}V!hd!bR76WCPA@uqMvqO5GvtwK+gap
zR9sJd5a=*AM&Mr}6D8}d3ktQmb(dch?h%$Gj&0mZZ~kB{x+%W)gDO0(EIzd?y`zZfHKp^ZPh5v>BUJ_jzl5#L9CjoSa5IG!-U%u&{Jh^25uga@*(Ot<_VDvircFlVQv}%YgzUCQeod+wbkdqc
zO_uj(khZW@iSh?+r+X<`d7~u~i+=@lN(T)a!x46yjyTEz)mwnPGEPywKT^;zqeF6%
z0+Q@ST2AJUUhEa5bbz5Z{u}Jkpyr2D+<3pZoP^;;qOm*Muy4;p(@3AnoKS>$tHhKH
zf*kyNKBMGnwEV!5w_;OZ?TdRi^2wpU@Fk3r!@71oE>cUv=Q5y>WdWlkQF$);B_ll<
za~U{Wi5MMx4GT=2qedZ0WA+EG4~NAg;bQ1OR-kOX9^g%XV1GPM%0rDup#z!d6Drvs
z(yl+|9<}w7?ICR}9kn5(0GDD#efX=O;Bo4kT8E|95`voZ8Xrmo3ncBxjpQTw6>8-p_cC;ALVz4C870_xh!ceC(nVnl(TC1?
zdt11lVLs0@ghBD^(N*19T6GzK6R-J(^<8jJ{uCZWR0ZzQfZP$>iDmh*P81dbb}(q9
zGBS#;q2=cpsb{FI%qaR0Oo`{fl!&<_CACF6=%xl@618B2AloD+(KFhb{iq8g^hZ1-
z#KB97ViG}fFTzZTH=uU@j%;Mkd%9izp2H@3I|Pmr^CW=&JX)g|mWGho0agnZ3t+{Z
zfQ~kY2QXuz8}~yv{D?3*?C5a-bG6^uuDuB4UUD^VDuw!
zq18f?7|2mRa8F-K|8&L{H;TFT>H?G*TbR3)u7mQG8FV;u+MdY`7V&Hq+H?4wK*TQo
z1^-_97-Ie5q$HbVqof3jwdq`Z?%-s@DhOF~VdNVDvj-f;kU2(i80Y$9-3&EBfMIX!
z#W#_{3pJ<6UQ9v_j!BBO_%KRJVJ%|YQBc#sP@^=A4Rg#PxN17Y0CNF`!{sZ=S&{ri
zV*Q286~Gcf$;Il8pBO%`7v5Eh$}#kdr7*n*>(Ij=0A(^hAEXFuL9Fp0i7HC5XALZc
zR*$rgr|(71_ky0^1&Lru^^o0zlm9<^NutbbEmNCIWlnKd{8rvzx~MEvlXJi=Mn#n$
z@Mnml-BUFuna1v{&$5FS&1w>-AUeX$QlQb~W
z^e;}^OQ;Kn{<`cX9uD<}pc~TLgf>4KKBwXA=nyS@nn|v080}fHBTNabbI-5NV&Hr
zjnp2Uh311!kV`wM#_P?46K54jzO_j^wZYtUobb#e>#(h)*r$cDKMqXZ)x-2%(mqWp
zGh+PY_nudCCAEps6;d^_I!Sh
zMS8oGIg>RV8{Rl5y^DDqQpb(T;OTZ%ez@_YR*paUlJ#yA7t=PO3Z^TplLe<3*Ht;2
z#aiY8c=RD8JU9dBN_Ac8+w}+P_~4}V>u7D8wj2Xqr)+E#!7+}cHXC|a8vXJ{UV^Py
zf_r4J0ZYFgrJ*=k4he0DnoY4Zf~1D;!BUn+yjq~zh9Q*nJLEMK2cVYnnhO5n%4_PS
zyk;EaHIxa5(<+e8YYlS+Bs6o^LsC=g%n?LH^?+|Q5eGBU;H178%`0l9?YvqDx5Q-_P`ARbHpC#1_s$GS}Qc${-L8^vZxISSoYZ
z4TLekTB70hvXZ`+Zty+ca0P9IOKy%|ZK43Mc=fUATbDnsOJN+5x
z(~mdwZ8!IHlxw0bsA5T8jvXJPJeD%LkHMuuNQEEW1=ag$t;1(o?t$
z0m2I$Tq41`3OYXhEbcmn6LbuH+xo+=KpyE13!U0vZjlp!M<
ze-FMC?~_d1St+^-F2+mx<97DD7A#zcMeCT@7V$=srmp}Jjds4ZJ@plSeFJX~=x_gh
zGD!Lgbtss&J}!*5zHN9<(O0O;sINfeuBW{x(^r6Z1G2Zh4f`ME6!KO{=qpINlXqUj
zWQ$Pv^QE05v*H4XGuG8?z|M$GUN)O|Myf%(H;kN)@!8G-MFlA#Fgc$46Hd;`3r#XuPoTcm*(SC7nsSjiy37w=?r{p*lC*8+
z2amAqDeFNose}S@$D<0zvU$T8OK{20c%5eT|_Q(mZ|@%!A_>tZmfA3(#{ShvfOq8q-0Bt;Kji0S>*?Q?fV
z{u5`k7NWC9gXIdSxdmzo3E1PlL`+AxCxl}O=pb~9X!~))9`Dnm-Px62LR0ZS?TuI-
z%`I;cf&TvugD8NW9RMiChC*fnG42_VS2sX-1{+;@YAu);Kos;95*h7tA%6_rFBqDpmC*P!`QGt^2+}BavqwwId%llfOSR2+yl^w78*2)rw95qp{XS0$KW)G#CFo>@QzRNJJAZ|O*BqP
z4S^;apQQFc6O9jl!%+@%+06O^^87Op^d(;}4u1oDEITzi;vz{$pkp-za#}TX`+FfD
zJ~35*9T*r#72r6j0({{ErV1KxOyNyRsshka4m819#8MX#tVfXhX0uC$4wR#?e@&mG
zlp;+VAc8!Z3#~eFp|q35?j{9;43tE2tZH6V6h&`}0Yp4_Pj8Np3XQX&l*Y}2J(O{5x8Q6yL4FPK*0*W
z#6%%(w1nPKNq1$5!SpJWfc_1nK#I!F4R*h7Hr_FD2a#^#Vj)0g*za%~nu7H?^O}Pj
z7e<k#?=tP|(NHl^%{wRm7N9`Tz(f