From d0bc00a3889c87692146887b5ad83134b3da80b8 Mon Sep 17 00:00:00 2001 From: Wu Jianxiao Date: Thu, 12 Nov 2020 18:06:27 +0100 Subject: [PATCH 01/11] Fix bugs in chpt 05 and 06 --- notebooks/05-Probability.ipynb | 2 +- notebooks/05-Probability.py | 2 +- notebooks/06-Sampling.ipynb | 2 +- notebooks/06-Sampling.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/notebooks/05-Probability.ipynb b/notebooks/05-Probability.ipynb index 0ed71ff..fbb8ad5 100644 --- a/notebooks/05-Probability.ipynb +++ b/notebooks/05-Probability.ipynb @@ -75,7 +75,7 @@ "source": [ "#+\n", "import pandas as pd\n", - "SFrain = pd.read_csv('https://raw.githubusercontent.com/statsthinking21/statsthinking21-python/master/notebooks/data/SanFranciscoRain.csv')\n", + "SFrain = pd.read_csv('data/SanFranciscoRain.csv')\n", "\n", "# we will remove the STATION and NAME variables \n", "# since they are identical for all rows\n", diff --git a/notebooks/05-Probability.py b/notebooks/05-Probability.py index 9f77495..3abad40 100644 --- a/notebooks/05-Probability.py +++ b/notebooks/05-Probability.py @@ -49,7 +49,7 @@ # %% #+ import pandas as pd -SFrain = pd.read_csv('https://raw.githubusercontent.com/statsthinking21/statsthinking21-python/master/notebooks/data/SanFranciscoRain.csv') +SFrain = pd.read_csv('data/SanFranciscoRain.csv') # we will remove the STATION and NAME variables # since they are identical for all rows diff --git a/notebooks/06-Sampling.ipynb b/notebooks/06-Sampling.ipynb index 8011dd4..7223dd8 100644 --- a/notebooks/06-Sampling.ipynb +++ b/notebooks/06-Sampling.ipynb @@ -79,7 +79,7 @@ "# we need to use the maximum of those data to set\n", "# the height of the vertical line that shows the mean\n", "plt.axvline(x=adult_nhanes_data['Height'].mean(),\n", - " ymax=np.max(hist[0]), color='k')\n", + " ymax=1, color='k')\n", "\n", "# draw the normal distribution with same mean and standard deviation\n", "# as the sampling distribution\n", diff --git a/notebooks/06-Sampling.py 
b/notebooks/06-Sampling.py index dfb77ed..06253e0 100644 --- a/notebooks/06-Sampling.py +++ b/notebooks/06-Sampling.py @@ -62,7 +62,7 @@ # we need to use the maximum of those data to set # the height of the vertical line that shows the mean plt.axvline(x=adult_nhanes_data['Height'].mean(), - ymax=np.max(hist[0]), color='k') + ymax=1, color='k') # draw the normal distribution with same mean and standard deviation # as the sampling distribution From d26461f7ee322bd3a0645b26cc0d7479a13b0796 Mon Sep 17 00:00:00 2001 From: Wu Jianxiao Date: Thu, 12 Nov 2020 18:08:37 +0100 Subject: [PATCH 02/11] Fix y=x line in chpt 03 cell 13 --- notebooks/03-DataVisualization.ipynb | 4 ++-- notebooks/03-DataVisualization.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/notebooks/03-DataVisualization.ipynb b/notebooks/03-DataVisualization.ipynb index f68ca08..6df626b 100644 --- a/notebooks/03-DataVisualization.ipynb +++ b/notebooks/03-DataVisualization.ipynb @@ -387,8 +387,8 @@ " data=adult_nhanes_data)\n", "plt.plot([adult_nhanes_data['SystolicBloodPres1StRdgMmHg'].min(),\n", " adult_nhanes_data['SystolicBloodPres1StRdgMmHg'].max()],\n", - " [adult_nhanes_data['SystolicBloodPres2NdRdgMmHg'].min(),\n", - " adult_nhanes_data['SystolicBloodPres2NdRdgMmHg'].max()],\n", + " [adult_nhanes_data['SystolicBloodPres1StRdgMmHg'].min(),\n", + " adult_nhanes_data['SystolicBloodPres1StRdgMmHg'].max()],\n", " color='k')\n", "plt.xlabel('Systolic BP - First reading')\n", "plt.ylabel('Systolic BP - Second reading')" diff --git a/notebooks/03-DataVisualization.py b/notebooks/03-DataVisualization.py index 21b59a1..9d6f49e 100644 --- a/notebooks/03-DataVisualization.py +++ b/notebooks/03-DataVisualization.py @@ -152,8 +152,8 @@ data=adult_nhanes_data) plt.plot([adult_nhanes_data['SystolicBloodPres1StRdgMmHg'].min(), adult_nhanes_data['SystolicBloodPres1StRdgMmHg'].max()], - [adult_nhanes_data['SystolicBloodPres2NdRdgMmHg'].min(), - 
adult_nhanes_data['SystolicBloodPres2NdRdgMmHg'].max()], + [adult_nhanes_data['SystolicBloodPres1StRdgMmHg'].min(), + adult_nhanes_data['SystolicBloodPres1StRdgMmHg'].max()], color='k') plt.xlabel('Systolic BP - First reading') plt.ylabel('Systolic BP - Second reading') From de75e9b9b8eaeb97380641342b70d2e4986ec625 Mon Sep 17 00:00:00 2001 From: Wu Jianxiao Date: Thu, 12 Nov 2020 18:35:07 +0100 Subject: [PATCH 03/11] Fix typos in chpt 07 --- notebooks/07-ResamplingAndSimulation.ipynb | 4 ++-- notebooks/07-ResamplingAndSimulation.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/notebooks/07-ResamplingAndSimulation.ipynb b/notebooks/07-ResamplingAndSimulation.ipynb index fe6122d..9571ae6 100644 --- a/notebooks/07-ResamplingAndSimulation.ipynb +++ b/notebooks/07-ResamplingAndSimulation.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Resampling and simulation in R\n", + "# Resampling and simulation\n", "\n", "## Generating random samples\n", "Here we will generate random samples from a number of different distributions and plot their histograms. We could write out separate commands to plot each of our functions of interest, but that would involve repeating a lot of code, so instead we will take advantage of the fact that Python allows us to treat modules as variables. We will specify the module that creates each distribution, and then loop through them, each time incrementing the panel number. Some distributions also take specific parameters; for example, the Chi-squared distribution requires specifying the degrees of freedom. We will store those in a separate dictionary and use them as needed." ] }, { @@ -97,7 +97,7 @@ "lines_to_next_cell": 2 }, "source": [ - "Now let's find the 99th percentile of the maximum distriibution. There is a built-in function in the `scipy.stats` module, called `scoreatpercentile` that will do this for us:\n" + "Now let's find the 95th percentile of the maximum distribution. 
There is a built-in function in the `scipy.stats` module, called `scoreatpercentile` that will do this for us:\n" ] }, { diff --git a/notebooks/07-ResamplingAndSimulation.py b/notebooks/07-ResamplingAndSimulation.py index f552c15..4adf8e6 100644 --- a/notebooks/07-ResamplingAndSimulation.py +++ b/notebooks/07-ResamplingAndSimulation.py @@ -14,7 +14,7 @@ # --- # %% [markdown] -# # Resampling and simulation in R +# # Resampling and simulation # # ## Generating random samples # Here we will generate random samples from a number of different distributions and plot their histograms. We could write out separate commands to plot each of our functions of interest, but that would involve repeating a lot of code, so instead we will take advantage of the fact that Python allows us to treat modules as variables. We will specify the module that creates each distribution, and then loop through them, each time incrementing the panel number. Some distributions also take specific parameters; for example, the Chi-squared distribution requires specifying the degrees of freedom. We will store those in a separate dictionary and use them as needed. @@ -83,7 +83,7 @@ def sample_and_return_max(sample_size, # %% [markdown] -# Now let's find the 99th percentile of the maximum distriibution. There is a built-in function in the `scipy.stats` module, called `scoreatpercentile` that will do this for us: +# Now let's find the 95th percentile of the maximum distribution. 
There is a built-in function in the `scipy.stats` module, called `scoreatpercentile` that will do this for us: # From a0de4ff83796a6cc338604ba711068fd3c6d5886 Mon Sep 17 00:00:00 2001 From: Wu Jianxiao Date: Thu, 12 Nov 2020 19:08:26 +0100 Subject: [PATCH 04/11] Fix bugs in chpt 08 --- notebooks/08-HypothesisTesting.ipynb | 5 +++-- notebooks/08-HypothesisTesting.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/notebooks/08-HypothesisTesting.ipynb b/notebooks/08-HypothesisTesting.ipynb index e0c86c1..a908050 100644 --- a/notebooks/08-HypothesisTesting.ipynb +++ b/notebooks/08-HypothesisTesting.ipynb @@ -57,7 +57,7 @@ "source": [ "import scipy.stats\n", "\n", - "pvalue = 100 - scipy.stats.percentileofscore(flip_results_df, 0.7) \n", + "pvalue = 100 - scipy.stats.percentileofscore(flip_results_df, 70) \n", "pvalue" ] }, @@ -144,7 +144,8 @@ "for run in range(num_runs):\n", " sim_results_df.loc[run, 'p_value'] = sample_ttest()\n", "\n", - "p_error = sim_results_df.loc[sim_results_df['p_value'] < 0.05].mean(axis=0)\n", + "p_error = sim_results_df['p_value'] < 0.05\n", + "p_error = p_error.mean(axis=0)\n", "p_error" ] }, diff --git a/notebooks/08-HypothesisTesting.py b/notebooks/08-HypothesisTesting.py index 75a13ec..3701435 100644 --- a/notebooks/08-HypothesisTesting.py +++ b/notebooks/08-HypothesisTesting.py @@ -50,7 +50,7 @@ def toss_coins_and_count_heads(num_coins=100, p_heads=0.5): # %% import scipy.stats -pvalue = 100 - scipy.stats.percentileofscore(flip_results_df, 0.7) +pvalue = 100 - scipy.stats.percentileofscore(flip_results_df, 70) pvalue # %% [markdown] @@ -99,7 +99,8 @@ def sample_ttest(sampSize=32): for run in range(num_runs): sim_results_df.loc[run, 'p_value'] = sample_ttest() -p_error = sim_results_df.loc[sim_results_df['p_value'] < 0.05].mean(axis=0) +p_error = sim_results_df['p_value'] < 0.05 +p_error = p_error.mean(axis=0) p_error # %% [markdown] From 532e7f661ad467a2702ad7d5f17a55495b8df552 Mon Sep 17 00:00:00 2001 From: Wu 
Jianxiao Date: Thu, 12 Nov 2020 23:17:02 +0100 Subject: [PATCH 05/11] Fix typos in chpt 09 10 and 13 --- notebooks/09-StatisticalPower.ipynb | 2 +- notebooks/09-StatisticalPower.py | 2 +- notebooks/10-BayesianStatistics.ipynb | 4 +++- notebooks/10-BayesianStatistics.py | 4 +++- notebooks/13-GeneralLinearModel.ipynb | 2 +- notebooks/13-GeneralLinearModel.py | 4 ++-- 6 files changed, 11 insertions(+), 7 deletions(-) diff --git a/notebooks/09-StatisticalPower.ipynb b/notebooks/09-StatisticalPower.ipynb index 34040ad..77d41f7 100644 --- a/notebooks/09-StatisticalPower.ipynb +++ b/notebooks/09-StatisticalPower.ipynb @@ -37,7 +37,7 @@ "source": [ "## Power analysis\n", "\n", - "We can compute a power analysis using functions from the `statsmodels.stats.power` package. Let's focus on the power for an independent samples t-test in order to determine a difference in the mean between two groups. Let's say that we think than an effect size of Cohen's d=0.5 is realistic for the study in question (based on previous research) and would be of scientific interest. We wish to have 80% power to find the effect if it exists. We can compute the sample size needed for adequate power using the `TTestIndPower()` function:" + "We can compute a power analysis using functions from the `statsmodels.stats.power` package. Let's focus on the power for an independent samples t-test in order to determine a difference in the mean between two groups. Let's say that we think that an effect size of Cohen's d=0.5 is realistic for the study in question (based on previous research) and would be of scientific interest. We wish to have 80% power to find the effect if it exists. 
We can compute the sample size needed for adequate power using the `TTestIndPower()` function:" ] }, { diff --git a/notebooks/09-StatisticalPower.py b/notebooks/09-StatisticalPower.py index a52c1ce..6a79af9 100644 --- a/notebooks/09-StatisticalPower.py +++ b/notebooks/09-StatisticalPower.py @@ -33,7 +33,7 @@ # %% [markdown] # ## Power analysis # -# We can compute a power analysis using functions from the `statsmodels.stats.power` package. Let's focus on the power for an independent samples t-test in order to determine a difference in the mean between two groups. Let's say that we think than an effect size of Cohen's d=0.5 is realistic for the study in question (based on previous research) and would be of scientific interest. We wish to have 80% power to find the effect if it exists. We can compute the sample size needed for adequate power using the `TTestIndPower()` function: +# We can compute a power analysis using functions from the `statsmodels.stats.power` package. Let's focus on the power for an independent samples t-test in order to determine a difference in the mean between two groups. Let's say that we think that an effect size of Cohen's d=0.5 is realistic for the study in question (based on previous research) and would be of scientific interest. We wish to have 80% power to find the effect if it exists. We can compute the sample size needed for adequate power using the `TTestIndPower()` function: # %% diff --git a/notebooks/10-BayesianStatistics.ipynb b/notebooks/10-BayesianStatistics.ipynb index 420991d..e7ae032 100644 --- a/notebooks/10-BayesianStatistics.ipynb +++ b/notebooks/10-BayesianStatistics.ipynb @@ -9,7 +9,7 @@ "\n", "## Applying Bayes' theorem: A simple example\n", "TBD: MOVE TO MULTIPLE TESTING EXAMPLE SO WE CAN USE BINOMIAL LIKELIHOOD\n", - "A person has a cough and flu-like symptoms, and gets a PCR test for COVID-19, which comes back postiive. What is the likelihood that they actually have COVID-19, as opposed a regular cold or flu? 
We can use Bayes' theorem to compute this. Let's say that the local rate of symptomatic individuals who actually are infected with COVID-19 is 7.4% (as [reported](https://twitter.com/Bob_Wachter/status/1281792549309386752/photo/1) on July 10, 2020 for San Francisco); thus, our prior probability that someone with symptoms actually has COVID-19 is .074. The RT-PCR test used to identify COVID-19 RNA is highly specific (that is, it very rarelly reports the presence of the virus when it is not present); for our example, we will say that the specificity is 99%. Its sensitivity is not known, but probably is no higher than 90%. \n", + "A person has a cough and flu-like symptoms, and gets a PCR test for COVID-19, which comes back positive. What is the likelihood that they actually have COVID-19, as opposed to a regular cold or flu? We can use Bayes' theorem to compute this. Let's say that the local rate of symptomatic individuals who actually are infected with COVID-19 is 7.4% (as [reported](https://twitter.com/Bob_Wachter/status/1281792549309386752/photo/1) on July 10, 2020 for San Francisco); thus, our prior probability that someone with symptoms actually has COVID-19 is .074. The RT-PCR test used to identify COVID-19 RNA is highly specific (that is, it very rarely reports the presence of the virus when it is not present); for our example, we will say that the specificity is 99%. Its sensitivity is not known, but probably is no higher than 90%. \n", "First let's look at the probability of disease given a single positive test."
] }, @@ -29,6 +29,8 @@ "marginal_likelihood = sensitivity * prior + (1 - specificity) * (1 - prior)\n", "posterior = (likelihood * prior) / marginal_likelihood\n", "posterior\n", + "\n", + "\n", "\n" ] }, diff --git a/notebooks/10-BayesianStatistics.py b/notebooks/10-BayesianStatistics.py index 5213442..293051e 100644 --- a/notebooks/10-BayesianStatistics.py +++ b/notebooks/10-BayesianStatistics.py @@ -19,7 +19,7 @@ # # ## Applying Bayes' theorem: A simple example # TBD: MOVE TO MULTIPLE TESTING EXAMPLE SO WE CAN USE BINOMIAL LIKELIHOOD -# A person has a cough and flu-like symptoms, and gets a PCR test for COVID-19, which comes back postiive. What is the likelihood that they actually have COVID-19, as opposed a regular cold or flu? We can use Bayes' theorem to compute this. Let's say that the local rate of symptomatic individuals who actually are infected with COVID-19 is 7.4% (as [reported](https://twitter.com/Bob_Wachter/status/1281792549309386752/photo/1) on July 10, 2020 for San Francisco); thus, our prior probability that someone with symptoms actually has COVID-19 is .074. The RT-PCR test used to identify COVID-19 RNA is highly specific (that is, it very rarelly reports the presence of the virus when it is not present); for our example, we will say that the specificity is 99%. Its sensitivity is not known, but probably is no higher than 90%. +# A person has a cough and flu-like symptoms, and gets a PCR test for COVID-19, which comes back positive. What is the likelihood that they actually have COVID-19, as opposed to a regular cold or flu? We can use Bayes' theorem to compute this. Let's say that the local rate of symptomatic individuals who actually are infected with COVID-19 is 7.4% (as [reported](https://twitter.com/Bob_Wachter/status/1281792549309386752/photo/1) on July 10, 2020 for San Francisco); thus, our prior probability that someone with symptoms actually has COVID-19 is .074. 
The RT-PCR test used to identify COVID-19 RNA is highly specific (that is, it very rarely reports the presence of the virus when it is not present); for our example, we will say that the specificity is 99%. Its sensitivity is not known, but probably is no higher than 90%. # First let's look at the probability of disease given a single positive test. # %% @@ -36,6 +36,8 @@ + + # %% [markdown] # The high specificity of the test, along with the relatively high base rate of the disease, means that most people who test positive actually have the disease. # Now let's plot the posterior as a function of the prior. Let's first create a function to compute the posterior, and then apply this with a range of values for the prior. diff --git a/notebooks/13-GeneralLinearModel.ipynb b/notebooks/13-GeneralLinearModel.ipynb index b0872d0..141d59b 100644 --- a/notebooks/13-GeneralLinearModel.ipynb +++ b/notebooks/13-GeneralLinearModel.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# The General Linear Model in R\n", + "# The General Linear Model\n", "In this chapter we will explore how to fit general linear models in Python. We will focus on the tools provided by the `statsmodels` package." ] }, diff --git a/notebooks/13-GeneralLinearModel.py b/notebooks/13-GeneralLinearModel.py index 6c94b3b..ab5523d 100644 --- a/notebooks/13-GeneralLinearModel.py +++ b/notebooks/13-GeneralLinearModel.py @@ -14,7 +14,7 @@ # --- # %% [markdown] -# # The General Linear Model in R +# # The General Linear Model # In this chapter we will explore how to fit general linear models in Python. We will focus on the tools provided by the `statsmodels` package. 
# %% @@ -95,7 +95,7 @@ def generate_linear_data(slope, intercept, import seaborn as sns import scipy.stats -scipy.stats.probplot(ols_result.resid, plot=sns.mpl.pyplot) +_ = scipy.stats.probplot(ols_result.resid, plot=sns.mpl.pyplot) # %% [markdown] # This looks pretty good, in the sense that the residual data points fall very close to the unit line. This is not surprising, since we generated the data with normally distributed noise. We should also plot the predicted (or *fitted*) values against the residuals, to make sure that the model does work systematically better for some predicted values versus others. From af81366b2bbe825bb0fce375081f7cd57bf20c40 Mon Sep 17 00:00:00 2001 From: Wu Jianxiao Date: Thu, 12 Nov 2020 23:31:58 +0100 Subject: [PATCH 06/11] Revert "Fix bugs in chpt 05 and 06" This reverts commit d0bc00a3889c87692146887b5ad83134b3da80b8. --- notebooks/05-Probability.ipynb | 2 +- notebooks/05-Probability.py | 2 +- notebooks/06-Sampling.ipynb | 2 +- notebooks/06-Sampling.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/notebooks/05-Probability.ipynb b/notebooks/05-Probability.ipynb index fbb8ad5..0ed71ff 100644 --- a/notebooks/05-Probability.ipynb +++ b/notebooks/05-Probability.ipynb @@ -75,7 +75,7 @@ "source": [ "#+\n", "import pandas as pd\n", - "SFrain = pd.read_csv('data/SanFranciscoRain.csv')\n", + "SFrain = pd.read_csv('https://raw.githubusercontent.com/statsthinking21/statsthinking21-python/master/notebooks/data/SanFranciscoRain.csv')\n", "\n", "# we will remove the STATION and NAME variables \n", "# since they are identical for all rows\n", diff --git a/notebooks/05-Probability.py b/notebooks/05-Probability.py index 3abad40..9f77495 100644 --- a/notebooks/05-Probability.py +++ b/notebooks/05-Probability.py @@ -49,7 +49,7 @@ # %% #+ import pandas as pd -SFrain = pd.read_csv('data/SanFranciscoRain.csv') +SFrain = 
pd.read_csv('https://raw.githubusercontent.com/statsthinking21/statsthinking21-python/master/notebooks/data/SanFranciscoRain.csv') # we will remove the STATION and NAME variables # since they are identical for all rows diff --git a/notebooks/06-Sampling.ipynb b/notebooks/06-Sampling.ipynb index 7223dd8..8011dd4 100644 --- a/notebooks/06-Sampling.ipynb +++ b/notebooks/06-Sampling.ipynb @@ -79,7 +79,7 @@ "# we need to use the maximum of those data to set\n", "# the height of the vertical line that shows the mean\n", "plt.axvline(x=adult_nhanes_data['Height'].mean(),\n", - " ymax=1, color='k')\n", + " ymax=np.max(hist[0]), color='k')\n", "\n", "# draw the normal distribution with same mean and standard deviation\n", "# as the sampling distribution\n", diff --git a/notebooks/06-Sampling.py b/notebooks/06-Sampling.py index 06253e0..dfb77ed 100644 --- a/notebooks/06-Sampling.py +++ b/notebooks/06-Sampling.py @@ -62,7 +62,7 @@ # we need to use the maximum of those data to set # the height of the vertical line that shows the mean plt.axvline(x=adult_nhanes_data['Height'].mean(), - ymax=1, color='k') + ymax=np.max(hist[0]), color='k') # draw the normal distribution with same mean and standard deviation # as the sampling distribution From 0169cb6bb5226be077ab27b128d8e65bfbb96c71 Mon Sep 17 00:00:00 2001 From: Wu Jianxiao Date: Thu, 12 Nov 2020 23:32:43 +0100 Subject: [PATCH 07/11] Fix bug in chpt 06 --- notebooks/06-Sampling.ipynb | 2 +- notebooks/06-Sampling.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/notebooks/06-Sampling.ipynb b/notebooks/06-Sampling.ipynb index 8011dd4..7223dd8 100644 --- a/notebooks/06-Sampling.ipynb +++ b/notebooks/06-Sampling.ipynb @@ -79,7 +79,7 @@ "# we need to use the maximum of those data to set\n", "# the height of the vertical line that shows the mean\n", "plt.axvline(x=adult_nhanes_data['Height'].mean(),\n", - " ymax=np.max(hist[0]), color='k')\n", + " ymax=1, color='k')\n", "\n", "# draw the normal distribution with 
same mean and standard deviation\n", "# as the sampling distribution\n", diff --git a/notebooks/06-Sampling.py b/notebooks/06-Sampling.py index dfb77ed..06253e0 100644 --- a/notebooks/06-Sampling.py +++ b/notebooks/06-Sampling.py @@ -62,7 +62,7 @@ # we need to use the maximum of those data to set # the height of the vertical line that shows the mean plt.axvline(x=adult_nhanes_data['Height'].mean(), - ymax=np.max(hist[0]), color='k') + ymax=1, color='k') # draw the normal distribution with same mean and standard deviation # as the sampling distribution From 0b07a9e59318864c06e60f7fa746433dfac7909d Mon Sep 17 00:00:00 2001 From: Wu Jianxiao Date: Thu, 12 Nov 2020 23:48:36 +0100 Subject: [PATCH 08/11] Fix bug in chpt 13 --- notebooks/13-GeneralLinearModel.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/13-GeneralLinearModel.ipynb b/notebooks/13-GeneralLinearModel.ipynb index 141d59b..8768920 100644 --- a/notebooks/13-GeneralLinearModel.ipynb +++ b/notebooks/13-GeneralLinearModel.ipynb @@ -134,7 +134,7 @@ "import seaborn as sns\n", "import scipy.stats\n", "\n", - "scipy.stats.probplot(ols_result.resid, plot=sns.mpl.pyplot)" + "_ = scipy.stats.probplot(ols_result.resid, plot=sns.mpl.pyplot)" ] }, { From 92725c5923deeeb7c3dbe1890c64c8c0b075a350 Mon Sep 17 00:00:00 2001 From: phildi Date: Mon, 30 Aug 2021 08:31:33 -0400 Subject: [PATCH 09/11] Update 13-GeneralLinearModel.ipynb --- notebooks/13-GeneralLinearModel.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/13-GeneralLinearModel.ipynb b/notebooks/13-GeneralLinearModel.ipynb index b0872d0..a7d39bc 100644 --- a/notebooks/13-GeneralLinearModel.ipynb +++ b/notebooks/13-GeneralLinearModel.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# The General Linear Model in R\n", + "# The General Linear Model in Python\n", "In this chapter we will explore how to fit general linear models in Python. 
We will focus on the tools provided by the `statsmodels` package." ] }, From 0e96c9391894ec1fa0a401794606acd958430dd6 Mon Sep 17 00:00:00 2001 From: phildi Date: Mon, 30 Aug 2021 08:35:18 -0400 Subject: [PATCH 10/11] Update 07-ResamplingAndSimulation.ipynb --- notebooks/07-ResamplingAndSimulation.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/07-ResamplingAndSimulation.ipynb b/notebooks/07-ResamplingAndSimulation.ipynb index fe6122d..cd4e26f 100644 --- a/notebooks/07-ResamplingAndSimulation.ipynb +++ b/notebooks/07-ResamplingAndSimulation.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Resampling and simulation in R\n", + "# Resampling and simulation in Python\n", "\n", "## Generating random samples\n", "Here we will generate random samples from a number of different distributions and plot their histograms. We could write out separate commands to plot each of our functions of interest, but that would involve repeating a lot of code, so instead we will take advantage of the fact that Python allows us to treat modules as variables. We will specify the module that creates each distribution, and then loop through them, each time incrementing the panel number. Some distributions also take specific parameters; for example, the Chi-squared distribution requires specifying the degrees of freedom. We will store those in a separate dictionary and use them as needed." From 3092228c56e30224e7f0b0bb0490fa92e0d52fc4 Mon Sep 17 00:00:00 2001 From: Russ Poldrack Date: Thu, 22 Dec 2022 07:24:17 -0800 Subject: [PATCH 11/11] update license --- notebooks/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/index.md b/notebooks/index.md index c308dfd..9b798ab 100644 --- a/notebooks/index.md +++ b/notebooks/index.md @@ -23,7 +23,7 @@ I apologize up front that the datasets are heavily US-centric. 
This is primaril This book is meant to be a living document, which is why its source is available online at [https://github.com/statsthinking21/statsthinking21-python](https://github.com/statsthinking21/statsthinking21-python). If you find any errors in the book or want to make a suggestion for how to improve it, please open an issue on the Github site. Even better, submit a pull request with your suggested change. -The book is licensed according to the [Creative Commons Attribution 2.0 Generic (CC BY 2.0) License](https://creativecommons.org/licenses/by/2.0/). Please see the terms of that license for more details. +This book is licensed using the [Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) License](https://creativecommons.org/licenses/by-nc/4.0/). Please see the terms of that license for more details. ## Acknowledgements