diff --git a/notebooks/08-HypothesisTesting.ipynb b/notebooks/08-HypothesisTesting.ipynb index b6a662c..e92b784 100644 --- a/notebooks/08-HypothesisTesting.ipynb +++ b/notebooks/08-HypothesisTesting.ipynb @@ -118,6 +118,70 @@ "#" ] }, + { + "cell_type": "markdown", + "source": [ + "## Performing a t-test with Python\n", + "Let's draw a sample of 250 participants from the \"population\" who participated in the NHANES study\n" + ], + "metadata": { + "id": "Jb361YaoZQG4" + } + }, + { + "cell_type": "code", + "source": [ + "! pip install nhanes\n", + "from nhanes.load import load_NHANES_data\n", + "nhanes_data = load_NHANES_data()\n", + "\n" + ], + "metadata": { + "id": "oYoivOQeZ2nt" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import seaborn as sns\n", + "sample_size = 250\n", + "nhanes_data['PhysActive'] = np.logical_or(nhanes_data['VigorousRecreationalActivities'], nhanes_data['ModerateRecreationalActivities'])\n", + "print('Unique values in PhysActive:',nhanes_data['PhysActive'].unique())\n", + "\n", + "sample = nhanes_data.dropna(subset=['PhysActive', 'BodyMassIndexKgm2']).sample(sample_size)\n", + "sns.boxplot(data=sample, x=\"PhysActive\", y=\"BodyMassIndexKgm2\")" + ], + "metadata": { + "id": "BjiYATqLc_lA" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from scipy.stats import ttest_ind\n", + "# By default, ttest_ind assumes equal variance of the two samples\n", + "print('assuming equal variance of the two population:')\n", + "t, p = ttest_ind(sample.query('PhysActive==1.0')['BodyMassIndexKgm2'], sample.query('PhysActive==0.0')['BodyMassIndexKgm2'])\n", + "print('t-statistic:', t)\n", + "print('p-value:', p)\n", + "\n", + "# If we don't make the assumption, the result may be slightly different:\n", + "print('without assuming equal variance of the two populations:')\n", + "t, p = 
ttest_ind(sample.query('PhysActive==1.0')['BodyMassIndexKgm2'], sample.query('PhysActive==0.0')['BodyMassIndexKgm2'], equal_var=False)\n", + "print('t-statistic:', t)\n", + "print('p-value:', p)\n" + ], + "metadata": { + "id": "gSmTuPZmlqsn" + }, + "execution_count": null, + "outputs": [] + }, { "cell_type": "markdown", "metadata": {