diff --git a/fall2024/Session_06.ipynb b/fall2024/Session_06.ipynb
new file mode 100644
index 0000000..0f14f86
--- /dev/null
+++ b/fall2024/Session_06.ipynb
@@ -0,0 +1,5599 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": [],
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ " "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Session 06"
+ ],
+ "metadata": {
+ "id": "8DMpMrNHM9pw"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "P8WoeLFWM7z4",
+ "outputId": "5c1b505a-bd34-470c-e4cd-1adfc63f33d4"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(4123, 9)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 1
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import plotly.express as px\n",
+ "\n",
+ "pd.set_option(\"display.max_rows\", None)\n",
+ "\n",
+ "df = pd.read_csv(\"https://raw.githubusercontent.com/wcj365/python-stats-dataviz/refs/heads/master/fall2024/data/World_Development_Indicators_(WDI).csv\")\n",
+ "\n",
+ "df.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.sample(5)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 275
+ },
+ "id": "af30qeVaNGvw",
+ "outputId": "a0d42106-837f-443d-fdf8-deb1cca09aa6"
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Country GDP per capita (current US$) \\\n",
+ "2623 2005 Nepal 309.310420 \n",
+ "3807 2011 Turks and Caicos Islands 23649.714434 \n",
+ "2337 2004 Malta 15197.056610 \n",
+ "2617 2018 Nauru 10985.874397 \n",
+ "3322 2020 Slovenia 25558.429054 \n",
+ "\n",
+ " Life expectancy at birth, total (years) Population, total Country Code \\\n",
+ "2623 65.457000 26285110.0 NPL \n",
+ "3807 76.195000 30816.0 TCA \n",
+ "2337 79.253659 401268.0 MLT \n",
+ "2617 63.234000 11924.0 NRU \n",
+ "3322 80.531707 2102419.0 SVN \n",
+ "\n",
+ " Region Income Group Lending Type \n",
+ "2623 South Asia Lower middle income IDA \n",
+ "3807 Latin America & Caribbean High income Not classified \n",
+ "2337 Middle East & North Africa High income Not classified \n",
+ "2617 East Asia & Pacific High income IBRD \n",
+ "3322 Europe & Central Asia High income Not classified "
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " Country \n",
+ " GDP per capita (current US$) \n",
+ " Life expectancy at birth, total (years) \n",
+ " Population, total \n",
+ " Country Code \n",
+ " Region \n",
+ " Income Group \n",
+ " Lending Type \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 2623 \n",
+ " 2005 \n",
+ " Nepal \n",
+ " 309.310420 \n",
+ " 65.457000 \n",
+ " 26285110.0 \n",
+ " NPL \n",
+ " South Asia \n",
+ " Lower middle income \n",
+ " IDA \n",
+ " \n",
+ " \n",
+ " 3807 \n",
+ " 2011 \n",
+ " Turks and Caicos Islands \n",
+ " 23649.714434 \n",
+ " 76.195000 \n",
+ " 30816.0 \n",
+ " TCA \n",
+ " Latin America & Caribbean \n",
+ " High income \n",
+ " Not classified \n",
+ " \n",
+ " \n",
+ " 2337 \n",
+ " 2004 \n",
+ " Malta \n",
+ " 15197.056610 \n",
+ " 79.253659 \n",
+ " 401268.0 \n",
+ " MLT \n",
+ " Middle East & North Africa \n",
+ " High income \n",
+ " Not classified \n",
+ " \n",
+ " \n",
+ " 2617 \n",
+ " 2018 \n",
+ " Nauru \n",
+ " 10985.874397 \n",
+ " 63.234000 \n",
+ " 11924.0 \n",
+ " NRU \n",
+ " East Asia & Pacific \n",
+ " High income \n",
+ " IBRD \n",
+ " \n",
+ " \n",
+ " 3322 \n",
+ " 2020 \n",
+ " Slovenia \n",
+ " 25558.429054 \n",
+ " 80.531707 \n",
+ " 2102419.0 \n",
+ " SVN \n",
+ " Europe & Central Asia \n",
+ " High income \n",
+ " Not classified \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7,\n \"min\": 2004,\n \"max\": 2020,\n \"num_unique_values\": 5,\n \"samples\": [\n 2011,\n 2020,\n 2004\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Turks and Caicos Islands\",\n \"Slovenia\",\n \"Malta\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per capita (current US$)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 10224.581557273556,\n \"min\": 309.310420053588,\n \"max\": 25558.4290544506,\n \"num_unique_values\": 5,\n \"samples\": [\n 23649.7144340602,\n 25558.4290544506,\n 15197.056610121\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life expectancy at birth, total (years)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 8.035746544687575,\n \"min\": 63.234,\n \"max\": 80.5317073170732,\n \"num_unique_values\": 5,\n \"samples\": [\n 76.195,\n 80.5317073170732,\n 79.2536585365854\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population, total\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11502584.231626423,\n \"min\": 11924.0,\n \"max\": 26285110.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 30816.0,\n 2102419.0,\n 401268.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"TCA\",\n \"SVN\",\n \"MLT\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Latin America & Caribbean \",\n \"Europe & Central Asia\",\n \"Middle East & 
North Africa\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"High income\",\n \"Lower middle income\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"IDA\",\n \"Not classified\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 2
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.isna().sum()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 366
+ },
+ "id": "IkEmfYakNKPD",
+ "outputId": "5e0a5624-7695-46aa-92c6-5f288a13805d"
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Year 0\n",
+ "Country 0\n",
+ "GDP per capita (current US$) 161\n",
+ "Life expectancy at birth, total (years) 346\n",
+ "Population, total 0\n",
+ "Country Code 0\n",
+ "Region 0\n",
+ "Income Group 0\n",
+ "Lending Type 0\n",
+ "dtype: int64"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Country \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " GDP per capita (current US$) \n",
+ " 161 \n",
+ " \n",
+ " \n",
+ " Life expectancy at birth, total (years) \n",
+ " 346 \n",
+ " \n",
+ " \n",
+ " Population, total \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Country Code \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Region \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Income Group \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Lending Type \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
dtype: int64 "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 3
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df[[\"Year\", \"Country\",\"GDP per capita (current US$)\", \"Life expectancy at birth, total (years)\"]].sample(5)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "_tysBz07NM9O",
+ "outputId": "ea63321a-c2cf-4bd1-832c-2d7a98bcee62"
+ },
+ "execution_count": 4,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Country GDP per capita (current US$) \\\n",
+ "694 2014 Cayman Islands 76610.648456 \n",
+ "2072 2005 Latvia 7594.902384 \n",
+ "2114 2009 Lesotho 866.475127 \n",
+ "556 2009 Bulgaria 6988.273163 \n",
+ "2421 2012 Mexico 10842.733089 \n",
+ "\n",
+ " Life expectancy at birth, total (years) \n",
+ "694 NaN \n",
+ "2072 71.356098 \n",
+ "2114 44.034000 \n",
+ "556 73.412195 \n",
+ "2421 74.574000 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " Country \n",
+ " GDP per capita (current US$) \n",
+ " Life expectancy at birth, total (years) \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 694 \n",
+ " 2014 \n",
+ " Cayman Islands \n",
+ " 76610.648456 \n",
+ " NaN \n",
+ " \n",
+ " \n",
+ " 2072 \n",
+ " 2005 \n",
+ " Latvia \n",
+ " 7594.902384 \n",
+ " 71.356098 \n",
+ " \n",
+ " \n",
+ " 2114 \n",
+ " 2009 \n",
+ " Lesotho \n",
+ " 866.475127 \n",
+ " 44.034000 \n",
+ " \n",
+ " \n",
+ " 556 \n",
+ " 2009 \n",
+ " Bulgaria \n",
+ " 6988.273163 \n",
+ " 73.412195 \n",
+ " \n",
+ " \n",
+ " 2421 \n",
+ " 2012 \n",
+ " Mexico \n",
+ " 10842.733089 \n",
+ " 74.574000 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df[[\\\"Year\\\", \\\"Country\\\",\\\"GDP per capita (current US$)\\\", \\\"Life expectancy at birth, total (years)\\\"]]\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 2005,\n \"max\": 2014,\n \"num_unique_values\": 4,\n \"samples\": [\n 2005,\n 2012,\n 2014\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Latvia\",\n \"Mexico\",\n \"Lesotho\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per capita (current US$)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 31528.632990177994,\n \"min\": 866.475127181991,\n \"max\": 76610.6484563953,\n \"num_unique_values\": 5,\n \"samples\": [\n 7594.90238431362,\n 10842.7330888296,\n 866.475127181991\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life expectancy at birth, total (years)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.600796746573149,\n \"min\": 44.034,\n \"max\": 74.574,\n \"num_unique_values\": 4,\n \"samples\": [\n 44.034,\n 74.574,\n 71.3560975609756\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 4
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_2022 = df[df[\"Year\"] != 2022]\n",
+ "df_2022.shape"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "W5dKuC7MNSM8",
+ "outputId": "2495f479-edae-402b-fdf7-ffbe4a3a0bb6"
+ },
+ "execution_count": 26,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(3906, 9)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 26
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_2022.isna().sum()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 366
+ },
+ "id": "kuZKYpJiNU1W",
+ "outputId": "173c7533-fbda-4bd3-f1b1-1060ce42d627"
+ },
+ "execution_count": 27,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Year 0\n",
+ "Country 0\n",
+ "GDP per capita (current US$) 140\n",
+ "Life expectancy at birth, total (years) 129\n",
+ "Population, total 0\n",
+ "Country Code 0\n",
+ "Region 0\n",
+ "Income Group 0\n",
+ "Lending Type 0\n",
+ "dtype: int64"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Country \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " GDP per capita (current US$) \n",
+ " 140 \n",
+ " \n",
+ " \n",
+ " Life expectancy at birth, total (years) \n",
+ " 129 \n",
+ " \n",
+ " \n",
+ " Population, total \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Country Code \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Region \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Income Group \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Lending Type \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
dtype: int64 "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 27
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_2022 = df_2022.dropna()\n",
+ "df_2022.isna().sum()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 366
+ },
+ "id": "DtlRm3SwNXqY",
+ "outputId": "dc8c266b-1e00-4a7c-c92c-0ae65a8e5b7d"
+ },
+ "execution_count": 29,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Year 0\n",
+ "Country 0\n",
+ "GDP per capita (current US$) 0\n",
+ "Life expectancy at birth, total (years) 0\n",
+ "Population, total 0\n",
+ "Country Code 0\n",
+ "Region 0\n",
+ "Income Group 0\n",
+ "Lending Type 0\n",
+ "dtype: int64"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Country \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " GDP per capita (current US$) \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Life expectancy at birth, total (years) \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Population, total \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Country Code \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Region \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Income Group \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Lending Type \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
dtype: int64 "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 29
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_2022.shape"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "mWEaXg1zNakg",
+ "outputId": "4781de35-5015-4d44-a856-ded6f7dfc7e8"
+ },
+ "execution_count": 30,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(3641, 9)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 30
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Homework Question 1\n",
+ "\n",
+ "Check whether this dataset has duplicate rows. If it does, keep only one copy of each duplicated row and drop the rest."
+ ],
+ "metadata": {
+ "id": "su7rU3TcNgXy"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.duplicated().sum()"
+ ],
+ "metadata": {
+ "id": "v1nYXDPTg0q6",
+ "outputId": "0344ab3a-213d-404c-a38b-7173fa5a5294",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": 31,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 31
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df[df.duplicated()]"
+ ],
+ "metadata": {
+ "id": "Ovv4aduDhaMh",
+ "outputId": "53218be2-d1d1-4b1c-e030-3e7017dbd68d",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 53
+ }
+ },
+ "execution_count": 32,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [Year, Country, GDP per capita (current US$), Life expectancy at birth, total (years), Population, total, Country Code, Region, Income Group, Lending Type]\n",
+ "Index: []"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " Country \n",
+ " GDP per capita (current US$) \n",
+ " Life expectancy at birth, total (years) \n",
+ " Population, total \n",
+ " Country Code \n",
+ " Region \n",
+ " Income Group \n",
+ " Lending Type \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "repr_error": "Out of range float values are not JSON compliant: nan"
+ }
+ },
+ "metadata": {},
+ "execution_count": 32
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_cleaned = df.drop_duplicates()\n",
+ "\n",
+ "print(f\"Cleaned shape: {df_cleaned.shape}\")\n",
+ "print(df_cleaned.sample(5))\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "f12uqvuQNdRR",
+ "outputId": "0c92cd3b-a5b7-4209-b877-b3cacb7193ba"
+ },
+ "execution_count": 33,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Cleaned shape: (4123, 9)\n",
+ " Year Country GDP per capita (current US$) \\\n",
+ "2189 2008 Lithuania 14944.996652 \n",
+ "2557 2015 Mozambique 603.838514 \n",
+ "2527 2004 Morocco 2177.798828 \n",
+ "2005 2014 Kosovo 3902.530841 \n",
+ "3248 2022 Sierra Leone 475.795728 \n",
+ "\n",
+ " Life expectancy at birth, total (years) Population, total Country Code \\\n",
+ "2189 71.812195 3198231.0 LTU \n",
+ "2557 58.151000 26843246.0 MOZ \n",
+ "2527 68.231000 30033125.0 MAR \n",
+ "2005 78.880000 1812771.0 XKX \n",
+ "3248 NaN 8605718.0 SLE \n",
+ "\n",
+ " Region Income Group Lending Type \n",
+ "2189 Europe & Central Asia High income Not classified \n",
+ "2557 Sub-Saharan Africa Low income IDA \n",
+ "2527 Middle East & North Africa Lower middle income IBRD \n",
+ "2005 Europe & Central Asia Upper middle income IDA \n",
+ "3248 Sub-Saharan Africa Low income IDA \n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Homework Question 2\n",
+ "\n",
+ "Find out whether there is a relationship between wealth and health.\n",
+ "\n",
+ "Pick any one year's worth of data and use Plotly Express to generate a scatter plot with GDP per capita on the x-axis and life expectancy on the y-axis."
+ ],
+ "metadata": {
+ "id": "kG6WK152Nns9"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "year_of_interest = 2019\n",
+ "\n",
+ "df_2019 = df[df[\"Year\"] == year_of_interest]\n",
+ "print(df_2019.shape)\n",
+ "df_2019.sample(5)\n"
+ ],
+ "metadata": {
+ "id": "ivocZDyJNkEK",
+ "outputId": "2a822460-d821-40a2-e658-d8b76348e331",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 311
+ }
+ },
+ "execution_count": 35,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(217, 9)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Country GDP per capita (current US$) \\\n",
+ "2409 2019 Mauritius 11403.252787 \n",
+ "224 2019 Austria 50067.585727 \n",
+ "870 2019 Congo, Rep. 2508.944783 \n",
+ "1744 2019 Iran, Islamic Rep. 3276.753265 \n",
+ "2751 2019 Nigeria 2334.023643 \n",
+ "\n",
+ " Life expectancy at birth, total (years) Population, total Country Code \\\n",
+ "2409 74.235854 1265985.0 MUS \n",
+ "224 81.895122 8879920.0 AUT \n",
+ "870 62.747000 5570733.0 COG \n",
+ "1744 76.103000 86564202.0 IRN \n",
+ "2751 52.910000 203304492.0 NGA \n",
+ "\n",
+ " Region Income Group Lending Type \n",
+ "2409 Sub-Saharan Africa Upper middle income IBRD \n",
+ "224 Europe & Central Asia High income Not classified \n",
+ "870 Sub-Saharan Africa Lower middle income Blend \n",
+ "1744 Middle East & North Africa Lower middle income IBRD \n",
+ "2751 Sub-Saharan Africa Lower middle income Blend "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " Country \n",
+ " GDP per capita (current US$) \n",
+ " Life expectancy at birth, total (years) \n",
+ " Population, total \n",
+ " Country Code \n",
+ " Region \n",
+ " Income Group \n",
+ " Lending Type \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 2409 \n",
+ " 2019 \n",
+ " Mauritius \n",
+ " 11403.252787 \n",
+ " 74.235854 \n",
+ " 1265985.0 \n",
+ " MUS \n",
+ " Sub-Saharan Africa \n",
+ " Upper middle income \n",
+ " IBRD \n",
+ " \n",
+ " \n",
+ " 224 \n",
+ " 2019 \n",
+ " Austria \n",
+ " 50067.585727 \n",
+ " 81.895122 \n",
+ " 8879920.0 \n",
+ " AUT \n",
+ " Europe & Central Asia \n",
+ " High income \n",
+ " Not classified \n",
+ " \n",
+ " \n",
+ " 870 \n",
+ " 2019 \n",
+ " Congo, Rep. \n",
+ " 2508.944783 \n",
+ " 62.747000 \n",
+ " 5570733.0 \n",
+ " COG \n",
+ " Sub-Saharan Africa \n",
+ " Lower middle income \n",
+ " Blend \n",
+ " \n",
+ " \n",
+ " 1744 \n",
+ " 2019 \n",
+ " Iran, Islamic Rep. \n",
+ " 3276.753265 \n",
+ " 76.103000 \n",
+ " 86564202.0 \n",
+ " IRN \n",
+ " Middle East & North Africa \n",
+ " Lower middle income \n",
+ " IBRD \n",
+ " \n",
+ " \n",
+ " 2751 \n",
+ " 2019 \n",
+ " Nigeria \n",
+ " 2334.023643 \n",
+ " 52.910000 \n",
+ " 203304492.0 \n",
+ " NGA \n",
+ " Sub-Saharan Africa \n",
+ " Lower middle income \n",
+ " Blend \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_2019\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 2019,\n \"max\": 2019,\n \"num_unique_values\": 1,\n \"samples\": [\n 2019\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Austria\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per capita (current US$)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20559.10797517433,\n \"min\": 2334.02364318105,\n \"max\": 50067.5857265892,\n \"num_unique_values\": 5,\n \"samples\": [\n 50067.5857265892\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life expectancy at birth, total (years)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11.62397848926082,\n \"min\": 52.91,\n \"max\": 81.8951219512195,\n \"num_unique_values\": 5,\n \"samples\": [\n 81.8951219512195\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population, total\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 86978571.0275076,\n \"min\": 1265985.0,\n \"max\": 203304492.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 8879920.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"AUT\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Sub-Saharan Africa \"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Upper middle income\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"IBRD\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 35
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_2019.isna().sum()"
+ ],
+ "metadata": {
+ "id": "YNPwsdH5jLXv",
+ "outputId": "22b03d70-607e-4f2e-f6ac-b20a3b4b9268",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 366
+ }
+ },
+ "execution_count": 36,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Year 0\n",
+ "Country 0\n",
+ "GDP per capita (current US$) 6\n",
+ "Life expectancy at birth, total (years) 8\n",
+ "Population, total 0\n",
+ "Country Code 0\n",
+ "Region 0\n",
+ "Income Group 0\n",
+ "Lending Type 0\n",
+ "dtype: int64"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Country \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " GDP per capita (current US$) \n",
+ " 6 \n",
+ " \n",
+ " \n",
+ " Life expectancy at birth, total (years) \n",
+ " 8 \n",
+ " \n",
+ " \n",
+ " Population, total \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Country Code \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Region \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Income Group \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " Lending Type \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
dtype: int64 "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 36
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_2019 = df_2019.dropna(subset=[\"GDP per capita (current US$)\", \"Life expectancy at birth, total (years)\"])\n",
+ "print(df_2019.shape)\n",
+ "df_2019.sample(5)"
+ ],
+ "metadata": {
+ "id": "-Jel1XjHNq6U",
+ "outputId": "64e63355-0145-4310-a26f-c5ec54d1cc30",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 293
+ }
+ },
+ "execution_count": 37,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(203, 9)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Country GDP per capita (current US$) \\\n",
+ "2219 2019 Luxembourg 112726.439673 \n",
+ "414 2019 Bermuda 116153.166122 \n",
+ "3017 2019 Puerto Rico 32916.866801 \n",
+ "1079 2019 Dominican Republic 8173.344699 \n",
+ "1060 2019 Dominica 8561.587011 \n",
+ "\n",
+ " Life expectancy at birth, total (years) Population, total Country Code \\\n",
+ "2219 82.639024 620001.0 LUX \n",
+ "414 81.033000 63911.0 BMU \n",
+ "3017 79.063000 3193694.0 PRI \n",
+ "1079 73.577000 10881882.0 DOM \n",
+ "1060 73.559000 71428.0 DMA \n",
+ "\n",
+ " Region Income Group Lending Type \n",
+ "2219 Europe & Central Asia High income Not classified \n",
+ "414 North America High income Not classified \n",
+ "3017 Latin America & Caribbean High income Not classified \n",
+ "1079 Latin America & Caribbean Upper middle income IBRD \n",
+ "1060 Latin America & Caribbean Upper middle income Blend "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " Country \n",
+ " GDP per capita (current US$) \n",
+ " Life expectancy at birth, total (years) \n",
+ " Population, total \n",
+ " Country Code \n",
+ " Region \n",
+ " Income Group \n",
+ " Lending Type \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 2219 \n",
+ " 2019 \n",
+ " Luxembourg \n",
+ " 112726.439673 \n",
+ " 82.639024 \n",
+ " 620001.0 \n",
+ " LUX \n",
+ " Europe & Central Asia \n",
+ " High income \n",
+ " Not classified \n",
+ " \n",
+ " \n",
+ " 414 \n",
+ " 2019 \n",
+ " Bermuda \n",
+ " 116153.166122 \n",
+ " 81.033000 \n",
+ " 63911.0 \n",
+ " BMU \n",
+ " North America \n",
+ " High income \n",
+ " Not classified \n",
+ " \n",
+ " \n",
+ " 3017 \n",
+ " 2019 \n",
+ " Puerto Rico \n",
+ " 32916.866801 \n",
+ " 79.063000 \n",
+ " 3193694.0 \n",
+ " PRI \n",
+ " Latin America & Caribbean \n",
+ " High income \n",
+ " Not classified \n",
+ " \n",
+ " \n",
+ " 1079 \n",
+ " 2019 \n",
+ " Dominican Republic \n",
+ " 8173.344699 \n",
+ " 73.577000 \n",
+ " 10881882.0 \n",
+ " DOM \n",
+ " Latin America & Caribbean \n",
+ " Upper middle income \n",
+ " IBRD \n",
+ " \n",
+ " \n",
+ " 1060 \n",
+ " 2019 \n",
+ " Dominica \n",
+ " 8561.587011 \n",
+ " 73.559000 \n",
+ " 71428.0 \n",
+ " DMA \n",
+ " Latin America & Caribbean \n",
+ " Upper middle income \n",
+ " Blend \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_2019\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 2019,\n \"max\": 2019,\n \"num_unique_values\": 1,\n \"samples\": [\n 2019\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Bermuda\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per capita (current US$)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 54558.418278567646,\n \"min\": 8173.34469885261,\n \"max\": 116153.166121638,\n \"num_unique_values\": 5,\n \"samples\": [\n 116153.166121638\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life expectancy at birth, total (years)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4.216980209572233,\n \"min\": 73.559,\n \"max\": 82.6390243902439,\n \"num_unique_values\": 5,\n \"samples\": [\n 81.033\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population, total\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4610244.33091281,\n \"min\": 63911.0,\n \"max\": 10881882.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 63911.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"BMU\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Europe & Central Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Upper middle income\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Not classified\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 37
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "\n",
+ "fig = px.scatter(\n",
+ " df_2019,\n",
+ " x=\"GDP per capita (current US$)\",\n",
+ " y=\"Life expectancy at birth, total (years)\",\n",
+ " color=\"Country\",\n",
+ " size=\"Population, total\",\n",
+ " title=f\"Relationship between GDP per Capita and Life Expectancy in {year_of_interest}\",\n",
+ " labels={\n",
+ " \"GDP per capita (current US$)\": \"GDP per Capita (Current US$)\",\n",
+ " \"Life expectancy at birth, total (years)\": \"Life Expectancy (Years)\"\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "fig.show()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 542
+ },
+ "id": "Gk4aJFeTNtvO",
+ "outputId": "d9859196-78d8-45c8-abac-171a5954d3bd"
+ },
+ "execution_count": 43,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ " \n",
+ "\n",
+ ""
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Compare China, the US, India, and Russia"
+ ],
+ "metadata": {
+ "id": "sCL8DtKqnT56"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_4_countries = df_2019[df_2019[\"Country Code\"].isin([\"USA\", \"CHN\", \"IND\", \"RUS\"])]\n",
+ "print(df_4_countries.shape)\n",
+ "df_4_countries"
+ ],
+ "metadata": {
+ "id": "q3n7ykVCn6K_",
+ "outputId": "dcd58ce7-68d0-44cd-9047-2821ae113ef0",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 262
+ }
+ },
+ "execution_count": 44,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(4, 9)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Country GDP per capita (current US$) \\\n",
+ "794 2019 China 10143.860221 \n",
+ "1706 2019 India 2050.163800 \n",
+ "3074 2019 Russian Federation 11536.258789 \n",
+ "3929 2019 United States 65120.394663 \n",
+ "\n",
+ " Life expectancy at birth, total (years) Population, total Country Code \\\n",
+ "794 77.968000 1.407745e+09 CHN \n",
+ "1706 70.910000 1.383112e+09 IND \n",
+ "3074 73.083902 1.444063e+08 RUS \n",
+ "3929 78.787805 3.283300e+08 USA \n",
+ "\n",
+ " Region Income Group Lending Type \n",
+ "794 East Asia & Pacific Upper middle income IBRD \n",
+ "1706 South Asia Lower middle income IBRD \n",
+ "3074 Europe & Central Asia Upper middle income IBRD \n",
+ "3929 North America High income Not classified "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " Country \n",
+ " GDP per capita (current US$) \n",
+ " Life expectancy at birth, total (years) \n",
+ " Population, total \n",
+ " Country Code \n",
+ " Region \n",
+ " Income Group \n",
+ " Lending Type \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 794 \n",
+ " 2019 \n",
+ " China \n",
+ " 10143.860221 \n",
+ " 77.968000 \n",
+ " 1.407745e+09 \n",
+ " CHN \n",
+ " East Asia & Pacific \n",
+ " Upper middle income \n",
+ " IBRD \n",
+ " \n",
+ " \n",
+ " 1706 \n",
+ " 2019 \n",
+ " India \n",
+ " 2050.163800 \n",
+ " 70.910000 \n",
+ " 1.383112e+09 \n",
+ " IND \n",
+ " South Asia \n",
+ " Lower middle income \n",
+ " IBRD \n",
+ " \n",
+ " \n",
+ " 3074 \n",
+ " 2019 \n",
+ " Russian Federation \n",
+ " 11536.258789 \n",
+ " 73.083902 \n",
+ " 1.444063e+08 \n",
+ " RUS \n",
+ " Europe & Central Asia \n",
+ " Upper middle income \n",
+ " IBRD \n",
+ " \n",
+ " \n",
+ " 3929 \n",
+ " 2019 \n",
+ " United States \n",
+ " 65120.394663 \n",
+ " 78.787805 \n",
+ " 3.283300e+08 \n",
+ " USA \n",
+ " North America \n",
+ " High income \n",
+ " Not classified \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_4_countries",
+ "summary": "{\n \"name\": \"df_4_countries\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 2019,\n \"max\": 2019,\n \"num_unique_values\": 1,\n \"samples\": [\n 2019\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"India\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per capita (current US$)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 28909.291573444032,\n \"min\": 2050.1638002619,\n \"max\": 65120.3946628653,\n \"num_unique_values\": 4,\n \"samples\": [\n 2050.1638002619\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life expectancy at birth, total (years)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.8041864829872507,\n \"min\": 70.91,\n \"max\": 78.7878048780488,\n \"num_unique_values\": 4,\n \"samples\": [\n 70.91\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population, total\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 673458339.6980696,\n \"min\": 144406261.0,\n \"max\": 1407745000.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 1383112050.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"IND\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"South Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Upper middle income\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Not classified\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 44
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Labelled bubble chart for the four selected countries.\n",
+ "fig = px.scatter(\n",
+ "    data_frame=df_4_countries,\n",
+ "    x=\"GDP per capita (current US$)\",\n",
+ "    y=\"Life expectancy at birth, total (years)\",\n",
+ "    size=\"Population, total\",\n",
+ "    color=\"Country\",\n",
+ "    text=\"Country\",\n",
+ "    labels={\n",
+ "        \"GDP per capita (current US$)\": \"GDP per Capita (Current US$)\",\n",
+ "        \"Life expectancy at birth, total (years)\": \"Life Expectancy (Years)\",\n",
+ "    },\n",
+ "    title=f\"Relationship between GDP per Capita and Life Expectancy in {year_of_interest}\",\n",
+ "    width=1000,\n",
+ "    height=600,\n",
+ ")\n",
+ "\n",
+ "# Put each country name above its marker; the legend is hidden because the\n",
+ "# text labels already name every country.\n",
+ "fig.update_traces(textposition='top center').update_layout(showlegend=False)\n",
+ "\n",
+ "fig.show()"
+ ],
+ "metadata": {
+ "id": "UpJ-JoqLnPuD",
+ "outputId": "24828721-a8cc-4d33-9978-d8d16061acd8",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 617
+ }
+ },
+ "execution_count": 54,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ " \n",
+ "\n",
+ ""
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Same four-country filter as above, applied to the full frame df rather than df_2019.\n",
+ "df_quad = df[df[\"Country Code\"].isin([\"USA\", \"CHN\", \"IND\", \"RUS\"])]\n",
+ "print(df_quad.shape)\n",
+ "\n",
+ "# Fixed seed so the preview shows the same rows on every Restart & Run All.\n",
+ "df_quad.sample(5, random_state=42)"
+ ],
+ "metadata": {
+ "id": "NBUPYesorE9G",
+ "outputId": "24ed4da8-d749-4351-c817-b851e8eb5078",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 293
+ }
+ },
+ "execution_count": 59,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(76, 9)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Country GDP per capita (current US$) \\\n",
+ "786 2011 China 5614.386022 \n",
+ "3066 2011 Russian Federation 14311.064453 \n",
+ "3916 2006 United States 46302.000880 \n",
+ "3060 2005 Russian Federation 5323.455078 \n",
+ "3925 2015 United States 56762.729452 \n",
+ "\n",
+ " Life expectancy at birth, total (years) Population, total Country Code \\\n",
+ "786 75.903000 1.345035e+09 CHN \n",
+ "3066 69.683902 1.429609e+08 RUS \n",
+ "3916 77.687805 2.983799e+08 USA \n",
+ "3060 65.529756 1.435188e+08 RUS \n",
+ "3925 78.690244 3.207390e+08 USA \n",
+ "\n",
+ " Region Income Group Lending Type \n",
+ "786 East Asia & Pacific Upper middle income IBRD \n",
+ "3066 Europe & Central Asia Upper middle income IBRD \n",
+ "3916 North America High income Not classified \n",
+ "3060 Europe & Central Asia Upper middle income IBRD \n",
+ "3925 North America High income Not classified "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " Country \n",
+ " GDP per capita (current US$) \n",
+ " Life expectancy at birth, total (years) \n",
+ " Population, total \n",
+ " Country Code \n",
+ " Region \n",
+ " Income Group \n",
+ " Lending Type \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 786 \n",
+ " 2011 \n",
+ " China \n",
+ " 5614.386022 \n",
+ " 75.903000 \n",
+ " 1.345035e+09 \n",
+ " CHN \n",
+ " East Asia & Pacific \n",
+ " Upper middle income \n",
+ " IBRD \n",
+ " \n",
+ " \n",
+ " 3066 \n",
+ " 2011 \n",
+ " Russian Federation \n",
+ " 14311.064453 \n",
+ " 69.683902 \n",
+ " 1.429609e+08 \n",
+ " RUS \n",
+ " Europe & Central Asia \n",
+ " Upper middle income \n",
+ " IBRD \n",
+ " \n",
+ " \n",
+ " 3916 \n",
+ " 2006 \n",
+ " United States \n",
+ " 46302.000880 \n",
+ " 77.687805 \n",
+ " 2.983799e+08 \n",
+ " USA \n",
+ " North America \n",
+ " High income \n",
+ " Not classified \n",
+ " \n",
+ " \n",
+ " 3060 \n",
+ " 2005 \n",
+ " Russian Federation \n",
+ " 5323.455078 \n",
+ " 65.529756 \n",
+ " 1.435188e+08 \n",
+ " RUS \n",
+ " Europe & Central Asia \n",
+ " Upper middle income \n",
+ " IBRD \n",
+ " \n",
+ " \n",
+ " 3925 \n",
+ " 2015 \n",
+ " United States \n",
+ " 56762.729452 \n",
+ " 78.690244 \n",
+ " 3.207390e+08 \n",
+ " USA \n",
+ " North America \n",
+ " High income \n",
+ " Not classified \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_quad\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 2005,\n \"max\": 2015,\n \"num_unique_values\": 4,\n \"samples\": [\n 2006,\n 2015,\n 2011\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"China\",\n \"Russian Federation\",\n \"United States\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per capita (current US$)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24174.738776412603,\n \"min\": 5323.455078125,\n \"max\": 56762.7294515989,\n \"num_unique_values\": 5,\n \"samples\": [\n 14311.064453125,\n 56762.7294515989,\n 46302.0008800056\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life expectancy at birth, total (years)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.664321082887293,\n \"min\": 65.529756097561,\n \"max\": 78.690243902439,\n \"num_unique_values\": 5,\n \"samples\": [\n 69.6839024390244,\n 78.690243902439,\n 77.6878048780488\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population, total\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 507195315.2074461,\n \"min\": 142960908.0,\n \"max\": 1345035000.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 142960908.0,\n 320738994.0,\n 298379912.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"CHN\",\n \"RUS\",\n \"USA\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"East Asia & Pacific\",\n \"Europe & Central 
Asia\",\n \"North America\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"High income\",\n \"Upper middle income\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Not classified\",\n \"IBRD\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 59
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# GDP per capita over time, one line per country in df_quad.\n",
+ "# The title and axis label let the figure stand alone, matching the scatter plots above.\n",
+ "fig = px.line(\n",
+ "    df_quad,\n",
+ "    x=\"Year\",\n",
+ "    y=\"GDP per capita (current US$)\",\n",
+ "    color=\"Country\",\n",
+ "    title=\"GDP per Capita over Time by Country\",\n",
+ "    labels={\"GDP per capita (current US$)\": \"GDP per Capita (Current US$)\"},\n",
+ ")\n",
+ "\n",
+ "fig.show()"
+ ],
+ "metadata": {
+ "id": "PsNfOLezqaQB",
+ "outputId": "2166e2b8-92f1-4c74-d6eb-343cd569e5a5",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 542
+ }
+ },
+ "execution_count": 61,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ " \n",
+ "\n",
+ ""
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Total population for every (Year, Region) pair, kept as a flat three-column frame.\n",
+ "df_group = df.groupby([\"Year\", \"Region\"], as_index=False)[\"Population, total\"].sum()\n",
+ "df_group"
+ ],
+ "metadata": {
+ "id": "6cUmZY3gvaxY",
+ "outputId": "7ff8bfc6-6183-4c7f-da4c-3f553e702d2b",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ }
+ },
+ "execution_count": 63,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Region Population, total\n",
+ "0 2004 East Asia & Pacific 2.094011e+09\n",
+ "1 2004 Europe & Central Asia 8.702708e+08\n",
+ "2 2004 Latin America & Caribbean 5.498976e+08\n",
+ "3 2004 Middle East & North Africa 3.470449e+08\n",
+ "4 2004 North America 3.248097e+08\n",
+ "5 2004 South Asia 1.515703e+09\n",
+ "6 2004 Sub-Saharan Africa 7.465468e+08\n",
+ "7 2005 East Asia & Pacific 2.109795e+09\n",
+ "8 2005 Europe & Central Asia 8.729137e+08\n",
+ "9 2005 Latin America & Caribbean 5.567395e+08\n",
+ "10 2005 Middle East & North Africa 3.544624e+08\n",
+ "11 2005 North America 3.278245e+08\n",
+ "12 2005 South Asia 1.541264e+09\n",
+ "13 2005 Sub-Saharan Africa 7.669787e+08\n",
+ "14 2006 East Asia & Pacific 2.125591e+09\n",
+ "15 2006 Europe & Central Asia 8.756279e+08\n",
+ "16 2006 Latin America & Caribbean 5.634241e+08\n",
+ "17 2006 Middle East & North Africa 3.626340e+08\n",
+ "18 2006 North America 3.310156e+08\n",
+ "19 2006 South Asia 1.565893e+09\n",
+ "20 2006 Sub-Saharan Africa 7.881100e+08\n",
+ "21 2007 East Asia & Pacific 2.141253e+09\n",
+ "22 2007 Europe & Central Asia 8.784563e+08\n",
+ "23 2007 Latin America & Caribbean 5.699722e+08\n",
+ "24 2007 Middle East & North Africa 3.713248e+08\n",
+ "25 2007 North America 3.341851e+08\n",
+ "26 2007 South Asia 1.589455e+09\n",
+ "27 2007 Sub-Saharan Africa 8.100193e+08\n",
+ "28 2008 East Asia & Pacific 2.156920e+09\n",
+ "29 2008 Europe & Central Asia 8.820884e+08\n",
+ "30 2008 Latin America & Caribbean 5.763862e+08\n",
+ "31 2008 Middle East & North Africa 3.802941e+08\n",
+ "32 2008 North America 3.374064e+08\n",
+ "33 2008 South Asia 1.612709e+09\n",
+ "34 2008 Sub-Saharan Africa 8.326427e+08\n",
+ "35 2009 East Asia & Pacific 2.172100e+09\n",
+ "36 2009 Europe & Central Asia 8.856598e+08\n",
+ "37 2009 Latin America & Caribbean 5.827381e+08\n",
+ "38 2009 Middle East & North Africa 3.893324e+08\n",
+ "39 2009 North America 3.404661e+08\n",
+ "40 2009 South Asia 1.636412e+09\n",
+ "41 2009 Sub-Saharan Africa 8.558857e+08\n",
+ "42 2010 East Asia & Pacific 2.187065e+09\n",
+ "43 2010 Europe & Central Asia 8.891696e+08\n",
+ "44 2010 Latin America & Caribbean 5.888739e+08\n",
+ "45 2010 Middle East & North Africa 3.979976e+08\n",
+ "46 2010 North America 3.433972e+08\n",
+ "47 2010 South Asia 1.660546e+09\n",
+ "48 2010 Sub-Saharan Africa 8.797974e+08\n",
+ "49 2011 East Asia & Pacific 2.202812e+09\n",
+ "50 2011 Europe & Central Asia 8.913294e+08\n",
+ "51 2011 Latin America & Caribbean 5.955100e+08\n",
+ "52 2011 Middle East & North Africa 4.060453e+08\n",
+ "53 2011 North America 3.459874e+08\n",
+ "54 2011 South Asia 1.684898e+09\n",
+ "55 2011 Sub-Saharan Africa 9.042822e+08\n",
+ "56 2012 East Asia & Pacific 2.220515e+09\n",
+ "57 2012 Europe & Central Asia 8.946605e+08\n",
+ "58 2012 Latin America & Caribbean 6.021394e+08\n",
+ "59 2012 Middle East & North Africa 4.141176e+08\n",
+ "60 2012 North America 3.486567e+08\n",
+ "61 2012 South Asia 1.708707e+09\n",
+ "62 2012 Sub-Saharan Africa 9.293287e+08\n",
+ "63 2013 East Asia & Pacific 2.237930e+09\n",
+ "64 2013 Europe & Central Asia 8.986074e+08\n",
+ "65 2013 Latin America & Caribbean 6.086422e+08\n",
+ "66 2013 Middle East & North Africa 4.227904e+08\n",
+ "67 2013 North America 3.512079e+08\n",
+ "68 2013 South Asia 1.731684e+09\n",
+ "69 2013 Sub-Saharan Africa 9.550967e+08\n",
+ "70 2014 East Asia & Pacific 2.254840e+09\n",
+ "71 2014 Europe & Central Asia 9.026709e+08\n",
+ "72 2014 Latin America & Caribbean 6.150468e+08\n",
+ "73 2014 Middle East & North Africa 4.316646e+08\n",
+ "74 2014 North America 3.538889e+08\n",
+ "75 2014 South Asia 1.754030e+09\n",
+ "76 2014 Sub-Saharan Africa 9.815066e+08\n",
+ "77 2015 East Asia & Pacific 2.271045e+09\n",
+ "78 2015 Europe & Central Asia 9.066954e+08\n",
+ "79 2015 Latin America & Caribbean 6.213901e+08\n",
+ "80 2015 Middle East & North Africa 4.405065e+08\n",
+ "81 2015 North America 3.565071e+08\n",
+ "82 2015 South Asia 1.775545e+09\n",
+ "83 2015 Sub-Saharan Africa 1.008699e+09\n",
+ "84 2016 East Asia & Pacific 2.287214e+09\n",
+ "85 2016 Europe & Central Asia 9.106333e+08\n",
+ "86 2016 Latin America & Caribbean 6.276685e+08\n",
+ "87 2016 Middle East & North Africa 4.489174e+08\n",
+ "88 2016 North America 3.592458e+08\n",
+ "89 2016 South Asia 1.797073e+09\n",
+ "90 2016 Sub-Saharan Africa 1.036156e+09\n",
+ "91 2017 East Asia & Pacific 2.303580e+09\n",
+ "92 2017 Europe & Central Asia 9.140783e+08\n",
+ "93 2017 Latin America & Caribbean 6.337972e+08\n",
+ "94 2017 Middle East & North Africa 4.568855e+08\n",
+ "95 2017 North America 3.617312e+08\n",
+ "96 2017 South Asia 1.818932e+09\n",
+ "97 2017 Sub-Saharan Africa 1.063885e+09\n",
+ "98 2018 East Asia & Pacific 2.317809e+09\n",
+ "99 2018 Europe & Central Asia 9.173805e+08\n",
+ "100 2018 Latin America & Caribbean 6.396282e+08\n",
+ "101 2018 Middle East & North Africa 4.650735e+08\n",
+ "102 2018 North America 3.639672e+08\n",
+ "103 2018 South Asia 1.840534e+09\n",
+ "104 2018 Sub-Saharan Africa 1.092404e+09\n",
+ "105 2019 East Asia & Pacific 2.330266e+09\n",
+ "106 2019 Europe & Central Asia 9.202775e+08\n",
+ "107 2019 Latin America & Caribbean 6.452958e+08\n",
+ "108 2019 Middle East & North Africa 4.732018e+08\n",
+ "109 2019 North America 3.659951e+08\n",
+ "110 2019 South Asia 1.861599e+09\n",
+ "111 2019 Sub-Saharan Africa 1.121549e+09\n",
+ "112 2020 East Asia & Pacific 2.340351e+09\n",
+ "113 2020 Europe & Central Asia 9.223534e+08\n",
+ "114 2020 Latin America & Caribbean 6.505350e+08\n",
+ "115 2020 Middle East & North Africa 4.799666e+08\n",
+ "116 2020 North America 3.695826e+08\n",
+ "117 2020 South Asia 1.882532e+09\n",
+ "118 2020 Sub-Saharan Africa 1.151302e+09\n",
+ "119 2021 East Asia & Pacific 2.346702e+09\n",
+ "120 2021 Europe & Central Asia 9.235640e+08\n",
+ "121 2021 Latin America & Caribbean 6.549806e+08\n",
+ "122 2021 Middle East & North Africa 4.861748e+08\n",
+ "123 2021 North America 3.703218e+08\n",
+ "124 2021 South Asia 1.901912e+09\n",
+ "125 2021 Sub-Saharan Africa 1.181163e+09\n",
+ "126 2022 East Asia & Pacific 2.351976e+09\n",
+ "127 2022 Europe & Central Asia 9.203756e+08\n",
+ "128 2022 Latin America & Caribbean 6.593106e+08\n",
+ "129 2022 Middle East & North Africa 4.932795e+08\n",
+ "130 2022 North America 3.722810e+08\n",
+ "131 2022 South Asia 1.919348e+09\n",
+ "132 2022 Sub-Saharan Africa 1.211190e+09"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " Region \n",
+ " Population, total \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 2004 \n",
+ " East Asia & Pacific \n",
+ " 2.094011e+09 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 2004 \n",
+ " Europe & Central Asia \n",
+ " 8.702708e+08 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2004 \n",
+ " Latin America & Caribbean \n",
+ " 5.498976e+08 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 2004 \n",
+ " Middle East & North Africa \n",
+ " 3.470449e+08 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 2004 \n",
+ " North America \n",
+ " 3.248097e+08 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " 2004 \n",
+ " South Asia \n",
+ " 1.515703e+09 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " 2004 \n",
+ " Sub-Saharan Africa \n",
+ " 7.465468e+08 \n",
+ " \n",
+ " \n",
+ " 7 \n",
+ " 2005 \n",
+ " East Asia & Pacific \n",
+ " 2.109795e+09 \n",
+ " \n",
+ " \n",
+ " 8 \n",
+ " 2005 \n",
+ " Europe & Central Asia \n",
+ " 8.729137e+08 \n",
+ " \n",
+ " \n",
+ " 9 \n",
+ " 2005 \n",
+ " Latin America & Caribbean \n",
+ " 5.567395e+08 \n",
+ " \n",
+ " \n",
+ " 10 \n",
+ " 2005 \n",
+ " Middle East & North Africa \n",
+ " 3.544624e+08 \n",
+ " \n",
+ " \n",
+ " 11 \n",
+ " 2005 \n",
+ " North America \n",
+ " 3.278245e+08 \n",
+ " \n",
+ " \n",
+ " 12 \n",
+ " 2005 \n",
+ " South Asia \n",
+ " 1.541264e+09 \n",
+ " \n",
+ " \n",
+ " 13 \n",
+ " 2005 \n",
+ " Sub-Saharan Africa \n",
+ " 7.669787e+08 \n",
+ " \n",
+ " \n",
+ " 14 \n",
+ " 2006 \n",
+ " East Asia & Pacific \n",
+ " 2.125591e+09 \n",
+ " \n",
+ " \n",
+ " 15 \n",
+ " 2006 \n",
+ " Europe & Central Asia \n",
+ " 8.756279e+08 \n",
+ " \n",
+ " \n",
+ " 16 \n",
+ " 2006 \n",
+ " Latin America & Caribbean \n",
+ " 5.634241e+08 \n",
+ " \n",
+ " \n",
+ " 17 \n",
+ " 2006 \n",
+ " Middle East & North Africa \n",
+ " 3.626340e+08 \n",
+ " \n",
+ " \n",
+ " 18 \n",
+ " 2006 \n",
+ " North America \n",
+ " 3.310156e+08 \n",
+ " \n",
+ " \n",
+ " 19 \n",
+ " 2006 \n",
+ " South Asia \n",
+ " 1.565893e+09 \n",
+ " \n",
+ " \n",
+ " 20 \n",
+ " 2006 \n",
+ " Sub-Saharan Africa \n",
+ " 7.881100e+08 \n",
+ " \n",
+ " \n",
+ " 21 \n",
+ " 2007 \n",
+ " East Asia & Pacific \n",
+ " 2.141253e+09 \n",
+ " \n",
+ " \n",
+ " 22 \n",
+ " 2007 \n",
+ " Europe & Central Asia \n",
+ " 8.784563e+08 \n",
+ " \n",
+ " \n",
+ " 23 \n",
+ " 2007 \n",
+ " Latin America & Caribbean \n",
+ " 5.699722e+08 \n",
+ " \n",
+ " \n",
+ " 24 \n",
+ " 2007 \n",
+ " Middle East & North Africa \n",
+ " 3.713248e+08 \n",
+ " \n",
+ " \n",
+ " 25 \n",
+ " 2007 \n",
+ " North America \n",
+ " 3.341851e+08 \n",
+ " \n",
+ " \n",
+ " 26 \n",
+ " 2007 \n",
+ " South Asia \n",
+ " 1.589455e+09 \n",
+ " \n",
+ " \n",
+ " 27 \n",
+ " 2007 \n",
+ " Sub-Saharan Africa \n",
+ " 8.100193e+08 \n",
+ " \n",
+ " \n",
+ " 28 \n",
+ " 2008 \n",
+ " East Asia & Pacific \n",
+ " 2.156920e+09 \n",
+ " \n",
+ " \n",
+ " 29 \n",
+ " 2008 \n",
+ " Europe & Central Asia \n",
+ " 8.820884e+08 \n",
+ " \n",
+ " \n",
+ " 30 \n",
+ " 2008 \n",
+ " Latin America & Caribbean \n",
+ " 5.763862e+08 \n",
+ " \n",
+ " \n",
+ " 31 \n",
+ " 2008 \n",
+ " Middle East & North Africa \n",
+ " 3.802941e+08 \n",
+ " \n",
+ " \n",
+ " 32 \n",
+ " 2008 \n",
+ " North America \n",
+ " 3.374064e+08 \n",
+ " \n",
+ " \n",
+ " 33 \n",
+ " 2008 \n",
+ " South Asia \n",
+ " 1.612709e+09 \n",
+ " \n",
+ " \n",
+ " 34 \n",
+ " 2008 \n",
+ " Sub-Saharan Africa \n",
+ " 8.326427e+08 \n",
+ " \n",
+ " \n",
+ " 35 \n",
+ " 2009 \n",
+ " East Asia & Pacific \n",
+ " 2.172100e+09 \n",
+ " \n",
+ " \n",
+ " 36 \n",
+ " 2009 \n",
+ " Europe & Central Asia \n",
+ " 8.856598e+08 \n",
+ " \n",
+ " \n",
+ " 37 \n",
+ " 2009 \n",
+ " Latin America & Caribbean \n",
+ " 5.827381e+08 \n",
+ " \n",
+ " \n",
+ " 38 \n",
+ " 2009 \n",
+ " Middle East & North Africa \n",
+ " 3.893324e+08 \n",
+ " \n",
+ " \n",
+ " 39 \n",
+ " 2009 \n",
+ " North America \n",
+ " 3.404661e+08 \n",
+ " \n",
+ " \n",
+ " 40 \n",
+ " 2009 \n",
+ " South Asia \n",
+ " 1.636412e+09 \n",
+ " \n",
+ " \n",
+ " 41 \n",
+ " 2009 \n",
+ " Sub-Saharan Africa \n",
+ " 8.558857e+08 \n",
+ " \n",
+ " \n",
+ " 42 \n",
+ " 2010 \n",
+ " East Asia & Pacific \n",
+ " 2.187065e+09 \n",
+ " \n",
+ " \n",
+ " 43 \n",
+ " 2010 \n",
+ " Europe & Central Asia \n",
+ " 8.891696e+08 \n",
+ " \n",
+ " \n",
+ " 44 \n",
+ " 2010 \n",
+ " Latin America & Caribbean \n",
+ " 5.888739e+08 \n",
+ " \n",
+ " \n",
+ " 45 \n",
+ " 2010 \n",
+ " Middle East & North Africa \n",
+ " 3.979976e+08 \n",
+ " \n",
+ " \n",
+ " 46 \n",
+ " 2010 \n",
+ " North America \n",
+ " 3.433972e+08 \n",
+ " \n",
+ " \n",
+ " 47 \n",
+ " 2010 \n",
+ " South Asia \n",
+ " 1.660546e+09 \n",
+ " \n",
+ " \n",
+ " 48 \n",
+ " 2010 \n",
+ " Sub-Saharan Africa \n",
+ " 8.797974e+08 \n",
+ " \n",
+ " \n",
+ " 49 \n",
+ " 2011 \n",
+ " East Asia & Pacific \n",
+ " 2.202812e+09 \n",
+ " \n",
+ " \n",
+ " 50 \n",
+ " 2011 \n",
+ " Europe & Central Asia \n",
+ " 8.913294e+08 \n",
+ " \n",
+ " \n",
+ " 51 \n",
+ " 2011 \n",
+ " Latin America & Caribbean \n",
+ " 5.955100e+08 \n",
+ " \n",
+ " \n",
+ " 52 \n",
+ " 2011 \n",
+ " Middle East & North Africa \n",
+ " 4.060453e+08 \n",
+ " \n",
+ " \n",
+ " 53 \n",
+ " 2011 \n",
+ " North America \n",
+ " 3.459874e+08 \n",
+ " \n",
+ " \n",
+ " 54 \n",
+ " 2011 \n",
+ " South Asia \n",
+ " 1.684898e+09 \n",
+ " \n",
+ " \n",
+ " 55 \n",
+ " 2011 \n",
+ " Sub-Saharan Africa \n",
+ " 9.042822e+08 \n",
+ " \n",
+ " \n",
+ " 56 \n",
+ " 2012 \n",
+ " East Asia & Pacific \n",
+ " 2.220515e+09 \n",
+ " \n",
+ " \n",
+ " 57 \n",
+ " 2012 \n",
+ " Europe & Central Asia \n",
+ " 8.946605e+08 \n",
+ " \n",
+ " \n",
+ " 58 \n",
+ " 2012 \n",
+ " Latin America & Caribbean \n",
+ " 6.021394e+08 \n",
+ " \n",
+ " \n",
+ " 59 \n",
+ " 2012 \n",
+ " Middle East & North Africa \n",
+ " 4.141176e+08 \n",
+ " \n",
+ " \n",
+ " 60 \n",
+ " 2012 \n",
+ " North America \n",
+ " 3.486567e+08 \n",
+ " \n",
+ " \n",
+ " 61 \n",
+ " 2012 \n",
+ " South Asia \n",
+ " 1.708707e+09 \n",
+ " \n",
+ " \n",
+ " 62 \n",
+ " 2012 \n",
+ " Sub-Saharan Africa \n",
+ " 9.293287e+08 \n",
+ " \n",
+ " \n",
+ " 63 \n",
+ " 2013 \n",
+ " East Asia & Pacific \n",
+ " 2.237930e+09 \n",
+ " \n",
+ " \n",
+ " 64 \n",
+ " 2013 \n",
+ " Europe & Central Asia \n",
+ " 8.986074e+08 \n",
+ " \n",
+ " \n",
+ " 65 \n",
+ " 2013 \n",
+ " Latin America & Caribbean \n",
+ " 6.086422e+08 \n",
+ " \n",
+ " \n",
+ " 66 \n",
+ " 2013 \n",
+ " Middle East & North Africa \n",
+ " 4.227904e+08 \n",
+ " \n",
+ " \n",
+ " 67 \n",
+ " 2013 \n",
+ " North America \n",
+ " 3.512079e+08 \n",
+ " \n",
+ " \n",
+ " 68 \n",
+ " 2013 \n",
+ " South Asia \n",
+ " 1.731684e+09 \n",
+ " \n",
+ " \n",
+ " 69 \n",
+ " 2013 \n",
+ " Sub-Saharan Africa \n",
+ " 9.550967e+08 \n",
+ " \n",
+ " \n",
+ " 70 \n",
+ " 2014 \n",
+ " East Asia & Pacific \n",
+ " 2.254840e+09 \n",
+ " \n",
+ " \n",
+ " 71 \n",
+ " 2014 \n",
+ " Europe & Central Asia \n",
+ " 9.026709e+08 \n",
+ " \n",
+ " \n",
+ " 72 \n",
+ " 2014 \n",
+ " Latin America & Caribbean \n",
+ " 6.150468e+08 \n",
+ " \n",
+ " \n",
+ " 73 \n",
+ " 2014 \n",
+ " Middle East & North Africa \n",
+ " 4.316646e+08 \n",
+ " \n",
+ " \n",
+ " 74 \n",
+ " 2014 \n",
+ " North America \n",
+ " 3.538889e+08 \n",
+ " \n",
+ " \n",
+ " 75 \n",
+ " 2014 \n",
+ " South Asia \n",
+ " 1.754030e+09 \n",
+ " \n",
+ " \n",
+ " 76 \n",
+ " 2014 \n",
+ " Sub-Saharan Africa \n",
+ " 9.815066e+08 \n",
+ " \n",
+ " \n",
+ " 77 \n",
+ " 2015 \n",
+ " East Asia & Pacific \n",
+ " 2.271045e+09 \n",
+ " \n",
+ " \n",
+ " 78 \n",
+ " 2015 \n",
+ " Europe & Central Asia \n",
+ " 9.066954e+08 \n",
+ " \n",
+ " \n",
+ " 79 \n",
+ " 2015 \n",
+ " Latin America & Caribbean \n",
+ " 6.213901e+08 \n",
+ " \n",
+ " \n",
+ " 80 \n",
+ " 2015 \n",
+ " Middle East & North Africa \n",
+ " 4.405065e+08 \n",
+ " \n",
+ " \n",
+ " 81 \n",
+ " 2015 \n",
+ " North America \n",
+ " 3.565071e+08 \n",
+ " \n",
+ " \n",
+ " 82 \n",
+ " 2015 \n",
+ " South Asia \n",
+ " 1.775545e+09 \n",
+ " \n",
+ " \n",
+ " 83 \n",
+ " 2015 \n",
+ " Sub-Saharan Africa \n",
+ " 1.008699e+09 \n",
+ " \n",
+ " \n",
+ " 84 \n",
+ " 2016 \n",
+ " East Asia & Pacific \n",
+ " 2.287214e+09 \n",
+ " \n",
+ " \n",
+ " 85 \n",
+ " 2016 \n",
+ " Europe & Central Asia \n",
+ " 9.106333e+08 \n",
+ " \n",
+ " \n",
+ " 86 \n",
+ " 2016 \n",
+ " Latin America & Caribbean \n",
+ " 6.276685e+08 \n",
+ " \n",
+ " \n",
+ " 87 \n",
+ " 2016 \n",
+ " Middle East & North Africa \n",
+ " 4.489174e+08 \n",
+ " \n",
+ " \n",
+ " 88 \n",
+ " 2016 \n",
+ " North America \n",
+ " 3.592458e+08 \n",
+ " \n",
+ " \n",
+ " 89 \n",
+ " 2016 \n",
+ " South Asia \n",
+ " 1.797073e+09 \n",
+ " \n",
+ " \n",
+ " 90 \n",
+ " 2016 \n",
+ " Sub-Saharan Africa \n",
+ " 1.036156e+09 \n",
+ " \n",
+ " \n",
+ " 91 \n",
+ " 2017 \n",
+ " East Asia & Pacific \n",
+ " 2.303580e+09 \n",
+ " \n",
+ " \n",
+ " 92 \n",
+ " 2017 \n",
+ " Europe & Central Asia \n",
+ " 9.140783e+08 \n",
+ " \n",
+ " \n",
+ " 93 \n",
+ " 2017 \n",
+ " Latin America & Caribbean \n",
+ " 6.337972e+08 \n",
+ " \n",
+ " \n",
+ " 94 \n",
+ " 2017 \n",
+ " Middle East & North Africa \n",
+ " 4.568855e+08 \n",
+ " \n",
+ " \n",
+ " 95 \n",
+ " 2017 \n",
+ " North America \n",
+ " 3.617312e+08 \n",
+ " \n",
+ " \n",
+ " 96 \n",
+ " 2017 \n",
+ " South Asia \n",
+ " 1.818932e+09 \n",
+ " \n",
+ " \n",
+ " 97 \n",
+ " 2017 \n",
+ " Sub-Saharan Africa \n",
+ " 1.063885e+09 \n",
+ " \n",
+ " \n",
+ " 98 \n",
+ " 2018 \n",
+ " East Asia & Pacific \n",
+ " 2.317809e+09 \n",
+ " \n",
+ " \n",
+ " 99 \n",
+ " 2018 \n",
+ " Europe & Central Asia \n",
+ " 9.173805e+08 \n",
+ " \n",
+ " \n",
+ " 100 \n",
+ " 2018 \n",
+ " Latin America & Caribbean \n",
+ " 6.396282e+08 \n",
+ " \n",
+ " \n",
+ " 101 \n",
+ " 2018 \n",
+ " Middle East & North Africa \n",
+ " 4.650735e+08 \n",
+ " \n",
+ " \n",
+ " 102 \n",
+ " 2018 \n",
+ " North America \n",
+ " 3.639672e+08 \n",
+ " \n",
+ " \n",
+ " 103 \n",
+ " 2018 \n",
+ " South Asia \n",
+ " 1.840534e+09 \n",
+ " \n",
+ " \n",
+ " 104 \n",
+ " 2018 \n",
+ " Sub-Saharan Africa \n",
+ " 1.092404e+09 \n",
+ " \n",
+ " \n",
+ " 105 \n",
+ " 2019 \n",
+ " East Asia & Pacific \n",
+ " 2.330266e+09 \n",
+ " \n",
+ " \n",
+ " 106 \n",
+ " 2019 \n",
+ " Europe & Central Asia \n",
+ " 9.202775e+08 \n",
+ " \n",
+ " \n",
+ " 107 \n",
+ " 2019 \n",
+ " Latin America & Caribbean \n",
+ " 6.452958e+08 \n",
+ " \n",
+ " \n",
+ " 108 \n",
+ " 2019 \n",
+ " Middle East & North Africa \n",
+ " 4.732018e+08 \n",
+ " \n",
+ " \n",
+ " 109 \n",
+ " 2019 \n",
+ " North America \n",
+ " 3.659951e+08 \n",
+ " \n",
+ " \n",
+ " 110 \n",
+ " 2019 \n",
+ " South Asia \n",
+ " 1.861599e+09 \n",
+ " \n",
+ " \n",
+ " 111 \n",
+ " 2019 \n",
+ " Sub-Saharan Africa \n",
+ " 1.121549e+09 \n",
+ " \n",
+ " \n",
+ " 112 \n",
+ " 2020 \n",
+ " East Asia & Pacific \n",
+ " 2.340351e+09 \n",
+ " \n",
+ " \n",
+ " 113 \n",
+ " 2020 \n",
+ " Europe & Central Asia \n",
+ " 9.223534e+08 \n",
+ " \n",
+ " \n",
+ " 114 \n",
+ " 2020 \n",
+ " Latin America & Caribbean \n",
+ " 6.505350e+08 \n",
+ " \n",
+ " \n",
+ " 115 \n",
+ " 2020 \n",
+ " Middle East & North Africa \n",
+ " 4.799666e+08 \n",
+ " \n",
+ " \n",
+ " 116 \n",
+ " 2020 \n",
+ " North America \n",
+ " 3.695826e+08 \n",
+ " \n",
+ " \n",
+ " 117 \n",
+ " 2020 \n",
+ " South Asia \n",
+ " 1.882532e+09 \n",
+ " \n",
+ " \n",
+ " 118 \n",
+ " 2020 \n",
+ " Sub-Saharan Africa \n",
+ " 1.151302e+09 \n",
+ " \n",
+ " \n",
+ " 119 \n",
+ " 2021 \n",
+ " East Asia & Pacific \n",
+ " 2.346702e+09 \n",
+ " \n",
+ " \n",
+ " 120 \n",
+ " 2021 \n",
+ " Europe & Central Asia \n",
+ " 9.235640e+08 \n",
+ " \n",
+ " \n",
+ " 121 \n",
+ " 2021 \n",
+ " Latin America & Caribbean \n",
+ " 6.549806e+08 \n",
+ " \n",
+ " \n",
+ " 122 \n",
+ " 2021 \n",
+ " Middle East & North Africa \n",
+ " 4.861748e+08 \n",
+ " \n",
+ " \n",
+ " 123 \n",
+ " 2021 \n",
+ " North America \n",
+ " 3.703218e+08 \n",
+ " \n",
+ " \n",
+ " 124 \n",
+ " 2021 \n",
+ " South Asia \n",
+ " 1.901912e+09 \n",
+ " \n",
+ " \n",
+ " 125 \n",
+ " 2021 \n",
+ " Sub-Saharan Africa \n",
+ " 1.181163e+09 \n",
+ " \n",
+ " \n",
+ " 126 \n",
+ " 2022 \n",
+ " East Asia & Pacific \n",
+ " 2.351976e+09 \n",
+ " \n",
+ " \n",
+ " 127 \n",
+ " 2022 \n",
+ " Europe & Central Asia \n",
+ " 9.203756e+08 \n",
+ " \n",
+ " \n",
+ " 128 \n",
+ " 2022 \n",
+ " Latin America & Caribbean \n",
+ " 6.593106e+08 \n",
+ " \n",
+ " \n",
+ " 129 \n",
+ " 2022 \n",
+ " Middle East & North Africa \n",
+ " 4.932795e+08 \n",
+ " \n",
+ " \n",
+ " 130 \n",
+ " 2022 \n",
+ " North America \n",
+ " 3.722810e+08 \n",
+ " \n",
+ " \n",
+ " 131 \n",
+ " 2022 \n",
+ " South Asia \n",
+ " 1.919348e+09 \n",
+ " \n",
+ " \n",
+ " 132 \n",
+ " 2022 \n",
+ " Sub-Saharan Africa \n",
+ " 1.211190e+09 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_group",
+ "summary": "{\n \"name\": \"df_group\",\n \"rows\": 133,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5,\n \"min\": 2004,\n \"max\": 2022,\n \"num_unique_values\": 19,\n \"samples\": [\n 2004,\n 2009,\n 2015\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"East Asia & Pacific\",\n \"Europe & Central Asia\",\n \"South Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population, total\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 658513824.5184423,\n \"min\": 324809693.0,\n \"max\": 2351975929.0,\n \"num_unique_values\": 133,\n \"samples\": [\n 595510008.0,\n 955096702.0,\n 380294099.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 63
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Homework #5\n",
+ "Pick a few countries that interest you (for example, the G7, BRICS, or the Nordic countries).\n",
+ "1. Create a line chart showing the trend of life expectancy over time to compare those countries.\n",
+ "2. Pick one year from the data for those countries and create a scatter plot comparing their GDP per capita and life expectancy. The size of each dot represents the population. This type of scatter plot is called a bubble chart.\n",
+ "3. Bonus: compute the average GDP per capita, the average life expectancy, and the total population of each of the seven regions for each year (aggregation), then repeat 1 and 2 using the aggregated regional data."
+ ],
+ "metadata": {
+ "id": "rXn6-xhPqL2M"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Homework Question 3\n",
+ "\n",
+ "Select one year of data, calculate the total population of each region, and make a pie chart showing how the total population is distributed across the regions."
+ ],
+ "metadata": {
+ "id": "Toh4NdGeN4A8"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Drop incomplete rows into a new frame so df_filtered (from an earlier cell) is not overwritten.\n",
+ "df_population = df_filtered.dropna(subset=[\"Population, total\", \"Region\"])\n",
+ "\n",
+ "population_by_region = df_population.groupby(\"Region\")[\"Population, total\"].sum().reset_index()\n",
+ "\n",
+ "fig = px.pie(\n",
+ " population_by_region,\n",
+ " values='Population, total',\n",
+ " names='Region',\n",
+ " title=f\"Total Population Distribution by Region in {year_of_interest}\",\n",
+ " labels={'Population, total': 'Total Population'}\n",
+ ")"
+ ],
+ "metadata": {
+ "id": "jmgp1nayNyyb"
+ },
+ "execution_count": 13,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "fig.show()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 542
+ },
+ "id": "YBkZ1sgGN7Hm",
+ "outputId": "4af8c4c9-7bc4-4b59-8f04-ee9429142275"
+ },
+ "execution_count": 14,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ " \n",
+ "\n",
+ ""
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Exercise\n",
+ "\n",
+ "Calculate average GDP per capita for each region and make a bar chart."
+ ],
+ "metadata": {
+ "id": "b2dW-w6YOA2C"
+ }
+ },
+ {
+ "cell_type": "markdown",
+ "source": [],
+ "metadata": {
+ "id": "DTco520NOGvw"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "\n",
+ "df_cleaned = df[['Region', 'GDP per capita (current US$)']].dropna()\n",
+ "\n",
+ "average_gdp_by_region = df_cleaned.groupby('Region').agg({'GDP per capita (current US$)': 'mean'}).reset_index()\n",
+ "fig = px.bar(\n",
+ " data_frame=average_gdp_by_region,\n",
+ " x='Region',\n",
+ " y='GDP per capita (current US$)',\n",
+ " title='Average GDP per Capita by Region',\n",
+ " labels={\n",
+ " 'GDP per capita (current US$)': 'Average GDP per Capita (Current US$)',\n",
+ " 'Region': 'Region'\n",
+ " },\n",
+ " text='GDP per capita (current US$)'\n",
+ ")\n",
+ "# Label bars in whole dollars with thousands separators instead of the raw float means.\n",
+ "fig.update_traces(texttemplate='%{text:,.0f}')\n",
+ "fig.show()\n"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 542
+ },
+ "id": "-kG4fTMVN-RL",
+ "outputId": "60d28058-291f-4f60-ef2b-26c6c049b110"
+ },
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ " \n",
+ "\n",
+ ""
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_2020 = df3[df3[\"Year\"] == 2020]\n",
+ "df_2020.shape"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "NDVtrt8aOJdN",
+ "outputId": "e9bfc006-432c-44ad-c7c5-16bd05fe3cb2"
+ },
+ "execution_count": 16,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(202, 9)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 16
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_2020.sample(5)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 258
+ },
+ "id": "vbwTLlkeONLx",
+ "outputId": "755b043e-6495-4b66-c842-8d798afefcc9"
+ },
+ "execution_count": 17,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Year Country GDP per capita (current US$) \\\n",
+ "2999 2020 Portugal 22242.406418 \n",
+ "624 2020 Cabo Verde 3126.399859 \n",
+ "2258 2020 Madagascar 462.404229 \n",
+ "1042 2020 Djibouti 2921.738706 \n",
+ "2980 2020 Poland 15816.820402 \n",
+ "\n",
+ " Life expectancy at birth, total (years) Population, total Country Code \\\n",
+ "2999 80.97561 10297081.0 PRT \n",
+ "624 74.80800 582640.0 CPV \n",
+ "2258 65.18200 28225177.0 MDG \n",
+ "1042 62.69400 1090156.0 DJI \n",
+ "2980 76.50000 37899070.0 POL \n",
+ "\n",
+ " Region Income Group Lending Type \n",
+ "2999 Europe & Central Asia High income Not classified \n",
+ "624 Sub-Saharan Africa Lower middle income Blend \n",
+ "2258 Sub-Saharan Africa Low income IDA \n",
+ "1042 Middle East & North Africa Lower middle income IDA \n",
+ "2980 Europe & Central Asia High income IBRD "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Year \n",
+ " Country \n",
+ " GDP per capita (current US$) \n",
+ " Life expectancy at birth, total (years) \n",
+ " Population, total \n",
+ " Country Code \n",
+ " Region \n",
+ " Income Group \n",
+ " Lending Type \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 2999 \n",
+ " 2020 \n",
+ " Portugal \n",
+ " 22242.406418 \n",
+ " 80.97561 \n",
+ " 10297081.0 \n",
+ " PRT \n",
+ " Europe & Central Asia \n",
+ " High income \n",
+ " Not classified \n",
+ " \n",
+ " \n",
+ " 624 \n",
+ " 2020 \n",
+ " Cabo Verde \n",
+ " 3126.399859 \n",
+ " 74.80800 \n",
+ " 582640.0 \n",
+ " CPV \n",
+ " Sub-Saharan Africa \n",
+ " Lower middle income \n",
+ " Blend \n",
+ " \n",
+ " \n",
+ " 2258 \n",
+ " 2020 \n",
+ " Madagascar \n",
+ " 462.404229 \n",
+ " 65.18200 \n",
+ " 28225177.0 \n",
+ " MDG \n",
+ " Sub-Saharan Africa \n",
+ " Low income \n",
+ " IDA \n",
+ " \n",
+ " \n",
+ " 1042 \n",
+ " 2020 \n",
+ " Djibouti \n",
+ " 2921.738706 \n",
+ " 62.69400 \n",
+ " 1090156.0 \n",
+ " DJI \n",
+ " Middle East & North Africa \n",
+ " Lower middle income \n",
+ " IDA \n",
+ " \n",
+ " \n",
+ " 2980 \n",
+ " 2020 \n",
+ " Poland \n",
+ " 15816.820402 \n",
+ " 76.50000 \n",
+ " 37899070.0 \n",
+ " POL \n",
+ " Europe & Central Asia \n",
+ " High income \n",
+ " IBRD \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_2020\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"Year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 2020,\n \"max\": 2020,\n \"num_unique_values\": 1,\n \"samples\": [\n 2020\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Cabo Verde\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per capita (current US$)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9567.240054265198,\n \"min\": 462.404228818383,\n \"max\": 22242.406417972,\n \"num_unique_values\": 5,\n \"samples\": [\n 3126.39985872259\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Life expectancy at birth, total (years)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 7.774607698123842,\n \"min\": 62.694,\n \"max\": 80.9756097560976,\n \"num_unique_values\": 5,\n \"samples\": [\n 74.808\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Population, total\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 16739320.994855577,\n \"min\": 582640.0,\n \"max\": 37899070.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 582640.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Country Code\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"CPV\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Europe & Central Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Income Group\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"High income\"\n ],\n \"semantic_type\": 
\"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Lending Type\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Blend\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 17
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_group = df.groupby(\"Region\")[\"GDP per capita (current US$)\"].mean()\n",
+ "df_group"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 335
+ },
+ "id": "nOm_pMHcOPjU",
+ "outputId": "e689615c-12c1-43ca-86d1-e0b999f5265e"
+ },
+ "execution_count": 18,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Region\n",
+ "East Asia & Pacific 15178.578702\n",
+ "Europe & Central Asia 32834.037083\n",
+ "Latin America & Caribbean 13226.709551\n",
+ "Middle East & North Africa 15653.223444\n",
+ "North America 67679.379373\n",
+ "South Asia 2357.171555\n",
+ "Sub-Saharan Africa 2271.143792\n",
+ "Name: GDP per capita (current US$), dtype: float64"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " GDP per capita (current US$) \n",
+ " \n",
+ " \n",
+ " Region \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " East Asia & Pacific \n",
+ " 15178.578702 \n",
+ " \n",
+ " \n",
+ " Europe & Central Asia \n",
+ " 32834.037083 \n",
+ " \n",
+ " \n",
+ " Latin America & Caribbean \n",
+ " 13226.709551 \n",
+ " \n",
+ " \n",
+ " Middle East & North Africa \n",
+ " 15653.223444 \n",
+ " \n",
+ " \n",
+ " North America \n",
+ " 67679.379373 \n",
+ " \n",
+ " \n",
+ " South Asia \n",
+ " 2357.171555 \n",
+ " \n",
+ " \n",
+ " Sub-Saharan Africa \n",
+ " 2271.143792 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
dtype: float64 "
+ ]
+ },
+ "metadata": {},
+ "execution_count": 18
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_group = df_group.reset_index()\n",
+ "df_group"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 269
+ },
+ "id": "Qv2zwb3nOTPA",
+ "outputId": "00571e80-b338-46b4-e66a-afc242ba5a99"
+ },
+ "execution_count": 19,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Region GDP per capita (current US$)\n",
+ "0 East Asia & Pacific 15178.578702\n",
+ "1 Europe & Central Asia 32834.037083\n",
+ "2 Latin America & Caribbean 13226.709551\n",
+ "3 Middle East & North Africa 15653.223444\n",
+ "4 North America 67679.379373\n",
+ "5 South Asia 2357.171555\n",
+ "6 Sub-Saharan Africa 2271.143792"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Region \n",
+ " GDP per capita (current US$) \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " East Asia & Pacific \n",
+ " 15178.578702 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " Europe & Central Asia \n",
+ " 32834.037083 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " Latin America & Caribbean \n",
+ " 13226.709551 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " Middle East & North Africa \n",
+ " 15653.223444 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " North America \n",
+ " 67679.379373 \n",
+ " \n",
+ " \n",
+ " 5 \n",
+ " South Asia \n",
+ " 2357.171555 \n",
+ " \n",
+ " \n",
+ " 6 \n",
+ " Sub-Saharan Africa \n",
+ " 2271.143792 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_group",
+ "summary": "{\n \"name\": \"df_group\",\n \"rows\": 7,\n \"fields\": [\n {\n \"column\": \"Region\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"East Asia & Pacific\",\n \"Europe & Central Asia\",\n \"South Asia\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"GDP per capita (current US$)\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 22874.816676472525,\n \"min\": 2271.14379159153,\n \"max\": 67679.3793732906,\n \"num_unique_values\": 7,\n \"samples\": [\n 15178.57870244265,\n 32834.03708256286,\n 2357.1715547168174\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 19
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "fig = px.bar(df_group, x=\"Region\", y=\"GDP per capita (current US$)\", color=\"Region\")\n",
+ "\n",
+ "fig.show()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 542
+ },
+ "id": "e0ufxKv7OVnj",
+ "outputId": "c9484b7e-5da2-4ddb-edba-a7f86e299b0e"
+ },
+ "execution_count": 20,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ " \n",
+ "\n",
+ ""
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## Homework Question 4\n",
+ "\n",
+ "1. Remove the color legend.\n",
+ "2. Order the bars in the chart above by average GDP per capita."
+ ],
+ "metadata": {
+ "id": "REAbuG9SObTX"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_group_sorted = df_group.sort_values(by=\"GDP per capita (current US$)\", ascending=False)\n",
+ "# Bars are drawn in the row order of the frame, so sorting the rows orders the chart.\n",
+ "fig = px.bar(df_group_sorted,\n",
+ " x=\"Region\",\n",
+ " y=\"GDP per capita (current US$)\",\n",
+ " color=\"Region\")\n",
+ "# The x-axis already names each region, so the color legend is redundant.\n",
+ "fig.update_layout(showlegend=False)\n",
+ "fig.show()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 542
+ },
+ "id": "sv44EtoDOYgR",
+ "outputId": "6d96e9b3-8fa8-4179-a855-d1de173b1f52"
+ },
+ "execution_count": 21,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "\n",
+ " \n",
+ "\n",
+ ""
+ ]
+ },
+ "metadata": {}
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file