From e57aa042932d2976942e7ecf0dfe63f613e1cd72 Mon Sep 17 00:00:00 2001
From: Diwakar Gupta <39624018+Diwakar-Gupta@users.noreply.github.com>
Date: Tue, 3 May 2022 12:10:56 +0530
Subject: [PATCH] Pandas assignment_solution
---
22-04-30-Pandas/AssignmentSolution.ipynb | 2387 ++++++++++++++++++++++
1 file changed, 2387 insertions(+)
create mode 100644 22-04-30-Pandas/AssignmentSolution.ipynb
diff --git a/22-04-30-Pandas/AssignmentSolution.ipynb b/22-04-30-Pandas/AssignmentSolution.ipynb
new file mode 100644
index 0000000..e986046
--- /dev/null
+++ b/22-04-30-Pandas/AssignmentSolution.ipynb
@@ -0,0 +1,2387 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "Excercise1_pandas_Solution.ipynb",
+ "provenance": [],
+ "collapsed_sections": [],
+ "include_colab_link": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "view-in-github",
+ "colab_type": "text"
+ },
+ "source": [
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "**import pandas as pd and numpy as np**"
+ ],
+ "metadata": {
+ "id": "zoRkvutsEsv9"
+ }
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "L8z0tBlXt1mC"
+ },
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Read the CSV from this URL https://raw.githubusercontent.com/Kokkalo4/Kaggle-SF-Salaries/master/Salaries.csv using pandas and store it in a variable named `df`.\n",
+ "\n",
+ "\n",
+ "If a warning is printed (for example a `DtypeWarning` about mixed column types), you can ignore it."
+ ],
+ "metadata": {
+ "id": "1F1WQpscEhqQ"
+ }
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "LO98v31ct6iw",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "2ad0cf2c-4687-4ffe-cdc1-318541613c69"
+ },
+ "source": [
+ "df = pd.read_csv('https://raw.githubusercontent.com/Kokkalo4/Kaggle-SF-Salaries/master/Salaries.csv')"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2718: DtypeWarning: Columns (3,4,5,6,12) have mixed types.Specify dtype option on import or set low_memory=False.\n",
+ " interactivity=interactivity, compiler=compiler, result=result)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ZYHMdf1OuoCF"
+ },
+ "source": [
+ "**check the head of DataFrame**\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.head()"
+ ],
+ "metadata": {
+ "id": "shypO4MfPuN9",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 389
+ },
+ "outputId": "3f7b4068-4241-4442-c851-449ada3f7bce"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Agency | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411 | \n",
+ " 0 | \n",
+ " 400184 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966 | \n",
+ " 245132 | \n",
+ " 137811 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739 | \n",
+ " 106088 | \n",
+ " 16452.6 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916 | \n",
+ " 56120.7 | \n",
+ " 198307 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134402 | \n",
+ " 9737 | \n",
+ " 182235 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " Id EmployeeName ... Agency Status\n",
+ "0 1 NATHANIEL FORD ... San Francisco NaN\n",
+ "1 2 GARY JIMENEZ ... San Francisco NaN\n",
+ "2 3 ALBERT PARDINI ... San Francisco NaN\n",
+ "3 4 CHRISTOPHER CHONG ... San Francisco NaN\n",
+ "4 5 PATRICK GARDNER ... San Francisco NaN\n",
+ "\n",
+ "[5 rows x 13 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 3
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "3Hs476kddqd_",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 389
+ },
+ "outputId": "1b5511c0-ccaf-46bd-b495-d6fb0a622235"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " EmployeeName | \n",
+ " JobTitle | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " OtherPay | \n",
+ " Benefits | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ " Notes | \n",
+ " Agency | \n",
+ " Status | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " NATHANIEL FORD | \n",
+ " GENERAL MANAGER-METROPOLITAN TRANSIT AUTHORITY | \n",
+ " 167411 | \n",
+ " 0 | \n",
+ " 400184 | \n",
+ " NaN | \n",
+ " 567595.43 | \n",
+ " 567595.43 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " GARY JIMENEZ | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 155966 | \n",
+ " 245132 | \n",
+ " 137811 | \n",
+ " NaN | \n",
+ " 538909.28 | \n",
+ " 538909.28 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " ALBERT PARDINI | \n",
+ " CAPTAIN III (POLICE DEPARTMENT) | \n",
+ " 212739 | \n",
+ " 106088 | \n",
+ " 16452.6 | \n",
+ " NaN | \n",
+ " 335279.91 | \n",
+ " 335279.91 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " CHRISTOPHER CHONG | \n",
+ " WIRE ROPE CABLE MAINTENANCE MECHANIC | \n",
+ " 77916 | \n",
+ " 56120.7 | \n",
+ " 198307 | \n",
+ " NaN | \n",
+ " 332343.61 | \n",
+ " 332343.61 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " PATRICK GARDNER | \n",
+ " DEPUTY CHIEF OF DEPARTMENT,(FIRE DEPARTMENT) | \n",
+ " 134402 | \n",
+ " 9737 | \n",
+ " 182235 | \n",
+ " NaN | \n",
+ " 326373.19 | \n",
+ " 326373.19 | \n",
+ " 2011 | \n",
+ " NaN | \n",
+ " San Francisco | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " Id EmployeeName ... Agency Status\n",
+ "0 1 NATHANIEL FORD ... San Francisco NaN\n",
+ "1 2 GARY JIMENEZ ... San Francisco NaN\n",
+ "2 3 ALBERT PARDINI ... San Francisco NaN\n",
+ "3 4 CHRISTOPHER CHONG ... San Francisco NaN\n",
+ "4 5 PATRICK GARDNER ... San Francisco NaN\n",
+ "\n",
+ "[5 rows x 13 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 4
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "q4guqLVUu3s4"
+ },
+ "source": [
+ "**use the info method to find out how many entries there are.**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.info()"
+ ],
+ "metadata": {
+ "id": "sCf-_N-7Pwpb",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "c6c29e0f-57af-4aa2-98eb-20e8e6eaacab"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 148654 entries, 0 to 148653\n",
+ "Data columns (total 13 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Id 148654 non-null int64 \n",
+ " 1 EmployeeName 148654 non-null object \n",
+ " 2 JobTitle 148654 non-null object \n",
+ " 3 BasePay 148049 non-null object \n",
+ " 4 OvertimePay 148654 non-null object \n",
+ " 5 OtherPay 148654 non-null object \n",
+ " 6 Benefits 112495 non-null object \n",
+ " 7 TotalPay 148654 non-null float64\n",
+ " 8 TotalPayBenefits 148654 non-null float64\n",
+ " 9 Year 148654 non-null int64 \n",
+ " 10 Notes 0 non-null float64\n",
+ " 11 Agency 148654 non-null object \n",
+ " 12 Status 38119 non-null object \n",
+ "dtypes: float64(3), int64(2), object(8)\n",
+ "memory usage: 14.7+ MB\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "9ETc2lOqdrYN",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "47f965c7-4f81-4774-c628-0738d555daa4"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 148654 entries, 0 to 148653\n",
+ "Data columns (total 13 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Id 148654 non-null int64 \n",
+ " 1 EmployeeName 148654 non-null object \n",
+ " 2 JobTitle 148654 non-null object \n",
+ " 3 BasePay 148049 non-null object \n",
+ " 4 OvertimePay 148654 non-null object \n",
+ " 5 OtherPay 148654 non-null object \n",
+ " 6 Benefits 112495 non-null object \n",
+ " 7 TotalPay 148654 non-null float64\n",
+ " 8 TotalPayBenefits 148654 non-null float64\n",
+ " 9 Year 148654 non-null int64 \n",
+ " 10 Notes 0 non-null float64\n",
+ " 11 Agency 148654 non-null object \n",
+ " 12 Status 38119 non-null object \n",
+ "dtypes: float64(3), int64(2), object(8)\n",
+ "memory usage: 14.7+ MB\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Both the BasePay and OvertimePay fields are of **object** type and contain **missing data**; convert them to numeric using\n",
+ "\n",
+ " `pd.to_numeric(df[column_name], errors='coerce')`\n",
+ "\n",
+ " errors='coerce' will replace all non-numeric values with NaN\n",
+ "\n",
+ " then check datatype using info function"
+ ],
+ "metadata": {
+ "id": "55lbeP7q4Kt7"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df['BasePay'] = pd.to_numeric(df['BasePay'], errors = 'coerce')\n",
+ "df['OvertimePay'] = pd.to_numeric(df['OvertimePay'], errors = 'coerce')"
+ ],
+ "metadata": {
+ "id": "MCs3nTMEPzmA"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.info()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "dSKsf7WbE3fP",
+ "outputId": "aa08dbf0-bc16-45f9-ae9a-804b29efb29d"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 148654 entries, 0 to 148653\n",
+ "Data columns (total 13 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Id 148654 non-null int64 \n",
+ " 1 EmployeeName 148654 non-null object \n",
+ " 2 JobTitle 148654 non-null object \n",
+ " 3 BasePay 148045 non-null float64\n",
+ " 4 OvertimePay 148650 non-null float64\n",
+ " 5 OtherPay 148654 non-null object \n",
+ " 6 Benefits 112495 non-null object \n",
+ " 7 TotalPay 148654 non-null float64\n",
+ " 8 TotalPayBenefits 148654 non-null float64\n",
+ " 9 Year 148654 non-null int64 \n",
+ " 10 Notes 0 non-null float64\n",
+ " 11 Agency 148654 non-null object \n",
+ " 12 Status 38119 non-null object \n",
+ "dtypes: float64(5), int64(2), object(6)\n",
+ "memory usage: 14.7+ MB\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ ""
+ ],
+ "metadata": {
+ "id": "yV_JbMJH4ds3",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "d9313c24-991e-42a1-8692-592958978065"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 148654 entries, 0 to 148653\n",
+ "Data columns (total 13 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Id 148654 non-null int64 \n",
+ " 1 EmployeeName 148654 non-null object \n",
+ " 2 JobTitle 148654 non-null object \n",
+ " 3 BasePay 148045 non-null float64\n",
+ " 4 OvertimePay 148650 non-null float64\n",
+ " 5 OtherPay 148654 non-null object \n",
+ " 6 Benefits 112495 non-null object \n",
+ " 7 TotalPay 148654 non-null float64\n",
+ " 8 TotalPayBenefits 148654 non-null float64\n",
+ " 9 Year 148654 non-null int64 \n",
+ " 10 Notes 0 non-null float64\n",
+ " 11 Agency 148654 non-null object \n",
+ " 12 Status 38119 non-null object \n",
+ "dtypes: float64(5), int64(2), object(6)\n",
+ "memory usage: 14.7+ MB\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "uMlELt6pvGRv"
+ },
+ "source": [
+ "**What is the average BasePay?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "TqFqU5v_dsYK",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "16db4a87-11b4-4f4a-961e-4df333b9a928"
+ },
+ "source": [
+ "df['BasePay'].mean()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "66325.44884050643"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "_cb6KuDk2cwV",
+ "outputId": "6379652c-7589-4ccc-d2dc-8d1d4fc9bbf6"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "66325.44884050643"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 48
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "nJWtU2L9zaDq"
+ },
+ "source": [
+ "**What is the highest amount of OvertimePay in the dataset?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "OCN9UhwqdtLk",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "6765ce4c-cd18-4741-f35b-8f4aacbd41b9"
+ },
+ "source": [
+ "df['BasePay'].max()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "319275.01"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 12
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "gtSR1ZAszYzT",
+ "outputId": "4786818d-923a-467b-e3c6-3493de21aba3"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "245131.88"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 49
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "rYI-Oix34iEi"
+ },
+ "source": [
+ "**What is the Job Title of JOSEPH DRISCOLL?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "abIYqLy1dt8Y",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "830af414-0b08-48be-962f-7234932bd179"
+ },
+ "source": [
+ "df.loc[df['EmployeeName'] == 'JOSEPH DRISCOLL', 'JobTitle']"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "24 CAPTAIN, FIRE SUPPRESSION\n",
+ "Name: JobTitle, dtype: object"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 16
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "nBWc2QjlzxXL",
+ "outputId": "0a8d83ea-4e4c-4aac-fc2d-a2e1fa9fd76a"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "24 CAPTAIN, FIRE SUPPRESSION\n",
+ "Name: JobTitle, dtype: object"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 55
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "655t31sq5FsP"
+ },
+ "source": [
+ "**How much does JOSEPH DRISCOLL make (including benefits)?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "7y0404xYdvG-",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "2a0bba35-87a7-47ad-899c-d5d2933914fa"
+ },
+ "source": [
+ "df.loc[df['EmployeeName'] == 'JOSEPH DRISCOLL', 'TotalPayBenefits']"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "24 270324.91\n",
+ "Name: TotalPayBenefits, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 17
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "kEi-kCQk5M1u",
+ "outputId": "458f15e4-6c62-403d-b433-09a8d457697b"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "24 270324.91\n",
+ "Name: TotalPayBenefits, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 59
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7S8IJPSq5xvV"
+ },
+ "source": [
+ "**What is the name of the highest paid person (including benefits)?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "vBk2SnBRdx13",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "0ecb0edc-4871-41a1-b906-283d35fdd045"
+ },
+ "source": [
+ "df.loc[df['TotalPayBenefits'] == df['TotalPayBenefits'].max(), 'EmployeeName']"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 NATHANIEL FORD\n",
+ "Name: EmployeeName, dtype: object"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 21
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "iwG4BhNb51Yn",
+ "outputId": "d45a2036-a7d5-4464-dae4-0e1de12e6c89"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 NATHANIEL FORD\n",
+ "Name: EmployeeName, dtype: object"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 62
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3KPozhE66y_S"
+ },
+ "source": [
+ "**What is the name of the lowest paid person (including benefits)?**\n",
+ "\n",
+ "Find that person's row index, then use `iloc` to access the data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "z1FpptNody88",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "c12b9bcd-5166-4a0c-a55a-893d019b1215"
+ },
+ "source": [
+ "idx = np.where(df['TotalPayBenefits'].min() == df['TotalPayBenefits'])[0][0]\n",
+ "\n",
+ "df.iloc[idx]"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Id 148654\n",
+ "EmployeeName Joe Lopez\n",
+ "JobTitle Counselor, Log Cabin Ranch\n",
+ "BasePay 0\n",
+ "OvertimePay 0\n",
+ "OtherPay -618.13\n",
+ "Benefits 0.00\n",
+ "TotalPay -618.13\n",
+ "TotalPayBenefits -618.13\n",
+ "Year 2014\n",
+ "Notes NaN\n",
+ "Agency San Francisco\n",
+ "Status PT\n",
+ "Name: 148653, dtype: object"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 27
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "AS0x187B6fHH",
+ "outputId": "8ac24798-55f9-4315-d437-250b67a125d8"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Id 148654\n",
+ "EmployeeName Joe Lopez\n",
+ "JobTitle Counselor, Log Cabin Ranch\n",
+ "BasePay 0\n",
+ "OvertimePay 0\n",
+ "OtherPay -618.13\n",
+ "Benefits 0.00\n",
+ "TotalPay -618.13\n",
+ "TotalPayBenefits -618.13\n",
+ "Year 2014\n",
+ "Notes NaN\n",
+ "Agency San Francisco\n",
+ "Status PT\n",
+ "Name: 148653, dtype: object"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 70
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Use `info()` to find the column with **0 non-null** values, then drop that column and print `info()` again."
+ ],
+ "metadata": {
+ "id": "au-0Eu1M-UCv"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df = df.drop(columns=['Notes'], errors='ignore')\n",
+ "df.info()"
+ ],
+ "metadata": {
+ "id": "4V3T7O1lQFR6",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "9db7fc99-7c0a-462d-d0d9-16903ec65c8a"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 148654 entries, 0 to 148653\n",
+ "Data columns (total 12 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Id 148654 non-null int64 \n",
+ " 1 EmployeeName 148654 non-null object \n",
+ " 2 JobTitle 148654 non-null object \n",
+ " 3 BasePay 148045 non-null float64\n",
+ " 4 OvertimePay 148650 non-null float64\n",
+ " 5 OtherPay 148654 non-null object \n",
+ " 6 Benefits 112495 non-null object \n",
+ " 7 TotalPay 148654 non-null float64\n",
+ " 8 TotalPayBenefits 148654 non-null float64\n",
+ " 9 Year 148654 non-null int64 \n",
+ " 10 Agency 148654 non-null object \n",
+ " 11 Status 38119 non-null object \n",
+ "dtypes: float64(4), int64(2), object(6)\n",
+ "memory usage: 13.6+ MB\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ ""
+ ],
+ "metadata": {
+ "id": "SZRxohSr-QFw",
+ "outputId": "414af7be-ef95-4cdc-8bce-243052f409e7",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 148654 entries, 0 to 148653\n",
+ "Data columns (total 12 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 Id 148654 non-null int64 \n",
+ " 1 EmployeeName 148654 non-null object \n",
+ " 2 JobTitle 148654 non-null object \n",
+ " 3 BasePay 148045 non-null float64\n",
+ " 4 OvertimePay 148650 non-null float64\n",
+ " 5 OtherPay 148654 non-null object \n",
+ " 6 Benefits 112495 non-null object \n",
+ " 7 TotalPay 148654 non-null float64\n",
+ " 8 TotalPayBenefits 148654 non-null float64\n",
+ " 9 Year 148654 non-null int64 \n",
+ " 10 Agency 148654 non-null object \n",
+ " 11 Status 38119 non-null object \n",
+ "dtypes: float64(4), int64(2), object(6)\n",
+ "memory usage: 13.6+ MB\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "RojthL1u7hNf"
+ },
+ "source": [
+ "**What was the average (mean) BasePay of all employees per year (2011 to 2014)?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "5MWZEkT1d1LV",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 175
+ },
+ "outputId": "8946f74f-ccda-4127-d82e-097d715a31f8"
+ },
+ "source": [
+ "df.groupby('Year')[['BasePay']].mean().reset_index()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " BasePay | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " 63595.956517 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2012 | \n",
+ " 65436.406857 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2013 | \n",
+ " 69630.030216 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2014 | \n",
+ " 66564.421924 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " Year BasePay\n",
+ "0 2011 63595.956517\n",
+ "1 2012 65436.406857\n",
+ "2 2013 69630.030216\n",
+ "3 2014 66564.421924"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 35
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 173
+ },
+ "id": "LW1Fcpay7vza",
+ "outputId": "76443d97-b044-4963-dc83-8cf2dc4547c1"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Year | \n",
+ " BasePay | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 2011 | \n",
+ " 63595.956517 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2012 | \n",
+ " 65436.406857 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 2013 | \n",
+ " 69630.030216 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2014 | \n",
+ " 66564.421924 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Year BasePay\n",
+ "0 2011 63595.956517\n",
+ "1 2012 65436.406857\n",
+ "2 2013 69630.030216\n",
+ "3 2014 66564.421924"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 83
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "8ODaELxwLwe8"
+ },
+ "source": [
+ "**How many unique job titles are there?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "_MUO7JTOd2sL",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "11085b78-b4c0-4341-9959-4aad388cac73"
+ },
+ "source": [
+ "# df['JobTitle'].unique().size\n",
+ "df['JobTitle'].nunique()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "2159"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 40
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "b2U4RvnCLzXe",
+ "outputId": "9f78cde2-5eae-446f-f3ab-d55e067a2908"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "2159"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 87
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "gobw6W0qMHnZ"
+ },
+ "source": [
+ "**What are the top 5 most common jobs?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "KCwVAPxHd3-B",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "3739f319-7b67-49aa-fc94-ba898fd5323e"
+ },
+ "source": [
+ "df['JobTitle'].value_counts().head(5)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Transit Operator 7036\n",
+ "Special Nurse 4389\n",
+ "Registered Nurse 3736\n",
+ "Public Svc Aide-Public Works 2518\n",
+ "Police Officer 3 2421\n",
+ "Name: JobTitle, dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 44
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "XrcEvD3MMGN7",
+ "outputId": "9a266581-c392-44c6-8a03-84d0b6b51d41"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Transit Operator 7036\n",
+ "Special Nurse 4389\n",
+ "Registered Nurse 3736\n",
+ "Public Svc Aide-Public Works 2518\n",
+ "Police Officer 3 2421\n",
+ "Name: JobTitle, dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 89
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "DYo0zcxBMx8T"
+ },
+ "source": [
+ "**How many job titles have only one occurrence in 2013?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "5Zm7x9-zd5Hu",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "0afc5092-4c3c-4843-a78e-9e1b2c8a2f72"
+ },
+ "source": [
+ "df.loc[df['Year'] == 2013, 'JobTitle'].value_counts().eq(1).sum()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "202"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 48
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "i3XJLC_jM3aq",
+ "outputId": "f72fe225-218d-4239-a713-e8731c04a1fa"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "202"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 96
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "AxNuuKlVOqQp"
+ },
+ "source": [
+ "**How many people have the word chief in their job title?**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "uh8Oay3Yd6Eq",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "8c8ff4a8-0cf9-41cd-d9e9-a63912c0651e"
+ },
+ "source": [
+ "# count = 0\n",
+ "\n",
+ "# for x in df['JobTitle']:\n",
+ "# if 'chief' in x.lower().split(' '):\n",
+ "# count += 1\n",
+ "\n",
+ "# print(count)\n",
+ "\n",
+ "# def hasChief(x):\n",
+ "# return 'chief' in x.lower().split(' ')\n",
+ "\n",
+ "# df['JobTitle'].apply(hasChief).sum()\n",
+ "\n",
+ "df['JobTitle'].apply(lambda x: 'chief' in x.lower().split(' ')).sum()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "477"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 60
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "b-2w3Xp0N68f",
+ "outputId": "f94ae9e0-1bfe-4636-a302-e60b5952671c"
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "477"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 101
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "wPz_hb0NP0Tf"
+ },
+ "source": [
+ "**Find correlation of TotalPay with other fields**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "27TcvkAET8iz",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 238
+ },
+ "outputId": "9e2d84d7-9f23-49a9-87e9-53486a717eed"
+ },
+ "source": [
+ "df.select_dtypes(include='number').corr()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Id | \n",
+ " BasePay | \n",
+ " OvertimePay | \n",
+ " TotalPay | \n",
+ " TotalPayBenefits | \n",
+ " Year | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Id | \n",
+ " 1.000000 | \n",
+ " -0.204878 | \n",
+ " -0.081505 | \n",
+ " -0.211478 | \n",
+ " -0.092190 | \n",
+ " 0.968171 | \n",
+ "
\n",
+ " \n",
+ " BasePay | \n",
+ " -0.204878 | \n",
+ " 1.000000 | \n",
+ " 0.266740 | \n",
+ " 0.954494 | \n",
+ " 0.946595 | \n",
+ " 0.033751 | \n",
+ "
\n",
+ " \n",
+ " OvertimePay | \n",
+ " -0.081505 | \n",
+ " 0.266740 | \n",
+ " 1.000000 | \n",
+ " 0.504859 | \n",
+ " 0.467981 | \n",
+ " 0.027887 | \n",
+ "
\n",
+ " \n",
+ " TotalPay | \n",
+ " -0.211478 | \n",
+ " 0.954494 | \n",
+ " 0.504859 | \n",
+ " 1.000000 | \n",
+ " 0.977313 | \n",
+ " 0.032090 | \n",
+ "
\n",
+ " \n",
+ " TotalPayBenefits | \n",
+ " -0.092190 | \n",
+ " 0.946595 | \n",
+ " 0.467981 | \n",
+ " 0.977313 | \n",
+ " 1.000000 | \n",
+ " 0.151947 | \n",
+ "
\n",
+ " \n",
+ " Year | \n",
+ " 0.968171 | \n",
+ " 0.033751 | \n",
+ " 0.027887 | \n",
+ " 0.032090 | \n",
+ " 0.151947 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " Id BasePay ... TotalPayBenefits Year\n",
+ "Id 1.000000 -0.204878 ... -0.092190 0.968171\n",
+ "BasePay -0.204878 1.000000 ... 0.946595 0.033751\n",
+ "OvertimePay -0.081505 0.266740 ... 0.467981 0.027887\n",
+ "TotalPay -0.211478 0.954494 ... 0.977313 0.032090\n",
+ "TotalPayBenefits -0.092190 0.946595 ... 1.000000 0.151947\n",
+ "Year 0.968171 0.033751 ... 0.151947 1.000000\n",
+ "\n",
+ "[6 rows x 6 columns]"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 61
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "2Cxq8qE8d7Fi",
+ "outputId": "2dcce7cb-bac6-4e07-a1ed-1137dd6c1f05",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ }
+ },
+ "source": [
+ ""
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Id -0.211478\n",
+ "BasePay 0.954494\n",
+ "OvertimePay 0.504859\n",
+ "Benefits 0.632202\n",
+ "TotalPay 1.000000\n",
+ "TotalPayBenefits 0.977313\n",
+ "Year 0.032090\n",
+ "Notes NaN\n",
+ "Name: TotalPay, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 54
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Exploring correlation"
+ ],
+ "metadata": {
+ "id": "KjzBUTE6LHFr"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df2 = pd.DataFrame({\n",
+ " 'x': np.arange(1, 11),\n",
+ " 'y': np.arange(10, 0, -1),\n",
+ " 'z': np.arange(11, 21),\n",
+ " 'r': np.random.rand(10)\n",
+ "})"
+ ],
+ "metadata": {
+ "id": "aMVtNQqpLGiv"
+ },
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df2.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "muclJ2G5LZkD",
+ "outputId": "44e0faa3-8013-426a-9d00-e3d422870966"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 10 | \n",
+ " 11 | \n",
+ " 0.542859 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 9 | \n",
+ " 12 | \n",
+ " 0.322000 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " 8 | \n",
+ " 13 | \n",
+ " 0.921930 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " 7 | \n",
+ " 14 | \n",
+ " 0.471896 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 15 | \n",
+ " 0.597488 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " x y z r\n",
+ "0 1 10 11 0.542859\n",
+ "1 2 9 12 0.322000\n",
+ "2 3 8 13 0.921930\n",
+ "3 4 7 14 0.471896\n",
+ "4 5 6 15 0.597488"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 71
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df2.corr()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 175
+ },
+ "id": "IaQySagKLatT",
+ "outputId": "361cf01e-f8f1-4b74-95d0-56a636bf4904"
+ },
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ " r | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " x | \n",
+ " 1.000000 | \n",
+ " -1.000000 | \n",
+ " 1.000000 | \n",
+ " -0.219664 | \n",
+ "
\n",
+ " \n",
+ " y | \n",
+ " -1.000000 | \n",
+ " 1.000000 | \n",
+ " -1.000000 | \n",
+ " 0.219664 | \n",
+ "
\n",
+ " \n",
+ " z | \n",
+ " 1.000000 | \n",
+ " -1.000000 | \n",
+ " 1.000000 | \n",
+ " -0.219664 | \n",
+ "
\n",
+ " \n",
+ " r | \n",
+ " -0.219664 | \n",
+ " 0.219664 | \n",
+ " -0.219664 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " x y z r\n",
+ "x 1.000000 -1.000000 1.000000 -0.219664\n",
+ "y -1.000000 1.000000 -1.000000 0.219664\n",
+ "z 1.000000 -1.000000 1.000000 -0.219664\n",
+ "r -0.219664 0.219664 -0.219664 1.000000"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 72
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file