diff --git a/.ipynb_checkpoints/aceleradev_semana2-checkpoint.ipynb b/.ipynb_checkpoints/aceleradev_semana2-checkpoint.ipynb new file mode 100644 index 0000000..4ae22e7 --- /dev/null +++ b/.ipynb_checkpoints/aceleradev_semana2-checkpoint.ipynb @@ -0,0 +1,4724 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AceleraDev Codenation - Semana 2\n", + "\n", + "### Túlio Vieira de Souza | Data Scientist" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Manipulando dados" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#Importando os pacotes\n", + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "#acessando o help dos pacotes\n", + "pd?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dicionários" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "#Criando um dicionário com os dados\n", + "dados = {'canal_venda' : ['facebook', 'twitter', 'instagram', 'linkedin', 'facebook'],\n", + " 'acessos': [100, 200, 300 ,400, 500],\n", + " 'site': ['site1', 'site1', 'site2', 'site2', 'site3'],\n", + " 'vendas': [1000.52, 1052.34, 2002, 5000, 300 ]}" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'canal_venda': ['facebook', 'twitter', 'instagram', 'linkedin', 'facebook'],\n", + " 'acessos': [100, 200, 300, 400, 500],\n", + " 'site': ['site1', 'site1', 'site2', 'site2', 'site3'],\n", + " 'vendas': [1000.52, 1052.34, 2002, 5000, 300]}" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#para printar o dicionário\n", + "dados" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + 
{ + "data": { + "text/plain": [ + "dict" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#verificando o tipo do dicionario\n", + "type(dados)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['canal_venda', 'acessos', 'site', 'vendas'])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Acessando as chaves do meu dicionario\n", + "dados.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['site1', 'site1', 'site2', 'site2', 'site3']" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Acessando uma chave especifica\n", + "dados['site']" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "300" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Acessando uma posicao especifica de um dicionario\n", + "dados['acessos'][2]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'instagram'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Acessando uma posicao especifica de um dicionario\n", + "dados['canal_venda'][2]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['facebook', 'twitter', 'instagram']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Acessando uma fatia (os três primeiros itens) de uma chave do dicionário\n", + "dados['canal_venda'][:3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "### Listas" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type([1,2,3])" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "#Criando uma lista\n", + "lista = [200, 200 , 300 ,800, 200]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[200, 200, 300, 800, 200]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#printando a lista\n", + "lista" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "200" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Vendo valores especificos\n", + "lista[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[200, 200, 300]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#fatia da lista\n", + "lista[:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# Adicionando a lista ao dicionario \n", + "dados['lista'] = lista" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'canal_venda': ['facebook', 'twitter', 'instagram', 'linkedin', 'facebook'],\n", + " 'acessos': [100, 200, 300, 400, 500],\n", + " 'site': ['site1', 'site1', 'site2', 'site2', 'site3'],\n", + " 'vendas': [1000.52, 1052.34, 2002, 5000, 300],\n", + " 'lista': [200, 200, 300, 800, 200]}" + ] + }, + "execution_count": 17, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dados" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### DataFrames" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'canal_venda': ['facebook', 'twitter', 'instagram', 'linkedin', 'facebook'],\n", + " 'acessos': [100, 200, 300, 400, 500],\n", + " 'site': ['site1', 'site1', 'site2', 'site2', 'site3'],\n", + " 'vendas': [1000.52, 1052.34, 2002, 5000, 300],\n", + " 'lista': [200, 200, 300, 800, 200]}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dados" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "#Criar um data frame a partir de um dict\n", + "dataframe = pd.DataFrame(dados)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site11000.52200
1twitter200site11052.34200
2instagram300site22002.00300
3linkedin400site25000.00800
4facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 1000.52 200\n", + "1 twitter 200 site1 1052.34 200\n", + "2 instagram 300 site2 2002.00 300\n", + "3 linkedin 400 site2 5000.00 800\n", + "4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#acessando o dataframe\n", + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site11000.52200
1twitter200site11052.34200
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 1000.52 200\n", + "1 twitter 200 site1 1052.34 200" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#printando os primeiros casos do dataframe\n", + "dataframe.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 5)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Verificando o formato do dataframe\n", + "dataframe.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "QQRNOME = dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5, 5)" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "QQRNOME.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(QQRNOME)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RangeIndex(start=0, stop=5, step=1)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Verificando o indice do dataframe\n", + "dataframe.index" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "canal_venda object\n", + "acessos int64\n", + "site object\n", + "vendas float64\n", + "lista int64\n", + "dtype: object" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "#Verificando os tipos dos dados do dataframe\n", + "dataframe.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "object 2\n", + "int64 2\n", + "float64 1\n", + "dtype: int64" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#saber quantas colunas tem de cada tipo\n", + "dataframe.dtypes.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0FalseFalseFalseFalseFalse
1FalseFalseFalseFalseFalse
2FalseFalseFalseFalseFalse
3FalseFalseFalseFalseFalse
4FalseFalseFalseFalseFalse
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 False False False False False\n", + "1 False False False False False\n", + "2 False False False False False\n", + "3 False False False False False\n", + "4 False False False False False" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Verificando se existem valores faltantes\n", + "dataframe.isna()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "canal_venda 0\n", + "acessos 0\n", + "site 0\n", + "vendas 0\n", + "lista 0\n", + "dtype: int64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['canal_venda', 'acessos', 'site', 'vendas', 'lista'], dtype='object')" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#printando os nomes das colunas\n", + "dataframe.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 facebook\n", + "1 twitter\n", + "2 instagram\n", + "3 linkedin\n", + "4 facebook\n", + "Name: canal_venda, dtype: object" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Acessando uma coluna especifica\n", + "dataframe['canal_venda']" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "# Criando uma nova coluna\n", + "dataframe['nova_coluna'] = [1, 2, 3, 4, 5]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslistanova_coluna
0facebook100site11000.522001
1twitter200site11052.342002
2instagram300site22002.003003
3linkedin400site25000.008004
4facebook500site3300.002005
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista nova_coluna\n", + "0 facebook 100 site1 1000.52 200 1\n", + "1 twitter 200 site1 1052.34 200 2\n", + "2 instagram 300 site2 2002.00 300 3\n", + "3 linkedin 400 site2 5000.00 800 4\n", + "4 facebook 500 site3 300.00 200 5" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['canal_venda', 'acessos', 'site', 'vendas', 'lista', 'nova_coluna'], dtype='object')" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
vendaslistanova_coluna
01000.522001
11052.342002
22002.003003
35000.008004
4300.002005
\n", + "
" + ], + "text/plain": [ + " vendas lista nova_coluna\n", + "0 1000.52 200 1\n", + "1 1052.34 200 2\n", + "2 2002.00 300 3\n", + "3 5000.00 800 4\n", + "4 300.00 200 5" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Removendo colunas somente no plotter na tela\n", + "dataframe.drop(columns = ['acessos','site', 'canal_venda'])" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslistanova_coluna
0facebook100site11000.522001
1twitter200site11052.342002
2instagram300site22002.003003
3linkedin400site25000.008004
4facebook500site3300.002005
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista nova_coluna\n", + "0 facebook 100 site1 1000.52 200 1\n", + "1 twitter 200 site1 1052.34 200 2\n", + "2 instagram 300 site2 2002.00 300 3\n", + "3 linkedin 400 site2 5000.00 800 4\n", + "4 facebook 500 site3 300.00 200 5" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "dataframe.drop(columns='nova_coluna',inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['canal_venda', 'acessos', 'site', 'vendas', 'lista'], dtype='object')" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Mostrando as colunas\n", + "dataframe.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "200" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Acessando valores especificos\n", + "dataframe['acessos'][1]" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 facebook\n", + "1 twitter\n", + "Name: canal_venda, dtype: object" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Acessando fatia de coluna especifica\n", + "dataframe['canal_venda'][:2]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site11000.52200
1twitter200site11052.34200
2instagram300site22002.00300
3linkedin400site25000.00800
4facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 1000.52 200\n", + "1 twitter 200 site1 1052.34 200\n", + "2 instagram 300 site2 2002.00 300\n", + "3 linkedin 400 site2 5000.00 800\n", + "4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
lista
3800
4200
\n", + "
" + ], + "text/plain": [ + " lista\n", + "3 800\n", + "4 200" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "### Fatiando os dados usando o iloc (linhas | colunas)\n", + "dataframe.iloc[3:,4:]" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site11000.52200
1twitter200site11052.34200
2instagram300site22002.00300
3linkedin400site25000.00800
4facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 1000.52 200\n", + "1 twitter 200 site1 1052.34 200\n", + "2 instagram 300 site2 2002.00 300\n", + "3 linkedin 400 site2 5000.00 800\n", + "4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site11000.52200
1twitter200site11052.34200
2instagram300site22002.00300
3linkedin400site25000.00800
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 1000.52 200\n", + "1 twitter 200 site1 1052.34 200\n", + "2 instagram 300 site2 2002.00 300\n", + "3 linkedin 400 site2 5000.00 800" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Fatiar os dados usando o loc (indice)\n", + "dataframe.loc[:3]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendavendas
0facebook1000.52
1twitter1052.34
2instagram2002.00
3linkedin5000.00
4facebook300.00
\n", + "
" + ], + "text/plain": [ + " canal_venda vendas\n", + "0 facebook 1000.52\n", + "1 twitter 1052.34\n", + "2 instagram 2002.00\n", + "3 linkedin 5000.00\n", + "4 facebook 300.00" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Selecionando colunas especificas\n", + "dataframe[['canal_venda', 'vendas']]" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "#Passando os valores atraves de lista\n", + "filtro = ['canal_venda', 'acessos']" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessos
0facebook100
1twitter200
2instagram300
3linkedin400
4facebook500
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos\n", + "0 facebook 100\n", + "1 twitter 200\n", + "2 instagram 300\n", + "3 linkedin 400\n", + "4 facebook 500" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe[filtro]" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 5 entries, 0 to 4\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 canal_venda 5 non-null object \n", + " 1 acessos 5 non-null int64 \n", + " 2 site 5 non-null object \n", + " 3 vendas 5 non-null float64\n", + " 4 lista 5 non-null int64 \n", + "dtypes: float64(1), int64(2), object(2)\n", + "memory usage: 328.0+ bytes\n" + ] + } + ], + "source": [ + "# Usando o metodo info()\n", + "dataframe.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site11000.52200
1twitter200site11052.34200
2instagram300site22002.00300
3linkedin400site25000.00800
4facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 1000.52 200\n", + "1 twitter 200 site1 1052.34 200\n", + "2 instagram 300 site2 2002.00 300\n", + "3 linkedin 400 site2 5000.00 800\n", + "4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [], + "source": [ + "#Pivotando os dados (coluna)\n", + "aux = dataframe.pivot(index = 'canal_venda', columns='site', values='acessos')" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitesite1site2site3
canal_venda
facebook100.0NaN500.0
instagramNaN300.0NaN
linkedinNaN400.0NaN
twitter200.0NaNNaN
\n", + "
" + ], + "text/plain": [ + "site site1 site2 site3\n", + "canal_venda \n", + "facebook 100.0 NaN 500.0\n", + "instagram NaN 300.0 NaN\n", + "linkedin NaN 400.0 NaN\n", + "twitter 200.0 NaN NaN" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aux" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Index: 4 entries, facebook to twitter\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 site1 2 non-null float64\n", + " 1 site2 2 non-null float64\n", + " 2 site3 1 non-null float64\n", + "dtypes: float64(3)\n", + "memory usage: 128.0+ bytes\n" + ] + } + ], + "source": [ + "aux.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitesite1site2site3
canal_venda
facebook100.00.0500.0
instagram0.0300.00.0
linkedin0.0400.00.0
twitter200.00.00.0
\n", + "
" + ], + "text/plain": [ + "site site1 site2 site3\n", + "canal_venda \n", + "facebook 100.0 0.0 500.0\n", + "instagram 0.0 300.0 0.0\n", + "linkedin 0.0 400.0 0.0\n", + "twitter 200.0 0.0 0.0" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Completando os valores faltantes usando fillna\n", + "#Pivotando os dados (coluna)\n", + "aux= dataframe.pivot(index = 'canal_venda', columns='site', values='acessos').fillna(0)\n", + "dataframe.pivot(index = 'canal_venda', columns='site', values='acessos').fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site11000.52200
1twitter200site11052.34200
2instagram300site22002.00300
3linkedin400site25000.00800
4facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 1000.52 200\n", + "1 twitter 200 site1 1052.34 200\n", + "2 instagram 300 site2 2002.00 300\n", + "3 linkedin 400 site2 5000.00 800\n", + "4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitevariablevalue
0site1canal_vendafacebook
1site1canal_vendatwitter
2site2canal_vendainstagram
3site2canal_vendalinkedin
4site3canal_vendafacebook
\n", + "
" + ], + "text/plain": [ + " site variable value\n", + "0 site1 canal_venda facebook\n", + "1 site1 canal_venda twitter\n", + "2 site2 canal_venda instagram\n", + "3 site2 canal_venda linkedin\n", + "4 site3 canal_venda facebook" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Mudando as colunas usando o comando melt\n", + "dataframe.melt(id_vars='site', value_vars=['canal_venda'])" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['site1', 'site2', 'site3'], dtype='object', name='site')\n", + "Index(['index', 'canal_venda', 'acessos', 'site', 'vendas', 'lista'], dtype='object')\n" + ] + } + ], + "source": [ + "#Resetando o indice do dataframe\n", + "print(aux.columns)\n", + "aux = dataframe.reset_index()\n", + "print(aux.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcanal_vendaacessossitevendaslista
00facebook100site11000.52200
11twitter200site11052.34200
22instagram300site22002.00300
33linkedin400site25000.00800
44facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " index canal_venda acessos site vendas lista\n", + "0 0 facebook 100 site1 1000.52 200\n", + "1 1 twitter 200 site1 1052.34 200\n", + "2 2 instagram 300 site2 2002.00 300\n", + "3 3 linkedin 400 site2 5000.00 800\n", + "4 4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aux" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site11000.52200
1twitter200site11052.34200
2instagram300site22002.00300
3linkedin400site25000.00800
4facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 1000.52 200\n", + "1 twitter 200 site1 1052.34 200\n", + "2 instagram 300 site2 2002.00 300\n", + "3 linkedin 400 site2 5000.00 800\n", + "4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
level_0indexcanal_vendaacessossitevendaslista
000facebook100site11000.52200
111twitter200site11052.34200
222instagram300site22002.00300
333linkedin400site25000.00800
444facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " level_0 index canal_venda acessos site vendas lista\n", + "0 0 0 facebook 100 site1 1000.52 200\n", + "1 1 1 twitter 200 site1 1052.34 200\n", + "2 2 2 instagram 300 site2 2002.00 300\n", + "3 3 3 linkedin 400 site2 5000.00 800\n", + "4 4 4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aux.reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitesite1site2site3
canal_venda
facebook100.00.0500.0
instagram0.0300.00.0
linkedin0.0400.00.0
twitter200.00.00.0
\n", + "
" + ], + "text/plain": [ + "site site1 site2 site3\n", + "canal_venda \n", + "facebook 100.0 0.0 500.0\n", + "instagram 0.0 300.0 0.0\n", + "linkedin 0.0 400.0 0.0\n", + "twitter 200.0 0.0 0.0" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aux" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendasitevalue
0facebooksite1100.0
1instagramsite10.0
2linkedinsite10.0
3twittersite1200.0
4facebooksite20.0
5instagramsite2300.0
6linkedinsite2400.0
7twittersite20.0
8facebooksite3500.0
9instagramsite30.0
10linkedinsite30.0
11twittersite30.0
\n", + "
" + ], + "text/plain": [ + " canal_venda site value\n", + "0 facebook site1 100.0\n", + "1 instagram site1 0.0\n", + "2 linkedin site1 0.0\n", + "3 twitter site1 200.0\n", + "4 facebook site2 0.0\n", + "5 instagram site2 300.0\n", + "6 linkedin site2 400.0\n", + "7 twitter site2 0.0\n", + "8 facebook site3 500.0\n", + "9 instagram site3 0.0\n", + "10 linkedin site3 0.0\n", + "11 twitter site3 0.0" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Exemplo do comando melt\n", + "aux.melt(id_vars='canal_venda', value_vars=['site1', 'site2', 'site3'])" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site11000.52200
1twitter200site11052.34200
2instagram300site22002.00300
3linkedin400site25000.00800
4facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 1000.52 200\n", + "1 twitter 200 site1 1052.34 200\n", + "2 instagram 300 site2 2002.00 300\n", + "3 linkedin 400 site2 5000.00 800\n", + "4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 101, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "canal_venda facebooktwitterinstagramlinkedinfacebook\n", + "acessos 1500\n", + "site site1site1site2site2site3\n", + "vendas 9354.86\n", + "lista 1700\n", + "dtype: object" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Somando as colunas do dataframe\n", + "dataframe.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site11000.52200
1twitter200site11052.34200
2instagram300site22002.00300
3linkedin400site25000.00800
4facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 1000.52 200\n", + "1 twitter 200 site1 1052.34 200\n", + "2 instagram 300 site2 2002.00 300\n", + "3 linkedin 400 site2 5000.00 800\n", + "4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1300.52\n", + "1 1452.34\n", + "2 2602.00\n", + "3 6200.00\n", + "4 1000.00\n", + "dtype: float64" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Somando as linhas do dataframe\n", + "dataframe.sum(axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Por linha: 0 200.0\n", + "1 200.0\n", + "2 300.0\n", + "3 800.0\n", + "4 300.0\n", + "dtype: float64\n", + "Por coluna: acessos 300.00\n", + "vendas 1052.34\n", + "lista 200.00\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "#Calculando a mediana das colunas numericas\n", + "print('Por linha: ',dataframe.median(axis= 1) )\n", + "print('Por coluna: ', dataframe.median())" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "acessos 300.000\n", + "vendas 1870.972\n", + "lista 340.000\n", + "dtype: float64" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Calculando a media das colunas númericas\n", + "dataframe.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "acessos 158.113883\n", + "vendas 1850.931024\n", + "lista 260.768096\n", + "dtype: float64" + ] + }, + "execution_count": 47, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Calculando o desvio padrão das colunas numericas\n", + "dataframe.std()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acessosvendaslista
count5.0000005.0000005.000000
mean300.0000001870.972000340.000000
std158.1138831850.931024260.768096
min100.000000300.000000200.000000
25%200.0000001000.520000200.000000
50%300.0000001052.340000200.000000
75%400.0000002002.000000300.000000
max500.0000005000.000000800.000000
\n", + "
" + ], + "text/plain": [ + " acessos vendas lista\n", + "count 5.000000 5.000000 5.000000\n", + "mean 300.000000 1870.972000 340.000000\n", + "std 158.113883 1850.931024 260.768096\n", + "min 100.000000 300.000000 200.000000\n", + "25% 200.000000 1000.520000 200.000000\n", + "50% 300.000000 1052.340000 200.000000\n", + "75% 400.000000 2002.000000 300.000000\n", + "max 500.000000 5000.000000 800.000000" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Usando o comando describe que calcula estatisticas descritivas para colunas numericas\n", + "dataframe.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site1300.00200.0
1NaN200site21000.52NaN
2NaN300NaN1052.34NaN
3NaN400NaN2002.00NaN
4NaN500NaN5000.00NaN
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 300.00 200.0\n", + "1 NaN 200 site2 1000.52 NaN\n", + "2 NaN 300 NaN 1052.34 NaN\n", + "3 NaN 400 NaN 2002.00 NaN\n", + "4 NaN 500 NaN 5000.00 NaN" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Calculando a moda\n", + "dataframe.mode()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "canal_venda twitter\n", + "acessos 500\n", + "site site3\n", + "vendas 5000\n", + "lista 800\n", + "dtype: object" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Valor maximo por coluna\n", + "dataframe.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 215, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "canal_venda facebook\n", + "acessos 100\n", + "site site1\n", + "vendas 300\n", + "lista 200\n", + "dtype: object" + ] + }, + "execution_count": 215, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Valor minimo por coluna\n", + "dataframe.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 217, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "canal_venda 4\n", + "acessos 5\n", + "site 3\n", + "vendas 5\n", + "lista 3\n", + "dtype: int64" + ] + }, + "execution_count": 217, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Printando o numero de unicos\n", + "dataframe.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 221, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "facebook 2\n", + "linkedin 1\n", + "instagram 1\n", + "twitter 1\n", + "Name: canal_venda, dtype: int64" + ] + }, + "execution_count": 221, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Contando valores unicos de uma 
coluna\n", + "dataframe['canal_venda'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 222, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['facebook', 'twitter', 'instagram', 'linkedin'], dtype=object)" + ] + }, + "execution_count": 222, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Valores unicos de uma coluna\n", + "dataframe['canal_venda'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 225, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "site\n", + "site1 300\n", + "site2 700\n", + "site3 500\n", + "Name: acessos, dtype: int64" + ] + }, + "execution_count": 225, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Usando o groupby com valores numericos\n", + "dataframe.groupby('site')['acessos'].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "canal_venda\n", + "facebook 300\n", + "instagram 300\n", + "linkedin 400\n", + "twitter 200\n", + "Name: acessos, dtype: int64" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Usando o groupby com valores numericos\n", + "dataframe.groupby('canal_venda')['acessos'].median()" + ] + }, + { + "cell_type": "code", + "execution_count": 229, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "site\n", + "site1 [facebook, twitter]\n", + "site2 [instagram, linkedin]\n", + "site3 [facebook]\n", + "Name: canal_venda, dtype: object" + ] + }, + "execution_count": 229, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Usando o groupby com categoricos\n", + "dataframe.groupby('site')['canal_venda'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 230, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "site\n", + "site1 facebook\n", + "site2 
instagram\n", + "site3 facebook\n", + "Name: canal_venda, dtype: object" + ] + }, + "execution_count": 230, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Usando o groupby com categoricos\n", + "dataframe.groupby('site')['canal_venda'].first()" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
siteacessos
canal_venda
facebook[site1, site3]600
instagram[site2]300
linkedin[site2]400
twitter[site1]200
\n", + "
" + ], + "text/plain": [ + " site acessos\n", + "canal_venda \n", + "facebook [site1, site3] 600\n", + "instagram [site2] 300\n", + "linkedin [site2] 400\n", + "twitter [site1] 200" + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Usando o groupby com a função agg\n", + "dataframe.groupby('canal_venda').agg({'site': 'unique',\n", + " 'acessos': 'sum'})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
acessosvendaslista
acessos1.000000.0000000.335410
vendas0.000001.0000000.894427
lista0.335410.8944271.000000
\n", + "
" + ], + "text/plain": [ + " acessos vendas lista\n", + "acessos 1.00000 0.000000 0.335410\n", + "vendas 0.00000 1.000000 0.894427\n", + "lista 0.33541 0.894427 1.000000" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Correlações entre variaveis\n", + "dataframe.corr(method = 'spearman')" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslista
0facebook100site11000.52200
1twitter200site11052.34200
2instagram300site22002.00300
3linkedin400site25000.00800
4facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista\n", + "0 facebook 100 site1 1000.52 200\n", + "1 twitter 200 site1 1052.34 200\n", + "2 instagram 300 site2 2002.00 300\n", + "3 linkedin 400 site2 5000.00 800\n", + "4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "#Criando variaveis categoricas por fatia de variavel numerica\n", + "dataframe['categoria_vendas'] = pd.cut(dataframe['vendas'],\n", + " bins= (0, 1500, 2000, 8000), \n", + " labels = ('0 a 1500', '1500 a 2000', '2000 a 8000'))" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslistacategoria_vendas
0facebook100site11000.522000 a 1500
1twitter200site11052.342000 a 1500
2instagram300site22002.003002000 a 8000
3linkedin400site25000.008002000 a 8000
4facebook500site3300.002000 a 1500
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista categoria_vendas\n", + "0 facebook 100 site1 1000.52 200 0 a 1500\n", + "1 twitter 200 site1 1052.34 200 0 a 1500\n", + "2 instagram 300 site2 2002.00 300 2000 a 8000\n", + "3 linkedin 400 site2 5000.00 800 2000 a 8000\n", + "4 facebook 500 site3 300.00 200 0 a 1500" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "#Criando variavel categorica usando compressao de lista\n", + "dataframe['categoria_acessos'] = ['maior_que_300' if x > 300 else 'menor_que_300' for x in dataframe['acessos']]" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslistacategoria_vendascategoria_acessos
0facebook100site11000.522000 a 1500menor_que_300
1twitter200site11052.342000 a 1500menor_que_300
2instagram300site22002.003002000 a 8000menor_que_300
3linkedin400site25000.008002000 a 8000maior_que_300
4facebook500site3300.002000 a 1500maior_que_300
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista categoria_vendas \\\n", + "0 facebook 100 site1 1000.52 200 0 a 1500 \n", + "1 twitter 200 site1 1052.34 200 0 a 1500 \n", + "2 instagram 300 site2 2002.00 300 2000 a 8000 \n", + "3 linkedin 400 site2 5000.00 800 2000 a 8000 \n", + "4 facebook 500 site3 300.00 200 0 a 1500 \n", + "\n", + " categoria_acessos \n", + "0 menor_que_300 \n", + "1 menor_que_300 \n", + "2 menor_que_300 \n", + "3 maior_que_300 \n", + "4 maior_que_300 " + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "#Juntando dois dataframes | Criando o dataframe_2\n", + "dataframe_2 = pd.DataFrame({'site': ['site1', 'site1', 'site2', 'site2', 'site3'],\n", + " 'suporte': ['Carlos', 'Carlos', 'Maria', 'Maria', 'Ezequiel']})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslistacategoria_vendascategoria_acessossuporte
0facebook100site11000.522000 a 1500menor_que_300Carlos
1facebook100site11000.522000 a 1500menor_que_300Carlos
2twitter200site11052.342000 a 1500menor_que_300Carlos
3twitter200site11052.342000 a 1500menor_que_300Carlos
4instagram300site22002.003002000 a 8000menor_que_300Maria
5instagram300site22002.003002000 a 8000menor_que_300Maria
6linkedin400site25000.008002000 a 8000maior_que_300Maria
7linkedin400site25000.008002000 a 8000maior_que_300Maria
8facebook500site3300.002000 a 1500maior_que_300Ezequiel
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista categoria_vendas \\\n", + "0 facebook 100 site1 1000.52 200 0 a 1500 \n", + "1 facebook 100 site1 1000.52 200 0 a 1500 \n", + "2 twitter 200 site1 1052.34 200 0 a 1500 \n", + "3 twitter 200 site1 1052.34 200 0 a 1500 \n", + "4 instagram 300 site2 2002.00 300 2000 a 8000 \n", + "5 instagram 300 site2 2002.00 300 2000 a 8000 \n", + "6 linkedin 400 site2 5000.00 800 2000 a 8000 \n", + "7 linkedin 400 site2 5000.00 800 2000 a 8000 \n", + "8 facebook 500 site3 300.00 200 0 a 1500 \n", + "\n", + " categoria_acessos suporte \n", + "0 menor_que_300 Carlos \n", + "1 menor_que_300 Carlos \n", + "2 menor_que_300 Carlos \n", + "3 menor_que_300 Carlos \n", + "4 menor_que_300 Maria \n", + "5 menor_que_300 Maria \n", + "6 maior_que_300 Maria \n", + "7 maior_que_300 Maria \n", + "8 maior_que_300 Ezequiel " + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Realizando o merge\n", + "dataframe.merge(dataframe_2, on = 'site', how = 'left')" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [], + "source": [ + "#Salvando o dataframe como csv\n", + "dataframe.to_csv('dataframe.csv', sep = ';', decimal = ',', index = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [], + "source": [ + "#Lendo dados no formato csv\n", + "dataframe_lido = pd.read_csv('dataframe.csv', sep = ';', decimal = ',')" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
canal_vendaacessossitevendaslistacategoria_vendascategoria_acessos
0facebook100site11000.522000 a 1500menor_que_300
1twitter200site11052.342000 a 1500menor_que_300
2instagram300site22002.003002000 a 8000menor_que_300
3linkedin400site25000.008002000 a 8000maior_que_300
4facebook500site3300.002000 a 1500maior_que_300
\n", + "
" + ], + "text/plain": [ + " canal_venda acessos site vendas lista categoria_vendas \\\n", + "0 facebook 100 site1 1000.52 200 0 a 1500 \n", + "1 twitter 200 site1 1052.34 200 0 a 1500 \n", + "2 instagram 300 site2 2002.00 300 2000 a 8000 \n", + "3 linkedin 400 site2 5000.00 800 2000 a 8000 \n", + "4 facebook 500 site3 300.00 200 0 a 1500 \n", + "\n", + " categoria_acessos \n", + "0 menor_que_300 \n", + "1 menor_que_300 \n", + "2 menor_que_300 \n", + "3 maior_que_300 \n", + "4 maior_que_300 " + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe_lido.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [], + "source": [ + "json = pd.read_json('https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/index.json')" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'offerCode': 'AmazonMQ',\n", + " 'versionIndexUrl': '/offers/v1.0/aws/AmazonMQ/index.json',\n", + " 'currentVersionUrl': '/offers/v1.0/aws/AmazonMQ/current/index.json',\n", + " 'currentRegionIndexUrl': '/offers/v1.0/aws/AmazonMQ/current/region_index.json'}" + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "json['offers'][99]" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": {}, + "outputs": [], + "source": [ + "json.reset_index(inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 A4B\n", + "1 AWSAmplify\n", + "2 AWSAppSync\n", + "3 AWSBackup\n", + "4 AWSBudgets\n", + " ... 
\n", + "143 comprehendmedical\n", + "144 datapipeline\n", + "145 mobileanalytics\n", + "146 transcribe\n", + "147 translate\n", + "Name: index, Length: 148, dtype: object" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "json['index']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imputação de dados" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('train.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "aux = pd.DataFrame({'colunas': df.columns,\n", + " 'tipos': df.dtypes,\n", + " 'percentual_faltante': df.isna().sum() / df.shape[0]})" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
colunastipospercentual_faltante
PassengerIdPassengerIdint640.000000
SurvivedSurvivedint640.000000
PclassPclassint640.000000
NameNameobject0.000000
SexSexobject0.000000
AgeAgefloat640.000000
SibSpSibSpint640.000000
ParchParchint640.000000
TicketTicketobject0.000000
FareFarefloat640.000000
CabinCabinobject0.771044
EmbarkedEmbarkedobject0.002245
\n", + "
" + ], + "text/plain": [ + " colunas tipos percentual_faltante\n", + "PassengerId PassengerId int64 0.000000\n", + "Survived Survived int64 0.000000\n", + "Pclass Pclass int64 0.000000\n", + "Name Name object 0.000000\n", + "Sex Sex object 0.000000\n", + "Age Age float64 0.000000\n", + "SibSp SibSp int64 0.000000\n", + "Parch Parch int64 0.000000\n", + "Ticket Ticket object 0.000000\n", + "Fare Fare float64 0.000000\n", + "Cabin Cabin object 0.771044\n", + "Embarked Embarked object 0.002245" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aux" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "#Dados númericos : média ou mediana\n", + "df['Age'] = df['Age'].fillna(df['Age'].mode())" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "#Dados categoricos: Unknown ou moda\n", + "df['Cabin'] = df['Cabin'].fillna('Unknown')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Unknown 687\n", + "B96 B98 4\n", + "C23 C25 C27 4\n", + "G6 4\n", + "E101 3\n", + " ... 
\n", + "C85 1\n", + "C148 1\n", + "A36 1\n", + "B3 1\n", + "D28 1\n", + "Name: Cabin, Length: 148, dtype: int64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Cabin'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "891" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/.ipynb_checkpoints/semana_3-checkpoint.ipynb b/.ipynb_checkpoints/semana_3-checkpoint.ipynb new file mode 100644 index 0000000..4207360 --- /dev/null +++ b/.ipynb_checkpoints/semana_3-checkpoint.ipynb @@ -0,0 +1,1179 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Aceleradev Codenation - Semana 3\n", + "## Túlio Vieira de Souza | Data Scientist" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "dados : https://www.kaggle.com/rubenssjr/brasilian-houses-to-rent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Colunas \n", + "\n", + "- city : Cidade onde o imóvel está localizada / City where the property is located\n", + "- area : Area do imovel / Property area\n", + "- rooms: Numero de quartos/ Quantity of rooms\n", + "- bathroom: Numero de banheiros / Quantity of bathroom\n", + "- parking spaces : Numero de vagas / Quantity of parking 
spaces\n", + "- floor : Andar / Floor\n", + "- animal : Aceita animais? / Accept animals?\n", + "- furniture : Mobiliada? / Furnished?\n", + "- hoa (RS): Valor do condomínio / Homeowners association tax \n", + "- rent amount (RS) : Valor do Aluguel (/) Rent amount \n", + "- property tax (RS) : IPTU (/) Property tax\n", + "- fire insurance (RS) : Seguro Incêndio / Fire Insurance\n", + "- total (RS) : Valor total / Total" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importando os pacotes" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('houses_to_rent_v2.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cityarearoomsbathroomparking spacesflooranimalfurniturehoa (R$)rent amount (R$)property tax (R$)fire insurance (R$)total (R$)
0São Paulo702117aceptfurnished20653300211425618
1São Paulo32044020aceptnot furnished120049601750637973
2Porto Alegre801116aceptnot furnished100028000413841
3Porto Alegre512102aceptnot furnished270111222171421
4São Paulo251101not aceptnot furnished08002511836
\n", + "
" + ], + "text/plain": [ + " city area rooms bathroom parking spaces floor animal \\\n", + "0 São Paulo 70 2 1 1 7 acept \n", + "1 São Paulo 320 4 4 0 20 acept \n", + "2 Porto Alegre 80 1 1 1 6 acept \n", + "3 Porto Alegre 51 2 1 0 2 acept \n", + "4 São Paulo 25 1 1 0 1 not acept \n", + "\n", + " furniture hoa (R$) rent amount (R$) property tax (R$) \\\n", + "0 furnished 2065 3300 211 \n", + "1 not furnished 1200 4960 1750 \n", + "2 not furnished 1000 2800 0 \n", + "3 not furnished 270 1112 22 \n", + "4 not furnished 0 800 25 \n", + "\n", + " fire insurance (R$) total (R$) \n", + "0 42 5618 \n", + "1 63 7973 \n", + "2 41 3841 \n", + "3 17 1421 \n", + "4 11 836 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "city object\n", + "area int64\n", + "rooms int64\n", + "bathroom int64\n", + "parking spaces int64\n", + "floor object\n", + "animal object\n", + "furniture object\n", + "hoa (R$) int64\n", + "rent amount (R$) int64\n", + "property tax (R$) int64\n", + "fire insurance (R$) int64\n", + "total (R$) int64\n", + "dtype: object" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 10692 entries, 0 to 10691\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 city 10692 non-null object\n", + " 1 area 10692 non-null int64 \n", + " 2 rooms 10692 non-null int64 \n", + " 3 bathroom 10692 non-null int64 \n", + " 4 parking spaces 10692 non-null int64 \n", + " 5 floor 10692 non-null object\n", + " 6 animal 10692 non-null object\n", + " 7 furniture 10692 
non-null object\n", + " 8 hoa (R$) 10692 non-null int64 \n", + " 9 rent amount (R$) 10692 non-null int64 \n", + " 10 property tax (R$) 10692 non-null int64 \n", + " 11 fire insurance (R$) 10692 non-null int64 \n", + " 12 total (R$) 10692 non-null int64 \n", + "dtypes: int64(9), object(4)\n", + "memory usage: 1.1+ MB\n" + ] + } + ], + "source": [ + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Problema : Explorar o valor do aluguel (rent amount RS)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Estatística univariada" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "df.rename(columns = {'rent amount (R$)' : 'valor_aluguel'}, inplace = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3896.247194163861" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['valor_aluguel'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2661.0" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['valor_aluguel'].median()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3408.5455176710816" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['valor_aluguel'].std()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 10692.000000\n", + "mean 3896.247194\n", + "std 3408.545518\n", + "min 450.000000\n", + "25% 1530.000000\n", + "50% 2661.000000\n", + "75% 5000.000000\n", + "max 45000.000000\n", + "Name: valor_aluguel, dtype: float64" + 
] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['valor_aluguel'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAV8klEQVR4nO3df7BkZX3n8ffHGQFdf/BrMNTMJAPJlAlrmRUnyK7ZrCsG+WEYktIEy4oTJJndDW502S0dNCXZpKyCbFaUSlaDwApGRUQTZhWXnaDG2ipBB1RAERmRhRHi3ISfBpWg3/3jPJf0zNyZ0zPcvt339vtV1dXnfM/T3d9+ijtfnvM8fU6qCkmS9uZp405AkjT5LBaSpF4WC0lSL4uFJKmXxUKS1Gv5uBMYhcMPP7zWrFkz7jQkaVG56aab/q6qVsx1bEkWizVr1rB169ZxpyFJi0qS/7enY56GkiT1slhIknpZLCRJvSwWkqReFgtJUi+LhSSpl8VCktTLYiFJ6mWxkCT1WpK/4J5PazZ96sntu88/dYyZSNL4OLKQJPWyWEiSelksJEm9LBaSpF4WC0lSL4uFJKmXxUKS1MtiIUnqZbGQJPWyWEiSelksJEm9RlYsklyWZEeS2+Y49l+SVJLD236SXJRkW5Jbkhw70HZDkjvbY8Oo8pUk7dkoRxYfAE7aNZhkNfDLwD0D4ZOBte2xEXhva3socB7wEuA44Lwkh4wwZ0nSHEZWLKrq88ADcxy6EHgLUAOx9cAV1bkBODjJkcArgS1V9UBVPQhsYY4CJEkarQWds0hyGvCdqvrqLodWAvcO7G9vsT3F53rvjUm2Jtk6MzMzj1lLkhasWCR5JvB24B1zHZ4jVnuJ7x6suriq1lXVuhUrVux/opKk3SzkyOKngaOArya5G1gF3JzkJ+hGDKsH2q4C7ttLXJK0gBasWFTVrVV1RFWtqao1dIXg2Kr6W2Az8Pq2Kup44OGquh+4DjgxySFtYvvEFpMkLaBRLp39CPAF4PlJtic5ay/NrwXuArYB7wd+F6CqHgD+CPhSe/xhi0mSFtDI7sFdVa/tOb5mYLuAs/fQ7jLgsnlNTpK0T/wFtySpl8VCktTLYiFJ6mWxkCT1slhIknpZLCRJvSwWkqReFgtJUi+LhSSpl8VCktTLYiFJ6mWxkCT1slhIknpZLCRJvSwWkqReFgtJUi+LhSSp18julLfUrdn0qSe37z7/1DFmIkmjN8p7cF+WZEeS2wZi/y3JN5LckuQvkxw8cOzcJNuS3JHklQPxk1psW5JNo8pXkrRnozwN9QHgpF1iW4AXVNULgW8C5wIkOQY4A/jn7TX/I8myJMuAPwNOBo4BXtvaSpIW0MhOQ1XV55Os2SX2fwZ2bwBe3bbXA1dW1Q+BbyfZBhzXjm2rqrsAklzZ2n59VHnvzeCpJ0maJuOc4H4D8Om2vRK4d+DY9hbbU3w3STYm2Zpk68zMzAjSlaTpNZZikeTtwBPAh2ZDczSrvcR3D1ZdXFXrqmrdihUr5idRSRIwhtVQSTYArwJOqKrZf/i3A6sHmq0C7mvbe4pLkhbIgo4skpwEvB
U4raoeGzi0GTgjyYFJjgLWAl8EvgSsTXJUkgPoJsE3L2TOkqQRjiySfAR4GXB4ku3AeXSrnw4EtiQBuKGq/n1VfS3JVXQT108AZ1fVj9r7vBG4DlgGXFZVXxtVzpKkuY1yNdRr5whfupf27wTeOUf8WuDaeUxNkrSPvNyHJKmXxUKS1MtiIUnqZbGQJPWyWEiSelksJEm9LBaSpF4WC0lSL4uFJKmXxUKS1MtiIUnqZbGQJPWyWEiSelksJEm9LBaSpF4WC0lSL4uFJKmXxUKS1MtiIUnqNbJikeSyJDuS3DYQOzTJliR3tudDWjxJLkqyLcktSY4deM2G1v7OJBtGla8kac9GObL4AHDSLrFNwPVVtRa4vu0DnAysbY+NwHuhKy7AecBLgOOA82YLjCRp4YysWFTV54EHdgmvBy5v25cDpw/Er6jODcDBSY4EXglsqaoHqupBYAu7FyBJ0ogt9JzF86rqfoD2fESLrwTuHWi3vcX2FN9Nko1JtibZOjMzM++JS9I0m5QJ7swRq73Edw9WXVxV66pq3YoVK+Y1OUmadgtdLL7bTi/Rnne0+HZg9UC7VcB9e4lLkhbQQheLzcDsiqYNwDUD8de3VVHHAw+301TXAScmOaRNbJ/YYpKkBbR8mEZJXlBVt/W33Ok1HwFeBhyeZDvdqqbzgauSnAXcA7ymNb8WOAXYBjwGnAlQVQ8k+SPgS63dH1bVrpPmkqQRG6pYAO9LcgDdctgPV9VDfS+oqtfu4dAJc7Qt4Ow9vM9lwGVD5ilJGoGhTkNV1S8Cr6ObP9ia5MNJfnmkmUmSJsbQcxZVdSfw+8BbgX8DXJTkG0l+bVTJSZImw1DFIskLk1wI3A68HPiVqvq5tn3hCPOTJE2AYecs/hR4P/C2qvr+bLCq7kvy+yPJTJI0MYYtFqcA36+qHwEkeRpwUFU9VlUfHFl2kqSJMOycxV8DzxjYf2aLSZKmwLDF4qCq+t7sTtt+5mhSkiRNmmGLxT/sco+JFwPf30t7SdISMuycxZuBjyWZvS7TkcBvjCYlSdKkGapYVNWXkvws8Hy6K8F+o6r+caSZSZImxrAjC4BfANa017woCVV1xUiykiRNlGEvJPhB4KeBrwA/auECLBaSNAWGHVmsA45pF/yTJE2ZYVdD3Qb8xCgTkSRNrmFHFocDX0/yReCHs8GqOm0kWUmSJsqwxeIPRpmEJGmyDbt09m+S/BSwtqr+OskzgWWjTU2SNCmGvUT57wBXA3/eQiuBvxpVUpKkyTLsaaizgeOAG6G7EVKSI/b3Q5P8J+C36Zbf3kp3z+0jgSuBQ4Gbgd+sqseTHEi3RPfFwN8Dv1FVd+/vZ4/Cmk2fenL77vNPHWMmkjQaw66G+mFVPT67k2Q53T/0+yzJSuD3gHVV9QK601lnABcAF1bVWuBB4Kz2krOAB6vqZ+hutHTB/nyuJGn/DVss/ibJ24BntHtvfwz4X0/hc5e391pOd/Xa++nuund1O345cHrbXt/2acdPSJKn8NmSpH00bLHYBMzQnTL6d8C1dPfj3mdV9R3gT4B76IrEw8BNwENV9URrtp1uXoT2fG977ROt/WG7vm+SjUm2Jtk6MzOzP6lJkvZg2NVQP6a7rer7n+oHJjmEbrRwFPAQ3Sjl5Lk+dvYlezk2mOPFwMUA69at85fmkjSPhr021LeZ+x/oo/fjM18BfLuqZtp7fwL4V8DBSZa30cMqYPZy6NuB1cD2dtrqucAD+/G5C8LJbklL0b5cG2rWQcBr6FYt7Y97gOPbbzW+D5wAbAU+C7yabkXUBuCa1n5z2/9CO/4Zr1ElSQtrqDmLqvr7gcd3qurddBPS+6yqbqSbqL6Zbg7kaXSnj94KnJNkG92cxKXtJZcCh7X4OXTzJ5KkBTTsaahjB3afRjfSePb+fmhVnQect0v4Lrrfcuza9gd0IxlJ0pgMexrqvw9sPwHcDfz6vGcjSZpIw66G+rejTkSSNLmGPQ11zt6OV9
W75icdSdIk2pfVUL9AtzIJ4FeAz9N+LCdJWtr25eZHx1bVowBJ/gD4WFX99qgSkyRNjmEv9/GTwOMD+48Da+Y9G0nSRBp2ZPFB4ItJ/pLul9y/SnfZcEnSFBh2NdQ7k3wa+NctdGZVfXl0aUmSJsmwp6Ggu5T4I1X1HrrrNB01opwkSRNm2Nuqnkd3OY5zW+jpwF+MKilJ0mQZdmTxq8BpwD8AVNV9PIXLfUiSFpdhi8Xj7UqvBZDkn40uJUnSpBl2NdRVSf6c7p4TvwO8gXm4EdJS570tJC0Vw66G+pN27+1HgOcD76iqLSPNTJI0MXqLRZJlwHVV9QrAAiFJU6h3zqKqfgQ8luS5C5CPJGkCDTtn8QPg1iRbaCuiAKrq90aSlSRpogxbLD7VHpKkKbTXYpHkJ6vqnqq6fD4/NMnBwCXAC+iW474BuAP4KN0FCu8Gfr2qHkwS4D3AKcBjwG9V1c3zmY8kae/65iz+anYjycfn8XPfA/zvqvpZ4OeB24FNwPVVtRa4vu0DnAysbY+NwHvnMQ9J0hD6ikUGto+ejw9M8hzgl4BLAarq8ap6CFgPzI5gLgdOb9vrgSuqcwPdbz2OnI9cJEnD6SsWtYftp+JoYAb4n0m+nOSS9ovw51XV/QDt+YjWfiU735Fve4tJkhZIX7H4+SSPJHkUeGHbfiTJo0ke2c/PXA4cC7y3ql5Et7pq017aZ47YboUrycYkW5NsnZmZ2c/UJElz2WuxqKplVfWcqnp2VS1v27P7z9nPz9wObK+qG9v+1XTF47uzp5fa846B9qsHXr8KuG+OXC+uqnVVtW7FihX7mZokaS7DLp2dN1X1t0nuTfL8qroDOAH4entsAM5vz9e0l2wG3pjkSuAlwMOzp6tGZfCaTpKkMRSL5j8CH0pyAHAXcCbdKOeqJGcB9wCvaW2vpVs2u41u6eyZC5+uJE23sRSLqvoKsG6OQyfM0baAs0eelCRpj/bltqqSpCllsZAk9bJYSJJ6jWuCe+p41zxJi5kjC0lSL4uFJKmXxUKS1MtiIUnqZbGQJPWyWEiSelksJEm9LBaSpF4WC0lSL4uFJKmXxUKS1MtiIUnqZbGQJPWyWEiSelksJEm9xlYskixL8uUkn2z7RyW5McmdST6a5IAWP7Dtb2vH14wrZ0maVuMcWbwJuH1g/wLgwqpaCzwInNXiZwEPVtXPABe2dpKkBTSWYpFkFXAqcEnbD/By4OrW5HLg9La9vu3Tjp/Q2kuSFsi4RhbvBt4C/LjtHwY8VFVPtP3twMq2vRK4F6Adf7i130mSjUm2Jtk6MzMzytwlaeoseLFI8ipgR1XdNBieo2kNceyfAlUXV9W6qlq3YsWKechUkjRr+Rg+86XAaUlOAQ4CnkM30jg4yfI2elgF3NfabwdWA9uTLAeeCzyw8GlL0vRa8JFFVZ1bVauqag1wBvCZqnod8Fng1a3ZBuCatr257dOOf6aqdhtZSJJGZ5J+Z/FW4Jwk2+jmJC5t8UuBw1r8HGDTmPKTpKk1jtNQT6qqzwGfa9t3AcfN0eYHwGsWNLEFtGbTp57cvvv8U8eYiSTt2SSNLCRJE8piIUnqZbGQJPWyWEiSelksJEm9LBaSpF4WC0lSL4uFJKmXxUKS1Gusv+DWzvw1t6RJ5chCktTLYiFJ6mWxkCT1cs5iEXAuQ9K4ObKQJPWyWEiSenkaagwGTyuN+v09bSVpPlgsNJEseNJkWfDTUElWJ/lsktuTfC3Jm1r80CRbktzZng9p8SS5KMm2JLckOXahc5akaTeOkcUTwH+uqpuTPBu4KckW4LeA66vq/CSbgE3AW4GTgbXt8RLgve15SRv1qSpJ2hcLXiyq6n7g/rb9aJLbgZXAeuBlrdnlwOfoisV64IqqKuCGJAcnObK9z1TzVI2khTLW1VBJ1gAvAm4EnjdbANrzEa3ZSuDegZdtb7Fd32tjkq1Jts7MzIwybUmaOmMrFkmeBXwceHNVPbK3pnPEar
dA1cVVta6q1q1YsWK+0pQkMaZikeTpdIXiQ1X1iRb+bpIj2/EjgR0tvh1YPfDyVcB9C5WrJGk8q6ECXArcXlXvGji0GdjQtjcA1wzEX99WRR0PPOx8hSQtrHGshnop8JvArUm+0mJvA84HrkpyFnAP8Jp27FrgFGAb8Bhw5sKmO1lcJSVpHMaxGur/Mvc8BMAJc7Qv4OyRJqWduMpK0q68NpQkqZfFQpLUy2IhSerlhQSn1K4T5U9lbsI5Dmnpc2QhSerlyGKJcEmtpFGyWEwRC4qk/eVpKElSL4uFJKmXxUKS1Ms5Cw3NJbLS9HJkIUnqZbGQJPWyWEiSejlnIWD0v8FwvkNa3CwWGplhCpBFRFocPA0lSerlyGKJ8xIfkuaDxUJ7ZbGRBIuoWCQ5CXgPsAy4pKrOH3NKU+2pFBELkLT4LIo5iyTLgD8DTgaOAV6b5JjxZiVJ02OxjCyOA7ZV1V0ASa4E1gNfH2tW2s1SG3G4WkvqLJZisRK4d2B/O/CSwQZJNgIb2+73ktyxj59xOPB3+53h0jMx/ZELxp0BAIfngsnojwkyMf+NTIil0B8/tacDi6VYZI5Y7bRTdTFw8X5/QLK1qtbt7+uXGvtjZ/bH7uyTnS31/lgUcxZ0I4nVA/urgPvGlIskTZ3FUiy+BKxNclSSA4AzgM1jzkmSpsaiOA1VVU8keSNwHd3S2cuq6mvz/DH7fQpribI/dmZ/7M4+2dmS7o9UVX8rSdJUWyynoSRJY2SxkCT1mvpikeSkJHck2ZZk07jzmU9JLkuyI8ltA7FDk2xJcmd7PqTFk+Si1g+3JDl24DUbWvs7k2wYiL84ya3tNRclmWuJ88RIsjrJZ5PcnuRrSd7U4tPcJwcl+WKSr7Y++a8tflSSG9v3+2hbWEKSA9v+tnZ8zcB7ndvidyR55UB80f2NJVmW5MtJPtn2p7o/AKiqqX3QTZZ/CzgaOAD4KnDMuPOax+/3S8CxwG0DsT8GNrXtTcAFbfsU4NN0v2k5HrixxQ8F7mrPh7TtQ9qxLwL/sr3m08DJ4/7OPf1xJHBs23428E26y8dMc58EeFbbfjpwY/uuVwFntPj7gP/Qtn8XeF/bPgP4aNs+pv39HAgc1f6uli3WvzHgHODDwCfb/lT3R1VN/cjiycuIVNXjwOxlRJaEqvo88MAu4fXA5W37cuD0gfgV1bkBODjJkcArgS1V9UBVPQhsAU5qx55TVV+o7q/jioH3mkhVdX9V3dy2HwVup7s6wDT3SVXV99ru09ujgJcDV7f4rn0y21dXAye00dN64Mqq+mFVfRvYRvf3tej+xpKsAk4FLmn7YYr7Y9a0F4u5LiOycky5LJTnVdX90P3jCRzR4nvqi73Ft88RXxTa6YIX0f2f9FT3STvl8hVgB13h+xbwUFU90ZoMfo8nv3s7/jBwGPveV5Ps3cBbgB+3/cOY7v4ALBa9lxGZInvqi32NT7wkzwI+Dry5qh7ZW9M5YkuuT6rqR1X1L+iujHAc8HNzNWvPS7pPkrwK2FFVNw2G52g6Ff0xaNqLxTReRuS77XQJ7XlHi++pL/YWXzVHfKIleTpdofhQVX2ihae6T2ZV1UPA5+jmLA5OMvuj3cHv8eR3b8efS3eqc1/7alK9FDgtyd10p4heTjfSmNb+eNK0F4tpvIzIZmB29c4G4JqB+OvbCqDjgYfbKZnrgBOTHNJWCZ0IXNeOPZrk+HaO9vUD7zWRWp6XArdX1bsGDk1zn6xIcnDbfgbwCrq5nM8Cr27Ndu2T2b56NfCZNj+zGTijrQ46ClhLN9m/qP7GqurcqlpVVWvocv1MVb2OKe2PnYx7hn3cD7oVL9+kO0/79nHnM8/f7SPA/cA/0v0fzVl051OvB+5sz4e2tqG7wdS3gFuBdQPv8wa6CbptwJkD8XXAbe01f0q7IsCkPoBfpBvy3wJ8pT1OmfI+eSHw5dYntwHvaPGj6f5x2wZ8DDiwxQ9q+9va8aMH3uvt7Xvfwc
AqsMX6Nwa8jH9aDTX1/eHlPiRJvab9NJQkaQgWC0lSL4uFJKmXxUKS1MtiIUnqZbGQJPWyWEiSev1/L0v4a+EYXAkAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df['valor_aluguel'].plot(kind = 'hist', bins= 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.8388773035440982" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#skewness positive skew\n", + "df.valor_aluguel.skew()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4.624228179818687" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#kurtose - leptocurtica\n", + "df.valor_aluguel.kurtosis()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploracao multivariáveis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Perguntas\n", + " \n", + "- Qual a cidade com a média de aluguel mais cara? \n", + "- Quantos banheiros existem nas residencias com alugueis mais altos? \n", + "- Os imóveis mais caros aceitam animal? \n", + "- Os imóveis mais caros são mobiliados? " + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
cityvalor_aluguel
4São Paulo3400
0Belo Horizonte2300
3Rio de Janeiro2300
2Porto Alegre1650
1Campinas1500
\n", + "
" + ], + "text/plain": [ + " city valor_aluguel\n", + "4 São Paulo 3400\n", + "0 Belo Horizonte 2300\n", + "3 Rio de Janeiro 2300\n", + "2 Porto Alegre 1650\n", + "1 Campinas 1500" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Qual a cidade com a mediana de aluguel mais alta? \n", + "\n", + "df.groupby('city')['valor_aluguel'].median().reset_index().sort_values('valor_aluguel', ascending = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "#Quantos banheiros existem em média nas residências com aluguéis mais altos? \n", + "## definição: aluguéis mais altos são valores acima de 5000\n", + "\n", + "df['aluguel_alto'] = ['Alto' if x > 5000 else 'Baixo' for x in df['valor_aluguel']]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "aluguel_alto\n", + "Alto 3.729027\n", + "Baixo 1.772108\n", + "Name: bathroom, dtype: float64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.groupby('aluguel_alto')['bathroom'].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Hipóteses\n", + "\n", + "- São Paulo é a cidade com o aluguel mais caro.\n", + "- Quanto mais banheiros em um imóvel maior o valor do aluguel.\n", + "- Os imóveis com mobília têm o aluguel mais alto. " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
valor_aluguelbathroom
valor_aluguel1.000000.71589
bathroom0.715891.00000
\n", + "
" + ], + "text/plain": [ + " valor_aluguel bathroom\n", + "valor_aluguel 1.00000 0.71589\n", + "bathroom 0.71589 1.00000" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[['valor_aluguel', 'bathroom']].corr(method = 'spearman')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "aux = pd.DataFrame({'colunas' : df.columns, 'tipos' : df.dtypes})" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "lista = list(aux[aux['tipos'] == 'int64']['colunas'])" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "area\n", + " valor_aluguel area\n", + "valor_aluguel 1.000000 0.728095\n", + "area 0.728095 1.000000\n", + "rooms\n", + " valor_aluguel rooms\n", + "valor_aluguel 1.000000 0.600969\n", + "rooms 0.600969 1.000000\n", + "bathroom\n", + " valor_aluguel bathroom\n", + "valor_aluguel 1.00000 0.71589\n", + "bathroom 0.71589 1.00000\n", + "parking spaces\n", + " valor_aluguel parking spaces\n", + "valor_aluguel 1.000000 0.620175\n", + "parking spaces 0.620175 1.000000\n", + "hoa (R$)\n", + " valor_aluguel hoa (R$)\n", + "valor_aluguel 1.000000 0.355785\n", + "hoa (R$) 0.355785 1.000000\n", + "valor_aluguel\n", + " valor_aluguel valor_aluguel\n", + "valor_aluguel 1.0 1.0\n", + "valor_aluguel 1.0 1.0\n", + "property tax (R$)\n", + " valor_aluguel property tax (R$)\n", + "valor_aluguel 1.00000 0.65923\n", + "property tax (R$) 0.65923 1.00000\n", + "fire insurance (R$)\n", + " valor_aluguel fire insurance (R$)\n", + "valor_aluguel 1.000000 0.988045\n", + "fire insurance (R$) 0.988045 1.000000\n", + "total (R$)\n", + " valor_aluguel total (R$)\n", + "valor_aluguel 1.000000 0.968176\n", + "total (R$) 0.968176 1.000000\n" + ] + } + ], + "source": [ + "for coluna in lista:\n", + " 
print(coluna)\n", + " print(df[['valor_aluguel', coluna]].corr(method = 'spearman'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualização de dados" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "São Paulo 1857\n", + "Belo Horizonte 276\n", + "Rio de Janeiro 229\n", + "Porto Alegre 89\n", + "Campinas 88\n", + "Name: city, dtype: int64" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.query('aluguel_alto==\"Alto\"').city.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x='city',data=df)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(x='city', y='valor_aluguel', data=df)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#resetando o index, colocou o eixo x em ordem alfabética e principalmente a média\n", + "sns.barplot(x='city', y='valor_aluguel', data=df.groupby('city')['valor_aluguel'].mean().reset_index())" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(df['valor_aluguel'])" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x='valor_aluguel',hue='city',y='bathroom',size='aluguel_alto', data=df)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.heatmap(df.corr(),annot=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "g = sns.FacetGrid(df,col='city', row='aluguel_alto')\n", + "g.map(plt.hist,'valor_aluguel')" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aluguel_altovalor_aluguelcity
0Alto50101
1Alto50152
2Alto50251
3Alto50501
4Alto50581
............
880Baixo49602
881Baixo49901
882Baixo49981
883Baixo49991
884Baixo5000118
\n", + "

885 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " aluguel_alto valor_aluguel city\n", + "0 Alto 5010 1\n", + "1 Alto 5015 2\n", + "2 Alto 5025 1\n", + "3 Alto 5050 1\n", + "4 Alto 5058 1\n", + ".. ... ... ...\n", + "880 Baixo 4960 2\n", + "881 Baixo 4990 1\n", + "882 Baixo 4998 1\n", + "883 Baixo 4999 1\n", + "884 Baixo 5000 118\n", + "\n", + "[885 rows x 3 columns]" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.query('city==\"São Paulo\"').groupby(['aluguel_alto','valor_aluguel'])['city'].count().reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..67a3cc4 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.pythonPath": "C:\\Users\\marce\\.conda\\envs\\aceleradev_ds\\python.exe" +} \ No newline at end of file diff --git a/Semana 2/aceleradev_semana2.ipynb b/Semana 2/aceleradev_semana2.ipynb index ed58c0c..4ae22e7 100644 --- a/Semana 2/aceleradev_semana2.ipynb +++ b/Semana 2/aceleradev_semana2.ipynb @@ -29,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -46,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -59,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -71,7 +71,7 @@ " 'vendas': 
[1000.52, 1052.34, 2002, 5000, 300]}" ] }, - "execution_count": 7, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -83,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -92,7 +92,7 @@ "dict" ] }, - "execution_count": 8, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -104,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -113,7 +113,7 @@ "dict_keys(['canal_venda', 'acessos', 'site', 'vendas'])" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -125,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -134,7 +134,7 @@ "['site1', 'site1', 'site2', 'site2', 'site3']" ] }, - "execution_count": 10, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -146,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -155,7 +155,7 @@ "300" ] }, - "execution_count": 16, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -167,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -176,7 +176,7 @@ "'instagram'" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -188,7 +188,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -197,7 +197,7 @@ "['facebook', 'twitter', 'instagram']" ] }, - "execution_count": 19, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -216,7 +216,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -225,7 +225,7 @@ "list" ] }, - "execution_count": 21, + 
"execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -236,7 +236,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -246,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -255,7 +255,7 @@ "[200, 200, 300, 800, 200]" ] }, - "execution_count": 23, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -267,7 +267,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -276,7 +276,7 @@ "200" ] }, - "execution_count": 24, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -288,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -297,7 +297,7 @@ "[200, 200, 300]" ] }, - "execution_count": 25, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -309,7 +309,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -319,7 +319,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -332,7 +332,7 @@ " 'lista': [200, 200, 300, 800, 200]}" ] }, - "execution_count": 27, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -350,7 +350,27 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd\n" + ] + }, + { + "cell_type": "code", + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -363,7 +383,7 @@ " 'lista': [200, 200, 300, 800, 200]}" ] }, - "execution_count": 29, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ 
-374,7 +394,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -384,7 +404,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -469,7 +489,7 @@ "4 facebook 500 site3 300.00 200" ] }, - "execution_count": 31, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -481,7 +501,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -539,7 +559,7 @@ "1 twitter 200 site1 1052.34 200" ] }, - "execution_count": 35, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -551,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -560,7 +580,7 @@ "(5, 5)" ] }, - "execution_count": 36, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -572,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -581,7 +601,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -590,7 +610,7 @@ "(5, 5)" ] }, - "execution_count": 38, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -601,7 +621,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -610,7 +630,7 @@ "pandas.core.frame.DataFrame" ] }, - "execution_count": 39, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -621,7 +641,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 27, "metadata": {}, "outputs": [ { @@ -630,7 +650,7 @@ "RangeIndex(start=0, stop=5, step=1)" ] }, - "execution_count": 40, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -642,7 +662,7 @@ }, { "cell_type": "code", - 
"execution_count": 43, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -656,7 +676,7 @@ "dtype: object" ] }, - "execution_count": 43, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -668,7 +688,31 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "object 2\n", + "int64 2\n", + "float64 1\n", + "dtype: int64" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#saber quantas colunas tem de cada tipo\n", + "dataframe.dtypes.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, "metadata": {}, "outputs": [ { @@ -753,7 +797,7 @@ "4 False False False False False" ] }, - "execution_count": 49, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -765,7 +809,32 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "canal_venda 0\n", + "acessos 0\n", + "site 0\n", + "vendas 0\n", + "lista 0\n", + "dtype: int64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataframe.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -774,7 +843,7 @@ "Index(['canal_venda', 'acessos', 'site', 'vendas', 'lista'], dtype='object')" ] }, - "execution_count": 23, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -786,16 +855,21 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['facebook', 'twitter', 'instagram', 'linkedin', 'facebook']" + "0 facebook\n", + "1 twitter\n", + "2 instagram\n", + "3 linkedin\n", + "4 facebook\n", + "Name: canal_venda, dtype: object" ] }, - "execution_count": 52, + "execution_count": 33, 
"metadata": {}, "output_type": "execute_result" } @@ -807,7 +881,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 34, "metadata": {}, "outputs": [], "source": [ @@ -817,7 +891,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -908,7 +982,7 @@ "4 facebook 500 site3 300.00 200 5" ] }, - "execution_count": 54, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -919,7 +993,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -928,7 +1002,7 @@ "Index(['canal_venda', 'acessos', 'site', 'vendas', 'lista', 'nova_coluna'], dtype='object')" ] }, - "execution_count": 55, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -939,7 +1013,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 37, "metadata": {}, "outputs": [ { @@ -965,6 +1039,7 @@ " \n", " vendas\n", " lista\n", + " nova_coluna\n", " \n", " \n", " \n", @@ -972,53 +1047,58 @@ " 0\n", " 1000.52\n", " 200\n", + " 1\n", " \n", " \n", " 1\n", " 1052.34\n", " 200\n", + " 2\n", " \n", " \n", " 2\n", " 2002.00\n", " 300\n", + " 3\n", " \n", " \n", " 3\n", " 5000.00\n", " 800\n", + " 4\n", " \n", " \n", " 4\n", " 300.00\n", " 200\n", + " 5\n", " \n", " \n", "\n", "" ], "text/plain": [ - " vendas lista\n", - "0 1000.52 200\n", - "1 1052.34 200\n", - "2 2002.00 300\n", - "3 5000.00 800\n", - "4 300.00 200" + " vendas lista nova_coluna\n", + "0 1000.52 200 1\n", + "1 1052.34 200 2\n", + "2 2002.00 300 3\n", + "3 5000.00 800 4\n", + "4 300.00 200 5" ] }, - "execution_count": 65, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Removendo colunas\n", + "# Removendo colunas somente no plotter na tela\n", "dataframe.drop(columns = ['acessos','site', 'canal_venda'])" ] }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 38, "metadata": {}, 
"outputs": [ { @@ -1047,6 +1127,7 @@ " site\n", " vendas\n", " lista\n", + " nova_coluna\n", " \n", " \n", " \n", @@ -1057,6 +1138,7 @@ " site1\n", " 1000.52\n", " 200\n", + " 1\n", " \n", " \n", " 1\n", @@ -1065,6 +1147,7 @@ " site1\n", " 1052.34\n", " 200\n", + " 2\n", " \n", " \n", " 2\n", @@ -1073,6 +1156,7 @@ " site2\n", " 2002.00\n", " 300\n", + " 3\n", " \n", " \n", " 3\n", @@ -1081,6 +1165,7 @@ " site2\n", " 5000.00\n", " 800\n", + " 4\n", " \n", " \n", " 4\n", @@ -1089,21 +1174,22 @@ " site3\n", " 300.00\n", " 200\n", + " 5\n", " \n", " \n", "\n", "" ], "text/plain": [ - " canal_venda acessos site vendas lista\n", - "0 facebook 100 site1 1000.52 200\n", - "1 twitter 200 site1 1052.34 200\n", - "2 instagram 300 site2 2002.00 300\n", - "3 linkedin 400 site2 5000.00 800\n", - "4 facebook 500 site3 300.00 200" + " canal_venda acessos site vendas lista nova_coluna\n", + "0 facebook 100 site1 1000.52 200 1\n", + "1 twitter 200 site1 1052.34 200 2\n", + "2 instagram 300 site2 2002.00 300 3\n", + "3 linkedin 400 site2 5000.00 800 4\n", + "4 facebook 500 site3 300.00 200 5" ] }, - "execution_count": 66, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -1114,7 +1200,16 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 39, + "metadata": {}, + "outputs": [], + "source": [ + "dataframe.drop(columns='nova_coluna',inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -1123,7 +1218,7 @@ "Index(['canal_venda', 'acessos', 'site', 'vendas', 'lista'], dtype='object')" ] }, - "execution_count": 28, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -1135,7 +1230,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -1144,7 +1239,7 @@ "200" ] }, - "execution_count": 71, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -1156,7 +1251,7 @@ }, { "cell_type": 
"code", - "execution_count": 30, + "execution_count": 42, "metadata": {}, "outputs": [ { @@ -1167,7 +1262,7 @@ "Name: canal_venda, dtype: object" ] }, - "execution_count": 30, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -1179,7 +1274,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 43, "metadata": {}, "outputs": [ { @@ -1264,7 +1359,7 @@ "4 facebook 500 site3 300.00 200" ] }, - "execution_count": 74, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -1275,7 +1370,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 44, "metadata": {}, "outputs": [ { @@ -1321,7 +1416,7 @@ "4 200" ] }, - "execution_count": 79, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -1333,7 +1428,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -1418,7 +1513,7 @@ "4 facebook 500 site3 300.00 200" ] }, - "execution_count": 80, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -1429,7 +1524,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 46, "metadata": {}, "outputs": [ { @@ -1505,7 +1600,7 @@ "3 linkedin 400 site2 5000.00 800" ] }, - "execution_count": 32, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -1517,7 +1612,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 47, "metadata": {}, "outputs": [ { @@ -1584,7 +1679,7 @@ "4 facebook 300.00" ] }, - "execution_count": 83, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } @@ -1596,7 +1691,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ @@ -1606,7 +1701,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 49, "metadata": {}, "outputs": [ { @@ -1673,7 +1768,7 @@ "4 facebook 500" ] }, - "execution_count": 
85, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -1684,7 +1779,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 50, "metadata": {}, "outputs": [ { @@ -1713,7 +1808,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 51, "metadata": {}, "outputs": [ { @@ -1798,7 +1893,7 @@ "4 facebook 500 site3 300.00 200" ] }, - "execution_count": 87, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -1809,7 +1904,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ @@ -1819,7 +1914,91 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sitesite1site2site3
canal_venda
facebook100.0NaN500.0
instagramNaN300.0NaN
linkedinNaN400.0NaN
twitter200.0NaNNaN
\n", + "
" + ], + "text/plain": [ + "site site1 site2 site3\n", + "canal_venda \n", + "facebook 100.0 NaN 500.0\n", + "instagram NaN 300.0 NaN\n", + "linkedin NaN 400.0 NaN\n", + "twitter 200.0 NaN NaN" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aux" + ] + }, + { + "cell_type": "code", + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -1845,7 +2024,7 @@ }, { "cell_type": "code", - "execution_count": 97, + "execution_count": 56, "metadata": {}, "outputs": [ { @@ -1918,7 +2097,7 @@ "twitter 200.0 0.0 0.0" ] }, - "execution_count": 97, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } @@ -1932,7 +2111,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 57, "metadata": {}, "outputs": [ { @@ -2017,7 +2196,7 @@ "4 facebook 500 site3 300.00 200" ] }, - "execution_count": 90, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } @@ -2028,7 +2207,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 58, "metadata": {}, "outputs": [ { @@ -2101,7 +2280,7 @@ "4 site3 canal_venda facebook" ] }, - "execution_count": 39, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -2113,14 +2292,14 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 59, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Index(['canal_venda', 'site1', 'site2', 'site3'], dtype='object', name='site')\n", + "Index(['site1', 'site2', 'site3'], dtype='object', name='site')\n", "Index(['index', 'canal_venda', 'acessos', 'site', 'vendas', 'lista'], dtype='object')\n" ] } @@ -2134,7 +2313,109 @@ }, { "cell_type": "code", - "execution_count": 96, + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
indexcanal_vendaacessossitevendaslista
00facebook100site11000.52200
11twitter200site11052.34200
22instagram300site22002.00300
33linkedin400site25000.00800
44facebook500site3300.00200
\n", + "
" + ], + "text/plain": [ + " index canal_venda acessos site vendas lista\n", + "0 0 facebook 100 site1 1000.52 200\n", + "1 1 twitter 200 site1 1052.34 200\n", + "2 2 instagram 300 site2 2002.00 300\n", + "3 3 linkedin 400 site2 5000.00 800\n", + "4 4 facebook 500 site3 300.00 200" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "aux" + ] + }, + { + "cell_type": "code", + "execution_count": 60, "metadata": {}, "outputs": [ { @@ -2219,7 +2500,7 @@ "4 facebook 500 site3 300.00 200" ] }, - "execution_count": 96, + "execution_count": 60, "metadata": {}, "output_type": "execute_result" } @@ -4435,7 +4716,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.2" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/Semana 3/semana_3.ipynb b/Semana 3/semana_3.ipynb index d7939e4..4207360 100644 --- a/Semana 3/semana_3.ipynb +++ b/Semana 3/semana_3.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -63,7 +63,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -210,7 +210,7 @@ "4 11 836 " ] }, - "execution_count": 30, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -221,7 +221,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -243,7 +243,7 @@ "dtype: object" ] }, - "execution_count": 31, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -254,10 +254,8 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": { - "collapsed": true - }, + "execution_count": 5, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -306,7 +304,7 @@ }, { "cell_type": 
"code", - "execution_count": 39, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -315,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -324,7 +322,7 @@ "3896.247194163861" ] }, - "execution_count": 42, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -335,7 +333,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -344,7 +342,7 @@ "2661.0" ] }, - "execution_count": 43, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -355,16 +353,16 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "3408.5455176710675" + "3408.5455176710816" ] }, - "execution_count": 44, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -375,7 +373,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -392,7 +390,7 @@ "Name: valor_aluguel, dtype: float64" ] }, - "execution_count": 45, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -403,22 +401,22 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 51, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAVqElEQVR4nO3dfbRldX3f8fdHRkDiA083hM5Mc8dklpYaUscR6bJJjVQeDWO7jMFlwog001ZstWYtHTRL0qSuBX0QpU2wE6CCNSISU6aCpSNiXF2rPAwP8hjkiiAzgnMjTxqMBP32j/MbPAx3Zp8Z7rnn3Dnv11pn3b2/e599fmcv7nzYv9/v7p2qQpKkXXnBqBsgSRp/hoUkqZNhIUnqZFhIkjoZFpKkTktG3YBhOPTQQ2t6enrUzZCkReWmm276q6qammvbXhkW09PTbN68edTNkKRFJckDO9tmN5QkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSp0175F9zzaXr9lc8s33/2SSNsiSSNjlcWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSeo0tLBIclGSbUnumGPb7yapJIe29SQ5L8lMktuSrOrbd22Se9tr7bDaK0nauWFeWXwKOH7HYpLlwLHAt/vKJwAr22sdcH7b92DgLOB1wFHAWUkOGmKbJUlzGFpYVNXXgEfm2HQu8AGg+mprgEuq5zrgwCSHA8cBm6rqkap6FNjEHAEkSRquBR2zSLIG2FpVX99h01Lgwb71La22s/pcx16XZHOSzbOzs/PYaknSgoVFkgOADwEfGcbxq2pDVa2uqtVTU1PD+AhJmlgLeWXxC8AK4OtJ7geWATcn+TlgK7C8b99lrbazuiRpAS1YWFTV7VX1s1U1XVXT9LqUVlXVw8BG4NQ2K+po4PGqegi4Gjg2yUFtYPvYVpMkLaBhTp39LPD/gFck2ZLk9F3sfhVwHzAD/AnwboCqegT4Q+DG9vqDVpMkLaChPYO7qt7esX26b7mAM3ay30XARfPaOEnSbvEvuCVJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ2G9qS8vd30+iufWb7/7JNG2BJJGr5hPoP7oiTbktzRV/uPSf4yyW1J/jzJgX3bzkwyk+SeJMf11Y9vtZkk64fVXknSzg2zG+pTwPE71DYBr6qqI4FvAGcCJDkCOAX4++09f5xknyT7AH8EnAAcAby97StJWkBD64aqqq8lmd6h9n/6Vq8D3tqW1wCXVtWPgG8lmQGOattmquo+gCSXtn3vGla7d6W/60mSJskoB7jfBXypLS8FHuzbtqXVdlZ/jiTrkmxOsnl2dnYIzZWkyTWSsEjyYeBp4DPzdcyq2lBVq6tq9dTU1HwdVpLECGZDJXkn8GbgmKqqVt4KLO/bbVmrsYu6JGmBLOiVRZLjgQ8AJ1fVk32bNgKnJNkvyQpgJXADcCOwMsmKJPvSGwTfuJBtliQN8coiyWeBNwCHJtkCnEVv9tN+wKYkANdV1b+sqjuTXEZv4Ppp4Iyq+nE7znuAq4F9gIuq6s5htVmSNLdhzoZ6+xzlC3ex/0eBj85Rvwq4ah6bJknaTd7uQ5LUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ0MC0lSJ8NCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHUyLCRJnQwLSVInw0KS1MmwkCR1MiwkSZ2GFhZJLkqyLckdfbWDk2xKcm/7eVCrJ8l5SWaS3JZkVd971rb9702ydljtlSTt3DCvLD4FHL9DbT1wTVWtBK5p6wAnACvbax1wPvTCBTgLeB1wFHDW9oCRJC2coYVFVX0NeGSH8hrg4rZ8MfC
Wvvol1XMdcGCSw4HjgE1V9UhVPQps4rkBJEkasoUeszisqh5qyw8Dh7XlpcCDffttabWd1Z8jybokm5Nsnp2dnd9WS9KEG9kAd1UVUPN4vA1VtbqqVk9NTc3XYSVJLHxYfLd1L9F+bmv1rcDyvv2WtdrO6pKkBbTQYbER2D6jaS1wRV/91DYr6mjg8dZddTVwbJKD2sD2sa0mSVpAS4Z14CSfBd4AHJpkC71ZTWcDlyU5HXgAeFvb/SrgRGAGeBI4DaCqHknyh8CNbb8/qKodB80lSUM2UFgk+aWqun13DlxVb9/JpmPm2LeAM3ZynIuAi3bnsyVJ82vQbqg/TnJDkncnedlQWyRJGjsDhUVV/QrwDnqDzTcl+dMkbxpqyyRJY2PgAe6quhf4PeCDwD8Gzkvyl0n+2bAaJ0kaDwOFRZIjk5wL3A28Efj1qvp7bfncIbZPkjQGBp0N9V+AC4APVdUPtxer6jtJfm8oLZMkjY1Bw+Ik4IdV9WOAJC8A9q+qJ6vq00NrnSRpLAw6ZvFl4EV96we0miRpAgwaFvtX1Q+2r7TlA4bTJEnSuBk0LP56hwcSvQb44S72lyTtRQYds3gf8Pkk3wEC/Bzwm0NrlSRprAwUFlV1Y5JXAq9opXuq6m+H1yxJ0jjZnRsJvhaYbu9ZlYSqumQorZIkjZVBbyT4aeAXgFuBH7dyAYaFJE2AQa8sVgNHtLvDSpImzKCzoe6gN6gtSZpAg15ZHArcleQG4Efbi1V18lBaJUkaK4OGxe8PsxGSpPE26NTZv0jy88DKqvpykgOAfYbbNEnSuBh0NtTvAOuAg+nNiloKfJI5HpE64PH+LfDP6c2oup3eM7cPBy4FDgFuAn67qp5Ksh+9WVevAb4H/GZV3b8nnzss0+uvfGb5/rNPGmFLJGk4Bh3gPgN4PfAEPPMgpJ/dkw9MshT4N8DqqnoVvSuUU4BzgHOr6heBR4HT21tOBx5t9XPbfpKkBTRoWPyoqp7avpJkCb2rgj21BHhRO84BwEP0HqR0edt+MfCWtrymrdO2H5Mkz+OzJUm7adCw+IskH6L3D/ybgM8D/2tPPrCqtgL/Cfg2vZB4nF6302NV9XTbbQu9ri7azwfbe59u+x+y43GTrEuyOcnm2dnZPWmaJGknBg2L9cAsvfGFfwFcRe953LstyUH0rhZWAH8H+Bng+D05Vr+q2lBVq6tq9dTU1PM9nCSpz6CzoX4C/El7PV//BPhWVc0CJPkCvfGQA5MsaVcPy4Ctbf+twHJgS+u2ehm9ge6x5GC3pL3RQFcWSb6V5L4dX3v4md8Gjk5yQBt7OAa4C7gWeGvbZy1wRVve2NZp27/ibUckaWHtzr2httsf+A1602h3W1Vdn+Ry4GbgaeAWYANwJXBpkn/fahe2t1wIfDrJDPAIvZlTkqQFNGg31I7dPh9PchPwkT350Ko6Czhrh/J9wFFz7Ps39MJJkjQig/5R3qq+1RfQu9LYnWdhSJIWsUH/wf/PfctPA/cDb5v31kiSxtKg3VC/NuyGSJLG16DdUO/f1faq+tj8NEeSNI52ZzbUa+lNYwX4deAG4N5hNEqSNF4GDYtlwKqq+j5Akt8Hrqyq3xpWwyRJ42PQ230cBjzVt/5Uq0mSJsCgVxaXADck+fO2/hZ+eidYSdJebtDZUB9N8iXgV1rptKq6ZXjNkiSNk0G7oaD33IknquoT9G7qt2JIbZIkjZlBbyR4FvBB4MxWeiHwP4bVKEnSeBn0yuKfAicDfw1QVd8BXjKsRkmSxsugA9xPVVUlKYAkPzPENu01fLaFpL3FoFcWlyX5b/QeUPQ7wJeZnwchSZIWgc4ri/aAos8BrwSeAF4BfKSqNg25bZKkMdEZFq376aqq+iXAgJCkCTRoN9TNSV471JZIksbWoAPcrwN+K8n99GZEhd5Fx5HDapgkaXzsMiyS/N2q+jZw3Hx+aJIDgQuAVwEFvAu4h97YyDTt4UpV9WgbM/kEcCLwJPD
Oqrp5PtsjSdq1rm6o/wlQVQ8AH6uqB/pfz+NzPwH876p6JfDLwN3AeuCaqloJXNPWAU4AVrbXOuD85/G5kqQ90BUW6Vt++Xx8YJKXAb8KXAhQVU9V1WPAGn56c8KL6d2skFa/pHquozd99/D5aIskaTBdYVE7WX4+VgCzwH9PckuSC9of+R1WVQ+1fR7mp7dAXwo82Pf+La0mSVogXWHxy0meSPJ94Mi2/ESS7yd5Yg8/cwmwCji/ql5Nb8B8ff8OVVXsZjglWZdkc5LNs7Oze9g0SdJcdhkWVbVPVb20ql5SVUva8vb1l+7hZ24BtlTV9W39cnrh8d3t3Uvt57a2fSuwvO/9y1ptx7ZuqKrVVbV6ampqD5smSZrLoFNn501VPZzkwSSvqKp7gGOAu9prLXB2+3lFe8tG4D1JLqU3hffxvu6qoei/p5MkaQRh0fxr4DNJ9gXuA06jd5VzWZLTgQeAt7V9r6I3bXaG3tTZ0xa+uZI02UYSFlV1K7B6jk3HzLFvAWcMvVGSpJ3anSflSZImlGEhSepkWEiSOo1qgHvi+NQ8SYuZVxaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjoZFpKkToaFJKmTYSFJ6mRYSJI6GRaSpE6GhSSpk2EhSeo0srBIsk+SW5J8sa2vSHJ9kpkkn0uyb6vv19Zn2vbpUbVZkibVKK8s3gvc3bd+DnBuVf0i8ChwequfDjza6ue2/SRJC2gkYZFkGXAScEFbD/BG4PK2y8XAW9rymrZO235M21+StEBGdWXxceADwE/a+iHAY1X1dFvfAixty0uBBwHa9sfb/s+SZF2SzUk2z87ODrPtkjRxFjwskrwZ2FZVN83ncatqQ1WtrqrVU1NT83loSZp4S0bwma8HTk5yIrA/8FLgE8CBSZa0q4dlwNa2/1ZgObAlyRLgZcD3Fr7ZkjS5FvzKoqrOrKplVTUNnAJ8pareAVwLvLXttha4oi1vbOu07V+pqlrAJkvSxBunv7P4IPD+JDP0xiQubPULgUNa/f3A+hG1T5Im1ii6oZ5RVV8FvtqW7wOOmmOfvwF+Y0EbtoCm11/5zPL9Z580wpZI0s6N05WFJGlMGRaSpE6GhSSpk2EhSepkWEiSOhkWkqROhoUkqZNhIUnqZFhIkjqN9C+49Wz+NbekceWVhSSpk2EhSepkWEiSOjlmsQg4liFp1LyykCR1MiwkSZ3shhqB/m6lYR/fbitJ88Gw0Fgy8KTxsuDdUEmWJ7k2yV1J7kzy3lY/OMmmJPe2nwe1epKcl2QmyW1JVi10myVp0o3iyuJp4Her6uYkLwFuSrIJeCdwTVWdnWQ9sB74IHACsLK9Xgec337u1YbdVSVJu2PBw6KqHgIeasvfT3I3sBRYA7yh7XYx8FV6YbEGuKSqCrguyYFJDm/HmWh21UhaKCOdDZVkGng1cD1wWF8APAwc1paXAg/2vW1Lq+14rHVJNifZPDs7O7Q2S9IkGllYJHkx8GfA+6rqif5t7Sqidud4VbWhqlZX1eqpqal5bKkkaSRhkeSF9ILiM1X1hVb+bpLD2/bDgW2tvhVY3vf2Za0mSVogo5gNFeBC4O6q+ljfpo3A2ra8Friir35qmxV1NPC44xWStLBGMRvq9cBvA7cnubXVPgScDVyW5HTgAeBtbdtVwInADPAkcNrCNne8OEtK0iiMYjbU/wWyk83HzLF/AWcMtVF6FmdZSdqR94aSJHUyLCRJnQwLSVInbyQ4oXYcKH8+YxOOcUh7P68sJEmdvLLYSzilVtIwGRYTxECRtKfshpIkdTIsJEmdDAtJUifHLDQwp8hKk8srC0lSJ8NCktTJsJAkdXLMQsDw/wbD8Q5pcTMsNDSDBJAhIi0OdkNJkjp5ZbGX8xYfkuaDYaFdMmwkwSIKiyTHA58A9gEuqKqzR9ykifZ8QsQAkhafRTFmkWQf4I+AE4AjgLcnOWK0rZKkybFYriyOAmaq6j6
AJJcCa4C7RtoqPcfedsXhbC2pZ7GExVLgwb71LcDr+ndIsg5Y11Z/kOSe3fyMQ4G/2uMW7n3G5nzknFG3AIBDc854nI8xMjb/jYyJveF8/PzONiyWsOhUVRuADXv6/iSbq2r1PDZpUfN8PJvn47k8J8+2t5+PRTFmAWwFlvetL2s1SdICWCxhcSOwMsmKJPsCpwAbR9wmSZoYi6IbqqqeTvIe4Gp6U2cvqqo75/lj9rgLay/l+Xg2z8dzeU6eba8+H6mqUbdBkjTmFks3lCRphAwLSVKniQ+LJMcnuSfJTJL1o27PfEpyUZJtSe7oqx2cZFOSe9vPg1o9Sc5r5+G2JKv63rO27X9vkrV99dckub2957wkWdhvuHuSLE9ybZK7ktyZ5L2tPsnnZP8kNyT5ejsn/67VVyS5vn2Pz7WJJSTZr63PtO3Tfcc6s9XvSXJcX33R/Y4l2SfJLUm+2NYn+nwAUFUT+6I3WP5N4OXAvsDXgSNG3a55/H6/CqwC7uir/QdgfVteD5zTlk8EvgQEOBq4vtUPBu5rPw9qywe1bTe0fdPee8Kov3PH+TgcWNWWXwJ8g97tYyb5nAR4cVt+IXB9a/9lwCmt/kngX7XldwOfbMunAJ9ry0e035/9gBXt92qfxfo7Brwf+FPgi219os9HVU38lcUztxGpqqeA7bcR2StU1deAR3YorwEubssXA2/pq19SPdcBByY5HDgO2FRVj1TVo8Am4Pi27aVVdV31fjsu6TvWWKqqh6rq5rb8feBuencHmORzUlX1g7b6wvYq4I3A5a2+4znZfq4uB45pV09rgEur6kdV9S1ght7v16L7HUuyDDgJuKCthwk+H9tNeljMdRuRpSNqy0I5rKoeassPA4e15Z2di13Vt8xRXxRad8Gr6f2f9ESfk9blciuwjV7wfRN4rKqebrv0f49nvnvb/jhwCLt/rsbZx4EPAD9p64cw2ecDMCwmWvu/34mbO53kxcCfAe+rqif6t03iOamqH1fVP6B3Z4SjgFeOuEkjk+TNwLaqumnUbRk3kx4Wk3gbke+27hLaz22tvrNzsav6sjnqYy3JC+kFxWeq6gutPNHnZLuqegy4FviH9Lrctv/Rbv/3eOa7t+0vA77H7p+rcfV64OQk99PrInojvefoTOr5eMakh8Uk3kZkI7B99s5a4Iq++qltBtDRwOOta+Zq4NgkB7VZQscCV7dtTyQ5uvXRntp3rLHU2nkhcHdVfaxv0ySfk6kkB7blFwFvojeWcy3w1rbbjudk+7l6K/CVdjW2ETilzQ5aAaykN9i/qH7HqurMqlpWVdP02vqVqnoHE3o+nmXUI+yjftGb8fINev20Hx51e+b5u30WeAj4W3p9o6fT60+9BrgX+DJwcNs39B4w9U3gdmB133HeRW+AbgY4ra++Grijvee/0u4IMK4v4B/R62K6Dbi1vU6c8HNyJHBLOyd3AB9p9ZfT+8dtBvg8sF+r79/WZ9r2l/cd68Pte99D3yywxfo7BryBn86Gmvjz4e0+JEmdJr0bSpI0AMNCktTJsJAkdTIsJEmdDAtJUifDQpLUybCQJHX6/72O4m/LrzMjAAAAAElFTkSuQmCC\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAV8klEQVR4nO3df7BkZX3n8ffHGQFdf/BrMNTMJAPJlAlrmRUnyK7ZrCsG+WEYktIEy4oTJJndDW502S0dNCXZpKyCbFaUSlaDwApGRUQTZhWXnaDG2ipBB1RAERmRhRHi3ISfBpWg3/3jPJf0zNyZ0zPcvt339vtV1dXnfM/T3d9+ijtfnvM8fU6qCkmS9uZp405AkjT5LBaSpF4WC0lSL4uFJKmXxUKS1Gv5uBMYhcMPP7zWrFkz7jQkaVG56aab/q6qVsx1bEkWizVr1rB169ZxpyFJi0qS/7enY56GkiT1slhIknpZLCRJvSwWkqReFgtJUi+LhSSpl8VCktTLYiFJ6mWxkCT1WpK/4J5PazZ96sntu88/dYyZSNL4OLKQJPWyWEiSelksJEm9LBaSpF4WC0lSL4uFJKmXxUKS1MtiIUnqZbGQJPWyWEiSelksJEm9RlYsklyWZEeS2+Y49l+SVJLD236SXJRkW5Jbkhw70HZDkjvbY8Oo8pUk7dkoRxYfAE7aNZhkNfDLwD0D4ZOBte2xEXhva3socB7wEuA44Lwkh4wwZ0nSHEZWLKrq88ADcxy6EHgLUAOx9cAV1bkBODjJkcArgS1V9UBVPQhsYY4CJEkarQWds0hyGvCdqvrqLodWAvcO7G9vsT3F53rvjUm2Jtk6MzMzj1lLkhasWCR5JvB24B1zHZ4jVnuJ7x6suriq1lXVuhUrVux/opKk3SzkyOKngaOArya5G1gF3JzkJ+hGDKsH2q4C7ttLXJK0gBasWFTVrVV1RFWtqao1dIXg2Kr6W2Az8Pq2Kup44OGquh+4DjgxySFtYvvEFpMkLaBRLp39CPAF4PlJtic5ay/NrwXuArYB7wd+F6CqHgD+CPhSe/xhi0mSFtDI7sFdVa/tOb5mYLuAs/fQ7jLgsnlNTpK0T/wFtySpl8VCktTLYiFJ6mWxkCT1slhIknpZLCRJvSwWkqReFgtJUi+LhSSpl8VCktTLYiFJ6mWxkCT1slhIknpZLCRJvSwWkqReFgtJUi+LhSSp18julLfUrdn0qSe37z7/1DFmIkmjN8p7cF+WZEeS2wZi/y3JN5LckuQvkxw8cOzcJNuS3JHklQPxk1psW5JNo8pXkrRnozwN9QHgpF1iW4AXVNULgW8C5wIkOQY4A/jn7TX/I8myJMuAPwNOBo4BXtvaSpIW0MhOQ1XV55Os2SX2fwZ2bwBe3bbXA1dW1Q+BbyfZBhzXjm2rqrsAklzZ2n59VHnvzeCpJ0maJuOc4H4D8Om2vRK4d+DY9hbbU3w3STYm2Zpk68zMzAjSlaTpNZZikeTtwBPAh2ZDczSrvcR3D1ZdXFXrqmrdihUr5idRSRIwhtVQSTYArwJOqKrZf/i3A6sHmq0C7mvbe4pLkhbIgo4skpwEvBU4raoeGzi0GTgjyYFJjgLWAl8EvgSsTXJUkgPoJsE3L2TOkqQRjiySfAR4GXB4ku3AeXSrnw4EtiQBuKGq/n1VfS3JVXQT108AZ1fVj9r7vBG4DlgGXFZVXxtVzpKkuY1yNdRr5whfupf27wTeOUf8WuDaeUxNkrSPvNyHJKmXxUKS1MtiIUnqZbGQJPWyWEiSelksJEm9LBaSpF4WC0lSL4uFJKmXxUKS1MtiIUnqZbGQJPWyWEiSelksJEm9LBaSpF4WC0lSL4uFJKmXxUKS1MtiIUnqNbJikeSyJDuS3DYQOzTJliR3tudDWjxJLkqyLcktSY4deM2G1v7OJBtGla8kac9GObL4AHDSLrFNwPVVtRa4vu0DnAysbY+NwHuhKy7AecBLgOO
A82YLjCRp4YysWFTV54EHdgmvBy5v25cDpw/Er6jODcDBSY4EXglsqaoHqupBYAu7FyBJ0ogt9JzF86rqfoD2fESLrwTuHWi3vcX2FN9Nko1JtibZOjMzM++JS9I0m5QJ7swRq73Edw9WXVxV66pq3YoVK+Y1OUmadgtdLL7bTi/Rnne0+HZg9UC7VcB9e4lLkhbQQheLzcDsiqYNwDUD8de3VVHHAw+301TXAScmOaRNbJ/YYpKkBbR8mEZJXlBVt/W33Ok1HwFeBhyeZDvdqqbzgauSnAXcA7ymNb8WOAXYBjwGnAlQVQ8k+SPgS63dH1bVrpPmkqQRG6pYAO9LcgDdctgPV9VDfS+oqtfu4dAJc7Qt4Ow9vM9lwGVD5ilJGoGhTkNV1S8Cr6ObP9ia5MNJfnmkmUmSJsbQcxZVdSfw+8BbgX8DXJTkG0l+bVTJSZImw1DFIskLk1wI3A68HPiVqvq5tn3hCPOTJE2AYecs/hR4P/C2qvr+bLCq7kvy+yPJTJI0MYYtFqcA36+qHwEkeRpwUFU9VlUfHFl2kqSJMOycxV8DzxjYf2aLSZKmwLDF4qCq+t7sTtt+5mhSkiRNmmGLxT/sco+JFwPf30t7SdISMuycxZuBjyWZvS7TkcBvjCYlSdKkGapYVNWXkvws8Hy6K8F+o6r+caSZSZImxrAjC4BfANa017woCVV1xUiykiRNlGEvJPhB4KeBrwA/auECLBaSNAWGHVmsA45pF/yTJE2ZYVdD3Qb8xCgTkSRNrmFHFocDX0/yReCHs8GqOm0kWUmSJsqwxeIPRpmEJGmyDbt09m+S/BSwtqr+OskzgWWjTU2SNCmGvUT57wBXA3/eQiuBvxpVUpKkyTLsaaizgeOAG6G7EVKSI/b3Q5P8J+C36Zbf3kp3z+0jgSuBQ4Gbgd+sqseTHEi3RPfFwN8Dv1FVd+/vZ4/Cmk2fenL77vNPHWMmkjQaw66G+mFVPT67k2Q53T/0+yzJSuD3gHVV9QK601lnABcAF1bVWuBB4Kz2krOAB6vqZ+hutHTB/nyuJGn/DVss/ibJ24BntHtvfwz4X0/hc5e391pOd/Xa++nuund1O345cHrbXt/2acdPSJKn8NmSpH00bLHYBMzQnTL6d8C1dPfj3mdV9R3gT4B76IrEw8BNwENV9URrtp1uXoT2fG977ROt/WG7vm+SjUm2Jtk6MzOzP6lJkvZg2NVQP6a7rer7n+oHJjmEbrRwFPAQ3Sjl5Lk+dvYlezk2mOPFwMUA69at85fmkjSPhr021LeZ+x/oo/fjM18BfLuqZtp7fwL4V8DBSZa30cMqYPZy6NuB1cD2dtrqucAD+/G5C8LJbklL0b5cG2rWQcBr6FYt7Y97gOPbbzW+D5wAbAU+C7yabkXUBuCa1n5z2/9CO/4Zr1ElSQtrqDmLqvr7gcd3qurddBPS+6yqbqSbqL6Zbg7kaXSnj94KnJNkG92cxKXtJZcCh7X4OXTzJ5KkBTTsaahjB3afRjfSePb+fmhVnQect0v4Lrrfcuza9gd0IxlJ0pgMexrqvw9sPwHcDfz6vGcjSZpIw66G+rejTkSSNLmGPQ11zt6OV9W75icdSdIk2pfVUL9AtzIJ4FeAz9N+LCdJWtr25eZHx1bVowBJ/gD4WFX99qgSkyRNjmEv9/GTwOMD+48Da+Y9G0nSRBp2ZPFB4ItJ/pLul9y/SnfZcEnSFBh2NdQ7k3wa+NctdGZVfXl0aUmSJsmwp6Ggu5T4I1X1HrrrNB01opwkSRNm2Nuqnkd3OY5zW+jpwF+MKilJ0mQZdmTxq8BpwD8AVNV9PIXLfUiSFpdhi8Xj7UqvBZDkn40uJUnSpBl2NdRVSf6c7p4TvwO8gXm4EdJS570tJC0Vw66G+pN27+1HgOcD76iqLSPNTJI0MXqLRZJlwHVV9QrAAiFJU6h3zqKqfgQ8luS5C5CPJGkCDTtn8QPg1iRbaCuiAKrq90aSlSRpogxbLD7
VHpKkKbTXYpHkJ6vqnqq6fD4/NMnBwCXAC+iW474BuAP4KN0FCu8Gfr2qHkwS4D3AKcBjwG9V1c3zmY8kae/65iz+anYjycfn8XPfA/zvqvpZ4OeB24FNwPVVtRa4vu0DnAysbY+NwHvnMQ9J0hD6ikUGto+ejw9M8hzgl4BLAarq8ap6CFgPzI5gLgdOb9vrgSuqcwPdbz2OnI9cJEnD6SsWtYftp+JoYAb4n0m+nOSS9ovw51XV/QDt+YjWfiU735Fve4tJkhZIX7H4+SSPJHkUeGHbfiTJo0ke2c/PXA4cC7y3ql5Et7pq017aZ47YboUrycYkW5NsnZmZ2c/UJElz2WuxqKplVfWcqnp2VS1v27P7z9nPz9wObK+qG9v+1XTF47uzp5fa846B9qsHXr8KuG+OXC+uqnVVtW7FihX7mZokaS7DLp2dN1X1t0nuTfL8qroDOAH4entsAM5vz9e0l2wG3pjkSuAlwMOzp6tGZfCaTpKkMRSL5j8CH0pyAHAXcCbdKOeqJGcB9wCvaW2vpVs2u41u6eyZC5+uJE23sRSLqvoKsG6OQyfM0baAs0eelCRpj/bltqqSpCllsZAk9bJYSJJ6jWuCe+p41zxJi5kjC0lSL4uFJKmXxUKS1MtiIUnqZbGQJPWyWEiSelksJEm9LBaSpF4WC0lSL4uFJKmXxUKS1MtiIUnqZbGQJPWyWEiSelksJEm9xlYskixL8uUkn2z7RyW5McmdST6a5IAWP7Dtb2vH14wrZ0maVuMcWbwJuH1g/wLgwqpaCzwInNXiZwEPVtXPABe2dpKkBTSWYpFkFXAqcEnbD/By4OrW5HLg9La9vu3Tjp/Q2kuSFsi4RhbvBt4C/LjtHwY8VFVPtP3twMq2vRK4F6Adf7i130mSjUm2Jtk6MzMzytwlaeoseLFI8ipgR1XdNBieo2kNceyfAlUXV9W6qlq3YsWKechUkjRr+Rg+86XAaUlOAQ4CnkM30jg4yfI2elgF3NfabwdWA9uTLAeeCzyw8GlL0vRa8JFFVZ1bVauqag1wBvCZqnod8Fng1a3ZBuCatr257dOOf6aqdhtZSJJGZ5J+Z/FW4Jwk2+jmJC5t8UuBw1r8HGDTmPKTpKk1jtNQT6qqzwGfa9t3AcfN0eYHwGsWNLEFtGbTp57cvvv8U8eYiSTt2SSNLCRJE8piIUnqZbGQJPWyWEiSelksJEm9LBaSpF4WC0lSL4uFJKmXxUKS1Gusv+DWzvw1t6RJ5chCktTLYiFJ6mWxkCT1cs5iEXAuQ9K4ObKQJPWyWEiSenkaagwGTyuN+v09bSVpPlgsNJEseNJkWfDTUElWJ/lsktuTfC3Jm1r80CRbktzZng9p8SS5KMm2JLckOXahc5akaTeOkcUTwH+uqpuTPBu4KckW4LeA66vq/CSbgE3AW4GTgbXt8RLgve15SRv1qSpJ2hcLXiyq6n7g/rb9aJLbgZXAeuBlrdnlwOfoisV64IqqKuCGJAcnObK9z1TzVI2khTLW1VBJ1gAvAm4EnjdbANrzEa3ZSuDegZdtb7Fd32tjkq1Jts7MzIwybUmaOmMrFkmeBXwceHNVPbK3pnPEardA1cVVta6q1q1YsWK+0pQkMaZikeTpdIXiQ1X1iRb+bpIj2/EjgR0tvh1YPfDyVcB9C5WrJGk8q6ECXArcXlXvGji0GdjQtjcA1wzEX99WRR0PPOx8hSQtrHGshnop8JvArUm+0mJvA84HrkpyFnAP8Jp27FrgFGAb8Bhw5sKmO1lcJSVpHMaxGur/Mvc8BMAJc7Qv4OyRJqWduMpK0q68NpQkqZfFQpLUy2IhSerlhQSn1K4T5U9lbsI5Dmnpc2QhSerlyGKJcEmtpFGyWEwRC4qk/eVpKElSL4uFJKmXxUKS1Ms5Cw3NJbLS9HJkIUnqZbGQJPWyWEiSejlnIWD0v8FwvkNa3CwWGplhCpBFRFocPA0lSerlyGKJ8xIfkuaDxUJ7ZbGRBIu
oWCQ5CXgPsAy4pKrOH3NKU+2pFBELkLT4LIo5iyTLgD8DTgaOAV6b5JjxZiVJ02OxjCyOA7ZV1V0ASa4E1gNfH2tW2s1SG3G4WkvqLJZisRK4d2B/O/CSwQZJNgIb2+73ktyxj59xOPB3+53h0jMx/ZELxp0BAIfngsnojwkyMf+NTIil0B8/tacDi6VYZI5Y7bRTdTFw8X5/QLK1qtbt7+uXGvtjZ/bH7uyTnS31/lgUcxZ0I4nVA/urgPvGlIskTZ3FUiy+BKxNclSSA4AzgM1jzkmSpsaiOA1VVU8keSNwHd3S2cuq6mvz/DH7fQpribI/dmZ/7M4+2dmS7o9UVX8rSdJUWyynoSRJY2SxkCT1mvpikeSkJHck2ZZk07jzmU9JLkuyI8ltA7FDk2xJcmd7PqTFk+Si1g+3JDl24DUbWvs7k2wYiL84ya3tNRclmWuJ88RIsjrJZ5PcnuRrSd7U4tPcJwcl+WKSr7Y++a8tflSSG9v3+2hbWEKSA9v+tnZ8zcB7ndvidyR55UB80f2NJVmW5MtJPtn2p7o/AKiqqX3QTZZ/CzgaOAD4KnDMuPOax+/3S8CxwG0DsT8GNrXtTcAFbfsU4NN0v2k5HrixxQ8F7mrPh7TtQ9qxLwL/sr3m08DJ4/7OPf1xJHBs23428E26y8dMc58EeFbbfjpwY/uuVwFntPj7gP/Qtn8XeF/bPgP4aNs+pv39HAgc1f6uli3WvzHgHODDwCfb/lT3R1VN/cjiycuIVNXjwOxlRJaEqvo88MAu4fXA5W37cuD0gfgV1bkBODjJkcArgS1V9UBVPQhsAU5qx55TVV+o7q/jioH3mkhVdX9V3dy2HwVup7s6wDT3SVXV99ru09ujgJcDV7f4rn0y21dXAye00dN64Mqq+mFVfRvYRvf3tej+xpKsAk4FLmn7YYr7Y9a0F4u5LiOycky5LJTnVdX90P3jCRzR4nvqi73Ft88RXxTa6YIX0f2f9FT3STvl8hVgB13h+xbwUFU90ZoMfo8nv3s7/jBwGPveV5Ps3cBbgB+3/cOY7v4ALBa9lxGZInvqi32NT7wkzwI+Dry5qh7ZW9M5YkuuT6rqR1X1L+iujHAc8HNzNWvPS7pPkrwK2FFVNw2G52g6Ff0xaNqLxTReRuS77XQJ7XlHi++pL/YWXzVHfKIleTpdofhQVX2ihae6T2ZV1UPA5+jmLA5OMvuj3cHv8eR3b8efS3eqc1/7alK9FDgtyd10p4heTjfSmNb+eNK0F4tpvIzIZmB29c4G4JqB+OvbCqDjgYfbKZnrgBOTHNJWCZ0IXNeOPZrk+HaO9vUD7zWRWp6XArdX1bsGDk1zn6xIcnDbfgbwCrq5nM8Cr27Ndu2T2b56NfCZNj+zGTijrQ46ClhLN9m/qP7GqurcqlpVVWvocv1MVb2OKe2PnYx7hn3cD7oVL9+kO0/79nHnM8/f7SPA/cA/0v0fzVl051OvB+5sz4e2tqG7wdS3gFuBdQPv8wa6CbptwJkD8XXAbe01f0q7IsCkPoBfpBvy3wJ8pT1OmfI+eSHw5dYntwHvaPGj6f5x2wZ8DDiwxQ9q+9va8aMH3uvt7XvfwcAqsMX6Nwa8jH9aDTX1/eHlPiRJvab9NJQkaQgWC0lSL4uFJKmXxUKS1MtiIUnqZbGQJPWyWEiSev1/L0v4a+EYXAkAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] @@ -435,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -444,7 +442,7 @@ "1.8388773035440982" ] }, - "execution_count": 56, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -456,7 +454,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -465,7 +463,7 @@ "4.624228179818687" ] }, - "execution_count": 57, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -496,7 +494,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -563,7 +561,7 @@ "1 Campinas 1500" ] }, - "execution_count": 64, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -576,7 +574,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -588,7 +586,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -600,7 +598,7 @@ "Name: bathroom, dtype: float64" ] }, - "execution_count": 71, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -622,7 +620,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -671,7 +669,7 @@ "bathroom 0.71589 1.00000" ] }, - "execution_count": 79, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -682,7 +680,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -691,7 +689,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -700,7 +698,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -761,45 +759,393 @@ }, { "cell_type": "code", - "execution_count": 
null, + "execution_count": 53, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import seaborn as sns\n", + "import matplotlib.pyplot as plt" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "São Paulo 1857\n", + "Belo Horizonte 276\n", + "Rio de Janeiro 229\n", + "Porto Alegre 89\n", + "Campinas 88\n", + "Name: city, dtype: int64" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.query('aluguel_alto==\"Alto\"').city.value_counts()" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.countplot(x='city',data=df)" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.barplot(x='city', y='valor_aluguel', data=df)" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#resetando o index, colocou o eixo x em ordem alfabética e principalmente a média\n", + "sns.barplot(x='city', y='valor_aluguel', data=df.groupby('city')['valor_aluguel'].mean().reset_index())" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 39, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(df['valor_aluguel'])" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.scatterplot(x='valor_aluguel',hue='city',y='bathroom',size='aluguel_alto', data=df)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.heatmap(df.corr(),annot=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "g = sns.FacetGrid(df,col='city', row='aluguel_alto')\n", + "g.map(plt.hist,'valor_aluguel')" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aluguel_altovalor_aluguelcity
0Alto50101
1Alto50152
2Alto50251
3Alto50501
4Alto50581
............
880Baixo49602
881Baixo49901
882Baixo49981
883Baixo49991
884Baixo5000118
\n", + "

885 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " aluguel_alto valor_aluguel city\n", + "0 Alto 5010 1\n", + "1 Alto 5015 2\n", + "2 Alto 5025 1\n", + "3 Alto 5050 1\n", + "4 Alto 5058 1\n", + ".. ... ... ...\n", + "880 Baixo 4960 2\n", + "881 Baixo 4990 1\n", + "882 Baixo 4998 1\n", + "883 Baixo 4999 1\n", + "884 Baixo 5000 118\n", + "\n", + "[885 rows x 3 columns]" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.query('city==\"São Paulo\"').groupby(['aluguel_alto','valor_aluguel'])['city'].count().reset_index()" + ] }, { "cell_type": "code", @@ -825,7 +1171,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.2" + "version": "3.7.6" } }, "nbformat": 4, diff --git a/Semana 6/.ipynb_checkpoints/Semana 6 - Aceleradev-checkpoint.ipynb b/Semana 6/.ipynb_checkpoints/Semana 6 - Aceleradev-checkpoint.ipynb new file mode 100644 index 0000000..8538c62 --- /dev/null +++ b/Semana 6/.ipynb_checkpoints/Semana 6 - Aceleradev-checkpoint.ipynb @@ -0,0 +1,1873 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AceleraDev DataScience \n", + "\n", + "## Setup\n", + "\n", + "https://www.kaggle.com/c/house-prices-advanced-regression-techniques/data\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "#lendo os pacotes\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('train.csv')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analysis" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Selecao por completude" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + 
"#Criando um dataframe auxliar para analisar a consistencia das variaveis\n", + "cons = pd.DataFrame({'colunas' : df.columns,\n", + " 'tipo': df.dtypes,\n", + " 'missing' : df.isna().sum(),\n", + " 'size' : df.shape[0],\n", + " 'unicos': df.nunique()})\n", + "cons['percentual'] = round(cons['missing'] / cons['size'],2)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAD4CAYAAADrRI2NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAARnElEQVR4nO3de5BedX3H8fcHAuUiCpg1ZbgYrIgyXhBXxLFqFbGKldDWUhhpo8OYjlpHq22N1qn2NoPT8dqx1QjWaL1wqUjqtZiiTDsCBvGCIAURNNyyIoiCBcFv/3hOdE022ZNkz7Nsfu/XzM5zrs/5/tjw2fP8znl+J1WFJKkdu8x3AZKk8TL4JakxBr8kNcbgl6TGGPyS1JhF811AH4sXL66lS5fOdxmStKBcdtllP6iqiU2XL4jgX7p0KevWrZvvMiRpQUlyw0zL7eqRpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGLIhv7u6IpSs/Pd8ljN31p79gvkuQ9ADmGb8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGjNY8Cc5PMnXpv3cmeQ1SfZPckGSa7rX/YaqQZK0ucGCv6qurqojq+pI4EnA3cB5wEpgbVUdBqzt5iVJYzKurp5jge9U1Q3AMmB1t3w1cOKYapAkMb7gPxn4WDe9pKpu7qZvAZbMtEOSFUnWJVk3NTU1jholqQmDB3+S3YETgHM2XVdVBdRM+1XVqqqarKrJiYmJgauUpHaM44z/+cBXq+rWbv7WJAcAdK8bxlCDJKkzjuA/hV928wCsAZZ308uB88dQgySpM2jwJ9kbOA74xLTFpwPHJbkGeE43L0kak0Eftl5VdwEP3WTZbYzu8pEkzQO/uStJjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTFDP3N33yTnJvl2kquSPDXJ/kkuSHJN97rfkDVIkn7V0Gf87wI+V1WPBp4AXAWsBNZW1WHA2m5ekjQmgwV/kocAzwDOBKiqe6vqDmAZsLrbbDVw4lA1SJI2N+QZ/6HAFPCvSS5PckaSvYElVXVzt80twJKZdk6yIsm6JOumpqYGLFOS2jJk8C8CjgL+paqeCNzFJt06VVVAzbRzVa2qqsmqmpyYmBiwTElqy5DBvx5YX1WXdPPnMvpDcGuSAwC61w0D1iBJ2sRgwV9VtwDfT3J4t+hY4EpgDbC8W7YcOH+oGiRJm1s08Pu/CvhIkt2B64CXMvpjc3aS04AbgJMGrkGSNM2gwV9VXwMmZ1h17JDHlSRtmd/c
laTGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUmEEfvZjkeuDHwP3AfVU1mWR/4CxgKXA9cFJV3T5kHZKkXxrHGf+zqurIqtr47N2VwNqqOgxY281LksZkPrp6lgGru+nVwInzUIMkNWvo4C/gP5NclmRFt2xJVd3cTd8CLBm4BknSNIP28QO/WVU3JnkYcEGSb09fWVWVpGbasftDsQLgkEMOGbhMSWrHoGf8VXVj97oBOA84Grg1yQEA3euGLey7qqomq2pyYmJiyDIlqSmDBX+SvZPss3EaeC5wBbAGWN5tthw4f6gaJEmbG7KrZwlwXpKNx/loVX0uyVeAs5OcBtwAnDRgDZKkTfQK/iSPq6pvbssbV9V1wBNmWH4bcOy2vJckae707er55ySXJnlFkocMWpEkaVC9gr+qng68GDgYuCzJR5McN2hlkqRB9L64W1XXAG8CXg88E3h3km8n+b2hipMkzb1ewZ/k8UneAVwFPBt4YVU9ppt+x4D1SZLmWN+7ev4JOAN4Y1X9dOPCqropyZsGqUySNIi+wf8C4KdVdT9Akl2AParq7qr68GDVSZLmXN8+/i8Ae06b36tbJklaYPoG/x5V9ZONM930XsOUJEkaUt/gvyvJURtnkjwJ+OlWtpckPUD17eN/DXBOkpuAAL8O/OFgVUmSBtMr+KvqK0keDRzeLbq6qn42XFmSpKFsyyBtT2b0nNxFwFFJqKoPDVKVJGkwfQdp+zDwG8DXGD04HUZP1zL4JWmB6XvGPwkcUVUzPi1LkrRw9L2r5wpGF3QlSQtc3zP+xcCVSS4F7tm4sKpOGKQqSdJg+gb/W4YsQpI0Pn1v5/xSkocDh1XVF5LsBew6bGmSpCH0HZb5ZcC5wPu6RQcCnxyqKEnScPpe3H0l8DTgTvjFQ1ke1mfHJLsmuTzJp7r5Q5NckuTaJGcl2X17CpckbZ++wX9PVd27cSbJIkb38ffxakYPcNnorcA7quqRwO3AaT3fR5I0B/oG/5eSvBHYs3vW7jnAf8y2U5KDGI3lf0Y3H0ZP7Tq322Q1cOK2Fi1J2n59g38lMAV8E/gT4DOMnr87m3cCfwn8vJt/KHBHVd3Xza9ndL1gM0lWJFmXZN3U1FTPMiVJs+l7V8/Pgfd3P70k+R1gQ1VdluS3trWwqloFrAKYnJz0G8OSNEf6jtXzXWbo06+qR2xlt6cBJyQ5HtgDeDDwLmDfJIu6s/6DgBu3uWpJ0nbblrF6NtoD+ANg/63tUFVvAN4A0J3x/3lVvTjJOcCLgI8Dy4Hzt7FmSdIO6NXHX1W3Tfu5sareyeii7fZ4PfDaJNcy6vM/czvfR5K0Hfp29Rw1bXYXRp8Aeo/lX1VfBL7YTV8HHN27QknSnOob3m+bNn0fcD1w0pxXI0kaXN+7ep41dCGSpPHo29Xz2q2tr6q3z005kqShbctdPU8G1nTzLwQuBa4ZoihJ0nD6Bv9BwFFV9WOAJG8BPl1Vpw5VmCRpGH2HbFgC3Dtt/t5umSRpgel7xv8h4NIk53XzJzIaYE2StMD0vavnH5J8Fnh6t+ilVXX5cGVJkobSt6sHYC/gzqp6F7A+yaED1SRJGlDfRy++mdFQC2/oFu0G/NtQRUmShtP3jP93gROAuwCq6iZgn6GKkiQNp2/w31tVRTc0c5K9hytJkjSkvsF/dpL3MRpL/2XAF9iGh7JIkh44Zr2rp3tO7lnAo4E7gcOBv66qCwauTZI0gFmDv6oqyWeq6nGAYS9JC1zfrp6vJnnyoJVIksai7zd3nwKcmuR6Rnf2hNGHgccPVZgkaRhbDf4kh1TV94DfHlM9kqSBzdbV80mAqroBeHtV3TD9Z2s7JtkjyaVJvp7kW0n+plt+aJJLklyb5Kwku89NUyRJfcwW/Jk2/YhtfO97gGdX1ROAI4HnJTkGeCvw
jqp6JHA7cNo2vq8kaQfMFvy1helZ1chPutndup8Cng2c2y1fzWikT0nSmMx2cfcJSe5kdOa/ZzcNv7y4++Ct7ZxkV+Ay4JHAe4DvAHdU1X3dJuuBA7ew7wpgBcAhhxzSoymSpD62GvxVteuOvHlV3Q8cmWRf4DxGXwLru+8qYBXA5OTkNn3akCRt2bYMy7zdquoO4ELgqYyGfdj4B+cg4MZx1CBJGhks+JNMdGf6JNkTOA64itEfgBd1my0Hzh+qBknS5vp+gWt7HACs7vr5dwHOrqpPJbkS+HiSvwcuB84csAZJ0iYGC/6q+gbwxBmWXwccPdRxJUlbN5Y+fknSA4fBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0Z8mHrBye5MMmVSb6V5NXd8v2TXJDkmu51v6FqkCRtbsgz/vuA11XVEcAxwCuTHAGsBNZW1WHA2m5ekjQmgwV/Vd1cVV/tpn8MXAUcCCwDVnebrQZOHKoGSdLmxtLHn2Qp8ETgEmBJVd3crboFWLKFfVYkWZdk3dTU1DjKlKQmDB78SR4E/Dvwmqq6c/q6qiqgZtqvqlZV1WRVTU5MTAxdpiQ1Y9DgT7Ibo9D/SFV9olt8a5IDuvUHABuGrEGS9KuGvKsnwJnAVVX19mmr1gDLu+nlwPlD1SBJ2tyiAd/7acAfAd9M8rVu2RuB04Gzk5wG3ACcNGANkqRNDBb8VfXfQLaw+tihjitJ2jq/uStJjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTFDPmz9A0k2JLli2rL9k1yQ5Jrudb+hji9JmtmQZ/wfBJ63ybKVwNqqOgxY281LksZosOCvqouAH26yeBmwupteDZw41PElSTMbdx//kqq6uZu+BVgy5uNLUvPm7eJuVRVQW1qfZEWSdUnWTU1NjbEySdq5jTv4b01yAED3umFLG1bVqqqarKrJiYmJsRUoSTu7cQf/GmB5N70cOH/Mx5ek5g15O+fHgC8DhydZn+Q04HTguCTXAM/p5iVJY7RoqDeuqlO2sOrYoY4pSZqd39yVpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaM9hYPZI015au/PR8lzBW15/+gkHe1zN+SWqMwS9JjTH4JakxBr8kNcbgl6TGGPyS1BiDX5IaMy/38Sd5HvAuYFfgjKryoevaIa3d3w3D3eOtnd/Yz/iT7Aq8B3g+cARwSpIjxl2HJLVqPrp6jgaurarrqupe4OPAsnmoQ5KaNB9dPQcC3582vx54yqYbJVkBrOhmf5Lk6u083mLgB9u574KUt7bXZvw9t6KpNs/B7/jhMy18wI7VU1WrgFU7+j5J1lXV5ByUtGDY5jbY5p3fUO2dj66eG4GDp80f1C2TJI3BfAT/V4DDkhyaZHfgZGDNPNQhSU0ae1dPVd2X5E+BzzO6nfMDVfWtAQ+5w91FC5BtboNt3vkN0t5U1RDvK0l6gPKbu5LUGINfkhqz0wR/kucluTrJtUlWzrD+15Kc1a2/JMnS8Vc5t3q0+bVJrkzyjSRrk8x4T+9CMlubp233+0kqyYK+9a9Pe5Oc1P2ev5Xko+Ouca71+Hd9SJILk1ze/ds+fj7qnEtJPpBkQ5IrtrA+Sd7d/Tf5RpKjduiAVbXgfxhdJP4O8Ahgd+DrwBGbbPMK4L3d9MnAWfNd9xja/Cxgr2765S20udtuH+Ai4GJgcr7rHvh3fBhwObBfN/+w+a57DG1eBby8mz4CuH6+656Ddj8DOAq4Ygvrjwc+CwQ4BrhkR463s5zx9xkGYhmwups+Fzg2ScZY41ybtc1VdWFV3d3NXszoOxMLWd/h
Pv4OeCvwf+MsbgB92vsy4D1VdTtAVW0Yc41zrU+bC3hwN/0Q4KYx1jeIqroI+OFWNlkGfKhGLgb2TXLA9h5vZwn+mYaBOHBL21TVfcCPgIeOpbph9GnzdKcxOmNYyGZtc/cR+OCq2hmG6+zzO34U8Kgk/5Pk4m7k24WsT5vfApyaZD3wGeBV4yltXm3r/+9b9YAdskFzJ8mpwCTwzPmuZUhJdgHeDrxknksZp0WMunt+i9EnuouSPK6q7pjXqoZ1CvDBqnpbkqcCH07y2Kr6+XwXtlDsLGf8fYaB+MU2SRYx+oh421iqG0avoS+SPAf4K+CEqrpnTLUNZbY27wM8FvhikusZ9YWuWcAXePv8jtcDa6rqZ1X1XeB/Gf0hWKj6tPk04GyAqvoysAejwdt2ZnM61M3OEvx9hoFYAyzvpl8E/Fd1V00WqFnbnOSJwPsYhf5C7/uFWdpcVT+qqsVVtbSqljK6rnFCVa2bn3J3WJ9/159kdLZPksWMun6uG2eRc6xPm78HHAuQ5DGMgn9qrFWO3xrgj7u7e44BflRVN2/vm+0UXT21hWEgkvwtsK6q1gBnMvpIeC2jiygnz1/FO65nm/8ReBBwTncd+3tVdcK8Fb2DerZ5p9GzvZ8HnpvkSuB+4C+qasF+ku3Z5tcB70/yZ4wu9L5kgZ/EkeRjjP6AL+6uXbwZ2A2gqt7L6FrG8cC1wN3AS3foeAv8v5ckaRvtLF09kqSeDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUmP8HKu3LZnK+EasAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "cons.percentual.plot.hist( bins = 5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Removendo colunas com dados missing" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Contagem de colunas com ATÉ 20% de dados faltantes 76\n", + "Contagem de colunas com 0% de dados faltantes 63\n" + ] + } + ], + "source": [ + "print('Contagem de colunas com ATÉ 20% de dados faltantes', cons[cons.percentual < 0.2].shape[0])\n", + "print('Contagem de colunas com 0% de dados faltantes', cons[cons.percentual == 0].shape[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "int64 35\n", + "object 28\n", + "Name: tipo, dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cons[cons.percentual == 0]['tipo'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "cons['completa'] = ['completa' if x == 0 else 'faltante' for x in cons['percentual']]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "mantem = list(cons[cons['completa'] == 'completa']['colunas'])\n", + "df = df[mantem]" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "colunas_numericas = list(cons[((cons['tipo'] != 'object') &\n", + " (cons['completa'] == 'completa'))]['colunas'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploração" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "Id\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSSubClass\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD4CAYAAADsKpHdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAPEElEQVR4nO3dfaxkd13H8feHXZ6KsAhdkfSB27K12gSUdUGiFKOitKzb8qDYBiJK07URjIQYWR6C/cdk0QBag8IiDQ/yDIK72RIKxsA/QNmWQreU2qUssqW0pSRbhIZS+PrH/C5Mrzv3zt3OmTNz7/uVTPbM786995Nfpv3c3zlnzklVIUnSA/oOIEmaDRaCJAmwECRJjYUgSQIsBElSs7HvAPfHiSeeWAsLC33HkKS5cvXVV3+7qjYvHZ/rQlhYWODAgQN9x5CkuZLk68cad5eRJAmwECRJjYUgSQIsBElSM5eFkGRHkj1Hjx7tO4okrRlzWQhVta+qdm7atKnvKJK0ZsxlIUiSJs9CkCQBc/7BtPtjYdf+Y44f3r19ykkkaTa4QpAkARaCJKmZy0LwtFNJmry5LARPO5WkyZvLQpAkTZ6FIEkCLARJUmMhSJIAC0GS1FgIkiTAQpAkNXNZCH4wTZImby4LwQ+mSdLkzWUhSJImz0KQJAEWgiSpsRAkSYCFIElqLARJEmAhSJIaC0GSBFgIkqTGQpAkAbCx7wDHI8kOYMeWLVv6jrImLOzaf8zxw7u3TzmJpD7N5QrBaxlJ0uTNZSFIkibPQpAkARaCJKmxECRJgIUgSWosBEkSYCFIkhoLQZIEWAiSpMZCkCQBFoIkqbEQJEmAhSBJaiwESRIwp4WQZEeSPUePHu07iiStGXNZCN4PQZImby4LQZI0eRaCJAmwECRJjYUgSQIsBElSYyFIkgALQZLUWAiSJMBCkCQ1FoIkCbAQJEnNxr4D6Pgt7Np/zPHDu7dPOYmktcAVgiQJsBAkSY2FIEkCLARJUmMhSJIAC0GS1FgIkiTAQpAkNRaCJAmwECRJjYUgSQJm6FpGSZ4NbAceAbytqq7sOZIkrSudrhCSXJ7k9iQHl4yfk+TGJIeS7AKoqo9W1cXAJcAfdZlLkvT/db3L6O3AOcMDSTYAbwLOBc4CLkxy1tBLXtO+Lkmaok4Loao+DXxnyfBTgENVdXNV3QO8Dzg/A68DPlZV14z6mUl2JjmQ5MAdd9zRXXhJWmf6OKh8EvCNoedH2thfAM8A/iDJJaO+uar2VNW2qtq2efPmbpNK0joyMweVq+oy4LK+c0jSetXHCuEW4JSh5ye3MUlSj/oohM8DZyQ5LcmDgAuAvav5AUl2JNlz9OjRTgJK0nrU9Wmn7wU+A5yZ5EiSi6rqXuClwMeBG4APVNX1q/m5VbWvqnZu2rRp8qElaZ3q9BhCVV04YvwK4Iouf7e01i3s2n/M8cO7t085idYKL10hSQLGLIQkT+g6iCSpX+OuEP45yVVJ/jxJ7zvuPagsSZM3ViFU1dnACxicLnp1kvck+d1Oky2fx4PKkjRhYx9DqKqbGFxn6BXAbwKXJflKkud2FU6SND3jHkN4YpI3MjhN9LeBHVX1S237jR3mkyRNybinnf4T8K/Aq6rq7sXBqvpmktd0kkySNFXjFsJ24O6q+hFAkgcAD6mq71fVuzpLN0KSHcCOLVu2TPtXS9KaNW4hfJLBlUj/tz0/AbgS+PUuQq2kqvYB+7Zt23bxtH6nHwKStNaNe1D5IVW1
WAa07RO6iSRJ6sO4hfC9JFsXnyT5VeDuZV4vSZoz4+4yehnwwSTfBAL8PN73WJLWlLEKoao+n+QXgTPb0I1V9cPuYknS/eexv9VZzdVOnwwstO/ZmoSqemcnqSRJUzdWISR5F/B44FrgR224gF4KwdNOZ5N/jUnzbdwVwjbgrKqqLsOMq4/TTjVdlos0feOeZXSQwYFkSdIaNe4K4UTgy0muAn6wOFhV53WSSpI0deMWwqVdhpAk9W/c004/leRxwBlV9ckkJwAbuo0m3X8ei5DGN+7lry8GPgS8pQ2dBHy0q1CSpOkb96DyS4DfAO6Cn9ws5+e6CrUSb6EpSZM3biH8oKruWXySZCODzyH0wltoStLkjVsIn0ryKuCh7V7KHwT2dRdLkjRt4xbCLuAO4Drgz4ArGNxfWZK0Rox7ltGPgbe2hyRpDRr3WkZf4xjHDKrq9IknkiT1YjXXMlr0EOAPgUdNPo4kqS9jHUOoqjuHHrdU1T8AfrJHktaQcXcZbR16+gAGK4bV3EtBkjTjxv2f+uuHtu8FDgPPn3iaMXk/BEmavHHPMvqtroOshvdDkKTJG3eX0cuX+3pVvWEycSRJfVnNWUZPBva25zuAq4CbugglSZq+cQvhZGBrVX0XIMmlwP6qemFXwSRJ0zXupSseA9wz9PyeNiZJWiPGXSG8E7gqyUfa82cD7+gmkiSpD+OeZfS3ST4GnN2G/rSqvtBdLEnStI27ywjgBOCuqvpH4EiS0zrKJEnqwbi30Pwb4BXAK9vQA4F/6yqUJGn6xl0hPAc4D/geQFV9E3h4V6EkSdM3biHcU1VFuwR2kod1F0mS1IdxzzL6QJK3AI9McjHwYnq8WY7XMpK0Hizs2n/M8cO7u7nY9IorhCQB3g98CPgwcCbw2qr6p04SjaGq9lXVzk2bNvUVQZLWnBVXCFVVSa6oqicAn5hCJklSD8Y9hnBNkid3mkSS1KtxjyH8GvDCJIcZnGkUBouHJ3YVTJI0XcsWQpJTq+p/gGdOKY8kqScrrRA+yuAqp19P8uGqet40QkmSpm+lYwgZ2j69yyCSpH6tVAg1YluStMastMvol5PcxWCl8NC2DT89qPyITtNJkqZm2UKoqg3TCiJJ6tdqLn8tSVrDLARJEmAhSJIaC0GSBFgIkqRm3GsZzRTvh6ClRl03XtL45nKF4P0QJGny5rIQJEmTZyFIkgALQZLUWAiSJMBCkCQ1FoIkCbAQJEnNXH4wTWuDHyaTZosrBEkSYCFIkhoLQZIEWAiSpMZCkCQBFoIkqbEQJEmAhSBJaiwESRJgIUiSGgtBkgR4LaO54DV/JE2DKwRJEmAhSJIaC0GSBMxQISQ5Pcnbknyo7yyStB51WghJLk9ye5KDS8bPSXJjkkNJdgFU1c1VdVGXeSRJo3W9Qng7cM7wQJINwJuAc4GzgAuTnNVxDknSCjo97bSqPp1kYcnwU4BDVXUzQJL3AecDXx7nZybZCewEOPXUUyeWVZK6NuoU8sO7t085ybH1cQzhJOAbQ8+PACcleXSSNwNPSvLKUd9cVXuqaltVbdu8eXPXWSVp3ZiZD6ZV1Z3AJX3nkKT1qo8Vwi3AKUPPT25jkqQe9VEInwfOSHJakgcBFwB7V/MDkuxIsufo0aOdBJSk9ajr007fC3wGODPJkSQXVdW9wEuBjwM3AB+oqutX83Oral9V7dy0adPkQ0vSOtX1WUYXjhi/Ariiy98tSVqdmfmksiSpXxaCJAmY00LwoLIkTd5cFoIHlSVp8uayECRJk2chSJIAC0GS1MxlIXhQWZImby4LwYPKkjR5c1kIkqTJsxAkSYCFIElqLARJEmAhSJKauSwETzuVpMmby0LwtFNJmry5LARJ0uRZCJIkwEKQJDUWgiQJsBAkSc1cFoKnnUrS5M1lIXjaqSRN3lwWgiRp8iwESRJgIUiSGgtBkgRYCJKkxkKQJAEWgiSp2dh3gOORZAewY8uWLX1HWbWFXftHfu3w
7u1TTCJJ9zWXKwQ/mCZJkzeXhSBJmjwLQZIEWAiSpMZCkCQBFoIkqbEQJEmAhSBJaiwESRIAqaq+Mxy3JHcAX+/wV5wIfLvDn98FM3dv3vKCmadlXjI/rqo2Lx2c60LoWpIDVbWt7xyrYebuzVteMPO0zGPmYe4ykiQBFoIkqbEQlren7wDHwczdm7e8YOZpmcfMP+ExBEkS4ApBktRYCJIkwEIAIMkpSf4ryZeTXJ/kL9v4pUluSXJtezyr76zDkhxOcl3LdqCNPSrJJ5Lc1P792b5zLkpy5tBcXpvkriQvm7V5TnJ5ktuTHBwaO+a8ZuCyJIeSfCnJ1hnK/PdJvtJyfSTJI9v4QpK7h+b7zTOSd+T7IMkr2xzfmOSZ0867TOb3D+U9nOTaNt77HB+Xqlr3D+CxwNa2/XDgv4GzgEuBv+o73zK5DwMnLhn7O2BX294FvK7vnCOybwC+BTxu1uYZeDqwFTi40rwCzwI+BgR4KvC5Gcr8e8DGtv26ocwLw6+bobzHfB+0/xa/CDwYOA34KrBhFjIv+frrgdfOyhwfz8MVAlBVt1bVNW37u8ANwEn9pjpu5wPvaNvvAJ7dY5bl/A7w1arq8pPmx6WqPg18Z8nwqHk9H3hnDXwWeGSSx04n6U8dK3NVXVlV97annwVOnnauUUbM8SjnA++rqh9U1deAQ8BTOgs3wnKZkwR4PvDeqYaaMAthiSQLwJOAz7Whl7Yl9+WztPulKeDKJFcn2dnGHlNVt7btbwGP6Sfaii7gvv/xzPI8w+h5PQn4xtDrjjCbf0y8mMFKZtFpSb6Q5FNJzu4r1DEc630wD3N8NnBbVd00NDarczyShTAkyc8AHwZeVlV3Af8CPB74FeBWBkvCWfK0qtoKnAu8JMnTh79Yg7XrzJ1XnORBwHnAB9vQrM/zfczqvI6S5NXAvcC729CtwKlV9STg5cB7kjyir3xD5up9sMSF3PcPnFmd42VZCE2SBzIog3dX1b8DVNVtVfWjqvox8FZ6WKYup6puaf/eDnyEQb7bFndZtH9v7y/hSOcC11TVbTD789yMmtdbgFOGXndyG5sJSf4E+H3gBa3IaLte7mzbVzPYJ/8LvYVslnkfzPocbwSeC7x/cWxW53glFgI/2f/3NuCGqnrD0PjwvuDnAAeXfm9fkjwsycMXtxkcQDwI7AVe1F72IuA/+km4rPv8NTXL8zxk1LzuBf64nW30VODo0K6lXiU5B/hr4Lyq+v7Q+OYkG9r26cAZwM39pPypZd4He4ELkjw4yWkM8l417XzLeAbwlao6sjgwq3O8or6Pas/CA3gag10AXwKubY9nAe8Crmvje4HH9p11KPPpDM68+CJwPfDqNv5o4D+Bm4BPAo/qO+uS3A8D7gQ2DY3N1DwzKKtbgR8y2F990ah5ZXB20ZsY/AV4HbBthjIfYrDvffE9/eb22ue198y1wDXAjhnJO/J9ALy6zfGNwLmzMsdt/O3AJUte2/scH8/DS1dIkgB3GUmSGgtBkgRYCJKkxkKQJAEWgiSpsRAkSYCFIElq/g8RBMP2SqaLQAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LotArea\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQIUlEQVR4nO3df+wkd13H8eeLllKKeLT0xKbt8W1pU7wElONLhQgYjULL5fit9iKBYNMToUZiTLgC0f5jUk0ArRboIQ2CWFp+2kuPFKoImhD6Awu0lKNHPeQKUiDxDpFQWt7+sXPj9vzu3X7bm53v7D4fyeY789nZ3fd8bndfN/OZnUlVIUkSwCP6LkCStHYYCpKklqEgSWoZCpKklqEgSWod23cBD8fJJ59cS0tLfZchSYNy6623freq1q9036BDYWlpiVtuuaXvMiRpUJJ8fdJ97j6SJLUGGQpJtiTZsX///r5LkaS5MshQqKqdVbVt3bp1fZciSXNlkKEgSeqGoSBJahkKkqTWIEPBgWZJ6sYgQ8GBZknqxqB/vNaFpe3Xr9i+97LNM65EkmZvkFsKkqRuDDIUHFOQpG4MMhQcU5CkbgwyFCRJ3TAUJEktQ0GS1DIUJEmtQYaCRx9JUjcG+eO1qtoJ7FxeXr5oVq856Udt4A/bJM2PQW4pSJK6YShIklqGgiSpZShIklqGgiSpNchQ8JBUSerGIEPBE+JJUjcGGQqSpG4YCpKklqEgSWoZCpKklqEgSWoZCpKk1iBDwd8pSFI3BhkK/k5BkroxyFCQJHXDUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVJrkKHgCfEkqRuDDAVPiCdJ3RhkKEiSumEoSJJax/ZdQF+Wtl/fdwmStOa4pSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJaq2ps6QmeTGwGfhp4N1V9YmeS5KkhdL5lkKSq5Lcm+T2Q9rPS7I7yZ4k2wGq6mNVdRHwGuC3uq5NkvRgs9h99B7gvPGGJMcAVwDnAxuBrUk2ji3y5uZ+SdIMdb77qKo+k2TpkOZzgT1VdTdAkg8AL0pyJ3AZ8PGq+vxKz5dkG7ANYMOGDV2VvSqTLtiz97LNM65Ekh6evgaaTwW+MTa/r2n7feDXgJcnec1KD6yqHVW1XFXL69ev775SSVoga2qguaouBy7vuw5JWlR9bSncA5w+Nn9a0zaVJFuS7Ni/f/9RL0ySFllfoXAzcHaSM5IcB1wAXDftg6tqZ1VtW7duXWcFStIimsUhqVcDnwXOSbIvyYVVdT9wMXADcCdwbVXd0XUtkqTDm8XRR1sntO8CdnX9+pKk6Q3yNBeOKUhSNwYZCo4pSFI3BhkKkqRuGAqSpNYgQ8ExBUnqxiBDwTEFSerGIENBktQNQ0GS1DIUJEmtQYaCA82S1I1BhoIDzZLUjUGGgiSpG1OFQpKndF2IJKl/024pvD3JTUlem8R9NpI0p6YKhap6DvDbjK6WdmuSv0/y651WdhgONEtSN6YeU6iqu4A3A28Afhm4PMlXkry0q+IOU4sDzZLUgWnHFJ6a5G2MrpL2q8CWqvq5ZvptHdYnSZqhaa+89lfA3wBvrKofHmysqm8meXMnlUmSZm7aUNgM/LCqHgBI8gjg+Kr6n6p6X2fVSZJmatoxhRuBR4/Nn9C0SZLmyLShcHxV/ffBmWb6hG5KkiT
1ZdpQ+EGSTQdnkjwd+OFhlu+Uh6RKUjemDYXXAx9M8i9J/hW4Bri4u7IOz0NSJakbUw00V9XNSZ4MnNM07a6qH3dXliSpD9MefQTwDGCpecymJFTVezupSpLUi6lCIcn7gCcBtwEPNM0FGAqSNEem3VJYBjZWVXVZjCSpX9MONN8O/GyXhUiS+jftlsLJwJeT3AT86GBjVb2wk6okSb2YNhQu7bIISdLaMO0hqZ9O8kTg7Kq6MckJwDHdljZZki3AlrPOOquvEiRpLk176uyLgA8BVzZNpwIf66qoI/HHa5LUjWkHml8H/BJwANoL7vxMV0VJkvoxbSj8qKruOziT5FhGv1OQJM2RaUPh00neCDy6uTbzB4Gd3ZUlSerDtKGwHfgO8CXgd4FdjK7XLEmaI9MeffQT4F3NTZI0p6Y999G/s8IYQlWdedQrmiNL269fsX3vZZtnXIkkTWc15z466HjgN4CTjn45kqQ+TTWmUFXfG7vdU1V/AfjfXUmaM9PuPto0NvsIRlsOq7kWgyRpAKb9Yn/L2PT9wF7gN496NZKkXk179NGvdF3IanjuI0nqxrS7j/7wcPdX1VuPTjnTqaqdwM7l5eWLZvm6kjTvVnP00TOA65r5LcBNwF1dFCVJ6se0oXAasKmqvg+Q5FLg+qp6RVeFSZJmb9rTXDwBuG9s/r6mTZI0R6bdUngvcFOSjzbzLwb+tpuSJEl9mfbooz9N8nHgOU3Tq6vq37orS5LUh2l3HwGcAByoqr8E9iU5o6OaJEk9mfZynH8CvAG4pGl6JPB3XRUlSerHtFsKLwFeCPwAoKq+CTy2q6IkSf2YNhTuq6qiOX12ksd0V5IkqS/ThsK1Sa4EHpfkIuBGvOCOJM2dIx59lCTANcCTgQPAOcAfV9UnO65NkjRjRwyFqqoku6rqKYBBIElzbNrdR59P8oxOK5Ek9W7aXzT/IvCKJHsZHYEURhsRT+2qMEnS7B02FJJsqKr/AJ4/o3oWwtL261ds33uZVziV1K8jbSl8jNHZUb+e5MNV9bJZFCVJ6seRxhQyNn1ml4UkOTPJu5N8qMvXkSRNdqRQqAnTU0lyVZJ7k9x+SPt5SXYn2ZNkO0BV3V1VF672NSRJR8+RQuHnkxxI8n3gqc30gSTfT3Jgiud/D3DeeEOSY4ArgPOBjcDWJBsfQu2SpKPssGMKVXXMw3nyqvpMkqVDms8F9lTV3QBJPgC8CPjyNM+ZZBuwDWDDhg0PpzxJ0iFWc+rso+VU4Btj8/uAU5M8Psk7gacluWTlh0JV7aiq5apaXr9+fde1StJCmfZ3Cp2rqu8Br+m7DklaZH1sKdwDnD42f1rTJknqWR+hcDNwdpIzkhwHXABct5onSLIlyY79+/d3UqAkLapOQyHJ1cBngXOS7EtyYVXdD1wM3ADcCVxbVXes5nmramdVbVu3bt3RL1qSFlinYwpVtXVC+y5gV5evLUlavT52Hz1s7j6SpG4MMhTcfSRJ3RhkKEiSumEoSJJagwwFxxQkqRuDDAXHFCSpG4MMBUlSNwwFSVLLUJAktQYZCg40S1I3BhkKDjRLUjcGGQqSpG4YCpKklqEgSWoZCpKk1pq5RvNqJNkCbDnrrLP6LmUmlrZfv2L73ss2z7gSSfNukFsKHn0kSd0YZChIkrphKEiSWoaCJKllKEiSWoaCJKk1yFDwhHiS1I1BhoKHpEpSNwYZCpKkbhgKkqSWoSBJahkKkqSWoSBJahkKkqSWp85eQyadInu1yx+tU2p7ym5p8QxyS8HfKUhSNwYZCpKkbhgKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJanlCvAXiCe4kHckgtxQ8IZ4kdWOQoSBJ6oahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqGQqSpJahIElqrZkrryV5DPB24D7gn6vq/T2
XJEkLp9MthSRXJbk3ye2HtJ+XZHeSPUm2N80vBT5UVRcBL+yyLknSyrreffQe4LzxhiTHAFcA5wMbga1JNgKnAd9oFnug47okSSvodPdRVX0mydIhzecCe6rqboAkHwBeBOxjFAy3cZiwSrIN2AawYcOGo1/0HFjafv2aWh5g72WbV/Vck5afB4u2zg9lfRetjw5n1n3Rx0DzqfzfFgGMwuBU4CPAy5K8A9g56cFVtaOqlqtqef369d1WKkkLZs0MNFfVD4BX912HJC2yPrYU7gFOH5s/rWmbWpItSXbs37//qBYmSYuuj1C4GTg7yRlJjgMuAK5bzRNU1c6q2rZu3bpOCpSkRdX1IalXA58FzkmyL8mFVXU/cDFwA3AncG1V3dFlHZKk6XR99NHWCe27gF1dvrYkafUGeZoLxxQkqRuDDAXHFCSpG4MMBUlSN1JVfdfwkCX5DvD1VT7sZOC7HZQzL+yfyeybyeybydZi3zyxqlb89e+gQ+GhSHJLVS33XcdaZf9MZt9MZt9MNrS+cfeRJKllKEiSWosYCjv6LmCNs38ms28ms28mG1TfLNyYgiRpskXcUpAkTWAoSJJaCxUKE64NPZeS7E3ypSS3JbmlaTspySeT3NX8PbFpT5LLm375YpJNY8/zqmb5u5K8aqz96c3z72kem9mv5XRWulb4LPpi0musJRP65tIk9zTvnduSvGDsvkua9dyd5Plj7St+tpqzIX+uab+mOTMySR7VzO9p7l+azRpPL8npST6V5MtJ7kjyB037fL93qmohbsAxwNeAM4HjgC8AG/uuq8P13QucfEjbnwPbm+ntwJ810y8APg4EeCbwuab9JODu5u+JzfSJzX03Ncumeez5fa/zYfriucAm4PZZ9sWk11hLtwl9cynwRyssu7H53DwKOKP5PB1zuM8WcC1wQTP9TuD3munXAu9spi8Arum7L1ZY31OATc30Y4GvNn0w1++d3jt+hv/AzwJuGJu/BLik77o6XN+9/P9Q2A2c0kyfAuxupq8Eth66HLAVuHKs/cqm7RTgK2PtD1puLd6ApUO++Drvi0mvsdZuK/TNpawcCg/6zDA6/f2zJn22mi+67wLHNu3tcgcf20wf2yyXvvviCP30D8Cvz/t7Z5F2H026NvS8KuATSW5Nsq1pe0JVfauZ/k/gCc30pL45XPu+FdqHZBZ9Mek1huDiZhfIVWO7LlbbN48H/qtG11AZb3/QczX372+WX5Oa3VtPAz7HnL93FikUFs2zq2oTcD7wuiTPHb+zRv8F8XhkZtMXA+vvdwBPAn4B+Bbwln7L6VeSnwI+DLy+qg6M3zeP751FCoWHfW3oIamqe5q/9wIfBc4Fvp3kFIDm773N4pP65nDtp63QPiSz6ItJr7GmVdW3q+qBqvoJ8C5G7x1Yfd98D3hckmMPaX/QczX3r2uWX1OSPJJRILy/qj7SNM/1e2eRQuFhXxt6KJI8JsljD04DzwNuZ7S+B498eBWjfaQ07a9sjp54JrC/2XS9AXhekhObXQjPY7RP+FvAgSTPbI6WeOXYcw3FLPpi0musaQe/jBovYfTegdH6XNAcOXQGcDajgdIVP1vN/3A/Bby8efyh/Xywb14O/FOz/JrR/Hu+G7izqt46dtd8v3f6HryZ5Y3R0QFfZXSkxJv6rqfD9TyT0REgXwDuOLiujPbZ/iNwF3AjcFLTHuCKpl++BCyPPdfvAHua26vH2pcZfVl8Dfhr1vAgIXA1o90gP2a03/bCWfTFpNdYS7cJffO+Zt2/yOjL6ZSx5d/UrOduxo44m/TZat6LNzV99kHgUU378c38nub+M/vuixX65tmMdtt8Ebitub1g3t87nuZCktRapN1HkqQjMBQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLU+l9RQRp0g1/0VwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OverallQual\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD4CAYAAADsKpHdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAANdElEQVR4nO3dfYxm5V3G8e8F24YX7dYKoi7YAYcsbmwrONRaUo0tjdR1AF8LEdMYAhLb2qqJXRqi/UezJtoXTK1dAaEvgpRWZIVaSjXlHyIstLG8lEDaBRaobEtcKpJuaX/+MWdldrsvz8I5zz1n5vtJJnvOPTPPXDnJ7DX3uc9zTqoKSZIOaR1AkrQ0WAiSJMBCkCR1LARJEmAhSJI6q1oHeCGOOuqompmZaR1Dkkblzjvv/HpVHb3n+KgLYWZmhi1btrSOIUmjkuShvY2P8pRRkvkkm3bs2NE6iiQtG6MshKraXFUXrl69unUUSVo2RlkIkqT+WQiSJMBCkCR1LARJEmAhSJI6FoIkCRjpG9OSzAPzs7OzraNomZnZcONex7duXD/lJNL0jXKG4PsQJKl/o5whSMuNMxMtBaOcIUiS+mchSJIAC0GS1LEQJEmAhSBJ6niVkaS98sqnlccZgiQJsBAkSZ1RFoKP0JSk/o2yELx1hST1b5SFIEnqn4UgSQIsBElSx0KQJAEWgiSpYyFIkgALQZLUsRAkSYCFIEnqWAiSJMBCkCR1LARJEmAhSJI6oywEb38tSf0bZSF4+2tJ6t8oC0GS1D8LQZIEWAiSpI6FIEkCYFXrAJK02MyGG/c6vnXj+iknWXmcIUiSAAtBktSxECRJgIUgSepYCJIkwEKQJHUsBEkSYCFIkjoWgiQJsBAkSR0LQZIEWAiSpI6FIEkCvNupGtrXXS3BO1tKLThDkCQBS2iGkORsYD3wEuDyqrq5cSRJWlEGnSEkuSLJE0nu3mP8jCT3J3kwyQaAqrq+qi4ALgLePGQuSdL3GvqU0ZXAGYsHkhwKfBB4E7AOODfJukVfckn3eUnSFA1aCFV1K/DkHsOvBh6sqq9U1U7gGuCsLPgL4NNVddeQuSRJ36vFovIa4JFF+9u6sbcDpwO/nuSifX1zkguTbEmyZfv27cMmlaQVZMksKlfVpcClE3zdJmATwNzcXA2dS5JWihYzhEeB4xbtH9uNSZIaalEIdwAnJjk+yYuBc4AbGuSQJC0y9GWnVwO3AWuTbEtyflU9C7wN+AxwH3BtVd1zkK87n2TTjh07+g8tSSvUoGsIVXXuPsZvAm56Aa+7Gdg8Nzd3wfN9DUnS7rx1hSQJsBAkSZ1RFoJrCJLUv1EWQlVtrqoLV69e3TqKJC0boywESVL/LARJEmAhSJI6oywEF5UlqX+jLAQXlSWpf6MsBElS/ywESRJgIUiSOhaCJAkYaSF4lZEk9W+UheBVRpLUv1EWgiSpfxaCJAmwECRJnYkKIckrhg4iSWpr0hnC3yS5PcnvJXElV5KWoYkKoapeB/wWcBxwZ5J/SPLGQZPth5edSlL/Jl5DqKoHgEuAdwE/D1ya5MtJfnWocPvJ4mWnktSzSdcQXpnkfcB9wOuB+ar6iW77fQPmkyRNyaoJv+6vgcuAd1fVM7sGq+qxJJcMkkySNFWTFsJ64Jmq+g5AkkOAw6rqf6vqo4OlkyRNzaSFcAtwOvA/3f4RwM3Aa4cIJUlLycyGG/c6vnXj+iknGdaki8qHVdWuMqDbPmKYSJKkFiYthKeTnLJ
rJ8lPA8/s5+slSSMz6SmjdwKfSPIYEOCHgTcPluoAkswD87Ozs60iSNKyM1EhVNUdSU4C1nZD91fVt4eLdcA8m4HNc3NzF7TKIEnLzaQzBIBTgZnue05JQlV9ZJBUkqSpm6gQknwU+HHgi8B3uuECLARJWiYmnSHMAeuqqoYMI0lqZ9KrjO5mYSFZkrRMTTpDOAq4N8ntwLd2DVbVmYOkkiRN3aSF8J4hQ0iS2pv0stPPJ3k5cGJV3ZLkCODQYaNJkqZp0ttfXwBcB3y4G1oDXD9UKEnS9E26qPxW4DTgKfj/h+X80FChJEnTN2khfKuqdu7aSbKKhfchNOEjNCWpf5MWwueTvBs4vHuW8ieAzcPF2j8foSlJ/Zu0EDYA24EvAb8L3MTC85UlScvEpFcZfRf4u+5DkrQMTXovo6+ylzWDqjqh90SSpCYO5l5GuxwG/Abwsv7jSJJamWgNoaq+sejj0ap6P7C8HiYqSSvcpKeMTlm0ewgLM4aDeZaCJGmJm/Q/9b9atP0ssBX4zd7TSJKamfQqo18YOogkqa1JTxn94f4+X1Xv7SeOJKmVg7nK6FTghm5/HrgdeGCIUJKk6Zu0EI4FTqmqbwIkeQ9wY1WdN1QwSdJ0TXrrimOAnYv2d3ZjkqRlYtIZwkeA25P8U7d/NnDVMJEkSS1MepXRnyX5NPC6buh3quoLw8XavyTzwPzs7GyrCMvKzIYb9zq+daPvPZRWkklPGQEcATxVVR8AtiU5fqBMB+TtryWpf5M+QvNPgXcBF3dDLwI+NlQoSdL0TTpD+BXgTOBpgKp6DPj+oUJJkqZv0kXlnVVVSQogyZEDZpIkMf31vUlnCNcm+TDw0iQXALfgw3IkaVk54AwhSYB/BE4CngLWAn9SVZ8dOJskaYoOWAjdqaKbquoVgCUgScvUpKeM7kpy6qBJJElNTbqo/DPAeUm2snClUViYPLxyqGCSpOnabyEk+bGqehj4xSnlkSQ1cqAZwvUs3OX0oSSfrKpfm0YoSdL0HWgNIYu2TxgyiCSprQMVQu1jW5K0zBzolNGrkjzFwkzh8G4bnltUfsmg6SRJU7PfQqiqQ6cVRJLU1sHc/lqStIxZCJIkwEKQJHUsBEkSYCFIkjoWgiQJsBAkSZ0lUwhJTkhyeZLrWmeRpJVo0EJIckWSJ5Lcvcf4GUnuT/Jgkg0AVfWVqjp/yDySpH0beoZwJXDG4oEkhwIfBN4ErAPOTbJu4BySpAMYtBCq6lbgyT2GXw082M0IdgLXAGdN+ppJLkyyJcmW7du395hWkla2FmsIa4BHFu1vA9Yk+cEkfwucnOTifX1zVW2qqrmqmjv66KOHzipJK8akj9AcXFV9A7iodQ5JWqlazBAeBY5btH9sNyZJaqhFIdwBnJjk+CQvBs4BbjiYF0gyn2TTjh07BgkoSSvR0JedXg3cBqxNsi3J+VX1LPA24DPAfcC1VXXPwbxuVW2uqgtXr17df2hJWqEGXUOoqnP3MX4TcNOQP1uSdHCWzDuVJUltjbIQXEOQpP6NshBcQ5Ck/o2yECRJ/bMQJEmAhSBJ6oyyEFxUlqT+jbIQXFSWpP6NshAkSf2zECRJgIUgSepYCJIkYKSF4FVGktS/URaCVxlJUv9GWQiSpP5ZCJIkwEKQJHUsBEkSYCFIkjqjLAQvO5Wk/o2yELzsVJL6N8pCkCT1z0KQJAEWgiSpYyFIkgALQZLUsRAkScBIC8H3IUhS/0ZZCL4PQZL6N8pCkCT1z0KQJAEWgiSpYyFIkgALQZLUsRAkSYCFIEnqWAiSJMBCkCR1VrUO8HwkmQfmZ2dnW0c5aDMbbtzn57ZuXD/FJJK0u1HOELx1hST1b5SFIEnqn4UgSQIsBElSx0KQJAEWgiSpYyFIkgALQZLUsRAkSYCFIEnqWAiSJMBCkCR1LARJEmAhSJI6K/b21/u6DbW3oJa0Uo1yhuDtryWpf6MsBElS/ywESRJgIUiSOhaCJAmwECRJHQt
BkgRYCJKkjoUgSQIgVdU6w/OWZDvwUOscL9BRwNdbh1giPBa783jszuPxnBd6LF5eVUfvOTjqQlgOkmypqrnWOZYCj8XuPB6783g8Z6hj4SkjSRJgIUiSOhZCe5taB1hCPBa783jszuPxnEGOhWsIkiTAGYIkqWMhSJIAC6GJJMcl+fck9ya5J8k7WmdaCpIcmuQLSf6ldZbWkrw0yXVJvpzkviQ/2zpTK0n+oPs9uTvJ1UkOa51pmpJckeSJJHcvGntZks8meaD79wf6+FkWQhvPAn9UVeuA1wBvTbKucaal4B3Afa1DLBEfAP61qk4CXsUKPS5J1gC/D8xV1U8ChwLntE01dVcCZ+wxtgH4XFWdCHyu23/BLIQGqurxqrqr2/4mC7/sa9qmaivJscB64LLWWVpLshr4OeBygKraWVX/3TZVU6uAw5OsAo4AHmucZ6qq6lbgyT2GzwKu6ravAs7u42dZCI0lmQFOBv6jbZLm3g/8MfDd1kGWgOOB7cDfd6fQLktyZOtQLVTVo8BfAg8DjwM7qurmtqmWhGOq6vFu+2vAMX28qIXQUJLvAz4JvLOqnmqdp5Ukvww8UVV3ts6yRKwCTgE+VFUnA0/T0ymBsenOjZ/FQkn+KHBkkvPaplpaauG9A728f8BCaCTJi1gog49X1ada52nsNODMJFuBa4DXJ/lY20hNbQO2VdWuWeN1LBTESnQ68NWq2l5V3wY+Bby2caal4L+S/AhA9+8TfbyohdBAkrBwfvi+qnpv6zytVdXFVXVsVc2wsGD4b1W1Yv8KrKqvAY8kWdsNvQG4t2Gklh4GXpPkiO735g2s0AX2PdwAvKXbfgvwz328qIXQxmnAb7Pwl/AXu49fah1KS8rbgY8n+U/gp4A/b5yniW6WdB1wF/AlFv7PWlG3sEhyNXAbsDbJtiTnAxuBNyZ5gIVZ1MZefpa3rpAkgTMESVLHQpAkARaCJKljIUiSAAtBktSxECRJgIUgSer8Hxk5fl8wkim/AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OverallCond\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD6CAYAAABOIFvoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQvUlEQVR4nO3db7CcZX3G8e8F0UKoxlaitQkYLEw0o7bgEdpaHeufNjQGrLVKRvvCYYhMxcH6oo3WUfuiMzjTqrVFawoU/0IR1CFNFLRVaWcokCAdwUhNMUrAlihtEGSM4K8vdvNwTE+SPXDu8+zmfD8zGXbvc87uNZmQK/effZ5UFZIkARzRdwBJ0viwFCRJHUtBktSxFCRJHUtBktSxFCRJHUtBktRZ1HeAx+LYY4+tFStW9B1DkibKtm3bvldVS2f62tiUQpJnAecDxwL/VFUfOtTPrFixgq1btzbPJkmHkyTfPtDXmi4fJbkkyT1Jbt1vfHWS25PsSLIBoKq2V9W5wGuAF7TMJUmaWes9hUuB1dMHkhwJXAicDqwC1iVZNfzaGcBmYEvjXJKkGTQthaq6Drh3v+FTgR1VdUdV7QUuB84cfv/VVXU68LoDvWaS9Um2Jtm6e/fuVtElaUHqY09hGXDntOe7gNOSvBh4FfAzHGSmUFUbgY0AU1NTXs1PkubQ2Gw0V9WXgS/3HEOSFrQ+PqdwF3DctOfLh2OSpJ71UQo3ASclOSHJ44GzgKtn8wJJ1ibZuGfPniYBJWmhan0k9TLgemBlkl1Jzq6qh4DzgGuA7cAVVXXbbF63qjZV1folS5bMfWhJWsCa7ilU1boDjG/BY6c6jK3YsPmAX9t5wZp5TCLNzkRe+8jlI0lqYyJLweUjSWpjIktBktSGpSBJ6kxkKbinIEltTGQpuKcgSW1MZClIktqwFCRJnYksBfcUJKmNiSwF9xQkqY2JLAVJUhuWgiSpYylIkjqWgiSpM5Gl4OkjSWpjIkvB00eS1MZEloIkqQ1LQZLUsRQkSR1LQZLUmchS8PSRJLUxkaXg6SNJamMiS0GS1IalIEnqWAqSpI6lIEnqWAqSpI6lIEnqWAqSpM5EloIfXpOkNiayFPzwmiS1MZGlIElqw1KQJHUsBUlSZ1HfASTNjxUbNs84vvOCNfOcROPMmYIkqWMpSJI6loIkqWMpSJI6loIkqePpI0m9ONBpKPBEVJ8mcqbgtY8kqY2JLAWvfSRJbUxkKUiS2rAUJEkdS0GS1LEUJEkdS0GS1LEUJEkdS0GS1LEUJEkdS0GS1LEUJEkdS0GS1LEUJEkdS0GS1LEUJEkdS0GS1BmrO68leSWwBngicHFVXdtzJElaUJrPFJJckuSeJLfuN746ye1JdiTZAFBVn62qc4Bzgde2ziZJ+mnzsXx0KbB6+kCSI4ELgdOBVcC6JKumfcs7hl+XJM2j5qVQVdcB9+43fCqwo6ruqKq9wOXAmRl4D/C5qrp5ptdLsj7J1iRbd+/e3Ta8JC0wfW00LwPunPZ813DszcDLgFcnOXemH6yqjVU1VVVTS5cubZ9UkhaQsdporqoPAB/oO4ckLVR9zRTuAo6b9nz5cGwkSdYm2bhnz545DyZJC1lfpXATcFKSE5I8HjgLuHrUH66qTVW1fsmSJc0CStJCNB9HUi8DrgdWJtmV5Oyqegg4D7gG2A5cUVW3tc4iSTq45nsKVbXuAONbgC2t31+SNLqJvMyFewqS1MZEloJ7CpLUxkSWgiSpjbH6nII
k9WnFhs0zju+8YM08J+nPRM4U3FOQpDYmshTcU5CkNiayFCRJbYxUCkme0zqIJKl/o84UPpjkxiR/mKT3NRv3FCSpjZFKoapeCLyOwUXstiX5ZJKXN0128DzuKUhSAyMfSa2qbyZ5B7CVweWtT04S4O1V9elWAaXpPDIotTXqnsJzk7yPwcXrXgKsrapnDR+/r2E+SdI8GnWm8NfARQxmBQ/uG6yqu4ezB0nSYWDUUlgDPFhVDwMkOQI4qqp+WFUfa5ZOkjSvRj199EXg6GnPFw/HeuHpI0lqY9RSOKqq7t/3ZPh4cZtIh+bpI0lqY9RSeCDJKfueJHke8OBBvl+SNIFG3VN4C/CpJHcDAX4BeG2zVJKkXoxUClV1U5JnAiuHQ7dX1Y/bxZIk9WE291N4PrBi+DOnJKGqPtoklSSpFyOVQpKPAb8E3AI8PBwuoJdSSLIWWHviiSf28faSdNgadaYwBayqqmoZZlRVtQnYNDU1dU7fWSTpcDLq6aNbGWwuS5IOY6POFI4Fvp7kRuBH+war6owmqSRJvRi1FN7dMoQkaTyMeiT1K0meDpxUVV9Mshg4sm00SdJ8G/XS2ecAVwIfHg4tAz7bKpQkqR+jbjS/CXgBcB8MbrgDPKVVKElSP0YthR9V1d59T5IsYvA5BUnSYWTUUvhKkrcDRw/vzfwpYFO7WAfnpbMlqY1RS2EDsBv4GvBGYAvQ2x3XvHS2JLUx6umjnwB/N/wlSTpMjXrto28xwx5CVT1jzhNJknozm2sf7XMU8PvAz899HElSn0ZdPvr+fkPvT7INeOfcR9K4WLFh84zjOy9YM89JJM2XUZePTpn29AgGM4fZ3ItBkjQBRv2L/S+nPX4I2Am8Zs7TSJJ6Nery0W+2DiJJ6t+oy0dvPdjXq+q9cxNHkjTdfO/tzeb00fOBq4fP1wI3At9sEUqS1I9RS2E5cEpV/QAgybuBzVX1+lbBJEnzb9TLXDwV2Dvt+d7hWC+89pEktTFqKXwUuDHJu4ezhBuAjzRLdQhe+0iS2hj19NGfJ/kc8MLh0Buq6qvtYkmS+jDqTAFgMXBfVf0VsCvJCY0ySZJ6MurtON8F/AnwtuHQ44CPtwolSerHqDOF3wXOAB4AqKq7gSe0CiVJ6seopbC3qorh5bOTHNMukiSpL6OWwhVJPgw8Kck5wBfxhjuSdNg55OmjJAH+AXgmcB+wEnhnVX2hcTZJ0jw7ZClUVSXZUlXPASwCSTqMjbp8dHOS5zdNIknq3ajXPjoNeH2SnQxOIIXBJOK5rYJJkubfQUshyfFV9R3gt+cpjySpR4eaKXyWwdVRv53kqqr6vfkIJUnqx6H2FDLt8TNaBpEk9e9QpVAHeCxJOgwdavnol5Pcx2DGcPTwMTyy0fzEpukkSfPqoKVQVUfOV5AkzwD+FFhSVa+er/eVJD1iNpfOnrUklyS5J8mt+42vTnJ7kh1JNgBU1R1VdXbLPJKkg2taCsClwOrpA0mOBC4ETgdWAeuSrGqcQ5I0gqalUFXXAffuN3wqsGM4M9gLXA6cOeprJlmfZGuSrbt3757DtJKk1jOFmSwD7pz2fBewLMmTk/wtcHKSt838o1BVG6tqqqqmli5d2jqrJC0oo17mormq+j5wbt85JGkh62OmcBdw3LTny4djI0uyNsnGPXv2zGkwSVro+iiFm4CTkpyQ5PHAWcDVs3mBqtpUVeuXLFnSJKAkLVStj6ReBlwPrEyyK8nZVfUQcB5wDbAduKKqbmuZQ5I0mqZ7ClW17gDjW4AtLd9bkjR7fSwfPWbuKUhSGxNZCu4pSFIbE1kKkqQ2LAVJUmciS8E9BUlqYyJLwT0FSWpjIktBktSGpSBJ6kxkKbinIEltTGQpuKcgSW1MZClIktqwFCRJHUtBktSxFCRJnbG5HedsJFkLrD3xxBP7jjKnVmzYPOP4zgvWzHMSSQvVRM4UPH0kSW1MZClIktqwFCRJHUtBktSxFCRJnYksBa99JEl
tTGQpePpIktqYyFKQJLVhKUiSOpaCJKljKUiSOpaCJKljKUiSOpaCJKkzkaXgh9ckqY2JLAU/vCZJbUxkKUiS2rAUJEkdS0GS1LEUJEkdS0GS1LEUJEkdS0GS1LEUJEkdS0GS1JnIUvAyF5LUxkSWgpe5kKQ2JrIUJEltWAqSpI6lIEnqWAqSpI6lIEnqWAqSpI6lIEnqWAqSpI6lIEnqWAqSpI6lIEnqWAqSpI6lIEnqWAqSpI6lIEnqLOo7wD5JjgE+COwFvlxVn+g5kiQtOE1nCkkuSXJPklv3G1+d5PYkO5JsGA6/Criyqs4BzmiZS5I0s9bLR5cCq6cPJDkSuBA4HVgFrEuyClgO3Dn8tocb55IkzaBpKVTVdcC9+w2fCuyoqjuqai9wOXAmsItBMTTPJUmaWR97Cst4ZEYAgzI4DfgA8DdJ1gCbDvTDSdYD6wGOP/74Rx1ixYbNM47vvGDNo35NSZp0Y7PRXFUPAG8Y4fs2AhsBpqamqnUuSVpI+limuQs4btrz5cMxSVLP+iiFm4CTkpyQ5PHAWcDVs3mBJGuTbNyzZ0+TgJK0ULU+knoZcD2wMsmuJGdX1UPAecA1wHbgiqq6bTavW1Wbqmr9kiVL5j60JC1gTfcUqmrdAca3AFtavrckafYm8uiny0eS1MZEloLLR5LUxkSWgiSpDUtBktRJ1eR+/ivJbuDbj/LHjwW+N4dx5oq5ZsdcszOuuWB8sx2OuZ5eVUtn+sJEl8JjkWRrVU31nWN/5podc83OuOaC8c220HK5fCRJ6lgKkqTOQi6FjX0HOABzzY65Zmdcc8H4ZltQuRbsnoIk6f9byDMFSdJ+FlwpHOi+0X1LclySLyX5epLbkpzfdyaAJEcluTHJvw9z/VnfmaZLcmSSryb5x76z7JNkZ5KvJbklyda+8+yT5ElJrkzyjSTbk/zaGGRaOfx92vfrviRv6TsXQJI/Gv6ZvzXJZUmO6jsTQJLzh5lua/F7teCWj5K8CLgf+GhVPbvvPPskeRrwtKq6OckTgG3AK6vq6z3nCnBMVd2f5HHAvwLnV9W/9ZlrnyRvBaaAJ1bVK/rOA4NSAKaqaqzOtif5CPAvVXXR8LL1i6vqf/vOtc/w/u13AadV1aP9/NFcZVnG4M/6qqp6MMkVwJaqurTnXM9mcAvjU4G9wOeBc6tqx1y9x4KbKRzgvtG9q6rvVtXNw8c/YHBZ8WX9poIauH/49HHDX2PxL4kky4E1wEV9Zxl3SZYALwIuBqiqveNUCEMvBf6z70KYZhFwdJJFwGLg7p7zADwLuKGqfji8DcFXgFfN5RssuFKYBElWACcDN/SbZGC4RHMLcA/whaoai1zA+4E/Bn7Sd5D9FHBtkm3De4qPgxOA3cDfD5fbLkpyTN+h9nMWcFnfIQCq6i7gL4DvAN8F9lTVtf2mAuBW4IVJnpxkMfA7/PSdLB8zS2HMJPlZ4CrgLVV1X995AKrq4ar6FQa3Tj11OIXtVZJXAPdU1ba+s8zgN6rqFOB04E3DJcu+LQJOAT5UVScDDwAb+o30iOFy1hnAp/rOApDk54AzGZTpLwLHJHl9v6mgqrYD7wGuZbB0dAvw8Fy+h6UwRoZr9lcBn6iqT/edZ3/D5YYvAav7zgK8ADhjuH5/OfCSJB/vN9LA8F+ZVNU9wGcYrP/2bRewa9os70oGJTEuTgdurqr/7jvI0MuAb1XV7qr6MfBp4Nd7zgRAVV1cVc+rqhcB/wP8x1y+vqUwJoYbuhcD26vqvX3n2SfJ0iRPGj4+Gng58I1+U0FVva2qllfVCgbLDv9cVb3/Sy7JMcODAgyXZ36LwZS/V1X1X8CdSVYOh14K9HqIYT/rGJOlo6HvAL+aZPHw/82XMtjn612Spwz/ezyD/YRPzuXrN70d5zga3jf6xcCxSXYB76qqi/tNBQz+5fsHwNeG6/cAbx/eurRPTwM+MjwZcgSDe2qPzfHPMfRU4DODv0dYBHyyqj7fb6TOm4F
PDJdq7gDe0HMeoCvPlwNv7DvLPlV1Q5IrgZuBh4CvMj6fbL4qyZOBHwNvmusDAwvuSKok6cBcPpIkdSwFSVLHUpAkdSwFSVLHUpAkdSwFSVLHUpAkdSwFSVLn/wA9xRT47pU+7QAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "YearBuilt\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAARzElEQVR4nO3de9BcdX3H8fcHULkoDyBI5WagIJbWG0Z0prW0WgVlIipaQRkdZIiX2kFbW6MyLZ1OO2ittRSspmoFtVBQq2TAInSqzDjIJcgd0XBRArSgdIJSSwS//WPPc1gyeZJ9kpw9u0/er5mdnP2d3X2+Ocnu5/ld9pxUFZIkAWzTdwGSpMlhKEiSWoaCJKllKEiSWoaCJKm1Xd8FbI7dd9+9Fi1a1HcZkjRVVq5c+eOq2mN9+6Y6FBYtWsTVV1/ddxmSNFWS/HCufQ4fSZJahoIkqWUoSJJahoIkqTWVoZBkSZLla9as6bsUSVpQpjIUqmpFVS2dmZnpuxRJWlCmMhQkSd0wFCRJran+8pokLQSLll045747TztqjJXYU5AkDTEUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktT50tSWOyoVNkTwp7CpKklqEgSWoZCpKk1kTNKSR5DXAUsDPwmar6Rs8lSdJWpfOeQpLPJrkvyY3rtB+Z5NYkq5IsA6iqr1bVScA7gDd2XZsk6fHGMXz0OeDI4YYk2wJnAq8EDgGOS3LI0ENOafZLksao81CoqsuAB9ZpPgxYVVW3V9Va4Fzg6Ax8GPh6VV3TdW2SpMfra6J5b+Cuofurm7Y/BH4PeH2Sd6zviUmWJrk6ydX3339/95VK0lZkoiaaq+p04PSNPGY5sBxg8eLFNY66JGlr0VdP4W5g36H7+zRtkqQe9RUKVwEHJdk/yROBY4ELeqpFktQYx5LUc4DLgYOTrE5yYlU9ArwbuBi4BTivqm6ax2suSbJ8zZo13RQtSVupzucUquq4OdovAi7axNdcAaxYvHjxSZtTmyTp8TzNhSSpZShIklpTGQrOKUhSN6YyFKpqRVUtnZmZ6bsUSVpQpjIUJEndMBQkSS1DQZLUmspQcKJZkroxUSfEG5VfXpM0yRYtu7DvEjbZVIaCpPmb64PqztOOGnMlmmRTOXwkSeqGoSBJahkKkqTWVM4pJFkCLDnwwAP7LkVSwzmLhWEqewqe5kKSujGVPQVJC5u9jv4YCpLmZZrX4GvjpnL4SJLUDUNBktRy+EiSNmJrmuOYylBwSao0N8f8tTmmMhQ8IZ605WxNvwVr45xTkCS1DAVJUstQkCS1pnJOQdL0cM5iuthTkCS17ClI0gQbd09rKkPB7ylI08/vU0ymqQwFv6ew8DkO3T8/tDduIR4j5xQkSS1DQZLUMhQkSa2pnFOQtDDHs9U/Q0GSGgatw0eSpCGGgiSpZShIklrOKUgTwvFsTYKp7CkkWZJk+Zo1a/ouRZIWlKkMhapaUVVLZ2Zm+i5FkhaUqQwFSVI3nFPQguAJ9KQtw56CJKllKEiSWg4fSZvBYavp5PLfuRkKGgs/PKXpMNLwUZJnd12IJKl/o84pfCLJlUnelcQvB0jSAjVSKFTVS4A3A/sCK5P8S5KXd1qZJGnsRl59VFU/AE4B3g8cDpye5HtJXtdVcZKk8RppojnJc4ATgKOAS4AlVXVNkr2Ay4GvdFei9BhXjUjdGnX10T8AnwY+WFU/n
22sqnuSnNJJZZKksRs1FI4Cfl5VjwIk2QbYvqr+t6o+31l1kjTEpc3dG3VO4VJgh6H7OzZtkqQFZNRQ2L6qfjZ7p9nesZuSNs7rKUhSN0YNhYeSHDp7J8kLgJ9v4PGd8noKktSNUecU3gOcn+QeIMCvAG/srCpNtIUwrrsQ/g5SF0YKhaq6KsmzgIObplur6hfdlSVJ6sN8Toj3QmBR85xDk1BVZ3dSlTRh/H7EZLPnt+WM+uW1zwO/ClwLPNo0F2AoSNICMmpPYTFwSFVVl8VIC8WGehb+9qpJNurqoxsZTC5LkhawUXsKuwM3J7kSeHi2sape3UlVkqRejBoKp3ZZhCRpMoy6JPVbSZ4BHFRVlybZEdi229IkSeM26uU4TwK+BHyqadob+GpXRUmS+jHqRPMfAL8JPAjtBXee1lVRkqR+jBoKD1fV2tk7SbZj8D0FSdICMmoofCvJB4Edmmsznw+s6K4sSVIfRg2FZcD9wA3A24GLGFyvWZK0gIy6+uiXwD81N0mbwfMoaZKNeu6jO1jPHEJVHbDFK5Ik9WY+5z6atT3wBmC3LV+OJKlPI80pVNVPhm53V9XHAc/qJUkLzKjDR4cO3d2GQc9hPtdikCRNgVE/2P92aPsR4E7g97d4NZKkXo26+uh3uy5EktS/UYeP/mhD+6vqY5tbSJIDgA8BM1X1+s19PQlc/inN16hfXlsMvJPBifD2Bt4BHAo8pbmtV5LPJrkvyY3rtB+Z5NYkq5IsA6iq26vqxE35S0iStoxR5xT2AQ6tqp8CJDkVuLCqjt/I8z4HnMHQtZyTbAucCbwcWA1cleSCqrp5fqVLkra0UUNhT2Dt0P21TdsGVdVlSRat03wYsKqqbgdIci5wNDBSKCRZCiwF2G+//UZ5ihpzDaV4zWBJs0YdPjobuDLJqU0v4QrgrE38mXsDdw3dXw3sneSpST4JPD/JB+Z6clUtr6rFVbV4jz322MQSJEnrM+rqo79K8nXgJU3TCVX13S1ZSFX9hMFchSSpJ6P2FAB2BB6sqr8HVifZfxN/5t3AvkP392naJEk9G3VJ6p8zWIF0MPDPwBOALzC4Gtt8XQUc1ITK3cCxwJvm8wJJlgBLDjzwwE348dLcXMKqrd2oPYXXAq8GHgKoqnvYwFLUWUnOAS4HDk6yOsmJVfUI8G7gYuAW4Lyqumk+RVfViqpaOjMzM5+nSZI2YtTVR2urqpIUQJKdRnlSVR03R/tFDC7UI0maIKP2FM5L8ilglyQnAZfiBXckacHZaE8hSYB/BZ4FPMhgXuHPquqSjmuTJI3ZRkOhGTa6qKqeDUxEEDjRvHA4sasu+f9r/kYdPromyQs7rWQenGiWpG6MOtH8IuD4JHcyWIEUBp2I53RVmCRp/DYYCkn2q6ofAUeMqR5JUo821lP4KoOzo/4wyZer6phxFCVJ6sfG5hQytH1Al4XMR5IlSZavWbOm71IkaUHZWCjUHNu9cqJZkrqxseGj5yZ5kEGPYYdmGx6baN650+okSWO1wVCoqm3HVYgkqX/zOXW2JGmBMxQkSa2pDAVXH0lSN6YyFFx9JEndmMpQkCR1w1CQJLUMBUlSy1CQJLUMBUlSa9TrKUwUr7w2mbzKlTT9prKn4JJUSerGVIaCJKkbhoIkqWUoSJJahoIkqWUoSJJahoIkqeX3FEa0oTX4d5521NjqkKQuTWVPwe8pSFI3pjIUJEndMBQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLU8jQXU2CuU2x4eg1JW9pU9hQ8zYUkdWMqQ0GS1A1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLUMhQkSS1DQZLU8noKHZrvdRDmenzXvF6DpFlT2VPwegqS1I2pDAVJUjcMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMB
UlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLW267uAWUl2Aj4BrAW+WVVf7LkkSdrqdNpTSPLZJPcluXGd9iOT3JpkVZJlTfPrgC9V1UnAq7usS5K0fl0PH30OOHK4Icm2wJnAK4FDgOOSHALsA9zVPOzRjuuSJK1Hp8NHVXVZkkXrNB8GrKqq2wGSnAscDaxmEAzXsoGwSrIUWAqw3377bXJti5ZduMnP7fK1toQtVc+k/b0kda+Piea9eaxHAIMw2Bv4CnBMkn8EVsz15KpaXlWLq2rxHnvs0W2lkrSVmZiJ5qp6CDih7zokaWvWR0/hbmDfofv7NG2SpJ71EQpXAQcl2T/JE4FjgQvm8wJJliRZvmbNmk4KlKStVddLUs8BLgcOTrI6yYlV9QjwbuBi4BbgvKq6aT6vW1UrqmrpzMzMli9akrZiXa8+Om6O9ouAi7r82ZKk+fM0F5Kk1lSGgnMKktSNqQwF5xQkqRupqr5r2GRJ7gd+2HMZuwM/7rmG+bDebllvt6atXpjMmp9RVev99u9Uh8IkSHJ1VS3uu45RWW+3rLdb01YvTF/NUzl8JEnqhqEgSWoZCptved8FzJP1dst6uzVt9cKU1eycgiSpZU9BktQyFCRJLUNhHeu7rnSS5yX5TpJrk1yd5LCmfSbJiiTXJbkpyQlDz3lrkh80t7f2UPNzk1ye5Iamxp2H9n2guT72rUmOGGpf37Wze603ycuTrGzaVyZ56dBzXtC0r0pyepL0Xe/Q/v2S/CzJ+4baJu74Nvue0+y7qdm/fdM+ccc3yROSnNW035LkA0PPGdfx3TfJfya5uTlmJzftuyW5pHnPX5Jk16Y9zfFbleT6JIcOvdbYPidGVlXehm7AbwOHAjcOtX0DeGWz/Srgm832B4EPN9t7AA8ATwR2A25v/ty12d51zDVfBRzebL8N+Mtm+xDgOuBJwP7AbcC2ze024IDm73AdcMgE1Pt8YK9m+zeAu4eecyXwYiDA12f/jfqsd2j/l4Dzgfc19yf1+G4HXA88t7n/VGDbST2+wJuAc5vtHYE7gUVjPr5PBw5ttp8CfL95X30EWNa0L+Oxz4ZXNccvzfG8omkf6+fEqDd7CuuoqssYfLg/rhmY/c1qBrhnqP0pzW9QT26e9whwBHBJVT1QVf8DXAIcOeaanwlc1mxfAhzTbB/N4E31cFXdAaxicN3s9trZVbUWmL12dq/1VtV3q2r2eN8E7JDkSUmeDuxcVd+pwTvsbOA1fdcLkOQ1wB1NvbMm8vgCrwCur6rrmuf+pKoeneDjW8BOSbYDdgDWAg8y3uN7b1Vd02z/lMElAPZuft5ZzcPO4rHjdTRwdg18B9ilOb5j/ZwYlaEwmvcAf5PkLuCjwGyX9Qzg1xiExA3AyVX1S+a+DvU43cRjb4o38NjV7uaqre+a56p32DHANVX1MIPaVg/tm4h6kzwZeD/wF+s8flKP7zOBSnJxkmuS/GnTPpHHl0EP7CHgXuBHwEer6gF6Or5JFjHozV4B7FlV9za7/gvYs9me1PfcehkKo3kn8N6q2hd4L/CZpv0I4FpgL+B5wBnrji336G3Au5KsZNDFXdtzPRuzwXqT/DrwYeDtPdS2PnPVeyrwd1X1s74Km8Nc9W4H/Bbw5ubP1yZ5WT8lPs5c9R4GPMrgPbc/8MdJDuijwOYXgC8D76mqB4f3Nb2rqVzv3+lFdhaQtwInN9vnA59utk8ATmv+A6xKcgfwLAbXnP6doefvA3xzLJU2qup7DIYGSPJM4Khm14aukd3btbM3UC9J9gH+DXhLVd3WNN/d1DhrUup9EfD6JB8BdgF+meT/gJVM5vFdDVxWVT9u9l3EYHz/C0zm8X0T8O9V9QvgviTfBhYz+I17bMc3yRMYBMIXq+orTfN/J3l6Vd3bDA/d17TP9Z7r/XNifewpjOYe4PBm+6XAD5rtHwEvA0iyJ3Awg8mii4FXJNm1WYHwiqZtbJI8rflzG
+AU4JPNrguAY5tx+f2BgxhMKG72tbO7qDfJLsCFDCbwvj37+Kab/mCSFzdzOm8BvtZ3vVX1kqpaVFWLgI8Df11VZzChx5fB/8tnJ9mxGac/HLh5Uo8vg/fcS5t9OzGYuP0eYzy+zfH4DHBLVX1saNcFDH6BpPnza0Ptb2lWIb0YWNMc394/J9ar75nuSbsB5zAYr/wFg9+iTmTQrV7JYEXDFcALmsfuxWBl0g3AjcDxQ6/zNgaTuKuAE3qo+WQGqyK+D5xG8+315vEfYrBS41aGVpQwWCXx/WbfhyahXgYfCA8xGKabvT2t2be4Oe63MZjfSd/1rvO8U2lWH03q8W0efzyDMfwbgY8MtU/c8WWwoOP8pt6bgT/p4fj+FoOhoeuH/k++isHKrf9g8EvjpcBuzeMDnNnUdQOweOi1xvY5MerN01xIkloOH0mSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWv8P4m7P8H+asjsAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "YearRemodAdd\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD4CAYAAADsKpHdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQY0lEQVR4nO3df6xkZX3H8fcHkAAqK8iilh9d6CJ2E7TigibVam1VlKxorT9Qo1ECNWqjtbaiJa1JY6KmVUtro7SioFWqtrVsxFLoD0nqD34JuIDIQkEXaUGxu/4grOC3f8yzZXrdu3fu7pyZOXPfr+Tmnnlm5uz3uTN3P/c5z3POpKqQJGmvaRcgSZoNBoIkCTAQJEmNgSBJAgwESVKzz7QL2BOHHHJIrVmzZtplSFKvXHXVVd+tqtUL23sdCGvWrOHKK6+cdhmS1CtJbt9Zu4eMJEmAgSBJagwESRLQ00BIsiHJOVu3bp12KZI0N3oZCFW1sarOWLVq1bRLkaS50ctAkCSNn4EgSQIMBElS0+sT0/bEmjM/v9P229598oQrkaTZ4AhBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkpoV+wE5krSn5u2DthwhSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1M3NxuyQvAE4GDgQ+UlX/POWSJGlF6XSEkOTcJHcl2bSg/aQkNyXZnORMgKr6XFWdDrwOeGmXdUmSflbXh4w+Bpw03JBkb+CDwHOBdcCpSdYNPeSsdr8kaYI6DYSqugy4Z0HzicDmqrq1qrYDFwCnZOA9wBeq6urF9pnkjCRXJrny7rvv7q54SVphpjGpfBjw7aHbW1rbbwO/Dvxmktct9uSqOqeq1lfV+tWrV3dbqSStIDMzqVxVZwNnT7sOSdpTff0ktWmMEO4Ajhi6fXhrkyRN0TQC4QrgmCRHJdkXeBlw4RTqkCQN6XrZ6aeALwPHJtmS5LSquh94I3AxcCPw6aq6vss6JElL63QOoapOXaT9IuCi3d1vkg3AhrVr1+7uLiRJC/Ty0hVVtbGqzli1atW0S5GkudHLQJAkjZ+BIEkCDARJUmMgSJKAngZCkg1Jztm6deu0S5GkudHLQHCVkSSNXy8DQZI0fjNzcTtJmnezftE7RwiSJMBAkCQ1BoIkCTAQJElNLwPB8xAkafx6GQiehyBJ49fLQJAkjZ/nIUjqvVlf398XjhAkSYCBIElqDARJEmAgSJKaXgaC5yFI0vj1MhA8D0GSxq+XgSBJGj8DQZIEGAiSpMYzlSXNLc9gXh5HCJIkwECQJDUGgiQJcA5Bkpa02FzEvOnlCMEzlSVp/HoZCJ6pLEnj18tAkCSNn3MIktSslLmCxThCkCQBIwZCkuO6LkSSNF2jjhD+MsnlSV6fxJlcSZpDIwVCVT0NeAVwBHBVkk8meVanlUmSJmrkOYSquhk4C3gb8HTg7CTfSPIbXRUnSZqcUecQHp/k/cCNwDOBDVX1i237/R3WJ0makFGXnf458NfAO6rq3h2NVfWdJGd1UpkkaaJGDYSTgXur6gGAJHsB+1XVj6vq451VJ0mamFHnEC4F9h+6fUBrkyTNiVFHCPtV1Q933KiqHyY5oKOalpRkA7Bh7dq10ypBksZmVj7ZbdQRwo+SHL/jRpInAffu4vGd8uJ2kj
R+o44Q3gx8Jsl3gACPBl7aWVWSpIkbKRCq6ookjwOObU03VdVPuitLkjRpy7na6QnAmvac45NQVed3UpUkaeJGCoQkHwd+AbgGeKA1F2AgSALGNzE6KxOsK9GoI4T1wLqqqi6LkSRNz6irjDYxmEiWJM2pUUcIhwA3JLkcuG9HY1U9v5OqJEkTN2ogvLPLIiRJ0zfqstMvJvl54JiqurSdpbx3t6VJkiZp1Mtfnw58FvhwazoM+FxXRUmSJm/USeU3AL8MbIP/+7CcQ7sqSpI0eaMGwn1VtX3HjST7MDgPQZI0J0adVP5ikncA+7fPUn49sLG7sjRPPNFIO7PY+0LTM2ognAmcBnwd+C3gIgafoDYVXv5ae8qQkn7WqKuMfgr8VfuauqraCGxcv3796dOuRZLmxajXMvpPdjJnUFVHj70iSRIw+ZHscq5ltMN+wIuBg8dfjiRpWkZaZVRV3xv6uqOqPgB4sFWS5sioh4yOH7q5F4MRw3I+S0GSNONG/U/9T4e27wduA14y9mrUKVfWSNqVUVcZ/WrXhUiSpmvUQ0Zv2dX9VfW+8ZSjlWRXJyY5apEmbzmrjE4ALmy3NwCXAzd3UZQkafJGDYTDgeOr6gcASd4JfL6qXtlVYdI0OM8yu7zURfdGvbjdo4DtQ7e3tzZJ0pwYdYRwPnB5kn9ot18AnNdNSZKkaRh1ldG7knwBeFprek1Vfa27siRJkzbqISOAA4BtVfVnwJYkR3VUkyRpCkb9CM0/At4GvL01PQT4RFdFSZImb9QRwguB5wM/Aqiq7wAP76ooSdLkjTqpvL2qKkkBJHlohzVJmiMuF+2PUUcIn07yYeARSU4HLmVGPixHkjQeS44QkgT4W+BxwDbgWOAPq+qSjmuTJE3QkoHQDhVdVFXHAYaApN7zMNbOjXrI6OokJ3RaiSRpqkadVH4y8MoktzFYaRQGg4fHd1WYJGmydhkISY6sqm8Bz5lQPSNJsgHYsHbt2mmXognry8Xn+lKnNGypQ0afA6iq24H3VdXtw1/dl7dzVbWxqs5YtWrVtEqQpLmzVCBkaPvoLguRJE3XUnMItci2dlOfPiXMwx7SyrJUIDwhyTYGI4X92zY8OKl8YKfVSZImZpeBUFV7T6oQLW4l/qXuOnFp8pZz+WtJ0hwzECRJgIEgSWpGPVNZ6iXnIqTROUKQJAEGgiSp8ZCRNIKVuPRXK48jBEkSYCBIkhoDQZIEOIcwU/qyRNLj6bNtXK+Pr/PK4whBkgQYCJKkxkNGe8hhtVaavhza1PI5QpAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMYT03ps1k6K84Qlqd8cIUiSAANBktQYCJIkwECQJDVOKmsuJoPnoQ/T4s9OOzhCkCQBMxQISY5O8pEkn512LZK0EnUaCEnOTXJXkk0L2k9KclOSzUnOBKiqW6vqtC7rkSQtrusRwseAk4YbkuwNfBB4LrAOODXJuo7rkCQtodNJ5aq6LMmaBc0nApur6laAJBcApwA3jLLPJGcAZwAceeSRY6t1KU68aWd8X2ieTGMO4TDg20O3twCHJXlkkg8BT0zy9sWeXFXnVNX6qlq/evXqrmuVpBVjZpadVtX3gNdNuw5JWqmmMUK4Azhi6PbhrU2SNEXTCIQrgGOSHJVkX+BlwIVTqEOSNKTrZaefAr4MHJtkS5LTqup+4I3AxcCNwKer6vou65AkLa3rVUanLtJ+EXDR7u43yQZgw9q1a3d3F51z9Ym6ttz3mO9JLWVmzlRejqraWFVnrFq1atqlSNLc6GUgSJLGz0CQJAEGgiSpMRAkScAMnam8HH1YZTRNriaRtDt6OUJwlZEkjV8vA0GSNH4GgiQJMBAkSY2BIEkCDARJUuOyU2mCFlsSfNu7T17W46Uu9HKE4LJTSRq/XgaCJGn8DARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKnpZSAk2ZDknK1bt067FEmaG70MBE9Mk6
Tx62UgSJLGz0CQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBPgRmj/DjyzUNPi+0yzo5QjBM5Ulafx6GQiSpPEzECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBkKqadg27LcndwO27+fRDgO+OsZxpmpe+zEs/wL7Mqnnpy5724+eravXCxl4Hwp5IcmVVrZ92HeMwL32Zl36AfZlV89KXrvrhISNJEmAgSJKalRwI50y7gDGal77MSz/AvsyqeelLJ/1YsXMIkqT/byWPECRJQwwESRIwR4GQ5NwkdyXZNNT2hCRfTvL1JBuTHNja1yS5N8k17etDQ895Unv85iRnJ8ks96Xd9/h23/Xt/v362Jckrxh6Ta5J8tMkv9TTvjwkyXmt/cYkbx96zklJbmp9OXPG+7Fvko+29muTPGPoObPwmhyR5N+S3NDe/29q7QcnuSTJze37Qa09rdbNSa5LcvzQvl7dHn9zklf3oC+Pa6/ZfUneumBfu/ceq6q5+AJ+BTge2DTUdgXw9Lb9WuCP2/aa4cct2M/lwFOAAF8AnjvjfdkHuA54Qrv9SGDvPvZlwfOOA27p8evycuCCtn0AcFt73+0N3AIcDewLXAusm+F+vAH4aNs+FLgK2GuGXpPHAMe37YcD3wTWAe8FzmztZwLvadvPa7Wm1f7V1n4wcGv7flDbPmjG+3IocALwLuCtQ/vZ7ffY3IwQquoy4J4FzY8FLmvblwAv2tU+kjwGOLCqvlKDn+z5wAvGXetSltmXZwPXVdW17bnfq6oHetqXYacCF0BvX5cCHppkH2B/YDuwDTgR2FxVt1bVdgZ9PKXr2octsx/rgH9tz7sL+B9g/Qy9JndW1dVt+wfAjcBhDH6m57WHnTdU2ynA+TXwFeARrS/PAS6pqnuq6vsMfgYnTbAry+5LVd1VVVcAP1mwq91+j81NICzieh78QbwYOGLovqOSfC3JF5M8rbUdBmwZesyW1jYLFuvLY4FKcnGSq5P8fmvvY1+GvRT4VNvuY18+C/wIuBP4FvAnVXUPg7q/PfT8WenLYv24Fnh+kn2SHAU8qd03c69JkjXAE4GvAo+qqjvbXf8FPKptL/bzn6nXZcS+LGa3+zLvgfBa4PVJrmIwBNve2u8EjqyqJwJvAT6ZoWPyM2qxvuwDPBV4Rfv+wiS/Np0SR7ZYXwBI8mTgx1W1aWdPnjGL9eVE4AHg54CjgN9NcvR0ShzJYv04l8F/KFcCHwC+xKBfMyXJw4C/A95cVduG72sjmN6sr59mX/bpasezoKq+weCQCkkeC5zc2u8D7mvbVyW5hcFf2ncAhw/t4vDWNnWL9YXBL+tlVfXddt9FDI4Pf4L+9WWHl/Hg6AD6+bq8HPinqvoJcFeS/wDWM/jLbXhENBN92cXvyv3A7+x4XJIvMTi2/X1m5DVJ8hAG/4H+TVX9fWv+7ySPqao72yGhu1r7Hez8538H8IwF7f/eZd07s8y+LGaxPi5prkcISQ5t3/cCzgI+1G6vTrJ32z4aOAa4tQ3LtiV5Slsx8SrgH6dS/AKL9QW4GDguyQHtePXTgRt62pcdbS+hzR/A4Ngq/evLt4BntvseymAC8xsMJm+PSXJUkn0ZhN+Fk657oV38rhzQ6ifJs4D7q2pm3l/t3/4IcGNVvW/orguBHSuFXj1U24XAq9pqo6cAW1tfLgaeneSgtorn2a1tYnajL4vZ/ffYJGfRu/xi8BflnQwmWLYApwFvYvDXzDeBd/PgmdkvYnDM9BrgamDD0H7WA5sYzNL/xY7nzGpf2uNf2fqzCXhvz/vyDOArO9lPr/oCPAz4THtdbgB+b2g/z2uPvwX4gxnvxxrgJgYTnJcyuGzyLL0mT2VwCOW69vt8Tfv5PhL4F+DmVvfB7fEBPthq/jqwfmhfrwU2t6/X9KAvj26v3zYGk/1bGEz07/Z7zEtXSJKAOT9kJEkanYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkC
Q1/wsJv26J6BeongAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BsmtFinSF1\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQHUlEQVR4nO3df7AdZX3H8ffHgCBqI0i0TgADDYPNVFrTSO1UW2urBuMVa2kL1aljGVLb2qnjHzWoY+0fncFO6w8crKbqoFZBpEqJxEGoVv6xQFBUEJGIoSbaJkq9oOMY0W//OHvXQ8hNTpK7d+/e+37NnLm7zzl3z/eZ2dxPdp9nd1NVSJIE8Ii+C5AkLRyGgiSpZShIklqGgiSpZShIklpH9V3AkTjxxBNr1apVfZchSYNy6623fqeqVuzvvUGGQpIpYGr16tVs27at73IkaVCS3Dvbe4M8fVRVW6pq4/Lly/suRZIWlUGGgiSpG4aCJKllKEiSWoMMhSRTSTZPT0/3XYokLSqDDAUHmiWpG4MMBUlSNwwFSVJrkBevzYVVm67db/uOizfMcyWStHB4pCBJag0yFJx9JEndGGQoOPtIkroxyFCQJHXDUJAktQwFSVLLUJAktQwFSVJrkKHglFRJ6sYgQ8EpqZLUjUGGgiSpG4aCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoMMBS9ek6RuDDIUvHhNkroxyFCQJHXDUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVJrkKHgDfEkqRuDDAVviCdJ3RhkKEiSumEoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJaR/VdwFK0atO1+23fcfGGea5Ekh7KIwVJUstQkCS1DAVJUssxhTngGIGkxcJQ6NBsYSFJC9WCCoUkLwY2AD8HvLeqPtVzSZK0pHQeCkneB7wQ2F1VvzTWvh54O7AMeE9VXVxVVwNXJzke+EdgSYWCp6Ek9W0+BpovA9aPNyRZBlwKnA2sAc5PsmbsI29o3pckzaPOQ6GqbgTu26f5LGB7Vd1TVXuBK4BzMvJm4JNV9fmua5MkPVRfU1JXAt8cW9/ZtP0V8LvAuUleub9fTLIxybYk2/bs2dN9pZK0hCyogeaqugS45CCf2QxsBli3bl3NR12StFT0FQq7gJPH1k9q2hYsp5dKWgr6On10C3B6klOTPBI4D7hm0l9OMpVk8/T0dGcFStJS1HkoJLkc+BxwRpKdSS6oqgeBVwHXAXcCV1bVHZNus6q2VNXG5cuXd1O0JC1RnZ8+qqrzZ2nfCmzt+vslSZPzhniSpNYgQ8ExBUnqxiBDwTEFSerGIENBktQNQ0GS1BpkKDimIEndGGQoOKYgSd1YUPc+Wgi8nYWkpcxQGDAfyiNprhkKA+DRi6T5MsgxBQeaJakbgwwFB5olqRuDDAVJUjcMBUlSy1CQJLUGGQoONEtSNwYZCg40S1I3JgqFJE/tuhBJUv8mvXjtnUmOAS4DPlRVnrdZwLzSWdLhmuhIoaqeBbwUOBm4NcmHkzy308okSfNu4jGFqrobeAPwWuC3gEuSfDXJS7oqTpI0vyYdUzgzyVuBO4HnAFNV9YvN8ls7rE+SNI8mHVN4B/Ae4HVV9cOZxqr6VpI3dFLZASSZAqZWr1493189aI41SDqYSU8fbQA+PBMISR6R5DiAqvpgV8XNximpktSNSUPhBuBRY+vHNW2S
pEVk0lA4tqq+P7PSLB/XTUmSpL5MGgo/SLJ2ZiXJrwI/PMDnJUkDNOlA86uBjyb5FhDg54E/6qwqzSsHoCXNmCgUquqWJE8Bzmia7qqqH3dXliSpD4fyjOanA6ua31mbhKr6QCdVSZJ6MVEoJPkg8AvAbcBPmuYCDAVJWkQmPVJYB6ypquqyGElSvyadfXQ7o8HlBcGH7EhSNyYNhROBryS5Lsk1M68uCzsQr2iWpG5MevroTV0WIUlaGCadkvrZJE8GTq+qG5r7Hi3rtjRJ0nyb9NbZFwJXAe9umlYCV3dVlCSpH5OePvpL4CzgJhg9cCfJEzqrSguCVzpLS8+kA80/qqq9MytJjmJ0nYIkaRGZNBQ+m+R1wKOaZzN/FNjSXVmSpD5MGgqbgD3Al4E/A7Yyel6zJGkRmXT20U+Bf2lekqRFatJ7H32D/YwhVNVpc16RJKk3h3LvoxnHAn8AnDD35UiS+jTRmEJVfXfstauq3gb0Ni/Rex9JUjcmPX20dmz1EYyOHA7lWQxzqqq2AFvWrVt3YV81SNJiNOkf9n8aW34Q2AH84ZxXI0nq1aSzj36760IkSf2b9PTRaw70flW9ZW7KkST16VBmHz0dmHmGwhRwM3B3F0VJkvoxaSicBKytqgcAkrwJuLaqXtZVYZKk+TfpbS6eCOwdW9/btEmSFpFJjxQ+ANyc5OPN+ouB93dTkiSpL5POPvr7JJ8EntU0vaKqvtBdWZKkPkx6+gjgOOD+qno7sDPJqR3VJEnqyaSP4/xb4LXARU3T0cC/dlWUJKkfkx4p/B7wIuAHAFX1LeCxXRUlSerHpKGwt6qK5vbZSR7dXUmSpL5MGgpXJnk38LgkFwI34AN3JGnROejsoyQBPgI8BbgfOAN4Y1Vd33FtkqR5dtBQqKpKsrWqngoYBJK0iE16+ujzSZ7eaSWSpN5NekXzrwEvS7KD0QykMDqIOLOrwiRJ8++AoZDklKr6b+D5XReS5DTg9cDyqjq36++TJD3cwU4fXQ1QVfcCb6mqe8dfB9t4kvcl2Z3k9n3a1ye5K8n2JJua77inqi443I5Iko7cwUIhY8unHcb2LwPWP2SDyTLgUuBsYA1wfpI1h7FtSdIcO1go1CzLE6mqG4H79mk+C9jeHBnsBa4Azpl0m0k2JtmWZNuePXsOtSRJ0gEcLBR+Ocn9SR4AzmyW70/yQJL7D/M7VwLfHFvfCaxM8vgk7wKeluSi/f8qVNXmqlpXVetWrFhxmCVIkvbngAPNVbVsvgqpqu8Cr5yv75MkPdyh3Dp7ruwCTh5bP6lpkyT1bNLrFObSLcDpzfMYdgHnAX98KBtIMgVMrV69uoPydDCrNl273/YdF2+Y50okzbVOjxSSXA58Djgjyc4kF1TVg8CrgOuAO4Erq+qOQ9luVW2pqo3Lly+f+6IlaQnr9Eihqs6fpX0rsLXL75YkHbo+xhSOWJKpJJunp6f7LkWSFpVBhoKnjySpG4MMBUlSNwwFSVLLUJAktQYZCg40S1I3BhkKDjRLUjcGGQqSpG4YCpKk1iBDwTEFSerGIEPBMQVJ6sYgQ0GS1A1DQZLUMhQkSS1DQZLUGmQoOPtIkroxyFBw9pEkdWOQoSBJ6oahIElqGQqSpJahIElqGQqSpNZRfRdwOJJMAVOrV6/uuxSNWbXp2lnf23HxhnmsRNLhGuSRglNSJakbgwwFSVI3DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1vHhN8+JAF7btjxe7Sf0Y5JGCF69JUjcGGQqSpG4YCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoZCpKklqEgSWoNMhSSTCXZPD093XcpkrSoDDIUvCGeJHVjkKEgSeqGoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqTWUX0XMCPJo4F3AnuB/6yq
D/VckiQtOZ0eKSR5X5LdSW7fp319kruSbE+yqWl+CXBVVV0IvKjLuiRJ+9f16aPLgPXjDUmWAZcCZwNrgPOTrAFOAr7ZfOwnHdclSdqPTk8fVdWNSVbt03wWsL2q7gFIcgVwDrCTUTDcxgHCKslGYCPAKaecMvdFa0FbtenaOdnOjos3zMl2pK7Nts93tQ/3MdC8kp8dEcAoDFYCHwN+P8k/A1tm++Wq2lxV66pq3YoVK7qtVJKWmAUz0FxVPwBe0XcdkrSU9XGksAs4eWz9pKZtYkmmkmyenp6e08IkaanrIxRuAU5PcmqSRwLnAdccygaqaktVbVy+fHknBUrSUtX1lNTLgc8BZyTZmeSCqnoQeBVwHXAncGVV3dFlHZKkyXQ9++j8Wdq3Alu7/G5J0qEb5G0uHFOQpG4MMhQcU5CkbgwyFCRJ3UhV9V3DYUuyB7j3MH/9ROA7c1jOQmG/hsV+Dcti6deTq2q/V/8OOhSORJJtVbWu7zrmmv0aFvs1LIu1X+M8fSRJahkKkqTWUg6FzX0X0BH7NSz2a1gWa79aS3ZMQZL0cEv5SEGStA9DQZLUWpKhMMszohes/T3rOskJSa5Pcnfz8/imPUkuafr2pSRrx37n5c3n707y8j76MlbLyUk+k+QrSe5I8tdN+9D7dWySm5N8senX3zXtpya5qan/I80dgklyTLO+vXl/1di2Lmra70ry/H569FBJliX5QpJPNOuD71eSHUm+nOS2JNuatkHvh0ekqpbUC1gGfB04DXgk8EVgTd91HaTm3wTWArePtf0DsKlZ3gS8uVl+AfBJIMAzgJua9hOAe5qfxzfLx/fYpycBa5vlxwJfY/TM7qH3K8BjmuWjgZuaeq8Ezmva3wX8ebP8F8C7muXzgI80y2uaffMY4NRmn122APbF1wAfBj7RrA++X8AO4MR92ga9Hx7JaykeKbTPiK6qvcDMM6IXrKq6Ebhvn+ZzgPc3y+8HXjzW/oEa+S/gcUmeBDwfuL6q7quq/wOuB9Z3X/3+VdW3q+rzzfIDjG6jvpLh96uq6vvN6tHNq4DnAFc17fv2a6a/VwG/kyRN+xVV9aOq+gawndG+25skJwEbgPc062ER9GsWg94Pj8RSDIXZnhE9NE+sqm83y/8DPLFZnq1/C7bfzamFpzH6X/Xg+9WcYrkN2M3oj8PXge/V6Fki8NAa2/qb96eBx7MA+wW8Dfgb4KfN+uNZHP0q4FNJbk2ysWkb/H54uBbMM5p1+KqqkgxybnGSxwD/Bry6qu4f/WdyZKj9qqqfAL+S5HHAx4Gn9FzSEUvyQmB3Vd2a5Nl91zPHnllVu5I8Abg+yVfH3xzqfni4luKRwhE/I3qB+N/msJXm5+6mfbb+Lbh+JzmaUSB8qKo+1jQPvl8zqup7wGeAX2d0mmHmP2HjNbb1N+8vB77LwuvXbwAvSrKD0SnX5wBvZ/j9oqp2NT93Mwrxs1hE++GhWoqhcMTPiF4grgFmZji8HPj3sfY/aWZJPAOYbg6DrwOel+T4ZibF85q2XjTnl98L3FlVbxl7a+j9WtEcIZDkUcBzGY2XfAY4t/nYvv2a6e+5wKdrNHJ5DXBeM4vnVOB04Ob56cXDVdVFVXVSVa1i9G/m01X1UgberySPTvLYmWVG+8/tDHw/PCJ9j3T38WI0g+BrjM71vr7veiao93Lg28CPGZ2rvIDR+dn/AO4GbgBOaD4b4NKmb18G1o1t508ZDextB17Rc5+eyehc7peA25rXCxZBv84EvtD063bgjU37aYz++G0HPgoc07Qf26xvb94/bWxbr2/6exdwdt/74Vhdz+Zns48G3a+m/i82rztm/h4MfT88kpe3uZAktZbi6SNJ0iwMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLX+H/VVLSCNYdmlAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BsmtFinSF2\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAR+UlEQVR4nO3de7BdZ13G8e9DagtFTakNUJvWpKRTzKhIDQiD9wuklLTc1GZwRKyNoDjeZrQVR/EPZ8AbF6daoiAXS0spF9sSpoqK6AwDTZFLSymN0NIUsQE1VXQshZ9/7HVWN/GcZJ/0vHvtnf39zJzJWu/aZ58nK9l5su6pKiRJAnjI0AEkSbPDUpAk9SwFSVLPUpAk9SwFSVLvuKEDPBinnHJKbdq0aegYkjRXbrrpps9X1Yblls11KWzatIm9e/cOHUOS5kqSO1da5u4jSVLPUpAk9SwFSVLPUpAk9SwFSVLPUpAk9SwFSVLPUpAk9eb64rUHY9Ml71p2/I6XnTflJJI0O9xSkCT1LAVJUs9SkCT1ZqYUknxzksuTXJPkRUPnkaRF1LQUkrwuyT1Jbj5kfHuS25LsS3IJQFXdWlUvBH4UeErLXJKk5bXeUng9sH18IMk64DLgXGArsDPJ1m7Z+cC7gD2Nc0mSltG0FKrqfcC/HTL8RGBfVX2qqu4DrgIu6F5/bVWdCzxvpfdMsivJ3iR7Dxw40Cq6JC2kIa5TOA24a2x+P/CdSb4PeDZwAofZUqiq3cBugG3btlW7mJK0eGbm4rWqei/w3oFjSNJCG+Lso7uB08fmN3ZjE0uyI8nugwcPrmkwSVp0Q5TCjcBZSTYnOR64ELh2NW9QVddV1a7169c3CShJi6r1KalXAu8Hzk6yP8lFVXU/8GLgBuBW4OqquqVlDknSZJoeU6iqnSuM78HTTiVp5szMFc2r4TEFSWpjLkvBYwqS1MZcloIkqY25LAV3H0lSG3NZCu4+kqQ25rIUJEltWAqSpN5cloLHFCSpjbksBY8pSFIbc1kKkqQ2LAVJUs9SkCT15rIUPNAsSW3MZSl4oFmS2pjLUpAktWEpSJJ6loIkqWcpSJJ6loIkqTeXpeApqZLUxlyWgqekSlIbc1kKkqQ2LAVJUs9SkCT1LAVJUs9SkCT1LAVJUm8uS8HrFCSpjbksBa9TkKQ25rIUJEltWAqSpJ6lIEnqWQqSpJ6lIEnqWQqSpJ6lIEnqWQqSpJ6lIEnqzWUpeJsLSWpjLkvB21xIUhtzWQqSpDYsBUlSz1KQJPUsBUlSz1KQJPUsBUlSz1KQJPUsBUlSz1KQJPUsBUlSz1KQJPUsBUlSz1KQJPUsBUlS77ihA4xL8kzgPODrgddW1V8NHEmSFkrzLYUkr0tyT5KbDxnfnuS2JPuSXAJQVe+sqouBFwI/1jqbJOmrTWP30euB7eMDSdYBlwHnAluBnUm2jr3kN7rlkqQpal4KVfU+4N8OGX4isK+qPlVV9wFXARdk5OXAu6vqQ8u9X5JdSfYm2XvgwIG24SVpwQx1oPk04K6x+f3d2M8DPwQ8N8kLl/vGqtpdVduqatuGDRvaJ5WkBTJTB5qr6tXAq4fOIUmLaqgthbuB08fmN3ZjE0myI8nugwcPrnkwSVpkQ5XCjcBZSTYnOR64ELh20m+uquuqatf69eubBZSkRTRRKST51qP9AUmuBN4PnJ1kf5KLqup+4MXADcCtwNVVdcvR/gxJ0tqY9JjCHyc5gdHppVdU1cT7bapq5wrje4A9
k77PuCQ7gB1btmw5mm+XJK1goi2Fqvpu4HmMjgPclOTNSX64abLD53H3kSQ1MPExhaq6ndFFZb8GfC/w6iSfSPLsVuEkSdM16TGFb0vyCkb7/38A2FFV39xNv6JhPknSFE26pfBHwIeAx1XVzy1dbVxVn2W09TBVnpIqSW1MWgrnAW+uqv8BSPKQJCcCVNWbWoVbiccUJKmNSUvhPcDDxuZP7MYkSceQSUvhoVX1X0sz3fSJbSJJkoYyaSl8Mck5SzNJvgP4nzaRjsxjCpLUxqSl8IvAW5P8Q5J/BN7C6IrkQXhMQZLamOiK5qq6McljgbO7oduq6kvtYkmShrCaW2c/AdjUfc85SaiqNzZJJUkaxESlkORNwGOADwNf7oYLsBQk6Rgy6ZbCNmBrVVXLMJPyhniS1MakB5pvBh7dMshqeKBZktqYdEvhFODjST4I/O/SYFWd3ySVJGkQk5bCS1uGkCTNhklPSf37JN8EnFVV7+nue7SubTRJ0rRNeuvsi4FrgNd0Q6cB72wVSpI0jEkPNP8c8BTgXugfuPPIVqGOxNtcSFIbk5bC/1bVfUszSY5jdJ3CIDz7SJLamLQU/j7JrwMP657N/FbgunaxJElDmLQULgEOAB8DfgbYwwBPXJMktTXp2UdfAf60+5IkHaMmvffRp1nmGEJVnbnmiSRJg1nNvY+WPBT4EeDktY8jSRrSRMcUquoLY193V9UrgfMaZ5MkTdmku4/OGZt9CKMth9U8i2FNeZdUSWpj0n/Y/2Bs+n7gDuBH1zzNhKrqOuC6bdu2XTxUBkk6Fk169tH3tw4iSRrepLuPfvlwy6vqD9cmjiRpSKs5++gJwLXd/A7gg8DtLUJJkoYxaSlsBM6pqv8ESPJS4F1V9eOtgkmSpm/S21w8CrhvbP6+bkySdAyZdEvhjcAHk7yjm38m8IY2kSRJQ5n07KPfSfJu4Lu7oRdU1T+1iyVJGsKku48ATgTurapXAfuTbG6USZI0kEkfx/lbwK8Bl3ZDXwP8RatQkqRhTLql8CzgfOCLAFX1WeDrWoU6Eh/HKUltTFoK91VV0d0+O8nD20U6Mh/HKUltTFoKVyd5DXBSkouB9+ADdyTpmHPEs4+SBHgL8FjgXuBs4Der6q8bZ5MkTdkRS6GqKsmeqvpWwCKQpGPYpLuPPpTkCU2TSJIGN+kVzd8J/HiSOxidgRRGGxHf1iqYJGn6DlsKSc6oqs8AT5tSHknSgI60pfBORndHvTPJ26rqOdMIJUkaxpGOKWRs+syWQSRJwztSKdQK05KkY9CRdh89Lsm9jLYYHtZNwwMHmr++aTpJ0lQdthSqat20gkiShreaW2dLko5xk16noCnYdMm7lh2/42XnTTmJpEXlloIkqTczpZDkzCSvTXLN0FkkaVE13X2U5HXAM4B7qupbxsa3A68C1gF/VlUvq6pPARcdS6Xg7iBJ86b1lsLrge3jA0nWAZcB5wJbgZ1JtjbOIUmaQNMthap6X5JNhww/EdjXbRmQ5CrgAuDjk7xnkl3ALoAzzjhjzbI+GCttEQxlnrZQVpt1nn5v0jwa4pjCacBdY/P7gdOSfEOSy4HHJ7l0pW+uqt1Vta2qtm3YsKF1VklaKDNzSmpVfQF44dA5JGmRDVEKdwOnj81v7MYmlmQHsGPLli1rmWtqZm13kyQtGWL30Y3AWUk2JzkeuBC4djVvUFXXVdWu9evXNwkoSYuqaSkkuRJ4P3B2kv1JLqqq+4EXAzcAtwJXV9UtLXNIkibT+uyjnSuM7wH2tPzZkqTVm5kDzasx78cUVmuo0zA9/VNaPDNzm4vV8JiCJLUxl6UgSWpjLkshyY4kuw8ePDh0FEk6psxlKbj7SJLamMtSkCS1YSlIknqekqqp8NYe0nyYyy0FjylIUhtzWQqSpDYsBUlSz1KQJPUsBUlSz7OPDrGIZ8ms9vd8LNwo71j4PUgtzOWWgmcfSVIbc1kKkqQ2LAVJUs9SkCT1LAVJUs+z
j+bYrJ0pNWt5oH0mz2LSsWYutxQ8+0iS2pjLUpAktWEpSJJ6loIkqWcpSJJ6loIkqWcpSJJ6XqewQGbxOoJZ4zpae17LMV/mckvB6xQkqY25LAVJUhuWgiSpZylIknqWgiSpZylIknqWgiSpZylIknqWgiSpZylIknre5kKastXeSmOl20EMdfsIbwVybJvLLQVvcyFJbcxlKUiS2rAUJEk9S0GS1LMUJEk9S0GS1LMUJEk9S0GS1LMUJEk9S0GS1LMUJEk9S0GS1LMUJEk9S0GS1LMUJEm9mXmeQpKHA38M3Ae8t6quGDiSJC2cplsKSV6X5J4kNx8yvj3JbUn2JbmkG342cE1VXQyc3zKXJGl5rXcfvR7YPj6QZB1wGXAusBXYmWQrsBG4q3vZlxvnkiQtI1XV9gckm4Drq+pbuvknAy+tqqd185d2L90P/HtVXZ/kqqq6cIX32wXsAjjjjDO+48477zyqXD5SUItmrR7rOeRnp/WjRmdRi8euJrmpqrYtt2yIA82n8cAWAYzK4DTg7cBzkvwJcN1K31xVu6tqW1Vt27BhQ9ukkrRgZuZAc1V9EXjB0DkkaZENsaVwN3D62PzGbmxiSXYk2X3w4ME1DSZJi26IUrgROCvJ5iTHAxcC167mDarquqratX79+iYBJWlRtT4l9Urg/cDZSfYnuaiq7gdeDNwA3ApcXVW3tMwhSZpM02MKVbVzhfE9wJ6jfd8kO4AdW7ZsOdq3kCQtYy5vc+HuI0lqYy5LQZLUhqUgSeo1v6K5haVjCsCPAbcf5ducAnx+zUK1Mw85zbg25iEjzEdOMx7eN1XVslf/zmUprIUke1e6zHuWzENOM66NecgI85HTjEfP3UeSpJ6lIEnqLXIp7B46wITmIacZ18Y8ZIT5yGnGo7SwxxQkSf/fIm8pSJIOYSlIknoLWQorPCN6iBynJ/m7JB9PckuSX+jGT07y10lu7359RDeeJK/ucn80yTlTzLouyT8lub6b35zkA12Wt3R3vCXJCd38vm75pinlOynJNUk+keTWJE+e0fX4S92f9c1Jrkzy0KHX5XLPUj+adZfk+d3rb0/y/Cnl/L3uz/yjSd6R5KSxZZd2OW9L8rSx8Waf/+Uyji37lSSV5JRufrB1eVhVtVBfwDrgn4EzgeOBjwBbB8pyKnBON/11wCcZPbf6d4FLuvFLgJd3008H3g0EeBLwgSlm/WXgzYwerQpwNXBhN3058KJu+meBy7vpC4G3TCnfG4Cf7qaPB06atfXI6AmDnwYeNrYOf3LodQl8D3AOcPPY2KrWHXAy8Knu10d004+YQs6nAsd10y8fy7m1+2yfAGzuPvPrWn/+l8vYjZ/O6M7QdwKnDL0uD/t7mNYPmpUv4MnADWPzlwKXDp2ry/KXwA8DtwGndmOnArd1068Bdo69vn9d41wbgb8BfgC4vvtL/PmxD2O/Tru/+E/upo/rXpfG+dZ3/9jmkPFZW49Lj6I9uVs31wNPm4V1CWw65B/bVa07YCfwmrHxr3pdq5yHLHsWcEU3/VWf66V1OY3P/3IZgWuAxwF38EApDLouV/paxN1HKz0jelDdroHHAx8AHlVV/9It+hzwqG56qOyvBH4V+Eo3/w3Af9To2RiH5ugzdssPdq9vaTNwAPjzbhfXnyV5ODO2HqvqbuD3gc8A/8Jo3dzEbK3LJatdd7PwufopRv/z5jB5pp4zyQXA3VX1kUMWzUzGcYtYCjMnydcCbwN+saruHV9Wo/8qDHbecJJnAPdU1U1DZZjAcYw22f+kqh4PfJHRLo/e0OsRoNsvfwGjEvtG4OHA9iEzTWIW1t2RJHkJcD9wxdBZxiU5Efh14DeHzjKpRSyFB/2M6LWU5GsYFcIVVfX2bvhfk5zaLT8VuKcbHyL7U4Dzk9wBXMVoF9KrgJOSLD2kaTxHn7Fbvh74QuOM+4H9VfWBbv4aRiUxS+sR4IeAT1fVgar6EvB2Rut3ltblktWuu8E+V0l+
EngG8LyuwDhMnmnnfAyj/wR8pPsMbQQ+lOTRM5TxqyxiKTzoZ0SvlSQBXgvcWlV/OLboWmDpjIPnMzrWsDT+E91ZC08CDo5t4jdRVZdW1caq2sRoXf1tVT0P+DvguStkXMr+3O71Tf+XWVWfA+5KcnY39IPAx5mh9dj5DPCkJCd2f/ZLOWdmXY5Z7bq7AXhqkkd0W0RP7caaSrKd0a7N86vqvw/Jf2F3Btdm4Czgg0z5819VH6uqR1bVpu4ztJ/RySWfY8bW5XjohftidNT/k4zOQnjJgDm+i9Fm+UeBD3dfT2e03/hvGN0W/D3Ayd3rA1zW5f4YsG3Keb+PB84+OpPRh2wf8FbghG78od38vm75mVPK9u3A3m5dvpPRWRsztx6B3wY+AdwMvInR2TGDrkvgSkbHOL7E6B+ti45m3THap7+v+3rBlHLuY7T/fenzc/nY61/S5bwNOHdsvNnnf7mMhyy/gwcONA+2Lg/35W0uJEm9Rdx9JElagaUgSepZCpKknqUgSepZCpKknqUgSepZCpKk3v8Bd863PJBzroYAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BsmtUnfSF\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAPTUlEQVR4nO3da6xlZX3H8e/PQfFSHEGm1gDjgEOwk9jW6fGSVE16UUEyotVWaJsaS5iaSFPT9MV4ScsbU22itrRoOypFqYVivZSJYxCaVt6oMFjkIkFGxDpoBbUZ1BpH8N8Xe53H43TOmX3OnHXW2Xt/P8nOWevZt/9a2Xt+86zn2WulqpAkCeBRQxcgSVo/DAVJUmMoSJIaQ0GS1BgKkqTmuKELOBYnn3xybdmyZegyJGmi3HLLLd+qqk1Hum+iQ2HLli3s27dv6DIkaaIk+epi93n4SJLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSmon+8dqx2LLrE0dsv+9t565xJZK0fthTkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkZl39eC3Jy4FzgScC76+qTw1ckiTNlN57CkkuT/JAkjsOaz87yd1J9ifZBVBVH6+qi4DXAa/uuzZJ0k9bi8NHVwBnL2xIsgG4DDgH2AZckGTbgoe8pbtfkrSGeg+FqroR+M5hzc8B9lfVvVV1CLgaOC8jbwc+WVWfP9LrJdmZZF+SfQ8++GC/xUvSjBlqoPkU4GsL1g90bX8E/AbwqiSvO9ITq2p3Vc1V1dymTZv6r1SSZsi6GmiuqkuBS4euQ5Jm1VA9hfuB0xasn9q1SZIGNFQo3AycmeT0JI8BzgeuHagWSVJnLaakXgV8BjgryYEkF1bVw8DFwHXAXcA1VXXnMl5zR5LdBw8e7KdoSZpRvY8pVNUFi7TvBfau8DX3AHvm5uYuOpbaJEk/zdNcSJKadTX7aNZ53WhJQ5vInoJjCpLUj4nsKTimMGLPQtJqm8iegiSpH4aCJKkxFCRJzUSOKQxhseP34DF8SdNjInsKzj6SpH5MZE9hvc0+Wu4soKV6HZI0pInsKUiS+mEoSJIaQ0GS1BgKkqRmIgeak+wAdmzdunXoUtYlT38haaUmMhTW2+yjxTjL6CcMKmkyTGQozBrDRdJaMRS0KP93L80eB5olSY09Ba3a4SkPc0mTz56CJKmZyFDwhHiS1I+JDIWq2lNVOzdu3Dh0KZI0VRxTmCEe85d0NBPZU5Ak9cOegpbNHoc0vQwFDcofyEnri6FwGP8XLGmWGQpal+xBSMNwoFmS1ExkKPjjNUnqx0SGgj9ek6R+TGQoSJL64UCzJooD0FK/7ClIkhpDQZLUGAqSpMZQkCQ1DjRrKiz39CQOTEtHZk9BktQYCpKkxlCQJDUTGQqe+0iS+jGRoeC5jySpHxMZCpKkfhgKkqTGUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEnNWKGQ5Jl9FyJJGt6412h+d5LjgSuAD1XVoBcySLID2LF169Yhy9AEW+yazl67WbNurJ5CVb0A+F3gNOCWJP+U5EW9VrZ0PV5PQZJ6MG5Pgaq6J8lbgH3ApcCzkgR4U1V9tK8CpfXAnoVmxbhjCr+Q5F3AXcCvATuq6ue75Xf1WJ8kaQ2N21P4G+B9jHoFP5hvrKqvd70HaSos1iOQZsW4oXAu8IOqegQgyaOAx1bV/1bVl
b1VJ0laU+P+TuEG4HEL1h/ftUmSpsi4PYXHVtX35leq6ntJHt9TTdJUW+4hKgeztZbG7Sl8P8n2+ZUkvwz8YInHS5Im0Lg9hTcAH07ydSDAzwGv7q0qaUI4VVXTZqxQqKqbkzwDOKtruruqftRfWZKkIYz94zXg2cCW7jnbk1BVH+ylKknSIMYKhSRXAk8HbgUe6ZoLMBSkgXjoSn0Yt6cwB2yrquqzGEnSsMadfXQHo8FlSdIUG7encDLwxSQ3AT+cb6yql/VSlSRpEOOGwiV9FiFJWh/GnZL66SRPA86sqhu6XzNv6Lc0SdJaG3f20UXATuAkRrOQTgH+Dvj1/kqTBMs/LYazknQsxh1ofj3wK8BDMLrgDvCzfRUlSRrGuKHww6o6NL+S5DhGv1OQJE2RcUPh00neBDyuuzbzh4E9/ZUlSRrCuKGwC3gQuB34Q2Av4BXXJGnKjDv76MfAe7ubJGlKjTv76CscYQyhqs5Y9YokrSkv+qOFlnPuo3mPBX6L0fTUVZPkDODNwMaqetVqvrYkaTxjjSlU1bcX3O6vqr8CjvrfhSSXJ3kgyR2HtZ+d5O4k+5Ps6t7j3qq6cEVbIUlaFeMePtq+YPVRjHoO4zz3CuBvWXCK7SQbgMuAFwEHgJuTXFtVXxyzZklST8Y9fPSOBcsPA/cBv320J1XVjUm2HNb8HGB/Vd0LkORq4DxgrFBIspPRr6vZvHnzOE+R1txyj9OvhfVYk9afcWcf/eoqvucpwNcWrB8AnpvkycBbgWcleWNV/cUitewGdgPMzc35AzpJWkXjHj76k6Xur6p3HmshVfVt4HXH+jqSpJVbzuyjZwPXdus7gJuAe1bwnvcDpy1YP7VrkyQNbNxQOBXYXlXfBUhyCfCJqvq9FbznzcCZSU5nFAbnA7+znBdIsgPYsXXr1hW8vSRpMeOe5uIpwKEF64e6tiUluQr4DHBWkgNJLqyqh4GLgeuAu4BrqurO5RRdVXuqaufGjRuX8zRJ0lGM21P4IHBTko916y8HPnC0J1XVBYu072V0/iRJ0joy7uyjtyb5JPCCrum1VfWf/ZUlSRrCuIePAB4PPFRVfw0c6MYEJElTZNwpqX/OaAbSWcA/AI8G/pHR1djWnAPN0nC83Od0G7en8ArgZcD3Aarq68AJfRV1NA40S1I/xg2FQ1VVdKfPTvKE/kqSJA1l3FC4JsnfA09KchFwA15wR5KmzlHHFJIE+GfgGcBDjMYV/qyqru+5NknSGjtqKFRVJdlbVc8EDAJJmmLj/njt80meXVU391rNmJx9JK0/zkqaDuOOKTwX+GySLye5LcntSW7rs7ClOPtIkvqxZE8hyeaq+i/gJWtUjyRpQEc7fPRxRmdH/WqSj1TVK9eiKEnSMI52+CgLls/osxBJ0vCOFgq1yLIkaQod7fDRLyZ5iFGP4XHdMt16VdUTe61uEc4+kibHYrOSluKMpeEs2VOoqg1V9cSqOqGqjuuW59cHCYSuLmcfSVIPlnPqbEnSlDMUJEmNoSBJagwFSVJjKEiSmokMhSQ7kuw+ePDg0KVI0lSZyFBwSqok9WMiQ0GS1A9DQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJao52PYV1yespSNNtsWsweJ2F/k1kT8Efr0lSPyYyFCRJ/TAUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY3nPpI08TxX0uqZyJ6C5z6SpH5MZChIkvphKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKnxegqSptZi11lYrlm6LsNE9hS8noIk9WMiQ0GS1A9DQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjaEgSWqOG7qAeUmeA
LwbOAT8R1V9aOCSJGnm9NpTSHJ5kgeS3HFY+9lJ7k6yP8murvk3gX+pqouAl/VZlyTpyPo+fHQFcPbChiQbgMuAc4BtwAVJtgGnAl/rHvZIz3VJko6g18NHVXVjki2HNT8H2F9V9wIkuRo4DzjAKBhuZYmwSrIT2AmwefPm1S9a0rq1Zdcnhi7hpyy3nvvedu6qvcdKXmscQww0n8JPegQwCoNTgI8Cr0zyHmDPYk+uqt1VNVdVc5s2beq3UkmaMetmoLmqvg+8dug6JGmWDdFTuB84bcH6qV2bJGlgQ4TCzcCZSU5P8hjgfODa5bxAkh1Jdh88eLCXAiVpVvU9JfUq4DPAWUkOJLmwqh4GLgauA+4CrqmqO5fzulW1p6p2bty4cfWLlqQZ1vfsowsWad8L7O3zvSVJy+dpLiRJjaEgSWomMhQcaJakfqSqhq5hxZI8CHx1hU8/GfjWKpYzidwHI+4H9wHM1j54WlUd8de/Ex0KxyLJvqqaG7qOIbkPRtwP7gNwH8ybyMNHkqR+GAqSpGaWQ2H30AWsA+6DEfeD+wDcB8AMjylIkv6/We4pSJIOYyhIkpqZDIVFrhE9lZLcl+T2JLcm2de1nZTk+iT3dH9P7NqT5NJuv9yWZPuw1a/Mka4NvpJtTvKa7vH3JHnNENuyUovsg0uS3N99Fm5N8tIF972x2wd3J3nJgvaJ/a4kOS3Jvyf5YpI7k/xx1z5Tn4Vlq6qZugEbgC8DZwCPAb4AbBu6rh639z7g5MPa/hLY1S3vAt7eLb8U+CQQ4HnA54auf4Xb/EJgO3DHSrcZOAm4t/t7Yrd84tDbdoz74BLgT4/w2G3d9+B44PTu+7Fh0r8rwFOB7d3yCcCXum2dqc/Ccm+z2FNo14iuqkPA/DWiZ8l5wAe65Q8AL1/Q/sEa+SzwpCRPHaLAY1FVNwLfOax5udv8EuD6qvpOVf0PcD1wdv/Vr45F9sFizgOurqofVtVXgP2MvicT/V2pqm9U1ee75e8yOlX/KczYZ2G5ZjEUFrtG9LQq4FNJbkmys2t7SlV9o1v+b+Ap3fI075vlbvO07ouLu0Mjl88fNmEG9kGSLcCzgM/hZ2FJsxgKs+b5VbUdOAd4fZIXLryzRv3jmZqXPIvb3HkP8HTgl4BvAO8Ytpy1keRngI8Ab6iqhxbeN8OfhUXNYijM1DWiq+r+7u8DwMcYHRL45vxhoe7vA93Dp3nfLHebp25fVNU3q+qRqvox8F5GnwWY4n2Q5NGMAuFDVfXRrnnmPwtLmcVQOOZrRE+KJE9IcsL8MvBi4A5G2zs/g+I1wL92y9cCv9/NwngecHBBN3vSLXebrwNenOTE7jDLi7u2iXXY+NArGH0WYLQPzk9yfJLTgTOBm5jw70qSAO8H7qqqdy64a+Y/C0saeqR7iBujWQZfYjSz4s1D19Pjdp7BaMbIF4A757cVeDLwb8A9wA3ASV17gMu6/XI7MDf0Nqxwu69idHjkR4yO/164km0G/oDRoOt+4LVDb9cq7IMru228jdE/gE9d8Pg3d/vgbuCcBe0T+10Bns/o0NBtwK3d7aWz9llY7s3TXEiSmlk8fCRJWoShIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNf8H440dYpg9JdkAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TotalBsmtSF\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQV0lEQVR4nO3dbZBeZX3H8e/PIPKgLiDUMjyY0GWwmanVNKKO1ra2KkgXrdpKqlOHUlKrduo4bQ3qWPuiHexMfaCDQqrWhyqIqBQkDoJafeMAQVFBjKQYS1BL1Bpa6ojgvy/us8c1ZpN7w5777Nn9fmbu2XOu+2H/13DIb69zXfc5qSokSQJ4SN8FSJKWDkNBktQyFCRJLUNBktQyFCRJrYP6LuDBOProo2v16tV9lyFJg3LTTTd9t6qO2dtzgw6F1atXs3Xr1r7LkKRBSfLN+Z4b5OmjJDNJNu/evbvvUiRpWRlkKFTVVVW1cWpqqu9SJGlZGWQoSJK6YShIklqGgiSpZShIklqGgiSpZShIklqD/vLaSrF609V7bd9x/hkTrkTScudIQZLUMhQkSS1DQZLUMhQkSS1DQZLUGuTqoyQzwMz09HTfpSyq+VYZSdKkDHKk4FVSJakbgxwpaMTvL0habIMcKUiSumEoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJafnmtB17OQtJS5UhBktQyFCRJLUNBktRyTmEZ8kJ5kg6UIwVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1ltSS1CTPA84AHgm8q6o+2XNJkrSidD5SSPLuJHcnuWWP9tOSbEuyPckmgKq6oqrOBV4GvKjr2iRJP2sSp4/eA5w2tyHJKuBC4HRgLbAhydo5L3l987wkaYI6D4Wq+hzw/T2aTwW2V9UdVXUfcCnw3Iy8CfhEVX1hb5+XZGOSrUm27tq1q9viJWmF6WtO4Tjgzjn7O4EnAX8O/A4wlWS6qi7a841VtRnYDLB+/fqaQK3LxkIv2e1lMaSVZ0lNNFfVBcAFfdchSStVX0tS7wJOmLN/fNMmSepRX6FwI3BykjVJDgbOAq4c981JZpJs3r17d2cFStJKNIklqZcAnwdOSbIzyTlVdT/wSuAa4Dbgsqq6ddzPrKqrqmrj1NRUN0VL0grV+ZxCVW2Yp30LsKXr368D530ZpJXHy1xIklqDDAXnFCSpG4MMBecUJKkbgwwFSVI3DAVJUstQkCS1BhkKTjRLUjcGGQpONEtSNwYZCpKkbhgKkqSWoSBJai2p+yloGLwmkrR8DXKk4OojSerGIEPB1UeS1I1BhoIkqRuGgiSpZShIklqGgiSpNcglqUlmgJnp6em+S9mn+ZZuStJSNciRgquPJKkbgwwFSVI3DAVJUstQkCS1DAVJUstQkCS1DAVJUmuQoeBVUiWpG4MMBb+nIEndGOQ3mheDN4qRpJ83yJGCJKkbhoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJaK/Z7Clp8+7rTnN//kIZhkCMFL3MhSd0YKxSS/ErXhSyEl7mQpG6MO1J4e5Ibkrw8if8SS9IyNVYoVNWvAy8GTgBuSvLBJM/stDJJ0sSNPadQVbcDrwdeA/wGcEGSryV5flfFSZIma9w5hccleQtwG/AMYKaqfrnZfkuH9UmSJmjcJan/BLwTeG1V/XC2saq+leT1nVQ2IPtaiqkRL1UuDcO4oXAG8MOqegAgyUOAQ6rq/6rq/Z1VJ0maqHHnFK4DDp2
zf1jTJklaRsYNhUOq6n9nd5rtw7opSZLUl3FD4d4k62Z3kvwa8MN9vF6SNEDjzim8Cvhwkm8BAX4ReFFnVUmSejFWKFTVjUkeC5zSNG2rqh93V5YkqQ8LuUrqE4HVzXvWJaGq3tdJVZKkXowVCkneD/wScDPwQNNcgKEgScvIuCOF9cDaqqouixlXkhlgZnp6uu9SJGlZGXf10S2MJpeXBC+dLUndGHekcDTw1SQ3AD+abayqMzupSpLUi3FD4Y1dFiFJWhrGXZL62SSPAU6uquuSHAas6rY0SdKkjXvp7HOBy4GLm6bjgCu6KkqS1I9xJ5pfATwVuAfaG+78QldFSZL6MW4o/Kiq7pvdSXIQo+8pSJKWkXFD4bNJXgsc2tyb+cPAVd2VJUnqw7ihsAnYBXwF+FNgC6P7NUuSlpFxVx/9BPjn5iFJWqbGvfbRN9jLHEJVnbToFUmSerOQax/NOgT4feCoxS9HktSnseYUqup7cx53VdVbgTM6rk2SNGHjnj5aN2f3IYxGDgu5F4MkaQDG/Yf9H+ds3w/sAP5g0avRirN609V7bd9xvgNRqQ/jrj76ra4LkST1b9zTR6/e1/NV9ebFKUeS1KeFrD56InBlsz8D3ADc3kVRkqR+jBsKxwPrqup/AJK8Ebi6ql7SVWGSpMkb9zIXjwbum7N/X9MmSVpGxh0pvA+4IcnHmv3nAe9dzEKSnAS8Dpiqqhcu5mdLksYz7pfX/g44G/jv5nF2Vf39/t6X5N1J7k5yyx7tpyXZlmR7kk3N77ijqs5ZeBckSYtl3NNHAIcB91TV24CdSdaM8Z73AKfNbUiyCrgQOB1YC2xIsnYBdUiSOjLu7Tj/BngNcF7T9FDgX/f3vqr6HPD9PZpPBbY3I4P7gEuB545dsSSpM+OOFH4POBO4F6CqvgU84gB/53HAnXP2dwLHJXlUkouAJyQ5b+9vhSQbk2xNsnXXrl0HWIIkaW/GnWi+r6oqSQEkOXyxC6mq7wEvG+N1m4HNAOvXr/eWoJK0iMYdKVyW5GLgiCTnAtdx4DfcuQs4Yc7+8U2bJKln+x0pJAnwIeCxwD3AKcAbquraA/ydNwInNxPVdwFnAX+4kA9IMgPMTE9PH2AJkqS92e9IoaoK2FJV11bVX1XVX44bCEkuAT4PnJJkZ5Jzqup+4JXANcBtwGVVdetCiq6qq6pq49TU1ELeJknaj3HnFL6Q5IlVdeNCPryqNszTvgXYspDPkiR1b9xQeBLwkiQ7GK1ACqNBxOO6KkySNHn7DIUkJ1bVfwLPnlA9kqQe7W+kcAWjq6N+M8lHquoFkyhqf5xoXv68I5vUj/1NNGfO9kldFrIQTjRLUjf2Fwo1z7YkaRna3+mjX01yD6MRw6HNNvx0ovmRnVYnSZqofYZCVa2aVCGSpP6NuyR1Seljonm+iU9JWk4Wcj+FJcOJZknqxiBDQZLUDUNBktQyFCRJrUGGQpKZJJt3797ddymStKwMMhScaJakbgwyFCRJ3TAUJEktQ0GS1DIUJEktQ0GS1BpkKLgkVZK6MchQcEmqJHVjkKEgSeqGoSBJahkKkqSWoSBJahkKkqSWoSBJag0yFPyegiR1Y5Ch4PcUJKkbgwwFSVI3DAVJUstQkCS1DAVJUstQkCS1DAVJUstQkCS1DAVJUuugvgs4EElmgJnp6em+S9GErd509YJev+P8MzqqRFqeBjlS8BvNktSNQYaCJKkbhoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJaXjpby9p8l9ru+pLaC73EN3iZby0NgxwpeOlsSerGIENBktQNQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1Dqo7wJmJTkceDtwH/DvVfWBnkuSpBWn05FCknc
nuTvJLXu0n5ZkW5LtSTY1zc8HLq+qc4Ezu6xLkrR3XZ8+eg9w2tyGJKuAC4HTgbXAhiRrgeOBO5uXPdBxXZKkvej09FFVfS7J6j2aTwW2V9UdAEkuBZ4L7GQUDDezj7BKshHYCHDiiScues2rN1296J+ppWe+/847zj9jwpVI+zbpY7WPiebj+OmIAEZhcBzwUeAFSd4BXDXfm6tqc1Wtr6r1xxxzTLeVStIKs2QmmqvqXuDsvuuQpJWsj5HCXcAJc/aPb9okST3rIxRuBE5OsibJwcBZwJUL+YAkM0k27969u5MCJWml6npJ6iXA54FTkuxMck5V3Q+8ErgGuA24rKpuXcjnVtVVVbVxampq8YuWpBWs69VHG+Zp3wJs6fJ3S5IWzstcSJJagwwF5xQkqRuDDAXnFCSpG6mqvms4YEl2Ad88wLcfDXx3Ecvpi/1YWuzH0rEc+gDd9OMxVbXXb/8OOhQejCRbq2p933U8WPZjabEfS8dy6ANMvh+DPH0kSeqGoSBJaq3kUNjcdwGLxH4sLfZj6VgOfYAJ92PFzilIkn7eSh4pSJL2YChIklorMhTmuUf0krG3e1snOSrJtUlub34e2bQnyQVNX76cZN2c97y0ef3tSV464T6ckOQzSb6a5NYkfzHQfhyS5IYkX2r68bdN+5ok1zf1fqi54i9JHtbsb2+eXz3ns85r2rclefYk+9H8/lVJvpjk40PtQ1PDjiRfSXJzkq1N29COqyOSXJ7ka0luS/KUJdOHqlpRD2AV8B/AScDBwJeAtX3XtUeNTwfWAbfMafsHYFOzvQl4U7P9HOATQIAnA9c37UcBdzQ/j2y2j5xgH44F1jXbjwC+zuie3EPrR4CHN9sPBa5v6rsMOKtpvwj4s2b75cBFzfZZwIea7bXNsfYwYE1zDK6a8HH1auCDwMeb/cH1oaljB3D0Hm1DO67eC/xJs30wcMRS6cNE/2MuhQfwFOCaOfvnAef1Xdde6lzNz4bCNuDYZvtYYFuzfTGwYc/XARuAi+e0/8zreujPvwHPHHI/gMOALwBPYvQN04P2PKYYXRL+Kc32Qc3rsudxNvd1E6r9eOBTwDOAjzc1DaoPc37vDn4+FAZzXAFTwDdoFvostT6sxNNH890jeql7dFV9u9n+DvDoZnu+/iyZfjanH57A6K/swfWjOe1yM3A3cC2jv5B/UKN7g+xZU1tv8/xu4FH034+3An8N/KTZfxTD68OsAj6Z5KYkG5u2IR1Xa4BdwL80p/PemeRwlkgfVmIoDF6N/iwYxFriJA8HPgK8qqrumfvcUPpRVQ9U1eMZ/bV9KvDYnktakCS/C9xdVTf1XcsieVpVrQNOB16R5OlznxzAcXUQo9PD76iqJwD3Mjpd1OqzDysxFIZ6j+j/SnIsQPPz7qZ9vv703s8kD2UUCB+oqo82zYPrx6yq+gHwGUanWo5IMnuTqrk1tfU2z08B36PffjwVODPJDuBSRqeQ3saw+tCqqruan3cDH2MU1EM6rnYCO6vq+mb/ckYhsST6sBJD4UHfI7onVwKzqwteyugc/Wz7HzUrFJ4M7G6GoNcAz0pyZLOK4VlN20QkCfAu4LaqevOcp4bWj2OSHNFsH8poXuQ2RuHwwnn6Mdu/FwKfbv7quxI4q1nZswY4GbhhEn2oqvOq6viqWs3oeP90Vb14SH2YleTwJI+Y3WZ0PNzCgI6rqvoOcGeSU5qm3wa+umT6MOlJoqXwYDSb/3VG54Zf13c9e6nvEuDbwI8Z/VVxDqNzup8CbgeuA45qXhvgwqYvXwHWz/mcPwa2N4+zJ9yHpzEa/n4ZuLl5PGeA/Xgc8MWmH7cAb2jaT2L0D+J24MPAw5r2Q5r97c3zJ835rNc1/dsGnN7TsfWb/HT10eD60NT8peZx6+z/vwM8rh4PbG2OqysYrR5aEn3wMheSpNZKPH0kSZqHoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqTW/wOoIwSDNyvFNgA
AAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1stFlrSF\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAPkklEQVR4nO3dbZBeZX3H8e/PgAI+RJDUOgm4oWGwzGhrulI71tbaomgasJZWGJ06liFVa6eOL2p8GLUvOoOdqQ+0WE2V8aEKInUsERwK1cobCwRFBSklYqhBW1DrotYRxX9f3CeXa9xN7l327Nl79/uZuSfnXPdD/ueC4cd1ruuck6pCkiSAhwxdgCRp5TAUJEmNoSBJagwFSVJjKEiSmiOGLuDBOP7442tqamroMiRpotx0003fqKoNc7030aEwNTXFnj17hi5DkiZKkrvme8/TR5KkxlCQJDWGgiSpmchQSLI9ya6ZmZmhS5GkVWUiQ6GqdlfVjvXr1w9diiStKhMZCpKkfhgKkqTGUJAkNRN98dpqM7Xzyjnb912wbZkrkbRWOVKQJDWGgiSpMRQkSY2hIElqDAVJUuPqownmaiVJS82RgiSpcaQwgPn+D3+pPi9Ji+VIQZLUGAqSpMZQkCQ1hoIkqTEUJEmNq49WIa9fkLRYjhQkSY2hIElqDAVJUrOi5hSSPA/YBjwKeE9V/cvAJUnSmtL7SCHJxUnuSXLLQe1nJLk9yd4kOwGq6mNVdT7wUuAFfdcmSfppy3H66L3AGbMbkqwDLgKeA5wKnJvk1FkfeX33viRpGfUeClV1HfCtg5pPA/ZW1Z1VdT9wKXBWRt4MfKKqPtt3bZKknzbURPNG4Kuz9vd3bX8G/A5wdpKXzvXFJDuS7Emy59577+2/UklaQ1bURHNVXQhceJjP7AJ2AUxPT9dy1LVaeFGbpMMZaqRwN3DCrP1NXZskaUBDhcKNwMlJNid5KHAOcMVAtUiSOr2fPkpyCfAM4Pgk+4E3VtV7krwCuBpYB1xcVbcu4De3A9u3bNnSR8lrjqeVJB2Qqsk9LT89PV179uwZuox5TfpjNA0FaXVKclNVTc/1nre5kCQ1hoIkqZnIUEiyPcmumZmZoUuRpFVlIkOhqnZX1Y7169cPXYokrSoTGQqSpH4YCpKkZkXd5kKTYTHXNXgthDQZJnKk4ESzJPVjIkcKVbUb2D09PX3+0LWsZpN+8Z2khZvIkYIkqR+GgiSpMRQkSY2hIElqJjIUXH0kSf2YyFDwNheS1I+JXJKqlcklrNLkm8iRgiSpH4aCJKkxFCRJjaEgSWomMhRckipJ/ZjIUHBJqiT1wyWpS8ClmJJWi4kcKUiS+mEoSJIaQ0GS1BgKkqTGUJAkNYaCJKmZyFDw4jVJ6sdEhoIXr0lSPyYyFCRJ/TAUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpmchQ8DYXktSPiQwFb3MhSf2YyFCQJPXDUJAkNYaCJKk5YugCtLZN7bxyzvZ9F2xb5kokgSMFSdIshoIkqRkrFJI8se9CJEnDG3ek8I4kNyR5eRIvDpCkVWqsUKiqpwMvBE4AbkryoSSn91qZJGnZjT2nUFV3AK8HXg38JnBhkv9I8vy+ipMkLa9x5xSelOStwG3AM4HtVfWL3fZbe6xPkrSMxr1O4W+BdwOvrarvH2isqq8leX0vlWlN8/oFaRjjhsI24PtV9QBAkocAR1XV/1XVB3qrTpK0rMadU7gWOHrW/j
Fd2yC8dbYk9WPcUDiqqr57YKfbPqafkg7PW2dLUj/GDYXvJdl6YCfJrwDfP8TnJUkTaNw5hVcCH0nyNSDAzwMv6K0qSdIgxgqFqroxyROAU7qm26vqh/2VtfLMtxpGklaThdw6+ynAVPedrUmoqvf3UpUkaRBjhUKSDwC/ANwMPNA1F2AoSNIqMu5IYRo4taqqz2IkScMad/XRLYwmlyVJq9i4I4XjgS8luQH4wYHGqjqzl6okSYMYNxTe1GcRkqSVYdwlqZ9O8njg5Kq6NskxwLp+S5PG5w30pKUx7q2zzwcuB97VNW0EPtZXUZKkYYw70fynwNOA+6A9cOfn+ipKkjSMcUPhB1V1/4GdJEcwuk5BkrSKjBsKn07yWuDo7tnMHwF291eWJGkI44bCTuBe4IvAnwBXMXpesyRpFRl39dGPgX/oXtJgvDGh1K9x7330FeaYQ6iqk5a8IknSYBZy76MDjgL+ADhu6cuRJA1prDmFqvrmrNfdVfU2wKuCJGmVGff00dZZuw9hNHJYyLMYJEkTYNz/sP/NrO0fAfuAP1zyaiRJgxp39dFv9V2IJGl4454+etWh3q+qtzzYQpKcBLwOWF9VZz/Y35MkLdy4F69NAy9jdCO8jcBLga3AI7vXnJJcnOSeJLcc1H5GktuT7E2yE6Cq7qyq8xZzEJKkpTHunMImYGtVfQcgyZuAK6vqRYf53nuBv2PWs5yTrAMuAk4H9gM3Jrmiqr60sNIlSUtt3FB4LHD/rP37u7ZDqqrrkkwd1HwasLeq7gRIcilwFjBWKCTZAewAOPHEE8f5yoJ4xezq4nMWpIUZ9/TR+4EbkrypGyVcD7xvkX/nRuCrs/b3AxuTPCbJO4EnJ3nNfF+uql1VNV1V0xs2bFhkCZKkuYy7+uivknwCeHrX9JKq+txSFlJV32Q0VyFJGsi4IwWAY4D7qurtwP4kmxf5d94NnDBrf1PXJkka2LiP43wj8GrgwGmdI4F/XOTfeSNwcpLNSR4KnANcsZAfSLI9ya6ZmZlFliBJmsu4I4XfA84EvgdQVV/jEEtRD0hyCfAZ4JQk+5OcV1U/Al4BXA3cBlxWVbcupOiq2l1VO9avX7+Qr0mSDmPc1Uf3V1UlKYAkDx/nS1V17jztVzF6UI8kaQUZd6RwWZJ3AY9Ocj5wLT5wR5JWncOOFJIE+DDwBOA+4BTgDVV1Tc+1Haqm7cD2LVu2DFWCJK1Khw2F7rTRVVX1RGCwIJitqnYDu6enp88fuhZJWk3GPX302SRP6bUSSdLgxp1o/lXgRUn2MVqBFEaDiCf1VZgkafkdMhSSnFhV/wU8e5nqkSQN6HCnjz4GUFV3AW+pqrtmv/ovb25evCZJ/ThcKGTW9kl9FrIQXrwmSf04XCjUPNuSpFXocBPNv5TkPkYjhqO7bfjJRPOjeq1OkrSsDhkKVbVuuQqRJA1vIbfOliStchMZCq4+kqR+TGQouPpIkvoxkaEgSeqHoSBJagwFSVJjKEiSGkNBktSMe+vsFcUnr+nBmtp55Zzt+y7YtsyVSCvLRI4UXJIqSf2YyFCQJPXDUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhovXpPG4MVuWismcqTgxWuS1I+JDAVJUj8MBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJaiYyFJJsT7JrZmZm6FIkaVWZyFDwNheS1I+JDAVJUj8MBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKk5oihC1iMJNuB7Vu2bFn0b0ztvHLpCpIOcqh/v/ZdsG0ZK5EWZiJHCt46W5L6MZGhIEnqh6EgSWoMBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTmiKELOCDJw4F3APcD/1
ZVHxy4JElac3odKSS5OMk9SW45qP2MJLcn2ZtkZ9f8fODyqjofOLPPuiRJc+v79NF7gTNmNyRZB1wEPAc4FTg3yanAJuCr3cce6LkuSdIcej19VFXXJZk6qPk0YG9V3QmQ5FLgLGA/o2C4mUOEVZIdwA6AE088cemL1po2tfPKXj9/qO/su2Dbivqd+Sz09/XgLNU/53ENMdG8kZ+MCGAUBhuBjwK/n+Tvgd3zfbmqdlXVdFVNb9iwod9KJWmNWTETzVX1PeAlQ9chSWvZECOFu4ETZu1v6tokSQMbIhRuBE5OsjnJQ4FzgCsW8gNJtifZNTMz00uBkrRW9b0k9RLgM8ApSfYnOa+qfgS8ArgauA24rKpuXcjvVtXuqtqxfv36pS9aktawvlcfnTtP+1XAVX3+3ZKkhfM2F5KkZiJDwTkFSerHRIaCcwqS1I9U1dA1LFqSe4G7hq5jAMcD3xi6iBXIfvlZ9snc1nq/PL6q5rz6d6JDYa1KsqeqpoeuY6WxX36WfTI3+2V+E3n6SJLUD0NBktQYCpNp19AFrFD2y8+yT+Zmv8zDOQVJUuNIQZLUGAqSpMZQWAHmepZ1kuOSXJPkju7PY7v2JLmwe771F5JsnfWdF3efvyPJi4c4lqWU5IQkn0rypSS3Jvnzrn1N902So5LckOTzXb/8Zde+Ocn13fF/uLsLMUke1u3v7d6fmvVbr+nab0/y7GGOaOkkWZfkc0k+3u2v+T5ZsKryNfAL+A1gK3DLrLa/BnZ22zuBN3fbzwU+AQR4KnB9134ccGf357Hd9rFDH9uD7JfHAVu77UcC/8noud5rum+643tEt30kcH13vJcB53Tt7wRe1m2/HHhnt30O8OFu+1Tg88DDgM3Al4F1Qx/fg+ybVwEfAj7e7a/5Plnoy5HCClBV1wHfOqj5LOB93fb7gOfNan9/jfw78OgkjwOeDVxTVd+qqv8FrgHO6L/6/lTV16vqs932dxjdan0ja7xvuuP7brd7ZPcq4JnA5V37wf1yoL8uB347Sbr2S6vqB1X1FWAvo2eoT6Qkm4BtwLu7/bDG+2QxDIWV67FV9fVu+7+Bx3bb8z3jer72VaEb3j+Z0f8Vr/m+6U6T3Azcwyjkvgx8u0bPK4GfPsZ2/N37M8BjWH398jbgL4Afd/uPwT5ZMENhAtRoXLtm1w4neQTwT8Arq+q+2e+t1b6pqgeq6pcZPc72NOAJA5c0qCS/C9xTVTcNXcukMxRWrv/pTn3Q/XlP1z7fM65X5bOvkxzJKBA+WFUf7Zrtm05VfRv4FPBrjE6XHXhw1uxjbMffvb8e+Carq1+eBpyZZB9wKaPTRm9nbffJohgKK9cVwIFVMi8G/nlW+x91K22eCsx0p1KuBp6V5NhuNc6zuraJ1Z3jfQ9wW1W9ZdZba7pvkmxI8uhu+2jgdEbzLZ8Czu4+dnC/HOivs4FPdiOsK4BzupU4m4GTgRuW5yiWVlW9pqo2VdUUo4njT1bVC1nDfbJoQ890+yqAS4CvAz9kdA7zPEbnN/8VuAO4Fjiu+2yAixidQ/4iMD3rd/6Y0cTYXuAlQx/XEvTLrzM6NfQF4Obu9dy13jfAk4DPdf1yC/CGrv0kRv8B2wt8BHhY135Ut7+3e/+kWb/1uq6/bgeeM/SxLVH/PIOfrD6yTxb48jYXkqTG00eSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSmv8HZHML1sOu2X4AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2ndFlrSF\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD6CAYAAABOIFvoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQW0lEQVR4nO3df5BdZX3H8feXICAUVzCUMgkxwWSimWpruoBTrf0xLSTQEKRWSXXKYIaUqXR0Op0xiNPyjy12Rq206BiV4UctFLHSZIjDD6fIP1QIDEJiGkljKAnUoDiLpQwR/PaPe3K4pLvZs2GfPffsfb9mdvbc5/7Y7z5z7372Oc9zzonMRJIkgCPaLkCSNDgMBUlSzVCQJNUMBUlSzVCQJNUMBUlSzVCQJNWObLuAV2Pu3Lm5cOHCtsuQpE558MEHf5SZJ41338CEQkS8BfgIMBf4VmZ+YbLnLFy4kC1bthSvTZJmk4h4fKL7iu4+iohrI2JfRGw9qH1FROyIiJ0RsR4gM7dn5qXA+4B3lqxLkjS+0nMK1wEr+hsiYg5wDbASWAasiYhl1X3nAbcDmwvXJUkaR9FQyMx7gWcOaj4D2JmZuzJzP3AzsLp6/MbMXAl8oGRdkqTxtTGnMA94ou/2HuDMiPgt4ALgaA4xUoiIdcA6gAULFpSrUpKG0MBMNGfmPcA9DR63AdgAMDo66ileJWkatXGcwl7g1L7b86s2SVLL2giFB4AlEbEoIo4CLgQ2TuUFImJVRGwYGxsrUqAkDavSS1JvAu4DlkbEnohYm5kvApcBdwDbgVsyc9tUXjczN2XmupGRkekvWpKGWNE5hcxcM0H7Zlpedrpw/e3jtu++6twZrkSSBkcnz33k7iNJKqOToeDuI0kqo5OhIEkqw1CQJNU6GQrOKUhSGZ0MBecUJKmMToaCJKkMQ0GSVDMUJEm1ToaCE82SVEYnQ8GJZkkqo5OhIEkqw1CQJNUMBUlSrZOh4ESzJJXRyVBwolmSyuhkKEiSyjAUJEk1Q0GSVDMUJEk1Q0GSVOtkKLgkVZLK6GQouCRVksroZChIksowFCRJNUNBklQzFCRJNUNBklQzFCRJtU6GgscpSFIZnQwFj1OQpDI6GQqSpDIMBUlSzVCQJNUMBUlSzVCQJNUMBUlSzVCQJNUMBUlSzVCQJNUMBUlSrZOh4LmPJKmMToaC5z6SpDI6GQqSpDIMBUlSzVCQJNUMBUlSzVCQJNUMBUlSzVCQJNUMBUlSzVCQJNUMBUlSzVCQJNUMBUlSzVCQJNUMBUlSzVCQJNWObLuAfhFxPnAu8DrgK5l5Z8slSdJQKT5SiIhrI2JfRGw9qH1FROyIiJ0RsR4gM2/LzEuAS4H3l65NkvRKM7H76DpgRX9DRMwBrgFWAsuANRGxrO8hn6julyTNoOKhkJn3As8c1HwGsDMzd2XmfuBmYHX0fAr4ZmY+NN7rRcS6iNgSEVuefvrpssVL0pBpa6J5HvBE3+09VdufAb8LvDciLh3viZm5ITNHM3P0pJNOKl+pJA2RgZpozsyrgavbrkOShlVbI4W9wKl9t+dXbZKkFrUVCg8ASyJiUUQcBVwIbGz65IhYFREbxsbGihUoScNoJpak3gTcByyNiD0RsTYzXwQuA+4AtgO3ZOa2pq+ZmZsyc93IyEiZoiVpSBWfU8jMNRO0bwY2l/75kqTmOnmaC3cfSVIZnQwFdx9JUhmdDAVJUhmGgiSp1slQcE5BksroZCg4pyBJZTQKhYh4a+
lCJEntazpS+HxE3B8RfxoR/nsuSbNUo1DIzN8APkDvfEUPRsQ/RcTvFa3sEJxTkKQyGs8pZOZj9C5+8zHgN4GrI+I/IuKCUsUdohbnFCSpgKZzCm+LiM/SO0/R7wCrMvMt1fZnC9YnSZpBTc999PfAl4GPZ+bzBxoz88mI+ESRyiRJM65pKJwLPJ+ZLwFExBHAMZn5v5l5Y7HqNLQWrr993PbdV507w5VIw6VpKNxN7zKZ/1PdPha4E/j1EkVpeEz0x7+tn2voaNg1nWg+JjMPBALV9rFlSpqcq48kqYymI4XnImJ5Zj4EEBG/Bjw/yXOKycxNwKbR0dFL2qpB3dDWSETqqqah8FHgaxHxJBDALwHvL1aVJKkVjUIhMx+IiDcDS6umHZn5s3JlSZLaMJXLcZ4OLKyeszwiyMwbilQlSWpFo1CIiBuBNwEPAy9VzQkYCpI0izQdKYwCyzIzSxbTVESsAlYtXry47VIkaVZpGgpb6U0uP1WwlsZcfTS8PL5AKqtpKMwFvhcR9wMvHGjMzPOKVCVJakXTULiyZBHqFv9bl2avpktSvx0RbwSWZObdEXEsMKdsaVJzHqQmTY+mp86+BLgV+GLVNA+4rVRRkqR2ND330YeBdwLPQn3BnV8sVZQkqR1NQ+GFzNx/4EZEHEnvOAVJ0izSNBS+HREfB15bXZv5a8CmcmVJktrQNBTWA08DjwJ/Amymd73mVnjqbEkqo+nqo58DX6q+WufBa4PJFUBS9zU999EPGGcOITNPm/aKJEmtmcq5jw44BvhD4MTpL0eDxP/8peHTaE4hM3/c97U3M/8O8PBVSZplmu4+Wt538wh6I4epXItBktQBTf+wf7pv+0VgN/C+aa9GktSqpquPfrt0IZKk9jXdffTnh7o/Mz8zPeVIkto0ldVHpwMbq9urgPuBx0oUJbXF04Jr2DUNhfnA8sz8KUBEXAncnpkfLFWYJGnmNT3NxcnA/r7b+6s2SdIs0nSkcANwf0R8o7p9PnB9mZImFxGrgFWLFy9uqwQNGXcraVg0PXjtk8DFwE+qr4sz869LFjZJPZsyc93IyEhbJUjSrNR09xHAscCzmfk5YE9ELCpUkySpJU0vx/lXwMeAy6um1wD/WKooSVI7mo4U3gOcBzwHkJlPAseXKkqS1I6mobA/M5Pq9NkRcVy5kiRJbWm6+uiWiPgi8PqIuAT4EANywR1pEB3OacddyaRBMGkoREQA/wy8GXgWWAr8ZWbeVbg2SdIMmzQUMjMjYnNmvhUwCCRpFms6p/BQRJxetBJJUuuazimcCXwwInbTW4EU9AYRbytVmGaOl90cDB41rUFwyFCIiAWZ+V/A2TNUjySpRZONFG6jd3bUxyPi65n5BzNRlNQVMzHKcgShmTTZnEL0bZ9WshBJUvsmC4WcYFuSNAtNtvvoVyLiWXojhtdW2/DyRPPrilYnSZpRhwyFzJwzU4VExGnAFcBIZr53pn6uJOllTZekHpaIuBb4fWBfZv5yX/sK4HPAHODLmXlVZu4C1kbErSVrGmYuPZU0malcT+FwXAes6G+IiDnANcBKYBmwJiKWFa5DktRA0VDIzHuBZw5qPgPYmZm7MnM/cDOwumQdkqRmiu4+msA84Im+23uAMyPiDcAngbdHxOWZ+TfjPTki1gHrABYsWFC6VmlgTdfxCx4HoX5thMK4MvPHwKUNHrcB2AAwOjrqMllJmkal5xTGsxc4te/2/KpNktSyNkYKDwBLImIRvTC4EPijqbxARKwCVi1evLhAed3haiKNx/eFXo2iI4WIuAm4D1gaEXsiYm1mvghcBtwBbAduycxtU3ndzNyUmetGRkamv2hJGmJFRwqZuWaC9s3A5pI/W5I0dW3MKUiSBlQnQyEiVkXEhrGxsbZLkaRZpZOh4JyCJJXRyVCQJJVhKEiSap0MBecUJKmMToaCcwqSVEYnQ0GSVIahIEmqDcxZUqei6+c+8lTFkgZVJ0cKzilIUhmdDA
VJUhmGgiSpZihIkmqGgiSp5uqjDvBKWpJmSidHCq4+kqQyOhkKkqQyDAVJUs1QkCTVDAVJUs3VR5KmxHN3zW6dHCm4+kiSyuhkKEiSyjAUJEk1Q0GSVDMUJEk1Q0GSVDMUJEk1Q0GSVPPgtQHiKbI1G3mwW7d0cqTgwWuSVEYnQ0GSVIahIEmqGQqSpJqhIEmqGQqSpJqhIEmqGQqSpJqhIEmqGQqSpJqhIEmqee6jgjyXkbqs9Pv3UK/veZHa08mRguc+kqQyOhkKkqQyDAVJUs1QkCTVDAVJUs1QkCTVDAVJUs1QkCTVDAVJUs1QkCTVDAVJUs1QkCTVDAVJUs1QkCTVDAVJUm1grqcQEccBnwf2A/dk5ldbLkmShk7RkUJEXBsR+yJi60HtKyJiR0TsjIj1VfMFwK2ZeQlwXsm6JEnjK7376DpgRX9DRMwBrgFWAsuANRGxDJgPPFE97KXCdUmSxlE0FDLzXuCZg5rPAHZm5q7M3A/cDKwG9tALhuJ1SZLG18acwjxeHhFALwzOBK4G/iEizgU2TfTkiFgHrANYsGBBwTJfyevJSoc2ndd0nuprTddncLp+h+n8mzBRTaX+7gzMRHNmPgdc3OBxG4ANAKOjo1m6LkkaJm3sptkLnNp3e37VJklqWRuh8ACwJCIWRcRRwIXAxqm8QESsiogNY2NjRQqUpGFVeknqTcB9wNKI2BMRazPzReAy4A5gO3BLZm6byutm5qbMXDcyMjL9RUvSECs6p5CZayZo3wxsLvmzJUlT18mln+4+kqQyOhkK7j6SpDI6GQqSpDIMBUlSLTK7e/xXRDwNPH6YT58L/Ggay5mt7Kdm7KfJ2UfNzEQ/vTEzTxrvjk6HwqsREVsyc7TtOgad/dSM/TQ5+6iZtvvJ3UeSpJqhIEmqDXMobGi7gI6wn5qxnyZnHzXTaj8N7ZyCJOn/G+aRgiTpIEMZChNcI3ooRcTuiHg0Ih6OiC1V24kRcVdEPFZ9P6Fqj4i4uuq3RyJiebvVlzPe9cUPp18i4qLq8Y9FxEVt/C4lTdBPV0bE3uo99XBEnNN33+VVP+2IiLP72mftZzIiTo2If4uI70XEtoj4SNU+mO+nzByqL2AO8J/AacBRwHeBZW3X1WJ/7AbmHtT2t8D6ans98Klq+xzgm0AA7wC+03b9Bfvl3cByYOvh9gtwIrCr+n5CtX1C27/bDPTTlcBfjPPYZdXn7WhgUfU5nDPbP5PAKcDyavt44PtVXwzk+2kYRwoTXSNaL1sNXF9tXw+c39d+Q/b8O/D6iDiljQJLy/GvLz7VfjkbuCszn8nMnwB3ASvKVz9zJuiniawGbs7MFzLzB8BOep/HWf2ZzMynMvOhavun9C4ZMI8BfT8NYyiMd43oeS3VMggSuDMiHqyufw1wcmY+VW3/N3BytT3sfTfVfhnm/rqs2vVx7YHdIthPRMRC4O3AdxjQ99MwhoJe6V2ZuRxYCXw4It7df2f2xq0uUTuI/XJIXwDeBPwq8BTw6XbLGQwR8QvA14GPZuaz/fcN0vtpGEPBa0T3ycy91fd9wDfoDeV/eGC3UPV9X/XwYe+7qfbLUPZXZv4wM1/KzJ8DX6L3noIh7qeIeA29QPhqZv5L1TyQ76dhDIVXfY3o2SIijouI4w9sA2cBW+n1x4GVDRcB/1ptbwT+uFod8Q5grG/4Owym2i93AGdFxAnVLpSzqrZZ7aB5pvfQe09Br58ujIijI2IRsAS4n1n+mYyIAL4CbM/Mz/TdNZjvp7Zn5tv4oje7/316Kx6uaLueFvvhNHorPb4LbDvQF8AbgG8BjwF3AydW7QFcU/Xbo8Bo279Dwb65id6uj5/R23e79nD6BfgQvQnVncDFbf9eM9RPN1b98Ai9P3Cn9D3+iqqfdgAr+9pn7WcSeBe9XUOPAA9XX+cM6vvJI5olSbVh3H0kSZqAoSBJqhkKkqSaoSBJqhkKkqSaoSBJqhkKkqSaoSBJqv0fVcOrSnn8Og
MAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "LowQualFinSF\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAD4CAYAAAD7CAEUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAARF0lEQVR4nO3dbZBeZX3H8e9PEHmoLirUOgQa6GbAzIhII+qordraBumCVatkdHQsQ0orHZ12poJ1rH3RGX1RUTr4kCpSrQMiKiUSi+ATbxghKCoYKRGxBLWJT6GljhH898V9Amuazd5nsyfnPrvfz8w9Oee6H/Z/7d7Z317Xde5zUlVIkjSuR/VdgCRpWAwOSVIrBockqRWDQ5LUisEhSWrl4L4L2B9HHXVUrVy5su8yJGlQbr311h9W1dELff4ggyPJDDAzPT3N5s2b+y5HkgYlyXf35/mDnKqqqo1VtX5qaqrvUiRp2RlkcEiS+mNwSJJaMTgkSa0YHJKkVgwOSVIrBockqRWDQ5LUyiA/ALgYVl5w7V7b73n7GQe4EkkaFkcckqRWDA5JUisGhySpFYNDktSKwSFJasXgkCS1MjHBkeQpSd6X5Kokf953PZKkves0OJJcmmR7ktv3aF+b5M4kW5NcAFBVW6rqPOAVwHO6rEuStHBdjzguA9bObkhyEHAJcDqwGliXZHVz35nAtcCmjuuSJC1Qp8FRVTcCP96j+TRga1XdXVW7gCuAs5rHX1NVpwOvmus1k6xPsjnJ5h07dnRVuiRpDn2ccuQY4N5Z+9uAZyZ5PvBS4DHsY8RRVRuADQBr1qyp7sqUJO3NxJyrqqq+CHyx5zIkSfPo46iq+4BjZ+2vaNrGlmQmyYadO3cuamGSpPn1ERy3AKuSHJ/kEOBs4Jo2L1BVG6tq/dTUVCcFSpLm1vXhuJcDNwEnJtmW5JyqehA4H7gO2AJcWVV3dFmHJGnxdLrGUVXr5mjfxH4ccptkBpiZnp5e6EtIkhZoYj453oZTVZLUn0EGhySpPwaHJKmVQQaHh+NKUn8GGRyucUhSfwYZHJKk/hgckqRWBhkcrnFIUn8GGRyucUhSfwYZHJKk/hgckqRWDA5JUiuDDA4XxyWpP4MMDhfHJak/gwwOSVJ/DA5JUisGhySpFYNDktTKIIPDo6okqT+DDA6PqpKk/gwyOCRJ/TE4JEmtGBySpFYMDklSKwaHJKkVg0OS1Mogg8PPcUhSfwYZHH6OQ5L6M8jgkCT1x+CQJLVicEiSWjE4JEmtGBySpFYMDklSKwaHJKkVg0OS1IrBIUlqZZDB4SlHJKk/gwwOTzkiSf0ZZHBIkvpjcEiSWjE4JEmtGBySpFYMDklSKwaHJKkVg0OS1IrBIUlqxeCQJLVicEiSWjE4JEmtGBySpFYMDklSKwf3XcBsSV4CnAE8DvhgVX2255IkSXvofMSR5NIk25Pcvkf72iR3Jtma5AKAqrq6qs4FzgNe2XVtkqT2DsRU1WXA2tkNSQ4CLgFOB1YD65KsnvWQtzT3S5ImTOfBUVU3Aj/eo/k0YGtV3V1Vu4ArgLMy8g7gM1X1la5rkyS119fi+DHAvbP2tzVtfwn8PvDyJOft7YlJ1ifZnGTzjh07uq9UkvQrJmpxvKouBi6e5zEbgA0Aa9asqQNRlyTpEX2NOO4Djp21v6JpkyRNuLGCI8lTF/nr3gKsSnJ8kkOAs4Frxn1ykpkkG3bu3LnIZUmS5jPuiOM9SW5O8hdJptp8gSSXAz
cBJybZluScqnoQOB+4DtgCXFlVd4z7mlW1sarWT021KkWStAjGWuOoquclWQX8KXBrkpuBD1XV9WM8d90c7ZuATW2KlST1b+w1jqq6i9HnK94E/C5wcZJvJXlpV8XNxakqSerPuGscJye5iNG00guBmap6SrN9UYf17ZVTVZLUn3EPx/0n4APAm6vqZ7sbq+p7Sd7SSWWSpIk0bnCcAfysqh4CSPIo4NCq+t+q+khn1c0hyQwwMz09faC/tCQte+OucdwAHDZr//CmrRdOVUlSf8YNjkOr6n927zTbh3dTkiRpko0bHA8kOXX3TpLfBn62j8dLkpaocdc43gh8PMn3gAC/gdfLkKRladwPAN6S5CTgxKbpzqr6RXdl7ZuL45LUnzYnOXwGcDJwKqMLL72mm5Lm5+K4JPVnrBFHko8AvwXcBjzUNBfw4Y7qkiRNqHHXONYAq6vK619I0jI37lTV7YwWxCVJy9y4I46jgG82Z8X9+e7Gqjqzk6rm4eK4JPVn3OB4W5dFtFVVG4GNa9asObfvWiRpuRn3cNwvJflNYFVV3ZDkcOCgbkuTJE2icU+rfi5wFfD+pukY4OquipIkTa5xF8dfDzwHuB8evqjTr3dVlCRpco0bHD+vql27d5IczOhzHJKkZWbc4PhSkjcDhyV5EfBxYGN3Ze2bl46VpP6MGxwXADuAbwB/BmxidP3xXnjKEUnqz7hHVf0S+OfmJklaxsY9V9V32MuaRlWdsOgVSZImWptzVe12KPAnwBMWvxxJ0qQba42jqn4063ZfVb0LOKPj2iRJE2jcqapTZ+0+itEIZNzRiiRpCRn3l/8/ztp+ELgHeMWiVzMmT3IoSf0Z96iqF3RdSBue5FCS+jPuVNVf7ev+qnrn4pQjSZp0bY6qegZwTbM/A9wM3NVFUZKkyTVucKwATq2q/wZI8jbg2qp6dVeFSZIm07inHHkSsGvW/q6mTZK0zIw74vgwcHOSTzX7LwH+pZuSJEmTbNyjqv4hyWeA5zVNr6uqr3ZXliRpUo07VQVwOHB/Vb0b2Jbk+I5qkiRNsHEvHft3wJuAC5umRwP/2lVRkqTJNe6I44+BM4EHAKrqe8BjuypKkjS5xg2OXVVVNKdWT3JEdyXNzysASlJ/xg2OK5O8HzgyybnADfR4USevAChJ/Zn3qKokAT4GnATcD5wIvLWqru+4NknSBJo3OKqqkmyqqqcChoUkLXPjTlV9JckzOq1EkjQI435y/JnAq5Pcw+jIqjAajJzcVWGSpMm0z+BIclxV/SfwhweoHknShJtvxHE1o7PifjfJJ6rqZQeiKEnS5JpvjSOztk/oshBJ0jDMFxw1x7YkaZmab6rqaUnuZzTyOKzZhkcWxx/XaXWSpImzz+CoqoMOVCGSpGFoc1p1SZIMDklSOwaHJKkVg0OS1MrEBEeSE5J8MMlVfdciSZpbp8GR5NIk25Pcvkf72iR3Jtma5AKAqrq7qs7psh5J0v7resRxGbB2dkOSg4BLgNOB1cC6JKs7rkOStEg6DY6quhH48R7NpwFbmxHGLuAK4KxxXzPJ+iSbk2zesWPHIlYrSRpHH2scxwD3ztrfBhyT5IlJ3gc8PcmFcz25qjZU1ZqqWnP00Ud3XaskaQ/jXo+jc1X1I+C8vuuQJO1bHyOO+4BjZ+2vaNrGlmQmyYadO3cuamGSpPn1ERy3AKuSHJ/kEOBs4Jo2L1BVG6tq/dTUVCcFSpLm1vXhuJcDNwEnJtmW5JyqehA4H7gO2AJcWVV3dFmHJGnxdLrGUVXr5mjfBGxa6OsmmQFmpqenF/oSkqQFmphPjrfhVJUk9WeQwSFJ6o/BIUlqZZDB4eG4ktSfQQaHaxyS1J9BBockqT8GhySplUEGh2scktSfQQaHaxyS1J9BBockqT8GhySpFYNDktTKIIPDxXFJ6s8gg8PFcUnqzyCDQ5LUH4NDktSKwSFJasXgkCS1Msjg8KgqSerPIIPDo6okqT+DDA5JUn
8MDklSKwaHJKkVg0OS1IrBIUlqxeCQJLVycN8FLESSGWBmenq671IW1coLrt1r+z1vP+MAVyLNzfepBjni8HMcktSfQQaHJKk/BockqRWDQ5LUisEhSWrF4JAktWJwSJJaMTgkSa0YHJKkVgwOSVIrnnJES5qnx3jEcvxeLFafl+P3bl8GOeLwlCOS1J9BBockqT8GhySpFYNDktSKwSFJasXgkCS1YnBIkloxOCRJrRgckqRWDA5JUisGhySpFYNDktSKwSFJasXgkCS1YnBIklqZmOtxJDkCeA+wC/hiVX2055IkSXvR6YgjyaVJtie5fY/2tUnuTLI1yQVN80uBq6rqXODMLuuSJC1c11NVlwFrZzckOQi4BDgdWA2sS7IaWAHc2zzsoY7rkiQtUKdTVVV1Y5KVezSfBmytqrsBklwBnAVsYxQet7GPQEuyHlgPcNxxxy1+0Yto0i432baeuR4/l8Xs16R975aytj/nxXr9Pn+WXV9Sdi4L6fMkfv/6WBw/hkdGFjAKjGOATwIvS/JeYONcT66qDVW1pqrWHH300d1WKkn6fyZmcbyqHgBe13cdkqR962PEcR9w7Kz9FU3b2JLMJNmwc+fORS1MkjS/PoLjFmBVkuOTHAKcDVzT5gWqamNVrZ+amuqkQEnS3Lo+HPdy4CbgxCTbkpxTVQ8C5wPXAVuAK6vqji7rkCQtnq6Pqlo3R/smYNNCXzfJDDAzPT290JeQJC3QIE854lSVJPVnkMEhSeqPwSFJaiVV1XcNre1e4wBeCdy1wJc5CvjhohU1GZZin2Bp9ss+DcNS7BPAiVX12IU+eZDBsRiSbK6qNX3XsZiWYp9gafbLPg3DUuwT7H+/nKqSJLVicEiSWlnOwbGh7wI6sBT7BEuzX/ZpGJZin2A/+7Vs1zgkSQuznEcckqQFMDgkSa0sy+CY45rnE29v13BP8oQk1ye5q/n38U17klzc9PHrSU7tr/K5JTk2yReSfDPJHUne0LQPtl9JDk1yc5KvNX36+6b9+CRfbmr/WHN2aJI8ptnf2ty/ss/69yXJQUm+muTTzf5S6NM9Sb6R5LYkm5u2wb7/AJIcmeSqJN9KsiXJsxezT8suOPZxzfMhuIw9ruEOXAB8rqpWAZ9r9mHUv1XNbT3w3gNUY1sPAn9dVauBZwGvb34eQ+7Xz4EXVtXTgFOAtUmeBbwDuKiqpoGfAOc0jz8H+EnTflHzuEn1BkZntd5tKfQJ4AVVdcqszzYM+f0H8G7g36vqJOBpjH5mi9enqlpWN+DZwHWz9i8ELuy7rhb1rwRun7V/J/DkZvvJwJ3N9vuBdXt73CTfgH8DXrRU+gUcDnwFeCajTyAf3LQ//D5kdImBZzfbBzePS9+176UvK5pfOC8EPg1k6H1q6rsHOGqPtsG+/4Ap4Dt7fr8Xs0/LbsTB3Nc8H6onVdX3m+0fAE9qtgfXz2Y64+nAlxl4v5opnduA7cD1wLeBn9boejTwq3U/3Kfm/p3AEw9sxWN5F/A3wC+b/Scy/D4BFPDZJLcmWd+0Dfn9dzywA/hQM634gSRHsIh9Wo7BsWTV6M+FQR5fneTXgE8Ab6yq+2ffN8R+VdVDVXUKo7/STwNO6rmk/ZLkj4DtVXVr37V04LlVdSqjKZvXJ/md2XcO8P13MHAq8N6qejrwAI9MSwH736flGBz7fc3zCfNfSZ4M0Py7vWkfTD+TPJpRaHy0qj7ZNA++XwBV9VPgC4ymcY5MsvviabPrfrhPzf1TwI8OcKnzeQ5wZpJ7gCsYTVe9m2H3CYCquq/5dzvwKUZBP+T33zZgW1V9udm/ilGQLFqflmNw7Pc1zyfMNcBrm+3XMloj2N3+muaIiWcBO2cNUydGkgAfBLZU1Ttn3TXYfiU5OsmRzfZhjNZstjAKkJc3D9uzT7v7+nLg881fhBOjqi6sqhVVtZLR/5nPV9WrGHCfAJIckeSxu7eBPwBuZ8Dvv6r6AXBvkhObpt8Dvsli9q
nvhZyeFo9eDPwHo3nnv+27nhZ1Xw58H/gFo78qzmE0b/w5RqeXvwF4QvPYMDp67NvAN4A1fdc/R5+ey2jI/HXgtub24iH3CzgZ+GrTp9uBtzbtJwA3A1uBjwOPadoPbfa3Nvef0Hcf5unf84FPL4U+NfV/rbndsfv3wZDff02dpwCbm/fg1cDjF7NPnnJEktTKcpyqkiTtB4NDktSKwSFJasXgkCS1YnBIkloxOCRJrRgckqRW/g/oUuiDEJF4gAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GrLivArea\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAP0UlEQVR4nO3df4xlZX3H8ffHxYqoXdFFS4F1oEswJNK6HVCjtrapim4XrZLKWlODhK21NjX+URc1av8wpU1qLC0q24pWa0H8WVYwCGr1HyMsiMoPt2xxrYu2iyYu1hpR/PaPe/Zx2M7s3hn3zJl75/1Kbuac5/6Y7wM3+5nnOc85J1WFJEkADxm6AEnSymEoSJIaQ0GS1BgKkqTGUJAkNUcNXcDPY926dTUzMzN0GZI0UW6++ebvVNVx8z030aEwMzPDzp07hy5DkiZKkm8s9JzTR5KkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1Ez0yWua38y2a+Zt33PxpmWuRNKkMRRWkcWGheEirT5OH0mSGkNBktQ4fTTBFprekaSlMhRkuEhqnD6SJDUraqSQ5IXAJuAXgXdX1acGLmlF8C95Scul95FCksuT7Ety20HtZyfZlWR3km0AVfXxqroQeCXwkr5rkyQ92HKMFN4L/D3wvgMNSdYAlwLPBvYCNyW5uqru6F7yxu75VcURgaSh9R4KVfX5JDMHNZ8F7K6quwGSXAm8IMmdwMXAJ6vqlr5r09J4Ups0vYY60HwC8M05+3u7tj8Ffgc4N8kr53tjkq1JdibZee+99/ZfqSStIivqQHNVXQJccpjXbAe2A8zOztZy1CVJq8VQI4V7gJPm7J/YtUmSBjTUSOEm4NQkJzMKg/OAlw5Uy7LzgLKklWo5lqReAXwBOC3J3iQXVNVPgFcD1wF3AldV1e2L+MzNSbbv37+/n6IlaZVajtVHWxZovxa4domfuQPYMTs7e+HPU5sk6cG8zIUkqTEUJEmNoSBJalbUeQrjSrIZ2Lxhw4ahSzkkVxlJmjQTOVKoqh1VtXXt2rVDlyJJU2UiQ0GS1A9DQZLUGAqSpGYiQ8EzmiWpHxMZCh5olqR+TGQoSJL6MZHnKWhlWsp5Gd6tTVpZHClIkhpDQZLUTGQouPpIkvoxkaHg6iNJ6sdEhoIkqR+GgiSpMRQkSY2hIElqDAVJUmMoSJKaiQwFz1OQpH5MZCh4noIk9WMiQ0GS1A+vkqpBLXRlVa+eKg3DkYIkqTEUJEmNoSBJagwFSVJjKEiSmokMBU9ek6R+TGQoePKaJPVjIkNBktQPQ0GS1BgKkqTGUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhpDQZLUTGQoeO0jSerHRN6Os6p2ADtmZ2cvHLoW9cPbdErDmMiRgiSpH4aCJKmZyOkjrV5OK0n9cqQgSWoMBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqfE8BU0Fz1+QjgxHCpKkxlCQJDWGgiSpGSsUkjyp70IWw/spSFI/xh0pvCPJjUlelWRtrxWNoap2VNXWtWsHL0WSpspYq4+q6plJTgVeAdyc5EbgPVV1fa/VST1xtZI0v7GPKVTVXcAbgdcBvwlckuRrSV7UV3GSpOU11kghyRnA+cAm4Hpgc1XdkuSXgS8AH+2vRGnpFhoRSJrfuCev/R3wj8Drq+qHBxqr6ltJ3thLZZKkZTduKGwCflhVDwAkeQhwdFX9b1W9v7fqJEnLatxjCjcAD5+zf
0zXJkmaIuOGwtFV9T8HdrrtY/opSZI0lHFD4QdJNh7YSfLrwA8P8XpJ0gQa95jCa4APJfkWEOCXgJf0VpUkaRDjnrx2U5InAqd1Tbuq6sf9lSVJGsJi7qdwJjDTvWdjEqrqfb1UNWFcCy9pWox78tr7gV8BbgUe6JoLMBQkaYqMO1KYBU6vquqzGEnSsMZdfXQbo4PLkqQpNu5IYR1wR3d11B8daKyqc3qpSpI0iHFD4S19FiFJWhnGXZL6uSRPAE6tqhuSHAOs6bc0SdJyG/d2nBcCHwYu65pOAD7eV1GSpGGMe6D5T4CnA/dBu+HO4/oqSpI0jHFD4UdVdf+BnSRHMTpPQZI0RcYNhc8leT3w8CTPBj4E7OivLEnSEMYNhW3AvcBXgT8CrmV0v2ZJ0hQZd/XRT4F/6B6SpCk17rWPvs48xxCq6pQjVUiSU4A3AGur6twj9bmSpPGNO300y+gqqWcCzwQuAf75cG9KcnmSfUluO6j97CS7kuxOsg2gqu6uqgsWV74k6UgaKxSq6rtzHvdU1duBTWO89b3A2XMbkqwBLgWeB5wObEly+uLKliT1Ydzpo41zdh/CaORw2PdW1eeTzBzUfBawu6ru7j77SuAFwB1j1rIV2Aqwfv36cd4iSRrTuNc++ps52z8B9gC/v8TfeQLwzTn7e4GnJHks8FbgyUkuqqq/nO/NVbUd2A4wOzvruRKSdASNu/rot/oupKq+C7yy798jSVrYuNNHrz3U81X1tkX8znuAk+bsn9i1SZIGtpg7r50JXN3tbwZuBO5awu+8CTg1ycmMwuA84KVL+BxJ0hE2biicCGysqu8DJHkLcE1VvexQb0pyBfAsYF2SvcCbq+rdSV4NXMfo8tuXV9Xtiyk6yWZg84YNGxbzNmnJZrZdM2/7novHWYQnTY5xQ+HxwP1z9u/v2g6pqrYs0H4to0tlLElV7QB2zM7OXrjUz5Ak/X/jhsL7gBuTfKzbfyHwT/2UJEkayrirj96a5JOMzmYGOL+qvtRfWZKkIYw7UgA4Brivqt6T5LgkJ1fV1/sq7FCGOKaw0Jyypov/n7XajXs7zjcDrwMu6poeyhjXPupLVe2oqq1r164dqgRJmkrjXhDv94BzgB8AVNW3gEf1VZQkaRjjhsL9VVV0l89O8oj+SpIkDWXcULgqyWXAo5NcCNyAN9yRpKlz2APNSQJ8EHgicB9wGvCmqrq+59okSctsnMtfV5Jrq+pJwIoIAs9olqR+jDt9dEuSM3utZBFcfSRJ/Rj3PIWnAC9LsofRCqQwGkSc0VdhkqTld8hQSLK+qv4TeO4y1SNJGtDhRgofZ3R11G8k+UhVvXg5ipIkDeNwxxQyZ/uUPguRJA3vcKFQC2wPKsnmJNv3798/dCmSNFUOFwq/muS+JN8Hzui270vy/ST3LUeB83H1kST145DHFKpqzXIVIkka3rjnKUiSVgFDQZLUGAqSpMZQkCQ1ExkKLkmVpH5MZCi4JFWS+jGRoSBJ6oehIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNYaCJKk53O04V6Qkm4HNGzZsGLoU6YiZ2XbNvO17Lt60zJVoNZvIkYJnNEtSPyYyFCRJ/TAUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSp8TIX0s9hKZemWOg90kowkSMFL3MhSf2YyFCQJPXDUJAkNYaCJKkxFCRJjaEgSWoMBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSmlV7kx1vdKI+Lcf3ayk3+JEOZyJHCt5kR5L6MZGhIEnqh6EgSWoMBUlSYyhIkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJagwFSVJjKEiSGkNBktQYCpKkxlCQJDWGgiSpMRQkSY2hIElqDAVJUmMoSJIaQ0GS1BgKkqTGUJAkNUcNXcABSR4BvAO4H/i3qvrAwCVJ0qrT60ghyeVJ9iW57aD2s
5PsSrI7ybau+UXAh6vqQuCcPuuSJM2v7+mj9wJnz21Isga4FHgecDqwJcnpwInAN7uXPdBzXZKkefQ6fVRVn08yc1DzWcDuqrobIMmVwAuAvYyC4VYOEVZJtgJbAdavX3/ki5ZWmJlt1/T6+j0Xb1rU64+UQ9W5UE2T0rcjaaE+99W3IQ40n8DPRgQwCoMTgI8CL07yTmDHQm+uqu1VNVtVs8cdd1y/lUrSKrNiDjRX1Q+A84euQ5JWsyFGCvcAJ83ZP7FrkyQNbIhQuAk4NcnJSX4BOA+4ejEfkGRzku379+/vpUBJWq36XpJ6BfAF4LQke5NcUFU/AV4NXAfcCVxVVbcv5nOrakdVbV27du2RL1qSVrG+Vx9tWaD9WuDaPn+3JGnxvMyFJKkxFCRJzUSGggeaJakfqaqha1iyJPcC3xi6jiNsHfCdoYtYBquhn/ZxekxbP59QVfOe/TvRoTCNkuysqtmh6+jbauinfZweq6WfMKHTR5KkfhgKkqTGUFh5tg9dwDJZDf20j9NjtfTTYwqSpJ9xpCBJagwFSVJjKCyD+e5VneQxSa5Pclf389iuPUku6e5f/ZUkG+e85+Xd6+9K8vIh+rKQJCcl+WySO5LcnuTPuvap6WeSo5PcmOTLXR//oms/OckXu758sLv6L0ke1u3v7p6fmfNZF3Xtu5I8d5geLSzJmiRfSvKJbn8a+7gnyVeT3JpkZ9c2Nd/XJasqHz0/gN8ANgK3zWn7a2Bbt70N+Ktu+/nAJ4EATwW+2LU/Bri7+3lst33s0H2b05/jgY3d9qOAf2d0D+6p6WdX6yO77YcCX+xqvwo4r2t/F/DH3fargHd12+cBH+y2Twe+DDwMOBn4D2DN0P07qK+vBf4F+ES3P4193AOsO6htar6vS/7vMnQBq+UBzBwUCruA47vt44Fd3fZlwJaDXwdsAS6b0/6g1620B/CvwLOntZ/AMcAtwFMYnel6VNf+NOC6bvs64Gnd9lHd6wJcBFw057Pa61bCg9GNrz4N/Dbwia7mqepjV9N8oTCV39fFPJw+Gs7jq+rb3fZ/AY/vthe6h/VC7StON4XwZEZ/SU9VP7tplVuBfcD1jP4C/l6N7hMCD6639aV7fj/wWFZ4H4G3A38O/LTbfyzT10eAAj6V5OYkW7u2qfq+LsWKuUfzalZVlWQq1gYneSTwEeA1VXVfkvbcNPSzqh4Afi3Jo4GPAU8cuKQjKsnvAvuq6uYkzxq6np49o6ruSfI44PokX5v75DR8X5fCkcJw/jvJ8QDdz31d+0L3sF7x97ZO8lBGgfCBqvpo1zx1/QSoqu8Bn2U0lfLoJAf+wJpbb+tL9/xa4Lus7D4+HTgnyR7gSkZTSH/LdPURgKq6p/u5j1HAn8WUfl8Xw1AYztXAgZUKL2c0B3+g/Q+71Q5PBfZ3w9nrgOckObZbEfGcrm1FyGhI8G7gzqp625ynpqafSY7rRggkeTijYyZ3MgqHc7uXHdzHA30/F/hMjSaerwbO61bunAycCty4PL04tKq6qKpOrKoZRgeOP1NVf8AU9REgySOSPOrANqPv2W1M0fd1yYY+qLEaHsAVwLeBHzOac7yA0bzrp4G7gBuAx3SvDXApo7nqrwKzcz7nFcDu7nH+0P06qI/PYDRH+xXg1u7x/GnqJ3AG8KWuj7cBb+raT2H0D95u4EPAw7r2o7v93d3zp8z5rDd0fd8FPG/ovi3Q32fxs9VHU9XHrj9f7h63A2/o2qfm+7rUh5e5kCQ1Th9JkhpDQZLUGAqSpMZQkCQ1hoIkqTEUJEmNoSBJav4Pga5BbGI8hLIAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BsmtFullBath\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD5CAYAAADItClGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQFUlEQVR4nO3dbYxmZX3H8e/PBQR8WFPXVgOsC4Wg26oVR7QxbemD6SJZsGqVjbaVULZoaWv6xtUYtU3a2BfVFovBrRDUWhCxJbuyhmqq8sYKC9EKInVLsSyasEK7+EDcQv99Mfcex+nMzhmYa86cme8nuZNzrvvMPf+Ls9y/Oec651ypKiRJAnjC0AVIklYOQ0GS1DEUJEkdQ0GS1DEUJEkdQ0GS1Dlq6AIejw0bNtSmTZuGLkOSRuXWW2/9TlU9Y673Rh0KmzZtYu/evUOXIUmjkuSb8723Yk4fJXluksuTXJfkTUPXI0lrUdNQSHJlkvuT3D6rfUuSu5LsS7IDoKrurKqLgdcCL2tZlyRpbq2PFK4CtsxsSLIOuAw4G9gMbEuyefLeucANwJ7GdUmS5tA0FKrqJuDBWc1nAvuq6u6qOgRcA5w32X5XVZ0NvL5lXZKkuQ0x0HwCcO+M9f3AS5KcBbwKeCJHOFJIsh3YDrBx48Z2VUrSGrRirj6qqs8Dn++x3U5gJ8DU1JSPeJWkJTTE1Uf3ASfNWD9x0iZJGtgQoXALcFqSk5McA5wP7FrMByTZmmTnwYMHmxQoSWtV09NHSa4GzgI2JNkPvKuqrkhyCXAjsA64sqruWMznVtVuYPfU1NRFj7W2TTtumLP9nvec81g/UpJGr2koVNW2edr34GWnkrTirJg7mhfD00eS1MYoQ6GqdlfV9vXr1w9diiStKqMMBUlSG4aCJKkzylBwTEGS2hhlKDimIEltjDIUJEltGAqSpI6hIEnqrJinpC5Gkq3A1lNPPXXoUvQ4+bgRaWUZ5ZGCA82S1MYoQ0GS1IahIEnqGAqSpM4oQ8E7miWpjVGGggPNktTGKENBktSGoSBJ6hgKkqSOoSBJ6hgKkqTOKEPBS1IlqY1RhoKXpEpSG6MMBUlSG4aCJKljKEiSOoaCJKljKEiSOoaCJKkzylDwPgVJamOUoeB9CpLUxihDQZLUhqEgSeoYCpKkjqEgSeoYCpKkjqEgSeoYCpKkjqEgSeoYCpKkjqEgSeqMMhR89pEktTHKUPDZR5LUxihDQZLUhqEgSeoYCpKkjqEgSeoYCpKkjqEgSeoYCpKkjqEgSeoYCpKkjqEgSeoYCpKkjqEgSeoYCpKkjqEgSeocNXQBMyV5JXAO8FTgiqr6p4FLkqQ1pfmRQpIrk9yf5PZZ7VuS3JVkX5IdAFV1fVVdBFwMvK51bZKkH7ccp4+uArbMbEiyDrgMOBvYDGxLsnnGJu+YvC9JWkbNQ6GqbgIenNV8JrCvqu6uqkPANcB5mfYXwKer6rbWtUmSftxQA80nAPfOWN8/afsD4NeA1yS5eK4fTLI9yd4kew8cONC+UklaQ1bUQHNVXQpcusA2O4GdAFNTU7UcdUnSWjHUkcJ9wEkz1k+ctEmSBjRUKNwCnJbk5CTHAOcDu/r+cJKtSXYePHiwWYGStBYtxyWpVwNfBE5Psj/JhVX1CHAJcCNwJ3BtVd3R9zOrandVbV+/fn2boiVpjWo+plBV2+Zp3wPsaf37JUn9+ZgLSVJnlKHgmIIktTHKUHBMQZLaGGUoSJLaMBQkSZ1Rho
JjCpLURq9QSPK81oUshmMKktRG3yOFDyS5Ocmbk/hNLEmrVK9QqKpfAF7P9POKbk3y90le3rQySdKy6z2mUFXfYHrym7cCvwRcmuTrSV7Vqrj5OKYgSW30HVN4fpL3Mf2col8BtlbVcyfL72tY35wcU5CkNvo+++j9wIeAt1fVw4cbq+pbSd7RpDJJ0rLrGwrnAA9X1aMASZ4AHFtVP6iqjzarTpK0rPqOKXwWOG7G+vGTNknSKtI3FI6tqu8dXpksH9+mpIU50CxJbfQNhe8nOePwSpIXAQ8fYfumHGiWpDb6jim8BfhEkm8BAZ4JvK5ZVZKkQfQKhaq6JclzgNMnTXdV1f+0K0uSNITFTMf5YmDT5GfOSEJVfaRJVZKkQfQKhSQfBX4a+DLw6KS5AENBklaRvkcKU8DmqqqWxUiShtX36qPbmR5cXhG8JFWS2ugbChuAryW5Mcmuw6+WhR2Jl6RKUht9Tx+9u2URkqSVoe8lqV9I8mzgtKr6bJLjgXVtS5MkLbe+j86+CLgO+OCk6QTg+lZFSZKG0XdM4feBlwEPQTfhzk+2KkqSNIy+ofDDqjp0eCXJUUzfpyBJWkX6hsIXkrwdOG4yN/MngN3typIkDaFvKOwADgBfBX4P2MP0fM2D8D4FSWqj79VH/wv87eQ1uKraDeyempq6aOhaJGk16fvso/9gjjGEqjplySuSJA1mMc8+OuxY4DeBn1j6ciRJQ+o1plBVD8x43VdVfwWc07g2SdIy63v66IwZq09g+shhMXMxSJJGoO8X+1/OWH4EuAd47ZJXI0kaVN+rj365dSGSpOH1PX30x0d6v6reuzTlSJKGtJirj14MHJ5DYStwM/CNFkVJkobRNxROBM6oqu8CJHk3cENVvaFVYZKk5df3MRc/BRyasX5o0iZJWkX6Hil8BLg5yT9O1l8JfLhNSQtLshXYeuqppw5VgiStSn1vXvsz4ALgvyavC6rqz1sWtkA9ztEsSQ30PX0EcDzwUFX9NbA/ycmNapIkDaTvdJzvAt4KvG3SdDTwd62KkiQNo++Rwm8A5wLfB6iqbwFPaVWUJGkYfUPhUFUVk8dnJ3lSu5IkSUPpGwrXJvkg8LQkFwGfZYVMuCNJWjoLXpKaJMDHgecADwGnA++sqs80rk2StMwWDIWqqiR7qup5gEEgSatY35vXbkvy4qq6pWk1klaMTTtumLP9nvc4v9Zq1jcUXgK8Ick9TF+BFKYPIp7fqjBJ0vI7Yigk2VhV/wn8+jLVI0ka0EJHCtcz/XTUbyb5ZFW9ejmKkiQNY6FLUjNj+ZSWhUiShrdQKNQ8y5KkVWih00cvSPIQ00cMx02W4UcDzU9tWp0kaVkdMRSqat1yFSJJGt5iHp3dVJJTklyR5Lqha5GktappKCS5Msn9SW6f1b4lyV1J9iXZAVBVd1fVhS3rkSQdWesjhauALTMbkqwDLgPOBjYD25JsblyHJKmHpqFQVTcBD85qPhPYNzkyOARcA5zXsg5JUj9DjCmcANw7Y30/cEKSpye5HHhhkrfN/aOQZHuSvUn2HjhwoHWtkrSm9H32UXNV9QBwcY/tdgI7Aaamprx3QpKW0BBHCvcBJ81YP3HSJkka2BChcAtwWpKTkxwDnA/sWswHJNmaZOfBgwebFChJa1XrS1KvBr4InJ5kf5ILq+oR4BLgRuBO4NqqumMxn1tVu6tq+/r165e+aElaw5qOKVTVtnna9wB7Wv5uSdLirZg7mhfD00eS1MYoQ8HTR5LUxihDQZLUhqEgSeqMMhQcU5CkNkYZCo4pSFIbowwFSVIbhoIkqTPKUHBMQZLaGGUoOKYgSW2MMhQkSW0YCpKkjqEgSeoYCpKkzihDwauPJKmNUYaCVx9JUhujDAVJUhuGgiSpYyhIkjqGgiSpM8pQ8OojSWpjlKHg1UeS1MYoQ0GS1IahIEnqGAqSpI6hIEnqGAqSpI6hIEnqGAqSpM4oQ8Gb1ySpjVGGgjevSVIbow
wFSVIbhoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqTPKUPDZR5LUxihDwWcfSVIbowwFSVIbhoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqWMoSJI6hoIkqXPU0AUcluRJwAeAQ8Dnq+pjA5ckSWtO0yOFJFcmuT/J7bPatyS5K8m+JDsmza8Crquqi4BzW9YlSZpb69NHVwFbZjYkWQdcBpwNbAa2JdkMnAjcO9ns0cZ1SZLm0DQUquom4MFZzWcC+6rq7qo6BFwDnAfsZzoYmtclSZrbEGMKJ/CjIwKYDoOXAJcCf5PkHGD3fD+cZDuwHWDjxo0Ny5Sk4W3accOc7fe855wmv2/FDDRX1feBC3pstxPYCTA1NVWt65KktWSI0zT3ASfNWD9x0iZJGtgQoXALcFqSk5McA5wP7FrMByTZmmTnwYMHmxQoSWtV60tSrwa+CJyeZH+SC6vqEeAS4EbgTuDaqrpjMZ9bVburavv69euXvmhJWsOajilU1bZ52vcAe1r+bknS4o3y0k9PH0lSG6MMBU8fSVIbowwFSVIbhoIkqZOq8d7/leQA8M3H+OMbgO8sYTlDsi8rz2rpB9iXlerx9OXZVfWMud4YdSg8Hkn2VtXU0HUsBfuy8qyWfoB9Wala9cXTR5KkjqEgSeqs5VDYOXQBS8i+rDyrpR9gX1aqJn1Zs2MKkqT/by0fKUiSZln1oTDPfNAz339iko9P3v9Skk3LX2U/PfryxiQHknx58vrdIepcyHxzd894P0kunfTzX5Ocsdw19tWjL2clOThjn7xzuWvsI8lJST6X5GtJ7kjyR3NsM4r90rMvK36/JDk2yc1JvjLpx5/Msc3Sf39V1ap9AeuAfwdOAY4BvgJsnrXNm4HLJ8vnAx8fuu7H0Zc3An8zdK09+vKLwBnA7fO8/wrg00CAlwJfGrrmx9GXs4BPDV1nj348CzhjsvwU4N/m+Pc1iv3Ssy8rfr9M/js/ebJ8NPAl4KWztlny76/VfqQw33zQM50HfHiyfB3wq0myjDX21acvo1Bzz90903nAR2ravwBPS/Ks5alucXr0ZRSq6ttVddtk+btMP9b+hFmbjWK/9OzLijf57/y9yerRk9fsQeAl//5a7aEw13zQs/9xdNvU9FwPB4GnL0t1i9OnLwCvnhzaX5fkpDneH4O+fR2Ln5+cAvh0kp8ZupiFTE5BvJDpv0xnGt1+OUJfYAT7Jcm6JF8G7gc+U1Xz7pOl+v5a7aGw1uwGNlXV84HP8KO/IDSc25h+pMALgPcD1w9czxEleTLwSeAtVfXQ0PU8Hgv0ZRT7paoeraqfY3ra4jOT/Gzr37naQ6HPfNDdNkmOAtYDDyxLdYuzYF+q6oGq+uFk9UPAi5aptqW2aubxrqqHDp8CqOnJpY5OsmHgsuaU5Gimv0Q/VlX/MMcmo9kvC/VlTPsFoKr+G/gcsGXWW0v+/bXaQ6HPfNC7gN+ZLL8G+OeajNqsMAv2Zdb53XOZPpc6RruA355c7fJS4GBVfXvooh6LJM88fI43yZlM/z+34v7omNR4BXBnVb13ns1GsV/69GUM+yXJM5I8bbJ8HPBy4OuzNlvy76+m03EOraoeSXJ4Puh1wJVVdUeSPwX2VtUupv/xfDTJPqYHDM8fruL59ezLHyY5F3iE6b68cbCCjyDTc3efBWxIsh94F9ODaFTV5UxP1foKYB/wA+CCYSpdWI++vAZ4U5JHgIeB81foHx0vA34L+OrkHDbA24GNMLr90qcvY9gvzwI+nGQd06F1bVV9qvX3l3c0S5I6q/30kSRpEQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLHUJAkdQwFSVLn/wCoexv9LSouLQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BsmtHalfBath\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD4CAYAAADsKpHdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQrUlEQVR4nO3dfaxkdX3H8fdHEBBrV3CxmgVdKARco8Z1xdaH+lBbF8mCD62FaKKWgqiYGpPGVYw1TZrSP1otLY2ulii25UGsBMoSC/WBpIqwUBQQ0XVFWTRlBbuIGhD67R/zWx2ue/eey54zc2f3/Upu9szvnLnz2TOz97NnfmfOTVUhSdKjph1AkrQ0WAiSJMBCkCQ1FoIkCbAQJEnNvtMOsDuWL19eK1eunHYMSZop119//Q+r6pC54zNdCCtXrmTTpk3TjiFJMyXJd3c27ltGkiTAQpAkNRaCJAmwECRJjYUgSQIsBElSYyFIkgALQZLUzPQH03bHyvWX73T89rOOn3ASSVoaPEKQJAEWgiSpsRAkSYCFIElqLARJEmAhSJKaJVMISZ6W5MNJLk7y1mnnkaS9zaCFkOTcJHcluXnO+NoktyXZnGQ9QFXdWlWnA68DXjBkLknSrxr6COHjwNrxgST7AOcAxwGrgJOTrGrrTgAuBzYOnEuSNMeghVBVVwP3zBk+FthcVVuq6gHgAuDEtv2lVXUc8Pr5vmeS05JsSrJp27ZtQ0WXpL3ONC5dsQK4Y+z2VuB5SV4CvAbYn10cIVTVBmADwJo1a2q4mJK0d1ky1zKqqi8AX5hyDEnaa03jLKM7gcPGbh/axjpLsi7Jhu3bt/caTJL2ZtMohOuAo5IcnmQ/4CTg0sV8g6q6rKpOW7Zs2SABJWlvNPRpp+cDXwaOTrI1ySlV9SBwBvBZ4Fbgoqq6ZcgckqSFDTqHUFUnzzO+EU8tlaQlZcl8UnkxnEOQpP7NZCE4hyBJ/ZvJQpAk9c9CkCQBM1oIziFIUv9mshCcQ5Ck/s1kIUiS+mchSJKAGS0E5xAkqX8zWQjOIUhS/2ayECRJ/bMQJEmAhSBJaiwESRIwo4XgWUaS1L+ZLATPMpKk/s1kIUiS+mchSJIAC0GS1FgIkiTAQpAkNTNZCJ52Kkn9m8lC8LRTSerfTBaCJKl/FoIkCbAQJEmNhSBJAiwESVJjIUiSgBktBD+HIEn9m8lC8HMIktS/mSwESVL/LARJEmAhSJIaC0GSBFgIkqTGQpAkARaCJKmxECRJgIUgSWosBEkSYCFIkpqZLAQvbidJ/ZvJQvDidpLUv5ksBElS/ywESRJgIUiSGgtBkgRYCJKkxkKQJAEWgiSpsRAkSYCFIElqLARJEmAhSJIaC0GSBFgIkqTGQpAkARaCJKnpVAhJnjF0kCSvSvLRJBcm+f2hH0+S9HBdjxD+Mcm1Sd6WpPNvpUlybpK7ktw8Z3xtktuSbE6yHqCqLqmqU4HTgT/q/DeQJPWiUyFU1YuA1wOHAdcn+dckv9fhrh8H1o4PJNkHOAc4DlgFnJxk1dgm72vrJUkT1HkOoaq+xeiH9buBFwNnJ/lGktfs4j5XA/fMGT4W2FxVW6rqAeAC4MSM/DVwRVXdsNi/iCRp93SdQ3hmkg8CtwIvA9ZV1dPa8gcX+ZgrgDvGbm9tY+8AXg78QZLTd5HltCSbkmzatm3bIh9akjSffTtu9/fAx4D3VtXPdgxW1feTvK+PIFV1NnB2h+02ABsA1qxZU308tiSpeyEcD/
ysqh4CSPIo4ICq+mlVfXKRj3kno7mIHQ5tY5KkKeo6h3AV8Jix2we2sUfiOuCoJIcn2Q84Cbh0Md8gybokG7Zv3/4II0iS5upaCAdU1X07brTlAxe6U5LzgS8DRyfZmuSUqnoQOAP4LKM5iYuq6pbFhK6qy6rqtGXLOp8BK0laQNe3jH6SZPWOs3+SPAf42QL3oapOnmd8I7Cxc0pJ0uC6FsI7gU8l+T4Q4ElM8cNjSdYB64488shpRZB2aeX6y3c6fvtZx084idRdp0KoquuSHAMc3YZuq6qfDxdrwTyXAZetWbPm1GllkKQ9TdcjBIDnAivbfVYnoarOGySVJGniOhVCkk8CvwncCDzUhguwECRpD9H1CGENsKqqlsQHwZxDkKT+dT3t9GZGE8lLgqedSlL/uh4hLAe+nuRa4P4dg1V1wiCpJEkT17UQPjBkCEnS9HU97fSLSZ4KHFVVVyU5ENhn2GiSpEnqevnrU4GLgY+0oRXAJUOF6pDHaxlJUs+6Tiq/HXgBcC/84pflPHGoUAtxUlmS+te1EO5vv90MgCT7MvocgiRpD9G1EL6Y5L3AY9rvUv4UcNlwsSRJk9a1ENYD24CbgLcwulJpL78pTZK0NHQ9y+j/gI+2L0nSHqjrtYy+w07mDKrqiN4TdeClKySpf4u5ltEOBwB/CBzcf5xuvPy1JPWv0xxCVd099nVnVX0I8Dd9SNIepOtbRqvHbj6K0RHDYn6XgiRpiev6Q/1vxpYfBG4HXtd7GknS1HQ9y+ilQweRJE1X17eM3rWr9VX1t/3E6cazjCSpf10/mLYGeCuji9qtAE4HVgOPa18T5bWMJKl/XecQDgVWV9WPAZJ8ALi8qt4wVDBJ0mR1PUL4DeCBsdsPtDFJ0h6i6xHCecC1ST7Tbr8K+MQwkSRJ09D1LKO/THIF8KI29Oaq+u/hYkmSJq3rW0YABwL3VtXfAVuTHD5QJknSFHT9FZp/DrwbeE8bejTwz0OFkiRNXtcjhFcDJwA/Aaiq7zOF000lScPpWggPVFXRLoGd5LHDRVpYknVJNmzfvn2aMSRpj9K1EC5K8hHg8UlOBa5iir8sxw+mSVL/FjzLKEmAC4FjgHuBo4H3V9WVA2eTJE3QgoVQVZVkY1U9A7AEJGkP1fUtoxuSPHfQJJKkqer6SeXnAW9IcjujM43C6ODhmUMFkyRN1i4LIclTqup7wCsmlEeSNCULHSFcwugqp99N8umqeu0kQkmSJm+hOYSMLR8xZBBJ0nQtVAg1z7IkaQ+z0FtGz0pyL6Mjhce0ZfjlpPKvD5pOkjQxuyyEqtpnUkEkSdO1mMtfLxley0iS+jeTheC1jCSpfzNZCJKk/lkIkiTAQpAkNRaCJAmwECRJjYUgSQIsBElSYyFIkgALQZLUWAiSJMBCkCQ1FoIkCbAQJEmNhSBJAiwESVJjIUiSgCVUCEmOSPJPSS6edhZJ2hsNWghJzk1yV5Kb54yvTXJbks1J1gNU1ZaqOmXIPJKk+Q19hPBxYO34QJJ9gHOA44BVwMlJVg2cQ5K0gEELoaquBu6ZM3wssLkdETwAXACc2PV7JjktyaYkm7Zt29ZjWknau01jDmEFcMfY7a3AiiRPSPJh4NlJ3jPfnatqQ1Wtqao1hxxyyNBZJWmvse+0A+xQVXcDp087hyTtraZxhHAncNjY7UPbWGdJ1iXZsH379l6DSdLebBqFcB1wVJLDk+wHnARcuphvUFWXVdVpy5YtGySgJO2Nhj7t9Hzgy8DRSbYmOaWqHgTOAD4L3ApcVFW3DJlDkrSwQecQqurkecY3AhuHfGxJ0uIsmU8qL4ZzCJLUv5ksBOcQJKl/M1kIkqT+WQiSJGBGC8E5BEnq30wWgnMIktS/mSwESVL/LARJEmAhSJKamSwEJ5UlqX8zWQhOKktS/2ayECRJ/bMQJEmAhSBJamayEJxUlqT+zWQhOKksSf2byUKQJPXPQpAkARaCJKmxECRJgIUgSWpmsh
A87VSS+jeTheBpp5LUv5ksBElS/ywESRJgIUiSGgtBkgRYCJKkxkKQJAEWgiSpmclC8INpktS/mSwEP5gmSf2byUKQJPXPQpAkARaCJKmxECRJgIUgSWosBEkSYCFIkhoLQZIEWAiSpMZCkCQBsO+0AzwSSdYB64488shpR5Gkwaxcf/m8624/6/jeH28mjxC8lpEk9W8mC0GS1D8LQZIEWAiSpMZCkCQBFoIkqbEQJEmAhSBJaiwESRIAqappZ3jEkmwDvvsI774c+GGPcfpirsUx1+KYa3GWai7YvWxPrapD5g7OdCHsjiSbqmrNtHPMZa7FMdfimGtxlmouGCabbxlJkgALQZLU7M2FsGHaAeZhrsUx1+KYa3GWai4YINteO4cgSXq4vfkIQZI0xkKQJAF7aCEkWZvktiSbk6zfyfr9k1zY1n8lycqxde9p47clecWEc70rydeTfC3JfyZ56ti6h5Lc2L4unXCuNyXZNvb4fzK27o1JvtW+3jjhXB8cy/TNJP87tm6Q/ZXk3CR3Jbl5nvVJcnbL/LUkq8fWDbmvFsr1+pbnpiRfSvKssXW3t/Ebk2yacK6XJNk+9ly9f2zdLp//gXP92Vimm9vr6eC2bsj9dViSz7efA7ck+dOdbDPca6yq9qgvYB/g28ARwH7AV4FVc7Z5G/DhtnwScGFbXtW23x84vH2ffSaY66XAgW35rTtytdv3TXF/vQn4h53c92BgS/vzoLZ80KRyzdn+HcC5E9hfvwOsBm6eZ/0rgSuAAL8FfGXofdUx1/N3PB5w3I5c7fbtwPIp7a+XAP++u89/37nmbLsO+NyE9teTgdVt+XHAN3fy73Gw19ieeIRwLLC5qrZU1QPABcCJc7Y5EfhEW74Y+N0kaeMXVNX9VfUdYHP7fhPJVVWfr6qftpvXAIf29Ni7lWsXXgFcWVX3VNWPgCuBtVPKdTJwfk+PPa+quhq4ZxebnAicVyPXAI9P8mSG3VcL5qqqL7XHhcm9trrsr/nszuuy71wTeW0BVNUPquqGtvxj4FZgxZzNBnuN7YmFsAK4Y+z2Vn51h/5im6p6ENgOPKHjfYfMNe4URv8L2OGAJJuSXJPkVT1lWkyu17bD04uTHLbI+w6Zi/bW2uHA58aGh9pfC5kv95D7arHmvrYK+I8k1yc5bQp5fjvJV5NckeTpbWxJ7K8kBzL6ofrpseGJ7K+M3sp+NvCVOasGe43tu9iQGl6SNwBrgBePDT+1qu5McgTwuSQ3VdW3JxTpMuD8qro/yVsYHV29bEKP3cVJwMVV9dDY2DT315KV5KWMCuGFY8MvbPvqicCVSb7R/gc9CTcweq7uS/JK4BLgqAk9dhfrgP+qqvGjicH3V5JfY1RC76yqe/v83ruyJx4h3AkcNnb70Da2022S7AssA+7ueN8hc5Hk5cCZwAlVdf+O8aq6s/25BfgCo/85TCRXVd09luVjwHO63nfIXGNOYs4h/YD7ayHz5R5yX3WS5JmMnr8Tq+ruHeNj++ou4DP09zbpgqrq3qq6ry1vBB6dZDlLYH81u3ptDbK/kjyaURn8S1X92042Ge41NsTEyDS/GB31bGH0FsKOyainz9nm7Tx8Uvmitvx0Hj6pvIX+JpW75Ho2o4m0o+aMHwTs35aXA9+ipwm2jrmePLb8auCa+uUk1ndavoPa8sGTytW2O4bRJF8msb/a91zJ/JOkx/PwCb9rh95XHXM9hdGc2PPnjD8WeNzY8peAtRPM9aQdzx2jH6zfa/uu0/M/VK62fhmjeYbHTmp/tb/7ecCHdrHNYK+x3nbuUvpiNAv/TUY/XM9sY3/B6H/dAAcAn2r/QK4Fjhi775ntfrcBx00411XA/wA3tq9L2/jzgZvaP4qbgFMmnOuvgFva438eOGbsvn/c9uNm4M2TzNVufwA4a879BttfjP63+APg54zeoz0FOB04va0PcE7LfBOwZkL7aqFcHwN+NPba2tTGj2j76avtOT5zwrnOGH
ttXcNYYe3s+Z9UrrbNmxidZDJ+v6H31wsZzVF8bey5euWkXmNeukKSBOyZcwiSpEfAQpAkARaCJKmxECRJgIUgSWosBEkSYCFIkpr/B0it/mb4dnylAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "FullBath\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD4CAYAAADsKpHdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAOL0lEQVR4nO3df6wlZ13H8fen29a2AtvIViHbwrbZplilyHIpGETxR2O12RYVtUTUEmwFRCX+w0IIoIkJJlq0gCkrbWirFkrVZpdugxAJ/UfbbitIC1RXbGULSZcSt/xoWItf/zjPwuVy797Z2zln7px9v5KTzMyZc8732bl7PueZeWYmVYUkSccNXYAkaX0wECRJgIEgSWoMBEkSYCBIkprjhy7gidi0aVNt2bJl6DIkaVTuvvvuL1XVaUuXjzoQtmzZwt69e4cuQ5JGJcmDyy13l5EkCRhpICTZnmTnwYMHhy5FkubGKAOhqnZX1RUbN24cuhRJmhujDARJUv8MBEkSYCBIkppRBoIHlSWpf6MMBA8qS1L/Rn1imqTp2bLj1mWXP/D2i2ZciWZllD0ESVL/DARJEmAgSJKaUQaCo4wkqX+jDARHGUlS/0YZCJKk/hkIkiTA8xA0oJXGuYNj3aUh2EOQJAEGgiSpGWUgOOxUkvo3ykBw2Kkk9W+UgSBJ6p+BIEkCDARJUmMgSJIAA0GS1BgIkiRgpIHgeQiS1L9RBoLnIUhS/0YZCJKk/hkIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkoCRBoJnKktS/0YZCJ6pLEn9G2UgSJL6ZyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktSMMhC8uJ0k9W+UgeDF7SSpf6MMBElS/wwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJwEgDwTumSVL/RhkI3jFNkvo3ykCQJPXPQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRLQMRCSPHvahUiShtW1h/CXSe5M8tok3oRAkuZQp0CoqhcDvwacAdyd5G+TXDDVyiRJM9X5GEJV/QfwZuANwE8AVyX5bJJfnFZxkqTZ6XoM4bwk7wA+A/wUsL2qfrBNv2OK9UmSZuT4juu9E3gv8Kaqeuzwwqr6QpI3T6UySdJMdQ2Ei4DHquqbAEmOA06qqq9X1Q1Tq06SNDNdjyF8FDh50fwpbZkkaU50DYSTquqrh2fa9CnTKUmSNISugfC1JNsOzyR5HvDYEdaXJI1M12MIrwc+mOQLQICnAb86taokSTPXKRCq6q4kzwLOaYvur6r/nV5ZkqRZ69pDAHg+sKW9ZlsSqur6vgpJ8lImo5meAlxTVf/Y13tLklbX9cS0G4A/BX6MSTA8H1jo8Lprkzyc5N4lyy9Mcn+SfUl2AFTVLVV1OfBq3B0lSTPXtYewAJxbVXWU7/8+4F3At3oSSTYA7wYuAPYDdyXZVVWfbqu8uT0vSZqhrqOM7mVyIPmoVNXtwJeXLD4f2FdVn6uqQ8D7gUsy8SfAbVV1z0rvmeSKJHuT7D1w4MDRliRJWkHXHsIm4NNJ7gS+cXhhVV28hs/cDHx+0fx+4AXA7wI/A2xMsrWqrl7uxVW1E9gJsLCwcLQ9FknSCroGwtumWQRAVV0FXDXtz5
EkLa/rsNOPJ3kmcHZVfTTJKcCGNX7mQ0zuq3DY6W2ZJGlAXUcZXQ7cDLynLdoM3LLGz7wLODvJmUlOBC4Fdq3xvSRJPel6UPl3gBcBj8K3bpbz/au9KMmNwD8D5yTZn+RVVfU48Drgw0zur3BTVd13NEUn2Z5k58GDB4/mZZKkI+h6DOEbVXUoCQBJjgdWPaBbVS9fYfkeYE/XIpd5/W5g98LCwuVrfQ9J0nfq2kP4eJI3ASe3eyl/ENg9vbIkSbPWNRB2AAeATwG/zeTXvXdKk6Q50nWU0f8Bf9UekqQ51CkQkvwXyxwzqKqzeq+ogyTbge1bt24d4uMlaS4dzbWMDjsJ+GXg+/ovpxsPKktS/zodQ6iqRxY9HqqqP2dyqWpJ0pzousto26LZ45j0GI7mXgqSpHWu65f6ny2afhx4APiV3quRJA2m6yijn5x2IZKkYXXdZfQHR3q+qq7sp5xuHGUkSf3remLaAvAaJhe128zkNpfbgCe3x0xV1e6qumLjxo2z/mhJmltdjyGcDmyrqq8AJHkbcGtVvWJahUmSZqtrD+EHgEOL5g+1ZZKkOdG1h3A9cGeSf2jzLwWum05JkqQhdB1l9MdJbgNe3Ba9sqr+dXplSZJmresuI4BTgEer6i+A/UnOnFJNkqQBdL2F5luBNwBvbItOAP56WkV1qMc7pklSz7r2EH4BuBj4GkBVfYEBhpse5rBTSepf10A4VFVFuwR2ku+dXkmSpCF0DYSbkrwHODXJ5cBH8WY5kjRXVh1llCTAB4BnAY8C5wBvqaqPTLk2SdIMrRoIVVVJ9lTVswFDQJLmVNddRvckef5UK5EkDarrmcovAF6R5AEmI43CpPNw3rQKkyTN1hEDIckzquq/gZ+dUT2dePlrSerfaruMbgGoqgeBK6vqwcWP6Ze3PM9DkKT+rRYIWTR91jQLkSQNa7VAqBWmJUlzZrWDys9J8iiTnsLJbRq+fVD5KVOtTpI0M0cMhKraMKtCJEnDOprLX0uS5piBIEkCDARJUmMgSJKAkQaCd0yTpP6NMhA8U1mS+jfKQJAk9c9AkCQBBoIkqTEQJEmAgSBJagwESRLQ/RaakqQZ27Lj1mWXP/D2i6byefYQJEmAgSBJagwESRIw0kDwWkaS1L9RBoLXMpKk/o0yECRJ/TMQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkASMNBO+YJkn9G2UgeMc0SerfKANBktQ/A0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBKyjQEhyVpJrktw8dC2SdCyaaiAkuTbJw0nuXbL8wiT3J9mXZAdAVX2uql41zXokSSubdg/hfcCFixck2QC8G/g54Fzg5UnOnXIdkqRVTDUQqup24MtLFp8P7Gs9gkPA+4FLur5nkiuS7E2y98CBAz1WK0nHtiGOIWwGPr9ofj+wOclTk1wNPDfJG1d6cVXtrKqFqlo47bTTpl2rJB0zjh+6gMOq6hHg1UPXIUnHqiF6CA8BZyyaP70tkyQNaIhAuAs4O8mZSU4ELgV2DVCHJGmRqe4ySnIj8BJgU5L9wFur6pokrwM+DGwArq2q+47yfbcD27du3brm2rbsuHXZ5Q+8/aI1v6ckjdlUA6GqXr7C8j3AnifwvruB3QsLC5ev9T0kSd9p3ZypLEkaloEgSQIMBElSM8pASLI9yc6DBw8OXYokzY1RBkJV7a6qKzZu3Dh0KZI0N0YZCJKk/hkIkiTAQJAkNamqoWtYsyQHgAfX+PJNwJd6LGdI89KWeWkH2Jb1al7a8kTb8cyq+q7LRY86EJ6IJHuramHoOvowL22Zl3aAbVmv5qUt02
qHu4wkSYCBIElqjuVA2Dl0AT2al7bMSzvAtqxX89KWqbTjmD2GIEn6TsdyD0GStIiBIEkCjoFASHJhkvuT7EuyY5nnvyfJB9rzdyTZMvsqV9ehHZclOZDkE+3xW0PU2UWSa5M8nOTeFZ5PkqtaW/8tybZZ19hFh3a8JMnBRdvkLbOusaskZyT5WJJPJ7kvye8vs8663y4d2zGK7ZLkpCR3Jvlka8sfLrNOv99fVTW3Dya36PxP4CzgROCTwLlL1nktcHWbvhT4wNB1r7EdlwHvGrrWju35cWAbcO8Kz/88cBsQ4IXAHUPXvMZ2vAT40NB1dmzL04FtbfrJwL8v8ze27rdLx3aMYru0f+cntekTgDuAFy5Zp9fvr3nvIZwP7Kuqz1XVIeD9wCVL1rkEuK5N3wz8dJLMsMYuurRjNKrqduDLR1jlEuD6mvgX4NQkT59Ndd11aMdoVNUXq+qeNv0V4DPA5iWrrfvt0rEdo9D+nb/aZk9oj6WjgHr9/pr3QNgMfH7R/H6++4/jW+tU1ePAQeCpM6muuy7tAPil1pW/OckZsyltKrq2dwx+tHX5b0vyQ0MX00Xb7fBcJr9IFxvVdjlCO2Ak2yXJhiSfAB4GPlJVK26TPr6/5j0QjiW7gS1VdR7wEb79q0HDuYfJNWOeA7wTuGXgelaV5EnA3wGvr6pHh65nrVZpx2i2S1V9s6p+BDgdOD/JD0/z8+Y9EB4CFv9SPr0tW3adJMcDG4FHZlJdd6u2o6oeqapvtNn3As+bUW3T0GW7rXtV9ejhLn9V7QFOSLJp4LJWlOQEJl+if1NVf7/MKqPYLqu1Y2zbBaCq/gf4GHDhkqd6/f6a90C4Czg7yZlJTmRy0GXXknV2Ab/Zpl8G/FO1IzTryKrtWLIv92Im+07HahfwG21UywuBg1X1xaGLOlpJnnZ4f26S85n8f1tvPzaAyQgi4BrgM1V15Qqrrfvt0qUdY9kuSU5LcmqbPhm4APjsktV6/f46fq0vHIOqejzJ64APMxmpc21V3Zfkj4C9VbWLyR/PDUn2MTlAeOlwFS+vYzt+L8nFwONM2nHZYAWvIsmNTEZ6bEqyH3grkwNmVNXVwB4mI1r2AV8HXjlMpUfWoR0vA16T5HHgMeDSdfhj47AXAb8OfKrtswZ4E/AMGNV26dKOsWyXpwPXJdnAJLRuqqoPTfP7y0tXSJKA+d9lJEnqyECQJAEGgiSpMRAkSYCBIElqDARJEmAgSJKa/wcQdzAS115bzgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HalfBath\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD5CAYAAAAndkJ4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQR0lEQVR4nO3df6xkZX3H8ffHRUGsWcHVaoBloUtEjBjXq7b+qGi1gmTBH6kFNVFLWVExNSaNqxg1TZriH62WSoOrEsW2gGIlUJZYqL+SWn7sUhQQ0RVRF01BtLugBpR++8c8q8P13r3nXubM3Lm8X8kN5zznzMyXZ87ezz3znPNMqgpJkh426QIkScuDgSBJAgwESVJjIEiSAANBktQYCJIkAPaZdAEPxpo1a2rdunWTLkOSpsr27dt/XFWPm90+1YGwbt06tm3bNukyJGmqJPneXO3L5iOjJE9Ock6Si5K8edL1SNJDTa+BkOTcJHckuXFW+7FJbkmyI8lmgKq6uapOA14NPLfPuiRJv63vM4RPAMcONyRZBZwNHAccBZyc5Ki27QTgMmBrz3VJkmbpNRCq6ivAT2Y1PwvYUVW3VtV9wAXAiW3/S6rqOOC1fdYlSfptkxhUPgj4wdD6TuDZSY4BXgnsy17OEJJsAjYBrF27tr8qJekhZtlcZVRVXwK+1GG/LcAWgJmZGadqlaQRmcRVRrcDhwytH9zaJEkTNIlAuBY4IslhSR4BnARcMoE6JElDev3IKMn5wDHAmiQ7gfdV1ceTnA58HlgFnFtVN/VZx1zWbb5szvbbzjx+zJVI0vLQayBU1cnztG/lQVxammQjsHH9+vVLfQpJ0izL5k7lxaiqS6tq0+rVqyddiiStGFMZCJKk0TMQJEmAgSBJaqYyEJJsTLJl165dky5FklaMqQwEB5UlafSmMhAkSaNnIEiSAANBktQYCJIkYEoDwauMJGn0pjIQvMpIkkZvKgNBkjR6BoIkCTAQJEmNgSBJAgwESVIzlYHgZaeSNHpTGQhedipJozeVgSBJGj0DQZIEGAiSpMZAkCQBsM+kC5BWonWbL5uz/bYzjx9zJVJ3niFIkgADQZLUTGUgeGOaJI3eVAaCN6ZJ0uhNZSBIkkbPQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIETGkgeKeyJI3eVAaCdypL0uhNZSBIkkbPQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqZnKQHByO0kavakMBCe3k6TRm8pAkCSNnoEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSc1UBoJfoSlJozeVgeBXaErS6E1lIEiSRs9AkCQBBoIkqekUCEme2nchkqTJ6nqG8I9JrknyliSO5ErSCtQpEKrq+cBrgUOA7Un+JclLeq1MkjRWnccQqurbwHuAdwIvAM5K8s0kr+yrOEnS+HQdQzg6yQeBm4EXARur6slt+YM91idJGpN9Ou73D8DHgHdX1S/2NFbVD5O8p5fKJElj1TUQjgd+UVX3AyR5GLBfVf28qj7VW3WSpLHpOoZwJfDIofX9W5skaYXoGgj7VdU9e1ba8v79lCRJmoSugfCzJBv2rCR5BvCLvewvSZoyXccQ3g58JskPgQBPAP60t6okSWPXKRCq6tokRwJPak23VNUv+ytLkjRuXc8QAJ4JrG
uP2ZCEqjqvl6okSWPXKRCSfAr4PeB64P7WXICBIEkrRNczhBngqKqqPouRJE1O16uMbmQwkCxJWqG6niGsAb6R5Brg3j2NVXVCL1VJksauayC8v88iJEmT1/Wy0y8nORQ4oqquTLI/sKrf0iRJ49R1+utTgYuAj7Smg4CL+ypKkjR+XQeV3wo8F9gNv/6ynMf3VZQkafy6BsK9VXXfnpUk+zC4D0GStEJ0DYQvJ3k38Mj2XcqfAS4dZSFJXp7ko0kuTPLHo3xuSdLCugbCZuBO4AbgTcBWBt+vvFdJzk1yR5IbZ7Ufm+SWJDuSbAaoqour6lTgNJw4T5LGrutVRv8HfLT9LMYngA8zNMVFklXA2cBLgJ3AtUkuqapvtF3e07ZLksao61xG32WOMYOqOnxvj6uqryRZN6v5WcCOqrq1PfcFwIlJbgbOBC6vquu61CVJGp3FzGW0x37AnwAHLvE1DwJ+MLS+E3g28DbgxcDqJOur6py5HpxkE7AJYO3atUssQZI0W9ePjO6a1fShJNuB946qkKo6Czirw35bgC0AMzMzXukkSSPS9SOjDUOrD2NwxrCY71IYdjtwyND6wa1NkjRBXX+p/+3Q8q+A24BXL/E1rwWOSHIYgyA4CXjNEp9LkjQiXT8yeuFSnjzJ+cAxwJokO4H3VdXHk5wOfJ7BfEjnVtVNS3l+SdLodP3I6B17215VfzdP+8nztG9lcC/DkiTZCGxcv379Up9CkjRL1xvTZoA3M7hC6CAGN49tAB7dfsaqqi6tqk2rV68e90tL0orVdQzhYGBDVd0NkOT9wGVV9bq+CpMkjVfXM4TfBe4bWr+vtUmSVoiuZwjnAdck+VxbfznwyX5KkiRNQterjP46yeXA81vTG6vqv/sra+8cVJak0ev6kRHA/sDuqvp7YGe7j2AiHFSWpNHr+hWa7wPeCbyrNT0c+Ke+ipIkjV/XM4RXACcAPwOoqh8ygctNJUn96RoI91VV0abATvKo/kqSJE1C10D4dJKPAI9JcipwJYv/shxJ0jK24FVGSQJcCBwJ7AaeBLy3qq7ouba91eRVRpI0YgsGQlVVkq1V9VRgYiEwrKouBS6dmZk5ddK1SNJK0fUjo+uSPLPXSiRJE9X1TuVnA69LchuDK43C4OTh6L4KkySN114DIcnaqvo+8NIx1SNJmpCFzhAuZjDL6feSfLaqXjWOoiRJ47fQGEKGlg/vsxBJ0mQtFAg1z/JEJdmYZMuuXbsmXYokrRgLBcLTkuxOcjdwdFveneTuJLvHUeBcnNxOkkZvr2MIVbVqXIVIkiZrMdNfS5JWMANBkgQYCJKkxkCQJAEGgiSpMRAkScCUBoI3pknS6E1lIHhjmiSN3lQGgiRp9AwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkScCUBoJ3KkvS6E1lIHinsiSN3lQGgiRp9AwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJKaqQwEJ7eTpNGbykBwcjtJGr2pDARJ0ugZCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUTGUg+BWakjR6UxkIfoWmJI3eVAaCJGn0DARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkZp9JFyBJmtu6zZfNu+22M48f+et5hiBJAgwESVJjIEiSgGUUCEkOT/LxJBdNuhZJeijqNRCSnJvkjiQ3zmo/NsktSXYk2QxQVbdW1Sl91iNJml/fZwifAI4dbkiyCjgbOA44Cjg5yVE91yFJWkCvgVBVXwF+Mqv5WcCOdkZwH3ABcGKfdUiSFjaJMYSDgB8Mre8EDkry2CTnAE9P8q75HpxkU5
JtSbbdeeedfdcqSQ8Zy+bGtKq6Czitw35bgC0AMzMz1XddkvRQMYlAuB04ZGj94Na2aNu3b/9xku8tsY41wI9nN+YDS3y20ZmzrmXAuhbH42txrGuR8oEHVduhczVOIhCuBY5IchiDIDgJeM1SnqiqHrfUIpJsq6qZpT6+L9a1ONa1ONa1OMu1Luintr4vOz0f+C/gSUl2Jjmlqn4FnA58HrgZ+HRV3dRnHZKkhfV6hlBVJ8/TvhXY2udrS5IWZ9ncqTwBWyZdwDysa3Gsa3Gsa3GWa13QQ22p8kIdSdJD+wxBkjRkRQbCXHMlzdq+b5IL2/ark6wb2vau1n5LkpeOua53JPlGkq8n+Y8khw5tuz/J9e3nkjHX9YYkdw69/p8PbXt9km+3n9ePua4PDtX0rST/O7Stl/6ab36uoe1Jclar+etJNgxt67OvFqrrta2eG5J8NcnThrbd1tqvT7JtzHUdk2TX0Hv13qFte33/e67rL4dqurEdTwe2bX321yFJvth+D9yU5C/m2Ke/Y6yqVtQPsAr4DnA48Ajga8BRs/Z5C3BOWz4JuLAtH9X23xc4rD3PqjHW9UJg/7b85j11tfV7JthfbwA+PMdjDwRubf89oC0fMK66Zu3/NuDcMfTXHwIbgBvn2f4y4HIgwO8DV/fdVx3res6e12Mwj9jVQ9tuA9ZMqL+OAf7twb7/o65r1r4bgS+Mqb+eCGxoy48GvjXHv8fejrGVeIbQZa6kE4FPtuWLgD9KktZ+QVXdW1XfBXa05xtLXVX1xar6eVu9isFNe317MHNLvRS4oqp+UlU/Ba5g1mSGY6zrZOD8Eb32vGru+bmGnQicVwNXAY9J8kT67asF66qqr7bXhfEdW136az69znm2yLrGcmwBVNWPquq6tnw3g0vzD5q1W2/H2EoMhDnnSppvnxrcF7ELeGzHx/ZZ17BTGPwVsMd+GczhdFWSl4+opsXU9ap2enpRkj13mi+L/mofrR0GfGGoua/+Wsh8dffZV4s1+9gq4N+TbE+yaQL1/EGSryW5PMlTWtuy6K8k+zP4pfrZoeax9FcGH2U/Hbh61qbejrFlM5eRfiPJ64AZ4AVDzYdW1e1JDge+kOSGqvrOmEq6FDi/qu5N8iYGZ1cvGtNrd3EScFFV3T/UNsn+WraSvJBBIDxvqPl5ra8eD1yR5JvtL+hxuI7Be3VPkpcBFwNHjOm1u9gI/GdVDZ9N9N5fSX6HQQi9vap2j/K592YlniF0mSvp1/sk2QdYDdzV8bF91kWSFwNnACdU1b172qvq9vbfW4EvMfjLYSx1VdVdQ7V8DHhG18f2WdeQk5h1St9jfy1kvrr77KtOkhzN4P07sQaTSQIP6Ks7gM8xuo9JF1RVu6vqnra8FXh4kjUsg/5q9nZs9dJfSR7OIAz+uar+dY5d+jvG+hgYmeQPg7OeWxl8hLBnMOops/Z5Kw8cVP50W34KDxxUvpXRDSp3qevpDAbSjpjVfgCwb1teA3ybEQ2wdazriUPLrwCuqt8MYn231XdAWz5wXHW1/Y5kMMiXcfRXe851zD9IejwPHPC7pu++6ljXWgZjYs+Z1f4o4NFDy18Fjh1jXU/Y894x+MX6/dZ3nd7/vupq21czGGd41Lj6q/2/nwd8aC/79HaMjaxzl9MPg1H4bzH45XpGa/srBn91A+wHfKb9A7kGOHzosWe0x90CHDfmuq4E/ge4vv1c0tqfA9zQ/lHcAJwy5rr+Bripvf4XgSOHHvtnrR93AG8cZ11t/f3AmbMe11t/Mfhr8UfALxl8RnsKg2nbT2vbw+AbAb/TXntmTH21UF0fA346dGxta+2Ht376WnuPzxhzXacPHVtXMRRYc73/46qr7fMGBheZDD+u7/56HoMxiq8PvVcvG9cx5p3KkiRgZY4hSJKWwECQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBMD/A8iHjxEedyq/AAAAAElFTkSuQmCC\n", + 
"text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "BedroomAbvGr\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD7CAYAAACFfIhNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQhklEQVR4nO3da7BdZX3H8e+PRMqlGttirU3AYMNEM2orPUKr1VovbZgYsNZWMtIXDkN0Ko7WFzVap9oXnaEzrbcWrSlQxAsU8TLQxIK2Ku0MFQLaEUQqxSgBW6JMg1DGCP77Yu+zOKbnJPvgec7aO/v7mTkzez/79hsmnN9Zz/PstVJVSJIEcETfASRJ48NSkCR1LAVJUsdSkCR1LAVJUsdSkCR1LAVJUsdSkCR1VvYdYFaSpwFvAI4D/qmq3n+o1xx33HG1du3a1tEk6bBy4403fqeqnjDfY2n5jeYkFwEvBe6pqqfPGd8IvAdYAVxQVefNeewI4JKqOutQ7z8zM1O7du1a+uCSdBhLcmNVzcz3WOvpo4uBjQeEWQGcD5wGbAC2JNkwfOx0YAews3EuSdI8mpZCVV0L3HvA8CnA7VV1R1XtBy4Dzhg+/8qqOg14VctckqT59bGmsBq4c879PcCpSV4AvBz4CQ5ypJBkK7AV4IQTTmiXUpKm0NgsNFfV54HPj/C87cB2GKwptE0lSdOljy2pdwHHz7m/ZjgmSepZH6VwA3BSkhOTHAmcCVy5mDdIsjnJ9n379jUJKEnTqmkpJLkUuA5Yn2RPkrOr6iHgXOBq4Fbg8qq6ZTHvW1VXVdXWVatWLX1oSZpiTdcUqmrLAuM7cdupJI2dsVloXowkm4HN69at6zuKltnabTvmHd993qZlTiIdniby3EdOH0lSGxNZCpKkNiwFSVJnIkvBLamS1MZEloJrCpLUxkSWgiSpDUtBktSxFCRJnYksBReaJamNiSwFF5olqY2JLAVJUhuWgiSpYylIkjoTWQouNEtSGxNZCi40S1IbE1kKkqQ2LAVJUsdSkCR1LAVJUsdSkCR1JrIU3JIqSW1MZCm4JVWS2pjIUpAktbGy7wAaX2u37Zh3fPd5m5Y5iaTl4pGCJKljKUiSOpaCJKljKUiSOhNZCn5PQZLamMhS8HsKktTGRJaCJKkNS0GS1LEUJEkdS0GS1LEUJEkdS0GS1LEUJEkdS0GS1PHU2VIDC512HDz1uMabRwqSpM5EloLnPpKkNiayFDz3kSS1MZGlIElqw1KQJHUsBUlSx1KQJHUsBUlSx1KQJHUsBUlSx1KQJHUsBUlSx1KQJHUsBUlSx1KQJHUsBUlSx1KQJHUsBUlSZ6wux5nkZcAm4HHAhVV1Tc+RJGmqND9SSHJRknuS3HzA+MYktyW5Pck2gKr6VFWdA7wWeGXrbJKkH7Uc00cXAxvnDiRZAZwPnAZsALYk2TDnKW8bPi5JWkbNS6GqrgXuPWD4FOD2qrqjqvYDlwFnZODPgU9X1U2ts0mSflRfC82rgTvn3N8zHHs98GLgFUleO98Lk2xNsivJrr1797ZPKklTZKwWmqvqvcB7D/Gc7cB2gJmZmVqOXJI0Lfo6UrgLOH7O/TXDMUlSj/oqhRuAk5KcmORI4EzgylFfnGRzku379u1rFlCSptFybEm9FLgOWJ9kT5Kzq+oh4FzgauBW4PKqumXU96yqq6pq66pVq9qElqQp1XxNoaq2LDC+E9jZ+vMlSaObyNNcOH0kSW1MZCk4fSRJbUxkKU
iS2rAUJEmdiSwF1xQkqY2JLAXXFCSpjYksBUlSGyOVQpJntA4iSerfqEcK70tyfZI/SOKcjSQdpkYqhap6HvAqBiexuzHJR5O8pGmyg3ChWZLaGHlNoaq+zuCKaG8Gfh14b5KvJXl5q3AHyeJCsyQ1MOqawjOTvIvByeteCGyuqqcNb7+rYT5J0jIa9YR4fwVcALy1qh6cHayqu5O8rUkySdKyG7UUNgEPVtXDAEmOAI6qqv+tqg81SydJWlajlsJnGVw7+f7h/WOAa4DntAh1KEk2A5vXrVvXx8c3s3bbjnnHd5+3aZmTSJpWoy40H1VVs4XA8PYxbSIdmgvNktTGqKXwQJKTZ+8k+WXgwYM8X5I0gUadPnoj8LEkdwMBfg54ZbNUkqRejFQKVXVDkqcC64dDt1XVD9rFkiT1YTHXaH42sHb4mpOTUFWXNEklSerFSKWQ5EPALwBfBh4eDhdgKUjSYWTUI4UZYENVVcswozpct6RKUt9G3X10M4PF5bHgllRJamPUI4XjgK8muR74/uxgVZ3eJJUkqRejlsI7WoaQJI2HUbekfiHJk4GTquqzSY4BVrSNJklabqOeOvsc4ArgA8Oh1cCnWoWSJPVj1IXm1wHPBe6D7oI7P9sqlCSpH6OWwverav/snSQrGXxPQZJ0GBm1FL6Q5K3A0cNrM38MuKpdrIPzGs2S1MaopbAN2At8BXgNsJPB9Zp74fcUJKmNUXcf/RD42+GPJOkwNeq5j77BPGsIVfWUJU8kSerNYs59NOso4HeBn176OJKkPo20plBV353zc1dVvRvwwsGSdJgZdfro5Dl3j2Bw5LCYazFIkibAqL/Y/3LO7YeA3cDvLXkaSVKvRt199Butg0iS+jfq9NGbDvZ4Vb1zaeJIkvq0mN1HzwauHN7fDFwPfL1FKElSP0YthTXAyVX1PYAk7wB2VNVZrYJJWlprt+2Yd3z3eW4k1CNGPc3FE4H9c+7vH471wnMfSVIbo5bCJcD1Sd4xPEr4IvDBZqkOwXMfSVIbo+4++rMknwaeNxx6dVV9qV0sSVIfRj1SADgGuK+q3gPsSXJio0ySpJ6MejnOtwNvBt4yHHoM8OFWoSRJ/Rj1SOG3gdOBBwCq6m7gsa1CSZL6MWop7K+qYnj67CTHtoskSerLqKVweZIPAI9Pcg7wWbzgjiQddg65+yhJgL8HngrcB6wH/qSqPtM4myRpmR2yFKqqkuysqmcAFoEkHcZGnT66KcmzmyaRJPVu1HMfnQqclWQ3gx1IYXAQ8cxWwSRJy++gpZDkhKr6FvBby5RHktSjQx0pfIrB2VG/meTjVfU7yxFKktSPQ60pZM7tp7QMIknq36FKoRa4LUk6DB1q+ugXk9zH4Ijh6OFteGSh+XFN00mSltVBS6GqVixXkCRPAf4YWFVVr1iuz5UkPWIxp85etCQXJbknyc0HjG9McluS25NsA6iqO6rq7JZ5JEkH17QUgIuBjXMHkqwAzgdOAzYAW5JsaJxDkjSCpqVQVdcC9x4wfApw+/DIYD9wGXBGyxySpNG0PlKYz2rgzjn39wCrk/xMkr8BnpXkLfO/FJJsTbIrya69e/e2zipJU2XU01w0V1XfBV47wvO2A9sBZmZm3CYrSUuojyOFu4Dj59xfMxyTJPWsj1K4ATgpyYlJjgTOBK5czBsk2Zxk+759+5oElKRp1XpL6qXAdcD6JHuSnF1VDwHnAlcDtwKXV9Uti3nfqrqqqrauWrVq6UNL0hRruqZQVVsWGN8J7Gz52ZKkxetj+ujH5vSRJLUxkaXg9JEktTGRpSBJasNSkCR1xubLa4uRZDOwed26dX1HkfQord22Y8HHdp+3aRmTaK6JPFJwTUGS2pjIUpAktWEpSJI6loIkqTO1C80LLXK5wCVpmk3kkYILzZLUxkSWgiSpDUtBktSxFCRJnYksBc+SKkltTGQpuNAsSW1MZClIktqwFCRJHUtBktSxFCRJHUtBktSZyFJwS6
oktTGRpeCWVElqYyJLQZLUhqUgSepYCpKkjqUgSepYCpKkjqUgSepMZCn4PQVJamMiS8HvKUhSGxNZCpKkNiwFSVLHUpAkdSwFSVLHUpAkdSwFSVLHUpAkdSwFSVLHUpAkdSwFSVJnIkvBcx9JUhsTWQqe+0iS2pjIUpAktWEpSJI6loIkqWMpSJI6loIkqWMpSJI6loIkqWMpSJI6loIkqWMpSJI6loIkqWMpSJI6loIkqWMpSJI6loIkqbOy7wCzkhwLvA/YD3y+qj7ScyRJmjpNjxSSXJTkniQ3HzC+McltSW5Psm04/HLgiqo6Bzi9ZS5J0vxaTx9dDGycO5BkBXA+cBqwAdiSZAOwBrhz+LSHG+eSJM2jaSlU1bXAvQcMnwLcXlV3VNV+4DLgDGAPg2JonkuSNL8+1hRW88gRAQzK4FTgvcBfJ9kEXLXQi5NsBbYCnHDCCQ1jSlL/1m7bMe/47vM2Nfm8sVlorqoHgFeP8LztwHaAmZmZap1LkqZJH9M0dwHHz7m/ZjgmSepZH6VwA3BSkhOTHAmcCVy5mDdIsjnJ9n379jUJKEnTqvWW1EuB64D1SfYkObuqHgLOBa4GbgUur6pbFvO+VXVVVW1dtWrV0oeWpCnWdE2hqrYsML4T2NnysyVJi+fWT0lSZyJLwTUFSWpjIkvBNQVJamMiS0GS1EaqJvf7X0n2At98lC8/DvjOEsZZKuZaHHMtzrjmgvHNdjjmenJVPWG+Bya6FH4cSXZV1UzfOQ5krsUx1+KMay4Y32zTlsvpI0lSx1KQJHWmuRS29x1gAeZaHHMtzrjmgvHNNlW5pnZNQZL0/03zkYIk6QBTWQoLXCO6Vwtdz7pvSY5P8rkkX01yS5I39J0JIMlRSa5P8u/DXH/ad6a5kqxI8qUk/9B3lllJdif5SpIvJ9nVd55ZSR6f5IokX0tya5JfHYNM64f/nWZ/7kvyxr5zAST5w+G/+ZuTXJrkqCV9/2mbPhpeI/o/gJcwuOrbDcCWqvpqz7meD9wPXFJVT+8zy1xJngQ8qapuSvJY4EbgZWPw3yvAsVV1f5LHAP8KvKGq/q3PXLOSvAmYAR5XVS/tOw8MSgGYqaqx2nOf5IPAv1TVBcPT6R9TVf/Td65Zw98ZdwGnVtWj/V7UUmVZzeDf+oaqejDJ5cDOqrp4qT5jGo8UFrpGdK8WuJ5176rq21V10/D29xic7nx1v6mgBu4f3n3M8Gcs/sJJsgbYBFzQd5Zxl2QV8HzgQoCq2j9OhTD0IuA/+y6EOVYCRydZCRwD3L2Ubz6NpTDfNaJ7/yU3CZKsBZ4FfLHfJAPDKZovA/cAn6mqscgFvBv4I+CHfQc5QAHXJLlxeK3zcXAisBf4u+F02wVJju071AHOBC7tOwRAVd0F/AXwLeDbwL6qumYpP2MaS0GPQpKfBD4OvLGq7us7D0BVPVxVv8Tgkq6nJOl92i3JS4F7qurGvrPM49eq6mTgNOB1wynLvq0ETgbeX1XPAh4AxmKdD2A4nXU68LG+swAk+SkGMxsnAj8PHJvkrKX8jGksBa8RvUjDOfuPAx+pqk/0nedAw+mGzwEb+84CPBc4fTh/fxnwwiQf7jfSwPCvTKrqHuCTDKZS+7YH2DPnKO8KBiUxLk4Dbqqq/+47yNCLgW9U1d6q+gHwCeA5S/kB01gKP/Y1oqfJcEH3QuDWqnpn33lmJXlCkscPbx/NYOPA1/pNBVX1lqpaU1VrGfzb+ueqWtK/5B6NJMcONwownJ75TaD3nW5V9V/AnUnWD4deBPS6ieEAWxiTqaOhbwG/kuSY4f+bL2Kwzrdkml6OcxxV1UNJZq8RvQK4aLHXiG5heD3rFwDHJdkDvL2qLuw3FTD4y/f3ga8M5+8B3jq8pGqfngR8cLgz5AgG1/oem+2fY+iJwCcHv0dYCXy0qv6x30id1wMfGf6Rdgfw6p7zAF15vgR4Td9ZZlXVF5NcAdwEPAR8iSX+ZvPUbUmVJC1sGq
ePJEkLsBQkSR1LQZLUsRQkSR1LQZLUsRQkSR1LQZLUsRQkSZ3/A1XIBvVCOznIAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "KitchenAbvGr\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQOElEQVR4nO3dbbAeZX3H8e/PAIJPcSppdQIYaBg0rVoxoh3Hlj44DTIHrFolU9tqKSla2jp9Y3QctZ3pjH1RbbVYTJWi1gERLZNIGIpTlTdUCBQVRGpKsQSdIcI0+MCYhv774txZT9Nzkj3hXGfPnvP9zJyZe697783/yib37+xeu3ulqpAkCeAJQxcgSVo6DAVJUsdQkCR1DAVJUsdQkCR1jhm6gMfjxBNPrHXr1g1dhiSNym233fbdqloz23ujDoV169axa9euocuQpFFJ8q253vP0kSSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqGgiSpYyhIkjqjvnlN47du63Wztt/33nMXuRJJ4JGCJGkGQ0GS1DEUJEkdQ0GS1DEUJEmdJRMKSZ6b5LIk1yR589D1SNJK1DQUklye5MEkdx7SvinJPUl2J9kKUFV3V9XFwOuAl7WsS5I0u9ZHClcAm2Y2JFkFXAqcA2wANifZMHnvPOA6YGfjuiRJs2gaClV1E/DwIc1nAbur6t6q2g9cBZw/WX97VZ0D/OZc20yyJcmuJLv27t3bqnRJWpGGuKN5LXD/jOU9wEuSnA28GngihzlSqKptwDaAjRs3VrsyJWnlWTKPuaiqLwJfHLgMSVrRhrj66AHg5BnLJ03aeksylWTbvn37FrQwSVrphgiFW4HTk5ya5DjgAmD7fDZQVTuqasvq1aubFChJK1XrS1KvBG4GzkiyJ8mFVXUAuAS4AbgbuLqq7mpZhySpn6ZjClW1eY72nTyOy06TTAFT69evP9pNSJJmsWTuaJ4PTx9JUhujDAVJUhuGgiSpM8pQ8JJUSWpjlKHgmIIktTHKUJAktWEoSJI6owwFxxQkqY1RhoJjCpLUxihDQZLUhqEgSeoYCpKkzihDwYFmSWpjlKHgQLMktTHKUJAktWEoSJI6hoIkqWMoSJI6owwFrz6SpDZGGQpefSRJbYwyFCRJbRgKkqSOoSBJ6hgKkqSOoSBJ6hgKkqTOKEPB+xQkqY1RhoL3KUhSG6MMBUlSG4aCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOqMMBR9zIUltjDIUfMyFJLUxylCQJLVhKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOoaCJKljKEiSOscMXcBMSV4FnAs8DfhoVf3TwCVJ0orS/EghyeVJHkxy5yHtm5Lck2R3kq0AVXVtVV0EXAy8vnVtkqT/azFOH10BbJrZkGQVcClwDrAB2Jxkw4xV3jl5X5K0iJqHQlXdBDx8SPNZwO6qureq9gNXAedn2l8A11fV7bNtL8mWJLuS7Nq7d2/b4iVphRlqoHktcP+M5T2Ttj8EfhV4bZKLZ/tgVW2rqo1VtXHNmjXtK5WkFWRJDTRX1QeADwxdhyStVEMdKTwAnDxj+aRJWy9JppJs27dv34IXJkkrWa9QSPK8Bf5zbwVOT3JqkuOAC4DtfT9cVTuqasvq1asXuCxJWtn6Hil8KMktSd6SZF7fxEmuBG4GzkiyJ8mFVXUAuAS4AbgbuLqq7ppX5Z
KkBddrTKGqXp7kdOB3gduS3AL8fVXd2OOzm+do3wnsnE+xByWZAqbWr19/NB+XJM2h95hCVX2T6fsH3gb8IvCBJN9I8upWxR2mFk8fSVIDfccUnp/k/Uyf6vllYKqqnjt5/f6G9UmSFlHfS1I/CHwEeEdVPXqwsaq+neSdTSqTJC26vqFwLvBoVT0GkOQJwPFV9cOq+kSz6ubgmIIktdF3TOHzwAkzlp80aRuEYwqS1EbfI4Xjq+r7Bxeq6vtJntSoJklLwLqt183aft97z13kSrSY+h4p/CDJmQcXkrwIePQw60uSRqjvkcJbgU8n+TYQ4JkMON+BYwqS1Ebfm9duTfIc4IxJ0z1V9d/tyjpiPTuAHRs3brxoqBokaTmaz1NSXwysm3zmzCRU1cebVCVJGkSvUEjyCeCngTuAxybNBRgKkrSM9D1S2AhsqKpqWYwkaVh9rz66k+nB5SXB+RQkqY2+oXAi8PUkNyTZfvCnZWGH481rktRG39NH72lZhCRpaeh7SeqXkjwbOL2qPj+5m3lV29IkSYut76OzLwKuAT48aVoLXNuqKEnSMPqOKfwB8DLgEegm3PnJVkVJkobRNxR+VFX7Dy4kOYbp+xQG4dVHktRG31D4UpJ3ACckeQXwaWBHu7IOz6uPJKmNvqGwFdgLfA34fWAn0/M1S5KWkb5XH/0P8HeTH0nSMtX32Uf/wSxjCFV12oJXJEkazHyefXTQ8cBvAD+x8OVIkobUa0yhqh6a8fNAVf0V4Jx8krTM9D19dOaMxScwfeQwn7kYJEkj0PeL/S9nvD4A3Ae8bsGr6cnpOCWpjb5XH/1S60Lmw+k4JamNvqeP/uRw71fV+xamHEnSkOZz9dGLgYNzKEwBtwDfbFGUJGkYfUPhJODMqvoeQJL3ANdV1RtaFSZJWnx9H3PxU8D+Gcv7J22SpGWk75HCx4FbkvzjZPlVwMfalCRJGkrfq4/+PMn1wMsnTW+qqn9tV5YkaQh9Tx8BPAl4pKr+GtiT5NRGNUmSBtJ3Os53A28D3j5pOhb4h1ZFSZKG0fdI4deB84AfAFTVt4GntipKkjSMvqGwv6qKyeOzkzy5XUlH5nScktRG31C4OsmHgacnuQj4PANOuON0nJLUxhGvPkoS4FPAc4BHgDOAd1XVjY1rkyQtsiOGQlVVkp1V9TzAIJCkZazv6aPbk7y4aSWSpMH1vaP5JcAbktzH9BVIYfog4vmtCpMkLb7DhkKSU6rqP4FfW6R6JEkDOtKRwrVMPx31W0k+U1WvWYyiJEnDONKYQma8Pq1lIZKk4R0pFGqO15KkZehIp49ekOQRpo8YTpi8hh8PND+taXWSpEV12FCoqlWLVYgkaXjzeXS2JGmZMxQkSR1DQZLUMRQkSZ0lEwpJTkvy0STXDF2LJK1UTUMhyeVJHkxy5yHtm5Lck2R3kq0AVXVvVV3Ysh5J0uG1PlK4Atg0syHJKuBS4BxgA7A5yYbGdUiSemgaClV1E/DwIc1nAbsnRwb7gauA8/tuM8mWJLuS7Nq7d+8CVitJGmJMYS1w/4zlPcDaJM9IchnwwiRvn+vDVbWtqjZW1cY1a9a0rlWSVpS+8yk0V1UPARcPXYckrWRDHCk8AJw8Y/mkSVtvSaaSbNu3b9+CFiZJK90QoXArcHqSU5McB1wAbJ/PBqpqR1VtWb16dZMCJWmlan1J6pXAzcAZSfYkubCqDgCXADcAdwNXV9VdLeuQJPXTdEyhqjbP0b4T2Hm0200yBUytX7/+aDchSZrFkrmjeT48fSRJbYwyFCRJbRgKkqTOKEPBS1IlqY1RhoJjCpLUxihDQZLUhqEgSeqMMhQcU5CkNkYZCo4pSFIbowwFSVIbhoIkqWMoSJI6owwFB5olqY1RhoIDzZLUxihDQZLUhqEgSeoYCpKkjqEgSeqMMhS8+kiS2hhlKHj1kSS1McpQkCS1YShIkjqGgiSpYyhIkjqGgiSpYyhIkjqjDAXvU5CkNkYZCt
6nIEltjDIUJEltGAqSpI6hIEnqGAqSpI6hIEnqGAqSpI6hIEnqGAqSpI6hIEnqHDN0AUcjyRQwtX79+qFLkaSm1m29btb2+957bpM/b5RHCj7mQpLaGGUoSJLaMBQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSR1DQZLUMRQkSZ0lM59CkicDHwL2A1+sqk8OXJIkrThNjxSSXJ7kwSR3HtK+Kck9SXYn2TppfjVwTVVdBJzXsi5J0uxanz66Atg0syHJKuBS4BxgA7A5yQbgJOD+yWqPNa5LkjSLpqePquqmJOsOaT4L2F1V9wIkuQo4H9jDdDDcwWHCKskWYAvAKaecctS1LfYUd5I0BkMMNK/lx0cEMB0Ga4HPAq9J8rfAjrk+XFXbqmpjVW1cs2ZN20olaYVZMgPNVfUD4E1D1yFJK9kQRwoPACfPWD5p0tZbkqkk2/bt27eghUnSSjdEKNwKnJ7k1CTHARcA2+ezgaraUVVbVq9e3aRASVqpWl+SeiVwM3BGkj1JLqyqA8AlwA3A3cDVVXVXyzokSf20vvpo8xztO4GdR7vdJFPA1Pr16492E5KkWYzyMReePpKkNkYZCpKkNgwFSVInVTV0DfN2cEwBeD3wzaPczInAdxesqGHZl6VnufQD7MtS9Xj68uyqmvXu31GGwkJIsquqNg5dx0KwL0vPcukH2JelqlVfPH0kSeoYCpKkzkoOhW1DF7CA7MvSs1z6AfZlqWrSlxU7piBJ+v9W8pGCJOkQhoIkqbPsQ2GO+aBnvv/EJJ+avP/lWWaKWzJ69OWNSfYmuWPy83tD1Hkkc83dPeP9JPnApJ9fTXLmYtfYV4++nJ1k34x98q7FrrGPJCcn+UKSrye5K8kfz7LOKPZLz74s+f2S5PgktyT5yqQffzrLOgv//VVVy/YHWAX8O3AacBzwFWDDIeu8Bbhs8voC4FND1/04+vJG4G+GrrVHX34BOBO4c473XwlcDwR4KfDloWt+HH05G/jc0HX26MezgDMnr58K/Nss/75GsV969mXJ75fJ3/NTJq+PBb4MvPSQdRb8+2u5Hyl080FX1X7g4HzQM50PfGzy+hrgV5JkEWvsq09fRqGqbgIePswq5wMfr2n/Ajw9ybMWp7r56dGXUaiq71TV7ZPX32P6sfZrD1ltFPulZ1+WvMnf8/cni8dOfg69MmjBv7+WeyjMNR/0rOvU9FwP+4BnLEp189OnLzA9z/VXk1yT5ORZ3h+Dvn0di5+fnAK4PsnPDF3MkUxOQbyQ6d9MZxrdfjlMX2AE+yXJqiR3AA8CN1bVnPtkob6/lnsorDQ7gHVV9XzgRn78G4SGczvTz5l5AfBB4NqB6zmsJE8BPgO8taoeGbqex+MIfRnFfqmqx6rq55ietvisJD/b+s9c7qHQZz7obp0kxwCrgYcWpbr5OWJfquqhqvrRZPEjwIsWqbaF9rjn8V4qquqRg6cAanpyqWOTnDhwWbNKcizTX6KfrKrPzrLKaPbLkfoypv0CUFX/BXwB2HTIWwv+/bXcQ6HPfNDbgd+ZvH4t8M81GbVZYo7Yl0PO757H9LnUMdoO/PbkapeXAvuq6jtDF3U0kjzz4DneJGcx/X9uyf3SManxo8DdVfW+OVYbxX7p05cx7Jcka5I8ffL6BOAVwDcOWW3Bv7+aTsc5tKo6kOTgfNCrgMur6q4kfwbsqqrtTP/j+USS3UwPGF4wXMVz69mXP0pyHnCA6b68cbCCDyPTc3efDZyYZA/wbqYH0aiqy5ieqvWVwG7gh8Cbhqn0yHr05bXAm5McAB4FLliiv3S8DPgt4GuTc9gA7wBOgdHtlz59GcN+eRbwsSSrmA6tq6vqc62/v3zMhSSps9xPH0mS5sFQkCR1DAVJUsdQkCR1DAVJUsdQkCR1DAVJUud/Ad4SGfDT6bwBAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TotRmsAbvGrd\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAPTklEQVR4nO3de5Bed13H8feHFKak4iImgCYN25JOMcNF6nJRBi9cNBjSIl6gAw4i08gICMqMpMAA/+jUUbkpt1hqudRWqICNDUJBpf+AvQG9UltLS9OCDTBuamEIha9/7JPDEnabs5ezZ88+79dMps/5bfZ5Pme6m89zzu/3nJOqQpIkgPv1HUCStHpYCpKkhqUgSWpYCpKkhqUgSWoc03eApdiwYUNNTk72HUOSBuXKK6/8elVtnOtrgy6FyclJrrjiir5jSNKgJLltvq95+kiS1BhkKSTZmWTP9PR031EkaU0ZZClU1d6q2jUxMdF3FElaUwZZCpKkblgKkqSGpSBJalgKkqSGpSBJagzyw2tJdgI7t27d2ncULdHk7ovnHL/1rB0rnEQSDPRIwSWpktSNQZaCJKkbloIkqWEpSJIaloIkqWEpSJIag1ySKrXlkldpYTxSkCQ1LAVJUmOQpeBNdiSpG4MsBT/RLEndGGQpSJK64eojzcuVO9L48UhBktSwFCRJDUtBktSwFCRJDUtBktSwFCRJDUtBktSwFCRJDUtBktQYZCl4QTxJ6sYgS8EL4klSN7z2kdSB+a4bBV47SqvbII8UJEndsBQkSQ1LQZLUsBQkSQ1LQZLUsBQkSQ1LQZLUsBQkSQ1LQZLUsBQkSQ1LQZLUsBQkSQ1LQZLUsBQkSQ1LQZLUsBQkSQ1LQZLUWFV3XkvyHGAH8OPAe6vqkz1HkqSx0vmRQpJzktyV5NojxrcnuTHJzUl2A1TVx6rqDOClwPO6ziZJ+mErcfroXGD77IEk64B3AM8CtgGnJ9k266+8fvR1SdIK6rwUqupS4JtHDD8RuLmqbqmqQ8AFwGmZ8RfAx6vqqrmeL8muJFckueLAgQPdhpekMdPXnMIm4PZZ2/uBJwGvAJ4BTCTZWlXvPvIbq2oPsAdgamqqViDrqjW5++I5x289a8cKJ5G0Vqyqieaqejvw9r5zSNK46mtJ6h3A8bO2N4/GJEk96qsULgdOSnJCkgcAzwcuavvNSXYm2TM9Pd1ZQEkaRyuxJPV84LPAyUn2J3lJVd0LvBz4BHAD8KGquq7tc1bV3qraNTEx0U1oSRpTnc8pVNXp84zvA/Z1/fqSpPYGeZkLTx9JUjcGWQqePpKkbgyyFCRJ3VhVn1OQ1J4fXlQXPFKQJDUGWQpONEtSNwZZCk40S1I3BlkKkqRuWAqSpIalIElqDLIUnGiWpG4MshScaJakbgyyFCRJ3bAUJEkNS0GS1LAUJEmNQZaCq48kqRuDLAVXH0lSN7x0tqQ5eWnu8TTIIwVJUjcsBUlSw1KQJDUsBUlSo1UpJHlM10EWwiWpktSNtkcK70xyWZI/TNL7OlCXpEpSN1qVQlU9FXgBcDxwZZJ/SPLMTpNJklZc6zmFqroJeD3wGuCXgLcn+VKS53YVTpK0strOKTw2yVuAG4CnATur6mdGj9/SYT5J0gpq+4nmvwHOBl5bVd8+PFhVdyZ5fSfJJEkrrm0p7AC+XVXfA0hyP+DYqvpWVX2gs3SSpBXVdk7hU8ADZ22vH41JktaQtqVwbFX93+GN0eP13USSJPWlbSnck+
SUwxtJfg749n38fUnSALWdU3gV8OEkdwIBHg48r7NUR5FkJ7Bz69atfUWQpDWpVSlU1eVJHgWcPBq6saq+212so+bZC+ydmpo6o68MkrQWLeQmO08AJkffc0oSqur9naSSJPWiVSkk+QDwSOALwPdGwwVYCpK0hrQ9UpgCtlVVdRlGktSvtquPrmVmclmStIa1PVLYAFyf5DLgO4cHq+rUTlJJknrRthTe1GUISdLq0HZJ6meSPAI4qao+lWQ9sK7baJKkldb20tlnABcC7xkNbQI+1lUoSVI/2k40vwx4CnAQmhvuPLSrUJKkfrQthe9U1aHDG0mOYeZzCpKkNaRtKXwmyWuBB47uzfxhYG93sSRJfWhbCruBA8A1wB8A+5i5X3MvkuxMsmd6erqvCJK0JrVdffR94O9Gf3rnBfEkqRttr330ZeaYQ6iqE5c9kSSpNwu59tFhxwK/DTxk+eNIkvrU9vTRN44YemuSK4E3LH8kSeNgcvfF837t1rN2rGASzdb29NEpszbvx8yRw0LuxSBJGoC2/7D/9azH9wK3Ar+z7GnWmPneCfkuSNJq1fb00a90HUSS1L+2p4/+5L6+XlVvXp44kqQ+LWT10ROAi0bbO4HLgJu6CCVJ6kfbUtgMnFJVdwMkeRNwcVW9sKtgkqSV1/YyFw8DDs3aPjQakyStIW2PFN4PXJbko6Pt5wDv6yaSJKkvbVcf/VmSjwNPHQ29uKo+310sSVIf2p4+AlgPHKyqtwH7k5zQUSZJUk/a3o7zjcBrgDNHQ/cHPthVKElSP9oeKfwGcCpwD0BV3Qk8qKtQkqR+tC2FQ1VVjC6fneS47iJJkvrSthQ+lOQ9wIOTnAF8ilVywx1J0vI56uqjJAH+EXgUcBA4GXhDVV3ScTZJ0go7ailUVSXZV1WPATorgiQnAq8DJqrqt7p6HUnjwysVL1zb00dXJXnCQp88yTlJ7kpy7RHj25PcmOTmJLsBquqWqnrJQl9DkrR82pbCk4DPJfnvJFcnuSbJ1S2+71xg++yBJOuAdwDPArYBpyfZtoDMkqSO3OfpoyRbquorwK8t5smr6tIkk0cMPxG4uapuGb3GBcBpwPVtnjPJLmAXwJYtWxYTS5I0j6MdKXwMoKpuA95cVbfN/rPI19wE3D5rez+wKclPJnk38PgkZ879rVBVe6pqqqqmNm7cuMgIkqS5HG2iObMen9hlkKr6BvDSLl9DknTfjnakUPM8Xoo7gONnbW8ejbWWZGeSPdPT08sUSZIERy+FxyU5mORu4LGjxweT3J3k4CJf83LgpCQnJHkA8Hx+cEe3Vqpqb1XtmpiYWGQESdJc7vP0UVWtW8qTJzkf+GVgQ5L9wBur6r1JXg58AlgHnFNV1y3ldSRJy6PtTXYWpapOn2d8H7Cvy9eWJC3cQu6nsGo4pyBJ3RhkKTinIEndGGQpSJK6YSlIkhqWgiSpMchScKJZkroxyFJwolmSujHIUpAkdcNSkCQ1LAVJUmOQpeBEsyR1Y5Cl4ESzJHVjkKUgSeqGpSBJalgKkqSGpSBJagyyFFx9JEndGGQpuPpIkroxyFKQJHXDUpAkNSwFSVLDUpAkNSwFSVJjkKXgklRJ6sYgS8ElqZLUjUGWgiSpG5aCJKlhKUiSGpaCJKlhKUiSGpaCJKlhKUiSGpaCJKkxyFLwE82S1I1BloKfaJakbgyyFCRJ3bAUJEkNS0GS1LAUJEkNS0GS1LAUJEkNS0GS1LAUJEkNS0GS1LAUJEkNS0GS1BhkKXhBPEnqxiBLwQviSVI3BlkKkqRuWAqSpIalIElqWAqSpIalIElqWAqSpIalIElqWAqSpIalIElqWAqSpIalIElqWAqSpIalIElqWAqSpIalIElqWAqSpIalIElqWAqSpMYxfQc4LMlxwDuBQ8B/VNV5PUeSpLHT6ZFCknOS3JXk2iPGtye5McnNSXaPhp8LXFhVZwCndplLkjS3rk8fnQtsnz2QZB3wDuBZwD
bg9CTbgM3A7aO/9r2Oc0mS5tDp6aOqujTJ5BHDTwRurqpbAJJcAJwG7GemGL7AfZRVkl3ALoAtW7YsOtvk7ovnHL/1rB2Lfk5JWm4r/W9VHxPNm/jBEQHMlMEm4CPAbyZ5F7B3vm+uqj1VNVVVUxs3buw2qSSNmVUz0VxV9wAv7juHJI2zPo4U7gCOn7W9eTTWWpKdSfZMT08vazBJGnd9lMLlwElJTkjyAOD5wEULeYKq2ltVuyYmJjoJKEnjquslqecDnwVOTrI/yUuq6l7g5cAngBuAD1XVdV3mkCS10/Xqo9PnGd8H7OvytSVJCzfIy1w4pyBJ3RhkKTinIEndGGQpSJK6karqO8OiJTkA3LbIb98AfH0Z4/TJfVl91sp+gPuyWi1lXx5RVXN++nfQpbAUSa6oqqm+cywH92X1WSv7Ae7LatXVvnj6SJLUsBQkSY1xLoU9fQdYRu7L6rNW9gPcl9Wqk30Z2zkFSdKPGucjBUnSESwFSVJj7EohyfFJ/j3J9UmuS/LKvjMtRZJ1ST6f5F/6zrIUSR6c5MIkX0pyQ5Kf7zvTYiX549HP1rVJzk9ybN+Z2prrvupJHpLkkiQ3jf77E31mbGueffnL0c/Y1Uk+muTBfWZsY7573Y++9uoklWTDcr3e2JUCcC/w6qraBjwZeNnoHtFD9UpmrjY7dG8D/rWqHgU8joHuU5JNwB8BU1X1aGAdM5eHH4pzOeK+6sBu4NNVdRLw6dH2EJzLj+7LJcCjq+qxwH8BZ650qEU4lx/dD5IcD/wq8JXlfLGxK4Wq+mpVXTV6fDcz//hs6jfV4iTZDOwAzu47y1IkmQB+EXgvQFUdqqr/7TfVkhwDPDDJMcB64M6e87RWVZcC3zxi+DTgfaPH7wOes6KhFmmufamqT44u3w/wOWZu8rWqzfP/BOAtwJ8Cy7paaOxKYbYkk8Djgf/sN8mivZWZH4rv9x1kiU4ADgB/PzoVdnaS4/oOtRhVdQfwV8y8e/sqMF1Vn+w31ZI9rKq+Onr8NeBhfYZZRr8PfLzvEIuR5DTgjqr64nI/99iWQpIfA/4JeFVVHew7z0IleTZwV1Vd2XeWZXAMcArwrqp6PHAPwzlF8UNG59tPY6bofho4LskL+021fGpmDfvg17EneR0zp5LP6zvLQiVZD7wWeEMXzz+WpZDk/swUwnlV9ZG+8yzSU4BTk9wKXAA8LckH+420aPuB/VV1+IjtQmZKYoieAXy5qg5U1XeBjwC/0HOmpfqfJD8FMPrvXT3nWZIkvwc8G3hBDfODWo9k5k3HF0e//5uBq5I8fDmefOxKIUmYOXd9Q1W9ue88i1VVZ1bV5qqaZGYi89+qapDvSKvqa8DtSU4eDT0duL7HSEvxFeDJSdaPftaezkAnzWe5CHjR6PGLgH/uMcuSJNnOzCnXU6vqW33nWYyquqaqHlpVk6Pf//3AKaPfoyUbu1Jg5h327zLzzvoLoz+/3nco8QrgvCRXAz8L/HnPeRZldLRzIXAVcA0zv2ODubTCXPdVB84CnpnkJmaOhM7qM2Nb8+zL3wIPAi4Z/e6/u9eQLcyzH9293jCPniRJXRjHIwVJ0jwsBUlSw1KQJDUsBUlSw1KQJDUsBUlSw1KQJDX+HxM8SEtKds0IAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fireplaces\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD4CAYAAADsKpHdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAOX0lEQVR4nO3df6wlZX3H8feHXShQZU1dWgmgCy7F0gplXaiNsbE/TLFkwUbbQmothkKt0tb0n67EqG1iQv+otlob3BYi2voD0ZJdWUI1NfqPFRaKFaTUrcWyYMKK6UWtka799o/zbL297t07d5lz5s7Z9yu5ycxz5pzzfXbuns995pkzk6pCkqRjhi5AkrQ2GAiSJMBAkCQ1BoIkCTAQJEnN+qELeCo2btxYmzZtGroMSRqVu++++2tVdfLS9lEHwqZNm9izZ8/QZUjSqCT5yqHaPWQkSQIMBElSM8pASLItyY6FhYWhS5GkuTHKQKiqXVV19YYNG4YuRZLmxigDQZLUPwNBkgQYCJKkxkCQJAEj/WJakm3Ats2bNx/xa2zaftsh2x+67uIjfk1JGrNRjhA8y0iS+jfKQJAk9W+Uh4w0H5Y7bAceupOG4AhBkgQYCJKkZpSB4LWMJKl/owwEzzKSpP6NMhAkSf0zECRJgIEgSWoMBEkSYCBIkppRBoKnnUpS/0YZCJ52Kkn9G2UgSJL6ZyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEnNKAPBL6ZJUv9GGQh+MU2S+jfKQJAk9c9AkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEjDQSvZSRJ/RtlIHgtI0nq3ygDQZLUPwNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJwEgDwfshSFL/RhkI3g9Bkvo3ykCQJPXPQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqTEQJEmAgSBJagwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSQCsH7oASWvTpu23HbL9oesunnElmhVHCJIkYA2NEJK8HLgYOAm4oar+fuCSJOmoMtURQpIbkzyW5L4l7RcleTDJ3iTbAarq1qq6Cngt8GvTrEuS9P06BUKS5x/h678XuGjJa60D3g28DDgHuDzJOYs2eVN7XJI0Q11HCH+Z5M4kr0uyoeuLV9VngK8vab4Q2FtVX66qJ4EPAZdm4k+A26vqnq7vIUnqR6dAqKoXA78OnA7cneQDSV56hO95KvDwovV9re13gV8AXpnktcs9OcnVSfYk2bN///4jLEGStFTnSeWq+lKSNwF7gHcC5ycJcG1VfeypFlJV72yvu9J2O4AdAFu3bq2n+r6SpImucwjnJnkH8ADwc8C2qvqxtvyOVb7nI0xGGged1tokSQPqOofwLuAe4Lyqev3BY/xV9SiTSeDVuAs4K8kZSY4DLgN2rvI1JEk96xoIFwMfqKpvAyQ5JsmJAFX1/uWelOSDwGeBs5PsS3JlVR0ArgHuYDLiuLmq7l9N0Um2JdmxsLCwmqdJkg6jayB8Ejhh0fqJre2wquryqjqlqo6tqtOq6obWvruqfrSqnltVb1tt0VW1q6qu3rCh8wlPkqQVdA2E46vqmwdX2vKJ0ylJkjSEroHwrSRbDq4keQHw7emUJEkaQtfTTt8AfCTJo0CAZ+HlJSRprnQKhKq6
K8nzgLNb04NV9d/TK+vwkmwDtm3evHmoEiRp7qzm4nYXAOcCW5hcf+jV0ylpZU4qS1L/Oo0QkrwfeC5wL/Dd1lzA+6ZUlyRpxrrOIWwFzqkqLxUhSXOq6yGj+5hMJEuS5lTXEcJG4ItJ7gS+c7Cxqi6ZSlUrcFJZkvrXNRDeOs0iVquqdgG7tm7detXQtUjSvOh62umnkzwHOKuqPtmuY7RuuqVJkmap6+WvrwJuAd7Tmk4Fbp1WUZKk2es6qfx64EXAEzC5WQ7ww9MqSpI0e10D4Tvt/scAJFnP5HsIkqQ50TUQPp3kWuCEdi/ljwC7plfW4Xk/BEnqX9dA2A7sB74A/Dawm9XfKa03XrpCkvrX9Syj/wH+qv1IkuZQ12sZ/TuHmDOoqjN7r0iSNIjVXMvooOOBXwF+qP9yJElD6TSHUFWPL/p5pKr+DLh4yrVJkmao6yGjLYtWj2EyYug6upAkjUDXD/U/XbR8AHgI+NXeq5EkDabrWUY/O+1CVsOrnUpS/7oeMvqDwz1eVW/vp5xuvNqpJPVvNWcZXQDsbOvbgDuBL02jKEnS7HUNhNOALVX1DYAkbwVuq6pXTaswSdJsdb10xY8ATy5af7K1SZLmRNcRwvuAO5P8XVt/OXDTdEqSJA2h61lGb0tyO/Di1vSaqvqn6ZUlSZq1roeMAE4EnqiqPwf2JTljSjVJkgbQ9RaabwH+EHhjazoW+JtpFdWhHu+HIEk96zpC+GXgEuBbAFX1KPD0aRW1Eu+HIEn96xoIT1ZV0S6BneQHp1eSJGkIXQPh5iTvAZ6R5Crgk3izHEmaKyueZZQkwIeB5wFPAGcDb66qT0y5NknSDK0YCFVVSXZX1fMBQ0CS5lTXQ0b3JLlgqpVIkgbV9ZvKPwW8KslDTM40CpPBw7nTKkySNFuHDYQkz66q/wB+cUb1SJIGstII4VYmVzn9SpKPVtUrZlGUJGn2VppDyKLlM6dZiCRpWCsFQi2zLEmaMysdMjovyRNMRgontGX43qTySVOtbhneU1mS+nfYEUJVrauqk6rq6VW1vi0fXB8kDFpdXstIknq2mstfS5LmmIEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkYKSBkGRbkh0LCwtDlyJJc2OUgeD9ECSpf6MMBElS/wwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSpMRAkSYCBIElqDARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkxECRJgIEgSWoMBEkSYCBIkhoDQZIEGAiSpMZAkCQBBoIkqVkzgZDkzCQ3JLll6Fok6Wg01UBIcmOSx5Lct6T9oiQPJtmbZDtAVX25qq6cZj2SpOVNe4TwXuCixQ1J1gHvBl4GnANcnuScKdchSVrBVAOhqj4DfH1J84XA3jYieBL4EHBp19dMcnWSPUn27N+/v8dqJenoNsQcwqnAw4vW9wGnJnlmkuuB85O8cbknV9WOqtpaVVtPPvnkadcqSUeN9UMXcFBVPQ68dug6JOloNcQI4RHg9EXrp7U2SdKAhgiEu4CzkpyR5DjgMmDnal4gybYkOxYWFqZSoCQdjaZ92ukHgc8CZyfZl+TKqjoAXAPcATwA3FxV96/mdatqV1VdvWHDhv6LlqSj1FTnEKrq8mXadwO7p/nekqTVWTPfVJYkDctAkCQBIw0EJ5UlqX+jDAQnlSWpf6MMBElS/wwESRJgIEiSmlEGgpPKktS/UQaCk8qS1L9RBoIkqX8GgiQJMBAkSY2BIEkCDARJUjPKQPC0U0nq3ygDwdNOJal/owwESVL/DARJEmAgSJIaA0GSBBgIkqRm/dAFHIkk24BtmzdvHroUSZqaTdtvO2T7Q9ddPJX3G+UIwdNOJal/owwESVL/DARJEmAgSJIaA0GSBBgIkqTGQJAkAQaCJKkZZSB4PwRJ6l+qaugajliS/cBXjvDpG4Gv9VjOkOal
L/PSD7Ava9W89OWp9uM5VXXy0sZRB8JTkWRPVW0duo4+zEtf5qUfYF/Wqnnpy7T6McpDRpKk/hkIkiTg6A6EHUMX0KN56cu89APsy1o1L32ZSj+O2jkESdL/dzSPECRJixgIkiTgKAiEJBcleTDJ3iTbD/H4DyT5cHv8c0k2zb7KlXXoxxVJ9ie5t/381hB1dpHkxiSPJblvmceT5J2tr/+cZMusa+yiQz9ekmRh0T5586xr7CrJ6Uk+leSLSe5P8vuH2GbN75eO/RjFfklyfJI7k3y+9eWPDrFNv59fVTW3P8A64N+AM4HjgM8D5yzZ5nXA9W35MuDDQ9d9hP24AviLoWvt2J+fAbYA9y3z+C8BtwMBXgh8buiaj7AfLwE+PnSdHftyCrClLT8d+NdD/I6t+f3SsR+j2C/t3/lpbflY4HPAC5ds0+vn17yPEC4E9lbVl6vqSeBDwKVLtrkUuKkt3wL8fJLMsMYuuvRjNKrqM8DXD7PJpcD7auIfgWckOWU21XXXoR+jUVVfrap72vI3gAeAU5dstub3S8d+jEL7d/5mWz22/Sw9C6jXz695D4RTgYcXre/j+385/m+bqjoALADPnEl13XXpB8Ar2lD+liSnz6a0qeja3zH46Tbkvz3Jjw9dTBftsMP5TP4iXWxU++Uw/YCR7Jck65LcCzwGfKKqlt0nfXx+zXsgHE12AZuq6lzgE3zvrwYN5x4m14w5D3gXcOvA9awoydOAjwJvqKonhq7nSK3Qj9Hsl6r6blX9JHAacGGSn5jm+817IDwCLP5L+bTWdshtkqwHNgCPz6S67lbsR1U9XlXfaat/DbxgRrVNQ5f9tuZV1RMHh/xVtRs4NsnGgctaVpJjmXyI/m1VfewQm4xiv6zUj7HtF4Cq+k/gU8BFSx7q9fNr3gPhLuCsJGckOY7JpMvOJdvsBH6zLb8S+IdqMzRryIr9WHIs9xImx07Haifw6nZWywuBhar66tBFrVaSZx08npvkQib/39baHxvA5Awi4Abggap6+zKbrfn90qUfY9kvSU5O8oy2fALwUuBflmzW6+fX+iN94hhU1YEk1wB3MDlT58aquj/JHwN7qmonk1+e9yfZy2SC8LLhKj60jv34vSSXAAeY9OOKwQpeQZIPMjnTY2OSfcBbmEyYUVXXA7uZnNGyF/gv4DXDVHp4HfrxSuB3khwAvg1ctgb/2DjoRcBvAF9ox6wBrgWeDaPaL136MZb9cgpwU5J1TELr5qr6+DQ/v7x0hSQJmP9DRpKkjgwESRJgIEiSGgNBkgQYCJKkxkCQJAEGgiSp+V8waGCceUtI7wAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GarageCars\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD8CAYAAAB3u9PLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAPsElEQVR4nO3df+xddX3H8eeLFgSn1sy6aVrwCytBmT9m/YouxoW5mdXVwjbdBpNtGkfnD7YZ/9iqMeqWLOk/8zcLdkIUnCCiIa2UMI0/+McBBXWCjNmxOoomVMyKv2KHvvfHPZ3Xr98f51vOued7v30+km9yz+eee8+rpz3fV8+Pe26qCkmSThg6gCRpZbAQJEmAhSBJalgIkiTAQpAkNSwESRJgIUiSGhaCJAlYQYWQ5GlJLktyXZLXDp1Hko436fOTykmuAF4KPFBVTx8b3wK8G1gDfKCqdo49dwJwZVVdtNT7r1+/vmZmZjrPLUmr2e233/6tqnri3PG1PS/3g8D7gCuPDiRZA1wKvBg4CNyWZHdVfTXJecBrgavavPnMzAz79u3rPLQkrWZJvj7feK+HjKrqZuDbc4bPAfZX1b1VdQS4Bji/mX93Vb0EeEWfuSRJP6vvPYT5bADuG5s+CDwvybnA7wGPAvYu9OIk24HtAKeddlp/KSXpODNEIcyrqj4HfK7FfLuAXQCzs7PeqlWSOjLEVUb3A6eOTW9sxiRJAxqiEG4DzkxyepKTgAuA3QPkkCSN6bUQklwNfAE4K8nBJK+uqoeBS4CbgLuBa6vqrmW+77Ykuw4fPtx9aEk6TvX6OYS+zc7OlpedStLyJLm9qmbnjq+YTypLkoa1Yq4yklaTmR03zDt+YOfWCSeR2pvKPQTPIUhS96ayEKpqT1VtX7du3dBRJGnVmMpCkCR1z0KQJAEWgiSpYSFIkoApLQSvMpKk7k1lIXiVkSR1byoLQZLUPQtBkgRYCJKkhoUgSQKmtBC8ykiSujeVheBVRpLUvaksBElS9ywESRJgIUiSGhaCJAmwECRJjaksBC87laTuTWUheNmpJHVvKgtBktQ9C0GSBFgIkqSGhSBJAiwESVLDQpAkAVNaCH4OQZK6N5WF4OcQJKl7U1kIkqTurR06gFaumR03zDt+YOfWCSeRNAnuIUiSAAtBktSwECRJgIUgSWpYCJIkwEKQJDUsBEkSMKWF4K0rJKl7U1kI3rpCkro3lYUgSeqehSBJAiwESVLDQpAkARaCJKlhIUiSAAtBktSwECRJgIUgSWpYCJIkwEKQJDUsBEkSYCFIkhoWgiQJmNJC8PsQJKl7U1kIfh+CJHVvKgtBktQ9C0GSBMDaoQNIOj7N7Lhh3vEDO7dOOImOcg9BkgRYCJKkhoUgSQIsBElSw0KQJAEWgiSpYSFIkgALQZLUsBAkSYCFIElqWAiSJMBCkCQ1LARJEmAhSJIaFoIkCWhZCEme0XcQSdKw2u4h/GOSW5O8LolfZCxJq1CrQqiqFwKvAE4Fbk/ykSQv7jWZJGmiWn+FZlV9LclbgH3Ae4BnJwnw5qr6xCMNkuR3gK3A44DLq+pfHul7SpLaa3sO4ZlJ3gncDbwI2FZVT2sev3OR112R5IEkd84Z35LkniT7k+wAqKrrq+pi4DXAHx7jn0eSdIzankN4L3AH8Kyqen1V3QFQVd8A3rLI6z4IbBkfSLIGuBR4CXA2cGGSs8dmeUvzvCRpgtoeMtoK/KCqfgSQ5ATg5Kr6flVdtdCLqurmJDNz
hs8B9lfVvc17XQOcn+RuYCdw49HCkSRNTts9hE8Dp4xNP7oZOxYbgPvGpg82Y38B/Cbw8iSvWejFSbYn2Zdk36FDh44xgiRprrZ7CCdX1XePTlTVd5M8ussgVfUeRierl5pvF7ALYHZ2trrMIEnHs7Z7CN9LsvnoRJLnAD84xmXez+jy1aM2NmOSpAG13UN4A/CxJN8AAjyJY78S6DbgzCSnMyqCC4A/Osb3kiR1pFUhVNVtSZ4KnNUM3VNV/7vU65JcDZwLrE9yEHhbVV2e5BLgJmANcEVV3bWc0Em2Ads2bdq0nJdJkhbR+oNpwHOBmeY1m5NQVVcu9oKqunCB8b3A3mUse+7r9wB7ZmdnLz7W95Ak/bRWhZDkKuCXgC8BP2qGC1i0ECRJ06PtHsIscHZVeVWPJK1SbQvhTkYnkr/ZY5aJmtlxw7zjB3ZunXASSVoZ2hbCeuCrSW4Ffnh0sKrO6yXVEjypLEnda1sIb+8zxHJ5UlmSutf2stPPJ3kKcGZVfbr5lPKafqNJkiap7e2vLwauA97fDG0Aru8rlCRp8treuuL1wAuAh2D0ZTnAL/QVSpI0eW0L4YdVdeToRJK1jD6HMIgk25LsOnz48FARJGnVaVsIn0/yZuCU5ruUPwbs6S/W4qpqT1VtX7du3VARJGnVaVsIO4BDwFeAP2d024nFvilNkjRl2l5l9GPgn5ofSdIq1PZeRv/FPOcMquqMzhNJkgaxnHsZHXUy8PvAz3cfR5I0lFbnEKrqwbGf+6vqXcBgN/3xKiNJ6l7bQ0abxyZPYLTHsJzvUuiUt66QpO61/aX+D2OPHwYOAH/QeRpJ0mDaXmX0630HkSQNq+0hozcu9nxVvaObOJKkoSznKqPnArub6W3ArcDX+gglSZq8toWwEdhcVd8BSPJ24IaquqivYJKkyWp764pfBI6MTR9pxgbhZaeS1L22hXAlcGuStzd7B7cAH+ot1RK8uZ0kda/tVUZ/n+RG4IXN0Kuq6ov9xZIkTVrbPQSARwMPVdW7gYNJTu8pkyRpAG2/QvNtwN8Ab2qGTgQ+3FcoSdLktd1D+F3gPOB7AFX1DeCxfYWSJE1e20I4UlVFcwvsJD/XXyRJ0hDaFsK1Sd4PPD7JxcCn8ctyJGlVWfIqoyQBPgo8FXgIOAt4a1V9qudsi2XaBmzbtGnTUBEkadVZshCqqpLsrapnAIOVwDhvfy1J3Wt7yOiOJM/tNYkkaVBt72X0POCiJAcYXWkURjsPz+wrmCRpshYthCSnVdV/A781oTySpIEstYdwPaO7nH49ycer6mWTCCVJmrylziFk7PEZfQaRJA1rqUKoBR5LklaZpQ4ZPSvJQ4z2FE5pHsNPTio/rtd0kqSJWbQQqmrNpIJIkoa1nNtfS5JWMQtBkgRMaSH4ncqS1L2pLAS/U1mSujeVhSBJ6p6FIEkCLARJUsNCkCQBFoIkqWEhSJIAC0GS1LAQJEmAhSBJalgIkiTAQpAkNSwESRJgIUiSGlNZCN7+WpK6N5WF4O2vJal7U1kIkqTuWQiSJMBCkCQ1LARJEmAhSJIaFoIkCbAQJEkNC0GSBFgIkqSGhSBJAiwESVLDQpAkARaCJKlhIUiSAAtBktSwECRJgIUgSWpYCJIkwEKQJDUsBEkSYCFIkhorphCSnJHk8iTXDZ1Fko5HvRZCkiuSPJDkzjnjW5Lck2R/kh0AVXVvVb26zzySpIX1vYfwQWDL+ECSNcClwEuAs4ELk5zdcw5J0hJ6LYSquhn49pzhc4D9zR7BEeAa4Pw+c0iSljbEOYQNwH1j0weBDUmekOQy4NlJ3rTQi5NsT7Ivyb5Dhw71nVWSjhtrhw5wVFU9CLymxXy7gF0As7Oz1XcuSTpeDLGHcD9w6tj0xmZMkjSgIQrhNuDMJKcnOQm4ANg9QA5J0pi+Lzu9GvgCcFaSg0leXVUPA5cANwF3A9dW1V3LfN9tSXYdPny4+9CSdJzq9RxCVV24wPheYO8j
eN89wJ7Z2dmLj/U9JEk/bcV8UlmSNCwLQZIEWAiSpMZUFoInlSWpe1NZCFW1p6q2r1u3bugokrRqTGUhSJK6ZyFIkgALQZLUmMpC8KSyJHVvKgvBk8qS1L2pLARJUvcsBEkSYCFIkhoWgiQJmNJC8CojSereVBaCVxlJUvemshAkSd2zECRJgIUgSWpYCJIkwEKQJDXWDh3gWCTZBmzbtGnT0FEkqTczO25Y8LkDO7d2vryp3EPwslNJ6t5UFoIkqXsWgiQJsBAkSQ0LQZIEWAiSpIaFIEkCLARJUmMqC8HvQ5Ck7qWqhs5wzJIcAr5+jC9fD3yrwzhdMdfymGt5zLU8KzUXPLJsT6mqJ84dnOpCeCSS7Kuq2aFzzGWu5THX8phreVZqLugn21QeMpIkdc9CkCQBx3ch7Bo6wALMtTzmWh5zLc9KzQU9ZDtuzyFIkn7a8byHIEkas+oLIcmWJPck2Z9kxzzPPyrJR5vnb0kys0JyvTLJoSRfan7+bAKZrkjyQJI7F3g+Sd7TZP63JJv7ztQy17lJDo+tq7dOKNepST6b5KtJ7kryV/PMM/F11jLXxNdZkpOT3Jrky02uv51nnolvjy1zTXx7HFv2miRfTPLJeZ7rdn1V1ar9AdYA/wmcAZwEfBk4e848rwMuax5fAHx0heR6JfC+Ca+vXwM2A3cu8PxvAzcCAZ4P3LJCcp0LfHKAf19PBjY3jx8L/Mc8f48TX2ctc018nTXr4DHN4xOBW4Dnz5lniO2xTa6Jb49jy34j8JH5/r66Xl+rfQ/hHGB/Vd1bVUeAa4Dz58xzPvCh5vF1wG8kyQrINXFVdTPw7UVmOR+4skb+FXh8kievgFyDqKpvVtUdzePvAHcDG+bMNvF11jLXxDXr4LvN5InNz9yTmBPfHlvmGkSSjcBW4AMLzNLp+lrthbABuG9s+iA/u2H8/zxV9TBwGHjCCsgF8LLmMMN1SU7tOVMbbXMP4VebXf4bk/zypBfe7Ko/m9H/LscNus4WyQUDrLPm8MeXgAeAT1XVgutrgttjm1wwzPb4LuCvgR8v8Hyn62u1F8I02wPMVNUzgU/xk/8F6Gfdweij+M8C3gtcP8mFJ3kM8HHgDVX10CSXvZglcg2yzqrqR1X1K8BG4JwkT5/EcpfSItfEt8ckLwUeqKrb+17WUau9EO4Hxpt8YzM27zxJ1gLrgAeHzlVVD1bVD5vJDwDP6TlTG23W58RV1UNHd/mrai9wYpL1k1h2khMZ/dL956r6xDyzDLLOlso15Dprlvk/wGeBLXOeGmJ7XDLXQNvjC4DzkhxgdFj5RUk+PGeeTtfXai+E24Azk5ye5CRGJ112z5lnN/CnzeOXA5+p5gzNkLnmHGc+j9Fx4KHtBv6kuXLm+cDhqvrm0KGSPOnocdMk5zD6d937L5FmmZcDd1fVOxaYbeLrrE2uIdZZkicmeXzz+BTgxcC/z5lt4ttjm1xDbI9V9aaq2lhVM4x+R3ymqi6aM1un62vtsb5wGlTVw0kuAW5idGXPFVV1V5K/A/ZV1W5GG85VSfYzOnF5wQrJ9ZdJzgMebnK9su9cSa5mdPXJ+iQHgbcxOsFGVV0G7GV01cx+4PvAq/rO1DLXy4HXJnkY+AFwwQRKHUb/g/tj4CvN8WeANwOnjWUbYp21yTXEOnsy8KEkaxgV0LVV9cmht8eWuSa+PS6kz/XlJ5UlScDqP2QkSWrJQpAkARaCJKlhIUiSAAtBktSwECRJgIUgSWpYCJIkAP4P9M34lt6yzigAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GarageArea\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQ/UlEQVR4nO3dfZBkVXnH8e/jIq/RAVxUwrLO4lAYqtS4Gd/KvBgjiq4DakyEaEUJYWMSUzGmKi5qqfnDBPPiCxGFjRDUIIhECQtrETFG/7GAXePLAq5scJFFDatJlgQtEX3yR985tOPM7u1hTt/ume+naop7z+3pefYw3b8595y+NzITSZIAHtZ1AZKk0WEoSJIKQ0GSVBgKkqTCUJAkFQd1XcBDsXr16pycnOy6DEkaK9u3b/9OZh4z37GxDoXJyUm2bdvWdRmSNFYi4s6Fjnn6SJJUGAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSirH+8JrGx+Sm6+Zt333ehiFXIml/HClIkoqRGilExIuBDcAjgYsz8186LkkdcWQhdaP6SCEiLomIeyJix5z2UyNiZ0TsiohNAJl5dWaeA7wGeHnt2iRJP2kYI4VLgfcCH5ptiIhVwAXAKcAe4OaIuCYzb20e8ubmuJa5hUYEkrpRfaSQmZ8D/mtO89OAXZl5R2beD1wBnB497wA+mZlfmO/5ImJjRGyLiG179+6tW7wkrTBdzSkcB9zVt78HeDrwR8BzgYmImMrMC+d+Y2ZuBjYDTE9P5xBq1QhxrkGqa6QmmjPzfOD8rutYrnxDlXQgXYXC3cDxfftrmjZpSRmE0mC6+pzCzcCJEbEuIg4GzgCuafvNETETEZv37dtXrUBJWomGsST1cuDzwEkRsScizs7MB4DXAtcDtwFXZuYtbZ8zM7dk5saJiYk6RUvSClX99FFmnrlA+1Zga+2fL0lqz8tcSJIKQ0GSVIzUktS2ImIGmJmamlr0c7gqZXnxk9HS0hjLkYITzZJUx1iGgiSpDkNBklSM5ZyCRpPn9aXx50hBklSMZSh4mQtJqmMsQ8HVR5JUh3MKy5Dn9iUt1liOFCRJdRgKkqTCUJAkFWMZCq4+kqQ6xjIUXH0kSXWMZShIkupwSeoYc+mppKXmSEGSVBgKkqTCUJAkFYaCJKkYy1DwcwqSVMdYrj7KzC3Alunp6XO6rkUrw0IrvXaft2HIlUh1jWUoqFsuhZWWr7E8fSRJqsNQkCQVhoIkqTAUJEmFoSBJKlx9JPVxZZVWurEcKfjhNUmqYyxHCn54bWn5wSxJs8ZypCBJqmMsRwrSQ+XcgTQ/Q0EL8o1TWnk8fSRJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpGMvPKUTEDDAzNTXVdSlSZ7w8iWoYy5FCZm7JzI0TExNdlyJJy8pYjhRWGj9ZLGlYxnKkIEmqw1CQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKP6cgLTN+0lkPhSMFSVJhKEiSCk8fSSPC0z4aBY4UJEmFoSBJKgwFSVIxlnMK3mRH48xLoWuUjeVIwZvsSFIdYxkKkqQ6xvL0kTQqxmkZ6TjVqu60GilExBNrFyJJ6l7bkcL7IuIQ4FLgsszcV68kafw5maxx1WqkkJm/BLwCOB7YHhEfiYhTqlYmSRq61hPNmXk78GbgDcCvAOdHxFcj4qW1ipMkDVfbOYUnRcS7gNuA5wAzmflzzfa7KtYnSRqitnMKfwd8AHhjZn5/
tjEzvxkRb65SmSRp6NqGwgbg+5n5I4CIeBhwaGZ+LzM/XK06SU5aa6jazincABzWt3940yZJWkbahsKhmfl/szvN9uF1SpIkdaVtKNwXEetndyLiF4Dv7+fxkqQx1HZO4XXAxyLim0AAjwVeXq0qSVInWoVCZt4cEU8ATmqadmbmD+uVJUnqwiAXxHsqMNl8z/qIIDM/VKUqSVInWoVCRHwYeDzwReBHTXMChoIkLSNtRwrTwMmZmTWLWelcjy6pa21XH+2gN7ksSVrG2o4UVgO3RsRNwA9mGzPztCpVSZI60TYU3lazCEnSaGi7JPWzEfE44MTMvCEiDgdW1S1NkjRsbS+dfQ5wFXBR03QccHWtoiRJ3Wg70fyHwLOAe6HccOfRtYqSJHWjbSj8IDPvn92JiIPofU5BkrSMtA2Fz0bEG4HDmnszfwzYspSFRMQJEXFxRFy1lM8rSWqvbShsAvYCXwF+D9hK737N+xURl0TEPRGxY077qRGxMyJ2RcQmgMy8IzPPHqx8SdJSarv66MfA3zdfg7gUeC99l8OIiFXABcApwB7g5oi4JjNvHfC5JUlLrO21j77OPHMImXnC/r4vMz8XEZNzmp8G7MrMO5rnvgI4HWgVChGxEdgIsHbt2jbfIklqaZBrH806FPgN4OhF/szjgLv69vcAT4+IRwFvB54SEedm5l/O982ZuRnYDDA9Pe1ktyQtobanj747p+ndEbEdeMtSFdL8jNcs1fNJkgbX9vTR+r7dh9EbOQxyL4Z+dwPH9+2vadokSR1r+8b+t33bDwC7gd9c5M+8GTgxItbRC4MzgN9a5HNJkpZQ29NHv7qYJ4+Iy4FnA6sjYg/w1sy8OCJeC1xP7/pJl2TmLQM+7wwwMzU1tZiyJEkLaHv66PX7O56Z71yg/cwF2rfS+6zDomTmFmDL9PT0OYt9DknSTxtk9dFTgWua/RngJuD2GkVJkrrRNhTWAOsz838BIuJtwHWZ+cpahUmShq/tZS4eA9zft39/0yZJWkbajhQ+BNwUEZ9o9l8MfLBOSQfmRLMk1dFqpJCZbwfOAv67+TorM/+iZmEHqGdLZm6cmJjoqgRJWpbanj4COBy4NzPfA+xpPmcgSVpG2t6O863AG4Bzm6aHA/9YqyhJUjfajhReApwG3AeQmd8EHlGrKElSN9pONN+fmRkRCRARR1Ss6YCcaJaWzuSm6+Zt333ehiFXolHQdqRwZURcBBwZEecANzD4DXeWjBPNklTHAUcKERHAR4EnAPcCJwFvycxPVa5NkjRkBwyF5rTR1sx8ImAQSNIy1vb00Rci4qlVK5Ekda7tRPPTgVdGxG56K5CC3iDiSbUKkyQN335DISLWZuY3gOcPqR5JUocOdProaoDMvBN4Z2be2f9Vv7z5RcRMRGzet29fVyVI0rJ0oFCIvu0TahYyCJekSlIdBwqFXGBbkrQMHWii+ckRcS+9EcNhzTY8ONH8yKrVSZKGar+hkJmrhlWIJKl7g1w6W5K0zBkKkqRiLEPBJamSVMdYhoJLUiWpjrEMBUlSHYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpKLt7ThHSkTMADNTU1NdlyItW5Obrpu3ffd5G6o+/1L+DA1uLEcKfqJZkuoYy1CQJNVhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKr33Ugf1d80UadYP+/nodo/EyliMFr30kSXWMZShIkuowFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwpvsSKrKm0o9NAv1X62bF43lSMGb7EhSHWMZCpKkOgwFSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSYShIkgpDQZJUGAqSpMJQkCQVhoIk
qTAUJEnFQV0XMCsijgDeB9wP/FtmXtZxSZK04lQdKUTEJRFxT0TsmNN+akTsjIhdEbGpaX4pcFVmngOcVrMuSdL8ap8+uhQ4tb8hIlYBFwAvAE4GzoyIk4E1wF3Nw35UuS5J0jyqnj7KzM9FxOSc5qcBuzLzDoCIuAI4HdhDLxi+yH7CKiI2AhsB1q5du/RFSxpZk5uum7d993kbqj5+IQs9z0IGracLXUw0H8eDIwLohcFxwMeBX4+I9wNbFvrmzNycmdOZOX3MMcfUrVSSVpiRmWjOzPuAs7quQ5JWsi5GCncDx/ftr2naJEkd6yIUbgZOjIh1EXEwcAZwzSBPEBEzEbF53759VQqUpJWq9pLUy4HPAydFxJ6IODszHwBeC1wP3AZcmZm3DPK8mbklMzdOTEwsfdGStILVXn105gLtW4GtNX+2JGlwXuZCklQYCpKkYixDwYlmSaojMrPrGhYtIvYCdy7y21cD31nCcmqy1jqstQ5rrWMpa31cZs776d+xDoWHIiK2ZeZ013W0Ya11WGsd1lrHsGody9NHkqQ6DAVJUrGSQ2Fz1wUMwFrrsNY6rLWOodS6YucUJEk/bSWPFCRJcxgKkqRiRYbCAveI7qqW4yPiMxFxa0TcEhF/3LQfHRGfiojbm/8e1bRHRJzf1P7liFjfQc2rIuLfI+LaZn9dRNzY1PTR5uq3RMQhzf6u5vjkkOs8MiKuioivRsRtEfHMUe3XiPiT5v//joi4PCIOHZV+ne9e64vpx4h4VfP42yPiVUOs9a+b34EvR8QnIuLIvmPnNrXujIjn97VXf4+Yr9a+Y38aERkRq5v94fVrZq6oL2AV8B/ACcDBwJeAkzus51hgfbP9COBr9O5d/VfApqZ9E/COZvuFwCeBAJ4B3NhBza8HPgJc2+xfCZzRbF8I/H6z/QfAhc32GcBHh1znB4HfbbYPBo4cxX6ld+fBrwOH9fXnq0elX4FfBtYDO/raBupH4Gjgjua/RzXbRw2p1ucBBzXb7+ir9eTm9X8IsK55X1g1rPeI+Wpt2o+ndxXpO4HVw+7XofzSj9IX8Ezg+r79c4Fzu66rr55/Bk4BdgLHNm3HAjub7YuAM/seXx43pPrWAJ8GngNc2/ySfqfvRVf6t/nFfmazfVDzuBhSnRPNG23MaR+5fuXBW9Qe3fTTtcDzR6lfgck5b7QD9SNwJnBRX/tPPK5mrXOOvQS4rNn+idf+bL8O8z1ivlqBq4AnA7t5MBSG1q8r8fTRQveI7lxzGuApwI3AYzLzW82hbwOPaba7rv/dwJ8BP272HwX8T/bukzG3nlJrc3xf8/hhWAfsBf6hOdX1gYg4ghHs18y8G/gb4BvAt+j103ZGs19nDdqPXf/ezvoden9xwwjWGhGnA3dn5pfmHBparSsxFEZSRPwM8E/A6zLz3v5j2fsToPO1wxHxIuCezNzedS0tHERvaP7+zHwKcB+90xzFCPXrUcDp9ILsZ4EjgFM7LWoAo9KPBxIRbwIeAC7rupb5RMThwBuBt3RZx0oMhZG7R3REPJxeIFyWmR9vmv8zIo5tjh8L3NO0d1n/s4DTImI3cAW9U0jvAY6MiNkbNvXXU2ptjk8A3x1SrXuAPZl5Y7N/Fb2QGMV+fS7w9czcm5k/BD5Or69HsV9nDdqPnb7uIuLVwIuAVzQhxn5q6qrWx9P7w+BLzWtsDfCFiHjsMGtdiaHwkO8RvZQiIoCLgdsy8519h64BZlcSvIreXMNs+283qxGeAezrG8ZXlZnnZuaazJyk12//mpmvAD4DvGyBWmf/DS9rHj+Uvygz89vAXRFxUtP0a8CtjGC/0jtt9IyIOLz5fZitdeT6tc+g/Xg98LyIOKoZGT2vaasuIk6ld8rztMz83px/wxnNaq51wInATXT0HpGZX8nMR2fmZPMa20NvEcq3GWa/1pg8GfUvejP5X6O3wuBNHdfyi/SG3l8Gvth8vZDeOeJPA7cDNwBHN48P4IKm9q8A
0x3V/WweXH10Ar0X0y7gY8AhTfuhzf6u5vgJQ67x54FtTd9eTW91xkj2K/DnwFeBHcCH6a2IGYl+BS6nN9fxQ3pvVGcvph/pnc/f1XydNcRad9E77z77+rqw7/FvamrdCbygr736e8R8tc45vpsHJ5qH1q9e5kKSVKzE00eSpAUYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUvH/MUMNbJoJN3kAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WoodDeckSF\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD8CAYAAACYebj1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQ0ElEQVR4nO3dbbBdVX3H8e+PICC0XkVSi4GYMMlgMz4UekEddWpbHYMYsEorGZ06NkPqA622nWmhdlr7ojM4Y1Vo8SFVpFALIlKaYCwFqvKGEQI6yoOUiChBLfGhoWUckfrvi7Ozuaa5ybnJ3Xffc873M3PHs9c599z/WW74sfZaZ+1UFZIkARzSdwGSpMXDUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktRZNKCT5pSQfTnJ1krf2XY8kTaJ0+Y3mJJcArwYerqrnzGhfC1wILAE+WlUXzHjuEOCyqnrj/t7/mGOOqRUrVsx73ZI0zm6//fbvVdXSvT13aMd/+1Lg74DLdjckWQJcDLwC2AHclmRzVd2d5AzgrcDlw7z5ihUr2LZt27wXLUnjLMk3Z3uu08tHVXUz8IM9mk8FtlfV/VX1GHAlcGbz+s1VdRrwhtneM8nGJNuSbNu5c2dXpUvSROp6pLA3y4AHZxzvAF6Q5GXAa4HDga2z/XJVbQI2AUxPT7ubnyTNoz5CYa+q6vPA53suQ5ImWh+rjx4Cjp9xfFzTNrQk65Js2rVr17wWJkmTro9QuA1YnWRlksOAs4HNc3mDqtpSVRunpqY6KVCSJlWnoZDkCuAW4MQkO5JsqKrHgXOB64F7gKuq6q4u65AkDafTOYWqWj9L+1b2MZksSerHovlG81w4pyBJ3Vg0q4/moqq2AFump6fPOdD3WHHeZ/ba/sAFpx/oW0rSyBvJkYIkqRuGgiSpNZKh4JyCJHVjJEPB7ylIUjdGMhQkSd0wFCRJrZEMBecUJKkbIxkKzilIUjdGMhQkSd0wFCRJLUNBktQyFCRJrZEMBVcfSVI3RjIUXH0kSd0YyVCQJHXDUJAktQwFSVLLUJAktUYyFFx9JEndGMlQcPWRJHVjJENBktQNQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEmtkQwFv7wmSd0YyVDwy2uS1I2RDAVJUjcMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSayRDwb2PJKkbIxkK7n0kSd0YyVCQJHXDUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktQ7tu4CZkrwGOB14CvCxqvq3nkuSpInS+UghySVJHk5y5x7ta5Pcm2R7kvMAquraqjoHeAvw+q5rkyT9rIW4fHQpsHZmQ5IlwMXAacAaYH2SNTNe8ufN85KkBdR5KFTVzcAP9mg+FdheVfdX1WPAlcCZGXgP8NmqumNv75dkY5JtSbbt3Lmz2+IlacL0NdG8DHhwxvGOpu33gZcDZyV5y95+sao2VdV0VU0vXbq0+0olaYIsqonmqroIuKjvOiRpUvU1UngIOH7G8XFN21CSrEuyadeuXfNemCRNsr5C4TZgdZKVSQ4DzgY2D/vLVbWlqjZOTU11VqAkTaKFWJJ6BXALcGKSHUk2VNXjwLnA9cA9wFVVdVfXtUiS9q3zOYWqWj9L+1Zg64G8Z5J1wLpVq1YdTGmSpD0sqonmYVXV
FmDL9PT0OX3X0qcV531mr+0PXHD6AlciaVy495EkqWUoSJJaIxkKLkmVpG44pzCGnGuQdKBGcqQgSeqGoSBJao3k5SPNLy83SdptqJFCkud2XchcONEsSd0Y9vLRB5PcmuRtSXrfcMi9jySpG0NdPqqqlyZZDfwucHuSW4GPV9UNnVYnYPbLO5I034aeaK6q+xjcJvNPgV8FLkrytSSv7ao4SdLCGmqkkOR5wJuB04EbgHVVdUeSZzLYAfWa7kocP07sSlqshh0p/C1wB/D8qnr77vsnV9W3GYweFpQTzZLUjWFD4XTgn6rqRwBJDklyJEBVXd5VcbNxolmSujFsKNwIPHnG8ZFNmyRpjAwbCkdU1f/sPmgeH9lNSZKkvgwbCo8mOXn3QZJfAX7UTUmSpL4Mu83FO4FPJfk2EOAXgdd3VpUWBVdJSZNn2C+v3Zbk2cCJTdO9VfWT7sqSJPVhLhvinQKsaH7n5CRU1WWdVLUfSdYB61atWtXHn5eksTXshniXA+8FXsIgHE4Bpjusa59ckipJ3Rh2pDANrKmq6rIYSVK/hl19dCeDyWVJ0hgbdqRwDHB3szvqj3c3VtUZnVSlTrjbqqT9GTYU3t1lEZKkxWHYJalfSPIsYHVV3djse7Sk29IkSQtt2NVH5wBXAx9pmpYB13ZVlCSpH8NONL8deDHwCLQ33PmFroqSJPVj2DmFH1fVY0kASHIo0NvyVL+81i+3v5DG17Ch8IUkfwY8OckrgLcBW7ora9+qaguwZXp6+py+aujCqK8O2lf9BoY0Goa9fHQesBP4KvB7wFZ6uOOaJKlbw64++inw982PJGlMDRUKSb7BXuYQquqEea9IktSbuex9tNsRwG8BR89/OZKkPg01p1BV35/x81BVfQBw5lCSxsywl49OnnF4CIORw1zuxSBJGgHD/ov9b2Y8fhx4APjtea9GktSrYVcf/VrXhWi8+YU3aTQMe/noj/b1fFW9b37KkST1aS6rj04BNjfH64Bbgfu6KEqS1I9hQ+E44OSq+m+AJO8GPlNVb+yqsH3pY+8jt3CQNAmG3ebiGcBjM44fa9p6UVVbqmrj1NRUXyVI0lgadqRwGXBrkn9ujl8D/EM3JUmS+jLs6qO/TvJZ4KVN05ur6kvdlTVaXFkjaVwMe/kI4Ejgkaq6ENiRZGVHNUmSejLsktS/ZLAC6UTg48CTgH9kcDc2ad45+pL6Meycwm8CJwF3AFTVt5P8fGdVjYlRv2nOQphrHxkWUreGvXz0WFUVzfbZSY7qriRJUl+GDYWrknwEeGqSc4Ab8YY7kjR29nv5KEmATwLPBh5hMK/wF1V1Q8e1SZIW2H5Doaoqydaqei5gEEjSGBv28tEdSU7ptBJJUu+GXX30AuCNSR4AHgXCYBDxvK4KkyQtvH2GQpLlVfUt4JULVI8kqUf7Gylcy2B31G8m+XRVvW4hipIk9WN/cwqZ8fiELguRJPVvf6FQszyWJI2h/V0+en6SRxiMGJ7cPIYnJpqfMl+FJDkBeBcwVVVnzdf7SpKGt89QqKolB/PmSS4BXg08XFXPmdG+FrgQWAJ8tKouqKr7gQ1Jrj6Yv6nJ5J5I0vyYy9bZB+JSYO3MhiRLgIuB04A1wPokazquQ5I0hGG/p3BAqurmJCv2aD4V2N6MDEhyJXAmcPcw75lkI7ARYPny5fNWqyaLIwtp77oeKezNMuDBGcc7gGVJnp7kw8BJSc6f7ZeralNVTVfV9NKlS7uuVZImSqcjhbmoqu8Db+m7DkmaZH2EwkPA8TOOj2vahpZkHbBu1apV81kX4I1xxo3/f0pz08flo9uA1UlWJjkMOBvYPJc3qKotVbVxamqqkwIlaVJ1GgpJrgBuAU5MsiPJhqp6HDgXuB64B7iqqu7qsg5J0nC6Xn20fpb2rcDWLv+2JGnuFs1E81x0OaegyeZSVU26PuYUDppzCpLUjZEMBUlSNwwFSVJrJEMhybokm3bt2tV3KZI0VkYy
FJxTkKRujGQoSJK6YShIklqGgiSp5ZfXpCH4pTZNipEcKTjRLEndGMlQkCR1w1CQJLUMBUlSy4lm6SAcyJ3dnJzWYjaSIwUnmiWpGyMZCpKkbhgKkqSWoSBJahkKkqSWoSBJarkkVVok5rq/kvsxqQsjOVJwSaokdWMkQ0GS1A1DQZLUMhQkSS1DQZLUMhQkSS1DQZLU8nsK0iJ3INtzSwdqJEcKfk9BkroxkqEgSeqGoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJahkKkqSWoSBJarn3kTRm5uvezeN8D+hx/mwHayRHCu59JEndGMlQkCR1w1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUWzZ3XkhwFfBB4DPh8VX2i55IkaeJ0OlJIckmSh5PcuUf72iT3Jtme5Lym+bXA1VV1DnBGl3VJkvau68tHlwJrZzYkWQJcDJwGrAHWJ1kDHAc82LzsfzuuS5K0F51ePqqqm5Os2KP5VGB7Vd0PkORK4ExgB4Ng+DL7CKskG4GNAMuXL5//oqWOzXbT+FH5u/N10/v57Ie5/u1RMl/9Paw+JpqX8cSIAAZhsAy4Bnhdkg8BW2b75araVFXTVTW9dOnSbiuVpAmzaCaaq+pR4M191yFJk6yPkcJDwPEzjo9r2oaWZF2STbt27ZrXwiRp0vURCrcBq5OsTHIYcDaweS5vUFVbqmrj1NRUJwVK0qTqeknqFcAtwIlJdiTZUFWPA+cC1wP3AFdV1V1d1iFJGk7Xq4/Wz9K+Fdja5d+WJM3dSG5z4ZyCJHVjJEPBOQVJ6sZIhoIkqRupqr5rOGBJdgLfPMBfPwb43jyWM07sm9nZN7Ozb2a32PrmWVW112//jnQoHIwk26pquu86FiP7Znb2zezsm9mNUt94+UiS1DIUJEmtSQ6FTX0XsIjZN7Ozb2Zn38xuZPpmYucUJEn/3ySPFCRJezAUJEmtiQyFWe4RPTGSHJ/kc0nuTnJXknc07UcnuSHJfc3/Pq1pT5KLmv76SpKT+/0E3UqyJMmXklzXHK9M8sXm83+y2d2XJIc3x9ub51f0WfdCSPLUJFcn+VqSe5K8yPNmIMkfNv883ZnkiiRHjOK5M3GhsI97RE+Sx4E/rqo1wAuBtzd9cB5wU1WtBm5qjmHQV6ubn43Ahxa+5AX1DgY7+O72HuD9VbUK+CGwoWnfAPywaX9/87pxdyHwr1X1bOD5DPpp4s+bJMuAPwCmq+o5wBIGtwUYvXOnqibqB3gRcP2M4/OB8/uuq+c++RfgFcC9wLFN27HAvc3jjwDrZ7y+fd24/TC46dNNwK8D1wFh8E3UQ/c8fxhs//6i5vGhzevS92fosG+mgG/s+Rk9bwqeuM3w0c25cB3wylE8dyZupMDs94ieSM2w9STgi8Azquo7zVPfBZ7RPJ6kPvsA8CfAT5vjpwP/VYP7gMDPfva2X5rndzWvH1crgZ3Ax5vLax9NchSeN1TVQ8B7gW8B32FwLtzOCJ47kxgKaiT5OeDTwDur6pGZz9XgP2Emar1yklcDD1fV7X3XskgdCpwMfKiqTgIe5YlLRcBknjcAzTzKmQyC85nAUcDaXos6QJMYCgd9j+hxkORJDALhE1V1TdP8n0mObZ4/Fni4aZ+UPnsxcEaSB4ArGVxCuhB4apLdN6Sa+dnbfmmenwK+v5AFL7AdwI6q+mJzfDWDkJj08wbg5cA3qmpnVf0EuIbB+TRy584khsJB3yN61CUJ8DHgnqp634ynNgNvah6/icFcw+7232lWk7wQ2DXjcsHYqKrzq+q4qlrB4Lz496p6A/A54KzmZXv2y+7+Oqt5/dj+V3JVfRd4MMmJTdNvAHcz4edN41vAC5Mc2fzztbtvRu/c6XtSo6dJoVcB/wF8HXhX3/X08PlfwmCI/xXgy83Pqxhc07wJuA+4ETi6eX0Y
rNj6OvBVBissev8cHffRy4DrmscnALcC24FPAYc37Uc0x9ub50/ou+4F6JdfBrY15861wNM8b9q++Svga8CdwOXA4aN47rjNhSSpNYmXjyRJszAUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1Po/mKvfyC9e538AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenPorchSF\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAP0ElEQVR4nO3dbbBdZXnG8f8lqIjVWCVaJ4CBhsEyIyqNqFWn6kiL0qhVW6U6dRyGlFY7Ou1MDdax9kNn9ENFadWaVodqLYivJRIHQa1+cYSAqCBSo401aJv4Fqx1jODdD3ud5SHmJPsk5znrrH3+v5k92evZ++xzP7DJxfOy1kpVIUkSwL2GLkCStHIYCpKknqEgSeoZCpKknqEgSeodO3QBR+OEE06o9evXD12GJI3KjTfe+J2qWnuw10YZCkk2AZs2bNjAjh07hi5HkkYlyTcWem2U00dVta2qNq9Zs2boUiRppowyFCRJbYwyFJJsSrJ13759Q5ciSTNllKHg9JEktTHKUJAktWEoSJJ6owwF1xQkqY1RhoJrCpLUxihPXlsK67dcfdD2XW84b5krkaSVY5QjBUlSG4aCJKk3ylBwoVmS2hhlKLjQLEltjDIUJEltGAqSpJ6hIEnqGQqSpN4oQ8HdR5LUxihDwd1HktTGKENBktSGoSBJ6hkKkqSeoSBJ6o0yFNx9JEltjDIU3H0kSW2MMhQkSW0YCpKknqEgSeoZCpKknqEgSeoZCpKknqEgSeqNMhQ8eU2S2hhlKHjymiS1McpQkCS1YShIknqGgiSpZyhIknqGgiSpZyhIknqGgiSpZyhIknqGgiSpZyhIknqGgiSpN8pQ8IJ4ktTGKEPBC+JJUhujDAVJUhuGgiSpZyhIknqGgiSpZyhIknqGgiSpZyhIknqGgiSpZyhIknqGgiSpZyhIknqGgiSpZyhIknqGgiSpZyhIknrHDl3AfEmeC5wHPBB4Z1V9fOCSJGlVaT5SSPKuJHuS3HJA+7lJbk+yM8kWgKr6SFVdCFwEvLB1bZKke1qOkcJlwN8D755rSHIM8FbgHGA3cEOSq6rqy91bXtu9Pgrrt1y9qPfvesN5jSqRpKPTfKRQVZ8BvndA89nAzqr6elXtB64AnpOJNwIfq6qbWtcmSbqnoRaa1wHfnHe8u2v7U+AZwAuSXHSwH0yyOcmOJDv27t3bvlJJWkVW1EJzVV0KXHqY92wFtgJs3LixlqMuSVothhop3AGcNO/4xK5NkjSgoUYKNwCnJTmFSRi8CPiDaX84ySZg04YNGxqV94sWu5gsSWO0HFtSLwc+C5yeZHeSC6rqLuAVwDXAbcCVVXXrtJ9ZVduqavOaNWvaFC1Jq1TzkUJVnb9A+3Zge+vfL0ma3igvc5FkU5Kt+/btG7oUSZopowwFp48kqY1RhoIkqY0VdZ7CarHQTiYvfyFpaI4UJEm9UYaCC82S1MYoQ8GFZklqwzWFFcS1BklDG+VIQZLUxihDwTUFSWpjlKHgmoIktTHKUJAktTFVKCR5VOtCJEnDm3ak8LYk1yf5kyTO2UjSjJoqFKrqKcCLmdwt7cYk/5rknKaVSZKW3dRrClX1VeC1wKuB3wQuTfKVJM9rVdxC3H0kSW1Mu6ZwZpJLmNwl7enApqr6te75JQ3rOyh3H0lSG9Oe0fx3wD8Br6mqH881VtW3kry2SWUD8V7MklazaUPhPODHVXU3QJJ7AcdV1f9V1XuaVSdJWlbThsJ1wDOA/+2Ojwc+DvxGi6L
UhtdWknQ40y40H1dVc4FA9/z4NiVJkoYybSj8KMlZcwdJfh348SHeL0kaoWmnj14FvD/Jt4AAvwK8sFlVh5FkE7Bpw4YNQ5UgSTNpqlCoqhuSPBI4vWu6vap+2q6sw9azDdi2cePGC4eqQZJm0WJusvM4YH33M2cloare3aQq3YMLxJKWy1ShkOQ9wK8CNwN3d80FGAoDMiwkLbVpRwobgTOqqloWI0ka1rS7j25hsrgsSZph044UTgC+nOR64CdzjVX17CZV6ah4qQ5JR2raUHh9yyIkSSvDtFtSP53kEcBpVXVdkuOBY9qWtjDPU5CkNqa9dPaFwAeAd3RN64CPtCrqcLx0tiS1Me1C88uBJwF3Qn/DnYe2KkqSNIxpQ+EnVbV/7iDJsUzOU5AkzZBpQ+HTSV4D3K+7N/P7gW3typIkDWHaUNgC7AW+BPwRsJ3J/ZolSTNk2t1HPwP+sXtoxni5DElzpr320X9ykDWEqjp1ySuSJA1mMdc+mnMc8HvAg5e+HEnSkKZaU6iq78573FFVbwacW5CkGTPt9NFZ8w7vxWTksJh7MUiSRmDav9j/dt7zu4BdwO8veTWSpEFNu/voaa0LkSQNb9rpoz871OtV9aalKWc6XhBveSz2EtxuYZXGb9qT1zYCf8zkQnjrgIuAs4AHdI9l5QXxJKmNadcUTgTOqqofAiR5PXB1Vb2kVWGSpOU37UjhYcD+ecf7uzZJ0gyZdqTwbuD6JB/ujp8L/HObkiRJQ5l299HfJPkY8JSu6WVV9fl2ZUmShjDt9BHA8cCdVfUWYHeSUxrVJEkayLS34/wr4NXAxV3TvYF/aVWUJGkY064p/C7wWOAmgKr6VpJl34qqlW2x5zWA5zZIK82000f7q6roLp+d5P7tSpIkDWXaULgyyTuAByW5ELgOb7gjSTPnsNNHSQK8D3gkcCdwOvC6qrq2cW2SpGV22FCoqkqyvaoeBRgEkjTDpp0+uinJ45pWIkka3LS7jx4PvCTJLuBHQJgMIs5sVZgkafkdMhSSnFxV/wX89jLVI0ka0OFGCh9hcnXUbyT5YFU9fzmKkiQN43BrCpn3/NSWhUiShne4UKgFni+5JKcmeWeSD7T8PZKkhR0uFB6d5M4kPwTO7J7fmeSHSe483IcneVeSPUluOaD93CS3J9mZZAtAVX29qi448q5Iko7WIUOhqo6pqgdW1QOq6tju+dzxA6f4/MuAc+c3JDkGeCvwTOAM4PwkZxxh/ZKkJbSYS2cvWlV9BvjeAc1nAzu7kcF+4ArgOdN+ZpLNSXYk2bF3794lrFaS1DQUFrAO+Oa8493AuiQPSfIPwGOTXHzwH4Wq2lpVG6tq49q1a1vXKkmryrQnrzVXVd8FLhq6DklazYYIhTuAk+Ydn9i1TS3JJmDThg0blrIujcBC92xY7H0ZlupzpFkzxPTRDcBpSU5Jch/gRcBVi/mAqtpWVZvXrFnTpEBJWq2ahkKSy4HPAqcn2Z3kgqq6C3gFcA1wG3BlVd3asg5J0nSaTh9V1fkLtG8Htrf83ZKkxVsxC82L4ZrC7DuS+z1LOnpDrCkcNdcUJKmNUYaCJKkNQ0GS1HNNQYNy7UBaWUY5UnBNQZLaGGUoSJLaMBQkST3XFDQTvJaRtDRGOVJwTUGS2hhlKEiS2jAUJEk9Q0GS1DMUJEk9dx9J8yzHLiZ3SmklG+VIwd1HktTGKENBktSGoSBJ6hkKkqSeoSBJ6hkKkqSeW1I105bqJj5uI9VqMcqRgltSJamNUYaCJKkNQ0GS1DMUJEk9Q0GS1DMUJEk9Q0GS1PM8BekoeP6CZs0oRwqepyBJbYwyFCRJbRgKkqSeoSBJ6hkKkqSeoSBJ6hkKkqSeoSBJ6hkKkqSeoSBJ6hkKkqSeoSBJ6nlBPKmBhS6Ut5Sf5UX31MIoRwpeEE+S2hhlKEiS2jAUJEk9Q0GS1DMUJEk9Q0GS1DMUJEk9Q0GS1DMUJEk9Q0GS1DMUJEk9Q0GS1DM
UJEk9Q0GS1DMUJEk9Q0GS1DMUJEm9FXPntST3B94G7Af+vareO3BJkrTqNB0pJHlXkj1Jbjmg/dwktyfZmWRL1/w84ANVdSHw7JZ1SZIOrvX00WXAufMbkhwDvBV4JnAGcH6SM4ATgW92b7u7cV2SpINoOn1UVZ9Jsv6A5rOBnVX1dYAkVwDPAXYzCYabOURYJdkMbAY4+eSTl75oaSDrt1y9JO/f9YbzBnn/Yi22v0v5u5fKcvwzbf3v4UBDLDSv4+cjApiEwTrgQ8Dzk7wd2LbQD1fV1qraWFUb165d27ZSSVplVsxCc1X9CHjZ0HVI0mo2xEjhDuCkeccndm1TS7IpydZ9+/YtaWGStNoNEQo3AKclOSXJfYAXAVct5gOqaltVbV6zZk2TAiVptWq9JfVy4LPA6Ul2J7mgqu4CXgFcA9wGXFlVt7asQ5I0nda7j85foH07sL3l75YkLd4oL3PhmoIktTHKUHBNQZLaGGUoSJLaSFUNXcMRS7IX+MYR/vgJwHeWsJyVxv6N1yz3DWa7f2Pp2yOq6qBn/446FI5Gkh1VtXHoOlqxf+M1y32D2e7fLPTN6SNJUs9QkCT1VnMobB26gMbs33jNct9gtvs3+r6t2jUFSdIvWs0jBUnSAQwFSVJvVYbCAveIHpWD3f86yYOTXJvkq92fv9y1J8mlXX+/mOSs4So/vCQnJflUki8nuTXJK7v2WenfcUmuT/KFrn9/3bWfkuRzXT/e111FmCT37Y53dq+vH7L+aSQ5Jsnnk3y0O56lvu1K8qUkNyfZ0bXNxHcTVmEoHOIe0WNzGQfc/xrYAnyiqk4DPtEdw6Svp3WPzcDbl6nGI3UX8OdVdQbwBODl3b+jWenfT4CnV9WjgccA5yZ5AvBG4JKq2gB8H7ige/8FwPe79ku69610r2RyFeQ5s9Q3gKdV1WPmnZMwK99NqKpV9QCeCFwz7/hi4OKh6zrCvqwHbpl3fDvw8O75w4Hbu+fvAM4/2PvG8AD+DThnFvsHHA/cBDyeyZmwx3bt/feUyWXmn9g9P7Z7X4au/RB9OpHJX4xPBz4KZFb61tW5CzjhgLaZ+W6uupECC98jehY8rKq+3T3/b+Bh3fPR9rmbTngs8DlmqH/d9MrNwB7gWuBrwA9qcr8RuGcf+v51r+8DHrK8FS/Km4G/AH7WHT+E2ekbQAEfT3Jjks1d28x8N1fMPZq1tKqqkox6v3GSXwI+CLyqqu5M0r829v5V1d3AY5I8CPgw8MiBS1oSSX4H2FNVNyZ56tD1NPLkqrojyUOBa5N8Zf6LY/9ursaRwlHfI3oF+58kDwfo/tzTtY+uz0nuzSQQ3ltVH+qaZ6Z/c6rqB8CnmEypPCjJ3P+oze9D37/u9TXAd5e51Gk9CXh2kl3AFUymkN7CbPQNgKq6o/tzD5NAP5sZ+m6uxlA46ntEr2BXAS/tnr+UyVz8XPsfdjshngDsmzfUXXEyGRK8E7itqt4076VZ6d/aboRAkvsxWS+5jUk4vKB724H9m+v3C4BPVjdBvdJU1cVVdWJVrWfy39Ynq+rFzEDfAJLcP8kD5p4DvwXcwox8N4HVt9Dcfd+eBfwHk3ncvxy6niPsw+XAt4GfMpmnvIDJXOwngK8C1wEP7t4bJjuuvgZ8Cdg4dP2H6duTmczbfhG4uXs8a4b6dybw+a5/twCv69pPBa4HdgLvB+7btR/XHe/sXj916D5M2c+nAh+dpb51/fhC97h17u+PWfluVpWXuZAk/dxqnD6SJC3AUJAk9QwFSVLPUJAk9QwFSVLPUJAk9QwFSVLv/wFlnYkUXpVQngAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "EnclosedPorch\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQjUlEQVR4nO3da7CdVX3H8e9PEAGrsUq0DgEDDYNmxhuNqKNO1altkAasUktGp47DkNJKR6edqaF2rH3RGXxRUTp4SUeGai2I14LEQfDGG0YIiAIiJdJYgraJt9BaRwT/fbGfPB7jOclOOOs8Z+/9/czsyfOsfcl/nWzOj7XWc0lVIUkSwKOGLkCStHwYCpKknqEgSeoZCpKknqEgSeodPnQBj8QxxxxTq1evHroMSZoot9xyy/eqauV8z010KKxevZpt27YNXYYkTZQk317oOaePJEk9Q0GS1DMUJEk9Q0GS1DMUJEk9Q0GS1DMUJEk9Q0GS1Jvok9ceidWbr5m3fceFpy9xJZK0fDhSkCT1DAVJUm/ZhEKSZyR5f5KPJ/nToeuRpFnUNBSSXJpkV5I79mlfn+TuJNuTbAaoqruq6jzgtcCLWtYlSZpf65HCZcD6uQ1JDgMuAU4D1gIbk6ztnjsDuAbY2rguSdI8moZCVd0A/GCf5lOB7VV1b1U9CFwBnNm9/qqqOg143UKfmWRTkm1Jtu3evbtV6ZI0k4Y4JPVY4L45+zuB5yd5KfBq4DHsZ6RQVVuALQDr1q2rdmVK0uxZNucpVNWXgC8NXIYkzbQhjj66Hzhuzv6qrm1sSTYk2bJnz55FLUySZt0QoXAzcFKSE5IcAZwNXHUwH1BVV1fVphUrVjQpUJJmVetDUi8HbgROTrIzyTlV9RBwPnAtcBdwZVXd2bIOSdJ4mq4pVNXGBdq34mGnkrTsLJszmg+GawqS1MZEhoJrCpLUxkSGgiSpjYkMBaePJKmNiQwFp48kqY2JDAVJUhuGgiSpZyhIknoTGQouNEtSGxMZCi40S1IbExkKkqQ2DAVJUs9QkCT1JjIUXGiWpDYmMhRcaJakNiYyFCRJbRgKkqSeoSBJ6hkKkqTeRIaCRx9JUhsTGQoefSRJbUxkKEiS2jAUJEk9Q0GS1DMUJEk9Q0GS1DMUJEm9iQwFz1OQpDYmMhQ8T0GS2pjIUJAktWEoSJJ6hoIkqWcoSJJ6hoIkqWcoSJJ6hoIkqWcoSJJ6hoIkqTeRoeBlLiSpjYkMBS9zIUltTGQoSJLaMBQkST1DQZLUMxQkST1DQZLUMxQkST1DQZLUMxQkST1DQZLUMxQkST1DQZLUMxQkST1DQZLUMxQkSb3Dhy5griSvAk4HHg98sKo+N3BJkjRTmo8UklyaZFeSO/ZpX5/k7iTbk2wGqKpPV9W5wHnAH7WuTZL0y5Zi+ugyYP3chiSHAZcApwFrgY1J1s55yd90z0uSllDzUKiqG4Af7NN8KrC9qu6tqgeBK4AzM/JO4LNVdet8n5dkU5JtSbbt3r27bfGSNGOGWmg+Frhvzv7Oru3Pgd8Bzkpy3nxvrKotVbWuqtatXLmyfaWSNEOW1UJzVV0MXDx0HZI0q4YaKdwPHDdnf1XXNpYkG5Js2bNnz6IXJkmzbKhQuBk4KckJSY4AzgauGvfNVXV1VW1asWJFswIlaRYtxSGplwM3Aicn2ZnknKp6CDgfuBa4C7iyqu5sXYskaf/GWlNI8syquv1Q/oKq2rhA+1Zg66F8ZpINwIY1a9Ycy
tslSQsYd6Tw3iQ3JfmzJIPP2Th9JEltjBUKVfUS4HWMFodvSfKvSV7RtDJJ0pIbe02hqu5hdKbxW4HfBi5O8s0kr25VnCRpaY0VCkmeleQiRovCLwc2VNUzuu2LGta3UD0ekipJDYw7UvhH4Fbg2VX1pr2XoKiq7zAaPSwp1xQkqY1xz2g+HfhJVT0MkORRwJFV9X9V9eFm1UmSltS4I4XrgaPm7B/dtUmSpsi4oXBkVf3v3p1u++g2JR2YawqS1Ma4ofDjJKfs3UnyW8BP2pR0YK4pSFIb464pvAX4WJLvAAF+A++MJklTZ6xQqKqbkzwdOLlruruqftauLEnSEA7mfgrPA1Z37zklCVX1oSZVSZIGMe4F8T4M/CZwG/Bw11zAIKHgBfEkqY1xRwrrgLVVVS2LGVdVXQ1cvW7dunOHrkWSpsm4Rx/dwWhxWZI0xcYdKRwDfCPJTcBP9zZW1RlNqpIkDWLcUHhHyyIkScvDuIekfjnJ04CTqur6JEcDh7UtTZK01Ma9dPa5wMeBD3RNxwKfblWUJGkY4y40vwl4EfAA9DfceXKrog7Eax9JUhvjhsJPq+rBvTtJDmd0nsIgvPaRJLUxbih8OclfA0d192b+GHB1u7IkSUMYNxQ2A7uB24E/AbYywB3XJEltjXv00c+Bf+oekqQpNe61j/6DedYQqurERa9IkjSYg7n20V5HAn8IPHHxy5EkDWmsNYWq+v6cx/1V9W7g9Ma1SZKW2LjTR6fM2X0Uo5HDwdyLYVF56WxJamPcX+z/MGf7IWAH8NpFr2ZMXjpbktoY9+ijl7UuRO2t3nzNvO07LnQmUNLIuNNHf7G/56vqXYtTjiRpSAdz9NHzgKu6/Q3ATcA9LYqSJA1j3FBYBZxSVf8DkOQdwDVV9fpWhUmSlt64l7l4CvDgnP0HuzZJ0hQZd6TwIeCmJJ/q9l8F/HObkiRJQxn36KO/T/JZ4CVd0xur6qvtypIkDWHc6SOAo4EHquo9wM4kJzSqSZI0kHFvx/m3wFuBC7qmRwP/0qooSdIwxh0p/AFwBvBjgKr6DvC4VkUdiLfjlKQ2xg2FB6uq6C6fneSx7Uo6MG/HKUltjBsKVyb5APCEJOcC1+MNdyRp6hzw6KMkAT4KPB14ADgZeHtVXde4NknSEjtgKFRVJdlaVc8EDAJJmmLjTh/dmuR5TSuRJA1u3DOanw+8PskORkcghdEg4lmtCpMkLb39hkKS46vqP4HfW6J6JEkDOtBI4dOMro767SSfqKrXLEVRkqRhHGhNIXO2T2xZiCRpeAcKhVpgW5I0hQ40ffTsJA8wGjEc1W3DLxaaH9+0OknSktpvKFTVYUtViCRpeAdz6WxJ0pQb9zwF7cfqzdfM277jwtOXuJKlsVB/92dafxbStHGkIEnqLZuRQpITgbcBK6rqrKHrmQTTPEKZ5r5Jy1nTkUKSS5PsSnLHPu3rk9ydZHuSzQBVdW9VndOyHknS/rWeProMWD+3IclhwCXAacBaYGOStY3rkCSNoen0UVXdkGT1Ps2nAtur6l6AJFcAZwLfGOczk2wCNgEcf/zxi1arJpvTTdLiGGKh+Vjgvjn7O4FjkzwpyfuB5ya5YKE3V9WWqlpXVetWrlzZulZJminLZqG5qr4PnDd0HZI0y4YIhfuB4+bsr+raxpZkA7BhzZo1i1nXzFqKqZdDObdB0tIbYvroZuCkJCckOQI4G7jqYD6gqq6uqk0rVqxoUqAkzarWh6ReDtwInJxkZ5Jzquoh4HzgWuAu4MqqurNlHZKk8bQ++mjjAu1bga2H+rlOH+2fUzW/cLA/C49W0qybyMtcOH0kSW1MZChIktowFCRJvWVznsLBcE1haczi2oRnRmvWTeRIwTUFSWpjIkNBktSGoSBJ6rmmMAFmcW5/If4spLYmcqTgmoIktTGRoSBJasNQkCT1DAVJUs+F5mXERVRJQ5vIkYILzZLUxkSGgiSpDUNBktQzFCRJPUNBktTz6
KMxHcqRQV6GeXr4b6lZMZEjBY8+kqQ2JjIUJEltGAqSpJ6hIEnqGQqSpJ6hIEnqGQqSpJ7nKQzAq6FOj0M5f8FzHrScTeRIwfMUJKmNiQwFSVIbhoIkqWcoSJJ6hoIkqWcoSJJ6hoIkqWcoSJJ6hoIkqWcoSJJ6hoIkqee1j/bhdYm0GPweaVJN5EjBax9JUhsTGQqSpDYMBUlSz1CQJPUMBUlSz1CQJPUMBUlSz1CQJPUMBUlSz1CQJPUMBUlSz1CQJPUMBUlSz1CQJPUMBUlSb9ncTyHJY4H3Ag8CX6qqjwxckiTNnKYjhSSXJtmV5I592tcnuTvJ9iSbu+ZXAx+vqnOBM1rWJUmaX+vpo8uA9XMbkhwGXAKcBqwFNiZZC6wC7ute9nDjuiRJ82g6fVRVNyRZvU/zqcD2qroXIMkVwJnATkbBcBv7Caskm4BNAMcff/ziFy0NZLFu4bnjwtMX5XMWslCdrf/eWbXUP+8hFpqP5RcjAhiFwbHAJ4HXJHkfcPVCb66qLVW1rqrWrVy5sm2lkjRjls1Cc1X9GHjj0HVI0iwbYqRwP3DcnP1VXdvYkmxIsmXPnj2LWpgkzbohQuFm4KQkJyQ5AjgbuOpgPqCqrq6qTStWrGhSoCTNqtaHpF4O3AicnGRnknOq6iHgfOBa4C7gyqq6s2UdkqTxtD76aOMC7VuBrYf6uUk2ABvWrFlzqB8hSZrHRF7mwukjSWpjIkNBktSGoSBJ6qWqhq7hkCXZDXz7EN9+DPC9RSxnOZnmvsF098++TaZJ69vTqmres38nOhQeiSTbqmrd0HW0MM19g+nun32bTNPUN6ePJEk9Q0GS1JvlUNgydAENTXPfYLr7Z98m09T0bWbXFCRJv2qWRwqSpH0YCpKk3kyGwgL3iJ4Y8937OskTk1yX5J7uz1/v2pPk4q6vX09yynCVH1iS45J8Mck3ktyZ5M1d+8T3L8mRSW5K8rWub3/XtZ+Q5CtdHz7aXT2YJI/p9rd3z68esv5xJDksyVeTfKbbn4q+JdmR5PYktyXZ1rVN/HdyPjMXCvu5R/QkuYx97n0NbAY+X1UnAZ/v9mHUz5O6xybgfUtU46F6CPjLqloLvAB4U/fvMw39+ynw8qp6NvAcYH2SFwDvBC6qqjXAD4FzutefA/ywa7+oe91y92ZGVz/ea5r69rKqes6c8xGm4Tv5q6pqph7AC4Fr5+xfAFwwdF2H0I/VwB1z9u8GntptPxW4u9v+ALBxvtdNwgP4N+AV09Y/4GjgVuD5jM6EPbxr77+fjC4v/8Ju+/DudRm69v30aRWjX44vBz4DZIr6tgM4Zp+2qfpO7n3M3EiBhe8RPemeUlXf7bb/C3hKtz2x/e2mFJ4LfIUp6V83vXIbsAu4DvgW8KMa3WcEfrn+vm/d83uAJy1txQfl3cBfAT/v9p/E9PStgM8luSXJpq5tKr6T+1o292jW4qmqSjLRxxon+TXgE8BbquqBJP1zk9y/qnoYeE6SJwCfAp4+cEmLIsnvA7uq6pYkLx26ngZeXFX3J3kycF2Sb859cpK/k/uaxZHCI75H9DL130meCtD9uatrn7j+Jnk0o0D4SFV9smuemv4BVNWPgC8ymlJ5QpK9/4M2t/6+b93zK4DvL3Gp43oRcEaSHcAVjKaQ3sN09I2qur/7cxejMD+VKftO7jWLofCI7xG9TF0FvKHbfgOjufi97X/cHRHxAmDPnCHvspPRkOCDwF1V9a45T018/5Ks7EYIJDmK0VrJXYzC4azuZfv2bW+fzwK+UN0k9XJTVRdU1aqqWs3ov6kvVNXrmIK+JXlsksft3QZ+F7iDKfhOzmvoRY0hHsArgX9nNJ/7tqHrOYT6Lwe+C/yM0XzlOYzmYz8P3ANcDzyxe20YHW31LeB2YN3Q9R+gby9mNH/7deC27vHKaegf8Czgq13f7gDe3rWfCNwEbAc+Bjymaz+y29/ePX/i0H0Ys58vBT4zLX3r+vC17nHn3t8Z0
/CdnO/hZS4kSb1ZnD6SJC3AUJAk9QwFSVLPUJAk9QwFSVLPUJAk9QwFSVLv/wGCl7PmuvNY4gAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3SsnPorch\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQWUlEQVR4nO3dbbBdVX3H8e/PIAJWr1WidQgY6M2gmRGVXlFHnapT2yC9YNUqGR0dmyGllY5OO1ODdax90Rl9UVE6+JBWSrUOiPhQInEQfOINIwRFDWJKpFgSbROfQmsdI/jvi7Ozvab3Jucmd9+dc873M3Mme69z7r7/FQ/5udbaD6kqJEkCeFjfBUiSjh2GgiSpZShIklqGgiSpZShIklrH9V3A0Tj55JNr9erVfZchSSPljjvu+H5VrZzvvZEMhSSzwOz09DTbtm3ruxxJGilJvrPQeyM5fVRVW6pq49TUVN+lSNJYGclQkCR1w1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSayQvXlsKqzfdMG/7fe84b5krkaRjhyMFSVLLUJAktQwFSVLLUJAktQwFSVLLUJAktY6ZUEjylCTvT3Jdkj/pux5JmkSdhkKSK5PsSbL9oPZ1SXYk2ZlkE0BV3V1VFwOvBJ7bZV2SpPl1PVK4Clg3tyHJCuAK4FxgLbA+ydrmvfOBG4CtHdclSZpHp6FQVbcAPzyo+RxgZ1XdW1X7gWuAC5rPX19V5wKvXuiYSTYm2ZZk2969e7sqXZImUh+3uTgFuH/O/i7gWUleALwMeASHGClU1WZgM8DMzEx1V6YkTZ5j5t5HVfVF4Is9lyFJE62Ps492A6fO2V/VtA0tyWySzfv27VvSwiRp0vURCrcDa5KcnuR44ELg+sUcoKq2VNXGqampTgqUpEnV9SmpVwO3Amcm2ZVkQ1U9CFwC3AjcDVxbVXd1WYckaTidrilU1foF2rdyFKedJpkFZqenp4/0EJKkeRwzVzQvhtNHktSNkQwFSVI3DAVJUmskQ8FTUiWpGyMZCq4pSFI3RjIUJEndMBQkSa2RDAXXFCSpGyMZCq4pSFI3RjIUJEndMBQkSS1DQZLUGslQcKFZkroxkqHgQrMkdWMkQ0GS1A1DQZLUMhQkSS1DQZLUGslQ8OwjSerGSIaCZx9JUjdGMhQkSd0wFCRJLUNBktQyFCRJLUNBktQyFCRJrZEMBa9TkKRujGQoeJ2CJHVjJENBktQNQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEktQ0GS1DIUJEmtkQwFb3MhSd0YyVDwNheS1I2RDAVJUjcMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLUMBUlSy1CQJLWO67uAuZK8FDgPeDTwwar6bM8lSdJE6XykkOTKJHuSbD+ofV2SHUl2JtkEUFWfqqqLgIuBV3VdmyTpVy3H9NFVwLq5DUlWAFcA5wJrgfVJ1s75yFub9yVJy6jzUKiqW4AfHtR8DrCzqu6tqv3ANcAFGXgn8Jmq+krXtUmSflVfC82nAPfP2d/VtP0Z8DvAK5JcPN8PJtmYZFuSbXv37u2+UkmaIMfUQnNVXQ5cfpjPbAY2A8zMzNRy1CVJk6KvkcJu4NQ5+6uaNklSj4YKhSRPXeLfezuwJsnpSY4HLgSuH/aHk8wm2bxv374lLkuSJtuwI4X3JrktyZ8mmVrML0hyNXArcGaSXUk2VNWDwCXAjcDdwLVVd
dewx6yqLVW1cWpqUaVIkg5jqDWFqnp+kjXAHwF3JLkN+KeqummIn12/QPtWYOtiipUkdWvoNYWquofB9QNvBn4buDzJt5K8rKviFuL0kSR1Y9g1hbOSXMZgqudFwGxVPaXZvqzD+ubl9JEkdWPYU1L/HvhH4C1V9dMDjVX13SRv7aQySdKyGzYUzgN+WlUPASR5GHBCVf1vVX24s+oWkGQWmJ2enl7uXy1JY23YNYWbgRPn7J/UtPXC6SNJ6sawoXBCVf3PgZ1m+6RuSpIk9WXYUPhJkrMP7CT5LeCnh/i8JGkEDbum8CbgY0m+CwT4DXzegSSNnWEvXrs9yZOBM5umHVX18+7KOjQXmiWpG4u5Id4zgbOAsxk8FOe13ZR0eC40S1I3hhopJPkw8JvAncBDTXMBH+qoLklSD4ZdU5gB1laVzy+QpDE27PTRdgaLy5KkMTbsSOFk4JvN3VF/dqCxqs7vpKrDcKFZkroxbCi8vcsiFquqtgBbZmZmLuq7FkkaJ8OekvqlJE8C1lTVzUlOAlZ0W5okabkNe+vsi4DrgA80TacAn+qqKElSP4ZdaH4D8FzgAWgfuPP4roqSJPVj2FD4WVXtP7CT5DgG1ylIksbIsKHwpSRvAU5M8mLgY8CW7so6NB/HKUndGDYUNgF7gW8AfwxsZfC85l54mwtJ6sawZx/9AviH5iVJGlPD3vvo35lnDaGqzljyiiRJvVnMvY8OOAH4Q+CxS1+OJKlPQ60pVNUP5rx2V9W7gfM6rk2StMyGnT46e87uwxiMHIYdZUiSRsSw/7D/3ZztB4H7gFcueTVD8oZ4ktSNYc8+emHXhSyGN8STpG4MO33054d6v6retTTlSJL6tJizj54JXN/szwK3Afd0UZQkqR/DhsIq4Oyq+m+AJG8Hbqiq13RVmCRp+Q17m4snAPvn7O9v2iRJY2TYkcKHgNuSfLLZfynwz92UJEnqy7BnH/1tks8Az2+aXl9VX+2uLElSH4adPgI4CXigqt4D7Epyekc1SZJ6MuzjOP8aeDNwadP0cOBfuipKktSPYUcKfwCcD/wEoKq+Czyqq6IkSf0YNhT2V1XR3D47ySO7K+nwfPKaJHVj2FC4NskHgMckuQi4mR4fuOOT1ySpG4c9+yhJgI8CTwYeAM4E3lZVN3VcmyRpmR02FKqqkmytqqcCBoEkjbFhp4++kuSZnVYiSerdsFc0Pwt4TZL7GJyBFAaDiLO6KkyStPwOGQpJTquq/wB+b5nqkST16HAjhU8xuDvqd5J8vKpevhxFSZL6cbg1hczZPqPLQiRJ/TtcKNQC25KkMXS46aOnJXmAwYjhxGYbfrnQ/OhOq5MkLatDhkJVrViuQiRJ/VvMrbMlSWPOUJAktQwFSVLLUJAktY6ZUEhyRpIPJrmu71okaVJ1GgpJrkyyJ8n2g9rXJdmRZGeSTQBVdW9VbeiyHknSoXU9UrgKWDe3IckK4ArgXGAtsD7J2o7rkCQNodNQqKpbgB8e1HwOsLMZGewHrgEuGPaYSTYm2ZZk2969e5ewWklSH2sKpwD3z9nfBZyS5HFJ3g88I8mlC/1wVW2uqpmqmlm5cmXXtUrSRBn2eQqdq6ofABf3XYckTbI+Rgq7gVPn7K9q2oaWZDbJ5n379i1pYZI06foIhduBNUlOT3I8cCFw/WIOUFVbqmrj1NRUJwVK0qTq+pTUq4FbgTOT7EqyoaoeBC4BbgTuBq6tqru6rEOSNJxO1xSqav0C7VuBrUd63CSzwOz09PSRHkKSNI9j5ormxXD6SJK6MZKhIEnqhqEgSWqNZCh4SqokdWMkQ8E1BUnqxkiGgiSpG4aCJKk1kqHgmoIkdWMkQ8E1BUnqxkiGgiSpG4aCJKllKEiSWiMZCi40S1I3RjIUXGiWpG6MZChIkrphKEiSWoaCJKllKEiSWiMZCp59JEndGMlQ8OwjSerGSIaCJKkbhoIkqWUoSJJahoIkqWUoSJJahoIkqXVc3wUci
SSzwOz09HTfpegord50w6I+f987zuuoEkkwoiMFr1OQpG6MZChIkrphKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKllKEiSWoaCJKnlbS60ZBZ7y4o+LVSrt9HQpBvJkYK3uZCkboxkKEiSumEoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJahoIkqWUoSJJax8zzFJI8EngvsB/4YlV9pOeSJGnidDpSSHJlkj1Jth/Uvi7JjiQ7k2xqml8GXFdVFwHnd1mXJGl+XU8fXQWsm9uQZAVwBXAusBZYn2QtsAq4v/nYQx3XJUmaR6fTR1V1S5LVBzWfA+ysqnsBklwDXADsYhAMd3KIsEqyEdgIcNpppy190Qs41KMmj7VHOC7VoyZ9vObhj7+QpfxOLNXvHudHkNq3pdPHQvMp/HJEAIMwOAX4BPDyJO8Dtiz0w1W1uapmqmpm5cqV3VYqSRPmmFlorqqfAK/vuw5JmmR9jBR2A6fO2V/VtA0tyWySzfv27VvSwiRp0vURCrcDa5KcnuR44ELg+sUcoKq2VNXGqampTgqUpEnV9SmpVwO3Amcm2ZVkQ1U9CFwC3AjcDVxbVXd1WYckaThdn320foH2rcDWIz1ukllgdnp6+kgPIUmax0je5sLpI0nqxkiGgiSpG4aCJKmVquq7hkU7sKYAvAq45wgPczLw/SUr6thnf8eb/R1vS93fJ1XVvFf/jmQoLIUk26pqpu86lov9HW/2d7wtZ3+dPpIktQwFSVJrkkNhc98FLDP7O97s73hbtv5O7JqCJOn/m+SRgiTpIIaCJKk1kaGwwDOiR9p8z8NO8tgkNyW5p/nz15v2JLm86f/Xk5zdX+WLl+TUJF9I8s0kdyV5Y9M+rv09IcltSb7W9PdvmvbTk3y56ddHm7sOk+QRzf7O5v3VfdZ/pJKsSPLVJJ9u9se2v0nuS/KNJHcm2da09fJ9nrhQOMQzokfdVRz0PGxgE/C5qloDfK7Zh0Hf1zSvjcD7lqnGpfIg8BdVtRZ4NvCG5n/Dce3vz4AXVdXTgKcD65I8G3gncFlVTQM/AjY0n98A/Khpv6z53Ch6I4M7KR8w7v19YVU9fc71CP18n6tqol7Ac4Ab5+xfClzad11L1LfVwPY5+zuAJzbbTwR2NNsfANbP97lRfAH/Crx4EvoLnAR8BXgWgytcj2va2+81g9vSP6fZPq75XPqufZH9XMXgH8IXAZ8GMub9vQ84+aC2Xr7PEzdSYOFnRI+jJ1TV95rt/wSe0GyPzd9BM1XwDODLjHF/m6mUO4E9wE3At4Ef1+D5JPCrfWr727y/D3jc8lZ81N4N/CXwi2b/cYx3fwv4bJI7kmxs2nr5Ph8zz2hWt6qqkozV+cdJfg34OPCmqnogSfveuPW3qh4Cnp7kMcAngSf3XFJnkvw+sKeq7kjygr7rWSbPq6rdSR4P3JTkW3PfXM7v8ySOFI76GdEj5L+SPBGg+XNP0z7yfwdJHs4gED5SVZ9omse2vwdU1Y+BLzCYPnlMkgP/x25un9r+Nu9PAT9Y5lKPxnOB85PcB1zDYArpPYxvf6mq3c2fexiE/jn09H2exFA46mdEj5Drgdc1269jMPd+oP21zVkMzwb2zRmmHvMyGBJ8ELi7qt41561x7e/KZoRAkhMZrJ/czSAcXtF87OD+Hvh7eAXw+Womn0dBVV1aVauqajWD/z4/X1WvZkz7m+SRSR51YBv4XWA7fX2f+15g6WlR5yXAvzGYl/2rvutZoj5dDXwP+DmDOcYNDOZVP8fg9uI3A49tPhsGZ2B9G/gGMNN3/Yvs6/MYzMF+Hbizeb1kjPt7FvDVpr/bgbc17WcAtwE7gY8Bj2jaT2j2dzbvn9F3H46i7y8APj3O/W369bXmddeBf5P6+j57mwtJUmsSp48kSQswFCRJLUNBktQyFCRJLUNBktQyFCRJLUNBktT6P+UTooAz0zuBA
AAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ScreenPorch\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAD4CAYAAAAdIcpQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQlUlEQVR4nO3da7BdZX3H8e/PoFysxiponQQaaBg0U2/0iDrqVJ3aBjFg1SqpTh0nQ0pHOjrtTA1tx9oXndEXFaWDl7QyVKsg4qVE4iB44w0jBEQFkRIplqBt4i20jmNE/32x14FDTLL25uy19t7Z38/Mmez17H32+T/nnOSX57LWSlUhSdKhPGLSBUiSpp9hIUlqZVhIkloZFpKkVoaFJKnVEZMuYDmOPfbYWrNmzaTLkKSZctNNN32/qo4b5XNmOizWrFnDjh07Jl2GJM2UJN8Z9XOchpIktTIsJEmtDAtJUivDQpLUyrCQJLUyLCRJrQwLSVIrw0KS1GqmT8pbjjVbrjpg+93vOKPnSiRp+jmykCS1MiwkSa0MC0lSK8NCktRqasIiyVOTvD/JFUn+bNL1SJIe1GlYJLk4ye4kt+7Xvj7JHUl2JtkCUFW3V9W5wGuA53dZlyRpNF2PLC4B1i9tSLICuAg4HVgHbEyyrnnuTOAqYHvHdUmSRtBpWFTVdcAP92s+DdhZVXdV1T7gMuCs5vVXVtXpwOsO9p5JNifZkWTHnj17uipdkrTEJE7KWwXcs+R4F/CcJC8CXgkcySFGFlW1FdgKsLCwUN2VKUlaNDVncFfVl4AvTbgMSdIBTGI31L3A8UuOVzdtQ0uyIcnWvXv3jrUwSdKBTSIsbgROTnJikkcBZwNXjvIGVbWtqjavXLmykwIlSQ/V9dbZS4HrgVOS7EqyqaruB84DrgZuBy6vqtu6rEOStDydrllU1caDtG9nGdtjk2wANqxdu/bhvoUkaQRTcwb3KJyGkqR+zWRYSJL6ZVhIklrNZFi4dVaS+jWTYeGahST1aybDQpLUL8NCktRqJsPCNQtJ6tdMhoVrFpLUr5kMC0lSvwwLSVIrw0KS1Gomw8IFbknq10yGhQvcktSvmQwLSVK/DAtJUivDQpLUyrCQJLUyLCRJrWYyLNw6K0n9msmwcOusJPVrJsNCktQvw0KS1MqwkCS1MiwkSa0MC0lSq5kMC7fOSlK/ZjIs3DorSf2aybCQJPXLsJAktTIsJEmtDAtJUivDQpLUyrCQJLUyLCRJrQwLSVIrw0KS1MqwkCS1msmw8NpQktSvmQwLrw0lSf2aybCQJPXLsJAktTIsJEmtDAtJUivDQpLUyrCQJLUyLCRJrQwLSVIrw0KS1MqwkCS1MiwkSa0MC0lSK8NCktTqiEkXsFSSVwBnAI8FPlhVn5twSZIkehhZJLk4ye4kt+7Xvj7JHUl2JtkCUFWfrqpzgHOB13ZdmyRpOH1MQ10CrF/akGQFcBFwOrAO2Jhk3ZKX/G3zvCRpCnQeFlV1HfDD/ZpPA3ZW1V1VtQ+4DDgrA+8EPltVN3ddmyRpOJNa4F4F3LPkeFfT9ufA7wGvTnLugT4xyeYkO5Ls2LNnT/eVSpKma4G7qi4ELmx5zVZgK8DCwkL1UZckzbtJjSzuBY5fcry6aRtKkg1Jtu7du3fshUmSftWkwuJG4OQkJyZ5FHA2cOWwn1xV26pq88qVKzsrUJL0oKHCIsnTHu4XSHIpcD1wSpJdSTZV1f3AecDVwO3A5VV128P9GpKkbg2
7ZvHeJEcy2Ab7kaoaev6nqjYepH07sH3Y95EkTc5QI4uqeiHwOgbrDDcl+WiSl3Za2SG4ZiFJ/Rp6zaKq7mRwstxbgd8FLkzyrSSv7Kq4Q9TimoUk9WjYNYunJ7mAwfrCS4ANVfXU5vEFHdYnSZoCw44s/gm4GXhGVb1p8ezqqvoug9FGr5yGkqR+DRsWZwAfraqfAiR5RJJjAKrqw10VdzBOQ0lSv4YNi2uBo5ccH9O0SZLmwLBhcVRV/d/iQfP4mG5KkiRNm2HD4idJTl08SPI7wE+7KUmSNG2GPSnvLcDHk3wXCPAbTPDmREk2ABvWrl07qRIkaa4MFRZVdWOSpwCnNE13VNXPuyurtZ5twLaFhYVzJlWDJM2TUS5R/mxgTfM5pyahqj7USVWSpKkyVFgk+TDwW8AtwC+a5gIMC0maA8OOLBaAdVXlzYYkaQ4NuxvqVgaL2lPBM7glqV/DhsWxwDeTXJ3kysWPLgs7FM/glqR+DTsN9fYui5AkTbdht85+OclvAidX1bXNdaFWdFuaJGlaDHuJ8nOAK4APNE2rgE93VZQkaboMu2bxJuD5wH3wwI2QnthVUZKk6TJsWPysqvYtHiQ5gsF5FhPhbihJ6tewYfHlJH8NHN3ce/vjwLbuyjo0d0NJUr+GDYstwB7gG8CfAtuZwB3yJEmTMexuqF8C/9x8SJLmzLDXhvpPDrBGUVUnjb0iSdLUGeXaUIuOAv4IePz4y5EkTaOh1iyq6gdLPu6tqncDZ3RcmyRpSgw7DXXqksNHMBhpjHIvDEnSDBv2H/x/XPL4fuBu4DVjr2ZI3lZVkvo17G6oF3ddyCi8raok9WvYaai/ONTzVfWu8ZQjSZpGo+yGejaweA+LDcANwJ1dFCVJmi7DhsVq4NSq+l+AJG8Hrqqq13dVmCRpegx7uY8nAfuWHO9r2iRJc2DYkcWHgBuSfKo5fgXwr92UJEmaNsPuhvqHJJ8FXtg0vbGqvtpdWZKkaTLsNBTAMcB9VfUeYFeSEzuqSZI0ZYa9rerfAW8Fzm+aHgn8W1dFSZKmy7Ajiz8EzgR+AlBV3wUe01VRkqTpMmxY7KuqorlMeZJHd1dSO2+rKkn9GjYsLk/yAeBxSc4BrmWCN0LytqqS1K/W3VBJAnwMeApwH3AK8Laquqbj2iRJU6I1LKqqkmyvqqcBBoQkzaFhp6FuTvLsTiuRJE2tYc/gfg7w+iR3M9gRFQaDjqd3VZgkaXocMiySnFBV/wX8QU/1SJKmUNvI4tMMrjb7nSSfqKpX9VGUJGm6tK1ZZMnjk7osRJI0vdrCog7yWJI0R9qmoZ6R5D4GI4yjm8fw4AL3YzutTpI0FQ4ZFlW1oq9CJEnTa5RLlEuS5pRhIUlqZVhIklpNTVgkOSnJB5NcMelaJEkPNezlPh6WJBcDLwd2V9VvL2lfD7wHWAH8S1W9o6ruAjYZFhqnNVuuOmD73e84o+dKpNnW9cjiEmD90oYkK4CLgNOBdcDGJOs6rkOStAydhkVVXQf8cL/m04CdVXVXVe0DLgPO6rIOSdLyTGLNYhVwz5LjXcCqJE9I8n7gWUnOP9gnJ9mcZEeSHXv27Om6VkkSHa9ZjKKqfgCcO8TrtgJbARYWFrwEiST1YBIji3uB45ccr27aJElTahIjixuBk5OcyCAkzgb+eJQ3SLIB2LB27doOylMXxrUryd1N0mR0OrJIcilwPXBKkl1JNlXV/cB5wNXA7cDlVXXbKO9bVduqavPKlSvHX7Qk6Vd0OrKoqo0Had8ObO/ya0uSxmdqFrhH4TRUP/qY8ul6ekrSeEzN5T5G4TSUJPVrJsNCktQvw0KS1Mo1C2kIbtnVvJvJkYVrFpLUr5kMC0lSvwwLSVIr1yw0lTxvQpouMzmycM1Ckvo1k2EhSeqXYSFJamVYSJJaucCtkXmCmjR/ZnJk4QK3JPVrJsNCktQvw0KS1MqwkCS1MiwkSa3cDXUYmtRuJS/RsTzuMtM0m8mRhbu
hJKlfMxkWkqR+GRaSpFaGhSSplWEhSWplWEiSWhkWkqRWnmcxR9zH366Pc0VG/Rqj/tz8OasLMzmy8DwLSerXTIaFJKlfhoUkqZVhIUlqZVhIkloZFpKkVoaFJKmVYSFJamVYSJJaGRaSpFZe7mOKeJmG/ozrsh7+zDQvZnJk4eU+JKlfMxkWkqR+GRaSpFaGhSSplWEhSWplWEiSWhkWkqRWhoUkqZVhIUlqZVhIkloZFpKkVoaFJKmVYSFJamVYSJJaGRaSpFZTcz+LJI8G3gvsA75UVR+ZcEmSpEanI4skFyfZneTW/drXJ7kjyc4kW5rmVwJXVNU5wJld1iVJGk3X01CXAOuXNiRZAVwEnA6sAzYmWQesBu5pXvaLjuuSJI2g02moqrouyZr9mk8DdlbVXQBJLgPOAnYxCIxbOESIJdkMbAY44YQTxl/0GI3r1p2Ten9Nt65vDXswfdwydtTb1Y7rezFq3/q4re603Lp3Egvcq3hwBAGDkFgFfBJ4VZL3AdsO9slVtbWqFqpq4bjjjuu2UkkSMEUL3FX1E+CNk65DkvSrJjGyuBc4fsnx6qZtaEk2JNm6d+/esRYmSTqwSYTFjcDJSU5M8ijgbODKUd6gqrZV1eaVK1d2UqAk6aG63jp7KXA9cEqSXUk2VdX9wHnA1cDtwOVVdVuXdUiSlqfr3VAbD9K+Hdj+cN83yQZgw9q1ax/uW0iSRjCTl/twGkqS+jWTYSFJ6pdhIUlqlaqadA0jW1yzAF4L3Pkw3+ZY4PtjK2r2zHP/7fv8muf+L+37b1bVSGc1z2RYjEOSHVW1MOk6JmWe+2/f57PvMN/9X27fnYaSJLUyLCRJreY5LLZOuoAJm+f+2/f5Nc/9X1bf53bNQpI0vHkeWUiShmRYSJJazWVYHOQe4IeNA937PMnjk1yT5M7mz19v2pPkwuZ78fUkp06u8uVLcnySLyb5ZpLbkry5aZ+X/h+V5IYkX2v6//dN+4lJvtL082PNFZ9JcmRzvLN5fs0k6x+HJCuSfDXJZ5rjeer73Um+keSWJDuatrH87s9dWBziHuCHk0vY797nwBbg81V1MvD55hgG34eTm4/NwPt6qrEr9wN/WVXrgOcCb2p+vvPS/58BL6mqZwDPBNYneS7wTuCCqloL/AjY1Lx+E/Cjpv2C5nWz7s0Mrmi9aJ76DvDiqnrmknMqxvO7X1Vz9QE8D7h6yfH5wPmTrquDfq4Bbl1yfAfw5Obxk4E7mscfADYe6HWHwwfw78BL57H/wDHAzcBzGJy5e0TT/sDfAQa3Cnhe8/iI5nWZdO3L6PPq5h/ElwCfATIvfW/6cTdw7H5tY/ndn7uRBQe/B/jh7klV9b3m8X8DT2oeH7bfj2Za4VnAV5ij/jfTMLcAu4FrgG8DP67BvWTgoX18oP/N83uBJ/Rb8Vi9G/gr4JfN8ROYn74DFPC5JDcl2dy0jeV3f2ruwa3+VFUlOaz3TCf5NeATwFuq6r4kDzx3uPe/qn4BPDPJ44BPAU+ZcEm9SPJyYHdV3ZTkRZOuZ0JeUFX3JnkicE2Sby19cjm/+/M4slj2PcBn1P8keTJA8+fupv2w+34keSSDoPhIVX2yaZ6b/i+qqh8DX2Qw9fK4JIv/OVzaxwf63zy/EvhBz6WOy/OBM5PcDVzGYCrqPcxH3wGoqnubP3cz+I/CaYzpd38ew2LZ9wCfUVcCb2gev4HBXP5i+580OyOeC+xdMmSdORkMIT4I3F5V71ry1Lz0/7hmREGSoxms19zOIDRe3bxs//4vfl9eDXyhmgnsWVNV51fV6qpaw+Dv9Req6nXMQd8Bkjw6yWMWHwO/D9zKuH73J70gM6FFoJcB/8FgLvdvJl1PB/27FPge8HMG85CbGMzFfp7BJd2vBR7fvDYMdod9G/gGsDDp+pfZ9xcwmLf9OnBL8/GyOer/04GvNv2/FXhb034ScAOwE/g4cGTTflRzvLN5/qRJ92FM34cXAZ+Zp743/fx
a83Hb4r9t4/rd93IfkqRW8zgNJUkakWEhSWplWEiSWhkWkqRWhoUkqZVhIUlqZVhIklr9PzsM2IlYJ1D5AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "PoolArea\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAARDklEQVR4nO3de6xlZX3G8e/jIHJRDypTS7g40CHoJFKlI2q8xNragnTQqlUmGo2hTL01Nf7RDtZY+0cTbVKvoeJUqZcqiHgpo2NQvJE0RhgUFaToVLEMXhg1HlpqRPDXP/Y6i8PxnJk9w3nP2nvO95PsnLXefXtmNsxz1nrXXitVhSRJAPcbOoAkaXJYCpKknqUgSepZCpKknqUgSeodMnSA++Loo4+udevWDR1DkqbKtdde+5OqWrvYfVNdCuvWrWPnzp1Dx5CkqZLk+0vdN5W7j5JsSrJtdnZ26CiSdFCZylKoqu1VtWVmZmboKJJ0UJnKUpAktWEpSJJ6loIkqWcpSJJ6loIkqWcpSJJ6U/3ltfti3dZPLTp+8xvPWuEkkjQ53FKQJPUsBUlSz1KQJPUsBUlSz1KQJPUsBUlSb2JKIcmjklyY5LIkLx86jyStRk1LIclFSW5Lcv2C8TOS3JRkV5KtAFV1Y1W9DHg+8KSWuSRJi2u9pfBe4Iz5A0nWABcAZwIbgM1JNnT3nQ18CtjROJckaRFNS6GqrgJ+tmD4dGBXVX23qu4ELgGe1T3+8qo6E3jhUq+ZZEuSnUl27tmzp1V0SVqVhjjNxbHALfPWdwOPT/I04DnAA9jLlkJVbQO2AWzcuLHaxZSk1Wdizn1UVV8EvjhwDEla1YY4+uhW4Ph568d1Y2NLsinJttnZ2WUNJkmr3RClcA1wcpITkxwKnANcvj8vUFXbq2rLzMxMk4CStFq1PiT1YuDLwClJdic5t6ruAl4FXAHcCFxaVTe0zCFJGk/TOYWq2rzE+A7uw2GnSTYBm9avX3+gLyFJWsTEfKN5f7j7SJLamMpSkCS1YSlIknpTWQoekipJbUxlKTinIEltTGUpSJLasBQkSb2pLAXnFCSpjaksBecUJKmNqSwFSVIbloIkqWcpSJJ6U1kKTjRLUhtTWQpONEtSG1NZCpKkNiwFSVLPUpAk9SwFSVJvKkvBo48kqY2pLAWPPpKkNqayFCRJbVgKkqSepSBJ6lkKkqSepSBJ6lkKkqTeVJaC31OQpDamshT8noIktTGVpSBJasNSkCT1LAVJUs9SkCT1LAVJUs9SkCT1LAVJUs9SkCT1LAVJUm8qS8HTXEhSG1NZCp7mQpLamMpSkCS1YSlIknqWgiSpZylIknqWgiSpZylIknqWgiSpZylIknqWgiSpZylIknqWgiSpZylIknqWgiSpZylIknqHDB1gviTPBs4CHgy8p6o+M3AkSVpVmm8pJLkoyW1Jrl8wfkaSm5LsSrIVoKo+UVXnAS8DXtA6myTp3lZi99F7gTPmDyRZA1wAnAlsADYn2TDvIa/r7pckraDmpVBVVwE/WzB8OrCrqr5bVXcClwDPysibgE9X1VcXe70kW5LsTLJzz549bcNL0ioz1ETzscAt89Z3d2N/Cfwh8LwkL1vsiVW1rao2VtXGtWvXtk8qSavIRE00V9XbgbcPnUOSVquxthSSPHqZ3/dW4Ph568d1Y2NJsinJttnZ2WWOJUmr27i7j/45ydVJXpFkZhne9xrg5CQnJjkUOAe4fNwnV9X2qtoyM7McUSRJc8Yqha
p6CvBCRr/dX5vkQ0meMc5zk1wMfBk4JcnuJOdW1V3Aq4ArgBuBS6vqhgP6E0iSls3YcwpV9Z0krwN2Mtrv/9gkAV5bVR/by/M2LzG+A9ixn3mB0e4jYNP69esP5OmSpCWMO6dwapK3MPqt/unApqp6VLf8lob5FuXuI0lqY9wthXcA72a0VfCLucGq+kG39SBJOgiMWwpnAb+oqrsBktwPOKyq/q+qPtAsnSRpRY179NGVwOHz1o/oxgbhIamS1Ma4pXBYVf3v3Eq3fESbSPvmnIIktTFuKdyR5LS5lSS/B/xiL4+XJE2hcecUXg18JMkPgAC/jae2lqSDzlilUFXXJHkkcEo3dFNV/apdrL3zewqS1Mb+nCX1ccCpwGmMrn/w4jaR9s05BUlqY6wthSQfAH4HuA64uxsu4P2NckmSBjDunMJGYENVVcswkqRhjbv76HpGk8uSpIPYuFsKRwPfSnI18Mu5wao6u0mqfXCiWZLaGLcU3tAyxP6qqu3A9o0bN543dBZJOpiMe0jql5I8Aji5qq5McgSwpm00SdJKG/fU2ecBlwHv6oaOBT7RKpQkaRjjTjS/EngScDuMLrgD/FarUJKkYYxbCr+sqjvnVpIcwuh7CpKkg8i4pfClJK8FDu+uzfwRYHu7WHvnqbMlqY1xS2ErsAf4JvAXjK6tPNgV1zzNhSS1Me7RR78G/qW7SZIOUuOe++h7LDKHUFUnLXsiSdJg9ufcR3MOA/4MeOjyx5EkDWmsOYWq+um8261V9VbgrMbZJEkrbNzdR6fNW70foy2HcbcyJElTYtx/2P9p3vJdwM3A85c9jSRpUOMeffT7rYPsD8+SKkltjLv76DV7u7+q3rw8ccbjWVIlqY39OfroccDl3fom4GrgOy1CSZKGMW4pHAecVlX/A5DkDcCnqupFrYJJklbeuKe5eDhw57z1O7sxSdJBZNwthfcDVyf5eLf+bOB9bSJJkoYy7tFH/5Dk08BTuqGXVtXX2sWSJA1h3N1HAEcAt1fV24DdSU5slEmSNJBxL8f5d8DfAOd3Q/cH/q1VKEnSMMbdUvhT4GzgDoCq+gHwoFahJEnDGLcU7qyqojt9dpIj20WSJA1l3FK4NMm7gKOSnAdcyYAX3PFynJLUxj5LIUmADwOXAR8FTgFeX1XvaJxtSV6OU5La2OchqVVVSXZU1aOBz65AJknSQMbdffTVJI9rmkSSNLhxv9H8eOBFSW5mdARSGG1EnNoqmCRp5e21FJKcUFX/DfzxCuWRJA1oX1sKn2B0dtTvJ/loVT13JUJJkoaxrzmFzFs+qWUQSdLw9lUKtcSyJOkgtK/dR7+b5HZGWwyHd8twz0Tzg5umkyStqL2WQlWtWakgkqTh7c+psyVJBzlLQZLUsxQkST1LQZLUm5hSSHJSkvckuWzoLJK0WjUthSQXJbktyfULxs9IclOSXUm2AlTVd6vq3JZ5JEl713pL4b3AGfMHkqwBLgDOBDYAm5NsaJxDkjSGpqVQVVcBP1swfDqwq9syuBO4BHjWuK+ZZEuSnUl27tmzZxnTSpKGmFM4Frhl3vpu4NgkD0tyIfDYJOcv9eSq2lZVG6tq49q1a1tnlaRVZdzrKTRXVT8FXjZ0DklazYbYUrgVOH7e+nHd2NiSbEqybXZ2dlmDSdJqN0QpXAOcnOTEJIcC5wCX788LVNX2qtoyMzPTJKAkrVatD0m9GPgycEqS3UnOraq7gFcBVwA3ApdW1Q0tc0iSxtN0TqGqNi8xvgPYcaCvm2QTsGn9+vUH+hKSpEVMzDea94e7jySpjaksBUlSG5aCJKk3laXgIamS1MZUloJzCpLUxlSWgiSpDUtBktSbylJwTkGS2pjKUnBOQZLamMpSkCS1YSlIknqWgiSpN5Wl4ESzJLUxlaXgRLMktTGVpSBJasNSkCT1LAVJUs9SkCT1prIUPPpIktqYylLw6CNJamMqS0GS1IalIEnqWQqSpJ6lIEnqWQqSpJ6lIE
nqTWUp+D0FSWpjKkvB7ylIUhtTWQqSpDYsBUlSz1KQJPUsBUlSz1KQJPUsBUlSz1KQJPUsBUlSz1KQJPWmshQ8zYUktTGVpeBpLiSpjaksBUlSG5aCJKlnKUiSepaCJKlnKUiSepaCJKlnKUiSepaCJKlnKUiSepaCJKlnKUiSepaCJKlnKUiSepaCJKl3yNAB5iQ5Evhn4E7gi1X1wYEjSdKq03RLIclFSW5Lcv2C8TOS3JRkV5Kt3fBzgMuq6jzg7Ja5JEmLa7376L3AGfMHkqwBLgDOBDYAm5NsAI4DbukednfjXJKkRTTdfVRVVyVZt2D4dGBXVX0XIMklwLOA3YyK4Tr2UlZJtgBbAE444YTlDy2tMuu2fmrR8ZvfeNZ+PX4pB/I6+/ucpR6/v1q//oFY6UxDTDQfyz1bBDAqg2OBjwHPTfJOYPtST66qbVW1sao2rl27tm1SSVplJmaiuaruAF46dA5JWs2G2FK4FTh+3vpx3djYkmxKsm12dnZZg0nSajdEKVwDnJzkxCSHAucAl+/PC1TV9qraMjMz0ySgJK1WrQ9JvRj4MnBKkt1Jzq2qu4BXAVcANwKXVtUNLXNIksbT+uijzUuM7wB2HOjrJtkEbFq/fv2BvoQkaRFTeZoLdx9JUhtTWQqSpDYsBUlSL1U1dIb9NjenALwA+M4BvszRwE+WLVQb05ARpiOnGZfHNGSE6cg5ZMZHVNWi3/6dylJYDkl2VtXGoXPszTRkhOnIacblMQ0ZYTpyTmpGdx9JknqWgiSpt5pLYdvQAcYwDRlhOnKacXlMQ0aYjpwTmXHVzilIkn7Tat5SkCQtYClIknqrshSWuEb0EDl+4xrWSR6a5LNJvtP9fEg3niRv7zJ/I8lpK5Tx+CRfSPKtJDck+atJy5nksCRXJ/l6l/Hvu/ETk3yly/Lh7qy8JHlAt76ru39d64zzsq5J8rUkn5zgjDcn+WaS65Ls7MYm5vPu3veoJJcl+c8kNyZ54iRlTHJK9/c3d7s9yasnKeOSqmpV3YA1wH8BJwGHAl8HNgyU5anAacD188b+EdjaLW8F3tQtPxP4NBDgCcBXVijjMcBp3fKDgG8zurb2xOTs3uuB3fL9ga90730pcE43fiHw8m75FcCF3fI5wIdX8DN/DfAh4JPd+iRmvBk4esHYxHze3fu+D/jzbvlQ4KhJyzgv6xrgR8AjJjXjvfIO9caD/YHhicAV89bPB84fMM+6BaVwE3BMt3wMcFO3/C5g82KPW+G8/w48Y1JzAkcAXwUez+jboocs/NwZnbb9id3yId3jsgLZjgM+Bzwd+GT3D8BEZezeb7FSmJjPG5gBvrfw72OSMi7I9UfAf0xyxvm31bj7aKlrRE+Kh1fVD7vlHwEP75YHz93twngso9/EJypnt1vmOuA24LOMtgZ/XqPrdyzM0Wfs7p8FHtY6I/BW4K+BX3frD5vAjAAFfCbJtUm2dGOT9HmfCOwB/rXbFffuJEdOWMb5zgEu7pYnNWNvNZbC1KjRrwwTccxwkgcCHwVeXVW3z79vEnJW1d1V9RhGv42fDjxyyDwLJfkT4LaqunboLGN4clWdBpwJvDLJU+ffOQGf9yGMdru+s6oeC9zBaFdMbwIyAtDNEZ0NfGThfZOScaHVWAr3+RrRjf04yTEA3c/buvHBcie5P6NC+GBVfWxScwJU1c+BLzDaFXNUkrkLSc3P0Wfs7p8Bfto42pOAs5PcDFzCaBfS2yYsIwBVdWv38zbg44xKdpI+793A7qr6Srd+GaOSmKSMc84EvlpVP+7WJzHjvazGUrjP14hu7HLgJd3ySxjtw58bf3F3lMITgNl5m6HNJAnwHuDGqnrzJOZMsjbJUd3y4YzmPG5kVA7PWyLjXPbnAZ/vfmtrpqrOr6rjqmodo//mPl9VL5ykjABJjkzyoLllRvvDr2eCPu+q+hFwS5JTuqE/AL41SRnn2cw9u47mskxaxnsbYiJj6Bujmf5vM9rv/L
cD5rgY+CHwK0a//ZzLaL/x5xidEvxK4KHdYwNc0GX+JrBxhTI+mdEm7jeA67rbMycpJ3Aq8LUu4/XA67vxk4CrgV2MNt8f0I0f1q3v6u4/aYU/96dxz9FHE5Wxy/P17nbD3P8fk/R5d+/7GGBn95l/AnjIBGY8ktHW3cy8sYnKuNjN01xIknqrcfeRJGkJloIkqWcpSJJ6loIkqWcpSJJ6loIkqWcpSJJ6/w/QkezvKDSw8AAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MiscVal\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY8AAAD4CAYAAAAUymoqAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAASh0lEQVR4nO3de5BkZX3G8e/jIiLGDCCrEi4uuIiupUZcb2VM1NK4iCteEmVLq7wQNnirGFOJi1rGpCpVXpKoJChuFO8BES9hZS0iVtT8YQmL10VcWXHVRRNWrSyJsYLoL3/0WWjGnZk+M33mdDvfT9UUp9/u6Xl4mZ6Hc97TfVJVSJLUxp36DiBJmj6WhySpNctDktSa5SFJas3ykCS1dkjfAZbi6KOPrjVr1vQdQ5KmyjXXXPOjqlq9lOeYyvJIshHYuHbtWnbs2NF3HEmaKkm+u9TnmMrDVlW1rao2z8zM9B1FklakqSwPSVK/LA9JUmuWhySpNctDktSa5SFJas3ykCS1ZnlIklqbyjcJjsOaLZcfdHzPG05f5iSSNH3c85AktWZ5SJJaszwkSa1ZHpKk1iwPSVJrE1MeSR6Q5IIklyZ5cd95JElz67Q8klyY5KYkO2eNb0iyK8nuJFsAquq6qjoHeDbwmC5zSZKWpus9j/cCG4YHkqwCzgdOA9YBm5Ksa+57GnA5sL3jXJKkJei0PKrq88BPZg0/AthdVTdU1S3AxcAZzeMvq6rTgOd2mUuStDR9vMP8WOD7Q7f3Ao9M8jjgmcBdmGfPI8lmYDPACSec0F1KSdKcJubjSarqs8BnR3jcVmArwPr166vbVJKkg+njbKsbgeOHbh/XjI0sycYkW/fv3z/WYJKk0fRRHlcDJyc5McmhwJnAZW2eoKq2VdXmmZmZTgJKkubX9am6FwFfAE5JsjfJWVV1K/Ay4ArgOuCSqrq2yxySpPHqdM2jqjbNMb4dT8eVpKk1Me8wb8M1D0nq11SWh2sektSvqSwPSVK/prI8PGwlSf2ayvLwsJUk9Wsqy0OS1C/LQ5LUmuUhSWptKsvDBXNJ6tdUlocL5pLUr6ksD0lSvywPSVJrU1kernlIUr+msjxc85Ckfk1leUiS+mV5SJJaszwkSa1ZHpKk1iwPSVJrU1kenqorSf2ayvLwVF1J6tdUlockqV+WhySpNctDktSa5SFJas3ykCS1NpXl4am6ktSvqSwPT9WVpH5NZXlIkvpleUiSWrM8JEmtWR6SpNYsD0lSa5aHJKk1y0OS1JrlIUlqzfKQJLU2leXhx5NIUr+msjz8eBJJ6tdUlockqV+WhySpNctDktSa5SFJas3ykCS1ZnlIklqzPCRJrVkekqTWLA9JUmuWhySpNctDktSa5SFJas3ykCS1ZnlIklo7pO8Aw5I8HTgd+E3g3VX1rz1HkiQdROd7HkkuTHJTkp2zxjck2ZVkd5ItAFX1iao6GzgHeE7X2SRJi7Mch63eC2wYHkiyCjgfOA1YB2xKsm7oIa9t7pckTaDOy6OqPg/8ZNbwI4DdVXVDVd0CXAyckYE3Ap+qqi8d7PmSbE6yI8mOffv2dRteknRQfS2YHwt8f+j23mbs5cATgT9Ics7BvrGqtlbV+qpav3r16u6TSpJ+xUQtmFfVecB5feeQJM2vrz2PG4Hjh24f14yNJMnGJFv3798/9mCSpIWNVB5JHjTmn3s1cHKSE5McCpwJXDbqN1fVtqraPDMzM+ZYkqRRjLrn8fYkVyV5SZJWf7GTXAR
8ATglyd4kZ1XVrcDLgCuA64BLquraVsklSb0Zac2jqh6b5GTgRcA1Sa4C3lNVnx7hezfNMb4d2N4m7AFJNgIb165du5hvlyQt0chrHlV1PYP3X7wK+D3gvCTfTPLMrsLNk8XDVpLUo1HXPB6c5C0MDjE9AdhYVQ9ott/SYT5J0gQa9VTdfwDeBby6qn52YLCqfpDktZ0kkyRNrFHL43TgZ1X1C4AkdwIOq6r/raoPdJZuDq55SFK/Rl3zuBK469Dtw5uxXrjmIUn9GrU8Dquq/zlwo9k+vJtIkqRJN2p5/DTJqQduJHkY8LN5Hi9J+jU26prHK4CPJPkBEODe9Hi9Ddc8JKlfo75J8Ook9wdOaYZ2VdXPu4u1YJ5twLb169ef3VcGSVrJ2nyq7sOBNc33nJqEqnp/J6kkSRNtpPJI8gHgvsBXgF80wwVYHpK0Ao2657EeWFdV1WUYSdJ0GPVsq50MFskngtfzkKR+jVoeRwPfSHJFkssOfHUZbD6+SVCS+jXqYavXdxlCkjRdRj1V93NJ7gOcXFVXJjkcWNVtNEnSpBr1I9nPBi4F3tkMHQt8oqtQkqTJNuqax0uBxwA3w20XhrpnV6EkSZNt1PL4v6q65cCNJIcweJ9HLzzbSpL6NWp5fC7Jq4G7JnkS8BFgW3ex5ufZVpLUr1HLYwuwD/g68MfAdgbXM5ckrUCjnm31S+Cfmi9J0go36mdbfYeDrHFU1UljTyRJmnhtPtvqgMOAPwSOGn8cSdI0GGnNo6p+PPR1Y1W9FTi942ySpAk16mGrU4du3onBnkiba4FMjTVbLj/o+J432JWSdMCoBfB3Q9u3AnuAZ489zYi8DK0k9WvUs60e33WQNrwMrST1a9TDVq+c7/6q+vvxxJEkTYM2Z1s9HDhwDY+NwFXA9V2EkiRNtlHL4zjg1Kr6b4Akrwcur6rndRVMkjS5Rv14knsBtwzdvqUZkyStQKPuebwfuCrJx5vbTwfe100kSdKkG/Vsq79J8ingsc3QC6vqy93FkiRNslEPWwEcDtxcVW8D9iY5saNMkqQJN+plaP8SeBVwbjN0Z+CDXYWSJE22Ufc8ngE8DfgpQFX9ALh7V6EkSZNt1PK4paqK5mPZk9ytu0gL8zK0ktSvUcvjkiTvBI5IcjZwJT1eGMrL0EpSvxY82ypJgA8D9wduBk4BXldVn+44myRpQi1YHlVVSbZX1YMAC0OSNPJhqy8leXinSSRJU2PUd5g/Enhekj0MzrgKg52SB3cVTJI0ueYtjyQnVNX3gCcvUx5J0hRYaM/jEww+Tfe7ST5aVc9ajlCSpMm20JpHhrZP6jKIJGl6LFQeNce2JGkFW+iw1UOS3MxgD+SuzTbcvmD+m52mkyRNpHnLo6pWLVcQSdL0aPOR7JIkAZaHJGkRLA9JUmuWhySptYkpjyQnJXl3kkv7ziJJml+n5ZHkwiQ3Jdk5a3xDkl1JdifZAlBVN1TVWV3mkSSNR9d7Hu8FNgwPJFkFnA+cBqwDNiVZ13EOSdIYdVoeVfV54Cezhh8B7G72NG4BLgbOGPU5k2xOsiPJjn379o0xrSRpVH2seRwLfH/o9l7g2CT3SHIB8NAk5871zVW1tarWV9X61atXd51VknQQo17Po3NV9WPgnL5zSJIW1seex43A8UO3j2vGRpZkY5Kt+/fvH2swSdJo+iiPq4GTk5yY5FDgTOCyNk9QVduqavPMzEwnASVJ8+v6VN2LgC8ApyTZm+SsqroVeBlwBXAdcElVXdtlDknSeHW65lFVm+YY3w5sX+zzJtkIbFy7du1in0KStAQT8w7zNjxsJUn9msrykCT1y/KQJLU2Me/zaKOPNY81Wy6f8749bzh92XJI0iSYyj0P1zwkqV9TWR6SpH5ZHpKk1qayPPx4Eknq11SWh2sektSvqSwPSVK/LA9JUmuWhySptaksDxfMJalfU1keLphLUr+msjwkSf2yPCRJrVkekqTWLA9JUmt+JPsEmet
j3/3Id0mTZir3PDzbSpL6NZXlIUnql+UhSWrN8pAktWZ5SJJaszwkSa1ZHpKk1nyfxxj4/gxJK81U7nn4Pg9J6tdUlockqV+WhySpNctDktSa5SFJas3ykCS1ZnlIklqzPCRJrVkekqTWLA9JUmt+PEmH/NgSSb+upnLPw48nkaR+TWV5SJL6ZXlIklqzPCRJrVkekqTWLA9JUmuWhySpNctDktSa5SFJas3ykCS1ZnlIklqzPCRJrVkekqTWLA9JUmuWhySptYm5nkeSuwFvB24BPltVH+o5kiRpDp3ueSS5MMlNSXbOGt+QZFeS3Um2NMPPBC6tqrOBp3WZS5K0NF0ftnovsGF4IMkq4HzgNGAdsCnJOuA44PvNw37RcS5J0hJ0etiqqj6fZM2s4UcAu6vqBoAkFwNnAHsZFMhXmKfUkmwGNgOccMIJ4w89RcZ1mVsvl7u8nG+1Mam/L30smB/L7XsYMCiNY4GPAc9K8g5g21zfXFVbq2p9Va1fvXp1t0klSQc1MQvmVfVT4IV955AkLayPPY8bgeOHbh/XjI0sycYkW/fv3z/WYJKk0fRRHlcDJyc5McmhwJnAZW2eoKq2VdXmmZmZTgJKkubX9am6FwFfAE5JsjfJWVV1K/Ay4ArgOuCSqrq2yxySpPHq+myrTXOMbwe2L/Z5k2wENq5du3axTyFJWoKp/HgSD1tJUr+msjwkSf2yPCRJraWq+s7Q2oE1D+A5wPWLfJqjgR+NLdT4TXI+sy3OJGeDyc5ntsWZK9t9qmpJ77KeyvIYhyQ7qmp93znmMsn5zLY4k5wNJjuf2Rany2wetpIktWZ5SJJaW8nlsbXvAAuY5HxmW5xJzgaTnc9si9NZthW75iFJWryVvOchSVoky0OS1NqKLI85rqHe9c88Psm/JflGkmuT/EkzflSSTye5vvnnkc14kpzXZPxaklOHnuv5zeOvT/L8MWZcleTLST7Z3D4xyRebDB9uPgWZJHdpbu9u7l8z9BznNuO7kjx5TLmOSHJpkm8muS7Joydl3pL8afPfc2eSi5Ic1ue8JbkwyU1Jdg6NjW2ukjwsydeb7zkvSZaY7c3Nf9evJfl4kiMWmpO5Xr9zzftS8g3d92dJKsnRze3e564Zf3kzf9cmedPQePdzV1Ur6gtYBXwbOAk4FPgqsG4Zfu4xwKnN9t2BbzG4hvubgC3N+Bbgjc32U4BPAQEeBXyxGT8KuKH555HN9pFjyvhK4J+BTza3LwHObLYvAF7cbL8EuKDZPhP4cLO9rpnPuwAnNvO8agy53gf8UbN9KHDEJMwbgytgfge469B8vaDPeQN+FzgV2Dk0Nra5Aq5qHpvme09bYrbfBw5ptt84lO2gc8I8r9+55n0p+Zrx4xl8Cvh3gaMnaO4eD1wJ3KW5fc/lnLtO/2BO4hfwaOCKodvnAuf2kONfgCcBu4BjmrFjgF3N9juBTUOP39Xcvwl459D4HR63hDzHAZ8BngB8svkF/9HQC/u2eWteSI9utg9pHpfZczn8uCXkmmHwBzqzxnufN26/pPJRzTx8Enhy3/MGrJn1R2Ysc9Xc982h8Ts8bjHZZt33DOBDzfZB54Q5Xr/z/b4uNR9wKfAQYA+3l0fvc8fgD/4TD/K4ZZm7lXjYaq5rqC+b5nDFQ4EvAveqqh82d/0HcK9me66cXeV/K/AXwC+b2/cA/qsG11+Z/XNuy9Dcv795fBfZTgT2Ae/J4JDau5LcjQmYt6q6Efhb4HvADxnMwzVMxrwNG9dcHdtsd5XzRQz+j3wx2eb7fV20JGcAN1bVV2fdNQlzdz/gsc3hps8lefgisy1q7lZiefQqyW8AHwVeUVU3D99Xg9pf9nOnkzwVuKmqrlnunz2CQxjsrr+jqh4K/JTBoZfb9DhvRwJnMCi43wLuBmxY7hxt9DVXC0nyGuBW4EN9ZzkgyeHAq4HX9Z1lDocw2Ot9FPDnwCVt1lGWaiWWx5Kvob5YSe7MoDg+VFUfa4b
/M8kxzf3HADctkLOL/I8BnpZkD3Axg0NXbwOOSHLggmHDP+e2DM39M8CPO8q2F9hbVV9sbl/KoEwmYd6eCHynqvZV1c+BjzGYy0mYt2Hjmqsbm+2x5kzyAuCpwHObcltMth8z97wv1n0Z/I/BV5vXxnHAl5LcexH5upi7vcDHauAqBkcNjl5EtsXNXdtjgtP+xaCtb2DwS3Fg0eiBy/BzA7wfeOus8Tdzx8XMNzXbp3PHBbmrmvGjGKwBHNl8fQc4aow5H8ftC+Yf4Y6LaC9ptl/KHRd+L2m2H8gdF+puYDwL5v8OnNJsv76Zs97nDXgkcC1wePPz3ge8vO9541ePjY9trvjVRd+nLDHbBuAbwOpZjzvonDDP63eueV9Kvln37eH2NY9JmLtzgL9utu/H4JBUlmvuxv5Hchq+GJwp8S0GZx68Zpl+5u8wOFzwNeArzddTGBxv/AyDj5a/cugXLcD5TcavA+uHnutFwO7m64Vjzvk4bi+Pk5pf+N3NL9eBszoOa27vbu4/aej7X9Nk3kWLs0kWyPTbwI5m7j7RvCgnYt6AvwK+CewEPtC8YHubN+AiBusvP2fwf6ZnjXOugPXNv+u3gX9k1okMi8i2m8EfvQOviQsWmhPmeP3ONe9LyTfr/j3cXh6TMHeHAh9snvNLwBOWc+78eBJJUmsrcc1DkrRElockqTXLQ5LUmuUhSWrN8pAktWZ5SJJaszwkSa39P8zuPoTooVGvAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MoSold\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZUAAAD4CAYAAAAkRnsLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAP+0lEQVR4nO3de7BdZX3G8e9jYoUgBhWkbUJNFCY0gxcQb2XQqZc2Gm69qFBvVQbKVKm2ztRgafUP60RrVbBWDRfxQrlFoVKCirYjUwe5yiAQKaggASxRxuCthsivf5ydeDieJPsk7zor+5zvZ+ZM9nr3Oms9ayaTJ+9ae6+VqkKSpBYe1XcASdLMYalIkpqxVCRJzVgqkqRmLBVJUjNz+w7Qt7333rsWLVrUdwxJGinXX3/9D6pqn4njs75UFi1axHXXXdd3DEkaKUnummzc01+SpGYsFUlSM5aKJKkZS0WS1IylIklqxlKRJDVjqUiSmrFUJEnNzPovP0qtLVpx2aTjd65cPs1JpOnnTEWS1IylIklqxlKRJDVjqUiSmrFUJEnNWCqSpGYsFUlSM5aKJKkZS0WS1IzfqJdmiK19kx/8Nr+mjzMVSVIzlookqRlLRZLUjKUiSWrGUpEkNWOpSJKasVQkSc1YKpKkZiwVSVIzlookqRlLRZLUjKUiSWrGUpEkNWOpSJKasVQkSc3MyOepJDkGWA48Djirqr7UcyRJmhU6K5Uk+wGfAvYFClhVVaft4LbOBo4A7q+qgya8tww4DZgDnFlVK6vqEuCSJI8H3g9YKtpiaw+z8kFW0s7r8vTXJuBtVbUUeB7wpiRLx6+Q5ElJ9pwwtv8k2zoHWDZxMMkc4CPAy4ClwHET9nHq4H1J0jTorFSq6r6qumHw+sfAWmDBhNVeyNiM4jEASU4APjzJtq4EHphkN88B7qiq71TVRuB84OiMeS9w+eYMkqTuTcs1lSSLgIOBq8ePV9VFSRYDFyS5CHgj8NIpbHoBcPe45XXAc4GTgZcA85PsX1UfmyTTkcCR++8/2cRIkrQjOv/0V5LHAp8F3lpVD058v6reB/wf8FHgqKr6yc7us6pOr6pnVdVJkxXKYJ1Lq+rE+fPn7+zuJEkDnZZKkkczVijnVtXntrLO4cBBwMXAO6e4i3uA/cYtLxyMSZJ60FmpJAlwFrC2qj6wlXUOBlYBRwNvAJ6Y5N1T2M21wAFJFif5DeBY4PM7l1yStKO6nKkcBrwWeFGSGwc/L5+wzjzglVX17ap6GHgdcNfEDSU5D7gKWJJkXZLjAapqE/Bm4IuMfRDgwqq6pbtDkiRtS2cX6qvqv4FsZ52vTVh+CDhjkvWO28Y21gBrdjCmJKkhb9MiSWrGUpEkNWOpSJKasVQkSc1YKpKkZiwVSVIzlookqRlLRZLUjKUiSWrGUpEkNWOpSJKasVQkSc1YKpKkZiwVSVIzlookqRlLRZLUjKUiSWrGUpEkNWOpSJKasVQkSc1YKpKkZiwVSVIzlookqRlLRZLUjKUiSWrGUpEkNWOpSJKasVQkSc1YKpKkZiwVSVIzlookqRlLRZLUjKUiSWrGUpEkNWOpSJKasVQkSc1YKpKkZub2HUCz06IVl006fufK5dOcRFJLzlQkSc1YKpKkZoYqlSRP6zqIJGn0DTtT+dck1yT5yyTzO00kSRpZQ5VKVR0OvBrYD7g+yb8leWmnySRJI2foaypVdTtwKvB24IXA6Um+leSPuwonSRotw15TeXqSDwJrgRcBR1bV
7w5ef7DDfJKkETLs91Q+DJwJvKOqfr55sKruTXJqJ8kkSSNn2FJZDvy8qn4JkORRwG5V9bOq+nRn6SRJI2XYaypfBnYftzxvMCZJ0hbDlspuVfWTzQuD1/O6iSRJGlXDlspPkxyyeSHJs4Cfb2N9SdIsNOw1lbcCFyW5Fwjwm8CrOkslSRpJQ5VKVV2b5EBgyWDotqp6qLtYkqRRNJVb3z8bWDT4nUOSUFWf6iSVJGkkDVUqST4NPBW4EfjlYLgAS0WStMWwM5VDgaVVVV2GkSSNtmFL5WbGLs7f12EWSbs4n9ip7Rm2VPYGbk1yDfCLzYNVdVQnqSRJI2nYUnlXlyEkSTPDsB8p/mqSJwMHVNWXk8wD5nQbTZI0aoa99f0JwGrg44OhBcAlXYWSJI2mYW/T8ibgMOBB2PLArid1FUqSNJqGLZVfVNXGzQtJ5jL2PRVJkrYYtlS+muQdwO6DZ9NfBFzaXSxJ0igatlRWAOuBbwJ/Aaxh7Hn1kiRtMeynvx4Gzhj8SJI0qWHv/fVdJrmGUlVPaZ5IkjSypnLvr812A14BPKF9HEnSKBvqmkpV/XDczz1V9SHAm/1Ikh5h2NNfh4xbfBRjM5epPItFkjQLDFsM/zzu9SbgTuCVzdNIkkbasJ/++v2ug0iSRt+wp7/+ZlvvV9UH2sSRJI2yqXz669nA5wfLRwLXALd3EUqSNJqGLZWFwCFV9WOAJO8CLquq13QVTP3yCX+SdsSwt2nZF9g4bnnjYEySpC2Gnal8CrgmycWD5WOAT3YTSZI0qob99Nc/JrkcOHww9Iaq+kZ3sSRJo2jY018A84AHq+o0YF2SxR1l2mFJjklyRpILkvxB33kkabYZ9nHC7wTeDpwyGHo08JmuQk3Y99lJ7k9y84TxZUluS3JHkhUAVXVJVZ0AnAS8ajrySZJ+ZdiZyh8BRwE/Baiqe4E9uwo1wTnAsvEDSeYAHwFeBiwFjkuydNwqpw7elyRNo2FLZWNVFYPb3yfZo7tIj1RVVwIPTBh+DnBHVX1n8Jjj84GjM+a9wOVVdcPWtpnkxCTXJblu/fr13YWXpFlm2FK5MMnHgb2SnAB8mX4f2LUAuHvc8rrB2MnAS4A/TXLS1n65qlZV1aFVdeg+++zTbVJJmkW2++mvJAEuAA4EHgSWAP9QVVd0nG3Kqup04PS+c0jSbLXdUqmqSrKmqp4G7CpFcg+w37jlhYMxSVKPhj39dUOSZ3eaZGquBQ5IsjjJbwDH8qv7kkmSejJsqTwX+HqSbye5Kck3k9zUZbDNkpwHXAUsSbIuyfFVtQl4M/BFYC1wYVXdMh15JElbt83TX0l+p6q+B/zhNOX5NVV13FbG1wBrpjmOJGkbtndN5RLG7k58V5LPVtWfTEcoSdJo2t7pr4x7/ZQug0iSRt/2SqW28lqSpF+zvdNfz0jyIGMzlt0HrxksV1U9rtN0kqSRss1Sqao50xVEkjT6pnLre0mStmnWlkqSI5Os2rBhQ99RJGnGmLWlUlWXVtWJ8+fP7zuKJM0Ywz6jXpJmtEUrLpt0/M6Vy6c5yWibtTMVSVJ7lookqRlLRZLUjKUiSWrGUpEkNWOpSJKasVQkSc1YKpKkZiwVSVIzlookqRlLRZLUjKUiSWpm1paKt76XpPZmbal463tJas9b30tSj2baLfdn7UxFktSepSJJasZSkSQ1Y6lIkpqxVCRJzVgqkqRmLBVJUjOWiiSpGUtFktSMpSJJasZSkSQ1Y6lIkpqxVCRJzVgqkqRmZm2p+JAuSWpv1paKD+mSpPZmbalIktqzVCRJzVgqkqRmLBVJUjOWiiSpGUtFktSMpSJJasZSkSQ1Y6lIkpqxVCRJzVgqkqRm5vYdQJK2Z9GKyyYdv3Pl8mlOou1xpiJJasZSkSQ1MyNLJclTkpyVZHXfWSRpNum0VJLslWR1km8lWZvk+Tu4nbOT3J/k5kneW5bktiR3JFkBUFXfqarjdza/JGlqup6pnAZ8oaoO
BJ4BrB3/ZpInJdlzwtj+k2znHGDZxMEkc4CPAC8DlgLHJVnaJrokaao6K5Uk84EXAGcBVNXGqvrRhNVeCFyS5DGD3zkB+PDEbVXVlcADk+zmOcAdg5nJRuB84Ogh8/k4YUlqrMuZymJgPfCJJN9IcmaSPcavUFUXAV8ELkjyauCNwCumsI8FwN3jltcBC5I8McnHgIOTnDLZL/o4YUlqr8vvqcwFDgFOrqqrk5wGrAD+fvxKVfW+JOcDHwWeWlU/2dkdV9UPgZN2djuSNOqm+zs+Xc5U1gHrqurqwfJqxkrmEZIcDhwEXAy8c4r7uAfYb9zywsGYJKkHnc1Uqur7Se5OsqSqbgNeDNw6fp0kBwOrgCOA7wLnJnl3VZ065G6uBQ5IspixMjkW+LNmB7EdfstXkh6p609/ncxYUdwEPBN4z4T35wGvrKpvV9XDwOuAuyZuJMl5wFXAkiTrkhwPUFWbgDczdl1mLXBhVd3S2dFIkrap03t/VdWNwKHbeP9rE5YfAs6YZL3jtrGNNcCanYgpSWpkRn6jXpLUD0tFktSMpSJJasZSkSQ1Y6lIkpqxVCRJzVgqkqRmLBVJUjOWiiSpGUtFktSMpSJJaqbTe39p52ztLsjgnZAl7ZqcqUiSmrFUJEnNWCqSpGYsFUlSM7O2VJIcmWTVhg0b+o4iSTPGrC2Vqrq0qk6cP39+31EkacaYtaUiSWrPUpEkNWOpSJKaSVX1naFXSdYDd/WdY0h7Az/oO0RHPLbRNZOPz2PbuidX1T4TB2d9qYySJNdV1aF95+iCxza6ZvLxeWxT5+kvSVIzlookqRlLZbSs6jtAhzy20TWTj89jmyKvqUiSmnGmIklqxlKRJDVjqezikuyX5L+S3JrkliRv6TtTa0nmJPlGkv/oO0trSfZKsjrJt5KsTfL8vjO1kuSvB38nb05yXpLd+s60M5KcneT+JDePG3tCkiuS3D748/F9ZtxRWzm2fxr8vbwpycVJ9mqxL0tl17cJeFtVLQWeB7wpydKeM7X2FmBt3yE6chrwhao6EHgGM+Q4kywA/go4tKoOAuYAx/abaqedAyybMLYC+EpVHQB8ZbA8is7h14/tCuCgqno68D/AKS12ZKns4qrqvqq6YfD6x4z9o7Sg31TtJFkILAfO7DtLa0nmAy8AzgKoqo1V9aN+UzU1F9g9yVxgHnBvz3l2SlVdCTwwYfho4JOD158EjpnWUI1MdmxV9aWq2jRY/DqwsMW+LJURkmQRcDBwdb9JmvoQ8LfAw30H6cBiYD3wicHpvTOT7NF3qBaq6h7g/cD3gPuADVX1pX5TdWLfqrpv8Pr7wL59hunQG4HLW2zIUhkRSR4LfBZ4a1U92HeeFpIcAdxfVdf3naUjc4FDgI9W1cHATxnd0yePMLi2cDRjxfnbwB5JXtNvqm7V2PcvZtx3MJL8HWOn2c9tsT1LZQQkeTRjhXJuVX2u7zwNHQYcleRO4HzgRUk+02+kptYB66pq88xyNWMlMxO8BPhuVa2vqoeAzwG/13OmLvxvkt8CGPx5f895mkry58ARwKur0ZcWLZVdXJIwdk5+bVV9oO88LVXVKVW1sKoWMXaR9z+rasb8b7eqvg/cnWTJYOjFwK09Rmrpe8Dzkswb/B19MTPkQwgTfB54/eD164F/7zFLU0mWMXbq+aiq+lmr7Voqu77DgNcy9r/4Gwc/L+87lIZ2MnBukpuAZwLv6TlPE4PZ12rgBuCbjP1bMtK3NElyHnAVsCTJuiTHAyuBlya5nbHZ2co+M+6orRzbvwB7AlcM/l35WJN9eZsWSVIrzlQkSc1YKpKkZiwVSVIzlookqRlLRZLUjKUiSWrGUpEkNfP/Zywl7/Nce3oAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "YrSold\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZcAAAD4CAYAAAAgs6s2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQzUlEQVR4nO3de7CcdX3H8feXBAwRCAKiDKAHkIIZRUCwWJuhMmYayXCryGW0KtpQRnFkWjtGSwvt6FTsiG0opYA4Xkq5iaRQw3BpqY4WuRoiECOXggSpeBkTsZTrt388z4nrZs85z57ze85mz3m/Znay++yzv+dznt09n30uZxOZiSRJJW016ACSpJnHcpEkFWe5SJKKs1wkScVZLpKk4uYOOsCg7bLLLjkyMjLoGJI0VO66666fZubLx7p/1pfLyMgId95556BjSNJQiYhHx7vf3WKSpOIsF0lScZaLJKk4y0WSVJzlIkkqznKRJBVnuUiSirNcJEnFzfo/opQ0vEaWf73n9Ec+vXSak6ibWy6SpOIsF0lScZaLJKk4y0WSVJzlIkkqznKRJBVnuUiSivPvXNQq/w5Bmp3ccpEkFWe5SJKKs1wkScVZLpKk4iwXSVJxloskqTjLRZJUnOUiSSrOcpEkFWe5SJKKs1wkScVZLpKk4iwXSVJxfivyFPiNv5LUm1sukqTiLBdJUnGWiySpOMtFklSc5SJJKs5ykSQVZ7lIkoqzXCRJxVkukqTi/At9aQvlN0BomM3IcomIY4GlwA7AJZl544AjSdKs0tpusYiYFxG3R8Q9EXFfRPzVFMb6QkQ8GRH39rhvSUSsi4gHI2I5QGauzMxlwGnAiZP/KSRJk9HmMZdngCMy8w3AgcCSiDisc4aI2DUitu+a9poeY30RWNI9MSLmAOcDbwcWAidHxMKOWc6s75ckTaPWyiUrT9U3t64v2TXb4cDKiHgJQEQsA87rMdY3gZ/3WMybgAcz8+HMfBa4HDgmKucA12fm3b3yRcRREXHRhg0bJvPjSZLG0erZYhExJyJWA08CN2XmbZ33Z+ZVwA3AFRHxLuD9wDv7WMTuwGMdt9fX0z4MvA04PiJO6/XAzLwuM09dsGBBH4uTJDXR6gH9zHwBODAidgSuiYjXZea9XfN8JiIuBy4A9unY2pnKclcAK6Y6jiRpcqbl71wy8xfALfQ+brIIeB1wDXBWn0M/DuzZcXuPepokaYDaPFvs5fUWCxGxLbAY+H7XPAcBFwHHAKcAO0fEJ/tYzB3AvhGxV0RsA5wEXFsivyRp8trcctkNuCUi1lCVwE2Z+W9d88wHTsjMhzLzReA9wKPdA0XEZcCtwH4RsT4iPgCQmc8Dp1Mdt1kLXJmZ97X2E0mSGmntmEtmrgEOmmCeb3fdfg64uMd8J48zxipg1SRjSpJa4HeLSZKKs1wkScVZLpKk4iwXSVJxloskqTjLRZJUnOUiSSrOcpEkFWe5SJKKs1wkScVZLpKk4iwXSVJxloskqTjLRZJUnOUiSSrOcpEkFWe5SJKKs1wkScVZLpKk4iwXSVJxloskqTjLRZJUnOUiSSrOcpEkFWe5SJKKs1wkScVZLpKk4iwXSVJxloskqTjLRZJUnOUiSSrOcpEkFWe5SJKKa1QuEfH6toNIkmaOplsu/xgRt0fEByNiQauJJElDr1G5ZOYi4F3AnsBdEfEvEbG41WSSpKHV+JhLZj4AnAl8DDgcWBER34+IP2grnCRpODU95nJARHwOWAscARyVma+t
r3+uxXySpCE0t+F85wGfBz6RmU+PTszMH0XEma0kkyQNrablshR4OjNfAIiIrYB5mfm/mfmV1tJJkoZS02MuNwPbdtyeX0+TJGkzTctlXmY+NXqjvj6/nUiSpGHXtFx+FREHj96IiDcCT48zvyRpFmt6zOUM4KqI+BEQwCuBE1tLJUkaao3KJTPviIj9gf3qSesy87n2YkmShlnTLReAQ4GR+jEHRwSZ+eVWUkmShlqjcomIrwD7AKuBF+rJCVgukqTNNN1yOQRYmJnZZhhJ0szQ9Gyxe6kO4kuSNKGmWy67APdHxO3AM6MTM/PoVlJJkoZa03I5u80QkqSZpempyN+IiFcD+2bmzRExH5jTbjRJ0rBq+pX7y4CvAhfWk3YHVrYVSpI03Joe0P8Q8BZgI2z6j8N2bSuUJGm4NS2XZzLz2dEbETGX6u9cJEnaTNNy+UZEfALYNiIWA1cB17UXS5I0zJqWy3LgJ8D3gD8GVgH+D5SSpJ6ani32InBxfZEkaVxNv1vsv+lxjCUz9y6eSJI09Pr5brFR84B3AjuVjyNJmgkaHXPJzJ91XB7PzL8DlracTZI0pJruFju44+ZWVFsy/fxfMJKkWaRpQXy24/rzwCPACcXTSJJmhKZni7217SCSpJmj6W6xPxnv/sw8t0wcSdJM0M/ZYocC19a3jwJuBx5oI5Qkabg1LZc9gIMz85cAEXE28PXMfHdbwSRJw6vp17+8Ani24/az9TRJkjbTdMvly8DtEXFNfftY4EvtRJIkDbumZ4t9KiKuBxbVk07JzO+2F0uSNMya7hYDmA9szMy/B9ZHxF4tZZIkDbmm/83xWcDHgI/Xk7YG/rmtUJKk4dZ0y+U44GjgVwCZ+SNg+7ZCSZKGW9NyeTYzk/pr9yPipe1FkiQNu6blcmVEXAjsGBHLgJvxPw6TJI1hwrPFIiKAK4D9gY3AfsBfZuZNLWeTJA2pCcslMzMiVmXm6wELRZI0oaa7xe6OiENbTSJJmjGa/oX+bwPvjohHqM4YC6qNmgPaCiZJGl7jlktEvCozfwj8/jTlkSTNABNtuayk+jbkRyPi6sx8x3SEkiQNt4mOuUTH9b3bDCJJmjkmKpcc47okSWOaaLfYGyJiI9UWzLb1dfj1Af0dWk0nSRpK45ZLZs6ZriCSpJmjn6/clySpEctFklSc5SJJKs5ykSQVZ7lIkoqzXCRJxVkukqTiLBdJUnGWiySpOMtFklSc5SJJKs5ykSQVZ7lIkoqzXCRJxVkukqTiLBdJUnGWiySpOMtFklSc5SJJKs5ykSQVZ7lIkoqzXCRJxVkukqTiLBdJUnGWiySpOMtFklSc5SJJKm7uoAO0ISKOBZYCOwCXZOaNA44kSbNKa1suEbFnRNwSEfdHxH0R8ZEpjPWFiHgyIu7tcd+SiFgXEQ9GxHKAzFyZmcuA04ATJ/9TSJImo83dYs8Df5qZC4HDgA9FxMLOGSJi14jYvmvaa3qM9UVgSffEiJgDnA+8HVgInNy1jDPr+yVJ06i1csnMJzLz7vr6L4G1wO5dsx0OrIyIlwBExDLgvB5jfRP4eY/FvAl4MDMfzsxngcuBY6JyDnD9aIZuEXFURFy0YcOGSf6EkqSxTMsB/YgYAQ4CbuucnplXATcAV0TEu4D3A+/sY+jdgcc6bq+vp30YeBtwfESc1uuBmXldZp66YMGCPhYnSWqi9QP6EbEdcDVwRmZu7L4/Mz8TEZcDFwD7ZOZTU11mZq4AVkx1HEnS5LS65RIRW1MVy6WZ+bUx5lkEvA64Bjirz0U8DuzZcXuPepokaYDaPFssgEuAtZl57hjzHARcBBwDnALsHBGf7GMxdwD7RsReEbENcBJw7dSSS5Kmqs0tl7cAfwgcERGr68uRXfPMB07IzIcy80XgPcCj3QNFxGXArcB+EbE+Ij4AkJnPA6dTHbdZC1yZmfe19yNJkppo7ZhLZn4LiAnm+XbX7eeAi3vMd/I4Y6wCVk0ypiSpBX79
iySpOMtFklSc5SJJKs5ykSQVZ7lIkoqzXCRJxVkukqTiLBdJUnGWiySpOMtFklSc5SJJKs5ykSQVZ7lIkoqzXCRJxVkukqTiLBdJUnGWiySpOMtFklSc5SJJKs5ykSQVZ7lIkoqzXCRJxVkukqTiLBdJUnGWiySpOMtFklSc5SJJKs5ykSQVZ7lIkoqzXCRJxVkukqTiLBdJUnGWiySpuLmDDiBJas/I8q/3nP7Ip5e2uly3XCRJxVkukqTiLBdJUnGWiySpOMtFklSc5SJJKs5ykSQVZ7lIkoqzXCRJxUVmDjrDQEXET4BHJ/nwXYCfFoxTirn6Y67+mKs/MzXXqzPz5WPdOevLZSoi4s7MPGTQObqZqz/m6o+5+jNbc7lbTJJUnOUiSSrOcpmaiwYdYAzm6o+5+mOu/szKXB5zkSQV55aLJKk4y0WSVF5mzsoLsCdwC3A/cB/wkXr6TsBNwAP1vy+rpwewAngQWAMc3DHWq4AbgbX1eCM9lvcS4Ir68bf1mmdAud4H/ARYXV/+qM1cwFs7lrUa+D/g2EGvrz5yNVpfLTyXn6nHWFvPEz2W13PcLSDX2cDjHevsyGnIdQ5wb305cYzlDeI92SRXo9fYJHLtD9wKPAN8tGusJcC6OvPyqayvTfOPd+dMvgC78etfLNsDPwAW1m+W5fX05cA59fUjgevrF85hwG0dY/0nsLi+vh0wv8fyPgj8U339JOCKLSTX+4B/mM711THmTsDPt5T11TBXo/VVMhvwO8C3gTn15Vbg93osr+e4W0Cus+n6ZdZyrqVUv1TnAi8F7gB2GPRrrI9cjV5jk8i1K3Ao8KnO56N+7h4C9ga2Ae4BFk52fW2av8mbZDZcgH8FFlO1924dT966+vqFwMkd86+r718IfKvB+DcAb66vz6X6y9jNPuUNIFejF3KpXF1jnApcuiWsrz5yTWp9TfG5fDNwF7AtMB+4E3htj/F7jrsF5DqbBuVSMNefAX/RMf0S4IRBv8b6yDWp19hEucZ6Purn8YaO2x8HPj7V9eUxFyAiRoCDqDb1XpGZT9R3/Q/wivr67sBjHQ9bX0/7LeAXEfG1iPhuRPxtRMzpsZhNj8/M54ENwM5bQC6Ad0TEmoj4akTsOV6mArk6nQRcNsZipnt9Nc0Ffa6vqWbLzFupdn88UV9uyMy1PRYz1riDzgVwer3OvhARL2szF9Un7yURMT8idqHa5dnreZru11jTXNDOe3IsTd4TvzFfk/U168slIrYDrgbOyMyNnfdlVdE5wRBzgUXAR6k2Ofem+uQxLLmuo9p3egDVJvuXWs41Os5uwOupPg1N2TTm6mt9lcgWEa8BXgvsQfUGPyIiFo33mIbjTleuC4B9gAOpSuizbebKzBuBVcB/UX1IuBV4YbzHNDGNuQbynixtVpdLRGxN9aRcmplfqyf/uP4FM/qL5sl6+uP85qeMPepp64HVmflw3eYrgYN7LG7T4yNiLrAA+Nmgc2XmzzLzmfrm54E39spUMNeoE4BrMvO5MRY33eurUa5+1lfBbMcB38nMpzLzKar9+W/usbixxh1orsz8cWa+kJkvAhcDb2o5F5n5qcw8MDMXUx37+EGPxU37a6xJrhbfk2OZ6D2x2XwTrS+YxeUSEUG1z3NtZp7bcde1wHvr6++l2o85Ov09UTkM2FBvet4B7BgRo98OegTV2RvdOsc9HviP+lPFQHONvghrR1Od9bOZgrlGncz4u56me301ytV0fRXO9kPg8IiYW/8yOXyM5Y417kBzda2z46jOlGotV0TMiYid6zEPAA6gOmuy27S+xprmavE9OZY7gH0jYq+I2IZqt/C1PeZrtL42GetgzEy/AL9Ltbm4ho5TJKn2If471Wl8NwM71fMHcD7VWRXfAw7pGGtxPc73gC8C29TT/xo4ur4+D7iK6jS+24G9t5Bcf0N1GuM9VPvP95+GXCNUn4K26lrGoNdXk1yN1lfJbFRn81zIr08pP7djGZ/v
mK/nuFtArq/Uj1tD9Qtqt5Zzzavz3A98BzhwS3iN9ZGrrffkK6n2aGwEflFf36G+70iqraiHgD+fyvoavfj1L5Kk4mbtbjFJUnssF0lScZaLJKk4y0WSVJzlIkkqznKRJBVnuUiSivt/DWQ0nmv/P5kAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "SalePrice\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAARW0lEQVR4nO3de5AlZXnH8e8jK1fNgLIx1C44ECjMVml0HRHLSxITFSSLiZK4Gy0tQ9h4S8Xyj2QRy5g/UoWpihciKhvFWxRENMgKFoomWpWygAVRQVxdcQ2LF1YsB0MsV/DJH93z7mGcS8/s6dOnZ76fqqnp854+3c/OOTu/eft9uzsyE0mSAB7WdQGSpPFhKEiSCkNBklQYCpKkwlCQJBVrui7gYBx77LE5OTnZdRmS1Cs333zzjzNz7VzP9ToUJicn2blzZ9dlSFKvRMT35nvOw0eSpMJQkCQVhoIkqTAUJElFL0MhIjZFxPbp6emuS5GkFaWXoZCZOzJz68TERNelSNKK0stQkCS1w1CQJBW9PnlttZjcds2c7XsuPGvElUha6ewpSJIKQ0GSVBgKkqTCMYUxMt/YgSSNij0FSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqPHltBfICepKWy1DogGcuSxpXhkKPLTVc7EFIWoxjCpKkwlCQJBWGgiSpGKsxhYj4E+As4DeA92XmZzsuSZJWldZ7ChFxaUTcExG3zWo/IyJ2RcTuiNgGkJlXZeZ5wCuBF7ddmyTpoUZx+OgDwBmDDRFxCHAxcCawAdgSERsGVnlj/bwkaYRaD4XM/BLwk1nNpwG7M/POzNwPXA68ICpvAT6Tmbe0XZsk6aG6GmheB9w18Hhv3fY3wB8B50TEK+d6YURsjYidEbFz37597VcqSavIWA00Z+ZFwEWLrLMd2A4wNTWVo6hLklaLrnoKdwPHDzxeX7dJkjrUVSjcBJwSESdGxKHAZuDqjmqRJNVGMSX1MuDLwKkRsTcizs3MB4DXAtcBdwBXZObtS9jmpojYPj093U7RkrRKtT6mkJlb5mm/Frh2mdvcAeyYmpo672BqkyQ9lJe5kCQVhoIkqehlKDimIEntGKvzFJpyTGG4vPmOpBm97ClIktphKEiSCkNBklT0MhQcaJakdvQyFDJzR2ZunZiY6LoUSVpRehkKkqR2GAqSpKKX5ymoW57XIK1chsIQrNRfkvP9uyStXL08fOTsI0lqRy97Cn25zIV/aUvqm172FCRJ7TAUJEmFoSBJKgwFSVJhKEiSil6GglNSJakdTknV0Cw0BbfvJ/JJq0UvewqSpHYYCpKkwlCQJBWGgiSpMBQkSYWhIEkqehkKnqcgSe3oZShk5o7M3DoxMdF1KZK0ovQyFCRJ7TAUJEmFoSBJKnp57SP1z1JvTeq1kqRu2FOQJBWGgiSpMBQkSYWhIEkqDAVJUtHLUPAyF5LUjl6Ggpe5kKR29DIUJEntMBQkSYWhIEkqDAVJUmEoSJIKL4insTTfBfTmu1DeUteXNLdGPYWIeHzbhUiSutf08NG7IuLGiHh1RHhygCStUI1CITOfCbwEOB64OSI+GhHPabUySdLINR5ozsxvA28E/h74PeCiiPhmRLywreIkSaPVdEzhCRHxNuAO4NnApsz8nXr5bS3WJ0kaoaazj/4VeC/whsz8+UxjZn4/It7YSmWSpJFrGgpnAT/PzAcBIuJhwOGZ+X+Z+eHWqpMkjVTTMYXrgSMGHh9Zt0mSV
pCmoXB4Zv7vzIN6+ch2Slqc91OQpHY0DYX7I2LjzIOIeDLw8wXWb5X3U5CkdjQdU3gd8PGI+D4QwG8BL26tKklSJxqFQmbeFBGPA06tm3Zl5i/bK0uS1IWlXBDvKcBk/ZqNEUFmfqiVqiRJnWgUChHxYeC3gVuBB+vmBAwFSVpBmvYUpoANmZltFiNJ6lbT2Ue3UQ0uS5JWsKY9hWOBb0TEjcAvZhoz8+xWqpKGxJvvSEvTNBTe3GYRkqTx0HRK6hcj4rHAKZl5fUQcCRzSbmmSpFFreuns84ArgUvqpnXAVW0VJUnqRtOB5tcATwfug3LDnd9sqyhJUjeahsIvMnP/zIOIWEN1noIkaQVpGgpfjIg3AEfU92b+OLCjvbIkSV1oGgrbgH3A14G/Bq6lul+zJGkFaTr76FfAv9Vfq9J88901Wr4PUruaXvvou8wxhpCZJw29IklSZ5Zy7aMZhwN/Bjxq+OVIkrrUaEwhM+8d+Lo7M98OeJ0ASVphmh4+2jjw8GFUPYel3ItBktQDTX+x/8vA8gPAHuDPh16NJKlTTWcf/UHbhUiSutf08NHrF3o+M996sIVExEnABcBEZp5zsNtbLqc8rg5LvaS2l+DWatH05LUp4FVUF8JbB7wS2Ag8sv6aU0RcGhH3RMRts9rPiIhdEbE7IrYBZOadmXnucv4RkqThaDqmsB7YmJk/A4iINwPXZOZLF3ndB4B3MnAv54g4BLgYeA6wF7gpIq7OzG8srXRJ0rA17Sk8Btg/8Hh/3bagzPwS8JNZzacBu+uewX7gcuAFDesgIrZGxM6I2Llv376mL5MkNdA0FD4E3BgRb657CTcAH1zmPtcBdw083gusi4hHR8R7gCdFxPnzvTgzt2fmVGZOrV27dpklSJLm0nT20T9FxGeAZ9ZNr8jMrwyzkMy8l2qsQpLUkaY9BYAjgfsy8x3A3og4cZn7vBs4fuDx+rpNktSxplNS/4FqBtKpwPuBhwP/TnU3tqW6CTilDpW7gc3AXyxlAxGxCdh08sknL2P30vA4VVUrTdOewp8CZwP3A2Tm91lgKuqMiLgM+DJwakTsjYhzM/MB4LXAdcAdwBWZeftSis7MHZm5dWJiYikvkyQtoumU1P2ZmRGRABFxVJMXZeaWedqvpbpRjyRpjDTtKVwREZcAR0fEecD1rOIb7kjSSrVoTyEiAvgY8DjgPqpxhTdl5udark2SNGKLhkJ92OjazHw8MBZB4ECzJLWj6eGjWyLiKa1WsgQONEtSO5oOND8VeGlE7KGagRRUnYgntFWYJGn0FgyFiDghM/8HeN6I6pEkdWixnsJVVFdH/V5EfCIzXzSKoiRJ3VhsTCEGlk9qs5CliIhNEbF9enq661IkaUVZLBRynuVOOdAsSe1Y7PDR70bEfVQ9hiPqZTgw0PwbrVYnSRqpBUMhMw8ZVSGSpO4t5dLZkqQVzlCQJBW9DAVnH0lSO3oZCs4+kqR29DIUJEntMBQkSYWhIEkqDAVJUmEoSJKKpvdTGCvDuPPa5LZrhleQNMtCn689F541wkqkpellT8EpqZLUjl6GgiSpHYaCJKkwFCRJhaEgSSoMBUlSYShIkopVe56CNJdRnL8yrH14voPa0MuegucpSFI7ehkKkqR2GAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklT0MhQiYlNEbJ+enu66FElaUXoZCl7mQpLa0ctQkCS1w1CQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJhaEgSSoMBUlSsabrApYjIjYBm04++eSuS5E6M7ntmjnb91x41ogrWdgo6uzLz2I5Rv1v62VPwfspSFI7ehkKkqR2GAqSpMJQkCQVhoIkqTAUJEmFoSBJKgwFSVJhKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkSYWhIEkqDAVJUmEoSJIKQ0GSVBgKkqTCUJAkFYaCJKkwFCRJx
ZquC5gREUcB7wL2A/+VmR/puCRJWnVa7SlExKURcU9E3Dar/YyI2BURuyNiW938QuDKzDwPOLvNuiRJc2v78NEHgDMGGyLiEOBi4ExgA7AlIjYA64G76tUebLkuSdIcWj18lJlfiojJWc2nAbsz806AiLgceAGwlyoYbmWBsIqIrcBWgBNOOGH4RUs9N7ntmqFsZ8+FZy1p+/Otvxyj2Ifm1sVA8zoO9AigCoN1wCeBF0XEu4Ed8704M7dn5lRmTq1du7bdSiVplRmbgebMvB94Rdd1SNJq1kVP4W7g+IHH6+s2SVLHugiFm4BTIuLEiDgU2AxcvZQNRMSmiNg+PT3dSoGStFq1PSX1MuDLwKkRsTcizs3MB4DXAtcBdwBXZObtS9luZu7IzK0TExPDL1qSVrG2Zx9tmaf9WuDaNvctSVo6L3MhSSp6GQqOKUhSO3oZCo4pSFI7IjO7rmHZIuJnwK6u62joWODHXRfRgHUOX19qtc7hGuc6H5uZc579OzYnry3Trsyc6rqIJiJiZx9qtc7h60ut1jlcfalztl4ePpIktcNQkCQVfQ+F7V0XsAR9qdU6h68vtVrncPWlzofo9UCzJGm4+t5TkCQNkaEgSTogM3v5RXWbz13AbmBbi/u5FLgHuG2g7VHA54Bv19+PqdsDuKiu6WvAxoHXvLxe/9vAywfanwx8vX7NRRw4pDfnPhao83jgP4FvALcDfzuOtQKHAzcCX63r/Me6/UTghnrbHwMOrdsPqx/vrp+fHNjW+XX7LuB5i3025tvHIj/XQ4CvAJ8e8zr31O/NrcDOcXzv6/WPBq4Evkl1QcynjVudwKn1z3Hm6z7gdeNWZ2u/80a9w6EUXf1H/Q5wEnAo1S+YDS3t61nARh4aCv88858Y2Aa8pV5+PvCZ+kNyOnDDwBt9Z/39mHp55gN1Y71u1K89c6F9LFDncTMfRuCRwLeo7oE9VrXWr31Evfxwql9+pwNXAJvr9vcAr6qXXw28p17eDHysXt5Qv++HUf0S/U79uZj3szHfPhb5ub4e+CgHQmFc69wDHDurbaze+3qdDwJ/VS8fShUSY1fnrN81PwQeO851DvV33qh3OJSiq78urht4fD5wfov7m+ShobALOK5ePo7qJDqAS4Ats9cDtgCXDLRfUrcdB3xzoL2sN98+llDzp4DnjHOtwJHALcBTqc78XDP7/aW6xPrT6uU19Xox+z2fWW++z0b9mjn3sUB964HPA88GPr3QNrqss15vD78eCmP13gMTwHep/yoe1zpn1fZc4L/Hvc5hfvV1TGG++zyPymMy8wf18g+BxyxS10Lte+doX2gfi4qISeBJVH+Fj12tEXFIRNxKdVjuc1R/Mf80q3ttzN52qad+fhp49DLqf/QC+5jP24G/A35VP15oG13WCZDAZyPi5ojYWreN23t/IrAPeH9EfCUi3hsRR41hnYM2A5ctso1xqHNo+hoKYyOrSM9x2UdEPAL4BPC6zLxvudtZrib7yMwHM/OJVH+JnwY8rs2aliMi/hi4JzNv7rqWhp6RmRuBM4HXRMSzBp8ck/d+DdWh2Hdn5pOA+6kOkSxlGwet6T7qO0OeDXx8uds4GKPYx1z6Ggpd3+f5RxFxHED9/Z5F6lqoff0c7QvtY14R8XCqQPhIZn5ynGsFyMyfUg2OPw04OiJmrsU1uO1ST/38BHDvMuq/d4F9zOXpwNkRsQe4nOoQ0jvGsE4AMvPu+vs9wH9Qhe24vfd7gb2ZeUP9+EqqkBi3OmecCdySmT9aZBtd1zlUfQ2Fg77P80G6mmpWAfX3Tw20vywqpwPTdVfwOuC5EXFMRBxDdZzyuvq5+yLi9IgI4GWztjXXPuZUv/59wB2Z+dZxrTUi1kbE0fXyEVTjHndQhcM589Q5s+1zgC/Uf0FdDWyOiMMi4kTgFKrBuzk/G/Vr5tvHr8nM8zNzfWZO1tv4Qma+ZNzqrH+OR0XEI2eWqd6z2xiz9z4zfwjcFRGn1
k1/SDVbbqzqHLCFA4eOFtpG13UO16gHMYb1RTXi/y2q49EXtLify4AfAL+k+kvnXKrjvp+nmjZ2PfCoet0ALq5r+jowNbCdv6SafrYbeMVA+xTVf+DvAO/kwNS0OfexQJ3PoOpqfo0DU+meP261Ak+gmuL5tXpbb6rbT6L6Zbmbqrt+WN1+eP14d/38SQPbuqCuZRf17I2FPhvz7aPBZ+D3OTD7aOzqrNf/Kgem+V6w0PvS1Xtfr/9EYGf9/l9FNStnHOs8iqrXNjHQNnZ1tvHlZS4kSUVfDx9JklpgKEiSCkNBklQYCpKkwlCQJBWGgiSpMBQkScX/Awjo1vcaDrghAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# Analise univariavel\n", + "for coluna in colunas_numericas:\n", + " print(coluna)\n", + " df[coluna].plot.hist(bins = 50, log= True)\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#Analisando a correlacao entre as variaveis númericas\n", + "plt.figure(figsize = (20,20))\n", + "sns.heatmap(df[colunas_numericas].corr().round(2), annot= True)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "correlacionadas = ['GarageArea', 'GarageCars', 'GrLivArea', 'OverallQual']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Analisando as features com yellowbrick" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting yellowbrick\n", + " Downloading yellowbrick-1.1-py3-none-any.whl (263 kB)\n", + "\u001b[K |████████████████████████████████| 263 kB 969 kB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: cycler>=0.10.0 in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from yellowbrick) (0.10.0)\n", + "Requirement already satisfied: scipy>=1.0.0 in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from yellowbrick) (1.4.1)\n", + "Requirement already satisfied: matplotlib!=3.0.0,>=2.0.2 in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from yellowbrick) (3.2.1)\n", + "Requirement already satisfied: numpy>=1.13.0 in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from yellowbrick) (1.18.2)\n", + "Requirement already satisfied: scikit-learn>=0.20 in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from yellowbrick) (0.22.2.post1)\n", + "Requirement already satisfied: six in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from cycler>=0.10.0->yellowbrick) (1.14.0)\n", + "Requirement already satisfied: 
pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (2.4.6)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (1.2.0)\n", + "Requirement already satisfied: python-dateutil>=2.1 in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from matplotlib!=3.0.0,>=2.0.2->yellowbrick) (2.8.1)\n", + "Requirement already satisfied: joblib>=0.11 in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from scikit-learn>=0.20->yellowbrick) (0.14.1)\n", + "Installing collected packages: yellowbrick\n", + "Successfully installed yellowbrick-1.1\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install yellowbrick" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#Removendo a coluna ID\n", + "colunas_numericas.remove('Id')\n", + "df = df[colunas_numericas]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "y_train = df['SalePrice']\n", + "X_train = df.drop(columns = 'SalePrice')" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages/sklearn/utils/deprecation.py:144: FutureWarning: The sklearn.metrics.classification module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.metrics. 
Anything that cannot be imported from sklearn.metrics is now part of the private API.\n", + " warnings.warn(message, FutureWarning)\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from yellowbrick.features import Rank1D\n", + "\n", + "visualizer = Rank1D(algorithm='shapiro')\n", + "\n", + "visualizer.fit(X_train, y_train) \n", + "visualizer.transform(X_train) \n", + "visualizer.show() " + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from yellowbrick.features import PCA\n", + "\n", + "\n", + "visualizer = PCA(scale=True, proj_features=True, projection=2)\n", + "visualizer.fit_transform(X_train[correlacionadas], y_train)\n", + "visualizer.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages/sklearn/base.py:193: FutureWarning: From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an instance attribute. Previously it would return None.\n", + " warnings.warn('From version 0.24, get_params will raise an '\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from yellowbrick.target import FeatureCorrelation\n", + "\n", + "features = list(X_train.columns)\n", + "\n", + "visualizer = FeatureCorrelation(labels=features)\n", + "\n", + "visualizer.fit(X_train, y_train) \n", + "visualizer.show() " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Treinando o modelo" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting sklearn\n", + " Using cached sklearn-0.0.tar.gz (1.1 kB)\n", + "Collecting scikit-learn\n", + " Downloading scikit_learn-0.22.2.post1-cp38-cp38-macosx_10_9_x86_64.whl (7.2 MB)\n", + "\u001b[K |████████████████████████████████| 7.2 MB 6.5 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: scipy>=0.17.0 in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from scikit-learn->sklearn) (1.4.1)\n", + "Requirement already satisfied: numpy>=1.11.0 in /Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages (from scikit-learn->sklearn) (1.18.2)\n", + "Collecting joblib>=0.11\n", + " Using cached joblib-0.14.1-py2.py3-none-any.whl (294 kB)\n", + "Building wheels for collected packages: sklearn\n", + " Building wheel for sklearn (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for sklearn: filename=sklearn-0.0-py2.py3-none-any.whl size=1315 sha256=a06c24060e74cb4e85f9e51f5e3f55cf62c260de007fc4aedc7c860cdc0e3012\n", + " Stored in directory: /Users/tuliosouza/Library/Caches/pip/wheels/22/0b/40/fd3f795caaa1fb4c6cb738bc1f56100be1e57da95849bfc897\n", + "Successfully built sklearn\n", + "Installing collected packages: joblib, scikit-learn, sklearn\n", + "Successfully installed joblib-0.14.1 scikit-learn-0.22.2.post1 sklearn-0.0\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "pip install sklearn" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.linear_model import LinearRegression" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "reg= LinearRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reg.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [], + "source": [ + "colunas_treinamento = X_train.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [], + "source": [ + "X_test = pd.read_csv('test.csv')\n", + "y_test = pd.read_csv('sample_submission.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "y_test = y_test['SalePrice']" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "X_test= X_test[colunas_treinamento].fillna(df[colunas_treinamento].mean())" + ] + }, + { + "cell_type": 
"code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred = reg.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_squared_error" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [], + "source": [ + "erro_normal = mean_squared_error(y_pred=y_pred, y_true=y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4647194215.33722" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "erro_normal" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aplicando o Feature Selection" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.feature_selection import RFE" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [], + "source": [ + "rfe = RFE(reg)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RFE(estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n", + " normalize=False),\n", + " n_features_to_select=None, step=1, verbose=0)" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rfe.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
colunaboolcoeficientes
0MSSubClassTrue-162.672852
1LotAreaFalse0.396228
2OverallQualTrue17905.067194
3OverallCondTrue4418.794796
4YearBuiltTrue346.653503
5YearRemodAddTrue137.073924
6BsmtFinSF1False11.833598
7BsmtFinSF2False-2.728260
8BsmtUnfSFFalse0.787735
9TotalBsmtSFFalse9.893072
101stFlrSFFalse18.837707
112ndFlrSFFalse18.946369
12LowQualFinSFFalse-6.000309
13GrLivAreaFalse31.783767
14BsmtFullBathTrue8534.894057
15BsmtHalfBathTrue2467.200539
16FullBathTrue3577.489051
17HalfBathTrue-1326.861626
18BedroomAbvGrTrue-10530.779326
19KitchenAbvGrTrue-12927.769856
20TotRmsAbvGrdTrue5132.318055
21FireplacesTrue3596.895112
22GarageCarsTrue10633.749904
23GarageAreaFalse1.396213
24WoodDeckSFFalse26.372691
25OpenPorchSFFalse-5.619397
26EnclosedPorchFalse8.722010
273SsnPorchFalse18.771384
28ScreenPorchFalse57.885991
29PoolAreaFalse-42.613687
30MiscValFalse-0.891248
31MoSoldTrue-115.348621
32YrSoldTrue-757.643913
\n", + "
" + ], + "text/plain": [ + " coluna bool coeficientes\n", + "0 MSSubClass True -162.672852\n", + "1 LotArea False 0.396228\n", + "2 OverallQual True 17905.067194\n", + "3 OverallCond True 4418.794796\n", + "4 YearBuilt True 346.653503\n", + "5 YearRemodAdd True 137.073924\n", + "6 BsmtFinSF1 False 11.833598\n", + "7 BsmtFinSF2 False -2.728260\n", + "8 BsmtUnfSF False 0.787735\n", + "9 TotalBsmtSF False 9.893072\n", + "10 1stFlrSF False 18.837707\n", + "11 2ndFlrSF False 18.946369\n", + "12 LowQualFinSF False -6.000309\n", + "13 GrLivArea False 31.783767\n", + "14 BsmtFullBath True 8534.894057\n", + "15 BsmtHalfBath True 2467.200539\n", + "16 FullBath True 3577.489051\n", + "17 HalfBath True -1326.861626\n", + "18 BedroomAbvGr True -10530.779326\n", + "19 KitchenAbvGr True -12927.769856\n", + "20 TotRmsAbvGrd True 5132.318055\n", + "21 Fireplaces True 3596.895112\n", + "22 GarageCars True 10633.749904\n", + "23 GarageArea False 1.396213\n", + "24 WoodDeckSF False 26.372691\n", + "25 OpenPorchSF False -5.619397\n", + "26 EnclosedPorch False 8.722010\n", + "27 3SsnPorch False 18.771384\n", + "28 ScreenPorch False 57.885991\n", + "29 PoolArea False -42.613687\n", + "30 MiscVal False -0.891248\n", + "31 MoSold True -115.348621\n", + "32 YrSold True -757.643913" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame({'coluna':X_train.columns,\n", + " 'bool': rfe.get_support(),\n", + " 'coeficientes': pd.Series(reg.coef_)})" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_importante = rfe.transform(X_train)\n", + "X_test_importante = rfe.transform(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": 
"execute_result" + } + ], + "source": [ + "reg.fit(X_train_importante, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred_imp = reg.predict(X_test_importante)" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [], + "source": [ + "erro_imp = mean_squared_error(y_pred=y_pred_imp, y_true=y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4628516097.925274" + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "erro_imp" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages/sklearn/base.py:193: FutureWarning: From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an instance attribute. Previously it would return None.\n", + " warnings.warn('From version 0.24, get_params will raise an '\n", + "/Users/tuliosouza/opt/anaconda3/envs/aceleradev_ds/lib/python3.8/site-packages/sklearn/base.py:193: FutureWarning: From version 0.24, get_params will raise an AttributeError if a parameter cannot be retrieved as an instance attribute. Previously it would return None.\n", + " warnings.warn('From version 0.24, get_params will raise an '\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from yellowbrick.model_selection import RFECV\n", + "\n", + "\n", + "# Instantiate the RFECV visualizer with the linear regression estimator\n", + "visualizer = RFECV(reg)\n", + "\n", + "visualizer.fit(X_train, y_train) # Fit the data to the visualizer\n", + "visualizer.show() # Finalize and render the figure" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Aplicando PCA" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.decomposition import PCA" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [], + "source": [ + "pca = PCA(0.95)" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "PCA(copy=True, iterated_power='auto', n_components=0.95, random_state=None,\n", + " svd_solver='auto', tol=0.0, whiten=False)" + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pca.fit(X_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.98511677])" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pca.explained_variance_ratio_" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [], + "source": [ + "X_train_pca = pca.transform(X_train)\n", + "X_test_pca = pca.transform(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" + ] + }, + 
"execution_count": 136, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reg = LinearRegression()\n", + "reg.fit(X_train_pca, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [], + "source": [ + "y_pred_pca = reg.predict(X_test_pca)" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [], + "source": [ + "erro_pca = mean_squared_error(y_pred=y_pred_pca, y_true=y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAegAAAFICAYAAACbTG3iAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAASUUlEQVR4nO3dX2jdd/3H8VeTtDFrujK3idLSaS+iaMZKutuIk4ZuDAZTaaMsyOafi4FOmAakI9PMlTA7vKiujgwGFmeTgYzgNpEyaVjdQDJPatyw/iM48KJDRZONpNk5v4ux+OvmZnp6DvkkeTyu+k1yPt93xgee+Z7z3TmbarVaLQBAUVpWewAA4O0EGgAKJNAAUCCBBoACCTQAFEigAaBAAg0ABWpr9ILPPfdcfvazn+X+++/PCy+8kLGxsSTJoUOHcvnll7/j46rVaubn57N58+Zs2rSp0WMBQFFqtVrOnz+frVu3pqXl7dfLDQ307OxsXnrppSwsLCRJxsfHMzw8nDNnzuSpp55Kf3//Oz52fn4+Z8+ebeQ4AFC8rq6ubNu27W1fb2igr7nmmtxxxx35+te/niR5/fXX097enquvvjrPP//8uz528+bNy4Nu2bKlkWOtOzMzM+nu7l7tMVhH7CkazZ763xYXF3P27Nnl/r1Vw5/i/v86OjqyuLiYc+fO5aqrrnrXn33zae0tW7akvb29mWOtC/4b0Wj2FI1mT63MO72su+KbxKanpzMwMJDkjdeLh4aGcvDgwQwMDGR2dva/PubAgQMZGhrKiRMncsstt9QxNgBsTCu6gh4dHc3ExEQ6OjqSJCdPnszi4mLGxsZSqVQyMjKSY8eOLf/8kSNHkiTd3d0ZGRlpwtgAsL6tKNC7du3K0aNHMzg4mCSZmppKb29vkmTPnj2ZmZlp2ECNXGs9m5qaWu0RWGfsKRrNnro0Kwr0/v378/LLLy8fz83NpbOzc/m4tbU1S0tLaWu79Je0u7u7vW7xP0xNTWXv3r2rPQbriD1Fo9lT/9vCwsK7XpTW9UYlnZ2dmZ+fXz6uVqsNiTMA8Ia6At3T05PJyckkSaVSSVdXV0OHAoCNrq7L3r6+vpw+fTr9/f2p1Wo5fPhwo+cCgCRJ693HG7re6w8ONHS9ZllxoHfu3Jnx8fEkSUtLS4aHh5s2FABsdD4sAwAK5M4uAHiL8+fP5957783s7Gyq1Wq+9rWv5b777ssHP/jBbN
68Obt3785vfvObvPrqq7n//vtz6tSpPPnkk2lra8v111+fb3zjG5c8gytoAHiLxx9/PFdccUV+/OMf56GHHsrw8HBeffXV3Hnnnfne976XJNm9e3dOnDiRpaWlPP300zlx4kROnDiR2dnZ/PKXv7zkGVxB/w+NvjmhYR57cbUneJu1cuPFarOnVsZ+YjWdPXs2U1NTOXPmTJJkaWkp//jHP/KhD31o+Wfe/Pef//znXHfddcsfenH99dfnD3/4Q2644YZLmsEVNAC8xe7du3PzzTfn+PHjGR0dzY033pjt27df8LnNb/579+7dOXPmTJaWllKr1fLrX//6gpDXyxU0AEVbjWdT+vv7c8899+S2227L3NxcPve5z10Q5//vwx/+cG666aZ89rOfTbVazd69e7Nv375LnkGgAeAttmzZkgceeOCCrx04cGD531/5ylcu+N7tt9+e22+/vaEzeIobAAok0ABQIIEGgAIJNAAUSKABoEACDQAFEmgAKJBAA0CBBBoACiTQAFAggQaAAgk0ABRIoAGgQAINAAUSaAAokEADQIEEGgAKJNAAUCCBBoACCTQAFEigAaBAAg0ABRJoACiQQANAgQQaAAok0ABQIIEGgAIJNAAUSKABoEACDQAFEmgAKJBAA0CBBBoACiTQAFAggQaAAgk0ABRIoAGgQAINAAUSaAAokEADQIHamrn4mTNn8tOf/jRzc3O544478tGPfrSZpwOAdaOpgf7d736XP/3pT0mS97///c08FQCsK00N9Mc+9rF8+tOfzvPPP59Tp07l1ltvbebpADak1ruPr/YI/91jL672BBd4/cGB1R7hojT1NehHH300LS0tee9735t//vOfzTwVAKwrdQd6eno6AwNv/DVSrVYzNDSUgwcPZmBgILOzs0mSm2++OYODgzl+/HhuueWWxkwMABtAXU9xj46OZmJiIh0dHUmSkydPZnFxMWNjY6lUKhkZGcmxY8eyb9++7Nu3r6EDA8BGUFegd+3alaNHj2ZwcDBJMjU1ld7e3iTJnj17MjMzU/dAl/JYVtfU1NRqj8A6Yj/RaGttT9UV6P379+fll19ePp6bm0tnZ+fycWtra5aWltLWdvHLd3d3p729vZ6xmqOwmxxKtnfv3tUeYW2wp1bEfroI9tSKlLanFhYW3vWitCE3iXV2dmZ+fn75uFqt1hVnAOANDQl0T09PJicnkySVSiVdXV2NWBYANqyGXOb29fXl9OnT6e/vT61Wy+HDhxuxLABsWHUHeufOnRkfH0+StLS0ZHh4uGFDAcBG58MyAKBAAg0ABRJoACiQQANAgQQaAAok0ABQIIEGgAIJNAAUSKABoEACDQAFEmgAKJBAA0CBBBoACiTQAFAggQaAAgk0ABRIoAGgQAINAAUSaAAokEADQIEEGgAKJNAAUCCBBoACCTQAFEigAaBAAg0ABRJoACiQQANAgQQaAAok0ABQIIEGgAIJNAAUSKABoEACDQAFEmgAKJBAA0CBBBoACiTQAFAggQaAAgk0ABRIoAGgQAINAAUSaAAokEADQIEEGgAKJNAAUCCBBoACtTVz8T/+8Y/50Y9+lMXFxdxxxx3p6upq5ukAYN1o6hX0448/nve9733ZsmVLduzY0cxTAcC60tRAz87O5rbbbsuNN96YJ554opmnAoB1pamBvvLKK3PZZZdl+/btqdVqzTwVAKwrdQd6eno6AwMDSZJqtZqhoaEcPHgwAwMDmZ2dTZL09/fnnnvuyaOPPpqbb765MRMDwAZQ101io6OjmZiYSEdHR5Lk5MmTWVxczNjYWCqVSkZGRnLs2LFce+21eeCBBy5q7ZmZmXpGogBTU1OrPQLriP1Eo621PVVXoHft2pWjR49mcHAwyRu/dG9vb5Jkz549lxTZ7u7utLe31/34hnvsxdWeYM3Yu3fvao+wNthTK2I/XQR7akVK21MLCwvv2su6nuLev39/2tr+0/a5ubl0dnYuH7e2tmZpaa
mepQGANOgmsc7OzszPzy8fV6vVCwIOAFychgS6p6cnk5OTSZJKpeINSQDgEjXkMrevry+nT59Of39/arVaDh8+3IhlAWDDqjvQO3fuzPj4eJKkpaUlw8PDDRsKADY6H5YBAAUSaAAokEADQIEEGgAKJNAAUCCBBoACCTQAFEigAaBAAg0ABRJoACiQQANAgQQaAAok0ABQIIEGgAIJNAAUSKABoEACDQAFEmgAKJBAA0CBBBoACiTQAFAggQaAAgk0ABRIoAGgQAINAAUSaAAokEADQIEEGgAKJNAAUCCBBoACCTQAFEigAaBAAg0ABRJoACiQQANAgQQaAAok0ABQIIEGgAIJNAAUSKABoEACDQAFEmgAKJBAA0CBBBoACiTQAFAggQaAAgk0ABSo6YF+5ZVX8qlPfarZpwGAdaWpga7VannkkUeyY8eOZp4GANadpgb6Jz/5SW655Za0t7c38zQAsO40NdC/+tWvcuLEifz2t7/N008/3cxTAcC6Unegp6enMzAwkCSpVqsZGhrKwYMHMzAwkNnZ2STJ97///QwPD+faa6/NTTfd1JiJAWADaKvnQaOjo5mYmEhHR0eS5OTJk1lcXMzY2FgqlUpGRkZy7Nix5Z8/cuTIiteemZmpZyQKMDU1tdojsI7YTzTaWttTdQV6165dOXr0aAYHB5O88Uv39vYmSfbs2XNJke3u7i7rNevHXlztCdaMvXv3rvYIa4M9tSL200Wwp1aktD21sLDwrr2s6ynu/fv3p63tP22fm5tLZ2fn8nFra2uWlpbqWRoASINuEuvs7Mz8/PzycbVavSDgAMDFaUige3p6Mjk5mSSpVCrp6upqxLIAsGE15DK3r68vp0+fTn9/f2q1Wg4fPtyIZQFgw6o70Dt37sz4+HiSpKWlJcPDww0bCgA2Oh+WAQAFEmgAKJBAA0CBBBoACiTQAFAggQaAAgk0ABRIoAGgQAINAAUSaAAokEADQIEEGgAKJNAAUCCBBoACCTQAFEigAaBAAg0ABRJoACiQQANAgQQaAAok0ABQIIEGgAIJNAAUSKABoEACDQAFEmgAKJBAA0CBBBoACiTQAFAggQaAAgk0ABRIoAGgQAINAAUSaAAokEADQIEEGgAKJNAAUCCBBoACCTQAFEigAaBAAg0ABRJoACiQQANAgQQaAAok0ABQIIEGgAIJNAAUqK2Ziz/33HN58skn89prr+VLX/pSPvKRjzTzdACwbjQ10K+99lruu+++vPTSS3n22WcFGgBWqKlPcX/yk5/Ma6+9luPHj+fWW29t5qkAYF1p6hX03//+93z3u9/NV7/61Vx55ZXNPBUArCt1X0FPT09nYGAgSVKtVjM0NJSDBw9mYGAgs7OzSZKRkZGcO3cuDz74YH7+8583ZmIA2ADquoIeHR3NxMREOjo6kiQnT57M4uJixsbGUqlUMjIykmPHjuWBBx646LVnZmbqGYkCTE1NrfYIrCP2E4221vZUXYHetWtXjh49msHBwSRv/NK9vb1Jkj179lxSZLu7u9Pe3l734xvusRdXe4I1Y+/evas9wtpgT62I/XQR7KkVKW1PLSwsvGsv63qKe//+/Wlr+0/b5+bm0tnZuXzc2tqapaWlepYGANKgu7g7OzszPz+/fFytVi8IOABwcRoS6J6enkxOTiZJKpVKurq6GrEsAGxYDbnM7evry+nTp9Pf359arZbDhw83YlkA2LDqDvTOnTszPj6eJGlpacnw8HDDhgKAjc6HZQBAgQQaAAok0ABQIIEGgAIJNAAUSKABoEACDQAFEmgAKJBAA0CBBBoACiTQAFAggQaAAgk0ABRIoAGgQAINAAUSaAAokEADQIEEGgAKJNAAUCCBBoACCTQAFEigAaBAAg0ABRJoACiQQANAgQQaAAok0ABQIIEGgAIJNAAUSKABoEACDQAFEmgAKJBAA0CBBBoACiTQAFAggQaAAgk0ABRIoAGgQAINAAUSaAAokEADQIEEGg
AKJNAAUCCBBoACCTQAFEigAaBAAg0ABWpr5uIvvPBCxsbGkiSHDh3K5Zdf3szTAcC60dQr6PHx8QwPD+czn/lMnnrqqWaeCgDWlaYG+vXXX097e3uuvvrqnDt3rpmnAoB1pamB7ujoyOLiYs6dO5errrqqmacCgHWl7tegp6enc+TIkRw/fjzVajXf+ta38vvf/z5btmzJd77znVxzzTU5cOBAhoaGcv78+QwPD7/rerVaLUmyuLhY70hN8YGtm1d7hDVjYWFhtUdYE+yplbGfVs6eWpnS9tSbvXuzf2+1qfZO33kXo6OjmZiYSEdHR8bHx/OLX/wizzzzTEZGRlKpVPLwww/n2LFjF7Xmv//975w9e/ZiRwGANa2rqyvbtm1729fruoLetWtXjh49msHBwSTJ1NRUent7kyR79uzJzMzMRa+5devWdHV1ZfPmzdm0aVM9YwHAmlGr1XL+/Pls3br1v36/rkDv378/L7/88vLx3NxcOjs7l49bW1uztLSUtraVL9/S0vJf/4IAgPXqPe95zzt+ryE3iXV2dmZ+fn75uFqtXlScAYALNSTQPT09mZycTJJUKpV0dXU1YlkA2LAacpnb19eX06dPp7+/P7VaLYcPH27EsgCwYdV1FzcA0Fw+LAMACiTQa0i1Wl3tEQBWrLQ3nlprBLpwf/3rX3PnnXfm4x//ePbt25dPfOIT+fKXv5y//OUvqz0aQJLkmWeeyQ033JC+vr4LPhjpi1/84ipOtfb5f6EKd+jQodx999257rrrlr9WqVTyzW9+MydOnFjFyQDe8MMf/jBPPPFEqtVq7rrrriwsLOTWW299x7ewZGUEunCLi4sXxDl5493a4FIMDAzk/PnzF3ytVqtl06ZN/vDjom3evDnbt29Pkjz00EP5/Oc/nw984APeFfISuYu7cPfee28WFxfT29ubbdu2ZX5+PqdOncqWLVvy7W9/e7XHY42anp7OPffckx/84AdpbW294Hs7duxYpalYqwYHB3PFFVfkrrvuymWXXZa//e1v+cIXvpB//etfefbZZ1d7vDVLoAtXq9Vy8uTJTE1NLb+lak9PT/r6+vx1yiV55JFHcs0116Svr2+1R2GNW1paysTERG666aZ0dHQkSV555ZU8/PDDOXTo0CpPt3YJNAAUyF3cAFAggQaAAgk0ABRIoAGgQAINAAX6PyMbUszJvontAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pd.DataFrame({'erro' : [erro_normal, erro_imp, erro_pca]}).plot(kind = 'bar', log = True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/Semana 7/.ipynb_checkpoints/aula_7_feature_engineering-checkpoint.ipynb b/Semana 7/.ipynb_checkpoints/aula_7_feature_engineering-checkpoint.ipynb new file mode 100644 index 0000000..4c91e8d --- /dev/null +++ b/Semana 7/.ipynb_checkpoints/aula_7_feature_engineering-checkpoint.ipynb @@ -0,0 +1,5350 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MyaSGq65woLh" + }, + "source": [ + "![Codenation](https://forum.codenation.com.br/uploads/default/original/2X/2/2d2d2a9469f0171e7df2c4ee97f70c555e431e76.png)\n", + "\n", + "__Autor__: Kazuki Yokoyama (kazuki.yokoyama@ufrgs.br)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "mi4xZxcfBA2U" + }, + "source": [ + "# _Feature engineering_\n", + "\n", + "![cover](https://venturebeat.com/wp-content/uploads/2018/07/feature_engineering.jpg?resize=680%2C198&strip=all)\n", + "\n", + "Neste módulo, trabalharemos a engenharia de _features_, que consiste em preparar os nossos dados para alimentar os algoritmos de ML adequadamente. Ao contrário do mundo dos tutoriais, na vida real os dados dificilmente estarão prontos para serem consumidos. 
Grande parte do tempo de um projeto de ML é gasto com a engenharia de _features_, e quanto melhor a qualidade desta etapa, maiores são as chances de melhores resultados nas etapas seguintes." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "cAxxSlo3QrZV" + }, + "source": [ + "## Importação das bibliotecas" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "jMxYy1NkQwW6" + }, + "outputs": [], + "source": [ + "import functools\n", + "from math import sqrt\n", + "\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import statsmodels.api as sm\n", + "import scipy.stats as sct\n", + "import seaborn as sns\n", + "from sklearn.datasets import load_digits, fetch_20newsgroups\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.feature_extraction.text import (\n", + " CountVectorizer, TfidfTransformer, TfidfVectorizer\n", + ")\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import (\n", + " OneHotEncoder, Binarizer, KBinsDiscretizer,\n", + " MinMaxScaler, StandardScaler, PolynomialFeatures\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "xNbPRHkKQyv2" + }, + "outputs": [], + "source": [ + "# Algumas configurações para o matplotlib.\n", + "%matplotlib inline\n", + "\n", + "from IPython.core.pylabtools import figsize\n", + "\n", + "\n", + "figsize(12, 12)\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "m8onCO86Q2Hm" + }, + "outputs": [], + "source": [ + "np.random.seed(1000)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "EIEVdatWDh3Z" + }, + "source": [ + "## _One-hot encoding_\n", + "\n", + "Até aqui, 
nós praticamente ignoramos a existência de variáveis categóricas. Focamos nas variáveis numéricas porque elas são simples de lidar e bastante comuns. Ainda assim, variáveis categóricas são encontradas facilmente e precisamos de uma forma de trabalhar com elas.\n", + "\n", + "Uma das formas mais simples de representação de variáveis categóricas é através do método chamado _one-hot encoding_. Com ele, uma variável categórica com $h$ categorias é transformada em $h$ novas variáveis binárias (0 ou 1), onde a presença do 1 (_hot_) significa que aquela observação pertence àquela categoria, e 0 (_cold_) que não pertence. Veja um exemplo abaixo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "M1zv6xPDk4ym", + "outputId": "b9b41a48-556d-44e1-f142-708bae7a2d02" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourse
01.5396.61Biology
11.7646.42Biology
21.6958.95Biology
31.8295.14Biology
41.6406.43Physics
51.7787.98Physics
61.6797.90Biology
71.6046.76Physics
81.8197.44Physics
91.6076.01Physics
\n", + "
" + ], + "text/plain": [ + " Height Score Course\n", + "0 1.539 6.61 Biology\n", + "1 1.764 6.42 Biology\n", + "2 1.695 8.95 Biology\n", + "3 1.829 5.14 Biology\n", + "4 1.640 6.43 Physics\n", + "5 1.778 7.98 Physics\n", + "6 1.679 7.90 Biology\n", + "7 1.604 6.76 Physics\n", + "8 1.819 7.44 Physics\n", + "9 1.607 6.01 Physics" + ] + }, + "execution_count": 4, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "rows = 100\n", + "\n", + "height = np.random.normal(loc=1.70, scale=0.2, size=rows).round(3)\n", + "score = np.random.normal(loc=7, scale=1, size=rows).round(2)\n", + "courses = [\"Math\", \"Physics\", \"Biology\"]\n", + "course = np.random.choice(courses, size=rows)\n", + "\n", + "data = pd.DataFrame({\"Height\": height, \"Score\": score, \"Course\": course})\n", + "\n", + "data.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "nK_6LysZP6Lw" + }, + "source": [ + "Criamos um _data set_ que contém duas variáveis numéricas (`Height` e `Score`) e uma variável categórica (`Course`). Nosso objetivo com o _one-hot encoding_ é transformar a variável `Course` em uma sequência de variáveis numéricas binárias, cada uma descrevendo uma classe da variável. 
Neste caso, como temos três categorias para `Course` (Biology, Physics e Math), teremos três novas variáveis binárias.\n", + "\n", + "Vamos treinar esse _encoder_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "MDpY6XcNmYlw", + "outputId": "5fda81c9-000d-4557-cb3f-22d012b3e548" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 0, 0],\n", + " [1, 0, 0],\n", + " [1, 0, 0],\n", + " [1, 0, 0],\n", + " [0, 0, 1],\n", + " [0, 0, 1],\n", + " [1, 0, 0],\n", + " [0, 0, 1],\n", + " [0, 0, 1],\n", + " [0, 0, 1]])" + ] + }, + "execution_count": 5, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "one_hot_encoder = OneHotEncoder(sparse=False, dtype=np.int)\n", + "\n", + "#one_hot_encoder.fit(data[[\"Course\"]])\n", + "\n", + "#course_encoded = one_hot_encoder.transform(...)\n", + "\n", + "course_encoded = one_hot_encoder.fit_transform(data[[\"Course\"]])\n", + "\n", + "course_encoded[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "V-O0cMCyQqk4" + }, + "source": [ + "A saída é um `np.ndarray` com formato `(n, h)`, onde `n` é o número de observações no _data set_ e `h` é o número de categorias da variável codificada." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "BP_QsDI6REl_", + "outputId": "10a0faf0-b05f-4ad8-f79d-7642d15862a7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(100, 3)" + ] + }, + "execution_count": 6, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "course_encoded.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "eoRT2AR8RHNl" + }, + "source": [ + "No atributo `categories_` do _encoder_, temos as categorias da variável:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "ziGE3VCinqM7", + "outputId": "2c77ac8b-ba1b-4479-97aa-b59cff8b78bf" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[array(['Biology', 'Math', 'Physics'], dtype=object)]" + ] + }, + "execution_count": 7, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "one_hot_encoder.categories_" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "y8V2WMjmRUkw" + }, + "source": [ + "Podemos criar as novas colunas que descrevem cada categoria. Repare que, para qualquer linha, apenas uma das colunas contém um 1, indicando a qual categoria aquela observação pertence. Isso acontece, obviamente, se as categorias forem mutuamente exclusivas (uma observação não pode pertencer a mais de uma categoria simultaneamente)." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "dGepWPRFoqc0", + "outputId": "dc6a6dff-007d-4f66-cbfb-2aad4c8a7448" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysics
01.5396.61Biology100
11.7646.42Biology100
21.6958.95Biology100
31.8295.14Biology100
41.6406.43Physics001
51.7787.98Physics001
61.6797.90Biology100
71.6046.76Physics001
81.8197.44Physics001
91.6076.01Physics001
\n", + "
" + ], + "text/plain": [ + " Height Score Course Biology Math Physics\n", + "0 1.539 6.61 Biology 1 0 0\n", + "1 1.764 6.42 Biology 1 0 0\n", + "2 1.695 8.95 Biology 1 0 0\n", + "3 1.829 5.14 Biology 1 0 0\n", + "4 1.640 6.43 Physics 0 0 1\n", + "5 1.778 7.98 Physics 0 0 1\n", + "6 1.679 7.90 Biology 1 0 0\n", + "7 1.604 6.76 Physics 0 0 1\n", + "8 1.819 7.44 Physics 0 0 1\n", + "9 1.607 6.01 Physics 0 0 1" + ] + }, + "execution_count": 8, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "columns_encoded = one_hot_encoder.categories_[0]\n", + "\n", + "data_encoded = pd.concat([data, pd.DataFrame(course_encoded, columns=columns_encoded)], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "iIiVR7P4SHXz" + }, + "source": [ + "Como você deve imaginar, a maior parte da matriz retornada é composta por zeros, sendo apenas alguns elementos compostos de um. Dizemos que essa matriz é __esparsa__. É um grande desperdício de memória trabalhar diretamente como uma matriz esparsa assim. 
Por isso, o _default_ do `OneHotEncoder` é retornar uma `sparse matrix` do SciPy, economizando espaço em memória:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "colab_type": "code", + "id": "muGSmJckraf3", + "outputId": "c8957d2b-68c4-4722-80ea-5e241c479a88" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "<100x3 sparse matrix of type ''\n", + "\twith 100 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 9, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "one_hot_encoder_sparse = OneHotEncoder(sparse=True) # sparse=True é o default.\n", + "\n", + "course_encoded_sparse = one_hot_encoder_sparse.fit_transform(data[[\"Course\"]])\n", + "\n", + "course_encoded_sparse" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "FOYl0Lx8TPJm" + }, + "source": [ + "Para acessar os dados dessa matriz, podemos convertê-la para um _array_ não esparso:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "mtUziaQmrqTN", + "outputId": "bb7920ae-69a0-4543-97da-b1fc2746ddd0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 0., 0.],\n", + " [1., 0., 0.],\n", + " [1., 0., 0.],\n", + " [1., 0., 0.],\n", + " [0., 0., 1.],\n", + " [0., 0., 1.],\n", + " [1., 0., 0.],\n", + " [0., 0., 1.],\n", + " [0., 0., 1.],\n", + " [0., 0., 1.]])" + ] + }, + "execution_count": 10, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "course_encoded_sparse.toarray()[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zHGmVXu1uEvM" + }, + "source": [ + "## Binarização (_Binarization_)\n", + "\n", + "Binarização é o processo de discretizar uma variável 
numérica em dois níveis com base em um _threshold_. Isso pode ser útil, por exemplo, para tornar uma variável numérica contínua em uma variável binária alvo de duas classes (positiva ou negativa).\n", + "\n", + "No exemplo abaixo, vamos separar a variável `Height` em dois grupos, utilizando 1.80 m como _threshold_ de separação. Observações que possuam menos de 1.80 m terão valor 0, enquanto aquelas com mais de 1.80 m terão valor 1:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 208 + }, + "colab_type": "code", + "id": "PeGrPpyWPcOw", + "outputId": "edb6b4c4-97e9-4914-f952-aa60c6dbbbc2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 True\n", + "4 False\n", + "5 False\n", + "6 False\n", + "7 False\n", + "8 True\n", + "9 False\n", + "Name: Height, dtype: bool" + ] + }, + "execution_count": 11, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "tall = (data_encoded.Height > 1.80)\n", + "\n", + "tall[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "94vcsMVguGvG", + "outputId": "b2b15447-7399-4309-b18a-3de5a183a41e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.],\n", + " [0.],\n", + " [0.],\n", + " [1.],\n", + " [0.],\n", + " [0.],\n", + " [0.],\n", + " [0.],\n", + " [1.],\n", + " [0.]])" + ] + }, + "execution_count": 12, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "binarizer = Binarizer(threshold=1.80).fit(data_encoded[[\"Height\"]])\n", + "\n", + "height_binary = binarizer.transform(data_encoded[[\"Height\"]])\n", + "\n", + "height_binary[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "oND_xnxRV8wZ" + }, + "source": [ + "O 
`Binarizer` tem como saída uma matriz binária numérica. Podemos transformá-la em um vetor de _bool_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "iXbf50-4vdDR", + "outputId": "2f7dba40-f513-491a-e072-743ac0a8c88f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Tall
0False
1False
2False
3True
4False
5False
6False
7False
8True
9False
\n", + "
" + ], + "text/plain": [ + " Tall\n", + "0 False\n", + "1 False\n", + "2 False\n", + "3 True\n", + "4 False\n", + "5 False\n", + "6 False\n", + "7 False\n", + "8 True\n", + "9 False" + ] + }, + "execution_count": 13, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "height_bool = pd.DataFrame(height_binary.flatten().astype(bool), columns=[\"Tall\"])\n", + "\n", + "height_bool.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "nn9Gs9DhWNvi" + }, + "source": [ + "Vamos adicionar a nova variável `Tall`, que indica se a pessoa é alta (> 1.80 m), ao nosso _data set_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "xjOV0WlJy7DY", + "outputId": "af316c4b-4931-44cb-a4af-4fa51b3c93fc" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysicsTall
01.5396.61Biology100False
11.7646.42Biology100False
21.6958.95Biology100False
31.8295.14Biology100True
41.6406.43Physics001False
51.7787.98Physics001False
61.6797.90Biology100False
71.6046.76Physics001False
81.8197.44Physics001True
91.6076.01Physics001False
\n", + "
" + ], + "text/plain": [ + " Height Score Course Biology Math Physics Tall\n", + "0 1.539 6.61 Biology 1 0 0 False\n", + "1 1.764 6.42 Biology 1 0 0 False\n", + "2 1.695 8.95 Biology 1 0 0 False\n", + "3 1.829 5.14 Biology 1 0 0 True\n", + "4 1.640 6.43 Physics 0 0 1 False\n", + "5 1.778 7.98 Physics 0 0 1 False\n", + "6 1.679 7.90 Biology 1 0 0 False\n", + "7 1.604 6.76 Physics 0 0 1 False\n", + "8 1.819 7.44 Physics 0 0 1 True\n", + "9 1.607 6.01 Physics 0 0 1 False" + ] + }, + "execution_count": 14, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded = pd.concat([data_encoded, height_bool], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "2tOdmnNi23p4" + }, + "source": [ + "## Discretização (_Binning_)\n", + "\n", + "Discretização, como o nome diz, é o processo de discretizar ou separar em intervalos contínuos uma variável numérica. Isso pode ser útil para converter uma variável numérica em categórica, quando o valor exato numérico não for tão importante quanto o intervalo onde ele se encontra.\n", + "\n", + "Podemos criar _bins_ (_buckets_ ou intervalos) que contenham aproximadamente a mesma quantidade de observações, utilizando a estratégia `quantile` ou que sejam igualmente espaçados com a estratégia `uniform`.\n", + "\n", + "No exemplo a seguir, criamos quatro intervalos da variável `Score` com a estratégia `quantile`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "Xir4K6i522ZQ", + "outputId": "e902850a-d3dc-4d97-a80f-ad3dad1bb1a2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1.],\n", + " [1.],\n", + " [3.],\n", + " [0.],\n", + " [1.],\n", + " [3.],\n", + " [3.],\n", + " [2.],\n", + " [2.],\n", + " [0.]])" + ] + }, + "execution_count": 15, + "metadata": { + "tags": 
[] + }, + "output_type": "execute_result" + } + ], + "source": [ + "discretizer = KBinsDiscretizer(n_bins=4, encode=\"ordinal\", strategy=\"quantile\")\n", + "\n", + "discretizer.fit(data_encoded[[\"Score\"]])\n", + "\n", + "score_bins = discretizer.transform(data_encoded[[\"Score\"]])\n", + "\n", + "score_bins[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "3hrP6E4xYXCs" + }, + "source": [ + "Os limites dos intervalos estão disponíveis no atributo `bin_edges_`. Isso pode ser útil para criarmos _labels_ para colunas do _data set_ por exemplo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "ScCmeNtn3-fF", + "outputId": "be1003a5-2d28-42d6-e76d-bc349e957e95" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([array([4.09 , 6.1975, 6.735 , 7.6 , 9.28 ])], dtype=object)" + ] + }, + "execution_count": 16, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "discretizer.bin_edges_" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "vGl5ONq2Yk7r" + }, + "source": [ + "A função `get_interval()` abaixo facilita a criação de _labels_ indicativas dos intervalos:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "fvB70_vd4fSO" + }, + "outputs": [], + "source": [ + "def get_interval(bin_idx, bin_edges):\n", + " return f\"{np.round(bin_edges[bin_idx], 2):.2f} ⊢ {np.round(bin_edges[bin_idx+1], 2):.2f}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Hn3eqHFbYtfm" + }, + "source": [ + "Cada um dos intervalos mostrados abaixo deve possuir aproximadamente a mesma quantidade de observações:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/", + "height": 139 + }, + "colab_type": "code", + "id": "HX59pepN5ZQQ", + "outputId": "d5b3d4dc-c969-44cb-fa34-e31fad2dd818" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bins quantile\n", + "interval: #elements\n", + "\n", + "4.09 ⊢ 6.20: 25\n", + "6.20 ⊢ 6.74: 25\n", + "6.74 ⊢ 7.60: 25\n", + "7.60 ⊢ 9.28: 25\n" + ] + } + ], + "source": [ + "bin_edges_quantile = discretizer.bin_edges_[0]\n", + "\n", + "print(f\"Bins quantile\")\n", + "print(f\"interval: #elements\\n\")\n", + "for i in range(len(discretizer.bin_edges_[0])-1):\n", + " print(f\"{get_interval(i, bin_edges_quantile)}: {sum(score_bins[:, 0] == i)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "OQ0fli3IY2G6" + }, + "source": [ + "A _Series_ abaixo mostra alguns dos intervalos para os quais as observações foram encaixadas:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 208 + }, + "colab_type": "code", + "id": "SZMBYjqR5-H6", + "outputId": "cba541dc-9f9e-48d8-eb87-fa54440ca353" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 6.20 ⊢ 6.74\n", + "1 6.20 ⊢ 6.74\n", + "2 7.60 ⊢ 9.28\n", + "3 4.09 ⊢ 6.20\n", + "4 6.20 ⊢ 6.74\n", + "5 7.60 ⊢ 9.28\n", + "6 7.60 ⊢ 9.28\n", + "7 6.74 ⊢ 7.60\n", + "8 6.74 ⊢ 7.60\n", + "9 4.09 ⊢ 6.20\n", + "dtype: object" + ] + }, + "execution_count": 19, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "score_intervals = pd.Series(score_bins.flatten().astype(np.int)).apply(get_interval, args=(bin_edges_quantile,))\n", + "\n", + "score_intervals.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "6gWE7IU6Y_9q" + }, + "source": [ + "Também podemos criar uma nova variável, `Score_interval`, no nosso _data set_ com os intervalos (que agora são categorias):" + ] + }, + { + "cell_type": 
"code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "fomFOQbVA8eS", + "outputId": "1f065c4f-6da4-43ad-ebb7-b58706595871" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysicsTallScore_interval
01.5396.61Biology100False6.20 ⊢ 6.74
11.7646.42Biology100False6.20 ⊢ 6.74
21.6958.95Biology100False7.60 ⊢ 9.28
31.8295.14Biology100True4.09 ⊢ 6.20
41.6406.43Physics001False6.20 ⊢ 6.74
51.7787.98Physics001False7.60 ⊢ 9.28
61.6797.90Biology100False7.60 ⊢ 9.28
71.6046.76Physics001False6.74 ⊢ 7.60
81.8197.44Physics001True6.74 ⊢ 7.60
91.6076.01Physics001False4.09 ⊢ 6.20
\n", + "
" + ], + "text/plain": [ + " Height Score Course Biology Math Physics Tall Score_interval\n", + "0 1.539 6.61 Biology 1 0 0 False 6.20 ⊢ 6.74\n", + "1 1.764 6.42 Biology 1 0 0 False 6.20 ⊢ 6.74\n", + "2 1.695 8.95 Biology 1 0 0 False 7.60 ⊢ 9.28\n", + "3 1.829 5.14 Biology 1 0 0 True 4.09 ⊢ 6.20\n", + "4 1.640 6.43 Physics 0 0 1 False 6.20 ⊢ 6.74\n", + "5 1.778 7.98 Physics 0 0 1 False 7.60 ⊢ 9.28\n", + "6 1.679 7.90 Biology 1 0 0 False 7.60 ⊢ 9.28\n", + "7 1.604 6.76 Physics 0 0 1 False 6.74 ⊢ 7.60\n", + "8 1.819 7.44 Physics 0 0 1 True 6.74 ⊢ 7.60\n", + "9 1.607 6.01 Physics 0 0 1 False 4.09 ⊢ 6.20" + ] + }, + "execution_count": 20, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_intervals, columns=[\"Score_interval\"])], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "LldlZ92lZN1k" + }, + "source": [ + "Como dito, podemos utilizar a estratégia `uniform` para criar _bins_ igualmente espaçados, independente do número de observações que cada um possui. Também podemos especificar o tipo de codificação utilizada. 
No caso a seguir, utilizamos `encode=onehot-dense` para informar que queremos que a saída seja codificada como o _one-hot encode_ visto anteriormente:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "E6L1qXuW-v-n", + "outputId": "956f9e9f-67ba-436f-f457-889ee2d1f3db" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 1, 0, 0],\n", + " [0, 1, 0, 0],\n", + " [0, 0, 0, 1],\n", + " [1, 0, 0, 0],\n", + " [0, 1, 0, 0],\n", + " [0, 0, 1, 0],\n", + " [0, 0, 1, 0],\n", + " [0, 0, 1, 0],\n", + " [0, 0, 1, 0],\n", + " [0, 1, 0, 0]])" + ] + }, + "execution_count": 21, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "discretizer_uniform = KBinsDiscretizer(n_bins=4, encode=\"onehot-dense\", strategy=\"uniform\")\n", + "\n", + "discretizer_uniform.fit(data_encoded[[\"Score\"]])\n", + "\n", + "score_bins_uniform = discretizer_uniform.transform(data_encoded[[\"Score\"]]).astype(np.int)\n", + "\n", + "score_bins_uniform[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "YapI8RuMZZfM" + }, + "source": [ + "Note como agora os intervalos são ligeiramente diferentes:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "P8gW9k-w-_CC", + "outputId": "731fca86-f052-4a93-e5bf-e13eec18ac8b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([4.09 , 5.3875, 6.685 , 7.9825, 9.28 ])" + ] + }, + "execution_count": 22, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "bin_edges_uniform = discretizer_uniform.bin_edges_[0]\n", + "\n", + "bin_edges_uniform" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/", + "height": 139 + }, + "colab_type": "code", + "id": "ieyy46EJAnb6", + "outputId": "99835fa9-8003-4060-afae-2c4de66685ff" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bins uniform\n", + "interval: #elements\n", + "\n", + "4.09 ⊢ 5.39: 6\n", + "5.39 ⊢ 6.68: 43\n", + "6.68 ⊢ 7.98: 44\n", + "7.98 ⊢ 9.28: 7\n" + ] + } + ], + "source": [ + "score_intervals_columns = [get_interval(i, bin_edges_uniform) for i in range(4)]\n", + "\n", + "print(f\"Bins uniform\")\n", + "print(f\"interval: #elements\\n\")\n", + "for i in range(len(discretizer_uniform.bin_edges_[0])-1):\n", + " print(f\"{get_interval(i, bin_edges_uniform)}: {sum(score_bins_uniform[:, i])}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "WuWi-1U4Zzf_" + }, + "source": [ + "Podemos adicionar as novas variáveis binárias no _data set_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "P-v3UgiQB87S", + "outputId": "ad22d68f-c0e8-4a91-8838-842e7e2f5041" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysicsTallScore_interval4.09 ⊢ 5.395.39 ⊢ 6.686.68 ⊢ 7.987.98 ⊢ 9.28
01.5396.61Biology100False6.20 ⊢ 6.740100
11.7646.42Biology100False6.20 ⊢ 6.740100
21.6958.95Biology100False7.60 ⊢ 9.280001
31.8295.14Biology100True4.09 ⊢ 6.201000
41.6406.43Physics001False6.20 ⊢ 6.740100
51.7787.98Physics001False7.60 ⊢ 9.280010
61.6797.90Biology100False7.60 ⊢ 9.280010
71.6046.76Physics001False6.74 ⊢ 7.600010
81.8197.44Physics001True6.74 ⊢ 7.600010
91.6076.01Physics001False4.09 ⊢ 6.200100
\n", + "
" + ], + "text/plain": [ + " Height Score Course ... 5.39 ⊢ 6.68 6.68 ⊢ 7.98 7.98 ⊢ 9.28\n", + "0 1.539 6.61 Biology ... 1 0 0\n", + "1 1.764 6.42 Biology ... 1 0 0\n", + "2 1.695 8.95 Biology ... 0 0 1\n", + "3 1.829 5.14 Biology ... 0 0 0\n", + "4 1.640 6.43 Physics ... 1 0 0\n", + "5 1.778 7.98 Physics ... 0 1 0\n", + "6 1.679 7.90 Biology ... 0 1 0\n", + "7 1.604 6.76 Physics ... 0 1 0\n", + "8 1.819 7.44 Physics ... 0 1 0\n", + "9 1.607 6.01 Physics ... 1 0 0\n", + "\n", + "[10 rows x 12 columns]" + ] + }, + "execution_count": 24, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_bins_uniform, columns=score_intervals_columns)], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jD8WM_-yzqSc" + }, + "source": [ + "## Normalização (_Scaling_)\n", + "\n", + "Normalização é o processo de colocar uma variável numérica em uma escala pré-determinada, geralmente $[0, 1]$, mas também é comum ser $[-1, 1]$.\n", + "\n", + "Para colocar no intervalo $[0, 1]$, basta subtrair de cada valor o valor mínimo e dividir pela diferença entre os valores máximo e mínimo:\n", + "\n", + "$$x_{\\text{scaled}} = \\frac{x - x_{\\text{min}}}{x_{\\text{max}} - x_{\\text{min}}}$$\n", + "\n", + "Abaixo, escalamos a variável `Score` no intervalo $[0, 1]$:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "nMM2mu-Qzwnv", + "outputId": "5c60c83b-13bf-431d-e77e-a2fb2e8af317" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.48554913],\n", + " [0.44894027],\n", + " [0.93641618],\n", + " [0.20231214],\n", + " [0.45086705],\n", + " [0.7495183 ],\n", + " [0.73410405],\n", + " [0.51445087],\n", + " [0.64547206],\n", + " [0.3699422 ]])" + ] + }, + "execution_count": 25, + 
"metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "minmax_scaler = MinMaxScaler(feature_range=(0, 1)) # Default feature_scale é (0, 1).\n", + "\n", + "minmax_scaler.fit(data_encoded[[\"Score\"]])\n", + "\n", + "score_normalized = minmax_scaler.transform(data_encoded[[\"Score\"]])\n", + "\n", + "score_normalized[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "FPr-37M2UBj4", + "outputId": "dc170301-56af-4cab-da7c-307c5cbb94a6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.0, 0.9999999999999999)" + ] + }, + "execution_count": 26, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "score_normalized.min(), score_normalized.max()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Et6m_2Bbbq-n" + }, + "source": [ + "Adicionamos a variável `Score` normalizada ao nosso _data set_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "kaYvCQtK0fzi", + "outputId": "9f8ccb6c-d0b7-4445-96c9-490f284f2357" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysicsTallScore_interval4.09 ⊢ 5.395.39 ⊢ 6.686.68 ⊢ 7.987.98 ⊢ 9.28Score_normalized
01.5396.61Biology100False6.20 ⊢ 6.7401000.485549
11.7646.42Biology100False6.20 ⊢ 6.7401000.448940
21.6958.95Biology100False7.60 ⊢ 9.2800010.936416
31.8295.14Biology100True4.09 ⊢ 6.2010000.202312
41.6406.43Physics001False6.20 ⊢ 6.7401000.450867
51.7787.98Physics001False7.60 ⊢ 9.2800100.749518
61.6797.90Biology100False7.60 ⊢ 9.2800100.734104
71.6046.76Physics001False6.74 ⊢ 7.6000100.514451
81.8197.44Physics001True6.74 ⊢ 7.6000100.645472
91.6076.01Physics001False4.09 ⊢ 6.2001000.369942
\n", + "
" + ], + "text/plain": [ + " Height Score Course ... 6.68 ⊢ 7.98 7.98 ⊢ 9.28 Score_normalized\n", + "0 1.539 6.61 Biology ... 0 0 0.485549\n", + "1 1.764 6.42 Biology ... 0 0 0.448940\n", + "2 1.695 8.95 Biology ... 0 1 0.936416\n", + "3 1.829 5.14 Biology ... 0 0 0.202312\n", + "4 1.640 6.43 Physics ... 0 0 0.450867\n", + "5 1.778 7.98 Physics ... 1 0 0.749518\n", + "6 1.679 7.90 Biology ... 1 0 0.734104\n", + "7 1.604 6.76 Physics ... 1 0 0.514451\n", + "8 1.819 7.44 Physics ... 1 0 0.645472\n", + "9 1.607 6.01 Physics ... 0 0 0.369942\n", + "\n", + "[10 rows x 13 columns]" + ] + }, + "execution_count": 27, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_normalized.flatten(), columns=[\"Score_normalized\"])], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "n7-msElsbveR" + }, + "source": [ + "Para avaliar se os valores encontrados conferem, podemos utilizar a função `normalize` abaixo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "EAfUGaFc061d" + }, + "outputs": [], + "source": [ + "def normalize(x, xmin, xmax):\n", + " return (x - xmin)/(xmax - xmin)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "CXywxNX-b-0K" + }, + "source": [ + "A função `partial()` do módulo `functools` (da biblioteca padrão do Python) permite \"congelar\" alguns parâmetros da função passada como argumento, facilitando a invocação desta função quando tais parâmetros são constantes. No caso abaixo, \"congelamos\" os argumentos `xmin` e `xmax` da função `normalize()` com os valores mínimo e máximo da variável `Score`, respectivamente. 
Nas invocações subsequentes de `normalize` não precisaremos passar esses argumentos, somente o argumento \"não congelado\" `x`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "UAlpigp21OVx" + }, + "outputs": [], + "source": [ + "normalize_score = functools.partial(normalize,\n", + " xmin=data_encoded.Score.min(),\n", + " xmax=data_encoded.Score.max())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "nhR0rwUIctTa" + }, + "source": [ + "O valor abaixo realmente confere com aquele encontrado pelo `MinMaxScaler`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "pMfk3jrU1mQV", + "outputId": "f9851c0d-9446-4f10-874e-cdba22b43722" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.485549" + ] + }, + "execution_count": 30, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "normalize_score(data_encoded.Score[0]).round(6)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "HEcSQzWJ2Yum" + }, + "source": [ + "## Padronização (_Standardization_)\n", + "\n", + "Padronização é o processo de tornar a variável com média zero e variância um. Esse processo não deve ser confundido com a normalização descrita acima.\n", + "\n", + "O processo é simples, basta subtrair a média dos dados de cada observação e dividi-los pelo desvio-padrão:\n", + "\n", + "$$x_{\\text{standardized}} = \\frac{x - \\bar{x}}{s}$$\n", + "\n", + "onde $\\bar{x}$ indica a média amostral e $s$ o desvio-padrão amostral." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "kXYXezCNdYue" + }, + "source": [ + "No exemplo abaixo, padronizamos a variável `Score`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "Qfhs3Eaq2dGV", + "outputId": "572aae65-5460-44d1-8134-dbc26f82e2d2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.20752554],\n", + " [-0.40839081],\n", + " [ 2.26628886],\n", + " [-1.76158843],\n", + " [-0.39781896],\n", + " [ 1.24081879],\n", + " [ 1.15624393],\n", + " [-0.0489477 ],\n", + " [ 0.66993854],\n", + " [-0.84183693]])" + ] + }, + "execution_count": 31, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "standard_scaler = StandardScaler()\n", + "\n", + "standard_scaler.fit(data_encoded[[\"Score\"]])\n", + "\n", + "score_standardized = standard_scaler.transform(data_encoded[[\"Score\"]])\n", + "\n", + "score_standardized[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "SJJucIQddgME" + }, + "source": [ + "E adicionamos a variável padronizada ao nosso _data set_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "BAndWLe13RSr", + "outputId": "4a6231c1-f459-4307-ad14-24c4e46760cd" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysicsTallScore_interval4.09 ⊢ 5.395.39 ⊢ 6.686.68 ⊢ 7.987.98 ⊢ 9.28Score_normalizedScore_standardized
01.5396.61Biology100False6.20 ⊢ 6.7401000.485549-0.207526
11.7646.42Biology100False6.20 ⊢ 6.7401000.448940-0.408391
21.6958.95Biology100False7.60 ⊢ 9.2800010.9364162.266289
31.8295.14Biology100True4.09 ⊢ 6.2010000.202312-1.761588
41.6406.43Physics001False6.20 ⊢ 6.7401000.450867-0.397819
51.7787.98Physics001False7.60 ⊢ 9.2800100.7495181.240819
61.6797.90Biology100False7.60 ⊢ 9.2800100.7341041.156244
71.6046.76Physics001False6.74 ⊢ 7.6000100.514451-0.048948
81.8197.44Physics001True6.74 ⊢ 7.6000100.6454720.669939
91.6076.01Physics001False4.09 ⊢ 6.2001000.369942-0.841837
\n", + "
" + ], + "text/plain": [ + " Height Score Course ... 7.98 ⊢ 9.28 Score_normalized Score_standardized\n", + "0 1.539 6.61 Biology ... 0 0.485549 -0.207526\n", + "1 1.764 6.42 Biology ... 0 0.448940 -0.408391\n", + "2 1.695 8.95 Biology ... 1 0.936416 2.266289\n", + "3 1.829 5.14 Biology ... 0 0.202312 -1.761588\n", + "4 1.640 6.43 Physics ... 0 0.450867 -0.397819\n", + "5 1.778 7.98 Physics ... 0 0.749518 1.240819\n", + "6 1.679 7.90 Biology ... 0 0.734104 1.156244\n", + "7 1.604 6.76 Physics ... 0 0.514451 -0.048948\n", + "8 1.819 7.44 Physics ... 0 0.645472 0.669939\n", + "9 1.607 6.01 Physics ... 0 0.369942 -0.841837\n", + "\n", + "[10 rows x 14 columns]" + ] + }, + "execution_count": 32, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_standardized.flatten(), columns=[\"Score_standardized\"])], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "_SgwGLgOdk5Q" + }, + "source": [ + "Note que, ao contrário da variável normalizada, é possível ter valores negativos e positivos, menores e maiores que um. 
Isso é esperado, pois os dados agora têm média 0 e variância 1 (a variância exibida abaixo fica ligeiramente acima de 1 porque o `var()` do pandas usa `ddof=1`, enquanto o `StandardScaler` usa `ddof=0`):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "I0E9fwo93h9w", + "outputId": "2d9d5cdf-181b-4ca1-bea7-b382bf738ebd" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(-1.2501111257279262e-15, 1.0101010101010102)" + ] + }, + "execution_count": 33, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded.Score_standardized.mean(), data_encoded.Score_standardized.var()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Av0cwG_Qd3Ow" + }, + "source": [ + "Novamente, para avaliar os resultados obtidos, podemos escrever nossa própria função de padronização:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "khwEkoks3-cS" + }, + "outputs": [], + "source": [ + "def standardize(x, xmean, xstd):\n", + " return (x - xmean)/xstd" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "14w3018J4Gwy" + }, + "outputs": [], + "source": [ + "standardize_score = functools.partial(standardize,\n", + " xmean=data_encoded.Score.mean(),\n", + " xstd=data_encoded.Score.std())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "UAGxoUK5d-22" + }, + "source": [ + "Como esperado, o valor é muito próximo do encontrado (-0.2065 contra -0.2075); a pequena diferença ocorre porque `Series.std()` usa `ddof=1` por padrão, enquanto o `StandardScaler` usa `ddof=0`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "dpaNVzOy4aCL", + "outputId": "fa0f42f0-32a5-48f4-f8d7-724350cdca86" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.20648530634442175" + ] + }, + "execution_count": 36, + "metadata": { + "tags": [] + }, + "output_type": 
"execute_result" + } + ], + "source": [ + "standardize_score(data_encoded.Score[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "2tO4OOJK7NY1" + }, + "source": [ + "## Criando um _Pipeline_\n", + "\n", + "Todo esse processo de transformar os dados pode ser bastante trabalhoso e entediante. Para facilitar as coisas, o sklearn dispõe de um mecanismo de _pipeline_ que funciona como a esteira de uma linha de montagem. Cada etapa desse _pipeline_ é uma transformação nos dados, de forma que, ao final do _pipeline_, temos os dados totalmente transformados. A vantagem é que agora especificamos todas as etapas, ou transformações, de uma só vez, e podemos reaproveitar esse _pipeline_ no futuro." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "T1LyaI0-B2hV", + "outputId": "011176a0-ec92-4122-9fc4-3b3d0a3118c9" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourse
01.5396.61Biology
11.7646.42Biology
21.6958.95Biology
31.8295.14Biology
41.6406.43Physics
51.7787.98Physics
61.6797.90Biology
71.6046.76Physics
81.8197.44Physics
91.6076.01Physics
\n", + "
" + ], + "text/plain": [ + " Height Score Course\n", + "0 1.539 6.61 Biology\n", + "1 1.764 6.42 Biology\n", + "2 1.695 8.95 Biology\n", + "3 1.829 5.14 Biology\n", + "4 1.640 6.43 Physics\n", + "5 1.778 7.98 Physics\n", + "6 1.679 7.90 Biology\n", + "7 1.604 6.76 Physics\n", + "8 1.819 7.44 Physics\n", + "9 1.607 6.01 Physics" + ] + }, + "execution_count": 37, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "86on9pLMeidf" + }, + "source": [ + "Para evitar bagunçar com nosso _data set_ original, criamos uma cópia (profunda, que é o padrão de `DataFrame.copy()`) dele:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "vdA8euCcZeq1" + }, + "outputs": [], + "source": [ + "data_missing = data.copy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "snDUyWqEenh8" + }, + "source": [ + "E para tornar o exemplo mais interessante, adicionamos (ou removemos?) dados faltantes ao _data set_. Isso porque uma das transformações úteis que podemos aplicar no _pipeline_ é justamente a imputação de dados, ou seja, preencher dados faltantes.\n", + "\n", + "As variáveis numéricas faltantes são representadas por `np.nan`, enquanto a variável categórica é representada pela classe `Unknown`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "LkVnbFAKS_fF", + "outputId": "6ba74eb6-0d60-419a-c39a-dd165cd49b60" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourse
21.695NaNUnknown
51.778NaNPhysics
8NaN7.44Physics
111.539NaNBiology
15NaN5.44Biology
24NaN8.08Biology
292.0206.83Unknown
331.691NaNMath
352.0856.96Unknown
381.3766.54Unknown
\n", + "
" + ], + "text/plain": [ + " Height Score Course\n", + "2 1.695 NaN Unknown\n", + "5 1.778 NaN Physics\n", + "8 NaN 7.44 Physics\n", + "11 1.539 NaN Biology\n", + "15 NaN 5.44 Biology\n", + "24 NaN 8.08 Biology\n", + "29 2.020 6.83 Unknown\n", + "33 1.691 NaN Math\n", + "35 2.085 6.96 Unknown\n", + "38 1.376 6.54 Unknown" + ] + }, + "execution_count": 39, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "unknown_height_idx = pd.Index(np.random.choice(data_missing.index, 10, replace=False))\n", + "unknown_score_idx = pd.Index(np.random.choice(data_missing.index, 10, replace=False))\n", + "unknown_course_idx = pd.Index(np.random.choice(data_missing.index, 10, replace=False))\n", + "\n", + "data_missing.loc[unknown_height_idx, \"Height\"] = np.nan\n", + "data_missing.loc[unknown_score_idx, \"Score\"] = np.nan\n", + "data_missing.loc[unknown_course_idx, \"Course\"] = \"Unknown\"\n", + "\n", + "data_missing_idx = unknown_height_idx | unknown_score_idx | unknown_course_idx\n", + "\n", + "data_missing.loc[data_missing_idx].head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "nmUJS9SzfC9Y" + }, + "source": [ + "Criamos o _pipeline_ com as seguintes etapas:\n", + "\n", + "1. Faça imputação dos dados, preenchendo os dados faltantes com a mediana dos dados presentes.\n", + "2. Faça a normalização dos dados no intervalo _default_ $[0, 1]$.\n", + "3. Crie novas variáveis através da expansão polinomial da variável original." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9ypslSlEhGBr" + }, + "source": [ + "O `Pipeline` recebe uma lista de transformações representadas por tuplas de dois elementos. Cada tupla contém:\n", + "\n", + "* O nome para a etapa (ou transformação ou estimador). 
Isso vai ser útil para recuperar algumas informações do _pipeline_ mais à frente.\n", + "* Um objeto da classe do transformador ou estimador, já com seus parâmetros configurados." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "XqthBhA18ITd" + }, + "outputs": [], + "source": [ + "num_pipeline = Pipeline(steps=[\n", + " (\"imputer\", SimpleImputer(strategy=\"median\")),\n", + " (\"minmax_scaler\", MinMaxScaler()),\n", + " (\"poly_features\", PolynomialFeatures(degree=2, include_bias=False))\n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "3UVr1XWCfZID" + }, + "source": [ + "Depois da especificação do nosso _pipeline_, podemos aplicá-lo simultaneamente a diversas variáveis (desde que as transformações especificadas façam sentido).\n", + "\n", + "No exemplo abaixo, aplicamos esse _pipeline_ às variáveis `Height` e `Score` ao mesmo tempo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "Qh8kbymmDZqB", + "outputId": "0595019a-1288-4ea8-d18b-1d61dc44136b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.26553106, 0.48554913, 0.07050674, 0.12892838, 0.23575796],\n", + " [0.49098196, 0.44894027, 0.24106329, 0.22042158, 0.20154737],\n", + " [0.42184369, 0.48843931, 0.1779521 , 0.20604504, 0.23857296],\n", + " [0.55611222, 0.20231214, 0.30926081, 0.11250825, 0.0409302 ],\n", + " [0.36673347, 0.45086705, 0.13449344, 0.16534804, 0.2032811 ],\n", + " [0.50501002, 0.48843931, 0.25503512, 0.24666674, 0.23857296],\n", + " [0.40581162, 0.73410405, 0.16468307, 0.29790795, 0.53890875],\n", + " [0.33066132, 0.51445087, 0.10933691, 0.170109 , 0.26465969],\n", + " [0.41082164, 0.64547206, 0.16877442, 0.26517389, 0.41663418],\n", + " [0.33366733, 0.3699422 , 0.11133389, 0.12343763, 0.13685723]])" + ] + }, + 
"execution_count": 41, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline_transformation = num_pipeline.fit_transform(data_missing[[\"Height\", \"Score\"]])\n", + "\n", + "pipeline_transformation[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "HoNf9vDJfrW8" + }, + "source": [ + "Para deixar mais clara a saída do _pipeline_, podemos utilizar os nomes das _features_ geradas através do método `get_feature_names()`. Para tornar ainda mais claro, substituímos o que é chamado `x0` por `Height_n` e `x1` por `Score_n`, o que é inferido pela ordem das variáveis no _pipeline_." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "OJz5zvr2EeM3", + "outputId": "444fe35c-4e5e-4f9c-ef6a-152dd9bcd775" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Height_n', 'Score_n', 'Height_n^2', 'Height_n Score_n', 'Score_n^2']" + ] + }, + "execution_count": 42, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "poly_features = num_pipeline.get_params()[\"poly_features\"].get_feature_names()\n", + " \n", + "pipeline_columns = [old_name.replace(\"x0\", \"Height_n\").replace(\"x1\", \"Score_n\") for old_name in poly_features]\n", + "\n", + "pipeline_columns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MBgEafF-gKA3" + }, + "source": [ + "Criamos um novo _data set_ com essas variáveis resultantes do _pipeline_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 237 + }, + "colab_type": "code", + "id": "q_xBepJGIAJm", + "outputId": "6126947b-ef3f-42db-84aa-4317ed5f79d3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Height_nScore_nHeight_n^2Height_n Score_nScore_n^2
00.2655310.4855490.0705070.1289280.235758
10.4909820.4489400.2410630.2204220.201547
20.4218440.4884390.1779520.2060450.238573
30.5561120.2023120.3092610.1125080.040930
40.3667330.4508670.1344930.1653480.203281
50.5050100.4884390.2550350.2466670.238573
\n", + "
" + ], + "text/plain": [ + " Height_n Score_n Height_n^2 Height_n Score_n Score_n^2\n", + "0 0.265531 0.485549 0.070507 0.128928 0.235758\n", + "1 0.490982 0.448940 0.241063 0.220422 0.201547\n", + "2 0.421844 0.488439 0.177952 0.206045 0.238573\n", + "3 0.556112 0.202312 0.309261 0.112508 0.040930\n", + "4 0.366733 0.450867 0.134493 0.165348 0.203281\n", + "5 0.505010 0.488439 0.255035 0.246667 0.238573" + ] + }, + "execution_count": 43, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "height_score_normalized_poly = pd.DataFrame(pipeline_transformation, columns=pipeline_columns)\n", + "\n", + "height_score_normalized_poly.head(6)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9imGtnaygRiX" + }, + "source": [ + "Podemos também criar outro _pipeline_ para a variável categórica `Course`. Como se trata de uma variável de natureza completamente diferente, precisamos especificar um _pipeline_ diferente com as seguintes transformações:\n", + "\n", + "1. Preencha os dados faltantes (`None`) com a classe `Unknown`.\n", + "2. Crie novas variáveis binárias com o `OneHotEncoder`." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "eZP_HTkchI5c" + }, + "source": [ + "Assim como no _pipeline_ anterior, especificamos cada etapa como uma tupla com um nome e um objeto de um transformador ou estimador:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "NMv_2lV7KxTM" + }, + "outputs": [], + "source": [ + "cat_pipeline = Pipeline([\n", + " (\"imputer\", SimpleImputer(strategy=\"constant\", fill_value=\"Unknown\")),\n", + " (\"one_hot_encoder\", OneHotEncoder(sparse=False, dtype=np.int))\n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "wK66jYTShV52" + }, + "source": [ + "Após a especificação do _pipeline_, podemos aplicá-lo à nossa variável `Course`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "KIFWvPS7LNUA" + }, + "outputs": [], + "source": [ + "course_pipeline_transformation = cat_pipeline.fit_transform(data_missing[[\"Course\"]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "quJ4ThBBhfBI" + }, + "source": [ + "Agora, utilizaremos o nome que demos à etapa do `OneHotEncoder` para recuperar esse transformador através do método `get_params()`. 
Depois de recuperado o `OneHotEncoder`, acessamos seu atributo `categories_` (primeiro índice `[0]`, pois poderíamos ter aplicado o _pipeline_ a mais de uma variável categórica):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "Zurb-NVWM4sX", + "outputId": "1e7c2960-6ffb-4285-bb2d-691157302850" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Biology', 'Math', 'Physics', 'Unknown'], dtype=object)" + ] + }, + "execution_count": 46, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "course_columns = cat_pipeline.get_params()[\"one_hot_encoder\"].categories_[0]\n", + "\n", + "course_columns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ABQDGjU_iDGS" + }, + "source": [ + "Utilizamos a saída do _pipeline_ e os nomes das categorias recuperados do transformador para criar um novo `DataFrame`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "colab_type": "code", + "id": "3ec56uIcMvll", + "outputId": "5707acac-8d67-4d74-eb02-d73b98f6340a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BiologyMathPhysicsUnknown
01000
11000
20001
31000
40010
\n", + "
" + ], + "text/plain": [ + " Biology Math Physics Unknown\n", + "0 1 0 0 0\n", + "1 1 0 0 0\n", + "2 0 0 0 1\n", + "3 1 0 0 0\n", + "4 0 0 1 0" + ] + }, + "execution_count": 47, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "course_discretized = pd.DataFrame(course_pipeline_transformation, columns=course_columns)\n", + "\n", + "course_discretized.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "OeO6hmSEiL6N" + }, + "source": [ + "Por fim, combinamos as saídas dos dois _pipelines_ para criar um único `DataFrame`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "d8tL_jS1NTf7", + "outputId": "8b39c1c3-e549-4cea-fade-7c8e90d290ba" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Height_nScore_nHeight_n^2Height_n Score_nScore_n^2BiologyMathPhysicsUnknown
00.2655310.4855490.0705070.1289280.2357581000
10.4909820.4489400.2410630.2204220.2015471000
20.4218440.4884390.1779520.2060450.2385730001
30.5561120.2023120.3092610.1125080.0409301000
40.3667330.4508670.1344930.1653480.2032810010
50.5050100.4884390.2550350.2466670.2385730010
60.4058120.7341040.1646830.2979080.5389091000
70.3306610.5144510.1093370.1701090.2646600010
80.4108220.6454720.1687740.2651740.4166340010
90.3336670.3699420.1113340.1234380.1368570010
\n", + "
" + ], + "text/plain": [ + " Height_n Score_n Height_n^2 ... Math Physics Unknown\n", + "0 0.265531 0.485549 0.070507 ... 0 0 0\n", + "1 0.490982 0.448940 0.241063 ... 0 0 0\n", + "2 0.421844 0.488439 0.177952 ... 0 0 1\n", + "3 0.556112 0.202312 0.309261 ... 0 0 0\n", + "4 0.366733 0.450867 0.134493 ... 0 1 0\n", + "5 0.505010 0.488439 0.255035 ... 0 1 0\n", + "6 0.405812 0.734104 0.164683 ... 0 0 0\n", + "7 0.330661 0.514451 0.109337 ... 0 1 0\n", + "8 0.410822 0.645472 0.168774 ... 0 1 0\n", + "9 0.333667 0.369942 0.111334 ... 0 1 0\n", + "\n", + "[10 rows x 9 columns]" + ] + }, + "execution_count": 48, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_transformed = pd.concat([height_score_normalized_poly, course_discretized], axis=1)\n", + "\n", + "data_transformed.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "1NLD-pyliXWO" + }, + "source": [ + "Vale ressaltar que:\n", + "\n", + "* Poderíamos utilizar também o `ColumnTransformer` para compor (por isso, ele se encontra no módulo `sklearn.compose`) múltiplos `Pipeline` em diferentes variáveis.\n", + "* Os `Pipeline` não servem apenas para a transformação dos dados de treinamento. Eles também podem (e devem) ser usados para submeter os dados de teste e até de produção aos mesmos procedimentos dos dados de treinamento." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "SbShR7kMZGwE" + }, + "source": [ + "## _Outliers_\n", + "\n", + "_Outliers_, os famosos \"pontos fora da curva\", são observações que não parecem seguir o mesmo padrão dos demais dados. Eles podem vir de distribuições diferentes, serem erros na coleta de dados, erros de medição etc.\n", + "\n", + "Eles influenciam nossas análises e os nossos algoritmos ao apresentar comportamento destoante do resto do _data set_, impactando na média, variância, funções de perda e custo etc. 
Se fizer sentido, eles devem ser removidos ou transformados antes de prosseguirmos com a análise.\n", + "\n", + "No entanto, devemos julgar com cautela sua remoção: __alguns _outliers_ são dados autênticos e devem ser estudados com atenção__. Por exemplo, a remoção de uma medição muito alta na temperatura de um reator seria um erro, pois essa medição pode estar nos indicando um potencial problema com o dispositivo.\n", + "\n", + "Abaixo estudamos algumas técnicas simples para encontrar _outliers_.\n", + "\n", + "![outlier](https://www.stats4stem.org/common/web/plugins/ckeditor/plugins/doksoft_uploader/userfiles/WithInfOutlier.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "u3bsTDv0pAN4" + }, + "source": [ + "Começamos criando uma cópia da variável `Height` do nosso _data set_ para não impactar o original:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "tQ7AQztcZkYx" + }, + "outputs": [], + "source": [ + "height_outlier = data.Height.copy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "VQNHBAu4pHcp" + }, + "source": [ + "Adicionamos dez _outliers_ que representam pessoas estranhamente baixas ou estranhamente altas para o padrão que estamos observando:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 208 + }, + "colab_type": "code", + "id": "nX2R3V0HZI0w", + "outputId": "6acbd63c-820e-485a-cde4-72a69fefe13d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "14 1.646795\n", + "18 1.696510\n", + "29 0.516665\n", + "38 2.943781\n", + "48 1.058498\n", + "49 1.326605\n", + "57 2.074231\n", + "66 1.831315\n", + "68 2.737088\n", + "96 1.966029\n", + "Name: Height, dtype: float64" + ] + }, + "execution_count": 50, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + 
"height_outlier_idx = pd.Index(np.random.choice(height_outlier.index, 10, replace=False))\n", + "\n", + "too_short_idx = pd.Index(height_outlier_idx[:5])\n", + "too_tall_idx = pd.Index(height_outlier_idx[5:])\n", + "\n", + "height_outlier[too_short_idx] = np.random.normal(loc=1.30, scale=0.5, size=5)\n", + "height_outlier[too_tall_idx] = np.random.normal(loc=2.20, scale=0.5, size=5)\n", + "\n", + "outlier_idx = too_short_idx | too_tall_idx\n", + "\n", + "height_outlier[outlier_idx]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "mwNbTzDnpoDL" + }, + "source": [ + "Note que nem todos dados gerados se tornaram realmente _outliers_. Como geramos de uma distribuição aleatória, corremos esse risco.\n", + "\n", + "No entanto, temos alguns dados estranhos como 0.51 m e 2.73 m." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "x5pwD_1EqRNZ" + }, + "source": [ + "No _boxplot_ padrão, os dados mais extremos são mostrados como pontos fora do alcance dos _whiskers_ (as barrinhas do _box plot_).\n", + "\n", + "No caso abaixo, notamos três pontos acima e três pontos abaixo do considerado \"dentro da faixa normal\"." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 695 + }, + "colab_type": "code", + "id": "hRMVhYz3b2KH", + "outputId": "9e090cef-804c-4f17-958b-5e25154662db" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAt0AAAKmCAYAAACR0iLwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3W+s3nV9//HXdZ22YbWupz1WOPyx\naLYQElgwcyEn7SAUsCSjnGLWQZg62YYy0G6LGe3449KCjpKFZGNk1c0QWZwhZFQ5woQBC3i64zLv\nLKWS2p9RKv0HHHrqiujsua7fDfXEyr9TPe/raq/zeNw61znf61yv3sGn33zOdTXa7XY7AABAmWa3\nBwAAQK8T3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBA\nMdENAADF5nR7wEw7cODltFrtbs8AAKBHNZuNLFr0lqN6Ts9Fd6vVFt0AABxTHC8BAIBiohsAAIqJ\nbgAAKCa6AQCgmOgGAIBiohsAAIqJbgAAKCa6AQCgmOgGAIBiohsAAIp17GPgr7vuujz33HNpNpuZ\nP39+brnllpx55plHXDM5OZnbbrstX/3qV9NoNPLhD384a9as6dREAAAo0bHo3rRpU9761rcmSR57\n7LHceOON2bJlyxHXjIyMZNeuXXn00UczMTGR1atXZ2hoKKeeemqnZgIAwIzr2PGSnwZ3khw6dCiN\nRuNV1zz88MNZs2ZNms1mFi9enIsuuihf+cpXOjURAABKdOxOd5LcdNNN2bp1a9rtdv7pn/7pVT/f\nu3dvTj755KnHg4OD2bdv31G9xsDAgl96JwAAzKSORvcnP/nJJMkXv/jF3HHHHfnHf/zHGX+N8fFD\nabXaM/57AQAgSZrNxlHf6O3Ku5esXr06//Vf/5UDBw4c8f3BwcHs2bNn6vHevXtz0kkndXoeAADM\nqI5E98svv5y9e/dOPX7iiSeycOHC9Pf3H3HdJZdckvvvvz+tVisvvfRSHnvssaxcubITEwEAoExH\njpe88sor+dM//dO88soraTabWbhwYTZv3pxGo5Frrrkma9euzdlnn53h4eH8z//8T9773vcmSa6/\n/vqcdtppnZgIAABlGu12u6cOQDvTDQBApePmTDcAAMwmohsAAIqJbgAAKCa6AQCgmOgGmMUmJg7k\n9ts35uDBiW5PAehpohtgFhsZ2ZKdO3fkwQcf6PYUgJ4mugFmqYmJAxkdfTLtdjujo0+52w1QSHQD\nzFIjI1umPteg1Wq52w1QSHQDzFJjY1szOXk4STI5eThjY1u7vAigd4lugFlqaGhZ+vrmJEn6+uZk\naGhZlxcB9C7RDTBLrVp1eZrNRpKk2Wzmssve1+VFAL1LdAPMUv39i7J8+flpNBpZvvy8LFzY3+1J\nAD1rTrcHANA9q1Zdnt27n3OXG6BYo91ut7s9YiaNjx+a+mt8AACYac1mIwMDC47uOUVbAACAnxDd\nAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVE\nNwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENw
AAFBPdAABQTHQDAEAx\n0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQ\nTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdALPYxMSB3H77xhw8ONHtKQA9\nTXQDzGIjI1uyc+eOPPjgA92eAtDTRDfALDUxcSCjo0+m3W5ndPQpd7sBColugFlqZGRLWq12kqTV\narnbDVBIdAPMUmNjWzM5eThJMjl5OGNjW7u8CKB3iW6AWWpoaFn6+uYkSfr65mRoaFmXFwH0LtEN\nMEutWnV5ms1GkqTZbOayy97X5UUAvUt0A8xS/f2Lsnz5+Wk0Glm+/LwsXNjf7UkAPWtOtwcA0D2r\nVl2e3bufc5cboFij3W63uz1iJo2PH5r6a3wAAJhpzWYjAwMLju45RVsAAICfEN0AAFBMdAMAQDHR\nDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBM\ndAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAU\nE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAA\nxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMA\nQDHRDQAAxeZ04kUOHDiQG264Ibt27cq8efOydOnSbNy4MYsXLz7iuvXr1+c///M/s2jRoiTJJZdc\nkj/5kz/pxEQAACjTaLfb7eoXmZiYyI4dO3LuuecmSTZt2pSDBw/mU5/61BHXrV+/PmeddVbe//73\n/8KvNT5+KK1W+T8JAIBZqtlsZGBgwdE9p2jLEfr7+6eCO0nOOeec7NmzpxMvDQAAXdfxM92tVitf\n+MIXsmLFitf8+T333JNVq1bluuuuy7e+9a0OrwMAgJnXkeMlP2vDhg3Zv39//v7v/z7N5pHNv3//\n/ixZsiTNZjNf/OIX87d/+7d57LHH0tfX18mJAAAwozoa3Zs2bcqOHTuyefPmzJs3702vP/fcc/PA\nAw/klFNOmfZrONMNAEClY/ZMd5Lceeedefrpp3P33Xe/bnDv379/6uuvfvWraTabOfHEEzs1EQAA\nSnTkTvfOnTtz6aWX5vTTT88JJ5yQJDn11FNz9913Z3h4OJ/5zGdy4okn5kMf+lDGx8fTaDSyYMGC\n3HDDDTnnnHOO6rXc6QYAoNIvcqe742e6q4luAAAqHdPHSwAAYLYS3QAAUEx0AwBAMdENAADFRDcA\nABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdEN\nAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0\nAwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT\n3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0A8xiExMHcvvt\nG3Pw4ES3pwD0NNENMIuNjGzJzp078uCDD3R7CkBPE90As9TExIGMjj6Zdrud0dGn3O0GKCS6AWap\nkZEtabXaSZJWq+VuN0Ah0Q0wS42Nbc3k5OEkyeTk4YyNbe3yIoDeJboBZqmhoWXp65uTJOnrm5Oh\noWVdXgTQu0Q3wCy1atXlaTYbSZJms5nLLntflxcB9C7RDTBL9fcvyvLl56fRaGT58vOycGF/tycB\n9Kw53R4AQPesWnV5du9+zl1ugGKNdr
vd7vaImTQ+fmjqr/EBAGCmNZuNDAwsOLrnFG0BAAB+wvES\nmCW2bn0qo6NPdnsGx5iffiCO89z8vOXLz8+yZed1ewb0DHe6AWaxgwcP5uDBg92eAdDznOkGmMU2\nbbo1SbJu3S1dXgJw/HCmGwAAjkGiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY\n6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAo\nJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAA\nioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYA\ngGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroB\nAKCY6AYAgGJzOvEiBw4cyA033JBdu3Zl3rx5Wbp0aTZu3JjFixcfcd0rr7ySv/zLv8z27dvT19eX\ndevW5YILLujERAAAKNORO92NRiN//Md/nEceeSQjIyM57bTT8jd/8zevuu6zn/1sFixYkH//93/P\n5s2bc/PNN+fll1/uxEQAACjTkeju7+/PueeeO/X4nHPOyZ49e1513b/927/liiuuSJKcfvrpOeus\ns/LUU091YiIAAJTp+JnuVquVL3zhC1mxYsWrfrZnz56ccsopU48HBwezb9++Ts4DAIAZ15Ez3T/r\n1ltvzfz58/P+97+/5PcPDCwo+b0AvWju3L4kyZIlb+3yEoDe1tHo3rRpU5599tls3rw5zearb7Kf\nfPLJ2b1799QfWO7du/eIYynTMT5+KK1We0b2AvS6H/1oMknywgv/2+UlAMePZrNx1Dd6O3a85M47\n78zTTz+du+++O/PmzXvNay655JLcd999SZLvfOc72bZtW377t3+7UxMBAKBER6J7586d+fSnP53n\nn38+V155ZYaHh3P99dcnSYaHh7N///4kyR/90R/le9/7Xi6++OJ85CMfycaNG7NggeMiAAAc3zpy\nvOTXf/3Xs2PHjtf82Ze+9KWpr+fPn5+/+7u/68QkAADoGJ9ICQAAxUQ3AAAUE90AAFBMdAMAQDHR\nDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBM\ndAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAU\nE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFCs0W63290eMZPGxw+l1eqpf9JR+5d/uTff/e6z3Z4B\nHAd27frxfyve8Y6lXV4CHA9OO21prrrqg92e0XXNZiMDAwuO6jlzirbQRd/97rPZsfP/pe+E/m5P\nAY5xrcm+JMn/++6LXV4CHOsmfzDR7QnHNdHdo/pO6M/8pRd2ewYA0CO+/+zj3Z5wXHOmGwAAiolu\nAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoNu3o/uxnP/ua37/n\nnntmbAwAAPSiaUf33Xff/Zrf/4d/+IcZGwMAAL1ozptdMDY2liRptVr52te+lna7PfWz5557Lm95\ny1vq1gEAQA940+i+6aabkiQ//OEPc+ONN059v9FoZMmSJbn55pvr1gEAQA940+h+4oknkiQ33HBD\n7rjjjvJBAADQa940un/qZ4O71Wod8bNm05ugAADA65l2dG/fvj0bN27Mjh078sMf/jBJ0m6302g0\n8swzz5QNBACA4920o3v9+vW54IIL8qlPfSonnHBC5SYAAOgp047u3bt358///M/TaDQq9wAAQM+Z\n9m
Hsiy++OKOjo5VbAACgJ73hne6/+Iu/mLqz/X//93/56Ec/mt/8zd/M2972tiOu864mAADw+t4w\nupcuXXrE41/7tV8rHQMAAL3oDaP7ox/9aKd2AABAz5r2H1L+9OPgf968efNy0kkn5ZRTTpmxUQAA\n0EumHd033XRTnn/++SRJf39/JiYmkiQDAwN58cUXc8YZZ+TOO+/M6aefXjIUAACOV9N+95Lf/d3f\nzQc+8IF8/etfz+joaL7+9a/nD/7gD3LllVfmv//7v3PWWWdlw4YNlVsBAOC4NO3ovvfee/Pxj398\n6oNxTjjhhPzZn/1ZPve5z2X+/PlZv359nn766bKhAABwvJp2dM+fPz/btm074nvbt2/Pr/zKr/z4\nFzWn/asAAGBWmfaZ7rVr1+YP//APs2LFigwODmbfvn35j//4j9xyyy1JfvyHlitXriwbCgAAx6tp\nR/fq1atz1lln5ZFHHsnzzz+f008/Pffdd9/Ue3dfcMEFueCCC8qGAgDA8Wra0Z38+MNxfEAOAAAc\nnTeM7ltuuSW33nprkiM/Ev7n+Rh4AAB4fW8Y3aeeeurU1z//kfAAAMD0vGF0f+QjH5n62kfCAwDA\nL+aoznRv3bo1Dz30UF566aVs3rw527Zty6FDhzI0NFS1DwAAjnvTju5//ud/zr333ps1a9bkkUce\nSfLjD8j55Cc/KbqPMQcPTmTyBxP5/rOPd3sKANAjJn8wkYMHj+p+LT9j2p9o87nPfS733HNPPvzh\nD099EM673vWufPvb3y4bBwAAvWDa/3fl5ZdfzuDgYJJMvYvJ4cOHM3fu3Jpl/MIWLuzPC987nPlL\nL+z2FACgR3z/2cezcGF/t2cct6Z9p/u3fuu38pnPfOaI7917770599xzZ3wUAAD0kmnf6b755ptz\n7bXX5v7778/LL7+clStX5i1veUs+/elPV+4DAIDj3rSj++1vf3v+9V//Ndu2bcvu3btz8skn5zd+\n4zde9wNzAACAH3vT6L7qqqveNKw///nPz9ggAADoNW8a3WvWrJn6ut1u59Zbb80nPvGJ0lEAANBL\n3jS6L7/88iMe//Vf//WrvgcAALy+ab97yU85ww0AAEfnqKMbAAA4Om96vGRsbOyIx4cPH87Xvva1\ntNvtqe/5GHgAAHh9bxrdN9100xGP+/v7c+ONN049bjQaefzxx2d+GQAA9Ig3je4nnniiEzsAAKBn\nOdMNAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAA\nUEx0AwBAMdENAADF5nR7ADUmfzCR7z/7eLdnAMe41uEfJEmac07o8hLgWDf5g4kkb+v2jOOW6O5B\np522tNsTgOPErl3PJknecZr/IQXezNs0xi+h0W63290eMZPGxw+l1eqpfxJAmU2bbk2SrFt3S5eX\nABw/ms1GBgYWHN1zirYAAAA/IboBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAiolu\nAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGIdi+5NmzZlxYoVOeOMM/LNb37zNa+5\n6667MjQ0lOHh4QwPD2fDhg2dmgcAAGXmdOqFLrzwwnzwgx/M7//+77/hdatXr866des6tAoAAOp1\nLLrf8573dOqlAADgmNKx6J6uhx56KKOjo1myZEk+9rGP5d3vfvdRPX9gYEHRMoDeM3duX5JkyZK3\ndnkJQG87pqL7yiuvzLXXXpu5c+dm69atue666/Lwww9n0aJF0/4d4+OH0mq1C1cC9I4f/WgySfLC\nC//b5SUAx49ms3HUN3qPqXcvWbJkSebOnZskWbZsWQYHB7Nz584urwIAgF/OMRXd+/fvn/r6mWee\nye7du/POd76zi4sAAOCX17HjJbfddlseffTRvPjii7n66qvT39+f
hx56KNdcc03Wrl2bs88+O3fe\neWe2b9+eZrOZuXPn5o477siSJUs6NREAAEo02u12Tx2AdqYbYPo2bbo1SbJu3S1dXgJw/Djuz3QD\nAEAvEt0AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHR\nDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBM\ndAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAU\nE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAA\nxUQ3AAAUE90AAFBMdAMAQDHRDQAAxRrtdrvd7REzaXz8UFqtnvonwYzYuvWpjI4+2e0ZHGN27Xo2\nSfKOdyzt8hKONcuXn59ly87r9gw4JjWbjQwMLDiq58wp2gLAcWDhwoXdngAwK7jTDQAAR+EXudPt\nTDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBA\nMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QCz\n2MTEgdx++8YcPDjR7SkAPU10A8xiIyNbsnPnjjz44APdngLQ00Q3wCw1MXEgo6NPpt1uZ3T0KXe7\nAQqJboBZamRkS1qtdpKk1Wq52w1QSHQDzFJjY1szOXk4STI5eThjY1u7vAigd4lugFlqaGhZ+vrm\nJEn6+uZkaGhZlxcB9C7RDTBLrVp1eZrNRpKk2Wzmssve1+VFAL1LdAPMUv39i7J8+flpNBpZvvy8\nLFzY3+1JAD1rTrcHANA9q1Zdnt27n3OXG6BYo91ut7s9YiaNjx+a+mt8AACYac1mIwMDC47uOUVb\nAACAnxDdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx\n0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVEN8AsNjFxILffvjEHD050ewpATxPdALPYyMiW7Ny5\nIw8++EC3pwD0NNENMEtNTBzI6OiTabfbGR19yt1ugEKiG2CWGhnZklarnSRptVrudgMUEt0As9TY\n2NZMTh5OkkxOHs7Y2NYuLwLoXaIbYJYaGlqWvr45SZK+vjkZGlrW5UUAvUt0A8xSq1ZdnmazkSRp\nNpu57LL3dXkRQO8S3QCzVH//oixffn4ajUaWLz8vCxf2d3sSQM+a0+0BAHTPqlWXZ/fu59zlBijW\naLfb7W6PmEnj44em/hofAABmWrPZyMDAgqN7TtEWAADgJ0Q3AAAUE90AAFBMdAMAQDHRDQAAxToS\n3Zs2bcqKFStyxhln5Jvf/OZrXjM5OZkNGzbkoosuysUXX5z777+/E9MAAKBcR6L7wgsvzOc///mc\ncsopr3vNyMhIdu3alUcffTT33Xdf7rrrrjz33HOdmAcAAKU6Et3vec97Mjg4+IbXPPzww1mzZk2a\nzWYWL16ciy66KF/5ylc6MQ8AAEodM59IuXfv3px88slTjwcHB7Nv376j/j1H+0blAABQ7ZiJ7pni\nEykBAKh0XH8i5eDgYPbs2TP1eO/evTnppJO6uAgAAGbGMRPdl1xySe6///60Wq289NJLeeyxx7Jy\n5cpuzwIAgF9aR6L7tttuy3nnnZd9+/bl6quvzu/8zu8kSa655pps27YtSTI8PJxTTz01733ve/N7\nv/d7uf7663Paaad1Yh4AAJRqtNvtnjoA7Uw3AACVjusz3QAA0KtENwAAFBPdAABQTHQDAEAx0Q0A\nAMVENwAAFBPdAABQTHQDAEAx
0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQD\nAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPd\nAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVE\nNwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx\n0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQ\nTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAA\nFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0A\nAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQD\nAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPd\nAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0wi01MHMjtt2/MwYMT3Z4C0NNEN8AsNjKy\nJTt37siDDz7Q7SkAPU10A8xSExMHMjr6ZNrtdkZHn3K3G6CQ6AaYpUZGtqTVaidJWq2Wu90AhUQ3\nwCw1NrY1k5OHkySTk4czNra1y4sAelfHovvb3/52rrjiiqxcuTJXXHFFvvOd77zqmrvuuitDQ0MZ\nHh7O8PBwNmzY0Kl5ALPO0NCy9PXNSZL09c3J0NCyLi8C6F1zOvVCf/VXf5Wrrroqw8PD+dKXvpRP\nfOITuffee1913erVq7Nu3bpOzQKYtVatujyjo09mcjJpNpu57LL3dXsSQM/qyJ3u8fHxfOMb38il\nl16aJLn00kvzjW98Iy+99FInXh6A19DfvyjLl5+fRqOR5cvPy8KF/d2eBNCzOnKne+/evTnxxBPT\n19eXJOnr68vb3/727N27N4sXLz7i2oceeiijo6NZsmRJPvaxj+Xd7373Ub3WwMCCGdsN0Os+9KEP\n5Pnn9+bqqz+YRYve2u05AD2rY8dLpuPKK6/Mtddem7lz52br1q257rrr8vDDD2fRokXT/h3j44em\n/hofgDczNx//+E05fDh54YX/7fYYgONCs9k46hu9HTleMjg4mP3792dycjJJMjk5meeffz6Dg4NH\nXLdkyZLMnTs3SbJs2bIMDg5m586dnZgIAABlOhLdAwMDOfPMM/PlL385SfLlL385Z5555quOluzf\nv3/q62eeeSa7d+/OO9/5zk5MBACAMo12u92Rsxjf+ta3sn79+nzve9/Lr/7qr2bTpk1517velWuu\nuSZr167N2WefnXXr1mX79u1pNpuZO3du1q5dm/PPP/+oXsfxEgAAKv0ix0s6Ft2dIroBAKh0zJ7p\nBgCA2Ux0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADF\nRDcAABQT3QAAUEx0AwBAMdHAkSFcAAAAe0lEQVQNAADFRDcAABQT3QAAUEx0AwBAMdENAADF5nR7\nwExrNhvdngAAQA/7RXqz0W632wVbAACAn3C8BAAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAo\nJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGL/H0DD/OPJX0Z9AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(height_outlier, orient=\"vertical\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MOKP49JMqTog" + }, + "source": [ + "Uma primeira abordagem bem simples é encontrar os pontos do _box plot_ acima.\n", + "\n", + "Tudo que estiver fora da faixa $[Q1 - 1.5 \\times \\text{IQR}, Q3 + 1.5 \\times \\text{IQR}]$ é considerado um ponto anômalo para aquele padrão:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "z_h0zaVDce0N", + "outputId": "86b9e772-6438-4820-87ba-dab83a4b1dd8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Faixa considerada \"normal\": [1.18575, 2.24175]\n" + ] + } + ], + "source": [ + "q1 = height_outlier.quantile(0.25)\n", + "q3 = height_outlier.quantile(0.75)\n", + "iqr = q3 - q1\n", + "\n", + "non_outlier_interval_iqr = [q1 - 1.5 * iqr, q3 + 1.5 * iqr]\n", + "\n", + "print(f\"Faixa considerada \\\"normal\\\": {non_outlier_interval_iqr}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "wsuVvr8hq4Rc" + }, + "source": [ + "Agora podemos identificar quais pontos encontram-se fora desse intervalo, ou seja, podem ser considerados _outliers_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "colab_type": "code", + "id": "hm78PWbhc9Dz", + "outputId": "ee3995ea-8a63-4c90-b3dd-57ba673887ee" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "29 0.516665\n", + "38 2.943781\n", + "48 1.058498\n", + "68 2.737088\n", + "91 2.272000\n", + "92 1.164000\n", + "Name: Height, dtype: float64" + ] + }, + "execution_count": 53, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + 
} + ], + "source": [ + "outliers_iqr = height_outlier[(height_outlier < non_outlier_interval_iqr[0]) | (height_outlier > non_outlier_interval_iqr[1])]\n", + "\n", + "outliers_iqr" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "XcF70kmerGEq" + }, + "source": [ + "Se estivermos seguros de que esses pontos representam de fato _outliers_ e que sua remoção não traz prejuízo à nossa análise, então podemos removê-los:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "BVRJS9DNeb9z" + }, + "outputs": [], + "source": [ + "height_no_outlier_iqr = height_outlier.drop(index=outliers_iqr.index)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "urvTyUfHrVrJ" + }, + "source": [ + "Uma segunda abordagem é observar as estatísticas descritivas dos dados.\n", + "\n", + "Repare no histograma abaixo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 726 + }, + "colab_type": "code", + "id": "bc_paOePfHJ5", + "outputId": "6840da1c-bae6-4465-8aa7-87f69928e182" + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAtMAAALFCAYAAAABe2+3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3XmMpHd97/vPU/vaXb1U99RszNgH\nzATsw+U69uX4OueCcRjdeDSWUGJfjIxscGScyII/guePEcbEUjSjyFKwTEgskog/iJBBgLwQL/ci\nkTjnZEE+YLxjz3jGPb1WdXft+3P/6K72LN3T1dVV/Wzvl4TUU1Vd/bUfuvvj73yf788wTdMUAAAA\ngC3zWV0AAAAA4FSEaQAAAKBHhGkAAACgR4RpAAAAoEeEaQAAAKBHhGkAAACgR4RpAAAAoEeEaQAA\nAKBHhGkAAACgR4RpAAAAoEeEaQAAAKBHhGkAAACgR4RpAAAAoEcBqwvo1uJiSe22aXUZjjM2llA2\nW7S6DGyA62NvXB974/rYG9fH3rg+6/P5DI2MxLf0OY4J0+22SZjuEf/e7I3rY29cH3vj+tgb18fe\nuD79wZgHAAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAA\nANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA\n0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0KNNw/Ti4qLuuecefeYz\nn9GRI0f0p3/6p8rlcpe8rlKp6Ctf+YpuvvlmHT58WD//+c+7eg4AAABwqk3DtGEY+tKXvqRnn31W\nTz75pPbt26e//Mu/vOR13/3ud5VIJPT888/rO9/5jo4fP65SqbTpcwAAAIBTbRqmU6mUrr/++rU/\nf+xjH9O5c+cued3PfvYz3XbbbZKkAwcO6KMf/ah+8YtfbPocAAAA4FRbmplut9v6x3/8R33qU5+6\n5Llz585pz549a3/OZDKamZnZ9DkAAADAqQJbefGf//mfKxaL6fOf//yg6tnQ2Fhix7+mW6TTSatL\nwGVwfeyN62NvXB974/rYG9enP7oO0ydOnNC7776r73znO/L5Lm1o7969W1NTUxodHZUkTU9Pr42H\nXO65bmWzRbXb5pY+ByvfKPPzBavLwAa4PvbG9bE3ro+9cX3sjeuzPp/P2HIDt6sxj0ceeUS/+c1v\n9NhjjykUCq37msOHD+sHP/iBJOn06dN6+eWXdeONN276HAB4WbMtlWrNgfyv2bb6nw4A3G/TzvRb\nb72lv/mbv9GBAwd0++23S5L27t2rxx57TEePHtXf/u3fanJyUl/84hd17Ngx3XzzzfL5fPrmN7+p\nRGIl2V/uOQDwslqjqf94bXYg7/27hyYVCG9pmg8AsEWGaZqOmJ1gzKM3/DWOvXF97G0nrk+pNtgw\nHXdxmOb7x964PvbG9VnfwMY8AAAAAFyKMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA\n9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0\niDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSI\nMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9Igw\nDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDAN\nAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AA
AD0iDANAAAA9IgwDQAAAPSIMA0A\nAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0ADtBottVum1aXAQC4SMDq\nAgAAl3duoaRf/OqcktGQPv27exUO+q0uCQCwis40ANiUaZp69XRO/+9/vqdw0K/FQk0v/MdZ1Rot\nq0sDAKwiTAOADTVbbb348oz+8/V57ZtM6Jb/dkD/18d3a7FQ1/P/cVa1OoEaAOyAMA0ANlOuNvTs\nv53VO+fy+tgHx/XfP7ZbwYBPe9MJffLju7VUqOv5/yRQA4AdEKYBwGb+xyuzWi7V9MmP79E1V47J\nMIy15/Z0AnVxNVAz8gEAliJMA4CNVGpNnVso6cP7R7RvIrHua/akE/rk/7ZHi4WaXn47u8MVAgDO\nR5gGABt5d6Yg05QO7h667Ov2pOPaP5nUb99bVrPV3qHqAAAXI0wDgI2cms4rlQhpJBne9LUf3p9S\nvdnWqen8DlQGAFgPYRoAbKJQrmt+qbppV7pjYiSqVCKkN84syTQ50AUArECYBgCbOD1dkCQdzHQX\npg3D0FX7U8rla1pYrg6yNADABgjTAGADpmnqnem8JkaiSkSDXX/eFbuHFfT79MaZpQFWBwDYCGEa\nAGxgsVDTcrGug5nklj4vGPDpij1DOj1dULXeHFB1AICNEKYBwAZOTRdkGNIHdm0tTEvSVftSapum\nfvve8gAqAwBcDmEaACxmmqZOTee1ezyuSCiw5c9PJcOaHI3qjTNLanMjIgDsKMI0AFhsbrGicrXZ\n9Y2H67lq/4hK1aam5kt9rAwAsBnCNABY7NR0XgG/seGJh93YP5FQNOznRkQA2GFd/X3iiRMn9Oyz\nz2pqakpPPvmkPvShD13ymq997Wt644031v78xhtv6LHHHtNNN92kRx99VN///vc1MTEhSfr4xz+u\nBx98sE//CADgXM1WW6dnCto3kVAw0Ht/w+cz9MG9Kf367awK5bqSsVAfqwQAbKSrMH3TTTfpzjvv\n1B133LHha06ePLn28euvv64vfOELuvHGG9ceu/XWW/XAAw9so1QAcJ/XTi+q3mhva8Sj48o9Q/r1\n21mdnS3qdw6O9qE6AMBmumqDXHvttcpkMl2/6Q9/+EMdOXJEoRCdEQC4nDfPLsnvM5QZj2/7vZKx\nkFKJkN5jbhoAdkzfZ6br9bqefPJJffazn73g8aefflpHjhzR3XffrZdeeqnfXxYAHOnMbEGjQ2H5\nfUZf3m9POqHZxbLqjVZf3g8AcHlb38G0iRdeeEG7d+/WoUOH1h67/fbbde+99yoYDOrFF1/Ufffd\np2eeeUYjIyNdv+/YWO835nhdOr31vbXYOVwfexvk9Wm12npvrqgPHxhVMhHpy3te9YFRvXIqp1yx\noVgsrPRorC/va1d8/9gb18feuD790fcw/aMf/eiSrnQ6nV77+IYbblAmk9Fbb72l6667ruv3zWaL\narfZn7pV6XRS8/MFq8vABrg+9jbo63NmtqB6s62hWFCFYrUv7xkL+xQK+vTbs4sql2uab7m3Q833\nj71xfeyN67M+n8/YcgO3r2MeMzMz+uUvf6kjR45c8Pjs7Ozax6+99pqmpqZ08ODBfn5pAHCc0zMr\nv8jGh/vTlZYkn2Fobzqh9+ZpQADATuiqM/3www/rueee08LCgu666y6lUik9/fTTuueee3T//ffr\n6quvliT9+Mc/1ic/+UkNDw9f8PmPPPKIXnnlFfl8PgWDQZ08efKCbjUAeNGp6byiYb+SsWBf33dP\nOq53zuV1ejqvq68Y6+t7AwAuZJimM86eZcyjN/w1jr1xfext0NfnG3//74qGA7r+dyb7+r71Rks/\n+P9+q09fu0//z00f7Ot72wnfP/bG9bE3rs/6LB/zAAB0p9FsaWq+pP2T/b8BKBT0a3Ikpt+8k+37\newMALkSYBgALnJktqtU29
YFdg7mbfm86rulsWQtLlYG8PwBgBWEaACxwajovSQPpTEvS3omVv6b8\n1dt0pwFgkAjTAGCBU9N5DcdXTiwchKF4SOlUVL/67cJA3h8AsKLve6YBAJs7NV3QwcyQDKM/Jx+u\n56NXjuqf/9c55Qo1hUP+vr53OBhQgHYMABCmAWCnlatNzeTK+sRH+rvF42If3j+in/9ySk/966m+\nj5P87qFJBcL8CgEA+goAsMPenVmZlz6YGRro1zm4e0jBgE/vzZcG+nUAwMsI0wCww06tnnx4YMBh\n2u/3afd4XO/NFeWQIwUAwHEI0wCww05N55VORZSI9vfkw/XsGY+rWm9pqVgf+NcCAC8iTAPADjs9\nnR/4iEfHrtGYJGkmV96RrwcAXkOYBoAdtFyqK5uv6cCunQnTiVhQiWhQs4RpABgIwjQA7KDOYS1X\n7N6ZMC1Jk6NRzeTKzE0DwAAQpgFgB52ezsswpA8M6OTD9ewajaneaGupWNuxrwkAXkGYBoAddGq6\noN3j8b4fonI5k5256Wxlx74mAHgFYRoAdohpmjo1ndfBHZqX7khEV+amuQkRAPqPMA0AOyS7XFWx\n0tDBzM6NeHTsGo1pdpG5aQDoN8I0AOyQs/NFSer70d7d2DUWVb3RVq7A3DQA9BNhGgB2yEx2Zcwi\nMxbb8a/dmZtmRR4A9BdhGgB2yHSurKF4SLHI4E8+vFg8ElQyFlwL9ACA/iBMA8AOmcmV104ktMLK\n3HRFbeamAaBvCNMAsENmstaG6cnRmBrNthbzzE0DQL8QpgFgBxQrDRUrDUvmpTs6QZ4VeQDQP4Rp\nANgBnVllKzvTsUhAQzH2TQNAPxGmAWAHTOdKkqRdFnamO19/brGidpu5aQDoB8I0AOyAmWxZAb+h\n8eGIpXV05qZzhaqldQCAWxCmAWAHzOTKmhiJye+z9sfu+3PTFUvrAAC3IEwDwA6wei1eRzQc0HA8\npFn2TQNAXxCmAWDAmq225hYrlm7yON/kaFRzSxWZ7JsGgG0jTAPAgC0sV9Vqm7boTEtSOhVVo9nW\nUrFudSkA4HiEaQAYMDusxTvfxEhUkjS/xNw0AGwXYRoABswua/E6EtGgIiG/5hcJ0wCwXYRpABiw\nmWxZQ7Gg4pGg1aVIkgzDUDq1MjcNANgewjQADJhdNnmcLz0SVaHcUKXWtLoUAHA0wjQADNhMrmyb\nEY+OidTK4THMTQPA9hCmAWCAipWGCuWGdo3GrS7lAmNDEfkMaX6JkxABYDsI0wAwQDO51U0eNutM\n+/0+jQ5F6EwDwDYRpgFggDpr8TI2m5mWVlbkdXZgAwB6Q5gGgAGazpXk9xkaX51RtpN0Kqp221Qu\nz6gHAPSKMA0AAzSTLWtiJCq/z34/btMpDm8BgO2y3093AHARO67F64hFAkpEg5rj8BYA6BlhGgAG\npNVua26xosyYvTZ5nC+dWrkJ0TSZmwaAXhCmAWBAFpZWbu6za2daWjm8pVJrqVTh8BYA6AVhGgAG\nZNqma/HON7E6N83R4gDQG8I0AAxIZy2enTvTqWRYAb/BTYgA0CPCNAAMyEyupGQsqEQ0aHUpG/IZ\nhtKpKGEaAHpEmAaAAZnJ2neTx/nSqagW8zU1mm2rSwEAxyFMA8CA2Hkt3vnSqahMSQvLdKcBYKsI\n0wAwAOVqU/lywyFheuV0RvZNA8DWEaYBYAA6Xd7OKYN2Fgr6NZwIaWGZY8UBYKsI0wAwAPNLK8F0\nfLXra3fjQxFll6sc3gIAW0SYBoAB6HSmx4ft35mWpLHhiKr1lspVDm8BgK0gTAPAACwsVRUN+xWP\nBKwupSvjwysddEY9AGBrCNMAMADzyxWND0dlGIbVpXRlJBmWYUhZwjQAbAlhGgAGYGG5utbtdQK/\n36eRZFjZPGEaALaCMA0AfWaaphaWK47Y5HG+8WFuQgSArSJMA0Cf5csN1RttR3WmJWlsKKJ
6s61C\nuWF1KQDgGIRpAOizhaXVTR4O60yPrYZ/5qYBoHuEaQDos/nOgS0O60ynEmH5fQYbPQBgCwjTANBn\nC50DWxyyY7rD5zM0OsRNiACwFYRpAOizheWKhmJBhUN+q0vZsrHhiHL5qtrchAgAXSFMA0CfzS9V\nHTcv3TE+HFGzZWq5WLe6FABwBMI0APTZwnLFcZs8OsaGuAkRALaCMA0AfdRum8rla47bMd0xFA8p\nGPBxEyIAdIkwDQB9lCtU1Wqbju1MG4ahsaEINyECQJcI0wDQR2ubPBzamZakseGwFvM1tdrchAgA\nmyFMA0AfOXXH9PnGhqNqm6YWCzWrSwEA2yNMA0AfLSxVZRjS6JBzw/Q4NyECQNcI0wDQRwvLFY0m\nwwr4nfvjNR4NKBz0E6YBoAvO/WkPADY0v1x13MmHFzMMQ2PDES2sjqwAADbWVZg+ceKEPvWpT+mq\nq67Sm2++ue5rHn30UX3iE5/Q0aNHdfToUT300ENrz1UqFX3lK1/RzTffrMOHD+vnP/95f6oHAJtZ\nWKpoPOXcEY+O8eGIlot1NVttq0sBAFsLdPOim266SXfeeafuuOOOy77u1ltv1QMPPHDJ49/97neV\nSCT0/PPP6/Tp07rjjjv03HPPKR6P91Y1ANhQo9nSUrGutMM709LKseKmpFy+qomRmNXlAIBtddWZ\nvvbaa5XJZHr+Ij/72c902223SZIOHDigj370o/rFL37R8/sBgB11DjpxQ2f6/ZMQ2egBAJfT15np\np59+WkeOHNHdd9+tl156ae3xc+fOac+ePWt/zmQympmZ6eeXBgDLrYVpF3Smo2G/IiE/6/EAYBNd\njXl04/bbb9e9996rYDCoF198Uffdd5+eeeYZjYyM9OX9x8YSfXkfL0qnk1aXgMvg+tjbVq5P9a0F\nSdKHrxzXWJeB2syVlUwMppMdDAa29d7jqaiWy/V13yMWCys9av34B98/9sb1sTeuT3/0LUyn0+m1\nj2+44QZlMhm99dZbuu6667R7925NTU1pdHRUkjQ9Pa3rr79+S++fzRbV5jSuLUunk5qfL1hdBjbA\n9bG3rV6fU+8tKeD3qVlraH6+2dXnlGtNFYqDWUHXaGzvvYdiQb1+pqTlfEU+n3HBc+VyTfOt1nZL\n3Ba+f+yN62NvXJ/1+XzGlhu4fRvzmJ2dXfv4tdde09TUlA4ePChJOnz4sH7wgx9Ikk6fPq2XX35Z\nN954Y7++NADYwsJSRWPDEfkMY/MXO8BIMqx221S+XLe6FACwra460w8//LCee+45LSws6K677lIq\nldLTTz+te+65R/fff7+uvvpqPfLII3rllVfk8/kUDAZ18uTJtW71F7/4RR07dkw333yzfD6fvvnN\nbyqRYGwDgLvML1UdfYz4xUaSYUnSYr6mVCJscTUAYE9dhenjx4/r+PHjlzz++OOPr3184sSJDT8/\nFovpW9/6Vg/lAYBzLCxXdHD3kNVl9M1wIiyfIeUKNR20uhgAsClOQASAPihXmypVm67qTPt9hoYT\nYTZ6AMBlEKYBoA86R2+Pp5y/Fu98I8mwFguDuUESANyAMA0AfTC/1Nkx7Z7OtLQSpiu1lqr17raT\nAIDXEKYBoA86nem0CzvTkhj1AIANEKYBoA8WlqqKhv2KR/q2vt8WRofe3+gBALgUYRoA+mB+uaLx\n4agMl+yY7oiEAoqG/crRmQaAdRGmAaAPsstV181Ld4wkI4x5AMAGCNMAsE2maWohX9XYkFvDdFjL\nxZpabdPqUgDAdgjTALBNlVpTtXpLY67tTIfVNqV8ie40AFyMMA0A25RdvTnPrZ3p0dWNHjluQgSA\nS7jrtnMAGIBmW6o1Nt6zPLVQkiRFowGValvbx+yEyYmheEg+n8HcNACsgzANAJuoNZr6j9dmN3z+\n9XcXJUlnZgqaX6xs6b3/64fS26ptJ/h8hlKJEGEaANb
BmAcAbFOp2pTPMBQJ+a0uZWBWjhWvyTQd\n0EoHgB1EmAaAbSpVGopHA67bMX2+0WRE1XpLlVrL6lIAwFYI0wCwTaVqQ/FI0OoyBopjxQFgfYRp\nANimUrWpeNTdt6C8H6arFlcCAPZCmAaAbWi3TVWqTdd3psMhv2KRAJ1pALgIYRoAtqFcbcqUXN+Z\nlt6/CREA8D7CNABsQ6nakCTXd6allcNblkt1tVptq0sBANsgTAPANngpTI8kwzJNaalUt7oUALAN\nwjQAbEOpsnLioRfGPFKJlZsQl4uEaQDoIEwDwDaUqg2Fg34F/O7/cZqMh2QY0nKRuWkA6HD/T38A\nGKBipamEB7rSkuT3GUpGg1pmzAMA1hCmAWAbStWG4lH3z0t3DCfCjHkAwHkI0wDQI9M0V44S98DN\nhx3DiZDy5bqabPQAAEmEaQDoWb3ZVrNlKh7xxpiHJA3HQzJNaX6pYnUpAGALhGkA6FGpsroWz0Nj\nHp2NHrO5ssWVAIA9EKYBoEel6upaPA91pofiIUnSDGEaACQRpgGgZ17sTAcDPsUjAc1kCdMAIBGm\nAaBnpWpDPp+hSMhvdSk7ajgRZswDAFYRpgGgR6VKU/FIQIZhWF3KjhqOhzSbq6jdNq0uBQAsR5gG\ngB6Vqt5ai9cxnAip0WprIV+1uhQAsBxhGgB6VKo0FffI6YfnSyVWbkKcXihZXAkAWI8wDQA9aLdN\nlWtNb3am4yvr8aa5CREACNMA0ItyZy2eBzvT4ZBfyVhQ5+hMAwBhGgB6UayursXzYGdaknaNxjSd\nJUwDAGEaAHrQ2TGd8NCO6fNNjsZ0LluWabLRA4C3EaYBoAed0w9jHjr98Hy7xmKq1JpaKtatLgUA\nLEWYBoAelCoNRUJ+Bfze/DG6azQmSYx6APA8b/4WAIBtKlVXDmzxqvfDNBs9AHgbYRoAelCqNhT3\n6Ly0JA3FQ4qGA2z0AOB5hGkA2CLTNFWqePP0ww7DMLR7jI0eAECYBoAtqjfaarZMT495SFJmLK5z\njHkA8DjCNABsUamzY9rDYx6StHs8rnypruLqmkAA8CLCNABsUcnDpx+eLzPGRg8AIEwDwBZ1Dmzx\n8sy0JGXG45LY6AHA2wjTALBFpWpDPp+hSMhvdSmWGh+KKBTwsdEDgKcRpgFgi0qVlR3ThmFYXYql\nfD5Du0ZjOseYBwAPI0wDwBaVqt5ei3e+zHhc0wuMeQDwLsI0AGxRqdpUzONr8ToyYzFl81XVGi2r\nSwEASxCmAWAL2m1TFY8fJX6+yZGVjR7zixWLKwEAaxCmAWALKrWmTLHJo2NyNCpJml1k1AOANxGm\nAWALOjumYx7fMd0xkVrpTM/RmQbgUYRpANiCtdMP6UxLkmKRgJKxIJ1pAJ5FmAaALSh3Tj9kZnrN\n5EhMszk60wC8iTANAFtQqjYU9PsUDPDjs2NyJEpnGoBn8dsAALagVGkqFuXAlvNNjMa0VKyrVmc9\nHgDvIUwDwBaUqw1GPC4yObKy0WNuiVEPAN5DmAaALVg5sIWbD8/X2TU9m2PUA4D3EKYBoEutdlvV\neovO9EUmRtg1DcC7CNMA0KX3N3nQmT5fNBzQUDykWXZNA/AgwjQAdKlUWT2whc70JSZHoppjzAOA\nBxGmAaBLHNiyscmRmGa5ARGABxGmAaBLa2MeHCV+iYmRqJaLdVXrTatLAYAdRZgGgC6Vqg2Fg34F\n/PzovNjk6MpGjznmpgF4DL8RAKBLK2vx6EqvZ3JtowdhGoC3EKYBoEulCge2bKSzHm+O9XgAPIYw\nDQBdKlebike5+XA9kVBAw4mQZnN0pgF4C2EaALrQaLZVb7YZ87iMyVSUg1sAeE5XvxVOnDihZ599\nVlNTU3ryySf1oQ996JLXPPbYY3rmmWfk8/kUDAb11a9+VTfeeKMk6dixY/rXf/1XjYyMSJIOHz6s\nL3/5y338xwCAwWI
t3uYmRmP69dtZq8sAgB3VVZi+6aabdOedd+qOO+7Y8DXXXHON7r77bkWjUb3+\n+uv6/Oc/r3/5l39RJBKRJP3xH/+xPv/5z/enagDYYe+ffkhneiOTI1HlS3VVak1Fw/x7AuANXY15\nXHvttcpkMpd9zY033qhodOUGlKuuukqmaWppaWn7FQKADXQ604x5bGxyhPV4ALxnIDPTP/nJT7R/\n/37t2rVr7bG///u/15EjR3Tffffp7bffHsSXBYCBef8occY8NtLZNc3cNAAv6XuL5d///d/1V3/1\nV/q7v/u7tce++tWvKp1Oy+fz6Sc/+Ym+9KUv6YUXXpDf7+/6fcfGEv0u1TPS6aTVJeAyuD72lk4n\nZebKarRMxSIBpYaifX3/YDCgZCLS1/fcifeOxcJKr4bnjuTqv5tivbVj/7/m+8feuD72xvXpj76G\n6Zdeekl/9md/pm9/+9u64oor1h6fnJxc+/jWW2/VX/zFX2hmZkZ79uzp+r2z2aLabbOf5XpCOp3U\n/HzB6jKwAa6PvXWuT7nW1FKhqlg4oEKx2tev0Wg0+/6eO/He5XJN863WJY+nEiGdOru0I/+/5vvH\n3rg+9sb1WZ/PZ2y5gdu3MY9f//rX+upXv6pvfetb+shHPnLBc7Ozs2sf//M//7N8Pt8FARsA7I7T\nD7szORLjFEQAntLVb4aHH35Yzz33nBYWFnTXXXcplUrp6aef1j333KP7779fV199tR566CFVq1V9\n/etfX/u8kydP6qqrrtIDDzygbDYrwzCUSCT013/91woE+KUEwBlM01S52tCe8bjVpdje5GhU/+ut\nBavLAIAd01WiPX78uI4fP37J448//vjaxz/60Y82/Px/+Id/2HplAGAT5VpTzZbJWrwuTI7ElC83\nWI8HwDM4AREANrFYqEmSYhwlvqmJETZ6APAWwjQAbGJpNUzTmd7c5MjKRo/ZHHPTALyBMA0Am1gk\nTHct3QnTdKYBeARhGgA2sVioyTCkCDPAmwoH/RpJhjkFEYBnEKYBYBNLhZpi4YB8hmF1KY4wORKl\nMw3AMwjTALCJxUJNcW4+7NrESFTzS4M5LAYA7IYwDQCbWCzUOLBlC8aHo8qX6qrVLz0hEQDchjAN\nAJfRNk0tFWuKR+hMdyudWrkJcWGZuWkA7keYBoDLKJTqarU5sGUrOmGaUQ8AXkCYBoDLyHUObCFM\ndy2dikiS5pfoTANwP8I0AFxGLr/SXWXMo3uJaFCRkJ8wDcATCNMAcBm5/OqBLVE6090yDEPjw1HC\nNABPIEwDwGXkClUF/T6Fg36rS3GUdCqihWVmpgG4H2EaAC4jl68plQzL4MCWLUmnVjrTpmlaXQoA\nDBRhGgAuI5evaiQZtroMx0nJt8vAAAAgAElEQVSnoqo328qX6laXAgADRZgGgMvIFVY609ga1uMB\n8ArCNABsoNVqa6lYozPdA9bjAfAKwjQAbCCbr8o0pZEEYXqrxocJ0wC8gTANABtYWA2CjHlsXTDg\n10gyrHmOFAfgcoRpANhAJ0wz5tGb9HCEmWkArkeYBoANEKa3p7MeDwDcjDANABuYX6ooEvIrGub0\nw16kU1EtFWpqNFtWlwIAA0OYBoANLCxVNDYUsboMxxpPRWRKnIQIwNUI0wCwgfmlikaGGPHoFbum\nAXgBYRoANrCwVNFoks50rzpheoGNHgBcjDANAOtoNFtaLtY1Sme6Z8PxkIIBHzchAnA1wjQArCNX\nqEkSneltMAxjdaMHYx4A3IswDQDryOVXwzSd6W0ZH47QmQbgaoRpAFhHLr/STR1lm8e2dHZNm6Zp\ndSkAMBCEaQBYR2fMgwNbtiediqpab6lYaVhdCgAMBGEaANaxmK8qGQspHPRbXYqjpVMrnX12TQNw\nK8I0AKwjV6gpPRK1ugzHe3/XNHPTANyJMA0A68jmq2tBEL0bH17pTBOmAbgVYRoA1
pHL1zROmN62\nSCigoViQMA3AtQjTAHCRSq2pSq1JmO4Tdk0DcDPCNABcpLPJgzDdH531eADgRoRpALjI4uqOaWam\n+2M8FVUuX1Oz1ba6FADoO8I0AFyEznR/pVMRtU1z7d8rALgJYRoALpLLV2VIGhvm9MN+SA+zHg+A\nexGmAeAiuXxNQ4mQAn5+RPYDu6YBuBm/KQDgIrlCVWNDdKX7ZSQZlt9nEKYBuBJhGgAuks3XNJoM\nW12Ga/h8hsaGI1pgPR4AFyJMA8B5TNPUYr6qUTrTfZUejmhhmc40APchTAPAeUrVpurNNp3pPhsb\njmphmc40APchTAPAeXKrO6bpTPdXOhVRodxQtd60uhQA6KuA1QUAgJ3k8iu7kEeG6ExfjuEzVKp1\nH4yT8ZAk6ex8SbvH45d9bTgYUIBWDwCHIEwDwHlyhdXOdJLO9OXUGi396s35rl/f2eTxP1+Z0b6J\nxGVf+7uHJhUI8+sJgDPw3/4AcJ5cvia/z9DwaicV/ZGIBiVJxUrD4koAoL8I0wBwnlyhqlQiLJ/P\nsLoUV4mE/Ar4DRXLhGkA7kKYBoDz5PI1jTEv3XeGYSgeDdKZBuA6hGkAOE+OHdMDkyBMA3AhwjQA\nrGqbphYLNTZ5DAhhGoAbEaYBYFW+VFerbbLJY0AS0aAazbZqjZbVpQBA3xCmAWBVZ8f0KJ3pgVjb\n6MFNiABchDANAKs6px+OMTM9EIkY6/EAuA9hGgBWcZT4YLFrGoAbEaYBYFU2X1M46Fc8wul7gxAO\n+hUM+AjTAFyFMA0Aq1bW4oVlGBzYMihs9ADgNoRpAFiVzVeZlx4wwjQAtyFMA8AqDmwZvEQ0qFKl\nIdM0rS4FAPqCMA0AkuqNlvLlBkeJD1giFlSzZapaZ9c0AHcgTAOApMVCZ8c0nelBSrLRA4DLEKYB\nQCvz0hJhetDihGkALkOYBgC9H6YZ8xgsTkEE4DaEaQDQ+0eJjyTpTA9SMOBTJOSnMw3ANQjTAKCV\nTR7D8ZCCAX4sDhrr8QC4Cb81AECsxdtJccI0ABchTAOAVo4SZ156Z3R2TbfZNQ3ABQjTADzPNE06\n0zsoGQ2qbUqVatPqUgBg2zYN0ydOnNCnPvUpXXXVVXrzzTfXfU2r1dJDDz2kT3/607r55pv1xBNP\ndPUcANhBsdJQvdnmKPEdkoixHg+AewQ2e8FNN92kO++8U3fccceGr3nyySd15swZPffcc1paWtKt\nt96qT3ziE9q7d+9lnwMAO+hs8qAzvTMS5+2anrS4FgDYrk0709dee60ymcxlX/PMM8/oD//wD+Xz\n+TQ6OqpPf/rT+qd/+qdNnwMAO1jbMT3MzPROiEdX+jh0pgG4QV9mpqenp7V79+61P2cyGc3MzGz6\nHADYAacf7iy/z6dYOMDBLQBcYdMxD7sYG0tYXYJjpdNJq0vAZXB9rFdptBUK+HTF/lEZhnHBc+l0\nUmaurGRiMEE7GAx48r2HE2FV6q113yMWCys9Guvqffj+sTeuj71xffqjL2E6k8no3LlzuuaaayRd\n2I2+3HNbkc0W1W6zRmmr0umk5ucLVpeBDXB97OG92YJGkmEtLBQveLxzfcq1pgrF6kC+dqPhzfeO\nhv2azZXXfY9yuab5VmvT9+D7x964PvbG9Vmfz2dsuYHblzGPw4cP64knnlC73VYul9MLL7ygz3zm\nM5s+BwB2wFq8nZeIBlWuNmmSAHC8TcP0ww8/rN/7vd/TzMyM7rrrLv3BH/yBJOmee+7Ryy+/LEk6\nevSo9u7dq9///d/XH/3RH+lP/uRPtG/fvk2fAwA7yOarrMXbYYloUKakUpW5aQDOtumYx/Hjx3X8\n+PFLHn/88cfXPvb7/XrooYfW/fzLPQcAVmu22soX6xrl9MMddf56vGQsZHE1ANA7TkAE4GmLhZpM\nic70DlsL02z0AOBwhGkAnpbrrMUbJkzvpFgkI
MNg1zQA5yNMA/C0tQNb6EzvKJ/PUDwSJEwDcDzC\nNABPy3aOEk8yM73TElHCNADnI0wD8LRcvqpkLKhQ0G91KZ5DmAbgBoRpAJ6WZce0ZRKxoCq1lpqt\nttWlAEDPCNMAPC2XrzEvbZHORo8S3WkADkaYBuBZpmmudKaZl7ZEIrpy1AGjHgCcjDANwLPKtaZq\n9RZjHhZJRFcOaykQpgE4GGEagGdll1fX4rFj2hLRsF8+n8GYBwBHI0wD8KxcYXUtHkeJW8IwjJWN\nHpyCCMDBCNMAPCvHgS2WS0QDzEwDcDTCNADPyuar8vsMDcVDVpfiWYlokJlpAI5GmAbgWbl8TaND\nYfkMw+pSPCsRDareaKvebFldCgD0hDANwLOy+SojHhZLxFb+VoCbEAE4FWEagGflOP3Qcp1d0wVu\nQgTgUIRpAJ7Uare1WKixycNi75+C2LS4EgDoDWEagCct5msyTTZ5WC0c9CvgN9joAcCxCNMAPCm7\nuhZvfDhqcSXe1tk1zUYPAE5FmAbgSQucfmgbKwe31K0uAwB6QpgG4ElrR4kzM225RCyoUqUp0zSt\nLgUAtowwDcCTFvJVDcdDCgb8VpfieYloUI1WW7VG2+pSAGDLCNMAPCm7XGXEwyY6Gz24CRGAExGm\nAXhSNl/VOGHaFgjTAJwsYHUBANAPzbZUa3S3q7htmsrlq7rmyjGVaht/jpkrq1xrqs0o70AlYoRp\nAM5FmAbgCrVGU//x2mxXry1Xm2q2TOXL9ct+TjIRUaFY1X/9ULpfZWIdoYBfoaBPRU5BBOBAjHkA\n8JzSage0M14A6yWiQTrTAByJMA3AczqhLREhTNtFIhpc+48cAHASwjQAzylWV0JbnM60bXQ60+ya\nBuA0hGkAnlOqNBQO+hUM8CPQLhKxoFptU5Vay+pSAGBL+E0CwHOKlabiUe6/thPW4wFwKsI0AM8p\nVRrcfGgzhGkATkWYBuAppmmqVG0ozs2HtkKYBuBUhGkAnlJrtNRsmXSmbSbg9yka9hOmATgOYRqA\npxQrKyceMjNtP/FIkINbADgOYRqAp3Bgi30lYhzcAsB5CNMAPKUTptkxbT/JaFClakPtNrumATgH\nYRqApxQrDQX9PoXYMW07iWhQpiktFWtWlwIAXeO3CQBPKVZXdkwbhmF1KbhIIrbytwULy1WLKwGA\n7hGmAXgKO6btKxkNSZIWlioWVwIA3SNMA/CUYqXBvLRNxaIB+Qw60wCchTANwDPqjZYazTadaZvy\nGYbi0SCdaQCOQpgG4BmlKps87C4ZC9GZBuAohGkAntE5sCUR4cAWu0rGglpYrsg0WY8HwBkI0wA8\no8iOadtLxoKq1FoqVZtWlwIAXSFMA/CMUqUhv89QJOS3uhRsIBlb2egxt8jcNABnIEwD8IzOWjx2\nTNtXcvVvDeaWyhZXAgDdIUwD8IxiZeXAFthX5+CWeTrTAByCMA3AM0rVhuIR5qXtLOD3aTgeYswD\ngGMQpgF4QrPVVrXeYse0A4ynIppj1zQAhyBMA/AENnk4x/hwlDANwDEI0wA8odTZMc3MtO2lU1Et\nF+uqNVpWlwIAmyJMA/CE0mpnmjEP+xtPRSRJ83SnATgAYRqAJxQrDfkMKRqmM21348NRSWz0AOAM\nhGkAnlCsNhSLsGPaCTqdaeamATgBYRqAJ3QObIH9xSNBxcIBwjQARyBMA/CEYqWxdiAI7C89EmXM\nA4AjEKYBuF6z1Valxo5pJ5lIsR4PgDMQpgG4XmfHdJIw7RgTI1Fll6tqtdtWlwIAl0WYBuB6xfLq\nWjzGPBwjnYqq1TaVy9esLgUALoswDcD1iuyYdpyJ1Mp6PEY9ANgdYRqA6xXKDQX8hiIhv9WloEsT\nI+yaBuAMhGkArldcXYvHjmnnSCXDCvh9dKYB2B5hGoDrFdkx7Tg+w1A6FaEzDcD2CNMAXM00TRXL\n7Jh2ojTr8
QA4AGEagKvVGi01Wm0loyGrS8EWdXZNm6ZpdSkAsCHCNABXW9vkQWfacdIjUdXqLRVW\nVxsCgB0RpgG4WieIMTPtPKzHA+AEhGkArsaOaediPR4AJwh086JTp07p2LFjWlpaUiqV0okTJ3Tg\nwIELXvO1r31Nb7zxxtqf33jjDT322GO66aab9Oijj+r73/++JiYmJEkf//jH9eCDD/bvnwIANlAs\nNxQJ+RUM0DtwmvHhqAzRmQZgb12F6QcffFCf+9zndPToUf30pz/V17/+dX3ve9+74DUnT55c+/j1\n11/XF77wBd14441rj91666164IEH+lQ2AHSHtXjOFQz4NDIU1hydaQA2tmmrJpvN6tVXX9Utt9wi\nSbrlllv06quvKpfLbfg5P/zhD3XkyBGFQtw9D8BaBdbiOdpEKqp5OtMAbGzTMD09Pa3JyUn5/SvH\n8Pr9fk1MTGh6enrd19frdT355JP67Gc/e8HjTz/9tI4cOaK7775bL730Uh9KB4DLa7dNlaoNJelM\nOxa7pgHYXVdjHlvxwgsvaPfu3Tp06NDaY7fffrvuvfdeBYNBvfjii7rvvvv0zDPPaGRkpOv3HRtL\n9LtUz0ink1aXgMvg+vSHmSsrmYhc8Fi+VJdpSuMjsUue61YyEVEwGOj58zfDe18qFgsrPRqTJF2x\nb0T//OtpxZMRxSKX/kcR3z/2xvWxN65Pf2wapjOZjGZnZ9VqteT3+9VqtTQ3N6dMJrPu63/0ox9d\n0pVOp9NrH99www3KZDJ66623dN1113VdaDZbVLvN4v6tSqeTmp8vWF0GNsD16Z9yralCsXrBY7PZ\nsiQp4NMlz3UjmYioUKyq0bj0vfuF975UuVzTfKslSUqGV/5W9DdvzulgZuiC1/H9Y29cH3vj+qzP\n5zO23MDddMxjbGxMhw4d0lNPPSVJeuqpp3To0CGNjo5e8tqZmRn98pe/1JEjRy54fHZ2du3j1157\nTVNTUzp48OCWCgWArSpU6pLE6YcOtmu1Qz2z+h9GAGA3XY15fOMb39CxY8f07W9/W0NDQzpx4oQk\n6Z577tH999+vq6++WpL04x//WJ/85Cc1PDx8wec/8sgjeuWVV+Tz+RQMBnXy5MkLutUAMAjFckOG\nIcUifZ9oww6ZGInKMKTpHGEagD119Rvmyiuv1BNPPHHJ448//vgFf/7yl7+87ud3wjcA7KRCpaF4\nJCifz7C6FPQo4PcpPRzVDGEagE1xigEA1yqyFs8Vdo3FGPMAYFuEaQCuVaywFs8Ndo3GNLdYVtvk\nJnQA9kOYBuBKjWZb1XqL0w9dYNdoTPVmW7n8YLaHAMB2EKYBuFKx0pAkxjxcYG2jB3PTAGyIMA3A\nlTphmjEP59s1xno8APZFmAbgSsUynWm3GI6HFAn56UwDsCXCNABXKlTqCvgNhYN+q0vBNhmGoV2j\nMc0SpgHYEGEagCsVyw0lYyEZBjum3WDXWIzONABbIkwDcKVipcEmDxfZNRpTNl9TrdGyuhQAuABh\nGoDrmKZJmHaZzkYPRj0A2A1hGoDrVOstNVsmNx+6COvxANgVYRqA67AWz30mCdMAbIowDcB1CqzF\nc51w0K+xoTBhGoDtEKYBuM7a6Yd0pl1l12iMg1sA2A5hGoDrFMp1RcN+Bfz8iHOTXaNxzeTKMk3T\n6lIAYA2/aQC4TmF1xzTcZddYTNV6S8ulutWlAMAawjQA1ymU6xoiTLvO2kYPRj0A2AhhGoCrNJpt\nVWotJePMS7sN6/EA2BFhGoCr5MsrIwB0pt1nZCisUMBHmAZgK4RpAK7SWYuXZC2e6/gMQ5OjMcI0\nAFshTANwlcLqzWncgOhOk6zHA2AzhGkArpJfXYsXDPDjzY12jcY0v1xRo9m2uhQAkESYBuAyhXKD\neWkXy4zGZJrS3FLF6lIAQBJhGoDL5Et1JeOEabfaNcZ6PAD2QpgG4Br1Zkv
VeoubD13s/fV4JYsr\nAYAVhGkArlEorWzyYMzDvaLhgIbjITZ6ALANwjQA1yh0dkxzYIurZcZimmbMA4BNEKYBuEZ+dcd0\nIkpn2s32jCc0tVBSu21aXQoAEKYBuEehVFc0HGAtnsvtmYirVm9pbpHuNADr8RsHgGvky3UNcfOh\n6+1NJyRJ707nLa4EAAjTAFykUG6wFs8D9ozHJUmnZwjTAKxHmAbgCpVaU9V6i860B0TDAY0PR/Tu\ndMHqUgCAMA3AHeZXT8RLshbPE/amEzrNmAcAGyBMA3CFTpgeYszDE/ak45qaL6rRbFtdCgCPI0wD\ncIX3O9OMeXjB3nRC7bap6SwnIQKwFmEagCvML1YUCwcU8PNjzQv2plduQpyaJ0wDsBa/dQC4wvxS\nVUlOPvSMydGYAn5D780XrS4FgMcRpgG4wvxSRUPcfOgZAb9PeyeSeo/ONACLEaYBOF652lSx0mBe\n2mMOZIboTAOwHGEagOPNrh4rzSYPb/lAZkiLhZpK1YbVpQDwMMI0AMfrhGl2THvLgcyQJG5CBGAt\nwjQAx5tbZC2eF31g10qYZtQDgJUI0wAcbzZXUSoRYi2ex4ynIoqGA9yECMBS/OYB4Hhzi2WlR6JW\nl4EdZhiG9qTjdKYBWIowDcDxZhcrmkgRpr1obzqhqfmSTNO0uhQAHkWYBuBopWpDxUpD44RpT9qb\njqtSayqXr1ldCgCPIkwDcLTOzYdpwrQn7U0nJHETIgDrEKYBONpsbmUtHmHam/ak45II0wCsQ5gG\n4GjT2bIMgzDtVfFIUCPJMLumAViGMA3A0c5lS5pIRRUM8OPMq/amE3SmAViG3z4AHG06W1ZmLG51\nGbDQ3nRc09mymq221aUA8CDCNADHarbams2VlRmPWV0KLLQ3nVCrbWpmdX4eAHYSYRqAY80vVdRq\nm9pNZ9rTuAkRgJUI0wAc69zCSidy9zhh2ssyY3H5fYbem+MmRAA7jzANwLGmsyvhadcoYx5eFgz4\ntHs8rndn8laXAsCDCNMAHOtctqTRobCi4YDVpcBiBzNJnZ4pcKw4gB1HmAbgWNMLbPLAigOZIZWq\nTc0tVawuBYDHEKYBOFLbNDWdK3HzISRJB3cNSZJOTTPqAWBnEaYBOFJuuap6o81aPEha2egRDPh0\nerpgdSkAPIYwDcCRzmVXN3nQmYakgN+n/ZMJOtMAdhxhGoAjdTZ5sBYPHQd3Dend2YJabU5CBLBz\nCNMAHOncQknJWFCJaNDqUmATBzNDqjfaml7gJEQAO4cwDcCRprNs8sCFDmSSkrgJEcDOIkwDcBzT\nNDWdLTHigQtMjsYUDft1aoabEAHsHMI0AMfJl+oqVZvKjLHJA+/zGYYO7BqiMw1gRxGmATjO2iYP\nOtO4yIFMUu/NFdVochMigJ1BmAbgOGubPJiZxkUO7hpSq23q7FzR6lIAeARhGoDjnFsoKRLyK5UI\nWV0KbOZghpMQAewswjQAx5nOlrV7PC7DMKwuBTYzOhTWUCyo04RpADukqzB96tQp3XbbbfrMZz6j\n2267TadPn77kNY8++qg+8YlP6OjRozp69KgeeuihtecqlYq+8pWv6Oabb9bhw4f185//vG//AAC8\n51y2xM2HWJdhGDqQGWKjB4AdE+jmRQ8++KA+97nP6ejRo/rpT3+qr3/96/re9753yetuvfVWPfDA\nA5c8/t3vfleJRELPP/+8Tp8+rTvuuEPPPfec4nHmHQFsTbna0HKxzrw0NnQwM6SX386qUmsqGu7q\n1xwA9GzTznQ2m9Wrr76qW265RZJ0yy236NVXX1Uul+v6i/zsZz/TbbfdJkk6cOCAPvrRj+oXv/hF\njyUD8LLOJo8MmzywgYOZpExJZ2bpTgMYvE3D9PT0tCYnJ+X3+yVJfr9fExMTmp6evuS1Tz/9tI4c\nOaK7775bL7300trj586d0549e9b+nMl
kNDMz04/6AXjM9EJnkwdjHljfgbWbEAnTAAavb3//dfvt\nt+vee+9VMBjUiy++qPvuu0/PPPOMRkZG+vL+Y2OJvryPF6XTSatLwGVwfbZmsdxQMODTh//LhPy+\n929ANHNlJRORvn+9ZCKiYDAwkPeWxHuvIxYLKz3a3X8srff9k5Y0MRLV9GKF7y+L8e/f3rg+/bFp\nmM5kMpqdnVWr1ZLf71er1dLc3JwymcwFr0un02sf33DDDcpkMnrrrbd03XXXaffu3ZqamtLo6Kik\nlW739ddfv6VCs9mi2m1zS5+DlW+U+Xm6M3bF9dm6d95b0q7RmHLZC/cIl2tNFYrVvn6tZCKiQrGq\nRqP/793Be1+qXK5pvtXa9HWX+/7ZP5HQ66ezfH9ZiJ9v9sb1WZ/PZ2y5gbvpmMfY2JgOHTqkp556\nSpL01FNP6dChQ2vBuGN2dnbt49dee01TU1M6ePCgJOnw4cP6wQ9+IEk6ffq0Xn75Zd14441bKhQA\npJUd02zywGYOZoY0v1RVsdKwuhQALtfVmMc3vvENHTt2TN/+9rc1NDSkEydOSJLuuece3X///br6\n6qv1yCOP6JVXXpHP51MwGNTJkyfXutVf/OIXdezYMd18883y+Xz65je/qUSCsQ0AW1NrtJRdrur/\nvCaz+YvhaQfOO7zl6ivGLK4GgJt1FaavvPJKPfHEE5c8/vjjj6993AnY64nFYvrWt77VQ3kA8L7p\nbEmmOEYcmzuYScpnGHrz7BJhGsBAcQIiAMc4O7syJ71vkr/ZwuVFQgEdyCT15tklq0sB4HKEaQCO\ncWauqHDIr3QqanUpcICr9qV0ajqvemPzmxkBoFeEaQCOcXa2oH3phHyGsfmL4Xkf2pdSs2Xq7XN5\nq0sB4GKEaQCOYJqmzs4XGfFA1z64NyXDkN44s2h1KQBcjDANwBEWlquq1FraP0GYRndikYD2TzA3\nDWCwCNMAHOHM6s2H+yc5sQvdu2p/Sm+fy6vRbFtdCgCXIkwDcISzcwUZhrRnnLV46N5V+1JqNNs6\nNc3cNIDBIEwDcIQzs0XtGo0pFPRbXQoc5IP7UjLE3DSAwSFMA3CEs3MFRjywZYloUHvSCb3B3DSA\nAenqBEQAsFKp2lA2X9OnuPnQEwyfoVKtuenrzFxZ5S5ed+WeIf2P38woX64rFgkpQBsJQB8RpgHY\nHicfekut0dKv3pzf9HXJRESFYrWr96w323r2387o//5vBxQI86sPQP/w3+cAbO/M3GqYnmDMA1s3\nObpyYubMYtniSgC4EWEagO2dnS1oOB7ScDxkdSlwoEgooOF4SLO5itWlAHAhwjQA2zszx8mH2J7J\n0ajmFytqtU2rSwHgMoRpALbWbLV1bqGk/Yx4YBsmR2NqtNqaWh0ZAoB+IUwDsLVzCyW12qb205nG\nNkyOxCRJb00tW1wJALchTAOwtbNrNx8SptG7WCSgZCyo377HvmkA/UWYBmBrZ2aLCgV9a51FoFeT\nozG9PZVXm7lpAH1EmAZga2fnCtqbTsjnM6wuBQ63azSmSq2pd2cLVpcCwEUI0wBsyzRNnZktaj8j\nHuiD3eMxGZJefjtrdSkAXIQwDcC2svmqyrWm9k2yyQPbFwkF9IFdSf36HcI0gP4hTAOwrc7Nh3Sm\n0S8fOTiqU+fyypfrVpcCwCUI0wBs6+xsUYakvWnCNPrjdw6OypT0G7rTAPqEMA3Ats7MFTUxGlM4\n5Le6FLjE3omEhuIh/Zq5aQB9QpgGYFtnZguMeKCvfIaha64Y02/eyanVbltdDgAXIEwDsKVCua6F\n5ao+sIubD9Ff11w5pnKtqben8laXAsAFCNMAbOmdcytB58rdQxZXArf5nQOj8vsMvczcNIA+IEwD\nsKW3zy3LZxg6sIswjf6KRQL64N5h/eq3hGkA20eYBmBLb0/ltW8iwc2HGIirrxzTe/NF5fJVq0sB\n4HC
EaQC2026bemc6ryv30JXGYFxzxZgkMeoBYNsI0wBsZ2qhpFq9pSt3D1tdClxq93hcY0MRVuQB\n2DbCNADbefvcsiTRmcbAGIaha64c06unF9VosiIPQO8I0wBs5+2pZSWiQaVTUatLgYtdfeWYao2W\n3nxvyepSADgYYRqA7bxzLq//smdYhmFYXQpc7NAHRhTw+/RrtnoA2AbCNABbKVYams6WdQX7pTFg\n4aBfH/5ASr96e0GmaVpdDgCHIkwDsJW1w1r2cPMhBu/jH0xrbrGis3NFq0sB4FCEaQC28s65ZRmG\ndDDDMeIYvP/9qrR8hqF/f23O6lIAOFTA6gIA4HxvTy1rbzqhSIgfT+g/w2eoVGuu/dnn9+mq/Sn9\n26uzOvx/7N/WnH44GFCAFhXgOfy2AmAbbXPlsJbrf2eX1aXApWqNln715vwFj40kw3rt3UX90/98\nV+Pb2CDzu4cmFQjzaxXwGv4bGoBtTC+UVKm1/v/27jy8rfLOF/j3HC2WZcm2bGvzviR2nHjJSlYT\nkgJJh9CkTANtgU6fXsIDDM2dtIWklxamQO9t2nk6bZkULrRl67SluYUUkkBCCBAnIZB98ZbE+yLL\ni+RV1n7uHw4G1wlxHIUHxxMAACAASURBVNtHsr6ff2xZR6+/1vHR+enV+74HOZx8SJMo3ayDKAio\na+2VOwoRhSEW00QUMqo5+ZBkoFYpkGyMQZ2tl6t6ENE1YzFNRCGjurkbMRolzAZerIUmV6ZFD5fH\njzbngNxRiCjMsJgmopBR3dKDHF6shWSQZtJBIXKoBxFdOxbTRBQSXG4fWjr6OV6aZKFSikg16VDf\n2otgkEM9iGj0WEwTUUiosQ2Ol87meGmSSaZFD7c3ALvTJXcUIgojLKaJKCTUNPdAAJBtZc80ySPF\nGAOlQkCtjUM9iGj0WEwTUUi42NyNZGMMorlOL8lEqRCRZtKhwd6LAId6ENEosZgmItn5A0Gcb+pC\nXlq83FEowmVZY+H1BWHr7Jc7ChGFCRbTRCS76uZueH1BzMxMkDsKRThrUgzUShG1l9Y8JyK6GhbT\nRCS78jonBAGYkc6eaZKXQhSQaY1Fg70PHm9A7jhEFAZYTBOR7MrrHciyxkKrUckdhQi5aXEIBCVU\nt3TLHYWIwgCLaSKSlcvtR21LL2ZmGuSOQgQASIjVwBivwfnGbl5enIiuisU0EcmqqtGJoCRhZgbH\nS1PoyE2LR0+/F3YHLy9ORF+MxTQRyaqizgm1UkQOL9ZCISTDoodaJaKqsUvuKEQU4lhME5Gsyuud\nyE2Lh0rJlyMKHUqFiJzkODTYezHg8csdh4hCGM9eRCQbZ68HLR39XBKPQlJuWjwkCbjYxImIRHRl\nLKaJSDYV9Q4A4ORDCklxOjUsCVpcaOpGkBMRiegKWEwTkWzK65zQRauQatLJHYXosnLT4tA34IOt\ng1dEJKLLYzFNRLKQJAnldQ7MzDRAFAS54xBdVppZD41agapGDvUgostjMU1EsrB1utDV5+V4aQpp\nClHAtNQ4NLf1oX/AJ3ccIgpBLKaJSBbldZfGS2dwvDSFtumpcZAAVDVwmTwiGonFNBHJorzOCVN8\nNJLio+WOQvSF9Fo1Mix6VDV0weMNyB2HiEIMi2kimnSBYBCVDU6u4kFhoygnEb5AEBX1TrmjEFGI\nYTFNRJOu1tYLtzfA8dIUNgz6KKSbdaiod8LrY+80EX2GxTQRTbqyWgcEADM4XprCSFFOInz+ICrZ\nO01En8Nimogm3cnz7chJiYMuWiV3FKJRS4jVINWkQ3m9E14/e6eJaJBS7gBEFDn8QaC5vRcNbX34\n6o3Z6Pf4x63tIC9QR5OgKCcRuz/qQ1V9FwpzEofdJ4jCuP5Pf16USgklu7+IQhKLaSKaNB6fH28d\nrgMACACOVtjHre3iXOO4tUV0JUlxGqQYY1Be58SMDANUn6twPb4AT
p9vn5DfuyDfDGUUT9lEoYjv\nc4loUjW09iIxNgo6LYd4UHgqykmExxdAVSPXnSaiUfZM19bWYsuWLejq6kJ8fDy2bt2KzMzMYdts\n27YNu3fvhiiKUKlU2LRpE0pKSgAAW7ZsweHDh2EwDE42Wr16NR588MHx/UuIKOQ5ez3o6HZjzvQk\nuaMQjZkxPhrWRC3Kax2YkR4PpYL9UkSRbFTF9BNPPIFvfvObWLt2Lf7+97/j8ccfxyuvvDJsm6Ki\nInznO99BdHQ0Kisrcc899+DgwYPQaDQAgPvvvx/33HPP+P8FRBQ2Tl/sAABkWPQyJyG6PsXTEvHO\nx404V+PAbL45JIpoV3073dnZifLycqxZswYAsGbNGpSXl8PhcAzbrqSkBNHRg1cyy8vLgyRJ6Ori\nR2BE9JnTFzoQr1MjNkYtdxSi62IyaJFp0eNcrQO9Lq/ccYhIRlftmbbZbDCbzVAoFAAAhUIBk8kE\nm82GhITLX3Bhx44dSE9Ph8ViGfrZiy++iNdeew1paWn4/ve/j5ycnGsKmpiou6bt6TNGI3sBQ1mk\n7B9nrxvVzd2Yn2+GXqcZ9/ZVKuWEtKvXaSasbWDickdK29eaYTxz3zQvDf+9pxLHz3dgzdKsCX1O\ntNooGBO0E9L2RIqU17dwxf0zPsZ9avAnn3yCX//61/jDH/4w9LNNmzbBaDRCFEXs2LED9913H/bt\n2zdUoI9GZ2cfglz76poZjXq0t/fKHYOuIJL2zwcnmyEBsCREo7fPPe7t+3z+cW9Xr9Ogt889IW1/\nim2Pve1P989EtD1axdMScayyHeU1HchJiZ2w58Tl8qA9EF5rW0fS61s44v65PFEUrrkD96rDPKxW\nK+x2OwKXDuJAIIC2tjZYrdYR2548eRKPPPIItm3bhuzs7KGfm81miOLgr1q3bh1cLhdaW1uvKSgR\nhbfjVW0wxmsQr+MQD5o6ZqQbEK9T45OKNnh4mXGiiHTVYjoxMRH5+fnYuXMnAGDnzp3Iz88fMcTj\nzJkz2LRpE37zm99g1qxZw+6z2z9bS7a0tBSiKMJsNo9HfiIKA30DPlQ2dKF4mhGCIMgdh2jciKKA\nhbPMcLn92PdJo9xxiEgGoxrm8e///u/YsmULfvvb3yI2NhZbt24FAGzYsAEbN25EYWEhfvKTn8Dt\nduPxxx8fetzPf/5z5OXlYfPmzejs7IQgCNDpdHj22WehVHLxeaJIcepCBwJBCbOnJ6HN6ZI7DtG4\nMhu0yEmJxQcnm7FmSQbidVFyRyKiSTSqijYnJwfbt28f8fMXXnhh6Pu//e1vV3z8Sy+9dO3JiGjK\nOF7VhsTYKKSbdSymaUqam2tES0c/Pilvwy0LUvkJDFEE4UrzRDShBjx+lNU5MDfXxAKDpqzoKCVu\nW5KJVocLlQ1cFpYokrCYJqIJdepCB/wBCfPyjHJHIZpQiwosSDXG4HhlOzp7JmZVDyIKPSymiWhC\nfXiqGSZDNKalxskdhWhCCYKAJYUWRKkVKD3VAp8/KHckIpoELKaJaMI0d/TjfFM3lhcnQ+QQD4oA\nGrUSJUVW9Lh8+KTCfvUHEFHYYzFNRBPmw1PNUIgClhaOXJeeaKqyJGpRlJOI6uYe1LT0yB2HiCYY\ni2kimhBeXwAfnWvFvDwjYmN4oRaKLEU5iTAZonGkrBU9/V654xDRBGIxTUQT4lhVG/rdfiyfnSJ3\nFKJJJ4oCSoqsEEUBB063wB/g+GmiqYrFNBFNiA9OtcBsiMaM9Hi5oxDJIiZahaWFVjh6PDhSZock\nSXJHIqIJwGKaiMZdc3sfLjZ1Y/nsFK4tTREtzaRD8bRE1LT0cP1poimKxTQRjbsPT7VAqRCwtNAi\ndxQi2RXlJCLVpMOxyja0OngFUKKphsU0EY0rry+Aw+daMS/PBL2WEw+JBEHAsiIL9Fo1DpxqQd+A\nT+5IRDSOWEwT0bg6WtkGl8ePm
2Ynyx2FKGSolQqsmJOMQEDChyebOSGRaAphMU1E4+qDU82wJGiR\nm8aJh0SfF6eLwrJiKzp7PPiYExKJpgwW00Q0bmptPahu7sHy2cmceEh0GWkm3eAFXTghkWjKYDFN\nRONmR2ktYjRK3FjMIR5EV1I8LRGpxhhOSCSaIlhME9G4uNjcjbM1nfjyogxERynljkMUsgYnJFqH\nJiT2c0IiUVhjMU1E4+KNAzXQa1X40txUuaMQhTy16rMJiR+c5BUSicIZi2kium5VDU5U1DvxT4sy\nEKVWyB2HKCzE6aKwtMiCzh43JyQShTEW00R0XSRJwhultYjTqbFiTorccYjCSrpZPzQh8Xxjt9xx\niGgMWEwT0XUpr3fifGMX1izOhFrFXmmia1U8LRHJSVocq2xDV59H7jhEdI1YTBPRmEmShB2lNTDo\no3BjsVXuOERhSRAELCmwQqkQUXrahkCQ46eJwgmLaSIas7M1DlQ39+D2JZlQKdkrTTRWWo0SSwot\ncPZ6cPJ8h9xxiOgasJgmojEJBiW8caAGSXEaLCtirzTR9Uoz6ZCbFofyOidaOvrljkNEo8RimojG\nZM/RBtTbe3HH8mwoFXwpIRoP82eYEBujxqGzrXB7A3LHIaJR4BmQiK5Zc0c/3jhQi7m5RizMN8sd\nh2jKUCpElBRZ4fH6caSslcvlEYUBFtNEdE0CwSD+sKscGrUC967KgyAIckcimlIS4zSYnWtEg70P\ntbYeueMQ0VWwmCaia/L2kQbU2npx76o8xMWo5Y5DNCXNzDQgKU6DoxXtcHv9cschoi/AYpqIRq2p\nrQ9/P1iLBTNMWDDDJHccoilLFAQsLrDA5w/gWGW73HGI6AuwmCaiUfEHgvjdrnLEaJS459ZcueMQ\nTXkGfRRmZSeipqUHFfUOueMQ0RWwmCaiUXnzUC0a7H24d9UM6LUc3kE0GYqyExAbo8Zf9l2Ah6t7\nEIUkFtNEdFUfnmrGzsP1WFpowbw8o9xxiCKGQiFi8SwzHD0e7DhYI3ccIroMFtNE9IWOVbbhlT1V\nKMxOxL+sniF3HKKIY07QYmmhBXuPNqKulat7EIUaFtNEdEVldQ48/1YZcpLj8NBXC3hxFiKZfGVZ\nNmJj1Hjp7UoEgkG54xDR5/DMSESXVdPSg//621lYErT4n+uLEKVSyB2JKGJpNUrcfXMuGux92H+8\nWe44RPQ5LKaJaISmtj78avtp6LUqfO+u2YjRqOSORBTx5uUZUZidiDdKa+Ds9cgdh4guYTFNRMMc\nKWvF068eg0Ih4Adfn414XZTckYgIgCAIuPvWXASCEl7bf0HuOER0CYtpIgIA+PxBvLqnCs+/VY5M\nsx6P/8sCmAxauWMR0eeY4qNx2+IMfFLRhrJarj1NFApYTBMROroG8H/+eBzvn2zG6oXpeOSbc2DQ\ns0eaKBR9eWEGzIZovLq3Cj4/154mkhuLaaIIFggG8cGpZvzkpaOwOwfw8B2FuHPFNChEvjQQhSqV\nUsQ9t+ahzTmAtz9ukDsOUcRTyh2AiCafJEk4U92Jv75/EbZOF6anxuF/3JbPYR1EYWJWVgJuyDdh\n5+F6LJpp5rFLJCMW00QRpr61F399/yIq6p0wG6Lx8B2FmDM9CYIgyB2NiK7BXSun40x1J/747nls\nWl/MY5hIJiymiSJAUJJwrqYT7x5tRFmdE7poFe6+JRfLZyfzQixEYcqgj8JXS7Lx5/cu4HhVO+bP\nMMkdiSgisZgmmsI83gAOn7Ph3WNNaHW4EK9T444bs7Fybiq0Gh7+ROFu5bwUHDprw5/fu4BZWQmI\njuJxTTTZeNQRTUGOHjfeO96ED0+1wOXxI8uqx/1fmYn5eSb2RBNNIQpRxL2r8vDTV4/jzUO1uGvl\ndLkjEUUcFtMRxB8EPD7/hLQdpVJCyRptmNE835LDBZfn2veJSqmEzz/ycbW2HnxwohmnLrRDAjB
7\nWhJumpuCLGssBEGAxx+Exx8cU9vjIShNSLNEES0nJQ43Fifj3aNNWFpgRapJJ3eksDeR58uJfI3l\nuVgeLKYjiMfnx9EK+4S0vSDfDCU/XhxmNM+3XqdBb5/7mtsuzjXi9Pl2AIMrc7R0uHCmuhPtXQNQ\nKUXMyDBgRoYBumgVOrvd6Owe/e/4fNvjrTjXOCHtEkW6r92UgxPn2/HK3ipsuXsuRE5GvC4Teb6c\nyNdYnovlwWecKExJkoSm9n6cqe5EZ7cbWo0SC/JNmJYSBxW7Jogiii5ahfUrcvDi7kocOmtDSVGy\n3JGIIgaLaaIwVFHnwM7D9XD2eqCLVmHxLDOyU+KgENkbRRSplhZaUXrGhu3vV2POdCN00Sq5IxFF\nBHZfEYWRnn4v3jvehN+9WQ5/IIilhRasK8nC9LR4FtJEEU4UBHzr1jy43H78df9FueMQRQz2TBOF\nAa8/gDMXO1FZ74RCFHH7skzEatUsoIlomFSTDl9elI5dH9VjzvQkzOE8BaIJx55pohBX39qLHQdq\nUV7nRHZyHNbdmIWb5qaykCaiy1q7LAvpZh1eeqcSPf1eueMQTXkspolClNvrx4FTLfjwVAtiNEr8\n0+IMLCm08KIMRPSFlAoRG9bMxIAngJferoQkcU1KoonEYpooBDXYe/HmwTo02HsxZ3oSvrwoA0lx\nGrljEVGYSDHq8LXl2Th1sQOlZ2xyxyGa0tjFRRRCvL4APi63o9bWi4TYKNyyIA0GfZTcsYgoDN28\nIA2nLnbgz+9dwIwMA0zx0XJHIpqS2DNNFCLanC68dagOda29KJ6WiH9alMFCmojGTBQE/I/bZkIU\ngN/vLEeQlyAlmhAspolkFgxKOH2xA3s+boQgCPjywnQUT0uCyAmGRHSdEuM0uPuWXFxo6saOg7Vy\nxwk7kiTBHwhiwONHT78X3X1eeHwBWcehB4JBeLwBDHj86Bvwoaffi55+L/yBoGyZIh2HeUxhLrcf\nrZ39aGrtxYDHD0efBw32XsTFREGvVbFYCwG9Li/2Hm1Em3MA2cmxuGGmCWqlQu5YRDSFLJ5lQWVD\nF3YeroM1QYvFBRa5I4Ukjy+AmuZuXGjqRkWDE3W2Xnj9AVyubhYFAZooBaLVCsREqxCrVSM2Ro04\n3eDXKNX1vY77A0H0ugYL5V6Xd/D7S19dbv8VH/f2kQZYErQwJ2iRkhSD2dOSkMj5NhOOxfQU4vMH\ncL6pG+dqOnGuxoHmjv4rbiuKAuJi1DDoo2CMj0amRY8oNYu4yVTf2osjZXYEgxKWFVmQnRwndyQi\nmoIEQcC3VuWho2sAL75dgcQ4DXLT4uWOFRIGPH4cKbfj8Dkb6my9CAQlCACSjTFIN+sQpVZApRSh\nVopQXerocHv9GPAE4Pb64fYE0N3nRVNbHz4/ikatEqGLViFGoxr8Gq2EUiFCIQoIAmiy9wIYLODd\nngDc3sH2XG7/YMHsGV4wa9QK6LUqWBK00GtVUCsVEEUBoihAIQKSBPQP+KBSKtDZ48bRCjs+cPvx\n3++eR5ZVj/l5JszLM8Jk0E7SMxtZWEyHuaAkoazWgfdPNKO83gGvLwilQkBuWjwWzTIj1RKHgM8P\nbZQSEAWcq+lAd58XXX0edPV60epwoaalB0cr7Eg16ZCdHIsUo45rGE8gnz+Io5VtuNjUDZNBi6WF\nZui1arljEdEUplSIeOirhfjpq8fxX6+fxY++NS9iCytJklDX2osPTzXj4/I2eHwBpBp1WL0wHdNT\n4zEtJRaSIOBohX3UbQaDEvoGfOi+NOSib8A3OATD5YWtsx/+wGeV9qGzrSMer1QI0KiViI5SwJqo\nhT5GDb12sMdbr1VBPcqe7gX5ZsRcWj7V7nDhWFUbjle1Y/sH1dj+QTWyrLFYvTAd83gxn3HFYjpM\neXwBHD7Xin3HGmHrdCFOp0ZJYTIKshMwI90w1MtsNOrR3j7
4DrjfMzjsIylu+IxuR48bNS09qGnp\nQYO9D2qViOmp8ZiZaeCaxuPM0eNG6Wkbuvu9KMhOwLLZqXC5PHLHIqIIoItW4d++VoSnXzmGX20/\ng8e+NQ8xGpXcsSaNJEk4Xd2JNw/Woq61F2qViIX5ZiyfnYIsqx6C8FknUr/nykMpLkcUBcTGDA7x\nuNzv9fqC8AeDCAYlTEszoLy2E5CAKLUCGrUCSsX4T2EzJ2hx2+JM3LY4Ex1dAzhW1Y4PTzXj2R3n\nYIqPxj9/aTqKMw2jLtTpylgphZm+AR/2Hm3A+yea0e/2I8Oix4bbZ2LBDNOYD8aEWA0SYjWYm2uE\nrdOFi83dKK91oLLeielpcSjISoA2gl5wJ0JQklBR58TJ8x2IUitwy4JUWBNj+AkAEU0qc4IWD99R\niP/4yyn89o1z+Lf1xVApp/ZaBJIkobzOiTdKa1DT0gNjvAb33JqLRTMt0GomvgwSBAFRagWiMFi0\nWhK1sHdeeRjmREiKj8bqhem4dUEaTl5ox+4jDXj2b2eg16pw64I0fGleKjRqloRjxWcuTLjcPuz5\npBHvHmuExxvA3FwjblmQhumpccPeTV8PURSQYoxBijEGPf1enK3pRFVDF843dGNaaiwKshOhi2ZR\nfa26+jw4fLYVHd1upJl0WFxg5osWEckmL92Ab395Bn6/qwL/+ddTePiOwinbYVLV4MQbpbU439iF\nhNgo/MvqPCwttE5IT3A4EEUB8/JMmJtrRFuvF396pxJ/+7AGe4824rZFGVgxN2VobDiNHs/oIW7A\n48e7xxqx55NGDHj8mJ9nxNplWUgx6ib098bGqLG00IqinESU1TpwsakbF5t6kJsWh8KcRA7/GIVA\nIIiz1Z04fbETKqWIkiIrMv/ho0QiIjksLbRCFAX8YVcF/vcfT2DT+uIptepDdUs3dhyoQVmdE3Ex\natx9Sy5uLE6e8r3woyUIAgpykrDpzmJUN3fjjdIa/GX/Rew52og1SzJRUhS5bzjGghVRiHJ7/Xjv\neBPe+bgB/W4/5kxPwtplWUg36yc1h16rxqJZFhRkJ+JMdSeqGrtwsbkbeekGFGQlcAWQK6hp6cFL\n71Siqa0PGRY9bsg38Q0IEYWUxbMsiNdF4b9eP4unXz2Gf/taMTIsk3uOGW/1rb3YUVqD09Wd0EWr\ncOeKaVgxN+W6l6qbynJS4vCDr89BZb0Tr5fW4NU9VXj7SD2+sjQLiwvMUIgsqq+GZ/cQ4/EFsP9E\nE94+0oC+AR+KchKxriQLmZZYWXPpolVYUmBBQVYCTl/sQFmtA+cbu5CXHo/8DIOs2UJJW9cAXv+w\nGp9UtEGvVWH57OSwPzkR0dSVn2HA/7pnLv5z+2n87E8n8ODaAhTlJMod65rVtPRg10d1OHmhAzEa\nJf55eTbHAV+jGRkG/DB9Ls7VOvD6gRr8YXcFdh2px7plWViQb4LIT1WviP9lIaJvwIcPTzXj3WNN\n6On3oiArAWtLspATYmsPx8aoUVKcjIJsD85UD65nXVHnRJtzALcvyURC7NT5mPBa9A348NahOuw/\n0QSFKOD2JZm4cU4yzlZ3yh2NiOgLpRh1eOze+fj19tP49fbTKCm24o7lOYgN8SU7JUlCZUMXdn1U\nh/I6J2I0SqxdloVb5qdNysTCqUgQBBRmJ6IgKwEnL3TgjdIa/N83y7DzozqsviEdN+SbOVTmMvjf\nJrOm9j7sO9aIj8rs8PmDmJlpwFfWFYT8gvoGfRSWz05Gd58X52o7UXrGhoNnbFg0c3CZoZyU2IgY\nG9zWNYAPTjTjw9MtcHv9WFZoxbqSbBj0Ude8tBIRkVwM+ihsuWcu3jxYh3ePNeJ4VTu+emM2bpqd\nEnJXy/X6Ajh+vh37jzehuqUHcTFq3LliGpbPTuZwunEiCALm5hoxe3oSjla04a3Ddfj9rgpsf/8i\nbpqTghVzUhCni5I7Zsj
gf50M+t0+nDzfgY/KWlFR74RKKWLxLAtunp+K1AmeWDje4nSDExW/tXoG\nDpxswcGzNhw61wprohYlRclYUmC57Lqb4SwoSSivdeC94004U9156UUnCV9ZlhV2+4+I6FMatRJ3\nrpyGpUVW/PfeKvxx73kcON2CO1dMw4wMg6wf80uShHp7L0pP23Ck3I4Bjx/GeA3uvTUXy4qsXIFi\ngoiCgIUzzbgh34TyOifePdaINw/VYddH9ViQb8LCfDNmZSVE/GRFFtOT5NMC+lhVG8pqHQgEJSTG\navDPy7OxfHZK2C85lxCrwd235uKO5dk4WtmG0jMt+Ov7F/G3D6sxKyth8GOj7ASYw/SKW/5AEBca\nu3C6uhMnzrejo9uNWK0Ka5Zk4qY5KTDo+Q6diKaGlKQYPPKNOTha2YbX9l/Ef/zlFJLiNFhSYMGS\nAsukXTnRHwiiurkb52odOH2xA03t/VApRczPM6KkKBm56fEcxztJBEHArKwEzMpKgN3hwr5jTfio\nrBVHyuzQRikxZ3oS5s8wRWxhPapiura2Flu2bEFXVxfi4+OxdetWZGZmDtsmEAjg6aefRmlpKQRB\nwP3334/169df9b6pqrPbjQvNXZeWlOtGY3sfJAlIjNXglvlpmD/DNOKKS1NBdJQSNxYn48biZDR3\n9OPgmRacvNCBM5fGDpsM0SjMSsS01Dikm3UwG7Qh9xEiMPgxYnNHP+rtvSivc6KsthMDngCUCgEz\n0g346o3ZmJ9n4tgxIpqSBEHADflmFE9Lwomqdhw6Z8Nbh+rw5qE6TE+NQ/G0JGRa9Mi06Mdtjeoe\nlxfNbX1obO9HZb0TFQ1OeLwBKEQBOcmxuHdVHhbmm6bsmtjhwpygxd235uKuL01DeZ0DRyvacOJC\nBw6da4VaKSLLGotpqXGYnhqHnJS4iLjK5qiK6SeeeALf/OY3sXbtWvz973/H448/jldeeWXYNm+9\n9RYaGhqwd+9edHV1Yd26dVi8eDFSU1O/8L5wJEkS3N4Ael1e9A744OzxwO50we4YuPTVhR6XDwAQ\npVIgOzkWty/JRFFO0pQsoK8kJSkGd62cjrtWTofd6cK5GgfO1nSi9EwL3jvRBABQq0SkGnVIM+mQ\nFDd4JcYEfRQMsRoYdFETWqx6fAE4ez1w9Ljh6PHA0euG3eFCg70Ptk4XgpIEYHAoy4IZJhTnJCE/\n08DZ4UQUMaJUCiwusGBxgQWOHjc+KmvFR2V2/L8Pqoe2MRuikWHRIzFWA71WjdgYFfRaNVIH/Ojq\ncgEYHB4HCfD5g+gd8KGn34uefi96XV60dQ2gqb0fPf3eoTaT4jRYPGtwBan8DAPHQocgpUJEUU4S\ninKS8C1/EOV1DpTVDV6X4u0jDdh16RyaFKeB2RANU4IWZoMWZkM04nVR0GtV0EWrpsTlzK/639nZ\n2Yny8nK8+OKLAIA1a9bgqaeegsPhQEJCwtB2u3fvxvr16yGKIhISEnDzzTfjnXfewX333feF943W\nZPdeur0BvFFag54+D3wBCT5/EL5AEF5fAP1uHwIBacRj9Fo1kuI0mJ4Wj5SkGGRaYmFNjIYo8xqN\nnz53SoU4Ye/olQrxC/eRNTEG1sQY3LIgDYFgEHbHAGyd/WjucMHW0Yf61l5U1DtHPE6lFKFRKxEd\npYBGrUSUSoRCIUIpilAoBCgVwmXfnASDEvwBCYFgEIGgBL8/CLc3ALfXD7cvCLfXD78/OOJxcboo\n5KTGoWR2MlKTYpCcFAODPmpMb4BG83xHRykR8F/7Ppnofcm2B326f8Itd6S0PZbjJxRyj7VtuT/F\nS4qPxu1Ls3D7LRZkvwAACjZJREFU0iy43H40tfehub0PTW39aOroQ2N732XPjVciANBGqxCvi8Ky\nIgssCTGwJmhhSdRCF62SteMpUv5Pxut/KkqtwJxcI+bkGgEAXl8QDW29qG/thd05gI6uA
VQ396Cs\n1jHisWqVAjFRSqhUIpQKESqlAiqFgHSLHqtvSB+XfNdiLM/JVYtpm80Gs9kMhWLwnYNCoYDJZILN\nZhtWTNtsNiQnJw/dtlqtaG1tvep9o2UwxFzT9uPh4TvnTPrvnAiJiZ9Niku1hsZSeyZjLArlDjEJ\nJvL5zk6duPW92TbbZtuh13YoSQSQlhLaq05dr3B9/b4Wn68PxpvVEouFRRPWfEjhgE8iIiIiojG6\najFttVpht9sRCAQADE4mbGtrg9VqHbFdS0vL0G2bzQaLxXLV+4iIiIiIwtVVi+nExETk5+dj586d\nAICdO3ciPz9/2BAPAFi9ejW2b9+OYDAIh8OBffv2YdWqVVe9j4iIiIgoXAmSJF11tkB1dTW2bNmC\nnp4exMbGYuvWrcjOzsaGDRuwceNGFBYWIhAI4Mknn8ShQ4cAABs2bMBdd90FAF94HxERERFRuBpV\nMU1ERERERCNxAiIRERER0RixmCYiIiIiGiMW00REREREY8RimoiIiIhojHix+ymgtrYWW7ZsQVdX\nF+Lj47F161ZkZmYO2+aZZ57Bn/70J5hMJgDA3Llz8cQTT8iQNrJs3boVe/bsQXNzM9566y3k5uaO\n2CYQCODpp59GaWkpBEHA/fffj/Xr18uQNvKMZv/w2JGP0+nEo48+ioaGBqjVamRkZODJJ58csTTr\nwMAAfvjDH6KsrAwKhQKbN2/GihUrZEodOUa7f7Zs2YLDhw/DYBi86t/q1avx4IMPyhE5Ij300ENo\namqCKIrQarX48Y9/jPz8/GHb8Dx0nSQKe/fee6+0Y8cOSZIkaceOHdK99947Ypvf/OY30s9+9rPJ\njhbxjh49KrW0tEgrVqyQqqqqLrvNG2+8IX3nO9+RAoGA1NnZKZWUlEiNjY2TnDQyjWb/8NiRj9Pp\nlI4cOTJ0+2c/+5n0wx/+cMR2zzzzjPTYY49JkiRJtbW10pIlS6S+vr5JyxmpRrt/Nm/eLL366quT\nGY0+p6enZ+j7d999V1q3bt2IbXgeuj4c5hHmOjs7UV5ejjVr1gAA1qxZg/LycjgcDpmTEQDMnz9/\nxNVC/9Hu3buxfv16iKKIhIQE3HzzzXjnnXcmKWFkG83+IfnEx8dj4cKFQ7dnz5497Gq6n3r77beH\nrl2QmZmJgoICHDhwYNJyRqrR7h+Sl16vH/q+r68PgiCM2IbnoevDYR5hzmazwWw2Q6FQAAAUCgVM\nJhNsNtuIj9p27dqFgwcPwmg04rvf/S7mzJkjR2T6BzabDcnJyUO3rVYrWltbZUxE/4jHjvyCwSD+\n/Oc/Y+XKlSPua2lpQUpKytBtHkOT74v2DwC8+OKLeO2115CWlobvf//7yMnJmeSEke2xxx7DoUOH\nIEkSfve73424n+eh68NiOkJ8/etfxwMPPACVSoVDhw7hoYcewu7du4fGsBHR5fHYCQ1PPfUUtFot\n7rnnHrmj0GV80f7ZtGkTjEYjRFHEjh07cN9992Hfvn1DnUA08X76058CAHbs2IGf//zneOGFF2RO\nNLVwmEeYs1qtsNvtCAQCAAYnEbS1tY346NpoNEKlUgEAli5dCqvVigsXLkx6XhrJarUO+2jUZrPB\nYrHImIg+j8eO/LZu3Yr6+nr86le/giiOPG0lJyejubl56DaPocl1tf1jNpuHfr5u3Tq4XC72espk\n3bp1+Pjjj+F0Oof9nOeh68NiOswlJiYiPz8fO3fuBADs3LkT+fn5I4Z42O32oe8rKirQ3NyMrKys\nSc1Kl7d69Wps374dwWAQDocD+/btw6pVq+SORZfw2JHXL3/5S5w7dw7btm2DWq2+7DarV6/Ga6+9\nBgCoq6vD2bNnUVJSMpkxI9Zo9s/nj6HS0lKIogiz2TxZESNaf38/bDbb0O39+/cjLi4O8fHxw7bj\neej6CJIkSXKHoOtTXV2NLVu2oKenB7Gxsdi6dSuys
7OxYcMGbNy4EYWFhdi8eTPKysogiiJUKhU2\nbtyI5cuXyx19ynv66aexd+9edHR0wGAwID4+Hrt27Rq2bwKBAJ588kkcOnQIALBhw4ahyVQ0sUaz\nf3jsyOfChQtYs2YNMjMzodFoAACpqanYtm0b1q5di+effx5msxkulwtbtmxBRUUFRFHEI488gptv\nvlnm9FPfaPfPt7/9bXR2dkIQBOh0Ojz66KOYPXu2zOkjQ0dHBx566CEMDAxAFEXExcVh8+bNmDVr\nFs9D44jFNBERERHRGHGYBxERERHRGLGYJiIiIiIaIxbTRERERERjxGKaiIiIiGiMWEwTEREREY0R\ni2kioiniueeew2OPPTaqbZ955hn84Ac/mOBERERTH4tpIqIQsnLlShw+fHjYz15//XV84xvfuOpj\nH3jggaHLBk9EDiIiGonFNBERERHRGLGYJiIKI3a7Hd/97nexaNEirFy5Eq+88srQff84dGPHjh1Y\nsWIFFi5ciG3bto3obfb5fHj00UcxZ84c3HbbbTh79iwA4JFHHkFLSwseeOABzJkzBy+88MLk/YFE\nRGGGxTQRUZgIBoN48MEHkZeXhwMHDuDll1/Gyy+/jNLS0hHbXrx4ET/5yU/wi1/8AqWlpejr64Pd\nbh+2zf79+3Hbbbfh2LFjWLlyJZ566ikAwC9+8QskJyfjueeew8mTJ7Fhw4ZJ+fuIiMKRUu4AREQ0\n3L/+679CoVAM3fb5fJg5cybOnj0Lh8OBhx9+GACQlpaGO++8E7t370ZJScmwNt555x2sWLEC8+fP\nBwBs3LgRr7766rBt5s2bh+XLlwMA1q5di5dffnki/ywioimJxTQRUYjZtm0blixZMnT79ddfx/bt\n29Hc3Iy2trahAhkAAoHAsNufamtrg8ViGbodHR2N+Pj4YdskJSUNfa/RaODxeOD3+6FU8tRARDRa\nfMUkIgoTVqsVqamp2Lt371W3NZlMqK2tHbrtdrvR1dU1kfGIiCISx0wTEYWJoqIixMTE4Pnnn4fb\n7UYgEMD58+dx5syZEduuWrUK+/fvx4kTJ+D1evHMM89AkqRR/66kpCQ0NjaOZ3wioimJxTQRUZhQ\nKBR47rnnUFlZiS996UtYtGgRfvSjH6Gvr2/EttOnT8ePf/xjfO9730NJSQm0Wi0SEhKgVqtH9bvu\nv/9+PPvss5g/fz5+//vfj/efQkQ0ZQjStXRVEBFRWOrv78eCBQuwZ88epKWlyR2HiGjKYM80EdEU\ntX//fgwMDMDlcmHr1q3Izc1Famqq3LGIiKYUFtNERFPUe++9h5KSEpSUlKC+vh6//OUvIQiC3LGI\niKYUDvMgIiIiIhoj9kwTEREREY0Ri2kiIiIiojFiMU1ERERENEYspomIiIiIxojFNBERERHRGLGY\nJiIiIiIao/8Phj/Mf0+w3mIAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(height_outlier);" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jI9ToieVrisQ" + }, + "source": [ + "Dá para perceber que a maior parte dos dados concentra-se em torno da média (~ 1.7 m) e que apenas algumas observações encontram-se bastante distantes dela." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "q49-oFz4gBHs", + "outputId": "f968b883-a1e3-4ead-963a-19d9f25e9d9e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1.7181251474953014, 0.2948590174540895)" + ] + }, + "execution_count": 56, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "height_outlier_mean = height_outlier.mean()\n", + "height_outlier_std = height_outlier.std()\n", + "\n", + "height_outlier_mean, height_outlier_std" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "dTtLF6P2rvIh" + }, + "source": [ + "Um jeito de procurar por _outliers_ é ver quem se encontra fora do intervalo $[\\bar{x} - k * \\sigma, \\bar{x} + k * \\sigma]$, onde $k$ geralmente é 1.5, 2.0, 2.5 ou até 3.0.\n", + "\n", + "Abaixo utilizamos o $k = 2$, pois esse valor faz sentido (alturas menores que 1.12 m ou maiores que 2.30 m fogem do nosso padrão):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "cI8gL-QrgK1s", + "outputId": "6c472ac1-ea23-4dd3-b833-91969a62f92d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[1.1284071125871225, 2.3078431824034804]" + ] + }, + "execution_count": 57, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "non_outlier_interval_dist = 
[height_outlier_mean - 2 * height_outlier_std, height_outlier_mean + 2 * height_outlier_std]\n", + "\n", + "non_outlier_interval_dist" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "b5A37brPsVPw" + }, + "source": [ + "Novamente, conhecendo o intervalo, podemos identificar as observações que caem fora dele e removê-las:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104 + }, + "colab_type": "code", + "id": "W6jVe5TMglf5", + "outputId": "c270dcb7-d46a-4dd8-94b3-c3d610269282" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "29 0.516665\n", + "38 2.943781\n", + "48 1.058498\n", + "68 2.737088\n", + "Name: Height, dtype: float64" + ] + }, + "execution_count": 58, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "outliers_dist = height_outlier[(height_outlier < non_outlier_interval_dist[0]) | (height_outlier > non_outlier_interval_dist[1])]\n", + "\n", + "outliers_dist" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "jqYD2d3chJTK" + }, + "outputs": [], + "source": [ + "height_no_outlier_dist = height_outlier.drop(index=outliers_dist.index)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "8IL5fWP1sePM" + }, + "source": [ + "Até agora, nossos métodos de identificação de _outlier_ foram baseados em estatísticas descritivas do nosso _data set_ (quantis, média e variância). Porém, alguns testes de hipóteses também existem.\n", + "\n", + "Um deles é o teste de Grubb. 
Esse é um teste bastante simples, cuja estatística de teste $G$ depende dos valores extremos do conjunto e da média amostral:\n", + "\n", + "$$G = \\frac{\\vert x_{\\text{\\{min ou max\\}}} - \\bar{x}\\vert}{s}$$\n", + "\n", + "onde $\\bar{x}$ é a média amostral e $s$ é o desvio-padrão da amostra.\n", + "\n", + "A hipótese nula, $H_{0}$, é de que não existem _outliers_ no _data set_. O teste de Grubb assume que os dados originam-se de uma distribuição normal, então pode ser válido testar essa hipótese antes.\n", + "\n", + "Rejeitamos a hipótese nula se o valor de $G$ encontrado for superior ao valor crítico do teste, que é dado por\n", + "\n", + "$$G_{\\text{crítico}} = \\frac{n - 1}{\\sqrt{n}} \\sqrt{\\frac{t_{\\alpha',n-2}^{2}}{n - 2 + t_{\\alpha',n-2}^{2}}}$$\n", + "\n", + "onde $n$ é o tamanho da amostra, $t$ é um valor com distribuição t-Student e $\\alpha'$ é $\\alpha/2n$ se o teste for bilateral (procuramos _outliers_ muito acima ou muito abaixo) ou $\\alpha/n$ se o teste for unilateral (acreditamos que o _outlier_, se houver, está em somente uma das extremidades da distribuição)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "RNveH7ftxMOV" + }, + "source": [ + "Abaixo criamos algumas funções que nos auxiliam nos cálculos e na exibição dos resultados:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Ir61-q0ckV6K" + }, + "outputs": [], + "source": [ + "def grubb_test(g, n, alpha=0.05, tailed='two-tailed'):\n", + " if tailed == 'two-tailed':\n", + " critical = ((n - 1)/sqrt(n)) * sqrt(sct.t.isf(alpha/(2*n), n-2)**2/(n - 2 + sct.t.isf(alpha/(2*n), n-2)**2))\n", + " \n", + " return (g, critical, g > critical)\n", + " elif tailed == 'one-tailed':\n", + " critical = ((n - 1)/sqrt(n)) * sqrt(sct.t.isf(alpha/(n), n-2)**2/(n - 2 + sct.t.isf(alpha/(n), n-2)**2))\n", + " \n", + " return (g, critical, g > critical)\n", + " else:\n", + " raise ValueError(f\"Invalid tailed argument\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "c--VvSPuuHaM" + }, + "outputs": [], + "source": [ + "def grubb_summary(result, decimals=10):\n", + " return (\n", + " f\"Null hypothesis: there is no outliers in the data set\\n\"\n", + " f\"Test statistic: {np.round(result[0], decimals)}, \"\n", + " f\"Grubb's critical value: {np.round(result[1], decimals)}, \"\n", + " f\"Reject: {result[2]}\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "d8nFGEVuqgdC" + }, + "outputs": [], + "source": [ + "def next_outlier_candidate(data):\n", + " sample_distances = (data - data.mean()).abs()\n", + " candidate_idx = sample_distances.idxmax()\n", + " candidate_value = data[candidate_idx]\n", + " candidate_statistic = sample_distances.max()/data.std()\n", + " \n", + " return (candidate_idx, candidate_value, candidate_statistic, len(data))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": 
"text", + "id": "MRZwuyOOxU7U" + }, + "source": [ + "Ao executarmos o teste de Grubb no nosso conjunto de alturas, encontramos alguns valores onde a hipótese nula é rejeitada, ou seja, há evidência de que o valor extremo é um _outlier_." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 434 + }, + "colab_type": "code", + "id": "Rz-yVWFlt-M6", + "outputId": "cb11e99b-2195-45d7-9089-fdf292a65e1c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index: 38, Value: 2.944, Test statistic: 4.157, Sample size: 100\n", + "\n", + "Null hypothesis: there is no outliers in the data set\n", + "Test statistic: 4.157, Grubb's critical value: 3.384, Reject: True\n", + "\n", + "\n", + "Index: 29, Value: 0.517, Test statistic: 4.421, Sample size: 99\n", + "\n", + "Null hypothesis: there is no outliers in the data set\n", + "Test statistic: 4.421, Grubb's critical value: 3.381, Reject: True\n", + "\n", + "\n", + "Index: 68, Value: 2.737, Test statistic: 4.219, Sample size: 98\n", + "\n", + "Null hypothesis: there is no outliers in the data set\n", + "Test statistic: 4.219, Grubb's critical value: 3.377, Reject: True\n", + "\n", + "\n", + "Index: 48, Value: 1.058, Test statistic: 2.96, Sample size: 97\n", + "\n", + "Null hypothesis: there is no outliers in the data set\n", + "Test statistic: 2.96, Grubb's critical value: 3.374, Reject: False\n", + "\n", + "\n" + ] + } + ], + "source": [ + "height_outlier_grubb = height_outlier.copy()\n", + "outliers_grubb = pd.Series()\n", + "has_outlier = True\n", + "\n", + "while has_outlier:\n", + " outlier_candidate = next_outlier_candidate(height_outlier_grubb)\n", + "\n", + " print(f\"Index: {outlier_candidate[0]}, \"\n", + " f\"Value: {np.round(outlier_candidate[1], 3)}, \"\n", + " f\"Test statistic: {np.round(outlier_candidate[2], 3)}, \"\n", + " f\"Sample size: {outlier_candidate[3]}\\n\")\n", + "\n", + " result = 
grubb_test(outlier_candidate[2], outlier_candidate[3])\n", + "\n", + " print(grubb_summary(result, 3))\n", + "\n", + " has_outlier = result[2]\n", + "\n", + " if has_outlier:\n", + " height_outlier_grubb = height_outlier_grubb.drop(index=outlier_candidate[0])\n", + " outliers_grubb.at[outlier_candidate[0]] = outlier_candidate[1]\n", + " \n", + " print(f\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 86 + }, + "colab_type": "code", + "id": "49MMneSg-DCj", + "outputId": "a98df152-223e-43e1-ced9-d113a40b879f" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "38 2.943781\n", + "29 0.516665\n", + "68 2.737088\n", + "dtype: float64" + ] + }, + "execution_count": 64, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "outliers_grubb" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "_hajYam661Zd" + }, + "source": [ + "Abaixo comparamos os _outliers_ encontrados por cada um dos três métodos:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 86 + }, + "colab_type": "code", + "id": "l3P2Bavg-zMK", + "outputId": "25065774-49a4-4509-fe92-70a4d32c8cd2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "IQR [29, 38, 48, 68, 91, 92]\n", + "Normal [29, 38, 48, 68]\n", + "Grubb [29, 38, 68]\n", + "dtype: object" + ] + }, + "execution_count": 65, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "outliers = pd.Series({\"IQR\": outliers_iqr.index.values,\n", + " \"Normal\": outliers_dist.index.values,\n", + " \"Grubb\": outliers_grubb.index.values})\n", + "\n", + "outliers.apply(np.sort)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "1oMEwGs_DHJW" + }, + "source": [ + "## _Features_ de texto\n", + "\n", + "Dados 
textuais são muito ricos e muito fáceis de serem encontrados. Diversos _data sets_ são compostos por documentos textuais e ainda um simples _scrapper_ pode coletar dezenas de milhares de documentos da Internet. Coleções de documentos são frequentemente chamadas de _corpus_ (plural, _corpora_).\n", + "\n", + "Nosso objetivo aqui é somente mostrar como preprocessar de forma simples _features_ textuais. Para isso, utilizaremos o _data set_ 20 newsgroups, que contém milhares de documentos categorizados em 20 grupos (desde astronomia até carros)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "XItMVwyq8Dp9" + }, + "source": [ + "Abaixo escolhemos somente três grupos para restringir nosso escopo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "usWrDfLvMNxw" + }, + "outputs": [], + "source": [ + "categories = [\"sci.crypt\", \"sci.med\", \"sci.space\"]\n", + "\n", + "newsgroups = fetch_20newsgroups(subset=\"train\", categories=categories, shuffle=True, random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "4uNwK5uREAn7" + }, + "source": [ + "Temos agora um _corpus_ com 1782 documentos:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "_lUWgt06EtnR", + "outputId": "f82dd8b7-5f76-477c-9173-ee35d0c7e0aa" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1782" + ] + }, + "execution_count": 67, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "len(newsgroups.data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xh326fr28Jyc" + }, + "source": [ + "Um exemplo de documento desse _corpus_ é mostrado abaixo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/", + "height": 295 + }, + "colab_type": "code", + "id": "vsfaD72_M52H", + "outputId": "fb895197-8753-49e6-a631-e7716ad8c8ee" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Document 4 of 1782:\n", + "\n", + "From: billc@col.hp.com (Bill Claussen)\n", + "Subject: Re: Should I be angry at this doctor?\n", + "Organization: HP Colorado Springs Division\n", + "Lines: 5\n", + "Distribution: na\n", + "NNTP-Posting-Host: hpcspe17.col.hp.com\n", + "\n", + "\n", + "Report them to your local BBB (Better Business Bureau).\n", + "\n", + "Bill Claussen\n", + "\n", + "\n", + "> Category: sci.med\n" + ] + } + ], + "source": [ + "document_idx = 4\n", + "documents_total = len(newsgroups.data)\n", + "\n", + "print(f\"> Document {document_idx} of {documents_total}:\\n\\n{newsgroups.data[document_idx]}\")\n", + "print(f\"> Category: {newsgroups.target_names[newsgroups.target[document_idx]]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "6liTZFzv8Nas" + }, + "source": [ + "Quando trabalhando com dados textuais, uma representação simples é ter:\n", + "\n", + "* Cada documento em uma linha.\n", + "* Cada palavra (ou termo) em uma coluna.\n", + "\n", + "Por exemplo, se nosso vocábulário (conjunto de todas palavras ou termos do _corpus_) tiver tamanho 10000 e tivermos 100 documentos, então nosso _data set_ será composto de 100 linhas e 10000 colunas." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "qLBi7mFU8mLI" + }, + "source": [ + "O valor de cada célula, $x_{i, j}$, (interseção da linha $i$ com a coluna $j$) do _data set_ depende da transformação que aplicarmos.\n", + "\n", + "A transformação mais simples é a contagem de palavras no documento, ou seja, $x_{i, j}$ indica o número de ocorrências da palavra $j$ no documento $i$.\n", + "\n", + "Isso pode ser obtido no sklearn pelo `CountVectorizer`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "4E6FmUUhNs8b" + }, + "outputs": [], + "source": [ + "count_vectorizer = CountVectorizer()\n", + "newsgroups_counts = count_vectorizer.fit_transform(newsgroups.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "TSylOCPKjLmh", + "outputId": "d7b6e6b8-f227-4ec5-a34a-2cf93fc8ebb5" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "scipy.sparse.csr.csr_matrix" + ] + }, + "execution_count": 78, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "type(newsgroups_counts)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "M4rtFrsF9CgR" + }, + "source": [ + "Abaixo escolhemos dez palavras contidas no _corpus_ para exemplificar:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "colab_type": "code", + "id": "kmxzJhkSUpIZ", + "outputId": "613a8241-c25e-4d5d-9830-1cee04671fc4" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
banksbusinessclippercoloradogibberishgroupkapormonitorprivatestudy
00020001000
10000100200
23000000010
30000040002
40101000000
\n", + "
" + ], + "text/plain": [ + " banks business clipper colorado ... kapor monitor private study\n", + "0 0 0 2 0 ... 1 0 0 0\n", + "1 0 0 0 0 ... 0 2 0 0\n", + "2 3 0 0 0 ... 0 0 1 0\n", + "3 0 0 0 0 ... 0 0 0 2\n", + "4 0 1 0 1 ... 0 0 0 0\n", + "\n", + "[5 rows x 10 columns]" + ] + }, + "execution_count": 70, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "words_idx = sorted([count_vectorizer.vocabulary_.get(f\"{word.lower()}\") for word in\n", + " [u\"clipper\", u\"Kapor\",\n", + " u\"monitor\", u\"gibberish\",\n", + " u\"Banks\", u\"private\",\n", + " u\"study\", u\"group\",\n", + " u\"Colorado\", u\"Business\"]])\n", + "\n", + "pd.DataFrame(newsgroups_counts[:5, words_idx].toarray(), columns=np.array(count_vectorizer.get_feature_names())[words_idx])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "C7WuoRgP9WE9" + }, + "source": [ + "Por exemplo, o valor 2 na interseção do documento 0 com a coluna `clipper` indica que a palavra _clipper_ aparece duas vezes no documento 0. Obviamente é possível que uma mesma palavra apareça em múltiplos documentos e mais óbvio ainda que um documento contenha múltiplas palavras." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "UQzj-_QT9p7e" + }, + "source": [ + "O problema com essa abordagem é que não temos como medir relevância dos termos. E se o termo é super comum e aparece em quase todos documentos? E se o termo aparece muitas vezes no mesmo documento, mas poucas vezes nos outros?\n", + "\n", + "Essas perguntas não podem ser respondidas simplesmente com a contagem de termos acima. Para isso, precisamos do tf-idf." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "AXBnOFk___QK" + }, + "source": [ + "O tf-idf é uma estatística baseada no _corpus_ composta de outras duas estatísticas:\n", + "\n", + "* $\\text{tf}(t, d)$, ou _term frequency_, é uma medida de quantas vezes o termo $t$ aparece no documento $d$. Algumas opções estão disponíveis, mas a mais simples é a contagem do número de ocorrências do termo no documento, $f_{t, d}$, exatamente o que computamos acima. Essa é a forma como sklearn define $tf$:\n", + "\n", + "$$\\text{tf}(t, d) = f_{t, d}$$\n", + "\n", + "* $\\text{idf}(t)$, ou _inverse document frequency_, é uma medida de relevância do termo em todos documentos do _corpus_. O sklearn a computa, seguindo valores _default_, da seguinte forma:\n", + "\n", + "$$\\text{idf}(t) = \\log{\\frac{1+n}{1 + d_{t}}} + 1$$\n", + "\n", + "onde $n$ é o número de documentos no _corpus_ e $d_{t}$ é o número de documentos no _corpus_ que contêm o termo $t$ ($0 < d_{t} \\leq n$).\n", + "\n", + "O tf-idf é calculado multiplicando esses dois valores:\n", + "\n", + "$$\\text{tf-idf}(t, d) = \\text{tf}(t, d) \\times \\text{idf}(t) = f_{t, d} \\times \\left(\\log{\\frac{1+n}{1 + d_{t}}} + 1\\right)$$\n", + "\n", + "O sklearn também normaliza todos documentos resultantes, ou seja todas linhas da matriz, para terem norma unitária. Em outras palavras, os elementos do vetor de tf-idf do documento $i$ são dados por:\n", + "\n", + "$$\\text{tf-idf}(i, j)_{\\text{normalizado}} = \\frac{\\text{tf-idf}(i, j)}{\\sqrt{\\text{tf-idf}(i, 1)^{2} + \\text{tf-idf}(i, 2)^{2} + \\cdots + \\text{tf-idf}(i, T)^{2}}}$$\n", + "\n", + "onde $T$ é o número de termos do _corpus_, ou seja, o tamanho do vocabulário." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "bWpYWUMjCH8l" + }, + "source": [ + "O tf-idf é sempre um valor não negativo e quanto mais alto, maior a relevância do termo.\n", + "\n", + "Note como o tf aumenta de acordo com o número de ocorrências do termo no documento: quanto mais frequente o termo, mais relevante ele parece ser.\n", + "\n", + "O idf é uma medida de \"raridade\" do termo através de todo _corpus_: quanto mais alto, menos o termo aparece no _corpus_ e consequentemente mais informação ele traz.\n", + "\n", + "Multiplicando os dois, temos uma medida do quão relevante aquele termo é para aquele documento no _corpus_." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "b_N2VQnwDaey" + }, + "source": [ + "O sklearn provê um transformador, `TfidfTransformer`, que recebe uma matriz de frequências, como a retornada pelo `CountVectorizer`, e retorna uma matriz de tf-idf:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Fyxgx0YhVwtF" + }, + "outputs": [], + "source": [ + "tfidf_transformer = TfidfTransformer()\n", + "\n", + "tfidf_transformer.fit(newsgroups_counts)\n", + "\n", + "newsgroups_tfidf = tfidf_transformer.transform(newsgroups_counts)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "colab_type": "code", + "id": "evk8smtLWNtO", + "outputId": "bf99b51a-e276-480c-dee9-13713e85a00b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
banksbusinessclippercoloradogibberishgroupkapormonitorprivatestudy
00.0000000.0000000.0812930.0000000.0000000.0000000.0963680.0000000.0000000.000000
10.0000000.0000000.0000000.0000000.1098940.0000000.0000000.1793520.0000000.000000
20.1481520.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0485510.000000
30.0000000.0000000.0000000.0000000.0000000.1452230.0000000.0000000.0000000.083477
40.0000000.1172480.0000000.1315680.0000000.0000000.0000000.0000000.0000000.000000
\n", + "
" + ], + "text/plain": [ + " banks business clipper ... monitor private study\n", + "0 0.000000 0.000000 0.081293 ... 0.000000 0.000000 0.000000\n", + "1 0.000000 0.000000 0.000000 ... 0.179352 0.000000 0.000000\n", + "2 0.148152 0.000000 0.000000 ... 0.000000 0.048551 0.000000\n", + "3 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.083477\n", + "4 0.000000 0.117248 0.000000 ... 0.000000 0.000000 0.000000\n", + "\n", + "[5 rows x 10 columns]" + ] + }, + "execution_count": 72, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(newsgroups_tfidf[:5, words_idx].toarray(), columns=np.array(count_vectorizer.get_feature_names())[words_idx])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "h9hI18kYDsuA" + }, + "source": [ + "Também podemos obter a matriz de tf-idf diretamente do _corpus_ sem ter que passar pela matriz de frequência com o transformador `TfidfVectorizer`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "wPV4xrxzWlA-" + }, + "outputs": [], + "source": [ + "tfidf_vectorizer = TfidfVectorizer()\n", + "\n", + "tfidf_vectorizer.fit(newsgroups.data)\n", + "\n", + "newsgroups_tfidf_vectorized = tfidf_vectorizer.transform(newsgroups.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "colab_type": "code", + "id": "WAQ20ew-Wx5V", + "outputId": "fd781f7a-198a-444f-bfb8-baee26469ef0" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
banksbusinessclippercoloradogibberishgroupkapormonitorprivatestudy
00.0000000.0000000.0812930.0000000.0000000.0000000.0963680.0000000.0000000.000000
10.0000000.0000000.0000000.0000000.1098940.0000000.0000000.1793520.0000000.000000
20.1481520.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0485510.000000
30.0000000.0000000.0000000.0000000.0000000.1452230.0000000.0000000.0000000.083477
40.0000000.1172480.0000000.1315680.0000000.0000000.0000000.0000000.0000000.000000
\n", + "
" + ], + "text/plain": [ + " banks business clipper ... monitor private study\n", + "0 0.000000 0.000000 0.081293 ... 0.000000 0.000000 0.000000\n", + "1 0.000000 0.000000 0.000000 ... 0.179352 0.000000 0.000000\n", + "2 0.148152 0.000000 0.000000 ... 0.000000 0.048551 0.000000\n", + "3 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.083477\n", + "4 0.000000 0.117248 0.000000 ... 0.000000 0.000000 0.000000\n", + "\n", + "[5 rows x 10 columns]" + ] + }, + "execution_count": 74, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(newsgroups_tfidf_vectorized[:5, words_idx].toarray(), columns=np.array(count_vectorizer.get_feature_names())[words_idx])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "RLFGR7A_D0px" + }, + "source": [ + "Note como a matriz acima é exatamente igual a retornada pelo `TfidfTransformer`.\n", + "\n", + "O resultado (igual da matriz de frequência) é um _data set_ com 1782 documentos e 33796 termos:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "8I_w7yLeYnRe", + "outputId": "e1162574-03a2-4368-c3b6-517759bb973f" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1782, 33796)" + ] + }, + "execution_count": 75, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "newsgroups_tfidf_vectorized.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "NjPMTtkUwrS1" + }, + "source": [ + "## Referências\n", + "\n", + "* [Feature engineering](https://jakevdp.github.io/PythonDataScienceHandbook/05.04-feature-engineering.html)\n", + "\n", + "* [Feature Scaling with scikit-learn](http://benalexkeen.com/feature-scaling-with-scikit-learn/)\n", + "\n", + "* [Anthony Goldbloom gives you the secret to winning Kaggle 
competitions](https://www.import.io/post/how-to-win-a-kaggle-competition/)\n", + "\n", + "* [What are some best practices in Feature Engineering?](https://www.quora.com/What-are-some-best-practices-in-Feature-Engineering)\n", + "\n", + "* [Discover Feature Engineering, How to Engineer Features and How to Get Good at It](https://machinelearningmastery.com/discover-feature-engineering-how-to-engineer-features-and-how-to-get-good-at-it/)\n", + "\n", + "* [Fundamental Techniques of Feature Engineering for Machine Learning](https://towardsdatascience.com/feature-engineering-for-machine-learning-3a5e293a5114)\n", + "\n", + "* [Feature Engineering Cookbook for Machine Learning](https://medium.com/@michaelabehsera/feature-engineering-cookbook-for-machine-learning-7bf21f0bcbae)\n", + "\n", + "* [A Simple Guide to Scikit-learn Pipelines](https://medium.com/vickdata/a-simple-guide-to-scikit-learn-pipelines-4ac0d974bdcf)\n", + "\n", + "* [Outlier detection with Scikit Learn](https://www.mikulskibartosz.name/outlier-detection-with-scikit-learn/)\n", + "\n", + "* [Working With Text Data](https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html)\n", + "\n", + "* [WTF is TF-IDF?](https://www.kdnuggets.com/2018/08/wtf-tf-idf.html)\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Aula 7 - Feature Engineering.ipynb", + "provenance": [], + "version": "0.3.2" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/Semana 7/aula_7_feature_engineering.ipynb b/Semana 7/aula_7_feature_engineering.ipynb index c7cb165..dc8a2d9 100644 --- a/Semana 
7/aula_7_feature_engineering.ipynb +++ b/Semana 7/aula_7_feature_engineering.ipynb @@ -1,5337 +1,5339 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MyaSGq65woLh" + }, + "source": [ + "![Codenation](https://forum.codenation.com.br/uploads/default/original/2X/2/2d2d2a9469f0171e7df2c4ee97f70c555e431e76.png)\n", + "\n", + "__Autor__: Kazuki Yokoyama (kazuki.yokoyama@ufrgs.br)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "mi4xZxcfBA2U" + }, + "source": [ + "# _Feature engineering_\n", + "\n", + "![cover](https://venturebeat.com/wp-content/uploads/2018/07/feature_engineering.jpg?resize=680%2C198&strip=all)\n", + "\n", + "Neste módulo, trabalharemos a engenharia de _features_, que consiste em preparar os nossos dados para alimentar os algoritmos de ML adequadamente. Ao contrário do mundo dos tutoriais, na vida real os dados dificilmente estarão prontos para serem consumidos. Grande parte do tempo de um projeto de ML é gasto com a engenharia de _features_, e quanto melhor a qualidade desta etapa, maiores são as chances de melhores resultados nas etapas seguintes." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "cAxxSlo3QrZV" + }, + "source": [ + "## Importação das bibliotecas" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "jMxYy1NkQwW6" + }, + "outputs": [], + "source": [ + "import functools\n", + "from math import sqrt\n", + "\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import statsmodels.api as sm\n", + "import scipy.stats as sct1\n", + "import seaborn as sns\n", + "from sklearn.datasets import load_digits, fetch_20newsgroups\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.feature_extraction.text import (\n", + " CountVectorizer, TfidfTransformer, TfidfVectorizer\n", + ")\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import (\n", + " OneHotEncoder, Binarizer, KBinsDiscretizer,\n", + " MinMaxScaler, StandardScaler, PolynomialFeatures\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "xNbPRHkKQyv2" + }, + "outputs": [], + "source": [ + "# Algumas configurações para o matplotlib.\n", + "%matplotlib inline\n", + "\n", + "from IPython.core.pylabtools import figsize\n", + "\n", + "\n", + "figsize(12, 12)\n", + "\n", + "sns.set()" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "m8onCO86Q2Hm" + }, + "outputs": [], + "source": [ + "np.random.seed(1000)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "EIEVdatWDh3Z" + }, + "source": [ + "## _One-hot encoding_\n", + "\n", + "Até aqui, nós praticamente ignoramos a existência de variáveis categóricas. Focamos nas variáveis numéricas porque elas são simples de lidar e bastante comuns. 
Ainda assim, variáveis categóricas são encontradas facilmente e precisamos de uma forma de trabalhar com elas.\n", + "\n", + "Uma das formas mais simples de representação de variáveis categóricas é através do método chamado _one-hot encoding_. Com ele, uma variável categórica com $h$ categorias é transformada em $h$ novas variáveis binárias (0 ou 1), onde a presença do 1 (_hot_) significa que aquela observação pertence àquela categoria, e 0 (_cold_) que não pertence. Veja um exemplo abaixo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { "colab": { - "name": "Aula 7 - Feature Engineering.ipynb", - "version": "0.3.2", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "M1zv6xPDk4ym", + "outputId": "b9b41a48-556d-44e1-f142-708bae7a2d02" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourse
01.5396.61Biology
11.7646.42Biology
21.6958.95Biology
31.8295.14Biology
41.6406.43Physics
51.7787.98Physics
61.6797.90Biology
71.6046.76Physics
81.8197.44Physics
91.6076.01Physics
\n", + "
" + ], + "text/plain": [ + " Height Score Course\n", + "0 1.539 6.61 Biology\n", + "1 1.764 6.42 Biology\n", + "2 1.695 8.95 Biology\n", + "3 1.829 5.14 Biology\n", + "4 1.640 6.43 Physics\n", + "5 1.778 7.98 Physics\n", + "6 1.679 7.90 Biology\n", + "7 1.604 6.76 Physics\n", + "8 1.819 7.44 Physics\n", + "9 1.607 6.01 Physics" + ] + }, + "execution_count": 4, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" } + ], + "source": [ + "rows = 100\n", + "\n", + "height = np.random.normal(loc=1.70, scale=0.2, size=rows).round(3)\n", + "score = np.random.normal(loc=7, scale=1, size=rows).round(2)\n", + "courses = [\"Math\", \"Physics\", \"Biology\"]\n", + "course = np.random.choice(courses, size=rows)\n", + "\n", + "data = pd.DataFrame({\"Height\": height, \"Score\": score, \"Course\": course})\n", + "\n", + "data.head(10)" + ] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "MyaSGq65woLh", - "colab_type": "text" - }, - "source": [ - "![Codenation](https://forum.codenation.com.br/uploads/default/original/2X/2/2d2d2a9469f0171e7df2c4ee97f70c555e431e76.png)\n", - "\n", - "__Autor__: Kazuki Yokoyama (kazuki.yokoyama@ufrgs.br)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mi4xZxcfBA2U", - "colab_type": "text" - }, - "source": [ - "# _Feature engineering_\n", - "\n", - "![cover](https://venturebeat.com/wp-content/uploads/2018/07/feature_engineering.jpg?resize=680%2C198&strip=all)\n", - "\n", - "Neste módulo, trabalharemos a engenharia de _features_, que consiste em preparar os nossos dados para alimentar os algoritmos de ML adequadamente. Ao contrário do mundo dos tutoriais, na vida real os dados dificilmente estarão prontos para serem consumidos. Grande parte do tempo de um projeto de ML é gasto com a engenharia de _features_, e quanto melhor a qualidade desta etapa, maiores são as chances de melhores resultados nas etapas seguintes." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cAxxSlo3QrZV", - "colab_type": "text" - }, - "source": [ - "## Importação das bibliotecas" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "jMxYy1NkQwW6", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import functools\n", - "from math import sqrt\n", - "\n", - "import pandas as pd\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "import statsmodels.api as sm\n", - "import scipy.stats as sct\n", - "import seaborn as sns\n", - "from sklearn.datasets import load_digits, fetch_20newsgroups\n", - "from sklearn.decomposition import PCA\n", - "from sklearn.feature_extraction.text import (\n", - " CountVectorizer, TfidfTransformer, TfidfVectorizer\n", - ")\n", - "from sklearn.impute import SimpleImputer\n", - "from sklearn.pipeline import Pipeline\n", - "from sklearn.preprocessing import (\n", - " OneHotEncoder, Binarizer, KBinsDiscretizer,\n", - " MinMaxScaler, StandardScaler, PolynomialFeatures\n", - ")" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "xNbPRHkKQyv2", - "colab_type": "code", - "colab": {} - }, - "source": [ - "# Algumas configurações para o matplotlib.\n", - "%matplotlib inline\n", - "\n", - "from IPython.core.pylabtools import figsize\n", - "\n", - "\n", - "figsize(12, 12)\n", - "\n", - "sns.set()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "m8onCO86Q2Hm", - "colab_type": "code", - "colab": {} - }, - "source": [ - "np.random.seed(1000)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EIEVdatWDh3Z", - "colab_type": "text" - }, - "source": [ - "## _One-hot encoding_\n", - "\n", - "Até aqui, nós praticamente ignoramos a existência de variáveis categóricas. Focamos nas variáveis numéricas porque elas são simples de lidar e bastante comuns. 
Ainda assim, variáveis categóricas são encontradas facilmente e precisamos de uma forma de trabalhar com elas.\n", - "\n", - "Uma das formas mais simples de representação de variáveis categóricas é através do método chamado _one-hot enconding_. Com ele, uma variável categórica com $h$ categorias é transformada em $h$ novas variáveis binárias (0 ou 1), onde a presença do 1 (_hot_) significa que aquela observação pertence àquela categoria, e 0 (_cold_) que não pertence. Veja um exemplo abaixo:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "M1zv6xPDk4ym", - "colab_type": "code", - "outputId": "b9b41a48-556d-44e1-f142-708bae7a2d02", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - } - }, - "source": [ - "rows = 100\n", - "\n", - "height = np.random.normal(loc=1.70, scale=0.2, size=rows).round(3)\n", - "score = np.random.normal(loc=7, scale=1, size=rows).round(2)\n", - "courses = [\"Math\", \"Physics\", \"Biology\"]\n", - "course = np.random.choice(courses, size=rows)\n", - "\n", - "data = pd.DataFrame({\"Height\": height, \"Score\": score, \"Course\": course})\n", - "\n", - "data.head(10)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
HeightScoreCourse
01.5396.61Biology
11.7646.42Biology
21.6958.95Biology
31.8295.14Biology
41.6406.43Physics
51.7787.98Physics
61.6797.90Biology
71.6046.76Physics
81.8197.44Physics
91.6076.01Physics
\n", - "
" - ], - "text/plain": [ - " Height Score Course\n", - "0 1.539 6.61 Biology\n", - "1 1.764 6.42 Biology\n", - "2 1.695 8.95 Biology\n", - "3 1.829 5.14 Biology\n", - "4 1.640 6.43 Physics\n", - "5 1.778 7.98 Physics\n", - "6 1.679 7.90 Biology\n", - "7 1.604 6.76 Physics\n", - "8 1.819 7.44 Physics\n", - "9 1.607 6.01 Physics" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 4 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nK_6LysZP6Lw", - "colab_type": "text" - }, - "source": [ - "Criamos um _data set_ que contém duas variáveis numéricas (`Height` e `Score`) e uma variável categórica (`Course`). Nosso objetivo com o _one-hot encoding_ é transformar a variável `Course` em uma sequência de variáveis numéricas binárias, cada uma descrevendo uma classe da variável. Neste caso, como temos três categorias para `Course` (Biology, Physics e Math), teremos três novas variáveis binárias.\n", - "\n", - "Vamos treinar esse _encoder_:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "MDpY6XcNmYlw", - "colab_type": "code", - "outputId": "5fda81c9-000d-4557-cb3f-22d012b3e548", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 191 - } - }, - "source": [ - "one_hot_encoder = OneHotEncoder(sparse=False, dtype=np.int)\n", - "\n", - "#one_hot_encoder.fit(data[[\"Course\"]])\n", - "\n", - "#course_encoded = one_hot_encoder.transform(...)\n", - "\n", - "course_encoded = one_hot_encoder.fit_transform(data[[\"Course\"]])\n", - "\n", - "course_encoded[:10]" + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "nK_6LysZP6Lw" + }, + "source": [ + "Criamos um _data set_ que contém duas variáveis numéricas (`Height` e `Score`) e uma variável categórica (`Course`). Nosso objetivo com o _one-hot encoding_ é transformar a variável `Course` em uma sequência de variáveis numéricas binárias, cada uma descrevendo uma classe da variável. 
Neste caso, como temos três categorias para `Course` (Biology, Physics e Math), teremos três novas variáveis binárias.\n", + "\n", + "Vamos treinar esse _encoder_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "MDpY6XcNmYlw", + "outputId": "5fda81c9-000d-4557-cb3f-22d012b3e548" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1, 0, 0],\n", + " [1, 0, 0],\n", + " [1, 0, 0],\n", + " [1, 0, 0],\n", + " [0, 0, 1],\n", + " [0, 0, 1],\n", + " [1, 0, 0],\n", + " [0, 0, 1],\n", + " [0, 0, 1],\n", + " [0, 0, 1]])" + ] + }, + "execution_count": 5, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "one_hot_encoder = OneHotEncoder(sparse=False, dtype=np.int)\n", + "\n", + "#one_hot_encoder.fit(data[[\"Course\"]])\n", + "\n", + "#course_encoded = one_hot_encoder.transform(...)\n", + "\n", + "course_encoded = one_hot_encoder.fit_transform(data[[\"Course\"]])\n", + "\n", + "course_encoded[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "V-O0cMCyQqk4" + }, + "source": [ + "A saída é um `np.ndarray` com formato `(n, h)`, onde `n` é o número de observações no _data set_ e `h` é o número de categorias da variável codificada." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "BP_QsDI6REl_", + "outputId": "10a0faf0-b05f-4ad8-f79d-7642d15862a7" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(100, 3)" + ] + }, + "execution_count": 6, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "course_encoded.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "eoRT2AR8RHNl" + }, + "source": [ + "No atributo `categories_` do _encoder_, temos as categorias da variável:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "ziGE3VCinqM7", + "outputId": "2c77ac8b-ba1b-4479-97aa-b59cff8b78bf" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[array(['Biology', 'Math', 'Physics'], dtype=object)]" + ] + }, + "execution_count": 7, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "one_hot_encoder.categories_" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "y8V2WMjmRUkw" + }, + "source": [ + "Podemos criar as novas colunas que descrevem cada categoria. Repare que, para qualquer linha, apenas uma das colunas contém um 1, indicando a qual categoria aquela observação pertence. Isso acontece, obviamente, se as categorias forem mutuamente exclusivas (uma observação não pode pertencer a mais de uma categoria simultaneamente)." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "dGepWPRFoqc0", + "outputId": "dc6a6dff-007d-4f66-cbfb-2aad4c8a7448" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysics
01.5396.61Biology100
11.7646.42Biology100
21.6958.95Biology100
31.8295.14Biology100
41.6406.43Physics001
51.7787.98Physics001
61.6797.90Biology100
71.6046.76Physics001
81.8197.44Physics001
91.6076.01Physics001
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([[1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [1, 0, 0],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [1, 0, 0],\n", - " [0, 0, 1],\n", - " [0, 0, 1],\n", - " [0, 0, 1]])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 5 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "V-O0cMCyQqk4", - "colab_type": "text" - }, - "source": [ - "A saída é um `np.ndarray` com formato `(n, h)`, onde `n` é o número de observações no _data set_ e `h` é o número de categorias da variável codificada." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "BP_QsDI6REl_", - "colab_type": "code", - "outputId": "10a0faf0-b05f-4ad8-f79d-7642d15862a7", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "course_encoded.shape" + "text/plain": [ + " Height Score Course Biology Math Physics\n", + "0 1.539 6.61 Biology 1 0 0\n", + "1 1.764 6.42 Biology 1 0 0\n", + "2 1.695 8.95 Biology 1 0 0\n", + "3 1.829 5.14 Biology 1 0 0\n", + "4 1.640 6.43 Physics 0 0 1\n", + "5 1.778 7.98 Physics 0 0 1\n", + "6 1.679 7.90 Biology 1 0 0\n", + "7 1.604 6.76 Physics 0 0 1\n", + "8 1.819 7.44 Physics 0 0 1\n", + "9 1.607 6.01 Physics 0 0 1" + ] + }, + "execution_count": 8, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "columns_encoded = one_hot_encoder.categories_[0]\n", + "\n", + "data_encoded = pd.concat([data, pd.DataFrame(course_encoded, columns=columns_encoded)], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "iIiVR7P4SHXz" + }, + "source": [ + "Como você deve imaginar, a maior parte da matriz retornada é composta por zeros, sendo apenas alguns elementos compostos de um. Dizemos que essa matriz é __esparsa__. 
É um grande desperdício de memória trabalhar diretamente com uma matriz esparsa assim. Por isso, o _default_ do `OneHotEncoder` é retornar uma `sparse matrix` do SciPy, economizando espaço em memória:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 52 + }, + "colab_type": "code", + "id": "muGSmJckraf3", + "outputId": "c8957d2b-68c4-4722-80ea-5e241c479a88" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "<100x3 sparse matrix of type ''\n", + "\twith 100 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 9, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "one_hot_encoder_sparse = OneHotEncoder(sparse=True) # sparse=True é o default.\n", + "\n", + "course_encoded_sparse = one_hot_encoder_sparse.fit_transform(data[[\"Course\"]])\n", + "\n", + "course_encoded_sparse" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "FOYl0Lx8TPJm" + }, + "source": [ + "Para acessar os dados dessa matriz, podemos convertê-la para um _array_ não esparso:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "mtUziaQmrqTN", + "outputId": "bb7920ae-69a0-4543-97da-b1fc2746ddd0" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 0., 0.],\n", + " [1., 0., 0.],\n", + " [1., 0., 0.],\n", + " [1., 0., 0.],\n", + " [0., 0., 1.],\n", + " [0., 0., 1.],\n", + " [1., 0., 0.],\n", + " [0., 0., 1.],\n", + " [0., 0., 1.],\n", + " [0., 0., 1.]])" + ] + }, + "execution_count": 10, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "course_encoded_sparse.toarray()[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zHGmVXu1uEvM" + }, + "source": [ + "## Binarização 
(_Binarization_)\n", + "\n", + "Binarização é o processo de discretizar uma variável numérica em dois níveis com base em um _threshold_. Isso pode ser útil, por exemplo, para transformar uma variável numérica contínua em uma variável binária alvo de duas classes (positiva ou negativa).\n", + "\n", + "No exemplo abaixo, vamos separar a variável `Height` em dois grupos, utilizando 1.80 m como _threshold_ de separação. Observações com até 1.80 m (inclusive) terão valor 0, enquanto aquelas com mais de 1.80 m terão valor 1:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 208 + }, + "colab_type": "code", + "id": "PeGrPpyWPcOw", + "outputId": "edb6b4c4-97e9-4914-f952-aa60c6dbbbc2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 False\n", + "1 False\n", + "2 False\n", + "3 True\n", + "4 False\n", + "5 False\n", + "6 False\n", + "7 False\n", + "8 True\n", + "9 False\n", + "Name: Height, dtype: bool" + ] + }, + "execution_count": 11, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "tall = (data_encoded.Height > 1.80)\n", + "\n", + "tall[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "94vcsMVguGvG", + "outputId": "b2b15447-7399-4309-b18a-3de5a183a41e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.],\n", + " [0.],\n", + " [0.],\n", + " [1.],\n", + " [0.],\n", + " [0.],\n", + " [0.],\n", + " [0.],\n", + " [1.],\n", + " [0.]])" + ] + }, + "execution_count": 12, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "binarizer = Binarizer(threshold=1.80).fit(data_encoded[[\"Height\"]])\n", + "\n", + "height_binary = binarizer.transform(data_encoded[[\"Height\"]])\n", + "\n", + "height_binary[:10]" + ] + }, + { + "cell_type": "markdown", +
"metadata": { + "colab_type": "text", + "id": "oND_xnxRV8wZ" + }, + "source": [ + "O `Binarizer` tem como saída uma matriz binária numérica. Podemos transformá-la em um vetor de _bool_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "iXbf50-4vdDR", + "outputId": "2f7dba40-f513-491a-e072-743ac0a8c88f" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Tall
0False
1False
2False
3True
4False
5False
6False
7False
8True
9False
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(100, 3)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 6 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eoRT2AR8RHNl", - "colab_type": "text" - }, - "source": [ - "No atributo `categories_` do _encoder_, temos as categorias da variável:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ziGE3VCinqM7", - "colab_type": "code", - "outputId": "2c77ac8b-ba1b-4479-97aa-b59cff8b78bf", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "one_hot_encoder.categories_" + "text/plain": [ + " Tall\n", + "0 False\n", + "1 False\n", + "2 False\n", + "3 True\n", + "4 False\n", + "5 False\n", + "6 False\n", + "7 False\n", + "8 True\n", + "9 False" + ] + }, + "execution_count": 13, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "height_bool = pd.DataFrame(height_binary.flatten().astype(bool), columns=[\"Tall\"])\n", + "\n", + "height_bool.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "nn9Gs9DhWNvi" + }, + "source": [ + "Vamos adicionar a nova variável `Tall`, que indica se a pessoa é alta (> 1.80 m), ao nosso _data set_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "xjOV0WlJy7DY", + "outputId": "af316c4b-4931-44cb-a4af-4fa51b3c93fc" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysicsTall
01.5396.61Biology100False
11.7646.42Biology100False
21.6958.95Biology100False
31.8295.14Biology100True
41.6406.43Physics001False
51.7787.98Physics001False
61.6797.90Biology100False
71.6046.76Physics001False
81.8197.44Physics001True
91.6076.01Physics001False
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[array(['Biology', 'Math', 'Physics'], dtype=object)]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 7 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "y8V2WMjmRUkw", - "colab_type": "text" - }, - "source": [ - "Podemos criar as novas colunas que descrevem cada categoria. Repare que, para qualquer linha, apenas uma das colunas contém um 1, indicando a qual categoria aquela observação pertence. Isso acontece, obviamente, se as categorias forem mutuamente exclusivas (uma observação não pode pertencer a mais de uma categoria simultaneamente)." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "dGepWPRFoqc0", - "colab_type": "code", - "outputId": "dc6a6dff-007d-4f66-cbfb-2aad4c8a7448", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - } - }, - "source": [ - "columns_encoded = one_hot_encoder.categories_[0]\n", - "\n", - "data_encoded = pd.concat([data, pd.DataFrame(course_encoded, columns=columns_encoded)], axis=1)\n", - "\n", - "data_encoded.head(10)" + "text/plain": [ + " Height Score Course Biology Math Physics Tall\n", + "0 1.539 6.61 Biology 1 0 0 False\n", + "1 1.764 6.42 Biology 1 0 0 False\n", + "2 1.695 8.95 Biology 1 0 0 False\n", + "3 1.829 5.14 Biology 1 0 0 True\n", + "4 1.640 6.43 Physics 0 0 1 False\n", + "5 1.778 7.98 Physics 0 0 1 False\n", + "6 1.679 7.90 Biology 1 0 0 False\n", + "7 1.604 6.76 Physics 0 0 1 False\n", + "8 1.819 7.44 Physics 0 0 1 True\n", + "9 1.607 6.01 Physics 0 0 1 False" + ] + }, + "execution_count": 14, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded = pd.concat([data_encoded, height_bool], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "2tOdmnNi23p4" + }, + "source": [ + "## Discretização 
(_Binning_)\n", + "\n", + "Discretização, como o nome diz, é o processo de discretizar ou separar em intervalos contínuos uma variável numérica. Isso pode ser útil para converter uma variável numérica em categórica, quando o valor exato numérico não for tão importante quanto o intervalo onde ele se encontra.\n", + "\n", + "Podemos criar _bins_ (_buckets_ ou intervalos) que contenham aproximadamente a mesma quantidade de observações, utilizando a estratégia `quantile` ou que sejam igualmente espaçados com a estratégia `uniform`.\n", + "\n", + "No exemplo a seguir, criamos quatro intervalos da variável `Score` com a estratégia `quantile`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "Xir4K6i522ZQ", + "outputId": "e902850a-d3dc-4d97-a80f-ad3dad1bb1a2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1.],\n", + " [1.],\n", + " [3.],\n", + " [0.],\n", + " [1.],\n", + " [3.],\n", + " [3.],\n", + " [2.],\n", + " [2.],\n", + " [0.]])" + ] + }, + "execution_count": 15, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "discretizer = KBinsDiscretizer(n_bins=4, encode=\"ordinal\", strategy=\"quantile\")\n", + "\n", + "discretizer.fit(data_encoded[[\"Score\"]])\n", + "\n", + "score_bins = discretizer.transform(data_encoded[[\"Score\"]])\n", + "\n", + "score_bins[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "3hrP6E4xYXCs" + }, + "source": [ + "Os limites dos intervalos estão disponíveis no atributo `bin_edges_`. 
Isso pode ser útil para criarmos _labels_ para colunas do _data set_ por exemplo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "ScCmeNtn3-fF", + "outputId": "be1003a5-2d28-42d6-e76d-bc349e957e95" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([array([4.09 , 6.1975, 6.735 , 7.6 , 9.28 ])], dtype=object)" + ] + }, + "execution_count": 16, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "discretizer.bin_edges_" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "vGl5ONq2Yk7r" + }, + "source": [ + "A função `get_interval()` abaixo facilita a criação de _labels_ indicativas dos intervalos:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "fvB70_vd4fSO" + }, + "outputs": [], + "source": [ + "def get_interval(bin_idx, bin_edges):\n", + " return f\"{np.round(bin_edges[bin_idx], 2):.2f} ⊢ {np.round(bin_edges[bin_idx+1], 2):.2f}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Hn3eqHFbYtfm" + }, + "source": [ + "Cada um dos intervalos mostrados abaixo deve possuir aproximadamente a mesma quantidade de observações:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "colab_type": "code", + "id": "HX59pepN5ZQQ", + "outputId": "d5b3d4dc-c969-44cb-fa34-e31fad2dd818" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bins quantile\n", + "interval: #elements\n", + "\n", + "4.09 ⊢ 6.20: 25\n", + "6.20 ⊢ 6.74: 25\n", + "6.74 ⊢ 7.60: 25\n", + "7.60 ⊢ 9.28: 25\n" + ] + } + ], + "source": [ + "bin_edges_quantile = discretizer.bin_edges_[0]\n", + "\n", + "print(f\"Bins quantile\")\n", + 
"print(f\"interval: #elements\\n\")\n", + "for i in range(len(discretizer.bin_edges_[0])-1):\n", + " print(f\"{get_interval(i, bin_edges_quantile)}: {sum(score_bins[:, 0] == i)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "OQ0fli3IY2G6" + }, + "source": [ + "A _Series_ abaixo mostra alguns dos intervalos para os quais as observações foram encaixadas:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 208 + }, + "colab_type": "code", + "id": "SZMBYjqR5-H6", + "outputId": "cba541dc-9f9e-48d8-eb87-fa54440ca353" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 6.20 ⊢ 6.74\n", + "1 6.20 ⊢ 6.74\n", + "2 7.60 ⊢ 9.28\n", + "3 4.09 ⊢ 6.20\n", + "4 6.20 ⊢ 6.74\n", + "5 7.60 ⊢ 9.28\n", + "6 7.60 ⊢ 9.28\n", + "7 6.74 ⊢ 7.60\n", + "8 6.74 ⊢ 7.60\n", + "9 4.09 ⊢ 6.20\n", + "dtype: object" + ] + }, + "execution_count": 19, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "score_intervals = pd.Series(score_bins.flatten().astype(np.int)).apply(get_interval, args=(bin_edges_quantile,))\n", + "\n", + "score_intervals.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "6gWE7IU6Y_9q" + }, + "source": [ + "Também podemos criar uma nova variável, `Score_interval`, no nosso _data set_ com os intervalos (que agora são categorias):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "fomFOQbVA8eS", + "outputId": "1f065c4f-6da4-43ad-ebb7-b58706595871" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysicsTallScore_interval
01.5396.61Biology100False6.20 ⊢ 6.74
11.7646.42Biology100False6.20 ⊢ 6.74
21.6958.95Biology100False7.60 ⊢ 9.28
31.8295.14Biology100True4.09 ⊢ 6.20
41.6406.43Physics001False6.20 ⊢ 6.74
51.7787.98Physics001False7.60 ⊢ 9.28
61.6797.90Biology100False7.60 ⊢ 9.28
71.6046.76Physics001False6.74 ⊢ 7.60
81.8197.44Physics001True6.74 ⊢ 7.60
91.6076.01Physics001False4.09 ⊢ 6.20
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
HeightScoreCourseBiologyMathPhysics
01.5396.61Biology100
11.7646.42Biology100
21.6958.95Biology100
31.8295.14Biology100
41.6406.43Physics001
51.7787.98Physics001
61.6797.90Biology100
71.6046.76Physics001
81.8197.44Physics001
91.6076.01Physics001
\n", - "
" - ], - "text/plain": [ - " Height Score Course Biology Math Physics\n", - "0 1.539 6.61 Biology 1 0 0\n", - "1 1.764 6.42 Biology 1 0 0\n", - "2 1.695 8.95 Biology 1 0 0\n", - "3 1.829 5.14 Biology 1 0 0\n", - "4 1.640 6.43 Physics 0 0 1\n", - "5 1.778 7.98 Physics 0 0 1\n", - "6 1.679 7.90 Biology 1 0 0\n", - "7 1.604 6.76 Physics 0 0 1\n", - "8 1.819 7.44 Physics 0 0 1\n", - "9 1.607 6.01 Physics 0 0 1" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 8 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iIiVR7P4SHXz", - "colab_type": "text" - }, - "source": [ - "Como você deve imaginar, a maior parte da matriz retornada é composta por zeros, sendo apenas alguns elementos compostos de um. Dizemos que essa matriz é __esparsa__. É um grande desperdício de memória trabalhar diretamente como uma matriz esparsa assim. Por isso, o _default_ do `OneHotEncoder` é retornar uma `sparse matrix` do NumPy, economizando espaço em memória:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "muGSmJckraf3", - "colab_type": "code", - "outputId": "c8957d2b-68c4-4722-80ea-5e241c479a88", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 52 - } - }, - "source": [ - "one_hot_encoder_sparse = OneHotEncoder(sparse=True) # sparse=True é o default.\n", - "\n", - "course_encoded_sparse = one_hot_encoder_sparse.fit_transform(data[[\"Course\"]])\n", - "\n", - "course_encoded_sparse" + "text/plain": [ + " Height Score Course Biology Math Physics Tall Score_interval\n", + "0 1.539 6.61 Biology 1 0 0 False 6.20 ⊢ 6.74\n", + "1 1.764 6.42 Biology 1 0 0 False 6.20 ⊢ 6.74\n", + "2 1.695 8.95 Biology 1 0 0 False 7.60 ⊢ 9.28\n", + "3 1.829 5.14 Biology 1 0 0 True 4.09 ⊢ 6.20\n", + "4 1.640 6.43 Physics 0 0 1 False 6.20 ⊢ 6.74\n", + "5 1.778 7.98 Physics 0 0 1 False 7.60 ⊢ 9.28\n", + "6 1.679 7.90 Biology 1 0 0 False 7.60 ⊢ 9.28\n", + "7 1.604 6.76 Physics 0 0 1 False 6.74 ⊢ 7.60\n", + "8 1.819 7.44 Physics 0 0 1 True 6.74 ⊢ 7.60\n", + "9 
1.607 6.01 Physics 0 0 1 False 4.09 ⊢ 6.20" + ] + }, + "execution_count": 20, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_intervals, columns=[\"Score_interval\"])], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "LldlZ92lZN1k" + }, + "source": [ + "Como dito, podemos utilizar a estratégia `uniform` para criar _bins_ igualmente espaçados, independente do número de observações que cada um possui. Também podemos especificar o tipo de codificação utilizada. No caso a seguir, utilizamos `encode=onehot-dense` para informar que queremos que a saída seja codificada como o _one-hot encode_ visto anteriormente:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "E6L1qXuW-v-n", + "outputId": "956f9e9f-67ba-436f-f457-889ee2d1f3db" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0, 1, 0, 0],\n", + " [0, 1, 0, 0],\n", + " [0, 0, 0, 1],\n", + " [1, 0, 0, 0],\n", + " [0, 1, 0, 0],\n", + " [0, 0, 1, 0],\n", + " [0, 0, 1, 0],\n", + " [0, 0, 1, 0],\n", + " [0, 0, 1, 0],\n", + " [0, 1, 0, 0]])" + ] + }, + "execution_count": 21, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "discretizer_uniform = KBinsDiscretizer(n_bins=4, encode=\"onehot-dense\", strategy=\"uniform\")\n", + "\n", + "discretizer_uniform.fit(data_encoded[[\"Score\"]])\n", + "\n", + "score_bins_uniform = discretizer_uniform.transform(data_encoded[[\"Score\"]]).astype(np.int)\n", + "\n", + "score_bins_uniform[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "YapI8RuMZZfM" + }, + "source": [ + "Note como agora os intervalos são ligeiramente diferentes:" + ] + }, + { + "cell_type": "code", + 
"execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "P8gW9k-w-_CC", + "outputId": "731fca86-f052-4a93-e5bf-e13eec18ac8b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([4.09 , 5.3875, 6.685 , 7.9825, 9.28 ])" + ] + }, + "execution_count": 22, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "bin_edges_uniform = discretizer_uniform.bin_edges_[0]\n", + "\n", + "bin_edges_uniform" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "colab_type": "code", + "id": "ieyy46EJAnb6", + "outputId": "99835fa9-8003-4060-afae-2c4de66685ff" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bins uniform\n", + "interval: #elements\n", + "\n", + "4.09 ⊢ 5.39: 6\n", + "5.39 ⊢ 6.68: 43\n", + "6.68 ⊢ 7.98: 44\n", + "7.98 ⊢ 9.28: 7\n" + ] + } + ], + "source": [ + "score_intervals_columns = [get_interval(i, bin_edges_uniform) for i in range(4)]\n", + "\n", + "print(f\"Bins uniform\")\n", + "print(f\"interval: #elements\\n\")\n", + "for i in range(len(discretizer_uniform.bin_edges_[0])-1):\n", + " print(f\"{get_interval(i, bin_edges_uniform)}: {sum(score_bins_uniform[:, i])}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "WuWi-1U4Zzf_" + }, + "source": [ + "Podemos adicionar as novas variáveis binárias no _data set_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "P-v3UgiQB87S", + "outputId": "ad22d68f-c0e8-4a91-8838-842e7e2f5041" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysicsTallScore_interval4.09 ⊢ 5.395.39 ⊢ 6.686.68 ⊢ 7.987.98 ⊢ 9.28
01.5396.61Biology100False6.20 ⊢ 6.740100
11.7646.42Biology100False6.20 ⊢ 6.740100
21.6958.95Biology100False7.60 ⊢ 9.280001
31.8295.14Biology100True4.09 ⊢ 6.201000
41.6406.43Physics001False6.20 ⊢ 6.740100
51.7787.98Physics001False7.60 ⊢ 9.280010
61.6797.90Biology100False7.60 ⊢ 9.280010
71.6046.76Physics001False6.74 ⊢ 7.600010
81.8197.44Physics001True6.74 ⊢ 7.600010
91.6076.01Physics001False4.09 ⊢ 6.200100
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "<100x3 sparse matrix of type ''\n", - "\twith 100 stored elements in Compressed Sparse Row format>" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 9 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FOYl0Lx8TPJm", - "colab_type": "text" - }, - "source": [ - "Para acessar os dados dessa matriz, podemos convertê-la para um _array_ não esparso:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "mtUziaQmrqTN", - "colab_type": "code", - "outputId": "bb7920ae-69a0-4543-97da-b1fc2746ddd0", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 191 - } - }, - "source": [ - "course_encoded_sparse.toarray()[:10]" + "text/plain": [ + " Height Score Course ... 5.39 ⊢ 6.68 6.68 ⊢ 7.98 7.98 ⊢ 9.28\n", + "0 1.539 6.61 Biology ... 1 0 0\n", + "1 1.764 6.42 Biology ... 1 0 0\n", + "2 1.695 8.95 Biology ... 0 0 1\n", + "3 1.829 5.14 Biology ... 0 0 0\n", + "4 1.640 6.43 Physics ... 1 0 0\n", + "5 1.778 7.98 Physics ... 0 1 0\n", + "6 1.679 7.90 Biology ... 0 1 0\n", + "7 1.604 6.76 Physics ... 0 1 0\n", + "8 1.819 7.44 Physics ... 0 1 0\n", + "9 1.607 6.01 Physics ... 
1 0 0\n", + "\n", + "[10 rows x 12 columns]" + ] + }, + "execution_count": 24, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_bins_uniform, columns=score_intervals_columns)], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jD8WM_-yzqSc" + }, + "source": [ + "## Normalização (_Scaling_)\n", + "\n", + "Normalização é o processo de colocar uma variável numérica em uma escala pré-determinada, geralmente $[0, 1]$, mas também é comum ser $[-1, 1]$.\n", + "\n", + "Para colocar no intervalo $[0, 1]$, basta subtrair de cada valor o valor mínimo e dividir pela diferença entre o valor máximo e o valor mínimo:\n", + "\n", + "$$x_{\\text{scaled}} = \\frac{x - x_{\\text{min}}}{x_{\\text{max}} - x_{\\text{min}}}$$\n", + "\n", + "Abaixo, escalamos a variável `Score` no intervalo $[0, 1]$:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "nMM2mu-Qzwnv", + "outputId": "5c60c83b-13bf-431d-e77e-a2fb2e8af317" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.48554913],\n", + " [0.44894027],\n", + " [0.93641618],\n", + " [0.20231214],\n", + " [0.45086705],\n", + " [0.7495183 ],\n", + " [0.73410405],\n", + " [0.51445087],\n", + " [0.64547206],\n", + " [0.3699422 ]])" + ] + }, + "execution_count": 25, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "minmax_scaler = MinMaxScaler(feature_range=(0, 1)) # Default feature_scale é (0, 1).\n", + "\n", + "minmax_scaler.fit(data_encoded[[\"Score\"]])\n", + "\n", + "score_normalized = minmax_scaler.transform(data_encoded[[\"Score\"]])\n", + "\n", + "score_normalized[:10]" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri":
"https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "FPr-37M2UBj4", + "outputId": "dc170301-56af-4cab-da7c-307c5cbb94a6" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.0, 0.9999999999999999)" + ] + }, + "execution_count": 26, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "score_normalized.min(), score_normalized.max()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Et6m_2Bbbq-n" + }, + "source": [ + "Adicionamos a variável `Score` normalizada ao nosso _data set_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "kaYvCQtK0fzi", + "outputId": "9f8ccb6c-d0b7-4445-96c9-490f284f2357" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysicsTallScore_interval4.09 ⊢ 5.395.39 ⊢ 6.686.68 ⊢ 7.987.98 ⊢ 9.28Score_normalized
01.5396.61Biology100False6.20 ⊢ 6.7401000.485549
11.7646.42Biology100False6.20 ⊢ 6.7401000.448940
21.6958.95Biology100False7.60 ⊢ 9.2800010.936416
31.8295.14Biology100True4.09 ⊢ 6.2010000.202312
41.6406.43Physics001False6.20 ⊢ 6.7401000.450867
51.7787.98Physics001False7.60 ⊢ 9.2800100.749518
61.6797.90Biology100False7.60 ⊢ 9.2800100.734104
71.6046.76Physics001False6.74 ⊢ 7.6000100.514451
81.8197.44Physics001True6.74 ⊢ 7.6000100.645472
91.6076.01Physics001False4.09 ⊢ 6.2001000.369942
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([[1., 0., 0.],\n", - " [1., 0., 0.],\n", - " [1., 0., 0.],\n", - " [1., 0., 0.],\n", - " [0., 0., 1.],\n", - " [0., 0., 1.],\n", - " [1., 0., 0.],\n", - " [0., 0., 1.],\n", - " [0., 0., 1.],\n", - " [0., 0., 1.]])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 10 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zHGmVXu1uEvM", - "colab_type": "text" - }, - "source": [ - "## Binarização (_Binarization_)\n", - "\n", - "Binarização é o processo de discretizar uma variável numérica em dois níveis com base em um _threshold_. Isso pode ser útil, por exemplo, para tornar uma variável numérica contínua em uma variável binária alvo de duas classes (positiva ou negativa).\n", - "\n", - "No exemplo abaixo, vamos separar a variável `Height` em dois grupos, utilizando 1.80 m como _threshold_ de separação. Observações que possuam menos de 1.80 m terão valor 0, enquanto aquelas com mais de 1.80 m terão valor 1:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "PeGrPpyWPcOw", - "colab_type": "code", - "outputId": "edb6b4c4-97e9-4914-f952-aa60c6dbbbc2", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 208 - } - }, - "source": [ - "tall = (data_encoded.Height > 1.80)\n", - "\n", - "tall[:10]" + "text/plain": [ + " Height Score Course ... 6.68 ⊢ 7.98 7.98 ⊢ 9.28 Score_normalized\n", + "0 1.539 6.61 Biology ... 0 0 0.485549\n", + "1 1.764 6.42 Biology ... 0 0 0.448940\n", + "2 1.695 8.95 Biology ... 0 1 0.936416\n", + "3 1.829 5.14 Biology ... 0 0 0.202312\n", + "4 1.640 6.43 Physics ... 0 0 0.450867\n", + "5 1.778 7.98 Physics ... 1 0 0.749518\n", + "6 1.679 7.90 Biology ... 1 0 0.734104\n", + "7 1.604 6.76 Physics ... 1 0 0.514451\n", + "8 1.819 7.44 Physics ... 1 0 0.645472\n", + "9 1.607 6.01 Physics ... 
0 0 0.369942\n", + "\n", + "[10 rows x 13 columns]" + ] + }, + "execution_count": 27, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_normalized.flatten(), columns=[\"Score_normalized\"])], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "n7-msElsbveR" + }, + "source": [ + "Para avaliar se os valores encontrados conferem, podemos utilizar a função `normalize` abaixo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "EAfUGaFc061d" + }, + "outputs": [], + "source": [ + "def normalize(x, xmin, xmax):\n", + " return (x - xmin)/(xmax - xmin)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "CXywxNX-b-0K" + }, + "source": [ + "A função `partial()` do módulo `functools` (_builtin_ do Python) permite \"congelar\" alguns parâmetros da função passada como argumento, facilitando a invocação desta função quando tais parâmetros são constantes. No caso abaixo, \"congelamos\" os argumentos `xmin` e `xmax` da função `normalize()` com os valores mínimo e máximo da variável `Score`, respectivamente. 
Nas invocações subsequentes de `normalize` não precisaremos passar esses argumentos, somente o argumento \"não congelado\" `x`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "UAlpigp21OVx" + }, + "outputs": [], + "source": [ + "normalize_score = functools.partial(normalize,\n", + " xmin=data_encoded.Score.min(),\n", + " xmax=data_encoded.Score.max())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "nhR0rwUIctTa" + }, + "source": [ + "O valor abaixo realmente confere com aquele encontrado pelo `MinMaxScaler`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "pMfk3jrU1mQV", + "outputId": "f9851c0d-9446-4f10-874e-cdba22b43722" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.485549" + ] + }, + "execution_count": 30, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "normalize_score(data_encoded.Score[0]).round(6)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "HEcSQzWJ2Yum" + }, + "source": [ + "## Padronização (_Standardization_)\n", + "\n", + "Padronização é o processo de tornar a variável com média zero e variância um. Esse processo não deve ser confundido com a normalização descrita acima.\n", + "\n", + "O processo é simples, basta subtrair a média dos dados de cada observação e dividi-los pelo desvio-padrão:\n", + "\n", + "$$x_{\\text{standardized}} = \\frac{x - \\bar{x}}{s}$$\n", + "\n", + "onde $\\bar{x}$ indica a média amostral e $s$ o desvio-padrão amostral." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "kXYXezCNdYue" + }, + "source": [ + "No exemplo abaixo, padronizamos a variável `Score`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "Qfhs3Eaq2dGV", + "outputId": "572aae65-5460-44d1-8134-dbc26f82e2d2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.20752554],\n", + " [-0.40839081],\n", + " [ 2.26628886],\n", + " [-1.76158843],\n", + " [-0.39781896],\n", + " [ 1.24081879],\n", + " [ 1.15624393],\n", + " [-0.0489477 ],\n", + " [ 0.66993854],\n", + " [-0.84183693]])" + ] + }, + "execution_count": 31, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "standard_scaler = StandardScaler()\n", + "\n", + "standard_scaler.fit(data_encoded[[\"Score\"]])\n", + "\n", + "score_standardized = standard_scaler.transform(data_encoded[[\"Score\"]])\n", + "\n", + "score_standardized[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "SJJucIQddgME" + }, + "source": [ + "E adicionamos a variável padronizada ao nosso _data set_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "BAndWLe13RSr", + "outputId": "4a6231c1-f459-4307-ad14-24c4e46760cd" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourseBiologyMathPhysicsTallScore_interval4.09 ⊢ 5.395.39 ⊢ 6.686.68 ⊢ 7.987.98 ⊢ 9.28Score_normalizedScore_standardized
01.5396.61Biology100False6.20 ⊢ 6.7401000.485549-0.207526
11.7646.42Biology100False6.20 ⊢ 6.7401000.448940-0.408391
21.6958.95Biology100False7.60 ⊢ 9.2800010.9364162.266289
31.8295.14Biology100True4.09 ⊢ 6.2010000.202312-1.761588
41.6406.43Physics001False6.20 ⊢ 6.7401000.450867-0.397819
51.7787.98Physics001False7.60 ⊢ 9.2800100.7495181.240819
61.6797.90Biology100False7.60 ⊢ 9.2800100.7341041.156244
71.6046.76Physics001False6.74 ⊢ 7.6000100.514451-0.048948
81.8197.44Physics001True6.74 ⊢ 7.6000100.6454720.669939
91.6076.01Physics001False4.09 ⊢ 6.2001000.369942-0.841837
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0 False\n", - "1 False\n", - "2 False\n", - "3 True\n", - "4 False\n", - "5 False\n", - "6 False\n", - "7 False\n", - "8 True\n", - "9 False\n", - "Name: Height, dtype: bool" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 11 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "94vcsMVguGvG", - "colab_type": "code", - "outputId": "b2b15447-7399-4309-b18a-3de5a183a41e", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 191 - } - }, - "source": [ - "binarizer = Binarizer(threshold=1.80).fit(data_encoded[[\"Height\"]])\n", - "\n", - "height_binary = binarizer.transform(data_encoded[[\"Height\"]])\n", - "\n", - "height_binary[:10]" + "text/plain": [ + " Height Score Course ... 7.98 ⊢ 9.28 Score_normalized Score_standardized\n", + "0 1.539 6.61 Biology ... 0 0.485549 -0.207526\n", + "1 1.764 6.42 Biology ... 0 0.448940 -0.408391\n", + "2 1.695 8.95 Biology ... 1 0.936416 2.266289\n", + "3 1.829 5.14 Biology ... 0 0.202312 -1.761588\n", + "4 1.640 6.43 Physics ... 0 0.450867 -0.397819\n", + "5 1.778 7.98 Physics ... 0 0.749518 1.240819\n", + "6 1.679 7.90 Biology ... 0 0.734104 1.156244\n", + "7 1.604 6.76 Physics ... 0 0.514451 -0.048948\n", + "8 1.819 7.44 Physics ... 0 0.645472 0.669939\n", + "9 1.607 6.01 Physics ... 0 0.369942 -0.841837\n", + "\n", + "[10 rows x 14 columns]" + ] + }, + "execution_count": 32, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_standardized.flatten(), columns=[\"Score_standardized\"])], axis=1)\n", + "\n", + "data_encoded.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "_SgwGLgOdk5Q" + }, + "source": [ + "Note que, ao contrário da variável normalizada, é possível ter valores negativos e positivos, menores e maiores que um. 
Isso é bem óbvio, pois os dados agora têm média 0 e variância 1 (o valor 1.0101 exibido abaixo aparece porque o `var()` do pandas usa `ddof=1`, enquanto o `StandardScaler` usa a variância populacional, `ddof=0`):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "I0E9fwo93h9w", + "outputId": "2d9d5cdf-181b-4ca1-bea7-b382bf738ebd" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(-1.2501111257279262e-15, 1.0101010101010102)" + ] + }, + "execution_count": 33, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_encoded.Score_standardized.mean(), data_encoded.Score_standardized.var()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "Av0cwG_Qd3Ow" + }, + "source": [ + "Novamente, para avaliar os resultados obtidos, podemos escrever nossa própria função de padronização:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "khwEkoks3-cS" + }, + "outputs": [], + "source": [ + "def standardize(x, xmean, xstd):\n", + " return (x - xmean)/xstd" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "14w3018J4Gwy" + }, + "outputs": [], + "source": [ + "standardize_score = functools.partial(standardize,\n", + " xmean=data_encoded.Score.mean(),\n", + " xstd=data_encoded.Score.std())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "UAGxoUK5d-22" + }, + "source": [ + "O valor fica próximo, mas não idêntico, ao encontrado pelo `StandardScaler` (-0.206485 contra -0.207526): o `std()` do pandas usa `ddof=1` (desvio-padrão amostral), enquanto o `StandardScaler` usa `ddof=0` (desvio-padrão populacional). Com `xstd=data_encoded.Score.std(ddof=0)` os valores coincidem:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "dpaNVzOy4aCL", + "outputId": "fa0f42f0-32a5-48f4-f8d7-724350cdca86" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "-0.20648530634442175" + ] + }, + "execution_count": 36, + "metadata": { + "tags": [] + }, + "output_type": 
"execute_result" + } + ], + "source": [ + "standardize_score(data_encoded.Score[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "2tO4OOJK7NY1" + }, + "source": [ + "## Criando um _Pipeline_\n", + "\n", + "Todo esse processo de transformar os dados pode ser bastante trabalhoso e entediante. Para facilitar as coisas, o sklearn dispõe de um mecanismo de _pipeline_ que funciona como a esteira de uma linha de montagem. Cada etapa desse _pipeline_ é uma transformação nos dados, de forma que, ao final do _pipeline_, temos os dados totalmente transformados. A vantagem é que agora especificamos todas as etapas, ou transformações, de uma só vez, e podemos reaproveitar esse _pipeline_ no futuro." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "T1LyaI0-B2hV", + "outputId": "011176a0-ec92-4122-9fc4-3b3d0a3118c9" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourse
01.5396.61Biology
11.7646.42Biology
21.6958.95Biology
31.8295.14Biology
41.6406.43Physics
51.7787.98Physics
61.6797.90Biology
71.6046.76Physics
81.8197.44Physics
91.6076.01Physics
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([[0.],\n", - " [0.],\n", - " [0.],\n", - " [1.],\n", - " [0.],\n", - " [0.],\n", - " [0.],\n", - " [0.],\n", - " [1.],\n", - " [0.]])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 12 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oND_xnxRV8wZ", - "colab_type": "text" - }, - "source": [ - "O `Binarizer` tem como saída uma matriz binária numérica. Podemos transformá-la em um vetor de _bool_:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "iXbf50-4vdDR", - "colab_type": "code", - "outputId": "2f7dba40-f513-491a-e072-743ac0a8c88f", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - } - }, - "source": [ - "height_bool = pd.DataFrame(height_binary.flatten().astype(bool), columns=[\"Tall\"])\n", - "\n", - "height_bool.head(10)" + "text/plain": [ + " Height Score Course\n", + "0 1.539 6.61 Biology\n", + "1 1.764 6.42 Biology\n", + "2 1.695 8.95 Biology\n", + "3 1.829 5.14 Biology\n", + "4 1.640 6.43 Physics\n", + "5 1.778 7.98 Physics\n", + "6 1.679 7.90 Biology\n", + "7 1.604 6.76 Physics\n", + "8 1.819 7.44 Physics\n", + "9 1.607 6.01 Physics" + ] + }, + "execution_count": 37, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "86on9pLMeidf" + }, + "source": [ + "Para evitar bagunçar com nosso _data set_ original, criamos uma cópia (profunda) dele:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "vdA8euCcZeq1" + }, + "outputs": [], + "source": [ + "data_missing = data.copy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "snDUyWqEenh8" + }, + "source": [ + "E para tornar o exemplo mais interessante, adicionamos (ou 
removemos?) dados faltantes ao _data set_. Isso porque uma das transformações úteis que podemos aplicar no _pipeline_ é justamente a imputação de dados, ou seja, preencher dados faltantes.\n", + "\n", + "As variáveis numéricas faltantes são representadas por `np.nan`, enquanto a variável categórica é representada pela classe `Unknown`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "LkVnbFAKS_fF", + "outputId": "6ba74eb6-0d60-419a-c39a-dd165cd49b60" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HeightScoreCourse
21.695NaNUnknown
51.778NaNPhysics
8NaN7.44Physics
111.539NaNBiology
15NaN5.44Biology
24NaN8.08Biology
292.0206.83Unknown
331.691NaNMath
352.0856.96Unknown
381.3766.54Unknown
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Tall
0False
1False
2False
3True
4False
5False
6False
7False
8True
9False
\n", - "
" - ], - "text/plain": [ - " Tall\n", - "0 False\n", - "1 False\n", - "2 False\n", - "3 True\n", - "4 False\n", - "5 False\n", - "6 False\n", - "7 False\n", - "8 True\n", - "9 False" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 13 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nn9Gs9DhWNvi", - "colab_type": "text" - }, - "source": [ - "Vamos adicionar a nova variável `Tall`, que indica se a pessoa é alta (> 1.80 m), ao nosso _data set_:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "xjOV0WlJy7DY", - "colab_type": "code", - "outputId": "af316c4b-4931-44cb-a4af-4fa51b3c93fc", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - } - }, - "source": [ - "data_encoded = pd.concat([data_encoded, height_bool], axis=1)\n", - "\n", - "data_encoded.head(10)" + "text/plain": [ + " Height Score Course\n", + "2 1.695 NaN Unknown\n", + "5 1.778 NaN Physics\n", + "8 NaN 7.44 Physics\n", + "11 1.539 NaN Biology\n", + "15 NaN 5.44 Biology\n", + "24 NaN 8.08 Biology\n", + "29 2.020 6.83 Unknown\n", + "33 1.691 NaN Math\n", + "35 2.085 6.96 Unknown\n", + "38 1.376 6.54 Unknown" + ] + }, + "execution_count": 39, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "unknown_height_idx = pd.Index(np.random.choice(data_missing.index, 10, replace=False))\n", + "unknown_score_idx = pd.Index(np.random.choice(data_missing.index, 10, replace=False))\n", + "unknown_course_idx = pd.Index(np.random.choice(data_missing.index, 10, replace=False))\n", + "\n", + "data_missing.loc[unknown_height_idx, \"Height\"] = np.nan\n", + "data_missing.loc[unknown_score_idx, \"Score\"] = np.nan\n", + "data_missing.loc[unknown_course_idx, \"Course\"] = \"Unknown\"\n", + "\n", + "data_missing_idx = unknown_height_idx | unknown_score_idx | unknown_course_idx\n", + "\n", + "data_missing.loc[data_missing_idx].head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": 
"nmUJS9SzfC9Y" + }, + "source": [ + "Criamos o _pipeline_ com as seguintes etapas:\n", + "\n", + "1. Faça imputação dos dados, preenchendo os dados faltantes com a mediana dos dados presentes.\n", + "2. Faça a normalização dos dados no intervalo _default_ $[0, 1]$.\n", + "3. Crie novas variáveis através da expansão polinomial da variável original." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9ypslSlEhGBr" + }, + "source": [ + "O `Pipeline` recebe uma lista de transformações representadas por tuplas de dois elementos. Cada tupla contém:\n", + "\n", + "* O nome para a etapa (ou transformação ou estimador). Isso vai ser útil para recuperar algumas informações do _pipeline_ mais a frente.\n", + "* Um objeto da classe do transformador ou estimador, já com seus parâmetros configurados." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "XqthBhA18ITd" + }, + "outputs": [], + "source": [ + "num_pipeline = Pipeline(steps=[\n", + " (\"imputer\", SimpleImputer(strategy=\"median\")),\n", + " (\"minmax_scaler\", MinMaxScaler()),\n", + " (\"poly_features\", PolynomialFeatures(degree=2, include_bias=False))\n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "3UVr1XWCfZID" + }, + "source": [ + "Depois da especificação do nosso _pipeline_, podemos aplicá-lo simultaneamente a diversas variáveis (desde que as transformações especificadas façam sentido).\n", + "\n", + "No exemplo abaixo, aplicamos esse _pipeline_ às variáveis `Height` e `Score` ao mesmo tempo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 191 + }, + "colab_type": "code", + "id": "Qh8kbymmDZqB", + "outputId": "0595019a-1288-4ea8-d18b-1d61dc44136b" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[0.26553106, 0.48554913, 0.07050674, 0.12892838, 
0.23575796],\n", + " [0.49098196, 0.44894027, 0.24106329, 0.22042158, 0.20154737],\n", + " [0.42184369, 0.48843931, 0.1779521 , 0.20604504, 0.23857296],\n", + " [0.55611222, 0.20231214, 0.30926081, 0.11250825, 0.0409302 ],\n", + " [0.36673347, 0.45086705, 0.13449344, 0.16534804, 0.2032811 ],\n", + " [0.50501002, 0.48843931, 0.25503512, 0.24666674, 0.23857296],\n", + " [0.40581162, 0.73410405, 0.16468307, 0.29790795, 0.53890875],\n", + " [0.33066132, 0.51445087, 0.10933691, 0.170109 , 0.26465969],\n", + " [0.41082164, 0.64547206, 0.16877442, 0.26517389, 0.41663418],\n", + " [0.33366733, 0.3699422 , 0.11133389, 0.12343763, 0.13685723]])" + ] + }, + "execution_count": 41, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline_transformation = num_pipeline.fit_transform(data_missing[[\"Height\", \"Score\"]])\n", + "\n", + "pipeline_transformation[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "HoNf9vDJfrW8" + }, + "source": [ + "Para ficar mais claro a saída do _pipeline_, podemos utilizar os nomes das _features_ geradas através do método `get_feature_names()`. Para tornar ainda mais claro, substituímos o que é chamado `x0` por `Height` e `x1` por `Score`, que é inferido pela ordem das variáveis no _pipeline_." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "OJz5zvr2EeM3", + "outputId": "444fe35c-4e5e-4f9c-ef6a-152dd9bcd775" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['Height_n', 'Score_n', 'Height_n^2', 'Height_n Score_n', 'Score_n^2']" + ] + }, + "execution_count": 42, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "poly_features = num_pipeline.get_params()[\"poly_features\"].get_feature_names()\n", + " \n", + "pipeline_columns = [old_name.replace(\"x0\", \"Height_n\").replace(\"x1\", \"Score_n\") for old_name in poly_features]\n", + "\n", + "pipeline_columns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MBgEafF-gKA3" + }, + "source": [ + "Criamos um novo _data set_ com essas variáveis resultantes do _pipeline_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 237 + }, + "colab_type": "code", + "id": "q_xBepJGIAJm", + "outputId": "6126947b-ef3f-42db-84aa-4317ed5f79d3" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Height_nScore_nHeight_n^2Height_n Score_nScore_n^2
00.2655310.4855490.0705070.1289280.235758
10.4909820.4489400.2410630.2204220.201547
20.4218440.4884390.1779520.2060450.238573
30.5561120.2023120.3092610.1125080.040930
40.3667330.4508670.1344930.1653480.203281
50.5050100.4884390.2550350.2466670.238573
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
HeightScoreCourseBiologyMathPhysicsTall
01.5396.61Biology100False
11.7646.42Biology100False
21.6958.95Biology100False
31.8295.14Biology100True
41.6406.43Physics001False
51.7787.98Physics001False
61.6797.90Biology100False
71.6046.76Physics001False
81.8197.44Physics001True
91.6076.01Physics001False
\n", - "
" - ], - "text/plain": [ - " Height Score Course Biology Math Physics Tall\n", - "0 1.539 6.61 Biology 1 0 0 False\n", - "1 1.764 6.42 Biology 1 0 0 False\n", - "2 1.695 8.95 Biology 1 0 0 False\n", - "3 1.829 5.14 Biology 1 0 0 True\n", - "4 1.640 6.43 Physics 0 0 1 False\n", - "5 1.778 7.98 Physics 0 0 1 False\n", - "6 1.679 7.90 Biology 1 0 0 False\n", - "7 1.604 6.76 Physics 0 0 1 False\n", - "8 1.819 7.44 Physics 0 0 1 True\n", - "9 1.607 6.01 Physics 0 0 1 False" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 14 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2tOdmnNi23p4", - "colab_type": "text" - }, - "source": [ - "## Discretização (_Binning_)\n", - "\n", - "Discretização, como o nome diz, é o processo de discretizar ou separar em intervalos contínuos uma variável numérica. Isso pode ser útil para converter uma variável numérica em categórica, quando o valor exato numérico não for tão importante quanto o intervalo onde ele se encontra.\n", - "\n", - "Podemos criar _bins_ (_buckets_ ou intervalos) que contenham aproximadamente a mesma quantidade de observações, utilizando a estratégia `quantile` ou que sejam igualmente espaçados com a estratégia `uniform`.\n", - "\n", - "No exemplo a seguir, criamos quatro intervalos da variável `Score` com a estratégia `quantile`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Xir4K6i522ZQ", - "colab_type": "code", - "outputId": "e902850a-d3dc-4d97-a80f-ad3dad1bb1a2", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 191 - } - }, - "source": [ - "discretizer = KBinsDiscretizer(n_bins=4, encode=\"ordinal\", strategy=\"quantile\")\n", - "\n", - "discretizer.fit(data_encoded[[\"Score\"]])\n", - "\n", - "score_bins = discretizer.transform(data_encoded[[\"Score\"]])\n", - "\n", - "score_bins[:10]" + "text/plain": [ + " Height_n Score_n Height_n^2 Height_n Score_n Score_n^2\n", + "0 0.265531 0.485549 0.070507 0.128928 0.235758\n", + "1 0.490982 0.448940 
0.241063 0.220422 0.201547\n", + "2 0.421844 0.488439 0.177952 0.206045 0.238573\n", + "3 0.556112 0.202312 0.309261 0.112508 0.040930\n", + "4 0.366733 0.450867 0.134493 0.165348 0.203281\n", + "5 0.505010 0.488439 0.255035 0.246667 0.238573" + ] + }, + "execution_count": 43, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "height_score_normalized_poly = pd.DataFrame(pipeline_transformation, columns=pipeline_columns)\n", + "\n", + "height_score_normalized_poly.head(6)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "9imGtnaygRiX" + }, + "source": [ + "Podemos também criar outro _pipeline_ para a variável categórica `Course`. Como se trata de uma variável de natureza completamente diferente, precisamos especificar um _pipeline_ diferente com as seguintes transformações:\n", + "\n", + "1. Preencha os dados faltantes (`None`) com a classe `Unknown`.\n", + "2. Crie novas variáveis binárias com o `OneHotEncoder`." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "eZP_HTkchI5c" + }, + "source": [ + "Assim como no _pipeline_ anterior, especificamos cada etapa como uma tupla com um nome e um objeto de um transformador ou estimador:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "NMv_2lV7KxTM" + }, + "outputs": [], + "source": [ + "cat_pipeline = Pipeline([\n", + " (\"imputer\", SimpleImputer(strategy=\"constant\", fill_value=\"Unknown\")),\n", + " (\"one_hot_encoder\", OneHotEncoder(sparse=False, dtype=np.int))\n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "wK66jYTShV52" + }, + "source": [ + "Após a especificação do _pipeline_, podemos aplicá-lo à nossa variável `Course`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "KIFWvPS7LNUA" + }, + "outputs": [], + "source": [ + "course_pipeline_transformation = cat_pipeline.fit_transform(data_missing[[\"Course\"]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "quJ4ThBBhfBI" + }, + "source": [ + "Agora, utilizaremos o nome que demos à etapa do `OneHotEncoder` para recuperar esse transformador através do método `get_params()`. 
Depois de recuperado o `OneHotEncoder`, acessamos seu atributo `categories_` (primeiro índice `[0]`, pois poderíamos ter aplicado o _pipeline_ a mais de uma variável categórica):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "Zurb-NVWM4sX", + "outputId": "1e7c2960-6ffb-4285-bb2d-691157302850" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['Biology', 'Math', 'Physics', 'Unknown'], dtype=object)" + ] + }, + "execution_count": 46, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "course_columns = cat_pipeline.get_params()[\"one_hot_encoder\"].categories_[0]\n", + "\n", + "course_columns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ABQDGjU_iDGS" + }, + "source": [ + "Utilizamos a saída do _pipeline_ e os nomes das categorias recuperados do transformador para criar um novo `DataFrame`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "colab_type": "code", + "id": "3ec56uIcMvll", + "outputId": "5707acac-8d67-4d74-eb02-d73b98f6340a" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BiologyMathPhysicsUnknown
01000
11000
20001
31000
40010
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([[1.],\n", - " [1.],\n", - " [3.],\n", - " [0.],\n", - " [1.],\n", - " [3.],\n", - " [3.],\n", - " [2.],\n", - " [2.],\n", - " [0.]])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 15 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3hrP6E4xYXCs", - "colab_type": "text" - }, - "source": [ - "Os limites dos intervalos estão disponíveis no atributo `bin_edges_`. Isso pode ser útil para criarmos _labels_ para colunas do _data set_ por exemplo:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ScCmeNtn3-fF", - "colab_type": "code", - "outputId": "be1003a5-2d28-42d6-e76d-bc349e957e95", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "discretizer.bin_edges_" + "text/plain": [ + " Biology Math Physics Unknown\n", + "0 1 0 0 0\n", + "1 1 0 0 0\n", + "2 0 0 0 1\n", + "3 1 0 0 0\n", + "4 0 0 1 0" + ] + }, + "execution_count": 47, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "course_discretized = pd.DataFrame(course_pipeline_transformation, columns=course_columns)\n", + "\n", + "course_discretized.head(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "OeO6hmSEiL6N" + }, + "source": [ + "Por fim, combinamos as saídas dos dois _pipelines_ para criar um único `DataFrame`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 363 + }, + "colab_type": "code", + "id": "d8tL_jS1NTf7", + "outputId": "8b39c1c3-e549-4cea-fade-7c8e90d290ba" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Height_nScore_nHeight_n^2Height_n Score_nScore_n^2BiologyMathPhysicsUnknown
00.2655310.4855490.0705070.1289280.2357581000
10.4909820.4489400.2410630.2204220.2015471000
20.4218440.4884390.1779520.2060450.2385730001
30.5561120.2023120.3092610.1125080.0409301000
40.3667330.4508670.1344930.1653480.2032810010
50.5050100.4884390.2550350.2466670.2385730010
60.4058120.7341040.1646830.2979080.5389091000
70.3306610.5144510.1093370.1701090.2646600010
80.4108220.6454720.1687740.2651740.4166340010
90.3336670.3699420.1113340.1234380.1368570010
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([array([4.09 , 6.1975, 6.735 , 7.6 , 9.28 ])], dtype=object)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 16 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vGl5ONq2Yk7r", - "colab_type": "text" - }, - "source": [ - "A função `get_interval()` abaixo facilita a criação de _labels_ indicativas dos intervalos:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "fvB70_vd4fSO", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def get_interval(bin_idx, bin_edges):\n", - " return f\"{np.round(bin_edges[bin_idx], 2):.2f} ⊢ {np.round(bin_edges[bin_idx+1], 2):.2f}\"" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Hn3eqHFbYtfm", - "colab_type": "text" - }, - "source": [ - "Cada um dos intervalos mostrados abaixo deve possuir aproximadamente a mesma quantidade de observações:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "HX59pepN5ZQQ", - "colab_type": "code", - "outputId": "d5b3d4dc-c969-44cb-fa34-e31fad2dd818", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 139 - } - }, - "source": [ - "bin_edges_quantile = discretizer.bin_edges_[0]\n", - "\n", - "print(f\"Bins quantile\")\n", - "print(f\"interval: #elements\\n\")\n", - "for i in range(len(discretizer.bin_edges_[0])-1):\n", - " print(f\"{get_interval(i, bin_edges_quantile)}: {sum(score_bins[:, 0] == i)}\")" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Bins quantile\n", - "interval: #elements\n", - "\n", - "4.09 ⊢ 6.20: 25\n", - "6.20 ⊢ 6.74: 25\n", - "6.74 ⊢ 7.60: 25\n", - "7.60 ⊢ 9.28: 25\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OQ0fli3IY2G6", - "colab_type": "text" - }, - "source": [ - "A _Series_ abaixo mostra alguns dos intervalos para os quais as 
observações foram encaixadas:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "SZMBYjqR5-H6", - "colab_type": "code", - "outputId": "cba541dc-9f9e-48d8-eb87-fa54440ca353", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 208 - } - }, - "source": [ - "score_intervals = pd.Series(score_bins.flatten().astype(np.int)).apply(get_interval, args=(bin_edges_quantile,))\n", - "\n", - "score_intervals.head(10)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0 6.20 ⊢ 6.74\n", - "1 6.20 ⊢ 6.74\n", - "2 7.60 ⊢ 9.28\n", - "3 4.09 ⊢ 6.20\n", - "4 6.20 ⊢ 6.74\n", - "5 7.60 ⊢ 9.28\n", - "6 7.60 ⊢ 9.28\n", - "7 6.74 ⊢ 7.60\n", - "8 6.74 ⊢ 7.60\n", - "9 4.09 ⊢ 6.20\n", - "dtype: object" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 19 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6gWE7IU6Y_9q", - "colab_type": "text" - }, - "source": [ - "Também podemos criar uma nova variável, `Score_interval`, no nosso _data set_ com os intervalos (que agora são categorias):" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "fomFOQbVA8eS", - "colab_type": "code", - "outputId": "1f065c4f-6da4-43ad-ebb7-b58706595871", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - } - }, - "source": [ - "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_intervals, columns=[\"Score_interval\"])], axis=1)\n", - "\n", - "data_encoded.head(10)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
HeightScoreCourseBiologyMathPhysicsTallScore_interval
01.5396.61Biology100False6.20 ⊢ 6.74
11.7646.42Biology100False6.20 ⊢ 6.74
21.6958.95Biology100False7.60 ⊢ 9.28
31.8295.14Biology100True4.09 ⊢ 6.20
41.6406.43Physics001False6.20 ⊢ 6.74
51.7787.98Physics001False7.60 ⊢ 9.28
61.6797.90Biology100False7.60 ⊢ 9.28
71.6046.76Physics001False6.74 ⊢ 7.60
81.8197.44Physics001True6.74 ⊢ 7.60
91.6076.01Physics001False4.09 ⊢ 6.20
\n", - "
" - ], - "text/plain": [ - " Height Score Course Biology Math Physics Tall Score_interval\n", - "0 1.539 6.61 Biology 1 0 0 False 6.20 ⊢ 6.74\n", - "1 1.764 6.42 Biology 1 0 0 False 6.20 ⊢ 6.74\n", - "2 1.695 8.95 Biology 1 0 0 False 7.60 ⊢ 9.28\n", - "3 1.829 5.14 Biology 1 0 0 True 4.09 ⊢ 6.20\n", - "4 1.640 6.43 Physics 0 0 1 False 6.20 ⊢ 6.74\n", - "5 1.778 7.98 Physics 0 0 1 False 7.60 ⊢ 9.28\n", - "6 1.679 7.90 Biology 1 0 0 False 7.60 ⊢ 9.28\n", - "7 1.604 6.76 Physics 0 0 1 False 6.74 ⊢ 7.60\n", - "8 1.819 7.44 Physics 0 0 1 True 6.74 ⊢ 7.60\n", - "9 1.607 6.01 Physics 0 0 1 False 4.09 ⊢ 6.20" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 20 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LldlZ92lZN1k", - "colab_type": "text" - }, - "source": [ - "Como dito, podemos utilizar a estratégia `uniform` para criar _bins_ igualmente espaçados, independente do número de observações que cada um possui. Também podemos especificar o tipo de codificação utilizada. 
No caso a seguir, utilizamos `encode=onehot-dense` para informar que queremos que a saída seja codificada como o _one-hot encode_ visto anteriormente:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "E6L1qXuW-v-n", - "colab_type": "code", - "outputId": "956f9e9f-67ba-436f-f457-889ee2d1f3db", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 191 - } - }, - "source": [ - "discretizer_uniform = KBinsDiscretizer(n_bins=4, encode=\"onehot-dense\", strategy=\"uniform\")\n", - "\n", - "discretizer_uniform.fit(data_encoded[[\"Score\"]])\n", - "\n", - "score_bins_uniform = discretizer_uniform.transform(data_encoded[[\"Score\"]]).astype(np.int)\n", - "\n", - "score_bins_uniform[:10]" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([[0, 1, 0, 0],\n", - " [0, 1, 0, 0],\n", - " [0, 0, 0, 1],\n", - " [1, 0, 0, 0],\n", - " [0, 1, 0, 0],\n", - " [0, 0, 1, 0],\n", - " [0, 0, 1, 0],\n", - " [0, 0, 1, 0],\n", - " [0, 0, 1, 0],\n", - " [0, 1, 0, 0]])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 21 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YapI8RuMZZfM", - "colab_type": "text" - }, - "source": [ - "Note como agora os intervalos são ligeiramente diferentes:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "P8gW9k-w-_CC", - "colab_type": "code", - "outputId": "731fca86-f052-4a93-e5bf-e13eec18ac8b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "bin_edges_uniform = discretizer_uniform.bin_edges_[0]\n", - "\n", - "bin_edges_uniform" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([4.09 , 5.3875, 6.685 , 7.9825, 9.28 ])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 22 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ieyy46EJAnb6", - "colab_type": "code", - "outputId": 
"99835fa9-8003-4060-afae-2c4de66685ff", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 139 - } - }, - "source": [ - "score_intervals_columns = [get_interval(i, bin_edges_uniform) for i in range(4)]\n", - "\n", - "print(f\"Bins uniform\")\n", - "print(f\"interval: #elements\\n\")\n", - "for i in range(len(discretizer_uniform.bin_edges_[0])-1):\n", - " print(f\"{get_interval(i, bin_edges_uniform)}: {sum(score_bins_uniform[:, i])}\")" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Bins uniform\n", - "interval: #elements\n", - "\n", - "4.09 ⊢ 5.39: 6\n", - "5.39 ⊢ 6.68: 43\n", - "6.68 ⊢ 7.98: 44\n", - "7.98 ⊢ 9.28: 7\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WuWi-1U4Zzf_", - "colab_type": "text" - }, - "source": [ - "Podemos adicionar as novas variáveis binárias no _data set_:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "P-v3UgiQB87S", - "colab_type": "code", - "outputId": "ad22d68f-c0e8-4a91-8838-842e7e2f5041", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - } - }, - "source": [ - "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_bins_uniform, columns=score_intervals_columns)], axis=1)\n", - "\n", - "data_encoded.head(10)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
HeightScoreCourseBiologyMathPhysicsTallScore_interval4.09 ⊢ 5.395.39 ⊢ 6.686.68 ⊢ 7.987.98 ⊢ 9.28
01.5396.61Biology100False6.20 ⊢ 6.740100
11.7646.42Biology100False6.20 ⊢ 6.740100
21.6958.95Biology100False7.60 ⊢ 9.280001
31.8295.14Biology100True4.09 ⊢ 6.201000
41.6406.43Physics001False6.20 ⊢ 6.740100
51.7787.98Physics001False7.60 ⊢ 9.280010
61.6797.90Biology100False7.60 ⊢ 9.280010
71.6046.76Physics001False6.74 ⊢ 7.600010
81.8197.44Physics001True6.74 ⊢ 7.600010
91.6076.01Physics001False4.09 ⊢ 6.200100
\n", - "
" - ], - "text/plain": [ - " Height Score Course ... 5.39 ⊢ 6.68 6.68 ⊢ 7.98 7.98 ⊢ 9.28\n", - "0 1.539 6.61 Biology ... 1 0 0\n", - "1 1.764 6.42 Biology ... 1 0 0\n", - "2 1.695 8.95 Biology ... 0 0 1\n", - "3 1.829 5.14 Biology ... 0 0 0\n", - "4 1.640 6.43 Physics ... 1 0 0\n", - "5 1.778 7.98 Physics ... 0 1 0\n", - "6 1.679 7.90 Biology ... 0 1 0\n", - "7 1.604 6.76 Physics ... 0 1 0\n", - "8 1.819 7.44 Physics ... 0 1 0\n", - "9 1.607 6.01 Physics ... 1 0 0\n", - "\n", - "[10 rows x 12 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 24 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jD8WM_-yzqSc", - "colab_type": "text" - }, - "source": [ - "## Normalização (_Scaling_)\n", - "\n", - "Normalização é o processo de colocar uma variável numérica em uma escala pré-determinada, geralmente $[0, 1]$, mas também é comum ser $[-1, 1]$.\n", - "\n", - "Para colocar no intervalo $[0, 1]$, basta subtrair o valor mínimo de cada valor e dividir o resultado pela diferença entre os valores máximo e mínimo:\n", - "\n", - "$$x_{\\text{scaled}} = \\frac{x - x_{\\text{min}}}{x_{\\text{max}} - x_{\\text{min}}}$$\n", - "\n", - "Abaixo, escalamos a variável `Score` no intervalo $[0, 1]$:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nMM2mu-Qzwnv", - "colab_type": "code", - "outputId": "5c60c83b-13bf-431d-e77e-a2fb2e8af317", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 191 - } - }, - "source": [ - "minmax_scaler = MinMaxScaler(feature_range=(0, 1)) # Default feature_range é (0, 1).\n", - "\n", - "minmax_scaler.fit(data_encoded[[\"Score\"]])\n", - "\n", - "score_normalized = minmax_scaler.transform(data_encoded[[\"Score\"]])\n", - "\n", - "score_normalized[:10]" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([[0.48554913],\n", - " [0.44894027],\n", - " [0.93641618],\n", - " [0.20231214],\n", - " [0.45086705],\n", - " [0.7495183 ],\n", - " 
[0.73410405],\n", - " [0.51445087],\n", - " [0.64547206],\n", - " [0.3699422 ]])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 25 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "FPr-37M2UBj4", - "colab_type": "code", - "outputId": "dc170301-56af-4cab-da7c-307c5cbb94a6", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "score_normalized.min(), score_normalized.max()" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(0.0, 0.9999999999999999)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 26 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Et6m_2Bbbq-n", - "colab_type": "text" - }, - "source": [ - "Adicionamos a variável `Score` normalizada ao nosso _data set_:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "kaYvCQtK0fzi", - "colab_type": "code", - "outputId": "9f8ccb6c-d0b7-4445-96c9-490f284f2357", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - } - }, - "source": [ - "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_normalized.flatten(), columns=[\"Score_normalized\"])], axis=1)\n", - "\n", - "data_encoded.head(10)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
HeightScoreCourseBiologyMathPhysicsTallScore_interval4.09 ⊢ 5.395.39 ⊢ 6.686.68 ⊢ 7.987.98 ⊢ 9.28Score_normalized
01.5396.61Biology100False6.20 ⊢ 6.7401000.485549
11.7646.42Biology100False6.20 ⊢ 6.7401000.448940
21.6958.95Biology100False7.60 ⊢ 9.2800010.936416
31.8295.14Biology100True4.09 ⊢ 6.2010000.202312
41.6406.43Physics001False6.20 ⊢ 6.7401000.450867
51.7787.98Physics001False7.60 ⊢ 9.2800100.749518
61.6797.90Biology100False7.60 ⊢ 9.2800100.734104
71.6046.76Physics001False6.74 ⊢ 7.6000100.514451
81.8197.44Physics001True6.74 ⊢ 7.6000100.645472
91.6076.01Physics001False4.09 ⊢ 6.2001000.369942
\n", - "
" - ], - "text/plain": [ - " Height Score Course ... 6.68 ⊢ 7.98 7.98 ⊢ 9.28 Score_normalized\n", - "0 1.539 6.61 Biology ... 0 0 0.485549\n", - "1 1.764 6.42 Biology ... 0 0 0.448940\n", - "2 1.695 8.95 Biology ... 0 1 0.936416\n", - "3 1.829 5.14 Biology ... 0 0 0.202312\n", - "4 1.640 6.43 Physics ... 0 0 0.450867\n", - "5 1.778 7.98 Physics ... 1 0 0.749518\n", - "6 1.679 7.90 Biology ... 1 0 0.734104\n", - "7 1.604 6.76 Physics ... 1 0 0.514451\n", - "8 1.819 7.44 Physics ... 1 0 0.645472\n", - "9 1.607 6.01 Physics ... 0 0 0.369942\n", - "\n", - "[10 rows x 13 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 27 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "n7-msElsbveR", - "colab_type": "text" - }, - "source": [ - "Para avaliar se os valores encontrados conferem, podemos utilizar a função `normalize` abaixo:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "EAfUGaFc061d", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def normalize(x, xmin, xmax):\n", - " return (x - xmin)/(xmax - xmin)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CXywxNX-b-0K", - "colab_type": "text" - }, - "source": [ - "A função `partial()` do módulo `functools` (da biblioteca padrão do Python) permite \"congelar\" alguns parâmetros da função passada como argumento, facilitando a invocação desta função quando tais parâmetros são constantes. No caso abaixo, \"congelamos\" os argumentos `xmin` e `xmax` da função `normalize()` com os valores mínimo e máximo da variável `Score`, respectivamente. 
Nas invocações subsequentes da função parcial `normalize_score` não precisaremos passar esses argumentos, somente o argumento \"não congelado\" `x`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "UAlpigp21OVx", - "colab_type": "code", - "colab": {} - }, - "source": [ - "normalize_score = functools.partial(normalize,\n", - " xmin=data_encoded.Score.min(),\n", - " xmax=data_encoded.Score.max())" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nhR0rwUIctTa", - "colab_type": "text" - }, - "source": [ - "O valor abaixo realmente confere com aquele encontrado pelo `MinMaxScaler`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "pMfk3jrU1mQV", - "colab_type": "code", - "outputId": "f9851c0d-9446-4f10-874e-cdba22b43722", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "normalize_score(data_encoded.Score[0]).round(6)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "0.485549" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 30 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HEcSQzWJ2Yum", - "colab_type": "text" - }, - "source": [ - "## Padronização (_Standardization_)\n", - "\n", - "Padronização é o processo de tornar a variável com média zero e variância um. Esse processo não deve ser confundido com a normalização descrita acima.\n", - "\n", - "O processo é simples, basta subtrair a média dos dados de cada observação e dividir o resultado pelo desvio-padrão:\n", - "\n", - "$$x_{\\text{standardized}} = \\frac{x - \\bar{x}}{s}$$\n", - "\n", - "onde $\\bar{x}$ indica a média amostral e $s$ o desvio-padrão amostral." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kXYXezCNdYue", - "colab_type": "text" - }, - "source": [ - "No exemplo abaixo, padronizamos a variável `Score`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Qfhs3Eaq2dGV", - "colab_type": "code", - "outputId": "572aae65-5460-44d1-8134-dbc26f82e2d2", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 191 - } - }, - "source": [ - "standard_scaler = StandardScaler()\n", - "\n", - "standard_scaler.fit(data_encoded[[\"Score\"]])\n", - "\n", - "score_standardized = standard_scaler.transform(data_encoded[[\"Score\"]])\n", - "\n", - "score_standardized[:10]" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([[-0.20752554],\n", - " [-0.40839081],\n", - " [ 2.26628886],\n", - " [-1.76158843],\n", - " [-0.39781896],\n", - " [ 1.24081879],\n", - " [ 1.15624393],\n", - " [-0.0489477 ],\n", - " [ 0.66993854],\n", - " [-0.84183693]])" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 31 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SJJucIQddgME", - "colab_type": "text" - }, - "source": [ - "E adicionamos a variável padronizada ao nosso _data set_:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "BAndWLe13RSr", - "colab_type": "code", - "outputId": "4a6231c1-f459-4307-ad14-24c4e46760cd", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - } - }, - "source": [ - "data_encoded = pd.concat([data_encoded, pd.DataFrame(score_standardized.flatten(), columns=[\"Score_standardized\"])], axis=1)\n", - "\n", - "data_encoded.head(10)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
HeightScoreCourseBiologyMathPhysicsTallScore_interval4.09 ⊢ 5.395.39 ⊢ 6.686.68 ⊢ 7.987.98 ⊢ 9.28Score_normalizedScore_standardized
01.5396.61Biology100False6.20 ⊢ 6.7401000.485549-0.207526
11.7646.42Biology100False6.20 ⊢ 6.7401000.448940-0.408391
21.6958.95Biology100False7.60 ⊢ 9.2800010.9364162.266289
31.8295.14Biology100True4.09 ⊢ 6.2010000.202312-1.761588
41.6406.43Physics001False6.20 ⊢ 6.7401000.450867-0.397819
51.7787.98Physics001False7.60 ⊢ 9.2800100.7495181.240819
61.6797.90Biology100False7.60 ⊢ 9.2800100.7341041.156244
71.6046.76Physics001False6.74 ⊢ 7.6000100.514451-0.048948
81.8197.44Physics001True6.74 ⊢ 7.6000100.6454720.669939
91.6076.01Physics001False4.09 ⊢ 6.2001000.369942-0.841837
\n", - "
" - ], - "text/plain": [ - " Height Score Course ... 7.98 ⊢ 9.28 Score_normalized Score_standardized\n", - "0 1.539 6.61 Biology ... 0 0.485549 -0.207526\n", - "1 1.764 6.42 Biology ... 0 0.448940 -0.408391\n", - "2 1.695 8.95 Biology ... 1 0.936416 2.266289\n", - "3 1.829 5.14 Biology ... 0 0.202312 -1.761588\n", - "4 1.640 6.43 Physics ... 0 0.450867 -0.397819\n", - "5 1.778 7.98 Physics ... 0 0.749518 1.240819\n", - "6 1.679 7.90 Biology ... 0 0.734104 1.156244\n", - "7 1.604 6.76 Physics ... 0 0.514451 -0.048948\n", - "8 1.819 7.44 Physics ... 0 0.645472 0.669939\n", - "9 1.607 6.01 Physics ... 0 0.369942 -0.841837\n", - "\n", - "[10 rows x 14 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 32 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_SgwGLgOdk5Q", - "colab_type": "text" - }, - "source": [ - "Note que, ao contrário da variável normalizada, é possível ter valores negativos e positivos, menores e maiores que um. Isso é bem óbvio, pois os dados agora têm média 0 e variância 1:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "I0E9fwo93h9w", - "colab_type": "code", - "outputId": "2d9d5cdf-181b-4ca1-bea7-b382bf738ebd", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "data_encoded.Score_standardized.mean(), data_encoded.Score_standardized.var()" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(-1.2501111257279262e-15, 1.0101010101010102)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 33 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Av0cwG_Qd3Ow", - "colab_type": "text" - }, - "source": [ - "Novamente, para avaliar os resultados obtidos, podemos escrever nossa própria função de padronização:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "khwEkoks3-cS", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def standardize(x, xmean, 
xstd):\n", - " return (x - xmean)/xstd" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "14w3018J4Gwy", - "colab_type": "code", - "colab": {} - }, - "source": [ - "standardize_score = functools.partial(standardize,\n", - " xmean=data_encoded.Score.mean(),\n", - " xstd=data_encoded.Score.std())" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UAGxoUK5d-22", - "colab_type": "text" - }, - "source": [ - "O valor fica muito próximo, mas não idêntico, ao encontrado pelo `StandardScaler` (-0.2065 contra -0.2075): `Series.std()` usa o desvio-padrão amostral (`ddof=1`), enquanto o `StandardScaler` usa o desvio-padrão populacional (`ddof=0`):" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "dpaNVzOy4aCL", - "colab_type": "code", - "outputId": "fa0f42f0-32a5-48f4-f8d7-724350cdca86", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "standardize_score(data_encoded.Score[0])" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "-0.20648530634442175" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 36 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2tO4OOJK7NY1", - "colab_type": "text" - }, - "source": [ - "## Criando um _Pipeline_\n", - "\n", - "Todo esse processo de transformar os dados pode ser bastante trabalhoso e entediante. Para facilitar as coisas, o sklearn dispõe de um mecanismo de _pipeline_ que funciona como a esteira de uma linha de montagem. Cada etapa desse _pipeline_ é uma transformação nos dados, de forma que, ao final do _pipeline_, temos os dados totalmente transformados. A vantagem é que agora especificamos todas as etapas, ou transformações, de uma só vez, e podemos reaproveitar esse _pipeline_ no futuro." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "T1LyaI0-B2hV", - "colab_type": "code", - "outputId": "011176a0-ec92-4122-9fc4-3b3d0a3118c9", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - } - }, - "source": [ - "data.head(10)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
HeightScoreCourse
01.5396.61Biology
11.7646.42Biology
21.6958.95Biology
31.8295.14Biology
41.6406.43Physics
51.7787.98Physics
61.6797.90Biology
71.6046.76Physics
81.8197.44Physics
91.6076.01Physics
\n", - "
" - ], - "text/plain": [ - " Height Score Course\n", - "0 1.539 6.61 Biology\n", - "1 1.764 6.42 Biology\n", - "2 1.695 8.95 Biology\n", - "3 1.829 5.14 Biology\n", - "4 1.640 6.43 Physics\n", - "5 1.778 7.98 Physics\n", - "6 1.679 7.90 Biology\n", - "7 1.604 6.76 Physics\n", - "8 1.819 7.44 Physics\n", - "9 1.607 6.01 Physics" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 37 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "86on9pLMeidf", - "colab_type": "text" - }, - "source": [ - "Para evitar bagunçar com nosso _data set_ original, criamos uma cópia (rasa) dele:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "vdA8euCcZeq1", - "colab_type": "code", - "colab": {} - }, - "source": [ - "data_missing = data.copy()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "snDUyWqEenh8", - "colab_type": "text" - }, - "source": [ - "E para tornar o exemplo mais interessante, adicionamos (ou removemos?) dados faltantes ao _data set_. 
Isso porque uma das transformações úteis que podemos aplicar no _pipeline_ é justamente a imputação de dados, ou seja, preencher dados faltantes.\n", - "\n", - "As variáveis numéricas faltantes são representadas por `np.nan`, enquanto a variável categórica é representada pela classe `Unknown`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "LkVnbFAKS_fF", - "colab_type": "code", - "outputId": "6ba74eb6-0d60-419a-c39a-dd165cd49b60", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - } - }, - "source": [ - "unknown_height_idx = pd.Index(np.random.choice(data_missing.index, 10, replace=False))\n", - "unknown_score_idx = pd.Index(np.random.choice(data_missing.index, 10, replace=False))\n", - "unknown_course_idx = pd.Index(np.random.choice(data_missing.index, 10, replace=False))\n", - "\n", - "data_missing.loc[unknown_height_idx, \"Height\"] = np.nan\n", - "data_missing.loc[unknown_score_idx, \"Score\"] = np.nan\n", - "data_missing.loc[unknown_course_idx, \"Course\"] = \"Unknown\"\n", - "\n", - "data_missing_idx = unknown_height_idx | unknown_score_idx | unknown_course_idx\n", - "\n", - "data_missing.loc[data_missing_idx].head(10)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
HeightScoreCourse
21.695NaNUnknown
51.778NaNPhysics
8NaN7.44Physics
111.539NaNBiology
15NaN5.44Biology
24NaN8.08Biology
292.0206.83Unknown
331.691NaNMath
352.0856.96Unknown
381.3766.54Unknown
\n", - "
" - ], - "text/plain": [ - " Height Score Course\n", - "2 1.695 NaN Unknown\n", - "5 1.778 NaN Physics\n", - "8 NaN 7.44 Physics\n", - "11 1.539 NaN Biology\n", - "15 NaN 5.44 Biology\n", - "24 NaN 8.08 Biology\n", - "29 2.020 6.83 Unknown\n", - "33 1.691 NaN Math\n", - "35 2.085 6.96 Unknown\n", - "38 1.376 6.54 Unknown" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 39 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nmUJS9SzfC9Y", - "colab_type": "text" - }, - "source": [ - "Criamos o _pipeline_ com as seguintes etapas:\n", - "\n", - "1. Faça imputação dos dados, preenchendo os dados faltantes com a mediana dos dados presentes.\n", - "2. Faça a normalização dos dados no intervalo _default_ $[0, 1]$.\n", - "3. Crie novas variáveis através da expansão polinomial da variável original." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9ypslSlEhGBr", - "colab_type": "text" - }, - "source": [ - "O `Pipeline` recebe uma lista de transformações representadas por tuplas de dois elementos. Cada tupla contém:\n", - "\n", - "* O nome para a etapa (ou transformação ou estimador). Isso vai ser útil para recuperar algumas informações do _pipeline_ mais a frente.\n", - "* Um objeto da classe do transformador ou estimador, já com seus parâmetros configurados." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "XqthBhA18ITd", - "colab_type": "code", - "colab": {} - }, - "source": [ - "num_pipeline = Pipeline(steps=[\n", - " (\"imputer\", SimpleImputer(strategy=\"median\")),\n", - " (\"minmax_scaler\", MinMaxScaler()),\n", - " (\"poly_features\", PolynomialFeatures(degree=2, include_bias=False))\n", - "])" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3UVr1XWCfZID", - "colab_type": "text" - }, - "source": [ - "Depois da especificação do nosso _pipeline_, podemos aplicá-lo simultaneamente a diversas variáveis (desde que as transformações especificadas façam sentido).\n", - "\n", - "No exemplo abaixo, aplicamos esse _pipeline_ às variáveis `Height` e `Score` ao mesmo tempo:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Qh8kbymmDZqB", - "colab_type": "code", - "outputId": "0595019a-1288-4ea8-d18b-1d61dc44136b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 191 - } - }, - "source": [ - "pipeline_transformation = num_pipeline.fit_transform(data_missing[[\"Height\", \"Score\"]])\n", - "\n", - "pipeline_transformation[:10]" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array([[0.26553106, 0.48554913, 0.07050674, 0.12892838, 0.23575796],\n", - " [0.49098196, 0.44894027, 0.24106329, 0.22042158, 0.20154737],\n", - " [0.42184369, 0.48843931, 0.1779521 , 0.20604504, 0.23857296],\n", - " [0.55611222, 0.20231214, 0.30926081, 0.11250825, 0.0409302 ],\n", - " [0.36673347, 0.45086705, 0.13449344, 0.16534804, 0.2032811 ],\n", - " [0.50501002, 0.48843931, 0.25503512, 0.24666674, 0.23857296],\n", - " [0.40581162, 0.73410405, 0.16468307, 0.29790795, 0.53890875],\n", - " [0.33066132, 0.51445087, 0.10933691, 0.170109 , 0.26465969],\n", - " [0.41082164, 0.64547206, 0.16877442, 0.26517389, 0.41663418],\n", - " [0.33366733, 0.3699422 , 0.11133389, 0.12343763, 0.13685723]])" - 
] - }, - "metadata": { - "tags": [] - }, - "execution_count": 41 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HoNf9vDJfrW8", - "colab_type": "text" - }, - "source": [ - "Para deixar mais clara a saída do _pipeline_, podemos utilizar os nomes das _features_ geradas através do método `get_feature_names()`. Para torná-la ainda mais clara, substituímos o que é chamado `x0` por `Height_n` e `x1` por `Score_n`, o que é inferido pela ordem das variáveis no _pipeline_." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "OJz5zvr2EeM3", - "colab_type": "code", - "outputId": "444fe35c-4e5e-4f9c-ef6a-152dd9bcd775", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "poly_features = num_pipeline.get_params()[\"poly_features\"].get_feature_names()\n", - " \n", - "pipeline_columns = [old_name.replace(\"x0\", \"Height_n\").replace(\"x1\", \"Score_n\") for old_name in poly_features]\n", - "\n", - "pipeline_columns" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "['Height_n', 'Score_n', 'Height_n^2', 'Height_n Score_n', 'Score_n^2']" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 42 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MBgEafF-gKA3", - "colab_type": "text" - }, - "source": [ - "Criamos um novo _data set_ com essas variáveis resultantes do _pipeline_:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "q_xBepJGIAJm", - "colab_type": "code", - "outputId": "6126947b-ef3f-42db-84aa-4317ed5f79d3", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 237 - } - }, - "source": [ - "height_score_normalized_poly = pd.DataFrame(pipeline_transformation, columns=pipeline_columns)\n", - "\n", - "height_score_normalized_poly.head(6)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Height_nScore_nHeight_n^2Height_n Score_nScore_n^2
00.2655310.4855490.0705070.1289280.235758
10.4909820.4489400.2410630.2204220.201547
20.4218440.4884390.1779520.2060450.238573
30.5561120.2023120.3092610.1125080.040930
40.3667330.4508670.1344930.1653480.203281
50.5050100.4884390.2550350.2466670.238573
\n", - "
" - ], - "text/plain": [ - " Height_n Score_n Height_n^2 Height_n Score_n Score_n^2\n", - "0 0.265531 0.485549 0.070507 0.128928 0.235758\n", - "1 0.490982 0.448940 0.241063 0.220422 0.201547\n", - "2 0.421844 0.488439 0.177952 0.206045 0.238573\n", - "3 0.556112 0.202312 0.309261 0.112508 0.040930\n", - "4 0.366733 0.450867 0.134493 0.165348 0.203281\n", - "5 0.505010 0.488439 0.255035 0.246667 0.238573" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 43 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9imGtnaygRiX", - "colab_type": "text" - }, - "source": [ - "Podemos também criar outro _pipeline_ para a variável categórica `Course`. Como se trata de uma variável de natureza completamente diferente, precisamos especificar um _pipeline_ diferente com as seguintes transformações:\n", - "\n", - "1. Preencha os dados faltantes (`None`) com a classe `Unknown`.\n", - "2. Crie novas variáveis binárias com o `OneHotEncoder`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eZP_HTkchI5c", - "colab_type": "text" - }, - "source": [ - "Assim como no _pipeline_ anterior, especificamos cada etapa como uma tupla com um nome e um objeto de um transformador ou estimador:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "NMv_2lV7KxTM", - "colab_type": "code", - "colab": {} - }, - "source": [ - "cat_pipeline = Pipeline([\n", - " (\"imputer\", SimpleImputer(strategy=\"constant\", fill_value=\"Unknown\")),\n", - " (\"one_hot_encoder\", OneHotEncoder(sparse=False, dtype=np.int))\n", - "])" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wK66jYTShV52", - "colab_type": "text" - }, - "source": [ - "Após a especificação do _pipeline_, podemos aplicá-lo à nossa variável `Course`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "KIFWvPS7LNUA", - "colab_type": "code", - "colab": {} - }, - "source": [ - "course_pipeline_transformation = 
cat_pipeline.fit_transform(data_missing[[\"Course\"]])" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "quJ4ThBBhfBI", - "colab_type": "text" - }, - "source": [ - "Agora, utilizaremos o nome que demos à etapa do `OneHotEncoder` para recuperar esse transformador através do método `get_params()`. Depois de recuperado o `OneHotEncoder`, acessamos seu atributo `categories_` (primeiro índice `[0]`, pois poderíamos ter aplicado o _pipeline_ a mais de uma variável categórica):" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Zurb-NVWM4sX", - "colab_type": "code", - "outputId": "1e7c2960-6ffb-4285-bb2d-691157302850", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "course_columns = cat_pipeline.get_params()[\"one_hot_encoder\"].categories_[0]\n", - "\n", - "course_columns" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "array(['Biology', 'Math', 'Physics', 'Unknown'], dtype=object)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 46 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ABQDGjU_iDGS", - "colab_type": "text" - }, - "source": [ - "Utilizamos a saída do _pipeline_ e os nomes das categorias recuperados do transformador para criar um novo `DataFrame`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "3ec56uIcMvll", - "colab_type": "code", - "outputId": "5707acac-8d67-4d74-eb02-d73b98f6340a", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - } - }, - "source": [ - "course_discretized = pd.DataFrame(course_pipeline_transformation, columns=course_columns)\n", - "\n", - "course_discretized.head(5)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
BiologyMathPhysicsUnknown
01000
11000
20001
31000
40010
\n", - "
" - ], - "text/plain": [ - " Biology Math Physics Unknown\n", - "0 1 0 0 0\n", - "1 1 0 0 0\n", - "2 0 0 0 1\n", - "3 1 0 0 0\n", - "4 0 0 1 0" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 47 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OeO6hmSEiL6N", - "colab_type": "text" - }, - "source": [ - "Por fim, combinamos as saídas dos dois _pipelines_ para criar um único `DataFrame`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "d8tL_jS1NTf7", - "colab_type": "code", - "outputId": "8b39c1c3-e549-4cea-fade-7c8e90d290ba", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 363 - } - }, - "source": [ - "data_transformed = pd.concat([height_score_normalized_poly, course_discretized], axis=1)\n", - "\n", - "data_transformed.head(10)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Height_nScore_nHeight_n^2Height_n Score_nScore_n^2BiologyMathPhysicsUnknown
00.2655310.4855490.0705070.1289280.2357581000
10.4909820.4489400.2410630.2204220.2015471000
20.4218440.4884390.1779520.2060450.2385730001
30.5561120.2023120.3092610.1125080.0409301000
40.3667330.4508670.1344930.1653480.2032810010
50.5050100.4884390.2550350.2466670.2385730010
60.4058120.7341040.1646830.2979080.5389091000
70.3306610.5144510.1093370.1701090.2646600010
80.4108220.6454720.1687740.2651740.4166340010
90.3336670.3699420.1113340.1234380.1368570010
\n", - "
" - ], - "text/plain": [ - " Height_n Score_n Height_n^2 ... Math Physics Unknown\n", - "0 0.265531 0.485549 0.070507 ... 0 0 0\n", - "1 0.490982 0.448940 0.241063 ... 0 0 0\n", - "2 0.421844 0.488439 0.177952 ... 0 0 1\n", - "3 0.556112 0.202312 0.309261 ... 0 0 0\n", - "4 0.366733 0.450867 0.134493 ... 0 1 0\n", - "5 0.505010 0.488439 0.255035 ... 0 1 0\n", - "6 0.405812 0.734104 0.164683 ... 0 0 0\n", - "7 0.330661 0.514451 0.109337 ... 0 1 0\n", - "8 0.410822 0.645472 0.168774 ... 0 1 0\n", - "9 0.333667 0.369942 0.111334 ... 0 1 0\n", - "\n", - "[10 rows x 9 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 48 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1NLD-pyliXWO", - "colab_type": "text" - }, - "source": [ - "Vale ressaltar que:\n", - "\n", - "* Poderíamos utilizar também o `ColumnTransformer` para compor (por isso, ele se encontra no módulo `sklearn.compose`) múltiplos `Pipeline` em diferentes variáveis.\n", - "* Os `Pipeline` não servem apenas para a transformação dos dados de treinamento. Eles também podem (e devem) ser usados para submeter os dados de teste e até de produção aos mesmos procedimentos dos dados de treinamento." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SbShR7kMZGwE", - "colab_type": "text" - }, - "source": [ - "## _Outliers_\n", - "\n", - "_Outliers_, os famosos \"pontos fora da curva\", são observações que não parecem seguir o mesmo padrão dos demais dados. Eles podem vir de distribuições diferentes, serem erros na coleta de dados, erros de medição etc.\n", - "\n", - "Eles influenciam nossas análises e os nossos algoritmos ao apresentar comportamento distoante do resto do _data set_, impactando na média, variância, funções de perda e custo etc. 
Se fizer sentido, eles devem ser removidos ou transformados antes de prosseguirmos com a análise.\n", - "\n", - "No entanto, devemos julgar com cautela sua remoção: __alguns _outliers_ são dados autênticos e devem ser estudados com atenção__. Por exemplo, a remoção de uma medição muito alta na temperatura de um reator seria um erro, pois essa medição pode estar nos indicando um potencial problema com o dispositivo.\n", - "\n", - "Abaixo estudamos algumas técnicas simples para encontrar _outliers_.\n", - "\n", - "![outlier](https://www.stats4stem.org/common/web/plugins/ckeditor/plugins/doksoft_uploader/userfiles/WithInfOutlier.png)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "u3bsTDv0pAN4", - "colab_type": "text" - }, - "source": [ - "Começamos criando uma cópia da variável `Height` do nosso _data set_ para não impactar o original:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "tQ7AQztcZkYx", - "colab_type": "code", - "colab": {} - }, - "source": [ - "height_outlier = data.Height.copy()" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VQNHBAu4pHcp", - "colab_type": "text" - }, - "source": [ - "Adicionamos dez _outliers_ que representam pessoas estranhamente baixas ou estranhamente altas para o padrão que estamos observando:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nX2R3V0HZI0w", - "colab_type": "code", - "outputId": "6acbd63c-820e-485a-cde4-72a69fefe13d", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 208 - } - }, - "source": [ - "height_outlier_idx = pd.Index(np.random.choice(height_outlier.index, 10, replace=False))\n", - "\n", - "too_short_idx = pd.Index(height_outlier_idx[:5])\n", - "too_tall_idx = pd.Index(height_outlier_idx[5:])\n", - "\n", - "height_outlier[too_short_idx] = np.random.normal(loc=1.30, scale=0.5, size=5)\n", - "height_outlier[too_tall_idx] = np.random.normal(loc=2.20, scale=0.5, size=5)\n", - "\n", - "outlier_idx 
= too_short_idx | too_tall_idx\n", - "\n", - "height_outlier[outlier_idx]\n" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "14 1.646795\n", - "18 1.696510\n", - "29 0.516665\n", - "38 2.943781\n", - "48 1.058498\n", - "49 1.326605\n", - "57 2.074231\n", - "66 1.831315\n", - "68 2.737088\n", - "96 1.966029\n", - "Name: Height, dtype: float64" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 50 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mwNbTzDnpoDL", - "colab_type": "text" - }, - "source": [ - "Note que nem todos os dados gerados se tornaram realmente _outliers_. Como os geramos a partir de uma distribuição aleatória, corremos esse risco.\n", - "\n", - "No entanto, temos alguns dados estranhos como 0.51 m e 2.73 m." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x5pwD_1EqRNZ", - "colab_type": "text" - }, - "source": [ - "No _box plot_ padrão, os dados mais extremos são mostrados como pontos fora do alcance dos _whiskers_ (as barrinhas do _box plot_).\n", - "\n", - "No caso abaixo, notamos três pontos acima e três pontos abaixo do considerado \"dentro da faixa normal\"."
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "hRMVhYz3b2KH", - "colab_type": "code", - "outputId": "9e090cef-804c-4f17-958b-5e25154662db", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 695 - } - }, - "source": [ - "sns.boxplot(height_outlier, orient=\"vertical\");" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "display_data", - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAt0AAAKmCAYAAACR0iLwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3W+s3nV9//HXdZ22YbWupz1WOPyx\naLYQElgwcyEn7SAUsCSjnGLWQZg62YYy0G6LGe3449KCjpKFZGNk1c0QWZwhZFQ5woQBC3i64zLv\nLKWS2p9RKv0HHHrqiujsua7fDfXEyr9TPe/raq/zeNw61znf61yv3sGn33zOdTXa7XY7AABAmWa3\nBwAAQK8T3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBA\nMdENAADF5nR7wEw7cODltFrtbs8AAKBHNZuNLFr0lqN6Ts9Fd6vVFt0AABxTHC8BAIBiohsAAIqJ\nbgAAKCa6AQCgmOgGAIBiohsAAIqJbgAAKCa6AQCgmOgGAIBiohsAAIp17GPgr7vuujz33HNpNpuZ\nP39+brnllpx55plHXDM5OZnbbrstX/3qV9NoNPLhD384a9as6dREAAAo0bHo3rRpU9761rcmSR57\n7LHceOON2bJlyxHXjIyMZNeuXXn00UczMTGR1atXZ2hoKKeeemqnZgIAwIzr2PGSnwZ3khw6dCiN\nRuNV1zz88MNZs2ZNms1mFi9enIsuuihf+cpXOjURAABKdOxOd5LcdNNN2bp1a9rtdv7pn/7pVT/f\nu3dvTj755KnHg4OD2bdv31G9xsDAgl96JwAAzKSORvcnP/nJJMkXv/jF3HHHHfnHf/zHGX+N8fFD\nabXaM/57AQAgSZrNxlHf6O3Ku5esXr06//Vf/5UDBw4c8f3BwcHs2bNn6vHevXtz0kkndXoeAADM\nqI5E98svv5y9e/dOPX7iiSeycOHC9Pf3H3HdJZdckvvvvz+tVisvvfRSHnvssaxcubITEwEAoExH\njpe88sor+dM//dO88soraTabWbhwYTZv3pxGo5Frrrkma9euzdlnn53h4eH8z//8T9773vcmSa6/\n/vqcdtppnZgIAABlGu12u6cOQDvTDQBApePmTDcAAMwmohsAAIqJbgAAKCa6AQCgmOgGmMUmJg7k\n9ts35uDBiW5PAehpohtgFhsZ2ZKdO3fkwQcf6PYUgJ4mugFmqYmJAxkdfTLtdjujo0+52w1QSHQD\nzFIjI1umPteg1Wq52w1QSHQDzFJjY1szOXk4STI5eThjY1u7vAigd4lugFlqaGhZ+vrmJEn6+uZk\naGhZlxcB9C7RDTBLrVp1eZrNRpKk2Wzmssve1+VFAL1LdAPMUv39i7J8+flpNBpZvvy8LFzY3+1J\nAD1rTrcHANA9q1Zdnt27n3OXG6BYo91ut7s9YiaNjx+a+mt8AACYac1mIwMDC47uOUVbAACAnxDd\nAABQTHQDAEAx0Q0AAMVENwAAFBPdAA
BQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVE\nNwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx\n0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQ\nTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdALPYxMSB3H77xhw8ONHtKQA9\nTXQDzGIjI1uyc+eOPPjgA92eAtDTRDfALDUxcSCjo0+m3W5ndPQpd7sBColugFlqZGRLWq12kqTV\narnbDVBIdAPMUmNjWzM5eThJMjl5OGNjW7u8CKB3iW6AWWpoaFn6+uYkSfr65mRoaFmXFwH0LtEN\nMEutWnV5ms1GkqTZbOayy97X5UUAvUt0A8xS/f2Lsnz5+Wk0Glm+/LwsXNjf7UkAPWtOtwcA0D2r\nVl2e3bufc5cboFij3W63uz1iJo2PH5r6a3wAAJhpzWYjAwMLju45RVsAAICfEN0AAFBMdAMAQDHR\nDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBM\ndAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAU\nE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAA\nxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMA\nQDHRDQAAxeZ04kUOHDiQG264Ibt27cq8efOydOnSbNy4MYsXLz7iuvXr1+c///M/s2jRoiTJJZdc\nkj/5kz/pxEQAACjTaLfb7eoXmZiYyI4dO3LuuecmSTZt2pSDBw/mU5/61BHXrV+/PmeddVbe//73\n/8KvNT5+KK1W+T8JAIBZqtlsZGBgwdE9p2jLEfr7+6eCO0nOOeec7NmzpxMvDQAAXdfxM92tVitf\n+MIXsmLFitf8+T333JNVq1bluuuuy7e+9a0OrwMAgJnXkeMlP2vDhg3Zv39//v7v/z7N5pHNv3//\n/ixZsiTNZjNf/OIX87d/+7d57LHH0tfX18mJAAAwozoa3Zs2bcqOHTuyefPmzJs3702vP/fcc/PA\nAw/klFNOmfZrONMNAEClY/ZMd5Lceeedefrpp3P33Xe/bnDv379/6uuvfvWraTabOfHEEzs1EQAA\nSnTkTvfOnTtz6aWX5vTTT88JJ5yQJDn11FNz9913Z3h4OJ/5zGdy4okn5kMf+lDGx8fTaDSyYMGC\n3HDDDTnnnHOO6rXc6QYAoNIvcqe742e6q4luAAAqHdPHSwAAYLYS3QAAUEx0AwBAMdENAADFRDcA\nABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdEN\nAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0\nAwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT\n3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0A8xiExMHcvvt\nG3Pw4ES3pwD0NNENMIuNjGzJzp078uCDD3R7CkBPE90As9TExIGMjj6Zdrud0dGn3O0GKCS6AWap\nkZEtabXaSZJWq+VuN0Ah0Q0wS42Nbc3k5OEkyeTk4YyNbe3yIoDeJboBZqmhoWXp65uTJOnrm5Oh\noW
VdXgTQu0Q3wCy1atXlaTYbSZJms5nLLntflxcB9C7RDTBL9fcvyvLl56fRaGT58vOycGF/tycB\n9Kw53R4AQPesWnV5du9+zl1ugGKNdrvd7vaImTQ+fmjqr/EBAGCmNZuNDAwsOLrnFG0BAAB+wvES\nmCW2bn0qo6NPdnsGx5iffiCO89z8vOXLz8+yZed1ewb0DHe6AWaxgwcP5uDBg92eAdDznOkGmMU2\nbbo1SbJu3S1dXgJw/HCmGwAAjkGiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY\n6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAo\nJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAA\nioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYA\ngGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroB\nAKCY6AYAgGJzOvEiBw4cyA033JBdu3Zl3rx5Wbp0aTZu3JjFixcfcd0rr7ySv/zLv8z27dvT19eX\ndevW5YILLujERAAAKNORO92NRiN//Md/nEceeSQjIyM57bTT8jd/8zevuu6zn/1sFixYkH//93/P\n5s2bc/PNN+fll1/uxEQAACjTkeju7+/PueeeO/X4nHPOyZ49e1513b/927/liiuuSJKcfvrpOeus\ns/LUU091YiIAAJTp+JnuVquVL3zhC1mxYsWrfrZnz56ccsopU48HBwezb9++Ts4DAIAZ15Ez3T/r\n1ltvzfz58/P+97+/5PcPDCwo+b0AvWju3L4kyZIlb+3yEoDe1tHo3rRpU5599tls3rw5zearb7Kf\nfPLJ2b1799QfWO7du/eIYynTMT5+KK1We0b2AvS6H/1oMknywgv/2+UlAMePZrNx1Dd6O3a85M47\n78zTTz+du+++O/PmzXvNay655JLcd999SZLvfOc72bZtW377t3+7UxMBAKBER6J7586d+fSnP53n\nn38+V155ZYaHh3P99dcnSYaHh7N///4kyR/90R/le9/7Xi6++OJ85CMfycaNG7NggeMiAAAc3zpy\nvOTXf/3Xs2PHjtf82Ze+9KWpr+fPn5+/+7u/68QkAADoGJ9ICQAAxUQ3AAAUE90AAFBMdAMAQDHR\nDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBM\ndAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAU\nE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFCs0W63290eMZPGxw+l1eqpf9JR+5d/uTff/e6z3Z4B\nHAd27frxfyve8Y6lXV4CHA9OO21prrrqg92e0XXNZiMDAwuO6jlzirbQRd/97rPZsfP/pe+E/m5P\nAY5xrcm+JMn/++6LXV4CHOsmfzDR7QnHNdHdo/pO6M/8pRd2ewYA0CO+/+zj3Z5wXHOmGwAAiolu\nAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoNu3o/uxnP/ua37/n\nnntmbAwAAPSiaUf33Xff/Zrf/4d/+IcZGwMAAL1ozptdMDY2liRptVr52te+lna7PfWz5557Lm95\ny1vq1gEAQA940+i+6aabkiQ//OEPc+ONN059v9FoZMmSJbn55pvr1gEAQA940+h+4oknkiQ33HBD\n7rjjjvJBAADQa940un/qZ4O71Wod8bNm05ugAADA65l2dG/fvj0b
N27Mjh078sMf/jBJ0m6302g0\n8swzz5QNBACA4920o3v9+vW54IIL8qlPfSonnHBC5SYAAOgp047u3bt358///M/TaDQq9wAAQM+Z\n9mHsiy++OKOjo5VbAACgJ73hne6/+Iu/mLqz/X//93/56Ec/mt/8zd/M2972tiOu864mAADw+t4w\nupcuXXrE41/7tV8rHQMAAL3oDaP7ox/9aKd2AABAz5r2H1L+9OPgf968efNy0kkn5ZRTTpmxUQAA\n0EumHd033XRTnn/++SRJf39/JiYmkiQDAwN58cUXc8YZZ+TOO+/M6aefXjIUAACOV9N+95Lf/d3f\nzQc+8IF8/etfz+joaL7+9a/nD/7gD3LllVfmv//7v3PWWWdlw4YNlVsBAOC4NO3ovvfee/Pxj398\n6oNxTjjhhPzZn/1ZPve5z2X+/PlZv359nn766bKhAABwvJp2dM+fPz/btm074nvbt2/Pr/zKr/z4\nFzWn/asAAGBWmfaZ7rVr1+YP//APs2LFigwODmbfvn35j//4j9xyyy1JfvyHlitXriwbCgAAx6tp\nR/fq1atz1lln5ZFHHsnzzz+f008/Pffdd9/Ue3dfcMEFueCCC8qGAgDA8Wra0Z38+MNxfEAOAAAc\nnTeM7ltuuSW33nprkiM/Ev7n+Rh4AAB4fW8Y3aeeeurU1z//kfAAAMD0vGF0f+QjH5n62kfCAwDA\nL+aoznRv3bo1Dz30UF566aVs3rw527Zty6FDhzI0NFS1DwAAjnvTju5//ud/zr333ps1a9bkkUce\nSfLjD8j55Cc/KbqPMQcPTmTyBxP5/rOPd3sKANAjJn8wkYMHj+p+LT9j2p9o87nPfS733HNPPvzh\nD099EM673vWufPvb3y4bBwAAvWDa/3fl5ZdfzuDgYJJMvYvJ4cOHM3fu3Jpl/MIWLuzPC987nPlL\nL+z2FACgR3z/2cezcGF/t2cct6Z9p/u3fuu38pnPfOaI7917770599xzZ3wUAAD0kmnf6b755ptz\n7bXX5v7778/LL7+clStX5i1veUs+/elPV+4DAIDj3rSj++1vf3v+9V//Ndu2bcvu3btz8skn5zd+\n4zde9wNzAACAH3vT6L7qqqveNKw///nPz9ggAADoNW8a3WvWrJn6ut1u59Zbb80nPvGJ0lEAANBL\n3jS6L7/88iMe//Vf//WrvgcAALy+ab97yU85ww0AAEfnqKMbAAA4Om96vGRsbOyIx4cPH87Xvva1\ntNvtqe/5GHgAAHh9bxrdN9100xGP+/v7c+ONN049bjQaefzxx2d+GQAA9Ig3je4nnniiEzsAAKBn\nOdMNAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAA\nUEx0AwBAMdENAADF5nR7ADUmfzCR7z/7eLdnAMe41uEfJEmac07o8hLgWDf5g4kkb+v2jOOW6O5B\np522tNsTgOPErl3PJknecZr/IQXezNs0xi+h0W63290eMZPGxw+l1eqpfxJAmU2bbk2SrFt3S5eX\nABw/ms1GBgYWHN1zirYAAAA/IboBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAiolu\nAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGIdi+5NmzZlxYoVOeOMM/LNb37zNa+5\n6667MjQ0lOHh4QwPD2fDhg2dmgcAAGXmdOqFLrzwwnzwgx/M7//+77/hdatXr866des6tAoAAOp1\nLLrf8573dOqlAADgmNKx6J6uhx56KKOjo1myZEk+9rGP5d3vfvdRPX9gYEHRMoDeM3duX5JkyZK3\ndnkJQG87pqL7yiuvzLXXXpu5c+dm69atue666/Lwww9n0aJF0/4d4+OH0mq1C1cC9I4f/WgySfLC\nC//b5SUAx49ms3HUN3qPqXcv
WbJkSebOnZskWbZsWQYHB7Nz584urwIAgF/OMRXd+/fvn/r6mWee\nye7du/POd76zi4sAAOCX17HjJbfddlseffTRvPjii7n66qvT39+fhx56KNdcc03Wrl2bs88+O3fe\neWe2b9+eZrOZuXPn5o477siSJUs6NREAAEo02u12Tx2AdqYbYPo2bbo1SbJu3S1dXgJw/Djuz3QD\nAEAvEt0AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHR\nDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBM\ndAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAU\nE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAA\nxUQ3AAAUE90AAFBMdAMAQDHRDQAAxRrtdrvd7REzaXz8UFqtnvonwYzYuvWpjI4+2e0ZHGN27Xo2\nSfKOdyzt8hKONcuXn59ly87r9gw4JjWbjQwMLDiq58wp2gLAcWDhwoXdngAwK7jTDQAAR+EXudPt\nTDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBA\nMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QCz\n2MTEgdx++8YcPDjR7SkAPU10A8xiIyNbsnPnjjz44APdngLQ00Q3wCw1MXEgo6NPpt1uZ3T0KXe7\nAQqJboBZamRkS1qtdpKk1Wq52w1QSHQDzFJjY1szOXk4STI5eThjY1u7vAigd4lugFlqaGhZ+vrm\nJEn6+uZkaGhZlxcB9C7RDTBLrVp1eZrNRpKk2Wzmssve1+VFAL1LdAPMUv39i7J8+flpNBpZvvy8\nLFzY3+1JAD1rTrcHANA9q1Zdnt27n3OXG6BYo91ut7s9YiaNjx+a+mt8AACYac1mIwMDC47uOUVb\nAACAnxDdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx\n0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVEN8AsNjFxILffvjEHD050ewpATxPdALPYyMiW7Ny5\nIw8++EC3pwD0NNENMEtNTBzI6OiTabfbGR19yt1ugEKiG2CWGhnZklarnSRptVrudgMUEt0As9TY\n2NZMTh5OkkxOHs7Y2NYuLwLoXaIbYJYaGlqWvr45SZK+vjkZGlrW5UUAvUt0A8xSq1ZdnmazkSRp\nNpu57LL3dXkRQO8S3QCzVH//oixffn4ajUaWLz8vCxf2d3sSQM+a0+0BAHTPqlWXZ/fu59zlBijW\naLfb7W6PmEnj44em/hofAABmWrPZyMDAgqN7TtEWAADgJ0Q3AAAUE90AAFBMdAMAQDHRDQAAxToS\n3Zs2bcqKFStyxhln5Jvf/OZrXjM5OZkNGzbkoosuysUXX5z777+/E9MAAKBcR6L7wgsvzOc///mc\ncsopr3vNyMhIdu3alUcffTT33Xdf7rrrrjz33HOdmAcAAKU6Et3vec97Mjg4+IbXPPzww1mzZk2a\nzWYWL16ciy66KF/5ylc6MQ8AAEodM59IuXfv3px88slTjwcHB7Nv376j/j1H+0blAABQ7ZiJ7pni\nEykBAKh0XH8i5eDgYPbs2TP1eO/evTnppJO6uAgAAGbGMRPdl1xySe6///60Wq289NJLeeyxx7Jy\n5cpuzwIAgF9aR6L7tttuy3nnnZd9+/bl6quvzu/8zu8kSa655pps27YtSTI8PJxTTz01733ve/
N7\nv/d7uf7663Paaad1Yh4AAJRqtNvtnjoA7Uw3AACVjusz3QAA0KtENwAAFBPdAABQTHQDAEAx0Q0A\nAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQD\nAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPd\nAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVE\nNwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx\n0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQ\nTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAA\nFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0A\nAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQD\nAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPd\nAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0wi01MHMjtt2/MwYMT3Z4C0NNEN8AsNjKy\nJTt37siDDz7Q7SkAPU10A8xSExMHMjr6ZNrtdkZHn3K3G6CQ6AaYpUZGtqTVaidJWq2Wu90AhUQ3\nwCw1NrY1k5OHkySTk4czNra1y4sAelfHovvb3/52rrjiiqxcuTJXXHFFvvOd77zqmrvuuitDQ0MZ\nHh7O8PBwNmzY0Kl5ALPO0NCy9PXNSZL09c3J0NCyLi8C6F1zOvVCf/VXf5Wrrroqw8PD+dKXvpRP\nfOITuffee1913erVq7Nu3bpOzQKYtVatujyjo09mcjJpNpu57LL3dXsSQM/qyJ3u8fHxfOMb38il\nl16aJLn00kvzjW98Iy+99FInXh6A19DfvyjLl5+fRqOR5cvPy8KF/d2eBNCzOnKne+/evTnxxBPT\n19eXJOnr68vb3/727N27N4sXLz7i2oceeiijo6NZsmRJPvaxj+Xd7373Ub3WwMCCGdsN0Os+9KEP\n5Pnn9+bqqz+YRYve2u05AD2rY8dLpuPKK6/Mtddem7lz52br1q257rrr8vDDD2fRokXT/h3j44em\n/hofgDczNx//+E05fDh54YX/7fYYgONCs9k46hu9HTleMjg4mP3792dycjJJMjk5meeffz6Dg4NH\nXLdkyZLMnTs3SbJs2bIMDg5m586dnZgIAABlOhLdAwMDOfPMM/PlL385SfLlL385Z5555quOluzf\nv3/q62eeeSa7d+/OO9/5zk5MBACAMo12u92Rsxjf+ta3sn79+nzve9/Lr/7qr2bTpk1517velWuu\nuSZr167N2WefnXXr1mX79u1pNpuZO3du1q5dm/PPP/+oXsfxEgAAKv0ix0s6Ft2dIroBAKh0zJ7p\nBgCA2Ux0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADF\nRDcAABQT3QAAUEx0AwBAMdHAkSFcAAAAe0lEQVQNAADFRDcAABQT3QAAUEx0AwBAMdENAADF5nR7\nwExrNhvdngAAQA/7RXqz0W632wVbAACAn3C8BAAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAo\nJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGL/H0
DD/OPJX0Z9AAAAAElFTkSuQmCC\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "tags": [] - } - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MOKP49JMqTog", - "colab_type": "text" - }, - "source": [ - "Uma primeira abordagem bem simples é encontrar os pontos do _box plot_ acima.\n", - "\n", - "Tudo que estiver fora da faixa $[Q1 - 1.5 \\times \\text{IQR}, Q3 + 1.5 \\times \\text{IQR}]$ é considerado um ponto anômalo para aquele padrão:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "z_h0zaVDce0N", - "colab_type": "code", - "outputId": "86b9e772-6438-4820-87ba-dab83a4b1dd8", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "q1 = height_outlier.quantile(0.25)\n", - "q3 = height_outlier.quantile(0.75)\n", - "iqr = q3 - q1\n", - "\n", - "non_outlier_interval_iqr = [q1 - 1.5 * iqr, q3 + 1.5 * iqr]\n", - "\n", - "print(f\"Faixa considerada \\\"normal\\\": {non_outlier_interval_iqr}\")" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Faixa considerada \"normal\": [1.18575, 2.24175]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wsuVvr8hq4Rc", - "colab_type": "text" - }, - "source": [ - "Agora podemos identificar quais pontos encontram-se fora desse intervalo, ou seja, podem ser considerados _outliers_:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "hm78PWbhc9Dz", - "colab_type": "code", - "outputId": "ee3995ea-8a63-4c90-b3dd-57ba673887ee", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 139 - } - }, - "source": [ - "outliers_iqr = height_outlier[(height_outlier < non_outlier_interval_iqr[0]) | (height_outlier > non_outlier_interval_iqr[1])]\n", - "\n", - "outliers_iqr" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "29 0.516665\n", - "38 2.943781\n", - "48 1.058498\n", - "68 2.737088\n", - "91 2.272000\n", - "92 1.164000\n", - "Name: Height, dtype: 
float64" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 53 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XcF70kmerGEq", - "colab_type": "text" - }, - "source": [ - "Se estivermos seguros de que esses pontos representam de fato _outliers_ e que sua remoção não traz prejuízo à nossa análise, então podemos removê-los:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "BVRJS9DNeb9z", - "colab_type": "code", - "colab": {} - }, - "source": [ - "height_no_outlier_iqr = height_outlier.drop(index=outliers_iqr.index)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "urvTyUfHrVrJ", - "colab_type": "text" - }, - "source": [ - "Uma segunda abordagem é observar as estatísticas descritivas dos dados.\n", - "\n", - "Repare no histograma abaixo:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "bc_paOePfHJ5", - "colab_type": "code", - "outputId": "6840da1c-bae6-4465-8aa7-87f69928e182", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 726 - } - }, - "source": [ - "sns.distplot(height_outlier);" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "display_data", - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAtMAAALFCAYAAAABe2+3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3XmMpHd97/vPU/vaXb1U99RszNgH\nzATsw+U69uX4OueCcRjdeDSWUGJfjIxscGScyII/guePEcbEUjSjyFKwTEgskog/iJBBgLwQL/ci\nkTjnZEE+YLxjz3jGPb1WdXft+3P/6K72LN3T1dVV/Wzvl4TUU1Vd/bUfuvvj73yf788wTdMUAAAA\ngC3zWV0AAAAA4FSEaQAAAKBHhGkAAACgR4RpAAAAoEeEaQAAAKBHhGkAAACgR4RpAAAAoEeEaQAA\nAKBHhGkAAACgR4RpAAAAoEeEaQAAAKBHhGkAAACgR4RpAAAAoEcBqwvo1uJiSe22aXUZjjM2llA2\nW7S6DGyA62NvXB974/rYG9fH3rg+6/P5DI2MxLf0OY4J0+22SZjuEf/e7I3rY29cH3vj+tgb18fe\nuD79wZgHAAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAA\nANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA\n0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0KNNw/Ti4qLuuecefeYz\nn9GRI0f0p3/6p8rlcpe8rlKp6Ctf+YpuvvlmHT58WD//+c+7eg4AAABwqk3DtGEY+tKXvqRnn31W\nTz75pPbt26e//Mu/vOR13/3ud5VIJPT888/rO9/5jo4fP65SqbTpcwAAAIBTbRqmU6mUrr/++rU/\nf+xjH9O5c+cued3PfvYz3XbbbZKkAwcO6KMf/ah+8YtfbPocAAAA4FRbmplut9v6x3/8R33qU5+6\n5Llz585pz549a3/OZDKamZnZ9DkAAADAqQJbefGf//mfKxaL6fOf//yg6tnQ2Fhix7+mW6TTSatL\nwGVwfeyN62NvXB974/rYG9enP7oO0ydOnNC7776r73znO/L5Lm1o7969W1NTUxodHZUkTU9Pr42H\nXO65bmWzRbXb5pY+ByvfKPPzBavLwAa4PvbG9bE3ro+9cX3sjeuzPp/P2HIDt6sxj0ceeUS/+c1v\n9NhjjykUCq37msOHD+sHP/iBJOn06dN6+eWXdeONN276HAB4WbMtlWrNgfyv2bb6nw4A3G/TzvRb\nb72lv/mbv9GBAwd0++23S5L27t2rxx57TEePHtXf/u3fanJyUl/84hd17Ngx3XzzzfL5fPrmN7+p\nRGIl2V/uOQDwslqjqf94bXYg7/27hyYVCG9pmg8AsEWGaZqOmJ1gzKM3/DWOvXF97G0nrk+pNtgw\nHXdxmOb7x964PvbG9VnfwMY8AAAAAFyKMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA\n9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0\niDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSI\nMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9Igw\nDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDAN\nAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AA
AD0iDANAAAA9IgwDQAAAPSIMA0A\nAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0ADtBottVum1aXAQC4SMDq\nAgAAl3duoaRf/OqcktGQPv27exUO+q0uCQCwis40ANiUaZp69XRO/+9/vqdw0K/FQk0v/MdZ1Rot\nq0sDAKwiTAOADTVbbb348oz+8/V57ZtM6Jb/dkD/18d3a7FQ1/P/cVa1OoEaAOyAMA0ANlOuNvTs\nv53VO+fy+tgHx/XfP7ZbwYBPe9MJffLju7VUqOv5/yRQA4AdEKYBwGb+xyuzWi7V9MmP79E1V47J\nMIy15/Z0AnVxNVAz8gEAliJMA4CNVGpNnVso6cP7R7RvIrHua/akE/rk/7ZHi4WaXn47u8MVAgDO\nR5gGABt5d6Yg05QO7h667Ov2pOPaP5nUb99bVrPV3qHqAAAXI0wDgI2cms4rlQhpJBne9LUf3p9S\nvdnWqen8DlQGAFgPYRoAbKJQrmt+qbppV7pjYiSqVCKkN84syTQ50AUArECYBgCbOD1dkCQdzHQX\npg3D0FX7U8rla1pYrg6yNADABgjTAGADpmnqnem8JkaiSkSDXX/eFbuHFfT79MaZpQFWBwDYCGEa\nAGxgsVDTcrGug5nklj4vGPDpij1DOj1dULXeHFB1AICNEKYBwAZOTRdkGNIHdm0tTEvSVftSapum\nfvve8gAqAwBcDmEaACxmmqZOTee1ezyuSCiw5c9PJcOaHI3qjTNLanMjIgDsKMI0AFhsbrGicrXZ\n9Y2H67lq/4hK1aam5kt9rAwAsBnCNABY7NR0XgG/seGJh93YP5FQNOznRkQA2GFd/X3iiRMn9Oyz\nz2pqakpPPvmkPvShD13ymq997Wt644031v78xhtv6LHHHtNNN92kRx99VN///vc1MTEhSfr4xz+u\nBx98sE//CADgXM1WW6dnCto3kVAw0Ht/w+cz9MG9Kf367awK5bqSsVAfqwQAbKSrMH3TTTfpzjvv\n1B133LHha06ePLn28euvv64vfOELuvHGG9ceu/XWW/XAAw9so1QAcJ/XTi+q3mhva8Sj48o9Q/r1\n21mdnS3qdw6O9qE6AMBmumqDXHvttcpkMl2/6Q9/+EMdOXJEoRCdEQC4nDfPLsnvM5QZj2/7vZKx\nkFKJkN5jbhoAdkzfZ6br9bqefPJJffazn73g8aefflpHjhzR3XffrZdeeqnfXxYAHOnMbEGjQ2H5\nfUZf3m9POqHZxbLqjVZf3g8AcHlb38G0iRdeeEG7d+/WoUOH1h67/fbbde+99yoYDOrFF1/Ufffd\np2eeeUYjIyNdv+/YWO835nhdOr31vbXYOVwfexvk9Wm12npvrqgPHxhVMhHpy3te9YFRvXIqp1yx\noVgsrPRorC/va1d8/9gb18feuD790fcw/aMf/eiSrnQ6nV77+IYbblAmk9Fbb72l6667ruv3zWaL\narfZn7pV6XRS8/MFq8vABrg+9jbo63NmtqB6s62hWFCFYrUv7xkL+xQK+vTbs4sql2uab7m3Q833\nj71xfeyN67M+n8/YcgO3r2MeMzMz+uUvf6kjR45c8Pjs7Ozax6+99pqmpqZ08ODBfn5pAHCc0zMr\nv8jGh/vTlZYkn2Fobzqh9+ZpQADATuiqM/3www/rueee08LCgu666y6lUik9/fTTuueee3T//ffr\n6quvliT9+Mc/1ic/+UkNDw9f8PmPPPKIXnnlFfl8PgWDQZ08efKCbjUAeNGp6byiYb+SsWBf33dP\nOq53zuV1ejqvq68Y6+t7AwAuZJimM86eZcyjN/w1jr1xfext0NfnG3//74qGA7r+dyb7+r71Rks/\n+P9+q09fu0//z00f7Ot72wnfP/bG9bE3rs/6LB/zAAB0p9FsaWq+pP2T/b8BKBT0a3Ikpt+8k+37\newMALkSYBgALnJktqtU29
YFdg7mbfm86rulsWQtLlYG8PwBgBWEaACxwajovSQPpTEvS3omVv6b8\n1dt0pwFgkAjTAGCBU9N5DcdXTiwchKF4SOlUVL/67cJA3h8AsKLve6YBAJs7NV3QwcyQDKM/Jx+u\n56NXjuqf/9c55Qo1hUP+vr53OBhQgHYMABCmAWCnlatNzeTK+sRH+rvF42If3j+in/9ySk/966m+\nj5P87qFJBcL8CgEA+goAsMPenVmZlz6YGRro1zm4e0jBgE/vzZcG+nUAwMsI0wCww06tnnx4YMBh\n2u/3afd4XO/NFeWQIwUAwHEI0wCww05N55VORZSI9vfkw/XsGY+rWm9pqVgf+NcCAC8iTAPADjs9\nnR/4iEfHrtGYJGkmV96RrwcAXkOYBoAdtFyqK5uv6cCunQnTiVhQiWhQs4RpABgIwjQA7KDOYS1X\n7N6ZMC1Jk6NRzeTKzE0DwAAQpgFgB52ezsswpA8M6OTD9ewajaneaGupWNuxrwkAXkGYBoAddGq6\noN3j8b4fonI5k5256Wxlx74mAHgFYRoAdohpmjo1ndfBHZqX7khEV+amuQkRAPqPMA0AOyS7XFWx\n0tDBzM6NeHTsGo1pdpG5aQDoN8I0AOyQs/NFSer70d7d2DUWVb3RVq7A3DQA9BNhGgB2yEx2Zcwi\nMxbb8a/dmZtmRR4A9BdhGgB2yHSurKF4SLHI4E8+vFg8ElQyFlwL9ACA/iBMA8AOmcmV104ktMLK\n3HRFbeamAaBvCNMAsENmstaG6cnRmBrNthbzzE0DQL8QpgFgBxQrDRUrDUvmpTs6QZ4VeQDQP4Rp\nANgBnVllKzvTsUhAQzH2TQNAPxGmAWAHTOdKkqRdFnamO19/brGidpu5aQDoB8I0AOyAmWxZAb+h\n8eGIpXV05qZzhaqldQCAWxCmAWAHzOTKmhiJye+z9sfu+3PTFUvrAAC3IEwDwA6wei1eRzQc0HA8\npFn2TQNAXxCmAWDAmq225hYrlm7yON/kaFRzSxWZ7JsGgG0jTAPAgC0sV9Vqm7boTEtSOhVVo9nW\nUrFudSkA4HiEaQAYMDusxTvfxEhUkjS/xNw0AGwXYRoABswua/E6EtGgIiG/5hcJ0wCwXYRpABiw\nmWxZQ7Gg4pGg1aVIkgzDUDq1MjcNANgewjQADJhdNnmcLz0SVaHcUKXWtLoUAHA0wjQADNhMrmyb\nEY+OidTK4THMTQPA9hCmAWCAipWGCuWGdo3GrS7lAmNDEfkMaX6JkxABYDsI0wAwQDO51U0eNutM\n+/0+jQ5F6EwDwDYRpgFggDpr8TI2m5mWVlbkdXZgAwB6Q5gGgAGazpXk9xkaX51RtpN0Kqp221Qu\nz6gHAPSKMA0AAzSTLWtiJCq/z34/btMpDm8BgO2y3093AHARO67F64hFAkpEg5rj8BYA6BlhGgAG\npNVua26xosyYvTZ5nC+dWrkJ0TSZmwaAXhCmAWBAFpZWbu6za2daWjm8pVJrqVTh8BYA6AVhGgAG\nZNqma/HON7E6N83R4gDQG8I0AAxIZy2enTvTqWRYAb/BTYgA0CPCNAAMyEyupGQsqEQ0aHUpG/IZ\nhtKpKGEaAHpEmAaAAZnJ2neTx/nSqagW8zU1mm2rSwEAxyFMA8CA2Hkt3vnSqahMSQvLdKcBYKsI\n0wAwAOVqU/lywyFheuV0RvZNA8DWEaYBYAA6Xd7OKYN2Fgr6NZwIaWGZY8UBYKsI0wAwAPNLK8F0\nfLXra3fjQxFll6sc3gIAW0SYBoAB6HSmx4ft35mWpLHhiKr1lspVDm8BgK0gTAPAACwsVRUN+xWP\nBKwupSvjwysddEY9AGBrCNMAMADzyxWND0dlGIbVpXRlJBmWYUhZwjQAbAlhGgAGYGG5utbtdQK/\n36eRZFjZPGEaALaCMA0AfWaaphaWK47Y5HG+8WFuQgSArSJMA0Cf5csN1RttR3WmJWlsKKJ
6s61C\nuWF1KQDgGIRpAOizhaXVTR4O60yPrYZ/5qYBoHuEaQDos/nOgS0O60ynEmH5fQYbPQBgCwjTANBn\nC50DWxyyY7rD5zM0OsRNiACwFYRpAOizheWKhmJBhUN+q0vZsrHhiHL5qtrchAgAXSFMA0CfzS9V\nHTcv3TE+HFGzZWq5WLe6FABwBMI0APTZwnLFcZs8OsaGuAkRALaCMA0AfdRum8rla47bMd0xFA8p\nGPBxEyIAdIkwDQB9lCtU1Wqbju1MG4ahsaEINyECQJcI0wDQR2ubPBzamZakseGwFvM1tdrchAgA\nmyFMA0AfOXXH9PnGhqNqm6YWCzWrSwEA2yNMA0AfLSxVZRjS6JBzw/Q4NyECQNcI0wDQRwvLFY0m\nwwr4nfvjNR4NKBz0E6YBoAvO/WkPADY0v1x13MmHFzMMQ2PDES2sjqwAADbWVZg+ceKEPvWpT+mq\nq67Sm2++ue5rHn30UX3iE5/Q0aNHdfToUT300ENrz1UqFX3lK1/RzTffrMOHD+vnP/95f6oHAJtZ\nWKpoPOXcEY+O8eGIlot1NVttq0sBAFsLdPOim266SXfeeafuuOOOy77u1ltv1QMPPHDJ49/97neV\nSCT0/PPP6/Tp07rjjjv03HPPKR6P91Y1ANhQo9nSUrGutMM709LKseKmpFy+qomRmNXlAIBtddWZ\nvvbaa5XJZHr+Ij/72c902223SZIOHDigj370o/rFL37R8/sBgB11DjpxQ2f6/ZMQ2egBAJfT15np\np59+WkeOHNHdd9+tl156ae3xc+fOac+ePWt/zmQympmZ6eeXBgDLrYVpF3Smo2G/IiE/6/EAYBNd\njXl04/bbb9e9996rYDCoF198Uffdd5+eeeYZjYyM9OX9x8YSfXkfL0qnk1aXgMvg+tjbVq5P9a0F\nSdKHrxzXWJeB2syVlUwMppMdDAa29d7jqaiWy/V13yMWCys9av34B98/9sb1sTeuT3/0LUyn0+m1\nj2+44QZlMhm99dZbuu6667R7925NTU1pdHRUkjQ9Pa3rr79+S++fzRbV5jSuLUunk5qfL1hdBjbA\n9bG3rV6fU+8tKeD3qVlraH6+2dXnlGtNFYqDWUHXaGzvvYdiQb1+pqTlfEU+n3HBc+VyTfOt1nZL\n3Ba+f+yN62NvXJ/1+XzGlhu4fRvzmJ2dXfv4tdde09TUlA4ePChJOnz4sH7wgx9Ikk6fPq2XX35Z\nN954Y7++NADYwsJSRWPDEfkMY/MXO8BIMqx221S+XLe6FACwra460w8//LCee+45LSws6K677lIq\nldLTTz+te+65R/fff7+uvvpqPfLII3rllVfk8/kUDAZ18uTJtW71F7/4RR07dkw333yzfD6fvvnN\nbyqRYGwDgLvML1UdfYz4xUaSYUnSYr6mVCJscTUAYE9dhenjx4/r+PHjlzz++OOPr3184sSJDT8/\nFovpW9/6Vg/lAYBzLCxXdHD3kNVl9M1wIiyfIeUKNR20uhgAsClOQASAPihXmypVm67qTPt9hoYT\nYTZ6AMBlEKYBoA86R2+Pp5y/Fu98I8mwFguDuUESANyAMA0AfTC/1Nkx7Z7OtLQSpiu1lqr17raT\nAIDXEKYBoA86nem0CzvTkhj1AIANEKYBoA8WlqqKhv2KR/q2vt8WRofe3+gBALgUYRoA+mB+uaLx\n4agMl+yY7oiEAoqG/crRmQaAdRGmAaAPsstV181Ld4wkI4x5AMAGCNMAsE2maWohX9XYkFvDdFjL\nxZpabdPqUgDAdgjTALBNlVpTtXpLY67tTIfVNqV8ie40AFyMMA0A25RdvTnPrZ3p0dWNHjluQgSA\nS7jrtnMAGIBmW6o1Nt6zPLVQkiRFowGValvbx+yEyYmheEg+n8HcNACsgzANAJuoNZr6j9dmN3z+\n9XcXJUlnZgqaX6xs6b3/64fS26ptJ/h8hlKJEGEaANb
BmAcAbFOp2pTPMBQJ+a0uZWBWjhWvyTQd\n0EoHgB1EmAaAbSpVGopHA67bMX2+0WRE1XpLlVrL6lIAwFYI0wCwTaVqQ/FI0OoyBopjxQFgfYRp\nANimUrWpeNTdt6C8H6arFlcCAPZCmAaAbWi3TVWqTdd3psMhv2KRAJ1pALgIYRoAtqFcbcqUXN+Z\nlt6/CREA8D7CNABsQ6nakCTXd6allcNblkt1tVptq0sBANsgTAPANngpTI8kwzJNaalUt7oUALAN\nwjQAbEOpsnLioRfGPFKJlZsQl4uEaQDoIEwDwDaUqg2Fg34F/O7/cZqMh2QY0nKRuWkA6HD/T38A\nGKBipamEB7rSkuT3GUpGg1pmzAMA1hCmAWAbStWG4lH3z0t3DCfCjHkAwHkI0wDQI9M0V44S98DN\nhx3DiZDy5bqabPQAAEmEaQDoWb3ZVrNlKh7xxpiHJA3HQzJNaX6pYnUpAGALhGkA6FGpsroWz0Nj\nHp2NHrO5ssWVAIA9EKYBoEel6upaPA91pofiIUnSDGEaACQRpgGgZ17sTAcDPsUjAc1kCdMAIBGm\nAaBnpWpDPp+hSMhvdSk7ajgRZswDAFYRpgGgR6VKU/FIQIZhWF3KjhqOhzSbq6jdNq0uBQAsR5gG\ngB6Vqt5ai9cxnAip0WprIV+1uhQAsBxhGgB6VKo0FffI6YfnSyVWbkKcXihZXAkAWI8wDQA9aLdN\nlWtNb3am4yvr8aa5CREACNMA0ItyZy2eBzvT4ZBfyVhQ5+hMAwBhGgB6UayursXzYGdaknaNxjSd\nJUwDAGEaAHrQ2TGd8NCO6fNNjsZ0LluWabLRA4C3EaYBoAed0w9jHjr98Hy7xmKq1JpaKtatLgUA\nLEWYBoAelCoNRUJ+Bfze/DG6azQmSYx6APA8b/4WAIBtKlVXDmzxqvfDNBs9AHgbYRoAelCqNhT3\n6Ly0JA3FQ4qGA2z0AOB5hGkA2CLTNFWqePP0ww7DMLR7jI0eAECYBoAtqjfaarZMT495SFJmLK5z\njHkA8DjCNABsUamzY9rDYx6StHs8rnypruLqmkAA8CLCNABsUcnDpx+eLzPGRg8AIEwDwBZ1Dmzx\n8sy0JGXG45LY6AHA2wjTALBFpWpDPp+hSMhvdSmWGh+KKBTwsdEDgKcRpgFgi0qVlR3ThmFYXYql\nfD5Du0ZjOseYBwAPI0wDwBaVqt5ei3e+zHhc0wuMeQDwLsI0AGxRqdpUzONr8ToyYzFl81XVGi2r\nSwEASxCmAWAL2m1TFY8fJX6+yZGVjR7zixWLKwEAaxCmAWALKrWmTLHJo2NyNCpJml1k1AOANxGm\nAWALOjumYx7fMd0xkVrpTM/RmQbgUYRpANiCtdMP6UxLkmKRgJKxIJ1pAJ5FmAaALSh3Tj9kZnrN\n5EhMszk60wC8iTANAFtQqjYU9PsUDPDjs2NyJEpnGoBn8dsAALagVGkqFuXAlvNNjMa0VKyrVmc9\nHgDvIUwDwBaUqw1GPC4yObKy0WNuiVEPAN5DmAaALVg5sIWbD8/X2TU9m2PUA4D3EKYBoEutdlvV\neovO9EUmRtg1DcC7CNMA0KX3N3nQmT5fNBzQUDykWXZNA/AgwjQAdKlUWT2whc70JSZHoppjzAOA\nBxGmAaBLHNiyscmRmGa5ARGABxGmAaBLa2MeHCV+iYmRqJaLdVXrTatLAYAdRZgGgC6Vqg2Fg34F\n/PzovNjk6MpGjznmpgF4DL8RAKBLK2vx6EqvZ3JtowdhGoC3EKYBoEulCge2bKSzHm+O9XgAPIYw\nDQBdKlebike5+XA9kVBAw4mQZnN0pgF4C2EaALrQaLZVb7YZ87iMyVSUg1sAeE5XvxVOnDihZ599\nVlNTU3ryySf1oQ996JLXPPbYY3rmmWfk8/kUDAb11a9+VTfeeKMk6dixY/rXf/1XjYyMSJIOHz6s\nL3/5y338xwCAwWI
t3uYmRmP69dtZq8sAgB3VVZi+6aabdOedd+qOO+7Y8DXXXHON7r77bkWjUb3+\n+uv6/Oc/r3/5l39RJBKRJP3xH/+xPv/5z/enagDYYe+ffkhneiOTI1HlS3VVak1Fw/x7AuANXY15\nXHvttcpkMpd9zY033qhodOUGlKuuukqmaWppaWn7FQKADXQ604x5bGxyhPV4ALxnIDPTP/nJT7R/\n/37t2rVr7bG///u/15EjR3Tffffp7bffHsSXBYCBef8occY8NtLZNc3cNAAv6XuL5d///d/1V3/1\nV/q7v/u7tce++tWvKp1Oy+fz6Sc/+Ym+9KUv6YUXXpDf7+/6fcfGEv0u1TPS6aTVJeAyuD72lk4n\nZebKarRMxSIBpYaifX3/YDCgZCLS1/fcifeOxcJKr4bnjuTqv5tivbVj/7/m+8feuD72xvXpj76G\n6Zdeekl/9md/pm9/+9u64oor1h6fnJxc+/jWW2/VX/zFX2hmZkZ79uzp+r2z2aLabbOf5XpCOp3U\n/HzB6jKwAa6PvXWuT7nW1FKhqlg4oEKx2tev0Wg0+/6eO/He5XJN863WJY+nEiGdOru0I/+/5vvH\n3rg+9sb1WZ/PZ2y5gdu3MY9f//rX+upXv6pvfetb+shHPnLBc7Ozs2sf//M//7N8Pt8FARsA7I7T\nD7szORLjFEQAntLVb4aHH35Yzz33nBYWFnTXXXcplUrp6aef1j333KP7779fV199tR566CFVq1V9\n/etfX/u8kydP6qqrrtIDDzygbDYrwzCUSCT013/91woE+KUEwBlM01S52tCe8bjVpdje5GhU/+ut\nBavLAIAd01WiPX78uI4fP37J448//vjaxz/60Y82/Px/+Id/2HplAGAT5VpTzZbJWrwuTI7ElC83\nWI8HwDM4AREANrFYqEmSYhwlvqmJETZ6APAWwjQAbGJpNUzTmd7c5MjKRo/ZHHPTALyBMA0Am1gk\nTHct3QnTdKYBeARhGgA2sVioyTCkCDPAmwoH/RpJhjkFEYBnEKYBYBNLhZpi4YB8hmF1KY4wORKl\nMw3AMwjTALCJxUJNcW4+7NrESFTzS4M5LAYA7IYwDQCbWCzUOLBlC8aHo8qX6qrVLz0hEQDchjAN\nAJfRNk0tFWuKR+hMdyudWrkJcWGZuWkA7keYBoDLKJTqarU5sGUrOmGaUQ8AXkCYBoDLyHUObCFM\ndy2dikiS5pfoTANwP8I0AFxGLr/SXWXMo3uJaFCRkJ8wDcATCNMAcBm5/OqBLVE6090yDEPjw1HC\nNABPIEwDwGXkClUF/T6Fg36rS3GUdCqihWVmpgG4H2EaAC4jl68plQzL4MCWLUmnVjrTpmlaXQoA\nDBRhGgAuI5evaiQZtroMx0nJt8vAAAAgAElEQVSnoqo328qX6laXAgADRZgGgMvIFVY609ga1uMB\n8ArCNABsoNVqa6lYozPdA9bjAfAKwjQAbCCbr8o0pZEEYXqrxocJ0wC8gTANABtYWA2CjHlsXTDg\n10gyrHmOFAfgcoRpANhAJ0wz5tGb9HCEmWkArkeYBoANEKa3p7MeDwDcjDANABuYX6ooEvIrGub0\nw16kU1EtFWpqNFtWlwIAA0OYBoANLCxVNDYUsboMxxpPRWRKnIQIwNUI0wCwgfmlikaGGPHoFbum\nAXgBYRoANrCwVNFoks50rzpheoGNHgBcjDANAOtoNFtaLtY1Sme6Z8PxkIIBHzchAnA1wjQArCNX\nqEkSneltMAxjdaMHYx4A3IswDQDryOVXwzSd6W0ZH47QmQbgaoRpAFhHLr/STR1lm8e2dHZNm6Zp\ndSkAMBCEaQBYR2fMgwNbtiediqpab6lYaVhdCgAMBGEaANaxmK8qGQspHPRbXYqjpVMrnX12TQNw\nK8I0AKwjV6gpPRK1ugzHe3/XNHPTANyJMA0A68jmq2tBEL0bH17pTBOmAbgVYRoA1
pHL1zROmN62\nSCigoViQMA3AtQjTAHCRSq2pSq1JmO4Tdk0DcDPCNABcpLPJgzDdH531eADgRoRpALjI4uqOaWam\n+2M8FVUuX1Oz1ba6FADoO8I0AFyEznR/pVMRtU1z7d8rALgJYRoALpLLV2VIGhvm9MN+SA+zHg+A\nexGmAeAiuXxNQ4mQAn5+RPYDu6YBuBm/KQDgIrlCVWNDdKX7ZSQZlt9nEKYBuBJhGgAuks3XNJoM\nW12Ga/h8hsaGI1pgPR4AFyJMA8B5TNPUYr6qUTrTfZUejmhhmc40APchTAPAeUrVpurNNp3pPhsb\njmphmc40APchTAPAeXKrO6bpTPdXOhVRodxQtd60uhQA6KuA1QUAgJ3k8iu7kEeG6ExfjuEzVKp1\nH4yT8ZAk6ex8SbvH45d9bTgYUIBWDwCHIEwDwHlyhdXOdJLO9OXUGi396s35rl/f2eTxP1+Z0b6J\nxGVf+7uHJhUI8+sJgDPw3/4AcJ5cvia/z9DwaicV/ZGIBiVJxUrD4koAoL8I0wBwnlyhqlQiLJ/P\nsLoUV4mE/Ar4DRXLhGkA7kKYBoDz5PI1jTEv3XeGYSgeDdKZBuA6hGkAOE+OHdMDkyBMA3AhwjQA\nrGqbphYLNTZ5DAhhGoAbEaYBYFW+VFerbbLJY0AS0aAazbZqjZbVpQBA3xCmAWBVZ8f0KJ3pgVjb\n6MFNiABchDANAKs6px+OMTM9EIkY6/EAuA9hGgBWcZT4YLFrGoAbEaYBYFU2X1M46Fc8wul7gxAO\n+hUM+AjTAFyFMA0Aq1bW4oVlGBzYMihs9ADgNoRpAFiVzVeZlx4wwjQAtyFMA8AqDmwZvEQ0qFKl\nIdM0rS4FAPqCMA0AkuqNlvLlBkeJD1giFlSzZapaZ9c0AHcgTAOApMVCZ8c0nelBSrLRA4DLEKYB\nQCvz0hJhetDihGkALkOYBgC9H6YZ8xgsTkEE4DaEaQDQ+0eJjyTpTA9SMOBTJOSnMw3ANQjTAKCV\nTR7D8ZCCAX4sDhrr8QC4Cb81AECsxdtJccI0ABchTAOAVo4SZ156Z3R2TbfZNQ3ABQjTADzPNE06\n0zsoGQ2qbUqVatPqUgBg2zYN0ydOnNCnPvUpXXXVVXrzzTfXfU2r1dJDDz2kT3/607r55pv1xBNP\ndPUcANhBsdJQvdnmKPEdkoixHg+AewQ2e8FNN92kO++8U3fccceGr3nyySd15swZPffcc1paWtKt\nt96qT3ziE9q7d+9lnwMAO+hs8qAzvTMS5+2anrS4FgDYrk0709dee60ymcxlX/PMM8/oD//wD+Xz\n+TQ6OqpPf/rT+qd/+qdNnwMAO1jbMT3MzPROiEdX+jh0pgG4QV9mpqenp7V79+61P2cyGc3MzGz6\nHADYAacf7iy/z6dYOMDBLQBcYdMxD7sYG0tYXYJjpdNJq0vAZXB9rFdptBUK+HTF/lEZhnHBc+l0\nUmaurGRiMEE7GAx48r2HE2FV6q113yMWCys9Guvqffj+sTeuj71xffqjL2E6k8no3LlzuuaaayRd\n2I2+3HNbkc0W1W6zRmmr0umk5ucLVpeBDXB97OG92YJGkmEtLBQveLxzfcq1pgrF6kC+dqPhzfeO\nhv2azZXXfY9yuab5VmvT9+D7x964PvbG9Vmfz2dsuYHblzGPw4cP64knnlC73VYul9MLL7ygz3zm\nM5s+BwB2wFq8nZeIBlWuNmmSAHC8TcP0ww8/rN/7vd/TzMyM7rrrLv3BH/yBJOmee+7Ryy+/LEk6\nevSo9u7dq9///d/XH/3RH+lP/uRPtG/fvk2fAwA7yOarrMXbYYloUKakUpW5aQDOtumYx/Hjx3X8\n+PFLHn/88cfXPvb7/XrooYfW/fzLPQcAVmu22soX6xrl9MMddf56vGQsZHE1ANA7TkAE4GmLhZpM\nic70DlsL02z0AOBwhGkAnpbrrMUbJkzvpFgkI
MNg1zQA5yNMA/C0tQNb6EzvKJ/PUDwSJEwDcDzC\nNABPy3aOEk8yM73TElHCNADnI0wD8LRcvqpkLKhQ0G91KZ5DmAbgBoRpAJ6WZce0ZRKxoCq1lpqt\nttWlAEDPCNMAPC2XrzEvbZHORo8S3WkADkaYBuBZpmmudKaZl7ZEIrpy1AGjHgCcjDANwLPKtaZq\n9RZjHhZJRFcOaykQpgE4GGEagGdll1fX4rFj2hLRsF8+n8GYBwBHI0wD8KxcYXUtHkeJW8IwjJWN\nHpyCCMDBCNMAPCvHgS2WS0QDzEwDcDTCNADPyuar8vsMDcVDVpfiWYlokJlpAI5GmAbgWbl8TaND\nYfkMw+pSPCsRDareaKvebFldCgD0hDANwLOy+SojHhZLxFb+VoCbEAE4FWEagGflOP3Qcp1d0wVu\nQgTgUIRpAJ7Uare1WKixycNi75+C2LS4EgDoDWEagCct5msyTTZ5WC0c9CvgN9joAcCxCNMAPCm7\nuhZvfDhqcSXe1tk1zUYPAE5FmAbgSQucfmgbKwe31K0uAwB6QpgG4ElrR4kzM225RCyoUqUp0zSt\nLgUAtowwDcCTFvJVDcdDCgb8VpfieYloUI1WW7VG2+pSAGDLCNMAPCm7XGXEwyY6Gz24CRGAExGm\nAXhSNl/VOGHaFgjTAJwsYHUBANAPzbZUa3S3q7htmsrlq7rmyjGVaht/jpkrq1xrqs0o70AlYoRp\nAM5FmAbgCrVGU//x2mxXry1Xm2q2TOXL9ct+TjIRUaFY1X/9ULpfZWIdoYBfoaBPRU5BBOBAjHkA\n8JzSage0M14A6yWiQTrTAByJMA3AczqhLREhTNtFIhpc+48cAHASwjQAzylWV0JbnM60bXQ60+ya\nBuA0hGkAnlOqNBQO+hUM8CPQLhKxoFptU5Vay+pSAGBL+E0CwHOKlabiUe6/thPW4wFwKsI0AM8p\nVRrcfGgzhGkATkWYBuAppmmqVG0ozs2HtkKYBuBUhGkAnlJrtNRsmXSmbSbg9yka9hOmATgOYRqA\npxQrKyceMjNtP/FIkINbADgOYRqAp3Bgi30lYhzcAsB5CNMAPKUTptkxbT/JaFClakPtNrumATgH\nYRqApxQrDQX9PoXYMW07iWhQpiktFWtWlwIAXeO3CQBPKVZXdkwbhmF1KbhIIrbytwULy1WLKwGA\n7hGmAXgKO6btKxkNSZIWlioWVwIA3SNMA/CUYqXBvLRNxaIB+Qw60wCchTANwDPqjZYazTadaZvy\nGYbi0SCdaQCOQpgG4BmlKps87C4ZC9GZBuAohGkAntE5sCUR4cAWu0rGglpYrsg0WY8HwBkI0wA8\no8iOadtLxoKq1FoqVZtWlwIAXSFMA/CMUqUhv89QJOS3uhRsIBlb2egxt8jcNABnIEwD8IzOWjx2\nTNtXcvVvDeaWyhZXAgDdIUwD8IxiZeXAFthX5+CWeTrTAByCMA3AM0rVhuIR5qXtLOD3aTgeYswD\ngGMQpgF4QrPVVrXeYse0A4ynIppj1zQAhyBMA/AENnk4x/hwlDANwDEI0wA8odTZMc3MtO2lU1Et\nF+uqNVpWlwIAmyJMA/CE0mpnmjEP+xtPRSRJ83SnATgAYRqAJxQrDfkMKRqmM21348NRSWz0AOAM\nhGkAnlCsNhSLsGPaCTqdaeamATgBYRqAJ3QObIH9xSNBxcIBwjQARyBMA/CEYqWxdiAI7C89EmXM\nA4AjEKYBuF6z1Valxo5pJ5lIsR4PgDMQpgG4XmfHdJIw7RgTI1Fll6tqtdtWlwIAl0WYBuB6xfLq\nWjzGPBwjnYqq1TaVy9esLgUALoswDcD1iuyYdpyJ1Mp6PEY9ANgdYRqA6xXKDQX8hiIhv9WloEsT\nI+yaBuAMhGkArldcXYvHjmnnSCXDCvh9dKYB2B5hGoDrFdkx7Tg+w1A6FaEzDcD2CNMAXM00TRXL\n7Jh2ojTr8
QA4AGEagKvVGi01Wm0loyGrS8EWdXZNm6ZpdSkAsCHCNABXW9vkQWfacdIjUdXqLRVW\nVxsCgB0RpgG4WieIMTPtPKzHA+AEhGkArsaOaediPR4AJwh086JTp07p2LFjWlpaUiqV0okTJ3Tg\nwIELXvO1r31Nb7zxxtqf33jjDT322GO66aab9Oijj+r73/++JiYmJEkf//jH9eCDD/bvnwIANlAs\nNxQJ+RUM0DtwmvHhqAzRmQZgb12F6QcffFCf+9zndPToUf30pz/V17/+dX3ve9+74DUnT55c+/j1\n11/XF77wBd14441rj91666164IEH+lQ2AHSHtXjOFQz4NDIU1hydaQA2tmmrJpvN6tVXX9Utt9wi\nSbrlllv06quvKpfLbfg5P/zhD3XkyBGFQtw9D8BaBdbiOdpEKqp5OtMAbGzTMD09Pa3JyUn5/SvH\n8Pr9fk1MTGh6enrd19frdT355JP67Gc/e8HjTz/9tI4cOaK7775bL730Uh9KB4DLa7dNlaoNJelM\nOxa7pgHYXVdjHlvxwgsvaPfu3Tp06NDaY7fffrvuvfdeBYNBvfjii7rvvvv0zDPPaGRkpOv3HRtL\n9LtUz0ink1aXgMvg+vSHmSsrmYhc8Fi+VJdpSuMjsUue61YyEVEwGOj58zfDe18qFgsrPRqTJF2x\nb0T//OtpxZMRxSKX/kcR3z/2xvWxN65Pf2wapjOZjGZnZ9VqteT3+9VqtTQ3N6dMJrPu63/0ox9d\n0pVOp9NrH99www3KZDJ66623dN1113VdaDZbVLvN4v6tSqeTmp8vWF0GNsD16Z9yralCsXrBY7PZ\nsiQp4NMlz3UjmYioUKyq0bj0vfuF975UuVzTfKslSUqGV/5W9DdvzulgZuiC1/H9Y29cH3vj+qzP\n5zO23MDddMxjbGxMhw4d0lNPPSVJeuqpp3To0CGNjo5e8tqZmRn98pe/1JEjRy54fHZ2du3j1157\nTVNTUzp48OCWCgWArSpU6pLE6YcOtmu1Qz2z+h9GAGA3XY15fOMb39CxY8f07W9/W0NDQzpx4oQk\n6Z577tH999+vq6++WpL04x//WJ/85Cc1PDx8wec/8sgjeuWVV+Tz+RQMBnXy5MkLutUAMAjFckOG\nIcUifZ9oww6ZGInKMKTpHGEagD119Rvmyiuv1BNPPHHJ448//vgFf/7yl7+87ud3wjcA7KRCpaF4\nJCifz7C6FPQo4PcpPRzVDGEagE1xigEA1yqyFs8Vdo3FGPMAYFuEaQCuVaywFs8Ndo3GNLdYVtvk\nJnQA9kOYBuBKjWZb1XqL0w9dYNdoTPVmW7n8YLaHAMB2EKYBuFKx0pAkxjxcYG2jB3PTAGyIMA3A\nlTphmjEP59s1xno8APZFmAbgSsUynWm3GI6HFAn56UwDsCXCNABXKlTqCvgNhYN+q0vBNhmGoV2j\nMc0SpgHYEGEagCsVyw0lYyEZBjum3WDXWIzONABbIkwDcKVipcEmDxfZNRpTNl9TrdGyuhQAuABh\nGoDrmKZJmHaZzkYPRj0A2A1hGoDrVOstNVsmNx+6COvxANgVYRqA67AWz30mCdMAbIowDcB1CqzF\nc51w0K+xoTBhGoDtEKYBuM7a6Yd0pl1l12iMg1sA2A5hGoDrFMp1RcN+Bfz8iHOTXaNxzeTKMk3T\n6lIAYA2/aQC4TmF1xzTcZddYTNV6S8ulutWlAMAawjQA1ymU6xoiTLvO2kYPRj0A2AhhGoCrNJpt\nVWotJePMS7sN6/EA2BFhGoCr5MsrIwB0pt1nZCisUMBHmAZgK4RpAK7SWYuXZC2e6/gMQ5OjMcI0\nAFshTANwlcLqzWncgOhOk6zHA2AzhGkArpJfXYsXDPDjzY12jcY0v1xRo9m2uhQAkESYBuAyhXKD\neWkXy4zGZJrS3FLF6lIAQBJhGoDL5Et1JeOEabfaNcZ6PAD2QpgG4Br1Zkv
VeoubD13s/fV4JYsr\nAYAVhGkArlEorWzyYMzDvaLhgIbjITZ6ALANwjQA1yh0dkxzYIurZcZimmbMA4BNEKYBuEZ+dcd0\nIkpn2s32jCc0tVBSu21aXQoAEKYBuEehVFc0HGAtnsvtmYirVm9pbpHuNADr8RsHgGvky3UNcfOh\n6+1NJyRJ707nLa4EAAjTAFykUG6wFs8D9ozHJUmnZwjTAKxHmAbgCpVaU9V6i860B0TDAY0PR/Tu\ndMHqUgCAMA3AHeZXT8RLshbPE/amEzrNmAcAGyBMA3CFTpgeYszDE/ak45qaL6rRbFtdCgCPI0wD\ncIX3O9OMeXjB3nRC7bap6SwnIQKwFmEagCvML1YUCwcU8PNjzQv2plduQpyaJ0wDsBa/dQC4wvxS\nVUlOPvSMydGYAn5D780XrS4FgMcRpgG4wvxSRUPcfOgZAb9PeyeSeo/ONACLEaYBOF652lSx0mBe\n2mMOZIboTAOwHGEagOPNrh4rzSYPb/lAZkiLhZpK1YbVpQDwMMI0AMfrhGl2THvLgcyQJG5CBGAt\nwjQAx5tbZC2eF31g10qYZtQDgJUI0wAcbzZXUSoRYi2ex4ynIoqGA9yECMBS/OYB4Hhzi2WlR6JW\nl4EdZhiG9qTjdKYBWIowDcDxZhcrmkgRpr1obzqhqfmSTNO0uhQAHkWYBuBopWpDxUpD44RpT9qb\njqtSayqXr1ldCgCPIkwDcLTOzYdpwrQn7U0nJHETIgDrEKYBONpsbmUtHmHam/ak45II0wCsQ5gG\n4GjT2bIMgzDtVfFIUCPJMLumAViGMA3A0c5lS5pIRRUM8OPMq/amE3SmAViG3z4AHG06W1ZmLG51\nGbDQ3nRc09mymq221aUA8CDCNADHarbams2VlRmPWV0KLLQ3nVCrbWpmdX4eAHYSYRqAY80vVdRq\nm9pNZ9rTuAkRgJUI0wAc69zCSidy9zhh2ssyY3H5fYbem+MmRAA7jzANwLGmsyvhadcoYx5eFgz4\ntHs8rndn8laXAsCDCNMAHOtctqTRobCi4YDVpcBiBzNJnZ4pcKw4gB1HmAbgWNMLbPLAigOZIZWq\nTc0tVawuBYDHEKYBOFLbNDWdK3HzISRJB3cNSZJOTTPqAWBnEaYBOFJuuap6o81aPEha2egRDPh0\nerpgdSkAPIYwDcCRzmVXN3nQmYakgN+n/ZMJOtMAdhxhGoAjdTZ5sBYPHQd3Dend2YJabU5CBLBz\nCNMAHOncQknJWFCJaNDqUmATBzNDqjfaml7gJEQAO4cwDcCRprNs8sCFDmSSkrgJEcDOIkwDcBzT\nNDWdLTHigQtMjsYUDft1aoabEAHsHMI0AMfJl+oqVZvKjLHJA+/zGYYO7BqiMw1gRxGmATjO2iYP\nOtO4yIFMUu/NFdVochMigJ1BmAbgOGubPJiZxkUO7hpSq23q7FzR6lIAeARhGoDjnFsoKRLyK5UI\nWV0KbOZghpMQAewswjQAx5nOlrV7PC7DMKwuBTYzOhTWUCyo04RpADukqzB96tQp3XbbbfrMZz6j\n2267TadPn77kNY8++qg+8YlP6OjRozp69KgeeuihtecqlYq+8pWv6Oabb9bhw4f185//vG//AAC8\n51y2xM2HWJdhGDqQGWKjB4AdE+jmRQ8++KA+97nP6ejRo/rpT3+qr3/96/re9753yetuvfVWPfDA\nA5c8/t3vfleJRELPP/+8Tp8+rTvuuEPPPfec4nHmHQFsTbna0HKxzrw0NnQwM6SX386qUmsqGu7q\n1xwA9GzTznQ2m9Wrr76qW265RZJ0yy236NVXX1Uul+v6i/zsZz/TbbfdJkk6cOCAPvrRj+oXv/hF\njyUD8LLOJo8MmzywgYOZpExJZ2bpTgMYvE3D9PT0tCYnJ+X3+yVJfr9fExMTmp6evuS1Tz/9tI4c\nOaK7775bL7300trj586d0549e9b+nMl
kNDMz04/6AXjM9EJnkwdjHljfgbWbEAnTAAavb3//dfvt\nt+vee+9VMBjUiy++qPvuu0/PPPOMRkZG+vL+Y2OJvryPF6XTSatLwGVwfbZmsdxQMODTh//LhPy+\n929ANHNlJRORvn+9ZCKiYDAwkPeWxHuvIxYLKz3a3X8srff9k5Y0MRLV9GKF7y+L8e/f3rg+/bFp\nmM5kMpqdnVWr1ZLf71er1dLc3JwymcwFr0un02sf33DDDcpkMnrrrbd03XXXaffu3ZqamtLo6Kik\nlW739ddfv6VCs9mi2m1zS5+DlW+U+Xm6M3bF9dm6d95b0q7RmHLZC/cIl2tNFYrVvn6tZCKiQrGq\nRqP/793Be1+qXK5pvtXa9HWX+/7ZP5HQ66ezfH9ZiJ9v9sb1WZ/PZ2y5gbvpmMfY2JgOHTqkp556\nSpL01FNP6dChQ2vBuGN2dnbt49dee01TU1M6ePCgJOnw4cP6wQ9+IEk6ffq0Xn75Zd14441bKhQA\npJUd02zywGYOZoY0v1RVsdKwuhQALtfVmMc3vvENHTt2TN/+9rc1NDSkEydOSJLuuece3X///br6\n6qv1yCOP6JVXXpHP51MwGNTJkyfXutVf/OIXdezYMd18883y+Xz65je/qUSCsQ0AW1NrtJRdrur/\nvCaz+YvhaQfOO7zl6ivGLK4GgJt1FaavvPJKPfHEE5c8/vjjj6993AnY64nFYvrWt77VQ3kA8L7p\nbEmmOEYcmzuYScpnGHrz7BJhGsBAcQIiAMc4O7syJ71vkr/ZwuVFQgEdyCT15tklq0sB4HKEaQCO\ncWauqHDIr3QqanUpcICr9qV0ajqvemPzmxkBoFeEaQCOcXa2oH3phHyGsfmL4Xkf2pdSs2Xq7XN5\nq0sB4GKEaQCOYJqmzs4XGfFA1z64NyXDkN44s2h1KQBcjDANwBEWlquq1FraP0GYRndikYD2TzA3\nDWCwCNMAHOHM6s2H+yc5sQvdu2p/Sm+fy6vRbFtdCgCXIkwDcISzcwUZhrRnnLV46N5V+1JqNNs6\nNc3cNIDBIEwDcIQzs0XtGo0pFPRbXQoc5IP7UjLE3DSAwSFMA3CEs3MFRjywZYloUHvSCb3B3DSA\nAenqBEQAsFKp2lA2X9OnuPnQEwyfoVKtuenrzFxZ5S5ed+WeIf2P38woX64rFgkpQBsJQB8RpgHY\nHicfekut0dKv3pzf9HXJRESFYrWr96w323r2387o//5vBxQI86sPQP/w3+cAbO/M3GqYnmDMA1s3\nObpyYubMYtniSgC4EWEagO2dnS1oOB7ScDxkdSlwoEgooOF4SLO5itWlAHAhwjQA2zszx8mH2J7J\n0ajmFytqtU2rSwHgMoRpALbWbLV1bqGk/Yx4YBsmR2NqtNqaWh0ZAoB+IUwDsLVzCyW12qb205nG\nNkyOxCRJb00tW1wJALchTAOwtbNrNx8SptG7WCSgZCyo377HvmkA/UWYBmBrZ2aLCgV9a51FoFeT\nozG9PZVXm7lpAH1EmAZga2fnCtqbTsjnM6wuBQ63azSmSq2pd2cLVpcCwEUI0wBsyzRNnZktaj8j\nHuiD3eMxGZJefjtrdSkAXIQwDcC2svmqyrWm9k2yyQPbFwkF9IFdSf36HcI0gP4hTAOwrc7Nh3Sm\n0S8fOTiqU+fyypfrVpcCwCUI0wBs6+xsUYakvWnCNPrjdw6OypT0G7rTAPqEMA3Ats7MFTUxGlM4\n5Le6FLjE3omEhuIh/Zq5aQB9QpgGYFtnZguMeKCvfIaha64Y02/eyanVbltdDgAXIEwDsKVCua6F\n5ao+sIubD9Ff11w5pnKtqben8laXAsAFCNMAbOmdcytB58rdQxZXArf5nQOj8vsMvczcNIA+IEwD\nsKW3zy3LZxg6sIswjf6KRQL64N5h/eq3hGkA20eYBmBLb0/ltW8iwc2HGIirrxzTe/NF5fJVq0sB\n4HC
EaQC2026bemc6ryv30JXGYFxzxZgkMeoBYNsI0wBsZ2qhpFq9pSt3D1tdClxq93hcY0MRVuQB\n2DbCNADbefvcsiTRmcbAGIaha64c06unF9VosiIPQO8I0wBs5+2pZSWiQaVTUatLgYtdfeWYao2W\n3nxvyepSADgYYRqA7bxzLq//smdYhmFYXQpc7NAHRhTw+/RrtnoA2AbCNABbKVYams6WdQX7pTFg\n4aBfH/5ASr96e0GmaVpdDgCHIkwDsJW1w1r2cPMhBu/jH0xrbrGis3NFq0sB4FCEaQC28s65ZRmG\ndDDDMeIYvP/9qrR8hqF/f23O6lIAOFTA6gIA4HxvTy1rbzqhSIgfT+g/w2eoVGuu/dnn9+mq/Sn9\n26uzOvx/7N/WnH44GFCAFhXgOfy2AmAbbXPlsJbrf2eX1aXApWqNln715vwFj40kw3rt3UX90/98\nV+Pb2CDzu4cmFQjzaxXwGv4bGoBtTC+UVKm1/v/27jy8rfLOF/j3HC2WZcm2bGvzviR2nHjJSlYT\nkgJJh9CkTANtgU6fXsIDDM2dtIWklxamQO9t2nk6bZkULrRl67SluYUUkkBCCBAnIZB98ZbE+yLL\ni+RV1n7uHw4G1wlxHIUHxxMAACAASURBVNtHsr6ff2xZR6+/1vHR+enV+74HOZx8SJMo3ayDKAio\na+2VOwoRhSEW00QUMqo5+ZBkoFYpkGyMQZ2tl6t6ENE1YzFNRCGjurkbMRolzAZerIUmV6ZFD5fH\njzbngNxRiCjMsJgmopBR3dKDHF6shWSQZtJBIXKoBxFdOxbTRBQSXG4fWjr6OV6aZKFSikg16VDf\n2otgkEM9iGj0WEwTUUiosQ2Ol87meGmSSaZFD7c3ALvTJXcUIgojLKaJKCTUNPdAAJBtZc80ySPF\nGAOlQkCtjUM9iGj0WEwTUUi42NyNZGMMorlOL8lEqRCRZtKhwd6LAId6ENEosZgmItn5A0Gcb+pC\nXlq83FEowmVZY+H1BWHr7Jc7ChGFCRbTRCS76uZueH1BzMxMkDsKRThrUgzUShG1l9Y8JyK6GhbT\nRCS78jonBAGYkc6eaZKXQhSQaY1Fg70PHm9A7jhEFAZYTBOR7MrrHciyxkKrUckdhQi5aXEIBCVU\nt3TLHYWIwgCLaSKSlcvtR21LL2ZmGuSOQgQASIjVwBivwfnGbl5enIiuisU0EcmqqtGJoCRhZgbH\nS1PoyE2LR0+/F3YHLy9ORF+MxTQRyaqizgm1UkQOL9ZCISTDoodaJaKqsUvuKEQU4lhME5Gsyuud\nyE2Lh0rJlyMKHUqFiJzkODTYezHg8csdh4hCGM9eRCQbZ68HLR39XBKPQlJuWjwkCbjYxImIRHRl\nLKaJSDYV9Q4A4ORDCklxOjUsCVpcaOpGkBMRiegKWEwTkWzK65zQRauQatLJHYXosnLT4tA34IOt\ng1dEJKLLYzFNRLKQJAnldQ7MzDRAFAS54xBdVppZD41agapGDvUgostjMU1EsrB1utDV5+V4aQpp\nClHAtNQ4NLf1oX/AJ3ccIgpBLKaJSBbldZfGS2dwvDSFtumpcZAAVDVwmTwiGonFNBHJorzOCVN8\nNJLio+WOQvSF9Fo1Mix6VDV0weMNyB2HiEIMi2kimnSBYBCVDU6u4kFhoygnEb5AEBX1TrmjEFGI\nYTFNRJOu1tYLtzfA8dIUNgz6KKSbdaiod8LrY+80EX2GxTQRTbqyWgcEADM4XprCSFFOInz+ICrZ\nO01En8Nimogm3cnz7chJiYMuWiV3FKJRS4jVINWkQ3m9E14/e6eJaJBS7gBEFDn8QaC5vRcNbX34\n6o3Z6Pf4x63tIC9QR5OgKCcRuz/qQ1V9FwpzEofdJ4jCuP5Pf16USgklu7+IQhKLaSKaNB6fH28d\nrgMACACOVtjHre3iXOO4tUV0JUlxGqQYY1Be58SMDANUn6twPb4AT
p9vn5DfuyDfDGUUT9lEoYjv\nc4loUjW09iIxNgo6LYd4UHgqykmExxdAVSPXnSaiUfZM19bWYsuWLejq6kJ8fDy2bt2KzMzMYdts\n27YNu3fvhiiKUKlU2LRpE0pKSgAAW7ZsweHDh2EwDE42Wr16NR588MHx/UuIKOQ5ez3o6HZjzvQk\nuaMQjZkxPhrWRC3Kax2YkR4PpYL9UkSRbFTF9BNPPIFvfvObWLt2Lf7+97/j8ccfxyuvvDJsm6Ki\nInznO99BdHQ0Kisrcc899+DgwYPQaDQAgPvvvx/33HPP+P8FRBQ2Tl/sAABkWPQyJyG6PsXTEvHO\nx404V+PAbL45JIpoV3073dnZifLycqxZswYAsGbNGpSXl8PhcAzbrqSkBNHRg1cyy8vLgyRJ6Ori\nR2BE9JnTFzoQr1MjNkYtdxSi62IyaJFp0eNcrQO9Lq/ccYhIRlftmbbZbDCbzVAoFAAAhUIBk8kE\nm82GhITLX3Bhx44dSE9Ph8ViGfrZiy++iNdeew1paWn4/ve/j5ycnGsKmpiou6bt6TNGI3sBQ1mk\n7B9nrxvVzd2Yn2+GXqcZ9/ZVKuWEtKvXaSasbWDickdK29eaYTxz3zQvDf+9pxLHz3dgzdKsCX1O\ntNooGBO0E9L2RIqU17dwxf0zPsZ9avAnn3yCX//61/jDH/4w9LNNmzbBaDRCFEXs2LED9913H/bt\n2zdUoI9GZ2cfglz76poZjXq0t/fKHYOuIJL2zwcnmyEBsCREo7fPPe7t+3z+cW9Xr9Ogt889IW1/\nim2Pve1P989EtD1axdMScayyHeU1HchJiZ2w58Tl8qA9EF5rW0fS61s44v65PFEUrrkD96rDPKxW\nK+x2OwKXDuJAIIC2tjZYrdYR2548eRKPPPIItm3bhuzs7KGfm81miOLgr1q3bh1cLhdaW1uvKSgR\nhbfjVW0wxmsQr+MQD5o6ZqQbEK9T45OKNnh4mXGiiHTVYjoxMRH5+fnYuXMnAGDnzp3Iz88fMcTj\nzJkz2LRpE37zm99g1qxZw+6z2z9bS7a0tBSiKMJsNo9HfiIKA30DPlQ2dKF4mhGCIMgdh2jciKKA\nhbPMcLn92PdJo9xxiEgGoxrm8e///u/YsmULfvvb3yI2NhZbt24FAGzYsAEbN25EYWEhfvKTn8Dt\nduPxxx8fetzPf/5z5OXlYfPmzejs7IQgCNDpdHj22WehVHLxeaJIcepCBwJBCbOnJ6HN6ZI7DtG4\nMhu0yEmJxQcnm7FmSQbidVFyRyKiSTSqijYnJwfbt28f8fMXXnhh6Pu//e1vV3z8Sy+9dO3JiGjK\nOF7VhsTYKKSbdSymaUqam2tES0c/Pilvwy0LUvkJDFEE4UrzRDShBjx+lNU5MDfXxAKDpqzoKCVu\nW5KJVocLlQ1cFpYokrCYJqIJdepCB/wBCfPyjHJHIZpQiwosSDXG4HhlOzp7JmZVDyIKPSymiWhC\nfXiqGSZDNKalxskdhWhCCYKAJYUWRKkVKD3VAp8/KHckIpoELKaJaMI0d/TjfFM3lhcnQ+QQD4oA\nGrUSJUVW9Lh8+KTCfvUHEFHYYzFNRBPmw1PNUIgClhaOXJeeaKqyJGpRlJOI6uYe1LT0yB2HiCYY\ni2kimhBeXwAfnWvFvDwjYmN4oRaKLEU5iTAZonGkrBU9/V654xDRBGIxTUQT4lhVG/rdfiyfnSJ3\nFKJJJ4oCSoqsEEUBB063wB/g+GmiqYrFNBFNiA9OtcBsiMaM9Hi5oxDJIiZahaWFVjh6PDhSZock\nSXJHIqIJwGKaiMZdc3sfLjZ1Y/nsFK4tTREtzaRD8bRE1LT0cP1poimKxTQRjbsPT7VAqRCwtNAi\ndxQi2RXlJCLVpMOxyja0OngFUKKphsU0EY0rry+Aw+daMS/PBL2WEw+JBEHAsiIL9Fo1DpxqQd+A\nT+5IRDSOWEwT0bg6WtkGl8ePm
2Ynyx2FKGSolQqsmJOMQEDChyebOSGRaAphMU1E4+qDU82wJGiR\nm8aJh0SfF6eLwrJiKzp7PPiYExKJpgwW00Q0bmptPahu7sHy2cmceEh0GWkm3eAFXTghkWjKYDFN\nRONmR2ktYjRK3FjMIR5EV1I8LRGpxhhOSCSaIlhME9G4uNjcjbM1nfjyogxERynljkMUsgYnJFqH\nJiT2c0IiUVhjMU1E4+KNAzXQa1X40txUuaMQhTy16rMJiR+c5BUSicIZi2kium5VDU5U1DvxT4sy\nEKVWyB2HKCzE6aKwtMiCzh43JyQShTEW00R0XSRJwhultYjTqbFiTorccYjCSrpZPzQh8Xxjt9xx\niGgMWEwT0XUpr3fifGMX1izOhFrFXmmia1U8LRHJSVocq2xDV59H7jhEdI1YTBPRmEmShB2lNTDo\no3BjsVXuOERhSRAELCmwQqkQUXrahkCQ46eJwgmLaSIas7M1DlQ39+D2JZlQKdkrTTRWWo0SSwot\ncPZ6cPJ8h9xxiOgasJgmojEJBiW8caAGSXEaLCtirzTR9Uoz6ZCbFofyOidaOvrljkNEo8RimojG\nZM/RBtTbe3HH8mwoFXwpIRoP82eYEBujxqGzrXB7A3LHIaJR4BmQiK5Zc0c/3jhQi7m5RizMN8sd\nh2jKUCpElBRZ4fH6caSslcvlEYUBFtNEdE0CwSD+sKscGrUC967KgyAIckcimlIS4zSYnWtEg70P\ntbYeueMQ0VWwmCaia/L2kQbU2npx76o8xMWo5Y5DNCXNzDQgKU6DoxXtcHv9cschoi/AYpqIRq2p\nrQ9/P1iLBTNMWDDDJHccoilLFAQsLrDA5w/gWGW73HGI6AuwmCaiUfEHgvjdrnLEaJS459ZcueMQ\nTXkGfRRmZSeipqUHFfUOueMQ0RWwmCaiUXnzUC0a7H24d9UM6LUc3kE0GYqyExAbo8Zf9l2Ah6t7\nEIUkFtNEdFUfnmrGzsP1WFpowbw8o9xxiCKGQiFi8SwzHD0e7DhYI3ccIroMFtNE9IWOVbbhlT1V\nKMxOxL+sniF3HKKIY07QYmmhBXuPNqKulat7EIUaFtNEdEVldQ48/1YZcpLj8NBXC3hxFiKZfGVZ\nNmJj1Hjp7UoEgkG54xDR5/DMSESXVdPSg//621lYErT4n+uLEKVSyB2JKGJpNUrcfXMuGux92H+8\nWe44RPQ5LKaJaISmtj78avtp6LUqfO+u2YjRqOSORBTx5uUZUZidiDdKa+Ds9cgdh4guYTFNRMMc\nKWvF068eg0Ih4Adfn414XZTckYgIgCAIuPvWXASCEl7bf0HuOER0CYtpIgIA+PxBvLqnCs+/VY5M\nsx6P/8sCmAxauWMR0eeY4qNx2+IMfFLRhrJarj1NFApYTBMROroG8H/+eBzvn2zG6oXpeOSbc2DQ\ns0eaKBR9eWEGzIZovLq3Cj4/154mkhuLaaIIFggG8cGpZvzkpaOwOwfw8B2FuHPFNChEvjQQhSqV\nUsQ9t+ahzTmAtz9ukDsOUcRTyh2AiCafJEk4U92Jv75/EbZOF6anxuF/3JbPYR1EYWJWVgJuyDdh\n5+F6LJpp5rFLJCMW00QRpr61F399/yIq6p0wG6Lx8B2FmDM9CYIgyB2NiK7BXSun40x1J/747nls\nWl/MY5hIJiymiSJAUJJwrqYT7x5tRFmdE7poFe6+JRfLZyfzQixEYcqgj8JXS7Lx5/cu4HhVO+bP\nMMkdiSgisZgmmsI83gAOn7Ph3WNNaHW4EK9T444bs7Fybiq0Gh7+ROFu5bwUHDprw5/fu4BZWQmI\njuJxTTTZeNQRTUGOHjfeO96ED0+1wOXxI8uqx/1fmYn5eSb2RBNNIQpRxL2r8vDTV4/jzUO1uGvl\ndLkjEUUcFtMRxB8EPD7/hLQdpVJCyRptmNE835LDBZfn2veJSqmEzz/ycbW2HnxwohmnLrRDAjB
7\nWhJumpuCLGssBEGAxx+Exx8cU9vjIShNSLNEES0nJQ43Fifj3aNNWFpgRapJJ3eksDeR58uJfI3l\nuVgeLKYjiMfnx9EK+4S0vSDfDCU/XhxmNM+3XqdBb5/7mtsuzjXi9Pl2AIMrc7R0uHCmuhPtXQNQ\nKUXMyDBgRoYBumgVOrvd6Owe/e/4fNvjrTjXOCHtEkW6r92UgxPn2/HK3ipsuXsuRE5GvC4Teb6c\nyNdYnovlwWecKExJkoSm9n6cqe5EZ7cbWo0SC/JNmJYSBxW7Jogiii5ahfUrcvDi7kocOmtDSVGy\n3JGIIgaLaaIwVFHnwM7D9XD2eqCLVmHxLDOyU+KgENkbRRSplhZaUXrGhu3vV2POdCN00Sq5IxFF\nBHZfEYWRnn4v3jvehN+9WQ5/IIilhRasK8nC9LR4FtJEEU4UBHzr1jy43H78df9FueMQRQz2TBOF\nAa8/gDMXO1FZ74RCFHH7skzEatUsoIlomFSTDl9elI5dH9VjzvQkzOE8BaIJx55pohBX39qLHQdq\nUV7nRHZyHNbdmIWb5qaykCaiy1q7LAvpZh1eeqcSPf1eueMQTXkspolClNvrx4FTLfjwVAtiNEr8\n0+IMLCm08KIMRPSFlAoRG9bMxIAngJferoQkcU1KoonEYpooBDXYe/HmwTo02HsxZ3oSvrwoA0lx\nGrljEVGYSDHq8LXl2Th1sQOlZ2xyxyGa0tjFRRRCvL4APi63o9bWi4TYKNyyIA0GfZTcsYgoDN28\nIA2nLnbgz+9dwIwMA0zx0XJHIpqS2DNNFCLanC68dagOda29KJ6WiH9alMFCmojGTBQE/I/bZkIU\ngN/vLEeQlyAlmhAspolkFgxKOH2xA3s+boQgCPjywnQUT0uCyAmGRHSdEuM0uPuWXFxo6saOg7Vy\nxwk7kiTBHwhiwONHT78X3X1eeHwBWcehB4JBeLwBDHj86Bvwoaffi55+L/yBoGyZIh2HeUxhLrcf\nrZ39aGrtxYDHD0efBw32XsTFREGvVbFYCwG9Li/2Hm1Em3MA2cmxuGGmCWqlQu5YRDSFLJ5lQWVD\nF3YeroM1QYvFBRa5I4Ukjy+AmuZuXGjqRkWDE3W2Xnj9AVyubhYFAZooBaLVCsREqxCrVSM2Ro04\n3eDXKNX1vY77A0H0ugYL5V6Xd/D7S19dbv8VH/f2kQZYErQwJ2iRkhSD2dOSkMj5NhOOxfQU4vMH\ncL6pG+dqOnGuxoHmjv4rbiuKAuJi1DDoo2CMj0amRY8oNYu4yVTf2osjZXYEgxKWFVmQnRwndyQi\nmoIEQcC3VuWho2sAL75dgcQ4DXLT4uWOFRIGPH4cKbfj8Dkb6my9CAQlCACSjTFIN+sQpVZApRSh\nVopQXerocHv9GPAE4Pb64fYE0N3nRVNbHz4/ikatEqGLViFGoxr8Gq2EUiFCIQoIAmiy9wIYLODd\nngDc3sH2XG7/YMHsGV4wa9QK6LUqWBK00GtVUCsVEEUBoihAIQKSBPQP+KBSKtDZ48bRCjs+cPvx\n3++eR5ZVj/l5JszLM8Jk0E7SMxtZWEyHuaAkoazWgfdPNKO83gGvLwilQkBuWjwWzTIj1RKHgM8P\nbZQSEAWcq+lAd58XXX0edPV60epwoaalB0cr7Eg16ZCdHIsUo45rGE8gnz+Io5VtuNjUDZNBi6WF\nZui1arljEdEUplSIeOirhfjpq8fxX6+fxY++NS9iCytJklDX2osPTzXj4/I2eHwBpBp1WL0wHdNT\n4zEtJRaSIOBohX3UbQaDEvoGfOi+NOSib8A3OATD5YWtsx/+wGeV9qGzrSMer1QI0KiViI5SwJqo\nhT5GDb12sMdbr1VBPcqe7gX5ZsRcWj7V7nDhWFUbjle1Y/sH1dj+QTWyrLFYvTAd83gxn3HFYjpM\neXwBHD7Xin3HGmHrdCFOp0ZJYTIKshMwI90w1MtsNOrR3j7
4DrjfMzjsIylu+IxuR48bNS09qGnp\nQYO9D2qViOmp8ZiZaeCaxuPM0eNG6Wkbuvu9KMhOwLLZqXC5PHLHIqIIoItW4d++VoSnXzmGX20/\ng8e+NQ8xGpXcsSaNJEk4Xd2JNw/Woq61F2qViIX5ZiyfnYIsqx6C8FknUr/nykMpLkcUBcTGDA7x\nuNzv9fqC8AeDCAYlTEszoLy2E5CAKLUCGrUCSsX4T2EzJ2hx2+JM3LY4Ex1dAzhW1Y4PTzXj2R3n\nYIqPxj9/aTqKMw2jLtTpylgphZm+AR/2Hm3A+yea0e/2I8Oix4bbZ2LBDNOYD8aEWA0SYjWYm2uE\nrdOFi83dKK91oLLeielpcSjISoA2gl5wJ0JQklBR58TJ8x2IUitwy4JUWBNj+AkAEU0qc4IWD99R\niP/4yyn89o1z+Lf1xVApp/ZaBJIkobzOiTdKa1DT0gNjvAb33JqLRTMt0GomvgwSBAFRagWiMFi0\nWhK1sHdeeRjmREiKj8bqhem4dUEaTl5ox+4jDXj2b2eg16pw64I0fGleKjRqloRjxWcuTLjcPuz5\npBHvHmuExxvA3FwjblmQhumpccPeTV8PURSQYoxBijEGPf1enK3pRFVDF843dGNaaiwKshOhi2ZR\nfa26+jw4fLYVHd1upJl0WFxg5osWEckmL92Ab395Bn6/qwL/+ddTePiOwinbYVLV4MQbpbU439iF\nhNgo/MvqPCwttE5IT3A4EEUB8/JMmJtrRFuvF396pxJ/+7AGe4824rZFGVgxN2VobDiNHs/oIW7A\n48e7xxqx55NGDHj8mJ9nxNplWUgx6ib098bGqLG00IqinESU1TpwsakbF5t6kJsWh8KcRA7/GIVA\nIIiz1Z04fbETKqWIkiIrMv/ho0QiIjksLbRCFAX8YVcF/vcfT2DT+uIptepDdUs3dhyoQVmdE3Ex\natx9Sy5uLE6e8r3woyUIAgpykrDpzmJUN3fjjdIa/GX/Rew52og1SzJRUhS5bzjGghVRiHJ7/Xjv\neBPe+bgB/W4/5kxPwtplWUg36yc1h16rxqJZFhRkJ+JMdSeqGrtwsbkbeekGFGQlcAWQK6hp6cFL\n71Siqa0PGRY9bsg38Q0IEYWUxbMsiNdF4b9eP4unXz2Gf/taMTIsk3uOGW/1rb3YUVqD09Wd0EWr\ncOeKaVgxN+W6l6qbynJS4vCDr89BZb0Tr5fW4NU9VXj7SD2+sjQLiwvMUIgsqq+GZ/cQ4/EFsP9E\nE94+0oC+AR+KchKxriQLmZZYWXPpolVYUmBBQVYCTl/sQFmtA+cbu5CXHo/8DIOs2UJJW9cAXv+w\nGp9UtEGvVWH57OSwPzkR0dSVn2HA/7pnLv5z+2n87E8n8ODaAhTlJMod65rVtPRg10d1OHmhAzEa\nJf55eTbHAV+jGRkG/DB9Ls7VOvD6gRr8YXcFdh2px7plWViQb4LIT1WviP9lIaJvwIcPTzXj3WNN\n6On3oiArAWtLspATYmsPx8aoUVKcjIJsD85UD65nXVHnRJtzALcvyURC7NT5mPBa9A348NahOuw/\n0QSFKOD2JZm4cU4yzlZ3yh2NiOgLpRh1eOze+fj19tP49fbTKCm24o7lOYgN8SU7JUlCZUMXdn1U\nh/I6J2I0SqxdloVb5qdNysTCqUgQBBRmJ6IgKwEnL3TgjdIa/N83y7DzozqsviEdN+SbOVTmMvjf\nJrOm9j7sO9aIj8rs8PmDmJlpwFfWFYT8gvoGfRSWz05Gd58X52o7UXrGhoNnbFg0c3CZoZyU2IgY\nG9zWNYAPTjTjw9MtcHv9WFZoxbqSbBj0Ude8tBIRkVwM+ihsuWcu3jxYh3ePNeJ4VTu+emM2bpqd\nEnJXy/X6Ajh+vh37jzehuqUHcTFq3LliGpbPTuZwunEiCALm5hoxe3oSjla04a3Ddfj9rgpsf/8i\nbpqTghVzUhCni5I7Zsj
gf50M+t0+nDzfgY/KWlFR74RKKWLxLAtunp+K1AmeWDje4nSDExW/tXoG\nDpxswcGzNhw61wprohYlRclYUmC57Lqb4SwoSSivdeC94004U9156UUnCV9ZlhV2+4+I6FMatRJ3\nrpyGpUVW/PfeKvxx73kcON2CO1dMw4wMg6wf80uShHp7L0pP23Ck3I4Bjx/GeA3uvTUXy4qsXIFi\ngoiCgIUzzbgh34TyOifePdaINw/VYddH9ViQb8LCfDNmZSVE/GRFFtOT5NMC+lhVG8pqHQgEJSTG\navDPy7OxfHZK2C85lxCrwd235uKO5dk4WtmG0jMt+Ov7F/G3D6sxKyth8GOj7ASYw/SKW/5AEBca\nu3C6uhMnzrejo9uNWK0Ka5Zk4qY5KTDo+Q6diKaGlKQYPPKNOTha2YbX9l/Ef/zlFJLiNFhSYMGS\nAsukXTnRHwiiurkb52odOH2xA03t/VApRczPM6KkKBm56fEcxztJBEHArKwEzMpKgN3hwr5jTfio\nrBVHyuzQRikxZ3oS5s8wRWxhPapiura2Flu2bEFXVxfi4+OxdetWZGZmDtsmEAjg6aefRmlpKQRB\nwP3334/169df9b6pqrPbjQvNXZeWlOtGY3sfJAlIjNXglvlpmD/DNOKKS1NBdJQSNxYn48biZDR3\n9OPgmRacvNCBM5fGDpsM0SjMSsS01Dikm3UwG7Qh9xEiMPgxYnNHP+rtvSivc6KsthMDngCUCgEz\n0g346o3ZmJ9n4tgxIpqSBEHADflmFE9Lwomqdhw6Z8Nbh+rw5qE6TE+NQ/G0JGRa9Mi06Mdtjeoe\nlxfNbX1obO9HZb0TFQ1OeLwBKEQBOcmxuHdVHhbmm6bsmtjhwpygxd235uKuL01DeZ0DRyvacOJC\nBw6da4VaKSLLGotpqXGYnhqHnJS4iLjK5qiK6SeeeALf/OY3sXbtWvz973/H448/jldeeWXYNm+9\n9RYaGhqwd+9edHV1Yd26dVi8eDFSU1O/8L5wJEkS3N4Ael1e9A744OzxwO50we4YuPTVhR6XDwAQ\npVIgOzkWty/JRFFO0pQsoK8kJSkGd62cjrtWTofd6cK5GgfO1nSi9EwL3jvRBABQq0SkGnVIM+mQ\nFDd4JcYEfRQMsRoYdFETWqx6fAE4ez1w9Ljh6PHA0euG3eFCg70Ptk4XgpIEYHAoy4IZJhTnJCE/\n08DZ4UQUMaJUCiwusGBxgQWOHjc+KmvFR2V2/L8Pqoe2MRuikWHRIzFWA71WjdgYFfRaNVIH/Ojq\ncgEYHB4HCfD5g+gd8KGn34uefi96XV60dQ2gqb0fPf3eoTaT4jRYPGtwBan8DAPHQocgpUJEUU4S\ninKS8C1/EOV1DpTVDV6X4u0jDdh16RyaFKeB2RANU4IWZoMWZkM04nVR0GtV0EWrpsTlzK/639nZ\n2Yny8nK8+OKLAIA1a9bgqaeegsPhQEJCwtB2u3fvxvr16yGKIhISEnDzzTfjnXfewX333feF943W\nZPdeur0BvFFag54+D3wBCT5/EL5AEF5fAP1uHwIBacRj9Fo1kuI0mJ4Wj5SkGGRaYmFNjIYo8xqN\nnz53SoU4Ye/olQrxC/eRNTEG1sQY3LIgDYFgEHbHAGyd/WjucMHW0Yf61l5U1DtHPE6lFKFRKxEd\npYBGrUSUSoRCIUIpilAoBCgVwmXfnASDEvwBCYFgEIGgBL8/CLc3ALfXD7cvCLfXD78/OOJxcboo\n5KTGoWR2MlKTYpCcFAODPmpMb4BG83xHRykR8F/7Ppnofcm2B326f8Itd6S0PZbjJxRyj7VtuT/F\nS4qPxu1Ls3D7LRZkvwAACjZJREFU0iy43H40tfehub0PTW39aOroQ2N732XPjVciANBGqxCvi8Ky\nIgssCTGwJmhhSdRCF62SteMpUv5Pxut/KkqtwJxcI+bkGgEAXl8QDW29qG/thd05gI6uA
VQ396Cs\n1jHisWqVAjFRSqhUIpQKESqlAiqFgHSLHqtvSB+XfNdiLM/JVYtpm80Gs9kMhWLwnYNCoYDJZILN\nZhtWTNtsNiQnJw/dtlqtaG1tvep9o2UwxFzT9uPh4TvnTPrvnAiJiZ9Niku1hsZSeyZjLArlDjEJ\nJvL5zk6duPW92TbbZtuh13YoSQSQlhLaq05dr3B9/b4Wn68PxpvVEouFRRPWfEjhgE8iIiIiojG6\najFttVpht9sRCAQADE4mbGtrg9VqHbFdS0vL0G2bzQaLxXLV+4iIiIiIwtVVi+nExETk5+dj586d\nAICdO3ciPz9/2BAPAFi9ejW2b9+OYDAIh8OBffv2YdWqVVe9j4iIiIgoXAmSJF11tkB1dTW2bNmC\nnp4exMbGYuvWrcjOzsaGDRuwceNGFBYWIhAI4Mknn8ShQ4cAABs2bMBdd90FAF94HxERERFRuBpV\nMU1ERERERCNxAiIRERER0RixmCYiIiIiGiMW00REREREY8RimoiIiIhojHix+ymgtrYWW7ZsQVdX\nF+Lj47F161ZkZmYO2+aZZ57Bn/70J5hMJgDA3Llz8cQTT8iQNrJs3boVe/bsQXNzM9566y3k5uaO\n2CYQCODpp59GaWkpBEHA/fffj/Xr18uQNvKMZv/w2JGP0+nEo48+ioaGBqjVamRkZODJJ58csTTr\nwMAAfvjDH6KsrAwKhQKbN2/GihUrZEodOUa7f7Zs2YLDhw/DYBi86t/q1avx4IMPyhE5Ij300ENo\namqCKIrQarX48Y9/jPz8/GHb8Dx0nSQKe/fee6+0Y8cOSZIkaceOHdK99947Ypvf/OY30s9+9rPJ\njhbxjh49KrW0tEgrVqyQqqqqLrvNG2+8IX3nO9+RAoGA1NnZKZWUlEiNjY2TnDQyjWb/8NiRj9Pp\nlI4cOTJ0+2c/+5n0wx/+cMR2zzzzjPTYY49JkiRJtbW10pIlS6S+vr5JyxmpRrt/Nm/eLL366quT\nGY0+p6enZ+j7d999V1q3bt2IbXgeuj4c5hHmOjs7UV5ejjVr1gAA1qxZg/LycjgcDpmTEQDMnz9/\nxNVC/9Hu3buxfv16iKKIhIQE3HzzzXjnnXcmKWFkG83+IfnEx8dj4cKFQ7dnz5497Gq6n3r77beH\nrl2QmZmJgoICHDhwYNJyRqrR7h+Sl16vH/q+r68PgiCM2IbnoevDYR5hzmazwWw2Q6FQAAAUCgVM\nJhNsNtuIj9p27dqFgwcPwmg04rvf/S7mzJkjR2T6BzabDcnJyUO3rVYrWltbZUxE/4jHjvyCwSD+\n/Oc/Y+XKlSPua2lpQUpKytBtHkOT74v2DwC8+OKLeO2115CWlobvf//7yMnJmeSEke2xxx7DoUOH\nIEkSfve73424n+eh68NiOkJ8/etfxwMPPACVSoVDhw7hoYcewu7du4fGsBHR5fHYCQ1PPfUUtFot\n7rnnHrmj0GV80f7ZtGkTjEYjRFHEjh07cN9992Hfvn1DnUA08X76058CAHbs2IGf//zneOGFF2RO\nNLVwmEeYs1qtsNvtCAQCAAYnEbS1tY346NpoNEKlUgEAli5dCqvVigsXLkx6XhrJarUO+2jUZrPB\nYrHImIg+j8eO/LZu3Yr6+nr86le/giiOPG0lJyejubl56DaPocl1tf1jNpuHfr5u3Tq4XC72espk\n3bp1+Pjjj+F0Oof9nOeh68NiOswlJiYiPz8fO3fuBADs3LkT+fn5I4Z42O32oe8rKirQ3NyMrKys\nSc1Kl7d69Wps374dwWAQDocD+/btw6pVq+SORZfw2JHXL3/5S5w7dw7btm2DWq2+7DarV6/Ga6+9\nBgCoq6vD2bNnUVJSMpkxI9Zo9s/nj6HS0lKIogiz2TxZESNaf38/bDbb0O39+/cjLi4O8fHxw7bj\neej6CJIkSXKHoOtTXV2NLVu2oKenB7Gxsdi6dSuys
7OxYcMGbNy4EYWFhdi8eTPKysogiiJUKhU2\nbtyI5cuXyx19ynv66aexd+9edHR0wGAwID4+Hrt27Rq2bwKBAJ588kkcOnQIALBhw4ahyVQ0sUaz\nf3jsyOfChQtYs2YNMjMzodFoAACpqanYtm0b1q5di+effx5msxkulwtbtmxBRUUFRFHEI488gptv\nvlnm9FPfaPfPt7/9bXR2dkIQBOh0Ojz66KOYPXu2zOkjQ0dHBx566CEMDAxAFEXExcVh8+bNmDVr\nFs9D44jFNBERERHRGHGYBxERERHRGLGYJiIiIiIaIxbTRERERERjxGKaiIiIiGiMWEwTEREREY0R\ni2kioiniueeew2OPPTaqbZ955hn84Ac/mOBERERTH4tpIqIQsnLlShw+fHjYz15//XV84xvfuOpj\nH3jggaHLBk9EDiIiGonFNBERERHRGLGYJiIKI3a7Hd/97nexaNEirFy5Eq+88srQff84dGPHjh1Y\nsWIFFi5ciG3bto3obfb5fHj00UcxZ84c3HbbbTh79iwA4JFHHkFLSwseeOABzJkzBy+88MLk/YFE\nRGGGxTQRUZgIBoN48MEHkZeXhwMHDuDll1/Gyy+/jNLS0hHbXrx4ET/5yU/wi1/8AqWlpejr64Pd\nbh+2zf79+3Hbbbfh2LFjWLlyJZ566ikAwC9+8QskJyfjueeew8mTJ7Fhw4ZJ+fuIiMKRUu4AREQ0\n3L/+679CoVAM3fb5fJg5cybOnj0Lh8OBhx9+GACQlpaGO++8E7t370ZJScmwNt555x2sWLEC8+fP\nBwBs3LgRr7766rBt5s2bh+XLlwMA1q5di5dffnki/ywioimJxTQRUYjZtm0blixZMnT79ddfx/bt\n29Hc3Iy2trahAhkAAoHAsNufamtrg8ViGbodHR2N+Pj4YdskJSUNfa/RaODxeOD3+6FU8tRARDRa\nfMUkIgoTVqsVqamp2Lt371W3NZlMqK2tHbrtdrvR1dU1kfGIiCISx0wTEYWJoqIixMTE4Pnnn4fb\n7UYgEMD58+dx5syZEduuWrUK+/fvx4kTJ+D1evHMM89AkqRR/66kpCQ0NjaOZ3wioimJxTQRUZhQ\nKBR47rnnUFlZiS996UtYtGgRfvSjH6Gvr2/EttOnT8ePf/xjfO9730NJSQm0Wi0SEhKgVqtH9bvu\nv/9+PPvss5g/fz5+//vfj/efQkQ0ZQjStXRVEBFRWOrv78eCBQuwZ88epKWlyR2HiGjKYM80EdEU\ntX//fgwMDMDlcmHr1q3Izc1Famqq3LGIiKYUFtNERFPUe++9h5KSEpSUlKC+vh6//OUvIQiC3LGI\niKYUDvMgIiIiIhoj9kwTEREREY0Ri2kiIiIiojFiMU1ERERENEYspomIiIiIxojFNBERERHRGLGY\nJiIiIiIao/8Phj/Mf0+w3mIAAAAASUVORK5CYII=\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "tags": [] - } - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jI9ToieVrisQ", - "colab_type": "text" - }, - "source": [ - "Dá para perceber que a maior parte dos dados concentra-se em torno da média (~ 1.7 m) e que apenas algumas observações encontram-se bastante distantes dela." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "q49-oFz4gBHs", - "colab_type": "code", - "outputId": "f968b883-a1e3-4ead-963a-19d9f25e9d9e", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "height_outlier_mean = height_outlier.mean()\n", - "height_outlier_std = height_outlier.std()\n", - "\n", - "height_outlier_mean, height_outlier_std" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(1.7181251474953014, 0.2948590174540895)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 56 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dTtLF6P2rvIh", - "colab_type": "text" - }, - "source": [ - "Um jeito de procurar por _outliers_ é ver quem se encontra fora do intervalo $[\\bar{x} - k * \\sigma, \\bar{x} + k * \\sigma]$, onde $k$ geralmente é 1.5, 2.0, 2.5 ou até 3.0.\n", - "\n", - "Abaixo utilizamos o $k = 2$, pois esse valor faz sentido (alturas menores que 1.12 m ou maiores que 2.30 m fogem do nosso padrão):" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "cI8gL-QrgK1s", - "colab_type": "code", - "outputId": "6c472ac1-ea23-4dd3-b833-91969a62f92d", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "non_outlier_interval_dist = [height_outlier_mean - 2 * height_outlier_std, height_outlier_mean + 2 * height_outlier_std]\n", - "\n", - "non_outlier_interval_dist" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "[1.1284071125871225, 2.3078431824034804]" - ] - }, - "metadata": 
{ - "tags": [] - }, - "execution_count": 57 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b5A37brPsVPw", - "colab_type": "text" - }, - "source": [ - "Novamente, conhecendo o intervalo, podemos identificar as observações que caem fora dele e removê-las:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "W6jVe5TMglf5", - "colab_type": "code", - "outputId": "c270dcb7-d46a-4dd8-94b3-c3d610269282", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 104 - } - }, - "source": [ - "outliers_dist = height_outlier[(height_outlier < non_outlier_interval_dist[0]) | (height_outlier > non_outlier_interval_dist[1])]\n", - "\n", - "outliers_dist" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "29 0.516665\n", - "38 2.943781\n", - "48 1.058498\n", - "68 2.737088\n", - "Name: Height, dtype: float64" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 58 - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "jqYD2d3chJTK", - "colab_type": "code", - "colab": {} - }, - "source": [ - "height_no_outlier_dist = height_outlier.drop(index=outliers_dist.index)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8IL5fWP1sePM", - "colab_type": "text" - }, - "source": [ - "Até agora, nossos métodos de identificação de _outlier_ foram baseados em estatísticas descritivas do nosso _data set_ (quantis, média e variância). Porém, alguns testes de hipóteses também existem.\n", - "\n", - "Um deles é o teste de Grubb. Esse é um teste bastante simples, cuja estatística de teste $G$ depende dos valores extremos do conjunto e da média amostral:\n", - "\n", - "$$G = \\frac{\\vert x_{\\text{\\{min ou max\\}}} - \\bar{x}\\vert}{s}$$\n", - "\n", - "onde $\\bar{x}$ é a média amostral e $s$ é o desvio-padrão da amostra.\n", - "\n", - "A hipótese nula, $H_{0}$, é de que não existem _outliers_ no _data set_. 
O teste de Grubb assume que os dados originam-se de uma distribuição normal, então pode ser válido testar essa hipótese antes.\n", - "\n", - "Rejeitamos a hipótese nula se o valor de $G$ encontrado for superior ao valor crítico do teste, que é dado por\n", - "\n", - "$$G_{\\text{crítico}} = \\frac{n - 1}{\\sqrt{n}} \\sqrt{\\frac{t_{\\alpha',n-2}^{2}}{n - 2 + t_{\\alpha',n-2}^{2}}}$$\n", - "\n", - "onde $n$ é o tamanho da amostra, $t$ é um valor com distribuição t-Student e $\\alpha'$ é $\\alpha/2n$ se o teste for bilateral (procuramos _outliers_ muito acima ou muito abaixo) ou $\\alpha/n$ se o teste for unilateral (acreditamos que o _outlier_, se houver, está em somente uma das extremidades da distribuição)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RNveH7ftxMOV", - "colab_type": "text" - }, - "source": [ - "Abaixo criamos algumas funções que nos auxiliam nos cálculos e na exibição dos resultados:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Ir61-q0ckV6K", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def grubb_test(g, n, alpha=0.05, tailed='two-tailed'):\n", - " if tailed == 'two-tailed':\n", - " critical = ((n - 1)/sqrt(n)) * sqrt(sct.t.isf(alpha/(2*n), n-2)**2/(n - 2 + sct.t.isf(alpha/(2*n), n-2)**2))\n", - " \n", - " return (g, critical, g > critical)\n", - " elif tailed == 'one-tailed':\n", - " critical = ((n - 1)/sqrt(n)) * sqrt(sct.t.isf(alpha/(n), n-2)**2/(n - 2 + sct.t.isf(alpha/(n), n-2)**2))\n", - " \n", - " return (g, critical, g > critical)\n", - " else:\n", - " raise ValueError(f\"Invalid tailed argument\")" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "c--VvSPuuHaM", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def grubb_summary(result, decimals=10):\n", - " return (\n", - " f\"Null hypothesis: there is no outliers in the data set\\n\"\n", - " f\"Test statistic: {np.round(result[0], decimals)}, \"\n", - " f\"Grubb's critical 
value: {np.round(result[1], decimals)}, \"\n", - " f\"Reject: {result[2]}\"\n", - " )" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "d8nFGEVuqgdC", - "colab_type": "code", - "colab": {} - }, - "source": [ - "def next_outlier_candidate(data):\n", - " sample_distances = (data - data.mean()).abs()\n", - " candidate_idx = sample_distances.idxmax()\n", - " candidate_value = data[candidate_idx]\n", - " candidate_statistic = sample_distances.max()/data.std()\n", - " \n", - " return (candidate_idx, candidate_value, candidate_statistic, len(data))" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MRZwuyOOxU7U", - "colab_type": "text" - }, - "source": [ - "Ao executarmos o teste de Grubb no nosso conjunto de alturas, encontramos alguns valores onde a hipótese nula é rejeitada, ou seja, há evidência de que o valor extremo é um _outlier_." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Rz-yVWFlt-M6", - "colab_type": "code", - "outputId": "cb11e99b-2195-45d7-9089-fdf292a65e1c", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 434 - } - }, - "source": [ - "height_outlier_grubb = height_outlier.copy()\n", - "outliers_grubb = pd.Series()\n", - "has_outlier = True\n", - "\n", - "while has_outlier:\n", - " outlier_candidate = next_outlier_candidate(height_outlier_grubb)\n", - "\n", - " print(f\"Index: {outlier_candidate[0]}, \"\n", - " f\"Value: {np.round(outlier_candidate[1], 3)}, \"\n", - " f\"Test statistic: {np.round(outlier_candidate[2], 3)}, \"\n", - " f\"Sample size: {outlier_candidate[3]}\\n\")\n", - "\n", - " result = grubb_test(outlier_candidate[2], outlier_candidate[3])\n", - "\n", - " print(grubb_summary(result, 3))\n", - "\n", - " has_outlier = result[2]\n", - "\n", - " if has_outlier:\n", - " height_outlier_grubb = height_outlier_grubb.drop(index=outlier_candidate[0])\n", - " outliers_grubb.at[outlier_candidate[0]] = 
outlier_candidate[1]\n", - " \n", - " print(f\"\\n\")" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "Index: 38, Value: 2.944, Test statistic: 4.157, Sample size: 100\n", - "\n", - "Null hypothesis: there is no outliers in the data set\n", - "Test statistic: 4.157, Grubb's critical value: 3.384, Reject: True\n", - "\n", - "\n", - "Index: 29, Value: 0.517, Test statistic: 4.421, Sample size: 99\n", - "\n", - "Null hypothesis: there is no outliers in the data set\n", - "Test statistic: 4.421, Grubb's critical value: 3.381, Reject: True\n", - "\n", - "\n", - "Index: 68, Value: 2.737, Test statistic: 4.219, Sample size: 98\n", - "\n", - "Null hypothesis: there is no outliers in the data set\n", - "Test statistic: 4.219, Grubb's critical value: 3.377, Reject: True\n", - "\n", - "\n", - "Index: 48, Value: 1.058, Test statistic: 2.96, Sample size: 97\n", - "\n", - "Null hypothesis: there is no outliers in the data set\n", - "Test statistic: 2.96, Grubb's critical value: 3.374, Reject: False\n", - "\n", - "\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "49MMneSg-DCj", - "colab_type": "code", - "outputId": "a98df152-223e-43e1-ced9-d113a40b879f", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 86 - } - }, - "source": [ - "outliers_grubb" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "38 2.943781\n", - "29 0.516665\n", - "68 2.737088\n", - "dtype: float64" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 64 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_hajYam661Zd", - "colab_type": "text" - }, - "source": [ - "Abaixo comparamos os _outliers_ encontrados por cada um dos três métodos:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "l3P2Bavg-zMK", - "colab_type": "code", - "outputId": "25065774-49a4-4509-fe92-70a4d32c8cd2", - "colab": { - "base_uri": 
"https://localhost:8080/", - "height": 86 - } - }, - "source": [ - "outliers = pd.Series({\"IQR\": outliers_iqr.index.values,\n", - " \"Normal\": outliers_dist.index.values,\n", - " \"Grubb\": outliers_grubb.index.values})\n", - "\n", - "outliers.apply(np.sort)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "IQR [29, 38, 48, 68, 91, 92]\n", - "Normal [29, 38, 48, 68]\n", - "Grubb [29, 38, 68]\n", - "dtype: object" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 65 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1oMEwGs_DHJW", - "colab_type": "text" - }, - "source": [ - "## _Features_ de texto\n", - "\n", - "Dados textuais são muito ricos e muito fáceis de serem encontrados. Diversos _data sets_ são compostos por documentos textuais e ainda um simples _scrapper_ pode coletar dezenas de milhares de documentos da Internet. Coleções de documentos são frequentemente chamadas de _corpus_ (plural, _corpora_).\n", - "\n", - "Nosso objetivo aqui é somente mostrar como preprocessar de forma simples _features_ textuais. Para isso, utilizaremos o _data set_ 20 newsgroups, que contém milhares de documentos categorizados em 20 grupos (desde astronomia até carros)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XItMVwyq8Dp9", - "colab_type": "text" - }, - "source": [ - "Abaixo escolhemos somente três grupos para restringir nosso escopo:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "usWrDfLvMNxw", - "colab_type": "code", - "colab": {} - }, - "source": [ - "categories = [\"sci.crypt\", \"sci.med\", \"sci.space\"]\n", - "\n", - "newsgroups = fetch_20newsgroups(subset=\"train\", categories=categories, shuffle=True, random_state=42)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4uNwK5uREAn7", - "colab_type": "text" - }, - "source": [ - "Temos agora um _corpus_ com 1782 documentos:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "_lUWgt06EtnR", - "colab_type": "code", - "outputId": "f82dd8b7-5f76-477c-9173-ee35d0c7e0aa", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "len(newsgroups.data)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "1782" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 67 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xh326fr28Jyc", - "colab_type": "text" - }, - "source": [ - "Um exemplo de documento desse _corpus_ é mostrado abaixo:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "vsfaD72_M52H", - "colab_type": "code", - "outputId": "fb895197-8753-49e6-a631-e7716ad8c8ee", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 295 - } - }, - "source": [ - "document_idx = 4\n", - "documents_total = len(newsgroups.data)\n", - "\n", - "print(f\"> Document {document_idx} of {documents_total}:\\n\\n{newsgroups.data[document_idx]}\")\n", - "print(f\"> Category: {newsgroups.target_names[newsgroups.target[document_idx]]}\")" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "> Document 4 of 1782:\n", 
- "\n", - "From: billc@col.hp.com (Bill Claussen)\n", - "Subject: Re: Should I be angry at this doctor?\n", - "Organization: HP Colorado Springs Division\n", - "Lines: 5\n", - "Distribution: na\n", - "NNTP-Posting-Host: hpcspe17.col.hp.com\n", - "\n", - "\n", - "Report them to your local BBB (Better Business Bureau).\n", - "\n", - "Bill Claussen\n", - "\n", - "\n", - "> Category: sci.med\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6liTZFzv8Nas", - "colab_type": "text" - }, - "source": [ - "Ao trabalhar com dados textuais, uma representação simples é ter:\n", - "\n", - "* Cada documento em uma linha.\n", - "* Cada palavra (ou termo) em uma coluna.\n", - "\n", - "Por exemplo, se nosso vocabulário (conjunto de todas palavras ou termos do _corpus_) tiver tamanho 10000 e tivermos 100 documentos, então nosso _data set_ será composto de 100 linhas e 10000 colunas." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qLBi7mFU8mLI", - "colab_type": "text" - }, - "source": [ - "O valor de cada célula, $x_{i, j}$, (interseção da linha $i$ com a coluna $j$) do _data set_ depende da transformação que aplicarmos.\n", - "\n", - "A transformação mais simples é a contagem de palavras no documento, ou seja, $x_{i, j}$ indica o número de ocorrências da palavra $j$ no documento $i$.\n", - "\n", - "Isso pode ser obtido no sklearn pelo `CountVectorizer`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "4E6FmUUhNs8b", - "colab_type": "code", - "colab": {} - }, - "source": [ - "count_vectorizer = CountVectorizer()\n", - "newsgroups_counts = count_vectorizer.fit_transform(newsgroups.data)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "TSylOCPKjLmh", - "colab_type": "code", - "outputId": "d7b6e6b8-f227-4ec5-a34a-2cf93fc8ebb5", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "type(newsgroups_counts)" - ], - 
"execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "scipy.sparse.csr.csr_matrix" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 78 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "M4rtFrsF9CgR", - "colab_type": "text" - }, - "source": [ - "Abaixo escolhemos dez palavras contidas no _corpus_ para exemplificar:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "kmxzJhkSUpIZ", - "colab_type": "code", - "outputId": "613a8241-c25e-4d5d-9830-1cee04671fc4", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - } - }, - "source": [ - "words_idx = sorted([count_vectorizer.vocabulary_.get(f\"{word.lower()}\") for word in\n", - " [u\"clipper\", u\"Kapor\",\n", - " u\"monitor\", u\"gibberish\",\n", - " u\"Banks\", u\"private\",\n", - " u\"study\", u\"group\",\n", - " u\"Colorado\", u\"Business\"]])\n", - "\n", - "pd.DataFrame(newsgroups_counts[:5, words_idx].toarray(), columns=np.array(count_vectorizer.get_feature_names())[words_idx])" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
banksbusinessclippercoloradogibberishgroupkapormonitorprivatestudy
00020001000
10000100200
23000000010
30000040002
40101000000
\n", - "
" - ], - "text/plain": [ - " banks business clipper colorado ... kapor monitor private study\n", - "0 0 0 2 0 ... 1 0 0 0\n", - "1 0 0 0 0 ... 0 2 0 0\n", - "2 3 0 0 0 ... 0 0 1 0\n", - "3 0 0 0 0 ... 0 0 0 2\n", - "4 0 1 0 1 ... 0 0 0 0\n", - "\n", - "[5 rows x 10 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 70 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "C7WuoRgP9WE9", - "colab_type": "text" - }, - "source": [ - "Por exemplo, o valor 2 na interseção do documento 0 com a coluna `clipper` indica que a palavra _clipper_ aparece duas vezes no documento 0. Obviamente é possível que uma mesma palavra apareça em múltiplos documentos e mais óbvio ainda que um documento contenha múltiplas palavras." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UQzj-_QT9p7e", - "colab_type": "text" - }, - "source": [ - "O problema com essa abordagem é que não temos como medir relevância dos termos. E se o termo é super comum e aparece em quase todos documentos? E se o termo aparece muitas vezes no mesmo documento, mas poucas vezes nos outros?\n", - "\n", - "Essas perguntas não podem ser respondidas simplesmente com a contagem de termos acima. Para isso, precisamos do tf-idf." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AXBnOFk___QK", - "colab_type": "text" - }, - "source": [ - "O tf-idf é uma estatística baseada no _corpus_ composta de outras duas estatísticas:\n", - "\n", - "* $\\text{tf}(t, d)$, ou _term frequency_, é uma medida de quantas vezes o termo $t$ aparece no documento $d$. Algumas opções estão disponíveis, mas a mais simples é a contagem do número de ocorrências do termo no documento, $f_{t, d}$, exatamente o que computamos acima. Essa é a forma como sklearn define $tf$:\n", - "\n", - "$$\\text{tf}(t, d) = f_{t, d}$$\n", - "\n", - "* $\\text{idf}(t)$, ou _inverse document frequency_, é uma medida de relevância do termo em todos documentos do _corpus_. 
O sklearn a computa, seguindo valores _default_, da seguinte forma:\n", - "\n", - "$$\\text{idf}(t) = \\log{\\frac{1+n}{1 + d_{t}}} + 1$$\n", - "\n", - "onde $n$ é o número de documentos no _corpus_ e $d_{t}$ é o número de documentos no _corpus_ que contêm o termo $t$ ($0 < d_{t} \\leq n$).\n", - "\n", - "O tf-idf é calculado multiplicando esses dois valores:\n", - "\n", - "$$\\text{tf-idf}(t, d) = \\text{tf}(t, d) \\times \\text{idf}(t) = f_{t, d} \\times \\left(\\log{\\frac{1+n}{1 + d_{t}}} + 1\\right)$$\n", - "\n", - "O sklearn também normaliza todos documentos resultantes, ou seja todas linhas da matriz, para terem norma unitária. Em outras palavras, os elementos do vetor de tf-idf do documento $i$ são dados por:\n", - "\n", - "$$\\text{tf-idf}(i, j)_{\\text{normalizado}} = \\frac{\\text{tf-idf}(i, j)}{\\sqrt{\\text{tf-idf}(i, 1)^{2} + \\text{tf-idf}(i, 2)^{2} + \\cdots + \\text{tf-idf}(i, T)^{2}}}$$\n", - "\n", - "onde $T$ é o número de termos do _corpus_, ou seja, o tamanho do vocabulário." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bWpYWUMjCH8l", - "colab_type": "text" - }, - "source": [ - "O tf-idf é sempre um valor não negativo e quanto mais alto, maior a relevância do termo.\n", - "\n", - "Note como o tf aumenta de acordo com o número de ocorrências do termo no documento: quanto mais frequente o termo, mais relevante ele parece ser.\n", - "\n", - "O idf é uma medida de \"raridade\" do termo através de todo _corpus_: quanto mais alto, menos o termo aparece no _corpus_ e consequentemente mais informação ele traz.\n", - "\n", - "Multiplicando os dois, temos uma medida do quão relevante aquele termo é para aquele documento no _corpus_."
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b_N2VQnwDaey", - "colab_type": "text" - }, - "source": [ - "O sklearn provê um transformador, `TfidfTransformer`, que transforma de uma matriz de frequências, como a retornada pelo `CountVectorizer`, e retorna uma matriz de tf-idf:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Fyxgx0YhVwtF", - "colab_type": "code", - "colab": {} - }, - "source": [ - "tfidf_transformer = TfidfTransformer()\n", - "\n", - "tfidf_transformer.fit(newsgroups_counts)\n", - "\n", - "newsgroups_tfidf = tfidf_transformer.transform(newsgroups_counts)" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "evk8smtLWNtO", - "colab_type": "code", - "outputId": "bf99b51a-e276-480c-dee9-13713e85a00b", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - } - }, - "source": [ - "pd.DataFrame(newsgroups_tfidf[:5, words_idx].toarray(), columns=np.array(count_vectorizer.get_feature_names())[words_idx])" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
banksbusinessclippercoloradogibberishgroupkapormonitorprivatestudy
00.0000000.0000000.0812930.0000000.0000000.0000000.0963680.0000000.0000000.000000
10.0000000.0000000.0000000.0000000.1098940.0000000.0000000.1793520.0000000.000000
20.1481520.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0485510.000000
30.0000000.0000000.0000000.0000000.0000000.1452230.0000000.0000000.0000000.083477
40.0000000.1172480.0000000.1315680.0000000.0000000.0000000.0000000.0000000.000000
\n", - "
" - ], - "text/plain": [ - " banks business clipper ... monitor private study\n", - "0 0.000000 0.000000 0.081293 ... 0.000000 0.000000 0.000000\n", - "1 0.000000 0.000000 0.000000 ... 0.179352 0.000000 0.000000\n", - "2 0.148152 0.000000 0.000000 ... 0.000000 0.048551 0.000000\n", - "3 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.083477\n", - "4 0.000000 0.117248 0.000000 ... 0.000000 0.000000 0.000000\n", - "\n", - "[5 rows x 10 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 72 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "h9hI18kYDsuA", - "colab_type": "text" - }, - "source": [ - "Também podemos obter a matriz de tf-idf diretamente do _corpus_ sem ter que passar pela matriz de frequência com o transformador `TfidfVectorizer`:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "wPV4xrxzWlA-", - "colab_type": "code", - "colab": {} - }, - "source": [ - "tfidf_vectorizer = TfidfVectorizer()\n", - "\n", - "tfidf_vectorizer.fit(newsgroups.data)\n", - "\n", - "newsgroups_tfidf_vectorized = tfidf_vectorizer.transform(newsgroups.data)" + "text/plain": [ + " Height_n Score_n Height_n^2 ... Math Physics Unknown\n", + "0 0.265531 0.485549 0.070507 ... 0 0 0\n", + "1 0.490982 0.448940 0.241063 ... 0 0 0\n", + "2 0.421844 0.488439 0.177952 ... 0 0 1\n", + "3 0.556112 0.202312 0.309261 ... 0 0 0\n", + "4 0.366733 0.450867 0.134493 ... 0 1 0\n", + "5 0.505010 0.488439 0.255035 ... 0 1 0\n", + "6 0.405812 0.734104 0.164683 ... 0 0 0\n", + "7 0.330661 0.514451 0.109337 ... 0 1 0\n", + "8 0.410822 0.645472 0.168774 ... 0 1 0\n", + "9 0.333667 0.369942 0.111334 ... 
0 1 0\n", + "\n", + "[10 rows x 9 columns]" + ] + }, + "execution_count": 48, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "data_transformed = pd.concat([height_score_normalized_poly, course_discretized], axis=1)\n", + "\n", + "data_transformed.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "1NLD-pyliXWO" + }, + "source": [ + "Vale ressaltar que:\n", + "\n", + "* Poderíamos utilizar também o `ColumnTransformer` para compor (por isso, ele se encontra no módulo `sklearn.compose`) múltiplos `Pipeline` em diferentes variáveis.\n", + "* Os `Pipeline` não servem apenas para a transformação dos dados de treinamento. Eles também podem (e devem) ser usados para submeter os dados de teste e até de produção aos mesmos procedimentos dos dados de treinamento." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "SbShR7kMZGwE" + }, + "source": [ + "## _Outliers_\n", + "\n", + "_Outliers_, os famosos \"pontos fora da curva\", são observações que não parecem seguir o mesmo padrão dos demais dados. Eles podem vir de distribuições diferentes, ser erros na coleta de dados, erros de medição etc.\n", + "\n", + "Eles influenciam nossas análises e os nossos algoritmos ao apresentar comportamento destoante do resto do _data set_, impactando na média, variância, funções de perda e custo etc. Se fizer sentido, eles devem ser removidos ou transformados antes de prosseguirmos com a análise.\n", + "\n", + "No entanto, devemos julgar com cautela sua remoção: __alguns _outliers_ são dados autênticos e devem ser estudados com atenção__.
Por exemplo, a remoção de uma medição muito alta na temperatura de um reator seria um erro, pois essa medição pode estar nos indicando um potencial problema com o dispositivo.\n", + "\n", + "Abaixo estudamos algumas técnicas simples para encontrar _outliers_.\n", + "\n", + "![outlier](https://www.stats4stem.org/common/web/plugins/ckeditor/plugins/doksoft_uploader/userfiles/WithInfOutlier.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "u3bsTDv0pAN4" + }, + "source": [ + "Começamos criando uma cópia da variável `Height` do nosso _data set_ para não impactar o original:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "tQ7AQztcZkYx" + }, + "outputs": [], + "source": [ + "height_outlier = data.Height.copy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "VQNHBAu4pHcp" + }, + "source": [ + "Adicionamos dez _outliers_ que representam pessoas estranhamente baixas ou estranhamente altas para o padrão que estamos observando:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 208 + }, + "colab_type": "code", + "id": "nX2R3V0HZI0w", + "outputId": "6acbd63c-820e-485a-cde4-72a69fefe13d" + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'pd' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mheight_outlier_idx\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mIndex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mchoice\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mheight_outlier\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m10\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreplace\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mtoo_short_idx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mIndex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mheight_outlier_idx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mtoo_tall_idx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mIndex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mheight_outlier_idx\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", + "\u001b[1;31mNameError\u001b[0m: name 'pd' is not defined" + ] + } + ], + "source": [ + "height_outlier_idx = pd.Index(np.random.choice(height_outlier.index, 10, replace=False))\n", + "\n", + "too_short_idx = pd.Index(height_outlier_idx[:5])\n", + "too_tall_idx = pd.Index(height_outlier_idx[5:])\n", + "\n", + "height_outlier[too_short_idx] = np.random.normal(loc=1.30, scale=0.5, size=5)\n", + "height_outlier[too_tall_idx] = np.random.normal(loc=2.20, scale=0.5, size=5)\n", + "\n", + "outlier_idx = too_short_idx | too_tall_idx\n", + "\n", + "height_outlier[outlier_idx]\n" + ] + }, + { + "cell_type": 
"markdown", + "metadata": { + "colab_type": "text", + "id": "mwNbTzDnpoDL" + }, + "source": [ + "Note que nem todos dados gerados se tornaram realmente _outliers_. Como geramos de uma distribuição aleatória, corremos esse risco.\n", + "\n", + "No entanto, temos alguns dados estranhos como 0.51 m e 2.73 m." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "x5pwD_1EqRNZ" + }, + "source": [ + "No _boxplot_ padrão, os dados mais extremos são mostrados como pontos fora do alcance dos _whiskers_ (as barrinhas do _box plot_).\n", + "\n", + "No caso abaixo, notamos três pontos acima e três pontos abaixo do considerado \"dentro da faixa normal\"." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 695 + }, + "colab_type": "code", + "id": "hRMVhYz3b2KH", + "outputId": "9e090cef-804c-4f17-958b-5e25154662db" + }, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAt0AAAKmCAYAAACR0iLwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3W+s3nV9//HXdZ22YbWupz1WOPyx\naLYQElgwcyEn7SAUsCSjnGLWQZg62YYy0G6LGe3449KCjpKFZGNk1c0QWZwhZFQ5woQBC3i64zLv\nLKWS2p9RKv0HHHrqiujsua7fDfXEyr9TPe/raq/zeNw61znf61yv3sGn33zOdTXa7XY7AABAmWa3\nBwAAQK8T3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBA\nMdENAADF5nR7wEw7cODltFrtbs8AAKBHNZuNLFr0lqN6Ts9Fd6vVFt0AABxTHC8BAIBiohsAAIqJ\nbgAAKCa6AQCgmOgGAIBiohsAAIqJbgAAKCa6AQCgmOgGAIBiohsAAIp17GPgr7vuujz33HNpNpuZ\nP39+brnllpx55plHXDM5OZnbbrstX/3qV9NoNPLhD384a9as6dREAAAo0bHo3rRpU9761rcmSR57\n7LHceOON2bJlyxHXjIyMZNeuXXn00UczMTGR1atXZ2hoKKeeemqnZgIAwIzr2PGSnwZ3khw6dCiN\nRuNV1zz88MNZs2ZNms1mFi9enIsuuihf+cpXOjURAABKdOxOd5LcdNNN2bp1a9rtdv7pn/7pVT/f\nu3dvTj755KnHg4OD2bdv31G9xsDAgl96JwAAzKSORvcnP/nJJMkXv/jF3HHHHfnHf/zHGX+N8fFD\nabXaM/57AQAgSZrNxlHf6O3Ku5esXr06//Vf/5UDBw4c8f3BwcHs2bNn6vHevXtz0kkndXoeAA
DM\nqI5E98svv5y9e/dOPX7iiSeycOHC9Pf3H3HdJZdckvvvvz+tVisvvfRSHnvssaxcubITEwEAoExH\njpe88sor+dM//dO88soraTabWbhwYTZv3pxGo5Frrrkma9euzdlnn53h4eH8z//8T9773vcmSa6/\n/vqcdtppnZgIAABlGu12u6cOQDvTDQBApePmTDcAAMwmohsAAIqJbgAAKCa6AQCgmOgGmMUmJg7k\n9ts35uDBiW5PAehpohtgFhsZ2ZKdO3fkwQcf6PYUgJ4mugFmqYmJAxkdfTLtdjujo0+52w1QSHQD\nzFIjI1umPteg1Wq52w1QSHQDzFJjY1szOXk4STI5eThjY1u7vAigd4lugFlqaGhZ+vrmJEn6+uZk\naGhZlxcB9C7RDTBLrVp1eZrNRpKk2Wzmssve1+VFAL1LdAPMUv39i7J8+flpNBpZvvy8LFzY3+1J\nAD1rTrcHANA9q1Zdnt27n3OXG6BYo91ut7s9YiaNjx+a+mt8AACYac1mIwMDC47uOUVbAACAnxDd\nAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVE\nNwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx\n0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQ\nTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdALPYxMSB3H77xhw8ONHtKQA9\nTXQDzGIjI1uyc+eOPPjgA92eAtDTRDfALDUxcSCjo0+m3W5ndPQpd7sBColugFlqZGRLWq12kqTV\narnbDVBIdAPMUmNjWzM5eThJMjl5OGNjW7u8CKB3iW6AWWpoaFn6+uYkSfr65mRoaFmXFwH0LtEN\nMEutWnV5ms1GkqTZbOayy97X5UUAvUt0A8xS/f2Lsnz5+Wk0Glm+/LwsXNjf7UkAPWtOtwcA0D2r\nVl2e3bufc5cboFij3W63uz1iJo2PH5r6a3wAAJhpzWYjAwMLju45RVsAAICfEN0AAFBMdAMAQDHR\nDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBM\ndAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAU\nE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAA\nxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMA\nQDHRDQAAxeZ04kUOHDiQG264Ibt27cq8efOydOnSbNy4MYsXLz7iuvXr1+c///M/s2jRoiTJJZdc\nkj/5kz/pxEQAACjTaLfb7eoXmZiYyI4dO3LuuecmSTZt2pSDBw/mU5/61BHXrV+/PmeddVbe//73\n/8KvNT5+KK1W+T8JAIBZqtlsZGBgwdE9p2jLEfr7+6eCO0nOOeec7NmzpxMvDQAAXdfxM92tVitf\n+MIXsmLFitf8+T333JNVq1bluuuuy7e+9a0OrwMAgJnXkeMlP2vDhg3Zv39//v7v/z7N5pHNv3//\n/ixZsiTNZjNf/OIX87d/+7d57LHH0tfX18mJAAAwozoa3Zs2bcqOHTuyefPmzJs3702vP/fcc/PA\nAw/klFNOmfZrONMNAEClY/ZMd5Lceeedefrpp3P33Xe/bnDv379/6uuvfvWraTabOfHEEzs1EQAA\nSnTkTvfOnTtz6aWX5vTTT88JJ5yQJDn11FNz9913Z3h4OJ
/5zGdy4okn5kMf+lDGx8fTaDSyYMGC\n3HDDDTnnnHOO6rXc6QYAoNIvcqe742e6q4luAAAqHdPHSwAAYLYS3QAAUEx0AwBAMdENAADFRDcA\nABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdEN\nAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0\nAwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT\n3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0A8xiExMHcvvt\nG3Pw4ES3pwD0NNENMIuNjGzJzp078uCDD3R7CkBPE90As9TExIGMjj6Zdrud0dGn3O0GKCS6AWap\nkZEtabXaSZJWq+VuN0Ah0Q0wS42Nbc3k5OEkyeTk4YyNbe3yIoDeJboBZqmhoWXp65uTJOnrm5Oh\noWVdXgTQu0Q3wCy1atXlaTYbSZJms5nLLntflxcB9C7RDTBL9fcvyvLl56fRaGT58vOycGF/tycB\n9Kw53R4AQPesWnV5du9+zl1ugGKNdrvd7vaImTQ+fmjqr/EBAGCmNZuNDAwsOLrnFG0BAAB+wvES\nmCW2bn0qo6NPdnsGx5iffiCO89z8vOXLz8+yZed1ewb0DHe6AWaxgwcP5uDBg92eAdDznOkGmMU2\nbbo1SbJu3S1dXgJw/HCmGwAAjkGiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY\n6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAo\nJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAA\nioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYA\ngGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroB\nAKCY6AYAgGJzOvEiBw4cyA033JBdu3Zl3rx5Wbp0aTZu3JjFixcfcd0rr7ySv/zLv8z27dvT19eX\ndevW5YILLujERAAAKNORO92NRiN//Md/nEceeSQjIyM57bTT8jd/8zevuu6zn/1sFixYkH//93/P\n5s2bc/PNN+fll1/uxEQAACjTkeju7+/PueeeO/X4nHPOyZ49e1513b/927/liiuuSJKcfvrpOeus\ns/LUU091YiIAAJTp+JnuVquVL3zhC1mxYsWrfrZnz56ccsopU48HBwezb9++Ts4DAIAZ15Ez3T/r\n1ltvzfz58/P+97+/5PcPDCwo+b0AvWju3L4kyZIlb+3yEoDe1tHo3rRpU5599tls3rw5zearb7Kf\nfPLJ2b1799QfWO7du/eIYynTMT5+KK1We0b2AvS6H/1oMknywgv/2+UlAMePZrNx1Dd6O3a85M47\n78zTTz+du+++O/PmzXvNay655JLcd999SZLvfOc72bZtW377t3+7UxMBAKBER6J7586d+fSnP53n\nn38+V155ZYaHh3P99dcnSYaHh7N///4kyR/90R/le9/7Xi6++OJ85CMfycaNG7NggeMiAAAc3zpy\nvOTXf/3Xs2PHjtf82Ze+9KWpr+fPn5+/+7u/68QkAADoGJ9ICQAAxUQ3AAAUE90AAFBMdAMAQDHR\nDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBM\ndAMAQDHRDQAAxUQ3AA
AUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAU\nE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFCs0W63290eMZPGxw+l1eqpf9JR+5d/uTff/e6z3Z4B\nHAd27frxfyve8Y6lXV4CHA9OO21prrrqg92e0XXNZiMDAwuO6jlzirbQRd/97rPZsfP/pe+E/m5P\nAY5xrcm+JMn/++6LXV4CHOsmfzDR7QnHNdHdo/pO6M/8pRd2ewYA0CO+/+zj3Z5wXHOmGwAAiolu\nAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAoNu3o/uxnP/ua37/n\nnntmbAwAAPSiaUf33Xff/Zrf/4d/+IcZGwMAAL1ozptdMDY2liRptVr52te+lna7PfWz5557Lm95\ny1vq1gEAQA940+i+6aabkiQ//OEPc+ONN059v9FoZMmSJbn55pvr1gEAQA940+h+4oknkiQ33HBD\n7rjjjvJBAADQa940un/qZ4O71Wod8bNm05ugAADA65l2dG/fvj0bN27Mjh078sMf/jBJ0m6302g0\n8swzz5QNBACA4920o3v9+vW54IIL8qlPfSonnHBC5SYAAOgp047u3bt358///M/TaDQq9wAAQM+Z\n9mHsiy++OKOjo5VbAACgJ73hne6/+Iu/mLqz/X//93/56Ec/mt/8zd/M2972tiOu864mAADw+t4w\nupcuXXrE41/7tV8rHQMAAL3oDaP7ox/9aKd2AABAz5r2H1L+9OPgf968efNy0kkn5ZRTTpmxUQAA\n0EumHd033XRTnn/++SRJf39/JiYmkiQDAwN58cUXc8YZZ+TOO+/M6aefXjIUAACOV9N+95Lf/d3f\nzQc+8IF8/etfz+joaL7+9a/nD/7gD3LllVfmv//7v3PWWWdlw4YNlVsBAOC4NO3ovvfee/Pxj398\n6oNxTjjhhPzZn/1ZPve5z2X+/PlZv359nn766bKhAABwvJp2dM+fPz/btm074nvbt2/Pr/zKr/z4\nFzWn/asAAGBWmfaZ7rVr1+YP//APs2LFigwODmbfvn35j//4j9xyyy1JfvyHlitXriwbCgAAx6tp\nR/fq1atz1lln5ZFHHsnzzz+f008/Pffdd9/Ue3dfcMEFueCCC8qGAgDA8Wra0Z38+MNxfEAOAAAc\nnTeM7ltuuSW33nprkiM/Ev7n+Rh4AAB4fW8Y3aeeeurU1z//kfAAAMD0vGF0f+QjH5n62kfCAwDA\nL+aoznRv3bo1Dz30UF566aVs3rw527Zty6FDhzI0NFS1DwAAjnvTju5//ud/zr333ps1a9bkkUce\nSfLjD8j55Cc/KbqPMQcPTmTyBxP5/rOPd3sKANAjJn8wkYMHj+p+LT9j2p9o87nPfS733HNPPvzh\nD099EM673vWufPvb3y4bBwAAvWDa/3fl5ZdfzuDgYJJMvYvJ4cOHM3fu3Jpl/MIWLuzPC987nPlL\nL+z2FACgR3z/2cezcGF/t2cct6Z9p/u3fuu38pnPfOaI7917770599xzZ3wUAAD0kmnf6b755ptz\n7bXX5v7778/LL7+clStX5i1veUs+/elPV+4DAIDj3rSj++1vf3v+9V//Ndu2bcvu3btz8skn5zd+\n4zde9wNzAACAH3vT6L7qqqveNKw///nPz9ggAADoNW8a3WvWrJn6ut1u59Zbb80nPvGJ0lEAANBL\n3jS6L7/88iMe//Vf//WrvgcAALy+ab97yU85ww0AAEfnqKMbAAA4Om96vGRsbOyIx4cPH87Xvva1\ntNvtqe/5GHgAAHh9bxrdN9100xGP+/v7c+ONN049bjQaefzxx2d+GQAA9Ig3je4nnniiEzsAAKBn\nOdMNAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcA
ABQT3QAA\nUEx0AwBAMdENAADF5nR7ADUmfzCR7z/7eLdnAMe41uEfJEmac07o8hLgWDf5g4kkb+v2jOOW6O5B\np522tNsTgOPErl3PJknecZr/IQXezNs0xi+h0W63290eMZPGxw+l1eqpfxJAmU2bbk2SrFt3S5eX\nABw/ms1GBgYWHN1zirYAAAA/IboBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGKiGwAAiolu\nAAAoJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGIdi+5NmzZlxYoVOeOMM/LNb37zNa+5\n6667MjQ0lOHh4QwPD2fDhg2dmgcAAGXmdOqFLrzwwnzwgx/M7//+77/hdatXr866des6tAoAAOp1\nLLrf8573dOqlAADgmNKx6J6uhx56KKOjo1myZEk+9rGP5d3vfvdRPX9gYEHRMoDeM3duX5JkyZK3\ndnkJQG87pqL7yiuvzLXXXpu5c+dm69atue666/Lwww9n0aJF0/4d4+OH0mq1C1cC9I4f/WgySfLC\nC//b5SUAx49ms3HUN3qPqXcvWbJkSebOnZskWbZsWQYHB7Nz584urwIAgF/OMRXd+/fvn/r6mWee\nye7du/POd76zi4sAAOCX17HjJbfddlseffTRvPjii7n66qvT39+fhx56KNdcc03Wrl2bs88+O3fe\neWe2b9+eZrOZuXPn5o477siSJUs6NREAAEo02u12Tx2AdqYbYPo2bbo1SbJu3S1dXgJw/Djuz3QD\nAEAvEt0AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHR\nDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBM\ndAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAU\nE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAAxUQ3AAAUE90AAFBMdAMAQDHRDQAA\nxUQ3AAAUE90AAFBMdAMAQDHRDQAAxRrtdrvd7REzaXz8UFqtnvonwYzYuvWpjI4+2e0ZHGN27Xo2\nSfKOdyzt8hKONcuXn59ly87r9gw4JjWbjQwMLDiq58wp2gLAcWDhwoXdngAwK7jTDQAAR+EXudPt\nTDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBA\nMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QCz\n2MTEgdx++8YcPDjR7SkAPU10A8xiIyNbsnPnjjz44APdngLQ00Q3wCw1MXEgo6NPpt1uZ3T0KXe7\nAQqJboBZamRkS1qtdpKk1Wq52w1QSHQDzFJjY1szOXk4STI5eThjY1u7vAigd4lugFlqaGhZ+vrm\nJEn6+uZkaGhZlxcB9C7RDTBLrVp1eZrNRpKk2Wzmssve1+VFAL1LdAPMUv39i7J8+flpNBpZvvy8\nLFzY3+1JAD1rTrcHANA9q1Zdnt27n3OXG6BYo91ut7s9YiaNjx+a+mt8AACYac1mIwMDC47uOUVb\nAACAnxDdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx\n0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVEN8AsNjFxILffvjEHD050ewpATxPdALPYyMiW7Ny5\nIw8++EC3pwD0NNENMEtNTBzI6OiTabfbGR19yt1ugEKiG2CWGhnZklarnSRptVrudgMUEt0As9TY\n2NZMTh5OkkxOHs7Y2NYuLwLoXaIbYJYaGlqWvr45
SZK+vjkZGlrW5UUAvUt0A8xSq1ZdnmazkSRp\nNpu57LL3dXkRQO8S3QCzVH//oixffn4ajUaWLz8vCxf2d3sSQM+a0+0BAHTPqlWXZ/fu59zlBijW\naLfb7W6PmEnj44em/hofAABmWrPZyMDAgqN7TtEWAADgJ0Q3AAAUE90AAFBMdAMAQDHRDQAAxToS\n3Zs2bcqKFStyxhln5Jvf/OZrXjM5OZkNGzbkoosuysUXX5z777+/E9MAAKBcR6L7wgsvzOc///mc\ncsopr3vNyMhIdu3alUcffTT33Xdf7rrrrjz33HOdmAcAAKU6Et3vec97Mjg4+IbXPPzww1mzZk2a\nzWYWL16ciy66KF/5ylc6MQ8AAEodM59IuXfv3px88slTjwcHB7Nv376j/j1H+0blAABQ7ZiJ7pni\nEykBAKh0XH8i5eDgYPbs2TP1eO/evTnppJO6uAgAAGbGMRPdl1xySe6///60Wq289NJLeeyxx7Jy\n5cpuzwIAgF9aR6L7tttuy3nnnZd9+/bl6quvzu/8zu8kSa655pps27YtSTI8PJxTTz01733ve/N7\nv/d7uf7663Paaad1Yh4AAJRqtNvtnjoA7Uw3AACVjusz3QAA0KtENwAAFBPdAABQTHQDAEAx0Q0A\nAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQD\nAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPd\nAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVE\nNwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx\n0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQ\nTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAA\nFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0A\nAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQD\nAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0AAMVENwAAFBPd\nAABQTHQDAEAx0Q0AAMVENwAAFBPdAABQTHQDAEAx0Q0wi01MHMjtt2/MwYMT3Z4C0NNEN8AsNjKy\nJTt37siDDz7Q7SkAPU10A8xSExMHMjr6ZNrtdkZHn3K3G6CQ6AaYpUZGtqTVaidJWq2Wu90AhUQ3\nwCw1NrY1k5OHkySTk4czNra1y4sAelfHovvb3/52rrjiiqxcuTJXXHFFvvOd77zqmrvuuitDQ0MZ\nHh7O8PBwNmzY0Kl5ALPO0NCy9PXNSZL09c3J0NCyLi8C6F1zOvVCf/VXf5Wrrroqw8PD+dKXvpRP\nfOITuffee1913erVq7Nu3bpOzQKYtVatujyjo09mcjJpNpu57LL3dXsSQM/qyJ3u8fHxfOMb38il\nl16aJLn00kvzjW98Iy+99FInXh6A19DfvyjLl5+fRqOR5cvPy8KF/d2eBNCzOnKne+/evTnxxBPT\n19eXJOnr68vb3/727N27N4sXLz7i2oceeiijo6NZsmRJPvaxj+Xd7373Ub3WwMCCGdsN0Os+9KEP\n5Pnn9+bqqz+YRYve2u05AD2rY8dLpuPKK6/Mtddem7lz52br1q257rrr8vDDD2fRokXT/h3j44em\n/hofgDczNx//
+E05fDh54YX/7fYYgONCs9k46hu9HTleMjg4mP3792dycjJJMjk5meeffz6Dg4NH\nXLdkyZLMnTs3SbJs2bIMDg5m586dnZgIAABlOhLdAwMDOfPMM/PlL385SfLlL385Z5555quOluzf\nv3/q62eeeSa7d+/OO9/5zk5MBACAMo12u92Rsxjf+ta3sn79+nzve9/Lr/7qr2bTpk1517velWuu\nuSZr167N2WefnXXr1mX79u1pNpuZO3du1q5dm/PPP/+oXsfxEgAAKv0ix0s6Ft2dIroBAKh0zJ7p\nBgCA2Ux0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADFRDcAABQT3QAAUEx0AwBAMdENAADF\nRDcAABQT3QAAUEx0AwBAMdHAkSFcAAAAe0lEQVQNAADFRDcAABQT3QAAUEx0AwBAMdENAADF5nR7\nwExrNhvdngAAQA/7RXqz0W632wVbAACAn3C8BAAAioluAAAoJroBAKCY6AYAgGKiGwAAioluAAAo\nJroBAKCY6AYAgGKiGwAAioluAAAoJroBAKCY6AYAgGL/H0DD/OPJX0Z9AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.boxplot(height_outlier, orient=\"vertical\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "MOKP49JMqTog" + }, + "source": [ + "Uma primeira abordagem bem simples é encontrar os pontos do _box plot_ acima.\n", + "\n", + "Tudo que estiver fora da faixa $[Q1 - 1.5 \\times \\text{IQR}, Q3 + 1.5 \\times \\text{IQR}]$ é considerado um ponto anômalo para aquele padrão:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "z_h0zaVDce0N", + "outputId": "86b9e772-6438-4820-87ba-dab83a4b1dd8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Faixa considerada \"normal\": [1.18575, 2.24175]\n" + ] + } + ], + "source": [ + "q1 = height_outlier.quantile(0.25)\n", + "q3 = height_outlier.quantile(0.75)\n", + "iqr = q3 - q1\n", + "\n", + "non_outlier_interval_iqr = [q1 - 1.5 * iqr, q3 + 1.5 * iqr]\n", + "\n", + "print(f\"Faixa considerada \\\"normal\\\": {non_outlier_interval_iqr}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "wsuVvr8hq4Rc" + }, + "source": [ + "Agora podemos identificar quais pontos encontram-se fora desse intervalo, ou seja, podem ser considerados _outliers_:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "colab_type": "code", + "id": "hm78PWbhc9Dz", + "outputId": "ee3995ea-8a63-4c90-b3dd-57ba673887ee" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "29 0.516665\n", + "38 2.943781\n", + "48 1.058498\n", + "68 2.737088\n", + "91 2.272000\n", + "92 1.164000\n", + "Name: Height, dtype: float64" + ] + }, + "execution_count": 53, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + 
} + ], + "source": [ + "outliers_iqr = height_outlier[(height_outlier < non_outlier_interval_iqr[0]) | (height_outlier > non_outlier_interval_iqr[1])]\n", + "\n", + "outliers_iqr" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "XcF70kmerGEq" + }, + "source": [ + "Se estivermos seguros de que esses pontos representam de fato _outliers_ e que sua remoção não traz prejuízo à nossa análise, então podemos removê-los:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "BVRJS9DNeb9z" + }, + "outputs": [], + "source": [ + "height_no_outlier_iqr = height_outlier.drop(index=outliers_iqr.index)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "urvTyUfHrVrJ" + }, + "source": [ + "Uma segunda abordagem é observar as estatísticas descritivas dos dados.\n", + "\n", + "Repare no histograma abaixo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 726 + }, + "colab_type": "code", + "id": "bc_paOePfHJ5", + "outputId": "6840da1c-bae6-4465-8aa7-87f69928e182" + }, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAtMAAALFCAYAAAABe2+3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzs3XmMpHd97/vPU/vaXb1U99RszNgH\nzATsw+U69uX4OueCcRjdeDSWUGJfjIxscGScyII/guePEcbEUjSjyFKwTEgskog/iJBBgLwQL/ci\nkTjnZEE+YLxjz3jGPb1WdXft+3P/6K72LN3T1dVV/Wzvl4TUU1Vd/bUfuvvj73yf788wTdMUAAAA\ngC3zWV0AAAAA4FSEaQAAAKBHhGkAAACgR4RpAAAAoEeEaQAAAKBHhGkAAACgR4RpAAAAoEeEaQAA\nAKBHhGkAAACgR4RpAAAAoEeEaQAAAKBHhGkAAACgR4RpAAAAoEcBqwvo1uJiSe22aXUZjjM2llA2\nW7S6DGyA62NvXB974/rYG9fH3rg+6/P5DI2MxLf0OY4J0+22SZjuEf/e7I3rY29cH3vj+tgb18fe\nuD79wZgHAAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAA\nANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA\n0CPCNAAAANAjwjQAAADQI8I0AAAA0CPCNAAAANAjwjQAAADQI8I0AAAA0KNNw/Ti4qLuuecefeYz\nn9GRI0f0p3/6p8rlcpe8rlKp6Ctf+YpuvvlmHT58WD//+c+7eg4AAABwqk3DtGEY+tKXvqRnn31W\nTz75pPbt26e//Mu/vOR13/3ud5VIJPT888/rO9/5jo4fP65SqbTpcwAAAIBTbRqmU6mUrr/++rU/\nf+xjH9O5c+cued3PfvYz3XbbbZKkAwcO6KMf/ah+8YtfbPocAAAA4FRbmplut9v6x3/8R33qU5+6\n5Llz585pz549a3/OZDKamZnZ9DkAAADAqQJbefGf//mfKxaL6fOf//yg6tnQ2Fhix7+mW6TTSatL\nwGVwfeyN62NvXB974/rYG9enP7oO0ydOnNC7776r73znO/L5Lm1o7969W1NTUxodHZUkTU9Pr42H\nXO65bmWzRbXb5pY+ByvfKPPzBavLwAa4PvbG9bE3ro+9cX3sjeuzPp/P2HIDt6sxj0ceeUS/+c1v\n9NhjjykUCq37msOHD+sHP/iBJOn06dN6+eWXdeONN276HAB4WbMtlWrNgfyv2bb6nw4A3G/TzvRb\nb72lv/mbv9GBAwd0++23S5L27t2rxx57TEePHtXf/u3fanJyUl/84hd17Ngx3XzzzfL5fPrmN7+p\nRGIl2V/uOQDwslqjqf94bXYg7/27hyYVCG9pmg8AsEWGaZqOmJ1gzKM3/DWOvXF97G0nrk+pNtgw\nHXdxmOb7x964PvbG9VnfwMY8AAAAAFyKMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA\n9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0\niDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSI\nMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9Igw\nDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDAN\nAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0AA
AD0iDANAAAA9IgwDQAAAPSIMA0A\nAAD0iDANAAAA9IgwDQAAAPSIMA0AAAD0iDANAAAA9IgwDQAAAPSIMA0ADtBottVum1aXAQC4SMDq\nAgAAl3duoaRf/OqcktGQPv27exUO+q0uCQCwis40ANiUaZp69XRO/+9/vqdw0K/FQk0v/MdZ1Rot\nq0sDAKwiTAOADTVbbb348oz+8/V57ZtM6Jb/dkD/18d3a7FQ1/P/cVa1OoEaAOyAMA0ANlOuNvTs\nv53VO+fy+tgHx/XfP7ZbwYBPe9MJffLju7VUqOv5/yRQA4AdEKYBwGb+xyuzWi7V9MmP79E1V47J\nMIy15/Z0AnVxNVAz8gEAliJMA4CNVGpNnVso6cP7R7RvIrHua/akE/rk/7ZHi4WaXn47u8MVAgDO\nR5gGABt5d6Yg05QO7h667Ov2pOPaP5nUb99bVrPV3qHqAAAXI0wDgI2cms4rlQhpJBne9LUf3p9S\nvdnWqen8DlQGAFgPYRoAbKJQrmt+qbppV7pjYiSqVCKkN84syTQ50AUArECYBgCbOD1dkCQdzHQX\npg3D0FX7U8rla1pYrg6yNADABgjTAGADpmnqnem8JkaiSkSDXX/eFbuHFfT79MaZpQFWBwDYCGEa\nAGxgsVDTcrGug5nklj4vGPDpij1DOj1dULXeHFB1AICNEKYBwAZOTRdkGNIHdm0tTEvSVftSapum\nfvve8gAqAwBcDmEaACxmmqZOTee1ezyuSCiw5c9PJcOaHI3qjTNLanMjIgDsKMI0AFhsbrGicrXZ\n9Y2H67lq/4hK1aam5kt9rAwAsBnCNABY7NR0XgG/seGJh93YP5FQNOznRkQA2GFd/X3iiRMn9Oyz\nz2pqakpPPvmkPvShD13ymq997Wt644031v78xhtv6LHHHtNNN92kRx99VN///vc1MTEhSfr4xz+u\nBx98sE//CADgXM1WW6dnCto3kVAw0Ht/w+cz9MG9Kf367awK5bqSsVAfqwQAbKSrMH3TTTfpzjvv\n1B133LHha06ePLn28euvv64vfOELuvHGG9ceu/XWW/XAAw9so1QAcJ/XTi+q3mhva8Sj48o9Q/r1\n21mdnS3qdw6O9qE6AMBmumqDXHvttcpkMl2/6Q9/+EMdOXJEoRCdEQC4nDfPLsnvM5QZj2/7vZKx\nkFKJkN5jbhoAdkzfZ6br9bqefPJJffazn73g8aefflpHjhzR3XffrZdeeqnfXxYAHOnMbEGjQ2H5\nfUZf3m9POqHZxbLqjVZf3g8AcHlb38G0iRdeeEG7d+/WoUOH1h67/fbbde+99yoYDOrFF1/Ufffd\np2eeeUYjIyNdv+/YWO835nhdOr31vbXYOVwfexvk9Wm12npvrqgPHxhVMhHpy3te9YFRvXIqp1yx\noVgsrPRorC/va1d8/9gb18feuD790fcw/aMf/eiSrnQ6nV77+IYbblAmk9Fbb72l6667ruv3zWaL\narfZn7pV6XRS8/MFq8vABrg+9jbo63NmtqB6s62hWFCFYrUv7xkL+xQK+vTbs4sql2uab7m3Q833\nj71xfeyN67M+n8/YcgO3r2MeMzMz+uUvf6kjR45c8Pjs7Ozax6+99pqmpqZ08ODBfn5pAHCc0zMr\nv8jGh/vTlZYkn2Fobzqh9+ZpQADATuiqM/3www/rueee08LCgu666y6lUik9/fTTuueee3T//ffr\n6quvliT9+Mc/1ic/+UkNDw9f8PmPPPKIXnnlFfl8PgWDQZ08efKCbjUAeNGp6byiYb+SsWBf33dP\nOq53zuV1ejqvq68Y6+t7AwAuZJimM86eZcyjN/w1jr1xfext0NfnG3//74qGA7r+dyb7+r71Rks/\n+P9+q09fu0//z00f7Ot72wnfP/bG9bE3rs/6LB/zAAB0p9FsaWq+pP2T/b8BKBT0a3Ikpt+8k+37\newMALkSYBgALnJktqtU29
YFdg7mbfm86rulsWQtLlYG8PwBgBWEaACxwajovSQPpTEvS3omVv6b8\n1dt0pwFgkAjTAGCBU9N5DcdXTiwchKF4SOlUVL/67cJA3h8AsKLve6YBAJs7NV3QwcyQDKM/Jx+u\n56NXjuqf/9c55Qo1hUP+vr53OBhQgHYMABCmAWCnlatNzeTK+sRH+rvF42If3j+in/9ySk/966m+\nj5P87qFJBcL8CgEA+goAsMPenVmZlz6YGRro1zm4e0jBgE/vzZcG+nUAwMsI0wCww06tnnx4YMBh\n2u/3afd4XO/NFeWQIwUAwHEI0wCww05N55VORZSI9vfkw/XsGY+rWm9pqVgf+NcCAC8iTAPADjs9\nnR/4iEfHrtGYJGkmV96RrwcAXkOYBoAdtFyqK5uv6cCunQnTiVhQiWhQs4RpABgIwjQA7KDOYS1X\n7N6ZMC1Jk6NRzeTKzE0DwAAQpgFgB52ezsswpA8M6OTD9ewajaneaGupWNuxrwkAXkGYBoAddGq6\noN3j8b4fonI5k5256Wxlx74mAHgFYRoAdohpmjo1ndfBHZqX7khEV+amuQkRAPqPMA0AOyS7XFWx\n0tDBzM6NeHTsGo1pdpG5aQDoN8I0AOyQs/NFSer70d7d2DUWVb3RVq7A3DQA9BNhGgB2yEx2Zcwi\nMxbb8a/dmZtmRR4A9BdhGgB2yHSurKF4SLHI4E8+vFg8ElQyFlwL9ACA/iBMA8AOmcmV104ktMLK\n3HRFbeamAaBvCNMAsENmstaG6cnRmBrNthbzzE0DQL8QpgFgBxQrDRUrDUvmpTs6QZ4VeQDQP4Rp\nANgBnVllKzvTsUhAQzH2TQNAPxGmAWAHTOdKkqRdFnamO19/brGidpu5aQDoB8I0AOyAmWxZAb+h\n8eGIpXV05qZzhaqldQCAWxCmAWAHzOTKmhiJye+z9sfu+3PTFUvrAAC3IEwDwA6wei1eRzQc0HA8\npFn2TQNAXxCmAWDAmq225hYrlm7yON/kaFRzSxWZ7JsGgG0jTAPAgC0sV9Vqm7boTEtSOhVVo9nW\nUrFudSkA4HiEaQAYMDusxTvfxEhUkjS/xNw0AGwXYRoABswua/E6EtGgIiG/5hcJ0wCwXYRpABiw\nmWxZQ7Gg4pGg1aVIkgzDUDq1MjcNANgewjQADJhdNnmcLz0SVaHcUKXWtLoUAHA0wjQADNhMrmyb\nEY+OidTK4THMTQPA9hCmAWCAipWGCuWGdo3GrS7lAmNDEfkMaX6JkxABYDsI0wAwQDO51U0eNutM\n+/0+jQ5F6EwDwDYRpgFggDpr8TI2m5mWVlbkdXZgAwB6Q5gGgAGazpXk9xkaX51RtpN0Kqp221Qu\nz6gHAPSKMA0AAzSTLWtiJCq/z34/btMpDm8BgO2y3093AHARO67F64hFAkpEg5rj8BYA6BlhGgAG\npNVua26xosyYvTZ5nC+dWrkJ0TSZmwaAXhCmAWBAFpZWbu6za2daWjm8pVJrqVTh8BYA6AVhGgAG\nZNqma/HON7E6N83R4gDQG8I0AAxIZy2enTvTqWRYAb/BTYgA0CPCNAAMyEyupGQsqEQ0aHUpG/IZ\nhtKpKGEaAHpEmAaAAZnJ2neTx/nSqagW8zU1mm2rSwEAxyFMA8CA2Hkt3vnSqahMSQvLdKcBYKsI\n0wAwAOVqU/lywyFheuV0RvZNA8DWEaYBYAA6Xd7OKYN2Fgr6NZwIaWGZY8UBYKsI0wAwAPNLK8F0\nfLXra3fjQxFll6sc3gIAW0SYBoAB6HSmx4ft35mWpLHhiKr1lspVDm8BgK0gTAPAACwsVRUN+xWP\nBKwupSvjwysddEY9AGBrCNMAMADzyxWND0dlGIbVpXRlJBmWYUhZwjQAbAlhGgAGYGG5utbtdQK/\n36eRZFjZPGEaALaCMA0AfWaaphaWK47Y5HG+8WFuQgSArSJMA0Cf5csN1RttR3WmJWlsKKJ
6s61C\nuWF1KQDgGIRpAOizhaXVTR4O60yPrYZ/5qYBoHuEaQDos/nOgS0O60ynEmH5fQYbPQBgCwjTANBn\nC50DWxyyY7rD5zM0OsRNiACwFYRpAOizheWKhmJBhUN+q0vZsrHhiHL5qtrchAgAXSFMA0CfzS9V\nHTcv3TE+HFGzZWq5WLe6FABwBMI0APTZwnLFcZs8OsaGuAkRALaCMA0AfdRum8rla47bMd0xFA8p\nGPBxEyIAdIkwDQB9lCtU1Wqbju1MG4ahsaEINyECQJcI0wDQR2ubPBzamZakseGwFvM1tdrchAgA\nmyFMA0AfOXXH9PnGhqNqm6YWCzWrSwEA2yNMA0AfLSxVZRjS6JBzw/Q4NyECQNcI0wDQRwvLFY0m\nwwr4nfvjNR4NKBz0E6YBoAvO/WkPADY0v1x13MmHFzMMQ2PDES2sjqwAADbWVZg+ceKEPvWpT+mq\nq67Sm2++ue5rHn30UX3iE5/Q0aNHdfToUT300ENrz1UqFX3lK1/RzTffrMOHD+vnP/95f6oHAJtZ\nWKpoPOXcEY+O8eGIlot1NVttq0sBAFsLdPOim266SXfeeafuuOOOy77u1ltv1QMPPHDJ49/97neV\nSCT0/PPP6/Tp07rjjjv03HPPKR6P91Y1ANhQo9nSUrGutMM709LKseKmpFy+qomRmNXlAIBtddWZ\nvvbaa5XJZHr+Ij/72c902223SZIOHDigj370o/rFL37R8/sBgB11DjpxQ2f6/ZMQ2egBAJfT15np\np59+WkeOHNHdd9+tl156ae3xc+fOac+ePWt/zmQympmZ6eeXBgDLrYVpF3Smo2G/IiE/6/EAYBNd\njXl04/bbb9e9996rYDCoF198Uffdd5+eeeYZjYyM9OX9x8YSfXkfL0qnk1aXgMvg+tjbVq5P9a0F\nSdKHrxzXWJeB2syVlUwMppMdDAa29d7jqaiWy/V13yMWCys9av34B98/9sb1sTeuT3/0LUyn0+m1\nj2+44QZlMhm99dZbuu6667R7925NTU1pdHRUkjQ9Pa3rr79+S++fzRbV5jSuLUunk5qfL1hdBjbA\n9bG3rV6fU+8tKeD3qVlraH6+2dXnlGtNFYqDWUHXaGzvvYdiQb1+pqTlfEU+n3HBc+VyTfOt1nZL\n3Ba+f+yN62NvXJ/1+XzGlhu4fRvzmJ2dXfv4tdde09TUlA4ePChJOnz4sH7wgx9Ikk6fPq2XX35Z\nN954Y7++NADYwsJSRWPDEfkMY/MXO8BIMqx221S+XLe6FACwra460w8//LCee+45LSws6K677lIq\nldLTTz+te+65R/fff7+uvvpqPfLII3rllVfk8/kUDAZ18uTJtW71F7/4RR07dkw333yzfD6fvvnN\nbyqRYGwDgLvML1UdfYz4xUaSYUnSYr6mVCJscTUAYE9dhenjx4/r+PHjlzz++OOPr3184sSJDT8/\nFovpW9/6Vg/lAYBzLCxXdHD3kNVl9M1wIiyfIeUKNR20uhgAsClOQASAPihXmypVm67qTPt9hoYT\nYTZ6AMBlEKYBoA86R2+Pp5y/Fu98I8mwFguDuUESANyAMA0AfTC/1Nkx7Z7OtLQSpiu1lqr17raT\nAIDXEKYBoA86nem0CzvTkhj1AIANEKYBoA8WlqqKhv2KR/q2vt8WRofe3+gBALgUYRoA+mB+uaLx\n4agMl+yY7oiEAoqG/crRmQaAdRGmAaAPsstV181Ld4wkI4x5AMAGCNMAsE2maWohX9XYkFvDdFjL\nxZpabdPqUgDAdgjTALBNlVpTtXpLY67tTIfVNqV8ie40AFyMMA0A25RdvTnPrZ3p0dWNHjluQgSA\nS7jrtnMAGIBmW6o1Nt6zPLVQkiRFowGValvbx+yEyYmheEg+n8HcNACsgzANAJuoNZr6j9dmN3z+\n9XcXJUlnZgqaX6xs6b3/64fS26ptJ/h8hlKJEGEaANb
BmAcAbFOp2pTPMBQJ+a0uZWBWjhWvyTQd\n0EoHgB1EmAaAbSpVGopHA67bMX2+0WRE1XpLlVrL6lIAwFYI0wCwTaVqQ/FI0OoyBopjxQFgfYRp\nANimUrWpeNTdt6C8H6arFlcCAPZCmAaAbWi3TVWqTdd3psMhv2KRAJ1pALgIYRoAtqFcbcqUXN+Z\nlt6/CREA8D7CNABsQ6nakCTXd6allcNblkt1tVptq0sBANsgTAPANngpTI8kwzJNaalUt7oUALAN\nwjQAbEOpsnLioRfGPFKJlZsQl4uEaQDoIEwDwDaUqg2Fg34F/O7/cZqMh2QY0nKRuWkA6HD/T38A\nGKBipamEB7rSkuT3GUpGg1pmzAMA1hCmAWAbStWG4lH3z0t3DCfCjHkAwHkI0wDQI9M0V44S98DN\nhx3DiZDy5bqabPQAAEmEaQDoWb3ZVrNlKh7xxpiHJA3HQzJNaX6pYnUpAGALhGkA6FGpsroWz0Nj\nHp2NHrO5ssWVAIA9EKYBoEel6upaPA91pofiIUnSDGEaACQRpgGgZ17sTAcDPsUjAc1kCdMAIBGm\nAaBnpWpDPp+hSMhvdSk7ajgRZswDAFYRpgGgR6VKU/FIQIZhWF3KjhqOhzSbq6jdNq0uBQAsR5gG\ngB6Vqt5ai9cxnAip0WprIV+1uhQAsBxhGgB6VKo0FffI6YfnSyVWbkKcXihZXAkAWI8wDQA9aLdN\nlWtNb3am4yvr8aa5CREACNMA0ItyZy2eBzvT4ZBfyVhQ5+hMAwBhGgB6UayursXzYGdaknaNxjSd\nJUwDAGEaAHrQ2TGd8NCO6fNNjsZ0LluWabLRA4C3EaYBoAed0w9jHjr98Hy7xmKq1JpaKtatLgUA\nLEWYBoAelCoNRUJ+Bfze/DG6azQmSYx6APA8b/4WAIBtKlVXDmzxqvfDNBs9AHgbYRoAelCqNhT3\n6Ly0JA3FQ4qGA2z0AOB5hGkA2CLTNFWqePP0ww7DMLR7jI0eAECYBoAtqjfaarZMT495SFJmLK5z\njHkA8DjCNABsUamzY9rDYx6StHs8rnypruLqmkAA8CLCNABsUcnDpx+eLzPGRg8AIEwDwBZ1Dmzx\n8sy0JGXG45LY6AHA2wjTALBFpWpDPp+hSMhvdSmWGh+KKBTwsdEDgKcRpgFgi0qVlR3ThmFYXYql\nfD5Du0ZjOseYBwAPI0wDwBaVqt5ei3e+zHhc0wuMeQDwLsI0AGxRqdpUzONr8ToyYzFl81XVGi2r\nSwEASxCmAWAL2m1TFY8fJX6+yZGVjR7zixWLKwEAaxCmAWALKrWmTLHJo2NyNCpJml1k1AOANxGm\nAWALOjumYx7fMd0xkVrpTM/RmQbgUYRpANiCtdMP6UxLkmKRgJKxIJ1pAJ5FmAaALSh3Tj9kZnrN\n5EhMszk60wC8iTANAFtQqjYU9PsUDPDjs2NyJEpnGoBn8dsAALagVGkqFuXAlvNNjMa0VKyrVmc9\nHgDvIUwDwBaUqw1GPC4yObKy0WNuiVEPAN5DmAaALVg5sIWbD8/X2TU9m2PUA4D3EKYBoEutdlvV\neovO9EUmRtg1DcC7CNMA0KX3N3nQmT5fNBzQUDykWXZNA/AgwjQAdKlUWT2whc70JSZHoppjzAOA\nBxGmAaBLHNiyscmRmGa5ARGABxGmAaBLa2MeHCV+iYmRqJaLdVXrTatLAYAdRZgGgC6Vqg2Fg34F\n/PzovNjk6MpGjznmpgF4DL8RAKBLK2vx6EqvZ3JtowdhGoC3EKYBoEulCge2bKSzHm+O9XgAPIYw\nDQBdKlebike5+XA9kVBAw4mQZnN0pgF4C2EaALrQaLZVb7YZ87iMyVSUg1sAeE5XvxVOnDihZ599\nVlNTU3ryySf1oQ996JLXPPbYY3rmmWfk8/kUDAb11a9+VTfeeKMk6dixY/rXf/1XjYyMSJIOHz6s\nL3/5y338xwCAwWI
t3uYmRmP69dtZq8sAgB3VVZi+6aabdOedd+qOO+7Y8DXXXHON7r77bkWjUb3+\n+uv6/Oc/r3/5l39RJBKRJP3xH/+xPv/5z/enagDYYe+ffkhneiOTI1HlS3VVak1Fw/x7AuANXY15\nXHvttcpkMpd9zY033qhodOUGlKuuukqmaWppaWn7FQKADXQ604x5bGxyhPV4ALxnIDPTP/nJT7R/\n/37t2rVr7bG///u/15EjR3Tffffp7bffHsSXBYCBef8occY8NtLZNc3cNAAv6XuL5d///d/1V3/1\nV/q7v/u7tce++tWvKp1Oy+fz6Sc/+Ym+9KUv6YUXXpDf7+/6fcfGEv0u1TPS6aTVJeAyuD72lk4n\nZebKarRMxSIBpYaifX3/YDCgZCLS1/fcifeOxcJKr4bnjuTqv5tivbVj/7/m+8feuD72xvXpj76G\n6Zdeekl/9md/pm9/+9u64oor1h6fnJxc+/jWW2/VX/zFX2hmZkZ79uzp+r2z2aLabbOf5XpCOp3U\n/HzB6jKwAa6PvXWuT7nW1FKhqlg4oEKx2tev0Wg0+/6eO/He5XJN863WJY+nEiGdOru0I/+/5vvH\n3rg+9sb1WZ/PZ2y5gdu3MY9f//rX+upXv6pvfetb+shHPnLBc7Ozs2sf//M//7N8Pt8FARsA7I7T\nD7szORLjFEQAntLVb4aHH35Yzz33nBYWFnTXXXcplUrp6aef1j333KP7779fV199tR566CFVq1V9\n/etfX/u8kydP6qqrrtIDDzygbDYrwzCUSCT013/91woE+KUEwBlM01S52tCe8bjVpdje5GhU/+ut\nBavLAIAd01WiPX78uI4fP37J448//vjaxz/60Y82/Px/+Id/2HplAGAT5VpTzZbJWrwuTI7ElC83\nWI8HwDM4AREANrFYqEmSYhwlvqmJETZ6APAWwjQAbGJpNUzTmd7c5MjKRo/ZHHPTALyBMA0Am1gk\nTHct3QnTdKYBeARhGgA2sVioyTCkCDPAmwoH/RpJhjkFEYBnEKYBYBNLhZpi4YB8hmF1KY4wORKl\nMw3AMwjTALCJxUJNcW4+7NrESFTzS4M5LAYA7IYwDQCbWCzUOLBlC8aHo8qX6qrVLz0hEQDchjAN\nAJfRNk0tFWuKR+hMdyudWrkJcWGZuWkA7keYBoDLKJTqarU5sGUrOmGaUQ8AXkCYBoDLyHUObCFM\ndy2dikiS5pfoTANwP8I0AFxGLr/SXWXMo3uJaFCRkJ8wDcATCNMAcBm5/OqBLVE6090yDEPjw1HC\nNABPIEwDwGXkClUF/T6Fg36rS3GUdCqihWVmpgG4H2EaAC4jl68plQzL4MCWLUmnVjrTpmlaXQoA\nDBRhGgAuI5evaiQZtroMx0nJt8vAAAAgAElEQVSnoqo328qX6laXAgADRZgGgMvIFVY609ga1uMB\n8ArCNABsoNVqa6lYozPdA9bjAfAKwjQAbCCbr8o0pZEEYXqrxocJ0wC8gTANABtYWA2CjHlsXTDg\n10gyrHmOFAfgcoRpANhAJ0wz5tGb9HCEmWkArkeYBoANEKa3p7MeDwDcjDANABuYX6ooEvIrGub0\nw16kU1EtFWpqNFtWlwIAA0OYBoANLCxVNDYUsboMxxpPRWRKnIQIwNUI0wCwgfmlikaGGPHoFbum\nAXgBYRoANrCwVNFoks50rzpheoGNHgBcjDANAOtoNFtaLtY1Sme6Z8PxkIIBHzchAnA1wjQArCNX\nqEkSneltMAxjdaMHYx4A3IswDQDryOVXwzSd6W0ZH47QmQbgaoRpAFhHLr/STR1lm8e2dHZNm6Zp\ndSkAMBCEaQBYR2fMgwNbtiediqpab6lYaVhdCgAMBGEaANaxmK8qGQspHPRbXYqjpVMrnX12TQNw\nK8I0AKwjV6gpPRK1ugzHe3/XNHPTANyJMA0A68jmq2tBEL0bH17pTBOmAbgVYRoA1
pHL1zROmN62\nSCigoViQMA3AtQjTAHCRSq2pSq1JmO4Tdk0DcDPCNABcpLPJgzDdH531eADgRoRpALjI4uqOaWam\n+2M8FVUuX1Oz1ba6FADoO8I0AFyEznR/pVMRtU1z7d8rALgJYRoALpLLV2VIGhvm9MN+SA+zHg+A\nexGmAeAiuXxNQ4mQAn5+RPYDu6YBuBm/KQDgIrlCVWNDdKX7ZSQZlt9nEKYBuBJhGgAuks3XNJoM\nW12Ga/h8hsaGI1pgPR4AFyJMA8B5TNPUYr6qUTrTfZUejmhhmc40APchTAPAeUrVpurNNp3pPhsb\njmphmc40APchTAPAeXKrO6bpTPdXOhVRodxQtd60uhQA6KuA1QUAgJ3k8iu7kEeG6ExfjuEzVKp1\nH4yT8ZAk6ex8SbvH45d9bTgYUIBWDwCHIEwDwHlyhdXOdJLO9OXUGi396s35rl/f2eTxP1+Z0b6J\nxGVf+7uHJhUI8+sJgDPw3/4AcJ5cvia/z9DwaicV/ZGIBiVJxUrD4koAoL8I0wBwnlyhqlQiLJ/P\nsLoUV4mE/Ar4DRXLhGkA7kKYBoDz5PI1jTEv3XeGYSgeDdKZBuA6hGkAOE+OHdMDkyBMA3AhwjQA\nrGqbphYLNTZ5DAhhGoAbEaYBYFW+VFerbbLJY0AS0aAazbZqjZbVpQBA3xCmAWBVZ8f0KJ3pgVjb\n6MFNiABchDANAKs6px+OMTM9EIkY6/EAuA9hGgBWcZT4YLFrGoAbEaYBYFU2X1M46Fc8wul7gxAO\n+hUM+AjTAFyFMA0Aq1bW4oVlGBzYMihs9ADgNoRpAFiVzVeZlx4wwjQAtyFMA8AqDmwZvEQ0qFKl\nIdM0rS4FAPqCMA0AkuqNlvLlBkeJD1giFlSzZapaZ9c0AHcgTAOApMVCZ8c0nelBSrLRA4DLEKYB\nQCvz0hJhetDihGkALkOYBgC9H6YZ8xgsTkEE4DaEaQDQ+0eJjyTpTA9SMOBTJOSnMw3ANQjTAKCV\nTR7D8ZCCAX4sDhrr8QC4Cb81AECsxdtJccI0ABchTAOAVo4SZ156Z3R2TbfZNQ3ABQjTADzPNE06\n0zsoGQ2qbUqVatPqUgBg2zYN0ydOnNCnPvUpXXXVVXrzzTfXfU2r1dJDDz2kT3/607r55pv1xBNP\ndPUcANhBsdJQvdnmKPEdkoixHg+AewQ2e8FNN92kO++8U3fccceGr3nyySd15swZPffcc1paWtKt\nt96qT3ziE9q7d+9lnwMAO+hs8qAzvTMS5+2anrS4FgDYrk0709dee60ymcxlX/PMM8/oD//wD+Xz\n+TQ6OqpPf/rT+qd/+qdNnwMAO1jbMT3MzPROiEdX+jh0pgG4QV9mpqenp7V79+61P2cyGc3MzGz6\nHADYAacf7iy/z6dYOMDBLQBcYdMxD7sYG0tYXYJjpdNJq0vAZXB9rFdptBUK+HTF/lEZhnHBc+l0\nUmaurGRiMEE7GAx48r2HE2FV6q113yMWCys9Guvqffj+sTeuj71xffqjL2E6k8no3LlzuuaaayRd\n2I2+3HNbkc0W1W6zRmmr0umk5ucLVpeBDXB97OG92YJGkmEtLBQveLxzfcq1pgrF6kC+dqPhzfeO\nhv2azZXXfY9yuab5VmvT9+D7x964PvbG9Vmfz2dsuYHblzGPw4cP64knnlC73VYul9MLL7ygz3zm\nM5s+BwB2wFq8nZeIBlWuNmmSAHC8TcP0ww8/rN/7vd/TzMyM7rrrLv3BH/yBJOmee+7Ryy+/LEk6\nevSo9u7dq9///d/XH/3RH+lP/uRPtG/fvk2fAwA7yOarrMXbYYloUKakUpW5aQDOtumYx/Hjx3X8\n+PFLHn/88cfXPvb7/XrooYfW/fzLPQcAVmu22soX6xrl9MMddf56vGQsZHE1ANA7TkAE4GmLhZpM\nic70DlsL02z0AOBwhGkAnpbrrMUbJkzvpFgkI
MNg1zQA5yNMA/C0tQNb6EzvKJ/PUDwSJEwDcDzC\nNABPy3aOEk8yM73TElHCNADnI0wD8LRcvqpkLKhQ0G91KZ5DmAbgBoRpAJ6WZce0ZRKxoCq1lpqt\nttWlAEDPCNMAPC2XrzEvbZHORo8S3WkADkaYBuBZpmmudKaZl7ZEIrpy1AGjHgCcjDANwLPKtaZq\n9RZjHhZJRFcOaykQpgE4GGEagGdll1fX4rFj2hLRsF8+n8GYBwBHI0wD8KxcYXUtHkeJW8IwjJWN\nHpyCCMDBCNMAPCvHgS2WS0QDzEwDcDTCNADPyuar8vsMDcVDVpfiWYlokJlpAI5GmAbgWbl8TaND\nYfkMw+pSPCsRDareaKvebFldCgD0hDANwLOy+SojHhZLxFb+VoCbEAE4FWEagGflOP3Qcp1d0wVu\nQgTgUIRpAJ7Uare1WKixycNi75+C2LS4EgDoDWEagCct5msyTTZ5WC0c9CvgN9joAcCxCNMAPCm7\nuhZvfDhqcSXe1tk1zUYPAE5FmAbgSQucfmgbKwe31K0uAwB6QpgG4ElrR4kzM225RCyoUqUp0zSt\nLgUAtowwDcCTFvJVDcdDCgb8VpfieYloUI1WW7VG2+pSAGDLCNMAPCm7XGXEwyY6Gz24CRGAExGm\nAXhSNl/VOGHaFgjTAJwsYHUBANAPzbZUa3S3q7htmsrlq7rmyjGVaht/jpkrq1xrqs0o70AlYoRp\nAM5FmAbgCrVGU//x2mxXry1Xm2q2TOXL9ct+TjIRUaFY1X/9ULpfZWIdoYBfoaBPRU5BBOBAjHkA\n8JzSage0M14A6yWiQTrTAByJMA3AczqhLREhTNtFIhpc+48cAHASwjQAzylWV0JbnM60bXQ60+ya\nBuA0hGkAnlOqNBQO+hUM8CPQLhKxoFptU5Vay+pSAGBL+E0CwHOKlabiUe6/thPW4wFwKsI0AM8p\nVRrcfGgzhGkATkWYBuAppmmqVG0ozs2HtkKYBuBUhGkAnlJrtNRsmXSmbSbg9yka9hOmATgOYRqA\npxQrKyceMjNtP/FIkINbADgOYRqAp3Bgi30lYhzcAsB5CNMAPKUTptkxbT/JaFClakPtNrumATgH\nYRqApxQrDQX9PoXYMW07iWhQpiktFWtWlwIAXeO3CQBPKVZXdkwbhmF1KbhIIrbytwULy1WLKwGA\n7hGmAXgKO6btKxkNSZIWlioWVwIA3SNMA/CUYqXBvLRNxaIB+Qw60wCchTANwDPqjZYazTadaZvy\nGYbi0SCdaQCOQpgG4BmlKps87C4ZC9GZBuAohGkAntE5sCUR4cAWu0rGglpYrsg0WY8HwBkI0wA8\no8iOadtLxoKq1FoqVZtWlwIAXSFMA/CMUqUhv89QJOS3uhRsIBlb2egxt8jcNABnIEwD8IzOWjx2\nTNtXcvVvDeaWyhZXAgDdIUwD8IxiZeXAFthX5+CWeTrTAByCMA3AM0rVhuIR5qXtLOD3aTgeYswD\ngGMQpgF4QrPVVrXeYse0A4ynIppj1zQAhyBMA/AENnk4x/hwlDANwDEI0wA8odTZMc3MtO2lU1Et\nF+uqNVpWlwIAmyJMA/CE0mpnmjEP+xtPRSRJ83SnATgAYRqAJxQrDfkMKRqmM21348NRSWz0AOAM\nhGkAnlCsNhSLsGPaCTqdaeamATgBYRqAJ3QObIH9xSNBxcIBwjQARyBMA/CEYqWxdiAI7C89EmXM\nA4AjEKYBuF6z1Valxo5pJ5lIsR4PgDMQpgG4XmfHdJIw7RgTI1Fll6tqtdtWlwIAl0WYBuB6xfLq\nWjzGPBwjnYqq1TaVy9esLgUALoswDcD1iuyYdpyJ1Mp6PEY9ANgdYRqA6xXKDQX8hiIhv9WloEsT\nI+yaBuAMhGkArldcXYvHjmnnSCXDCvh9dKYB2B5hGoDrFdkx7Tg+w1A6FaEzDcD2CNMAXM00TRXL\n7Jh2ojTr8
QA4AGEagKvVGi01Wm0loyGrS8EWdXZNm6ZpdSkAsCHCNABXW9vkQWfacdIjUdXqLRVW\nVxsCgB0RpgG4WieIMTPtPKzHA+AEhGkArsaOaediPR4AJwh086JTp07p2LFjWlpaUiqV0okTJ3Tg\nwIELXvO1r31Nb7zxxtqf33jjDT322GO66aab9Oijj+r73/++JiYmJEkf//jH9eCDD/bvnwIANlAs\nNxQJ+RUM0DtwmvHhqAzRmQZgb12F6QcffFCf+9zndPToUf30pz/V17/+dX3ve9+74DUnT55c+/j1\n11/XF77wBd14441rj91666164IEH+lQ2AHSHtXjOFQz4NDIU1hydaQA2tmmrJpvN6tVXX9Utt9wi\nSbrlllv06quvKpfLbfg5P/zhD3XkyBGFQtw9D8BaBdbiOdpEKqp5OtMAbGzTMD09Pa3JyUn5/SvH\n8Pr9fk1MTGh6enrd19frdT355JP67Gc/e8HjTz/9tI4cOaK7775bL730Uh9KB4DLa7dNlaoNJelM\nOxa7pgHYXVdjHlvxwgsvaPfu3Tp06NDaY7fffrvuvfdeBYNBvfjii7rvvvv0zDPPaGRkpOv3HRtL\n9LtUz0ink1aXgMvg+vSHmSsrmYhc8Fi+VJdpSuMjsUue61YyEVEwGOj58zfDe18qFgsrPRqTJF2x\nb0T//OtpxZMRxSKX/kcR3z/2xvWxN65Pf2wapjOZjGZnZ9VqteT3+9VqtTQ3N6dMJrPu63/0ox9d\n0pVOp9NrH99www3KZDJ66623dN1113VdaDZbVLvN4v6tSqeTmp8vWF0GNsD16Z9yralCsXrBY7PZ\nsiQp4NMlz3UjmYioUKyq0bj0vfuF975UuVzTfKslSUqGV/5W9DdvzulgZuiC1/H9Y29cH3vj+qzP\n5zO23MDddMxjbGxMhw4d0lNPPSVJeuqpp3To0CGNjo5e8tqZmRn98pe/1JEjRy54fHZ2du3j1157\nTVNTUzp48OCWCgWArSpU6pLE6YcOtmu1Qz2z+h9GAGA3XY15fOMb39CxY8f07W9/W0NDQzpx4oQk\n6Z577tH999+vq6++WpL04x//WJ/85Cc1PDx8wec/8sgjeuWVV+Tz+RQMBnXy5MkLutUAMAjFckOG\nIcUifZ9oww6ZGInKMKTpHGEagD119Rvmyiuv1BNPPHHJ448//vgFf/7yl7+87ud3wjcA7KRCpaF4\nJCifz7C6FPQo4PcpPRzVDGEagE1xigEA1yqyFs8Vdo3FGPMAYFuEaQCuVaywFs8Ndo3GNLdYVtvk\nJnQA9kOYBuBKjWZb1XqL0w9dYNdoTPVmW7n8YLaHAMB2EKYBuFKx0pAkxjxcYG2jB3PTAGyIMA3A\nlTphmjEP59s1xno8APZFmAbgSsUynWm3GI6HFAn56UwDsCXCNABXKlTqCvgNhYN+q0vBNhmGoV2j\nMc0SpgHYEGEagCsVyw0lYyEZBjum3WDXWIzONABbIkwDcKVipcEmDxfZNRpTNl9TrdGyuhQAuABh\nGoDrmKZJmHaZzkYPRj0A2A1hGoDrVOstNVsmNx+6COvxANgVYRqA67AWz30mCdMAbIowDcB1CqzF\nc51w0K+xoTBhGoDtEKYBuM7a6Yd0pl1l12iMg1sA2A5hGoDrFMp1RcN+Bfz8iHOTXaNxzeTKMk3T\n6lIAYA2/aQC4TmF1xzTcZddYTNV6S8ulutWlAMAawjQA1ymU6xoiTLvO2kYPRj0A2AhhGoCrNJpt\nVWotJePMS7sN6/EA2BFhGoCr5MsrIwB0pt1nZCisUMBHmAZgK4RpAK7SWYuXZC2e6/gMQ5OjMcI0\nAFshTANwlcLqzWncgOhOk6zHA2AzhGkArpJfXYsXDPDjzY12jcY0v1xRo9m2uhQAkESYBuAyhXKD\neWkXy4zGZJrS3FLF6lIAQBJhGoDL5Et1JeOEabfaNcZ6PAD2QpgG4Br1Zkv
VeoubD13s/fV4JYsr\nAYAVhGkArlEorWzyYMzDvaLhgIbjITZ6ALANwjQA1yh0dkxzYIurZcZimmbMA4BNEKYBuEZ+dcd0\nIkpn2s32jCc0tVBSu21aXQoAEKYBuEehVFc0HGAtnsvtmYirVm9pbpHuNADr8RsHgGvky3UNcfOh\n6+1NJyRJ707nLa4EAAjTAFykUG6wFs8D9ozHJUmnZwjTAKxHmAbgCpVaU9V6i860B0TDAY0PR/Tu\ndMHqUgCAMA3AHeZXT8RLshbPE/amEzrNmAcAGyBMA3CFTpgeYszDE/ak45qaL6rRbFtdCgCPI0wD\ncIX3O9OMeXjB3nRC7bap6SwnIQKwFmEagCvML1YUCwcU8PNjzQv2plduQpyaJ0wDsBa/dQC4wvxS\nVUlOPvSMydGYAn5D780XrS4FgMcRpgG4wvxSRUPcfOgZAb9PeyeSeo/ONACLEaYBOF652lSx0mBe\n2mMOZIboTAOwHGEagOPNrh4rzSYPb/lAZkiLhZpK1YbVpQDwMMI0AMfrhGl2THvLgcyQJG5CBGAt\nwjQAx5tbZC2eF31g10qYZtQDgJUI0wAcbzZXUSoRYi2ex4ynIoqGA9yECMBS/OYB4Hhzi2WlR6JW\nl4EdZhiG9qTjdKYBWIowDcDxZhcrmkgRpr1obzqhqfmSTNO0uhQAHkWYBuBopWpDxUpD44RpT9qb\njqtSayqXr1ldCgCPIkwDcLTOzYdpwrQn7U0nJHETIgDrEKYBONpsbmUtHmHam/ak45II0wCsQ5gG\n4GjT2bIMgzDtVfFIUCPJMLumAViGMA3A0c5lS5pIRRUM8OPMq/amE3SmAViG3z4AHG06W1ZmLG51\nGbDQ3nRc09mymq221aUA8CDCNADHarbams2VlRmPWV0KLLQ3nVCrbWpmdX4eAHYSYRqAY80vVdRq\nm9pNZ9rTuAkRgJUI0wAc69zCSidy9zhh2ssyY3H5fYbem+MmRAA7jzANwLGmsyvhadcoYx5eFgz4\ntHs8rndn8laXAsCDCNMAHOtctqTRobCi4YDVpcBiBzNJnZ4pcKw4gB1HmAbgWNMLbPLAigOZIZWq\nTc0tVawuBYDHEKYBOFLbNDWdK3HzISRJB3cNSZJOTTPqAWBnEaYBOFJuuap6o81aPEha2egRDPh0\nerpgdSkAPIYwDcCRzmVXN3nQmYakgN+n/ZMJOtMAdhxhGoAjdTZ5sBYPHQd3Dend2YJabU5CBLBz\nCNMAHOncQknJWFCJaNDqUmATBzNDqjfaml7gJEQAO4cwDcCRprNs8sCFDmSSkrgJEcDOIkwDcBzT\nNDWdLTHigQtMjsYUDft1aoabEAHsHMI0AMfJl+oqVZvKjLHJA+/zGYYO7BqiMw1gRxGmATjO2iYP\nOtO4yIFMUu/NFdVochMigJ1BmAbgOGubPJiZxkUO7hpSq23q7FzR6lIAeARhGoDjnFsoKRLyK5UI\nWV0KbOZghpMQAewswjQAx5nOlrV7PC7DMKwuBTYzOhTWUCyo04RpADukqzB96tQp3XbbbfrMZz6j\n2267TadPn77kNY8++qg+8YlP6OjRozp69KgeeuihtecqlYq+8pWv6Oabb9bhw4f185//vG//AAC8\n51y2xM2HWJdhGDqQGWKjB4AdE+jmRQ8++KA+97nP6ejRo/rpT3+qr3/96/re9753yetuvfVWPfDA\nA5c8/t3vfleJRELPP/+8Tp8+rTvuuEPPPfec4nHmHQFsTbna0HKxzrw0NnQwM6SX386qUmsqGu7q\n1xwA9GzTznQ2m9Wrr76qW265RZJ0yy236NVXX1Uul+v6i/zsZz/TbbfdJkk6cOCAPvrRj+oXv/hF\njyUD8LLOJo8MmzywgYOZpExJZ2bpTgMYvE3D9PT0tCYnJ+X3+yVJfr9fExMTmp6evuS1Tz/9tI4c\nOaK7775bL7300trj586d0549e9b+nMl
kNDMz04/6AXjM9EJnkwdjHljfgbWbEAnTAAavb3//dfvt\nt+vee+9VMBjUiy++qPvuu0/PPPOMRkZG+vL+Y2OJvryPF6XTSatLwGVwfbZmsdxQMODTh//LhPy+\n929ANHNlJRORvn+9ZCKiYDAwkPeWxHuvIxYLKz3a3X8srff9k5Y0MRLV9GKF7y+L8e/f3rg+/bFp\nmM5kMpqdnVWr1ZLf71er1dLc3JwymcwFr0un02sf33DDDcpkMnrrrbd03XXXaffu3ZqamtLo6Kik\nlW739ddfv6VCs9mi2m1zS5+DlW+U+Xm6M3bF9dm6d95b0q7RmHLZC/cIl2tNFYrVvn6tZCKiQrGq\nRqP/793Be1+qXK5pvtXa9HWX+/7ZP5HQ66ezfH9ZiJ9v9sb1WZ/PZ2y5gbvpmMfY2JgOHTqkp556\nSpL01FNP6dChQ2vBuGN2dnbt49dee01TU1M6ePCgJOnw4cP6wQ9+IEk6ffq0Xn75Zd14441bKhQA\npJUd02zywGYOZoY0v1RVsdKwuhQALtfVmMc3vvENHTt2TN/+9rc1NDSkEydOSJLuuece3X///br6\n6qv1yCOP6JVXXpHP51MwGNTJkyfXutVf/OIXdezYMd18883y+Xz65je/qUSCsQ0AW1NrtJRdrur/\nvCaz+YvhaQfOO7zl6ivGLK4GgJt1FaavvPJKPfHEE5c8/vjjj6993AnY64nFYvrWt77VQ3kA8L7p\nbEmmOEYcmzuYScpnGHrz7BJhGsBAcQIiAMc4O7syJ71vkr/ZwuVFQgEdyCT15tklq0sB4HKEaQCO\ncWauqHDIr3QqanUpcICr9qV0ajqvemPzmxkBoFeEaQCOcXa2oH3phHyGsfmL4Xkf2pdSs2Xq7XN5\nq0sB4GKEaQCOYJqmzs4XGfFA1z64NyXDkN44s2h1KQBcjDANwBEWlquq1FraP0GYRndikYD2TzA3\nDWCwCNMAHOHM6s2H+yc5sQvdu2p/Sm+fy6vRbFtdCgCXIkwDcISzcwUZhrRnnLV46N5V+1JqNNs6\nNc3cNIDBIEwDcIQzs0XtGo0pFPRbXQoc5IP7UjLE3DSAwSFMA3CEs3MFRjywZYloUHvSCb3B3DSA\nAenqBEQAsFKp2lA2X9OnuPnQEwyfoVKtuenrzFxZ5S5ed+WeIf2P38woX64rFgkpQBsJQB8RpgHY\nHicfekut0dKv3pzf9HXJRESFYrWr96w323r2387o//5vBxQI86sPQP/w3+cAbO/M3GqYnmDMA1s3\nObpyYubMYtniSgC4EWEagO2dnS1oOB7ScDxkdSlwoEgooOF4SLO5itWlAHAhwjQA2zszx8mH2J7J\n0ajmFytqtU2rSwHgMoRpALbWbLV1bqGk/Yx4YBsmR2NqtNqaWh0ZAoB+IUwDsLVzCyW12qb205nG\nNkyOxCRJb00tW1wJALchTAOwtbNrNx8SptG7WCSgZCyo377HvmkA/UWYBmBrZ2aLCgV9a51FoFeT\nozG9PZVXm7lpAH1EmAZga2fnCtqbTsjnM6wuBQ63azSmSq2pd2cLVpcCwEUI0wBsyzRNnZktaj8j\nHuiD3eMxGZJefjtrdSkAXIQwDcC2svmqyrWm9k2yyQPbFwkF9IFdSf36HcI0gP4hTAOwrc7Nh3Sm\n0S8fOTiqU+fyypfrVpcCwCUI0wBs6+xsUYakvWnCNPrjdw6OypT0G7rTAPqEMA3Ats7MFTUxGlM4\n5Le6FLjE3omEhuIh/Zq5aQB9QpgGYFtnZguMeKCvfIaha64Y02/eyanVbltdDgAXIEwDsKVCua6F\n5ao+sIubD9Ff11w5pnKtqben8laXAsAFCNMAbOmdcytB58rdQxZXArf5nQOj8vsMvczcNIA+IEwD\nsKW3zy3LZxg6sIswjf6KRQL64N5h/eq3hGkA20eYBmBLb0/ltW8iwc2HGIirrxzTe/NF5fJVq0sB\n4HC
EaQC2026bemc6ryv30JXGYFxzxZgkMeoBYNsI0wBsZ2qhpFq9pSt3D1tdClxq93hcY0MRVuQB\n2DbCNADbefvcsiTRmcbAGIaha64c06unF9VosiIPQO8I0wBs5+2pZSWiQaVTUatLgYtdfeWYao2W\n3nxvyepSADgYYRqA7bxzLq//smdYhmFYXQpc7NAHRhTw+/RrtnoA2AbCNABbKVYams6WdQX7pTFg\n4aBfH/5ASr96e0GmaVpdDgCHIkwDsJW1w1r2cPMhBu/jH0xrbrGis3NFq0sB4FCEaQC28s65ZRmG\ndDDDMeIYvP/9qrR8hqF/f23O6lIAOFTA6gIA4HxvTy1rbzqhSIgfT+g/w2eoVGuu/dnn9+mq/Sn9\n26uzOvx/7N/WnH44GFCAFhXgOfy2AmAbbXPlsJbrf2eX1aXApWqNln715vwFj40kw3rt3UX90/98\nV+Pb2CDzu4cmFQjzaxXwGv4bGoBtTC+UVKm1/v/27jy8rfLOF/j3HC2WZcm2bGvzviR2nHjJSlYT\nkgJJh9CkTANtgU6fXsIDDM2dtIWklxamQO9t2nk6bZkULrRl67SluYUUkkBCCBAnIZB98ZbE+yLL\ni+RV1n7uHw4G1wlxHIUHxxMAACAASURBVNtHsr6ff2xZR6+/1vHR+enV+74HOZx8SJMo3ayDKAio\na+2VOwoRhSEW00QUMqo5+ZBkoFYpkGyMQZ2tl6t6ENE1YzFNRCGjurkbMRolzAZerIUmV6ZFD5fH\njzbngNxRiCjMsJgmopBR3dKDHF6shWSQZtJBIXKoBxFdOxbTRBQSXG4fWjr6OV6aZKFSikg16VDf\n2otgkEM9iGj0WEwTUUiosQ2Ol87meGmSSaZFD7c3ALvTJXcUIgojLKaJKCTUNPdAAJBtZc80ySPF\nGAOlQkCtjUM9iGj0WEwTUUi42NyNZGMMorlOL8lEqRCRZtKhwd6LAId6ENEosZgmItn5A0Gcb+pC\nXlq83FEowmVZY+H1BWHr7Jc7ChGFCRbTRCS76uZueH1BzMxMkDsKRThrUgzUShG1l9Y8JyK6GhbT\nRCS78jonBAGYkc6eaZKXQhSQaY1Fg70PHm9A7jhEFAZYTBOR7MrrHciyxkKrUckdhQi5aXEIBCVU\nt3TLHYWIwgCLaSKSlcvtR21LL2ZmGuSOQgQASIjVwBivwfnGbl5enIiuisU0EcmqqtGJoCRhZgbH\nS1PoyE2LR0+/F3YHLy9ORF+MxTQRyaqizgm1UkQOL9ZCISTDoodaJaKqsUvuKEQU4lhME5Gsyuud\nyE2Lh0rJlyMKHUqFiJzkODTYezHg8csdh4hCGM9eRCQbZ68HLR39XBKPQlJuWjwkCbjYxImIRHRl\nLKaJSDYV9Q4A4ORDCklxOjUsCVpcaOpGkBMRiegKWEwTkWzK65zQRauQatLJHYXosnLT4tA34IOt\ng1dEJKLLYzFNRLKQJAnldQ7MzDRAFAS54xBdVppZD41agapGDvUgostjMU1EsrB1utDV5+V4aQpp\nClHAtNQ4NLf1oX/AJ3ccIgpBLKaJSBbldZfGS2dwvDSFtumpcZAAVDVwmTwiGonFNBHJorzOCVN8\nNJLio+WOQvSF9Fo1Mix6VDV0weMNyB2HiEIMi2kimnSBYBCVDU6u4kFhoygnEb5AEBX1TrmjEFGI\nYTFNRJOu1tYLtzfA8dIUNgz6KKSbdaiod8LrY+80EX2GxTQRTbqyWgcEADM4XprCSFFOInz+ICrZ\nO01En8Nimogm3cnz7chJiYMuWiV3FKJRS4jVINWkQ3m9E14/e6eJaJBS7gBEFDn8QaC5vRcNbX34\n6o3Z6Pf4x63tIC9QR5OgKCcRuz/qQ1V9FwpzEofdJ4jCuP5Pf16USgklu7+IQhKLaSKaNB6fH28d\nrgMACACOVtjHre3iXOO4tUV0JUlxGqQYY1Be58SMDANUn6twPb4AT
p9vn5DfuyDfDGUUT9lEoYjv\nc4loUjW09iIxNgo6LYd4UHgqykmExxdAVSPXnSaiUfZM19bWYsuWLejq6kJ8fDy2bt2KzMzMYdts\n27YNu3fvhiiKUKlU2LRpE0pKSgAAW7ZsweHDh2EwDE42Wr16NR588MHx/UuIKOQ5ez3o6HZjzvQk\nuaMQjZkxPhrWRC3Kax2YkR4PpYL9UkSRbFTF9BNPPIFvfvObWLt2Lf7+97/j8ccfxyuvvDJsm6Ki\nInznO99BdHQ0Kisrcc899+DgwYPQaDQAgPvvvx/33HPP+P8FRBQ2Tl/sAABkWPQyJyG6PsXTEvHO\nx404V+PAbL45JIpoV3073dnZifLycqxZswYAsGbNGpSXl8PhcAzbrqSkBNHRg1cyy8vLgyRJ6Ori\nR2BE9JnTFzoQr1MjNkYtdxSi62IyaJFp0eNcrQO9Lq/ccYhIRlftmbbZbDCbzVAoFAAAhUIBk8kE\nm82GhITLX3Bhx44dSE9Ph8ViGfrZiy++iNdeew1paWn4/ve/j5ycnGsKmpiou6bt6TNGI3sBQ1mk\n7B9nrxvVzd2Yn2+GXqcZ9/ZVKuWEtKvXaSasbWDickdK29eaYTxz3zQvDf+9pxLHz3dgzdKsCX1O\ntNooGBO0E9L2RIqU17dwxf0zPsZ9avAnn3yCX//61/jDH/4w9LNNmzbBaDRCFEXs2LED9913H/bt\n2zdUoI9GZ2cfglz76poZjXq0t/fKHYOuIJL2zwcnmyEBsCREo7fPPe7t+3z+cW9Xr9Ogt889IW1/\nim2Pve1P989EtD1axdMScayyHeU1HchJiZ2w58Tl8qA9EF5rW0fS61s44v65PFEUrrkD96rDPKxW\nK+x2OwKXDuJAIIC2tjZYrdYR2548eRKPPPIItm3bhuzs7KGfm81miOLgr1q3bh1cLhdaW1uvKSgR\nhbfjVW0wxmsQr+MQD5o6ZqQbEK9T45OKNnh4mXGiiHTVYjoxMRH5+fnYuXMnAGDnzp3Iz88fMcTj\nzJkz2LRpE37zm99g1qxZw+6z2z9bS7a0tBSiKMJsNo9HfiIKA30DPlQ2dKF4mhGCIMgdh2jciKKA\nhbPMcLn92PdJo9xxiEgGoxrm8e///u/YsmULfvvb3yI2NhZbt24FAGzYsAEbN25EYWEhfvKTn8Dt\nduPxxx8fetzPf/5z5OXlYfPmzejs7IQgCNDpdHj22WehVHLxeaJIcepCBwJBCbOnJ6HN6ZI7DtG4\nMhu0yEmJxQcnm7FmSQbidVFyRyKiSTSqijYnJwfbt28f8fMXXnhh6Pu//e1vV3z8Sy+9dO3JiGjK\nOF7VhsTYKKSbdSymaUqam2tES0c/Pilvwy0LUvkJDFEE4UrzRDShBjx+lNU5MDfXxAKDpqzoKCVu\nW5KJVocLlQ1cFpYokrCYJqIJdepCB/wBCfPyjHJHIZpQiwosSDXG4HhlOzp7JmZVDyIKPSymiWhC\nfXiqGSZDNKalxskdhWhCCYKAJYUWRKkVKD3VAp8/KHckIpoELKaJaMI0d/TjfFM3lhcnQ+QQD4oA\nGrUSJUVW9Lh8+KTCfvUHEFHYYzFNRBPmw1PNUIgClhaOXJeeaKqyJGpRlJOI6uYe1LT0yB2HiCYY\ni2kimhBeXwAfnWvFvDwjYmN4oRaKLEU5iTAZonGkrBU9/V654xDRBGIxTUQT4lhVG/rdfiyfnSJ3\nFKJJJ4oCSoqsEEUBB063wB/g+GmiqYrFNBFNiA9OtcBsiMaM9Hi5oxDJIiZahaWFVjh6PDhSZock\nSXJHIqIJwGKaiMZdc3sfLjZ1Y/nsFK4tTREtzaRD8bRE1LT0cP1poimKxTQRjbsPT7VAqRCwtNAi\ndxQi2RXlJCLVpMOxyja0OngFUKKphsU0EY0rry+Aw+daMS/PBL2WEw+JBEHAsiIL9Fo1DpxqQd+A\nT+5IRDSOWEwT0bg6WtkGl8ePm
2Ynyx2FKGSolQqsmJOMQEDChyebOSGRaAphMU1E4+qDU82wJGiR\nm8aJh0SfF6eLwrJiKzp7PPiYExKJpgwW00Q0bmptPahu7sHy2cmceEh0GWkm3eAFXTghkWjKYDFN\nRONmR2ktYjRK3FjMIR5EV1I8LRGpxhhOSCSaIlhME9G4uNjcjbM1nfjyogxERynljkMUsgYnJFqH\nJiT2c0IiUVhjMU1E4+KNAzXQa1X40txUuaMQhTy16rMJiR+c5BUSicIZi2kium5VDU5U1DvxT4sy\nEKVWyB2HKCzE6aKwtMiCzh43JyQShTEW00R0XSRJwhultYjTqbFiTorccYjCSrpZPzQh8Xxjt9xx\niGgMWEwT0XUpr3fifGMX1izOhFrFXmmia1U8LRHJSVocq2xDV59H7jhEdI1YTBPRmEmShB2lNTDo\no3BjsVXuOERhSRAELCmwQqkQUXrahkCQ46eJwgmLaSIas7M1DlQ39+D2JZlQKdkrTTRWWo0SSwot\ncPZ6cPJ8h9xxiOgasJgmojEJBiW8caAGSXEaLCtirzTR9Uoz6ZCbFofyOidaOvrljkNEo8RimojG\nZM/RBtTbe3HH8mwoFXwpIRoP82eYEBujxqGzrXB7A3LHIaJR4BmQiK5Zc0c/3jhQi7m5RizMN8sd\nh2jKUCpElBRZ4fH6caSslcvlEYUBFtNEdE0CwSD+sKscGrUC967KgyAIckcimlIS4zSYnWtEg70P\ntbYeueMQ0VWwmCaia/L2kQbU2npx76o8xMWo5Y5DNCXNzDQgKU6DoxXtcHv9cschoi/AYpqIRq2p\nrQ9/P1iLBTNMWDDDJHccoilLFAQsLrDA5w/gWGW73HGI6AuwmCaiUfEHgvjdrnLEaJS459ZcueMQ\nTXkGfRRmZSeipqUHFfUOueMQ0RWwmCaiUXnzUC0a7H24d9UM6LUc3kE0GYqyExAbo8Zf9l2Ah6t7\nEIUkFtNEdFUfnmrGzsP1WFpowbw8o9xxiCKGQiFi8SwzHD0e7DhYI3ccIroMFtNE9IWOVbbhlT1V\nKMxOxL+sniF3HKKIY07QYmmhBXuPNqKulat7EIUaFtNEdEVldQ48/1YZcpLj8NBXC3hxFiKZfGVZ\nNmJj1Hjp7UoEgkG54xDR5/DMSESXVdPSg//621lYErT4n+uLEKVSyB2JKGJpNUrcfXMuGux92H+8\nWe44RPQ5LKaJaISmtj78avtp6LUqfO+u2YjRqOSORBTx5uUZUZidiDdKa+Ds9cgdh4guYTFNRMMc\nKWvF068eg0Ih4Adfn414XZTckYgIgCAIuPvWXASCEl7bf0HuOER0CYtpIgIA+PxBvLqnCs+/VY5M\nsx6P/8sCmAxauWMR0eeY4qNx2+IMfFLRhrJarj1NFApYTBMROroG8H/+eBzvn2zG6oXpeOSbc2DQ\ns0eaKBR9eWEGzIZovLq3Cj4/154mkhuLaaIIFggG8cGpZvzkpaOwOwfw8B2FuHPFNChEvjQQhSqV\nUsQ9t+ahzTmAtz9ukDsOUcRTyh2AiCafJEk4U92Jv75/EbZOF6anxuF/3JbPYR1EYWJWVgJuyDdh\n5+F6LJpp5rFLJCMW00QRpr61F399/yIq6p0wG6Lx8B2FmDM9CYIgyB2NiK7BXSun40x1J/747nls\nWl/MY5hIJiymiSJAUJJwrqYT7x5tRFmdE7poFe6+JRfLZyfzQixEYcqgj8JXS7Lx5/cu4HhVO+bP\nMMkdiSgisZgmmsI83gAOn7Ph3WNNaHW4EK9T444bs7Fybiq0Gh7+ROFu5bwUHDprw5/fu4BZWQmI\njuJxTTTZeNQRTUGOHjfeO96ED0+1wOXxI8uqx/1fmYn5eSb2RBNNIQpRxL2r8vDTV4/jzUO1uGvl\ndLkjEUUcFtMRxB8EPD7/hLQdpVJCyRptmNE835LDBZfn2veJSqmEzz/ycbW2HnxwohmnLrRDAjB
7\nWhJumpuCLGssBEGAxx+Exx8cU9vjIShNSLNEES0nJQ43Fifj3aNNWFpgRapJJ3eksDeR58uJfI3l\nuVgeLKYjiMfnx9EK+4S0vSDfDCU/XhxmNM+3XqdBb5/7mtsuzjXi9Pl2AIMrc7R0uHCmuhPtXQNQ\nKUXMyDBgRoYBumgVOrvd6Owe/e/4fNvjrTjXOCHtEkW6r92UgxPn2/HK3ipsuXsuRE5GvC4Teb6c\nyNdYnovlwWecKExJkoSm9n6cqe5EZ7cbWo0SC/JNmJYSBxW7Jogiii5ahfUrcvDi7kocOmtDSVGy\n3JGIIgaLaaIwVFHnwM7D9XD2eqCLVmHxLDOyU+KgENkbRRSplhZaUXrGhu3vV2POdCN00Sq5IxFF\nBHZfEYWRnn4v3jvehN+9WQ5/IIilhRasK8nC9LR4FtJEEU4UBHzr1jy43H78df9FueMQRQz2TBOF\nAa8/gDMXO1FZ74RCFHH7skzEatUsoIlomFSTDl9elI5dH9VjzvQkzOE8BaIJx55pohBX39qLHQdq\nUV7nRHZyHNbdmIWb5qaykCaiy1q7LAvpZh1eeqcSPf1eueMQTXkspolClNvrx4FTLfjwVAtiNEr8\n0+IMLCm08KIMRPSFlAoRG9bMxIAngJferoQkcU1KoonEYpooBDXYe/HmwTo02HsxZ3oSvrwoA0lx\nGrljEVGYSDHq8LXl2Th1sQOlZ2xyxyGa0tjFRRRCvL4APi63o9bWi4TYKNyyIA0GfZTcsYgoDN28\nIA2nLnbgz+9dwIwMA0zx0XJHIpqS2DNNFCLanC68dagOda29KJ6WiH9alMFCmojGTBQE/I/bZkIU\ngN/vLEeQlyAlmhAspolkFgxKOH2xA3s+boQgCPjywnQUT0uCyAmGRHSdEuM0uPuWXFxo6saOg7Vy\nxwk7kiTBHwhiwONHT78X3X1eeHwBWcehB4JBeLwBDHj86Bvwoaffi55+L/yBoGyZIh2HeUxhLrcf\nrZ39aGrtxYDHD0efBw32XsTFREGvVbFYCwG9Li/2Hm1Em3MA2cmxuGGmCWqlQu5YRDSFLJ5lQWVD\nF3YeroM1QYvFBRa5I4Ukjy+AmuZuXGjqRkWDE3W2Xnj9AVyubhYFAZooBaLVCsREqxCrVSM2Ro04\n3eDXKNX1vY77A0H0ugYL5V6Xd/D7S19dbv8VH/f2kQZYErQwJ2iRkhSD2dOSkMj5NhOOxfQU4vMH\ncL6pG+dqOnGuxoHmjv4rbiuKAuJi1DDoo2CMj0amRY8oNYu4yVTf2osjZXYEgxKWFVmQnRwndyQi\nmoIEQcC3VuWho2sAL75dgcQ4DXLT4uWOFRIGPH4cKbfj8Dkb6my9CAQlCACSjTFIN+sQpVZApRSh\nVopQXerocHv9GPAE4Pb64fYE0N3nRVNbHz4/ikatEqGLViFGoxr8Gq2EUiFCIQoIAmiy9wIYLODd\nngDc3sH2XG7/YMHsGV4wa9QK6LUqWBK00GtVUCsVEEUBoihAIQKSBPQP+KBSKtDZ48bRCjs+cPvx\n3++eR5ZVj/l5JszLM8Jk0E7SMxtZWEyHuaAkoazWgfdPNKO83gGvLwilQkBuWjwWzTIj1RKHgM8P\nbZQSEAWcq+lAd58XXX0edPV60epwoaalB0cr7Eg16ZCdHIsUo45rGE8gnz+Io5VtuNjUDZNBi6WF\nZui1arljEdEUplSIeOirhfjpq8fxX6+fxY++NS9iCytJklDX2osPTzXj4/I2eHwBpBp1WL0wHdNT\n4zEtJRaSIOBohX3UbQaDEvoGfOi+NOSib8A3OATD5YWtsx/+wGeV9qGzrSMer1QI0KiViI5SwJqo\nhT5GDb12sMdbr1VBPcqe7gX5ZsRcWj7V7nDhWFUbjle1Y/sH1dj+QTWyrLFYvTAd83gxn3HFYjpM\neXwBHD7Xin3HGmHrdCFOp0ZJYTIKshMwI90w1MtsNOrR3j7
4DrjfMzjsIylu+IxuR48bNS09qGnp\nQYO9D2qViOmp8ZiZaeCaxuPM0eNG6Wkbuvu9KMhOwLLZqXC5PHLHIqIIoItW4d++VoSnXzmGX20/\ng8e+NQ8xGpXcsSaNJEk4Xd2JNw/Woq61F2qViIX5ZiyfnYIsqx6C8FknUr/nykMpLkcUBcTGDA7x\nuNzv9fqC8AeDCAYlTEszoLy2E5CAKLUCGrUCSsX4T2EzJ2hx2+JM3LY4Ex1dAzhW1Y4PTzXj2R3n\nYIqPxj9/aTqKMw2jLtTpylgphZm+AR/2Hm3A+yea0e/2I8Oix4bbZ2LBDNOYD8aEWA0SYjWYm2uE\nrdOFi83dKK91oLLeielpcSjISoA2gl5wJ0JQklBR58TJ8x2IUitwy4JUWBNj+AkAEU0qc4IWD99R\niP/4yyn89o1z+Lf1xVApp/ZaBJIkobzOiTdKa1DT0gNjvAb33JqLRTMt0GomvgwSBAFRagWiMFi0\nWhK1sHdeeRjmREiKj8bqhem4dUEaTl5ox+4jDXj2b2eg16pw64I0fGleKjRqloRjxWcuTLjcPuz5\npBHvHmuExxvA3FwjblmQhumpccPeTV8PURSQYoxBijEGPf1enK3pRFVDF843dGNaaiwKshOhi2ZR\nfa26+jw4fLYVHd1upJl0WFxg5osWEckmL92Ab395Bn6/qwL/+ddTePiOwinbYVLV4MQbpbU439iF\nhNgo/MvqPCwttE5IT3A4EEUB8/JMmJtrRFuvF396pxJ/+7AGe4824rZFGVgxN2VobDiNHs/oIW7A\n48e7xxqx55NGDHj8mJ9nxNplWUgx6ib098bGqLG00IqinESU1TpwsakbF5t6kJsWh8KcRA7/GIVA\nIIiz1Z04fbETKqWIkiIrMv/ho0QiIjksLbRCFAX8YVcF/vcfT2DT+uIptepDdUs3dhyoQVmdE3Ex\natx9Sy5uLE6e8r3woyUIAgpykrDpzmJUN3fjjdIa/GX/Rew52og1SzJRUhS5bzjGghVRiHJ7/Xjv\neBPe+bgB/W4/5kxPwtplWUg36yc1h16rxqJZFhRkJ+JMdSeqGrtwsbkbeekGFGQlcAWQK6hp6cFL\n71Siqa0PGRY9bsg38Q0IEYWUxbMsiNdF4b9eP4unXz2Gf/taMTIsk3uOGW/1rb3YUVqD09Wd0EWr\ncOeKaVgxN+W6l6qbynJS4vCDr89BZb0Tr5fW4NU9VXj7SD2+sjQLiwvMUIgsqq+GZ/cQ4/EFsP9E\nE94+0oC+AR+KchKxriQLmZZYWXPpolVYUmBBQVYCTl/sQFmtA+cbu5CXHo/8DIOs2UJJW9cAXv+w\nGp9UtEGvVWH57OSwPzkR0dSVn2HA/7pnLv5z+2n87E8n8ODaAhTlJMod65rVtPRg10d1OHmhAzEa\nJf55eTbHAV+jGRkG/DB9Ls7VOvD6gRr8YXcFdh2px7plWViQb4LIT1WviP9lIaJvwIcPTzXj3WNN\n6On3oiArAWtLspATYmsPx8aoUVKcjIJsD85UD65nXVHnRJtzALcvyURC7NT5mPBa9A348NahOuw/\n0QSFKOD2JZm4cU4yzlZ3yh2NiOgLpRh1eOze+fj19tP49fbTKCm24o7lOYgN8SU7JUlCZUMXdn1U\nh/I6J2I0SqxdloVb5qdNysTCqUgQBBRmJ6IgKwEnL3TgjdIa/N83y7DzozqsviEdN+SbOVTmMvjf\nJrOm9j7sO9aIj8rs8PmDmJlpwFfWFYT8gvoGfRSWz05Gd58X52o7UXrGhoNnbFg0c3CZoZyU2IgY\nG9zWNYAPTjTjw9MtcHv9WFZoxbqSbBj0Ude8tBIRkVwM+ihsuWcu3jxYh3ePNeJ4VTu+emM2bpqd\nEnJXy/X6Ajh+vh37jzehuqUHcTFq3LliGpbPTuZwunEiCALm5hoxe3oSjla04a3Ddfj9rgpsf/8i\nbpqTghVzUhCni5I7Zsj
gf50M+t0+nDzfgY/KWlFR74RKKWLxLAtunp+K1AmeWDje4nSDExW/tXoG\nDpxswcGzNhw61wprohYlRclYUmC57Lqb4SwoSSivdeC94004U9156UUnCV9ZlhV2+4+I6FMatRJ3\nrpyGpUVW/PfeKvxx73kcON2CO1dMw4wMg6wf80uShHp7L0pP23Ck3I4Bjx/GeA3uvTUXy4qsXIFi\ngoiCgIUzzbgh34TyOifePdaINw/VYddH9ViQb8LCfDNmZSVE/GRFFtOT5NMC+lhVG8pqHQgEJSTG\navDPy7OxfHZK2C85lxCrwd235uKO5dk4WtmG0jMt+Ov7F/G3D6sxKyth8GOj7ASYw/SKW/5AEBca\nu3C6uhMnzrejo9uNWK0Ka5Zk4qY5KTDo+Q6diKaGlKQYPPKNOTha2YbX9l/Ef/zlFJLiNFhSYMGS\nAsukXTnRHwiiurkb52odOH2xA03t/VApRczPM6KkKBm56fEcxztJBEHArKwEzMpKgN3hwr5jTfio\nrBVHyuzQRikxZ3oS5s8wRWxhPapiura2Flu2bEFXVxfi4+OxdetWZGZmDtsmEAjg6aefRmlpKQRB\nwP3334/169df9b6pqrPbjQvNXZeWlOtGY3sfJAlIjNXglvlpmD/DNOKKS1NBdJQSNxYn48biZDR3\n9OPgmRacvNCBM5fGDpsM0SjMSsS01Dikm3UwG7Qh9xEiMPgxYnNHP+rtvSivc6KsthMDngCUCgEz\n0g346o3ZmJ9n4tgxIpqSBEHADflmFE9Lwomqdhw6Z8Nbh+rw5qE6TE+NQ/G0JGRa9Mi06Mdtjeoe\nlxfNbX1obO9HZb0TFQ1OeLwBKEQBOcmxuHdVHhbmm6bsmtjhwpygxd235uKuL01DeZ0DRyvacOJC\nBw6da4VaKSLLGotpqXGYnhqHnJS4iLjK5qiK6SeeeALf/OY3sXbtWvz973/H448/jldeeWXYNm+9\n9RYaGhqwd+9edHV1Yd26dVi8eDFSU1O/8L5wJEkS3N4Ael1e9A744OzxwO50we4YuPTVhR6XDwAQ\npVIgOzkWty/JRFFO0pQsoK8kJSkGd62cjrtWTofd6cK5GgfO1nSi9EwL3jvRBABQq0SkGnVIM+mQ\nFDd4JcYEfRQMsRoYdFETWqx6fAE4ez1w9Ljh6PHA0euG3eFCg70Ptk4XgpIEYHAoy4IZJhTnJCE/\n08DZ4UQUMaJUCiwusGBxgQWOHjc+KmvFR2V2/L8Pqoe2MRuikWHRIzFWA71WjdgYFfRaNVIH/Ojq\ncgEYHB4HCfD5g+gd8KGn34uefi96XV60dQ2gqb0fPf3eoTaT4jRYPGtwBan8DAPHQocgpUJEUU4S\ninKS8C1/EOV1DpTVDV6X4u0jDdh16RyaFKeB2RANU4IWZoMWZkM04nVR0GtV0EWrpsTlzK/639nZ\n2Yny8nK8+OKLAIA1a9bgqaeegsPhQEJCwtB2u3fvxvr16yGKIhISEnDzzTfjnXfewX333feF943W\nZPdeur0BvFFag54+D3wBCT5/EL5AEF5fAP1uHwIBacRj9Fo1kuI0mJ4Wj5SkGGRaYmFNjIYo8xqN\nnz53SoU4Ye/olQrxC/eRNTEG1sQY3LIgDYFgEHbHAGyd/WjucMHW0Yf61l5U1DtHPE6lFKFRKxEd\npYBGrUSUSoRCIUIpilAoBCgVwmXfnASDEvwBCYFgEIGgBL8/CLc3ALfXD7cvCLfXD78/OOJxcboo\n5KTGoWR2MlKTYpCcFAODPmpMb4BG83xHRykR8F/7Ppnofcm2B326f8Itd6S0PZbjJxRyj7VtuT/F\nS4qPxu1Ls3D7LRZkvwAACjZJREFU0iy43H40tfehub0PTW39aOroQ2N732XPjVciANBGqxCvi8Ky\nIgssCTGwJmhhSdRCF62SteMpUv5Pxut/KkqtwJxcI+bkGgEAXl8QDW29qG/thd05gI6uA
VQ396Cs\n1jHisWqVAjFRSqhUIpQKESqlAiqFgHSLHqtvSB+XfNdiLM/JVYtpm80Gs9kMhWLwnYNCoYDJZILN\nZhtWTNtsNiQnJw/dtlqtaG1tvep9o2UwxFzT9uPh4TvnTPrvnAiJiZ9Niku1hsZSeyZjLArlDjEJ\nJvL5zk6duPW92TbbZtuh13YoSQSQlhLaq05dr3B9/b4Wn68PxpvVEouFRRPWfEjhgE8iIiIiojG6\najFttVpht9sRCAQADE4mbGtrg9VqHbFdS0vL0G2bzQaLxXLV+4iIiIiIwtVVi+nExETk5+dj586d\nAICdO3ciPz9/2BAPAFi9ejW2b9+OYDAIh8OBffv2YdWqVVe9j4iIiIgoXAmSJF11tkB1dTW2bNmC\nnp4exMbGYuvWrcjOzsaGDRuwceNGFBYWIhAI4Mknn8ShQ4cAABs2bMBdd90FAF94HxERERFRuBpV\nMU1ERERERCNxAiIRERER0RixmCYiIiIiGiMW00REREREY8RimoiIiIhojHix+ymgtrYWW7ZsQVdX\nF+Lj47F161ZkZmYO2+aZZ57Bn/70J5hMJgDA3Llz8cQTT8iQNrJs3boVe/bsQXNzM9566y3k5uaO\n2CYQCODpp59GaWkpBEHA/fffj/Xr18uQNvKMZv/w2JGP0+nEo48+ioaGBqjVamRkZODJJ58csTTr\nwMAAfvjDH6KsrAwKhQKbN2/GihUrZEodOUa7f7Zs2YLDhw/DYBi86t/q1avx4IMPyhE5Ij300ENo\namqCKIrQarX48Y9/jPz8/GHb8Dx0nSQKe/fee6+0Y8cOSZIkaceOHdK99947Ypvf/OY30s9+9rPJ\njhbxjh49KrW0tEgrVqyQqqqqLrvNG2+8IX3nO9+RAoGA1NnZKZWUlEiNjY2TnDQyjWb/8NiRj9Pp\nlI4cOTJ0+2c/+5n0wx/+cMR2zzzzjPTYY49JkiRJtbW10pIlS6S+vr5JyxmpRrt/Nm/eLL366quT\nGY0+p6enZ+j7d999V1q3bt2IbXgeuj4c5hHmOjs7UV5ejjVr1gAA1qxZg/LycjgcDpmTEQDMnz9/\nxNVC/9Hu3buxfv16iKKIhIQE3HzzzXjnnXcmKWFkG83+IfnEx8dj4cKFQ7dnz5497Gq6n3r77beH\nrl2QmZmJgoICHDhwYNJyRqrR7h+Sl16vH/q+r68PgiCM2IbnoevDYR5hzmazwWw2Q6FQAAAUCgVM\nJhNsNtuIj9p27dqFgwcPwmg04rvf/S7mzJkjR2T6BzabDcnJyUO3rVYrWltbZUxE/4jHjvyCwSD+\n/Oc/Y+XKlSPua2lpQUpKytBtHkOT74v2DwC8+OKLeO2115CWlobvf//7yMnJmeSEke2xxx7DoUOH\nIEkSfve73424n+eh68NiOkJ8/etfxwMPPACVSoVDhw7hoYcewu7du4fGsBHR5fHYCQ1PPfUUtFot\n7rnnHrmj0GV80f7ZtGkTjEYjRFHEjh07cN9992Hfvn1DnUA08X76058CAHbs2IGf//zneOGFF2RO\nNLVwmEeYs1qtsNvtCAQCAAYnEbS1tY346NpoNEKlUgEAli5dCqvVigsXLkx6XhrJarUO+2jUZrPB\nYrHImIg+j8eO/LZu3Yr6+nr86le/giiOPG0lJyejubl56DaPocl1tf1jNpuHfr5u3Tq4XC72espk\n3bp1+Pjjj+F0Oof9nOeh68NiOswlJiYiPz8fO3fuBADs3LkT+fn5I4Z42O32oe8rKirQ3NyMrKys\nSc1Kl7d69Wps374dwWAQDocD+/btw6pVq+SORZfw2JHXL3/5S5w7dw7btm2DWq2+7DarV6/Ga6+9\nBgCoq6vD2bNnUVJSMpkxI9Zo9s/nj6HS0lKIogiz2TxZESNaf38/bDbb0O39+/cjLi4O8fHxw7bj\neej6CJIkSXKHoOtTXV2NLVu2oKenB7Gxsdi6dSuys
7OxYcMGbNy4EYWFhdi8eTPKysogiiJUKhU2\nbtyI5cuXyx19ynv66aexd+9edHR0wGAwID4+Hrt27Rq2bwKBAJ588kkcOnQIALBhw4ahyVQ0sUaz\nf3jsyOfChQtYs2YNMjMzodFoAACpqanYtm0b1q5di+effx5msxkulwtbtmxBRUUFRFHEI488gptv\nvlnm9FPfaPfPt7/9bXR2dkIQBOh0Ojz66KOYPXu2zOkjQ0dHBx566CEMDAxAFEXExcVh8+bNmDVr\nFs9D44jFNBERERHRGHGYBxERERHRGLGYJiIiIiIaIxbTRERERERjxGKaiIiIiGiMWEwTEREREY0R\ni2kioiniueeew2OPPTaqbZ955hn84Ac/mOBERERTH4tpIqIQsnLlShw+fHjYz15//XV84xvfuOpj\nH3jggaHLBk9EDiIiGonFNBERERHRGLGYJiIKI3a7Hd/97nexaNEirFy5Eq+88srQff84dGPHjh1Y\nsWIFFi5ciG3bto3obfb5fHj00UcxZ84c3HbbbTh79iwA4JFHHkFLSwseeOABzJkzBy+88MLk/YFE\nRGGGxTQRUZgIBoN48MEHkZeXhwMHDuDll1/Gyy+/jNLS0hHbXrx4ET/5yU/wi1/8AqWlpejr64Pd\nbh+2zf79+3Hbbbfh2LFjWLlyJZ566ikAwC9+8QskJyfjueeew8mTJ7Fhw4ZJ+fuIiMKRUu4AREQ0\n3L/+679CoVAM3fb5fJg5cybOnj0Lh8OBhx9+GACQlpaGO++8E7t370ZJScmwNt555x2sWLEC8+fP\nBwBs3LgRr7766rBt5s2bh+XLlwMA1q5di5dffnki/ywioimJxTQRUYjZtm0blixZMnT79ddfx/bt\n29Hc3Iy2trahAhkAAoHAsNufamtrg8ViGbodHR2N+Pj4YdskJSUNfa/RaODxeOD3+6FU8tRARDRa\nfMUkIgoTVqsVqamp2Lt371W3NZlMqK2tHbrtdrvR1dU1kfGIiCISx0wTEYWJoqIixMTE4Pnnn4fb\n7UYgEMD58+dx5syZEduuWrUK+/fvx4kTJ+D1evHMM89AkqRR/66kpCQ0NjaOZ3wioimJxTQRUZhQ\nKBR47rnnUFlZiS996UtYtGgRfvSjH6Gvr2/EttOnT8ePf/xjfO9730NJSQm0Wi0SEhKgVqtH9bvu\nv/9+PPvss5g/fz5+//vfj/efQkQ0ZQjStXRVEBFRWOrv78eCBQuwZ88epKWlyR2HiGjKYM80EdEU\ntX//fgwMDMDlcmHr1q3Izc1Famqq3LGIiKYUFtNERFPUe++9h5KSEpSUlKC+vh6//OUvIQiC3LGI\niKYUDvMgIiIiIhoj9kwTEREREY0Ri2kiIiIiojFiMU1ERERENEYspomIiIiIxojFNBERERHRGLGY\nJiIiIiIao/8Phj/Mf0+w3mIAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(height_outlier);" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "jI9ToieVrisQ" + }, + "source": [ + "Dá para perceber que a maior parte dos dados concentra-se em torno da média (~ 1.7 m) e que apenas algumas observações encontram-se bastante distantes dela." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "q49-oFz4gBHs", + "outputId": "f968b883-a1e3-4ead-963a-19d9f25e9d9e" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1.7181251474953014, 0.2948590174540895)" + ] + }, + "execution_count": 56, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "height_outlier_mean = height_outlier.mean()\n", + "height_outlier_std = height_outlier.std()\n", + "\n", + "height_outlier_mean, height_outlier_std" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "dTtLF6P2rvIh" + }, + "source": [ + "Um jeito de procurar por _outliers_ é ver quem se encontra fora do intervalo $[\\bar{x} - k * \\sigma, \\bar{x} + k * \\sigma]$, onde $k$ geralmente é 1.5, 2.0, 2.5 ou até 3.0.\n", + "\n", + "Abaixo utilizamos o $k = 2$, pois esse valor faz sentido (alturas menores que 1.12 m ou maiores que 2.30 m fogem do nosso padrão):" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "cI8gL-QrgK1s", + "outputId": "6c472ac1-ea23-4dd3-b833-91969a62f92d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[1.1284071125871225, 2.3078431824034804]" + ] + }, + "execution_count": 57, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "non_outlier_interval_dist = 
[height_outlier_mean - 2 * height_outlier_std, height_outlier_mean + 2 * height_outlier_std]\n", + "\n", + "non_outlier_interval_dist" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "b5A37brPsVPw" + }, + "source": [ + "Novamente, conhecendo o intervalo, podemos identificar as observações que caem fora dele e removê-las:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 104 + }, + "colab_type": "code", + "id": "W6jVe5TMglf5", + "outputId": "c270dcb7-d46a-4dd8-94b3-c3d610269282" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "29 0.516665\n", + "38 2.943781\n", + "48 1.058498\n", + "68 2.737088\n", + "Name: Height, dtype: float64" + ] + }, + "execution_count": 58, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "outliers_dist = height_outlier[(height_outlier < non_outlier_interval_dist[0]) | (height_outlier > non_outlier_interval_dist[1])]\n", + "\n", + "outliers_dist" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "jqYD2d3chJTK" + }, + "outputs": [], + "source": [ + "height_no_outlier_dist = height_outlier.drop(index=outliers_dist.index)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "8IL5fWP1sePM" + }, + "source": [ + "Até agora, nossos métodos de identificação de _outliers_ foram baseados em estatísticas descritivas do nosso _data set_ (quantis, média e variância). Porém, alguns testes de hipóteses também existem.\n", + "\n", + "Um deles é o teste de Grubb. 
Esse é um teste bastante simples, cuja estatística de teste $G$ depende dos valores extremos do conjunto e da média amostral:\n", + "\n", + "$$G = \\frac{\\vert x_{\\text{\\{min ou max\\}}} - \\bar{x}\\vert}{s}$$\n", + "\n", + "onde $\\bar{x}$ é a média amostral e $s$ é o desvio-padrão da amostra.\n", + "\n", + "A hipótese nula, $H_{0}$, é de que não existem _outliers_ no _data set_. O teste de Grubb assume que os dados originam-se de uma distribuição normal, então pode ser válido testar essa hipótese antes.\n", + "\n", + "Rejeitamos a hipótese nula se o valor de $G$ encontrado for superior ao valor crítico do teste, que é dado por\n", + "\n", + "$$G_{\\text{crítico}} = \\frac{n - 1}{\\sqrt{n}} \\sqrt{\\frac{t_{\\alpha',n-2}^{2}}{n - 2 + t_{\\alpha',n-2}^{2}}}$$\n", + "\n", + "onde $n$ é o tamanho da amostra, $t_{\\alpha',n-2}$ é o valor crítico superior, ao nível $\\alpha'$, da distribuição t de Student com $n - 2$ graus de liberdade, e $\\alpha'$ é $\\alpha/(2n)$ se o teste for bilateral (procuramos _outliers_ muito acima ou muito abaixo) ou $\\alpha/n$ se o teste for unilateral (acreditamos que o _outlier_, se houver, está em somente uma das extremidades da distribuição)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "RNveH7ftxMOV" + }, + "source": [ + "Abaixo criamos algumas funções que nos auxiliam nos cálculos e na exibição dos resultados:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Ir61-q0ckV6K" + }, + "outputs": [], + "source": [ + "def grubb_test(g, n, alpha=0.05, tailed='two-tailed'):\n", + " if tailed == 'two-tailed':\n", + " critical = ((n - 1)/sqrt(n)) * sqrt(sct.t.isf(alpha/(2*n), n-2)**2/(n - 2 + sct.t.isf(alpha/(2*n), n-2)**2))\n", + " \n", + " return (g, critical, g > critical)\n", + " elif tailed == 'one-tailed':\n", + " critical = ((n - 1)/sqrt(n)) * sqrt(sct.t.isf(alpha/(n), n-2)**2/(n - 2 + sct.t.isf(alpha/(n), n-2)**2))\n", + " \n", + " return (g, critical, g > critical)\n", + " else:\n", + " raise ValueError(f\"Invalid tailed argument\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "c--VvSPuuHaM" + }, + "outputs": [], + "source": [ + "def grubb_summary(result, decimals=10):\n", + " return (\n", + " f\"Null hypothesis: there is no outliers in the data set\\n\"\n", + " f\"Test statistic: {np.round(result[0], decimals)}, \"\n", + " f\"Grubb's critical value: {np.round(result[1], decimals)}, \"\n", + " f\"Reject: {result[2]}\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "d8nFGEVuqgdC" + }, + "outputs": [], + "source": [ + "def next_outlier_candidate(data):\n", + " sample_distances = (data - data.mean()).abs()\n", + " candidate_idx = sample_distances.idxmax()\n", + " candidate_value = data[candidate_idx]\n", + " candidate_statistic = sample_distances.max()/data.std()\n", + " \n", + " return (candidate_idx, candidate_value, candidate_statistic, len(data))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": 
"text", + "id": "MRZwuyOOxU7U" + }, + "source": [ + "Ao executarmos o teste de Grubb no nosso conjunto de alturas, encontramos alguns valores onde a hipótese nula é rejeitada, ou seja, há evidência de que o valor extremo é um _outlier_." + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 434 + }, + "colab_type": "code", + "id": "Rz-yVWFlt-M6", + "outputId": "cb11e99b-2195-45d7-9089-fdf292a65e1c" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index: 38, Value: 2.944, Test statistic: 4.157, Sample size: 100\n", + "\n", + "Null hypothesis: there is no outliers in the data set\n", + "Test statistic: 4.157, Grubb's critical value: 3.384, Reject: True\n", + "\n", + "\n", + "Index: 29, Value: 0.517, Test statistic: 4.421, Sample size: 99\n", + "\n", + "Null hypothesis: there is no outliers in the data set\n", + "Test statistic: 4.421, Grubb's critical value: 3.381, Reject: True\n", + "\n", + "\n", + "Index: 68, Value: 2.737, Test statistic: 4.219, Sample size: 98\n", + "\n", + "Null hypothesis: there is no outliers in the data set\n", + "Test statistic: 4.219, Grubb's critical value: 3.377, Reject: True\n", + "\n", + "\n", + "Index: 48, Value: 1.058, Test statistic: 2.96, Sample size: 97\n", + "\n", + "Null hypothesis: there is no outliers in the data set\n", + "Test statistic: 2.96, Grubb's critical value: 3.374, Reject: False\n", + "\n", + "\n" + ] + } + ], + "source": [ + "height_outlier_grubb = height_outlier.copy()\n", + "outliers_grubb = pd.Series()\n", + "has_outlier = True\n", + "\n", + "while has_outlier:\n", + " outlier_candidate = next_outlier_candidate(height_outlier_grubb)\n", + "\n", + " print(f\"Index: {outlier_candidate[0]}, \"\n", + " f\"Value: {np.round(outlier_candidate[1], 3)}, \"\n", + " f\"Test statistic: {np.round(outlier_candidate[2], 3)}, \"\n", + " f\"Sample size: {outlier_candidate[3]}\\n\")\n", + "\n", + " result = 
grubb_test(outlier_candidate[2], outlier_candidate[3])\n", + "\n", + " print(grubb_summary(result, 3))\n", + "\n", + " has_outlier = result[2]\n", + "\n", + " if has_outlier:\n", + " height_outlier_grubb = height_outlier_grubb.drop(index=outlier_candidate[0])\n", + " outliers_grubb.at[outlier_candidate[0]] = outlier_candidate[1]\n", + " \n", + " print(f\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 86 + }, + "colab_type": "code", + "id": "49MMneSg-DCj", + "outputId": "a98df152-223e-43e1-ced9-d113a40b879f" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "38 2.943781\n", + "29 0.516665\n", + "68 2.737088\n", + "dtype: float64" + ] + }, + "execution_count": 64, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "outliers_grubb" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "_hajYam661Zd" + }, + "source": [ + "Abaixo comparamos os _outliers_ encontrados por cada um dos três métodos:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 86 + }, + "colab_type": "code", + "id": "l3P2Bavg-zMK", + "outputId": "25065774-49a4-4509-fe92-70a4d32c8cd2" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "IQR [29, 38, 48, 68, 91, 92]\n", + "Normal [29, 38, 48, 68]\n", + "Grubb [29, 38, 68]\n", + "dtype: object" + ] + }, + "execution_count": 65, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "outliers = pd.Series({\"IQR\": outliers_iqr.index.values,\n", + " \"Normal\": outliers_dist.index.values,\n", + " \"Grubb\": outliers_grubb.index.values})\n", + "\n", + "outliers.apply(np.sort)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "1oMEwGs_DHJW" + }, + "source": [ + "## _Features_ de texto\n", + "\n", + "Dados 
textuais são muito ricos e muito fáceis de serem encontrados. Diversos _data sets_ são compostos por documentos textuais e ainda um simples _scraper_ pode coletar dezenas de milhares de documentos da Internet. Coleções de documentos são frequentemente chamadas de _corpus_ (plural, _corpora_).\n", + "\n", + "Nosso objetivo aqui é somente mostrar como pré-processar de forma simples _features_ textuais. Para isso, utilizaremos o _data set_ 20 newsgroups, que contém milhares de documentos categorizados em 20 grupos (desde astronomia até carros)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "XItMVwyq8Dp9" + }, + "source": [ + "Abaixo escolhemos somente três grupos para restringir nosso escopo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "usWrDfLvMNxw" + }, + "outputs": [], + "source": [ + "categories = [\"sci.crypt\", \"sci.med\", \"sci.space\"]\n", + "\n", + "newsgroups = fetch_20newsgroups(subset=\"train\", categories=categories, shuffle=True, random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "4uNwK5uREAn7" + }, + "source": [ + "Temos agora um _corpus_ com 1782 documentos:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "_lUWgt06EtnR", + "outputId": "f82dd8b7-5f76-477c-9173-ee35d0c7e0aa" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1782" + ] + }, + "execution_count": 67, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "len(newsgroups.data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "xh326fr28Jyc" + }, + "source": [ + "Um exemplo de documento desse _corpus_ é mostrado abaixo:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + 
"colab": { + "base_uri": "https://localhost:8080/", + "height": 295 + }, + "colab_type": "code", + "id": "vsfaD72_M52H", + "outputId": "fb895197-8753-49e6-a631-e7716ad8c8ee" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Document 4 of 1782:\n", + "\n", + "From: billc@col.hp.com (Bill Claussen)\n", + "Subject: Re: Should I be angry at this doctor?\n", + "Organization: HP Colorado Springs Division\n", + "Lines: 5\n", + "Distribution: na\n", + "NNTP-Posting-Host: hpcspe17.col.hp.com\n", + "\n", + "\n", + "Report them to your local BBB (Better Business Bureau).\n", + "\n", + "Bill Claussen\n", + "\n", + "\n", + "> Category: sci.med\n" + ] + } + ], + "source": [ + "document_idx = 4\n", + "documents_total = len(newsgroups.data)\n", + "\n", + "print(f\"> Document {document_idx} of {documents_total}:\\n\\n{newsgroups.data[document_idx]}\")\n", + "print(f\"> Category: {newsgroups.target_names[newsgroups.target[document_idx]]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "6liTZFzv8Nas" + }, + "source": [ + "Quando trabalhando com dados textuais, uma representação simples é ter:\n", + "\n", + "* Cada documento em uma linha.\n", + "* Cada palavra (ou termo) em uma coluna.\n", + "\n", + "Por exemplo, se nosso vocábulário (conjunto de todas palavras ou termos do _corpus_) tiver tamanho 10000 e tivermos 100 documentos, então nosso _data set_ será composto de 100 linhas e 10000 colunas." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "qLBi7mFU8mLI" + }, + "source": [ + "O valor de cada célula, $x_{i, j}$, (interseção da linha $i$ com a coluna $j$) do _data set_ depende da transformação que aplicarmos.\n", + "\n", + "A transformação mais simples é a contagem de palavras no documento, ou seja, $x_{i, j}$ indica o número de ocorrências da palavra $j$ no documento $i$.\n", + "\n", + "Isso pode ser obtido no sklearn pelo `CountVectorizer`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "4E6FmUUhNs8b" + }, + "outputs": [], + "source": [ + "count_vectorizer = CountVectorizer()\n", + "newsgroups_counts = count_vectorizer.fit_transform(newsgroups.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "TSylOCPKjLmh", + "outputId": "d7b6e6b8-f227-4ec5-a34a-2cf93fc8ebb5" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "scipy.sparse.csr.csr_matrix" + ] + }, + "execution_count": 78, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "type(newsgroups_counts)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "M4rtFrsF9CgR" + }, + "source": [ + "Abaixo escolhemos dez palavras contidas no _corpus_ para exemplificar:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "colab_type": "code", + "id": "kmxzJhkSUpIZ", + "outputId": "613a8241-c25e-4d5d-9830-1cee04671fc4" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
banksbusinessclippercoloradogibberishgroupkapormonitorprivatestudy
00020001000
10000100200
23000000010
30000040002
40101000000
\n", + "
" ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "WAQ20ew-Wx5V", - "colab_type": "code", - "outputId": "fd781f7a-198a-444f-bfb8-baee26469ef0", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 206 - } - }, - "source": [ - "pd.DataFrame(newsgroups_tfidf_vectorized[:5, words_idx].toarray(), columns=np.array(count_vectorizer.get_feature_names())[words_idx])" + "text/plain": [ + " banks business clipper colorado ... kapor monitor private study\n", + "0 0 0 2 0 ... 1 0 0 0\n", + "1 0 0 0 0 ... 0 2 0 0\n", + "2 3 0 0 0 ... 0 0 1 0\n", + "3 0 0 0 0 ... 0 0 0 2\n", + "4 0 1 0 1 ... 0 0 0 0\n", + "\n", + "[5 rows x 10 columns]" + ] + }, + "execution_count": 70, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "words_idx = sorted([count_vectorizer.vocabulary_.get(f\"{word.lower()}\") for word in\n", + " [u\"clipper\", u\"Kapor\",\n", + " u\"monitor\", u\"gibberish\",\n", + " u\"Banks\", u\"private\",\n", + " u\"study\", u\"group\",\n", + " u\"Colorado\", u\"Business\"]])\n", + "\n", + "pd.DataFrame(newsgroups_counts[:5, words_idx].toarray(), columns=np.array(count_vectorizer.get_feature_names())[words_idx])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "C7WuoRgP9WE9" + }, + "source": [ + "Por exemplo, o valor 2 na interseção do documento 0 com a coluna `clipper` indica que a palavra _clipper_ aparece duas vezes no documento 0. Obviamente é possível que uma mesma palavra apareça em múltiplos documentos e mais óbvio ainda que um documento contenha múltiplas palavras." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "UQzj-_QT9p7e" + }, + "source": [ + "O problema com essa abordagem é que não temos como medir relevância dos termos. E se o termo é super comum e aparece em quase todos documentos? 
E se o termo aparece muitas vezes no mesmo documento, mas poucas vezes nos outros?\n", + "\n", + "Essas perguntas não podem ser respondidas simplesmente com a contagem de termos acima. Para isso, precisamos do tf-idf." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "AXBnOFk___QK" + }, + "source": [ + "O tf-idf é uma estatística baseada no _corpus_ composta de outras duas estatísticas:\n", + "\n", + "* $\\text{tf}(t, d)$, ou _term frequency_, é uma medida de quantas vezes o termo $t$ aparece no documento $d$. Algumas opções estão disponíveis, mas a mais simples é a contagem do número de ocorrências do termo no documento, $f_{t, d}$, exatamente o que computamos acima. Essa é a forma como sklearn define $tf$:\n", + "\n", + "$$\\text{tf}(t, d) = f_{t, d}$$\n", + "\n", + "* $\\text{idf}(t)$, ou _inverse document frequency_, é uma medida de relevância do termo em todos documentos do _corpus_. O sklearn a computa, seguindo valores _default_, da seguinte forma:\n", + "\n", + "$$\\text{idf}(t) = \\log{\\frac{1+n}{1 + d_{t}}} + 1$$\n", + "\n", + "onde $n$ é o número de documentos no _corpus_ e $d_{t}$ é o número de documentos no _corpus_ que contêm o termo $t$ ($0 < d_{t} \\leq n$).\n", + "\n", + "O tf-idf é calculado multiplicando esses dois valores:\n", + "\n", + "$$\\text{tf-idf}(t, d) = \\text{tf}(t, d) \\times \\text{idf}(t) = f_{t, d} \\times \\left(\\log{\\frac{1+n}{1 + d_{t}}} + 1\\right)$$\n", + "\n", + "O sklearn também normaliza todos documentos resultantes, ou seja todas linhas da matriz, para terem norma unitária. Em outras palavras, os elementos do vetor de tf-idf do documento $i$ são dados por:\n", + "\n", + "$$\\text{tf-idf}(i, j)_{\\text{normalizado}} = \\frac{\\text{tf-idf}(i, j)}{\\sqrt{\\text{tf-idf}(i, 1)^{2} + \\text{tf-idf}(i, 2)^{2} + \\cdots + \\text{tf-idf}(i, T)^{2}}}$$\n", + "\n", + "onde $T$ é o número de termos do _corpus_, ou seja, o tamanho do vocabulário." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "bWpYWUMjCH8l" + }, + "source": [ + "O tf-idf é sempre um valor não negativo e quanto mais alto, maior a relevância do termo.\n", + "\n", + "Note como o tf aumenta de acordo com o número de ocorrências do termo no documento: quanto mais frequente o termo, mais relevante ele parece ser.\n", + "\n", + "O idf é uma medida de \"raridade\" do termo através de todo _corpus_: quanto mais alto, menos o termo aparece no _corpus_ e consequentemente mais informação ele traz.\n", + "\n", + "Multiplicando os dois, temos uma medida do quão relevante aquele termo é para aquele documento no _corpus_." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "b_N2VQnwDaey" + }, + "source": [ + "O sklearn provê um transformador, `TfidfTransformer`, que recebe uma matriz de frequências, como a retornada pelo `CountVectorizer`, e retorna uma matriz de tf-idf:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "Fyxgx0YhVwtF" + }, + "outputs": [], + "source": [ + "tfidf_transformer = TfidfTransformer()\n", + "\n", + "tfidf_transformer.fit(newsgroups_counts)\n", + "\n", + "newsgroups_tfidf = tfidf_transformer.transform(newsgroups_counts)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "colab_type": "code", + "id": "evk8smtLWNtO", + "outputId": "bf99b51a-e276-480c-dee9-13713e85a00b" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
banksbusinessclippercoloradogibberishgroupkapormonitorprivatestudy
00.0000000.0000000.0812930.0000000.0000000.0000000.0963680.0000000.0000000.000000
10.0000000.0000000.0000000.0000000.1098940.0000000.0000000.1793520.0000000.000000
20.1481520.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0485510.000000
30.0000000.0000000.0000000.0000000.0000000.1452230.0000000.0000000.0000000.083477
40.0000000.1172480.0000000.1315680.0000000.0000000.0000000.0000000.0000000.000000
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
banksbusinessclippercoloradogibberishgroupkapormonitorprivatestudy
00.0000000.0000000.0812930.0000000.0000000.0000000.0963680.0000000.0000000.000000
10.0000000.0000000.0000000.0000000.1098940.0000000.0000000.1793520.0000000.000000
20.1481520.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0485510.000000
30.0000000.0000000.0000000.0000000.0000000.1452230.0000000.0000000.0000000.083477
40.0000000.1172480.0000000.1315680.0000000.0000000.0000000.0000000.0000000.000000
\n", - "
" - ], - "text/plain": [ - " banks business clipper ... monitor private study\n", - "0 0.000000 0.000000 0.081293 ... 0.000000 0.000000 0.000000\n", - "1 0.000000 0.000000 0.000000 ... 0.179352 0.000000 0.000000\n", - "2 0.148152 0.000000 0.000000 ... 0.000000 0.048551 0.000000\n", - "3 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.083477\n", - "4 0.000000 0.117248 0.000000 ... 0.000000 0.000000 0.000000\n", - "\n", - "[5 rows x 10 columns]" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 74 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RLFGR7A_D0px", - "colab_type": "text" - }, - "source": [ - "Note como a matriz acima é exatamente igual a retornada pelo `TfidfTransformer`.\n", - "\n", - "O resultado (igual da matriz de frequência) é um _data set_ com 1782 documentos e 33796 termos:" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "8I_w7yLeYnRe", - "colab_type": "code", - "outputId": "e1162574-03a2-4368-c3b6-517759bb973f", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 34 - } - }, - "source": [ - "newsgroups_tfidf_vectorized.shape" + "text/plain": [ + " banks business clipper ... monitor private study\n", + "0 0.000000 0.000000 0.081293 ... 0.000000 0.000000 0.000000\n", + "1 0.000000 0.000000 0.000000 ... 0.179352 0.000000 0.000000\n", + "2 0.148152 0.000000 0.000000 ... 0.000000 0.048551 0.000000\n", + "3 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.083477\n", + "4 0.000000 0.117248 0.000000 ... 
0.000000 0.000000 0.000000\n", + "\n", + "[5 rows x 10 columns]" + ] + }, + "execution_count": 72, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame(newsgroups_tfidf[:5, words_idx].toarray(), columns=np.array(count_vectorizer.get_feature_names())[words_idx])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "h9hI18kYDsuA" + }, + "source": [ + "Também podemos obter a matriz de tf-idf diretamente do _corpus_ sem ter que passar pela matriz de frequência com o transformador `TfidfVectorizer`:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "wPV4xrxzWlA-" + }, + "outputs": [], + "source": [ + "tfidf_vectorizer = TfidfVectorizer()\n", + "\n", + "tfidf_vectorizer.fit(newsgroups.data)\n", + "\n", + "newsgroups_tfidf_vectorized = tfidf_vectorizer.transform(newsgroups.data)" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "colab_type": "code", + "id": "WAQ20ew-Wx5V", + "outputId": "fd781f7a-198a-444f-bfb8-baee26469ef0" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
banksbusinessclippercoloradogibberishgroupkapormonitorprivatestudy
00.0000000.0000000.0812930.0000000.0000000.0000000.0963680.0000000.0000000.000000
10.0000000.0000000.0000000.0000000.1098940.0000000.0000000.1793520.0000000.000000
20.1481520.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0485510.000000
30.0000000.0000000.0000000.0000000.0000000.1452230.0000000.0000000.0000000.083477
40.0000000.1172480.0000000.1315680.0000000.0000000.0000000.0000000.0000000.000000
\n", + "
" ], - "execution_count": 0, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "(1782, 33796)" - ] - }, - "metadata": { - "tags": [] - }, - "execution_count": 75 - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NjPMTtkUwrS1", - "colab_type": "text" - }, - "source": [ - "## Referências\n", - "\n", - "* [Feature engineering](https://jakevdp.github.io/PythonDataScienceHandbook/05.04-feature-engineering.html)\n", - "\n", - "* [Feature Scaling with scikit-learn](http://benalexkeen.com/feature-scaling-with-scikit-learn/)\n", - "\n", - "* [Anthony Goldbloom gives you the secret to winning Kaggle competitions](https://www.import.io/post/how-to-win-a-kaggle-competition/)\n", - "\n", - "* [What are some best practices in Feature Engineering?](https://www.quora.com/What-are-some-best-practices-in-Feature-Engineering)\n", - "\n", - "* [Discover Feature Engineering, How to Engineer Features and How to Get Good at It](https://machinelearningmastery.com/discover-feature-engineering-how-to-engineer-features-and-how-to-get-good-at-it/)\n", - "\n", - "* [Fundamental Techniques of Feature Engineering for Machine Learning](https://towardsdatascience.com/feature-engineering-for-machine-learning-3a5e293a5114)\n", - "\n", - "* [Feature Engineering Cookbook for Machine Learning](https://medium.com/@michaelabehsera/feature-engineering-cookbook-for-machine-learning-7bf21f0bcbae)\n", - "\n", - "* [A Simple Guide to Scikit-learn Pipelines](https://medium.com/vickdata/a-simple-guide-to-scikit-learn-pipelines-4ac0d974bdcf)\n", - "\n", - "* [Outlier detection with Scikit Learn](https://www.mikulskibartosz.name/outlier-detection-with-scikit-learn/)\n", - "\n", - "* [Working With Text Data](https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html)\n", - "\n", - "* [WTF is TF-IDF?](https://www.kdnuggets.com/2018/08/wtf-tf-idf.html)\n" - ] + "text/plain": [ + " banks business clipper ... 
monitor private study\n", + "0 0.000000 0.000000 0.081293 ... 0.000000 0.000000 0.000000\n", + "1 0.000000 0.000000 0.000000 ... 0.179352 0.000000 0.000000\n", + "2 0.148152 0.000000 0.000000 ... 0.000000 0.048551 0.000000\n", + "3 0.000000 0.000000 0.000000 ... 0.000000 0.000000 0.083477\n", + "4 0.000000 0.117248 0.000000 ... 0.000000 0.000000 0.000000\n", + "\n", + "[5 rows x 10 columns]" + ] + }, + "execution_count": 74, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" } - ] -} \ No newline at end of file + ], + "source": [ + "pd.DataFrame(newsgroups_tfidf_vectorized[:5, words_idx].toarray(), columns=np.array(count_vectorizer.get_feature_names())[words_idx])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "RLFGR7A_D0px" + }, + "source": [ + "Note como a matriz acima é exatamente igual a retornada pelo `TfidfTransformer`.\n", + "\n", + "O resultado (igual da matriz de frequência) é um _data set_ com 1782 documentos e 33796 termos:" + ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "8I_w7yLeYnRe", + "outputId": "e1162574-03a2-4368-c3b6-517759bb973f" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1782, 33796)" + ] + }, + "execution_count": 75, + "metadata": { + "tags": [] + }, + "output_type": "execute_result" + } + ], + "source": [ + "newsgroups_tfidf_vectorized.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "NjPMTtkUwrS1" + }, + "source": [ + "## Referências\n", + "\n", + "* [Feature engineering](https://jakevdp.github.io/PythonDataScienceHandbook/05.04-feature-engineering.html)\n", + "\n", + "* [Feature Scaling with scikit-learn](http://benalexkeen.com/feature-scaling-with-scikit-learn/)\n", + "\n", + "* [Anthony Goldbloom gives you the secret to winning Kaggle 
competitions](https://www.import.io/post/how-to-win-a-kaggle-competition/)\n", + "\n", + "* [What are some best practices in Feature Engineering?](https://www.quora.com/What-are-some-best-practices-in-Feature-Engineering)\n", + "\n", + "* [Discover Feature Engineering, How to Engineer Features and How to Get Good at It](https://machinelearningmastery.com/discover-feature-engineering-how-to-engineer-features-and-how-to-get-good-at-it/)\n", + "\n", + "* [Fundamental Techniques of Feature Engineering for Machine Learning](https://towardsdatascience.com/feature-engineering-for-machine-learning-3a5e293a5114)\n", + "\n", + "* [Feature Engineering Cookbook for Machine Learning](https://medium.com/@michaelabehsera/feature-engineering-cookbook-for-machine-learning-7bf21f0bcbae)\n", + "\n", + "* [A Simple Guide to Scikit-learn Pipelines](https://medium.com/vickdata/a-simple-guide-to-scikit-learn-pipelines-4ac0d974bdcf)\n", + "\n", + "* [Outlier detection with Scikit Learn](https://www.mikulskibartosz.name/outlier-detection-with-scikit-learn/)\n", + "\n", + "* [Working With Text Data](https://scikit-learn.org/stable/tutorial/text_analytics/working_with_text_data.html)\n", + "\n", + "* [WTF is TF-IDF?](https://www.kdnuggets.com/2018/08/wtf-tf-idf.html)\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "Aula 7 - Feature Engineering.ipynb", + "provenance": [], + "version": "0.3.2" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}