diff --git a/DesafioDIO_ETL.ipynb b/DesafioDIO_ETL.ipynb
new file mode 100644
index 0000000..2d10978
--- /dev/null
+++ b/DesafioDIO_ETL.ipynb
@@ -0,0 +1,1255 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "JqxdSj5j0yuY",
+ "outputId": "e84b321c-9593-4962-f1c8-4d3baba0ea9b"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Writing banco.csv\n"
+ ]
+ }
+ ],
+ "source": [
+ "%%writefile banco.csv\n",
+ "age,job,marital,education,default,balance,housing,loan\n",
+ "34,unemployed,single,primary,no,1587,no,yes\n",
+ "34,services,married,secondary,no,3789,yes,yes\n",
+ "31,management,single,tertiary,no,1850,yes,no\n",
+ "32,management,married,tertiary,no,1476,yes,no\n",
+ "39,blue-collar,married,secondary,no,0,yes,no\n",
+ "35,management,single,tertiary,no,747,no,no\n",
+ "36,self-employed,married,tertiary,no,307,yes,no\n",
+ "38,technician,single,secondary,no,147,no,no\n",
+ "42,entrepreneur,married,tertiary,no,221,yes,no\n",
+ "45,services,married,primary,no,-88,,yes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd"
+ ],
+ "metadata": {
+ "id": "14rznqrx1FcU"
+ },
+ "execution_count": 2,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df = pd.read_csv('banco.csv', na_values='na')"
+ ],
+ "metadata": {
+ "id": "8FXN7Il81MPL"
+ },
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.head(n=10)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 362
+ },
+ "id": "xyudAlDR1Nva",
+ "outputId": "dc08e3c3-86f8-4b57-80e7-a6e44bc78265"
+ },
+ "execution_count": 5,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " age job marital education default balance housing loan\n",
+ "0 34 unemployed single primary no 1587 no yes\n",
+ "1 34 services married secondary no 3789 yes yes\n",
+ "2 31 management single tertiary no 1850 yes no\n",
+ "3 32 management married tertiary no 1476 yes no\n",
+ "4 39 blue-collar married secondary no 0 yes no\n",
+ "5 35 management single tertiary no 747 no no\n",
+ "6 36 self-employed married tertiary no 307 yes no\n",
+ "7 38 technician single secondary no 147 no no\n",
+ "8 42 entrepreneur married tertiary no 221 yes no\n",
+ "9 45 services married primary no -88 NaN yes"
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " age | \n",
+ " job | \n",
+ " marital | \n",
+ " education | \n",
+ " default | \n",
+ " balance | \n",
+ " housing | \n",
+ " loan | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 34 | \n",
+ " unemployed | \n",
+ " single | \n",
+ " primary | \n",
+ " no | \n",
+ " 1587 | \n",
+ " no | \n",
+ " yes | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 34 | \n",
+ " services | \n",
+ " married | \n",
+ " secondary | \n",
+ " no | \n",
+ " 3789 | \n",
+ " yes | \n",
+ " yes | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 31 | \n",
+ " management | \n",
+ " single | \n",
+ " tertiary | \n",
+ " no | \n",
+ " 1850 | \n",
+ " yes | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 32 | \n",
+ " management | \n",
+ " married | \n",
+ " tertiary | \n",
+ " no | \n",
+ " 1476 | \n",
+ " yes | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 39 | \n",
+ " blue-collar | \n",
+ " married | \n",
+ " secondary | \n",
+ " no | \n",
+ " 0 | \n",
+ " yes | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 35 | \n",
+ " management | \n",
+ " single | \n",
+ " tertiary | \n",
+ " no | \n",
+ " 747 | \n",
+ " no | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 36 | \n",
+ " self-employed | \n",
+ " married | \n",
+ " tertiary | \n",
+ " no | \n",
+ " 307 | \n",
+ " yes | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 38 | \n",
+ " technician | \n",
+ " single | \n",
+ " secondary | \n",
+ " no | \n",
+ " 147 | \n",
+ " no | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 42 | \n",
+ " entrepreneur | \n",
+ " married | \n",
+ " tertiary | \n",
+ " no | \n",
+ " 221 | \n",
+ " yes | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " 45 | \n",
+ " services | \n",
+ " married | \n",
+ " primary | \n",
+ " no | \n",
+ " -88 | \n",
+ " NaN | \n",
+ " yes | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 5
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.dtypes"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Z2eOai5X1Qv6",
+ "outputId": "1f5876f9-2b58-4610-e84d-6c342f8c31cf"
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "age int64\n",
+ "job object\n",
+ "marital object\n",
+ "education object\n",
+ "default object\n",
+ "balance int64\n",
+ "housing object\n",
+ "loan object\n",
+ "dtype: object"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 6
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.select_dtypes('object').describe().transpose()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 237
+ },
+ "id": "FuFA7xQU1R3X",
+ "outputId": "a8069f88-b544-447c-cf41-d4d8a49bcfea"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " count unique top freq\n",
+ "job 10 7 management 3\n",
+ "marital 10 2 married 6\n",
+ "education 10 3 tertiary 5\n",
+ "default 10 1 no 10\n",
+ "housing 9 2 yes 6\n",
+ "loan 10 2 no 7"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " count | \n",
+ " unique | \n",
+ " top | \n",
+ " freq | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " job | \n",
+ " 10 | \n",
+ " 7 | \n",
+ " management | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " marital | \n",
+ " 10 | \n",
+ " 2 | \n",
+ " married | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " education | \n",
+ " 10 | \n",
+ " 3 | \n",
+ " tertiary | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " default | \n",
+ " 10 | \n",
+ " 1 | \n",
+ " no | \n",
+ " 10 | \n",
+ "
\n",
+ " \n",
+ " housing | \n",
+ " 9 | \n",
+ " 2 | \n",
+ " yes | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " loan | \n",
+ " 10 | \n",
+ " 2 | \n",
+ " no | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.isna().any()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Wds8rTRQ1UeG",
+ "outputId": "a38cdefd-fddc-42cd-dffe-b6c1cb552136"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "age False\n",
+ "job False\n",
+ "marital False\n",
+ "education False\n",
+ "default False\n",
+ "balance False\n",
+ "housing True\n",
+ "loan False\n",
+ "dtype: bool"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.dropna(inplace=True)"
+ ],
+ "metadata": {
+ "id": "X-UghV8_1WRu"
+ },
+ "execution_count": 9,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.head(n=10)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 331
+ },
+ "id": "p4KdHbku1X6o",
+ "outputId": "6a846276-7eb1-4286-a148-71032e369618"
+ },
+ "execution_count": 10,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " age job marital education default balance housing loan\n",
+ "0 34 unemployed single primary no 1587 no yes\n",
+ "1 34 services married secondary no 3789 yes yes\n",
+ "2 31 management single tertiary no 1850 yes no\n",
+ "3 32 management married tertiary no 1476 yes no\n",
+ "4 39 blue-collar married secondary no 0 yes no\n",
+ "5 35 management single tertiary no 747 no no\n",
+ "6 36 self-employed married tertiary no 307 yes no\n",
+ "7 38 technician single secondary no 147 no no\n",
+ "8 42 entrepreneur married tertiary no 221 yes no"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " age | \n",
+ " job | \n",
+ " marital | \n",
+ " education | \n",
+ " default | \n",
+ " balance | \n",
+ " housing | \n",
+ " loan | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 34 | \n",
+ " unemployed | \n",
+ " single | \n",
+ " primary | \n",
+ " no | \n",
+ " 1587 | \n",
+ " no | \n",
+ " yes | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 34 | \n",
+ " services | \n",
+ " married | \n",
+ " secondary | \n",
+ " no | \n",
+ " 3789 | \n",
+ " yes | \n",
+ " yes | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 31 | \n",
+ " management | \n",
+ " single | \n",
+ " tertiary | \n",
+ " no | \n",
+ " 1850 | \n",
+ " yes | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 32 | \n",
+ " management | \n",
+ " married | \n",
+ " tertiary | \n",
+ " no | \n",
+ " 1476 | \n",
+ " yes | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 39 | \n",
+ " blue-collar | \n",
+ " married | \n",
+ " secondary | \n",
+ " no | \n",
+ " 0 | \n",
+ " yes | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " 35 | \n",
+ " management | \n",
+ " single | \n",
+ " tertiary | \n",
+ " no | \n",
+ " 747 | \n",
+ " no | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " 36 | \n",
+ " self-employed | \n",
+ " married | \n",
+ " tertiary | \n",
+ " no | \n",
+ " 307 | \n",
+ " yes | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " 38 | \n",
+ " technician | \n",
+ " single | \n",
+ " secondary | \n",
+ " no | \n",
+ " 147 | \n",
+ " no | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " 42 | \n",
+ " entrepreneur | \n",
+ " married | \n",
+ " tertiary | \n",
+ " no | \n",
+ " 221 | \n",
+ " yes | \n",
+ " no | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "idades = df['age'].tolist()  # ages from the cleaned DataFrame; an empty list wrote only the header\n",
+ "with open(file='idades.csv', mode='w', encoding='utf8') as fp:\n",
+ "    linha = 'idade' + '\\n'\n",
+ "    fp.write(linha)\n",
+ "    for idade in idades:\n",
+ "        linha = str(idade) + '\\n'\n",
+ "        fp.write(linha)"
+ ],
+ "metadata": {
+ "id": "_B4FBmxI1mYG"
+ },
+ "execution_count": 13,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 6dc7e61..3f73402 100644
--- a/README.md
+++ b/README.md
@@ -1,66 +1,62 @@
# Santander Dev Week 2023 Java API
-RESTful API da Santander Dev Week 2023 construída em Java 17 com Spring Boot 3.
-
-## Principais Tecnologias
- - **Java 17**: Utilizaremos a versão LTS mais recente do Java para tirar vantagem das últimas inovações que essa linguagem robusta e amplamente utilizada oferece;
- - **Spring Boot 3**: Trabalharemos com a mais nova versão do Spring Boot, que maximiza a produtividade do desenvolvedor por meio de sua poderosa premissa de autoconfiguração;
- - **Spring Data JPA**: Exploraremos como essa ferramenta pode simplificar nossa camada de acesso aos dados, facilitando a integração com bancos de dados SQL;
- - **OpenAPI (Swagger)**: Vamos criar uma documentação de API eficaz e fácil de entender usando a OpenAPI (Swagger), perfeitamente alinhada com a alta produtividade que o Spring Boot oferece;
- - **Railway**: facilita o deploy e monitoramento de nossas soluções na nuvem, além de oferecer diversos bancos de dados como serviço e pipelines de CI/CD.
-
-## [Link do Figma](https://www.figma.com/file/0ZsjwjsYlYd3timxqMWlbj/SANTANDER---Projeto-Web%2FMobile?type=design&node-id=1421%3A432&mode=design&t=6dPQuerScEQH0zAn-1)
-
-O Figma foi utilizado para a abstração do domínio desta API, sendo útil na análise e projeto da solução.
-
-## Diagrama de Classes (Domínio da API)
-
-```mermaid
-classDiagram
- class User {
- -String name
- -Account account
- -Feature[] features
- -Card card
- -News[] news
- }
-
- class Account {
- -String number
- -String agency
- -Number balance
- -Number limit
- }
-
- class Feature {
- -String icon
- -String description
- }
-
- class Card {
- -String number
- -Number limit
- }
-
- class News {
- -String icon
- -String description
- }
-
- User "1" *-- "1" Account
- User "1" *-- "N" Feature
- User "1" *-- "1" Card
- User "1" *-- "N" News
-```
-
-## Documentação da API (Swagger)
-
-### [https://sdw-2023-prd.up.railway.app/swagger-ui.html](https://sdw-2023-prd.up.railway.app/swagger-ui.html)
-
-Esta API ficará disponível no Railway por um período de tempo limitado, mas este é um código-fonte aberto. Portanto, sintam-se à vontade para cloná-lo, modificá-lo (já que é um bom projeto base para novos projetos) e executar localmente ou onde achar mais interessante! Só não esquece de marcar a gente quando divulgar a sua solução 🥰
-
-### IMPORTANTE
-
-Aos interessados no desenvolvimento da tela inicial do App do Santander (Figma) em Angular, Android, iOS ou Flutter... Caso a URL produtiva não esteja mais disponível, deixamos um Backup no GitHub Pages, é só dar um GET lá 😘
-- URL de Produção: https://sdw-2023-prd.up.railway.app/users/1
-- Mock (Backup): https://digitalinnovationone.github.io/santander-dev-week-2023-api/mocks/find_one.json
+Desafio "Explorando IA Generativa em um Pipeline de ETL com Python", parte do Santander Bootcamp 2023 - Ciência de Dados com Python.
+
+## Etapas Realizadas
+Arquivo CSV criado usando o Google Colab:
+
+%%writefile banco.csv
+age,job,marital,education,default,balance,housing,loan
+34,unemployed,single,primary,no,1587,no,yes
+34,services,married,secondary,no,3789,yes,yes
+31,management,single,tertiary,no,1850,yes,no
+32,management,married,tertiary,no,1476,yes,no
+39,blue-collar,married,secondary,no,0,yes,no
+35,management,single,tertiary,no,747,no,no
+36,self-employed,married,tertiary,no,307,yes,no
+38,technician,single,secondary,no,147,no,no
+42,entrepreneur,married,tertiary,no,221,yes,no
+45,services,married,primary,no,-88,,yes
+
+
+#### Extração
+Ler as informações do arquivo anterior com o pandas e visualizar as primeiras linhas:
+
+import pandas as pd
+df = pd.read_csv('banco.csv', na_values='na')
+df.head(n=10)
+
+Analisando as informações
+
+df.dtypes
+df.select_dtypes('object').describe().transpose()
+
+#### Transform
+
+Verificar se alguma coluna tem info faltante:
+df.isna().any()
+
+Remover a linha com informação incompleta:
+df.dropna(inplace=True)
+
+Revisar informações após ajuste:
+df.head(n=10)
+
+#### Load
+
+Extrair as informações de idades do arquivo que foi ajustado e salvar como um novo arquivo:
+
+idades = []
+with open(file='idades.csv', mode='w', encoding='utf8') as fp:
+ linha = 'idade' + '\n'
+ fp.write(linha)
+ for idade in idades:
+ linha = str(idade) + '\n'
+ fp.write(linha)
+
+
+
+
+
+
+