diff --git a/FOSS_WEEKEND(ML,reg) (3).ipynb b/FOSS_WEEKEND(ML,reg) (3).ipynb
new file mode 100644
index 0000000..18239c2
--- /dev/null
+++ b/FOSS_WEEKEND(ML,reg) (3).ipynb
@@ -0,0 +1,3193 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "id": "Tt7Mn1KMbZ5y"
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from scipy import stats\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "from sklearn.preprocessing import LabelEncoder\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.impute import SimpleImputer\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.svm import SVR\n",
+ "from sklearn.metrics import mean_squared_error\n",
+ "from sklearn.metrics import r2_score\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7B6JttFQmG3z"
+ },
+ "source": [
+ "# Import the dataset into a DataFrame"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 423
+ },
+ "id": "1pZS5hHvePDs",
+ "outputId": "32aaf9ca-9a40-4ac8-d1e5-5a92d43de0a2"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " winery wine year rating num_reviews \\\n",
+ "0 Teso La Monja Tinto 2013 4.9 58 \n",
+ "1 Artadi Vina El Pison 2018 4.9 31 \n",
+ "2 Vega Sicilia Unico 2009 4.8 1793 \n",
+ "3 Vega Sicilia Unico 1999 4.8 1705 \n",
+ "4 Vega Sicilia Unico 1996 4.8 1309 \n",
+ "... ... ... ... ... ... \n",
+ "7495 Contino Reserva 2016 4.2 392 \n",
+ "7496 Conreria d'Scala Dei Les Brugueres 2018 4.2 390 \n",
+ "7497 Mustiguillo Finca Terrerazo 2017 4.2 390 \n",
+ "7498 Matarromera Gran Reserva 2011 4.2 389 \n",
+ "7499 Sei Solo Preludio 2016 4.2 388 \n",
+ "\n",
+ " country region price type body acidity \n",
+ "0 Espana Toro 995.00 Toro Red 5.0 3.0 \n",
+ "1 Espana Vino de Espana 313.50 Tempranillo 4.0 2.0 \n",
+ "2 Espana Ribera del Duero 324.95 Ribera Del Duero Red 5.0 3.0 \n",
+ "3 Espana Ribera del Duero 692.96 Ribera Del Duero Red 5.0 3.0 \n",
+ "4 Espana Ribera del Duero 778.06 Ribera Del Duero Red 5.0 3.0 \n",
+ "... ... ... ... ... ... ... \n",
+ "7495 Espana Rioja 19.98 Rioja Red 4.0 3.0 \n",
+ "7496 Espana Priorato 16.76 Priorat Red 4.0 3.0 \n",
+ "7497 Espana El Terrerazo 24.45 Red 4.0 3.0 \n",
+ "7498 Espana Ribera del Duero 64.50 Ribera Del Duero Red 5.0 3.0 \n",
+ "7499 Espana Ribera del Duero 31.63 Ribera Del Duero Red 5.0 3.0 \n",
+ "\n",
+ "[7500 rows x 11 columns]"
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " winery | \n",
+ " wine | \n",
+ " year | \n",
+ " rating | \n",
+ " num_reviews | \n",
+ " country | \n",
+ " region | \n",
+ " price | \n",
+ " type | \n",
+ " body | \n",
+ " acidity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Teso La Monja | \n",
+ " Tinto | \n",
+ " 2013 | \n",
+ " 4.9 | \n",
+ " 58 | \n",
+ " Espana | \n",
+ " Toro | \n",
+ " 995.00 | \n",
+ " Toro Red | \n",
+ " 5.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Artadi | \n",
+ " Vina El Pison | \n",
+ " 2018 | \n",
+ " 4.9 | \n",
+ " 31 | \n",
+ " Espana | \n",
+ " Vino de Espana | \n",
+ " 313.50 | \n",
+ " Tempranillo | \n",
+ " 4.0 | \n",
+ " 2.0 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Vega Sicilia | \n",
+ " Unico | \n",
+ " 2009 | \n",
+ " 4.8 | \n",
+ " 1793 | \n",
+ " Espana | \n",
+ " Ribera del Duero | \n",
+ " 324.95 | \n",
+ " Ribera Del Duero Red | \n",
+ " 5.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Vega Sicilia | \n",
+ " Unico | \n",
+ " 1999 | \n",
+ " 4.8 | \n",
+ " 1705 | \n",
+ " Espana | \n",
+ " Ribera del Duero | \n",
+ " 692.96 | \n",
+ " Ribera Del Duero Red | \n",
+ " 5.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " Vega Sicilia | \n",
+ " Unico | \n",
+ " 1996 | \n",
+ " 4.8 | \n",
+ " 1309 | \n",
+ " Espana | \n",
+ " Ribera del Duero | \n",
+ " 778.06 | \n",
+ " Ribera Del Duero Red | \n",
+ " 5.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 7495 | \n",
+ " Contino | \n",
+ " Reserva | \n",
+ " 2016 | \n",
+ " 4.2 | \n",
+ " 392 | \n",
+ " Espana | \n",
+ " Rioja | \n",
+ " 19.98 | \n",
+ " Rioja Red | \n",
+ " 4.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 7496 | \n",
+ " Conreria d'Scala Dei | \n",
+ " Les Brugueres | \n",
+ " 2018 | \n",
+ " 4.2 | \n",
+ " 390 | \n",
+ " Espana | \n",
+ " Priorato | \n",
+ " 16.76 | \n",
+ " Priorat Red | \n",
+ " 4.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 7497 | \n",
+ " Mustiguillo | \n",
+ " Finca Terrerazo | \n",
+ " 2017 | \n",
+ " 4.2 | \n",
+ " 390 | \n",
+ " Espana | \n",
+ " El Terrerazo | \n",
+ " 24.45 | \n",
+ " Red | \n",
+ " 4.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 7498 | \n",
+ " Matarromera | \n",
+ " Gran Reserva | \n",
+ " 2011 | \n",
+ " 4.2 | \n",
+ " 389 | \n",
+ " Espana | \n",
+ " Ribera del Duero | \n",
+ " 64.50 | \n",
+ " Ribera Del Duero Red | \n",
+ " 5.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ " 7499 | \n",
+ " Sei Solo | \n",
+ " Preludio | \n",
+ " 2016 | \n",
+ " 4.2 | \n",
+ " 388 | \n",
+ " Espana | \n",
+ " Ribera del Duero | \n",
+ " 31.63 | \n",
+ " Ribera Del Duero Red | \n",
+ " 5.0 | \n",
+ " 3.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
7500 rows × 11 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 7500,\n \"fields\": [\n {\n \"column\": \"winery\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 480,\n \"samples\": [\n \"Francisco Garcia Perez\",\n \"Jimenez-Landi\",\n \"Bodegas Yzaguirre\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"wine\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 847,\n \"samples\": [\n \"Cava El Tros Nou Pinot Noir\",\n \"Teixar\",\n \"Pagos Viejos\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"year\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 71,\n \"samples\": [\n \"1985\",\n \"2013\",\n \"1958\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.1180290345746442,\n \"min\": 4.2,\n \"max\": 4.9,\n \"num_unique_values\": 8,\n \"samples\": [\n 4.8,\n 4.4,\n 4.9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_reviews\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 723,\n \"min\": 25,\n \"max\": 32624,\n \"num_unique_values\": 817,\n \"samples\": [\n 115,\n 177,\n 2144\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"country\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Espana\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"region\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 76,\n \"samples\": [\n \"Jumilla\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 150.35667645268242,\n \"min\": 4.99,\n \"max\": 3119.08,\n \"num_unique_values\": 1292,\n \"samples\": [\n 168.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n 
}\n },\n {\n \"column\": \"type\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 21,\n \"samples\": [\n \"Toro Red\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"body\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5833522358684095,\n \"min\": 2.0,\n \"max\": 5.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"acidity\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.24820201012421672,\n \"min\": 1.0,\n \"max\": 3.0,\n \"num_unique_values\": 3,\n \"samples\": [\n 3.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ],
+ "source": [
+ "df = pd.read_csv(filepath_or_buffer='/content/wines_SPA.csv')  # file sits in the Colab /content directory\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "eHcGko0lEdij"
+ },
+ "source": [
+ "## Data Preprocessing and Label Encoding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "z5wXrhc6edUz",
+ "outputId": "11f02069-8ebc-442a-f093-77faed08b885"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "winery 0\n",
+ "wine 0\n",
+ "year 2\n",
+ "rating 0\n",
+ "num_reviews 0\n",
+ "country 0\n",
+ "region 0\n",
+ "price 0\n",
+ "type 545\n",
+ "body 1169\n",
+ "acidity 1169\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ],
+ "source": [
+ "df.isna().sum()  # number of missing values in each column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "id": "4CGvxJxGfKlZ"
+ },
+ "outputs": [],
+ "source": [
+ "df['year'] = df['year'].pipe(pd.to_numeric, errors='coerce')  # entries that cannot be parsed as numbers become NaN"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "4STtiVmMiCzz",
+ "outputId": "786e7b0b-1128-4726-f5a5-261599ff1141"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 2013.0\n",
+ "1 2018.0\n",
+ "2 2009.0\n",
+ "3 1999.0\n",
+ "4 1996.0\n",
+ " ... \n",
+ "7495 2016.0\n",
+ "7496 2018.0\n",
+ "7497 2017.0\n",
+ "7498 2011.0\n",
+ "7499 2016.0\n",
+ "Name: year, Length: 7500, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ],
+ "source": [
+ "df.loc[:, 'year']  # inspect the column after numeric conversion"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df['year'] = df['year'].rsub(2024)  # 2024 - year: replaces the calendar year with an age relative to 2024 (re-running this cell flips it back)"
+ ],
+ "metadata": {
+ "id": "7Acu80pUr4bi"
+ },
+ "execution_count": 14,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.loc[:, 'year']  # inspect the column after subtracting from 2024"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "uDxoitKhsOGP",
+ "outputId": "e4b24cda-4567-404e-cc5b-0018aabec8ad"
+ },
+ "execution_count": 15,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "0 11.0\n",
+ "1 6.0\n",
+ "2 15.0\n",
+ "3 25.0\n",
+ "4 28.0\n",
+ " ... \n",
+ "7495 8.0\n",
+ "7496 6.0\n",
+ "7497 7.0\n",
+ "7498 13.0\n",
+ "7499 8.0\n",
+ "Name: year, Length: 7500, dtype: float64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 15
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "id": "1Beesqqoei4z"
+ },
+ "outputs": [],
+ "source": [
+ "# Fill the gaps in each numeric column with that column's own median (the imputer is refit per column).\n",
+ "imputer = SimpleImputer(strategy='median')\n",
+ "for col in ['year', 'body', 'acidity']:\n",
+ "    df[col] = imputer.fit_transform(df[[col]])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ft49cSNrfAQa",
+ "outputId": "2619083d-3c9d-4cab-e6c0-4927004bff17"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "winery 0\n",
+ "wine 0\n",
+ "year 0\n",
+ "rating 0\n",
+ "num_reviews 0\n",
+ "country 0\n",
+ "region 0\n",
+ "price 0\n",
+ "type 545\n",
+ "body 0\n",
+ "acidity 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 17
+ }
+ ],
+ "source": [
+ "df.isna().sum()  # re-check missing values per column after imputation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "mLiYsXULfOGh",
+ "outputId": "ca6bb949-9c1a-4ef6-98d7-fbca14636620"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array(['Espana'], dtype=object)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 18
+ }
+ ],
+ "source": [
+ "pd.unique(df['country'])  # distinct values in the country column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "V_QhGYgDgeAR",
+ "outputId": "a12b50c9-b15f-477a-caba-6f23100ea28f"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array(['Teso La Monja', 'Artadi', 'Vega Sicilia', 'Pago de Carraovejas',\n",
+ " 'Toro Albala', 'Bodegas El Nido', 'Valdespino',\n",
+ " 'Dominio de Pingus', 'Alvaro Palacios', 'Ordonez',\n",
+ " 'Bodegas Valduero', 'Vina Sastre', 'Sierra Cantabria',\n",
+ " 'Descendientes de J. Palacios', 'La Rioja Alta',\n",
+ " 'Marques de Murrieta', 'Vinedos de Paganos', 'Emilio Moro',\n",
+ " 'Quinta de la Quietud', 'Bodegas Mauro',\n",
+ " 'Bodega Contador (Benjamin Romeo)', 'Remirez de Ganuza',\n",
+ " 'Bodegas San Roman', 'Pago de Los Capellanes', 'Bodega Numanthia',\n",
+ " 'Alto Moncayo', 'Mas Doix', 'Finca Moncloa', 'Bodegas Roda',\n",
+ " 'Martinet', 'Recaredo', 'Clos Erasmus', 'Barbadillo',\n",
+ " 'Gonzalez-Byass', 'Bodegas Amaren', 'Alvear', 'Equipo Navazos',\n",
+ " 'Morca', 'Territorio Luthier', 'Rafael Palacios', 'Terra Remota',\n",
+ " 'Dehesa de Los Canonigos', 'Miguel Merino', 'Gutierrez de la Vega',\n",
+ " 'Alion', 'Aalto', 'Carmelo Rodero', 'Dominio del Bendito',\n",
+ " \"Mas d'en Gil\", 'Casa Castillo', 'Matarromera', 'Nin-Ortiz',\n",
+ " 'Vinas del Vero', 'Marques de Riscal', 'Arzuaga',\n",
+ " 'Bodegas Mas Alta', 'Dominio de Calogia', 'Tomas Postigo',\n",
+ " 'Cal Pla', 'Ossian', 'Cepa 21', 'Bodegas Vilano', 'Allende',\n",
+ " 'Costers del Siurana', 'Hacienda Monasterio', 'Castillo Perelada',\n",
+ " 'Osborne', 'Ysios', 'Marques de Grinon', 'Contino', 'Gramona',\n",
+ " 'Dominio del Aguila', 'Hacienda Solano', 'Francisco Garcia Perez',\n",
+ " 'Anima Negra', 'Castano', 'La Legua', 'Castillo de Cuzcurrita',\n",
+ " 'Oxer Wines', 'Bodegas Yuste',\n",
+ " 'Bodegas 6o Elemento - Vino Sexto Elemento',\n",
+ " 'Proyecto Garnachas de Espana', 'Casal de Arman', 'Vall Llach',\n",
+ " 'Pago de Larrainzar', 'Ukan Winery', 'Vina Real',\n",
+ " 'Dominio de Atauta', 'Micro Bio (MicroBio)',\n",
+ " 'Terroir Al Limit Soc. Lda', 'Finca Torremilanos',\n",
+ " 'Huerta de Albala', 'Gomez Cruzado', \"Castell d'Encus\",\n",
+ " 'Abadia Retuerta', 'Enate',\n",
+ " 'Benjamin de Rothschild - Vega Sicilia', 'Bodegas Aragonesas',\n",
+ " 'Telmo Rodriguez', 'Acustic Celler', 'Vina Pedrosa', 'Pintia',\n",
+ " 'Belondrade', 'Muga', 'Clos Mogador', 'Bodegas Tradicion',\n",
+ " 'Senorio de San Vicente', 'Francisco Barona', 'Juan Gil', 'Lan',\n",
+ " 'R. Lopez de Heredia', 'Adama Wines', 'Milsetentayseis',\n",
+ " 'Espectacle del Montsant', 'Tinto Pesquera', 'Ferrer Bobet',\n",
+ " 'Familia Torres', 'Pazo Barrantes', 'Tio Pepe', 'Cruz de Alba',\n",
+ " 'Emilio Rojo', 'Dominio de Es', 'Jesus Madrazo',\n",
+ " 'Bodegas Naluar & Acediano', 'Marques de Caceres', 'Trus',\n",
+ " 'Mestres', 'Chivite', 'Bodega Ribas', 'Vinas del Cenit',\n",
+ " 'Mas del Serral', 'Cervoles', 'Diez Merito', 'Marques de Burgos',\n",
+ " 'Penafiel', 'Tobia', 'Tresmano - Tr3smano', 'Sei Solo',\n",
+ " 'Finca Villacreces', 'Virgen de la Asuncion', 'Romate',\n",
+ " 'Sara Perez y Rene Barbier', 'Dehesa del Carrizal', 'Tarsus',\n",
+ " 'Beronia', 'Monte Real', 'Divina Proporcion',\n",
+ " 'Vina al Lado de la Casa', 'Marques de la Concordia',\n",
+ " 'Agusti Torello Mata', 'Dominio de Cair', 'Valquejigoso',\n",
+ " 'Veigamoura', 'Vizcarra', 'Legaris', 'Federico Paternina',\n",
+ " 'Vinicola Real', 'Losada', \"Clos d'Agon\", 'Sacristia AB',\n",
+ " 'Uvaguilera Aguilera', 'Emilio Hidalgo', 'Altos del Terral',\n",
+ " 'El Regajal', 'Valderiz', 'Jose Pariente', 'Amos Baneres',\n",
+ " 'Scala Dei', 'Bodegas Valdelana', 'Abel Mendoza Monge', 'Anayon',\n",
+ " 'Sierra Salinas', 'Vinos Sanz', 'Cerro San Cristobal',\n",
+ " 'Altos de Rioja',\n",
+ " 'Bodegas Celler Francisco Castillo - Clos Dominic',\n",
+ " 'La Vinya del Vuit', 'Venus la Universal', 'Pedralonga', 'Atalaya',\n",
+ " 'Matsu', 'Marques de Vargas', 'Volver', 'Tridente', 'Ausas',\n",
+ " 'Cillar de Silos', 'Imperial', 'Ramos-Paul', 'Bodega de Bardos',\n",
+ " 'Rodriguez Sanzo', 'Antidoto', 'Bosque de Matasnos', 'Altanza',\n",
+ " 'Remelluri', 'Perinet', 'La Vicalanda', 'Monovar', 'Alvaro Domecq',\n",
+ " 'Los Aguilares', 'Bodega Rento', 'Dominio do Bibei',\n",
+ " 'Marco Abella', 'Javier Sanz Viticultor', 'Protos', 'Harveys',\n",
+ " 'Orben', 'Jaro', 'Bodegas Ximenez-Spinola', 'Bodegas El Paraguas',\n",
+ " 'Mustiguillo', 'Vina Pomal', 'Astrales', 'Merum Priorati',\n",
+ " 'Hacienda Lopez de Haro', 'Cayetano del Pino', 'La Unica',\n",
+ " 'Pazo Senorans', 'Fefinanes', 'Resalte', 'Figuero', 'Pinea',\n",
+ " 'Portal del Priorat', 'Cims de Porrera', 'Casa Rojo', 'Murua',\n",
+ " 'Vinyes Domenech', 'Pago de Vallegarcia', 'Eulogio Pomares',\n",
+ " 'Sangenis I Vaque', 'Delgado Zuleta', 'VALENCISO', 'Mas Perinet',\n",
+ " 'Luis A. Rodriguez Vazquez', 'Chozas Carrascal',\n",
+ " 'Alonso del Yerro', 'Clos Figueras', 'Bodegas Monte La Reina',\n",
+ " 'Ontanon', 'Alta Alella', 'Bodega Elias Mora', 'Joan Simo',\n",
+ " 'Can Axartell', 'Juan Pinero', 'Mas Llunes', 'Bodegas Frontaura',\n",
+ " 'Roig Parals', 'Avgvstvs', 'Jorge Ordonez', 'Ramon Bilbao', 'Leda',\n",
+ " 'Enric Soler', 'Es Fangar', 'Cal Batllet - Celler Ripoll Sans',\n",
+ " 'Llopart', 'Portia', 'Ostatu', 'Finca Valpiedra', 'Baigorri',\n",
+ " 'Vivaltus', 'Comenge', 'Collbaix Celler El Moli',\n",
+ " 'Martinez Lacuesta', 'Pujanza', 'Albet i Noya', 'Olivier Riviere',\n",
+ " 'Bodegas Tierra', 'Torre de Ona',\n",
+ " 'Do Ferreiro (Bodegas Gerardo Mendez)', 'Martin Codax',\n",
+ " 'Vinedos Hermanos Hernaiz', 'Freixenet', 'Azpilicueta',\n",
+ " 'Veronica Salgado', 'Clos Galena',\n",
+ " 'Bodegas Imperiales - Abadia de San Quirce', 'Marta Mate',\n",
+ " 'Venta Las Vacas', 'Pago Calzadilla', 'Vivanco', 'Fernandez Gao',\n",
+ " 'Museum', 'Farina', 'Galindo San Millan', 'Bodegas Casa Primicia',\n",
+ " 'Castell Miquel', 'Barco del Corneta', \"Joan d'Anguera\", 'Xaloc',\n",
+ " 'Carlos Moro', 'Bodegas Luis Perez', 'Ladairo',\n",
+ " \"Conreria d'Scala Dei\", 'Traslascuestas', 'Lopez Cristobal',\n",
+ " 'Codorniu', 'Sota els Angels', 'Berceo', 'Mas Blanch I Jove',\n",
+ " 'Carlos Serres', 'David Moreno', 'Edra', 'Valdelosfrailes',\n",
+ " 'Briego', 'Guillem Carol - Cellers Carol Valles', 'Barahonda',\n",
+ " 'Particular', 'Bodegas Frontonio', 'Bodegas Asenjo & Manso',\n",
+ " 'Luis Canas', 'Garmon', 'Williams & Humbert', 'Condado de Haza',\n",
+ " 'Lustau', 'Bodegas Faustino', 'St. Petroni', 'Llanos Negros',\n",
+ " 'Aster', 'Avancia', 'Marques de Tomares', 'Quinta Sardonia',\n",
+ " 'Abadal', 'Hermanos Pecina', 'Biniagual', 'Breca', 'Pradorey',\n",
+ " 'Terras Gauda', 'Bodega Tesalia', 'Pazo de Rubianes', 'Godeval',\n",
+ " 'Bodegas Olarra', 'Taron', 'Sonsierra', 'Vins Miquel Gelabert',\n",
+ " 'El Grillo y La Luna', 'De Moya', 'Campillo', 'Jean Leon',\n",
+ " 'Bodegas Ateca', 'Bodega Campo Eliseo', 'Fernandez de Pierola',\n",
+ " 'De Muller', 'Monteabellon', 'Pepe Mendoza Casa Agricola',\n",
+ " 'Vinos Guerra', 'Neo', \"Terra d'Uro\", 'Zarate', 'Noelia Bebelia',\n",
+ " 'Marti Fabra', 'Palmeri Sicilia', 'Mas Que Vinos',\n",
+ " 'Bodegas Estraunza', 'Pittacum', 'Bodega Sommos',\n",
+ " 'Celler de Capcanes', 'Arinzano', 'Pago del Vostal', 'Habla',\n",
+ " 'Rodriguez de Vera', 'Callejuela', 'El Maestro Sierra',\n",
+ " 'Paco & Lola', 'Valtravieso', 'Costers del Priorat',\n",
+ " 'Bodegas Urium', 'Bodega Otazu', 'Perez Barquero', 'Oller del Mas',\n",
+ " 'Bodegas Clunia', 'Bodegas Ochoa', \"Ca l'Apotecari\",\n",
+ " 'Josep Grau Viticultor', 'Fos', 'Hacienda Grimon', 'Capafons-Osso',\n",
+ " 'Can Rafols dels Caus', 'Raul Perez', 'Dominio de la Vega',\n",
+ " 'Juve & Camps', 'Mas de les Pereres', 'Guilera',\n",
+ " \"l'Infernal Combier Fischer Gerin\", 'Jorge Piernas', 'Balandro',\n",
+ " 'M. Antonio de la Riva', 'Espelt', 'Bodegas Arfe', 'Real de Asua',\n",
+ " 'Vinyes dels Aspres', 'Valserrano', 'Shaya', 'Finca Rio Negro',\n",
+ " 'Loxera', 'Dominio Fournier', 'Bodegas Yzaguirre', 'Neton',\n",
+ " 'Bodega Aida i Luis', 'Cartoixa de Montsalvat',\n",
+ " 'El Lagar de Isilla', 'Anibal de Otero', 'Altos del Enebro',\n",
+ " 'Albamar', 'Can Bas', 'Atlantida', 'Atlan & Artisan', 'Olivardots',\n",
+ " 'Lar de Paula', 'La Val', 'Tres Piedras', 'Javi Revert',\n",
+ " 'Lambuena', 'Attis', 'San Cobate', 'Santa Petronila',\n",
+ " 'Jimenez-Landi', 'Hidalgo (La Gitana)', 'Gallina de Piel', 'Arid',\n",
+ " 'Bodegas Reyes', 'Hugas de Batlle', 'Bodegas Arrocal',\n",
+ " 'Buil & Gine', 'Castell del Remei', 'Valdubon',\n",
+ " 'Bodegas Y Vinedos Alilian', 'Sebio', 'Goyo Garcia Viadero',\n",
+ " 'Santiago Ruiz', 'Mauro Estevez', 'Mas Bertran', 'Bodegas Gama',\n",
+ " 'Edetaria', 'Hacienda Zorita Natural Reserve', 'Ton Rimbau',\n",
+ " 'Can Xanet', 'A Coroa', 'Alonso & Pedrajo', 'Mandia Vell',\n",
+ " 'Bodega Juan Carlos Sancha', 'Valdecuevas', 'Algueira', 'Mocen',\n",
+ " 'Estones', 'Bodega Tameran', 'Inurrieta', 'Flor de Chasna',\n",
+ " 'Lagar de Sabariz', 'Naia', 'AGE', 'Navajas', 'Vinyes del Terrer',\n",
+ " 'Cepas Familiares', 'Dominio de Loalva', 'Zifar',\n",
+ " 'Bordalas Garcia', 'Rimarts', 'Pinna Fidelis', 'Loess',\n",
+ " 'Dominio de Anza', 'Samsara', 'Cuentavinas', 'Felix Solis',\n",
+ " 'El Coto', 'Mar de Frades', 'Montecastro', 'Bodegas La Horra',\n",
+ " 'Baluarte', 'Hispano Suizas', 'Murviedro', 'Diez Caballero',\n",
+ " 'Vegalfaro', 'Palacios Remondo', 'Fernando de Castilla',\n",
+ " 'Bohorquez', 'Adega Familiar Eladio Pineiro', 'Culebron',\n",
+ " 'Enrique Mendoza', 'Clos Pons', 'Ramon do Casar', 'Binigrau'],\n",
+ " dtype=object)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 19
+ }
+ ],
+ "source": [
+ "pd.unique(df['winery'])  # distinct values in the winery column"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "DfNqmkRFge7q",
+ "outputId": "92cf50c0-9c3c-4e9e-e64b-14d585eb2d8d"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array(['Tinto', 'Vina El Pison', 'Unico',\n",
+ " 'Unico Reserva Especial Edicion', 'El Anejon',\n",
+ " 'Don PX Convento Seleccion', 'Cuesta de Las Liebres', 'El Nido',\n",
+ " 'Toneles Moscatel', 'Pingus', 'Don PX Pedro Ximenez',\n",
+ " \"L'Ermita Velles Vinyes Priorat\", 'Vatan Arena Tinta de Toro',\n",
+ " 'Ribera Del Duero Gran Reserva 12 Anos', 'Pesus Ribera del Duero',\n",
+ " 'Magico', 'La Faraona Bierzo (Corullon)', 'Gran Reserva 890',\n",
+ " 'Valbuena 5o', 'Castillo Ygay Gran Reserva Especial Blanco',\n",
+ " 'La Nieta', 'Malleolus de Valderramiro',\n",
+ " 'Malleolus de Sanchomartin', 'Alabaster', 'La Mula de la Quietud',\n",
+ " 'Terreus Paraje de Cueva Baja', 'Contador Rioja',\n",
+ " 'Maria Remirez de Ganuza', 'Cartago Paraje de Pozo',\n",
+ " 'Parcela El Picon Tinto', 'Termanthia', 'Clon De La Familia',\n",
+ " 'Aquilon Garnacha', 'Quinon de Valmira',\n",
+ " '1902 Centenary Carignan Priorat', 'Tintilla de Rota',\n",
+ " 'Cirsion Rioja', 'Cami Pesseroles', \"Turo d'en Mota\", 'Priorat',\n",
+ " 'Reliquia Palo Cortado Sherry', 'Anada Palo Cortado 1987',\n",
+ " 'Daphne Glorian Red', 'El Regollar', 'Abuelo Diego Palo Cortado',\n",
+ " 'La Bota 78 de Oloroso', 'Touran', 'Luthier Gran Reserva',\n",
+ " 'Sorte O Soro Val do Bibei',\n",
+ " 'Reserva Particular de Recaredo Brut Nature', 'Usted',\n",
+ " 'Gran Reserva', 'Regina Vides Ribera del Duero', 'La Loma',\n",
+ " 'Recondita Armonia Monastrell Dulce',\n",
+ " 'Castillo Ygay Gran Reserva Especial Tinto', 'Ribera del Duero',\n",
+ " 'PS (Pagos Seleccionados) Ribera del Duero', 'Flor de Pingus',\n",
+ " 'Pago De Valtarrena', 'El Titan del Bendito', 'Dalmau Rioja',\n",
+ " 'Finca el Bosque', 'Pago de Santa Cruz', 'Clos Fonta Priorat',\n",
+ " 'Cuvee N Vinas Viejas',\n",
+ " 'Ribera del Duero Prestigio Pago de las Solanas',\n",
+ " \"Nit de Nin Mas d'en Cacador\", 'VS', 'Blecua Somontano',\n",
+ " 'Frank Gehry Selection', 'Amaya Arzuaga (Coleccion)',\n",
+ " 'La Creu Alta', '5o Ano Ribera del Duero Tinto', 'Planots Priorat',\n",
+ " 'Capitel', 'Gran Arzuaga Ribera del Duero', 'Horcajo', 'Aurus',\n",
+ " \"Dolc de L'Obac\", 'Trasnocho',\n",
+ " 'Doroteo Edicion Especial 25 Aniversario',\n",
+ " 'Tierra Alta de 2 Racimos Gran Reserva',\n",
+ " 'Reserva Especial Ribera del Duero', 'Reserva Especial',\n",
+ " 'Gran Buig Priorat', 'Finca Garbet',\n",
+ " 'Solera India Oloroso Rare Sherry', 'Amancio',\n",
+ " 'Gines Liebana Pedro Ximenez', 'Don PX Seleccion', 'Reserva',\n",
+ " 'Finca Las Naves', 'AAA', 'Vina del Olivo',\n",
+ " 'Les Aubaguetes Priorat',\n",
+ " 'Cava Enoteca Finca La Plana Brut Nature',\n",
+ " 'Gran Reserva Penas Aladas', 'Pena Lobera',\n",
+ " 'Adega do Moucho Treixadura', 'Son Negre', 'Casa Cisca Monastrell',\n",
+ " 'Moncerbal Bierzo (Corullon)', 'Capricho',\n",
+ " 'Cerrado del Castillo Rioja', 'Kalamity Rioja',\n",
+ " 'Conde de Aldama Amontillado', 'Respeto',\n",
+ " 'El Garnacho Viejo de la Familia Acha',\n",
+ " 'Ribera del Duero Prestigio', 'Finca Misenhora Edicion Limitada',\n",
+ " 'La Poza de Ballesteros', 'Reserva Rioja (Finca Ygay)', 'Idus',\n",
+ " 'Rosado de Larrainzar', 'Valdafoz Bierzo (Corullon)',\n",
+ " 'Rioja Gran Reserva', 'Valdegatiles Ribera del Duero',\n",
+ " 'Respublica Verdejo', 'Les Tosses',\n",
+ " 'Torremilanos Coleccion Ribera del Duero', 'El Carretil',\n",
+ " 'Taberner No. 1', 'Vina Motulleri', 'Saktih', 'Cuvee Palomar',\n",
+ " 'Uno Tinto', 'Macan', '1903 Centenary Grenache',\n",
+ " 'Galiano Seleccion Especial', 'Molino Real',\n",
+ " 'Etern Vinyes Molt Velles', '40 Aniversario Gran Reserva',\n",
+ " 'Solera BC-200',\n",
+ " 'Corpinnat Enoteca Reserva Particular de Recaredo', 'Toro', 'Clio',\n",
+ " 'Garnacha', 'Parcela El Nogal Tinto', 'Belondrade y Lurton',\n",
+ " 'Rioja Reserva', 'Prado Enea Gran Reserva',\n",
+ " 'Pedro Ximenez Tradicion 20 Years Old Vos', 'Torre Muga', 'Aro',\n",
+ " 'San Vicente Rioja', 'Ribera del Duero TSM',\n",
+ " 'Ribera Del Duero Reserva Premium 6 Anos', \"Clos de L'Obac\",\n",
+ " 'Reserva Ribera del Duero', 'Ribera Del Duero', 'Bruto',\n",
+ " 'Culmen Reserva Rioja', 'Vina Tondonia Gran Reserva',\n",
+ " 'Petit Verdot', 'Clos Martinet', 'Millenium Gran Reserva',\n",
+ " 'Finca Dofi', 'Gran Reserva Ribera del Duero',\n",
+ " 'Seleccio Especial Vinyes Velles', 'Calvario Rioja',\n",
+ " 'Ribera del Duero Una Cepa I', 'Grans Muralles',\n",
+ " 'Gran Vino Albarino', 'Cuatro Palmas Amontillado', 'Pago Garduna',\n",
+ " 'La Cueva del Contador Rioja', 'Solera 1830 Pedro Ximenez',\n",
+ " 'Que Bonito Cacareaba Blanco', 'Baron de Chirel Rioja Reserva',\n",
+ " 'Emeritvs (Emeritus)', 'Finca Los Hoyales Ribera del Duero',\n",
+ " 'Blanco', 'La Basseta', 'Territorio Luthier Reserva',\n",
+ " 'Vinas Viejas de Soria Ribera Del Duero',\n",
+ " 'Pago de Santa Cruz Gran Reserva Ribera del Duero',\n",
+ " 'As Sortes Val do Bibei Godello', 'Seleccion Rioja',\n",
+ " 'Millenium Reserva', 'Acediano', 'Gaudium', 'Pico de Luyas',\n",
+ " 'Grandes Anadas Rioja', 'Mas Via Gran Reserva Brut',\n",
+ " 'Navarra Coleccion 125 Blanco', 'Chardonnay Uno',\n",
+ " 'Don PX Vieja Cosecha', 'Ribas de Cabrera', 'Cenit',\n",
+ " 'Mas del Serral', 'Estrats', 'Fino Imperial Amontillado 30 Anos',\n",
+ " 'Coliseo Amontillado VORS', 'Vina Tondonia Gran Reserva Blanco',\n",
+ " '8000', 'Mironia Black Edition Tempranillo', 'Miserere',\n",
+ " 'Alma Tobia Rioja', 'Tm', 'Doix', 'Nebro Ribera del Duero',\n",
+ " 'El Canto del Angel', 'Rioja Blanco Reserva',\n",
+ " 'Cayetano del Pino Viejisimo Palo Cortado',\n",
+ " 'Gratallops Partida Bellvisos', 'Coleccion Privada',\n",
+ " 'Finca El Canto', 'Finca Cascorrales', 'Rioja Gran Reserva 904',\n",
+ " 'Marques de Poley Amontillado Seleccion 1951', 'Platon Tinto',\n",
+ " 'Altos de Las Gateras Monastrell', 'Canta la Perdiz',\n",
+ " 'Marques de la Concordia VSG Premium (formerly Hacienda de Susar Rioja)',\n",
+ " 'Cava Kripta Gran Reserva Brut Nature',\n",
+ " 'Alba de Miros Rueda Verdejo', 'Pendon De La Aguilera', 'V2 Tinto',\n",
+ " 'Celia', 'Calmo', 'Confesor Rioja', 'La Bienquerida',\n",
+ " 'Seleccion Especial', 'Saca Unica', 'Palomero', 'Grano a Grano',\n",
+ " 'Santa Ana Pedro Ximenez 1861', 'Cuvee Julia Ribera del Duero',\n",
+ " 'Las Lamas Bierzo (Corullon)',\n",
+ " '200 Monges Rioja Reserva Seleccion Especial', 'Galia Clos Santuy',\n",
+ " 'Tomas Esteban', 'Finca Las Comas Verdejo', 'Diables',\n",
+ " 'St Antoni de Scala Dei Priorat', 'Valdegines',\n",
+ " 'Seda de Oro Reserva', 'Grano a Grano Graciano - Garnacha',\n",
+ " 'Carraquintana de Amaren', 'Garnacha Parcela 81 Vinas Viejas',\n",
+ " \"Cava Enoteca Finca de L'Origen Brut Nature\", 'Salinas 1237',\n",
+ " 'Cabernet Sauvignon', 'Finca La Colina Sauvignon Blanc',\n",
+ " 'Dominio del Verso Red Blend',\n",
+ " 'Altos R Pigeage Nacido en Barrica Blanco', 'Vinyes Altes Porrera',\n",
+ " 'La Vinya del Vuit (8)', 'Venus de la Figuera', 'Vendetta',\n",
+ " 'Ribera del Duero Reserva', 'Malleolus',\n",
+ " 'Seleccion Especial (Reserva)', 'Cosecha', 'Red',\n",
+ " 'Vina Ardanza Reserva', 'Roda I Reserva Rioja',\n",
+ " '3er Ano Ribera del Duero Tinto', 'Macan Clasico',\n",
+ " 'Alaya Tierra (Old Vines Vieilles Vignes)', 'El Viejo',\n",
+ " 'Hacienda Pradolagar', '200 Monges Rioja Reserva', 'Reserva Tinto',\n",
+ " 'Jumilla Blue Label',\n",
+ " 'Palo Cortado Tradicion Vors 30 Years Sherry', 'Triga',\n",
+ " 'Amontillado Tradicion Vors 30 Years', 'Victorino',\n",
+ " 'Reserva Privada Rioja', 'Pago Negralada', 'Rejon Tempranillo',\n",
+ " 'Interpretacion', 'Vendimia', 'Priorat Vinyes Velles',\n",
+ " 'Tempranillo Reserva', 'Cream Tradicion Vos 20 Years',\n",
+ " 'Vatan Tinta de Toro', 'Dominio del Pidio', 'Pagos Viejos', 'An',\n",
+ " 'Numanthia', 'Suprema 30 Meses',\n",
+ " 'Whisba Tempranillo Aged 18 Months in Whisky Barrels',\n",
+ " 'A Mano Rioja', '200 Monges Rioja Gran Reserva',\n",
+ " 'La Hormiga de Antidoto', 'Le Domaine Blanco de Guarda',\n",
+ " 'Edicion Limitada',\n",
+ " 'Lealtanza Coleccion de Artistas Espanoles Sorolla',\n",
+ " 'Granja Gran Reserva Rioja',\n",
+ " 'Perez Pascuas Gran Reserva Seleccion', 'El Puntido Gran Reserva',\n",
+ " 'Cuvee Old Vines Unfiltered', 'Fondillon', '1730 Pedro Ximenez',\n",
+ " 'Cirerets', 'Vinas Viejas Verdejo', 'Tadeo Tinto',\n",
+ " 'Tierras de Cair Reserva', 'Gran Reserva Rosado',\n",
+ " 'Cava Celler Batlle', 'Rioja Blanco', 'Lapola',\n",
+ " 'Clos Abella Seleccion Especial', 'Pie Franco',\n",
+ " 'Singular V Malcorta Verdejo', 'Serral del Vell Brut Nature',\n",
+ " 'Seleccion Finca el Grajo Viejo',\n",
+ " '30 Years VORS Pedro Ximenez Sherry', 'Malpuesto', 'Chafandin',\n",
+ " 'La Vina de Andres Romeo Rioja', 'Delicado Cosecha Pedro Ximenez',\n",
+ " 'Atlantico', 'Quincha Corral', 'Els Escurcons',\n",
+ " 'Vinos Singulares Tempranillo Blanco Reserva',\n",
+ " 'Christina Ribera del Duero', 'Desti',\n",
+ " 'Gran Reserva Classica Tinto', 'Godello Blanco',\n",
+ " 'Palo Cortado 1/10', 'Fourth Edition',\n",
+ " 'Seleccion de Anada Albarino Rias Baixas',\n",
+ " 'Albarino de Fefinanes III Ano', 'Perpetual', 'El Cel',\n",
+ " 'Montes Obarenes Seleccion Terroir', 'Porrera Vi de Vila',\n",
+ " 'Gran Ribera del Duero', 'Heretge Priorat', 'Vinas Viejas',\n",
+ " 'Sibarita Oloroso Jerez-Xeres-Sherry 30 Years V.O.R.S', '17',\n",
+ " 'Clos del Portal Tros de Clos', 'Classic', 'Minami',\n",
+ " 'Vi de Vila Gratallops', 'Veguin de Murua Gran Reserva',\n",
+ " 'O Luar do Sil Valdeorras Godello Sobre Lias', 'Teixar',\n",
+ " 'Anada Pedro Ximenez', 'Clos del Portal Somni', 'Les Manyes',\n",
+ " 'Emporda Aires de Garbet', 'Montes de Toledo Syrah',\n",
+ " 'Maceracion Con Pieles', 'Clos Monlleo',\n",
+ " 'B.Rodriguez La-Cave Quo Vadis?', 'La Solana Alta',\n",
+ " 'Rioja Reserva 10 Anos Despues Edicion Limitada', 'Perinet + Plus',\n",
+ " 'Vina de Martin Escolma Ribeiro', 'Ribera del Duero Torralvo',\n",
+ " 'El Cabernet F - Familiar', 'Premium', 'Hipperia Red Blend',\n",
+ " 'Maria Ribera del Duero', 'Vendimia Seleccionada', 'Clos Figueres',\n",
+ " 'Castillo de Monte la Reina Cuvee Privee', 'Amontillado Saca',\n",
+ " 'Queiron Reserva Vinedos Familiares', 'Don PX Gran Reserva',\n",
+ " 'Llanos del Almendro Ribera del Duero', 'Dolc Mataro',\n",
+ " 'Toro 2V Premium', 'Les Eres Vinyes Velles',\n",
+ " 'Gran Reserva Classica Blanco', 'The Artist',\n",
+ " 'Amontillado 30 Years Old V.O.R.S Sherry',\n",
+ " 'La Comtesse de Pazo Barrantes Albarino', '5V Blanco',\n",
+ " 'Finca Butaros Vinya Centenaria', 'Seleccion Privada',\n",
+ " \"Aponte Winemaker's Private Collection Tempranillo\",\n",
+ " 'Cami de Cormes', '1194', 'Malabrigo Ribera del Duero',\n",
+ " 'Old & Plus Pedro Ximenez', 'Reserva Trajanvs', 'No. 2 Victoria',\n",
+ " 'Emporda Gran Claustro', 'Lalomba Finca Valhonta', 'Manyetes',\n",
+ " 'Pago Valdebellon', 'Guarda de Leda Seleccion Tempranillo',\n",
+ " 'Nun Vinya dels Taus', 'Lo Cortinelo.Lo Blanc',\n",
+ " '5 Partides Gratallops Vi de La Vila',\n",
+ " 'Cava Ex-Vite Gran Reserva Brut', 'Summa Edicion Limitada',\n",
+ " 'Pago El Espino', 'Gloria Rioja', 'Petra de Valpiedra Garnacha',\n",
+ " 'Alto de la Caseta', 'Mas de la Rosa', 'Rioja B70', 'Don Miguel',\n",
+ " 'Singular Negre', 'Rioja Reserva Especial', 'Rioja Cisma',\n",
+ " 'Martires Rioja', 'Cava Original Brut (Espumos)',\n",
+ " 'Marti Reserva Penedes', 'Vina Ardanza Reserva Seleccion Especial',\n",
+ " 'Oloroso 30 Years Old V.O.R.S', 'Pozo Alto',\n",
+ " 'El Cristo de Samaniego', 'El Belisario',\n",
+ " 'Baron de Ona Rioja Reserva', 'Cepas Vellas Rias Baixas Albarino',\n",
+ " 'La Aguilera', 'Pirata', 'Hispania', 'Arousa',\n",
+ " 'Las Cenizas Tempranillo', 'Solanes Priorat',\n",
+ " 'Cava Cuvee De Prestige Trepat',\n",
+ " 'Felix Azpilicueta Coleccion Privada', 'Savinat Sauvignon Blanc',\n",
+ " 'Capricho Crianza', 'Finca La Emperatriz Vinedo Singular Tinto',\n",
+ " 'Galena', 'Solera PAP Palo Cortado Rare Sherry', 'Finca Helena',\n",
+ " 'Primordium', 'La Cuartilleja Reserva', 'Noble', 'De Garage Rioja',\n",
+ " 'Gran Calzadilla', '4 Varietales Coleccion Rioja', 'Oloroso',\n",
+ " 'Numerus Clausus',\n",
+ " '200 Monges Rioja Seleccion Especial Reserva Blanco',\n",
+ " '30 Years V.O.R.S Very Old Palo Cortado Blend Medium Sherry',\n",
+ " 'Campus Gothorum', 'La Pizca', 'La Bota 90 de Manzanilla Pasada',\n",
+ " 'Moradillo de Roa', 'Cofradia', 'Stairway To Heaven Rosado',\n",
+ " 'La Silleria', 'Planella', 'Gallaecia Rias Baixes Albarino',\n",
+ " 'Monada', 'Verdejo', 'Vina Garugele', 'Barrica', 'Voltons',\n",
+ " 'Ekam Essencia', 'Valcavado de Traslascuestas',\n",
+ " 'Oscar Tobia Gran Reserva Rioja', 'La Baixada', 'Seleccion',\n",
+ " 'Cava El Tros Nou Pinot Noir',\n",
+ " 'Los Dominios de Berceo Prefiloxerico', 'Old & Plus Amontillado',\n",
+ " 'Sao Expressiu', 'Onomastica Rioja Reserva',\n",
+ " 'Vina Arana Gran Reserva', 'Carinena Parcela 15 Vinas Viejas',\n",
+ " 'Vobiscum', 'La Navilla', 'Merlot - Syrah',\n",
+ " 'Casta Diva Cosecha Miel Moscatel Dulce', 'Pago de Las Costanas',\n",
+ " 'Cava Gran Reserva Familiar Millenium Brut',\n",
+ " 'Zona Zepa Monastrell', 'Alma Tobia Blanco',\n",
+ " 'Garnacha Vinas Centenarias',\n",
+ " 'El Jardin de las Iguales Vinedo Historico Garnacha',\n",
+ " 'Silvanus Edicion Limitada Ribera del Duero',\n",
+ " 'San Valentin Parellada', 'Vina Tondonia Reserva', 'PSI',\n",
+ " 'Ribera del Duero Crianza', 'Mas La Plana Cabernet Sauvignon',\n",
+ " 'Crianza Tinto', 'Bosque de Matasnos Etiqueta Blanca',\n",
+ " 'Matusalem Sherry VORS', 'Reserva Seleccion de la Familia',\n",
+ " 'Apostoles 30 Years Old Sherry', 'Gran Reserva Rioja (Finca Ygay)',\n",
+ " 'El Puntido',\n",
+ " 'Don Guido Pedro Ximenez Solera Especial Aged 20 Years',\n",
+ " 'Gran Reserva Rioja', 'La Baraja', 'Finca Malaveina', 'Reserva XR',\n",
+ " 'Alenza Ribera del Duero Gran Reserva',\n",
+ " 'Chardonnay Fermentado en Barrica', 'Alicante Bouschet by Tarima',\n",
+ " 'Oloroso Tradicion Vors 30 Years', 'Dominio de Atauta', 'Crianza',\n",
+ " 'Laurel', 'Seleccion La Aguilera', 'Finca Martelo',\n",
+ " 'Pedro Ximenez Murillo Seleccion del Centenario', 'Mirto',\n",
+ " 'I Gran Reserva', 'Roda Reserva Rioja', 'Rioja Vina Coqueta',\n",
+ " 'Vermello Vermu', 'Capellania Reserva', 'Los Tabaqueros',\n",
+ " 'Torre Albeniz Reserva Ribera del Duero', 'Finca El Otero',\n",
+ " 'Familia Comenge Reserva', 'Tierra Alta de 2 Cotas Reserva',\n",
+ " 'Fagus de Coto de Hayas Garnacha', 'Finca Terrerazo', 'Godello',\n",
+ " 'Godina Garnacha', 'QS', 'Don Miguel Comenge Reserva', '3.9',\n",
+ " 'Finca Iscorta Gran Reserva', 'Finca Biniagual Veran',\n",
+ " 'Brega Garnacha', 'Elite', 'La Mar', 'Nelin Priorat',\n",
+ " 'Arcos de la Frontera', 'Albarino', 'Cepas Vellas Godello',\n",
+ " 'Las Sabias', 'Cerro Anon Rioja Gran Reserva', 'Ekam', 'Masdeu',\n",
+ " 'Pantocrator', 'Senorio de Cuzcurrita Rioja',\n",
+ " 'Silencio de Miros Ribera Del Duero', 'Queiron Mi Lugar',\n",
+ " 'Purgatori Costers del Segre', 'Perfume de Sonsierra',\n",
+ " 'Chardonnay Roure', 'Rioja Graciano', 'ARX Arcos de la Frontera',\n",
+ " 'Navarra Reserva Coleccion 125', 'Venus', 'Grillo',\n",
+ " 'Prieto Picudo', 'Sofia', 'Finca Cuesta Clara Raro Reserva',\n",
+ " 'Fermentado En Barrica Blanco', 'Ribera Del Duero Gran Reserva',\n",
+ " 'Vinya La Scala Gran Reserva Cabernet Sauvignon',\n",
+ " 'Atteca Armas Garnacha Old Vines', 'Syrah (Shiraz)',\n",
+ " 'Campo Eliseo', 'Vinas Viejas Tempranillo', 'Vitium Reserva',\n",
+ " 'Lo Cabalo Reserva', 'Finca Matambres', 'Suzzane Rioja',\n",
+ " 'Giro de Abargues', 'Reserva Vermouth Rojo', 'Punta Esencia',\n",
+ " 'Pla dels Angels Rosado', 'Uro', 'Tras da Vina Albarino',\n",
+ " 'Masia Carreras Negre', 'Branco Vermu', 'Palmeri Navalta Garnacha',\n",
+ " 'La Plazuela', 'Solar de Estraunza Reserva', 'III a.C',\n",
+ " 'Matallana', 'Rioja Norte', 'Artigas', 'Julia',\n",
+ " 'Jerez-Xeres-Sherry 30 Year Old Oloroso VORS', 'Pancrudo', 'Aurea',\n",
+ " 'Coleccion Syrah', \"Flor de Primavera Peraj Ha'abib\",\n",
+ " 'Altos de Losada El Cepon', 'Gran Vino Blanco',\n",
+ " 'Crianza Vendimia Seleccionada', 'Vermouth Reserva',\n",
+ " 'Finca Azaya Premium', 'Ribera del Duero Tinto', 'No. 20',\n",
+ " 'La Casilla Amontillado', 'Palo Cortado', 'Martelo Reserva',\n",
+ " 'Vintage Albarino', 'Cardenal Palo Cortado VORS',\n",
+ " 'Vallegarcia Viognier', 'Finca la Atalaya Reserva',\n",
+ " 'Clos Cypres Vinyes Velles (Old Vines)', 'Castineiro Albarino',\n",
+ " 'Amontillado V.O.R.S', 'Altar',\n",
+ " 'Gran Barquero Palo Cortado Solera 25 Anos',\n",
+ " 'Toro Gran Elias Mora', 'Arnau Oller Seleccio de la Familia',\n",
+ " 'Orbus', 'Malbec', 'Altos de Losada', 'Reserva Especial Rioja',\n",
+ " 'La Time', 'Vendimia Tardia Moscatel', 'Hiru 3 Rioja Racimos',\n",
+ " 'Triaca', 'Amontillado 51-1a 30 Years VORS Sherry', 'Cyclo',\n",
+ " 'Maimo Garnatxa', 'Fos Baranda', 'Finca la Oracion',\n",
+ " 'Albarino Pedralonga', 'Mas de Masos', 'Com Tu', 'El Rocallis',\n",
+ " 'Iugiter', 'Ultreia Valtuille Mencia', 'La Chispa Negra',\n",
+ " 'Sauvignon Blanc', 'Clos Adrien', 'Laderas Rioja',\n",
+ " 'Blanco de Parcela', 'Finca la Beata Bobal', 'Resalte Expresion',\n",
+ " 'Cava Brut Milesime', 'Syrah de Matasnos',\n",
+ " 'Massipa de Scala Dei Garnatxa Blanca - Xenin', 'Nunci Negre',\n",
+ " 'Oloroso Extra Viejo 1/7', 'Musivari Gran Reserva Brut Nature',\n",
+ " 'Faustino de Autor Reserva Especial', 'Vinas Viejas Aguilera',\n",
+ " 'Juan Piernas Finca El Paso Malo Monastrell', 'Pedra de Guix',\n",
+ " 'Vinyes Velles de Samso', 'Malvasia', 'La Bota 69 de Amontillado',\n",
+ " 'Antigues Reserves', 'Baron de Chirel Verdejo Vinas Centenarias',\n",
+ " 'Milagros de Figuero', 'Las Botas Palo Cortado Horizontal',\n",
+ " 'La Riva Fino Balbaina Alta', 'Coma Bruna',\n",
+ " 'Palo Cortado de la Cruz de 1767', 'Grano a Grano Tempranillo',\n",
+ " \"S'Alou\", 'Altos R Pigeage', 'Finca Monteviejo', 'Habis', '5o Ano',\n",
+ " 'Gaminde', 'Varietal Fermentacion Lenta',\n",
+ " 'El Cordero y las Virgenes',\n",
+ " 'Selection 1884 Vermouth Gran Reserva', 'Garnacha Tintorera',\n",
+ " 'Penalba Lopez Blanco', 'Cabernet', 'El Cuentista',\n",
+ " 'Tahulla Moscatel Blanco Seco', 'Juegabolos', 'Sketch',\n",
+ " 'Montsalvat', 'Sio Negre', 'Pago de Otazu Chardonnay Con Crianza',\n",
+ " 'Verdejo Fermentado en Barrica',\n",
+ " 'Vinyes Altes de Les Garrigues Vi Blanc', 'Vina Centenaria Mencia',\n",
+ " 'Cerro del Lobo Syrah', 'Summum Monastrell', 'Finca La Herradura',\n",
+ " 'Alma de Mar Albarino', 'Canasta 20 Anos Cream',\n",
+ " 'Rioja Anadas Frias', 'Monreal', '8 Vents Mallorca',\n",
+ " \"Vd'O 1 Samso - Pissarra\", 'Cava Gran Reserva La Capella Brut',\n",
+ " 'Coleccion No. 3 Cosecha', 'Albarino Sobre Lias', 'Nobbis',\n",
+ " 'Rara Avis Prieto Picudo', 'Sensal', 'Campo Eliseo Blanco',\n",
+ " 'Blanc de Neu', 'T3rno Seleccion de 3 Parcelas Ribera del Duero',\n",
+ " 'Fino La Barajuela', 'Parcelas de Maturana Tinta Coleccion Rioja',\n",
+ " 'Reserva Real', 'Attis Embaixador Albarino', 'Vitola Reserva',\n",
+ " 'San Cucufate Monasterio', 'Tras Los Muros Albarino', 'CM Tinto',\n",
+ " 'La Loma & Los Santos Garnacha Blanca - Macabeo',\n",
+ " 'Gratallops Escanya-Vella', 'Lalomba Finca Ladero',\n",
+ " 'Oloroso En Rama', 'El Reventon Cebreros',\n",
+ " 'Amontillado Napoleon Vinos Viejos 30 Year Old Sherry',\n",
+ " 'Navarra Vendimia Tardia Coleccion 125', 'Manar dos Seixas',\n",
+ " '200 Monges Edicion Exclusiva Blanco Reserva',\n",
+ " 'Eolic Sauvignon Blanc',\n",
+ " 'Finca Valmediano Tinta de Toro 16 Meses Barrica',\n",
+ " 'Cantos del Diablo', 'Teofilo Reyes Crianza', 'Falguera',\n",
+ " 'Epistem No. 3', 'Angel Ribera del Duero', 'Pleret',\n",
+ " 'A Torna dos Pasas Escolma Ribeiro', 'Costers Del Segre 1780',\n",
+ " 'Ines', 'Honoris', 'La Revelia Godello', 'Club Rioja Reserva',\n",
+ " 'Camino del Abuelo Vinas Viejas', 'La Morera de San Lazaro',\n",
+ " 'Quintanilla', 'Salvaxe', 'Vi De Glass Gewurztraminer',\n",
+ " 'Finca Los Quemados', 'Albarino (O Rosal)', 'Rioja',\n",
+ " 'Vino de Autor Ribeiro Blanco',\n",
+ " 'Argila Reserva 3 Anys Brut Nature', 'Octogenarius',\n",
+ " 'Finca La Pedrissa', 'Magister',\n",
+ " 'Ribera del Duero Vendimia Seleccionada VT',\n",
+ " 'Porcellanic VI Xarel-lo Sur Lie', 'Sibila',\n",
+ " 'Emporda Reserva Especial Don Miguel Mateu', '200 Cestos Godello',\n",
+ " 'Heaven & Hell', 'Cabrida Montsant', 'Suane Blanco', 'Pinot Noir',\n",
+ " 'Cerro La Isa Blanco', 'Cuvee Verdejo', 'Anadelia', 'Tabuerniga',\n",
+ " 'Seleccion Especial Verdejo', \"Coster d'en Fornos\",\n",
+ " 'Malvasia Volcanica', 'Laderas de Inurrieta', 'El Velado Tinto',\n",
+ " 'Blanco Afrutado', 'A Pita Cega', 'Naiades',\n",
+ " 'Fuenmayor Gran Reserva', 'Mirador de Navajas Reserva Rioja',\n",
+ " 'Coleccion Roberto Amillo Amontillado', '1730 Amontillado',\n",
+ " 'Cerro La Isa Garnacha', 'Nus del Terrer', 'Nivel', 'Le Rose',\n",
+ " 'Pago de las Sabinas', 'Old & Plus Oloroso',\n",
+ " 'Coleccion Roberto Amillo Palo Cortado', 'Gueta-Lupia',\n",
+ " 'Marmajuelo', 'Cava Reserva Especial Chardonnay Brut Nature',\n",
+ " 'Ribera del Duero Vendimia Seleccionada', 'No. 23',\n",
+ " 'Las Iruelas El Tiemblo', 'V Dulce de Invierno', 'Diego Magana',\n",
+ " 'Graciano', 'Cerro Las Cuevas Seleccion Terroir',\n",
+ " 'Altos R Pigeage Graciano', 'Ronda',\n",
+ " 'Nisia Las Suertes Verdejo Old Vines', 'Territori', 'Alomado',\n",
+ " 'Selma de Nin Blanco', 'Senda de Los Olivos Roble',\n",
+ " 'Mucho Mas Tinto', 'Coto de Imaz Rioja Gran Reserva',\n",
+ " 'Vina Alberdi Reserva', 'Rias Baixas Albarino Atlantico',\n",
+ " '27 Ribera del Duero', 'Terras Gauda O Rosal', 'Salmos',\n",
+ " 'Vina Bosconia Reserva', 'Louro Godello', 'La Atalaya del Camino',\n",
+ " 'Rioja 150 Aniversario', 'Veraton Garnacha',\n",
+ " 'Alexander vs. The Ham Factory', 'Vina Alberdi Crianza',\n",
+ " 'Taberner Tierra de Cadiz', 'Gessami', 'Finest Old Harvest Medium',\n",
+ " 'Ribera del Duero Red', 'Corimbo I', 'Vermut Rojo',\n",
+ " 'Vina Tondonia Reserva Blanco', 'Reserva 5 Ano',\n",
+ " 'Del Duque Amontillado Sherry VORS',\n",
+ " 'Les Terrasses Velles Vinyes Priorat',\n",
+ " 'Les Terrasses Laderas de Pizarra', 'Inici',\n",
+ " '5 Finques (Fincas) Reserva', 'Martinet Bru (Garnatxa - Syrah)',\n",
+ " 'Ribera Del Duero Reserva', 'Muscat',\n",
+ " 'Angeles de Amaren Tempranillo - Graciano', 'Bassus Pinot Noir',\n",
+ " 'Sericis Cepas Viejas Monastrell',\n",
+ " 'Rioja Reserva Fincas de Ganuza', 'Corpinnat Terrers Brut Nature',\n",
+ " 'Dos Cortados Palo Cortado Solera Especial Aged 20 Years',\n",
+ " 'Oscar Tobia Reserva Rioja', 'Rioja Satinela Blanco Semidulce',\n",
+ " 'Clos del Portal Negre de Negres', 'Solideo', 'Preludio',\n",
+ " 'Reserva Tempranillo', 'Coma Vella Priorat', 'Las Ocho (8) Tinto',\n",
+ " 'Pago de los Balagueses Syrah', 'Bagus Ribera del Duero',\n",
+ " 'Formiga de Vellut', 'Tierra Alta En 2 Maderas',\n",
+ " 'Propiedad Rioja', 'Reserva Especial Icon Edition',\n",
+ " 'Antique Palo Cortado', 'Vina Lanciano Reserva',\n",
+ " 'Jalifa Amontillado Rare Old Dry Solera Especial Aged 30 Years',\n",
+ " 'La Felisa', 'Milmanda', 'Envidiacochina (Tete de Cuvee)', 'Adaro',\n",
+ " 'Fondillon Gran Reserva', 'Monastrell - Syrah Finca Espolla',\n",
+ " 'Santa Rosa', 'Roc Nu', 'Rias Baixas Albarino Finca Valinas',\n",
+ " 'Treixadura', 'Candela Cream Dulce Sweet', 'Nounat',\n",
+ " 'Les Brugueres'], dtype=object)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 20
+ }
+ ],
+ "source": [
+ "df['wine'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "YMK8ok3Bgeh4",
+ "outputId": "4cce2cdd-a6ef-4575-a424-240b5341e14d"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array(['Toro', 'Vino de Espana', 'Ribera del Duero', 'Montilla-Moriles',\n",
+ " 'Jumilla', 'Jerez-Xeres-Sherry', 'Priorato', 'Rioja', 'Bierzo',\n",
+ " 'Castilla y Leon', 'Campo de Borja', 'Cadiz', 'Cava',\n",
+ " 'Jerez Palo Cortado', 'Manzanilla', 'Valdeorras', 'Emporda',\n",
+ " 'Rioja Alta', 'Alicante', 'Somontano', 'Dominio de Valdepusa',\n",
+ " 'Ribeiro', 'Mallorca', 'Yecla', 'Cigales', 'Utiel-Requena',\n",
+ " 'Navarra', 'Rioja Alavesa', 'Costers del Segre', 'Sardon de Duero',\n",
+ " 'Malaga', 'Cataluna', 'Rueda', 'Jerez Pedro Ximenes (PX)',\n",
+ " 'Montsant', 'Conca de Barbera', 'Rias Baixas', 'Jerez Amontillado',\n",
+ " 'Tierra del Vino de Zamora', 'Penedes', 'Dehesa del Carrizal',\n",
+ " 'Madrid', 'Carinena', 'Condado de Huelva', 'Almansa',\n",
+ " 'Jerez Cream', 'Valencia', 'Sierras de Malaga', 'Ribeira Sacra',\n",
+ " 'El Terrerazo', 'Jerez Oloroso', 'Castilla', 'Alella',\n",
+ " 'Pla i Llevant', 'Pla de Bages', 'Pago Calzadilla', 'Andalucia',\n",
+ " 'Monterrei', 'Ribera del Gallego-Cinco Villas', 'Aragon',\n",
+ " 'Galicia', 'La Palma', 'Binissalem-Mallorca', 'Calatayud',\n",
+ " 'Arinzano', 'Tarragona', 'Extremadura', 'Murcia', 'La Mancha',\n",
+ " 'Otazu', 'Valdejalon', 'Mentrida', 'Terra Alta', 'Arribes',\n",
+ " 'Gran Canaria', 'Abona'], dtype=object)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 21
+ }
+ ],
+ "source": [
+ "df['region'].unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "id": "66U3nMN3fz4H"
+ },
+ "outputs": [],
+ "source": [
+ "# Drop 'country'. NOTE(review): every previewed row is 'Espana' - confirm the\n",
+ "# column really is constant before discarding it.\n",
+ "# columns= already selects the axis, so axis=1 was redundant; errors='ignore'\n",
+ "# lets the cell be re-run after the column is gone instead of raising KeyError.\n",
+ "df = df.drop(columns=['country'], errors='ignore')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "id": "0noqAOqeg0eq"
+ },
+ "outputs": [],
+ "source": [
+ "df['type'] = df['type'].fillna('')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "id": "tFqh3Gi9f8fx"
+ },
+ "outputs": [],
+ "source": [
+ "df['type'] = df['type'].replace('', 'N/A')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "eMENyaIEgcF_",
+ "outputId": "a08bf838-0dd6-4063-de52-6798b7876404"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "winery 0\n",
+ "wine 0\n",
+ "year 0\n",
+ "rating 0\n",
+ "num_reviews 0\n",
+ "region 0\n",
+ "price 0\n",
+ "type 0\n",
+ "body 0\n",
+ "acidity 0\n",
+ "dtype: int64"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 25
+ }
+ ],
+ "source": [
+ "df.isnull().sum()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df.describe()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 300
+ },
+ "id": "lXqymEDi2QEX",
+ "outputId": "3ea3143e-1ca6-42d0-a474-8c6725f37c27"
+ },
+ "execution_count": 26,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " year rating num_reviews price body \\\n",
+ "count 7500.000000 7500.000000 7500.000000 7500.000000 7500.000000 \n",
+ "mean 10.446000 4.254933 451.109067 60.095822 4.133733 \n",
+ "std 6.811104 0.118029 723.001856 150.356676 0.539031 \n",
+ "min 3.000000 4.200000 25.000000 4.990000 2.000000 \n",
+ "25% 7.000000 4.200000 389.000000 18.900000 4.000000 \n",
+ "50% 9.000000 4.200000 404.000000 28.530000 4.000000 \n",
+ "75% 13.000000 4.200000 415.000000 51.350000 4.000000 \n",
+ "max 114.000000 4.900000 32624.000000 3119.080000 5.000000 \n",
+ "\n",
+ " acidity \n",
+ "count 7500.000000 \n",
+ "mean 2.954933 \n",
+ "std 0.228858 \n",
+ "min 1.000000 \n",
+ "25% 3.000000 \n",
+ "50% 3.000000 \n",
+ "75% 3.000000 \n",
+ "max 3.000000 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " year | \n",
+ " rating | \n",
+ " num_reviews | \n",
+ " price | \n",
+ " body | \n",
+ " acidity | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 7500.000000 | \n",
+ " 7500.000000 | \n",
+ " 7500.000000 | \n",
+ " 7500.000000 | \n",
+ " 7500.000000 | \n",
+ " 7500.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 10.446000 | \n",
+ " 4.254933 | \n",
+ " 451.109067 | \n",
+ " 60.095822 | \n",
+ " 4.133733 | \n",
+ " 2.954933 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 6.811104 | \n",
+ " 0.118029 | \n",
+ " 723.001856 | \n",
+ " 150.356676 | \n",
+ " 0.539031 | \n",
+ " 0.228858 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 3.000000 | \n",
+ " 4.200000 | \n",
+ " 25.000000 | \n",
+ " 4.990000 | \n",
+ " 2.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 7.000000 | \n",
+ " 4.200000 | \n",
+ " 389.000000 | \n",
+ " 18.900000 | \n",
+ " 4.000000 | \n",
+ " 3.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 9.000000 | \n",
+ " 4.200000 | \n",
+ " 404.000000 | \n",
+ " 28.530000 | \n",
+ " 4.000000 | \n",
+ " 3.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 13.000000 | \n",
+ " 4.200000 | \n",
+ " 415.000000 | \n",
+ " 51.350000 | \n",
+ " 4.000000 | \n",
+ " 3.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 114.000000 | \n",
+ " 4.900000 | \n",
+ " 32624.000000 | \n",
+ " 3119.080000 | \n",
+ " 5.000000 | \n",
+ " 3.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 8,\n \"fields\": [\n {\n \"column\": \"year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2643.6654972653555,\n \"min\": 3.0,\n \"max\": 7500.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 10.446,\n 9.0,\n 7500.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2650.3339657222677,\n \"min\": 0.1180290345746442,\n \"max\": 7500.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 4.254933333333334,\n 4.9,\n 0.1180290345746442\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_reviews\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11311.669508789137,\n \"min\": 25.0,\n \"max\": 32624.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 451.10906666666665,\n 404.0,\n 7500.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2700.9673832556828,\n \"min\": 4.99,\n \"max\": 7500.0,\n \"num_unique_values\": 8,\n \"samples\": [\n 60.09582187159273,\n 28.53,\n 7500.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"body\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2650.45516025624,\n \"min\": 0.539030737627305,\n \"max\": 7500.0,\n \"num_unique_values\": 6,\n \"samples\": [\n 7500.0,\n 4.133733333333334,\n 5.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"acidity\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2650.8332500202478,\n \"min\": 0.22885798876033325,\n \"max\": 7500.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 2.9549333333333334,\n 3.0,\n 0.22885798876033325\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 26
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# LABEL ENCODING"
+ ],
+ "metadata": {
+ "id": "qfLm1ueRgBUJ"
+ }
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "id": "CKZbd9zdhMxS"
+ },
+ "outputs": [],
+ "source": [
+ "# Fit a separate LabelEncoder per categorical column and keep each one, so every\n",
+ "# integer code can be mapped back to its original text with inverse_transform.\n",
+ "# (A single shared encoder only remembers the column it was fitted on last.)\n",
+ "# NOTE(review): scikit-learn documents LabelEncoder for target labels and\n",
+ "# OrdinalEncoder for input features; the integer codes follow sorted label\n",
+ "# order and carry no real ordering.\n",
+ "categorical_columns = ['winery', 'type', 'region', 'wine']\n",
+ "label_encoders = {}\n",
+ "for column in categorical_columns:\n",
+ "    label_encoders[column] = LabelEncoder()\n",
+ "    df[column + '_encoded'] = label_encoders[column].fit_transform(df[column])\n",
+ "\n",
+ "# Kept for backward compatibility: as before, this name ends up bound to the\n",
+ "# encoder fitted on 'wine'.\n",
+ "label_encoder = label_encoders['wine']\n",
+ "\n",
+ "# The text columns are no longer needed once their encoded versions exist.\n",
+ "df = df.drop(columns=categorical_columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "a-_TVuEwhtbh",
+ "outputId": "b18ba086-a53f-4309-a5e9-00830028149a"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " year rating num_reviews price body acidity winery_encoded \\\n",
+ "0 11.0 4.9 58 995.00 5.0 3.0 422 \n",
+ "1 6.0 4.9 31 313.50 4.0 2.0 33 \n",
+ "2 15.0 4.8 1793 324.95 5.0 3.0 447 \n",
+ "3 25.0 4.8 1705 692.96 5.0 3.0 447 \n",
+ "4 28.0 4.8 1309 778.06 5.0 3.0 447 \n",
+ "\n",
+ " type_encoded region_encoded wine_encoded \n",
+ "0 20 69 759 \n",
+ "1 19 74 819 \n",
+ "2 12 57 778 \n",
+ "3 12 57 778 \n",
+ "4 12 57 778 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " year | \n",
+ " rating | \n",
+ " num_reviews | \n",
+ " price | \n",
+ " body | \n",
+ " acidity | \n",
+ " winery_encoded | \n",
+ " type_encoded | \n",
+ " region_encoded | \n",
+ " wine_encoded | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 11.0 | \n",
+ " 4.9 | \n",
+ " 58 | \n",
+ " 995.00 | \n",
+ " 5.0 | \n",
+ " 3.0 | \n",
+ " 422 | \n",
+ " 20 | \n",
+ " 69 | \n",
+ " 759 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 6.0 | \n",
+ " 4.9 | \n",
+ " 31 | \n",
+ " 313.50 | \n",
+ " 4.0 | \n",
+ " 2.0 | \n",
+ " 33 | \n",
+ " 19 | \n",
+ " 74 | \n",
+ " 819 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 15.0 | \n",
+ " 4.8 | \n",
+ " 1793 | \n",
+ " 324.95 | \n",
+ " 5.0 | \n",
+ " 3.0 | \n",
+ " 447 | \n",
+ " 12 | \n",
+ " 57 | \n",
+ " 778 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 25.0 | \n",
+ " 4.8 | \n",
+ " 1705 | \n",
+ " 692.96 | \n",
+ " 5.0 | \n",
+ " 3.0 | \n",
+ " 447 | \n",
+ " 12 | \n",
+ " 57 | \n",
+ " 778 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 28.0 | \n",
+ " 4.8 | \n",
+ " 1309 | \n",
+ " 778.06 | \n",
+ " 5.0 | \n",
+ " 3.0 | \n",
+ " 447 | \n",
+ " 12 | \n",
+ " 57 | \n",
+ " 778 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 7500,\n \"fields\": [\n {\n \"column\": \"year\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 6.811103886411282,\n \"min\": 3.0,\n \"max\": 114.0,\n \"num_unique_values\": 70,\n \"samples\": [\n 95.0,\n 11.0,\n 82.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"rating\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.1180290345746442,\n \"min\": 4.2,\n \"max\": 4.9,\n \"num_unique_values\": 8,\n \"samples\": [\n 4.8,\n 4.4,\n 4.9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"num_reviews\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 723,\n \"min\": 25,\n \"max\": 32624,\n \"num_unique_values\": 817,\n \"samples\": [\n 115,\n 177,\n 2144\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 150.35667645268242,\n \"min\": 4.99,\n \"max\": 3119.08,\n \"num_unique_values\": 1292,\n \"samples\": [\n 168.0,\n 190.0,\n 28.7315367295517\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"body\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.539030737627305,\n \"min\": 2.0,\n \"max\": 5.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 4.0,\n 2.0,\n 5.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"acidity\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.22885798876033325,\n \"min\": 1.0,\n \"max\": 3.0,\n \"num_unique_values\": 3,\n \"samples\": [\n 3.0,\n 2.0,\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"winery_encoded\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 128,\n \"min\": 0,\n \"max\": 479,\n \"num_unique_values\": 480,\n \"samples\": [\n 225,\n 257,\n 108\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"type_encoded\",\n 
\"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 0,\n \"max\": 21,\n \"num_unique_values\": 22,\n \"samples\": [\n 20,\n 18,\n 14\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"region_encoded\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 17,\n \"min\": 0,\n \"max\": 75,\n \"num_unique_values\": 76,\n \"samples\": [\n 35,\n 19,\n 12\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"wine_encoded\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 231,\n \"min\": 0,\n \"max\": 846,\n \"num_unique_values\": 847,\n \"samples\": [\n 139,\n 745,\n 557\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 28
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# Removing Outliers Using Z-Scores"
+ ],
+ "metadata": {
+ "id": "LFCKvvDdWLfC"
+ }
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "z = np.abs(stats.zscore(df['price']))\n",
+ "print(z)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "bPek4cr8TbXn",
+ "outputId": "1a4d0386-7b8c-4b99-c2d3-00f8bd3df8b7"
+ },
+ "execution_count": 29,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 6.218324\n",
+ "1 1.685466\n",
+ "2 1.761623\n",
+ "3 4.209367\n",
+ "4 4.775392\n",
+ " ... \n",
+ "7495 0.266822\n",
+ "7496 0.288239\n",
+ "7497 0.237091\n",
+ "7498 0.029293\n",
+ "7499 0.189335\n",
+ "Name: price, Length: 7500, dtype: float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "z = np.abs(stats.zscore(df['year']))\n",
+ "print(z)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "xBalM3p8UVY6",
+ "outputId": "ead29177-b7c0-46b7-e9f9-3582ef6e9906"
+ },
+ "execution_count": 30,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 0.081343\n",
+ "1 0.652801\n",
+ "2 0.668659\n",
+ "3 2.136947\n",
+ "4 2.577434\n",
+ " ... \n",
+ "7495 0.359143\n",
+ "7496 0.652801\n",
+ "7497 0.505972\n",
+ "7498 0.375001\n",
+ "7499 0.359143\n",
+ "Name: year, Length: 7500, dtype: float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "z = np.abs(stats.zscore(df['rating']))\n",
+ "print(z)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "c-vJdnSCUXOi",
+ "outputId": "05a26471-fcfe-4297-d38e-6149fd3c840d"
+ },
+ "execution_count": 31,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 5.465686\n",
+ "1 5.465686\n",
+ "2 4.618381\n",
+ "3 4.618381\n",
+ "4 4.618381\n",
+ " ... \n",
+ "7495 0.465453\n",
+ "7496 0.465453\n",
+ "7497 0.465453\n",
+ "7498 0.465453\n",
+ "7499 0.465453\n",
+ "Name: rating, Length: 7500, dtype: float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "z = np.abs(stats.zscore(df['num_reviews']))\n",
+ "print(z)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "UBal4zyDUXsy",
+ "outputId": "205ab315-64cf-43dc-d4bd-e2fb1c891b42"
+ },
+ "execution_count": 32,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 0.543754\n",
+ "1 0.581101\n",
+ "2 1.856123\n",
+ "3 1.734400\n",
+ "4 1.186647\n",
+ " ... \n",
+ "7495 0.081761\n",
+ "7496 0.084527\n",
+ "7497 0.084527\n",
+ "7498 0.085910\n",
+ "7499 0.087293\n",
+ "Name: num_reviews, Length: 7500, dtype: float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "z = np.abs(stats.zscore(df['body']))\n",
+ "print(z)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "TO7EnYbIUYJ6",
+ "outputId": "d31223f7-5a26-4e76-f82d-6f802fc61320"
+ },
+ "execution_count": 33,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 1.607189\n",
+ "1 0.248116\n",
+ "2 1.607189\n",
+ "3 1.607189\n",
+ "4 1.607189\n",
+ " ... \n",
+ "7495 0.248116\n",
+ "7496 0.248116\n",
+ "7497 0.248116\n",
+ "7498 1.607189\n",
+ "7499 1.607189\n",
+ "Name: body, Length: 7500, dtype: float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "z = np.abs(stats.zscore(df['acidity']))\n",
+ "print(z)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "86gBSnkmUp4y",
+ "outputId": "5f6f8ec0-a0c8-4981-a6ea-7f3a743e5b01"
+ },
+ "execution_count": 34,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 0.196933\n",
+ "1 4.172880\n",
+ "2 0.196933\n",
+ "3 0.196933\n",
+ "4 0.196933\n",
+ " ... \n",
+ "7495 0.196933\n",
+ "7496 0.196933\n",
+ "7497 0.196933\n",
+ "7498 0.196933\n",
+ "7499 0.196933\n",
+ "Name: acidity, Length: 7500, dtype: float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "z = np.abs(stats.zscore(df['winery_encoded']))\n",
+ "print(z)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "UTgxEzKIUtqR",
+ "outputId": "fd3f2ee0-d358-403d-cbf4-2dd57f0f5fb2"
+ },
+ "execution_count": 35,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 1.273008\n",
+ "1 1.753308\n",
+ "2 1.467501\n",
+ "3 1.467501\n",
+ "4 1.467501\n",
+ " ... \n",
+ "7495 0.773062\n",
+ "7496 0.780841\n",
+ "7497 0.588392\n",
+ "7498 0.448356\n",
+ "7499 1.148532\n",
+ "Name: winery_encoded, Length: 7500, dtype: float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "z = np.abs(stats.zscore(df['type_encoded']))\n",
+ "print(z)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "18tXd6zZUuHS",
+ "outputId": "2576526b-cb83-4f24-9e34-2d3a249cdb19"
+ },
+ "execution_count": 36,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 2.130557\n",
+ "1 1.871974\n",
+ "2 0.061888\n",
+ "3 0.061888\n",
+ "4 0.061888\n",
+ " ... \n",
+ "7495 0.320471\n",
+ "7496 0.455280\n",
+ "7497 0.196696\n",
+ "7498 0.061888\n",
+ "7499 0.061888\n",
+ "Name: type_encoded, Length: 7500, dtype: float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Absolute z-score of each region_encoded value.\n",
+ "z = stats.zscore(df['region_encoded'])\n",
+ "z = np.absolute(z)\n",
+ "print(z)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "0vVSEP4yUug6",
+ "outputId": "c9984f13-6459-4397-9cf0-8e9829b9918c"
+ },
+ "execution_count": 37,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 1.086370\n",
+ "1 1.375263\n",
+ "2 0.393026\n",
+ "3 0.393026\n",
+ "4 0.393026\n",
+ " ... \n",
+ "7495 0.508583\n",
+ "7496 0.161911\n",
+ "7497 1.513670\n",
+ "7498 0.393026\n",
+ "7499 0.393026\n",
+ "Name: region_encoded, Length: 7500, dtype: float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Absolute z-score of each wine_encoded value.\n",
+ "z = stats.zscore(df['wine_encoded'])\n",
+ "z = np.absolute(z)\n",
+ "print(z)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "cXpBwuNmU9fq",
+ "outputId": "40cf42b0-fae5-41ab-a9ae-49d43b73e673"
+ },
+ "execution_count": 38,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "0 1.257374\n",
+ "1 1.516556\n",
+ "2 1.339448\n",
+ "3 1.339448\n",
+ "4 1.339448\n",
+ " ... \n",
+ "7495 0.652618\n",
+ "7496 0.120607\n",
+ "7497 0.716724\n",
+ "7498 0.543936\n",
+ "7499 0.553265\n",
+ "Name: wine_encoded, Length: 7500, dtype: float64\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Flag a row as an outlier when any of its columns lies more than threshold_z\n",
+ "# standard deviations from that column's mean, then build a frame without those rows.\n",
+ "threshold_z = 2\n",
+ "\n",
+ "# Column-wise absolute z-scores for the whole frame in one vectorized step.\n",
+ "# pandas .std() is the sample standard deviation (ddof=1), as in the per-column version.\n",
+ "z = ((df - df.mean()) / df.std()).abs()\n",
+ "\n",
+ "# Boolean mask aligned with df's index: True where at least one column exceeds the threshold.\n",
+ "outlier_mask = (z > threshold_z).any(axis=1)\n",
+ "\n",
+ "# Sorted, unique positional row numbers of the flagged rows.\n",
+ "outlier_indices = np.flatnonzero(outlier_mask.to_numpy())\n",
+ "\n",
+ "# Filter with the mask instead of df.drop(outlier_indices): drop() matches index labels,\n",
+ "# so passing positional row numbers is only correct while df has a default RangeIndex.\n",
+ "no_outliers = df.loc[~outlier_mask]\n",
+ "\n",
+ "# NOTE(review): every column of df is scored, including the price target and the\n",
+ "# *_encoded columns (presumably label codes, where a z-score has no natural meaning) - confirm.\n",
+ "# NOTE(review): the cells right after this one build X and Y from df, not no_outliers - confirm intent.\n",
+ "print(\"Original DataFrame Shape:\", df.shape)\n",
+ "print(\"DataFrame Shape after Removing Outliers:\", no_outliers.shape)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "YNHMDzD7VMaq",
+ "outputId": "94607d73-31f4-41f9-e50e-18e8e055748e"
+ },
+ "execution_count": 42,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Original DataFrame Shape: (7500, 10)\n",
+ "DataFrame Shape after Removing Outliers: (5247, 10)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "4yO6D9SlgqFG"
+ },
+ "outputs": [],
+ "source": [
+ "# Shared scaler instance used by the standardization cells below;\n",
+ "# centering and unit-variance scaling are spelled out (both are the library defaults).\n",
+ "Scaler = StandardScaler(with_mean=True, with_std=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "lfTsMVoghIbE"
+ },
+ "outputs": [],
+ "source": [
+ "# Feature matrix: every column of df except the price target.\n",
+ "X = df.drop(columns=['price'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "pyS5LgY7jKbx"
+ },
+ "outputs": [],
+ "source": [
+ "# Target vector: the price column of df.\n",
+ "Y = df.loc[:, 'price']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "8OcbFsWljTpg",
+ "outputId": "3e79a252-07f8-48d8-ddab-5cc6fe07baed"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "\n",
+ "RangeIndex: 7500 entries, 0 to 7499\n",
+ "Data columns (total 10 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 year 7500 non-null float64\n",
+ " 1 rating 7500 non-null float64\n",
+ " 2 num_reviews 7500 non-null int64 \n",
+ " 3 price 7500 non-null float64\n",
+ " 4 body 7500 non-null float64\n",
+ " 5 acidity 7500 non-null float64\n",
+ " 6 winery_encoded 7500 non-null int64 \n",
+ " 7 type_encoded 7500 non-null int64 \n",
+ " 8 region_encoded 7500 non-null int64 \n",
+ " 9 wine_encoded 7500 non-null int64 \n",
+ "dtypes: float64(5), int64(5)\n",
+ "memory usage: 586.1 KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Summarize column names, non-null counts and dtypes of df.\n",
+ "df.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "4ZvAlvK9jX13",
+ "outputId": "597070e8-c439-4f2f-b4c5-190579606488"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[[ 6.21832388]\n",
+ " [ 1.68546605]\n",
+ " [ 1.76162338]\n",
+ " ...\n",
+ " [-0.23709089]\n",
+ " [ 0.02929349]\n",
+ " [-0.18933459]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Standardize price on its own: reshape the column to a single-feature 2-D array first.\n",
+ "# This call refits the shared Scaler on price only.\n",
+ "price_column = df['price'].to_numpy().reshape(-1, 1)\n",
+ "standarized_type_price = Scaler.fit_transform(price_column)\n",
+ "print(standarized_type_price)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ywg5MAXc4Iec",
+ "outputId": "e1150ff4-1a5d-44c6-a9ea-073b96151fab"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "[[ 0.08134319 5.46568628 -0.54375417 ... 2.13055749 1.08636959\n",
+ " 1.25737416]\n",
+ " [-0.65280115 5.46568628 -0.58110097 ... 1.87197376 1.37526284\n",
+ " 1.51655555]\n",
+ " [ 0.66865866 4.61838064 1.85612304 ... 0.0618877 0.39302579\n",
+ " 1.33944827]\n",
+ " ...\n",
+ " [-0.50597228 -0.46545323 -0.08452695 ... -0.19669602 -1.51366967\n",
+ " -0.71672409]\n",
+ " [ 0.37500093 -0.46545323 -0.08591016 ... 0.0618877 0.39302579\n",
+ " -0.54393649]\n",
+ " [-0.35914341 -0.46545323 -0.08729338 ... 0.0618877 0.39302579\n",
+ " 0.55326472]]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Refit the shared Scaler on the feature frame X and print the standardized array.\n",
+ "# NOTE(review): the scaler is fitted on all rows of X, before the train/test split - confirm intent.\n",
+ "standardize_X = Scaler.fit(X).transform(X)\n",
+ "print(standardize_X)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "OkgImhYGjzI6"
+ },
+ "outputs": [],
+ "source": [
+ "# Hold out 20% of the rows for testing; random_state pins the shuffle for reproducibility.\n",
+ "# NOTE(review): this splits the unscaled X; standardize_X is not used here - confirm intent.\n",
+ "X_train, X_test, Y_train, Y_test = train_test_split(\n",
+ "    X,\n",
+ "    Y,\n",
+ "    test_size=0.2,\n",
+ "    random_state=3,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "IuDY1YtUk4UJ",
+ "outputId": "6c9fbbfe-b4ef-41a3-888d-5ca7251c7692"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "(7500, 9) (6000, 9) (1500, 9)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Sanity-check the split: shapes of the full, training and test feature frames.\n",
+ "print(*(frame.shape for frame in (X, X_train, X_test)))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "LOkDlNrbEMDj"
+ },
+ "source": [
+ "# CORRELATION MATRIX"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "m38bmTLzoQPB"
+ },
+ "outputs": [],
+ "source": [
+ "# Pairwise Pearson correlation between all columns of df (including price).\n",
+ "correlation = df.corr(method='pearson')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 912
+ },
+ "id": "yrx5c5ASk4sU",
+ "outputId": "6c8b7943-d183-44a3-aaf9-f99ebdd9597d"
+ },
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ "