From 00a46856abfd43cdb842310dc9b02bfc1ea6e6bd Mon Sep 17 00:00:00 2001
From: Diwakar Gupta <39624018+Diwakar-Gupta@users.noreply.github.com>
Date: Sun, 14 Aug 2022 14:38:11 +0530
Subject: [PATCH] NLP intro

---
 22-08-14-NLP/NLP_Intro_April.ipynb | 1681 ++++++++++++++++++++++++++++
 1 file changed, 1681 insertions(+)
 create mode 100644 22-08-14-NLP/NLP_Intro_April.ipynb
diff --git a/22-08-14-NLP/NLP_Intro_April.ipynb b/22-08-14-NLP/NLP_Intro_April.ipynb
new file mode 100644
index 0000000..5428303
--- /dev/null
+++ b/22-08-14-NLP/NLP_Intro_April.ipynb
@@ -0,0 +1,1681 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "NLP_Intro_April.ipynb",
+      "provenance": [],
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/Diwakar-Gupta/Data-Science-APRIL/blob/main/22-08-14-NLP/NLP_Intro_April.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "EtjVlIDUCt7C"
+      },
+      "outputs": [],
+      "source": [
+        "import nltk ## Natural Language ToolKit\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "nltk.download('all')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "PpONcgZdD20G",
+        "outputId": "86abe33a-f529-4ca2-f371-fff051d57dfc"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "[nltk_data] Downloading collection 'all'\n",
+            "[nltk_data]    | \n",
+            "[nltk_data]    | Downloading package abc to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/abc.zip.\n",
+            "[nltk_data]    | Downloading package alpino to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/alpino.zip.\n",
+            "[nltk_data]    | Downloading package averaged_perceptron_tagger to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping taggers/averaged_perceptron_tagger.zip.\n",
+            "[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping\n",
+            "[nltk_data]    |       taggers/averaged_perceptron_tagger_ru.zip.\n",
+            "[nltk_data]    | Downloading package basque_grammars to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping grammars/basque_grammars.zip.\n",
+            "[nltk_data]    | Downloading package biocreative_ppi to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/biocreative_ppi.zip.\n",
+            "[nltk_data]    | Downloading package bllip_wsj_no_aux to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping models/bllip_wsj_no_aux.zip.\n",
+            "[nltk_data]    | Downloading package book_grammars to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping grammars/book_grammars.zip.\n",
+            "[nltk_data]    | Downloading package brown to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/brown.zip.\n",
+            "[nltk_data]    | Downloading package brown_tei to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/brown_tei.zip.\n",
+            "[nltk_data]    | Downloading package cess_cat to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/cess_cat.zip.\n",
+            "[nltk_data]    | Downloading package cess_esp to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/cess_esp.zip.\n",
+            "[nltk_data]    | Downloading package chat80 to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/chat80.zip.\n",
+            "[nltk_data]    | Downloading package city_database to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/city_database.zip.\n",
+            "[nltk_data]    | Downloading package cmudict to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/cmudict.zip.\n",
+            "[nltk_data]    | Downloading package comparative_sentences to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/comparative_sentences.zip.\n",
+            "[nltk_data]    | Downloading package comtrans to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package conll2000 to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/conll2000.zip.\n",
+            "[nltk_data]    | Downloading package conll2002 to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/conll2002.zip.\n",
+            "[nltk_data]    | Downloading package conll2007 to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package crubadan to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/crubadan.zip.\n",
+            "[nltk_data]    | Downloading package dependency_treebank to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/dependency_treebank.zip.\n",
+            "[nltk_data]    | Downloading package dolch to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/dolch.zip.\n",
+            "[nltk_data]    | Downloading package europarl_raw to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/europarl_raw.zip.\n",
+            "[nltk_data]    | Downloading package extended_omw to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package floresta to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/floresta.zip.\n",
+            "[nltk_data]    | Downloading package framenet_v15 to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/framenet_v15.zip.\n",
+            "[nltk_data]    | Downloading package framenet_v17 to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/framenet_v17.zip.\n",
+            "[nltk_data]    | Downloading package gazetteers to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/gazetteers.zip.\n",
+            "[nltk_data]    | Downloading package genesis to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/genesis.zip.\n",
+            "[nltk_data]    | Downloading package gutenberg to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/gutenberg.zip.\n",
+            "[nltk_data]    | Downloading package ieer to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/ieer.zip.\n",
+            "[nltk_data]    | Downloading package inaugural to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/inaugural.zip.\n",
+            "[nltk_data]    | Downloading package indian to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/indian.zip.\n",
+            "[nltk_data]    | Downloading package jeita to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package kimmo to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/kimmo.zip.\n",
+            "[nltk_data]    | Downloading package knbc to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package large_grammars to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping grammars/large_grammars.zip.\n",
+            "[nltk_data]    | Downloading package lin_thesaurus to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/lin_thesaurus.zip.\n",
+            "[nltk_data]    | Downloading package mac_morpho to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/mac_morpho.zip.\n",
+            "[nltk_data]    | Downloading package machado to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package masc_tagged to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package maxent_ne_chunker to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping chunkers/maxent_ne_chunker.zip.\n",
+            "[nltk_data]    | Downloading package maxent_treebank_pos_tagger to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping taggers/maxent_treebank_pos_tagger.zip.\n",
+            "[nltk_data]    | Downloading package moses_sample to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping models/moses_sample.zip.\n",
+            "[nltk_data]    | Downloading package movie_reviews to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/movie_reviews.zip.\n",
+            "[nltk_data]    | Downloading package mte_teip5 to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/mte_teip5.zip.\n",
+            "[nltk_data]    | Downloading package mwa_ppdb to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping misc/mwa_ppdb.zip.\n",
+            "[nltk_data]    | Downloading package names to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/names.zip.\n",
+            "[nltk_data]    | Downloading package nombank.1.0 to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package nonbreaking_prefixes to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/nonbreaking_prefixes.zip.\n",
+            "[nltk_data]    | Downloading package nps_chat to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/nps_chat.zip.\n",
+            "[nltk_data]    | Downloading package omw to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package omw-1.4 to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package opinion_lexicon to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/opinion_lexicon.zip.\n",
+            "[nltk_data]    | Downloading package panlex_swadesh to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package paradigms to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/paradigms.zip.\n",
+            "[nltk_data]    | Downloading package pe08 to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/pe08.zip.\n",
+            "[nltk_data]    | Downloading package perluniprops to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping misc/perluniprops.zip.\n",
+            "[nltk_data]    | Downloading package pil to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/pil.zip.\n",
+            "[nltk_data]    | Downloading package pl196x to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/pl196x.zip.\n",
+            "[nltk_data]    | Downloading package porter_test to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping stemmers/porter_test.zip.\n",
+            "[nltk_data]    | Downloading package ppattach to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/ppattach.zip.\n",
+            "[nltk_data]    | Downloading package problem_reports to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/problem_reports.zip.\n",
+            "[nltk_data]    | Downloading package product_reviews_1 to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/product_reviews_1.zip.\n",
+            "[nltk_data]    | Downloading package product_reviews_2 to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/product_reviews_2.zip.\n",
+            "[nltk_data]    | Downloading package propbank to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package pros_cons to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/pros_cons.zip.\n",
+            "[nltk_data]    | Downloading package ptb to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/ptb.zip.\n",
+            "[nltk_data]    | Downloading package punkt to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping tokenizers/punkt.zip.\n",
+            "[nltk_data]    | Downloading package qc to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/qc.zip.\n",
+            "[nltk_data]    | Downloading package reuters to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package rslp to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping stemmers/rslp.zip.\n",
+            "[nltk_data]    | Downloading package rte to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/rte.zip.\n",
+            "[nltk_data]    | Downloading package sample_grammars to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping grammars/sample_grammars.zip.\n",
+            "[nltk_data]    | Downloading package semcor to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package senseval to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/senseval.zip.\n",
+            "[nltk_data]    | Downloading package sentence_polarity to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/sentence_polarity.zip.\n",
+            "[nltk_data]    | Downloading package sentiwordnet to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/sentiwordnet.zip.\n",
+            "[nltk_data]    | Downloading package shakespeare to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/shakespeare.zip.\n",
+            "[nltk_data]    | Downloading package sinica_treebank to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/sinica_treebank.zip.\n",
+            "[nltk_data]    | Downloading package smultron to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/smultron.zip.\n",
+            "[nltk_data]    | Downloading package snowball_data to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package spanish_grammars to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping grammars/spanish_grammars.zip.\n",
+            "[nltk_data]    | Downloading package state_union to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/state_union.zip.\n",
+            "[nltk_data]    | Downloading package stopwords to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/stopwords.zip.\n",
+            "[nltk_data]    | Downloading package subjectivity to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/subjectivity.zip.\n",
+            "[nltk_data]    | Downloading package swadesh to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/swadesh.zip.\n",
+            "[nltk_data]    | Downloading package switchboard to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/switchboard.zip.\n",
+            "[nltk_data]    | Downloading package tagsets to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping help/tagsets.zip.\n",
+            "[nltk_data]    | Downloading package timit to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/timit.zip.\n",
+            "[nltk_data]    | Downloading package toolbox to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/toolbox.zip.\n",
+            "[nltk_data]    | Downloading package treebank to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/treebank.zip.\n",
+            "[nltk_data]    | Downloading package twitter_samples to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/twitter_samples.zip.\n",
+            "[nltk_data]    | Downloading package udhr to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/udhr.zip.\n",
+            "[nltk_data]    | Downloading package udhr2 to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/udhr2.zip.\n",
+            "[nltk_data]    | Downloading package unicode_samples to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/unicode_samples.zip.\n",
+            "[nltk_data]    | Downloading package universal_tagset to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping taggers/universal_tagset.zip.\n",
+            "[nltk_data]    | Downloading package universal_treebanks_v20 to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package vader_lexicon to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package verbnet to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/verbnet.zip.\n",
+            "[nltk_data]    | Downloading package verbnet3 to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/verbnet3.zip.\n",
+            "[nltk_data]    | Downloading package webtext to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/webtext.zip.\n",
+            "[nltk_data]    | Downloading package wmt15_eval to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping models/wmt15_eval.zip.\n",
+            "[nltk_data]    | Downloading package word2vec_sample to\n",
+            "[nltk_data]    |     /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping models/word2vec_sample.zip.\n",
+            "[nltk_data]    | Downloading package wordnet to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package wordnet2021 to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package wordnet31 to /root/nltk_data...\n",
+            "[nltk_data]    | Downloading package wordnet_ic to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/wordnet_ic.zip.\n",
+            "[nltk_data]    | Downloading package words to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/words.zip.\n",
+            "[nltk_data]    | Downloading package ycoe to /root/nltk_data...\n",
+            "[nltk_data]    |   Unzipping corpora/ycoe.zip.\n",
+            "[nltk_data]    | \n",
+            "[nltk_data]  Done downloading collection all\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "True"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 2
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "paragraph = \"\"\"\n",
+        "Looking back on a childhood filled with events and memories, I find it rather difficult to pick one that leaves me with the fabled \"warm and fuzzy feelings.\" As the daughter of an Air Force major, I had the pleasure of traveling across America in many moving trips. I have visited the monstrous trees of the Sequoia National Forest, stood on the edge of the Grand Canyon and have jumped on the beds at Caesar's Palace in Lake Tahoe.\"\n",
+        "\"The day I picked my dog up from the pound was one of the happiest days of both of our lives. I had gone to the pound just a week earlier with the idea that I would just \"look\" at a puppy. Of course, you can no more just look at those squiggling little faces so filled with hope and joy than you can stop the sun from setting in the evening. I knew within minutes of walking in the door that I would get a puppy… but it wasn't until I saw him that I knew I had found my puppy.\"\n",
+        "\"Looking for houses was supposed to be a fun and exciting process. \"\"\" "
+      ],
+      "metadata": {
+        "id": "D9AVXZbOD90K"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "sentences=nltk.sent_tokenize(paragraph)"
+      ],
+      "metadata": {
+        "id": "02P9DKMPEgpn"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "sentences"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Ie-3XElKEpQg",
+        "outputId": "e6de2fac-35a9-46ea-dbf1-5adcb2d28515"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "['\\nLooking back on a childhood filled with events and memories, I find it rather difficult to pick one that leaves me with the fabled \"warm and fuzzy feelings.\"',\n",
+              " 'As the daughter of an Air Force major, I had the pleasure of traveling across America in many moving trips.',\n",
+              " 'I have visited the monstrous trees of the Sequoia National Forest, stood on the edge of the Grand Canyon and have jumped on the beds at Caesar\\'s Palace in Lake Tahoe.\"',\n",
+              " '\"The day I picked my dog up from the pound was one of the happiest days of both of our lives.',\n",
+              " 'I had gone to the pound just a week earlier with the idea that I would just \"look\" at a puppy.',\n",
+              " 'Of course, you can no more just look at those squiggling little faces so filled with hope and joy than you can stop the sun from setting in the evening.',\n",
+              " 'I knew within minutes of walking in the door that I would get a puppy… but it wasn\\'t until I saw him that I knew I had found my puppy.\"',\n",
+              " '\"Looking for houses was supposed to be a fun and exciting process.']"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 6
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "words=nltk.word_tokenize(paragraph)"
+      ],
+      "metadata": {
+        "id": "KpAOneHUEsLh"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "words"
+      ],
+      "metadata": {
+        "id": "4T8lkbttEzis"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "## Stemming: remove stop words, then reduce each remaining word to its stem"
+      ],
+      "metadata": {
+        "id": "Lf7SZN19E28b"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "sentences = nltk.sent_tokenize(paragraph)"
+      ],
+      "metadata": {
+        "id": "aw7iois5E0Qx"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from nltk.stem import PorterStemmer\n",
+        "from nltk.corpus import stopwords\n",
+        "stemmer=PorterStemmer()"
+      ],
+      "metadata": {
+        "id": "lmXTw0I0E7Ka"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "stopwords.words('english')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "U4_OpnzaFNgv",
+        "outputId": "3b832b64-7510-41a7-aa45-9d403bee48ba"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "['i',\n",
+              " 'me',\n",
+              " 'my',\n",
+              " 'myself',\n",
+              " 'we',\n",
+              " 'our',\n",
+              " 'ours',\n",
+              " 'ourselves',\n",
+              " 'you',\n",
+              " \"you're\",\n",
+              " \"you've\",\n",
+              " \"you'll\",\n",
+              " \"you'd\",\n",
+              " 'your',\n",
+              " 'yours',\n",
+              " 'yourself',\n",
+              " 'yourselves',\n",
+              " 'he',\n",
+              " 'him',\n",
+              " 'his',\n",
+              " 'himself',\n",
+              " 'she',\n",
+              " \"she's\",\n",
+              " 'her',\n",
+              " 'hers',\n",
+              " 'herself',\n",
+              " 'it',\n",
+              " \"it's\",\n",
+              " 'its',\n",
+              " 'itself',\n",
+              " 'they',\n",
+              " 'them',\n",
+              " 'their',\n",
+              " 'theirs',\n",
+              " 'themselves',\n",
+              " 'what',\n",
+              " 'which',\n",
+              " 'who',\n",
+              " 'whom',\n",
+              " 'this',\n",
+              " 'that',\n",
+              " \"that'll\",\n",
+              " 'these',\n",
+              " 'those',\n",
+              " 'am',\n",
+              " 'is',\n",
+              " 'are',\n",
+              " 'was',\n",
+              " 'were',\n",
+              " 'be',\n",
+              " 'been',\n",
+              " 'being',\n",
+              " 'have',\n",
+              " 'has',\n",
+              " 'had',\n",
+              " 'having',\n",
+              " 'do',\n",
+              " 'does',\n",
+              " 'did',\n",
+              " 'doing',\n",
+              " 'a',\n",
+              " 'an',\n",
+              " 'the',\n",
+              " 'and',\n",
+              " 'but',\n",
+              " 'if',\n",
+              " 'or',\n",
+              " 'because',\n",
+              " 'as',\n",
+              " 'until',\n",
+              " 'while',\n",
+              " 'of',\n",
+              " 'at',\n",
+              " 'by',\n",
+              " 'for',\n",
+              " 'with',\n",
+              " 'about',\n",
+              " 'against',\n",
+              " 'between',\n",
+              " 'into',\n",
+              " 'through',\n",
+              " 'during',\n",
+              " 'before',\n",
+              " 'after',\n",
+              " 'above',\n",
+              " 'below',\n",
+              " 'to',\n",
+              " 'from',\n",
+              " 'up',\n",
+              " 'down',\n",
+              " 'in',\n",
+              " 'out',\n",
+              " 'on',\n",
+              " 'off',\n",
+              " 'over',\n",
+              " 'under',\n",
+              " 'again',\n",
+              " 'further',\n",
+              " 'then',\n",
+              " 'once',\n",
+              " 'here',\n",
+              " 'there',\n",
+              " 'when',\n",
+              " 'where',\n",
+              " 'why',\n",
+              " 'how',\n",
+              " 'all',\n",
+              " 'any',\n",
+              " 'both',\n",
+              " 'each',\n",
+              " 'few',\n",
+              " 'more',\n",
+              " 'most',\n",
+              " 'other',\n",
+              " 'some',\n",
+              " 'such',\n",
+              " 'no',\n",
+              " 'nor',\n",
+              " 'not',\n",
+              " 'only',\n",
+              " 'own',\n",
+              " 'same',\n",
+              " 'so',\n",
+              " 'than',\n",
+              " 'too',\n",
+              " 'very',\n",
+              " 's',\n",
+              " 't',\n",
+              " 'can',\n",
+              " 'will',\n",
+              " 'just',\n",
+              " 'don',\n",
+              " \"don't\",\n",
+              " 'should',\n",
+              " \"should've\",\n",
+              " 'now',\n",
+              " 'd',\n",
+              " 'll',\n",
+              " 'm',\n",
+              " 'o',\n",
+              " 're',\n",
+              " 've',\n",
+              " 'y',\n",
+              " 'ain',\n",
+              " 'aren',\n",
+              " \"aren't\",\n",
+              " 'couldn',\n",
+              " \"couldn't\",\n",
+              " 'didn',\n",
+              " \"didn't\",\n",
+              " 'doesn',\n",
+              " \"doesn't\",\n",
+              " 'hadn',\n",
+              " \"hadn't\",\n",
+              " 'hasn',\n",
+              " \"hasn't\",\n",
+              " 'haven',\n",
+              " \"haven't\",\n",
+              " 'isn',\n",
+              " \"isn't\",\n",
+              " 'ma',\n",
+              " 'mightn',\n",
+              " \"mightn't\",\n",
+              " 'mustn',\n",
+              " \"mustn't\",\n",
+              " 'needn',\n",
+              " \"needn't\",\n",
+              " 'shan',\n",
+              " \"shan't\",\n",
+              " 'shouldn',\n",
+              " \"shouldn't\",\n",
+              " 'wasn',\n",
+              " \"wasn't\",\n",
+              " 'weren',\n",
+              " \"weren't\",\n",
+              " 'won',\n",
+              " \"won't\",\n",
+              " 'wouldn',\n",
+              " \"wouldn't\"]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 12
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "sentences"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "bfInToe-FeF_",
+        "outputId": "9c9b13af-5a22-4214-a349-cb0ee794a850"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "['\\nLooking back on a childhood filled with events and memories, I find it rather difficult to pick one that leaves me with the fabled \"warm and fuzzy feelings.\"',\n",
+              " 'As the daughter of an Air Force major, I had the pleasure of traveling across America in many moving trips.',\n",
+              " 'I have visited the monstrous trees of the Sequoia National Forest, stood on the edge of the Grand Canyon and have jumped on the beds at Caesar\\'s Palace in Lake Tahoe.\"',\n",
+              " '\"The day I picked my dog up from the pound was one of the happiest days of both of our lives.',\n",
+              " 'I had gone to the pound just a week earlier with the idea that I would just \"look\" at a puppy.',\n",
+              " 'Of course, you can no more just look at those squiggling little faces so filled with hope and joy than you can stop the sun from setting in the evening.',\n",
+              " 'I knew within minutes of walking in the door that I would get a puppy… but it wasn\\'t until I saw him that I knew I had found my puppy.\"',\n",
+              " '\"Looking for houses was supposed to be a fun and exciting process.']"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 16
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Stemming: replace each sentence with the stems of its non-stop-word tokens.\n",
+        "# The stop-word set is built once so every membership test is a cheap set lookup.\n",
+        "stop_words = set(stopwords.words('english'))\n",
+        "\n",
+        "for i in range(len(sentences)):\n",
+        "    words = nltk.word_tokenize(sentences[i])\n",
+        "    # NOTE: the filter is case-sensitive and runs before stemming, so capitalised\n",
+        "    # tokens such as 'I' or 'The' are kept (see the output of the next cell).\n",
+        "    words = [stemmer.stem(word) for word in words if word not in stop_words]\n",
+        "    sentences[i] = ' '.join(words)"
+      ],
+      "metadata": {
+        "id": "2pqIQUl9FHPC"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "sentences"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "mmykru4NFZcW",
+        "outputId": "886dda59-1892-4c99-cf4c-cb4f61a7946f"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[\"look back childhood fill event memori , i find rather difficult pick one leav fabl `` warm fuzzi feel . ''\",\n",
+              " 'as daughter air forc major , i pleasur travel across america mani move trip .',\n",
+              " \"i visit monstrou tree sequoia nation forest , stood edg grand canyon jump bed caesar 's palac lake taho . ''\",\n",
+              " '`` the day i pick dog pound one happiest day live .',\n",
+              " \"i gone pound week earlier idea i would `` look '' puppi .\",\n",
+              " 'of cours , look squiggl littl face fill hope joy stop sun set even .',\n",
+              " \"i knew within minut walk door i would get puppy… n't i saw i knew i found puppi . ''\",\n",
+              " '`` look hous suppos fun excit process .']"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 18
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Lemmatization"
+      ],
+      "metadata": {
+        "id": "R1AAV3d0GHSp"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from nltk.stem import WordNetLemmatizer\n",
+        "from nltk.corpus import stopwords\n",
+        "\n",
+        "sentences = nltk.sent_tokenize(paragraph)\n",
+        "lemmatizer = WordNetLemmatizer()\n",
+        "# Build the stop-word set once so every membership test is a cheap set lookup.\n",
+        "stop_words = set(stopwords.words('english'))\n",
+        "\n",
+        "# Replace each sentence with the lemmas of its non-stop-word tokens.\n",
+        "# lemmatize() is called without a POS tag, so NLTK treats every token as a noun.\n",
+        "for i in range(len(sentences)):\n",
+        "    words = nltk.word_tokenize(sentences[i])\n",
+        "    words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]\n",
+        "    sentences[i] = ' '.join(words)"
+      ],
+      "metadata": {
+        "id": "F--Uhj7tFxiZ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "sentences"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "G_1QMDdeGSOb",
+        "outputId": "cf478d5a-a28c-4e86-add4-f4c39814c843"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[\"Looking back childhood filled event memory , I find rather difficult pick one leaf fabled `` warm fuzzy feeling . ''\",\n",
+              " 'As daughter Air Force major , I pleasure traveling across America many moving trip .',\n",
+              " \"I visited monstrous tree Sequoia National Forest , stood edge Grand Canyon jumped bed Caesar 's Palace Lake Tahoe . ''\",\n",
+              " '`` The day I picked dog pound one happiest day life .',\n",
+              " \"I gone pound week earlier idea I would `` look '' puppy .\",\n",
+              " 'Of course , look squiggling little face filled hope joy stop sun setting evening .',\n",
+              " \"I knew within minute walking door I would get puppy… n't I saw I knew I found puppy . ''\",\n",
+              " '`` Looking house supposed fun exciting process .']"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 20
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "POS Tagging"
+      ],
+      "metadata": {
+        "id": "3lkNI8b7Gjqu"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "sentence = \"A quick brown fox runs over a greedy dog\"\n",
+        "token = nltk.word_tokenize(sentence)\n",
+        "token"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "hKprfi1MGXgG",
+        "outputId": "14cad267-df4d-4600-da46-eb891a92a42e"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "['A', 'quick', 'brown', 'fox', 'runs', 'over', 'a', 'greedy', 'dog']"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 21
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "nltk.pos_tag(token)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "-O5yFTdRG6Vz",
+        "outputId": "7d79f04e-83e7-4957-c554-414b57486d25"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[('A', 'DT'),\n",
+              " ('quick', 'JJ'),\n",
+              " ('brown', 'NN'),\n",
+              " ('fox', 'NN'),\n",
+              " ('runs', 'VBZ'),\n",
+              " ('over', 'IN'),\n",
+              " ('a', 'DT'),\n",
+              " ('greedy', 'NN'),\n",
+              " ('dog', 'NN')]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 23
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# We can get more details about any POS tag using the help function of NLTK as follows.\n",
+        "nltk.help.upenn_tagset(\"JJ\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1twd5yS-G9TQ",
+        "outputId": "531241a4-8283-4d73-a34f-d9092abeacf2"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "JJ: adjective or numeral, ordinal\n",
+            "    third ill-mannered pre-war regrettable oiled calamitous first separable\n",
+            "    ectoplasmic battery-powered participatory fourth still-to-be-named\n",
+            "    multilingual multi-disciplinary ...\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "nltk.help.upenn_tagset(\"VBZ\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "lbVf80v1HF-5",
+        "outputId": "19dc06c9-a9ad-4f73-ef18-611bf1e7460a"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "VBZ: verb, present tense, 3rd person singular\n",
+            "    bases reconstructs marks mixes displeases seals carps weaves snatches\n",
+            "    slumps stretches authorizes smolders pictures emerges stockpiles\n",
+            "    seduces fizzes uses bolsters slaps speaks pleads ...\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "BAG OF WORDS"
+      ],
+      "metadata": {
+        "id": "x6wAhMyrKtIo"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import re #Regular Expression\n",
+        "from nltk.corpus import stopwords\n",
+        "from nltk.stem.porter import PorterStemmer\n",
+        "from nltk.stem import WordNetLemmatizer"
+      ],
+      "metadata": {
+        "id": "_PrvOiaJHNvA"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "ps = PorterStemmer()\n",
+        "wordnet=WordNetLemmatizer()\n",
+        "sentences = nltk.sent_tokenize(paragraph)\n",
+        "corpus = []"
+      ],
+      "metadata": {
+        "id": "_Ez-DYGUPDZ7"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "sentences[0]+'11212'"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 53
+        },
+        "id": "h6G9qe1tPXlN",
+        "outputId": "c0f3b138-0caf-473b-e1dd-cdf3b3d15a10"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "'\\nLooking back on a childhood filled with events and memories, I find it rather difficult to pick one that leaves me with the fabled \"warm and fuzzy feelings.\"11212'"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            }
+          },
+          "metadata": {},
+          "execution_count": 31
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "re.sub('[^a-zA-Z]', ' ', sentences[0])"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 53
+        },
+        "id": "VaStGzq4PZpm",
+        "outputId": "320c18f3-7aef-44a9-fded-0797ec6a1e12"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "' Looking back on a childhood filled with events and memories  I find it rather difficult to pick one that leaves me with the fabled  warm and fuzzy feelings       '"
+            ],
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            }
+          },
+          "metadata": {},
+          "execution_count": 32
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Start from an empty corpus so that re-running this cell does not append duplicates.\n",
+        "corpus = []\n",
+        "# Build the stop-word set once so every membership test is a cheap set lookup.\n",
+        "stop_words = set(stopwords.words('english'))\n",
+        "\n",
+        "for i in range(len(sentences)):\n",
+        "    # Keep letters only, lowercase, and split on whitespace.\n",
+        "    tokens = re.sub('[^a-zA-Z]', ' ', sentences[i]).lower().split()\n",
+        "    # Drop stop words, lemmatize the rest, and store the sentence as one string.\n",
+        "    review = ' '.join(wordnet.lemmatize(word) for word in tokens if word not in stop_words)\n",
+        "    corpus.append(review)"
+      ],
+      "metadata": {
+        "id": "XsuGJtw7PGGq"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "corpus"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "HmdObQvfPyUU",
+        "outputId": "3d03fb64-fba5-46e5-9c09-88099479d484"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "['looking back childhood filled event memory find rather difficult pick one leaf fabled warm fuzzy feeling',\n",
+              " 'daughter air force major pleasure traveling across america many moving trip',\n",
+              " 'visited monstrous tree sequoia national forest stood edge grand canyon jumped bed caesar palace lake tahoe',\n",
+              " 'day picked dog pound one happiest day life',\n",
+              " 'gone pound week earlier idea would look puppy',\n",
+              " 'course look squiggling little face filled hope joy stop sun setting evening',\n",
+              " 'knew within minute walking door would get puppy saw knew found puppy',\n",
+              " 'looking house supposed fun exciting process']"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 34
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Creating the Bag of Words model\n",
+        "from sklearn.feature_extraction.text import CountVectorizer\n",
+        "cv = CountVectorizer(max_features = 1500)\n",
+        "X = cv.fit_transform(corpus).toarray()"
+      ],
+      "metadata": {
+        "id": "ectRiysqPzMQ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "X"
+      ],
+      "metadata": {
+        "id": "MrwQOSesP8eo"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "X.shape"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "E0v7a9BeP85U",
+        "outputId": "128d5968-342f-423c-d8f7-6224ecb2242f"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(8, 79)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 37
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "TF-IDF"
+      ],
+      "metadata": {
+        "id": "UTQhnfJRQImf"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import re\n",
+        "from nltk.corpus import stopwords\n",
+        "from nltk.stem.porter import PorterStemmer\n",
+        "from nltk.stem import WordNetLemmatizer"
+      ],
+      "metadata": {
+        "id": "GqcEOcjaQEvE"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "wordnet = WordNetLemmatizer()\n",
+        "sentences = nltk.sent_tokenize(paragraph)\n",
+        "# Build the stop-word set once so every membership test is a cheap set lookup.\n",
+        "stop_words = set(stopwords.words('english'))\n",
+        "\n",
+        "# Same cleaning as the Bag-of-Words section: letters only, lowercase,\n",
+        "# stop words removed, remaining words lemmatized.\n",
+        "corpus = []\n",
+        "for i in range(len(sentences)):\n",
+        "    tokens = re.sub('[^a-zA-Z]', ' ', sentences[i]).lower().split()\n",
+        "    review = ' '.join(wordnet.lemmatize(word) for word in tokens if word not in stop_words)\n",
+        "    corpus.append(review)"
+      ],
+      "metadata": {
+        "id": "97CExC3IQLZA"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Creating TF-IDF Model\n",
+        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
+        "tfidf=TfidfVectorizer()"
+      ],
+      "metadata": {
+        "id": "fZa067jbQNZs"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "X_tfidf=tfidf.fit_transform(corpus).toarray()"
+      ],
+      "metadata": {
+        "id": "7p3a-r96QQYB"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "X_tfidf"
+      ],
+      "metadata": {
+        "id": "lACUy-n7QRzC"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "X_tfidf.shape"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "PA1vkbzKQS-E",
+        "outputId": "6dda2b2b-6cf5-473c-b164-718881abec6c"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "(8, 79)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 43
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Word2Vec"
+      ],
+      "metadata": {
+        "id": "yYrQ9yQQQuCn"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Word2Vec intuition: 'king' is similar to 'queen', 'man' is similar to 'woman'.\n",
+        "# Normalise the raw paragraph before tokenizing: replace bracketed numbers such as [3]\n",
+        "# with a space, lowercase, blank out digits, and collapse whitespace runs to one space.\n",
+        "text = re.sub(r'\\[[0-9]*\\]', ' ', paragraph)\n",
+        "text = re.sub(r'\\s+', ' ', text).lower()\n",
+        "text = re.sub(r'\\s+', ' ', re.sub(r'\\d', ' ', text))"
+      ],
+      "metadata": {
+        "id": "m5AucxtkQabA"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Preparing the dataset\n",
+        "sentences = nltk.sent_tokenize(text)"
+      ],
+      "metadata": {
+        "id": "SuWK_DUGS-oP"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "sentences"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "kSLI0sVbTCuW",
+        "outputId": "f3c818a4-a60d-4a7a-b42a-752406f5501e"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[' looking back on a childhood filled with events and memories, i find it rather difficult to pick one that leaves me with the fabled \"warm and fuzzy feelings.\"',\n",
+              " 'as the daughter of an air force major, i had the pleasure of traveling across america in many moving trips.',\n",
+              " 'i have visited the monstrous trees of the sequoia national forest, stood on the edge of the grand canyon and have jumped on the beds at caesar\\'s palace in lake tahoe.\"',\n",
+              " '\"the day i picked my dog up from the pound was one of the happiest days of both of our lives.',\n",
+              " 'i had gone to the pound just a week earlier with the idea that i would just \"look\" at a puppy.',\n",
+              " 'of course, you can no more just look at those squiggling little faces so filled with hope and joy than you can stop the sun from setting in the evening.',\n",
+              " 'i knew within minutes of walking in the door that i would get a puppy… but it wasn\\'t until i saw him that i knew i had found my puppy.\"',\n",
+              " '\"looking for houses was supposed to be a fun and exciting process.']"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 46
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "sentences = [nltk.word_tokenize(sentence) for sentence in sentences]"
+      ],
+      "metadata": {
+        "id": "yVOYBPlCTEz-"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Stop-word removal\n",
+        "# Build the stop-word set once: looking a token up in a set is constant time, whereas\n",
+        "# calling stopwords.words('english') per token re-reads the list and scans it linearly.\n",
+        "stop_words = set(stopwords.words('english'))\n",
+        "\n",
+        "# Only stop words are dropped; punctuation tokens stay in each sentence.\n",
+        "for i in range(len(sentences)):\n",
+        "    sentences[i] = [word for word in sentences[i] if word not in stop_words]"
+      ],
+      "metadata": {
+        "id": "D4poc4B4THho"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "sentences"
+      ],
+      "metadata": {
+        "id": "V8QdexkOTIy5"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from gensim.models import Word2Vec"
+      ],
+      "metadata": {
+        "id": "BwMAygitTKnk"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model=Word2Vec(sentences,min_count=1)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "mBxyqLEZTUpH",
+        "outputId": "3d213927-9178-4a51-f91a-81b2636614cd"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "WARNING:gensim.models.base_any2vec:under 10 jobs per worker: consider setting a smaller `batch_words' for smoother alpha decay\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "DPiFuI5-Te9K",
+        "outputId": "28eec034-8600-4a0e-9c45-6ba96512959a"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "<gensim.models.word2vec.Word2Vec at 0x7f28215e4090>"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 52
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# gensim 4.0 removed `wv.vocab` in favour of `wv.key_to_index`; show whichever one exists.\n",
+        "model.wv.key_to_index if hasattr(model.wv, 'key_to_index') else model.wv.vocab"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "uQl63dqLTgCN",
+        "outputId": "457ccdcb-b4ba-4083-efe3-5bc2cbef9f37"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "{\"''\": <gensim.models.keyedvectors.Vocab at 0x7f28215e4610>,\n",
+              " \"'s\": <gensim.models.keyedvectors.Vocab at 0x7f28215e4c50>,\n",
+              " ',': <gensim.models.keyedvectors.Vocab at 0x7f28215e4310>,\n",
+              " '.': <gensim.models.keyedvectors.Vocab at 0x7f28215e45d0>,\n",
+              " '``': <gensim.models.keyedvectors.Vocab at 0x7f28215e44d0>,\n",
+              " 'across': <gensim.models.keyedvectors.Vocab at 0x7f28215e47d0>,\n",
+              " 'air': <gensim.models.keyedvectors.Vocab at 0x7f28215e4690>,\n",
+              " 'america': <gensim.models.keyedvectors.Vocab at 0x7f28215e4810>,\n",
+              " 'back': <gensim.models.keyedvectors.Vocab at 0x7f28215df750>,\n",
+              " 'beds': <gensim.models.keyedvectors.Vocab at 0x7f28215e4bd0>,\n",
+              " 'caesar': <gensim.models.keyedvectors.Vocab at 0x7f28215e4c10>,\n",
+              " 'canyon': <gensim.models.keyedvectors.Vocab at 0x7f28215e4b50>,\n",
+              " 'childhood': <gensim.models.keyedvectors.Vocab at 0x7f28215df390>,\n",
+              " 'course': <gensim.models.keyedvectors.Vocab at 0x7f28215eb110>,\n",
+              " 'daughter': <gensim.models.keyedvectors.Vocab at 0x7f28215e4650>,\n",
+              " 'day': <gensim.models.keyedvectors.Vocab at 0x7f28215e4d50>,\n",
+              " 'days': <gensim.models.keyedvectors.Vocab at 0x7f28215e4e90>,\n",
+              " 'difficult': <gensim.models.keyedvectors.Vocab at 0x7f28215e4210>,\n",
+              " 'dog': <gensim.models.keyedvectors.Vocab at 0x7f28215e4dd0>,\n",
+              " 'door': <gensim.models.keyedvectors.Vocab at 0x7f28215eb490>,\n",
+              " 'earlier': <gensim.models.keyedvectors.Vocab at 0x7f28215e4f90>,\n",
+              " 'edge': <gensim.models.keyedvectors.Vocab at 0x7f28215e4ad0>,\n",
+              " 'evening': <gensim.models.keyedvectors.Vocab at 0x7f28215eb350>,\n",
+              " 'events': <gensim.models.keyedvectors.Vocab at 0x7f2823ff5b90>,\n",
+              " 'exciting': <gensim.models.keyedvectors.Vocab at 0x7f28215eb6d0>,\n",
+              " 'fabled': <gensim.models.keyedvectors.Vocab at 0x7f28215e4490>,\n",
+              " 'faces': <gensim.models.keyedvectors.Vocab at 0x7f28215eb1d0>,\n",
+              " 'feelings': <gensim.models.keyedvectors.Vocab at 0x7f28215e4590>,\n",
+              " 'filled': <gensim.models.keyedvectors.Vocab at 0x7f28215dff50>,\n",
+              " 'find': <gensim.models.keyedvectors.Vocab at 0x7f28215e4390>,\n",
+              " 'force': <gensim.models.keyedvectors.Vocab at 0x7f28215e46d0>,\n",
+              " 'forest': <gensim.models.keyedvectors.Vocab at 0x7f28215e4a50>,\n",
+              " 'found': <gensim.models.keyedvectors.Vocab at 0x7f28215eb5d0>,\n",
+              " 'fun': <gensim.models.keyedvectors.Vocab at 0x7f28215eb690>,\n",
+              " 'fuzzy': <gensim.models.keyedvectors.Vocab at 0x7f28215e4550>,\n",
+              " 'get': <gensim.models.keyedvectors.Vocab at 0x7f28215eb4d0>,\n",
+              " 'gone': <gensim.models.keyedvectors.Vocab at 0x7f28215e4f10>,\n",
+              " 'grand': <gensim.models.keyedvectors.Vocab at 0x7f28215e4b10>,\n",
+              " 'happiest': <gensim.models.keyedvectors.Vocab at 0x7f28215e4e50>,\n",
+              " 'hope': <gensim.models.keyedvectors.Vocab at 0x7f28215eb210>,\n",
+              " 'houses': <gensim.models.keyedvectors.Vocab at 0x7f28215eb610>,\n",
+              " 'idea': <gensim.models.keyedvectors.Vocab at 0x7f28215e4fd0>,\n",
+              " 'joy': <gensim.models.keyedvectors.Vocab at 0x7f28215eb250>,\n",
+              " 'jumped': <gensim.models.keyedvectors.Vocab at 0x7f28215e4b90>,\n",
+              " 'knew': <gensim.models.keyedvectors.Vocab at 0x7f28215eb390>,\n",
+              " 'lake': <gensim.models.keyedvectors.Vocab at 0x7f28215e4cd0>,\n",
+              " 'leaves': <gensim.models.keyedvectors.Vocab at 0x7f28215e4450>,\n",
+              " 'little': <gensim.models.keyedvectors.Vocab at 0x7f28215eb190>,\n",
+              " 'lives': <gensim.models.keyedvectors.Vocab at 0x7f28215e4ed0>,\n",
+              " 'look': <gensim.models.keyedvectors.Vocab at 0x7f28215eb090>,\n",
+              " 'looking': <gensim.models.keyedvectors.Vocab at 0x7f28215df710>,\n",
+              " 'major': <gensim.models.keyedvectors.Vocab at 0x7f28215e4710>,\n",
+              " 'many': <gensim.models.keyedvectors.Vocab at 0x7f28215e4850>,\n",
+              " 'memories': <gensim.models.keyedvectors.Vocab at 0x7f28215e4290>,\n",
+              " 'minutes': <gensim.models.keyedvectors.Vocab at 0x7f28215eb410>,\n",
+              " 'monstrous': <gensim.models.keyedvectors.Vocab at 0x7f28215e4950>,\n",
+              " 'moving': <gensim.models.keyedvectors.Vocab at 0x7f28215e4890>,\n",
+              " \"n't\": <gensim.models.keyedvectors.Vocab at 0x7f28215eb550>,\n",
+              " 'national': <gensim.models.keyedvectors.Vocab at 0x7f28215e4a10>,\n",
+              " 'one': <gensim.models.keyedvectors.Vocab at 0x7f28215e4410>,\n",
+              " 'palace': <gensim.models.keyedvectors.Vocab at 0x7f28215e4c90>,\n",
+              " 'pick': <gensim.models.keyedvectors.Vocab at 0x7f28215e43d0>,\n",
+              " 'picked': <gensim.models.keyedvectors.Vocab at 0x7f28215e4d90>,\n",
+              " 'pleasure': <gensim.models.keyedvectors.Vocab at 0x7f28215e4750>,\n",
+              " 'pound': <gensim.models.keyedvectors.Vocab at 0x7f28215e4e10>,\n",
+              " 'process': <gensim.models.keyedvectors.Vocab at 0x7f28215eb710>,\n",
+              " 'puppy': <gensim.models.keyedvectors.Vocab at 0x7f28215eb0d0>,\n",
+              " 'puppy…': <gensim.models.keyedvectors.Vocab at 0x7f28215eb510>,\n",
+              " 'rather': <gensim.models.keyedvectors.Vocab at 0x7f28215dfc50>,\n",
+              " 'saw': <gensim.models.keyedvectors.Vocab at 0x7f28215eb590>,\n",
+              " 'sequoia': <gensim.models.keyedvectors.Vocab at 0x7f28215e49d0>,\n",
+              " 'setting': <gensim.models.keyedvectors.Vocab at 0x7f28215eb310>,\n",
+              " 'squiggling': <gensim.models.keyedvectors.Vocab at 0x7f28215eb150>,\n",
+              " 'stood': <gensim.models.keyedvectors.Vocab at 0x7f28215e4a90>,\n",
+              " 'stop': <gensim.models.keyedvectors.Vocab at 0x7f28215eb290>,\n",
+              " 'sun': <gensim.models.keyedvectors.Vocab at 0x7f28215eb2d0>,\n",
+              " 'supposed': <gensim.models.keyedvectors.Vocab at 0x7f28215eb650>,\n",
+              " 'tahoe': <gensim.models.keyedvectors.Vocab at 0x7f28215e4d10>,\n",
+              " 'traveling': <gensim.models.keyedvectors.Vocab at 0x7f28215e4790>,\n",
+              " 'trees': <gensim.models.keyedvectors.Vocab at 0x7f28215e4990>,\n",
+              " 'trips': <gensim.models.keyedvectors.Vocab at 0x7f28215e48d0>,\n",
+              " 'visited': <gensim.models.keyedvectors.Vocab at 0x7f28215e4910>,\n",
+              " 'walking': <gensim.models.keyedvectors.Vocab at 0x7f28215eb450>,\n",
+              " 'warm': <gensim.models.keyedvectors.Vocab at 0x7f28215e4510>,\n",
+              " 'week': <gensim.models.keyedvectors.Vocab at 0x7f28215e4f50>,\n",
+              " 'within': <gensim.models.keyedvectors.Vocab at 0x7f28215eb3d0>,\n",
+              " 'would': <gensim.models.keyedvectors.Vocab at 0x7f28215eb050>}"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 53
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model.wv['within']"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "K_3q5caTTkPt",
+        "outputId": "482e91f2-7e1a-4e1d-dec1-4c7c335b1b32"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "array([ 9.3364244e-05, -2.0226017e-03,  3.0744560e-03,  2.4722507e-03,\n",
+              "       -1.7916801e-03,  1.1004605e-03,  1.5778342e-03, -3.4744558e-03,\n",
+              "        2.3985626e-03,  4.4293050e-03, -3.2416270e-03,  9.6469990e-04,\n",
+              "       -1.8468881e-03, -4.5837839e-03, -2.7734184e-04, -4.2819157e-03,\n",
+              "       -3.4303457e-04, -2.3855946e-03,  8.0992520e-04, -1.1062848e-03,\n",
+              "       -3.0107235e-03, -3.3425987e-03,  2.1235049e-03,  2.2391626e-03,\n",
+              "        3.5790335e-03, -5.0837500e-04, -2.4947856e-04, -3.1816968e-04,\n",
+              "        2.5805044e-03, -3.9695371e-03, -3.2627376e-04,  3.3404287e-03,\n",
+              "        3.3210497e-03, -3.7256633e-03,  2.4546732e-03, -3.5926504e-03,\n",
+              "       -3.1259684e-03,  4.1785319e-03, -1.8811250e-03, -3.2083079e-04,\n",
+              "        1.0983367e-03,  3.0588740e-03, -3.8055759e-03,  1.8654363e-03,\n",
+              "       -2.9959625e-03,  1.9540614e-03, -3.4162696e-03, -2.8583435e-03,\n",
+              "       -4.2043673e-03,  4.3449313e-03,  4.6059112e-03,  3.2427472e-03,\n",
+              "       -2.5208378e-03, -1.8257565e-03,  6.5149547e-04,  4.7284369e-03,\n",
+              "        4.6374514e-03, -6.3585694e-04, -3.1542520e-03,  3.3707032e-03,\n",
+              "       -1.2445718e-03, -3.5111818e-03,  6.5203488e-04,  1.2171916e-03,\n",
+              "       -2.3727534e-04, -3.1939638e-04,  9.9689921e-04,  2.6938734e-03,\n",
+              "        4.8971297e-03,  3.5206450e-03, -4.8659677e-03, -1.8277732e-03,\n",
+              "        2.6473652e-03,  2.9146350e-03, -4.9722218e-03,  2.6932417e-03,\n",
+              "        2.5721423e-03,  4.2625722e-03, -7.3851732e-04, -3.2395408e-03,\n",
+              "        1.5004680e-03, -1.8992539e-03,  4.8010377e-03,  4.0566269e-03,\n",
+              "       -1.9251317e-03, -2.0484554e-03, -1.7119809e-03, -4.4474346e-03,\n",
+              "       -2.1356612e-03, -4.4765472e-03,  4.5961127e-04, -2.1204483e-03,\n",
+              "        2.8737509e-04, -2.6111265e-03, -4.5112278e-03, -1.3529632e-03,\n",
+              "        2.2771490e-04, -4.9307575e-03, -4.3379571e-03,  1.6518446e-03],\n",
+              "      dtype=float32)"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 57
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "model.wv.most_similar('national', topn=10)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "PCP3GC49Ttlw",
+        "outputId": "1299a52e-d719-40f9-9e25-ab5a5de1d60c"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[('air', 0.2506329417228699),\n",
+              " ('monstrous', 0.20575261116027832),\n",
+              " (\"n't\", 0.19816187024116516),\n",
+              " ('lake', 0.17937466502189636),\n",
+              " ('pound', 0.17282086610794067),\n",
+              " ('fun', 0.13929110765457153),\n",
+              " ('leaves', 0.13837510347366333),\n",
+              " ('happiest', 0.12096145749092102),\n",
+              " ('feelings', 0.11812127381563187),\n",
+              " ('found', 0.10904533416032791)]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 58
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import gensim.downloader"
+      ],
+      "metadata": {
+        "id": "bdRhM-zpT_JZ"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "print(list(gensim.downloader.info()['models']))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "yT2Mq4AVUIRk",
+        "outputId": "7e829b86-82ee-4217-d8ad-e70f0dec54f2"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "['fasttext-wiki-news-subwords-300', 'conceptnet-numberbatch-17-06-300', 'word2vec-ruscorpora-300', 'word2vec-google-news-300', 'glove-wiki-gigaword-50', 'glove-wiki-gigaword-100', 'glove-wiki-gigaword-200', 'glove-wiki-gigaword-300', 'glove-twitter-25', 'glove-twitter-50', 'glove-twitter-100', 'glove-twitter-200', '__testing_word2vec-matrix-synopsis']\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "google_vectors = gensim.downloader.load('glove-twitter-25')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "cKkqSBIyUYz_",
+        "outputId": "6bbfdf8e-7df4-4dfd-a55f-e5c99beb0e66"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "[==================================================] 100.0% 104.8/104.8MB downloaded\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "google_vectors.most_similar('twitter')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "7NjGT-RVUc2j",
+        "outputId": "a7fdc59a-4f83-491c-d0e3-ce863b4ef02c"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n",
+            "  \"\"\"Entry point for launching an IPython kernel.\n"
+          ]
+        },
+        {
+          "output_type": "execute_result",
+          "data": {
+            "text/plain": [
+              "[('facebook', 0.9480051398277283),\n",
+              " ('tweet', 0.9403422474861145),\n",
+              " ('fb', 0.9342358708381653),\n",
+              " ('instagram', 0.9104823470115662),\n",
+              " ('chat', 0.8964964747428894),\n",
+              " ('hashtag', 0.8885936141014099),\n",
+              " ('tweets', 0.8878157734870911),\n",
+              " ('tl', 0.8778461813926697),\n",
+              " ('link', 0.877821147441864),\n",
+              " ('internet', 0.8753897547721863)]"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 62
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        ""
+      ],
+      "metadata": {
+        "id": "_sQy5bfdUr0e"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file