monChatbot.py
import nltk  # natural language processing: tokenization, lemmatization, stopword removal
import string  # used for punctuation removal
import streamlit as st  # used to create the web-based chatbot interface
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# nltk.download() fetches the extra resources this script relies on:
# 'punkt' for sentence and word tokenization, 'stopwords' for the English
# stopword list, and 'wordnet' for lemmatization.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
# Load the text file and preprocess the data
with open('Climatic_Changes_Their_Nature_and_Causes2.txt', 'r', encoding='utf-8') as f:
    data = f.read().replace('\n', ' ')

# Tokenize the text into sentences
sentences = sent_tokenize(data)
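# sent_tokenize splits the raw text on sentence boundaries, e.g.
# "It rains. It pours." -> ['It rains.', 'It pours.']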
# Build the stopword set and the lemmatizer once, rather than on every call
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# Define a function to preprocess each sentence
def preprocess(sentence):
    # Tokenize the sentence into words
    words = word_tokenize(sentence)
    # Remove stopwords and punctuation
    words = [word.lower() for word in words
             if word.lower() not in stop_words and word not in string.punctuation]
    # Lemmatize the words
    words = [lemmatizer.lemmatize(word) for word in words]
    return words
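# For example (assuming NLTK's default English models), a query such as
# "What causes the climates to change?" reduces to something like
# ['cause', 'climate', 'change'] after stopword removal and lemmatization.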
# Preprocess each sentence in the text
corpus = [preprocess(sentence) for sentence in sentences]
# Define a function to find the most relevant sentence given a query
def get_most_relevant_sentence(query):
    # Preprocess the query the same way as the corpus
    query_words = preprocess(query)
    # Compute the Jaccard similarity (shared words / all distinct words)
    # between the query and each sentence in the text
    max_similarity = 0.0
    most_relevant_sentence = ""
    for i, sentence_words in enumerate(corpus):
        union = set(query_words).union(sentence_words)
        # Skip empty unions to avoid dividing by zero
        if not union:
            continue
        similarity = len(set(query_words).intersection(sentence_words)) / len(union)
        if similarity > max_similarity:
            max_similarity = similarity
            # Keep the original sentence rather than its preprocessed
            # tokens, so the answer reads naturally
            most_relevant_sentence = sentences[i]
    return most_relevant_sentence
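# Worked example of the Jaccard score |A ∩ B| / |A ∪ B| used above:
# query tokens {'climate', 'change'} against sentence tokens
# {'climate', 'cause', 'nature'} share 1 word out of 4 distinct words,
# giving a similarity of 1/4 = 0.25.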
def chatbot(question):
    # Find the most relevant sentence
    most_relevant_sentence = get_most_relevant_sentence(question)
    # Return the answer
    return most_relevant_sentence
# Create a Streamlit app
def main():
    st.title("Chatbot")
    st.write("Hello! I'm a chatbot. Ask me anything about the topic in the text file.")
    # Get the user's question
    question = st.text_input("You:")
    # Create a button to submit the question
    if st.button("Submit"):
        # Call the chatbot function with the question and display the response
        response = chatbot(question)
        st.write("Chatbot: " + response)

if __name__ == "__main__":
    main()
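# To try the chatbot locally, launch it through the Streamlit CLI
# (assuming streamlit is installed and the text file sits next to this
# script):
#
#   streamlit run monChatbot.py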