-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
126 lines (94 loc) · 4.41 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import streamlit as st
import utils
import pandas as pd
from google.cloud import firestore
import datetime
import json
@st.cache_resource
def get_retrievers():
    """Build the FAISS-backed retriever for every supported show.

    The result is cached with ``st.cache_resource`` so the retrievers are
    constructed once per process and the same instances are reused on every
    rerun. ``st.cache_data`` would instead pickle the return value and hand
    back a freshly deserialized copy on each call.

    Returns:
        dict: Maps the show name (``'Office'``, ``'Parks'``) to the
        ``utils.Retriever`` built from that show's data directory.
    """
    # NOTE(review): cached resources are shared by all sessions, so this
    # assumes Retriever lookups do not mutate the retriever - confirm in utils.
    show_dirs = {'Office': 'data/office/', 'Parks': 'data/parks/'}
    return {
        show: utils.Retriever(
            data_dir + 'faiss_index.index',
            data_dir + 'idx_to_metadata.json',
        )
        for show, data_dir in show_dirs.items()
    }
@st.cache_data()
def get_retrievers_keywords():
    """Build the keyword retriever for every supported show.

    Each retriever is constructed from the show's ``idx_to_metadata.json``
    file only; the FAISS index path is not needed here.

    Returns:
        dict: Maps the show name (``'Office'``, ``'Parks'``) to the
        ``utils.KeywordRetriever`` built from that show's metadata file.
    """
    show_dirs = {'Office': 'data/office/', 'Parks': 'data/parks/'}
    return {
        show: utils.KeywordRetriever(data_dir + 'idx_to_metadata.json')
        for show, data_dir in show_dirs.items()
    }
def _render_sidebar(tabs):
    """Draw the sidebar and return the show picked in the navigation radio."""
    st.sidebar.image("imgs/dunder_mifflin.png", use_column_width=True)
    st.sidebar.info("You can search for scenes related to specific topics to help find episodes of interest")

    st.sidebar.title("Navigation")
    selected_tab = st.sidebar.radio("Select Tab", tabs)

    st.sidebar.title("Example Usage")
    st.sidebar.markdown("The Office:\n1. Pam goes to design school\n2. Jim puts Dwight's stapler in Jello\n3. Michael does a Chris Rock impression\n\nParks and Rec:\n1. John Ralphio and Tom\n2. Duke silver playing in concert")

    st.sidebar.title("How it works")
    st.sidebar.info("1. Given a keyphrase, system searches through various scenes of each episode.\n2. Using semantic search it retrieves episodes with most similar scenes matching the query")

    st.sidebar.title("Note for user")
    st.sidebar.markdown(
        "The tool retreives the episode with a corresponding matching scene from the show. \nThe scene description displayed may not always be 100% accurate. Also, items in the searchbox are logged")
    return selected_tab


def _show_results(resp, resp2):
    """Render the keyword and semantic search results.

    Keyword matches (``resp2``) take precedence over the semantic
    ``resp['correct']`` matches; ``resp['similar']`` is shown in addition
    whenever it is non-empty.

    NOTE(review): both results are presumably pandas DataFrames (they are
    sized with ``len`` and indexed by a column list) - confirm against utils.
    """
    DISPCOLS = ['season', 'episode', 'scene summary', 'score']
    correct = resp['correct']
    similar = resp['similar']

    # Explicit len() checks: the truth value of a DataFrame is ambiguous.
    if len(resp2) > 0:
        st.subheader('Matching Episodes')
        st.dataframe(resp2[DISPCOLS])
    elif len(correct) > 0:
        st.subheader('Matching Episodes')
        # Deduplicate into a new frame rather than in place, so the object
        # returned by the retriever is never mutated.
        unique_episodes = correct.drop_duplicates(subset=['season', 'episode'])
        st.dataframe(unique_episodes[DISPCOLS])

    if len(similar) > 0:
        st.subheader('Episodes with Similar Scenes')
        st.dataframe(similar[DISPCOLS])

    if len(resp2) == 0 and len(correct) == 0 and len(similar) == 0:
        st.subheader('No matches')


def _log_search(keyphrase, show):
    """Record the searched keyphrase (first 80 characters) and show in Firestore."""
    db = get_logger()
    data = {
        "keyword": keyphrase[:80],
        "name": show,
        "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    db.collection(st.secrets["collection_name"]).add(data)


def main():
    """Render the episode-search page and run a search when requested.

    The keyphrase is stripped once, validated against the length limits,
    looked up with both the keyword and the semantic retriever of the
    selected show, displayed, and finally logged.

    Raises:
        ValueError: If no retriever exists for the selected show.
    """
    st.title("TV Show Episode Search")

    tabs = ["Office", "Parks"]
    selected_tab = _render_sidebar(tabs)

    keyphrase = st.text_input("Enter a keyphrase")
    searchbar = st.button("Search")
    if not searchbar:
        return

    MIN_CHAR, MAX_CHAR = 4, 150
    # Strip before validating so the limits apply to the text that is
    # actually searched, not to surrounding whitespace.
    keyphrase = keyphrase.strip()
    if not keyphrase:
        st.warning("Please enter a keyphrase to search.")
        return
    if len(keyphrase) < MIN_CHAR:
        st.warning('Enter more characters')
        return
    if len(keyphrase) > MAX_CHAR:
        st.warning('Enter less characters')
        return

    retrievers = get_retrievers()
    retrievers_keywords = get_retrievers_keywords()
    # Check against the available retrievers; comparing the selection with
    # itself can never fail.
    if selected_tab not in retrievers or selected_tab not in retrievers_keywords:
        raise ValueError('Selected TVShow not supported')

    resp = retrievers[selected_tab].get_final_answer(keyphrase)
    resp2 = retrievers_keywords[selected_tab].get_final_answer(keyphrase)
    _show_results(resp, resp2)

    # Every query reaching this point already satisfies MIN_CHAR, so no
    # further length guard is needed before logging.
    _log_search(keyphrase, selected_tab)
@st.cache_resource
def get_logger():
    """Return the Firestore client used for logging searches.

    The service-account credentials are read as a JSON string from the
    ``textkey`` entry of the Streamlit secrets. The client is cached with
    ``st.cache_resource``.
    """
    service_account_info = json.loads(st.secrets["textkey"])
    return firestore.Client.from_service_account_info(service_account_info)
# Script entry point: render the app only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()