-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: app.py
203 lines (166 loc) · 7.85 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#libraries used in the script
import streamlit as st
import pandas as pd
import requests
import json
from io import StringIO
import ast
def convert_df(df):
    """Serialize *df* to CSV and return it as UTF-8 bytes.

    Streamlit's ``st.download_button`` expects a bytes payload, so the
    CSV text (index column included, pandas default) is encoded here.
    """
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
# --- Page header: title plus credits / usage notes shown above the expanders.
st.title('Local Keyword Generator')
st.markdown(
'''Local keyword generator done by [Antoine Eripret](https://twitter.com/antoineripret). You can report a bug or an issue in [Github](https://github.com/antoineeripret/streamlit-local-keyword-generator).
You can generate keyword & get search volume from [Keyword Surfer](https://surferseo.com/keyword-surfer-extension/) or [Semrush API](https://www.semrush.com/api-analytics/). For the later, **an API key is required and you will spend 10 credits per keyword**.
Note that these API are not perfect and if you use a better one, please reach out to me and let's talk to improve this tool :)
''')
# Load the GeoNames city dump (tab-separated, no header row) from a GitHub
# mirror of this project. NOTE(review): filename suggests the "cities1000"
# extract (cities with population >= 1000) — confirm against geonames.org.
data = pd.read_csv('https://raw.githubusercontent.com/antoineeripret/streamlit-local-keyword-generator/main/cities1000.txt', sep='\t')
# Assign column labels positionally; only 'city' and 'country' are used below.
data.columns = [
'geonameid',
'name',
'city',
'alternatenames',
'latitude',
'longitude',
'feature class',
'feature code',
'country',
'cc2',
'admin1',
'admin2',
'admin3',
'admin4',
'population',
'elevation',
'dem',
'timezone',
'modification']
# Keep just the city name and its two-letter country code.
data = data[['city','country']]
# --- STEP 1: user picks a country, a seed keyword, and whether the city name
# --- goes before or after it. Values feed the extraction in STEP 3.
with st.expander('STEP 1: Create your local keywords'):
    st.markdown('''
    This application use an [external city database](http://www.geonames.org/). Please pick a country below. **Country names are displayed in using their two-letters code.**.
    ''')
    # Country codes come from the GeoNames dump loaded above.
    country_data = st.selectbox('Choose the country', data['country'].sort_values().drop_duplicates().tolist())
    modifier = st.text_input('Choose your main keyword (e.g. hotel, restaurant, lawyer...')
    st.write('Where does the city name must be included? Before or after your keyword?')
    position = st.selectbox('Choose the right structure', ['Before (e.g. barcelona hotel)', 'After (e.g. hotel barcelona)'])
# --- STEP 2: choose the volume data source and its country/API settings.
# --- The API-key variables only exist when the matching source is selected;
# --- STEP 3 relies on that (it fails if launched with an incomplete config).
with st.expander('STEP 2: Configure your extraction'):
    st.markdown('Use two letters ISO code (es,fr,de...). **Please check Keyword Surfer\'s or Semrush\'s documentation to check if your country is available.** Not all of them are. **You can indicate here a different country than what you have for STEP 1.** You can perfectly extract volumes for France for German cities for instance.')
    source = st.selectbox('Source', ('Keyword Surfer (FREE)', 'Semrush (Paid)','Keywordseverywhere (Paid)'))
    st.write('If a keyword is not included in a database, volume returned will be 0. **Which doesn\'t mean that it has no search volume ;)**')
    if source == 'Semrush (Paid)':
        semrush_api_key = st.text_input('API')
    # Keywordseverywhere supports a fixed country list; other sources take
    # a free-text country code.
    if source == 'Keywordseverywhere (Paid)':
        keywordseverywhere_api_key = st.text_input('API key')
        country_api = st.selectbox('Country',['au','ca','in','za','uk','us'])
    else:
        country_api = st.text_input('Country')
def _finalize_results(kws, results):
    """Left-join the volumes collected from the API onto the full keyword
    list (keywords absent from the API database get volume 0) and offer
    the final table as a CSV download."""
    final = (
        pd.Series(kws)
        .to_frame()
        .rename({0: 'keyword'}, axis=1)
        .merge(results, on='keyword', how='left')
        .fillna(0)
    )
    st.download_button(
        "Press to download your data",
        convert_df(final),
        "file.csv",
        "text/csv",
        key='download-csv'
    )

# --- STEP 3: build the local keyword list and query the selected API for
# --- search volumes, 50 keywords per request, best-effort per chunk.
with st.expander('STEP 3: Extract Volume'):
    st.markdown('**You cannot launch this part of the tool without completing step 1 & 2 first!! Execution will fail.**')
    if st.button('Launch extraction'):
        # Combine city names (dashes normalized to spaces, lowercased,
        # de-duplicated) with the seed keyword in the chosen order.
        cities = data[data['country'] == country_data]['city'].str.replace('-', ' ')
        if position == 'Before (e.g. barcelona hotel)':
            kws = cities.str.lower().unique() + ' ' + modifier
        else:
            kws = modifier + ' ' + cities.str.lower().unique()
        # All three APIs are queried in batches of 50 keywords.
        chunks = [kws[x:x + 50] for x in range(0, len(kws), 50)]
        # Accumulator for (keyword, volume) rows returned by the API.
        results = pd.DataFrame(columns=['keyword', 'volume'])
        if source == 'Keyword Surfer (FREE)':
            status_bar = st.progress(0)
            for i, chunk in enumerate(chunks):
                url = (
                    'https://db2.keywordsur.fr/keyword_surfer_keywords?country={}&keywords=[%22'.format(country_api) +
                    '%22,%22'.join(chunk) +
                    '%22]'
                )
                # Best-effort: skip the chunk on a network or parse failure
                # rather than aborting the whole extraction. The response is
                # bound to a local name ('volumes') so the module-level
                # cities DataFrame ('data') is not clobbered.
                try:
                    r = requests.get(url)
                    volumes = json.loads(r.text)
                except (requests.RequestException, ValueError):
                    continue
                for kw, info in volumes.items():
                    results.loc[len(results)] = [kw, info['search_volume']]
                # (i + 1) so the bar reaches completion on the last chunk.
                status_bar.progress((i + 1) / len(chunks))
            status_bar.progress(100)
            _finalize_results(kws, results)
        elif source == 'Semrush (Paid)':
            status_bar = st.progress(0)
            for i, chunk in enumerate(chunks):
                # phrase_these returns one CSV row (Ph;Nq) per keyword.
                url = 'https://api.semrush.com/?type=phrase_these&key={}&export_columns=Ph,Nq&database={}&phrase={}'.format(semrush_api_key, country_api, ';'.join(chunk))
                try:
                    r = requests.get(url)
                    df = pd.read_csv(StringIO(r.text), sep=';')
                except (requests.RequestException, ValueError, pd.errors.ParserError):
                    continue
                results = pd.concat([results, df.rename({'Keyword': 'keyword', 'Search Volume': 'volume'}, axis=1)])
                status_bar.progress((i + 1) / len(chunks))
            status_bar.progress(100)
            _finalize_results(kws, results)
        elif source == 'Keywordseverywhere (Paid)':
            headers = {
                'Accept': 'application/json',
                'Authorization': 'Bearer {}'.format(keywordseverywhere_api_key)
            }
            status_bar = st.progress(0)
            for i, chunk in enumerate(chunks):
                # Renamed from 'data' to avoid shadowing the cities DataFrame.
                payload = {
                    'country': country_api,
                    'currency': 'usd',
                    'dataSource': 'gkp',
                    'kw[]': chunk
                }
                try:
                    r = requests.post('https://api.keywordseverywhere.com/v1/get_keyword_data', headers=headers, data=payload)
                    # json.loads, not ast.literal_eval: the endpoint returns
                    # JSON, and literal_eval rejects true/false/null literals.
                    body = json.loads(r.content.decode('utf-8'))
                except (requests.RequestException, ValueError, UnicodeDecodeError):
                    continue
                for element in body.get('data', []):
                    results.loc[len(results)] = [element['keyword'], element['vol']]
                status_bar.progress((i + 1) / len(chunks))
            status_bar.progress(100)
            _finalize_results(kws, results)