forked from davidq2010/HackU
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathYelpParser.py
92 lines (78 loc) · 3.34 KB
/
YelpParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""
This program requires as input a list of businesses that have been scraped
from GooglePlaces. It then looks up the Yelp page of each company by its
phone number and stores additional data in the passed-in list, such as the
company's Yelp rating, whether it is listed or claimed, and its number of reviews.
"""
from yelpapi import YelpAPI
import json
import re
import time
# API key handed to YelpAPI() when parseYelpData builds its Yelp Fusion client.
# NOTE(review): this credential is hard-coded in the source file; consider
# loading it from an environment variable or an untracked config file instead.
APIKEY = "BDF_tYMOnEwm0jPojGG5Jb3Syj4FyGA7KPxu-L4ze8aLsUXtH_Hag1ezCEQc65wqDHiQjLcIfG3wbiu_xPZk9aIrYQuxKvTPUIRkUxeC5bK6lK73doWNGJzIn-OOWnYx"
def strip(str1):
    """
    brief: Drop every comma from a piece of text so that it can be written
           as a single field of a csv file later on
    param str1: text to clean up
    return: a copy of str1 that contains no ',' characters
    """
    # Splitting on the comma and gluing the pieces back together removes
    # each occurrence of the separator.
    pieces = str1.split(',')
    return ''.join(pieces)
def changeFormatTel(tel):
    """
    This function takes in a value that represents the international telephone
    number of a business and converts it to a string of ASCII digits only,
    without any space or other non-numeric characters.
    param: tel given telephone number of a business; normally a string, but
           None is accepted (treated as "no number") and any other value is
           converted with str() first
    return: the reformatted telephone number containing only the characters
            0-9, or "" when tel is None or contains no digits
    """
    # A missing phone number is reported the same way as an empty one so that
    # callers only have to compare the result against "".
    if tel is None:
        return ""
    # Deleting everything that is not 0-9 also removes spaces, so no separate
    # whitespace pass is needed.
    return re.sub(r'[^0-9]', '', str(tel))
def _markNotListed(place):
    """
    Fill in the Yelp data fields of a business for which no Yelp entry is
    available: empty rating, review count and url, and "0" for both flags.
    param: place dictionary of a single business, updated in place
    """
    place['yelp_rating'] = ""
    place['yelp_review_count'] = ""
    place['yelp_url'] = ""
    place['is_listed_yelp'] = "0"
    place['is_claimed_yelp'] = "0"


def parseYelpData(places, heading = "tel"):
    """
    This function takes in a list of businesses and looks them up by phone number
    in Yelp Fusion using yelpapi. It adds data about Yelp average rating of the
    business, the number of Yelp reviews, and the company's Yelp link to the
    dictionary of the business. The function then returns the updated list of businesses.
    Businesses without a usable phone number, or whose number has no match on
    Yelp, get empty values and "0" flags.
    param: places list of businesses (dictionaries) that have been scraped by
           the GoogleParser; each dictionary is updated in place
    param: heading default to "tel", signifying the heading for the phone number data
           field of each business
    return: updated list of businesses, with these data fields added to each business:
            'yelp_rating', 'yelp_review_count', 'yelp_url','is_listed_yelp','is_claimed_yelp'
    """
    # Nothing to look up, so there is no need to create an API client.
    if not places:
        return places

    yelp_api = YelpAPI(APIKEY)
    for place in places:
        # A missing or None phone field is handled like an empty phone number
        # instead of aborting the whole run.
        phoneNum = changeFormatTel(str(place.get(heading) or ""))
        if phoneNum == "":
            _markNotListed(place)
            continue

        response = yelp_api.phone_search_query(phone=phoneNum)
        matches = response['businesses'] if response['total'] != 0 else []

        # If the phone number is listed in Yelp, add Yelp rating, review_count, and
        # yelpUrl to the dictionary of the business.
        if matches:
            # Only the first business returned for the phone number is used.
            business = matches[0]
            place['yelp_rating'] = strip(str(business['rating']))
            place['yelp_review_count'] = strip(str(business['review_count']))
            place['yelp_url'] = strip(str(business['url']))
            companyID = strip(str(business['id']))
            # The claimed flag is read from a separate per-business query.
            claimResponse = yelp_api.business_query(id=companyID)
            place['is_listed_yelp'] = "1"
            place['is_claimed_yelp'] = "1" if claimResponse.get('is_claimed') else "0"
        else:
            _markNotListed(place)

        # to avoid Yelp's error messages of too many queries per second
        time.sleep(1)
    return places