-
Notifications
You must be signed in to change notification settings - Fork 0
/
twitter_etl.py
41 lines (33 loc) · 1.16 KB
/
twitter_etl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import tweepy
import pandas as pd
import json
from datetime import datetime
import s3fs
# Read the credential file once at import time. The context manager closes
# the handle as soon as the lines are loaded instead of leaving it open for
# the life of the process. `keys_file` stays bound (as a closed file object)
# and `lines` keeps the raw lines, trailing newlines included.
with open("keys.txt") as keys_file:
    lines = keys_file.readlines()
def run_twitter_etl():
    """Fetch recent @BillGates tweets and store them on S3 as a CSV file.

    Authenticates with the consumer key/secret taken from the module-level
    ``lines`` (the contents of ``keys.txt``), requests up to 200 timeline
    tweets with ``include_rts=False``, keeps a few fields per tweet, and
    writes the resulting table to
    ``s3://medc-airflow-pipeline-bucket/billgates_tweeter_data.csv``.

    Returns:
        None. The only outputs are the Twitter API request and the CSV
        written to S3.

    Raises:
        IndexError: if ``keys.txt`` holds fewer than four lines.
    """
    # Twitter keys. Lines 0-1 of keys.txt (access key and secret) are not
    # read here: OAuth2AppHandler is built from the consumer pair only.
    consumer_key = lines[2].rstrip()
    consumer_secret = lines[3].rstrip()

    # Twitter authentication
    auth = tweepy.OAuth2AppHandler(consumer_key, consumer_secret)

    # API object
    api = tweepy.API(auth)

    # Fetching the tweets. tweet_mode='extended' makes each status expose
    # its untruncated text as `full_text`.
    tweets = api.user_timeline(
        screen_name='@BillGates',
        count=200,
        include_rts=False,
        tweet_mode='extended',
    )

    # Keep only the fields of interest, one row per tweet. The public
    # `full_text` attribute is used rather than the private `_json` payload.
    tweet_list = [
        {
            "user": tweet.user.screen_name,
            "text": tweet.full_text,
            "favorite_count": tweet.favorite_count,
            "retweet_count": tweet.retweet_count,
            "created_at": tweet.created_at,
        }
        for tweet in tweets
    ]

    df = pd.DataFrame(tweet_list)
    # NOTE(review): pandas is expected to resolve the s3:// URL through
    # s3fs, which this file imports — confirm bucket credentials are
    # available in the runtime environment.
    df.to_csv("s3://medc-airflow-pipeline-bucket/billgates_tweeter_data.csv")