-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
137 lines (92 loc) · 3.72 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import datetime
from datetime import datetime, timedelta
from time import sleep
import instaloader
import yaml
from os import path
from openpyxl import load_workbook
from openpyxl.workbook import Workbook
from itertools import takewhile
#reads the config.yaml file
def read(config_file):
    """Load a YAML document from ``config_file`` and return the parsed data.

    The path is announced on stdout, then the file is opened as UTF-8 text
    and parsed with ``yaml.safe_load``. Whatever the parser produces is
    returned unchanged.
    """
    print('Reading', config_file)
    with open(config_file, mode='r', encoding='utf-8') as stream:
        return yaml.safe_load(stream)
def save(config_file, data):
    """Write ``data`` to ``config_file`` as block-style YAML.

    The path is announced on stdout, then the file is opened for writing as
    UTF-8 text (replacing any previous content) and ``data`` is dumped into
    it with ``default_flow_style=False``.
    """
    print('Writing', config_file)
    with open(config_file, mode='w', encoding='utf-8') as stream:
        yaml.dump(data, stream, default_flow_style=False)
def setupconfig():
    """Load the tracker settings and the persisted per-post tracking state.

    Settings come from ``config.yaml`` and the tracking state from
    ``currenttracked.yaml``, both in the current working directory.

    Returns:
        A 5-tuple ``(TrackedNames, TrackLifetime, currenttracked, username,
        password)``:

        * ``TrackedNames`` - value of the ``TrackedNames`` key, or ``[]``
          when the key is missing or empty.
        * ``TrackLifetime`` - value of the ``TrackLifetime`` key, or ``None``
          when it is not configured.
        * ``currenttracked`` - content of ``currenttracked.yaml``, or ``{}``
          when that file is missing or empty.
        * ``username`` / ``password`` - values of ``LoginUsername`` and
          ``LoginPassword``, or ``None`` when absent.
    """
    # An empty YAML file parses to None; fall back to an empty mapping so the
    # .get() lookups below keep working.
    config = read("config.yaml") or {}

    # "TrackedNames:" with no value parses to None, which the .get() default
    # alone would not catch.
    TrackedNames = config.get('TrackedNames') or []
    TrackLifetime = config.get('TrackLifetime')
    username = config.get('LoginUsername')
    password = config.get('LoginPassword')

    # On a first run the state file may not exist yet, or may be empty.
    # Callers do membership tests and item assignment on this value, so it
    # must always be a mapping.
    currenttracked = None
    if path.exists("currenttracked.yaml"):
        currenttracked = read("currenttracked.yaml")
    if not currenttracked:
        currenttracked = {}

    return TrackedNames, TrackLifetime, currenttracked, username, password
def setupworkbook(workbook_name):
    """Open the results workbook ``Results/<workbook_name>``, creating it if needed.

    When the file already exists it is loaded as is. Otherwise a new workbook
    is created whose active sheet is titled ``Data`` and starts with the
    header row, and it is saved immediately so the file exists on disk.

    Args:
        workbook_name: File name of the workbook inside the ``Results``
            directory (for example ``"someprofile.xlsx"``).

    Returns:
        A ``(workbook, worksheet)`` tuple, where ``worksheet`` is the
        workbook's active sheet.
    """
    # Local import: the module only imports ``path`` from ``os``.
    from os import makedirs

    # Saving into a directory that does not exist raises FileNotFoundError,
    # so make sure the output directory is there first.
    makedirs('Results', exist_ok=True)
    workbook_path = f'Results/{workbook_name}'

    if path.exists(workbook_path):
        wb = load_workbook(workbook_path)
        page = wb.active
    else:
        # One column per value recorded for a post.
        headers = [
            'Profile Name',
            'Current date',
            'URL',
            'Post Type',
            'Date Posted',
            'Current Likes',
            'Current Comments',
            'Current View',
        ]
        wb = Workbook()
        page = wb.active
        page.title = 'Data'
        page.append(headers)
        wb.save(filename=workbook_path)
    return wb, page
#def GetData(bot):
def main():
    """Record one snapshot of post statistics for every tracked profile.

    Steps:
      1. Load settings and the per-post tracking state via ``setupconfig``.
      2. Create an Instaloader session, reusing the ``session`` file when it
         exists and logging in with the configured credentials otherwise.
      3. For each tracked profile, walk its posts while they are newer than
         ``TrackLifetime`` hours and append one row per post to
         ``Results/<name>.xlsx``.
      4. Persist the tracking state, the workbook and the login session.

    ``currenttracked`` maps a post shortcode to a counter. A post seen for
    the first time starts at ``TrackLifetime``; the counter is decremented
    after each recorded row, and posts whose counter is 0 are skipped.

    Raises:
        SystemExit: with status 1 when reading a post's data fails. Rows and
            counters collected so far for that profile are saved first.
    """
    TrackedNames, TrackLifetime, currenttracked, username, password = setupconfig()

    # Posts at or before this cutoff end the scan of a profile.
    # NOTE(review): assumes post.date is a naive UTC datetime, so comparing it
    # with utcnow() is valid - confirm against the instaloader docs.
    date = datetime.utcnow() - timedelta(hours=TrackLifetime)
    print(f"tracking {TrackedNames} for {TrackLifetime} hours")

    print("initialize instaloader bot")
    bot = instaloader.Instaloader()
    if path.exists('session'):
        bot.load_session_from_file(username, 'session')
    # Fall back to a password login when the stored session is missing or no
    # longer valid.
    if bot.test_login() is None:
        bot.login(user=username, passwd=password)
    print(bot.test_login())

    # get the data
    for name in TrackedNames:
        wb, page = setupworkbook(f'{name}.xlsx')
        print(f"Results will be stored in {name}.xlsx")
        profile = instaloader.Profile.from_username(bot.context, name)
        try:
            # takewhile stops at the first post that is not newer than the
            # cutoff.
            for post in takewhile(lambda p: p.date > date, profile.get_posts()):
                postsh = str(post.shortcode)

                # First sighting: start the counter at TrackLifetime.
                remaining = currenttracked.setdefault(postsh, TrackLifetime)
                if remaining == 0:
                    # Counter exhausted - this post is no longer recorded.
                    continue

                try:
                    data = [
                        post.owner_username,
                        datetime.utcnow(),
                        "https://www.instagram.com/p/" + postsh + "/",
                        post.typename,
                        post.date,
                        post.likes,
                        post.comments,
                        post.video_view_count,
                    ]
                except Exception as err:
                    # Exception (not a bare except) so Ctrl-C and SystemExit
                    # are not mistaken for a blocked connection.
                    print("error. Instagram blocked the connection!")
                    print(f"details: {err!r}")
                    raise SystemExit(1) from err

                page.append(data)
                if remaining >= 1:
                    currenttracked[postsh] = remaining - 1
        finally:
            # Runs on success and on abort, so collected rows and the matching
            # counters are never lost.
            # NOTE(review): the source's indentation was lost; these saves are
            # assumed to run once per profile - confirm.
            save('currenttracked.yaml', currenttracked)
            wb.save(filename=f'Results/{name}.xlsx')
            wb.close()

    bot.save_session_to_file('session')
# Script entry point: run the tracker only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()