# GHScraper_Checker.py (from stashapp/CommunityScripts)
import json
import os
import re
import sys
import zipfile
from datetime import datetime
import requests
import log
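
# Stash invokes this plugin and passes a JSON fragment on stdin. Illustrative
# shape only (field names are taken from how they are accessed below; the
# values are made-up examples):
# {
#   "server_connection": {"Scheme": "http", "Port": 9999, "Domain": "",
#                         "SessionCookie": {"Value": "<session id>"}},
#   "args": {"mode": "CHECK"}
# }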
FRAGMENT = json.loads(sys.stdin.read())
FRAGMENT_SERVER = FRAGMENT["server_connection"]
FRAGMENT_ARG = FRAGMENT['args']['mode']
log.LogDebug("Starting Plugin: Github Scraper Checker")
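
# The plugin runs in one of three modes, selected by the "mode" argument:
#   CHECK     - log scrapers whose GitHub copy is newer than the local copy
#   NEWFILE   - download scrapers that do not exist locally yet
#   OVERWRITE - replace (or create) every local scraper with the GitHub copy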
CHECK_LOG = False
GET_NEW_FILE = False
OVERWRITE = False
if FRAGMENT_ARG == "CHECK":
    CHECK_LOG = True
if FRAGMENT_ARG == "NEWFILE":
    GET_NEW_FILE = True
if FRAGMENT_ARG == "OVERWRITE":
    OVERWRITE = True

# Don't write to the log if the file doesn't exist locally.
IGNORE_MISS_LOCAL = False
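

# Ask the Stash GraphQL API where the scraper .yml files are stored on disk.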
def graphql_getScraperPath():
    query = """
    query Configuration {
        configuration {
            general {
                scrapersPath
            }
        }
    }
    """
    result = callGraphQL(query)
    return result["configuration"]["general"]["scrapersPath"]
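

# Send a GraphQL query to the local Stash instance, authenticating with the
# session cookie that Stash handed to the plugin.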
def callGraphQL(query, variables=None):
    # Session cookie for authentication
    graphql_port = FRAGMENT_SERVER['Port']
    graphql_scheme = FRAGMENT_SERVER['Scheme']
    graphql_cookies = {
        'session': FRAGMENT_SERVER.get('SessionCookie').get('Value')
    }
    graphql_headers = {
        "Accept-Encoding": "gzip, deflate, br",
        "Content-Type": "application/json",
        "Accept": "application/json",
        "Connection": "keep-alive",
        "DNT": "1"
    }
    if FRAGMENT_SERVER.get('Domain'):
        graphql_domain = FRAGMENT_SERVER['Domain']
    else:
        if FRAGMENT_SERVER.get('Host'):
            graphql_domain = FRAGMENT_SERVER['Host']
        else:
            graphql_domain = 'localhost'
    # Because I don't understand how Host works...
    graphql_domain = 'localhost'
    # Stash GraphQL endpoint
    graphql_url = graphql_scheme + "://" + \
        graphql_domain + ":" + str(graphql_port) + "/graphql"
    json_body = {'query': query}
    if variables is not None:
        json_body['variables'] = variables
    try:
        response = requests.post(
            graphql_url, json=json_body, headers=graphql_headers,
            cookies=graphql_cookies, timeout=10)
    except requests.exceptions.RequestException:
        sys.exit("[FATAL] Error with the GraphQL request, are you sure the GraphQL endpoint ({}) is correct?".format(
            graphql_url))
    if response.status_code == 200:
        result = response.json()
        if result.get("error"):
            for error in result["error"]["errors"]:
                raise Exception("GraphQL error: {}".format(error))
        if result.get("data"):
            return result.get("data")
    elif response.status_code == 401:
        sys.exit("HTTP Error 401, Unauthorised.")
    else:
        raise ConnectionError("GraphQL query failed: {} - {}. Query: {}. Variables: {}".format(
            response.status_code, response.content, query, variables))
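

# Return the "# Last Updated ..." comment line from a local scraper file,
# or None if the file has no such line.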
def file_getlastline(path):
    with open(path, 'r', encoding="utf-8") as f:
        for line in f:
            u_match = re.search(r"^\s*#\s*last updated", line.lower())
            if u_match:
                return line.strip()
    return None
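

# Parse the date out of a "# Last Updated <Month> <day>, <year>" line
# (e.g. "# Last Updated March 04, 2022"); return None if it does not parse.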
def get_date(line):
    try:
        date = datetime.strptime(re.sub(r".*#.*Last Updated\s*", "", line), "%B %d, %Y")
    except ValueError:
        return None
    return date
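

# Main flow: download the current CommunityScrapers master branch as a zip into
# the local scrapers folder, then compare every bundled .yml with its local copy.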
scraper_folder_path = graphql_getScraperPath()

GITHUB_LINK = "https://github.com/stashapp/CommunityScrapers/archive/refs/heads/master.zip"
try:
    r = requests.get(GITHUB_LINK, timeout=10)
except requests.exceptions.RequestException:
    sys.exit("Failed to download the zip file.")
zip_path = os.path.join(scraper_folder_path, "github.zip")
log.LogDebug(zip_path)
with open(zip_path, "wb") as zip_file:
    zip_file.write(r.content)

with zipfile.ZipFile(zip_path) as z:
    change_detected = False
    for filename in z.namelist():
        # Only care about the scrapers folders
        if "/scrapers/" in filename and filename.endswith(".yml"):
            # read the file
            line = bytes()
            # Filename abc.yml
            gh_file = os.path.basename(filename)
            # Filename /scrapers/<subdir>/abc.yml
            if not filename.endswith(f"/scrapers/{gh_file}"):
                log.LogDebug("Subdirectory detected: " + filename)
                subdir = re.findall(r"/scrapers/(.*)/.*\.yml", filename)
                if len(subdir) != 1:
                    log.LogError(f"Unexpected number of matching subdirectories found. Expected 1. Found {len(subdir)}.")
                    sys.exit(1)
                gh_file = subdir[0] + "/" + gh_file
                log.LogDebug(gh_file)
            path_local = os.path.join(scraper_folder_path, gh_file)
            gh_line = None
            yml_script = None
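
            # OVERWRITE mode: write the GitHub copy over the local file
            # (creating it if needed) and move on to the next scraper.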
            if OVERWRITE:
                with z.open(filename) as f:
                    scraper_content = f.read()
                with open(path_local, 'wb') as yml_file:
                    yml_file.write(scraper_content)
                log.LogInfo("Replacing/Creating {}".format(gh_file))
                continue
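
            # CHECK/NEWFILE modes: scan the GitHub copy for its "Last Updated"
            # comment and note whether the scraper uses a script action
            # (in which case related script files may have changed too).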
            with z.open(filename) as f:
                for line in f:
                    script_match = re.search(r"action:\sscript", line.decode().lower())
                    update_match = re.search(r"^\s*#\s*last updated", line.decode().lower())
                    if script_match:
                        yml_script = True
                    if update_match:
                        gh_line = line.decode().strip()
                        break
            # Got last line
            if gh_line is None:
                log.LogError("[Github] Line Error ({}) ".format(gh_file))
                continue
            gh_date = get_date(gh_line)
            if gh_date is None:
                log.LogError("[Github] Date Error ({}) ".format(gh_file))
                continue
            elif os.path.exists(path_local):
                # Local Part
                local_line = file_getlastline(path_local)
                if local_line is None:
                    log.LogError("[Local] Line Error ({}) ".format(gh_file))
                    continue
                local_date = get_date(local_line.strip())
                if local_date is None:
                    log.LogError("[Local] Date Error ({}) ".format(gh_file))
                    continue
                if gh_date > local_date and CHECK_LOG:
                    change_detected = True
                    if yml_script:
                        log.LogInfo("[{}] New version on github (Can be any of the related files)".format(gh_file))
                    else:
                        log.LogInfo("[{}] New version on github".format(gh_file))
            elif GET_NEW_FILE:
                change_detected = True
                # File doesn't exist locally, so take the GitHub version.
                with z.open(filename) as f:
                    scraper_content = f.read()
                with open(path_local, 'wb') as yml_file:
                    yml_file.write(scraper_content)
                log.LogInfo("Creating {}".format(gh_file))
                continue
            elif CHECK_LOG and not IGNORE_MISS_LOCAL:
                change_detected = True
                log.LogWarning("[{}] File doesn't exist locally".format(gh_file))
    if not change_detected:
        log.LogInfo("Scrapers appear to be in sync with GitHub version.")

os.remove(zip_path)