-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
118 lines (92 loc) · 3.88 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os
import re
import sys
import requests
from markdown import markdown
from urllib.parse import urlparse
def read_file(filename):
    """Return the full text of *filename*, or ``None`` if the file does not exist.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.

    Args:
        filename: Path of the file to read.

    Returns:
        The file content as a string, or ``None`` (after printing a message)
        when the file cannot be found.
    """
    try:
        with open(filename, 'r', encoding='utf-8') as file:
            return file.read()
    except FileNotFoundError:
        print(f"File '{filename}' not found.")
        return None
def extract_links_from_markdown(markdown_text):
    """Collect link and image targets from a Markdown document.

    The text is rendered to HTML with ``markdown`` and the ``href`` values of
    ``<a>`` tags and the ``src`` values of ``<img>`` tags are pulled out with
    regular expressions.

    Args:
        markdown_text: Markdown source, or ``None``.

    Returns:
        A ``(links, images)`` tuple of lists of URL strings. Both lists are
        empty when *markdown_text* is ``None``.
    """
    # Imported locally so this function stays self-contained; the bare name
    # ``html`` is not otherwise in scope in this file.
    from html import unescape

    if markdown_text is None:
        return [], []

    rendered = markdown(markdown_text)
    link_pattern = r'<a\s+(?:[^>]*?\s+)?href="([^"]*)"'
    image_pattern = r'<img\s+(?:[^>]*?\s+)?src="([^"]*)"'

    # Attribute values in rendered HTML are entity-escaped (e.g. "&" appears
    # as "&amp;"), so decode them to recover the URL as written in the source.
    links = [unescape(url) for url in re.findall(link_pattern, rendered)]
    images = [unescape(url) for url in re.findall(image_pattern, rendered)]
    return links, images
def find_files(directory, filename):
    """Walk *directory* recursively and list every file named exactly *filename*.

    Args:
        directory: Root directory to search.
        filename: File name to match (compared for equality, no globbing).

    Returns:
        A list of paths, each built by joining the containing directory with
        the matching file name, in the order ``os.walk`` visits them.
    """
    return [
        os.path.join(parent, entry)
        for parent, _subdirs, entries in os.walk(directory)
        for entry in entries
        if entry == filename
    ]
# Upper bound, in seconds, for a single HTTP check so that one unresponsive
# host cannot stall the whole run.
_REQUEST_TIMEOUT_SECONDS = 10


def _local_target_exists(base, relative):
    """Return True if *relative* (optionally minus a ``#fragment``) exists under *base*."""
    candidates = [relative]
    without_fragment, separator, _fragment = relative.partition('#')
    if separator:
        # "page.md#section" points at page.md; a bare "#section" points at
        # the containing folder/document itself.
        candidates.append(without_fragment)
    for candidate in candidates:
        path = f"{base}/{candidate}"
        if os.path.isfile(path) or os.path.isdir(path):
            return True
    return False


def check_link(task_folder, link, course_root=None):
    """Check whether *link* points at something that exists.

    Supported link kinds:

    * ``course://...`` - resolved against the course root directory.
    * ``http://`` / ``https://`` - verified with an HTTP ``HEAD`` request.
    * ``file://``, ``psi_element://``, ``tool_window://``, ``settings://`` -
      accepted without checking (not supported for now).
    * anything else - treated as a path relative to *task_folder*.

    For local targets a trailing ``#fragment`` is ignored when the literal
    path does not exist.

    Args:
        task_folder: Directory containing the document the link came from.
        link: The link target to check.
        course_root: Directory that ``course://`` links are resolved against.
            When ``None`` the module-level ``course_directory`` is used.

    Returns:
        A ``(ok, message)`` tuple. ``message`` is empty on plain success,
        a warning for a tolerated failure, or an error description when
        ``ok`` is ``False``. Any exception raised while checking is reported
        as a failed check with the exception text as the message.
    """
    try:
        if link.startswith('course://'):
            root = course_directory if course_root is None else course_root
            internal_resource_link = link[len('course://'):]
            internal_resource_path = f"{root}/{internal_resource_link}"
            if not _local_target_exists(root, internal_resource_link):
                return False, f"NO such file: {internal_resource_path}"
            return True, ""

        if link.startswith(('http://', 'https://')):
            # Browser-like headers sent with the request.
            headers = {
                'authority': 'www.google.com',
                'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
                'accept-language': 'en-US,en;q=0.9',
                'cache-control': 'max-age=0',
                'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36',
            }
            # Without a timeout requests waits indefinitely; a timeout error
            # is caught below and reported as a failed check.
            response = requests.head(link, headers=headers, timeout=_REQUEST_TIMEOUT_SECONDS)
            if response.status_code == 403 and urlparse(link).netloc == "hi.hyperskill.org":
                return True, f"[WARN]: In automatics tests hi.hyperskill.org can return 403 error code. Now this happend to: {link}"
            if 400 <= response.status_code <= 599:
                return False, f"NOT valid url (returns {response.status_code}): {link}"
            return True, ""

        if link.startswith(('file://', 'psi_element://', 'tool_window://', 'settings://')):
            return True, ""  # Not supported for now

        # Assumes that it's a relative path
        if not _local_target_exists(task_folder, link):
            return False, f"NO such file: {task_folder}/{link}"
        return True, ""
    except Exception as e:
        # Deliberate catch-all: any failure while checking a link is turned
        # into a reported error instead of aborting the whole run.
        return False, str(e)
if __name__ == '__main__':
    # Script entry point: scan a course directory for task.md files, check
    # every link and image they reference, and exit non-zero if any is broken.
    args = sys.argv
    if len(args) < 2:
        print("No path provided")
        sys.exit(1)

    # Kept as a module-level name because check_link() reads it when
    # resolving course:// links.
    course_directory = args[1]
    task_description_name = 'task.md'

    print("\n===== Common info =====")
    print(f"Running for directory {course_directory} (absolute path: {os.path.abspath(course_directory)})")

    task_files = find_files(course_directory, task_description_name)

    print("\n===== The following links were found =====")
    error_entries = []
    for file in task_files:
        print(f"FILE: {file}")
        links, images = extract_links_from_markdown(read_file(file))
        links.extend(images)
        # os.path.dirname handles whichever separator os.path.join produced,
        # unlike slicing at the last "/".
        task_folder = os.path.dirname(file)
        for link in links:
            print(f"\t LINK: {link}")
            result, log = check_link(task_folder, link)
            if not result:
                error_entries.append(f"Error in file: {file}\n\t{log}\n")

    print("\n===== ERRORS LOG =====")
    if error_entries:
        print("".join(error_entries))
        sys.exit(1)
    else:
        print("No errors found")
        sys.exit(0)