This repository has been archived by the owner on Jul 21, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
archive_cli.py
216 lines (184 loc) · 8.81 KB
/
archive_cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
import argparse
import shutil
import stat
import os
import tqdm
import re
import git
import json
# STATIC PROPERTIES
# The route under which previous versions are saved.
previous_route = "previous"

# Characters that may appear inside an in-site link. Each fragment is written
# in regex character-class syntax and padded with spaces purely for
# readability; the padding is stripped and the fragments are concatenated into
# one string that is later placed between "[" and "]" in a pattern.
# Raw strings keep regex escapes such as \w from being read as (invalid)
# Python string escapes.
# NOTE(review): the lone backslash fragment is immediately followed by "$", so
# inside a character class the pair reads as an escaped "$". A literal
# backslash is therefore NOT part of the allowed set. Kept as-is so the
# resulting pattern is unchanged -- confirm whether backslashes were meant
# to be allowed.
allowed_in_link = "".join(
    fragment.strip()
    for fragment in (
        r" - ",
        r" ? ",
        r" \w ",
        " \\ ",
        r" $ ",
        r" \. ",
        r" ! ",
        r" \* ",
        r" ' ",
        r" () ",
        r" / ",
    )
)
# Error handler for windows by:
# https://stackoverflow.com/questions/2656322/shutil-rmtree-fails-on-windows-with-access-is-denied
def onerror(func, path, exc_info):
    """
    Error handler for ``shutil.rmtree``.

    If the error is due to an access error (read only file)
    it attempts to add write permission and then retries.

    If the error is for another reason it re-raises the error.

    Usage : ``shutil.rmtree(path, onerror=onerror)``

    Args:
        func: the function that failed; it is called again as ``func(path)``
            once write permission has been added.
        path: the path ``func`` was operating on.
        exc_info: the ``(type, value, traceback)`` tuple describing the
            failure; ``exc_info[1]`` is what gets re-raised.

    Raises:
        The original exception (``exc_info[1]``) when ``path`` is already
        writable, or whatever the retried ``func(path)`` raises.
    """
    if os.access(path, os.W_OK):
        # The path is already writable, so a missing write permission is not
        # the cause: surface the original failure instead of hiding it.
        raise exc_info[1]
    # Read-only entry: grant the owner write permission and retry the
    # operation that failed.
    os.chmod(path, stat.S_IWUSR)
    func(path)
def replace_links(filepath, version_name, version_displayname):
    """Rewrite one HTML file so its in-site links point into a previous version.

    The file is read, transformed and written back in place as UTF-8:

    * every quoted ``src`` / ``href`` value made up solely of characters from
      ``allowed_in_link`` is prefixed with ``/{previous_route}/{version_name}``
      (values containing any other character, such as the ``:`` of an
      absolute URL, do not match and are left untouched); the rewritten
      attribute is always emitted with double quotes;
    * the same prefix is applied to ``content="0; url=..."`` redirect targets;
    * the ``<!-- !previous versions banner! -->`` placeholder is replaced by
      the preserved-version banner;
    * links to the previous-versions and updates pages have the prefix
      removed again so they resolve against the site root.

    Args:
        filepath: path of the HTML file to rewrite.
        version_name: route segment of the preserved version, e.g. ``oct2018``.
        version_displayname: mapping whose ``"start"`` and ``"end"`` values
            are shown in the banner.
    """
    # read file contents
    with open(filepath, mode="r", encoding="utf8") as html:
        html_str = html.read()

    # format of the destination links, e.g. /previous/oct2018/...
    # Raw f-string: "\g<1>" must reach re.sub verbatim as a group reference.
    dest_link_format = rf"/{previous_route}/{version_name}\g<1>"

    # NOTE(review): the patterns below do not require a leading "/", so a
    # relative value such as "style.css" would be prefixed without a
    # separator. Presumably the site only emits root-relative links -- confirm.
    def substitute(prefix, html_str):
        # prefix="value" or prefix='value'  ->  prefix="/previous/<name>value"
        fromstr = f"{prefix}=[\"']([{allowed_in_link}]+)[\"']"
        tostr = f'{prefix}="{dest_link_format}"'
        return re.sub(fromstr, tostr, html_str)

    def substitute_redirection(prefix, html_str):
        # The opening quote belongs to the prefix here (content="0; url=...),
        # so only the closing quote is matched after the link.
        from_str = f"{prefix}=([{allowed_in_link}]+)[\"']"
        to_str = f'{prefix}={dest_link_format}"'
        return re.sub(from_str, to_str, html_str)

    html_str = substitute("src", html_str)
    html_str = substitute("href", html_str)
    html_str = substitute_redirection('content="0; url', html_str)

    # add the previous versions header; this happens after the link rewrite
    # so the banner's own links to the current site are not prefixed
    banner = (
        '<div class="container-fluid version-banner">'
        '<div class="icon-inline baseline mr-1">'
        f'<img src="/{previous_route}/{version_name}/theme/images/icon-warning-24px.svg">'
        '</div>This is a preserved version of the site that was '
        f'live between {version_displayname["start"]} and {version_displayname["end"]}. '
        '<a href="/resources/previous-versions/">'
        'See other versions</a> or <a href="/">'
        'the current version</a>.</div>'
    )
    html_str = html_str.replace("<!-- !previous versions banner! -->", banner)

    # replace previous versions link in dropdown
    html_str = html_str.replace(
        f"/{previous_route}/{version_name}/resources/previous-versions/",
        "/resources/previous-versions/",
    )
    # replace changelog link in dropdown
    html_str = html_str.replace(
        f"/{previous_route}/{version_name}/resources/updates/",
        "/resources/updates/",
    )

    # overwrite with updated html
    with open(filepath, mode="w", encoding="utf8") as updated_html:
        updated_html.write(html_str)
def _remove_tree_if_present(path, message):
    """Delete the directory tree at ``path`` when it exists.

    ``message`` is printed (without a newline) before the removal and is
    completed with "done" afterwards. Returns True when something was
    removed and False when ``path`` did not exist.
    """
    if not os.path.exists(path):
        return False
    print(message, end="", flush=True)
    shutil.rmtree(path, onerror=onerror)
    print("done")
    return True


def _rewrite_search_base_url(dest, version_name):
    """Point ``site_base_url`` in the clone's search.js at the preserved route."""
    search_file_path = os.path.join(dest, "theme", "scripts", "search.js")
    if not os.path.exists(search_file_path):
        return
    with open(search_file_path, mode="r", encoding="utf8") as search_file:
        search_contents = search_file.read()
    # \s* tolerates any amount of spacing around "=", including none.
    search_contents = re.sub(
        r'site_base_url\s*=\s*""',
        f'site_base_url = "/{previous_route}/{version_name}"',
        search_contents,
    )
    with open(search_file_path, mode="w", encoding="utf8") as search_file:
        search_file.write(search_contents)


def _append_archive_record(version_name, version_displayname, changelog,
                           cti_url, gh_pages_url):
    """Append the record for this version to archives.json in the working directory."""
    with open("archives.json", "r", encoding="utf8") as archives:
        archives_data = json.load(archives)
    archives_data.append({
        "path": version_name,
        "date_start": version_displayname["start"],
        "date_end": version_displayname["end"],
        "changelog": changelog,
        "cti_url": cti_url,
        "gh_pages_url": gh_pages_url,
        "retired": False
    })
    # Serialise before opening for writing so that a serialisation failure
    # cannot leave a truncated archives.json behind.
    serialised = json.dumps(archives_data, indent=4)
    with open("archives.json", "w", encoding="utf8") as archives:
        archives.write(serialised)


def preserve(version_name, version_displayname, changelog, cti_url, gh_pages_url):
    """Preserve the current version on github as a named previous version.

    Clones the ``gh-pages`` branch of the attack-website repository into a
    directory named ``version_name`` (relative to the current working
    directory, replacing any existing directory of that name), strips the
    parts that must not be carried into a preserved copy, rewrites in-site
    links in every ``.html`` file, and appends a record to ``archives.json``
    in the current working directory.

    Args:
        version_name: route / directory name of the preserved version.
        version_displayname: mapping with ``"start"`` and ``"end"`` keys; the
            values are shown in the banner and stored in archives.json.
        changelog: stored as ``"changelog"`` in the archives.json record.
        cti_url: stored as ``"cti_url"`` in the archives.json record.
        gh_pages_url: stored as ``"gh_pages_url"`` in the archives.json record.
    """
    print(f"preserving current version under route '{version_name}'"
          f" with display name '{version_displayname}'")
    dest = version_name

    # handle replace
    _remove_tree_if_present(
        dest,
        "\t- previous version exists with this name already: deleting previous version... ",
    )

    print("\t- cloning attack-website github repo... ", end="", flush=True)
    git.Repo.clone_from("https://github.com/mitre-attack/attack-website.git", dest, branch='gh-pages')
    print("done")

    # we don't want the cloned repo to use its own version control so
    # remove the .git folder
    print("\t- removing .git from cloned repo... ", end="", flush=True)
    shutil.rmtree(os.path.join(dest, ".git"), onerror=onerror)
    print("done")

    # remove cname
    print("\t- removing CNAME... ", end="", flush=True)
    cname = os.path.join(dest, "CNAME")
    if os.path.exists(cname):
        os.remove(cname)
    print("done")

    # remove previous versions from this previous version
    removed_previous = _remove_tree_if_present(
        os.path.join(dest, previous_route),
        f"\t- removing '{previous_route}' folder from preserved version to prevent recursive previous versions... ",
    )
    if not removed_previous:
        print(f"\t- no '{previous_route}' folder to remove from from preserved version")

    # the previous-versions listing is not kept inside a preserved version
    _remove_tree_if_present(
        os.path.join(dest, "resources", "previous-versions"),
        "\t- removing previous-versions page...",
    )
    # ditto for changelog
    _remove_tree_if_present(
        os.path.join(dest, "resources", "updates"),
        "\t- removing updates page...",
    )

    # parse and replace content links
    print("\t- replacing links... ", end="", flush=True)
    for directory, _, files in tqdm.tqdm(os.walk(dest),
                                         desc="\t- replacing links"):
        for filename in files:
            if filename.endswith(".html"):
                replace_links(os.path.join(directory, filename),
                              version_name, version_displayname)
    # complete the progress line, as every other step does
    print("done")

    # replace site_base_url in search.js
    print("\t- replacing 'site_base_url' in search.js... ",end="", flush=True)
    _rewrite_search_base_url(dest, version_name)
    print("done")

    # update archives.json
    print("\t- updating archives.json... ", end="", flush=True)
    _append_archive_record(version_name, version_displayname, changelog,
                           cti_url, gh_pages_url)
    print("done")
if __name__ == "__main__":
    # Command-line entry point: collect six positional string arguments and
    # hand them to preserve().
    cli = argparse.ArgumentParser(
        description="preserve the current state of attack.mitre.org as a named previous version"
    )
    # (argument name, help text) pairs, registered in positional order.
    positional_arguments = (
        ("route",
         "The route under which to store the previous version. Format should be monthYEAR, e.g 'january1970'"),
        ("date_start",
         "the date the current version went live in 'Month day, YEAR' format, e.g 'January 1, 1970'"),
        ("date_end",
         "the date the current version is being replaced in 'Month day, YEAR' format, e.g 'January 1, 1970'"),
        ("changelog",
         "The name of the changelog file for this version, to be found in resources/updates. Typically updates-MONTH-YEAR, e.g 'updates-january-1970'"),
        ("cti_url",
         "The mitre/cti url for this version, which is listed at https://github.com/mitre/cti/releases"),
        ("gh_pages_url",
         "The url to the latest github commit for this version, found at https://github.com/mitre-attack/attack-website/commits/master"),
    )
    for argument_name, argument_help in positional_arguments:
        cli.add_argument(argument_name, type=str, help=argument_help)

    parsed = cli.parse_args()
    # The two dates travel together as the version's display name.
    display_name = {"start": parsed.date_start, "end": parsed.date_end}
    preserve(parsed.route, display_name, parsed.changelog,
             parsed.cti_url, parsed.gh_pages_url)