-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathRenameDiscussions.py
274 lines (223 loc) · 8.39 KB
/
RenameDiscussions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
import sys
# Stop right away, with a readable message, when an interpreter older than
# Python 3 is used to launch this script.
if sys.version_info[0] < 3:
    sys.exit("I am a Python 3 script. Run me with python3.")
import os
import glob
import argparse
import xml.etree.ElementTree as ET
# Usage/help text. It is handed to argparse as the usage string and is the
# message printed when the script exits because --help / -h was given.
instructions = """
To use:
python3 RenameDiscussions.py path/to/course.xml (options)
Run this on a course folder, or a course.xml file inside an edX course folder (from export).
The discussion components will automatically have their category and
subcategory set using the section, subsection, and unit names for the course.
There are currently no options. This script may fail on courses where
the discussion components are in their own folder instead of
inline in the verticals.
Last update: Feb 22nd 2022
"""
def getComponentInfo(folder, filename, child, parentage, args):
    """Describe one leaf component and relabel it if it is a discussion.

    The component is read from ``<folder>/<filename>.xml`` when that file can
    be opened; otherwise the inline element ``child`` is used as-is.

    For a ``discussion`` element, ``discussion_category`` is set to
    ``"<section>: <subsection>"`` and ``discussion_target`` to the page name,
    all taken from ``parentage``.  When the discussion was loaded from its
    own file, that file is written back so the change is not lost.

    Args:
        folder: Directory in which the component's own XML file is looked up.
        filename: File name without the ``.xml`` extension, or ``None`` when
            the element has no ``url_name`` (the inline element is then used).
        child: The inline XML element for this component.
        parentage: Dict with at least the keys ``"section"``,
            ``"subsection"`` and ``"page"``.
        args: Not used by this function.

    Returns:
        A dict with ``"contents"`` (``type``, ``name`` and ``component``
        entries), ``"parent_name"`` (the display name, or the tag when there
        is none) and ``"childroot"`` (the element that was inspected).
    """
    root = child
    tree = None
    component_path = None

    # A missing url_name arrives here as None; there is no file to look for,
    # so fall straight through to the inline element.
    if filename is not None:
        candidate = os.path.join(folder, filename + ".xml")
        try:
            tree = ET.parse(candidate)
        except OSError:
            # If we can't get a file, use the inline XML instead.
            tree = None
        else:
            root = tree.getroot()
            component_path = candidate

    # Components without a display name are labelled with their tag.
    name = root.attrib.get("display_name", root.tag)
    temp = {
        "type": root.tag,
        "name": name,
        # space for other info
    }

    # Note: edX does discussions inline in the vertical xml by default.
    if root.tag == "discussion":
        # Drop any previous values first so the fresh ones are appended at
        # the end of the attribute list, then add the new ones.
        root.attrib.pop("discussion_category", None)
        root.attrib.pop("discussion_target", None)
        root.attrib["discussion_category"] = (
            parentage["section"] + ": " + parentage["subsection"]
        )
        root.attrib["discussion_target"] = parentage["page"]

        # A discussion kept in its own file has to be saved here: nothing
        # else holds a reference to this parsed tree.
        if component_path is not None:
            tree.write(component_path, encoding="utf-8", xml_declaration=False)

    # Label all of them as components regardless of type.
    temp["component"] = name

    return {"contents": temp, "parent_name": name, "childroot": root}
def drillDown(folder, filename, root, parentage, args):
    """Recurse into one container, from its own file or from inline XML.

    Tries to parse ``<folder>/<filename>.xml``.  When that file cannot be
    opened, or when ``filename`` is ``None`` (the element had no
    ``url_name``), the inline element ``root`` is traversed instead.

    When the container was loaded from a file and ``getXMLInfo`` reports
    ``has_discussion``, the file is written back in place.

    Args:
        folder: Directory in which the container's XML file is looked up.
        filename: File name without the ``.xml`` extension, or ``None``.
        root: Inline XML element to fall back on.
        parentage: Dict of ancestor display names, passed on to
            ``getXMLInfo``.
        args: Passed through to ``getXMLInfo`` unchanged.

    Returns:
        The dict produced by ``getXMLInfo``, or a placeholder dict with
        ``"found_file": False`` if the inline traversal yields nothing.
    """
    # Without a url_name there is no file name to build; go inline directly
    # instead of failing on None + ".xml".
    file_path = None
    if filename is not None:
        file_path = os.path.join(folder, filename + ".xml")

    tree = None
    if file_path is not None:
        try:
            tree = ET.parse(file_path)
        except IOError:
            tree = None

    if tree is None:
        # If we can't get a file, try to traverse inline XML.
        ddinfo = getXMLInfo(folder, root, parentage, args)
        if ddinfo:
            return ddinfo
        # Defensive fallback, kept in case the traversal ever returns an
        # empty result.
        print(
            "Possible missing file or empty XML element: "
            + (file_path if file_path is not None else folder)
        )
        return {"contents": [], "parent_name": "", "found_file": False}

    XMLInfo = getXMLInfo(folder, tree.getroot(), parentage, args)

    # Rewrite all verticals that have discussion components.
    if XMLInfo["has_discussion"]:
        tree.write(file_path, encoding="utf-8", xml_declaration=False)

    return XMLInfo
def getXMLInfo(folder, root, parentage, args):
    """Walk the children of one container element and describe each of them.

    The container's display name (or its tag when it has none) is recorded
    in ``parentage`` under the key matching its level: ``course``,
    ``section`` for a chapter, ``subsection`` for a sequential, ``page`` for
    a vertical, and ``smaller`` for any other container type.

    Each child is then handled according to its tag: ``wiki`` is skipped,
    container tags are recursed into with ``drillDown``, and everything else
    is treated as a leaf and passed to ``getComponentInfo``.

    Args:
        folder: Folder of the current element; the folder for each child is
            built from its parent directory plus the child's tag.
        root: The container element whose children are walked.
        parentage: Dict of ancestor display names; updated in place.
        args: Passed through to ``drillDown`` / ``getComponentInfo``.

    Returns:
        A dict with ``"contents"`` (one dict per child), ``"parent_name"``,
        ``"found_file"`` (always ``True``), ``"root"`` and
        ``"has_discussion"``.  ``has_discussion`` is ``True`` when ``root``
        directly contains a discussion, or contains one through containers
        that are written inline inside it.
    """
    # We need lists of container nodes so we can tell
    # whether we have to do more recursion.
    branch_nodes = [
        "course",
        "chapter",
        "sequential",
        "vertical",
        "split_test",
        "conditional",
    ]
    # Which parentage entry each outline level fills in.
    level_keys = {
        "course": "course",
        "chapter": "section",
        "sequential": "subsection",
        "vertical": "page",
    }

    contents = []
    has_discussion = False

    # Some items are created without a display name; use their tag name instead.
    display_name = root.attrib.get("display_name", root.tag)

    if root.tag in level_keys:
        parentage[level_keys[root.tag]] = display_name
    elif root.tag in branch_nodes:
        parentage["smaller"] = display_name

    for index, child in enumerate(root):
        named = "display_name" in child.attrib
        temp = {
            "index": index,
            "type": child.tag,
            # Placeholder name (tag + position) when there is no display_name.
            "name": child.attrib["display_name"] if named else child.tag + str(index),
            # There is no placeholder for url_name; a missing one is None.
            # Note that even some inline XML have url_names.
            "url": child.attrib.get("url_name"),
            "contents": [],
            "links": [],
            "images": [],
            "sub": [],
        }

        nextFile = os.path.join(os.path.dirname(folder), child.tag)

        # Some tags trip us up. Add them here so we can skip them.
        if child.tag in ["wiki"]:
            child_info = {"contents": False, "parent_name": child.tag}
            del temp["contents"]
        elif child.tag in branch_nodes:
            child_info = drillDown(nextFile, temp["url"], child, parentage, args)
            temp["contents"] = child_info["contents"]
            # When the returned root is the child element itself, the
            # container was traversed inline, so its discussions live in the
            # same XML tree as ``root``.  Report them upward so whichever
            # file holds that tree gets rewritten.
            if child_info.get("root") is child and child_info.get("has_discussion"):
                has_discussion = True
        else:
            child_info = getComponentInfo(
                nextFile, temp["url"], child, parentage, args
            )
            # Looking for discussions that need to get fixed.
            if child.tag == "discussion":
                has_discussion = True
            # For leaf nodes, add item info to the dict
            # instead of adding a new contents entry
            temp.update(child_info["contents"])
            del temp["contents"]

        # If the display name was temporary, replace it.
        if not named:
            temp["name"] = child_info["parent_name"]

        # We need not only a name, but a custom key with that name.
        temp[temp["type"]] = temp["name"]

        contents.append(temp)

    return {
        "contents": contents,
        "parent_name": display_name,
        "found_file": True,
        "root": root,
        "has_discussion": has_discussion,
    }
# Main function
def RenameDiscussions(args=None):
    """Relabel the discussion components of one or more exported courses.

    Args:
        args: Command-line style list of strings.  Each positional entry is
            a course folder or the path of a ``course.xml`` file.  When
            omitted, ``["-h"]`` is used, which prints the instructions and
            exits.

    Raises:
        SystemExit: When help is requested, when none of the given paths
            exist, or when none of them leads to a ``course.xml`` file.
    """
    if args is None:
        args = ["-h"]

    # Handle arguments and flags
    parser = argparse.ArgumentParser(usage=instructions, add_help=False)
    parser.add_argument("--help", "-h", action="store_true")
    parser.add_argument("file_names", nargs="*")

    # "extra" will help us deal with out-of-order arguments.
    args, extra = parser.parse_known_args(args)

    if args.help:
        sys.exit(instructions)

    # Each argument is escaped before globbing, so wildcard characters are
    # taken literally; the net effect is to keep only paths that exist.
    file_names = []
    for candidate in list(args.file_names) + list(extra):
        file_names += glob.glob(glob.escape(candidate))

    # Don't run the script on itself.
    if sys.argv[0] in file_names:
        file_names.remove(sys.argv[0])

    # If the filenames don't exist, say so and quit.
    if not file_names:
        sys.exit("No file or directory found by that name.")

    courses_done = 0
    for name in file_names:
        # Get the course root directory for this path.  It is reset for every
        # path so that a non-matching path can neither reuse the previous
        # course nor leave the variable undefined.
        rootFileDir = None
        if os.path.isdir(name):
            if os.path.exists(os.path.join(name, "course.xml")):
                rootFileDir = name
        elif "course.xml" in name:
            rootFileDir = os.path.dirname(name)

        if rootFileDir is None:
            print("Skipping " + name + ": no course.xml found there.")
            continue

        # Open course's root xml file
        rootFilePath = os.path.join(rootFileDir, "course.xml")
        course_tree = ET.parse(rootFilePath)
        course_root = course_tree.getroot()

        # Get the current course run filename
        course_dict = {
            "type": course_root.tag,
            "name": "",
            "url": course_root.attrib["url_name"],
            "contents": [],
        }

        # Ancestor names, filled in while the outline is traversed.
        parentage = {
            "course": "",
            "section": "",
            "subsection": "",
            "page": "",
            "smaller": "",
        }

        course_info = drillDown(
            os.path.join(rootFileDir, course_dict["type"]),
            course_dict["url"],
            course_root,
            parentage,
            args,
        )

        print("Updated discussion names in " + course_info["parent_name"])
        courses_done += 1

    if courses_done == 0:
        sys.exit("No course.xml file found in the given location(s).")
if __name__ == "__main__":
    # Reached only when the file is executed as a script, not on import.
    RenameDiscussions(sys.argv)