#!/usr/bin/env python3
import couchconnection
import json
import re
import sys
import urllib.request
import urllib.parse
import urllib.error
from constants import LATEST_MIGRATION_VERSION, MIGRATIONS_DOCUMENT_ID
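
# Schema migrations for the ARSnova CouchDB database. The current schema
# version is tracked in a single document (MIGRATIONS_DOCUMENT_ID); each
# "if current_version == N" block in migrate() upgrades the database by one
# step and bumps the version document, so an outdated database catches up
# step by step in a single run.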

(db, conn) = couchconnection.arsnova_connection(
    "/etc/arsnova/arsnova.properties")
db_url = "/" + db
migrations_url = db_url + "/" + MIGRATIONS_DOCUMENT_ID


def bump(next_version):
    # Read-modify-write: fetch the document first so the PUT carries the
    # current _rev and does not conflict.
    conn.request("GET", migrations_url)
    res = conn.getresponse()
    migration = json.loads(res.read())
    migration["version"] = next_version
    res = conn.json_put(migrations_url, json.dumps(migration))
    return res.read()


def migrate(migration):
    global db_url, migrations_url
    all_docs_url = db_url + "/_all_docs"
    bulk_url = db_url + "/_bulk_docs"
    cleanup_url = db_url + "/_view_cleanup"
    current_version = migration["version"]

    if current_version >= LATEST_MIGRATION_VERSION:
        print("Database is already up to date.")
        sys.exit(0)
    else:
        conn.require_legacy_couchdb_version()
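
    # Note: the migrations below query documents through temporary views
    # (POST /<db>/_temp_view), a feature that was removed in CouchDB 2.0;
    # this is presumably what require_legacy_couchdb_version() guards
    # against.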

    # Changes to 'skill_question' and 'skill_question_answer':
    # added 'questionVariant' field, defaulting to 'lecture' value
    if current_version == 0:
        def question_migration():
            questions = "{ \"map\": \"function(doc) { if (doc.type == 'skill_question') emit(doc._id, doc); }\" }"
            answers = "{ \"map\": \"function(doc) { if (doc.type == 'skill_question_answer') emit(doc._id, doc); }\" }"

            # We are doing three steps:
            # 1) Load all documents we are going to migrate in bulk
            # 2) Each document that is not migrated yet is changed
            # 3) Update all changed documents in bulk
            #
            # Because the documents could change in the database while
            # we perform any of these steps, we will get an error for
            # those documents. To solve this we repeat all steps until
            # no more errors occur.
            def migrate_with_temp_view(temp_view):
                while True:
                    res = conn.temp_view(db_url, temp_view)
                    doc = json.loads(res.read().decode('utf-8'))
                    ds = []
                    for col in doc["rows"]:
                        val = col["value"]
                        if "questionVariant" not in val:
                            ds.append(val)
                    for d in ds:
                        d["questionVariant"] = "lecture"
                    res = conn.json_post(bulk_url, json.dumps({"docs": ds}))
                    result_docs = json.loads(res.read().decode('utf-8'))
                    errors = []
                    for result in result_docs:
                        if "error" in result:
                            errors.append(result)
                    if not errors:
                        # All documents were migrated;
                        # jump out of the loop and exit this function.
                        break

            print("Migrating all Question documents...")
            migrate_with_temp_view(questions)
            print("Migrating all Answer documents...")
            migrate_with_temp_view(answers)

        # skill_question
        question_migration()
        # bump database version
        current_version = 1
        print(bump(current_version))
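
    # Several of the following steps only delete obsolete design documents.
    # Deleting a design document orphans its view index files on disk; the
    # _view_cleanup call at the end of migrate() reclaims that space.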
    if current_version == 1:
        print("Deleting obsolete food vote design document...")
        if not conn.delete(db_url + "/_design/food_vote"):
            print("Food vote design document not found")
        # bump database version
        current_version = 2
        print(bump(current_version))

    if current_version == 2:
        print("Deleting obsolete user ranking, understanding, and admin design documents...")
        if not conn.delete(db_url + "/_design/user_ranking"):
            print("User ranking design document not found")
        if not conn.delete(db_url + "/_design/understanding"):
            print("Understanding design document not found")
        if not conn.delete(db_url + "/_design/admin"):
            print("Admin design document not found")
        # bump database version
        current_version = 3
        print(bump(current_version))
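
    # Bug #13313: abstentions on freetext questions were stored without a
    # 'questionVariant'. The variant is recovered by joining each affected
    # answer to its parent question: the temp view emits doc.questionId as
    # the row value, and a POST to _all_docs with {"keys": [...]} and
    # ?include_docs=true bulk-fetches exactly those question documents.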
    if current_version == 3:
        def add_variant_to_freetext_abstention_answers():
            answers = "{ \"map\": \"function(doc) { if (doc.type == 'skill_question_answer' && typeof doc.questionVariant === 'undefined' && doc.abstention == true) emit(doc._id, doc.questionId); }\" }"
            # get all bug-affected answer documents
            res = conn.temp_view_with_params(
                db_url, "?include_docs=true", answers)
            doc = json.loads(res.read().decode('utf-8'))
            questions = []
            answers = []
            for col in doc["rows"]:
                questions.append(col["value"])
                answers.append(col["doc"])
            # bulk fetch all (unique) question documents for which we found
            # problematic answers
            res = conn.json_post(
                all_docs_url + "?include_docs=true",
                json.dumps({"keys": list(set(questions))}))
            result_docs = json.loads(res.read().decode('utf-8'))
            # we need the variant of each question so that we can copy it
            # into the answer document
            questions = []
            for result in result_docs["rows"]:
                questions.append(result["doc"])
            for answer in answers:
                for question in questions:
                    if answer["questionId"] == question["_id"]:
                        answer["questionVariant"] = question["questionVariant"]
            # bulk update the answers
            res = conn.json_post(bulk_url, json.dumps({"docs": answers}))
            result_docs = json.loads(res.read().decode('utf-8'))
            print(result_docs)

        print("Fixing freetext answers (abstentions) with missing question variant (#13313)...")
        add_variant_to_freetext_abstention_answers()
        # bump database version
        current_version = 4
        print(bump(current_version))

    if current_version == 4:
        print("Deleting obsolete learning_progress design documents...")
        if not conn.delete(db_url + "/_design/learning_progress_course_answers"):
            print("course_answers design document not found")
        if not conn.delete(db_url + "/_design/learning_progress_maximum_value"):
            print("maximum_value design document not found")
        if not conn.delete(db_url + "/_design/learning_progress_user_values"):
            print("learning_progress_user_values design document not found")
        # bump database version
        current_version = 5
        print(bump(current_version))

    if current_version == 5:
        print("Deleting misspelled 'statistic' design document...")
        if not conn.delete(db_url + "/_design/statistic"):
            print("'statistic' design document not found")
        # bump database version
        current_version = 6
        print(bump(current_version))
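
    # Before picture answers existed, a freetext question implicitly meant
    # "text answer enabled". This step makes that explicit, e.g.:
    #   { "questionType": "freetext", ... }
    # becomes
    #   { "questionType": "freetext", "imageQuestion": false,
    #     "textAnswerEnabled": true, ... }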
    if current_version == 6:
        print("Transforming pre-picture-answer freetext questions into text only questions (#15613)...")

        def add_text_answer_to_freetext_questions():
            old_freetext_qs = "{ \"map\": \"function(doc) { if (doc.type == 'skill_question' && doc.questionType == 'freetext' && typeof doc.textAnswerEnabled === 'undefined') emit(doc._id); }\" }"
            # get all bug-affected documents
            res = conn.temp_view_with_params(
                db_url, "?include_docs=true", old_freetext_qs)
            doc = json.loads(res.read().decode('utf-8'))
            questions = []
            for result in doc["rows"]:
                questions.append(result["doc"])
            # add missing properties
            for question in questions:
                question["imageQuestion"] = False
                question["textAnswerEnabled"] = True
            # bulk update the documents
            res = conn.json_post(bulk_url, json.dumps({"docs": questions}))
            result_docs = json.loads(res.read().decode('utf-8'))
            print(result_docs)

        add_text_answer_to_freetext_questions()
        # bump database version
        current_version = 7
        print(bump(current_version))
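
    # The flat 'learningProgressType' string on session documents is wrapped
    # into an options object, e.g.:
    #   { "type": "session", "learningProgressType": "questions", ... }
    # becomes
    #   { "type": "session",
    #     "learningProgressOptions": { "type": "questions",
    #                                  "questionVariant": "" }, ... }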
    if current_version == 7:
        print("Transforming session documents to new learning progress options format (#15617)...")

        def change_learning_progress_property_on_session():
            sessions = "{ \"map\": \"function(doc) { if (doc.type == 'session' && doc.learningProgressType) emit(doc._id); }\" }"
            res = conn.temp_view_with_params(
                db_url, "?include_docs=true", sessions)
            doc = json.loads(res.read().decode('utf-8'))
            sessions = []
            for result in doc["rows"]:
                sessions.append(result["doc"])
            # change property 'learningProgressType' to 'learningProgressOptions'
            for session in sessions:
                currentProgressType = session.pop(
                    "learningProgressType", "questions")
                progressOptions = {
                    "type": currentProgressType, "questionVariant": ""}
                session["learningProgressOptions"] = progressOptions
            # bulk update sessions
            res = conn.json_post(bulk_url, json.dumps({"docs": sessions}))
            result_docs = json.loads(res.read().decode('utf-8'))
            print(result_docs)

        change_learning_progress_property_on_session()
        # bump database version
        current_version = 8
        print(bump(current_version))
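
    # Example: given user documents for 'Alice', 'ALICE', and 'alice', the
    # two mixed-case variants are grouped under the key 'alice'. Unactivated
    # duplicates are deleted, duplicates of an existing all-lowercase account
    # are locked, and a single remaining variant is renamed to lowercase.
    # (The user/all view is assumed to emit the username as key and the user
    # document as value.)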
    if current_version == 8:
        print("Migrating DB and LDAP user IDs to lowercase...")
        conn.request("GET", db_url + "/_design/user/_view/all")
        res = conn.getresponse()
        doc = json.loads(res.read().decode('utf-8'))
        affected_users = {}
        unaffected_users = []
        bulk_docs = []
        # Look for user documents where the user ID is not in lowercase
        # 1) Delete document if account has not been activated
        # 2) Lock account if a lowercase version already exists
        # 3) Convert user ID to lowercase if only one capitalization exists
        for user_doc in doc["rows"]:
            if user_doc["key"] != user_doc["key"].lower():
                # create a list of user documents since there might be multiple
                # items for different capitalizations
                affected_users.setdefault(
                    user_doc["key"].lower(), []).append(user_doc["value"])
            else:
                unaffected_users.append(user_doc["key"])
        for uid, users in affected_users.items():
            migration_targets = []
            for user in users:
                if "activationKey" in user:
                    print("User %s has not been activated. Deleting document %s..." % (
                        user["username"], user["_id"]))
                    conn.delete(db_url + "/" + user["_id"])
                elif uid in unaffected_users:
                    print("Migration target exists. Locking duplicate user %s (document %s)..." % (
                        user["username"], user["_id"]))
                    user["locked"] = True
                    bulk_docs.append(user)
                else:
                    migration_targets.append(user)
            if len(migration_targets) > 1:
                print("Cannot migrate some users automatically. Conflicting duplicate users found:")
                for user in migration_targets:
                    print("Locking user %s (document %s)..." %
                          (user["username"], user["_id"]))
                    user["locked"] = True
                    bulk_docs.append(user)
            elif migration_targets:
                # Select the single remaining candidate explicitly; the loop
                # variable from above may point at a different (already
                # handled) user document.
                user = migration_targets[0]
                print("Migrating user %s (document %s)..." %
                      (user["username"], user["_id"]))
                user["username"] = uid
                bulk_docs.append(user)

        # Look for data where the assigned user's ID is not in lowercase
        # 1) Migrate if user ID was affected by previous migration step
        # 2) Exclude Facebook and Google account IDs
        # 3) Exclude guest account IDs
        # 4) Migrate all remaining IDs (LDAP)
        def reassign_data(type, user_prop):
            print("Reassigning %s data to migrated users..." % type)
            migration_view = "{ \"map\": \"function(doc) { function check(doc, type, uid) { return doc.type === type && uid !== uid.toLowerCase() && uid.indexOf('Guest') !== 0; } if (check(doc, '%s', doc.%s)) { emit(doc._id, doc); }}\" }" % (
                type, user_prop)
            res = conn.temp_view(db_url, migration_view)
            doc = json.loads(res.read().decode('utf-8'))
            print("Documents: %d" % len(doc["rows"]))
            for affected_doc in doc["rows"]:
                val = affected_doc["value"]
                print(affected_doc["id"], val[user_prop])
                # exclude Facebook and Google accounts from migration (might be
                # redundant)
                if (not re.match("https?:", val[user_prop]) and "@" not in val[user_prop]) or val[user_prop].lower() in affected_users:
                    val[user_prop] = val[user_prop].lower()
                    bulk_docs.append(val)
                else:
                    print("Skipped %s (Facebook/Google account)" %
                          val[user_prop])

        reassign_data("session", "creator")
        reassign_data("interposed_question", "creator")
        reassign_data("skill_question_answer", "user")
        reassign_data("logged_in", "user")
        reassign_data("motdlist", "username")
        # bulk update users and assignments
        res = conn.json_post(bulk_url, json.dumps({"docs": bulk_docs}))
        if res:
            res.read()
        # bump database version
        current_version = 9
        print(bump(current_version))
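
    # Session-scoped MotDs used to reference their session only by its short
    # keyword ('sessionkey'). This step resolves the keyword to the session
    # document's _id via the session/by_keyword view and stores it as
    # 'sessionId'.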
    if current_version == 9:
        print("Migrating MotD documents...")
        migration_view = "{ \"map\": \"function(doc) { if (doc.type === 'motd' && doc.audience === 'session' && !doc.sessionId) { emit(null, doc); } }\" }"
        res = conn.temp_view(db_url, migration_view)
        docs = json.loads(res.read().decode('utf-8'))
        print("Documents: %d" % len(docs["rows"]))
        bulk_docs = []
        for affected_doc in docs["rows"]:
            val = affected_doc["value"]
            print(affected_doc["id"], val["motdkey"])
            # URL-encode the JSON string key; session keys are normally
            # alphanumeric, so this is purely defensive.
            conn.request(
                "GET", db_url + "/_design/session/_view/by_keyword?key=" +
                urllib.parse.quote('"%s"' % val["sessionkey"]))
            session_res = conn.getresponse()
            session_docs = json.loads(session_res.read().decode('utf-8'))
            if len(session_docs["rows"]) > 0:
                val["sessionId"] = session_docs["rows"][0]["id"]
                bulk_docs.append(val)
            else:
                print("No session document found for " + val["sessionkey"])
        # bulk update MotDs
        res = conn.json_post(bulk_url, json.dumps({"docs": bulk_docs}))
        if res:
            res.read()
        # bump database version
        current_version = 10
        print(bump(current_version))

    if current_version == 10:
        print("Deleting 'sort_order' design document...")
        if not conn.delete(db_url + "/_design/sort_order"):
            print("'sort_order' design document not found")
        # bump database version
        current_version = 11
        print(bump(current_version))

    if current_version == 11:
        # Next migration goes here
        pass
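
    # To add migration 11 -> 12: replace the 'pass' above with the actual
    # work, set current_version = 12 and print(bump(current_version)), and
    # raise LATEST_MIGRATION_VERSION accordingly (assumed to live in
    # constants.py, per the import at the top).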

    # Remove view index files orphaned by the design document deletions above.
    conn.json_post(cleanup_url)
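
# Bootstrap: fetch the version document; on 404 (fresh database) create it
# with version 0 and run every migration, otherwise resume from the stored
# version.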
conn.request("GET", migrations_url)
res = conn.getresponse()
mig = res.read()
if res.status == 404:
    res = conn.json_post(db_url, json.dumps(
        {"_id": MIGRATIONS_DOCUMENT_ID, "version": 0}))
    res.read()
    migrate({"version": 0})
else:
    migrate(json.loads(mig.decode('utf-8')))