-
Notifications
You must be signed in to change notification settings - Fork 0
/
migrate_convertacard_results.py
161 lines (137 loc) · 5.45 KB
/
migrate_convertacard_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python
"""
Migrate results from previous Convert-a-Card projects to Web Annotations.
Usage:
python cli/migrate_convertacard_results.py
"""
import sys
import json
import uuid
import click
from datetime import datetime
from sqlalchemy.sql import text
from pybossa.core import db, create_app
# Build the application object at import time: the annotation helpers read
# app.config and run() needs app.app_context() for database access.
# NOTE(review): run_as_server=False presumably skips server-only setup --
# confirm against pybossa.core.create_app.
app = create_app(run_as_server=False)
def get_xsd_datetime():
    """Return the current UTC time as an xsd:datetime string.

    The result has second precision and a literal trailing 'Z',
    e.g. '2018-01-31T12:00:00Z'.
    """
    xsd_format = '%Y-%m-%dT%H:%M:%SZ'
    utc_now = datetime.utcnow()
    return utc_now.strftime(xsd_format)
def get_anno_base(motivation):
    """Build the fields shared by every Web Annotation this script creates.

    The annotation receives a fresh UUID4 string as its id, the given
    motivation, and identical 'created' and 'generated' timestamps taken
    from a single call to get_xsd_datetime(). The generator entry is
    filled from the GITHUB_REPO and SPA_SERVER_NAME config values.
    """
    timestamp = get_xsd_datetime()
    generator = {
        "id": app.config.get('GITHUB_REPO'),
        "type": "Software",
        "name": "LibCrowds",
        "homepage": app.config.get('SPA_SERVER_NAME')
    }
    return {
        "@context": "http://www.w3.org/ns/anno.jsonld",
        "id": str(uuid.uuid4()),
        "type": "Annotation",
        "motivation": motivation,
        "created": timestamp,
        "generated": timestamp,
        "generator": generator
    }
def create_commenting_anno(target, value):
    """Return a 'commenting' Web Annotation on target.

    The annotation body is a single plain-text TextualBody holding value.
    """
    comment_body = {
        "type": "TextualBody",
        "value": value,
        "purpose": "commenting",
        "format": "text/plain"
    }
    annotation = get_anno_base('commenting')
    annotation['target'] = target
    annotation['body'] = comment_body
    return annotation
def create_desc_anno(target, value, tag):
    """Return a 'describing' Web Annotation on target.

    The body is a two-item list: a plain-text describing TextualBody that
    holds value (stamped with a 'modified' time), followed by a tagging
    TextualBody that holds tag.
    """
    annotation = get_anno_base('describing')
    description = {
        "type": "TextualBody",
        "purpose": "describing",
        "value": value,
        "format": "text/plain",
        "modified": get_xsd_datetime()
    }
    tagging = {
        "type": "TextualBody",
        "purpose": "tagging",
        "value": tag
    }
    annotation['target'] = target
    annotation['body'] = [description, tagging]
    return annotation
# Keys that mark a result as coming from an earlier Convert-a-Card project.
_LEGACY_KEYS = ('oclc', 'shelfmark', 'oclc-option',
                'shelfmark-option', 'comments-option')


def _pick_legacy_value(old_info, key, new_key):
    """Return the preferred value for one migrated field.

    Candidates are tried in order: the analysed '<key>-option' value, the
    value already stored under new_key, then the raw legacy key value.
    The first truthy candidate wins; if none is truthy the raw legacy
    value is returned as-is (possibly None).
    """
    analysed = old_info.get('{}-option'.format(key))
    current = old_info.get(new_key)
    legacy = old_info.get(key)
    return analysed or current or legacy


def _extract_legacy_fields(old_info):
    """Return the migrated field values, or None if nothing needs migrating.

    A result needs migrating only when its info is non-empty and contains
    at least one of the legacy keys.
    """
    if not old_info or not any(key in old_info for key in _LEGACY_KEYS):
        return None
    return {
        'control_number': _pick_legacy_value(old_info, 'oclc',
                                             'control_number'),
        'reference': _pick_legacy_value(old_info, 'shelfmark', 'reference'),
        'comments': _pick_legacy_value(old_info, 'comments', 'comments'),
    }


def _build_annotations(target, fields):
    """Return the list of Web Annotations for one result's field values.

    A commenting annotation is added when there is a comment. The two
    describing annotations are added only when both the control number
    and the reference are present.
    """
    annotations = []
    if fields['comments']:
        annotations.append(create_commenting_anno(target,
                                                  fields['comments']))
    if fields['control_number'] and fields['reference']:
        annotations.append(create_desc_anno(target,
                                            fields['control_number'],
                                            'control_number'))
        annotations.append(create_desc_anno(target,
                                            fields['reference'],
                                            'reference'))
    return annotations


@click.command()
def run():
    """Migrate one category's legacy Convert-a-Card results.

    Lists the categories, prompts for a category ID, then visits every
    result in that category's projects whose info has no 'annotations'
    value. Results that still carry legacy keys have their info replaced
    with {"annotations": [...]}; all other results are left untouched.

    Raises KeyError if a result being migrated belongs to a task whose
    info has no 'link'.
    """
    with app.app_context():
        # Prompt for a category ID
        categories = db.engine.execute(
            text('''SELECT id, name FROM category''')
        ).fetchall()
        for category in categories:
            click.echo('{0}: {1}'.format(category.id, category.name))

        category_id = click.prompt('Please enter a category ID', type=int)
        if category_id not in {c.id for c in categories}:
            click.echo('Invalid choice')
            return

        # Get the category's projects
        projects = db.engine.execute(
            text('''SELECT id
                    FROM project
                    WHERE category_id=:category_id'''),
            category_id=category_id
        ).fetchall()
        click.echo('Updating {} projects'.format(len(projects)))

        # Both statements are loop-invariant, so build them once.
        select_results = text('''SELECT result.id, result.info,
                                 task.info AS task_info
                                 FROM result, task
                                 WHERE result.project_id=:project_id
                                 AND result.task_id=task.id
                                 AND (result.info->>'annotations') IS NULL
                                 ''')
        update_result = text('''UPDATE result
                                SET info=:info
                                WHERE id=:id''')

        for project in projects:
            results = db.engine.execute(select_results,
                                        project_id=project.id).fetchall()
            for result in results:
                fields = _extract_legacy_fields(result.info)
                if fields is None:
                    continue

                # Read the task link only for results that are actually
                # migrated, so an unrelated task without a link cannot
                # abort the run.
                target = result.task_info['link']
                new_info = dict(annotations=_build_annotations(target,
                                                               fields))
                db.engine.execute(update_result, id=result.id,
                                  info=json.dumps(new_info))
# Start the migration only when executed as a script, not on import.
if __name__ == '__main__':
    run()