# backend.py
"""
Author: Nicholas Zwart, Jim Pipe
Date: 2011dec19
Summary: Backend interface for AMPC Chair person's spreadsheet editing tasks.
"""
# Module metadata.
__authors__ = ["Nick Zwart","Jim Pipe"]
__date__ = "2011dec19"
# Version tag; writeSession() stores it in the header of saved session files.
__version__ = "r2832"
import xlrd # tools for reading spreadsheets
import xlwt # tools for writing spreadsheets
import pickle # for saving and opening sessions
class AMPCchair_data:
    '''Methods for storing and retrieving reviewer, category and label data.

    Data is held in three dictionaries whose values are row lists.
    All cells are strings unless noted otherwise.

    reviewers[member number] -> 19 items
        0      member number
        1-11   reviewer info text (column 10 is 'Journal Articles')
        12-16  category choices 1-5 ('-1' when the choice is empty)
        17     number of assigned abstracts
        18     list of assigned category keys

    categories[category number] -> 6 items
        0 category number, 1 category title, 2 number of abstracts,
        3 number of assigned reviewers, 4 pool size,
        5 list of assigned reviewer keys

    labels[label] -> [label, submission category, review category]
    '''

    # header strings used in pickled session files
    _SESSION_TAG = 'ISMRM AMPC Chair Session'
    _NO_STATE = 'no state info'
    # suffixes tried when matching a reviewer choice to a split category
    _SPLIT_SUFFIXES = ('', 'A', 'B', 'C', 'D', 'E', 'F', 'G')
    # zero padding tried when matching a reviewer choice to a category key
    _ZERO_PADS = ('', '0', '00')
    # 'Journal Articles' answer -> experience score
    _EXPERIENCE = {'0-5': 1, '6-15': 2, '>15': 3}

    def __init__(self):
        '''Create an empty container; no spreadsheet or session is loaded here.'''
        self.rev_sheet = None  # reviewer worksheet
        self.cat_sheet = None  # category worksheet
        self.lab_sheet = None  # label worksheet
        # window state
        self.state = None

    def deleteAll(self):
        '''Remove the reviewer, category and label dictionaries from memory.

        Attributes that were never loaded are skipped instead of raising
        AttributeError.
        '''
        for name in ('reviewers', 'categories', 'labels'):
            if hasattr(self, name):
                delattr(self, name)

    @staticmethod
    def _first_sheet(fn):
        '''Open the Excel file `fn` and return its first worksheet.'''
        return xlrd.open_workbook(fn).sheets()[0]

    def read_rev(self, fn='reviewer.xls'):
        '''Load the first sheet of `fn` as the reviewer sheet and return it.'''
        self.rev_sheet = self._first_sheet(fn)
        return self.rev_sheet

    def read_cat(self, fn='category.xls'):
        '''Load the first sheet of `fn` as the category sheet and return it.'''
        self.cat_sheet = self._first_sheet(fn)
        return self.cat_sheet

    def read_lab(self, fn='label.xls'):
        '''Load the first sheet of `fn` as the label sheet and return it.'''
        self.lab_sheet = self._first_sheet(fn)
        return self.lab_sheet

    def format_revlist(self, inlist):
        '''Convert one raw reviewer row into the 19-item reviewer layout.

        -col 0 is the member number; it must be numeric, otherwise
         ValueError/TypeError propagates so the caller can reject the row
        -col 1:11 are copied as text
        -col 12:16 are choices, stored as integer strings or '-1' if empty
        -col 17 is initialised to '0' assigned abstracts
        -col 18 is initialised to an empty list of assigned categories
        '''
        # member number
        outlist = [str(int(float(inlist[0])))]
        # string info
        for i in range(1, 12):
            cell = inlist[i]
            if isinstance(cell, bytes):
                # decode instead of iterating, which would yield byte values
                cell = cell.decode('utf-8', errors='ignore')
            outlist.append(str(cell))
        # choices
        for i in range(12, 17):
            try:
                # for non-empty entries
                outlist.append(str(int(float(inlist[i]))))
            except (ValueError, TypeError, IndexError, OverflowError):
                # for empty, missing or non-numeric entries
                outlist.append('-1')
        # extra columns: number of assigned abstracts, assigned categories
        outlist.append('0')
        outlist.append([])
        return outlist

    def format_catlist(self, inlist):
        '''Convert one raw category row into the 6-item category layout.

        -col 0 category number; numeric values become integer strings,
         alpha-numeric values (e.g. split categories) are kept as text
        -col 1 category name
        -col 2 number of abstracts; must be numeric or the error propagates
        -col 3, 4 are initialised to '0' (assigned reviewers, pool size)
        -col 5 is initialised to an empty list of assigned reviewers
        '''
        # category number
        try:
            number = str(int(float(inlist[0])))
        except (ValueError, TypeError, OverflowError):
            number = str(inlist[0])
        # name
        name = str(inlist[1])
        # num abstracts
        num_abstracts = str(int(float(inlist[2])))
        return [number, name, num_abstracts, '0', '0', []]

    def format_lablist(self, inlist):
        '''Convert one raw label row to a list of stripped strings.

        The expected columns are label, submission category and
        review category.
        '''
        return [str(item).strip() for item in inlist]

    def read(self, fn):  # ='abstassn.xls'):
        '''Read the reviewer, category and label sheets from a single book.

        Sheets 0, 1 and 2 of the workbook are used in that order.
        Returns 0 on success and 1 if the file cannot be opened or does
        not contain three sheets.
        '''
        try:
            book = xlrd.open_workbook(fn)
        except Exception:
            print('ERROR: invalid xls file')
            return 1
        sheets = book.sheets()
        if len(sheets) < 3:
            # previously an uncaught IndexError
            print('ERROR: invalid xls file')
            return 1
        self.rev_sheet = sheets[0]
        self.cat_sheet = sheets[1]
        self.lab_sheet = sheets[2]
        self._load_reviewers()
        self._load_categories()
        self._load_labels()
        # calculate the number of reviewers for each cat
        self.calcReviewPools()
        return 0  # success

    def _load_reviewers(self):
        '''Build self.reviewers (keyed by member number) from self.rev_sheet.'''
        sheet = self.rev_sheet
        self.reviewers = {}
        bad_cnt = 0
        read_cnt = 0
        # row 0 is the title row
        for i in range(1, sheet.nrows):
            try:
                mem_info = self.format_revlist(sheet.row_values(i))
            except Exception:
                # rows without a usable member number are skipped
                bad_cnt += 1
                print('bad_cnt:'+str(bad_cnt))
                print(sheet.row_values(i))
                continue
            self.reviewers[mem_info[0]] = mem_info
            read_cnt += 1
        self.num_reviewers = read_cnt
        print("REVIEWERS: read:"+str(read_cnt))
        print("REVIEWERS: unreadable rows:"+str(bad_cnt))

    def _load_categories(self):
        '''Build self.categories (keyed by category number) from self.cat_sheet.

        A row is kept when its category number is all digits, or all digits
        after dropping the last character (e.g. <cat>A or <cat>B). All-digit
        numbers are zero-filled to three characters.
        '''
        sheet = self.cat_sheet
        self.categories = {}
        bad_cnt = 0
        read_cnt = 0
        for i in range(1, sheet.nrows):
            try:
                cur_row = self.format_catlist(sheet.row_values(i))
                key = cur_row[0]
                valid = key.isdigit() or key[0:-1].isdigit()
            except Exception:
                valid = False
            if not valid:
                # main categories, header rows, totals, unreadable rows
                bad_cnt += 1
                print(sheet.row_values(i))
                continue
            if key.isdigit():
                key = key.zfill(3)
                cur_row[0] = key
            self.categories[key] = cur_row
            read_cnt += 1
        self.num_categories = read_cnt
        print("CATEGORIES: read:"+str(read_cnt))
        print("CATEGORIES: unreadable/main-category rows:"+str(bad_cnt))

    def _load_labels(self):
        '''Build self.labels (keyed by label) from self.lab_sheet.

        Only rows with exactly three cells are kept.
        '''
        sheet = self.lab_sheet
        self.labels = {}
        bad_cnt = 0
        read_cnt = 0
        for i in range(1, sheet.nrows):
            try:
                cur_row = self.format_lablist(sheet.row_values(i))
                valid = len(cur_row) == 3
            except Exception:
                valid = False
            if not valid:
                bad_cnt += 1
                print(sheet.row_values(i))
                continue
            self.labels[cur_row[0]] = cur_row
            read_cnt += 1
        self.num_labels = read_cnt
        print("LABELS: read:"+str(read_cnt))
        print("LABELS: unreadable/main-label rows:"+str(bad_cnt))

    def calcReviewPools(self):
        '''Add every reviewer choice to the pool size (col 4) of its category.

        Choices are in reviewer columns 12-16. A choice is matched against
        category keys with up to two leading zeros and an optional split
        suffix A-G, so all parts of a split category (e.g. 900A and 900B)
        get the same increment.

        The counts are added to the existing pool sizes; they are not
        reset first.
        '''
        for rev in self.reviewers.values():
            for choice in rev[12:17]:
                # empty choices are stored as '-1'
                if not choice.isdigit():
                    continue
                for suffix in self._SPLIT_SUFFIXES:
                    for pad in self._ZERO_PADS:
                        key = pad + choice + suffix
                        if key in self.categories:
                            row = self.categories[key]
                            row[4] = str(int(row[4]) + 1)

    def addRev(self, cur_rev, cur_cat):
        '''Assign reviewer `cur_rev` to category `cur_cat`.

        Each key is appended to the other's assignment list, the
        reviewer's abstract count grows by the category's abstract count
        and the category's reviewer count is refreshed.
        '''
        rev = self.reviewers[cur_rev]
        cat = self.categories[cur_cat]
        # swap keys
        rev[18].append(cur_cat)
        cat[5].append(cur_rev)
        # number of abstracts
        rev[17] = str(int(rev[17]) + int(cat[2]))
        # number of reviewers
        cat[3] = str(len(cat[5]))

    def removeRev(self, cur_rev, cur_cat):
        '''Undo the assignment of reviewer `cur_rev` to category `cur_cat`.

        Raises ValueError if the pair is not currently assigned. Both
        lists are checked before either is changed, so a failed call
        leaves the data untouched.
        '''
        rev = self.reviewers[cur_rev]
        cat = self.categories[cur_cat]
        if cur_cat not in rev[18] or cur_rev not in cat[5]:
            raise ValueError('reviewer %r is not assigned to category %r'
                             % (cur_rev, cur_cat))
        # remove keys
        rev[18].remove(cur_cat)
        cat[5].remove(cur_rev)
        # number of abstracts
        rev[17] = str(int(rev[17]) - int(cat[2]))
        # number of reviewers
        cat[3] = str(len(cat[5]))

    def incCat_PoolSize(self, cnum):
        '''Increment the pool size of a category, creating the column if needed.

        -cnum is the category ref number key
        column 4, 0-based; the value is stored as a string
        '''
        item = self.categories[cnum]
        # grow the list (if needed)
        while len(item) < 5:
            item.append('0')
        # cells hold strings, so convert before adding
        item[4] = str(int(item[4] or 0) + 1)

    def incCat_NumRev(self, cnum):
        '''Increment the number of assigned reviewers of a category,
        creating the column if needed.

        -cnum is the category ref number key
        column 3, 0-based; the value is stored as a string
        '''
        item = self.categories[cnum]
        # grow the list (if needed)
        while len(item) < 4:
            item.append('')
        # an empty cell counts as zero
        item[3] = str(int(item[3] or 0) + 1)

    def setRev_NumAbs(self, mem, value):
        '''Store `value` as the number of assigned abstracts of a reviewer,
        creating the column if needed.

        -mem is the reviewer number key
        column 17, 0-based
        '''
        item = self.reviewers[mem]
        # grow the list (if needed)
        while len(item) < 18:
            item.append('')
        item[17] = value

    def get_rev_titles(self):
        '''Return the column titles (row 0) of the reviewer sheet.

        This is left open in case new reviewer info is added.
        '''
        return self.rev_sheet.row_values(0)

    def get_rev_info(self, num):
        '''Return row `num` of the reviewer sheet (1-based to skip the title).

        Returns None when `num` is past the last row.
        '''
        if num < self.rev_sheet.nrows:
            return self.rev_sheet.row_values(num)
        return None

    def get_cat_info(self, num):
        '''Return row `num` of the category sheet (1-based to skip the title).

        Returns None when `num` is past the last row.
        '''
        if num < self.cat_sheet.nrows:
            return self.cat_sheet.row_values(num)
        return None

    def get_nrev(self):
        '''number of rows -1 == num reviewers'''
        return self.rev_sheet.nrows - 1

    def get_ncat(self):
        '''number of rows -1 == num categories'''
        return self.cat_sheet.nrows - 1

    def writeSpreadsheet(self, fn):
        '''Write the reviewer and category dictionaries to the Excel file `fn`.

        The book gets a 'Reviewers' and a 'Categories' sheet, each with a
        header row followed by one row per dictionary entry.
        '''
        book = xlwt.Workbook()
        # REVIEWERS
        sheet1 = book.add_sheet('Reviewers')
        self.addRevHeader2Sheet(sheet1)
        self.list2Sheet(self.dict2List(self.reviewers), sheet1, 1)
        # CATEGORIES
        sheet2 = book.add_sheet('Categories')
        self.addCatHeader2Sheet(sheet2)
        self.list2Sheet(self.dict2List(self.categories), sheet2, 1)
        # write to file
        book.save(fn)

    def expandColumnList(self, li):
        '''Unwrap the elements of Assigned Reviewers & Assigned Categories
        into a list of strings, one per output column.'''
        return [str(elem) for elem in li]

    def addRevHeader2Sheet(self, sheet_in):
        '''add the rev header row to supplied sheet'''
        head = ['Member #','Type','First','Last','Designation','Institution','Email','Primary Training','PubMed','PubMed #','Journal Articles','Previously Reviewed','Choice 1','Choice 2','Choice 3','Choice 4','Choice 5','# of Assigned Abstracts','Assigned Categories']
        for col, title in enumerate(head):
            sheet_in.write(0, col, title)

    def addCatHeader2Sheet(self, sheet_in):
        '''add the category header row to supplied sheet'''
        head = ['Category #','Category Title','# of Abstracts','# of Assigned Reviewers','Pool Size','Assigned Reviewers']
        for col, title in enumerate(head):
            sheet_in.write(0, col, title)

    def dict2List(self, dict_in):
        '''Flatten the dictionary values into rows of strings.

        The last element of each value is a list; its items are appended
        as individual cells after the other columns.
        '''
        outlist = []
        for v in dict_in.values():
            rowlist = [str(i) for i in v[:-1]]
            rowlist += self.expandColumnList(v[-1])
            outlist.append(rowlist)
        return outlist

    def list2Sheet(self, list_in, sheet_in, row_offset):
        '''Copy a list of rows into the sheet, starting at row `row_offset`.'''
        for row, cells in enumerate(list_in):
            for col, cell in enumerate(cells):
                sheet_in.write(row + row_offset, col, cell)

    def writeSession(self, fn, state=None):
        '''Pickle the current session to the file `fn`.

        The pickled list holds a header [tag, version, state], the
        reviewers, the categories and the labels. Labels are appended
        last so readers that only use the first three elements still work.
        A falsy `state` is stored as 'no state info'.
        '''
        header = [self._SESSION_TAG, str(__version__)]
        # state is elem 2 in header
        header.append(state if state else self._NO_STATE)
        group = [header, self.reviewers, self.categories,
                 getattr(self, 'labels', {})]
        with open(fn, 'wb') as fileptr:
            pickle.dump(group, fileptr)

    def readSession(self, fn):
        '''Load a session written by writeSession() from the file `fn`.

        Returns 0 on success and 1 if the file is not a valid session.
        Labels are restored only when the file contains them; otherwise
        any labels already loaded are left untouched.
        '''
        # NOTE: unpickling can execute arbitrary code; only open session
        # files from a trusted source.
        with open(fn, 'rb') as fileptr:
            try:
                group = pickle.load(fileptr)
            except Exception:
                print('ERROR: invalid pickle file')
                return 1
        try:
            hdr = group[0]
            valid = (len(group) >= 3 and len(hdr) >= 2
                     and hdr[0] == self._SESSION_TAG)
        except (TypeError, IndexError, KeyError):
            valid = False
        if not valid:
            print("ERROR: the chosen file is not an ISMRM AMPC Chair session")
            return 1
        print('valid ISMRM AMPC Chair Session found')
        print('\t-file created with version '+str(hdr[1]))
        self.reviewers = group[1]
        self.categories = group[2]
        if len(group) > 3:
            self.labels = group[3]
        if len(hdr) < 3 or hdr[2] == self._NO_STATE:
            self.state = None
        else:
            self.state = hdr[2]  # window state
        return 0

    def rev_candidates(self, max, cat, nr_choice, level):
        '''Return the keys of reviewers that may be assigned to `cat`.

        A reviewer is a candidate if:
        (i) adding the category keeps their abstract count at or below `max`;
        (ii) their choice number `nr_choice` (1-5) matches the category
             at the given `level` (see found_match);
        (iii) they are not yet assigned to the category.
        '''
        nr_cat_abstracts = cat[2]
        assigned_revs = cat[5]
        candidates = []
        for revkey, rev in self.reviewers.items():
            if int(rev[17]) + int(nr_cat_abstracts) > max:
                continue
            # choice 1 is column 12
            if not self.found_match(rev[11 + nr_choice], cat, level):
                continue
            if revkey not in assigned_revs:
                candidates.append(revkey)
        return candidates

    def select_rev_reserves(self, revkeys, threshold):
        '''Return the keys whose reviewer has fewer than `threshold`
        assigned abstracts, in the input order.'''
        return [key for key in revkeys
                if int(self.reviewers[key][17]) < threshold]

    def prioritize_revs(self, revkeys):
        '''Return the reviewer keys as a list sorted by priority.

        More experience comes first; for equal experience the reviewer
        with fewer assigned abstracts comes first. Remaining ties keep
        their input order.
        '''
        if not revkeys:
            return list(revkeys)

        def priority(revkey):
            rev = self.reviewers[revkey]
            return (-self.measure_experience(rev), int(rev[17]))

        return sorted(revkeys, key=priority)

    def measure_experience(self, rev):
        '''Score the 'Journal Articles' answer (col 10) of a reviewer row.

        '0-5' -> 1, '6-15' -> 2, '>15' -> 3; anything else scores 0 so
        the result can always be used as a sort key.
        '''
        return self._EXPERIENCE.get(str(rev[10]).strip(), 0)

    @staticmethod
    def _category_number(key):
        '''Return the leading digits of a category key as an int
        (e.g. '900A' -> 900), or None if the key has no leading digit.'''
        digits = ''
        for ch in str(key):
            if ch not in '0123456789':
                break
            digits += ch
        return int(digits) if digits else None

    def _label_row(self, label):
        '''Return the label row for `label`, or None if it is unknown.

        Label keys are stored stripped, so a stripped lookup is tried
        when the exact text is not found.
        '''
        labels = getattr(self, 'labels', None) or {}
        row = labels.get(label)
        if row is None and isinstance(label, str):
            row = labels.get(label.strip())
        return row

    def get_choice_label(self, choice):
        """Return the category title for a reviewer choice.

        The first category whose number equals the choice is used.
        Returns None if the choice is not a number or no category matches.
        """
        try:
            wanted = int(choice)
        except (ValueError, TypeError):
            return None
        for key, cat in self.categories.items():
            if self._category_number(key) == wanted:
                return cat[1]
        return None

    def found_match(self, choice, cat, level):
        """Check if the reviewer's choice matches the category
        at the level of label (0), submission category (1) or review category (2).

        Returns False when the choice is not a number or when the label
        information needed for levels 1 and 2 is missing.
        """
        if level == 0:
            try:
                return int(choice) == self._category_number(cat[0])
            except (ValueError, TypeError):
                return False
        rev_label = self.get_choice_label(choice)
        if rev_label is None:
            # no category carries the chosen number
            return False
        rev_row = self._label_row(rev_label)
        cat_row = self._label_row(cat[1])
        if rev_row is None or cat_row is None:
            return False
        return rev_row[level] == cat_row[level]