forked from nobelsHist/prizes_history
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nominations.py
105 lines (91 loc) · 3.53 KB
/
nominations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from mongoHandler import MongoHandler
from pymongo import MongoClient
def export_nominations_to_collection():
    """Flatten every person's nominations into the ``nobel.nominations`` collection.

    Iterates over all documents returned by ``MongoHandler("people").get_all()``.
    Each person's ``nominations`` field is read as a mapping of nominator id to
    a list of nominations; one document is inserted per nomination with the
    keys ``nominee_id``, ``nominee_name``, ``nominator_id``, ``nominator_name``,
    ``year`` and ``type``.

    NOTE(review): nothing here clears or de-duplicates the target collection,
    so running the export twice looks like it would insert every nomination
    twice - confirm whether callers drop the collection first.
    """
    # The context manager closes the client's connections even when a lookup
    # or an insert raises part-way through the export.
    with MongoClient() as client:
        collection = client.nobel['nominations']
        handler = MongoHandler("people")
        for person in handler.get_all():
            person_id = person['id']
            for nominator_id, nominations in person['nominations'].items():
                if not nominations:
                    # Nothing to insert, so skip the nominator lookup as well.
                    continue
                # The nominator's name is identical for all of their
                # nominations of this person: look it up once, not per row.
                nominator_name = handler.get_person_by_id(nominator_id)['name']
                for nomination in nominations:
                    # Build the document under its own name instead of
                    # rebinding the loop variable it is derived from.
                    document = {
                        "nominee_id": person_id,
                        "nominee_name": person['name'],
                        "nominator_id": nominator_id,
                        "nominator_name": nominator_name,
                        "year": nomination['year'],
                        "type": nomination['type'],
                    }
                    collection.insert_one(document)
def export_yearly_nom_count_for_each_nominee(outputFile):
    """Write a CSV of how many nominations each nominee received per year.

    The file gets a header row ``nominee_id,nominee_name,year_NP`` followed by
    one column per year from 1901 to 1971 inclusive. Each following row
    describes one document from ``MongoHandler("people").get_all()``:

    * ``nominee_id`` - the person's ``id`` (written unquoted),
    * ``nominee_name`` - the person's ``name``, always wrapped in double quotes,
    * ``year_NP`` - the result of ``get_first_ch_win(nominee['nobel'])``, or the
      literal text ``None`` when there is no such win,
    * one count per year - the number of documents in ``nobel.nominations``
      whose ``nominee_id`` matches and whose ``year`` equals the year as a
      string.

    Args:
        outputFile: Path of the CSV file to create or overwrite.
    """
    years = range(1901, 1972)
    header = ",".join(
        ["nominee_id", "nominee_name", "year_NP"] + [str(year) for year in years]
    )
    # Both resources are released even if a query or a lookup raises midway.
    with MongoClient() as client:
        collection = client.nobel['nominations']
        handler = MongoHandler("people")
        with open(outputFile, mode="w", errors="replace") as out:
            out.write(header + "\n")
            for nominee in handler.get_all():
                nominee_id = nominee['id']
                # Double any embedded quote so a name containing '"' still
                # forms a single, well-formed quoted CSV field.
                quoted_name = '"' + nominee['name'].replace('"', '""') + '"'
                first_ch_win = get_first_ch_win(nominee['nobel'])
                fields = [
                    nominee_id,
                    quoted_name,
                    "None" if first_ch_win is None else str(first_ch_win),
                ]
                for year in years:
                    # The query compares against str(year), i.e. it expects
                    # the stored "year" values to be strings.
                    count = collection.count_documents({
                        "year": str(year),
                        "nominee_id": nominee_id,
                    })
                    fields.append(str(count))
                out.write(",".join(fields) + "\n")
def export_yearly_nominators_count(outputFile):
    """Write a CSV of how many distinct nominators each nominee had per year.

    The file gets a header row ``nominee_id,nominee_name,year_NP`` followed by
    one column per year from 1901 to 1971 inclusive. Each following row
    describes one document from ``MongoHandler("people").get_all()``:

    * ``nominee_id`` - the person's ``id`` (written unquoted),
    * ``nominee_name`` - the person's ``name``, always wrapped in double quotes,
    * ``year_NP`` - the result of ``get_first_ch_win(nominee['nobel'])``, or the
      literal text ``None`` when there is no such win,
    * one count per year - the number of distinct ``nominator_id`` values among
      the ``nobel.nominations`` documents for that nominee and year.

    Args:
        outputFile: Path of the CSV file to create or overwrite.
    """
    years = range(1901, 1972)
    header = ",".join(
        ["nominee_id", "nominee_name", "year_NP"] + [str(year) for year in years]
    )
    # Both resources are released even if a query or a lookup raises midway.
    with MongoClient() as client:
        collection = client.nobel['nominations']
        handler = MongoHandler("people")
        with open(outputFile, mode="w", errors="replace") as out:
            out.write(header + "\n")
            for nominee in handler.get_all():
                nominee_id = nominee['id']
                # Double any embedded quote so a name containing '"' still
                # forms a single, well-formed quoted CSV field.
                quoted_name = '"' + nominee['name'].replace('"', '""') + '"'
                first_ch_win = get_first_ch_win(nominee['nobel'])
                fields = [
                    nominee_id,
                    quoted_name,
                    "None" if first_ch_win is None else str(first_ch_win),
                ]
                for year in years:
                    # Roughly: SELECT COUNT(DISTINCT nominator_id)
                    #          FROM nominations
                    #          WHERE nominee_id = <id> AND year = <year>
                    # Grouping on nominator_id collapses repeat nominations by
                    # the same nominator; $count then counts the groups.
                    result = list(collection.aggregate([
                        {
                            '$match': {
                                'year': str(year),
                                'nominee_id': nominee_id,
                            }
                        }, {
                            '$group': {
                                '_id': '$nominator_id'
                            }
                        }, {
                            '$count': 'n'
                        }]))
                    # With no matching documents the pipeline yields no rows
                    # at all, which means zero nominators.
                    fields.append(str(result[0]['n'] if result else 0))
                out.write(",".join(fields) + "\n")
def get_first_ch_win(wins):
    """Return the earliest year among the wins whose ``type`` is ``"C"``.

    Args:
        wins: A sequence of mappings, each with a ``type`` and a ``year`` entry
            (``year`` must be convertible with ``int``). May be ``None`` or
            empty.

    Returns:
        The smallest ``year`` (as an ``int``) over all wins of type ``"C"``,
        or ``None`` when ``wins`` is falsy or holds no win of that type.
    """
    if not wins:
        return None
    c_years = (int(entry['year']) for entry in wins if entry['type'] == "C")
    # default=None covers the case where no entry has type "C".
    return min(c_years, default=None)
def main():
    """Script entry point: run the export into the nominations collection."""
    export_nominations_to_collection()
# Run the export only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()