# openreviews_parsing.py
#
# Downloads the ICLR 2024 submissions and their direct replies from OpenReview,
# computes rating statistics per paper, writes them to a dated CSV under
# csvs/, and saves a histogram of the per-paper mean ratings under figs/.
import csv
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import openreview
from matplotlib.ticker import FormatStrFormatter
# https://openreview-py.readthedocs.io/en/latest/
# https://readthedocs.org/projects/openreview-py-dm-branch/downloads/pdf/latest/
# https://docs.openreview.net/getting-started/using-the-api/installing-and-instantiating-the-python-client
# Maps each paper title to a list of (rating, confidence) tuples, one tuple
# per direct reply that carries a rating.
papers = {}


def _leading_int(value):
    """Return the integer that precedes the first ':' in *value*.

    The rating/confidence values parsed here are expected to look like
    "8: accept, good paper"; a bare integer (or numeric string) is accepted
    too. Raises ValueError when the leading part is not an integer.
    """
    return int(str(value).split(":", 1)[0])


# Find invitation ID by running
# print(client.get_group(id='venues').members) # NeurIPS.cc/2022/Track/Datasets_and_Benchmarks/-/Submission
# NOTE(review): the legacy endpoint (https://api.openreview.net) was queried
# with "ICLR.cc/2024/Conference/-/Blind_Submission"; only the api2 endpoint
# below is used, so a single client is created.
client = openreview.Client(baseurl='https://api2.openreview.net')
submissions = client.get_all_notes(
    invitation="ICLR.cc/2024/Conference/-/Submission",
    details='directReplies',
)

# Not populated by this script; kept so the module-level name stays defined.
listofreviewers = set()

for submission in submissions:
    reviews = []
    for review in submission.details['directReplies']:
        content = review['content']
        # Replies without a 'rating' field are skipped.
        if 'rating' in content:
            rating = _leading_int(content['rating']['value'])
            confidence = _leading_int(content['confidence']['value'])
            reviews.append((rating, confidence))
    # Keyed by title: a later submission with the same title replaces the
    # earlier entry.
    papers[submission.content['title']["value"]] = reviews
# Stats Calculation
allRatings = []
allRatingsMeans = []
for entries in papers.values():
    # The first element of every (rating, confidence) pair is the rating.
    scores = [entry[0] for entry in entries]

    # Paper-specific statistics. A paper with no ratings yields NaN for all
    # three values (the NaN means are counted in the summary printed below).
    paper_mean = np.nanmean(scores)
    paper_median = np.nanmedian(scores)
    paper_stdev = np.nanstd(scores)

    # The (mean, median, stdev) tuple goes to the front of the paper's list,
    # ahead of its (rating, confidence) pairs.
    entries.insert(0, (paper_mean, paper_median, paper_stdev))
    allRatingsMeans.append(paper_mean)

# Statistics over the per-paper means, ignoring NaN entries.
overallMean = np.nanmean(allRatingsMeans)
overallMedian = np.nanmedian(allRatingsMeans)
overallStdev = np.nanstd(allRatingsMeans)

print(f"Mean, Mean Paper Rating: {overallMean}")
print(f"Median, Mean Paper Rating: {overallMedian}")
print(f"Standard Deviation of Mean Paper Rating: {overallStdev}")
print(f"Total Papers: {len(allRatingsMeans)}")
print(f"Total Papers with Nan Reviews: {np.sum(np.isnan(allRatingsMeans))}")
# Write to spreadsheet
def _flatten_row(title, entries):
    """Build one CSV row: the sanitized title followed by every number.

    *entries* is the list stored in ``papers`` for one title: tuples whose
    members are written out in order. NumPy scalars are converted to plain
    Python numbers so the file never contains reprs such as
    ``np.float64(5.0)``.
    """
    # Newlines and commas are stripped from the title so that consumers which
    # split lines on ',' still see the title as a single field.
    row = [title.replace("\n", "").replace(",", "")]
    for entry in entries:
        for value in entry:
            row.append(value.item() if isinstance(value, np.generic) else value)
    return row


# exist_ok avoids the check-then-create race of a separate existence test.
os.makedirs("csvs", exist_ok=True)
csv_path = f"csvs/openreview_ratings_{datetime.today().strftime('%Y_%m_%d')}.csv"

# newline='' lets the csv module control line endings; UTF-8 keeps
# non-ASCII titles writable regardless of the platform default encoding.
with open(csv_path, 'w', newline='', encoding='utf-8') as handle:
    writer = csv.writer(handle, lineterminator="\n")
    # The header names eight Rating/Confidence pairs; a row holds as many
    # pairs as the paper has entries, so rows may be shorter or longer.
    writer.writerow(["Title", "Mean", "Median", "StDev"] + ["Rating", "Confidence"] * 8)
    for title, entries in papers.items():
        writer.writerow(_flatten_row(title, entries))
# Visualize results
fig, ax = plt.subplots(figsize=(20,10))
counts, bins, patches = ax.hist(allRatingsMeans, bins=50)

# One tick per bin edge, shown with a single decimal place.
ax.set_xticks(bins)
ax.xaxis.set_major_formatter(FormatStrFormatter('%0.1f'))


def _annotate_below_axis(text, x_pos, y_offset):
    """Write *text* under the x-axis at data position *x_pos*.

    *y_offset* is the vertical shift in points from the bottom of the axes
    (negative values move the label further down).
    """
    ax.annotate(text, xy=(x_pos, 0), xycoords=('data', 'axes fraction'),
                xytext=(0, y_offset), textcoords='offset points', va='top', ha='center')


# Each bin gets two labels under its center: the raw count and, below that,
# the bin's share of all counted papers as a percentage.
midpoints = bins[:-1] + 0.5 * np.diff(bins)
for tally, center in zip(counts, midpoints):
    _annotate_below_axis(str(int(tally)), center, -18)
    _annotate_below_axis('%0.0f%%' % (100 * float(tally) / counts.sum()), center, -32)

if not os.path.exists("figs"):
    os.makedirs("figs")
plt.savefig(f"figs/hist_{datetime.today().strftime('%Y_%m_%d')}.png")
plt.show(block=True)