Skip to content

Commit

Permalink
feat: generate plagiarism reports for AAAI submissions (#2213)
Browse files Browse the repository at this point in the history
* feat: create functions for Turnitin Core API endpoints

* fix: add submission_id and pdf_request_id parameter in functions

* fix: use requests package

* fix: exclude optional fields from payload

* enable plagiarism detection for the venue class

* make API Key visible to the PCs only

* fix: venues should provide their own API base URL

* fix: add API base URL field to request form

* test: add API base URL value in test

* fix: API base URL order in venue request form

* refactor: remove duplicate iThenticate client files

* fix: change ithenticate edge invitation

* fix: pass data to json parameter

* fix: ithenticate edge invitation labels use enums

* fix: divide plagiarism check in 2 parts, add status function

* fix: change request from get to post

* fix: use tqdm

* fix: use get_submission_id()

* fix: add label filter in get_grouped_edges()

* feat: allow status check for all label values

* fix: rollback edge label value in case of errors

* fix: use signature and preferred email in upload_submissions() parameters

* refactor: format code

* fix: catch all exceptions

* fix: check if edge already exists before creating new submission

* fix:
* remove multiple API calls
* do not delete edge
* handle errors in a separate function

* fix: print submission with existing edges

* fix: missing } in venue_request.py

* fix: pass an Edge object to post_edge

* fix: use from_json and print edge IDs for submissions with pre-existing edges

* fix: print edge label

* feat: add function to poll iThenticate and update status

* refactor plagiarism variables and add plagiarism edge readers

* fix test

* add iThenticate invitation id to the AC/SAC consoles

* feat: resubmit edges with upload error

* feat: print edges and labels in check status

* feat: resubmit similarity report requests with errors

* refactor: change method name

* refactor: add print statements in error handling function

* fix: rollback to original error status

* fix: resubmit files for edges in 'Created' state

* fix: use updated status values and avoid iThenticate API calls

* refactor code

* get name from the profiles instead of signatures

---------

Co-authored-by: Melisa Bok <[email protected]>
Co-authored-by: Melisa Bok <[email protected]>
  • Loading branch information
3 people authored Aug 5, 2024
1 parent 1cec309 commit 243ebba
Show file tree
Hide file tree
Showing 12 changed files with 738 additions and 28 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ cov.xml
.coverage*
tests/drivers/geckodriver
tests/data/*.csv
TCA_key.json
1 change: 1 addition & 0 deletions openreview/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@
from .client import Edge
from .client import Group
from .client import Tag
from .iThenticate_client import iThenticateClient
263 changes: 263 additions & 0 deletions openreview/api/iThenticate_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
import os
import requests
import json
from datetime import datetime


class iThenticateClient:
    """Thin HTTP client for the Turnitin Core API (TCA) endpoints used by iThenticate checks.

    Every request is sent to ``https://<api_base_url>/api/v1/...`` with the
    Turnitin integration headers and a bearer token. Each public method calls
    ``raise_for_status()`` on the response, so HTTP error statuses surface as
    ``requests.HTTPError``.
    """

    def __init__(self, api_key, api_base_url):
        """Store credentials and build the headers shared by all requests.

        Args:
            api_key: Bearer token for the API. When ``None``, the token is read
                from the ``"secret"`` entry of ``TCA_key.json`` located next to
                this module.
            api_base_url: Host name (without scheme) of the venue's TCA instance.
        """
        if api_key is None:
            script_dir = os.path.dirname(os.path.abspath(__file__))
            key_path = os.path.join(script_dir, "TCA_key.json")

            with open(key_path) as key_file:
                self.auth_key = json.load(key_file)["secret"]
        else:
            self.auth_key = api_key
        self.TCA_URL = api_base_url
        self.TCA_integration_name = "OpenReview"
        self.TCA_integration_version = "1.0.0"
        self.headers = {
            "X-Turnitin-Integration-Name": self.TCA_integration_name,
            "X-Turnitin-Integration-Version": self.TCA_integration_version,
            "Authorization": f"Bearer {self.auth_key}",
        }

    def _url(self, path):
        """Return the absolute ``/api/v1`` URL for *path* (given without a leading slash)."""
        return f"https://{self.TCA_URL}/api/v1/{path}"

    def _json_headers(self):
        """Return a copy of the shared headers with a JSON ``Content-Type`` added."""
        headers = self.headers.copy()
        headers["Content-Type"] = "application/json"
        return headers

    def get_EULA(self):
        """Return the URL of the latest en-US EULA."""
        response = requests.get(
            self._url("eula/latest?lang=en-US"),
            headers=self.headers,
        )
        response.raise_for_status()

        return response.json()["url"]

    def accept_EULA(self, user_id, timestamp):
        """Record that *user_id* accepted the ``v1beta`` EULA at *timestamp*.

        Returns:
            The decoded JSON response body.
        """
        data = {
            "user_id": user_id,
            "accepted_timestamp": timestamp,
            "language": "en-US",
        }
        response = requests.post(
            self._url("eula/v1beta/accept"),
            headers=self._json_headers(),
            json=data,
        )
        response.raise_for_status()

        return response.json()

    def create_submission(
        self,
        owner,
        title,
        timestamp,
        owner_first_name,
        owner_last_name,
        owner_email,
        group_id,
        group_context,
        group_type,
        submitter=None,
        submitter_first_name=None,
        submitter_last_name=None,
        submitter_email=None,
        extract_text_only=None,
        owner_permission_set="LEARNER",
        submitter_permission_set="INSTRUCTOR",
    ):
        """Create a submission record and return the decoded JSON response.

        *timestamp* is used both as the EULA acceptance time and as the
        original submission time. Submitter fields and ``extract_text_only``
        are only added to the payload when they are provided, so optional
        fields are never sent as ``null``.
        """
        data = {
            "owner": owner,
            "title": title,
            "owner_default_permission_set": owner_permission_set,
            "eula": {
                "version": "v1beta",
                "language": "en-US",
                "accepted_timestamp": timestamp,
            },
            "metadata": {
                "owners": [
                    {
                        "id": owner,
                        "given_name": owner_first_name,
                        "family_name": owner_last_name,
                        "email": owner_email,
                    }
                ],
                "group": {
                    "id": group_id,
                    # NOTE(review): the group name is set to the submission
                    # title -- confirm this is intended.
                    "name": title,
                    "type": group_type,
                },
                "group_context": group_context,
                "original_submitted_time": timestamp,
            },
        }
        if submitter is not None:
            data["submitter"] = submitter
            data["submitter_default_permission_set"] = submitter_permission_set
            data["metadata"]["submitter"] = {
                "id": submitter,
                "given_name": submitter_first_name,
                "family_name": submitter_last_name,
                "email": submitter_email,
            }
        if extract_text_only is not None:
            data["extract_text_only"] = extract_text_only

        response = requests.post(
            self._url("submissions"), headers=self._json_headers(), json=data
        )
        response.raise_for_status()

        return response.json()

    def delete_submission(self, submission_id):
        """Delete the submission and return the decoded JSON response."""
        response = requests.delete(
            self._url(f"submissions/{submission_id}"),
            headers=self.headers.copy(),
        )
        response.raise_for_status()

        return response.json()

    def get_submission_status(self, submission_id):
        """Return the ``status`` field of the submission."""
        response = requests.get(
            self._url(f"submissions/{submission_id}"),
            headers=self.headers,
        )
        response.raise_for_status()
        return response.json()["status"]

    def upload_submission(self, submission_id, file_data, file_name):
        """Upload the file contents for a previously created submission.

        Args:
            submission_id: Id of the submission to attach the file to.
            file_data: Request body passed to ``requests.put`` as ``data``.
            file_name: File name without extension; ``.pdf`` is appended.

        Returns:
            The decoded JSON response body.
        """
        headers = self.headers.copy()
        headers["Content-Type"] = "binary/octet-stream"
        headers["Content-Disposition"] = f'inline; filename="{file_name}.pdf"'
        response = requests.put(
            self._url(f"submissions/{submission_id}/original"),
            headers=headers,
            data=file_data,
        )
        response.raise_for_status()

        return response.json()

    def generate_similarity_report(
        self,
        submission_id,
        search_repositories,
        submission_auto_excludes=None,
        auto_exclude_self_matching_scope=None,
        priority=None,
        view_settings=None,
        indexing_settings=None,
    ):
        """Request a similarity report and return the decoded JSON response.

        Only ``search_repositories`` is always sent; every other setting is
        added to the payload only when it is not ``None``.
        """
        generation_settings = {"search_repositories": search_repositories}
        optional_generation_settings = {
            "submission_auto_excludes": submission_auto_excludes,
            "auto_exclude_self_matching_scope": auto_exclude_self_matching_scope,
            "priority": priority,
        }
        for key, value in optional_generation_settings.items():
            if value is not None:
                generation_settings[key] = value

        data = {"generation_settings": generation_settings}
        if view_settings is not None:
            data["view_settings"] = view_settings
        if indexing_settings is not None:
            data["indexing_settings"] = indexing_settings

        response = requests.put(
            self._url(f"submissions/{submission_id}/similarity"),
            headers=self.headers,
            json=data,
        )
        response.raise_for_status()

        return response.json()

    def get_similarity_report_status(self, submission_id):
        """Return ``(status, overall_match_percentage)`` for the similarity report.

        The percentage is ``-1`` whenever the status is not ``"COMPLETE"``.
        """
        response = requests.get(
            self._url(f"submissions/{submission_id}/similarity"),
            headers=self.headers,
        )
        response.raise_for_status()
        json_response = response.json()

        status = json_response["status"]
        if status == "COMPLETE":
            return status, json_response["overall_match_percentage"]
        return status, -1

    @staticmethod
    def _viewer_url_payload(
        viewer_id,
        viewer_default_permission_set,
        viewer_timestamp,
        viewer_permissions=None,
        similarity=None,
        author_metadata_override=None,
        sidebar=None,
    ):
        """Build the JSON body for the viewer-url request.

        Each optional section is included only when its own argument is not
        ``None``.
        """
        data = {
            "viewer_user_id": viewer_id,
            "locale": "en-US",
            "viewer_default_permission_set": viewer_default_permission_set,
            "eula": {
                "version": "v1beta",
                "accepted_timestamp": viewer_timestamp,
                "language": "en-US",
            },
        }
        optional_sections = {
            "viewer_permissions": viewer_permissions,
            "similarity": similarity,
            "author_metadata_override": author_metadata_override,
            "sidebar": sidebar,
        }
        for key, value in optional_sections.items():
            if value is not None:
                data[key] = value
        return data

    def get_viewer_url(
        self,
        submission_id,
        viewer_id,
        viewer_default_permission_set,
        viewer_timestamp,
        viewer_permissions=None,
        similarity=None,
        author_metadata_override=None,
        sidebar=None,
    ):
        """Return the ``viewer_url`` for *submission_id* as seen by *viewer_id*.

        ``author_metadata_override`` and ``sidebar`` are now gated on their own
        values. Previously both were gated on ``viewer_permissions``, so they
        were silently dropped when passed alone and sent as ``null`` when only
        ``viewer_permissions`` was given.
        """
        data = self._viewer_url_payload(
            viewer_id,
            viewer_default_permission_set,
            viewer_timestamp,
            viewer_permissions=viewer_permissions,
            similarity=similarity,
            author_metadata_override=author_metadata_override,
            sidebar=sidebar,
        )
        response = requests.post(
            self._url(f"submissions/{submission_id}/viewer-url"),
            headers=self._json_headers(),
            json=data,
        )
        response.raise_for_status()

        return response.json()["viewer_url"]

    def generate_pdf(self, submission_id):
        """Request generation of the similarity report PDF; return the decoded JSON response."""
        data = {"locale": "en-US"}
        response = requests.post(
            self._url(f"submissions/{submission_id}/similarity/pdf"),
            headers=self._json_headers(),
            json=data,
        )
        response.raise_for_status()

        return response.json()

    def download_pdf(self, download_location, submission_id, pdf_request_id):
        """Download a generated similarity report PDF to *download_location*.

        Returns:
            The decoded JSON body if the response happens to be JSON,
            otherwise ``None``.
        """
        response = requests.get(
            self._url(
                f"submissions/{submission_id}/similarity/pdf/{pdf_request_id}"
            ),
            headers=self.headers,
        )
        response.raise_for_status()
        with open(download_location, "wb") as f:
            f.write(response.content)

        # The body was just written to disk as a PDF, so it is presumably
        # binary (confirm against the Turnitin docs). Decoding it as JSON
        # unconditionally would raise after a successful download; requests'
        # JSON decode errors are ValueError subclasses.
        try:
            return response.json()
        except ValueError:
            return None
4 changes: 4 additions & 0 deletions openreview/conference/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,10 @@ def get_conference(client, request_form_id, support_user='OpenReview.net/Support
venue.sac_paper_assignments = note.content.get('senior_area_chairs_assignment', 'Area Chairs') == 'Submissions'
venue.submission_assignment_max_reviewers = int(note.content.get('submission_assignment_max_reviewers')) if note.content.get('submission_assignment_max_reviewers') is not None else None
venue.preferred_emails_groups = note.content.get('preferred_emails_groups', [])
venue.iThenticate_plagiarism_check = note.content.get('iThenticate_plagiarism_check', 'No') == 'Yes'
venue.iThenticate_plagiarism_check_api_key = note.content.get('iThenticate_plagiarism_check_api_key', '')
venue.iThenticate_plagiarism_check_api_base_url = note.content.get('iThenticate_plagiarism_check_api_base_url', '')
venue.iThenticate_plagiarism_check_committee_readers = note.content.get('iThenticate_plagiarism_check_committee_readers', '')

venue.submission_stage = get_submission_stage(note, venue)
venue.review_stage = get_review_stage(note)
Expand Down
13 changes: 13 additions & 0 deletions openreview/venue/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,19 @@ def create_venue_group(self):
'reviewers_message_id': { 'value': self.venue.get_message_id(committee_id=self.venue.get_reviewers_id()) }
}

if self.venue.iThenticate_plagiarism_check:
content['iThenticate_plagiarism_check'] = { 'value': self.venue.iThenticate_plagiarism_check }
content['iThenticate_plagiarism_check_api_key'] = {
'value': self.venue.iThenticate_plagiarism_check_api_key,
'readers': [self.venue.id],
}
content['iThenticate_plagiarism_check_api_base_url'] = {
'value': self.venue.iThenticate_plagiarism_check_api_base_url,
'readers': [self.venue.id],
}
content['iThenticate_plagiarism_check_invitation_id'] = { 'value': self.venue.get_iThenticate_plagiarism_check_invitation_id() }
content['iThenticate_plagiarism_check_committee_readers'] = { 'value': self.venue.iThenticate_plagiarism_check_committee_readers }

if self.venue.preferred_emails_groups:
content['preferred_emails_groups'] = { 'value': self.venue.preferred_emails_groups }
content['preferred_emails_id'] = { 'value': self.venue.get_preferred_emails_invitation_id() }
Expand Down
90 changes: 89 additions & 1 deletion openreview/venue/invitation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4425,5 +4425,93 @@ def set_preferred_emails_invitation(self):
}]
)

self.save_invitation(invitation)
self.save_invitation(invitation)


def set_iThenticate_plagiarism_check_invitation(self):
    """Create the iThenticate plagiarism-check Edge invitation for the venue.

    Does nothing when the venue has plagiarism checking disabled or when an
    invitation with the target id can already be fetched. Otherwise builds an
    Edge invitation whose head is a submission note, whose tail is a string,
    whose weight ranges from -1 to 100 and whose label is one of the listed
    status values (or any value with the ``Error`` prefix), then saves it.
    """
    venue_id = self.venue_id
    venue = self.venue

    if not venue.iThenticate_plagiarism_check:
        return

    # Assumes the id getter is pure, so it is computed once and reused.
    check_invitation_id = venue.get_iThenticate_plagiarism_check_invitation_id()
    if openreview.tools.get_invitation(self.client, check_invitation_id):
        return

    # Template token that is resolved to the number of the edge's head note.
    paper_number = '${{2/head}/number}'

    # The venue can always read the edges, plus each configured per-paper
    # committee group.
    edge_readers = [venue_id] + [
        venue.get_committee_id(committee_name, number=paper_number)
        for committee_name in venue.iThenticate_plagiarism_check_committee_readers
    ]

    def optional_timestamp():
        # A fresh dict per field, so ddate and cdate never share one object.
        return {
            'param': {
                'range': [ 0, 9999999999999 ],
                'optional': True,
                'deletable': True
            }
        }

    status_labels = [{'prefix': 'Error'}] + [
        {'value': status}
        for status in (
            'File Sent',
            'File Uploaded',
            'Similarity Requested',
            'Similarity Complete',
            'Created',
            'Processing',
        )
    ]

    edge_edit = {
        'id': {
            'param': {
                'withInvitation': check_invitation_id,
                'optional': True
            }
        },
        'ddate': optional_timestamp(),
        'cdate': optional_timestamp(),
        'readers': edge_readers,
        'nonreaders': [venue.get_authors_id(number=paper_number)],
        'writers': [venue_id],
        'signatures': [venue_id],
        'head': {
            'param': {
                'type': 'note',
                'withInvitation': venue.get_submission_id()
            }
        },
        'tail': {
            'param': {
                'type': 'string'
            }
        },
        'weight': {
            'param': {
                'minimum': -1,
                'maximum': 100,
                'default': -1
            }
        },
        'label': {
            'param': {
                'enum': status_labels,
                'default': "Created",
            }
        },
    }

    invitation = Invitation(
        id=check_invitation_id,
        invitees=[venue_id],
        readers=[venue_id],
        writers=[venue_id],
        signatures=[venue_id],
        minReplies=1,
        maxReplies=1,
        type='Edge',
        edit=edge_edit,
    )

    self.save_invitation(invitation)

Loading

0 comments on commit 243ebba

Please sign in to comment.