From c5f41b240c40c70260d312e9fb6319441e1e052b Mon Sep 17 00:00:00 2001 From: samwinebrake Date: Tue, 13 Feb 2024 14:19:22 -0500 Subject: [PATCH 1/7] - added extract_identifiers to find patterns within plugin inits - changed variable naming within is_submission_original - duplicate protection of directory names and plugin identifiers --- benchmarks/views/user.py | 54 +++++++++++++++++++++++++++++++++++----- 1 file changed, 48 insertions(+), 6 deletions(-) diff --git a/benchmarks/views/user.py b/benchmarks/views/user.py index 0535bbc1f..bd273acb7 100644 --- a/benchmarks/views/user.py +++ b/benchmarks/views/user.py @@ -2,7 +2,9 @@ import logging import os import zipfile +import re from typing import Tuple, Union, List +from io import TextIOWrapper import boto3 import requests @@ -213,7 +215,7 @@ def post(self, request): return render(request, 'benchmarks/success.html', {"domain": self.domain}) -def is_submission_original(file, submitter: User) -> Tuple[bool, Union[None, List[str]]]: +def is_submission_original(file, submitter): # add metrics and data eventually plugin_db_mapping = {"models": Model, "benchmarks": BenchmarkType} @@ -221,22 +223,35 @@ def is_submission_original(file, submitter: User) -> Tuple[bool, Union[None, Lis namelist = archive.infolist() plugins = plugins_exist(namelist)[1] + # grab identifiers from inits of all plugins + plugin_identifiers = extract_identifiers(archive) + # for each plugin submitted, make sure that the identifier does not exist already: for plugin in plugins: - identifiers = plugin_has_instances(namelist, plugin)[1] + plugin_directory_names = plugin_has_instances(namelist, plugin)[1] db_table = plugin_db_mapping[plugin] # Determine the field name based on the plugin type field_name = 'name' if plugin == "models" else 'identifier' - for identifier in identifiers: - query_filter = {field_name: identifier} + # plugin_name corresponds to the directory name, plugin_identifier corresponds to actual identifiers from inits + all_plugin_ids = plugin_directory_names + list(plugin_identifiers[plugin]) + for plugin_name_or_identifier in all_plugin_ids: + query_filter = {field_name: plugin_name_or_identifier} # Check if an entry with the given identifier exists if db_table.objects.filter(**query_filter).exists(): - return False, [plugin, identifier] + owner_obj = db_table.objects.get(**query_filter) + owner_id = getattr(owner_obj, 'owner_id', None) or getattr(owner_obj, 'owner').id + + # Check to see if the submitter is the owner (or superuser) + if owner_id == submitter.id or submitter.is_superuser: + # Khaled versioning here + print(owner_id, submitter) + else: + return False, [plugin, plugin_name_or_identifier] - return True, None # Passes all checks, then the submission is original -> good to go + return True, [] # Passes all checks, then the submission is original -> good to go def validate_zip(file): @@ -309,6 +324,33 @@ def instance_has_files(namelist, instances): return True, files_list, None +def extract_identifiers(zip_ref): + # define patterns for each plugin type (data and metrics to be added later) + possible_plugins = ["models", "benchmarks"] + registry_patterns = { + "models": re.compile(r"model_registry\['(.+?)'\]"), + "benchmarks": re.compile(r"benchmark_registry\['(.+?)'\]"), + } + + # dictionary to hold identifiers for each plugin type found + identifiers = {plugin: set() for plugin in possible_plugins} + + for file_info in zip_ref.infolist(): + path_segments = file_info.filename.split('/') + for plugin in possible_plugins: + # check if __init__.py under any of the possible plugins' directories + if plugin in path_segments and '__init__.py' in path_segments[-1]: + with zip_ref.open(file_info) as file: + # extract identifier pattern matches + for line in TextIOWrapper(file, encoding='utf-8'): + if pattern := registry_patterns.get(plugin): + matches = pattern.findall(line) + identifiers[plugin].update(matches) + break + + return identifiers + + def collect_models_benchmarks(request): assert request.method == 'POST' From d44cfcd617254d4420c05020cf307ef53d9872c0 Mon Sep 17 00:00:00 2001 From: Sam Winebrake <85908068+samwinebrake@users.noreply.github.com> Date: Tue, 13 Feb 2024 14:29:12 -0500 Subject: [PATCH 2/7] := not compatible in python 3.7... code changed --- benchmarks/views/user.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/views/user.py b/benchmarks/views/user.py index bd273acb7..7d8c21834 100644 --- a/benchmarks/views/user.py +++ b/benchmarks/views/user.py @@ -343,7 +343,8 @@ def extract_identifiers(zip_ref): with zip_ref.open(file_info) as file: # extract identifier pattern matches for line in TextIOWrapper(file, encoding='utf-8'): - if pattern := registry_patterns.get(plugin): + pattern = registry_patterns.get(plugin) + if pattern: matches = pattern.findall(line) identifiers[plugin].update(matches) break From 6e99c005d4d9859af483a21b8f4e9ba4713f73c1 Mon Sep 17 00:00:00 2001 From: Sam Winebrake <85908068+samwinebrake@users.noreply.github.com> Date: Wed, 14 Feb 2024 14:45:50 -0500 Subject: [PATCH 3/7] ignore possible commented out identifier patterns --- benchmarks/views/user.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/views/user.py b/benchmarks/views/user.py index fa091e431..1dcc062c5 100644 --- a/benchmarks/views/user.py +++ b/benchmarks/views/user.py @@ -349,9 +349,10 @@ def extract_identifiers(zip_ref): with zip_ref.open(file_info) as file: # extract identifier pattern matches for line in TextIOWrapper(file, encoding='utf-8'): + line_code = line.split('#', 1)[0] # ignore both inline and own line comments pattern = registry_patterns.get(plugin) if pattern: - matches = pattern.findall(line) + matches = pattern.findall(line_code) identifiers[plugin].update(matches) break From dd085d861cc48a71386601bf2ab607fffaaa4ec4 Mon Sep 17 00:00:00 2001 From: Sam Winebrake <85908068+samwinebrake@users.noreply.github.com> Date: Tue, 20 Feb 2024 14:46:17 -0500 Subject: [PATCH 4/7] make zip structure 'zip_root/plugin/plugin_name/__init__.py' mandatory --- benchmarks/views/user.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/benchmarks/views/user.py b/benchmarks/views/user.py index 1dcc062c5..79c8d1e49 100644 --- a/benchmarks/views/user.py +++ b/benchmarks/views/user.py @@ -343,18 +343,16 @@ def extract_identifiers(zip_ref): for file_info in zip_ref.infolist(): path_segments = file_info.filename.split('/') - for plugin in possible_plugins: - # check if __init__.py under any of the possible plugins' directories - if plugin in path_segments and '__init__.py' in path_segments[-1]: - with zip_ref.open(file_info) as file: - # extract identifier pattern matches - for line in TextIOWrapper(file, encoding='utf-8'): - line_code = line.split('#', 1)[0] # ignore both inline and own line comments - pattern = registry_patterns.get(plugin) - if pattern: - matches = pattern.findall(line_code) - identifiers[plugin].update(matches) - break + # ensure the path has 4 segments [zip root, plugin, plugin_name, __init__.py] + if len(path_segments) == 4 and path_segments[1] in possible_plugins and path_segments[-1] == '__init__.py': + plugin = path_segments[1] + with zip_ref.open(file_info) as file: + # extract identifier pattern matches + for line in TextIOWrapper(file, encoding='utf-8'): + line_code = line.split('#', 1)[0].strip() + if pattern := registry_patterns.get(plugin): + matches = pattern.findall(line_code) + identifiers[plugin].update(matches) return identifiers From 104ee75634e1617f24871a21ac7555b810847ef1 Mon Sep 17 00:00:00 2001 From: Sam Winebrake <85908068+samwinebrake@users.noreply.github.com> Date: Wed, 21 Feb 2024 13:46:20 -0500 Subject: [PATCH 5/7] refix := --- benchmarks/views/user.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmarks/views/user.py b/benchmarks/views/user.py index 79c8d1e49..b131291b3 100644 --- a/benchmarks/views/user.py +++ b/benchmarks/views/user.py @@ -349,8 +349,9 @@ def extract_identifiers(zip_ref): with zip_ref.open(file_info) as file: # extract identifier pattern matches for line in TextIOWrapper(file, encoding='utf-8'): - line_code = line.split('#', 1)[0].strip() - if pattern := registry_patterns.get(plugin): + line_code = line.split('#', 1)[0].strip() # ignore both inline and own line comments + pattern = registry_patterns.get(plugin) + if pattern: matches = pattern.findall(line_code) identifiers[plugin].update(matches) From ac9614cd493534d27fd8e056ad2166d4f08c9442 Mon Sep 17 00:00:00 2001 From: Sam Winebrake <85908068+samwinebrake@users.noreply.github.com> Date: Wed, 21 Feb 2024 13:52:24 -0500 Subject: [PATCH 6/7] update check for submitter id --- benchmarks/views/user.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/benchmarks/views/user.py b/benchmarks/views/user.py index b131291b3..68aa16407 100644 --- a/benchmarks/views/user.py +++ b/benchmarks/views/user.py @@ -239,18 +239,16 @@ def is_submission_original(file, submitter): for plugin_name_or_identifier in all_plugin_ids: query_filter = {field_name: plugin_name_or_identifier} - # Check if an entry with the given identifier exists + # check if an entry with the given identifier exists if db_table.objects.filter(**query_filter).exists(): owner_obj = db_table.objects.get(**query_filter) owner_id = getattr(owner_obj, 'owner_id', None) or getattr(owner_obj, 'owner').id - # Check to see if the submitter is the owner (or superuser) - if owner_id == submitter.id or submitter.is_superuser: - # Khaled versioning here - print(owner_id, submitter) - else: + # check to see if the submitter is the owner (or superuser) + if owner_id != submitter.id and not submitter.is_superuser: return False, [plugin, plugin_name_or_identifier] - + # else, versioning will be input here + return True, [] # Passes all checks, then the submission is original -> good to go From 176e0d586c13f075e25e492ca9909d7878dd1df7 Mon Sep 17 00:00:00 2001 From: Sam Winebrake <85908068+samwinebrake@users.noreply.github.com> Date: Wed, 21 Feb 2024 13:59:45 -0500 Subject: [PATCH 7/7] readd type hints --- benchmarks/views/user.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/views/user.py b/benchmarks/views/user.py index 68aa16407..6c2f94e20 100644 --- a/benchmarks/views/user.py +++ b/benchmarks/views/user.py @@ -215,7 +215,7 @@ def post(self, request): return render(request, 'benchmarks/success.html', {"domain": self.domain}) -def is_submission_original(file, submitter): +def is_submission_original(file, submitter: User) -> Tuple[bool, Union[None, List[str]]]: # add metrics and data eventually plugin_db_mapping = {"models": Model, "benchmarks": BenchmarkType}