Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(deposition): Use GitHub Pages to avoid being rate limited; assert sequences with ENA-specific fields were submitted by us or the insdc_submission_group #3327

Merged
merged 2 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ena-submission/config/defaults.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
username: external_metadata_updater
password: external_metadata_updater
keycloak_client_id: backend-client
ingest_pipeline_submitter: insdc_ingest_user
ingest_pipeline_submission_group: 1
db_name: Loculus
unique_project_suffix: Loculus
ena_submission_username: fake-user
Expand Down
18 changes: 9 additions & 9 deletions ena-submission/scripts/get_ena_submission_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def filter_for_submission(
- data must be state "OPEN" for use
- data must not already exist in ENA or be in the submission process.
To prevent this we need to make sure:
- data was not submitted by the config.ingest_pipeline_submitter
- data was not submitted by the config.ingest_pipeline_submission_group
- data is not in submission_table
- as an extra check we discard all sequences with ena-specific-metadata fields
(if users uploaded correctly this should not be needed)
Expand All @@ -37,14 +37,14 @@ def filter_for_submission(
accession, version = key.split(".")
if item["metadata"]["dataUseTerms"] != "OPEN":
continue
if item["metadata"]["submitter"] == config.ingest_pipeline_submitter:
if item["metadata"]["groupId"] == config.ingest_pipeline_submission_group:
continue
if in_submission_table(db_config, {"accession": accession, "version": version}):
continue
if any(item["metadata"].get(field, False) for field in config.ena_specific_metadata):
logging.warning(
f"Found sequence: {key} with ena-specific-metadata fields and not submitted by us ",
f"or {config.ingest_pipeline_submitter}. Potential user error: discarding sequence.",
logger.warning(
f"Found sequence: {key} with ena-specific-metadata fields and not submitted by us "
f"or {config.ingest_pipeline_submission_group}. Potential user error: discarding sequence."
)
continue
item["organism"] = organism
Expand All @@ -59,7 +59,7 @@ def send_slack_notification_with_file(config: Config, output_file: str) -> None:
slack_channel_id_default=config.slack_channel_id,
)
if not slack_config.slack_hook:
logging.info("Could not find slack hook, cannot send message")
logger.info("Could not find slack hook, cannot send message")
return
comment = (
f"{config.backend_url}: ENA Submission pipeline wants to submit the following sequences"
Expand Down Expand Up @@ -104,14 +104,14 @@ def get_ena_submission_list(config_file, output_file):
directory = file_path.parent
if not directory.exists():
directory.mkdir(parents=True)
logging.debug(f"Created directory '{directory}'")
logger.debug(f"Created directory '{directory}'")

entries_to_submit = {}
for organism in config.organisms:
config.ena_specific_metadata = [
value["name"] for value in config.organisms[organism]["externalMetadata"]
]
logging.info(f"Getting released sequences for organism: {organism}")
logger.info(f"Getting released sequences for organism: {organism}")

released_entries = fetch_released_entries(config, organism)
submittable_entries = filter_for_submission(config, db_config, released_entries, organism)
Expand All @@ -121,7 +121,7 @@ def get_ena_submission_list(config_file, output_file):
Path(output_file).write_text(json.dumps(entries_to_submit), encoding="utf-8")
send_slack_notification_with_file(config, output_file)
else:
logging.info("No sequences found to submit to ENA")
logger.info("No sequences found to submit to ENA")
Path(output_file).write_text("", encoding="utf-8")


Expand Down
4 changes: 2 additions & 2 deletions ena-submission/src/ena_deposition/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ def secure_ena_connection(config: Config):
config.test = True
logging.info("Submitting to ENA dev environment")
config.ena_submission_url = "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit"
config.github_url = "https://raw.githubusercontent.com/pathoplexus/ena-submission/main/test/approved_ena_submission_list.json"
config.github_url = "https://pathoplexus.github.io/ena-submission/test/approved_ena_submission_list.json"
config.ena_reports_service_url = "https://wwwdev.ebi.ac.uk/ena/submit/report"

if submit_to_ena_prod:
config.test = False
logging.warn("WARNING: Submitting to ENA production")
config.ena_submission_url = "https://www.ebi.ac.uk/ena/submit/drop-box/submit"
config.github_url = "https://raw.githubusercontent.com/pathoplexus/ena-submission/main/approved/approved_ena_submission_list.json"
config.github_url = "https://pathoplexus.github.io/ena-submission/approved/approved_ena_submission_list.json"
config.ena_reports_service_url = "https://www.ebi.ac.uk/ena/submit/report"


Expand Down
Loading