Skip to content

Commit

Permalink
feat(deposition): Use github pages to not get rate limited, assert se…
Browse files Browse the repository at this point in the history
…quences with ena specific fields were submitted by us or the insdc_submission_group (#3327)

* Use GitHub Pages to avoid being rate limited

* Fix logging errors, and check whether data was submitted by the insdc_submission_group rather than the insdc_submission_user, so that no warning is raised when a sequence has been curated
  • Loading branch information
anna-parker authored Dec 4, 2024
1 parent 6715be4 commit fdd58b6
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion ena-submission/config/defaults.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
username: external_metadata_updater
password: external_metadata_updater
keycloak_client_id: backend-client
ingest_pipeline_submitter: insdc_ingest_user
ingest_pipeline_submission_group: 1
db_name: Loculus
unique_project_suffix: Loculus
ena_submission_username: fake-user
Expand Down
18 changes: 9 additions & 9 deletions ena-submission/scripts/get_ena_submission_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def filter_for_submission(
- data must be state "OPEN" for use
- data must not already exist in ENA or be in the submission process.
To prevent this we need to make sure:
- data was not submitted by the config.ingest_pipeline_submitter
- data was not submitted by the config.ingest_pipeline_submission_group
- data is not in submission_table
- as an extra check we discard all sequences with ena-specific-metadata fields
(if users uploaded correctly this should not be needed)
Expand All @@ -37,14 +37,14 @@ def filter_for_submission(
accession, version = key.split(".")
if item["metadata"]["dataUseTerms"] != "OPEN":
continue
if item["metadata"]["submitter"] == config.ingest_pipeline_submitter:
if item["metadata"]["groupId"] == config.ingest_pipeline_submission_group:
continue
if in_submission_table(db_config, {"accession": accession, "version": version}):
continue
if any(item["metadata"].get(field, False) for field in config.ena_specific_metadata):
logging.warning(
f"Found sequence: {key} with ena-specific-metadata fields and not submitted by us ",
f"or {config.ingest_pipeline_submitter}. Potential user error: discarding sequence.",
logger.warning(
f"Found sequence: {key} with ena-specific-metadata fields and not submitted by us "
f"or {config.ingest_pipeline_submission_group}. Potential user error: discarding sequence."
)
continue
item["organism"] = organism
Expand All @@ -59,7 +59,7 @@ def send_slack_notification_with_file(config: Config, output_file: str) -> None:
slack_channel_id_default=config.slack_channel_id,
)
if not slack_config.slack_hook:
logging.info("Could not find slack hook, cannot send message")
logger.info("Could not find slack hook, cannot send message")
return
comment = (
f"{config.backend_url}: ENA Submission pipeline wants to submit the following sequences"
Expand Down Expand Up @@ -104,14 +104,14 @@ def get_ena_submission_list(config_file, output_file):
directory = file_path.parent
if not directory.exists():
directory.mkdir(parents=True)
logging.debug(f"Created directory '{directory}'")
logger.debug(f"Created directory '{directory}'")

entries_to_submit = {}
for organism in config.organisms:
config.ena_specific_metadata = [
value["name"] for value in config.organisms[organism]["externalMetadata"]
]
logging.info(f"Getting released sequences for organism: {organism}")
logger.info(f"Getting released sequences for organism: {organism}")

released_entries = fetch_released_entries(config, organism)
submittable_entries = filter_for_submission(config, db_config, released_entries, organism)
Expand All @@ -121,7 +121,7 @@ def get_ena_submission_list(config_file, output_file):
Path(output_file).write_text(json.dumps(entries_to_submit), encoding="utf-8")
send_slack_notification_with_file(config, output_file)
else:
logging.info("No sequences found to submit to ENA")
logger.info("No sequences found to submit to ENA")
Path(output_file).write_text("", encoding="utf-8")


Expand Down
4 changes: 2 additions & 2 deletions ena-submission/src/ena_deposition/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@ def secure_ena_connection(config: Config):
config.test = True
logging.info("Submitting to ENA dev environment")
config.ena_submission_url = "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit"
config.github_url = "https://raw.githubusercontent.com/pathoplexus/ena-submission/main/test/approved_ena_submission_list.json"
config.github_url = "https://pathoplexus.github.io/ena-submission/test/approved_ena_submission_list.json"
config.ena_reports_service_url = "https://wwwdev.ebi.ac.uk/ena/submit/report"

if submit_to_ena_prod:
config.test = False
logging.warn("WARNING: Submitting to ENA production")
config.ena_submission_url = "https://www.ebi.ac.uk/ena/submit/drop-box/submit"
config.github_url = "https://raw.githubusercontent.com/pathoplexus/ena-submission/main/approved/approved_ena_submission_list.json"
config.github_url = "https://pathoplexus.github.io/ena-submission/approved/approved_ena_submission_list.json"
config.ena_reports_service_url = "https://www.ebi.ac.uk/ena/submit/report"


Expand Down

0 comments on commit fdd58b6

Please sign in to comment.