Skip to content

Commit

Permalink
Merge pull request #69 from usegalaxy-eu/remote-fix
Browse files Browse the repository at this point in the history
Bug fix for the remote check
  • Loading branch information
bgruening authored May 3, 2022
2 parents e5c693c + 7eb4736 commit 61be1b3
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 11 deletions.
2 changes: 1 addition & 1 deletion ena_upload/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.5.3"
__version__ = "0.6.0"
4 changes: 2 additions & 2 deletions ena_upload/check_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
URL = "https://www.ebi.ac.uk/ena/portal/api/search"
DEV_URL = "https://wwwdev.ebi.ac.uk/ena/portal/api/search"

def identify_action(entry_type, alias, dev):
''' define action ['add' | 'modify'] that needs to be performed for this entry '''
def remote_check(entry_type, alias, dev):
''' Identify if an ENA object is present or not '''
query = {entry_type + '_alias': alias}
remote_accessions = check_remote_entry(entry_type, query, dev)
if isinstance(remote_accessions, list) and len(remote_accessions) > 0:
Expand Down
20 changes: 12 additions & 8 deletions ena_upload/ena_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import pandas as pd
import tempfile
from ena_upload._version import __version__
from ena_upload.check_remote import identify_action
from ena_upload.check_remote import remote_check

SCHEMA_TYPES = ['study', 'experiment', 'run', 'sample']

Expand Down Expand Up @@ -85,6 +85,7 @@ def extract_targets(action, schema_dataframe):

def check_columns(df, schema, action, dev, auto_action):
# checking for optional columns and if not present, adding them
print(f"Check if all required columns are present in the {schema} table.")
if schema == 'sample':
optional_columns = ['accession', 'submission_date',
'status', 'scientific_name', 'taxon_id']
Expand All @@ -101,23 +102,26 @@ def check_columns(df, schema, action, dev, auto_action):
for index, row in df.iterrows():
remote_present = np.nan
try:
remote_present = str(identify_action(
schema, str(df['alias'][index]), dev)).upper()
remote_present = remote_check(
schema, str(df['alias'][index]), dev)

except Exception as e:
print(e)
print(
f"Something went wrong with detecting the ENA object {df['alias'][index]} on the servers of ENA. This object will be skipped.")
if remote_present == np.nan:
df.at[index, header] = np.nan
elif remote_present and action == 'MODIFY':
if remote_present and action == 'MODIFY':
df.at[index, header] = action
print(
f"\t'{df['alias'][index]}' gets '{remote_present}' as action in the status column")
f"\t'{df['alias'][index]}' gets '{action}' as action in the status column")
elif not remote_present and action in ['ADD', 'CANCEL', 'RELEASE']:
df.at[index, header] = action
print(
f"\t'{df['alias'][index]}' gets '{remote_present}' as action in the status column")
f"\t'{df['alias'][index]}' gets '{action}' as action in the status column")
else:
df.at[index, header] = np.nan
print(
f"\t'{df['alias'][index]}' gets skipped since it is already present at ENA")

else:
# status column contains action keywords
# for xml rendering, keywords require uppercase
Expand Down

0 comments on commit 61be1b3

Please sign in to comment.