Skip to content

Commit

Permalink
Merge pull request #45 from itrujnara/bug_fixes
Browse files Browse the repository at this point in the history
Multiple fixes
  • Loading branch information
itrujnara authored May 29, 2024
2 parents 2da7da1 + 824403c commit cb220ff
Show file tree
Hide file tree
Showing 9 changed files with 98 additions and 196 deletions.
11 changes: 8 additions & 3 deletions bin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from typing import Any

import requests
from requests.exceptions import RequestException

POLLING_INTERVAL = 0.5

Expand All @@ -16,7 +15,10 @@ def safe_get(url: str):
Get a URL and return the response.
"""
try:
return requests.get(url)
return requests.get(url, timeout = 300)
except requests.exceptions.Timeout as e:
print(f"Request timed out. This might be due to a server issue. If this persists, try again later. Details:\n{e}", file=sys.stderr)
sys.exit(10)
except requests.exceptions.RequestException as e:
print(f"A network issue occurred. Retrying request. Details:\n{e}", file=sys.stderr)
sys.exit(10)
Expand All @@ -27,7 +29,10 @@ def safe_post(url: str, data: dict = dict(), json: dict = dict()):
Post data to a URL and return the response.
"""
try:
return requests.post(url, data=data, json=json)
return requests.post(url, data = data, json = json, timeout = 300)
except requests.exceptions.Timeout as e:
print(f"Request timed out. This might be due to a server issue. If this persists, try again later. Details:\n{e}", file=sys.stderr)
sys.exit(10)
except requests.exceptions.RequestException as e:
print(f"A network issue occurred. Retrying request. Details:\n{e}", file=sys.stderr)
sys.exit(10)
Expand Down
81 changes: 18 additions & 63 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ process {
publishDir = [
path: { "${params.outdir}/seqinfo" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'}
maxRetries = 3
Expand All @@ -36,7 +37,8 @@ process {
publishDir = [
path: { "${params.outdir}/orthologs/oma" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'}
maxRetries = 3
Expand All @@ -46,7 +48,8 @@ process {
publishDir = [
path: { "${params.outdir}/orthologs/panther" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'}
maxRetries = 3
Expand All @@ -56,7 +59,8 @@ process {
publishDir = [
path: { "${params.outdir}/orthologs/orthoinspector" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'}
maxRetries = 3
Expand All @@ -66,19 +70,15 @@ process {
publishDir = [
path: { "${params.outdir}/orthologs/eggnog" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'}
maxRetries = 3
}

withName: 'MERGE_CSV' {
ext.args = '-f 1 --outer-join --na 0'
publishDir = [
path: { "${params.outdir}/orthologs/merge_csv" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'MAKE_SCORE_TABLE' {
Expand All @@ -93,7 +93,8 @@ process {
publishDir = [
path: { "${params.outdir}/orthologs/filter_hits" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

Expand All @@ -105,33 +106,9 @@ process {
]
}

withName: 'MAKE_HITS_TABLE' {
publishDir = [
path: { "${params.outdir}/orthologs/hits" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'MERGE_HITS' {
ext.args = "-u 0 -k"
ext.prefix = "aggregated_hits"
publishDir = [
path: { "${params.outdir}/orthologs/hits" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'MAKE_STATS' {
publishDir = [
path: { "${params.outdir}/orthologs/stats" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'STATS2CSV' {
publishDir = [
path: { "${params.outdir}/orthologs/stats" },
mode: params.publish_dir_mode,
Expand All @@ -155,7 +132,7 @@ process {

withName: 'FETCH_SEQUENCES_ONLINE' {
publishDir = [
path: { "${params.outdir}/sequences" },
path: { "${params.outdir}/alignment/sequences" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
Expand All @@ -165,7 +142,7 @@ process {

withName: 'FETCH_AFDB_STRUCTURES' {
publishDir = [
path: { "${params.outdir}/structures" },
path: { "${params.outdir}/alignment/structures" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
Expand All @@ -177,15 +154,8 @@ process {
publishDir = [
path: { "${params.outdir}/alignment/filter" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'CREATE_TCOFFEETEMPLATE' {
publishDir = [
path: { "${params.outdir}/alignment/template" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

Expand All @@ -201,14 +171,6 @@ process {
// Tree reconstruction
// ----------------------

withName: 'CONVERT_PHYLIP' {
publishDir = [
path: { "${params.outdir}/trees/convert" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'IQTREE' {
ext.args = '-m TEST' + (params.iqtree_bootstrap > 0 ? ' -bb ' + params.iqtree_bootstrap : '')
publishDir = [
Expand Down Expand Up @@ -247,19 +209,12 @@ process {
// Report generation
// ----------------------

withName: 'CONVERT_FASTA' {
publishDir = [
path: { "${params.outdir}/report/convert" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'DUMP_PARAMS' {
publishDir = [
path: { "${params.outdir}/report/params" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

Expand Down
8 changes: 7 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,18 @@ If you want to use local database copies for the run, you must provide the requi
| `eggnog_path` | `1_members.tsv.gz` |
| `eggnog_idmap_path` | `latest.Eukaryota.tsv.gz` |

If you need reduced versions of the local databases for testing, you can find them [here](https://github.com/nf-core/test-datasets/tree/reportho/testdata/databases). Note that they were designed to work with the [test samplesheet](https://github.com/nf-core/test-datasets/blob/reportho/testdata/samplesheet/samplesheet.csv) and will likely not produce any results for other queries.

### Running offline

With large input sizes, you might want to run the pipeline locally, without runtime access to APIs. There are two main parameters used to achieve this. If you want to use local databases, set `--local_databases` to `true`. Remember to set `--use_all` to `false` to ensure the database step is run fully offline. If your input is especially large, you can also skip the initial online identification steps by setting `--offline_run` to `true`. Note that FASTA input will not work with this option enabled. You can check `test_offline.config` to see the required options for a fully offline run. Keep in mind that the options only affect ortholog finding, and the downstream analysis still requires connection to obtain sequences and structures.
With large input sizes, you might want to run the pipeline locally, without runtime access to APIs. There are two main parameters used to achieve this. If you want to use local databases, set `--local_databases` to `true`. Remember to set `--use_all` to `false` to ensure the database step is run fully offline. If your input is especially large, you can also skip the initial online identification steps by setting `--offline_run` to `true`. Note that FASTA input will not work with this option enabled, and the pipeline will abort if this is attempted. You can check [test_offline.config](https://github.com/nf-core/reportho/blob/master/conf/test_offline.config) to see the required options for a fully offline run. Keep in mind that these options only affect ortholog finding; the downstream analysis still requires an internet connection to obtain sequences and structures.

While these options allow the pipeline to run its steps offline, the pipeline still requires certain configuration files and container images that are downloaded from the internet. If you wish to run the pipeline on a machine without an internet connection, you can pre-download the required files with `nf-core download`. See [the nf-core tools documentation](https://nf-co.re/docs/nf-core-tools/pipelines/download) for details.

### Downstream analysis

Downstream analysis (i.e. MSA and phylogeny) relies on online resources to obtain sequences and structures, and thus cannot be run offline. For your convenience, it will be automatically disabled if you enable `--offline_run`. Note that if the sequence or structure of an ortholog cannot be obtained, that ortholog will be excluded from the alignment and phylogeny. In particular, if `--use_structures` is enabled, only the orthologs with both a sequence and a structure available will be retained.

### Updating the pipeline

When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
params {
// Input options
input = null
output_intermediates = false

// MultiQC options
multiqc_config = null
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
"fa_icon": "fas fa-folder-open"
},
"output_intermediates": {
"type": "boolean",
"default": "false",
"description": "Output intermediate files, including specific prediction lists.",
"fa_icon": "fas fa-folder-open"
},
"email": {
"type": "string",
"description": "Email address for completion summary.",
Expand Down
Loading

0 comments on commit cb220ff

Please sign in to comment.