Skip to content

Commit

Permalink
BD rhapsody sequence analysis (#96)
Browse files Browse the repository at this point in the history
* wip

* fix test

* add help

* update 2.2 args

* fix bug

* extend test data

* output separate files

* analyse missing args

* tweaks to test

* fix script

* fix test

* fix test

* move small reference

* wip generate wta test data

* don't forget about umi in r1

* remove unneeded pkg

* load reference in memory just once

* fix random choices

* extend test

* add abc immunediscoverypanel

* wip abc testing code

* fix abc test; need unique instrument, run and flowcell ids for each sample

* add smk data

* add entry to changelog

* remove old test file

* adapt test for missing read

* update description

* add comment

* ensure cwl files are absolute

* Apply suggestions from code review

Co-authored-by: Dries Schaumont <[email protected]>

* fix suggestion

* newer pipelines have docker requirements as a hint instead of a strict requirement

* rename str to content

* remove deleted resources

* fix containers

* fix script

* fix suggestion

* fix suggestion...

* fix test

* fix component name

* fix test

* apply suggestions

* fix test

* added note

* fix changelog

* fix changelog again

* splitting hairs here

---------

Co-authored-by: Dries Schaumont <[email protected]>
  • Loading branch information
rcannood and DriesSchaumont authored Sep 17, 2024
1 parent 38f635a commit 7f8bcc2
Show file tree
Hide file tree
Showing 17 changed files with 2,532 additions and 172 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
* `agat`:
- `agat/agat_convert_genscan2gff`: convert a genscan file into a GFF file (PR #100).

* `bd_rhapsody/bd_rhapsody_sequence_analysis`: BD Rhapsody Sequence Analysis CWL pipeline (PR #96).

## MINOR CHANGES

* Upgrade to Viash 0.9.0.
Expand Down
14 changes: 10 additions & 4 deletions src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,11 @@ argument_groups:
resources:
- type: python_script
path: script.py
- path: make_rhap_reference_2.2.1_nodocker.cwl

test_resources:
- type: bash_script
path: test.sh
- path: test_data
- path: ../test_data

requirements:
commands: [ "cwl-runner" ]
Expand All @@ -131,12 +130,19 @@ engines:
image: bdgenomics/rhapsody:2.2.1
setup:
- type: apt
packages: [procps]
packages: [procps, git]
- type: python
packages: [cwlref-runner, cwl-runner]
- type: docker
run: |
echo "bdgenomics/rhapsody: 2.2.1" > /var/software_versions.txt
mkdir /var/bd_rhapsody_cwl && \
cd /var/bd_rhapsody_cwl && \
git clone https://bitbucket.org/CRSwDev/cwl.git . && \
git checkout 8feeace1141b24749ea6003f8e6ad6d3ad5232de
- type: docker
run:
- VERSION=$(ls -v /var/bd_rhapsody_cwl | grep '^v' | sed 's#v##' | tail -1)
- 'echo "bdgenomics/rhapsody: \"$VERSION\"" > /var/software_versions.txt'

runners:
- type: executable
Expand Down

This file was deleted.

12 changes: 6 additions & 6 deletions src/bd_rhapsody/bd_rhapsody_make_reference/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,21 +83,21 @@ def generate_config(par: dict[str, Any], meta, config) -> str:

for config_key, arg_type, par_value in config_key_value_pairs:
if arg_type == "file":
str = strip_margin(f"""\
content = strip_margin(f"""\
|{config_key}:
|""")
if isinstance(par_value, list):
for file in par_value:
str += strip_margin(f"""\
content += strip_margin(f"""\
| - class: File
| location: "{file}"
|""")
else:
str += strip_margin(f"""\
content += strip_margin(f"""\
| class: File
| location: "{par_value}"
|""")
content_list.append(str)
content_list.append(content)
else:
content_list.append(strip_margin(f"""\
|{config_key}: {par_value}
Expand All @@ -108,9 +108,9 @@ def generate_config(par: dict[str, Any], meta, config) -> str:

def get_cwl_file(meta: dict[str, Any]) -> str:
# create cwl file (if need be)
cwl_file=os.path.join(meta["resources_dir"], "make_rhap_reference_2.2.1_nodocker.cwl")
cwl_file="/var/bd_rhapsody_cwl/v2.2.1/Extra_Utilities/make_rhap_reference_2.2.1.cwl"

return cwl_file
return os.path.abspath(cwl_file)

def main(par: dict[str, Any], meta: dict[str, Any]):
config = read_config(meta["config"])
Expand Down
47 changes: 0 additions & 47 deletions src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh

This file was deleted.

116 changes: 116 additions & 0 deletions src/bd_rhapsody/bd_rhapsody_sequence_analysis/_process_cwl.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# Extract arguments from CWL file and write them to arguments.yaml
#
# This script:
# - reads the CWL file
# - extracts the main workflow arguments
# - compares cwl arguments to viash config arguments
# - writes the arguments to arguments.yaml
#
# It can be used to update the arguments in the viash config after an
# update to the CWL file has been made.
#
# Dependencies: tidyverse, jsonlite, yaml, dynutils
#
# Install dependencies:
# ```R
# install.packages(c("tidyverse", "jsonlite", "yaml", "dynutils"))
# ```
#
# Usage:
# ```bash
# Rscript src/bd_rhapsody/bd_rhapsody_sequence_analysis/_process_cwl.R
# ```

library(tidyverse)

# fetch and read cwl file
# The URL points at a fixed commit of the CWL repository, so reruns of this
# script read the same workflow definition.
lines <- read_lines("https://bitbucket.org/CRSwDev/cwl/raw/8feeace1141b24749ea6003f8e6ad6d3ad5232de/v2.2.1/rhapsody_pipeline_2.2.1.cwl")
# The first line is kept apart (presumably the cwl-runner shebang -- confirm
# against the file); everything after it is parsed as JSON.
cwl_header <- lines[[1]]
cwl_obj <- jsonlite::fromJSON(lines[-1], simplifyVector = FALSE)

# detect main workflow arguments
gr <- dynutils::list_as_tibble(cwl_obj$`$graph`)

# print the graph entries for manual inspection
gr %>% print(n = 100)

main <- gr %>% filter(.data$id == "#main")

# Without this guard a missing '#main' entry only shows up as a
# "subscript out of bounds" error on the next statement.
if (nrow(main) == 0L) {
  stop("No '#main' entry found in the `$graph` of the CWL file.", call. = FALSE)
}

main_inputs <- main$inputs[[1]]

# input ids without their "#main/" prefix
input_ids <- main_inputs %>%
  map_chr("id") %>%
  gsub("^#main/", "", .)

# check whether in config
config <- yaml::read_yaml("src/bd_rhapsody/bd_rhapsody_sequence_analysis/config.vsh.yaml")
config$all_arguments <- config$argument_groups %>%
  map("arguments") %>%
  list_flatten()
# argument names without their leading "--"
arg_names <- config$all_arguments %>%
  map_chr("name") %>%
  gsub("^--", "", .)

# arguments in cwl but not in config
setdiff(tolower(input_ids), arg_names)

# arguments in config but not in cwl
setdiff(arg_names, tolower(input_ids))

# create arguments from main_inputs

# Lookup from CWL type names to viash argument types. Defined once here
# instead of being rebuilt for every input.
type_map <- c(
  "float" = "double",
  "int" = "integer",
  "string" = "string",
  "boolean" = "boolean",
  "File" = "file",
  "enum" = "string"
)

# Convert one entry of the main workflow's `inputs` into a viash argument.
#
# `main_input` is a list as parsed from the CWL JSON; the fields read here are
# `id`, `type`, `label` and `doc`. Returns a named list with the argument
# fields, with NULL-valued fields removed.
cwl_input_to_argument <- function(main_input) {
  input_id <- gsub("^#main/", "", main_input$id)

  # A union type is parsed as an unnamed list (e.g. list("null", "File")); a
  # member equal to "null" marks the input as optional. A bare type (a string,
  # or a named list describing an array/enum) is used as-is and is treated as
  # required. The previous `type[[2]]` only worked for two-member unions.
  input_type <- main_input$type
  optional <- FALSE
  if (is.list(input_type) && is.null(names(input_type))) {
    is_null_member <- vapply(input_type, identical, logical(1), "null")
    if (all(is_null_member)) {
      stop(
        "Input '", input_id, "' has no non-null type.",
        call. = FALSE
      )
    }
    optional <- any(is_null_member)
    # as before, only the first non-null member of the union is considered
    input_type <- input_type[!is_null_member][[1]]
  }

  # `identical()` stays a single TRUE/FALSE even when the field is absent
  multiple <- is.list(input_type) && identical(input_type[["type"]], "array")
  if (multiple) {
    input_type <- input_type[["items"]]
  }

  choices <- NULL
  if (is.list(input_type) && identical(input_type[["type"]], "enum")) {
    # symbols are prefixed with "<enum name>/"; strip that prefix literally
    choices <- gsub(
      paste0(input_type[["name"]], "/"),
      "",
      as.character(input_type[["symbols"]]),
      fixed = TRUE
    )
    input_type <- "enum"
  }

  description <-
    if (is.null(main_input$label)) {
      main_input$doc
    } else if (is.null(main_input$doc)) {
      main_input$label
    } else {
      paste0(main_input$label, ". ", main_input$doc)
    }

  if (!is.character(input_type) || length(input_type) != 1L) {
    stop(
      "Input '", input_id, "' has a type this script cannot interpret.",
      call. = FALSE
    )
  }
  # Unknown type names still produce NA in the output, but no longer silently.
  viash_type <- unname(type_map[input_type])
  if (is.na(viash_type)) {
    warning(
      "Input '", input_id, "' has unmapped CWL type '", input_type, "'.",
      call. = FALSE
    )
  }

  out <- list(
    name = paste0("--", tolower(input_id)),
    type = viash_type,
    # TODO: use summary when viash 0.9 is released
    # summary = main_input$doc,
    # description = main_input$doc,
    description = description,
    # only emitted for inputs that are not optional in the CWL
    required = if (optional) NULL else TRUE,
    multiple = multiple,
    choices = choices,
    info = list(
      config_key = input_id
    )
  )

  # drop the fields that were left NULL above
  out[!vapply(out, is.null, logical(1))]
}

arguments <- map(main_inputs, cwl_input_to_argument)



# Write the generated argument list to arguments.yaml.
# The `verbatim_logical` handler makes logical values serialize as
# `true`/`false`.
arguments_path <- "src/bd_rhapsody/bd_rhapsody_sequence_analysis/arguments.yaml"
logical_handlers <- list(logical = yaml::verbatim_logical)
yaml::write_yaml(arguments, arguments_path, handlers = logical_handlers)
Loading

0 comments on commit 7f8bcc2

Please sign in to comment.