From 508a40aab1912a332cfeacc737d827a545d78a04 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Tue, 9 Jul 2024 16:43:10 +0200 Subject: [PATCH 01/25] add vscode to gitignore --- .gitignore | 1 + .vscode/settings.json | 9 - .vscode/viash_config.yaml | 3005 ------------------------------------- 3 files changed, 1 insertion(+), 3014 deletions(-) delete mode 100644 .vscode/settings.json delete mode 100644 .vscode/viash_config.yaml diff --git a/.gitignore b/.gitignore index ca5262bc..2a64eaac 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ # IDE ignores .idea/ +.vscode/ # R specific ignores .Rhistory diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index df05379a..00000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "yaml.schemas": { - ".vscode/viash_config.yaml": "**.vsh.yaml", - }, - "files.watcherExclude": { - "**/target": true, - ".github": true - } -} \ No newline at end of file diff --git a/.vscode/viash_config.yaml b/.vscode/viash_config.yaml deleted file mode 100644 index 0e38195f..00000000 --- a/.vscode/viash_config.yaml +++ /dev/null @@ -1,3005 +0,0 @@ -$schema: "https://json-schema.org/draft-07/schema#" -definitions: - Config: - description: "A Viash configuration is a YAML file which contains metadata to\ - \ describe the behaviour and build target(s) of a component. \nWe commonly\ - \ name this file `config.vsh.yaml` in our examples, but you can name it however\ - \ you choose. \n" - type: "object" - properties: - organization: - description: "The organization of the package." - type: "string" - license: - description: "The license of the package." - type: "string" - authors: - description: "A list of authors. An author must at least have a name, but\ - \ can also have a list of roles, an e-mail address, and a map of custom\ - \ properties.\n\nSuggested values for roles are:\n \n| Role | Abbrev. 
|\ - \ Description |\n|------|---------|-------------|\n| maintainer | mnt |\ - \ for the maintainer of the code. Ideally, exactly one maintainer is specified.\ - \ |\n| author | aut | for persons who have made substantial contributions\ - \ to the software. |\n| contributor | ctb| for persons who have made smaller\ - \ contributions (such as code patches).\n| datacontributor | dtc | for persons\ - \ or organisations that contributed data sets for the software\n| copyrightholder\ - \ | cph | for all copyright holders. This is a legal concept so should use\ - \ the legal name of an institution or corporate body.\n| funder | fnd |\ - \ for persons or organizations that furnished financial support for the\ - \ development of the software\n\nThe [full list of roles](https://www.loc.gov/marc/relators/relaterm.html)\ - \ is extremely comprehensive.\n" - type: "array" - items: - $ref: "#/definitions/Author" - status: - description: "Allows setting a component to active, deprecated or disabled." - $ref: "#/definitions/Status" - requirements: - description: "Computational requirements related to running the component.\ - \ \n`cpus` specifies the maximum number of (logical) cpus a component is\ - \ allowed to use., whereas\n`memory` specifies the maximum amount of memory\ - \ a component is allowed to allicate. Memory units must be\nin B, KB, MB,\ - \ GB, TB or PB." - $ref: "#/definitions/ComputationalRequirements" - repositories: - description: "(Pre-)defines repositories that can be used as repository in\ - \ dependencies.\nAllows reusing repository definitions in case it is used\ - \ in multiple dependencies." - type: "array" - items: - $ref: "#/definitions/RepositoryWithName" - dependencies: - description: "Allows listing Viash components required by this Viash component" - type: "array" - items: - $ref: "#/definitions/Dependency" - namespace: - description: "Namespace this component is a part of. See the Namespaces guide\ - \ for more information on namespaces." 
- type: "string" - functionality: - description: "The functionality describes the behaviour of the script in terms\ - \ of arguments and resources.\nBy specifying a few restrictions (e.g. mandatory\ - \ arguments) and adding some descriptions, Viash will automatically generate\ - \ a stylish command-line interface for you.\n" - $ref: "#/definitions/Functionality" - runners: - description: "A list of runners to execute target artifacts.\n\n - ExecutableRunner\n\ - \ - NextflowRunner\n" - type: "array" - items: - $ref: "#/definitions/Runner" - name: - description: "Name of the component and the filename of the executable when\ - \ built with `viash build`." - type: "string" - build_info: - $ref: "#/definitions/BuildInfo" - argument_groups: - description: "A grouping of the arguments, used to display the help message.\n\ - \n - `name: foo`, the name of the argument group. \n - `description: Description\ - \ of foo`, a description of the argument group. Multiline descriptions are\ - \ supported.\n - `arguments: [arg1, arg2, ...]`, list of the arguments.\n\ - \n" - type: "array" - items: - $ref: "#/definitions/ArgumentGroup" - description: - description: "A description of the component. This will be displayed with\ - \ `--help`." - type: "string" - usage: - description: "A description on how to use the component. This will be displayed\ - \ with `--help` under the 'Usage:' section." - type: "string" - info: - description: "Structured information. Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - package_config: - description: "The package config content used during build." - $ref: "#/definitions/PackageConfig" - platforms: - description: "A list of platforms to generate target artifacts for.\n\n -\ - \ Native\n - Docker\n - Nextflow\n" - type: "array" - items: - $ref: "#/definitions/Platform" - version: - description: "Version of the component. This field will be used to version\ - \ the executable and the Docker container." 
- type: "string" - links: - description: "External links of the component." - $ref: "#/definitions/Links" - references: - description: "References to external resources related to the component." - $ref: "#/definitions/References" - engines: - description: "A list of engine environments to execute target artifacts in.\n\ - \n - NativeEngine\n - DockerEngine\n" - type: "array" - items: - $ref: "#/definitions/Engine" - resources: - description: "Resources are files that support the component. The first resource\ - \ should be a script that will be executed when the component is run. Additional\ - \ resources will be copied to the same directory.\n\nCommon properties:\n\ - \n * type: `file` / `r_script` / `python_script` / `bash_script` / `javascript_script`\ - \ / `scala_script` / `csharp_script`, specifies the type of the resource.\ - \ The first resource cannot be of type `file`. When the type is not specified,\ - \ the default type is simply `file`.\n * dest: filename, the resulting name\ - \ of the resource. From within a script, the file can be accessed at `meta[\"\ - resources_dir\"] + \"/\" + dest`. If unspecified, `dest` will be set to\ - \ the basename of the `path` parameter.\n * path: `path/to/file`, the path\ - \ of the input file. Can be a relative or an absolute path, or a URI. Mutually\ - \ exclusive with `text`.\n * text: ...multiline text..., the content of\ - \ the resulting file specified as a string. Mutually exclusive with `path`.\n\ - \ * is_executable: `true` / `false`, whether the resulting resource file\ - \ should be made executable.\n" - type: "array" - items: - $ref: "#/definitions/Resource" - keywords: - description: "The keywords of the components." - type: "array" - items: - type: "string" - test_resources: - description: "One or more scripts to be used to test the component behaviour\ - \ when `viash test` is invoked. Additional files of type `file` will be\ - \ made available only during testing. 
Each test script should expect no\ - \ command-line inputs, be platform-independent, and return an exit code\ - \ >0 when unexpected behaviour occurs during testing. See Unit Testing for\ - \ more info." - type: "array" - items: - $ref: "#/definitions/Resource" - required: - - "name" - additionalProperties: false - PackageConfig: - description: "A Viash package configuration file. It's name should be `_viash.yaml`." - type: "object" - properties: - organization: - description: "The organization of the package." - type: "string" - name: - description: "The name of the package." - type: "string" - source: - description: "Which source directory to use for the `viash ns` commands." - type: "string" - description: - description: "A description of the package." - type: "string" - viash_version: - description: "Which version of Viash to use." - type: "string" - config_mods: - oneOf: - - description: "Which config mods to apply." - type: "string" - - type: "array" - items: - description: "Which config mods to apply." - type: "string" - info: - description: "Structured information. Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - license: - description: "The license of the package." - type: "string" - references: - description: "References to external resources related to the package." - $ref: "#/definitions/References" - authors: - description: "The authors of the package." - type: "array" - items: - $ref: "#/definitions/Author" - repositories: - description: "Common repository definitions for component dependencies." - type: "array" - items: - $ref: "#/definitions/RepositoryWithName" - keywords: - description: "The keywords of the package." - type: "array" - items: - type: "string" - target: - description: "Which target directory to use for `viash ns build`." - type: "string" - version: - description: "The version of the package." - type: "string" - links: - description: "External links of the package." 
- $ref: "#/definitions/Links" - required: [] - additionalProperties: false - BuildInfo: - description: "Meta information fields filled in by Viash during build." - type: "object" - properties: - git_tag: - description: "Git tag." - type: "string" - git_remote: - description: "Git remote name." - type: "string" - viash_version: - description: "The Viash version that was used to build the component." - type: "string" - output: - description: "Folder path to the build artifacts." - type: "string" - git_commit: - description: "Git commit hash." - type: "string" - executable: - description: "Output folder with main executable path." - type: "string" - engine: - description: "The engine id used during build." - type: "string" - runner: - description: "The runner id used during build." - type: "string" - config: - description: "Path to the config used during build." - type: "string" - required: - - "config" - additionalProperties: false - Functionality: - description: "The functionality-part of the config file describes the behaviour\ - \ of the script in terms of arguments and resources.\nBy specifying a few restrictions\ - \ (e.g. mandatory arguments) and adding some descriptions, Viash will automatically\ - \ generate a stylish command-line interface for you.\n" - type: "object" - properties: - organization: - description: "The organization of the package." - type: "string" - name: - description: "Name of the component and the filename of the executable when\ - \ built with `viash build`." - type: "string" - argument_groups: - description: "A grouping of the arguments, used to display the help message.\n\ - \n - `name: foo`, the name of the argument group. \n - `description: Description\ - \ of foo`, a description of the argument group. Multiline descriptions are\ - \ supported.\n - `arguments: [arg1, arg2, ...]`, list of the arguments.\n\ - \n" - type: "array" - items: - $ref: "#/definitions/ArgumentGroup" - info: - description: "Structured information. 
Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - license: - description: "The license of the package." - type: "string" - references: - description: "References to external resources related to the component." - $ref: "#/definitions/References" - authors: - description: "A list of authors. An author must at least have a name, but\ - \ can also have a list of roles, an e-mail address, and a map of custom\ - \ properties.\n\nSuggested values for roles are:\n \n| Role | Abbrev. |\ - \ Description |\n|------|---------|-------------|\n| maintainer | mnt |\ - \ for the maintainer of the code. Ideally, exactly one maintainer is specified.\ - \ |\n| author | aut | for persons who have made substantial contributions\ - \ to the software. |\n| contributor | ctb| for persons who have made smaller\ - \ contributions (such as code patches).\n| datacontributor | dtc | for persons\ - \ or organisations that contributed data sets for the software\n| copyrightholder\ - \ | cph | for all copyright holders. This is a legal concept so should use\ - \ the legal name of an institution or corporate body.\n| funder | fnd |\ - \ for persons or organizations that furnished financial support for the\ - \ development of the software\n\nThe [full list of roles](https://www.loc.gov/marc/relators/relaterm.html)\ - \ is extremely comprehensive.\n" - type: "array" - items: - $ref: "#/definitions/Author" - status: - description: "Allows setting a component to active, deprecated or disabled." - $ref: "#/definitions/Status" - requirements: - description: "Computational requirements related to running the component.\ - \ \n`cpus` specifies the maximum number of (logical) cpus a component is\ - \ allowed to use., whereas\n`memory` specifies the maximum amount of memory\ - \ a component is allowed to allicate. Memory units must be\nin B, KB, MB,\ - \ GB, TB or PB." 
- $ref: "#/definitions/ComputationalRequirements" - repositories: - description: "(Pre-)defines repositories that can be used as repository in\ - \ dependencies.\nAllows reusing repository definitions in case it is used\ - \ in multiple dependencies." - type: "array" - items: - $ref: "#/definitions/RepositoryWithName" - test_resources: - description: "One or more scripts to be used to test the component behaviour\ - \ when `viash test` is invoked. Additional files of type `file` will be\ - \ made available only during testing. Each test script should expect no\ - \ command-line inputs, be platform-independent, and return an exit code\ - \ >0 when unexpected behaviour occurs during testing. See Unit Testing for\ - \ more info." - type: "array" - items: - $ref: "#/definitions/Resource" - dependencies: - description: "Allows listing Viash components required by this Viash component" - type: "array" - items: - $ref: "#/definitions/Dependency" - description: - description: "A description of the component. This will be displayed with\ - \ `--help`." - type: "string" - usage: - description: "A description on how to use the component. This will be displayed\ - \ with `--help` under the 'Usage:' section." - type: "string" - version: - description: "Version of the component. This field will be used to version\ - \ the executable and the Docker container." - type: "string" - links: - description: "External links of the component." - $ref: "#/definitions/Links" - resources: - description: "Resources are files that support the component. The first resource\ - \ should be a script that will be executed when the functionality is run.\ - \ Additional resources will be copied to the same directory.\n\nCommon properties:\n\ - \n * type: `file` / `r_script` / `python_script` / `bash_script` / `javascript_script`\ - \ / `scala_script` / `csharp_script`, specifies the type of the resource.\ - \ The first resource cannot be of type `file`. 
When the type is not specified,\ - \ the default type is simply `file`.\n * dest: filename, the resulting name\ - \ of the resource. From within a script, the file can be accessed at `meta[\"\ - resources_dir\"] + \"/\" + dest`. If unspecified, `dest` will be set to\ - \ the basename of the `path` parameter.\n * path: `path/to/file`, the path\ - \ of the input file. Can be a relative or an absolute path, or a URI. Mutually\ - \ exclusive with `text`.\n * text: ...multiline text..., the content of\ - \ the resulting file specified as a string. Mutually exclusive with `path`.\n\ - \ * is_executable: `true` / `false`, whether the resulting resource file\ - \ should be made executable.\n" - type: "array" - items: - $ref: "#/definitions/Resource" - keywords: - description: "The keywords of the components." - type: "array" - items: - type: "string" - namespace: - description: "Namespace this component is a part of. See the Namespaces guide\ - \ for more information on namespaces." - type: "string" - arguments: - description: "A list of arguments for this component. For each argument, a\ - \ type and a name must be specified. Depending on the type of argument,\ - \ different properties can be set. See these reference pages per type for\ - \ more information: \n\n - string\n - file\n - integer\n - double\n - boolean\n\ - \ - boolean_true\n - boolean_false\n" - type: "array" - items: - $ref: "#/definitions/Argument" - required: - - "name" - additionalProperties: false - Author: - description: "Author metadata." - type: "object" - properties: - name: - description: "Full name of the author, usually in the name of FirstName MiddleName\ - \ LastName." - type: "string" - email: - description: "E-mail of the author." - type: "string" - info: - description: "Structured information. Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - roles: - oneOf: - - description: "Role of the author. 
Suggested items:\n\n* \"author\": Authors\ - \ who have made substantial contributions to the component.\n* \"maintainer\"\ - : The maintainer of the component.\n* \"contributor\": Authors who have\ - \ made smaller contributions (such as code patches etc.).\n" - type: "string" - - type: "array" - items: - description: "Role of the author. Suggested items:\n\n* \"author\": Authors\ - \ who have made substantial contributions to the component.\n* \"maintainer\"\ - : The maintainer of the component.\n* \"contributor\": Authors who have\ - \ made smaller contributions (such as code patches etc.).\n" - type: "string" - required: - - "name" - additionalProperties: false - ComputationalRequirements: - description: "Computational requirements related to running the component." - type: "object" - properties: - cpus: - description: "The maximum number of (logical) cpus a component is allowed\ - \ to use." - type: "integer" - commands: - description: "A list of commands which should be present on the system for\ - \ the script to function." - type: "array" - items: - type: "string" - memory: - description: "The maximum amount of memory a component is allowed to allocate.\ - \ Unit must be one of B, KB, MB, GB, TB or PB." - type: "string" - required: [] - additionalProperties: false - ArgumentGroup: - description: "A grouping of the arguments, used to display the help message." - type: "object" - properties: - name: - description: "The name of the argument group." - type: "string" - description: - description: "Description of foo`, a description of the argument group. Multiline\ - \ descriptions are supported." - type: "string" - arguments: - description: "List of arguments." - type: "array" - items: - $ref: "#/definitions/Argument" - required: - - "name" - - "arguments" - additionalProperties: false - Links: - description: "Links to external resources related to the component." - type: "object" - properties: - repository: - description: "Source repository url." 
- type: "string" - documentation: - description: "Documentation website url." - type: "string" - docker_registry: - description: "Docker registry url." - type: "string" - homepage: - description: "Homepage website url." - type: "string" - issue_tracker: - description: "Issue tracker url." - type: "string" - required: [] - additionalProperties: false - References: - description: "References to external resources related to the component." - type: "object" - properties: - bibtex: - oneOf: - - description: "One or multiple BibTeX reference(s) of the component." - type: "string" - - type: "array" - items: - description: "One or multiple BibTeX reference(s) of the component." - type: "string" - doi: - oneOf: - - description: "One or multiple DOI reference(s) of the component." - type: "string" - - type: "array" - items: - description: "One or multiple DOI reference(s) of the component." - type: "string" - additionalProperties: false - Runner: - oneOf: - - $ref: "#/definitions/ExecutableRunner" - - $ref: "#/definitions/NextflowRunner" - ExecutableRunner: - description: "Run code as an executable.\n\nThis runner is the default runner.\ - \ It will generate a bash script that can be run directly.\n\nThis runner is\ - \ also used for the native engine.\n\nThis runner is also used for the docker\ - \ engine.\n" - type: "object" - properties: - docker_setup_strategy: - description: "The Docker setup strategy to use when building a docker engine\ - \ enrivonment.\n\n| Strategy | Description |\n|-----|----------|\n| `alwaysbuild`\ - \ / `build` / `b` | Always build the image from the dockerfile. 
This is\ - \ the default setup strategy.\n| `alwayscachedbuild` / `cachedbuild` / `cb`\ - \ | Always build the image from the dockerfile, with caching enabled.\n\ - | `ifneedbebuild` | Build the image if it does not exist locally.\n| `ifneedbecachedbuild`\ - \ | Build the image with caching enabled if it does not exist locally, with\ - \ caching enabled.\n| `alwayspull` / `pull` / `p` | Try to pull the container\ - \ from [Docker Hub](https://hub.docker.com) or the specified docker registry.\n\ - | `alwayspullelsebuild` / `pullelsebuild` | Try to pull the image from\ - \ a registry and build it if it doesn't exist.\n| `alwayspullelsecachedbuild`\ - \ / `pullelsecachedbuild` | Try to pull the image from a registry and build\ - \ it with caching if it doesn't exist.\n| `ifneedbepull` | If the image\ - \ does not exist locally, pull the image.\n| `ifneedbepullelsebuild` | \ - \ If the image does not exist locally, pull the image. If the image does\ - \ exist, build it.\n| `ifneedbepullelsecachedbuild` | If the image does\ - \ not exist locally, pull the image. If the image does exist, build it with\ - \ caching enabled.\n| `push` | Push the container to [Docker Hub](https://hub.docker.com)\ - \ or the specified docker registry.\n| `pushifnotpresent` | Push the container\ - \ to [Docker Hub](https://hub.docker.com) or the specified docker registry\ - \ if the tag does not exist yet.\n| `donothing` / `meh` | Do not build or\ - \ pull anything.\n\n" - $ref: "#/definitions/DockerSetupStrategy" - workdir: - description: "The working directory when starting the engine. This doesn't\ - \ change the Dockerfile but gets added as a command-line argument at runtime." - type: "string" - docker_run_args: - oneOf: - - description: "Provide runtime arguments to Docker. See the documentation\ - \ on [`docker run`](https://docs.docker.com/engine/reference/run/) for\ - \ more information." - type: "string" - - type: "array" - items: - description: "Provide runtime arguments to Docker. 
See the documentation\ - \ on [`docker run`](https://docs.docker.com/engine/reference/run/) for\ - \ more information." - type: "string" - id: - description: "Name of the runner. As with all runners, you can give an runner\ - \ a different name. By specifying `id: foo`, you can target this executor\ - \ (only) by specifying `...` in any of the Viash commands." - type: "string" - port: - oneOf: - - description: "A list of enabled ports. This doesn't change the Dockerfile\ - \ but gets added as a command-line argument at runtime." - type: "integer" - - description: "A list of enabled ports. This doesn't change the Dockerfile\ - \ but gets added as a command-line argument at runtime." - type: "string" - - description: "A list of enabled ports. This doesn't change the Dockerfile\ - \ but gets added as a command-line argument at runtime." - type: "array" - items: - type: "integer" - - description: "A list of enabled ports. This doesn't change the Dockerfile\ - \ but gets added as a command-line argument at runtime." - type: "array" - items: - type: "string" - type: - description: "Run code as an executable.\n\nThis runner is the default runner.\ - \ It will generate a bash script that can be run directly.\n\nThis runner\ - \ is also used for the native engine.\n\nThis runner is also used for the\ - \ docker engine.\n" - const: "executable" - required: - - "type" - additionalProperties: false - NextflowRunner: - description: "Run a Viash component on a Nextflow backend engine.\n" - type: "object" - properties: - auto: - description: "Automated processing flags which can be toggled on or off:\n\ - \n| Flag | Description | Default |\n|---|---------|----|\n| `simplifyInput`\ - \ | If `true`, an input tuple only containing only a single File (e.g. `[\"\ - foo\", file(\"in.h5ad\")]`) is automatically transformed to a map (i.e.\ - \ `[\"foo\", [ input: file(\"in.h5ad\") ] ]`). | `true` |\n| `simplifyOutput`\ - \ | If `true`, an output tuple containing a map with a File (e.g. 
`[\"foo\"\ - , [ output: file(\"out.h5ad\") ] ]`) is automatically transformed to a map\ - \ (i.e. `[\"foo\", file(\"out.h5ad\")]`). | `false` |\n| `transcript` |\ - \ If `true`, the module's transcripts from `work/` are automatically published\ - \ to `params.transcriptDir`. If not defined, `params.publishDir + \"/_transcripts\"\ - ` will be used. Will throw an error if neither are defined. | `false` |\n\ - | `publish` | If `true`, the module's outputs are automatically published\ - \ to `params.publishDir`. If equal to \"state\", also a `.state.yaml` file\ - \ will be published in the publish dir. Will throw an error if `params.publishDir`\ - \ is not defined. | `false` |\n\n" - $ref: "#/definitions/NextflowAuto" - directives: - description: "Directives are optional settings that affect the execution of\ - \ the process. These mostly match up with the Nextflow counterparts. \n" - $ref: "#/definitions/NextflowDirectives" - container: - description: "Specifies the Docker engine id to be used to run Nextflow." - type: "string" - config: - description: "Allows tweaking how the Nextflow Config file is generated." - $ref: "#/definitions/NextflowConfig" - debug: - description: "Whether or not to print debug messages." - type: "boolean" - id: - description: "Name of the runner. As with all runners, you can give an runner\ - \ a different name. By specifying `id: foo`, you can target this runner\ - \ (only) by specifying `...` in any of the Viash commands." 
- type: "string" - type: - description: "Run a Viash component on a Nextflow backend engine.\n" - const: "nextflow" - required: - - "type" - additionalProperties: false - Engine: - oneOf: - - $ref: "#/definitions/DockerEngine" - - $ref: "#/definitions/NativeEngine" - NativeEngine: - description: "Running a Viash component on a native engine means that the script\ - \ will be executed in your current environment.\nAny dependencies are assumed\ - \ to have been installed by the user, so the native engine is meant for developers\ - \ (who know what they're doing) or for simple bash scripts (which have no extra\ - \ dependencies).\n" - type: "object" - properties: - id: - description: "Name of the engine. As with all engines, you can give an engine\ - \ a different name. By specifying `id: foo`, you can target this engine\ - \ (only) by specifying `...` in any of the Viash commands." - type: "string" - type: - description: "Running a Viash component on a native engine means that the\ - \ script will be executed in your current environment.\nAny dependencies\ - \ are assumed to have been installed by the user, so the native engine is\ - \ meant for developers (who know what they're doing) or for simple bash\ - \ scripts (which have no extra dependencies).\n" - const: "native" - required: - - "type" - additionalProperties: false - DockerEngine: - description: "Run a Viash component on a Docker backend engine.\nBy specifying\ - \ which dependencies your component needs, users will be able to build a docker\ - \ container from scratch using the setup flag, or pull it from a docker repository.\n" - type: "object" - properties: - organization: - description: "Name of a container's [organization](https://docs.docker.com/docker-hub/orgs/)." - type: "string" - registry: - description: "The URL to the a [custom Docker registry](https://docs.docker.com/registry/)" - type: "string" - image: - description: "The base container to start from. 
You can also add the tag here\ - \ if you wish." - type: "string" - tag: - description: "Specify a Docker image based on its tag." - type: "string" - target_image: - description: "If anything is specified in the setup section, running the `---setup`\ - \ will result in an image with the name of `:`. If\ - \ nothing is specified in the `setup` section, simply `image` will be used.\ - \ Advanced usage only." - type: "string" - target_tag: - description: "The tag the resulting image gets. Advanced usage only." - type: "string" - namespace_separator: - description: "The separator between the namespace and the name of the component,\ - \ used for determining the image name. Default: \"/\"." - type: "string" - id: - description: "Name of the engine. As with all engines, you can give a engine\ - \ a different name. By specifying `id: foo`, you can target this engine\ - \ (only) by specifying `...` in any of the Viash commands." - type: "string" - target_registry: - description: "The URL where the resulting image will be pushed to. Advanced\ - \ usage only." - type: "string" - type: - description: "Run a Viash component on a Docker backend engine.\nBy specifying\ - \ which dependencies your component needs, users will be able to build a\ - \ docker container from scratch using the setup flag, or pull it from a\ - \ docker repository.\n" - const: "docker" - target_organization: - description: "The organization set in the resulting image. Advanced usage\ - \ only." - type: "string" - setup: - description: "A list of requirements for installing the following types of\ - \ packages:\n\n - apt\n - apk\n - Docker setup instructions\n - JavaScript\n\ - \ - Python\n - R\n - Ruby\n - yum\n\nThe order in which these dependencies\ - \ are specified determines the order in which they will be installed.\n" - type: "array" - items: - $ref: "#/definitions/Requirements" - cmd: - oneOf: - - description: "Set the default command being executed when running the Docker\ - \ container." 
- type: "string" - - description: "Set the default command being executed when running the Docker\ - \ container." - type: "array" - items: - type: "string" - target_image_source: - description: "The source of the target image. This is used for defining labels\ - \ in the dockerfile." - type: "string" - test_setup: - description: "Additional requirements specific for running unit tests." - type: "array" - items: - $ref: "#/definitions/Requirements" - entrypoint: - oneOf: - - description: "Override the entrypoint of the base container. Default set\ - \ `ENTRYPOINT []`." - type: "string" - - description: "Override the entrypoint of the base container. Default set\ - \ `ENTRYPOINT []`." - type: "array" - items: - type: "string" - required: - - "image" - - "type" - additionalProperties: false - Platform: - oneOf: - - $ref: "#/definitions/NativePlatform" - - $ref: "#/definitions/DockerPlatform" - - $ref: "#/definitions/NextflowPlatform" - NativePlatform: - description: "Running a Viash component on a native platform means that the script\ - \ will be executed in your current environment.\nAny dependencies are assumed\ - \ to have been installed by the user, so the native platform is meant for developers\ - \ (who know what they're doing) or for simple bash scripts (which have no extra\ - \ dependencies).\n" - type: "object" - properties: - id: - description: "As with all platforms, you can give a platform a different name.\ - \ By specifying `id: foo`, you can target this platform (only) by specifying\ - \ `-p foo` in any of the Viash commands." 
- type: "string" - type: - description: "Running a Viash component on a native platform means that the\ - \ script will be executed in your current environment.\nAny dependencies\ - \ are assumed to have been installed by the user, so the native platform\ - \ is meant for developers (who know what they're doing) or for simple bash\ - \ scripts (which have no extra dependencies).\n" - const: "native" - required: - - "type" - additionalProperties: false - DockerPlatform: - description: "Run a Viash component on a Docker backend platform.\nBy specifying\ - \ which dependencies your component needs, users will be able to build a docker\ - \ container from scratch using the setup flag, or pull it from a docker repository.\n" - type: "object" - properties: - organization: - description: "Name of a container's [organization](https://docs.docker.com/docker-hub/orgs/)." - type: "string" - registry: - description: "The URL to the a [custom Docker registry](https://docs.docker.com/registry/)" - type: "string" - image: - description: "The base container to start from. You can also add the tag here\ - \ if you wish." - type: "string" - tag: - description: "Specify a Docker image based on its tag." - type: "string" - target_tag: - description: "The tag the resulting image gets. Advanced usage only." - type: "string" - run_args: - oneOf: - - description: "Add [docker run](https://docs.docker.com/engine/reference/run/)\ - \ arguments." - type: "string" - - type: "array" - items: - description: "Add [docker run](https://docs.docker.com/engine/reference/run/)\ - \ arguments." - type: "string" - namespace_separator: - description: "The separator between the namespace and the name of the component,\ - \ used for determining the image name. Default: \"/\"." - type: "string" - resolve_volume: - description: "Enables or disables automatic volume mapping. Enabled when set\ - \ to `Automatic` or disabled when set to `Manual`. Default: `Automatic`." 
- $ref: "#/definitions/DockerResolveVolume" - cmd: - oneOf: - - description: "Set the default command being executed when running the Docker\ - \ container." - type: "string" - - description: "Set the default command being executed when running the Docker\ - \ container." - type: "array" - items: - type: "string" - id: - description: "As with all platforms, you can give a platform a different name.\ - \ By specifying `id: foo`, you can target this platform (only) by specifying\ - \ `-p foo` in any of the Viash commands." - type: "string" - port: - oneOf: - - description: "A list of enabled ports. This doesn't change the Dockerfile\ - \ but gets added as a command-line argument at runtime." - type: "string" - - type: "array" - items: - description: "A list of enabled ports. This doesn't change the Dockerfile\ - \ but gets added as a command-line argument at runtime." - type: "string" - target_registry: - description: "The URL where the resulting image will be pushed to. Advanced\ - \ usage only." - type: "string" - setup: - description: "A list of requirements for installing the following types of\ - \ packages:\n\n - apt\n - apk\n - Docker setup instructions\n - JavaScript\n\ - \ - Python\n - R\n - Ruby\n - yum\n\nThe order in which these dependencies\ - \ are specified determines the order in which they will be installed.\n" - type: "array" - items: - $ref: "#/definitions/Requirements" - workdir: - description: "The working directory when starting the container. This doesn't\ - \ change the Dockerfile but gets added as a command-line argument at runtime." - type: "string" - target_image: - description: "If anything is specified in the setup section, running the `---setup`\ - \ will result in an image with the name of `:`. If\ - \ nothing is specified in the `setup` section, simply `image` will be used.\ - \ Advanced usage only." - type: "string" - target_image_source: - description: "The source of the target image. 
This is used for defining labels\ - \ in the dockerfile." - type: "string" - test_setup: - description: "Additional requirements specific for running unit tests." - type: "array" - items: - $ref: "#/definitions/Requirements" - entrypoint: - oneOf: - - description: "Override the entrypoint of the base container. Default set\ - \ `ENTRYPOINT []`." - type: "string" - - description: "Override the entrypoint of the base container. Default set\ - \ `ENTRYPOINT []`." - type: "array" - items: - type: "string" - setup_strategy: - description: "The Docker setup strategy to use when building a container.\n\ - \n| Strategy | Description |\n|-----|----------|\n| `alwaysbuild` / `build`\ - \ / `b` | Always build the image from the dockerfile. This is the default\ - \ setup strategy.\n| `alwayscachedbuild` / `cachedbuild` / `cb` | Always\ - \ build the image from the dockerfile, with caching enabled.\n| `ifneedbebuild`\ - \ | Build the image if it does not exist locally.\n| `ifneedbecachedbuild`\ - \ | Build the image with caching enabled if it does not exist locally, with\ - \ caching enabled.\n| `alwayspull` / `pull` / `p` | Try to pull the container\ - \ from [Docker Hub](https://hub.docker.com) or the specified docker registry.\n\ - | `alwayspullelsebuild` / `pullelsebuild` | Try to pull the image from\ - \ a registry and build it if it doesn't exist.\n| `alwayspullelsecachedbuild`\ - \ / `pullelsecachedbuild` | Try to pull the image from a registry and build\ - \ it with caching if it doesn't exist.\n| `ifneedbepull` | If the image\ - \ does not exist locally, pull the image.\n| `ifneedbepullelsebuild` | \ - \ If the image does not exist locally, pull the image. If the image does\ - \ exist, build it.\n| `ifneedbepullelsecachedbuild` | If the image does\ - \ not exist locally, pull the image. 
If the image does exist, build it with\ - \ caching enabled.\n| `push` | Push the container to [Docker Hub](https://hub.docker.com)\ - \ or the specified docker registry.\n| `pushifnotpresent` | Push the container\ - \ to [Docker Hub](https://hub.docker.com) or the specified docker registry\ - \ if the tag does not exist yet.\n| `donothing` / `meh` | Do not build or\ - \ pull anything.\n\n" - $ref: "#/definitions/DockerSetupStrategy" - type: - description: "Run a Viash component on a Docker backend platform.\nBy specifying\ - \ which dependencies your component needs, users will be able to build a\ - \ docker container from scratch using the setup flag, or pull it from a\ - \ docker repository.\n" - const: "docker" - target_organization: - description: "The organization set in the resulting image. Advanced usage\ - \ only." - type: "string" - required: - - "image" - - "type" - additionalProperties: false - NextflowPlatform: - description: "Platform for generating Nextflow VDSL3 modules." - type: "object" - properties: - auto: - description: "Automated processing flags which can be toggled on or off:\n\ - \n| Flag | Description | Default |\n|---|---------|----|\n| `simplifyInput`\ - \ | If `true`, an input tuple only containing only a single File (e.g. `[\"\ - foo\", file(\"in.h5ad\")]`) is automatically transformed to a map (i.e.\ - \ `[\"foo\", [ input: file(\"in.h5ad\") ] ]`). | `true` |\n| `simplifyOutput`\ - \ | If `true`, an output tuple containing a map with a File (e.g. `[\"foo\"\ - , [ output: file(\"out.h5ad\") ] ]`) is automatically transformed to a map\ - \ (i.e. `[\"foo\", file(\"out.h5ad\")]`). | `false` |\n| `transcript` |\ - \ If `true`, the module's transcripts from `work/` are automatically published\ - \ to `params.transcriptDir`. If not defined, `params.publishDir + \"/_transcripts\"\ - ` will be used. Will throw an error if neither are defined. 
| `false` |\n\ - | `publish` | If `true`, the module's outputs are automatically published\ - \ to `params.publishDir`. If equal to \"state\", also a `.state.yaml` file\ - \ will be published in the publish dir. Will throw an error if `params.publishDir`\ - \ is not defined. | `false` |\n\n" - $ref: "#/definitions/NextflowAuto" - directives: - description: "Directives are optional settings that affect the execution of\ - \ the process. These mostly match up with the Nextflow counterparts. \n" - $ref: "#/definitions/NextflowDirectives" - container: - description: "Specifies the Docker platform id to be used to run Nextflow." - type: "string" - config: - description: "Allows tweaking how the Nextflow Config file is generated." - $ref: "#/definitions/NextflowConfig" - debug: - description: "Whether or not to print debug messages." - type: "boolean" - id: - description: "Every platform can be given a specific id that can later be\ - \ referred to explicitly when running or building the Viash component." - type: "string" - type: - description: "Platform for generating Nextflow VDSL3 modules." - const: "nextflow" - required: - - "type" - additionalProperties: false - Requirements: - oneOf: - - $ref: "#/definitions/ApkRequirements" - - $ref: "#/definitions/AptRequirements" - - $ref: "#/definitions/DockerRequirements" - - $ref: "#/definitions/JavaScriptRequirements" - - $ref: "#/definitions/PythonRequirements" - - $ref: "#/definitions/RRequirements" - - $ref: "#/definitions/RubyRequirements" - - $ref: "#/definitions/YumRequirements" - ApkRequirements: - description: "Specify which apk packages should be available in order to run the\ - \ component." - type: "object" - properties: - type: - description: "Specify which apk packages should be available in order to run\ - \ the component." - const: "apk" - packages: - oneOf: - - description: "Specifies which packages to install." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install." 
- type: "string" - required: - - "type" - additionalProperties: false - AptRequirements: - description: "Specify which apt packages should be available in order to run the\ - \ component." - type: "object" - properties: - interactive: - description: "If `false`, the Debian frontend is set to non-interactive (recommended).\ - \ Default: false." - type: "boolean" - type: - description: "Specify which apt packages should be available in order to run\ - \ the component." - const: "apt" - packages: - oneOf: - - description: "Specifies which packages to install." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install." - type: "string" - required: - - "type" - additionalProperties: false - DockerRequirements: - description: "Specify which Docker commands should be run during setup." - type: "object" - properties: - run: - oneOf: - - description: "Specifies which `RUN` entries to add to the Dockerfile while\ - \ building it." - type: "string" - - type: "array" - items: - description: "Specifies which `RUN` entries to add to the Dockerfile while\ - \ building it." - type: "string" - label: - oneOf: - - description: "Specifies which `LABEL` entries to add to the Dockerfile while\ - \ building it." - type: "string" - - type: "array" - items: - description: "Specifies which `LABEL` entries to add to the Dockerfile\ - \ while building it." - type: "string" - build_args: - oneOf: - - description: "Specifies which `ARG` entries to add to the Dockerfile while\ - \ building it." - type: "string" - - type: "array" - items: - description: "Specifies which `ARG` entries to add to the Dockerfile while\ - \ building it." - type: "string" - copy: - oneOf: - - description: "Specifies which `COPY` entries to add to the Dockerfile while\ - \ building it." - type: "string" - - type: "array" - items: - description: "Specifies which `COPY` entries to add to the Dockerfile\ - \ while building it." 
- type: "string" - type: - description: "Specify which Docker commands should be run during setup." - const: "docker" - add: - oneOf: - - description: "Specifies which `ADD` entries to add to the Dockerfile while\ - \ building it." - type: "string" - - type: "array" - items: - description: "Specifies which `ADD` entries to add to the Dockerfile while\ - \ building it." - type: "string" - env: - oneOf: - - description: "Specifies which `ENV` entries to add to the Dockerfile while\ - \ building it. Unlike `ARG`, `ENV` entries are also accessible from inside\ - \ the container." - type: "string" - - type: "array" - items: - description: "Specifies which `ENV` entries to add to the Dockerfile while\ - \ building it. Unlike `ARG`, `ENV` entries are also accessible from\ - \ inside the container." - type: "string" - required: - - "type" - additionalProperties: false - JavaScriptRequirements: - description: "Specify which JavaScript packages should be available in order to\ - \ run the component." - type: "object" - properties: - github: - oneOf: - - description: "Specifies which packages to install from GitHub." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from GitHub." - type: "string" - url: - oneOf: - - description: "Specifies which packages to install using a generic URI." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install using a generic URI." - type: "string" - git: - oneOf: - - description: "Specifies which packages to install using a Git URI." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install using a Git URI." - type: "string" - npm: - oneOf: - - description: "Specifies which packages to install from npm." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from npm." 
- type: "string" - type: - description: "Specify which JavaScript packages should be available in order\ - \ to run the component." - const: "javascript" - packages: - oneOf: - - description: "Specifies which packages to install from npm." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from npm." - type: "string" - required: - - "type" - additionalProperties: false - PythonRequirements: - description: "Specify which Python packages should be available in order to run\ - \ the component." - type: "object" - properties: - github: - oneOf: - - description: "Specifies which packages to install from GitHub." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from GitHub." - type: "string" - gitlab: - oneOf: - - description: "Specifies which packages to install from GitLab." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from GitLab." - type: "string" - pip: - oneOf: - - description: "Specifies which packages to install from pip." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from pip." - type: "string" - pypi: - oneOf: - - description: "Specifies which packages to install from PyPI using pip." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from PyPI using pip." - type: "string" - git: - oneOf: - - description: "Specifies which packages to install using a Git URI." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install using a Git URI." - type: "string" - upgrade: - description: "Sets the `--upgrade` flag when set to true. Default: true." - type: "boolean" - packages: - oneOf: - - description: "Specifies which packages to install from pip." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from pip." 
- type: "string" - url: - oneOf: - - description: "Specifies which packages to install using a generic URI." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install using a generic URI." - type: "string" - svn: - oneOf: - - description: "Specifies which packages to install using an SVN URI." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install using an SVN URI." - type: "string" - bazaar: - oneOf: - - description: "Specifies which packages to install using a Bazaar URI." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install using a Bazaar URI." - type: "string" - script: - oneOf: - - description: "Specifies a code block to run as part of the build." - type: "string" - - type: "array" - items: - description: "Specifies a code block to run as part of the build." - type: "string" - type: - description: "Specify which Python packages should be available in order to\ - \ run the component." - const: "python" - mercurial: - oneOf: - - description: "Specifies which packages to install using a Mercurial URI." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install using a Mercurial URI." - type: "string" - user: - description: "Sets the `--user` flag when set to true. Default: false." - type: "boolean" - required: - - "type" - additionalProperties: false - RRequirements: - description: "Specify which R packages should be available in order to run the\ - \ component." - type: "object" - properties: - bioc: - oneOf: - - description: "Specifies which packages to install from BioConductor." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from BioConductor." - type: "string" - github: - oneOf: - - description: "Specifies which packages to install from GitHub." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from GitHub." 
- type: "string" - gitlab: - oneOf: - - description: "Specifies which packages to install from GitLab." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from GitLab." - type: "string" - url: - oneOf: - - description: "Specifies which packages to install using a generic URI." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install using a generic URI." - type: "string" - bioc_force_install: - description: "Forces packages specified in `bioc` to be reinstalled, even\ - \ if they are already present in the container. Default: false." - type: "boolean" - git: - oneOf: - - description: "Specifies which packages to install using a Git URI." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install using a Git URI." - type: "string" - cran: - oneOf: - - description: "Specifies which packages to install from CRAN." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from CRAN." - type: "string" - bitbucket: - oneOf: - - description: "Specifies which packages to install from Bitbucket." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from Bitbucket." - type: "string" - svn: - oneOf: - - description: "Specifies which packages to install using an SVN URI." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install using an SVN URI." - type: "string" - packages: - oneOf: - - description: "Specifies which packages to install from CRAN." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install from CRAN." - type: "string" - script: - oneOf: - - description: "Specifies a code block to run as part of the build." - type: "string" - - type: "array" - items: - description: "Specifies a code block to run as part of the build." 
- type: "string" - type: - description: "Specify which R packages should be available in order to run\ - \ the component." - const: "r" - required: - - "type" - additionalProperties: false - RubyRequirements: - description: "Specify which Ruby packages should be available in order to run\ - \ the component." - type: "object" - properties: - type: - description: "Specify which Ruby packages should be available in order to\ - \ run the component." - const: "ruby" - packages: - oneOf: - - description: "Specifies which packages to install." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install." - type: "string" - required: - - "type" - additionalProperties: false - YumRequirements: - description: "Specify which yum packages should be available in order to run the\ - \ component." - type: "object" - properties: - type: - description: "Specify which yum packages should be available in order to run\ - \ the component." - const: "yum" - packages: - oneOf: - - description: "Specifies which packages to install." - type: "string" - - type: "array" - items: - description: "Specifies which packages to install." - type: "string" - required: - - "type" - additionalProperties: false - Argument: - oneOf: - - $ref: "#/definitions/BooleanArgument" - - $ref: "#/definitions/BooleanTrueArgument" - - $ref: "#/definitions/BooleanFalseArgument" - - $ref: "#/definitions/DoubleArgument" - - $ref: "#/definitions/FileArgument" - - $ref: "#/definitions/IntegerArgument" - - $ref: "#/definitions/LongArgument" - - $ref: "#/definitions/StringArgument" - BooleanArgument: - description: "A `boolean` type argument has two possible values: `true` or `false`." - type: "object" - properties: - alternatives: - oneOf: - - description: "List of alternative format variations for this argument." - type: "string" - - type: "array" - items: - description: "List of alternative format variations for this argument." 
- type: "string" - name: - description: "The name of the argument. Can be in the formats `--trim`, `-t`\ - \ or `trim`. The number of dashes determines how values can be passed: \ - \ \n\n - `--trim` is a long option, which can be passed with `executable_name\ - \ --trim`\n - `-t` is a short option, which can be passed with `executable_name\ - \ -t`\n - `trim` is an argument, which can be passed with `executable_name\ - \ trim` \n" - type: "string" - direction: - $ref: "#/definitions/Direction" - info: - description: "Structured information. Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - default: - oneOf: - - description: "The default value when no argument value is provided. This\ - \ will not work if the [`required`](#required) property is enabled." - type: "boolean" - - type: "array" - items: - description: "The default value when no argument value is provided. This\ - \ will not work if the [`required`](#required) property is enabled." - type: "boolean" - example: - oneOf: - - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." - type: "boolean" - - type: "array" - items: - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." - type: "boolean" - description: - description: "A description of the argument. This will be displayed with `--help`." - type: "string" - multiple_sep: - description: "The delimiter character for providing [`multiple`](#multiple)\ - \ values. `:` by default." - type: "string" - multiple: - description: "Treat the argument value as an array. Arrays can be passed using\ - \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\ - \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\ - \ property. `false` by default." 
- type: "boolean" - type: - description: "A `boolean` type argument has two possible values: `true` or\ - \ `false`." - const: "boolean" - required: - description: "Make the value for this argument required. If set to `true`,\ - \ an error will be produced if no value was provided. `false` by default." - type: "boolean" - required: - - "name" - - "type" - additionalProperties: false - BooleanTrueArgument: - description: "An argument of the `boolean_true` type acts like a `boolean` flag\ - \ with a default value of `false`. When called as an argument it sets the `boolean`\ - \ to `true`." - type: "object" - properties: - alternatives: - oneOf: - - description: "List of alternative format variations for this argument." - type: "string" - - type: "array" - items: - description: "List of alternative format variations for this argument." - type: "string" - name: - description: "The name of the argument. Can be in the formats `--silent`,\ - \ `-s` or `silent`. The number of dashes determines how values can be passed:\ - \ \n\n - `--silent` is a long option, which can be passed with `executable_name\ - \ --silent`\n - `-s` is a short option, which can be passed with `executable_name\ - \ -s`\n - `silent` is an argument, which can be passed with `executable_name\ - \ silent` \n" - type: "string" - direction: - $ref: "#/definitions/Direction" - info: - description: "Structured information. Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - description: - description: "A description of the argument. This will be displayed with `--help`." - type: "string" - type: - description: "An argument of the `boolean_true` type acts like a `boolean`\ - \ flag with a default value of `false`. When called as an argument it sets\ - \ the `boolean` to `true`." 
- const: "boolean_true" - required: - - "name" - - "type" - additionalProperties: false - BooleanFalseArgument: - description: "An argument of the `boolean_false` type acts like an inverted `boolean`\ - \ flag with a default value of `true`. When called as an argument it sets the\ - \ `boolean` to `false`." - type: "object" - properties: - alternatives: - oneOf: - - description: "List of alternative format variations for this argument." - type: "string" - - type: "array" - items: - description: "List of alternative format variations for this argument." - type: "string" - name: - description: "The name of the argument. Can be in the formats `--no-log`,\ - \ `-n` or `no-log`. The number of dashes determines how values can be passed:\ - \ \n\n - `--no-log` is a long option, which can be passed with `executable_name\ - \ --no-log`\n - `-n` is a short option, which can be passed with `executable_name\ - \ -n`\n - `no-log` is an argument, which can be passed with `executable_name\ - \ no-log` \n" - type: "string" - direction: - $ref: "#/definitions/Direction" - info: - description: "Structured information. Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - description: - description: "A description of the argument. This will be displayed with `--help`." - type: "string" - type: - description: "An argument of the `boolean_false` type acts like an inverted\ - \ `boolean` flag with a default value of `true`. When called as an argument\ - \ it sets the `boolean` to `false`." - const: "boolean_false" - required: - - "name" - - "type" - additionalProperties: false - DoubleArgument: - description: "A `double` type argument has a numeric value with decimal points" - type: "object" - properties: - alternatives: - oneOf: - - description: "List of alternative format variations for this argument." - type: "string" - - type: "array" - items: - description: "List of alternative format variations for this argument." 
- type: "string" - name: - description: "The name of the argument. Can be in the formats `--foo`, `-f`\ - \ or `foo`. The number of dashes determines how values can be passed: \n\ - \n - `--foo` is a long option, which can be passed with `executable_name\ - \ --foo=value` or `executable_name --foo value`\n - `-f` is a short option,\ - \ which can be passed with `executable_name -f value`\n - `foo` is an argument,\ - \ which can be passed with `executable_name value` \n" - type: "string" - info: - description: "Structured information. Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - max: - description: "Maximum allowed value for this argument. If set and the provided\ - \ value is higher than the maximum, an error will be produced. Can be combined\ - \ with [`min`](#min) to clamp values." - $ref: "#/definitions/DoubleWithInf" - default: - oneOf: - - description: "The default value when no argument value is provided. This\ - \ will not work if the [`required`](#required) property is enabled." - $ref: "#/definitions/DoubleWithInf" - - type: "array" - items: - description: "The default value when no argument value is provided. This\ - \ will not work if the [`required`](#required) property is enabled." - $ref: "#/definitions/DoubleWithInf" - example: - oneOf: - - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." - $ref: "#/definitions/DoubleWithInf" - - type: "array" - items: - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." - $ref: "#/definitions/DoubleWithInf" - description: - description: "A description of the argument. This will be displayed with `--help`." - type: "string" - multiple_sep: - description: "The delimiter character for providing [`multiple`](#multiple)\ - \ values. `:` by default." 
- type: "string" - min: - description: "Minimum allowed value for this argument. If set and the provided\ - \ value is lower than the minimum, an error will be produced. Can be combined\ - \ with [`max`](#max) to clamp values." - $ref: "#/definitions/DoubleWithInf" - direction: - $ref: "#/definitions/Direction" - multiple: - description: "Treat the argument value as an array. Arrays can be passed using\ - \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\ - \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\ - \ property. `false` by default." - type: "boolean" - type: - description: "A `double` type argument has a numeric value with decimal points" - const: "double" - required: - description: "Make the value for this argument required. If set to `true`,\ - \ an error will be produced if no value was provided. `false` by default." - type: "boolean" - required: - - "name" - - "type" - additionalProperties: false - FileArgument: - description: "A `file` type argument has a string value that points to a file\ - \ or folder path." - type: "object" - properties: - alternatives: - oneOf: - - description: "List of alternative format variations for this argument." - type: "string" - - type: "array" - items: - description: "List of alternative format variations for this argument." - type: "string" - name: - description: "The name of the argument. Can be in the formats `--foo`, `-f`\ - \ or `foo`. 
The number of dashes determines how values can be passed: \n\ - \n - `--foo` is a long option, which can be passed with `executable_name\ - \ --foo=value` or `executable_name --foo value`\n - `-f` is a short option,\ - \ which can be passed with `executable_name -f value`\n - `foo` is an argument,\ - \ which can be passed with `executable_name value` \n" - type: "string" - create_parent: - description: "If the output filename is a path and it does not exist, create\ - \ it before executing the script (only for `direction: output`)." - type: "boolean" - direction: - description: "Makes this argument an `input` or an `output`, as in does the\ - \ file/folder needs to be read or written. `input` by default." - $ref: "#/definitions/Direction" - info: - description: "Structured information. Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - must_exist: - description: "Checks whether the file or folder exists. For input files, this\ - \ check will happen before the execution of the script, while for output\ - \ files the check will happen afterwards." - type: "boolean" - default: - oneOf: - - description: "The default value when no argument value is provided. This\ - \ will not work if the [`required`](#required) property is enabled." - type: "string" - - type: "array" - items: - description: "The default value when no argument value is provided. This\ - \ will not work if the [`required`](#required) property is enabled." - type: "string" - example: - oneOf: - - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." - type: "string" - - type: "array" - items: - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." - type: "string" - description: - description: "A description of the argument. This will be displayed with `--help`." 
- type: "string" - multiple_sep: - description: "The delimiter character for providing [`multiple`](#multiple)\ - \ values. `:` by default." - type: "string" - multiple: - description: "Treat the argument value as an array. Arrays can be passed using\ - \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\ - \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\ - \ property. `false` by default." - type: "boolean" - type: - description: "A `file` type argument has a string value that points to a file\ - \ or folder path." - const: "file" - required: - description: "Make the value for this argument required. If set to `true`,\ - \ an error will be produced if no value was provided. `false` by default." - type: "boolean" - required: - - "name" - - "type" - additionalProperties: false - IntegerArgument: - description: "An `integer` type argument has a numeric value without decimal points." - type: "object" - properties: - alternatives: - oneOf: - - description: "List of alternative format variations for this argument." - type: "string" - - type: "array" - items: - description: "List of alternative format variations for this argument." - type: "string" - name: - description: "The name of the argument. Can be in the formats `--foo`, `-f`\ - \ or `foo`. The number of dashes determines how values can be passed: \n\ - \n - `--foo` is a long option, which can be passed with `executable_name\ - \ --foo=value` or `executable_name --foo value`\n - `-f` is a short option,\ - \ which can be passed with `executable_name -f value`\n - `foo` is an argument,\ - \ which can be passed with `executable_name value` \n" - type: "string" - choices: - description: "Limit the amount of valid values for this argument to those\ - \ set in this list. When set and a value not present in the list is provided,\ - \ an error will be produced." 
- type: "array" - items: - type: "integer" - info: - description: "Structured information. Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - max: - description: "Maximum allowed value for this argument. If set and the provided\ - \ value is higher than the maximum, an error will be produced. Can be combined\ - \ with [`min`](#min) to clamp values." - type: "integer" - default: - oneOf: - - description: "The default value when no argument value is provided. This\ - \ will not work if the [`required`](#required) property is enabled." - type: "integer" - - type: "array" - items: - description: "The default value when no argument value is provided. This\ - \ will not work if the [`required`](#required) property is enabled." - type: "integer" - example: - oneOf: - - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." - type: "integer" - - type: "array" - items: - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." - type: "integer" - description: - description: "A description of the argument. This will be displayed with `--help`." - type: "string" - multiple_sep: - description: "The delimiter character for providing [`multiple`](#multiple)\ - \ values. `:` by default." - type: "string" - min: - description: "Minimum allowed value for this argument. If set and the provided\ - \ value is lower than the minimum, an error will be produced. Can be combined\ - \ with [`max`](#max) to clamp values." - type: "integer" - direction: - $ref: "#/definitions/Direction" - multiple: - description: "Treat the argument value as an array. Arrays can be passed using\ - \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\ - \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\ - \ property. 
`false` by default." - type: "boolean" - type: - description: "An `integer` type argument has a numeric value without decimal\ - \ points." - const: "integer" - required: - description: "Make the value for this argument required. If set to `true`,\ - \ an error will be produced if no value was provided. `false` by default." - type: "boolean" - required: - - "name" - - "type" - additionalProperties: false - LongArgument: - description: "An `long` type argument has a numeric value without decimal points." - type: "object" - properties: - alternatives: - oneOf: - - description: "List of alternative format variations for this argument." - type: "string" - - type: "array" - items: - description: "List of alternative format variations for this argument." - type: "string" - name: - description: "The name of the argument. Can be in the formats `--foo`, `-f`\ - \ or `foo`. The number of dashes determines how values can be passed: \n\ - \n - `--foo` is a long option, which can be passed with `executable_name\ - \ --foo=value` or `executable_name --foo value`\n - `-f` is a short option,\ - \ which can be passed with `executable_name -f value`\n - `foo` is an argument,\ - \ which can be passed with `executable_name value` \n" - type: "string" - choices: - description: "Limit the amount of valid values for this argument to those\ - \ set in this list. When set and a value not present in the list is provided,\ - \ an error will be produced." - type: "array" - items: - type: "integer" - info: - description: "Structured information. Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - max: - description: "Maximum allowed value for this argument. If set and the provided\ - \ value is higher than the maximum, an error will be produced. Can be combined\ - \ with [`min`](#min) to clamp values." - type: "integer" - default: - oneOf: - - description: "The default value when no argument value is provided. 
This\ - \ will not work if the [`required`](#required) property is enabled." - type: "integer" - - type: "array" - items: - description: "The default value when no argument value is provided. This\ - \ will not work if the [`required`](#required) property is enabled." - type: "integer" - example: - oneOf: - - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." - type: "integer" - - type: "array" - items: - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." - type: "integer" - description: - description: "A description of the argument. This will be displayed with `--help`." - type: "string" - multiple_sep: - description: "The delimiter character for providing [`multiple`](#multiple)\ - \ values. `:` by default." - type: "string" - min: - description: "Minimum allowed value for this argument. If set and the provided\ - \ value is lower than the minimum, an error will be produced. Can be combined\ - \ with [`max`](#max) to clamp values." - type: "integer" - direction: - $ref: "#/definitions/Direction" - multiple: - description: "Treat the argument value as an array. Arrays can be passed using\ - \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\ - \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\ - \ property. `false` by default." - type: "boolean" - type: - description: "An `long` type argument has a numeric value without decimal\ - \ points." - const: "long" - required: - description: "Make the value for this argument required. If set to `true`,\ - \ an error will be produced if no value was provided. `false` by default." 
- type: "boolean" - required: - - "name" - - "type" - additionalProperties: false - StringArgument: - description: "A `string` type argument has a value made up of an ordered sequences\ - \ of characters, like \"Hello\" or \"I'm a string\"." - type: "object" - properties: - alternatives: - oneOf: - - description: "List of alternative format variations for this argument." - type: "string" - - type: "array" - items: - description: "List of alternative format variations for this argument." - type: "string" - name: - description: "The name of the argument. Can be in the formats `--foo`, `-f`\ - \ or `foo`. The number of dashes determines how values can be passed: \n\ - \n - `--foo` is a long option, which can be passed with `executable_name\ - \ --foo=value` or `executable_name --foo value`\n - `-f` is a short option,\ - \ which can be passed with `executable_name -f value`\n - `foo` is an argument,\ - \ which can be passed with `executable_name value` \n" - type: "string" - choices: - description: "Limit the amount of valid values for this argument to those\ - \ set in this list. When set and a value not present in the list is provided,\ - \ an error will be produced." - type: "array" - items: - type: "string" - direction: - $ref: "#/definitions/Direction" - info: - description: "Structured information. Can be any shape: a string, vector,\ - \ map or even nested map." - type: "object" - default: - oneOf: - - description: "The default value when no argument value is provided. This\ - \ will not work if the [`required`](#required) property is enabled." - type: "string" - - type: "array" - items: - description: "The default value when no argument value is provided. This\ - \ will not work if the [`required`](#required) property is enabled." - type: "string" - example: - oneOf: - - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." 
- type: "string" - - type: "array" - items: - description: "An example value for this argument. If no [`default`](#default)\ - \ property was specified, this will be used for that purpose." - type: "string" - description: - description: "A description of the argument. This will be displayed with `--help`." - type: "string" - multiple_sep: - description: "The delimiter character for providing [`multiple`](#multiple)\ - \ values. `:` by default." - type: "string" - multiple: - description: "Treat the argument value as an array. Arrays can be passed using\ - \ the delimiter `--foo=1:2:3` or by providing the same argument multiple\ - \ times `--foo 1 --foo 2`. You can use a custom delimiter by using the [`multiple_sep`](#multiple_sep)\ - \ property. `false` by default." - type: "boolean" - type: - description: "A `string` type argument has a value made up of an ordered sequences\ - \ of characters, like \"Hello\" or \"I'm a string\"." - const: "string" - required: - description: "Make the value for this argument required. If set to `true`,\ - \ an error will be produced if no value was provided. `false` by default." - type: "boolean" - required: - - "name" - - "type" - additionalProperties: false - Resource: - oneOf: - - $ref: "#/definitions/BashScript" - - $ref: "#/definitions/CSharpScript" - - $ref: "#/definitions/Executable" - - $ref: "#/definitions/JavaScriptScript" - - $ref: "#/definitions/NextflowScript" - - $ref: "#/definitions/PlainFile" - - $ref: "#/definitions/PythonScript" - - $ref: "#/definitions/RScript" - - $ref: "#/definitions/ScalaScript" - BashScript: - description: "An executable Bash script.\nWhen defined in resources, only the\ - \ first entry will be executed when running the built component or when running\ - \ `viash run`.\nWhen defined in test_resources, all entries will be executed\ - \ during `viash test`." - type: "object" - properties: - path: - description: "The path of the input file. Can be a relative or an absolute\ - \ path, or a URI. 
Mutually exclusive with `text`." - type: "string" - text: - description: "The content of the resulting file specified as a string. Mutually\ - \ exclusive with `path`." - type: "string" - is_executable: - description: "Whether the resulting resource file should be made executable." - type: "boolean" - type: - description: "An executable Bash script.\nWhen defined in resources, only\ - \ the first entry will be executed when running the built component or when\ - \ running `viash run`.\nWhen defined in test_resources, all entries will\ - \ be executed during `viash test`." - const: "bash_script" - dest: - description: "Resulting filename of the resource. From within a script, the\ - \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\ - \ `dest` will be set to the basename of the `path` parameter." - type: "string" - required: - - "type" - additionalProperties: false - CSharpScript: - description: "An executable C# script.\nWhen defined in resources, only the first\ - \ entry will be executed when running the built component or when running `viash\ - \ run`.\nWhen defined in test_resources, all entries will be executed during\ - \ `viash test`." - type: "object" - properties: - path: - description: "The path of the input file. Can be a relative or an absolute\ - \ path, or a URI. Mutually exclusive with `text`." - type: "string" - text: - description: "The content of the resulting file specified as a string. Mutually\ - \ exclusive with `path`." - type: "string" - is_executable: - description: "Whether the resulting resource file should be made executable." - type: "boolean" - type: - description: "An executable C# script.\nWhen defined in resources, only the\ - \ first entry will be executed when running the built component or when\ - \ running `viash run`.\nWhen defined in test_resources, all entries will\ - \ be executed during `viash test`." - const: "csharp_script" - dest: - description: "Resulting filename of the resource. 
From within a script, the\ - \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\ - \ `dest` will be set to the basename of the `path` parameter." - type: "string" - required: - - "type" - additionalProperties: false - Executable: - description: "An executable file." - type: "object" - properties: - path: - description: "The path of the input file. Can be a relative or an absolute\ - \ path, or a URI. Mutually exclusive with `text`." - type: "string" - text: - description: "The content of the resulting file specified as a string. Mutually\ - \ exclusive with `path`." - type: "string" - is_executable: - description: "Whether the resulting resource file should be made executable." - type: "boolean" - type: - description: "An executable file." - const: "executable" - dest: - description: "Resulting filename of the resource. From within a script, the\ - \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\ - \ `dest` will be set to the basename of the `path` parameter." - type: "string" - required: - - "type" - additionalProperties: false - JavaScriptScript: - description: "An executable JavaScript script.\nWhen defined in resources, only\ - \ the first entry will be executed when running the built component or when\ - \ running `viash run`.\nWhen defined in test_resources, all entries will be\ - \ executed during `viash test`." - type: "object" - properties: - path: - description: "The path of the input file. Can be a relative or an absolute\ - \ path, or a URI. Mutually exclusive with `text`." - type: "string" - text: - description: "The content of the resulting file specified as a string. Mutually\ - \ exclusive with `path`." - type: "string" - is_executable: - description: "Whether the resulting resource file should be made executable." 
- type: "boolean" - type: - description: "An executable JavaScript script.\nWhen defined in resources,\ - \ only the first entry will be executed when running the built component\ - \ or when running `viash run`.\nWhen defined in test_resources, all entries\ - \ will be executed during `viash test`." - const: "javascript_script" - dest: - description: "Resulting filename of the resource. From within a script, the\ - \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\ - \ `dest` will be set to the basename of the `path` parameter." - type: "string" - required: - - "type" - additionalProperties: false - NextflowScript: - description: "A Nextflow script. Work in progress; added mainly for annotation\ - \ at the moment." - type: "object" - properties: - path: - description: "The path of the input file. Can be a relative or an absolute\ - \ path, or a URI. Mutually exclusive with `text`." - type: "string" - text: - description: "The content of the resulting file specified as a string. Mutually\ - \ exclusive with `path`." - type: "string" - entrypoint: - description: "The name of the workflow to be wrapped." - type: "string" - is_executable: - description: "Whether the resulting resource file should be made executable." - type: "boolean" - type: - description: "A Nextflow script. Work in progress; added mainly for annotation\ - \ at the moment." - const: "nextflow_script" - dest: - description: "Resulting filename of the resource. From within a script, the\ - \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\ - \ `dest` will be set to the basename of the `path` parameter." - type: "string" - required: - - "entrypoint" - - "type" - additionalProperties: false - PlainFile: - description: "A plain file. This can only be used as a supporting resource for\ - \ the main script or unit tests." - type: "object" - properties: - path: - description: "The path of the input file. 
Can be a relative or an absolute\ - \ path, or a URI. Mutually exclusive with `text`." - type: "string" - text: - description: "The content of the resulting file specified as a string. Mutually\ - \ exclusive with `path`." - type: "string" - is_executable: - description: "Whether the resulting resource file should be made executable." - type: "boolean" - type: - description: "A plain file. This can only be used as a supporting resource\ - \ for the main script or unit tests." - const: "file" - dest: - description: "Resulting filename of the resource. From within a script, the\ - \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\ - \ `dest` will be set to the basename of the `path` parameter." - type: "string" - required: [] - additionalProperties: false - PythonScript: - description: "An executable Python script.\nWhen defined in resources, only the\ - \ first entry will be executed when running the built component or when running\ - \ `viash run`.\nWhen defined in test_resources, all entries will be executed\ - \ during `viash test`." - type: "object" - properties: - path: - description: "The path of the input file. Can be a relative or an absolute\ - \ path, or a URI. Mutually exclusive with `text`." - type: "string" - text: - description: "The content of the resulting file specified as a string. Mutually\ - \ exclusive with `path`." - type: "string" - is_executable: - description: "Whether the resulting resource file should be made executable." - type: "boolean" - type: - description: "An executable Python script.\nWhen defined in resources, only\ - \ the first entry will be executed when running the built component or when\ - \ running `viash run`.\nWhen defined in test_resources, all entries will\ - \ be executed during `viash test`." - const: "python_script" - dest: - description: "Resulting filename of the resource. From within a script, the\ - \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. 
If unspecified,\ - \ `dest` will be set to the basename of the `path` parameter." - type: "string" - required: - - "type" - additionalProperties: false - RScript: - description: "An executable R script.\nWhen defined in resources, only the first\ - \ entry will be executed when running the built component or when running `viash\ - \ run`.\nWhen defined in test_resources, all entries will be executed during\ - \ `viash test`." - type: "object" - properties: - path: - description: "The path of the input file. Can be a relative or an absolute\ - \ path, or a URI. Mutually exclusive with `text`." - type: "string" - text: - description: "The content of the resulting file specified as a string. Mutually\ - \ exclusive with `path`." - type: "string" - is_executable: - description: "Whether the resulting resource file should be made executable." - type: "boolean" - type: - description: "An executable R script.\nWhen defined in resources, only the\ - \ first entry will be executed when running the built component or when\ - \ running `viash run`.\nWhen defined in test_resources, all entries will\ - \ be executed during `viash test`." - const: "r_script" - dest: - description: "Resulting filename of the resource. From within a script, the\ - \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\ - \ `dest` will be set to the basename of the `path` parameter." - type: "string" - required: - - "type" - additionalProperties: false - ScalaScript: - description: "An executable Scala script.\nWhen defined in resources, only the\ - \ first entry will be executed when running the built component or when running\ - \ `viash run`.\nWhen defined in test_resources, all entries will be executed\ - \ during `viash test`." - type: "object" - properties: - path: - description: "The path of the input file. Can be a relative or an absolute\ - \ path, or a URI. Mutually exclusive with `text`." 
- type: "string" - text: - description: "The content of the resulting file specified as a string. Mutually\ - \ exclusive with `path`." - type: "string" - is_executable: - description: "Whether the resulting resource file should be made executable." - type: "boolean" - type: - description: "An executable Scala script.\nWhen defined in resources, only\ - \ the first entry will be executed when running the built component or when\ - \ running `viash run`.\nWhen defined in test_resources, all entries will\ - \ be executed during `viash test`." - const: "scala_script" - dest: - description: "Resulting filename of the resource. From within a script, the\ - \ file can be accessed at `meta[\"resources_dir\"] + \"/\" + dest`. If unspecified,\ - \ `dest` will be set to the basename of the `path` parameter." - type: "string" - required: - - "type" - additionalProperties: false - NextflowDirectives: - description: "Directives are optional settings that affect the execution of the\ - \ process.\n" - type: "object" - properties: - beforeScript: - description: "The `beforeScript` directive allows you to execute a custom\ - \ (Bash) snippet before the main process script is run. 
This may be useful\ - \ to initialise the underlying cluster environment or for other custom initialisation.\n\ - \nSee [`beforeScript`](https://www.nextflow.io/docs/latest/process.html#beforeScript).\n" - type: "string" - module: - oneOf: - - description: "Environment Modules is a package manager that allows you to\ - \ dynamically configure your execution environment and easily switch between\ - \ multiple versions of the same software tool.\n\nIf it is available in\ - \ your system you can use it with Nextflow in order to configure the processes\ - \ execution environment in your pipeline.\n\nIn a process definition you\ - \ can use the `module` directive to load a specific module version to\ - \ be used in the process execution environment.\n\nSee [`module`](https://www.nextflow.io/docs/latest/process.html#module).\n" - type: "string" - - type: "array" - items: - description: "Environment Modules is a package manager that allows you\ - \ to dynamically configure your execution environment and easily switch\ - \ between multiple versions of the same software tool.\n\nIf it is available\ - \ in your system you can use it with Nextflow in order to configure\ - \ the processes execution environment in your pipeline.\n\nIn a process\ - \ definition you can use the `module` directive to load a specific module\ - \ version to be used in the process execution environment.\n\nSee [`module`](https://www.nextflow.io/docs/latest/process.html#module).\n" - type: "string" - queue: - oneOf: - - description: "The `queue` directory allows you to set the queue where jobs\ - \ are scheduled when using a grid based executor in your pipeline.\n\n\ - See [`queue`](https://www.nextflow.io/docs/latest/process.html#queue).\n" - type: "string" - - type: "array" - items: - description: "The `queue` directory allows you to set the queue where\ - \ jobs are scheduled when using a grid based executor in your pipeline.\n\ - \nSee 
[`queue`](https://www.nextflow.io/docs/latest/process.html#queue).\n" - type: "string" - label: - oneOf: - - description: "The `label` directive allows the annotation of processes with\ - \ mnemonic identifier of your choice.\n\nSee [`label`](https://www.nextflow.io/docs/latest/process.html#label).\n" - type: "string" - - type: "array" - items: - description: "The `label` directive allows the annotation of processes\ - \ with mnemonic identifier of your choice.\n\nSee [`label`](https://www.nextflow.io/docs/latest/process.html#label).\n" - type: "string" - container: - oneOf: - - description: "The `container` directive allows you to execute the process\ - \ script in a Docker container.\n\nIt requires the Docker daemon to be\ - \ running in machine where the pipeline is executed, i.e. the local machine\ - \ when using the local executor or the cluster nodes when the pipeline\ - \ is deployed through a grid executor.\n\nViash implements allows either\ - \ a string value or a map. In case a map is used, the allowed keys are:\ - \ `registry`, `image`, and `tag`. The `image` value must be specified.\n\ - \nSee [`container`](https://www.nextflow.io/docs/latest/process.html#container).\n" - type: "object" - additionalProperties: - type: "string" - - description: "The `container` directive allows you to execute the process\ - \ script in a Docker container.\n\nIt requires the Docker daemon to be\ - \ running in machine where the pipeline is executed, i.e. the local machine\ - \ when using the local executor or the cluster nodes when the pipeline\ - \ is deployed through a grid executor.\n\nViash implements allows either\ - \ a string value or a map. In case a map is used, the allowed keys are:\ - \ `registry`, `image`, and `tag`. 
The `image` value must be specified.\n\ - \nSee [`container`](https://www.nextflow.io/docs/latest/process.html#container).\n" - type: "string" - publishDir: - oneOf: - - oneOf: - - description: "The `publishDir` directive allows you to publish the process\ - \ output files to a specified folder.\n\nViash implements this directive\ - \ as a plain string or a map. The allowed keywords for the map are:\ - \ `path`, `mode`, `overwrite`, `pattern`, `saveAs`, `enabled`. The `path`\ - \ key and value are required.\nThe allowed values for `mode` are: `symlink`,\ - \ `rellink`, `link`, `copy`, `copyNoFollow`, `move`.\n\nSee [`publishDir`](https://www.nextflow.io/docs/latest/process.html#publishdir).\n" - type: "string" - - description: "The `publishDir` directive allows you to publish the process\ - \ output files to a specified folder.\n\nViash implements this directive\ - \ as a plain string or a map. The allowed keywords for the map are:\ - \ `path`, `mode`, `overwrite`, `pattern`, `saveAs`, `enabled`. The `path`\ - \ key and value are required.\nThe allowed values for `mode` are: `symlink`,\ - \ `rellink`, `link`, `copy`, `copyNoFollow`, `move`.\n\nSee [`publishDir`](https://www.nextflow.io/docs/latest/process.html#publishdir).\n" - type: "object" - additionalProperties: - type: "string" - - type: "array" - items: - oneOf: - - description: "The `publishDir` directive allows you to publish the process\ - \ output files to a specified folder.\n\nViash implements this directive\ - \ as a plain string or a map. The allowed keywords for the map are:\ - \ `path`, `mode`, `overwrite`, `pattern`, `saveAs`, `enabled`. 
The\ - \ `path` key and value are required.\nThe allowed values for `mode`\ - \ are: `symlink`, `rellink`, `link`, `copy`, `copyNoFollow`, `move`.\n\ - \nSee [`publishDir`](https://www.nextflow.io/docs/latest/process.html#publishdir).\n" - type: "string" - - description: "The `publishDir` directive allows you to publish the process\ - \ output files to a specified folder.\n\nViash implements this directive\ - \ as a plain string or a map. The allowed keywords for the map are:\ - \ `path`, `mode`, `overwrite`, `pattern`, `saveAs`, `enabled`. The\ - \ `path` key and value are required.\nThe allowed values for `mode`\ - \ are: `symlink`, `rellink`, `link`, `copy`, `copyNoFollow`, `move`.\n\ - \nSee [`publishDir`](https://www.nextflow.io/docs/latest/process.html#publishdir).\n" - type: "object" - additionalProperties: - type: "string" - maxForks: - oneOf: - - description: "The `maxForks` directive allows you to define the maximum\ - \ number of process instances that can be executed in parallel. By default\ - \ this value is equals to the number of CPU cores available minus 1.\n\ - \nIf you want to execute a process in a sequential manner, set this directive\ - \ to one.\n\nSee [`maxForks`](https://www.nextflow.io/docs/latest/process.html#maxforks).\n" - type: "string" - - description: "The `maxForks` directive allows you to define the maximum\ - \ number of process instances that can be executed in parallel. 
By default\ - \ this value is equals to the number of CPU cores available minus 1.\n\ - \nIf you want to execute a process in a sequential manner, set this directive\ - \ to one.\n\nSee [`maxForks`](https://www.nextflow.io/docs/latest/process.html#maxforks).\n" - type: "integer" - maxErrors: - oneOf: - - description: "The `maxErrors` directive allows you to specify the maximum\ - \ number of times a process can fail when using the `retry` error strategy.\ - \ By default this directive is disabled.\n\nSee [`maxErrors`](https://www.nextflow.io/docs/latest/process.html#maxerrors).\n" - type: "string" - - description: "The `maxErrors` directive allows you to specify the maximum\ - \ number of times a process can fail when using the `retry` error strategy.\ - \ By default this directive is disabled.\n\nSee [`maxErrors`](https://www.nextflow.io/docs/latest/process.html#maxerrors).\n" - type: "integer" - cpus: - oneOf: - - description: "The `cpus` directive allows you to define the number of (logical)\ - \ CPU required by the process' task.\n\nSee [`cpus`](https://www.nextflow.io/docs/latest/process.html#cpus).\n" - type: "integer" - - description: "The `cpus` directive allows you to define the number of (logical)\ - \ CPU required by the process' task.\n\nSee [`cpus`](https://www.nextflow.io/docs/latest/process.html#cpus).\n" - type: "string" - accelerator: - description: "The `accelerator` directive allows you to specify the hardware\ - \ accelerator requirement for the task execution e.g. 
GPU processor.\n\n\ - Viash implements this directive as a map with accepted keywords: `type`,\ - \ `limit`, `request`, and `runtime`.\n\nSee [`accelerator`](https://www.nextflow.io/docs/latest/process.html#accelerator).\n" - type: "object" - additionalProperties: - type: "string" - time: - description: "The `time` directive allows you to define how long a process\ - \ is allowed to run.\n\nSee [`time`](https://www.nextflow.io/docs/latest/process.html#time).\n" - type: "string" - afterScript: - description: "The `afterScript` directive allows you to execute a custom (Bash)\ - \ snippet immediately after the main process has run. This may be useful\ - \ to clean up your staging area.\n\nSee [`afterScript`](https://www.nextflow.io/docs/latest/process.html#afterscript).\n" - type: "string" - executor: - description: "The `executor` defines the underlying system where processes\ - \ are executed. By default a process uses the executor defined globally\ - \ in the nextflow.config file.\n\nThe `executor` directive allows you to\ - \ configure what executor has to be used by the process, overriding the\ - \ default configuration. The following values can be used:\n\n| Name | Executor\ - \ |\n|------|----------|\n| awsbatch | The process is executed using the\ - \ AWS Batch service. | \n| azurebatch | The process is executed using the\ - \ Azure Batch service. | \n| condor | The process is executed using the\ - \ HTCondor job scheduler. | \n| google-lifesciences | The process is executed\ - \ using the Google Genomics Pipelines service. | \n| ignite | The process\ - \ is executed using the Apache Ignite cluster. | \n| k8s | The process is\ - \ executed using the Kubernetes cluster. | \n| local | The process is executed\ - \ in the computer where Nextflow is launched. | \n| lsf | The process is\ - \ executed using the Platform LSF job scheduler. | \n| moab | The process\ - \ is executed using the Moab job scheduler. 
| \n| nqsii | The process is\ - \ executed using the NQSII job scheduler. | \n| oge | Alias for the sge\ - \ executor. | \n| pbs | The process is executed using the PBS/Torque job\ - \ scheduler. | \n| pbspro | The process is executed using the PBS Pro job\ - \ scheduler. | \n| sge | The process is executed using the Sun Grid Engine\ - \ / Open Grid Engine. | \n| slurm | The process is executed using the SLURM\ - \ job scheduler. | \n| tes | The process is executed using the GA4GH TES\ - \ service. | \n| uge | Alias for the sge executor. |\n\nSee [`executor`](https://www.nextflow.io/docs/latest/process.html#executor).\n" - type: "string" - containerOptions: - oneOf: - - description: "The `containerOptions` directive allows you to specify any\ - \ container execution option supported by the underlying container engine\ - \ (ie. Docker, Singularity, etc). This can be useful to provide container\ - \ settings only for a specific process e.g. mount a custom path.\n\nSee\ - \ [`containerOptions`](https://www.nextflow.io/docs/latest/process.html#containeroptions).\n" - type: "string" - - type: "array" - items: - description: "The `containerOptions` directive allows you to specify any\ - \ container execution option supported by the underlying container engine\ - \ (ie. Docker, Singularity, etc). This can be useful to provide container\ - \ settings only for a specific process e.g. 
mount a custom path.\n\n\ - See [`containerOptions`](https://www.nextflow.io/docs/latest/process.html#containeroptions).\n" - type: "string" - disk: - description: "The `disk` directive allows you to define how much local disk\ - \ storage the process is allowed to use.\n\nSee [`disk`](https://www.nextflow.io/docs/latest/process.html#disk).\n" - type: "string" - tag: - description: "The `tag` directive allows you to associate each process execution\ - \ with a custom label, so that it will be easier to identify them in the\ - \ log file or in the trace execution report.\n\nFor ease of use, the default\ - \ tag is set to \"$id\", which allows tracking the progression of the channel\ - \ events through the workflow more easily.\n\nSee [`tag`](https://www.nextflow.io/docs/latest/process.html#tag).\n" - type: "string" - conda: - oneOf: - - description: "The `conda` directive allows for the definition of the process\ - \ dependencies using the Conda package manager.\n\nNextflow automatically\ - \ sets up an environment for the given package names listed by in the\ - \ `conda` directive.\n\nSee [`conda`](https://www.nextflow.io/docs/latest/process.html#conda).\n" - type: "string" - - type: "array" - items: - description: "The `conda` directive allows for the definition of the process\ - \ dependencies using the Conda package manager.\n\nNextflow automatically\ - \ sets up an environment for the given package names listed by in the\ - \ `conda` directive.\n\nSee [`conda`](https://www.nextflow.io/docs/latest/process.html#conda).\n" - type: "string" - machineType: - description: " The `machineType` can be used to specify a predefined Google\ - \ Compute Platform machine type when running using the Google Life Sciences\ - \ executor.\n\nSee [`machineType`](https://www.nextflow.io/docs/latest/process.html#machinetype).\n" - type: "string" - stageInMode: - description: "The `stageInMode` directive defines how input files are staged-in\ - \ to the process work directory. 
The following values are allowed:\n\n|\ - \ Value | Description |\n|-------|-------------| \n| copy | Input files\ - \ are staged in the process work directory by creating a copy. | \n| link\ - \ | Input files are staged in the process work directory by creating an\ - \ (hard) link for each of them. | \n| symlink | Input files are staged in\ - \ the process work directory by creating a symbolic link with an absolute\ - \ path for each of them (default). | \n| rellink | Input files are staged\ - \ in the process work directory by creating a symbolic link with a relative\ - \ path for each of them. | \n\nSee [`stageInMode`](https://www.nextflow.io/docs/latest/process.html#stageinmode).\n" - type: "string" - cache: - oneOf: - - description: "The `cache` directive allows you to store the process results\ - \ to a local cache. When the cache is enabled and the pipeline is launched\ - \ with the resume option, any following attempt to execute the process,\ - \ along with the same inputs, will cause the process execution to be skipped,\ - \ producing the stored data as the actual results.\n\nThe caching feature\ - \ generates a unique key by indexing the process script and inputs. This\ - \ key is used to identify univocally the outputs produced by the process\ - \ execution.\n\nThe `cache` is enabled by default, you can disable it\ - \ for a specific process by setting the cache directive to `false`.\n\n\ - Accepted values are: `true`, `false`, \"deep\", and \"lenient\".\n\nSee\ - \ [`cache`](https://www.nextflow.io/docs/latest/process.html#cache).\n" - type: "boolean" - - description: "The `cache` directive allows you to store the process results\ - \ to a local cache. 
When the cache is enabled and the pipeline is launched\ - \ with the resume option, any following attempt to execute the process,\ - \ along with the same inputs, will cause the process execution to be skipped,\ - \ producing the stored data as the actual results.\n\nThe caching feature\ - \ generates a unique key by indexing the process script and inputs. This\ - \ key is used to identify univocally the outputs produced by the process\ - \ execution.\n\nThe `cache` is enabled by default, you can disable it\ - \ for a specific process by setting the cache directive to `false`.\n\n\ - Accepted values are: `true`, `false`, \"deep\", and \"lenient\".\n\nSee\ - \ [`cache`](https://www.nextflow.io/docs/latest/process.html#cache).\n" - type: "string" - pod: - oneOf: - - description: "The `pod` directive allows the definition of pods specific\ - \ settings, such as environment variables, secrets and config maps when\ - \ using the Kubernetes executor.\n\nSee [`pod`](https://www.nextflow.io/docs/latest/process.html#pod).\n" - type: "object" - additionalProperties: - type: "string" - - type: "array" - items: - description: "The `pod` directive allows the definition of pods specific\ - \ settings, such as environment variables, secrets and config maps when\ - \ using the Kubernetes executor.\n\nSee [`pod`](https://www.nextflow.io/docs/latest/process.html#pod).\n" - type: "object" - additionalProperties: - type: "string" - penv: - description: "The `penv` directive allows you to define the parallel environment\ - \ to be used when submitting a parallel task to the SGE resource manager.\n\ - \nSee [`penv`](https://www.nextflow.io/docs/latest/process.html#penv).\n" - type: "string" - scratch: - oneOf: - - description: "The `scratch` directive allows you to execute the process\ - \ in a temporary folder that is local to the execution node.\n\nSee [`scratch`](https://www.nextflow.io/docs/latest/process.html#scratch).\n" - type: "boolean" - - description: "The `scratch` directive 
allows you to execute the process\ - \ in a temporary folder that is local to the execution node.\n\nSee [`scratch`](https://www.nextflow.io/docs/latest/process.html#scratch).\n" - type: "string" - storeDir: - description: "The `storeDir` directive allows you to define a directory that\ - \ is used as a permanent cache for your process results.\n\nSee [`storeDir`](https://www.nextflow.io/docs/latest/process.html#storeDir).\n" - type: "string" - maxRetries: - oneOf: - - description: "The `maxRetries` directive allows you to define the maximum\ - \ number of times a process instance can be re-submitted in case of failure.\ - \ This value is applied only when using the retry error strategy. By default\ - \ only one retry is allowed.\n\nSee [`maxRetries`](https://www.nextflow.io/docs/latest/process.html#maxretries).\n" - type: "string" - - description: "The `maxRetries` directive allows you to define the maximum\ - \ number of times a process instance can be re-submitted in case of failure.\ - \ This value is applied only when using the retry error strategy. By default\ - \ only one retry is allowed.\n\nSee [`maxRetries`](https://www.nextflow.io/docs/latest/process.html#maxretries).\n" - type: "integer" - echo: - oneOf: - - description: "By default the stdout produced by the commands executed in\ - \ all processes is ignored. By setting the `echo` directive to true, you\ - \ can forward the process stdout to the current top running process stdout\ - \ file, showing it in the shell terminal.\n \nSee [`echo`](https://www.nextflow.io/docs/latest/process.html#echo).\n" - type: "boolean" - - description: "By default the stdout produced by the commands executed in\ - \ all processes is ignored. 
By setting the `echo` directive to true, you\ - \ can forward the process stdout to the current top running process stdout\ - \ file, showing it in the shell terminal.\n \nSee [`echo`](https://www.nextflow.io/docs/latest/process.html#echo).\n" - type: "string" - errorStrategy: - description: "The `errorStrategy` directive allows you to define how an error\ - \ condition is managed by the process. By default when an error status is\ - \ returned by the executed script, the process stops immediately. This in\ - \ turn forces the entire pipeline to terminate.\n\nTable of available error\ - \ strategies:\n| Name | Executor |\n|------|----------|\n| `terminate` |\ - \ Terminates the execution as soon as an error condition is reported. Pending\ - \ jobs are killed (default) |\n| `finish` | Initiates an orderly pipeline\ - \ shutdown when an error condition is raised, waiting the completion of\ - \ any submitted job. |\n| `ignore` | Ignores processes execution errors.\ - \ |\n| `retry` | Re-submit for execution a process returning an error condition.\ - \ |\n\nSee [`errorStrategy`](https://www.nextflow.io/docs/latest/process.html#errorstrategy).\n" - type: "string" - memory: - description: "The `memory` directive allows you to define how much memory\ - \ the process is allowed to use.\n\nSee [`memory`](https://www.nextflow.io/docs/latest/process.html#memory).\n" - type: "string" - stageOutMode: - description: "The `stageOutMode` directive defines how output files are staged-out\ - \ from the scratch directory to the process work directory. The following\ - \ values are allowed:\n\n| Value | Description |\n|-------|-------------|\ - \ \n| copy | Output files are copied from the scratch directory to the work\ - \ directory. | \n| move | Output files are moved from the scratch directory\ - \ to the work directory. | \n| rsync | Output files are copied from the\ - \ scratch directory to the work directory by using the rsync utility. 
|\n\ - \nSee [`stageOutMode`](https://www.nextflow.io/docs/latest/process.html#stageoutmode).\n" - type: "string" - required: [] - additionalProperties: false - NextflowAuto: - description: "Automated processing flags which can be toggled on or off." - type: "object" - properties: - simplifyInput: - description: "If `true`, an input tuple only containing only a single File\ - \ (e.g. `[\"foo\", file(\"in.h5ad\")]`) is automatically transformed to\ - \ a map (i.e. `[\"foo\", [ input: file(\"in.h5ad\") ] ]`).\n\nDefault: `true`.\n" - type: "boolean" - simplifyOutput: - description: "If `true`, an output tuple containing a map with a File (e.g.\ - \ `[\"foo\", [ output: file(\"out.h5ad\") ] ]`) is automatically transformed\ - \ to a map (i.e. `[\"foo\", file(\"out.h5ad\")]`).\n\nDefault: `false`.\n" - type: "boolean" - publish: - oneOf: - - description: "If `true`, the module's outputs are automatically published\ - \ to `params.publishDir`.\nIf equal to \"state\", also a `.state.yaml`\ - \ file will be published in the publish dir.\nWill throw an error if `params.publishDir`\ - \ is not defined.\n\nDefault: `false`.\n" - type: "boolean" - - description: "If `true`, the module's outputs are automatically published\ - \ to `params.publishDir`.\nIf equal to \"state\", also a `.state.yaml`\ - \ file will be published in the publish dir.\nWill throw an error if `params.publishDir`\ - \ is not defined.\n\nDefault: `false`.\n" - type: "string" - transcript: - description: "If `true`, the module's transcripts from `work/` are automatically\ - \ published to `params.transcriptDir`.\nIf not defined, `params.publishDir\ - \ + \"/_transcripts\"` will be used.\nWill throw an error if neither are\ - \ defined.\n\nDefault: `false`.\n" - type: "boolean" - required: [] - additionalProperties: false - NextflowConfig: - description: "Allows tweaking how the Nextflow Config file is generated." 
- type: "object" - properties: - labels: - description: "A series of default labels to specify memory and cpu constraints.\n\ - \nThe default memory labels are defined as \"mem1gb\", \"mem2gb\", \"mem4gb\"\ - , ... upto \"mem512tb\" and follows powers of 2.\nThe default cpu labels\ - \ are defined as \"cpu1\", \"cpu2\", \"cpu5\", \"cpu10\", ... upto \"cpu1000\"\ - \ and follows a semi logarithmic scale (1, 2, 5 per decade).\n\nConceptually\ - \ it is possible for a Viash Config to overwrite the full labels parameter,\ - \ however likely it is more efficient to add additional labels\nin the Viash\ - \ Package with a config mod.\n" - type: "object" - additionalProperties: - type: "string" - script: - oneOf: - - description: "Includes a single string or list of strings into the nextflow.config\ - \ file.\nThis can be used to add custom profiles or include an additional\ - \ config file.\n" - type: "string" - - type: "array" - items: - description: "Includes a single string or list of strings into the nextflow.config\ - \ file.\nThis can be used to add custom profiles or include an additional\ - \ config file.\n" - type: "string" - required: [] - additionalProperties: false - Dependency: - description: "Specifies a Viash component (script or executable) that should be\ - \ made available for the code defined in the component.\nThe dependency components\ - \ are collected and copied to the output folder during the Viash build step.\n" - type: "object" - properties: - name: - description: "The full name of the dependency component. This should include\ - \ the namespace." 
- type: "string" - repository: - oneOf: - - description: "Specifies the repository location where the dependency component\ - \ can be found.\nThis must either be a full definition of the repository\ - \ or the name of a repository referenced as it is defined under repositories.\n\ - Additionally, the full definition can be specified as a single string\ - \ where all parameters such as repository type, url, branch or tag are\ - \ specified.\nOmitting the value sets the dependency as a local dependency,\ - \ ie. the dependency is available in the same namespace as the component.\n" - type: "string" - - description: "Specifies the repository location where the dependency component\ - \ can be found.\nThis must either be a full definition of the repository\ - \ or the name of a repository referenced as it is defined under repositories.\n\ - Additionally, the full definition can be specified as a single string\ - \ where all parameters such as repository type, url, branch or tag are\ - \ specified.\nOmitting the value sets the dependency as a local dependency,\ - \ ie. the dependency is available in the same namespace as the component.\n" - $ref: "#/definitions/Repository" - alias: - description: "An alternative name for the dependency component. This can include\ - \ a namespace if so needed." 
- type: "string" - required: - - "name" - additionalProperties: false - Repository: - oneOf: - - $ref: "#/definitions/LocalRepository" - - $ref: "#/definitions/GitRepository" - - $ref: "#/definitions/GithubRepository" - - $ref: "#/definitions/ViashhubRepository" - LocalRepository: - description: "Defines a locally present and available repository.\nThis can be\ - \ used to define components from the same code base as the current component.\n\ - Alternatively, this can be used to refer to a code repository present on the\ - \ local hard-drive instead of fetchable remotely, for example during development.\n" - type: "object" - properties: - path: - description: "Defines a subfolder of the repository to use as base to look\ - \ for the dependency components." - type: "string" - tag: - description: "Defines which version of the dependency component to use. Typically\ - \ this can be a specific tag, branch or commit hash." - type: "string" - type: - description: "Defines a locally present and available repository.\nThis can\ - \ be used to define components from the same code base as the current component.\n\ - Alternatively, this can be used to refer to a code repository present on\ - \ the local hard-drive instead of fetchable remotely, for example during\ - \ development.\n" - const: "local" - required: - - "type" - additionalProperties: false - GitRepository: - description: "A Git repository where remote dependency components can be found." - type: "object" - properties: - path: - description: "Defines a subfolder of the repository to use as base to look\ - \ for the dependency components." - type: "string" - tag: - description: "Defines which version of the dependency component to use. Typically\ - \ this can be a specific tag, branch or commit hash." - type: "string" - uri: - description: "The URI of the Git repository." - type: "string" - type: - description: "A Git repository where remote dependency components can be found." 
- const: "git" - required: - - "uri" - - "type" - additionalProperties: false - GithubRepository: - description: "A GitHub repository where remote dependency components can be found." - type: "object" - properties: - path: - description: "Defines a subfolder of the repository to use as base to look\ - \ for the dependency components." - type: "string" - tag: - description: "Defines which version of the dependency component to use. Typically\ - \ this can be a specific tag, branch or commit hash." - type: "string" - repo: - description: "The name of the GitHub repository." - type: "string" - type: - description: "A GitHub repository where remote dependency components can be\ - \ found." - const: "github" - required: - - "repo" - - "type" - additionalProperties: false - ViashhubRepository: - description: "A Viash-Hub repository where remote dependency components can be\ - \ found." - type: "object" - properties: - path: - description: "Defines a subfolder of the repository to use as base to look\ - \ for the dependency components." - type: "string" - tag: - description: "Defines which version of the dependency component to use. Typically\ - \ this can be a specific tag, branch or commit hash." - type: "string" - repo: - description: "The name of the Viash-Hub repository." - type: "string" - type: - description: "A Viash-Hub repository where remote dependency components can\ - \ be found." 
- const: "viashhub" - required: - - "repo" - - "type" - additionalProperties: false - RepositoryWithName: - oneOf: - - $ref: "#/definitions/LocalRepositoryWithName" - - $ref: "#/definitions/GitRepositoryWithName" - - $ref: "#/definitions/GithubRepositoryWithName" - - $ref: "#/definitions/ViashhubRepositoryWithName" - LocalRepositoryWithName: - description: "Defines a locally present and available repository.\nThis can be\ - \ used to define components from the same code base as the current component.\n\ - Alternatively, this can be used to refer to a code repository present on the\ - \ local hard-drive instead of fetchable remotely, for example during development.\n" - type: "object" - properties: - name: - description: "The identifier used to refer to this repository from dependencies." - type: "string" - path: - description: "Defines a subfolder of the repository to use as base to look\ - \ for the dependency components." - type: "string" - tag: - description: "Defines which version of the dependency component to use. Typically\ - \ this can be a specific tag, branch or commit hash." - type: "string" - type: - description: "Defines a locally present and available repository.\nThis can\ - \ be used to define components from the same code base as the current component.\n\ - Alternatively, this can be used to refer to a code repository present on\ - \ the local hard-drive instead of fetchable remotely, for example during\ - \ development.\n" - const: "localwithname" - required: - - "name" - - "type" - additionalProperties: false - GitRepositoryWithName: - description: "A Git repository where remote dependency components can be found." - type: "object" - properties: - name: - description: "The identifier used to refer to this repository from dependencies." - type: "string" - path: - description: "Defines a subfolder of the repository to use as base to look\ - \ for the dependency components." 
- type: "string" - tag: - description: "Defines which version of the dependency component to use. Typically\ - \ this can be a specific tag, branch or commit hash." - type: "string" - uri: - description: "The URI of the Git repository." - type: "string" - type: - description: "A Git repository where remote dependency components can be found." - const: "gitwithname" - required: - - "name" - - "uri" - - "type" - additionalProperties: false - GithubRepositoryWithName: - description: "A GitHub repository where remote dependency components can be found." - type: "object" - properties: - name: - description: "The identifier used to refer to this repository from dependencies." - type: "string" - path: - description: "Defines a subfolder of the repository to use as base to look\ - \ for the dependency components." - type: "string" - tag: - description: "Defines which version of the dependency component to use. Typically\ - \ this can be a specific tag, branch or commit hash." - type: "string" - repo: - description: "The name of the GitHub repository." - type: "string" - type: - description: "A GitHub repository where remote dependency components can be\ - \ found." - const: "githubwithname" - required: - - "name" - - "repo" - - "type" - additionalProperties: false - ViashhubRepositoryWithName: - description: "A Viash-Hub repository where remote dependency components can be\ - \ found." - type: "object" - properties: - name: - description: "The identifier used to refer to this repository from dependencies." - type: "string" - path: - description: "Defines a subfolder of the repository to use as base to look\ - \ for the dependency components." - type: "string" - tag: - description: "Defines which version of the dependency component to use. Typically\ - \ this can be a specific tag, branch or commit hash." - type: "string" - repo: - description: "The name of the Viash-Hub repository." 
- type: "string" - type: - description: "A Viash-Hub repository where remote dependency components can\ - \ be found." - const: "viashhubwithname" - required: - - "name" - - "repo" - - "type" - additionalProperties: false - DockerSetupStrategy: - enum: - - "cb" - - "ifneedbepullelsecachedbuild" - - "donothing" - - "gentlepush" - - "alwayspullelsebuild" - - "build" - - "alwayspull" - - "alwaysbuild" - - "ifneedbebuild" - - "pullelsebuild" - - "p" - - "alwayspullelsecachedbuild" - - "pull" - - "maybepush" - - "ifneedbepullelsebuild" - - "cachedbuild" - - "pullelsecachedbuild" - - "push" - - "forcepush" - - "alwayspush" - - "b" - - "pushifnotpresent" - - "alwayscachedbuild" - - "meh" - - "ifneedbepull" - - "ifneedbecachedbuild" - $comment: "TODO add descriptions to different strategies" - description: "The Docker setup strategy to use when building a container." - Direction: - enum: - - "input" - - "output" - description: "Makes this argument an `input` or an `output`, as in does the file/folder\ - \ needs to be read or written. `input` by default." - Status: - enum: - - "enabled" - - "disabled" - - "deprecated" - description: "Allows setting a component to active, deprecated or disabled." - DockerResolveVolume: - enum: - - "manual" - - "automatic" - - "auto" - - "Manual" - - "Automatic" - - "Auto" - $comment: "TODO make fully case insensitive" - description: "Enables or disables automatic volume mapping. Enabled when set to\ - \ `Automatic` or disabled when set to `Manual`. 
Default: `Automatic`" - DoubleStrings: - enum: - - "+.inf" - - "+inf" - - "+infinity" - - "positiveinfinity" - - "positiveinf" - - "-.inf" - - "-inf" - - "-infinity" - - "negativeinfinity" - - "negativeinf" - - ".nan" - - "nan" - DoubleWithInf: - oneOf: - - type: "number" - - $ref: "#/definitions/DoubleStrings" -oneOf: -- $ref: "#/definitions/Config" From f71ed871a8f76a8c4848e1c0efe5bb9904f282eb Mon Sep 17 00:00:00 2001 From: Dorien <41797896+dorien-er@users.noreply.github.com> Date: Thu, 11 Jul 2024 10:33:33 +0200 Subject: [PATCH 02/25] update multiple separator (#81) * update multiple separator * update changelog * Update src/multiqc/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Update src/multiqc/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Update src/multiqc/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Update src/multiqc/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * update ifs --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 2 ++ src/multiqc/config.vsh.yaml | 12 ++++-------- src/multiqc/script.sh | 8 ++++---- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fc960f4..80b8b9f3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ * `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode`. +* `multiqc`: update multiple separator to `;` (PR #81). + ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1`. 
diff --git a/src/multiqc/config.vsh.yaml b/src/multiqc/config.vsh.yaml index 0a3a784b..df5e38e1 100644 --- a/src/multiqc/config.vsh.yaml +++ b/src/multiqc/config.vsh.yaml @@ -54,25 +54,21 @@ argument_groups: - name: "--include_modules" type: string multiple: true - multiple_sep: "," - example: fastqc,cutadapt + example: [fastqc, cutadapt] description: Use only these module - name: "--exclude_modules" type: string multiple: true - multiple_sep: "," - example: fastqc,cutadapt + example: [fastqc, cutadapt] description: Do not use only these modules - name: "--ignore_analysis" type: string multiple: true - multiple_sep: "," - example: run_one/*,run_two/* + example: [run_one/*, run_two/*] - name: "--ignore_samples" type: string multiple: true - multiple_sep: "," - example: sample_1*,sample_3* + example: [sample_1*, sample_3*] - name: "--ignore_symlinks" type: boolean_true description: Ignore symlinked directories and files diff --git a/src/multiqc/script.sh b/src/multiqc/script.sh index 6353eb11..ad8c1c0c 100755 --- a/src/multiqc/script.sh +++ b/src/multiqc/script.sh @@ -38,7 +38,7 @@ IFS=";" read -ra inputs <<< $par_input if [[ -n "$par_include_modules" ]]; then include_modules="" - IFS="," read -ra incl_modules <<< $par_include_modules + IFS=";" read -ra incl_modules <<< $par_include_modules for i in "${incl_modules[@]}"; do include_modules+="--include $i " done @@ -47,7 +47,7 @@ fi if [[ -n "$par_exclude_modules" ]]; then exclude_modules="" - IFS="," read -ra excl_modules <<< $par_exclude_modules + IFS=";" read -ra excl_modules <<< $par_exclude_modules for i in "${excl_modules[@]}"; do exclude_modules+="--exclude $i" done @@ -56,7 +56,7 @@ fi if [[ -n "$par_ignore_analysis" ]]; then ignore="" - IFS="," read -ra ignore_analysis <<< $par_ignore_analysis + IFS=";" read -ra ignore_analysis <<< $par_ignore_analysis for i in "${ignore_analysis[@]}"; do ignore+="--ignore $i " done @@ -65,7 +65,7 @@ fi if [[ -n "$par_ignore_samples" ]]; then ignore_samples="" - IFS="," read 
-ra ign_samples <<< $par_ignore_samples + IFS=";" read -ra ign_samples <<< $par_ignore_samples for i in "${ign_samples[@]}"; do ignore_samples+="--ignore-samples $i" done From 7d99065ecf66e6bc42b03f8ffcfcfc95ef2d2b72 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 17 Jul 2024 17:46:44 +0200 Subject: [PATCH 03/25] `bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline (#75) * `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline * add missing metadata * remove unicode * trigger * process comments * add authors * Apply suggestions from code review Co-authored-by: Dorien <41797896+dorien-er@users.noreply.github.com> --------- Co-authored-by: Dorien <41797896+dorien-er@users.noreply.github.com> --- CHANGELOG.md | 6 + src/_authors/robrecht_cannoodt.yaml | 14 ++ src/_authors/weiwei_schultz.yaml | 5 + .../config.vsh.yaml | 143 ++++++++++++++++ .../bd_rhapsody_make_reference/help.txt | 66 +++++++ .../make_rhap_reference_2.2.1_nodocker.cwl | 115 +++++++++++++ .../bd_rhapsody_make_reference/script.py | 161 ++++++++++++++++++ .../bd_rhapsody_make_reference/test.sh | 68 ++++++++ .../test_data/reference_small.fa | 27 +++ .../test_data/reference_small.gtf | 8 + .../test_data/script.sh | 47 +++++ 11 files changed, 660 insertions(+) create mode 100644 src/_authors/robrecht_cannoodt.yaml create mode 100644 src/_authors/weiwei_schultz.yaml create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/help.txt create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/script.py create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test.sh create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa create mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf create 
mode 100644 src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 80b8b9f3..9cfacdbc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # biobox x.x.x +## NEW FEATURES + +* `bd_rhapsody`: + + - `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline (PR #75). + ## BUG FIXES * `pear`: fix component not exiting with the correct exitcode when PEAR fails. diff --git a/src/_authors/robrecht_cannoodt.yaml b/src/_authors/robrecht_cannoodt.yaml new file mode 100644 index 00000000..d7c0f283 --- /dev/null +++ b/src/_authors/robrecht_cannoodt.yaml @@ -0,0 +1,14 @@ +name: Robrecht Cannoodt +info: + links: + email: robrecht@data-intuitive.com + github: rcannood + orcid: "0000-0003-3641-729X" + linkedin: robrechtcannoodt + organizations: + - name: Data Intuitive + href: https://www.data-intuitive.com + role: Data Science Engineer + - name: Open Problems + href: https://openproblems.bio + role: Core Member \ No newline at end of file diff --git a/src/_authors/weiwei_schultz.yaml b/src/_authors/weiwei_schultz.yaml new file mode 100644 index 00000000..324f9378 --- /dev/null +++ b/src/_authors/weiwei_schultz.yaml @@ -0,0 +1,5 @@ +name: Weiwei Schultz +info: + organizations: + - name: Janssen R&D US + role: Associate Director Data Sciences \ No newline at end of file diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml b/src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml new file mode 100644 index 00000000..e596bf06 --- /dev/null +++ b/src/bd_rhapsody/bd_rhapsody_make_reference/config.vsh.yaml @@ -0,0 +1,143 @@ +name: bd_rhapsody_make_reference +namespace: bd_rhapsody +description: | + The Reference Files Generator creates an archive containing Genome Index + and Transcriptome annotation files needed for the BD Rhapsody Sequencing + Analysis Pipeline. 
The app takes as input one or more FASTA and GTF files + and produces a compressed archive in the form of a tar.gz file. The + archive contains: + + - STAR index + - Filtered GTF file +keywords: [genome, reference, index, align] +links: + repository: https://bitbucket.org/CRSwDev/cwl/src/master/v2.2.1/Extra_Utilities/ + documentation: https://bd-rhapsody-bioinfo-docs.genomics.bd.com/resources/extra_utilities.html#make-rhapsody-reference +license: Unknown +authors: + - __merge__: /src/_authors/robrecht_cannoodt.yaml + roles: [ author, maintainer ] + - __merge__: /src/_authors/weiwei_schultz.yaml + roles: [ contributor ] + +argument_groups: + - name: Inputs + arguments: + - type: file + name: --genome_fasta + required: true + description: Reference genome file in FASTA or FASTA.GZ format. The BD Rhapsody Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse. + example: genome_sequence.fa.gz + multiple: true + info: + config_key: Genome_fasta + - type: file + name: --gtf + required: true + description: | + File path to the transcript annotation files in GTF or GTF.GZ format. The Sequence Analysis Pipeline requires the 'gene_name' or + 'gene_id' attribute to be set on each gene and exon feature. Gene and exon feature lines must have the same attribute, and exons + must have a corresponding gene with the same value. For TCR/BCR assays, the TCR or BCR gene segments must have the 'gene_type' or + 'gene_biotype' attribute set, and the value should begin with 'TR' or 'IG', respectively. + example: transcriptome_annotation.gtf.gz + multiple: true + info: + config_key: Gtf + - type: file + name: --extra_sequences + description: | + File path to additional sequences in FASTA format to use when building the STAR index. (e.g. transgenes or CRISPR guide barcodes). + GTF lines for these sequences will be automatically generated and combined with the main GTF. 
+ required: false + multiple: true + info: + config_key: Extra_sequences + - name: Outputs + arguments: + - type: file + name: --reference_archive + direction: output + required: true + description: | + A Compressed archive containing the Reference Genome Index and annotation GTF files. This archive is meant to be used as an + input in the BD Rhapsody Sequencing Analysis Pipeline. + example: star_index.tar.gz + - name: Arguments + arguments: + - type: string + name: --mitochondrial_contigs + description: | + Names of the Mitochondrial contigs in the provided Reference Genome. Fragments originating from contigs other than these are + identified as 'nuclear fragments' in the ATACseq analysis pipeline. + required: false + multiple: true + default: [chrM, chrMT, M, MT] + info: + config_key: Mitochondrial_Contigs + - type: boolean_true + name: --filtering_off + description: | + By default the input Transcript Annotation files are filtered based on the gene_type/gene_biotype attribute. Only features + having the following attribute values are kept: + + - protein_coding + - lncRNA (lincRNA and antisense for Gencode < v31/M22/Ensembl97) + - IG_LV_gene + - IG_V_gene + - IG_V_pseudogene + - IG_D_gene + - IG_J_gene + - IG_J_pseudogene + - IG_C_gene + - IG_C_pseudogene + - TR_V_gene + - TR_V_pseudogene + - TR_D_gene + - TR_J_gene + - TR_J_pseudogene + - TR_C_gene + + If you have already pre-filtered the input Annotation files and/or wish to turn-off the filtering, please set this option to True. + info: + config_key: Filtering_off + - type: boolean_true + name: --wta_only_index + description: Build a WTA only index, otherwise builds a WTA + ATAC index. + info: + config_key: WTA_Only + - type: string + name: --extra_star_params + description: Additional parameters to pass to STAR when building the genome index. Specify exactly like how you would on the command line.
+ example: --limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11 + required: false + info: + config_key: Extra_STAR_params + +resources: + - type: python_script + path: script.py + - path: make_rhap_reference_2.2.1_nodocker.cwl + +test_resources: + - type: bash_script + path: test.sh + - path: test_data + +requirements: + commands: [ "cwl-runner" ] + +engines: + - type: docker + image: bdgenomics/rhapsody:2.2.1 + setup: + - type: apt + packages: [procps] + - type: python + packages: [cwlref-runner, cwl-runner] + - type: docker + run: | + echo "bdgenomics/rhapsody: 2.2.1" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/help.txt b/src/bd_rhapsody/bd_rhapsody_make_reference/help.txt new file mode 100644 index 00000000..cd038b25 --- /dev/null +++ b/src/bd_rhapsody/bd_rhapsody_make_reference/help.txt @@ -0,0 +1,66 @@ +```bash +cwl-runner src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl --help +``` + +usage: src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl + [-h] [--Archive_prefix ARCHIVE_PREFIX] + [--Extra_STAR_params EXTRA_STAR_PARAMS] + [--Extra_sequences EXTRA_SEQUENCES] [--Filtering_off] --Genome_fasta + GENOME_FASTA --Gtf GTF [--Maximum_threads MAXIMUM_THREADS] + [--Mitochondrial_Contigs MITOCHONDRIAL_CONTIGS] [--WTA_Only] + [job_order] + +The Reference Files Generator creates an archive containing Genome Index and +Transcriptome annotation files needed for the BD Rhapsodyâ„¢ Sequencing +Analysis Pipeline. The app takes as input one or more FASTA and GTF files and +produces a compressed archive in the form of a tar.gz file. 
The archive +contains:\n - STAR index\n - Filtered GTF file + +positional arguments: + job_order Job input json file + +options: + -h, --help show this help message and exit + --Archive_prefix ARCHIVE_PREFIX + A prefix for naming the compressed archive file + containing the Reference genome index and annotation + files. The default value is constructed based on the + input Reference files. + --Extra_STAR_params EXTRA_STAR_PARAMS + Additional parameters to pass to STAR when building + the genome index. Specify exactly like how you would + on the command line. Example: --limitGenomeGenerateRAM + 48000 --genomeSAindexNbases 11 + --Extra_sequences EXTRA_SEQUENCES + Additional sequences in FASTA format to use when + building the STAR index. (E.g. phiX genome) + --Filtering_off By default the input Transcript Annotation files are + filtered based on the gene_type/gene_biotype + attribute. Only features having the following + attribute values are are kept: - protein_coding - + lncRNA (lincRNA and antisense for Gencode < + v31/M22/Ensembl97) - IG_LV_gene - IG_V_gene - + IG_V_pseudogene - IG_D_gene - IG_J_gene - + IG_J_pseudogene - IG_C_gene - IG_C_pseudogene - + TR_V_gene - TR_V_pseudogene - TR_D_gene - TR_J_gene - + TR_J_pseudogene - TR_C_gene If you have already pre- + filtered the input Annotation files and/or wish to + turn-off the filtering, please set this option to + True. + --Genome_fasta GENOME_FASTA + Reference genome file in FASTA format. The BD + Rhapsodyâ„¢ Sequencing Analysis Pipeline uses GRCh38 + for Human and GRCm39 for Mouse. + --Gtf GTF Transcript annotation files in GTF format. The BD + Rhapsodyâ„¢ Sequencing Analysis Pipeline uses Gencode + v42 for Human and M31 for Mouse. + --Maximum_threads MAXIMUM_THREADS + The maximum number of threads to use in the pipeline. + By default, all available cores are used. + --Mitochondrial_Contigs MITOCHONDRIAL_CONTIGS + Names of the Mitochondrial contigs in the provided + Reference Genome. 
Fragments originating from contigs + other than these are identified as 'nuclear fragments' + in the ATACseq analysis pipeline. + --WTA_Only Build a WTA only index, otherwise builds a WTA + ATAC + index. diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl b/src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl new file mode 100644 index 00000000..fead2c02 --- /dev/null +++ b/src/bd_rhapsody/bd_rhapsody_make_reference/make_rhap_reference_2.2.1_nodocker.cwl @@ -0,0 +1,115 @@ +requirements: + InlineJavascriptRequirement: {} +class: CommandLineTool +label: Reference Files Generator for BD Rhapsodyâ„¢ Sequencing Analysis Pipeline +cwlVersion: v1.2 +doc: >- + The Reference Files Generator creates an archive containing Genome Index and Transcriptome annotation files needed for the BD Rhapsodyâ„¢ Sequencing Analysis Pipeline. The app takes as input one or more FASTA and GTF files and produces a compressed archive in the form of a tar.gz file. The archive contains:\n - STAR index\n - Filtered GTF file + + +baseCommand: run_reference_generator.sh +inputs: + Genome_fasta: + type: File[] + label: Reference Genome + doc: |- + Reference genome file in FASTA format. The BD Rhapsodyâ„¢ Sequencing Analysis Pipeline uses GRCh38 for Human and GRCm39 for Mouse. + inputBinding: + prefix: --reference-genome + shellQuote: false + Gtf: + type: File[] + label: Transcript Annotations + doc: |- + Transcript annotation files in GTF format. The BD Rhapsodyâ„¢ Sequencing Analysis Pipeline uses Gencode v42 for Human and M31 for Mouse. + inputBinding: + prefix: --gtf + shellQuote: false + Extra_sequences: + type: File[]? + label: Extra Sequences + doc: |- + Additional sequences in FASTA format to use when building the STAR index. (E.g. phiX genome) + inputBinding: + prefix: --extra-sequences + shellQuote: false + Mitochondrial_Contigs: + type: string[]? 
+ default: ["chrM", "chrMT", "M", "MT"] + label: Mitochondrial Contig Names + doc: |- + Names of the Mitochondrial contigs in the provided Reference Genome. Fragments originating from contigs other than these are identified as 'nuclear fragments' in the ATACseq analysis pipeline. + inputBinding: + prefix: --mitochondrial-contigs + shellQuote: false + Filtering_off: + type: boolean? + label: Turn off filtering + doc: |- + By default the input Transcript Annotation files are filtered based on the gene_type/gene_biotype attribute. Only features having the following attribute values are are kept: + - protein_coding + - lncRNA (lincRNA and antisense for Gencode < v31/M22/Ensembl97) + - IG_LV_gene + - IG_V_gene + - IG_V_pseudogene + - IG_D_gene + - IG_J_gene + - IG_J_pseudogene + - IG_C_gene + - IG_C_pseudogene + - TR_V_gene + - TR_V_pseudogene + - TR_D_gene + - TR_J_gene + - TR_J_pseudogene + - TR_C_gene + If you have already pre-filtered the input Annotation files and/or wish to turn-off the filtering, please set this option to True. + inputBinding: + prefix: --filtering-off + shellQuote: false + WTA_Only: + type: boolean? + label: WTA only index + doc: Build a WTA only index, otherwise builds a WTA + ATAC index. + inputBinding: + prefix: --wta-only-index + shellQuote: false + Archive_prefix: + type: string? + label: Archive Prefix + doc: |- + A prefix for naming the compressed archive file containing the Reference genome index and annotation files. The default value is constructed based on the input Reference files. + inputBinding: + prefix: --archive-prefix + shellQuote: false + Extra_STAR_params: + type: string? + label: Extra STAR Params + doc: |- + Additional parameters to pass to STAR when building the genome index. Specify exactly like how you would on the command line. + Example: + --limitGenomeGenerateRAM 48000 --genomeSAindexNbases 11 + inputBinding: + prefix: --extra-star-params + shellQuote: true + + Maximum_threads: + type: int? 
+ label: Maximum Number of Threads + doc: |- + The maximum number of threads to use in the pipeline. By default, all available cores are used. + inputBinding: + prefix: --maximum-threads + shellQuote: false + +outputs: + + Archive: + type: File + doc: |- + A Compressed archive containing the Reference Genome Index and annotation GTF files. This archive is meant to be used as an input in the BD Rhapsodyâ„¢ Sequencing Analysis Pipeline. + id: Reference_Archive + label: Reference Files Archive + outputBinding: + glob: '*.tar.gz' + diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/script.py b/src/bd_rhapsody/bd_rhapsody_make_reference/script.py new file mode 100644 index 00000000..ca635508 --- /dev/null +++ b/src/bd_rhapsody/bd_rhapsody_make_reference/script.py @@ -0,0 +1,161 @@ +import os +import re +import subprocess +import tempfile +from typing import Any +import yaml +import shutil + +## VIASH START +par = { + "genome_fasta": [], + "gtf": [], + "extra_sequences": [], + "mitochondrial_contigs": ["chrM", "chrMT", "M", "MT"], + "filtering_off": False, + "wta_only_index": False, + "extra_star_params": None, + "reference_archive": "output.tar.gz", +} +meta = { + "config": "target/nextflow/reference/build_bdrhap_2_reference/.config.vsh.yaml", + "resources_dir": os.path.abspath("src/reference/build_bdrhap_2_reference"), + "temp_dir": os.getenv("VIASH_TEMP"), + "memory_mb": None, + "cpus": None +} +## VIASH END + +def clean_arg(argument): + argument["clean_name"] = re.sub("^-*", "", argument["name"]) + return argument + +def read_config(path: str) -> dict[str, Any]: + with open(path, "r") as f: + config = yaml.safe_load(f) + + config["all_arguments"] = [ + clean_arg(arg) + for grp in config["argument_groups"] + for arg in grp["arguments"] + ] + + return config + +def strip_margin(text: str) -> str: + return re.sub(r"(\n?)[ \t]*\|", r"\1", text) + +def process_params(par: dict[str, Any], config) -> dict[str, Any]: + # check input parameters + assert par["genome_fasta"], "Pass at
least one set of inputs to --genome_fasta." + assert par["gtf"], "Pass at least one set of inputs to --gtf." + assert par["reference_archive"].endswith(".tar.gz"), "Output reference_archive must end with .tar.gz." + + # make paths absolute + for argument in config["all_arguments"]: + if par[argument["clean_name"]] and argument["type"] == "file": + if isinstance(par[argument["clean_name"]], list): + par[argument["clean_name"]] = [ os.path.abspath(f) for f in par[argument["clean_name"]] ] + else: + par[argument["clean_name"]] = os.path.abspath(par[argument["clean_name"]]) + + return par + +def generate_config(par: dict[str, Any], meta, config) -> str: + content_list = [strip_margin(f"""\ + |#!/usr/bin/env cwl-runner + | + |""")] + + + config_key_value_pairs = [] + for argument in config["all_arguments"]: + config_key = (argument.get("info") or {}).get("config_key") + arg_type = argument["type"] + par_value = par[argument["clean_name"]] + if par_value and config_key: + config_key_value_pairs.append((config_key, arg_type, par_value)) + + if meta["cpus"]: + config_key_value_pairs.append(("Maximum_threads", "integer", meta["cpus"])) + + # print(config_key_value_pairs) + + for config_key, arg_type, par_value in config_key_value_pairs: + if arg_type == "file": + entry = strip_margin(f"""\ + |{config_key}: + |""") + if isinstance(par_value, list): + for file in par_value: + entry += strip_margin(f"""\ + | - class: File + | location: "{file}" + |""") + else: + entry += strip_margin(f"""\ + | class: File + | location: "{par_value}" + |""") + content_list.append(entry) + else: + content_list.append(strip_margin(f"""\ + |{config_key}: {par_value} + |""")) + + ## Return the config content (the caller writes it to file) + return "".join(content_list) + +def get_cwl_file(meta: dict[str, Any]) -> str: + # create cwl file (if need be) + cwl_file=os.path.join(meta["resources_dir"], "make_rhap_reference_2.2.1_nodocker.cwl") + + return cwl_file + +def main(par: dict[str, Any], meta: dict[str, Any]): + config =
read_config(meta["config"]) + + # Preprocess params + par = process_params(par, config) + + # fetch cwl file + cwl_file = get_cwl_file(meta) + + # Create output dir if not exists + outdir = os.path.dirname(par["reference_archive"]) + if not os.path.exists(outdir): + os.makedirs(outdir) + + ## Run pipeline + with tempfile.TemporaryDirectory(prefix="cwl-bd_rhapsody_wta-", dir=meta["temp_dir"]) as temp_dir: + # Create params file + config_file = os.path.join(temp_dir, "config.yml") + config_content = generate_config(par, meta, config) + with open(config_file, "w") as f: + f.write(config_content) + + + cmd = [ + "cwl-runner", + "--no-container", + "--preserve-entire-environment", + "--outdir", + temp_dir, + cwl_file, + config_file + ] + + env = dict(os.environ) + env["TMPDIR"] = temp_dir + + print("> " + " ".join(cmd), flush=True) + _ = subprocess.check_call( + cmd, + cwd=os.path.dirname(config_file), + env=env + ) + + shutil.move(os.path.join(temp_dir, "Rhap_reference.tar.gz"), par["reference_archive"]) + +if __name__ == "__main__": + main(par, meta) diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh new file mode 100644 index 00000000..3637160a --- /dev/null +++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +set -e + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_doesnt_exist() { + [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; } +} +assert_file_empty() { + # () will execute in a shubshell, could you use {;}? + [ ! 
-s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; } +} +assert_file_not_empty() { + # [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1) + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + # grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1) + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_file_not_contains() { + # grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1) + grep -q "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; } +} + +in_fa="$meta_resources_dir/test_data/reference_small.fa" +in_gtf="$meta_resources_dir/test_data/reference_small.gtf" + +echo "#############################################" +echo "> Simple run" + +mkdir simple_run +cd simple_run + +out_tar="myreference.tar.gz" + +echo "> Running $meta_name." +$meta_executable \ + --genome_fasta "$in_fa" \ + --gtf "$in_gtf" \ + --reference_archive "$out_tar" \ + --extra_star_params "--genomeSAindexNbases 6" \ + ---cpus 2 + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +assert_file_exists "$out_tar" +assert_file_not_empty "$out_tar" + +echo ">> Checking whether output contains the expected files" +tar -xvf "$out_tar" > /dev/null +assert_file_exists "BD_Rhapsody_Reference_Files/star_index/genomeParameters.txt" +assert_file_exists "BD_Rhapsody_Reference_Files/bwa-mem2_index/reference_small.ann" +assert_file_exists "BD_Rhapsody_Reference_Files/reference_small-processed.gtf" +assert_file_exists "BD_Rhapsody_Reference_Files/mitochondrial_contigs.txt" +assert_file_contains "BD_Rhapsody_Reference_Files/reference_small-processed.gtf" "chr1.*HAVANA.*ENSG00000243485" +assert_file_contains "BD_Rhapsody_Reference_Files/mitochondrial_contigs.txt" 'chrMT' + +cd .. + +echo "#############################################" + +echo "> Tests succeeded!" 
\ No newline at end of file diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa new file mode 100644 index 00000000..386d887c --- /dev/null +++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.fa @@ -0,0 +1,27 @@ +>chr1 1 +TGGGGAAGCAAGGCGGAGTTGGGCAGCTCGTGTTCAATGGGTAGAGTTTCAGGCTGGGGT +GATGGAAGGGTGCTGGAAATGAGTGGTAGTGATGGCGGCACAACAGTGTGAATCTACTTA +ATCCCACTGAACTGTATGCTGAAAAATGGTTTAGACGGTGAATTTTAGGTTATGTATGTT +TTACCACAATTTTTAAAAAGCTAGTGAAAAGCTGGTAAAAAGAAAGAAAAGAGGCTTTTT +TAAAAAGTTAAATATATAAAAAGAGCATCATCAGTCCAAAGTCCAGCAGTTGTCCCTCCT +GGAATCCGTTGGCTTGCCTCCGGCATTTTTGGCCCTTGCCTTTTAGGGTTGCCAGATTAA +AAGACAGGATGCCCAGCTAGTTTGAATTTTAGATAAACAACGAATAATTTCGTAGCATAA +ATATGTCCCAAGCTTAGTTTGGGACATACTTATGCTAAAAAACATTATTGGTTGTTTATC +TGAGATTCAGAATTAAGCATTTTATATTTTATTTGCTGCCTCTGGCCACCCTACTCTCTT +CCTAACACTCTCTCCCTCTCCCAGTTTTGTCCGCCTTCCCTGCCTCCTCTTCTGGGGGAG +TTAGATCGAGTTGTAACAAGAACATGCCACTGTCTCGCTGGCTGCAGCGTGTGGTCCCCT +TACCAGAGGTAAAGAAGAGATGGATCTCCACTCATGTTGTAGACAGAATGTTTATGTCCT +CTCCAAATGCTTATGTTGAAACCCTAACCCCTAATGTGATGGTATGTGGAGATGGGCCTT +TGGTAGGTAATTACGGTTAGATGAGGTCATGGGGTGGGGCCCTCATTATAGATCTGGTAA +GAAAAGAGAGCATTGTCTCTGTGTCTCCCTCTCTCTCTCTCTCTCTCTCTCTCATTTCTC +TCTATCTCATTTCTCTCTCTCTCGCTATCTCATTTTTCTCTCTCTCTCTTTCTCTCCTCT +GTCTTTTCCCACCAAGTGAGGATGCGAAGAGAAGGTGGCTGTCTGCAAACCAGGAAGAGA +GCCCTCACCGGGAACCCGTCCAGCTGCCACCTTGAACTTGGACTTCCAAGCCTCCAGAAC +TGTGAGGGATAAATGTATGATTTTAAAGTCGCCCAGTGTGTGGTATTTTGTTTTGACTAA +TACAACCTGAAAACATTTTCCCCTCACTCCACCTGAGCAATATCTGAGTGGCTTAAGGTA +CTCAGGACACAACAAAGGAGAAATGTCCCATGCACAAGGTGCACCCATGCCTGGGTAAAG +CAGCCTGGCACAGAGGGAAGCACACAGGCTCAGGGATCTGCTATTCATTCTTTGTGTGAC +CCTGGGCAAGCCATGAATGGAGCTTCAGTCACCCCATTTGTAATGGGATTTAATTGTGCT +TGCCCTGCCTCCTTTTGAGGGCTGTAGAGAAAAGATGTCAAAGTATTTTGTAATCTGGCT +GGGCGTGGTGGCTCATGCCTGTAATCCTAGCACTTTGGTAGGCTGACGCGAGAGGACTGC +T diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf 
b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf new file mode 100644 index 00000000..7ba83523 --- /dev/null +++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/reference_small.gtf @@ -0,0 +1,8 @@ +chr1 HAVANA exon 565 668 . + . gene_id "ENSG00000243485.5"; transcript_id "ENST00000473358.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-202"; exon_number 2; exon_id "ENSE00001922571.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "dotter_confirmed"; tag "basic"; tag "Ensembl_canonical"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002840.1"; +chr1 HAVANA exon 977 1098 . + . gene_id "ENSG00000243485.5"; transcript_id "ENST00000473358.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-202"; exon_number 3; exon_id "ENSE00001827679.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "dotter_confirmed"; tag "basic"; tag "Ensembl_canonical"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002840.1"; +chr1 HAVANA transcript 268 1110 . + . gene_id "ENSG00000243485.5"; transcript_id "ENST00000469289.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-201"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002841.2"; +chr1 HAVANA exon 268 668 . + . 
gene_id "ENSG00000243485.5"; transcript_id "ENST00000469289.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-201"; exon_number 1; exon_id "ENSE00001841699.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002841.2"; +chr1 HAVANA exon 977 1110 . + . gene_id "ENSG00000243485.5"; transcript_id "ENST00000469289.1"; gene_type "lncRNA"; gene_name "MIR1302-2HG"; transcript_type "lncRNA"; transcript_name "MIR1302-2HG-201"; exon_number 2; exon_id "ENSE00001890064.1"; level 2; transcript_support_level "5"; hgnc_id "HGNC:52482"; tag "not_best_in_genome_evidence"; tag "basic"; havana_gene "OTTHUMG00000000959.2"; havana_transcript "OTTHUMT00000002841.2"; +chr1 ENSEMBL gene 367 504 . + . gene_id "ENSG00000284332.1"; gene_type "miRNA"; gene_name "MIR1302-2"; level 3; hgnc_id "HGNC:35294"; +chr1 ENSEMBL transcript 367 504 . + . gene_id "ENSG00000284332.1"; transcript_id "ENST00000607096.1"; gene_type "miRNA"; gene_name "MIR1302-2"; transcript_type "miRNA"; transcript_name "MIR1302-2-201"; level 3; transcript_support_level "NA"; hgnc_id "HGNC:35294"; tag "basic"; tag "Ensembl_canonical"; +chr1 ENSEMBL exon 367 504 . + . 
gene_id "ENSG00000284332.1"; transcript_id "ENST00000607096.1"; gene_type "miRNA"; gene_name "MIR1302-2"; transcript_type "miRNA"; transcript_name "MIR1302-2-201"; exon_number 1; exon_id "ENSE00003695741.1"; level 3; transcript_support_level "NA"; hgnc_id "HGNC:35294"; tag "basic"; tag "Ensembl_canonical"; diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh new file mode 100644 index 00000000..8d468064 --- /dev/null +++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test_data/script.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +TMP_DIR=/tmp/bd_rhapsody_make_reference +OUT_DIR=src/bd_rhapsody/bd_rhapsody_make_reference/test_data + +# check if seqkit is installed +if ! command -v seqkit &> /dev/null; then + echo "seqkit could not be found" + exit 1 +fi + +# create temporary directory and clean up on exit +mkdir -p $TMP_DIR +function clean_up { + rm -rf "$TMP_DIR" +} +trap clean_up EXIT + +# fetch reference +ORIG_FA=$TMP_DIR/reference.fa.gz +if [ ! -f $ORIG_FA ]; then + wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/GRCh38.primary_assembly.genome.fa.gz \ + -O $ORIG_FA +fi + +ORIG_GTF=$TMP_DIR/reference.gtf.gz +if [ ! 
-f $ORIG_GTF ]; then + wget https://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_41/gencode.v41.annotation.gtf.gz \ + -O $ORIG_GTF +fi + +# create small reference +START=30000 +END=31500 +CHR=chr1 + +# subset to small region +seqkit grep -r -p "^$CHR\$" "$ORIG_FA" | \ + seqkit subseq -r "$START:$END" > $OUT_DIR/reference_small.fa + +zcat "$ORIG_GTF" | \ + awk -v FS='\t' -v OFS='\t' " + \$1 == \"$CHR\" && \$4 >= $START && \$5 <= $END { + \$4 = \$4 - $START + 1; + \$5 = \$5 - $START + 1; + print; + }" > $OUT_DIR/reference_small.gtf From c2e340d92ea7f153d0c5c9de1cffbc6b88fc4124 Mon Sep 17 00:00:00 2001 From: emmarousseau Date: Wed, 17 Jul 2024 18:10:37 +0200 Subject: [PATCH 04/25] Remove multiple_sep (#78) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. * get rid of multiple_sep fields in configs * Fix coverage argument's format in config --- src/gffread/config.vsh.yaml | 5 +-- src/gffread/script.sh | 2 ++ src/gffread/test.sh | 2 +- src/samtools/samtools_stats/config.vsh.yaml | 40 ++++++++++----------- src/samtools/samtools_stats/script.sh | 3 ++ src/samtools/samtools_stats/test.sh | 2 +- 6 files changed, 28 insertions(+), 26 deletions(-) diff --git a/src/gffread/config.vsh.yaml b/src/gffread/config.vsh.yaml index d2c41a87..7477a284 100644 --- a/src/gffread/config.vsh.yaml +++ b/src/gffread/config.vsh.yaml @@ -8,8 +8,6 @@ links: references: doi: 10.12688/f1000research.23297.2 license: MIT -requirements: - commands: [ gffread ] argument_groups: - name: Inputs arguments: @@ -52,7 +50,7 @@ argument_groups: required: true description: | Write the output records into . 
- default: output.gff + example: output.gff - name: --force_exons type: boolean_true description: | @@ -154,7 +152,6 @@ argument_groups: - name: --table type: string multiple: true - multiple_sep: "," description: | Output a simple tab delimited format instead of GFF, with columns having the values of GFF attributes given in ; special pseudo-attributes (prefixed by @) are diff --git a/src/gffread/script.sh b/src/gffread/script.sh index 9c4a2b8f..cd4abf14 100644 --- a/src/gffread/script.sh +++ b/src/gffread/script.sh @@ -50,6 +50,8 @@ [[ "$par_expose_dups" == "false" ]] && unset par_expose_dups [[ "$par_cluster_only" == "false" ]] && unset par_cluster_only +# if par_table is not empty, replace ";" with "," +par_table=$(echo "$par_table" | tr ';' ',') $(which gffread) \ "$par_input" \ diff --git a/src/gffread/test.sh b/src/gffread/test.sh index 326fce50..ea23edcb 100755 --- a/src/gffread/test.sh +++ b/src/gffread/test.sh @@ -86,7 +86,7 @@ diff "$expected_output_dir/transcripts.fa" "$test_output_dir/transcripts.fa" || echo "> Test 4 - Generate table from GFF annotation file" "$meta_executable" \ - --table @id,@chr,@start,@end,@strand,@exons,Name,gene,product \ + --table "@id;@chr;@start;@end;@strand;@exons;Name;gene;product" \ --outfile "$test_output_dir/annotation.tbl" \ --input "$test_dir/sequence.gff3" diff --git a/src/samtools/samtools_stats/config.vsh.yaml b/src/samtools/samtools_stats/config.vsh.yaml index 0d8f57a4..ca630876 100644 --- a/src/samtools/samtools_stats/config.vsh.yaml +++ b/src/samtools/samtools_stats/config.vsh.yaml @@ -30,10 +30,10 @@ argument_groups: - name: --coverage alternatives: -c type: integer - description: | - Coverage distribution min,max,step [1,1000,1]. multiple: true - multiple_sep: ',' + description: | + Coverage distribution min;max;step. Default: [1, 1000, 1]. 
+ example: [1, 1000, 1] - name: --remove_dups alternatives: -d type: boolean_true @@ -48,25 +48,25 @@ argument_groups: alternatives: -f type: string description: | - Required flag, 0 for unset. See also `samtools flags`. - default: "0" + Required flag, 0 for unset. See also `samtools flags`. Default: `"0"`. + example: "0" - name: --filtering_flag alternatives: -F type: string description: | - Filtering flag, 0 for unset. See also `samtools flags`. - default: "0" + Filtering flag, 0 for unset. See also `samtools flags`. Default: `0`. + example: "0" - name: --GC_depth type: double description: | - The size of GC-depth bins (decreasing bin size increases memory requirement). - default: 20000.0 + The size of GC-depth bins (decreasing bin size increases memory requirement). Default: `20000`. + example: 20000.0 - name: --insert_size alternatives: -i type: integer description: | - Maximum insert size. - default: 8000 + Maximum insert size. Default: `8000`. + example: 8000 - name: --id alternatives: -I type: string @@ -76,14 +76,14 @@ argument_groups: alternatives: -l type: integer description: | - Include in the statistics only reads with the given read length. - default: -1 + Include in the statistics only reads with the given read length. Default: `-1`. + example: -1 - name: --most_inserts alternatives: -m type: double description: | - Report only the main part of inserts. - default: 0.99 + Report only the main part of inserts. Default: `0.99`. + example: 0.99 - name: --split_prefix alternatives: -P type: string @@ -93,8 +93,8 @@ argument_groups: alternatives: -q type: integer description: | - The BWA trimming parameter. - default: 0 + The BWA trimming parameter. Default: `0`. + example: 0 - name: --ref_seq alternatives: -r type: file @@ -124,8 +124,8 @@ argument_groups: alternatives: -g type: integer description: | - Only bases with coverage above this value will be included in the target percentage computation. 
- default: 0 + Only bases with coverage above this value will be included in the target percentage computation. Default: `0`. + example: 0 - name: --input_fmt_option type: string description: | @@ -141,7 +141,7 @@ argument_groups: type: file description: | Output file. - default: "out.txt" + example: "out.txt" required: true direction: output diff --git a/src/samtools/samtools_stats/script.sh b/src/samtools/samtools_stats/script.sh index 6e32e9a5..e3872fc6 100644 --- a/src/samtools/samtools_stats/script.sh +++ b/src/samtools/samtools_stats/script.sh @@ -10,6 +10,9 @@ set -e [[ "$par_sparse" == "false" ]] && unset par_sparse [[ "$par_remove_overlaps" == "false" ]] && unset par_remove_overlaps +# change the coverage input from X;X;X to X,X,X +par_coverage=$(echo "$par_coverage" | tr ';' ',') + samtools stats \ ${par_coverage:+-c "$par_coverage"} \ ${par_remove_dups:+-d} \ diff --git a/src/samtools/samtools_stats/test.sh b/src/samtools/samtools_stats/test.sh index 05d70d30..b515100e 100644 --- a/src/samtools/samtools_stats/test.sh +++ b/src/samtools/samtools_stats/test.sh @@ -17,7 +17,7 @@ echo ">>> Checking whether output is non-empty" [ ! -s "$test_dir/test.paired_end.sorted.txt" ] && echo "File 'test.paired_end.sorted.txt' is empty!" && exit 1 echo ">>> Checking whether output is correct" -# compare using diff, ignoring the line stating the command that was passed. +# compare using diff, ignoring the line stating the command that was passed. 
diff <(grep -v "^# The command" "$test_dir/test.paired_end.sorted.txt") \ <(grep -v "^# The command" "$test_dir/ref.paired_end.sorted.txt") || \ (echo "Output file ref.paired_end.sorted.txt does not match expected output" && exit 1) From 8e9abad885b27120a56a580ca7d961c64b96ad60 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 17 Jul 2024 18:14:21 +0200 Subject: [PATCH 05/25] Update CONTRIBUTING.md (#82) * Update CONTRIBUTING.md * update ctb * clean up helper functions * update changelog * update changelog --- CHANGELOG.md | 28 +++- CONTRIBUTING.md | 151 +++++++++++------- .../bd_rhapsody_make_reference/test.sh | 5 +- src/cutadapt/test.sh | 14 +- src/star/star_align_reads/test.sh | 21 ++- 5 files changed, 130 insertions(+), 89 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cfacdbc..2aad0cb8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,19 +6,33 @@ - `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline (PR #75). -## BUG FIXES +## MINOR CHANGES -* `pear`: fix component not exiting with the correct exitcode when PEAR fails. +* `busco` components: update BUSCO to `5.7.1` (PR #72). -* `cutadapt`: fix `--par_quality_cutoff_r2` argument. +## DOCUMENTATION -* `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode`. +* Extend the contributing guidelines (PR #82): -* `multiqc`: update multiple separator to `;` (PR #81). + - Update format to Viash 0.9. -## MINOR CHANGES + - Descriptions should be formatted in markdown. + + - Add defaults to descriptions, not as a default of the argument. + + - Explain parameter expansion. -* `busco` components: update BUSCO to `5.7.1`. + - Mention that the contents of the output of components in tests should be checked. + +## BUG FIXES + +* `pear`: fix component not exiting with the correct exitcode when PEAR fails (PR #70). + +* `cutadapt`: fix `--par_quality_cutoff_r2` argument (PR #69). 
+ +* `cutadapt`: demultiplexing is now disabled by default. It can be re-enabled by using `demultiplex_mode` (PR #69). + +* `multiqc`: update multiple separator to `;` (PR #81). # biobox 0.1.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7393bc7e..cee4249a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -65,22 +65,21 @@ runners: Fill in the relevant metadata fields in the config. Here is an example of the metadata of an existing component. ```yaml -functionality: - name: arriba - description: Detect gene fusions from RNA-Seq data - keywords: [Gene fusion, RNA-Seq] - links: - homepage: https://arriba.readthedocs.io/en/latest/ - documentation: https://arriba.readthedocs.io/en/latest/ - repository: https://github.com/suhrig/arriba - issue_tracker: https://github.com/suhrig/arriba/issues - references: - doi: 10.1101/gr.257246.119 - bibtex: | - @article{ - ... a bibtex entry in case the doi is not available ... - } - license: MIT +name: arriba +description: Detect gene fusions from RNA-Seq data +keywords: [Gene fusion, RNA-Seq] +links: + homepage: https://arriba.readthedocs.io/en/latest/ + documentation: https://arriba.readthedocs.io/en/latest/ + repository: https://github.com/suhrig/arriba + issue_tracker: https://github.com/suhrig/arriba/issues +references: + doi: 10.1101/gr.257246.119 + bibtex: | + @article{ + ... a bibtex entry in case the doi is not available ... + } +license: MIT ``` ### Step 4: Find a suitable container @@ -162,7 +161,7 @@ argument_groups: type: file description: | File in SAM/BAM/CRAM format with main alignments as generated by STAR - (Aligned.out.sam). Arriba extracts candidate reads from this file. + (`Aligned.out.sam`). Arriba extracts candidate reads from this file. required: true example: Aligned.out.bam ``` @@ -175,7 +174,7 @@ Several notes: * Input arguments can have `multiple: true` to allow the user to specify multiple files. - +* The description should be formatted in markdown. 
### Step 8: Add arguments for the output files @@ -220,7 +219,7 @@ argument_groups: Note: -* Preferably, these outputs should not be directores but files. For example, if a tool outputs a directory `foo/` containing files `foo/bar.txt` and `foo/baz.txt`, there should be two output arguments `--bar` and `--baz` (as opposed to one output argument which outputs the whole `foo/` directory). +* Preferably, these outputs should not be directories but files. For example, if a tool outputs a directory `foo/` containing files `foo/bar.txt` and `foo/baz.txt`, there should be two output arguments `--bar` and `--baz` (as opposed to one output argument which outputs the whole `foo/` directory). ### Step 9: Add arguments for the other arguments @@ -230,6 +229,8 @@ Finally, add all other arguments to the config file. There are a few exceptions: * Arguments related to printing the information such as printing the version (`-v`, `--version`) or printing the help (`-h`, `--help`) should not be added to the config file. +* If the help lists defaults, do not add them as defaults but to the description. Example: `description: . Default: 10.` + ### Step 10: Add a Docker engine @@ -275,10 +276,13 @@ Next, we need to write a runner script that runs the tool with the input argumen ## VIASH START ## VIASH END +# unset flags +[[ "$par_option" == "false" ]] && unset par_option + xxx \ --input "$par_input" \ --output "$par_output" \ - $([ "$par_option" = "true" ] && echo "--option") + ${par_option:+--option} ``` When building a Viash component, Viash will automatically replace the `## VIASH START` and `## VIASH END` lines (and anything in between) with environment variables based on the arguments specified in the config. 
@@ -291,6 +295,11 @@ As an example, this is what the Bash script for the `arriba` component looks lik ## VIASH START ## VIASH END +# unset flags +[[ "$par_skip_duplicate_marking" == "false" ]] && unset par_skip_duplicate_marking +[[ "$par_extra_information" == "false" ]] && unset par_extra_information +[[ "$par_fill_gaps" == "false" ]] && unset par_fill_gaps + arriba \ -x "$par_bam" \ -a "$par_genome" \ @@ -298,26 +307,30 @@ arriba \ -o "$par_fusions" \ ${par_known_fusions:+-k "${par_known_fusions}"} \ ${par_blacklist:+-b "${par_blacklist}"} \ - ${par_structural_variants:+-d "${par_structural_variants}"} \ - $([ "$par_skip_duplicate_marking" = "true" ] && echo "-u") \ - $([ "$par_extra_information" = "true" ] && echo "-X") \ - $([ "$par_fill_gaps" = "true" ] && echo "-I") + # ... + ${par_extra_information:+-X} \ + ${par_fill_gaps:+-I} ``` +Notes: -### Step 12: Create test script +* If your arguments can contain special characters (e.g. `$`), you can quote the value with the Bash `@Q` [parameter transformation](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html) to make sure you can use the string as input. Example: `-x ${par_bam@Q}`. +* Optional arguments can be passed to the command conditionally using Bash [parameter expansion](https://www.gnu.org/software/bash/manual/html_node/Shell-Parameter-Expansion.html). For example: `${par_known_fusions:+-k ${par_known_fusions@Q}}` + +* If your tool allows for multiple inputs using a separator other than `;` (which is the default Viash multiple separator), you can substitute these values with a command like: `par_disable_filters=$(echo $par_disable_filters | tr ';' ',')`. + + +### Step 12: Create test script If the unit test requires test resources, these should be provided in the `test_resources` section of the component. ```yaml -functionality: - # ... 
- test_resources: - - type: bash_script - path: test.sh - - type: file - path: test_data +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data ``` Create a test script at `src/xxx/test.sh` that runs the component with the test data. This script should run the component (available with `$meta_executable`) with the test data and check if the output is as expected. The script should exit with a non-zero exit code if the output is not as expected. For example: @@ -325,48 +338,64 @@ Create a test script at `src/xxx/test.sh` that runs the component with the test ```bash #!/bin/bash +set -e + ## VIASH START ## VIASH END -echo "> Run xxx with test data" +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_doesnt_exist() { + [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; } +} +assert_file_empty() { + [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_file_not_contains() { + grep -q "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; } +} +assert_file_contains_regex() { + grep -q -E "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_file_not_contains_regex() { + grep -q -E "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; } +} +############################################# + +echo "> Run $meta_name with test data" "$meta_executable" \ - --input "$meta_resources_dir/test_data/input.txt" \ + --input "$meta_resources_dir/test_data/reads_R1.fastq" \ --output "output.txt" \ --option -echo ">> Checking output" -[ ! 
-f "output.txt" ] && echo "Output file output.txt does not exist" && exit 1 -``` +echo ">> Check if output exists" +assert_file_exists "output.txt" +echo ">> Check if output is empty" +assert_file_not_empty "output.txt" -For example, this is what the test script for the `arriba` component looks like: +echo ">> Check if output is correct" +assert_file_contains "output.txt" "some expected output" -```bash -#!/bin/bash +echo "> All tests succeeded!" +``` -## VIASH START -## VIASH END +Notes: -echo "> Run arriba with blacklist" -"$meta_executable" \ - --bam "$meta_resources_dir/test_data/A.bam" \ - --genome "$meta_resources_dir/test_data/genome.fasta" \ - --gene_annotation "$meta_resources_dir/test_data/annotation.gtf" \ - --blacklist "$meta_resources_dir/test_data/blacklist.tsv" \ - --fusions "fusions.tsv" \ - --fusions_discarded "fusions_discarded.tsv" \ - --interesting_contigs "1,2" - -echo ">> Checking output" -[ ! -f "fusions.tsv" ] && echo "Output file fusions.tsv does not exist" && exit 1 -[ ! -f "fusions_discarded.tsv" ] && echo "Output file fusions_discarded.tsv does not exist" && exit 1 +* Do always check the contents of the output file. If the output is not deterministic, you can use regular expressions to check the output. -echo ">> Check if output is empty" -[ ! -s "fusions.tsv" ] && echo "Output file fusions.tsv is empty" && exit 1 -[ ! -s "fusions_discarded.tsv" ] && echo "Output file fusions_discarded.tsv is empty" && exit 1 -``` +* If possible, generate your own test data instead of copying it from an external resource. -### Step 12: Create a `/var/software_versions.txt` file +### Step 13: Create a `/var/software_versions.txt` file For the sake of transparency and reproducibility, we require that the versions of the software used in the component are documented. 
@@ -378,6 +407,8 @@ engines: image: quay.io/biocontainers/xxx:0.1.0--py_0 setup: - type: docker + # note: /var/software_versions.txt should contain: + # arriba: "2.4.0" run: | echo "xxx: \"0.1.0\"" > /var/software_versions.txt ``` diff --git a/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh index 3637160a..845c1739 100644 --- a/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh +++ b/src/bd_rhapsody/bd_rhapsody_make_reference/test.sh @@ -11,21 +11,18 @@ assert_file_doesnt_exist() { [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; } } assert_file_empty() { - # () will execute in a shubshell, could you use {;}? [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; } } assert_file_not_empty() { - # [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1) [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } } assert_file_contains() { - # grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1) grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } } assert_file_not_contains() { - # grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1) grep -q "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; } } +############################################# in_fa="$meta_resources_dir/test_data/reference_small.fa" in_gtf="$meta_resources_dir/test_data/reference_small.gtf" diff --git a/src/cutadapt/test.sh b/src/cutadapt/test.sh index 1d6d9c18..28248742 100644 --- a/src/cutadapt/test.sh +++ b/src/cutadapt/test.sh @@ -6,25 +6,25 @@ set -eo pipefail ############################################# # helper functions assert_file_exists() { - [ -f "$1" ] || (echo "File '$1' does not exist" && exit 1) + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } } assert_file_doesnt_exist() { - [ ! -f "$1" ] || (echo "File '$1' exists but shouldn't" && exit 1) + [ ! 
-f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; } } assert_file_empty() { - [ ! -s "$1" ] || (echo "File '$1' is not empty but should be" && exit 1) + [ ! -s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; } } assert_file_not_empty() { - [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1) + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } } assert_file_contains() { - grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1) + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } } assert_file_not_contains() { - grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1) + grep -q "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; } } - ############################################# + mkdir test_multiple_output cd test_multiple_output diff --git a/src/star/star_align_reads/test.sh b/src/star/star_align_reads/test.sh index a15ea599..bd78094d 100644 --- a/src/star/star_align_reads/test.sh +++ b/src/star/star_align_reads/test.sh @@ -7,35 +7,34 @@ meta_executable="target/docker/star/star_align_reads/star_align_reads" meta_resources_dir="src/star/star_align_reads" ## VIASH END -######################################################################################### - +############################################# # helper functions assert_file_exists() { - [ -f "$1" ] || (echo "File '$1' does not exist" && exit 1) + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } } assert_file_doesnt_exist() { - [ ! -f "$1" ] || (echo "File '$1' exists but shouldn't" && exit 1) + [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; } } assert_file_empty() { - [ ! -s "$1" ] || (echo "File '$1' is not empty but should be" && exit 1) + [ ! 
-s "$1" ] || { echo "File '$1' is not empty but should be" && exit 1; } } assert_file_not_empty() { - [ -s "$1" ] || (echo "File '$1' is empty but shouldn't be" && exit 1) + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } } assert_file_contains() { - grep -q "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1) + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } } assert_file_not_contains() { - grep -q "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1) + grep -q "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; } } assert_file_contains_regex() { - grep -q -E "$2" "$1" || (echo "File '$1' does not contain '$2'" && exit 1) + grep -q -E "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } } assert_file_not_contains_regex() { - grep -q -E "$2" "$1" && (echo "File '$1' contains '$2' but shouldn't" && exit 1) + grep -q -E "$2" "$1" && { echo "File '$1' contains '$2' but shouldn't" && exit 1; } } +############################################# -######################################################################################### echo "> Prepare test data" cat > reads_R1.fastq <<'EOF' From 13c5439a0c36f8a1bd3889e68d68ca85672daa62 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Wed, 17 Jul 2024 18:15:08 +0200 Subject: [PATCH 06/25] Add agat convertspgff2gtf (#76) * Fill in the metadata * add help.txt * add test data * update help.txt * add arguments for input file, output file and other arguments * add a Docker engine * Write a runner script * correct --gtf_version choices * update description * update keywords * Create test script * Create a /var/software_versions.txt file * remove duplicated argument * update config * change name to agat_convert_sp_gff2gtf * update license * replace module name by $meta_name in test.sh * Add more info to --gtf_version description * remove extra \ * add additional test: check if the D column in the first line of the GFF was 
correctly converted into GTF format * update changelog * Markdown: add newline before listing * add test to check if the header contains the right GTF version * cleanup * fix formatting --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 3 + .../agat_convert_sp_gff2gtf/config.vsh.yaml | 90 ++++++++++++++++ src/agat/agat_convert_sp_gff2gtf/help.txt | 102 ++++++++++++++++++ src/agat/agat_convert_sp_gff2gtf/script.sh | 10 ++ src/agat/agat_convert_sp_gff2gtf/test.sh | 37 +++++++ .../test_data/0_test.gff | 36 +++++++ .../test_data/script.sh | 9 ++ 7 files changed, 287 insertions(+) create mode 100644 src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml create mode 100644 src/agat/agat_convert_sp_gff2gtf/help.txt create mode 100644 src/agat/agat_convert_sp_gff2gtf/script.sh create mode 100644 src/agat/agat_convert_sp_gff2gtf/test.sh create mode 100644 src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff create mode 100755 src/agat/agat_convert_sp_gff2gtf/test_data/script.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 2aad0cb8..8f56b22e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -100,6 +100,9 @@ - `bedtools_getfasta`: extract sequences from a FASTA file for each of the intervals defined in a BED/GFF/VCF file (PR #59). +* `agat`: + - `agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). + ## MINOR CHANGES * Uniformize component metadata (PR #23). diff --git a/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml new file mode 100644 index 00000000..b788c7c7 --- /dev/null +++ b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml @@ -0,0 +1,90 @@ +name: agat_convert_sp_gff2gtf +namespace: agat +description: | + The script aims to convert any GTF/GFF file into a proper GTF file. Full + information about the format can be found here: + https://agat.readthedocs.io/en/latest/gxf.html You can choose among 7 + different GTF types (1, 2, 2.1, 2.2, 2.5, 3 or relax). 
Depending on the + version selected the script will filter out the features that are not + accepted. For GTF2.5 and 3, every level1 feature (e.g nc_gene + pseudogene) will be converted into gene feature and every level2 feature + (e.g mRNA ncRNA) will be converted into transcript feature. Using the + "relax" option you will produce a GTF-like output keeping all original + feature types (3rd column). No modification will occur e.g. mRNA to + transcript. + + To be fully GTF compliant all features must have a gene_id and a transcript_id + attribute. The gene_id is a unique identifier for the genomic source of + the transcript, which is used to group transcripts into genes. The + transcript_id is a unique identifier for the predicted transcript, which + is used to group features into transcripts. +keywords: [gene annotations, GTF conversion] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/ + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +argument_groups: + - name: Inputs + arguments: + - name: --gff + alternatives: [-i] + description: Input GFF/GTF file that will be read + type: file + required: true + direction: input + example: input.gff + - name: Outputs + arguments: + - name: --output + alternatives: [-o, --out, --outfile, --gtf] + description: Output GTF file. If no output file is specified, the output will be written to STDOUT. + type: file + direction: output + required: true + example: output.gtf + - name: Arguments + arguments: + - name: --gtf_version + description: | + Version of the GTF output (1,2,2.1,2.2,2.5,3 or relax). Default value from AGAT config file (relax for the default config). The script option has the higher priority. + + * relax: all feature types are accepted. 
+ * GTF3 (9 feature types accepted): gene, transcript, exon, CDS, Selenocysteine, start_codon, stop_codon, three_prime_utr and five_prime_utr. + * GTF2.5 (8 feature types accepted): gene, transcript, exon, CDS, UTR, start_codon, stop_codon, Selenocysteine. + * GTF2.2 (9 feature types accepted): CDS, start_codon, stop_codon, 5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon. + * GTF2.1 (6 feature types accepted): CDS, start_codon, stop_codon, exon, 5UTR, 3UTR. + * GTF2 (4 feature types accepted): CDS, start_codon, stop_codon, exon. + * GTF1 (5 feature types accepted): CDS, start_codon, stop_codon, exon, intron. + type: string + choices: [relax, "1", "2", "2.1", "2.2", "2.5", "3"] + required: false + example: "3" + - name: --config + alternatives: [-c] + description: | + Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). 
+ type: file + required: false + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_convert_sp_gff2gtf/help.txt b/src/agat/agat_convert_sp_gff2gtf/help.txt new file mode 100644 index 00000000..fdd45507 --- /dev/null +++ b/src/agat/agat_convert_sp_gff2gtf/help.txt @@ -0,0 +1,102 @@ +```sh +agat_convert_sp_gff2gtf.pl --help +``` + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_convert_sp_gff2gtf.pl + +Description: + The script aims to convert any GTF/GFF file into a proper GTF file. Full + information about the format can be found here: + https://agat.readthedocs.io/en/latest/gxf.html You can choose among 7 + different GTF types (1, 2, 2.1, 2.2, 2.5, 3 or relax). Depending the + version selected the script will filter out the features that are not + accepted. For GTF2.5 and 3, every level1 feature (e.g nc_gene + pseudogene) will be converted into gene feature and every level2 feature + (e.g mRNA ncRNA) will be converted into transcript feature. Using the + "relax" option you will produce a GTF-like output keeping all original + feature types (3rd column). No modification will occur e.g. mRNA to + transcript. + + To be fully GTF compliant all feature have a gene_id and a transcript_id + attribute. 
The gene_id is unique identifier for the genomic source of + the transcript, which is used to group transcripts into genes. The + transcript_id is a unique identifier for the predicted transcript, which + is used to group features into transcripts. + +Usage: + agat_convert_sp_gff2gtf.pl --gff infile.gff [ -o outfile ] + agat_convert_sp_gff2gtf -h + +Options: + --gff, --gtf or -i + Input GFF/GTF file that will be read + + --gtf_version version of the GTF output (1,2,2.1,2.2,2.5,3 or relax). + Default value from AGAT config file (relax for the default config). The + script option has the higher priority. + relax: all feature types are accepted. + + GTF3 (9 feature types accepted): gene, transcript, exon, CDS, + Selenocysteine, start_codon, stop_codon, three_prime_utr and + five_prime_utr + + GTF2.5 (8 feature types accepted): gene, transcript, exon, CDS, + UTR, start_codon, stop_codon, Selenocysteine + + GTF2.2 (9 feature types accepted): CDS, start_codon, stop_codon, + 5UTR, 3UTR, inter, inter_CNS, intron_CNS and exon + + GTF2.1 (6 feature types accepted): CDS, start_codon, stop_codon, + exon, 5UTR, 3UTR + + GTF2 (4 feature types accepted): CDS, start_codon, stop_codon, + exon + + GTF1 (5 feature types accepted): CDS, start_codon, stop_codon, + exon, intron + + -o , --output , --out , --outfile or --gtf + Output GTF file. If no output file is specified, the output will + be written to STDOUT. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + -h or --help + Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. 
Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md + diff --git a/src/agat/agat_convert_sp_gff2gtf/script.sh b/src/agat/agat_convert_sp_gff2gtf/script.sh new file mode 100644 index 00000000..69d66739 --- /dev/null +++ b/src/agat/agat_convert_sp_gff2gtf/script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +agat_convert_sp_gff2gtf.pl \ + -i "$par_gff" \ + -o "$par_output" \ + ${par_gtf_version:+--gtf_version "${par_gtf_version}"} \ + ${par_config:+--config "${par_config}"} diff --git a/src/agat/agat_convert_sp_gff2gtf/test.sh b/src/agat/agat_convert_sp_gff2gtf/test.sh new file mode 100644 index 00000000..1e7cc142 --- /dev/null +++ b/src/agat/agat_convert_sp_gff2gtf/test.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --gff "$test_dir/0_test.gff" \ + --output "output.gtf" + +echo ">> Checking output" +[ ! -f "output.gtf" ] && echo "Output file output.gtf does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! 
-s "output.gtf" ] && echo "Output file output.gtf is empty" && exit 1 + +echo ">> Check if the conversion resulted in the right GTF format" +idGFF=$(head -n 2 "$test_dir/0_test.gff" | grep -o 'ID=[^;]*' | cut -d '=' -f 2-) +expectedGTF="gene_id \"$idGFF\"; ID \"$idGFF\";" +extractedGTF=$(head -n 3 "output.gtf" | grep -o 'gene_id "[^"]*"; ID "[^"]*";') +[ "$extractedGTF" != "$expectedGTF" ] && echo "Output file output.gtf does not have the right format" && exit 1 + +rm output.gtf + +echo "> Run $meta_name with test data and GTF version 2.5" +"$meta_executable" \ + --gff "$test_dir/0_test.gff" \ + --output "output.gtf" \ + --gtf_version "2.5" + +echo ">> Check if the output file header display the right GTF version" +grep -q "##gtf-version 2.5" "output.gtf" +[ $? -ne 0 ] && echo "Output file output.gtf header does not display the right GTF version" && exit 1 + +echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff b/src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff new file mode 100644 index 00000000..fafe86ed --- /dev/null +++ b/src/agat/agat_convert_sp_gff2gtf/test_data/0_test.gff @@ -0,0 +1,36 @@ +##gff-version 3 +scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 +scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 +scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 +scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 +scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 +scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 +scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 340733 340841 . 
+ 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 +scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 +scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 +scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 +scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 +scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 +scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 +scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 +scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 +scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 +scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 +scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 +scaffold789 maker exon 561401 561519 . + . 
ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 +scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 +scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 +scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 +scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/src/agat/agat_convert_sp_gff2gtf/test_data/script.sh b/src/agat/agat_convert_sp_gff2gtf/test_data/script.sh new file mode 100755 index 00000000..e453e772 --- /dev/null +++ b/src/agat/agat_convert_sp_gff2gtf/test_data/script.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +# clone repo +if [ ! 
-d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/gff_syntax/in/0_test.gff src/agat/agat_convert_sp_gff2gtf/test_data From e615d2abb92e56cfc1e1ace9baa308ce10656f9f Mon Sep 17 00:00:00 2001 From: Jakub Majercik <57993790+jakubmajercik@users.noreply.github.com> Date: Wed, 17 Jul 2024 19:44:21 +0200 Subject: [PATCH 07/25] Seqtk sample (#68) * tests added * tests extended * changelog entry added * reorganized seqtk namespace + added seqtk subseq config and script * added subseq help.txt * revert to seqtk sample only * remove subseq * updated tests, added tags * Update two_pass_mode Co-authored-by: Robrecht Cannoodt * author added to config --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 2 + src/_authors/jakub_majercik.yaml | 10 +++ src/seqtk/seqtk_sample/config.vsh.yaml | 57 ++++++++++++++ src/seqtk/seqtk_sample/help.txt | 9 +++ src/seqtk/seqtk_sample/script.sh | 11 +++ src/seqtk/seqtk_sample/test.sh | 104 +++++++++++++++++++++++++ src/seqtk/test_data/reads/a.1.fastq.gz | Bin 0 -> 100 bytes src/seqtk/test_data/reads/a.2.fastq.gz | Bin 0 -> 100 bytes src/seqtk/test_data/reads/a.fastq | 4 + src/seqtk/test_data/reads/a.fastq.gz | Bin 0 -> 44 bytes src/seqtk/test_data/reads/id.list | 1 + src/seqtk/test_data/script.sh | 9 +++ 12 files changed, 207 insertions(+) create mode 100644 src/_authors/jakub_majercik.yaml create mode 100644 src/seqtk/seqtk_sample/config.vsh.yaml create mode 100644 src/seqtk/seqtk_sample/help.txt create mode 100644 src/seqtk/seqtk_sample/script.sh create mode 100644 src/seqtk/seqtk_sample/test.sh create mode 100644 src/seqtk/test_data/reads/a.1.fastq.gz create mode 100644 src/seqtk/test_data/reads/a.2.fastq.gz create mode 100644 src/seqtk/test_data/reads/a.fastq create mode 100644 src/seqtk/test_data/reads/a.fastq.gz create mode 100644 src/seqtk/test_data/reads/id.list create mode 100755 
src/seqtk/test_data/script.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f56b22e..f6a8676f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -93,6 +93,8 @@ * `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43). +* `seqtk/seqtk_sample`: Sample sequences from FASTA/Q(.gz) files to FASTA/Q (PR #68). + * `umitools`: - `umitools_dedup`: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read (PR #54). diff --git a/src/_authors/jakub_majercik.yaml b/src/_authors/jakub_majercik.yaml new file mode 100644 index 00000000..3b75fffe --- /dev/null +++ b/src/_authors/jakub_majercik.yaml @@ -0,0 +1,10 @@ +name: Jakub Majercik +info: + links: + email: jakub@data-intuitive.com + github: jakubmajercik + linkedin: jakubmajercik + organizations: + - name: Data Intuitive + href: https://www.data-intuitive.com + role: Bioinformatics Engineer \ No newline at end of file diff --git a/src/seqtk/seqtk_sample/config.vsh.yaml b/src/seqtk/seqtk_sample/config.vsh.yaml new file mode 100644 index 00000000..0cd369e7 --- /dev/null +++ b/src/seqtk/seqtk_sample/config.vsh.yaml @@ -0,0 +1,57 @@ +name: seqtk_sample +namespace: seqtk +description: Subsamples sequences from FASTA/Q files. +keywords: [sample, FASTA, FASTQ] +links: + repository: https://github.com/lh3/seqtk/tree/v1.4 +license: MIT +authors: + - __merge__: /src/_authors/jakub_majercik.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + type: file + description: The input FASTA/Q file. + required: true + + - name: Outputs + arguments: + - name: --output + type: file + description: The output FASTA/Q file. + required: true + direction: output + + - name: Options + arguments: + - name: --seed + type: integer + description: Seed for random generator. + example: 42 + - name: --fraction_number + type: double + description: Fraction or number of sequences to sample. 
+ required: true + example: 0.1 + - name: --two_pass_mode + type: boolean_true + description: Twice as slow but with much reduced memory + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: ../test_data + +engines: + - type: docker + image: quay.io/biocontainers/seqtk:1.4--he4a0461_2 +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/seqtk/seqtk_sample/help.txt b/src/seqtk/seqtk_sample/help.txt new file mode 100644 index 00000000..49f8001b --- /dev/null +++ b/src/seqtk/seqtk_sample/help.txt @@ -0,0 +1,9 @@ +``` +seqtk_subseq +``` +Usage: seqtk subseq [options] | +Options: + -t TAB delimited output + -s strand aware + -l INT sequence line length [0] +Note: Use 'samtools faidx' if only a few regions are intended. \ No newline at end of file diff --git a/src/seqtk/seqtk_sample/script.sh b/src/seqtk/seqtk_sample/script.sh new file mode 100644 index 00000000..01d981b3 --- /dev/null +++ b/src/seqtk/seqtk_sample/script.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +seqtk sample \ + ${par_two_pass_mode:+-2} \ + ${par_seed:+-s "$par_seed"} \ + "$par_input" \ + "$par_fraction_number" \ + > "$par_output" \ No newline at end of file diff --git a/src/seqtk/seqtk_sample/test.sh b/src/seqtk/seqtk_sample/test.sh new file mode 100644 index 00000000..cba5f613 --- /dev/null +++ b/src/seqtk/seqtk_sample/test.sh @@ -0,0 +1,104 @@ +#!/bin/bash + +set -e + +## VIASH START +meta_executable="target/executable/seqtk/seqtk_sample" +meta_resources_dir="src/seqtk" +## VIASH END + +######################################################################################### +mkdir seqtk_sample_se +cd seqtk_sample_se + +echo "> Run seqtk_sample on fastq SE" +"$meta_executable" \ + --input "$meta_resources_dir/test_data/reads/a.1.fastq.gz" \ + --seed 42 \ + --fraction_number 3 \ + --output "sampled.fastq" + +echo ">> Check if output exists" +if [ ! 
-f "sampled.fastq" ]; then + echo ">> sampled.fastq does not exist" + exit 1 +fi + +echo ">> Count number of samples" +num_samples=$(grep -c '^@' sampled.fastq) +if [ "$num_samples" -ne 3 ]; then + echo ">> sampled.fastq does not contain 3 samples" + exit 1 +fi + +######################################################################################### +cd .. +mkdir seqtk_sample_pe_number +cd seqtk_sample_pe_number + +echo ">> Run seqtk_sample on fastq.gz PE with number of reads" +"$meta_executable" \ + --input "$meta_resources_dir/test_data/reads/a.1.fastq.gz" \ + --seed 42 \ + --fraction_number 3 \ + --output "sampled_1.fastq" + +"$meta_executable" \ + --input "$meta_resources_dir/test_data/reads/a.2.fastq.gz" \ + --seed 42 \ + --fraction_number 3 \ + --output "sampled_2.fastq" + +echo ">> Check if output exists" +if [ ! -f "sampled_1.fastq" ] || [ ! -f "sampled_2.fastq" ]; then + echo ">> One or both output files do not exist" + exit 1 +fi + +echo ">> Compare reads" +# Extract headers +headers1=$(grep '^@' sampled_1.fastq | sed -e's/ 1$//' | sort) +headers2=$(grep '^@' sampled_2.fastq | sed -e 's/ 2$//' | sort) + +# Compare headers +diff <(echo "$headers1") <(echo "$headers2") || { echo "Mismatch detected"; exit 1; } + +echo ">> Count number of samples" +num_headers=$(echo "$headers1" | wc -l) +if [ "$num_headers" -ne 3 ]; then + echo ">> sampled_1.fastq does not contain 3 headers" + exit 1 +fi + +######################################################################################### +cd .. 
+mkdir seqtk_sample_pe_fraction +cd seqtk_sample_pe_fraction + +echo ">> Run seqtk_sample on fastq.gz PE with fraction of reads" +"$meta_executable" \ + --input "$meta_resources_dir/test_data/reads/a.1.fastq.gz" \ + --seed 42 \ + --fraction_number 0.5 \ + --output "sampled_1.fastq" + +"$meta_executable" \ + --input "$meta_resources_dir/test_data/reads/a.2.fastq.gz" \ + --seed 42 \ + --fraction_number 0.5 \ + --output "sampled_2.fastq" + +echo ">> Check if output exists" +if [ ! -f "sampled_1.fastq" ] || [ ! -f "sampled_2.fastq" ]; then + echo ">> One or both output files do not exist" + exit 1 +fi + +echo ">> Compare reads" +# Extract headers +headers1=$(grep '^@' sampled_1.fastq | sed -e's/ 1$//' | sort) +headers2=$(grep '^@' sampled_2.fastq | sed -e 's/ 2$//' | sort) + +# Compare headers +diff <(echo "$headers1") <(echo "$headers2") || { echo "Mismatch detected"; exit 1; } + diff --git a/src/seqtk/test_data/reads/a.1.fastq.gz b/src/seqtk/test_data/reads/a.1.fastq.gz new file mode 100644 index 0000000000000000000000000000000000000000..97a72ce5d48317556a145f93c32c87f0e9e5500f GIT binary patch literal 100 zcmV-q0Gt0GiwFRnrn+7N10Bw(6~jOf1wpPTJlJGMx7b%C&OZykT2i1C{p;n6 zLRM|nP{^ij8VcF9T|*&&2 literal 0 HcmV?d00001 diff --git a/src/seqtk/test_data/reads/a.2.fastq.gz b/src/seqtk/test_data/reads/a.2.fastq.gz new file mode 100644 index 0000000000000000000000000000000000000000..038bc976ac32e8f26be16949bf5632c7090e635b GIT binary patch literal 100 zcmV-q0Gt0GiwFRnrn+7N10Bw*5yUVM1wrm8Zt)RG{ Date: Wed, 17 Jul 2024 23:23:51 +0200 Subject: [PATCH 08/25] switch to viash actions for ci (#86) * switch to viash actions for ci * add changelog entry * ci force --- .github/workflows/test.yaml | 6 ++++-- CHANGELOG.md | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2591978f..30f98b03 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -1,9 +1,11 @@ -name: Component Testing +name: 
Test components on: pull_request: push: + branches: + - main jobs: test: - uses: viash-hub/toolbox/.github/workflows/test.yaml@main \ No newline at end of file + uses: viash-io/viash-actions/.github/workflows/test.yaml@v6 \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index f6a8676f..c9f8b222 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ * `busco` components: update BUSCO to `5.7.1` (PR #72). +* Update CI to reusable workflow in `viash-io/viash-actions` (PR #86). + ## DOCUMENTATION * Extend the contributing guidelines (PR #82): From e8b82b5d968524f495e80afa8092098408d66d1d Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Wed, 17 Jul 2024 23:25:07 +0200 Subject: [PATCH 09/25] fix authorship (#88) * fix authorship * add author * add missing newlines * update changelog * update changelog --- CHANGELOG.md | 2 ++ src/_authors/angela_o_pisco.yaml | 14 ++++++++++++++ src/_authors/dorien_roosen.yaml | 10 ++++++++++ src/_authors/dries_schaumont.yaml | 11 +++++++++++ src/_authors/emma_rousseau.yaml | 10 ++++++++++ src/_authors/jakub_majercik.yaml | 2 +- src/_authors/kai_waldrant.yaml | 14 ++++++++++++++ src/_authors/leila_paquay.yaml | 10 ++++++++++ src/_authors/robrecht_cannoodt.yaml | 2 +- src/_authors/sai_nirmayi_yasa.yaml | 10 ++++++++++ src/_authors/toni_verbeiren.yaml | 9 +++++++++ src/_authors/weiwei_schultz.yaml | 2 +- src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml | 4 ++++ src/arriba/config.vsh.yaml | 3 +++ src/bcl_convert/config.vsh.yaml | 11 +++++++++++ src/bedtools/bedtools_getfasta/config.vsh.yaml | 3 +++ src/busco/busco_download_datasets/config.vsh.yaml | 3 +++ src/busco/busco_list_datasets/config.vsh.yaml | 3 +++ src/busco/busco_run/config.vsh.yaml | 3 +++ src/cutadapt/config.vsh.yaml | 3 +++ src/falco/config.vsh.yaml | 3 +++ src/fastp/config.vsh.yaml | 3 +++ src/featurecounts/config.vsh.yaml | 4 +++- src/gffread/config.vsh.yaml | 3 +++ src/lofreq/call/config.vsh.yaml | 3 +++ src/lofreq/indelqual/config.vsh.yaml | 
3 +++ src/multiqc/config.vsh.yaml | 4 +++- src/pear/config.vsh.yaml | 5 ++++- src/salmon/salmon_index/config.vsh.yaml | 4 +++- src/salmon/salmon_quant/config.vsh.yaml | 4 +++- src/samtools/samtools_collate/config.vsh.yaml | 4 +++- src/samtools/samtools_faidx/config.vsh.yaml | 4 +++- src/samtools/samtools_fasta/config.vsh.yaml | 4 +++- src/samtools/samtools_fastq/config.vsh.yaml | 4 +++- src/samtools/samtools_flagstat/config.vsh.yaml | 4 +++- src/samtools/samtools_idxstats/config.vsh.yaml | 4 +++- src/samtools/samtools_index/config.vsh.yaml | 4 +++- src/samtools/samtools_sort/config.vsh.yaml | 4 +++- src/samtools/samtools_stats/config.vsh.yaml | 4 +++- src/samtools/samtools_view/config.vsh.yaml | 4 +++- src/star/star_align_reads/config.vsh.yaml | 5 +++++ src/star/star_genome_generate/config.vsh.yaml | 4 +++- src/umi_tools/umi_tools_dedup/config.vsh.yaml | 4 +++- 43 files changed, 198 insertions(+), 20 deletions(-) create mode 100644 src/_authors/angela_o_pisco.yaml create mode 100644 src/_authors/dorien_roosen.yaml create mode 100644 src/_authors/dries_schaumont.yaml create mode 100644 src/_authors/emma_rousseau.yaml create mode 100644 src/_authors/kai_waldrant.yaml create mode 100644 src/_authors/leila_paquay.yaml create mode 100644 src/_authors/sai_nirmayi_yasa.yaml create mode 100644 src/_authors/toni_verbeiren.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index c9f8b222..4e6a0369 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,8 @@ - Mention that the contents of the output of components in tests should be checked. +* Add authorship to existing components (PR #88). + ## BUG FIXES * `pear`: fix component not exiting with the correct exitcode when PEAR fails (PR #70). 
diff --git a/src/_authors/angela_o_pisco.yaml b/src/_authors/angela_o_pisco.yaml new file mode 100644 index 00000000..1f0bf58f --- /dev/null +++ b/src/_authors/angela_o_pisco.yaml @@ -0,0 +1,14 @@ +name: Angela Oliveira Pisco +info: + role: Contributor + links: + github: aopisco + orcid: "0000-0003-0142-2355" + linkedin: aopisco + organizations: + - name: Insitro + href: https://insitro.com + role: Director of Computational Biology + - name: Open Problems + href: https://openproblems.bio + role: Core Member diff --git a/src/_authors/dorien_roosen.yaml b/src/_authors/dorien_roosen.yaml new file mode 100644 index 00000000..d67448d8 --- /dev/null +++ b/src/_authors/dorien_roosen.yaml @@ -0,0 +1,10 @@ +name: Dorien Roosen +info: + links: + email: dorien@data-intuitive.com + github: dorien-er + linkedin: dorien-roosen + organizations: + - name: Data Intuitive + href: https://www.data-intuitive.com + role: Data Scientist diff --git a/src/_authors/dries_schaumont.yaml b/src/_authors/dries_schaumont.yaml new file mode 100644 index 00000000..b2678081 --- /dev/null +++ b/src/_authors/dries_schaumont.yaml @@ -0,0 +1,11 @@ +name: Dries Schaumont +info: + links: + email: dries@data-intuitive.com + github: DriesSchaumont + orcid: "0000-0002-4389-0440" + linkedin: dries-schaumont + organizations: + - name: Data Intuitive + href: https://www.data-intuitive.com + role: Data Scientist diff --git a/src/_authors/emma_rousseau.yaml b/src/_authors/emma_rousseau.yaml new file mode 100644 index 00000000..1a9ac456 --- /dev/null +++ b/src/_authors/emma_rousseau.yaml @@ -0,0 +1,10 @@ +name: Emma Rousseau +info: + links: + email: emma@data-intuitive.com + github: emmarousseau + linkedin: emmarousseau1 + organizations: + - name: Data Intuitive + href: https://www.data-intuitive.com + role: Bioinformatician diff --git a/src/_authors/jakub_majercik.yaml b/src/_authors/jakub_majercik.yaml index 3b75fffe..c2a7867d 100644 --- a/src/_authors/jakub_majercik.yaml +++ b/src/_authors/jakub_majercik.yaml 
@@ -7,4 +7,4 @@ info: organizations: - name: Data Intuitive href: https://www.data-intuitive.com - role: Bioinformatics Engineer \ No newline at end of file + role: Bioinformatics Engineer diff --git a/src/_authors/kai_waldrant.yaml b/src/_authors/kai_waldrant.yaml new file mode 100644 index 00000000..a132c528 --- /dev/null +++ b/src/_authors/kai_waldrant.yaml @@ -0,0 +1,14 @@ +name: Kai Waldrant +info: + links: + email: kai@data-intuitive.com + github: KaiWaldrant + orcid: "0009-0003-8555-1361" + linkedin: kaiwaldrant + organizations: + - name: Data Intuitive + href: https://www.data-intuitive.com + role: Bioinformatician + - name: Open Problems + href: https://openproblems.bio + role: Contributor diff --git a/src/_authors/leila_paquay.yaml b/src/_authors/leila_paquay.yaml new file mode 100644 index 00000000..21aa532d --- /dev/null +++ b/src/_authors/leila_paquay.yaml @@ -0,0 +1,10 @@ +name: Leïla Paquay +info: + links: + email: leila@data-intuitive.com + github: Leila011 + linkedin: leilapaquay + organizations: + - name: Data Intuitive + href: https://www.data-intuitive.com + role: Software Developer diff --git a/src/_authors/robrecht_cannoodt.yaml b/src/_authors/robrecht_cannoodt.yaml index d7c0f283..c4c1bdec 100644 --- a/src/_authors/robrecht_cannoodt.yaml +++ b/src/_authors/robrecht_cannoodt.yaml @@ -11,4 +11,4 @@ info: role: Data Science Engineer - name: Open Problems href: https://openproblems.bio - role: Core Member \ No newline at end of file + role: Core Member diff --git a/src/_authors/sai_nirmayi_yasa.yaml b/src/_authors/sai_nirmayi_yasa.yaml new file mode 100644 index 00000000..9f560c58 --- /dev/null +++ b/src/_authors/sai_nirmayi_yasa.yaml @@ -0,0 +1,10 @@ +name: Sai Nirmayi Yasa +info: + links: + email: nirmayi@data-intuitive.com + github: sainirmayi + linkedin: sai-nirmayi-yasa + organizations: + - name: Data Intuitive + href: https://www.data-intuitive.com + role: Junior Bioinformatics Researcher diff --git a/src/_authors/toni_verbeiren.yaml 
b/src/_authors/toni_verbeiren.yaml new file mode 100644 index 00000000..2f2f851f --- /dev/null +++ b/src/_authors/toni_verbeiren.yaml @@ -0,0 +1,9 @@ +name: Toni Verbeiren +info: + links: + github: tverbeiren + linkedin: verbeiren + organizations: + - name: Data Intuitive + href: https://www.data-intuitive.com + role: Data Scientist and CEO diff --git a/src/_authors/weiwei_schultz.yaml b/src/_authors/weiwei_schultz.yaml index 324f9378..e4945078 100644 --- a/src/_authors/weiwei_schultz.yaml +++ b/src/_authors/weiwei_schultz.yaml @@ -2,4 +2,4 @@ name: Weiwei Schultz info: organizations: - name: Janssen R&D US - role: Associate Director Data Sciences \ No newline at end of file + role: Associate Director Data Sciences diff --git a/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml index b788c7c7..757cbd85 100644 --- a/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml +++ b/src/agat/agat_convert_sp_gff2gtf/config.vsh.yaml @@ -27,6 +27,10 @@ links: references: doi: 10.5281/zenodo.3552717 license: GPL-3.0 +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] + argument_groups: - name: Inputs arguments: diff --git a/src/arriba/config.vsh.yaml b/src/arriba/config.vsh.yaml index 8d72d7eb..db5960cf 100644 --- a/src/arriba/config.vsh.yaml +++ b/src/arriba/config.vsh.yaml @@ -11,6 +11,9 @@ license: MIT requirements: cpus: 1 commands: [ arriba ] +authors: + - __merge__: /src/_authors/robrecht_cannoodt.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/bcl_convert/config.vsh.yaml b/src/bcl_convert/config.vsh.yaml index 657fb1f0..81103776 100644 --- a/src/bcl_convert/config.vsh.yaml +++ b/src/bcl_convert/config.vsh.yaml @@ -4,6 +4,17 @@ description: | Information about upgrading from bcl2fastq via [Upgrading from bcl2fastq to BCL Convert](https://emea.support.illumina.com/bulletins/2020/10/upgrading-from-bcl2fastq-to-bcl-convert.html) and [BCL Convert 
Compatible Products](https://support.illumina.com/sequencing/sequencing_software/bcl-convert/compatibility.html) +keywords: [demultiplex, fastq, bcl, illumina] +links: + homepage: https://support.illumina.com/sequencing/sequencing_software/bcl-convert.html + documentation: https://support.illumina.com/downloads/bcl-convert-user-guide.html +license: Proprietary +authors: + - __merge__: /src/_authors/toni_verbeiren.yaml + roles: [ author, maintainer ] + - __merge__: /src/_authors/dorien_roosen.yaml + roles: [ author ] + argument_groups: - name: Input arguments arguments: diff --git a/src/bedtools/bedtools_getfasta/config.vsh.yaml b/src/bedtools/bedtools_getfasta/config.vsh.yaml index f1f49a87..fe160b20 100644 --- a/src/bedtools/bedtools_getfasta/config.vsh.yaml +++ b/src/bedtools/bedtools_getfasta/config.vsh.yaml @@ -10,6 +10,9 @@ references: license: GPL-2.0 requirements: commands: [bedtools] +authors: + - __merge__: /src/_authors/dries_schaumont.yaml + roles: [ author, maintainer ] argument_groups: - name: Input arguments diff --git a/src/busco/busco_download_datasets/config.vsh.yaml b/src/busco/busco_download_datasets/config.vsh.yaml index 5297af2e..cce3faa0 100644 --- a/src/busco/busco_download_datasets/config.vsh.yaml +++ b/src/busco/busco_download_datasets/config.vsh.yaml @@ -9,6 +9,9 @@ links: references: doi: 10.1007/978-1-4939-9173-0_14 license: MIT +authors: + - __merge__: /src/_authors/dorien_roosen.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/busco/busco_list_datasets/config.vsh.yaml b/src/busco/busco_list_datasets/config.vsh.yaml index cac34cc6..93fd0559 100644 --- a/src/busco/busco_list_datasets/config.vsh.yaml +++ b/src/busco/busco_list_datasets/config.vsh.yaml @@ -9,6 +9,9 @@ links: references: doi: 10.1007/978-1-4939-9173-0_14 license: MIT +authors: + - __merge__: /src/_authors/dorien_roosen.yaml + roles: [ author, maintainer ] argument_groups: - name: Outputs arguments: diff --git 
a/src/busco/busco_run/config.vsh.yaml b/src/busco/busco_run/config.vsh.yaml index 23ee95fb..435e9d2a 100644 --- a/src/busco/busco_run/config.vsh.yaml +++ b/src/busco/busco_run/config.vsh.yaml @@ -9,6 +9,9 @@ links: references: doi: 10.1007/978-1-4939-9173-0_14 license: MIT +authors: + - __merge__: /src/_authors/dorien_roosen.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/cutadapt/config.vsh.yaml b/src/cutadapt/config.vsh.yaml index b315d0ce..7e36a8e0 100644 --- a/src/cutadapt/config.vsh.yaml +++ b/src/cutadapt/config.vsh.yaml @@ -9,6 +9,9 @@ links: references: doi: 10.14806/ej.17.1.200 license: MIT +authors: + - __merge__: /src/_authors/toni_verbeiren.yaml + roles: [ author, maintainer ] argument_groups: #################################################################### - name: Specify Adapters for R1 diff --git a/src/falco/config.vsh.yaml b/src/falco/config.vsh.yaml index 4d9cf656..de9906ef 100644 --- a/src/falco/config.vsh.yaml +++ b/src/falco/config.vsh.yaml @@ -9,6 +9,9 @@ references: license: GPL-3.0 requirements: commands: [falco] +authors: + - __merge__: /src/_authors/toni_verbeiren.yaml + roles: [ author, maintainer ] # Notes: # - falco as arguments similar to -subsample and we update those to --subsample diff --git a/src/fastp/config.vsh.yaml b/src/fastp/config.vsh.yaml index b7d9062a..f1f8f1ed 100644 --- a/src/fastp/config.vsh.yaml +++ b/src/fastp/config.vsh.yaml @@ -26,6 +26,9 @@ links: references: doi: "10.1093/bioinformatics/bty560" license: MIT +authors: + - __merge__: /src/_authors/robrecht_cannoodt.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs description: | diff --git a/src/featurecounts/config.vsh.yaml b/src/featurecounts/config.vsh.yaml index 8697b1fe..e17d9ac0 100644 --- a/src/featurecounts/config.vsh.yaml +++ b/src/featurecounts/config.vsh.yaml @@ -11,7 +11,9 @@ references: license: GPL-3.0 requirements: commands: [ featureCounts ] - +authors: + - __merge__: 
/src/_authors/sai_nirmayi_yasa.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/gffread/config.vsh.yaml b/src/gffread/config.vsh.yaml index 7477a284..bd985ffb 100644 --- a/src/gffread/config.vsh.yaml +++ b/src/gffread/config.vsh.yaml @@ -8,6 +8,9 @@ links: references: doi: 10.12688/f1000research.23297.2 license: MIT +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/lofreq/call/config.vsh.yaml b/src/lofreq/call/config.vsh.yaml index c547de9d..286a040a 100644 --- a/src/lofreq/call/config.vsh.yaml +++ b/src/lofreq/call/config.vsh.yaml @@ -17,6 +17,9 @@ references: license: "MIT" requirements: commands: [ lofreq ] +authors: + - __merge__: /src/_authors/kai_waldrant.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/lofreq/indelqual/config.vsh.yaml b/src/lofreq/indelqual/config.vsh.yaml index 0524458e..29696c81 100644 --- a/src/lofreq/indelqual/config.vsh.yaml +++ b/src/lofreq/indelqual/config.vsh.yaml @@ -18,6 +18,9 @@ references: license: "MIT" requirements: commands: [ lofreq ] +authors: + - __merge__: /src/_authors/kai_waldrant.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/multiqc/config.vsh.yaml b/src/multiqc/config.vsh.yaml index df5e38e1..ba305025 100644 --- a/src/multiqc/config.vsh.yaml +++ b/src/multiqc/config.vsh.yaml @@ -11,7 +11,9 @@ info: references: doi: 10.1093/bioinformatics/btw354 licence: GPL v3 or later - +authors: + - __merge__: /src/_authors/dorien_roosen.yaml + roles: [ author, maintainer ] argument_groups: - name: "Input" arguments: diff --git a/src/pear/config.vsh.yaml b/src/pear/config.vsh.yaml index d6dbe6c9..acae10cc 100644 --- a/src/pear/config.vsh.yaml +++ b/src/pear/config.vsh.yaml @@ -12,7 +12,10 @@ references: doi: 10.1093/bioinformatics/btt593 license: "CC-BY-NC-SA-3.0" requirements: - commands: [ 
pear , gzip ] + commands: [ pear, gzip ] +authors: + - __merge__: /src/_authors/kai_waldrant.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/salmon/salmon_index/config.vsh.yaml b/src/salmon/salmon_index/config.vsh.yaml index 41c1e05b..925c3000 100644 --- a/src/salmon/salmon_index/config.vsh.yaml +++ b/src/salmon/salmon_index/config.vsh.yaml @@ -12,7 +12,9 @@ references: license: GPL-3.0 requirements: commands: [ salmon ] - +authors: + - __merge__: /src/_authors/sai_nirmayi_yasa.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/salmon/salmon_quant/config.vsh.yaml b/src/salmon/salmon_quant/config.vsh.yaml index b7e303f4..1f96f0c9 100644 --- a/src/salmon/salmon_quant/config.vsh.yaml +++ b/src/salmon/salmon_quant/config.vsh.yaml @@ -12,7 +12,9 @@ references: license: GPL-3.0 requirements: commands: [ salmon ] - +authors: + - __merge__: /src/_authors/sai_nirmayi_yasa.yaml + roles: [ author, maintainer ] argument_groups: - name: Common input options arguments: diff --git a/src/samtools/samtools_collate/config.vsh.yaml b/src/samtools/samtools_collate/config.vsh.yaml index 669f4cdf..84a3195c 100644 --- a/src/samtools/samtools_collate/config.vsh.yaml +++ b/src/samtools/samtools_collate/config.vsh.yaml @@ -9,7 +9,9 @@ links: references: doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat - +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/samtools/samtools_faidx/config.vsh.yaml b/src/samtools/samtools_faidx/config.vsh.yaml index c1c9325d..937b0804 100644 --- a/src/samtools/samtools_faidx/config.vsh.yaml +++ b/src/samtools/samtools_faidx/config.vsh.yaml @@ -9,7 +9,9 @@ links: references: doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat - +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, 
maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/samtools/samtools_fasta/config.vsh.yaml b/src/samtools/samtools_fasta/config.vsh.yaml index 23517f6c..70ba72b9 100644 --- a/src/samtools/samtools_fasta/config.vsh.yaml +++ b/src/samtools/samtools_fasta/config.vsh.yaml @@ -9,7 +9,9 @@ links: references: doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat - +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/samtools/samtools_fastq/config.vsh.yaml b/src/samtools/samtools_fastq/config.vsh.yaml index cac7653b..09014ced 100644 --- a/src/samtools/samtools_fastq/config.vsh.yaml +++ b/src/samtools/samtools_fastq/config.vsh.yaml @@ -9,7 +9,9 @@ links: references: doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat - +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/samtools/samtools_flagstat/config.vsh.yaml b/src/samtools/samtools_flagstat/config.vsh.yaml index 9b4dfbe1..b30f1867 100644 --- a/src/samtools/samtools_flagstat/config.vsh.yaml +++ b/src/samtools/samtools_flagstat/config.vsh.yaml @@ -9,7 +9,9 @@ links: references: doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat - +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/samtools/samtools_idxstats/config.vsh.yaml b/src/samtools/samtools_idxstats/config.vsh.yaml index 30f21348..16e901d7 100644 --- a/src/samtools/samtools_idxstats/config.vsh.yaml +++ b/src/samtools/samtools_idxstats/config.vsh.yaml @@ -9,7 +9,9 @@ links: references: doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat - +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] 
argument_groups: - name: Inputs arguments: diff --git a/src/samtools/samtools_index/config.vsh.yaml b/src/samtools/samtools_index/config.vsh.yaml index 8c59a20e..4220c691 100644 --- a/src/samtools/samtools_index/config.vsh.yaml +++ b/src/samtools/samtools_index/config.vsh.yaml @@ -9,7 +9,9 @@ links: references: doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat - +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/samtools/samtools_sort/config.vsh.yaml b/src/samtools/samtools_sort/config.vsh.yaml index a78800da..e0776c2d 100644 --- a/src/samtools/samtools_sort/config.vsh.yaml +++ b/src/samtools/samtools_sort/config.vsh.yaml @@ -9,7 +9,9 @@ links: references: doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat - +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/samtools/samtools_stats/config.vsh.yaml b/src/samtools/samtools_stats/config.vsh.yaml index ca630876..b115b4df 100644 --- a/src/samtools/samtools_stats/config.vsh.yaml +++ b/src/samtools/samtools_stats/config.vsh.yaml @@ -9,7 +9,9 @@ links: references: doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat - +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff --git a/src/samtools/samtools_view/config.vsh.yaml b/src/samtools/samtools_view/config.vsh.yaml index 206b87ac..86dde146 100644 --- a/src/samtools/samtools_view/config.vsh.yaml +++ b/src/samtools/samtools_view/config.vsh.yaml @@ -9,7 +9,9 @@ links: references: doi: [10.1093/bioinformatics/btp352, 10.1093/gigascience/giab008] license: MIT/Expat - +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: diff 
--git a/src/star/star_align_reads/config.vsh.yaml b/src/star/star_align_reads/config.vsh.yaml index eab65b35..bdc956d3 100644 --- a/src/star/star_align_reads/config.vsh.yaml +++ b/src/star/star_align_reads/config.vsh.yaml @@ -11,6 +11,11 @@ references: license: MIT requirements: commands: [ STAR, python, ps, zcat, bzcat ] +authors: + - __merge__: /src/_authors/angela_o_pisco.yaml + roles: [ author ] + - __merge__: /src/_authors/robrecht_cannoodt.yaml + roles: [ author, maintainer ] # manually taking care of the main input and output arguments argument_groups: - name: Inputs diff --git a/src/star/star_genome_generate/config.vsh.yaml b/src/star/star_genome_generate/config.vsh.yaml index 3adaf7a2..60fa3839 100644 --- a/src/star/star_genome_generate/config.vsh.yaml +++ b/src/star/star_genome_generate/config.vsh.yaml @@ -11,7 +11,9 @@ references: license: MIT requirements: commands: [ STAR ] - +authors: + - __merge__: /src/_authors/sai_nirmayi_yasa.yaml + roles: [ author, maintainer ] argument_groups: - name: "Input" arguments: diff --git a/src/umi_tools/umi_tools_dedup/config.vsh.yaml b/src/umi_tools/umi_tools_dedup/config.vsh.yaml index a02e70a1..e6953e6e 100644 --- a/src/umi_tools/umi_tools_dedup/config.vsh.yaml +++ b/src/umi_tools/umi_tools_dedup/config.vsh.yaml @@ -10,7 +10,9 @@ links: references: doi: 10.1101/gr.209601.116 license: MIT - +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] argument_groups: - name: Inputs arguments: From 2f9c7f7b38c45e856eda9e35204c8f6befc32f84 Mon Sep 17 00:00:00 2001 From: Theodoro Gasperin Terra Camargo <98555209+tgaspe@users.noreply.github.com> Date: Thu, 18 Jul 2024 17:19:44 -0300 Subject: [PATCH 10/25] Seqtk subseq (#85) * Config file and help.txt file Created: - config.vsh.yaml - help.txt * Added script.sh File added: - script.sh * Created test.sh updates: - changes to config.vsh.yaml - created test.sh - created some test files Problems: - there is some error in config file that is 
preventing me from running the component and testing * Update on test.sh * update * Bug fixes - config required: false bug * Update test * Update CHANGELOG.md * Improvement on test.sh * Added more test I tried out different option of the command with different fasta and fastq files and different list, but the output does not seem to change. * Update on tests - got unstuck - I need to create a docker image with the lastest version of seqtk - * Bug fixed - removed some test files - fixed bug with the help of Toni - added correct software_versions.txt to config Still Needs: - add one more test to strand aware - fix tab test * Update CHANGELOG.md * Fixed Tabular test bug * Strand Aware Test - implementation of strand aware test - change of format for reg.bed file * Input validation for list file - input validation for name_list parameter * Sugested Changes - removed test_data dir - removed input validation * Added author info * Update CHANGELOG.md * Update theodoro_gasperin.yaml * add newline * add newline * Update src/seqtk/seqtk_subseq/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Version Fix * Update on config * Helper bed.sh * Deleted _helpers * don't forget exit when a test fails --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 30 ++-- src/_authors/theodoro_gasperin.yaml | 10 ++ src/seqtk/seqtk_subseq/config.vsh.yaml | 78 +++++++++++ src/seqtk/seqtk_subseq/help.txt | 9 ++ src/seqtk/seqtk_subseq/script.sh | 15 ++ src/seqtk/seqtk_subseq/test.sh | 182 +++++++++++++++++++++++++ 6 files changed, 305 insertions(+), 19 deletions(-) create mode 100644 src/_authors/theodoro_gasperin.yaml create mode 100644 src/seqtk/seqtk_subseq/config.vsh.yaml create mode 100644 src/seqtk/seqtk_subseq/help.txt create mode 100644 src/seqtk/seqtk_subseq/script.sh create mode 100644 src/seqtk/seqtk_subseq/test.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e6a0369..d6256e72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,16 @@ ## NEW FEATURES -* `bd_rhapsody`: 
+* `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline (PR #75). - - `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline (PR #75). +* `umitools/umitools_dedup`: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read (PR #54). + +* `seqtk`: + - `seqtk/seqtk_sample`: Subsamples sequences from FASTA/Q files (PR #68). + - `seqtk/seqtk_subseq`: Extract the sequences (complete or subsequence) from the FASTA/FASTQ files + based on a provided sequence IDs or region coordinates file (PR #85). + +* `agat/agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). ## MINOR CHANGES @@ -38,14 +45,8 @@ * `multiqc`: update multiple separator to `;` (PR #81). -# biobox 0.1.0 - -## BREAKING CHANGES - -* Change default `multiple_sep` to `;` (PR #25). This aligns with an upcoming breaking change in - Viash 0.9.0 in order to avoid issues with the current default separator `:` unintentionally - splitting up certain file paths. +# biobox 0.1.0 ## NEW FEATURES @@ -94,21 +95,12 @@ - `samtools/samtools_fastq`: Converts a SAM/BAM/CRAM file to FASTQ (PR #52). - `samtools/samtools_fastq`: Converts a SAM/BAM/CRAM file to FASTA (PR #53). - * `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43). -* `seqtk/seqtk_sample`: Sample sequences from FASTA/Q(.gz) files to FASTA/Q (PR #68). - -* `umitools`: - - `umitools_dedup`: Deduplicate reads based on the mapping co-ordinate and the UMI attached to the read (PR #54). - * `bedtools`: - `bedtools_getfasta`: extract sequences from a FASTA file for each of the intervals defined in a BED/GFF/VCF file (PR #59). -* `agat`: - - `agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). - ## MINOR CHANGES * Uniformize component metadata (PR #23). 
@@ -129,4 +121,4 @@ * Add escaping character before leading hashtag in the description field of the config file (PR #50). -* Format URL in biobase/bcl_convert description (PR #55). \ No newline at end of file +* Format URL in biobase/bcl_convert description (PR #55). diff --git a/src/_authors/theodoro_gasperin.yaml b/src/_authors/theodoro_gasperin.yaml new file mode 100644 index 00000000..47af96a9 --- /dev/null +++ b/src/_authors/theodoro_gasperin.yaml @@ -0,0 +1,10 @@ +name: Theodoro Gasperin Terra Camargo +info: + links: + email: theodorogtc@gmail.com + github: tgaspe + linkedin: theodoro-gasperin-terra-camargo + organizations: + - name: Data Intuitive + href: https://www.data-intuitive.com + role: Bioinformatician diff --git a/src/seqtk/seqtk_subseq/config.vsh.yaml b/src/seqtk/seqtk_subseq/config.vsh.yaml new file mode 100644 index 00000000..1c2e8c08 --- /dev/null +++ b/src/seqtk/seqtk_subseq/config.vsh.yaml @@ -0,0 +1,78 @@ +name: seqtk_subseq +namespace: seqtk +description: | + Extract subsequences from FASTA/Q files. Takes as input a FASTA/Q file and a name.lst (sequence ids file) or a reg.bed (genomic regions file). +keywords: [subseq, FASTA, FASTQ] +links: + repository: https://github.com/lh3/seqtk/tree/v1.4 +license: MIT +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: "--input" + type: file + direction: input + description: The input FASTA/Q file. + required: true + example: input.fa + + - name: "--name_list" + type: file + direction: input + description: | + List of sequence names (name.lst) or genomic regions (reg.bed) to extract. + required: true + example: list.lst + + - name: Outputs + arguments: + - name: "--output" + alternatives: -o + type: file + direction: output + description: The output FASTA/Q file. 
+ required: true + default: output.fa + + - name: Options + arguments: + - name: "--tab" + alternatives: -t + type: boolean_true + description: TAB delimited output. + + - name: "--strand_aware" + alternatives: -s + type: boolean_true + description: Strand aware. + + - name: "--sequence_line_length" + alternatives: -l + type: integer + description: | + Line length used to wrap sequences in the output; 0 disables wrapping. Default: 0. + example: 0 + + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: quay.io/biocontainers/seqtk:1.4--he4a0461_2 + setup: + - type: docker + run: | + echo $(echo $(seqtk 2>&1) | sed -n 's/.*\(Version: [^ ]*\).*/\1/p') > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow diff --git a/src/seqtk/seqtk_subseq/help.txt b/src/seqtk/seqtk_subseq/help.txt new file mode 100644 index 00000000..5768e4ff --- /dev/null +++ b/src/seqtk/seqtk_subseq/help.txt @@ -0,0 +1,9 @@ +```bash +seqtk subseq +``` +Usage: seqtk subseq [options] | +Options: + -t TAB delimited output + -s strand aware + -l INT sequence line length [0] +Note: Use 'samtools faidx' if only a few regions are intended. 
\ No newline at end of file diff --git a/src/seqtk/seqtk_subseq/script.sh b/src/seqtk/seqtk_subseq/script.sh new file mode 100644 index 00000000..0aceaf29 --- /dev/null +++ b/src/seqtk/seqtk_subseq/script.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +[[ "$par_tab" == "false" ]] && unset par_tab +[[ "$par_strand_aware" == "false" ]] && unset par_strand_aware + +seqtk subseq \ + ${par_tab:+-t} \ + ${par_strand_aware:+-s} \ + ${par_sequence_line_length:+-l "$par_sequence_line_length"} \ + "$par_input" \ + "$par_name_list" \ + > "$par_output" diff --git a/src/seqtk/seqtk_subseq/test.sh b/src/seqtk/seqtk_subseq/test.sh new file mode 100644 index 00000000..f19cfa4a --- /dev/null +++ b/src/seqtk/seqtk_subseq/test.sh @@ -0,0 +1,182 @@ +#!/bin/bash + +# exit on error +set -e + +## VIASH START +meta_executable="target/executable/seqtk/seqtk_subseq" +meta_resources_dir="src/seqtk" +## VIASH END + +# Create directories for tests +echo "Creating Test Data..." +mkdir test_data + +# Create and populate input.fasta +cat > "test_data/input.fasta" <KU562861.1 +GGAGCAGGAGAGTGTTCGAGTTCAGAGATGTCCATGGCGCCGTACGAGAAGGTGATGGATGACCTGGCCA +AGGGGCAGCAGTTCGCGACGCAGCTGCAGGGCCTCCTCCGGGACTCCCCCAAGGCCGGCCACATCATGGA +>GU056837.1 +CTAATTTTATTTTTTTATAATAATTATTGGAGGAACTAAAACATTAATGAAATAATAATTATCATAATTA +TTAATTACATATTTATTAGGTATAATATTTAAGGAAAAATATATTTTATGTTAATTGTAATAATTAGAAC +>CP097510.1 +CGATTTAGATCGGTGTAGTCAACACACATCCTCCACTTCCATTAGGCTTCTTGACGAGGACTACATTGAC +AGCCACCGAGGGAACCGACCTCCTCAATGAAGTCAGACGCCAAGAGCCTATCAACTTCCTTCTGCACAGC +>JAMFTS010000002.1 +CCTAAACCCTAAACCCTAAACCCCCTACAAACCTTACCCTAAACCCTAAACCCTAAACCCTAAACCCTAA +ACCCGAAACCCTATACCCTAAACCCTAAACCCTAAACCCTAAACCCTAACCCAAACCTAATCCCTAAACC +>MH150936.1 +TAGAAGCTAATGAAAACTTTTCCTTTACTAAAAACCGTCAAACACGGTAAGAAACGCTTTTAATCATTTC +AAAAGCAATCCCAATAGTGGTTACATCCAAACAAAACCCATTTCTTATATTTTCTCAAAAACAGTGAGAG +EOL + +# Update id.list with new entries +cat > "test_data/id.list" < "test_data/reg.bed" < Run seqtk_subseq on FASTA/Q file" 
+"$meta_executable" \ + --input "../test_data/input.fasta" \ + --name_list "../test_data/id.list" \ + --output "sub_sample.fq" + +expected_output_basic=">KU562861.1 +GGAGCAGGAGAGTGTTCGAGTTCAGAGATGTCCATGGCGCCGTACGAGAAGGTGATGGATGACCTGGCCAAGGGGCAGCAGTTCGCGACGCAGCTGCAGGGCCTCCTCCGGGACTCCCCCAAGGCCGGCCACATCATGGA +>MH150936.1 +TAGAAGCTAATGAAAACTTTTCCTTTACTAAAAACCGTCAAACACGGTAAGAAACGCTTTTAATCATTTCAAAAGCAATCCCAATAGTGGTTACATCCAAACAAAACCCATTTCTTATATTTTCTCAAAAACAGTGAGAG" +output_basic=$(cat sub_sample.fq) + +if [ "$output_basic" != "$expected_output_basic" ]; then + echo "Test failed" + echo "Expected:" + echo "$expected_output_basic" + echo "Got:" + echo "$output_basic" + exit 1 +fi + +######################################################################################### +# Run reg.bed as name list input test +cd .. +mkdir test2 +cd test2 + +echo "> Run seqtk_subseq on FASTA/Q file with BED file as name list" +"$meta_executable" \ + --input "../test_data/input.fasta" \ + --name_list "../test_data/reg.bed" \ + --output "sub_sample.fq" + +expected_output_basic=">KU562861.1:11-20 +AGTGTTCGAG +>MH150936.1:11-20 +TGAAAACTTT" +output_basic=$(cat sub_sample.fq) + +if [ "$output_basic" != "$expected_output_basic" ]; then + echo "Test failed" + echo "Expected:" + echo "$expected_output_basic" + echo "Got:" + echo "$output_basic" + exit 1 +fi + +######################################################################################### +# Run tab option output test +cd .. 
+mkdir test3 +cd test3 + +echo "> Run seqtk_subseq with TAB option" +"$meta_executable" \ + --tab \ + --input "../test_data/input.fasta" \ + --name_list "../test_data/reg.bed" \ + --output "sub_sample.fq" + +expected_output_tabular=$'KU562861.1\t11\tAGTGTTCGAG\nMH150936.1\t11\tTGAAAACTTT' +output_tabular=$(cat sub_sample.fq) + +if [ "$output_tabular" != "$expected_output_tabular" ]; then + echo "Test failed" + echo "Expected:" + echo "$expected_output_tabular" + echo "Got:" + echo "$output_tabular" + exit 1 +fi + +######################################################################################### +# Run line option output test +cd .. +mkdir test4 +cd test4 + +echo "> Run seqtk_subseq with line length option" +"$meta_executable" \ + --sequence_line_length 5 \ + --input "../test_data/input.fasta" \ + --name_list "../test_data/reg.bed" \ + --output "sub_sample.fq" + +expected_output_wrapped=">KU562861.1:11-20 +AGTGT +TCGAG +>MH150936.1:11-20 +TGAAA +ACTTT" +output_wrapped=$(cat sub_sample.fq) + +if [ "$output_wrapped" != "$expected_output_wrapped" ]; then + echo "Test failed" + echo "Expected:" + echo "$expected_output_wrapped" + echo "Got:" + echo "$output_wrapped" + exit 1 +fi + +######################################################################################### +# Run Strand Aware option output test +cd .. +mkdir test5 +cd test5 + +echo "> Run seqtk_subseq with strand aware option" +"$meta_executable" \ + --strand_aware \ + --input "../test_data/input.fasta" \ + --name_list "../test_data/reg.bed" \ + --output "sub_sample.fq" + +expected_output_wrapped=">KU562861.1:11-20 +AGTGTTCGAG +>MH150936.1:11-20 +AAAGTTTTCA" +output_wrapped=$(cat sub_sample.fq) + +if [ "$output_wrapped" != "$expected_output_wrapped" ]; then + echo "Test failed" + echo "Expected:" + echo "$expected_output_wrapped" + echo "Got:" + echo "$output_wrapped" + exit 1 +fi + +echo "All tests succeeded!" 
From 3566232d39446d6ac19db154e6046e8b000f51af Mon Sep 17 00:00:00 2001 From: emmarousseau Date: Wed, 24 Jul 2024 17:46:02 +0200 Subject: [PATCH 11/25] UMI-tools extract (#71) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. * config and help files * tests template * script, preliminary version of tests * new test data * include arguments and script from the rnaseq.vsh version * new version of the script * Fixed bugs, component functional * Small formatting changes, update chagelog * Argument formatting in config * md formatting and ordering changes * remove second link to doc * Update src/umi_tools/umi_tools_extract/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Reduce the size of the test data and add back missing arguments to config * simplify argument names * Update src/umi_tools/umi_tools_extract/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * more consistent arg names, remove "paired" argument and adapt script --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 3 + .../umi_tools_extract/config.vsh.yaml | 197 ++++++++++++++++++ src/umi_tools/umi_tools_extract/help.txt | 106 ++++++++++ src/umi_tools/umi_tools_extract/script.sh | 88 ++++++++ src/umi_tools/umi_tools_extract/test.sh | 86 ++++++++ .../test_data/scrb_seq_fastq.1_30 | 120 +++++++++++ .../test_data/scrb_seq_fastq.1_30.extract | 120 +++++++++++ .../test_data/scrb_seq_fastq.2_30 | 120 +++++++++++ .../test_data/scrb_seq_fastq.2_30.extract | 120 +++++++++++ .../umi_tools_extract/test_data/script.sh | 34 +++ .../test_data/slim_30.extract | 120 +++++++++++ .../umi_tools_extract/test_data/slim_30.fastq | 120 +++++++++++ 12 files changed, 1234 insertions(+) create mode 100644 src/umi_tools/umi_tools_extract/config.vsh.yaml create mode 100644 src/umi_tools/umi_tools_extract/help.txt create mode 100644 src/umi_tools/umi_tools_extract/script.sh create mode 100644 src/umi_tools/umi_tools_extract/test.sh create mode 100644 
src/umi_tools/umi_tools_extract/test_data/scrb_seq_fastq.1_30 create mode 100644 src/umi_tools/umi_tools_extract/test_data/scrb_seq_fastq.1_30.extract create mode 100644 src/umi_tools/umi_tools_extract/test_data/scrb_seq_fastq.2_30 create mode 100644 src/umi_tools/umi_tools_extract/test_data/scrb_seq_fastq.2_30.extract create mode 100755 src/umi_tools/umi_tools_extract/test_data/script.sh create mode 100644 src/umi_tools/umi_tools_extract/test_data/slim_30.extract create mode 100644 src/umi_tools/umi_tools_extract/test_data/slim_30.fastq diff --git a/CHANGELOG.md b/CHANGELOG.md index d6256e72..665b587d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -95,6 +95,9 @@ - `samtools/samtools_fastq`: Converts a SAM/BAM/CRAM file to FASTQ (PR #52). - `samtools/samtools_fastq`: Converts a SAM/BAM/CRAM file to FASTA (PR #53). +* `umi_tools`: + - `umi_tools/umi_tools_extract`: Flexible removal of UMI sequences from fastq reads (PR #71). + * `falco`: A C++ drop-in replacement of FastQC to assess the quality of sequence read data (PR #43). * `bedtools`: diff --git a/src/umi_tools/umi_tools_extract/config.vsh.yaml b/src/umi_tools/umi_tools_extract/config.vsh.yaml new file mode 100644 index 00000000..b93c8cb9 --- /dev/null +++ b/src/umi_tools/umi_tools_extract/config.vsh.yaml @@ -0,0 +1,197 @@ +name: umi_tools_extract +namespace: umi_tools +description: | + Flexible removal of UMI sequences from fastq reads. + UMIs are removed and appended to the read name. Any other barcode, for example a library barcode, + is left on the read. Can also filter reads by quality or against a whitelist. 
+keywords: [ extract, umi-tools, umi, fastq ] +links: + homepage: https://umi-tools.readthedocs.io/en/latest/ + documentation: https://umi-tools.readthedocs.io/en/latest/reference/extract.html + repository: https://github.com/CGATOxford/UMI-tools +references: + doi: 10.1101/gr.209601.116 +license: MIT + +argument_groups: + + - name: Input + arguments: + - name: --input + type: file + required: true + description: File containing the input data. + example: sample.fastq + - name: --read2_in + type: file + required: false + description: File containing the input data for the R2 reads (if paired). If provided, `--bc_pattern2` needs to be provided as well. + example: sample_R2.fastq + - name: --bc_pattern + alternatives: -p + type: string + description: | + The UMI barcode pattern to use e.g. 'NNNNNN' indicates that the first 6 nucleotides + of the read are from the UMI. + - name: --bc_pattern2 + type: string + description: The UMI barcode pattern to use for read 2. + + - name: "Output" + arguments: + - name: --output + type: file + required: true + description: Output file for read 1. + direction: output + - name: --read2_out + type: file + description: Output file for read 2. + direction: output + - name: --filtered_out + type: file + description: Write out reads not matching regex pattern or cell barcode whitelist to this file. + direction: output + - name: --filtered_out2 + type: file + description: Write out read pairs not matching regex pattern or cell barcode whitelist to this file. + direction: output + + - name: Extract Options + arguments: + - name: --extract_method + type: string + choices: [string, regex] + description: | + UMI pattern to use. Default: `string`. + example: "string" + - name: --error_correct_cell + type: boolean_true + description: Error correct cell barcodes to the whitelist. 
+ - name: --whitelist + type: file + description: | + Whitelist of accepted cell barcodes tab-separated format, where column 1 is the whitelisted + cell barcodes and column 2 is the list (comma-separated) of other cell barcodes which should + be corrected to the barcode in column 1. If the --error_correct_cell option is not used, this + column will be ignored. + - name: --blacklist + type: file + description: Blacklist of cell barcodes to discard. + - name: --subset_reads + type: integer + description: Only parse the first N reads. + - name: --quality_filter_threshold + type: integer + description: Remove reads where any UMI base quality score falls below this threshold. + - name: --quality_filter_mask + type: string + description: | + If a UMI base has a quality below this threshold, replace the base with 'N'. + - name: --quality_encoding + type: string + choices: [phred33, phred64, solexa] + description: | + Quality score encoding. Choose from: + * phred33 [33-77] + * phred64 [64-106] + * solexa [59-106] + - name: --reconcile_pairs + type: boolean_true + description: | + Allow read 2 infile to contain reads not in read 1 infile. This enables support for upstream protocols + where read one contains cell barcodes, and the read pairs have been filtered and corrected without regard + to the read2. + - name: --three_prime + alternatives: --3prime + type: boolean_true + description: | + By default the barcode is assumed to be on the 5' end of the read, but use this option to sepecify that it is + on the 3' end instead. This option only works with --extract_method=string since 3' encoding can be specified + explicitly with a regex, e.g `.*(?P<umi_1>.{5})$`. + - name: --ignore_read_pair_suffixes + type: boolean_true + description: | + Ignore "/1" and "/2" read name suffixes. Note that this options is required if the suffixes are not whitespace + separated from the rest of the read name. 
+ # UMI handling options; these entries continue the argument list of the "Extract Options" group. + - name: --umi_separator + type: string + description: | + The character that separates the UMI in the read name. Most likely a colon if you skipped the extraction with + UMI-tools and used other software. Default: `_` + example: "_" + - name: --grouping_method + type: string + choices: [unique, percentile, cluster, adjacency, directional] + description: | + Method to use to determine read groups by subsuming those with similar UMIs. All methods start by identifying + the reads with the same mapping position, but treat similar yet nonidentical UMIs differently. Default: `directional` + example: "directional" + - name: --umi_discard_read + type: integer + choices: [0, 1, 2] + description: | + After UMI barcode extraction discard either R1 or R2 by setting this parameter to 1 or 2, respectively. Default: `0` + example: 0 + + - name: Common Options + arguments: + - name: --log + type: file + description: File with logging information. + direction: output + - name: --log2stderr + type: boolean_true + description: Send logging information to stderr. + direction: output + - name: --verbose + type: integer + description: Log level. The higher, the more output. + - name: --error + type: file + description: File with error information. + direction: output + - name: --temp_dir + type: string + description: | + Directory for temporary files. If not set, the bash environmental variable TMPDIR is used. + - name: --compresslevel + type: integer + description: | + Level of Gzip compression to use. Default=6 matches GNU gzip rather than python gzip default (which is 9). + Default `6`. + example: 6 + - name: --timeit + type: file + description: Store timing information in file. + direction: output + - name: --timeit_name + type: string + description: Name in timing file for this class of jobs. 
+ default: all + - name: --timeit_header + type: boolean_true + description: Add header for timing information. 
+ - name: --random_seed + type: integer + description: Random seed to initialize number generator with. + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/umi_tools:1.1.4--py310h4b81fae_2 + setup: + - type: docker + run: | + umi_tools -v | sed 's/ version//g' > /var/software_versions.txt +runners: +- type: executable +- type: nextflow \ No newline at end of file diff --git a/src/umi_tools/umi_tools_extract/help.txt b/src/umi_tools/umi_tools_extract/help.txt new file mode 100644 index 00000000..46c77ed0 --- /dev/null +++ b/src/umi_tools/umi_tools_extract/help.txt @@ -0,0 +1,106 @@ +''' +Generated from the following UMI-tools documentation: + https://umi-tools.readthedocs.io/en/latest/common_options.html#common-options + https://umi-tools.readthedocs.io/en/latest/reference/extract.html +''' + +extract - Extract UMI from fastq + +Usage: + + Single-end: + umi_tools extract [OPTIONS] -p PATTERN [-I IN_FASTQ[.gz]] [-S OUT_FASTQ[.gz]] + + Paired end: + umi_tools extract [OPTIONS] -p PATTERN [-I IN_FASTQ[.gz]] [-S OUT_FASTQ[.gz]] --read2-in=IN2_FASTQ[.gz] --read2-out=OUT2_FASTQ[.gz] + + note: If -I/-S are ommited standard in and standard out are used + for input and output. To generate a valid BAM file on + standard out, please redirect log with --log=LOGFILE or + --log2stderr. Input/Output will be (de)compressed if a + filename provided to -S/-I/--read2-in/read2-out ends in .gz + +Common UMI-tools Options: + + -S, --stdout File where output is to go [default = stdout]. + -L, --log File with logging information [default = stdout]. + --log2stderr Send logging information to stderr [default = False]. + -v, --verbose Log level. The higher, the more output [default = 1]. + -E, --error File with error information [default = stderr]. + --temp-dir Directory for temporary files. 
If not set, the bash environmental variable TMPDIR is used[default = None]. + --compresslevel Level of Gzip compression to use. Default=6 matches GNU gzip rather than python gzip default (which is 9) + + profiling and debugging options: + --timeit Store timing information in file [default=none]. + --timeit-name Name in timing file for this class of jobs [default=all]. + --timeit-header Add header for timing information [default=none]. + --random-seed Random seed to initialize number generator with [default=none]. + +Extract Options: + -I, --stdin File containing the input data [default = stdin]. + --error-correct-cell Error correct cell barcodes to the whitelist (see --whitelist) + --whitelist Whitelist of accepted cell barcodes. The whitelist should be in the following format (tab-separated): + AAAAAA AGAAAA + AAAATC + AAACAT + AAACTA AAACTN,GAACTA + AAATAC + AAATCA GAATCA + AAATGT AAAGGT,CAATGT + Where column 1 is the whitelisted cell barcodes and column 2 is the list (comma-separated) of other cell + barcodes which should be corrected to the barcode in column 1. If the --error-correct-cell option is not + used, this column will be ignored. Any additional columns in the whitelist input, such as the counts columns + from the output of umi_tools whitelist, will be ignored. + --blacklist BlackWhitelist of cell barcodes to discard + --subset-reads=[N] Only parse the first N reads + --quality-filter-threshold Remove reads where any UMI base quality score falls below this threshold + --quality-filter-mask If a UMI base has a quality below this threshold, replace the base with 'N' + --quality-encoding Quality score encoding. Choose from: + 'phred33' [33-77] + 'phred64' [64-106] + 'solexa' [59-106] + --reconcile-pairs Allow read 2 infile to contain reads not in read 1 infile. This enables support for upstream protocols + where read one contains cell barcodes, and the read pairs have been filtered and corrected without regard + to the read2s. 
+ +Experimental options: + Note: These options have not been extensively testing to ensure behaviour is as expected. If you have some suitable input files which + we can use for testing, please contact us. + If you have a library preparation method where the UMI may be in either read, you can use the following options to search for the + UMI in either read: + + --either-read --extract-method --bc-pattern=[PATTERN1] --bc-pattern2=[PATTERN2] + + Where both patterns match, the default behaviour is to discard both reads. If you want to select the read with the UMI with highest + sequence quality, provide --either-read-resolve=quality. + + + --bc-pattern Pattern for barcode(s) on read 1. See --extract-method + --bc-pattern2 Pattern for barcode(s) on read 2. See --extract-method + --extract-method There are two methods enabled to extract the umi barcode (+/- cell barcode). For both methods, the patterns + should be provided using the --bc-pattern and --bc-pattern2 options.x + string: + This should be used where the barcodes are always in the same place in the read. + N = UMI position (required) + C = cell barcode position (optional) + X = sample position (optional) + Bases with Ns and Cs will be extracted and added to the read name. The corresponding sequence qualities will + be removed from the read. Bases with an X will be reattached to the read. + regex: + This method allows for more flexible barcode extraction and should be used where the cell barcodes are variable + in length. Alternatively, the regex option can also be used to filter out reads which do not contain an expected + adapter sequence. The regex must contain groups to define how the barcodes are encoded in the read. 
+ The expected groups in the regex are: + umi_n = UMI positions, where n can be any value (required) + cell_n = cell barcode positions, where n can be any value (optional) + discard_n = positions to discard, where n can be any value (optional) + --3prime By default the barcode is assumed to be on the 5' end of the read, but use this option to sepecify that it is + on the 3' end instead. This option only works with --extract-method=string since 3' encoding can be specified + explicitly with a regex, e.g .*(?P.{5})$ + --read2-in Filename for read pairs + --filtered-out Write out reads not matching regex pattern or cell barcode whitelist to this file + --filtered-out2 Write out read pairs not matching regex pattern or cell barcode whitelist to this file + --ignore-read-pair-suffixes Ignore SOH and STX read name suffixes. Note that this options is required if the suffixes are not whitespace + separated from the rest of the read name + +For full UMI-tools documentation, see https://umi-tools.readthedocs.io/en/latest/ \ No newline at end of file diff --git a/src/umi_tools/umi_tools_extract/script.sh b/src/umi_tools/umi_tools_extract/script.sh new file mode 100644 index 00000000..5e41865d --- /dev/null +++ b/src/umi_tools/umi_tools_extract/script.sh @@ -0,0 +1,88 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +set -exo pipefail + +# Flags whose value is "false" are unset below so that ${par_x:+--flag} expands to nothing in the umi_tools call. + +[[ "$par_error_correct_cell" == "false" ]] && unset par_error_correct_cell +[[ "$par_reconcile_pairs" == "false" ]] && unset par_reconcile_pairs +[[ "$par_three_prime" == "false" ]] && unset par_three_prime +[[ "$par_ignore_read_pair_suffixes" == "false" ]] && unset par_ignore_read_pair_suffixes +[[ "$par_timeit_header" == "false" ]] && unset par_timeit_header +[[ "$par_log2stderr" == "false" ]] && unset par_log2stderr + + +# Check if we have the correct number of input files and patterns for paired-end or single-end reads + +# For paired-end reads, check that we have two read files, two patterns +# 
Check for paired-end inputs +if [ -n "$par_input" ] && [ -n "$par_read2_in" ]; then + # Paired-end checks: Ensure both UMI patterns are provided + if [ -z "$par_bc_pattern" ] || [ -z "$par_bc_pattern2" ]; then + echo "Paired end input requires two UMI patterns." + exit 1 + fi +elif [ -n "$par_input" ]; then + # Single-end checks: Ensure no second read or UMI pattern for the second read is provided + if [ -n "$par_bc_pattern2" ]; then + echo "Single end input requires only one read file and one UMI pattern." + exit 1 + fi + # Check that discard_read is not set or set to 0 for single-end reads + if [ -n "$par_umi_discard_read" ] && [ "$par_umi_discard_read" != 0 ]; then + echo "umi_discard_read is only valid when processing paired end reads." + exit 1 + fi +else + # No inputs provided + echo "No input files provided." + exit 1 +fi + + + + +umi_tools extract \ + -I "$par_input" \ + ${par_read2_in:+ --read2-in "$par_read2_in"} \ + -S "$par_output" \ + ${par_read2_out:+--read2-out "$par_read2_out"} \ + ${par_extract_method:+--extract-method "$par_extract_method"} \ + --bc-pattern "$par_bc_pattern" \ + ${par_bc_pattern2:+ --bc-pattern2 "$par_bc_pattern2"} \ + ${par_umi_separator:+--umi-separator "$par_umi_separator"} \ + ${par_output_stats:+--output-stats "$par_output_stats"} \ + ${par_error_correct_cell:+--error-correct-cell} \ + ${par_whitelist:+--whitelist "$par_whitelist"} \ + ${par_blacklist:+--blacklist "$par_blacklist"} \ + ${par_subset_reads:+--subset-reads "$par_subset_reads"} \ + ${par_quality_filter_threshold:+--quality-filter-threshold "$par_quality_filter_threshold"} \ + ${par_quality_filter_mask:+--quality-filter-mask "$par_quality_filter_mask"} \ + ${par_quality_encoding:+--quality-encoding "$par_quality_encoding"} \ + ${par_reconcile_pairs:+--reconcile-pairs} \ + ${par_three_prime:+--3prime} \ + ${par_filtered_out:+--filtered-out "$par_filtered_out"} \ + ${par_filtered_out2:+--filtered-out2 "$par_filtered_out2"} \ + 
${par_ignore_read_pair_suffixes:+--ignore-read-pair-suffixes} \ + ${par_random_seed:+--random-seed "$par_random_seed"} \ + ${par_temp_dir:+--temp-dir "$par_temp_dir"} \ + ${par_compresslevel:+--compresslevel "$par_compresslevel"} \ + ${par_timeit:+--timeit "$par_timeit"} \ + ${par_timeit_name:+--timeit-name "$par_timeit_name"} \ + ${par_timeit_header:+--timeit-header} \ + ${par_log:+--log "$par_log"} \ + ${par_log2stderr:+--log2stderr} \ + ${par_verbose:+--verbose "$par_verbose"} \ + ${par_error:+--error "$par_error"} + + +if [ "$par_umi_discard_read" == 1 ]; then + # discard read 1 (read 1 is written to --output, i.e. $par_output) + rm "$par_output" +elif [ "$par_umi_discard_read" == 2 ]; then + # discard read 2 (-f to bypass file existence check) + rm -f "$par_read2_out" +fi \ No newline at end of file diff --git a/src/umi_tools/umi_tools_extract/test.sh b/src/umi_tools/umi_tools_extract/test.sh new file mode 100644 index 00000000..0de5d5b3 --- /dev/null +++ b/src/umi_tools/umi_tools_extract/test.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +test_dir="${meta_resources_dir}/test_data" + +echo ">>> Testing $meta_functionality_name" + +############################################################################################################ + +echo ">>> Test 1: Testing for paired-end reads" +"$meta_executable" \ + --input "$test_dir/scrb_seq_fastq.1_30"\ + --read2_in "$test_dir/scrb_seq_fastq.2_30" \ + --bc_pattern "CCCCCCNNNNNNNNNN"\ + --bc_pattern2 "CCCCCCNNNNNNNNNN" \ + --extract_method string \ + --umi_separator '_' \ + --grouping_method directional \ + --umi_discard_read 0 \ + --output scrb_seq_fastq.1_30.extract \ + --read2_out scrb_seq_fastq.2_30.extract \ + --random_seed 1 + +echo ">> Checking if the correct files are present" +[[ ! -f "scrb_seq_fastq.1_30.extract" ]] || [[ ! -f "scrb_seq_fastq.2_30.extract" ]] && echo "Reads file missing" && exit 1 +[ ! -s "scrb_seq_fastq.1_30.extract" ] && echo "Read 1 file is empty" && exit 1 +[ ! 
-s "scrb_seq_fastq.2_30.extract" ] && echo "Read 2 file is empty" && exit 1 + + +echo ">> Checking if the files are correct" +diff -q "${meta_resources_dir}/scrb_seq_fastq.1_30.extract" "$test_dir/scrb_seq_fastq.1_30.extract" || \ + (echo "Read 1 file is not correct" && exit 1) +diff -q "${meta_resources_dir}/scrb_seq_fastq.2_30.extract" "$test_dir/scrb_seq_fastq.2_30.extract" || \ + (echo "Read 2 file is not correct" && exit 1) + +rm scrb_seq_fastq.1_30.extract scrb_seq_fastq.2_30.extract + +############################################################################################################ + +echo ">>> Test 2: Testing for paired-end reads with umi_discard_reads option" +"$meta_executable" \ + --input "$test_dir/scrb_seq_fastq.1_30" \ + --read2_in "$test_dir/scrb_seq_fastq.2_30" \ + --bc_pattern CCCCCCNNNNNNNNNN \ + --bc_pattern2 CCCCCCNNNNNNNNNN \ + --extract_method string \ + --umi_separator '_' \ + --grouping_method directional \ + --umi_discard_read 2 \ + --output scrb_seq_fastq.1_30.extract \ + --random_seed 1 + +echo ">> Checking if the correct files are present" +[ ! -f "scrb_seq_fastq.1_30.extract" ] && echo "Read 1 file is missing" && exit 1 +[ ! 
-s "scrb_seq_fastq.1_30.extract" ] && echo "Read 1 file is empty" && exit 1 +[ -f "scrb_seq_fastq.2_30.extract" ] && echo "Read 2 is not discarded" && exit 1 + +echo ">> Checking if the files are correct" +diff -q "${meta_resources_dir}/scrb_seq_fastq.1_30.extract" "$test_dir/scrb_seq_fastq.1_30.extract" || \ + (echo "Read 1 file is not correct" && exit 1) + +rm scrb_seq_fastq.1_30.extract + +############################################################################################################ + +echo ">>> Test 3: Testing for single-end reads" +"$meta_executable" \ + --input "$test_dir/slim_30.fastq" \ + --bc_pattern "^(?P.{3}).{4}(?P.{2})" \ + --extract_method regex \ + --umi_separator '_' \ + --grouping_method directional \ + --output slim_30.extract \ + --random_seed 1 + +echo ">> Checking if the correct files are present" +[ ! -f "slim_30.extract" ] && echo "Trimmed reads file missing" && exit 1 +[ ! -s "slim_30.extract" ] && echo "Trimmed reads file is empty" && exit 1 + +echo ">> Checking if the files are correct" +diff -q "${meta_resources_dir}/slim_30.extract" "$test_dir/slim_30.extract" || \ + (echo "Trimmed reads file is not correct" && exit 1) + +rm slim_30.extract + +echo ">>> Test finished successfully" +exit 0 \ No newline at end of file diff --git a/src/umi_tools/umi_tools_extract/test_data/scrb_seq_fastq.1_30 b/src/umi_tools/umi_tools_extract/test_data/scrb_seq_fastq.1_30 new file mode 100644 index 00000000..639f6243 --- /dev/null +++ b/src/umi_tools/umi_tools_extract/test_data/scrb_seq_fastq.1_30 @@ -0,0 +1,120 @@ +@SRR1058032.1 HISEQ:653:H12WDADXX:1:1101:1210:2217 length=17 +AATAACTTCCCGCGTCG ++SRR1058032.1 HISEQ:653:H12WDADXX:1:1101:1210:2217 length=17 +@@@DDDBDDF>FFHGIB +@SRR1058032.2 HISEQ:653:H12WDADXX:1:1101:1191:2236 length=17 +AGCGGGGTGCTCGTCGT ++SRR1058032.2 HISEQ:653:H12WDADXX:1:1101:1191:2236 length=17 +CCCFFFFFHHHHHJJJJ +@SRR1058032.3 HISEQ:653:H12WDADXX:1:1101:1715:2245 length=17 +CTTTAGTACCAGTCCTT ++SRR1058032.3 
HISEQ:653:H12WDADXX:1:1101:1715:2245 length=17 +BBCFFDADHHHHHHIJJ +@SRR1058032.4 HISEQ:653:H12WDADXX:1:1101:1905:2212 length=17 +AGGCGTTGTTTTTTTTT ++SRR1058032.4 HISEQ:653:H12WDADXX:1:1101:1905:2212 length=17 +CCCFFFFFHHHHHJJJJ +@SRR1058032.5 HISEQ:653:H12WDADXX:1:1101:1927:2237 length=17 +ATCGAGACATAATTGAT ++SRR1058032.5 HISEQ:653:H12WDADXX:1:1101:1927:2237 length=17 +@B@FFFFFHHHHHJJJJ +@SRR1058032.6 HISEQ:653:H12WDADXX:1:1101:1876:2243 length=17 +TGGGGGCGGTACATGAT ++SRR1058032.6 HISEQ:653:H12WDADXX:1:1101:1876:2243 length=17 +BBBFFFFFHHHHHJJJJ +@SRR1058032.7 HISEQ:653:H12WDADXX:1:1101:2491:2207 length=17 +CTATATGTTTGCGCTGT ++SRR1058032.7 HISEQ:653:H12WDADXX:1:1101:2491:2207 length=17 +1=BDFFFFHHHHHJJJJ +@SRR1058032.8 HISEQ:653:H12WDADXX:1:1101:2513:2219 length=17 +CTCCCGCATGCTGCTGT ++SRR1058032.8 HISEQ:653:H12WDADXX:1:1101:2513:2219 length=17 +?BBFFFFFHHHHHJJJJ +@SRR1058032.9 HISEQ:653:H12WDADXX:1:1101:2604:2231 length=17 +GAGCCCTGAGGGGATCT ++SRR1058032.9 HISEQ:653:H12WDADXX:1:1101:2604:2231 length=17 +1??DDDFD>DFDGFGHG +@SRR1058032.10 HISEQ:653:H12WDADXX:1:1101:2936:2218 length=17 +AGCGGGGTTCGCGGTTT ++SRR1058032.10 HISEQ:653:H12WDADXX:1:1101:2936:2218 length=17 +CCCFFFFFHHHHHJIJI +@SRR1058032.11 HISEQ:653:H12WDADXX:1:1101:3447:2241 length=17 +AGAATTGCCTGGATTTT ++SRR1058032.11 HISEQ:653:H12WDADXX:1:1101:3447:2241 length=17 +@CCFFFFAFHHHGJJJJ +@SRR1058032.12 HISEQ:653:H12WDADXX:1:1101:3620:2196 length=17 +AGGCGGGGCAACGGGTT ++SRR1058032.12 HISEQ:653:H12WDADXX:1:1101:3620:2196 length=17 +CCCFFFFFHHGHHJJHH +@SRR1058032.13 HISEQ:653:H12WDADXX:1:1101:3875:2206 length=17 +GTCCCCGCGTCGTGTAG ++SRR1058032.13 HISEQ:653:H12WDADXX:1:1101:3875:2206 length=17 +@C@FFFFFHFFGHJJJJ +@SRR1058032.14 HISEQ:653:H12WDADXX:1:1101:4131:2215 length=17 +CCACGCATTCACTCGGT ++SRR1058032.14 HISEQ:653:H12WDADXX:1:1101:4131:2215 length=17 +BBBDFFFFHHHHHJJJJ +@SRR1058032.15 HISEQ:653:H12WDADXX:1:1101:4284:2241 length=17 +TGCGCAATAAGCGCTAT ++SRR1058032.15 HISEQ:653:H12WDADXX:1:1101:4284:2241 
length=17 ++:=DDDDDBHHGDIBEH +@SRR1058032.16 HISEQ:653:H12WDADXX:1:1101:4599:2232 length=17 +CGCTGGCAGAGCCCGGT ++SRR1058032.16 HISEQ:653:H12WDADXX:1:1101:4599:2232 length=17 +@BCFFFFFHHHHHJJJJ +@SRR1058032.17 HISEQ:653:H12WDADXX:1:1101:5428:2200 length=17 +AGGCGGTGCATAGTCTT ++SRR1058032.17 HISEQ:653:H12WDADXX:1:1101:5428:2200 length=17 +CCCFFFFFHHHHHIJIH +@SRR1058032.18 HISEQ:653:H12WDADXX:1:1101:5336:2218 length=17 +GTCCCCCGCGTGTGACT ++SRR1058032.18 HISEQ:653:H12WDADXX:1:1101:5336:2218 length=17 +GEGCDG9FD# +@SRR1058032.5 HISEQ:653:H12WDADXX:1:1101:1927:2237 length=34 +GTGTAGGGAAAGAGTGTAAGGAAAGAGTGTAGCN ++SRR1058032.5 HISEQ:653:H12WDADXX:1:1101:1927:2237 length=34 +?=??B?DB2ACCAEAEFHHIHHHIHFHCEHHIG# +@SRR1058032.6 HISEQ:653:H12WDADXX:1:1101:1876:2243 length=34 +CCTATATAGTATAGCTTCCCATCTTCTTTGAGAN ++SRR1058032.6 HISEQ:653:H12WDADXX:1:1101:1876:2243 length=34 +CCCFFFFFHDHBHEIIJJJJIIIJJJGGGIGIE# +@SRR1058032.7 HISEQ:653:H12WDADXX:1:1101:2491:2207 length=34 +ATTAAAGACAAACTACAACTCATATGAGGCATTN ++SRR1058032.7 HISEQ:653:H12WDADXX:1:1101:2491:2207 length=34 +@@@DDDADDHHHFBFAHIGBHHFAH;E@@?AB>F@BF3;3?1C?<# +@SRR1058032.11 HISEQ:653:H12WDADXX:1:1101:3447:2241 length=34 +CCCACACTCTTTCCCTACACGACGCTACACTCTN ++SRR1058032.11 HISEQ:653:H12WDADXX:1:1101:3447:2241 length=34 +@@@DDFDDBHBFHGI)C:D@@@B# +@SRR1058032.12 HISEQ:653:H12WDADXX:1:1101:3620:2196 length=34 +GTGTATGGAAAGAGTGTAGGGAAAGAGTGTAGGN ++SRR1058032.12 HISEQ:653:H12WDADXX:1:1101:3620:2196 length=34 +@@@DDDDAHHHFHIABEEEAB??CFBF?C@BFF# +@SRR1058032.13 HISEQ:653:H12WDADXX:1:1101:3875:2206 length=34 +CTCTTTCCCTACACTCTTTCCCTACACGACGCTN ++SRR1058032.13 HISEQ:653:H12WDADXX:1:1101:3875:2206 length=34 +@@@DDDAAADHDHDGDGIIIIIJJJJJJIJIIJ# +@SRR1058032.14 HISEQ:653:H12WDADXX:1:1101:4131:2215 length=34 +GTGTAGCGTCGTGTAGGGAAAGAGTGTGTGGAAN ++SRR1058032.14 HISEQ:653:H12WDADXX:1:1101:4131:2215 length=34 +@@@DDDDD?DFDCAEFHIGGFHEH:D1C:CG@F# +@SRR1058032.15 HISEQ:653:H12WDADXX:1:1101:4284:2241 length=34 +GTGTATGGAAAGAGTGTGCGTCGTACGTGTAGAN 
++SRR1058032.15 HISEQ:653:H12WDADXX:1:1101:4284:2241 length=34 +@?@DDFFFHHHHGDAC:CHGGIIGIIIFHFGHB# +@SRR1058032.16 HISEQ:653:H12WDADXX:1:1101:4599:2232 length=34 +ACTCTTTCCCTACACTCTTTCCCTACACGACGCN ++SRR1058032.16 HISEQ:653:H12WDADXX:1:1101:4599:2232 length=34 +@@BCBE@9;EGGGGGIHJJIJHIGG# +@SRR1058032.17 HISEQ:653:H12WDADXX:1:1101:5428:2200 length=34 +GATTCTTCAAATGAGGACTATGCGGGACATGAAN ++SRR1058032.17 HISEQ:653:H12WDADXX:1:1101:5428:2200 length=34 +@@@DDDDDFHHFAHB;FHIIIIIIIIFHEHIHI# +@SRR1058032.18 HISEQ:653:H12WDADXX:1:1101:5336:2218 length=34 +GCGTCGTGTAGGGAAAGAGTGTAGCGTCGTGTAN ++SRR1058032.18 HISEQ:653:H12WDADXX:1:1101:5336:2218 length=34 +@@@DDDDDBHEGGFGHGGIEGII# +@SRR1058032.24 HISEQ:653:H12WDADXX:1:1101:5649:2244 length=34 +AGACGGACCAGAGCGAAAGCATTTGCCAAGAATN ++SRR1058032.24 HISEQ:653:H12WDADXX:1:1101:5649:2244 length=34 +CCCFFFDFGHHHGJIIJJIJHEDD919CGGHJ@# +@SRR1058032.25 HISEQ:653:H12WDADXX:1:1101:5910:2207 length=34 +GAGTATAGGGAAAGAGTTTTTTTTTTTTTTTTTN ++SRR1058032.25 HISEQ:653:H12WDADXX:1:1101:5910:2207 length=34 +?=?DDDD>AB:ACEEGHIJJIJJJJIIJJHFDD# +@SRR1058032.26 HISEQ:653:H12WDADXX:1:1101:5757:2217 length=34 +CCTTTTATACAATACAAAGCTTTGCTTTTTTTTN ++SRR1058032.26 HISEQ:653:H12WDADXX:1:1101:5757:2217 length=34 +???DDDDDDDDD4EEEII@A<:33<33,22110# +@SRR1058032.27 HISEQ:653:H12WDADXX:1:1101:5790:2248 length=34 +ATCACAGCTGGAGAGATCTTGATCTTCATGGTGN ++SRR1058032.27 HISEQ:653:H12WDADXX:1:1101:5790:2248 length=34 +CCCFFFFFHHFHGGIIIIJIEAHCEHHEFECGD# +@SRR1058032.28 HISEQ:653:H12WDADXX:1:1101:6079:2195 length=34 +GTACTAGGCATCGTCATCCAATGCGACGAGTCCN ++SRR1058032.28 HISEQ:653:H12WDADXX:1:1101:6079:2195 length=34 +@@CFFDDFHHGHHIJJJIJJJIGGHIDGGEGCDG9FD# +@SRR1058032.5_ATCGAGGTGTAG_ACATAATTGAGGAAAGAGTG HISEQ:653:H12WDADXX:1:1101:1927:2237 length=34 +TAAGGAAAGAGTGTAGCN ++ +FHHIHHHIHFHCEHHIG# +@SRR1058032.6_TGGGGGCCTATA_CGGTACATGATAGTATAGCT HISEQ:653:H12WDADXX:1:1101:1876:2243 length=34 +TCCCATCTTCTTTGAGAN ++ +JJJJIIIJJJGGGIGIE# +@SRR1058032.7_CTATATATTAAA_GTTTGCGCTGGACAAACTAC 
HISEQ:653:H12WDADXX:1:1101:2491:2207 length=34 +AACTCATATGAGGCATTN ++ +HIGBHHF@BF3;3?1C?<# +@SRR1058032.11_AGAATTCCCACA_GCCTGGATTTCTCTTTCCCT HISEQ:653:H12WDADXX:1:1101:3447:2241 length=34 +ACACGACGCTACACTCTN ++ +F@GFBFEE>)C:D@@@B# +@SRR1058032.12_AGGCGGGTGTAT_GGCAACGGGTGGAAAGAGTG HISEQ:653:H12WDADXX:1:1101:3620:2196 length=34 +TAGGGAAAGAGTGTAGGN ++ +EEEAB??CFBF?C@BFF# +@SRR1058032.13_GTCCCCCTCTTT_GCGTCGTGTACCCTACACTC HISEQ:653:H12WDADXX:1:1101:3875:2206 length=34 +TTTCCCTACACGACGCTN ++ +GIIIIIJJJJJJIJIIJ# +@SRR1058032.14_CCACGCGTGTAG_ATTCACTCGGCGTCGTGTAG HISEQ:653:H12WDADXX:1:1101:4131:2215 length=34 +GGAAAGAGTGTGTGGAAN ++ +HIGGFHEH:D1C:CG@F# +@SRR1058032.15_TGCGCAGTGTAT_ATAAGCGCTAGGAAAGAGTG HISEQ:653:H12WDADXX:1:1101:4284:2241 length=34 +TGCGTCGTACGTGTAGAN ++ +:CHGGIIGIIIFHFGHB# +@SRR1058032.16_CGCTGGACTCTT_CAGAGCCCGGTCCCTACACT HISEQ:653:H12WDADXX:1:1101:4599:2232 length=34 +CTTTCCCTACACGACGCN ++ +;EGGGGGIHJJIJHIGG# +@SRR1058032.17_AGGCGGGATTCT_TGCATAGTCTTCAAATGAGG HISEQ:653:H12WDADXX:1:1101:5428:2200 length=34 +ACTATGCGGGACATGAAN ++ +FHIIIIIIIIFHEHIHI# +@SRR1058032.18_GTCCCCGCGTCG_CGCGTGTGACTGTAGGGAAA HISEQ:653:H12WDADXX:1:1101:5336:2218 length=34 +GAGTGTAGCGTCGTGTAN ++ +DGF+<BHEGGFGHGGIEGII# +@SRR1058032.24_CGTTAAAGACGG_TAATTGTGGTACCAGAGCGA HISEQ:653:H12WDADXX:1:1101:5649:2244 length=34 +AAGCATTTGCCAAGAATN ++ +JJIJHEDD919CGGHJ@# +@SRR1058032.25_AAAAAAGAGTAT_AAAAAAAAAAAGGGAAAGAG HISEQ:653:H12WDADXX:1:1101:5910:2207 length=34 +TTTTTTTTTTTTTTTTTN ++ +HIJJIJJJJIIJJHFDD# +@SRR1058032.26_GCCGACCCTTTT_CAACGATTTTATACAATACA HISEQ:653:H12WDADXX:1:1101:5757:2217 length=34 +AAGCTTTGCTTTTTTTTN ++ +II@A<:33<33,22110# +@SRR1058032.27_AATCAAATCACA_GACCACTGAAGCTGGAGAGA HISEQ:653:H12WDADXX:1:1101:5790:2248 length=34 +TCTTGATCTTCATGGTGN ++ +IIJIEAHCEHHEFECGD# +@SRR1058032.28_CGCGCTGTACTA_TTTGTTTTTTGGCATCGTCA HISEQ:653:H12WDADXX:1:1101:6079:2195 length=34 +TCCAATGCGACGAGTCCN ++ +JIJJJIGGHIDG slim_30.fastq +head -n 120 scrb_seq_fastq.1 > scrb_seq_fastq.1_30 +head -n 120 
scrb_seq_fastq.2 > scrb_seq_fastq.2_30 +rm slim.fastq scrb_seq_fastq.1 scrb_seq_fastq.2 + +# Generate expected output +# Test 1 and 2 +umi_tools extract \ + --stdin "scrb_seq_fastq.1_30" \ + --read2-in "scrb_seq_fastq.2_30" \ + --bc-pattern "CCCCCCNNNNNNNNNN" \ + --bc-pattern2 "CCCCCCNNNNNNNNNN" \ + --extract-method string \ + --stdout scrb_seq_fastq.1_30.extract \ + --read2-out scrb_seq_fastq.2_30.extract \ + --random-seed 1 + +# Test 3 +umi_tools extract \ + --stdin "slim_30.fastq" \ + --bc-pattern "^(?P.{3}).{4}(?P.{2})" \ + --extract-method regex \ + --stdout slim_30.extract \ + --random-seed 1 \ No newline at end of file diff --git a/src/umi_tools/umi_tools_extract/test_data/slim_30.extract b/src/umi_tools/umi_tools_extract/test_data/slim_30.extract new file mode 100644 index 00000000..1c20f782 --- /dev/null +++ b/src/umi_tools/umi_tools_extract/test_data/slim_30.extract @@ -0,0 +1,120 @@ +@SRR2057595.7_CAGAA +GTTCTCTCGGTGGGACCTC ++ +FFFFHHHJJJFGIJIJJIJ +@SRR2057595.9_TTGAA +GTTCTCTGATGCCCTCTTCTGGTGCATCTGAAGACAGCTACAGTGTACTTAGATATAATAAATAAATCTT ++ +FDBDFHHIGGEHJGGIHGHGGCAFCHGIGEHIJJJJIJJJIHIIIIIIJIIIIIGHIIGGIJGIIJIIJ@ +@SRR2057595.14_TGGAT +GTTAGCGGCCCCGGGTTCCTCCCGGGGCTACGCCTGTCTGAGCGTCGCT ++ +FFFFHHHJJIJJJJIGHJJIIJJJJJIJHFHHFFEDEEEEDDDDBDDDD +@SRR2057595.22_ACGAT +GTTAGCGGCCCCGGGTTCCTCCCGGGGCTACGCCTGTCTGAGCGTCGC ++ +FFFFHHHJJJJJJJJIJJJJJJJJJJJJHHHFFFEDEEEEDDDDBDDD +@SRR2057595.23_GCGTT +GTTACCTAAGGCGAGCTCAGGGAGGACAGAAACCTCCCGTGGAGCAGAAGGGCAAAAGCTCGCTTGATCT ++ +FFFFHHHJJJJJJJJJJJJJJJIJJIIJJJJJJJJJJJJIJJHHHHHFFFFDDDDDDDDDDDDDDDDDDA +@SRR2057595.29_ACGTT +GTTCGCGGCCCCGGGTTCCTCCCGGGGCTACGCCTGTCTGAGCGTCGCTT ++ +FFFFHHHJJJJJJJJHIJJJJJJIJJJJHHHFFDEDEEDDCDDDBDDDDD +@SRR2057595.30_GAGAA +GTTGAATCCGTGCTAAGAAGAA ++ +DFFFHHHJJJJIJJJJJJJJJJ +@SRR2057595.33_TCGAT +GTTTCTCGTCTGATCTCGGAAGCTAAGCAGGGTCGGGCCTGGTTAGTACTTGGATGGGAGACCGCCTGGG ++ +FFFFHHHJJJJJJJJJJJJJJJJJJJJJJJJJDHIJJJJIJJJHGGEEHFFFFFFEDDEDDDDDDDDDDB +@SRR2057595.35_ACGCT +GTTACCCGGGGCTACGCCTGTCTGAGCGTCGCT ++ 
+DFFFHHHJJJJJJIJJJJJJIJJJJJJJHIIJJ +@SRR2057595.38_GGGCC +GTTATGCATGTTTATAGTTTCTAGTTTTGGCATTTTGTGTGGTCTCTTTTTTGTT ++ +DFFFHHHJJJJJJJJJJHJJIJJJIJJJJJJJJJJJJGIGHJHIJJIJJJJJJJJ +@SRR2057595.42_TAGGA +GTTGTAAGTTATACACTGACTAAGTCATCTGTTACTGCCTTCACTGAGTTTTTATTTCCTTT ++ +DFFFHHHJJJJJJJJJJJJJJJJJIIJJJJGJJJJJJJJJJJJJJJIIHIJJJJJJJIJJJI +@SRR2057595.45_CTGGC +GTTTTGCGGAAGGATCATTA ++ +DDDDFFDFFAGFEB@ACB9< +@SRR2057595.65_GCGCG +GTTTGAGCTTGCTCCGTCCACTCAACGCATCGACCTGGTATTGCAGTACCTCCAGGAACGGTGCACCAAG ++ +FFFFHHHJJJJJJHJIHHIIIIIIIJHJBHIHBFHHJI@EHJJHHHHHHHFFFBDE?AEBD=AB@CDBD? +@SRR2057595.67_AAGGT +GTTGTTTTGAGGTCCTGCTCGTGCAGGGT ++ +DDDFHHHHGFHGFGGIIDGHHIGIJJJJ9 +@SRR2057595.69_ATTAT +GGTTTTTGTTTTTCCTCCTTCTCTTTCTAAA ++ +FFFFHHHHJJJJJJJJJJJJJJJJJJIJIJJ +@SRR2057595.70_TTAAA +GGTTTTGTAATTTTATGAGGTCCCATTTGTCAATTCTT ++ +DDDD2CDFA@FBGHCCHFHGBFHGHIGGDHGHIIFCFF +@SRR2057595.71_TGCCA +GGTTTATTAGCATGGCCCCTGCGCAAGGATGACACGCAAATTCGTGAAGCGTTCCATATTT ++ +FFFFHGHHJJJJJJJJJJIIJJIJIJJIFHJIIIJJJIJJJJJJHIIHHHHFFFDEECEEE +@SRR2057595.73_TGACA +GGTTGCGAGTGCCTAGTGGGCCACTTTTGGTAAGCAGAACTGGCGCTGCGGGA ++ +FFFFGFFHC@EBHGHGAEGIIHIIIIJJJJGHIIIJIJIIGHIJIJJIGGEFD +@SRR2057595.74_AATTC +GGTTTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ++ +FFFFDFFHFIJJJGGGGJJGDDDDDDDDDDDDDBDDDDDDBBDDDDDDDDDDDDDDDDDDDBBBDDDDBD> +@SRR2057595.77_GCGGA +GTTCTCCCACTTCTGAC ++ +FFFFDHHHIJJJIJJJJ +@SRR2057595.82_GAGAC +GGTTTTCCTCCCGGGGCTACGCCTGTCTGAGCGTCGCT ++ +FFFFHHHHJJJJJJJJJJJJJJJJIJJJJJIJIIJJJH +@SRR2057595.83_TGGAT +GTTGCCCGGGGCTACGCCTGTCTGAGCGTCGCT ++ +DFFFHHHJJIJJJJJJIJJJIJJIGGHIFHGEH +@SRR2057595.86_ACCAC +GGTTTTTTTTTAAATGTAAAGCATAAATAAAAAGCCTTTGTGGACTGTGAAAAAAAAAAAAAAAAAAAAAA ++ +FFFFHHHHJJJJJJJJIIJJJJJJJJJIJJJJJJJJJJJJGIJIIJJIJJJJJJHFDDDDDDDDDDDDDB> +@SRR2057595.88_TCAGC +GGTTCTAAGCATAGATAACCATATATCAGGGGGAGCTCCATGTTCTAGTCCTGCAAGCGCCTGGGCAATAA ++ +FFFFHHHHJJJJJJIJJJJJIJJJJJJIJJIJJIJJJJJJJJJHIJJJJJJIIIHJIHHHFFDDDDEDDD@ +@SRR2057595.99_TGACA +GGTTTCGCTGCGATCTATTGAAAGTCAGCCCTCGACACAAGGGTTTGT ++ 
+FFFFDHHHIHIIIJJIJJJJJIGEHGFHIJJGHIHADHIIJIJJJIJG diff --git a/src/umi_tools/umi_tools_extract/test_data/slim_30.fastq b/src/umi_tools/umi_tools_extract/test_data/slim_30.fastq new file mode 100644 index 00000000..444a7a7a --- /dev/null +++ b/src/umi_tools/umi_tools_extract/test_data/slim_30.fastq @@ -0,0 +1,120 @@ +@SRR2057595.7 +CAGGTTCAATCTCGGTGGGACCTC ++SRR2057595.7 +1=DFFFFHHHHHJJJFGIJIJJIJ +@SRR2057595.9 +TTGGTTCAATCTGATGCCCTCTTCTGGTGCATCTGAAGACAGCTACAGTGTACTTAGATATAATAAATAAATCTT ++SRR2057595.9 +4=DFDBDHHFHHIGGEHJGGIHGHGGCAFCHGIGEHIJJJJIJJJIHIIIIIIJIIIIIGHIIGGIJGIIJIIJ@ +@SRR2057595.14 +TGGGTTAATGCGGCCCCGGGTTCCTCCCGGGGCTACGCCTGTCTGAGCGTCGCT ++SRR2057595.14 +1=DFFFFHHHHHJJIJJJJIGHJJIIJJJJJIJHFHHFFEDEEEEDDDDBDDDD +@SRR2057595.22 +ACGGTTAATGCGGCCCCGGGTTCCTCCCGGGGCTACGCCTGTCTGAGCGTCGC ++SRR2057595.22 +1=DFFFFHHHHHJJJJJJJJIJJJJJJJJJJJJHHHFFFEDEEEEDDDDBDDD +@SRR2057595.23 +GCGGTTATTCCTAAGGCGAGCTCAGGGAGGACAGAAACCTCCCGTGGAGCAGAAGGGCAAAAGCTCGCTTGATCT ++SRR2057595.23 +1=DFFFFHHHHHJJJJJJJJJJJJJJJIJJIIJJJJJJJJJJJJIJJHHHHHFFFFDDDDDDDDDDDDDDDDDDA +@SRR2057595.29 +ACGGTTCTTGCGGCCCCGGGTTCCTCCCGGGGCTACGCCTGTCTGAGCGTCGCTT ++SRR2057595.29 +1=DFFFFHHHHHJJJJJJJJHIJJJJJJIJJJJHHHFFDEDEEDDCDDDBDDDDD +@SRR2057595.30 +GAGGTTGAAAATCCGTGCTAAGAAGAA ++SRR2057595.30 +4=DDFFFHHHHHJJJJIJJJJJJJJJJ +@SRR2057595.33 +TCGGTTTATCTCGTCTGATCTCGGAAGCTAAGCAGGGTCGGGCCTGGTTAGTACTTGGATGGGAGACCGCCTGGG ++SRR2057595.33 +1=DFFFFHHHHHJJJJJJJJJJJJJJJJJJJJJJJJJDHIJJJJIJJJHGGEEHFFFFFFEDDEDDDDDDDDDDB +@SRR2057595.35 +ACGGTTACTCCCGGGGCTACGCCTGTCTGAGCGTCGCT ++SRR2057595.35 +1=DDFFFHHHHHJJJJJJIJJJJJJIJJJJJJJHIIJJ +@SRR2057595.38 +GGGGTTACCTGCATGTTTATAGTTTCTAGTTTTGGCATTTTGTGTGGTCTCTTTTTTGTT ++SRR2057595.38 +1=DDFFFHHHHHJJJJJJJJJJHJJIJJJIJJJJJJJJJJJJGIGHJHIJJIJJJJJJJJ +@SRR2057595.42 +TAGGTTGGATAAGTTATACACTGACTAAGTCATCTGTTACTGCCTTCACTGAGTTTTTATTTCCTTT ++SRR2057595.42 +1=DDFFFHHHHHJJJJJJJJJJJJJJJJJIIJJJJGJJJJJJJJJJJJJJJIIHIJJJJJJJIJJJI +@SRR2057595.45 +CTGGTTTGCTGCGGAAGGATCATTA ++SRR2057595.45 
+1:DDDDDDDFFDFFAGFEB@ACB9< +@SRR2057595.65 +GCGGTTTCGGAGCTTGCTCCGTCCACTCAACGCATCGACCTGGTATTGCAGTACCTCCAGGAACGGTGCACCAAG ++SRR2057595.65 +1=DFFFFHHHHHJJJJJJHJIHHIIIIIIIJHJBHIHBFHHJI@EHJJHHHHHHHFFFBDE?AEBD=AB@CDBD? +@SRR2057595.67 +AAGGTTGGTTTTTGAGGTCCTGCTCGTGCAGGGT ++SRR2057595.67 +1:BDDDFHFHHHHGFHGFGGIIDGHHIGIJJJJ9 +@SRR2057595.69 +ATTGGTTATTTTGTTTTTCCTCCTTCTCTTTCTAAA ++SRR2057595.69 +CCCFFFFFHHHHHJJJJJJJJJJJJJJJJJJIJIJJ +@SRR2057595.70 +TTAGGTTAATTGTAATTTTATGAGGTCCCATTTGTCAATTCTT ++SRR2057595.70 +@@@DDDDD+2CDFA@FBGHCCHFHGBFHGHIGGDHGHIIFCFF +@SRR2057595.71 +TGCGGTTCATATTAGCATGGCCCCTGCGCAAGGATGACACGCAAATTCGTGAAGCGTTCCATATTT ++SRR2057595.71 +CCCFFFFFHHGHHJJJJJJJJJJIIJJIJIJJIFHJIIIJJJIJJJJJJHIIHHHHFFFDEECEEE +@SRR2057595.73 +TGAGGTTCAGCGAGTGCCTAGTGGGCCACTTTTGGTAAGCAGAACTGGCGCTGCGGGA ++SRR2057595.73 +@@@FFFFFHGFFHC@EBHGHGAEGIIHIIIIJJJJGHIIIJIJIIGHIJIJJIGGEFD +@SRR2057595.74 +AATGGTTTCTAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ++SRR2057595.74 +@CCFFFFFGDFFHFIJJJGGGGJJGDDDDDDDDDDDDDBDDDDDDBBDDDDDDDDDDDDDDDDDDDBBBDDDDBD> +@SRR2057595.77 +GCGGTTCGATCCCACTTCTGAC ++SRR2057595.77 +1=DFFFFHGDHHHIJJJIJJJJ +@SRR2057595.82 +GAGGGTTACTTCCTCCCGGGGCTACGCCTGTCTGAGCGTCGCT ++SRR2057595.82 +CBCFFFFFHHHHHJJJJJJJJJJJJJJJJIJJJJJIJIIJJJH +@SRR2057595.83 +TGGGTTGATCCCGGGGCTACGCCTGTCTGAGCGTCGCT ++SRR2057595.83 +1=DDFFFHHHHHJJIJJJJJJIJJJIJJIGGHIFHGEH +@SRR2057595.86 +ACCGGTTACTTTTTTTAAATGTAAAGCATAAATAAAAAGCCTTTGTGGACTGTGAAAAAAAAAAAAAAAAAAAAAA ++SRR2057595.86 +BCCFFFFFHHHHHJJJJJJJJIIJJJJJJJJJIJJJJJJJJJJJJGIJIIJJIJJJJJJHFDDDDDDDDDDDDDB> +@SRR2057595.88 +TCAGGTTGCCTAAGCATAGATAACCATATATCAGGGGGAGCTCCATGTTCTAGTCCTGCAAGCGCCTGGGCAATAA ++SRR2057595.88 +CCCFFFFFHHHHHJJJJJJIJJJJJIJJJJJJIJJIJJIJJJJJJJJJHIJJJJJJIIIHJIHHHFFDDDDEDDD@ +@SRR2057595.99 +TGAGGTTCATCGCTGCGATCTATTGAAAGTCAGCCCTCGACACAAGGGTTTGT ++SRR2057595.99 +B@CFFFFFFDHHHIHIIIJJIJJJJJIGEHGFHIJJGHIHADHIIJIJJJIJG From da414e72c60758895b16818309d6c147c288dd84 Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Mon, 29 Jul 
2024 09:55:17 +0200 Subject: [PATCH 12/25] Add star solo component (#62) * add star solo component * change arguments from camelCase to snake_case * get rid of multiple_sep * drop star_solo component and just add arguments to star_align_reads * Update src/star/star_align_reads/script.py Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> --------- Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> --- CHANGELOG.md | 10 +- .../star_align_reads/argument_groups.yaml | 1034 +++++++++++++---- src/star/star_align_reads/config.vsh.yaml | 2 + src/star/star_align_reads/script.py | 20 +- src/star/star_align_reads/test.sh | 12 +- .../star_align_reads/utils/process_params.R | 54 +- src/star/star_genome_generate/config.vsh.yaml | 43 +- src/star/star_genome_generate/script.sh | 30 +- src/star/star_genome_generate/test.sh | 6 +- 9 files changed, 903 insertions(+), 308 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 665b587d..c4575cb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,14 @@ # biobox x.x.x -## NEW FEATURES +## BREAKING CHANGES + +* `star/star_align_reads`: Change all arguments from `--camelCase` to `--snake_case` (PR #62). + +* `star/star_genome_generate`: Change all arguments from `--camelCase` to `--snake_case` (PR #62). + +## NEW FUNCTIONALITY + +* `star/star_align_reads`: Add star solo related arguments (PR #62). * `bd_rhapsody/bd_rhapsody_make_reference`: Create a reference for the BD Rhapsody pipeline (PR #75). diff --git a/src/star/star_align_reads/argument_groups.yaml b/src/star/star_align_reads/argument_groups.yaml index e6a1c874..7c804dd3 100644 --- a/src/star/star_align_reads/argument_groups.yaml +++ b/src/star/star_align_reads/argument_groups.yaml @@ -1,19 +1,23 @@ argument_groups: - name: Run Parameters arguments: - - name: --runRNGseed + - name: --run_rng_seed type: integer description: random number generator seed. 
+ info: + orig_name: --runRNGseed example: 777 - name: Genome Parameters arguments: - - name: --genomeDir + - name: --genome_dir type: file description: path to the directory where genome files are stored (for --runMode alignReads) or will be generated (for --runMode generateGenome) + info: + orig_name: --genomeDir example: ./GenomeDir/ - required: yes - - name: --genomeLoad + required: true + - name: --genome_load type: string description: |- mode of shared memory usage for the genome files. Only used with --runMode alignReads. @@ -23,132 +27,162 @@ argument_groups: - LoadAndExit ... load genome into shared memory and exit, keeping the genome in memory for future runs - Remove ... do not map anything, just remove loaded genome from memory - NoSharedMemory ... do not use shared memory, each job will have its own private copy of the genome + info: + orig_name: --genomeLoad example: NoSharedMemory - - name: --genomeFastaFiles + - name: --genome_fasta_files type: file description: |- path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped. Required for the genome generation (--runMode genomeGenerate). Can also be used in the mapping (--runMode alignReads) to add extra (new) sequences to the genome (e.g. spike-ins). - multiple: yes - multiple_sep: ; - - name: --genomeFileSizes + info: + orig_name: --genomeFastaFiles + multiple: true + - name: --genome_file_sizes type: integer description: genome files exact sizes in bytes. Typically, this should not be defined by the user. + info: + orig_name: --genomeFileSizes example: 0 - multiple: yes - multiple_sep: ; - - name: --genomeTransformOutput + multiple: true + - name: --genome_transform_output type: string description: |- which output to transform back to original genome - SAM ... SAM/BAM alignments - SJ ... splice junctions (SJ.out.tab) - - Quant ... quantifications (from --quantMode option) + - Quant ... 
quantifications (from --quant_mode option) - None ... no transformation of the output - multiple: yes - multiple_sep: ; - - name: --genomeChrSetMitochondrial + info: + orig_name: --genomeTransformOutput + multiple: true + - name: --genome_chr_set_mitochondrial type: string description: names of the mitochondrial chromosomes. Presently only used for STARsolo statistics output/ + info: + orig_name: --genomeChrSetMitochondrial example: - chrM - M - MT - multiple: yes - multiple_sep: ; + multiple: true - name: Splice Junctions Database arguments: - - name: --sjdbFileChrStartEnd + - name: --sjdb_file_chr_start_end type: string description: path to the files with genomic coordinates (chr start end strand) for the splice junction introns. Multiple files can be supplied and will be concatenated. - multiple: yes - multiple_sep: ; - - name: --sjdbGTFfile + info: + orig_name: --sjdbFileChrStartEnd + multiple: true + - name: --sjdb_gtf_file type: file description: path to the GTF file with annotations - - name: --sjdbGTFchrPrefix + info: + orig_name: --sjdbGTFfile + - name: --sjdb_gtf_chr_prefix type: string description: prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL annotations with UCSC genomes) - - name: --sjdbGTFfeatureExon + info: + orig_name: --sjdbGTFchrPrefix + - name: --sjdb_gtf_feature_exon type: string description: feature type in GTF file to be used as exons for building transcripts + info: + orig_name: --sjdbGTFfeatureExon example: exon - - name: --sjdbGTFtagExonParentTranscript + - name: --sjdb_gtf_tag_exon_parent_transcript type: string description: GTF attribute name for parent transcript ID (default "transcript_id" works for GTF files) + info: + orig_name: --sjdbGTFtagExonParentTranscript example: transcript_id - - name: --sjdbGTFtagExonParentGene + - name: --sjdb_gtf_tag_exon_parent_gene type: string description: GTF attribute name for parent gene ID (default "gene_id" works for GTF files) + info: + orig_name: --sjdbGTFtagExonParentGene example: gene_id - - name: --sjdbGTFtagExonParentGeneName + - name: --sjdb_gtf_tag_exon_parent_gene_name type: string description: GTF attribute name for parent gene name + info: + orig_name: --sjdbGTFtagExonParentGeneName example: gene_name - multiple: yes - multiple_sep: ; - - name: --sjdbGTFtagExonParentGeneType + multiple: true + - name: --sjdb_gtf_tag_exon_parent_gene_type type: string description: GTF attribute name for parent gene type + info: + orig_name: --sjdbGTFtagExonParentGeneType example: - gene_type - gene_biotype - multiple: yes - multiple_sep: ; - - name: --sjdbOverhang + multiple: true + - name: --sjdb_overhang type: integer description: length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1) + info: + orig_name: --sjdbOverhang example: 100 - - name: --sjdbScore + - name: --sjdb_score type: integer description: extra alignment score for alignments that cross database junctions + info: + orig_name: --sjdbScore example: 2 - - name: --sjdbInsertSave + - name: --sjdb_insert_save type: string description: |- which files to save when sjdb junctions are inserted on the fly at the mapping step - Basic 
... only small junction / transcript files - All ... all files including big Genome, SA and SAindex - this will create a complete genome directory + info: + orig_name: --sjdbInsertSave example: Basic - name: Variation parameters arguments: - - name: --varVCFfile + - name: --var_vcf_file type: string description: path to the VCF file that contains variation data. The 10th column should contain the genotype information, e.g. 0/1 + info: + orig_name: --varVCFfile - name: Read Parameters arguments: - - name: --readFilesType + - name: --read_files_type type: string description: |- format of input read files - Fastx ... FASTA or FASTQ - - SAM SE ... SAM or BAM single-end reads; for BAM use --readFilesCommand samtools view - - SAM PE ... SAM or BAM paired-end reads; for BAM use --readFilesCommand samtools view + - SAM SE ... SAM or BAM single-end reads; for BAM use --read_files_command samtools view + - SAM PE ... SAM or BAM paired-end reads; for BAM use --read_files_command samtools view + info: + orig_name: --readFilesType example: Fastx - - name: --readFilesSAMattrKeep + - name: --read_files_sam_attr_keep type: string description: |- - for --readFilesType SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL + for --read_files_type SAM SE/PE, which SAM tags to keep in the output BAM, e.g.: --readFilesSAMtagsKeep RG PL - All ... keep all tags - None ... do not keep any tags + info: + orig_name: --readFilesSAMattrKeep example: All - multiple: yes - multiple_sep: ; - - name: --readFilesManifest + multiple: true + - name: --read_files_manifest type: file description: |- path to the "manifest" file with the names of read files. The manifest file should contain 3 tab-separated columns: @@ -158,45 +192,57 @@ argument_groups: Spaces, but not tabs are allowed in file names. If read_group_line does not start with ID:, it can only contain one ID field, and ID: will be added to it. 
If read_group_line starts with ID:, it can contain several fields separated by $tab$, and all fields will be be copied verbatim into SAM @RG header line. - - name: --readFilesPrefix + info: + orig_name: --readFilesManifest + - name: --read_files_prefix type: string description: prefix for the read files names, i.e. it will be added in front of the strings in --readFilesIn - - name: --readFilesCommand + info: + orig_name: --readFilesPrefix + - name: --read_files_command type: string description: |- command line to execute for each of the input file. This command should generate FASTA or FASTQ text and send it to stdout For example: zcat - to uncompress .gz files, bzcat - to uncompress .bz2 files, etc. - multiple: yes - multiple_sep: ; - - name: --readMapNumber + info: + orig_name: --readFilesCommand + multiple: true + - name: --read_map_number type: integer description: |- number of reads to map from the beginning of the file -1: map all reads + info: + orig_name: --readMapNumber example: -1 - - name: --readMatesLengthsIn + - name: --read_mates_lengths_in type: string description: Equal/NotEqual - lengths of names,sequences,qualities for both mates are the same / not the same. NotEqual is safe in all situations. 
+ info: + orig_name: --readMatesLengthsIn example: NotEqual - - name: --readNameSeparator + - name: --read_name_separator type: string description: character(s) separating the part of the read names that will be trimmed in output (read name after space is always trimmed) + info: + orig_name: --readNameSeparator example: / - multiple: yes - multiple_sep: ; - - name: --readQualityScoreBase + multiple: true + - name: --read_quality_score_base type: integer description: number to be subtracted from the ASCII code to get Phred quality score + info: + orig_name: --readQualityScoreBase example: 33 - name: Read Clipping arguments: - - name: --clipAdapterType + - name: --clip_adapter_type type: string description: |- adapter clipping type @@ -204,129 +250,161 @@ argument_groups: - Hamming ... adapter clipping based on Hamming distance, with the number of mismatches controlled by --clip5pAdapterMMp - CellRanger4 ... 5p and 3p adapter clipping similar to CellRanger4. Utilizes Opal package by Martin Sosic: https://github.com/Martinsos/opal - None ... no adapter clipping, all other clip* parameters are disregarded + info: + orig_name: --clipAdapterType example: Hamming - - name: --clip3pNbases + - name: --clip3p_nbases type: integer description: number(s) of bases to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates. + info: + orig_name: --clip3pNbases example: 0 - multiple: yes - multiple_sep: ; - - name: --clip3pAdapterSeq + multiple: true + - name: --clip3p_adapter_seq type: string description: |- adapter sequences to clip from 3p of each mate. If one value is given, it will be assumed the same for both mates. - polyA ... polyA sequence with the length equal to read length - multiple: yes - multiple_sep: ; - - name: --clip3pAdapterMMp + info: + orig_name: --clip3pAdapterSeq + multiple: true + - name: --clip3p_adapter_mm_p type: double description: max proportion of mismatches for 3p adapter clipping for each mate. 
If one value is given, it will be assumed the same for both mates. + info: + orig_name: --clip3pAdapterMMp example: 0.1 - multiple: yes - multiple_sep: ; - - name: --clip3pAfterAdapterNbases + multiple: true + - name: --clip3p_after_adapter_nbases type: integer description: number of bases to clip from 3p of each mate after the adapter clipping. If one value is given, it will be assumed the same for both mates. + info: + orig_name: --clip3pAfterAdapterNbases example: 0 - multiple: yes - multiple_sep: ; - - name: --clip5pNbases + multiple: true + - name: --clip5p_nbases type: integer description: number(s) of bases to clip from 5p of each mate. If one value is given, it will be assumed the same for both mates. + info: + orig_name: --clip5pNbases example: 0 - multiple: yes - multiple_sep: ; + multiple: true - name: Limits arguments: - - name: --limitGenomeGenerateRAM + - name: --limit_genome_generate_ram type: long description: maximum available RAM (bytes) for genome generation + info: + orig_name: --limitGenomeGenerateRAM example: '31000000000' - - name: --limitIObufferSize + - name: --limit_io_buffer_size type: long description: max available buffers size (bytes) for input/output, per thread + info: + orig_name: --limitIObufferSize example: - 30000000 - 50000000 - multiple: yes - multiple_sep: ; - - name: --limitOutSAMoneReadBytes + multiple: true + - name: --limit_out_sam_one_read_bytes type: long description: 'max size of the SAM record (bytes) for one read. 
Recommended value: >(2*(LengthMate1+LengthMate2+100)*outFilterMultimapNmax' + info: + orig_name: --limitOutSAMoneReadBytes example: 100000 - - name: --limitOutSJoneRead + - name: --limit_out_sj_one_read type: integer description: max number of junctions for one read (including all multi-mappers) + info: + orig_name: --limitOutSJoneRead example: 1000 - - name: --limitOutSJcollapsed + - name: --limit_out_sj_collapsed type: integer description: max number of collapsed junctions + info: + orig_name: --limitOutSJcollapsed example: 1000000 - - name: --limitBAMsortRAM + - name: --limit_bam_sort_ram type: long description: maximum available RAM (bytes) for sorting BAM. If =0, it will be - set to the genome index size. 0 value can only be used with --genomeLoad NoSharedMemory + set to the genome index size. 0 value can only be used with --genome_load NoSharedMemory option. + info: + orig_name: --limitBAMsortRAM example: 0 - - name: --limitSjdbInsertNsj + - name: --limit_sjdb_insert_nsj type: integer description: maximum number of junctions to be inserted to the genome on the fly at the mapping stage, including those from annotations and those detected in the 1st step of the 2-pass run + info: + orig_name: --limitSjdbInsertNsj example: 1000000 - - name: --limitNreadsSoft + - name: --limit_nreads_soft type: integer description: soft limit on the number of reads + info: + orig_name: --limitNreadsSoft example: -1 - name: 'Output: general' arguments: - - name: --outTmpKeep + - name: --out_tmp_keep type: string description: |- whether to keep the temporary files after STAR runs is finished - None ... remove all temporary files - All ... keep all files - - name: --outStd + info: + orig_name: --outTmpKeep + - name: --out_std type: string description: |- which output will be directed to stdout (standard out) - Log ... log messages - SAM ... 
alignments in SAM format (which normally are output to Aligned.out.sam file), normal standard output will go into Log.std.out - - BAM_Unsorted ... alignments in BAM format, unsorted. Requires --outSAMtype BAM Unsorted - - BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --outSAMtype BAM SortedByCoordinate - - BAM_Quant ... alignments to transcriptome in BAM format, unsorted. Requires --quantMode TranscriptomeSAM + - BAM_Unsorted ... alignments in BAM format, unsorted. Requires --out_sam_type BAM Unsorted + - BAM_SortedByCoordinate ... alignments in BAM format, sorted by coordinate. Requires --out_sam_type BAM SortedByCoordinate + - BAM_Quant ... alignments to transcriptome in BAM format, unsorted. Requires --quant_mode TranscriptomeSAM + info: + orig_name: --outStd example: Log - - name: --outReadsUnmapped + - name: --out_reads_unmapped type: string description: |- output of unmapped and partially mapped (i.e. mapped only one mate of a paired end read) reads in separate file(s). - None ... no output - Fastx ... output in separate fasta/fastq files, Unmapped.out.mate1/2 - - name: --outQSconversionAdd + info: + orig_name: --outReadsUnmapped + - name: --out_qs_conversion_add type: integer description: add this number to the quality score (e.g. to convert from Illumina to Sanger, use -31) + info: + orig_name: --outQSconversionAdd example: 0 - - name: --outMultimapperOrder + - name: --out_multimapper_order type: string description: |- order of multimapping alignments in the output files - Old_2.4 ... quasi-random order used before 2.5.0 - Random ... random order of alignments for each multi-mapper. Read mates (pairs) are always adjacent, all alignment for each read stay together. This option will become default in the future releases. 
+ info: + orig_name: --outMultimapperOrder example: Old_2.4 - name: 'Output: SAM and BAM' arguments: - - name: --outSAMtype + - name: --out_sam_type type: string description: |- type of SAM/BAM output @@ -337,11 +415,12 @@ argument_groups: - None ... no SAM/BAM output 2nd, 3rd: - Unsorted ... standard unsorted - - SortedByCoordinate ... sorted by coordinate. This option will allocate extra memory for sorting which can be specified by --limitBAMsortRAM. + - SortedByCoordinate ... sorted by coordinate. This option will allocate extra memory for sorting which can be specified by --limit_bam_sort_ram. + info: + orig_name: --outSAMtype example: SAM - multiple: yes - multiple_sep: ; - - name: --outSAMmode + multiple: true + - name: --out_sam_mode type: string description: |- mode of SAM output @@ -349,15 +428,19 @@ argument_groups: - None ... no SAM output - Full ... full SAM output - NoQS ... full SAM but without quality scores + info: + orig_name: --outSAMmode example: Full - - name: --outSAMstrandField + - name: --out_sam_strand_field type: string description: |- Cufflinks-like strand field flag - None ... not used - intronMotif ... strand derived from the intron motif. This option changes the output alignments: reads with inconsistent and/or non-canonical introns are filtered out. - - name: --outSAMattributes + info: + orig_name: --outSAMstrandField + - name: --out_sam_attributes type: string description: |- a string of desired SAM attributes, in the order desired for the output SAM. Tags can be listed in any combination/order. @@ -368,27 +451,27 @@ argument_groups: - All ... NH HI AS nM NM MD jM jI MC ch ***Alignment: - NH ... number of loci the reads maps to: =1 for unique mappers, >1 for multimappers. Standard SAM tag. - - HI ... multiple alignment index, starts with --outSAMattrIHstart (=1 by default). Standard SAM tag. + - HI ... multiple alignment index, starts with --out_sam_attr_ih_start (=1 by default). Standard SAM tag. - AS ... 
local alignment score, +1/-1 for matches/mismateches, score* penalties for indels and gaps. For PE reads, total score for two mates. Stadnard SAM tag. - nM ... number of mismatches. For PE reads, sum over two mates. - NM ... edit distance to the reference (number of mismatched + inserted + deleted bases) for each mate. Standard SAM tag. - MD ... string encoding mismatched and deleted reference bases (see standard SAM specifications). Standard SAM tag. - jM ... intron motifs for all junctions (i.e. N in CIGAR): 0: non-canonical; 1: GT/AG, 2: CT/AC, 3: GC/AG, 4: CT/GC, 5: AT/AC, 6: GT/AT. If splice junctions database is used, and a junction is annotated, 20 is added to its motif value. - jI ... start and end of introns for all junctions (1-based). - - XS ... alignment strand according to --outSAMstrandField. + - XS ... alignment strand according to --out_sam_strand_field. - MC ... mate's CIGAR string. Standard SAM tag. - - ch ... marks all segment of all chimeric alingments for --chimOutType WithinBAM output. + - ch ... marks all segment of all chimeric alingments for --chim_out_type WithinBAM output. - cN ... number of bases clipped from the read ends: 5' and 3' ***Variation: - vA ... variant allele - vG ... genomic coordinate of the variant overlapped by the read. - - vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --waspOutputMode SAMtag. + - vW ... 1 - alignment passes WASP filtering; 2,3,4,5,6,7 - alignment does not pass WASP filtering. Requires --wasp_output_mode SAMtag. - ha ... haplotype (1/2) when mapping to the diploid genome. Requires genome generated with --genomeTransformType Diploid . ***STARsolo: - CR CY UR UY ... sequences and quality scores of cell barcodes and UMIs for the solo* demultiplexing. - GX GN ... gene ID and gene name for unique-gene reads. - gx gn ... gene IDs and gene names for unique- and multi-gene reads. - - CB UB ... 
error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --outSAMtype BAM SortedByCoordinate. + - CB UB ... error-corrected cell barcodes and UMIs for solo* demultiplexing. Requires --out_sam_type BAM SortedByCoordinate. - sM ... assessment of CB and UMI. - sS ... sequence of the entire barcode (CB,UMI,adapter). - sQ ... quality of the entire barcode. @@ -396,15 +479,18 @@ argument_groups: ***Unsupported/undocumented: - rB ... alignment block read/genomic coordinates. - vR ... read coordinate of the variant. + info: + orig_name: --outSAMattributes example: Standard - multiple: yes - multiple_sep: ; - - name: --outSAMattrIHstart + multiple: true + - name: --out_sam_attr_ih_start type: integer description: start value for the IH attribute. 0 may be required by some downstream software, such as Cufflinks or StringTie. + info: + orig_name: --outSAMattrIHstart example: 1 - - name: --outSAMunmapped + - name: --out_sam_unmapped type: string description: |- output of unmapped reads in the SAM format @@ -414,112 +500,141 @@ argument_groups: - Within ... output unmapped reads within the main SAM file (i.e. Aligned.out.sam) 2nd word: - KeepPairs ... record unmapped mate for each alignment, and, in case of unsorted output, keep it adjacent to its mapped mate. Only affects multi-mapping reads. - multiple: yes - multiple_sep: ; - - name: --outSAMorder + info: + orig_name: --outSAMunmapped + multiple: true + - name: --out_sam_order type: string description: |- type of sorting for the SAM output Paired: one mate after the other for all paired alignments PairedKeepInputOrder: one mate after the other for all paired alignments, the order is kept the same as in the input FASTQ files + info: + orig_name: --outSAMorder example: Paired - - name: --outSAMprimaryFlag + - name: --out_sam_primary_flag type: string description: |- which alignments are considered primary - all others will be marked with 0x100 bit in the FLAG - OneBestScore ... 
only one alignment with the best score is primary - AllBestScore ... all alignments with the best score are primary + info: + orig_name: --outSAMprimaryFlag example: OneBestScore - - name: --outSAMreadID + - name: --out_sam_read_id type: string description: |- read ID record type - Standard ... first word (until space) from the FASTx read ID line, removing /1,/2 from the end - Number ... read number (index) in the FASTx file + info: + orig_name: --outSAMreadID example: Standard - - name: --outSAMmapqUnique + - name: --out_sam_mapq_unique type: integer description: '0 to 255: the MAPQ value for unique mappers' + info: + orig_name: --outSAMmapqUnique example: 255 - - name: --outSAMflagOR + - name: --out_sam_flag_or type: integer description: '0 to 65535: sam FLAG will be bitwise OR''d with this value, i.e. FLAG=FLAG | outSAMflagOR. This is applied after all flags have been set by STAR, and after outSAMflagAND. Can be used to set specific bits that are not set otherwise.' + info: + orig_name: --outSAMflagOR example: 0 - - name: --outSAMflagAND + - name: --out_sam_flag_and type: integer description: '0 to 65535: sam FLAG will be bitwise AND''d with this value, i.e. FLAG=FLAG & outSAMflagOR. This is applied after all flags have been set by STAR, but before outSAMflagOR. Can be used to unset specific bits that are not set otherwise.' + info: + orig_name: --outSAMflagAND example: 65535 - - name: --outSAMattrRGline + - name: --out_sam_attr_rg_line type: string description: |- - SAM/BAM read group line. The first word contains the read group identifier and must start with "ID:", e.g. --outSAMattrRGline ID:xxx CN:yy "DS:z z z". + SAM/BAM read group line. The first word contains the read group identifier and must start with "ID:", e.g. --out_sam_attr_rg_line ID:xxx CN:yy "DS:z z z". xxx will be added as RG tag to each output alignment. Any spaces in the tag values have to be double quoted. 
Comma separated RG lines correspons to different (comma separated) input files in --readFilesIn. Commas have to be surrounded by spaces, e.g. - --outSAMattrRGline ID:xxx , ID:zzz "DS:z z" , ID:yyy DS:yyyy - multiple: yes - multiple_sep: ; - - name: --outSAMheaderHD + --out_sam_attr_rg_line ID:xxx , ID:zzz "DS:z z" , ID:yyy DS:yyyy + info: + orig_name: --outSAMattrRGline + multiple: true + - name: --out_sam_header_hd type: string description: '@HD (header) line of the SAM header' - multiple: yes - multiple_sep: ; - - name: --outSAMheaderPG + info: + orig_name: --outSAMheaderHD + multiple: true + - name: --out_sam_header_pg type: string description: extra @PG (software) line of the SAM header (in addition to STAR) - multiple: yes - multiple_sep: ; - - name: --outSAMheaderCommentFile + info: + orig_name: --outSAMheaderPG + multiple: true + - name: --out_sam_header_comment_file type: string description: path to the file with @CO (comment) lines of the SAM header - - name: --outSAMfilter + info: + orig_name: --outSAMheaderCommentFile + - name: --out_sam_filter type: string description: |- filter the output into main SAM/BAM files - - KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genomeFastaFiles at the mapping stage. - - KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genomeFastaFiles at the mapping stage. - multiple: yes - multiple_sep: ; - - name: --outSAMmultNmax + - KeepOnlyAddedReferences ... only keep the reads for which all alignments are to the extra reference sequences added with --genome_fasta_files at the mapping stage. + - KeepAllAddedReferences ... keep all alignments to the extra reference sequences added with --genome_fasta_files at the mapping stage. 
+ info: + orig_name: --outSAMfilter + multiple: true + - name: --out_sam_mult_nmax type: integer description: |- max number of multiple alignments for a read that will be output to the SAM/BAM files. Note that if this value is not equal to -1, the top scoring alignment will be output first - - -1 ... all alignments (up to --outFilterMultimapNmax) will be output + - -1 ... all alignments (up to --out_filter_multimap_nmax) will be output + info: + orig_name: --outSAMmultNmax example: -1 - - name: --outSAMtlen + - name: --out_sam_tlen type: integer description: |- calculation method for the TLEN field in the SAM/BAM files - 1 ... leftmost base of the (+)strand mate to rightmost base of the (-)mate. (+)sign for the (+)strand mate - 2 ... leftmost base of any mate to rightmost base of any mate. (+)sign for the mate with the leftmost base. This is different from 1 for overlapping mates with protruding ends + info: + orig_name: --outSAMtlen example: 1 - - name: --outBAMcompression + - name: --out_bam_compression type: integer description: -1 to 10 BAM compression level, -1=default compression (6?), 0=no compression, 10=maximum compression + info: + orig_name: --outBAMcompression example: 1 - - name: --outBAMsortingThreadN + - name: --out_bam_sorting_thread_n type: integer description: '>=0: number of threads for BAM sorting. 0 will default to min(6,--runThreadN).' + info: + orig_name: --outBAMsortingThreadN example: 0 - - name: --outBAMsortingBinsN + - name: --out_bam_sorting_bins_n type: integer description: '>0: number of genome bins for coordinate-sorting' + info: + orig_name: --outBAMsortingBinsN example: 50 - name: BAM processing arguments: - - name: --bamRemoveDuplicatesType + - name: --bam_remove_duplicates_type type: string description: |- mark duplicates in the BAM file, for now only works with (i) sorted BAM fed with inputBAMfile, and (ii) for paired-end alignments only @@ -527,17 +642,21 @@ argument_groups: - - ... 
no duplicate removal/marking - UniqueIdentical ... mark all multimappers, and duplicate unique mappers. The coordinates, FLAG, CIGAR must be identical - UniqueIdenticalNotMulti ... mark duplicate unique mappers but not multimappers. - - name: --bamRemoveDuplicatesMate2basesN + info: + orig_name: --bamRemoveDuplicatesType + - name: --bam_remove_duplicates_mate2bases_n type: integer description: number of bases from the 5' of mate 2 to use in collapsing (e.g. for RAMPAGE) + info: + orig_name: --bamRemoveDuplicatesMate2basesN example: 0 - name: Output Wiggle arguments: - - name: --outWigType + - name: --out_wig_type type: string description: |- - type of signal output, e.g. "bedGraph" OR "bedGraph read1_5p". Requires sorted BAM: --outSAMtype BAM SortedByCoordinate . + type of signal output, e.g. "bedGraph" OR "bedGraph read1_5p". Requires sorted BAM: --out_sam_type BAM SortedByCoordinate . 1st word: - None ... no signal output @@ -546,85 +665,112 @@ argument_groups: 2nd word: - read1_5p ... signal from only 5' of the 1st read, useful for CAGE/RAMPAGE etc - read2 ... signal from only 2nd read - multiple: yes - multiple_sep: ; - - name: --outWigStrand + info: + orig_name: --outWigType + multiple: true + - name: --out_wig_strand type: string description: |- strandedness of wiggle/bedGraph output - Stranded ... separate strands, str1 and str2 - Unstranded ... collapsed strands + info: + orig_name: --outWigStrand example: Stranded - - name: --outWigReferencesPrefix + - name: --out_wig_references_prefix type: string description: prefix matching reference names to include in the output wiggle file, e.g. "chr", default "-" - include all references - - name: --outWigNorm + info: + orig_name: --outWigReferencesPrefix + - name: --out_wig_norm type: string description: |- type of normalization for the signal - RPM ... reads per million of mapped reads - None ... 
no normalization, "raw" counts + info: + orig_name: --outWigNorm example: RPM - name: Output Filtering arguments: - - name: --outFilterType + - name: --out_filter_type type: string description: |- type of filtering - Normal ... standard filtering using only current alignment - BySJout ... keep only those reads that contain junctions that passed filtering into SJ.out.tab + info: + orig_name: --outFilterType example: Normal - - name: --outFilterMultimapScoreRange + - name: --out_filter_multimap_score_range type: integer description: the score range below the maximum score for multimapping alignments + info: + orig_name: --outFilterMultimapScoreRange example: 1 - - name: --outFilterMultimapNmax + - name: --out_filter_multimap_nmax type: integer description: |- maximum number of loci the read is allowed to map to. Alignments (all of them) will be output only if the read maps to no more loci than this value. Otherwise no alignments will be output, and the read will be counted as "mapped to too many loci" in the Log.final.out . + info: + orig_name: --outFilterMultimapNmax example: 10 - - name: --outFilterMismatchNmax + - name: --out_filter_mismatch_nmax type: integer description: alignment will be output only if it has no more mismatches than this value. + info: + orig_name: --outFilterMismatchNmax example: 10 - - name: --outFilterMismatchNoverLmax + - name: --out_filter_mismatch_nover_lmax type: double description: alignment will be output only if its ratio of mismatches to *mapped* length is less than or equal to this value. + info: + orig_name: --outFilterMismatchNoverLmax example: 0.3 - - name: --outFilterMismatchNoverReadLmax + - name: --out_filter_mismatch_nover_read_lmax type: double description: alignment will be output only if its ratio of mismatches to *read* length is less than or equal to this value. 
+ info: + orig_name: --outFilterMismatchNoverReadLmax example: 1.0 - - name: --outFilterScoreMin + - name: --out_filter_score_min type: integer description: alignment will be output only if its score is higher than or equal to this value. + info: + orig_name: --outFilterScoreMin example: 0 - - name: --outFilterScoreMinOverLread + - name: --out_filter_score_min_over_lread type: double description: same as outFilterScoreMin, but normalized to read length (sum of mates' lengths for paired-end reads) + info: + orig_name: --outFilterScoreMinOverLread example: 0.66 - - name: --outFilterMatchNmin + - name: --out_filter_match_nmin type: integer description: alignment will be output only if the number of matched bases is higher than or equal to this value. + info: + orig_name: --outFilterMatchNmin example: 0 - - name: --outFilterMatchNminOverLread + - name: --out_filter_match_nmin_over_lread type: double description: sam as outFilterMatchNmin, but normalized to the read length (sum of mates' lengths for paired-end reads). + info: + orig_name: --outFilterMatchNminOverLread example: 0.66 - - name: --outFilterIntronMotifs + - name: --out_filter_intron_motifs type: string description: |- filter alignment using their motifs @@ -632,244 +778,316 @@ argument_groups: - None ... no filtering - RemoveNoncanonical ... filter out alignments that contain non-canonical junctions - RemoveNoncanonicalUnannotated ... filter out alignments that contain non-canonical unannotated junctions when using annotated splice junctions database. The annotated non-canonical junctions will be kept. - - name: --outFilterIntronStrands + info: + orig_name: --outFilterIntronMotifs + - name: --out_filter_intron_strands type: string description: |- filter alignments - RemoveInconsistentStrands ... remove alignments that have junctions with inconsistent strands - None ... 
no filtering + info: + orig_name: --outFilterIntronStrands example: RemoveInconsistentStrands - name: Output splice junctions (SJ.out.tab) arguments: - - name: --outSJtype + - name: --out_sj_type type: string description: |- type of splice junction output - Standard ... standard SJ.out.tab output - None ... no splice junction output + info: + orig_name: --outSJtype example: Standard - name: 'Output Filtering: Splice Junctions' arguments: - - name: --outSJfilterReads + - name: --out_sj_filter_reads type: string description: |- which reads to consider for collapsed splice junctions output - All ... all reads, unique- and multi-mappers - Unique ... uniquely mapping reads only + info: + orig_name: --outSJfilterReads example: All - - name: --outSJfilterOverhangMin + - name: --out_sj_filter_overhang_min type: integer description: |- minimum overhang length for splice junctions on both sides for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif does not apply to annotated junctions + info: + orig_name: --outSJfilterOverhangMin example: - 30 - 12 - 12 - 12 - multiple: yes - multiple_sep: ; - - name: --outSJfilterCountUniqueMin + multiple: true + - name: --out_sj_filter_count_unique_min type: integer description: |- minimum uniquely mapping read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. 
-1 means no output for that motif Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied does not apply to annotated junctions + info: + orig_name: --outSJfilterCountUniqueMin example: - 3 - 1 - 1 - 1 - multiple: yes - multiple_sep: ; - - name: --outSJfilterCountTotalMin + multiple: true + - name: --out_sj_filter_count_total_min type: integer description: |- minimum total (multi-mapping+unique) read count per junction for: (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. -1 means no output for that motif Junctions are output if one of outSJfilterCountUniqueMin OR outSJfilterCountTotalMin conditions are satisfied does not apply to annotated junctions + info: + orig_name: --outSJfilterCountTotalMin example: - 3 - 1 - 1 - 1 - multiple: yes - multiple_sep: ; - - name: --outSJfilterDistToOtherSJmin + multiple: true + - name: --out_sj_filter_dist_to_other_sj_min type: integer description: |- minimum allowed distance to other junctions' donor/acceptor does not apply to annotated junctions + info: + orig_name: --outSJfilterDistToOtherSJmin example: - 10 - 0 - 5 - 10 - multiple: yes - multiple_sep: ; - - name: --outSJfilterIntronMaxVsReadN + multiple: true + - name: --out_sj_filter_intron_max_vs_read_n type: integer description: |- maximum gap allowed for junctions supported by 1,2,3,,,N reads i.e. by default junctions supported by 1 read can have gaps <=50000b, by 2 reads: <=100000b, by 3 reads: <=200000. 
by >=4 reads any gap <=alignIntronMax does not apply to annotated junctions + info: + orig_name: --outSJfilterIntronMaxVsReadN example: - 50000 - 100000 - 200000 - multiple: yes - multiple_sep: ; + multiple: true - name: Scoring arguments: - - name: --scoreGap + - name: --score_gap type: integer description: splice junction penalty (independent on intron motif) + info: + orig_name: --scoreGap example: 0 - - name: --scoreGapNoncan + - name: --score_gap_noncan type: integer description: non-canonical junction penalty (in addition to scoreGap) + info: + orig_name: --scoreGapNoncan example: -8 - - name: --scoreGapGCAG + - name: --score_gap_gcag type: integer description: GC/AG and CT/GC junction penalty (in addition to scoreGap) + info: + orig_name: --scoreGapGCAG example: -4 - - name: --scoreGapATAC + - name: --score_gap_atac type: integer description: AT/AC and GT/AT junction penalty (in addition to scoreGap) + info: + orig_name: --scoreGapATAC example: -8 - - name: --scoreGenomicLengthLog2scale + - name: --score_genomic_length_log2scale type: integer description: 'extra score logarithmically scaled with genomic length of the alignment: scoreGenomicLengthLog2scale*log2(genomicLength)' + info: + orig_name: --scoreGenomicLengthLog2scale example: 0 - - name: --scoreDelOpen + - name: --score_del_open type: integer description: deletion open penalty + info: + orig_name: --scoreDelOpen example: -2 - - name: --scoreDelBase + - name: --score_del_base type: integer description: deletion extension penalty per base (in addition to scoreDelOpen) + info: + orig_name: --scoreDelBase example: -2 - - name: --scoreInsOpen + - name: --score_ins_open type: integer description: insertion open penalty + info: + orig_name: --scoreInsOpen example: -2 - - name: --scoreInsBase + - name: --score_ins_base type: integer description: insertion extension penalty per base (in addition to scoreInsOpen) + info: + orig_name: --scoreInsBase example: -2 - - name: --scoreStitchSJshift + - name: 
--score_stitch_sj_shift type: integer description: maximum score reduction while searching for SJ boundaries in the stitching step + info: + orig_name: --scoreStitchSJshift example: 1 - name: Alignments and Seeding arguments: - - name: --seedSearchStartLmax + - name: --seed_search_start_lmax type: integer description: defines the search start point through the read - the read is split into pieces no longer than this value + info: + orig_name: --seedSearchStartLmax example: 50 - - name: --seedSearchStartLmaxOverLread + - name: --seed_search_start_lmax_over_lread type: double description: seedSearchStartLmax normalized to read length (sum of mates' lengths for paired-end reads) + info: + orig_name: --seedSearchStartLmaxOverLread example: 1.0 - - name: --seedSearchLmax + - name: --seed_search_lmax type: integer description: defines the maximum length of the seeds, if =0 seed length is not limited + info: + orig_name: --seedSearchLmax example: 0 - - name: --seedMultimapNmax + - name: --seed_multimap_nmax type: integer description: only pieces that map fewer than this value are utilized in the stitching procedure + info: + orig_name: --seedMultimapNmax example: 10000 - - name: --seedPerReadNmax + - name: --seed_per_read_nmax type: integer description: max number of seeds per read + info: + orig_name: --seedPerReadNmax example: 1000 - - name: --seedPerWindowNmax + - name: --seed_per_window_nmax type: integer description: max number of seeds per window + info: + orig_name: --seedPerWindowNmax example: 50 - - name: --seedNoneLociPerWindow + - name: --seed_none_loci_per_window type: integer description: max number of one seed loci per window + info: + orig_name: --seedNoneLociPerWindow example: 10 - - name: --seedSplitMin + - name: --seed_split_min type: integer description: min length of the seed sequences split by Ns or mate gap + info: + orig_name: --seedSplitMin example: 12 - - name: --seedMapMin + - name: --seed_map_min type: integer description: min length of seeds to 
be mapped + info: + orig_name: --seedMapMin example: 5 - - name: --alignIntronMin + - name: --align_intron_min type: integer description: minimum intron size, genomic gap is considered intron if its length>=alignIntronMin, otherwise it is considered Deletion + info: + orig_name: --alignIntronMin example: 21 - - name: --alignIntronMax + - name: --align_intron_max type: integer description: maximum intron size, if 0, max intron size will be determined by (2^winBinNbits)*winAnchorDistNbins + info: + orig_name: --alignIntronMax example: 0 - - name: --alignMatesGapMax + - name: --align_mates_gap_max type: integer description: maximum gap between two mates, if 0, max intron gap will be determined by (2^winBinNbits)*winAnchorDistNbins + info: + orig_name: --alignMatesGapMax example: 0 - - name: --alignSJoverhangMin + - name: --align_sj_overhang_min type: integer description: minimum overhang (i.e. block size) for spliced alignments + info: + orig_name: --alignSJoverhangMin example: 5 - - name: --alignSJstitchMismatchNmax + - name: --align_sj_stitch_mismatch_nmax type: integer description: |- maximum number of mismatches for stitching of the splice junctions (-1: no limit). (1) non-canonical motifs, (2) GT/AG and CT/AC motif, (3) GC/AG and CT/GC motif, (4) AT/AC and GT/AT motif. + info: + orig_name: --alignSJstitchMismatchNmax example: - 0 - -1 - 0 - 0 - multiple: yes - multiple_sep: ; - - name: --alignSJDBoverhangMin + multiple: true + - name: --align_sjdb_overhang_min type: integer description: minimum overhang (i.e. 
block size) for annotated (sjdb) spliced alignments + info: + orig_name: --alignSJDBoverhangMin example: 3 - - name: --alignSplicedMateMapLmin + - name: --align_spliced_mate_map_lmin type: integer description: minimum mapped length for a read mate that is spliced + info: + orig_name: --alignSplicedMateMapLmin example: 0 - - name: --alignSplicedMateMapLminOverLmate + - name: --align_spliced_mate_map_lmin_over_lmate type: double description: alignSplicedMateMapLmin normalized to mate length + info: + orig_name: --alignSplicedMateMapLminOverLmate example: 0.66 - - name: --alignWindowsPerReadNmax + - name: --align_windows_per_read_nmax type: integer description: max number of windows per read + info: + orig_name: --alignWindowsPerReadNmax example: 10000 - - name: --alignTranscriptsPerWindowNmax + - name: --align_transcripts_per_window_nmax type: integer description: max number of transcripts per window + info: + orig_name: --alignTranscriptsPerWindowNmax example: 100 - - name: --alignTranscriptsPerReadNmax + - name: --align_transcripts_per_read_nmax type: integer description: max number of different alignments per read to consider + info: + orig_name: --alignTranscriptsPerReadNmax example: 10000 - - name: --alignEndsType + - name: --align_ends_type type: string description: |- type of read ends alignment @@ -878,8 +1096,10 @@ argument_groups: - EndToEnd ... force end-to-end read alignment, do not soft-clip - Extend5pOfRead1 ... fully extend only the 5p of the read1, all other ends: local alignment - Extend5pOfReads12 ... fully extend only the 5p of the both read1 and read2, all other ends: local alignment + info: + orig_name: --alignEndsType example: Local - - name: --alignEndsProtrude + - name: --align_ends_protrude type: string description: |- allow protrusion of alignment ends, i.e. start (end) of the +strand mate downstream of the start (end) of the -strand mate @@ -888,68 +1108,90 @@ argument_groups: 2nd word: string: - ConcordantPair ... 
report alignments with non-zero protrusion as concordant pairs - DiscordantPair ... report alignments with non-zero protrusion as discordant pairs + info: + orig_name: --alignEndsProtrude example: 0 ConcordantPair - - name: --alignSoftClipAtReferenceEnds + - name: --align_soft_clip_at_reference_ends type: string description: |- allow the soft-clipping of the alignments past the end of the chromosomes - Yes ... allow - No ... prohibit, useful for compatibility with Cufflinks + info: + orig_name: --alignSoftClipAtReferenceEnds example: 'Yes' - - name: --alignInsertionFlush + - name: --align_insertion_flush type: string description: |- how to flush ambiguous insertion positions - None ... insertions are not flushed - Right ... insertions are flushed to the right + info: + orig_name: --alignInsertionFlush - name: Paired-End reads arguments: - - name: --peOverlapNbasesMin + - name: --pe_overlap_nbases_min type: integer description: minimum number of overlapping bases to trigger mates merging and realignment. Specify >0 value to switch on the "merginf of overlapping mates" algorithm. + info: + orig_name: --peOverlapNbasesMin example: 0 - - name: --peOverlapMMp + - name: --pe_overlap_mm_p type: double description: maximum proportion of mismatched bases in the overlap area + info: + orig_name: --peOverlapMMp example: 0.01 - name: Windows, Anchors, Binning arguments: - - name: --winAnchorMultimapNmax + - name: --win_anchor_multimap_nmax type: integer description: max number of loci anchors are allowed to map to + info: + orig_name: --winAnchorMultimapNmax example: 50 - - name: --winBinNbits + - name: --win_bin_nbits type: integer description: =log2(winBin), where winBin is the size of the bin for the windows/clustering, each window will occupy an integer number of bins. 
+ info: + orig_name: --winBinNbits example: 16 - - name: --winAnchorDistNbins + - name: --win_anchor_dist_nbins type: integer description: max number of bins between two anchors that allows aggregation of anchors into one window + info: + orig_name: --winAnchorDistNbins example: 9 - - name: --winFlankNbins + - name: --win_flank_nbins type: integer description: log2(winFlank), where win Flank is the size of the left and right flanking regions for each window + info: + orig_name: --winFlankNbins example: 4 - - name: --winReadCoverageRelativeMin + - name: --win_read_coverage_relative_min type: double description: minimum relative coverage of the read sequence by the seeds in a window, for STARlong algorithm only. + info: + orig_name: --winReadCoverageRelativeMin example: 0.5 - - name: --winReadCoverageBasesMin + - name: --win_read_coverage_bases_min type: integer description: minimum number of bases covered by the seeds in a window , for STARlong algorithm only. + info: + orig_name: --winReadCoverageBasesMin example: 0 - name: Chimeric Alignments arguments: - - name: --chimOutType + - name: --chim_out_type type: string description: |- type of chimeric output @@ -959,83 +1201,109 @@ argument_groups: - WithinBAM ... output into main aligned BAM files (Aligned.*.bam) - WithinBAM HardClip ... (default) hard-clipping in the CIGAR for supplemental chimeric alignments (default if no 2nd word is present) - WithinBAM SoftClip ... 
soft-clipping in the CIGAR for supplemental chimeric alignments + info: + orig_name: --chimOutType example: Junctions - multiple: yes - multiple_sep: ; - - name: --chimSegmentMin + multiple: true + - name: --chim_segment_min type: integer description: minimum length of chimeric segment length, if ==0, no chimeric output + info: + orig_name: --chimSegmentMin example: 0 - - name: --chimScoreMin + - name: --chim_score_min type: integer description: minimum total (summed) score of the chimeric segments + info: + orig_name: --chimScoreMin example: 0 - - name: --chimScoreDropMax + - name: --chim_score_drop_max type: integer description: max drop (difference) of chimeric score (the sum of scores of all chimeric segments) from the read length + info: + orig_name: --chimScoreDropMax example: 20 - - name: --chimScoreSeparation + - name: --chim_score_separation type: integer description: minimum difference (separation) between the best chimeric score and the next one + info: + orig_name: --chimScoreSeparation example: 10 - - name: --chimScoreJunctionNonGTAG + - name: --chim_score_junction_non_gtag type: integer description: penalty for a non-GT/AG chimeric junction + info: + orig_name: --chimScoreJunctionNonGTAG example: -1 - - name: --chimJunctionOverhangMin + - name: --chim_junction_overhang_min type: integer description: minimum overhang for a chimeric junction + info: + orig_name: --chimJunctionOverhangMin example: 20 - - name: --chimSegmentReadGapMax + - name: --chim_segment_read_gap_max type: integer description: maximum gap in the read sequence between chimeric segments + info: + orig_name: --chimSegmentReadGapMax example: 0 - - name: --chimFilter + - name: --chim_filter type: string description: |- different filters for chimeric alignments - None ... no filtering - banGenomicN ... 
Ns are not allowed in the genome sequence around the chimeric junction + info: + orig_name: --chimFilter example: banGenomicN - multiple: yes - multiple_sep: ; - - name: --chimMainSegmentMultNmax + multiple: true + - name: --chim_main_segment_mult_nmax type: integer description: maximum number of multi-alignments for the main chimeric segment. =1 will prohibit multimapping main segments. + info: + orig_name: --chimMainSegmentMultNmax example: 10 - - name: --chimMultimapNmax + - name: --chim_multimap_nmax type: integer description: |- maximum number of chimeric multi-alignments - 0 ... use the old scheme for chimeric detection which only considered unique alignments + info: + orig_name: --chimMultimapNmax example: 0 - - name: --chimMultimapScoreRange + - name: --chim_multimap_score_range type: integer description: the score range for multi-mapping chimeras below the best chimeric - score. Only works with --chimMultimapNmax > 1 + score. Only works with --chim_multimap_nmax > 1 + info: + orig_name: --chimMultimapScoreRange example: 1 - - name: --chimNonchimScoreDropMin + - name: --chim_nonchim_score_drop_min type: integer description: to trigger chimeric detection, the drop in the best non-chimeric alignment score with respect to the read length has to be greater than this value + info: + orig_name: --chimNonchimScoreDropMin example: 20 - - name: --chimOutJunctionFormat + - name: --chim_out_junction_format type: integer description: |- formatting type for the Chimeric.out.junction file - 0 ... no comment lines/headers - 1 ... comment lines at the end of the file: command line and Nreads: total, unique/multi-mapping + info: + orig_name: --chimOutJunctionFormat example: 0 - name: Quantification of Annotations arguments: - - name: --quantMode + - name: --quant_mode type: string description: |- types of quantification requested @@ -1043,9 +1311,10 @@ argument_groups: - - ... none - TranscriptomeSAM ... 
output SAM/BAM alignments to transcriptome into a separate file - GeneCounts ... count reads per gene - multiple: yes - multiple_sep: ; - - name: --quantTranscriptomeBAMcompression + info: + orig_name: --quantMode + multiple: true + - name: --quant_transcriptome_bam_compression type: integer description: |- -2 to 10 transcriptome BAM compression level @@ -1054,8 +1323,10 @@ argument_groups: - -1 ... default compression (6?) - 0 ... no compression - 10 ... maximum compression + info: + orig_name: --quantTranscriptomeBAMcompression example: 1 - - name: --quantTranscriptomeSAMoutput + - name: --quant_transcriptome_sam_output type: string description: |- alignment filtering for TranscriptomeSAM output @@ -1063,26 +1334,301 @@ argument_groups: - BanSingleEnd_BanIndels_ExtendSoftclip ... prohibit indels and single-end alignments, extend softclips - compatible with RSEM - BanSingleEnd ... prohibit single-end alignments, allow indels and softclips - BanSingleEnd_ExtendSoftclip ... prohibit single-end alignments, extend softclips, allow indels + info: + orig_name: --quantTranscriptomeSAMoutput example: BanSingleEnd_BanIndels_ExtendSoftclip - name: 2-pass Mapping arguments: - - name: --twopassMode + - name: --twopass_mode type: string description: |- 2-pass mapping mode. - None ... 1-pass mapping - Basic ... basic 2-pass mapping, with all 1st pass junctions inserted into the genome indices on the fly - - name: --twopass1readsN + info: + orig_name: --twopassMode + - name: --twopass1reads_n type: integer description: number of reads to process for the 1st step. Use very large number (or default -1) to map all reads in the first step. + info: + orig_name: --twopass1readsN example: -1 - name: WASP parameters arguments: - - name: --waspOutputMode + - name: --wasp_output_mode type: string description: |- WASP allele-specific output type. This is re-implementation of the original WASP mappability filtering by Bryce van de Geijn, Graham McVicker, Yoav Gilad & Jonathan K Pritchard. 
Please cite the original WASP paper: Nature Methods 12, 1061-1063 (2015), https://www.nature.com/articles/nmeth.3582 . - SAMtag ... add WASP tags to the alignments that pass WASP filtering + info: + orig_name: --waspOutputMode +- name: STARsolo (single cell RNA-seq) parameters + arguments: + - name: --solo_type + type: string + description: |- + type of single-cell RNA-seq + + - CB_UMI_Simple ... (a.k.a. Droplet) one UMI and one Cell Barcode of fixed length in read2, e.g. Drop-seq and 10X Chromium. + - CB_UMI_Complex ... multiple Cell Barcodes of varying length, one UMI of fixed length and one adapter sequence of fixed length are allowed in read2 only (e.g. inDrop, ddSeq). + - CB_samTagOut ... output Cell Barcode as CR and/or CB SAm tag. No UMI counting. --readFilesIn cDNA_read1 [cDNA_read2 if paired-end] CellBarcode_read . Requires --out_sam_type BAM Unsorted [and/or SortedByCoordinate] + - SmartSeq ... Smart-seq: each cell in a separate FASTQ (paired- or single-end), barcodes are corresponding read-groups, no UMI sequences, alignments deduplicated according to alignment start and end (after extending soft-clipped bases) + info: + orig_name: --soloType + multiple: true + - name: --solo_cb_type + type: string + description: |- + cell barcode type + + Sequence: cell barcode is a sequence (standard option) + String: cell barcode is an arbitrary string + info: + orig_name: --soloCBtype + example: Sequence + - name: --solo_cb_whitelist + type: string + description: |- + file(s) with whitelist(s) of cell barcodes. Only --solo_type CB_UMI_Complex allows more than one whitelist file. + + - None ... 
no whitelist: all cell barcodes are allowed + info: + orig_name: --soloCBwhitelist + multiple: true + - name: --solo_cb_start + type: integer + description: cell barcode start base + info: + orig_name: --soloCBstart + example: 1 + - name: --solo_cb_len + type: integer + description: cell barcode length + info: + orig_name: --soloCBlen + example: 16 + - name: --solo_umi_start + type: integer + description: UMI start base + info: + orig_name: --soloUMIstart + example: 17 + - name: --solo_umi_len + type: integer + description: UMI length + info: + orig_name: --soloUMIlen + example: 10 + - name: --solo_barcode_read_length + type: integer + description: |- + length of the barcode read + + - 1 ... equal to sum of soloCBlen+soloUMIlen + - 0 ... not defined, do not check + info: + orig_name: --soloBarcodeReadLength + example: 1 + - name: --solo_barcode_mate + type: integer + description: |- + identifies which read mate contains the barcode (CB+UMI) sequence + + - 0 ... barcode sequence is on separate read, which should always be the last file in the --readFilesIn listed + - 1 ... barcode sequence is a part of mate 1 + - 2 ... barcode sequence is a part of mate 2 + info: + orig_name: --soloBarcodeMate + example: 0 + - name: --solo_cb_position + type: string + description: |- + position of Cell Barcode(s) on the barcode read. + + Presently only works with --solo_type CB_UMI_Complex, and barcodes are assumed to be on Read2. + Format for each barcode: startAnchor_startPosition_endAnchor_endPosition + start(end)Anchor defines the Anchor Base for the CB: 0: read start; 1: read end; 2: adapter start; 3: adapter end + start(end)Position is the 0-based position with of the CB start(end) with respect to the Anchor Base + String for different barcodes are separated by space. + Example: inDrop (Zilionis et al, Nat. 
Protocols, 2017): + --solo_cb_position 0_0_2_-1 3_1_3_8 + info: + orig_name: --soloCBposition + multiple: true + - name: --solo_umi_position + type: string + description: |- + position of the UMI on the barcode read, same as soloCBposition + + Example: inDrop (Zilionis et al, Nat. Protocols, 2017): + --solo_cb_position 3_9_3_14 + info: + orig_name: --soloUMIposition + - name: --solo_adapter_sequence + type: string + description: adapter sequence to anchor barcodes. Only one adapter sequence is + allowed. + info: + orig_name: --soloAdapterSequence + - name: --solo_adapter_mismatches_nmax + type: integer + description: maximum number of mismatches allowed in adapter sequence. + info: + orig_name: --soloAdapterMismatchesNmax + example: 1 + - name: --solo_cb_match_wl_type + type: string + description: |- + matching the Cell Barcodes to the WhiteList + + - Exact ... only exact matches allowed + - 1MM ... only one match in whitelist with 1 mismatched base allowed. Allowed CBs have to have at least one read with exact match. + - 1MM_multi ... multiple matches in whitelist with 1 mismatched base allowed, posterior probability calculation is used choose one of the matches. + Allowed CBs have to have at least one read with exact match. This option matches best with CellRanger 2.2.0 + - 1MM_multi_pseudocounts ... same as 1MM_Multi, but pseudocounts of 1 are added to all whitelist barcodes. + - 1MM_multi_Nbase_pseudocounts ... same as 1MM_multi_pseudocounts, multimatching to WL is allowed for CBs with N-bases. This option matches best with CellRanger >= 3.0.0 + - EditDist_2 ... allow up to edit distance of 3 fpr each of the barcodes. May include one deletion + one insertion. Only works with --solo_type CB_UMI_Complex. Matches to multiple passlist barcdoes are not allowed. Similar to ParseBio Split-seq pipeline. 
+ info: + orig_name: --soloCBmatchWLtype + example: 1MM_multi + - name: --solo_input_sam_attr_barcode_seq + type: string + description: |- + when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode sequence (in proper order). + + For instance, for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_seq CR UR . + This parameter is required when running STARsolo with input from SAM. + info: + orig_name: --soloInputSAMattrBarcodeSeq + multiple: true + - name: --solo_input_sam_attr_barcode_qual + type: string + description: |- + when inputting reads from a SAM file (--readsFileType SAM SE/PE), these SAM attributes mark the barcode qualities (in proper order). + + For instance, for 10X CellRanger or STARsolo BAMs, use --solo_input_sam_attr_barcode_qual CY UY . + If this parameter is '-' (default), the quality 'H' will be assigned to all bases. + info: + orig_name: --soloInputSAMattrBarcodeQual + multiple: true + - name: --solo_strand + type: string + description: |- + strandedness of the solo libraries: + + - Unstranded ... no strand information + - Forward ... read strand same as the original RNA molecule + - Reverse ... read strand opposite to the original RNA molecule + info: + orig_name: --soloStrand + example: Forward + - name: --solo_features + type: string + description: |- + genomic features for which the UMI counts per Cell Barcode are collected + + - Gene ... genes: reads match the gene transcript + - SJ ... splice junctions: reported in SJ.out.tab + - GeneFull ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns + - GeneFull_ExonOverIntron ... full gene (pre-mRNA): count all reads overlapping genes' exons and introns: prioritize 100% overlap with exons + - GeneFull_Ex50pAS ... full gene (pre-RNA): count all reads overlapping genes' exons and introns: prioritize >50% overlap with exons. Do not count reads with 100% exonic overlap in the antisense direction. 
+ info: + orig_name: --soloFeatures + example: Gene + multiple: true + - name: --solo_multi_mappers + type: string + description: |- + counting method for reads mapping to multiple genes + + - Unique ... count only reads that map to unique genes + - Uniform ... uniformly distribute multi-genic UMIs to all genes + - Rescue ... distribute UMIs proportionally to unique+uniform counts (~ first iteration of EM) + - PropUnique ... distribute UMIs proportionally to unique mappers, if present, and uniformly if not. + - EM ... multi-gene UMIs are distributed using Expectation Maximization algorithm + info: + orig_name: --soloMultiMappers + example: Unique + multiple: true + - name: --solo_umi_dedup + type: string + description: |- + type of UMI deduplication (collapsing) algorithm + + - 1MM_All ... all UMIs with 1 mismatch distance to each other are collapsed (i.e. counted once). + - 1MM_Directional_UMItools ... follows the "directional" method from the UMI-tools by Smith, Heger and Sudbery (Genome Research 2017). + - 1MM_Directional ... same as 1MM_Directional_UMItools, but with more stringent criteria for duplicate UMIs + - Exact ... only exactly matching UMIs are collapsed. + - NoDedup ... no deduplication of UMIs, count all reads. + - 1MM_CR ... CellRanger2-4 algorithm for 1MM UMI collapsing. + info: + orig_name: --soloUMIdedup + example: 1MM_All + multiple: true + - name: --solo_umi_filtering + type: string + description: |- + type of UMI filtering (for reads uniquely mapping to genes) + + - - ... basic filtering: remove UMIs with N and homopolymers (similar to CellRanger 2.2.0). + - MultiGeneUMI ... basic + remove lower-count UMIs that map to more than one gene. + - MultiGeneUMI_All ... basic + remove all UMIs that map to more than one gene. + - MultiGeneUMI_CR ... basic + remove lower-count UMIs that map to more than one gene, matching CellRanger > 3.0.0 . 
+ Only works with --solo_umi_dedup 1MM_CR + info: + orig_name: --soloUMIfiltering + multiple: true + - name: --solo_out_file_names + type: string + description: |- + file names for STARsolo output: + + file_name_prefix gene_names barcode_sequences cell_feature_count_matrix + info: + orig_name: --soloOutFileNames + example: + - Solo.out/ + - features.tsv + - barcodes.tsv + - matrix.mtx + multiple: true + - name: --solo_cell_filter + type: string + description: |- + cell filtering type and parameters + + - None ... do not output filtered cells + - TopCells ... only report top cells by UMI count, followed by the exact number of cells + - CellRanger2.2 ... simple filtering of CellRanger 2.2. + Can be followed by numbers: number of expected cells, robust maximum percentile for UMI count, maximum to minimum ratio for UMI count + The harcoded values are from CellRanger: nExpectedCells=3000; maxPercentile=0.99; maxMinRatio=10 + - EmptyDrops_CR ... EmptyDrops filtering in CellRanger flavor. Please cite the original EmptyDrops paper: A.T.L Lun et al, Genome Biology, 20, 63 (2019): https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1662-y + Can be followed by 10 numeric parameters: nExpectedCells maxPercentile maxMinRatio indMin indMax umiMin umiMinFracMedian candMaxN FDR simN + The harcoded values are from CellRanger: 3000 0.99 10 45000 90000 500 0.01 20000 0.01 10000 + info: + orig_name: --soloCellFilter + example: + - CellRanger2.2 + - '3000' + - '0.99' + - '10' + multiple: true + - name: --solo_out_format_features_gene_field3 + type: string + description: field 3 in the Gene features.tsv file. If "-", then no 3rd field + is output. + info: + orig_name: --soloOutFormatFeaturesGeneField3 + example: Gene Expression + multiple: true + - name: --solo_cell_read_stats + type: string + description: |- + Output reads statistics for each CB + + - Standard ... 
standard output + info: + orig_name: --soloCellReadStats diff --git a/src/star/star_align_reads/config.vsh.yaml b/src/star/star_align_reads/config.vsh.yaml index bdc956d3..a9a845a1 100644 --- a/src/star/star_align_reads/config.vsh.yaml +++ b/src/star/star_align_reads/config.vsh.yaml @@ -118,6 +118,8 @@ engines: rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip && \ apt-get --purge autoremove -y ${PACKAGES} && \ apt-get clean + - type: python + packages: [ pyyaml ] - type: docker run: | STAR --version | sed 's#\(.*\)#star: "\1"#' > /var/software_versions.txt diff --git a/src/star/star_align_reads/script.py b/src/star/star_align_reads/script.py index f3d64a57..4d9d046f 100644 --- a/src/star/star_align_reads/script.py +++ b/src/star/star_align_reads/script.py @@ -2,6 +2,7 @@ import subprocess import shutil from pathlib import Path +import yaml ## VIASH START par = { @@ -18,10 +19,20 @@ } meta = { "cpus": 8, - "temp_dir": "/tmp" + "temp_dir": "/tmp", + "config": "target/executable/star/star_align_reads/.config.vsh.yaml", } ## VIASH END +# read config +with open(meta["config"], 'r') as stream: + config = yaml.safe_load(stream) +all_arguments = { + arg["name"].lstrip('-'): arg + for argument_group in config["argument_groups"] + for arg in argument_group["arguments"] +} + ################################################## # check and process SE / PE R1 input files input_r1 = par["input"] @@ -87,8 +98,13 @@ cmd_args = [ "STAR" ] for name, value in par.items(): if value is not None: + if name in all_arguments: + arg_info = all_arguments[name].get("info", {}) + cli_name = arg_info.get("orig_name", f"--{name}") + else: + cli_name = f"--{name}" val_to_add = value if isinstance(value, list) else [value] - cmd_args.extend([f"--{name}"] + [str(x) for x in val_to_add]) + cmd_args.extend([cli_name] + [str(x) for x in val_to_add]) print("", flush=True) # run command diff --git a/src/star/star_align_reads/test.sh b/src/star/star_align_reads/test.sh index bd78094d..46566ec0 
100644 --- a/src/star/star_align_reads/test.sh +++ b/src/star/star_align_reads/test.sh @@ -88,14 +88,14 @@ cd star_align_reads_se echo "> Run star_align_reads on SE" "$meta_executable" \ --input "../reads_R1.fastq" \ - --genomeDir "../index/" \ + --genome_dir "../index/" \ --aligned_reads "output.sam" \ --log "log.txt" \ - --outReadsUnmapped "Fastx" \ + --out_reads_unmapped "Fastx" \ --unmapped "unmapped.sam" \ - --quantMode "TranscriptomeSAM;GeneCounts" \ + --quant_mode "TranscriptomeSAM;GeneCounts" \ --reads_per_gene "reads_per_gene.tsv" \ - --outSJtype Standard \ + --out_sj_type Standard \ --splice_junctions "splice_junctions.tsv" \ --reads_aligned_to_transcriptome "transcriptome_aligned.bam" \ ${meta_cpus:+---cpus $meta_cpus} @@ -143,10 +143,10 @@ echo ">> Run star_align_reads on PE" "$meta_executable" \ --input ../reads_R1.fastq \ --input_r2 ../reads_R2.fastq \ - --genomeDir ../index/ \ + --genome_dir ../index/ \ --aligned_reads output.bam \ --log log.txt \ - --outReadsUnmapped Fastx \ + --out_reads_unmapped Fastx \ --unmapped unmapped_r1.bam \ --unmapped_r2 unmapped_r2.bam \ ${meta_cpus:+---cpus $meta_cpus} diff --git a/src/star/star_align_reads/utils/process_params.R b/src/star/star_align_reads/utils/process_params.R index ccdc50b3..eee1db65 100644 --- a/src/star/star_align_reads/utils/process_params.R +++ b/src/star/star_align_reads/utils/process_params.R @@ -14,6 +14,14 @@ param_txt <- iconv(param_txt, "UTF-8", "ASCII//TRANSLIT") dev_begin <- grep("#####UnderDevelopment_begin", param_txt) dev_end <- grep("#####UnderDevelopment_end", param_txt) +camel_case_to_snake_case <- function(x) { + x %>% + str_replace_all("([A-Z][A-Z][A-Z]*)", "_\\1_") %>% + str_replace_all("([a-z])([A-Z])", "\\1_\\2") %>% + str_to_lower() %>% + str_replace_all("_$", "") +} + # strip development sections nondev_ix <- unlist(map2(c(1, dev_end + 1), c(dev_begin - 1, length(param_txt)), function(i, j) { if (i >= 1 && i < j) { @@ -128,9 +136,8 @@ out2 <- out %>% # remove arguments that 
are related to a different runmode filter(!grepl("--runMode", description) | grepl("--runMode alignReads", description)) %>% filter(!grepl("--runMode", group_name) | grepl("--runMode alignReads", group_name)) %>% - filter(!grepl("STARsolo", group_name)) %>% mutate( - viash_arg = paste0("--", name), + viash_arg = paste0("--", camel_case_to_snake_case(name)), type_step1 = type %>% str_replace_all(".*(int, string|string|int|real|double)\\(?(s?).*", "\\1\\2"), viash_type = type_map[gsub("(int, string|string|int|real|double).*", "\\1", type_step1)], @@ -155,28 +162,41 @@ out2 <- out %>% group_name = gsub(" - .*", "", group_name), required = ifelse(name %in% required_args, TRUE, NA) ) -print(out2, n = 200) -out2 %>% mutate(i = row_number()) %>% - # filter(is.na(default_step1) != is.na(viash_default)) %>% - select(-group_name, -description) -out2 %>% filter(!grepl("--runMode", description) | grepl("--runMode alignReads", description)) +# change references to argument names +out3 <- out2 +for (i in seq_len(nrow(out2))) { + orig_name <- paste0("--", out2$name[[i]]) + new_name <- out2$viash_arg[[i]] + out3$description <- str_replace_all(out3$description, orig_name, new_name) +} + +# sanity checks +out3 %>% select(name, viash_arg) %>% as.data.frame() +print(out3, n = 200) +out3 %>% + mutate(i = row_number()) %>% + select(-group_name, -description) +out3 %>% filter(!grepl("--runMode", description) | grepl("--runMode alignReads", description)) -argument_groups <- map(unique(out2$group_name), function(group_name) { - args <- out2 %>% +# create argument groups +argument_groups <- map(unique(out3$group_name), function(group_name) { + args <- out3 %>% filter(group_name == !!group_name) %>% - pmap(function(viash_arg, viash_type, multiple, viash_default, description, required, ...) { - li <- lst( + pmap(function(viash_arg, viash_type, multiple, viash_default, description, required, name, ...) 
{ + li <- list( name = viash_arg, type = viash_type, - description = description + description = description, + info = list( + orig_name = paste0("--", name) + ) ) if (all(!is.na(viash_default))) { li$example <- viash_default } if (!is.na(multiple) && multiple) { li$multiple <- multiple - li$multiple_sep <- ";" } if (!is.na(required) && required) { li$required <- required @@ -186,4 +206,10 @@ argument_groups <- map(unique(out2$group_name), function(group_name) { list(name = group_name, arguments = args) }) -yaml::write_yaml(list(argument_groups = argument_groups), yaml_file) +yaml::write_yaml( + list(argument_groups = argument_groups), + yaml_file, + handlers = list( + logical = yaml::verbatim_logical + ) +) diff --git a/src/star/star_genome_generate/config.vsh.yaml b/src/star/star_genome_generate/config.vsh.yaml index 60fa3839..71c58826 100644 --- a/src/star/star_genome_generate/config.vsh.yaml +++ b/src/star/star_genome_generate/config.vsh.yaml @@ -17,71 +17,68 @@ authors: argument_groups: - name: "Input" arguments: - - name: "--genomeFastaFiles" + - name: "--genome_fasta_files" type: file description: | Path(s) to the fasta files with the genome sequences, separated by spaces. These files should be plain text FASTA files, they *cannot* be zipped. required: true - multiple: yes - multiple_sep: ; - - name: "--sjdbGTFfile" + multiple: true + - name: "--sjdb_gtf_file" type: file description: Path to the GTF file with annotations - - name: --sjdbOverhang + - name: --sjdb_overhang type: integer description: Length of the donor/acceptor sequence on each side of the junctions, ideally = (mate_length - 1) example: 100 - - name: --sjdbGTFchrPrefix + - name: --sjdb_gtf_chr_prefix type: string description: Prefix for chromosome names in a GTF file (e.g. 
'chr' for using ENSMEBL annotations with UCSC genomes) - - name: --sjdbGTFfeatureExon + - name: --sjdb_gtf_feature_exon type: string description: Feature type in GTF file to be used as exons for building transcripts example: exon - - name: --sjdbGTFtagExonParentTranscript + - name: --sjdb_gtf_tag_exon_parent_transcript type: string description: GTF attribute name for parent transcript ID (default "transcript_id" works for GTF files) example: transcript_id - - name: --sjdbGTFtagExonParentGene + - name: --sjdb_gtf_tag_exon_parent_gene type: string description: GTF attribute name for parent gene ID (default "gene_id" works for GTF files) example: gene_id - - name: --sjdbGTFtagExonParentGeneName + - name: --sjdb_gtf_tag_exon_parent_gene_name type: string description: GTF attribute name for parent gene name example: gene_name - multiple: yes - multiple_sep: ; - - name: --sjdbGTFtagExonParentGeneType + multiple: true + - name: --sjdb_gtf_tag_exon_parent_gene_type type: string description: GTF attribute name for parent gene type example: - gene_type - gene_biotype - multiple: yes - multiple_sep: ; - - name: --limitGenomeGenerateRAM + multiple: true + - name: --limit_genome_generate_ram type: long description: Maximum available RAM (bytes) for genome generation - example: '31000000000' - - name: --genomeSAindexNbases + example: 31000000000 + - name: --genome_sa_index_nbases type: integer description: Length (bases) of the SA pre-indexing string. Typically between 10 and 15. Longer strings will use much more memory, but allow faster searches. For small genomes, this parameter must be scaled down to min(14, log2(GenomeLength)/2 - 1). example: 14 - - name: --genomeChrBinNbits + - name: --genome_chr_bin_nbits type: integer description: Defined as log2(chrBin), where chrBin is the size of the bins for genome storage. Each chromosome will occupy an integer number of bins. 
For a genome with large number of contigs, it is recommended to scale this parameter as min(18, log2[max(GenomeLength/NumberOfReferences,ReadLength)]). example: 18 - - name: --genomeSAsparseD + - name: --genome_sa_sparse_d type: integer min: 0 example: 1 description: Suffux array sparsity, i.e. distance between indices. Use bigger numbers to decrease needed RAM at the cost of mapping speed reduction. - - name: --genomeSuffixLengthMax + - name: --genome_suffix_length_max type: integer description: Maximum length of the suffixes, has to be longer than read length. Use -1 for infinite length. example: -1 - - name: --genomeTransformType + - name: --genome_transform_type type: string description: | Type of genome transformation @@ -89,7 +86,7 @@ argument_groups: Haploid ... replace reference alleles with alternative alleles from VCF file (e.g. consensus allele) Diploid ... create two haplotypes for each chromosome listed in VCF file, for genotypes 1|2, assumes perfect phasing (e.g. personal genome) example: None - - name: --genomeTransformVCF + - name: --genome_transform_vcf type: file description: path to VCF file for genome transformation diff --git a/src/star/star_genome_generate/script.sh b/src/star/star_genome_generate/script.sh index cb3b906c..fc232672 100644 --- a/src/star/star_genome_generate/script.sh +++ b/src/star/star_genome_generate/script.sh @@ -10,20 +10,20 @@ mkdir -p $par_index STAR \ --runMode genomeGenerate \ --genomeDir $par_index \ - --genomeFastaFiles $par_genomeFastaFiles \ + --genomeFastaFiles $par_genome_fasta_files \ ${meta_cpus:+--runThreadN "${meta_cpus}"} \ - ${par_sjdbGTFfile:+--sjdbGTFfile "${par_sjdbGTFfile}"} \ + ${par_sjdb_gtf_file:+--sjdbGTFfile "${par_sjdb_gtf_file}"} \ - ${par_sjdbOverhang:+--sjdbOverhang "${par_sjdbOverhang}"} \ + ${par_sjdb_overhang:+--sjdbOverhang "${par_sjdb_overhang}"} \ - ${par_genomeSAindexNbases:+--genomeSAindexNbases "${par_genomeSAindexNbases}"} \ - ${par_sjdbGTFchrPrefix:+--sjdbGTFchrPrefix "${par_sjdbGTFchrPrefix}"} \ - ${par_sjdbGTFfeatureExon:+--sjdbGTFfeatureExon
"${par_sjdbGTFfeatureExon}"} \ - ${par_sjdbGTFtagExonParentTranscript:+--sjdbGTFtagExonParentTranscript "${par_sjdbGTFtagExonParentTranscript}"} \ - ${par_sjdbGTFtagExonParentGene:+--sjdbGTFtagExonParentGene "${par_sjdbGTFtagExonParentGene}"} \ - ${par_sjdbGTFtagExonParentGeneName:+--sjdbGTFtagExonParentGeneName "${par_sjdbGTFtagExonParentGeneName}"} \ - ${par_sjdbGTFtagExonParentGeneType:+--sjdbGTFtagExonParentGeneType "${sjdbGTFtagExonParentGeneType}"} \ - ${par_limitGenomeGenerateRAM:+--limitGenomeGenerateRAM "${par_limitGenomeGenerateRAM}"} \ - ${par_genomeChrBinNbits:+--genomeChrBinNbits "${par_genomeChrBinNbits}"} \ - ${par_genomeSAsparseD:+--genomeSAsparseD "${par_genomeSAsparseD}"} \ - ${par_genomeSuffixLengthMax:+--genomeSuffixLengthMax "${par_genomeSuffixLengthMax}"} \ - ${par_genomeTransformType:+--genomeTransformType "${par_genomeTransformType}"} \ - ${par_genomeTransformVCF:+--genomeTransformVCF "${par_genomeTransformVCF}"} \ + ${par_genome_sa_index_nbases:+--genomeSAindexNbases "${par_genome_sa_index_nbases}"} \ + ${par_sjdb_gtf_chr_prefix:+--sjdbGTFchrPrefix "${par_sjdb_gtf_chr_prefix}"} \ + ${par_sjdb_gtf_feature_exon:+--sjdbGTFfeatureExon "${par_sjdb_gtf_feature_exon}"} \ + ${par_sjdb_gtf_tag_exon_parent_transcript:+--sjdbGTFtagExonParentTranscript "${par_sjdb_gtf_tag_exon_parent_transcript}"} \ + ${par_sjdb_gtf_tag_exon_parent_gene:+--sjdbGTFtagExonParentGene "${par_sjdb_gtf_tag_exon_parent_gene}"} \ + ${par_sjdb_gtf_tag_exon_parent_gene_name:+--sjdbGTFtagExonParentGeneName "${par_sjdb_gtf_tag_exon_parent_gene_name}"} \ + ${par_sjdb_gtf_tag_exon_parent_gene_type:+--sjdbGTFtagExonParentGeneType "${par_sjdb_gtf_tag_exon_parent_gene_type}"} \ + ${par_limit_genome_generate_ram:+--limitGenomeGenerateRAM "${par_limit_genome_generate_ram}"} \ + ${par_genome_chr_bin_nbits:+--genomeChrBinNbits "${par_genome_chr_bin_nbits}"} \ + ${par_genome_sa_sparse_d:+--genomeSAsparseD "${par_genome_sa_sparse_d}"} \ +
${par_genome_suffix_length_max:+--genomeSuffixLengthMax "${par_genome_suffix_length_max}"} \ + ${par_genome_transform_type:+--genomeTransformType "${par_genome_transform_type}"} \ + ${par_genome_transform_vcf:+--genomeTransformVCF "${par_genome_transform_vcf}"} \ diff --git a/src/star/star_genome_generate/test.sh b/src/star/star_genome_generate/test.sh index fd0e4775..681f3494 100644 --- a/src/star/star_genome_generate/test.sh +++ b/src/star/star_genome_generate/test.sh @@ -27,9 +27,9 @@ echo "> Generate index" "$meta_executable" \ ${meta_cpus:+---cpus $meta_cpus} \ --index "star_index/" \ - --genomeFastaFiles "genome.fasta" \ - --sjdbGTFfile "genes.gtf" \ - --genomeSAindexNbases 2 + --genome_fasta_files "genome.fasta" \ + --sjdb_gtf_file "genes.gtf" \ + --genome_sa_index_nbases 4 files=("Genome" "Log.out" "SA" "SAindex" "chrLength.txt" "chrName.txt" "chrNameLength.txt" "chrStart.txt" "exonGeTrInfo.tab" "exonInfo.tab" "geneInfo.tab" "genomeParameters.txt" "sjdbInfo.txt" "sjdbList.fromGTF.out.tab" "sjdbList.out.tab" "transcriptInfo.tab") From 8f525f5e40301ad51bc1cd9587c0febbef84bd7d Mon Sep 17 00:00:00 2001 From: Theodoro Gasperin Terra Camargo <98555209+tgaspe@users.noreply.github.com> Date: Wed, 31 Jul 2024 16:20:11 -0300 Subject: [PATCH 13/25] Bedtools_Intersect (#94) * Initial Commit * Update config.vsh.yaml * creating templates * Update config.vsh.yaml * Update script.sh * Added output * Update config.vsh.yaml * Update test.sh * Update test.sh * More tests * small changes * update - change some var names - debugged - added more test * Update CHANGELOG.md * Update * Update help.txt --- CHANGELOG.md | 3 + .../bedtools_intersect/config.vsh.yaml | 255 +++++++++++++ src/bedtools/bedtools_intersect/help.txt | 119 ++++++ src/bedtools/bedtools_intersect/script.sh | 61 ++++ src/bedtools/bedtools_intersect/test.sh | 340 ++++++++++++++++++ 5 files changed, 778 insertions(+) create mode 100644 src/bedtools/bedtools_intersect/config.vsh.yaml create mode 100644
src/bedtools/bedtools_intersect/help.txt create mode 100644 src/bedtools/bedtools_intersect/script.sh create mode 100644 src/bedtools/bedtools_intersect/test.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index c4575cb9..36681056 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,9 @@ * `agat/agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). +* `bedtools`: + - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). + ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1` (PR #72). diff --git a/src/bedtools/bedtools_intersect/config.vsh.yaml b/src/bedtools/bedtools_intersect/config.vsh.yaml new file mode 100644 index 00000000..73dc0047 --- /dev/null +++ b/src/bedtools/bedtools_intersect/config.vsh.yaml @@ -0,0 +1,255 @@ +name: bedtools_intersect +namespace: bedtools +description: | + bedtools intersect allows one to screen for overlaps between two sets of genomic features. + Moreover, it allows one to have fine control as to how the intersections are reported. + bedtools intersect works with both BED/GFF/VCF and BAM files as input. +keywords: [feature intersection, BAM, BED, GFF, VCF] +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html + repository: https://github.com/arq5x/bedtools2 +references: + doi: 10.1093/bioinformatics/btq033 +license: GPL-2.0, MIT +requirements: + commands: [bedtools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input_a + alternatives: -a + type: file + direction: input + description: | + The input file (BED/GFF/VCF/BAM) to be used as the -a file. + required: true + example: input_a.bed + + - name: --input_b + alternatives: -b + type: file + direction: input + multiple: true + description: | + The input file(s) (BED/GFF/VCF/BAM) to be used as the -b file(s). 
+ required: true + example: input_b.bed + + - name: Outputs + arguments: + - name: --output + type: file + direction: output + description: | + The output BED file. + required: true + example: output.bed + + - name: Options + arguments: + - name: --write_a + alternatives: -wa + type: boolean_true + description: Write the original A entry for each overlap. + + - name: --write_b + alternatives: -wb + type: boolean_true + description: | + Write the original B entry for each overlap. + Useful for knowing _what_ A overlaps. Restricted by -f and -r. + + - name: --left_outer_join + alternatives: -loj + type: boolean_true + description: | + Perform a "left outer join". That is, for each feature in A report each overlap with B. + If no overlaps are found, report a NULL feature for B. + + - name: --write_overlap + alternatives: -wo + type: boolean_true + description: | + Write the original A and B entries plus the number of base pairs of overlap between the two features. + - Overlaps restricted by -f and -r. + Only A features with overlap are reported. + + - name: --write_overlap_plus + alternatives: -wao + type: boolean_true + description: | + Write the original A and B entries plus the number of base pairs of overlap between the two features. + - Overlaps restricted by -f and -r. + However, A features w/o overlap are also reported with a NULL B feature and overlap = 0. + + - name: --report_A_if_no_overlap + alternatives: -u + type: boolean_true + description: | + Write the original A entry _once_ if _any_ overlaps are found in B. + - In other words, just report the fact >=1 hit was found. + - Overlaps restricted by -f and -r. + + - name: --number_of_overlaps_A + alternatives: -c + type: boolean_true + description: | + For each entry in A, report the number of overlaps with B. + - Reports 0 for A entries that have no overlap with B. + - Overlaps restricted by -f and -r.
+ + - name: --report_no_overlaps_A + alternatives: -v + type: boolean_true + description: | + Only report those entries in A that have _no overlaps_ with B. + - Similar to "grep -v" (an homage). + + - name: --uncompressed_bam + alternatives: -ubam + type: boolean_true + description: Write uncompressed BAM output. Default writes compressed BAM. + + - name: --same_strand + alternatives: -s + type: boolean_true + description: | + Require same strandedness. That is, only report hits in B + that overlap A on the _same_ strand. + - By default, overlaps are reported without respect to strand. + + - name: --opposite_strand + alternatives: -S + type: boolean_true + description: | + Require different strandedness. That is, only report hits in B + that overlap A on the _opposite_ strand. + - By default, overlaps are reported without respect to strand. + + - name: --min_overlap_A + alternatives: -f + type: double + description: | + Minimum overlap required as a fraction of A. + - Default is 1E-9 (i.e., 1bp). + - FLOAT (e.g. 0.50) + example: 0.50 + + - name: --min_overlap_B + alternatives: -F + type: double + description: | + Minimum overlap required as a fraction of B. + - Default is 1E-9 (i.e., 1bp). + - FLOAT (e.g. 0.50) + example: 0.50 + + - name: --reciprocal_overlap + alternatives: -r + type: boolean_true + description: | + Require that the fraction overlap be reciprocal for A AND B. + - In other words, if -f is 0.90 and -r is used, this requires + that B overlap 90% of A and A _also_ overlaps 90% of B. + + - name: --either_overlap + alternatives: -e + type: boolean_true + description: | + Require that the minimum fraction be satisfied for A OR B. + - In other words, if -e is used with -f 0.90 and -F 0.10 this requires + that either 90% of A is covered OR 10% of B is covered. + Without -e, both fractions would have to be satisfied. + + - name: --split + type: boolean_true + description: Treat "split" BAM or BED12 entries as distinct BED intervals.
+ + - name: --genome + alternatives: -g + type: file + description: | + Provide a genome file to enforce consistent chromosome + sort order across input files. Only applies when used + with -sorted option. + example: genome.txt + + - name: --nonamecheck + type: boolean_true + description: | + For sorted data, don't throw an error if the file + has different naming conventions for the same chromosome + (e.g., "chr1" vs "chr01"). + + - name: --sorted + type: boolean_true + description: | + Use the "chromsweep" algorithm for sorted (-k1,1 -k2,2n) input. + + - name: --names + type: string + description: | + When using multiple databases, provide an alias + for each that will appear instead of a fileId when + also printing the DB record. + + - name: --filenames + type: boolean_true + description: When using multiple databases, show each complete filename instead of a fileId when also printing the DB record. + + - name: --sortout + type: boolean_true + description: When using multiple databases, sort the output DB hits for each record. + + - name: --bed + type: boolean_true + description: If using BAM input, write output as BED. + + - name: --header + type: boolean_true + description: Print the header from the A file prior to results. + + - name: --no_buffer_output + alternatives: --nobuf + type: boolean_true + description: | + Disable buffered output. Using this option will cause each line + of output to be printed as it is generated, rather than saved + in a buffer. This will make printing large output files + noticeably slower, but can be useful in conjunction with + other software tools and scripts that need to process one + line of bedtools output at a time. + + - name: --io_buffer_size + alternatives: --iobuf + type: integer + description: | + Specify amount of memory to use for input buffer. + Takes an integer argument. Optional suffixes K/M/G supported. + Note: currently has no effect with compressed files. 
+ +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bedtools, procps] + - type: docker + run: | + echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow diff --git a/src/bedtools/bedtools_intersect/help.txt b/src/bedtools/bedtools_intersect/help.txt new file mode 100644 index 00000000..d1bbdc20 --- /dev/null +++ b/src/bedtools/bedtools_intersect/help.txt @@ -0,0 +1,119 @@ +```bash +bedtools intersect +``` + +Tool: bedtools intersect (aka intersectBed) +Version: v2.30.0 +Summary: Report overlaps between two feature files. + +Usage: bedtools intersect [OPTIONS] -a -b + + Note: -b may be followed with multiple databases and/or + wildcard (*) character(s). +Options: + -wa Write the original entry in A for each overlap. + + -wb Write the original entry in B for each overlap. + - Useful for knowing _what_ A overlaps. Restricted by -f and -r. + + -loj Perform a "left outer join". That is, for each feature in A + report each overlap with B. If no overlaps are found, + report a NULL feature for B. + + -wo Write the original A and B entries plus the number of base + pairs of overlap between the two features. + - Overlaps restricted by -f and -r. + Only A features with overlap are reported. + + -wao Write the original A and B entries plus the number of base + pairs of overlap between the two features. + - Overlapping features restricted by -f and -r. + However, A features w/o overlap are also reported + with a NULL B feature and overlap = 0. + + -u Write the original A entry _once_ if _any_ overlaps found in B. + - In other words, just report the fact >=1 hit was found. + - Overlaps restricted by -f and -r. + + -c For each entry in A, report the number of overlaps with B. + - Reports 0 for A entries that have no overlap with B. 
+ - Overlaps restricted by -f, -F, -r, and -s. + + -C For each entry in A, separately report the number of + - overlaps with each B file on a distinct line. + - Reports 0 for A entries that have no overlap with B. + - Overlaps restricted by -f, -F, -r, and -s. + + -v Only report those entries in A that have _no overlaps_ with B. + - Similar to "grep -v" (an homage). + + -ubam Write uncompressed BAM output. Default writes compressed BAM. + + -s Require same strandedness. That is, only report hits in B + that overlap A on the _same_ strand. + - By default, overlaps are reported without respect to strand. + + -S Require different strandedness. That is, only report hits in B + that overlap A on the _opposite_ strand. + - By default, overlaps are reported without respect to strand. + + -f Minimum overlap required as a fraction of A. + - Default is 1E-9 (i.e., 1bp). + - FLOAT (e.g. 0.50) + + -F Minimum overlap required as a fraction of B. + - Default is 1E-9 (i.e., 1bp). + - FLOAT (e.g. 0.50) + + -r Require that the fraction overlap be reciprocal for A AND B. + - In other words, if -f is 0.90 and -r is used, this requires + that B overlap 90% of A and A _also_ overlaps 90% of B. + + -e Require that the minimum fraction be satisfied for A OR B. + - In other words, if -e is used with -f 0.90 and -F 0.10 this requires + that either 90% of A is covered OR 10% of B is covered. + Without -e, both fractions would have to be satisfied. + + -split Treat "split" BAM or BED12 entries as distinct BED intervals. + + -g Provide a genome file to enforce consistent chromosome sort order + across input files. Only applies when used with -sorted option. + + -nonamecheck For sorted data, don't throw an error if the file has different naming conventions + for the same chromosome. ex. "chr1" vs "chr01". + + -sorted Use the "chromsweep" algorithm for sorted (-k1,1 -k2,2n) input. 
+ + -names When using multiple databases, provide an alias for each that + will appear instead of a fileId when also printing the DB record. + + -filenames When using multiple databases, show each complete filename + instead of a fileId when also printing the DB record. + + -sortout When using multiple databases, sort the output DB hits + for each record. + + -bed If using BAM input, write output as BED. + + -header Print the header from the A file prior to results. + + -nobuf Disable buffered output. Using this option will cause each line + of output to be printed as it is generated, rather than saved + in a buffer. This will make printing large output files + noticeably slower, but can be useful in conjunction with + other software tools and scripts that need to process one + line of bedtools output at a time. + + -iobuf Specify amount of memory to use for input buffer. + Takes an integer argument. Optional suffixes K/M/G supported. + Note: currently has no effect with compressed files. + +Notes: + (1) When a BAM file is used for the A file, the alignment is retained if overlaps exist, + and excluded if an overlap cannot be found. If multiple overlaps exist, they are not + reported, as we are only testing for one or more overlaps. + + + + +***** ERROR: No input file given. Exiting. 
***** diff --git a/src/bedtools/bedtools_intersect/script.sh b/src/bedtools/bedtools_intersect/script.sh new file mode 100644 index 00000000..2141859d --- /dev/null +++ b/src/bedtools/bedtools_intersect/script.sh @@ -0,0 +1,61 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +[[ "$par_write_a" == "false" ]] && unset par_write_a +[[ "$par_write_b" == "false" ]] && unset par_write_b +[[ "$par_left_outer_join" == "false" ]] && unset par_left_outer_join +[[ "$par_write_overlap" == "false" ]] && unset par_write_overlap +[[ "$par_write_overlap_plus" == "false" ]] && unset par_write_overlap_plus +[[ "$par_report_A_if_no_overlap" == "false" ]] && unset par_report_A_if_no_overlap +[[ "$par_number_of_overlaps_A" == "false" ]] && unset par_number_of_overlaps_A +[[ "$par_report_no_overlaps_A" == "false" ]] && unset par_report_no_overlaps_A +[[ "$par_uncompressed_bam" == "false" ]] && unset par_uncompressed_bam +[[ "$par_same_strand" == "false" ]] && unset par_same_strand +[[ "$par_opposite_strand" == "false" ]] && unset par_opposite_strand +[[ "$par_reciprocal_overlap" == "false" ]] && unset par_reciprocal_overlap +[[ "$par_either_overlap" == "false" ]] && unset par_either_overlap +[[ "$par_split" == "false" ]] && unset par_split +[[ "$par_nonamecheck" == "false" ]] && unset par_nonamecheck +[[ "$par_sorted" == "false" ]] && unset par_sorted +[[ "$par_filenames" == "false" ]] && unset par_filenames +[[ "$par_sortout" == "false" ]] && unset par_sortout +[[ "$par_bed" == "false" ]] && unset par_bed +[[ "$par_header" == "false" ]] && unset par_header +[[ "$par_no_buffer_output" == "false" ]] && unset par_no_buffer_output + +# Create input array +IFS=";" read -ra input <<< $par_input_b + +bedtools intersect \ + ${par_write_a:+-wa} \ + ${par_write_b:+-wb} \ + ${par_left_outer_join:+-loj} \ + ${par_write_overlap:+-wo} \ + ${par_write_overlap_plus:+-wao} \ + ${par_report_A_if_no_overlap:+-u} \ + ${par_number_of_overlaps_A:+-c} \ + ${par_report_no_overlaps_A:+-v} \ + 
${par_uncompressed_bam:+-ubam} \ + ${par_same_strand:+-s} \ + ${par_opposite_strand:+-S} \ + ${par_min_overlap_A:+-f "$par_min_overlap_A"} \ + ${par_min_overlap_B:+-F "$par_min_overlap_B"} \ + ${par_reciprocal_overlap:+-r} \ + ${par_either_overlap:+-e} \ + ${par_split:+-split} \ + ${par_genome:+-g "$par_genome"} \ + ${par_nonamecheck:+-nonamecheck} \ + ${par_sorted:+-sorted} \ + ${par_names:+-names "$par_names"} \ + ${par_filenames:+-filenames} \ + ${par_sortout:+-sortout} \ + ${par_bed:+-bed} \ + ${par_header:+-header} \ + ${par_no_buffer_output:+-nobuf} \ + ${par_io_buffer_size:+-iobuf "$par_io_buffer_size"} \ + -a "$par_input_a" \ + ${par_input_b:+ -b ${input[*]}} \ + > "$par_output" + \ No newline at end of file diff --git a/src/bedtools/bedtools_intersect/test.sh b/src/bedtools/bedtools_intersect/test.sh new file mode 100644 index 00000000..b9405a59 --- /dev/null +++ b/src/bedtools/bedtools_intersect/test.sh @@ -0,0 +1,340 @@ +#!/bin/bash + +# exit on error +set -e + +## VIASH START +meta_executable="target/executable/bedtools/bedtools_intersect/bedtools_intersect" +meta_resources_dir="src/bedtools/bedtools_intersect" +## VIASH END + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" && exit 1) +} +############################################# + +# Create directories for tests +echo "Creating Test Data..." 
+mkdir -p test_data + +# Create and populate featuresA.bed +printf "chr1\t100\t200\nchr1\t150\t250\nchr1\t300\t400\n" > "test_data/featuresA.bed" + +# Create and populate featuresB.bed +printf "chr1\t180\t280\nchr1\t290\t390\nchr1\t500\t600\n" > "test_data/featuresB.bed" + +# Create and populate featuresC.bed +printf "chr1\t120\t220\nchr1\t250\t350\nchr1\t500\t580\n" > "test_data/featuresC.bed" + +# Create and populate examples gff files +# example1.gff +printf "##gff-version 3\n" > "test_data/example1.gff" +printf "chr1\t.\tgene\t1000\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "test_data/example1.gff" +printf "chr1\t.\tmRNA\t1000\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "test_data/example1.gff" +printf "chr1\t.\texon\t1000\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n" >> "test_data/example1.gff" +printf "chr1\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "test_data/example1.gff" +printf "chr1\t.\tCDS\t1000\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n" >> "test_data/example1.gff" +printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "test_data/example1.gff" +# example2.gff +printf "##gff-version 3\n" > "test_data/example2.gff" +printf "chr1\t.\tgene\t1200\t1800\t.\t-\t.\tID=gene2;Name=Gene2\n" >> "test_data/example2.gff" +printf "chr1\t.\tmRNA\t1400\t2000\t.\t-\t.\tID=transcript2;Parent=gene2\n" >> "test_data/example2.gff" +printf "chr1\t.\texon\t1400\t2000\t.\t-\t.\tID=exon3;Parent=transcript2\n" >> "test_data/example2.gff" +printf "chr1\t.\texon\t1600\t2000\t.\t-\t.\tID=exon4;Parent=transcript2\n" >> "test_data/example2.gff" +printf "chr1\t.\tCDS\t3000\t3200\t.\t-\t1\tID=cds3;Parent=transcript2\n" >> "test_data/example2.gff" +printf "chr1\t.\tCDS\t3500\t3700\t.\t-\t0\tID=cds4;Parent=transcript2\n" >> "test_data/example2.gff" + +# Create and populate expected output files for different tests +printf "chr1\t180\t200\nchr1\t180\t250\nchr1\t300\t390\n" > "test_data/expected_default.bed" +printf 
"chr1\t100\t200\nchr1\t150\t250\nchr1\t300\t400\n" > "test_data/expected_wa.bed" +printf "chr1\t180\t200\tchr1\t180\t280\nchr1\t180\t250\tchr1\t180\t280\nchr1\t300\t390\tchr1\t290\t390\n" > "test_data/expected_wb.bed" +printf "chr1\t100\t200\tchr1\t180\t280\nchr1\t150\t250\tchr1\t180\t280\nchr1\t300\t400\tchr1\t290\t390\n" > "test_data/expected_loj.bed" +printf "chr1\t100\t200\tchr1\t180\t280\t20\nchr1\t150\t250\tchr1\t180\t280\t70\nchr1\t300\t400\tchr1\t290\t390\t90\n" > "test_data/expected_wo.bed" +printf "chr1\t100\t200\nchr1\t150\t250\nchr1\t300\t400\n" > "test_data/expected_u.bed" +printf "chr1\t100\t200\t1\nchr1\t150\t250\t1\nchr1\t300\t400\t1\n" > "test_data/expected_c.bed" +printf "chr1\t180\t250\nchr1\t300\t390\n" > "test_data/expected_f50.bed" +printf "chr1\t180\t250\nchr1\t300\t390\n" > "test_data/expected_f30.bed" +printf "chr1\t180\t200\nchr1\t180\t250\nchr1\t300\t390\n" > "test_data/expected_f10.bed" +printf "chr1\t180\t200\nchr1\t180\t250\nchr1\t300\t390\n" > "test_data/expected_r.bed" +printf "chr1\t180\t200\nchr1\t120\t200\nchr1\t180\t250\nchr1\t150\t220\nchr1\t300\t390\nchr1\t300\t350\n" > "test_data/expected_multiple.bed" +# expected gff file +printf "chr1\t.\tgene\t1200\t1800\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "test_data/expected.gff" +printf "chr1\t.\tgene\t1400\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "test_data/expected.gff" +printf "chr1\t.\tgene\t1400\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "test_data/expected.gff" +printf "chr1\t.\tgene\t1600\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "test_data/expected.gff" +printf "chr1\t.\tmRNA\t1200\t1800\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "test_data/expected.gff" +printf "chr1\t.\tmRNA\t1400\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "test_data/expected.gff" +printf "chr1\t.\tmRNA\t1400\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "test_data/expected.gff" +printf "chr1\t.\tmRNA\t1600\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "test_data/expected.gff" +printf 
"chr1\t.\texon\t1200\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n" >> "test_data/expected.gff" +printf "chr1\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "test_data/expected.gff" +printf "chr1\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "test_data/expected.gff" +printf "chr1\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "test_data/expected.gff" +printf "chr1\t.\texon\t1600\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "test_data/expected.gff" +printf "chr1\t.\tCDS\t1200\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n" >> "test_data/expected.gff" +printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "test_data/expected.gff" +printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "test_data/expected.gff" +printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "test_data/expected.gff" +printf "chr1\t.\tCDS\t1600\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "test_data/expected.gff" + +# Test 1: Default intersect +mkdir test1 +cd test1 + +echo "> Run bedtools_intersect on BED files with default intersect" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --output "output.bed" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_default.bed" +echo "- test1 succeeded -" + +cd .. + +# Test 2: Write A option +mkdir test2 +cd test2 + +echo "> Run bedtools_intersect on BED files with -wa option" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --output "output.bed" \ + --write_a + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_wa.bed" +echo "- test2 succeeded -" + +cd .. 
+ +# Test 3: -wb option +mkdir test3 +cd test3 + +echo "> Run bedtools_intersect on BED files with -wb option" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --output "output.bed" \ + --write_b + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_wb.bed" +echo "- test3 succeeded -" + +cd .. + +# Test 4: -loj option +mkdir test4 +cd test4 + +echo "> Run bedtools_intersect on BED files with -loj option" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --output "output.bed" \ + --left_outer_join + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_loj.bed" +echo "- test4 succeeded -" + +cd .. + +# Test 5: -wo option +mkdir test5 +cd test5 + +echo "> Run bedtools_intersect on BED files with -wo option" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --output "output.bed" \ + --write_overlap + + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_wo.bed" +echo "- test5 succeeded -" + +cd .. + +# Test 6: -u option +mkdir test6 +cd test6 + +echo "> Run bedtools_intersect on BED files with -u option" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --output "output.bed" \ + --report_A_if_no_overlap true + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_u.bed" +echo "- test6 succeeded -" + +cd .. 
+ +# Test 7: -c option +mkdir test7 +cd test7 + +echo "> Run bedtools_intersect on BED files with -c option" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --output "output.bed" \ + --number_of_overlaps_A true + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_c.bed" +echo "- test7 succeeded -" + +cd .. + +# Test 8: -f 0.50 option +mkdir test8 +cd test8 + +echo "> Run bedtools_intersect on BED files with -f 0.50 option" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --output "output.bed" \ + --min_overlap_A 0.50 + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_f50.bed" +echo "- test8 succeeded -" + +cd .. + +# Test 9: -f 0.30 option +mkdir test9 +cd test9 + +echo "> Run bedtools_intersect on BED files with -f 0.30 option" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --output "output.bed" \ + --min_overlap_A 0.30 + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_f30.bed" +echo "- test9 succeeded -" + +cd .. + +# Test 10: -f 0.10 option +mkdir test10 +cd test10 + +echo "> Run bedtools_intersect on BED files with -f 0.10 option" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --output "output.bed" \ + --min_overlap_A 0.10 + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_f10.bed" +echo "- test10 succeeded -" + +cd .. 
+ +# Test 11: -r option +mkdir test11 +cd test11 + +echo "> Run bedtools_intersect on BED files with -r option" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --output "output.bed" \ + --reciprocal_overlap true + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_r.bed" +echo "- test11 succeeded -" + +cd .. + + +# Test 12: Multiple files +mkdir test12 +cd test12 + +echo "> Run bedtools_intersect on multiple BED files" +"$meta_executable" \ + --input_a "../test_data/featuresA.bed" \ + --input_b "../test_data/featuresB.bed" \ + --input_b "../test_data/featuresC.bed" \ + --output "output.bed" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_multiple.bed" +echo "- test12 succeeded -" + +cd .. + +# Test 13: GFF file format +mkdir test13 +cd test13 + +echo "> Run bedtools_intersect on GFF files" +"$meta_executable" \ + --input_a "../test_data/example1.gff" \ + --input_b "../test_data/example2.gff" \ + --output "output.bed" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected.gff" +echo "- test13 succeeded -" + +cd .. + +echo "---- All tests succeeded!
----" +exit 0 From de8b4248b64e0d2e04a6f20c35212403c57a1058 Mon Sep 17 00:00:00 2001 From: Theodoro Gasperin Terra Camargo <98555209+tgaspe@users.noreply.github.com> Date: Wed, 31 Jul 2024 16:21:15 -0300 Subject: [PATCH 14/25] Bedtools sort (#98) * Initial Commmit * config file * Update config.vsh.yaml * Update script.sh * Update test.sh * test files * Update test.sh * adding tests * two more test * more tests * more tests * Update CHANGELOG.md * removing some files * Update help.txt --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 1 + src/bedtools/bedtools_sort/config.vsh.yaml | 93 ++++++++ src/bedtools/bedtools_sort/help.txt | 21 ++ src/bedtools/bedtools_sort/script.sh | 27 +++ src/bedtools/bedtools_sort/test.sh | 264 +++++++++++++++++++++ 5 files changed, 406 insertions(+) create mode 100644 src/bedtools/bedtools_sort/config.vsh.yaml create mode 100644 src/bedtools/bedtools_sort/help.txt create mode 100644 src/bedtools/bedtools_sort/script.sh create mode 100644 src/bedtools/bedtools_sort/test.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 36681056..1debf12b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). + - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). ## MINOR CHANGES diff --git a/src/bedtools/bedtools_sort/config.vsh.yaml b/src/bedtools/bedtools_sort/config.vsh.yaml new file mode 100644 index 00000000..5024bd39 --- /dev/null +++ b/src/bedtools/bedtools_sort/config.vsh.yaml @@ -0,0 +1,93 @@ +name: bedtools_sort +namespace: bedtools +description: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria. 
+keywords: [sort, BED, GFF, VCF] +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/sort.html + repository: https://github.com/arq5x/bedtools2 +references: + doi: 10.1093/bioinformatics/btq033 +license: GPL-2.0, MIT +requirements: + commands: [bedtools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + description: Input file (bed/gff/vcf) to be sorted. + required: true + + - name: Outputs + arguments: + - name: --output + alternatives: -o + type: file + direction: output + description: Output sorted file (bed/gff/vcf) to be written. + + - name: Options + arguments: + - name: --sizeA + type: boolean_true + description: Sort by feature size in ascending order. + + - name: --sizeD + type: boolean_true + description: Sort by feature size in descending order. + + - name: --chrThenSizeA + type: boolean_true + description: Sort by chrom (asc), then feature size (asc). + + - name: --chrThenSizeD + type: boolean_true + description: Sort by chrom (asc), then feature size (desc). + + - name: --chrThenScoreA + type: boolean_true + description: Sort by chrom (asc), then score (asc). + + - name: --chrThenScoreD + type: boolean_true + description: Sort by chrom (asc), then score (desc). + + - name: --genome + alternatives: -g + type: file + description: Sort according to the chromosomes declared in "genome.txt" + + - name: --faidx + type: file + description: Sort according to the chromosomes declared in "names.txt" + + - name: --header + type: boolean_true + description: Print the header from the A file prior to results. 
+ +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bedtools, procps] + - type: docker + run: | + echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/bedtools/bedtools_sort/help.txt b/src/bedtools/bedtools_sort/help.txt new file mode 100644 index 00000000..09266c69 --- /dev/null +++ b/src/bedtools/bedtools_sort/help.txt @@ -0,0 +1,21 @@ +```bash +bedtools sort +``` + +Tool: bedtools sort (aka sortBed) +Version: v2.30.0 +Summary: Sorts a feature file in various and useful ways. + +Usage: bedtools sort [OPTIONS] -i + +Options: + -sizeA Sort by feature size in ascending order. + -sizeD Sort by feature size in descending order. + -chrThenSizeA Sort by chrom (asc), then feature size (asc). + -chrThenSizeD Sort by chrom (asc), then feature size (desc). + -chrThenScoreA Sort by chrom (asc), then score (asc). + -chrThenScoreD Sort by chrom (asc), then score (desc). + -g (names.txt) Sort according to the chromosomes declared in "genome.txt" + -faidx (names.txt) Sort according to the chromosomes declared in "names.txt" + -header Print the header from the A file prior to results. 
+ diff --git a/src/bedtools/bedtools_sort/script.sh b/src/bedtools/bedtools_sort/script.sh new file mode 100644 index 00000000..e7f712d7 --- /dev/null +++ b/src/bedtools/bedtools_sort/script.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +# Unset parameters +[[ "$par_sizeA" == "false" ]] && unset par_sizeA +[[ "$par_sizeD" == "false" ]] && unset par_sizeD +[[ "$par_chrThenSizeA" == "false" ]] && unset par_chrThenSizeA +[[ "$par_chrThenSizeD" == "false" ]] && unset par_chrThenSizeD +[[ "$par_chrThenScoreA" == "false" ]] && unset par_chrThenScoreA +[[ "$par_chrThenScoreD" == "false" ]] && unset par_chrThenScoreD +[[ "$par_header" == "false" ]] && unset par_header + +# Execute bedtools sort with the provided arguments +bedtools sort \ + ${par_sizeA:+-sizeA} \ + ${par_sizeD:+-sizeD} \ + ${par_chrThenSizeA:+-chrThenSizeA} \ + ${par_chrThenSizeD:+-chrThenSizeD} \ + ${par_chrThenScoreA:+-chrThenScoreA} \ + ${par_chrThenScoreD:+-chrThenScoreD} \ + ${par_genome:+-g "$par_genome"} \ + ${par_faidx:+-faidx "$par_faidx"} \ + ${par_header:+-header} \ + -i "$par_input" \ + > "$par_output" diff --git a/src/bedtools/bedtools_sort/test.sh b/src/bedtools/bedtools_sort/test.sh new file mode 100644 index 00000000..bf402c35 --- /dev/null +++ b/src/bedtools/bedtools_sort/test.sh @@ -0,0 +1,264 @@ +#!/bin/bash + +# exit on error +set -e + +## VIASH START +meta_executable="target/executable/bedtools/bedtools_sort/bedtools_sort" +meta_resources_dir="src/bedtools/bedtools_sort" +## VIASH END + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" 
&& exit 1) +} +############################################# + +# Create directories for tests +echo "Creating Test Data..." +mkdir -p test_data + +# Create and populate example files +printf "#Header\nchr1\t300\t400\nchr1\t150\t250\nchr1\t100\t200" > "test_data/featureA.bed" +printf "chr2\t290\t400\nchr2\t180\t220\nchr1\t500\t600" > "test_data/featureB.bed" +printf "chr1\t100\t200\tfeature1\t960\nchr1\t150\t250\tfeature2\t850\nchr1\t300\t400\tfeature3\t740\nchr2\t290\t390\tfeature4\t630\nchr2\t180\t280\tfeature5\t920\nchr3\t120\t220\tfeature6\t410\n" > "test_data/featureC.bed" +printf "chr1\nchr3\nchr2\n" > "test_data/genome.txt" +printf "chr1\t248956422\nchr3\t242193529\nchr2\t198295559\n" > "test_data/genome.fai" + +# Create and populate example.gff file +printf "##gff-version 3\n" > "test_data/example.gff" +printf "chr1\t.\tgene\t1000\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "test_data/example.gff" +printf "chr3\t.\tmRNA\t1000\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "test_data/example.gff" +printf "chr1\t.\texon\t1000\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n" >> "test_data/example.gff" +printf "chr2\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "test_data/example.gff" +printf "chr1\t.\tCDS\t1000\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n" >> "test_data/example.gff" +printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "test_data/example.gff" + +# Create expected output files +printf "chr1\t100\t200\nchr1\t150\t250\nchr1\t300\t400\n" > "test_data/expected_sorted_A.bed" +printf "chr2\t180\t220\nchr1\t500\t600\nchr2\t290\t400\n" > "test_data/expected_sizeA.bed" +printf "chr2\t290\t400\nchr1\t500\t600\nchr2\t180\t220\n" > "test_data/expected_sizeD.bed" +printf "chr1\t500\t600\nchr2\t180\t220\nchr2\t290\t400\n" > "test_data/expected_chrThenSizeA.bed" +printf "chr1\t500\t600\nchr2\t290\t400\nchr2\t180\t220\n" > "test_data/expected_chrThenSizeD.bed" +printf 
"chr1\t300\t400\tfeature3\t740\nchr1\t150\t250\tfeature2\t850\nchr1\t100\t200\tfeature1\t960\nchr2\t290\t390\tfeature4\t630\nchr2\t180\t280\tfeature5\t920\nchr3\t120\t220\tfeature6\t410\n" > "test_data/expected_chrThenScoreA.bed" +printf "chr1\t100\t200\tfeature1\t960\nchr1\t150\t250\tfeature2\t850\nchr1\t300\t400\tfeature3\t740\nchr2\t180\t280\tfeature5\t920\nchr2\t290\t390\tfeature4\t630\nchr3\t120\t220\tfeature6\t410\n" > "test_data/expected_chrThenScoreD.bed" +printf "chr1\t100\t200\tfeature1\t960\nchr1\t150\t250\tfeature2\t850\nchr1\t300\t400\tfeature3\t740\nchr3\t120\t220\tfeature6\t410\nchr2\t180\t280\tfeature5\t920\nchr2\t290\t390\tfeature4\t630\n" > "test_data/expected_genome.bed" +printf "#Header\nchr1\t100\t200\nchr1\t150\t250\nchr1\t300\t400\n" > "test_data/expected_header.bed" + +# expected_sorted.gff +printf "chr1\t.\tgene\t1000\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "test_data/expected_sorted.gff" +printf "chr1\t.\texon\t1000\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n" >> "test_data/expected_sorted.gff" +printf "chr1\t.\tCDS\t1000\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n" >> "test_data/expected_sorted.gff" +printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "test_data/expected_sorted.gff" +printf "chr2\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "test_data/expected_sorted.gff" +printf "chr3\t.\tmRNA\t1000\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "test_data/expected_sorted.gff" + +# Test 1: Default sort on BED file +mkdir test1 +cd test1 + +echo "> Run bedtools_sort on BED file" +"$meta_executable" \ + --input "../test_data/featureA.bed" \ + --output "output.bed" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_sorted_A.bed" +echo "- test1 succeeded -" + +cd .. 
+ +# Test 2: Default sort on GFF file +mkdir test2 +cd test2 + +echo "> Run bedtools_sort on GFF file" +"$meta_executable" \ + --input "../test_data/example.gff" \ + --output "output.gff" + +# checks +assert_file_exists "output.gff" +assert_file_not_empty "output.gff" +assert_identical_content "output.gff" "../test_data/expected_sorted.gff" +echo "- test2 succeeded -" + +cd .. + +# Test 3: Sort on sizeA +mkdir test3 +cd test3 + +echo "> Run bedtools_sort on BED file with sizeA" +"$meta_executable" \ + --input "../test_data/featureB.bed" \ + --output "output.bed" \ + --sizeA + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_sizeA.bed" +echo "- test3 succeeded -" + +cd .. + +# Test 4: Sort on sizeD +mkdir test4 +cd test4 + +echo "> Run bedtools_sort on BED file with sizeD" +"$meta_executable" \ + --input "../test_data/featureB.bed" \ + --output "output.bed" \ + --sizeD + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_sizeD.bed" +echo "- test4 succeeded -" + +cd .. + +# Test 5: Sort on chrThenSizeA +mkdir test5 +cd test5 + +echo "> Run bedtools_sort on BED file with chrThenSizeA" +"$meta_executable" \ + --input "../test_data/featureB.bed" \ + --output "output.bed" \ + --chrThenSizeA + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_chrThenSizeA.bed" +echo "- test5 succeeded -" + +cd .. 
+ +# Test 6: Sort on chrThenSizeD +mkdir test6 +cd test6 + +echo "> Run bedtools_sort on BED file with chrThenSizeD" +"$meta_executable" \ + --input "../test_data/featureB.bed" \ + --output "output.bed" \ + --chrThenSizeD + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_chrThenSizeD.bed" +echo "- test6 succeeded -" + +cd .. + +# Test 7: Sort on chrThenScoreA +mkdir test7 +cd test7 + +echo "> Run bedtools_sort on BED file with chrThenScoreA" +"$meta_executable" \ + --input "../test_data/featureC.bed" \ + --output "output.bed" \ + --chrThenScoreA + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_chrThenScoreA.bed" +echo "- test7 succeeded -" + +cd .. + +# Test 8: Sort on chrThenScoreD +mkdir test8 +cd test8 + +echo "> Run bedtools_sort on BED file with chrThenScoreD" +"$meta_executable" \ + --input "../test_data/featureC.bed" \ + --output "output.bed" \ + --chrThenScoreD + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_chrThenScoreD.bed" +echo "- test8 succeeded -" + +cd .. + +# Test 9: Sort according to genome file +mkdir test9 +cd test9 + +echo "> Run bedtools_sort on BED file according to genome file" +"$meta_executable" \ + --input "../test_data/featureC.bed" \ + --output "output.bed" \ + --genome "../test_data/genome.txt" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_genome.bed" +echo "- test9 succeeded -" + +cd .. 
+ +# Test 10: Sort according to faidx file +mkdir test10 +cd test10 + +echo "> Run bedtools_sort on BED file according to faidx file" +"$meta_executable" \ + --input "../test_data/featureC.bed" \ + --output "output.bed" \ + --faidx "../test_data/genome.fai" + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_genome.bed" +echo "- test10 succeeded -" + +cd .. + +# Test 11: Sort with header +mkdir test11 +cd test11 + +echo "> Run bedtools_sort on BED file with header" +"$meta_executable" \ + --input "../test_data/featureA.bed" \ + --output "output.bed" \ + --header + +# checks +assert_file_exists "output.bed" +assert_file_not_empty "output.bed" +assert_identical_content "output.bed" "../test_data/expected_header.bed" +echo "- test11 succeeded -" + +cd .. + +echo "---- All tests succeeded! ----" +exit 0 From 4aa0a893d2f8be5f0d03797afc15a04c53664367 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Wed, 31 Jul 2024 21:23:22 +0200 Subject: [PATCH 15/25] Add agat convert bed2gff (#97) * add config * add help * add script * add test data and expected output file * add script to get test data * add tests * update changelog * fix path to test data --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 9 +- src/agat/agat_convert_bed2gff/config.vsh.yaml | 86 ++++++++++++++++++ src/agat/agat_convert_bed2gff/help.txt | 89 +++++++++++++++++++ src/agat/agat_convert_bed2gff/script.sh | 19 ++++ src/agat/agat_convert_bed2gff/test.sh | 27 ++++++ .../test_data/agat_convert_bed2gff_1.gff | 12 +++ .../agat_convert_bed2gff/test_data/script.sh | 10 +++ .../agat_convert_bed2gff/test_data/test.bed | 1 + 8 files changed, 250 insertions(+), 3 deletions(-) create mode 100644 src/agat/agat_convert_bed2gff/config.vsh.yaml create mode 100644 src/agat/agat_convert_bed2gff/help.txt create mode 100644 src/agat/agat_convert_bed2gff/script.sh create mode 100644 src/agat/agat_convert_bed2gff/test.sh create 
mode 100644 src/agat/agat_convert_bed2gff/test_data/agat_convert_bed2gff_1.gff create mode 100755 src/agat/agat_convert_bed2gff/test_data/script.sh create mode 100644 src/agat/agat_convert_bed2gff/test_data/test.bed diff --git a/CHANGELOG.md b/CHANGELOG.md index 1debf12b..9dd2389c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,11 +19,14 @@ - `seqtk/seqtk_subseq`: Extract the sequences (complete or subsequence) from the FASTA/FASTQ files based on a provided sequence IDs or region coordinates file (PR #85). -* `agat/agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). +* `agat`: + - `agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). + - `agat_convert_bed2gff`: convert bed file to gff format (PR #97). * `bedtools`: - - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). - - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). + - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). + - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). + ## MINOR CHANGES diff --git a/src/agat/agat_convert_bed2gff/config.vsh.yaml b/src/agat/agat_convert_bed2gff/config.vsh.yaml new file mode 100644 index 00000000..a0fafc44 --- /dev/null +++ b/src/agat/agat_convert_bed2gff/config.vsh.yaml @@ -0,0 +1,86 @@ +name: agat_convert_bed2gff +namespace: agat +description: | + The script takes a bed file as input, and will translate it in gff format. The BED format is described here: https://genome.ucsc.edu/FAQ/FAQformat.html#format1. The script converts 0-based, half-open [start-1, end) bed file to 1-based, closed [start, end] General Feature Format v3 (GFF3).
+keywords: [gene annotations, GFF conversion] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_convert_bed2gff.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] +argument_groups: + - name: Inputs + arguments: + - name: --bed + description: Input bed file that will be converted. + type: file + required: true + direction: input + example: input.bed + - name: Outputs + arguments: + - name: --output + alternatives: [-o, --out, --outfile, --gff] + description: Output GFF file. If no output file is specified, the output will be written to STDOUT. + type: file + direction: output + required: true + example: output.gff + - name: Arguments + arguments: + - name: --source + description: | + The source informs about the tool used to produce the data and is stored in 2nd field of a gff file. Example: Stringtie, Maker, Augustus, etc. [default: data] + type: string + required: false + example: Stringtie + - name: --primary_tag + description: | + The primary_tag corresponds to the data type and is stored in 3rd field of a gff file. Example: gene, mRNA, CDS, etc. [default: gene] + type: string + required: false + example: gene + - name: --inflate_off + description: | + By default we inflate the block fields (blockCount, blockSizes, blockStarts) to create subfeatures of the main feature (primary_tag). The type of subfeature created is based on the inflate_type parameter. If you do not want this inflating behaviour you can deactivate it by using the --inflate_off option. + type: boolean_false + - name: --inflate_type + description: | + Feature type (3rd column in gff) created when inflate parameter activated [default: exon]. 
+ type: string + required: false + example: exon + - name: --verbose + description: add verbosity + type: boolean_true + - name: --config + alternatives: [-c] + description: | + Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + type: file + required: false + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_convert_bed2gff/help.txt b/src/agat/agat_convert_bed2gff/help.txt new file mode 100644 index 00000000..56e953d7 --- /dev/null +++ b/src/agat/agat_convert_bed2gff/help.txt @@ -0,0 +1,89 @@ +```sh +agat_convert_bed2gff.pl --help +``` + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_convert_bed2gff.pl + +Description: + The script takes a bed file as input, and will translate it in gff + format. The BED format is described here: + https://genome.ucsc.edu/FAQ/FAQformat.html#format1 The script converts + 0-based, half-open [start-1, end) bed file to 1-based, closed [start, + end] General Feature Format v3 (GFF3).
+ +Usage: + agat_convert_bed2gff.pl --bed infile.bed [ -o outfile ] + agat_convert_bed2gff.pl -h + +Options: + --bed Input bed file that will be converted. + + --source + The source informs about the tool used to produce the data and + is stored in 2nd field of a gff file. Example: + Stringtie,Maker,Augustus,etc. [default: data] + + --primary_tag + The primary_tag corresponds to the data type and is stored in + 3rd field of a gff file. Example: gene,mRNA,CDS,etc. [default: + gene] + + --inflate_off + By default we inflate the block fields (blockCount, blockSizes, + blockStarts) to create subfeatures of the main feature + (primary_tag). The type of subfeature created is based on the + inflate_type parameter. If you do not want this inflating + behaviour you can deactivate it by using the --inflate_off + option. + + --inflate_type + Feature type (3rd column in gff) created when inflate parameter + activated [default: exon]. + + --verbose + add verbosity + + -o , --output , --out , --outfile or --gff + Output GFF file. If no output file is specified, the output will + be written to STDOUT. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + -h or --help + Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. 
+ Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md diff --git a/src/agat/agat_convert_bed2gff/script.sh b/src/agat/agat_convert_bed2gff/script.sh new file mode 100644 index 00000000..fbeb9206 --- /dev/null +++ b/src/agat/agat_convert_bed2gff/script.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +# unset flags +[[ "$par_inflate_off" == "true" ]] && unset par_inflate_off +[[ "$par_verbose" == "false" ]] && unset par_verbose + +# run agat_convert_sp_bed2gff.pl +agat_convert_bed2gff.pl \ + --bed "$par_bed" \ + -o "$par_output" \ + ${par_source:+--source "${par_source}"} \ + ${par_primary_tag:+--primary_tag "${par_primary_tag}"} \ + ${par_inflate_off:+--inflate_off} \ + ${par_inflate_type:+--inflate_type "${par_inflate_type}"} \ + ${par_verbose:+--verbose} + ${par_config:+--config "${par_config}"} \ diff --git a/src/agat/agat_convert_bed2gff/test.sh b/src/agat/agat_convert_bed2gff/test.sh new file mode 100644 index 00000000..6e7d43f3 --- /dev/null +++ b/src/agat/agat_convert_bed2gff/test.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" +out_dir="${meta_resources_dir}/out_data" + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --bed "$test_dir/test.bed" \ + --output "$out_dir/output.gff" + +echo ">> Checking output" +[ ! -f "$out_dir/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! 
-s "$out_dir/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$out_dir/output.gff" "$test_dir/agat_convert_bed2gff_1.gff" +if [ $? -ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + +echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_convert_bed2gff/test_data/agat_convert_bed2gff_1.gff b/src/agat/agat_convert_bed2gff/test_data/agat_convert_bed2gff_1.gff new file mode 100644 index 00000000..587e3d09 --- /dev/null +++ b/src/agat/agat_convert_bed2gff/test_data/agat_convert_bed2gff_1.gff @@ -0,0 +1,12 @@ +##gff-version 3 +scaffold625 data gene 337818 343277 . + . ID=1;Name=CLUHART00000008717;blockCount=4;blockSizes=154%2C109%2C111%2C1314;blockStarts=0%2C2915%2C3700%2C4146;itemRgb=255%2C0%2C0;thickEnd=343033;thickStart=337914 +scaffold625 data exon 337818 337971 . + . ID=exon1;Parent=1 +scaffold625 data exon 340733 340841 . + . ID=exon2;Parent=1 +scaffold625 data exon 341518 341628 . + . ID=exon3;Parent=1 +scaffold625 data exon 341964 343277 . + . ID=exon4;Parent=1 +scaffold625 data CDS 337915 337971 . + 0 ID=CDS1;Parent=1 +scaffold625 data CDS 340733 340841 . + 0 ID=CDS2;Parent=1 +scaffold625 data CDS 341518 341628 . + 2 ID=CDS3;Parent=1 +scaffold625 data CDS 341964 343033 . + 2 ID=CDS4;Parent=1 +scaffold625 data five_prime_UTR 337818 337914 . + . ID=five_prime_UTR1;Parent=1 +scaffold625 data three_prime_UTR 343034 343277 . + . ID=three_prime_UTR1;Parent=1 diff --git a/src/agat/agat_convert_bed2gff/test_data/script.sh b/src/agat/agat_convert_bed2gff/test_data/script.sh new file mode 100755 index 00000000..d1206a42 --- /dev/null +++ b/src/agat/agat_convert_bed2gff/test_data/script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# clone repo +if [ ! 
-d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/scripts_output/in/test.bed src/agat/agat_convert_bed2gff/test_data/test.bed +cp -r /tmp/agat_source/t/scripts_output/out/agat_convert_bed2gff_1.gff src/agat/agat_convert_bed2gff/test_data/agat_convert_bed2gff_1.gff \ No newline at end of file diff --git a/src/agat/agat_convert_bed2gff/test_data/test.bed b/src/agat/agat_convert_bed2gff/test_data/test.bed new file mode 100644 index 00000000..bfeba3bb --- /dev/null +++ b/src/agat/agat_convert_bed2gff/test_data/test.bed @@ -0,0 +1 @@ +scaffold625 337817 343277 CLUHART00000008717 0 + 337914 343033 255,0,0 4 154,109,111,1314 0,2915,3700,4146 From ede5850f577cbfe8ca5edf8525703535b12b4a36 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Sat, 10 Aug 2024 08:51:39 +0200 Subject: [PATCH 16/25] Add agat convert embl2gff (#99) * add config * add help * add test data and expected output * add script to get test data * add running script * add test script * update description * update changelog * cleanup * fix path to copy test data * pull the test data again * fix typo GTF => GFF * fix tests * fix output file: replace by generated output * fix test data: add --emblmygff3 * cleanup * config: add longer name to `-k` and `-d` --- CHANGELOG.md | 2 + .../agat_convert_embl2gff/config.vsh.yaml | 84 +++++++++++++++++++ src/agat/agat_convert_embl2gff/help.txt | 78 +++++++++++++++++ src/agat/agat_convert_embl2gff/script.sh | 23 +++++ src/agat/agat_convert_embl2gff/test.sh | 28 +++++++ .../test_data/agat_convert_embl2gff_1.embl | 51 +++++++++++ .../test_data/agat_convert_embl2gff_1.gff | 10 +++ .../agat_convert_embl2gff/test_data/script.sh | 10 +++ 8 files changed, 286 insertions(+) create mode 100644 src/agat/agat_convert_embl2gff/config.vsh.yaml create mode 100644 src/agat/agat_convert_embl2gff/help.txt create mode 100644 
src/agat/agat_convert_embl2gff/script.sh create mode 100644 src/agat/agat_convert_embl2gff/test.sh create mode 100644 src/agat/agat_convert_embl2gff/test_data/agat_convert_embl2gff_1.embl create mode 100644 src/agat/agat_convert_embl2gff/test_data/agat_convert_embl2gff_1.gff create mode 100755 src/agat/agat_convert_embl2gff/test_data/script.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 9dd2389c..3c2f347a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). +* `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). + ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1` (PR #72). diff --git a/src/agat/agat_convert_embl2gff/config.vsh.yaml b/src/agat/agat_convert_embl2gff/config.vsh.yaml new file mode 100644 index 00000000..99ceec46 --- /dev/null +++ b/src/agat/agat_convert_embl2gff/config.vsh.yaml @@ -0,0 +1,84 @@ +name: agat_convert_embl2gff +namespace: agat +description: | + The script takes an EMBL file as input, and will translate it in gff format. +keywords: [gene annotations, GFF conversion] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_convert_embl2gff.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --embl + description: Input EMBL file that will be read. + type: file + required: true + direction: input + example: input.embl + - name: Outputs + arguments: + - name: --output + alternatives: [-o, --out, --outfile, --gff] + description: Output GFF file. If no output file is specified, the output will be written to STDOUT. 
+ type: file + direction: output + required: false + example: output.gff + - name: Arguments + arguments: + - name: --emblmygff3 + description: | + Means that the EMBL flat file comes from the EMBLmyGFF3 software. This is an EMBL format dedicated for submission and contains particularity to deal with. This parameter is needed to get a proper sequence id in the GFF3 from an embl made with EMBLmyGFF3. + type: boolean_true + - name: --primary_tag + alternatives: [--pt, -t] + description: | + List of "primary tag". Useful to discard or keep specific features. Multiple tags must be comma-separated. + type: string + multiple: true + required: false + example: [tag1, tag2] + - name: --discard + alternatives: [-d] + description: | + Means that primary tags provided by the option "primary_tag" will be discarded. + type: boolean_true + - name: --keep + alternatives: [-k] + description: | + Means that only primary tags provided by the option "primary_tag" will be kept. + type: boolean_true + - name: --config + alternatives: [-c] + description: | + Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). 
+ type: file + required: false + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_convert_embl2gff/help.txt b/src/agat/agat_convert_embl2gff/help.txt new file mode 100644 index 00000000..5fce4939 --- /dev/null +++ b/src/agat/agat_convert_embl2gff/help.txt @@ -0,0 +1,78 @@ + ```sh +agat_convert_embl2gff.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_converter_embl2gff.pl + +Description: + The script takes an EMBL file as input, and will translate it in gff + format. + +Usage: + agat_converter_embl2gff.pl --embl infile.embl [ -o outfile ] + +Options: + --embl Input EMBL file that will be read + + --emblmygff3 + Bolean - Means that the EMBL flat file comes from the EMBLmyGFF3 + software. This is an EMBL format dedicated for submission and + contains particularity to deal with. This parameter is needed to + get a proper sequence id in the GFF3 from an embl made with + EMBLmyGFF3. + + --primary_tag, --pt, -t + List of "primary tag". Useful to discard or keep specific + features. Multiple tags must be coma-separated. + + -d Bolean - Means that primary tags provided by the option + "primary_tag" will be discarded. + + -k Bolean - Means that only primary tags provided by the option + "primary_tag" will be kept. 
+ + -o, --output, --out, --outfile or --gff + Output GFF file. If no output file is specified, the output will + be written to STDOUT. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + -h or --help + Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. 
+ + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md diff --git a/src/agat/agat_convert_embl2gff/script.sh b/src/agat/agat_convert_embl2gff/script.sh new file mode 100644 index 00000000..63ab8df0 --- /dev/null +++ b/src/agat/agat_convert_embl2gff/script.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +## VIASH START +## VIASH END + + +# unset flags +[[ "$par_emblmygff3" == "false" ]] && unset par_emblmygff3 +[[ "$par_discard" == "false" ]] && unset par_discard +[[ "$par_keep" == "false" ]] && unset par_keep + +# replace ';' with ',' +par_primary_tag=$(echo $par_primary_tag | tr ';' ',') + +# run agat_convert_embl2gff +agat_convert_embl2gff.pl \ + --embl "$par_embl" \ + -o "$par_output" \ + ${par_emblmygff3:+--emblmygff3} \ + ${par_primary_tag:+--primary_tag "${par_primary_tag}"} \ + ${par_discard:+-d} \ + ${par_keep:+-k} \ + ${par_config:+--config "${par_config}"} diff --git a/src/agat/agat_convert_embl2gff/test.sh b/src/agat/agat_convert_embl2gff/test.sh new file mode 100644 index 00000000..81d24aaa --- /dev/null +++ b/src/agat/agat_convert_embl2gff/test.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" +out_dir="${meta_resources_dir}/out_data" + +echo "> Run $meta_name with test data and --emblmygff3" +"$meta_executable" \ + --embl "$test_dir/agat_convert_embl2gff_1.embl" \ + --output "$out_dir/output.gff" \ + --emblmygff3 + +echo ">> Checking output" +[ ! -f "$out_dir/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "$out_dir/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$out_dir/output.gff" "$test_dir/agat_convert_embl2gff_1.gff" +if [ $? 
-ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + +echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_convert_embl2gff/test_data/agat_convert_embl2gff_1.embl b/src/agat/agat_convert_embl2gff/test_data/agat_convert_embl2gff_1.embl new file mode 100644 index 00000000..aa4f50aa --- /dev/null +++ b/src/agat/agat_convert_embl2gff/test_data/agat_convert_embl2gff_1.embl @@ -0,0 +1,51 @@ +ID patatrac; SV 1; circular; genomic DNA; XXX; PRO; 317941 BP. +XX +AC XXX; +XX +AC * _ERS324955|SC|contig000001 +XX +PR Project:PRJEBNNNN; +XX +DE XXX +XX +RN [1] +RP 1-2149 +RA XXX; +RT ; +RL Submitted {(DD-MMM-YYYY)} to the INSDC. +XX +FH Key Location/Qualifiers +FH +FT source 1..588788 +FT /organism={"scientific organism name"} +FT /mol_type={"in vivo molecule type of sequence"} +XX +SQ Sequence 588788 BP; 101836 A; 193561 C; 192752 G; 100639 T; 0 other; + tgcgtactcg aagagacgcg cccagattat ataagggcgt cgtctcgagg ccgacggcgc 60 + gccggcgagt acgcgtgatc cacaacccga agcgaccgtc gggagaccga gggtcgtcga 120 + gggtggatac gttcctgcct tcgtgccggg aaacggccga agggaacgtg gcgacctgcg 180 +// +ID fdssf; SV 1; circular; genomic DNA; XXX; PRO; 317941 BP. +XX +AC XXX; +XX +AC * _ERS344554 +XX +PR Project:PRJEBNNNN; +XX +DE XXX +XX +RN [1] +RP 1-2149 +RA XXX; +RT ; +RL Submitted {(DD-MMM-YYYY)} to the INSDC. 
+XX +FH Key Location/Qualifiers +FH +FT source 1..588788 +FT /organism={"scientific organism name"} +FT /mol_type={"in vivo molecule type of sequence"} +XX +SQ Sequence 588788 BP; 101836 A; 193561 C; 192752 G; 100639 T; 0 other; + TTTTTTTTTT aagagacgcg cccagattat ataagggcgt cgtctcgagg ccgacggcgc 60 diff --git a/src/agat/agat_convert_embl2gff/test_data/agat_convert_embl2gff_1.gff b/src/agat/agat_convert_embl2gff/test_data/agat_convert_embl2gff_1.gff new file mode 100644 index 00000000..f6893022 --- /dev/null +++ b/src/agat/agat_convert_embl2gff/test_data/agat_convert_embl2gff_1.gff @@ -0,0 +1,10 @@ +##gff-version 3 +ERS324955|SC|contig000001 EMBL/GenBank/SwissProt source 1 588788 . + 1 mol_type={"in vivo molecule type of sequence"};organism={"scientific organism name"} +ERS344554 EMBL/GenBank/SwissProt source 1 588788 . + 1 mol_type={"in vivo molecule type of sequence"};organism={"scientific organism name"} +##FASTA +>ERS324955|SC|contig000001 XXX +TGCGTACTCGAAGAGACGCGCCCAGATTATATAAGGGCGTCGTCTCGAGGCCGACGGCGCGCCGGCGAGTACGCGTGATC +CACAACCCGAAGCGACCGTCGGGAGACCGAGGGTCGTCGAGGGTGGATACGTTCCTGCCTTCGTGCCGGGAAACGGCCGA +AGGGAACGTGGCGACCTGCG +>ERS344554 XXX +TTTTTTTTTTAAGAGACGCGCCCAGATTATATAAGGGCGTCGTCTCGAGGCCGACGGCGC diff --git a/src/agat/agat_convert_embl2gff/test_data/script.sh b/src/agat/agat_convert_embl2gff/test_data/script.sh new file mode 100755 index 00000000..7ddbce5b --- /dev/null +++ b/src/agat/agat_convert_embl2gff/test_data/script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# clone repo +if [ ! 
-d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/scripts_output/in/agat_convert_embl2gff_1.embl src/agat/agat_convert_embl2gff/test_data/agat_convert_embl2gff_1.embl +cp -r /tmp/agat_source/t/scripts_output/out/agat_convert_embl2gff_1.gff src/agat/agat_convert_embl2gff/test_data/agat_convert_embl2gff_1.gff \ No newline at end of file From d5fc46b1d9ef8313c06e369bc881f6de75c53dd4 Mon Sep 17 00:00:00 2001 From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:14:59 +0200 Subject: [PATCH 17/25] Avoid duplicate code when unsetting multiple boolean arguments (#133) * Avoid duplicate code when unsetting multiple boolean arguments * Add CHANGELOG entry [ci skip] * Update CONTRIBUTING guide --- CHANGELOG.md | 2 + CONTRIBUTING.md | 25 ++++++ src/bedtools/bedtools_intersect/script.sh | 49 +++++++----- src/bedtools/bedtools_sort/script.sh | 21 +++-- src/busco/busco_run/script.sh | 28 ++++--- src/fastp/script.sh | 45 ++++++----- src/featurecounts/script.sh | 43 +++++----- src/gffread/script.sh | 97 ++++++++++++----------- src/lofreq/call/script.sh | 37 +++++---- src/multiqc/script.sh | 44 +++++----- src/salmon/salmon_index/script.sh | 19 +++-- src/salmon/salmon_quant/script.sh | 90 +++++++++++---------- src/samtools/samtools_fastq/script.sh | 17 ++-- src/samtools/samtools_sort/script.sh | 25 +++--- src/samtools/samtools_view/script.sh | 38 +++++---- src/umi_tools/umi_tools_dedup/script.sh | 33 +++++--- src/umi_tools/umi_tools_extract/script.sh | 19 +++-- 17 files changed, 380 insertions(+), 252 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c2f347a..5030894c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,8 @@ * Update CI to reusable workflow in `viash-io/viash-actions` (PR #86). 
+* Update several components in order to avoid duplicate code when using `unset` on boolean arguments (PR #133). + ## DOCUMENTATION * Extend the contributing guidelines (PR #82): diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cee4249a..a32b680c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -320,6 +320,31 @@ Notes: * If your tool allows for multiple inputs using a separator other than `;` (which is the default Viash multiple separator), you can substitute these values with a command like: `par_disable_filters=$(echo $par_disable_filters | tr ';' ',')`. +* If you have a lot of boolean variables that you would like to unset when the value is `false`, you can avoid duplicate code by using the following syntax: + +```bash +unset_if_false=( + par_argument_1 + par_argument_2 + par_argument_3 + par_argument_4 +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done +``` + +this code is equivalent to + +```bash +[[ "$par_argument_1" == "false" ]] && unset par_argument_1 +[[ "$par_argument_2" == "false" ]] && unset par_argument_2 +[[ "$par_argument_3" == "false" ]] && unset par_argument_3 +[[ "$par_argument_4" == "false" ]] && unset par_argument_4 +``` + ### Step 12: Create test script diff --git a/src/bedtools/bedtools_intersect/script.sh b/src/bedtools/bedtools_intersect/script.sh index 2141859d..04a8d854 100644 --- a/src/bedtools/bedtools_intersect/script.sh +++ b/src/bedtools/bedtools_intersect/script.sh @@ -3,27 +3,34 @@ ## VIASH START ## VIASH END -[[ "$par_write_a" == "false" ]] && unset par_write_a -[[ "$par_write_b" == "false" ]] && unset par_write_b -[[ "$par_left_outer_join" == "false" ]] && unset par_left_outer_join -[[ "$par_write_overlap" == "false" ]] && unset par_write_overlap -[[ "$par_write_overlap_plus" == "false" ]] && unset par_write_overlap_plus -[[ "$par_report_A_if_no_overlap" == "false" ]] && unset par_report_A_if_no_overlap -[[ "$par_number_of_overlaps_A" == "false" ]] && unset 
par_number_of_overlaps_A -[[ "$par_report_no_overlaps_A" == "false" ]] && unset par_report_no_overlaps_A -[[ "$par_uncompressed_bam" == "false" ]] && unset par_uncompressed_bam -[[ "$par_same_strand" == "false" ]] && unset par_same_strand -[[ "$par_opposite_strand" == "false" ]] && unset par_opposite_strand -[[ "$par_reciprocal_overlap" == "false" ]] && unset par_reciprocal_overlap -[[ "$par_either_overlap" == "false" ]] && unset par_either_overlap -[[ "$par_split" == "false" ]] && unset par_split -[[ "$par_nonamecheck" == "false" ]] && unset par_nonamecheck -[[ "$par_sorted" == "false" ]] && unset par_sorted -[[ "$par_filenames" == "false" ]] && unset par_filenames -[[ "$par_sortout" == "false" ]] && unset par_sortout -[[ "$par_bed" == "false" ]] && unset par_bed -[[ "$par_header" == "false" ]] && unset par_header -[[ "$par_no_buffer_output" == "false" ]] && unset par_no_buffer_output +unset_if_false=( + par_write_a + par_write_b + par_left_outer_join + par_write_overlap + par_write_overlap_plus + par_report_A_if_no_overlap + par_number_of_overlaps_A + par_report_no_overlaps_A + par_uncompressed_bam + par_same_strand + par_opposite_strand + par_reciprocal_overlap + par_either_overlap + par_split + par_nonamecheck + par_sorted + par_filenames + par_sortout + par_bed + par_no_buffer_output +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done + # Create input array IFS=";" read -ra input <<< $par_input_b diff --git a/src/bedtools/bedtools_sort/script.sh b/src/bedtools/bedtools_sort/script.sh index e7f712d7..0d0b9b54 100644 --- a/src/bedtools/bedtools_sort/script.sh +++ b/src/bedtools/bedtools_sort/script.sh @@ -4,13 +4,20 @@ ## VIASH END # Unset parameters -[[ "$par_sizeA" == "false" ]] && unset par_sizeA -[[ "$par_sizeD" == "false" ]] && unset par_sizeD -[[ "$par_chrThenSizeA" == "false" ]] && unset par_chrThenSizeA -[[ "$par_chrThenSizeD" == "false" ]] && unset par_chrThenSizeD -[[ "$par_chrThenScoreA" == 
"false" ]] && unset par_chrThenScoreA -[[ "$par_chrThenScoreD" == "false" ]] && unset par_chrThenScoreD -[[ "$par_header" == "false" ]] && unset par_header +unset_if_false=( + par_sizeA + par_sizeD + par_chrThenSizeA + par_chrThenSizeD + par_chrThenScoreA + par_chrThenScoreD + par_header +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done # Execute bedtools sort with the provided arguments bedtools sort \ diff --git a/src/busco/busco_run/script.sh b/src/busco/busco_run/script.sh index a0ef24de..673ccd0b 100644 --- a/src/busco/busco_run/script.sh +++ b/src/busco/busco_run/script.sh @@ -3,18 +3,24 @@ ## VIASH START ## VIASH END +unset_if_false=( + par_tar + par_force + par_quiet + par_restart + par_auto_lineage + par_auto_lineage_euk + par_auto_lineage_prok + par_augustus + par_long + par_scaffold_composition + par_miniprot +) -[[ "$par_tar" == "false" ]] && unset par_tar -[[ "$par_force" == "false" ]] && unset par_force -[[ "$par_quiet" == "false" ]] && unset par_quiet -[[ "$par_restart" == "false" ]] && unset par_restart -[[ "$par_auto_lineage" == "false" ]] && unset par_auto_lineage -[[ "$par_auto_lineage_euk" == "false" ]] && unset par_auto_lineage_euk -[[ "$par_auto_lineage_prok" == "false" ]] && unset par_auto_lineage_prok -[[ "$par_augustus" == "false" ]] && unset par_augustus -[[ "$par_long" == "false" ]] && unset par_long -[[ "$par_scaffold_composition" == "false" ]] && unset par_scaffold_composition -[[ "$par_miniprot" == "false" ]] && unset par_miniprot +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done tmp_dir=$(mktemp -d -p "$meta_temp_dir" busco_XXXXXXXXX) prefix=$(openssl rand -hex 8) diff --git a/src/fastp/script.sh b/src/fastp/script.sh index 4bb37c87..557f7ac3 100644 --- a/src/fastp/script.sh +++ b/src/fastp/script.sh @@ -4,25 +4,32 @@ ## VIASH END # disable flags -[[ "$par_disable_adapter_trimming" == "false" ]] && unset 
par_disable_adapter_trimming -[[ "$par_detect_adapter_for_pe" == "false" ]] && unset par_detect_adapter_for_pe -[[ "$par_merge" == "false" ]] && unset par_merge -[[ "$par_include_unmerged" == "false" ]] && unset par_include_unmerged -[[ "$par_interleaved_in" == "false" ]] && unset par_interleaved_in -[[ "$par_fix_mgi_id" == "false" ]] && unset par_fix_mgi_id -[[ "$par_phred64" == "false" ]] && unset par_phred64 -[[ "$par_dont_overwrite" == "false" ]] && unset par_dont_overwrite -[[ "$par_verbose" == "false" ]] && unset par_verbose -[[ "$par_dedup" == "false" ]] && unset par_dedup -[[ "$par_dont_eval_duplication" == "false" ]] && unset par_dont_eval_duplication -[[ "$par_trim_poly_g" == "false" ]] && unset par_trim_poly_g -[[ "$par_disable_trim_poly_g" == "false" ]] && unset par_disable_trim_poly_g -[[ "$par_trim_poly_x" == "false" ]] && unset par_trim_poly_x -[[ "$par_disable_quality_filtering" == "false" ]] && unset par_disable_quality_filtering -[[ "$par_disable_length_filtering" == "false" ]] && unset par_disable_length_filtering -[[ "$par_low_complexity_filter" == "false" ]] && unset par_low_complexity_filter -[[ "$par_umi" == "false" ]] && unset par_umi -[[ "$par_overrepresentation_analysis" == "false" ]] && unset par_overrepresentation_analysis +unset_if_false=( + par_disable_adapter_trimming + par_detect_adapter_for_pe + par_merge + par_include_unmerged + par_interleaved_in + par_fix_mgi_id + par_phred64 + par_dont_overwrite + par_verbose + par_dedup + par_dont_eval_duplication + par_trim_poly_g + par_disable_trim_poly_g + par_trim_poly_x + par_disable_quality_filtering + par_disable_length_filtering + par_low_complexity_filter + par_umi + par_overrepresentation_analysis +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done # run command fastp \ diff --git a/src/featurecounts/script.sh b/src/featurecounts/script.sh index 2e54feb3..53f8c63f 100644 --- a/src/featurecounts/script.sh +++ 
b/src/featurecounts/script.sh @@ -19,24 +19,31 @@ par_feature_type=$(echo $par_feature_type | tr ',' ';') par_extra_attributes=$(echo $par_extra_attributes | tr ',' ';') # unset flag variables -[[ "$par_feature_level" == "false" ]] && unset par_feature_level -[[ "$par_overlapping" == "false" ]] && unset par_overlapping -[[ "$par_largest_overlap" == "false" ]] && unset par_largest_overlap -[[ "$par_multi_mapping" == "false" ]] && unset par_multi_mapping -[[ "$par_fraction" == "false" ]] && unset par_fraction -[[ "$par_split_only" == "false" ]] && unset par_split_only -[[ "$par_non_split_only" == "false" ]] && unset par_non_split_only -[[ "$par_primary" == "false" ]] && unset par_primary -[[ "$par_ignore_dup" == "false" ]] && unset par_ignore_dup -[[ "$par_paired" == "false" ]] && unset par_paired -[[ "$par_count_read_pairs" == "false" ]] && unset par_count_read_pairs -[[ "$par_both_aligned" == "false" ]] && unset par_both_aligned -[[ "$par_check_pe_dist" == "false" ]] && unset par_check_pe_dist -[[ "$par_same_strand" == "false" ]] && unset par_same_strand -[[ "$par_donotsort" == "false" ]] && unset par_donotsort -[[ "$par_by_read_group" == "false" ]] && unset par_by_read_group -[[ "$par_long_reads" == "false" ]] && unset par_long_reads -[[ "$par_verbose" == "false" ]] && unset par_verbose +unset_if_false=( + par_feature_level + par_overlapping + par_largest_overlap + par_multi_mapping + par_fraction + par_split_only + par_non_split_only + par_primary + par_ignore_dup + par_paired + par_count_read_pairs + par_both_aligned + par_check_pe_dist + par_same_strand + par_donotsort + par_by_read_group + par_long_reads + par_verbose +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done IFS=";" read -ra input <<< $par_input diff --git a/src/gffread/script.sh b/src/gffread/script.sh index cd4abf14..fab9e521 100644 --- a/src/gffread/script.sh +++ b/src/gffread/script.sh @@ -4,51 +4,58 @@ ## VIASH END # unset flags -[[ 
"$par_coding" == "false" ]] && unset par_coding -[[ "$par_strict_range" == "false" ]] && unset par_strict_range -[[ "$par_no_single_exon" == "false" ]] && unset par_no_single_exon -[[ "$par_no_exon_attrs" == "false" ]] && unset par_no_exon_attrs -[[ "$par_nc" == "false" ]] && unset par_nc -[[ "$par_ignore_locus" == "false" ]] && unset par_ignore_locus -[[ "$par_description" == "false" ]] && unset par_description -[[ "$par_sort_alpha" == "false" ]] && unset par_sort_alpha -[[ "$par_keep_genes" == "false" ]] && unset par_keep_genes -[[ "$par_keep_attrs" == "false" ]] && unset par_keep_attrs -[[ "$par_keep_exon_attrs" == "false" ]] && unset par_keep_exon_attrs -[[ "$par_keep_comments" == "false" ]] && unset par_keep_comments -[[ "$par_process_other" == "false" ]] && unset par_process_other -[[ "$par_rm_stop_codons" == "false" ]] && unset par_rm_stop_codons -[[ "$par_adj_cds_start" == "false" ]] && unset par_adj_cds_start -[[ "$par_opposite_strand" == "false" ]] && unset par_opposite_strand -[[ "$par_coding_status" == "false" ]] && unset par_coding_status -[[ "$par_add_hasCDS" == "false" ]] && unset par_add_hasCDS -[[ "$par_adj_stop" == "false" ]] && unset par_adj_stop -[[ "$par_rm_noncanon" == "false" ]] && unset par_rm_noncanon -[[ "$par_complete_cds" == "false" ]] && unset par_complete_cds -[[ "$par_no_pseudo" == "false" ]] && unset par_no_pseudo -[[ "$par_in_bed" == "false" ]] && unset par_in_bed -[[ "$par_in_tlf" == "false" ]] && unset par_in_tlf -[[ "$par_stream" == "false" ]] && unset par_stream -[[ "$par_merge" == "false" ]] && unset par_merge -[[ "$par_rm_redundant" == "false" ]] && unset par_rm_redundant -[[ "$par_no_boundary" == "false" ]] && unset par_no_boundary -[[ "$par_no_overlap" == "false" ]] && unset par_no_overlap -[[ "$par_force_exons" == "false" ]] && unset par_force_exons -[[ "$par_gene2exon" == "false" ]] && unset par_gene2exon -[[ "$par_t_adopt" == "false" ]] && unset par_t_adopt -[[ "$par_decode" == "false" ]] && unset par_decode -[[ 
"$par_merge_exons" == "false" ]] && unset par_merge_exons -[[ "$par_junctions" == "false" ]] && unset par_junctions -[[ "$par_w_nocds" == "false" ]] && unset par_w_nocds -[[ "$par_tr_cds" == "false" ]] && unset par_tr_cds -[[ "$par_w_coords" == "false" ]] && unset par_w_coords -[[ "$par_stop_dot" == "false" ]] && unset par_stop_dot -[[ "$par_id_version" == "false" ]] && unset par_id_version -[[ "$par_gtf_output" == "false" ]] && unset par_gtf_output -[[ "$par_bed" == "false" ]] && unset par_bed -[[ "$par_tlf" == "false" ]] && unset par_tlf -[[ "$par_expose_dups" == "false" ]] && unset par_expose_dups -[[ "$par_cluster_only" == "false" ]] && unset par_cluster_only +unset_if_false=( + par_coding + par_strict_range + par_no_single_exon + par_no_exon_attrs + par_nc + par_ignore_locus + par_description + par_sort_alpha + par_keep_genes + par_keep_attrs + par_keep_exon_attrs + par_keep_comments + par_process_other + par_rm_stop_codons + par_adj_cds_start + par_opposite_strand + par_coding_status + par_add_hasCDS + par_adj_stop + par_rm_noncanon + par_complete_cds + par_no_pseudo + par_in_bed + par_in_tlf + par_stream + par_merge + par_rm_redundant + par_no_boundary + par_no_overlap + par_force_exons + par_gene2exon + par_t_adopt + par_decode + par_merge_exons + par_junctions + par_w_nocds + par_tr_cds + par_w_coords + par_stop_dot + par_id_version + par_gtf_output + par_bed + par_tlf + par_expose_dups + par_cluster_only +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done # if par_table is not empty, replace ";" with "," par_table=$(echo "$par_table" | tr ';' ',') diff --git a/src/lofreq/call/script.sh b/src/lofreq/call/script.sh index 863fe986..ca229194 100644 --- a/src/lofreq/call/script.sh +++ b/src/lofreq/call/script.sh @@ -4,21 +4,28 @@ ## VIASH END # Unset all parameters that are set to "false" -[[ "$par_no_baq" == "false" ]] && unset par_no_baq -[[ "$par_no_idaq" == "false" ]] && unset par_no_idaq -[[ 
"$par_del_baq" == "false" ]] && unset par_del_baq -[[ "$par_no_ext_baq" == "false" ]] && unset par_no_ext_baq -[[ "$par_no_mq" == "false" ]] && unset par_no_mq -[[ "$par_call_indels" == "false" ]] && unset par_call_indels -[[ "$par_only_indels" == "false" ]] && unset par_only_indels -[[ "$par_src_qual" == "false" ]] && unset par_src_qual -[[ "$par_illumina_13" == "false" ]] && unset par_illumina_13 -[[ "$par_use_orphan" == "false" ]] && unset par_use_orphan -[[ "$par_plp_summary_only" == "false" ]] && unset par_plp_summary_only -[[ "$par_no_default_filter" == "false" ]] && unset par_no_default_filter -[[ "$par_force_overwrite" == "false" ]] && unset par_force_overwrite -[[ "$par_verbose" == "false" ]] && unset par_verbose -[[ "$par_debug" == "false" ]] && unset par_debug +unset_if_false=( + par_no_baq + par_no_idaq + par_del_baq + par_no_ext_baq + par_no_mq + par_call_indels + par_only_indels + par_src_qual + par_illumina_13 + par_use_orphan + par_plp_summary_only + par_no_default_filter + par_force_overwrite + par_verbose + par_debug +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done # Run lofreq call lofreq call \ diff --git a/src/multiqc/script.sh b/src/multiqc/script.sh index ad8c1c0c..5806fa1d 100755 --- a/src/multiqc/script.sh +++ b/src/multiqc/script.sh @@ -1,26 +1,32 @@ #!/bin/bash # disable flags -[[ "$par_ignore_symlinks" == "false" ]] && unset par_ignore_symlinks -[[ "$par_dirs" == "false" ]] && unset par_dirs -[[ "$par_full_names" == "false" ]] && unset par_full_names -[[ "$par_fn_as_s_name" == "false" ]] && unset par_fn_as_s_name -[[ "$par_profile_runtime" == "false" ]] && unset par_profile_runtime -[[ "$par_verbose" == "false" ]] && unset par_verbose -[[ "$par_quiet" == "false" ]] && unset par_quiet -[[ "$par_strict" == "false" ]] && unset par_strict -[[ "$par_development" == "false" ]] && unset par_development -[[ "$par_require_logs" == "false" ]] && unset par_require_logs -[[ 
"$par_no_megaqc_upload" == "false" ]] && unset par_no_megaqc_upload -[[ "$par_no_ansi" == "false" ]] && unset par_no_ansi -[[ "$par_flat" == "false" ]] && unset par_flat -[[ "$par_interactive" == "false" ]] && unset par_interactive -[[ "$par_static_plot_export" == "false" ]] && unset par_static_plot_export -[[ "$par_data_dir" == "false" ]] && unset par_data_dir -[[ "$par_no_data_dir" == "false" ]] && unset par_no_data_dir -[[ "$par_zip_data_dir" == "false" ]] && unset par_zip_data_dir -[[ "$par_pdf" == "false" ]] && unset par_pdf +unset_if_false=( + par_ignore_symlinks + par_dirs + par_full_names + par_fn_as_s_name + par_profile_runtime + par_verbose + par_quiet + par_strict + par_development + par_require_logs + par_no_megaqc_upload + par_no_ansi + par_flat + par_interactive + par_static_plot_export + par_data_dir + par_no_data_dir + par_zip_data_dir + par_pdf +) +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done # handle inputs out_dir=$(dirname "$par_output_report") diff --git a/src/salmon/salmon_index/script.sh b/src/salmon/salmon_index/script.sh index c2b9e7a0..5b1c4d76 100644 --- a/src/salmon/salmon_index/script.sh +++ b/src/salmon/salmon_index/script.sh @@ -5,12 +5,19 @@ set -e ## VIASH START ## VIASH END -[[ "$par_gencode" == "false" ]] && unset par_gencode -[[ "$par_features" == "false" ]] && unset par_features -[[ "$par_keep_duplicates" == "false" ]] && unset par_keep_duplicates -[[ "$par_keep_fixed_fasta" == "false" ]] && unset par_keep_fixed_fasta -[[ "$par_sparse" == "false" ]] && unset par_sparse -[[ "$par_no_clip" == "false" ]] && unset par_no_clip +unset_if_false=( + par_gencode + par_features + par_keep_duplicates + par_keep_fixed_fasta + par_sparse + par_no_clip +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done tmp_dir=$(mktemp -d -p "$meta_temp_dir" "${meta_functionality_name}_XXXXXX") mkdir -p "$tmp_dir/temp" diff --git 
a/src/salmon/salmon_quant/script.sh b/src/salmon/salmon_quant/script.sh index 4c9f69d5..47cba1b9 100644 --- a/src/salmon/salmon_quant/script.sh +++ b/src/salmon/salmon_quant/script.sh @@ -4,49 +4,55 @@ set -e ## VIASH START ## VIASH END +unset_if_false=( + par_discard_orphans + par_ont + par_seq_bias + par_gc_bias + par_pos_bias + par_meta + par_discard_orphans_quasi + par_disable_chaining_heuristic + par_allow_dovetail + par_recover_orphans + par_mimicBT2 + par_mimic_strictBT2 + par_softclip + par_softclip_overhangs + par_full_length_alignment + par_hard_filter + par_write_mappings + par_write_qualities + par_alternative_init_mode + par_skip_quant + par_dump_eq + par_dump_eq_weights + par_reduce_GC_memory + par_init_uniform + par_no_length_correction + par_no_effective_length_correction + par_no_single_frag_prob + par_no_frag_length_dist + par_no_bias_length_threshold + par_useEM + par_useVBOpt + par_no_Gamma_draw + par_bootstrap_reproject + par_quiet + par_per_transcript_prior + par_per_nucleotide_prior + par_write_orphan_links + par_write_unmapped_names + par_no_error_model + par_sample_out + par_sample_unaligned + par_gencode +) -[[ "$par_discard_orphans" == "false" ]] && unset par_discard_orphans -[[ "$par_ont" == "false" ]] && unset par_ont -[[ "$par_seq_bias" == "false" ]] && unset par_seq_bias -[[ "$par_gc_bias" == "false" ]] && unset par_gc_bias -[[ "$par_pos_bias" == "false" ]] && unset par_pos_bias -[[ "$par_meta" == "false" ]] && unset par_meta -[[ "$par_discard_orphans_quasi" == "false" ]] && unset par_discard_orphans_quasi -[[ "$par_disable_chaining_heuristic" == "false" ]] && unset par_disable_chaining_heuristic -[[ "$par_allow_dovetail" == "false" ]] && unset par_allow_dovetail -[[ "$par_recover_orphans" == "false" ]] && unset par_recover_orphans -[[ "$par_mimicBT2" == "false" ]] && unset par_mimicBT2 -[[ "$par_mimic_strictBT2" == "false" ]] && unset par_mimic_strictBT2 -[[ "$par_softclip" == "false" ]] && unset par_softclip -[[ 
"$par_softclip_overhangs" == "false" ]] && unset par_softclip_overhangs -[[ "$par_full_length_alignment" == "false" ]] && unset par_full_length_alignment -[[ "$par_hard_filter" == "false" ]] && unset par_hard_filter -[[ "$par_write_mappings" == "false" ]] && unset par_write_mappings -[[ "$par_write_qualities" == "false" ]] && unset par_write_qualities -[[ "$par_alternative_init_mode" == "false" ]] && unset par_alternative_init_mode -[[ "$par_skip_quant" == "false" ]] && unset par_skip_quant -[[ "$par_dump_eq" == "false" ]] && unset par_dump_eq -[[ "$par_dump_eq_weights" == "false" ]] && unset par_dump_eq_weights -[[ "$par_reduce_GC_memory" == "false" ]] && unset par_reduce_GC_memory -[[ "$par_init_uniform" == "false" ]] && unset par_init_uniform -[[ "$par_no_length_correction" == "false" ]] && unset par_no_length_correction -[[ "$par_no_effective_length_correction" == "false" ]] && unset par_no_effective_length_correction -[[ "$par_no_single_frag_prob" == "false" ]] && unset par_no_single_frag_prob -[[ "$par_no_frag_length_dist" == "false" ]] && unset par_no_frag_length_dist -[[ "$par_no_bias_length_threshold" == "false" ]] && unset par_no_bias_length_threshold -[[ "$par_useEM" == "false" ]] && unset par_useEM -[[ "$par_useVBOpt" == "false" ]] && unset par_useVBOpt -[[ "$par_no_Gamma_draw" == "false" ]] && unset par_no_Gamma_draw -[[ "$par_bootstrap_reproject" == "false" ]] && unset par_bootstrap_reproject -[[ "$par_quiet" == "false" ]] && unset par_quiet -[[ "$par_per_transcript_prior" == "false" ]] && unset par_per_transcript_prior -[[ "$par_per_nucleotide_prior" == "false" ]] && unset par_per_nucleotide_prior -[[ "$par_write_orphan_links" == "false" ]] && unset par_write_orphan_links -[[ "$par_write_unmapped_names" == "false" ]] && unset par_write_unmapped_names -[[ "$par_no_error_model" == "false" ]] && unset par_no_error_model -[[ "$par_sample_out" == "false" ]] && unset par_sample_out -[[ "$par_sample_unaligned" == "false" ]] && unset par_sample_unaligned -[[ 
"$par_gencode" == "false" ]] && unset par_gencode +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done IFS=";" read -ra unmated_reads <<< $par_unmated_reads IFS=";" read -ra mates1 <<< $par_mates1 diff --git a/src/samtools/samtools_fastq/script.sh b/src/samtools/samtools_fastq/script.sh index 0cad9cfe..e05da9b0 100644 --- a/src/samtools/samtools_fastq/script.sh +++ b/src/samtools/samtools_fastq/script.sh @@ -5,11 +5,18 @@ set -e -[[ "$par_no_suffix" == "false" ]] && unset par_no_suffix -[[ "$par_suffix" == "false" ]] && unset par_suffix -[[ "$par_use_oq" == "false" ]] && unset par_use_oq -[[ "$par_copy_tags" == "false" ]] && unset par_copy_tags -[[ "$par_casava" == "false" ]] && unset par_casava +unset_if_false=( + par_no_suffix + par_suffix + par_use_oq + par_copy_tags + par_casava +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done if [[ "$meta_name" == "samtools_fasta" ]]; then subcommand=fasta diff --git a/src/samtools/samtools_sort/script.sh b/src/samtools/samtools_sort/script.sh index 94836c18..a8b3ce0f 100644 --- a/src/samtools/samtools_sort/script.sh +++ b/src/samtools/samtools_sort/script.sh @@ -5,15 +5,22 @@ set -e -[[ "$par_uncompressed" == "false" ]] && unset par_uncompressed -[[ "$par_minimiser" == "false" ]] && unset par_minimiser -[[ "$par_not_reverse" == "false" ]] && unset par_not_reverse -[[ "$par_homopolymers" == "false" ]] && unset par_homopolymers -[[ "$par_natural_sort" == "false" ]] && unset par_natural_sort -[[ "$par_ascii_sort" == "false" ]] && unset par_ascii_sort -[[ "$par_template_coordinate" == "false" ]] && unset par_template_coordinate -[[ "$par_write_index" == "false" ]] && unset par_write_index -[[ "$par_no_PG" == "false" ]] && unset par_no_PG +unset_if_false=( + par_uncompressed + par_minimiser + par_not_reverse + par_homopolymers + par_natural_sort + par_ascii_sort + par_template_coordinate + par_write_index + 
par_no_PG +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done samtools sort \ diff --git a/src/samtools/samtools_view/script.sh b/src/samtools/samtools_view/script.sh index c3911b48..7608844b 100644 --- a/src/samtools/samtools_view/script.sh +++ b/src/samtools/samtools_view/script.sh @@ -5,21 +5,29 @@ set -e -[[ "$par_bam" == "false" ]] && unset par_bam -[[ "$par_cram" == "false" ]] && unset par_cram -[[ "$par_fast" == "false" ]] && unset par_fast -[[ "$par_uncompressed" == "false" ]] && unset par_uncompressed -[[ "$par_with_header" == "false" ]] && unset par_with_header -[[ "$par_header_only" == "false" ]] && unset par_header_only -[[ "$par_no_header" == "false" ]] && unset par_no_header -[[ "$par_count" == "false" ]] && unset par_count -[[ "$par_unmap" == "false" ]] && unset par_unmap -[[ "$par_use_index" == "false" ]] && unset par_use_index -[[ "$par_fetch_pairs" == "false" ]] && unset par_fetch_pairs -[[ "$par_customized_index" == "false" ]] && unset par_customized_index -[[ "$par_no_PG" == "false" ]] && unset par_no_PG -[[ "$par_write_index" == "false" ]] && unset par_write_index -[[ "$par_remove_B" == "false" ]] && unset par_remove_B +unset_if_false=( + par_bam + par_cram + par_fast + par_uncompressed + par_with_header + par_header_only + par_no_header + par_count + par_unmap + par_use_index + par_fetch_pairs + par_customized_index + par_no_PG + par_write_index + par_remove_B +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done + samtools view \ ${par_bam:+-b} \ diff --git a/src/umi_tools/umi_tools_dedup/script.sh b/src/umi_tools/umi_tools_dedup/script.sh index d57a5e76..3f3bdc89 100644 --- a/src/umi_tools/umi_tools_dedup/script.sh +++ b/src/umi_tools/umi_tools_dedup/script.sh @@ -7,19 +7,26 @@ set -e test_dir="${metal_executable}/test_data" -[[ "$par_paired" == "false" ]] && unset par_paired -[[ "$par_in_sam" == "false" ]] && unset 
par_in_sam -[[ "$par_out_sam" == "false" ]] && unset par_out_sam -[[ "$par_spliced_is_unique" == "false" ]] && unset par_spliced_is_unique -[[ "$par_per_gene" == "false" ]] && unset par_per_gene -[[ "$par_per_contig" == "false" ]] && unset par_per_contig -[[ "$par_per_cell" == "false" ]] && unset par_per_cell -[[ "$par_no_sort_output" == "false" ]] && unset par_no_sort_output -[[ "$par_buffer_whole_contig" == "false" ]] && unset par_buffer_whole_contig -[[ "$par_ignore_umi" == "false" ]] && unset par_ignore_umi -[[ "$par_subset" == "false" ]] && unset par_subset -[[ "$par_log2stderr" == "false" ]] && unset par_log2stderr -[[ "$par_read_length" == "false" ]] && unset par_read_length +unset_if_false=( + par_paired + par_in_sam + par_out_sam + par_spliced_is_unique + par_per_gene + par_per_contig + par_per_cell + par_no_sort_output + par_buffer_whole_contig + par_ignore_umi + par_subset + par_log2stderr + par_read_length +) + +for par in ${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done umi_tools dedup \ --stdin "$par_input" \ diff --git a/src/umi_tools/umi_tools_extract/script.sh b/src/umi_tools/umi_tools_extract/script.sh index 5e41865d..4514860e 100644 --- a/src/umi_tools/umi_tools_extract/script.sh +++ b/src/umi_tools/umi_tools_extract/script.sh @@ -5,14 +5,19 @@ set -exo pipefail -test_dir="${metal_executable}/test_data" +unset_if_false=( + par_error_correct_cell + par_reconcile_pairs + par_three_prime + par_ignore_read_pair_suffixes + par_timeit_header + par_log2stderr +) -[[ "$par_error_correct_cell" == "false" ]] && unset par_error_correct_cell -[[ "$par_reconcile_pairs" == "false" ]] && unset par_reconcile_pairs -[[ "$par_three_prime" == "false" ]] && unset par_three_prime -[[ "$par_ignore_read_pair_suffixes" == "false" ]] && unset par_ignore_read_pair_suffixes -[[ "$par_timeit_header" == "false" ]] && unset par_timeit_header -[[ "$par_log2stderr" == "false" ]] && unset par_log2stderr +for par in 
${unset_if_false[@]}; do + test_val="${!par}" + [[ "$test_val" == "false" ]] && unset $par +done # Check if we have the correct number of input files and patterns for paired-end or single-end reads From 9fc07f6c05879f8efff441767ec489bb24fdce7d Mon Sep 17 00:00:00 2001 From: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Date: Tue, 13 Aug 2024 17:19:06 +0200 Subject: [PATCH 18/25] Bump Viash to 0.9.0-RC7 (#134) * Bump viash to 0.9.0-RC7 * Update CHANGELOG --- CHANGELOG.md | 2 ++ _viash.yaml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5030894c..d51fcf12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,8 @@ * Update several components in order to avoid duplicate code when using `unset` on boolean arguments (PR #133). +* Bump viash to `0.9.0-RC7` (PR #134) + ## DOCUMENTATION * Extend the contributing guidelines (PR #82): diff --git a/_viash.yaml b/_viash.yaml index 9a240c24..ab4f3828 100644 --- a/_viash.yaml +++ b/_viash.yaml @@ -7,7 +7,7 @@ links: issue_tracker: https://github.com/viash-hub/biobox/issues repository: https://github.com/viash-hub/biobox -viash_version: 0.9.0-RC6 +viash_version: 0.9.0-RC7 config_mods: | .requirements.commands := ['ps'] From d7cb75a9581750da3cbe90c79a5ac28410556550 Mon Sep 17 00:00:00 2001 From: Theodoro Gasperin Terra Camargo <98555209+tgaspe@users.noreply.github.com> Date: Tue, 13 Aug 2024 20:38:23 +0200 Subject: [PATCH 19/25] Bedtools_bamtofastq (#101) * Initial Commit * update * update * Config file and Script.sh * Test Script * adding test * working on testing * Fixing Bugs * Working version * Update CHANGELOG.md * Required Changes * Adding exit on error * adding more links --- CHANGELOG.md | 5 ++ .../bedtools_bamtofastq/config.vsh.yaml | 74 +++++++++++++++ src/bedtools/bedtools_bamtofastq/help.txt | 25 ++++++ src/bedtools/bedtools_bamtofastq/script.sh | 19 ++++ src/bedtools/bedtools_bamtofastq/test.sh | 84 ++++++++++++++++++ 
.../bedtools_bamtofastq/test_data/example.bam | Bin 0 -> 334 bytes .../bedtools_bamtofastq/test_data/example.sam | 3 + .../test_data/expected.fastq | 16 ++++ .../test_data/expected_1.fastq | 4 + .../test_data/expected_2.fastq | 4 + .../bedtools_bamtofastq/test_data/script.sh | 13 +++ 11 files changed, 247 insertions(+) create mode 100644 src/bedtools/bedtools_bamtofastq/config.vsh.yaml create mode 100644 src/bedtools/bedtools_bamtofastq/help.txt create mode 100644 src/bedtools/bedtools_bamtofastq/script.sh create mode 100644 src/bedtools/bedtools_bamtofastq/test.sh create mode 100644 src/bedtools/bedtools_bamtofastq/test_data/example.bam create mode 100644 src/bedtools/bedtools_bamtofastq/test_data/example.sam create mode 100644 src/bedtools/bedtools_bamtofastq/test_data/expected.fastq create mode 100644 src/bedtools/bedtools_bamtofastq/test_data/expected_1.fastq create mode 100644 src/bedtools/bedtools_bamtofastq/test_data/expected_2.fastq create mode 100755 src/bedtools/bedtools_bamtofastq/test_data/script.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index d51fcf12..f2892df6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,11 @@ * `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). +* `bedtools`: + - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). + - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). + - `bedtools/bedtools_bamtofastq`: Convert BAM alignments to FASTQ files (PR #101). + ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1` (PR #72). 
diff --git a/src/bedtools/bedtools_bamtofastq/config.vsh.yaml b/src/bedtools/bedtools_bamtofastq/config.vsh.yaml new file mode 100644 index 00000000..cd30cb71 --- /dev/null +++ b/src/bedtools/bedtools_bamtofastq/config.vsh.yaml @@ -0,0 +1,74 @@ +name: bedtools_bamtofastq +namespace: bedtools +description: | + Conversion tool for extracting FASTQ records from sequence alignments in BAM format. +keywords: [Conversion ,BAM, FASTQ] +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/bamtofastq.html + repository: https://github.com/arq5x/bedtools2 + homepage: https://bedtools.readthedocs.io/en/latest/# + issue_tracker: https://github.com/arq5x/bedtools2/issues +references: + doi: 10.1093/bioinformatics/btq033 +license: MIT +requirements: + commands: [bedtools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + description: Input BAM file to be converted to FASTQ. + required: true + + - name: Outputs + arguments: + - name: --fastq + alternatives: -fq + direction: output + type: file + description: Output FASTQ file. + required: true + + - name: --fastq2 + alternatives: -fq2 + type: file + direction: output + description: | + FASTQ for second end. Used if BAM contains paired-end data. + BAM should be sorted by query name is creating paired FASTQ. + + - name: Options + arguments: + - name: --tags + type: boolean_true + description: | + Create FASTQ based on the mate info in the BAM R2 and Q2 tags. 
+ +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + - path: test_data + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bedtools, procps] + - type: docker + run: | + echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/bedtools/bedtools_bamtofastq/help.txt b/src/bedtools/bedtools_bamtofastq/help.txt new file mode 100644 index 00000000..ed2b7468 --- /dev/null +++ b/src/bedtools/bedtools_bamtofastq/help.txt @@ -0,0 +1,25 @@ +```bash +bedtools bamtofastq +``` + +Tool: bedtools bamtofastq (aka bamToFastq) +Version: v2.30.0 +Summary: Convert BAM alignments to FASTQ files. + +Usage: bamToFastq [OPTIONS] -i -fq + +Options: + -fq2 FASTQ for second end. Used if BAM contains paired-end data. + BAM should be sorted by query name is creating paired FASTQ. + + -tags Create FASTQ based on the mate info + in the BAM R2 and Q2 tags. + +Tips: + If you want to create a single, interleaved FASTQ file + for paired-end data, you can just write both to /dev/stdout: + + bedtools bamtofastq -i x.bam -fq /dev/stdout -fq2 /dev/stdout > x.ilv.fq + + Also, the samtools fastq command has more fucntionality and is a useful alternative. 
+ diff --git a/src/bedtools/bedtools_bamtofastq/script.sh b/src/bedtools/bedtools_bamtofastq/script.sh new file mode 100644 index 00000000..4b32f2d7 --- /dev/null +++ b/src/bedtools/bedtools_bamtofastq/script.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +# Exit on error +set -eo pipefail + +# Unset parameters +[[ "$par_tags" == "false" ]] && unset par_tags + +# Execute bedtools bamtofastq with the provided arguments +bedtools bamtofastq \ + ${par_tags:+-tags} \ + ${par_fastq2:+-fq2 "$par_fastq2"} \ + -i "$par_input" \ + -fq "$par_fastq" + + diff --git a/src/bedtools/bedtools_bamtofastq/test.sh b/src/bedtools/bedtools_bamtofastq/test.sh new file mode 100644 index 00000000..6d913d85 --- /dev/null +++ b/src/bedtools/bedtools_bamtofastq/test.sh @@ -0,0 +1,84 @@ +#!/bin/bash + +# exit on error +set -eo pipefail + +test_data="$meta_resources_dir/test_data" + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" && exit 1) +} +############################################# + +# Test 1: normal conversion +mkdir test1 +cd test1 + +echo "> Run bedtools bamtofastq on BAM file" +"$meta_executable" \ + --input "$test_data/example.bam" \ + --fastq "output.fastq" + +# checks +assert_file_exists "output.fastq" +assert_file_not_empty "output.fastq" +assert_identical_content "output.fastq" "$test_data/expected.fastq" +echo "- test1 succeeded -" + +cd .. 
+ +# Test 2: with tags +mkdir test2 +cd test2 + +echo "> Run bedtools bamtofastq on BAM file with tags" +"$meta_executable" \ + --input "$test_data/example.bam" \ + --fastq "output.fastq" \ + --tags + +# checks +assert_file_exists "output.fastq" +assert_file_not_empty "output.fastq" +assert_identical_content "output.fastq" "$test_data/expected.fastq" +echo "- test2 succeeded -" + +cd .. + +# Test 3: with option fq2 +mkdir test3 +cd test3 + +echo "> Run bedtools bamtofastq on BAM file with output_fq2" +"$meta_executable" \ + --input "$test_data/example.bam" \ + --fastq "output1.fastq" \ + --fastq2 "output2.fastq" + +# checks +assert_file_exists "output1.fastq" +assert_file_not_empty "output1.fastq" +assert_identical_content "output1.fastq" "$test_data/expected_1.fastq" +assert_file_exists "output2.fastq" +assert_file_not_empty "output2.fastq" +assert_identical_content "output2.fastq" "$test_data/expected_2.fastq" +echo "- test3 succeeded -" + +cd .. + +echo "All tests succeeded" +exit 0 + + diff --git a/src/bedtools/bedtools_bamtofastq/test_data/example.bam b/src/bedtools/bedtools_bamtofastq/test_data/example.bam new file mode 100644 index 0000000000000000000000000000000000000000..ffc075ab83a83a98ed1edbf88b26cc27ad8946c6 GIT binary patch literal 334 zcmb2|=3rp}f&Xj_PR>jWAq>SuUsA6mBqS7Y@IB%Aw%O~PhS4S?6Z1_bX2zRMuCZ>` z;o;@Ato^fw$CpQUheTtRYNNz-r#8JXHa3Ry>s4lk0?m>~GxQF_-U<7&m>dP#pU;|5 z)~CHK)-&PMX8(zQnRkjz7tzu&Q_9lpm^;_nXXDZb**~)OH9hZA+GbYw!F2!1eU^u& z=6?J8db>^n+vnS58VqGOpQde!^LhT^FPno$sK1R;RVb&i_o5|>-LG;Kg??MHCx&;~ ziZww?R#r16X1LX_ZFYQZ=WBLl9Y-y@V*W$>-;Wo3eOwoN_@m-GsXhDI example.sam +printf "my_read\t99\tchr2:172936693-172938111\t129\t60\t100M\t=\t429\t400\tCTAACTAGCCTGGGAAAAAAGGATAGTGTCTCTCTGTTCTTTCATAGGAAATGTTGAATCAGACCCCTACTGGGAAAAGAAATTTAATGCATATCTCACT\t*\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:100\n" >> example.sam +printf 
"my_read\t147\tchr2:172936693-172938111\t429\t60\t100M\t=\t129\t-400\tTCGAGCTCTGCATTCATGGCTGTGTCTAAAGGGCATGTCAGCCTTTGATTCTCTCTGAGAGGTAATTATCCTTTTCCTGTCACGGAACAACAAATGATAG\t*\tXT:A:U\tNM:i:0\tSM:i:37\tAM:i:37\tX0:i:1\tX1:i:0\tXM:i:0\tXO:i:0\tXG:i:0\tMD:Z:100\n" >> example.sam + +# create bam file +# samtools view -b example.sam > example.bam + +# create fastq files +# bedtools bamtofastq -i example.bam -fq expected.fastq +# bedtools bamtofastq -i example.bam -fq expected_1.fastq -fq2 expected_2.fastq From 21caeff1becd6e6405ad7fcfc916cdde916fbd98 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Tue, 13 Aug 2024 20:50:02 +0200 Subject: [PATCH 20/25] Add agat convert spgff2tsv (#102) * add help * add config * add run script * add test data and expected output + script to fetch them * add tests * update changelog --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 15 +- .../agat_convert_sp_gff2tsv/config.vsh.yaml | 70 ++ src/agat/agat_convert_sp_gff2tsv/help.txt | 63 ++ src/agat/agat_convert_sp_gff2tsv/script.sh | 9 + src/agat/agat_convert_sp_gff2tsv/test.sh | 27 + .../agat_convert_sp_gff2tsv/test_data/1.gff | 942 ++++++++++++++++++ .../test_data/agat_convert_sp_gff2tsv_1.tsv | 881 ++++++++++++++++ .../test_data/script.sh | 10 + 8 files changed, 2007 insertions(+), 10 deletions(-) create mode 100644 src/agat/agat_convert_sp_gff2tsv/config.vsh.yaml create mode 100644 src/agat/agat_convert_sp_gff2tsv/help.txt create mode 100644 src/agat/agat_convert_sp_gff2tsv/script.sh create mode 100644 src/agat/agat_convert_sp_gff2tsv/test.sh create mode 100644 src/agat/agat_convert_sp_gff2tsv/test_data/1.gff create mode 100644 src/agat/agat_convert_sp_gff2tsv/test_data/agat_convert_sp_gff2tsv_1.tsv create mode 100755 src/agat/agat_convert_sp_gff2tsv/test_data/script.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index f2892df6..9082149e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,20 +20,15 @@ based on a provided sequence IDs or region coordinates file (PR #85). 
* `agat`: - - `agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). - - `/agat_convert_bed2gff`: convert bed file to gff format (PR #97). + - `agat/agat_convert_sp_gff2gtf`: convert any GTF/GFF file into a proper GTF file (PR #76). + - `agat/agat_convert_bed2gff`: convert bed file to gff format (PR #97). + - `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). + - `agat/agat_convert_sp_gff2tsv`: convert gtf/gff file into tabulated file (PR #102). * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). - - -* `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - -* `bedtools`: - - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). - - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). - - `bedtools/bedtools_bamtofastq`: Convert BAM alignments to FASTQ files (PR #101). + - `bedtools/bedtools_bamtofastq`: Convert BAM alignments to FASTQ files (PR #101). ## MINOR CHANGES diff --git a/src/agat/agat_convert_sp_gff2tsv/config.vsh.yaml b/src/agat/agat_convert_sp_gff2tsv/config.vsh.yaml new file mode 100644 index 00000000..f1c78590 --- /dev/null +++ b/src/agat/agat_convert_sp_gff2tsv/config.vsh.yaml @@ -0,0 +1,70 @@ +name: agat_convert_sp_gff2tsv +namespace: agat +description: | + The script aims to convert gtf/gff file into tabulated file. Attribute's + tags from the 9th column become column titles. 
+keywords: [gene annotations, GFF conversion] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_convert_sp_gff2tsv.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --gff + alternatives: [-f] + description: Input GTF/GFF file. + type: file + required: true + direction: input + example: input.gff + - name: Outputs + arguments: + - name: --output + alternatives: [-o, --out, --outfile] + description: Output GFF file. If no output file is specified, the output will be written to STDOUT. + type: file + direction: output + required: true + example: output.gff + - name: Arguments + arguments: + - name: --config + alternatives: [-c] + description: | + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). 
+ type: file + required: false + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_convert_sp_gff2tsv/help.txt b/src/agat/agat_convert_sp_gff2tsv/help.txt new file mode 100644 index 00000000..afbf85f8 --- /dev/null +++ b/src/agat/agat_convert_sp_gff2tsv/help.txt @@ -0,0 +1,63 @@ +```sh +agat_convert_sp_gff2tsv.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_convert_sp_gff2tsv.pl + +Description: + The script aims to convert gtf/gff file into tabulated file. Attribute's + tags from the 9th column become column titles. + +Usage: + agat_convert_sp_gff2tsv.pl -gff file.gff [ -o outfile ] + agat_convert_sp_gff2tsv.pl --help + +Options: + --gff or -f + Input GTF/GFF file. + + -o , --output , --out or --outfile + Output GFF file. If no output file is specified, the output will + be written to STDOUT. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). 
+ + -h or --help + Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. + Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md \ No newline at end of file diff --git a/src/agat/agat_convert_sp_gff2tsv/script.sh b/src/agat/agat_convert_sp_gff2tsv/script.sh new file mode 100644 index 00000000..6393303c --- /dev/null +++ b/src/agat/agat_convert_sp_gff2tsv/script.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +agat_convert_sp_gff2tsv.pl \ + -f "$par_gff" \ + -o "$par_output" \ + ${par_config:+--config "${par_config}"} diff --git a/src/agat/agat_convert_sp_gff2tsv/test.sh b/src/agat/agat_convert_sp_gff2tsv/test.sh new file mode 100644 index 00000000..fabe46b9 --- /dev/null +++ b/src/agat/agat_convert_sp_gff2tsv/test.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" +out_dir="${meta_resources_dir}/out_data" + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --gff "$test_dir/1.gff" \ + --output "$out_dir/output.gff" + +echo ">> Checking output" +[ ! -f "$out_dir/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! 
-s "$out_dir/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + +echo ">> Check if output matches expected output" +diff "$out_dir/output.gff" "$test_dir/agat_convert_sp_gff2tsv_1.tsv" +if [ $? -ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + +echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_convert_sp_gff2tsv/test_data/1.gff b/src/agat/agat_convert_sp_gff2tsv/test_data/1.gff new file mode 100644 index 00000000..40a06c78 --- /dev/null +++ b/src/agat/agat_convert_sp_gff2tsv/test_data/1.gff @@ -0,0 +1,942 @@ +##gff-version 3 +##sequence-region 1 1 43270923 +#!genome-build RAP-DB IRGSP-1.0 +#!genome-version IRGSP-1.0 +#!genome-date 2015-10 +#!genome-build-accession GCA_001433935.1 +1 RAP-DB chromosome 1 43270923 . . . ID=chromosome:1;Alias=Chr1,AP014957.1,NC_029256.1 +### +1 irgsp repeat_region 2000 2100 . + . ID=fakeRepeat1 +### +1 irgsp gene 2983 10815 . + . ID=gene:Os01g0100100;biotype=protein_coding;description=RabGAP/TBC domain containing protein. (Os01t0100100-01);gene_id=Os01g0100100;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 2983 10815 . + . ID=transcript:Os01t0100100-01;Parent=gene:Os01g0100100;biotype=protein_coding;transcript_id=Os01t0100100-01 +1 irgsp exon 2983 3268 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon1;rank=1 +1 irgsp five_prime_UTR 2983 3268 . + . Parent=transcript:Os01t0100100-01 +1 irgsp five_prime_UTR 3354 3448 . + . Parent=transcript:Os01t0100100-01 +1 irgsp exon 3354 3616 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100100-01.exon2;rank=2 +1 irgsp CDS 3449 3616 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 4357 4455 . + . 
Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon3;rank=3 +1 irgsp CDS 4357 4455 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 5457 5560 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100100-01.exon4;rank=4 +1 irgsp CDS 5457 5560 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 7136 7944 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100100-01.exon5;rank=5 +1 irgsp CDS 7136 7944 . + 1 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8028 8150 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon6;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100100-01.exon6;rank=6 +1 irgsp CDS 8028 8150 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8232 8320 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon7;rank=7 +1 irgsp CDS 8232 8320 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 8408 8608 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100100-01.exon8;rank=8 +1 irgsp CDS 8408 8608 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 9210 9615 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon9;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100100-01.exon9;rank=9 +1 irgsp CDS 9210 9615 . 
+ 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 10102 10187 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100100-01.exon10;rank=10 +1 irgsp CDS 10102 10187 . + 2 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp CDS 10274 10297 . + 0 ID=CDS:Os01t0100100-01;Parent=transcript:Os01t0100100-01;protein_id=Os01t0100100-01 +1 irgsp exon 10274 10430 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon11;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100100-01.exon11;rank=11 +1 irgsp three_prime_UTR 10298 10430 . + . Parent=transcript:Os01t0100100-01 +1 irgsp exon 10504 10815 . + . Parent=transcript:Os01t0100100-01;Name=Os01t0100100-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100100-01.exon12;rank=12 +1 irgsp three_prime_UTR 10504 10815 . + . Parent=transcript:Os01t0100100-01 +### +1 irgsp gene 11218 12435 . + . ID=gene:Os01g0100200;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0100200-01);gene_id=Os01g0100200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11218 12435 . + . ID=transcript:Os01t0100200-01;Parent=gene:Os01g0100200;biotype=protein_coding;transcript_id=Os01t0100200-01 +1 irgsp five_prime_UTR 11218 11797 . + . Parent=transcript:Os01t0100200-01 +1 irgsp exon 11218 12060 . + . Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100200-01.exon1;rank=1 +1 irgsp CDS 11798 12060 . + 0 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp CDS 12152 12317 . + 1 ID=CDS:Os01t0100200-01;Parent=transcript:Os01t0100200-01;protein_id=Os01t0100200-01 +1 irgsp exon 12152 12435 . + . 
Parent=transcript:Os01t0100200-01;Name=Os01t0100200-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100200-01.exon2;rank=2 +1 irgsp three_prime_UTR 12318 12435 . + . Parent=transcript:Os01t0100200-01 +### +1 irgsp gene 11372 12284 . - . ID=gene:Os01g0100300;biotype=protein_coding;description=Cytochrome P450 domain containing protein. (Os01t0100300-00);gene_id=Os01g0100300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 11372 12284 . - . ID=transcript:Os01t0100300-00;Parent=gene:Os01g0100300;biotype=protein_coding;transcript_id=Os01t0100300-00 +1 irgsp exon 11372 12042 . - . Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100300-00.exon2;rank=2 +1 irgsp CDS 11372 12042 . - 2 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +1 irgsp exon 12146 12284 . - . Parent=transcript:Os01t0100300-00;Name=Os01t0100300-00.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100300-00.exon1;rank=1 +1 irgsp CDS 12146 12284 . - 0 ID=CDS:Os01t0100300-00;Parent=transcript:Os01t0100300-00;protein_id=Os01t0100300-00 +### +1 irgsp gene 12721 15685 . + . ID=gene:Os01g0100400;biotype=protein_coding;description=Similar to Pectinesterase-like protein. (Os01t0100400-01);gene_id=Os01g0100400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12721 15685 . + . ID=transcript:Os01t0100400-01;Parent=gene:Os01g0100400;biotype=protein_coding;transcript_id=Os01t0100400-01 +1 irgsp five_prime_UTR 12721 12773 . + . Parent=transcript:Os01t0100400-01 +1 irgsp exon 12721 13813 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100400-01.exon1;rank=1 +1 irgsp CDS 12774 13813 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 13906 14271 . + . 
Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100400-01.exon2;rank=2 +1 irgsp CDS 13906 14271 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 14359 14437 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100400-01.exon3;rank=3 +1 irgsp CDS 14359 14437 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 14969 15171 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100400-01.exon4;rank=4 +1 irgsp CDS 14969 15171 . + 0 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp CDS 15266 15359 . + 1 ID=CDS:Os01t0100400-01;Parent=transcript:Os01t0100400-01;protein_id=Os01t0100400-01 +1 irgsp exon 15266 15685 . + . Parent=transcript:Os01t0100400-01;Name=Os01t0100400-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100400-01.exon5;rank=5 +1 irgsp three_prime_UTR 15360 15685 . + . Parent=transcript:Os01t0100400-01 +### +1 irgsp gene 12808 13978 . - . ID=gene:Os01g0100466;biotype=protein_coding;description=Hypothetical protein. (Os01t0100466-00);gene_id=Os01g0100466;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 12808 13978 . - . ID=transcript:Os01t0100466-00;Parent=gene:Os01g0100466;biotype=protein_coding;transcript_id=Os01t0100466-00 +1 irgsp three_prime_UTR 12808 12868 . - . Parent=transcript:Os01t0100466-00 +1 irgsp exon 12808 13782 . - . Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon2;rank=2 +1 irgsp CDS 12869 13102 . - 0 ID=CDS:Os01t0100466-00;Parent=transcript:Os01t0100466-00;protein_id=Os01t0100466-00 +1 irgsp five_prime_UTR 13103 13782 . - . 
Parent=transcript:Os01t0100466-00 +1 irgsp exon 13880 13978 . - . Parent=transcript:Os01t0100466-00;Name=Os01t0100466-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100466-00.exon1;rank=1 +1 irgsp five_prime_UTR 13880 13978 . - . Parent=transcript:Os01t0100466-00 +### +1 irgsp gene 16399 20144 . + . ID=gene:Os01g0100500;biotype=protein_coding;description=Immunoglobulin-like domain containing protein. (Os01t0100500-01);gene_id=Os01g0100500;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 16399 20144 . + . ID=transcript:Os01t0100500-01;Parent=gene:Os01g0100500;biotype=protein_coding;transcript_id=Os01t0100500-01 +1 irgsp five_prime_UTR 16399 16598 . + . Parent=transcript:Os01t0100500-01 +1 irgsp exon 16399 16976 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100500-01.exon1;rank=1 +1 irgsp CDS 16599 16976 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 17383 17474 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100500-01.exon2;rank=2 +1 irgsp CDS 17383 17474 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 17558 18258 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100500-01.exon3;rank=3 +1 irgsp CDS 17558 18258 . + 1 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 18501 18571 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100500-01.exon4;rank=4 +1 irgsp CDS 18501 18571 . + 2 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 18968 19057 . + . 
Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon5;rank=5 +1 irgsp CDS 18968 19057 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 19142 19321 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100500-01.exon6;rank=6 +1 irgsp CDS 19142 19321 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp CDS 19531 19593 . + 0 ID=CDS:Os01t0100500-01;Parent=transcript:Os01t0100500-01;protein_id=Os01t0100500-01 +1 irgsp exon 19531 19629 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100500-01.exon7;rank=7 +1 irgsp three_prime_UTR 19594 19629 . + . Parent=transcript:Os01t0100500-01 +1 irgsp exon 19734 20144 . + . Parent=transcript:Os01t0100500-01;Name=Os01t0100500-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100500-01.exon8;rank=8 +1 irgsp three_prime_UTR 19734 20144 . + . Parent=transcript:Os01t0100500-01 +### +1 irgsp gene 22841 26892 . + . ID=gene:Os01g0100600;biotype=protein_coding;description=Single-stranded nucleic acid binding R3H domain containing protein. (Os01t0100600-01);gene_id=Os01g0100600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 22841 26892 . + . ID=transcript:Os01t0100600-01;Parent=gene:Os01g0100600;biotype=protein_coding;transcript_id=Os01t0100600-01 +1 irgsp five_prime_UTR 22841 23231 . + . Parent=transcript:Os01t0100600-01 +1 irgsp exon 22841 23281 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100600-01.exon1;rank=1 +1 irgsp CDS 23232 23281 . + 0 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 23572 23847 . + . 
Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100600-01.exon2;rank=2 +1 irgsp CDS 23572 23847 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 23962 24033 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100600-01.exon3;rank=3 +1 irgsp CDS 23962 24033 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 24492 24577 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon4;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0100600-01.exon4;rank=4 +1 irgsp CDS 24492 24577 . + 1 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 25445 25519 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100600-01.exon5;rank=5 +1 irgsp CDS 25445 25519 . + 2 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp CDS 25883 26391 . + 2 ID=CDS:Os01t0100600-01;Parent=transcript:Os01t0100600-01;protein_id=Os01t0100600-01 +1 irgsp exon 25883 26892 . + . Parent=transcript:Os01t0100600-01;Name=Os01t0100600-01.exon6;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0100600-01.exon6;rank=6 +1 irgsp three_prime_UTR 26392 26892 . + . Parent=transcript:Os01t0100600-01 +### +1 irgsp gene 25861 26424 . - . ID=gene:Os01g0100650;biotype=protein_coding;description=Hypothetical gene. (Os01t0100650-00);gene_id=Os01g0100650;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 25861 26424 . - . ID=transcript:Os01t0100650-00;Parent=gene:Os01g0100650;biotype=protein_coding;transcript_id=Os01t0100650-00 +1 irgsp three_prime_UTR 25861 26039 . - . Parent=transcript:Os01t0100650-00 +1 irgsp exon 25861 26424 . - . 
Parent=transcript:Os01t0100650-00;Name=Os01t0100650-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0100650-00.exon1;rank=1 +1 irgsp CDS 26040 26423 . - 0 ID=CDS:Os01t0100650-00;Parent=transcript:Os01t0100650-00;protein_id=Os01t0100650-00 +1 irgsp five_prime_UTR 26424 26424 . - . Parent=transcript:Os01t0100650-00 +### +1 irgsp gene 27143 28644 . + . ID=gene:Os01g0100700;biotype=protein_coding;description=Similar to 40S ribosomal protein S5-1. (Os01t0100700-01);gene_id=Os01g0100700;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 27143 28644 . + . ID=transcript:Os01t0100700-01;Parent=gene:Os01g0100700;biotype=protein_coding;transcript_id=Os01t0100700-01 +1 irgsp five_prime_UTR 27143 27220 . + . Parent=transcript:Os01t0100700-01 +1 irgsp exon 27143 27292 . + . Parent=transcript:Os01t0100700-01;Name=Os01t0100700-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0100700-01.exon1;rank=1 +1 irgsp CDS 27221 27292 . + 0 ID=CDS:Os01t0100700-01;Parent=transcript:Os01t0100700-01;protein_id=Os01t0100700-01 +1 irgsp exon 27370 27641 . + . Parent=transcript:Os01t0100700-01;Name=Os01t0100700-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100700-01.exon2;rank=2 +1 irgsp CDS 27370 27641 . + 0 ID=CDS:Os01t0100700-01;Parent=transcript:Os01t0100700-01;protein_id=Os01t0100700-01 +1 irgsp exon 28090 28293 . + . Parent=transcript:Os01t0100700-01;Name=Os01t0100700-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100700-01.exon3;rank=3 +1 irgsp CDS 28090 28293 . + 1 ID=CDS:Os01t0100700-01;Parent=transcript:Os01t0100700-01;protein_id=Os01t0100700-01 +1 irgsp CDS 28365 28419 . + 1 ID=CDS:Os01t0100700-01;Parent=transcript:Os01t0100700-01;protein_id=Os01t0100700-01 +1 irgsp exon 28365 28644 . + . Parent=transcript:Os01t0100700-01;Name=Os01t0100700-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0100700-01.exon4;rank=4 +1 irgsp three_prime_UTR 28420 28644 . + . 
Parent=transcript:Os01t0100700-01 +### +1 irgsp gene 29818 34453 . + . ID=gene:Os01g0100800;biotype=protein_coding;description=Protein of unknown function DUF1664 family protein. (Os01t0100800-01);gene_id=Os01g0100800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 29818 34453 . + . ID=transcript:Os01t0100800-01;Parent=gene:Os01g0100800;biotype=protein_coding;transcript_id=Os01t0100800-01 +1 irgsp five_prime_UTR 29818 29939 . + . Parent=transcript:Os01t0100800-01 +1 irgsp exon 29818 29976 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0100800-01.exon1;rank=1 +1 irgsp CDS 29940 29976 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 30146 30228 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100800-01.exon2;rank=2 +1 irgsp CDS 30146 30228 . + 2 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 30735 30806 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon3;rank=3 +1 irgsp CDS 30735 30806 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 30885 30963 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon4;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100800-01.exon4;rank=4 +1 irgsp CDS 30885 30963 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 31258 31325 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0100800-01.exon5;rank=5 +1 irgsp CDS 31258 31325 . + 2 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 31505 31606 . + . 
Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon6;rank=6 +1 irgsp CDS 31505 31606 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 32377 32466 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon7;rank=7 +1 irgsp CDS 32377 32466 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 32542 32616 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon8;rank=8 +1 irgsp CDS 32542 32616 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 32712 32744 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon9;rank=9 +1 irgsp CDS 32712 32744 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 32828 32905 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon10;rank=10 +1 irgsp CDS 32828 32905 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 33274 33330 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon11;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon11;rank=11 +1 irgsp CDS 33274 33330 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 33400 33471 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon12;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon12;rank=12 +1 irgsp CDS 33400 33471 . 
+ 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 33543 33617 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon13;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100800-01.exon13;rank=13 +1 irgsp CDS 33543 33617 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp CDS 33975 34124 . + 0 ID=CDS:Os01t0100800-01;Parent=transcript:Os01t0100800-01;protein_id=Os01t0100800-01 +1 irgsp exon 33975 34453 . + . Parent=transcript:Os01t0100800-01;Name=Os01t0100800-01.exon14;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100800-01.exon14;rank=14 +1 irgsp three_prime_UTR 34125 34453 . + . Parent=transcript:Os01t0100800-01 +### +1 irgsp gene 35623 41136 . + . ID=gene:Os01g0100900;Name=SPHINGOSINE-1-PHOSPHATE LYASE 1%2C Sphingosine-1-Phoshpate Lyase 1;biotype=protein_coding;description=Sphingosine-1-phosphate lyase%2C Disease resistance response (Os01t0100900-01);gene_id=Os01g0100900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 35623 41136 . + . ID=transcript:Os01t0100900-01;Parent=gene:Os01g0100900;biotype=protein_coding;transcript_id=Os01t0100900-01 +1 irgsp five_prime_UTR 35623 35742 . + . Parent=transcript:Os01t0100900-01 +1 irgsp exon 35623 35939 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0100900-01.exon1;rank=1 +1 irgsp CDS 35743 35939 . + 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 36027 36072 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100900-01.exon2;rank=2 +1 irgsp CDS 36027 36072 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 36517 36668 . + . 
Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100900-01.exon3;rank=3 +1 irgsp CDS 36517 36668 . + 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 36818 36877 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100900-01.exon4;rank=4 +1 irgsp CDS 36818 36877 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 37594 37818 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon5;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100900-01.exon5;rank=5 +1 irgsp CDS 37594 37818 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 37892 38033 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100900-01.exon6;rank=6 +1 irgsp CDS 37892 38033 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 38276 38326 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0100900-01.exon7;rank=7 +1 irgsp CDS 38276 38326 . + 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 38434 38525 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon8;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0100900-01.exon8;rank=8 +1 irgsp CDS 38434 38525 . + 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 39319 39445 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100900-01.exon9;rank=9 +1 irgsp CDS 39319 39445 . 
+ 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 39553 39568 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon10;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0100900-01.exon10;rank=10 +1 irgsp CDS 39553 39568 . + 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 39939 40046 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon11;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0100900-01.exon11;rank=11 +1 irgsp CDS 39939 40046 . + 2 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 40135 40189 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon12;constitutive=1;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0100900-01.exon12;rank=12 +1 irgsp CDS 40135 40189 . + 2 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 40456 40602 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon13;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0100900-01.exon13;rank=13 +1 irgsp CDS 40456 40602 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 40703 40781 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon14;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0100900-01.exon14;rank=14 +1 irgsp CDS 40703 40781 . + 1 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp CDS 40885 41007 . + 0 ID=CDS:Os01t0100900-01;Parent=transcript:Os01t0100900-01;protein_id=Os01t0100900-01 +1 irgsp exon 40885 41136 . + . Parent=transcript:Os01t0100900-01;Name=Os01t0100900-01.exon15;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0100900-01.exon15;rank=15 +1 irgsp three_prime_UTR 41008 41136 . + . Parent=transcript:Os01t0100900-01 +### +1 irgsp gene 58658 61090 . + . 
ID=gene:Os01g0101150;biotype=protein_coding;description=Hypothetical conserved gene. (Os01t0101150-00);gene_id=Os01g0101150;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 58658 61090 . + . ID=transcript:Os01t0101150-00;Parent=gene:Os01g0101150;biotype=protein_coding;transcript_id=Os01t0101150-00 +1 irgsp exon 58658 61090 . + . Parent=transcript:Os01t0101150-00;Name=Os01t0101150-00.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0101150-00.exon1;rank=1 +1 irgsp CDS 58658 61090 . + 0 ID=CDS:Os01t0101150-00;Parent=transcript:Os01t0101150-00;protein_id=Os01t0101150-00 +### +1 irgsp gene 62060 65537 . + . ID=gene:Os01g0101200;biotype=protein_coding;description=2%2C3-diketo-5-methylthio-1-phosphopentane phosphatase domain containing protein. (Os01t0101200-01)%3B2%2C3-diketo-5-methylthio-1-phosphopentane phosphatase domain containing protein. (Os01t0101200-02);gene_id=Os01g0101200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 62060 63576 . + . ID=transcript:Os01t0101200-01;Parent=gene:Os01g0101200;biotype=protein_coding;transcript_id=Os01t0101200-01 +1 irgsp five_prime_UTR 62060 62103 . + . Parent=transcript:Os01t0101200-01 +1 irgsp exon 62060 62295 . + . Parent=transcript:Os01t0101200-01;Name=Os01t0101200-01.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0101200-01.exon1;rank=1 +1 irgsp CDS 62104 62295 . + 0 ID=CDS:Os01t0101200-01;Parent=transcript:Os01t0101200-01;protein_id=Os01t0101200-01 +1 irgsp exon 62385 62905 . + . Parent=transcript:Os01t0101200-01;Name=Os01t0101200-02.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0101200-02.exon2;rank=2 +1 irgsp CDS 62385 62905 . + 0 ID=CDS:Os01t0101200-01;Parent=transcript:Os01t0101200-01;protein_id=Os01t0101200-01 +1 irgsp exon 62996 63114 . + . Parent=transcript:Os01t0101200-01;Name=Os01t0101200-02.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0101200-02.exon3;rank=3 +1 irgsp CDS 62996 63114 . 
+ 1 ID=CDS:Os01t0101200-01;Parent=transcript:Os01t0101200-01;protein_id=Os01t0101200-01 +1 irgsp CDS 63248 63345 . + 2 ID=CDS:Os01t0101200-01;Parent=transcript:Os01t0101200-01;protein_id=Os01t0101200-01 +1 irgsp exon 63248 63576 . + . Parent=transcript:Os01t0101200-01;Name=Os01t0101200-01.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0101200-01.exon4;rank=4 +1 irgsp three_prime_UTR 63346 63576 . + . Parent=transcript:Os01t0101200-01 +1 irgsp mRNA 62112 65537 . + . ID=transcript:Os01t0101200-02;Parent=gene:Os01g0101200;biotype=protein_coding;transcript_id=Os01t0101200-02 +1 irgsp five_prime_UTR 62112 62112 . + . Parent=transcript:Os01t0101200-02 +1 irgsp exon 62112 62295 . + . Parent=transcript:Os01t0101200-02;Name=Os01t0101200-02.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0101200-02.exon1;rank=1 +1 irgsp CDS 62113 62295 . + 0 ID=CDS:Os01t0101200-02;Parent=transcript:Os01t0101200-02;protein_id=Os01t0101200-02 +1 irgsp exon 62385 62905 . + . Parent=transcript:Os01t0101200-02;Name=Os01t0101200-02.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0101200-02.exon2;rank=2 +1 irgsp CDS 62385 62905 . + 0 ID=CDS:Os01t0101200-02;Parent=transcript:Os01t0101200-02;protein_id=Os01t0101200-02 +1 irgsp exon 62996 63114 . + . Parent=transcript:Os01t0101200-02;Name=Os01t0101200-02.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=2;exon_id=Os01t0101200-02.exon3;rank=3 +1 irgsp CDS 62996 63114 . + 1 ID=CDS:Os01t0101200-02;Parent=transcript:Os01t0101200-02;protein_id=Os01t0101200-02 +1 irgsp CDS 63248 63345 . + 2 ID=CDS:Os01t0101200-02;Parent=transcript:Os01t0101200-02;protein_id=Os01t0101200-02 +1 irgsp exon 63248 65537 . + . Parent=transcript:Os01t0101200-02;Name=Os01t0101200-02.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0101200-02.exon4;rank=4 +1 irgsp three_prime_UTR 63346 65537 . + . Parent=transcript:Os01t0101200-02 +### +1 irgsp gene 63350 66302 . - . 
ID=gene:Os01g0101300;biotype=protein_coding;description=Similar to MRNA%2C partial cds%2C clone: RAFL22-26-L17. (Fragment). (Os01t0101300-01);gene_id=Os01g0101300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 63350 66302 . - . ID=transcript:Os01t0101300-01;Parent=gene:Os01g0101300;biotype=protein_coding;transcript_id=Os01t0101300-01 +1 irgsp three_prime_UTR 63350 63669 . - . Parent=transcript:Os01t0101300-01 +1 irgsp exon 63350 63783 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0101300-01.exon7;rank=7 +1 irgsp CDS 63670 63783 . - 0 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 63877 64020 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0101300-01.exon6;rank=6 +1 irgsp CDS 63877 64020 . - 0 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 64339 64431 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0101300-01.exon5;rank=5 +1 irgsp CDS 64339 64431 . - 0 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 64665 64779 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0101300-01.exon4;rank=4 +1 irgsp CDS 64665 64779 . - 1 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 64902 65152 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0101300-01.exon3;rank=3 +1 irgsp CDS 64902 65152 . - 0 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 65248 65431 . - . 
Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0101300-01.exon2;rank=2 +1 irgsp CDS 65248 65431 . - 1 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp CDS 65628 65950 . - 0 ID=CDS:Os01t0101300-01;Parent=transcript:Os01t0101300-01;protein_id=Os01t0101300-01 +1 irgsp exon 65628 66302 . - . Parent=transcript:Os01t0101300-01;Name=Os01t0101300-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0101300-01.exon1;rank=1 +1 irgsp five_prime_UTR 65951 66302 . - . Parent=transcript:Os01t0101300-01 +### +1 irgsp gene 72816 78349 . + . ID=gene:Os01g0101600;biotype=protein_coding;description=Immunoglobulin-like fold domain containing protein. (Os01t0101600-01)%3BImmunoglobulin-like fold domain containing protein. (Os01t0101600-02)%3BHypothetical conserved gene. (Os01t0101600-03);gene_id=Os01g0101600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 72816 78349 . + . ID=transcript:Os01t0101600-01;Parent=gene:Os01g0101600;biotype=protein_coding;transcript_id=Os01t0101600-01 +1 irgsp five_prime_UTR 72816 72902 . + . Parent=transcript:Os01t0101600-01 +1 irgsp exon 72816 73935 . + . Parent=transcript:Os01t0101600-01;Name=Os01t0101600-01.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0101600-01.exon1;rank=1 +1 irgsp CDS 72903 73935 . + 0 ID=CDS:Os01t0101600-01;Parent=transcript:Os01t0101600-01;protein_id=Os01t0101600-01 +1 irgsp exon 74468 74981 . + . Parent=transcript:Os01t0101600-01;Name=Os01t0101600-02.exon2;constitutive=0;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0101600-02.exon2;rank=2 +1 irgsp CDS 74468 74981 . + 2 ID=CDS:Os01t0101600-01;Parent=transcript:Os01t0101600-01;protein_id=Os01t0101600-01 +1 irgsp CDS 75619 77008 . + 1 ID=CDS:Os01t0101600-01;Parent=transcript:Os01t0101600-01;protein_id=Os01t0101600-01 +1 irgsp exon 75619 77205 . + . 
Parent=transcript:Os01t0101600-01;Name=Os01t0101600-01.exon3;constitutive=0;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0101600-01.exon3;rank=3 +1 irgsp three_prime_UTR 77009 77205 . + . Parent=transcript:Os01t0101600-01 +1 irgsp exon 77333 78349 . + . Parent=transcript:Os01t0101600-01;Name=Os01t0101600-01.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101600-01.exon4;rank=4 +1 irgsp three_prime_UTR 77333 78349 . + . Parent=transcript:Os01t0101600-01 +1 irgsp mRNA 72823 77699 . + . ID=transcript:Os01t0101600-02;Parent=gene:Os01g0101600;biotype=protein_coding;transcript_id=Os01t0101600-02 +1 irgsp five_prime_UTR 72823 72902 . + . Parent=transcript:Os01t0101600-02 +1 irgsp exon 72823 73935 . + . Parent=transcript:Os01t0101600-02;Name=Os01t0101600-02.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0101600-02.exon1;rank=1 +1 irgsp CDS 72903 73935 . + 0 ID=CDS:Os01t0101600-02;Parent=transcript:Os01t0101600-02;protein_id=Os01t0101600-02 +1 irgsp exon 74468 74981 . + . Parent=transcript:Os01t0101600-02;Name=Os01t0101600-02.exon2;constitutive=0;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0101600-02.exon2;rank=2 +1 irgsp CDS 74468 74981 . + 2 ID=CDS:Os01t0101600-02;Parent=transcript:Os01t0101600-02;protein_id=Os01t0101600-02 +1 irgsp CDS 75619 77008 . + 1 ID=CDS:Os01t0101600-02;Parent=transcript:Os01t0101600-02;protein_id=Os01t0101600-02 +1 irgsp exon 75619 77699 . + . Parent=transcript:Os01t0101600-02;Name=Os01t0101600-02.exon3;constitutive=0;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0101600-02.exon3;rank=3 +1 irgsp three_prime_UTR 77009 77699 . + . Parent=transcript:Os01t0101600-02 +1 irgsp mRNA 75942 77699 . + . ID=transcript:Os01t0101600-03;Parent=gene:Os01g0101600;biotype=protein_coding;transcript_id=Os01t0101600-03 +1 irgsp five_prime_UTR 75942 75943 . + . Parent=transcript:Os01t0101600-03 +1 irgsp exon 75942 77699 . + . 
Parent=transcript:Os01t0101600-03;Name=Os01t0101600-03.exon1;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101600-03.exon1;rank=1 +1 irgsp CDS 75944 77008 . + 0 ID=CDS:Os01t0101600-03;Parent=transcript:Os01t0101600-03;protein_id=Os01t0101600-03 +1 irgsp three_prime_UTR 77009 77699 . + . Parent=transcript:Os01t0101600-03 +### +1 irgsp gene 82426 84095 . + . ID=gene:Os01g0101700;Name=DnaJ domain protein C1%2C rice DJC26 homolog;biotype=protein_coding;description=Similar to chaperone protein dnaJ 20. (Os01t0101700-00);gene_id=Os01g0101700;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 82426 84095 . + . ID=transcript:Os01t0101700-00;Parent=gene:Os01g0101700;biotype=protein_coding;transcript_id=Os01t0101700-00 +1 irgsp five_prime_UTR 82426 82506 . + . Parent=transcript:Os01t0101700-00 +1 irgsp exon 82426 82932 . + . Parent=transcript:Os01t0101700-00;Name=Os01t0101700-00.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0101700-00.exon1;rank=1 +1 irgsp CDS 82507 82932 . + 0 ID=CDS:Os01t0101700-00;Parent=transcript:Os01t0101700-00;protein_id=Os01t0101700-00 +1 irgsp CDS 83724 83864 . + 0 ID=CDS:Os01t0101700-00;Parent=transcript:Os01t0101700-00;protein_id=Os01t0101700-00 +1 irgsp exon 83724 84095 . + . Parent=transcript:Os01t0101700-00;Name=Os01t0101700-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0101700-00.exon2;rank=2 +1 irgsp three_prime_UTR 83865 84095 . + . Parent=transcript:Os01t0101700-00 +### +1 irgsp gene 85337 88844 . + . ID=gene:Os01g0101800;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0101800-01);gene_id=Os01g0101800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 85337 88844 . + . ID=transcript:Os01t0101800-01;Parent=gene:Os01g0101800;biotype=protein_coding;transcript_id=Os01t0101800-01 +1 irgsp five_prime_UTR 85337 85378 . + . Parent=transcript:Os01t0101800-01 +1 irgsp exon 85337 85600 . + . 
Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0101800-01.exon1;rank=1 +1 irgsp CDS 85379 85600 . + 0 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 85737 85830 . + . Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0101800-01.exon2;rank=2 +1 irgsp CDS 85737 85830 . + 0 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 85935 86086 . + . Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0101800-01.exon3;rank=3 +1 irgsp CDS 85935 86086 . + 2 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 86212 86299 . + . Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon4;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0101800-01.exon4;rank=4 +1 irgsp CDS 86212 86299 . + 0 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 86399 87681 . + . Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0101800-01.exon5;rank=5 +1 irgsp CDS 86399 87681 . + 2 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 88291 88398 . + . Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0101800-01.exon6;rank=6 +1 irgsp CDS 88291 88398 . + 0 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp CDS 88500 88583 . + 0 ID=CDS:Os01t0101800-01;Parent=transcript:Os01t0101800-01;protein_id=Os01t0101800-01 +1 irgsp exon 88500 88844 . + . 
Parent=transcript:Os01t0101800-01;Name=Os01t0101800-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0101800-01.exon7;rank=7 +1 irgsp three_prime_UTR 88584 88844 . + . Parent=transcript:Os01t0101800-01 +### +1 irgsp gene 86211 88583 . - . ID=gene:Os01g0101850;biotype=protein_coding;description=Hypothetical protein. (Os01t0101850-00);gene_id=Os01g0101850;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 86211 88583 . - . ID=transcript:Os01t0101850-00;Parent=gene:Os01g0101850;biotype=protein_coding;transcript_id=Os01t0101850-00 +1 irgsp exon 86211 86277 . - . Parent=transcript:Os01t0101850-00;Name=Os01t0101850-00.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101850-00.exon4;rank=4 +1 irgsp three_prime_UTR 86211 86277 . - . Parent=transcript:Os01t0101850-00 +1 irgsp three_prime_UTR 86384 87326 . - . Parent=transcript:Os01t0101850-00 +1 irgsp exon 86384 87694 . - . Parent=transcript:Os01t0101850-00;Name=Os01t0101850-00.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101850-00.exon3;rank=3 +1 irgsp CDS 87327 87662 . - 0 ID=CDS:Os01t0101850-00;Parent=transcript:Os01t0101850-00;protein_id=Os01t0101850-00 +1 irgsp five_prime_UTR 87663 87694 . - . Parent=transcript:Os01t0101850-00 +1 irgsp exon 88308 88396 . - . Parent=transcript:Os01t0101850-00;Name=Os01t0101850-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101850-00.exon2;rank=2 +1 irgsp five_prime_UTR 88308 88396 . - . Parent=transcript:Os01t0101850-00 +1 irgsp exon 88496 88583 . - . Parent=transcript:Os01t0101850-00;Name=Os01t0101850-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101850-00.exon1;rank=1 +1 irgsp five_prime_UTR 88496 88583 . - . Parent=transcript:Os01t0101850-00 +### +1 irgsp gene 88883 89228 . - . ID=gene:Os01g0101900;biotype=protein_coding;description=Similar to OSIGBa0075F02.3 protein. 
(Os01t0101900-00);gene_id=Os01g0101900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 88883 89228 . - . ID=transcript:Os01t0101900-00;Parent=gene:Os01g0101900;biotype=protein_coding;transcript_id=Os01t0101900-00 +1 irgsp three_prime_UTR 88883 88985 . - . Parent=transcript:Os01t0101900-00 +1 irgsp exon 88883 89228 . - . Parent=transcript:Os01t0101900-00;Name=Os01t0101900-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0101900-00.exon1;rank=1 +1 irgsp CDS 88986 89204 . - 0 ID=CDS:Os01t0101900-00;Parent=transcript:Os01t0101900-00;protein_id=Os01t0101900-00 +1 irgsp five_prime_UTR 89205 89228 . - . Parent=transcript:Os01t0101900-00 +### +1 irgsp gene 89763 91465 . - . ID=gene:Os01g0102000;Name=NON-SPECIFIC PHOSPHOLIPASE C5;biotype=protein_coding;description=Phosphoesterase family protein. (Os01t0102000-01);gene_id=Os01g0102000;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 89763 91465 . - . ID=transcript:Os01t0102000-01;Parent=gene:Os01g0102000;biotype=protein_coding;transcript_id=Os01t0102000-01 +1 irgsp three_prime_UTR 89763 89824 . - . Parent=transcript:Os01t0102000-01 +1 irgsp exon 89763 91465 . - . Parent=transcript:Os01t0102000-01;Name=Os01t0102000-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0102000-01.exon1;rank=1 +1 irgsp CDS 89825 91411 . - 0 ID=CDS:Os01t0102000-01;Parent=transcript:Os01t0102000-01;protein_id=Os01t0102000-01 +1 irgsp five_prime_UTR 91412 91465 . - . Parent=transcript:Os01t0102000-01 +### +1 irgsp gene 134300 135439 . + . ID=gene:Os01g0102300;Name=OsTLP27;biotype=protein_coding;description=Thylakoid lumen protein%2C Photosynthesis and chloroplast development (Os01t0102300-01);gene_id=Os01g0102300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 134300 135439 . + . ID=transcript:Os01t0102300-01;Parent=gene:Os01g0102300;biotype=protein_coding;transcript_id=Os01t0102300-01 +1 irgsp five_prime_UTR 134300 134310 . + . Parent=transcript:Os01t0102300-01 +1 irgsp exon 134300 134615 . + . 
Parent=transcript:Os01t0102300-01;Name=Os01t0102300-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0102300-01.exon1;rank=1 +1 irgsp CDS 134311 134615 . + 0 ID=CDS:Os01t0102300-01;Parent=transcript:Os01t0102300-01;protein_id=Os01t0102300-01 +1 irgsp exon 134698 134824 . + . Parent=transcript:Os01t0102300-01;Name=Os01t0102300-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0102300-01.exon2;rank=2 +1 irgsp CDS 134698 134824 . + 1 ID=CDS:Os01t0102300-01;Parent=transcript:Os01t0102300-01;protein_id=Os01t0102300-01 +1 irgsp CDS 134912 135253 . + 0 ID=CDS:Os01t0102300-01;Parent=transcript:Os01t0102300-01;protein_id=Os01t0102300-01 +1 irgsp exon 134912 135439 . + . Parent=transcript:Os01t0102300-01;Name=Os01t0102300-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0102300-01.exon3;rank=3 +1 irgsp three_prime_UTR 135254 135439 . + . Parent=transcript:Os01t0102300-01 +### +1 irgsp gene 139826 141555 . + . ID=gene:Os01g0102400;Name=HAP5H SUBUNIT OF CCAAT-BOX BINDING COMPLEX;biotype=protein_coding;description=Histone-fold domain containing protein. (Os01t0102400-01);gene_id=Os01g0102400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 139826 141555 . + . ID=transcript:Os01t0102400-01;Parent=gene:Os01g0102400;biotype=protein_coding;transcript_id=Os01t0102400-01 +1 irgsp exon 139826 139906 . + . Parent=transcript:Os01t0102400-01;Name=Os01t0102400-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0102400-01.exon1;rank=1 +1 irgsp five_prime_UTR 139826 139906 . + . Parent=transcript:Os01t0102400-01 +1 irgsp five_prime_UTR 140120 140149 . + . Parent=transcript:Os01t0102400-01 +1 irgsp exon 140120 141555 . + . Parent=transcript:Os01t0102400-01;Name=Os01t0102400-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0102400-01.exon2;rank=2 +1 irgsp CDS 140150 141415 . 
+ 0 ID=CDS:Os01t0102400-01;Parent=transcript:Os01t0102400-01;protein_id=Os01t0102400-01 +1 irgsp three_prime_UTR 141416 141555 . + . Parent=transcript:Os01t0102400-01 +### +1 irgsp gene 141959 144554 . + . ID=gene:Os01g0102500;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0102500-01);gene_id=Os01g0102500;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 141959 144554 . + . ID=transcript:Os01t0102500-01;Parent=gene:Os01g0102500;biotype=protein_coding;transcript_id=Os01t0102500-01 +1 irgsp five_prime_UTR 141959 142083 . + . Parent=transcript:Os01t0102500-01 +1 irgsp exon 141959 142631 . + . Parent=transcript:Os01t0102500-01;Name=Os01t0102500-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0102500-01.exon1;rank=1 +1 irgsp CDS 142084 142631 . + 0 ID=CDS:Os01t0102500-01;Parent=transcript:Os01t0102500-01;protein_id=Os01t0102500-01 +1 irgsp exon 143191 143431 . + . Parent=transcript:Os01t0102500-01;Name=Os01t0102500-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0102500-01.exon2;rank=2 +1 irgsp CDS 143191 143431 . + 1 ID=CDS:Os01t0102500-01;Parent=transcript:Os01t0102500-01;protein_id=Os01t0102500-01 +1 irgsp exon 143563 143680 . + . Parent=transcript:Os01t0102500-01;Name=Os01t0102500-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0102500-01.exon3;rank=3 +1 irgsp CDS 143563 143680 . + 0 ID=CDS:Os01t0102500-01;Parent=transcript:Os01t0102500-01;protein_id=Os01t0102500-01 +1 irgsp CDS 143817 143908 . + 2 ID=CDS:Os01t0102500-01;Parent=transcript:Os01t0102500-01;protein_id=Os01t0102500-01 +1 irgsp exon 143817 144554 . + . Parent=transcript:Os01t0102500-01;Name=Os01t0102500-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0102500-01.exon4;rank=4 +1 irgsp three_prime_UTR 143909 144554 . + . Parent=transcript:Os01t0102500-01 +### +1 irgsp gene 145603 147847 . + . 
ID=gene:Os01g0102600;Name=Shikimate kinase 4;biotype=protein_coding;description=Shikimate kinase domain containing protein. (Os01t0102600-01)%3BSimilar to shikimate kinase family protein. (Os01t0102600-02);gene_id=Os01g0102600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 145603 147847 . + . ID=transcript:Os01t0102600-01;Parent=gene:Os01g0102600;biotype=protein_coding;transcript_id=Os01t0102600-01 +1 irgsp five_prime_UTR 145603 145644 . + . Parent=transcript:Os01t0102600-01 +1 irgsp exon 145603 145786 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0102600-01.exon1;rank=1 +1 irgsp CDS 145645 145786 . + 0 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 145905 145951 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon2;constitutive=0;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0102600-01.exon2;rank=2 +1 irgsp CDS 145905 145951 . + 2 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 146028 146082 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon3;constitutive=0;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0102600-01.exon3;rank=3 +1 irgsp CDS 146028 146082 . + 0 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 146179 146339 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon4;constitutive=0;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0102600-01.exon4;rank=4 +1 irgsp CDS 146179 146339 . + 2 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 146450 146532 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon5;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0102600-01.exon5;rank=5 +1 irgsp CDS 146450 146532 . 
+ 0 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 146611 146719 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon6;constitutive=0;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0102600-01.exon6;rank=6 +1 irgsp CDS 146611 146719 . + 1 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 147106 147184 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon7;constitutive=0;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0102600-01.exon7;rank=7 +1 irgsp CDS 147106 147184 . + 0 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 147311 147375 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-02.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0102600-02.exon2;rank=8 +1 irgsp CDS 147311 147375 . + 2 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp CDS 147507 147575 . + 0 ID=CDS:Os01t0102600-01;Parent=transcript:Os01t0102600-01;protein_id=Os01t0102600-01 +1 irgsp exon 147507 147847 . + . Parent=transcript:Os01t0102600-01;Name=Os01t0102600-01.exon9;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0102600-01.exon9;rank=9 +1 irgsp three_prime_UTR 147576 147847 . + . Parent=transcript:Os01t0102600-01 +1 irgsp mRNA 147104 147805 . + . ID=transcript:Os01t0102600-02;Parent=gene:Os01g0102600;biotype=protein_coding;transcript_id=Os01t0102600-02 +1 irgsp five_prime_UTR 147104 147105 . + . Parent=transcript:Os01t0102600-02 +1 irgsp exon 147104 147184 . + . Parent=transcript:Os01t0102600-02;Name=Os01t0102600-02.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0102600-02.exon1;rank=1 +1 irgsp CDS 147106 147184 . + 0 ID=CDS:Os01t0102600-02;Parent=transcript:Os01t0102600-02;protein_id=Os01t0102600-02 +1 irgsp exon 147311 147375 . + . 
Parent=transcript:Os01t0102600-02;Name=Os01t0102600-02.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0102600-02.exon2;rank=2 +1 irgsp CDS 147311 147375 . + 2 ID=CDS:Os01t0102600-02;Parent=transcript:Os01t0102600-02;protein_id=Os01t0102600-02 +1 irgsp CDS 147507 147575 . + 0 ID=CDS:Os01t0102600-02;Parent=transcript:Os01t0102600-02;protein_id=Os01t0102600-02 +1 irgsp exon 147507 147805 . + . Parent=transcript:Os01t0102600-02;Name=Os01t0102600-02.exon3;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0102600-02.exon3;rank=3 +1 irgsp three_prime_UTR 147576 147805 . + . Parent=transcript:Os01t0102600-02 +### +1 irgsp gene 148085 150568 . + . ID=gene:Os01g0102700;biotype=protein_coding;description=Translocon-associated beta family protein. (Os01t0102700-01);gene_id=Os01g0102700;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 148085 150568 . + . ID=transcript:Os01t0102700-01;Parent=gene:Os01g0102700;biotype=protein_coding;transcript_id=Os01t0102700-01 +1 irgsp five_prime_UTR 148085 148146 . + . Parent=transcript:Os01t0102700-01 +1 irgsp exon 148085 148313 . + . Parent=transcript:Os01t0102700-01;Name=Os01t0102700-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0102700-01.exon1;rank=1 +1 irgsp CDS 148147 148313 . + 0 ID=CDS:Os01t0102700-01;Parent=transcript:Os01t0102700-01;protein_id=Os01t0102700-01 +1 irgsp exon 149450 149548 . + . Parent=transcript:Os01t0102700-01;Name=Os01t0102700-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0102700-01.exon2;rank=2 +1 irgsp CDS 149450 149548 . + 1 ID=CDS:Os01t0102700-01;Parent=transcript:Os01t0102700-01;protein_id=Os01t0102700-01 +1 irgsp exon 149634 149742 . + . Parent=transcript:Os01t0102700-01;Name=Os01t0102700-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0102700-01.exon3;rank=3 +1 irgsp CDS 149634 149742 . 
+ 1 ID=CDS:Os01t0102700-01;Parent=transcript:Os01t0102700-01;protein_id=Os01t0102700-01 +1 irgsp exon 149856 149931 . + . Parent=transcript:Os01t0102700-01;Name=Os01t0102700-01.exon4;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0102700-01.exon4;rank=4 +1 irgsp CDS 149856 149931 . + 0 ID=CDS:Os01t0102700-01;Parent=transcript:Os01t0102700-01;protein_id=Os01t0102700-01 +1 irgsp CDS 150152 150318 . + 2 ID=CDS:Os01t0102700-01;Parent=transcript:Os01t0102700-01;protein_id=Os01t0102700-01 +1 irgsp exon 150152 150568 . + . Parent=transcript:Os01t0102700-01;Name=Os01t0102700-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0102700-01.exon5;rank=5 +1 irgsp three_prime_UTR 150319 150568 . + . Parent=transcript:Os01t0102700-01 +### +1 irgsp gene 152853 156449 . + . ID=gene:Os01g0102800;Name=Cockayne syndrome WD-repeat protein;biotype=protein_coding;description=Similar to chromatin remodeling complex subunit. (Os01t0102800-01);gene_id=Os01g0102800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 152853 156449 . + . ID=transcript:Os01t0102800-01;Parent=gene:Os01g0102800;biotype=protein_coding;transcript_id=Os01t0102800-01 +1 irgsp five_prime_UTR 152853 152853 . + . Parent=transcript:Os01t0102800-01 +1 irgsp exon 152853 153025 . + . Parent=transcript:Os01t0102800-01;Name=Os01t0102800-01.exon1;constitutive=1;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0102800-01.exon1;rank=1 +1 irgsp CDS 152854 153025 . + 0 ID=CDS:Os01t0102800-01;Parent=transcript:Os01t0102800-01;protein_id=Os01t0102800-01 +1 irgsp exon 153178 154646 . + . Parent=transcript:Os01t0102800-01;Name=Os01t0102800-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0102800-01.exon2;rank=2 +1 irgsp CDS 153178 154646 . + 2 ID=CDS:Os01t0102800-01;Parent=transcript:Os01t0102800-01;protein_id=Os01t0102800-01 +1 irgsp exon 155010 155450 . + . 
Parent=transcript:Os01t0102800-01;Name=Os01t0102800-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0102800-01.exon3;rank=3 +1 irgsp CDS 155010 155450 . + 0 ID=CDS:Os01t0102800-01;Parent=transcript:Os01t0102800-01;protein_id=Os01t0102800-01 +1 irgsp CDS 155543 156214 . + 0 ID=CDS:Os01t0102800-01;Parent=transcript:Os01t0102800-01;protein_id=Os01t0102800-01 +1 irgsp exon 155543 156449 . + . Parent=transcript:Os01t0102800-01;Name=Os01t0102800-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0102800-01.exon4;rank=4 +1 irgsp three_prime_UTR 156215 156449 . + . Parent=transcript:Os01t0102800-01 +### +1 irgsp gene 164577 168921 . + . ID=gene:Os01g0102850;biotype=protein_coding;description=Similar to nitrilase 2. (Os01t0102850-00);gene_id=Os01g0102850;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 164577 168921 . + . ID=transcript:Os01t0102850-00;Parent=gene:Os01g0102850;biotype=protein_coding;transcript_id=Os01t0102850-00 +1 irgsp exon 164577 164905 . + . Parent=transcript:Os01t0102850-00;Name=Os01t0102850-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0102850-00.exon1;rank=1 +1 irgsp five_prime_UTR 164577 164905 . + . Parent=transcript:Os01t0102850-00 +1 irgsp five_prime_UTR 168499 168804 . + . Parent=transcript:Os01t0102850-00 +1 irgsp exon 168499 168921 . + . Parent=transcript:Os01t0102850-00;Name=Os01t0102850-00.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0102850-00.exon2;rank=2 +1 irgsp CDS 168805 168921 . + 0 ID=CDS:Os01t0102850-00;Parent=transcript:Os01t0102850-00;protein_id=Os01t0102850-00 +### +1 irgsp gene 169390 170316 . - . ID=gene:Os01g0102900;Name=LIGHT-REGULATED GENE 1;biotype=protein_coding;description=Light-regulated protein%2C Regulation of light-dependent attachment of LEAF-TYPE FERREDOXIN-NADP+ OXIDOREDUCTASE (LFNR) to the thylakoid membrane (Os01t0102900-01);gene_id=Os01g0102900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 169390 170316 . 
- . ID=transcript:Os01t0102900-01;Parent=gene:Os01g0102900;biotype=protein_coding;transcript_id=Os01t0102900-01 +1 irgsp three_prime_UTR 169390 169598 . - . Parent=transcript:Os01t0102900-01 +1 irgsp exon 169390 169656 . - . Parent=transcript:Os01t0102900-01;Name=Os01t0102900-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0102900-01.exon3;rank=3 +1 irgsp CDS 169599 169656 . - 1 ID=CDS:Os01t0102900-01;Parent=transcript:Os01t0102900-01;protein_id=Os01t0102900-01 +1 irgsp exon 169751 169909 . - . Parent=transcript:Os01t0102900-01;Name=Os01t0102900-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=2;exon_id=Os01t0102900-01.exon2;rank=2 +1 irgsp CDS 169751 169909 . - 1 ID=CDS:Os01t0102900-01;Parent=transcript:Os01t0102900-01;protein_id=Os01t0102900-01 +1 irgsp CDS 170091 170260 . - 0 ID=CDS:Os01t0102900-01;Parent=transcript:Os01t0102900-01;protein_id=Os01t0102900-01 +1 irgsp exon 170091 170316 . - . Parent=transcript:Os01t0102900-01;Name=Os01t0102900-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0102900-01.exon1;rank=1 +1 irgsp five_prime_UTR 170261 170316 . - . Parent=transcript:Os01t0102900-01 +### +1 irgsp gene 170798 173144 . - . ID=gene:Os01g0103000;biotype=protein_coding;description=Snf7 family protein. (Os01t0103000-01);gene_id=Os01g0103000;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 170798 173144 . - . ID=transcript:Os01t0103000-01;Parent=gene:Os01g0103000;biotype=protein_coding;transcript_id=Os01t0103000-01 +1 irgsp three_prime_UTR 170798 171044 . - . Parent=transcript:Os01t0103000-01 +1 irgsp exon 170798 171095 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0103000-01.exon7;rank=7 +1 irgsp CDS 171045 171095 . - 0 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 171406 171554 . - . 
Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0103000-01.exon6;rank=6 +1 irgsp CDS 171406 171554 . - 2 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 171764 171875 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103000-01.exon5;rank=5 +1 irgsp CDS 171764 171875 . - 0 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 172398 172469 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0103000-01.exon4;rank=4 +1 irgsp CDS 172398 172469 . - 0 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 172578 172671 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0103000-01.exon3;rank=3 +1 irgsp CDS 172578 172671 . - 1 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 172770 172921 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0103000-01.exon2;rank=2 +1 irgsp CDS 172770 172921 . - 0 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp CDS 173004 173072 . - 0 ID=CDS:Os01t0103000-01;Parent=transcript:Os01t0103000-01;protein_id=Os01t0103000-01 +1 irgsp exon 173004 173144 . - . Parent=transcript:Os01t0103000-01;Name=Os01t0103000-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0103000-01.exon1;rank=1 +1 irgsp five_prime_UTR 173073 173144 . - . Parent=transcript:Os01t0103000-01 +### +1 irgsp gene 178607 180575 . + . 
ID=gene:Os01g0103100;biotype=protein_coding;description=TGF-beta receptor%2C type I/II extracellular region family protein. (Os01t0103100-01)%3BSimilar to predicted protein. (Os01t0103100-02);gene_id=Os01g0103100;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 178607 180548 . + . ID=transcript:Os01t0103100-01;Parent=gene:Os01g0103100;biotype=protein_coding;transcript_id=Os01t0103100-01 +1 irgsp five_prime_UTR 178607 178641 . + . Parent=transcript:Os01t0103100-01 +1 irgsp exon 178607 180548 . + . Parent=transcript:Os01t0103100-01;Name=Os01t0103100-01.exon1;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103100-01.exon1;rank=1 +1 irgsp CDS 178642 180462 . + 0 ID=CDS:Os01t0103100-01;Parent=transcript:Os01t0103100-01;protein_id=Os01t0103100-01 +1 irgsp three_prime_UTR 180463 180548 . + . Parent=transcript:Os01t0103100-01 +1 irgsp mRNA 178652 180575 . + . ID=transcript:Os01t0103100-02;Parent=gene:Os01g0103100;biotype=protein_coding;transcript_id=Os01t0103100-02 +1 irgsp five_prime_UTR 178652 178677 . + . Parent=transcript:Os01t0103100-02 +1 irgsp exon 178652 180575 . + . Parent=transcript:Os01t0103100-02;Name=Os01t0103100-02.exon1;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103100-02.exon1;rank=1 +1 irgsp CDS 178678 180462 . + 0 ID=CDS:Os01t0103100-02;Parent=transcript:Os01t0103100-02;protein_id=Os01t0103100-02 +1 irgsp three_prime_UTR 180463 180575 . + . Parent=transcript:Os01t0103100-02 +### +1 irgsp gene 178815 180433 . - . ID=gene:Os01g0103075;biotype=protein_coding;description=Hypothetical protein. (Os01t0103075-00);gene_id=Os01g0103075;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 178815 180433 . - . ID=transcript:Os01t0103075-00;Parent=gene:Os01g0103075;biotype=protein_coding;transcript_id=Os01t0103075-00 +1 irgsp three_prime_UTR 178815 179511 . - . Parent=transcript:Os01t0103075-00 +1 irgsp exon 178815 180433 . - . 
Parent=transcript:Os01t0103075-00;Name=Os01t0103075-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103075-00.exon1;rank=1 +1 irgsp CDS 179512 180054 . - 0 ID=CDS:Os01t0103075-00;Parent=transcript:Os01t0103075-00;protein_id=Os01t0103075-00 +1 irgsp five_prime_UTR 180055 180433 . - . Parent=transcript:Os01t0103075-00 +### +1 Ensembl_Plants ncRNA_gene 182074 182154 . + . ID=gene:ENSRNA049442722;Name=tRNA-Leu;biotype=tRNA;description=tRNA-Leu for anticodon AAG;gene_id=ENSRNA049442722;logic_name=trnascan_gene +1 Ensembl_Plants tRNA 182074 182154 . + . ID=transcript:ENSRNA049442722-T1;Parent=gene:ENSRNA049442722;biotype=tRNA;transcript_id=ENSRNA049442722-T1 +1 Ensembl_Plants exon 182074 182154 . + . Parent=transcript:ENSRNA049442722-T1;Name=ENSRNA049442722-E1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=ENSRNA049442722-E1;rank=1 +### +1 irgsp gene 185189 185828 . - . ID=gene:Os01g0103400;biotype=protein_coding;description=Hypothetical gene. (Os01t0103400-01);gene_id=Os01g0103400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 185189 185828 . - . ID=transcript:Os01t0103400-01;Parent=gene:Os01g0103400;biotype=protein_coding;transcript_id=Os01t0103400-01 +1 irgsp three_prime_UTR 185189 185434 . - . Parent=transcript:Os01t0103400-01 +1 irgsp exon 185189 185828 . - . Parent=transcript:Os01t0103400-01;Name=Os01t0103400-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103400-01.exon1;rank=1 +1 irgsp CDS 185435 185827 . - 0 ID=CDS:Os01t0103400-01;Parent=transcript:Os01t0103400-01;protein_id=Os01t0103400-01 +1 irgsp five_prime_UTR 185828 185828 . - . Parent=transcript:Os01t0103400-01 +### +1 irgsp repeat_region 186000 186100 . + . ID=fakeRepeat2 +### +1 irgsp gene 186250 190904 . - . ID=gene:Os01g0103600;biotype=protein_coding;description=Similar to sterol-8%2C7-isomerase. (Os01t0103600-01)%3BEmopamil-binding family protein. 
(Os01t0103600-02);gene_id=Os01g0103600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 186250 190262 . - . ID=transcript:Os01t0103600-02;Parent=gene:Os01g0103600;biotype=protein_coding;transcript_id=Os01t0103600-02 +1 irgsp three_prime_UTR 186250 186515 . - . Parent=transcript:Os01t0103600-02 +1 irgsp exon 186250 186771 . - . Parent=transcript:Os01t0103600-02;Name=Os01t0103600-02.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0103600-02.exon4;rank=4 +1 irgsp CDS 186516 186771 . - 1 ID=CDS:Os01t0103600-02;Parent=transcript:Os01t0103600-02;protein_id=Os01t0103600-02 +1 irgsp exon 189607 189715 . - . Parent=transcript:Os01t0103600-02;Name=Os01t0103600-02.exon3;constitutive=0;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0103600-02.exon3;rank=3 +1 irgsp CDS 189607 189715 . - 2 ID=CDS:Os01t0103600-02;Parent=transcript:Os01t0103600-02;protein_id=Os01t0103600-02 +1 irgsp exon 189841 189990 . - . Parent=transcript:Os01t0103600-02;Name=Os01t0103600-02.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0103600-02.exon2;rank=2 +1 irgsp CDS 189841 189990 . - 2 ID=CDS:Os01t0103600-02;Parent=transcript:Os01t0103600-02;protein_id=Os01t0103600-02 +1 irgsp CDS 190087 190231 . - 0 ID=CDS:Os01t0103600-02;Parent=transcript:Os01t0103600-02;protein_id=Os01t0103600-02 +1 irgsp exon 190087 190262 . - . Parent=transcript:Os01t0103600-02;Name=Os01t0103600-02.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0103600-02.exon1;rank=1 +1 irgsp five_prime_UTR 190232 190262 . - . Parent=transcript:Os01t0103600-02 +1 irgsp mRNA 187345 190904 . - . ID=transcript:Os01t0103600-01;Parent=gene:Os01g0103600;biotype=protein_coding;transcript_id=Os01t0103600-01 +1 irgsp three_prime_UTR 187345 189395 . - . Parent=transcript:Os01t0103600-01 +1 irgsp exon 187345 189715 . - . 
Parent=transcript:Os01t0103600-01;Name=Os01t0103600-01.exon3;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0103600-01.exon3;rank=3 +1 irgsp CDS 189396 189715 . - 2 ID=CDS:Os01t0103600-01;Parent=transcript:Os01t0103600-01;protein_id=Os01t0103600-01 +1 irgsp exon 189841 189990 . - . Parent=transcript:Os01t0103600-01;Name=Os01t0103600-02.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0103600-02.exon2;rank=2 +1 irgsp CDS 189841 189990 . - 2 ID=CDS:Os01t0103600-01;Parent=transcript:Os01t0103600-01;protein_id=Os01t0103600-01 +1 irgsp CDS 190087 190231 . - 0 ID=CDS:Os01t0103600-01;Parent=transcript:Os01t0103600-01;protein_id=Os01t0103600-01 +1 irgsp exon 190087 190904 . - . Parent=transcript:Os01t0103600-01;Name=Os01t0103600-01.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0103600-01.exon1;rank=1 +1 irgsp five_prime_UTR 190232 190904 . - . Parent=transcript:Os01t0103600-01 +### +1 irgsp gene 187545 188586 . + . ID=gene:Os01g0103650;biotype=protein_coding;description=Hypothetical gene. (Os01t0103650-00);gene_id=Os01g0103650;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 187545 188586 . + . ID=transcript:Os01t0103650-00;Parent=gene:Os01g0103650;biotype=protein_coding;transcript_id=Os01t0103650-00 +1 irgsp five_prime_UTR 187545 187546 . + . Parent=transcript:Os01t0103650-00 +1 irgsp exon 187545 188020 . + . Parent=transcript:Os01t0103650-00;Name=Os01t0103650-00.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103650-00.exon1;rank=1 +1 irgsp CDS 187547 187768 . + 0 ID=CDS:Os01t0103650-00;Parent=transcript:Os01t0103650-00;protein_id=Os01t0103650-00 +1 irgsp three_prime_UTR 187769 188020 . + . Parent=transcript:Os01t0103650-00 +1 irgsp exon 188060 188385 . + . Parent=transcript:Os01t0103650-00;Name=Os01t0103650-00.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103650-00.exon2;rank=2 +1 irgsp three_prime_UTR 188060 188385 . + . 
Parent=transcript:Os01t0103650-00 +1 irgsp exon 188455 188586 . + . Parent=transcript:Os01t0103650-00;Name=Os01t0103650-00.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103650-00.exon3;rank=3 +1 irgsp three_prime_UTR 188455 188586 . + . Parent=transcript:Os01t0103650-00 +### +1 irgsp gene 191037 196287 . + . ID=gene:Os01g0103700;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0103700-01);gene_id=Os01g0103700;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 191037 196287 . + . ID=transcript:Os01t0103700-01;Parent=gene:Os01g0103700;biotype=protein_coding;transcript_id=Os01t0103700-01 +1 irgsp exon 191037 191161 . + . Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103700-01.exon1;rank=1 +1 irgsp five_prime_UTR 191037 191161 . + . Parent=transcript:Os01t0103700-01 +1 irgsp five_prime_UTR 191625 191693 . + . Parent=transcript:Os01t0103700-01 +1 irgsp exon 191625 191705 . + . Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0103700-01.exon2;rank=2 +1 irgsp CDS 191694 191705 . + 0 ID=CDS:Os01t0103700-01;Parent=transcript:Os01t0103700-01;protein_id=Os01t0103700-01 +1 irgsp exon 192399 192506 . + . Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0103700-01.exon3;rank=3 +1 irgsp CDS 192399 192506 . + 0 ID=CDS:Os01t0103700-01;Parent=transcript:Os01t0103700-01;protein_id=Os01t0103700-01 +1 irgsp exon 192958 193161 . + . Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0103700-01.exon4;rank=4 +1 irgsp CDS 192958 193161 . + 0 ID=CDS:Os01t0103700-01;Parent=transcript:Os01t0103700-01;protein_id=Os01t0103700-01 +1 irgsp exon 193248 193356 . + . 
Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103700-01.exon5;rank=5 +1 irgsp CDS 193248 193356 . + 0 ID=CDS:Os01t0103700-01;Parent=transcript:Os01t0103700-01;protein_id=Os01t0103700-01 +1 irgsp CDS 193434 193507 . + 2 ID=CDS:Os01t0103700-01;Parent=transcript:Os01t0103700-01;protein_id=Os01t0103700-01 +1 irgsp exon 193434 196287 . + . Parent=transcript:Os01t0103700-01;Name=Os01t0103700-01.exon6;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0103700-01.exon6;rank=6 +1 irgsp three_prime_UTR 193508 196287 . + . Parent=transcript:Os01t0103700-01 +### +1 irgsp gene 197647 200803 . + . ID=gene:Os01g0103800;Name=OsDW1-01g;biotype=protein_coding;description=Conserved hypothetical protein. (Os01t0103800-01);gene_id=Os01g0103800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 197647 200803 . + . ID=transcript:Os01t0103800-01;Parent=gene:Os01g0103800;biotype=protein_coding;transcript_id=Os01t0103800-01 +1 irgsp exon 197647 197838 . + . Parent=transcript:Os01t0103800-01;Name=Os01t0103800-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0103800-01.exon1;rank=1 +1 irgsp five_prime_UTR 197647 197838 . + . Parent=transcript:Os01t0103800-01 +1 irgsp five_prime_UTR 198034 198129 . + . Parent=transcript:Os01t0103800-01 +1 irgsp exon 198034 198225 . + . Parent=transcript:Os01t0103800-01;Name=Os01t0103800-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0103800-01.exon2;rank=2 +1 irgsp CDS 198130 198225 . + 0 ID=CDS:Os01t0103800-01;Parent=transcript:Os01t0103800-01;protein_id=Os01t0103800-01 +1 irgsp exon 198830 200036 . + . Parent=transcript:Os01t0103800-01;Name=Os01t0103800-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103800-01.exon3;rank=3 +1 irgsp CDS 198830 200036 . + 0 ID=CDS:Os01t0103800-01;Parent=transcript:Os01t0103800-01;protein_id=Os01t0103800-01 +1 irgsp CDS 200253 200479 . 
+ 2 ID=CDS:Os01t0103800-01;Parent=transcript:Os01t0103800-01;protein_id=Os01t0103800-01 +1 irgsp exon 200253 200803 . + . Parent=transcript:Os01t0103800-01;Name=Os01t0103800-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0103800-01.exon4;rank=4 +1 irgsp three_prime_UTR 200480 200803 . + . Parent=transcript:Os01t0103800-01 +### +1 irgsp gene 201944 206202 . + . ID=gene:Os01g0103900;biotype=protein_coding;description=Polynucleotidyl transferase%2C Ribonuclease H fold domain containing protein. (Os01t0103900-01);gene_id=Os01g0103900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 201944 206202 . + . ID=transcript:Os01t0103900-01;Parent=gene:Os01g0103900;biotype=protein_coding;transcript_id=Os01t0103900-01 +1 irgsp five_prime_UTR 201944 202041 . + . Parent=transcript:Os01t0103900-01 +1 irgsp exon 201944 202110 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0103900-01.exon1;rank=1 +1 irgsp CDS 202042 202110 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 202252 202359 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0103900-01.exon2;rank=2 +1 irgsp CDS 202252 202359 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 203007 203127 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103900-01.exon3;rank=3 +1 irgsp CDS 203007 203127 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 203302 203429 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0103900-01.exon4;rank=4 +1 irgsp CDS 203302 203429 . 
+ 2 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 203511 203658 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon5;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103900-01.exon5;rank=5 +1 irgsp CDS 203511 203658 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 203760 203938 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0103900-01.exon6;rank=6 +1 irgsp CDS 203760 203938 . + 2 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 204203 204440 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon7;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0103900-01.exon7;rank=7 +1 irgsp CDS 204203 204440 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 204543 204635 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon8;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0103900-01.exon8;rank=8 +1 irgsp CDS 204543 204635 . + 2 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 204730 204875 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0103900-01.exon9;rank=9 +1 irgsp CDS 204730 204875 . + 2 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 205042 205149 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0103900-01.exon10;rank=10 +1 irgsp CDS 205042 205149 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 205290 205378 . + . 
Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon11;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0103900-01.exon11;rank=11 +1 irgsp CDS 205290 205378 . + 0 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp CDS 205534 205543 . + 1 ID=CDS:Os01t0103900-01;Parent=transcript:Os01t0103900-01;protein_id=Os01t0103900-01 +1 irgsp exon 205534 206202 . + . Parent=transcript:Os01t0103900-01;Name=Os01t0103900-01.exon12;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0103900-01.exon12;rank=12 +1 irgsp three_prime_UTR 205544 206202 . + . Parent=transcript:Os01t0103900-01 +### +1 irgsp gene 206131 209606 . - . ID=gene:Os01g0104000;biotype=protein_coding;description=C-type lectin domain containing protein. (Os01t0104000-01)%3BSimilar to predicted protein. (Os01t0104000-02);gene_id=Os01g0104000;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 206131 209581 . - . ID=transcript:Os01t0104000-02;Parent=gene:Os01g0104000;biotype=protein_coding;transcript_id=Os01t0104000-02 +1 irgsp three_prime_UTR 206131 206449 . - . Parent=transcript:Os01t0104000-02 +1 irgsp exon 206131 207029 . - . Parent=transcript:Os01t0104000-02;Name=Os01t0104000-02.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0104000-02.exon4;rank=4 +1 irgsp CDS 206450 207029 . - 1 ID=CDS:Os01t0104000-02;Parent=transcript:Os01t0104000-02;protein_id=Os01t0104000-02 +1 irgsp exon 207706 208273 . - . Parent=transcript:Os01t0104000-02;Name=Os01t0104000-02.exon3;constitutive=0;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0104000-02.exon3;rank=3 +1 irgsp CDS 207706 208273 . - 2 ID=CDS:Os01t0104000-02;Parent=transcript:Os01t0104000-02;protein_id=Os01t0104000-02 +1 irgsp exon 208408 208836 . - . Parent=transcript:Os01t0104000-02;Name=Os01t0104000-01.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0104000-01.exon2;rank=2 +1 irgsp CDS 208408 208836 . 
- 2 ID=CDS:Os01t0104000-02;Parent=transcript:Os01t0104000-02;protein_id=Os01t0104000-02 +1 irgsp CDS 209438 209525 . - 0 ID=CDS:Os01t0104000-02;Parent=transcript:Os01t0104000-02;protein_id=Os01t0104000-02 +1 irgsp exon 209438 209581 . - . Parent=transcript:Os01t0104000-02;Name=Os01t0104000-02.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0104000-02.exon1;rank=1 +1 irgsp five_prime_UTR 209526 209581 . - . Parent=transcript:Os01t0104000-02 +1 irgsp mRNA 206134 209606 . - . ID=transcript:Os01t0104000-01;Parent=gene:Os01g0104000;biotype=protein_coding;transcript_id=Os01t0104000-01 +1 irgsp three_prime_UTR 206134 206449 . - . Parent=transcript:Os01t0104000-01 +1 irgsp exon 206134 207029 . - . Parent=transcript:Os01t0104000-01;Name=Os01t0104000-01.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0104000-01.exon4;rank=4 +1 irgsp CDS 206450 207029 . - 1 ID=CDS:Os01t0104000-01;Parent=transcript:Os01t0104000-01;protein_id=Os01t0104000-01 +1 irgsp exon 207706 208276 . - . Parent=transcript:Os01t0104000-01;Name=Os01t0104000-01.exon3;constitutive=0;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0104000-01.exon3;rank=3 +1 irgsp CDS 207706 208276 . - 2 ID=CDS:Os01t0104000-01;Parent=transcript:Os01t0104000-01;protein_id=Os01t0104000-01 +1 irgsp exon 208408 208836 . - . Parent=transcript:Os01t0104000-01;Name=Os01t0104000-01.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0104000-01.exon2;rank=2 +1 irgsp CDS 208408 208836 . - 2 ID=CDS:Os01t0104000-01;Parent=transcript:Os01t0104000-01;protein_id=Os01t0104000-01 +1 irgsp CDS 209438 209525 . - 0 ID=CDS:Os01t0104000-01;Parent=transcript:Os01t0104000-01;protein_id=Os01t0104000-01 +1 irgsp exon 209438 209606 . - . Parent=transcript:Os01t0104000-01;Name=Os01t0104000-01.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0104000-01.exon1;rank=1 +1 irgsp five_prime_UTR 209526 209606 . - . 
Parent=transcript:Os01t0104000-01 +### +1 irgsp gene 209771 214173 . + . ID=gene:Os01g0104100;Name=cold-inducible%2C cold-inducible zinc finger protein;biotype=protein_coding;description=Similar to protein binding / zinc ion binding. (Os01t0104100-01)%3BSimilar to protein binding / zinc ion binding. (Os01t0104100-02);gene_id=Os01g0104100;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 209771 214173 . + . ID=transcript:Os01t0104100-01;Parent=gene:Os01g0104100;biotype=protein_coding;transcript_id=Os01t0104100-01 +1 irgsp exon 209771 209896 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104100-01.exon1;rank=1 +1 irgsp CDS 209771 209896 . + 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 210244 210563 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104100-01.exon2;rank=2 +1 irgsp CDS 210244 210563 . + 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 210659 210890 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104100-01.exon3;rank=3 +1 irgsp CDS 210659 210890 . + 1 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 211015 211160 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104100-01.exon4;rank=4 +1 irgsp CDS 211015 211160 . + 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 212265 212352 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104100-01.exon5;rank=5 +1 irgsp CDS 212265 212352 . 
+ 1 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 212433 212579 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104100-01.exon6;rank=6 +1 irgsp CDS 212433 212579 . + 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 213490 213639 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104100-01.exon7;rank=7 +1 irgsp CDS 213490 213639 . + 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp CDS 213741 213788 . + 0 ID=CDS:Os01t0104100-01;Parent=transcript:Os01t0104100-01;protein_id=Os01t0104100-01 +1 irgsp exon 213741 214173 . + . Parent=transcript:Os01t0104100-01;Name=Os01t0104100-01.exon8;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0104100-01.exon8;rank=8 +1 irgsp three_prime_UTR 213789 214173 . + . Parent=transcript:Os01t0104100-01 +1 irgsp mRNA 209794 214147 . + . ID=transcript:Os01t0104100-02;Parent=gene:Os01g0104100;biotype=protein_coding;transcript_id=Os01t0104100-02 +1 irgsp five_prime_UTR 209794 209794 . + . Parent=transcript:Os01t0104100-02 +1 irgsp exon 209794 209896 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-02.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104100-02.exon1;rank=1 +1 irgsp CDS 209795 209896 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 210244 210563 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104100-01.exon2;rank=2 +1 irgsp CDS 210244 210563 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 210659 210890 . + . 
Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104100-01.exon3;rank=3 +1 irgsp CDS 210659 210890 . + 1 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 211015 211160 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon4;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104100-01.exon4;rank=4 +1 irgsp CDS 211015 211160 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 212265 212352 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104100-01.exon5;rank=5 +1 irgsp CDS 212265 212352 . + 1 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 212433 212579 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104100-01.exon6;rank=6 +1 irgsp CDS 212433 212579 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 213490 213639 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104100-01.exon7;rank=7 +1 irgsp CDS 213490 213639 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp CDS 213741 213788 . + 0 ID=CDS:Os01t0104100-02;Parent=transcript:Os01t0104100-02;protein_id=Os01t0104100-02 +1 irgsp exon 213741 214147 . + . Parent=transcript:Os01t0104100-02;Name=Os01t0104100-02.exon8;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0104100-02.exon8;rank=8 +1 irgsp three_prime_UTR 213789 214147 . + . Parent=transcript:Os01t0104100-02 +### +1 irgsp gene 216212 217345 . + . 
ID=gene:Os01g0104200;Name=NAC DOMAIN-CONTAINING PROTEIN 16;biotype=protein_coding;description=No apical meristem (NAM) protein domain containing protein. (Os01t0104200-00);gene_id=Os01g0104200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 216212 217345 . + . ID=transcript:Os01t0104200-00;Parent=gene:Os01g0104200;biotype=protein_coding;transcript_id=Os01t0104200-00 +1 irgsp exon 216212 216769 . + . Parent=transcript:Os01t0104200-00;Name=Os01t0104200-00.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104200-00.exon1;rank=1 +1 irgsp CDS 216212 216769 . + 0 ID=CDS:Os01t0104200-00;Parent=transcript:Os01t0104200-00;protein_id=Os01t0104200-00 +1 irgsp exon 216884 217345 . + . Parent=transcript:Os01t0104200-00;Name=Os01t0104200-00.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104200-00.exon2;rank=2 +1 irgsp CDS 216884 217345 . + 0 ID=CDS:Os01t0104200-00;Parent=transcript:Os01t0104200-00;protein_id=Os01t0104200-00 +### +1 irgsp gene 226897 229301 . + . ID=gene:Os01g0104400;biotype=protein_coding;description=Ricin B-related lectin domain containing protein. (Os01t0104400-01)%3BRicin B-related lectin domain containing protein. (Os01t0104400-02)%3BRicin B-related lectin domain containing protein. (Os01t0104400-03);gene_id=Os01g0104400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 226897 229229 . + . ID=transcript:Os01t0104400-01;Parent=gene:Os01g0104400;biotype=protein_coding;transcript_id=Os01t0104400-01 +1 irgsp five_prime_UTR 226897 227181 . + . Parent=transcript:Os01t0104400-01 +1 irgsp exon 226897 227634 . + . Parent=transcript:Os01t0104400-01;Name=Os01t0104400-01.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104400-01.exon1;rank=1 +1 irgsp CDS 227182 227634 . + 0 ID=CDS:Os01t0104400-01;Parent=transcript:Os01t0104400-01;protein_id=Os01t0104400-01 +1 irgsp exon 227742 227864 . + . 
Parent=transcript:Os01t0104400-01;Name=Os01t0104400-03.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104400-03.exon2;rank=2 +1 irgsp CDS 227742 227864 . + 0 ID=CDS:Os01t0104400-01;Parent=transcript:Os01t0104400-01;protein_id=Os01t0104400-01 +1 irgsp exon 228557 228785 . + . Parent=transcript:Os01t0104400-01;Name=Os01t0104400-03.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104400-03.exon3;rank=3 +1 irgsp CDS 228557 228785 . + 0 ID=CDS:Os01t0104400-01;Parent=transcript:Os01t0104400-01;protein_id=Os01t0104400-01 +1 irgsp CDS 228930 228931 . + 2 ID=CDS:Os01t0104400-01;Parent=transcript:Os01t0104400-01;protein_id=Os01t0104400-01 +1 irgsp exon 228930 229229 . + . Parent=transcript:Os01t0104400-01;Name=Os01t0104400-01.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104400-01.exon4;rank=4 +1 irgsp three_prime_UTR 228932 229229 . + . Parent=transcript:Os01t0104400-01 +1 irgsp mRNA 227139 229301 . + . ID=transcript:Os01t0104400-02;Parent=gene:Os01g0104400;biotype=protein_coding;transcript_id=Os01t0104400-02 +1 irgsp five_prime_UTR 227139 227181 . + . Parent=transcript:Os01t0104400-02 +1 irgsp exon 227139 227634 . + . Parent=transcript:Os01t0104400-02;Name=Os01t0104400-02.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104400-02.exon1;rank=1 +1 irgsp CDS 227182 227634 . + 0 ID=CDS:Os01t0104400-02;Parent=transcript:Os01t0104400-02;protein_id=Os01t0104400-02 +1 irgsp exon 227742 227864 . + . Parent=transcript:Os01t0104400-02;Name=Os01t0104400-03.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104400-03.exon2;rank=2 +1 irgsp CDS 227742 227864 . + 0 ID=CDS:Os01t0104400-02;Parent=transcript:Os01t0104400-02;protein_id=Os01t0104400-02 +1 irgsp exon 228557 228785 . + . Parent=transcript:Os01t0104400-02;Name=Os01t0104400-03.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104400-03.exon3;rank=3 +1 irgsp CDS 228557 228785 . 
+ 0 ID=CDS:Os01t0104400-02;Parent=transcript:Os01t0104400-02;protein_id=Os01t0104400-02 +1 irgsp CDS 228930 228931 . + 2 ID=CDS:Os01t0104400-02;Parent=transcript:Os01t0104400-02;protein_id=Os01t0104400-02 +1 irgsp exon 228930 229301 . + . Parent=transcript:Os01t0104400-02;Name=Os01t0104400-02.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104400-02.exon4;rank=4 +1 irgsp three_prime_UTR 228932 229301 . + . Parent=transcript:Os01t0104400-02 +1 irgsp mRNA 227179 229214 . + . ID=transcript:Os01t0104400-03;Parent=gene:Os01g0104400;biotype=protein_coding;transcript_id=Os01t0104400-03 +1 irgsp five_prime_UTR 227179 227181 . + . Parent=transcript:Os01t0104400-03 +1 irgsp exon 227179 227634 . + . Parent=transcript:Os01t0104400-03;Name=Os01t0104400-03.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104400-03.exon1;rank=1 +1 irgsp CDS 227182 227634 . + 0 ID=CDS:Os01t0104400-03;Parent=transcript:Os01t0104400-03;protein_id=Os01t0104400-03 +1 irgsp exon 227742 227864 . + . Parent=transcript:Os01t0104400-03;Name=Os01t0104400-03.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104400-03.exon2;rank=2 +1 irgsp CDS 227742 227864 . + 0 ID=CDS:Os01t0104400-03;Parent=transcript:Os01t0104400-03;protein_id=Os01t0104400-03 +1 irgsp exon 228557 228785 . + . Parent=transcript:Os01t0104400-03;Name=Os01t0104400-03.exon3;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104400-03.exon3;rank=3 +1 irgsp CDS 228557 228785 . + 0 ID=CDS:Os01t0104400-03;Parent=transcript:Os01t0104400-03;protein_id=Os01t0104400-03 +1 irgsp CDS 228930 228931 . + 2 ID=CDS:Os01t0104400-03;Parent=transcript:Os01t0104400-03;protein_id=Os01t0104400-03 +1 irgsp exon 228930 229214 . + . Parent=transcript:Os01t0104400-03;Name=Os01t0104400-03.exon4;constitutive=0;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104400-03.exon4;rank=4 +1 irgsp three_prime_UTR 228932 229214 . + . 
Parent=transcript:Os01t0104400-03 +### +1 irgsp gene 241680 243440 . + . ID=gene:Os01g0104500;Name=NAC DOMAIN-CONTAINING PROTEIN 20;biotype=protein_coding;description=No apical meristem (NAM) protein domain containing protein. (Os01t0104500-01);gene_id=Os01g0104500;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 241680 243440 . + . ID=transcript:Os01t0104500-01;Parent=gene:Os01g0104500;biotype=protein_coding;transcript_id=Os01t0104500-01 +1 irgsp exon 241680 241702 . + . Parent=transcript:Os01t0104500-01;Name=Os01t0104500-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0104500-01.exon1;rank=1 +1 irgsp five_prime_UTR 241680 241702 . + . Parent=transcript:Os01t0104500-01 +1 irgsp five_prime_UTR 241866 241907 . + . Parent=transcript:Os01t0104500-01 +1 irgsp exon 241866 242091 . + . Parent=transcript:Os01t0104500-01;Name=Os01t0104500-01.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0104500-01.exon2;rank=2 +1 irgsp CDS 241908 242091 . + 0 ID=CDS:Os01t0104500-01;Parent=transcript:Os01t0104500-01;protein_id=Os01t0104500-01 +1 irgsp CDS 242199 242977 . + 2 ID=CDS:Os01t0104500-01;Parent=transcript:Os01t0104500-01;protein_id=Os01t0104500-01 +1 irgsp exon 242199 243440 . + . Parent=transcript:Os01t0104500-01;Name=Os01t0104500-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104500-01.exon3;rank=3 +1 irgsp three_prime_UTR 242978 243440 . + . Parent=transcript:Os01t0104500-01 +### +1 irgsp gene 248828 256872 . - . ID=gene:Os01g0104600;Name=DE-ETIOLATED1;biotype=protein_coding;description=Homolog of Arabidopsis DE-ETIOLATED1 (DET1)%2C Modulation of the ABA signaling pathway and ABA biosynthesis%2C Regulation of chlorophyll content (Os01t0104600-01)%3BSimilar to Light-mediated development protein DET1 (Deetiolated1 homolog) (tDET1) (High pigmentation protein 2) (Protein dark green). (Os01t0104600-02);gene_id=Os01g0104600;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 248828 256571 . - . 
ID=transcript:Os01t0104600-02;Parent=gene:Os01g0104600;biotype=protein_coding;transcript_id=Os01t0104600-02 +1 irgsp three_prime_UTR 248828 248970 . - . Parent=transcript:Os01t0104600-02 +1 irgsp exon 248828 249107 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon11;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104600-01.exon11;rank=11 +1 irgsp CDS 248971 249107 . - 2 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 249369 249468 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon10;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104600-01.exon10;rank=10 +1 irgsp CDS 249369 249468 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 249861 249956 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon9;rank=9 +1 irgsp CDS 249861 249956 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 250617 250781 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon8;rank=8 +1 irgsp CDS 250617 250781 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 250860 250940 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon7;rank=7 +1 irgsp CDS 250860 250940 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 251026 251082 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon6;rank=6 +1 irgsp CDS 251026 251082 . 
- 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 251316 251384 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon5;rank=5 +1 irgsp CDS 251316 251384 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 251695 251790 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon4;rank=4 +1 irgsp CDS 251695 251790 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 255325 255553 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104600-01.exon3;rank=3 +1 irgsp CDS 255325 255553 . - 1 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 255674 256098 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104600-01.exon2;rank=2 +1 irgsp CDS 255674 256098 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp CDS 256361 256441 . - 0 ID=CDS:Os01t0104600-02;Parent=transcript:Os01t0104600-02;protein_id=Os01t0104600-02 +1 irgsp exon 256361 256571 . - . Parent=transcript:Os01t0104600-02;Name=Os01t0104600-02.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104600-02.exon1;rank=1 +1 irgsp five_prime_UTR 256442 256571 . - . Parent=transcript:Os01t0104600-02 +1 irgsp mRNA 248828 256872 . - . ID=transcript:Os01t0104600-01;Parent=gene:Os01g0104600;biotype=protein_coding;transcript_id=Os01t0104600-01 +1 irgsp three_prime_UTR 248828 248970 . - . Parent=transcript:Os01t0104600-01 +1 irgsp exon 248828 249107 . - . 
Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon11;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0104600-01.exon11;rank=11 +1 irgsp CDS 248971 249107 . - 2 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 249369 249468 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon10;constitutive=1;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104600-01.exon10;rank=10 +1 irgsp CDS 249369 249468 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 249861 249956 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon9;rank=9 +1 irgsp CDS 249861 249956 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 250617 250781 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon8;rank=8 +1 irgsp CDS 250617 250781 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 250860 250940 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon7;rank=7 +1 irgsp CDS 250860 250940 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 251026 251082 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon6;rank=6 +1 irgsp CDS 251026 251082 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 251316 251384 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon5;rank=5 +1 irgsp CDS 251316 251384 . 
- 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 251695 251790 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104600-01.exon4;rank=4 +1 irgsp CDS 251695 251790 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 255325 255553 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104600-01.exon3;rank=3 +1 irgsp CDS 255325 255553 . - 1 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 255674 256098 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon2;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104600-01.exon2;rank=2 +1 irgsp CDS 255674 256098 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp CDS 256361 256441 . - 0 ID=CDS:Os01t0104600-01;Parent=transcript:Os01t0104600-01;protein_id=Os01t0104600-01 +1 irgsp exon 256361 256872 . - . Parent=transcript:Os01t0104600-01;Name=Os01t0104600-01.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104600-01.exon1;rank=1 +1 irgsp five_prime_UTR 256442 256872 . - . Parent=transcript:Os01t0104600-01 +### +1 irgsp gene 261530 268145 . + . ID=gene:Os01g0104800;biotype=protein_coding;description=Sas10/Utp3 family protein. (Os01t0104800-01)%3BHypothetical conserved gene. (Os01t0104800-02);gene_id=Os01g0104800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 261530 268145 . + . ID=transcript:Os01t0104800-01;Parent=gene:Os01g0104800;biotype=protein_coding;transcript_id=Os01t0104800-01 +1 irgsp five_prime_UTR 261530 261561 . + . Parent=transcript:Os01t0104800-01 +1 irgsp exon 261530 261661 . + . 
Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon1;constitutive=0;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0104800-01.exon1;rank=1 +1 irgsp CDS 261562 261661 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 261767 261805 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon2;constitutive=0;ensembl_end_phase=1;ensembl_phase=1;exon_id=Os01t0104800-01.exon2;rank=2 +1 irgsp CDS 261767 261805 . + 2 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 261895 261941 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon3;constitutive=0;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0104800-01.exon3;rank=3 +1 irgsp CDS 261895 261941 . + 2 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 262582 262681 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon4;constitutive=0;ensembl_end_phase=1;ensembl_phase=0;exon_id=Os01t0104800-01.exon4;rank=4 +1 irgsp CDS 262582 262681 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 262925 263181 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon5;constitutive=0;ensembl_end_phase=0;ensembl_phase=1;exon_id=Os01t0104800-01.exon5;rank=5 +1 irgsp CDS 262925 263181 . + 2 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 263525 263640 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon6;constitutive=0;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104800-01.exon6;rank=6 +1 irgsp CDS 263525 263640 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 264014 264098 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon7;rank=7 +1 irgsp CDS 264014 264098 . 
+ 1 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 265236 265415 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon8;rank=8 +1 irgsp CDS 265236 265415 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 265506 265649 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon9;rank=9 +1 irgsp CDS 265506 265649 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 265740 265817 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon10;rank=10 +1 irgsp CDS 265740 265817 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 265909 266045 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon11;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104800-01.exon11;rank=11 +1 irgsp CDS 265909 266045 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 266138 266246 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon12;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon12;rank=12 +1 irgsp CDS 266138 266246 . + 1 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 267237 267514 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon13;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104800-01.exon13;rank=13 +1 irgsp CDS 267237 267514 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 267591 267657 . + . 
Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon14;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon14;rank=14 +1 irgsp CDS 267591 267657 . + 1 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 267734 267802 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon15;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon15;rank=15 +1 irgsp CDS 267734 267802 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp CDS 267880 268011 . + 0 ID=CDS:Os01t0104800-01;Parent=transcript:Os01t0104800-01;protein_id=Os01t0104800-01 +1 irgsp exon 267880 268145 . + . Parent=transcript:Os01t0104800-01;Name=Os01t0104800-01.exon16;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0104800-01.exon16;rank=16 +1 irgsp three_prime_UTR 268012 268145 . + . Parent=transcript:Os01t0104800-01 +1 irgsp mRNA 263523 268120 . + . ID=transcript:Os01t0104800-02;Parent=gene:Os01g0104800;biotype=protein_coding;transcript_id=Os01t0104800-02 +1 irgsp five_prime_UTR 263523 263524 . + . Parent=transcript:Os01t0104800-02 +1 irgsp exon 263523 263640 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-02.exon1;constitutive=0;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0104800-02.exon1;rank=1 +1 irgsp CDS 263525 263640 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 264014 264098 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon7;rank=2 +1 irgsp CDS 264014 264098 . + 1 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 265236 265415 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon8;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon8;rank=3 +1 irgsp CDS 265236 265415 . 
+ 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 265506 265649 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon9;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon9;rank=4 +1 irgsp CDS 265506 265649 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 265740 265817 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon10;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon10;rank=5 +1 irgsp CDS 265740 265817 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 265909 266045 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon11;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104800-01.exon11;rank=6 +1 irgsp CDS 265909 266045 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 266138 266246 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon12;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon12;rank=7 +1 irgsp CDS 266138 266246 . + 1 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 267237 267514 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon13;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0104800-01.exon13;rank=8 +1 irgsp CDS 267237 267514 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 267591 267657 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon14;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0104800-01.exon14;rank=9 +1 irgsp CDS 267591 267657 . + 1 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 267734 267802 . + . 
Parent=transcript:Os01t0104800-02;Name=Os01t0104800-01.exon15;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0104800-01.exon15;rank=10 +1 irgsp CDS 267734 267802 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp CDS 267880 268011 . + 0 ID=CDS:Os01t0104800-02;Parent=transcript:Os01t0104800-02;protein_id=Os01t0104800-02 +1 irgsp exon 267880 268120 . + . Parent=transcript:Os01t0104800-02;Name=Os01t0104800-02.exon11;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0104800-02.exon11;rank=11 +1 irgsp three_prime_UTR 268012 268120 . + . Parent=transcript:Os01t0104800-02 +### +1 irgsp gene 270179 275084 . - . ID=gene:Os01g0104900;biotype=protein_coding;description=Transferase family protein. (Os01t0104900-01)%3BHypothetical conserved gene. (Os01t0104900-02);gene_id=Os01g0104900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 270179 275084 . - . ID=transcript:Os01t0104900-01;Parent=gene:Os01g0104900;biotype=protein_coding;transcript_id=Os01t0104900-01 +1 irgsp three_prime_UTR 270179 270355 . - . Parent=transcript:Os01t0104900-01 +1 irgsp exon 270179 271333 . - . Parent=transcript:Os01t0104900-01;Name=Os01t0104900-01.exon2;constitutive=0;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0104900-01.exon2;rank=2 +1 irgsp CDS 270356 271333 . - 0 ID=CDS:Os01t0104900-01;Parent=transcript:Os01t0104900-01;protein_id=Os01t0104900-01 +1 irgsp CDS 274529 274957 . - 0 ID=CDS:Os01t0104900-01;Parent=transcript:Os01t0104900-01;protein_id=Os01t0104900-01 +1 irgsp exon 274529 275084 . - . Parent=transcript:Os01t0104900-01;Name=Os01t0104900-01.exon1;constitutive=0;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0104900-01.exon1;rank=1 +1 irgsp five_prime_UTR 274958 275084 . - . Parent=transcript:Os01t0104900-01 +1 irgsp mRNA 270250 271518 . - . ID=transcript:Os01t0104900-02;Parent=gene:Os01g0104900;biotype=protein_coding;transcript_id=Os01t0104900-02 +1 irgsp three_prime_UTR 270250 270355 . - . 
Parent=transcript:Os01t0104900-02 +1 irgsp exon 270250 271333 . - . Parent=transcript:Os01t0104900-02;Name=Os01t0104900-02.exon2;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0104900-02.exon2;rank=2 +1 irgsp CDS 270356 271309 . - 0 ID=CDS:Os01t0104900-02;Parent=transcript:Os01t0104900-02;protein_id=Os01t0104900-02 +1 irgsp five_prime_UTR 271310 271333 . - . Parent=transcript:Os01t0104900-02 +1 irgsp exon 271457 271518 . - . Parent=transcript:Os01t0104900-02;Name=Os01t0104900-02.exon1;constitutive=0;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0104900-02.exon1;rank=1 +1 irgsp five_prime_UTR 271457 271518 . - . Parent=transcript:Os01t0104900-02 +### +1 irgsp gene 284762 291892 . - . ID=gene:Os01g0105300;biotype=protein_coding;description=Similar to HAT family dimerisation domain containing protein%2C expressed. (Os01t0105300-01);gene_id=Os01g0105300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 284762 291892 . - . ID=transcript:Os01t0105300-01;Parent=gene:Os01g0105300;biotype=protein_coding;transcript_id=Os01t0105300-01 +1 irgsp three_prime_UTR 284762 284930 . - . Parent=transcript:Os01t0105300-01 +1 irgsp exon 284762 287047 . - . Parent=transcript:Os01t0105300-01;Name=Os01t0105300-01.exon5;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105300-01.exon5;rank=5 +1 irgsp CDS 284931 285020 . - 0 ID=CDS:Os01t0105300-01;Parent=transcript:Os01t0105300-01;protein_id=Os01t0105300-01 +1 irgsp five_prime_UTR 285021 287047 . - . Parent=transcript:Os01t0105300-01 +1 irgsp exon 291398 291436 . - . Parent=transcript:Os01t0105300-01;Name=Os01t0105300-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105300-01.exon4;rank=4 +1 irgsp five_prime_UTR 291398 291436 . - . Parent=transcript:Os01t0105300-01 +1 irgsp exon 291520 291534 . - . 
Parent=transcript:Os01t0105300-01;Name=Os01t0105300-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105300-01.exon3;rank=3 +1 irgsp five_prime_UTR 291520 291534 . - . Parent=transcript:Os01t0105300-01 +1 irgsp exon 291678 291738 . - . Parent=transcript:Os01t0105300-01;Name=Os01t0105300-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105300-01.exon2;rank=2 +1 irgsp five_prime_UTR 291678 291738 . - . Parent=transcript:Os01t0105300-01 +1 irgsp exon 291838 291892 . - . Parent=transcript:Os01t0105300-01;Name=Os01t0105300-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105300-01.exon1;rank=1 +1 irgsp five_prime_UTR 291838 291892 . - . Parent=transcript:Os01t0105300-01 +### +1 irgsp gene 288372 292296 . + . ID=gene:Os01g0105400;biotype=protein_coding;description=Similar to Kinesin heavy chain. (Os01t0105400-01);gene_id=Os01g0105400;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 288372 292296 . + . ID=transcript:Os01t0105400-01;Parent=gene:Os01g0105400;biotype=protein_coding;transcript_id=Os01t0105400-01 +1 irgsp exon 288372 288846 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon1;rank=1 +1 irgsp five_prime_UTR 288372 288846 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 288950 289116 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon2;rank=2 +1 irgsp five_prime_UTR 288950 289116 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 289202 289572 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon3;rank=3 +1 irgsp five_prime_UTR 289202 289572 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 289661 289830 . + . 
Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon4;rank=4 +1 irgsp five_prime_UTR 289661 289830 . + . Parent=transcript:Os01t0105400-01 +1 irgsp five_prime_UTR 290395 290432 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 290395 290512 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon5;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0105400-01.exon5;rank=5 +1 irgsp CDS 290433 290512 . + 0 ID=CDS:Os01t0105400-01;Parent=transcript:Os01t0105400-01;protein_id=Os01t0105400-01 +1 irgsp CDS 291372 291558 . + 1 ID=CDS:Os01t0105400-01;Parent=transcript:Os01t0105400-01;protein_id=Os01t0105400-01 +1 irgsp exon 291372 291574 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon6;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0105400-01.exon6;rank=6 +1 irgsp three_prime_UTR 291559 291574 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 291648 291779 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon7;rank=7 +1 irgsp three_prime_UTR 291648 291779 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 291859 291948 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon8;rank=8 +1 irgsp three_prime_UTR 291859 291948 . + . Parent=transcript:Os01t0105400-01 +1 irgsp exon 292073 292296 . + . Parent=transcript:Os01t0105400-01;Name=Os01t0105400-01.exon9;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105400-01.exon9;rank=9 +1 irgsp three_prime_UTR 292073 292296 . + . Parent=transcript:Os01t0105400-01 +### +1 irgsp gene 303233 306736 . + . ID=gene:Os01g0105700;Name=basic helix-loop-helix protein 071;biotype=protein_coding;description=Basic helix-loop-helix dimerisation region bHLH domain containing protein. 
(Os01t0105700-01);gene_id=Os01g0105700;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 303233 306736 . + . ID=transcript:Os01t0105700-01;Parent=gene:Os01g0105700;biotype=protein_coding;transcript_id=Os01t0105700-01 +1 irgsp five_prime_UTR 303233 303328 . + . Parent=transcript:Os01t0105700-01 +1 irgsp exon 303233 303471 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0105700-01.exon1;rank=1 +1 irgsp CDS 303329 303471 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 303981 304509 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0105700-01.exon2;rank=2 +1 irgsp CDS 303981 304509 . + 1 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 305572 305718 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105700-01.exon3;rank=3 +1 irgsp CDS 305572 305718 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 305834 305899 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105700-01.exon4;rank=4 +1 irgsp CDS 305834 305899 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 305993 306058 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105700-01.exon5;rank=5 +1 irgsp CDS 305993 306058 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 306171 306245 . + . 
Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon6;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105700-01.exon6;rank=6 +1 irgsp CDS 306171 306245 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp CDS 306353 306493 . + 0 ID=CDS:Os01t0105700-01;Parent=transcript:Os01t0105700-01;protein_id=Os01t0105700-01 +1 irgsp exon 306353 306736 . + . Parent=transcript:Os01t0105700-01;Name=Os01t0105700-01.exon7;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0105700-01.exon7;rank=7 +1 irgsp three_prime_UTR 306494 306736 . + . Parent=transcript:Os01t0105700-01 +### +1 irgsp gene 306871 308842 . - . ID=gene:Os01g0105800;Name=IRON-SULFUR CLUSTER PROTEIN 9;biotype=protein_coding;description=Similar to Iron sulfur assembly protein 1. (Os01t0105800-01);gene_id=Os01g0105800;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 306871 308842 . - . ID=transcript:Os01t0105800-01;Parent=gene:Os01g0105800;biotype=protein_coding;transcript_id=Os01t0105800-01 +1 irgsp three_prime_UTR 306871 307123 . - . Parent=transcript:Os01t0105800-01 +1 irgsp exon 306871 307217 . - . Parent=transcript:Os01t0105800-01;Name=Os01t0105800-01.exon4;constitutive=1;ensembl_end_phase=-1;ensembl_phase=2;exon_id=Os01t0105800-01.exon4;rank=4 +1 irgsp CDS 307124 307217 . - 1 ID=CDS:Os01t0105800-01;Parent=transcript:Os01t0105800-01;protein_id=Os01t0105800-01 +1 irgsp exon 307296 307413 . - . Parent=transcript:Os01t0105800-01;Name=Os01t0105800-01.exon3;constitutive=1;ensembl_end_phase=2;ensembl_phase=1;exon_id=Os01t0105800-01.exon3;rank=3 +1 irgsp CDS 307296 307413 . - 2 ID=CDS:Os01t0105800-01;Parent=transcript:Os01t0105800-01;protein_id=Os01t0105800-01 +1 irgsp CDS 308397 308601 . - 0 ID=CDS:Os01t0105800-01;Parent=transcript:Os01t0105800-01;protein_id=Os01t0105800-01 +1 irgsp exon 308397 308626 . - . 
Parent=transcript:Os01t0105800-01;Name=Os01t0105800-01.exon2;constitutive=1;ensembl_end_phase=1;ensembl_phase=-1;exon_id=Os01t0105800-01.exon2;rank=2 +1 irgsp five_prime_UTR 308602 308626 . - . Parent=transcript:Os01t0105800-01 +1 irgsp exon 308703 308842 . - . Parent=transcript:Os01t0105800-01;Name=Os01t0105800-01.exon1;constitutive=1;ensembl_end_phase=-1;ensembl_phase=-1;exon_id=Os01t0105800-01.exon1;rank=1 +1 irgsp five_prime_UTR 308703 308842 . - . Parent=transcript:Os01t0105800-01 +### +1 irgsp gene 309520 313170 . - . ID=gene:Os01g0105900;biotype=protein_coding;description=Carbohydrate/purine kinase domain containing protein. (Os01t0105900-01);gene_id=Os01g0105900;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 309520 313170 . - . ID=transcript:Os01t0105900-01;Parent=gene:Os01g0105900;biotype=protein_coding;transcript_id=Os01t0105900-01 +1 irgsp three_prime_UTR 309520 309821 . - . Parent=transcript:Os01t0105900-01 +1 irgsp exon 309520 310070 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon8;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0105900-01.exon8;rank=8 +1 irgsp CDS 309822 310070 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 310256 310367 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon7;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0105900-01.exon7;rank=7 +1 irgsp CDS 310256 310367 . - 1 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 310455 310552 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon6;constitutive=1;ensembl_end_phase=2;ensembl_phase=0;exon_id=Os01t0105900-01.exon6;rank=6 +1 irgsp CDS 310455 310552 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 310632 310739 . - . 
Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon5;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105900-01.exon5;rank=5 +1 irgsp CDS 310632 310739 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 310880 310918 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon4;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105900-01.exon4;rank=4 +1 irgsp CDS 310880 310918 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 311002 311073 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon3;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105900-01.exon3;rank=3 +1 irgsp CDS 311002 311073 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 311163 311426 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=0;exon_id=Os01t0105900-01.exon2;rank=2 +1 irgsp CDS 311163 311426 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp CDS 312867 313064 . - 0 ID=CDS:Os01t0105900-01;Parent=transcript:Os01t0105900-01;protein_id=Os01t0105900-01 +1 irgsp exon 312867 313170 . - . Parent=transcript:Os01t0105900-01;Name=Os01t0105900-01.exon1;constitutive=1;ensembl_end_phase=0;ensembl_phase=-1;exon_id=Os01t0105900-01.exon1;rank=1 +1 irgsp five_prime_UTR 313065 313170 . - . Parent=transcript:Os01t0105900-01 +### +1 irgsp gene 319754 322205 . + . ID=gene:Os01g0106200;biotype=protein_coding;description=Similar to RER1A protein (AtRER1A). (Os01t0106200-01);gene_id=Os01g0106200;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 319754 322205 . + . ID=transcript:Os01t0106200-01;Parent=gene:Os01g0106200;biotype=protein_coding;transcript_id=Os01t0106200-01 +1 irgsp five_prime_UTR 319754 319874 . + . 
Parent=transcript:Os01t0106200-01 +1 irgsp exon 319754 320236 . + . Parent=transcript:Os01t0106200-01;Name=Os01t0106200-01.exon1;constitutive=1;ensembl_end_phase=2;ensembl_phase=-1;exon_id=Os01t0106200-01.exon1;rank=1 +1 irgsp CDS 319875 320236 . + 0 ID=CDS:Os01t0106200-01;Parent=transcript:Os01t0106200-01;protein_id=Os01t0106200-01 +1 irgsp exon 321468 321648 . + . Parent=transcript:Os01t0106200-01;Name=Os01t0106200-01.exon2;constitutive=1;ensembl_end_phase=0;ensembl_phase=2;exon_id=Os01t0106200-01.exon2;rank=2 +1 irgsp CDS 321468 321648 . + 1 ID=CDS:Os01t0106200-01;Parent=transcript:Os01t0106200-01;protein_id=Os01t0106200-01 +1 irgsp CDS 321928 321975 . + 0 ID=CDS:Os01t0106200-01;Parent=transcript:Os01t0106200-01;protein_id=Os01t0106200-01 +1 irgsp exon 321928 322205 . + . Parent=transcript:Os01t0106200-01;Name=Os01t0106200-01.exon3;constitutive=1;ensembl_end_phase=-1;ensembl_phase=0;exon_id=Os01t0106200-01.exon3;rank=3 +1 irgsp three_prime_UTR 321976 322205 . + . Parent=transcript:Os01t0106200-01 +### +1 irgsp gene 322591 323923 . - . ID=gene:Os01g0106300;biotype=protein_coding;description=Similar to Isoflavone reductase homolog IRL (EC 1.3.1.-). (Os01t0106300-01);gene_id=Os01g0106300;logic_name=irgspv1.0-20170804-genes +1 irgsp mRNA 322591 323923 . - . ID=transcript:Os01t0106300-01;Parent=gene:Os01g0106300;biotype=protein_coding;transcript_id=Os01t0106300-01 +1 irgsp three_prime_UTR 322591 322809 . - . Parent=transcript:Os01t0106300-01 +1 irgsp exon 322591 322973 . - . 
Parent=transcript:Os01t0106300-01;Name=Os01t0106300-01.exon2;constitutive=1;ensembl_end_phase=-1;ensembl_phase=1;exon_id=Os01t0106300-01.exon2;rank=2 diff --git a/src/agat/agat_convert_sp_gff2tsv/test_data/agat_convert_sp_gff2tsv_1.tsv b/src/agat/agat_convert_sp_gff2tsv/test_data/agat_convert_sp_gff2tsv_1.tsv new file mode 100644 index 00000000..b30ae271 --- /dev/null +++ b/src/agat/agat_convert_sp_gff2tsv/test_data/agat_convert_sp_gff2tsv_1.tsv @@ -0,0 +1,881 @@ +seq_id source_tag primary_tag start end score strand frame Alias biotype constitutive description ensembl_end_phase ensembl_phase exon_id gene_id ID logic_name Name Parent protein_id rank transcript_id +1 irgsp repeat_region 2000 2100 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A fakeRepeat1 N/A N/A N/A N/A N/A N/A +1 irgsp gene 2983 10815 . 1 . N/A protein_coding N/A RabGAP/TBC domain containing protein. (Os01t0100100-01) N/A N/A N/A Os01g0100100 gene:Os01g0100100 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 2983 10815 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0100100-01 N/A N/A gene:Os01g0100100 N/A N/A Os01t0100100-01 +1 irgsp exon 2983 3268 . 1 . N/A N/A 1 N/A -1 -1 Os01t0100100-01.exon1 N/A Os01t0100100-01.exon1 N/A Os01t0100100-01.exon1 transcript:Os01t0100100-01 N/A 1 N/A +1 irgsp exon 3354 3616 . 1 . N/A N/A 1 N/A 0 -1 Os01t0100100-01.exon2 N/A Os01t0100100-01.exon2 N/A Os01t0100100-01.exon2 transcript:Os01t0100100-01 N/A 2 N/A +1 irgsp exon 4357 4455 . 1 . N/A N/A 1 N/A 0 0 Os01t0100100-01.exon3 N/A Os01t0100100-01.exon3 N/A Os01t0100100-01.exon3 transcript:Os01t0100100-01 N/A 3 N/A +1 irgsp exon 5457 5560 . 1 . N/A N/A 1 N/A 2 0 Os01t0100100-01.exon4 N/A Os01t0100100-01.exon4 N/A Os01t0100100-01.exon4 transcript:Os01t0100100-01 N/A 4 N/A +1 irgsp exon 7136 7944 . 1 . N/A N/A 1 N/A 1 2 Os01t0100100-01.exon5 N/A Os01t0100100-01.exon5 N/A Os01t0100100-01.exon5 transcript:Os01t0100100-01 N/A 5 N/A +1 irgsp exon 8028 8150 . 1 . 
N/A N/A 1 N/A 1 1 Os01t0100100-01.exon6 N/A Os01t0100100-01.exon6 N/A Os01t0100100-01.exon6 transcript:Os01t0100100-01 N/A 6 N/A +1 irgsp exon 8232 8320 . 1 . N/A N/A 1 N/A 0 1 Os01t0100100-01.exon7 N/A Os01t0100100-01.exon7 N/A Os01t0100100-01.exon7 transcript:Os01t0100100-01 N/A 7 N/A +1 irgsp exon 8408 8608 . 1 . N/A N/A 1 N/A 0 0 Os01t0100100-01.exon8 N/A Os01t0100100-01.exon8 N/A Os01t0100100-01.exon8 transcript:Os01t0100100-01 N/A 8 N/A +1 irgsp exon 9210 9615 . 1 . N/A N/A 1 N/A 1 0 Os01t0100100-01.exon9 N/A Os01t0100100-01.exon9 N/A Os01t0100100-01.exon9 transcript:Os01t0100100-01 N/A 9 N/A +1 irgsp exon 10102 10187 . 1 . N/A N/A 1 N/A 0 1 Os01t0100100-01.exon10 N/A Os01t0100100-01.exon10 N/A Os01t0100100-01.exon10 transcript:Os01t0100100-01 N/A 10 N/A +1 irgsp exon 10274 10430 . 1 . N/A N/A 1 N/A -1 0 Os01t0100100-01.exon11 N/A Os01t0100100-01.exon11 N/A Os01t0100100-01.exon11 transcript:Os01t0100100-01 N/A 11 N/A +1 irgsp exon 10504 10815 . 1 . N/A N/A 1 N/A -1 -1 Os01t0100100-01.exon12 N/A Os01t0100100-01.exon12 N/A Os01t0100100-01.exon12 transcript:Os01t0100100-01 N/A 12 N/A +1 irgsp CDS 3449 3616 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100100-01 N/A N/A transcript:Os01t0100100-01 Os01t0100100-01 N/A N/A +1 irgsp CDS 4357 4455 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100100-01 N/A N/A transcript:Os01t0100100-01 Os01t0100100-01 N/A N/A +1 irgsp CDS 5457 5560 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100100-01 N/A N/A transcript:Os01t0100100-01 Os01t0100100-01 N/A N/A +1 irgsp CDS 7136 7944 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100100-01 N/A N/A transcript:Os01t0100100-01 Os01t0100100-01 N/A N/A +1 irgsp CDS 8028 8150 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100100-01 N/A N/A transcript:Os01t0100100-01 Os01t0100100-01 N/A N/A +1 irgsp CDS 8232 8320 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100100-01 N/A N/A transcript:Os01t0100100-01 Os01t0100100-01 N/A N/A +1 irgsp CDS 8408 8608 . 
1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100100-01 N/A N/A transcript:Os01t0100100-01 Os01t0100100-01 N/A N/A +1 irgsp CDS 9210 9615 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100100-01 N/A N/A transcript:Os01t0100100-01 Os01t0100100-01 N/A N/A +1 irgsp CDS 10102 10187 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100100-01 N/A N/A transcript:Os01t0100100-01 Os01t0100100-01 N/A N/A +1 irgsp CDS 10274 10297 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100100-01 N/A N/A transcript:Os01t0100100-01 Os01t0100100-01 N/A N/A +1 irgsp five_prime_UTR 2983 3268 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-1 N/A N/A transcript:Os01t0100100-01 N/A N/A N/A +1 irgsp five_prime_UTR 3354 3448 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-2 N/A N/A transcript:Os01t0100100-01 N/A N/A N/A +1 irgsp three_prime_UTR 10298 10430 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-1 N/A N/A transcript:Os01t0100100-01 N/A N/A N/A +1 irgsp three_prime_UTR 10504 10815 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-2 N/A N/A transcript:Os01t0100100-01 N/A N/A N/A +1 irgsp gene 11218 12435 . 1 . N/A protein_coding N/A Conserved hypothetical protein. (Os01t0100200-01) N/A N/A N/A Os01g0100200 gene:Os01g0100200 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 11218 12435 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0100200-01 N/A N/A gene:Os01g0100200 N/A N/A Os01t0100200-01 +1 irgsp exon 11218 12060 . 1 . N/A N/A 1 N/A 2 -1 Os01t0100200-01.exon1 N/A Os01t0100200-01.exon1 N/A Os01t0100200-01.exon1 transcript:Os01t0100200-01 N/A 1 N/A +1 irgsp exon 12152 12435 . 1 . N/A N/A 1 N/A -1 2 Os01t0100200-01.exon2 N/A Os01t0100200-01.exon2 N/A Os01t0100200-01.exon2 transcript:Os01t0100200-01 N/A 2 N/A +1 irgsp CDS 11798 12060 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100200-01 N/A N/A transcript:Os01t0100200-01 Os01t0100200-01 N/A N/A +1 irgsp CDS 12152 12317 . 
1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100200-01 N/A N/A transcript:Os01t0100200-01 Os01t0100200-01 N/A N/A +1 irgsp five_prime_UTR 11218 11797 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-3 N/A N/A transcript:Os01t0100200-01 N/A N/A N/A +1 irgsp three_prime_UTR 12318 12435 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-3 N/A N/A transcript:Os01t0100200-01 N/A N/A N/A +1 irgsp gene 11372 12284 . -1 . N/A protein_coding N/A Cytochrome P450 domain containing protein. (Os01t0100300-00) N/A N/A N/A Os01g0100300 gene:Os01g0100300 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 11372 12284 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0100300-00 N/A N/A gene:Os01g0100300 N/A N/A Os01t0100300-00 +1 irgsp exon 11372 12042 . -1 . N/A N/A 1 N/A 0 1 Os01t0100300-00.exon2 N/A Os01t0100300-00.exon2 N/A Os01t0100300-00.exon2 transcript:Os01t0100300-00 N/A 2 N/A +1 irgsp exon 12146 12284 . -1 . N/A N/A 1 N/A 1 0 Os01t0100300-00.exon1 N/A Os01t0100300-00.exon1 N/A Os01t0100300-00.exon1 transcript:Os01t0100300-00 N/A 1 N/A +1 irgsp CDS 11372 12042 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100300-00 N/A N/A transcript:Os01t0100300-00 Os01t0100300-00 N/A N/A +1 irgsp CDS 12146 12284 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100300-00 N/A N/A transcript:Os01t0100300-00 Os01t0100300-00 N/A N/A +1 irgsp gene 12721 15685 . 1 . N/A protein_coding N/A Similar to Pectinesterase-like protein. (Os01t0100400-01) N/A N/A N/A Os01g0100400 gene:Os01g0100400 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 12721 15685 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0100400-01 N/A N/A gene:Os01g0100400 N/A N/A Os01t0100400-01 +1 irgsp exon 12721 13813 . 1 . N/A N/A 1 N/A 2 -1 Os01t0100400-01.exon1 N/A Os01t0100400-01.exon1 N/A Os01t0100400-01.exon1 transcript:Os01t0100400-01 N/A 1 N/A +1 irgsp exon 13906 14271 . 1 . 
N/A N/A 1 N/A 2 2 Os01t0100400-01.exon2 N/A Os01t0100400-01.exon2 N/A Os01t0100400-01.exon2 transcript:Os01t0100400-01 N/A 2 N/A +1 irgsp exon 14359 14437 . 1 . N/A N/A 1 N/A 0 2 Os01t0100400-01.exon3 N/A Os01t0100400-01.exon3 N/A Os01t0100400-01.exon3 transcript:Os01t0100400-01 N/A 3 N/A +1 irgsp exon 14969 15171 . 1 . N/A N/A 1 N/A 2 0 Os01t0100400-01.exon4 N/A Os01t0100400-01.exon4 N/A Os01t0100400-01.exon4 transcript:Os01t0100400-01 N/A 4 N/A +1 irgsp exon 15266 15685 . 1 . N/A N/A 1 N/A -1 2 Os01t0100400-01.exon5 N/A Os01t0100400-01.exon5 N/A Os01t0100400-01.exon5 transcript:Os01t0100400-01 N/A 5 N/A +1 irgsp CDS 12774 13813 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100400-01 N/A N/A transcript:Os01t0100400-01 Os01t0100400-01 N/A N/A +1 irgsp CDS 13906 14271 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100400-01 N/A N/A transcript:Os01t0100400-01 Os01t0100400-01 N/A N/A +1 irgsp CDS 14359 14437 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100400-01 N/A N/A transcript:Os01t0100400-01 Os01t0100400-01 N/A N/A +1 irgsp CDS 14969 15171 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100400-01 N/A N/A transcript:Os01t0100400-01 Os01t0100400-01 N/A N/A +1 irgsp CDS 15266 15359 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100400-01 N/A N/A transcript:Os01t0100400-01 Os01t0100400-01 N/A N/A +1 irgsp five_prime_UTR 12721 12773 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-4 N/A N/A transcript:Os01t0100400-01 N/A N/A N/A +1 irgsp three_prime_UTR 15360 15685 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-4 N/A N/A transcript:Os01t0100400-01 N/A N/A N/A +1 irgsp gene 12808 13978 . -1 . N/A protein_coding N/A Hypothetical protein. (Os01t0100466-00) N/A N/A N/A Os01g0100466 gene:Os01g0100466 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 12808 13978 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0100466-00 N/A N/A gene:Os01g0100466 N/A N/A Os01t0100466-00 +1 irgsp exon 12808 13782 . -1 . 
N/A N/A 1 N/A -1 -1 Os01t0100466-00.exon2 N/A Os01t0100466-00.exon2 N/A Os01t0100466-00.exon2 transcript:Os01t0100466-00 N/A 2 N/A +1 irgsp exon 13880 13978 . -1 . N/A N/A 1 N/A -1 -1 Os01t0100466-00.exon1 N/A Os01t0100466-00.exon1 N/A Os01t0100466-00.exon1 transcript:Os01t0100466-00 N/A 1 N/A +1 irgsp CDS 12869 13102 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100466-00 N/A N/A transcript:Os01t0100466-00 Os01t0100466-00 N/A N/A +1 irgsp five_prime_UTR 13103 13782 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-5 N/A N/A transcript:Os01t0100466-00 N/A N/A N/A +1 irgsp five_prime_UTR 13880 13978 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-6 N/A N/A transcript:Os01t0100466-00 N/A N/A N/A +1 irgsp three_prime_UTR 12808 12868 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-5 N/A N/A transcript:Os01t0100466-00 N/A N/A N/A +1 irgsp gene 16399 20144 . 1 . N/A protein_coding N/A Immunoglobulin-like domain containing protein. (Os01t0100500-01) N/A N/A N/A Os01g0100500 gene:Os01g0100500 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 16399 20144 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0100500-01 N/A N/A gene:Os01g0100500 N/A N/A Os01t0100500-01 +1 irgsp exon 16399 16976 . 1 . N/A N/A 1 N/A 0 -1 Os01t0100500-01.exon1 N/A Os01t0100500-01.exon1 N/A Os01t0100500-01.exon1 transcript:Os01t0100500-01 N/A 1 N/A +1 irgsp exon 17383 17474 . 1 . N/A N/A 1 N/A 2 0 Os01t0100500-01.exon2 N/A Os01t0100500-01.exon2 N/A Os01t0100500-01.exon2 transcript:Os01t0100500-01 N/A 2 N/A +1 irgsp exon 17558 18258 . 1 . N/A N/A 1 N/A 1 2 Os01t0100500-01.exon3 N/A Os01t0100500-01.exon3 N/A Os01t0100500-01.exon3 transcript:Os01t0100500-01 N/A 3 N/A +1 irgsp exon 18501 18571 . 1 . N/A N/A 1 N/A 0 1 Os01t0100500-01.exon4 N/A Os01t0100500-01.exon4 N/A Os01t0100500-01.exon4 transcript:Os01t0100500-01 N/A 4 N/A +1 irgsp exon 18968 19057 . 1 . 
N/A N/A 1 N/A 0 0 Os01t0100500-01.exon5 N/A Os01t0100500-01.exon5 N/A Os01t0100500-01.exon5 transcript:Os01t0100500-01 N/A 5 N/A +1 irgsp exon 19142 19321 . 1 . N/A N/A 1 N/A 0 0 Os01t0100500-01.exon6 N/A Os01t0100500-01.exon6 N/A Os01t0100500-01.exon6 transcript:Os01t0100500-01 N/A 6 N/A +1 irgsp exon 19531 19629 . 1 . N/A N/A 1 N/A -1 0 Os01t0100500-01.exon7 N/A Os01t0100500-01.exon7 N/A Os01t0100500-01.exon7 transcript:Os01t0100500-01 N/A 7 N/A +1 irgsp exon 19734 20144 . 1 . N/A N/A 1 N/A -1 -1 Os01t0100500-01.exon8 N/A Os01t0100500-01.exon8 N/A Os01t0100500-01.exon8 transcript:Os01t0100500-01 N/A 8 N/A +1 irgsp CDS 16599 16976 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100500-01 N/A N/A transcript:Os01t0100500-01 Os01t0100500-01 N/A N/A +1 irgsp CDS 17383 17474 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100500-01 N/A N/A transcript:Os01t0100500-01 Os01t0100500-01 N/A N/A +1 irgsp CDS 17558 18258 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100500-01 N/A N/A transcript:Os01t0100500-01 Os01t0100500-01 N/A N/A +1 irgsp CDS 18501 18571 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100500-01 N/A N/A transcript:Os01t0100500-01 Os01t0100500-01 N/A N/A +1 irgsp CDS 18968 19057 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100500-01 N/A N/A transcript:Os01t0100500-01 Os01t0100500-01 N/A N/A +1 irgsp CDS 19142 19321 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100500-01 N/A N/A transcript:Os01t0100500-01 Os01t0100500-01 N/A N/A +1 irgsp CDS 19531 19593 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100500-01 N/A N/A transcript:Os01t0100500-01 Os01t0100500-01 N/A N/A +1 irgsp five_prime_UTR 16399 16598 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-7 N/A N/A transcript:Os01t0100500-01 N/A N/A N/A +1 irgsp three_prime_UTR 19594 19629 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-6 N/A N/A transcript:Os01t0100500-01 N/A N/A N/A +1 irgsp three_prime_UTR 19734 20144 . 1 . 
N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-7 N/A N/A transcript:Os01t0100500-01 N/A N/A N/A +1 irgsp gene 22841 26892 . 1 . N/A protein_coding N/A Single-stranded nucleic acid binding R3H domain containing protein. (Os01t0100600-01) N/A N/A N/A Os01g0100600 gene:Os01g0100600 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 22841 26892 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0100600-01 N/A N/A gene:Os01g0100600 N/A N/A Os01t0100600-01 +1 irgsp exon 22841 23281 . 1 . N/A N/A 1 N/A 2 -1 Os01t0100600-01.exon1 N/A Os01t0100600-01.exon1 N/A Os01t0100600-01.exon1 transcript:Os01t0100600-01 N/A 1 N/A +1 irgsp exon 23572 23847 . 1 . N/A N/A 1 N/A 2 2 Os01t0100600-01.exon2 N/A Os01t0100600-01.exon2 N/A Os01t0100600-01.exon2 transcript:Os01t0100600-01 N/A 2 N/A +1 irgsp exon 23962 24033 . 1 . N/A N/A 1 N/A 2 2 Os01t0100600-01.exon3 N/A Os01t0100600-01.exon3 N/A Os01t0100600-01.exon3 transcript:Os01t0100600-01 N/A 3 N/A +1 irgsp exon 24492 24577 . 1 . N/A N/A 1 N/A 1 2 Os01t0100600-01.exon4 N/A Os01t0100600-01.exon4 N/A Os01t0100600-01.exon4 transcript:Os01t0100600-01 N/A 4 N/A +1 irgsp exon 25445 25519 . 1 . N/A N/A 1 N/A 1 1 Os01t0100600-01.exon5 N/A Os01t0100600-01.exon5 N/A Os01t0100600-01.exon5 transcript:Os01t0100600-01 N/A 5 N/A +1 irgsp exon 25883 26892 . 1 . N/A N/A 1 N/A -1 1 Os01t0100600-01.exon6 N/A Os01t0100600-01.exon6 N/A Os01t0100600-01.exon6 transcript:Os01t0100600-01 N/A 6 N/A +1 irgsp CDS 23232 23281 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100600-01 N/A N/A transcript:Os01t0100600-01 Os01t0100600-01 N/A N/A +1 irgsp CDS 23572 23847 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100600-01 N/A N/A transcript:Os01t0100600-01 Os01t0100600-01 N/A N/A +1 irgsp CDS 23962 24033 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100600-01 N/A N/A transcript:Os01t0100600-01 Os01t0100600-01 N/A N/A +1 irgsp CDS 24492 24577 . 
1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100600-01 N/A N/A transcript:Os01t0100600-01 Os01t0100600-01 N/A N/A +1 irgsp CDS 25445 25519 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100600-01 N/A N/A transcript:Os01t0100600-01 Os01t0100600-01 N/A N/A +1 irgsp CDS 25883 26391 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100600-01 N/A N/A transcript:Os01t0100600-01 Os01t0100600-01 N/A N/A +1 irgsp five_prime_UTR 22841 23231 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-8 N/A N/A transcript:Os01t0100600-01 N/A N/A N/A +1 irgsp three_prime_UTR 26392 26892 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-8 N/A N/A transcript:Os01t0100600-01 N/A N/A N/A +1 irgsp gene 25861 26424 . -1 . N/A protein_coding N/A Hypothetical gene. (Os01t0100650-00) N/A N/A N/A Os01g0100650 gene:Os01g0100650 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 25861 26424 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0100650-00 N/A N/A gene:Os01g0100650 N/A N/A Os01t0100650-00 +1 irgsp exon 25861 26424 . -1 . N/A N/A 1 N/A -1 -1 Os01t0100650-00.exon1 N/A Os01t0100650-00.exon1 N/A Os01t0100650-00.exon1 transcript:Os01t0100650-00 N/A 1 N/A +1 irgsp CDS 26040 26423 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100650-00 N/A N/A transcript:Os01t0100650-00 Os01t0100650-00 N/A N/A +1 irgsp five_prime_UTR 26424 26424 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-9 N/A N/A transcript:Os01t0100650-00 N/A N/A N/A +1 irgsp three_prime_UTR 25861 26039 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-9 N/A N/A transcript:Os01t0100650-00 N/A N/A N/A +1 irgsp gene 27143 28644 . 1 . N/A protein_coding N/A Similar to 40S ribosomal protein S5-1. (Os01t0100700-01) N/A N/A N/A Os01g0100700 gene:Os01g0100700 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 27143 28644 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0100700-01 N/A N/A gene:Os01g0100700 N/A N/A Os01t0100700-01 +1 irgsp exon 27143 27292 . 
1 . N/A N/A 1 N/A 0 -1 Os01t0100700-01.exon1 N/A Os01t0100700-01.exon1 N/A Os01t0100700-01.exon1 transcript:Os01t0100700-01 N/A 1 N/A +1 irgsp exon 27370 27641 . 1 . N/A N/A 1 N/A 2 0 Os01t0100700-01.exon2 N/A Os01t0100700-01.exon2 N/A Os01t0100700-01.exon2 transcript:Os01t0100700-01 N/A 2 N/A +1 irgsp exon 28090 28293 . 1 . N/A N/A 1 N/A 2 2 Os01t0100700-01.exon3 N/A Os01t0100700-01.exon3 N/A Os01t0100700-01.exon3 transcript:Os01t0100700-01 N/A 3 N/A +1 irgsp exon 28365 28644 . 1 . N/A N/A 1 N/A -1 2 Os01t0100700-01.exon4 N/A Os01t0100700-01.exon4 N/A Os01t0100700-01.exon4 transcript:Os01t0100700-01 N/A 4 N/A +1 irgsp CDS 27221 27292 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100700-01 N/A N/A transcript:Os01t0100700-01 Os01t0100700-01 N/A N/A +1 irgsp CDS 27370 27641 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100700-01 N/A N/A transcript:Os01t0100700-01 Os01t0100700-01 N/A N/A +1 irgsp CDS 28090 28293 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100700-01 N/A N/A transcript:Os01t0100700-01 Os01t0100700-01 N/A N/A +1 irgsp CDS 28365 28419 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100700-01 N/A N/A transcript:Os01t0100700-01 Os01t0100700-01 N/A N/A +1 irgsp five_prime_UTR 27143 27220 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-10 N/A N/A transcript:Os01t0100700-01 N/A N/A N/A +1 irgsp three_prime_UTR 28420 28644 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-10 N/A N/A transcript:Os01t0100700-01 N/A N/A N/A +1 irgsp gene 29818 34453 . 1 . N/A protein_coding N/A Protein of unknown function DUF1664 family protein. (Os01t0100800-01) N/A N/A N/A Os01g0100800 gene:Os01g0100800 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 29818 34453 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0100800-01 N/A N/A gene:Os01g0100800 N/A N/A Os01t0100800-01 +1 irgsp exon 29818 29976 . 1 . 
N/A N/A 1 N/A 1 -1 Os01t0100800-01.exon1 N/A Os01t0100800-01.exon1 N/A Os01t0100800-01.exon1 transcript:Os01t0100800-01 N/A 1 N/A +1 irgsp exon 30146 30228 . 1 . N/A N/A 1 N/A 0 1 Os01t0100800-01.exon2 N/A Os01t0100800-01.exon2 N/A Os01t0100800-01.exon2 transcript:Os01t0100800-01 N/A 2 N/A +1 irgsp exon 30735 30806 . 1 . N/A N/A 1 N/A 0 0 Os01t0100800-01.exon3 N/A Os01t0100800-01.exon3 N/A Os01t0100800-01.exon3 transcript:Os01t0100800-01 N/A 3 N/A +1 irgsp exon 30885 30963 . 1 . N/A N/A 1 N/A 1 0 Os01t0100800-01.exon4 N/A Os01t0100800-01.exon4 N/A Os01t0100800-01.exon4 transcript:Os01t0100800-01 N/A 4 N/A +1 irgsp exon 31258 31325 . 1 . N/A N/A 1 N/A 0 1 Os01t0100800-01.exon5 N/A Os01t0100800-01.exon5 N/A Os01t0100800-01.exon5 transcript:Os01t0100800-01 N/A 5 N/A +1 irgsp exon 31505 31606 . 1 . N/A N/A 1 N/A 0 0 Os01t0100800-01.exon6 N/A Os01t0100800-01.exon6 N/A Os01t0100800-01.exon6 transcript:Os01t0100800-01 N/A 6 N/A +1 irgsp exon 32377 32466 . 1 . N/A N/A 1 N/A 0 0 Os01t0100800-01.exon7 N/A Os01t0100800-01.exon7 N/A Os01t0100800-01.exon7 transcript:Os01t0100800-01 N/A 7 N/A +1 irgsp exon 32542 32616 . 1 . N/A N/A 1 N/A 0 0 Os01t0100800-01.exon8 N/A Os01t0100800-01.exon8 N/A Os01t0100800-01.exon8 transcript:Os01t0100800-01 N/A 8 N/A +1 irgsp exon 32712 32744 . 1 . N/A N/A 1 N/A 0 0 Os01t0100800-01.exon9 N/A Os01t0100800-01.exon9 N/A Os01t0100800-01.exon9 transcript:Os01t0100800-01 N/A 9 N/A +1 irgsp exon 32828 32905 . 1 . N/A N/A 1 N/A 0 0 Os01t0100800-01.exon10 N/A Os01t0100800-01.exon10 N/A Os01t0100800-01.exon10 transcript:Os01t0100800-01 N/A 10 N/A +1 irgsp exon 33274 33330 . 1 . N/A N/A 1 N/A 0 0 Os01t0100800-01.exon11 N/A Os01t0100800-01.exon11 N/A Os01t0100800-01.exon11 transcript:Os01t0100800-01 N/A 11 N/A +1 irgsp exon 33400 33471 . 1 . N/A N/A 1 N/A 0 0 Os01t0100800-01.exon12 N/A Os01t0100800-01.exon12 N/A Os01t0100800-01.exon12 transcript:Os01t0100800-01 N/A 12 N/A +1 irgsp exon 33543 33617 . 1 . 
N/A N/A 1 N/A 0 0 Os01t0100800-01.exon13 N/A Os01t0100800-01.exon13 N/A Os01t0100800-01.exon13 transcript:Os01t0100800-01 N/A 13 N/A +1 irgsp exon 33975 34453 . 1 . N/A N/A 1 N/A -1 0 Os01t0100800-01.exon14 N/A Os01t0100800-01.exon14 N/A Os01t0100800-01.exon14 transcript:Os01t0100800-01 N/A 14 N/A +1 irgsp CDS 29940 29976 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 30146 30228 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 30735 30806 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 30885 30963 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 31258 31325 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 31505 31606 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 32377 32466 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 32542 32616 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 32712 32744 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 32828 32905 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 33274 33330 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 33400 33471 . 
1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 33543 33617 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp CDS 33975 34124 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100800-01 N/A N/A transcript:Os01t0100800-01 Os01t0100800-01 N/A N/A +1 irgsp five_prime_UTR 29818 29939 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-11 N/A N/A transcript:Os01t0100800-01 N/A N/A N/A +1 irgsp three_prime_UTR 34125 34453 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-11 N/A N/A transcript:Os01t0100800-01 N/A N/A N/A +1 irgsp gene 35623 41136 . 1 . N/A protein_coding N/A Sphingosine-1-phosphate lyase, Disease resistance response (Os01t0100900-01) N/A N/A N/A Os01g0100900 gene:Os01g0100900 irgspv1.0-20170804-genes SPHINGOSINE-1-PHOSPHATE LYASE 1, Sphingosine-1-Phoshpate Lyase 1 N/A N/A N/A N/A +1 irgsp mRNA 35623 41136 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0100900-01 N/A N/A gene:Os01g0100900 N/A N/A Os01t0100900-01 +1 irgsp exon 35623 35939 . 1 . N/A N/A 1 N/A 2 -1 Os01t0100900-01.exon1 N/A Os01t0100900-01.exon1 N/A Os01t0100900-01.exon1 transcript:Os01t0100900-01 N/A 1 N/A +1 irgsp exon 36027 36072 . 1 . N/A N/A 1 N/A 0 2 Os01t0100900-01.exon2 N/A Os01t0100900-01.exon2 N/A Os01t0100900-01.exon2 transcript:Os01t0100900-01 N/A 2 N/A +1 irgsp exon 36517 36668 . 1 . N/A N/A 1 N/A 2 0 Os01t0100900-01.exon3 N/A Os01t0100900-01.exon3 N/A Os01t0100900-01.exon3 transcript:Os01t0100900-01 N/A 3 N/A +1 irgsp exon 36818 36877 . 1 . N/A N/A 1 N/A 2 2 Os01t0100900-01.exon4 N/A Os01t0100900-01.exon4 N/A Os01t0100900-01.exon4 transcript:Os01t0100900-01 N/A 4 N/A +1 irgsp exon 37594 37818 . 1 . N/A N/A 1 N/A 2 2 Os01t0100900-01.exon5 N/A Os01t0100900-01.exon5 N/A Os01t0100900-01.exon5 transcript:Os01t0100900-01 N/A 5 N/A +1 irgsp exon 37892 38033 . 1 . 
N/A N/A 1 N/A 0 2 Os01t0100900-01.exon6 N/A Os01t0100900-01.exon6 N/A Os01t0100900-01.exon6 transcript:Os01t0100900-01 N/A 6 N/A +1 irgsp exon 38276 38326 . 1 . N/A N/A 1 N/A 0 0 Os01t0100900-01.exon7 N/A Os01t0100900-01.exon7 N/A Os01t0100900-01.exon7 transcript:Os01t0100900-01 N/A 7 N/A +1 irgsp exon 38434 38525 . 1 . N/A N/A 1 N/A 2 0 Os01t0100900-01.exon8 N/A Os01t0100900-01.exon8 N/A Os01t0100900-01.exon8 transcript:Os01t0100900-01 N/A 8 N/A +1 irgsp exon 39319 39445 . 1 . N/A N/A 1 N/A 0 2 Os01t0100900-01.exon9 N/A Os01t0100900-01.exon9 N/A Os01t0100900-01.exon9 transcript:Os01t0100900-01 N/A 9 N/A +1 irgsp exon 39553 39568 . 1 . N/A N/A 1 N/A 1 0 Os01t0100900-01.exon10 N/A Os01t0100900-01.exon10 N/A Os01t0100900-01.exon10 transcript:Os01t0100900-01 N/A 10 N/A +1 irgsp exon 39939 40046 . 1 . N/A N/A 1 N/A 1 1 Os01t0100900-01.exon11 N/A Os01t0100900-01.exon11 N/A Os01t0100900-01.exon11 transcript:Os01t0100900-01 N/A 11 N/A +1 irgsp exon 40135 40189 . 1 . N/A N/A 1 N/A 2 1 Os01t0100900-01.exon12 N/A Os01t0100900-01.exon12 N/A Os01t0100900-01.exon12 transcript:Os01t0100900-01 N/A 12 N/A +1 irgsp exon 40456 40602 . 1 . N/A N/A 1 N/A 2 2 Os01t0100900-01.exon13 N/A Os01t0100900-01.exon13 N/A Os01t0100900-01.exon13 transcript:Os01t0100900-01 N/A 13 N/A +1 irgsp exon 40703 40781 . 1 . N/A N/A 1 N/A 0 2 Os01t0100900-01.exon14 N/A Os01t0100900-01.exon14 N/A Os01t0100900-01.exon14 transcript:Os01t0100900-01 N/A 14 N/A +1 irgsp exon 40885 41136 . 1 . N/A N/A 1 N/A -1 0 Os01t0100900-01.exon15 N/A Os01t0100900-01.exon15 N/A Os01t0100900-01.exon15 transcript:Os01t0100900-01 N/A 15 N/A +1 irgsp CDS 35743 35939 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 36027 36072 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 36517 36668 . 
1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 36818 36877 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 37594 37818 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 37892 38033 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 38276 38326 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 38434 38525 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 39319 39445 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 39553 39568 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 39939 40046 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 40135 40189 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 40456 40602 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 40703 40781 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp CDS 40885 41007 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0100900-01 N/A N/A transcript:Os01t0100900-01 Os01t0100900-01 N/A N/A +1 irgsp five_prime_UTR 35623 35742 . 1 . 
N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-12 N/A N/A transcript:Os01t0100900-01 N/A N/A N/A +1 irgsp three_prime_UTR 41008 41136 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-12 N/A N/A transcript:Os01t0100900-01 N/A N/A N/A +1 irgsp gene 58658 61090 . 1 . N/A protein_coding N/A Hypothetical conserved gene. (Os01t0101150-00) N/A N/A N/A Os01g0101150 gene:Os01g0101150 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 58658 61090 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0101150-00 N/A N/A gene:Os01g0101150 N/A N/A Os01t0101150-00 +1 irgsp exon 58658 61090 . 1 . N/A N/A 1 N/A 0 0 Os01t0101150-00.exon1 N/A Os01t0101150-00.exon1 N/A Os01t0101150-00.exon1 transcript:Os01t0101150-00 N/A 1 N/A +1 irgsp CDS 58658 61090 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101150-00 N/A N/A transcript:Os01t0101150-00 Os01t0101150-00 N/A N/A +1 irgsp gene 62060 65537 . 1 . N/A protein_coding N/A 2,3-diketo-5-methylthio-1-phosphopentane phosphatase domain containing protein. (Os01t0101200-01);2,3-diketo-5-methylthio-1-phosphopentane phosphatase domain containing protein. (Os01t0101200-02) N/A N/A N/A Os01g0101200 gene:Os01g0101200 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 62060 63576 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0101200-01 N/A N/A gene:Os01g0101200 N/A N/A Os01t0101200-01 +1 irgsp exon 62060 62295 . 1 . N/A N/A 0 N/A 0 -1 Os01t0101200-01.exon1 N/A Os01t0101200-01.exon1 N/A Os01t0101200-01.exon1 transcript:Os01t0101200-01 N/A 1 N/A +1 irgsp exon 62385 62905 . 1 . N/A N/A 1 N/A 2 0 Os01t0101200-02.exon2 N/A Os01t0101200-02.exon2 N/A Os01t0101200-02.exon2 transcript:Os01t0101200-01 N/A 2 N/A +1 irgsp exon 62996 63114 . 1 . N/A N/A 1 N/A 1 2 Os01t0101200-02.exon3 N/A Os01t0101200-02.exon3 N/A Os01t0101200-02.exon3 transcript:Os01t0101200-01 N/A 3 N/A +1 irgsp exon 63248 63576 . 1 . 
N/A N/A 0 N/A -1 1 Os01t0101200-01.exon4 N/A Os01t0101200-01.exon4 N/A Os01t0101200-01.exon4 transcript:Os01t0101200-01 N/A 4 N/A +1 irgsp CDS 62104 62295 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101200-01 N/A N/A transcript:Os01t0101200-01 Os01t0101200-01 N/A N/A +1 irgsp CDS 62385 62905 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101200-01 N/A N/A transcript:Os01t0101200-01 Os01t0101200-01 N/A N/A +1 irgsp CDS 62996 63114 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101200-01 N/A N/A transcript:Os01t0101200-01 Os01t0101200-01 N/A N/A +1 irgsp CDS 63248 63345 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101200-01 N/A N/A transcript:Os01t0101200-01 Os01t0101200-01 N/A N/A +1 irgsp five_prime_UTR 62060 62103 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-13 N/A N/A transcript:Os01t0101200-01 N/A N/A N/A +1 irgsp three_prime_UTR 63346 63576 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-13 N/A N/A transcript:Os01t0101200-01 N/A N/A N/A +1 irgsp mRNA 62112 65537 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0101200-02 N/A N/A gene:Os01g0101200 N/A N/A Os01t0101200-02 +1 irgsp exon 62112 62295 . 1 . N/A N/A 0 N/A 0 -1 Os01t0101200-02.exon1 N/A Os01t0101200-02.exon1 N/A Os01t0101200-02.exon1 transcript:Os01t0101200-02 N/A 1 N/A +1 irgsp exon 62385 62905 . 1 . N/A N/A 1 N/A 2 0 Os01t0101200-02.exon2 N/A agat-exon-1 N/A Os01t0101200-02.exon2 transcript:Os01t0101200-02 N/A 2 N/A +1 irgsp exon 62996 63114 . 1 . N/A N/A 1 N/A 1 2 Os01t0101200-02.exon3 N/A agat-exon-2 N/A Os01t0101200-02.exon3 transcript:Os01t0101200-02 N/A 3 N/A +1 irgsp exon 63248 65537 . 1 . N/A N/A 0 N/A -1 1 Os01t0101200-02.exon4 N/A Os01t0101200-02.exon4 N/A Os01t0101200-02.exon4 transcript:Os01t0101200-02 N/A 4 N/A +1 irgsp CDS 62113 62295 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101200-02 N/A N/A transcript:Os01t0101200-02 Os01t0101200-02 N/A N/A +1 irgsp CDS 62385 62905 . 
1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101200-02 N/A N/A transcript:Os01t0101200-02 Os01t0101200-02 N/A N/A +1 irgsp CDS 62996 63114 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101200-02 N/A N/A transcript:Os01t0101200-02 Os01t0101200-02 N/A N/A +1 irgsp CDS 63248 63345 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101200-02 N/A N/A transcript:Os01t0101200-02 Os01t0101200-02 N/A N/A +1 irgsp five_prime_UTR 62112 62112 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-14 N/A N/A transcript:Os01t0101200-02 N/A N/A N/A +1 irgsp three_prime_UTR 63346 65537 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-14 N/A N/A transcript:Os01t0101200-02 N/A N/A N/A +1 irgsp gene 63350 66302 . -1 . N/A protein_coding N/A Similar to MRNA, partial cds, clone: RAFL22-26-L17. (Fragment). (Os01t0101300-01) N/A N/A N/A Os01g0101300 gene:Os01g0101300 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 63350 66302 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0101300-01 N/A N/A gene:Os01g0101300 N/A N/A Os01t0101300-01 +1 irgsp exon 63350 63783 . -1 . N/A N/A 1 N/A -1 0 Os01t0101300-01.exon7 N/A Os01t0101300-01.exon7 N/A Os01t0101300-01.exon7 transcript:Os01t0101300-01 N/A 7 N/A +1 irgsp exon 63877 64020 . -1 . N/A N/A 1 N/A 0 0 Os01t0101300-01.exon6 N/A Os01t0101300-01.exon6 N/A Os01t0101300-01.exon6 transcript:Os01t0101300-01 N/A 6 N/A +1 irgsp exon 64339 64431 . -1 . N/A N/A 1 N/A 0 0 Os01t0101300-01.exon5 N/A Os01t0101300-01.exon5 N/A Os01t0101300-01.exon5 transcript:Os01t0101300-01 N/A 5 N/A +1 irgsp exon 64665 64779 . -1 . N/A N/A 1 N/A 0 2 Os01t0101300-01.exon4 N/A Os01t0101300-01.exon4 N/A Os01t0101300-01.exon4 transcript:Os01t0101300-01 N/A 4 N/A +1 irgsp exon 64902 65152 . -1 . N/A N/A 1 N/A 2 0 Os01t0101300-01.exon3 N/A Os01t0101300-01.exon3 N/A Os01t0101300-01.exon3 transcript:Os01t0101300-01 N/A 3 N/A +1 irgsp exon 65248 65431 . -1 . 
N/A N/A 1 N/A 0 2 Os01t0101300-01.exon2 N/A Os01t0101300-01.exon2 N/A Os01t0101300-01.exon2 transcript:Os01t0101300-01 N/A 2 N/A +1 irgsp exon 65628 66302 . -1 . N/A N/A 1 N/A 2 -1 Os01t0101300-01.exon1 N/A Os01t0101300-01.exon1 N/A Os01t0101300-01.exon1 transcript:Os01t0101300-01 N/A 1 N/A +1 irgsp CDS 63670 63783 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101300-01 N/A N/A transcript:Os01t0101300-01 Os01t0101300-01 N/A N/A +1 irgsp CDS 63877 64020 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101300-01 N/A N/A transcript:Os01t0101300-01 Os01t0101300-01 N/A N/A +1 irgsp CDS 64339 64431 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101300-01 N/A N/A transcript:Os01t0101300-01 Os01t0101300-01 N/A N/A +1 irgsp CDS 64665 64779 . -1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101300-01 N/A N/A transcript:Os01t0101300-01 Os01t0101300-01 N/A N/A +1 irgsp CDS 64902 65152 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101300-01 N/A N/A transcript:Os01t0101300-01 Os01t0101300-01 N/A N/A +1 irgsp CDS 65248 65431 . -1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101300-01 N/A N/A transcript:Os01t0101300-01 Os01t0101300-01 N/A N/A +1 irgsp CDS 65628 65950 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101300-01 N/A N/A transcript:Os01t0101300-01 Os01t0101300-01 N/A N/A +1 irgsp five_prime_UTR 65951 66302 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-15 N/A N/A transcript:Os01t0101300-01 N/A N/A N/A +1 irgsp three_prime_UTR 63350 63669 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-15 N/A N/A transcript:Os01t0101300-01 N/A N/A N/A +1 irgsp gene 72816 78349 . 1 . N/A protein_coding N/A Immunoglobulin-like fold domain containing protein. (Os01t0101600-01);Immunoglobulin-like fold domain containing protein. (Os01t0101600-02);Hypothetical conserved gene. (Os01t0101600-03) N/A N/A N/A Os01g0101600 gene:Os01g0101600 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 72816 78349 . 1 . 
N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0101600-01 N/A N/A gene:Os01g0101600 N/A N/A Os01t0101600-01 +1 irgsp exon 72816 73935 . 1 . N/A N/A 0 N/A 1 -1 Os01t0101600-01.exon1 N/A Os01t0101600-01.exon1 N/A Os01t0101600-01.exon1 transcript:Os01t0101600-01 N/A 1 N/A +1 irgsp exon 74468 74981 . 1 . N/A N/A 0 N/A 2 1 Os01t0101600-02.exon2 N/A Os01t0101600-02.exon2 N/A Os01t0101600-02.exon2 transcript:Os01t0101600-01 N/A 2 N/A +1 irgsp exon 75619 77205 . 1 . N/A N/A 0 N/A -1 2 Os01t0101600-01.exon3 N/A Os01t0101600-01.exon3 N/A Os01t0101600-01.exon3 transcript:Os01t0101600-01 N/A 3 N/A +1 irgsp exon 77333 78349 . 1 . N/A N/A 0 N/A -1 -1 Os01t0101600-01.exon4 N/A Os01t0101600-01.exon4 N/A Os01t0101600-01.exon4 transcript:Os01t0101600-01 N/A 4 N/A +1 irgsp CDS 72903 73935 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101600-01 N/A N/A transcript:Os01t0101600-01 Os01t0101600-01 N/A N/A +1 irgsp CDS 74468 74981 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101600-01 N/A N/A transcript:Os01t0101600-01 Os01t0101600-01 N/A N/A +1 irgsp CDS 75619 77008 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101600-01 N/A N/A transcript:Os01t0101600-01 Os01t0101600-01 N/A N/A +1 irgsp five_prime_UTR 72816 72902 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-16 N/A N/A transcript:Os01t0101600-01 N/A N/A N/A +1 irgsp three_prime_UTR 77009 77205 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-16 N/A N/A transcript:Os01t0101600-01 N/A N/A N/A +1 irgsp three_prime_UTR 77333 78349 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-17 N/A N/A transcript:Os01t0101600-01 N/A N/A N/A +1 irgsp mRNA 72823 77699 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0101600-02 N/A N/A gene:Os01g0101600 N/A N/A Os01t0101600-02 +1 irgsp exon 72823 73935 . 1 . N/A N/A 0 N/A 1 -1 Os01t0101600-02.exon1 N/A Os01t0101600-02.exon1 N/A Os01t0101600-02.exon1 transcript:Os01t0101600-02 N/A 1 N/A +1 irgsp exon 74468 74981 . 1 . 
N/A N/A 0 N/A 2 1 Os01t0101600-02.exon2 N/A agat-exon-3 N/A Os01t0101600-02.exon2 transcript:Os01t0101600-02 N/A 2 N/A +1 irgsp exon 75619 77699 . 1 . N/A N/A 0 N/A -1 2 Os01t0101600-02.exon3 N/A Os01t0101600-02.exon3 N/A Os01t0101600-02.exon3 transcript:Os01t0101600-02 N/A 3 N/A +1 irgsp CDS 72903 73935 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101600-02 N/A N/A transcript:Os01t0101600-02 Os01t0101600-02 N/A N/A +1 irgsp CDS 74468 74981 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101600-02 N/A N/A transcript:Os01t0101600-02 Os01t0101600-02 N/A N/A +1 irgsp CDS 75619 77008 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101600-02 N/A N/A transcript:Os01t0101600-02 Os01t0101600-02 N/A N/A +1 irgsp five_prime_UTR 72823 72902 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-17 N/A N/A transcript:Os01t0101600-02 N/A N/A N/A +1 irgsp three_prime_UTR 77009 77699 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-18 N/A N/A transcript:Os01t0101600-02 N/A N/A N/A +1 irgsp mRNA 75942 77699 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0101600-03 N/A N/A gene:Os01g0101600 N/A N/A Os01t0101600-03 +1 irgsp exon 75942 77699 . 1 . N/A N/A 0 N/A -1 -1 Os01t0101600-03.exon1 N/A Os01t0101600-03.exon1 N/A Os01t0101600-03.exon1 transcript:Os01t0101600-03 N/A 1 N/A +1 irgsp CDS 75944 77008 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101600-03 N/A N/A transcript:Os01t0101600-03 Os01t0101600-03 N/A N/A +1 irgsp five_prime_UTR 75942 75943 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-18 N/A N/A transcript:Os01t0101600-03 N/A N/A N/A +1 irgsp three_prime_UTR 77009 77699 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-19 N/A N/A transcript:Os01t0101600-03 N/A N/A N/A +1 irgsp gene 82426 84095 . 1 . N/A protein_coding N/A Similar to chaperone protein dnaJ 20. 
(Os01t0101700-00) N/A N/A N/A Os01g0101700 gene:Os01g0101700 irgspv1.0-20170804-genes DnaJ domain protein C1, rice DJC26 homolog N/A N/A N/A N/A +1 irgsp mRNA 82426 84095 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0101700-00 N/A N/A gene:Os01g0101700 N/A N/A Os01t0101700-00 +1 irgsp exon 82426 82932 . 1 . N/A N/A 1 N/A 0 -1 Os01t0101700-00.exon1 N/A Os01t0101700-00.exon1 N/A Os01t0101700-00.exon1 transcript:Os01t0101700-00 N/A 1 N/A +1 irgsp exon 83724 84095 . 1 . N/A N/A 1 N/A -1 0 Os01t0101700-00.exon2 N/A Os01t0101700-00.exon2 N/A Os01t0101700-00.exon2 transcript:Os01t0101700-00 N/A 2 N/A +1 irgsp CDS 82507 82932 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101700-00 N/A N/A transcript:Os01t0101700-00 Os01t0101700-00 N/A N/A +1 irgsp CDS 83724 83864 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101700-00 N/A N/A transcript:Os01t0101700-00 Os01t0101700-00 N/A N/A +1 irgsp five_prime_UTR 82426 82506 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-19 N/A N/A transcript:Os01t0101700-00 N/A N/A N/A +1 irgsp three_prime_UTR 83865 84095 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-20 N/A N/A transcript:Os01t0101700-00 N/A N/A N/A +1 irgsp gene 85337 88844 . 1 . N/A protein_coding N/A Conserved hypothetical protein. (Os01t0101800-01) N/A N/A N/A Os01g0101800 gene:Os01g0101800 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 85337 88844 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0101800-01 N/A N/A gene:Os01g0101800 N/A N/A Os01t0101800-01 +1 irgsp exon 85337 85600 . 1 . N/A N/A 1 N/A 0 -1 Os01t0101800-01.exon1 N/A Os01t0101800-01.exon1 N/A Os01t0101800-01.exon1 transcript:Os01t0101800-01 N/A 1 N/A +1 irgsp exon 85737 85830 . 1 . N/A N/A 1 N/A 1 0 Os01t0101800-01.exon2 N/A Os01t0101800-01.exon2 N/A Os01t0101800-01.exon2 transcript:Os01t0101800-01 N/A 2 N/A +1 irgsp exon 85935 86086 . 1 . 
N/A N/A 1 N/A 0 1 Os01t0101800-01.exon3 N/A Os01t0101800-01.exon3 N/A Os01t0101800-01.exon3 transcript:Os01t0101800-01 N/A 3 N/A +1 irgsp exon 86212 86299 . 1 . N/A N/A 1 N/A 1 0 Os01t0101800-01.exon4 N/A Os01t0101800-01.exon4 N/A Os01t0101800-01.exon4 transcript:Os01t0101800-01 N/A 4 N/A +1 irgsp exon 86399 87681 . 1 . N/A N/A 1 N/A 0 1 Os01t0101800-01.exon5 N/A Os01t0101800-01.exon5 N/A Os01t0101800-01.exon5 transcript:Os01t0101800-01 N/A 5 N/A +1 irgsp exon 88291 88398 . 1 . N/A N/A 1 N/A 0 0 Os01t0101800-01.exon6 N/A Os01t0101800-01.exon6 N/A Os01t0101800-01.exon6 transcript:Os01t0101800-01 N/A 6 N/A +1 irgsp exon 88500 88844 . 1 . N/A N/A 1 N/A -1 0 Os01t0101800-01.exon7 N/A Os01t0101800-01.exon7 N/A Os01t0101800-01.exon7 transcript:Os01t0101800-01 N/A 7 N/A +1 irgsp CDS 85379 85600 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101800-01 N/A N/A transcript:Os01t0101800-01 Os01t0101800-01 N/A N/A +1 irgsp CDS 85737 85830 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101800-01 N/A N/A transcript:Os01t0101800-01 Os01t0101800-01 N/A N/A +1 irgsp CDS 85935 86086 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101800-01 N/A N/A transcript:Os01t0101800-01 Os01t0101800-01 N/A N/A +1 irgsp CDS 86212 86299 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101800-01 N/A N/A transcript:Os01t0101800-01 Os01t0101800-01 N/A N/A +1 irgsp CDS 86399 87681 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101800-01 N/A N/A transcript:Os01t0101800-01 Os01t0101800-01 N/A N/A +1 irgsp CDS 88291 88398 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101800-01 N/A N/A transcript:Os01t0101800-01 Os01t0101800-01 N/A N/A +1 irgsp CDS 88500 88583 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101800-01 N/A N/A transcript:Os01t0101800-01 Os01t0101800-01 N/A N/A +1 irgsp five_prime_UTR 85337 85378 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-20 N/A N/A transcript:Os01t0101800-01 N/A N/A N/A +1 irgsp three_prime_UTR 88584 88844 . 1 . 
N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-21 N/A N/A transcript:Os01t0101800-01 N/A N/A N/A +1 irgsp gene 86211 88583 . -1 . N/A protein_coding N/A Hypothetical protein. (Os01t0101850-00) N/A N/A N/A Os01g0101850 gene:Os01g0101850 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 86211 88583 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0101850-00 N/A N/A gene:Os01g0101850 N/A N/A Os01t0101850-00 +1 irgsp exon 86211 86277 . -1 . N/A N/A 1 N/A -1 -1 Os01t0101850-00.exon4 N/A Os01t0101850-00.exon4 N/A Os01t0101850-00.exon4 transcript:Os01t0101850-00 N/A 4 N/A +1 irgsp exon 86384 87694 . -1 . N/A N/A 1 N/A -1 -1 Os01t0101850-00.exon3 N/A Os01t0101850-00.exon3 N/A Os01t0101850-00.exon3 transcript:Os01t0101850-00 N/A 3 N/A +1 irgsp exon 88308 88396 . -1 . N/A N/A 1 N/A -1 -1 Os01t0101850-00.exon2 N/A Os01t0101850-00.exon2 N/A Os01t0101850-00.exon2 transcript:Os01t0101850-00 N/A 2 N/A +1 irgsp exon 88496 88583 . -1 . N/A N/A 1 N/A -1 -1 Os01t0101850-00.exon1 N/A Os01t0101850-00.exon1 N/A Os01t0101850-00.exon1 transcript:Os01t0101850-00 N/A 1 N/A +1 irgsp CDS 87327 87662 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101850-00 N/A N/A transcript:Os01t0101850-00 Os01t0101850-00 N/A N/A +1 irgsp five_prime_UTR 87663 87694 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-21 N/A N/A transcript:Os01t0101850-00 N/A N/A N/A +1 irgsp five_prime_UTR 88308 88396 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-22 N/A N/A transcript:Os01t0101850-00 N/A N/A N/A +1 irgsp five_prime_UTR 88496 88583 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-23 N/A N/A transcript:Os01t0101850-00 N/A N/A N/A +1 irgsp three_prime_UTR 86211 86277 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-22 N/A N/A transcript:Os01t0101850-00 N/A N/A N/A +1 irgsp three_prime_UTR 86384 87326 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-23 N/A N/A transcript:Os01t0101850-00 N/A N/A N/A +1 irgsp gene 88883 89228 . 
-1 . N/A protein_coding N/A Similar to OSIGBa0075F02.3 protein. (Os01t0101900-00) N/A N/A N/A Os01g0101900 gene:Os01g0101900 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 88883 89228 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0101900-00 N/A N/A gene:Os01g0101900 N/A N/A Os01t0101900-00 +1 irgsp exon 88883 89228 . -1 . N/A N/A 1 N/A -1 -1 Os01t0101900-00.exon1 N/A Os01t0101900-00.exon1 N/A Os01t0101900-00.exon1 transcript:Os01t0101900-00 N/A 1 N/A +1 irgsp CDS 88986 89204 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0101900-00 N/A N/A transcript:Os01t0101900-00 Os01t0101900-00 N/A N/A +1 irgsp five_prime_UTR 89205 89228 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-24 N/A N/A transcript:Os01t0101900-00 N/A N/A N/A +1 irgsp three_prime_UTR 88883 88985 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-24 N/A N/A transcript:Os01t0101900-00 N/A N/A N/A +1 irgsp gene 89763 91465 . -1 . N/A protein_coding N/A Phosphoesterase family protein. (Os01t0102000-01) N/A N/A N/A Os01g0102000 gene:Os01g0102000 irgspv1.0-20170804-genes NON-SPECIFIC PHOSPHOLIPASE C5 N/A N/A N/A N/A +1 irgsp mRNA 89763 91465 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0102000-01 N/A N/A gene:Os01g0102000 N/A N/A Os01t0102000-01 +1 irgsp exon 89763 91465 . -1 . N/A N/A 1 N/A -1 -1 Os01t0102000-01.exon1 N/A Os01t0102000-01.exon1 N/A Os01t0102000-01.exon1 transcript:Os01t0102000-01 N/A 1 N/A +1 irgsp CDS 89825 91411 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102000-01 N/A N/A transcript:Os01t0102000-01 Os01t0102000-01 N/A N/A +1 irgsp five_prime_UTR 91412 91465 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-25 N/A N/A transcript:Os01t0102000-01 N/A N/A N/A +1 irgsp three_prime_UTR 89763 89824 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-25 N/A N/A transcript:Os01t0102000-01 N/A N/A N/A +1 irgsp gene 134300 135439 . 1 . 
N/A protein_coding N/A Thylakoid lumen protein, Photosynthesis and chloroplast development (Os01t0102300-01) N/A N/A N/A Os01g0102300 gene:Os01g0102300 irgspv1.0-20170804-genes OsTLP27 N/A N/A N/A N/A +1 irgsp mRNA 134300 135439 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0102300-01 N/A N/A gene:Os01g0102300 N/A N/A Os01t0102300-01 +1 irgsp exon 134300 134615 . 1 . N/A N/A 1 N/A 2 -1 Os01t0102300-01.exon1 N/A Os01t0102300-01.exon1 N/A Os01t0102300-01.exon1 transcript:Os01t0102300-01 N/A 1 N/A +1 irgsp exon 134698 134824 . 1 . N/A N/A 1 N/A 0 2 Os01t0102300-01.exon2 N/A Os01t0102300-01.exon2 N/A Os01t0102300-01.exon2 transcript:Os01t0102300-01 N/A 2 N/A +1 irgsp exon 134912 135439 . 1 . N/A N/A 1 N/A -1 0 Os01t0102300-01.exon3 N/A Os01t0102300-01.exon3 N/A Os01t0102300-01.exon3 transcript:Os01t0102300-01 N/A 3 N/A +1 irgsp CDS 134311 134615 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102300-01 N/A N/A transcript:Os01t0102300-01 Os01t0102300-01 N/A N/A +1 irgsp CDS 134698 134824 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102300-01 N/A N/A transcript:Os01t0102300-01 Os01t0102300-01 N/A N/A +1 irgsp CDS 134912 135253 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102300-01 N/A N/A transcript:Os01t0102300-01 Os01t0102300-01 N/A N/A +1 irgsp five_prime_UTR 134300 134310 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-26 N/A N/A transcript:Os01t0102300-01 N/A N/A N/A +1 irgsp three_prime_UTR 135254 135439 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-26 N/A N/A transcript:Os01t0102300-01 N/A N/A N/A +1 irgsp gene 139826 141555 . 1 . N/A protein_coding N/A Histone-fold domain containing protein. (Os01t0102400-01) N/A N/A N/A Os01g0102400 gene:Os01g0102400 irgspv1.0-20170804-genes HAP5H SUBUNIT OF CCAAT-BOX BINDING COMPLEX N/A N/A N/A N/A +1 irgsp mRNA 139826 141555 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0102400-01 N/A N/A gene:Os01g0102400 N/A N/A Os01t0102400-01 +1 irgsp exon 139826 139906 . 
1 . N/A N/A 1 N/A -1 -1 Os01t0102400-01.exon1 N/A Os01t0102400-01.exon1 N/A Os01t0102400-01.exon1 transcript:Os01t0102400-01 N/A 1 N/A +1 irgsp exon 140120 141555 . 1 . N/A N/A 1 N/A -1 -1 Os01t0102400-01.exon2 N/A Os01t0102400-01.exon2 N/A Os01t0102400-01.exon2 transcript:Os01t0102400-01 N/A 2 N/A +1 irgsp CDS 140150 141415 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102400-01 N/A N/A transcript:Os01t0102400-01 Os01t0102400-01 N/A N/A +1 irgsp five_prime_UTR 139826 139906 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-27 N/A N/A transcript:Os01t0102400-01 N/A N/A N/A +1 irgsp five_prime_UTR 140120 140149 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-28 N/A N/A transcript:Os01t0102400-01 N/A N/A N/A +1 irgsp three_prime_UTR 141416 141555 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-27 N/A N/A transcript:Os01t0102400-01 N/A N/A N/A +1 irgsp gene 141959 144554 . 1 . N/A protein_coding N/A Conserved hypothetical protein. (Os01t0102500-01) N/A N/A N/A Os01g0102500 gene:Os01g0102500 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 141959 144554 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0102500-01 N/A N/A gene:Os01g0102500 N/A N/A Os01t0102500-01 +1 irgsp exon 141959 142631 . 1 . N/A N/A 1 N/A 2 -1 Os01t0102500-01.exon1 N/A Os01t0102500-01.exon1 N/A Os01t0102500-01.exon1 transcript:Os01t0102500-01 N/A 1 N/A +1 irgsp exon 143191 143431 . 1 . N/A N/A 1 N/A 0 2 Os01t0102500-01.exon2 N/A Os01t0102500-01.exon2 N/A Os01t0102500-01.exon2 transcript:Os01t0102500-01 N/A 2 N/A +1 irgsp exon 143563 143680 . 1 . N/A N/A 1 N/A 1 0 Os01t0102500-01.exon3 N/A Os01t0102500-01.exon3 N/A Os01t0102500-01.exon3 transcript:Os01t0102500-01 N/A 3 N/A +1 irgsp exon 143817 144554 . 1 . N/A N/A 1 N/A -1 1 Os01t0102500-01.exon4 N/A Os01t0102500-01.exon4 N/A Os01t0102500-01.exon4 transcript:Os01t0102500-01 N/A 4 N/A +1 irgsp CDS 142084 142631 . 
1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102500-01 N/A N/A transcript:Os01t0102500-01 Os01t0102500-01 N/A N/A +1 irgsp CDS 143191 143431 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102500-01 N/A N/A transcript:Os01t0102500-01 Os01t0102500-01 N/A N/A +1 irgsp CDS 143563 143680 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102500-01 N/A N/A transcript:Os01t0102500-01 Os01t0102500-01 N/A N/A +1 irgsp CDS 143817 143908 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102500-01 N/A N/A transcript:Os01t0102500-01 Os01t0102500-01 N/A N/A +1 irgsp five_prime_UTR 141959 142083 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-29 N/A N/A transcript:Os01t0102500-01 N/A N/A N/A +1 irgsp three_prime_UTR 143909 144554 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-28 N/A N/A transcript:Os01t0102500-01 N/A N/A N/A +1 irgsp gene 145603 147847 . 1 . N/A protein_coding N/A Shikimate kinase domain containing protein. (Os01t0102600-01);Similar to shikimate kinase family protein. (Os01t0102600-02) N/A N/A N/A Os01g0102600 gene:Os01g0102600 irgspv1.0-20170804-genes Shikimate kinase 4 N/A N/A N/A N/A +1 irgsp mRNA 145603 147847 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0102600-01 N/A N/A gene:Os01g0102600 N/A N/A Os01t0102600-01 +1 irgsp exon 145603 145786 . 1 . N/A N/A 0 N/A 1 -1 Os01t0102600-01.exon1 N/A Os01t0102600-01.exon1 N/A Os01t0102600-01.exon1 transcript:Os01t0102600-01 N/A 1 N/A +1 irgsp exon 145905 145951 . 1 . N/A N/A 0 N/A 0 1 Os01t0102600-01.exon2 N/A Os01t0102600-01.exon2 N/A Os01t0102600-01.exon2 transcript:Os01t0102600-01 N/A 2 N/A +1 irgsp exon 146028 146082 . 1 . N/A N/A 0 N/A 1 0 Os01t0102600-01.exon3 N/A Os01t0102600-01.exon3 N/A Os01t0102600-01.exon3 transcript:Os01t0102600-01 N/A 3 N/A +1 irgsp exon 146179 146339 . 1 . N/A N/A 0 N/A 0 1 Os01t0102600-01.exon4 N/A Os01t0102600-01.exon4 N/A Os01t0102600-01.exon4 transcript:Os01t0102600-01 N/A 4 N/A +1 irgsp exon 146450 146532 . 1 . 
N/A N/A 0 N/A 2 0 Os01t0102600-01.exon5 N/A Os01t0102600-01.exon5 N/A Os01t0102600-01.exon5 transcript:Os01t0102600-01 N/A 5 N/A +1 irgsp exon 146611 146719 . 1 . N/A N/A 0 N/A 0 2 Os01t0102600-01.exon6 N/A Os01t0102600-01.exon6 N/A Os01t0102600-01.exon6 transcript:Os01t0102600-01 N/A 6 N/A +1 irgsp exon 147106 147184 . 1 . N/A N/A 0 N/A 1 0 Os01t0102600-01.exon7 N/A Os01t0102600-01.exon7 N/A Os01t0102600-01.exon7 transcript:Os01t0102600-01 N/A 7 N/A +1 irgsp exon 147311 147375 . 1 . N/A N/A 1 N/A 0 1 Os01t0102600-02.exon2 N/A Os01t0102600-02.exon2 N/A Os01t0102600-02.exon2 transcript:Os01t0102600-01 N/A 8 N/A +1 irgsp exon 147507 147847 . 1 . N/A N/A 0 N/A -1 0 Os01t0102600-01.exon9 N/A Os01t0102600-01.exon9 N/A Os01t0102600-01.exon9 transcript:Os01t0102600-01 N/A 9 N/A +1 irgsp CDS 145645 145786 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-01 N/A N/A transcript:Os01t0102600-01 Os01t0102600-01 N/A N/A +1 irgsp CDS 145905 145951 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-01 N/A N/A transcript:Os01t0102600-01 Os01t0102600-01 N/A N/A +1 irgsp CDS 146028 146082 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-01 N/A N/A transcript:Os01t0102600-01 Os01t0102600-01 N/A N/A +1 irgsp CDS 146179 146339 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-01 N/A N/A transcript:Os01t0102600-01 Os01t0102600-01 N/A N/A +1 irgsp CDS 146450 146532 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-01 N/A N/A transcript:Os01t0102600-01 Os01t0102600-01 N/A N/A +1 irgsp CDS 146611 146719 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-01 N/A N/A transcript:Os01t0102600-01 Os01t0102600-01 N/A N/A +1 irgsp CDS 147106 147184 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-01 N/A N/A transcript:Os01t0102600-01 Os01t0102600-01 N/A N/A +1 irgsp CDS 147311 147375 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-01 N/A N/A transcript:Os01t0102600-01 Os01t0102600-01 N/A N/A +1 irgsp CDS 147507 147575 . 
1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-01 N/A N/A transcript:Os01t0102600-01 Os01t0102600-01 N/A N/A +1 irgsp five_prime_UTR 145603 145644 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-30 N/A N/A transcript:Os01t0102600-01 N/A N/A N/A +1 irgsp three_prime_UTR 147576 147847 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-29 N/A N/A transcript:Os01t0102600-01 N/A N/A N/A +1 irgsp mRNA 147104 147805 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0102600-02 N/A N/A gene:Os01g0102600 N/A N/A Os01t0102600-02 +1 irgsp exon 147104 147184 . 1 . N/A N/A 0 N/A 1 -1 Os01t0102600-02.exon1 N/A Os01t0102600-02.exon1 N/A Os01t0102600-02.exon1 transcript:Os01t0102600-02 N/A 1 N/A +1 irgsp exon 147311 147375 . 1 . N/A N/A 1 N/A 0 1 Os01t0102600-02.exon2 N/A agat-exon-4 N/A Os01t0102600-02.exon2 transcript:Os01t0102600-02 N/A 2 N/A +1 irgsp exon 147507 147805 . 1 . N/A N/A 0 N/A -1 0 Os01t0102600-02.exon3 N/A Os01t0102600-02.exon3 N/A Os01t0102600-02.exon3 transcript:Os01t0102600-02 N/A 3 N/A +1 irgsp CDS 147106 147184 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-02 N/A N/A transcript:Os01t0102600-02 Os01t0102600-02 N/A N/A +1 irgsp CDS 147311 147375 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-02 N/A N/A transcript:Os01t0102600-02 Os01t0102600-02 N/A N/A +1 irgsp CDS 147507 147575 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102600-02 N/A N/A transcript:Os01t0102600-02 Os01t0102600-02 N/A N/A +1 irgsp five_prime_UTR 147104 147105 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-31 N/A N/A transcript:Os01t0102600-02 N/A N/A N/A +1 irgsp three_prime_UTR 147576 147805 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-30 N/A N/A transcript:Os01t0102600-02 N/A N/A N/A +1 irgsp gene 148085 150568 . 1 . N/A protein_coding N/A Translocon-associated beta family protein. 
(Os01t0102700-01) N/A N/A N/A Os01g0102700 gene:Os01g0102700 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 148085 150568 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0102700-01 N/A N/A gene:Os01g0102700 N/A N/A Os01t0102700-01 +1 irgsp exon 148085 148313 . 1 . N/A N/A 1 N/A 2 -1 Os01t0102700-01.exon1 N/A Os01t0102700-01.exon1 N/A Os01t0102700-01.exon1 transcript:Os01t0102700-01 N/A 1 N/A +1 irgsp exon 149450 149548 . 1 . N/A N/A 1 N/A 2 2 Os01t0102700-01.exon2 N/A Os01t0102700-01.exon2 N/A Os01t0102700-01.exon2 transcript:Os01t0102700-01 N/A 2 N/A +1 irgsp exon 149634 149742 . 1 . N/A N/A 1 N/A 0 2 Os01t0102700-01.exon3 N/A Os01t0102700-01.exon3 N/A Os01t0102700-01.exon3 transcript:Os01t0102700-01 N/A 3 N/A +1 irgsp exon 149856 149931 . 1 . N/A N/A 1 N/A 1 0 Os01t0102700-01.exon4 N/A Os01t0102700-01.exon4 N/A Os01t0102700-01.exon4 transcript:Os01t0102700-01 N/A 4 N/A +1 irgsp exon 150152 150568 . 1 . N/A N/A 1 N/A -1 1 Os01t0102700-01.exon5 N/A Os01t0102700-01.exon5 N/A Os01t0102700-01.exon5 transcript:Os01t0102700-01 N/A 5 N/A +1 irgsp CDS 148147 148313 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102700-01 N/A N/A transcript:Os01t0102700-01 Os01t0102700-01 N/A N/A +1 irgsp CDS 149450 149548 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102700-01 N/A N/A transcript:Os01t0102700-01 Os01t0102700-01 N/A N/A +1 irgsp CDS 149634 149742 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102700-01 N/A N/A transcript:Os01t0102700-01 Os01t0102700-01 N/A N/A +1 irgsp CDS 149856 149931 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102700-01 N/A N/A transcript:Os01t0102700-01 Os01t0102700-01 N/A N/A +1 irgsp CDS 150152 150318 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102700-01 N/A N/A transcript:Os01t0102700-01 Os01t0102700-01 N/A N/A +1 irgsp five_prime_UTR 148085 148146 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-32 N/A N/A transcript:Os01t0102700-01 N/A N/A N/A +1 irgsp three_prime_UTR 150319 150568 . 1 . 
N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-31 N/A N/A transcript:Os01t0102700-01 N/A N/A N/A +1 irgsp gene 152853 156449 . 1 . N/A protein_coding N/A Similar to chromatin remodeling complex subunit. (Os01t0102800-01) N/A N/A N/A Os01g0102800 gene:Os01g0102800 irgspv1.0-20170804-genes Cockayne syndrome WD-repeat protein N/A N/A N/A N/A +1 irgsp mRNA 152853 156449 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0102800-01 N/A N/A gene:Os01g0102800 N/A N/A Os01t0102800-01 +1 irgsp exon 152853 153025 . 1 . N/A N/A 1 N/A 1 -1 Os01t0102800-01.exon1 N/A Os01t0102800-01.exon1 N/A Os01t0102800-01.exon1 transcript:Os01t0102800-01 N/A 1 N/A +1 irgsp exon 153178 154646 . 1 . N/A N/A 1 N/A 0 1 Os01t0102800-01.exon2 N/A Os01t0102800-01.exon2 N/A Os01t0102800-01.exon2 transcript:Os01t0102800-01 N/A 2 N/A +1 irgsp exon 155010 155450 . 1 . N/A N/A 1 N/A 0 0 Os01t0102800-01.exon3 N/A Os01t0102800-01.exon3 N/A Os01t0102800-01.exon3 transcript:Os01t0102800-01 N/A 3 N/A +1 irgsp exon 155543 156449 . 1 . N/A N/A 1 N/A -1 0 Os01t0102800-01.exon4 N/A Os01t0102800-01.exon4 N/A Os01t0102800-01.exon4 transcript:Os01t0102800-01 N/A 4 N/A +1 irgsp CDS 152854 153025 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102800-01 N/A N/A transcript:Os01t0102800-01 Os01t0102800-01 N/A N/A +1 irgsp CDS 153178 154646 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102800-01 N/A N/A transcript:Os01t0102800-01 Os01t0102800-01 N/A N/A +1 irgsp CDS 155010 155450 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102800-01 N/A N/A transcript:Os01t0102800-01 Os01t0102800-01 N/A N/A +1 irgsp CDS 155543 156214 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102800-01 N/A N/A transcript:Os01t0102800-01 Os01t0102800-01 N/A N/A +1 irgsp five_prime_UTR 152853 152853 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-33 N/A N/A transcript:Os01t0102800-01 N/A N/A N/A +1 irgsp three_prime_UTR 156215 156449 . 1 . 
N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-32 N/A N/A transcript:Os01t0102800-01 N/A N/A N/A +1 irgsp gene 164577 168921 . 1 . N/A protein_coding N/A Similar to nitrilase 2. (Os01t0102850-00) N/A N/A N/A Os01g0102850 gene:Os01g0102850 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 164577 168921 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0102850-00 N/A N/A gene:Os01g0102850 N/A N/A Os01t0102850-00 +1 irgsp exon 164577 164905 . 1 . N/A N/A 1 N/A -1 -1 Os01t0102850-00.exon1 N/A Os01t0102850-00.exon1 N/A Os01t0102850-00.exon1 transcript:Os01t0102850-00 N/A 1 N/A +1 irgsp exon 168499 168921 . 1 . N/A N/A 1 N/A 0 -1 Os01t0102850-00.exon2 N/A Os01t0102850-00.exon2 N/A Os01t0102850-00.exon2 transcript:Os01t0102850-00 N/A 2 N/A +1 irgsp CDS 168805 168921 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102850-00 N/A N/A transcript:Os01t0102850-00 Os01t0102850-00 N/A N/A +1 irgsp five_prime_UTR 164577 164905 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-34 N/A N/A transcript:Os01t0102850-00 N/A N/A N/A +1 irgsp five_prime_UTR 168499 168804 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-35 N/A N/A transcript:Os01t0102850-00 N/A N/A N/A +1 irgsp gene 169390 170316 . -1 . N/A protein_coding N/A Light-regulated protein, Regulation of light-dependent attachment of LEAF-TYPE FERREDOXIN-NADP+ OXIDOREDUCTASE (LFNR) to the thylakoid membrane (Os01t0102900-01) N/A N/A N/A Os01g0102900 gene:Os01g0102900 irgspv1.0-20170804-genes LIGHT-REGULATED GENE 1 N/A N/A N/A N/A +1 irgsp mRNA 169390 170316 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0102900-01 N/A N/A gene:Os01g0102900 N/A N/A Os01t0102900-01 +1 irgsp exon 169390 169656 . -1 . N/A N/A 1 N/A -1 2 Os01t0102900-01.exon3 N/A Os01t0102900-01.exon3 N/A Os01t0102900-01.exon3 transcript:Os01t0102900-01 N/A 3 N/A +1 irgsp exon 169751 169909 . -1 . 
N/A N/A 1 N/A 2 2 Os01t0102900-01.exon2 N/A Os01t0102900-01.exon2 N/A Os01t0102900-01.exon2 transcript:Os01t0102900-01 N/A 2 N/A +1 irgsp exon 170091 170316 . -1 . N/A N/A 1 N/A 2 -1 Os01t0102900-01.exon1 N/A Os01t0102900-01.exon1 N/A Os01t0102900-01.exon1 transcript:Os01t0102900-01 N/A 1 N/A +1 irgsp CDS 169599 169656 . -1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102900-01 N/A N/A transcript:Os01t0102900-01 Os01t0102900-01 N/A N/A +1 irgsp CDS 169751 169909 . -1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102900-01 N/A N/A transcript:Os01t0102900-01 Os01t0102900-01 N/A N/A +1 irgsp CDS 170091 170260 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0102900-01 N/A N/A transcript:Os01t0102900-01 Os01t0102900-01 N/A N/A +1 irgsp five_prime_UTR 170261 170316 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-36 N/A N/A transcript:Os01t0102900-01 N/A N/A N/A +1 irgsp three_prime_UTR 169390 169598 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-33 N/A N/A transcript:Os01t0102900-01 N/A N/A N/A +1 irgsp gene 170798 173144 . -1 . N/A protein_coding N/A Snf7 family protein. (Os01t0103000-01) N/A N/A N/A Os01g0103000 gene:Os01g0103000 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 170798 173144 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0103000-01 N/A N/A gene:Os01g0103000 N/A N/A Os01t0103000-01 +1 irgsp exon 170798 171095 . -1 . N/A N/A 1 N/A -1 0 Os01t0103000-01.exon7 N/A Os01t0103000-01.exon7 N/A Os01t0103000-01.exon7 transcript:Os01t0103000-01 N/A 7 N/A +1 irgsp exon 171406 171554 . -1 . N/A N/A 1 N/A 0 1 Os01t0103000-01.exon6 N/A Os01t0103000-01.exon6 N/A Os01t0103000-01.exon6 transcript:Os01t0103000-01 N/A 6 N/A +1 irgsp exon 171764 171875 . -1 . N/A N/A 1 N/A 1 0 Os01t0103000-01.exon5 N/A Os01t0103000-01.exon5 N/A Os01t0103000-01.exon5 transcript:Os01t0103000-01 N/A 5 N/A +1 irgsp exon 172398 172469 . -1 . 
N/A N/A 1 N/A 0 0 Os01t0103000-01.exon4 N/A Os01t0103000-01.exon4 N/A Os01t0103000-01.exon4 transcript:Os01t0103000-01 N/A 4 N/A +1 irgsp exon 172578 172671 . -1 . N/A N/A 1 N/A 0 2 Os01t0103000-01.exon3 N/A Os01t0103000-01.exon3 N/A Os01t0103000-01.exon3 transcript:Os01t0103000-01 N/A 3 N/A +1 irgsp exon 172770 172921 . -1 . N/A N/A 1 N/A 2 0 Os01t0103000-01.exon2 N/A Os01t0103000-01.exon2 N/A Os01t0103000-01.exon2 transcript:Os01t0103000-01 N/A 2 N/A +1 irgsp exon 173004 173144 . -1 . N/A N/A 1 N/A 0 -1 Os01t0103000-01.exon1 N/A Os01t0103000-01.exon1 N/A Os01t0103000-01.exon1 transcript:Os01t0103000-01 N/A 1 N/A +1 irgsp CDS 171045 171095 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103000-01 N/A N/A transcript:Os01t0103000-01 Os01t0103000-01 N/A N/A +1 irgsp CDS 171406 171554 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103000-01 N/A N/A transcript:Os01t0103000-01 Os01t0103000-01 N/A N/A +1 irgsp CDS 171764 171875 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103000-01 N/A N/A transcript:Os01t0103000-01 Os01t0103000-01 N/A N/A +1 irgsp CDS 172398 172469 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103000-01 N/A N/A transcript:Os01t0103000-01 Os01t0103000-01 N/A N/A +1 irgsp CDS 172578 172671 . -1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103000-01 N/A N/A transcript:Os01t0103000-01 Os01t0103000-01 N/A N/A +1 irgsp CDS 172770 172921 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103000-01 N/A N/A transcript:Os01t0103000-01 Os01t0103000-01 N/A N/A +1 irgsp CDS 173004 173072 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103000-01 N/A N/A transcript:Os01t0103000-01 Os01t0103000-01 N/A N/A +1 irgsp five_prime_UTR 173073 173144 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-37 N/A N/A transcript:Os01t0103000-01 N/A N/A N/A +1 irgsp three_prime_UTR 170798 171044 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-34 N/A N/A transcript:Os01t0103000-01 N/A N/A N/A +1 irgsp gene 178607 180575 . 1 . 
N/A protein_coding N/A TGF-beta receptor, type I/II extracellular region family protein. (Os01t0103100-01);Similar to predicted protein. (Os01t0103100-02) N/A N/A N/A Os01g0103100 gene:Os01g0103100 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 178607 180548 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0103100-01 N/A N/A gene:Os01g0103100 N/A N/A Os01t0103100-01 +1 irgsp exon 178607 180548 . 1 . N/A N/A 0 N/A -1 -1 Os01t0103100-01.exon1 N/A Os01t0103100-01.exon1 N/A Os01t0103100-01.exon1 transcript:Os01t0103100-01 N/A 1 N/A +1 irgsp CDS 178642 180462 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103100-01 N/A N/A transcript:Os01t0103100-01 Os01t0103100-01 N/A N/A +1 irgsp five_prime_UTR 178607 178641 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-38 N/A N/A transcript:Os01t0103100-01 N/A N/A N/A +1 irgsp three_prime_UTR 180463 180548 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-35 N/A N/A transcript:Os01t0103100-01 N/A N/A N/A +1 irgsp mRNA 178652 180575 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0103100-02 N/A N/A gene:Os01g0103100 N/A N/A Os01t0103100-02 +1 irgsp exon 178652 180575 . 1 . N/A N/A 0 N/A -1 -1 Os01t0103100-02.exon1 N/A Os01t0103100-02.exon1 N/A Os01t0103100-02.exon1 transcript:Os01t0103100-02 N/A 1 N/A +1 irgsp CDS 178678 180462 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103100-02 N/A N/A transcript:Os01t0103100-02 Os01t0103100-02 N/A N/A +1 irgsp five_prime_UTR 178652 178677 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-39 N/A N/A transcript:Os01t0103100-02 N/A N/A N/A +1 irgsp three_prime_UTR 180463 180575 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-36 N/A N/A transcript:Os01t0103100-02 N/A N/A N/A +1 irgsp gene 178815 180433 . -1 . N/A protein_coding N/A Hypothetical protein. (Os01t0103075-00) N/A N/A N/A Os01g0103075 gene:Os01g0103075 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 178815 180433 . -1 . 
N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0103075-00 N/A N/A gene:Os01g0103075 N/A N/A Os01t0103075-00 +1 irgsp exon 178815 180433 . -1 . N/A N/A 1 N/A -1 -1 Os01t0103075-00.exon1 N/A Os01t0103075-00.exon1 N/A Os01t0103075-00.exon1 transcript:Os01t0103075-00 N/A 1 N/A +1 irgsp CDS 179512 180054 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103075-00 N/A N/A transcript:Os01t0103075-00 Os01t0103075-00 N/A N/A +1 irgsp five_prime_UTR 180055 180433 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-40 N/A N/A transcript:Os01t0103075-00 N/A N/A N/A +1 irgsp three_prime_UTR 178815 179511 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-37 N/A N/A transcript:Os01t0103075-00 N/A N/A N/A +1 Ensembl_Plants ncRNA_gene 182074 182154 . 1 . N/A tRNA N/A tRNA-Leu for anticodon AAG N/A N/A N/A ENSRNA049442722 gene:ENSRNA049442722 trnascan_gene tRNA-Leu N/A N/A N/A N/A +1 Ensembl_Plants tRNA 182074 182154 . 1 . N/A tRNA N/A N/A N/A N/A N/A N/A transcript:ENSRNA049442722-T1 N/A N/A gene:ENSRNA049442722 N/A N/A ENSRNA049442722-T1 +1 Ensembl_Plants exon 182074 182154 . 1 . N/A N/A 1 N/A -1 -1 ENSRNA049442722-E1 N/A ENSRNA049442722-E1 N/A ENSRNA049442722-E1 transcript:ENSRNA049442722-T1 N/A 1 N/A +1 irgsp gene 185189 185828 . -1 . N/A protein_coding N/A Hypothetical gene. (Os01t0103400-01) N/A N/A N/A Os01g0103400 gene:Os01g0103400 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 185189 185828 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0103400-01 N/A N/A gene:Os01g0103400 N/A N/A Os01t0103400-01 +1 irgsp exon 185189 185828 . -1 . N/A N/A 1 N/A -1 -1 Os01t0103400-01.exon1 N/A Os01t0103400-01.exon1 N/A Os01t0103400-01.exon1 transcript:Os01t0103400-01 N/A 1 N/A +1 irgsp CDS 185435 185827 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103400-01 N/A N/A transcript:Os01t0103400-01 Os01t0103400-01 N/A N/A +1 irgsp five_prime_UTR 185828 185828 . -1 . 
N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-41 N/A N/A transcript:Os01t0103400-01 N/A N/A N/A +1 irgsp three_prime_UTR 185189 185434 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-38 N/A N/A transcript:Os01t0103400-01 N/A N/A N/A +1 irgsp repeat_region 186000 186100 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A fakeRepeat2 N/A N/A N/A N/A N/A N/A +1 irgsp gene 186250 190904 . -1 . N/A protein_coding N/A Similar to sterol-8,7-isomerase. (Os01t0103600-01);Emopamil-binding family protein. (Os01t0103600-02) N/A N/A N/A Os01g0103600 gene:Os01g0103600 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 186250 190262 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0103600-02 N/A N/A gene:Os01g0103600 N/A N/A Os01t0103600-02 +1 irgsp exon 186250 186771 . -1 . N/A N/A 0 N/A -1 2 Os01t0103600-02.exon4 N/A Os01t0103600-02.exon4 N/A Os01t0103600-02.exon4 transcript:Os01t0103600-02 N/A 4 N/A +1 irgsp exon 189607 189715 . -1 . N/A N/A 0 N/A 2 1 Os01t0103600-02.exon3 N/A Os01t0103600-02.exon3 N/A Os01t0103600-02.exon3 transcript:Os01t0103600-02 N/A 3 N/A +1 irgsp exon 189841 189990 . -1 . N/A N/A 1 N/A 1 1 Os01t0103600-02.exon2 N/A Os01t0103600-02.exon2 N/A Os01t0103600-02.exon2 transcript:Os01t0103600-02 N/A 2 N/A +1 irgsp exon 190087 190262 . -1 . N/A N/A 0 N/A 1 -1 Os01t0103600-02.exon1 N/A Os01t0103600-02.exon1 N/A Os01t0103600-02.exon1 transcript:Os01t0103600-02 N/A 1 N/A +1 irgsp CDS 186516 186771 . -1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103600-02 N/A N/A transcript:Os01t0103600-02 Os01t0103600-02 N/A N/A +1 irgsp CDS 189607 189715 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103600-02 N/A N/A transcript:Os01t0103600-02 Os01t0103600-02 N/A N/A +1 irgsp CDS 189841 189990 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103600-02 N/A N/A transcript:Os01t0103600-02 Os01t0103600-02 N/A N/A +1 irgsp CDS 190087 190231 . 
-1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103600-02 N/A N/A transcript:Os01t0103600-02 Os01t0103600-02 N/A N/A +1 irgsp five_prime_UTR 190232 190262 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-42 N/A N/A transcript:Os01t0103600-02 N/A N/A N/A +1 irgsp three_prime_UTR 186250 186515 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-39 N/A N/A transcript:Os01t0103600-02 N/A N/A N/A +1 irgsp mRNA 187345 190904 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0103600-01 N/A N/A gene:Os01g0103600 N/A N/A Os01t0103600-01 +1 irgsp exon 187345 189715 . -1 . N/A N/A 0 N/A -1 1 Os01t0103600-01.exon3 N/A Os01t0103600-01.exon3 N/A Os01t0103600-01.exon3 transcript:Os01t0103600-01 N/A 3 N/A +1 irgsp exon 189841 189990 . -1 . N/A N/A 1 N/A 1 1 Os01t0103600-02.exon2 N/A agat-exon-5 N/A Os01t0103600-02.exon2 transcript:Os01t0103600-01 N/A 2 N/A +1 irgsp exon 190087 190904 . -1 . N/A N/A 0 N/A 1 -1 Os01t0103600-01.exon1 N/A Os01t0103600-01.exon1 N/A Os01t0103600-01.exon1 transcript:Os01t0103600-01 N/A 1 N/A +1 irgsp CDS 189396 189715 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103600-01 N/A N/A transcript:Os01t0103600-01 Os01t0103600-01 N/A N/A +1 irgsp CDS 189841 189990 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103600-01 N/A N/A transcript:Os01t0103600-01 Os01t0103600-01 N/A N/A +1 irgsp CDS 190087 190231 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103600-01 N/A N/A transcript:Os01t0103600-01 Os01t0103600-01 N/A N/A +1 irgsp five_prime_UTR 190232 190904 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-43 N/A N/A transcript:Os01t0103600-01 N/A N/A N/A +1 irgsp three_prime_UTR 187345 189395 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-40 N/A N/A transcript:Os01t0103600-01 N/A N/A N/A +1 irgsp gene 187545 188586 . 1 . N/A protein_coding N/A Hypothetical gene. (Os01t0103650-00) N/A N/A N/A Os01g0103650 gene:Os01g0103650 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 187545 188586 . 1 . 
N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0103650-00 N/A N/A gene:Os01g0103650 N/A N/A Os01t0103650-00 +1 irgsp exon 187545 188020 . 1 . N/A N/A 1 N/A -1 -1 Os01t0103650-00.exon1 N/A Os01t0103650-00.exon1 N/A Os01t0103650-00.exon1 transcript:Os01t0103650-00 N/A 1 N/A +1 irgsp exon 188060 188385 . 1 . N/A N/A 1 N/A -1 -1 Os01t0103650-00.exon2 N/A Os01t0103650-00.exon2 N/A Os01t0103650-00.exon2 transcript:Os01t0103650-00 N/A 2 N/A +1 irgsp exon 188455 188586 . 1 . N/A N/A 1 N/A -1 -1 Os01t0103650-00.exon3 N/A Os01t0103650-00.exon3 N/A Os01t0103650-00.exon3 transcript:Os01t0103650-00 N/A 3 N/A +1 irgsp CDS 187547 187768 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103650-00 N/A N/A transcript:Os01t0103650-00 Os01t0103650-00 N/A N/A +1 irgsp five_prime_UTR 187545 187546 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-44 N/A N/A transcript:Os01t0103650-00 N/A N/A N/A +1 irgsp three_prime_UTR 187769 188020 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-41 N/A N/A transcript:Os01t0103650-00 N/A N/A N/A +1 irgsp three_prime_UTR 188060 188385 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-42 N/A N/A transcript:Os01t0103650-00 N/A N/A N/A +1 irgsp three_prime_UTR 188455 188586 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-43 N/A N/A transcript:Os01t0103650-00 N/A N/A N/A +1 irgsp gene 191037 196287 . 1 . N/A protein_coding N/A Conserved hypothetical protein. (Os01t0103700-01) N/A N/A N/A Os01g0103700 gene:Os01g0103700 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 191037 196287 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0103700-01 N/A N/A gene:Os01g0103700 N/A N/A Os01t0103700-01 +1 irgsp exon 191037 191161 . 1 . N/A N/A 1 N/A -1 -1 Os01t0103700-01.exon1 N/A Os01t0103700-01.exon1 N/A Os01t0103700-01.exon1 transcript:Os01t0103700-01 N/A 1 N/A +1 irgsp exon 191625 191705 . 1 . 
N/A N/A 1 N/A 0 -1 Os01t0103700-01.exon2 N/A Os01t0103700-01.exon2 N/A Os01t0103700-01.exon2 transcript:Os01t0103700-01 N/A 2 N/A +1 irgsp exon 192399 192506 . 1 . N/A N/A 1 N/A 0 0 Os01t0103700-01.exon3 N/A Os01t0103700-01.exon3 N/A Os01t0103700-01.exon3 transcript:Os01t0103700-01 N/A 3 N/A +1 irgsp exon 192958 193161 . 1 . N/A N/A 1 N/A 0 0 Os01t0103700-01.exon4 N/A Os01t0103700-01.exon4 N/A Os01t0103700-01.exon4 transcript:Os01t0103700-01 N/A 4 N/A +1 irgsp exon 193248 193356 . 1 . N/A N/A 1 N/A 1 0 Os01t0103700-01.exon5 N/A Os01t0103700-01.exon5 N/A Os01t0103700-01.exon5 transcript:Os01t0103700-01 N/A 5 N/A +1 irgsp exon 193434 196287 . 1 . N/A N/A 1 N/A -1 1 Os01t0103700-01.exon6 N/A Os01t0103700-01.exon6 N/A Os01t0103700-01.exon6 transcript:Os01t0103700-01 N/A 6 N/A +1 irgsp CDS 191694 191705 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103700-01 N/A N/A transcript:Os01t0103700-01 Os01t0103700-01 N/A N/A +1 irgsp CDS 192399 192506 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103700-01 N/A N/A transcript:Os01t0103700-01 Os01t0103700-01 N/A N/A +1 irgsp CDS 192958 193161 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103700-01 N/A N/A transcript:Os01t0103700-01 Os01t0103700-01 N/A N/A +1 irgsp CDS 193248 193356 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103700-01 N/A N/A transcript:Os01t0103700-01 Os01t0103700-01 N/A N/A +1 irgsp CDS 193434 193507 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103700-01 N/A N/A transcript:Os01t0103700-01 Os01t0103700-01 N/A N/A +1 irgsp five_prime_UTR 191037 191161 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-45 N/A N/A transcript:Os01t0103700-01 N/A N/A N/A +1 irgsp five_prime_UTR 191625 191693 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-46 N/A N/A transcript:Os01t0103700-01 N/A N/A N/A +1 irgsp three_prime_UTR 193508 196287 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-44 N/A N/A transcript:Os01t0103700-01 N/A N/A N/A +1 irgsp gene 197647 200803 . 1 . 
N/A protein_coding N/A Conserved hypothetical protein. (Os01t0103800-01) N/A N/A N/A Os01g0103800 gene:Os01g0103800 irgspv1.0-20170804-genes OsDW1-01g N/A N/A N/A N/A +1 irgsp mRNA 197647 200803 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0103800-01 N/A N/A gene:Os01g0103800 N/A N/A Os01t0103800-01 +1 irgsp exon 197647 197838 . 1 . N/A N/A 1 N/A -1 -1 Os01t0103800-01.exon1 N/A Os01t0103800-01.exon1 N/A Os01t0103800-01.exon1 transcript:Os01t0103800-01 N/A 1 N/A +1 irgsp exon 198034 198225 . 1 . N/A N/A 1 N/A 0 -1 Os01t0103800-01.exon2 N/A Os01t0103800-01.exon2 N/A Os01t0103800-01.exon2 transcript:Os01t0103800-01 N/A 2 N/A +1 irgsp exon 198830 200036 . 1 . N/A N/A 1 N/A 1 0 Os01t0103800-01.exon3 N/A Os01t0103800-01.exon3 N/A Os01t0103800-01.exon3 transcript:Os01t0103800-01 N/A 3 N/A +1 irgsp exon 200253 200803 . 1 . N/A N/A 1 N/A -1 1 Os01t0103800-01.exon4 N/A Os01t0103800-01.exon4 N/A Os01t0103800-01.exon4 transcript:Os01t0103800-01 N/A 4 N/A +1 irgsp CDS 198130 198225 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103800-01 N/A N/A transcript:Os01t0103800-01 Os01t0103800-01 N/A N/A +1 irgsp CDS 198830 200036 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103800-01 N/A N/A transcript:Os01t0103800-01 Os01t0103800-01 N/A N/A +1 irgsp CDS 200253 200479 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103800-01 N/A N/A transcript:Os01t0103800-01 Os01t0103800-01 N/A N/A +1 irgsp five_prime_UTR 197647 197838 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-47 N/A N/A transcript:Os01t0103800-01 N/A N/A N/A +1 irgsp five_prime_UTR 198034 198129 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-48 N/A N/A transcript:Os01t0103800-01 N/A N/A N/A +1 irgsp three_prime_UTR 200480 200803 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-45 N/A N/A transcript:Os01t0103800-01 N/A N/A N/A +1 irgsp gene 201944 206202 . 1 . N/A protein_coding N/A Polynucleotidyl transferase, Ribonuclease H fold domain containing protein. 
(Os01t0103900-01) N/A N/A N/A Os01g0103900 gene:Os01g0103900 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 201944 206202 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0103900-01 N/A N/A gene:Os01g0103900 N/A N/A Os01t0103900-01 +1 irgsp exon 201944 202110 . 1 . N/A N/A 1 N/A 0 -1 Os01t0103900-01.exon1 N/A Os01t0103900-01.exon1 N/A Os01t0103900-01.exon1 transcript:Os01t0103900-01 N/A 1 N/A +1 irgsp exon 202252 202359 . 1 . N/A N/A 1 N/A 0 0 Os01t0103900-01.exon2 N/A Os01t0103900-01.exon2 N/A Os01t0103900-01.exon2 transcript:Os01t0103900-01 N/A 2 N/A +1 irgsp exon 203007 203127 . 1 . N/A N/A 1 N/A 1 0 Os01t0103900-01.exon3 N/A Os01t0103900-01.exon3 N/A Os01t0103900-01.exon3 transcript:Os01t0103900-01 N/A 3 N/A +1 irgsp exon 203302 203429 . 1 . N/A N/A 1 N/A 0 1 Os01t0103900-01.exon4 N/A Os01t0103900-01.exon4 N/A Os01t0103900-01.exon4 transcript:Os01t0103900-01 N/A 4 N/A +1 irgsp exon 203511 203658 . 1 . N/A N/A 1 N/A 1 0 Os01t0103900-01.exon5 N/A Os01t0103900-01.exon5 N/A Os01t0103900-01.exon5 transcript:Os01t0103900-01 N/A 5 N/A +1 irgsp exon 203760 203938 . 1 . N/A N/A 1 N/A 0 1 Os01t0103900-01.exon6 N/A Os01t0103900-01.exon6 N/A Os01t0103900-01.exon6 transcript:Os01t0103900-01 N/A 6 N/A +1 irgsp exon 204203 204440 . 1 . N/A N/A 1 N/A 1 0 Os01t0103900-01.exon7 N/A Os01t0103900-01.exon7 N/A Os01t0103900-01.exon7 transcript:Os01t0103900-01 N/A 7 N/A +1 irgsp exon 204543 204635 . 1 . N/A N/A 1 N/A 1 1 Os01t0103900-01.exon8 N/A Os01t0103900-01.exon8 N/A Os01t0103900-01.exon8 transcript:Os01t0103900-01 N/A 8 N/A +1 irgsp exon 204730 204875 . 1 . N/A N/A 1 N/A 0 1 Os01t0103900-01.exon9 N/A Os01t0103900-01.exon9 N/A Os01t0103900-01.exon9 transcript:Os01t0103900-01 N/A 9 N/A +1 irgsp exon 205042 205149 . 1 . N/A N/A 1 N/A 0 0 Os01t0103900-01.exon10 N/A Os01t0103900-01.exon10 N/A Os01t0103900-01.exon10 transcript:Os01t0103900-01 N/A 10 N/A +1 irgsp exon 205290 205378 . 1 . 
N/A N/A 1 N/A 2 0 Os01t0103900-01.exon11 N/A Os01t0103900-01.exon11 N/A Os01t0103900-01.exon11 transcript:Os01t0103900-01 N/A 11 N/A +1 irgsp exon 205534 206202 . 1 . N/A N/A 1 N/A -1 2 Os01t0103900-01.exon12 N/A Os01t0103900-01.exon12 N/A Os01t0103900-01.exon12 transcript:Os01t0103900-01 N/A 12 N/A +1 irgsp CDS 202042 202110 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp CDS 202252 202359 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp CDS 203007 203127 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp CDS 203302 203429 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp CDS 203511 203658 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp CDS 203760 203938 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp CDS 204203 204440 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp CDS 204543 204635 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp CDS 204730 204875 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp CDS 205042 205149 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp CDS 205290 205378 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp CDS 205534 205543 . 
1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0103900-01 N/A N/A transcript:Os01t0103900-01 Os01t0103900-01 N/A N/A +1 irgsp five_prime_UTR 201944 202041 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-49 N/A N/A transcript:Os01t0103900-01 N/A N/A N/A +1 irgsp three_prime_UTR 205544 206202 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-46 N/A N/A transcript:Os01t0103900-01 N/A N/A N/A +1 irgsp gene 206131 209606 . -1 . N/A protein_coding N/A C-type lectin domain containing protein. (Os01t0104000-01);Similar to predicted protein. (Os01t0104000-02) N/A N/A N/A Os01g0104000 gene:Os01g0104000 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 206131 209581 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104000-02 N/A N/A gene:Os01g0104000 N/A N/A Os01t0104000-02 +1 irgsp exon 206131 207029 . -1 . N/A N/A 0 N/A -1 2 Os01t0104000-02.exon4 N/A Os01t0104000-02.exon4 N/A Os01t0104000-02.exon4 transcript:Os01t0104000-02 N/A 4 N/A +1 irgsp exon 207706 208273 . -1 . N/A N/A 0 N/A 2 1 Os01t0104000-02.exon3 N/A Os01t0104000-02.exon3 N/A Os01t0104000-02.exon3 transcript:Os01t0104000-02 N/A 3 N/A +1 irgsp exon 208408 208836 . -1 . N/A N/A 1 N/A 1 1 Os01t0104000-01.exon2 N/A Os01t0104000-01.exon2 N/A Os01t0104000-01.exon2 transcript:Os01t0104000-02 N/A 2 N/A +1 irgsp exon 209438 209581 . -1 . N/A N/A 0 N/A 1 -1 Os01t0104000-02.exon1 N/A Os01t0104000-02.exon1 N/A Os01t0104000-02.exon1 transcript:Os01t0104000-02 N/A 1 N/A +1 irgsp CDS 206450 207029 . -1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104000-02 N/A N/A transcript:Os01t0104000-02 Os01t0104000-02 N/A N/A +1 irgsp CDS 207706 208273 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104000-02 N/A N/A transcript:Os01t0104000-02 Os01t0104000-02 N/A N/A +1 irgsp CDS 208408 208836 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104000-02 N/A N/A transcript:Os01t0104000-02 Os01t0104000-02 N/A N/A +1 irgsp CDS 209438 209525 . 
-1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104000-02 N/A N/A transcript:Os01t0104000-02 Os01t0104000-02 N/A N/A +1 irgsp five_prime_UTR 209526 209581 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-50 N/A N/A transcript:Os01t0104000-02 N/A N/A N/A +1 irgsp three_prime_UTR 206131 206449 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-47 N/A N/A transcript:Os01t0104000-02 N/A N/A N/A +1 irgsp mRNA 206134 209606 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104000-01 N/A N/A gene:Os01g0104000 N/A N/A Os01t0104000-01 +1 irgsp exon 206134 207029 . -1 . N/A N/A 0 N/A -1 2 Os01t0104000-01.exon4 N/A Os01t0104000-01.exon4 N/A Os01t0104000-01.exon4 transcript:Os01t0104000-01 N/A 4 N/A +1 irgsp exon 207706 208276 . -1 . N/A N/A 0 N/A 2 1 Os01t0104000-01.exon3 N/A Os01t0104000-01.exon3 N/A Os01t0104000-01.exon3 transcript:Os01t0104000-01 N/A 3 N/A +1 irgsp exon 208408 208836 . -1 . N/A N/A 1 N/A 1 1 Os01t0104000-01.exon2 N/A agat-exon-6 N/A Os01t0104000-01.exon2 transcript:Os01t0104000-01 N/A 2 N/A +1 irgsp exon 209438 209606 . -1 . N/A N/A 0 N/A 1 -1 Os01t0104000-01.exon1 N/A Os01t0104000-01.exon1 N/A Os01t0104000-01.exon1 transcript:Os01t0104000-01 N/A 1 N/A +1 irgsp CDS 206450 207029 . -1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104000-01 N/A N/A transcript:Os01t0104000-01 Os01t0104000-01 N/A N/A +1 irgsp CDS 207706 208276 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104000-01 N/A N/A transcript:Os01t0104000-01 Os01t0104000-01 N/A N/A +1 irgsp CDS 208408 208836 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104000-01 N/A N/A transcript:Os01t0104000-01 Os01t0104000-01 N/A N/A +1 irgsp CDS 209438 209525 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104000-01 N/A N/A transcript:Os01t0104000-01 Os01t0104000-01 N/A N/A +1 irgsp five_prime_UTR 209526 209606 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-51 N/A N/A transcript:Os01t0104000-01 N/A N/A N/A +1 irgsp three_prime_UTR 206134 206449 . -1 . 
N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-48 N/A N/A transcript:Os01t0104000-01 N/A N/A N/A +1 irgsp gene 209771 214173 . 1 . N/A protein_coding N/A Similar to protein binding / zinc ion binding. (Os01t0104100-01);Similar to protein binding / zinc ion binding. (Os01t0104100-02) N/A N/A N/A Os01g0104100 gene:Os01g0104100 irgspv1.0-20170804-genes cold-inducible, cold-inducible zinc finger protein N/A N/A N/A N/A +1 irgsp mRNA 209771 214173 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104100-01 N/A N/A gene:Os01g0104100 N/A N/A Os01t0104100-01 +1 irgsp exon 209771 209896 . 1 . N/A N/A 0 N/A 0 0 Os01t0104100-01.exon1 N/A Os01t0104100-01.exon1 N/A Os01t0104100-01.exon1 transcript:Os01t0104100-01 N/A 1 N/A +1 irgsp exon 210244 210563 . 1 . N/A N/A 1 N/A 2 0 Os01t0104100-01.exon2 N/A Os01t0104100-01.exon2 N/A Os01t0104100-01.exon2 transcript:Os01t0104100-01 N/A 2 N/A +1 irgsp exon 210659 210890 . 1 . N/A N/A 1 N/A 0 2 Os01t0104100-01.exon3 N/A Os01t0104100-01.exon3 N/A Os01t0104100-01.exon3 transcript:Os01t0104100-01 N/A 3 N/A +1 irgsp exon 211015 211160 . 1 . N/A N/A 1 N/A 2 0 Os01t0104100-01.exon4 N/A Os01t0104100-01.exon4 N/A Os01t0104100-01.exon4 transcript:Os01t0104100-01 N/A 4 N/A +1 irgsp exon 212265 212352 . 1 . N/A N/A 1 N/A 0 2 Os01t0104100-01.exon5 N/A Os01t0104100-01.exon5 N/A Os01t0104100-01.exon5 transcript:Os01t0104100-01 N/A 5 N/A +1 irgsp exon 212433 212579 . 1 . N/A N/A 1 N/A 0 0 Os01t0104100-01.exon6 N/A Os01t0104100-01.exon6 N/A Os01t0104100-01.exon6 transcript:Os01t0104100-01 N/A 6 N/A +1 irgsp exon 213490 213639 . 1 . N/A N/A 1 N/A 0 0 Os01t0104100-01.exon7 N/A Os01t0104100-01.exon7 N/A Os01t0104100-01.exon7 transcript:Os01t0104100-01 N/A 7 N/A +1 irgsp exon 213741 214173 . 1 . N/A N/A 0 N/A -1 0 Os01t0104100-01.exon8 N/A Os01t0104100-01.exon8 N/A Os01t0104100-01.exon8 transcript:Os01t0104100-01 N/A 8 N/A +1 irgsp CDS 209771 209896 . 
1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-01 N/A N/A transcript:Os01t0104100-01 Os01t0104100-01 N/A N/A +1 irgsp CDS 210244 210563 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-01 N/A N/A transcript:Os01t0104100-01 Os01t0104100-01 N/A N/A +1 irgsp CDS 210659 210890 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-01 N/A N/A transcript:Os01t0104100-01 Os01t0104100-01 N/A N/A +1 irgsp CDS 211015 211160 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-01 N/A N/A transcript:Os01t0104100-01 Os01t0104100-01 N/A N/A +1 irgsp CDS 212265 212352 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-01 N/A N/A transcript:Os01t0104100-01 Os01t0104100-01 N/A N/A +1 irgsp CDS 212433 212579 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-01 N/A N/A transcript:Os01t0104100-01 Os01t0104100-01 N/A N/A +1 irgsp CDS 213490 213639 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-01 N/A N/A transcript:Os01t0104100-01 Os01t0104100-01 N/A N/A +1 irgsp CDS 213741 213788 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-01 N/A N/A transcript:Os01t0104100-01 Os01t0104100-01 N/A N/A +1 irgsp three_prime_UTR 213789 214173 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-49 N/A N/A transcript:Os01t0104100-01 N/A N/A N/A +1 irgsp mRNA 209794 214147 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104100-02 N/A N/A gene:Os01g0104100 N/A N/A Os01t0104100-02 +1 irgsp exon 209794 209896 . 1 . N/A N/A 0 N/A 0 -1 Os01t0104100-02.exon1 N/A Os01t0104100-02.exon1 N/A Os01t0104100-02.exon1 transcript:Os01t0104100-02 N/A 1 N/A +1 irgsp exon 210244 210563 . 1 . N/A N/A 1 N/A 2 0 Os01t0104100-01.exon2 N/A agat-exon-7 N/A Os01t0104100-01.exon2 transcript:Os01t0104100-02 N/A 2 N/A +1 irgsp exon 210659 210890 . 1 . N/A N/A 1 N/A 0 2 Os01t0104100-01.exon3 N/A agat-exon-8 N/A Os01t0104100-01.exon3 transcript:Os01t0104100-02 N/A 3 N/A +1 irgsp exon 211015 211160 . 1 . 
N/A N/A 1 N/A 2 0 Os01t0104100-01.exon4 N/A agat-exon-9 N/A Os01t0104100-01.exon4 transcript:Os01t0104100-02 N/A 4 N/A +1 irgsp exon 212265 212352 . 1 . N/A N/A 1 N/A 0 2 Os01t0104100-01.exon5 N/A agat-exon-10 N/A Os01t0104100-01.exon5 transcript:Os01t0104100-02 N/A 5 N/A +1 irgsp exon 212433 212579 . 1 . N/A N/A 1 N/A 0 0 Os01t0104100-01.exon6 N/A agat-exon-11 N/A Os01t0104100-01.exon6 transcript:Os01t0104100-02 N/A 6 N/A +1 irgsp exon 213490 213639 . 1 . N/A N/A 1 N/A 0 0 Os01t0104100-01.exon7 N/A agat-exon-12 N/A Os01t0104100-01.exon7 transcript:Os01t0104100-02 N/A 7 N/A +1 irgsp exon 213741 214147 . 1 . N/A N/A 0 N/A -1 0 Os01t0104100-02.exon8 N/A Os01t0104100-02.exon8 N/A Os01t0104100-02.exon8 transcript:Os01t0104100-02 N/A 8 N/A +1 irgsp CDS 209795 209896 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-02 N/A N/A transcript:Os01t0104100-02 Os01t0104100-02 N/A N/A +1 irgsp CDS 210244 210563 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-02 N/A N/A transcript:Os01t0104100-02 Os01t0104100-02 N/A N/A +1 irgsp CDS 210659 210890 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-02 N/A N/A transcript:Os01t0104100-02 Os01t0104100-02 N/A N/A +1 irgsp CDS 211015 211160 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-02 N/A N/A transcript:Os01t0104100-02 Os01t0104100-02 N/A N/A +1 irgsp CDS 212265 212352 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-02 N/A N/A transcript:Os01t0104100-02 Os01t0104100-02 N/A N/A +1 irgsp CDS 212433 212579 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-02 N/A N/A transcript:Os01t0104100-02 Os01t0104100-02 N/A N/A +1 irgsp CDS 213490 213639 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-02 N/A N/A transcript:Os01t0104100-02 Os01t0104100-02 N/A N/A +1 irgsp CDS 213741 213788 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104100-02 N/A N/A transcript:Os01t0104100-02 Os01t0104100-02 N/A N/A +1 irgsp five_prime_UTR 209794 209794 . 1 . 
N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-52 N/A N/A transcript:Os01t0104100-02 N/A N/A N/A +1 irgsp three_prime_UTR 213789 214147 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-50 N/A N/A transcript:Os01t0104100-02 N/A N/A N/A +1 irgsp gene 216212 217345 . 1 . N/A protein_coding N/A No apical meristem (NAM) protein domain containing protein. (Os01t0104200-00) N/A N/A N/A Os01g0104200 gene:Os01g0104200 irgspv1.0-20170804-genes NAC DOMAIN-CONTAINING PROTEIN 16 N/A N/A N/A N/A +1 irgsp mRNA 216212 217345 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104200-00 N/A N/A gene:Os01g0104200 N/A N/A Os01t0104200-00 +1 irgsp exon 216212 216769 . 1 . N/A N/A 1 N/A 0 0 Os01t0104200-00.exon1 N/A Os01t0104200-00.exon1 N/A Os01t0104200-00.exon1 transcript:Os01t0104200-00 N/A 1 N/A +1 irgsp exon 216884 217345 . 1 . N/A N/A 1 N/A 0 0 Os01t0104200-00.exon2 N/A Os01t0104200-00.exon2 N/A Os01t0104200-00.exon2 transcript:Os01t0104200-00 N/A 2 N/A +1 irgsp CDS 216212 216769 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104200-00 N/A N/A transcript:Os01t0104200-00 Os01t0104200-00 N/A N/A +1 irgsp CDS 216884 217345 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104200-00 N/A N/A transcript:Os01t0104200-00 Os01t0104200-00 N/A N/A +1 irgsp gene 226897 229301 . 1 . N/A protein_coding N/A Ricin B-related lectin domain containing protein. (Os01t0104400-01);Ricin B-related lectin domain containing protein. (Os01t0104400-02);Ricin B-related lectin domain containing protein. (Os01t0104400-03) N/A N/A N/A Os01g0104400 gene:Os01g0104400 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 226897 229229 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104400-01 N/A N/A gene:Os01g0104400 N/A N/A Os01t0104400-01 +1 irgsp exon 226897 227634 . 1 . N/A N/A 0 N/A 0 -1 Os01t0104400-01.exon1 N/A Os01t0104400-01.exon1 N/A Os01t0104400-01.exon1 transcript:Os01t0104400-01 N/A 1 N/A +1 irgsp exon 227742 227864 . 1 . 
N/A N/A 1 N/A 0 0 Os01t0104400-03.exon2 N/A Os01t0104400-03.exon2 N/A Os01t0104400-03.exon2 transcript:Os01t0104400-01 N/A 2 N/A +1 irgsp exon 228557 228785 . 1 . N/A N/A 1 N/A 1 0 Os01t0104400-03.exon3 N/A Os01t0104400-03.exon3 N/A Os01t0104400-03.exon3 transcript:Os01t0104400-01 N/A 3 N/A +1 irgsp exon 228930 229229 . 1 . N/A N/A 0 N/A -1 1 Os01t0104400-01.exon4 N/A Os01t0104400-01.exon4 N/A Os01t0104400-01.exon4 transcript:Os01t0104400-01 N/A 4 N/A +1 irgsp CDS 227182 227634 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-01 N/A N/A transcript:Os01t0104400-01 Os01t0104400-01 N/A N/A +1 irgsp CDS 227742 227864 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-01 N/A N/A transcript:Os01t0104400-01 Os01t0104400-01 N/A N/A +1 irgsp CDS 228557 228785 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-01 N/A N/A transcript:Os01t0104400-01 Os01t0104400-01 N/A N/A +1 irgsp CDS 228930 228931 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-01 N/A N/A transcript:Os01t0104400-01 Os01t0104400-01 N/A N/A +1 irgsp five_prime_UTR 226897 227181 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-53 N/A N/A transcript:Os01t0104400-01 N/A N/A N/A +1 irgsp three_prime_UTR 228932 229229 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-51 N/A N/A transcript:Os01t0104400-01 N/A N/A N/A +1 irgsp mRNA 227139 229301 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104400-02 N/A N/A gene:Os01g0104400 N/A N/A Os01t0104400-02 +1 irgsp exon 227139 227634 . 1 . N/A N/A 0 N/A 0 -1 Os01t0104400-02.exon1 N/A Os01t0104400-02.exon1 N/A Os01t0104400-02.exon1 transcript:Os01t0104400-02 N/A 1 N/A +1 irgsp exon 227742 227864 . 1 . N/A N/A 1 N/A 0 0 Os01t0104400-03.exon2 N/A agat-exon-13 N/A Os01t0104400-03.exon2 transcript:Os01t0104400-02 N/A 2 N/A +1 irgsp exon 228557 228785 . 1 . N/A N/A 1 N/A 1 0 Os01t0104400-03.exon3 N/A agat-exon-14 N/A Os01t0104400-03.exon3 transcript:Os01t0104400-02 N/A 3 N/A +1 irgsp exon 228930 229301 . 1 . 
N/A N/A 0 N/A -1 1 Os01t0104400-02.exon4 N/A Os01t0104400-02.exon4 N/A Os01t0104400-02.exon4 transcript:Os01t0104400-02 N/A 4 N/A +1 irgsp CDS 227182 227634 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-02 N/A N/A transcript:Os01t0104400-02 Os01t0104400-02 N/A N/A +1 irgsp CDS 227742 227864 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-02 N/A N/A transcript:Os01t0104400-02 Os01t0104400-02 N/A N/A +1 irgsp CDS 228557 228785 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-02 N/A N/A transcript:Os01t0104400-02 Os01t0104400-02 N/A N/A +1 irgsp CDS 228930 228931 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-02 N/A N/A transcript:Os01t0104400-02 Os01t0104400-02 N/A N/A +1 irgsp five_prime_UTR 227139 227181 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-54 N/A N/A transcript:Os01t0104400-02 N/A N/A N/A +1 irgsp three_prime_UTR 228932 229301 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-52 N/A N/A transcript:Os01t0104400-02 N/A N/A N/A +1 irgsp mRNA 227179 229214 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104400-03 N/A N/A gene:Os01g0104400 N/A N/A Os01t0104400-03 +1 irgsp exon 227179 227634 . 1 . N/A N/A 0 N/A 0 -1 Os01t0104400-03.exon1 N/A Os01t0104400-03.exon1 N/A Os01t0104400-03.exon1 transcript:Os01t0104400-03 N/A 1 N/A +1 irgsp exon 227742 227864 . 1 . N/A N/A 1 N/A 0 0 Os01t0104400-03.exon2 N/A agat-exon-15 N/A Os01t0104400-03.exon2 transcript:Os01t0104400-03 N/A 2 N/A +1 irgsp exon 228557 228785 . 1 . N/A N/A 1 N/A 1 0 Os01t0104400-03.exon3 N/A agat-exon-16 N/A Os01t0104400-03.exon3 transcript:Os01t0104400-03 N/A 3 N/A +1 irgsp exon 228930 229214 . 1 . N/A N/A 0 N/A -1 1 Os01t0104400-03.exon4 N/A Os01t0104400-03.exon4 N/A Os01t0104400-03.exon4 transcript:Os01t0104400-03 N/A 4 N/A +1 irgsp CDS 227182 227634 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-03 N/A N/A transcript:Os01t0104400-03 Os01t0104400-03 N/A N/A +1 irgsp CDS 227742 227864 . 
1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-03 N/A N/A transcript:Os01t0104400-03 Os01t0104400-03 N/A N/A +1 irgsp CDS 228557 228785 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-03 N/A N/A transcript:Os01t0104400-03 Os01t0104400-03 N/A N/A +1 irgsp CDS 228930 228931 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104400-03 N/A N/A transcript:Os01t0104400-03 Os01t0104400-03 N/A N/A +1 irgsp five_prime_UTR 227179 227181 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-55 N/A N/A transcript:Os01t0104400-03 N/A N/A N/A +1 irgsp three_prime_UTR 228932 229214 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-53 N/A N/A transcript:Os01t0104400-03 N/A N/A N/A +1 irgsp gene 241680 243440 . 1 . N/A protein_coding N/A No apical meristem (NAM) protein domain containing protein. (Os01t0104500-01) N/A N/A N/A Os01g0104500 gene:Os01g0104500 irgspv1.0-20170804-genes NAC DOMAIN-CONTAINING PROTEIN 20 N/A N/A N/A N/A +1 irgsp mRNA 241680 243440 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104500-01 N/A N/A gene:Os01g0104500 N/A N/A Os01t0104500-01 +1 irgsp exon 241680 241702 . 1 . N/A N/A 1 N/A -1 -1 Os01t0104500-01.exon1 N/A Os01t0104500-01.exon1 N/A Os01t0104500-01.exon1 transcript:Os01t0104500-01 N/A 1 N/A +1 irgsp exon 241866 242091 . 1 . N/A N/A 1 N/A 1 -1 Os01t0104500-01.exon2 N/A Os01t0104500-01.exon2 N/A Os01t0104500-01.exon2 transcript:Os01t0104500-01 N/A 2 N/A +1 irgsp exon 242199 243440 . 1 . N/A N/A 1 N/A -1 1 Os01t0104500-01.exon3 N/A Os01t0104500-01.exon3 N/A Os01t0104500-01.exon3 transcript:Os01t0104500-01 N/A 3 N/A +1 irgsp CDS 241908 242091 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104500-01 N/A N/A transcript:Os01t0104500-01 Os01t0104500-01 N/A N/A +1 irgsp CDS 242199 242977 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104500-01 N/A N/A transcript:Os01t0104500-01 Os01t0104500-01 N/A N/A +1 irgsp five_prime_UTR 241680 241702 . 1 . 
N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-56 N/A N/A transcript:Os01t0104500-01 N/A N/A N/A +1 irgsp five_prime_UTR 241866 241907 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-57 N/A N/A transcript:Os01t0104500-01 N/A N/A N/A +1 irgsp three_prime_UTR 242978 243440 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-54 N/A N/A transcript:Os01t0104500-01 N/A N/A N/A +1 irgsp gene 248828 256872 . -1 . N/A protein_coding N/A Homolog of Arabidopsis DE-ETIOLATED1 (DET1), Modulation of the ABA signaling pathway and ABA biosynthesis, Regulation of chlorophyll content (Os01t0104600-01);Similar to Light-mediated development protein DET1 (Deetiolated1 homolog) (tDET1) (High pigmentation protein 2) (Protein dark green). (Os01t0104600-02) N/A N/A N/A Os01g0104600 gene:Os01g0104600 irgspv1.0-20170804-genes DE-ETIOLATED1 N/A N/A N/A N/A +1 irgsp mRNA 248828 256571 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104600-02 N/A N/A gene:Os01g0104600 N/A N/A Os01t0104600-02 +1 irgsp exon 248828 249107 . -1 . N/A N/A 1 N/A -1 1 Os01t0104600-01.exon11 N/A Os01t0104600-01.exon11 N/A Os01t0104600-01.exon11 transcript:Os01t0104600-02 N/A 11 N/A +1 irgsp exon 249369 249468 . -1 . N/A N/A 1 N/A 1 0 Os01t0104600-01.exon10 N/A Os01t0104600-01.exon10 N/A Os01t0104600-01.exon10 transcript:Os01t0104600-02 N/A 10 N/A +1 irgsp exon 249861 249956 . -1 . N/A N/A 1 N/A 0 0 Os01t0104600-01.exon9 N/A Os01t0104600-01.exon9 N/A Os01t0104600-01.exon9 transcript:Os01t0104600-02 N/A 9 N/A +1 irgsp exon 250617 250781 . -1 . N/A N/A 1 N/A 0 0 Os01t0104600-01.exon8 N/A Os01t0104600-01.exon8 N/A Os01t0104600-01.exon8 transcript:Os01t0104600-02 N/A 8 N/A +1 irgsp exon 250860 250940 . -1 . N/A N/A 1 N/A 0 0 Os01t0104600-01.exon7 N/A Os01t0104600-01.exon7 N/A Os01t0104600-01.exon7 transcript:Os01t0104600-02 N/A 7 N/A +1 irgsp exon 251026 251082 . -1 . 
N/A N/A 1 N/A 0 0 Os01t0104600-01.exon6 N/A Os01t0104600-01.exon6 N/A Os01t0104600-01.exon6 transcript:Os01t0104600-02 N/A 6 N/A +1 irgsp exon 251316 251384 . -1 . N/A N/A 1 N/A 0 0 Os01t0104600-01.exon5 N/A Os01t0104600-01.exon5 N/A Os01t0104600-01.exon5 transcript:Os01t0104600-02 N/A 5 N/A +1 irgsp exon 251695 251790 . -1 . N/A N/A 1 N/A 0 0 Os01t0104600-01.exon4 N/A Os01t0104600-01.exon4 N/A Os01t0104600-01.exon4 transcript:Os01t0104600-02 N/A 4 N/A +1 irgsp exon 255325 255553 . -1 . N/A N/A 1 N/A 0 2 Os01t0104600-01.exon3 N/A Os01t0104600-01.exon3 N/A Os01t0104600-01.exon3 transcript:Os01t0104600-02 N/A 3 N/A +1 irgsp exon 255674 256098 . -1 . N/A N/A 1 N/A 2 0 Os01t0104600-01.exon2 N/A Os01t0104600-01.exon2 N/A Os01t0104600-01.exon2 transcript:Os01t0104600-02 N/A 2 N/A +1 irgsp exon 256361 256571 . -1 . N/A N/A 0 N/A 0 -1 Os01t0104600-02.exon1 N/A Os01t0104600-02.exon1 N/A Os01t0104600-02.exon1 transcript:Os01t0104600-02 N/A 1 N/A +1 irgsp CDS 248971 249107 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-02 N/A N/A transcript:Os01t0104600-02 Os01t0104600-02 N/A N/A +1 irgsp CDS 249369 249468 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-02 N/A N/A transcript:Os01t0104600-02 Os01t0104600-02 N/A N/A +1 irgsp CDS 249861 249956 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-02 N/A N/A transcript:Os01t0104600-02 Os01t0104600-02 N/A N/A +1 irgsp CDS 250617 250781 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-02 N/A N/A transcript:Os01t0104600-02 Os01t0104600-02 N/A N/A +1 irgsp CDS 250860 250940 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-02 N/A N/A transcript:Os01t0104600-02 Os01t0104600-02 N/A N/A +1 irgsp CDS 251026 251082 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-02 N/A N/A transcript:Os01t0104600-02 Os01t0104600-02 N/A N/A +1 irgsp CDS 251316 251384 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-02 N/A N/A transcript:Os01t0104600-02 Os01t0104600-02 N/A N/A +1 irgsp CDS 251695 251790 . 
-1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-02 N/A N/A transcript:Os01t0104600-02 Os01t0104600-02 N/A N/A +1 irgsp CDS 255325 255553 . -1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-02 N/A N/A transcript:Os01t0104600-02 Os01t0104600-02 N/A N/A +1 irgsp CDS 255674 256098 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-02 N/A N/A transcript:Os01t0104600-02 Os01t0104600-02 N/A N/A +1 irgsp CDS 256361 256441 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-02 N/A N/A transcript:Os01t0104600-02 Os01t0104600-02 N/A N/A +1 irgsp five_prime_UTR 256442 256571 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-58 N/A N/A transcript:Os01t0104600-02 N/A N/A N/A +1 irgsp three_prime_UTR 248828 248970 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-55 N/A N/A transcript:Os01t0104600-02 N/A N/A N/A +1 irgsp mRNA 248828 256872 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104600-01 N/A N/A gene:Os01g0104600 N/A N/A Os01t0104600-01 +1 irgsp exon 248828 249107 . -1 . N/A N/A 1 N/A -1 1 Os01t0104600-01.exon11 N/A agat-exon-17 N/A Os01t0104600-01.exon11 transcript:Os01t0104600-01 N/A 11 N/A +1 irgsp exon 249369 249468 . -1 . N/A N/A 1 N/A 1 0 Os01t0104600-01.exon10 N/A agat-exon-18 N/A Os01t0104600-01.exon10 transcript:Os01t0104600-01 N/A 10 N/A +1 irgsp exon 249861 249956 . -1 . N/A N/A 1 N/A 0 0 Os01t0104600-01.exon9 N/A agat-exon-19 N/A Os01t0104600-01.exon9 transcript:Os01t0104600-01 N/A 9 N/A +1 irgsp exon 250617 250781 . -1 . N/A N/A 1 N/A 0 0 Os01t0104600-01.exon8 N/A agat-exon-20 N/A Os01t0104600-01.exon8 transcript:Os01t0104600-01 N/A 8 N/A +1 irgsp exon 250860 250940 . -1 . N/A N/A 1 N/A 0 0 Os01t0104600-01.exon7 N/A agat-exon-21 N/A Os01t0104600-01.exon7 transcript:Os01t0104600-01 N/A 7 N/A +1 irgsp exon 251026 251082 . -1 . N/A N/A 1 N/A 0 0 Os01t0104600-01.exon6 N/A agat-exon-22 N/A Os01t0104600-01.exon6 transcript:Os01t0104600-01 N/A 6 N/A +1 irgsp exon 251316 251384 . -1 . 
N/A N/A 1 N/A 0 0 Os01t0104600-01.exon5 N/A agat-exon-23 N/A Os01t0104600-01.exon5 transcript:Os01t0104600-01 N/A 5 N/A +1 irgsp exon 251695 251790 . -1 . N/A N/A 1 N/A 0 0 Os01t0104600-01.exon4 N/A agat-exon-24 N/A Os01t0104600-01.exon4 transcript:Os01t0104600-01 N/A 4 N/A +1 irgsp exon 255325 255553 . -1 . N/A N/A 1 N/A 0 2 Os01t0104600-01.exon3 N/A agat-exon-25 N/A Os01t0104600-01.exon3 transcript:Os01t0104600-01 N/A 3 N/A +1 irgsp exon 255674 256098 . -1 . N/A N/A 1 N/A 2 0 Os01t0104600-01.exon2 N/A agat-exon-26 N/A Os01t0104600-01.exon2 transcript:Os01t0104600-01 N/A 2 N/A +1 irgsp exon 256361 256872 . -1 . N/A N/A 0 N/A 0 -1 Os01t0104600-01.exon1 N/A Os01t0104600-01.exon1 N/A Os01t0104600-01.exon1 transcript:Os01t0104600-01 N/A 1 N/A +1 irgsp CDS 248971 249107 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-01 N/A N/A transcript:Os01t0104600-01 Os01t0104600-01 N/A N/A +1 irgsp CDS 249369 249468 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-01 N/A N/A transcript:Os01t0104600-01 Os01t0104600-01 N/A N/A +1 irgsp CDS 249861 249956 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-01 N/A N/A transcript:Os01t0104600-01 Os01t0104600-01 N/A N/A +1 irgsp CDS 250617 250781 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-01 N/A N/A transcript:Os01t0104600-01 Os01t0104600-01 N/A N/A +1 irgsp CDS 250860 250940 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-01 N/A N/A transcript:Os01t0104600-01 Os01t0104600-01 N/A N/A +1 irgsp CDS 251026 251082 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-01 N/A N/A transcript:Os01t0104600-01 Os01t0104600-01 N/A N/A +1 irgsp CDS 251316 251384 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-01 N/A N/A transcript:Os01t0104600-01 Os01t0104600-01 N/A N/A +1 irgsp CDS 251695 251790 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-01 N/A N/A transcript:Os01t0104600-01 Os01t0104600-01 N/A N/A +1 irgsp CDS 255325 255553 . 
-1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-01 N/A N/A transcript:Os01t0104600-01 Os01t0104600-01 N/A N/A +1 irgsp CDS 255674 256098 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-01 N/A N/A transcript:Os01t0104600-01 Os01t0104600-01 N/A N/A +1 irgsp CDS 256361 256441 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104600-01 N/A N/A transcript:Os01t0104600-01 Os01t0104600-01 N/A N/A +1 irgsp five_prime_UTR 256442 256872 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-59 N/A N/A transcript:Os01t0104600-01 N/A N/A N/A +1 irgsp three_prime_UTR 248828 248970 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-56 N/A N/A transcript:Os01t0104600-01 N/A N/A N/A +1 irgsp gene 261530 268145 . 1 . N/A protein_coding N/A Sas10/Utp3 family protein. (Os01t0104800-01);Hypothetical conserved gene. (Os01t0104800-02) N/A N/A N/A Os01g0104800 gene:Os01g0104800 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 261530 268145 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104800-01 N/A N/A gene:Os01g0104800 N/A N/A Os01t0104800-01 +1 irgsp exon 261530 261661 . 1 . N/A N/A 0 N/A 1 -1 Os01t0104800-01.exon1 N/A Os01t0104800-01.exon1 N/A Os01t0104800-01.exon1 transcript:Os01t0104800-01 N/A 1 N/A +1 irgsp exon 261767 261805 . 1 . N/A N/A 0 N/A 1 1 Os01t0104800-01.exon2 N/A Os01t0104800-01.exon2 N/A Os01t0104800-01.exon2 transcript:Os01t0104800-01 N/A 2 N/A +1 irgsp exon 261895 261941 . 1 . N/A N/A 0 N/A 0 1 Os01t0104800-01.exon3 N/A Os01t0104800-01.exon3 N/A Os01t0104800-01.exon3 transcript:Os01t0104800-01 N/A 3 N/A +1 irgsp exon 262582 262681 . 1 . N/A N/A 0 N/A 1 0 Os01t0104800-01.exon4 N/A Os01t0104800-01.exon4 N/A Os01t0104800-01.exon4 transcript:Os01t0104800-01 N/A 4 N/A +1 irgsp exon 262925 263181 . 1 . N/A N/A 0 N/A 0 1 Os01t0104800-01.exon5 N/A Os01t0104800-01.exon5 N/A Os01t0104800-01.exon5 transcript:Os01t0104800-01 N/A 5 N/A +1 irgsp exon 263525 263640 . 1 . 
N/A N/A 0 N/A 2 0 Os01t0104800-01.exon6 N/A Os01t0104800-01.exon6 N/A Os01t0104800-01.exon6 transcript:Os01t0104800-01 N/A 6 N/A +1 irgsp exon 264014 264098 . 1 . N/A N/A 1 N/A 0 2 Os01t0104800-01.exon7 N/A Os01t0104800-01.exon7 N/A Os01t0104800-01.exon7 transcript:Os01t0104800-01 N/A 7 N/A +1 irgsp exon 265236 265415 . 1 . N/A N/A 1 N/A 0 0 Os01t0104800-01.exon8 N/A Os01t0104800-01.exon8 N/A Os01t0104800-01.exon8 transcript:Os01t0104800-01 N/A 8 N/A +1 irgsp exon 265506 265649 . 1 . N/A N/A 1 N/A 0 0 Os01t0104800-01.exon9 N/A Os01t0104800-01.exon9 N/A Os01t0104800-01.exon9 transcript:Os01t0104800-01 N/A 9 N/A +1 irgsp exon 265740 265817 . 1 . N/A N/A 1 N/A 0 0 Os01t0104800-01.exon10 N/A Os01t0104800-01.exon10 N/A Os01t0104800-01.exon10 transcript:Os01t0104800-01 N/A 10 N/A +1 irgsp exon 265909 266045 . 1 . N/A N/A 1 N/A 2 0 Os01t0104800-01.exon11 N/A Os01t0104800-01.exon11 N/A Os01t0104800-01.exon11 transcript:Os01t0104800-01 N/A 11 N/A +1 irgsp exon 266138 266246 . 1 . N/A N/A 1 N/A 0 2 Os01t0104800-01.exon12 N/A Os01t0104800-01.exon12 N/A Os01t0104800-01.exon12 transcript:Os01t0104800-01 N/A 12 N/A +1 irgsp exon 267237 267514 . 1 . N/A N/A 1 N/A 2 0 Os01t0104800-01.exon13 N/A Os01t0104800-01.exon13 N/A Os01t0104800-01.exon13 transcript:Os01t0104800-01 N/A 13 N/A +1 irgsp exon 267591 267657 . 1 . N/A N/A 1 N/A 0 2 Os01t0104800-01.exon14 N/A Os01t0104800-01.exon14 N/A Os01t0104800-01.exon14 transcript:Os01t0104800-01 N/A 14 N/A +1 irgsp exon 267734 267802 . 1 . N/A N/A 1 N/A 0 0 Os01t0104800-01.exon15 N/A Os01t0104800-01.exon15 N/A Os01t0104800-01.exon15 transcript:Os01t0104800-01 N/A 15 N/A +1 irgsp exon 267880 268145 . 1 . N/A N/A 0 N/A -1 0 Os01t0104800-01.exon16 N/A Os01t0104800-01.exon16 N/A Os01t0104800-01.exon16 transcript:Os01t0104800-01 N/A 16 N/A +1 irgsp CDS 261562 261661 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 261767 261805 . 
1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 261895 261941 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 262582 262681 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 262925 263181 . 1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 263525 263640 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 264014 264098 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 265236 265415 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 265506 265649 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 265740 265817 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 265909 266045 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 266138 266246 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 267237 267514 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 267591 267657 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 267734 267802 . 
1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp CDS 267880 268011 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-01 N/A N/A transcript:Os01t0104800-01 Os01t0104800-01 N/A N/A +1 irgsp five_prime_UTR 261530 261561 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-60 N/A N/A transcript:Os01t0104800-01 N/A N/A N/A +1 irgsp three_prime_UTR 268012 268145 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-57 N/A N/A transcript:Os01t0104800-01 N/A N/A N/A +1 irgsp mRNA 263523 268120 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104800-02 N/A N/A gene:Os01g0104800 N/A N/A Os01t0104800-02 +1 irgsp exon 263523 263640 . 1 . N/A N/A 0 N/A 2 -1 Os01t0104800-02.exon1 N/A Os01t0104800-02.exon1 N/A Os01t0104800-02.exon1 transcript:Os01t0104800-02 N/A 1 N/A +1 irgsp exon 264014 264098 . 1 . N/A N/A 1 N/A 0 2 Os01t0104800-01.exon7 N/A agat-exon-27 N/A Os01t0104800-01.exon7 transcript:Os01t0104800-02 N/A 2 N/A +1 irgsp exon 265236 265415 . 1 . N/A N/A 1 N/A 0 0 Os01t0104800-01.exon8 N/A agat-exon-28 N/A Os01t0104800-01.exon8 transcript:Os01t0104800-02 N/A 3 N/A +1 irgsp exon 265506 265649 . 1 . N/A N/A 1 N/A 0 0 Os01t0104800-01.exon9 N/A agat-exon-29 N/A Os01t0104800-01.exon9 transcript:Os01t0104800-02 N/A 4 N/A +1 irgsp exon 265740 265817 . 1 . N/A N/A 1 N/A 0 0 Os01t0104800-01.exon10 N/A agat-exon-30 N/A Os01t0104800-01.exon10 transcript:Os01t0104800-02 N/A 5 N/A +1 irgsp exon 265909 266045 . 1 . N/A N/A 1 N/A 2 0 Os01t0104800-01.exon11 N/A agat-exon-31 N/A Os01t0104800-01.exon11 transcript:Os01t0104800-02 N/A 6 N/A +1 irgsp exon 266138 266246 . 1 . N/A N/A 1 N/A 0 2 Os01t0104800-01.exon12 N/A agat-exon-32 N/A Os01t0104800-01.exon12 transcript:Os01t0104800-02 N/A 7 N/A +1 irgsp exon 267237 267514 . 1 . N/A N/A 1 N/A 2 0 Os01t0104800-01.exon13 N/A agat-exon-33 N/A Os01t0104800-01.exon13 transcript:Os01t0104800-02 N/A 8 N/A +1 irgsp exon 267591 267657 . 1 . 
N/A N/A 1 N/A 0 2 Os01t0104800-01.exon14 N/A agat-exon-34 N/A Os01t0104800-01.exon14 transcript:Os01t0104800-02 N/A 9 N/A +1 irgsp exon 267734 267802 . 1 . N/A N/A 1 N/A 0 0 Os01t0104800-01.exon15 N/A agat-exon-35 N/A Os01t0104800-01.exon15 transcript:Os01t0104800-02 N/A 10 N/A +1 irgsp exon 267880 268120 . 1 . N/A N/A 0 N/A -1 0 Os01t0104800-02.exon11 N/A Os01t0104800-02.exon11 N/A Os01t0104800-02.exon11 transcript:Os01t0104800-02 N/A 11 N/A +1 irgsp CDS 263525 263640 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-02 N/A N/A transcript:Os01t0104800-02 Os01t0104800-02 N/A N/A +1 irgsp CDS 264014 264098 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-02 N/A N/A transcript:Os01t0104800-02 Os01t0104800-02 N/A N/A +1 irgsp CDS 265236 265415 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-02 N/A N/A transcript:Os01t0104800-02 Os01t0104800-02 N/A N/A +1 irgsp CDS 265506 265649 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-02 N/A N/A transcript:Os01t0104800-02 Os01t0104800-02 N/A N/A +1 irgsp CDS 265740 265817 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-02 N/A N/A transcript:Os01t0104800-02 Os01t0104800-02 N/A N/A +1 irgsp CDS 265909 266045 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-02 N/A N/A transcript:Os01t0104800-02 Os01t0104800-02 N/A N/A +1 irgsp CDS 266138 266246 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-02 N/A N/A transcript:Os01t0104800-02 Os01t0104800-02 N/A N/A +1 irgsp CDS 267237 267514 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-02 N/A N/A transcript:Os01t0104800-02 Os01t0104800-02 N/A N/A +1 irgsp CDS 267591 267657 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-02 N/A N/A transcript:Os01t0104800-02 Os01t0104800-02 N/A N/A +1 irgsp CDS 267734 267802 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-02 N/A N/A transcript:Os01t0104800-02 Os01t0104800-02 N/A N/A +1 irgsp CDS 267880 268011 . 
1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104800-02 N/A N/A transcript:Os01t0104800-02 Os01t0104800-02 N/A N/A +1 irgsp five_prime_UTR 263523 263524 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-61 N/A N/A transcript:Os01t0104800-02 N/A N/A N/A +1 irgsp three_prime_UTR 268012 268120 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-58 N/A N/A transcript:Os01t0104800-02 N/A N/A N/A +1 irgsp gene 270179 275084 . -1 . N/A protein_coding N/A Transferase family protein. (Os01t0104900-01);Hypothetical conserved gene. (Os01t0104900-02) N/A N/A N/A Os01g0104900 gene:Os01g0104900 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 270179 275084 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104900-01 N/A N/A gene:Os01g0104900 N/A N/A Os01t0104900-01 +1 irgsp exon 270179 271333 . -1 . N/A N/A 0 N/A -1 0 Os01t0104900-01.exon2 N/A Os01t0104900-01.exon2 N/A Os01t0104900-01.exon2 transcript:Os01t0104900-01 N/A 2 N/A +1 irgsp exon 274529 275084 . -1 . N/A N/A 0 N/A 0 -1 Os01t0104900-01.exon1 N/A Os01t0104900-01.exon1 N/A Os01t0104900-01.exon1 transcript:Os01t0104900-01 N/A 1 N/A +1 irgsp CDS 270356 271333 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104900-01 N/A N/A transcript:Os01t0104900-01 Os01t0104900-01 N/A N/A +1 irgsp CDS 274529 274957 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104900-01 N/A N/A transcript:Os01t0104900-01 Os01t0104900-01 N/A N/A +1 irgsp five_prime_UTR 274958 275084 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-62 N/A N/A transcript:Os01t0104900-01 N/A N/A N/A +1 irgsp three_prime_UTR 270179 270355 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-59 N/A N/A transcript:Os01t0104900-01 N/A N/A N/A +1 irgsp mRNA 270250 271518 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0104900-02 N/A N/A gene:Os01g0104900 N/A N/A Os01t0104900-02 +1 irgsp exon 270250 271333 . -1 . 
N/A N/A 0 N/A -1 -1 Os01t0104900-02.exon2 N/A Os01t0104900-02.exon2 N/A Os01t0104900-02.exon2 transcript:Os01t0104900-02 N/A 2 N/A +1 irgsp exon 271457 271518 . -1 . N/A N/A 0 N/A -1 -1 Os01t0104900-02.exon1 N/A Os01t0104900-02.exon1 N/A Os01t0104900-02.exon1 transcript:Os01t0104900-02 N/A 1 N/A +1 irgsp CDS 270356 271309 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0104900-02 N/A N/A transcript:Os01t0104900-02 Os01t0104900-02 N/A N/A +1 irgsp five_prime_UTR 271310 271333 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-63 N/A N/A transcript:Os01t0104900-02 N/A N/A N/A +1 irgsp five_prime_UTR 271457 271518 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-64 N/A N/A transcript:Os01t0104900-02 N/A N/A N/A +1 irgsp three_prime_UTR 270250 270355 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-60 N/A N/A transcript:Os01t0104900-02 N/A N/A N/A +1 irgsp gene 284762 291892 . -1 . N/A protein_coding N/A Similar to HAT family dimerisation domain containing protein, expressed. (Os01t0105300-01) N/A N/A N/A Os01g0105300 gene:Os01g0105300 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 284762 291892 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0105300-01 N/A N/A gene:Os01g0105300 N/A N/A Os01t0105300-01 +1 irgsp exon 284762 287047 . -1 . N/A N/A 1 N/A -1 -1 Os01t0105300-01.exon5 N/A Os01t0105300-01.exon5 N/A Os01t0105300-01.exon5 transcript:Os01t0105300-01 N/A 5 N/A +1 irgsp exon 291398 291436 . -1 . N/A N/A 1 N/A -1 -1 Os01t0105300-01.exon4 N/A Os01t0105300-01.exon4 N/A Os01t0105300-01.exon4 transcript:Os01t0105300-01 N/A 4 N/A +1 irgsp exon 291520 291534 . -1 . N/A N/A 1 N/A -1 -1 Os01t0105300-01.exon3 N/A Os01t0105300-01.exon3 N/A Os01t0105300-01.exon3 transcript:Os01t0105300-01 N/A 3 N/A +1 irgsp exon 291678 291738 . -1 . N/A N/A 1 N/A -1 -1 Os01t0105300-01.exon2 N/A Os01t0105300-01.exon2 N/A Os01t0105300-01.exon2 transcript:Os01t0105300-01 N/A 2 N/A +1 irgsp exon 291838 291892 . -1 . 
N/A N/A 1 N/A -1 -1 Os01t0105300-01.exon1 N/A Os01t0105300-01.exon1 N/A Os01t0105300-01.exon1 transcript:Os01t0105300-01 N/A 1 N/A +1 irgsp CDS 284931 285020 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105300-01 N/A N/A transcript:Os01t0105300-01 Os01t0105300-01 N/A N/A +1 irgsp five_prime_UTR 285021 287047 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-65 N/A N/A transcript:Os01t0105300-01 N/A N/A N/A +1 irgsp five_prime_UTR 291398 291436 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-66 N/A N/A transcript:Os01t0105300-01 N/A N/A N/A +1 irgsp five_prime_UTR 291520 291534 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-67 N/A N/A transcript:Os01t0105300-01 N/A N/A N/A +1 irgsp five_prime_UTR 291678 291738 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-68 N/A N/A transcript:Os01t0105300-01 N/A N/A N/A +1 irgsp five_prime_UTR 291838 291892 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-69 N/A N/A transcript:Os01t0105300-01 N/A N/A N/A +1 irgsp three_prime_UTR 284762 284930 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-61 N/A N/A transcript:Os01t0105300-01 N/A N/A N/A +1 irgsp gene 288372 292296 . 1 . N/A protein_coding N/A Similar to Kinesin heavy chain. (Os01t0105400-01) N/A N/A N/A Os01g0105400 gene:Os01g0105400 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 288372 292296 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0105400-01 N/A N/A gene:Os01g0105400 N/A N/A Os01t0105400-01 +1 irgsp exon 288372 288846 . 1 . N/A N/A 1 N/A -1 -1 Os01t0105400-01.exon1 N/A Os01t0105400-01.exon1 N/A Os01t0105400-01.exon1 transcript:Os01t0105400-01 N/A 1 N/A +1 irgsp exon 288950 289116 . 1 . N/A N/A 1 N/A -1 -1 Os01t0105400-01.exon2 N/A Os01t0105400-01.exon2 N/A Os01t0105400-01.exon2 transcript:Os01t0105400-01 N/A 2 N/A +1 irgsp exon 289202 289572 . 1 . 
N/A N/A 1 N/A -1 -1 Os01t0105400-01.exon3 N/A Os01t0105400-01.exon3 N/A Os01t0105400-01.exon3 transcript:Os01t0105400-01 N/A 3 N/A +1 irgsp exon 289661 289830 . 1 . N/A N/A 1 N/A -1 -1 Os01t0105400-01.exon4 N/A Os01t0105400-01.exon4 N/A Os01t0105400-01.exon4 transcript:Os01t0105400-01 N/A 4 N/A +1 irgsp exon 290395 290512 . 1 . N/A N/A 1 N/A 2 -1 Os01t0105400-01.exon5 N/A Os01t0105400-01.exon5 N/A Os01t0105400-01.exon5 transcript:Os01t0105400-01 N/A 5 N/A +1 irgsp exon 291372 291574 . 1 . N/A N/A 1 N/A -1 2 Os01t0105400-01.exon6 N/A Os01t0105400-01.exon6 N/A Os01t0105400-01.exon6 transcript:Os01t0105400-01 N/A 6 N/A +1 irgsp exon 291648 291779 . 1 . N/A N/A 1 N/A -1 -1 Os01t0105400-01.exon7 N/A Os01t0105400-01.exon7 N/A Os01t0105400-01.exon7 transcript:Os01t0105400-01 N/A 7 N/A +1 irgsp exon 291859 291948 . 1 . N/A N/A 1 N/A -1 -1 Os01t0105400-01.exon8 N/A Os01t0105400-01.exon8 N/A Os01t0105400-01.exon8 transcript:Os01t0105400-01 N/A 8 N/A +1 irgsp exon 292073 292296 . 1 . N/A N/A 1 N/A -1 -1 Os01t0105400-01.exon9 N/A Os01t0105400-01.exon9 N/A Os01t0105400-01.exon9 transcript:Os01t0105400-01 N/A 9 N/A +1 irgsp CDS 290433 290512 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105400-01 N/A N/A transcript:Os01t0105400-01 Os01t0105400-01 N/A N/A +1 irgsp CDS 291372 291558 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105400-01 N/A N/A transcript:Os01t0105400-01 Os01t0105400-01 N/A N/A +1 irgsp five_prime_UTR 288372 288846 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-70 N/A N/A transcript:Os01t0105400-01 N/A N/A N/A +1 irgsp five_prime_UTR 288950 289116 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-71 N/A N/A transcript:Os01t0105400-01 N/A N/A N/A +1 irgsp five_prime_UTR 289202 289572 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-72 N/A N/A transcript:Os01t0105400-01 N/A N/A N/A +1 irgsp five_prime_UTR 289661 289830 . 1 . 
N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-73 N/A N/A transcript:Os01t0105400-01 N/A N/A N/A +1 irgsp five_prime_UTR 290395 290432 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-74 N/A N/A transcript:Os01t0105400-01 N/A N/A N/A +1 irgsp three_prime_UTR 291559 291574 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-62 N/A N/A transcript:Os01t0105400-01 N/A N/A N/A +1 irgsp three_prime_UTR 291648 291779 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-63 N/A N/A transcript:Os01t0105400-01 N/A N/A N/A +1 irgsp three_prime_UTR 291859 291948 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-64 N/A N/A transcript:Os01t0105400-01 N/A N/A N/A +1 irgsp three_prime_UTR 292073 292296 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-65 N/A N/A transcript:Os01t0105400-01 N/A N/A N/A +1 irgsp gene 303233 306736 . 1 . N/A protein_coding N/A Basic helix-loop-helix dimerisation region bHLH domain containing protein. (Os01t0105700-01) N/A N/A N/A Os01g0105700 gene:Os01g0105700 irgspv1.0-20170804-genes basic helix-loop-helix protein 071 N/A N/A N/A N/A +1 irgsp mRNA 303233 306736 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0105700-01 N/A N/A gene:Os01g0105700 N/A N/A Os01t0105700-01 +1 irgsp exon 303233 303471 . 1 . N/A N/A 1 N/A 2 -1 Os01t0105700-01.exon1 N/A Os01t0105700-01.exon1 N/A Os01t0105700-01.exon1 transcript:Os01t0105700-01 N/A 1 N/A +1 irgsp exon 303981 304509 . 1 . N/A N/A 1 N/A 0 2 Os01t0105700-01.exon2 N/A Os01t0105700-01.exon2 N/A Os01t0105700-01.exon2 transcript:Os01t0105700-01 N/A 2 N/A +1 irgsp exon 305572 305718 . 1 . N/A N/A 1 N/A 0 0 Os01t0105700-01.exon3 N/A Os01t0105700-01.exon3 N/A Os01t0105700-01.exon3 transcript:Os01t0105700-01 N/A 3 N/A +1 irgsp exon 305834 305899 . 1 . N/A N/A 1 N/A 0 0 Os01t0105700-01.exon4 N/A Os01t0105700-01.exon4 N/A Os01t0105700-01.exon4 transcript:Os01t0105700-01 N/A 4 N/A +1 irgsp exon 305993 306058 . 1 . 
N/A N/A 1 N/A 0 0 Os01t0105700-01.exon5 N/A Os01t0105700-01.exon5 N/A Os01t0105700-01.exon5 transcript:Os01t0105700-01 N/A 5 N/A +1 irgsp exon 306171 306245 . 1 . N/A N/A 1 N/A 0 0 Os01t0105700-01.exon6 N/A Os01t0105700-01.exon6 N/A Os01t0105700-01.exon6 transcript:Os01t0105700-01 N/A 6 N/A +1 irgsp exon 306353 306736 . 1 . N/A N/A 1 N/A -1 0 Os01t0105700-01.exon7 N/A Os01t0105700-01.exon7 N/A Os01t0105700-01.exon7 transcript:Os01t0105700-01 N/A 7 N/A +1 irgsp CDS 303329 303471 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105700-01 N/A N/A transcript:Os01t0105700-01 Os01t0105700-01 N/A N/A +1 irgsp CDS 303981 304509 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105700-01 N/A N/A transcript:Os01t0105700-01 Os01t0105700-01 N/A N/A +1 irgsp CDS 305572 305718 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105700-01 N/A N/A transcript:Os01t0105700-01 Os01t0105700-01 N/A N/A +1 irgsp CDS 305834 305899 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105700-01 N/A N/A transcript:Os01t0105700-01 Os01t0105700-01 N/A N/A +1 irgsp CDS 305993 306058 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105700-01 N/A N/A transcript:Os01t0105700-01 Os01t0105700-01 N/A N/A +1 irgsp CDS 306171 306245 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105700-01 N/A N/A transcript:Os01t0105700-01 Os01t0105700-01 N/A N/A +1 irgsp CDS 306353 306493 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105700-01 N/A N/A transcript:Os01t0105700-01 Os01t0105700-01 N/A N/A +1 irgsp five_prime_UTR 303233 303328 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-75 N/A N/A transcript:Os01t0105700-01 N/A N/A N/A +1 irgsp three_prime_UTR 306494 306736 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-66 N/A N/A transcript:Os01t0105700-01 N/A N/A N/A +1 irgsp gene 306871 308842 . -1 . N/A protein_coding N/A Similar to Iron sulfur assembly protein 1. 
(Os01t0105800-01) N/A N/A N/A Os01g0105800 gene:Os01g0105800 irgspv1.0-20170804-genes IRON-SULFUR CLUSTER PROTEIN 9 N/A N/A N/A N/A +1 irgsp mRNA 306871 308842 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0105800-01 N/A N/A gene:Os01g0105800 N/A N/A Os01t0105800-01 +1 irgsp exon 306871 307217 . -1 . N/A N/A 1 N/A -1 2 Os01t0105800-01.exon4 N/A Os01t0105800-01.exon4 N/A Os01t0105800-01.exon4 transcript:Os01t0105800-01 N/A 4 N/A +1 irgsp exon 307296 307413 . -1 . N/A N/A 1 N/A 2 1 Os01t0105800-01.exon3 N/A Os01t0105800-01.exon3 N/A Os01t0105800-01.exon3 transcript:Os01t0105800-01 N/A 3 N/A +1 irgsp exon 308397 308626 . -1 . N/A N/A 1 N/A 1 -1 Os01t0105800-01.exon2 N/A Os01t0105800-01.exon2 N/A Os01t0105800-01.exon2 transcript:Os01t0105800-01 N/A 2 N/A +1 irgsp exon 308703 308842 . -1 . N/A N/A 1 N/A -1 -1 Os01t0105800-01.exon1 N/A Os01t0105800-01.exon1 N/A Os01t0105800-01.exon1 transcript:Os01t0105800-01 N/A 1 N/A +1 irgsp CDS 307124 307217 . -1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105800-01 N/A N/A transcript:Os01t0105800-01 Os01t0105800-01 N/A N/A +1 irgsp CDS 307296 307413 . -1 2 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105800-01 N/A N/A transcript:Os01t0105800-01 Os01t0105800-01 N/A N/A +1 irgsp CDS 308397 308601 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105800-01 N/A N/A transcript:Os01t0105800-01 Os01t0105800-01 N/A N/A +1 irgsp five_prime_UTR 308602 308626 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-76 N/A N/A transcript:Os01t0105800-01 N/A N/A N/A +1 irgsp five_prime_UTR 308703 308842 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-77 N/A N/A transcript:Os01t0105800-01 N/A N/A N/A +1 irgsp three_prime_UTR 306871 307123 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-67 N/A N/A transcript:Os01t0105800-01 N/A N/A N/A +1 irgsp gene 309520 313170 . -1 . N/A protein_coding N/A Carbohydrate/purine kinase domain containing protein. 
(Os01t0105900-01) N/A N/A N/A Os01g0105900 gene:Os01g0105900 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 309520 313170 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0105900-01 N/A N/A gene:Os01g0105900 N/A N/A Os01t0105900-01 +1 irgsp exon 309520 310070 . -1 . N/A N/A 1 N/A -1 0 Os01t0105900-01.exon8 N/A Os01t0105900-01.exon8 N/A Os01t0105900-01.exon8 transcript:Os01t0105900-01 N/A 8 N/A +1 irgsp exon 310256 310367 . -1 . N/A N/A 1 N/A 0 2 Os01t0105900-01.exon7 N/A Os01t0105900-01.exon7 N/A Os01t0105900-01.exon7 transcript:Os01t0105900-01 N/A 7 N/A +1 irgsp exon 310455 310552 . -1 . N/A N/A 1 N/A 2 0 Os01t0105900-01.exon6 N/A Os01t0105900-01.exon6 N/A Os01t0105900-01.exon6 transcript:Os01t0105900-01 N/A 6 N/A +1 irgsp exon 310632 310739 . -1 . N/A N/A 1 N/A 0 0 Os01t0105900-01.exon5 N/A Os01t0105900-01.exon5 N/A Os01t0105900-01.exon5 transcript:Os01t0105900-01 N/A 5 N/A +1 irgsp exon 310880 310918 . -1 . N/A N/A 1 N/A 0 0 Os01t0105900-01.exon4 N/A Os01t0105900-01.exon4 N/A Os01t0105900-01.exon4 transcript:Os01t0105900-01 N/A 4 N/A +1 irgsp exon 311002 311073 . -1 . N/A N/A 1 N/A 0 0 Os01t0105900-01.exon3 N/A Os01t0105900-01.exon3 N/A Os01t0105900-01.exon3 transcript:Os01t0105900-01 N/A 3 N/A +1 irgsp exon 311163 311426 . -1 . N/A N/A 1 N/A 0 0 Os01t0105900-01.exon2 N/A Os01t0105900-01.exon2 N/A Os01t0105900-01.exon2 transcript:Os01t0105900-01 N/A 2 N/A +1 irgsp exon 312867 313170 . -1 . N/A N/A 1 N/A 0 -1 Os01t0105900-01.exon1 N/A Os01t0105900-01.exon1 N/A Os01t0105900-01.exon1 transcript:Os01t0105900-01 N/A 1 N/A +1 irgsp CDS 309822 310070 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105900-01 N/A N/A transcript:Os01t0105900-01 Os01t0105900-01 N/A N/A +1 irgsp CDS 310256 310367 . -1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105900-01 N/A N/A transcript:Os01t0105900-01 Os01t0105900-01 N/A N/A +1 irgsp CDS 310455 310552 . 
-1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105900-01 N/A N/A transcript:Os01t0105900-01 Os01t0105900-01 N/A N/A +1 irgsp CDS 310632 310739 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105900-01 N/A N/A transcript:Os01t0105900-01 Os01t0105900-01 N/A N/A +1 irgsp CDS 310880 310918 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105900-01 N/A N/A transcript:Os01t0105900-01 Os01t0105900-01 N/A N/A +1 irgsp CDS 311002 311073 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105900-01 N/A N/A transcript:Os01t0105900-01 Os01t0105900-01 N/A N/A +1 irgsp CDS 311163 311426 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105900-01 N/A N/A transcript:Os01t0105900-01 Os01t0105900-01 N/A N/A +1 irgsp CDS 312867 313064 . -1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0105900-01 N/A N/A transcript:Os01t0105900-01 Os01t0105900-01 N/A N/A +1 irgsp five_prime_UTR 313065 313170 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-78 N/A N/A transcript:Os01t0105900-01 N/A N/A N/A +1 irgsp three_prime_UTR 309520 309821 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-68 N/A N/A transcript:Os01t0105900-01 N/A N/A N/A +1 irgsp gene 319754 322205 . 1 . N/A protein_coding N/A Similar to RER1A protein (AtRER1A). (Os01t0106200-01) N/A N/A N/A Os01g0106200 gene:Os01g0106200 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 319754 322205 . 1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0106200-01 N/A N/A gene:Os01g0106200 N/A N/A Os01t0106200-01 +1 irgsp exon 319754 320236 . 1 . N/A N/A 1 N/A 2 -1 Os01t0106200-01.exon1 N/A Os01t0106200-01.exon1 N/A Os01t0106200-01.exon1 transcript:Os01t0106200-01 N/A 1 N/A +1 irgsp exon 321468 321648 . 1 . N/A N/A 1 N/A 0 2 Os01t0106200-01.exon2 N/A Os01t0106200-01.exon2 N/A Os01t0106200-01.exon2 transcript:Os01t0106200-01 N/A 2 N/A +1 irgsp exon 321928 322205 . 1 . 
N/A N/A 1 N/A -1 0 Os01t0106200-01.exon3 N/A Os01t0106200-01.exon3 N/A Os01t0106200-01.exon3 transcript:Os01t0106200-01 N/A 3 N/A +1 irgsp CDS 319875 320236 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0106200-01 N/A N/A transcript:Os01t0106200-01 Os01t0106200-01 N/A N/A +1 irgsp CDS 321468 321648 . 1 1 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0106200-01 N/A N/A transcript:Os01t0106200-01 Os01t0106200-01 N/A N/A +1 irgsp CDS 321928 321975 . 1 0 N/A N/A N/A N/A N/A N/A N/A N/A CDS:Os01t0106200-01 N/A N/A transcript:Os01t0106200-01 Os01t0106200-01 N/A N/A +1 irgsp five_prime_UTR 319754 319874 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-five_prime_utr-79 N/A N/A transcript:Os01t0106200-01 N/A N/A N/A +1 irgsp three_prime_UTR 321976 322205 . 1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-69 N/A N/A transcript:Os01t0106200-01 N/A N/A N/A +1 irgsp gene 322591 323923 . -1 . N/A protein_coding N/A Similar to Isoflavone reductase homolog IRL (EC 1.3.1.-). (Os01t0106300-01) N/A N/A N/A Os01g0106300 gene:Os01g0106300 irgspv1.0-20170804-genes N/A N/A N/A N/A N/A +1 irgsp mRNA 322591 323923 . -1 . N/A protein_coding N/A N/A N/A N/A N/A N/A transcript:Os01t0106300-01 N/A N/A gene:Os01g0106300 N/A N/A Os01t0106300-01 +1 irgsp exon 322591 323923 . -1 . N/A N/A 1 N/A -1 1 Os01t0106300-01.exon2 N/A Os01t0106300-01.exon2 N/A Os01t0106300-01.exon2 transcript:Os01t0106300-01 N/A 2 N/A +1 irgsp three_prime_UTR 322591 322809 . -1 . N/A N/A N/A N/A N/A N/A N/A N/A agat-three_prime_utr-70 N/A N/A transcript:Os01t0106300-01 N/A N/A N/A diff --git a/src/agat/agat_convert_sp_gff2tsv/test_data/script.sh b/src/agat/agat_convert_sp_gff2tsv/test_data/script.sh new file mode 100755 index 00000000..ba7ba143 --- /dev/null +++ b/src/agat/agat_convert_sp_gff2tsv/test_data/script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# clone repo +if [ ! 
-d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/scripts_output/out/agat_convert_sp_gff2tsv_1.tsv src/agat/agat_convert_sp_gff2tsv/test_data +cp -r /tmp/agat_source/t/scripts_output/in/1.gff src/agat/agat_convert_sp_gff2tsv/test_data From 93d2e7882b4865a744bb3836390be0737ca06ab7 Mon Sep 17 00:00:00 2001 From: Leila011 Date: Tue, 13 Aug 2024 20:56:50 +0200 Subject: [PATCH 21/25] Add agat convert sp gxf2gxf (#103) * add help * add config * add run script * add test data and expected output + script to fetch them * add tests * add example to config * update changelog --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 2 + .../agat_convert_sp_gxf2gxf/config.vsh.yaml | 75 +++++++++++++++++++ src/agat/agat_convert_sp_gxf2gxf/help.txt | 73 ++++++++++++++++++ src/agat/agat_convert_sp_gxf2gxf/script.sh | 9 +++ src/agat/agat_convert_sp_gxf2gxf/test.sh | 28 +++++++ .../test_data/0_correct_output.gff | 36 +++++++++ .../test_data/0_test.gff | 36 +++++++++ .../test_data/script.sh | 10 +++ 8 files changed, 269 insertions(+) create mode 100644 src/agat/agat_convert_sp_gxf2gxf/config.vsh.yaml create mode 100644 src/agat/agat_convert_sp_gxf2gxf/help.txt create mode 100644 src/agat/agat_convert_sp_gxf2gxf/script.sh create mode 100644 src/agat/agat_convert_sp_gxf2gxf/test.sh create mode 100644 src/agat/agat_convert_sp_gxf2gxf/test_data/0_correct_output.gff create mode 100644 src/agat/agat_convert_sp_gxf2gxf/test_data/0_test.gff create mode 100755 src/agat/agat_convert_sp_gxf2gxf/test_data/script.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 9082149e..a8819766 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,8 @@ - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). - `bedtools/bedtools_bamtofastq`: Convert BAM alignments to FASTQ files (PR #101). 
+* `agat/agat_convert_sp_gxf2gxf`: fixes and/or standardizes any GTF/GFF file into full sorted GTF/GFF file (PR #103). + ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1` (PR #72). diff --git a/src/agat/agat_convert_sp_gxf2gxf/config.vsh.yaml b/src/agat/agat_convert_sp_gxf2gxf/config.vsh.yaml new file mode 100644 index 00000000..9e77b09d --- /dev/null +++ b/src/agat/agat_convert_sp_gxf2gxf/config.vsh.yaml @@ -0,0 +1,75 @@ +name: agat_convert_sp_gxf2gxf +namespace: agat +description: | + This script fixes and/or standardizes any GTF/GFF file into a fully sorted + GTF/GFF file. The AGAT parser removes duplicate features, fixes + duplicated IDs, adds missing ID and/or Parent attributes, deflates + factorized attributes (attributes with several parents are duplicated + with unique ID), adds missing features when possible (e.g. add exon if only + CDS described, add UTR if CDS and exon described), fixes feature locations + (e.g. check exon is embedded in the parent features mRNA, gene), etc... + + All AGAT's scripts with the _sp_ prefix use the AGAT parser before + performing any supplementary task. So, it is not necessary to run this + script prior to the use of any other _sp_ script. +keywords: [gene annotations, GFF conversion] +links: + homepage: https://github.com/NBISweden/AGAT + documentation: https://agat.readthedocs.io/en/latest/tools/agat_convert_sp_gxf2gxf.html + issue_tracker: https://github.com/NBISweden/AGAT/issues + repository: https://github.com/NBISweden/AGAT +references: + doi: 10.5281/zenodo.3552717 +license: GPL-3.0 +authors: + - __merge__: /src/_authors/leila_paquay.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --gxf + alternatives: [-g, --gtf, --gff] + description: | + String - Input GTF/GFF file. Compressed file with .gz extension is accepted.
+ type: file + required: true + direction: input + example: input.gff + - name: Outputs + arguments: + - name: --output + alternatives: [-o] + description: | + String - Output GFF file. This argument is required by the component, so the result is always written to this file rather than to STDOUT. + type: file + direction: output + required: true + example: output.gff + - name: Arguments + arguments: + - name: --config + alternatives: [-c] + description: | + String - Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently). + type: file + required: false + example: custom_agat_config.yaml +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - type: file + path: test_data +engines: + - type: docker + image: quay.io/biocontainers/agat:1.4.0--pl5321hdfd78af_0 + setup: + - type: docker + run: | + agat --version | sed 's/AGAT\s\(.*\)/agat: "\1"/' > /var/software_versions.txt +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/agat/agat_convert_sp_gxf2gxf/help.txt b/src/agat/agat_convert_sp_gxf2gxf/help.txt new file mode 100644 index 00000000..7658c4ed --- /dev/null +++ b/src/agat/agat_convert_sp_gxf2gxf/help.txt @@ -0,0 +1,73 @@ +```sh +agat_convert_sp_gxf2gxf.pl --help +``` + + ------------------------------------------------------------------------------ +| Another GFF Analysis Toolkit (AGAT) - Version: v1.4.0 | +| https://github.com/NBISweden/AGAT | +| National Bioinformatics Infrastructure Sweden (NBIS) - www.nbis.se | + ------------------------------------------------------------------------------ + + +Name: + agat_convert_sp_gxf2gxf.pl + +Description: + This script fixes and/or standardizes any GTF/GFF
file into full sorted + GTF/GFF file. It AGAT parser removes duplicate features, fixes + duplicated IDs, adds missing ID and/or Parent attributes, deflates + factorized attributes (attributes with several parents are duplicated + with uniq ID), add missing features when possible (e.g. add exon if only + CDS described, add UTR if CDS and exon described), fix feature locations + (e.g. check exon is embedded in the parent features mRNA, gene), etc... + + All AGAT's scripts with the _sp_ prefix use the AGAT parser, before to + perform any supplementary task. So, it is not necessary to run this + script prior the use of any other _sp_ script. + +Usage: + agat_convert_sp_gxf2gxf.pl -g infile.gff [ -o outfile ] + agat_convert_sp_gxf2gxf.pl --help + +Options: + -g, --gtf, --gff or --gxf + String - Input GTF/GFF file. Compressed file with .gz extension + is accepted. + + -o or --output + String - Output GFF file. If no output file is specified, the + output will be written to STDOUT. + + -c or --config + String - Input agat config file. By default AGAT takes as input + agat_config.yaml file from the working directory if any, + otherwise it takes the orignal agat_config.yaml shipped with + AGAT. To get the agat_config.yaml locally type: "agat config + --expose". The --config option gives you the possibility to use + your own AGAT config file (located elsewhere or named + differently). + + -h or --help + Boolean - Display this helpful text. + +Feedback: + Did you find a bug?: + Do not hesitate to report bugs to help us keep track of the bugs and + their resolution. Please use the GitHub issue tracking system available + at this address: + + https://github.com/NBISweden/AGAT/issues + + Ensure that the bug was not already reported by searching under Issues. + If you're unable to find an (open) issue addressing the problem, open a new one. 
+ Try as much as possible to include in the issue when relevant: + - a clear description, + - as much relevant information as possible, + - the command used, + - a data sample, + - an explanation of the expected behaviour that is not occurring. + + Do you want to contribute?: + You are very welcome, visit this address for the Contributing + guidelines: + https://github.com/NBISweden/AGAT/blob/master/CONTRIBUTING.md \ No newline at end of file diff --git a/src/agat/agat_convert_sp_gxf2gxf/script.sh b/src/agat/agat_convert_sp_gxf2gxf/script.sh new file mode 100644 index 00000000..2d532a41 --- /dev/null +++ b/src/agat/agat_convert_sp_gxf2gxf/script.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +agat_convert_sp_gxf2gxf.pl \ + -g "$par_gxf" \ + -o "$par_output" \ + ${par_config:+--config "${par_config}"} diff --git a/src/agat/agat_convert_sp_gxf2gxf/test.sh b/src/agat/agat_convert_sp_gxf2gxf/test.sh new file mode 100644 index 00000000..99574b5b --- /dev/null +++ b/src/agat/agat_convert_sp_gxf2gxf/test.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +test_dir="${meta_resources_dir}/test_data" +out_dir="${meta_resources_dir}/out_data" + +echo "> Run $meta_name with test data" +"$meta_executable" \ + --gxf "$test_dir/0_test.gff" \ + --output "$out_dir/output.gff" + +echo ">> Checking output" +[ ! -f "$out_dir/output.gff" ] && echo "Output file output.gff does not exist" && exit 1 + +echo ">> Check if output is empty" +[ ! -s "$out_dir/output.gff" ] && echo "Output file output.gff is empty" && exit 1 + + +echo ">> Check if output matches expected output" +diff "$out_dir/output.gff" "$test_dir/0_correct_output.gff" +if [ $? 
-ne 0 ]; then + echo "Output file output.gff does not match expected output" + exit 1 +fi + +echo "> Test successful" \ No newline at end of file diff --git a/src/agat/agat_convert_sp_gxf2gxf/test_data/0_correct_output.gff b/src/agat/agat_convert_sp_gxf2gxf/test_data/0_correct_output.gff new file mode 100644 index 00000000..fafe86ed --- /dev/null +++ b/src/agat/agat_convert_sp_gxf2gxf/test_data/0_correct_output.gff @@ -0,0 +1,36 @@ +##gff-version 3 +scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 +scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 +scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 +scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 +scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 +scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 +scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 +scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 +scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 +scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 +scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 +scaffold789 maker exon 561401 561519 . + . 
ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 +scaffold789 maker exon 564171 564235 . + . ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 +scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 +scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 +scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 +scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 +scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 +scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 +scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 +scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 +scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 +scaffold789 maker three_prime_UTR 564589 564780 . + . 
ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/src/agat/agat_convert_sp_gxf2gxf/test_data/0_test.gff b/src/agat/agat_convert_sp_gxf2gxf/test_data/0_test.gff new file mode 100644 index 00000000..fafe86ed --- /dev/null +++ b/src/agat/agat_convert_sp_gxf2gxf/test_data/0_test.gff @@ -0,0 +1,36 @@ +##gff-version 3 +scaffold625 maker gene 337818 343277 . + . ID=CLUHARG00000005458;Name=TUBB3_2 +scaffold625 maker mRNA 337818 343277 . + . ID=CLUHART00000008717;Parent=CLUHARG00000005458 +scaffold625 maker exon 337818 337971 . + . ID=CLUHART00000008717:exon:1404;Parent=CLUHART00000008717 +scaffold625 maker exon 340733 340841 . + . ID=CLUHART00000008717:exon:1405;Parent=CLUHART00000008717 +scaffold625 maker exon 341518 341628 . + . ID=CLUHART00000008717:exon:1406;Parent=CLUHART00000008717 +scaffold625 maker exon 341964 343277 . + . ID=CLUHART00000008717:exon:1407;Parent=CLUHART00000008717 +scaffold625 maker CDS 337915 337971 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 340733 340841 . + 0 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 341518 341628 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker CDS 341964 343033 . + 2 ID=CLUHART00000008717:cds;Parent=CLUHART00000008717 +scaffold625 maker five_prime_UTR 337818 337914 . + . ID=CLUHART00000008717:five_prime_utr;Parent=CLUHART00000008717 +scaffold625 maker three_prime_UTR 343034 343277 . + . ID=CLUHART00000008717:three_prime_utr;Parent=CLUHART00000008717 +scaffold789 maker gene 558184 564780 . + . ID=CLUHARG00000003852;Name=PF11_0240 +scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006146;Parent=CLUHARG00000003852 +scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006146:exon:995;Parent=CLUHART00000006146 +scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006146:exon:996;Parent=CLUHART00000006146 +scaffold789 maker exon 564171 564235 . + . 
ID=CLUHART00000006146:exon:997;Parent=CLUHART00000006146 +scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006146:exon:998;Parent=CLUHART00000006146 +scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 564171 564235 . + 0 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006146:cds;Parent=CLUHART00000006146 +scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006146:five_prime_utr;Parent=CLUHART00000006146 +scaffold789 maker three_prime_UTR 564589 564780 . + . ID=CLUHART00000006146:three_prime_utr;Parent=CLUHART00000006146 +scaffold789 maker mRNA 558184 564780 . + . ID=CLUHART00000006147;Parent=CLUHARG00000003852 +scaffold789 maker exon 558184 560123 . + . ID=CLUHART00000006147:exon:997;Parent=CLUHART00000006147 +scaffold789 maker exon 561401 561519 . + . ID=CLUHART00000006147:exon:998;Parent=CLUHART00000006147 +scaffold789 maker exon 562057 562121 . + . ID=CLUHART00000006147:exon:999;Parent=CLUHART00000006147 +scaffold789 maker exon 564372 564780 . + . ID=CLUHART00000006147:exon:1000;Parent=CLUHART00000006147 +scaffold789 maker CDS 558191 560123 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 561401 561519 . + 2 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 562057 562121 . + 0 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker CDS 564372 564588 . + 1 ID=CLUHART00000006147:cds;Parent=CLUHART00000006147 +scaffold789 maker five_prime_UTR 558184 558190 . + . ID=CLUHART00000006147:five_prime_utr;Parent=CLUHART00000006147 +scaffold789 maker three_prime_UTR 564589 564780 . + . 
ID=CLUHART00000006147:three_prime_utr;Parent=CLUHART00000006147 diff --git a/src/agat/agat_convert_sp_gxf2gxf/test_data/script.sh b/src/agat/agat_convert_sp_gxf2gxf/test_data/script.sh new file mode 100755 index 00000000..831dd963 --- /dev/null +++ b/src/agat/agat_convert_sp_gxf2gxf/test_data/script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# clone repo +if [ ! -d /tmp/agat_source ]; then + git clone --depth 1 --single-branch --branch master https://github.com/NBISweden/AGAT /tmp/agat_source +fi + +# copy test data +cp -r /tmp/agat_source/t/gff_syntax/in/0_test.gff src/agat/agat_convert_sp_gxf2gxf/test_data +cp -r /tmp/agat_source/t/gff_syntax/out/0_correct_output.gff src/agat/agat_convert_sp_gxf2gxf/test_data From 68e2a57517595048d1fb89f2c083b7e906aca8f1 Mon Sep 17 00:00:00 2001 From: Theodoro Gasperin Terra Camargo <98555209+tgaspe@users.noreply.github.com> Date: Tue, 13 Aug 2024 21:16:59 +0200 Subject: [PATCH 22/25] Bedtools bedtobam (#111) * Initial Commit * update * Tests * Update test.sh * Update test.sh * update on test - ubam option does not work properly. older version 2.27.1 works somewhat better than the version I m working 2.30.0 of bedtools. - one solution would be to add samtools as a dependency and use it to uncompress the bam file to a sam file. However, it could be heavy. * adding identical check * adding gff file test * removing test_data * Working on suggested changes - still need to add content check using samtools view * making more changes trying to add up samtools view to test. 
* Update script.sh * Update config.vsh.yaml - min max for map_quality option * adding more links * updated on tests outputs --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 4 +- .../bedtools_bedtobam/config.vsh.yaml | 91 +++++++++ src/bedtools/bedtools_bedtobam/help.txt | 21 ++ src/bedtools/bedtools_bedtobam/script.sh | 19 ++ src/bedtools/bedtools_bedtobam/test.sh | 188 ++++++++++++++++++ 5 files changed, 321 insertions(+), 2 deletions(-) create mode 100644 src/bedtools/bedtools_bedtobam/config.vsh.yaml create mode 100644 src/bedtools/bedtools_bedtobam/help.txt create mode 100644 src/bedtools/bedtools_bedtobam/script.sh create mode 100644 src/bedtools/bedtools_bedtobam/test.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index a8819766..6bd21a1e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,13 +24,13 @@ - `agat/agat_convert_bed2gff`: convert bed file to gff format (PR #97). - `agat/agat_convert_embl2gff`: convert an EMBL file into GFF format (PR #99). - `agat/agat_convert_sp_gff2tsv`: convert gtf/gff file into tabulated file (PR #102). + - `agat/agat_convert_sp_gxf2gxf`: fixes and/or standardizes any GTF/GFF file into full sorted GTF/GFF file (PR #103). * `bedtools`: - `bedtools/bedtools_intersect`: Allows one to screen for overlaps between two sets of genomic features (PR #94). - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). - `bedtools/bedtools_bamtofastq`: Convert BAM alignments to FASTQ files (PR #101). - -* `agat/agat_convert_sp_gxf2gxf`: fixes and/or standardizes any GTF/GFF file into full sorted GTF/GFF file (PR #103). + - `bedtools/bedtools_bedtobam`: Converts genomic feature records (bed/gff/vcf) to BAM format (PR #111). 
## MINOR CHANGES diff --git a/src/bedtools/bedtools_bedtobam/config.vsh.yaml b/src/bedtools/bedtools_bedtobam/config.vsh.yaml new file mode 100644 index 00000000..bd3c48f5 --- /dev/null +++ b/src/bedtools/bedtools_bedtobam/config.vsh.yaml @@ -0,0 +1,91 @@ +name: bedtools_bedtobam +namespace: bedtools +description: Converts feature records (bed/gff/vcf) to BAM format. +keywords: [Converts, BED, GFF, VCF, BAM] +links: + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/bedtobam.html + repository: https://github.com/arq5x/bedtools2 + homepage: https://bedtools.readthedocs.io/en/latest/# + issue_tracker: https://github.com/arq5x/bedtools2/issues +references: + doi: 10.1093/bioinformatics/btq033 +license: MIT +requirements: + commands: [bedtools] +authors: + - __merge__: /src/_authors/theodoro_gasperin.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --input + alternatives: -i + type: file + description: Input file (bed/gff/vcf). + required: true + + - name: --genome + alternatives: -g + type: file + description: | + Input genome file. + NOTE: This is not a fasta file. This is a two-column tab-delimited file + where the first column is the chromosome name and the second their sizes. + required: true + + - name: Outputs + arguments: + - name: --output + alternatives: -o + type: file + direction: output + description: Output BAM file to be written. + + - name: Options + arguments: + - name: --map_quality + alternatives: -mapq + type: integer + description: | + Set the mapping quality for the BAM records. + min: 0 + max: 255 + default: 255 + + - name: --bed12 + type: boolean_true + description: | + The BED file is in BED12 format. The BAM CIGAR + string will reflect BED "blocks". + + - name: --uncompress_bam + alternatives: -ubam + type: boolean_true + description: | + Write uncompressed BAM output. Default writes compressed BAM. 
+ +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: + - type: docker + image: debian:stable-slim + setup: + - type: apt + packages: [bedtools, procps] + - type: docker + run: | + echo "bedtools: \"$(bedtools --version | sed -n 's/^bedtools //p')\"" > /var/software_versions.txt + test_setup: + - type: apt + packages: [samtools] + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/bedtools/bedtools_bedtobam/help.txt b/src/bedtools/bedtools_bedtobam/help.txt new file mode 100644 index 00000000..c9fa1c6f --- /dev/null +++ b/src/bedtools/bedtools_bedtobam/help.txt @@ -0,0 +1,21 @@ +```bash +bedtools bedtobam +``` + +Tool: bedtools bedtobam (aka bedToBam) +Version: v2.30.0 +Summary: Converts feature records to BAM format. + +Usage: bedtools bedtobam [OPTIONS] -i -g + +Options: + -mapq Set the mappinq quality for the BAM records. + (INT) Default: 255 + + -bed12 The BED file is in BED12 format. The BAM CIGAR + string will reflect BED "blocks". + + -ubam Write uncompressed BAM output. Default writes compressed BAM. + +Notes: + (1) BED files must be at least BED4 to create BAM (needs name field). 
diff --git a/src/bedtools/bedtools_bedtobam/script.sh b/src/bedtools/bedtools_bedtobam/script.sh new file mode 100644 index 00000000..ac96ae20 --- /dev/null +++ b/src/bedtools/bedtools_bedtobam/script.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +## VIASH START +## VIASH END + +set -eo pipefail + +# Unset parameters +[[ "$par_bed12" == "false" ]] && unset par_bed12 +[[ "$par_uncompress_bam" == "false" ]] && unset par_uncompress_bam + +# Execute bedtools bed to bam +bedtools bedtobam \ + ${par_bed12:+-bed12} \ + ${par_uncompress_bam:+-ubam} \ + ${par_map_quality:+-mapq "$par_map_quality"} \ + -i "$par_input" \ + -g "$par_genome" \ + > "$par_output" diff --git a/src/bedtools/bedtools_bedtobam/test.sh b/src/bedtools/bedtools_bedtobam/test.sh new file mode 100644 index 00000000..14d04241 --- /dev/null +++ b/src/bedtools/bedtools_bedtobam/test.sh @@ -0,0 +1,188 @@ +#!/bin/bash + +# exit on error +set -eo pipefail + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +assert_identical_content() { + diff -a "$2" "$1" \ + || (echo "Files are not identical!" && exit 1) +} +############################################# + +# Create directories for tests +echo "Creating Test Data..." 
+TMPDIR=$(mktemp -d "$meta_temp_dir/XXXXXX") +function clean_up { + [[ -d "$TMPDIR" ]] && rm -r "$TMPDIR" +} +trap clean_up EXIT + +# Create and populate input files +printf "chr1\t248956422\nchr3\t242193529\nchr2\t198295559\n" > "$TMPDIR/genome.txt" +printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t37\t+\nchr2:172936693-172938111\t428\t528\tmy_read/2\t37\t-\n" > "$TMPDIR/example.bed" +printf "chr2:172936693-172938111\t128\t228\tmy_read/1\t60\t+\t128\t228\t255,0,0\t1\t100\t0\nchr2:172936693-172938111\t428\t528\tmy_read/2\t60\t-\t428\t528\t255,0,0\t1\t100\t0\n" > "$TMPDIR/example.bed12" +# Create and populate example.gff file +printf "##gff-version 3\n" > "$TMPDIR/example.gff" +printf "chr1\t.\tgene\t1000\t2000\t.\t+\t.\tID=gene1;Name=Gene1\n" >> "$TMPDIR/example.gff" +printf "chr3\t.\tmRNA\t1000\t2000\t.\t+\t.\tID=transcript1;Parent=gene1\n" >> "$TMPDIR/example.gff" +printf "chr1\t.\texon\t1000\t1200\t.\t+\t.\tID=exon1;Parent=transcript1\n" >> "$TMPDIR/example.gff" +printf "chr2\t.\texon\t1500\t1700\t.\t+\t.\tID=exon2;Parent=transcript1\n" >> "$TMPDIR/example.gff" +printf "chr1\t.\tCDS\t1000\t1200\t.\t+\t0\tID=cds1;Parent=transcript1\n" >> "$TMPDIR/example.gff" +printf "chr1\t.\tCDS\t1500\t1700\t.\t+\t2\tID=cds2;Parent=transcript1\n" >> "$TMPDIR/example.gff" + +# Expected output sam files for each test +cat <<EOF > "$TMPDIR/expected.sam" +@HD VN:1.0 SO:unsorted +@PG ID:BEDTools_bedToBam VN:Vv2.30.0 +@PG ID:samtools PN:samtools PP:BEDTools_bedToBam VN:1.16.1 CL:samtools view -h output.bam +@SQ SN:chr1 AS:../genome.txt LN:248956422 +@SQ SN:chr3 AS:../genome.txt LN:242193529 +@SQ SN:chr2 AS:../genome.txt LN:198295559 +my_read/1 0 chr1 129 255 100M * 0 0 * * +my_read/2 16 chr1 429 255 100M * 0 0 * * +EOF +cat <<EOF > "$TMPDIR/expected12.sam" +@HD VN:1.0 SO:unsorted +@PG ID:BEDTools_bedToBam VN:Vv2.30.0 +@PG ID:samtools PN:samtools PP:BEDTools_bedToBam VN:1.16.1 CL:samtools view -h output.bam +@SQ SN:chr1 AS:../genome.txt LN:248956422 +@SQ SN:chr3 AS:../genome.txt
LN:242193529 +@SQ SN:chr2 AS:../genome.txt LN:198295559 +my_read/1 0 chr1 129 255 100M * 0 0 * * +my_read/2 16 chr1 429 255 100M * 0 0 * * +EOF +cat <<EOF > "$TMPDIR/expected_mapquality.sam" +@HD VN:1.0 SO:unsorted +@PG ID:BEDTools_bedToBam VN:Vv2.30.0 +@PG ID:samtools PN:samtools PP:BEDTools_bedToBam VN:1.16.1 CL:samtools view -h output.bam +@SQ SN:chr1 AS:../genome.txt LN:248956422 +@SQ SN:chr3 AS:../genome.txt LN:242193529 +@SQ SN:chr2 AS:../genome.txt LN:198295559 +my_read/1 0 chr1 129 10 100M * 0 0 * * +my_read/2 16 chr1 429 10 100M * 0 0 * * +EOF +cat <<EOF > "$TMPDIR/expected_gff.sam" +@HD VN:1.0 SO:unsorted +@PG ID:BEDTools_bedToBam VN:Vv2.30.0 +@PG ID:samtools PN:samtools PP:BEDTools_bedToBam VN:1.16.1 CL:samtools view -h output.bam +@SQ SN:chr1 AS:../genome.txt LN:248956422 +@SQ SN:chr3 AS:../genome.txt LN:242193529 +@SQ SN:chr2 AS:../genome.txt LN:198295559 +gene 0 chr1 1000 255 1001M * 0 0 * * +mRNA 0 chr3 1000 255 1001M * 0 0 * * +exon 0 chr1 1000 255 201M * 0 0 * * +exon 0 chr2 1500 255 201M * 0 0 * * +CDS 0 chr1 1000 255 201M * 0 0 * * +CDS 0 chr1 1500 255 201M * 0 0 * * +EOF + +# Test 1: Default conversion BED to BAM +mkdir "$TMPDIR/test1" && pushd "$TMPDIR/test1" > /dev/null + +echo "> Run bedtools_bedtobam on BED file" +"$meta_executable" \ + --input "../example.bed" \ + --genome "../genome.txt" \ + --output "output.bam" + +samtools view -h output.bam > output.sam + +# checks +assert_file_exists "output.bam" +assert_file_not_empty "output.bam" +assert_identical_content "output.sam" "../expected.sam" +echo "- test1 succeeded -" + +popd > /dev/null + +# Test 2: BED12 file +mkdir "$TMPDIR/test2" && pushd "$TMPDIR/test2" > /dev/null + +echo "> Run bedtools_bedtobam on BED12 file" +"$meta_executable" \ + --input "../example.bed12" \ + --genome "../genome.txt" \ + --output "output.bam" \ + --bed12 \ + +samtools view -h output.bam > output.sam + +# checks +assert_file_exists "output.bam" +assert_file_not_empty "output.bam" +assert_identical_content "output.sam"
"../expected12.sam" +echo "- test2 succeeded -" + +popd > /dev/null + +# Test 3: Uncompressed BAM file +mkdir "$TMPDIR/test3" && pushd "$TMPDIR/test3" > /dev/null + +echo "> Run bedtools_bedtobam on BED file with uncompressed BAM output" +"$meta_executable" \ + --input "../example.bed" \ + --genome "../genome.txt" \ + --output "output.bam" \ + --uncompress_bam + +# checks +assert_file_exists "output.bam" +assert_file_not_empty "output.bam" +# Cannot assert_identical_content because uncompress option does not work on this version of bedtools. + +echo "- test3 succeeded -" + +popd > /dev/null + +# Test 4: Map quality +mkdir "$TMPDIR/test4" && pushd "$TMPDIR/test4" > /dev/null + +echo "> Run bedtools_bedtobam on BED file with map quality" +"$meta_executable" \ + --input "../example.bed" \ + --genome "../genome.txt" \ + --output "output.bam" \ + --map_quality 10 \ + +samtools view -h output.bam > output.sam + +# checks +assert_file_exists "output.bam" +assert_file_not_empty "output.bam" +assert_identical_content "output.sam" "../expected_mapquality.sam" +echo "- test4 succeeded -" + +popd > /dev/null + +# Test 5: gff to bam conversion +mkdir "$TMPDIR/test5" && pushd "$TMPDIR/test5" > /dev/null + +echo "> Run bedtools_bedtobam on GFF file" +"$meta_executable" \ + --input "../example.gff" \ + --genome "../genome.txt" \ + --output "output.bam" + +samtools view -h output.bam > output.sam + +# checks +assert_file_exists "output.bam" +assert_file_not_empty "output.bam" +assert_identical_content "output.sam" "../expected_gff.sam" +echo "- test5 succeeded -" + +popd > /dev/null + +echo "---- All tests succeeded! 
----" +exit 0 From 923a6da3898a832df96a0e17c8a3b74c2806d939 Mon Sep 17 00:00:00 2001 From: Theodoro Gasperin Terra Camargo <98555209+tgaspe@users.noreply.github.com> Date: Wed, 14 Aug 2024 22:55:45 +0200 Subject: [PATCH 23/25] Bug Fixed (#136) --- src/bedtools/bedtools_intersect/script.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/src/bedtools/bedtools_intersect/script.sh b/src/bedtools/bedtools_intersect/script.sh index 04a8d854..3a28ba57 100644 --- a/src/bedtools/bedtools_intersect/script.sh +++ b/src/bedtools/bedtools_intersect/script.sh @@ -24,6 +24,7 @@ unset_if_false=( par_sortout par_bed par_no_buffer_output + par_header ) for par in ${unset_if_false[@]}; do From 766ab6c9c3059004c7c3f205621909b2d8b0b26d Mon Sep 17 00:00:00 2001 From: Toni Verbeiren Date: Wed, 21 Aug 2024 13:32:48 +0200 Subject: [PATCH 24/25] Qualimap rnaseq (#74) * first version * complete script for qualimap * add escaping character before leading hashtag (#50) * add escaping character before leading hashtag * update changelog * Update CHANGELOG.md Co-authored-by: Robrecht Cannoodt * replace escaping \ by \\ --------- Co-authored-by: Robrecht Cannoodt * Samtools collate (#49) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. * Initial commit, whole component is functional * Update viash (#51) * update viash * update readme * update changelog * update changelog * fix incorrect heading detection * update again * clean up readme * Samtools view (#48) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. 
* initial version with a few tests, script, and config file * update changelog, add one test * add a 4th test, fix option names in the script * Fix name of component in config * remove option named with a number * add must_exist to input file argument * removed "default: null" from one of the arguments in config * remove utf8 characters from config * Update CHANGELOG.md --------- Co-authored-by: Robrecht Cannoodt * Samtools fastq (#52) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. * Initial commit, config, script, help and test_data * Update changelog, add tests, fix argument naming errors, add test data * update changelog, remove gffread namespace field --------- Co-authored-by: Robrecht Cannoodt * format URL in the description (#55) * format URL in the description * update changelog * Change name in _viash.yaml (#60) * Update operational code (#63) * update readme * switch ci to toolbox * update to viash 0.9.0-RC6 * edit keywords * fix version * update biobox * cutadapt (#7) * First commit, clone of cutadapt in htrnaseq + help.txt * Add config * Don't allow multiple: true when providing a FASTA file with adapters * First version of script * Updates and fixes - se/pe * Add tests and fix --json argument * Add software version * Better consistency in using snake_case * Update src/cutadapt/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Update src/cutadapt/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Update src/cutadapt/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Specify --input and --input_r2 as separate arguments * Avoid specifying default arg values * Add more information to `--minimum_length` and `maximum_length` * Add --cpus by means of $meta_cpus and set proper default * Allow multiple for adapters/fasta and add test * change multiple_sep to ';' * add example * simplify code with a helper function * create directories in test * use a different output extension if --fasta 
is provided * decrease code duplication by separating optional outputs from paired/unpaired output arguments * write custom tests for cutadapt * fix _r2 arguments * add debug flag as not to always print the cli command * remove comment * Update to Viash 0.9.0-RC4 * Ability to specify output globbing patterns * Avoid the need for both output_dir and output * Move fields from `info` to `links` Co-authored-by: Robrecht Cannoodt * Move references back to the info field * apologies, I proposed a wrong syntax --------- Co-authored-by: Robrecht Cannoodt * update changelog * update readme * Update salmon quant arguments (#57) * Make index an optional argument * FIx argument type and add optional argument * FEAT: add bedtools getfasta. (#59) * FEAT: add bedtools getfasta. * Add PR number to CHANGELOG * Add star genomegenerate component (#58) * Add star genomegenerate component * Update changelog * Rename component * Update test * Update CHANGELOG.md --------- Co-authored-by: Robrecht Cannoodt * fix package config (#65) * Delete src/bgzip directory (#64) It was moved to toolbox * Output alignments to the transcriptome (#56) * Output alignments to the transcriptome * Change argument name * BUG: pear component failure is ignored (#70) * FEAT + BUG: cutadapt; allowing disabling demultiplexing and fix par_quality_cutoff_r2 (#69) * FEAT: Disable cutadapt demultiplexing by default * Cutadapt: fix --par_quality_cutoff_r2 * FEAT: update busco to 5.7.1 (#72) * FEAT: update busco to 5.7.1 * Typo * Samtools fasta (#53) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. 
* Fasta component * change script resource to samtools_fastq script, with dummy argument to specify the command * add dummy argument to samtools_fastq to share the script with samtools_fasta * fix path to script in config * Update src/samtools/samtools_fastq/script.sh Co-authored-by: Robrecht Cannoodt * Change default fields to examples * Two more default fields changed to examples * Minor formatting changes * Markdown formatting changes in configs --------- Co-authored-by: Robrecht Cannoodt * Umi tools dedup (#54) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. * inital commit dedup * Working component with one test * Update test 1 and test data, fix some arg types in config and script * test data files and changes to script * Add third test and test data * Fix typo in script * remove utf8 characters in config * Add choices fields and change default fields to exampels * Minor formatting changes * md formatting changes in config * Fix typo (#79) * add vscode to gitignore * update multiple separator (#81) * update multiple separator * update changelog * Update src/multiqc/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Update src/multiqc/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Update src/multiqc/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Update src/multiqc/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * update ifs --------- Co-authored-by: Robrecht Cannoodt * add test data * add tests * update changelog * remove unrequired test data * update descriptions * update changelog * update help text * Update src/qualimap/qualimap_rnaseq/script.sh Co-authored-by: Robrecht Cannoodt * update unit tests * update unit tests * addres pr changes request * add version * remove whitespace multiqc * Apply suggestions from code review Co-authored-by: Robrecht Cannoodt * address pr comments * Update CHANGELOG.md * fix doi * Fix name * update version and container image * write software 
version to file --------- Co-authored-by: dorien-er Co-authored-by: Leila011 Co-authored-by: Robrecht Cannoodt Co-authored-by: emmarousseau Co-authored-by: Sai Nirmayi Yasa <92786623+sainirmayi@users.noreply.github.com> Co-authored-by: Dries Schaumont <5946712+DriesSchaumont@users.noreply.github.com> Co-authored-by: Dorien <41797896+dorien-er@users.noreply.github.com> --- CHANGELOG.md | 2 + src/qualimap/qualimap_rnaseq/config.vsh.yaml | 103 ++++++++++++++++ src/qualimap/qualimap_rnaseq/help.txt | 52 ++++++++ src/qualimap/qualimap_rnaseq/script.sh | 50 ++++++++ src/qualimap/qualimap_rnaseq/test.sh | 112 ++++++++++++++++++ src/qualimap/qualimap_rnaseq/test_data/a.bam | Bin 0 -> 2447 bytes .../qualimap_rnaseq/test_data/annotation.gtf | 10 ++ .../qualimap_rnaseq/test_data/script.sh | 10 ++ 8 files changed, 339 insertions(+) create mode 100644 src/qualimap/qualimap_rnaseq/config.vsh.yaml create mode 100644 src/qualimap/qualimap_rnaseq/help.txt create mode 100644 src/qualimap/qualimap_rnaseq/script.sh create mode 100755 src/qualimap/qualimap_rnaseq/test.sh create mode 100644 src/qualimap/qualimap_rnaseq/test_data/a.bam create mode 100644 src/qualimap/qualimap_rnaseq/test_data/annotation.gtf create mode 100755 src/qualimap/qualimap_rnaseq/test_data/script.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 6bd21a1e..2f4c0c71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,8 @@ - `bedtools/bedtools_sort`: Sorts a feature file (bed/gff/vcf) by chromosome and other criteria (PR #98). - `bedtools/bedtools_bamtofastq`: Convert BAM alignments to FASTQ files (PR #101). - `bedtools/bedtools_bedtobam`: Converts genomic feature records (bed/gff/vcf) to BAM format (PR #111). + +* `qualimap/qualimap_rnaseq`: RNA-seq QC analysis using qualimap (PR #74). 
## MINOR CHANGES diff --git a/src/qualimap/qualimap_rnaseq/config.vsh.yaml b/src/qualimap/qualimap_rnaseq/config.vsh.yaml new file mode 100644 index 00000000..ffc807ab --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/config.vsh.yaml @@ -0,0 +1,103 @@ +name: qualimap_rnaseq +namespace: qualimap +keywords: [RNA-seq, quality control, QC Report] +description: | + Qualimap RNA-seq QC reports quality control metrics and bias estimations + which are specific for whole transcriptome sequencing, including reads genomic + origin, junction analysis, transcript coverage and 5’-3’ bias computation. +links: + homepage: http://qualimap.conesalab.org/ + documentation: http://qualimap.conesalab.org/doc_html/analysis.html#rna-seq-qc + issue_tracker: https://bitbucket.org/kokonech/qualimap/issues?status=new&status=open + repository: https://bitbucket.org/kokonech/qualimap/commits/branch/master +references: + doi: 10.1093/bioinformatics/btv566 +license: GPL-2.0 +authors: + - __merge__: /src/_authors/dorien_roosen.yaml + roles: [ author, maintainer ] +argument_groups: + - name: "Input" + arguments: + - name: "--bam" + type: file + required: true + example: alignment.bam + description: Path to the sequence alignment file in BAM format, produced by a splicing-aware aligner. + - name: "--gtf" + type: file + required: true + example: annotations.gtf + description: Path to genomic annotations in Ensembl GTF format. + + - name: "Output" + arguments: + - name: "--qc_results" + direction: output + type: file + required: true + example: rnaseq_qc_results.txt + description: Text file containing the RNAseq QC results. + - name: "--counts" + type: file + required: false + direction: output + description: Output file for computed counts. + - name: "--report" + type: file + direction: output + required: false + example: report.html + description: Report output file. Supported formats are PDF or HTML. 
+ + - name: "Optional" + arguments: + - name: "--num_pr_bases" + type: integer + required: false + min: 1 + description: Number of upstream/downstream nucleotide bases to compute 5'-3' bias (default = 100). + - name: "--num_tr_bias" + type: integer + required: false + min: 1 + description: Number of top highly expressed transcripts to compute 5'-3' bias (default = 1000). + - name: "--algorithm" + type: string + required: false + choices: ["uniquely-mapped-reads", "proportional"] + description: Counting algorithm (uniquely-mapped-reads (default) or proportional). + - name: "--sequencing_protocol" + type: string + required: false + choices: ["non-strand-specific", "strand-specific-reverse", "strand-specific-forward"] + description: Sequencing library protocol (strand-specific-forward, strand-specific-reverse or non-strand-specific (default)). + - name: "--paired" + type: boolean_true + description: Setting this flag for paired-end experiments will result in counting fragments instead of reads. + - name: "--sorted" + type: boolean_true + description: Setting this flag indicates that the input file is already sorted by name. If flag is not set, additional sorting by name will be performed. Only required for paired-end analysis. + - name: "--java_memory_size" + type: string + required: false + description: maximum Java heap memory size, default = 4G. 
+ +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - path: test_data/ + +engines: + - type: docker + image: quay.io/biocontainers/qualimap:2.3--hdfd78af_0 + setup: + - type: docker + run: | + echo QualiMap: $(qualimap 2>&1 | grep QualiMap | sed 's/^.*QualiMap//') > /var/software_versions.txt +runners: + - type: executable + - type: nextflow diff --git a/src/qualimap/qualimap_rnaseq/help.txt b/src/qualimap/qualimap_rnaseq/help.txt new file mode 100644 index 00000000..c6493ed9 --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/help.txt @@ -0,0 +1,52 @@ +QualiMap v.2.3 +Built on 2023-05-19 16:57 + +usage: qualimap [options] + +To launch GUI leave empty. + +Available tools: + + bamqc Evaluate NGS mapping to a reference genome + rnaseq Evaluate RNA-seq alignment data + counts Counts data analysis (further RNA-seq data evaluation) + multi-bamqc Compare QC reports from multiple NGS mappings + clustering Cluster epigenomic signals + comp-counts Compute feature counts + +Special arguments: + + --java-mem-size Use this argument to set Java memory heap size. Example: + qualimap bamqc -bam very_large_alignment.bam --java-mem-size=4G + +usage: qualimap rnaseq [-a ] -bam -gtf [-npb ] [-ntb + ] [-oc ] [-outdir ] [-outfile ] [-outformat ] + [-p ] [-pe] [-s] + -a,--algorithm Counting algorithm: + uniquely-mapped-reads(default) or + proportional. + -bam Input mapping file in BAM format. + -gtf Annotations file in Ensembl GTF format. + -npb,--num-pr-bases Number of upstream/downstream nucleotide bases + to compute 5'-3' bias (default is 100). + -ntb,--num-tr-bias Number of top highly expressed transcripts to + compute 5'-3' bias (default is 1000). + -oc Output file for computed counts. If only name + of the file is provided, then the file will be + saved in the output folder. + -outdir Output folder for HTML report and raw data. + -outfile Output file for PDF report (default value is + report.pdf). 
+ -outformat Format of the output report (PDF, HTML or both + PDF:HTML, default is HTML). + -p,--sequencing-protocol Sequencing library protocol: + strand-specific-forward, + strand-specific-reverse or non-strand-specific + (default) + -pe,--paired Setting this flag for paired-end experiments + will result in counting fragments instead of + reads + -s,--sorted This flag indicates that the input file is + already sorted by name. If not set, additional + sorting by name will be performed. Only + required for paired-end analysis. \ No newline at end of file diff --git a/src/qualimap/qualimap_rnaseq/script.sh b/src/qualimap/qualimap_rnaseq/script.sh new file mode 100644 index 00000000..351e5159 --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/script.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +set -eo pipefail + +tmp_dir=$(mktemp -d -p "$meta_temp_dir" qualimap_XXXXXXXXX) + +# Handle output parameters +if [ -n "$par_report" ]; then + outfile=$(basename "$par_report") + report_extension="${outfile##*.}" +fi + +if [ -n "$par_counts" ]; then + counts=$(basename "$par_counts") +fi + +# disable flags +[[ "$par_paired" == "false" ]] && unset par_paired +[[ "$par_sorted" == "false" ]] && unset par_sorted + +# Run qualimap +qualimap rnaseq \ + ${meta_memory_mb:+--java-mem-size=${meta_memory_mb}M} \ + ${par_algorithm:+--algorithm $par_algorithm} \ + ${par_sequencing_protocol:+--sequencing-protocol $par_sequencing_protocol} \ + -bam "$par_bam" \ + -gtf "$par_gtf" \ + -outdir "$tmp_dir" \ + ${par_num_pr_bases:+--num-pr-bases $par_num_pr_bases} \ + ${par_num_tr_bias:+--num-tr-bias $par_num_tr_bias} \ + ${par_report:+-outformat $report_extension} \ + ${par_paired:+--paired} \ + ${par_sorted:+--sorted} \ + ${par_report:+-outfile "$outfile"} \ + ${par_counts:+-oc "$counts"} + +# Move output files +mv "$tmp_dir/rnaseq_qc_results.txt" "$par_qc_results" + +if [ -n "$par_report" ] && [ "$report_extension" = "html" ]; then + mv "$tmp_dir/qualimapReport.html" "$par_report" +fi + +if [ -n "$par_report" ] 
&& [ "$report_extension" = "pdf" ]; then + mv "$tmp_dir/$outfile" "$par_report" +fi + +if [ -n "$par_counts" ]; then + mv "$tmp_dir/$counts" "$par_counts" +fi diff --git a/src/qualimap/qualimap_rnaseq/test.sh b/src/qualimap/qualimap_rnaseq/test.sh new file mode 100755 index 00000000..2e1b647b --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/test.sh @@ -0,0 +1,112 @@ +set -e + +############################################# +# helper functions +assert_file_exists() { + [ -f "$1" ] || { echo "File '$1' does not exist" && exit 1; } +} +assert_file_doesnt_exist() { + [ ! -f "$1" ] || { echo "File '$1' exists but shouldn't" && exit 1; } +} +assert_file_not_empty() { + [ -s "$1" ] || { echo "File '$1' is empty but shouldn't be" && exit 1; } +} +assert_file_contains() { + grep -q "$2" "$1" || { echo "File '$1' does not contain '$2'" && exit 1; } +} +############################################# + + +test_dir="$meta_resources_dir/test_data" + +mkdir "run_qualimap_rnaseq_html" +cd "run_qualimap_rnaseq_html" + +echo "> Running qualimap with html output report" + +"$meta_executable" \ + --bam $test_dir/a.bam \ + --gtf $test_dir/annotation.gtf \ + --report report.html \ + --counts counts.txt \ + --qc_results output.txt + +echo ">> Checking output" +assert_file_exists "report.html" +assert_file_exists "counts.txt" +assert_file_exists "output.txt" +assert_file_doesnt_exist "report.pdf" + +echo ">> Checking if output is empty" +assert_file_not_empty "report.html" +assert_file_not_empty "counts.txt" +assert_file_not_empty "output.txt" + +echo ">> Checking output contents" +assert_file_contains "output.txt" ">>>>>>> Input" +assert_file_contains "output.txt" ">>>>>>> Reads alignment" +assert_file_contains "output.txt" ">>>>>>> Reads genomic origin" +assert_file_contains "output.txt" ">>>>>>> Transcript coverage profile" +assert_file_contains "output.txt" ">>>>>>> Junction analysis" +assert_file_contains "output.txt" ">>>>>>> Transcript coverage profile" + +assert_file_contains 
"counts.txt" "ENSG00000125841.12" + +assert_file_contains "report.html" "Qualimap report: RNA Seq QC" +assert_file_contains "report.html" "

Input

" +assert_file_contains "report.html" "

Reads alignment

" +assert_file_contains "report.html" "

Reads genomic origin

" +assert_file_contains "report.html" "

Transcript coverage profile

" +assert_file_contains "report.html" "

Junction analysis

" + + +cd .. +rm -r run_qualimap_rnaseq_html + +mkdir "run_qualimap_rnaseq_pdf" +cd "run_qualimap_rnaseq_pdf" + +echo "> Running qualimap with pdf output report" + +"$meta_executable" \ + --bam $test_dir/a.bam \ + --gtf $test_dir/annotation.gtf \ + --report report.pdf \ + --counts counts.txt \ + --qc_results output.txt + +echo ">> Checking output" +assert_file_exists "report.pdf" +assert_file_exists "counts.txt" +assert_file_exists "output.txt" +assert_file_doesnt_exist "report.html" + +echo ">> Checking if output is empty" +assert_file_not_empty "report.pdf" +assert_file_not_empty "counts.txt" +assert_file_not_empty "output.txt" + +cd .. +rm -r run_qualimap_rnaseq_pdf + +mkdir "run_qualimap_rnaseq" +cd "run_qualimap_rnaseq" + +echo "> Running qualimap without report and counts output" + +"$meta_executable" \ + --bam $test_dir/a.bam \ + --gtf $test_dir/annotation.gtf \ + --qc_results output.txt + +echo ">> Checking output" +assert_file_doesnt_exist "report.pdf" +assert_file_doesnt_exist "report.html" +assert_file_doesnt_exist "counts.txt" +assert_file_exists "output.txt" + +echo ">> Checking if output is empty" +assert_file_not_empty "output.txt" + +cd .. +rm -r run_qualimap_rnaseq \ No newline at end of file diff --git a/src/qualimap/qualimap_rnaseq/test_data/a.bam b/src/qualimap/qualimap_rnaseq/test_data/a.bam new file mode 100644 index 0000000000000000000000000000000000000000..c8ea1065e89ca06cf12711850c36f85fba0d31b3 GIT binary patch literal 2447 zcmV;A32^owiwFb&00000{{{d;LjnL`0CRHmWi(=7U~uqo;SBS$GSoBU4EDE5&d)DO z$;?YEN#$|~4&)5>vr5h=GBV)w@v|~B0Rlrab1p`pE;b+r%P_EqUuOmY%wr|6OaK4? zABzYC000000RIL6LPG)o5edy%O^9Si9e+KOiG~T1sx7fvv$$2fVM}u9e*H1sjo?;I z5lhI2uzOI^AnR^U8Z~?JpzbRQErbQl3SK-2f`LWULr$WbKo(sQKk$-+AUU{t5Q(5D zM$GuDneLvqJ^fzq?2aM-nVz0kJw5dA_y79+|8Jv}?b(+vZe*u-pQ7v8ce9K8N7*A! 
zZ)60o>rQ7p=uNssf8ri39&`5WM?N|!Cf(k!nDjfN-lQ1!yQAzocnihjM;lqDESFJA zg9vtGth8%dO8P7oi?Y}Hu%w&%^tE1R2pm+h@(w*2%7x6^VT z+G**tJ2#$qe5due{aCm2)Xun@y_1ba&wcj( zOzobhtx6cYh_mOi`Y?b^y2D~Po)o=e)ZK$0ERW6yTUu_l6Utgfxm6}arFE&5V%7*9 zbPyInz;dQ*{NHZ4T59c@-}kKo|K<|-FV5i~I6qPH%x=B&m%>*7&p4yLv{o{1IU=4( zVYFq&E6Y4Z*juYZutr%bbZz(_tF3_lmorZS{#Q;^z5jOeWajIOK+zox@ngF`niPor z@gDr>Y1EXt2pZsU>zNH!TkZf_BmILGA{wHFBPP!g2I|z-QEb7lE7GDpe_8=QLEVRc z;Qd6&m+Di0DSRc6)mjPQS0P&X*{BdCB{GO$ohhx0gbEg%$cg1X@>7RD?lK7XXf^O> zmG9{vTxgx%{r$UI*{8A#)3a~=@IvcKLkk#o`;+dVKO9U(-F_!K1N1^ljsPIU86K^& z(J`caz)>7-A=Wu^( zM!P7rpLa(kpt~-AQk-U5uv5Z+tYuiyU<>7tZE6SGn{w2&z0)GT10s&OP|y z!YFAU+B$42BK{5&qWWh*zXtScHOYT|4*%*LexhVtU;3BAx2!biexAyaQe^B}pB}etX&$U%a z)iB}>dZ1+524bUOff=p6F+v&R1z6x#FqV^t4t>;>b{Dx(1Al$0{@?lX$u+h6(z~nb z{}aDHxpv{f2K8T{=Z%YgZ_?`zI@{T0RKJ(6W!Z1>UcnFtP8jAB6^=>nyrRS?dYn2N4%Ox}<$on)LL_wnF+kbD+{tGm`x|U^^HnZ$=>siKt z64hNY29*m96a-2GV@`pTkOIXh2Bi=*N4$V6zO)n`ztzw%*Fhf`frj_Jr&-GozeeY97$|Ugt-vz>mg7S}wsu&HW z0xJQo3Aa(93kEC;+>wTW<{H@a>I(P?>OTAf9hfNjX~Ph(41P(dvWiQVdY?|SZxVlm zDHk*&%*sfJ$HRdgC<7|U<*hrmdzc4S3<0OETwgoAyK}Od`>){a;fCBVa8PUyN23WS z*dKwk4$el*eoo^X1x^Paf+dj#fxK6O3X5d`bfQFrPz(x7o!@}7y<9Ua6Uh}@f&P^_ z`h(O@jQp`7^_QUMvJ~8urA5h8GBrBQOCL;*vhQi>koXRLfU3R9&xo~edi$%QpN&BF zneMvUeaCyNng1-#u4YXG;CRyQY(oN)l8m4_>&H@Y}0aUw2{^_&=M@fW6+i;r2ZDpPdtMko$?0&+InMfQJcaKq?g1 z%3&NpKt!CC9>L+&vc`8jLx0s42$=bwOy$p?yDzMt-ffHfD*nGUd13w02kQMl7;I0v zqak)+gYI~k{R40LdQw1FAO!Jja-}i!M2U)zmWN7@K}dRyOPc3M0&Ih$mY4!Vo|nj# zcy|)us9aT;xB@0Nz(mnI*V!qGozc$VgGDcEwGIeKq@=yTY%7w8`td*OV`71n8~9h3 z@5?garz`PWqA?)l-eO!}`R7s3skIwiqRek{n0!z{&4Fi664dN zS{WYs)6MlSY}9Ll{&rf9bbDhoMXX2nhhuaF8Zjg}NwEOI04#Fu0(aKoR)Go<(lANn z5U?K(m=k1eWw`t7*gSk$;ouib=7Iz@!C$Y`g+$3$U$55$i}3%3dEnAxS@ua}g?3I` ztj2OnrL{ppD4-qUD9uBGgCCQpDFG{Tj7l~)<~aL9yY{$-hFKK6|6jiW4e_=pxPi03 zHtYzBAtdp5G#XBNy}_W^-w~w!uTh?9$qX?RpmAqpc4`k!6GxOX8IKEoa~D~m3~$Wg zA4EZ-SVif#U^c0D*-tP@+*Lq=Xrul{vE_Z3xoRTri0x$+5w2PMaF|uTG&p zaN>7>{|%9p;knA$xdwI6>!oT~bnsGl8_n^|MjDDupcsTu39Q8J0Mx)d;0__a@_??1 
zBNebIP-g4@U=@(xn}Qtmvo{tnu2uA3I{SWu09eGxP&5)w=dcHr+T&)Y(jq7ubZ+uM zvHy_HVC+>A&rx6=^wDA*3diW?23yX+{{U)jjNb?z001A02m}BC000301^_}s0stET N0{{R300000002N$thE3D literal 0 HcmV?d00001 diff --git a/src/qualimap/qualimap_rnaseq/test_data/annotation.gtf b/src/qualimap/qualimap_rnaseq/test_data/annotation.gtf new file mode 100644 index 00000000..976de753 --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/test_data/annotation.gtf @@ -0,0 +1,10 @@ +chr20 HAVANA transcript 347024 354868 . + . gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA exon 347024 347142 . + . gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 1; exon_id "ENSE00001831391.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA exon 349249 349363 . + . gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 2; exon_id "ENSE00001491647.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA exon 349638 349832 . + . 
gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 3; exon_id "ENSE00003710328.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA CDS 349644 349832 . + 0 gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 3; exon_id "ENSE00003710328.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA start_codon 349644 349646 . + 0 gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 3; exon_id "ENSE00003710328.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA exon 353210 354868 . + . gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 4; exon_id "ENSE00001822456.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA CDS 353210 353632 . 
+ 0 gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 4; exon_id "ENSE00001822456.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA stop_codon 353633 353635 . + 0 gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 4; exon_id "ENSE00001822456.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; +chr20 HAVANA UTR 347024 347142 . + . gene_id "ENSG00000125841.12"; transcript_id "ENST00000382291.7"; gene_type "protein_coding"; gene_name "NRSN2"; transcript_type "protein_coding"; transcript_name "NRSN2-202"; exon_number 1; exon_id "ENSE00001831391.1"; level 2; protein_id "ENSP00000371728.3"; transcript_support_level "2"; tag "basic"; tag "appris_principal_1"; tag "CCDS"; ccdsid "CCDS12996.1"; havana_gene "OTTHUMG00000031628.5"; havana_transcript "OTTHUMT00000077446.1"; diff --git a/src/qualimap/qualimap_rnaseq/test_data/script.sh b/src/qualimap/qualimap_rnaseq/test_data/script.sh new file mode 100755 index 00000000..801fe405 --- /dev/null +++ b/src/qualimap/qualimap_rnaseq/test_data/script.sh @@ -0,0 +1,10 @@ +# qualimap test data + +# Test data was obtained from https://github.com/snakemake/snakemake-wrappers/raw/master/bio/qualimap/rnaseq/test + +if [ ! 
-d /tmp/snakemake-wrappers ]; then + git clone --depth 1 --single-branch --branch master https://github.com/snakemake/snakemake-wrappers /tmp/snakemake-wrappers +fi + +cp -r /tmp/snakemake-wrappers/bio/qualimap/rnaseq/test/mapped/a.bam src/qualimap/qualimap_rnaseq/test_data +cp -r /tmp/snakemake-wrappers/bio/qualimap/rnaseq/test/annotation.gtf src/qualimap/qualimap_rnaseq/test_data From c4ea23a0f508b93b31bb1a36418ad4868fdb5bc3 Mon Sep 17 00:00:00 2001 From: Sai Nirmayi Yasa <92786623+sainirmayi@users.noreply.github.com> Date: Wed, 21 Aug 2024 17:31:32 +0200 Subject: [PATCH 25/25] Add RSEM prepare reference component (#89) * initial commit * incorporaate some requested changes * update test * change argument reference_fasta_files to multiple true and update docker setup * Update src/rsem/rsem_prepare_reference/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Update src/rsem/rsem_prepare_reference/config.vsh.yaml Co-authored-by: Robrecht Cannoodt * Update src/rsem/rsem_prepare_reference/script.sh Co-authored-by: Robrecht Cannoodt * set multiple true * update changelog * Apply suggestions from code review * fix script --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 2 + .../rsem_prepare_reference/config.vsh.yaml | 196 +++++++++++++++++ src/rsem/rsem_prepare_reference/help.txt | 207 ++++++++++++++++++ src/rsem/rsem_prepare_reference/script.sh | 42 ++++ src/rsem/rsem_prepare_reference/test.sh | 37 ++++ 5 files changed, 484 insertions(+) create mode 100644 src/rsem/rsem_prepare_reference/config.vsh.yaml create mode 100644 src/rsem/rsem_prepare_reference/help.txt create mode 100644 src/rsem/rsem_prepare_reference/script.sh create mode 100644 src/rsem/rsem_prepare_reference/test.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f4c0c71..3e9f40fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,8 @@ * `qualimap/qualimap_rnaseq`: RNA-seq QC analysis using qualimap (PR #74). 
+* `rsem/rsem_prepare_reference`: Prepare transcript references for RSEM (PR #89). + ## MINOR CHANGES * `busco` components: update BUSCO to `5.7.1` (PR #72). diff --git a/src/rsem/rsem_prepare_reference/config.vsh.yaml b/src/rsem/rsem_prepare_reference/config.vsh.yaml new file mode 100644 index 00000000..44915a2f --- /dev/null +++ b/src/rsem/rsem_prepare_reference/config.vsh.yaml @@ -0,0 +1,196 @@ +name: rsem_prepare_reference +namespace: rsem +description: | + RSEM is a software package for estimating gene and isoform expression levels from RNA-Seq data. This component prepares transcript references for RSEM. +keywords: ["Transcriptome", "Index"] +links: + homepage: http://deweylab.github.io/RSEM + documentation: https://deweylab.github.io/RSEM/rsem-prepare-reference.html + repository: https://github.com/deweylab/RSEM +references: + doi: 10.1186/1471-2105-12-323 +license: GPL-3.0 +requirements: + commands: [ rsem-prepare-reference ] +authors: + - __merge__: /src/_authors/sai_nirmayi_yasa.yaml + roles: [ author, maintainer ] + +argument_groups: + - name: Inputs + arguments: + - name: --reference_fasta_files + type: file + description: | + Semi-colon separated list of Multi-FASTA formatted files OR a directory name. If a directory name is specified, RSEM will read all files with suffix ".fa" or ".fasta" in this directory. The files should contain either the sequences of transcripts or an entire genome, depending on whether the '--gtf' option is used. + required: true + multiple: true + example: read1.fasta + - name: --reference_name + type: string + description: | + The name of the reference used. RSEM will generate several reference-related files that are prefixed by this name. This name can contain path information (e.g. '/ref/mm9'). + required: true + example: /ref/mm9 + + - name: Outputs + arguments: + - name: --output + type: file + description: Directory containing reference files generated by RSEM. 
+ required: true + direction: output + + - name: Other options + arguments: + - name: --gtf + type: file + description: Assume that 'reference_fasta_files' contains the sequence of a genome, and extract transcript reference sequences using the gene annotations specified in the GTF file. If this and '--gff3' options are not provided, RSEM will assume 'reference_fasta_files' contains the reference transcripts. In this case, RSEM assumes that name of each sequence in the Multi-FASTA files is its transcript_id. + example: annotations.gtf + - name: --gff3 + type: file + description: GFF3 annotation file. Converted to GTF format with the file name 'reference_name.gtf'. Please make sure that 'reference_name.gtf' does not exist. + example: annotations.gff + - name: --gff3_rna_patterns + type: string + description: List of transcript categories (separated by semi-colon). Only transcripts that match the string will be extracted. + multiple: true + example: mRNA;rRNA + - name: --gff3_genes_as_transcripts + type: boolean_true + description: This option is designed for untypical organisms, such as viruses, whose GFF3 files only contain genes. RSEM will assume each gene as a unique transcript when it converts the GFF3 file into GTF format. + - name: --trusted_sources + type: string + description: List of trusted sources (separated by semi-colon). Only transcripts coming from these sources will be extracted. If this option is off, all sources are accepted. + multiple: true + example: ENSEMBL;HAVANA + - name: --transcript_to_gene_map + type: file + description: | + Use information from this file to map from transcript (isoform) ids to gene ids. Each line of this file should be of the form: + gene_id transcript_id + with the two fields separated by a tab character. + If you are using a GTF file for the "UCSC Genes" gene set from the UCSC Genome Browser, then the "knownIsoforms.txt" file (obtained from the "Downloads" section of the UCSC Genome Browser site) is of this format. 
+ If this option is off, then the mapping of isoforms to genes depends on whether the '--gtf' option is specified. If '--gtf' is specified, then RSEM uses the "gene_id" and "transcript_id" attributes in the GTF file. Otherwise, RSEM assumes that each sequence in the reference sequence files is a separate gene. + example: isoforms.txt + - name: --allele_to_gene_map + type: file + description: | + Use information from this file to provide gene_id and transcript_id information for each allele-specific transcript. Each line of this file should be of the form: + gene_id transcript_id allele_id + with the fields separated by a tab character. + This option is designed for quantifying allele-specific expression. It is only valid if '--gtf' option is not specified. allele_id should be the sequence names presented in the Multi-FASTA-formatted files. + - name: --polyA + type: boolean_true + description: Add poly(A) tails to the end of all reference isoforms. The length of poly(A) tail added is specified by '--polyA_length' option. STAR aligner users may not want to use this option. + - name: --polyA_length + type: integer + description: The length of the poly(A) tails to be added. + example: 125 + - name: --no_polyA_subset + type: file + description: Only meaningful if '--polyA' is specified. Do not add poly(A) tails to those transcripts listed in this file containing a list of transcript_ids. + example: transcript_ids.txt + - name: --bowtie + type: boolean_true + description: Build Bowtie indices. + - name: --bowtie2 + type: boolean_true + description: Build Bowtie 2 indices. + - name: --star + type: boolean_true + description: Build STAR indices. + - name: --star_sjdboverhang + type: integer + description: Length of the genomic sequence around annotated junction. It is only used for STAR to build splice junctions database and not needed for Bowtie or Bowtie2. It will be passed as the --sjdbOverhang option to STAR. According to STAR's manual, its ideal value is max(ReadLength)-1, e.g. 
for 2x101 paired-end reads, the ideal value is 101-1=100. In most cases, the default value of 100 will work as well as the ideal value. (Default is 100) + example: 100 + - name: --hisat2_hca + type: boolean_true + description: Build HISAT2 indices on the transcriptome according to Human Cell Atlas (HCA) SMART-Seq2 pipeline. + - name: --quiet + alternatives: -q + type: boolean_true + description: Suppress the output of logging information. + + - name: Prior-enhanced RSEM options + arguments: + - name: --prep_pRSEM + type: boolean_true + description: A Boolean indicating whether to prepare reference files for pRSEM, including building Bowtie indices for a genome and selecting training set isoforms. The index files will be used for aligning ChIP-seq reads in prior-enhanced RSEM and the training set isoforms will be used for learning prior. A path to Bowtie executables and a mappability file in bigWig format are required when this option is on. Currently, Bowtie2 is not supported for prior-enhanced RSEM. + - name: --mappability_bigwig_file + type: file + description: Full path to a whole-genome mappability file in bigWig format. This file is required for running prior-enhanced RSEM. It is used for selecting a training set of isoforms for prior-learning. This file can be either downloaded from UCSC Genome Browser or generated by GEM (Derrien et al., 2012, PLoS One). 
+ +resources: + - type: bash_script + path: script.sh + +test_resources: + - type: bash_script + path: test.sh + +engines: +- type: docker + image: ubuntu:22.04 + setup: + - type: apt + packages: + - build-essential + - gcc + - g++ + - make + - wget + - zlib1g-dev + - unzip xxd + - perl + - r-base + - bowtie2 + - pip + - git + - type: python + packages: bowtie + - type: docker + env: + - STAR_VERSION=2.7.11b + - RSEM_VERSION=1.3.3 + - BOWTIE_VERSION=1.3.1 + - TZ=Europe/Brussels + run: | + ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone && \ + cd /tmp && \ + wget --no-check-certificate https://github.com/alexdobin/STAR/archive/refs/tags/${STAR_VERSION}.zip && \ + unzip ${STAR_VERSION}.zip && \ + cd STAR-${STAR_VERSION}/source && \ + make STARstatic CXXFLAGS_SIMD=-std=c++11 && \ + cp STAR /usr/local/bin && \ + cd /tmp && \ + wget --no-check-certificate https://github.com/deweylab/RSEM/archive/refs/tags/v${RSEM_VERSION}.zip && \ + unzip v${RSEM_VERSION}.zip && \ + cd RSEM-${RSEM_VERSION} && \ + make && \ + make install && \ + cd /tmp && \ + wget --no-check-certificate -O bowtie-${BOWTIE_VERSION}-linux-x86_64.zip https://sourceforge.net/projects/bowtie-bio/files/bowtie/${BOWTIE_VERSION}/bowtie-${BOWTIE_VERSION}-linux-x86_64.zip/download && \ + unzip bowtie-${BOWTIE_VERSION}-linux-x86_64.zip && \ + cp bowtie-${BOWTIE_VERSION}-linux-x86_64/bowtie* /usr/local/bin && \ + cd /tmp && \ + git clone https://github.com/DaehwanKimLab/hisat2.git /tmp/hisat2 && \ + cd /tmp/hisat2 && \ + make && \ + cp -r hisat2* /usr/local/bin && \ + cd && \ + rm -rf /tmp/STAR-${STAR_VERSION} /tmp/${STAR_VERSION}.zip /tmp/bowtie-${BOWTIE_VERSION}-linux-x86_64 /tmp/hisat2 && \ + apt-get --purge autoremove -y ${PACKAGES} && \ + apt-get clean + + - type: docker + run: | + echo "RSEM: `rsem-calculate-expression --version | sed -e 's/Current version: RSEM v//g'`" > /var/software_versions.txt && \ + echo "STAR: `STAR --version`" >> /var/software_versions.txt && \ + echo 
"bowtie2: `bowtie2 --version | grep -oP '\d+\.\d+\.\d+'`" >> /var/software_versions.txt && \ + echo "bowtie: `bowtie --version | grep -oP 'bowtie-align-s version \K\d+\.\d+\.\d+'`" >> /var/software_versions.txt && \ + echo "HISAT2: `hisat2 --version | grep -oP 'hisat2-align-s version \K\d+\.\d+\.\d+'`" >> /var/software_versions.txt + +runners: + - type: executable + - type: nextflow \ No newline at end of file diff --git a/src/rsem/rsem_prepare_reference/help.txt b/src/rsem/rsem_prepare_reference/help.txt new file mode 100644 index 00000000..c69899ec --- /dev/null +++ b/src/rsem/rsem_prepare_reference/help.txt @@ -0,0 +1,207 @@ +```bash +rsem-prepare-reference --help +``` + +NAME +rsem-prepare-reference - Prepare transcript references for RSEM and optionally build BOWTIE/BOWTIE2/STAR/HISAT2(transcriptome) indices. + +SYNOPSIS + rsem-prepare-reference [options] reference_fasta_file(s) reference_name +ARGUMENTS +reference_fasta_file(s) +Either a comma-separated list of Multi-FASTA formatted files OR a directory name. If a directory name is specified, RSEM will read all files with suffix ".fa" or ".fasta" in this directory. The files should contain either the sequences of transcripts or an entire genome, depending on whether the '--gtf' option is used. + +reference name +The name of the reference used. RSEM will generate several reference-related files that are prefixed by this name. This name can contain path information (e.g. '/ref/mm9'). + +OPTIONS +--gtf +If this option is on, RSEM assumes that 'reference_fasta_file(s)' contains the sequence of a genome, and will extract transcript reference sequences using the gene annotations specified in , which should be in GTF format. + +If this and '--gff3' options are off, RSEM will assume 'reference_fasta_file(s)' contains the reference transcripts. In this case, RSEM assumes that name of each sequence in the Multi-FASTA files is its transcript_id. 
+ +(Default: off) + +--gff3 <file> +The annotation file is in GFF3 format instead of GTF format. RSEM will first convert it to GTF format with the file name 'reference_name.gtf'. Please make sure that 'reference_name.gtf' does not exist. (Default: off) + +--gff3-RNA-patterns <pattern> +<pattern> is a comma-separated list of transcript categories, e.g. "mRNA,rRNA". Only transcripts that match the <pattern> will be extracted. (Default: "mRNA") + +--gff3-genes-as-transcripts +This option is designed for untypical organisms, such as viruses, whose GFF3 files only contain genes. RSEM will assume each gene as a unique transcript when it converts the GFF3 file into GTF format. + +--trusted-sources <sources> +<sources> is a comma-separated list of trusted sources, e.g. "ENSEMBL,HAVANA". Only transcripts coming from these sources will be extracted. If this option is off, all sources are accepted. (Default: off) + +--transcript-to-gene-map <file> +Use information from <file> to map from transcript (isoform) ids to gene ids. Each line of <file> should be of the form: + +gene_id transcript_id + +with the two fields separated by a tab character. + +If you are using a GTF file for the "UCSC Genes" gene set from the UCSC Genome Browser, then the "knownIsoforms.txt" file (obtained from the "Downloads" section of the UCSC Genome Browser site) is of this format. + +If this option is off, then the mapping of isoforms to genes depends on whether the '--gtf' option is specified. If '--gtf' is specified, then RSEM uses the "gene_id" and "transcript_id" attributes in the GTF file. Otherwise, RSEM assumes that each sequence in the reference sequence files is a separate gene. + +(Default: off) + +--allele-to-gene-map <file> +Use information from <file> to provide gene_id and transcript_id information for each allele-specific transcript. Each line of <file> should be of the form: + +gene_id transcript_id allele_id + +with the fields separated by a tab character. + +This option is designed for quantifying allele-specific expression. It is only valid if '--gtf' option is not specified. 
allele_id should be the sequence names presented in the Multi-FASTA-formatted files. + +(Default: off) + +--polyA +Add poly(A) tails to the end of all reference isoforms. The length of poly(A) tail added is specified by '--polyA-length' option. STAR aligner users may not want to use this option. (Default: do not add poly(A) tail to any of the isoforms) + +--polyA-length <int> +The length of the poly(A) tails to be added. (Default: 125) + +--no-polyA-subset <file> +Only meaningful if '--polyA' is specified. Do not add poly(A) tails to those transcripts listed in <file>. <file> is a file containing a list of transcript_ids. (Default: off) + +--bowtie +Build Bowtie indices. (Default: off) + +--bowtie-path <path> +The path to the Bowtie executables. (Default: the path to Bowtie executables is assumed to be in the user's PATH environment variable) + +--bowtie2 +Build Bowtie 2 indices. (Default: off) + +--bowtie2-path <path> +The path to the Bowtie 2 executables. (Default: the path to Bowtie 2 executables is assumed to be in the user's PATH environment variable) + +--star +Build STAR indices. (Default: off) + +--star-path <path> +The path to STAR's executable. (Default: the path to STAR executable is assumed to be in user's PATH environment variable) + +--star-sjdboverhang <int> +Length of the genomic sequence around annotated junction. It is only used for STAR to build splice junctions database and not needed for Bowtie or Bowtie2. It will be passed as the --sjdbOverhang option to STAR. According to STAR's manual, its ideal value is max(ReadLength)-1, e.g. for 2x101 paired-end reads, the ideal value is 101-1=100. In most cases, the default value of 100 will work as well as the ideal value. (Default: 100) + +--hisat2-hca +Build HISAT2 indices on the transcriptome according to Human Cell Atlas (HCA) SMART-Seq2 pipeline. (Default: off) + +--hisat2-path <path> +The path to the HISAT2 executables. 
(Default: the path to HISAT2 executables is assumed to be in the user's PATH environment variable) + +-p/--num-threads +Number of threads to use for building STAR's genome indices. (Default: 1) + +-q/--quiet +Suppress the output of logging information. (Default: off) + +-h/--help +Show help information. + +PRIOR-ENHANCED RSEM OPTIONS +--prep-pRSEM +A Boolean indicating whether to prepare reference files for pRSEM, including building Bowtie indices for a genome and selecting training set isoforms. The index files will be used for aligning ChIP-seq reads in prior-enhanced RSEM and the training set isoforms will be used for learning prior. A path to Bowtie executables and a mappability file in bigWig format are required when this option is on. Currently, Bowtie2 is not supported for prior-enhanced RSEM. (Default: off) + +--mappability-bigwig-file +Full path to a whole-genome mappability file in bigWig format. This file is required for running prior-enhanced RSEM. It is used for selecting a training set of isoforms for prior-learning. This file can be either downloaded from UCSC Genome Browser or generated by GEM (Derrien et al., 2012, PLoS One). (Default: "") + +DESCRIPTION +This program extracts/preprocesses the reference sequences for RSEM and prior-enhanced RSEM. It can optionally build Bowtie indices (with '--bowtie' option) and/or Bowtie 2 indices (with '--bowtie2' option) using their default parameters. It can also optionally build STAR indices (with '--star' option) using parameters from ENCODE3's STAR-RSEM pipeline. For prior-enhanced RSEM, it can build Bowtie genomic indices and select training set isoforms (with options '--prep-pRSEM' and '--mappability-bigwig-file '). If an alternative aligner is to be used, indices for that particular aligner can be built from either 'reference_name.idx.fa' or 'reference_name.n2g.idx.fa' (see OUTPUT for details). This program is used in conjunction with the 'rsem-calculate-expression' program. 
+ +OUTPUT +This program will generate 'reference_name.grp', 'reference_name.ti', 'reference_name.transcripts.fa', 'reference_name.seq', 'reference_name.chrlist' (if '--gtf' is on), 'reference_name.idx.fa', 'reference_name.n2g.idx.fa', optional Bowtie/Bowtie 2 index files, and optional STAR index files. + +'reference_name.grp', 'reference_name.ti', 'reference_name.seq', and 'reference_name.chrlist' are used by RSEM internally. + +'reference_name.transcripts.fa' contains the extracted reference transcripts in Multi-FASTA format. Poly(A) tails are not added and it may contain lower case bases in its sequences if the corresponding genomic regions are soft-masked. + +'reference_name.idx.fa' and 'reference_name.n2g.idx.fa' are used by aligners to build their own indices. In these two files, all sequence bases are converted into upper case. In addition, poly(A) tails are added if '--polyA' option is set. The only difference between 'reference_name.idx.fa' and 'reference_name.n2g.idx.fa' is that 'reference_name.n2g.idx.fa' in addition converts all 'N' characters to 'G' characters. This conversion is in particular desired for aligners (e.g. Bowtie) that do not allow reads to overlap with 'N' characters in the reference sequences. Otherwise, 'reference_name.idx.fa' should be used to build the aligner's index files. RSEM uses 'reference_name.idx.fa' to build Bowtie 2 indices and 'reference_name.n2g.idx.fa' to build Bowtie indices. For visualizing the transcript-coordinate-based BAM files generated by RSEM in IGV, 'reference_name.idx.fa' should be imported as a "genome" (see Visualization section in README.md for details). + +If the whole genome is indexed for prior-enhanced RSEM, all the index files will be generated with prefix as 'reference_name_prsem'. Selected isoforms for training set are listed in the file 'reference_name_prsem.training_tr_crd' + +EXAMPLES +1) Suppose we have mouse RNA-Seq data and want to use the UCSC mm9 version of the mouse genome. 
We have downloaded the UCSC Genes transcript annotations in GTF format (as mm9.gtf) using the Table Browser and the knownIsoforms.txt file for mm9 from the UCSC Downloads. We also have all chromosome files for mm9 in the directory '/data/mm9'. We want to put the generated reference files under '/ref' with name 'mouse_0'. We do not add any poly(A) tails. Please note that GTF files generated from UCSC's Table Browser do not contain isoform-gene relationship information. For the UCSC Genes annotation, this information can be obtained from the knownIsoforms.txt file. Suppose we want to build Bowtie indices and Bowtie executables are found in '/sw/bowtie'. + +There are two ways to write the command: + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --bowtie \ + --bowtie-path /sw/bowtie \ + /data/mm9/chr1.fa,/data/mm9/chr2.fa,...,/data/mm9/chrM.fa \ + /ref/mouse_0 +OR + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --bowtie \ + --bowtie-path /sw/bowtie \ + /data/mm9 \ + /ref/mouse_0 +2) Suppose we also want to build Bowtie 2 indices in the above example and Bowtie 2 executables are found in '/sw/bowtie2', the command will be: + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --bowtie \ + --bowtie-path /sw/bowtie \ + --bowtie2 \ + --bowtie2-path /sw/bowtie2 \ + /data/mm9 \ + /ref/mouse_0 +3) Suppose we want to build STAR indices in the above example and save index files under '/ref' with name 'mouse_0'. 
Assuming STAR executable is '/sw/STAR', the command will be: + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --star \ + --star-path /sw/STAR \ + -p 8 \ + /data/mm9/chr1.fa,/data/mm9/chr2.fa,...,/data/mm9/chrM.fa \ + /ref/mouse_0 +OR + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --star \ + --star-path /sw/STAR \ + -p 8 \ + /data/mm9 \ + /ref/mouse_0 +STAR genome index files will be saved under '/ref/'. + +4) Suppose we want to prepare references for prior-enhanced RSEM in the above example. In this scenario, both STAR and Bowtie are required to build genomic indices - STAR for RNA-seq reads and Bowtie for ChIP-seq reads. Assume their executables are under '/sw/STAR' and '/sw/Bowtie', respectively, and that the mappability file for the mouse genome is '/data/mm9.bigWig'. The command will be: + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --star \ + --star-path /sw/STAR \ + -p 8 \ + --prep-pRSEM \ + --bowtie-path /sw/Bowtie \ + --mappability-bigwig-file /data/mm9.bigWig \ + /data/mm9/chr1.fa,/data/mm9/chr2.fa,...,/data/mm9/chrM.fa \ + /ref/mouse_0 +OR + + rsem-prepare-reference --gtf mm9.gtf \ + --transcript-to-gene-map knownIsoforms.txt \ + --star \ + --star-path /sw/STAR \ + -p 8 \ + --prep-pRSEM \ + --bowtie-path /sw/Bowtie \ + --mappability-bigwig-file /data/mm9.bigWig \ + /data/mm9 \ + /ref/mouse_0 +Both STAR and Bowtie's index files will be saved under '/ref/'. Bowtie files will have name prefix 'mouse_0_prsem'. + +5) Suppose we only have transcripts from EST tags stored in 'mm9.fasta' and isoform-gene information stored in 'mapping.txt'. We want to add 125bp long poly(A) tails to all transcripts. The reference_name is set as 'mouse_125'.
#!/bin/bash

# Build an RSEM reference with `rsem-prepare-reference` and move the
# generated `<reference_name>.*` files into the output directory.
#
# Inputs are read from `par_*` / `meta_*` environment variables that the
# caller is expected to define (presumably the Viash wrapper -- confirm):
#   par_reference_fasta_files  ';'-separated FASTA files (or a directory)
#   par_reference_name         name/prefix of the reference to build
#   par_output                 directory receiving the reference files
#   meta_cpus                  optional thread count (--num-threads)
# Every other par_* variable maps onto the option of the same name.

set -eo pipefail

# Boolean parameters. A literal "false" has to look like "not set" so that
# the `${par_x:+--flag}` expansions further down omit the flag entirely.
unset_if_false=(
    par_gff3_genes_as_transcripts
    par_polyA
    par_bowtie
    par_bowtie2
    par_star
    par_hisat2_hca
    par_quiet
    par_prep_pRSEM
)

for par in "${unset_if_false[@]}"; do
    # ${!par} is an indirect lookup: the value of the variable named by $par.
    if [[ "${!par}" == "false" ]]; then
        unset "$par"
    fi
done

# Multi-value parameters arrive ';'-separated, whereas rsem-prepare-reference
# expects comma-separated lists. Pure parameter expansion avoids the
# word-splitting and globbing of an unquoted `echo $var | tr`.
par_reference_fasta_files="${par_reference_fasta_files//;/,}"
par_gff3_rna_patterns="${par_gff3_rna_patterns//;/,}"
par_trusted_sources="${par_trusted_sources//;/,}"

echo "$par_reference_fasta_files"

# `${var:+--opt "${var}"}` expands to nothing when var is unset or empty, and
# to the option plus its quoted value otherwise.
# NOTE: --gff3-genes-as-transcripts is a switch without a value; passing the
# parameter value after it would inject a stray positional argument.
rsem-prepare-reference \
    ${par_gtf:+--gtf "${par_gtf}"} \
    ${par_gff3:+--gff3 "${par_gff3}"} \
    ${par_gff3_rna_patterns:+--gff3-RNA-patterns "${par_gff3_rna_patterns}"} \
    ${par_gff3_genes_as_transcripts:+--gff3-genes-as-transcripts} \
    ${par_trusted_sources:+--trusted-sources "${par_trusted_sources}"} \
    ${par_transcript_to_gene_map:+--transcript-to-gene-map "${par_transcript_to_gene_map}"} \
    ${par_allele_to_gene_map:+--allele-to-gene-map "${par_allele_to_gene_map}"} \
    ${par_polyA:+--polyA} \
    ${par_polyA_length:+--polyA-length "${par_polyA_length}"} \
    ${par_no_polyA_subset:+--no-polyA-subset "${par_no_polyA_subset}"} \
    ${par_bowtie:+--bowtie} \
    ${par_bowtie2:+--bowtie2} \
    ${par_star:+--star} \
    ${par_star_sjdboverhang:+--star-sjdboverhang "${par_star_sjdboverhang}"} \
    ${par_hisat2_hca:+--hisat2-hca} \
    ${par_quiet:+--quiet} \
    ${par_prep_pRSEM:+--prep-pRSEM} \
    ${par_mappability_bigwig_file:+--mappability-bigwig-file "${par_mappability_bigwig_file}"} \
    ${meta_cpus:+--num-threads "${meta_cpus}"} \
    "${par_reference_fasta_files}" \
    "${par_reference_name}"

# Collect everything named `<reference_name>.*` in the output directory.
# Only the glob is left unquoted so a reference name containing spaces or
# glob characters is still treated literally.
mkdir -p "${par_output}"
mv "${par_reference_name}".* "${par_output}/"
#!/bin/bash

# Smoke test for the rsem_prepare_reference component: builds a reference
# from a tiny one-sequence FASTA and checks the files that end up in the
# output directory.
# `meta_executable` and `meta_functionality_name` are expected to be set by
# the test runner (presumably Viash -- confirm).

# `set -e pipefail` (without -o) would only set $1 to "pipefail"; the option
# has to be enabled through -o.
set -eo pipefail

echo ">>> Testing $meta_functionality_name"

# Mixed-case bases and trailing Ns, so that the upper-casing done for the
# idx.fa files and the N->G conversion done for n2g.idx.fa can be observed.
cat > genome.fasta <<'EOF'
>Sheila
GCTAGCTCAGAAAAaaaNNN
EOF

echo ">>> Prepare RSEM reference without gene annotations"
"$meta_executable" \
    --reference_fasta_files genome.fasta \
    --reference_name test \
    --output RSEM_index

echo ">>> Checking whether output files exist"
[ -d "RSEM_index" ] || { echo "RSEM index does not exist!"; exit 1; }
for expected in test.grp test.n2g.idx.fa test.ti test.idx.fa test.seq test.transcripts.fa; do
    [ -f "RSEM_index/${expected}" ] || { echo "${expected} does not exist!"; exit 1; }
done

echo ">>> Checking whether output is correct"
for expected in test.grp test.ti test.seq; do
    [ -s "RSEM_index/${expected}" ] || { echo "${expected} is empty!"; exit 1; }
done
# transcripts.fa keeps the original case, idx.fa is upper-cased, and
# n2g.idx.fa additionally has every N replaced by G.
grep -q "GCTAGCTCAGAAAAaaaNNN" "RSEM_index/test.transcripts.fa" || { echo "The content of file 'test.transcripts.fa' seems to be incorrect." && exit 1; }
grep -q "GCTAGCTCAGAAAAAAANNN" "RSEM_index/test.idx.fa" || { echo "The content of file 'test.idx.fa' seems to be incorrect." && exit 1; }
grep -q "GCTAGCTCAGAAAAAAAGGG" "RSEM_index/test.n2g.idx.fa" || { echo "The content of file 'test.n2g.idx.fa' seems to be incorrect." && exit 1; }

echo "All tests succeeded!"
exit 0