From aa43543e1fb609901d09b7a9f0c5e72707cb47a4 Mon Sep 17 00:00:00 2001 From: Emma Rousseau Date: Sat, 26 Oct 2024 20:42:22 +0200 Subject: [PATCH] Rseqc innerdistance (#159) * initial commit dedup * Revert "initial commit dedup" This reverts commit 38f586bec0ac9e4312b016e29c3aa0bd53f292b2. * full component with two tests * fix default values * adjust argument names and container image --------- Co-authored-by: Robrecht Cannoodt --- CHANGELOG.md | 1 + .../rseqc_inner_distance/config.vsh.yaml | 116 ++++++++++++++++++ src/rseqc/rseqc_inner_distance/help.txt | 43 +++++++ src/rseqc/rseqc_inner_distance/script.sh | 25 ++++ src/rseqc/rseqc_inner_distance/test.sh | 77 ++++++++++++ .../rseqc_inner_distance/test_data/test.bed12 | 4 + .../test_data/test.paired_end.sorted.bam | Bin 0 -> 10205 bytes .../test_data/test1.inner_distance.txt | 49 ++++++++ .../test_data/test1.inner_distance_freq.txt | 100 +++++++++++++++ .../test_data/test2.inner_distance.txt | 4 + .../test_data/test2.inner_distance_freq.txt | 100 +++++++++++++++ 11 files changed, 519 insertions(+) create mode 100644 src/rseqc/rseqc_inner_distance/config.vsh.yaml create mode 100644 src/rseqc/rseqc_inner_distance/help.txt create mode 100644 src/rseqc/rseqc_inner_distance/script.sh create mode 100644 src/rseqc/rseqc_inner_distance/test.sh create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test.bed12 create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test.paired_end.sorted.bam create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance.txt create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance_freq.txt create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance.txt create mode 100644 src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance_freq.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fc134fd..0e32edb1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ * `rsem/rsem_calculate_expression`: Calculate expression 
levels (PR #93). * `rseqc`: + - `rseqc/rseqc_inner_distance`: Calculate inner distance between read pairs (PR #159). - `rseqc/rseqc_inferexperiment`: Infer strandedness from sequencing reads (PR #158). - `rseqc/bam_stat`: Generate statistics from a bam file (PR #155). diff --git a/src/rseqc/rseqc_inner_distance/config.vsh.yaml b/src/rseqc/rseqc_inner_distance/config.vsh.yaml new file mode 100644 index 00000000..e050bb24 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/config.vsh.yaml @@ -0,0 +1,116 @@ +name: "rseqc_inner_distance" +namespace: "rseqc" +description: | + Calculate inner distance between read pairs. +links: + homepage: https://rseqc.sourceforge.net/ + documentation: https://rseqc.sourceforge.net/#inner-distance-py + issue_tracker: https://github.com/MonashBioinformaticsPlatform/RSeQC/issues + repository: https://github.com/MonashBioinformaticsPlatform/RSeQC +references: + doi: 10.1093/bioinformatics/bts356 +license: GPL-3.0 +authors: + - __merge__: /src/_authors/emma_rousseau.yaml + roles: [ author, maintainer ] + +argument_groups: +- name: "Input" + arguments: + - name: "--input_file" + alternatives: ["-i"] + type: file + required: true + description: input alignment file in BAM or SAM format + + - name: "--refgene" + alternatives: ["-r"] + type: file + required: true + description: Reference gene model in bed format + + - name: "--sample_size" + alternatives: ["-k"] + type: integer + example: 1000000 + description: Number of reads sampled from SAM/BAM file, default = 1000000. + + - name: "--mapq" + alternatives: ["-q"] + type: integer + example: 30 + description: Minimum mapping quality (phred scaled) to determine uniquely mapped reads, default=30. + + - name: "--lower_bound" + alternatives: ["-l"] + type: integer + example: -250 + description: Lower bound of inner distance (bp). This option is used for plotting histogram, default=-250.
+ + - name: "--upper_bound" + alternatives: ["-u"] + type: integer + example: 250 + description: Upper bound of inner distance (bp). This option is used for plotting histogram, default=250. + + - name: "--step" + alternatives: ["-s"] + type: integer + example: 5 + description: Step size (bp) of histogram. This option is used for plotting histogram, default=5. + +- name: "Output" + arguments: + - name: "--output_prefix" + alternatives: ["-o"] + type: string + required: true + description: Prefix of output files. + + - name: "--output_stats" + type: file + direction: output + description: output file (txt) with summary statistics of inner distances of paired reads + + - name: "--output_dist" + type: file + direction: output + description: output file (txt) with inner distances of all paired reads + + - name: "--output_freq" + type: file + direction: output + description: output file (txt) with frequencies of inner distances of all paired reads + + - name: "--output_plot" + type: file + direction: output + description: output file (pdf) with histogram plot of inner distances of all paired reads + + - name: "--output_plot_r" + type: file + direction: output + description: output file (R) with script of histogram plot of inner distances of all paired reads + +resources: + - type: bash_script + path: script.sh +test_resources: + - type: bash_script + path: test.sh + - path: test_data + +engines: +- type: docker + image: python:3.10 + setup: + - type: apt + packages: [r-base] + - type: python + packages: [ RSeQC ] + - type: docker + run: | + echo "RSeQC - inner_distance.py: $(inner_distance.py --version | cut -d' ' -f2)" > /var/software_versions.txt +runners: +- type: executable +- type: nextflow \ No newline at end of file diff --git a/src/rseqc/rseqc_inner_distance/help.txt b/src/rseqc/rseqc_inner_distance/help.txt new file mode 100644 index 00000000..18f97bb6 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/help.txt @@ -0,0 +1,43 @@ +``` +inner_distance.py
--help +``` + +Usage: inner_distance.py [options] + +Calculate the inner distance (insert size) of RNA-seq fragments. + + RNA fragment + _________________||_________________ +| | +| | +||||||||||------------------|||||||||| + read_1 insert_size read_2 + +fragment size = read_1 + insert_size + read_2 + + + +Options: + --version show program's version number and exit + -h, --help show this help message and exit + -i INPUT_FILE, --input-file=INPUT_FILE + Alignment file in BAM or SAM format. + -o OUTPUT_PREFIX, --out-prefix=OUTPUT_PREFIX + Prefix of output files(s) + -r REF_GENE, --refgene=REF_GENE + Reference gene model in BED format. + -k SAMPLESIZE, --sample-size=SAMPLESIZE + Number of read-pairs used to estimate inner distance. + default=1000000 + -l LOWER_BOUND_SIZE, --lower-bound=LOWER_BOUND_SIZE + Lower bound of inner distance (bp). This option is + used for ploting histograme. default=-250 + -u UPPER_BOUND_SIZE, --upper-bound=UPPER_BOUND_SIZE + Upper bound of inner distance (bp). This option is + used for plotting histogram. default=250 + -s STEP_SIZE, --step=STEP_SIZE + Step size (bp) of histograme. This option is used for + plotting histogram. default=5 + -q MAP_QUAL, --mapq=MAP_QUAL + Minimum mapping quality (phred scaled) for an + alignment to be called "uniquely mapped". 
default=30 \ No newline at end of file diff --git a/src/rseqc/rseqc_inner_distance/script.sh b/src/rseqc/rseqc_inner_distance/script.sh new file mode 100644 index 00000000..fe00c590 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/script.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -exo pipefail + + +inner_distance.py \ + -i "$par_input_file" \ + -r "$par_refgene" \ + -o "$par_output_prefix" \ + ${par_sample_size:+-k "${par_sample_size}"} \ + ${par_lower_bound:+-l "${par_lower_bound}"} \ + ${par_upper_bound:+-u "${par_upper_bound}"} \ + ${par_step:+-s "${par_step}"} \ + ${par_mapq:+-q "${par_mapq}"} \ +> stdout.txt + +if [[ -n "$par_output_stats" ]]; then head -n 2 stdout.txt > "$par_output_stats"; fi + + +[[ -n "$par_output_dist" && -f "$par_output_prefix.inner_distance.txt" ]] && mv "$par_output_prefix.inner_distance.txt" "$par_output_dist" +[[ -n "$par_output_plot" && -f "$par_output_prefix.inner_distance_plot.pdf" ]] && mv "$par_output_prefix.inner_distance_plot.pdf" "$par_output_plot" +[[ -n "$par_output_plot_r" && -f "$par_output_prefix.inner_distance_plot.r" ]] && mv "$par_output_prefix.inner_distance_plot.r" "$par_output_plot_r" +[[ -n "$par_output_freq" && -f "$par_output_prefix.inner_distance_freq.txt" ]] && mv "$par_output_prefix.inner_distance_freq.txt" "$par_output_freq" + +exit 0 \ No newline at end of file diff --git a/src/rseqc/rseqc_inner_distance/test.sh b/src/rseqc/rseqc_inner_distance/test.sh new file mode 100644 index 00000000..927a69a9 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/test.sh @@ -0,0 +1,77 @@ +#!/bin/bash + + +# define input and output for script +input_bam="$meta_resources_dir/test_data/test.paired_end.sorted.bam" +input_bed="$meta_resources_dir/test_data/test.bed12" + +output_stats="inner_distance_stats.txt" +output_dist="inner_distance.txt" +output_plot="inner_distance_plot.pdf" +output_plot_r="inner_distance_plot.r" +output_freq="inner_distance_freq.txt" + +# Run executable +echo "> Running $meta_functionality_name" + +"$meta_executable" \ +
--input_file "$input_bam" \ + --refgene "$input_bed" \ + --output_prefix "test" \ + --output_stats "$output_stats" \ + --output_dist "$output_dist" \ + --output_plot "$output_plot" \ + --output_plot_r "$output_plot_r" \ + --output_freq "$output_freq" + +exit_code=$? +[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Check whether output is present and not empty" + +[[ -f "$output_stats" ]] || { echo "$output_stats was not created"; exit 1; } +[[ -s "$output_stats" ]] || { echo "$output_stats is empty"; exit 1; } +[[ -f "$output_dist" ]] || { echo "$output_dist was not created"; exit 1; } +[[ -s "$output_dist" ]] || { echo "$output_dist is empty"; exit 1; } +[[ -f "$output_plot" ]] || { echo "$output_plot was not created"; exit 1; } +[[ -s "$output_plot" ]] || { echo "$output_plot is empty"; exit 1; } +[[ -f "$output_plot_r" ]] || { echo "$output_plot_r was not created"; exit 1; } +[[ -s "$output_plot_r" ]] || { echo "$output_plot_r is empty"; exit 1; } +[[ -f "$output_freq" ]] || { echo "$output_freq was not created"; exit 1; } +[[ -s "$output_freq" ]] || { echo "$output_freq is empty"; exit 1; } + +echo ">> Check whether output is correct" +diff "$output_freq" "$meta_resources_dir/test_data/test1.inner_distance_freq.txt" || { echo "Output is not correct"; exit 1; } +diff "$output_dist" "$meta_resources_dir/test_data/test1.inner_distance.txt" || { echo "Output is not correct"; exit 1; } + +# clean up +rm "$output_stats" "$output_dist" "$output_plot" "$output_plot_r" "$output_freq" +################################################################################ + +echo "> Running $meta_functionality_name with non-default parameters and default output file names" +"$meta_executable" \ + --input_file "$input_bam" \ + --refgene "$input_bed" \ + --output_prefix "test" \ + --sample_size 4 \ + --mapq 10 + +exit_code=$?
+[[ $exit_code != 0 ]] && echo "Non zero exit code: $exit_code" && exit 1 + +echo ">> Check whether output is present and not empty" + +[[ -f "test.inner_distance.txt" ]] || { echo "test.inner_distance.txt was not created"; exit 1; } +[[ -s "test.inner_distance.txt" ]] || { echo "test.inner_distance.txt is empty"; exit 1; } +[[ -f "test.inner_distance_plot.pdf" ]] || { echo "test.inner_distance_plot.pdf was not created"; exit 1; } +[[ -s "test.inner_distance_plot.pdf" ]] || { echo "test.inner_distance_plot.pdf is empty"; exit 1; } +[[ -f "test.inner_distance_plot.r" ]] || { echo "test.inner_distance_plot.r was not created"; exit 1; } +[[ -s "test.inner_distance_plot.r" ]] || { echo "test.inner_distance_plot.r is empty"; exit 1; } +[[ -f "test.inner_distance_freq.txt" ]] || { echo "test.inner_distance_freq.txt was not created"; exit 1; } +[[ -s "test.inner_distance_freq.txt" ]] || { echo "test.inner_distance_freq.txt is empty"; exit 1; } + +echo ">> Check whether output is correct" +diff "test.inner_distance_freq.txt" "$meta_resources_dir/test_data/test2.inner_distance_freq.txt" || { echo "Output is not correct"; exit 1; } +diff "test.inner_distance.txt" "$meta_resources_dir/test_data/test2.inner_distance.txt" || { echo "Output is not correct"; exit 1; } + +exit 0 \ No newline at end of file diff --git a/src/rseqc/rseqc_inner_distance/test_data/test.bed12 b/src/rseqc/rseqc_inner_distance/test_data/test.bed12 new file mode 100644 index 00000000..33a46951 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/test_data/test.bed12 @@ -0,0 +1,4 @@ +MT192765.1 1242 1264 nCoV-2019_5_LEFT 1 + 1242 1264 0 2 10,12, 0,10, +MT192765.1 1573 1595 nCoV-2019_6_LEFT 2 + 1573 1595 0 2 7,15, 0,7, +MT192765.1 1623 1651 nCoV-2019_5_RIGHT 1 - 1623 1651 0 2 14,14, 0,14, +MT192765.1 1942 1964 nCoV-2019_6_RIGHT 2 - 1942 1964 0 2 11,11 0,11, diff --git a/src/rseqc/rseqc_inner_distance/test_data/test.paired_end.sorted.bam b/src/rseqc/rseqc_inner_distance/test_data/test.paired_end.sorted.bam new file
mode 100644 index 0000000000000000000000000000000000000000..8b215e12d1a932f1619cf7ded7e172141f45d479 GIT binary patch literal 10205 zcmV<3CnDG%iwFb&00000{{{d;LjnMD0fmy^PQox0hpTtRm)Hxe<5ULHD+VM;vd!s) z+ok&hE@3OK4I1x#Sl_~Iz@|9l?zE@j$SUWjh*u-d8%zMC+9d0b3kPX^@EAz)ObM}( zWuT_^euV=9Rjy-S+oj2yru(5*gZU;Wl4qw>0;k-%ZsST(C`}g)cFWTuiT89-s3ayK z&sy7Ii=3X56WHr%w89S7C{uiWCPTDz#3XP$C+s z&xA^#5*26?p+YG#h&GCa0JXTSQ0t}~%fLyPigm5zveTlGwPB6vOwqPl)Fl^PDnzFmW7?u=+p4INMy0$-)57F=9+z<$ zj=X;qXEcs88GXz1(p?wF^uS&k)5Y#FouXlSM%TxgWP8Ja_}gFtl=xiw;0cI48;D`;qu&$*o!P{OAn(J>})}&a>wPIMv0kU znTujcaDK7*Y7oV{M{t%~ft%0vaDK4oCh>J^DvD!no(m~vB={>UU8_P$ooZo{l*vxj ztOd1`RN|FHl{_PkIBBHE_K1lr`0pl8Gk27t=a~b@tD-!B+>%p#$?;^oD5Q>u*0l1=Df!87MZROa<}*Jgt59hbtQz0Hxqa4UYr!q2mj(Jvz z*;AR5H~#{h=iAn9q+m6u=~ae|RR41DHDD0HzTz1TYbhu|zi& zZwr8^mFXBT?S$eYWy%O9(grv~?ZJ%jMjGCk2V@N9$-&{lo;4OT@QNvT$vW^-Q@0W?tazQ^M|;I1&4jf|lG?x~7^NFk@viE0 zr#jK(|ef+b7OeZl9RUdB+*Z@tl8SCXbh$^Vxs)bs-T1C&!#$ zzh=%ikLUacBM}KsK^+aE^d~br=ENqatJ@mhRI8F&NL`Y?CNHZZuM6VR=)(Fj5d4ML zqt>$~E52}=!g@UZ?_OA?v7pD!Jd^XiL|bTqtMA5S-g?I`2G-s_n#{Xj?sdTS3kK`- zem|K6WAC2@ZimMwd%Mx6Z;zr{a+VtSHZZ_dsuG==ve265RB71?1)tGC##05;Y?xt8 z$fo9+o0L~=lPWkpLYupISYld@*f%M<#H2_RBm`w@+Kx zVCPn*!}ZSkTQlWvo;6Q#7Q3i2cH@Ba8rzza@qrnh_l)p-VCzS>2JGg+3Ss=Ysb2DU zl5b?Vfv_qG*%+nw>YzGdR$8S|yR>b%W5IIV}=8Nvgdr6mOM{JW>Yvk{uLMTO-MW zA;o3Qs?zEJ!QHUf&Ny=cf{V^<27u=5 z95^pW4vfr#5d1{)|0;=$YkjpfR!E82$Gsz% zf=nh_rX^fyQ^BX!kV8l*UJGlA6jsxvS8l97m~TEc{e|;Pm_6ql=w~jbz;5+$KHS6k zRP>f;7<{{uPa=A~IJ2cJ*# zeJ07HSE6otqFdF$n}^~D*F=H*FoWhtykBsMs0Q^1sT8?q9sDm1T_qV6BwCIX0F2?4 z8zyzZSBTGkjk`^GpSsM+#JWeByC7s5Afnf9-MUzEIQUy%W6R_ujK4MYw>+Li&K`{l z{u3wCagMBTs(7Iam{n20%Wq*xInsNYC3K|#;vLwGXrXH0yCPf!N!bicn0^WdMzxG} zVD_OvS+2|uUgq%RoT0gjop(>z8PNP2cQ2<&Z%U(RcgD`+)03TJmnKEUOSCDz8MygE zlmgCH!+0xrB@`%#FjXU0B^Byk8YLrmFyO9Am_*L1LnfIVTwx43V>dpvG91}hYlirK z0M2Dj$+7J(bh$mC$v2{zu0)fNo99Uh;V;a%IiLxz6|hOk5R9g^F;&Y{K_XmrGBrjb zm)8=uHNu3B>S+nW1kMVUAUJ5?#2AS)h*c{UJajLB&M}C6u_5jwgK`4F^0N!ykTcCu z;O1sDU$ipo{Oxp^n-46a=;^6ra=3#!(2|-3!A`X 
z6ELrC4ZsCz{=;Yh`2ML)_h9DKD{x+@5O0wkm)Z(HB?Azy!o^N;6CmZxN2D~p#Yif% zFyme<<1ln_c+Y*Sf;C@)vD`W{@rrY;hwb32;N^6_y0jT9_6x->5{Sl0iLos)b5z(vE?(2=H{5ih{|yOYuUfQlZ=Jvp5;w z2utEZIZHPe4tmo~@onDM1{@>v(FMTrGjlBOUt@dj@8$dV7R&N|?>$lUzh_t;>>Tbn zH}i%;$@mSjJs}*+CuCef%*@x}dTp6cHK+S)0Or>(X2?TO{9F&_2YWY@$a*l7Z%0gW zGfRzggX8*Q^UGp~C`op3;bE;>)SOjSUdcm6&2^`QR;Y!qeUqGTuaxsE_bj&>eHdf^b0+7< z`^P)Zj<@Vw!0{Py$c$0$K52_CP4JU~BKx;5${TgWtA{sOqNO$ufcf;wa_5iy_t!nN zLeB5)iTUAs200JJB!JTyF&&shOc-w}8`nH#jh0jzO43e>isNTnD8m}wnhtPP30!yh zo~VPsTNf>`q#8blmPHm3g%cQ{Y>?JopHYj<$lneb=n=KnqjP!V6^QxtCStyDLd>su z+iY zoR3VWH)=Gv`jN@(_M8eE&+cE(nR3zDg$B-SawoFYcc}kP8fNOKP>gWH`z@=WNz9fpC zpKQv;~sMrEO5_K1ZNJ*v3G(~)bOfaFAGQkt1mt<9>NZl#XwWytHV2Mi+ zw-+4wJII;l-l=znIPMFY-f}x2MwPcRL`$*Hr`ead@?9Qk%AwYr^_Sb4C#mSnXFRM3 z=e9kEzOqHWh<)=J>2)Ca$Mf`DJ}cmjjeWjVZ4I~)mN$WM{~#Lhy=lfchh>AaPu`AK zbTeE~ypv6b0F#`3s(sPHXxR+gnpYo*eC*`Wk(g)?i|b)Uj!ZY9;cSb&5i>2FYr}kS=1-;ao6dl2W0n zTp?r^sEn#s3x*1GDl4iarsFX4f`Yi@2K58+stfJ+aawLEK{dLL;QX^q&U`j;=7R(J zMJU`{6gVP3@6GRfNJE9$j!s`^O6$TMy-8IyRvC$rDWaZ)m$IRtB9j>~0?&v^uH{@JDq{oRuaeaLbC$6`5ZfA3$8qMx2} z^B9HfBbR@#IW%BFrD&6C(IV;;9aJGh!H4=FZ~}E9+keAFZBUve8v__oLV_wqEu_Os zYSFS;|5ARw$*-wcj6$Y7Jea+C?&=u6w&A`9{TkjGInM11W{g91h>#1^43bU=|JIb6 z9!-+;1PS=M(0~uICW1^P*=s7CbtrL2l!%ZHL53Xwrzs)L3VN+Sm-BPC$SAsJ%FdJH zozp!R9tnoXQn983?M$Uv3;Qimzo>-P)L|hd^o8to`!boj zaod=;GWj^+7WZddf-3e6ER;#1$F^6d^!-7U)uPpB)Zo62eKS{P9YnZKnR5$i0f8fB9-N)rx=PuY(s+SS=f&1~x#mY=wYzLA z6Xb5qll=aM_|pCy)|&zAT?Fel-}LLN8o}?{`XdkF{~L+^Px`SEqW^bhY&|(VJvxfM zgLrq2pJ_W()tfnfkLSJ<*e;>!|64yhc0;)f-rcD<^wL=x`o=Nm;oFNb#h;obf3Cg{ zGyFf*7s3p09fc|H3$}vv*gVBcy!rOjR(Rd!n#cw@Vja8{juH(jhN#@Y&QgjEg&I>S zfszhL!4Sx`90sQCYU&kFD^W>f1Q=C6?!pvm7Q_7}lJq43*9U9kirD>gv59wzX0_O_K%{X`G?T%i|YS+{+icbYonGRg7bzxQ^A>=NPh>T2M((xs-!l zDdie7U0Z(^5>(6G5_xs)>Yq#%_lriTem(dWUzf|C?=3*}-%SF(*#@vilJQ3usP z2em1b_o?NO!_sMnaH2Da9AE+A)9Q*N9Ft87fF%lC2zjYR0%3-(sG^`D!pV8s=kmOq zx0OtzsnO3-)Vdg6KJtg%sorSP+T>7#9@hMjD*MDH$Ct0HR}pY?$Cm(pQQ0Rp>9RW}x-8g+ 
zH@tM&Hjozh^wc(Zi6=PMBZPDT_*z5QTZU+?MI}I2m1w|XDR>DdSiquy-pttL+}2z0 zerV=Gq(vT%Ebo`=SuE``TJNJxSm};94iY|i&;$EP4J_6G83=n;U(|Zr5PLj7e*Ylr z@9=nj{(h(Z$9Nv^pYHEO(f5(8!_h_k4`~8anqe}fDH4izX>Fe1$Q39-%Su{sx1-Ru zk{xtqx)d~A?ugGA&j)AC{T6WS&GS_9GiO#yu0HcequGaYLU6ALr4P&m)*(Xf6*C=b zadk>*JeG8V;u6Epu2Q+yfi; zi+(2CPYgQ9J(}%Iv20BP&whWC?Yn1Xo8W%}n0L<#;xzW5*^if$(L!t+4SfX0}p;f?&hEHn1{4J2aQxt+O0L6I{=3<>~6Et_G z)TFVu%of9r&}?oXntqcSJ42gK!_@3-?P42e$a0VEBKG_Wg`0fbr@F}U!19sd>njRl zVPATb_cthTQvgjfPEN>KdiUlLnsVzU9?dWI+hYtGHRo#W%<=t-!->jLv) zK|+4P(<3>aFC!$3-aa>k&z;%PfAzTuLf#OL5qi&@Z#M`b`^Q>mYC(}CJqlru@4(1> ziZ--T#rd>;I9^~mIu_$ofpiNu99TpMy>tyiV@vwnD?LK(%Le`SL66W^Fm~V8j6+99 zdxxjdj*XPYLRD$RpbZk(2(zgGs5{z`q%`nCTLbuz(2ypDPgQV~4BMve3{N^LDww=C ztgFeeGBgT61Q&C<^u}9}I3F8?Jodtw^?Akqdp@oTy49u2`Rx1F)CcbC{rZ3UvOye! zU;inL{j0e+9_^usZyoz}=*UDea!ym)wyG5x9!f1L%6w5jKJ}o(WTTpDBobq?c|5;p z41d`p{GM!7-f_=eOl>{Qy2y>r33$h0fdt9Ijahx!%nG4Y-bWA)~rD(ERu& z)4RR0diALu%^`><#{R@y6%OpYZVY9YYP=lU^zl9y5d>|j@kfMN5ufL?%kpt!Pw=(# z@?zuw?hWs?MY}EgM#D5%VY@w;Wr9 zO%C$BuQhr9#%9>Yojs1Q@fsaFxdHjvr>1zGqa2v76!))=2pYtlRKP2i2-=JLMRjDt z8KV?ArM?sqN9C(VL+ z@1Do;u$hFv4O2@$v=@1Letc8u|F%i#KLmk4{lH+?Lvi)J82j8@d7hjcpB%dCYMXY3 zbzO?^H=!Ctswyr}W}(tbgHX9e%uG$EaEUrFk(wxI88*CZozPwg2(6JvK+f4m%x?Eb zS69irW%fPLJrU!wH(C@I0=&#M_{4rIqwr-z6NhEiztml3TiI>Y92UFxZPplF#4fc= zVE)d^a@FsB>^B}-u`cwko|^A{V374dO#=Akb80#;-D0uzprsBp?Lh-`Ycg7>Xw}rQ zXBSAI{+}jxmpg}!FXmZljJCYW@{uaFQ+AkT6Y*T1lJUx7sfB<3ib>e=JDV&~ywVbh z2bcY%`!V)U=G$5B@9iJ%J4?jUMlh)W3Qr3#6DgdofX9TivZyItBkPW~9!Ik5AIP3*jNB|9Ho!Ol-TxRhGl z-96kr-gl+j3-PakW`NE#Re_utijZ2c7MLRQ%o|yqU5J|xp3TBDBJ04*ETl3<{zJ(8 z4{tE|W_B}@RVzv!ShF&8sCInn!DYXbFur5X&H>Lg_`tbGL{7Woaa2NTc>A=gQ3~w< zjG3ZxDD46Ql^`U(r`sXMbS23?kL#o~8*Sh?UH(2#C$7E|SvjiWja{qx)$!yeoUuaZ zfi)Xy4SuKDS?1@XyHWJkS-`$`cyhSoHn43;S5`>pssaW+5wuYeqbhMLRFN{J7_DrA zbJPNIT8~Fdb11Jt8y!_i2kqO!C;?e@ku6oGj{MmUq&u^s=iKd#Ipp5GxeM=``LpyK ze&<~q{m#25fPUA5_t_rM$D*Gdjs5Ig;7?BX4)>y*_7dTV5DL0bm0HuvCx#-ML9SfX zWc3qXBm(HBP^dgr@C2?iG@m21;xq{L+IfcTGiPs4_?gM<51eZ=p8Y?bi|2LB{t`O< 
z{6`Qcntirr_BV}ZAH3w9-DNNNZ5aFL?A{|JKG|{02~Z<6DeawrDw#}BcjXPFifhGS zJd>stFqN_u37#hkCe)-I>;as-N>s>=!M!UQL@g{~R@eLB!aA*PGp~@dcd3NM!VSf4 z3%1y~$9}IgdjWNF`9AqLZ1U;%c5=I9QLB$#@0t0kjFlamU$7ft+PD?w>?gXuTT&$=3sE)&Qw2kXmYSLz7>t-YK8d z{Nm~5tE<9*@?&$IB-iL9*&EGAc9)Y21n*DIq|!rajpp^BCo7Ezr9^mwu_W(vUwucb zo8hQ4bME2(g&d>c#Y_3eHtG1=RvOJmcbAGXV4&lnusdZg$4&2S>yYzIb^r5(QUU+ zA@fZNW60h;r{Il+Yzjy|nasLl^mz<%c^+H5ijaY2_X4tat%L0531p!|^h1<&Qwb)xfWO$biq`OH*o!lKzsor#OU+vD$OZ|Id*|F` zTz&B~;OLbCS=X(NE6rX!$w38A`oE3f44(8i_LeiG`v*IR2+GKlzA;GBz8&j5=@lZE zuXwZ?+Hu-%bZs`fY_!drKfugixtu5cjTQEU;C<6vm>rbXp4<=^Wz(hgcG$?xmU%6# z@pz)kk49Uh(|=Sh=P&+#PbpRLagl5Y3NB>iR*>bRw@Qq(o^Uo-+~ z1o%COhM_Ha!;y3Dq-k_p^AeXzTvfC%;ygtW3eh%{XZ83f{nLxw@+7^T*#zNg^RZ;4 z)*r!>-rBlc_Wn)#%c`@(*uR;{`oZqr$^L2h^qcEK5%tarnfw%a)%@H$?gT^@YpVd?qNi*2p-SGES^0zE@J6CwQLGjaE5?%0h8S&4F7 zf>Z=j_*Ben8zetbJnl5o`lW%hjXX25TAUnoZnS1d={E|=3m9`%Ak0HNt@sreA%_qy zvpc=JB4lXxDtq3>Wx2Z8HbFES0ePNdECE5#Kf0-#>CTniOxp*8;}`_}VT}F3oR_pG zjdR42XDDRbw1yUxCP)$z{mM?4Ey^CCo6^vs(2yL(<+4l)p=FnLDUuID@RAmgmyN;I z;HAqm$LE`@f0nPiY^+k_ksgji%`^6O>8wxn_`5Yp@Bq(GUMyk`HvQoq&ky!CoiIM0 zGt=QoHeI9Y(HN9H>>l}WD2uvc1Nzu}$IJ9*mpc-VV zfNj@u=4O%EQjVveoA|=T!SS?{)tBwg*d{3USGbPZ3$KT;pN6=w)coWon||j?oBplC z<(T_VG4`?f=Y{Q`oF2JsjwpE}s)9Z}lae|$q}Xn53tWYOG*=12#*T6OsTkDM$d&05 z+H4+LmXpRz6A-pi%Jeb!Xpyq}%6yl0o|SVK%J0^=3QothGs5*=t-6>fb9$wZjuxZV z6yW)hd9TQQvqjEqBI>~e5q2)%84AdR@oVQF8Rqb`?G4urBdfv78miN+ptSqj#eTI> zwg~%ClX;34zE0P#x3}Lym>maB4%_W`470(MpQ#;2OAdP9)GN-H0?m)CGeLK(>~a5v zqvaI?e~7VfFMXQb-tMVGlXF#;N|m$|b3>c7(!-i|&_R(?Q7RYGE_9h7hNNaO2)nF= zN$qFZbscKW2|P~CO*!4C`Nr*~x9Z5|u$~zR5Srh-*rpqyLi5q1LDoZsCOU?{IY;wo z7e1$}(2(t+9Z4k0nNXMXIT6$nrr~~S##Kch$Hr;tk8-T+8bBvt%L@<^3AQFRNK7JP zzsR!KMJh_kd;(W*@%#$evxE6zVWS}rRPUGG`?Y(Ps* z+o60(=unGLM%a(Vk)St|nz;{Pu>zj^&o{|@kGxwFWak?crITjo7}{1Q`!IGN9GKLNBIi literal 0 HcmV?d00001 diff --git a/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance.txt b/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance.txt new file mode 100644 index 00000000..e5f09f8f --- 
/dev/null +++ b/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance.txt @@ -0,0 +1,49 @@ +ERR5069949.29668 -4 sameTranscript=No,dist=genomic +ERR5069949.114870 -45 sameTranscript=No,dist=genomic +ERR5069949.147998 94 sameTranscript=No,dist=genomic +ERR5069949.155944 -105 sameTranscript=No,dist=genomic +ERR5069949.184542 49 sameTranscript=No,dist=genomic +ERR5069949.169513 -92 sameTranscript=No,dist=genomic +ERR5069949.257821 -139 sameTranscript=No,dist=genomic +ERR5069949.309410 13 sameTranscript=No,dist=genomic +ERR5069949.376959 -66 sameTranscript=No,dist=genomic +ERR5069949.366975 -106 sameTranscript=No,dist=genomic +ERR5069949.465452 -19 sameTranscript=No,dist=genomic +ERR5069949.479807 5 sameTranscript=No,dist=genomic +ERR5069949.501486 -82 sameTranscript=No,dist=genomic +ERR5069949.532979 -96 sameTranscript=No,dist=genomic +ERR5069949.540529 -61 sameTranscript=No,dist=genomic +ERR5069949.573706 -63 sameTranscript=No,dist=genomic +ERR5069949.576388 -77 sameTranscript=No,dist=genomic +ERR5069949.611123 -125 sameTranscript=No,dist=genomic +ERR5069949.651338 -33 sameTranscript=No,dist=genomic +ERR5069949.686090 -29 sameTranscript=No,dist=genomic +ERR5069949.786562 42 sameTranscript=No,dist=genomic +ERR5069949.870926 -22 sameTranscript=No,dist=genomic +ERR5069949.856527 -69 sameTranscript=No,dist=genomic +ERR5069949.885966 -32 sameTranscript=No,dist=genomic +ERR5069949.937422 18 sameTranscript=No,dist=genomic +ERR5069949.919671 -116 sameTranscript=No,dist=genomic +ERR5069949.973930 -79 sameTranscript=No,dist=genomic +ERR5069949.986441 -22 sameTranscript=No,dist=genomic +ERR5069949.1014693 -150 sameTranscript=No,dist=genomic +ERR5069949.1020777 -122 sameTranscript=No,dist=genomic +ERR5069949.1066259 -4 sameTranscript=No,dist=genomic +ERR5069949.1062611 -124 sameTranscript=No,dist=genomic +ERR5069949.1067032 -103 sameTranscript=No,dist=genomic +ERR5069949.1088785 -101 sameTranscript=No,dist=genomic +ERR5069949.1132353 -142 sameTranscript=No,dist=genomic 
+ERR5069949.1151736 -55 sameTranscript=No,dist=genomic +ERR5069949.1258508 62 sameTranscript=No,dist=genomic +ERR5069949.1189252 -98 sameTranscript=No,dist=genomic +ERR5069949.1261808 -88 sameTranscript=No,dist=genomic +ERR5069949.1246538 -122 sameTranscript=No,dist=genomic +ERR5069949.1328186 -64 sameTranscript=No,dist=genomic +ERR5069949.1331889 -132 sameTranscript=No,dist=genomic +ERR5069949.1372331 -29 sameTranscript=No,dist=genomic +ERR5069949.1340552 -140 sameTranscript=No,dist=genomic +ERR5069949.1412839 -117 sameTranscript=No,dist=genomic +ERR5069949.1476386 -98 sameTranscript=No,dist=genomic +ERR5069949.1538968 -133 sameTranscript=No,dist=genomic +ERR5069949.1552198 -67 sameTranscript=No,dist=genomic +ERR5069949.1561137 -59 sameTranscript=No,dist=genomic diff --git a/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance_freq.txt b/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance_freq.txt new file mode 100644 index 00000000..908326ff --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/test_data/test1.inner_distance_freq.txt @@ -0,0 +1,100 @@ +-250 -245 0 +-245 -240 0 +-240 -235 0 +-235 -230 0 +-230 -225 0 +-225 -220 0 +-220 -215 0 +-215 -210 0 +-210 -205 0 +-205 -200 0 +-200 -195 0 +-195 -190 0 +-190 -185 0 +-185 -180 0 +-180 -175 0 +-175 -170 0 +-170 -165 0 +-165 -160 0 +-160 -155 0 +-155 -150 1 +-150 -145 0 +-145 -140 2 +-140 -135 1 +-135 -130 2 +-130 -125 1 +-125 -120 3 +-120 -115 2 +-115 -110 0 +-110 -105 2 +-105 -100 2 +-100 -95 3 +-95 -90 1 +-90 -85 1 +-85 -80 1 +-80 -75 2 +-75 -70 0 +-70 -65 3 +-65 -60 3 +-60 -55 2 +-55 -50 0 +-50 -45 1 +-45 -40 0 +-40 -35 0 +-35 -30 2 +-30 -25 2 +-25 -20 2 +-20 -15 1 +-15 -10 0 +-10 -5 0 +-5 0 2 +0 5 1 +5 10 0 +10 15 1 +15 20 1 +20 25 0 +25 30 0 +30 35 0 +35 40 0 +40 45 1 +45 50 1 +50 55 0 +55 60 0 +60 65 1 +65 70 0 +70 75 0 +75 80 0 +80 85 0 +85 90 0 +90 95 1 +95 100 0 +100 105 0 +105 110 0 +110 115 0 +115 120 0 +120 125 0 +125 130 0 +130 135 0 +135 140 0 +140 145 0 +145 150 0 +150 155 0 +155 
160 0 +160 165 0 +165 170 0 +170 175 0 +175 180 0 +180 185 0 +185 190 0 +190 195 0 +195 200 0 +200 205 0 +205 210 0 +210 215 0 +215 220 0 +220 225 0 +225 230 0 +230 235 0 +235 240 0 +240 245 0 +245 250 0 diff --git a/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance.txt b/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance.txt new file mode 100644 index 00000000..a1930c9e --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance.txt @@ -0,0 +1,4 @@ +ERR5069949.29668 -4 sameTranscript=No,dist=genomic +ERR5069949.114870 -45 sameTranscript=No,dist=genomic +ERR5069949.147998 94 sameTranscript=No,dist=genomic +ERR5069949.155944 -105 sameTranscript=No,dist=genomic diff --git a/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance_freq.txt b/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance_freq.txt new file mode 100644 index 00000000..021311a2 --- /dev/null +++ b/src/rseqc/rseqc_inner_distance/test_data/test2.inner_distance_freq.txt @@ -0,0 +1,100 @@ +-250 -245 0 +-245 -240 0 +-240 -235 0 +-235 -230 0 +-230 -225 0 +-225 -220 0 +-220 -215 0 +-215 -210 0 +-210 -205 0 +-205 -200 0 +-200 -195 0 +-195 -190 0 +-190 -185 0 +-185 -180 0 +-180 -175 0 +-175 -170 0 +-170 -165 0 +-165 -160 0 +-160 -155 0 +-155 -150 0 +-150 -145 0 +-145 -140 0 +-140 -135 0 +-135 -130 0 +-130 -125 0 +-125 -120 0 +-120 -115 0 +-115 -110 0 +-110 -105 1 +-105 -100 0 +-100 -95 0 +-95 -90 0 +-90 -85 0 +-85 -80 0 +-80 -75 0 +-75 -70 0 +-70 -65 0 +-65 -60 0 +-60 -55 0 +-55 -50 0 +-50 -45 1 +-45 -40 0 +-40 -35 0 +-35 -30 0 +-30 -25 0 +-25 -20 0 +-20 -15 0 +-15 -10 0 +-10 -5 0 +-5 0 1 +0 5 0 +5 10 0 +10 15 0 +15 20 0 +20 25 0 +25 30 0 +30 35 0 +35 40 0 +40 45 0 +45 50 0 +50 55 0 +55 60 0 +60 65 0 +65 70 0 +70 75 0 +75 80 0 +80 85 0 +85 90 0 +90 95 1 +95 100 0 +100 105 0 +105 110 0 +110 115 0 +115 120 0 +120 125 0 +125 130 0 +130 135 0 +135 140 0 +140 145 0 +145 150 0 +150 155 0 +155 160 0 +160 165 0 +165 170 0 +170 175 0 +175 180 0 +180 185 0 
+185 190 0 +190 195 0 +195 200 0 +200 205 0 +205 210 0 +210 215 0 +215 220 0 +220 225 0 +225 230 0 +230 235 0 +235 240 0 +240 245 0 +245 250 0