From 2e227e4960d022129b190cb3b1ba99aa6da4a33b Mon Sep 17 00:00:00 2001 From: emmarousseau Date: Mon, 13 May 2024 09:18:30 +0200 Subject: [PATCH] Update test 1 and test data, fix some arg types in config and script --- src/umi_tools/umi_tools_dedup/config.vsh.yaml | 2 +- src/umi_tools/umi_tools_dedup/help.txt | 8 +++--- src/umi_tools/umi_tools_dedup/script.sh | 4 +-- src/umi_tools/umi_tools_dedup/test.sh | 24 +++++++++++++++--- .../test_data/dedup_edit_distance.tsv | 5 ++++ .../test_data/dedup_per_umi.tsv | 6 +++++ .../test_data/dedup_per_umi_per_position.tsv | 5 ++++ .../umi_tools_dedup/test_data/deduped.bam | Bin 2154 -> 840 bytes .../umi_tools_dedup/test_data/sample.bam | Bin 3268 -> 1584 bytes .../umi_tools_dedup/test_data/sample.bam.bai | Bin 889032 -> 2656 bytes .../umi_tools_dedup/test_data/script.sh | 5 ++-- 11 files changed, 46 insertions(+), 13 deletions(-) create mode 100644 src/umi_tools/umi_tools_dedup/test_data/dedup_edit_distance.tsv create mode 100644 src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi.tsv create mode 100644 src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi_per_position.tsv diff --git a/src/umi_tools/umi_tools_dedup/config.vsh.yaml b/src/umi_tools/umi_tools_dedup/config.vsh.yaml index 312d5078..0614cbb2 100644 --- a/src/umi_tools/umi_tools_dedup/config.vsh.yaml +++ b/src/umi_tools/umi_tools_dedup/config.vsh.yaml @@ -134,7 +134,7 @@ argument_groups: tags are “NH”, “X0” and “XT”. If not specified, the read with the highest mapping quality will be selected. - name: --read_length - type: integer + type: boolean_true description: | Use the read length as a criteria when deduping, for e.g sRNA-Seq. diff --git a/src/umi_tools/umi_tools_dedup/help.txt b/src/umi_tools/umi_tools_dedup/help.txt index acbab88e..87baf322 100644 --- a/src/umi_tools/umi_tools_dedup/help.txt +++ b/src/umi_tools/umi_tools_dedup/help.txt @@ -33,15 +33,13 @@ Dedup Options: --output-stats= One can use the edit distance between UMIs at the same position as an quality control for the deduplication process by comparing with a null expectation of random sampling. For the random sampling, the observed frequency of UMIs is used to more reasonably model the null expectation. - + Use this option to generate a stats outfiles called: + [PREFIX]_stats_edit_distance.tsv + Reports the (binned) average edit distance between the UMIs at each position. In addition, this option will trigger reporting of further summary statistics for the UMIs which may be informative for selecting the optimal deduplication method or debugging. Each unique UMI sequence may be observed [0-many] times at multiple positions in the BAM. The following files report the distribution for the frequencies of each UMI. - - Use this option to generate a stats outfiles called: - [PREFIX]_stats_edit_distance.tsv - Reports the (binned) average edit distance between the UMIs at each position. [PREFIX]_stats_per_umi_per_position.tsv Tabulates the counts for unique combinations of UMI and position. [PREFIX]_stats_per_umi_per.tsv diff --git a/src/umi_tools/umi_tools_dedup/script.sh b/src/umi_tools/umi_tools_dedup/script.sh index 00889600..8aa89d10 100644 --- a/src/umi_tools/umi_tools_dedup/script.sh +++ b/src/umi_tools/umi_tools_dedup/script.sh @@ -20,7 +20,7 @@ test_dir="${metal_executable}/test_data" [[ "$par_subset" == "false" ]] && unset par_subset [[ "$par_log2stderr" == "false" ]] && unset par_log2stderr [[ "$par_get_output_stats" == "false" ]] && unset par_get_output_stats - +[[ "$par_read_length" == "false" ]] && unset par_read_length umi_tools dedup \ --stdin "$par_input" \ @@ -43,7 +43,7 @@ umi_tools dedup \ ${par_spliced_is_unique:+--spliced-is-unique} \ ${par_soft_clip_threshold:+--soft-clip-threshold "$par_soft_clip_threshold"} \ ${par_multimapping_detection_method:+--multimapping-detection-method "$par_multimapping_detection_method"} \ - ${par_read_length:+--read-length "$par_read_length"} \ + ${par_read_length:+--read-length} \ ${par_per_gene:+--per-gene} \ ${par_gene_tag:+--gene-tag "$par_gene_tag"} \ ${par_assigned_status_tag:+--assigned-status-tag "$par_assigned_status_tag"} \ diff --git a/src/umi_tools/umi_tools_dedup/test.sh b/src/umi_tools/umi_tools_dedup/test.sh index 4b83ff5e..1b5a9053 100644 --- a/src/umi_tools/umi_tools_dedup/test.sh +++ b/src/umi_tools/umi_tools_dedup/test.sh @@ -7,27 +7,45 @@ mkdir -p "$out_dir" ############################################################################################ -echo ">>> Test 1: Basic usage of $meta_functionality_name" +echo ">>> Test 1: Basic usage of $meta_functionality_name with statistics output" "$meta_executable" \ --paired \ --input "$test_dir/sample.bam" \ --bai "$test_dir/sample.bam.bai" \ - --output "$out_dir/deduped.bam" + --output "$out_dir/deduped.bam" \ + --output_stats deduped \ + --random_seed 1 echo ">>> Checking whether output exists" [ ! -f "$out_dir/deduped.bam" ] && echo "File 'deduped.bam' does not exist!" && exit 1 +[ ! -f "$out_dir/deduped_edit_distance.tsv" ] && echo "File 'deduped_edit_distance.tsv' does not exist!" && exit 1 echo ">>> Checking whether output is non-empty" [ ! -s "$out_dir/deduped.bam" ] && echo "File 'deduped.bam' is empty!" && exit 1 +[ ! -s "$out_dir/deduped_edit_distance.tsv" ] && echo "File 'deduped_edit_distance.tsv' is empty!" && exit 1 echo ">>> Checking whether output is correct" diff "$out_dir/deduped.bam" "$test_dir/deduped.bam" || \ (echo "Output file deduped.bam does not match expected output" && exit 1) - +diff "$out_dir/deduped_edit_distance.tsv" "$test_dir/deduped_edit_distance.tsv" || \ + (echo "Output file deduped_edit_distance.tsv does not match expected output" && exit 1) ############################################################################################ +echo ">>> Test 2: $meta_functionality_name" + +"$meta_executable" \ + --paired \ + --input "$test_dir/sample.bam" \ + --bai "$test_dir/sample.bam.bai" \ + --output "$out_dir/deduped.bam" \ + --random_seed 1 \ + + +echo ">>> Checking whether output exists" +[ ! -f "$out_dir/deduped.bam" ] && echo "File 'deduped.bam' does not exist!" && exit 1 +[] rm -rf "$out_dir" echo "All tests succeeded!" diff --git a/src/umi_tools/umi_tools_dedup/test_data/dedup_edit_distance.tsv b/src/umi_tools/umi_tools_dedup/test_data/dedup_edit_distance.tsv new file mode 100644 index 00000000..89684b04 --- /dev/null +++ b/src/umi_tools/umi_tools_dedup/test_data/dedup_edit_distance.tsv @@ -0,0 +1,5 @@ +unique unique_null directional directional_null edit_distance +3 3 4 4 Single_UMI +0 1 0 0 0 +1 0 0 0 1 +0 0 0 0 2 diff --git a/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi.tsv b/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi.tsv new file mode 100644 index 00000000..a1d364e2 --- /dev/null +++ b/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi.tsv @@ -0,0 +1,6 @@ +UMI median_counts_pre times_observed_pre total_counts_pre median_counts_post times_observed_post total_counts_post +ACCGGTTTA 74 1 74 74 1 74 +ACTGGTTTC 48 1 48 49 1 49 +AGCGGTTAC 1 1 1 1 1 1 +CCAGGTTCT 1 1 1 1 1 1 +TCTGGTTTC 1 1 1 0 0 0 diff --git a/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi_per_position.tsv b/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi_per_position.tsv new file mode 100644 index 00000000..d9211d0a --- /dev/null +++ b/src/umi_tools/umi_tools_dedup/test_data/dedup_per_umi_per_position.tsv @@ -0,0 +1,5 @@ +counts instances_pre instances_post +1 3 2 +48 1 0 +49 0 1 +74 1 1 diff --git a/src/umi_tools/umi_tools_dedup/test_data/deduped.bam b/src/umi_tools/umi_tools_dedup/test_data/deduped.bam index 5ffd9fc26f6dfaea2dd1f4cfe2c1cb18d93c59ba..a82e6c81a47c472acd45c379a42b294f31966e87 100644 GIT binary patch delta 825 zcmV-91IGO75Xc69ABzYC000000RIL6LPG)oTmpTPxo;Ce6o(%~N!ZX)Ks!nUQdsZY z$0FqxhZF~cB}fF#I^M)9vRP|w+4M9BQBd+9BvM3=D1e@t1|cL$3MeR`WxbciDJU$> z_dd^?_nX=EwdT3UGXSf%H=PIDK51a5eb?VhlC&Eg1;cQEVYPkFX>a>`y_7gx*&mD| zMsP-LfpbHRF^mdfn_>?onlmk=ZD?tzP=aCGu)<)PZ6Z_W@WKdUxR%^DA~%?E!;P9U za$<~T#1JucjLLu(iqL@tyGZB(7PWjZtMHEdG*H|k{6iI{;#wg7!Q3Me} zGMojqM3F;(P{9-x#u9l9!i^vbGy4ox?Vwx;`;0sWVMH;lm7T*Bx5%-QMA`S-%!SO} z(JJl~MJWZQ#(u%wG91pHJbUxjE$7arzn+YTQCNIQq(PmHEq}3-*O2Ru)1coEQ`89t z(VmO5%CMJ)L3e;Saz}_FH%25YKS2a}(f+`Tqclx_Qtu!G3hZCmTs^kAl#tnz3Sg=6IQQ1~>6`-fqp6U1lxHOSeH_4^J#_;N;K8i*qGlxccJ5d_@Fa;f+KQw~O9y!Lsa1O~@j&zurCTzhx30s*JbuT&u6 zXZqtp2@0HtwbAhR`j`0<47^+oD`>*$N_`tU@b>$!YR@h_I{pp+0PvNx?2rTi03VA8 z1ONa4009360763o0L}n~&^-=<`P2S~!jgHdwGz7%ez}=g`8d z7!Tkryb&%7A)%31%`{(Lo`p7_?(MuEE98w2 zNX}?L6RhrX8?%_r9=pe#y^V;4rT;+!!CoxX-r7b)u(Z&^LR;td#oWTee14yuncsJ2 z*H*XAU7rD1xwGLt?1ZF^y`6jEewt?e__#ZaS}QyEy`4_Df0z+((|%wSF@iJd3S1a! zjA2v=*OYT8(VS@^T|+BFg%S+AhLr|WJBdu~!%HKG;aYOnh{9mT4L54a$cZtU5kth( zHmVSUDk=qWg)RhEiefe;%tT1SIOS6vmr*oPTw|>qQ6>>a7^5_EL>WW~$*>LTh%$$u zf+;GDBZ?S=8$lFi?jEY1LAem_9z_hoh+W4!@_Wcnk*y7mqIR#FAU{XAjqM{)^sFhjE(5@n9q=TB#<#wd(ezqBe~aI*Q_tAN9`_qS>gaQ5O#4HAB3KQC0Ez$JaSW8+FRm$x&*~@mP85GOYW>u-a{5r;Ik2U3rPNm{11EfG;LR`PNej zB?k2{PZ3cmK}kepDfOTa`cnB21`-(sNj*fRv-{S*tg}y^Y1x4_oOw7uzV%)H-+w)f z^3R3GC+D`(dAf!cWdEnd$9|s}AE#I-VZ<;Yl5;9e631Z_#c>d#sRlw@g0Q}6{Wcjg z8552+Zwl*U@WUFM1?Q(`HPh3#d%*BlNFkCS=f)vASj)II;8GS63#~m=K*xUj0-*5< z57ZDYj1ox{l~6n@gCZNOvPd)vKdHkx|H2cfmF})I-Ao5pd7xS;OgSzK6~#T`ERl!T z9s}fxH6EnIj9>+E=IIP0G+B2@X1b8*!R^;I(~s`-RftkUm`cJZ&Z3Oar4EvJqJ$np z=e|Rb&5@gZ*^~qmDiV8dTc8MyHXKqgS9FU!^Fh1ON>8;sNJ1IM+L$B?a^_6QoMjm= zt%lzG`Bo2;YQu@tNfZWIZ^r2R%FShloG6ed21fw->}?(-1Zk+!NgS6qZ`a&x5*&yq zo1%Id=``C6H9Fv{5ut#^2HG==GzidS%|%L?yZmf84?ME9)=b}D;%iQrAwp;pgjw0c z03EENeulSl|wXIY0hrso|CTu^3Ap{N-njL3_2;Cu|%T< z@{M2Lg-*KM_T>^wB{U--A9=(7S2A}eUqYBWmwT9!2}%fa^@+3Umd%-|F&c-T^uhW1 zZ`2q{M_RtI#2j0V%ZX4P+!1*WDj#9jNS1o&opaEiM|^UuIhLC8B+B~}?tZCLb=jFr zi=037>FidzZjHA$ji82V$Bbr}%sFuShjxO1oV~?2nT#`TI5cNaY>ge2=Hy+-4b5vq zQUzM9Ksn=LGLKd|iFF}=xi%!@oMU49J&!W#F+yVna-e-pNJ+S{xeUWn-z$4a#uB?d zG#AdP={7)5`{Yf)XrtD#?YRVm8**`3xHq+w;tOfx_o2GY)2`kwu=p`>wub z`sc7OP@}mr6s{i@qI7>nj%62e_x8`{HPg-Oyw;0rL9ADZijKcJN}JB>;kfkG|^I8-yIaoqj&W!@|)xjFg7cPeG+O6 zv5T{O%S33Z^X4ElB+L!dd^mr7oWO{FwZs!B)m#wpilvmqJ^bfbfqZo3$vMq*tMCMB zFi|j0PVitObppjP?F{{mR-cwh82aS{9w_WhiET$Y!*sl|leml1B3}s+G}FeFKBUG< zYguX@lzp=Dh@%niigWSm_drRmVBc1fFr;AYidP4W{n1K3jVruGp1jy$YOprg{v%4>XvvC^1D2?3VtGpC6bKiO8VS^m4C|>8K zZIOL@7xuN%mwk5!Ye$)(rnGjzoT@Y^b@?2y4GKH7nQmC*dmB>43OtVd7>KtAeK@8WD+eYLDGj6z1Q$|jV(To@( zrjAj$5L8hqh%Iz3uu>GWDPbl;62>W?s<>!H6U8;w$`VBzI zP{9-x#u73Y+z6sDv(He~4$6hF&(OJGL@}+E-G?b|kz*x^vhR007c!fp72GX?QVL9s zJ;A-QIh>6=n|bq=b7#|EPsYP2EM5|6P-kPyUu@?wiUaGE>Tx;b5#DJrWx8b#q4xjhs<3*!Dk*k}jw>3j`xI%B;)D`&ff+=a^kFq?go zHw4FjKV6(F0mIdoALlC~@EUK_M8RM7Yhv)GyHpd0{{2x+0+z+nTtyNZH@?kQq~P<1 zw>4>4xfIo8;QRUGnX)hnJcDtc`|$ z*T2q}VBpniSiv@&tkk!$3-5mXuJ-J~qoY6YAMlm5?2rTi03VA81ONa4009360763o z0J#IT+B=TqR1^kace4ScNvB6_V9d z*@dLwY*>-6v(t>-)L%kvxr3V>p$vDGxXpZX=l;I2b6a~cdtc8~y;jYX7F|`3Gj&5N zaB@;Nnl%c^DmWWv#tCODy$znNTZ=TD%}&cj*Zw;@&Dbrofha0c@Gh4a7u;1g)e5|8 z=cH0qziI-OuAIwz#N0YtXcmxskCPuR&DedrT#ZyTM&8l%2IhwWTwrn&T$|Q>y zUkq#Pve?uyY^p71FN966QioTYVaf_;%dHt7LWgjqt+LaM-L^w8$KoJ{aoO0y8SV;K zmF(bZx+y*f=_uJDUlnVyJ=|8Q!n;N(zP*3H|MBDD#p$DU*M-vgaU2i#_tP{^!|R)O zF3W+&SWLICfBs~+I~||hJU#!{EN)^`c|6>o%RP(>cCrKOVq^c|r_YAxr*GF-&c56X zPUjHD;m_N5Dvf#2xJs?!b1OhqDd!Tjs?;R}S4{$+fQHfeMIEcAl;IPox_skYHA z?}2PdfzN_!>hqf!RBelLcmnN|_|a--J#URzHBxe2JmNnfG8l6V9qq z5S}$^Yr?ZE)s1H*xbQ<(?V|Wii(yS#*{@&-A_ujl& zU)w%+eFk9l&ZhTpCnPQGweN+!G|l?)ac3AUthVoa?VYf9m=SNw{$La_f-~w0To`JM zVN?j$lxrx_oM|ClLn}jt5)8YBl?GG0h)m_-r4ht%ExBt%VKC!{8#QI*#2C$pAz~_x zDukelNY5t1-Y`BcSaE1D>-u~v>KI}t`0qcn2_FNKf{+eRHBNJ^j=;pWhVU_>#kmD`6YPmyCKiE__(yAaaO(FyLAK`8~M#+~4P z)f~1Xw=>_p?cLoB*VFMZj>?-vTGZRv3KzRY4Eg>z>kI}_hPs_R?)lh8hKE_y>F09zh&Sa`KX}pAWzf`K-U4`A2B<8Z!v=vXO1&v?{^raR>`{t7t zv{cZCJc)>xAZQU0s|I~&9@-Z*4^p9s6$Ohvh}z`%t-beJd!KZsYxX(^_GA)rKK|?X z-}Xm}_Ve;5&R(}ZTQy6?4b9(Y-N*la>f}jAq?T45rc?>Rl%3YKtE###U2$%uC{CAd z`RLsbD92SzMRDw?TRwp=F5$W1rE}LzW*6U`0LD+Lq@0#5ZtaSvmKxk8;4<#02VMGb z2YTY4zXE7|YXa0#A+46vstTawng+D+!fFp$9LHCd@x1iKPZsO5CzewN;G9dT#T2S{ z26#Q@i4ODQe|dU&GCQ}n)ERjTOsFme;n2PEkgg7pUUzAoj=Fo$zdp)$OlErzCP0nW zgb5NHRn?ow86cmz{aHZXdME*@2&Y6t&eoi+D$XuPNN(pKi+dh?(`5FO_oh6=n5A4# zT~#+yR>iA5r0Atw!E@=41+01G?I~+2N+^?4ueWzlRUBW5kdjBId&n1lw6U^2dwx9u zNf{T!SUauC7IQ0O;vb9Imv2LC{_UL!OlGX0%1kR)HnCY3e;R168pu-}!A6@Qp`YV8!jHj6Po{iC8DNP;cs7!jWpQ>XO3HXteg~er zK6?MsWcGu7so+EoQOc&JYuxV2;;Dg;^c>_9KmN?hWcIK92`RNOS`oy16_V)qAUNkC zAHDE(Kz?f@C8bctDvn5UTe|3Y2l>r^d=HWI+D6KjL}_U_1%0#u|3=0$KMNDL&8`t)}% zAwGYYqGKb7GE7XXHa?;ItG(A%pP3%=`n8|$SfAZ-C>fhpGRsWFthUI^Yw_$_dl&@r z;yY5A$psfyAUMmeYCJRuPB900?__(BOoNLxn6vIO^ZX!^_#EWlw+G3E5QO@8Z>6j@ zNud)RgfX9udwImicYdIULiX@3fm5YDL%)`Wvz7^J0*)?CH&*$;fMSeuo5 zQiQ}LS_SZXJ>MsIV{%h6(^&Il2YTZ-9s_aiyE)k=DaEZpAeHkSP17bxC;C2V7oJ}{ zdAXR(KD0NXy=ZzbFnn7w>v9VpdB_*v|Gu4**?$hF1hqzJ%b>pNNa@Z29jiIWGY|f9 zbuv44N3!)2M#4v~YNgyP)xA02jOS~w?1I}LUr!bdV$dzX?ZHW1N7I;-I@-2g$2-*G zas^WQ^uc7iASL3-w+o@RKR1Y`#_>hnVV>FZGxT2aP|8JwX{~$_uID?DhLV|Myd<&j zZg5X~T$LhFp{(VCg>I>y>pgWPZ8X4Jd?WIO&o8adZofHg0B%~oi%CDoP#V4OF<1WZ z43hpkYpH-T)F|cq?iRA#!uuZb_tSr)u{m%!MWNPGUpT`oQx)g>nS(M%c}%A3@%;Cv zQ>5r`_ay{nMo5ZYF;GdpN&k7GgZ$Wm3)fC&_sfK!mQam!iiC%X)C)>#)+ha2JwNSN zNcvm$BtTI&74Z=j8>Z(5HAzG{J>*wifnd7tKnl_jZHx+>2e+Rc3~{U?b8_Bv+xNjF zmr1HBDI_TxyKd@0+MgfT(?-X8$P2Hnz@DdBs=Yz?>1$Hg-(1C_suaDT9!I^r3xYnL zqCCO4vXW1$HkF|Lmj;6Nls5v>a!Vu;aXczFiKUYrcjLM5N`X{5@}89Em1Ir#-FhAswI(JGf=D1Lp+d&sx>?%~g$Ld?H%FxAvb0eg~Q z&$8NL$KWBqeenwF$K5GoV#gqUMo|v_|708XAWBfoBCZ|^G8)7P?uU;yJ!_XG z3AMs<5@z8{Ntnc1&J1S297zx&R4Ev?>O6Ij>QwaO zdzl3yjli@K)(Ul=IvCLf14q=u%oz=0(Wgs*xNhpn0nzv{8C{$wiQx)EatO9O?=ThG99;ihELOw{CToVL>KJ847j6t~v*V8Bnzi z`@cM*=CCYsa9Y=OPX(hvIAV?3FjQ1|l1NL4X5nm;M@n323O9%CdS0+W8~ka*iY+fr z1fxPXOB%Za>T)7)^2unZSj#B-Rv#rIecMZO< zzMYL9MIX!JOxuO*XcUHnGg0wnhX@s*smpZ;RROg{+2!o!hBigQU z8;xR!RRXO}H7sFcQ7FL%#K&CgX;)>UApEr)T(GlH;2`3wSh)}i_pfG%tJb^YQILzA z#aSWGN7#a3OlDCdm#2}$FaEMs4tZfbipKOKLd#Q?c_z5H|0I&m9-WqZo~o ze{=ik*Q}#a7^Y?=uDWH7M&W33jSlgWX=6LEB zsMAmgwL}eGEFaQ7_iwFb&00000{{{d;LjnLB00RI3000000001Q Caw19q diff --git a/src/umi_tools/umi_tools_dedup/test_data/sample.bam.bai b/src/umi_tools/umi_tools_dedup/test_data/sample.bam.bai index bd87cde81a00c7699faed37ab04f88fc5f77bcfa..e9e2eee1d579c04b0b58bff33862d56ff931ff5d 100644 GIT binary patch literal 2656 zcmZ>A^kfucU|?VcVnbmD21X#wz!1d*B8?;=wAUmC2Cx8BoPj}v4J2O+B9K5ZRDTy# z-$N*E4Hbu(2b0f+io?{w%vA=z7VRP}Asf++tD=JF#wrL6cyG@=&gX+HViTCj`+ble=FYv} zOV9kz?={V1=GkYSAGS&h}zc1*L$BSWvC0e?`q%w^_^JH$9ewXvE|-8qegG_K_A*|$$=cmfjQs+ z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IB@?vaJ59vRefmlE(daOCpp-x zn}4xg%24ZzL-g)fz3!Zk^;`Y%3zK+$>s+kozI^^S7h*m4z18=>Hi`A2-zUG$d{Ih$ zs^og^-)R2=qwCv`EcfJIHC_vS(1$iEav%qCU=BFI0S<7019!)P{IiN0{nQ72Xy0Gt zKn~=<9B_aG9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02RN`!4&-}fs*ziL(1$j^ zav%qCU=BFI0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14(wkCZVi!tRUeuIav%qCU=BFI0S<70103K02ROiiyXn9Sljx_a4=q=6AO~__ z4miL84sd`29N+*4IKTl8aDW3GxEBsg>rLm2QYN*&)&HO5d>XxOYvb$ssZxfz@UK1l zE}e|^JfF}1;NbF}`J5WP)dziOvn2;|AP44v103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zU{xH*_X<$MC4JC`=Cd5gfgG3v4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<700~}a&2WB;-SI)%ehP>H7Z{)*Xzkeat^YL@DnCZWMI@a^^N%CBNeo3z9{`T1> zVrPC8Ic1ykOOnT0S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCKy8fj6h&fT|D8LphLx zdzpg|yEkg&Ods^2&6^y^fgG3v4sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0fpv3WvxfGY zkC!sk`qPb=;Xn8KkH=#DXR}!UTd(s)DU-VYx&Ao!Z?pjzT|Yk^=kwgt{k|Xd^?aOP z_v-Q3pZCqLd-Cw|Yvprlye9gf4{hY+Kn~=<9B_aG9N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`299Vw`@@Hq&m}h;^hvu3b$blS~0}gP2103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$U=s`}98Qx4=n4$J`uIKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<7019#%Uq;9_XXemRjAN0R_eDYJ#!}+3=_Wa)U zS^EN`>qieP_vBtRdZ-Wj&_+cLIH1MR)V{_t45AKCBw`;c9DAZ9k-k1XGxEZ3(k(4*_UlW{)x z{jlF>tG~}#KF<69qEZxj2eB^2YqPY@8v)a=z~7A(UAi=kOOnDE)L}Pb~W762YqOM%YhuofjQs+ z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<7019#tnS#9dScjM>M z&mV}4z2575QA+z6_W%BzdS7op3qQWTb}`Q9xf^}om$za)&*%L&FD&nw&#BRSeb9$C zTXG-=a$pWPzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4?zIE?clc^}qYwJf z+?4}4kOOnT0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0W zzyS_$fCC)({~UPzK%7(cp*boCav%rhfCC)h00%h00S<70103K02ROh14sd`29N+*4 zIKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103MM`Z};tU%XdOl`_=70kn-~b0WzyS_$fCC)h00%h00S<70 z103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8-2V<-c`$OW>O-4%IgkT6Fb5pq00%h00S<701E17^oBhw3vl^yf zZkICD`r5&mt@nHV(ur6<+aKri^F=9>xlpM8cyhgJ~S`oKn~=<9B_aG9N+*4IKTl8aDW3G-~b0W zzyS_$fCC)h00%h00S<70103MM?HtJW#8krxeb9&Ir5wnC9GC+RaDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW5%j|0>CVqSi#l%dw|9*S&V?{&T?Wm4DA>{)MX9 zonGEE@2y4;^+6xn49S5U$bmWF00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G z-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$ zfCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h0 z0S<70103K02ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K0 z2ROh14sd`29N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`2 z9N+*4IKTl8aDW3G-~b0WzyS_$fCC)h00%h00S<70103K02ROh14sd`29N+*4IKTl8 zaDW3GSQiJb9gcZb^`W^X2XY_>=70kn-~b0WzyS_$fCC)h00%h00S<70103K02ROh1 z4sd`29N+*4IKTl8+&c$aZ`(DkzvhclCbgb85}A9u*S0pkE+is%zKG91T*^@E+x=%l9zGh`o-ayi&+lDtd&k%K?6f}d zmveEyyw6K}?w9Mo?d$DpjIXae5$E$ sample.bam.bai +# extract 150 reads with a maximum of two reads having the same start position +samtools view -h example.bam | head -n 150 | samtools view -bS - > sample.bam +samtools index sample.bam rm example.bam \ No newline at end of file