From b2a7a75a903ac46ee52687077e301b5dfcbc8305 Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Fri, 11 Aug 2023 13:58:29 +1000 Subject: [PATCH] more oneliners --- docs/oneliners.md | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/oneliners.md b/docs/oneliners.md index a06da6f..2c83990 100644 --- a/docs/oneliners.md +++ b/docs/oneliners.md @@ -3,20 +3,22 @@ ## Extracting information for eye-balling and inspecting -**Note that these commands are not efficient to be run routinely on giagntic datasets, instead are only for quickly eyeballing and inspecting relatively smaller datasets.** +**Note that these commands are not efficient to be run routinely on gigantic datasets, instead are only for quickly eyeballing and inspecting relatively smaller datasets.** ``` # print slow5 header: -slow5tools view file.blow5 | grep '^[#@]' # from slow5tools v0.7.0: slow5tools skim --hdr file.blow5 +slow5tools view file.blow5 | grep '^[#@]' +slow5tools skim --hdr file.blow5 # available from slow5tools v0.7.0 # print read records without the header: slow5tools view file.blow5 | grep -v '^[#@]' # print the first 10 read records (without the header): -slow5tools view file.blow5 | grep -v '^[#@]' | head -10 # from slow5tools v0.7.0: slow5tools skim --rid file.blow5 | head -10 +slow5tools view file.blow5 | grep -v '^[#@]' | head -10 # print the list of read IDs: -slow5tools view file.blow5 | grep -v '^[#@]' | awk '{print $1}' # from slow5tools v0.7.0: slow5tools skim --rid file.blow5 +slow5tools view file.blow5 | grep -v '^[#@]' | awk '{print $1}' +slow5tools skim --rid file.blow5 # available from slow5tools v0.7.0 # print all data columns (including the data type and column name), except the raw signal (column 8): slow5tools view file.blow5 | sed -n '/#char*/,$p' | cut -f 1-7,9- @@ -30,6 +32,14 @@ slow5tools view file.blow5 | grep -v '^[#@]' | datamash mean 7 median 7 sstdev 7 # extract the signal samples 100-200 (1-indexed) for the 
read-id r1: slow5tools get --to slow5 file.blow5 "r1" | grep -v '^[#@]' | awk '{print $8}' | cut -d, -f 100-200 +# print the auxiliary data field 'median_before' (note: slow5tools skim is available from v0.7.0) +slow5tools skim file.blow5 | awk -v c="median_before" 'NR==1{for (i=1; i<=NF; i++) if ($i==c){p=i; break};} {print $p}' + +# get the mean and standard deviation of the auxiliary data field 'median_before' +slow5tools skim file.blow5 | awk -v c="median_before" 'NR==1{for (i=1; i<=NF; i++) if ($i==c){p=i; break};} {print $p}' | tail -n+2 | datamash mean 1 sstdev 1 + +# count how many reads come from each 'channel_number' +slow5tools skim file.blow5 | awk -v c="channel_number" 'NR==1{for (i=1; i<=NF; i++) if ($i==c){p=i; break};} {print $p}' | tail -n+2 | sort | uniq -c ``` ## Operatings on multiple files in parallel