From 93a6b60d26e2b0c70ea8755d5b0913c1df7bb88d Mon Sep 17 00:00:00 2001 From: Jonathan Bieler Date: Thu, 13 Oct 2022 15:06:39 +0200 Subject: [PATCH 01/19] added method for handling different bam index types --- src/bam/reader.jl | 12 ++++++------ test/test_bam.jl | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/bam/reader.jl b/src/bam/reader.jl index 43e228e..d1e71ec 100644 --- a/src/bam/reader.jl +++ b/src/bam/reader.jl @@ -28,13 +28,8 @@ function BioGenerics.IO.stream(reader::Reader) end function Reader(input::IO; index=nothing) - if isa(index, AbstractString) - index = BAI(index) - elseif index != nothing - error("unrecognizable index argument") - end reader = init_bam_reader(input) - reader.index = index + reader.index = init_bam_index(index) return reader end @@ -125,6 +120,11 @@ function init_bam_reader(input::IO) return init_bam_reader(BGZFStreams.BGZFStream(input)) end +init_bam_index(index::AbstractString) = BAI(index) +init_bam_index(index::BAI) = index +init_bam_index(index::Nothing) = nothing +init_bam_index(index) = error("unrecognizable index argument") + function _read!(reader::Reader, record) unsafe_read( reader.stream, diff --git a/test/test_bam.jl b/test/test_bam.jl index beb6d03..0a504b1 100644 --- a/test/test_bam.jl +++ b/test/test_bam.jl @@ -241,6 +241,21 @@ end + @testset "BAI" begin + + filepath = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam") + + index = BAM.BAI(filepath * ".bai") + reader = open(BAM.Reader, filepath, index=index) + + @test isa(eachoverlap(reader, "chr1", 1:100), BAM.OverlapIterator) + + close(reader) + + @test_throws ErrorException open(BAM.Reader, filepath, index=1234) + + end + @testset "Random access" begin filepath = joinpath(bamdir, "GSE25840_GSM424320_GM06985_gencode_spliced.head.bam") reader = open(BAM.Reader, filepath, index=filepath * ".bai") From 286e271e9135b4379996e69deb0c6726eb4c5a01 Mon Sep 17 00:00:00 2001 From: Jonathan Bieler Date: Thu, 13 Oct 2022 15:17:20 +0200 Subject: [PATCH 02/19] update docstring --- src/bam/reader.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bam/reader.jl b/src/bam/reader.jl index d1e71ec..3dd2f7f 100644 --- a/src/bam/reader.jl +++ b/src/bam/reader.jl @@ -8,7 +8,7 @@ Create a data reader of the BAM file format. # Arguments * `input`: data source -* `index=nothing`: filepath to a random access index (currently *bai* is supported) +* `index=nothing`: filepath to a random access index (currently *bai* is supported) or BAI object """ mutable struct Reader{T} <: BioGenerics.IO.AbstractReader stream::BGZFStreams.BGZFStream{T} From 50039e749f39bb7b29d28ccd808331493767eec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Fri, 14 Oct 2022 22:24:41 +1100 Subject: [PATCH 03/19] Update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0261552..9ed8c7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Added BAM.Reader index support for BAI object ([#56](https://github.com/BioJulia/XAM.jl/pull/56/files)) + ## [0.3.1] ### Changed From 8151d877e79eda65808f16a968db3e488afa3380 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Mon, 9 Jan 2023 12:30:36 +1100 Subject: [PATCH 04/19] Subtype from XAMReader and XAMWriter --- CHANGELOG.md | 4 ++++ src/XAM.jl | 5 +++++ src/bam/bam.jl | 2 +- src/bam/reader.jl | 6 +++--- src/bam/writer.jl | 2 +- src/sam/reader.jl | 2 +- src/sam/sam.jl | 2 +- src/sam/writer.jl | 2 +- 8 files changed, 17 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ed8c7b..cd21a88 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added BAM.Reader index support for BAI object ([#56](https://github.com/BioJulia/XAM.jl/pull/56/files)) +### Changed + +- Subtype from XAMReader and XAMWriter from common abstract types. + ## [0.3.1] ### Changed diff --git a/src/XAM.jl b/src/XAM.jl index 2c5bad1..b3f5e4c 100644 --- a/src/XAM.jl +++ b/src/XAM.jl @@ -1,9 +1,14 @@ module XAM +using BioGenerics + export SAM, BAM +abstract type XAMReader <: BioGenerics.IO.AbstractReader end +abstract type XAMWriter <: BioGenerics.IO.AbstractWriter end + """ flag(record::Union{SAM.Record, BAM.Record})::UInt16 diff --git a/src/bam/bam.jl b/src/bam/bam.jl index b3591d7..41aea12 100644 --- a/src/bam/bam.jl +++ b/src/bam/bam.jl @@ -6,7 +6,7 @@ module BAM using BioGenerics using GenomicFeatures using XAM.SAM -import ..XAM: flag +import ..XAM: flag, XAMReader, XAMWriter import BGZFStreams import BioAlignments diff --git a/src/bam/reader.jl b/src/bam/reader.jl index 3dd2f7f..c1686c0 100644 --- a/src/bam/reader.jl +++ b/src/bam/reader.jl @@ -8,9 +8,9 @@ Create a data reader of the BAM file format. # Arguments * `input`: data source -* `index=nothing`: filepath to a random access index (currently *bai* is supported) or BAI object +* `index=nothing`: filepath to a random access index (currently *bai* is supported) or BAI object """ -mutable struct Reader{T} <: BioGenerics.IO.AbstractReader +mutable struct Reader{T} <: XAMReader stream::BGZFStreams.BGZFStream{T} header::SAM.Header start_offset::BGZFStreams.VirtualOffset @@ -124,7 +124,7 @@ init_bam_index(index::AbstractString) = BAI(index) init_bam_index(index::BAI) = index init_bam_index(index::Nothing) = nothing init_bam_index(index) = error("unrecognizable index argument") - + function _read!(reader::Reader, record) unsafe_read( reader.stream, diff --git a/src/bam/writer.jl b/src/bam/writer.jl index 81bdf23..8c4268f 100644 --- a/src/bam/writer.jl +++ b/src/bam/writer.jl @@ -10,7 +10,7 @@ Create a data writer of the BAM file format. * `output`: data sink * `header`: SAM header object """ -mutable struct Writer <: BioGenerics.IO.AbstractWriter +mutable struct Writer <: XAMWriter stream::BGZFStreams.BGZFStream end diff --git a/src/sam/reader.jl b/src/sam/reader.jl index 46fd286..f38044f 100644 --- a/src/sam/reader.jl +++ b/src/sam/reader.jl @@ -1,7 +1,7 @@ # SAM Reader # ========= -mutable struct Reader{S <: TranscodingStream} <: BioGenerics.IO.AbstractReader +mutable struct Reader{S <: TranscodingStream} <: XAMReader state::State{S} header::Header end diff --git a/src/sam/sam.jl b/src/sam/sam.jl index fcd0821..0e25d39 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -11,7 +11,7 @@ import BioGenerics.Exceptions: missingerror import BioGenerics.Automa: State import BioSequences import TranscodingStreams: TranscodingStreams, TranscodingStream -import ..XAM: flag +import ..XAM: flag, XAMReader, XAMWriter using Printf: @sprintf diff --git a/src/sam/writer.jl b/src/sam/writer.jl index 801ed68..c16a346 100644 --- a/src/sam/writer.jl +++ b/src/sam/writer.jl @@ -10,7 +10,7 @@ Create a data writer of the SAM file format. * `output`: data sink * `header=Header()`: SAM header object """ -mutable struct Writer <: BioGenerics.IO.AbstractWriter +mutable struct Writer <: XAMWriter stream::IO function Writer(output::IO, header::Header=Header()) From 0d1eec3ed3e9ba65a118e6b86a6a8c80779de153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Mon, 9 Jan 2023 12:48:54 +1100 Subject: [PATCH 05/19] Subtype from XAMRecord --- CHANGELOG.md | 2 ++ src/XAM.jl | 3 ++- src/bam/bam.jl | 2 +- src/bam/record.jl | 2 +- src/sam/record.jl | 2 +- src/sam/sam.jl | 2 +- 6 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd21a88..938f911 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Subtype from XAMReader and XAMWriter from common abstract types. +- Subtype from XAMRecord. +- Unified flag queries. ## [0.3.1] diff --git a/src/XAM.jl b/src/XAM.jl index b3f5e4c..2ae8ac8 100644 --- a/src/XAM.jl +++ b/src/XAM.jl @@ -5,7 +5,8 @@ using BioGenerics export SAM, BAM - + +abstract type XAMRecord end abstract type XAMReader <: BioGenerics.IO.AbstractReader end abstract type XAMWriter <: BioGenerics.IO.AbstractWriter end diff --git a/src/bam/bam.jl b/src/bam/bam.jl index 41aea12..03ef9fd 100644 --- a/src/bam/bam.jl +++ b/src/bam/bam.jl @@ -6,7 +6,7 @@ module BAM using BioGenerics using GenomicFeatures using XAM.SAM -import ..XAM: flag, XAMReader, XAMWriter +import ..XAM: flag, XAMRecord, XAMReader, XAMWriter import BGZFStreams import BioAlignments diff --git a/src/bam/record.jl b/src/bam/record.jl index bd092e8..db33f88 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -6,7 +6,7 @@ Create an unfilled BAM record. """ -mutable struct Record +mutable struct Record <: XAMRecord # fixed-length fields (see BMA specs for the details) block_size::Int32 refid::Int32 diff --git a/src/sam/record.jl b/src/sam/record.jl index ef8bb6d..befa967 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -1,7 +1,7 @@ # SAM Record # ========== -mutable struct Record +mutable struct Record <: XAMRecord # Data and filled range. data::Vector{UInt8} filled::UnitRange{Int} # Note: Specifies the data in use. diff --git a/src/sam/sam.jl b/src/sam/sam.jl index 0e25d39..70cf2de 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -11,7 +11,7 @@ import BioGenerics.Exceptions: missingerror import BioGenerics.Automa: State import BioSequences import TranscodingStreams: TranscodingStreams, TranscodingStream -import ..XAM: flag, XAMReader, XAMWriter +import ..XAM: flag, XAMRecord, XAMReader, XAMWriter using Printf: @sprintf From 9a9f2c1f5ad7ad7765475e744c8dc9a958458c08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Mon, 9 Jan 2023 14:10:50 +1100 Subject: [PATCH 06/19] Fun with flags Implements flag queries. --- src/XAM.jl | 24 +---- src/bam/bam.jl | 3 +- src/bam/record.jl | 40 -------- src/flags.jl | 226 ++++++++++++++++++++++++++++++++++++++++++++++ src/sam/flags.jl | 29 ------ src/sam/record.jl | 29 ------ src/sam/sam.jl | 4 +- 7 files changed, 233 insertions(+), 122 deletions(-) create mode 100644 src/flags.jl delete mode 100644 src/sam/flags.jl diff --git a/src/XAM.jl b/src/XAM.jl index 2ae8ac8..6aa98a5 100644 --- a/src/XAM.jl +++ b/src/XAM.jl @@ -1,35 +1,17 @@ module XAM using BioGenerics +import BioGenerics: isfilled #Note: used by `ismapped`. export SAM, BAM - + abstract type XAMRecord end abstract type XAMReader <: BioGenerics.IO.AbstractReader end abstract type XAMWriter <: BioGenerics.IO.AbstractWriter end -""" - flag(record::Union{SAM.Record, BAM.Record})::UInt16 - -Get the bitwise flags of `record`. The returned value is a `UInt16` of each flag -being OR'd together. The possible flags are: - - 0x0001 template having multiple segments in sequencing - 0x0002 each segment properly aligned according to the aligner - 0x0004 segment unmapped - 0x0008 next segment in the template unmapped - 0x0010 SEQ being reverse complemented - 0x0020 SEQ of the next segment in the template being reverse complemented - 0x0040 the first segment in the template - 0x0080 the last segment in the template - 0x0100 secondary alignment - 0x0200 not passing filters, such as platform/vendor quality controls - 0x0400 PCR or optical duplicate - 0x0800 supplementary alignment -""" -function flag end +include("flags.jl") include("sam/sam.jl") include("bam/bam.jl") diff --git a/src/bam/bam.jl b/src/bam/bam.jl index 03ef9fd..69ca235 100644 --- a/src/bam/bam.jl +++ b/src/bam/bam.jl @@ -6,7 +6,8 @@ module BAM using BioGenerics using GenomicFeatures using XAM.SAM -import ..XAM: flag, XAMRecord, XAMReader, XAMWriter +import ..XAM: flag, XAMRecord, XAMReader, XAMWriter, + ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. import BGZFStreams import BioAlignments diff --git a/src/bam/record.jl b/src/bam/record.jl index db33f88..e8ee356 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -137,37 +137,6 @@ function hasflag(record::Record) return isfilled(record) end -""" - ismapped(record::Record)::Bool - -Test if `record` is mapped. -""" -function ismapped(record::Record)::Bool - return flag(record) & SAM.FLAG_UNMAP == 0 -end - -""" - isprimary(record::Record)::Bool - -Test if `record` is a primary line of the read. - -This is equivalent to `flag(record) & 0x900 == 0`. -""" -function isprimary(record::Record)::Bool - return flag(record) & 0x900 == 0 -end - -""" - ispositivestrand(record::Record)::Bool - -Test if `record` is aligned to the positive strand. - -This is equivalent to `flag(record) & 0x10 == 0`. -""" -function ispositivestrand(record::Record)::Bool - flag(record) & 0x10 == 0 -end - """ refid(record::Record)::Int @@ -253,15 +222,6 @@ function hasrightposition(record::Record) return isfilled(record) && ismapped(record) end -""" - isnextmapped(record::Record)::Bool - -Test if the mate/next read of `record` is mapped. -""" -function isnextmapped(record::Record)::Bool - return isfilled(record) && (flag(record) & SAM.FLAG_MUNMAP == 0) -end - """ nextrefid(record::Record)::Int diff --git a/src/flags.jl b/src/flags.jl new file mode 100644 index 0000000..4679749 --- /dev/null +++ b/src/flags.jl @@ -0,0 +1,226 @@ +# Flags +# ========= +# + +""" + flag(record::XAMRecord})::UInt16 + +Get the bitwise flags of `record`. +The returned value is a `UInt16` of each flag being OR'd together. +The possible flags are: + + 0x0001 template having multiple segments in sequencing + 0x0002 each segment properly aligned according to the aligner + 0x0004 segment unmapped + 0x0008 next segment in the template unmapped + 0x0010 SEQ being reverse complemented + 0x0020 SEQ of the next segment in the template being reverse complemented + 0x0040 the first segment in the template + 0x0080 the last segment in the template + 0x0100 secondary alignment + 0x0200 not passing filters, such as platform/vendor quality controls + 0x0400 PCR or optical duplicate + 0x0800 supplementary alignment +""" + +function flag end + +# Bitwise flags (or FLAG). +for (name, bits, doc) in [ + (:PAIRED, UInt16(0x001), "the read is paired in sequencing, no matter whether it is mapped in a pair"), + (:PROPER_PAIR, UInt16(0x002), "the read is mapped in a proper pair" ), + (:UNMAP, UInt16(0x004), "the read itself is unmapped; conflictive with FLAG_PROPER_PAIR" ), + (:MUNMAP, UInt16(0x008), "the mate is unmapped" ), + (:REVERSE, UInt16(0x010), "the read is mapped to the reverse strand" ), + (:MREVERSE, UInt16(0x020), "the mate is mapped to the reverse strand" ), + (:READ1, UInt16(0x040), "this is read1" ), + (:READ2, UInt16(0x080), "this is read2" ), + (:SECONDARY, UInt16(0x100), "not primary alignment" ), + (:QCFAIL, UInt16(0x200), "QC failure" ), + (:DUP, UInt16(0x400), "optical or PCR duplicate" ), + (:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),] + @assert bits isa UInt16 "The bits must be of type UInt16." + sym = Symbol("FLAG_", name) + docstring = """ $sym + SAM/BAM flag: $doc + + See also: [`flag`](@ref) + """ + @eval begin + @doc $(docstring) const $(sym) = $(bits) + end +end + +""" + ispaired(record::XAMRecord)::Bool + +Query whether the `record`'s template has multiple segments in sequencing. +""" +function ispaired(record::XAMRecord)::Bool + return flag(record) & FLAG_PAIRED == FLAG_PAIRED +end + +""" + isproperpair(record::XAMRecord)::Bool + +Query whether each segment of the `record`'s template properly aligned according to the aligner. +""" +function isproperpair(record::XAMRecord)::Bool + return flag(record) & PROPER_PAIR == PROPER_PAIR +end + +""" + isunmapped(record::XAMRecord)::Bool + +Query whether the `record` is unmapped. +""" +function isunmapped(record::XAMRecord)::Bool + return flag(record) & FLAG_UNMAP == FLAG_UNMAP +end + +""" + ismapped(record::XAMRecord)::Bool + +Query whether the `record` is mapped. +""" +function ismapped(record::XAMRecord)::Bool + # return flag(record) & FLAG_UNMAP == 0 + return isfilled(record) && (flag(record) & FLAG_UNMAP == 0) +end + +""" + ismateunmapped(record::XAMRecord)::Bool + +Query whether the `record`'s mate is unmapped. +""" +function ismateunmapped(record::XAMRecord)::Bool + return flag(record) & FLAG_MUNMAP == FLAG_MUNMAP +end + +""" + isnextmapped(record::XAMRecord)::Bool + +Test if the mate/next read of `record` is mapped. +""" +function isnextmapped(record::XAMRecord)::Bool + return flag(record) & FLAG_MUNMAP == 0 +end + +""" + isreverse(record::XAMRecord)::Bool + +Query whether the `record` is mapped to the reverse strand. +""" +function isreverse(record::XAMRecord)::Bool + return flag(record) & FLAG_REVERSE == FLAG_REVERSE +end + +""" + isforward(record::XAMRecord)::Bool + +Query whether the `record` is mapped to the forward strand. +""" +function isforward(record::XAMRecord)::Bool + return flag(record) & FLAG_REVERSE == 0 +end + +""" + ispositivestrand(record::XAMRecord)::Bool + +Query whether `record` is aligned to the positive strand. +""" +function ispositivestrand(record::XAMRecord)::Bool + return isforward(record) +end + +""" + ispositivestrand(record::XAMRecord)::Bool + +Query whether `record` is aligned to the negative strand. +""" +function isnegativestrand(record::XAMRecord)::Bool + return isreverse(record) +end + +""" + ismatereverse(record::XAMRecord)::Bool + +Query whether the `record`'s mate is mapped to the reverse strand. +""" +function ismatereverse(record::XAMRecord)::Bool + return flag(record) & FLAG_MREVERSE == FLAG_MREVERSE +end + +""" + isread1(record::XAMRecord)::Bool + +Query whether the `record` is read1. +""" +function isread1(record::XAMRecord)::Bool + return flag(record) & FLAG_READ1 == FLAG_READ1 +end + +""" + isread2(record::XAMRecord)::Bool + +Query whether the `record` is read2. +""" +function isread2(record::XAMRecord)::Bool + return flag(record) & FLAG_READ2 == FLAG_READ2 +end + +""" + issecondaryalignment(record::XAMRecord)::Bool + +Query whether the `record` is a secondary alignment. +""" +function issecondaryalignment(record::XAMRecord)::Bool + return flag(record) & FLAG_SECONDARY == FLAG_SECONDARY +end + +""" + isprimaryalignment(record::XAMRecord)::Bool + +Query whether the `record` is the primary alignment. +""" +function isprimaryalignment(record::XAMRecord)::Bool + return flag(record) & FLAG_SECONDARY == 0 +end + +""" + isqcfail(record::XAMRecord)::Bool + +Query whether the `record` did not pass filters, such as platform/vendor quality controls. +""" +function isqcfail(record::XAMRecord)::Bool + return flag(record) & FLAG_QCFAIL == FLAG_QCFAIL +end + +""" + isduplicate(record::XAMRecord)::Bool + +Query whether the `record` is a PCR or optical duplicate. +""" +function isduplicate(record::XAMRecord)::Bool + return flag(record) & FLAG_DUP == FLAG_DUP +end + +""" + issupplementaryalignment(record::XAMRecord)::Bool + +Query whether the `record` is a supplementary alignment. +""" +function issupplementaryalignment(record::XAMRecord)::Bool + return flag(record) & FLAG_SUPPLEMENTARY == FLAG_SUPPLEMENTARY +end + +""" + isprimary(record::XAMRecord)::Bool + +Query whether `record` is a primary line of the read. + +This is equivalent to `flag(record) & 0x900 == 0`. +""" +function isprimary(record::XAMRecord)::Bool + return flag(record) & 0x900 == 0 +end diff --git a/src/sam/flags.jl b/src/sam/flags.jl deleted file mode 100644 index ded0df1..0000000 --- a/src/sam/flags.jl +++ /dev/null @@ -1,29 +0,0 @@ -# SAM Flags -# ========= -# -# Bitwise flags (or FLAG). - -for (name, bits, doc) in [ - (:PAIRED, UInt16(0x001), "the read is paired in sequencing, no matter whether it is mapped in a pair"), - (:PROPER_PAIR, UInt16(0x002), "the read is mapped in a proper pair" ), - (:UNMAP, UInt16(0x004), "the read itself is unmapped; conflictive with SAM.FLAG_PROPER_PAIR" ), - (:MUNMAP, UInt16(0x008), "the mate is unmapped" ), - (:REVERSE, UInt16(0x010), "the read is mapped to the reverse strand" ), - (:MREVERSE, UInt16(0x020), "the mate is mapped to the reverse strand" ), - (:READ1, UInt16(0x040), "this is read1" ), - (:READ2, UInt16(0x080), "this is read2" ), - (:SECONDARY, UInt16(0x100), "not primary alignment" ), - (:QCFAIL, UInt16(0x200), "QC failure" ), - (:DUP, UInt16(0x400), "optical or PCR duplicate" ), - (:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),] - @assert bits isa UInt16 "The bits must be of type UInt16." - sym = Symbol("FLAG_", name) - docstring = """ $sym - SAM/BAM flag: $doc - - See also: [`flag`](@ref) - """ - @eval begin - @doc $(docstring) const $(sym) = $(bits) - end -end diff --git a/src/sam/record.jl b/src/sam/record.jl index befa967..7a2a22b 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -159,26 +159,6 @@ function hasflag(record::Record) return isfilled(record) end -""" - ismapped(record::Record)::Bool - -Test if `record` is mapped. -""" -function ismapped(record::Record)::Bool - return isfilled(record) && (flag(record) & FLAG_UNMAP == 0) -end - -""" - isprimary(record::Record)::Bool - -Test if `record` is a primary line of the read. - -This is equivalent to `flag(record) & 0x900 == 0`. -""" -function isprimary(record::Record)::Bool - return flag(record) & 0x900 == 0 -end - """ refname(record::Record)::String @@ -227,15 +207,6 @@ function hasrightposition(record::Record) return hasposition(record) && hasalignment(record) end -""" - isnextmapped(record::Record)::Bool - -Test if the mate/next read of `record` is mapped. -""" -function isnextmapped(record::Record)::Bool - return isfilled(record) && (flag(record) & FLAG_MUNMAP == 0) -end - """ nextrefname(record::Record)::String diff --git a/src/sam/sam.jl b/src/sam/sam.jl index 70cf2de..f3b73e6 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -11,7 +11,8 @@ import BioGenerics.Exceptions: missingerror import BioGenerics.Automa: State import BioSequences import TranscodingStreams: TranscodingStreams, TranscodingStream -import ..XAM: flag, XAMRecord, XAMReader, XAMWriter +import ..XAM: flag, XAMRecord, XAMReader, XAMWriter, + ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. using Printf: @sprintf @@ -48,7 +49,6 @@ function unsafe_parse_decimal(::Type{T}, data::Vector{UInt8}, range::UnitRange{I end -include("flags.jl") include("metainfo.jl") include("record.jl") include("header.jl") From cc5f21b8548d34010e388a8d43379a9240a07ee6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Tue, 28 Feb 2023 15:09:38 +1100 Subject: [PATCH 07/19] Add doi to README --- CHANGELOG.md | 1 + README.md | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 938f911..e1ede94 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added BAM.Reader index support for BAI object ([#56](https://github.com/BioJulia/XAM.jl/pull/56/files)) +- Added doi badge ### Changed diff --git a/README.md b/README.md index 6b83fc1..c5f392c 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) [![Latest Release](https://img.shields.io/github/release/BioJulia/XAM.jl.svg)](https://github.com/BioJulia/XAM.jl/releases/latest) +[![DOI](https://zenodo.org/badge/201858041.svg)](https://zenodo.org/badge/latestdoi/201858041) [![MIT license](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/BioJulia/XAM.jl/blob/master/LICENSE) [![Stable documentation](https://img.shields.io/badge/docs-stable-blue.svg)](https://biojulia.github.io/XAM.jl/stable) [![Latest documentation](https://img.shields.io/badge/docs-dev-blue.svg)](https://biojulia.github.io/XAM.jl/dev/) From 10c1aacd4da057d51c2110c3256192d73571b5b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sun, 16 Apr 2023 21:22:52 +1000 Subject: [PATCH 08/19] Improve Slack link --- README.md | 2 +- docs/src/index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c5f392c..8ac1812 100644 --- a/README.md +++ b/README.md @@ -66,4 +66,4 @@ Your logo will show up here with a link to your website. ## Questions? -If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio). +If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.slack.com/channels/biology), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio). diff --git a/docs/src/index.md b/docs/src/index.md index 43b19cf..e6c1379 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -64,4 +64,4 @@ Your logo will show up here with a link to your website. ## Questions? -If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.org/slack/), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio). +If you have a question about contributing or using BioJulia software, come on over and chat to us on [the Julia Slack workspace](https://julialang.slack.com/channels/biology), or you can try the [Bio category of the Julia discourse site](https://discourse.julialang.org/c/domain/bio). From 560a5cd8dfb5d6c732c4e37ecc6c2c30f9986a0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Tue, 2 May 2023 14:56:48 +1000 Subject: [PATCH 09/19] Check that EOF_BLOCK gets written --- test/test_bam.jl | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/test/test_bam.jl b/test/test_bam.jl index f10bb72..47cb3f4 100644 --- a/test/test_bam.jl +++ b/test/test_bam.jl @@ -209,6 +209,14 @@ close(reader) close(writer) + + # Check that EOF_BLOCK gets written. + nbytes = filesize(path) + @test BAM.BGZFStreams.EOF_BLOCK == open(path) do io + seek(io, nbytes - length(BAM.BGZFStreams.EOF_BLOCK)) + read(io) + end + reader = open(BAM.Reader, path) @test header(reader) == header_original @@ -247,12 +255,12 @@ index = BAM.BAI(filepath * ".bai") reader = open(BAM.Reader, filepath, index=index) - + @test isa(eachoverlap(reader, "chr1", 1:100), BAM.OverlapIterator) close(reader) - @test_throws ErrorException open(BAM.Reader, filepath, index=1234) + @test_throws ErrorException open(BAM.Reader, filepath, index=1234) end From 4a64cda57743ad2b24c14fd3c3f85abd2554498e Mon Sep 17 00:00:00 2001 From: Jonathan Bieler Date: Mon, 3 Jul 2023 11:59:36 +0200 Subject: [PATCH 10/19] Update hts-files.md Fix find -> findall for headers --- docs/src/man/hts-files.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/man/hts-files.md b/docs/src/man/hts-files.md index 77f96d8..133a082 100644 --- a/docs/src/man/hts-files.md +++ b/docs/src/man/hts-files.md @@ -67,7 +67,7 @@ end ## SAM and BAM Headers Both `SAM.Reader` and `BAM.Reader` implement the `header` function, which returns a `SAM.Header` object. -To extract certain information out of the headers, you can use the `find` method on the header to extract information according to SAM/BAM tag. +To extract certain information out of the headers, you can use the `findall` method on the header to extract information according to SAM/BAM tag. Again we refer you to the [specification](https://samtools.github.io/hts-specs/SAMv1.pdf) for full details of all the different tags that can occur in headers, and what they mean. Below is an example of extracting all the info about the reference sequences from the BAM header. @@ -76,7 +76,7 @@ In SAM/BAM, any description of a reference sequence is stored in the header, und ```jlcon julia> reader = open(SAM.Reader, "data.sam"); -julia> find(header(reader), "SQ") +julia> findall(SAM.header(reader), "SQ") 7-element Array{Bio.Align.SAM.MetaInfo,1}: Bio.Align.SAM.MetaInfo: tag: SQ From 5079f1b6b8aacddc4bd0ae99778493692ea8b761 Mon Sep 17 00:00:00 2001 From: Jakob Nybo Nissen Date: Mon, 15 Jan 2024 14:43:27 +0100 Subject: [PATCH 11/19] Update Automa to v1 --- Project.toml | 4 +- src/sam/reader.jl | 4 +- src/sam/readrecord.jl | 201 ++++++++++++++---------------------------- 3 files changed, 72 insertions(+), 137 deletions(-) diff --git a/Project.toml b/Project.toml index 98065dd..bd34c82 100644 --- a/Project.toml +++ b/Project.toml @@ -15,7 +15,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" [compat] -Automa = "0.7, 0.8" +Automa = "1" BGZFStreams = "0.3.1" BioAlignments = "3" BioGenerics = "0.1" @@ -23,7 +23,7 @@ BioSequences = "3" FormatSpecimens = "1.1" GenomicFeatures = "2" Indexes = "0.1" -TranscodingStreams = "0.6, 0.7, 0.8, 0.9" +TranscodingStreams = "0.6, 0.7, 0.8, 0.9, 0.10" julia = "1.6" [extras] diff --git a/src/sam/reader.jl b/src/sam/reader.jl index f38044f..4d6de24 100644 --- a/src/sam/reader.jl +++ b/src/sam/reader.jl @@ -10,9 +10,9 @@ function Reader(state::State{S}) where {S <: TranscodingStream} rdr = Reader(state, Header()) - cs, ln = readheader!(rdr.state.stream, rdr.header, (sam_machine_header.start_state, rdr.state.linenum)) + cs, ln = readheader!(rdr.state.stream, rdr.header, (1, rdr.state.linenum)) - rdr.state.state = sam_machine_body.start_state # Get the reader ready to read the body. + rdr.state.state = 1 # Get the reader ready to read the body. rdr.state.linenum = ln rdr.state.filled = false diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index a3c91c2..1a90927 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -1,149 +1,79 @@ # Automa.jl generated readrecord! and readmetainfo! functions # ======================================== -import Automa -import Automa.RegExp: @re_str -import Automa.Stream: @mark, @markpos, @relpos, @abspos +import Automa: @re_str, rep, onexit!, onenter!, CodeGenContext, generate_reader, compile # file = header . body # header = metainfo* # body = record* -const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_body, sam_machine = (function () - - isinteractive() && @info "compiling SAM" - - cat = Automa.RegExp.cat - rep = Automa.RegExp.rep - alt = Automa.RegExp.alt - opt = Automa.RegExp.opt - any = Automa.RegExp.any - +const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_body, sam_machine = let metainfo = let - tag = re"[A-Z][A-Z]" \ cat("CO") - tag.actions[:enter] = [:pos1] - tag.actions[:exit] = [:metainfo_tag] + tag = onexit!(onenter!(re"[A-Z][A-Z]" \ re"CO", :pos1), :metainfo_tag) dict = let - key = re"[A-Za-z][A-Za-z0-9]" - key.actions[:enter] = [:pos2] - key.actions[:exit] = [:metainfo_dict_key] - val = re"[ -~]+" - val.actions[:enter] = [:pos2] - val.actions[:exit] = [:metainfo_dict_val] - keyval = cat(key, ':', val) - - cat(keyval, rep(cat('\t', keyval))) + key = onexit!(onenter!(re"[A-Za-z][A-Za-z0-9]", :pos2), :metainfo_dict_key) + val = onexit!(onenter!(re"[ -~]+", :pos2), :metainfo_dict_val) + keyval = key * ':' * val + keyval * rep('\t' * keyval) end - dict.actions[:enter] = [:pos1] - dict.actions[:exit] = [:metainfo_val] + onexit!(onenter!(dict, :pos1), :metainfo_val) - co = cat("CO") - co.actions[:enter] = [:pos1] - co.actions[:exit] = [:metainfo_tag] + co = onexit!(onenter!(re"CO", :pos1), :metainfo_tag) - comment = re"[^\r\n]*" # Note: Only single line comments are allowed. - comment.actions[:enter] = [:pos1] - comment.actions[:exit] = [:metainfo_val] + comment = onexit!(onenter!(re"[^\r\n]*", :pos1), :metainfo_val) # Note: Only single line comments are allowed. - cat('@', alt(cat(tag, '\t', dict), cat(co, '\t', comment))) + '@' * ((tag * '\t' * dict) | (co * '\t' * comment)) end - metainfo.actions[:enter] = [:mark] - metainfo.actions[:exit] = [:metainfo] + onexit!(onenter!(metainfo, :mark), :metainfo) record = let - qname = re"[!-?A-~]+" - qname.actions[:enter] = [:pos] - qname.actions[:exit] = [:record_qname] - - flag = re"[0-9]+" - flag.actions[:enter] = [:pos] - flag.actions[:exit] = [:record_flag] - - rname = re"\*|[!-()+-<>-~][!-~]*" - rname.actions[:enter] = [:pos] - rname.actions[:exit] = [:record_rname] - - pos = re"[0-9]+" - pos.actions[:enter] = [:pos] - pos.actions[:exit] = [:record_pos] - - mapq = re"[0-9]+" - mapq.actions[:enter] = [:pos] - mapq.actions[:exit] = [:record_mapq] - - cigar = re"\*|([0-9]+[MIDNSHPX=])+" - cigar.actions[:enter] = [:pos] - cigar.actions[:exit] = [:record_cigar] - - rnext = re"\*|=|[!-()+-<>-~][!-~]*" - rnext.actions[:enter] = [:pos] - rnext.actions[:exit] = [:record_rnext] - - pnext = re"[0-9]+" - pnext.actions[:enter] = [:pos] - pnext.actions[:exit] = [:record_pnext] - - tlen = re"[-+]?[0-9]+" - tlen.actions[:enter] = [:pos] - tlen.actions[:exit] = [:record_tlen] - - seq = re"\*|[A-Za-z=.]+" - seq.actions[:enter] = [:pos] - seq.actions[:exit] = [:record_seq] - - qual = re"[!-~]+" - qual.actions[:enter] = [:pos] - qual.actions[:exit] = [:record_qual] + qname = onexit!(onenter!(re"[!-?A-~]+", :pos), :record_qname) + flag = onexit!(onenter!(re"[0-9]+", :pos), :record_flag) + rname = onexit!(onenter!(re"\*|[!-()+-<>-~][!-~]*", :pos), :record_rname) + pos = onexit!(onenter!(re"[0-9]+", :pos), :record_pos) + mapq = onexit!(onenter!(re"[0-9]+", :pos), :record_mapq) + cigar = onexit!(onenter!(re"\*|([0-9]+[MIDNSHPX=])+", :pos), :record_cigar) + rnext = onexit!(onenter!(re"\*|=|[!-()+-<>-~][!-~]*", :pos), :record_rnext) + pnext = onexit!(onenter!(re"[0-9]+", :pos), :record_pnext) + tlen = onexit!(onenter!(re"[-+]?[0-9]+", :pos), :record_tlen) + seq = onexit!(onenter!(re"\*|[A-Za-z=.]+", :pos), :record_seq) + qual = onexit!(onenter!(re"[!-~]+", :pos), :record_qual) field = let tag = re"[A-Za-z][A-Za-z0-9]" - val = alt( - re"A:[!-~]", - re"i:[-+]?[0-9]+", - re"f:[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?", - re"Z:[ !-~]*", - re"H:([0-9A-F][0-9A-F])*", - re"B:[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+") - - cat(tag, ':', val) + val = ( + re"A:[!-~]" | + re"i:[-+]?[0-9]+" | + re"f:[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?" | + re"Z:[ !-~]*" | + re"H:([0-9A-F][0-9A-F])*" | + re"B:[cCsSiIf](,[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?)+" + ) + tag * ':' * val end - field.actions[:enter] = [:pos] - field.actions[:exit] = [:record_field] - - cat( - qname, '\t', - flag, '\t', - rname, '\t', - pos, '\t', - mapq, '\t', - cigar, '\t', - rnext, '\t', - pnext, '\t', - tlen, '\t', - seq, '\t', - qual, - rep(cat('\t', field))) + onexit!(onenter!(field, :pos), :record_field) + + qname * '\t' * + flag * '\t' * + rname * '\t' * + pos * '\t' * + mapq * '\t' * + cigar * '\t' * + rnext * '\t' * + pnext * '\t' * + tlen * '\t' * + seq * '\t' * + qual * + rep('\t' * field) end - record.actions[:enter] = [:mark] - record.actions[:exit] = [:record] - - newline = let - lf = re"\n" - lf.actions[:enter] = [:countline] - - cat(re"\r?", lf) - end - - header = rep(cat(metainfo, newline)) - header.actions[:exit] = [:header] + onexit!(onenter!(record, :mark), :record) + newline = "\r?" * onenter!(re"\n", :countline) + header = onexit!(rep(metainfo * newline), :header) + body = onexit!(rep(record * newline), :body) + sam = header * body - body = rep(cat(record, newline)) - body.actions[:exit] = [:body] - - sam = cat(header, body) - - return map(Automa.compile, (metainfo, record, header, body, sam)) -end)() + map(compile, (metainfo, record, header, body, sam)) +end # write("sam_machine_metainfo.dot", Automa.machine2dot(sam_machine_metainfo)) # run(`dot -Tsvg -o sam_machine_metainfo.svg sam_machine_metainfo.dot`) @@ -229,11 +159,7 @@ const sam_actions_body = merge( ) ) -const sam_context = Automa.CodeGenContext( - generator = :goto, - checkbounds = false, - loopunroll = 0 -) +const sam_context = CodeGenContext(generator = :goto) const sam_initcode_metainfo = quote pos1 = 0 @@ -256,7 +182,7 @@ const sam_initcode_body = quote cs, linenum = state end -Automa.Stream.generate_reader( +generate_reader( :index!, sam_machine_metainfo, arguments = (:(metainfo::MetaInfo),), @@ -269,14 +195,23 @@ const sam_returncode_header = quote return cs, linenum end -Automa.Stream.generate_reader( +generate_reader( :readheader!, sam_machine_header, arguments = (:(header::SAM.Header), :(state::Tuple{Int,Int})), actions = sam_actions_header, context = sam_context, initcode = sam_initcode_header, - returncode = sam_returncode_header + returncode = sam_returncode_header, + errorcode = quote + # We expect the SAM header machine to error, as it finds the first non-header byte. + # This happens at state 1 (hence error state -1), and before reaching EOF. + if cs == -1 && !(is_eof && p < p_end) + @goto __return__ + else + error("Expected input byte after SAM header.") + end + end ) |> eval const sam_loopcode_body = quote @@ -285,7 +220,7 @@ const sam_loopcode_body = quote end end -Automa.Stream.generate_reader( +generate_reader( :index!, sam_machine_record, arguments = (:(record::Record),), @@ -299,7 +234,7 @@ const sam_returncode_body = quote return cs, linenum, found_record end -Automa.Stream.generate_reader( +generate_reader( :readrecord!, sam_machine_body, arguments = (:(record::Record), :(state::Tuple{Int,Int})), From 6a31ffa343cd5d558094947fcef4cc2fe53113c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Wed, 17 Jan 2024 07:17:51 +1100 Subject: [PATCH 12/19] Update CHANGELOG --- CHANGELOG.md | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1ede94..f38afde 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -All notable changes to HapLink.jl will be documented in this file. +All notable changes to XAM.jl will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). @@ -8,36 +8,42 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added -- Added BAM.Reader index support for BAI object ([#56](https://github.com/BioJulia/XAM.jl/pull/56/files)) -- Added doi badge +- Added BAM.Reader index support for BAI object ([#56](https://github.com/BioJulia/XAM.jl/pull/56)). +- Added doi badge. +- Added test to ensure EOF_BLOCK gets written. ### Changed - Subtype from XAMReader and XAMWriter from common abstract types. - Subtype from XAMRecord. - Unified flag queries. +- Improved Slack link. +- Updated to use [Automa](https://github.com/BioJulia/Automa.jl) v1 ([#65](https://github.com/BioJulia/XAM.jl/pull/65)). + +### Fixed +- Updated hts-files.md ([#62](https://github.com/BioJulia/XAM.jl/pull/62)). ## [0.3.1] ### Changed -- Upgraded to BioAlignments v3 ([#55](https://github.com/BioJulia/XAM.jl/pull/55)) +- Upgraded to BioAlignments v3 ([#55](https://github.com/BioJulia/XAM.jl/pull/55)). ## [0.3.0] - 2022-10-10 ## Added -- Crosschecks for SAM and BAM ([#29](https://github.com/BioJulia/XAM.jl/pull/29)) -- Improved documentation for flags ([#43](https://github.com/BioJulia/XAM.jl/pull/43)) +- Crosschecks for SAM and BAM ([#29](https://github.com/BioJulia/XAM.jl/pull/29)). +- Improved documentation for flags ([#43](https://github.com/BioJulia/XAM.jl/pull/43)). ### Changed -- `BAM.quality` performance improved ([#21](https://github.com/BioJulia/XAM.jl/issues/21)) -- Updated BioAlignments to v2.2 and BioSequences to v3 ([#48](https://github.com/BioJulia/XAM.jl/pull/48)) +- `BAM.quality` performance improved ([#21](https://github.com/BioJulia/XAM.jl/issues/21)). +- Updated BioAlignments to v2.2 and BioSequences to v3 ([#48](https://github.com/BioJulia/XAM.jl/pull/48)). ### Fixed -- `BAM.Record` layout now matches the BAM specs ([#26](https://github.com/BioJulia/XAM.jl/pull/26)) +- `BAM.Record` layout now matches the BAM specs ([#26](https://github.com/BioJulia/XAM.jl/pull/26)). [Unreleased]: https://github.com/BioJulia/XAM.jl/compare/v0.3.1...HEAD [0.3.1]: https://github.com/BioJulia/XAM.jl/compare/v0.3.0...v0.3.1 From aca2471faaec6e6911278c7b013123b0977c4231 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Wed, 17 Jan 2024 07:10:41 +1100 Subject: [PATCH 13/19] Increment version --- CHANGELOG.md | 5 ++++- Project.toml | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f38afde..afc5d51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.4.0] + ### Added - Added BAM.Reader index support for BAI object ([#56](https://github.com/BioJulia/XAM.jl/pull/56)). - Added doi badge. @@ -45,6 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `BAM.Record` layout now matches the BAM specs ([#26](https://github.com/BioJulia/XAM.jl/pull/26)). -[Unreleased]: https://github.com/BioJulia/XAM.jl/compare/v0.3.1...HEAD +[Unreleased]: https://github.com/BioJulia/XAM.jl/compare/v0.4.0...HEAD +[0.4.0]: https://github.com/BioJulia/XAM.jl/compare/v0.3.1...0.4.0 [0.3.1]: https://github.com/BioJulia/XAM.jl/compare/v0.3.0...v0.3.1 [0.3.0]: https://github.com/BioJulia/XAM.jl/compare/v0.2.8...v0.3.0 diff --git a/Project.toml b/Project.toml index bd34c82..06c9681 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "XAM" uuid = "d759349c-bcba-11e9-07c2-5b90f8f05f7c" authors = ["Kenta Sato ", "Ben J. Ward ", "Ciarán O'Mara "] -version = "0.3.1" +version = "0.4.0" [deps] Automa = "67c07d97-cdcb-5c2c-af73-a7f9c32a568b" From 95d470d85b7cc2a0df9f007e8be0a57839c612df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Tue, 28 Feb 2023 11:19:01 +1100 Subject: [PATCH 14/19] Update UnitTests workflow - Add GitHub "annotations" - Update codecov-action --- .github/workflows/UnitTests.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/UnitTests.yml b/.github/workflows/UnitTests.yml index a6cbe9e..04ab7b6 100644 --- a/.github/workflows/UnitTests.yml +++ b/.github/workflows/UnitTests.yml @@ -29,17 +29,19 @@ jobs: steps: - name: Checkout Repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Setup Julia uses: julia-actions/setup-julia@v1 with: version: ${{ matrix.julia-version }} - name: Run Tests uses: julia-actions/julia-runtest@v1 + with: + annotate: true - name: Create CodeCov uses: julia-actions/julia-processcoverage@v1 - name: Upload CodeCov - uses: codecov/codecov-action@v1 + uses: codecov/codecov-action@v2 with: file: ./lcov.info flags: unittests From 38bc47da4e6731569ed3ab60063e27b370c746fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Thu, 18 Jan 2024 14:08:34 +1100 Subject: [PATCH 15/19] Update UnitTests badge --- CHANGELOG.md | 1 + README.md | 2 +- docs/src/index.md | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index afc5d51..1f9073b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Unified flag queries. - Improved Slack link. - Updated to use [Automa](https://github.com/BioJulia/Automa.jl) v1 ([#65](https://github.com/BioJulia/XAM.jl/pull/65)). +- Pointed the Unit Tests badge at the develop branch. ### Fixed - Updated hts-files.md ([#62](https://github.com/BioJulia/XAM.jl/pull/62)). diff --git a/README.md b/README.md index 8ac1812..d1d3387 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ XAM is tested against Julia `1.X` on Linux, OS X, and Windows. **Latest build status:** -[![Unit tests](https://github.com/BioJulia/XAM.jl/workflows/Unit%20tests/badge.svg?branch=master)](https://github.com/BioJulia/XAM.jl/actions?query=workflow%3A%22Unit+tests%22+branch%3Amaster) +[![Unit Tests](https://github.com/BioJulia/XAM.jl/actions/workflows/UnitTests.yml/badge.svg?branch=develop)](https://github.com/BioJulia/XAM.jl/actions/workflows/UnitTests.yml) [![Documentation](https://github.com/BioJulia/XAM.jl/workflows/Documentation/badge.svg?branch=master)](https://github.com/BioJulia/XAM.jl/actions?query=workflow%3ADocumentation+branch%3Amaster) [![codecov](https://codecov.io/gh/BioJulia/XAM.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/BioJulia/XAM.jl) diff --git a/docs/src/index.md b/docs/src/index.md index e6c1379..91e8dd0 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -25,7 +25,7 @@ XAM is tested against Julia `1.X` on Linux, OS X, and Windows. **Latest build status:** -[![Unit tests](https://github.com/BioJulia/XAM.jl/workflows/Unit%20tests/badge.svg?branch=master)](https://github.com/BioJulia/XAM.jl/actions?query=workflow%3A%22Unit+tests%22+branch%3Amaster) +[![Unit Tests](https://github.com/BioJulia/XAM.jl/actions/workflows/UnitTests.yml/badge.svg?branch=develop)](https://github.com/BioJulia/XAM.jl/actions/workflows/UnitTests.yml) [![Documentation](https://github.com/BioJulia/XAM.jl/workflows/Documentation/badge.svg?branch=master)](https://github.com/BioJulia/XAM.jl/actions?query=workflow%3ADocumentation+branch%3Amaster) [![codecov](https://codecov.io/gh/BioJulia/XAM.jl/branch/master/graph/badge.svg)](https://codecov.io/gh/BioJulia/XAM.jl) From afaa3636b8a92b7a29444a3d4c0704cd20eae56c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Wed, 17 Jan 2024 07:28:00 +1100 Subject: [PATCH 16/19] Update Documentation workflow --- .github/workflows/Documentation.yml | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml index 7e683ea..912707a 100644 --- a/.github/workflows/Documentation.yml +++ b/.github/workflows/Documentation.yml @@ -12,12 +12,13 @@ jobs: Documenter: permissions: contents: write - name: Documentation + statuses: write + name: Documentation runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: julia-actions/julia-buildpkg@v1 + - uses: actions/checkout@v4 + - uses: julia-actions/julia-buildpkg@v1 # if package requires Pkg.build() - uses: julia-actions/julia-docdeploy@v1 env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # If authenticating with GitHub Actions token + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # If authenticating with SSH deploy key From 671f522be541dc4fd8922b7e82124e04eab63c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Wed, 17 Jan 2024 07:33:46 +1100 Subject: [PATCH 17/19] Update TagBot workflow --- .github/workflows/TagBot.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index 9bdb8d7..fd94b60 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -9,7 +9,18 @@ on: lookback: default: 3 permissions: + actions: read + checks: read contents: write + deployments: read + issues: read + discussions: read + packages: read + pages: read + pull-requests: read + repository-projects: read + security-events: read + statuses: read jobs: TagBot: From 1d1d9c483606cba9c5eaa5b5e525c12463dde92f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sun, 21 Jan 2024 07:29:03 +1100 Subject: [PATCH 18/19] Pluralise flag --- CHANGELOG.md | 1 + docs/src/man/hts-files.md | 4 ++-- src/bam/bam.jl | 2 +- src/bam/record.jl | 14 ++++++------ src/flags.jl | 48 +++++++++++++++++++-------------------- src/sam/readrecord.jl | 6 ++--- src/sam/record.jl | 16 ++++++------- src/sam/sam.jl | 2 +- test/test_bam.jl | 8 +++---- test/test_crosscheck.jl | 2 +- test/test_sam.jl | 8 +++---- 11 files changed, 56 insertions(+), 55 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1f9073b..b4be00c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Improved Slack link. - Updated to use [Automa](https://github.com/BioJulia/Automa.jl) v1 ([#65](https://github.com/BioJulia/XAM.jl/pull/65)). - Pointed the Unit Tests badge at the develop branch. +- Pluralised flag. ### Fixed - Updated hts-files.md ([#62](https://github.com/BioJulia/XAM.jl/pull/62)). diff --git a/docs/src/man/hts-files.md b/docs/src/man/hts-files.md index 133a082..43e7ed7 100644 --- a/docs/src/man/hts-files.md +++ b/docs/src/man/hts-files.md @@ -110,7 +110,7 @@ In the above we can see there were 7 sequences in the reference: 5 chromosomes, The `XAM` package supports the following accessors for `SAM.Record` types. ```@docs -XAM.SAM.flag +XAM.SAM.flags XAM.SAM.ismapped XAM.SAM.isprimary XAM.SAM.refname @@ -135,7 +135,7 @@ XAM.SAM.auxdata The `XAM` package supports the following accessors for `BAM.Record` types. ```@docs -XAM.BAM.flag +XAM.BAM.flags XAM.BAM.ismapped XAM.BAM.isprimary XAM.BAM.refid diff --git a/src/bam/bam.jl b/src/bam/bam.jl index 69ca235..6bacfd2 100644 --- a/src/bam/bam.jl +++ b/src/bam/bam.jl @@ -6,7 +6,7 @@ module BAM using BioGenerics using GenomicFeatures using XAM.SAM -import ..XAM: flag, XAMRecord, XAMReader, XAMWriter, +import ..XAM: flags, XAMRecord, XAMReader, XAMWriter, ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. import BGZFStreams diff --git a/src/bam/record.jl b/src/bam/record.jl index e8ee356..366e54e 100644 --- a/src/bam/record.jl +++ b/src/bam/record.jl @@ -15,7 +15,7 @@ mutable struct Record <: XAMRecord mapq::UInt8 bin::UInt16 n_cigar_op::UInt16 - flag::UInt16 + flags::UInt16 l_seq::Int32 next_refid::Int32 next_pos::Int32 @@ -56,7 +56,7 @@ function Base.:(==)(a::Record, b::Record) a.mapq == b.mapq && a.bin == b.bin && a.n_cigar_op == b.n_cigar_op && - a.flag == b.flag && + a.flags == b.flags && a.l_seq == b.l_seq && a.next_refid == b.next_refid && a.next_pos == b.next_pos && @@ -83,7 +83,7 @@ function Base.empty!(record::Record) record.l_read_name = 0 record.mapq = 0 record.bin = 0 - record.flag = 0 + record.flags = 0 record.n_cigar_op = 0 record.l_seq = 0 record.next_refid = 0 @@ -100,7 +100,7 @@ function Base.show(io::IO, record::Record) if isfilled(record) println(io) println(io, " template name: ", tempname(record)) - println(io, " flag: ", flag(record)) + println(io, " flags: ", flags(record)) println(io, " reference ID: ", refid(record)) println(io, " position: ", position(record)) println(io, " mapping quality: ", mappingquality(record)) @@ -128,12 +128,12 @@ end # Accessor Fuctions # ----------------- -function flag(record::Record)::UInt16 +function flags(record::Record)::UInt16 checkfilled(record) - return record.flag + return record.flags end -function hasflag(record::Record) +function hasflags(record::Record) return isfilled(record) end diff --git a/src/flags.jl b/src/flags.jl index 4679749..96a371e 100644 --- a/src/flags.jl +++ b/src/flags.jl @@ -3,7 +3,7 @@ # """ - flag(record::XAMRecord})::UInt16 + flags(record::XAMRecord})::UInt16 Get the bitwise flags of `record`. The returned value is a `UInt16` of each flag being OR'd together. @@ -23,9 +23,9 @@ The possible flags are: 0x0800 supplementary alignment """ -function flag end +function flags end -# Bitwise flags (or FLAG). +# Bitwise flag (or FLAG). for (name, bits, doc) in [ (:PAIRED, UInt16(0x001), "the read is paired in sequencing, no matter whether it is mapped in a pair"), (:PROPER_PAIR, UInt16(0x002), "the read is mapped in a proper pair" ), @@ -42,9 +42,9 @@ for (name, bits, doc) in [ @assert bits isa UInt16 "The bits must be of type UInt16." sym = Symbol("FLAG_", name) docstring = """ $sym - SAM/BAM flag: $doc + SAM/BAM flags: $doc - See also: [`flag`](@ref) + See also: [`flags`](@ref) """ @eval begin @doc $(docstring) const $(sym) = $(bits) @@ -57,7 +57,7 @@ end Query whether the `record`'s template has multiple segments in sequencing. """ function ispaired(record::XAMRecord)::Bool - return flag(record) & FLAG_PAIRED == FLAG_PAIRED + return flags(record) & FLAG_PAIRED == FLAG_PAIRED end """ @@ -66,7 +66,7 @@ end Query whether each segment of the `record`'s template properly aligned according to the aligner. """ function isproperpair(record::XAMRecord)::Bool - return flag(record) & PROPER_PAIR == PROPER_PAIR + return flags(record) & PROPER_PAIR == PROPER_PAIR end """ @@ -75,7 +75,7 @@ end Query whether the `record` is unmapped. """ function isunmapped(record::XAMRecord)::Bool - return flag(record) & FLAG_UNMAP == FLAG_UNMAP + return flags(record) & FLAG_UNMAP == FLAG_UNMAP end """ @@ -84,8 +84,8 @@ end Query whether the `record` is mapped. """ function ismapped(record::XAMRecord)::Bool - # return flag(record) & FLAG_UNMAP == 0 - return isfilled(record) && (flag(record) & FLAG_UNMAP == 0) + # return flags(record) & FLAG_UNMAP == 0 + return isfilled(record) && (flags(record) & FLAG_UNMAP == 0) end """ @@ -94,7 +94,7 @@ end Query whether the `record`'s mate is unmapped. """ function ismateunmapped(record::XAMRecord)::Bool - return flag(record) & FLAG_MUNMAP == FLAG_MUNMAP + return flags(record) & FLAG_MUNMAP == FLAG_MUNMAP end """ @@ -103,7 +103,7 @@ end Test if the mate/next read of `record` is mapped. """ function isnextmapped(record::XAMRecord)::Bool - return flag(record) & FLAG_MUNMAP == 0 + return flags(record) & FLAG_MUNMAP == 0 end """ @@ -112,7 +112,7 @@ end Query whether the `record` is mapped to the reverse strand. """ function isreverse(record::XAMRecord)::Bool - return flag(record) & FLAG_REVERSE == FLAG_REVERSE + return flags(record) & FLAG_REVERSE == FLAG_REVERSE end """ @@ -121,7 +121,7 @@ end Query whether the `record` is mapped to the forward strand. """ function isforward(record::XAMRecord)::Bool - return flag(record) & FLAG_REVERSE == 0 + return flags(record) & FLAG_REVERSE == 0 end """ @@ -148,7 +148,7 @@ end Query whether the `record`'s mate is mapped to the reverse strand. """ function ismatereverse(record::XAMRecord)::Bool - return flag(record) & FLAG_MREVERSE == FLAG_MREVERSE + return flags(record) & FLAG_MREVERSE == FLAG_MREVERSE end """ @@ -157,7 +157,7 @@ end Query whether the `record` is read1. """ function isread1(record::XAMRecord)::Bool - return flag(record) & FLAG_READ1 == FLAG_READ1 + return flags(record) & FLAG_READ1 == FLAG_READ1 end """ @@ -166,7 +166,7 @@ end Query whether the `record` is read2. """ function isread2(record::XAMRecord)::Bool - return flag(record) & FLAG_READ2 == FLAG_READ2 + return flags(record) & FLAG_READ2 == FLAG_READ2 end """ @@ -175,7 +175,7 @@ end Query whether the `record` is a secondary alignment. """ function issecondaryalignment(record::XAMRecord)::Bool - return flag(record) & FLAG_SECONDARY == FLAG_SECONDARY + return flags(record) & FLAG_SECONDARY == FLAG_SECONDARY end """ @@ -184,7 +184,7 @@ end Query whether the `record` is the primary alignment. """ function isprimaryalignment(record::XAMRecord)::Bool - return flag(record) & FLAG_SECONDARY == 0 + return flags(record) & FLAG_SECONDARY == 0 end """ @@ -193,7 +193,7 @@ end Query whether the `record` did not pass filters, such as platform/vendor quality controls. """ function isqcfail(record::XAMRecord)::Bool - return flag(record) & FLAG_QCFAIL == FLAG_QCFAIL + return flags(record) & FLAG_QCFAIL == FLAG_QCFAIL end """ @@ -202,7 +202,7 @@ end Query whether the `record` is a PCR or optical duplicate. """ function isduplicate(record::XAMRecord)::Bool - return flag(record) & FLAG_DUP == FLAG_DUP + return flags(record) & FLAG_DUP == FLAG_DUP end """ @@ -211,7 +211,7 @@ end Query whether the `record` is a supplementary alignment. """ function issupplementaryalignment(record::XAMRecord)::Bool - return flag(record) & FLAG_SUPPLEMENTARY == FLAG_SUPPLEMENTARY + return flags(record) & FLAG_SUPPLEMENTARY == FLAG_SUPPLEMENTARY end """ @@ -219,8 +219,8 @@ end Query whether `record` is a primary line of the read. -This is equivalent to `flag(record) & 0x900 == 0`. +This is equivalent to `flags(record) & 0x900 == 0`. """ function isprimary(record::XAMRecord)::Bool - return flag(record) & 0x900 == 0 + return flags(record) & 0x900 == 0 end diff --git a/src/sam/readrecord.jl b/src/sam/readrecord.jl index 1a90927..c347267 100644 --- a/src/sam/readrecord.jl +++ b/src/sam/readrecord.jl @@ -28,7 +28,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_ record = let qname = onexit!(onenter!(re"[!-?A-~]+", :pos), :record_qname) - flag = onexit!(onenter!(re"[0-9]+", :pos), :record_flag) + flags = onexit!(onenter!(re"[0-9]+", :pos), :record_flags) rname = onexit!(onenter!(re"\*|[!-()+-<>-~][!-~]*", :pos), :record_rname) pos = onexit!(onenter!(re"[0-9]+", :pos), :record_pos) mapq = onexit!(onenter!(re"[0-9]+", :pos), :record_mapq) @@ -54,7 +54,7 @@ const sam_machine_metainfo, sam_machine_record, sam_machine_header, sam_machine_ onexit!(onenter!(field, :pos), :record_field) qname * '\t' * - flag * '\t' * + flags * '\t' * rname * '\t' * pos * '\t' * mapq * '\t' * @@ -129,7 +129,7 @@ const sam_actions_record = Dict( :mark => :(@mark), :pos => :(pos = @relpos(p)), :record_qname => :(record.qname = pos:@relpos(p-1)), - :record_flag => :(record.flag = pos:@relpos(p-1)), + :record_flags => :(record.flags = pos:@relpos(p-1)), :record_rname => :(record.rname = pos:@relpos(p-1)), :record_pos => :(record.pos = pos:@relpos(p-1)), :record_mapq => :(record.mapq = pos:@relpos(p-1)), diff --git a/src/sam/record.jl b/src/sam/record.jl index 7a2a22b..039887c 100644 --- a/src/sam/record.jl +++ b/src/sam/record.jl @@ -8,7 +8,7 @@ mutable struct Record <: XAMRecord # Mandatory fields. qname::UnitRange{Int} - flag::UnitRange{Int} + flags::UnitRange{Int} rname::UnitRange{Int} pos::UnitRange{Int} mapq::UnitRange{Int} @@ -80,7 +80,7 @@ end function Base.:(==)(a::Record, b::Record) return a.filled == b.filled && a.qname == b.qname && - a.flag == b.flag && + a.flags == b.flags && a.rname == b.rname && a.pos == b.pos && a.mapq == b.mapq && @@ -99,7 +99,7 @@ function Base.show(io::IO, record::Record) if isfilled(record) println(io) println(io, " template name: ", hastempname(record) ? tempname(record) : "") - println(io, " flag: ", hasflag(record) ? flag(record) : "") + println(io, " flags: ", hasflags(record) ? flags(record) : "") println(io, " reference: ", hasrefname(record) ? refname(record) : "") println(io, " position: ", hasposition(record) ? position(record) : "") println(io, " mapping quality: ", hasmappingquality(record) ? mappingquality(record) : "") @@ -133,7 +133,7 @@ function Base.copy(record::Record) copy(record.data), record.filled, record.qname, - record.flag, + record.flags, record.rname, record.pos, record.mapq, @@ -150,12 +150,12 @@ end # Accessor Functions # ------------------ -function flag(record::Record)::UInt16 +function flags(record::Record)::UInt16 checkfilled(record) - return unsafe_parse_decimal(UInt16, record.data, record.flag) + return unsafe_parse_decimal(UInt16, record.data, record.flags) end -function hasflag(record::Record) +function hasflags(record::Record) return isfilled(record) end @@ -545,7 +545,7 @@ end function Base.empty!(record::Record) record.filled = 1:0 record.qname = 1:0 - record.flag = 1:0 + record.flags = 1:0 record.rname = 1:0 record.pos = 1:0 record.mapq = 1:0 diff --git a/src/sam/sam.jl b/src/sam/sam.jl index f3b73e6..ad6e282 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -11,7 +11,7 @@ import BioGenerics.Exceptions: missingerror import BioGenerics.Automa: State import BioSequences import TranscodingStreams: TranscodingStreams, TranscodingStream -import ..XAM: flag, XAMRecord, XAMReader, XAMWriter, +import ..XAM: flags, XAMRecord, XAMReader, XAMWriter, ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. using Printf: @sprintf diff --git a/test/test_bam.jl b/test/test_bam.jl index 47cb3f4..3bfa29d 100644 --- a/test/test_bam.jl +++ b/test/test_bam.jl @@ -37,7 +37,7 @@ record = BAM.Record() @test !isfilled(record) @test repr(record) == "XAM.BAM.Record: " - @test_throws ArgumentError BAM.flag(record) + @test_throws ArgumentError BAM.flags(record) end @testset "Reader" begin @@ -76,7 +76,7 @@ @test BAM.hasquality(record) @test eltype(BAM.quality(record)) == UInt8 @test BAM.quality(record) == [Int(x) - 33 for x in "#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC"] - @test BAM.flag(record) === UInt16(16) + @test BAM.flags(record) === UInt16(16) @test BAM.cigar(record) == "27M1D73M" @test BAM.alignment(record) == Alignment([ AlignmentAnchor( 0, 1, 0, OP_START), @@ -108,7 +108,7 @@ @test record.mapq == new_record.mapq @test record.bin == new_record.bin @test record.block_size == new_record.block_size - @test record.flag == new_record.flag + @test record.flags == new_record.flags @test record.n_cigar_op == new_record.n_cigar_op @test record.l_seq == new_record.l_seq @test record.next_refid == new_record.next_refid @@ -174,7 +174,7 @@ a.mapq == b.mapq && a.bin == b.bin && a.n_cigar_op == b.n_cigar_op && - a.flag == b.flag && + a.flags == b.flags && a.l_seq == b.l_seq && a.next_refid == b.next_refid && a.next_pos == b.next_pos && diff --git a/test/test_crosscheck.jl b/test/test_crosscheck.jl index 0e853ef..c27e107 100644 --- a/test/test_crosscheck.jl +++ b/test/test_crosscheck.jl @@ -34,7 +34,7 @@ :tempname,# QNAME :mappingquality,# MAPQ :cigar, # CIGAR - :flag, # FLAG + :flags, # FLAG :sequence, # SEQ :nextposition, # PNEXT :templength, # TLEN diff --git a/test/test_sam.jl b/test/test_sam.jl index 5496d8d..837129d 100644 --- a/test/test_sam.jl +++ b/test/test_sam.jl @@ -48,7 +48,7 @@ @test !isfilled(record) @test !SAM.ismapped(record) @test repr(record) == "XAM.SAM.Record: " - @test_throws ArgumentError SAM.flag(record) + @test_throws ArgumentError SAM.flags(record) record = SAM.Record("r001\t99\tchr1\t7\t30\t8M2I4M1D3M\t=\t37\t39\tTTAGATAAAGGATACTG\t*") @test isfilled(record) @@ -57,8 +57,8 @@ @test SAM.isprimary(record) @test SAM.hastempname(record) @test SAM.tempname(record) == "r001" - @test SAM.hasflag(record) - @test SAM.flag(record) === UInt16(99) + @test SAM.hasflags(record) + @test SAM.flags(record) === UInt16(99) @test SAM.hasrefname(record) @test SAM.refname(record) == "chr1" @test SAM.hasposition(record) @@ -101,7 +101,7 @@ @test SAM.seqlength(record) == 100 @test SAM.quality(record) == (b"#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" .- 33) @test SAM.quality(String, record) == "#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" - @test SAM.flag(record) == 16 + @test SAM.flags(record) == 16 @test SAM.cigar(record) == "27M1D73M" @test SAM.alignment(record) == Alignment([ AlignmentAnchor( 0, 1, 0, OP_START), From 456d820866f88efdff6ea1d3962fd5c91f6cb825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ciar=C3=A1n=20O=27Mara?= Date: Sun, 21 Jan 2024 10:12:37 +1100 Subject: [PATCH 19/19] Better align `flags.jl` nomenclature with the SAMv1 specification --- CHANGELOG.md | 18 +++++ docs/src/man/hts-files.md | 4 +- src/bam/bam.jl | 2 +- src/flags.jl | 145 ++++++++++++++++++++++---------------- src/sam/sam.jl | 2 +- test/test_bam.jl | 4 +- test/test_sam.jl | 2 +- 7 files changed, 110 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4be00c..84732b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,12 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] + ## [0.4.0] ### Added - Added BAM.Reader index support for BAI object ([#56](https://github.com/BioJulia/XAM.jl/pull/56)). - Added doi badge. - Added test to ensure EOF_BLOCK gets written. +- Added `isreversestrand`. +- Added `isfirstsegment`. +- Added `islastsegment`. ### Changed @@ -23,9 +27,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated to use [Automa](https://github.com/BioJulia/Automa.jl) v1 ([#65](https://github.com/BioJulia/XAM.jl/pull/65)). - Pointed the Unit Tests badge at the develop branch. - Pluralised flag. +- Renamed `ismateunmapped` to `isnextunmapped`. +- Renamed `isreverse` to `isreversecomplemented`. +- Renamed `isforward` to `isforwardstrand`. +- `ispositivestrand` aliases `isforwardstrand`. +- `isnegativestrand` aliases `isreversestrand`. +- Renamed `ismatereverse` to `isnextreversecomplemented`. +- `isread1` aliases `isfirstsegment`. +- `isread2` aliases `islastsegment`. ### Fixed - Updated hts-files.md ([#62](https://github.com/BioJulia/XAM.jl/pull/62)). +- Corrected the behaviour of `isprimaryalignment` with `isprimary`. + +### Removed +- Moved the functionality of `isprimary` into `isprimaryalignment`. + ## [0.3.1] @@ -33,6 +50,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Upgraded to BioAlignments v3 ([#55](https://github.com/BioJulia/XAM.jl/pull/55)). + ## [0.3.0] - 2022-10-10 ## Added diff --git a/docs/src/man/hts-files.md b/docs/src/man/hts-files.md index 43e7ed7..a8a5f7e 100644 --- a/docs/src/man/hts-files.md +++ b/docs/src/man/hts-files.md @@ -112,7 +112,7 @@ The `XAM` package supports the following accessors for `SAM.Record` types. ```@docs XAM.SAM.flags XAM.SAM.ismapped -XAM.SAM.isprimary +XAM.SAM.isprimaryalignment XAM.SAM.refname XAM.SAM.position XAM.SAM.rightposition @@ -137,7 +137,7 @@ The `XAM` package supports the following accessors for `BAM.Record` types. ```@docs XAM.BAM.flags XAM.BAM.ismapped -XAM.BAM.isprimary +XAM.BAM.isprimaryalignment XAM.BAM.refid XAM.BAM.refname XAM.BAM.reflen diff --git a/src/bam/bam.jl b/src/bam/bam.jl index 6bacfd2..2120d01 100644 --- a/src/bam/bam.jl +++ b/src/bam/bam.jl @@ -7,7 +7,7 @@ using BioGenerics using GenomicFeatures using XAM.SAM import ..XAM: flags, XAMRecord, XAMReader, XAMWriter, - ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. + ismapped, isprimaryalignment, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. import BGZFStreams import BioAlignments diff --git a/src/flags.jl b/src/flags.jl index 96a371e..a0340d2 100644 --- a/src/flags.jl +++ b/src/flags.jl @@ -7,6 +7,7 @@ Get the bitwise flags of `record`. The returned value is a `UInt16` of each flag being OR'd together. + The possible flags are: 0x0001 template having multiple segments in sequencing @@ -25,20 +26,24 @@ The possible flags are: function flags end + + + # Bitwise flag (or FLAG). for (name, bits, doc) in [ - (:PAIRED, UInt16(0x001), "the read is paired in sequencing, no matter whether it is mapped in a pair"), - (:PROPER_PAIR, UInt16(0x002), "the read is mapped in a proper pair" ), - (:UNMAP, UInt16(0x004), "the read itself is unmapped; conflictive with FLAG_PROPER_PAIR" ), - (:MUNMAP, UInt16(0x008), "the mate is unmapped" ), - (:REVERSE, UInt16(0x010), "the read is mapped to the reverse strand" ), - (:MREVERSE, UInt16(0x020), "the mate is mapped to the reverse strand" ), - (:READ1, UInt16(0x040), "this is read1" ), - (:READ2, UInt16(0x080), "this is read2" ), - (:SECONDARY, UInt16(0x100), "not primary alignment" ), - (:QCFAIL, UInt16(0x200), "QC failure" ), - (:DUP, UInt16(0x400), "optical or PCR duplicate" ), - (:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment" ),] + (:PAIRED, UInt16(0x001), "the segment is paired with other segments"), + (:PROPER_PAIR, UInt16(0x002), "the segment is in a template where all segments are properly aligned according to the aligner"), + (:UNMAPPED, UInt16(0x004), "the segment itself is unmapped; conflictive with FLAG_PROPER_PAIR"), + (:NEXT_UNMAPPED, UInt16(0x008), "the next segment in the template is unmapped"), + (:REVERSE, UInt16(0x010), "the *SEQ*uence is reverse complemented"), + (:NEXT_REVERSE, UInt16(0x020), "the *SEQ*uence of the next segment in the template is reverse complemented" ), + (:FIRST_SEGMENT, UInt16(0x040), "the segment is the first in the template"), + (:LAST_SEGMENT, UInt16(0x080), "the segment is last in the template"), + (:SECONDARY, UInt16(0x100), "not primary alignment"), + (:QCFAIL, UInt16(0x200), "QC failure"), + (:DUPLICATE, UInt16(0x400), "optical or PCR duplicate"), + (:SUPPLEMENTARY, UInt16(0x800), "supplementary alignment"), + ] @assert bits isa UInt16 "The bits must be of type UInt16." sym = Symbol("FLAG_", name) docstring = """ $sym @@ -54,7 +59,7 @@ end """ ispaired(record::XAMRecord)::Bool -Query whether the `record`'s template has multiple segments in sequencing. +Query whether the segment is in a template having multiple segments in sequencing. """ function ispaired(record::XAMRecord)::Bool return flags(record) & FLAG_PAIRED == FLAG_PAIRED @@ -63,7 +68,7 @@ end """ isproperpair(record::XAMRecord)::Bool -Query whether each segment of the `record`'s template properly aligned according to the aligner. +Query whether the segment is in a template where all segments are properly aligned according to the aligner. """ function isproperpair(record::XAMRecord)::Bool return flags(record) & PROPER_PAIR == PROPER_PAIR @@ -72,125 +77,145 @@ end """ isunmapped(record::XAMRecord)::Bool -Query whether the `record` is unmapped. +Query whether the segment is unmapped. """ function isunmapped(record::XAMRecord)::Bool - return flags(record) & FLAG_UNMAP == FLAG_UNMAP + return flags(record) & FLAG_UNMAPPED == FLAG_UNMAPPED end """ ismapped(record::XAMRecord)::Bool -Query whether the `record` is mapped. +Query whether the segment is mapped. """ function ismapped(record::XAMRecord)::Bool - # return flags(record) & FLAG_UNMAP == 0 - return isfilled(record) && (flags(record) & FLAG_UNMAP == 0) + # return flags(record) & FLAG_UNMAPPED == 0 + return isfilled(record) && (flags(record) & FLAG_UNMAPPED == 0) end """ - ismateunmapped(record::XAMRecord)::Bool + isnextunmapped(record::XAMRecord)::Bool -Query whether the `record`'s mate is unmapped. +Query whether the next segment in the template is unmapped. """ -function ismateunmapped(record::XAMRecord)::Bool - return flags(record) & FLAG_MUNMAP == FLAG_MUNMAP +function isnextunmapped(record::XAMRecord)::Bool + return flags(record) & FLAG_NEXT_UNMAPPED == FLAG_NEXT_UNMAPPED end """ isnextmapped(record::XAMRecord)::Bool -Test if the mate/next read of `record` is mapped. +Query whether the next segment in the template is mapped. """ function isnextmapped(record::XAMRecord)::Bool - return flags(record) & FLAG_MUNMAP == 0 + return flags(record) & FLAG_NEXT_UNMAPPED == 0 end """ isreverse(record::XAMRecord)::Bool -Query whether the `record` is mapped to the reverse strand. +Query whether the `record.SEQ`uence is reverse complemented. """ -function isreverse(record::XAMRecord)::Bool +function isreversecomplemented(record::XAMRecord)::Bool return flags(record) & FLAG_REVERSE == FLAG_REVERSE end """ isforward(record::XAMRecord)::Bool -Query whether the `record` is mapped to the forward strand. +Query whether the `record.SEQ`uence is mapped to the forward strand. """ -function isforward(record::XAMRecord)::Bool - return flags(record) & FLAG_REVERSE == 0 +function isforwardstrand(record::XAMRecord)::Bool + # return flags(record) & FLAG_REVERSE == 0 + return !isreversecomplemented(record) # Note: this is an interpretation of FLAG_REVERSE. end """ ispositivestrand(record::XAMRecord)::Bool -Query whether `record` is aligned to the positive strand. +Query whether the `record.SEQ`uence is aligned to the positive strand. """ function ispositivestrand(record::XAMRecord)::Bool - return isforward(record) + return isforwardstrand(record) +end + +""" + isreversestrand(record::XAMRecord)::Bool + +Query whether the `record.SEQ`uence is aligned to the reverse strand. +""" +function isreversestrand(record::XAMRecord)::Bool + return isreversecomplemented(record) # Note: this is an interpretation of FLAG_REVERSE. end """ ispositivestrand(record::XAMRecord)::Bool -Query whether `record` is aligned to the negative strand. +Query whether the `record.SEQ`uence is aligned to the negative strand. """ function isnegativestrand(record::XAMRecord)::Bool - return isreverse(record) + return isreversestrand(record) +end + +""" + isnextreversecomplemented(record::XAMRecord)::Bool + +Query whether the next segment in the template is reverse complemented. +""" +function isnextreversecomplemented(record::XAMRecord)::Bool + return flags(record) & FLAG_NEXT_REVERSE == FLAG_NEXT_REVERSE end """ - ismatereverse(record::XAMRecord)::Bool + isfirstsegment(record::XAMRecord)::Bool -Query whether the `record`'s mate is mapped to the reverse strand. +Query whether the segemnt is first in the template. """ -function ismatereverse(record::XAMRecord)::Bool - return flags(record) & FLAG_MREVERSE == FLAG_MREVERSE +function isfirstsegment(record::XAMRecord)::Bool + return flags(record) & FLAG_FIRST_SEGMENT == FLAG_FIRST_SEGMENT end """ isread1(record::XAMRecord)::Bool -Query whether the `record` is read1. +From a paired-end sequencing point of view, query whether the read is read1. """ function isread1(record::XAMRecord)::Bool - return flags(record) & FLAG_READ1 == FLAG_READ1 + return isfirstsegment(record) +end + +""" + islastsegment(record::XAMRecord)::Bool + +Query whether the segemnt is last in the template. +""" +function islastsegment(record::XAMRecord)::Bool + return flags(record) & FLAG_LAST_SEGMENT == FLAG_LAST_SEGMENT end """ isread2(record::XAMRecord)::Bool -Query whether the `record` is read2. +From a paired-end sequencing point of view, query whether the read is read2. """ function isread2(record::XAMRecord)::Bool - return flags(record) & FLAG_READ2 == FLAG_READ2 + return islastsegment(record) end """ issecondaryalignment(record::XAMRecord)::Bool -Query whether the `record` is a secondary alignment. +Query whether the read is considered to be the secondary alignment. """ function issecondaryalignment(record::XAMRecord)::Bool return flags(record) & FLAG_SECONDARY == FLAG_SECONDARY end -""" - isprimaryalignment(record::XAMRecord)::Bool - -Query whether the `record` is the primary alignment. -""" -function isprimaryalignment(record::XAMRecord)::Bool - return flags(record) & FLAG_SECONDARY == 0 -end """ isqcfail(record::XAMRecord)::Bool -Query whether the `record` did not pass filters, such as platform/vendor quality controls. +Query whether the read failed filters, such as platform/vendor quality controls. """ function isqcfail(record::XAMRecord)::Bool return flags(record) & FLAG_QCFAIL == FLAG_QCFAIL @@ -199,28 +224,28 @@ end """ isduplicate(record::XAMRecord)::Bool -Query whether the `record` is a PCR or optical duplicate. +Query whether the read is a PCR or optical duplicate. """ function isduplicate(record::XAMRecord)::Bool - return flags(record) & FLAG_DUP == FLAG_DUP + return flags(record) & FLAG_DUPLICATE == FLAG_DUPLICATE end """ issupplementaryalignment(record::XAMRecord)::Bool -Query whether the `record` is a supplementary alignment. +Query whether the read alignment is considered to be a supplementary alignment. """ function issupplementaryalignment(record::XAMRecord)::Bool return flags(record) & FLAG_SUPPLEMENTARY == FLAG_SUPPLEMENTARY end """ - isprimary(record::XAMRecord)::Bool - -Query whether `record` is a primary line of the read. + isprimaryalignment(record::XAMRecord)::Bool -This is equivalent to `flags(record) & 0x900 == 0`. +Query whether the read alignment is considered to be the primary alignment. +This is primary line of the read and is equivalent to `flags(record) & 0x900 == 0`. """ -function isprimary(record::XAMRecord)::Bool +function isprimaryalignment(record::XAMRecord)::Bool + # return !issecondaryalignment(record) && !issupplementaryalignment(record) return flags(record) & 0x900 == 0 end diff --git a/src/sam/sam.jl b/src/sam/sam.jl index ad6e282..2c1718d 100644 --- a/src/sam/sam.jl +++ b/src/sam/sam.jl @@ -12,7 +12,7 @@ import BioGenerics.Automa: State import BioSequences import TranscodingStreams: TranscodingStreams, TranscodingStream import ..XAM: flags, XAMRecord, XAMReader, XAMWriter, - ismapped, isprimary, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. + ismapped, isprimaryalignment, ispositivestrand, isnextmapped #TODO: Deprecate import of flag queries. These were imported to preseve existing API. using Printf: @sprintf diff --git a/test/test_bam.jl b/test/test_bam.jl index 3bfa29d..ca515aa 100644 --- a/test/test_bam.jl +++ b/test/test_bam.jl @@ -54,8 +54,8 @@ record = BAM.Record() read!(reader, record) @test BAM.ismapped(record) - @test BAM.isprimary(record) - @test ! BAM.ispositivestrand(record) + @test BAM.isprimaryalignment(record) + @test !BAM.ispositivestrand(record) @test BAM.refname(record) == "CHROMOSOME_I" @test BAM.refid(record) === 1 @test BAM.hasnextrefid(record) diff --git a/test/test_sam.jl b/test/test_sam.jl index 837129d..4d4f4b3 100644 --- a/test/test_sam.jl +++ b/test/test_sam.jl @@ -54,7 +54,7 @@ @test isfilled(record) @test occursin(r"^XAM.SAM.Record:\n", repr(record)) @test SAM.ismapped(record) - @test SAM.isprimary(record) + @test SAM.isprimaryalignment(record) @test SAM.hastempname(record) @test SAM.tempname(record) == "r001" @test SAM.hasflags(record)