From 1bd9dac0896f69bb4c5ca27d130ae5e95f97abb7 Mon Sep 17 00:00:00 2001 From: Diego Javier Zea Date: Wed, 26 Jun 2024 16:12:20 +0200 Subject: [PATCH] Deprecate buslje09 and BLMI using a filename and format --- NEWS.md | 8 +++++ Project.toml | 2 +- src/Information/CorrectedMutualInformation.jl | 12 ++++--- .../Information/CorrectedMutualInformation.jl | 31 ++++++++++++------- 4 files changed, 35 insertions(+), 18 deletions(-) diff --git a/NEWS.md b/NEWS.md index 214ee3bc..6c954789 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,13 @@ ## MIToS.jl Release Notes +### Changes from v2.20.0 to v2.21.0 + +* *[Breaking change]* The `buslje09` and `BLMI` methods of the `Information` module that + take a filename and a file format as arguments are deprecated. You should explicitly read + the MSA using the `read_file` function and then run the `buslje09` or `BLMI` functions + on the returned MSA object. As an example of migration, `buslje09("msa.sto", Stockholm)` + should be replaced by `buslje09(read_file("msa.sto", Stockholm))`. + ### Changes from v2.19.0 to v2.20.0 * *[Breaking change]* The PDB module has deprecated `residues` and `@residues` in favor of diff --git a/Project.toml b/Project.toml index 50cd9bff..4b177772 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "MIToS" uuid = "51bafb47-8a16-5ded-8b04-24ef4eede0b5" -version = "2.20.0" +version = "2.21.0" [deps] ArgParse = "c7e460c6-2fb9-53a9-8c5b-16f535851c63" diff --git a/src/Information/CorrectedMutualInformation.jl b/src/Information/CorrectedMutualInformation.jl index 568af1c1..b7b47d5a 100644 --- a/src/Information/CorrectedMutualInformation.jl +++ b/src/Information/CorrectedMutualInformation.jl @@ -17,8 +17,8 @@ function _buslje09(aln, alphabet::A, clusters, lambda, apc) where A end """ -`buslje09` takes a MSA or a file and a `FileFormat` as first arguments. It calculates a Z score -and a corrected MI/MIp as described on **Busjle et. al. 2009**. 
+`buslje09` takes an MSA and calculates a Z score and a corrected MI/MIp as described +on **Buslje et al. 2009**. keyword argument, type, default value and descriptions: @@ -68,6 +68,7 @@ function buslje09(aln::AbstractMatrix{Residue}; end function buslje09(filename::String, format::Type{T}; kargs...) where T <: FileFormat + @warn "Using a file name and format with `buslje09` is deprecated. Use `read_file` to read an MSA object and call `buslje09` on it." aln = read_file(filename, T, AnnotatedMultipleSequenceAlignment, generatemapping=true) buslje09(aln; kargs...) end @@ -92,9 +93,9 @@ function _BLMI(aln, clusters, alpha, beta, apc, lambda::Float64=0.0) end """ -`BLMI` takes a MSA or a file and a `FileFormat` as first arguments. It calculates a Z score -(ZBLMI) and a corrected MI/MIp as described on **Busjle et. al. 2009** but using using -BLOSUM62 pseudo frequencies instead of a fixed pseudocount. +`BLMI` takes an MSA and calculates a Z score (ZBLMI) and a corrected MI/MIp as described +on **Buslje et al. 2009** but using BLOSUM62 pseudo frequencies instead of a +fixed pseudocount. Keyword argument, type, default value and descriptions: @@ -143,6 +144,7 @@ function BLMI(aln::AbstractMatrix{Residue}; end function BLMI(filename::String, format::Type{T}; kargs...) where T <: FileFormat + @warn "Using a file name and format with `BLMI` is deprecated. Use `read_file` to read an MSA object and call `BLMI` on it." aln = read_file(filename, T, AnnotatedMultipleSequenceAlignment, generatemapping=true) BLMI(aln; kargs...) 
end diff --git a/test/Information/CorrectedMutualInformation.jl b/test/Information/CorrectedMutualInformation.jl index a525a1f7..38d446d1 100644 --- a/test/Information/CorrectedMutualInformation.jl +++ b/test/Information/CorrectedMutualInformation.jl @@ -225,8 +225,8 @@ @testset "Simple" begin data = readdlm(joinpath(DATA, "data_simple_soft_Busljeetal2009_measure_MI.txt"), comments=true) - results = buslje09(joinpath(DATA, "simple.fasta"), FASTA, - lambda=0.0, clustering=false, apc=false) + _msa = read_file(joinpath(DATA, "simple.fasta"), FASTA) + results = buslje09(_msa, lambda=0.0, clustering=false, apc=false) @test isapprox(Float64(data[1, SCORE]), results[MIToS_SCORE][1,2], atol=1e-6) @test isapprox(Float64(data[1, ZSCORE]), results[MIToS_ZSCORE][1,2], atol=2.) @@ -234,7 +234,8 @@ @testset "Gaoetal2011" begin data = readdlm(gao11_buslje09("MI"), comments=true) - results = buslje09(Gaoetal2011, FASTA, lambda=0.0, clustering=false, apc=false) + _msa = read_file(Gaoetal2011, FASTA) + results = buslje09(_msa, lambda=0.0, clustering=false, apc=false) @test isapprox([ Float64(x) for x in data[:,SCORE] ], matrix2list(results[MIToS_SCORE]), atol=1e-6) @@ -252,7 +253,8 @@ # 1.38629 = 0.693147 + 0.693147 end - result_0_05 = buslje09(Gaoetal2011,FASTA,lambda=0.05,clustering=false,apc=false) + _msa = read_file(Gaoetal2011, FASTA) + result_0_05 = buslje09(_msa,lambda=0.05,clustering=false,apc=false) @test isapprox(result_0_05[MIToS_SCORE][1,2], 0.33051006116310444, atol=1e-14) end end @@ -260,7 +262,8 @@ @testset "MI + clustering" begin data = readdlm(gao11_buslje09("MI_clustering"), comments=true) - results = buslje09(Gaoetal2011, FASTA, lambda=0.0, clustering=true, apc=false) + _msa = read_file(Gaoetal2011, FASTA) + results = buslje09(_msa, lambda=0.0, clustering=true, apc=false) @test isapprox([ Float64(x) for x in data[:,SCORE] ], matrix2list(results[MIToS_SCORE]), atol=1e-6) @@ -271,7 +274,8 @@ @testset "MIp" begin data = readdlm(gao11_buslje09("MI_APC"), 
comments=true) - results = buslje09(Gaoetal2011, FASTA, lambda=0.0, clustering=false, apc=true) + _msa = read_file(Gaoetal2011, FASTA) + results = buslje09(_msa, lambda=0.0, clustering=false, apc=true) @test isapprox([ Float64(x) for x in data[:,SCORE] ], matrix2list(results[MIToS_SCORE]), atol=1e-6) @@ -284,7 +288,8 @@ @testset "MIp + clustering" begin data = readdlm(gao11_buslje09("MI_APC_clustering"), comments=true) - results = buslje09(Gaoetal2011, FASTA, lambda=0.0, clustering=true, apc=true) + _msa = read_file(Gaoetal2011, FASTA) + results = buslje09(_msa, lambda=0.0, clustering=true, apc=true) @test isapprox([ Float64(x) for x in data[:,SCORE] ], matrix2list(results[MIToS_SCORE]), atol=1e-6) @@ -298,8 +303,9 @@ @testset "Simple" begin file = joinpath(DATA, "simple.fasta") - busl = buslje09(file, FASTA) - blmi = BLMI(file, FASTA) + msa = read_file(file, FASTA) + busl = buslje09(msa) + blmi = BLMI(msa) @test PairwiseListMatrices.getlist(busl[1]) ≈ PairwiseListMatrices.getlist(blmi[1]) @@ -309,7 +315,7 @@ @testset "Gaoetal2011" begin msa = read_file(Gaoetal2011, FASTA) - busl = buslje09(Gaoetal2011, FASTA, lambda=0.0, samples=0) + busl = buslje09(msa, lambda=0.0, samples=0) blmi = BLMI(msa, lambda=0.0, beta=0.0, samples=5) # BLMI should be equal to Buslje09 if beta is zero @@ -320,8 +326,9 @@ @testset "Gaoetal2011, lambda 0.05" begin - busl = buslje09(Gaoetal2011, FASTA, lambda=0.5, samples=0) - blmi = BLMI(Gaoetal2011, FASTA, lambda=0.5, beta=0.0, samples=5) + msa = read_file(Gaoetal2011, FASTA) + busl = buslje09(msa, lambda=0.5, samples=0) + blmi = BLMI(msa, lambda=0.5, beta=0.0, samples=5) # BLMI should be equal to Buslje09 if beta is zero @test PairwiseListMatrices.getlist(busl[2]) ≈