Skip to content

Commit

Permalink
Add AbstractVariant.alleleKey field and alleleKey() accessor method
Browse files Browse the repository at this point in the history
Update AllelePropertiesDaoMvStore, ClinVarDaoMvStore, RemmDao and LocalFrequencyDao to use the memoised Variant.alleleKey()
  • Loading branch information
julesjacobsen committed Feb 7, 2024
1 parent 58755af commit 69402bf
Show file tree
Hide file tree
Showing 12 changed files with 125 additions and 88 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -52,43 +52,43 @@ public class Acmg2015EvidenceAssigner implements AcmgEvidenceAssigner {
// https://www.clinicalgenome.org/site/assets/files/3460/ba1_exception_list_07_30_2018.pdf
private static final Set<AlleleProto.AlleleKey> HG19_BA1_EXCLUSION_VARIANTS = Set.of(
// ClinVar 1018 - 3: 128598490 (GRCh37) 128879647 (GRCh38) (SPDI: NC_000003.12:128879647:TAAG:TAAGTAAG)
AlleleProto.AlleleKey.newBuilder().setChr(3).setPosition(128_598_490).setRef("C").setAlt("CTAAG").build(),
AlleleProtoAdaptor.toAlleleKey(3, 128_598_490, "C", "CTAAG"),
// ClinVar 17023 - 13: 20763612 (GRCh37) 20189473 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(13).setPosition(20_763_612).setRef("C").setAlt("T").build(),
AlleleProtoAdaptor.toAlleleKey(13, 20_763_612, "C", "T"),
// ClinVar 10 - 6: 26091179 (GRCh37) 26090951 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(6).setPosition(26_091_179).setRef("C").setAlt("G").build(),
AlleleProtoAdaptor.toAlleleKey(6, 26_091_179, "C", "G"),
// ClinVar 9 - 6: 26093141 (GRCh37) 26092913 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(6).setPosition(26_093_141).setRef("G").setAlt("A").build(),
AlleleProtoAdaptor.toAlleleKey(6, 26_093_141, "G", "A"),
// ClinVar 2551 - 16: 3299586 (GRCh37) 3249586 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(16).setPosition(3_299_586).setRef("G").setAlt("A").build(),
AlleleProtoAdaptor.toAlleleKey(16, 3_299_586, "G", "A"),
// ClinVar 2552 - 16: 3299468 (GRCh37) 3249468 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(16).setPosition(3_299_468).setRef("C").setAlt("T").build(),
AlleleProtoAdaptor.toAlleleKey(16, 3_299_468, "C", "T"),
// ClinVar 217689 - 13: 73409497 (GRCh37) 72835359 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(13).setPosition(73_409_497).setRef("G").setAlt("A").build(),
AlleleProtoAdaptor.toAlleleKey(13, 73_409_497, "G", "A"),
// ClinVar 3830 - 12: 121175678 (GRCh37) 120737875 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(12).setPosition(121_175_678).setRef("C").setAlt("T").build(),
AlleleProtoAdaptor.toAlleleKey(12, 121_175_678, "C", "T"),
// ClinVar 1900 - 3: 15686693 (GRCh37) 15645186 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(3).setPosition(15_686_693).setRef("G").setAlt("C").build()
AlleleProtoAdaptor.toAlleleKey(3, 15_686_693, "G", "C")
);
private static final Set<AlleleProto.AlleleKey> HG38_BA1_EXCLUSION_VARIANTS = Set.of(
// ClinVar 1018 - 3: 128598490 (GRCh37) 128879647 (GRCh38) (SPDI: NC_000003.12:128879647:TAAG:TAAGTAAG)
AlleleProto.AlleleKey.newBuilder().setChr(3).setPosition(128_879_647).setRef("C").setAlt("CTAAG").build(),
AlleleProtoAdaptor.toAlleleKey(3, 128_879_647, "C", "CTAAG"),
// ClinVar 17023 - 13: 20763612 (GRCh37) 20189473 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(13).setPosition(20_189_473).setRef("C").setAlt("T").build(),
AlleleProtoAdaptor.toAlleleKey(13, 20_189_473, "C", "T"),
// ClinVar 10 - 6: 26091179 (GRCh37) 26090951 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(6).setPosition(26_090_951).setRef("C").setAlt("G").build(),
AlleleProtoAdaptor.toAlleleKey(6, 26_090_951, "C", "G"),
// ClinVar 9 - 6: 26093141 (GRCh37) 26092913 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(6).setPosition(26_092_913).setRef("G").setAlt("A").build(),
AlleleProtoAdaptor.toAlleleKey(6, 26_092_913, "G", "A"),
// ClinVar 2551 - 16: 3299586 (GRCh37) 3249586 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(16).setPosition(3_249_586).setRef("G").setAlt("A").build(),
AlleleProtoAdaptor.toAlleleKey(16, 3_249_586, "G", "A"),
// ClinVar 2552 - 16: 3299468 (GRCh37) 3249468 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(16).setPosition(3_249_468).setRef("C").setAlt("T").build(),
AlleleProtoAdaptor.toAlleleKey(16, 3_249_468, "C", "T"),
// ClinVar 217689 - 13: 73409497 (GRCh37) 72835359 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(13).setPosition(72_835_359).setRef("G").setAlt("A").build(),
AlleleProtoAdaptor.toAlleleKey(13, 72_835_359, "G", "A"),
// ClinVar 3830 - 12: 121175678 (GRCh37) 120737875 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(12).setPosition(120_737_875).setRef("C").setAlt("T").build(),
AlleleProtoAdaptor.toAlleleKey(12, 120_737_875, "C", "T"),
// ClinVar 1900 - 3: 15686693 (GRCh37) 15645186 (GRCh38)
AlleleProto.AlleleKey.newBuilder().setChr(3).setPosition(15_645_186).setRef("G").setAlt("C").build()
AlleleProtoAdaptor.toAlleleKey(3, 15_645_186, "G", "C")
);

private final String probandId;
Expand Down Expand Up @@ -131,7 +131,8 @@ public AcmgEvidence assignVariantAcmgEvidence(VariantEvaluation variantEvaluatio
// Updated recommendation: "Allele frequency is >0.05 in any general continental population dataset of at least
// 2,000 observed alleles and found in a gene without a gene- or variant-specific BA1 modification." i.e. ExAC
// African, East Asian, European [non-Finnish], Latino, and South Asian
AlleleProto.AlleleKey alleleKey = AlleleProtoAdaptor.toAlleleKey(variantEvaluation);
// AlleleProto.AlleleKey alleleKey = AlleleProtoAdaptor.toAlleleKey(variantEvaluation);
AlleleProto.AlleleKey alleleKey = variantEvaluation.alleleKey();
boolean isBa1ExcludedVariant = variantEvaluation.getGenomeAssembly() == GenomeAssembly.HG19 ? HG19_BA1_EXCLUSION_VARIANTS.contains(alleleKey) : HG38_BA1_EXCLUSION_VARIANTS.contains(alleleKey);
if (!isBa1ExcludedVariant && frequencyData.getMaxFreqForPopulation(FrequencySource.NON_FOUNDER_POPS) >= 5.0) {
acmgEvidenceBuilder.add(BA1);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
import org.h2.mvstore.MVStore;
import org.monarchinitiative.exomiser.core.genome.GenomeAssembly;
import org.monarchinitiative.exomiser.core.genome.dao.serialisers.MvStoreUtil;
import org.monarchinitiative.exomiser.core.model.AlleleProtoAdaptor;
import org.monarchinitiative.exomiser.core.model.Variant;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import org.monarchinitiative.exomiser.core.proto.AlleleProtoFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.cache.annotation.Cacheable;
Expand All @@ -50,24 +50,21 @@ public AllelePropertiesDaoMvStore(MVStore mvStore) {
}

@Caching(cacheable = {
@Cacheable(cacheNames = "hg19.allele", condition = "#genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG19"),
@Cacheable(cacheNames = "hg38.allele", condition = "#genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG38"),
@Cacheable(cacheNames = "hg19.allele", key = "#alleleKey", condition = "#genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG19"),
@Cacheable(cacheNames = "hg38.allele", key = "#alleleKey", condition = "#genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG38"),
})
@Override
public AlleleProto.AlleleProperties getAlleleProperties(AlleleProto.AlleleKey alleleKey, GenomeAssembly genomeAssembly) {
AlleleProto.AlleleProperties alleleProperties = map.getOrDefault(alleleKey, AlleleProto.AlleleProperties.getDefaultInstance());
logger.debug("{} {}", alleleKey, alleleProperties);
if (logger.isDebugEnabled()) {
logger.debug("{} {}", AlleleProtoFormatter.format(alleleKey), AlleleProtoFormatter.format(alleleProperties));
}
return alleleProperties;
}

@Caching(cacheable = {
@Cacheable(cacheNames = "hg19.allele", keyGenerator = "variantKeyGenerator", condition = "T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG19.containsContig(#variant.contig())"),
@Cacheable(cacheNames = "hg38.allele", keyGenerator = "variantKeyGenerator", condition = "T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG38.containsContig(#variant.contig())"),
})
@Override
public AlleleProto.AlleleProperties getAlleleProperties(Variant variant) {
AlleleProto.AlleleKey alleleKey = AlleleProtoAdaptor.toAlleleKey(variant);
return getAlleleProperties(alleleKey, variant.getGenomeAssembly());
return getAlleleProperties(variant.alleleKey(), variant.getGenomeAssembly());
}

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.monarchinitiative.exomiser.core.genome.dao;

import org.monarchinitiative.exomiser.core.model.Variant;
import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import org.monarchinitiative.svart.GenomicInterval;
import org.monarchinitiative.svart.GenomicVariant;

Expand All @@ -14,7 +16,11 @@
*/
public interface ClinVarDao {

ClinVarData getClinVarData(GenomicVariant variant);
ClinVarData getClinVarData(Variant variant);

ClinVarData getClinVarData(GenomicVariant genomicVariant);

ClinVarData getClinVarData(AlleleProto.AlleleKey alleleKey);

Map<GenomicVariant, ClinVarData> findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval);
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.h2.mvstore.MVStore;
import org.monarchinitiative.exomiser.core.genome.dao.serialisers.MvStoreUtil;
import org.monarchinitiative.exomiser.core.model.AlleleProtoAdaptor;
import org.monarchinitiative.exomiser.core.model.Variant;
import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import org.monarchinitiative.svart.*;
Expand All @@ -29,12 +30,16 @@ public ClinVarDaoMvStore(MVStore mvStore) {
}

@Override
public ClinVarData getClinVarData(@Nonnull GenomicVariant variant) {
AlleleProto.AlleleKey alleleKey = AlleleProtoAdaptor.toAlleleKey(variant);
return getClinVarData(alleleKey);
public ClinVarData getClinVarData(@Nonnull Variant variant) {
return getClinVarData(variant.alleleKey());
}

private ClinVarData getClinVarData(AlleleProto.AlleleKey alleleKey) {
/**
 * Looks up the ClinVar data for a generic {@link GenomicVariant}.
 * <p>
 * The allele key is built from the variant on every call and the lookup is then delegated to the
 * key-based overload.
 *
 * @param genomicVariant the variant to look up
 * @return the result of the key-based lookup for this variant's allele key
 */
@Override
public ClinVarData getClinVarData(@Nonnull GenomicVariant genomicVariant) {
    AlleleProto.AlleleKey lookupKey = AlleleProtoAdaptor.toAlleleKey(genomicVariant);
    return getClinVarData(lookupKey);
}

/**
 * Returns the ClinVar data stored under the given allele key.
 *
 * @param alleleKey the key to look up in the ClinVar map
 * @return the converted ClinVar record, or {@link ClinVarData#empty()} when the map holds no entry for the key
 */
@Override
public ClinVarData getClinVarData(@Nonnull AlleleProto.AlleleKey alleleKey) {
    AlleleProto.ClinVar clinVar = clinVarMap.get(alleleKey);
    // a missing entry is reported as the empty ClinVarData rather than null
    return clinVar == null ? ClinVarData.empty() : AlleleProtoAdaptor.toClinVarData(clinVar);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ private InMemoryVariantWhiteList(Set<AlleleProto.AlleleKey> whiteList) {

@Override
public boolean contains(Variant variant) {
AlleleProto.AlleleKey alleleKey = AlleleProtoAdaptor.toAlleleKey(variant);
AlleleProto.AlleleKey alleleKey = variant.alleleKey();
return whiteList.contains(alleleKey);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ public LocalFrequencyDao(TabixDataSource localFrequencyTabixDataSource) {
}

@Caching(cacheable = {
@Cacheable(cacheNames = "hg19.local", keyGenerator = "variantKeyGenerator", condition = "#variant.genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG19"),
@Cacheable(cacheNames = "hg38.local", keyGenerator = "variantKeyGenerator", condition = "#variant.genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG38"),
@Cacheable(cacheNames = "hg19.local", key = "#variant.alleleKey()", condition = "#variant.genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG19"),
@Cacheable(cacheNames = "hg38.local", key = "#variant.alleleKey()", condition = "#variant.genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG38"),
})
@Override
public FrequencyData getFrequencyData(Variant variant) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@

import java.io.IOException;

import static org.monarchinitiative.svart.VariantType.INS;

/**
*
* @author Jules Jacobsen <[email protected]>
Expand All @@ -52,8 +54,8 @@ public RemmDao(TabixDataSource remmTabixDataSource) {
}

@Caching(cacheable = {
@Cacheable(cacheNames = "hg19.remm", keyGenerator = "variantKeyGenerator", condition = "#variant.genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG19"),
@Cacheable(cacheNames = "hg38.remm", keyGenerator = "variantKeyGenerator", condition = "#variant.genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG38"),
@Cacheable(cacheNames = "hg19.remm", key = "#variant.alleleKey()", condition = "#variant.genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG19"),
@Cacheable(cacheNames = "hg38.remm", key = "#variant.alleleKey()", condition = "#variant.genomeAssembly == T(org.monarchinitiative.exomiser.core.genome.GenomeAssembly).HG38"),
})
@Override
public PathogenicityData getPathogenicityData(Variant variant) {
Expand All @@ -62,46 +64,13 @@ public PathogenicityData getPathogenicityData(Variant variant) {
if (variant.getVariantEffect() == VariantEffect.MISSENSE_VARIANT) {
return PathogenicityData.empty();
}
return processResults(variant);
}

private PathogenicityData processResults(Variant variant) {
String chromosome = variant.contigName();
int start = variant.start();
int end = calculateEndPosition(variant);
// test bases either side of insertion or all bases of a SNV/MNV/DEL
int end = variant.variantType() == INS ? variant.end() + 1 : variant.end();
return getRemmData(chromosome, start, end);
}

private int calculateEndPosition(Variant variant) {
int pos = variant.start();

//we're doing this here in order not to have to count all this each time we need the value
int refLength = variant.ref().length();
int altLength = variant.alt().length();
//What about MNV?
if (refLength == altLength) {
return pos;
}
//these end positions are calculated according to recommendation by Max and Peter who produced the REMM score
//don't change this unless they say.
if (isDeletion(refLength, altLength)) {
// test all deleted bases (being 1-based we need to correct the length)
return pos + refLength - 1;
} else if (isInsertion(refLength, altLength)) {
// test bases either side of insertion
return pos + 1;
}
return pos;
}

private static boolean isDeletion(int refLength, int altLength) {
return refLength > altLength;
}

private static boolean isInsertion(int refLength, int altLength) {
return refLength < altLength;
}

private synchronized PathogenicityData getRemmData(String chromosome, int start, int end) {
try {
float score = Float.NaN;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import de.charite.compbio.jannovar.annotation.VariantEffect;
import org.monarchinitiative.exomiser.core.genome.GenomeAssembly;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import org.monarchinitiative.svart.*;

import java.util.List;
Expand All @@ -34,13 +35,16 @@ public abstract class AbstractVariant extends BaseGenomicVariant<AbstractVariant

final GenomeAssembly genomeAssembly;

final AlleleProto.AlleleKey alleleKey;

final String geneSymbol;
final String geneId;
final VariantEffect variantEffect;
final List<TranscriptAnnotation> annotations;

AbstractVariant(Builder<?> builder) {
super(builder);
this.alleleKey = AlleleProtoAdaptor.toAlleleKey(this);
this.genomeAssembly = builder.genomeAssembly;
this.geneSymbol = builder.geneSymbol;
this.geneId = builder.geneId;
Expand All @@ -50,13 +54,19 @@ public abstract class AbstractVariant extends BaseGenomicVariant<AbstractVariant

/**
 * Creates a variant directly from its components, without going through a {@code Builder}.
 * <p>
 * The last two arguments of the super constructor are passed as empty strings. The allele key is
 * computed once here and stored, so {@code alleleKey()} returns the stored instance.
 *
 * @param annotations transcript annotations; stored as an unmodifiable copy made with {@link List#copyOf}
 */
AbstractVariant(Contig contig,
                String id,
                Strand strand,
                Coordinates coordinates,
                String ref,
                String alt,
                int changeLength,
                GenomeAssembly genomeAssembly,
                String geneSymbol,
                String geneId,
                VariantEffect variantEffect,
                List<TranscriptAnnotation> annotations) {
    super(contig, id, strand, coordinates, ref, alt, changeLength, "", "");
    // the key is derived from this object via its GenomicVariant accessors (contig id, start, ref, alt)
    this.alleleKey = AlleleProtoAdaptor.toAlleleKey(this);
    // defensive, unmodifiable copy of the caller's list
    this.annotations = List.copyOf(annotations);
    this.variantEffect = variantEffect;
    this.geneId = geneId;
    this.geneSymbol = geneSymbol;
    this.genomeAssembly = genomeAssembly;
}

/**
 * Returns the allele key that was assigned to this variant when it was constructed.
 *
 * @return the stored {@link AlleleProto.AlleleKey} for this variant
 */
@Override
public AlleleProto.AlleleKey alleleKey() {
    return this.alleleKey;
}

public String getGeneSymbol() {
return geneSymbol;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,14 +101,17 @@ private AlleleProtoAdaptor() {
//un-instantiable utility class
}

// This would make sense to have this here rather than having similar functionality in the MvStoreUtil
// and the VariantKeyGenerator

/**
 * Builds an {@link AlleleKey} from the contig id, start, ref and alt of the given variant.
 *
 * @param variant the variant to create the key for
 * @return the key produced by the four-argument overload for these values
 */
public static AlleleKey toAlleleKey(GenomicVariant variant) {
    int contigId = variant.contigId();
    int start = variant.start();
    String ref = variant.ref();
    String alt = variant.alt();
    return toAlleleKey(contigId, start, ref, alt);
}

public static AlleleKey toAlleleKey(int contigId, int start, String ref, String alt) {
return AlleleKey.newBuilder()
.setChr(variant.contigId())
.setPosition(variant.start())
.setRef(variant.ref())
.setAlt(variant.alt())
.setChr(contigId)
.setPosition(start)
.setRef(ref)
.setAlt(alt)
.build();
}

Expand All @@ -118,12 +121,14 @@ public static FrequencyData toFrequencyData(AlleleProperties alleleProperties) {
}
FrequencyData.Builder frequencyDataBuilder = FrequencyData.builder()
.rsId(alleleProperties.getRsId());
parseFrequencyData(frequencyDataBuilder, alleleProperties.getFrequenciesList());
parseFrequencyData(frequencyDataBuilder, alleleProperties);
return frequencyDataBuilder.build();
}

private static void parseFrequencyData(FrequencyData.Builder frequencyDataBuilder, List<AlleleProto.Frequency> frequenciesList) {
for (AlleleProto.Frequency frequency : frequenciesList) {
private static void parseFrequencyData(FrequencyData.Builder frequencyDataBuilder, AlleleProperties alleleProperties) {
int freqsCount = alleleProperties.getFrequenciesCount();
for (int i = 0; i < freqsCount; i++) {
AlleleProto.Frequency frequency = alleleProperties.getFrequencies(i);
var freqSource = toFreqSource(frequency.getFrequencySource());
var freq = Frequency.percentageFrequency(frequency.getAc(), frequency.getAn());
var hom = frequency.getHom();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@
*/
package org.monarchinitiative.exomiser.core.model;

import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import org.monarchinitiative.exomiser.core.genome.GenomeAssembly;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import org.monarchinitiative.svart.GenomicVariant;

/**
Expand All @@ -40,4 +42,7 @@ default GenomeAssembly getGenomeAssembly() {
return GenomeAssembly.assemblyOfContig(contig());
}

@JsonIgnore
AlleleProto.AlleleKey alleleKey();

}
Loading

0 comments on commit 69402bf

Please sign in to comment.