Skip to content

Commit

Permalink
Update to issues #501 & #473 - Include VariantEffect and geneSymbol a…
Browse files Browse the repository at this point in the history
…nnotations on ClinVarData at build time.
  • Loading branch information
julesjacobsen committed Nov 20, 2023
1 parent e2a00dc commit 7c9b847
Show file tree
Hide file tree
Showing 4 changed files with 216 additions and 18 deletions.
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
package org.monarchinitiative.exomiser.core.genome.dao;

import org.monarchinitiative.exomiser.core.model.Variant;
import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
import org.monarchinitiative.svart.GenomicInterval;
import org.monarchinitiative.svart.GenomicVariant;

import java.util.Map;


/**
* Interface for providing {@link ClinVarData} about a {@link Variant}.
* Interface for providing {@link ClinVarData} about a {@link GenomicVariant}.
*
* @since 14.0.0
*/
public interface ClinVarDao {

ClinVarData getClinVarData(Variant variant);

// List<ClinVarData> findClinVarRecordsOverlappingRegion(GenomicInterval genomicInterval);
//// Map<AlleleKey, PathogenicityData> pathogenicityData = mvStoreDao.getPathogenicityDataForRange(chr, min, max);
ClinVarData getClinVarData(GenomicVariant variant);

Map<GenomicVariant, ClinVarData> findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval);
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import org.h2.mvstore.MVStore;
import org.monarchinitiative.exomiser.core.genome.dao.serialisers.MvStoreUtil;
import org.monarchinitiative.exomiser.core.model.AlleleProtoAdaptor;
import org.monarchinitiative.exomiser.core.model.Variant;
import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import org.monarchinitiative.svart.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;

/**
* @since 14.0.0
Expand All @@ -26,9 +29,60 @@ public ClinVarDaoMvStore(MVStore mvStore) {
}

@Override
public ClinVarData getClinVarData(@Nonnull Variant variant) {
public ClinVarData getClinVarData(@Nonnull GenomicVariant variant) {
AlleleProto.AlleleKey alleleKey = AlleleProtoAdaptor.toAlleleKey(variant);
return getClinVarData(alleleKey);
}

private ClinVarData getClinVarData(AlleleProto.AlleleKey alleleKey) {
AlleleProto.ClinVar clinVar = clinVarMap.get(alleleKey);
return clinVar == null ? ClinVarData.empty() : AlleleProtoAdaptor.toClinVarData(clinVar);
}

@Override
public Map<GenomicVariant, ClinVarData> findClinVarRecordsOverlappingInterval(GenomicInterval genomicInterval) {
Contig contig = genomicInterval.contig();

int chr = genomicInterval.contigId();
int start = genomicInterval.start();
int end = genomicInterval.end();

// build Allele keys for map and define bounds
AlleleProto.AlleleKey lowerBound = AlleleProto.AlleleKey.newBuilder()
.setChr(chr)
.setPosition(start)
.build();
AlleleProto.AlleleKey upperBound = AlleleProto.AlleleKey.newBuilder()
.setChr(chr)
.setPosition(end)
.build();

AlleleProto.AlleleKey floorKey = clinVarMap.floorKey(lowerBound);
if (floorKey != null && floorKey.getPosition() < lowerBound.getPosition()) {
lowerBound = floorKey;
}

Map<GenomicVariant, ClinVarData> results = new LinkedHashMap<>();

Iterator<AlleleProto.AlleleKey> keyIterator = clinVarMap.keyIterator(lowerBound);
while (keyIterator.hasNext()) {
AlleleProto.AlleleKey ak = keyIterator.next();
// don't process keys out of the initial boundaries
if (ak.getPosition() >= start && ak.getPosition() <= end) {
GenomicVariant gvFromAk = alleleKeyToGenomicVariant(ak, contig, genomicInterval.coordinateSystem());
ClinVarData cvData = getClinVarData(ak);
results.put(gvFromAk, cvData);
}
if (ak.getPosition() > upperBound.getPosition()) {
break;
}
}
return results;
}

private GenomicVariant alleleKeyToGenomicVariant(AlleleProto.AlleleKey alleleKey, Contig contig, CoordinateSystem coordinateSystem) {
return GenomicVariant.builder()
.variant(contig, Strand.POSITIVE, Coordinates.ofAllele(coordinateSystem, alleleKey.getPosition(), alleleKey.getRef()), alleleKey.getRef(), alleleKey.getAlt()).build();
}

}
Original file line number Diff line number Diff line change
@@ -1,28 +1,80 @@
package org.monarchinitiative.exomiser.core.genome.dao;

import org.h2.mvstore.MVMap;
import org.h2.mvstore.MVStore;
import org.junit.jupiter.api.Test;
import org.monarchinitiative.exomiser.core.genome.GenomeAssembly;
import org.monarchinitiative.exomiser.core.genome.dao.serialisers.MvStoreUtil;
import org.monarchinitiative.exomiser.core.model.AlleleProtoAdaptor;
import org.monarchinitiative.exomiser.core.model.Variant;
import org.monarchinitiative.exomiser.core.model.VariantEvaluation;
import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
import org.monarchinitiative.svart.CoordinateSystem;
import org.monarchinitiative.svart.Coordinates;
import org.monarchinitiative.svart.GenomicVariant;
import org.monarchinitiative.svart.Strand;

import java.util.Set;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;

class ClinVarDaoMvStoreTest {

private ClinVarDaoMvStore buildClinVarDaoMvStore(String... broadFormatVariants) {
// open an in-memory store
MVStore mvStore = new MVStore.Builder().open();
MVMap<AlleleProto.AlleleKey, AlleleProto.ClinVar> clinVarMap = MvStoreUtil.openClinVarMVMap(mvStore);
for (String variant : broadFormatVariants) {
clinVarMap.put(parseAlleleKey(variant), AlleleProto.ClinVar.newBuilder().build());
}
return new ClinVarDaoMvStore(mvStore);
}

private AlleleProto.AlleleKey parseAlleleKey(String broadFormatVariant) {
String[] fields = broadFormatVariant.split("-");
return AlleleProto.AlleleKey.newBuilder()
.setChr(Integer.parseInt(fields[0]))
.setPosition(Integer.parseInt(fields[1]))
.setRef(fields[2])
.setAlt(fields[3])
.build();
}

private GenomicVariant parseVariant(String broadFormatVariant) {
String[] fields = broadFormatVariant.split("-");
return GenomicVariant.of(GenomeAssembly.HG19.getContigById(Integer.parseInt(fields[0])), Strand.POSITIVE, Coordinates.ofAllele(CoordinateSystem.ONE_BASED, Integer.parseInt(fields[1]), fields[2]), fields[2], fields[3]);
}

private final AlleleProto.AlleleKey positionStartMinus1 = parseAlleleKey("1-1229-A-G");
private final AlleleProto.AlleleKey positionEndPlus1 = parseAlleleKey("1-1231-C-G");

// Variants directly at the boundaries:
private final AlleleProto.AlleleKey positionStartMinus2 = parseAlleleKey("1-1228-A-G");
private final AlleleProto.AlleleKey positionEndPlus2 = parseAlleleKey("1-1232-T-A");

// Variants just outside the boundaries:
private final AlleleProto.AlleleKey positionStartMinus3 = parseAlleleKey("1-1227-A-G");
private final AlleleProto.AlleleKey positionEndPlus3 = parseAlleleKey("1-1233-C-T");

// Variants are exactly matching on position:
private final AlleleProto.AlleleKey positionExactlyMatches = parseAlleleKey("1-1230-T-G");

// Variants are exactly matching on position + but ref and alt are different
private final AlleleProto.AlleleKey positionExactlyMatchesButDifferentRefAlt = parseAlleleKey("1-1230-T-C");

// Variants are far out of boundaries:
private final AlleleProto.AlleleKey positionFarOutOfBoundariesMinus = parseAlleleKey("1-7700-A-G");
private final AlleleProto.AlleleKey positionFarOutOfBoundariesPlus = parseAlleleKey("1-1-A-G");

// two variants that are in range but Alt.length and Ref.length > 1
private final AlleleProto.AlleleKey positionInButLengthIncorrect = parseAlleleKey("1-1229-AA-G");
private final AlleleProto.AlleleKey positionInButLengthIncorrect1 = parseAlleleKey("1-1231-A-GG");

@Test
void getClinVarData() {
try (MVStore mvStore = new MVStore.Builder().open()) {
var clinvarMap = MvStoreUtil.openClinVarMVMap(mvStore);
AlleleProto.AlleleKey alleleKey = AlleleProto.AlleleKey.newBuilder().setChr(1).setPosition(200).setRef("A").setAlt("T").build();
AlleleProto.AlleleKey alleleKey = parseAlleleKey("1-200-A-T");
AlleleProto.ClinVar clinVar = AlleleProto.ClinVar.newBuilder()
.setAlleleId("12345")
.setPrimaryInterpretation(AlleleProto.ClinVar.ClinSig.PATHOGENIC)
Expand All @@ -31,17 +83,104 @@ void getClinVarData() {
clinvarMap.put(alleleKey, clinVar);

ClinVarDao instance = new ClinVarDaoMvStore(mvStore);
Variant clinVarVariant = VariantEvaluation.builder()
.variant(GenomeAssembly.HG19.getContigById(1), Strand.POSITIVE, Coordinates.oneBased(200, 200), "A", "T")
.build();

GenomicVariant clinVarVariant = parseVariant("1-200-A-T");
assertThat(instance.getClinVarData(clinVarVariant), equalTo(AlleleProtoAdaptor.toClinVarData(clinVar)));

Variant nonClinVarVariant = VariantEvaluation.builder()
.variant(GenomeAssembly.HG19.getContigById(1), Strand.POSITIVE, Coordinates.oneBased(200, 200), "A", "A")
.build();

GenomicVariant nonClinVarVariant = parseVariant("1-200-A-A");
assertThat(instance.getClinVarData(nonClinVarVariant), equalTo(ClinVarData.empty()));
}
}

@Test
public void FiveInsideAndFourOutsideBoundaries() {
ClinVarDaoMvStore clinVarDao = buildClinVarDaoMvStore(
"1-1-A-G",
"1-1227-A-G", // -3
"1-1228-A-G", // -2
"1-1229-A-G", // -1
"1-1230-T-G", // 0 same ref
"1-1230-T-C", // 0 same ref
"1-1231-C-G", // +1
"1-1232-T-A", // +2
"1-1233-C-T", // +3
"1-7700-A-G"
);

GenomicVariant genomicVariant = parseVariant("1-1230-T-A");

var result = clinVarDao.findClinVarRecordsOverlappingInterval(genomicVariant.withPadding(2, 2));
assertThat(result.size(), equalTo(6));
}

@Test
public void variantsNonOverlap() {
ClinVarDaoMvStore clinVarDao = buildClinVarDaoMvStore("1-1227-A-G", "1-1233-C-T");
GenomicVariant genomicVariant = parseVariant("1-1230-T-A");

var result = clinVarDao.findClinVarRecordsOverlappingInterval(genomicVariant.withPadding(2, 2));
assertThat(result.size(), equalTo(0));

}

@Test
public void variantsOverlapDirectlyAtBoundaries() {
ClinVarDaoMvStore clinVarDao = buildClinVarDaoMvStore("1-1228-A-G", "1-1232-T-A");
GenomicVariant genomicVariant = parseVariant("1-1230-T-A");

var result = clinVarDao.findClinVarRecordsOverlappingInterval(genomicVariant.withPadding(2, 2));
assertThat(result.size(), equalTo(2));
}

@Test
public void variantsOverlapInsideBoundaries() {
// MVStore mvStore = newMvStore();
// MVMap<AlleleProto.AlleleKey, AlleleProto.ClinVar> clinVarMap = MvStoreUtil.openClinVarMVMap(mvStore);
// clinVarMap.put(parseAlleleKey("1-1229-A-G"), AlleleProto.ClinVar.newBuilder().build());
// clinVarMap.put(parseAlleleKey("1-1231-C-G"), AlleleProto.ClinVar.newBuilder().build());
// ClinVarDaoMvStore clinVarDao = new ClinVarDaoMvStore(mvStore);
ClinVarDaoMvStore clinVarDao = buildClinVarDaoMvStore("1-1229-A-G", "1-1231-C-G");
GenomicVariant genomicVariant = parseVariant("1-1230-T-A");

var result = clinVarDao.findClinVarRecordsOverlappingInterval(genomicVariant.withPadding(2, 2));
assertThat(result.size(), equalTo(2));
}

@Test
public void variantsExactlyMatchOnPosition() {
ClinVarDaoMvStore clinVarDao = buildClinVarDaoMvStore("1-1230-T-G");
GenomicVariant genomicVariant = parseVariant("1-1230-T-A");

var result = clinVarDao.findClinVarRecordsOverlappingInterval(genomicVariant);
assertThat(result.keySet(), equalTo(Set.of(parseVariant("1-1230-T-G"))));
}

@Test
public void variantsAreFarOutOfBoundaries() {
ClinVarDaoMvStore clinVarDao = buildClinVarDaoMvStore("1-7700-A-G", "1-1-A-G");
GenomicVariant genomicVariant = parseVariant("1-1230-T-A");

var result = clinVarDao.findClinVarRecordsOverlappingInterval(genomicVariant.withPadding(2, 2));
assertThat(result.size(), equalTo(0));

}

@Test
public void noVariantsGettingAddedNoOverlap() {
ClinVarDaoMvStore clinVarDao = buildClinVarDaoMvStore();
GenomicVariant genomicVariant = parseVariant("1-1230-T-A");

var result = clinVarDao.findClinVarRecordsOverlappingInterval(genomicVariant.withPadding(2, 2));
assertThat(result.size(), equalTo(0));

}

@Test
public void setPositionInButLengthIncorrectTest() {
ClinVarDaoMvStore clinVarDao = buildClinVarDaoMvStore("1-1228-AA-G", "1-1231-A-GG");
GenomicVariant genomicVariant = parseVariant("1-1230-T-A");

var result = clinVarDao.findClinVarRecordsOverlappingInterval(genomicVariant.withPadding(2, 2));
assertThat(result.size(), equalTo(2));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.monarchinitiative.exomiser.core.genome.VariantAnnotator;
import org.monarchinitiative.exomiser.core.genome.dao.serialisers.MvStoreUtil;
import org.monarchinitiative.exomiser.core.model.ChromosomalRegionIndex;
import org.monarchinitiative.exomiser.core.model.TranscriptAnnotation;
import org.monarchinitiative.exomiser.core.model.VariantAnnotation;
import org.monarchinitiative.exomiser.core.model.pathogenicity.ClinVarData;
import org.monarchinitiative.exomiser.core.proto.AlleleProto;
Expand Down Expand Up @@ -90,10 +91,13 @@ private ClinVarData annotateClinvar(Allele allele) {
List<VariantAnnotation> variantAnnotations = variantAnnotator.annotate(genomicVariant);
if (!variantAnnotations.isEmpty()) {
VariantAnnotation variantAnnotation = variantAnnotations.get(0);
TranscriptAnnotation transcriptAnnotation = variantAnnotation.getTranscriptAnnotations().get(0);
return allele.getClinVarData()
.toBuilder()
.geneSymbol(variantAnnotation.getGeneSymbol())
.variantEffect(variantAnnotation.getVariantEffect())
// builder.hgvsCdna(transcriptAnnotation == null ? "" : transcriptAnnotation.getHgvsCdna()); // TODO need cDNA info for PS1/PM5
// builder.hgvsProtein(transcriptAnnotation == null ? "" : transcriptAnnotation.getHgvsProtein()); // TODO need AA info for PS1/PM5
.build();
}
return allele.getClinVarData();
Expand Down

0 comments on commit 7c9b847

Please sign in to comment.