From f83ef53a5b0c66179fd34e3c6b1934b66148a1f3 Mon Sep 17 00:00:00 2001 From: nkumar2 Date: Thu, 22 Feb 2024 15:41:54 +0000 Subject: [PATCH 1/3] Remove locus equality condition from DeprecatedVariantMongoReader --- .../DeprecatedVariantMongoReader.java | 4 +-- .../DeprecatedVariantMongoReaderTest.java | 27 ++++++++++++++----- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java index a1c61185a..51a0f7560 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java @@ -107,18 +107,16 @@ protected List getVariants(Document deprecatedVariant) { Document inactiveEntity = inactiveObjects.iterator().next(); String contig = inactiveEntity.getString(VariantMongoAggregationReader.CONTIG_FIELD); long start = inactiveEntity.getLong(VariantMongoAggregationReader.START_FIELD); - String type = inactiveEntity.getString(TYPE_FIELD); for (Document submittedVariantOperation : submittedVariantOperations) { Collection inactiveEntitySubmittedVariant = (Collection) submittedVariantOperation .get("inactiveObjects"); Document submittedVariant = inactiveEntitySubmittedVariant.iterator().next(); - long submittedVariantStart = submittedVariant.getLong(START_FIELD); String submittedVariantContig = submittedVariant.getString(CONTIG_FIELD); String reference = submittedVariant.getString("ref"); String alternate = submittedVariant.getString("alt"); - if (isSameLocation(contig, start, submittedVariantContig, submittedVariantStart, type)) { + if (contig.equals(submittedVariantContig)) { // Since we only need evidence that at least one submitted 
variant agrees // with the deprecated RS in locus, we just return one variant record per RS Variant variantToReturn = new Variant(contig, start, diff --git a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReaderTest.java b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReaderTest.java index 2fb3ff4e8..e80c7fdc6 100644 --- a/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReaderTest.java +++ b/eva-accession-release/src/test/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReaderTest.java @@ -76,6 +76,8 @@ public class DeprecatedVariantMongoReaderTest { private static final int TAXONOMY_2 = 3703; + private static final int TAXONOMY_3 = 3704; + private static final int CHUNK_SIZE = 5; @Autowired @@ -121,17 +123,20 @@ public void testOnlySpecifiedTaxVariantsRead() { DbsnpSubmittedVariantEntity ss1 = createSS(ASSEMBLY, TAXONOMY_1, 1L, 1L, 100L, "C", "A"); DbsnpSubmittedVariantEntity ss2 = createSS(ASSEMBLY, TAXONOMY_2, 2L, 2L, 101L, "A", "T"); DbsnpSubmittedVariantEntity ss3 = createSS(ASSEMBLY, TAXONOMY_2, 3L, 3L, 102L, "T", "G"); - DbsnpClusteredVariantEntity rs1 = createRS(ss1, null); - DbsnpClusteredVariantEntity rs2 = createRS(ss2, null); - DbsnpClusteredVariantEntity rs3 = createRS(ss3, TAXONOMY_1); - this.mongoTemplate.insert(Stream.of(ss1, ss2, ss3).map(ss -> { + DbsnpSubmittedVariantEntity ss4 = createSS(ASSEMBLY, TAXONOMY_3, 4L, 4L, 103L, "A", "C"); + DbsnpClusteredVariantEntity rs1 = createRS(ss1, null, null); + DbsnpClusteredVariantEntity rs2 = createRS(ss2, null, null); + DbsnpClusteredVariantEntity rs3 = createRS(ss3, TAXONOMY_1, null); + DbsnpClusteredVariantEntity rs4 = createRS(ss4, TAXONOMY_3, ss4.getStart() + 2); + // insert the declustering operations for the submitted variants (SS) + this.mongoTemplate.insert(Stream.of(ss1, ss2, ss3, ss4).map(ss -> { DbsnpSubmittedVariantOperationEntity
dbsnpSvoeObj = new DbsnpSubmittedVariantOperationEntity(); dbsnpSvoeObj.fill( EventType.UPDATED, ss.getAccession(), "Declustered: None of the variant alleles match the reference allele.", Arrays.asList(new DbsnpSubmittedVariantInactiveEntity(ss))); return dbsnpSvoeObj; }).collect(Collectors.toList()), DbsnpSubmittedVariantOperationEntity.class); - this.mongoTemplate.insert(Stream.of(rs1, rs2, rs3).map(rs -> { + this.mongoTemplate.insert(Stream.of(rs1, rs2, rs3, rs4).map(rs -> { DbsnpClusteredVariantOperationEntity dbsnpCvoeObj = new DbsnpClusteredVariantOperationEntity(); dbsnpCvoeObj.fill( EventType.DEPRECATED, rs.getAccession(), "Clustered variant completely declustered", @@ -157,6 +162,13 @@ public void testOnlySpecifiedTaxVariantsRead() { assertEquals(2, deprecatedVariants.size()); assertTrue(Arrays.asList("rs2", "rs3").contains(deprecatedVariants.get(0).getMainId())); assertTrue(Arrays.asList("rs2", "rs3").contains(deprecatedVariants.get(1).getMainId())); + + this.reader = new DeprecatedVariantMongoReader(ASSEMBLY, TAXONOMY_3, mongoClient, TEST_DB, CHUNK_SIZE, + new DbsnpCollectionNames()); + this.reader.open(new ExecutionContext()); + deprecatedVariants = this.readIntoList(); + assertEquals(1, deprecatedVariants.size()); + assertEquals("rs4", deprecatedVariants.get(0).getMainId()); } private DbsnpSubmittedVariantEntity createSS(String assembly, int taxonomy, Long ssAccession, Long rsAccession, @@ -166,13 +178,14 @@ private DbsnpSubmittedVariantEntity createSS(String assembly, int taxonomy, Long false, false, 1); } - private DbsnpClusteredVariantEntity createRS(SubmittedVariantEntity sve, Integer alternateTaxonomy) { + private DbsnpClusteredVariantEntity createRS(SubmittedVariantEntity sve, Integer alternateTaxonomy, Long start) { Function hashingFunction = new ClusteredVariantSummaryFunction().andThen( new SHA1HashingFunction()); int taxonomyToUse = Objects.isNull(alternateTaxonomy)? 
sve.getTaxonomyAccession(): alternateTaxonomy; + long startToUse = Objects.isNull(start) ? sve.getStart() : start; ClusteredVariant cv = new ClusteredVariant(sve.getReferenceSequenceAccession(), taxonomyToUse, sve.getContig(), - sve.getStart(), + startToUse, new Variant(sve.getContig(), sve.getStart(), sve.getStart(), sve.getReferenceAllele(), sve.getAlternateAllele()).getType(), From 4bbf2066781c756caca3c9ed206789c427f60ba3 Mon Sep 17 00:00:00 2001 From: nkumar2 Date: Sun, 25 Feb 2024 23:27:20 +0000 Subject: [PATCH 2/3] review comment --- .../DeprecatedVariantMongoReader.java | 34 +++++++------------ 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java index 51a0f7560..c77dec321 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java +++ b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java @@ -107,27 +107,19 @@ protected List getVariants(Document deprecatedVariant) { Document inactiveEntity = inactiveObjects.iterator().next(); String contig = inactiveEntity.getString(VariantMongoAggregationReader.CONTIG_FIELD); long start = inactiveEntity.getLong(VariantMongoAggregationReader.START_FIELD); - - for (Document submittedVariantOperation : submittedVariantOperations) { - Collection inactiveEntitySubmittedVariant = (Collection) submittedVariantOperation - .get("inactiveObjects"); - Document submittedVariant = inactiveEntitySubmittedVariant.iterator().next(); - String submittedVariantContig = submittedVariant.getString(CONTIG_FIELD); - String reference = submittedVariant.getString("ref"); - String alternate = submittedVariant.getString("alt"); - - if 
(contig.equals(submittedVariantContig)) { - // Since we only need evidence that at least one submitted variant agrees - // with the deprecated RS in locus, we just return one variant record per RS - Variant variantToReturn = new Variant(contig, start, - start + Math.max(reference.length(), alternate.length()) - 1, - reference, alternate); - variantToReturn.setMainId("rs" + deprecatedVariant.getLong("accession")); - return Arrays.asList(variantToReturn); - } - } - - return new ArrayList<>(); + Collection inactiveEntitySubmittedVariant = (Collection) submittedVariantOperations + .iterator().next().get("inactiveObjects"); + Document submittedVariant = inactiveEntitySubmittedVariant.iterator().next(); + String reference = submittedVariant.getString("ref"); + String alternate = submittedVariant.getString("alt"); + + // Since we only need evidence that at least one submitted variant agrees + // with the deprecated RS in locus, we just return one variant record per RS + Variant variantToReturn = new Variant(contig, start, + start + Math.max(reference.length(), alternate.length()) - 1, + reference, alternate); + variantToReturn.setMainId("rs" + deprecatedVariant.getLong("accession")); + return Arrays.asList(variantToReturn); } @Override From ecefe1525e6d8a26c97cfbc1977dbe3fef909e83 Mon Sep 17 00:00:00 2001 From: nkumar2 Date: Mon, 26 Feb 2024 15:29:04 +0000 Subject: [PATCH 3/3] review comment --- .../batch/io/deprecated/DeprecatedVariantMongoReader.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java index c77dec321..041ce166f 100644 --- a/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java +++ 
b/eva-accession-release/src/main/java/uk/ac/ebi/eva/accession/release/batch/io/deprecated/DeprecatedVariantMongoReader.java @@ -107,14 +107,14 @@ protected List getVariants(Document deprecatedVariant) { Document inactiveEntity = inactiveObjects.iterator().next(); String contig = inactiveEntity.getString(VariantMongoAggregationReader.CONTIG_FIELD); long start = inactiveEntity.getLong(VariantMongoAggregationReader.START_FIELD); + // Since we only need evidence that at least one submitted variant agrees with the deprecated RS, + // we just return one variant record per RS Collection inactiveEntitySubmittedVariant = (Collection) submittedVariantOperations .iterator().next().get("inactiveObjects"); Document submittedVariant = inactiveEntitySubmittedVariant.iterator().next(); String reference = submittedVariant.getString("ref"); String alternate = submittedVariant.getString("alt"); - // Since we only need evidence that at least one submitted variant agrees - // with the deprecated RS in locus, we just return one variant record per RS Variant variantToReturn = new Variant(contig, start, start + Math.max(reference.length(), alternate.length()) - 1, reference, alternate);