Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EVA-3434 Remove locus equality condition from DeprecatedVariantMongoReader #430

Merged
merged 3 commits into from
Feb 26, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -107,18 +107,16 @@ protected List<Variant> getVariants(Document deprecatedVariant) {
Document inactiveEntity = inactiveObjects.iterator().next();
String contig = inactiveEntity.getString(VariantMongoAggregationReader.CONTIG_FIELD);
long start = inactiveEntity.getLong(VariantMongoAggregationReader.START_FIELD);
String type = inactiveEntity.getString(TYPE_FIELD);

for (Document submittedVariantOperation : submittedVariantOperations) {
Collection<Document> inactiveEntitySubmittedVariant = (Collection<Document>) submittedVariantOperation
.get("inactiveObjects");
Document submittedVariant = inactiveEntitySubmittedVariant.iterator().next();
long submittedVariantStart = submittedVariant.getLong(START_FIELD);
String submittedVariantContig = submittedVariant.getString(CONTIG_FIELD);
String reference = submittedVariant.getString("ref");
String alternate = submittedVariant.getString("alt");

if (isSameLocation(contig, start, submittedVariantContig, submittedVariantStart, type)) {
if (contig.equals(submittedVariantContig)) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the condition that checked the start, but I was not sure whether we also needed to remove the contig check, so I have left it in for now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to also remove the contig condition.
It does not make sense to check the contig if we don't check the start.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the contig condition as well.

// Since we only need evidence that at least one submitted variant agrees
// with the deprecated RS in locus, we just return one variant record per RS
Variant variantToReturn = new Variant(contig, start,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ public class DeprecatedVariantMongoReaderTest {

private static final int TAXONOMY_2 = 3703;

private static final int TAXONOMY_3 = 3704;

private static final int CHUNK_SIZE = 5;

@Autowired
Expand Down Expand Up @@ -121,17 +123,20 @@ public void testOnlySpecifiedTaxVariantsRead() {
DbsnpSubmittedVariantEntity ss1 = createSS(ASSEMBLY, TAXONOMY_1, 1L, 1L, 100L, "C", "A");
DbsnpSubmittedVariantEntity ss2 = createSS(ASSEMBLY, TAXONOMY_2, 2L, 2L, 101L, "A", "T");
DbsnpSubmittedVariantEntity ss3 = createSS(ASSEMBLY, TAXONOMY_2, 3L, 3L, 102L, "T", "G");
DbsnpClusteredVariantEntity rs1 = createRS(ss1, null);
DbsnpClusteredVariantEntity rs2 = createRS(ss2, null);
DbsnpClusteredVariantEntity rs3 = createRS(ss3, TAXONOMY_1);
this.mongoTemplate.insert(Stream.of(ss1, ss2, ss3).map(ss -> {
DbsnpSubmittedVariantEntity ss4 = createSS(ASSEMBLY, TAXONOMY_3, 4L, 4L, 103L, "A", "C");
DbsnpClusteredVariantEntity rs1 = createRS(ss1, null, null);
DbsnpClusteredVariantEntity rs2 = createRS(ss2, null, null);
DbsnpClusteredVariantEntity rs3 = createRS(ss3, TAXONOMY_1, null);
DbsnpClusteredVariantEntity rs4 = createRS(ss4, TAXONOMY_3, ss4.getStart() + 2);
// create RS
this.mongoTemplate.insert(Stream.of(ss1, ss2, ss3, ss4).map(ss -> {
DbsnpSubmittedVariantOperationEntity dbsnpSvoeObj = new DbsnpSubmittedVariantOperationEntity();
dbsnpSvoeObj.fill( EventType.UPDATED, ss.getAccession(),
"Declustered: None of the variant alleles match the reference allele.",
Arrays.asList(new DbsnpSubmittedVariantInactiveEntity(ss)));
return dbsnpSvoeObj;
}).collect(Collectors.toList()), DbsnpSubmittedVariantOperationEntity.class);
this.mongoTemplate.insert(Stream.of(rs1, rs2, rs3).map(rs -> {
this.mongoTemplate.insert(Stream.of(rs1, rs2, rs3, rs4).map(rs -> {
DbsnpClusteredVariantOperationEntity dbsnpCvoeObj = new DbsnpClusteredVariantOperationEntity();
dbsnpCvoeObj.fill( EventType.DEPRECATED, rs.getAccession(),
"Clustered variant completely declustered",
Expand All @@ -157,6 +162,13 @@ public void testOnlySpecifiedTaxVariantsRead() {
assertEquals(2, deprecatedVariants.size());
assertTrue(Arrays.asList("rs2", "rs3").contains(deprecatedVariants.get(0).getMainId()));
assertTrue(Arrays.asList("rs2", "rs3").contains(deprecatedVariants.get(1).getMainId()));

this.reader = new DeprecatedVariantMongoReader(ASSEMBLY, TAXONOMY_3, mongoClient, TEST_DB, CHUNK_SIZE,
new DbsnpCollectionNames());
this.reader.open(new ExecutionContext());
deprecatedVariants = this.readIntoList();
assertEquals(1, deprecatedVariants.size());
assertEquals("rs4", deprecatedVariants.get(0).getMainId());
}

private DbsnpSubmittedVariantEntity createSS(String assembly, int taxonomy, Long ssAccession, Long rsAccession,
Expand All @@ -166,13 +178,14 @@ private DbsnpSubmittedVariantEntity createSS(String assembly, int taxonomy, Long
false, false, 1);
}

private DbsnpClusteredVariantEntity createRS(SubmittedVariantEntity sve, Integer alternateTaxonomy) {
private DbsnpClusteredVariantEntity createRS(SubmittedVariantEntity sve, Integer alternateTaxonomy, Long start) {
Function<IClusteredVariant, String> hashingFunction = new ClusteredVariantSummaryFunction().andThen(
new SHA1HashingFunction());
int taxonomyToUse = Objects.isNull(alternateTaxonomy)? sve.getTaxonomyAccession(): alternateTaxonomy;
long startToUse = Objects.isNull(start) ? sve.getStart() : start;
ClusteredVariant cv = new ClusteredVariant(sve.getReferenceSequenceAccession(), taxonomyToUse,
sve.getContig(),
sve.getStart(),
startToUse,
new Variant(sve.getContig(), sve.getStart(), sve.getStart(),
sve.getReferenceAllele(),
sve.getAlternateAllele()).getType(),
Expand Down
Loading