Skip to content

Commit

Permalink
scheduler for retrieving and saving md5 checksum
Browse files Browse the repository at this point in the history
  • Loading branch information
nitin-ebi committed Jan 19, 2024
1 parent 2464759 commit c79660e
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@
<dependency>
<groupId>org.springframework.retry</groupId>
<artifactId>spring-retry</artifactId>
<version>1.2.5.RELEASE</version>
<version>1.3.1</version>
</dependency>

</dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
import org.springframework.boot.web.servlet.support.SpringBootServletInitializer;
import org.springframework.hateoas.config.EnableHypermediaSupport;
import org.springframework.retry.annotation.EnableRetry;
import org.springframework.scheduling.annotation.EnableScheduling;

@EnableScheduling
@SpringBootApplication
@EnableRetry
@EnableHypermediaSupport(type = EnableHypermediaSupport.HypermediaType.HAL)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,16 @@
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Modifying;
import org.springframework.data.jpa.repository.Query;
import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;

import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity;
import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;

import java.util.List;

@Repository
public interface ChromosomeRepository extends JpaRepository<ChromosomeEntity, Long> {

Expand All @@ -35,6 +40,16 @@ public interface ChromosomeRepository extends JpaRepository<ChromosomeEntity, Lo

/**
 * Pages through chromosomes by the INSDC accession of their assembly.
 * No explicit query is declared here, so the query is presumably derived by Spring Data from the
 * method name (nested property {@code assembly.insdcAccession}) - confirm against the entity mapping.
 *
 * @param asmInsdcAccession INSDC accession of the assembly to match
 * @param request           page number / size (and optional sort) to apply
 * @return the requested page of matching chromosomes
 */
Page<ChromosomeEntity> findChromosomeEntitiesByAssembly_InsdcAccession(String asmInsdcAccession, Pageable request);

/**
 * Pages through the chromosomes of one assembly that still have no MD5 checksum, i.e. whose
 * {@code md5checksum} is either {@code NULL} or the empty string.
 * <p>
 * The query declares no ORDER BY, and rows leave the result set as soon as their checksum is
 * populated, so page N fetched after an update is not a continuation of page N-1 fetched before it.
 *
 * @param asmInsdcAccession INSDC accession of the assembly whose chromosomes are inspected
 * @param pageable          page number / size to apply to the filtered result
 * @return the requested page of chromosomes lacking a checksum
 */
@Query("SELECT c FROM ChromosomeEntity c WHERE c.assembly.insdcAccession = :asmInsdcAccession AND (c.md5checksum IS NULL OR c.md5checksum = '')")
Page<ChromosomeEntity> findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty(@Param("asmInsdcAccession") String asmInsdcAccession, Pageable pageable);

/**
 * Lists the distinct assembly INSDC accessions that own at least one chromosome whose
 * {@code md5checksum} is {@code NULL} or the empty string.
 *
 * @return distinct assembly INSDC accessions with chromosomes still missing a checksum
 */
@Query("SELECT distinct c.assembly.insdcAccession FROM ChromosomeEntity c WHERE c.md5checksum IS NULL OR c.md5checksum = ''")
List<String> findAssembliesWhereChromosomeMd5checksumIsNullOrEmpty();

/**
 * Bulk-updates the MD5 checksum of the chromosome(s) identified by assembly INSDC accession plus
 * chromosome INSDC accession.
 * <p>
 * NOTE(review): as a {@code @Modifying} JPQL update this presumably has to be invoked inside a
 * transaction supplied by the caller - confirm every call site is transactional.
 *
 * @param asmInsdcAccession INSDC accession of the owning assembly
 * @param insdcAccession    INSDC accession of the chromosome to update
 * @param md5Checksum       checksum value to store
 */
@Modifying
@Query("UPDATE ChromosomeEntity c SET c.md5checksum = :md5Checksum WHERE c.assembly.insdcAccession= :asmInsdcAccession AND c.insdcAccession = :insdcAccession")
void updateMd5ChecksumByInsdcAccession(@Param("asmInsdcAccession") String asmInsdcAccession, @Param("insdcAccession") String insdcAccession, @Param("md5Checksum") String md5Checksum);

/**
 * Pages through chromosomes by the RefSeq accession of their assembly.
 * No explicit query is declared here, so the query is presumably derived by Spring Data from the
 * method name (nested property {@code assembly.refseq}) - confirm against the entity mapping.
 *
 * @param asmRefseq RefSeq accession of the assembly to match
 * @param request   page number / size (and optional sort) to apply
 * @return the requested page of matching chromosomes
 */
Page<ChromosomeEntity> findChromosomeEntitiesByAssembly_Refseq(String asmRefseq, Pageable request);

/**
 * Pages through chromosomes by GenBank sequence name combined with the taxonomy id of their assembly.
 * No explicit query is declared here, so the query is presumably derived by Spring Data from the
 * method name ({@code genbankSequenceName} and nested {@code assembly.taxid}) - confirm against the
 * entity mapping.
 *
 * @param genbankName GenBank sequence name to match
 * @param asmTaxid    taxonomy id of the owning assembly
 * @param request     page number / size (and optional sort) to apply
 * @return the requested page of matching chromosomes
 */
Page<ChromosomeEntity> findChromosomeEntitiesByGenbankSequenceNameAndAssembly_Taxid(String genbankName, long asmTaxid, Pageable request);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
package uk.ac.ebi.eva.contigalias.scheduler;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.domain.Slice;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Component;
import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;
import uk.ac.ebi.eva.contigalias.service.ChromosomeService;

import java.util.List;
import java.util.stream.Collectors;

/**
 * Scheduled job that back-fills the MD5 checksum of chromosomes that do not have one yet.
 * <p>
 * For every assembly that still owns chromosomes without a checksum, the chromosomes are read
 * page by page, their checksum is fetched through {@link Md5ChecksumRetriever} and the values that
 * could be retrieved are written back through {@link ChromosomeService}.
 */
@Component
public class ChecksumSetter {
    private static final Logger logger = LoggerFactory.getLogger(ChecksumSetter.class);
    private static final int DEFAULT_PAGE_SIZE = 10000;
    private static final long ONE_DAY_IN_MILLIS = 24L * 60 * 60 * 1000;

    private final ChromosomeService chromosomeService;
    private final Md5ChecksumRetriever md5ChecksumRetriever;

    @Autowired
    public ChecksumSetter(ChromosomeService chromosomeService, Md5ChecksumRetriever md5ChecksumRetriever) {
        this.chromosomeService = chromosomeService;
        this.md5ChecksumRetriever = md5ChecksumRetriever;
    }

    /**
     * Entry point of the scheduled job: runs immediately after start-up and then again 24 hours
     * after the previous run has finished.
     * <p>
     * A failure while processing one assembly is logged and does not prevent the remaining
     * assemblies from being processed.
     */
    @Scheduled(initialDelay = 0, fixedDelay = ONE_DAY_IN_MILLIS)
    public void updateMd5CheckSumForAllAssemblies() {
        List<String> assemblyList = chromosomeService.getAssembliesWhereChromosomeMd5ChecksumIsNull();
        for (String assembly : assemblyList) {
            logger.info("Trying to update md5checksum for assembly: {}", assembly);
            try {
                updateMD5ChecksumForAllChromosomesInAssembly(assembly);
            } catch (RuntimeException e) {
                // Keep going: one broken assembly must not block the back-fill of all the others.
                logger.error("Could not update md5checksum for assembly: {}", assembly, e);
            }
        }
    }

    /**
     * Fills in the missing checksums of all chromosomes of one assembly.
     * <p>
     * The underlying query only returns chromosomes that still lack a checksum, so every
     * successful update removes rows from the result set and shifts the remaining ones towards
     * page 0. The same page is therefore fetched again for as long as it yields progress; the page
     * number only advances when not a single checksum of the current page could be retrieved.
     * This visits every row and still terminates when some checksums are permanently unavailable.
     *
     * @param assembly INSDC accession of the assembly to process
     */
    public void updateMD5ChecksumForAllChromosomesInAssembly(String assembly) {
        int pageNumber = 0;
        Slice<ChromosomeEntity> chrSlice = fetchChromosomesWithoutChecksum(assembly, pageNumber);
        while (chrSlice.hasContent()) {
            List<ChromosomeEntity> chromosomeEntityList = chrSlice.getContent();
            updateMd5ChecksumForChromosome(chromosomeEntityList);

            // Every entity of the page came in without a checksum, so any that has one now was updated.
            boolean madeProgress = chromosomeEntityList.stream().anyMatch(ChecksumSetter::hasChecksum);
            if (!madeProgress) {
                // Nothing on this page could be resolved; step over it instead of looping forever.
                pageNumber++;
            }
            chrSlice = fetchChromosomesWithoutChecksum(assembly, pageNumber);
        }
    }

    /**
     * Retrieves the checksum for each given chromosome and persists the ones that were obtained.
     * Chromosomes whose checksum could not be retrieved are logged and left untouched.
     *
     * @param chromosomesList chromosomes to update; a {@code null} or empty list is a no-op
     */
    public void updateMd5ChecksumForChromosome(List<ChromosomeEntity> chromosomesList) {
        if (chromosomesList == null || chromosomesList.isEmpty()) {
            return;
        }

        // Each lambda invocation touches only its own entity, so the parallel traversal shares no state.
        chromosomesList.parallelStream().forEach(chromosome -> {
            try {
                String md5Checksum = md5ChecksumRetriever.retrieveMd5Checksum(chromosome.getInsdcAccession());
                chromosome.setMd5checksum(md5Checksum);
            } catch (Exception e) {
                logger.warn("Could not retrieve md5Checksum for insdc accession: {}",
                            chromosome.getInsdcAccession(), e);
            }
        });

        // Only write back rows that actually carry a checksum; the rest would be pointless UPDATEs.
        List<ChromosomeEntity> chromosomesWithChecksum = chromosomesList.stream()
                .filter(ChecksumSetter::hasChecksum)
                .collect(Collectors.toList());
        if (!chromosomesWithChecksum.isEmpty()) {
            chromosomeService.updateMd5ChecksumForAll(chromosomesWithChecksum);
        }
    }

    /** Loads one page of the assembly's chromosomes that still lack a checksum. */
    private Slice<ChromosomeEntity> fetchChromosomesWithoutChecksum(String assembly, int pageNumber) {
        Pageable pageable = PageRequest.of(pageNumber, DEFAULT_PAGE_SIZE);
        return chromosomeService.getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(assembly, pageable);
    }

    /** Tells whether the chromosome carries a non-null, non-empty checksum. */
    private static boolean hasChecksum(ChromosomeEntity chromosome) {
        String md5Checksum = chromosome.getMd5checksum();
        return md5Checksum != null && !md5Checksum.isEmpty();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package uk.ac.ebi.eva.contigalias.scheduler;

import com.fasterxml.jackson.databind.JsonNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.retry.annotation.Backoff;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Component;
import org.springframework.web.client.RestTemplate;

/**
 * Looks up the MD5 checksum of a sequence, identified by its INSDC accession, from the ENA CRAM
 * reference registry metadata endpoint.
 */
@Component
public class Md5ChecksumRetriever {
    private static final Logger logger = LoggerFactory.getLogger(Md5ChecksumRetriever.class);
    private static final String INSDC_ACCESSION_PLACE_HOLDER = "INSDC_ACCESSION_PLACE_HOLDER";
    private static final String INSDC_CHECKSUM_URL = "https://www.ebi.ac.uk/ena/cram/sequence/insdc:" + INSDC_ACCESSION_PLACE_HOLDER + "/metadata";

    private final RestTemplate restTemplate = new RestTemplate();

    /**
     * Fetches the MD5 checksum registered for the given INSDC accession.
     * <p>
     * Any exception, including the ones thrown here for an unusable response, triggers a retry:
     * up to 5 attempts in total, with an initial back-off of 2 seconds that doubles each time.
     *
     * @param insdcAccession INSDC accession of the sequence
     * @return the value of {@code metadata.md5} in the service response
     * @throws IllegalStateException if the response has no body or no {@code metadata.md5} value
     */
    @Retryable(value = Exception.class, maxAttempts = 5, backoff = @Backoff(delay = 2000, multiplier = 2))
    public String retrieveMd5Checksum(String insdcAccession) {
        String apiURL = INSDC_CHECKSUM_URL.replace(INSDC_ACCESSION_PLACE_HOLDER, insdcAccession);
        logger.debug("Retrieving md5 checksum from {}", apiURL);

        JsonNode jsonResponse = restTemplate.getForObject(apiURL, JsonNode.class);
        if (jsonResponse == null) {
            throw new IllegalStateException("Empty response while retrieving md5 checksum for " + insdcAccession);
        }

        // path() never returns null, so a missing "metadata" or "md5" field surfaces as a missing node
        // instead of a NullPointerException; a JSON null must be rejected too, because asText() would
        // otherwise turn it into the literal string "null".
        JsonNode md5Node = jsonResponse.path("metadata").path("md5");
        if (md5Node.isMissingNode() || md5Node.isNull()) {
            throw new IllegalStateException("No md5 checksum in response for " + insdcAccession);
        }
        return md5Node.asText();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;
import uk.ac.ebi.eva.contigalias.repo.ChromosomeRepository;

import javax.transaction.Transactional;
import java.util.LinkedList;
import java.util.List;

Expand Down Expand Up @@ -54,6 +55,22 @@ public Page<ChromosomeEntity> getChromosomesByAssemblyInsdcAccession(String asmI
return stripAssembliesFromChromosomes(chromosomes);
}

/**
 * Returns the assembly accessions reported by the repository as still owning chromosomes whose
 * MD5 checksum is null or empty.
 *
 * @return the repository result, passed through unchanged
 */
public List<String> getAssembliesWhereChromosomeMd5ChecksumIsNull() {
    List<String> assemblyAccessions = repository.findAssembliesWhereChromosomeMd5checksumIsNullOrEmpty();
    return assemblyAccessions;
}

/**
 * Fetches one page of the given assembly's chromosomes whose MD5 checksum is null or empty.
 * The page is returned exactly as delivered by the repository.
 *
 * @param asmInsdcAccession INSDC accession of the assembly
 * @param request           paging information forwarded to the repository
 * @return the requested page of chromosomes without a checksum
 */
public Page<ChromosomeEntity> getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(String asmInsdcAccession, Pageable request) {
    return repository.findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty(asmInsdcAccession, request);
}

/**
 * Writes the MD5 checksum currently held by each given chromosome to the database, issuing one
 * repository update per chromosome, keyed by assembly accession and chromosome accession.
 * The method is annotated {@code @Transactional}, so the updates are meant to share a transaction.
 *
 * @param chromosomeEntityList chromosomes whose checksum should be stored
 */
@Transactional
public void updateMd5ChecksumForAll(List<ChromosomeEntity> chromosomeEntityList) {
    chromosomeEntityList.forEach(entity -> {
        String assemblyAccession = entity.getAssembly().getInsdcAccession();
        String chromosomeAccession = entity.getInsdcAccession();
        String checksum = entity.getMd5checksum();
        repository.updateMd5ChecksumByInsdcAccession(assemblyAccession, chromosomeAccession, checksum);
    });
}

public Page<ChromosomeEntity> getChromosomesByAssemblyRefseq(String asmRefseq, Pageable request) {
Page<ChromosomeEntity> chromosomes = repository.findChromosomeEntitiesByAssembly_Refseq(asmRefseq, request);
return stripAssembliesFromChromosomes(chromosomes);
Expand Down

0 comments on commit c79660e

Please sign in to comment.