diff --git a/pom.xml b/pom.xml
index f63b0907..48572f74 100644
--- a/pom.xml
+++ b/pom.xml
@@ -144,7 +144,7 @@
org.springframework.retry
spring-retry
- 1.2.5.RELEASE
+ 1.3.1
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java b/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java
index 0bc6cdba..3f286d2e 100644
--- a/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java
@@ -22,7 +22,9 @@
import org.springframework.boot.web.servlet.support.SpringBootServletInitializer;
import org.springframework.hateoas.config.EnableHypermediaSupport;
import org.springframework.retry.annotation.EnableRetry;
+import org.springframework.scheduling.annotation.EnableScheduling;
+@EnableScheduling
@SpringBootApplication
@EnableRetry
@EnableHypermediaSupport(type = EnableHypermediaSupport.HypermediaType.HAL)
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java b/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java
index 0b6f5bd7..2a1ae338 100644
--- a/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java
@@ -19,11 +19,16 @@
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.data.jpa.repository.Modifying;
+import org.springframework.data.jpa.repository.Query;
+import org.springframework.data.repository.query.Param;
import org.springframework.stereotype.Repository;
import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity;
import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;
+import java.util.List;
+
@Repository
public interface ChromosomeRepository extends JpaRepository {
@@ -35,6 +40,16 @@ public interface ChromosomeRepository extends JpaRepository findChromosomeEntitiesByAssembly_InsdcAccession(String asmInsdcAccession, Pageable request);
+ /**
+  * Finds the chromosomes of one assembly that still lack an MD5 checksum, i.e. whose
+  * {@code md5checksum} is either {@code NULL} or the empty string.
+  * <p>
+  * The query itself has no {@code ORDER BY}; the order of the rows within and across pages is
+  * whatever the supplied {@link Pageable} (and the database) yields. Callers that update the
+  * checksum while paging must take into account that updated rows drop out of this result set.
+  *
+  * @param asmInsdcAccession INSDC accession of the assembly the chromosomes belong to
+  * @param pageable          page number, page size and optional sort to apply
+  * @return the requested page of chromosomes without a checksum
+  */
+ @Query("SELECT c FROM ChromosomeEntity c WHERE c.assembly.insdcAccession = :asmInsdcAccession AND (c.md5checksum IS NULL OR c.md5checksum = '')")
+ Page<ChromosomeEntity> findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty(@Param("asmInsdcAccession") String asmInsdcAccession, Pageable pageable);
+
+ /**
+  * Lists the distinct INSDC accessions of all assemblies that have at least one chromosome whose
+  * {@code md5checksum} is {@code NULL} or the empty string.
+  *
+  * @return assembly INSDC accessions, each at most once; empty when every chromosome has a checksum
+  */
+ @Query("SELECT distinct c.assembly.insdcAccession FROM ChromosomeEntity c WHERE c.md5checksum IS NULL OR c.md5checksum = ''")
+ List<String> findAssembliesWhereChromosomeMd5checksumIsNullOrEmpty();
+
+ /**
+  * Sets the MD5 checksum of the chromosome(s) identified by the assembly's INSDC accession
+  * together with the chromosome's own INSDC accession. Executed as a bulk JPQL {@code UPDATE}.
+  *
+  * @param asmInsdcAccession INSDC accession of the owning assembly
+  * @param insdcAccession    INSDC accession of the chromosome to update
+  * @param md5Checksum       checksum value to store
+  */
+ @Modifying
+ @Query("UPDATE ChromosomeEntity c SET c.md5checksum = :md5Checksum WHERE c.assembly.insdcAccession= :asmInsdcAccession AND c.insdcAccession = :insdcAccession")
+ void updateMd5ChecksumByInsdcAccession(@Param("asmInsdcAccession") String asmInsdcAccession, @Param("insdcAccession") String insdcAccession, @Param("md5Checksum") String md5Checksum);
+
Page findChromosomeEntitiesByAssembly_Refseq(String asmRefseq, Pageable request);
Page findChromosomeEntitiesByGenbankSequenceNameAndAssembly_Taxid(String genbankName, long asmTaxid, Pageable request);
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java
new file mode 100644
index 00000000..fe7c8cd2
--- /dev/null
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java
@@ -0,0 +1,66 @@
+package uk.ac.ebi.eva.contigalias.scheduler;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.data.domain.PageRequest;
+import org.springframework.data.domain.Pageable;
+import org.springframework.data.domain.Slice;
+import org.springframework.scheduling.annotation.Scheduled;
+import org.springframework.stereotype.Component;
+import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;
+import uk.ac.ebi.eva.contigalias.service.ChromosomeService;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Scheduled job that back-fills the MD5 checksum of chromosomes stored without one.
+ * <p>
+ * For every assembly that has chromosomes with a missing (null or empty) checksum, the chromosomes
+ * are read page by page, their checksum is fetched through {@link Md5ChecksumRetriever} and the
+ * successfully resolved ones are written back through {@link ChromosomeService}.
+ */
+@Component
+public class ChecksumSetter {
+    private static final Logger logger = LoggerFactory.getLogger(ChecksumSetter.class);
+
+    /** Number of chromosomes requested from the database per page. */
+    private static final int DEFAULT_PAGE_SIZE = 10000;
+
+    /** Pause between the end of one run and the start of the next, in milliseconds. */
+    private static final long ONE_DAY_IN_MILLIS = 24L * 60 * 60 * 1000;
+
+    private final ChromosomeService chromosomeService;
+    private final Md5ChecksumRetriever md5ChecksumRetriever;
+
+    @Autowired
+    public ChecksumSetter(ChromosomeService chromosomeService, Md5ChecksumRetriever md5ChecksumRetriever) {
+        this.chromosomeService = chromosomeService;
+        this.md5ChecksumRetriever = md5ChecksumRetriever;
+    }
+
+    /**
+     * Entry point of the job: runs once right after start-up (initial delay 0) and then again one
+     * day after each run has finished (fixed delay).
+     * <p>
+     * A failure while processing one assembly is logged and does not prevent the remaining
+     * assemblies from being processed.
+     */
+    @Scheduled(initialDelay = 0, fixedDelay = ONE_DAY_IN_MILLIS)
+    public void updateMd5CheckSumForAllAssemblies() {
+        List<String> assemblyList = chromosomeService.getAssembliesWhereChromosomeMd5ChecksumIsNull();
+        for (String assembly : assemblyList) {
+            logger.info("Trying to update md5checksum for assembly: {}", assembly);
+            try {
+                updateMD5ChecksumForAllChromosomesInAssembly(assembly);
+            } catch (RuntimeException e) {
+                // Scheduler boundary: keep going with the other assemblies.
+                logger.error("Could not update md5checksum for assembly: {}", assembly, e);
+            }
+        }
+    }
+
+    /**
+     * Fills in the checksum of every chromosome of the given assembly that does not have one yet.
+     * <p>
+     * The pages are read from a query that only returns chromosomes <em>without</em> a checksum, so
+     * every successful update removes a row from that result set and shifts the following rows
+     * towards the first page. Blindly moving on to the next page would therefore skip rows. The
+     * page number is only advanced when a page produced no update at all (its rows stay where they
+     * are); otherwise the same page is requested again.
+     * <p>
+     * Chromosomes whose checksum could not be retrieved are remembered for the duration of this
+     * call and are not requested again when they reappear on a re-read page.
+     *
+     * @param assembly INSDC accession of the assembly to process
+     */
+    public void updateMD5ChecksumForAllChromosomesInAssembly(String assembly) {
+        Set<String> failedAccessions = new HashSet<>();
+        int pageNumber = 0;
+        while (true) {
+            Pageable pageable = PageRequest.of(pageNumber, DEFAULT_PAGE_SIZE);
+            Slice<ChromosomeEntity> chrSlice =
+                    chromosomeService.getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(assembly, pageable);
+            if (!chrSlice.hasContent()) {
+                return;
+            }
+
+            List<ChromosomeEntity> pending = chrSlice.getContent().stream()
+                    .filter(chromosome -> !failedAccessions.contains(chromosome.getInsdcAccession()))
+                    .collect(Collectors.toList());
+            updateMd5ChecksumForChromosome(pending);
+
+            boolean anyUpdated = false;
+            for (ChromosomeEntity chromosome : pending) {
+                if (hasChecksum(chromosome)) {
+                    anyUpdated = true;
+                } else {
+                    failedAccessions.add(chromosome.getInsdcAccession());
+                }
+            }
+            if (!anyUpdated) {
+                // Nothing left this page: its rows are all known failures, look at the next one.
+                pageNumber++;
+            }
+        }
+    }
+
+    /**
+     * Retrieves the checksum for each given chromosome, stores it on the entity and persists the
+     * chromosomes for which a non-empty checksum was obtained.
+     * <p>
+     * A chromosome whose retrieval fails is logged and left unchanged; it is not sent to the
+     * database. Retrieval runs on a parallel stream; each entity is only touched by the task that
+     * handles it.
+     *
+     * @param chromosomesList chromosomes to resolve; {@code null} or empty is a no-op
+     */
+    public void updateMd5ChecksumForChromosome(List<ChromosomeEntity> chromosomesList) {
+        if (chromosomesList == null || chromosomesList.isEmpty()) {
+            return;
+        }
+
+        chromosomesList.parallelStream().forEach(chromosome -> {
+            try {
+                String md5Checksum = md5ChecksumRetriever.retrieveMd5Checksum(chromosome.getInsdcAccession());
+                chromosome.setMd5checksum(md5Checksum);
+            } catch (Exception e) {
+                logger.warn("Could not retrieve md5Checksum for insdc accession: {}",
+                            chromosome.getInsdcAccession(), e);
+            }
+        });
+
+        List<ChromosomeEntity> resolved = chromosomesList.stream()
+                .filter(ChecksumSetter::hasChecksum)
+                .collect(Collectors.toList());
+        if (!resolved.isEmpty()) {
+            chromosomeService.updateMd5ChecksumForAll(resolved);
+        }
+    }
+
+    /** Tells whether the chromosome carries a non-null, non-empty checksum. */
+    private static boolean hasChecksum(ChromosomeEntity chromosome) {
+        String md5Checksum = chromosome.getMd5checksum();
+        return md5Checksum != null && !md5Checksum.isEmpty();
+    }
+}
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java
new file mode 100644
index 00000000..3b24ca0f
--- /dev/null
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java
@@ -0,0 +1,25 @@
+package uk.ac.ebi.eva.contigalias.scheduler;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.retry.annotation.Backoff;
+import org.springframework.retry.annotation.Retryable;
+import org.springframework.stereotype.Component;
+import org.springframework.web.client.RestTemplate;
+
+/**
+ * Looks up the MD5 checksum of a sequence by its INSDC accession through the metadata endpoint
+ * configured in {@link #INSDC_CHECKSUM_URL}.
+ */
+@Component
+public class Md5ChecksumRetriever {
+    private static final Logger logger = LoggerFactory.getLogger(Md5ChecksumRetriever.class);
+    private static final String INSDC_ACCESSION_PLACE_HOLDER = "INSDC_ACCESSION_PLACE_HOLDER";
+    private static final String INSDC_CHECKSUM_URL = "https://www.ebi.ac.uk/ena/cram/sequence/insdc:" + INSDC_ACCESSION_PLACE_HOLDER + "/metadata";
+
+    // NOTE(review): a default RestTemplate is used as-is; confirm whether connect/read timeouts
+    // should be configured for this client.
+    private final RestTemplate restTemplate = new RestTemplate();
+
+    /**
+     * Fetches the metadata document for the given accession and returns the text found at
+     * {@code metadata.md5}.
+     * <p>
+     * Any exception is retried: up to 5 attempts, starting with a 2 second delay that doubles
+     * after each attempt.
+     *
+     * @param insdcAccession INSDC accession of the sequence; must not be null or empty
+     * @return the non-empty MD5 checksum reported by the service
+     * @throws IllegalArgumentException if {@code insdcAccession} is null or empty
+     * @throws IllegalStateException    if the response has no body or no usable {@code metadata.md5} value
+     */
+    @Retryable(value = Exception.class, maxAttempts = 5, backoff = @Backoff(delay = 2000, multiplier = 2))
+    public String retrieveMd5Checksum(String insdcAccession) {
+        if (insdcAccession == null || insdcAccession.isEmpty()) {
+            throw new IllegalArgumentException("INSDC accession must not be null or empty");
+        }
+
+        String apiURL = INSDC_CHECKSUM_URL.replace(INSDC_ACCESSION_PLACE_HOLDER, insdcAccession);
+        logger.debug("Retrieving md5 checksum from {}", apiURL);
+        JsonNode jsonResponse = restTemplate.getForObject(apiURL, JsonNode.class);
+        if (jsonResponse == null) {
+            throw new IllegalStateException("Empty response while retrieving md5 checksum for " + insdcAccession);
+        }
+
+        // path() never returns null: absent fields yield a "missing" node instead of an NPE.
+        JsonNode md5Node = jsonResponse.path("metadata").path("md5");
+        if (md5Node.isMissingNode() || md5Node.isNull() || md5Node.asText().isEmpty()) {
+            // A JSON null would otherwise be returned as the literal text "null".
+            throw new IllegalStateException("No md5 checksum in response for " + insdcAccession);
+        }
+        return md5Node.asText();
+    }
+}
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java b/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java
index 93679963..898ae64b 100644
--- a/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java
@@ -25,6 +25,7 @@
import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;
import uk.ac.ebi.eva.contigalias.repo.ChromosomeRepository;
+import javax.transaction.Transactional;
import java.util.LinkedList;
import java.util.List;
@@ -54,6 +55,22 @@ public Page getChromosomesByAssemblyInsdcAccession(String asmI
return stripAssembliesFromChromosomes(chromosomes);
}
+ /**
+  * Returns the INSDC accessions of the assemblies that have at least one chromosome without an
+  * MD5 checksum, as reported by the repository.
+  *
+  * @return assembly INSDC accessions
+  */
+ public List<String> getAssembliesWhereChromosomeMd5ChecksumIsNull() {
+     return repository.findAssembliesWhereChromosomeMd5checksumIsNullOrEmpty();
+ }
+
+ /**
+  * Returns one page of the chromosomes of the given assembly that have no MD5 checksum yet.
+  * <p>
+  * Unlike the other lookups in this service the result is returned exactly as the repository
+  * delivers it, so each chromosome still references its assembly.
+  *
+  * @param asmInsdcAccession INSDC accession of the assembly
+  * @param request           paging information
+  * @return the requested page of chromosomes without a checksum
+  */
+ public Page<ChromosomeEntity> getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(String asmInsdcAccession, Pageable request) {
+     return repository.findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty(asmInsdcAccession, request);
+ }
+
+ /**
+  * Writes the MD5 checksum currently held by each given chromosome to the database, issuing one
+  * update per chromosome inside a single transaction.
+  * <p>
+  * Each chromosome must reference its assembly, because the assembly's INSDC accession is part of
+  * the update criteria.
+  *
+  * @param chromosomeEntityList chromosomes whose checksum should be stored; {@code null} or empty
+  *                             is a no-op
+  */
+ @Transactional
+ public void updateMd5ChecksumForAll(List<ChromosomeEntity> chromosomeEntityList) {
+     if (chromosomeEntityList == null || chromosomeEntityList.isEmpty()) {
+         return;
+     }
+     for (ChromosomeEntity chromosome : chromosomeEntityList) {
+         repository.updateMd5ChecksumByInsdcAccession(chromosome.getAssembly().getInsdcAccession(),
+                                                      chromosome.getInsdcAccession(),
+                                                      chromosome.getMd5checksum());
+     }
+ }
+
public Page getChromosomesByAssemblyRefseq(String asmRefseq, Pageable request) {
Page chromosomes = repository.findChromosomeEntitiesByAssembly_Refseq(asmRefseq, request);
return stripAssembliesFromChromosomes(chromosomes);