diff --git a/pom.xml b/pom.xml index f63b0907..48572f74 100644 --- a/pom.xml +++ b/pom.xml @@ -144,7 +144,7 @@ org.springframework.retry spring-retry - 1.2.5.RELEASE + 1.3.1 diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java b/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java index 0bc6cdba..3f286d2e 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/ContigAliasApplication.java @@ -22,7 +22,9 @@ import org.springframework.boot.web.servlet.support.SpringBootServletInitializer; import org.springframework.hateoas.config.EnableHypermediaSupport; import org.springframework.retry.annotation.EnableRetry; +import org.springframework.scheduling.annotation.EnableScheduling; +@EnableScheduling @SpringBootApplication @EnableRetry @EnableHypermediaSupport(type = EnableHypermediaSupport.HypermediaType.HAL) diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java b/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java index 0b6f5bd7..2a1ae338 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java @@ -19,11 +19,16 @@ import org.springframework.data.domain.Page; import org.springframework.data.domain.Pageable; import org.springframework.data.jpa.repository.JpaRepository; +import org.springframework.data.jpa.repository.Modifying; +import org.springframework.data.jpa.repository.Query; +import org.springframework.data.repository.query.Param; import org.springframework.stereotype.Repository; import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity; import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity; +import java.util.List; + @Repository public interface ChromosomeRepository extends JpaRepository { @@ -35,6 +40,16 @@ public interface ChromosomeRepository extends JpaRepository 
findChromosomeEntitiesByAssembly_InsdcAccession(String asmInsdcAccession, Pageable request);
 
+    @Query("SELECT c FROM ChromosomeEntity c WHERE c.assembly.insdcAccession = :asmInsdcAccession AND (c.md5checksum IS NULL OR c.md5checksum = '')")
+    Page<ChromosomeEntity> findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty(@Param("asmInsdcAccession") String asmInsdcAccession, Pageable pageable);
+
+    @Query("SELECT distinct c.assembly.insdcAccession FROM ChromosomeEntity c WHERE c.md5checksum IS NULL OR c.md5checksum = ''")
+    List<String> findAssembliesWhereChromosomeMd5checksumIsNullOrEmpty();
+
+    @Modifying
+    @Query("UPDATE ChromosomeEntity c SET c.md5checksum = :md5Checksum WHERE c.assembly.insdcAccession= :asmInsdcAccession AND c.insdcAccession = :insdcAccession")
+    void updateMd5ChecksumByInsdcAccession(@Param("asmInsdcAccession") String asmInsdcAccession, @Param("insdcAccession") String insdcAccession, @Param("md5Checksum") String md5Checksum);
+
     Page<ChromosomeEntity> findChromosomeEntitiesByAssembly_Refseq(String asmRefseq, Pageable request);
 
     Page<ChromosomeEntity> findChromosomeEntitiesByGenbankSequenceNameAndAssembly_Taxid(String genbankName, long asmTaxid, Pageable request);
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java
new file mode 100644
index 00000000..fe7c8cd2
--- /dev/null
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/ChecksumSetter.java
@@ -0,0 +1,111 @@
+package uk.ac.ebi.eva.contigalias.scheduler;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.data.domain.PageRequest;
+import org.springframework.data.domain.Pageable;
+import org.springframework.data.domain.Slice;
+import org.springframework.scheduling.annotation.Scheduled;
+import org.springframework.stereotype.Component;
+import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;
+import uk.ac.ebi.eva.contigalias.service.ChromosomeService;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * Scheduled job that fills in the md5 checksum of chromosomes that do not have one yet.
+ */
+@Component
+public class ChecksumSetter {
+    private static final Logger logger = LoggerFactory.getLogger(ChecksumSetter.class);
+    private static final int DEFAULT_PAGE_SIZE = 10000;
+    private final ChromosomeService chromosomeService;
+    private final Md5ChecksumRetriever md5ChecksumRetriever;
+
+    @Autowired
+    public ChecksumSetter(ChromosomeService chromosomeService, Md5ChecksumRetriever md5ChecksumRetriever) {
+        this.chromosomeService = chromosomeService;
+        this.md5ChecksumRetriever = md5ChecksumRetriever;
+    }
+
+    /**
+     * Updates the md5 checksum for every assembly that has chromosomes without one.
+     * Runs with no initial delay and then again 24 hours after the previous run has finished.
+     */
+    @Scheduled(initialDelay = 0, fixedDelay = 24 * 60 * 60 * 1000)
+    public void updateMd5CheckSumForAllAssemblies() {
+        List<String> assemblyList = chromosomeService.getAssembliesWhereChromosomeMd5ChecksumIsNull();
+        for (String assembly : assemblyList) {
+            logger.info("Trying to update md5checksum for assembly: " + assembly);
+            updateMD5ChecksumForAllChromosomesInAssembly(assembly);
+        }
+    }
+
+    /**
+     * Retrieves and stores the md5 checksum of every chromosome of the assembly that lacks one.
+     *
+     * @param assembly INSDC accession of the assembly to process
+     */
+    public void updateMD5ChecksumForAllChromosomesInAssembly(String assembly) {
+        // Accessions whose checksum could not be retrieved in this run. They keep matching the query, so
+        // they are remembered to avoid requesting them again each time the same page is read.
+        Set<String> failedAccessions = new HashSet<>();
+        int pageNumber = 0;
+        while (true) {
+            Pageable pageable = PageRequest.of(pageNumber, DEFAULT_PAGE_SIZE);
+            Slice<ChromosomeEntity> chrSlice = chromosomeService.getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(assembly, pageable);
+            if (!chrSlice.hasContent()) {
+                return;
+            }
+
+            List<ChromosomeEntity> pendingChromosomes = chrSlice.getContent().stream()
+                    .filter(chromosome -> !failedAccessions.contains(chromosome.getInsdcAccession()))
+                    .collect(Collectors.toList());
+            updateMd5ChecksumForChromosome(pendingChromosomes);
+
+            boolean anyChecksumStored = false;
+            for (ChromosomeEntity chromosome : pendingChromosomes) {
+                String md5Checksum = chromosome.getMd5checksum();
+                if (md5Checksum == null || md5Checksum.isEmpty()) {
+                    failedAccessions.add(chromosome.getInsdcAccession());
+                } else {
+                    anyChecksumStored = true;
+                }
+            }
+
+            // The query only returns chromosomes without a checksum, so every stored checksum shrinks its
+            // result and moves the remaining rows towards the first page. Advance only when this page
+            // produced no update; otherwise the rows that moved up would be skipped.
+            if (!anyChecksumStored) {
+                pageNumber++;
+            }
+        }
+    }
+
+    /**
+     * Retrieves the md5 checksum of each given chromosome and stores the result.
+     * A chromosome whose checksum cannot be retrieved is logged and keeps its current value.
+     *
+     * @param chromosomesList chromosomes to update; their md5checksum field is set in place
+     */
+    public void updateMd5ChecksumForChromosome(List<ChromosomeEntity> chromosomesList) {
+        if (chromosomesList.isEmpty()) {
+            return;
+        }
+
+        chromosomesList.parallelStream().forEach(chromosome -> {
+            try {
+                String md5Checksum = md5ChecksumRetriever.retrieveMd5Checksum(chromosome.getInsdcAccession());
+                chromosome.setMd5checksum(md5Checksum);
+            } catch (Exception e) {
+                logger.warn("Could not retrieve md5Checksum for insdc accession: {}", chromosome.getInsdcAccession(), e);
+            }
+        });
+
+        chromosomeService.updateMd5ChecksumForAll(chromosomesList);
+    }
+}
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java
new file mode 100644
index 00000000..3b24ca0f
--- /dev/null
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/Md5ChecksumRetriever.java
@@ -0,0 +1,41 @@
+package uk.ac.ebi.eva.contigalias.scheduler;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.retry.annotation.Backoff;
+import org.springframework.retry.annotation.Retryable;
+import org.springframework.stereotype.Component;
+import org.springframework.web.client.RestTemplate;
+
+/**
+ * Looks up the md5 checksum of a sequence through the metadata endpoint in INSDC_CHECKSUM_URL.
+ */
+@Component
+public class Md5ChecksumRetriever {
+    private static final Logger logger = LoggerFactory.getLogger(Md5ChecksumRetriever.class);
+    private static final String INSDC_ACCESSION_PLACE_HOLDER = "INSDC_ACCESSION_PLACE_HOLDER";
+    private static final String INSDC_CHECKSUM_URL = "https://www.ebi.ac.uk/ena/cram/sequence/insdc:" + INSDC_ACCESSION_PLACE_HOLDER + "/metadata";
+    private RestTemplate restTemplate = new RestTemplate();
+
+    /**
+     * Requests the metadata of the given sequence and returns its md5 checksum.
+     * The method is declared retryable on any exception: up to 5 attempts, with a delay that starts at 2 seconds and doubles.
+     *
+     * @param insdcAccession INSDC accession of the sequence
+     * @return the value of metadata.md5 in the response
+     * @throws IllegalStateException if the response has no md5 value
+     */
+    @Retryable(value = Exception.class, maxAttempts = 5, backoff = @Backoff(delay = 2000, multiplier = 2))
+    public String retrieveMd5Checksum(String insdcAccession) {
+        String apiURL = INSDC_CHECKSUM_URL.replace(INSDC_ACCESSION_PLACE_HOLDER, insdcAccession);
+        JsonNode jsonResponse = restTemplate.getForObject(apiURL, JsonNode.class);
+        JsonNode metadata = jsonResponse == null ? null : jsonResponse.get("metadata");
+        JsonNode md5 = metadata == null ? null : metadata.get("md5");
+        // asText() on a JSON null yields the string "null", which must not be stored as a checksum
+        if (md5 == null || md5.isNull()) {
+            throw new IllegalStateException("No md5 checksum in response for insdc accession: " + insdcAccession);
+        }
+        return md5.asText();
+    }
+}
diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java
b/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java
index 93679963..898ae64b 100644
--- a/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java
+++ b/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java
@@ -25,6 +25,7 @@
 import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;
 import uk.ac.ebi.eva.contigalias.repo.ChromosomeRepository;
 
+import javax.transaction.Transactional;
 import java.util.LinkedList;
 import java.util.List;
 
@@ -54,6 +55,35 @@ public Page<ChromosomeEntity> getChromosomesByAssemblyInsdcAccession(String asmI
         return stripAssembliesFromChromosomes(chromosomes);
     }
 
+    /**
+     * Lists the assemblies, as returned by the repository, that have a chromosome whose md5 checksum is null or empty.
+     */
+    public List<String> getAssembliesWhereChromosomeMd5ChecksumIsNull() {
+        return repository.findAssembliesWhereChromosomeMd5checksumIsNullOrEmpty();
+    }
+
+    /**
+     * Returns one page of the chromosomes of an assembly whose md5 checksum is null or empty.
+     */
+    public Page<ChromosomeEntity> getChromosomesByAssemblyInsdcAccessionWhereMd5ChecksumIsNull(String asmInsdcAccession, Pageable request) {
+        return repository.findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty(asmInsdcAccession, request);
+    }
+
+    /**
+     * Stores the md5 checksum carried by each given chromosome; the method is annotated as transactional.
+     * Chromosomes that carry no checksum (null or empty) are skipped: there is nothing to store for them.
+     */
+    @Transactional
+    public void updateMd5ChecksumForAll(List<ChromosomeEntity> chromosomeEntityList) {
+        for (ChromosomeEntity chromosome : chromosomeEntityList) {
+            String md5Checksum = chromosome.getMd5checksum();
+            if (md5Checksum == null || md5Checksum.isEmpty()) {
+                continue;
+            }
+            repository.updateMd5ChecksumByInsdcAccession(chromosome.getAssembly().getInsdcAccession(), chromosome.getInsdcAccession(), md5Checksum);
+        }
+    }
+
     public Page<ChromosomeEntity> getChromosomesByAssemblyRefseq(String asmRefseq, Pageable request) {
         Page<ChromosomeEntity> chromosomes = repository.findChromosomeEntitiesByAssembly_Refseq(asmRefseq, request);
         return stripAssembliesFromChromosomes(chromosomes);