diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java b/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java index a715a648..928291ad 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/repo/ChromosomeRepository.java @@ -33,15 +33,15 @@ @Repository public interface ChromosomeRepository extends JpaRepository { - Page findChromosomeEntitiesByInsdcAccession(String insdcAccession, Pageable request); + Page findChromosomeEntitiesByInsdcAccessionOrderByInsdcAccession(String insdcAccession, Pageable request); - Page findChromosomeEntitiesByRefseq(String refseq, Pageable request); + Page findChromosomeEntitiesByRefseqOrderByInsdcAccession(String refseq, Pageable request); - Page findChromosomeEntitiesByInsdcAccessionOrRefseq(String insdcAccession, String refseq, Pageable request); + Page findChromosomeEntitiesByInsdcAccessionOrRefseqOrderByInsdcAccession(String insdcAccession, String refseq, Pageable request); - Page findChromosomeEntitiesByAssembly_InsdcAccession(String asmInsdcAccession, Pageable request); + Page findChromosomeEntitiesByAssembly_InsdcAccessionOrderByInsdcAccession(String asmInsdcAccession, Pageable request); - @Query("SELECT c FROM ChromosomeEntity c WHERE c.assembly.insdcAccession = :asmInsdcAccession AND (c.md5checksum IS NULL OR c.md5checksum = '')") + @Query("SELECT c FROM ChromosomeEntity c WHERE c.assembly.insdcAccession = :asmInsdcAccession AND (c.md5checksum IS NULL OR c.md5checksum = '') ORDER BY c.insdcAccession") Page findChromosomeEntitiesByAssembly_InsdcAccessionAndMd5checksumIsNullOrEmpty(@Param("asmInsdcAccession") String asmInsdcAccession, Pageable pageable); @Query("SELECT distinct c.assembly.insdcAccession FROM ChromosomeEntity c WHERE c.md5checksum IS NULL OR c.md5checksum = ''") @@ -60,33 +60,33 @@ public interface ChromosomeRepository extends JpaRepository findChromosomeEntitiesByAssembly_Refseq(String 
asmRefseq, Pageable request); + Page findChromosomeEntitiesByAssembly_RefseqOrderByInsdcAccession(String asmRefseq, Pageable request); - Page findChromosomeEntitiesByGenbankSequenceNameAndAssembly_Taxid(String genbankName, long asmTaxid, Pageable request); + Page findChromosomeEntitiesByGenbankSequenceNameAndAssembly_TaxidOrderByInsdcAccession(String genbankName, long asmTaxid, Pageable request); - Page findChromosomeEntitiesByUcscNameAndAssembly_Taxid(String ucscName, long asmTaxid, - Pageable request); + Page findChromosomeEntitiesByUcscNameAndAssembly_TaxidOrderByInsdcAccession(String ucscName, long asmTaxid, + Pageable request); - Page findChromosomeEntitiesByEnaSequenceNameAndAssembly_Taxid(String enaName, long asmTaxid, - Pageable request); + Page findChromosomeEntitiesByEnaSequenceNameAndAssembly_TaxidOrderByInsdcAccession(String enaName, long asmTaxid, + Pageable request); - Page findChromosomeEntitiesByGenbankSequenceNameAndAssembly(String genbankName, AssemblyEntity assembly, - Pageable request); + Page findChromosomeEntitiesByGenbankSequenceNameAndAssemblyOrderByInsdcAccession(String genbankName, AssemblyEntity assembly, + Pageable request); - Page findChromosomeEntitiesByUcscNameAndAssembly(String ucscName, AssemblyEntity assembly, - Pageable request); + Page findChromosomeEntitiesByUcscNameAndAssemblyOrderByInsdcAccession(String ucscName, AssemblyEntity assembly, + Pageable request); - Page findChromosomeEntitiesByEnaSequenceNameAndAssembly(String enaName, AssemblyEntity assembly, - Pageable request); + Page findChromosomeEntitiesByEnaSequenceNameAndAssemblyOrderByInsdcAccession(String enaName, AssemblyEntity assembly, + Pageable request); - Page findChromosomeEntitiesByGenbankSequenceName(String genbankName, Pageable request); + Page findChromosomeEntitiesByGenbankSequenceNameOrderByInsdcAccession(String genbankName, Pageable request); - Page findChromosomeEntitiesByEnaSequenceName(String enaSequenceName, Pageable request); + Page 
findChromosomeEntitiesByEnaSequenceNameOrderByInsdcAccession(String enaSequenceName, Pageable request); - Page findChromosomeEntitiesByAssemblyInsdcAccessionOrAssemblyRefseq(String insdcAccession, String refseq, - Pageable request); + Page findChromosomeEntitiesByAssemblyInsdcAccessionOrAssemblyRefseqOrderByInsdcAccession(String insdcAccession, String refseq, + Pageable request); - Page findChromosomeEntitiesByUcscName(String ucscName, Pageable request); + Page findChromosomeEntitiesByUcscNameOrderByInsdcAccession(String ucscName, Pageable request); Page findChromosomeEntitiesByMd5checksumOrderByInsdcAccession(String md5Checksum, Pageable request); diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/MD5ChecksumUpdater.java b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/MD5ChecksumUpdater.java index b9db041d..61ad6cdd 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/MD5ChecksumUpdater.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/scheduler/MD5ChecksumUpdater.java @@ -4,15 +4,16 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.jdbc.core.JdbcTemplate; -import org.springframework.jdbc.core.ResultSetExtractor; +import org.springframework.data.domain.Page; +import org.springframework.data.domain.PageRequest; +import org.springframework.data.domain.Pageable; import org.springframework.stereotype.Component; import org.springframework.web.client.RestTemplate; import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity; import uk.ac.ebi.eva.contigalias.service.ChromosomeService; -import java.util.ArrayList; import java.util.List; +import java.util.stream.Collectors; @Component public class MD5ChecksumUpdater { @@ -20,49 +21,46 @@ public class MD5ChecksumUpdater { private final int DEFAULT_BATCH_SIZE = 10000; private String INSDC_ACCESSION_PLACE_HOLDER = "INSDC_ACCESSION_PLACE_HOLDER"; private String INSDC_CHECKSUM_URL = 
"https://www.ebi.ac.uk/ena/cram/sequence/insdc:" + INSDC_ACCESSION_PLACE_HOLDER + "/metadata"; - private RestTemplate restTemplate; - private final JdbcTemplate jdbcTemplate; private final ChromosomeService chromosomeService; + private RestTemplate restTemplate; @Autowired - public MD5ChecksumUpdater(RestTemplate restTemplate, JdbcTemplate jdbcTemplate, ChromosomeService chromosomeService) { - this.restTemplate = restTemplate; - this.jdbcTemplate = jdbcTemplate; + public MD5ChecksumUpdater(ChromosomeService chromosomeService, RestTemplate restTemplate) { this.chromosomeService = chromosomeService; + this.restTemplate = restTemplate; } - public void updateMD5ChecksumForAssembly(String assembly) { + public void updateMD5ChecksumForAssembly(String accession) { + logger.info("Start Update MD5 Checksum for assembly : " + accession); try { - logger.info("Trying to update MD5 Checksum for assembly: " + assembly); - String sql = "select * from chromosome c where c.assembly_insdc_accession = '" + assembly - + "' AND (c.md5checksum IS NULL OR c.md5checksum = '')"; - jdbcTemplate.query(sql, (ResultSetExtractor) rs -> { - long chromosomeProcessed = 0; - List chromosomeEntityList = new ArrayList<>(); - while (rs.next()) { - ChromosomeEntity chromosome = new ChromosomeEntity(); - chromosome.setInsdcAccession(rs.getString(1)); - chromosomeEntityList.add(chromosome); + int pageNumber = 0; + Page chrPage; + long chromosomeProcessed = 0; + long chromosomeUpdated = 0; + do { + Pageable pageable = PageRequest.of(pageNumber, DEFAULT_BATCH_SIZE); + chrPage = chromosomeService.getChromosomesByAssemblyAccession(accession, pageable); - if (chromosomeEntityList.size() == DEFAULT_BATCH_SIZE) { - updateMd5ChecksumForChromosome(assembly, chromosomeEntityList); - chromosomeProcessed += chromosomeEntityList.size(); - logger.info("Chromosomes Processed till now: " + chromosomeProcessed); - chromosomeEntityList = new ArrayList<>(); - } - } - if (chromosomeEntityList.size() > 0) { - 
updateMd5ChecksumForChromosome(assembly, chromosomeEntityList); - chromosomeProcessed += chromosomeEntityList.size(); - logger.info("Chromosomes Processed till now: " + chromosomeProcessed); + List chromosomeEntityList = chrPage.getContent(); + List chromosomeEntitiesWithoutMD5 = chromosomeEntityList.stream() + .filter(c -> c.getMd5checksum() == null || c.getMd5checksum().isEmpty()) + .collect(Collectors.toList()); + + if(!chromosomeEntitiesWithoutMD5.isEmpty()){ + updateMd5ChecksumForChromosome(accession, chromosomeEntitiesWithoutMD5); /* send only the entries still missing a checksum, matching the guard above and the "selected for update" counter below */ } - logger.info("Finished updating MD5 Checksum for assembly: " + assembly); + chromosomeProcessed += chromosomeEntityList.size(); + chromosomeUpdated += chromosomeEntitiesWithoutMD5.size(); + logger.info("Chromosomes Processed till now: {}, selected for update till now: {}", chromosomeProcessed, chromosomeUpdated); + + pageNumber++; + } while (chrPage.hasNext()); + + logger.info("Finished updating MD5 Checksum for assembly: " + accession); - return null; - }); } catch (Exception e) { - logger.error("Error while updating MD5 Checksum for assembly : " + assembly + "\n" + e); + logger.error("Error while updating MD5 Checksum for assembly : {}", accession, e); /* exception passed as the trailing argument so the stack trace is logged, not just e.toString() */ } } diff --git a/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java b/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java index 52dfd6ad..f30e27ae 100644 --- a/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java +++ b/src/main/java/uk/ac/ebi/eva/contigalias/service/ChromosomeService.java @@ -46,17 +46,17 @@ public ChromosomeService(ChromosomeRepository repository, JdbcTemplate jdbcTempl public Page getChromosomesByInsdcAccession(String insdcAccession, Pageable request) { - Page chromosomes = repository.findChromosomeEntitiesByInsdcAccession(insdcAccession, request); + Page chromosomes = repository.findChromosomeEntitiesByInsdcAccessionOrderByInsdcAccession(insdcAccession, request); return 
stripChromosomesAndScaffoldsFromAssembly(chromosomes); } public Page getChromosomesByRefseq(String refseq, Pageable request) { - Page chromosomes = repository.findChromosomeEntitiesByRefseq(refseq, request); + Page chromosomes = repository.findChromosomeEntitiesByRefseqOrderByInsdcAccession(refseq, request); return stripChromosomesAndScaffoldsFromAssembly(chromosomes); } public Page getChromosomesByAssemblyInsdcAccession(String asmInsdcAccession, Pageable request) { - Page chromosomes = repository.findChromosomeEntitiesByAssembly_InsdcAccession(asmInsdcAccession, request); + Page chromosomes = repository.findChromosomeEntitiesByAssembly_InsdcAccessionOrderByInsdcAccession(asmInsdcAccession, request); return stripAssembliesFromChromosomes(chromosomes); } @@ -84,17 +84,17 @@ public void updateENASequenceNameForAllChromosomeInAssembly(String assembly, Lis } public Page getChromosomesByAssemblyRefseq(String asmRefseq, Pageable request) { - Page chromosomes = repository.findChromosomeEntitiesByAssembly_Refseq(asmRefseq, request); + Page chromosomes = repository.findChromosomeEntitiesByAssembly_RefseqOrderByInsdcAccession(asmRefseq, request); return stripAssembliesFromChromosomes(chromosomes); } public List getAssembliesByChromosomeInsdcAccession(String chrInsdcAccession) { - Page page = repository.findChromosomeEntitiesByInsdcAccession(chrInsdcAccession, Pageable.unpaged()); + Page page = repository.findChromosomeEntitiesByInsdcAccessionOrderByInsdcAccession(chrInsdcAccession, Pageable.unpaged()); return extractAssembliesFromChromosomes(page); } public List getAssembliesByChromosomeRefseq(String chrRefseq) { - Page page = repository.findChromosomeEntitiesByRefseq(chrRefseq, Pageable.unpaged()); + Page page = repository.findChromosomeEntitiesByRefseqOrderByInsdcAccession(chrRefseq, Pageable.unpaged()); return extractAssembliesFromChromosomes(page); } @@ -111,64 +111,64 @@ public List extractAssembliesFromChromosomes(Page getChromosomesByName(String name, Pageable 
request) { - Page page = repository.findChromosomeEntitiesByGenbankSequenceName(name, request); + Page page = repository.findChromosomeEntitiesByGenbankSequenceNameOrderByInsdcAccession(name, request); return stripChromosomesAndScaffoldsFromAssembly(page); } public Page getChromosomesByNameAndAssemblyTaxid(String name, long asmTaxid, Pageable request) { - Page page = repository.findChromosomeEntitiesByGenbankSequenceNameAndAssembly_Taxid(name, asmTaxid, request); + Page page = repository.findChromosomeEntitiesByGenbankSequenceNameAndAssembly_TaxidOrderByInsdcAccession(name, asmTaxid, request); return stripChromosomesAndScaffoldsFromAssembly(page); } public Page getChromosomesByNameAndAssembly( String name, AssemblyEntity assembly, Pageable request) { - Page page = repository.findChromosomeEntitiesByGenbankSequenceNameAndAssembly(name, assembly, request); + Page page = repository.findChromosomeEntitiesByGenbankSequenceNameAndAssemblyOrderByInsdcAccession(name, assembly, request); assembly.setChromosomes(null); return injectAssemblyIntoChromosomes(page, assembly); } public Page getChromosomesByAssemblyAccession(String accession, Pageable request) { - Page chromosomes = repository.findChromosomeEntitiesByAssemblyInsdcAccessionOrAssemblyRefseq( + Page chromosomes = repository.findChromosomeEntitiesByAssemblyInsdcAccessionOrAssemblyRefseqOrderByInsdcAccession( accession, accession, request); return stripAssembliesFromChromosomes(chromosomes); } public Page getChromosomesByUcscName(String ucscName, Pageable request) { - Page page = repository.findChromosomeEntitiesByUcscName(ucscName, request); + Page page = repository.findChromosomeEntitiesByUcscNameOrderByInsdcAccession(ucscName, request); return stripChromosomesAndScaffoldsFromAssembly(page); } public Page getChromosomesByUcscNameAndAssemblyTaxid( String ucscName, long asmTaxid, Pageable request) { Page page - = repository.findChromosomeEntitiesByUcscNameAndAssembly_Taxid(ucscName, asmTaxid, request); + = 
repository.findChromosomeEntitiesByUcscNameAndAssembly_TaxidOrderByInsdcAccession(ucscName, asmTaxid, request); return stripChromosomesAndScaffoldsFromAssembly(page); } public Page getChromosomesByUcscNameAndAssembly(String ucscName, AssemblyEntity assembly, Pageable request) { Page page - = repository.findChromosomeEntitiesByUcscNameAndAssembly(ucscName, assembly, request); + = repository.findChromosomeEntitiesByUcscNameAndAssemblyOrderByInsdcAccession(ucscName, assembly, request); assembly.setChromosomes(null); return injectAssemblyIntoChromosomes(page, assembly); } public Page getChromosomesByEnaName(String enaName, Pageable request) { - Page page = repository.findChromosomeEntitiesByEnaSequenceName(enaName, request); + Page page = repository.findChromosomeEntitiesByEnaSequenceNameOrderByInsdcAccession(enaName, request); return stripChromosomesAndScaffoldsFromAssembly(page); } public Page getChromosomesByEnaNameAndAssemblyTaxid( String enaName, long asmTaxid, Pageable request) { Page page - = repository.findChromosomeEntitiesByEnaSequenceNameAndAssembly_Taxid(enaName, asmTaxid, request); + = repository.findChromosomeEntitiesByEnaSequenceNameAndAssembly_TaxidOrderByInsdcAccession(enaName, asmTaxid, request); return stripChromosomesAndScaffoldsFromAssembly(page); } public Page getChromosomesByEnaNameAndAssembly( String enaName, AssemblyEntity assembly, Pageable request) { Page page - = repository.findChromosomeEntitiesByEnaSequenceNameAndAssembly(enaName, assembly, request); + = repository.findChromosomeEntitiesByEnaSequenceNameAndAssemblyOrderByInsdcAccession(enaName, assembly, request); assembly.setChromosomes(null); return injectAssemblyIntoChromosomes(page, assembly); } @@ -211,7 +211,7 @@ private void stripAssemblyFromChromosome(ChromosomeEntity chromosome) { } public void putChromosomeChecksumsByAccession(String accession, String md5, String trunc512) { - Page page = repository.findChromosomeEntitiesByInsdcAccessionOrRefseq( + Page page = 
repository.findChromosomeEntitiesByInsdcAccessionOrRefseqOrderByInsdcAccession( accession, accession, Pageable.unpaged()); if (page.isEmpty()) { throw new IllegalArgumentException( diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/scheduler/MD5ChecksumUpdaterTest.java b/src/test/java/uk/ac/ebi/eva/contigalias/scheduler/MD5ChecksumUpdaterTest.java index 6969b670..b490e6cc 100644 --- a/src/test/java/uk/ac/ebi/eva/contigalias/scheduler/MD5ChecksumUpdaterTest.java +++ b/src/test/java/uk/ac/ebi/eva/contigalias/scheduler/MD5ChecksumUpdaterTest.java @@ -9,7 +9,6 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.test.context.SpringBootTest; import org.springframework.data.domain.PageRequest; -import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.test.annotation.DirtiesContext; import org.springframework.test.context.ActiveProfiles; import org.springframework.web.client.RestTemplate; @@ -34,15 +33,13 @@ class MD5ChecksumUpdaterTest { private AssemblyEntity assemblyEntity = AssemblyGenerator.generate(); private List chromosomeEntityList = new ArrayList<>(); @Autowired - private JdbcTemplate jdbcTemplate; - @Autowired private ChromosomeService chromosomeService; private MD5ChecksumUpdater md5ChecksumUpdater; @BeforeEach void setup() throws JsonProcessingException { RestTemplate restTemplate = mock(RestTemplate.class); - md5ChecksumUpdater = new MD5ChecksumUpdater(restTemplate, jdbcTemplate, chromosomeService); + md5ChecksumUpdater = new MD5ChecksumUpdater(chromosomeService, restTemplate); for (int i = 0; i < 5; i++) { ChromosomeEntity chromosomeEntity = ChromosomeGenerator.generate(assemblyEntity); chromosomeEntityList.add(chromosomeEntity); @@ -58,13 +55,13 @@ void setup() throws JsonProcessingException { @Test void testUpdateMD5ChecksumForAssembly() { chromosomeService.getChromosomesByAssemblyInsdcAccession(assemblyEntity.getInsdcAccession(), - PageRequest.of(0, 100)) + PageRequest.of(0, 10)) 
.forEach(c -> assertNull(c.getMd5checksum())); md5ChecksumUpdater.updateMD5ChecksumForAssembly(assemblyEntity.getInsdcAccession()); chromosomeService.getChromosomesByAssemblyInsdcAccession(assemblyEntity.getInsdcAccession(), - PageRequest.of(0, 100)) + PageRequest.of(0, 10)) .forEach(c -> assertEquals(c.getInsdcAccession() + "-MD5", c.getMd5checksum())); } } diff --git a/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyAndChromosomeServiceIntegrationTest.java b/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyAndChromosomeServiceIntegrationTest.java index b9447aaf..f0be306b 100644 --- a/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyAndChromosomeServiceIntegrationTest.java +++ b/src/test/java/uk/ac/ebi/eva/contigalias/service/AssemblyAndChromosomeServiceIntegrationTest.java @@ -205,8 +205,14 @@ void getChromosomesByNameAndAssemblySameNameSameTaxid() { assertNotNull(page); assertEquals(2, page.getTotalElements()); List entityList = page.get().collect(Collectors.toList()); - assertChromosomeEntityIdentical(chromosomeEntities[0], entityList.get(0)); - assertChromosomeEntityIdentical(chromosomeEntities[1], entityList.get(1)); + ChromosomeEntity first = entityList.stream() /* results are looked up by accession rather than by list position */ + .filter(c -> chromosomeEntities[0].getInsdcAccession().equals(c.getInsdcAccession())) /* value comparison; == on String only checks reference identity */ + .findFirst().orElseThrow(() -> new AssertionError("No returned chromosome matches chromosomeEntities[0]")); + ChromosomeEntity second = entityList.stream() + .filter(c -> chromosomeEntities[1].getInsdcAccession().equals(c.getInsdcAccession())) + .findFirst().orElseThrow(() -> new AssertionError("No returned chromosome matches chromosomeEntities[1]")); + assertChromosomeEntityIdentical(chromosomeEntities[0], first); + assertChromosomeEntityIdentical(chromosomeEntities[1], second); } /**