Skip to content

Commit

Permalink
scheduled jobs for ena sequence name and md5 checksum retriever
Browse files Browse the repository at this point in the history
  • Loading branch information
nitin-ebi committed Feb 9, 2024
1 parent dadb928 commit 5515132
Show file tree
Hide file tree
Showing 21 changed files with 456 additions and 327 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

@RequestMapping("/v1/admin")
@RestController
Expand Down Expand Up @@ -101,24 +99,35 @@ public ResponseEntity<String> retrieveAndInsertMd5ChecksumForAssembly(@PathVaria
handler.retrieveAndInsertMd5ChecksumForAssembly(asmAccession);
return ResponseEntity.ok("A task has been submitted for updating md5checksum for all chromosomes " +
"in assembly " + asmAccession + ". Depending upon the number of chromosomes present in assembly, " +
"this might take some time to complete");
"and other scheduled jobs, this might take some time to complete");
} catch (AssemblyNotFoundException e) {
return ResponseEntity.ok("Could not find assembly " + asmAccession +
". Please insert the assembly first (md5checksum will be updated as part of the insertion process");
}
}

@ApiOperation(value = "Given an assembly accession, retrieve ENA sequence name for all chromosomes belonging to assembly and update")
@PutMapping(value = "assemblies/{accession}/enaSequenceName")
public ResponseEntity<String> retrieveAndInsertENASequenceNameForAssembly(@PathVariable(name = "accession")
@ApiParam(value = "INSDC or RefSeq assembly accession. " +
"Eg: GCA_000001405.10") String asmAccession) {
try {
handler.getAssemblyByAccession(asmAccession);
handler.retrieveAndInsertENASequenceNameForAssembly(asmAccession);
return ResponseEntity.ok("A task has been submitted for updating ENA Sequence Name for all chromosomes " +
"in assembly " + asmAccession + ". Depending upon the number of chromosomes present in assembly, " +
"and other scheduled jobs, this might take some time to complete");
} catch (AssemblyNotFoundException e) {
return ResponseEntity.ok("Could not find assembly " + asmAccession +
". Please insert the assembly first (ENA sequence name will be updated as part of the insertion process");
}
}

@ApiOperation(value = "Retrieve list of assemblies for which MD5 Checksum updates are running/going-to-run ")
@GetMapping(value = "assemblies/md5checksum/status")
public ResponseEntity<String> getMD5ChecksumUpdateTaskStatus() {
Map<String, Set<String>> md5ChecksumUpdateTasks = handler.getMD5ChecksumUpdateTaskStatus();
Set<String> runningTasks = md5ChecksumUpdateTasks.get("running");
Set<String> scheduledTasks = md5ChecksumUpdateTasks.get("scheduled");
String runningTaskRes = runningTasks == null || runningTasks.isEmpty() ? "No running MD5 checksum update tasks" :
runningTasks.stream().collect(Collectors.joining(","));
String scheduledTaskRes = scheduledTasks == null || scheduledTasks.isEmpty() ? "No scheduled MD5 checksum update tasks" :
scheduledTasks.stream().collect(Collectors.joining(","));
return ResponseEntity.ok("running: " + runningTaskRes + "\nscheduled: " + scheduledTaskRes);
public ResponseEntity<List<String>> getMD5ChecksumUpdateTaskStatus() {
List<String> scheduledJobStatus = handler.getScheduledJobStatus();
return ResponseEntity.ok(scheduledJobStatus);
}

// This endpoint can be enabled in the future when checksums for assemblies are added to the project.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,13 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.web.PagedResourcesAssembler;
import org.springframework.stereotype.Service;

import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity;
import uk.ac.ebi.eva.contigalias.service.AssemblyService;
import uk.ac.ebi.eva.contigalias.service.ChromosomeService;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

@Service
public class AdminHandler {
Expand All @@ -52,7 +49,7 @@ public Optional<AssemblyEntity> getAssemblyByAccession(String accession) {
return assemblyService.getAssemblyByAccession(accession);
}

public void fetchAndInsertAssemblyByAccession(String accession) throws IOException {
public void fetchAndInsertAssemblyByAccession(String accession) {
assemblyService.fetchAndInsertAssembly(accession);
}

Expand All @@ -64,8 +61,12 @@ public void retrieveAndInsertMd5ChecksumForAssembly(String accession) {
assemblyService.retrieveAndInsertMd5ChecksumForAssembly(accession);
}

public Map<String, Set<String>> getMD5ChecksumUpdateTaskStatus() {
return assemblyService.getMD5ChecksumUpdateTaskStatus();
public void retrieveAndInsertENASequenceNameForAssembly(String accession) {
assemblyService.retrieveAndInsertENASequenceNameForAssembly(accession);
}

public List<String> getScheduledJobStatus() {
return assemblyService.getScheduledJobStatus();
}

public void deleteAssemblyByAccession(String accession) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageImpl;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.web.PagedResourcesAssembler;
import org.springframework.hateoas.EntityModel;
Expand Down Expand Up @@ -83,6 +82,7 @@ public PagedModel<EntityModel<AssemblyEntity>> getAssemblyByRefseq(String refseq

public PagedModel<EntityModel<AssemblyEntity>> getAssembliesByTaxid(long taxid, Pageable request) {
Page<AssemblyEntity> page = assemblyService.getAssembliesByTaxid(taxid, request);
page.forEach(it->it.setChromosomes(null));
return generatePagedModelFromPage(page, assemblyAssembler);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@
import uk.ac.ebi.eva.contigalias.entities.SequenceEntity;
import uk.ac.ebi.eva.contigalias.exception.DownloadFailedException;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
Expand Down Expand Up @@ -112,7 +110,7 @@ public Optional<Path> downloadAssemblyReport(String accession) throws IOExceptio
try {
enaBrowser.disconnect();
} catch (IOException e) {
logger.warn("Error while trying to disconnect - ncbiBrowser (assembly: " + accession + ") : " + e);
logger.warn("Error while trying to disconnect - enaBrowser (assembly: " + accession + ") : " + e);
}
}
}
Expand Down Expand Up @@ -195,29 +193,4 @@ public void addENASequenceNames(
}
}
}

public void addENASequenceNameToChromosomes(List<ChromosomeEntity> ncbiChromosomeList,
Path downloadedENAFilePath, final int BATCH_SIZE) throws IOException {
try (BufferedReader bufferedReader = new BufferedReader(new FileReader(downloadedENAFilePath.toFile()))) {
List<String> chrLines = new ArrayList<>();
List<ChromosomeEntity> enaChromosomeList;
String line;
while ((line = bufferedReader.readLine()) != null) {
if (line.startsWith("accession")) {
continue;
}
chrLines.add(line);
if (chrLines.size() == BATCH_SIZE) {
enaChromosomeList = getChromosomeEntityList(chrLines);
addENASequenceNames(enaChromosomeList, ncbiChromosomeList);

chrLines = new ArrayList<>();
}
}
if (!chrLines.isEmpty()) {
enaChromosomeList = getChromosomeEntityList(chrLines);
addENASequenceNames(enaChromosomeList, ncbiChromosomeList);
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,8 @@
import uk.ac.ebi.eva.contigalias.dus.NCBIBrowserFactory;
import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity;
import uk.ac.ebi.eva.contigalias.entities.ChromosomeEntity;
import uk.ac.ebi.eva.contigalias.exception.AssemblyNotFoundException;
import uk.ac.ebi.eva.contigalias.repo.AssemblyRepository;
import uk.ac.ebi.eva.contigalias.repo.ChromosomeRepository;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
Expand All @@ -52,8 +47,6 @@ public class NCBIAssemblyDataSource implements AssemblyDataSource {

private final Logger logger = LoggerFactory.getLogger(NCBIAssemblyDataSource.class);

private final int BATCH_SIZE = 100000;

private final NCBIBrowserFactory factory;

private final NCBIAssemblyReportReaderFactory readerFactory;
Expand Down Expand Up @@ -163,60 +156,4 @@ public Optional<Path> downloadAssemblyReport(String accession, NCBIBrowser ncbiB
return Optional.empty();
}
}

public void parseFileAndInsertAssembly(String accession, ENAAssemblyDataSource enaDataSource,
AssemblyRepository assemblyRepository, ChromosomeRepository chromosomeRepository) throws IOException {
Optional<Path> downloadNCBIFilePathOpt = downloadAssemblyReport(accession);
Path downloadedNCBIFilePath = downloadNCBIFilePathOpt.orElseThrow(() -> new AssemblyNotFoundException(accession));
Optional<Path> downloadENAFilePathOpt = enaDataSource.downloadAssemblyReport(accession);
Path downloadedENAFilePath = downloadENAFilePathOpt.orElse(null);

long numberOfChromosomesInFile = Files.lines(downloadedNCBIFilePath).filter(line -> !line.startsWith("#")).count();
logger.info("Number of chromosomes in assembly (" + accession + "): " + numberOfChromosomesInFile);

AssemblyEntity assemblyEntity = getAssemblyEntity(downloadedNCBIFilePath);
assemblyRepository.save(assemblyEntity);

try (BufferedReader bufferedReader = new BufferedReader(new FileReader(downloadedNCBIFilePath.toFile()))) {
long chromosomesSavedTillNow = 0l;
List<String> chrLines = new ArrayList<>();
String line;
while ((line = bufferedReader.readLine()) != null) {
if (line.startsWith("#")) {
continue;
}
chrLines.add(line);
if (chrLines.size() == BATCH_SIZE) {
// add ena sequence name and save
addENASequenceNameAndSave(assemblyEntity, chrLines, enaDataSource, downloadedENAFilePath, chromosomeRepository);
chromosomesSavedTillNow += chrLines.size();
logger.info("Number of chromosomes saved till now : " + chromosomesSavedTillNow);

chrLines = new ArrayList<>();
}
}
if (!chrLines.isEmpty()) {
// add ena sequence name and save
addENASequenceNameAndSave(assemblyEntity, chrLines, enaDataSource, downloadedENAFilePath, chromosomeRepository);
chromosomesSavedTillNow += chrLines.size();
logger.info("Number of chromosomes saved till now : " + chromosomesSavedTillNow);
}
}

// delete the files after assembly insertion
Files.deleteIfExists(downloadedNCBIFilePath);
if (downloadedENAFilePath != null) {
Files.deleteIfExists(downloadedENAFilePath);
}
}

public void addENASequenceNameAndSave(AssemblyEntity assemblyEntity, List<String> chrLines,
ENAAssemblyDataSource enaDataSource, Path downloadedENAFilePath,
ChromosomeRepository chromosomeRepository) throws IOException {
List<ChromosomeEntity> chromosomeEntityList = getChromosomeEntityList(assemblyEntity, chrLines);
if (downloadedENAFilePath != null) {
enaDataSource.addENASequenceNameToChromosomes(chromosomeEntityList, downloadedENAFilePath, BATCH_SIZE);
}
chromosomeRepository.saveAll(chromosomeEntityList);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import javax.persistence.CascadeType;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.FetchType;
import javax.persistence.Id;
import javax.persistence.OneToMany;
import javax.persistence.Table;
Expand Down Expand Up @@ -63,8 +64,8 @@ public class AssemblyEntity {

@JsonInclude(JsonInclude.Include.NON_NULL)
@ApiModelProperty(value = "List of all chromosomes of the assembly present in the database.")
@LazyCollection(LazyCollectionOption.FALSE)
@OneToMany(mappedBy = "assembly", cascade = CascadeType.ALL)
@LazyCollection(LazyCollectionOption.TRUE)
@OneToMany(mappedBy = "assembly", cascade = CascadeType.REMOVE, fetch = FetchType.LAZY)
private List<ChromosomeEntity> chromosomes;

public AssemblyEntity() {
Expand Down Expand Up @@ -178,11 +179,6 @@ public String toString() {
.append("trunc512checksum :\t")
.append(this.trunc512checksum)
.append("\n");
if (this.chromosomes != null) {
builder.append("Number of chromosomes :\t")
.append(this.chromosomes.size())
.append("\n");
}
return builder.toString();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import com.fasterxml.jackson.annotation.JsonInclude;
import io.swagger.annotations.ApiModelProperty;

import javax.persistence.CascadeType;
import javax.persistence.Column;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
Expand Down Expand Up @@ -68,7 +67,7 @@ public enum ContigType {
@Id
@JsonInclude(JsonInclude.Include.NON_NULL)
@ApiModelProperty(value = "Assembly that this sequence belongs to.")
@ManyToOne(cascade = CascadeType.ALL)
@ManyToOne
private AssemblyEntity assembly;

public String getGenbankSequenceName() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ public interface ChromosomeRepository extends JpaRepository<ChromosomeEntity, Lo
@Query("UPDATE ChromosomeEntity c SET c.md5checksum = :md5Checksum WHERE c.assembly.insdcAccession= :asmInsdcAccession AND c.insdcAccession = :insdcAccession")
void updateMd5ChecksumByInsdcAccession(@Param("asmInsdcAccession") String asmInsdcAccession, @Param("insdcAccession") String insdcAccession, @Param("md5Checksum") String md5Checksum);

@Modifying
@Query("UPDATE ChromosomeEntity c SET c.enaSequenceName = :enaSequenceName WHERE c.assembly.insdcAccession= :asmInsdcAccession AND c.insdcAccession = :insdcAccession")
void updateENASequenceNameByInsdcAccession(@Param("asmInsdcAccession") String asmInsdcAccession, @Param("insdcAccession") String insdcAccession, @Param("enaSequenceName") String enaSequenceName);

@Transactional
@Modifying
@Query("DELETE FROM ChromosomeEntity c WHERE c.assembly.insdcAccession=:asmInsdcAccession")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package uk.ac.ebi.eva.contigalias.scheduler;

import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Component;

@Component
public class ApplicationContextHolder implements ApplicationContextAware {

private static ApplicationContext applicationContext;

@Override
public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
ApplicationContextHolder.applicationContext = applicationContext;
}

public static ApplicationContext getApplicationContext() {
return applicationContext;
}
}
Loading

0 comments on commit 5515132

Please sign in to comment.