Skip to content

Commit

Permalink
Merge pull request #125 from nitin-ebi/ingest_assembly
Browse files Browse the repository at this point in the history
EVA-3494 Ingest assembly in batches
  • Loading branch information
nitin-ebi authored Feb 12, 2024
2 parents b4f6389 + 530f623 commit edd5c2e
Show file tree
Hide file tree
Showing 38 changed files with 1,038 additions and 761 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,15 @@
import org.springframework.boot.web.servlet.support.SpringBootServletInitializer;
import org.springframework.hateoas.config.EnableHypermediaSupport;
import org.springframework.retry.annotation.EnableRetry;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.transaction.annotation.EnableTransactionManagement;

@EnableScheduling
@EnableAsync
@SpringBootApplication
@EnableRetry
@EnableTransactionManagement
@EnableHypermediaSupport(type = EnableHypermediaSupport.HypermediaType.HAL)
public class ContigAliasApplication extends SpringBootServletInitializer {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package uk.ac.ebi.eva.contigalias.conf;

import org.springframework.beans.BeansException;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.stereotype.Component;

/**
 * Spring component that stores the {@link ApplicationContext} it receives through
 * {@link ApplicationContextAware} in a static field, so that code without access to
 * dependency injection can reach it via {@link #getApplicationContext()}.
 */
@Component
public class ApplicationContextHolder implements ApplicationContextAware {

    /** Last context passed to {@link #setApplicationContext}; {@code null} until that callback has run. */
    private static ApplicationContext applicationContext;

    /**
     * Stores the supplied context in the static holder field.
     *
     * @param context the context to remember
     * @throws BeansException declared by the interface; this implementation does not throw it itself
     */
    @Override
    public void setApplicationContext(ApplicationContext context) throws BeansException {
        applicationContext = context;
    }

    /**
     * @return the stored context, or {@code null} when none has been set yet
     */
    public static ApplicationContext getApplicationContext() {
        return ApplicationContextHolder.applicationContext;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import uk.ac.ebi.eva.contigalias.exception.AssemblyNotFoundException;
import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.Optional;

@RequestMapping("/v1/admin")
@RestController
Expand Down Expand Up @@ -63,6 +63,9 @@ public ResponseEntity<?> fetchAndInsertAssemblyByAccession(
"GCA_000001405.10") String asmAccession) throws IOException {
try {
handler.fetchAndInsertAssemblyByAccession(asmAccession);
// submit jobs for updating ena sequence name and md5 checksum for assembly
handler.retrieveAndInsertENASequenceNameForAssembly(asmAccession);
handler.retrieveAndInsertMd5ChecksumForAssembly(asmAccession);
} catch (IllegalArgumentException e) {
return new ResponseEntity<>(e.getMessage(), HttpStatus.BAD_REQUEST);
}
Expand All @@ -82,43 +85,124 @@ public ResponseEntity<?> fetchAndInsertAssemblyByAccession(
"parallel manner.")
@PutMapping(value = "assemblies")
public ResponseEntity<?> fetchAndInsertAssemblyByAccession(
@RequestBody(required = false) @ApiParam(value = "A JSON array of INSDC or RefSeq assembly accessions. " +
@RequestBody @ApiParam(value = "A JSON array of INSDC or RefSeq assembly accessions. " +
"Eg: [\"GCA_000001405.10\",\"GCA_000001405.11\",\"GCA_000001405.12\"]") List<String> accessions) {
if (accessions == null || accessions.size() <= 0) {
return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
}
Map<String, List<String>> accessionResult = handler.fetchAndInsertAssemblyByAccession(accessions);
// submit jobs for updating ena sequence names and md5 checksum for all successfully inserted assemblies
if (accessionResult.get("SUCCESS").size() > 0) {
handler.retrieveAndInsertENASequenceNameForAssembly(accessionResult.get("SUCCESS"));
handler.retrieveAndInsertMd5ChecksumForAssembly(accessionResult.get("SUCCESS"));
}
return new ResponseEntity<>("Accession Processing Result : " + accessionResult, HttpStatus.MULTI_STATUS);
}

@ApiOperation(value = "Given an assembly accession, retrieve MD5 checksum for all chromosomes belonging to assembly and update")
@PutMapping(value = "assemblies/{accession}/md5checksum")
@PutMapping(value = "assemblies/md5checksum/{accession}")
public ResponseEntity<String> retrieveAndInsertMd5ChecksumForAssembly(@PathVariable(name = "accession")
@ApiParam(value = "INSDC or RefSeq assembly accession. Eg: " +
"GCA_000001405.10") String asmAccession) {
try {
handler.getAssemblyByAccession(asmAccession);
handler.retrieveAndInsertMd5ChecksumForAssembly(asmAccession);
return ResponseEntity.ok("A task has been submitted for updating md5checksum for all chromosomes " +
"in assembly " + asmAccession + ". Depending upon the number of chromosomes present in assembly, " +
"this might take some time to complete");
} catch (AssemblyNotFoundException e) {
Optional<AssemblyEntity> assemblyOpt = handler.getAssemblyByAccession(asmAccession);
if (assemblyOpt.isPresent()) {
handler.retrieveAndInsertMd5ChecksumForAssembly(assemblyOpt.get().getInsdcAccession());
return ResponseEntity.ok("A task has been submitted for updating md5checksum for assembly " + asmAccession
+ "\nDepending upon the size of assembly and other scheduled jobs, this might take some time to complete");
} else {
return ResponseEntity.ok("Could not find assembly " + asmAccession +
". Please insert the assembly first (md5checksum will be updated as part of the insertion process");
". Please insert the assembly first. MD5 checksum will be updated as part of the insertion process");
}
}

@ApiOperation(value = "Retrieve list of assemblies for which MD5 Checksum updates are running/going-to-run ")
@GetMapping(value = "assemblies/md5checksum/status")
public ResponseEntity<String> getMD5ChecksumUpdateTaskStatus() {
Map<String, Set<String>> md5ChecksumUpdateTasks = handler.getMD5ChecksumUpdateTaskStatus();
Set<String> runningTasks = md5ChecksumUpdateTasks.get("running");
Set<String> scheduledTasks = md5ChecksumUpdateTasks.get("scheduled");
String runningTaskRes = runningTasks == null || runningTasks.isEmpty() ? "No running MD5 checksum update tasks" :
runningTasks.stream().collect(Collectors.joining(","));
String scheduledTaskRes = scheduledTasks == null || scheduledTasks.isEmpty() ? "No scheduled MD5 checksum update tasks" :
scheduledTasks.stream().collect(Collectors.joining(","));
return ResponseEntity.ok("running: " + runningTaskRes + "\nscheduled: " + scheduledTaskRes);
/**
 * Requests an MD5 checksum update for several assemblies at once.
 *
 * Each accession is resolved through {@code handler.getAssemblyByAccession}. The INSDC
 * accessions of the assemblies that exist are handed to
 * {@code handler.retrieveAndInsertMd5ChecksumForAssembly} in a single call. Accessions that
 * cannot be resolved are only reported back. Nothing is submitted, and no "task submitted"
 * text is returned, when none of the accessions could be resolved.
 *
 * @param accessions JSON array of INSDC or RefSeq assembly accessions; it is not modified
 * @return 400 with an empty body when the list is null or empty, otherwise 200 with a
 *         plain-text summary of submitted and unknown accessions
 */
@ApiOperation(value = "Given a list of assembly accessions, retrieve MD5 checksum for all chromosomes belonging to all the assemblies and update")
@PutMapping(value = "assemblies/md5checksum")
public ResponseEntity<String> retrieveAndInsertMd5ChecksumForAssembly(
        @RequestBody @ApiParam(value = "A JSON array of INSDC or RefSeq assembly accessions. " +
                "Eg: [\"GCA_000001405.10\",\"GCA_000001405.11\",\"GCA_000001405.12\"]") List<String> accessions) {
    if (accessions == null || accessions.isEmpty()) {
        return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
    }

    List<String> asmInsdcAccessionsList = new ArrayList<>();
    // Accessions exactly as the caller wrote them, kept for the response text.
    List<String> asmPresent = new ArrayList<>();
    List<String> asmNotPresent = new ArrayList<>();
    for (String accession : accessions) {
        Optional<AssemblyEntity> assemblyOpt = handler.getAssemblyByAccession(accession);
        if (assemblyOpt.isPresent()) {
            asmInsdcAccessionsList.add(assemblyOpt.get().getInsdcAccession());
            asmPresent.add(accession);
        } else {
            asmNotPresent.add(accession);
        }
    }

    String responseText = "";
    if (!asmInsdcAccessionsList.isEmpty()) {
        // Only submit work (and only claim to have done so) when something was resolved.
        handler.retrieveAndInsertMd5ChecksumForAssembly(asmInsdcAccessionsList);
        responseText = "A task has been submitted for updating MD5 checksum for assemblies: " + asmPresent + "."
                + "\nDepending upon other scheduled jobs and the size of assembly, this might take some time to complete";
    }
    if (!asmNotPresent.isEmpty()) {
        String separator = responseText.isEmpty() ? "" : "\n";
        responseText = responseText + separator + "The following assemblies are not present: " + asmNotPresent + "."
                + "\nPlease insert the assembly first, MD5 Checksum will be updated as part of the insertion process";
    }

    return ResponseEntity.ok(responseText);
}

/**
 * Requests an ENA sequence name update for a single assembly.
 *
 * The accession is resolved through {@code handler.getAssemblyByAccession}. An unknown
 * assembly is reported straight away. A known one has its INSDC accession passed to
 * {@code handler.retrieveAndInsertENASequenceNameForAssembly}. Both outcomes use HTTP 200.
 *
 * @param asmAccession INSDC or RefSeq assembly accession taken from the request path
 * @return a 200 response whose body says whether a task was submitted or the assembly is unknown
 */
@ApiOperation(value = "Given an assembly accession, retrieve ENA sequence name for all chromosomes belonging to assembly and update")
@PutMapping(value = "assemblies/ena-sequence-name/{accession}")
public ResponseEntity<String> retrieveAndInsertENASequenceNameForAssembly(@PathVariable(name = "accession")
        @ApiParam(value = "INSDC or RefSeq assembly accession. " +
                "Eg: GCA_000001405.10") String asmAccession) {
    Optional<AssemblyEntity> lookup = handler.getAssemblyByAccession(asmAccession);

    // Guard clause: nothing to submit for an assembly that has not been inserted.
    if (!lookup.isPresent()) {
        return ResponseEntity.ok("Could not find assembly " + asmAccession +
                ". Please insert the assembly first. ENA sequence name will be updated as part of the insertion process");
    }

    String insdcAccession = lookup.get().getInsdcAccession();
    handler.retrieveAndInsertENASequenceNameForAssembly(insdcAccession);

    String submittedMessage = "A task has been submitted for updating ENA Sequence Name for assembly " + asmAccession
            + "\nDepending upon the size of assembly and other scheduled jobs, this might take some time to complete";
    return ResponseEntity.ok(submittedMessage);
}

/**
 * Requests an ENA sequence name update for several assemblies at once.
 *
 * Each accession is resolved through {@code handler.getAssemblyByAccession}. The INSDC
 * accessions of the assemblies that exist are handed to
 * {@code handler.retrieveAndInsertENASequenceNameForAssembly} in a single call. Accessions
 * that cannot be resolved are only reported back. Nothing is submitted, and no "task
 * submitted" text is returned, when none of the accessions could be resolved.
 *
 * @param accessions JSON array of INSDC or RefSeq assembly accessions; it is not modified
 * @return 400 with an empty body when the list is null or empty, otherwise 200 with a
 *         plain-text summary of submitted and unknown accessions
 */
@ApiOperation(value = "Given a list of assembly accessions, retrieve ENA sequence name for all chromosomes belonging to all the assemblies and update")
@PutMapping(value = "assemblies/ena-sequence-name")
public ResponseEntity<String> retrieveAndInsertENASequenceNameForAssembly(
        @RequestBody @ApiParam(value = "A JSON array of INSDC or RefSeq assembly accessions. " +
                "Eg: [\"GCA_000001405.10\",\"GCA_000001405.11\",\"GCA_000001405.12\"]") List<String> accessions) {
    if (accessions == null || accessions.isEmpty()) {
        return new ResponseEntity<>(HttpStatus.BAD_REQUEST);
    }

    List<String> asmInsdcAccessionsList = new ArrayList<>();
    // Accessions exactly as the caller wrote them, kept for the response text.
    List<String> asmPresent = new ArrayList<>();
    List<String> asmNotPresent = new ArrayList<>();
    for (String accession : accessions) {
        Optional<AssemblyEntity> assemblyOpt = handler.getAssemblyByAccession(accession);
        if (assemblyOpt.isPresent()) {
            asmInsdcAccessionsList.add(assemblyOpt.get().getInsdcAccession());
            asmPresent.add(accession);
        } else {
            asmNotPresent.add(accession);
        }
    }

    String responseText = "";
    if (!asmInsdcAccessionsList.isEmpty()) {
        // Only submit work (and only claim to have done so) when something was resolved.
        handler.retrieveAndInsertENASequenceNameForAssembly(asmInsdcAccessionsList);
        responseText = "A task has been submitted for updating ENA Sequence Name for assemblies: " + asmPresent
                + "\nDepending upon other scheduled jobs and the size of assembly, this might take some time to complete";
    }
    if (!asmNotPresent.isEmpty()) {
        String separator = responseText.isEmpty() ? "" : "\n";
        responseText = responseText + separator + "The following assemblies are not present: " + asmNotPresent + "."
                + "\nPlease insert the assembly first, ENA Sequence Name will be updated as part of the insertion process";
    }

    return ResponseEntity.ok(responseText);
}


/**
 * Returns the job status list reported by {@code handler.getScheduledJobStatus()}.
 *
 * @return a 200 response carrying that list unchanged
 */
@ApiOperation(value = "Retrieve list of Jobs that are running or scheduled to run")
@GetMapping(value = "assemblies/scheduled-jobs")
public ResponseEntity<List<String>> getMD5ChecksumUpdateTaskStatus() {
    return ResponseEntity.ok(handler.getScheduledJobStatus());
}

// This endpoint can be enabled in the future when checksums for assemblies are added to the project.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,13 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.web.PagedResourcesAssembler;
import org.springframework.stereotype.Service;

import uk.ac.ebi.eva.contigalias.entities.AssemblyEntity;
import uk.ac.ebi.eva.contigalias.service.AssemblyService;
import uk.ac.ebi.eva.contigalias.service.ChromosomeService;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;

@Service
public class AdminHandler {
Expand All @@ -52,7 +49,7 @@ public Optional<AssemblyEntity> getAssemblyByAccession(String accession) {
return assemblyService.getAssemblyByAccession(accession);
}

public void fetchAndInsertAssemblyByAccession(String accession) throws IOException {
public void fetchAndInsertAssemblyByAccession(String accession) {
assemblyService.fetchAndInsertAssembly(accession);
}

Expand All @@ -64,8 +61,20 @@ public void retrieveAndInsertMd5ChecksumForAssembly(String accession) {
assemblyService.retrieveAndInsertMd5ChecksumForAssembly(accession);
}

public Map<String, Set<String>> getMD5ChecksumUpdateTaskStatus() {
return assemblyService.getMD5ChecksumUpdateTaskStatus();
public void retrieveAndInsertMd5ChecksumForAssembly(List<String> accessions) {
assemblyService.retrieveAndInsertMd5ChecksumForAssembly(accessions);
}

/**
 * Delegates to {@code assemblyService.retrieveAndInsertENASequenceNameForAssembly} for one
 * assembly.
 *
 * @param accession assembly accession whose ENA sequence names should be updated
 */
public void retrieveAndInsertENASequenceNameForAssembly(final String accession) {
    assemblyService.retrieveAndInsertENASequenceNameForAssembly(accession);
}

/**
 * Delegates to {@code assemblyService.retrieveAndInsertENASequenceNameForAssembly} for a
 * batch of assemblies.
 *
 * @param accessions assembly accessions whose ENA sequence names should be updated
 */
public void retrieveAndInsertENASequenceNameForAssembly(final List<String> accessions) {
    assemblyService.retrieveAndInsertENASequenceNameForAssembly(accessions);
}

/**
 * Returns the job status list reported by {@code assemblyService.getScheduledJobStatus()}.
 *
 * @return the list exactly as the service returned it
 */
public List<String> getScheduledJobStatus() {
    List<String> jobStatus = assemblyService.getScheduledJobStatus();
    return jobStatus;
}

public void deleteAssemblyByAccession(String accession) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageImpl;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.domain.Pageable;
import org.springframework.data.web.PagedResourcesAssembler;
import org.springframework.hateoas.EntityModel;
Expand Down Expand Up @@ -83,6 +82,7 @@ public PagedModel<EntityModel<AssemblyEntity>> getAssemblyByRefseq(String refseq

/**
 * Fetches one page of assemblies for a taxonomy id and converts it to a paged model.
 *
 * Before conversion, {@code setChromosomes(null)} is called on every assembly in the page.
 * NOTE(review): presumably this keeps chromosome lists out of the response; confirm the intent.
 *
 * @param taxid taxonomy id passed to {@code assemblyService.getAssembliesByTaxid}
 * @param request paging request passed to the same call
 * @return the result of {@code generatePagedModelFromPage} for the fetched page
 */
public PagedModel<EntityModel<AssemblyEntity>> getAssembliesByTaxid(long taxid, Pageable request) {
    Page<AssemblyEntity> assemblies = assemblyService.getAssembliesByTaxid(taxid, request);
    for (AssemblyEntity assembly : assemblies) {
        assembly.setChromosomes(null);
    }
    return generatePagedModelFromPage(assemblies, assemblyAssembler);
}

Expand Down

This file was deleted.

Loading

0 comments on commit edd5c2e

Please sign in to comment.