Skip to content

Commit

Permalink
Updated SciStarter import process
Browse files Browse the repository at this point in the history
  • Loading branch information
jack-brinkman committed Sep 1, 2024
1 parent c2d997f commit b7b3818
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 135 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -374,8 +374,8 @@ class ProjectController {
}

def importProjectsFromSciStarter(){
Integer count = projectService.importProjectsFromSciStarter()?:0
render(text: [count: count] as JSON, contentType: 'application/json');
Map counts = projectService.importProjectsFromSciStarter()
render(counts as JSON, contentType: 'application/json');
}

/**
Expand Down
121 changes: 56 additions & 65 deletions grails-app/services/au/org/ala/ecodata/ProjectService.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,17 @@ class ProjectService {
}
}

/**
 * Fetches one page of the SciStarter project finder and parses it as JSON.
 *
 * The request URL is built from the `scistarter.baseUrl`, `scistarter.finderUrl` and
 * `scistarter.apiKey` configuration properties, with `format=json` and the requested page
 * number appended as query parameters.
 *
 * @param slurper JSON parser used to parse the response body; supplied by the caller so one
 *                instance can be reused across pages.
 * @param page page number to request; defaults to 1.
 * @return the parsed JSON document. NOTE(review): the import loop reads `total` and `entities`
 *         from this map - confirm against the SciStarter API response schema.
 * @throws IllegalStateException if the web service call does not return a text body.
 */
Map getSciStarterProjectsPage(JsonSlurper slurper, int page = 1) {
    String baseUrl = grailsApplication.config.getProperty("scistarter.baseUrl")
    String finderUrl = grailsApplication.config.getProperty("scistarter.finderUrl")
    String apiKey = grailsApplication.config.getProperty("scistarter.apiKey")
    String url = "${baseUrl}${finderUrl}?format=json&key=${apiKey}&page=${page}"

    // Left untyped on purpose: other SciStarter calls in this service check the result with
    // `instanceof String` before parsing, so a non-text (or null) result is possible here too.
    def data = webService.get(url, false)

    if (!(data instanceof String)) {
        // The URL is deliberately left out of the message because it contains the API key.
        throw new IllegalStateException("SciStarter finder request for page ${page} did not return a text response")
    }

    return slurper.parseText(data)
}

/**
* Import SciStarter projects to Biocollect. Import script does the following.
* 1. gets the list of projects and contacts SciStarter for more details on a project
Expand All @@ -735,50 +746,48 @@ class ProjectService {
* And link artifacts to the project. TODO: creating project extent.
* @return
*/
Integer importProjectsFromSciStarter() {
int ignoredProjects = 0, createdProjects = 0, updatedProjects = 0
Map importProjectsFromSciStarter() {
int ignoredProjects = 0, createdProjects = 0, updatedProjects = 0, page = 1
JsonSlurper slurper = new JsonSlurper()

log.info("Starting SciStarter import")
try {
JsonSlurper jsonSlurper = new JsonSlurper()
String sciStarterProjectUrl
// list all SciStarter projects
List projects = getSciStarterProjectsFromFinder()
projects?.eachWithIndex { pProperties, index ->
Map transformedProject
Map project = pProperties
if (project && project.title && project.id) {
Project importedSciStarterProject = Project.findByExternalIdAndIsSciStarter(project.id?.toString(), true)
// get more details about the project
try {
sciStarterProjectUrl = "${grailsApplication.config.getProperty('scistarter.baseUrl')}${grailsApplication.config.getProperty('scistarter.projectUrl')}/${project.id}?key=${grailsApplication.config.getProperty('scistarter.apiKey')}"
String text = webService.get(sciStarterProjectUrl, false);
if (text instanceof String) {
Map projectDetails = jsonSlurper.parseText(text)
if (projectDetails.origin && projectDetails.origin == 'atlasoflivingaustralia') {
// ignore projects SciStarter imported from Biocollect
log.warn("Ignoring ${projectDetails.title} - ${projectDetails.id} - This is an ALA project.")
ignoredProjects++
} else {
projectDetails << project
// map properties from SciStarter to Biocollect
transformedProject = SciStarterConverter.convert(projectDetails)
if (!importedSciStarterProject) {
// create project & document & site & organisation
createSciStarterProject(transformedProject, projectDetails)
createdProjects++
} else {
// update a project just in case something has changed.
updateSciStarterProject(transformedProject, importedSciStarterProject)
log.info("Updating ${importedSciStarterProject.name} ${importedSciStarterProject.projectId}.")
updatedProjects++
}
}
}
} catch (Exception e) {
log.error("Error processing project - ${sciStarterProjectUrl}. Ignoring it. ${e.message}", e);

while(true) {
// list SciStarter projects for the current page
Map data = getSciStarterProjectsPage(slurper, page)
log.info("-- PAGE ${page}/${Math.round(data.total / 10)} SCISTARTER --")

// Break the loop if there are no more projects left to import
if (data.entities.size() == 0) break

data.entities.eachWithIndex { project, index ->
Project existingProject = Project.findByExternalIdAndIsSciStarter(project.legacy_id.toString(), true)

if (project.origin == 'atlasoflivingaustralia') {
// ignore projects SciStarter imported from BioCollect
log.warn("Ignoring ALA project ${project.name} - ${project.id}")
ignoredProjects++
} else {
// map properties from SciStarter to Biocollect
Map transformedProject = SciStarterConverter.convert(project)
if (!existingProject) {
// create project & document & site & organisation
createSciStarterProject(transformedProject, project)
log.info("Creating ${project.name} in ecodata")

createdProjects++
} else {
// update a project just in case something has changed.
updateSciStarterProject(transformedProject, existingProject)

log.info("Updating ${existingProject.name} ${existingProject.projectId}.")
updatedProjects++
}
}
}

page++;
}

log.info("Number of created projects ${createdProjects}. Number of ignored projects ${ignoredProjects}. Number of projects updated ${updatedProjects}.")
Expand All @@ -789,23 +798,7 @@ class ProjectService {
}

log.info("Completed SciStarter import")
createdProjects
}

/**
* Get the entire project list from SciStarter
* @return
* @throws SocketTimeoutException
* @throws Exception
*/
List getSciStarterProjectsFromFinder() throws SocketTimeoutException, Exception {
String scistarterFinderUrl = "${grailsApplication.config.getProperty('scistarter.baseUrl')}${grailsApplication.config.getProperty('scistarter.finderUrl')}?format=json&q="
String responseText = webService.get(scistarterFinderUrl, false)
if (responseText instanceof String) {
ObjectMapper mapper = new ObjectMapper()
Map response = mapper.readValue(responseText, Map.class)
return response.results
}
[created: createdProjects, updated: updatedProjects, ignored: ignoredProjects]
}

/**
Expand Down Expand Up @@ -886,16 +879,14 @@ class ProjectService {
Map createSciStarterSites(Map project) {
Map result = [siteIds: null]
List sites = []
if (project.regions?.size()) {
if (project.regions) {
// convert region to site
project.regions.each { region ->
Map site = SciStarterConverter.siteMapping(region)
// only add valid geojson objects
if (site?.extent?.geometry && siteService.isGeoJsonValid((site?.extent?.geometry as JSON).toString())) {
Map createdSite = siteService.create(site)
if (createdSite.siteId) {
sites.push(createdSite.siteId)
}
Map site = SciStarterConverter.siteMapping(project)
// only add valid geojson objects
if (site?.extent?.geometry && siteService.isGeoJsonValid((site?.extent?.geometry as JSON).toString())) {
Map createdSite = siteService.create(site)
if (createdSite.siteId) {
sites.push(createdSite.siteId)
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package au.org.ala.ecodata.converter

import grails.util.Holders
import groovy.json.JsonSlurper
import groovy.util.logging.Slf4j
import org.apache.commons.lang.StringUtils

Expand Down Expand Up @@ -41,11 +42,11 @@ class SciStarterConverter {

public static convert(Map sciStarter, Map override = [:]) {
Map mapping = [
'id' : 'externalId',
'title' : [
'legacy_id' : 'externalId',
'name' : [
'name' : 'name',
'transform': { props, target ->
props.title?.trim()
props.name?.trim()
}],
'tags' : [
'name' : 'keywords',
Expand All @@ -64,88 +65,61 @@ class SciStarterConverter {
'goal' : 'aim',
'task' : 'task',
'description' : 'description',
'url' : 'urlWeb',
'image' : [
'name' : 'image',
'url' : [
'name': 'urlWeb',
'transform': { props, target ->
String imageUrl = props.image?.toLowerCase()
if (Holders.grailsApplication.config.getProperty('scistarter.forceHttpsUrls') == 'true') {
try {
URL oldUrl = new URL(props.image)
URL newUrl = new URL("https", oldUrl.getHost(), oldUrl.getPort(), oldUrl.getFile())
newUrl.toString()
} catch (MalformedURLException e) {
"${Holders.grailsApplication.config.getProperty('scistarter.baseUrl')}/${props.image}"
}
} else if (!imageUrl?.equals(null) && (imageUrl?.contains('http://') || imageUrl?.contains('https://'))) {
props.image
} else if (!props.image?.equals(null)) {
"${Holders.grailsApplication.config.getProperty('scistarter.baseUrl')}/${props.image}"
} else {
return null
try {
String url = props.url.replace("www.", "").replaceAll("\n","")

new URL(url)
url
} catch (error) {
log.info("${props.url} is not a valid URL, returning null...")
null
}
}],
}
],
'image' : 'image',
'difficulty' : [
'name' : 'difficulty',
'transform': { props, target ->
def difficulty
if (props?.difficulty instanceof List) {
difficulty = props.difficulty.findAll {
(it instanceof String) && it.capitalize() in ['Easy', 'Medium', 'Hard']
}?.getAt(0)?.capitalize()
}

if (props?.difficulty instanceof String) {
difficulty = props.difficulty.capitalize()
}

difficulty
[
'easy': 'Easy',
'medium': 'Medium',
'difficult': 'Hard'
][props.difficulty.label.toLowerCase()]
}
],
'begin_date' : [
'begin' : [
'name' : 'plannedStartDate',
'transform': { props, target ->
SimpleDateFormat sdf = new SimpleDateFormat('yyyy-MM-dd');
if (props.begin_date) {
sdf.parse(props.begin_date)
} else {
new Date()
}
props.begin ? new Date(props.begin.longValue()) : new Date()
}],
'end_date' : [
'end' : [
'name' : 'plannedEndDate',
'transform': { props, target ->
SimpleDateFormat sdf = new SimpleDateFormat('yyyy-MM-dd');
if (props.end_date) {
sdf.parse(props.end_date)
}
props.end ? new Date(props.end.longValue()) : new Date()
}],
'date' : [
'created' : [
'name' : 'dateCreated',
'transform': { props, target ->
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
if (props.date) {
sdf.parse(props.date)
}
props.created ? new Date(props.created.longValue()) : new Date()
}],
'updated' : [
'name' : 'lastUpdated',
'transform': { props, target ->
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
if (props.updated) {
sdf.parse(props.updated)
}
props.updated ? new Date(props.updated.longValue()) : new Date()
}],
'state' : 'state',
'image_credit': 'attribution',
'presenter' : 'organisationName',
'topics' : [
'topics' : [ // TODO: check topic mapping parity
'name' : 'scienceType',
'transform': { props, target ->
List approvedScienceType = Holders.grailsApplication.config.getProperty('biocollect.scienceType', List)
List scienceTypes = []
props?.topics?.each { String type ->
String lowerType = type?.toLowerCase()
props?.topics?.each { topic ->
String lowerType = topic.label.toLowerCase()
approvedScienceType?.each { String scienceType ->
if (scienceType.toLowerCase() == lowerType) {
scienceTypes.push(scienceType)
Expand All @@ -157,7 +131,7 @@ class SciStarterConverter {
}
],
"origin" : "origin",
"country" : [
"country" : [ // TODO: check topic mapping parity
name : "countries",
'transform': { props, target ->
List countries = Holders.grailsApplication.config.getProperty('countries', List)
Expand All @@ -177,7 +151,7 @@ class SciStarterConverter {
}
}
],
"UN_regions" : [
"united_nations_region" : [
name : "uNRegions",
'transform': { props, target ->
List uNRegions = Holders.grailsApplication.config.getProperty('uNRegions', List)
Expand All @@ -187,9 +161,7 @@ class SciStarterConverter {
UN_region.toLowerCase() == region.toLowerCase()
}

if (matchedRegion) {
matchedRegions.push(matchedRegion)
}
if (matchedRegion) matchedRegions.push(matchedRegion)
}

if (matchedRegions.size()) {
Expand All @@ -199,7 +171,7 @@ class SciStarterConverter {
}
}
]
];
]

// default values
Map target = [
Expand Down Expand Up @@ -266,7 +238,10 @@ class SciStarterConverter {
target
}

static Map siteMapping(Map props) {
static Map siteMapping(Map project) {
JsonSlurper slurper = new JsonSlurper();
Map geometry = slurper.parseText(project.regions)

Map site = [
"projects" : [
],
Expand All @@ -292,11 +267,11 @@ class SciStarterConverter {
"type" : "projectArea"
]

switch (props.geometry.type) {
switch (geometry.type) {
case 'MultiPolygon':
site.name = props.name;
site.name = project.regions_description;
// possible data loss here. converting multipolygon to polygon since biocollect/merit does not support it.
site.geoIndex.coordinates = site.extent.geometry.coordinates = props.geometry.coordinates[0]
site.geoIndex.coordinates = site.extent.geometry.coordinates = geometry.coordinates[0]
site.geoIndex.type = site.extent.geometry.type = "Polygon"
break
}
Expand Down

0 comments on commit b7b3818

Please sign in to comment.