diff --git a/nbactions.xml b/nbactions.xml index 00290ed..4f7f9fe 100644 --- a/nbactions.xml +++ b/nbactions.xml @@ -1,24 +1,67 @@ - - build - - * - - - compile - assembly:single - - - - rebuild - - * - - - clean - compile - assembly:single - - - + + build + + * + + + compile + assembly:single + + + + rebuild + + * + + + clean + compile + assembly:single + + + + run + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.5.0:exec + + + -classpath %classpath ${packageClassName} -server=https://sead2-dev.ncsa.illinois.edu -key=523caf58-b444-46ef-bc38-633e6846d5ca -skip=4 dummytree + java + + + + debug + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.5.0:exec + + + -agentlib:jdwp=transport=dt_socket,server=n,address=${jpda.address} -classpath %classpath ${packageClassName} -server=https://sead2-dev.ncsa.illinois.edu -key=523caf58-b444-46ef-bc38-633e6846d5ca -skip=4 dummytree + java + true + + + + profile + + jar + + + process-classes + org.codehaus.mojo:exec-maven-plugin:1.5.0:exec + + + -classpath %classpath ${packageClassName} -server=https://sead2-dev.ncsa.illinois.edu -key=523caf58-b444-46ef-bc38-633e6846d5ca -skip=4 dummytree + java + + + diff --git a/src/main/java/org/sead/uploader/AbstractUploader.java b/src/main/java/org/sead/uploader/AbstractUploader.java index 9a4cc0a..176b6cd 100644 --- a/src/main/java/org/sead/uploader/AbstractUploader.java +++ b/src/main/java/org/sead/uploader/AbstractUploader.java @@ -152,18 +152,18 @@ public void parseArgs(String[] args) { public abstract HttpClientContext authenticate(); public void processRequests() { - if(importRO && (skip>0 || max < Long.MAX_VALUE)) { + if (importRO && (skip > 0 || max < Long.MAX_VALUE)) { println("Cannot set max or skip limits when importing an existing RO"); System.exit(0); } //Avoid max+skip > Long.MAX_VALUE - max=((Long.MAX_VALUE-max) -skip) <0 ? (max-skip): max; + max = ((Long.MAX_VALUE - max) - skip) < 0 ? 
(max - skip) : max; localContext = authenticate(); if (localContext == null) { println("Authentication failure - exiting."); System.exit(0); } - if(skip>0) { + if (skip > 0) { println("WILL SKIP " + skip + " FILES"); } try { @@ -202,7 +202,7 @@ public void processRequests() { println("CURRENT TOTAL: " + globalFileCount + " files :" + totalBytes + " bytes"); } else if ((tagId == null) && (!listonly)) { println("Not uploaded due to error during processing: " + file.getPath()); - } + } } else { println("SKIPPING(F): " + file.getPath()); skip--; @@ -361,11 +361,11 @@ protected String uploadCollection(Resource dir, String path, String parentId, St totalBytes += file.length(); println(" UPLOADED as: " + newUri); println("CURRENT TOTAL: " + globalFileCount + " files :" + totalBytes + " bytes"); - } + } } else { println("SKIPPING(F): " + file.getPath()); skip = skip - 1; - if(skip==0l) { + if (skip == 0l) { println("\nSKIP COMPLETE"); } } @@ -443,7 +443,7 @@ public String uploadDatafile(Resource file, String path, String dataId) { if (!listonly) { if (dataId == null) { // doesn't exist or we don't care (!merge) try { - dataId = uploadDatafile(file, path); + dataId = uploadDatafile(file, path); } catch (UploaderException ue) { println(ue.getMessage()); } diff --git a/src/main/java/org/sead/uploader/clowder/SEADAuthenticator.java b/src/main/java/org/sead/uploader/clowder/SEADAuthenticator.java index efae339..15d1449 100644 --- a/src/main/java/org/sead/uploader/clowder/SEADAuthenticator.java +++ b/src/main/java/org/sead/uploader/clowder/SEADAuthenticator.java @@ -1,269 +1,269 @@ -/** - * - */ -package org.sead.uploader.clowder; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.io.IOException; -import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; -import java.net.PasswordAuthentication; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; -import java.nio.charset.Charset; -import java.nio.file.Files; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.http.HttpEntity; -import org.apache.http.NameValuePair; -import org.apache.http.ParseException; -import org.apache.http.client.ClientProtocolException; -import org.apache.http.client.CookieStore; -import org.apache.http.client.entity.UrlEncodedFormEntity; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.client.protocol.HttpClientContext; -import org.apache.http.entity.mime.MultipartEntityBuilder; -import org.apache.http.impl.client.BasicCookieStore; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.message.BasicNameValuePair; -import org.apache.http.util.EntityUtils; -import org.json.JSONObject; - -import com.fasterxml.jackson.core.JsonParseException; -import com.fasterxml.jackson.databind.DeserializationFeature; -import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.node.ObjectNode; - -/** - * @author Jim - * - */ -public class SEADAuthenticator { - - static long token_start_time = -1; - static int expires_in = -1; - - private static Log log = 
LogFactory.getLog(SEADAuthenticator.class); - - private static long authTime; - // Create a local instance of cookie store - static CookieStore cookieStore = new BasicCookieStore(); - private static HttpClientContext localContext = HttpClientContext.create(); - - // Create local HTTP context - // Bind custom cookie store to the local context - static { - localContext.setCookieStore(cookieStore); - } - - static HttpClientContext authenticate(String server) { - - boolean authenticated = false; - log.info("Authenticating"); - - String accessToken = SEADGoogleLogin.getAccessToken(); - - // Now login to server and create a session - CloseableHttpClient httpclient = HttpClients.createDefault(); - try { - HttpPost seadAuthenticate = new HttpPost(server - + "/api/authenticate"); - List nvpList = new ArrayList(1); - nvpList.add(0, new BasicNameValuePair("googleAccessToken", - accessToken)); - - seadAuthenticate.setEntity(new UrlEncodedFormEntity(nvpList)); - - CloseableHttpResponse response = httpclient.execute( - seadAuthenticate, localContext); - try { - if (response.getStatusLine().getStatusCode() == 200) { - HttpEntity resEntity = response.getEntity(); - if (resEntity != null) { - // String seadSessionId = - // EntityUtils.toString(resEntity); - authenticated = true; - } - } else { - // Seems to occur when google device id is not set on server - // - with a Not Found response... - log.error("Error response from " + server + " : " - + response.getStatusLine().getReasonPhrase()); - } - } finally { - response.close(); - httpclient.close(); - } - } catch (IOException e) { - log.error("Cannot read sead-google.json"); - log.error(e.getMessage()); - } - - // localContext should have the cookie with the SEAD session key, which - // nominally is all that's needed. 
- // FixMe: If there is no activity for more than 15 minutes, the session - // may expire, in which case, - // re-authentication using the refresh token to get a new google token - // to allow SEAD login again may be required - // also need to watch the 60 minutes google token timeout - project - // spaces will invalidate the session at 60 minutes even if there is - // activity - authTime = System.currentTimeMillis(); - - if (authenticated) { - return localContext; - } - return null; - } - - public static HttpClientContext reAuthenticateIfNeeded(String server, - long startTime) { - long curTime = System.currentTimeMillis(); - // If necessary, re-authenticate and return the result - if (((curTime - startTime) / 1000l > 1700) - || ((curTime - authTime) / 1000l > 3500)) { - return UPAuthenticate(server); - } - // If it's not time, just return the current value - return localContext; - } - - static PasswordAuthentication passwordAuthentication = null; - - static HttpClientContext UPAuthenticate(String server) { - - boolean authenticated = false; - log.info("Authenticating with username/password"); - - File up = new File("./upass.txt"); - if (up.exists()) { - try { - BufferedReader bReader = new BufferedReader(new FileReader(up)); - passwordAuthentication = new PasswordAuthentication(bReader.readLine(), bReader.readLine().toCharArray()); - bReader.close(); - } catch (IOException e) { - System.out.println("Uable to read u/p from file"); - e.printStackTrace(); - } - - } - - if (passwordAuthentication == null) { - passwordAuthentication = SEAD2UPLogin - .getPasswordAuthentication(); - } - // Now login to server and create a session - CloseableHttpClient httpclient = HttpClients.createDefault(); - try { - HttpPost seadAuthenticate = new HttpPost(server - + "/authenticate/userpass"); - MultipartEntityBuilder meb = MultipartEntityBuilder.create(); - meb.addTextBody("username", passwordAuthentication.getUserName()); - meb.addBinaryBody("password", toBytes(passwordAuthentication.getPassword())); - - seadAuthenticate.setEntity(meb.build()); - - CloseableHttpResponse response = httpclient.execute( - seadAuthenticate, localContext); - HttpEntity resEntity = null; - try { - // 303 is a redirect after a successful login, 400 if bad - // password - if ((response.getStatusLine().getStatusCode() == 303) || (response.getStatusLine().getStatusCode() == 200)) { - resEntity = response.getEntity(); - if (resEntity != null) { - // String seadSessionId = - // EntityUtils.toString(resEntity); - authenticated = true; - } - } else { - // 400 for bad values - log.error("Error response from " + server + " : " - + response.getStatusLine().getReasonPhrase()); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - response.close(); - httpclient.close(); - } - } catch (IOException e) { - log.error(e.getMessage()); - } - - // localContext should have the cookie with the SEAD2 session key, which - // nominally is all that's needed. 
- authTime = System.currentTimeMillis(); - - if (authenticated) { - return localContext; - } - - return null; - } - - private static byte[] toBytes(char[] chars) { - CharBuffer charBuffer = CharBuffer.wrap(chars); - ByteBuffer byteBuffer = Charset.forName("UTF-8").encode(charBuffer); - byte[] bytes = Arrays.copyOfRange(byteBuffer.array(), - byteBuffer.position(), byteBuffer.limit()); - Arrays.fill(charBuffer.array(), '\u0000'); // clear sensitive data - Arrays.fill(byteBuffer.array(), (byte) 0); // clear sensitive data - return bytes; - } - - public static HttpClientContext UPReAuthenticateIfNeeded(String server, - long startTime) { - if ((startTime - authTime) > 300000l) { //assume it lasts at least 5*60*1000 msec == 5 min - return UPAuthenticate(server); - } - return localContext; - } - - private static JSONObject getMe(String server) { - JSONObject me = null; - // Now login to server and get user info - CloseableHttpClient httpclient = HttpClients.createDefault(); - try { - HttpGet seadGetMe = new HttpGet(server + "/api/me"); - - CloseableHttpResponse response = httpclient.execute(seadGetMe, - localContext); - HttpEntity resEntity = null; - try { - // 303 is a redirect after a successful login, 400 if bad - // password - if (response.getStatusLine().getStatusCode() == 200) { - resEntity = response.getEntity(); - if (resEntity != null) { - me = new JSONObject(EntityUtils.toString(resEntity)); - - } - } else { - // 400 for bad values - log.error("Error response from " + server + " : " - + response.getStatusLine().getReasonPhrase()); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - response.close(); - httpclient.close(); - } - } catch (IOException e) { - - log.error(e.getMessage()); - } - return me; - } -} +/** + * + */ +package org.sead.uploader.clowder; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.io.PrintWriter; +import java.io.UnsupportedEncodingException; +import java.net.PasswordAuthentication; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.http.HttpEntity; +import org.apache.http.NameValuePair; +import org.apache.http.ParseException; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.CookieStore; +import org.apache.http.client.entity.UrlEncodedFormEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.entity.mime.MultipartEntityBuilder; +import org.apache.http.impl.client.BasicCookieStore; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.message.BasicNameValuePair; +import org.apache.http.util.EntityUtils; +import org.json.JSONObject; + +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; + +/** + * @author Jim + * + */ +public 
class SEADAuthenticator { + + static long token_start_time = -1; + static int expires_in = -1; + + private static Log log = LogFactory.getLog(SEADAuthenticator.class); + + private static long authTime; + // Create a local instance of cookie store + static CookieStore cookieStore = new BasicCookieStore(); + private static HttpClientContext localContext = HttpClientContext.create(); + + // Create local HTTP context + // Bind custom cookie store to the local context + static { + localContext.setCookieStore(cookieStore); + } + + static HttpClientContext authenticate(String server) { + + boolean authenticated = false; + log.info("Authenticating"); + + String accessToken = SEADGoogleLogin.getAccessToken(); + + // Now login to server and create a session + CloseableHttpClient httpclient = HttpClients.createDefault(); + try { + HttpPost seadAuthenticate = new HttpPost(server + + "/api/authenticate"); + List nvpList = new ArrayList(1); + nvpList.add(0, new BasicNameValuePair("googleAccessToken", + accessToken)); + + seadAuthenticate.setEntity(new UrlEncodedFormEntity(nvpList)); + + CloseableHttpResponse response = httpclient.execute( + seadAuthenticate, localContext); + try { + if (response.getStatusLine().getStatusCode() == 200) { + HttpEntity resEntity = response.getEntity(); + if (resEntity != null) { + // String seadSessionId = + // EntityUtils.toString(resEntity); + authenticated = true; + } + } else { + // Seems to occur when google device id is not set on server + // - with a Not Found response... + log.error("Error response from " + server + " : " + + response.getStatusLine().getReasonPhrase()); + } + } finally { + response.close(); + httpclient.close(); + } + } catch (IOException e) { + log.error("Cannot read sead-google.json"); + log.error(e.getMessage()); + } + + // localContext should have the cookie with the SEAD session key, which + // nominally is all that's needed. 
+ // FixMe: If there is no activity for more than 15 minutes, the session + // may expire, in which case, + // re-authentication using the refresh token to get a new google token + // to allow SEAD login again may be required + // also need to watch the 60 minutes google token timeout - project + // spaces will invalidate the session at 60 minutes even if there is + // activity + authTime = System.currentTimeMillis(); + + if (authenticated) { + return localContext; + } + return null; + } + + public static HttpClientContext reAuthenticateIfNeeded(String server, + long startTime) { + long curTime = System.currentTimeMillis(); + // If necessary, re-authenticate and return the result + if (((curTime - startTime) / 1000l > 1700) + || ((curTime - authTime) / 1000l > 3500)) { + return UPAuthenticate(server); + } + // If it's not time, just return the current value + return localContext; + } + + static PasswordAuthentication passwordAuthentication = null; + + static HttpClientContext UPAuthenticate(String server) { + + boolean authenticated = false; + log.info("Authenticating with username/password"); + + File up = new File("./upass.txt"); + if (up.exists()) { + try { + BufferedReader bReader = new BufferedReader(new FileReader(up)); + passwordAuthentication = new PasswordAuthentication(bReader.readLine(), bReader.readLine().toCharArray()); + bReader.close(); + } catch (IOException e) { + System.out.println("Uable to read u/p from file"); + e.printStackTrace(); + } + + } + + if (passwordAuthentication == null) { + passwordAuthentication = SEAD2UPLogin + .getPasswordAuthentication(); + } + // Now login to server and create a session + CloseableHttpClient httpclient = HttpClients.createDefault(); + try { + HttpPost seadAuthenticate = new HttpPost(server + + "/authenticate/userpass"); + MultipartEntityBuilder meb = MultipartEntityBuilder.create(); + meb.addTextBody("username", passwordAuthentication.getUserName()); + meb.addBinaryBody("password", toBytes(passwordAuthentication.getPassword())); + + seadAuthenticate.setEntity(meb.build()); + + CloseableHttpResponse response = httpclient.execute( + seadAuthenticate, localContext); + HttpEntity resEntity = null; + try { + // 303 is a redirect after a successful login, 400 if bad + // password + if ((response.getStatusLine().getStatusCode() == 303) || (response.getStatusLine().getStatusCode() == 200)) { + resEntity = response.getEntity(); + if (resEntity != null) { + // String seadSessionId = + // EntityUtils.toString(resEntity); + authenticated = true; + } + } else { + // 400 for bad values + log.error("Error response from " + server + " : " + + response.getStatusLine().getReasonPhrase()); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + response.close(); + httpclient.close(); + } + } catch (IOException e) { + log.error(e.getMessage()); + } + + // localContext should have the cookie with the SEAD2 session key, which + // nominally is all that's needed. 
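// Illustrative usage sketch (an assumption, not code from this change): a long-running
// caller can pair UPAuthenticate with UPReAuthenticateIfNeeded (defined below) so the
// session is refreshed once more than about 5 minutes have passed since the last login.
// The serverUrl variable, filesToSend collection, and uploadOne(...) helper are
// hypothetical placeholders.
//
//     HttpClientContext ctx = SEADAuthenticator.UPAuthenticate(serverUrl);
//     for (Resource file : filesToSend) {
//         ctx = SEADAuthenticator.UPReAuthenticateIfNeeded(serverUrl, System.currentTimeMillis());
//         uploadOne(file, ctx); // re-login happens only when the 5-minute window has elapsed
//     }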
+ authTime = System.currentTimeMillis(); + + if (authenticated) { + return localContext; + } + + return null; + } + + private static byte[] toBytes(char[] chars) { + CharBuffer charBuffer = CharBuffer.wrap(chars); + ByteBuffer byteBuffer = Charset.forName("UTF-8").encode(charBuffer); + byte[] bytes = Arrays.copyOfRange(byteBuffer.array(), + byteBuffer.position(), byteBuffer.limit()); + Arrays.fill(charBuffer.array(), '\u0000'); // clear sensitive data + Arrays.fill(byteBuffer.array(), (byte) 0); // clear sensitive data + return bytes; + } + + public static HttpClientContext UPReAuthenticateIfNeeded(String server, + long startTime) { + if ((startTime - authTime) > 300000l) { //assume it lasts at least 5*60*1000 msec == 5 min + return UPAuthenticate(server); + } + return localContext; + } + + private static JSONObject getMe(String server) { + JSONObject me = null; + // Now login to server and get user info + CloseableHttpClient httpclient = HttpClients.createDefault(); + try { + HttpGet seadGetMe = new HttpGet(server + "/api/me"); + + CloseableHttpResponse response = httpclient.execute(seadGetMe, + localContext); + HttpEntity resEntity = null; + try { + // 303 is a redirect after a successful login, 400 if bad + // password + if (response.getStatusLine().getStatusCode() == 200) { + resEntity = response.getEntity(); + if (resEntity != null) { + me = new JSONObject(EntityUtils.toString(resEntity)); + + } + } else { + // 400 for bad values + log.error("Error response from " + server + " : " + + response.getStatusLine().getReasonPhrase()); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + response.close(); + httpclient.close(); + } + } catch (IOException e) { + + log.error(e.getMessage()); + } + return me; + } +} diff --git a/src/main/java/org/sead/uploader/clowder/SEADTester.java b/src/main/java/org/sead/uploader/clowder/SEADTester.java index 9a3741f..2d2ed04 100644 --- a/src/main/java/org/sead/uploader/clowder/SEADTester.java +++ b/src/main/java/org/sead/uploader/clowder/SEADTester.java @@ -1,184 +1,183 @@ -/** ***************************************************************************** - * Copyright 2014, 2016 University of Michigan - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- ***************************************************************************** */ -package org.sead.uploader.clowder; - -import java.io.File; -import java.io.FileWriter; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Set; -import org.apache.http.HttpEntity; -import org.apache.http.HttpHost; -import org.apache.http.client.CookieStore; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.client.protocol.HttpClientContext; -import org.apache.http.entity.StringEntity; -import org.apache.http.impl.client.BasicCookieStore; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.impl.cookie.BasicClientCookie; -import org.apache.http.message.BasicHeader; -import org.apache.http.protocol.HTTP; -import org.apache.http.util.EntityUtils; -import org.json.JSONArray; -import org.json.JSONObject; -import org.sead.uploader.util.ResourceFactory; - - -/** - * - */ -public class SEADTester { - - public static final String FRBR_EO = "http://purl.org/vocab/frbr/core#embodimentOf"; - private static final String DCTERMS_HAS_PART = "http://purl.org/dc/terms/hasPart"; - - private static long max = 9223372036854775807l; - private static boolean merge = false; - private static boolean verify = false; - private static boolean importRO = false; - private static boolean sead2space = false; - private static String sead2datasetId = null; - - private static long globalFileCount = 0l; - private static long totalBytes = 0L; - - protected static boolean listonly = false; - - protected static Set excluded = new HashSet(); - ; - - private static String server = null; - - static PrintWriter pw = null; - - static HttpClientContext localContext = null; - - private static ResourceFactory rf = null; - - private static HashMap roDataIdToNewId = new HashMap(); - private static HashMap roCollIdToNewId = new HashMap(); - private static HashMap roFolderProxy = new HashMap(); - - private static String CLOWDER_DEFAULT_VOCAB = "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld"; - - // Create a local instance of cookie store - static CookieStore cookieStore = new BasicCookieStore(); - - public static void main(String args[]) throws Exception { - - File outputFile = new File("SEADUploadLog_" - + System.currentTimeMillis() + ".txt"); - try { - pw = new PrintWriter(new FileWriter(outputFile)); - } catch (Exception e) { - println(e.getMessage()); - } - server = "http://localhost:9000"; - - localContext = SEADAuthenticator.UPAuthenticate(server); - // Create local HTTP context - BasicClientCookie bc = new BasicClientCookie("id", "e601a866dd9faa8baa9a9aa40b770031e4303e9a6f3d10b54a59ba0a77d75815a4ce919ea99021fcb846f9bbfe106208d8d9b6468f8ec8b03753812ce4faea720b4e137b55665b10ac1bef1f8f12a0d63f2f828f3dc5130b6ff9042824c4c786902ec6a35e8deda741daf53abe4deaabf9587457143f69f466af09ebf3cf8208"); - bc.setDomain("localhost"); - cookieStore.addCookie(bc); - localContext.setCookieStore(cookieStore); - - CloseableHttpClient httpclient = HttpClients - .createDefault(); - - postSingleMetadata( - new JSONObject("{\"a\":\"rá\"}"), - //new JSONObject("{\"a\":\"r\"}"), - new JSONArray("[\"" + CLOWDER_DEFAULT_VOCAB + "\",{\"a\":\"http://purl.org/dc/terms/audience\"}]"), - new JSONObject("{\"c\":\"d\"}"), "http://localhost:9000/api/metadata.jsonld", httpclient); - if (pw 
!= null) { - pw.flush(); - pw.close(); - } - } - - private static void postSingleMetadata(JSONObject singleContent, - JSONArray singleContext, JSONObject agent, String uri, - CloseableHttpClient httpclient) throws IOException { - HttpEntity resEntity = null; - try { - //singleContext.put(CLOWDER_DEFAULT_VOCAB); - JSONObject meta = new JSONObject(); - meta.put("content", singleContent); - meta.put("dataset_id", "583314110d15772a7e37cd90"); - meta.put("@context", singleContext); - //meta.put("agent", agent); - - //StringEntity se2 = new StringEntity(meta.toString()); - StringEntity se2 = new StringEntity( - "{\"@context\":[\"https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld\",{\"Funding Institution\":\"http://sead-data.net/terms/FundingInstitution\"}],\"dataset_id\":\"583314110d15772a7e37cd90\",\"content\":{\"Funding Institution\":\"rá\"}}", "UTF-8"); - println(meta.toString(2)); - - //se2.setContentEncoding("UTF-8"); - se2.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json")); - - HttpHost proxy = new HttpHost("127.0.0.1", 8888, "http"); - - RequestConfig config = RequestConfig.custom() - .setProxy(proxy) - .build(); - HttpPost metadataPost = new HttpPost(uri); - - metadataPost.setHeader("X-Requested-With", "XMLHttpRequest"); - metadataPost.setHeader("Origin", "http://localhost:9000"); - - metadataPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"); - metadataPost.setHeader("Referer", "http://localhost:9000/datasets/583314110d15772a7e37cd90"); - metadataPost.setHeader("DNT", "1"); - metadataPost.setHeader("Cache-Control", "no-cache"); - - metadataPost.setConfig(config); - - metadataPost.setEntity(se2); - - CloseableHttpResponse mdResponse = httpclient.execute(metadataPost, - localContext); - - resEntity = mdResponse.getEntity(); - if (mdResponse.getStatusLine().getStatusCode() != 200) { - println("Error response when processing key=" - + singleContent.keys().next() + " : " - + mdResponse.getStatusLine().getReasonPhrase()); - println("Value: " + singleContent.get(singleContent.keys().next().toString()).toString()); - println("Details: " + EntityUtils.toString(resEntity)); - throw new IOException("Non 200 response"); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - } - - } - - public static void println(String s) { - System.out.println(s); - System.out.flush(); - if (pw != null) { - pw.println(s); - pw.flush(); - } - return; - } -} +/** ***************************************************************************** + * Copyright 2014, 2016 University of Michigan + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ***************************************************************************** */ +package org.sead.uploader.clowder; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Set; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHost; +import org.apache.http.client.CookieStore; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.BasicCookieStore; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.cookie.BasicClientCookie; +import org.apache.http.message.BasicHeader; +import org.apache.http.protocol.HTTP; +import org.apache.http.util.EntityUtils; +import org.json.JSONArray; +import org.json.JSONObject; +import org.sead.uploader.util.ResourceFactory; + +/** + * + */ +public class SEADTester { + + public static final String FRBR_EO = "http://purl.org/vocab/frbr/core#embodimentOf"; + private static final String DCTERMS_HAS_PART = "http://purl.org/dc/terms/hasPart"; + + private static long max = 9223372036854775807l; + private static boolean merge = false; + private static boolean verify = false; + private static boolean importRO = false; + private static boolean sead2space = false; + private static String sead2datasetId = null; + + private static long globalFileCount = 0l; + private static long totalBytes = 0L; + + protected static boolean listonly = false; + + protected static Set excluded = new HashSet(); + ; + + private static String server = null; + + static PrintWriter pw = null; + + static HttpClientContext localContext = null; + + private static ResourceFactory rf = null; + + private static HashMap roDataIdToNewId = new HashMap(); + private static HashMap roCollIdToNewId = new HashMap(); + private static HashMap roFolderProxy = new HashMap(); + + private static String CLOWDER_DEFAULT_VOCAB = "https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld"; + + // Create a local instance of cookie store + static CookieStore cookieStore = new BasicCookieStore(); + + public static void main(String args[]) throws Exception { + + File outputFile = new File("SEADUploadLog_" + + System.currentTimeMillis() + ".txt"); + try { + pw = new PrintWriter(new FileWriter(outputFile)); + } catch (Exception e) { + println(e.getMessage()); + } + server = "http://localhost:9000"; + + localContext = SEADAuthenticator.UPAuthenticate(server); + // Create local HTTP context + BasicClientCookie bc = new BasicClientCookie("id", "e601a866dd9faa8baa9a9aa40b770031e4303e9a6f3d10b54a59ba0a77d75815a4ce919ea99021fcb846f9bbfe106208d8d9b6468f8ec8b03753812ce4faea720b4e137b55665b10ac1bef1f8f12a0d63f2f828f3dc5130b6ff9042824c4c786902ec6a35e8deda741daf53abe4deaabf9587457143f69f466af09ebf3cf8208"); + bc.setDomain("localhost"); + cookieStore.addCookie(bc); + localContext.setCookieStore(cookieStore); + + CloseableHttpClient httpclient = HttpClients + .createDefault(); + + postSingleMetadata( + new JSONObject("{\"a\":\"r�\"}"), + //new JSONObject("{\"a\":\"r\"}"), + new JSONArray("[\"" + CLOWDER_DEFAULT_VOCAB + "\",{\"a\":\"http://purl.org/dc/terms/audience\"}]"), + new JSONObject("{\"c\":\"d\"}"), "http://localhost:9000/api/metadata.jsonld", httpclient); + if (pw != 
null) { + pw.flush(); + pw.close(); + } + } + + private static void postSingleMetadata(JSONObject singleContent, + JSONArray singleContext, JSONObject agent, String uri, + CloseableHttpClient httpclient) throws IOException { + HttpEntity resEntity = null; + try { + //singleContext.put(CLOWDER_DEFAULT_VOCAB); + JSONObject meta = new JSONObject(); + meta.put("content", singleContent); + meta.put("dataset_id", "583314110d15772a7e37cd90"); + meta.put("@context", singleContext); + //meta.put("agent", agent); + + //StringEntity se2 = new StringEntity(meta.toString()); + StringEntity se2 = new StringEntity( + "{\"@context\":[\"https://clowder.ncsa.illinois.edu/contexts/metadata.jsonld\",{\"Funding Institution\":\"http://sead-data.net/terms/FundingInstitution\"}],\"dataset_id\":\"583314110d15772a7e37cd90\",\"content\":{\"Funding Institution\":\"r�\"}}", "UTF-8"); + println(meta.toString(2)); + + //se2.setContentEncoding("UTF-8"); + se2.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json")); + + HttpHost proxy = new HttpHost("127.0.0.1", 8888, "http"); + + RequestConfig config = RequestConfig.custom() + .setProxy(proxy) + .build(); + HttpPost metadataPost = new HttpPost(uri); + + metadataPost.setHeader("X-Requested-With", "XMLHttpRequest"); + metadataPost.setHeader("Origin", "http://localhost:9000"); + + metadataPost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"); + metadataPost.setHeader("Referer", "http://localhost:9000/datasets/583314110d15772a7e37cd90"); + metadataPost.setHeader("DNT", "1"); + metadataPost.setHeader("Cache-Control", "no-cache"); + + metadataPost.setConfig(config); + + metadataPost.setEntity(se2); + + CloseableHttpResponse mdResponse = httpclient.execute(metadataPost, + localContext); + + resEntity = mdResponse.getEntity(); + if (mdResponse.getStatusLine().getStatusCode() != 200) { + println("Error response when processing key=" + + singleContent.keys().next() + " : " + + mdResponse.getStatusLine().getReasonPhrase()); + println("Value: " + singleContent.get(singleContent.keys().next().toString()).toString()); + println("Details: " + EntityUtils.toString(resEntity)); + throw new IOException("Non 200 response"); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + } + + } + + public static void println(String s) { + System.out.println(s); + System.out.flush(); + if (pw != null) { + pw.println(s); + pw.flush(); + } + return; + } +} diff --git a/src/main/java/org/sead/uploader/clowder/SEADUploader.java b/src/main/java/org/sead/uploader/clowder/SEADUploader.java index 8edc005..c7bbfaf 100644 --- a/src/main/java/org/sead/uploader/clowder/SEADUploader.java +++ b/src/main/java/org/sead/uploader/clowder/SEADUploader.java @@ -1,2021 +1,2020 @@ -/** ***************************************************************************** - * Copyright 2014, 2016 University of Michigan - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- ***************************************************************************** */ -package org.sead.uploader.clowder; - -import java.io.IOException; -import java.io.InputStream; -import java.io.UnsupportedEncodingException; -import java.net.URLEncoder; -import java.security.KeyManagementException; -import java.security.KeyStoreException; -import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Set; -import java.util.logging.Level; -import java.util.logging.Logger; -import javax.net.ssl.HostnameVerifier; -import javax.net.ssl.SSLContext; - -import org.apache.commons.codec.digest.DigestUtils; -import org.apache.http.Consts; -import org.apache.http.HttpEntity; -import org.apache.http.client.ClientProtocolException; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.client.methods.HttpPut; -import org.apache.http.client.protocol.HttpClientContext; -import org.apache.http.conn.ssl.SSLConnectionSocketFactory; -import org.apache.http.conn.ssl.TrustAllStrategy; -import org.apache.http.ssl.SSLContextBuilder; -import org.apache.http.entity.ContentType; -import org.apache.http.entity.StringEntity; -import org.apache.http.entity.mime.MultipartEntityBuilder; -import org.apache.http.entity.mime.content.ContentBody; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.message.BasicHeader; -import org.apache.http.protocol.HTTP; -import org.apache.http.util.EntityUtils; -import org.json.JSONArray; -import org.json.JSONObject; -import org.sead.uploader.util.PublishedFolderProxyResource; -import org.sead.uploader.util.PublishedResource; -import org.sead.uploader.util.Resource; -import org.sead.uploader.util.ResourceFactory; - -import org.sead.uploader.AbstractUploader; -import org.sead.uploader.util.UploaderException; - -/** - * The SEAD Uploader supports the upload of files/directories from a local disk, - * or existing SEAD publications for which a valid OREMap file is available from - * a URL (repositories must update the data file links in the ORE for the - * Uploader to retrieve them) - * - * In addition to sending files and creating a SEAD collection/dataset (1.5) or - * Dataset/Folder/File (2.0) structure, the Uploader adds path metadata, usable - * in detecting whether an item has already been created/uploaded. For - * publications, it also sends metadata, tags, comments, and spatial reference - * metadata, performing some mapping to clarify when metadata applies only to - * the original/published version and when the new live copy 'inherits' the - * metadata. This can be adjusted using the black and gray lists of terms and/or - * providing custom code to map metadata to SEAD 2.0 conventions. 
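// Illustrative invocation, assembled from the usage() text below and the exec.args of
// the nbactions.xml run action above; <yourApiKey> and the "mydataset" directory are
// placeholders:
//
//     java -cp .;sead2.1.jar org.sead.uploader.clowder.SEADUploader
//          -server=https://sead2-dev.ncsa.illinois.edu -key=<yourApiKey> -skip=4 mydataset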
- * - */ -public class SEADUploader extends AbstractUploader { - - public static final String FRBR_EO = "http://purl.org/vocab/frbr/core#embodimentOf"; - private static final String DCTERMS_HAS_PART = "http://purl.org/dc/terms/hasPart"; - - private static boolean d2a = false; - private static String apiKey = null; - private static String knownId = null; - private static boolean checkDataset = false; - private static String sead2datasetId = null; - private static int numFoundDatasets = 0; - - private static String CLOWDER_DEFAULT_VOCAB = "https://clowder.ncsa.illinois.edu/contexts/dummy"; - - public static void main(String args[]) throws Exception { - setUploader(new SEADUploader()); - uploader.createLogFile("SEADUploaderLog_"); - uploader.setSpaceType("SEAD2"); - println("\n----------------------------------------------------------------------------------\n"); - println("SSS EEEE A DDD"); - println("S E A A D D"); - println(" SS EEE AAAAA D D"); - println(" S E A A D D"); - println("SSS EEEE A A DDD"); - - println("SEADUploader - a command-line application to upload files to any Clowder Dataset"); - println("Developed for the SEAD (https://sead-data.net) Community"); - println("\n----------------------------------------------------------------------------------\n"); - println("\n***Parsing arguments:***\n"); - uploader.parseArgs(args); - - if (server == null || requests.isEmpty()) { - println("\n***Required arguments not found.***"); - usage(); - } else if(checkDataset) { - ((SEADUploader)uploader).checkTheDataset(); - - } else { - println("\n***Starting to Process Upload Requests:***\n"); - uploader.processRequests(); - } - println("\n***Execution Complete.***"); - } - - private static void usage() { - println("\nUsage:"); - println(" java -cp .;sead2.1.jar org.sead.uploader.clowder.SEADUploader -server= "); - - println("\n where:"); - println(" = the URL of the server to upload to, e.g. 
https://sead2.ncsa.illinois.edu"); - println(" = a space separated list of directory name(s) to upload as Dataset(s) containing the folders and files within them"); - println("\n Optional Arguments:"); - println(" -key= - your personal apikey, created in the server at "); - println(" - using an apiKey avoids having to enter your username/password and having to reauthenticate for long upload runs"); - println(" -id= - if you know a dataset exists, specifying it's id will improve performance"); - println(" -listonly - Scan the Dataset and local files and list what would be uploaded (does not upload with this flag)"); - println(" -limit= - Specify a maximum number of files to upload per invocation."); - println(" -verify - Check both the file name and checksum in comparing with current Dataset entries."); - println(" -skip= - a number of files to skip before starting processing (saves time when you know the first n files have been uploaded before)"); - println(" -forcenew - A new dataset will be created for this upload, even if a matching one is found."); - println(" -importRO - uploads from a zipped BagIt file rather than from disk"); - println(""); - - } - - private void checkTheDataset() { - if (knownId == null) { - println("CheckId only works with knownId - exiting"); - System.exit(0); - } - int goodFiles=0; - String dPath = null; - CloseableHttpResponse response = null; - String serviceUrl = ""; - try { - CloseableHttpClient httpclient = getSharedHttpClient(); - - serviceUrl = server + "/api/datasets/" + knownId - + "/metadata.jsonld"; - println(serviceUrl); - - HttpGet httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); - response = httpclient.execute(httpget, - getLocalContext()); - try { - if (response.getStatusLine() - .getStatusCode() == 200) { - HttpEntity resEntity = response - .getEntity(); - if (resEntity != null) { - JSONArray mdList = new JSONArray( - EntityUtils - .toString(resEntity)); - - for (int j = 0; j < mdList.length(); j++) { - if (mdList - .getJSONObject(j) - .getJSONObject( - "content") - .has("Upload Path")) { - dPath = mdList - .getJSONObject( - j) - .getJSONObject( - "content") - .getString( - "Upload Path") - .trim(); - existingDatasets - .put(dPath, knownId); - break; - } - } - } - } else { - println("Error response when getting metadata for dataset: " - + knownId - + " : " - + response.getStatusLine() - .getReasonPhrase()); - println("Exiting. Please contact SEAD about the error."); - System.exit(1); - - } - } finally { - try { - response.close(); - } catch (IOException ex) { - Logger.getLogger(SEADUploader.class.getName()).log(Level.SEVERE, null, ex); - } - } - if (dPath == null) { - println("Dataset: " + knownId + " does not have an Upload Path"); - System.exit(0); - } - - - existingFiles = new HashMap(); - existingFolders = new HashMap(); - try { - try { - serviceUrl = server + "/api/datasets/" - + knownId + "/folders"; - httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); - response = httpclient.execute( - httpget, getLocalContext()); - if (response.getStatusLine().getStatusCode() == 200) { - HttpEntity resEntity = response.getEntity(); - if (resEntity != null) { - JSONArray folders = new JSONArray( - EntityUtils.toString(resEntity)); - for (int i = 0; i < folders.length(); i++) { - existingFolders.put(folders.getJSONObject(i) - .getString("name"), folders.getJSONObject(i) - .getString("id")); - } - } - } else { - println("Error response when checking for folders" - + " : " - + response.getStatusLine() - .getReasonPhrase()); - println("Exiting. 
Please contact SEAD about the error."); - System.exit(1); - - } - } finally { - response.close(); - } - } catch (IOException e) { - println("Error processing folders: " + e.getMessage()); - } finally { - folderMDRetrieved = true; - } - - try { - serviceUrl = server + "/api/datasets/" - + knownId + "/listAllFiles"; - - httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); - - response = httpclient.execute( - httpget, getLocalContext()); - JSONArray fileList = null; - try { - if (response.getStatusLine().getStatusCode() == 200) { - HttpEntity resEntity = response.getEntity(); - if (resEntity != null) { - fileList = new JSONArray( - EntityUtils.toString(resEntity)); - } - } else { - println("Error response when checking files" - + " : " - + response.getStatusLine() - .getReasonPhrase()); - println("Exiting. Please contact SEAD about the error."); - System.exit(1); - - } - } finally { - response.close(); - } - if (fileList != null) { - for (int i = 0; i < fileList.length(); i++) { - String id = fileList.getJSONObject(i) - .getString("id"); - serviceUrl = server + "/api/files/" + id - + "/metadata.jsonld"; - - httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); - - response = httpclient.execute(httpget, - getLocalContext()); - - try { - if (response.getStatusLine() - .getStatusCode() == 200) { - HttpEntity resEntity = response - .getEntity(); - if (resEntity != null) { - JSONArray mdList = new JSONArray( - EntityUtils - .toString(resEntity)); - boolean hasPath = false; - for (int j = 0; j < mdList.length(); j++) { - if (mdList - .getJSONObject(j) - .getJSONObject( - "content") - .has("Upload Path")) { - String path = mdList - .getJSONObject( - j) - .getJSONObject( - "content") - .getString( - "Upload Path") - .trim(); - if (existingFiles.containsKey(path)) { - println(id + " duplicates " + existingFiles.get(path)); - } else { - String folderPath = path.substring(1,path.lastIndexOf("/")); - if(folderPath.indexOf("/")>=0) { - folderPath = folderPath.substring(folderPath.indexOf("/")); - } else { - folderPath=""; - } - if (folderPath.length()>0 && !existingFolders.containsKey(folderPath)) { - println("Folder for " + path + " not found"); - } else { - - existingFiles - .put(path, id); - goodFiles++; - if (goodFiles % 10 == 0) { - System.out.print("."); - } - if (goodFiles % 1000 == 0) { - System.out.print("Processed " + goodFiles + " good files."); - } - - } - } - hasPath = true; - break; - } - } - if (!hasPath) { - println("File with no path: " + id); - } - } - - } else { - println("Error response when getting metadata for file: " - + id - + " : " - + response.getStatusLine() - .getReasonPhrase()); - println("Exiting. 
Please contact SEAD about the error."); - System.exit(1); - } - } finally { - response.close(); - } - } - - } - } finally { - response.close(); - httpclient.close(); - } - } catch (IOException io) { - println("Error doing " + serviceUrl + " : " + io.getMessage()); - } - println("Analysis complete with " + goodFiles + " good files."); - } - - public boolean parseCustomArg(String arg) { - if (arg.equalsIgnoreCase("-d2a")) { - d2a = true; - println("Description to Abstract translation on"); - return true; - } else if (arg.startsWith("-key")) { - apiKey = arg.substring(arg.indexOf(argSeparator) + 1); - println("Using apiKey: " + apiKey); - return true; - } else if (arg.startsWith("-id")) { - knownId = arg.substring(arg.indexOf(argSeparator) + 1); - println("Updating Dataset with id: " + knownId); - return true; - } else if(arg.startsWith("-checkDataset")) { - checkDataset=true; - println("Only checking Dataset"); - return true; - } - return false; - } - - CloseableHttpClient httpClient = null; - - public CloseableHttpClient getSharedHttpClient() { - if (httpClient == null) { - // use the TrustSelfSignedStrategy to allow Self Signed Certificates - SSLContext sslContext; - try { - sslContext = SSLContextBuilder - .create() - .loadTrustMaterial(new TrustAllStrategy()) - .build(); - - // create an SSL Socket Factory to use the SSLContext with the trust self signed certificate strategy - // and allow all hosts verifier. - SSLConnectionSocketFactory connectionFactory = new SSLConnectionSocketFactory(sslContext); - - // finally create the HttpClient using HttpClient factory methods and assign the ssl socket factory - httpClient = HttpClients - .custom() - .setSSLSocketFactory(connectionFactory) - .build(); - } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { - Logger.getLogger(SEADUploader.class.getName()).log(Level.SEVERE, null, ex); - } - } - return httpClient; - } - - @Override - public void processRequests() { - println("Contacting server..."); - getSharedHttpClient(); - super.processRequests(); - println("Closing server connection..."); - - try { - getSharedHttpClient().close(); - } catch (IOException ex) { - Logger.getLogger(SEADUploader.class.getName()).log(Level.SEVERE, null, ex); - } - } - - @Override - public HttpClientContext authenticate() { - if (apiKey != null) { - //Don't need to update context since we have the apikey to use - if (getLocalContext() == null) { - return (new HttpClientContext()); - } else { - return getLocalContext(); - } - } - return SEADAuthenticator - .UPAuthenticate(server); - } - - private void moveFileToFolder(String newUri, String parentId, - Resource file) { - CloseableHttpClient httpclient = getSharedHttpClient(); - try { - HttpPost httppost = new HttpPost(appendKeyIfUsed(server + "/api/datasets/" - + sead2datasetId + "/moveFile/" + parentId + "/" + newUri)); - - StringEntity se = new StringEntity("{}"); - se.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json")); - httppost.setEntity(se); - - CloseableHttpResponse response = httpclient.execute(httppost, - getLocalContext()); - HttpEntity resEntity = null; - try { - if (response.getStatusLine().getStatusCode() == 200) { - EntityUtils.consume(response.getEntity()); - } else { - println("Error response when processing " - + file.getAbsolutePath() + " : " - + response.getStatusLine().getReasonPhrase()); - println("Details: " - + EntityUtils.toString(response.getEntity())); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - response.close(); - } - - // 
FixMe Add tags - /* - * if (tagValues != null) { addTags(httpclient, dir, collectionId, - * tagValues); } - */ - } catch (IOException e) { - println("Error processing " + file.getAbsolutePath() + " : " - + e.getMessage()); - } - } - - private String create2Folder(String parentId, String sead2datasetId, - String path, Resource dir) { - String collectionId = null; - CloseableHttpClient httpclient = getSharedHttpClient(); - try { - String postUri = server + "/api/datasets/" - + sead2datasetId + "/newFolder"; - if (apiKey != null) { - postUri = postUri + "?key=" + apiKey; - } - HttpPost httppost = new HttpPost(appendKeyIfUsed(server + "/api/datasets/" - + sead2datasetId + "/newFolder")); - - JSONObject jo = new JSONObject(); - String title = dir.getName().trim(); - // For publishedResource, we want to use the Title - if (dir instanceof PublishedResource) { - title = ((PublishedResource) dir).getAndRemoveTitle().trim(); - } - jo.put("name", title); - jo.put("parentId", parentId); - jo.put("parentType", ((parentId == sead2datasetId) ? "dataset" - : "folder")); - - StringEntity se = new StringEntity(jo.toString(), "UTF-8"); - se.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json; charset=utf-8")); - httppost.setEntity(se); - - CloseableHttpResponse response = httpclient.execute(httppost, - getLocalContext()); - HttpEntity resEntity = null; - try { - if (response.getStatusLine().getStatusCode() == 200) { - EntityUtils.consume(response.getEntity()); - // Now query to get the new folder's id - // path should be of the form - // "//[/]" for - // file uploads and - // "/data//[/]" - // for imported ROs - // and we need to strip to get only the folder path part - String folderPath = path; - if (importRO) { - folderPath = folderPath.substring(folderPath.substring( - 1).indexOf("/") + 1); - folderPath = folderPath.substring(folderPath.substring( - 1).indexOf("/") + 1); - } - folderPath = folderPath.substring(folderPath.substring(1) - .indexOf("/") + 1); - - HttpGet httpget = new HttpGet(appendKeyIfUsed(server + "/api/datasets/" - + sead2datasetId + "/folders")); - - CloseableHttpResponse getResponse = httpclient.execute( - httpget, getLocalContext()); - try { - if (getResponse.getStatusLine().getStatusCode() == 200) { - JSONArray folders = new JSONArray( - EntityUtils.toString(getResponse - .getEntity())); - for (int i = 0; i < folders.length(); i++) { - if (folders.getJSONObject(i).getString("name") - .equals(folderPath)) { - collectionId = folders.getJSONObject(i) - .getString("id"); - break; - } - } - } else { - println("Error response when processing " - + dir.getAbsolutePath() - + " : " - + getResponse.getStatusLine() - .getReasonPhrase()); - println("Details: " - + EntityUtils.toString(getResponse - .getEntity())); - } - } finally { - EntityUtils.consumeQuietly(getResponse.getEntity()); - getResponse.close(); - } - } else { - println("Error response when processing " - + dir.getAbsolutePath() + " : " - + response.getStatusLine().getReasonPhrase()); - println("Details: " - + EntityUtils.toString(response.getEntity())); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - response.close(); - } - - // Add metadata for imported folders - // FixMe - Add Metadata to folder directly - // Assume we only write a metadata file if collection is newly - // created and we're importing - if (importRO && collectionId != null) { - Resource mdFile = new PublishedFolderProxyResource( - (PublishedResource) dir, collectionId); - String mdId = null; - try { - mdId = uploadDatafile(mdFile, 
path + "/" - + mdFile.getName()); - } catch (UploaderException ue) { - println(ue.getMessage()); - } - // By default, we are in a folder and need to move the file - // (sead2datasetId != collectionId)) - if (mdId != null) { // and it was just - // created - moveFileToFolder(mdId, collectionId, mdFile); - roFolderProxy.put(collectionId, mdId); - } else { - println("Unable to write metadata file for folder: " - + collectionId); - } - } - } catch (IOException e) { - println("Error processing " + dir.getAbsolutePath() + " : " - + e.getMessage()); - } - return collectionId; - - } - - private String create2Dataset(Resource dir, String path) { - String datasetId = null; - CloseableHttpClient httpclient = getSharedHttpClient(); - try { - - HttpPost httppost = new HttpPost(appendKeyIfUsed(server - + "/api/datasets/createempty")); - JSONObject jo = new JSONObject(); - String title = dir.getName().trim(); - // For publishedResource, we want to use the Title - if (dir instanceof PublishedResource) { - title = ((PublishedResource) dir).getAndRemoveTitle().trim(); - } - jo.put("name", title); - if (importRO) { - String abs = ((PublishedResource) dir) - .getAndRemoveAbstract(d2a); - if (abs != null) { - jo.put("description", abs); - } - } - - StringEntity se = new StringEntity(jo.toString(), "UTF-8"); - se.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json; charset=utf-8")); - httppost.setEntity(se); - - CloseableHttpResponse response = httpclient.execute(httppost, - getLocalContext()); - HttpEntity resEntity = null; - try { - resEntity = response.getEntity(); - if (response.getStatusLine().getStatusCode() == 200) { - if (resEntity != null) { - datasetId = new JSONObject( - EntityUtils.toString(resEntity)) - .getString("id"); - } - } else { - println("Error response when processing " - + dir.getAbsolutePath() + " : " - + response.getStatusLine().getReasonPhrase()); - println("Details: " + EntityUtils.toString(resEntity)); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - response.close(); - } - if (datasetId != null) { - - // Add Metadata - JSONObject content = new JSONObject(); - JSONObject context = new JSONObject(); - JSONObject agent = new JSONObject(); - List creators = new ArrayList(); - content.put("Upload Path", path); - List comments = new ArrayList(); - // Should be true for all PublishedResources, never for files... 
- if (dir instanceof PublishedResource) { - ((PublishedResource) dir).getAndRemoveCreator(creators); - } - - String creatorPostUri = server + "/api/datasets/" + datasetId - + "/creator"; - for (String creator : creators) { - postDatasetCreator(creator, creatorPostUri, httpclient); - } - - String tagValues = add2ResourceMetadata(content, context, - agent, comments, path, dir); - - postMetadata(httpclient, server + "/api/datasets/" + datasetId - + "/metadata.jsonld", dir.getAbsolutePath(), content, - context, agent); - if (creators != null) { - - } - // FixMe Add tags - if (tagValues != null) { - HttpPost tagPost = new HttpPost(appendKeyIfUsed(server + "/api/datasets/" - + datasetId + "/tags")); - JSONObject tags = new JSONObject(); - - String[] tagArray = tagValues.split(","); - JSONArray tagList = new JSONArray(tagArray); - tags.put("tags", tagList); - - StringEntity se3 = new StringEntity(tags.toString(), - "UTF-8"); - se3.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json; charset=utf-8")); - tagPost.setEntity(se3); - - CloseableHttpResponse tagResponse = httpclient.execute( - tagPost, getLocalContext()); - resEntity = null; - try { - resEntity = tagResponse.getEntity(); - if (tagResponse.getStatusLine().getStatusCode() != 200) { - println("Error response when processing " - + dir.getAbsolutePath() - + " : " - + tagResponse.getStatusLine() - .getReasonPhrase()); - println("Details: " - + EntityUtils.toString(resEntity)); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - tagResponse.close(); - } - } - if (comments.size() > 0) { - Collections.sort(comments); - for (String text : comments.toArray(new String[comments - .size()])) { - HttpPost commentPost = new HttpPost(appendKeyIfUsed(server - + "/api/datasets/" + datasetId + "/comment")); - - JSONObject comment = new JSONObject(); - comment.put("text", text); - - StringEntity se3 = new StringEntity(comment.toString(), - "UTF-8"); - se3.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json; charset=utf-8")); - commentPost.setEntity(se3); - - CloseableHttpResponse commentResponse = httpclient - .execute(commentPost, getLocalContext()); - resEntity = null; - try { - resEntity = commentResponse.getEntity(); - if (commentResponse.getStatusLine().getStatusCode() != 200) { - println("Error response when processing " - + dir.getAbsolutePath() - + " : " - + commentResponse.getStatusLine() - .getReasonPhrase()); - println("Details: " - + EntityUtils.toString(resEntity)); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - commentResponse.close(); - } - } - } - } - } catch (IOException e) { - println("Error processing " + dir.getAbsolutePath() + " : " - + e.getMessage()); - } - return datasetId; - } - - @SuppressWarnings("unchecked") - private String add2ResourceMetadata(JSONObject content, - JSONObject context, JSONObject agent, List comments, - String path, Resource item) { - Object tags = null; - - JSONObject metadata = item.getMetadata(); // Empty for file resources - if (metadata.has("Metadata on Original")) { - JSONObject original = metadata - .getJSONObject("Metadata on Original"); - // Gray list metadata should be used (and removed) from - // this - // field or passed in as is - - if (original.has("Keyword")) { - tags = original.get("Keyword"); - original.remove("Keyword"); - } - if (original.has("GeoPoint")) { - Object gpObject = original.get("GeoPoint"); - if (gpObject instanceof JSONArray) { - // An error so we'll just capture as a string - metadata.put("Geolocation", - "This entry had 
multiple Lat/Long from SEAD 1.5 GeoPoints: " - + ((JSONArray) gpObject).toString(2)); - } else { - JSONObject point = original.getJSONObject("GeoPoint"); - metadata.put("Geolocation", - "Lat/Long from SEAD 1.5 GeoPoint"); - metadata.put("Latitude", point.getString("lat")); - metadata.put("Longitude", point.getString("long")); - original.remove("GeoPoint"); - } - - } - - if (original.has("Comment")) { - Object comObject = original.get("Comment"); - if (comObject instanceof JSONArray) { - for (int i = 0; i < ((JSONArray) comObject).length(); i++) { - JSONObject comment = ((JSONArray) comObject) - .getJSONObject(i); - comments.add(getComment(comment)); - } - } else { - comments.add(getComment(((JSONObject) comObject))); - } - original.remove("Comment"); - } - } - // Convert all vals to Strings - for (String key : (Set) metadata.keySet()) { - String newKey = key; - if (ResourceFactory.graySwaps.containsKey(key)) { - newKey = ResourceFactory.graySwaps.get(key); - } - if (metadata.get(key) instanceof JSONArray) { - // split values and handle them separately - JSONArray valArray = (JSONArray) metadata.get(key); - JSONArray newVals = new JSONArray(); - for (int i = 0; i < valArray.length(); i++) { - String val = valArray.get(i).toString(); - newVals.put(val); - } - content.put(newKey, newVals); - } else { - content.put(newKey, metadata.get(key).toString()); - } - } - // create tag(s) string - String tagValues = null; - if (tags != null) { - if (tags instanceof JSONArray) { - tagValues = ""; - JSONArray valArray = (JSONArray) tags; - for (int i = 0; i < valArray.length(); i++) { - tagValues = tagValues + valArray.get(i).toString(); - if (valArray.length() > 1 && i != valArray.length() - 1) { - tagValues = tagValues + ","; - } - } - } else { - tagValues = ((String) tags); - } - } - content.put("Upload Path", path); - - // Flatten context for 2.0 - context.put("@vocab", CLOWDER_DEFAULT_VOCAB); - for (String key : ((Set) content.keySet())) { - if (rf != null) { // importRO == true - String pred = rf.getURIForContextEntry(key); - if (pred != null) { - context.put(key, pred); - } - } else { - if (key.equals("Upload Path")) { - context.put(key, SEADUploader.FRBR_EO); - } else { // shouldn't happen - println("Unrecognized Metadata Entry: " + key); - } - } - } - JSONObject me = get2me(); - agent.put("name", me.getString("fullName")); - agent.put("@type", "cat:user"); - agent.put("user_id", server + "/api/users/" + me.getString("id")); - - return tagValues; - } - - private String getComment(JSONObject comment) { - StringBuilder sb = new StringBuilder(); - sb.append("Imported Comment: "); - sb.append(comment.getString("comment_date")); - sb.append(", Author: "); - String comAuth = comment.getString("comment_author"); - sb.append(comAuth.substring(comAuth.lastIndexOf("/") + 1)); - sb.append(": "); - sb.append(comment.getString("comment_body")); - return sb.toString(); - } - - JSONObject me = null; - - private JSONObject get2me() { - CloseableHttpClient httpclient = getSharedHttpClient(); - if (me == null) { - try { - String serviceUrl = server + "/api/me"; - - HttpGet httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); - CloseableHttpResponse response = httpclient.execute(httpget, - getLocalContext()); - try { - if (response.getStatusLine().getStatusCode() == 200) { - HttpEntity resEntity = response.getEntity(); - if (resEntity != null) { - me = new JSONObject(EntityUtils.toString(resEntity)); - } - } else { - println("Error response when retrieving user details: " - + 
response.getStatusLine().getReasonPhrase()); - - } - } finally { - response.close(); - } - } catch (IOException e) { - println("Error processing get user request: " + e.getMessage()); - } - // me.put("fullName", "SEAD 1.5 Importer"); - } - return me; - } - - @Override - protected String uploadDatafile(Resource file, String path) throws UploaderException { - if (sead2datasetId == null) { - throw new UploaderException("SEAD2 does not support upload of individual files that are not in a dataset."); - } - CloseableHttpClient httpclient = getSharedHttpClient(); - String dataId = null; - try { - // FixMe: requires update to 2.0 ... To support long uploads, - // request a key to allow the - // upload to complete even if the session has timed out - - // Now post data - String urlString = server + "/api/uploadToDataset/" - + sead2datasetId; - HttpPost httppost = new HttpPost(appendKeyIfUsed(urlString)); - ContentBody bin = file.getContentBody(); - MultipartEntityBuilder meb = MultipartEntityBuilder.create(); - meb.addPart("files[]", bin); - - // FixMe - // addLiteralMetadata(meb, FRBR_EO, path); - // FixMe - // String tagValues = addResourceMetadata(meb, file); - HttpEntity reqEntity = meb.build(); - httppost.setEntity(reqEntity); - - CloseableHttpResponse response = httpclient.execute(httppost, - getLocalContext()); - HttpEntity resEntity = response.getEntity(); - - try { - if (response.getStatusLine().getStatusCode() == 200) { - if (resEntity != null) { - dataId = new JSONObject(EntityUtils.toString(resEntity)) - .getString("id"); - } - } else { - println("Error response when processing " - + file.getAbsolutePath() + " : " - + response.getStatusLine().getReasonPhrase()); - println("Details: " + EntityUtils.toString(resEntity)); - } - - } catch (Exception e) { - println("Error uploading file: " + file.getName()); - e.printStackTrace(); - } finally { - EntityUtils.consumeQuietly(response.getEntity()); - response.close(); - } - if (dataId != null) { - - // FixMe - add Metadata - /* - * addLiteralMetadata(meb, FRBR_EO, path); - * - * // Add metadata for published resources - * - * String tagValues = addResourceMetadata(meb, dir); HttpEntity - * reqEntity = meb.build(); - */ - JSONObject content = new JSONObject(); - List comments = new ArrayList(); - JSONObject context = new JSONObject(); - JSONObject agent = new JSONObject(); - - String abs = null; - String title = null; - if (file instanceof PublishedResource) { - abs = ((PublishedResource) file).getAndRemoveAbstract(d2a); - - title = ((PublishedResource) file).getAndRemoveTitle(); - if ((title != null) && (title.equals(file.getName()))) { - title = null; - } - } - String tagValues = add2ResourceMetadata(content, context, - agent, comments, path, file); - - postMetadata(httpclient, server + "/api/files/" + dataId - + "/metadata.jsonld", file.getAbsolutePath(), content, - context, agent); - - if (abs != null) { - HttpPut descPut = new HttpPut(appendKeyIfUsed(server + "/api/files/" - + dataId + "/updateDescription")); - JSONObject desc = new JSONObject(); - - desc.put("description", abs); - - StringEntity descSE = new StringEntity(desc.toString(), - "UTF-8"); - descSE.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json; charset=utf-8")); - descPut.setEntity(descSE); - - CloseableHttpResponse descResponse = httpclient.execute( - descPut, getLocalContext()); - resEntity = null; - try { - resEntity = descResponse.getEntity(); - if (descResponse.getStatusLine().getStatusCode() != 200) { - println("Error response when processing " - + 
file.getAbsolutePath() - + " : " - + descResponse.getStatusLine() - .getReasonPhrase()); - println("Details: " - + EntityUtils.toString(resEntity)); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - descResponse.close(); - } - } - // We need a valid filename (from "Label"/getName() to do - // the - // upload, but, if the user - // has changed the "Title", we need to then update the - // displayed - // filename - // For folders, this will currently always be null - // (since Title is used for the name in PublishedResource - // for directories) and therefore we won't change the name - // of the readme file - // as set in the Proxy class. - if (title != null) { - HttpPut namePut = new HttpPut(appendKeyIfUsed(server + "/api/files/" - + dataId + "/filename")); - JSONObject name = new JSONObject(); - - name.put("name", title); - - StringEntity nameSE = new StringEntity(name.toString(), - "UTF-8"); - nameSE.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json; charset=utf-8")); - namePut.setEntity(nameSE); - - CloseableHttpResponse nameResponse = httpclient.execute( - namePut, getLocalContext()); - resEntity = null; - try { - resEntity = nameResponse.getEntity(); - if (nameResponse.getStatusLine().getStatusCode() != 200) { - println("Error response when processing " - + file.getAbsolutePath() - + " : " - + nameResponse.getStatusLine() - .getReasonPhrase()); - println("Details: " - + EntityUtils.toString(resEntity)); - } else { - println("Dataset name successfully changed from : " - + file.getName() + " to " + title); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - nameResponse.close(); - } - } - - // FixMe Add tags - if (tagValues != null) { - HttpPost tagPost = new HttpPost(appendKeyIfUsed(server + "/api/files/" - + dataId + "/tags")); - JSONObject tags = new JSONObject(); - - String[] tagArray = tagValues.split(","); - JSONArray tagList = new JSONArray(tagArray); - tags.put("tags", tagList); - - StringEntity se3 = new StringEntity(tags.toString(), - "UTF-8"); - se3.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json; charset=utf-8")); - tagPost.setEntity(se3); - - CloseableHttpResponse tagResponse = httpclient.execute( - tagPost, getLocalContext()); - resEntity = null; - try { - resEntity = tagResponse.getEntity(); - if (tagResponse.getStatusLine().getStatusCode() != 200) { - println("Error response when processing " - + file.getAbsolutePath() - + " : " - + tagResponse.getStatusLine() - .getReasonPhrase()); - println("Details: " - + EntityUtils.toString(resEntity)); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - tagResponse.close(); - } - - } - if (comments.size() > 0) { - Collections.sort(comments); - for (String text : comments.toArray(new String[comments - .size()])) { - HttpPost commentPost = new HttpPost(appendKeyIfUsed(server - + "/api/files/" + dataId + "/comment")); - - JSONObject comment = new JSONObject(); - comment.put("text", text); - - StringEntity se4 = new StringEntity(comment.toString(), - "UTF-8"); - se4.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json; charset=utf-8")); - commentPost.setEntity(se4); - - CloseableHttpResponse commentResponse = httpclient - .execute(commentPost, getLocalContext()); - resEntity = null; - try { - resEntity = commentResponse.getEntity(); - if (commentResponse.getStatusLine().getStatusCode() != 200) { - println("Error response when processing " - + file.getAbsolutePath() - + " : " - + commentResponse.getStatusLine() - .getReasonPhrase()); - 
println("Details: " - + EntityUtils.toString(resEntity)); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - commentResponse.close(); - } - } - } - - } - } catch (IOException e) { - println("Error processing " + file.getAbsolutePath() + " : " - + e.getMessage()); - } - return dataId; - } - - @SuppressWarnings("unchecked") - private void postMetadata(CloseableHttpClient httpclient, - String uri, String path, JSONObject content, JSONObject context, - JSONObject agent) { - Set keys = new HashSet(); - keys.addAll(((Set) content.keySet())); - if (keys.contains("Geolocation")) { - keys.remove("Latitude"); - keys.remove("Longitude"); - } - - for (String key : keys) { - try { - String safeKey = key.replace(".", "_").replace("$", "_") - .replace("/", "_"); // Clowder/MongoDB don't allow keys - // with .$/ chars - - JSONObject singleContent = new JSONObject().put(safeKey, - content.get(key)); - JSONObject singleContext = new JSONObject().put(safeKey, - context.get(key)); - // Geolocation stays together with lat and long to mirror - // how the Clowder GUI works - if (key.equals("Geolocation")) { - if (content.has("Latitude")) { - singleContent.put("Latitude", content.get("Latitude")); - singleContext.put("Latitude", context.get("Latitude")); - } - if (content.has("Longitude")) { - singleContent - .put("Longitude", content.get("Longitude")); - singleContext - .put("Longitude", context.get("Longitude")); - } - } - // Clowder expects flat "Creator"s - might as well flatten all - // values... - if (singleContent.get(safeKey) instanceof JSONArray) { - for (int i = 0; i < ((JSONArray) singleContent - .getJSONArray(key)).length(); i++) { - JSONObject flatContent = new JSONObject(); - flatContent.put(key, ((JSONArray) singleContent - .getJSONArray(key)).get(i).toString()); - postSingleMetadata(flatContent, singleContext, agent, - uri, httpclient); - } - } else { - postSingleMetadata(singleContent, singleContext, agent, - uri, httpclient); - } - - } catch (IOException e) { - println("Error processing " + path + " : " + e.getMessage()); - break; - } - } - - } - - private void postSingleMetadata(JSONObject singleContent, - JSONObject singleContext, JSONObject agent, String uri, - CloseableHttpClient httpclient) throws IOException { - HttpEntity resEntity = null; - try { - singleContext.put("@vocab", CLOWDER_DEFAULT_VOCAB); - JSONObject meta = new JSONObject(); - meta.put("content", singleContent); - meta.put("@context", singleContext); - meta.put("agent", agent); - - StringEntity se2 = new StringEntity(meta.toString(), "UTF-8"); - - se2.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json; charset=utf-8")); - - HttpPost metadataPost = new HttpPost(appendKeyIfUsed(uri)); - - metadataPost.setEntity(se2); - - CloseableHttpResponse mdResponse = httpclient.execute(metadataPost, - getLocalContext()); - - resEntity = mdResponse.getEntity(); - if (mdResponse.getStatusLine().getStatusCode() != 200) { - println("Error response when processing key=" - + singleContent.keys().next() + " : " - + mdResponse.getStatusLine().getReasonPhrase()); - println("Value: " - + singleContent.get( - singleContent.keys().next().toString()) - .toString()); - println("Details: " + EntityUtils.toString(resEntity)); - throw new IOException("Non 200 response"); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - } - - } - - private void postDatasetCreator(String creator, String uri, - CloseableHttpClient httpclient) throws IOException { - HttpEntity resEntity = null; - try { - JSONObject body = new 
JSONObject(); - body.put("creator", creator); - - StringEntity se2 = new StringEntity(body.toString(), "UTF-8"); - se2.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, - "application/json; charset=utf-8")); - - HttpPost creatorPost = new HttpPost(appendKeyIfUsed(uri)); - - creatorPost.setEntity(se2); - - CloseableHttpResponse creatorResponse = httpclient.execute( - creatorPost, getLocalContext()); - - resEntity = creatorResponse.getEntity(); - if (creatorResponse.getStatusLine().getStatusCode() != 200) { - println("Error response when sending creator: " + creator - + " : " - + creatorResponse.getStatusLine().getReasonPhrase()); - println("Details: " + EntityUtils.toString(resEntity)); - throw new IOException("Non 200 response"); - } - } finally { - EntityUtils.consumeQuietly(resEntity); - } - } - - protected String findGeneralizationOf(String id) { - return id; - } - - HashMap existingDatasets = new HashMap(); - HashMap existingFolders = new HashMap(); - HashMap existingFiles = new HashMap(); - - boolean fileMDRetrieved = false; - boolean folderMDRetrieved = false; - - public String itemExists(String path, Resource item) { - String tagId = null; - - String relPath = path; - if (importRO) { - // remove the '//data' prefix on imported paths to make - // it match the file upload paths - relPath = relPath - .substring(relPath.substring(1).indexOf("/") + 1); - relPath = relPath - .substring(relPath.substring(1).indexOf("/") + 1); - } - if (relPath.equals("/")) { - println("Searching for existing dataset. If this takes a long time, consider using:"); - println(" -id= if you know the dataset exists, or"); - println(" -forcenew if you know the dataset does not yet exist."); - - // It's a dataset - CloseableHttpClient httpclient = getSharedHttpClient(); - String sourcepath = path + item.getName(); - //If we haven't yet found this dataset because we haven't looked yet, or we looked and haven't yet found this dataset and there are still more to scan... - if (!existingDatasets.containsKey(sourcepath) && (numFoundDatasets == 0 || existingDatasets.size() < numFoundDatasets)) { - try { - // Only returns first 12 by default - String serviceUrl = server + "/api/datasets?limit=0"; - JSONArray datasetList = null; - HttpGet httpget = null; - CloseableHttpResponse response = null; - if (knownId == null) { - //Get the whole list of datasets to scan through - httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); - response = httpclient.execute( - httpget, getLocalContext()); - try { - if (response.getStatusLine().getStatusCode() == 200) { - HttpEntity resEntity = response.getEntity(); - if (resEntity != null) { - datasetList = new JSONArray( - EntityUtils.toString(resEntity)); - println("Scanning " + datasetList.length() + " datasets for a match..."); - numFoundDatasets = datasetList.length(); - } - } else { - println("Error response when checking for existing item at " - + sourcepath - + " : " - + response.getStatusLine() - .getReasonPhrase()); - println("Exiting to prevent duplicates. Please contact SEAD about the error."); - System.exit(1); - - } - } finally { - response.close(); - } - } else { - //Add the one knownId to the dataset list - //Note: the datasets in the list returned by Clowder also have a "name" entry, but we don't use this. 
- datasetList = new JSONArray(); - JSONObject dataset = new JSONObject(); - dataset.put("id", knownId); - datasetList.put(dataset); - } - if (datasetList != null) { - for (int i = 0; i < datasetList.length(); i++) { - String id = datasetList.getJSONObject(i) - .getString("id"); - String dPath = null; - serviceUrl = server + "/api/datasets/" + id - + "/metadata.jsonld"; - println(serviceUrl); - - httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); - if (i % 10 == 0) { - //Give some indication of progress - System.out.print("."); - } - response = httpclient.execute(httpget, - getLocalContext()); - try { - if (response.getStatusLine() - .getStatusCode() == 200) { - HttpEntity resEntity = response - .getEntity(); - if (resEntity != null) { - JSONArray mdList = new JSONArray( - EntityUtils - .toString(resEntity)); - - for (int j = 0; j < mdList.length(); j++) { - if (mdList - .getJSONObject(j) - .getJSONObject( - "content") - .has("Upload Path")) { - dPath = mdList - .getJSONObject( - j) - .getJSONObject( - "content") - .getString( - "Upload Path") - .trim(); - existingDatasets - .put(dPath, id); - break; - } - } - } - } else { - println("Error response when getting metadata for dataset: " - + id - + " : " - + response.getStatusLine() - .getReasonPhrase()); - println("Exiting to prevent duplicates. Please contact SEAD about the error."); - System.exit(1); - - } - } finally { - response.close(); - } - if (dPath != null && sourcepath.equals(dPath)) { - //Only scan until we find the right dataset - break; - } - } - } - } catch (IOException e) { - println("Error processing check on " + sourcepath - + " : " + e.getMessage()); - } - } - if (existingDatasets.containsKey(sourcepath)) { - //If we're looking for a dataset and found it, it's because we've started on a new dataset and need to get new folder/file info - tagId = existingDatasets.get(sourcepath); - sead2datasetId = tagId; - folderMDRetrieved = false; - fileMDRetrieved = false; - existingFiles = new HashMap(); - existingFolders = new HashMap(); - } else { - if (knownId != null) { - //We should have found something - don't continue and create a new dataset - println("Dataset with id=" + knownId + "and path: " + sourcepath + " not found."); - println("Rerun without the -id flag to scan the entire repository or use -forcenew to force creation of a new dataset."); - System.exit(1); - } - } - - } else if (item.isDirectory()) { - /* - * /We're looking for a folder Since folders in 2 have no - * metadata and can't be moved, we will assume for now that if - * the dataset exists and the folder's relative path in the - * dataset matches, we've found the folder. 
- */ - String sourcepath = relPath + item.getName().trim(); - sourcepath = sourcepath.substring(sourcepath.substring(1) - .indexOf("/") + 1); - if (sead2datasetId != null && !folderMDRetrieved) { // Can't be in a dataset if it - // wasn't found/created already - CloseableHttpClient httpclient = getSharedHttpClient(); - try { - String serviceUrl = server + "/api/datasets/" - + sead2datasetId + "/folders"; - HttpGet httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); - CloseableHttpResponse response = httpclient.execute( - httpget, getLocalContext()); - try { - if (response.getStatusLine().getStatusCode() == 200) { - HttpEntity resEntity = response.getEntity(); - if (resEntity != null) { - JSONArray folders = new JSONArray( - EntityUtils.toString(resEntity)); - for (int i = 0; i < folders.length(); i++) { - existingFolders.put(folders.getJSONObject(i) - .getString("name"), folders.getJSONObject(i) - .getString("id")); - } - } - } else { - println("Error response when checking for existing item at " - + sourcepath - + " : " - + response.getStatusLine() - .getReasonPhrase()); - println("Exiting to prevent duplicates. Please contact SEAD about the error."); - System.exit(1); - - } - } finally { - response.close(); - } - } catch (IOException e) { - println("Error processing check on " + sourcepath - + " : " + e.getMessage()); - } finally { - folderMDRetrieved = true; - } - } - if (existingFolders.containsKey(sourcepath)) { - tagId = existingFolders.get(sourcepath); - - } - } else { - // A file - String sourcepath = path + item.getName().trim(); - - if (sead2datasetId != null && !fileMDRetrieved) { - // One-time retrieval of all file id/Upload Path info - - CloseableHttpClient httpclient = getSharedHttpClient(); - try { - String serviceUrl = server + "/api/datasets/" - + sead2datasetId + "/listAllFiles"; - - HttpGet httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); - - CloseableHttpResponse response = httpclient.execute( - httpget, getLocalContext()); - JSONArray fileList = null; - try { - if (response.getStatusLine().getStatusCode() == 200) { - HttpEntity resEntity = response.getEntity(); - if (resEntity != null) { - fileList = new JSONArray( - EntityUtils.toString(resEntity)); - } - } else { - println("Error response when checking for existing item at " - + sourcepath - + " : " - + response.getStatusLine() - .getReasonPhrase()); - println("Exiting to prevent duplicates. Please contact SEAD about the error."); - System.exit(1); - - } - } finally { - response.close(); - } - if (fileList != null) { - for (int i = 0; i < fileList.length(); i++) { - String id = fileList.getJSONObject(i) - .getString("id"); - serviceUrl = server + "/api/files/" + id - + "/metadata.jsonld"; - - httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); - - response = httpclient.execute(httpget, - getLocalContext()); - - try { - if (response.getStatusLine() - .getStatusCode() == 200) { - HttpEntity resEntity = response - .getEntity(); - if (resEntity != null) { - JSONArray mdList = new JSONArray( - EntityUtils - .toString(resEntity)); - for (int j = 0; j < mdList.length(); j++) { - if (mdList - .getJSONObject(j) - .getJSONObject( - "content") - .has("Upload Path")) { - - existingFiles - .put(mdList - .getJSONObject( - j) - .getJSONObject( - "content") - .getString( - "Upload Path") - .trim(), id); - break; - } - } - } - } else { - println("Error response when getting metadata for file: " - + id - + " : " - + response.getStatusLine() - .getReasonPhrase()); - println("Exiting to prevent duplicates. 
Please contact SEAD about the error."); - System.exit(1); - - } - } finally { - response.close(); - } - } - - } - - } catch (IOException e) { - println("Error processing check on " + sourcepath - + " : " + e.getMessage()); - } finally { - fileMDRetrieved = true; - } - } - if (existingFiles.containsKey(sourcepath)) { - tagId = existingFiles.get(sourcepath); - } - } - - if (verify && (tagId != null) && (!item.isDirectory())) { - tagId = verifyDataByHash(tagId, path, item); - } - return (tagId); - } - - HashMap hashIssues = new HashMap(); - - protected String verifyDataByHash(String tagId, String path, - Resource item) { - - String serviceUrl; - CloseableHttpClient httpclient = getSharedHttpClient(); - - try { - // Work-around - our sead2 servers have issues with incorrect or - // missing hash values - // So implementing a direct download and compute option for now. - // Can be added as a - // permanent option or replaced with the metadata check later - serviceUrl = server + "/api/files/" - + URLEncoder.encode(tagId, "UTF-8") + "/blob"; - HttpGet httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); - - CloseableHttpResponse response = httpclient.execute(httpget, - getLocalContext()); - try { - if (response.getStatusLine().getStatusCode() == 200) { - HttpEntity resEntity = response.getEntity(); - if (resEntity != null) { - String realHash = null; - InputStream inputStream = resEntity.getContent(); - realHash = DigestUtils.sha1Hex(inputStream); - /* - * if (hashtype != null) { if - * (hashtype.equals("SHA1 Hash")) { realHash = - * DigestUtils.sha1Hex(inputStream); - * - * } else if (hashtype.equals("SHA512 Hash")) { - * realHash = DigestUtils.sha512Hex(inputStream); } - */ - - if (realHash != null) { - if (!realHash.equals(item.getHash("SHA-1"))) { - hashIssues.put(path + item.getName(), - "!!!: A different version of this item exists with ID: " - + tagId); - return null; - } // else it matches! - } else { - hashIssues.put( - path + item.getName(), - "Error calculating hash for " - + item.getAbsolutePath() - + " - cannot verify it"); - return null; - } - } - } else { - println("Error downloading file to verify " - + item.getAbsolutePath() + " : " - + response.getStatusLine().getReasonPhrase()); - - } - } finally { - response.close(); - } - /* - * // sha1: "http://www.w3.org/2001/04/xmldsig-more#sha1" - * serviceUrl = server + "/api/files/" + - * URLEncoder.encode(tagId, "UTF-8") + "/metadata.jsonld"; - * HttpGet httpget = new HttpGet(serviceUrl); - * - * CloseableHttpResponse response = httpclient.execute(httpget, - * getLocalContext()); try { if - * (response.getStatusLine().getStatusCode() == 200) { - * HttpEntity resEntity = response.getEntity(); if (resEntity != - * null) { String json = EntityUtils.toString(resEntity); - * JSONArray metadata = new JSONArray(json); String remoteHash = - * null; for (int i = 0; i < metadata.length(); i++) { - * JSONObject content = metadata.getJSONObject(i) - * .getJSONObject("content"); if (content != null) { if - * (content.has("sha1")) { remoteHash = - * content.getString("sha1"); break; } } } if (remoteHash != - * null) { if (!remoteHash.equals(item.getSHA1Hash())) { - * hashIssues.put(path + item.getName(), - * "!!!: A different version of this item exists with ID: " + - * tagId); return null; } // else it matches! 
} else { - * hashIssues.put(path + item.getName(), - * "Remote Hash does not exist for " + item.getAbsolutePath() + - * " - cannot verify it"); return null; } } } else { - * println("Error response while verifying " + - * item.getAbsolutePath() + " : " + - * response.getStatusLine().getReasonPhrase()); - * - * } } finally { response.close(); } - */ - - } catch (UnsupportedEncodingException e1) { - - e1.printStackTrace(); - - } catch (IOException e) { - println("Error processing verify on " + item.getAbsolutePath() - + " : " + e.getMessage()); - } - return tagId; - } - - void addLiteralMetadata(MultipartEntityBuilder meb, - String predicate, String value) { - meb.addTextBody(predicate, value); - - } - - void addURIMetadata(MultipartEntityBuilder meb, String predicate, - String value) { - meb.addTextBody(predicate, value, - ContentType.create("text/uri-list", Consts.ISO_8859_1)); - } - - @SuppressWarnings("unchecked") - @Override - public void addDatasetMetadata(String newSubject, String type, JSONObject relationships) { - - JSONObject content = new JSONObject(); - - JSONObject agent = new JSONObject(); - JSONObject me = get2me(); - agent.put("name", me.getString("fullName")); - agent.put("@type", "cat:user"); - agent.put("user_id", - server + "/api/users/" + me.getString("id")); - - for (String predLabel : (Set) relationships - .keySet()) { - Object newObject = null; - if (relationships.get(predLabel) instanceof String) { - newObject = roCollIdToNewId.get(relationships - .getString(predLabel)); - if (newObject != null) { - if (newObject.equals(sead2datasetId)) { - newObject = server + "/datasets/" - + newObject; - } else { - newObject = server + "/datasets/" - + sead2datasetId + "#folderId" - + newObject; - } - } else { - newObject = roDataIdToNewId - .get(relationships - .getString(predLabel)); - if (newObject != null) { - newObject = server + "/files/" - + newObject; - } else { // Object is not in this Dataset - // and - // can't be translated - use - // original URI - newObject = relationships - .getString(predLabel); - } - } - println(newSubject + ": " + predLabel + ": " - + newObject.toString()); - } else { // JSONArray - newObject = new JSONArray(); - - JSONArray objects = (JSONArray) relationships - .get(predLabel); - for (int i = 0; i < objects.length(); i++) { - String ob = objects.getString(i); - String newOb = null; - newOb = roCollIdToNewId.get(ob); - if (newOb != null) { - if (newOb.equals(sead2datasetId)) { - newOb = server + "/datasets/" - + newOb; - } else { - newOb = server + "/datasets/" - + sead2datasetId - + "#folderId" + newOb; - } - } else { - newOb = roDataIdToNewId.get(ob); - if (newOb != null) { - newOb = server + "/files/" + newOb; - } else { // Object is not in this - // Dataset and - // can't be translated - use - // original URI - newOb = ob; - } - } - ((JSONArray) newObject).put(newOb); - } - - } - println("Writing: " + predLabel + " : " - + newObject.toString()); - content.put(predLabel, newObject); - - } - JSONObject context = new JSONObject(); - context.put("@vocab", CLOWDER_DEFAULT_VOCAB); - // Create flattened context for 2.0 - for (String key : ((Set) content.keySet())) { - String pred = rf.getURIForContextEntry(key); - if (pred != null) { - context.put(key, pred); - } - } - if (type.equals("datasets") - || newSubject.equals(sead2datasetId)) { - CloseableHttpClient httpclient = getSharedHttpClient(); - - String uri = server - + "/api/" - + (type.equals("datasets") ? 
"files/" - : "datasets/") + newSubject - + "/metadata.jsonld"; - postMetadata(httpclient, uri, newSubject, content, - context, agent); - } else { - println("Folder: Would've written: " + newSubject - + ": " + content.toString()); - - } - - } - - @Override - protected void postProcessChildren() { - // TODO Auto-generated method stub - - } - - @Override - protected void postProcessCollection() { - // TODO Auto-generated method stub - - } - - @Override - protected String preprocessCollection(Resource dir, String path, String parentId, String collectionId) throws UploaderException { - // SEAD2 - create the dataset or folder first before processing - // children - if (!listonly) { - if (collectionId == null) { - if (parentId == null) { - collectionId = create2Dataset(dir, path); - sead2datasetId = collectionId; - } else { - collectionId = create2Folder(parentId, sead2datasetId, - path, dir); - if (collectionId == null) { - throw new UploaderException("Failed to create Folder - will not process contents of :" + path); - } - } - - } else { - // We already have the dataset uploaded so record it's id - if (parentId == null) { - sead2datasetId = collectionId; - } - } - } else { - if (collectionId != null && parentId == null) { - sead2datasetId = collectionId; - } - } - if (sead2datasetId != null) { - println("Dataset ID: " + sead2datasetId); - } - return collectionId; - } - - @Override - protected String postProcessChild(Resource dir, String path, String parentId, String collectionId) { - // TODO Auto-generated method stub - return null; - } - - @Override - protected void postProcessDatafile(String newUri, String existingUri, String collectionId, Resource file, Resource dir) throws ClientProtocolException, IOException { - - //IF NOT LISTONLY? - if (existingUri == null) { // file didn't exist - // before - if ((collectionId != null) - && (!sead2datasetId - .equals(collectionId))) { - // it's in a folder and not the dataset - if (newUri != null) { // and it was just - // created - moveFileToFolder(newUri, collectionId, - file); - } - } - } else { // the file existed - // FixMe - need to check if it is already in the - // folder or not... 
- if (!sead2datasetId.equals(existingUri)) { - - CloseableHttpClient httpclient = getSharedHttpClient(); - - HttpGet httpget = new HttpGet(appendKeyIfUsed(server - + "/api/datasets/" + sead2datasetId - + "/listFiles")); - - CloseableHttpResponse getResponse = httpclient - .execute(httpget, getLocalContext()); - try { - if (getResponse.getStatusLine() - .getStatusCode() == 200) { - JSONArray files = new JSONArray( - EntityUtils - .toString(getResponse - .getEntity())); - for (int i = 0; i < files.length(); i++) { - if (files.getJSONObject(i) - .getString("id") - .equals(existingUri)) { - // File is in dataset - // directly, not in a - // folder, so move it if - // needed - if ((collectionId != null) - && (!sead2datasetId - .equals(collectionId))) { // it's - - moveFileToFolder( - existingUri, - collectionId, - file); - } - break; - } - } - } else { - println("Error response when listing files " - + dir.getAbsolutePath() - + " : " - + getResponse - .getStatusLine() - .getReasonPhrase()); - println("Details: " - + EntityUtils - .toString(getResponse - .getEntity())); - - } - } finally { - EntityUtils.consumeQuietly(getResponse - .getEntity()); - getResponse.close(); - } - } - } - } - - @Override - protected HttpClientContext reauthenticate(long startTime) { - if (apiKey != null) { - return getLocalContext(); - } else { - return SEADAuthenticator.UPReAuthenticateIfNeeded(server, - startTime); - } - } - - private String appendKeyIfUsed(String url) { - if (apiKey != null) { - if (url.contains("?")) { - url = url + "&key=" + apiKey; - } else { - url = url + "?key=" + apiKey; - } - } - return url; - } - -} +/** ***************************************************************************** + * Copyright 2014, 2016 University of Michigan + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ***************************************************************************** */ +package org.sead.uploader.clowder; + +import java.io.IOException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.net.ssl.HostnameVerifier; +import javax.net.ssl.SSLContext; + +import org.apache.commons.codec.digest.DigestUtils; +import org.apache.http.Consts; +import org.apache.http.HttpEntity; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.conn.ssl.TrustAllStrategy; +import org.apache.http.ssl.SSLContextBuilder; +import org.apache.http.entity.ContentType; +import org.apache.http.entity.StringEntity; +import org.apache.http.entity.mime.MultipartEntityBuilder; +import org.apache.http.entity.mime.content.ContentBody; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.message.BasicHeader; +import org.apache.http.protocol.HTTP; +import org.apache.http.util.EntityUtils; +import org.json.JSONArray; +import org.json.JSONObject; +import org.sead.uploader.util.PublishedFolderProxyResource; +import org.sead.uploader.util.PublishedResource; +import org.sead.uploader.util.Resource; +import org.sead.uploader.util.ResourceFactory; + +import org.sead.uploader.AbstractUploader; +import org.sead.uploader.util.UploaderException; + +/** + * The SEAD Uploader supports the upload of files/directories from a local disk, + * or existing SEAD publications for which a valid OREMap file is available from + * a URL (repositories must update the data file links in the ORE for the + * Uploader to retrieve them) + * + * In addition to sending files and creating a SEAD collection/dataset (1.5) or + * Dataset/Folder/File (2.0) structure, the Uploader adds path metadata, usable + * in detecting whether an item has already been created/uploaded. For + * publications, it also sends metadata, tags, comments, and spatial reference + * metadata, performing some mapping to clarify when metadata applies only to + * the original/published version and when the new live copy 'inherits' the + * metadata. This can be adjusted using the black and gray lists of terms and/or + * providing custom code to map metadata to SEAD 2.0 conventions. 
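+ *
+ * A typical invocation (values here are illustrative: the jar name matches the
+ * usage text printed by this class, the server URL is the example from that
+ * text, and the key and directory name are placeholders):
+ *
+ * java -cp .;sead2.1.jar org.sead.uploader.clowder.SEADUploader
+ * -server=https://sead2.ncsa.illinois.edu -key=<your-api-key> -listonly MyDataDirectory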
+ * + */ +public class SEADUploader extends AbstractUploader { + + public static final String FRBR_EO = "http://purl.org/vocab/frbr/core#embodimentOf"; + private static final String DCTERMS_HAS_PART = "http://purl.org/dc/terms/hasPart"; + + private static boolean d2a = false; + private static String apiKey = null; + private static String knownId = null; + private static boolean checkDataset = false; + private static String sead2datasetId = null; + private static int numFoundDatasets = 0; + + private static String CLOWDER_DEFAULT_VOCAB = "https://clowder.ncsa.illinois.edu/contexts/dummy"; + + public static void main(String args[]) throws Exception { + setUploader(new SEADUploader()); + uploader.createLogFile("SEADUploaderLog_"); + uploader.setSpaceType("SEAD2"); + println("\n----------------------------------------------------------------------------------\n"); + println("SSS EEEE A DDD"); + println("S E A A D D"); + println(" SS EEE AAAAA D D"); + println(" S E A A D D"); + println("SSS EEEE A A DDD"); + + println("SEADUploader - a command-line application to upload files to any Clowder Dataset"); + println("Developed for the SEAD (https://sead-data.net) Community"); + println("\n----------------------------------------------------------------------------------\n"); + println("\n***Parsing arguments:***\n"); + uploader.parseArgs(args); + + if (server == null || requests.isEmpty()) { + println("\n***Required arguments not found.***"); + usage(); + } else if (checkDataset) { + ((SEADUploader) uploader).checkTheDataset(); + + } else { + println("\n***Starting to Process Upload Requests:***\n"); + uploader.processRequests(); + } + println("\n***Execution Complete.***"); + } + + private static void usage() { + println("\nUsage:"); + println(" java -cp .;sead2.1.jar org.sead.uploader.clowder.SEADUploader -server= "); + + println("\n where:"); + println(" = the URL of the server to upload to, e.g. 
https://sead2.ncsa.illinois.edu"); + println(" = a space separated list of directory name(s) to upload as Dataset(s) containing the folders and files within them"); + println("\n Optional Arguments:"); + println(" -key= - your personal apikey, created in the server at "); + println(" - using an apiKey avoids having to enter your username/password and having to reauthenticate for long upload runs"); + println(" -id= - if you know a dataset exists, specifying it's id will improve performance"); + println(" -listonly - Scan the Dataset and local files and list what would be uploaded (does not upload with this flag)"); + println(" -limit= - Specify a maximum number of files to upload per invocation."); + println(" -verify - Check both the file name and checksum in comparing with current Dataset entries."); + println(" -skip= - a number of files to skip before starting processing (saves time when you know the first n files have been uploaded before)"); + println(" -forcenew - A new dataset will be created for this upload, even if a matching one is found."); + println(" -importRO - uploads from a zipped BagIt file rather than from disk"); + println(""); + + } + + private void checkTheDataset() { + if (knownId == null) { + println("CheckId only works with knownId - exiting"); + System.exit(0); + } + int goodFiles = 0; + String dPath = null; + CloseableHttpResponse response = null; + String serviceUrl = ""; + try { + CloseableHttpClient httpclient = getSharedHttpClient(); + + serviceUrl = server + "/api/datasets/" + knownId + + "/metadata.jsonld"; + println(serviceUrl); + + HttpGet httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); + response = httpclient.execute(httpget, + getLocalContext()); + try { + if (response.getStatusLine() + .getStatusCode() == 200) { + HttpEntity resEntity = response + .getEntity(); + if (resEntity != null) { + JSONArray mdList = new JSONArray( + EntityUtils + .toString(resEntity)); + + for (int j = 0; j < mdList.length(); j++) { + if (mdList + .getJSONObject(j) + .getJSONObject( + "content") + .has("Upload Path")) { + dPath = mdList + .getJSONObject( + j) + .getJSONObject( + "content") + .getString( + "Upload Path") + .trim(); + existingDatasets + .put(dPath, knownId); + break; + } + } + } + } else { + println("Error response when getting metadata for dataset: " + + knownId + + " : " + + response.getStatusLine() + .getReasonPhrase()); + println("Exiting. Please contact SEAD about the error."); + System.exit(1); + + } + } finally { + try { + response.close(); + } catch (IOException ex) { + Logger.getLogger(SEADUploader.class.getName()).log(Level.SEVERE, null, ex); + } + } + if (dPath == null) { + println("Dataset: " + knownId + " does not have an Upload Path"); + System.exit(0); + } + + existingFiles = new HashMap(); + existingFolders = new HashMap(); + try { + try { + serviceUrl = server + "/api/datasets/" + + knownId + "/folders"; + httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); + response = httpclient.execute( + httpget, getLocalContext()); + if (response.getStatusLine().getStatusCode() == 200) { + HttpEntity resEntity = response.getEntity(); + if (resEntity != null) { + JSONArray folders = new JSONArray( + EntityUtils.toString(resEntity)); + for (int i = 0; i < folders.length(); i++) { + existingFolders.put(folders.getJSONObject(i) + .getString("name"), folders.getJSONObject(i) + .getString("id")); + } + } + } else { + println("Error response when checking for folders" + + " : " + + response.getStatusLine() + .getReasonPhrase()); + println("Exiting. 
Please contact SEAD about the error."); + System.exit(1); + + } + } finally { + response.close(); + } + } catch (IOException e) { + println("Error processing folders: " + e.getMessage()); + } finally { + folderMDRetrieved = true; + } + + try { + serviceUrl = server + "/api/datasets/" + + knownId + "/listAllFiles"; + + httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); + + response = httpclient.execute( + httpget, getLocalContext()); + JSONArray fileList = null; + try { + if (response.getStatusLine().getStatusCode() == 200) { + HttpEntity resEntity = response.getEntity(); + if (resEntity != null) { + fileList = new JSONArray( + EntityUtils.toString(resEntity)); + } + } else { + println("Error response when checking files" + + " : " + + response.getStatusLine() + .getReasonPhrase()); + println("Exiting. Please contact SEAD about the error."); + System.exit(1); + + } + } finally { + response.close(); + } + if (fileList != null) { + for (int i = 0; i < fileList.length(); i++) { + String id = fileList.getJSONObject(i) + .getString("id"); + serviceUrl = server + "/api/files/" + id + + "/metadata.jsonld"; + + httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); + + response = httpclient.execute(httpget, + getLocalContext()); + + try { + if (response.getStatusLine() + .getStatusCode() == 200) { + HttpEntity resEntity = response + .getEntity(); + if (resEntity != null) { + JSONArray mdList = new JSONArray( + EntityUtils + .toString(resEntity)); + boolean hasPath = false; + for (int j = 0; j < mdList.length(); j++) { + if (mdList + .getJSONObject(j) + .getJSONObject( + "content") + .has("Upload Path")) { + String path = mdList + .getJSONObject( + j) + .getJSONObject( + "content") + .getString( + "Upload Path") + .trim(); + if (existingFiles.containsKey(path)) { + println(id + " duplicates " + existingFiles.get(path)); + } else { + String folderPath = path.substring(1, path.lastIndexOf("/")); + if (folderPath.indexOf("/") >= 0) { + folderPath = folderPath.substring(folderPath.indexOf("/")); + } else { + folderPath = ""; + } + if (folderPath.length() > 0 && !existingFolders.containsKey(folderPath)) { + println("Folder for " + path + " not found"); + } else { + + existingFiles + .put(path, id); + goodFiles++; + if (goodFiles % 10 == 0) { + System.out.print("."); + } + if (goodFiles % 1000 == 0) { + System.out.print("Processed " + goodFiles + " good files."); + } + + } + } + hasPath = true; + break; + } + } + if (!hasPath) { + println("File with no path: " + id); + } + } + + } else { + println("Error response when getting metadata for file: " + + id + + " : " + + response.getStatusLine() + .getReasonPhrase()); + println("Exiting. 
Please contact SEAD about the error."); + System.exit(1); + } + } finally { + response.close(); + } + } + + } + } finally { + response.close(); + httpclient.close(); + } + } catch (IOException io) { + println("Error doing " + serviceUrl + " : " + io.getMessage()); + } + println("Analysis complete with " + goodFiles + " good files."); + } + + public boolean parseCustomArg(String arg) { + if (arg.equalsIgnoreCase("-d2a")) { + d2a = true; + println("Description to Abstract translation on"); + return true; + } else if (arg.startsWith("-key")) { + apiKey = arg.substring(arg.indexOf(argSeparator) + 1); + println("Using apiKey: " + apiKey); + return true; + } else if (arg.startsWith("-id")) { + knownId = arg.substring(arg.indexOf(argSeparator) + 1); + println("Updating Dataset with id: " + knownId); + return true; + } else if (arg.startsWith("-checkDataset")) { + checkDataset = true; + println("Only checking Dataset"); + return true; + } + return false; + } + + CloseableHttpClient httpClient = null; + + public CloseableHttpClient getSharedHttpClient() { + if (httpClient == null) { + // use the TrustSelfSignedStrategy to allow Self Signed Certificates + SSLContext sslContext; + try { + sslContext = SSLContextBuilder + .create() + .loadTrustMaterial(new TrustAllStrategy()) + .build(); + + // create an SSL Socket Factory to use the SSLContext with the trust self signed certificate strategy + // and allow all hosts verifier. + SSLConnectionSocketFactory connectionFactory = new SSLConnectionSocketFactory(sslContext); + + // finally create the HttpClient using HttpClient factory methods and assign the ssl socket factory + httpClient = HttpClients + .custom() + .setSSLSocketFactory(connectionFactory) + .build(); + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { + Logger.getLogger(SEADUploader.class.getName()).log(Level.SEVERE, null, ex); + } + } + return httpClient; + } + + @Override + public void processRequests() { + println("Contacting server..."); + getSharedHttpClient(); + super.processRequests(); + println("Closing server connection..."); + + try { + getSharedHttpClient().close(); + } catch (IOException ex) { + Logger.getLogger(SEADUploader.class.getName()).log(Level.SEVERE, null, ex); + } + } + + @Override + public HttpClientContext authenticate() { + if (apiKey != null) { + //Don't need to update context since we have the apikey to use + if (getLocalContext() == null) { + return (new HttpClientContext()); + } else { + return getLocalContext(); + } + } + return SEADAuthenticator + .UPAuthenticate(server); + } + + private void moveFileToFolder(String newUri, String parentId, + Resource file) { + CloseableHttpClient httpclient = getSharedHttpClient(); + try { + HttpPost httppost = new HttpPost(appendKeyIfUsed(server + "/api/datasets/" + + sead2datasetId + "/moveFile/" + parentId + "/" + newUri)); + + StringEntity se = new StringEntity("{}"); + se.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json")); + httppost.setEntity(se); + + CloseableHttpResponse response = httpclient.execute(httppost, + getLocalContext()); + HttpEntity resEntity = null; + try { + if (response.getStatusLine().getStatusCode() == 200) { + EntityUtils.consume(response.getEntity()); + } else { + println("Error response when processing " + + file.getAbsolutePath() + " : " + + response.getStatusLine().getReasonPhrase()); + println("Details: " + + EntityUtils.toString(response.getEntity())); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + response.close(); + } + + 
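+ // The move request above posts an empty JSON body; the dataset, target
+ // folder, and file are all identified in the URL
+ // (/api/datasets/{datasetId}/moveFile/{folderId}/{fileId}), so only the
+ // response status (and, on error, the response body) is inspected.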
// FixMe Add tags + /* + * if (tagValues != null) { addTags(httpclient, dir, collectionId, + * tagValues); } + */ + } catch (IOException e) { + println("Error processing " + file.getAbsolutePath() + " : " + + e.getMessage()); + } + } + + private String create2Folder(String parentId, String sead2datasetId, + String path, Resource dir) { + String collectionId = null; + CloseableHttpClient httpclient = getSharedHttpClient(); + try { + String postUri = server + "/api/datasets/" + + sead2datasetId + "/newFolder"; + if (apiKey != null) { + postUri = postUri + "?key=" + apiKey; + } + HttpPost httppost = new HttpPost(appendKeyIfUsed(server + "/api/datasets/" + + sead2datasetId + "/newFolder")); + + JSONObject jo = new JSONObject(); + String title = dir.getName().trim(); + // For publishedResource, we want to use the Title + if (dir instanceof PublishedResource) { + title = ((PublishedResource) dir).getAndRemoveTitle().trim(); + } + jo.put("name", title); + jo.put("parentId", parentId); + jo.put("parentType", ((parentId == sead2datasetId) ? "dataset" + : "folder")); + + StringEntity se = new StringEntity(jo.toString(), "UTF-8"); + se.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json; charset=utf-8")); + httppost.setEntity(se); + + CloseableHttpResponse response = httpclient.execute(httppost, + getLocalContext()); + HttpEntity resEntity = null; + try { + if (response.getStatusLine().getStatusCode() == 200) { + EntityUtils.consume(response.getEntity()); + // Now query to get the new folder's id + // path should be of the form + // "//[/]" for + // file uploads and + // "/data//[/]" + // for imported ROs + // and we need to strip to get only the folder path part + String folderPath = path; + if (importRO) { + folderPath = folderPath.substring(folderPath.substring( + 1).indexOf("/") + 1); + folderPath = folderPath.substring(folderPath.substring( + 1).indexOf("/") + 1); + } + folderPath = folderPath.substring(folderPath.substring(1) + .indexOf("/") + 1); + + HttpGet httpget = new HttpGet(appendKeyIfUsed(server + "/api/datasets/" + + sead2datasetId + "/folders")); + + CloseableHttpResponse getResponse = httpclient.execute( + httpget, getLocalContext()); + try { + if (getResponse.getStatusLine().getStatusCode() == 200) { + JSONArray folders = new JSONArray( + EntityUtils.toString(getResponse + .getEntity())); + for (int i = 0; i < folders.length(); i++) { + if (folders.getJSONObject(i).getString("name") + .equals(folderPath)) { + collectionId = folders.getJSONObject(i) + .getString("id"); + break; + } + } + } else { + println("Error response when processing " + + dir.getAbsolutePath() + + " : " + + getResponse.getStatusLine() + .getReasonPhrase()); + println("Details: " + + EntityUtils.toString(getResponse + .getEntity())); + } + } finally { + EntityUtils.consumeQuietly(getResponse.getEntity()); + getResponse.close(); + } + } else { + println("Error response when processing " + + dir.getAbsolutePath() + " : " + + response.getStatusLine().getReasonPhrase()); + println("Details: " + + EntityUtils.toString(response.getEntity())); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + response.close(); + } + + // Add metadata for imported folders + // FixMe - Add Metadata to folder directly + // Assume we only write a metadata file if collection is newly + // created and we're importing + if (importRO && collectionId != null) { + Resource mdFile = new PublishedFolderProxyResource( + (PublishedResource) dir, collectionId); + String mdId = null; + try { + mdId = uploadDatafile(mdFile, 
path + "/" + + mdFile.getName()); + } catch (UploaderException ue) { + println(ue.getMessage()); + } + // By default, we are in a folder and need to move the file + // (sead2datasetId != collectionId)) + if (mdId != null) { // and it was just + // created + moveFileToFolder(mdId, collectionId, mdFile); + roFolderProxy.put(collectionId, mdId); + } else { + println("Unable to write metadata file for folder: " + + collectionId); + } + } + } catch (IOException e) { + println("Error processing " + dir.getAbsolutePath() + " : " + + e.getMessage()); + } + return collectionId; + + } + + private String create2Dataset(Resource dir, String path) { + String datasetId = null; + CloseableHttpClient httpclient = getSharedHttpClient(); + try { + + HttpPost httppost = new HttpPost(appendKeyIfUsed(server + + "/api/datasets/createempty")); + JSONObject jo = new JSONObject(); + String title = dir.getName().trim(); + // For publishedResource, we want to use the Title + if (dir instanceof PublishedResource) { + title = ((PublishedResource) dir).getAndRemoveTitle().trim(); + } + jo.put("name", title); + if (importRO) { + String abs = ((PublishedResource) dir) + .getAndRemoveAbstract(d2a); + if (abs != null) { + jo.put("description", abs); + } + } + + StringEntity se = new StringEntity(jo.toString(), "UTF-8"); + se.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json; charset=utf-8")); + httppost.setEntity(se); + + CloseableHttpResponse response = httpclient.execute(httppost, + getLocalContext()); + HttpEntity resEntity = null; + try { + resEntity = response.getEntity(); + if (response.getStatusLine().getStatusCode() == 200) { + if (resEntity != null) { + datasetId = new JSONObject( + EntityUtils.toString(resEntity)) + .getString("id"); + } + } else { + println("Error response when processing " + + dir.getAbsolutePath() + " : " + + response.getStatusLine().getReasonPhrase()); + println("Details: " + EntityUtils.toString(resEntity)); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + response.close(); + } + if (datasetId != null) { + + // Add Metadata + JSONObject content = new JSONObject(); + JSONObject context = new JSONObject(); + JSONObject agent = new JSONObject(); + List creators = new ArrayList(); + content.put("Upload Path", path); + List comments = new ArrayList(); + // Should be true for all PublishedResources, never for files... 
+ if (dir instanceof PublishedResource) { + ((PublishedResource) dir).getAndRemoveCreator(creators); + } + + String creatorPostUri = server + "/api/datasets/" + datasetId + + "/creator"; + for (String creator : creators) { + postDatasetCreator(creator, creatorPostUri, httpclient); + } + + String tagValues = add2ResourceMetadata(content, context, + agent, comments, path, dir); + + postMetadata(httpclient, server + "/api/datasets/" + datasetId + + "/metadata.jsonld", dir.getAbsolutePath(), content, + context, agent); + if (creators != null) { + + } + // FixMe Add tags + if (tagValues != null) { + HttpPost tagPost = new HttpPost(appendKeyIfUsed(server + "/api/datasets/" + + datasetId + "/tags")); + JSONObject tags = new JSONObject(); + + String[] tagArray = tagValues.split(","); + JSONArray tagList = new JSONArray(tagArray); + tags.put("tags", tagList); + + StringEntity se3 = new StringEntity(tags.toString(), + "UTF-8"); + se3.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json; charset=utf-8")); + tagPost.setEntity(se3); + + CloseableHttpResponse tagResponse = httpclient.execute( + tagPost, getLocalContext()); + resEntity = null; + try { + resEntity = tagResponse.getEntity(); + if (tagResponse.getStatusLine().getStatusCode() != 200) { + println("Error response when processing " + + dir.getAbsolutePath() + + " : " + + tagResponse.getStatusLine() + .getReasonPhrase()); + println("Details: " + + EntityUtils.toString(resEntity)); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + tagResponse.close(); + } + } + if (comments.size() > 0) { + Collections.sort(comments); + for (String text : comments.toArray(new String[comments + .size()])) { + HttpPost commentPost = new HttpPost(appendKeyIfUsed(server + + "/api/datasets/" + datasetId + "/comment")); + + JSONObject comment = new JSONObject(); + comment.put("text", text); + + StringEntity se3 = new StringEntity(comment.toString(), + "UTF-8"); + se3.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json; charset=utf-8")); + commentPost.setEntity(se3); + + CloseableHttpResponse commentResponse = httpclient + .execute(commentPost, getLocalContext()); + resEntity = null; + try { + resEntity = commentResponse.getEntity(); + if (commentResponse.getStatusLine().getStatusCode() != 200) { + println("Error response when processing " + + dir.getAbsolutePath() + + " : " + + commentResponse.getStatusLine() + .getReasonPhrase()); + println("Details: " + + EntityUtils.toString(resEntity)); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + commentResponse.close(); + } + } + } + } + } catch (IOException e) { + println("Error processing " + dir.getAbsolutePath() + " : " + + e.getMessage()); + } + return datasetId; + } + + @SuppressWarnings("unchecked") + private String add2ResourceMetadata(JSONObject content, + JSONObject context, JSONObject agent, List comments, + String path, Resource item) { + Object tags = null; + + JSONObject metadata = item.getMetadata(); // Empty for file resources + if (metadata.has("Metadata on Original")) { + JSONObject original = metadata + .getJSONObject("Metadata on Original"); + // Gray list metadata should be used (and removed) from + // this + // field or passed in as is + + if (original.has("Keyword")) { + tags = original.get("Keyword"); + original.remove("Keyword"); + } + if (original.has("GeoPoint")) { + Object gpObject = original.get("GeoPoint"); + if (gpObject instanceof JSONArray) { + // An error so we'll just capture as a string + metadata.put("Geolocation", + "This entry had 
multiple Lat/Long from SEAD 1.5 GeoPoints: " + + ((JSONArray) gpObject).toString(2)); + } else { + JSONObject point = original.getJSONObject("GeoPoint"); + metadata.put("Geolocation", + "Lat/Long from SEAD 1.5 GeoPoint"); + metadata.put("Latitude", point.getString("lat")); + metadata.put("Longitude", point.getString("long")); + original.remove("GeoPoint"); + } + + } + + if (original.has("Comment")) { + Object comObject = original.get("Comment"); + if (comObject instanceof JSONArray) { + for (int i = 0; i < ((JSONArray) comObject).length(); i++) { + JSONObject comment = ((JSONArray) comObject) + .getJSONObject(i); + comments.add(getComment(comment)); + } + } else { + comments.add(getComment(((JSONObject) comObject))); + } + original.remove("Comment"); + } + } + // Convert all vals to Strings + for (String key : (Set) metadata.keySet()) { + String newKey = key; + if (ResourceFactory.graySwaps.containsKey(key)) { + newKey = ResourceFactory.graySwaps.get(key); + } + if (metadata.get(key) instanceof JSONArray) { + // split values and handle them separately + JSONArray valArray = (JSONArray) metadata.get(key); + JSONArray newVals = new JSONArray(); + for (int i = 0; i < valArray.length(); i++) { + String val = valArray.get(i).toString(); + newVals.put(val); + } + content.put(newKey, newVals); + } else { + content.put(newKey, metadata.get(key).toString()); + } + } + // create tag(s) string + String tagValues = null; + if (tags != null) { + if (tags instanceof JSONArray) { + tagValues = ""; + JSONArray valArray = (JSONArray) tags; + for (int i = 0; i < valArray.length(); i++) { + tagValues = tagValues + valArray.get(i).toString(); + if (valArray.length() > 1 && i != valArray.length() - 1) { + tagValues = tagValues + ","; + } + } + } else { + tagValues = ((String) tags); + } + } + content.put("Upload Path", path); + + // Flatten context for 2.0 + context.put("@vocab", CLOWDER_DEFAULT_VOCAB); + for (String key : ((Set) content.keySet())) { + if (rf != null) { // importRO == true + String pred = rf.getURIForContextEntry(key); + if (pred != null) { + context.put(key, pred); + } + } else { + if (key.equals("Upload Path")) { + context.put(key, SEADUploader.FRBR_EO); + } else { // shouldn't happen + println("Unrecognized Metadata Entry: " + key); + } + } + } + JSONObject me = get2me(); + agent.put("name", me.getString("fullName")); + agent.put("@type", "cat:user"); + agent.put("user_id", server + "/api/users/" + me.getString("id")); + + return tagValues; + } + + private String getComment(JSONObject comment) { + StringBuilder sb = new StringBuilder(); + sb.append("Imported Comment: "); + sb.append(comment.getString("comment_date")); + sb.append(", Author: "); + String comAuth = comment.getString("comment_author"); + sb.append(comAuth.substring(comAuth.lastIndexOf("/") + 1)); + sb.append(": "); + sb.append(comment.getString("comment_body")); + return sb.toString(); + } + + JSONObject me = null; + + private JSONObject get2me() { + CloseableHttpClient httpclient = getSharedHttpClient(); + if (me == null) { + try { + String serviceUrl = server + "/api/me"; + + HttpGet httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); + CloseableHttpResponse response = httpclient.execute(httpget, + getLocalContext()); + try { + if (response.getStatusLine().getStatusCode() == 200) { + HttpEntity resEntity = response.getEntity(); + if (resEntity != null) { + me = new JSONObject(EntityUtils.toString(resEntity)); + } + } else { + println("Error response when retrieving user details: " + + 
response.getStatusLine().getReasonPhrase()); + + } + } finally { + response.close(); + } + } catch (IOException e) { + println("Error processing get user request: " + e.getMessage()); + } + // me.put("fullName", "SEAD 1.5 Importer"); + } + return me; + } + + @Override + protected String uploadDatafile(Resource file, String path) throws UploaderException { + if (sead2datasetId == null) { + throw new UploaderException("SEAD2 does not support upload of individual files that are not in a dataset."); + } + CloseableHttpClient httpclient = getSharedHttpClient(); + String dataId = null; + try { + // FixMe: requires update to 2.0 ... To support long uploads, + // request a key to allow the + // upload to complete even if the session has timed out + + // Now post data + String urlString = server + "/api/uploadToDataset/" + + sead2datasetId; + HttpPost httppost = new HttpPost(appendKeyIfUsed(urlString)); + ContentBody bin = file.getContentBody(); + MultipartEntityBuilder meb = MultipartEntityBuilder.create(); + meb.addPart("files[]", bin); + + // FixMe + // addLiteralMetadata(meb, FRBR_EO, path); + // FixMe + // String tagValues = addResourceMetadata(meb, file); + HttpEntity reqEntity = meb.build(); + httppost.setEntity(reqEntity); + + CloseableHttpResponse response = httpclient.execute(httppost, + getLocalContext()); + HttpEntity resEntity = response.getEntity(); + + try { + if (response.getStatusLine().getStatusCode() == 200) { + if (resEntity != null) { + dataId = new JSONObject(EntityUtils.toString(resEntity)) + .getString("id"); + } + } else { + println("Error response when processing " + + file.getAbsolutePath() + " : " + + response.getStatusLine().getReasonPhrase()); + println("Details: " + EntityUtils.toString(resEntity)); + } + + } catch (Exception e) { + println("Error uploading file: " + file.getName()); + e.printStackTrace(); + } finally { + EntityUtils.consumeQuietly(response.getEntity()); + response.close(); + } + if (dataId != null) { + + // FixMe - add Metadata + /* + * addLiteralMetadata(meb, FRBR_EO, path); + * + * // Add metadata for published resources + * + * String tagValues = addResourceMetadata(meb, dir); HttpEntity + * reqEntity = meb.build(); + */ + JSONObject content = new JSONObject(); + List comments = new ArrayList(); + JSONObject context = new JSONObject(); + JSONObject agent = new JSONObject(); + + String abs = null; + String title = null; + if (file instanceof PublishedResource) { + abs = ((PublishedResource) file).getAndRemoveAbstract(d2a); + + title = ((PublishedResource) file).getAndRemoveTitle(); + if ((title != null) && (title.equals(file.getName()))) { + title = null; + } + } + String tagValues = add2ResourceMetadata(content, context, + agent, comments, path, file); + + postMetadata(httpclient, server + "/api/files/" + dataId + + "/metadata.jsonld", file.getAbsolutePath(), content, + context, agent); + + if (abs != null) { + HttpPut descPut = new HttpPut(appendKeyIfUsed(server + "/api/files/" + + dataId + "/updateDescription")); + JSONObject desc = new JSONObject(); + + desc.put("description", abs); + + StringEntity descSE = new StringEntity(desc.toString(), + "UTF-8"); + descSE.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json; charset=utf-8")); + descPut.setEntity(descSE); + + CloseableHttpResponse descResponse = httpclient.execute( + descPut, getLocalContext()); + resEntity = null; + try { + resEntity = descResponse.getEntity(); + if (descResponse.getStatusLine().getStatusCode() != 200) { + println("Error response when processing " + + 
file.getAbsolutePath() + + " : " + + descResponse.getStatusLine() + .getReasonPhrase()); + println("Details: " + + EntityUtils.toString(resEntity)); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + descResponse.close(); + } + } + // We need a valid filename (from "Label"/getName() to do + // the + // upload, but, if the user + // has changed the "Title", we need to then update the + // displayed + // filename + // For folders, this will currently always be null + // (since Title is used for the name in PublishedResource + // for directories) and therefore we won't change the name + // of the readme file + // as set in the Proxy class. + if (title != null) { + HttpPut namePut = new HttpPut(appendKeyIfUsed(server + "/api/files/" + + dataId + "/filename")); + JSONObject name = new JSONObject(); + + name.put("name", title); + + StringEntity nameSE = new StringEntity(name.toString(), + "UTF-8"); + nameSE.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json; charset=utf-8")); + namePut.setEntity(nameSE); + + CloseableHttpResponse nameResponse = httpclient.execute( + namePut, getLocalContext()); + resEntity = null; + try { + resEntity = nameResponse.getEntity(); + if (nameResponse.getStatusLine().getStatusCode() != 200) { + println("Error response when processing " + + file.getAbsolutePath() + + " : " + + nameResponse.getStatusLine() + .getReasonPhrase()); + println("Details: " + + EntityUtils.toString(resEntity)); + } else { + println("Dataset name successfully changed from : " + + file.getName() + " to " + title); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + nameResponse.close(); + } + } + + // FixMe Add tags + if (tagValues != null) { + HttpPost tagPost = new HttpPost(appendKeyIfUsed(server + "/api/files/" + + dataId + "/tags")); + JSONObject tags = new JSONObject(); + + String[] tagArray = tagValues.split(","); + JSONArray tagList = new JSONArray(tagArray); + tags.put("tags", tagList); + + StringEntity se3 = new StringEntity(tags.toString(), + "UTF-8"); + se3.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json; charset=utf-8")); + tagPost.setEntity(se3); + + CloseableHttpResponse tagResponse = httpclient.execute( + tagPost, getLocalContext()); + resEntity = null; + try { + resEntity = tagResponse.getEntity(); + if (tagResponse.getStatusLine().getStatusCode() != 200) { + println("Error response when processing " + + file.getAbsolutePath() + + " : " + + tagResponse.getStatusLine() + .getReasonPhrase()); + println("Details: " + + EntityUtils.toString(resEntity)); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + tagResponse.close(); + } + + } + if (comments.size() > 0) { + Collections.sort(comments); + for (String text : comments.toArray(new String[comments + .size()])) { + HttpPost commentPost = new HttpPost(appendKeyIfUsed(server + + "/api/files/" + dataId + "/comment")); + + JSONObject comment = new JSONObject(); + comment.put("text", text); + + StringEntity se4 = new StringEntity(comment.toString(), + "UTF-8"); + se4.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json; charset=utf-8")); + commentPost.setEntity(se4); + + CloseableHttpResponse commentResponse = httpclient + .execute(commentPost, getLocalContext()); + resEntity = null; + try { + resEntity = commentResponse.getEntity(); + if (commentResponse.getStatusLine().getStatusCode() != 200) { + println("Error response when processing " + + file.getAbsolutePath() + + " : " + + commentResponse.getStatusLine() + .getReasonPhrase()); + 
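// --- Editor's note (illustrative sketch, not part of the patch): every Clowder call in this
// method (the description PUT, filename PUT, tag POST, and comment POST) repeats the same
// pattern: build a small org.json.JSONObject, send it as a UTF-8 "application/json"
// StringEntity, treat any non-200 status as an error, and always consume and close the
// response. A condensed sketch of that pattern, reusing names already in scope here
// (httpclient, server, dataId, appendKeyIfUsed(), getLocalContext(), println()):
//
//     HttpPost post = new HttpPost(appendKeyIfUsed(server + "/api/files/" + dataId + "/comment"));
//     StringEntity body = new StringEntity(new JSONObject().put("text", "example").toString(), "UTF-8");
//     body.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, "application/json; charset=utf-8"));
//     post.setEntity(body);
//     CloseableHttpResponse r = httpclient.execute(post, getLocalContext());
//     try {
//         if (r.getStatusLine().getStatusCode() != 200) {
//             println("Error: " + r.getStatusLine().getReasonPhrase());
//         }
//     } finally {
//         EntityUtils.consumeQuietly(r.getEntity());
//         r.close();
//     }
// --- end editor's note ---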
println("Details: " + + EntityUtils.toString(resEntity)); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + commentResponse.close(); + } + } + } + + } + } catch (IOException e) { + println("Error processing " + file.getAbsolutePath() + " : " + + e.getMessage()); + } + return dataId; + } + + @SuppressWarnings("unchecked") + private void postMetadata(CloseableHttpClient httpclient, + String uri, String path, JSONObject content, JSONObject context, + JSONObject agent) { + Set keys = new HashSet(); + keys.addAll(((Set) content.keySet())); + if (keys.contains("Geolocation")) { + keys.remove("Latitude"); + keys.remove("Longitude"); + } + + for (String key : keys) { + try { + String safeKey = key.replace(".", "_").replace("$", "_") + .replace("/", "_"); // Clowder/MongoDB don't allow keys + // with .$/ chars + + JSONObject singleContent = new JSONObject().put(safeKey, + content.get(key)); + JSONObject singleContext = new JSONObject().put(safeKey, + context.get(key)); + // Geolocation stays together with lat and long to mirror + // how the Clowder GUI works + if (key.equals("Geolocation")) { + if (content.has("Latitude")) { + singleContent.put("Latitude", content.get("Latitude")); + singleContext.put("Latitude", context.get("Latitude")); + } + if (content.has("Longitude")) { + singleContent + .put("Longitude", content.get("Longitude")); + singleContext + .put("Longitude", context.get("Longitude")); + } + } + // Clowder expects flat "Creator"s - might as well flatten all + // values... + if (singleContent.get(safeKey) instanceof JSONArray) { + for (int i = 0; i < ((JSONArray) singleContent + .getJSONArray(key)).length(); i++) { + JSONObject flatContent = new JSONObject(); + flatContent.put(key, ((JSONArray) singleContent + .getJSONArray(key)).get(i).toString()); + postSingleMetadata(flatContent, singleContext, agent, + uri, httpclient); + } + } else { + postSingleMetadata(singleContent, singleContext, agent, + uri, httpclient); + } + + } catch (IOException e) { + println("Error processing " + path + " : " + e.getMessage()); + break; + } + } + + } + + private void postSingleMetadata(JSONObject singleContent, + JSONObject singleContext, JSONObject agent, String uri, + CloseableHttpClient httpclient) throws IOException { + HttpEntity resEntity = null; + try { + singleContext.put("@vocab", CLOWDER_DEFAULT_VOCAB); + JSONObject meta = new JSONObject(); + meta.put("content", singleContent); + meta.put("@context", singleContext); + meta.put("agent", agent); + + StringEntity se2 = new StringEntity(meta.toString(), "UTF-8"); + + se2.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json; charset=utf-8")); + + HttpPost metadataPost = new HttpPost(appendKeyIfUsed(uri)); + + metadataPost.setEntity(se2); + + CloseableHttpResponse mdResponse = httpclient.execute(metadataPost, + getLocalContext()); + + resEntity = mdResponse.getEntity(); + if (mdResponse.getStatusLine().getStatusCode() != 200) { + println("Error response when processing key=" + + singleContent.keys().next() + " : " + + mdResponse.getStatusLine().getReasonPhrase()); + println("Value: " + + singleContent.get( + singleContent.keys().next().toString()) + .toString()); + println("Details: " + EntityUtils.toString(resEntity)); + throw new IOException("Non 200 response"); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + } + + } + + private void postDatasetCreator(String creator, String uri, + CloseableHttpClient httpclient) throws IOException { + HttpEntity resEntity = null; + try { + JSONObject body = new 
JSONObject(); + body.put("creator", creator); + + StringEntity se2 = new StringEntity(body.toString(), "UTF-8"); + se2.setContentType(new BasicHeader(HTTP.CONTENT_TYPE, + "application/json; charset=utf-8")); + + HttpPost creatorPost = new HttpPost(appendKeyIfUsed(uri)); + + creatorPost.setEntity(se2); + + CloseableHttpResponse creatorResponse = httpclient.execute( + creatorPost, getLocalContext()); + + resEntity = creatorResponse.getEntity(); + if (creatorResponse.getStatusLine().getStatusCode() != 200) { + println("Error response when sending creator: " + creator + + " : " + + creatorResponse.getStatusLine().getReasonPhrase()); + println("Details: " + EntityUtils.toString(resEntity)); + throw new IOException("Non 200 response"); + } + } finally { + EntityUtils.consumeQuietly(resEntity); + } + } + + protected String findGeneralizationOf(String id) { + return id; + } + + HashMap existingDatasets = new HashMap(); + HashMap existingFolders = new HashMap(); + HashMap existingFiles = new HashMap(); + + boolean fileMDRetrieved = false; + boolean folderMDRetrieved = false; + + public String itemExists(String path, Resource item) { + String tagId = null; + + String relPath = path; + if (importRO) { + // remove the '//data' prefix on imported paths to make + // it match the file upload paths + relPath = relPath + .substring(relPath.substring(1).indexOf("/") + 1); + relPath = relPath + .substring(relPath.substring(1).indexOf("/") + 1); + } + if (relPath.equals("/")) { + println("Searching for existing dataset. If this takes a long time, consider using:"); + println(" -id= if you know the dataset exists, or"); + println(" -forcenew if you know the dataset does not yet exist."); + + // It's a dataset + CloseableHttpClient httpclient = getSharedHttpClient(); + String sourcepath = path + item.getName(); + //If we haven't yet found this dataset because we haven't looked yet, or we looked and haven't yet found this dataset and there are still more to scan... + if (!existingDatasets.containsKey(sourcepath) && (numFoundDatasets == 0 || existingDatasets.size() < numFoundDatasets)) { + try { + // Only returns first 12 by default + String serviceUrl = server + "/api/datasets?limit=0"; + JSONArray datasetList = null; + HttpGet httpget = null; + CloseableHttpResponse response = null; + if (knownId == null) { + //Get the whole list of datasets to scan through + httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); + response = httpclient.execute( + httpget, getLocalContext()); + try { + if (response.getStatusLine().getStatusCode() == 200) { + HttpEntity resEntity = response.getEntity(); + if (resEntity != null) { + datasetList = new JSONArray( + EntityUtils.toString(resEntity)); + println("Scanning " + datasetList.length() + " datasets for a match..."); + numFoundDatasets = datasetList.length(); + } + } else { + println("Error response when checking for existing item at " + + sourcepath + + " : " + + response.getStatusLine() + .getReasonPhrase()); + println("Exiting to prevent duplicates. Please contact SEAD about the error."); + System.exit(1); + + } + } finally { + response.close(); + } + } else { + //Add the one knownId to the dataset list + //Note: the datasets in the list returned by Clowder also have a "name" entry, but we don't use this. 
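// --- Editor's note (descriptive comment, not part of the patch): whether datasetList holds the
// single known id built just below or the full /api/datasets listing, each candidate's
// /api/datasets/{id}/metadata.jsonld is fetched and matched on the "Upload Path" value that the
// uploader wrote when the dataset was created. The response is assumed to look roughly like:
//
//     [ { "content":  { "Upload Path": "/MyDataset" },
//         "@context": { "...": "..." },
//         "agent":    { "...": "..." } },
//       ... ]
//
// so a dataset matches when, for some entry j,
//     mdList.getJSONObject(j).getJSONObject("content").getString("Upload Path").trim()
// equals the local sourcepath being uploaded.
// --- end editor's note ---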
+ datasetList = new JSONArray(); + JSONObject dataset = new JSONObject(); + dataset.put("id", knownId); + datasetList.put(dataset); + } + if (datasetList != null) { + for (int i = 0; i < datasetList.length(); i++) { + String id = datasetList.getJSONObject(i) + .getString("id"); + String dPath = null; + serviceUrl = server + "/api/datasets/" + id + + "/metadata.jsonld"; + println(serviceUrl); + + httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); + if (i % 10 == 0) { + //Give some indication of progress + System.out.print("."); + } + response = httpclient.execute(httpget, + getLocalContext()); + try { + if (response.getStatusLine() + .getStatusCode() == 200) { + HttpEntity resEntity = response + .getEntity(); + if (resEntity != null) { + JSONArray mdList = new JSONArray( + EntityUtils + .toString(resEntity)); + + for (int j = 0; j < mdList.length(); j++) { + if (mdList + .getJSONObject(j) + .getJSONObject( + "content") + .has("Upload Path")) { + dPath = mdList + .getJSONObject( + j) + .getJSONObject( + "content") + .getString( + "Upload Path") + .trim(); + existingDatasets + .put(dPath, id); + break; + } + } + } + } else { + println("Error response when getting metadata for dataset: " + + id + + " : " + + response.getStatusLine() + .getReasonPhrase()); + println("Exiting to prevent duplicates. Please contact SEAD about the error."); + System.exit(1); + + } + } finally { + response.close(); + } + if (dPath != null && sourcepath.equals(dPath)) { + //Only scan until we find the right dataset + break; + } + } + } + } catch (IOException e) { + println("Error processing check on " + sourcepath + + " : " + e.getMessage()); + } + } + if (existingDatasets.containsKey(sourcepath)) { + //If we're looking for a dataset and found it, it's because we've started on a new dataset and need to get new folder/file info + tagId = existingDatasets.get(sourcepath); + sead2datasetId = tagId; + folderMDRetrieved = false; + fileMDRetrieved = false; + existingFiles = new HashMap(); + existingFolders = new HashMap(); + } else { + if (knownId != null) { + //We should have found something - don't continue and create a new dataset + println("Dataset with id=" + knownId + "and path: " + sourcepath + " not found."); + println("Rerun without the -id flag to scan the entire repository or use -forcenew to force creation of a new dataset."); + System.exit(1); + } + } + + } else if (item.isDirectory()) { + /* + * /We're looking for a folder Since folders in 2 have no + * metadata and can't be moved, we will assume for now that if + * the dataset exists and the folder's relative path in the + * dataset matches, we've found the folder. 
+ */ + String sourcepath = relPath + item.getName().trim(); + sourcepath = sourcepath.substring(sourcepath.substring(1) + .indexOf("/") + 1); + if (sead2datasetId != null && !folderMDRetrieved) { // Can't be in a dataset if it + // wasn't found/created already + CloseableHttpClient httpclient = getSharedHttpClient(); + try { + String serviceUrl = server + "/api/datasets/" + + sead2datasetId + "/folders"; + HttpGet httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); + CloseableHttpResponse response = httpclient.execute( + httpget, getLocalContext()); + try { + if (response.getStatusLine().getStatusCode() == 200) { + HttpEntity resEntity = response.getEntity(); + if (resEntity != null) { + JSONArray folders = new JSONArray( + EntityUtils.toString(resEntity)); + for (int i = 0; i < folders.length(); i++) { + existingFolders.put(folders.getJSONObject(i) + .getString("name"), folders.getJSONObject(i) + .getString("id")); + } + } + } else { + println("Error response when checking for existing item at " + + sourcepath + + " : " + + response.getStatusLine() + .getReasonPhrase()); + println("Exiting to prevent duplicates. Please contact SEAD about the error."); + System.exit(1); + + } + } finally { + response.close(); + } + } catch (IOException e) { + println("Error processing check on " + sourcepath + + " : " + e.getMessage()); + } finally { + folderMDRetrieved = true; + } + } + if (existingFolders.containsKey(sourcepath)) { + tagId = existingFolders.get(sourcepath); + + } + } else { + // A file + String sourcepath = path + item.getName().trim(); + + if (sead2datasetId != null && !fileMDRetrieved) { + // One-time retrieval of all file id/Upload Path info + + CloseableHttpClient httpclient = getSharedHttpClient(); + try { + String serviceUrl = server + "/api/datasets/" + + sead2datasetId + "/listAllFiles"; + + HttpGet httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); + + CloseableHttpResponse response = httpclient.execute( + httpget, getLocalContext()); + JSONArray fileList = null; + try { + if (response.getStatusLine().getStatusCode() == 200) { + HttpEntity resEntity = response.getEntity(); + if (resEntity != null) { + fileList = new JSONArray( + EntityUtils.toString(resEntity)); + } + } else { + println("Error response when checking for existing item at " + + sourcepath + + " : " + + response.getStatusLine() + .getReasonPhrase()); + println("Exiting to prevent duplicates. Please contact SEAD about the error."); + System.exit(1); + + } + } finally { + response.close(); + } + if (fileList != null) { + for (int i = 0; i < fileList.length(); i++) { + String id = fileList.getJSONObject(i) + .getString("id"); + serviceUrl = server + "/api/files/" + id + + "/metadata.jsonld"; + + httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); + + response = httpclient.execute(httpget, + getLocalContext()); + + try { + if (response.getStatusLine() + .getStatusCode() == 200) { + HttpEntity resEntity = response + .getEntity(); + if (resEntity != null) { + JSONArray mdList = new JSONArray( + EntityUtils + .toString(resEntity)); + for (int j = 0; j < mdList.length(); j++) { + if (mdList + .getJSONObject(j) + .getJSONObject( + "content") + .has("Upload Path")) { + + existingFiles + .put(mdList + .getJSONObject( + j) + .getJSONObject( + "content") + .getString( + "Upload Path") + .trim(), id); + break; + } + } + } + } else { + println("Error response when getting metadata for file: " + + id + + " : " + + response.getStatusLine() + .getReasonPhrase()); + println("Exiting to prevent duplicates. 
Please contact SEAD about the error."); + System.exit(1); + + } + } finally { + response.close(); + } + } + + } + + } catch (IOException e) { + println("Error processing check on " + sourcepath + + " : " + e.getMessage()); + } finally { + fileMDRetrieved = true; + } + } + if (existingFiles.containsKey(sourcepath)) { + tagId = existingFiles.get(sourcepath); + } + } + + if (verify && (tagId != null) && (!item.isDirectory())) { + tagId = verifyDataByHash(tagId, path, item); + } + return (tagId); + } + + HashMap hashIssues = new HashMap(); + + protected String verifyDataByHash(String tagId, String path, + Resource item) { + + String serviceUrl; + CloseableHttpClient httpclient = getSharedHttpClient(); + + try { + // Work-around - our sead2 servers have issues with incorrect or + // missing hash values + // So implementing a direct download and compute option for now. + // Can be added as a + // permanent option or replaced with the metadata check later + serviceUrl = server + "/api/files/" + + URLEncoder.encode(tagId, "UTF-8") + "/blob"; + HttpGet httpget = new HttpGet(appendKeyIfUsed(serviceUrl)); + + CloseableHttpResponse response = httpclient.execute(httpget, + getLocalContext()); + try { + if (response.getStatusLine().getStatusCode() == 200) { + HttpEntity resEntity = response.getEntity(); + if (resEntity != null) { + String realHash = null; + InputStream inputStream = resEntity.getContent(); + realHash = DigestUtils.sha1Hex(inputStream); + /* + * if (hashtype != null) { if + * (hashtype.equals("SHA1 Hash")) { realHash = + * DigestUtils.sha1Hex(inputStream); + * + * } else if (hashtype.equals("SHA512 Hash")) { + * realHash = DigestUtils.sha512Hex(inputStream); } + */ + + if (realHash != null) { + if (!realHash.equals(item.getHash("SHA-1"))) { + hashIssues.put(path + item.getName(), + "!!!: A different version of this item exists with ID: " + + tagId); + return null; + } // else it matches! + } else { + hashIssues.put( + path + item.getName(), + "Error calculating hash for " + + item.getAbsolutePath() + + " - cannot verify it"); + return null; + } + } + } else { + println("Error downloading file to verify " + + item.getAbsolutePath() + " : " + + response.getStatusLine().getReasonPhrase()); + + } + } finally { + response.close(); + } + /* + * // sha1: "http://www.w3.org/2001/04/xmldsig-more#sha1" + * serviceUrl = server + "/api/files/" + + * URLEncoder.encode(tagId, "UTF-8") + "/metadata.jsonld"; + * HttpGet httpget = new HttpGet(serviceUrl); + * + * CloseableHttpResponse response = httpclient.execute(httpget, + * getLocalContext()); try { if + * (response.getStatusLine().getStatusCode() == 200) { + * HttpEntity resEntity = response.getEntity(); if (resEntity != + * null) { String json = EntityUtils.toString(resEntity); + * JSONArray metadata = new JSONArray(json); String remoteHash = + * null; for (int i = 0; i < metadata.length(); i++) { + * JSONObject content = metadata.getJSONObject(i) + * .getJSONObject("content"); if (content != null) { if + * (content.has("sha1")) { remoteHash = + * content.getString("sha1"); break; } } } if (remoteHash != + * null) { if (!remoteHash.equals(item.getSHA1Hash())) { + * hashIssues.put(path + item.getName(), + * "!!!: A different version of this item exists with ID: " + + * tagId); return null; } // else it matches! 
} else { + * hashIssues.put(path + item.getName(), + * "Remote Hash does not exist for " + item.getAbsolutePath() + + * " - cannot verify it"); return null; } } } else { + * println("Error response while verifying " + + * item.getAbsolutePath() + " : " + + * response.getStatusLine().getReasonPhrase()); + * + * } } finally { response.close(); } + */ + + } catch (UnsupportedEncodingException e1) { + + e1.printStackTrace(); + + } catch (IOException e) { + println("Error processing verify on " + item.getAbsolutePath() + + " : " + e.getMessage()); + } + return tagId; + } + + void addLiteralMetadata(MultipartEntityBuilder meb, + String predicate, String value) { + meb.addTextBody(predicate, value); + + } + + void addURIMetadata(MultipartEntityBuilder meb, String predicate, + String value) { + meb.addTextBody(predicate, value, + ContentType.create("text/uri-list", Consts.ISO_8859_1)); + } + + @SuppressWarnings("unchecked") + @Override + public void addDatasetMetadata(String newSubject, String type, JSONObject relationships) { + + JSONObject content = new JSONObject(); + + JSONObject agent = new JSONObject(); + JSONObject me = get2me(); + agent.put("name", me.getString("fullName")); + agent.put("@type", "cat:user"); + agent.put("user_id", + server + "/api/users/" + me.getString("id")); + + for (String predLabel : (Set) relationships + .keySet()) { + Object newObject = null; + if (relationships.get(predLabel) instanceof String) { + newObject = roCollIdToNewId.get(relationships + .getString(predLabel)); + if (newObject != null) { + if (newObject.equals(sead2datasetId)) { + newObject = server + "/datasets/" + + newObject; + } else { + newObject = server + "/datasets/" + + sead2datasetId + "#folderId" + + newObject; + } + } else { + newObject = roDataIdToNewId + .get(relationships + .getString(predLabel)); + if (newObject != null) { + newObject = server + "/files/" + + newObject; + } else { // Object is not in this Dataset + // and + // can't be translated - use + // original URI + newObject = relationships + .getString(predLabel); + } + } + println(newSubject + ": " + predLabel + ": " + + newObject.toString()); + } else { // JSONArray + newObject = new JSONArray(); + + JSONArray objects = (JSONArray) relationships + .get(predLabel); + for (int i = 0; i < objects.length(); i++) { + String ob = objects.getString(i); + String newOb = null; + newOb = roCollIdToNewId.get(ob); + if (newOb != null) { + if (newOb.equals(sead2datasetId)) { + newOb = server + "/datasets/" + + newOb; + } else { + newOb = server + "/datasets/" + + sead2datasetId + + "#folderId" + newOb; + } + } else { + newOb = roDataIdToNewId.get(ob); + if (newOb != null) { + newOb = server + "/files/" + newOb; + } else { // Object is not in this + // Dataset and + // can't be translated - use + // original URI + newOb = ob; + } + } + ((JSONArray) newObject).put(newOb); + } + + } + println("Writing: " + predLabel + " : " + + newObject.toString()); + content.put(predLabel, newObject); + + } + JSONObject context = new JSONObject(); + context.put("@vocab", CLOWDER_DEFAULT_VOCAB); + // Create flattened context for 2.0 + for (String key : ((Set) content.keySet())) { + String pred = rf.getURIForContextEntry(key); + if (pred != null) { + context.put(key, pred); + } + } + if (type.equals("datasets") + || newSubject.equals(sead2datasetId)) { + CloseableHttpClient httpclient = getSharedHttpClient(); + + String uri = server + + "/api/" + + (type.equals("datasets") ? 
"files/" + : "datasets/") + newSubject + + "/metadata.jsonld"; + postMetadata(httpclient, uri, newSubject, content, + context, agent); + } else { + println("Folder: Would've written: " + newSubject + + ": " + content.toString()); + + } + + } + + @Override + protected void postProcessChildren() { + // TODO Auto-generated method stub + + } + + @Override + protected void postProcessCollection() { + // TODO Auto-generated method stub + + } + + @Override + protected String preprocessCollection(Resource dir, String path, String parentId, String collectionId) throws UploaderException { + // SEAD2 - create the dataset or folder first before processing + // children + if (!listonly) { + if (collectionId == null) { + if (parentId == null) { + collectionId = create2Dataset(dir, path); + sead2datasetId = collectionId; + } else { + collectionId = create2Folder(parentId, sead2datasetId, + path, dir); + if (collectionId == null) { + throw new UploaderException("Failed to create Folder - will not process contents of :" + path); + } + } + + } else { + // We already have the dataset uploaded so record it's id + if (parentId == null) { + sead2datasetId = collectionId; + } + } + } else { + if (collectionId != null && parentId == null) { + sead2datasetId = collectionId; + } + } + if (sead2datasetId != null) { + println("Dataset ID: " + sead2datasetId); + } + return collectionId; + } + + @Override + protected String postProcessChild(Resource dir, String path, String parentId, String collectionId) { + // TODO Auto-generated method stub + return null; + } + + @Override + protected void postProcessDatafile(String newUri, String existingUri, String collectionId, Resource file, Resource dir) throws ClientProtocolException, IOException { + + //IF NOT LISTONLY? + if (existingUri == null) { // file didn't exist + // before + if ((collectionId != null) + && (!sead2datasetId + .equals(collectionId))) { + // it's in a folder and not the dataset + if (newUri != null) { // and it was just + // created + moveFileToFolder(newUri, collectionId, + file); + } + } + } else { // the file existed + // FixMe - need to check if it is already in the + // folder or not... 
+ if (!sead2datasetId.equals(existingUri)) { + + CloseableHttpClient httpclient = getSharedHttpClient(); + + HttpGet httpget = new HttpGet(appendKeyIfUsed(server + + "/api/datasets/" + sead2datasetId + + "/listFiles")); + + CloseableHttpResponse getResponse = httpclient + .execute(httpget, getLocalContext()); + try { + if (getResponse.getStatusLine() + .getStatusCode() == 200) { + JSONArray files = new JSONArray( + EntityUtils + .toString(getResponse + .getEntity())); + for (int i = 0; i < files.length(); i++) { + if (files.getJSONObject(i) + .getString("id") + .equals(existingUri)) { + // File is in dataset + // directly, not in a + // folder, so move it if + // needed + if ((collectionId != null) + && (!sead2datasetId + .equals(collectionId))) { // it's + + moveFileToFolder( + existingUri, + collectionId, + file); + } + break; + } + } + } else { + println("Error response when listing files " + + dir.getAbsolutePath() + + " : " + + getResponse + .getStatusLine() + .getReasonPhrase()); + println("Details: " + + EntityUtils + .toString(getResponse + .getEntity())); + + } + } finally { + EntityUtils.consumeQuietly(getResponse + .getEntity()); + getResponse.close(); + } + } + } + } + + @Override + protected HttpClientContext reauthenticate(long startTime) { + if (apiKey != null) { + return getLocalContext(); + } else { + return SEADAuthenticator.UPReAuthenticateIfNeeded(server, + startTime); + } + } + + private String appendKeyIfUsed(String url) { + if (apiKey != null) { + if (url.contains("?")) { + url = url + "&key=" + apiKey; + } else { + url = url + "?key=" + apiKey; + } + } + return url; + } + +} diff --git a/src/main/java/org/sead/uploader/dataverse/DVUploader.java b/src/main/java/org/sead/uploader/dataverse/DVUploader.java index dc4ef11..dd51e02 100644 --- a/src/main/java/org/sead/uploader/dataverse/DVUploader.java +++ b/src/main/java/org/sead/uploader/dataverse/DVUploader.java @@ -17,7 +17,6 @@ import java.io.IOException; import java.io.InputStream; -import java.net.URL; import java.security.DigestInputStream; import java.security.KeyManagementException; import java.security.KeyStoreException; @@ -70,6 +69,7 @@ public class DVUploader extends AbstractUploader { private static int maxWaitTime = 60; private static boolean recurse = false; private static boolean directUpload = false; + private static boolean trustCerts = false; public static void main(String args[]) throws Exception { @@ -113,6 +113,7 @@ private static void usage() { println(" -skip= - a number of files to skip before starting processing (saves time when you know the first n files have been uploaded before)"); println(" -recurse - recurse into subdirectories"); println(" -maxlockwait - the maximum time to wait (in seconds) for a Dataset lock (i.e. while the last file is ingested) to expire (default 60 seconds)"); + println(" -trustall - trust all server certificates (i.e. 
for use when testing with self-signed certificates)"); println(""); } @@ -136,6 +137,10 @@ public boolean parseCustomArg(String arg) { directUpload = true; println("Will use direct upload of files (if configured on this server)"); return true; + } else if (arg.equals("-trustall")) { + trustCerts = true; + println("Will trust all certificates"); + return true; } else if (arg.startsWith("-maxlockwait")) { try { maxWaitTime = Integer.parseInt(arg.substring(arg.indexOf(argSeparator) + 1)); @@ -157,23 +162,30 @@ public CloseableHttpClient getSharedHttpClient() { if (httpclient == null) { // use the TrustSelfSignedStrategy to allow Self Signed Certificates SSLContext sslContext; + SSLConnectionSocketFactory connectionFactory; try { - sslContext = SSLContextBuilder - .create() - .loadTrustMaterial(new TrustAllStrategy()) - .build(); - - // create an SSL Socket Factory to use the SSLContext with the trust self signed certificate strategy - // and allow all hosts verifier. - SSLConnectionSocketFactory connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); - - // finally create the HttpClient using HttpClient factory methods and assign the ssl socket factory - httpclient = HttpClients - .custom() - .setSSLSocketFactory(connectionFactory) - .setUserAgent("curl/7.61.1") - .setDefaultRequestConfig(RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build()) - .build(); + if (trustCerts) { + sslContext = SSLContextBuilder + .create() + .loadTrustMaterial(new TrustAllStrategy()) + .build(); + // create an SSL Socket Factory to use the SSLContext with the trust self signed certificate strategy + // and allow all hosts verifier. + connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE); + // finally create the HttpClient using HttpClient factory methods and assign the ssl socket factory + httpclient = HttpClients + .custom() + .setSSLSocketFactory(connectionFactory) + .setDefaultRequestConfig(RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build()) + .build(); + } else { + httpclient = HttpClients + .custom() + .setDefaultRequestConfig(RequestConfig.custom().setCookieSpec(CookieSpecs.STANDARD).build()) + .build(); + + } + } catch (NoSuchAlgorithmException | KeyStoreException | KeyManagementException ex) { Logger.getLogger(DVUploader.class.getName()).log(Level.SEVERE, null, ex); } @@ -397,32 +409,23 @@ protected String uploadDatafile(Resource file, String path) { JSONObject data = (new JSONObject(jsonResponse)).getJSONObject("data"); uploadUrl = data.getString("url"); String storageIdentifier = data.getString("storageIdentifier"); - println("Put to: " + uploadUrl); - println("storageId: " + storageIdentifier); HttpPut httpput = new HttpPut(uploadUrl); MessageDigest messageDigest = MessageDigest.getInstance("MD5"); - println(file.getAbsolutePath() + " " + file.length()); try (InputStream inStream = file.getInputStream(); DigestInputStream digestInputStream = new DigestInputStream(inStream, messageDigest)) { - println("Set S3 entity"); httpput.setEntity(new BufferedHttpEntity(new InputStreamEntity(digestInputStream, file.length()))); - println("Calling S3"); CloseableHttpResponse putResponse = httpclient.execute(httpput); try { int putStatus = putResponse.getStatusLine().getStatusCode(); - println("Status " + putStatus); String putRes = null; HttpEntity putEntity = putResponse.getEntity(); if (putEntity != null) { putRes = EntityUtils.toString(putEntity); - println(putRes); } if (putStatus == 200) { - println("S3 
Success"); String localchecksum = Hex.encodeHexString(digestInputStream.getMessageDigest().digest()); - println("Checksum: " + localchecksum); // Now post data urlString = server + "/api/datasets/:persistentId/add"; urlString = urlString + "?persistentId=" + datasetPID + "&key=" + apiKey; @@ -510,7 +513,7 @@ protected String uploadDatafile(Resource file, String path) { } catch (IOException e) { e.printStackTrace(System.out); - println("Error processing 1" + file.getAbsolutePath() + " : " + e.getMessage()); + println("Error processing POST to Dataverse" + file.getAbsolutePath() + " : " + e.getMessage()); retry = 0; } } @@ -524,7 +527,7 @@ protected String uploadDatafile(Resource file, String path) { } catch (IOException e) { e.printStackTrace(System.out); - println("Error processing 2 " + file.getAbsolutePath() + " : " + e.getMessage()); + println("Error processing file upload " + file.getAbsolutePath() + " : " + e.getMessage()); retry = 0; } catch (NoSuchAlgorithmException e1) { // TODO Auto-generated catch block @@ -532,7 +535,7 @@ protected String uploadDatafile(Resource file, String path) { } } catch (IOException e) { - println("Error processing 3" + file.getAbsolutePath() + " : " + e.getMessage()); + println("Error processing request for storage id" + file.getAbsolutePath() + " : " + e.getMessage()); retry = 0; } } @@ -623,6 +626,7 @@ protected String uploadDatafile(Resource file, String path) { } } return dataId; + } private boolean isLocked() { diff --git a/src/main/java/org/sead/uploader/util/FileResource.java b/src/main/java/org/sead/uploader/util/FileResource.java index 9c9997b..41c0b5b 100644 --- a/src/main/java/org/sead/uploader/util/FileResource.java +++ b/src/main/java/org/sead/uploader/util/FileResource.java @@ -1,19 +1,18 @@ -/******************************************************************************* +/** ***************************************************************************** * Copyright 2016 University of Michigan - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
- ******************************************************************************/ - + ***************************************************************************** */ package org.sead.uploader.util; import java.io.File; @@ -39,130 +38,130 @@ public class FileResource implements Resource { - private File f; - - public FileResource(String fileName) { - f = new File(fileName); - } - - private FileResource(File file) { - f = file; - } - - @Override - public String getName() { - - return f.getName(); - } - - @Override - public boolean isDirectory() { - return f.isDirectory(); - } - - @Override - public String getPath() { - return f.getPath(); - } - - @Override - public String getMimeType() { - String type=null; - try { - type = Files.probeContentType(FileSystems.getDefault().getPath(f.getPath())); - - } catch (Exception e) { - // Could log - } - if (type == null) { - type = "application/octet-stream"; - } - return type; - } - - @Override - public Iterator iterator() { - return listResources().iterator(); - } - - @Override - public Iterable listResources() { - ArrayList resources = new ArrayList(); - for (File file : f.listFiles()) { - resources.add(new FileResource(file)); - } - return resources; - } - - @Override - public long length() { - return f.length(); - } - - @Override - public String getAbsolutePath() { - return f.getAbsolutePath(); - } - - @Override - public ContentBody getContentBody() { - ContentType cType = ContentType.DEFAULT_BINARY; - try { - String mType = Files.probeContentType(f.toPath()); - - if (mType != null) { - cType = ContentType.create(mType); - } - } catch (Exception e) { - System.out.println(e.getMessage()); - } - return new FileBody(f, cType, f.getName()); - } - - @Override - public InputStream getInputStream() { - try { - InputStream is = new FileInputStream(f); - return is; - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - return null; - } - - @Override - public String getHash(String algorithm) { - MessageDigest digester = null; - InputStream is = null; - try { - is = new FileInputStream(f); - digester = MessageDigest.getInstance(algorithm); - is = new DigestInputStream(is, digester); - byte[] b = new byte[8192]; - while (is.read(b) != -1) + private File f; + + public FileResource(String fileName) { + f = new File(fileName); + } + + private FileResource(File file) { + f = file; + } + + @Override + public String getName() { + + return f.getName(); + } + + @Override + public boolean isDirectory() { + return f.isDirectory(); + } + + @Override + public String getPath() { + return f.getPath(); + } + + @Override + public String getMimeType() { + String type = null; + try { + type = Files.probeContentType(FileSystems.getDefault().getPath(f.getPath())); + + } catch (Exception e) { + // Could log + } + if (type == null) { + type = "application/octet-stream"; + } + return type; + } + + @Override + public Iterator iterator() { + return listResources().iterator(); + } + + @Override + public Iterable listResources() { + ArrayList resources = new ArrayList(); + for (File file : f.listFiles()) { + resources.add(new FileResource(file)); + } + return resources; + } + + @Override + public long length() { + return f.length(); + } + + @Override + public String getAbsolutePath() { + return f.getAbsolutePath(); + } + + @Override + public ContentBody getContentBody() { + ContentType cType = ContentType.DEFAULT_BINARY; + try { + String mType = Files.probeContentType(f.toPath()); + + if 
(mType != null) { + cType = ContentType.create(mType); + } + } catch (Exception e) { + System.out.println(e.getMessage()); + } + return new FileBody(f, cType, f.getName()); + } + + @Override + public InputStream getInputStream() { + try { + InputStream is = new FileInputStream(f); + return is; + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return null; + } + + @Override + public String getHash(String algorithm) { + MessageDigest digester = null; + InputStream is = null; + try { + is = new FileInputStream(f); + digester = MessageDigest.getInstance(algorithm); + is = new DigestInputStream(is, digester); + byte[] b = new byte[8192]; + while (is.read(b) != -1) ; - byte[] digest = digester.digest(); - return Hex.encodeHexString(digest); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } catch (NoSuchAlgorithmException e1) { - e1.printStackTrace(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } finally { - IOUtils.closeQuietly(is); - } - return null; - } - - @Override - public JSONObject getMetadata() { - // No extra metadata by default for files - return new JSONObject(); - } + byte[] digest = digester.digest(); + return Hex.encodeHexString(digest); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } catch (NoSuchAlgorithmException e1) { + e1.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } finally { + IOUtils.closeQuietly(is); + } + return null; + } + + @Override + public JSONObject getMetadata() { + // No extra metadata by default for files + return new JSONObject(); + } } diff --git a/src/main/java/org/sead/uploader/util/PublishedFolderProxyResource.java b/src/main/java/org/sead/uploader/util/PublishedFolderProxyResource.java index 6132369..3d09de3 100644 --- a/src/main/java/org/sead/uploader/util/PublishedFolderProxyResource.java +++ b/src/main/java/org/sead/uploader/util/PublishedFolderProxyResource.java @@ -1,144 +1,143 @@ -/******************************************************************************* - * Copyright 2016 University of Michigan - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- ******************************************************************************/ -package org.sead.uploader.util; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.Iterator; - -import org.apache.commons.codec.binary.Hex; -import org.apache.commons.io.IOUtils; -import org.apache.http.entity.ContentType; -import org.apache.http.entity.mime.content.ContentBody; -import org.apache.http.entity.mime.content.InputStreamBody; -import org.json.JSONObject; - -/** - * This class represents a file (2.0 Dataset) that includes the - * metadata/tags/comments associated with a directory (2.0 folder). When - * importing a SEAD publication, metadata for folders will be recorded in a file - * added to that folder. - * - * @author Jim - * - */ - -public class PublishedFolderProxyResource extends PublishedResource implements Resource { - - private PublishedResource resource; - private String message; - - public PublishedFolderProxyResource(PublishedResource pr, String folderId) { - super(pr.resource); - resource = pr; - - StringBuilder sb = new StringBuilder(); - sb.append("SEAD Importer README:\n\n"); - sb.append( - "The metadata, tags, and comments on this file are intended to apply to the SEAD 2.0 Folder it was created in:\n"); - sb.append("\tID: " + folderId); - sb.append("\n\tPath: " + resource.getPath()); - sb.append( - "\n\nThis file manages the difference between the SEAD 1.x and current SEAD 2.0 data models, ensuring that information about published SEAD 1.x sub-collections is available when they are imported into SEAD 2.0."); - sb.append( - "\n\nNote that generated metadata, such as the file type, size, and cryptographic hash values apply to the file (containing this message) and not to the folder."); - message = sb.toString(); - - } - - @Override - public String getName() { - return "SEADImport.ReadMe.txt"; - } - - @Override - public String getAndRemoveTitle() { - return getName(); - - } - - @Override - public boolean isDirectory() { - return false; - } - - @Override - public String getPath() { - return resource.getPath() + "/" + getName(); - } - - @Override - public Iterator iterator() { - return listResources().iterator(); - } - - @Override - public Iterable listResources() { - ArrayList resources = new ArrayList(); - return resources; - } - - @Override - public long length() { - long size = message.getBytes(StandardCharsets.UTF_8).length; - return size; - } - - @Override - public String getAbsolutePath() { - return resource.getAbsolutePath() + "/" + getName(); - } - - @Override - public ContentBody getContentBody() { - InputStream stream = new ByteArrayInputStream(message.getBytes(StandardCharsets.UTF_8)); - return new InputStreamBody(stream, ContentType.create("text/plain"), getName()); - } - - private String hash = null; - - @Override - public String getHash(String algorithm) { - if (hash == null) { - MessageDigest digester = null; - InputStream is = null; - try { - digester = MessageDigest.getInstance(algorithm); - digester.update(message.getBytes("UTF-8")); - byte[] digest = digester.digest(); - return Hex.encodeHexString(digest); - } catch (NoSuchAlgorithmException e1) { - e1.printStackTrace(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } finally { - IOUtils.closeQuietly(is); - } - } - return hash; - } - - @Override - public 
JSONObject getMetadata() { - return (resource.getMetadata()); - } -} +/** ***************************************************************************** + * Copyright 2016 University of Michigan + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ***************************************************************************** */ +package org.sead.uploader.util; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.Iterator; + +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.io.IOUtils; +import org.apache.http.entity.ContentType; +import org.apache.http.entity.mime.content.ContentBody; +import org.apache.http.entity.mime.content.InputStreamBody; +import org.json.JSONObject; + +/** + * This class represents a file (2.0 Dataset) that includes the + * metadata/tags/comments associated with a directory (2.0 folder). When + * importing a SEAD publication, metadata for folders will be recorded in a file + * added to that folder. + * + * @author Jim + * + */ +public class PublishedFolderProxyResource extends PublishedResource implements Resource { + + private PublishedResource resource; + private String message; + + public PublishedFolderProxyResource(PublishedResource pr, String folderId) { + super(pr.resource); + resource = pr; + + StringBuilder sb = new StringBuilder(); + sb.append("SEAD Importer README:\n\n"); + sb.append( + "The metadata, tags, and comments on this file are intended to apply to the SEAD 2.0 Folder it was created in:\n"); + sb.append("\tID: " + folderId); + sb.append("\n\tPath: " + resource.getPath()); + sb.append( + "\n\nThis file manages the difference between the SEAD 1.x and current SEAD 2.0 data models, ensuring that information about published SEAD 1.x sub-collections is available when they are imported into SEAD 2.0."); + sb.append( + "\n\nNote that generated metadata, such as the file type, size, and cryptographic hash values apply to the file (containing this message) and not to the folder."); + message = sb.toString(); + + } + + @Override + public String getName() { + return "SEADImport.ReadMe.txt"; + } + + @Override + public String getAndRemoveTitle() { + return getName(); + + } + + @Override + public boolean isDirectory() { + return false; + } + + @Override + public String getPath() { + return resource.getPath() + "/" + getName(); + } + + @Override + public Iterator iterator() { + return listResources().iterator(); + } + + @Override + public Iterable listResources() { + ArrayList resources = new ArrayList(); + return resources; + } + + @Override + public long length() { + long size = message.getBytes(StandardCharsets.UTF_8).length; + return size; + } + + @Override + public String getAbsolutePath() { + return resource.getAbsolutePath() + "/" + getName(); + } + + @Override + public ContentBody getContentBody() { + InputStream 
stream = new ByteArrayInputStream(message.getBytes(StandardCharsets.UTF_8)); + return new InputStreamBody(stream, ContentType.create("text/plain"), getName()); + } + + private String hash = null; + + @Override + public String getHash(String algorithm) { + if (hash == null) { + MessageDigest digester = null; + InputStream is = null; + try { + digester = MessageDigest.getInstance(algorithm); + digester.update(message.getBytes("UTF-8")); + byte[] digest = digester.digest(); + return Hex.encodeHexString(digest); + } catch (NoSuchAlgorithmException e1) { + e1.printStackTrace(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } finally { + IOUtils.closeQuietly(is); + } + } + return hash; + } + + @Override + public JSONObject getMetadata() { + return (resource.getMetadata()); + } +} diff --git a/src/main/java/org/sead/uploader/util/PublishedResource.java b/src/main/java/org/sead/uploader/util/PublishedResource.java index 0ce21a4..34b24c3 100644 --- a/src/main/java/org/sead/uploader/util/PublishedResource.java +++ b/src/main/java/org/sead/uploader/util/PublishedResource.java @@ -1,542 +1,543 @@ -/******************************************************************************* - * Copyright 2016 University of Michigan - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- ******************************************************************************/ -package org.sead.uploader.util; - -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Set; - -import org.apache.http.HttpEntity; -import org.apache.http.entity.ContentType; -import org.apache.http.entity.mime.content.ContentBody; -import org.apache.http.entity.mime.content.InputStreamBody; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -public class PublishedResource implements Resource { - - protected JSONObject resource; - private String path; - private String absPath; - private String origTitle = null; - private static ResourceFactory myFactory; - - static final HashMap blackList = new HashMap() { - /** - * - */ - private static final long serialVersionUID = 1L; - - { - put("Purpose", "http://sead-data.net/vocab/publishing#Purpose"); - put("Rights Holder", "http://purl.org/dc/terms/rightsHolder"); - put("Repository", "http://sead-data.net/terms/requestedrepository"); - put("Max Dataset Size", "http://sead-data.net/terms/maxdatasetsize"); - put("Total Size", "tag:tupeloproject.org,2006:/2.0/files/length"); - put("Aggregation Statistics", "http://sead-data.net/terms/publicationstatistics"); - - put("Number of Datasets", "http://sead-data.net/terms/datasetcount"); - put("Preferences", "http://sead-data.net/terms/publicationpreferences"); - put("Publication Callback", "http://sead-data.net/terms/publicationcallback"); - put("Max Collection Depth", "http://sead-data.net/terms/maxcollectiondepth"); - put("Number of Collections", "http://sead-data.net/terms/collectioncount"); - put("Affiliations", "http://sead-data.net/terms/affiliations"); - - put("Publishing Project", "http://sead-data.net/terms/publishingProject"); - put("Publishing Project Name", "http://sead-data.net/terms/publishingProjectName"); - // "https://w3id.org/ore/context", - put("Has Part", "http://purl.org/dc/terms/hasPart"); - put("Is Version Of", "http://purl.org/dc/terms/isVersionOf"); - put("@type", "@type"); - put("@id", "@id"); - put("similarTo", "http://www.openarchives.org/ore/terms/similarTo"); - put("SHA1 Hash", "http://sead-data.net/terms/hasSHA1Digest"); - put("Size", "tag:tupeloproject.org,2006:/2.0/files/length"); - put("Mimetype", "http://purl.org/dc/elements/1.1/format"); // used - // directly - // to - // be - // the - // mimetype - // for - // the - // uploaded - // file - - } - }; - - static final HashMap grayList = new HashMap() { - private static final long serialVersionUID = 1L; - - { - - // put("Creator","http://purl.org/dc/terms/creator"); - // put("Contact","http://sead-data.net/terms/contact"); - put("Creation Date", "http://purl.org/dc/terms/created"); - put("Data Mimetypes", "http://purl.org/dc/elements/1.1/format"); - put("Uploaded By", "http://purl.org/dc/elements/1.1/creator"); - put("Date", "http://purl.org/dc/elements/1.1/date"); - - put("Label", "http://www.w3.org/2000/01/rdf-schema#label"); - put("Keyword", "http://www.holygoat.co.uk/owl/redwood/0.1/tags/taggedWithTag"); - put("Publication Date", "http://purl.org/dc/terms/issued"); - put("GeoPoint", "tag:tupeloproject.org,2006:/2.0/gis/hasGeoPoint"); - put("Comment", "http://cet.ncsa.uiuc.edu/2007/annotation/hasAnnotation"); - - /* - * put("GeoPoint: { put("@id: "tag:tupeloproject.org,2006:/2.0/gis/hasGeoPoint", - * 
long");"http://www.w3.org/2003/01/geo/wgs84_pos#long", - * lat");"http://www.w3.org/2003/01/geo/wgs84_pos#lat" }, put("Comment: { - * put("@id: "http://cet.ncsa.uiuc.edu/2007/annotation /hasAnnotation", - * put("comment_body: "http://purl.org/dc/elements/1.1 /description"); - * put("comment_author: "http://purl.org/dc/elements/1.1/creator", - * comment_date");"http://purl.org/dc/elements/1.1/date" }, - */ - put("Instance Of", "http://purl.org/vocab/frbr/core#embodimentOf"); - put("External Identifier", "http://purl.org/dc/terms/identifier"); - put("License", "http://purl.org/dc/terms/license"); - put("Rights Holder", "http://purl.org/dc/terms/rightsHolder"); - put("Rights", "http://purl.org/dc/terms/rights"); - put("Created", "http://purl.org/dc/elements/1.1/created"); - put("Size", "tag:tupeloproject.org,2006:/2.0/files/length"); - put("Label", "http://www.w3.org/2000/01/rdf-schema#label"); - put("Identifier", "http://purl.org/dc/elements/1.1/identifier"); - } - }; - - public static void setResourceFactory(ResourceFactory rf) { - myFactory = rf; - } - - public PublishedResource(JSONObject jo) { - resource = jo; - } - - @Override - public String getName() { - String name = resource.getString("Label"); - if (resource.has("Title")) { - origTitle = resource.getString("Title"); - } - if (isDirectory()) { - /* - * Since Clowder doesn't keep the original name of a folder if it is changed, - * and we use the name in paths, we can't write the name and change the visible - * label like we do with Files (done because labels may not be valid filenames - * if they have unicode chars, etc.). So - a work-around is to always use the - * title for collections and ignore the Label This means the Label is lost for - * 1.5 collections - acceptable since, while it is captured in a publication, it - * is not visible to the user via the GUI. - */ - if (origTitle == null || (origTitle.isEmpty())) { - name = ""; - } else { - name = origTitle; - } - } - // Label should always exist and be valid.... 
- if (name == null || name.isEmpty()) { - System.err.println("Warning: Bad Label found for resource with title: " + origTitle); - // Handle rare/test cases where all are empty strings - if (origTitle == null || (origTitle.isEmpty())) { - name = ""; - } else { - name = origTitle; - } - } - return name; - } - - @Override - public boolean isDirectory() { - if (getChildren().length() != 0) { - return true; - } - Object o = resource.get("@type"); - if (o != null) { - if (o instanceof JSONArray) { - for (int i = 0; i < ((JSONArray) o).length(); i++) { - String type = ((JSONArray) o).getString(i).trim(); - // 1.5 and 2.0 types - if ("http://cet.ncsa.uiuc.edu/2007/Collection".equals(type) - || "http://cet.ncsa.uiuc.edu/2016/Folder".equals(type)) { - return true; - } - - } - } else if (o instanceof String) { - String type = ((String) o).trim(); - // 1.5 and 2.0 types - if ("http://cet.ncsa.uiuc.edu/2007/Collection".equals(type) - || "http://cet.ncsa.uiuc.edu/2016/Folder".equals(type)) { - return true; - } - } - } - return false; - } - - @Override - public String getPath() { - return path; - } - - @Override - public Iterator iterator() { - return listResources().iterator(); - } - - @Override - public Iterable listResources() { - ArrayList resources = new ArrayList(); - JSONArray children = getChildren(); - for (int i = 0; i < children.length(); i++) { - resources.add(myFactory.getPublishedResource(children.getString(i), getPath())); - } - return resources; - } - - // Get's all "Has Part" children, standardized to send an array with 0,1, or - // more elements - JSONArray getChildren() { - Object o = null; - try { - o = resource.get("Has Part"); - } catch (JSONException e) { - // Doesn't exist - that's OK - } - if (o == null) { - return new JSONArray(); - } else { - if (o instanceof JSONArray) { - return (JSONArray) o; - } else if (o instanceof String) { - return new JSONArray("[ " + (String) o + " ]"); - } - return new JSONArray(); - } - } - - @Override - public long length() { - long size = 0; - if (!isDirectory()) { - size = Long.parseLong(resource.optString("Size")); // sead2 sends a - // number, 1.5 a - // string - } - return size; - } - - @Override - public String getAbsolutePath() { - return absPath; - } - - @Override - public ContentBody getContentBody() { - // While space and / etc. are already encoded, the quote char is not and - // it is not a valid char - // Fix Me - identify additional chars that need to be encoded... - String uri = resource.getString("similarTo").replace("\"", "%22").replace(";", "%3b"); - - try { - HttpEntity entity = myFactory.getURI(new URI(uri)); - return new InputStreamBody(entity.getContent(), ContentType.create(resource.getString("Mimetype")), - getName()); - } catch (IllegalStateException e) { - e.printStackTrace(); - } catch (JSONException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } catch (URISyntaxException e) { - e.printStackTrace(); - } - return null; - } - - @Override - public InputStream getInputStream() { - // While space and / etc. are already encoded, the quote char is not and - // it is not a valid char - // Fix Me - identify additional chars that need to be encoded... 
- String uri = resource.getString("similarTo").replace("\"", "%22").replace(";", "%3b"); - - try { - HttpEntity entity = myFactory.getURI(new URI(uri)); - return entity.getContent(); - - } catch (IOException e) { - e.printStackTrace(); - } catch (URISyntaxException e) { - e.printStackTrace(); - } - return null; - } - - public void setPath(String path) { - this.path = path; - } - - public void setAbsolutePath(String abspath) { - absPath = abspath; - } - - @Override - public String getHash(String algorithm) { - String hash = null; - if (algorithm.equals("SHA-1")) { - hash = resource.getString("SHA1 Hash"); - } - return hash; - } - - private static HashMap allRelationships = new HashMap(); - - @SuppressWarnings("unchecked") - @Override - public JSONObject getMetadata() { - ArrayList keysToKeep = new ArrayList(); - keysToKeep.addAll(resource.keySet()); - HashMap relationships = new HashMap(); - - HashMap changed = new HashMap(); - for (String key : (Set) resource.keySet()) { - if (blackList.containsKey(key)) { - // SEADUploader.println("Found: " + key + " : dropped"); - keysToKeep.remove(key); - } else if (grayList.containsKey(key)) { - // SEADUploader.println("Found: " + key + " : converted"); - if (!willConvert(key, resource.get(key))) { - keysToKeep.remove(key); - } - } else { - // else keep it, including the @context - // SEADUploader.println("Found: " + key + " : keeping"); - } - - } - // SEADUploader.println(resource.toString()); - JSONObject md = new JSONObject(resource, keysToKeep.toArray(new String[keysToKeep.size()])); - // Note @context may have unnecessary elements now - should not be - // harmful but could perhaps be removed - // SEADUploader.println(md.toString()); - - for (String key : (Set) md.keySet()) { - // SEADUploader.println("Checking: " + key + " : " - // + md.get(key).toString()); - if (md.get(key) instanceof String) { - String val = resource.getString(key); - // SEADUploader.println(key + " : " + val); - if (val.startsWith("tag:") || val.startsWith("urn:")) { - - relationships.put(key, val); - } else { - Object updated = convert(key, val); - if (updated != null) { - changed.put(key, updated); - } - } - } else if (md.get(key) instanceof JSONArray) { - JSONArray vals = md.getJSONArray(key); - JSONArray newvals = new JSONArray(); - JSONArray newrels = new JSONArray(); - - for (int i = 0; i < vals.length(); i++) { - // SEADUploader.println("Checking: " + i + " : " - // + vals.get(i).toString()); - if (vals.get(i) instanceof String) { - // relationships always have a string value by definition - if (vals.getString(i).startsWith("tag:") || vals.getString(i).startsWith("urn:")) { - newrels.put(vals.getString(i)); - } else { - Object updated = convert(key, vals.getString(i)); - if (updated != null) { - newvals.put(updated); - } - } - } else { - Object updated = convert(key, vals.get(i)); - if (updated != null) { - newvals.put(updated); - } - } - } - if (newvals.length() != 0) { - changed.put(key, newvals); - } - if (newrels.length() != 0) { - relationships.put(key, newrels); - } - } else { - changed.put(key, md.get(key)); - } - } - md = new JSONObject(changed); - md.put("Metadata on Original", origMD); - - allRelationships.put(resource.getString("Identifier"), relationships); - return md; - - } - - HashMap origMD = new HashMap(); - - private boolean willConvert(String key, Object object) { - - if (!ResourceFactory.grayConversions.containsKey(key)) { - if (key.equals("Label")) { - if ((origTitle != null) && (!((String) object).equals(origTitle))) { - // It's unique - move it to 
orig metadata - origMD.put(key, object); - } else { - // It's the same as the name/title - don't move it to the - // original metadata - } - } else { - origMD.put(key, object); - } - // Regardless, don't keep the original item - return false; - } - return true; - } - - private Object convert(String key, Object object) { - switch (key) { - case "Creator": - if (object instanceof JSONObject) { - object = ((JSONObject) object).getString("@id"); - } - break; - case "Contact": - if (object instanceof JSONObject) { - object = ((JSONObject) object).getString("@id"); - } - break; - case "External Identifier": - if (object instanceof String) { - if (((String) object).startsWith("http://doi.org/10.5072/")) - object = null; - } - } - return object; - } - - public static HashMap getAllRelationships() { - return allRelationships; - } - - public String getIdentifier() { - return resource.getString("Identifier"); - - } - - /* - * For datasets, we send the abstract as a description - before processing - * metadata in general, so retrieve the value and then remove it so that - * duplicate metadata is not sent. - */ - public String getAndRemoveAbstract(boolean d2a) { - String theAbstract = null; - if (d2a) { - if (resource.has("Has Description")) { - Object descObject = resource.get("Has Description"); - if (descObject instanceof String) { - theAbstract = descObject.toString(); - } else if (descObject instanceof JSONArray) { - theAbstract = ((JSONArray) descObject).toString(); - } - resource.remove("Has Description"); - } - } - if (resource.has("Abstract")) { - if (theAbstract == null) { - theAbstract = ""; - } else { // Combining with a description - add a space between - theAbstract = theAbstract + " "; - } - if (resource.get("Abstract") instanceof JSONArray) { - // Convert multiple abstracts into 1 so it fits - // Clowder's single description field - // Could concatenate, but JSON will help if anyone wants - // to separate abstracts after migration - theAbstract = theAbstract + ((JSONArray) resource.getJSONArray("Abstract")).toString(2); - } else { - theAbstract = theAbstract + resource.getString("Abstract").toString(); - } - resource.remove("Abstract"); - } - return theAbstract; - } - - /* - * return the "Title" (which may be different than getName which comes from the - * "Label" - */ - public String getAndRemoveTitle() { - - if (resource.has("Title")) { - origTitle = resource.getString("Title"); - resource.remove("Title"); - } - return origTitle; - } - - /* - * For datasets, we send the creators via the dataset api - before processing - * metadata, so retrieve the value and then remove it so that duplicate metadata - * is not sent. 
- */ - public void getAndRemoveCreator(List creators) { - - if (resource.has("Creator")) { - Object creatorField = resource.get("Creator"); - if (creatorField instanceof JSONArray) { - for (int i = 0; i < ((JSONArray) creatorField).length(); i++) { - Object creator = ((JSONArray) creatorField).get(i); - if (creator instanceof JSONObject) { - creators.add(((String) convert("Creator", creator))); - } else { - creators.add((String) creator); - } - } - } else if (creatorField instanceof JSONObject) { - creators.add(((String) convert("Creator", creatorField))); - } else { - creators.add(((String) creatorField)); - } - resource.remove("Creator"); - } - } - - @Override - public String getMimeType() { - if (resource.has("MimeType")) { - return resource.getString("MimeType"); - } - return null; - } -} +/** ***************************************************************************** + * Copyright 2016 University of Michigan + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ***************************************************************************** */ +package org.sead.uploader.util; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.http.HttpEntity; +import org.apache.http.entity.ContentType; +import org.apache.http.entity.mime.content.ContentBody; +import org.apache.http.entity.mime.content.InputStreamBody; +import org.json.JSONArray; +import org.json.JSONException; +import org.json.JSONObject; + +public class PublishedResource implements Resource { + + protected JSONObject resource; + private String path; + private String absPath; + private String origTitle = null; + private static ResourceFactory myFactory; + + static final HashMap blackList = new HashMap() { + /** + * + */ + private static final long serialVersionUID = 1L; + + { + put("Purpose", "http://sead-data.net/vocab/publishing#Purpose"); + put("Rights Holder", "http://purl.org/dc/terms/rightsHolder"); + put("Repository", "http://sead-data.net/terms/requestedrepository"); + put("Max Dataset Size", "http://sead-data.net/terms/maxdatasetsize"); + put("Total Size", "tag:tupeloproject.org,2006:/2.0/files/length"); + put("Aggregation Statistics", "http://sead-data.net/terms/publicationstatistics"); + + put("Number of Datasets", "http://sead-data.net/terms/datasetcount"); + put("Preferences", "http://sead-data.net/terms/publicationpreferences"); + put("Publication Callback", "http://sead-data.net/terms/publicationcallback"); + put("Max Collection Depth", "http://sead-data.net/terms/maxcollectiondepth"); + put("Number of Collections", "http://sead-data.net/terms/collectioncount"); + put("Affiliations", "http://sead-data.net/terms/affiliations"); + + put("Publishing Project", "http://sead-data.net/terms/publishingProject"); + put("Publishing Project Name", "http://sead-data.net/terms/publishingProjectName"); + // 
"https://w3id.org/ore/context", + put("Has Part", "http://purl.org/dc/terms/hasPart"); + put("Is Version Of", "http://purl.org/dc/terms/isVersionOf"); + put("@type", "@type"); + put("@id", "@id"); + put("similarTo", "http://www.openarchives.org/ore/terms/similarTo"); + put("SHA1 Hash", "http://sead-data.net/terms/hasSHA1Digest"); + put("Size", "tag:tupeloproject.org,2006:/2.0/files/length"); + put("Mimetype", "http://purl.org/dc/elements/1.1/format"); // used + // directly + // to + // be + // the + // mimetype + // for + // the + // uploaded + // file + + } + }; + + static final HashMap grayList = new HashMap() { + private static final long serialVersionUID = 1L; + + { + + // put("Creator","http://purl.org/dc/terms/creator"); + // put("Contact","http://sead-data.net/terms/contact"); + put("Creation Date", "http://purl.org/dc/terms/created"); + put("Data Mimetypes", "http://purl.org/dc/elements/1.1/format"); + put("Uploaded By", "http://purl.org/dc/elements/1.1/creator"); + put("Date", "http://purl.org/dc/elements/1.1/date"); + + put("Label", "http://www.w3.org/2000/01/rdf-schema#label"); + put("Keyword", "http://www.holygoat.co.uk/owl/redwood/0.1/tags/taggedWithTag"); + put("Publication Date", "http://purl.org/dc/terms/issued"); + put("GeoPoint", "tag:tupeloproject.org,2006:/2.0/gis/hasGeoPoint"); + put("Comment", "http://cet.ncsa.uiuc.edu/2007/annotation/hasAnnotation"); + + /* + * put("GeoPoint: { put("@id: "tag:tupeloproject.org,2006:/2.0/gis/hasGeoPoint", + * long");"http://www.w3.org/2003/01/geo/wgs84_pos#long", + * lat");"http://www.w3.org/2003/01/geo/wgs84_pos#lat" }, put("Comment: { + * put("@id: "http://cet.ncsa.uiuc.edu/2007/annotation /hasAnnotation", + * put("comment_body: "http://purl.org/dc/elements/1.1 /description"); + * put("comment_author: "http://purl.org/dc/elements/1.1/creator", + * comment_date");"http://purl.org/dc/elements/1.1/date" }, + */ + put("Instance Of", "http://purl.org/vocab/frbr/core#embodimentOf"); + put("External Identifier", "http://purl.org/dc/terms/identifier"); + put("License", "http://purl.org/dc/terms/license"); + put("Rights Holder", "http://purl.org/dc/terms/rightsHolder"); + put("Rights", "http://purl.org/dc/terms/rights"); + put("Created", "http://purl.org/dc/elements/1.1/created"); + put("Size", "tag:tupeloproject.org,2006:/2.0/files/length"); + put("Label", "http://www.w3.org/2000/01/rdf-schema#label"); + put("Identifier", "http://purl.org/dc/elements/1.1/identifier"); + } + }; + + public static void setResourceFactory(ResourceFactory rf) { + myFactory = rf; + } + + public PublishedResource(JSONObject jo) { + resource = jo; + } + + @Override + public String getName() { + String name = resource.getString("Label"); + if (resource.has("Title")) { + origTitle = resource.getString("Title"); + } + if (isDirectory()) { + /* + * Since Clowder doesn't keep the original name of a folder if it is changed, + * and we use the name in paths, we can't write the name and change the visible + * label like we do with Files (done because labels may not be valid filenames + * if they have unicode chars, etc.). So - a work-around is to always use the + * title for collections and ignore the Label This means the Label is lost for + * 1.5 collections - acceptable since, while it is captured in a publication, it + * is not visible to the user via the GUI. + */ + if (origTitle == null || (origTitle.isEmpty())) { + name = ""; + } else { + name = origTitle; + } + } + // Label should always exist and be valid.... 
+ if (name == null || name.isEmpty()) { + System.err.println("Warning: Bad Label found for resource with title: " + origTitle); + // Handle rare/test cases where all are empty strings + if (origTitle == null || (origTitle.isEmpty())) { + name = ""; + } else { + name = origTitle; + } + } + return name; + } + + @Override + public boolean isDirectory() { + if (getChildren().length() != 0) { + return true; + } + Object o = resource.get("@type"); + if (o != null) { + if (o instanceof JSONArray) { + for (int i = 0; i < ((JSONArray) o).length(); i++) { + String type = ((JSONArray) o).getString(i).trim(); + // 1.5 and 2.0 types + if ("http://cet.ncsa.uiuc.edu/2007/Collection".equals(type) + || "http://cet.ncsa.uiuc.edu/2016/Folder".equals(type)) { + return true; + } + + } + } else if (o instanceof String) { + String type = ((String) o).trim(); + // 1.5 and 2.0 types + if ("http://cet.ncsa.uiuc.edu/2007/Collection".equals(type) + || "http://cet.ncsa.uiuc.edu/2016/Folder".equals(type)) { + return true; + } + } + } + return false; + } + + @Override + public String getPath() { + return path; + } + + @Override + public Iterator iterator() { + return listResources().iterator(); + } + + @Override + public Iterable listResources() { + ArrayList resources = new ArrayList(); + JSONArray children = getChildren(); + for (int i = 0; i < children.length(); i++) { + resources.add(myFactory.getPublishedResource(children.getString(i), getPath())); + } + return resources; + } + + // Get's all "Has Part" children, standardized to send an array with 0,1, or + // more elements + JSONArray getChildren() { + Object o = null; + try { + o = resource.get("Has Part"); + } catch (JSONException e) { + // Doesn't exist - that's OK + } + if (o == null) { + return new JSONArray(); + } else { + if (o instanceof JSONArray) { + return (JSONArray) o; + } else if (o instanceof String) { + return new JSONArray("[ " + (String) o + " ]"); + } + return new JSONArray(); + } + } + + @Override + public long length() { + long size = 0; + if (!isDirectory()) { + size = Long.parseLong(resource.optString("Size")); // sead2 sends a + // number, 1.5 a + // string + } + return size; + } + + @Override + public String getAbsolutePath() { + return absPath; + } + + @Override + public ContentBody getContentBody() { + // While space and / etc. are already encoded, the quote char is not and + // it is not a valid char + // Fix Me - identify additional chars that need to be encoded... + String uri = resource.getString("similarTo").replace("\"", "%22").replace(";", "%3b"); + + try { + HttpEntity entity = myFactory.getURI(new URI(uri)); + return new InputStreamBody(entity.getContent(), ContentType.create(resource.getString("Mimetype")), + getName()); + } catch (IllegalStateException e) { + e.printStackTrace(); + } catch (JSONException e) { + e.printStackTrace(); + } catch (IOException e) { + e.printStackTrace(); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + return null; + } + + @Override + public InputStream getInputStream() { + // While space and / etc. are already encoded, the quote char is not and + // it is not a valid char + // Fix Me - identify additional chars that need to be encoded... 
+ String uri = resource.getString("similarTo").replace("\"", "%22").replace(";", "%3b"); + + try { + HttpEntity entity = myFactory.getURI(new URI(uri)); + return entity.getContent(); + + } catch (IOException e) { + e.printStackTrace(); + } catch (URISyntaxException e) { + e.printStackTrace(); + } + return null; + } + + public void setPath(String path) { + this.path = path; + } + + public void setAbsolutePath(String abspath) { + absPath = abspath; + } + + @Override + public String getHash(String algorithm) { + String hash = null; + if (algorithm.equals("SHA-1")) { + hash = resource.getString("SHA1 Hash"); + } + return hash; + } + + private static HashMap allRelationships = new HashMap(); + + @SuppressWarnings("unchecked") + @Override + public JSONObject getMetadata() { + ArrayList keysToKeep = new ArrayList(); + keysToKeep.addAll(resource.keySet()); + HashMap relationships = new HashMap(); + + HashMap changed = new HashMap(); + for (String key : (Set) resource.keySet()) { + if (blackList.containsKey(key)) { + // SEADUploader.println("Found: " + key + " : dropped"); + keysToKeep.remove(key); + } else if (grayList.containsKey(key)) { + // SEADUploader.println("Found: " + key + " : converted"); + if (!willConvert(key, resource.get(key))) { + keysToKeep.remove(key); + } + } else { + // else keep it, including the @context + // SEADUploader.println("Found: " + key + " : keeping"); + } + + } + // SEADUploader.println(resource.toString()); + JSONObject md = new JSONObject(resource, keysToKeep.toArray(new String[keysToKeep.size()])); + // Note @context may have unnecessary elements now - should not be + // harmful but could perhaps be removed + // SEADUploader.println(md.toString()); + + for (String key : (Set) md.keySet()) { + // SEADUploader.println("Checking: " + key + " : " + // + md.get(key).toString()); + if (md.get(key) instanceof String) { + String val = resource.getString(key); + // SEADUploader.println(key + " : " + val); + if (val.startsWith("tag:") || val.startsWith("urn:")) { + + relationships.put(key, val); + } else { + Object updated = convert(key, val); + if (updated != null) { + changed.put(key, updated); + } + } + } else if (md.get(key) instanceof JSONArray) { + JSONArray vals = md.getJSONArray(key); + JSONArray newvals = new JSONArray(); + JSONArray newrels = new JSONArray(); + + for (int i = 0; i < vals.length(); i++) { + // SEADUploader.println("Checking: " + i + " : " + // + vals.get(i).toString()); + if (vals.get(i) instanceof String) { + // relationships always have a string value by definition + if (vals.getString(i).startsWith("tag:") || vals.getString(i).startsWith("urn:")) { + newrels.put(vals.getString(i)); + } else { + Object updated = convert(key, vals.getString(i)); + if (updated != null) { + newvals.put(updated); + } + } + } else { + Object updated = convert(key, vals.get(i)); + if (updated != null) { + newvals.put(updated); + } + } + } + if (newvals.length() != 0) { + changed.put(key, newvals); + } + if (newrels.length() != 0) { + relationships.put(key, newrels); + } + } else { + changed.put(key, md.get(key)); + } + } + md = new JSONObject(changed); + md.put("Metadata on Original", origMD); + + allRelationships.put(resource.getString("Identifier"), relationships); + return md; + + } + + HashMap origMD = new HashMap(); + + private boolean willConvert(String key, Object object) { + + if (!ResourceFactory.grayConversions.containsKey(key)) { + if (key.equals("Label")) { + if ((origTitle != null) && (!((String) object).equals(origTitle))) { + // It's unique - move it to 
orig metadata + origMD.put(key, object); + } else { + // It's the same as the name/title - don't move it to the + // original metadata + } + } else { + origMD.put(key, object); + } + // Regardless, don't keep the original item + return false; + } + return true; + } + + private Object convert(String key, Object object) { + switch (key) { + case "Creator": + if (object instanceof JSONObject) { + object = ((JSONObject) object).getString("@id"); + } + break; + case "Contact": + if (object instanceof JSONObject) { + object = ((JSONObject) object).getString("@id"); + } + break; + case "External Identifier": + if (object instanceof String) { + if (((String) object).startsWith("http://doi.org/10.5072/")) { + object = null; + } + } + } + return object; + } + + public static HashMap getAllRelationships() { + return allRelationships; + } + + public String getIdentifier() { + return resource.getString("Identifier"); + + } + + /* + * For datasets, we send the abstract as a description - before processing + * metadata in general, so retrieve the value and then remove it so that + * duplicate metadata is not sent. + */ + public String getAndRemoveAbstract(boolean d2a) { + String theAbstract = null; + if (d2a) { + if (resource.has("Has Description")) { + Object descObject = resource.get("Has Description"); + if (descObject instanceof String) { + theAbstract = descObject.toString(); + } else if (descObject instanceof JSONArray) { + theAbstract = ((JSONArray) descObject).toString(); + } + resource.remove("Has Description"); + } + } + if (resource.has("Abstract")) { + if (theAbstract == null) { + theAbstract = ""; + } else { // Combining with a description - add a space between + theAbstract = theAbstract + " "; + } + if (resource.get("Abstract") instanceof JSONArray) { + // Convert multiple abstracts into 1 so it fits + // Clowder's single description field + // Could concatenate, but JSON will help if anyone wants + // to separate abstracts after migration + theAbstract = theAbstract + ((JSONArray) resource.getJSONArray("Abstract")).toString(2); + } else { + theAbstract = theAbstract + resource.getString("Abstract").toString(); + } + resource.remove("Abstract"); + } + return theAbstract; + } + + /* + * return the "Title" (which may be different than getName which comes from the + * "Label" + */ + public String getAndRemoveTitle() { + + if (resource.has("Title")) { + origTitle = resource.getString("Title"); + resource.remove("Title"); + } + return origTitle; + } + + /* + * For datasets, we send the creators via the dataset api - before processing + * metadata, so retrieve the value and then remove it so that duplicate metadata + * is not sent. 
+ */ + public void getAndRemoveCreator(List creators) { + + if (resource.has("Creator")) { + Object creatorField = resource.get("Creator"); + if (creatorField instanceof JSONArray) { + for (int i = 0; i < ((JSONArray) creatorField).length(); i++) { + Object creator = ((JSONArray) creatorField).get(i); + if (creator instanceof JSONObject) { + creators.add(((String) convert("Creator", creator))); + } else { + creators.add((String) creator); + } + } + } else if (creatorField instanceof JSONObject) { + creators.add(((String) convert("Creator", creatorField))); + } else { + creators.add(((String) creatorField)); + } + resource.remove("Creator"); + } + } + + @Override + public String getMimeType() { + if (resource.has("MimeType")) { + return resource.getString("MimeType"); + } + return null; + } +} diff --git a/src/main/java/org/sead/uploader/util/Resource.java b/src/main/java/org/sead/uploader/util/Resource.java index fe14e40..ce75f93 100644 --- a/src/main/java/org/sead/uploader/util/Resource.java +++ b/src/main/java/org/sead/uploader/util/Resource.java @@ -1,48 +1,47 @@ -/******************************************************************************* - * Copyright 2016 University of Michigan - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ - -package org.sead.uploader.util; - -import java.io.InputStream; - -import org.apache.http.entity.mime.content.ContentBody; -import org.json.JSONObject; - -public interface Resource extends Iterable { - - String getName(); - - boolean isDirectory(); - - String getPath(); - - long length(); - - String getAbsolutePath(); - - ContentBody getContentBody(); - - InputStream getInputStream(); - - Iterable listResources(); - - String getHash(String algorithm); - - JSONObject getMetadata(); - - String getMimeType(); - -} +/** ***************************************************************************** + * Copyright 2016 University of Michigan + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ ***************************************************************************** */ +package org.sead.uploader.util; + +import java.io.InputStream; + +import org.apache.http.entity.mime.content.ContentBody; +import org.json.JSONObject; + +public interface Resource extends Iterable { + + String getName(); + + boolean isDirectory(); + + String getPath(); + + long length(); + + String getAbsolutePath(); + + ContentBody getContentBody(); + + InputStream getInputStream(); + + Iterable listResources(); + + String getHash(String algorithm); + + JSONObject getMetadata(); + + String getMimeType(); + +} diff --git a/src/main/java/org/sead/uploader/util/ResourceFactory.java b/src/main/java/org/sead/uploader/util/ResourceFactory.java index 2af8ee2..bc60a31 100644 --- a/src/main/java/org/sead/uploader/util/ResourceFactory.java +++ b/src/main/java/org/sead/uploader/util/ResourceFactory.java @@ -1,218 +1,218 @@ -/******************************************************************************* - * Copyright 2016 University of Michigan - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - ******************************************************************************/ -package org.sead.uploader.util; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; - -import org.apache.http.HttpEntity; -import org.apache.http.ParseException; -import org.apache.http.client.ClientProtocolException; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.CloseableHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.util.EntityUtils; -import org.json.JSONArray; -import org.json.JSONObject; -import org.sead.uploader.clowder.SEADUploader; - -public class ResourceFactory { - - public static final HashMap grayConversions = new HashMap() { - { - - put("Creation Date", - "http://sead-data.net/terms/originalcreationdate"); - put("Uploaded By", "http://purl-data.net/terms/originalsubmitter"); - put("Date", "http://sead-data.net/terms/originalcreationdate"); - put("Instance Of", "http://sead-data.net/terms/originaldiskpath"); - put("External Identifier", - "http://sead-data.net/terms/publishedidoforiginal"); - } - }; - - public static final HashMap graySwaps = new HashMap() { - { - - put("Creation Date", "Original Creation Date"); - put("Uploaded By", "Originally Uploaded By"); - put("Date", "Original Creation Date"); - put("Instance Of", "Original Disk Path"); - put("External Identifier", "Persistent Identifier of Original"); - } - }; - - JSONObject oremap; - ArrayList index; - JSONArray aggregates; - String rootPath; - - private CloseableHttpClient client; - - public ResourceFactory(URL oremapURL) { - client = HttpClients.custom().build(); - try { - HttpEntity he = getURI(oremapURL.toURI()); - String mapString; - - mapString = 
EntityUtils.toString(he, "UTF-8"); - - oremap = new JSONObject(mapString); - // private ArrayList indexResources(String aggId, JSONArray - // aggregates) { - JSONObject aggregation = oremap.getJSONObject("describes"); - String aggId = aggregation.getString("Identifier"); - rootPath = "/" + aggId + "/data/" + aggregation.getString("Title"); - aggregates = aggregation.getJSONArray("aggregates"); - ArrayList l = new ArrayList(aggregates.length() + 1); - l.add(aggId); - for (int i = 0; i < aggregates.length(); i++) { - l.add(aggregates.getJSONObject(i).getString("Identifier")); - } - index = l; - } catch (ParseException | IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (URISyntaxException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - PublishedResource.setResourceFactory(this); - - } - - HttpEntity getURI(URI uri) { - int tries = 0; - while (tries < 5) { - try { - HttpGet getResource = new HttpGet(uri); - getResource.setHeader("Content-type", "application/json;charset=utf-8"); - - CloseableHttpResponse response; - response = client.execute(getResource); - if (response.getStatusLine().getStatusCode() == 200) { - return response.getEntity(); - } - tries++; - - } catch (ClientProtocolException e) { - tries += 5; - // TODO Auto-generated catch block - e.printStackTrace(); - } catch (IOException e) { - // Retry if this is a potentially temporary error such - // as a timeout - tries++; -/* log.warn("Attempt# " + tries + " : Unable to retrieve file: " - + uri, e); - if (tries == 5) { - log.error("Final attempt failed for " + uri); - } -*/ - e.printStackTrace(); - } - } - return null; - - } - - public Resource getPublishedResource(String id, String parentPath) { - int i = index.indexOf(id); - PublishedResource p = new PublishedResource( - aggregates.getJSONObject(i - 1)); - String path = parentPath + "/" + p.getName(); - p.setAbsolutePath(rootPath + "/" + path); - p.setPath(path); - return p; - } - - public PublishedResource getParentResource() { - JSONObject agg = oremap.getJSONObject("describes"); - agg.remove("aggregates"); - PublishedResource p = new PublishedResource(agg); - p.setAbsolutePath(rootPath); - p.setPath(p.getName()); - return p; - } - - static HashMap grayContext = null; - - public String getURIForContextEntry(String key) { - if (grayContext == null) { - grayContext = new HashMap(); - for (String oldKey : graySwaps.keySet()) { - grayContext.put(graySwaps.get(oldKey), - grayConversions.get(oldKey)); - } - } - String uri = null; - if (oreTerms.contains(key)) { - uri = orePredBaseString + key; - } else if ("Metadata on Original".equals(key)) { - uri = "http://sead-data.net/terms/sourceMetadata"; - } else if ("Geolocation".equals(key)) { - uri = "http://sead-data.net/terms/hasGeolocation"; - }else if ("Latitude".equals(key)) { - uri = "http://www.w3.org/2003/01/geo/wgs84_pos#lat"; - }else if ("Longitude".equals(key)) { - uri = "http://www.w3.org/2003/01/geo/wgs84_pos#long"; - } else if ("Upload Path".equals(key)) { - uri = SEADUploader.FRBR_EO; - } else if (grayContext.containsKey(key)) { - uri = grayContext.get(key); - } else { - Object context = oremap.get("@context"); - uri = getURIForContextEntry(context, key); - } - return uri; - } - - private String[] words = { "describes", "AggregatedResource", - "Aggregation", "ResourceMap", "similarTo", "aggregates" }; - private HashSet oreTerms = new HashSet(Arrays.asList(words)); - - private String orePredBaseString = "http://www.openarchives.org/ore/terms/"; - - private String 
getURIForContextEntry(Object context, String key) { - String uri = null; - if (context instanceof JSONArray) { - for (int i = 0; i < ((JSONArray) context).length(); i++) { - uri = getURIForContextEntry(((JSONArray) context).get(i), key); - if (uri != null) { - return uri; - } - } - } else if (context instanceof JSONObject) { - if (((JSONObject) context).has(key)) { - - // FixMe - support values that are objects with an @id entry... - uri = ((JSONObject) context).getString(key); - } - } - if (grayConversions.containsKey(key)) { - uri = grayConversions.get(key); - } - return uri; - } - -} +/** ***************************************************************************** + * Copyright 2016 University of Michigan + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + ***************************************************************************** */ +package org.sead.uploader.util; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; + +import org.apache.http.HttpEntity; +import org.apache.http.ParseException; +import org.apache.http.client.ClientProtocolException; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.util.EntityUtils; +import org.json.JSONArray; +import org.json.JSONObject; +import org.sead.uploader.clowder.SEADUploader; + +public class ResourceFactory { + + public static final HashMap grayConversions = new HashMap() { + { + + put("Creation Date", + "http://sead-data.net/terms/originalcreationdate"); + put("Uploaded By", "http://purl-data.net/terms/originalsubmitter"); + put("Date", "http://sead-data.net/terms/originalcreationdate"); + put("Instance Of", "http://sead-data.net/terms/originaldiskpath"); + put("External Identifier", + "http://sead-data.net/terms/publishedidoforiginal"); + } + }; + + public static final HashMap graySwaps = new HashMap() { + { + + put("Creation Date", "Original Creation Date"); + put("Uploaded By", "Originally Uploaded By"); + put("Date", "Original Creation Date"); + put("Instance Of", "Original Disk Path"); + put("External Identifier", "Persistent Identifier of Original"); + } + }; + + JSONObject oremap; + ArrayList index; + JSONArray aggregates; + String rootPath; + + private CloseableHttpClient client; + + public ResourceFactory(URL oremapURL) { + client = HttpClients.custom().build(); + try { + HttpEntity he = getURI(oremapURL.toURI()); + String mapString; + + mapString = EntityUtils.toString(he, "UTF-8"); + + oremap = new JSONObject(mapString); + // private ArrayList indexResources(String aggId, JSONArray + // aggregates) { + JSONObject aggregation = oremap.getJSONObject("describes"); + String aggId = aggregation.getString("Identifier"); + rootPath = "/" + aggId + "/data/" + 
aggregation.getString("Title"); + aggregates = aggregation.getJSONArray("aggregates"); + ArrayList l = new ArrayList(aggregates.length() + 1); + l.add(aggId); + for (int i = 0; i < aggregates.length(); i++) { + l.add(aggregates.getJSONObject(i).getString("Identifier")); + } + index = l; + } catch (ParseException | IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (URISyntaxException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + PublishedResource.setResourceFactory(this); + + } + + HttpEntity getURI(URI uri) { + int tries = 0; + while (tries < 5) { + try { + HttpGet getResource = new HttpGet(uri); + getResource.setHeader("Content-type", "application/json;charset=utf-8"); + + CloseableHttpResponse response; + response = client.execute(getResource); + if (response.getStatusLine().getStatusCode() == 200) { + return response.getEntity(); + } + tries++; + + } catch (ClientProtocolException e) { + tries += 5; + // TODO Auto-generated catch block + e.printStackTrace(); + } catch (IOException e) { + // Retry if this is a potentially temporary error such + // as a timeout + tries++; + /* log.warn("Attempt# " + tries + " : Unable to retrieve file: " + + uri, e); + if (tries == 5) { + log.error("Final attempt failed for " + uri); + } + */ + e.printStackTrace(); + } + } + return null; + + } + + public Resource getPublishedResource(String id, String parentPath) { + int i = index.indexOf(id); + PublishedResource p = new PublishedResource( + aggregates.getJSONObject(i - 1)); + String path = parentPath + "/" + p.getName(); + p.setAbsolutePath(rootPath + "/" + path); + p.setPath(path); + return p; + } + + public PublishedResource getParentResource() { + JSONObject agg = oremap.getJSONObject("describes"); + agg.remove("aggregates"); + PublishedResource p = new PublishedResource(agg); + p.setAbsolutePath(rootPath); + p.setPath(p.getName()); + return p; + } + + static HashMap grayContext = null; + + public String getURIForContextEntry(String key) { + if (grayContext == null) { + grayContext = new HashMap(); + for (String oldKey : graySwaps.keySet()) { + grayContext.put(graySwaps.get(oldKey), + grayConversions.get(oldKey)); + } + } + String uri = null; + if (oreTerms.contains(key)) { + uri = orePredBaseString + key; + } else if ("Metadata on Original".equals(key)) { + uri = "http://sead-data.net/terms/sourceMetadata"; + } else if ("Geolocation".equals(key)) { + uri = "http://sead-data.net/terms/hasGeolocation"; + } else if ("Latitude".equals(key)) { + uri = "http://www.w3.org/2003/01/geo/wgs84_pos#lat"; + } else if ("Longitude".equals(key)) { + uri = "http://www.w3.org/2003/01/geo/wgs84_pos#long"; + } else if ("Upload Path".equals(key)) { + uri = SEADUploader.FRBR_EO; + } else if (grayContext.containsKey(key)) { + uri = grayContext.get(key); + } else { + Object context = oremap.get("@context"); + uri = getURIForContextEntry(context, key); + } + return uri; + } + + private String[] words = {"describes", "AggregatedResource", + "Aggregation", "ResourceMap", "similarTo", "aggregates"}; + private HashSet oreTerms = new HashSet(Arrays.asList(words)); + + private String orePredBaseString = "http://www.openarchives.org/ore/terms/"; + + private String getURIForContextEntry(Object context, String key) { + String uri = null; + if (context instanceof JSONArray) { + for (int i = 0; i < ((JSONArray) context).length(); i++) { + uri = getURIForContextEntry(((JSONArray) context).get(i), key); + if (uri != null) { + return uri; + } + } + } else if (context instanceof 
JSONObject) { + if (((JSONObject) context).has(key)) { + + // FixMe - support values that are objects with an @id entry... + uri = ((JSONObject) context).getString(key); + } + } + if (grayConversions.containsKey(key)) { + uri = grayConversions.get(key); + } + return uri; + } + +}
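A minimal usage sketch (not part of the patch itself) may help when reviewing the ResourceFactory/PublishedResource changes above: it fetches an ORE map, builds the factory, and walks the published Research Object, printing collections and file sizes. The ORE map URL and the WalkPublication class name are placeholders, and it assumes the classes touched in this patch are on the classpath.

package org.sead.uploader.util;

import java.net.URL;

/*
 * Review aid only, not part of the patch: walks a published Research Object
 * via the classes modified above. The map URL and this class name are
 * illustrative placeholders.
 */
public class WalkPublication {

    public static void main(String[] args) throws Exception {
        // ORE map describing the publication; replace with a real map URL.
        URL oremapURL = new URL("https://example.org/ro/oremap.json");

        // The constructor fetches and parses the map and registers itself
        // with PublishedResource via setResourceFactory().
        ResourceFactory factory = new ResourceFactory(oremapURL);

        // Top-level aggregation (the dataset/collection being published).
        PublishedResource root = factory.getParentResource();
        walk(root, 0);
    }

    private static void walk(Resource r, int depth) {
        StringBuilder indent = new StringBuilder();
        for (int i = 0; i < depth; i++) {
            indent.append("  ");
        }
        if (r.isDirectory()) {
            System.out.println(indent + "[C] " + r.getName());
        } else {
            System.out.println(indent + "[F] " + r.getName() + " (" + r.length() + " bytes)");
        }
        // Resource is a raw Iterable in this codebase, so children arrive as
        // Objects supplied by PublishedResource.listResources().
        for (Object child : r) {
            walk((Resource) child, depth + 1);
        }
    }
}

Note that ResourceFactory.getURI() retries up to five times on IOException, so a walk like this tolerates transient failures when fetching the map or file content.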