Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use loading cache in google cloud executor #1377

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,29 +1,33 @@
package org.datatransferproject.cloud.google;

import static java.lang.String.format;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import com.google.cloud.datastore.*;
import com.google.cloud.datastore.StructuredQuery.CompositeFilter;
import com.google.cloud.datastore.StructuredQuery.PropertyFilter;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.datatransferproject.api.launcher.Monitor;
import org.datatransferproject.spi.transfer.idempotentexecutor.IdempotentImportExecutor;
import org.datatransferproject.types.transfer.errors.ErrorDetail;

import java.io.IOException;
import java.io.Serializable;
import java.time.Duration;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.UUID;
import java.util.concurrent.Callable;

import static java.lang.String.format;
import java.util.concurrent.ExecutionException;
import org.datatransferproject.api.launcher.Monitor;
import org.datatransferproject.spi.transfer.idempotentexecutor.IdempotentImportExecutor;
import org.datatransferproject.types.transfer.errors.ErrorDetail;

public class GoogleCloudIdempotentImportExecutor implements IdempotentImportExecutor {

Expand All @@ -39,9 +43,10 @@ public class GoogleCloudIdempotentImportExecutor implements IdempotentImportExec
private final Monitor monitor;
private final ObjectMapper objectMapper;

// These are all variables corresponding to the job state. Only initialized when setJobId() is called
private Map<String, Serializable> knownValues;
private Map<String, ErrorDetail> errors;
// These are all variables corresponding to the job state. Only initialized when setJobId() is
// called
private LoadingCache<String, Optional<Serializable>> knownValuesCache = null;
private LoadingCache<String, Optional<ErrorDetail>> errorsCache = null;
private UUID jobId;
private String jobIdPrefix;

Expand Down Expand Up @@ -69,12 +74,12 @@ public <T extends Serializable> T executeOrThrowException(
String idempotentId, String itemName, Callable<T> callable) throws Exception {
Preconditions.checkNotNull(jobId, "executing a callable before initialization of a job");

if (knownValues.containsKey(idempotentId)) {
if (knownValuesCache.get(idempotentId).isPresent()) {
monitor.debug(
() ->
jobIdPrefix
+ format("Using cached key %s from cache for %s", idempotentId, itemName));
return (T) knownValues.get(idempotentId);
return (T) knownValuesCache.get(idempotentId).get();
}

try {
Expand All @@ -98,54 +103,60 @@ public <T extends Serializable> T executeOrThrowException(

private <T extends Serializable> void addResult(String idempotentId, T result)
throws IOException {
knownValues.put(idempotentId, result);

try {
Transaction transaction = datastore.newTransaction();

transaction.put(createResultEntity(idempotentId, result));
if (errors.containsKey(idempotentId)) {
// if the errors contain this key, that means the ID
if (errorsCache.get(idempotentId).isPresent()) {
// if the errors contain this key, that means the ID failed perviously,
// since it is succeeding now, remove it from the errors datastore
transaction.delete(getErrorKey(idempotentId, jobId));
errors.remove(idempotentId);
}
transaction.commit();
} catch (DatastoreException e) {
} catch (DatastoreException | ExecutionException e) {
monitor.severe(() -> jobIdPrefix + "Error writing result to datastore: " + e);
} finally {
errorsCache.invalidate(idempotentId);
knownValuesCache.put(idempotentId, Optional.of(result));
}
}

private void addError(String idempotentId, ErrorDetail errorDetail) throws IOException {
errors.put(idempotentId, errorDetail);
try {
Transaction transaction = datastore.newTransaction();
transaction.put(createErrorEntity(idempotentId, errorDetail));
transaction.commit();
} catch (DatastoreException e) {
monitor.severe(() -> jobIdPrefix + "Error writing ErrorDetails to datastore: " + e);
} finally {
errorsCache.put(idempotentId, Optional.of(errorDetail));
}
}

@Override
public <T extends Serializable> T getCachedValue(String idempotentId)
throws IllegalArgumentException {
if (!knownValues.containsKey(idempotentId)) {
throw new IllegalArgumentException(
idempotentId
+ " is not a known key, known keys: "
+ Joiner.on(", ").join(knownValues.keySet()));
try {
if (knownValuesCache.get(idempotentId).isEmpty()) {
throw new IllegalArgumentException(idempotentId + " is not a known key");
}
return (T) knownValuesCache.get(idempotentId).get();
} catch (ExecutionException e) {
throw new IllegalStateException(e);
}
return (T) knownValues.get(idempotentId);
}

@Override
public boolean isKeyCached(String idempotentId) {
return knownValues.containsKey(idempotentId);
try {
return knownValuesCache.get(idempotentId).isPresent();
} catch (ExecutionException e) {
throw new IllegalStateException(e);
}
}

@Override
public Collection<ErrorDetail> getErrors() {
return ImmutableList.copyOf(errors.values());
return ImmutableList.copyOf(getErrorDetailsForJob(jobId).values());
}

// In non-tests setJobId is only ever called once per executor, so the initialization of
Expand All @@ -154,9 +165,53 @@ public Collection<ErrorDetail> getErrors() {
public void setJobId(UUID jobId) {
Preconditions.checkNotNull(jobId);
this.jobId = jobId;
this.knownValues = getKnownValuesForJob(jobId);
this.errors = getErrorDetailsForJob(jobId);
jobIdPrefix = "Job " + jobId + ": ";
this.knownValuesCache =
CacheBuilder.newBuilder()
.maximumSize(1000)
.expireAfterAccess(Duration.ofMinutes(30))
.build(
new CacheLoader<String, Optional<Serializable>>() {
@Override
public Optional<Serializable> load(String idempotentId) {
return getKnownValue(idempotentId);
}
});

this.errorsCache =
CacheBuilder.newBuilder()
.maximumSize(1000)
.expireAfterAccess(Duration.ofMinutes(30))
.build(
new CacheLoader<String, Optional<ErrorDetail>>() {
@Override
public Optional<ErrorDetail> load(String idempotentId) {
return getError(idempotentId);
}
});
}

private Optional<Serializable> getKnownValue(String idempotentId) {
Preconditions.checkNotNull(jobId);
try {
return Optional.of(
datastore.get(getResultsKey(idempotentId, jobId)).getString(RESULTS_FIELD));
} catch (Exception e) {
return Optional.empty();
}
}

private Optional<ErrorDetail> getError(String idempotentId) {
Preconditions.checkNotNull(jobId);
try {
Entity error = datastore.get(getErrorKey(idempotentId, jobId));
return error == null
? Optional.empty()
: Optional.of(
objectMapper.readerFor(ErrorDetail.class).readValue(error.getString(ERROR_FIELD)));
} catch (IOException e) {
return Optional.empty();
}
}

private Map<String, Serializable> getKnownValuesForJob(UUID jobId) {
Expand Down Expand Up @@ -201,7 +256,6 @@ private Map<String, ErrorDetail> getErrorDetailsForJob(UUID jobId) {
return datastoreKnownErrors;
}


private <T extends Serializable> Entity createResultEntity(String idempotentId, T result)
throws IOException {
return createResultEntity(idempotentId, this.jobId, result);
Expand All @@ -211,10 +265,14 @@ private <T extends Serializable> Entity createResultEntity(String idempotentId,
<T extends Serializable> Entity createResultEntity(String idempotentId, UUID jobId, T result)
throws IOException {
return GoogleCloudUtils.createEntityBuilder(
getResultsKey(idempotentId, jobId),
ImmutableMap.of(
RESULTS_FIELD, result, JOB_ID_FIELD, jobId.toString(), IDEMPOTENT_ID_FIELD,
idempotentId))
getResultsKey(idempotentId, jobId),
ImmutableMap.of(
RESULTS_FIELD,
result,
JOB_ID_FIELD,
jobId.toString(),
IDEMPOTENT_ID_FIELD,
idempotentId))
.build();
}

Expand All @@ -230,17 +288,16 @@ private Entity createErrorEntity(String idempotentId, ErrorDetail error) throws
}

@VisibleForTesting
Entity createErrorEntity(String idempotentId, UUID jobId, ErrorDetail error)
throws IOException {
Entity createErrorEntity(String idempotentId, UUID jobId, ErrorDetail error) throws IOException {
return GoogleCloudUtils.createEntityBuilder(
getErrorKey(idempotentId, jobId),
ImmutableMap.of(
ERROR_FIELD,
objectMapper.writeValueAsString(error),
JOB_ID_FIELD,
jobId.toString(),
IDEMPOTENT_ID_FIELD,
idempotentId))
getErrorKey(idempotentId, jobId),
ImmutableMap.of(
ERROR_FIELD,
objectMapper.writeValueAsString(error),
JOB_ID_FIELD,
jobId.toString(),
IDEMPOTENT_ID_FIELD,
idempotentId))
.build();
}

Expand Down
Loading