Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GOBBLIN-2162] Only load added jars in the cache #4062

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,13 @@ private void addContainerLocalResources(Path destDir, Map<String, LocalResource>

FileStatus[] statuses = this.fs.listStatus(destDir);
if (statuses != null) {
Set<String> appLibJars = YarnHelixUtils.getAppLibJarList(this.config);
for (FileStatus status : statuses) {
String fileName = status.getPath().getName();
// Ensure that we are only adding jars that were uploaded by the YarnAppLauncher for this application
if (fileName.contains(".jar") && !appLibJars.contains(fileName)) {
continue;
}
YarnHelixUtils.addFileAsLocalResource(this.fs, status.getPath(), LocalResourceType.FILE, resourceMap);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ public class GobblinYarnAppLauncher {

private final boolean jarCacheEnabled;

private final Set<String> libJarNames = new HashSet<>(); // List of jars that are shared between appMaster and containers

public GobblinYarnAppLauncher(Config config, YarnConfiguration yarnConfiguration) throws IOException {
this.config = config.withValue(GobblinYarnConfigurationKeys.YARN_APPLICATION_LAUNCHER_START_TIME_KEY,
ConfigValueFactory.fromAnyRef(System.currentTimeMillis()));
Expand Down Expand Up @@ -668,6 +670,7 @@ private Map<String, LocalResource> addAppMasterLocalResources(ApplicationId appl
Path unsharedJarsDestDir = new Path(appWorkDir, GobblinYarnConfigurationKeys.LIB_JARS_DIR_NAME);
addLibJars(new Path(this.config.getString(GobblinYarnConfigurationKeys.LIB_JARS_DIR_KEY)),
Optional.of(appMasterResources), libJarsDestDir, unsharedJarsDestDir, localFs);
this.libJarNames.addAll(appMasterResources.keySet());
LOGGER.info("Added lib jars to directory: {} and execution-private directory: {}", libJarsDestDir, unsharedJarsDestDir);
}
if (this.config.hasPath(GobblinYarnConfigurationKeys.APP_MASTER_JARS_KEY)) {
Expand Down Expand Up @@ -814,6 +817,7 @@ protected String buildApplicationMasterCommand(String applicationId, int memoryM
.append(" -D").append(GobblinYarnConfigurationKeys.GOBBLIN_YARN_CONTAINER_LOG_DIR_NAME).append("=").append(ApplicationConstants.LOG_DIR_EXPANSION_VAR)
.append(" -D").append(GobblinYarnConfigurationKeys.GOBBLIN_YARN_CONTAINER_LOG_FILE_NAME).append("=").append(logFileName).append(".").append(ApplicationConstants.STDOUT)
.append(" -D").append(GobblinYarnConfigurationKeys.YARN_APPLICATION_LAUNCHER_START_TIME_KEY).append("=").append(config.getString(GobblinYarnConfigurationKeys.YARN_APPLICATION_LAUNCHER_START_TIME_KEY))
.append(" -D").append(GobblinYarnConfigurationKeys.YARN_APPLICATION_LIB_JAR_LIST).append("=").append(String.join(",", this.libJarNames))
.append(" ").append(JvmUtils.formatJvmArguments(this.appMasterJvmArgs))
.append(" ").append(appMasterClass.getName())
.append(" --").append(GobblinClusterConfigurationKeys.APPLICATION_NAME_OPTION_NAME)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ public class GobblinYarnConfigurationKeys {

public static final String JAR_CACHE_DIR = GOBBLIN_YARN_PREFIX + "jar.cache.dir";

// Comma-separated names of the lib jars registered by the app launcher; the launcher passes the list to the
// application master as a -D system property so that only those jars are loaded from the jar directory
public static final String YARN_APPLICATION_LIB_JAR_LIST = GOBBLIN_YARN_PREFIX + "lib.jar.list";

// Used to store the start time of the app launcher to propagate to workers and appmaster
public static final String YARN_APPLICATION_LAUNCHER_START_TIME_KEY = GOBBLIN_YARN_PREFIX + "application.start.time";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;

Expand All @@ -52,6 +54,7 @@

import com.google.common.base.Splitter;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import com.typesafe.config.Config;

import org.apache.gobblin.util.ConfigUtils;
Expand Down Expand Up @@ -237,6 +240,16 @@ public static boolean retainKLatestJarCachePaths(Path parentCachePath, int k, Fi
return deletesSuccessful;
}


/**
 * Collects the jar names that belong to the current application.
 *
 * <p>The names are read from two comma-separated config values,
 * {@link GobblinYarnConfigurationKeys#YARN_APPLICATION_LIB_JAR_LIST} and
 * {@link GobblinYarnConfigurationKeys#CONTAINER_JARS_KEY}, and merged into one set. A missing key is
 * treated as an empty list. Each entry is trimmed, so a list written as {@code "a.jar, b.jar"} yields
 * {@code "a.jar"} and {@code "b.jar"} rather than an entry with leading whitespace that could never
 * match a file name. Blank entries are excluded.
 *
 * @param config the configuration to read both jar lists from
 * @return the merged jar names without blank entries; the result is a filtered view that does not
 *         accept empty strings
 */
public static Set<String> getAppLibJarList(Config config) {
  Set<String> jarNames = new HashSet<>();
  for (String key : Arrays.asList(GobblinYarnConfigurationKeys.YARN_APPLICATION_LIB_JAR_LIST,
      GobblinYarnConfigurationKeys.CONTAINER_JARS_KEY)) {
    for (String entry : ConfigUtils.getString(config, key, "").split(",")) {
      // Trim so that whitespace around the separators does not end up inside the jar name
      jarNames.add(entry.trim());
    }
  }
  // Splitting an empty (or whitespace-only) value produces an empty entry; hide those from callers
  return Sets.filter(jarNames, name -> !name.isEmpty());
}

public static void addRemoteFilesToLocalResources(String hdfsFileList, Map<String, LocalResource> resourceMap, Configuration yarnConfiguration) throws IOException {
for (String hdfsFilePath : SPLITTER.split(hdfsFileList)) {
Path srcFilePath = new Path(hdfsFilePath);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.gobblin.yarn;

import java.io.IOException;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
Expand Down Expand Up @@ -97,6 +98,39 @@ public void retainLatestKJarCachePaths() throws IOException {
// Should be cleaned up
Assert.assertFalse(fs.exists(new Path(this.tempDir, "tmp/2024-07")));
Assert.assertFalse(fs.exists(new Path(this.tempDir, "tmp/2024-06")));
}

@Test
public void testGetJarListFromConfigs() {
  final String libJarListKey = GobblinYarnConfigurationKeys.YARN_APPLICATION_LIB_JAR_LIST;
  final String containerJarsKey = GobblinYarnConfigurationKeys.CONTAINER_JARS_KEY;

  // Case 1: only the application lib jar list carries entries, the container jars value is blank
  Config libJarsOnlyConfig = ConfigFactory.empty()
      .withValue(libJarListKey, ConfigValueFactory.fromAnyRef("a.jar,b.jar"))
      .withValue(containerJarsKey, ConfigValueFactory.fromAnyRef(""));
  Set<String> libJarsOnly = YarnHelixUtils.getAppLibJarList(libJarsOnlyConfig);
  Assert.assertEquals(2, libJarsOnly.size());
  Assert.assertTrue(libJarsOnly.contains("a.jar"));
  Assert.assertTrue(libJarsOnly.contains("b.jar"));

  // Case 2: only the container jars carry entries, the application lib jar list is blank
  Config containerJarsOnlyConfig = ConfigFactory.empty()
      .withValue(libJarListKey, ConfigValueFactory.fromAnyRef(""))
      .withValue(containerJarsKey, ConfigValueFactory.fromAnyRef("c.jar,d.jar"));
  Set<String> containerJarsOnly = YarnHelixUtils.getAppLibJarList(containerJarsOnlyConfig);
  Assert.assertEquals(2, containerJarsOnly.size());
  Assert.assertTrue(containerJarsOnly.contains("c.jar"));
  Assert.assertTrue(containerJarsOnly.contains("d.jar"));

  // Case 3: both values carry entries, so the result is the union of the two lists
  Config bothListsConfig = ConfigFactory.empty()
      .withValue(libJarListKey, ConfigValueFactory.fromAnyRef("a.jar,b.jar"))
      .withValue(containerJarsKey, ConfigValueFactory.fromAnyRef("c.jar,d.jar"));
  Set<String> allJars = YarnHelixUtils.getAppLibJarList(bothListsConfig);
  Assert.assertEquals(4, allJars.size());
  Assert.assertTrue(allJars.contains("a.jar"));
  Assert.assertTrue(allJars.contains("b.jar"));
  Assert.assertTrue(allJars.contains("c.jar"));
  Assert.assertTrue(allJars.contains("d.jar"));
}
}
Loading