SOLR-16844: Optionally skip backup of configset files (apache#1733)
tflobbe authored Jun 28, 2023
1 parent b191650 commit e96309a
Showing 10 changed files with 174 additions and 6 deletions.
2 changes: 2 additions & 0 deletions solr/CHANGES.txt
@@ -72,6 +72,8 @@ New Features

* SOLR-16827: Add min/max scaling to the reranker (Joel Bernstein)

* SOLR-16844: Added new parameter `backupConfigset` to collection Backup to optionally skip backing up configsets. (Tomás Fernández Löbbe)

Improvements
---------------------

@@ -84,6 +84,7 @@ public void call(ClusterState state, ZkNodeProps message, NamedList<Object> resu
String backupName = message.getStr(NAME);
String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY);
boolean incremental = message.getBool(CoreAdminParams.BACKUP_INCREMENTAL, true);
boolean backupConfigset = message.getBool(CoreAdminParams.BACKUP_CONFIGSET, true);
String configName =
ccc.getSolrCloudManager()
.getClusterStateProvider()
@@ -145,8 +146,12 @@ public void call(ClusterState state, ZkNodeProps message, NamedList<Object> resu

log.info("Starting to backup ZK data for backupName={}", backupName);

// Download the configs
backupMgr.downloadConfigDir(configName, cc.getConfigSetService());
backupMgr.createZkStateDir();

if (backupConfigset) {
// Download the configs
backupMgr.downloadConfigDir(configName, cc.getConfigSetService());
}

// Save the collection's state. Can be part of the monolithic clusterstate.json or an
// individual state.json. Since we don't want to distinguish, we extract the state and back it
@@ -321,12 +321,14 @@ private void uploadConfig(
ConfigSetService configSetService)
throws IOException {
if (configSetService.checkConfigExists(restoreConfigName)) {
log.warn(
log.info(
"Config with name {} already exists. Skipping upload to Zookeeper and using existing config.",
restoreConfigName);
// TODO add overwrite option?
} else {
log.info("Uploading config {}", restoreConfigName);
log.info(
"Config with name {} does not already exist in ZooKeeper. Will restore from Backup.",
restoreConfigName);

backupMgr.uploadConfigDir(configName, restoreConfigName, configSetService);
}
@@ -254,7 +254,7 @@ public void uploadConfigDir(
throws IOException {
URI source = repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR, sourceConfigName);
if (!repository.exists(source)) {
throw new IllegalArgumentException("Path " + source + " does not exist");
throw new IllegalArgumentException("Configset expected at " + source + " does not exist");
}
uploadConfigToSolrCloud(configSetService, source, targetConfigName, "");
}
@@ -270,7 +270,6 @@ public void uploadConfigDir(
public void downloadConfigDir(String configName, ConfigSetService configSetService)
throws IOException {
URI dest = repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR, configName);
repository.createDirectory(getZkStateDir());
repository.createDirectory(repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR));
repository.createDirectory(dest);

@@ -404,4 +403,8 @@ private URI getZkStateDir() {
}
return zkStateDir;
}

public void createZkStateDir() throws IOException {
repository.createDirectory(getZkStateDir());
}
}
@@ -24,6 +24,7 @@
import static org.apache.solr.common.params.CollectionAdminParams.INDEX_BACKUP_STRATEGY;
import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
import static org.apache.solr.common.params.CommonParams.NAME;
import static org.apache.solr.common.params.CoreAdminParams.BACKUP_CONFIGSET;
import static org.apache.solr.common.params.CoreAdminParams.BACKUP_INCREMENTAL;
import static org.apache.solr.common.params.CoreAdminParams.BACKUP_LOCATION;
import static org.apache.solr.common.params.CoreAdminParams.BACKUP_REPOSITORY;
@@ -162,6 +163,7 @@ public static CreateCollectionBackupRequestBody createRequestBodyFromV1Params(So
requestBody.backupStrategy = params.get(INDEX_BACKUP_STRATEGY);
requestBody.snapshotName = params.get(COMMIT_NAME);
requestBody.incremental = params.getBool(BACKUP_INCREMENTAL);
requestBody.backupConfigset = params.getBool(BACKUP_CONFIGSET);
requestBody.maxNumBackupPoints = params.getInt(MAX_NUM_BACKUP_POINTS);
requestBody.async = params.get(ASYNC);

@@ -186,6 +188,7 @@ public static class CreateCollectionBackupRequestBody implements JacksonReflectM
@JsonProperty public String backupStrategy;
@JsonProperty public String snapshotName;
@JsonProperty public Boolean incremental;
@JsonProperty public Boolean backupConfigset;
@JsonProperty public Integer maxNumBackupPoints;
@JsonProperty public String async;
}
@@ -1664,6 +1664,15 @@ The upper-bound on how many backups should be retained at the backup location.
If the current number exceeds this bound, older backups will be deleted until only `maxNumBackupPoints` backups remain.
This parameter has no effect if `incremental=false` is specified.

`backupConfigset`::
+
[%autowidth,frame=none]
|===
|Optional |Default: true
|===
+
Indicates whether configset files should be included in the backup along with the index. Note that in order to restore a collection, the configset must either exist in ZooKeeper or be part of the backup. Only set this to `false` if you can restore the configset by means external to Solr (e.g. it is stored with your application source code, or it is part of your ZooKeeper backups).

`incremental`::
+
[%autowidth,frame=none]
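For illustration, here is a minimal sketch (not part of the commit) of a raw v1 Collections API Backup request that sets the `backupConfigset=false` parameter documented above. The ZooKeeper address, collection, backup name, repository (`localfs`), and location values are placeholder assumptions.

import java.util.List;
import java.util.Optional;

import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.GenericSolrRequest;
import org.apache.solr.common.params.ModifiableSolrParams;

public class BackupWithoutConfigsetExample {
  public static void main(String[] args) throws Exception {
    // Placeholder ZooKeeper address; adjust for your cluster.
    try (CloudSolrClient client =
        new CloudSolrClient.Builder(List.of("localhost:9983"), Optional.empty()).build()) {
      ModifiableSolrParams params = new ModifiableSolrParams();
      params.set("action", "BACKUP");
      params.set("collection", "techproducts");     // collection to back up (placeholder)
      params.set("name", "techproducts-backup-1");  // backup name (placeholder)
      params.set("repository", "localfs");          // assumed repository name
      params.set("location", "/backups/solr");      // assumed backup location
      params.set("backupConfigset", false);         // skip configset files (default is true)

      // Issue the raw v1 Collections API call.
      new GenericSolrRequest(SolrRequest.METHOD.GET, "/admin/collections", params).process(client);
    }
  }
}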
@@ -1110,6 +1110,7 @@ public static class Backup extends AsyncCollectionSpecificAdminRequest {
protected Optional<String> indexBackupStrategy = Optional.empty();
protected boolean incremental = true;
protected Optional<Integer> maxNumBackupPoints = Optional.empty();
protected boolean backupConfigset = true;

public Backup(String collection, String name) {
super(CollectionAction.BACKUP, collection);
@@ -1188,6 +1189,22 @@ public Backup setMaxNumberBackupPoints(int maxNumBackupPoints) {
return this;
}

/**
* Indicates whether the Backup operation should also back up the configset files (such as
* schema.xml, solrconfig.xml, etc.).
*
* <p>Note that the configset is needed to restore the collection, so only set this to false if
* you know the cluster will have the configset available before restoring.
*
* @param backupConfigset {@code true} to back up the configset (the default); {@code false}
*     to skip configset files.
* @return {@code this} builder
*/
public Backup setBackupConfigset(boolean backupConfigset) {
this.backupConfigset = backupConfigset;
return this;
}

@Override
public SolrParams getParams() {
ModifiableSolrParams params = (ModifiableSolrParams) super.getParams();
@@ -1207,6 +1224,7 @@ public SolrParams getParams() {
params.set(CoreAdminParams.MAX_NUM_BACKUP_POINTS, maxNumBackupPoints.get());
}
params.set(CoreAdminParams.BACKUP_INCREMENTAL, incremental);
params.set(CoreAdminParams.BACKUP_CONFIGSET, backupConfigset);
return params;
}
}
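And a hedged sketch of the same operation through the typed SolrJ builder, using the `setBackupConfigset` method added above; the collection, repository, and location names are again placeholders. As the ref-guide text notes, restoring such a backup assumes the configset is still (or again) present in ZooKeeper.

import java.util.List;
import java.util.Optional;

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;

public class TypedBackupWithoutConfigsetExample {
  public static void main(String[] args) throws Exception {
    try (CloudSolrClient client =
        new CloudSolrClient.Builder(List.of("localhost:9983"), Optional.empty()).build()) {
      // Back up only the index data; the configset in ZooKeeper is not copied.
      CollectionAdminRequest.backupCollection("techproducts", "techproducts-backup-1")
          .setLocation("/backups/solr")      // assumed backup location
          .setRepositoryName("localfs")      // assumed repository name
          .setBackupConfigset(false)         // the option introduced by this commit
          .process(client);

      // A later restore only succeeds if the configset already exists in ZooKeeper,
      // e.g. it was never deleted or it was re-uploaded by means external to Solr.
      CollectionAdminRequest.restoreCollection("techproducts-restored", "techproducts-backup-1")
          .setLocation("/backups/solr")
          .setRepositoryName("localfs")
          .process(client);
    }
  }
}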
@@ -150,6 +150,9 @@ public abstract class CoreAdminParams {
/** A boolean parameter specifying if a core is being created as part of a new collection */
public static final String NEW_COLLECTION = "newCollection";

/** A boolean parameter specifying whether configsets should be included in the backup */
public static final String BACKUP_CONFIGSET = "backupConfigset";

/**
* Tells the CoreAdminHandler that the new Core will be a replica of a particular {@link
* org.apache.solr.common.cloud.Replica.Type}
@@ -348,6 +348,114 @@ public void testBackupIncremental() throws Exception {
}
}

@Test
public void testSkipConfigset() throws Exception {
setTestSuffix("testskipconfigset");
final String backupCollectionName = getCollectionName();
final String restoreCollectionName = backupCollectionName + "-restore";

CloudSolrClient solrClient = cluster.getSolrClient();

CollectionAdminRequest.createCollection(backupCollectionName, "conf1", NUM_SHARDS, 1)
.process(solrClient);
int numDocs = indexDocs(backupCollectionName, true);
String backupName = BACKUPNAME_PREFIX + testSuffix;
try (BackupRepository repository =
cluster.getJettySolrRunner(0).getCoreContainer().newBackupRepository(BACKUP_REPO_NAME)) {
String backupLocation = repository.getBackupLocation(getBackupLocation());
CollectionAdminRequest.backupCollection(backupCollectionName, backupName)
.setLocation(backupLocation)
.setBackupConfigset(false)
.setRepositoryName(BACKUP_REPO_NAME)
.processAndWait(cluster.getSolrClient(), 100);

assertFalse(
"Configset shouldn't be part of the backup but found:\n"
+ TrackingBackupRepository.directoriesCreated().stream()
.map(URI::toString)
.collect(Collectors.joining("\n")),
TrackingBackupRepository.directoriesCreated().stream()
.anyMatch(f -> f.getPath().contains("configs/conf1")));
assertFalse(
"Configset shouldn't be part of the backup but found:\n"
+ TrackingBackupRepository.outputsCreated().stream()
.map(URI::toString)
.collect(Collectors.joining("\n")),
TrackingBackupRepository.outputsCreated().stream()
.anyMatch(f -> f.getPath().contains("configs/conf1")));
assertFalse(
"solrconfig.xml shouldn't be part of the backup but found:\n"
+ TrackingBackupRepository.outputsCreated().stream()
.map(URI::toString)
.collect(Collectors.joining("\n")),
TrackingBackupRepository.outputsCreated().stream()
.anyMatch(f -> f.getPath().contains("solrconfig.xml")));
assertFalse(
"schema.xml shouldn't be part of the backup but found:\n"
+ TrackingBackupRepository.outputsCreated().stream()
.map(URI::toString)
.collect(Collectors.joining("\n")),
TrackingBackupRepository.outputsCreated().stream()
.anyMatch(f -> f.getPath().contains("schema.xml")));

CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName)
.setLocation(backupLocation)
.setRepositoryName(BACKUP_REPO_NAME)
.processAndWait(solrClient, 500);

AbstractDistribZkTestBase.waitForRecoveriesToFinish(
restoreCollectionName, ZkStateReader.from(solrClient), log.isDebugEnabled(), false, 3);
assertEquals(
numDocs,
cluster
.getSolrClient()
.query(restoreCollectionName, new SolrQuery("*:*"))
.getResults()
.getNumFound());
}

TrackingBackupRepository.clear();

try (BackupRepository repository =
cluster.getJettySolrRunner(0).getCoreContainer().newBackupRepository(BACKUP_REPO_NAME)) {
String backupLocation = repository.getBackupLocation(getBackupLocation());
CollectionAdminRequest.backupCollection(backupCollectionName, backupName)
.setLocation(backupLocation)
.setBackupConfigset(true)
.setRepositoryName(BACKUP_REPO_NAME)
.processAndWait(cluster.getSolrClient(), 100);

assertTrue(
"Configset should be part of the backup but not found:\n"
+ TrackingBackupRepository.directoriesCreated().stream()
.map(URI::toString)
.collect(Collectors.joining("\n")),
TrackingBackupRepository.directoriesCreated().stream()
.anyMatch(f -> f.getPath().contains("configs/conf1")));
assertTrue(
"Configset should be part of the backup but not found:\n"
+ TrackingBackupRepository.outputsCreated().stream()
.map(URI::toString)
.collect(Collectors.joining("\n")),
TrackingBackupRepository.outputsCreated().stream()
.anyMatch(f -> f.getPath().contains("configs/conf1")));
assertTrue(
"solrconfig.xml should be part of the backup but not found:\n"
+ TrackingBackupRepository.outputsCreated().stream()
.map(URI::toString)
.collect(Collectors.joining("\n")),
TrackingBackupRepository.outputsCreated().stream()
.anyMatch(f -> f.getPath().contains("solrconfig.xml")));
assertTrue(
"schema.xml should be part of the backup but not found:\n"
+ TrackingBackupRepository.outputsCreated().stream()
.map(URI::toString)
.collect(Collectors.joining("\n")),
TrackingBackupRepository.outputsCreated().stream()
.anyMatch(f -> f.getPath().contains("schema.xml")));
}
}

protected void corruptIndexFiles() throws IOException {
List<Slice> slices = new ArrayList<>(getCollectionState(getCollectionName()).getSlices());
Replica leader = slices.get(random().nextInt(slices.size())).getLeader();
@@ -34,6 +34,9 @@

public class TrackingBackupRepository implements BackupRepository {
private static final List<URI> COPIED_FILES = Collections.synchronizedList(new ArrayList<>());
private static final List<URI> DIRECTORIES_CREATED =
Collections.synchronizedList(new ArrayList<>());
private static final List<URI> OUTPUTS_CREATED = Collections.synchronizedList(new ArrayList<>());

private BackupRepository delegate;

@@ -84,11 +87,13 @@ public IndexInput openInput(URI dirPath, String fileName, IOContext ctx) throws

@Override
public OutputStream createOutput(URI path) throws IOException {
OUTPUTS_CREATED.add(path);
return delegate.createOutput(path);
}

@Override
public void createDirectory(URI path) throws IOException {
DIRECTORIES_CREATED.add(path);
delegate.createDirectory(path);
}

@@ -136,9 +141,19 @@ public static List<URI> copiedFiles() {
return new ArrayList<>(COPIED_FILES);
}

public static List<URI> directoriesCreated() {
return new ArrayList<>(DIRECTORIES_CREATED);
}

public static List<URI> outputsCreated() {
return new ArrayList<>(OUTPUTS_CREATED);
}

/** Clear all tracking data */
public static void clear() {
COPIED_FILES.clear();
DIRECTORIES_CREATED.clear();
OUTPUTS_CREATED.clear();
}

@Override