From e96309a6dcb952c3c1e047d04d623f43242f87af Mon Sep 17 00:00:00 2001 From: Tomas Eduardo Fernandez Lobbe Date: Wed, 28 Jun 2023 14:52:14 -0700 Subject: [PATCH] SOLR-16844: Optionally skip backup of configset files (#1733) --- solr/CHANGES.txt | 2 + .../solr/cloud/api/collections/BackupCmd.java | 9 +- .../cloud/api/collections/RestoreCmd.java | 6 +- .../solr/core/backup/BackupManager.java | 7 +- .../admin/api/CreateCollectionBackupAPI.java | 3 + .../pages/collection-management.adoc | 9 ++ .../solrj/request/CollectionAdminRequest.java | 18 +++ .../solr/common/params/CoreAdminParams.java | 3 + .../AbstractIncrementalBackupTest.java | 108 ++++++++++++++++++ .../solr/core/TrackingBackupRepository.java | 15 +++ 10 files changed, 174 insertions(+), 6 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 881ab7335b0..0429a5f46f0 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -72,6 +72,8 @@ New Features * SOLR-16827: Add min/max scaling to the reranker (Joel Bernstein) +* SOLR-16844: Added new parameter `backupConfigset` to collection Backup to optionally skip backing up configsets. (Tomás Fernández Löbbe) + Improvements --------------------- diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/BackupCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/BackupCmd.java index 5499767d69d..5d4aaf7fd79 100644 --- a/solr/core/src/java/org/apache/solr/cloud/api/collections/BackupCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/BackupCmd.java @@ -84,6 +84,7 @@ public void call(ClusterState state, ZkNodeProps message, NamedList resu String backupName = message.getStr(NAME); String repo = message.getStr(CoreAdminParams.BACKUP_REPOSITORY); boolean incremental = message.getBool(CoreAdminParams.BACKUP_INCREMENTAL, true); + boolean backupConfigset = message.getBool(CoreAdminParams.BACKUP_CONFIGSET, true); String configName = ccc.getSolrCloudManager() .getClusterStateProvider() @@ -145,8 +146,12 @@ public void call(ClusterState state, ZkNodeProps message, NamedList resu log.info("Starting to backup ZK data for backupName={}", backupName); - // Download the configs - backupMgr.downloadConfigDir(configName, cc.getConfigSetService()); + backupMgr.createZkStateDir(); + + if (backupConfigset) { + // Download the configs + backupMgr.downloadConfigDir(configName, cc.getConfigSetService()); + } // Save the collection's state. Can be part of the monolithic clusterstate.json or a // individual state.json. Since we don't want to distinguish we extract the state and back it diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java index 11bff6e23d5..2140078ed1e 100644 --- a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java +++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java @@ -321,12 +321,14 @@ private void uploadConfig( ConfigSetService configSetService) throws IOException { if (configSetService.checkConfigExists(restoreConfigName)) { - log.warn( + log.info( "Config with name {} already exists. Skipping upload to Zookeeper and using existing config.", restoreConfigName); // TODO add overwrite option? } else { - log.info("Uploading config {}", restoreConfigName); + log.info( + "Config with name {} does not already exist in ZooKeeper. Will restore from Backup.", + restoreConfigName); backupMgr.uploadConfigDir(configName, restoreConfigName, configSetService); } diff --git a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java index 806ba1f6b69..e193928214a 100644 --- a/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java +++ b/solr/core/src/java/org/apache/solr/core/backup/BackupManager.java @@ -254,7 +254,7 @@ public void uploadConfigDir( throws IOException { URI source = repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR, sourceConfigName); if (!repository.exists(source)) { - throw new IllegalArgumentException("Path " + source + " does not exist"); + throw new IllegalArgumentException("Configset expected at " + source + " does not exist"); } uploadConfigToSolrCloud(configSetService, source, targetConfigName, ""); } @@ -270,7 +270,6 @@ public void uploadConfigDir( public void downloadConfigDir(String configName, ConfigSetService configSetService) throws IOException { URI dest = repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR, configName); - repository.createDirectory(getZkStateDir()); repository.createDirectory(repository.resolveDirectory(getZkStateDir(), CONFIG_STATE_DIR)); repository.createDirectory(dest); @@ -404,4 +403,8 @@ private URI getZkStateDir() { } return zkStateDir; } + + public void createZkStateDir() throws IOException { + repository.createDirectory(getZkStateDir()); + } } diff --git a/solr/core/src/java/org/apache/solr/handler/admin/api/CreateCollectionBackupAPI.java b/solr/core/src/java/org/apache/solr/handler/admin/api/CreateCollectionBackupAPI.java index 01cb621ac4b..4a5f1991fde 100644 --- a/solr/core/src/java/org/apache/solr/handler/admin/api/CreateCollectionBackupAPI.java +++ b/solr/core/src/java/org/apache/solr/handler/admin/api/CreateCollectionBackupAPI.java @@ -24,6 +24,7 @@ import static org.apache.solr.common.params.CollectionAdminParams.INDEX_BACKUP_STRATEGY; import static org.apache.solr.common.params.CommonAdminParams.ASYNC; import static org.apache.solr.common.params.CommonParams.NAME; +import static org.apache.solr.common.params.CoreAdminParams.BACKUP_CONFIGSET; import static org.apache.solr.common.params.CoreAdminParams.BACKUP_INCREMENTAL; import static org.apache.solr.common.params.CoreAdminParams.BACKUP_LOCATION; import static org.apache.solr.common.params.CoreAdminParams.BACKUP_REPOSITORY; @@ -162,6 +163,7 @@ public static CreateCollectionBackupRequestBody createRequestBodyFromV1Params(So requestBody.backupStrategy = params.get(INDEX_BACKUP_STRATEGY); requestBody.snapshotName = params.get(COMMIT_NAME); requestBody.incremental = params.getBool(BACKUP_INCREMENTAL); + requestBody.backupConfigset = params.getBool(BACKUP_CONFIGSET); requestBody.maxNumBackupPoints = params.getInt(MAX_NUM_BACKUP_POINTS); requestBody.async = params.get(ASYNC); @@ -186,6 +188,7 @@ public static class CreateCollectionBackupRequestBody implements JacksonReflectM @JsonProperty public String backupStrategy; @JsonProperty public String snapshotName; @JsonProperty public Boolean incremental; + @JsonProperty public Boolean backupConfigset; @JsonProperty public Integer maxNumBackupPoints; @JsonProperty public String async; } diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/collection-management.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/collection-management.adoc index 61e987509c6..12137859df3 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/collection-management.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/collection-management.adoc @@ -1664,6 +1664,15 @@ The upper-bound on how many backups should be retained at the backup location. If the current number exceeds this bound, older backups will be deleted until only `maxNumBackupPoints` backups remain. This parameter has no effect if `incremental=false` is specified. +`backupConfigset`:: ++ +[%autowidth,frame=none] +|=== +|Optional |Default: true +|=== ++ +Indicates if configset files should be included with the index backup or not. Note that in order to restore a collection, the configset must either exist in ZooKeeper or be part of the backup. Only set this to `false` if you can restore configsets by other means external to Solr (i.e. you have it stored with your application source code, is part of your ZooKeeper backups, etc). + `incremental`:: + [%autowidth,frame=none] diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java index 83dbd1fae8b..c7814aab55b 100644 --- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java +++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/CollectionAdminRequest.java @@ -1110,6 +1110,7 @@ public static class Backup extends AsyncCollectionSpecificAdminRequest { protected Optional indexBackupStrategy = Optional.empty(); protected boolean incremental = true; protected Optional maxNumBackupPoints = Optional.empty(); + protected boolean backupConfigset = true; public Backup(String collection, String name) { super(CollectionAction.BACKUP, collection); @@ -1188,6 +1189,22 @@ public Backup setMaxNumberBackupPoints(int maxNumBackupPoints) { return this; } + /** + * Indicates weather the Backup operation should also backup the configset files (such as + * schema.xml, solrconfig.xml. etc). + * + *

Note that the configset is needed to restore the collection, so only set this to false if + * you know the cluster will have the configset available before restoring. + * + * @param backupConfigset {@code true} if you want to backup configsets (default). {@code false} + * to skip configset files. + * @return {@code this} builder + */ + public Backup setBackupConfigset(boolean backupConfigset) { + this.backupConfigset = backupConfigset; + return this; + } + @Override public SolrParams getParams() { ModifiableSolrParams params = (ModifiableSolrParams) super.getParams(); @@ -1207,6 +1224,7 @@ public SolrParams getParams() { params.set(CoreAdminParams.MAX_NUM_BACKUP_POINTS, maxNumBackupPoints.get()); } params.set(CoreAdminParams.BACKUP_INCREMENTAL, incremental); + params.set(CoreAdminParams.BACKUP_CONFIGSET, backupConfigset); return params; } } diff --git a/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java b/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java index 59f184a92c8..4d9427f31bb 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/CoreAdminParams.java @@ -150,6 +150,9 @@ public abstract class CoreAdminParams { /** A boolean parameter specifying if a core is being created as part of a new collection */ public static final String NEW_COLLECTION = "newCollection"; + /** A parameter to specify if Configsets should be included in the backup or not */ + public static final String BACKUP_CONFIGSET = "backupConfigset"; + /** * Tells the CoreAdminHandler that the new Core will be a replica of a particular {@link * org.apache.solr.common.cloud.Replica.Type} diff --git a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java index 43bd98e378a..3e8a8d9dbae 100644 --- a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java +++ b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java @@ -348,6 +348,114 @@ public void testBackupIncremental() throws Exception { } } + @Test + public void testSkipConfigset() throws Exception { + setTestSuffix("testskipconfigset"); + final String backupCollectionName = getCollectionName(); + final String restoreCollectionName = backupCollectionName + "-restore"; + + CloudSolrClient solrClient = cluster.getSolrClient(); + + CollectionAdminRequest.createCollection(backupCollectionName, "conf1", NUM_SHARDS, 1) + .process(solrClient); + int numDocs = indexDocs(backupCollectionName, true); + String backupName = BACKUPNAME_PREFIX + testSuffix; + try (BackupRepository repository = + cluster.getJettySolrRunner(0).getCoreContainer().newBackupRepository(BACKUP_REPO_NAME)) { + String backupLocation = repository.getBackupLocation(getBackupLocation()); + CollectionAdminRequest.backupCollection(backupCollectionName, backupName) + .setLocation(backupLocation) + .setBackupConfigset(false) + .setRepositoryName(BACKUP_REPO_NAME) + .processAndWait(cluster.getSolrClient(), 100); + + assertFalse( + "Configset shouldn't be part of the backup but found:\n" + + TrackingBackupRepository.directoriesCreated().stream() + .map(URI::toString) + .collect(Collectors.joining("\n")), + TrackingBackupRepository.directoriesCreated().stream() + .anyMatch(f -> f.getPath().contains("configs/conf1"))); + assertFalse( + "Configset shouldn't be part of the backup but found:\n" + + TrackingBackupRepository.outputsCreated().stream() + .map(URI::toString) + .collect(Collectors.joining("\n")), + TrackingBackupRepository.outputsCreated().stream() + .anyMatch(f -> f.getPath().contains("configs/conf1"))); + assertFalse( + "solrconfig.xml shouldn't be part of the backup but found:\n" + + TrackingBackupRepository.outputsCreated().stream() + .map(URI::toString) + .collect(Collectors.joining("\n")), + TrackingBackupRepository.outputsCreated().stream() + .anyMatch(f -> f.getPath().contains("solrconfig.xml"))); + assertFalse( + "schema.xml shouldn't be part of the backup but found:\n" + + TrackingBackupRepository.outputsCreated().stream() + .map(URI::toString) + .collect(Collectors.joining("\n")), + TrackingBackupRepository.outputsCreated().stream() + .anyMatch(f -> f.getPath().contains("schema.xml"))); + + CollectionAdminRequest.restoreCollection(restoreCollectionName, backupName) + .setLocation(backupLocation) + .setRepositoryName(BACKUP_REPO_NAME) + .processAndWait(solrClient, 500); + + AbstractDistribZkTestBase.waitForRecoveriesToFinish( + restoreCollectionName, ZkStateReader.from(solrClient), log.isDebugEnabled(), false, 3); + assertEquals( + numDocs, + cluster + .getSolrClient() + .query(restoreCollectionName, new SolrQuery("*:*")) + .getResults() + .getNumFound()); + } + + TrackingBackupRepository.clear(); + + try (BackupRepository repository = + cluster.getJettySolrRunner(0).getCoreContainer().newBackupRepository(BACKUP_REPO_NAME)) { + String backupLocation = repository.getBackupLocation(getBackupLocation()); + CollectionAdminRequest.backupCollection(backupCollectionName, backupName) + .setLocation(backupLocation) + .setBackupConfigset(true) + .setRepositoryName(BACKUP_REPO_NAME) + .processAndWait(cluster.getSolrClient(), 100); + + assertTrue( + "Configset should be part of the backup but not found:\n" + + TrackingBackupRepository.directoriesCreated().stream() + .map(URI::toString) + .collect(Collectors.joining("\n")), + TrackingBackupRepository.directoriesCreated().stream() + .anyMatch(f -> f.getPath().contains("configs/conf1"))); + assertTrue( + "Configset should be part of the backup but not found:\n" + + TrackingBackupRepository.outputsCreated().stream() + .map(URI::toString) + .collect(Collectors.joining("\n")), + TrackingBackupRepository.outputsCreated().stream() + .anyMatch(f -> f.getPath().contains("configs/conf1"))); + assertTrue( + "solrconfig.xml should be part of the backup but not found:\n" + + TrackingBackupRepository.outputsCreated().stream() + .map(URI::toString) + .collect(Collectors.joining("\n")), + TrackingBackupRepository.outputsCreated().stream() + .anyMatch(f -> f.getPath().contains("solrconfig.xml"))); + assertTrue( + "schema.xml should be part of the backup but not found:\n" + + TrackingBackupRepository.outputsCreated().stream() + .map(URI::toString) + .collect(Collectors.joining("\n")), + TrackingBackupRepository.outputsCreated().stream() + .anyMatch(f -> f.getPath().contains("schema.xml"))); + } + } + protected void corruptIndexFiles() throws IOException { List slices = new ArrayList<>(getCollectionState(getCollectionName()).getSlices()); Replica leader = slices.get(random().nextInt(slices.size())).getLeader(); diff --git a/solr/test-framework/src/java/org/apache/solr/core/TrackingBackupRepository.java b/solr/test-framework/src/java/org/apache/solr/core/TrackingBackupRepository.java index e02b16dcffb..5e28d7aff62 100644 --- a/solr/test-framework/src/java/org/apache/solr/core/TrackingBackupRepository.java +++ b/solr/test-framework/src/java/org/apache/solr/core/TrackingBackupRepository.java @@ -34,6 +34,9 @@ public class TrackingBackupRepository implements BackupRepository { private static final List COPIED_FILES = Collections.synchronizedList(new ArrayList<>()); + private static final List DIRECTORIES_CREATED = + Collections.synchronizedList(new ArrayList<>()); + private static final List OUTPUTS_CREATED = Collections.synchronizedList(new ArrayList<>()); private BackupRepository delegate; @@ -84,11 +87,13 @@ public IndexInput openInput(URI dirPath, String fileName, IOContext ctx) throws @Override public OutputStream createOutput(URI path) throws IOException { + OUTPUTS_CREATED.add(path); return delegate.createOutput(path); } @Override public void createDirectory(URI path) throws IOException { + DIRECTORIES_CREATED.add(path); delegate.createDirectory(path); } @@ -136,9 +141,19 @@ public static List copiedFiles() { return new ArrayList<>(COPIED_FILES); } + public static List directoriesCreated() { + return new ArrayList<>(DIRECTORIES_CREATED); + } + + public static List outputsCreated() { + return new ArrayList<>(OUTPUTS_CREATED); + } + /** Clear all tracking data */ public static void clear() { COPIED_FILES.clear(); + DIRECTORIES_CREATED.clear(); + OUTPUTS_CREATED.clear(); } @Override