From c8ee862b7a8c39b3ee9f43fbdcee80799d56f3df Mon Sep 17 00:00:00 2001 From: Colvin Cowie <51863265+colvinco@users.noreply.github.com> Date: Thu, 29 Jun 2023 11:17:45 +0100 Subject: [PATCH] SOLR-13396 - Disable deletion of unknown cores by default (#1321) Previous behavior is now disabled by default, set -Dsolr.deleteUnknownCores=true to use the old behavior. --- solr/CHANGES.txt | 7 +++++++ solr/bin/solr | 4 ++++ solr/bin/solr.cmd | 4 ++++ solr/bin/solr.in.cmd | 8 +++++++- solr/bin/solr.in.sh | 10 ++++++++-- .../java/org/apache/solr/core/CoreContainer.java | 14 ++++++++++++-- .../solr/cloud/DeleteInactiveReplicaTest.java | 7 +++++++ .../pages/taking-solr-to-production.adoc | 13 +++++++++++++ .../pages/major-changes-in-solr-9.adoc | 10 ++++++++++ 9 files changed, 72 insertions(+), 5 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 0429a5f46f0..98db7c4d699 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -52,6 +52,13 @@ Other Changes ================== 9.3.0 ================== +Upgrade Notes +--------------------- + +* SOLR-13396: Add property to control the deletion of unknown cores. The default setting is to not delete unknown cores. + This is a change to behaviour introduced in Solr 7.3.1 by SOLR-12066. To continue using the previous behaviour, Solr can be + started with the system property `solr.deleteUnknownCores` set to `true`. 
(Colvin Cowie, janhoy) + New Features --------------------- * SOLR-16697: Solr now provides an "Install Shard" API that allows users who have built (per-shard) indices offline to import diff --git a/solr/bin/solr b/solr/bin/solr index 3f5df81421b..16adbe9c987 100644 --- a/solr/bin/solr +++ b/solr/bin/solr @@ -2258,6 +2258,10 @@ function start_solr() { SOLR_OPTS+=("-Dsolr.data.home=$SOLR_DATA_HOME") fi + if [ -n "${SOLR_DELETE_UNKNOWN_CORES:-}" ]; then + SOLR_OPTS+=("-Dsolr.deleteUnknownCores=$SOLR_DELETE_UNKNOWN_CORES") + fi + # If SSL-related system props are set, add them to SOLR_OPTS if [ "$SOLR_SSL_ENABLED" == "true" ]; then # If using SSL and solr.jetty.https.port not set explicitly, use the jetty.port diff --git a/solr/bin/solr.cmd b/solr/bin/solr.cmd index c28b8ef726d..e7fe9a898f2 100755 --- a/solr/bin/solr.cmd +++ b/solr/bin/solr.cmd @@ -1180,6 +1180,10 @@ IF "%SOLR_MODE%"=="solrcloud" ( set "CLOUD_MODE_OPTS=!CLOUD_MODE_OPTS! -DwaitForZk=%SOLR_WAIT_FOR_ZK%" ) + IF NOT "%SOLR_DELETE_UNKNOWN_CORES%"=="" ( + set "CLOUD_MODE_OPTS=!CLOUD_MODE_OPTS! -Dsolr.deleteUnknownCores=%SOLR_DELETE_UNKNOWN_CORES%" + ) + IF NOT "%ZK_HOST%"=="" ( set "CLOUD_MODE_OPTS=!CLOUD_MODE_OPTS! -DzkHost=%ZK_HOST%" ) ELSE ( diff --git a/solr/bin/solr.in.cmd b/solr/bin/solr.in.cmd index 1363ee43d5f..0ca0c12c3d5 100755 --- a/solr/bin/solr.in.cmd +++ b/solr/bin/solr.in.cmd @@ -78,6 +78,12 @@ REM set SOLR_HOST=192.168.1.1 REM By default Solr will try to connect to Zookeeper with 30 seconds in timeout; override the timeout if needed REM set SOLR_WAIT_FOR_ZK=30 +REM By default Solr will log a warning for cores that are not registered in Zookeeper at startup +REM but otherwise ignore them. This protects against misconfiguration (e.g. connecting to the +REM wrong Zookeeper instance or chroot), however you need to manually delete the cores if +REM they are no longer required. Set to "true" to have Solr automatically delete unknown cores. 
+REM set SOLR_DELETE_UNKNOWN_CORES=false + REM By default the start script uses UTC; override the timezone if needed REM set SOLR_TIMEZONE=UTC @@ -238,7 +244,7 @@ REM This parameter lets you specify file system path(s) to explicitly allow. The REM set SOLR_OPTS=%SOLR_OPTS% -Dsolr.allowPaths=D:\,E:\other\path REM Before version 9.0, Solr required a copy of solr.xml file in $SOLR_HOME. Now Solr will use a default file if not found. -REM To restore the old behaviour, set the variable below to true +REM To restore the old behavior, set the variable below to true REM set SOLR_SOLRXML_REQUIRED=false REM Some previous versions of Solr use an outdated log4j dependency. If you are unable to use at least log4j version 2.15.0 diff --git a/solr/bin/solr.in.sh b/solr/bin/solr.in.sh index 9a87144a0bc..b1dc6ab5ab4 100644 --- a/solr/bin/solr.in.sh +++ b/solr/bin/solr.in.sh @@ -83,6 +83,12 @@ # By default Solr will try to connect to Zookeeper with 30 seconds in timeout; override the timeout if needed #SOLR_WAIT_FOR_ZK="30" +# By default Solr will log a warning for cores that are not registered in Zookeeper at startup +# but otherwise ignore them. This protects against misconfiguration (e.g. connecting to the +# wrong Zookeeper instance or chroot), however you need to manually delete the cores if +# they are no longer required. Set to "true" to have Solr automatically delete unknown cores. +#SOLR_DELETE_UNKNOWN_CORES=false + # By default the start script uses UTC; override the timezone if needed #SOLR_TIMEZONE="UTC" @@ -267,12 +273,12 @@ # When using this feature, it is recommended to have an external service monitoring the given dir. 
# If more fine grained control is required, you can manually add the appropriate flags to SOLR_OPTS # See https://docs.oracle.com/en/java/javase/11/troubleshoot/command-line-options1.html -# You can test this behaviour by setting SOLR_HEAP=25m +# You can test this behavior by setting SOLR_HEAP=25m #SOLR_HEAP_DUMP=true #SOLR_HEAP_DUMP_DIR=/var/log/dumps # Before version 9.0, Solr required a copy of solr.xml file in $SOLR_HOME. Now Solr will use a default file if not found. -# To restore the old behaviour, set the variable below to true +# To restore the old behavior, set the variable below to true #SOLR_SOLRXML_REQUIRED=false # Some previous versions of Solr use an outdated log4j dependency. If you are unable to use at least log4j version 2.15.0 diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java index b1c92abd48f..f223f41e36f 100644 --- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java +++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java @@ -1681,8 +1681,18 @@ private SolrCore createFromDescriptor( } catch (Exception e) { coreInitFailures.put(dcore.getName(), new CoreLoadFailure(dcore, e)); if (e instanceof ZkController.NotInClusterStateException && !newCollection) { - // this mostly happen when the core is deleted when this node is down - unload(dcore.getName(), true, true, true); + // this mostly happens when the core is deleted when this node is down + // but it can also happen if connecting to the wrong zookeeper + final boolean deleteUnknownCores = + Boolean.parseBoolean(System.getProperty("solr.deleteUnknownCores", "false")); + log.error( + "SolrCore {} in {} is not in cluster state.{}", + dcore.getName(), + dcore.getInstanceDir(), + (deleteUnknownCores + ? " It will be deleted. See SOLR-13396 for more information." 
+ : "")); + unload(dcore.getName(), deleteUnknownCores, deleteUnknownCores, deleteUnknownCores); throw e; } solrCores.removeCoreDescriptor(dcore); diff --git a/solr/core/src/test/org/apache/solr/cloud/DeleteInactiveReplicaTest.java b/solr/core/src/test/org/apache/solr/cloud/DeleteInactiveReplicaTest.java index ea7c7dbcba1..8d3749b7eba 100644 --- a/solr/core/src/test/org/apache/solr/cloud/DeleteInactiveReplicaTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/DeleteInactiveReplicaTest.java @@ -31,6 +31,7 @@ import org.apache.solr.embedded.JettySolrRunner; import org.apache.solr.util.FileUtils; import org.apache.solr.util.TimeOut; +import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; import org.slf4j.Logger; @@ -45,8 +46,14 @@ public static void setupCluster() throws Exception { configureCluster(4).addConfig("conf", configset("cloud-minimal")).configure(); } + @AfterClass + public static void reset() { + System.setProperty("solr.deleteUnknownCores", "false"); + } + @Test public void deleteInactiveReplicaTest() throws Exception { + System.setProperty("solr.deleteUnknownCores", "true"); String collectionName = "delDeadColl"; int replicationFactor = 2; diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc index bab49a5dfea..48fe805ac29 100644 --- a/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc +++ b/solr/solr-ref-guide/modules/deployment-guide/pages/taking-solr-to-production.adoc @@ -290,6 +290,19 @@ If you also want to bootstrap ZooKeeper with existing `solr_home`, you can inste See xref:zookeeper-utilities.adoc[] for more info. ==== +==== Unknown core deletion + +When Solr loads a core from a filesystem it will check for corresponding cluster state in ZooKeeper. If no corresponding entry exists the core will be skipped and an error will be logged. 
+This protects against misconfiguration (e.g. connecting to the wrong ZooKeeper instance or chroot) where the index would still be valid once the configuration is corrected. However, you may +need to manually delete unwanted cores that have not been removed successfully as part of intentional deletion of a collection. + +If you would prefer to automatically remove the orphaned files you can edit your include file to set `SOLR_DELETE_UNKNOWN_CORES` to `true`. + +[source,bash] +---- +SOLR_DELETE_UNKNOWN_CORES=true +---- + === Solr Hostname Use the `SOLR_HOST` variable in the include file to set the hostname of the Solr server. diff --git a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc index f468fc58830..857d905f4b7 100644 --- a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc +++ b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-9.adoc @@ -91,6 +91,16 @@ If the server URL does not contain `apache.org`, then the gpg signature checking It is still strongly recommended to use the Dockerfile included in the Solr binary TGZs if you want to build Solr images with custom versions of Solr. The custom version of Solr will include this Dockerfile when it is built. +=== Deletion of unknown cores is now disabled by default +When Solr loads a core from a filesystem it will check for corresponding cluster state in ZooKeeper. +Prior to Solr 9.3, if no corresponding entry existed the core was deleted automatically to remove the orphaned files. +As of Solr 9.3 that behavior is no longer enabled by default. See xref:deployment-guide:taking-solr-to-production.adoc#unknown-core-deletion[Unknown core deletion]. To restore the previous behavior, set the following in your include file: + +[source,bash] +---- +SOLR_DELETE_UNKNOWN_CORES=true +---- + == Solr 9.2 === Upgrade to Jetty 10.x * Solr upgraded to Jetty 10.x from 9.x due to Jetty 9.x is now end of life. 
Jetty 10.x has a Java 11 minimum and matches Solr 9 minimum Java version. Jetty logging has been replaced with slf4j again matching Solr. See https://webtide.com/jetty-10-and-11-have-arrived/ for additional Jetty 10.x highlights.