SOLR-17066: Switch "LB" clients away from core URLs (apache#2283)

Providing a core URL String as a SolrClient's "base URL" prevents it from communicating with other cores or making core-agnostic API requests (e.g. node healthcheck, list cores, etc.) This commit migrates usage of both "LB" clients away from using raw core-URL Strings, in favor of using a new structured 'Endpoint' class, which allows access to both the "root URL" and "collection" separately. This commit also updates various ref-guides and Javadoc snippets to reflect the fact that clients now only accept "root URLs" (with a small exception for LB clients which may use the Endpoint class, as mentioned above).
gus-asf · Feb 26, 2024 · c5709bd · c5709bd
1 parent b187595
commit c5709bd
Show file tree

Hide file tree

Showing 28 changed files with 684 additions and 439 deletions.
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
@@ -52,6 +52,11 @@ Deprecation Removals
 
 * SOLR-17159: Remove deprecated bin/post and bin/postlogs scripts in favour of bin/solr equivalents. (Eric Pugh)
 
+* SOLR-17066: `SolrClient` implementations that rely on "base URL" strings now only accept "root" URL paths (i.e. URLs that
+  end in "/solr").  Users may still specify a default collection through use of the `withDefaultCollection` method available
+  on most SolrClient builders. (Jason Gerlowski, David Smiley, Eric Pugh)
+
+
 Dependency Upgrades
 ---------------------
 (No changes)

diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandlerFactory.java
@@ -28,6 +28,7 @@
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.SynchronousQueue;
 import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
 import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.impl.Http2SolrClient;
 import org.apache.solr.client.solrj.impl.HttpClientUtil;
@@ -347,7 +348,10 @@ protected LBSolrClient.Req newLBHttpSolrClientReq(final QueryRequest req, List<S
     if (numServersToTry < this.permittedLoadBalancerRequestsMinimumAbsolute) {
       numServersToTry = this.permittedLoadBalancerRequestsMinimumAbsolute;
     }
-    return new LBSolrClient.Req(req, urls, numServersToTry);
+
+    final var endpoints =
+        urls.stream().map(url -> LBSolrClient.Endpoint.from(url)).collect(Collectors.toList());
+    return new LBSolrClient.Req(req, endpoints, numServersToTry);
   }
 
   /**

diff --git a/solr/core/src/test/org/apache/solr/cloud/SSLMigrationTest.java b/solr/core/src/test/org/apache/solr/cloud/SSLMigrationTest.java
@@ -127,7 +127,7 @@ private void setUrlScheme(String value) throws Exception {
             .map(r -> r.getStr(ZkStateReader.BASE_URL_PROP))
             .toArray(String[]::new);
     // Create new SolrServer to configure new HttpClient w/ SSL config
-    try (SolrClient client = new LBHttpSolrClient.Builder().withBaseSolrUrls(urls).build()) {
+    try (SolrClient client = new LBHttpSolrClient.Builder().withBaseEndpoints(urls).build()) {
       client.request(request);
     }
   }

diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedDebugComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedDebugComponentTest.java
@@ -403,7 +403,7 @@ public void testCompareWithNonDistributedRequest() throws SolrServerException, I
   }
 
   public void testTolerantSearch() throws SolrServerException, IOException {
-    String badShard = DEAD_HOST_1;
+    String badShard = DEAD_HOST_1 + "/solr/collection1";
     SolrQuery query = new SolrQuery();
     query.setQuery("*:*");
     query.set("debug", "true");

diff --git a/solr/core/src/test/org/apache/solr/handler/component/TestHttpShardHandlerFactory.java b/solr/core/src/test/org/apache/solr/handler/component/TestHttpShardHandlerFactory.java
@@ -93,7 +93,7 @@ public void testLoadBalancerRequestsMinMax() {
       final QueryRequest queryRequest = null;
       final List<String> urls = new ArrayList<>();
       for (int ii = 0; ii < 10; ++ii) {
-        urls.add(null);
+        urls.add("http://localhost" + ii + ":8983/solr");
       }
 
       // create LBHttpSolrClient request

diff --git a/solr/core/src/test/org/apache/solr/update/DeleteByIdWithRouterFieldTest.java b/solr/core/src/test/org/apache/solr/update/DeleteByIdWithRouterFieldTest.java
@@ -303,7 +303,9 @@ public void testGlassBoxUpdateRequestRoutesToShards() {
     final Map<String, List<String>> urlMap =
         docCol.getActiveSlices().stream()
             .collect(
-                Collectors.toMap(s -> s.getName(), s -> Collections.singletonList(s.getName())));
+                Collectors.toMap(
+                    s -> s.getName(),
+                    s -> Collections.singletonList(fakeSolrUrlForShard(s.getName()))));
 
     // simplified rote info we'll build up with the shards for each delete (after sanity checking
     // they have routing info at all)...
@@ -314,7 +316,7 @@ public void testGlassBoxUpdateRequestRoutesToShards() {
             .getRoutesToCollection(docCol.getRouter(), docCol, urlMap, params(), ROUTE_FIELD);
     for (LBSolrClient.Req lbreq : rawDelRoutes.values()) {
       assertTrue(lbreq.getRequest() instanceof UpdateRequest);
-      final String shard = lbreq.getServers().get(0);
+      final LBSolrClient.Endpoint shard = lbreq.getEndpoints().get(0);
       final UpdateRequest req = (UpdateRequest) lbreq.getRequest();
       for (Map.Entry<String, Map<String, Object>> entry : req.getDeleteByIdMap().entrySet()) {
         final String id = entry.getKey();
@@ -327,7 +329,7 @@ public void testGlassBoxUpdateRequestRoutesToShards() {
             RVAL_PRE + id.substring(id.length() - 1),
             route.toString());
 
-        actualDelRoutes.put(id, shard);
+        actualDelRoutes.put(id, shard.toString());
       }
     }
 
@@ -342,11 +344,18 @@ public void testGlassBoxUpdateRequestRoutesToShards() {
               .getRouter()
               .getTargetSlice(id, doc, doc.getFieldValue(ROUTE_FIELD).toString(), params(), docCol);
       assertNotNull(id + " add route is null?", expectedShard);
-      assertEquals("Wrong shard for delete of id: " + id, expectedShard.getName(), actualShard);
+      assertEquals(
+          "Wrong shard for delete of id: " + id,
+          fakeSolrUrlForShard(expectedShard.getName()),
+          actualShard.toString());
     }
 
     // sanity check no one broke our test and made it a waste of time
     assertEquals(100, add100Docs().getDocuments().size());
     assertEquals(100, actualDelRoutes.entrySet().size());
   }
+
+  private static String fakeSolrUrlForShard(String shardName) {
+    return "http://localhost:8983/solr/" + shardName;
+  }
 }
diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/solrj.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/solrj.adoc
@@ -117,51 +117,48 @@ The most common/important of these are discussed below.
 For comprehensive information on how to tweak your `SolrClient`, see the Javadocs for the involved client, and its corresponding builder object.
 
 ==== Base URLs
-Most `SolrClient` implementations (except for `CloudSolrClient` and `Http2SolrClient`) require users to specify one or more Solr base URLs, which the client then uses to send HTTP requests to Solr.
-The path users include on the base URL they provide has an effect on the behavior of the created client from that point on.
-
-. A URL with a path pointing to a specific core or collection (e.g., `\http://hostname:8983/solr/core1`).
-When a core or collection is specified in the base URL, subsequent requests made with that client are not required to re-specify the affected collection.
-However, the client is limited to sending requests to  that core/collection, and can not send requests to any others.
-. A URL pointing to the root Solr path (e.g., `\http://hostname:8983/solr`).
-When no core or collection is specified in the base URL, requests can be made to any core/collection, but the affected core/collection must be specified on all requests.
-
-Generally speaking, if your `SolrClient` will only be used on a single core/collection, including that entity in the path is the most convenient.
-Where more flexibility is required, the collection/core should be excluded.
-
-==== Base URLs of Http2SolrClient
-The `Http2SolrClient` manages connections to different nodes efficiently.
-`Http2SolrClient` does not require a `baseUrl`.
-In case a `baseUrl` is not provided, then `SolrRequest.basePath` must be set, so
-`Http2SolrClient` knows which nodes to send requests to.
-If not an `IllegalArgumentException` will be thrown.
-
-==== Base URLs of CloudSolrClient
-
-It is also possible to specify base URLs for `CloudSolrClient`, but URLs are expected to point to the root Solr path (e.g., `\http://hostname:8983/solr`).
-They should not include any collections, cores, or other path components.
-
+Many `SolrClient` implementations require users to specify one or more Solr URLs, which the client then uses to send HTTP requests to Solr.
+Unless otherwise specified, SolrJ expects these URLs to point to the root Solr path (i.e. "/solr").
+
+A few notable exceptions to this are described below:
+
+- *Http2SolrClient* - Users of `Http2SolrClient` may choose to skip providing a root URL to their client, in favor of specifing the URL on a request-by-request basis using `SolrRequest.setBasePath`.
+`Http2SolrClient` will throw an `IllegalArgumentException` if neither the client nor the request specify a URL.
+- *LBHttpSolrClient* and *LBHttp2SolrClient* - Solr's "load balancing" clients are frequently used to round-robin requests across a set of replicas or cores.
+URLs are still expected to point to the Solr root (i.e. "/solr"), but to support this use-case the URLs are often supplemented by an additional parameter to specify the targeted core.
+Alternatively, some "load balancing" methods make use of an `Endpoint` abstraction to provide this URL and core information in a more structured way.
+- *CloudSolrClient* - Like many clients, CloudSolrClient accepts a series of URLs pointing to the Solr root path (i.e. "/solr").
++
 [source,java,indent=0]
 ----
 include::example$UsingSolrJRefGuideExamplesTest.java[tag=solrj-cloudsolrclient-baseurl]
 ----
-
-In case a `baseUrl` is not provided, then a list of ZooKeeper hosts (with ports) and ZooKeeper root must be provided.
-If no ZooKeeper root is used then `java.util.Optional.empty()` has to be provided as part of the method.
-
++
+However, unlike other clients, these URLs aren't used to send user-provided requests, but instead serve to fetch information about the layout and health of the Solr cluster.
+If Solr URLs are not known, users may instead specify a list of ZooKeeper hosts (with ports) and ZooKeeper root path, for the `CloudSolrClient` to fetch this information from ZooKeeper directly.
++
 [source,java,indent=0]
 ----
-include::example$UsingSolrJRefGuideExamplesTest.java[tag=solrj-cloudsolrclient-zookeepernoroot]
+include::example$UsingSolrJRefGuideExamplesTest.java[tag=solrj-cloudsolrclient-zookeeperroot]
 ----
-
++
+If no ZooKeeper root path is used then `java.util.Optional.empty()` should be provided.
++
 [source,java,indent=0]
 ----
-include::example$UsingSolrJRefGuideExamplesTest.java[tag=solrj-cloudsolrclient-zookeeperroot]
+include::example$UsingSolrJRefGuideExamplesTest.java[tag=solrj-cloudsolrclient-zookeepernoroot]
 ----
++
+`CloudSolrClient` users who wish to provide ZooKeeper connection information, must depend on the `solr-solrj-zookeeper` artifact to have all of the necessary classes available.
+The ZooKeeper based connection is the most reliable and performant means for CloudSolrClient to work.  On the other hand, it means exposing ZooKeeper more broadly than to Solr nodes, which is a security risk.  It also adds more JAR dependencies.
 
-Additionally, you will need to depend on the `solr-solrj-zookeeper` artifact or else you will get a `ClassNotFoundException`.
+==== Default Collections
 
-The ZooKeeper based connection is the most reliable and performant means for CloudSolrClient to work.  On the other hand, it means exposing ZooKeeper more broadly than to Solr nodes, which is a security risk.  It also adds more JAR dependencies.
+Most `SolrClient` methods allow users to specify the core or collection they wish to query, etc. as a `String` parameter.
+However continually specifying this parameter can become tedious, especially for users who always work with the same collection.
+
+Users can avoid this pattern by specifying a "default" collection when creating their client, using the `withDefaultCollection(String)` method available on the relevant `SolrClient` Builder object.
+If specified on a Builder, the created `SolrClient` will use this default for making requests whenever a collection or core is needed (and no overriding value is specified).
 
 ==== Timeouts
 All `SolrClient` implementations allow users to specify the connection and read timeouts for communicating with Solr.

diff --git a/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc b/solr/solr-ref-guide/modules/upgrade-notes/pages/major-changes-in-solr-10.adoc
@@ -28,7 +28,10 @@ Before starting an upgrade to this version of Solr, please take the time to revi
 
 === SolrJ
 
-Starting in 10, the Maven POM for SolrJ does not refer to SolrJ modules like ZooKeeper.  If you require such functionality, you need to add additional dependencies.
+* Starting in 10, the Maven POM for SolrJ does not refer to SolrJ modules like ZooKeeper.  If you require such functionality, you need to add additional dependencies.
+
+* `SolrClient` implementations that rely on "base URL" strings now only accept "root" URL paths (i.e. URLs that end in "/solr").
+Users who previously relied on collection-specific URLs to avoid including the collection name with each request can instead achieve this by specifying a "default collection" using the `withDefaultCollection` method available on most `SolrClient` Builders.
 
 === Deprecation removals
 
@@ -47,4 +50,4 @@ has been removed. Please use `-Dsolr.hiddenSysProps` or the envVar `SOLR_HIDDEN_
 
 * The node configuration file `/solr.xml` can no longer be loaded from Zookeeper. Solr startup will fail if it is present.
 
-* The legacy Circuit Breaker named `CircuitBreakerManager`, is removed. Please use individual Circuit Breaker plugins instead.
+* The legacy Circuit Breaker named `CircuitBreakerManager`, is removed. Please use individual Circuit Breaker plugins instead.
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/CloudSolrClient.java
@@ -492,14 +492,18 @@ private NamedList<Object> directUpdate(AbstractUpdateRequest request, String col
       nonRoutableRequest.setParams(nonRoutableParams);
       nonRoutableRequest.setBasicAuthCredentials(
           request.getBasicAuthUser(), request.getBasicAuthPassword());
-      List<String> urlList = new ArrayList<>(routes.keySet());
-      Collections.shuffle(urlList, rand);
-      LBSolrClient.Req req = new LBSolrClient.Req(nonRoutableRequest, urlList);
+      final var endpoints =
+          routes.keySet().stream()
+              .map(url -> LBSolrClient.Endpoint.from(url))
+              .collect(Collectors.toList());
+      Collections.shuffle(endpoints, rand);
+      LBSolrClient.Req req = new LBSolrClient.Req(nonRoutableRequest, endpoints);
       try {
         LBSolrClient.Rsp rsp = getLbClient().request(req);
-        shardResponses.add(urlList.get(0), rsp.getResponse());
+        shardResponses.add(endpoints.get(0).toString(), rsp.getResponse());
       } catch (Exception e) {
-        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, urlList.get(0), e);
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR, endpoints.get(0).toString(), e);
       }
     }
 
@@ -1018,18 +1022,21 @@ protected NamedList<Object> sendRequest(SolrRequest<?> request, List<String> inp
     final String urlScheme = provider.getClusterProperty(ClusterState.URL_SCHEME, "http");
     final Set<String> liveNodes = provider.getLiveNodes();
 
-    final List<String> theUrlList = new ArrayList<>(); // we populate this as follows...
+    final List<LBSolrClient.Endpoint> requestEndpoints =
+        new ArrayList<>(); // we populate this as follows...
 
     if (request instanceof V2Request) {
       if (!liveNodes.isEmpty()) {
         List<String> liveNodesList = new ArrayList<>(liveNodes);
         Collections.shuffle(liveNodesList, rand);
-        theUrlList.add(Utils.getBaseUrlForNodeName(liveNodesList.get(0), urlScheme));
+        final var chosenNodeUrl = Utils.getBaseUrlForNodeName(liveNodesList.get(0), urlScheme);
+        requestEndpoints.add(new LBSolrClient.Endpoint(chosenNodeUrl));
       }
 
     } else if (ADMIN_PATHS.contains(request.getPath())) {
       for (String liveNode : liveNodes) {
-        theUrlList.add(Utils.getBaseUrlForNodeName(liveNode, urlScheme));
+        final var nodeBaseUrl = Utils.getBaseUrlForNodeName(liveNode, urlScheme);
+        requestEndpoints.add(new LBSolrClient.Endpoint(nodeBaseUrl));
       }
 
     } else { // Typical...
@@ -1044,13 +1051,13 @@ protected NamedList<Object> sendRequest(SolrRequest<?> request, List<String> inp
       List<String> preferredNodes = request.getPreferredNodes();
       if (preferredNodes != null && !preferredNodes.isEmpty()) {
         String joinedInputCollections = StrUtils.join(inputCollections, ',');
-        List<String> urlList = new ArrayList<>(preferredNodes.size());
-        for (String nodeName : preferredNodes) {
-          urlList.add(
-              Utils.getBaseUrlForNodeName(nodeName, urlScheme) + "/" + joinedInputCollections);
-        }
-        if (!urlList.isEmpty()) {
-          LBSolrClient.Req req = new LBSolrClient.Req(request, urlList);
+        final var endpoints =
+            preferredNodes.stream()
+                .map(nodeName -> Utils.getBaseUrlForNodeName(nodeName, urlScheme))
+                .map(nodeUrl -> new LBSolrClient.Endpoint(nodeUrl, joinedInputCollections))
+                .collect(Collectors.toList());
+        if (!endpoints.isEmpty()) {
+          LBSolrClient.Req req = new LBSolrClient.Req(request, endpoints);
           LBSolrClient.Rsp rsp = getLbClient().request(req);
           return rsp.getResponse();
         }
@@ -1110,23 +1117,24 @@ protected NamedList<Object> sendRequest(SolrRequest<?> request, List<String> inp
               if (inputCollections.size() == 1 && collectionNames.size() == 1) {
                 // If we have a single collection name (and not a alias to multiple collection),
                 // send the query directly to a replica of this collection.
-                theUrlList.add(replica.getCoreUrl());
+                requestEndpoints.add(
+                    new LBSolrClient.Endpoint(replica.getBaseUrl(), replica.getCoreName()));
               } else {
-                theUrlList.add(
-                    ZkCoreNodeProps.getCoreUrl(replica.getBaseUrl(), joinedInputCollections));
+                requestEndpoints.add(
+                    new LBSolrClient.Endpoint(replica.getBaseUrl(), joinedInputCollections));
               }
             }
           });
 
-      if (theUrlList.isEmpty()) {
+      if (requestEndpoints.isEmpty()) {
         collectionStateCache.keySet().removeAll(collectionNames);
         throw new SolrException(
             SolrException.ErrorCode.INVALID_STATE,
             "Could not find a healthy node to handle the request.");
       }
     }
 
-    LBSolrClient.Req req = new LBSolrClient.Req(request, theUrlList);
+    LBSolrClient.Req req = new LBSolrClient.Req(request, requestEndpoints);
     LBSolrClient.Rsp rsp = getLbClient().request(req);
     return rsp.getResponse();
   }

diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateHttp2SolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateHttp2SolrClient.java
@@ -709,10 +709,48 @@ public static class Builder {
     protected boolean closeHttp2Client;
     private long pollQueueTimeMillis;
 
+    /**
+     * Initialize a Builder object, based on the provided URL and client.
+     *
+     * <p>The provided URL must point to the root Solr path (i.e. "/solr"), for example:
+     *
+     * <pre>
+     *   SolrClient client = new ConcurrentUpdateHttp2SolrClient.Builder("http://my-solr-server:8983/solr", http2Client)
+     *       .withDefaultCollection("core1")
+     *       .build();
+     *   QueryResponse resp = client.query(new SolrQuery("*:*"));
+     * </pre>
+     *
+     * @param baseSolrUrl a URL pointing to the root Solr path, typically of the form
+     *     "http[s]://host:port/solr"
+     * @param client a client for this ConcurrentUpdateHttp2SolrClient to use for all requests
+     *     internally. Callers are responsible for closing the provided client (after closing any
+     *     clients created by this builder)
+     */
     public Builder(String baseSolrUrl, Http2SolrClient client) {
       this(baseSolrUrl, client, false);
     }
 
+    /**
+     * Initialize a Builder object, based on the provided arguments.
+     *
+     * <p>The provided URL must point to the root Solr path (i.e. "/solr"), for example:
+     *
+     * <pre>
+     *   SolrClient client = new ConcurrentUpdateHttp2SolrClient.Builder("http://my-solr-server:8983/solr", http2Client)
+     *       .withDefaultCollection("core1")
+     *       .build();
+     *   QueryResponse resp = client.query(new SolrQuery("*:*"));
+     * </pre>
+     *
+     * @param baseSolrUrl a URL pointing to the root Solr path, typically of the form
+     *     "http[s]://host:port/solr"
+     * @param client a client for this ConcurrentUpdateHttp2SolrClient to use for all requests
+     *     internally.
+     * @param closeHttp2Client a boolean flag indicating whether the created
+     *     ConcurrentUpdateHttp2SolrClient should assume responsibility for closing the provided
+     *     'client'
+     */
     public Builder(String baseSolrUrl, Http2SolrClient client, boolean closeHttp2Client) {
       this.baseSolrUrl = baseSolrUrl;
       this.client = client;