Skip to content

Commit

Permalink
SOLR-17066: Switch "LB" clients away from core URLs (apache#2283)
Browse files Browse the repository at this point in the history
Providing a core URL String as a SolrClient's "base URL" prevents
it from communicating with other cores or making core-agnostic API
requests (e.g. node healthcheck, list cores, etc.)

This commit migrates usage of both "LB" clients away from using
raw core-URL Strings, in favor of using a new structured 'Endpoint'
class, which allows access to both the "root URL" and "collection"
separately.

This commit also updates various ref-guides and Javadoc snippets to
reflect the fact that clients now only accept "root URLs" (with a small
exception for LB clients which may use the Endpoint class, as mentioned
above).
  • Loading branch information
gerlowskija authored Feb 26, 2024
1 parent b187595 commit c5709bd
Show file tree
Hide file tree
Showing 28 changed files with 684 additions and 439 deletions.
5 changes: 5 additions & 0 deletions solr/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,11 @@ Deprecation Removals

* SOLR-17159: Remove deprecated bin/post and bin/postlogs scripts in favour of bin/solr equivalents. (Eric Pugh)

* SOLR-17066: `SolrClient` implementations that rely on "base URL" strings now only accept "root" URL paths (i.e. URLs that
end in "/solr"). Users may still specify a default collection through use of the `withDefaultCollection` method available
on most SolrClient builders. (Jason Gerlowski, David Smiley, Eric Pugh)


Dependency Upgrades
---------------------
(No changes)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.concurrent.ExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.impl.Http2SolrClient;
import org.apache.solr.client.solrj.impl.HttpClientUtil;
Expand Down Expand Up @@ -347,7 +348,10 @@ protected LBSolrClient.Req newLBHttpSolrClientReq(final QueryRequest req, List<S
if (numServersToTry < this.permittedLoadBalancerRequestsMinimumAbsolute) {
numServersToTry = this.permittedLoadBalancerRequestsMinimumAbsolute;
}
return new LBSolrClient.Req(req, urls, numServersToTry);

final var endpoints =
urls.stream().map(url -> LBSolrClient.Endpoint.from(url)).collect(Collectors.toList());
return new LBSolrClient.Req(req, endpoints, numServersToTry);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ private void setUrlScheme(String value) throws Exception {
.map(r -> r.getStr(ZkStateReader.BASE_URL_PROP))
.toArray(String[]::new);
// Create new SolrServer to configure new HttpClient w/ SSL config
try (SolrClient client = new LBHttpSolrClient.Builder().withBaseSolrUrls(urls).build()) {
try (SolrClient client = new LBHttpSolrClient.Builder().withBaseEndpoints(urls).build()) {
client.request(request);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ public void testCompareWithNonDistributedRequest() throws SolrServerException, I
}

public void testTolerantSearch() throws SolrServerException, IOException {
String badShard = DEAD_HOST_1;
String badShard = DEAD_HOST_1 + "/solr/collection1";
SolrQuery query = new SolrQuery();
query.setQuery("*:*");
query.set("debug", "true");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public void testLoadBalancerRequestsMinMax() {
final QueryRequest queryRequest = null;
final List<String> urls = new ArrayList<>();
for (int ii = 0; ii < 10; ++ii) {
urls.add(null);
urls.add("http://localhost" + ii + ":8983/solr");
}

// create LBHttpSolrClient request
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,9 @@ public void testGlassBoxUpdateRequestRoutesToShards() {
final Map<String, List<String>> urlMap =
docCol.getActiveSlices().stream()
.collect(
Collectors.toMap(s -> s.getName(), s -> Collections.singletonList(s.getName())));
Collectors.toMap(
s -> s.getName(),
s -> Collections.singletonList(fakeSolrUrlForShard(s.getName()))));

// simplified rote info we'll build up with the shards for each delete (after sanity checking
// they have routing info at all)...
Expand All @@ -314,7 +316,7 @@ public void testGlassBoxUpdateRequestRoutesToShards() {
.getRoutesToCollection(docCol.getRouter(), docCol, urlMap, params(), ROUTE_FIELD);
for (LBSolrClient.Req lbreq : rawDelRoutes.values()) {
assertTrue(lbreq.getRequest() instanceof UpdateRequest);
final String shard = lbreq.getServers().get(0);
final LBSolrClient.Endpoint shard = lbreq.getEndpoints().get(0);
final UpdateRequest req = (UpdateRequest) lbreq.getRequest();
for (Map.Entry<String, Map<String, Object>> entry : req.getDeleteByIdMap().entrySet()) {
final String id = entry.getKey();
Expand All @@ -327,7 +329,7 @@ public void testGlassBoxUpdateRequestRoutesToShards() {
RVAL_PRE + id.substring(id.length() - 1),
route.toString());

actualDelRoutes.put(id, shard);
actualDelRoutes.put(id, shard.toString());
}
}

Expand All @@ -342,11 +344,18 @@ public void testGlassBoxUpdateRequestRoutesToShards() {
.getRouter()
.getTargetSlice(id, doc, doc.getFieldValue(ROUTE_FIELD).toString(), params(), docCol);
assertNotNull(id + " add route is null?", expectedShard);
assertEquals("Wrong shard for delete of id: " + id, expectedShard.getName(), actualShard);
assertEquals(
"Wrong shard for delete of id: " + id,
fakeSolrUrlForShard(expectedShard.getName()),
actualShard.toString());
}

// sanity check no one broke our test and made it a waste of time
assertEquals(100, add100Docs().getDocuments().size());
assertEquals(100, actualDelRoutes.entrySet().size());
}

private static String fakeSolrUrlForShard(String shardName) {
return "http://localhost:8983/solr/" + shardName;
}
}
63 changes: 30 additions & 33 deletions solr/solr-ref-guide/modules/deployment-guide/pages/solrj.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -117,51 +117,48 @@ The most common/important of these are discussed below.
For comprehensive information on how to tweak your `SolrClient`, see the Javadocs for the involved client, and its corresponding builder object.

==== Base URLs
Most `SolrClient` implementations (except for `CloudSolrClient` and `Http2SolrClient`) require users to specify one or more Solr base URLs, which the client then uses to send HTTP requests to Solr.
The path users include on the base URL they provide has an effect on the behavior of the created client from that point on.

. A URL with a path pointing to a specific core or collection (e.g., `\http://hostname:8983/solr/core1`).
When a core or collection is specified in the base URL, subsequent requests made with that client are not required to re-specify the affected collection.
However, the client is limited to sending requests to that core/collection, and can not send requests to any others.
. A URL pointing to the root Solr path (e.g., `\http://hostname:8983/solr`).
When no core or collection is specified in the base URL, requests can be made to any core/collection, but the affected core/collection must be specified on all requests.

Generally speaking, if your `SolrClient` will only be used on a single core/collection, including that entity in the path is the most convenient.
Where more flexibility is required, the collection/core should be excluded.

==== Base URLs of Http2SolrClient
The `Http2SolrClient` manages connections to different nodes efficiently.
`Http2SolrClient` does not require a `baseUrl`.
In case a `baseUrl` is not provided, then `SolrRequest.basePath` must be set, so
`Http2SolrClient` knows which nodes to send requests to.
If not an `IllegalArgumentException` will be thrown.

==== Base URLs of CloudSolrClient

It is also possible to specify base URLs for `CloudSolrClient`, but URLs are expected to point to the root Solr path (e.g., `\http://hostname:8983/solr`).
They should not include any collections, cores, or other path components.

Many `SolrClient` implementations require users to specify one or more Solr URLs, which the client then uses to send HTTP requests to Solr.
Unless otherwise specified, SolrJ expects these URLs to point to the root Solr path (i.e. "/solr").

A few notable exceptions to this are described below:

- *Http2SolrClient* - Users of `Http2SolrClient` may choose to skip providing a root URL to their client, in favor of specifing the URL on a request-by-request basis using `SolrRequest.setBasePath`.
`Http2SolrClient` will throw an `IllegalArgumentException` if neither the client nor the request specify a URL.
- *LBHttpSolrClient* and *LBHttp2SolrClient* - Solr's "load balancing" clients are frequently used to round-robin requests across a set of replicas or cores.
URLs are still expected to point to the Solr root (i.e. "/solr"), but to support this use-case the URLs are often supplemented by an additional parameter to specify the targeted core.
Alternatively, some "load balancing" methods make use of an `Endpoint` abstraction to provide this URL and core information in a more structured way.
- *CloudSolrClient* - Like many clients, CloudSolrClient accepts a series of URLs pointing to the Solr root path (i.e. "/solr").
+
[source,java,indent=0]
----
include::example$UsingSolrJRefGuideExamplesTest.java[tag=solrj-cloudsolrclient-baseurl]
----

In case a `baseUrl` is not provided, then a list of ZooKeeper hosts (with ports) and ZooKeeper root must be provided.
If no ZooKeeper root is used then `java.util.Optional.empty()` has to be provided as part of the method.

+
However, unlike other clients, these URLs aren't used to send user-provided requests, but instead serve to fetch information about the layout and health of the Solr cluster.
If Solr URLs are not known, users may instead specify a list of ZooKeeper hosts (with ports) and ZooKeeper root path, for the `CloudSolrClient` to fetch this information from ZooKeeper directly.
+
[source,java,indent=0]
----
include::example$UsingSolrJRefGuideExamplesTest.java[tag=solrj-cloudsolrclient-zookeepernoroot]
include::example$UsingSolrJRefGuideExamplesTest.java[tag=solrj-cloudsolrclient-zookeeperroot]
----

+
If no ZooKeeper root path is used then `java.util.Optional.empty()` should be provided.
+
[source,java,indent=0]
----
include::example$UsingSolrJRefGuideExamplesTest.java[tag=solrj-cloudsolrclient-zookeeperroot]
include::example$UsingSolrJRefGuideExamplesTest.java[tag=solrj-cloudsolrclient-zookeepernoroot]
----
+
`CloudSolrClient` users who wish to provide ZooKeeper connection information, must depend on the `solr-solrj-zookeeper` artifact to have all of the necessary classes available.
The ZooKeeper based connection is the most reliable and performant means for CloudSolrClient to work. On the other hand, it means exposing ZooKeeper more broadly than to Solr nodes, which is a security risk. It also adds more JAR dependencies.

Additionally, you will need to depend on the `solr-solrj-zookeeper` artifact or else you will get a `ClassNotFoundException`.
==== Default Collections

The ZooKeeper based connection is the most reliable and performant means for CloudSolrClient to work. On the other hand, it means exposing ZooKeeper more broadly than to Solr nodes, which is a security risk. It also adds more JAR dependencies.
Most `SolrClient` methods allow users to specify the core or collection they wish to query, etc. as a `String` parameter.
However continually specifying this parameter can become tedious, especially for users who always work with the same collection.

Users can avoid this pattern by specifying a "default" collection when creating their client, using the `withDefaultCollection(String)` method available on the relevant `SolrClient` Builder object.
If specified on a Builder, the created `SolrClient` will use this default for making requests whenever a collection or core is needed (and no overriding value is specified).

==== Timeouts
All `SolrClient` implementations allow users to specify the connection and read timeouts for communicating with Solr.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@ Before starting an upgrade to this version of Solr, please take the time to revi

=== SolrJ

Starting in 10, the Maven POM for SolrJ does not refer to SolrJ modules like ZooKeeper. If you require such functionality, you need to add additional dependencies.
* Starting in 10, the Maven POM for SolrJ does not refer to SolrJ modules like ZooKeeper. If you require such functionality, you need to add additional dependencies.

* `SolrClient` implementations that rely on "base URL" strings now only accept "root" URL paths (i.e. URLs that end in "/solr").
Users who previously relied on collection-specific URLs to avoid including the collection name with each request can instead achieve this by specifying a "default collection" using the `withDefaultCollection` method available on most `SolrClient` Builders.

=== Deprecation removals

Expand All @@ -47,4 +50,4 @@ has been removed. Please use `-Dsolr.hiddenSysProps` or the envVar `SOLR_HIDDEN_

* The node configuration file `/solr.xml` can no longer be loaded from Zookeeper. Solr startup will fail if it is present.

* The legacy Circuit Breaker named `CircuitBreakerManager`, is removed. Please use individual Circuit Breaker plugins instead.
* The legacy Circuit Breaker named `CircuitBreakerManager`, is removed. Please use individual Circuit Breaker plugins instead.
Original file line number Diff line number Diff line change
Expand Up @@ -492,14 +492,18 @@ private NamedList<Object> directUpdate(AbstractUpdateRequest request, String col
nonRoutableRequest.setParams(nonRoutableParams);
nonRoutableRequest.setBasicAuthCredentials(
request.getBasicAuthUser(), request.getBasicAuthPassword());
List<String> urlList = new ArrayList<>(routes.keySet());
Collections.shuffle(urlList, rand);
LBSolrClient.Req req = new LBSolrClient.Req(nonRoutableRequest, urlList);
final var endpoints =
routes.keySet().stream()
.map(url -> LBSolrClient.Endpoint.from(url))
.collect(Collectors.toList());
Collections.shuffle(endpoints, rand);
LBSolrClient.Req req = new LBSolrClient.Req(nonRoutableRequest, endpoints);
try {
LBSolrClient.Rsp rsp = getLbClient().request(req);
shardResponses.add(urlList.get(0), rsp.getResponse());
shardResponses.add(endpoints.get(0).toString(), rsp.getResponse());
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, urlList.get(0), e);
throw new SolrException(
SolrException.ErrorCode.SERVER_ERROR, endpoints.get(0).toString(), e);
}
}

Expand Down Expand Up @@ -1018,18 +1022,21 @@ protected NamedList<Object> sendRequest(SolrRequest<?> request, List<String> inp
final String urlScheme = provider.getClusterProperty(ClusterState.URL_SCHEME, "http");
final Set<String> liveNodes = provider.getLiveNodes();

final List<String> theUrlList = new ArrayList<>(); // we populate this as follows...
final List<LBSolrClient.Endpoint> requestEndpoints =
new ArrayList<>(); // we populate this as follows...

if (request instanceof V2Request) {
if (!liveNodes.isEmpty()) {
List<String> liveNodesList = new ArrayList<>(liveNodes);
Collections.shuffle(liveNodesList, rand);
theUrlList.add(Utils.getBaseUrlForNodeName(liveNodesList.get(0), urlScheme));
final var chosenNodeUrl = Utils.getBaseUrlForNodeName(liveNodesList.get(0), urlScheme);
requestEndpoints.add(new LBSolrClient.Endpoint(chosenNodeUrl));
}

} else if (ADMIN_PATHS.contains(request.getPath())) {
for (String liveNode : liveNodes) {
theUrlList.add(Utils.getBaseUrlForNodeName(liveNode, urlScheme));
final var nodeBaseUrl = Utils.getBaseUrlForNodeName(liveNode, urlScheme);
requestEndpoints.add(new LBSolrClient.Endpoint(nodeBaseUrl));
}

} else { // Typical...
Expand All @@ -1044,13 +1051,13 @@ protected NamedList<Object> sendRequest(SolrRequest<?> request, List<String> inp
List<String> preferredNodes = request.getPreferredNodes();
if (preferredNodes != null && !preferredNodes.isEmpty()) {
String joinedInputCollections = StrUtils.join(inputCollections, ',');
List<String> urlList = new ArrayList<>(preferredNodes.size());
for (String nodeName : preferredNodes) {
urlList.add(
Utils.getBaseUrlForNodeName(nodeName, urlScheme) + "/" + joinedInputCollections);
}
if (!urlList.isEmpty()) {
LBSolrClient.Req req = new LBSolrClient.Req(request, urlList);
final var endpoints =
preferredNodes.stream()
.map(nodeName -> Utils.getBaseUrlForNodeName(nodeName, urlScheme))
.map(nodeUrl -> new LBSolrClient.Endpoint(nodeUrl, joinedInputCollections))
.collect(Collectors.toList());
if (!endpoints.isEmpty()) {
LBSolrClient.Req req = new LBSolrClient.Req(request, endpoints);
LBSolrClient.Rsp rsp = getLbClient().request(req);
return rsp.getResponse();
}
Expand Down Expand Up @@ -1110,23 +1117,24 @@ protected NamedList<Object> sendRequest(SolrRequest<?> request, List<String> inp
if (inputCollections.size() == 1 && collectionNames.size() == 1) {
// If we have a single collection name (and not a alias to multiple collection),
// send the query directly to a replica of this collection.
theUrlList.add(replica.getCoreUrl());
requestEndpoints.add(
new LBSolrClient.Endpoint(replica.getBaseUrl(), replica.getCoreName()));
} else {
theUrlList.add(
ZkCoreNodeProps.getCoreUrl(replica.getBaseUrl(), joinedInputCollections));
requestEndpoints.add(
new LBSolrClient.Endpoint(replica.getBaseUrl(), joinedInputCollections));
}
}
});

if (theUrlList.isEmpty()) {
if (requestEndpoints.isEmpty()) {
collectionStateCache.keySet().removeAll(collectionNames);
throw new SolrException(
SolrException.ErrorCode.INVALID_STATE,
"Could not find a healthy node to handle the request.");
}
}

LBSolrClient.Req req = new LBSolrClient.Req(request, theUrlList);
LBSolrClient.Req req = new LBSolrClient.Req(request, requestEndpoints);
LBSolrClient.Rsp rsp = getLbClient().request(req);
return rsp.getResponse();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -709,10 +709,48 @@ public static class Builder {
protected boolean closeHttp2Client;
private long pollQueueTimeMillis;

/**
* Initialize a Builder object, based on the provided URL and client.
*
* <p>The provided URL must point to the root Solr path (i.e. "/solr"), for example:
*
* <pre>
* SolrClient client = new ConcurrentUpdateHttp2SolrClient.Builder("http://my-solr-server:8983/solr", http2Client)
* .withDefaultCollection("core1")
* .build();
* QueryResponse resp = client.query(new SolrQuery("*:*"));
* </pre>
*
* @param baseSolrUrl a URL pointing to the root Solr path, typically of the form
* "http[s]://host:port/solr"
* @param client a client for this ConcurrentUpdateHttp2SolrClient to use for all requests
* internally. Callers are responsible for closing the provided client (after closing any
* clients created by this builder)
*/
public Builder(String baseSolrUrl, Http2SolrClient client) {
this(baseSolrUrl, client, false);
}

/**
* Initialize a Builder object, based on the provided arguments.
*
* <p>The provided URL must point to the root Solr path (i.e. "/solr"), for example:
*
* <pre>
* SolrClient client = new ConcurrentUpdateHttp2SolrClient.Builder("http://my-solr-server:8983/solr", http2Client)
* .withDefaultCollection("core1")
* .build();
* QueryResponse resp = client.query(new SolrQuery("*:*"));
* </pre>
*
* @param baseSolrUrl a URL pointing to the root Solr path, typically of the form
* "http[s]://host:port/solr"
* @param client a client for this ConcurrentUpdateHttp2SolrClient to use for all requests
* internally.
* @param closeHttp2Client a boolean flag indicating whether the created
* ConcurrentUpdateHttp2SolrClient should assume responsibility for closing the provided
* 'client'
*/
public Builder(String baseSolrUrl, Http2SolrClient client, boolean closeHttp2Client) {
this.baseSolrUrl = baseSolrUrl;
this.client = client;
Expand Down
Loading

0 comments on commit c5709bd

Please sign in to comment.