Skip to content

Commit b981ef2

Browse files
committed
idle timeouts should cause servers to be added to the zombie list
1 parent d794181 commit b981ef2

File tree

4 files changed

+93
-2
lines changed

4 files changed

+93
-2
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
2+
title: Add unresponsive servers to zombie list
3+
type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other
4+
authors:
5+
- name: James Vanneman
6+
links:
7+
- name: SOLR-18002
8+
url: https://issues.apache.org/jira/browse/SOLR-18002

solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBAsyncSolrClient.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.net.SocketException;
2222
import java.net.SocketTimeoutException;
2323
import java.util.concurrent.CompletableFuture;
24+
import java.util.concurrent.TimeoutException;
2425
import java.util.concurrent.atomic.AtomicReference;
2526
import org.apache.solr.client.solrj.SolrClient;
2627
import org.apache.solr.client.solrj.SolrRequest;
@@ -208,7 +209,7 @@ private void onFailedRequest(
208209
}
209210
} catch (SolrServerException e) {
210211
Throwable rootCause = e.getRootCause();
211-
if (!isNonRetryable && rootCause instanceof IOException) {
212+
if (!isNonRetryable && (rootCause instanceof IOException || rootCause instanceof TimeoutException)) {
212213
listener.onFailure((!isZombie) ? makeServerAZombie(endpoint, e) : e, true);
213214
} else if (isNonRetryable && rootCause instanceof ConnectException) {
214215
listener.onFailure((!isZombie) ? makeServerAZombie(endpoint, e) : e, true);

solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.util.concurrent.Executors;
3939
import java.util.concurrent.ScheduledExecutorService;
4040
import java.util.concurrent.TimeUnit;
41+
import java.util.concurrent.TimeoutException;
4142
import java.util.concurrent.atomic.AtomicInteger;
4243
import java.util.stream.Collectors;
4344
import org.apache.solr.client.solrj.SolrClient;
@@ -663,7 +664,7 @@ protected Exception doRequest(
663664
}
664665
} catch (SolrServerException e) {
665666
Throwable rootCause = e.getRootCause();
666-
if (!isNonRetryable && rootCause instanceof IOException) {
667+
if (!isNonRetryable && (rootCause instanceof IOException || rootCause instanceof TimeoutException)) {
667668
ex = (!isZombie) ? makeServerAZombie(baseUrl, e) : e;
668669
} else if (isNonRetryable && rootCause instanceof ConnectException) {
669670
ex = (!isZombie) ? makeServerAZombie(baseUrl, e) : e;

solr/solrj/src/test/org/apache/solr/client/solrj/impl/LB2SolrClientTest.java

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import java.io.IOException;
2020
import java.io.UncheckedIOException;
21+
import java.net.ServerSocket;
2122
import java.nio.file.Files;
2223
import java.nio.file.Path;
2324
import java.util.ArrayList;
@@ -32,6 +33,7 @@
3233
import org.apache.solr.client.solrj.SolrServerException;
3334
import org.apache.solr.client.solrj.jetty.HttpJettySolrClient;
3435
import org.apache.solr.client.solrj.request.SolrQuery;
36+
import org.apache.solr.client.solrj.request.QueryRequest;
3537
import org.apache.solr.client.solrj.response.QueryResponse;
3638
import org.apache.solr.client.solrj.response.SolrResponseBase;
3739
import org.apache.solr.common.SolrInputDocument;
@@ -202,6 +204,29 @@ public void testTwoServers() throws Exception {
202204
}
203205
}
204206

207+
public void testTimeoutExceptionMarksServerAsZombie() throws Exception {
208+
try (ZombieTestContext ctx = new ZombieTestContext()) {
209+
LBSolrClient.Req lbReq = ctx.createQueryRequest();
210+
211+
try {
212+
ctx.lbClient.request(lbReq);
213+
} catch (Exception e) {
214+
}
215+
216+
ctx.assertZombieState();
217+
}
218+
}
219+
220+
public void testTimeoutExceptionMarksServerAsZombieAsyncRequest() throws Exception {
221+
try (ZombieTestContext ctx = new ZombieTestContext()) {
222+
LBSolrClient.Req lbReq = ctx.createQueryRequest();
223+
224+
ctx.lbClient.requestAsync(lbReq).exceptionally(e -> null).get();
225+
226+
ctx.assertZombieState();
227+
}
228+
}
229+
205230
private LBSolrClient.Endpoint[] bootstrapBaseSolrEndpoints(int max) {
206231
LBSolrClient.Endpoint[] solrUrls = new LBSolrClient.Endpoint[max];
207232
for (int i = 0; i < max; i++) {
@@ -330,4 +355,60 @@ public void close() {
330355
}
331356
}
332357
}
358+
359+
private class ZombieTestContext implements AutoCloseable {
360+
final ServerSocket blackhole;
361+
final LBSolrClient.Endpoint nonRoutableEndpoint;
362+
final HttpJettySolrClient delegateClient;
363+
final LBAsyncSolrClient<?> lbClient;
364+
365+
ZombieTestContext() throws Exception {
366+
//create a socket that allows a client to connect but causes them to hang until idleTimeout is triggered
367+
blackhole = new ServerSocket(0);
368+
int blackholePort = blackhole.getLocalPort();
369+
nonRoutableEndpoint =
370+
new LBSolrClient.Endpoint("http://localhost:" + blackholePort + "/solr");
371+
372+
delegateClient =
373+
new HttpJettySolrClient.Builder()
374+
.withConnectionTimeout(1000, TimeUnit.MILLISECONDS)
375+
.withIdleTimeout(100, TimeUnit.MILLISECONDS)
376+
.build();
377+
378+
lbClient =
379+
new HttpJettySolrClient.Builder<>(delegateClient)
380+
.setAliveCheckInterval(500, TimeUnit.MILLISECONDS)
381+
.build();
382+
}
383+
384+
LBSolrClient.Req createQueryRequest() {
385+
SolrQuery solrQuery = new SolrQuery("*:*");
386+
QueryRequest queryRequest = new QueryRequest(solrQuery);
387+
388+
List<LBSolrClient.Endpoint> endpoints =
389+
List.of(
390+
new LBSolrClient.Endpoint(
391+
nonRoutableEndpoint.getBaseUrl(), solr[0].getDefaultCollection())
392+
);
393+
return new LBSolrClient.Req(queryRequest, endpoints);
394+
}
395+
396+
void assertZombieState() {
397+
assertTrue(
398+
"Non-routable endpoint should be marked as zombie due to timeout",
399+
lbClient.zombieServers.containsKey(
400+
nonRoutableEndpoint.getBaseUrl() + "/" + solr[0].getDefaultCollection()));
401+
}
402+
403+
@Override
404+
public void close() {
405+
lbClient.close();
406+
delegateClient.close();
407+
try {
408+
blackhole.close();
409+
} catch (IOException ioe) {
410+
411+
}
412+
}
413+
}
333414
}

0 commit comments

Comments
 (0)