Skip to content

Commit a395c3e

Browse files
committed
idle timeouts should cause servers to be added to the zombie list
1 parent d794181 commit a395c3e

File tree

4 files changed

+91
-2
lines changed

4 files changed

+91
-2
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
2+
title: Add unresponsive servers to zombie list
3+
type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other
4+
authors:
5+
- name: James Vanneman
6+
links:
7+
- name: SOLR-18002
8+
url: https://issues.apache.org/jira/browse/SOLR-18002

solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBAsyncSolrClient.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.net.SocketException;
2222
import java.net.SocketTimeoutException;
2323
import java.util.concurrent.CompletableFuture;
24+
import java.util.concurrent.TimeoutException;
2425
import java.util.concurrent.atomic.AtomicReference;
2526
import org.apache.solr.client.solrj.SolrClient;
2627
import org.apache.solr.client.solrj.SolrRequest;
@@ -208,7 +209,7 @@ private void onFailedRequest(
208209
}
209210
} catch (SolrServerException e) {
210211
Throwable rootCause = e.getRootCause();
211-
if (!isNonRetryable && rootCause instanceof IOException) {
212+
if (!isNonRetryable && (rootCause instanceof IOException || rootCause instanceof TimeoutException)) {
212213
listener.onFailure((!isZombie) ? makeServerAZombie(endpoint, e) : e, true);
213214
} else if (isNonRetryable && rootCause instanceof ConnectException) {
214215
listener.onFailure((!isZombie) ? makeServerAZombie(endpoint, e) : e, true);

solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.util.concurrent.Executors;
3939
import java.util.concurrent.ScheduledExecutorService;
4040
import java.util.concurrent.TimeUnit;
41+
import java.util.concurrent.TimeoutException;
4142
import java.util.concurrent.atomic.AtomicInteger;
4243
import java.util.stream.Collectors;
4344
import org.apache.solr.client.solrj.SolrClient;
@@ -663,7 +664,7 @@ protected Exception doRequest(
663664
}
664665
} catch (SolrServerException e) {
665666
Throwable rootCause = e.getRootCause();
666-
if (!isNonRetryable && rootCause instanceof IOException) {
667+
if (!isNonRetryable && (rootCause instanceof IOException || rootCause instanceof TimeoutException)) {
667668
ex = (!isZombie) ? makeServerAZombie(baseUrl, e) : e;
668669
} else if (isNonRetryable && rootCause instanceof ConnectException) {
669670
ex = (!isZombie) ? makeServerAZombie(baseUrl, e) : e;

solr/solrj/src/test/org/apache/solr/client/solrj/impl/LB2SolrClientTest.java

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import java.io.IOException;
2020
import java.io.UncheckedIOException;
21+
import java.net.ServerSocket;
2122
import java.nio.file.Files;
2223
import java.nio.file.Path;
2324
import java.util.ArrayList;
@@ -31,7 +32,9 @@
3132
import org.apache.solr.client.solrj.SolrClient;
3233
import org.apache.solr.client.solrj.SolrServerException;
3334
import org.apache.solr.client.solrj.jetty.HttpJettySolrClient;
35+
import org.apache.solr.client.solrj.jetty.LBJettySolrClient;
3436
import org.apache.solr.client.solrj.request.SolrQuery;
37+
import org.apache.solr.client.solrj.request.QueryRequest;
3538
import org.apache.solr.client.solrj.response.QueryResponse;
3639
import org.apache.solr.client.solrj.response.SolrResponseBase;
3740
import org.apache.solr.common.SolrInputDocument;
@@ -202,6 +205,29 @@ public void testTwoServers() throws Exception {
202205
}
203206
}
204207

208+
public void testTimeoutExceptionMarksServerAsZombie() throws Exception {
209+
try (ZombieTestContext ctx = new ZombieTestContext()) {
210+
LBSolrClient.Req lbReq = ctx.createQueryRequest();
211+
212+
try {
213+
ctx.lbClient.request(lbReq);
214+
} catch (Exception e) {
215+
}
216+
217+
ctx.assertZombieState();
218+
}
219+
}
220+
221+
public void testTimeoutExceptionMarksServerAsZombieAsyncRequest() throws Exception {
222+
try (ZombieTestContext ctx = new ZombieTestContext()) {
223+
LBSolrClient.Req lbReq = ctx.createQueryRequest();
224+
225+
ctx.lbClient.requestAsync(lbReq).exceptionally(e -> null).get();
226+
227+
ctx.assertZombieState();
228+
}
229+
}
230+
205231
private LBSolrClient.Endpoint[] bootstrapBaseSolrEndpoints(int max) {
206232
LBSolrClient.Endpoint[] solrUrls = new LBSolrClient.Endpoint[max];
207233
for (int i = 0; i < max; i++) {
@@ -330,4 +356,57 @@ public void close() {
330356
}
331357
}
332358
}
359+
360+
private class ZombieTestContext implements AutoCloseable {
361+
final ServerSocket blackhole;
362+
final LBSolrClient.Endpoint nonRoutableEndpoint;
363+
final HttpJettySolrClient delegateClient;
364+
final LBAsyncSolrClient lbClient;
365+
366+
ZombieTestContext() throws Exception {
367+
//create a socket that allows a client to connect but causes them to hang until idleTimeout is triggered
368+
blackhole = new ServerSocket(0);
369+
int blackholePort = blackhole.getLocalPort();
370+
nonRoutableEndpoint =
371+
new LBSolrClient.Endpoint("http://localhost:" + blackholePort + "/solr");
372+
373+
delegateClient =
374+
new HttpJettySolrClient.Builder()
375+
.withConnectionTimeout(1000, TimeUnit.MILLISECONDS)
376+
.withIdleTimeout(100, TimeUnit.MILLISECONDS)
377+
.build();
378+
379+
lbClient = new LBJettySolrClient.Builder(delegateClient, nonRoutableEndpoint).build();
380+
}
381+
382+
LBSolrClient.Req createQueryRequest() {
383+
SolrQuery solrQuery = new SolrQuery("*:*");
384+
QueryRequest queryRequest = new QueryRequest(solrQuery);
385+
386+
List<LBSolrClient.Endpoint> endpoints =
387+
List.of(
388+
new LBSolrClient.Endpoint(
389+
nonRoutableEndpoint.getBaseUrl(), solr[0].getDefaultCollection())
390+
);
391+
return new LBSolrClient.Req(queryRequest, endpoints);
392+
}
393+
394+
void assertZombieState() {
395+
assertTrue(
396+
"Non-routable endpoint should be marked as zombie due to timeout",
397+
lbClient.zombieServers.containsKey(
398+
nonRoutableEndpoint.getBaseUrl() + "/" + solr[0].getDefaultCollection()));
399+
}
400+
401+
@Override
402+
public void close() {
403+
lbClient.close();
404+
delegateClient.close();
405+
try {
406+
blackhole.close();
407+
} catch (IOException ioe) {
408+
409+
}
410+
}
411+
}
333412
}

0 commit comments

Comments
 (0)