Skip to content

Commit eb601b8

Browse files
committed
idle timeouts should cause servers to be added to the zombie list
1 parent b00dfc4 commit eb601b8

File tree

4 files changed

+93
-2
lines changed

4 files changed

+93
-2
lines changed
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
2+
title: Add unresponsive servers to zombie list
3+
type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other
4+
authors:
5+
- name: James Vanneman
6+
links:
7+
- name: SOLR-18002
8+
url: https://issues.apache.org/jira/browse/SOLR-18002

solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBHttp2SolrClient.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import java.util.Set;
2525
import java.util.concurrent.CompletableFuture;
2626
import java.util.concurrent.TimeUnit;
27+
import java.util.concurrent.TimeoutException;
2728
import java.util.concurrent.atomic.AtomicReference;
2829
import org.apache.solr.client.solrj.ResponseParser;
2930
import org.apache.solr.client.solrj.SolrClient;
@@ -282,7 +283,7 @@ private void onFailedRequest(
282283
}
283284
} catch (SolrServerException e) {
284285
Throwable rootCause = e.getRootCause();
285-
if (!isNonRetryable && rootCause instanceof IOException) {
286+
if (!isNonRetryable && (rootCause instanceof IOException || rootCause instanceof TimeoutException)) {
286287
listener.onFailure((!isZombie) ? makeServerAZombie(endpoint, e) : e, true);
287288
} else if (isNonRetryable && rootCause instanceof ConnectException) {
288289
listener.onFailure((!isZombie) ? makeServerAZombie(endpoint, e) : e, true);

solr/solrj/src/java/org/apache/solr/client/solrj/impl/LBSolrClient.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
import java.util.concurrent.Executors;
3939
import java.util.concurrent.ScheduledExecutorService;
4040
import java.util.concurrent.TimeUnit;
41+
import java.util.concurrent.TimeoutException;
4142
import java.util.concurrent.atomic.AtomicInteger;
4243
import java.util.stream.Collectors;
4344
import org.apache.solr.client.solrj.ResponseParser;
@@ -554,7 +555,7 @@ protected Exception doRequest(
554555
}
555556
} catch (SolrServerException e) {
556557
Throwable rootCause = e.getRootCause();
557-
if (!isNonRetryable && rootCause instanceof IOException) {
558+
if (!isNonRetryable && (rootCause instanceof IOException || rootCause instanceof TimeoutException)) {
558559
ex = (!isZombie) ? makeServerAZombie(baseUrl, e) : e;
559560
} else if (isNonRetryable && rootCause instanceof ConnectException) {
560561
ex = (!isZombie) ? makeServerAZombie(baseUrl, e) : e;

solr/solrj/src/test/org/apache/solr/client/solrj/impl/LBHttp2SolrClientIntegrationTest.java

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.io.IOException;
2020
import java.io.UncheckedIOException;
2121
import java.lang.invoke.MethodHandles;
22+
import java.net.ServerSocket;
2223
import java.nio.file.Files;
2324
import java.nio.file.Path;
2425
import java.util.ArrayList;
@@ -32,6 +33,7 @@
3233
import org.apache.solr.client.solrj.SolrClient;
3334
import org.apache.solr.client.solrj.SolrQuery;
3435
import org.apache.solr.client.solrj.SolrServerException;
36+
import org.apache.solr.client.solrj.request.QueryRequest;
3537
import org.apache.solr.client.solrj.response.QueryResponse;
3638
import org.apache.solr.client.solrj.response.SolrResponseBase;
3739
import org.apache.solr.common.SolrInputDocument;
@@ -206,6 +208,29 @@ public void testTwoServers() throws Exception {
206208
}
207209
}
208210

211+
public void testTimeoutExceptionMarksServerAsZombie() throws Exception {
212+
try (ZombieTestContext ctx = new ZombieTestContext()) {
213+
LBSolrClient.Req lbReq = ctx.createQueryRequest();
214+
215+
try {
216+
ctx.lbClient.request(lbReq);
217+
} catch (Exception e) {
218+
}
219+
220+
ctx.assertZombieState();
221+
}
222+
}
223+
224+
public void testTimeoutExceptionMarksServerAsZombieAsyncRequest() throws Exception {
225+
try (ZombieTestContext ctx = new ZombieTestContext()) {
226+
LBSolrClient.Req lbReq = ctx.createQueryRequest();
227+
228+
ctx.lbClient.requestAsync(lbReq).exceptionally(e -> null).get();
229+
230+
ctx.assertZombieState();
231+
}
232+
}
233+
209234
private LBSolrClient.Endpoint[] bootstrapBaseSolrEndpoints(int max) {
210235
LBSolrClient.Endpoint[] solrUrls = new LBSolrClient.Endpoint[max];
211236
for (int i = 0; i < max; i++) {
@@ -334,4 +359,60 @@ public void close() {
334359
}
335360
}
336361
}
362+
363+
private class ZombieTestContext implements AutoCloseable {
364+
final ServerSocket blackhole;
365+
final LBSolrClient.Endpoint nonRoutableEndpoint;
366+
final Http2SolrClient delegateClient;
367+
final LBHttp2SolrClient<?> lbClient;
368+
369+
ZombieTestContext() throws Exception {
370+
//create a socket that allows a client to connect but causes them to hang until idleTimeout is triggered
371+
blackhole = new ServerSocket(0);
372+
int blackholePort = blackhole.getLocalPort();
373+
nonRoutableEndpoint =
374+
new LBSolrClient.Endpoint("http://localhost:" + blackholePort + "/solr");
375+
376+
delegateClient =
377+
new Http2SolrClient.Builder()
378+
.withConnectionTimeout(1000, TimeUnit.MILLISECONDS)
379+
.withIdleTimeout(100, TimeUnit.MILLISECONDS)
380+
.build();
381+
382+
lbClient =
383+
new LBHttp2SolrClient.Builder<>(delegateClient)
384+
.setAliveCheckInterval(500, TimeUnit.MILLISECONDS)
385+
.build();
386+
}
387+
388+
LBSolrClient.Req createQueryRequest() {
389+
SolrQuery solrQuery = new SolrQuery("*:*");
390+
QueryRequest queryRequest = new QueryRequest(solrQuery);
391+
392+
List<LBSolrClient.Endpoint> endpoints =
393+
List.of(
394+
new LBSolrClient.Endpoint(
395+
nonRoutableEndpoint.getBaseUrl(), solr[0].getDefaultCollection())
396+
);
397+
return new LBSolrClient.Req(queryRequest, endpoints);
398+
}
399+
400+
void assertZombieState() {
401+
assertTrue(
402+
"Non-routable endpoint should be marked as zombie due to timeout",
403+
lbClient.zombieServers.containsKey(
404+
nonRoutableEndpoint.getBaseUrl() + "/" + solr[0].getDefaultCollection()));
405+
}
406+
407+
@Override
408+
public void close() {
409+
lbClient.close();
410+
delegateClient.close();
411+
try {
412+
blackhole.close();
413+
} catch (IOException ioe) {
414+
415+
}
416+
}
417+
}
337418
}

0 commit comments

Comments
 (0)