Skip to content

Commit 616c7d6

Browse files
SOLR-17984: Create a Solr MergePolicyFactory for MergeOnFlushMergePolicy (#3848)
1 parent 9088feb commit 616c7d6

File tree

6 files changed

+170
-2
lines changed

6 files changed

+170
-2
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
2+
title: Enable MergeOnFlushMergePolicy in Solr
3+
type: added # added, changed, fixed, deprecated, removed, dependency_update, security, other
4+
authors:
5+
- name: Houston Putman
6+
nick: HoustonPutman
7+
url: https://home.apache.org/phonebook.html?uid=houston
8+
links:
9+
- name: SOLR-17984
10+
url: https://issues.apache.org/jira/browse/SOLR-17984

solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@ public class SolrResourceLoader
111111
"security.cert.",
112112
"handler.sql.",
113113
"crossdc.handler.",
114-
"crossdc.update.processor."
114+
"crossdc.update.processor.",
115+
"index."
115116
};
116117
private static final Charset UTF_8 = StandardCharsets.UTF_8;
117118
public static final String SOLR_RESOURCELOADING_RESTRICTED_ENABLED_PARAM =
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
package org.apache.solr.index;
18+
19+
import org.apache.lucene.index.MergePolicy;
20+
import org.apache.lucene.sandbox.index.MergeOnFlushMergePolicy;
21+
import org.apache.solr.core.SolrResourceLoader;
22+
import org.apache.solr.schema.IndexSchema;
23+
24+
/** A {@link MergePolicyFactory} for {@code SortingMergePolicy} objects. */
25+
public class MergeOnFlushMergePolicyFactory extends WrapperMergePolicyFactory {
26+
27+
private static final String SSTMB = "smallSegmentThresholdMB";
28+
29+
protected final Double smallSegmentThresholdMB;
30+
31+
public MergeOnFlushMergePolicyFactory(
32+
SolrResourceLoader resourceLoader, MergePolicyFactoryArgs args, IndexSchema schema) {
33+
super(resourceLoader, args, schema);
34+
final String smallSegmentThresholdMBArg = (String) args.remove(SSTMB);
35+
if (smallSegmentThresholdMBArg == null) {
36+
this.smallSegmentThresholdMB = null;
37+
} else {
38+
this.smallSegmentThresholdMB = Double.parseDouble(smallSegmentThresholdMBArg);
39+
}
40+
}
41+
42+
@Override
43+
protected MergePolicy getMergePolicyInstance(MergePolicy wrappedMP) {
44+
final MergeOnFlushMergePolicy mp = new MergeOnFlushMergePolicy(wrappedMP);
45+
if (smallSegmentThresholdMB != null) {
46+
mp.setSmallSegmentThresholdMB(smallSegmentThresholdMB);
47+
}
48+
return mp;
49+
}
50+
}
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
<?xml version="1.0" ?>
2+
3+
<!--
4+
Licensed to the Apache Software Foundation (ASF) under one or more
5+
contributor license agreements. See the NOTICE file distributed with
6+
this work for additional information regarding copyright ownership.
7+
The ASF licenses this file to You under the Apache License, Version 2.0
8+
(the "License"); you may not use this file except in compliance with
9+
the License. You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
-->
19+
20+
<config>
21+
<luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
22+
<directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.MockDirectoryFactory}"/>
23+
<schemaFactory class="ClassicIndexSchemaFactory"/>
24+
25+
<indexConfig>
26+
<mergePolicyFactory class="org.apache.solr.index.SortingMergePolicyFactory">
27+
<str name="sort">${mergePolicySort:timestamp_i_dvo desc}</str>
28+
<str name="wrapped.prefix">in</str>
29+
<str name="in.class">solr.MergeOnFlushMergePolicyFactory</str>
30+
<str name="in.smallSegmentThresholdMB">10</str>
31+
<str name="in.wrapped.prefix">in</str>
32+
<str name="in.in.class">org.apache.solr.util.RandomForceMergePolicyFactory</str>
33+
</mergePolicyFactory>
34+
<lockType>${solr.tests.lockType:single}</lockType>
35+
</indexConfig>
36+
37+
<requestHandler name="/select" class="solr.SearchHandler" />
38+
39+
<updateHandler class="solr.DirectUpdateHandler2">
40+
<updateLog>
41+
<str name="dir">${solr.ulog.dir:}</str>
42+
</updateLog>
43+
44+
<autoCommit>
45+
<maxTime>${solr.autoCommit.maxTime:-1}</maxTime>
46+
<openSearcher>false</openSearcher>
47+
</autoCommit>
48+
49+
<autoSoftCommit>
50+
<maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
51+
</autoSoftCommit>
52+
</updateHandler>
53+
<initParams path="/select">
54+
<lst name="defaults">
55+
<str name="df">text</str>
56+
</lst>
57+
</initParams>
58+
59+
</config>

solr/core/src/test/org/apache/solr/update/SolrIndexConfigTest.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.lucene.index.SimpleMergedSegmentWarmer;
2626
import org.apache.lucene.index.TieredMergePolicy;
2727
import org.apache.lucene.misc.index.BPReorderingMergePolicy;
28+
import org.apache.lucene.sandbox.index.MergeOnFlushMergePolicy;
2829
import org.apache.lucene.search.Sort;
2930
import org.apache.lucene.search.SortField;
3031
import org.apache.solr.SolrTestCaseJ4;
@@ -36,6 +37,7 @@
3637
import org.apache.solr.index.SortingMergePolicy;
3738
import org.apache.solr.schema.IndexSchema;
3839
import org.apache.solr.schema.IndexSchemaFactory;
40+
import org.apache.solr.util.RandomForceMergePolicy;
3941
import org.junit.After;
4042
import org.junit.BeforeClass;
4143
import org.junit.Test;
@@ -58,6 +60,8 @@ public class SolrIndexConfigTest extends SolrTestCaseJ4 {
5860
"solrconfig-sortingmergepolicyfactory.xml";
5961
private static final String solrConfigFileNameBPReorderingMergePolicyFactory =
6062
"solrconfig-bpreorderingmergepolicyfactory.xml";
63+
private static final String solrConfigFileNameMergeOnFlushMergePolicyFactory =
64+
"solrconfig-mergeonflushmergepolicyfactory.xml";
6165
private static final String schemaFileName = "schema.xml";
6266

6367
private static boolean compoundMergePolicySort = false;
@@ -172,6 +176,47 @@ public void testSortingMPSolrIndexConfigCreation() throws Exception {
172176
assertEquals("SortingMergePolicy.getSort", expected, actual);
173177
}
174178

179+
public void testMergeOnFlushMPSolrIndexConfigCreation() throws Exception {
180+
final SortField sortField1 = new SortField("timestamp_i_dvo", SortField.Type.INT, true);
181+
final SortField sortField2 = new SortField("id", SortField.Type.STRING, false);
182+
sortField2.setMissingValue(SortField.STRING_LAST);
183+
184+
SolrConfig solrConfig =
185+
new SolrConfig(instanceDir, solrConfigFileNameMergeOnFlushMergePolicyFactory);
186+
SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null);
187+
assertNotNull(solrIndexConfig);
188+
IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema(schemaFileName, solrConfig);
189+
190+
h.getCore().setLatestSchema(indexSchema);
191+
IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());
192+
193+
final MergePolicy mergePolicy = iwc.getMergePolicy();
194+
assertNotNull("null mergePolicy", mergePolicy);
195+
assertTrue(
196+
"mergePolicy (" + mergePolicy + ") is not a SortingMergePolicy",
197+
mergePolicy instanceof SortingMergePolicy);
198+
final SortingMergePolicy sortingMergePolicy = (SortingMergePolicy) mergePolicy;
199+
200+
MergePolicy firstInnerPolicy = sortingMergePolicy.unwrap();
201+
assertNotNull("null firstInnerMergePolicy", firstInnerPolicy);
202+
assertTrue(
203+
"mergePolicy (" + firstInnerPolicy + ") is not a MergeOnFlushMergePolicy",
204+
firstInnerPolicy instanceof MergeOnFlushMergePolicy);
205+
final MergeOnFlushMergePolicy mergeOnFlushMergePolicy =
206+
(MergeOnFlushMergePolicy) firstInnerPolicy;
207+
assertEquals(
208+
"Wrong maxSegmentThresholdMB for MergeOnFlushMergePolicy",
209+
10,
210+
mergeOnFlushMergePolicy.getSmallSegmentThresholdMB(),
211+
.01);
212+
213+
MergePolicy secondInnerPolicy = mergeOnFlushMergePolicy.unwrap();
214+
assertNotNull("null secondInnerMergePolicy", secondInnerPolicy);
215+
assertTrue(
216+
"mergePolicy (" + secondInnerPolicy + ") is not a RandomForceMergePolicyFactory",
217+
secondInnerPolicy instanceof RandomForceMergePolicy);
218+
}
219+
175220
public void testBPReorderingMPSolrIndexConfigCreation() throws Exception {
176221
SolrConfig solrConfig =
177222
new SolrConfig(instanceDir, solrConfigFileNameBPReorderingMergePolicyFactory);

solr/solr-ref-guide/modules/configuration-guide/pages/index-segments-merging.adoc

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ Defines how merging segments is done.
9696

9797
The default in Solr is to use `TieredMergePolicy`, which merges segments of approximately equal size, subject to an allowed number of segments per tier.
9898

99-
Other policies available are the `LogByteSizeMergePolicy` and `LogDocMergePolicy`.
99+
Other policies available are the `LogByteSizeMergePolicy`, `LogDocMergePolicy` and `MergeOnFlushMergePolicy`.
100100
For more information on these policies, please see {lucene-javadocs}/core/org/apache/lucene/index/MergePolicy.html[the MergePolicy javadocs].
101101

102102
[source,xml]
@@ -134,6 +134,9 @@ Having fewer segments in the index generally accelerates searches, because there
134134
It can also result in fewer physical files on disk.
135135
But to keep the number of segments low, merges will occur more often, which can add load to the system and slow down updates to the index.
136136

137+
`MergeOnFlushMergePolicy` allows users to ensure that all segments are larger than a specified `smallSegmentThresholdMB` option (default `100`).
138+
When using this merge policy, all segments smaller than the given threshold will be merged during a commit.
139+
137140
Conversely, keeping more segments can accelerate indexing, because merges happen less often, making an update less likely to trigger a merge.
138141
But searches become more computationally expensive and will likely be slower, because search terms must be looked up in more index segments.
139142
Faster index updates also means shorter commit turnaround times, which means more timely search results.

0 commit comments

Comments
 (0)