Skip to content

Commit 12410e4

Browse files
committed
Tune solr caching performance
1 parent 5d13f22 commit 12410e4

File tree

2 files changed

+81
-5
lines changed

2 files changed

+81
-5
lines changed

conf/solr/conf/solrconfig.xml

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -388,9 +388,9 @@
388388
to occupy. Note that when this option is specified, the size
389389
and initialSize parameters are ignored.
390390
-->
391-
<filterCache size="512"
392-
initialSize="512"
393-
autowarmCount="128"/>
391+
<filterCache size="2048"
392+
initialSize="2048"
393+
autowarmCount="1024"/>
394394

395395
<!-- Query Result Cache
396396
@@ -537,14 +537,31 @@
537537
<arr name="queries">
538538
<!-- Work search -->
539539
<lst>
540-
<str name="userWorkQuery">harry potter</str>
541-
<str name="q">({!edismax q.op="AND" qf="text alternative_title^20 author_name^20" bf="min(100,edition_count)" v=$userWorkQuery})</str>
542540
<str name="fq">type:work</str>
541+
<str name="start">0</str>
543542
<str name="rows">20</str>
543+
<str name="wt">json</str>
544+
<str name="spellcheck">true</str>
545+
<str name="spellcheck.count">3</str>
546+
<str name="fl">want_to_read_count,author_name,ebook_access,ratings_average,key,first_publish_year,public_scan_b,id_wikisource,id_project_runeberg,editions:[subquery],cover_edition_key,cover_i,ratings_count,language,author_key,lending_identifier_s,id_openstax,id_cita_press,id_project_gutenberg,id_librivox,lending_edition_s,subtitle,ia_collection_s,edition_count,has_fulltext,title,id_standard_ebooks,ia</str>
547+
<str name="userWorkQuery">harry potter</str>
548+
<str name="editions.fq">type:edition</str>
549+
<str name="userEdQuery">harry potter</str>
550+
<str name="editions.userEdQuery">harry potter</str>
551+
<str name="fullEdQuery">({!edismax bq="language:eng^40 ebook_access:public^10 ebook_access:borrowable^8 ebook_access:printdisabled^2 cover_i:*^2" v=$userEdQuery qf="text alternative_title^4 author_name^4"})</str>
552+
<str name="q">+({!edismax q.op="AND" qf="text alternative_title^10 author_name^10" pf="alternative_title^10 author_name^10" bf="min(100,edition_count) min(100,def(readinglog_count,0))" v=$userWorkQuery}) +(_query_:"{!parent which=type:work v=$fullEdQuery filters=$editions.fq}" OR edition_count:0)</str>
553+
<str name="hl">true</str>
554+
<str name="hl.fl">subject</str>
555+
<str name="hl.q">harry potter</str>
556+
<str name="hl.snippets">10</str>
557+
<str name="editions.q">({!terms f=_root_ v=$row.key}) AND ({!edismax bq="language:eng^40 ebook_access:public^10 ebook_access:borrowable^8 ebook_access:printdisabled^2 cover_i:*^2" v=$userEdQuery qf="text alternative_title^4 author_name^4"})</str>
558+
<str name="editions.rows">1</str>
559+
<str name="editions.fl">want_to_read_count,ebook_access,ratings_average,key,first_publish_year,id_project_runeberg,cover_edition_key,ia,cover_i,ratings_count,language,lending_identifier_s,id_openstax,id_cita_press,id_project_gutenberg,id_librivox,lending_edition_s,subtitle,ia_collection_s,edition_count,has_fulltext,title,id_standard_ebooks,public_scan_b,id_wikisource</str>
544560
<str name="facet">true</str>
545561
<str name="facet.field">author_facet</str>
546562
<str name="facet.field">first_publish_year</str>
547563
<str name="facet.field">has_fulltext</str>
564+
<str name="facet.field">public_scan_b</str>
548565
<str name="facet.field">language</str>
549566
<str name="facet.field">person_facet</str>
550567
<str name="facet.field">place_facet</str>

openlibrary/plugins/worksearch/tests/test_worksearch.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from pathlib import Path
2+
13
import web
24

35
from openlibrary.plugins.worksearch.code import (
@@ -91,3 +93,60 @@ def test_prepare_solr_query_params_first_publish_year_string():
9193
# Check that the fq param for first_publish_year is correctly added
9294
fq_params = [p for p in params if p[0] == 'fq']
9395
assert ('fq', 'first_publish_year:"1997"') in fq_params
96+
97+
98+
def test_solr_config_contains_realistic_search(mock_site):
    """Check the first ``newSearcher`` warming query in ``solrconfig.xml``.

    The ``<str>`` params of the first ``<lst>`` under the ``newSearcher``
    listener's ``queries`` array must equal, as a set, the params that
    ``_prepare_solr_query_params`` builds for a ``harry potter`` work search
    with spellcheck, facets and highlighting enabled.
    """
    import xml.etree.ElementTree as ET

    # Climb five directory levels from this test file, then descend to
    # conf/solr/conf/solrconfig.xml.
    base_dir = Path(__file__)
    for _ in range(5):
        base_dir = base_dir.parent
    config_file = base_dir / "conf" / "solr" / "conf" / "solrconfig.xml"

    config_root = ET.parse(config_file).getroot()
    # `find` returns the first match, i.e. the first <lst> of
    # listener[event=newSearcher] > arr[name=queries].
    warm_query = config_root.find(
        ".//listener[@event='newSearcher']/arr[@name='queries']/lst"
    )
    assert warm_query is not None

    # Flatten every `<str name="...">value</str>` child into a (name, value) pair.
    configured_params = []
    for node in warm_query.findall('str'):
        configured_params.append((node.attrib['name'], str(node.text)))

    requested_fields = WorkSearchScheme.default_fetched_fields | {
        'editions',
        'providers',
        'ratings_average',
        'ratings_count',
        'want_to_read_count',
    }
    generated_params, _ = _prepare_solr_query_params(
        scheme=WorkSearchScheme(),
        param={'q': 'harry potter'},
        spellcheck_count=3,
        facet=True,
        highlight=True,
        fields=list(requested_fields),
        rows=20,
    )

    def normalize_params(params: list[tuple[str, str]]):
        """Return comparable (name, value) pairs.

        Drops the label params, stringifies every value, and sorts the
        comma-separated entries of the field-list params so their order
        does not affect the comparison.
        """
        skipped = {'ol.label', 'editions.ol.label'}
        order_insensitive = {'fl', 'editions.fl'}
        cleaned = []
        for name, raw in params:
            if name in skipped:
                continue
            value = str(raw)
            if name in order_insensitive:
                value = ','.join(sorted(value.split(',')))
            cleaned.append((name, value))
        return cleaned

    # Compared as sets: parameter order is irrelevant.
    assert set(normalize_params(configured_params)) == set(
        normalize_params(generated_params)
    )

0 commit comments

Comments
 (0)