Skip to content

Commit c2f7f55

Browse files
committed
Tune solr caching performance
1 parent 7bf4617 commit c2f7f55

File tree

2 files changed

+83
-5
lines changed

2 files changed

+83
-5
lines changed

conf/solr/conf/solrconfig.xml

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -388,9 +388,9 @@
388388
to occupy. Note that when this option is specified, the size
389389
and initialSize parameters are ignored.
390390
-->
391-
<filterCache size="512"
392-
initialSize="512"
393-
autowarmCount="128"/>
391+
<filterCache size="2048"
392+
initialSize="2048"
393+
autowarmCount="1024"/>
394394

395395
<!-- Query Result Cache
396396
@@ -537,14 +537,31 @@
537537
<arr name="queries">
538538
<!-- Work search -->
539539
<lst>
540-
<str name="userWorkQuery">harry potter</str>
541-
<str name="q">({!edismax q.op="AND" qf="text alternative_title^20 author_name^20" bf="min(100,edition_count)" v=$userWorkQuery})</str>
542540
<str name="fq">type:work</str>
541+
<str name="start">0</str>
543542
<str name="rows">20</str>
543+
<str name="wt">json</str>
544+
<str name="spellcheck">true</str>
545+
<str name="spellcheck.count">3</str>
546+
<str name="fl">want_to_read_count,author_name,ebook_access,ratings_average,key,first_publish_year,public_scan_b,id_wikisource,id_project_runeberg,editions:[subquery],cover_edition_key,cover_i,ratings_count,language,author_key,lending_identifier_s,id_openstax,id_cita_press,id_project_gutenberg,id_librivox,lending_edition_s,subtitle,ia_collection_s,edition_count,has_fulltext,title,id_standard_ebooks,ia</str>
547+
<str name="userWorkQuery">harry potter</str>
548+
<str name="editions.fq">type:edition</str>
549+
<str name="userEdQuery">harry potter</str>
550+
<str name="editions.userEdQuery">harry potter</str>
551+
<str name="fullEdQuery">({!edismax bq="language:eng^40 ebook_access:public^10 ebook_access:borrowable^8 ebook_access:printdisabled^2 cover_i:*^2" v=$userEdQuery qf="text alternative_title^4 author_name^4"})</str>
552+
<str name="q">+({!edismax q.op="AND" qf="text alternative_title^10 author_name^10" pf="alternative_title^10 author_name^10" bf="min(100,edition_count) min(100,def(readinglog_count,0))" v=$userWorkQuery}) +(_query_:"{!parent which=type:work v=$fullEdQuery filters=$editions.fq}" OR edition_count:0)</str>
553+
<str name="hl">true</str>
554+
<str name="hl.fl">subject</str>
555+
<str name="hl.q">harry potter</str>
556+
<str name="hl.snippets">10</str>
557+
<str name="editions.q">({!terms f=_root_ v=$row.key}) AND ({!edismax bq="language:eng^40 ebook_access:public^10 ebook_access:borrowable^8 ebook_access:printdisabled^2 cover_i:*^2" v=$userEdQuery qf="text alternative_title^4 author_name^4"})</str>
558+
<str name="editions.rows">1</str>
559+
<str name="editions.fl">want_to_read_count,ebook_access,ratings_average,key,first_publish_year,id_project_runeberg,cover_edition_key,ia,cover_i,ratings_count,language,lending_identifier_s,id_openstax,id_cita_press,id_project_gutenberg,id_librivox,lending_edition_s,subtitle,ia_collection_s,edition_count,has_fulltext,title,id_standard_ebooks,public_scan_b,id_wikisource</str>
544560
<str name="facet">true</str>
545561
<str name="facet.field">author_facet</str>
546562
<str name="facet.field">first_publish_year</str>
547563
<str name="facet.field">has_fulltext</str>
564+
<str name="facet.field">public_scan_b</str>
548565
<str name="facet.field">language</str>
549566
<str name="facet.field">person_facet</str>
550567
<str name="facet.field">place_facet</str>

openlibrary/plugins/worksearch/tests/test_worksearch.py

Lines changed: 61 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,13 @@
1+
from pathlib import Path
2+
13
import web
24

35
from openlibrary.plugins.worksearch.code import (
6+
_prepare_solr_query_params,
47
get_doc,
58
process_facet,
69
)
10+
from openlibrary.plugins.worksearch.schemes.works import WorkSearchScheme
711

812

913
def test_process_facet():
@@ -74,3 +78,60 @@ def test_get_doc():
7478
'want_to_read_count': None,
7579
}
7680
)
81+
82+
83+
def test_solr_config_contains_realistic_search(mock_site):
    """Check the first newSearcher warming query in solrconfig.xml.

    The test parses ``conf/solr/conf/solrconfig.xml`` (located five
    directory levels above this file), takes the first ``<lst>`` under
    ``listener[event=newSearcher] > arr[name=queries]``, and compares its
    ``<str>`` entries with the parameters ``_prepare_solr_query_params``
    builds for a ``harry potter`` work search.

    The comparison is set-based, skips the ``ol.label`` /
    ``editions.ol.label`` parameters, and treats ``fl`` / ``editions.fl``
    as order-insensitive comma-separated lists.

    ``mock_site`` is requested as a parameter but is not referenced in
    the body.
    """
    import xml.etree.ElementTree as ET

    # Climb five levels up from this test module, then descend to the config.
    base_dir = Path(__file__)
    for _ in range(5):
        base_dir = base_dir.parent
    config_file = base_dir / "conf" / "solr" / "conf" / "solrconfig.xml"

    config_root = ET.parse(config_file).getroot()
    # listener[event=newSearcher] > arr[name=queries] > first <lst>
    warmup_query = config_root.find(
        ".//listener[@event='newSearcher']/arr[@name='queries']/lst"
    )
    assert warmup_query is not None

    # Flatten each `<str name="...">value</str>` child into a (name, value) pair.
    configured_params = []
    for node in warmup_query.findall('str'):
        configured_params.append((node.attrib['name'], str(node.text)))

    extra_fields = {
        'editions',
        'providers',
        'ratings_average',
        'ratings_count',
        'want_to_read_count',
    }
    built_params, _ = _prepare_solr_query_params(
        scheme=WorkSearchScheme(),
        param={'q': 'harry potter'},
        spellcheck_count=3,
        facet=True,
        highlight=True,
        fields=list(WorkSearchScheme.default_fetched_fields | extra_fields),
        rows=20,
    )

    def normalize_params(params: list[tuple[str, str]]):
        """Drop ignored keys, stringify values, and sort field-list values."""
        ignored_fields = {'ol.label', 'editions.ol.label'}
        sorted_fields = {'fl', 'editions.fl'}
        normalized = []
        for key, value in params:
            if key in ignored_fields:
                continue
            value = str(value)
            if key in sorted_fields:
                # Field lists are comma-separated; their order is irrelevant.
                value = ','.join(sorted(value.split(',')))
            normalized.append((key, value))
        return normalized

    assert set(normalize_params(configured_params)) == set(
        normalize_params(built_params)
    )

0 commit comments

Comments
 (0)