Return the search terms as search highlights for SQLite instead of nothing (#17000)

Fixes https://github.com/element-hq/synapse/issues/16999 and
https://github.com/element-hq/element-android/pull/8729 by returning the
search terms as search highlights.
This commit is contained in:
Melvyn Laïly 2024-04-26 10:43:52 +02:00 committed by GitHub
parent 9985aa6821
commit 59710437e4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 31 additions and 14 deletions

1
changelog.d/17000.bugfix Normal file
View file

@ -0,0 +1 @@
Fixed search feature of Element Android on homesevers using SQLite by returning search terms as search highlights.

View file

@ -470,6 +470,8 @@ class SearchStore(SearchBackgroundUpdateStore):
count_args = args count_args = args
count_clauses = clauses count_clauses = clauses
sqlite_highlights: List[str] = []
if isinstance(self.database_engine, PostgresEngine): if isinstance(self.database_engine, PostgresEngine):
search_query = search_term search_query = search_term
sql = """ sql = """
@ -486,7 +488,7 @@ class SearchStore(SearchBackgroundUpdateStore):
""" """
count_args = [search_query] + count_args count_args = [search_query] + count_args
elif isinstance(self.database_engine, Sqlite3Engine): elif isinstance(self.database_engine, Sqlite3Engine):
search_query = _parse_query_for_sqlite(search_term) search_query, sqlite_highlights = _parse_query_for_sqlite(search_term)
sql = """ sql = """
SELECT rank(matchinfo(event_search)) as rank, room_id, event_id SELECT rank(matchinfo(event_search)) as rank, room_id, event_id
@ -531,9 +533,11 @@ class SearchStore(SearchBackgroundUpdateStore):
event_map = {ev.event_id: ev for ev in events} event_map = {ev.event_id: ev for ev in events}
highlights = None highlights: Collection[str] = []
if isinstance(self.database_engine, PostgresEngine): if isinstance(self.database_engine, PostgresEngine):
highlights = await self._find_highlights_in_postgres(search_query, events) highlights = await self._find_highlights_in_postgres(search_query, events)
else:
highlights = sqlite_highlights
count_sql += " GROUP BY room_id" count_sql += " GROUP BY room_id"
@ -597,6 +601,8 @@ class SearchStore(SearchBackgroundUpdateStore):
count_args = list(args) count_args = list(args)
count_clauses = list(clauses) count_clauses = list(clauses)
sqlite_highlights: List[str] = []
if pagination_token: if pagination_token:
try: try:
origin_server_ts_str, stream_str = pagination_token.split(",") origin_server_ts_str, stream_str = pagination_token.split(",")
@ -647,7 +653,7 @@ class SearchStore(SearchBackgroundUpdateStore):
CROSS JOIN events USING (event_id) CROSS JOIN events USING (event_id)
WHERE WHERE
""" """
search_query = _parse_query_for_sqlite(search_term) search_query, sqlite_highlights = _parse_query_for_sqlite(search_term)
args = [search_query] + args args = [search_query] + args
count_sql = """ count_sql = """
@ -694,9 +700,11 @@ class SearchStore(SearchBackgroundUpdateStore):
event_map = {ev.event_id: ev for ev in events} event_map = {ev.event_id: ev for ev in events}
highlights = None highlights: Collection[str] = []
if isinstance(self.database_engine, PostgresEngine): if isinstance(self.database_engine, PostgresEngine):
highlights = await self._find_highlights_in_postgres(search_query, events) highlights = await self._find_highlights_in_postgres(search_query, events)
else:
highlights = sqlite_highlights
count_sql += " GROUP BY room_id" count_sql += " GROUP BY room_id"
@ -892,19 +900,25 @@ def _tokenize_query(query: str) -> TokenList:
return tokens return tokens
def _tokens_to_sqlite_match_query(tokens: TokenList) -> str: def _tokens_to_sqlite_match_query(tokens: TokenList) -> Tuple[str, List[str]]:
""" """
Convert the list of tokens to a string suitable for passing to sqlite's MATCH. Convert the list of tokens to a string suitable for passing to sqlite's MATCH.
Assume sqlite was compiled with enhanced query syntax. Assume sqlite was compiled with enhanced query syntax.
Returns the sqlite-formatted query string and the tokenized search terms
that can be used as highlights.
Ref: https://www.sqlite.org/fts3.html#full_text_index_queries Ref: https://www.sqlite.org/fts3.html#full_text_index_queries
""" """
match_query = [] match_query = []
highlights = []
for token in tokens: for token in tokens:
if isinstance(token, str): if isinstance(token, str):
match_query.append(token) match_query.append(token)
highlights.append(token)
elif isinstance(token, Phrase): elif isinstance(token, Phrase):
match_query.append('"' + " ".join(token.phrase) + '"') match_query.append('"' + " ".join(token.phrase) + '"')
highlights.append(" ".join(token.phrase))
elif token == SearchToken.Not: elif token == SearchToken.Not:
# TODO: SQLite treats NOT as a *binary* operator. Hopefully a search # TODO: SQLite treats NOT as a *binary* operator. Hopefully a search
# term has already been added before this. # term has already been added before this.
@ -916,11 +930,14 @@ def _tokens_to_sqlite_match_query(tokens: TokenList) -> str:
else: else:
raise ValueError(f"unknown token {token}") raise ValueError(f"unknown token {token}")
return "".join(match_query) return "".join(match_query), highlights
def _parse_query_for_sqlite(search_term: str) -> str: def _parse_query_for_sqlite(search_term: str) -> Tuple[str, List[str]]:
"""Takes a plain unicode string from the user and converts it into a form """Takes a plain unicode string from the user and converts it into a form
that can be passed to sqllite's matchinfo(). that can be passed to sqllite's matchinfo().
Returns the converted query string and the tokenized search terms
that can be used as highlights.
""" """
return _tokens_to_sqlite_match_query(_tokenize_query(search_term)) return _tokens_to_sqlite_match_query(_tokenize_query(search_term))

View file

@ -71,17 +71,16 @@ class EventSearchInsertionTest(HomeserverTestCase):
store.search_msgs([room_id], "hi bob", ["content.body"]) store.search_msgs([room_id], "hi bob", ["content.body"])
) )
self.assertEqual(result.get("count"), 1) self.assertEqual(result.get("count"), 1)
if isinstance(store.database_engine, PostgresEngine): self.assertIn("hi", result.get("highlights"))
self.assertIn("hi", result.get("highlights")) self.assertIn("bob", result.get("highlights"))
self.assertIn("bob", result.get("highlights"))
# Check that search works for an unrelated message # Check that search works for an unrelated message
result = self.get_success( result = self.get_success(
store.search_msgs([room_id], "another", ["content.body"]) store.search_msgs([room_id], "another", ["content.body"])
) )
self.assertEqual(result.get("count"), 1) self.assertEqual(result.get("count"), 1)
if isinstance(store.database_engine, PostgresEngine):
self.assertIn("another", result.get("highlights")) self.assertIn("another", result.get("highlights"))
# Check that search works for a search term that overlaps with the message # Check that search works for a search term that overlaps with the message
# containing a null byte and an unrelated message. # containing a null byte and an unrelated message.
@ -90,8 +89,8 @@ class EventSearchInsertionTest(HomeserverTestCase):
result = self.get_success( result = self.get_success(
store.search_msgs([room_id], "hi alice", ["content.body"]) store.search_msgs([room_id], "hi alice", ["content.body"])
) )
if isinstance(store.database_engine, PostgresEngine):
self.assertIn("alice", result.get("highlights")) self.assertIn("alice", result.get("highlights"))
def test_non_string(self) -> None: def test_non_string(self) -> None:
"""Test that non-string `value`s are not inserted into `event_search`. """Test that non-string `value`s are not inserted into `event_search`.