From 9c6fb29edbb5c95af6aa54f8f2bac3ec36eff37b Mon Sep 17 00:00:00 2001 From: trevor-signal <131492920+trevor-signal@users.noreply.github.com> Date: Mon, 26 Jun 2023 14:25:48 -0400 Subject: [PATCH] Include @mentioned names in search results --- ts/sql/Client.ts | 62 ++-- ts/sql/Interface.ts | 54 ++-- ts/sql/Server.ts | 114 +++++-- ts/sql/migrations/84-all-mentions.ts | 58 ++++ ts/sql/migrations/index.ts | 2 + ts/state/ducks/search.ts | 59 +++- .../util/searchConversationTitles_test.ts | 56 ++++ ts/test-both/util/search_test.ts | 53 ++++ ts/test-electron/sql/fullTextSearch_test.ts | 211 ++++++++++++- ts/test-node/sql_migrations_test.ts | 292 +++++++++++++++++- ts/util/filterAndSortConversations.ts | 28 +- ts/util/fuse.ts | 53 ++++ ts/util/search.ts | 94 ++++++ ts/util/searchConversationTitles.ts | 42 +++ 14 files changed, 1052 insertions(+), 126 deletions(-) create mode 100644 ts/sql/migrations/84-all-mentions.ts create mode 100644 ts/test-both/util/searchConversationTitles_test.ts create mode 100644 ts/test-both/util/search_test.ts create mode 100644 ts/util/fuse.ts create mode 100644 ts/util/searchConversationTitles.ts diff --git a/ts/sql/Client.ts b/ts/sql/Client.ts index 96754f48d..0fb24ad4e 100644 --- a/ts/sql/Client.ts +++ b/ts/sql/Client.ts @@ -57,6 +57,7 @@ import { MINUTE } from '../util/durations'; import { getMessageIdForLogging } from '../util/idForLogging'; import type { MessageAttributesType } from '../model-types'; import { incrementMessageCounter } from '../util/incrementMessageCounter'; +import { generateSnippetAroundMention } from '../util/search'; const ERASE_SQL_KEY = 'erase-sql-key'; const ERASE_ATTACHMENTS_KEY = 'erase-attachments'; @@ -90,7 +91,6 @@ const exclusiveInterface: ClientExclusiveInterface = { removeConversation, searchMessages, - searchMessagesInConversation, getOlderMessagesByConversation, getConversationRangeCenteredOnMessage, @@ -415,36 +415,48 @@ async function removeConversation(id: string): Promise { function 
handleSearchMessageJSON( messages: Array ): Array { - return messages.map(message => ({ - json: message.json, + return messages.map(message => { + const parsedMessage = JSON.parse(message.json); + assertDev( + message.ftsSnippet ?? typeof message.mentionStart === 'number', + 'Neither ftsSnippet nor matching mention returned from message search' + ); + const snippet = + message.ftsSnippet ?? + generateSnippetAroundMention({ + body: parsedMessage.body, + mentionStart: message.mentionStart ?? 0, + mentionLength: message.mentionLength ?? 1, + }); - // Empty array is a default value. `message.json` has the real field - bodyRanges: [], + return { + json: message.json, - ...JSON.parse(message.json), - snippet: message.snippet, - })); + // Empty array is a default value. `message.json` has the real field + bodyRanges: [], + ...parsedMessage, + snippet, + }; + }); } -async function searchMessages( - query: string, - { limit }: { limit?: number } = {} -): Promise> { - const messages = await channels.searchMessages(query, { limit }); - - return handleSearchMessageJSON(messages); -} - -async function searchMessagesInConversation( - query: string, - conversationId: string, - { limit }: { limit?: number } = {} -): Promise> { - const messages = await channels.searchMessagesInConversation( +async function searchMessages({ + query, + options, + contactUuidsMatchingQuery, + conversationId, +}: { + query: string; + options?: { limit?: number }; + contactUuidsMatchingQuery?: Array; + conversationId?: string; +}): Promise> { + const messages = await channels.searchMessages({ query, conversationId, - { limit } - ); + options, + contactUuidsMatchingQuery, + }); return handleSearchMessageJSON(messages); } diff --git a/ts/sql/Interface.ts b/ts/sql/Interface.ts index b79d6432d..96173ac23 100644 --- a/ts/sql/Interface.ts +++ b/ts/sql/Interface.ts @@ -126,7 +126,14 @@ export type StoredPreKeyType = { export type PreKeyIdType = PreKeyType['id']; export type ServerSearchResultMessageType = { 
json: string; - snippet: string; + + // If the FTS matches text in message.body, snippet will be populated + ftsSnippet: string | null; + + // Otherwise, a matching mention will be returned + mentionUuid: string | null; + mentionStart: number | null; + mentionLength: number | null; }; export type ClientSearchResultMessageType = MessageType & { json: string; @@ -488,9 +495,6 @@ export type DataInterface = { id: UUIDStringType ) => Promise>; - // searchMessages is JSON on server, full message on Client - // searchMessagesInConversation is JSON on server, full message on Client - getMessageCount: (conversationId?: string) => Promise; getStoryCount: (conversationId: string) => Promise; saveMessage: ( @@ -788,16 +792,17 @@ export type ServerInterface = DataInterface & { updateConversation: (data: ConversationType) => Promise; removeConversation: (id: Array | string) => Promise; - searchMessages: ( - query: string, - options?: { limit?: number } - ) => Promise>; - searchMessagesInConversation: ( - query: string, - conversationId: string, - options?: { limit?: number } - ) => Promise>; - + searchMessages: ({ + query, + conversationId, + options, + contactUuidsMatchingQuery, + }: { + query: string; + conversationId?: string; + options?: { limit?: number }; + contactUuidsMatchingQuery?: Array; + }) => Promise>; getOlderMessagesByConversation: ( options: AdjacentMessagesByConversationOptionsType ) => Promise>; @@ -868,16 +873,17 @@ export type ClientExclusiveInterface = { updateConversation: (data: ConversationType) => void; removeConversation: (id: string) => Promise; - searchMessages: ( - query: string, - options?: { limit?: number } - ) => Promise>; - searchMessagesInConversation: ( - query: string, - conversationId: string, - options?: { limit?: number } - ) => Promise>; - + searchMessages: ({ + query, + conversationId, + options, + contactUuidsMatchingQuery, + }: { + query: string; + conversationId?: string; + options?: { limit?: number }; + contactUuidsMatchingQuery?: 
Array; + }) => Promise>; getOlderMessagesByConversation: ( options: AdjacentMessagesByConversationOptionsType ) => Promise>; diff --git a/ts/sql/Server.ts b/ts/sql/Server.ts index 7c2e8918f..f6912f766 100644 --- a/ts/sql/Server.ts +++ b/ts/sql/Server.ts @@ -135,6 +135,11 @@ import type { GetNearbyMessageFromDeletedSetOptionsType, } from './Interface'; import { SeenStatus } from '../MessageSeenStatus'; +import { + SNIPPET_LEFT_PLACEHOLDER, + SNIPPET_RIGHT_PLACEHOLDER, + SNIPPET_TRUNCATION_PLACEHOLDER, +} from '../util/search'; type ConversationRow = Readonly<{ json: string; @@ -234,7 +239,6 @@ const dataInterface: ServerInterface = { getAllGroupsInvolvingUuid, searchMessages, - searchMessagesInConversation, getMessageCount, getStoryCount, @@ -1587,11 +1591,18 @@ async function getAllGroupsInvolvingUuid( return rows.map(row => rowToConversation(row)); } -async function searchMessages( - query: string, - params: { limit?: number; conversationId?: string } = {} -): Promise> { - const { limit = 500, conversationId } = params; +async function searchMessages({ + query, + options, + conversationId, + contactUuidsMatchingQuery, +}: { + query: string; + options?: { limit?: number }; + conversationId?: string; + contactUuidsMatchingQuery?: Array; +}): Promise> { + const { limit = conversationId ? 100 : 500 } = options ?? {}; const db = getInstance(); @@ -1662,24 +1673,70 @@ async function searchMessages( // give us the right results. We can't call `snippet()` in the query above // because it would bloat the temporary table with text data and we want // to keep its size minimal for `ORDER BY` + `LIMIT` to be fast. 
- const result = db - .prepare( - ` - SELECT - messages.json, - snippet(messages_fts, -1, '<>', '<>', '<>', 10) - AS snippet - FROM tmp_filtered_results - INNER JOIN messages_fts - ON messages_fts.rowid = tmp_filtered_results.rowid - INNER JOIN messages - ON messages.rowid = tmp_filtered_results.rowid - WHERE - messages_fts.body MATCH $query - ORDER BY messages.received_at DESC, messages.sent_at DESC; - ` - ) - .all({ query }); + const ftsFragment = sqlFragment` + SELECT + messages.rowid, + messages.json, + messages.sent_at, + messages.received_at, + snippet(messages_fts, -1, ${SNIPPET_LEFT_PLACEHOLDER}, ${SNIPPET_RIGHT_PLACEHOLDER}, ${SNIPPET_TRUNCATION_PLACEHOLDER}, 10) AS ftsSnippet + FROM tmp_filtered_results + INNER JOIN messages_fts + ON messages_fts.rowid = tmp_filtered_results.rowid + INNER JOIN messages + ON messages.rowid = tmp_filtered_results.rowid + WHERE + messages_fts.body MATCH ${query} + ORDER BY messages.received_at DESC, messages.sent_at DESC + LIMIT ${limit} + `; + + let result: Array; + + if (!contactUuidsMatchingQuery?.length) { + const [sqlQuery, params] = sql`${ftsFragment};`; + result = db.prepare(sqlQuery).all(params); + } else { + // If contactUuidsMatchingQuery is not empty, we do an OUTER JOIN between: + // 1) the messages that mention at least one of contactUuidsMatchingQuery, and + // 2) the messages that match all the search terms via FTS + // + // Note: this groups the results by rowid, so even if one message mentions multiple + // matching UUIDs, we only return one to be highlighted + const [sqlQuery, params] = sql` + SELECT + messages.rowid as rowid, + COALESCE(messages.json, ftsResults.json) as json, + COALESCE(messages.sent_at, ftsResults.sent_at) as sent_at, + COALESCE(messages.received_at, ftsResults.received_at) as received_at, + ftsResults.ftsSnippet, + mentionUuid, + start as mentionStart, + length as mentionLength + FROM mentions + INNER JOIN messages + ON + messages.id = mentions.messageId + AND mentions.mentionUuid IN ( 
+ ${sqlJoin(contactUuidsMatchingQuery, ', ')} + ) + AND ${ + conversationId + ? sqlFragment`messages.conversationId = ${conversationId}` + : '1 IS 1' + } + AND messages.isViewOnce IS NOT 1 + AND messages.storyId IS NULL + FULL OUTER JOIN ( + ${ftsFragment} + ) as ftsResults + USING (rowid) + GROUP BY rowid + ORDER BY received_at DESC, sent_at DESC + LIMIT ${limit}; + `; + result = db.prepare(sqlQuery).all(params); + } db.exec( ` @@ -1687,19 +1744,10 @@ async function searchMessages( DROP TABLE tmp_filtered_results; ` ); - return result; })(); } -async function searchMessagesInConversation( - query: string, - conversationId: string, - { limit = 100 }: { limit?: number } = {} -): Promise> { - return searchMessages(query, { conversationId, limit }); -} - function getMessageCountSync( conversationId?: string, db = getInstance() diff --git a/ts/sql/migrations/84-all-mentions.ts b/ts/sql/migrations/84-all-mentions.ts new file mode 100644 index 000000000..632ff0a06 --- /dev/null +++ b/ts/sql/migrations/84-all-mentions.ts @@ -0,0 +1,58 @@ +// Copyright 2021 Signal Messenger, LLC +// SPDX-License-Identifier: AGPL-3.0-only + +import type { Database } from '@signalapp/better-sqlite3'; +import type { LoggerType } from '../../types/Logging'; + +export default function updateToSchemaVersion84( + currentVersion: number, + db: Database, + logger: LoggerType +): void { + if (currentVersion >= 84) { + return; + } + + db.transaction(() => { + const selectMentionsFromMessages = ` + SELECT messages.id, bodyRanges.value ->> 'mentionUuid' as mentionUuid, bodyRanges.value ->> 'start' as start, bodyRanges.value ->> 'length' as length + FROM messages, json_each(messages.json ->> 'bodyRanges') as bodyRanges + WHERE bodyRanges.value ->> 'mentionUuid' IS NOT NULL + `; + + db.exec(` + DROP TABLE IF EXISTS mentions; + + CREATE TABLE mentions ( + messageId REFERENCES messages(id) ON DELETE CASCADE, + mentionUuid STRING, + start INTEGER, + length INTEGER + ); + + CREATE INDEX mentions_uuid ON 
mentions (mentionUuid); + + INSERT INTO mentions (messageId, mentionUuid, start, length) + ${selectMentionsFromMessages}; + + CREATE TRIGGER messages_on_insert_insert_mentions AFTER INSERT ON messages + BEGIN + INSERT INTO mentions (messageId, mentionUuid, start, length) + ${selectMentionsFromMessages} + AND messages.id = new.id; + END; + + CREATE TRIGGER messages_on_update_update_mentions AFTER UPDATE ON messages + BEGIN + DELETE FROM mentions WHERE messageId = new.id; + INSERT INTO mentions (messageId, mentionUuid, start, length) + ${selectMentionsFromMessages} + AND messages.id = new.id; + END; + `); + + db.pragma('user_version = 84'); + })(); + + logger.info('updateToSchemaVersion84: success!'); +} diff --git a/ts/sql/migrations/index.ts b/ts/sql/migrations/index.ts index 34993d651..6886b5e93 100644 --- a/ts/sql/migrations/index.ts +++ b/ts/sql/migrations/index.ts @@ -59,6 +59,7 @@ import updateToSchemaVersion80 from './80-edited-messages'; import updateToSchemaVersion81 from './81-contact-removed-notification'; import updateToSchemaVersion82 from './82-edited-messages-read-index'; import updateToSchemaVersion83 from './83-mentions'; +import updateToSchemaVersion84 from './84-all-mentions'; function updateToSchemaVersion1( currentVersion: number, @@ -1987,6 +1988,7 @@ export const SCHEMA_VERSIONS = [ updateToSchemaVersion81, updateToSchemaVersion82, updateToSchemaVersion83, + updateToSchemaVersion84, ]; export function updateSchema(db: Database, logger: LoggerType): void { diff --git a/ts/state/ducks/search.ts b/ts/state/ducks/search.ts index 38d6b1c19..6ee0039a2 100644 --- a/ts/state/ducks/search.ts +++ b/ts/state/ducks/search.ts @@ -19,10 +19,10 @@ import type { ConversationType, ConversationUnloadedActionType, MessageDeletedActionType, - MessageType, RemoveAllConversationsActionType, TargetedConversationChangedActionType, ShowArchivedConversationsActionType, + MessageType, } from './conversations'; import { getQuery, getSearchConversation } from 
'../selectors/search'; import { getAllConversations } from '../selectors/conversations'; @@ -38,11 +38,10 @@ import { } from './conversations'; import { removeDiacritics } from '../../util/removeDiacritics'; import * as log from '../../logging/log'; +import { searchConversationTitles } from '../../util/searchConversationTitles'; +import { isDirectConversation } from '../../util/whatTypeOfConversation'; -const { - searchMessages: dataSearchMessages, - searchMessagesInConversation, -}: ClientInterface = dataInterface; +const { searchMessages: dataSearchMessages }: ClientInterface = dataInterface; // State @@ -221,11 +220,35 @@ const doSearch = debounce( return; } + // Limit the number of contacts to something reasonable + const MAX_MATCHING_CONTACTS = 100; + void (async () => { + const segmenter = new Intl.Segmenter([], { granularity: 'word' }); + const queryWords = [...segmenter.segment(query)] + .filter(word => word.isWordLike) + .map(word => word.segment); + const contactUuidsMatchingQuery = searchConversationTitles( + allConversations, + queryWords + ) + .filter( + conversation => + isDirectConversation(conversation) && Boolean(conversation.uuid) + ) + .map(conversation => conversation.uuid as string) + .slice(0, MAX_MATCHING_CONTACTS); + + const messages = await queryMessages({ + query, + searchConversationId, + contactUuidsMatchingQuery, + }); + dispatch({ type: 'SEARCH_MESSAGES_RESULTS_FULFILLED', payload: { - messages: await queryMessages(query, searchConversationId), + messages, query, }, }); @@ -255,10 +278,15 @@ const doSearch = debounce( 200 ); -async function queryMessages( - query: string, - searchConversationId?: string -): Promise> { +async function queryMessages({ + query, + searchConversationId, + contactUuidsMatchingQuery, +}: { + query: string; + searchConversationId?: string; + contactUuidsMatchingQuery?: Array; +}): Promise> { try { const normalized = cleanSearchTerm(query); if (normalized.length === 0) { @@ -266,10 +294,17 @@ async function 
queryMessages( } if (searchConversationId) { - return searchMessagesInConversation(normalized, searchConversationId); + return dataSearchMessages({ + query: normalized, + conversationId: searchConversationId, + contactUuidsMatchingQuery, + }); } - return dataSearchMessages(normalized); + return dataSearchMessages({ + query: normalized, + contactUuidsMatchingQuery, + }); } catch (e) { return []; } diff --git a/ts/test-both/util/searchConversationTitles_test.ts b/ts/test-both/util/searchConversationTitles_test.ts new file mode 100644 index 000000000..2d0e15af7 --- /dev/null +++ b/ts/test-both/util/searchConversationTitles_test.ts @@ -0,0 +1,56 @@ +// Copyright 2023 Signal Messenger, LLC +// SPDX-License-Identifier: AGPL-3.0-only + +import { assert } from 'chai'; +import { getDefaultConversation } from '../helpers/getDefaultConversation'; +import { searchConversationTitles } from '../../util/searchConversationTitles'; + +describe('searchContactTitles', () => { + const conversations = [ + getDefaultConversation({ + title: 'Ally Apple', + }), + getDefaultConversation({ + title: 'Betty Banana', + }), + getDefaultConversation({ + title: 'Catty Cantaloupe', + }), + getDefaultConversation({ + title: 'Debby Dancing Date', + }), + ]; + + function assertSearchEquals( + terms: Array, + expectedTitles: Array, + message?: string + ) { + const titles = searchConversationTitles(conversations, terms).map( + contact => contact.title + ); + + assert.sameMembers(titles, expectedTitles, message); + } + + it('matches full name components', () => { + assertSearchEquals(['ally'], ['Ally Apple'], 'first name'); + assertSearchEquals(['apple'], ['Ally Apple'], 'last name'); + assertSearchEquals(['danc'], ['Debby Dancing Date'], 'middle name'); + }); + + it('matches based on name component prefix', () => { + assertSearchEquals(['all'], ['Ally Apple']); + assertSearchEquals(['app'], ['Ally Apple']); + }); + + it('does not return single character matches', () => { + assertSearchEquals(['a'], 
[]); + assertSearchEquals([], []); + }); + + it('only returns prefix matches', () => { + assertSearchEquals(['lly'], []); + assertSearchEquals(['anana'], []); + }); +}); diff --git a/ts/test-both/util/search_test.ts b/ts/test-both/util/search_test.ts new file mode 100644 index 000000000..249b72d4f --- /dev/null +++ b/ts/test-both/util/search_test.ts @@ -0,0 +1,53 @@ +// Copyright 2023 Signal Messenger, LLC +// SPDX-License-Identifier: AGPL-3.0-only + +import { assert } from 'chai'; +import { generateSnippetAroundMention } from '../../util/search'; + +describe('generateSnippetAroundMention', () => { + it('generates snippet around mention at start of body', () => { + const snippet = generateSnippetAroundMention({ + body: ' can you sing that again but in the voice of Mr. Snuffleupagus?', + mentionStart: 0, + mentionLength: 1, + }); + + assert.strictEqual( + snippet, + '<> <> can you sing that again but in the voice of Mr<>' + ); + }); + it('generates snippet around mention in middle of body', () => { + const snippet = generateSnippetAroundMention({ + body: 'Stevie can you sing that again with but in the voice of Mr. 
Snuffleupagus?', + mentionStart: 36, + mentionLength: 1, + }); + + assert.strictEqual( + snippet, + '<>you sing that again with <> <> but in the voice of Mr<>' + ); + }); + it('generates snippet around mention at end of body', () => { + const snippet = generateSnippetAroundMention({ + body: 'Stevie can you sing that again but in the voice of ', + mentionStart: 51, + mentionLength: 1, + }); + + assert.strictEqual( + snippet, + '<>again but in the voice of <> <>' + ); + }); + it('generates snippet around mention-only body', () => { + const snippet = generateSnippetAroundMention({ + body: ' ', + mentionStart: 0, + mentionLength: 1, + }); + + assert.strictEqual(snippet, '<> <>'); + }); +}); diff --git a/ts/test-electron/sql/fullTextSearch_test.ts b/ts/test-electron/sql/fullTextSearch_test.ts index 22f80ad88..cf6817ed5 100644 --- a/ts/test-electron/sql/fullTextSearch_test.ts +++ b/ts/test-electron/sql/fullTextSearch_test.ts @@ -21,7 +21,7 @@ function getUuid(): UUIDStringType { return UUID.generate().toString(); } -describe('sql/fullTextSearch', () => { +describe('sql/searchMessages', () => { beforeEach(async () => { await removeAll(); }); @@ -67,14 +67,14 @@ describe('sql/fullTextSearch', () => { assert.lengthOf(await _getAllMessages(), 3); - const searchResults = await searchMessages('unique'); + const searchResults = await searchMessages({ query: 'unique' }); assert.lengthOf(searchResults, 1); assert.strictEqual(searchResults[0].id, message2.id); message3.body = 'message 3 - unique string'; await saveMessage(message3, { ourUuid }); - const searchResults2 = await searchMessages('unique'); + const searchResults2 = await searchMessages({ query: 'unique' }); assert.lengthOf(searchResults2, 2); assert.strictEqual(searchResults2[0].id, message3.id); assert.strictEqual(searchResults2[1].id, message2.id); @@ -123,14 +123,14 @@ describe('sql/fullTextSearch', () => { assert.lengthOf(await _getAllMessages(), 3); - const searchResults = await searchMessages('unique'); + const 
searchResults = await searchMessages({ query: 'unique' }); assert.lengthOf(searchResults, 1); assert.strictEqual(searchResults[0].id, message1.id); message1.body = 'message 3 - unique string'; await saveMessage(message3, { ourUuid }); - const searchResults2 = await searchMessages('unique'); + const searchResults2 = await searchMessages({ query: 'unique' }); assert.lengthOf(searchResults2, 1); assert.strictEqual(searchResults2[0].id, message1.id); }); @@ -178,15 +178,212 @@ describe('sql/fullTextSearch', () => { assert.lengthOf(await _getAllMessages(), 3); - const searchResults = await searchMessages('unique'); + const searchResults = await searchMessages({ query: 'unique' }); assert.lengthOf(searchResults, 1); assert.strictEqual(searchResults[0].id, message1.id); message1.body = 'message 3 - unique string'; await saveMessage(message3, { ourUuid }); - const searchResults2 = await searchMessages('unique'); + const searchResults2 = await searchMessages({ query: 'unique' }); assert.lengthOf(searchResults2, 1); assert.strictEqual(searchResults2[0].id, message1.id); }); + + it('limits messages returned to a specific conversation if specified', async () => { + assert.lengthOf(await _getAllMessages(), 0); + + const now = Date.now(); + const conversationId = getUuid(); + const otherConversationId = getUuid(); + const ourUuid = getUuid(); + + const message1: MessageAttributesType = { + id: getUuid(), + body: 'message 1 - unique string', + type: 'outgoing', + conversationId, + sent_at: now - 20, + received_at: now - 20, + timestamp: now - 20, + }; + const message2: MessageAttributesType = { + id: getUuid(), + body: 'message 2 - unique string', + type: 'outgoing', + conversationId: otherConversationId, + sent_at: now - 10, + received_at: now - 10, + timestamp: now - 10, + }; + + await saveMessages([message1, message2], { + forceSave: true, + ourUuid, + }); + + assert.lengthOf(await _getAllMessages(), 2); + + const searchResults = await searchMessages({ query: 'unique' }); + 
assert.lengthOf(searchResults, 2); + + const searchResultsWithConversationId = await searchMessages({ + query: 'unique', + conversationId: otherConversationId, + }); + assert.lengthOf(searchResultsWithConversationId, 1); + assert.strictEqual(searchResultsWithConversationId[0].id, message2.id); + }); +}); + +describe('sql/searchMessages/withMentions', () => { + beforeEach(async () => { + await removeAll(); + }); + const ourUuid = getUuid(); + async function storeMessages( + messageOverrides: Array> + ) { + const now = Date.now(); + const messages: Array = messageOverrides.map( + (overrides, idx) => ({ + id: getUuid(), + body: ' ', + type: 'incoming', + sent_at: now - idx, + received_at: now - idx, + timestamp: now - idx, + conversationId: getUuid(), + ...overrides, + }) + ); + await saveMessages(messages, { + forceSave: true, + ourUuid, + }); + return messages; + } + + it('includes messages with mentions', async () => { + const mentionedUuids = [getUuid(), getUuid()]; + const messages = await storeMessages([ + { + bodyRanges: [{ start: 0, length: 1, mentionUuid: mentionedUuids[0] }], + }, + { + bodyRanges: [{ start: 0, length: 1, mentionUuid: mentionedUuids[1] }], + }, + { + bodyRanges: [ + { start: 0, length: 1, mentionUuid: mentionedUuids[0] }, + { start: 1, length: 1, mentionUuid: mentionedUuids[1] }, + ], + }, + {}, + ]); + + const searchResults = await searchMessages({ + query: 'alice', + contactUuidsMatchingQuery: [mentionedUuids[0], getUuid()], + }); + + assert.sameOrderedMembers( + searchResults.map(res => res.id), + [messages[0].id, messages[2].id] + ); + + const searchResultsForMultipleMatchingUuids = await searchMessages({ + query: 'alice', + contactUuidsMatchingQuery: [mentionedUuids[0], mentionedUuids[1]], + }); + + assert.sameOrderedMembers( + searchResultsForMultipleMatchingUuids.map(res => res.id), + // TODO: should only return unique messages + [messages[0].id, messages[1].id, messages[2].id] + ); + }); + + it('includes messages with mentions and 
those that match the body text', async () => { + const mentionedUuids = [getUuid(), getUuid()]; + const messages = await storeMessages([ + { + body: 'cat', + }, + { + body: 'dog', + bodyRanges: [ + { start: 0, length: 1, mentionUuid: mentionedUuids[0] }, + { start: 1, length: 1, mentionUuid: mentionedUuids[1] }, + ], + }, + { + body: 'dog', + }, + ]); + + const searchResults = await searchMessages({ + query: 'cat', + contactUuidsMatchingQuery: [mentionedUuids[0], getUuid()], + }); + + assert.sameOrderedMembers( + searchResults.map(res => res.id), + [messages[0].id, messages[1].id] + ); + + // check that results get returned in the right order, independent of whether they + // match the mention or the text + const searchResultsForDog = await searchMessages({ + query: 'dog', + contactUuidsMatchingQuery: [mentionedUuids[1], getUuid()], + }); + assert.sameOrderedMembers( + searchResultsForDog.map(res => res.id), + [messages[1].id, messages[2].id] + ); + }); + it('respects conversationId for mention matches', async () => { + const mentionedUuids = [getUuid(), getUuid()]; + const conversationId = getUuid(); + const messages = await storeMessages([ + { + body: 'cat', + conversationId, + }, + { + body: 'dog', + bodyRanges: [{ start: 0, length: 1, mentionUuid: mentionedUuids[0] }], + conversationId, + }, + { + body: 'dog', + bodyRanges: [{ start: 0, length: 1, mentionUuid: mentionedUuids[0] }], + }, + { + body: 'cat', + }, + ]); + + const searchResults = await searchMessages({ + query: 'cat', + contactUuidsMatchingQuery: [mentionedUuids[0]], + conversationId, + }); + + assert.sameOrderedMembers( + searchResults.map(res => res.id), + [messages[0].id, messages[1].id] + ); + + const searchResultsWithoutConversationid = await searchMessages({ + query: 'cat', + contactUuidsMatchingQuery: [mentionedUuids[0]], + }); + + assert.sameOrderedMembers( + searchResultsWithoutConversationid.map(res => res.id), + [messages[0].id, messages[1].id, messages[2].id, messages[3].id] + ); + }); 
}); diff --git a/ts/test-node/sql_migrations_test.ts b/ts/test-node/sql_migrations_test.ts index 6df3f3161..202fa4de1 100644 --- a/ts/test-node/sql_migrations_test.ts +++ b/ts/test-node/sql_migrations_test.ts @@ -15,7 +15,9 @@ import { } from '../sql/Server'; import { ReadStatus } from '../messages/MessageReadStatus'; import { SeenStatus } from '../MessageSeenStatus'; -import { sql } from '../sql/util'; +import { objectToJSON, sql, sqlJoin } from '../sql/util'; +import type { MessageType } from '../sql/Interface'; +import { BodyRange } from '../types/BodyRange'; const OUR_UUID = generateGuid(); @@ -3186,4 +3188,292 @@ describe('SQL migrations test', () => { ); }); }); + + describe('updateToSchemaVersion84', () => { + const schemaVersion = 84; + function composeMessage({ + id, + mentions, + boldRanges, + }: { + id?: string; + mentions?: Array; + boldRanges?: Array>; + }) { + const json: Partial = { + id: id ?? generateGuid(), + body: `Message body: ${id}`, + }; + if (mentions) { + json.bodyRanges = mentions.map((mentionUuid, mentionIdx) => ({ + start: mentionIdx, + length: 1, + mentionUuid, + })); + } + + // Add some other body ranges in that are not mentions + if (boldRanges) { + json.bodyRanges = (json.bodyRanges ?? 
[]).concat( + boldRanges.map(([start, length]) => ({ + start, + length, + style: BodyRange.Style.BOLD, + })) + ); + } + return json; + } + + function addMessages( + messages: Array<{ + mentions?: Array; + boldRanges?: Array>; + }> + ) { + const formattedMessages = messages.map(composeMessage); + + db.exec( + ` + INSERT INTO messages + (id, json) + VALUES + ${formattedMessages + .map(message => `('${message.id}', '${objectToJSON(message)}')`) + .join(', ')}; + ` + ); + + assert.equal( + db.prepare('SELECT COUNT(*) FROM messages;').pluck().get(), + messages.length + ); + + return { formattedMessages }; + } + + function getMentions() { + return db + .prepare('SELECT messageId, mentionUuid, start, length FROM mentions;') + .all(); + } + + it('Creates and populates the mentions table with existing mentions', () => { + updateToVersion(schemaVersion - 1); + + const userIds = new Array(5).fill(undefined).map(() => generateGuid()); + const { formattedMessages } = addMessages([ + { mentions: [userIds[0]] }, + { mentions: [userIds[1]], boldRanges: [[1, 1]] }, + { mentions: [userIds[1], userIds[2]] }, + {}, + { boldRanges: [[1, 4]] }, + ]); + + // now create mentions table + updateToVersion(schemaVersion); + + // only the 4 mentions should be included, with multiple rows for multiple mentions + // in a message + const mentions = getMentions(); + + assert.equal(mentions.length, 4); + assert.sameDeepMembers(mentions, [ + { + messageId: formattedMessages[0].id, + mentionUuid: userIds[0], + start: 0, + length: 1, + }, + { + messageId: formattedMessages[1].id, + mentionUuid: userIds[1], + start: 0, + length: 1, + }, + { + messageId: formattedMessages[2].id, + mentionUuid: userIds[1], + start: 0, + length: 1, + }, + { + messageId: formattedMessages[2].id, + mentionUuid: userIds[2], + start: 1, + length: 1, + }, + ]); + }); + + it('Updates mention table when new messages are added', () => { + updateToVersion(schemaVersion); + assert.equal( + db.prepare('SELECT COUNT(*) FROM 
mentions;').pluck().get(), + 0 + ); + + const userIds = new Array(5).fill(undefined).map(() => generateGuid()); + const { formattedMessages } = addMessages([ + { mentions: [userIds[0]] }, + { mentions: [userIds[1]], boldRanges: [[1, 1]] }, + { mentions: [userIds[1], userIds[2]] }, + {}, + { boldRanges: [[1, 4]] }, + ]); + + // the 4 mentions should be included, with multiple rows for multiple mentions in a + // message + const mentions = getMentions(); + + assert.sameDeepMembers(mentions, [ + { + messageId: formattedMessages[0].id, + mentionUuid: userIds[0], + start: 0, + length: 1, + }, + { + messageId: formattedMessages[1].id, + mentionUuid: userIds[1], + start: 0, + length: 1, + }, + { + messageId: formattedMessages[2].id, + mentionUuid: userIds[1], + start: 0, + length: 1, + }, + { + messageId: formattedMessages[2].id, + mentionUuid: userIds[2], + start: 1, + length: 1, + }, + ]); + }); + + it('Removes mentions when messages are deleted', () => { + updateToVersion(schemaVersion); + assert.equal( + db.prepare('SELECT COUNT(*) FROM mentions;').pluck().get(), + 0 + ); + + const userIds = new Array(5).fill(undefined).map(() => generateGuid()); + const { formattedMessages } = addMessages([ + { mentions: [userIds[0]] }, + { mentions: [userIds[1], userIds[2]], boldRanges: [[1, 1]] }, + ]); + + assert.equal(getMentions().length, 3); + + // The foreign key ON DELETE CASCADE relationship should delete mentions when the + // referenced message is deleted + db.exec(`DELETE FROM messages WHERE id = '${formattedMessages[1].id}';`); + const mentions = getMentions(); + assert.equal(getMentions().length, 1); + assert.sameDeepMembers(mentions, [ + { + messageId: formattedMessages[0].id, + mentionUuid: userIds[0], + start: 0, + length: 1, + }, + ]); + }); + + it('Updates mentions when messages are updated', () => { + updateToVersion(schemaVersion); + assert.equal( + db.prepare('SELECT COUNT(*) FROM mentions;').pluck().get(), + 0 + ); + + const userIds = new 
Array(5).fill(undefined).map(() => generateGuid()); + const { formattedMessages } = addMessages([{ mentions: [userIds[0]] }]); + + assert.equal(getMentions().length, 1); + + // update it with 0 mentions + db.prepare( + `UPDATE messages SET json = $json WHERE id = '${formattedMessages[0].id}';` + ).run({ + json: objectToJSON(composeMessage({ id: formattedMessages[0].id })), + }); + assert.equal(getMentions().length, 0); + + // update it with a bold bodyrange + db.prepare( + `UPDATE messages SET json = $json WHERE id = '${formattedMessages[0].id}';` + ).run({ + json: objectToJSON( + composeMessage({ id: formattedMessages[0].id, boldRanges: [[1, 2]] }) + ), + }); + assert.equal(getMentions().length, 0); + + // update it with three new mentions + db.prepare( + `UPDATE messages SET json = $json WHERE id = '${formattedMessages[0].id}';` + ).run({ + json: objectToJSON( + composeMessage({ + id: formattedMessages[0].id, + mentions: [userIds[2], userIds[3], userIds[4]], + boldRanges: [[1, 2]], + }) + ), + }); + assert.sameDeepMembers(getMentions(), [ + { + messageId: formattedMessages[0].id, + mentionUuid: userIds[2], + start: 0, + length: 1, + }, + { + messageId: formattedMessages[0].id, + mentionUuid: userIds[3], + start: 1, + length: 1, + }, + { + messageId: formattedMessages[0].id, + mentionUuid: userIds[4], + start: 2, + length: 1, + }, + ]); + }); + it('uses the mentionUuid index for searching mentions', () => { + updateToVersion(schemaVersion); + const [query, params] = sql` + EXPLAIN QUERY PLAN + SELECT + messages.rowid, + mentionUuid + FROM mentions + INNER JOIN messages + ON + messages.id = mentions.messageId + AND mentions.mentionUuid IN ( + ${sqlJoin(['a', 'b', 'c'], ', ')} + ) + AND messages.isViewOnce IS NOT 1 + AND messages.storyId IS NULL + + LIMIT 100; + `; + const { detail } = db.prepare(query).get(params); + + assert.notInclude(detail, 'B-TREE'); + assert.notInclude(detail, 'SCAN'); + assert.include( + detail, + 'SEARCH mentions USING INDEX mentions_uuid 
(mentionUuid=?)' + ); + }); + }); }); diff --git a/ts/util/filterAndSortConversations.ts b/ts/util/filterAndSortConversations.ts index 43b56e0c0..a05eff480 100644 --- a/ts/util/filterAndSortConversations.ts +++ b/ts/util/filterAndSortConversations.ts @@ -1,12 +1,12 @@ // Copyright 2021 Signal Messenger, LLC // SPDX-License-Identifier: AGPL-3.0-only -import Fuse from 'fuse.js'; +import type Fuse from 'fuse.js'; import type { ConversationType } from '../state/ducks/conversations'; import { parseAndFormatPhoneNumber } from './libphonenumberInstance'; import { WEEK } from './durations'; -import { removeDiacritics } from './removeDiacritics'; +import { fuseGetFnRemoveDiacritics, getCachedFuseIndex } from './fuse'; // Fuse.js scores have order of 0.01 const ACTIVE_AT_SCORE_FACTOR = (1 / WEEK) * 0.01; @@ -45,25 +45,9 @@ const FUSE_OPTIONS: Fuse.IFuseOptions = { weight: 0.5, }, ], - getFn: (...args) => { - const text = Fuse.config.getFn(...args); - if (!text) { - return text; - } - - if (typeof text === 'string') { - return removeDiacritics(text); - } - - return text.map(removeDiacritics); - }, + getFn: fuseGetFnRemoveDiacritics, }; -const cachedIndices = new WeakMap< - ReadonlyArray, - Fuse ->(); - type CommandRunnerType = ( conversations: ReadonlyArray, query: string @@ -114,11 +98,7 @@ function searchConversations( extendedSearchTerm += ` | ${phoneNumber.e164}`; } - let index = cachedIndices.get(conversations); - if (!index) { - index = new Fuse(conversations, FUSE_OPTIONS); - cachedIndices.set(conversations, index); - } + const index = getCachedFuseIndex(conversations, FUSE_OPTIONS); return index.search(extendedSearchTerm); } diff --git a/ts/util/fuse.ts b/ts/util/fuse.ts new file mode 100644 index 000000000..625b87ac0 --- /dev/null +++ b/ts/util/fuse.ts @@ -0,0 +1,53 @@ +// Copyright 2023 Signal Messenger, LLC +// SPDX-License-Identifier: AGPL-3.0-only + +import Fuse from 'fuse.js'; + +import { removeDiacritics } from './removeDiacritics'; + +const cachedIndices: Map< 
// Cache of Fuse indices, keyed first by the `options` object and then by the indexed
// `list`. Both levels are WeakMaps so that neither an options object nor a list is kept
// alive solely because an index was once built for it.
const cachedIndices = new WeakMap<
  Fuse.IFuseOptions<unknown>,
  WeakMap<ReadonlyArray<unknown>, Fuse<unknown>>
>();

/**
 * Retrieve a cached Fuse index, creating (and caching) one if needed.
 *
 * Indices are uniquely identified by their `options` and the `list` of values being
 * indexed, compared by reference. Both should remain referentially static in order to
 * avoid unnecessarily re-indexing.
 *
 * @param list - The values to index.
 * @param options - The Fuse options used to build the index.
 * @returns The cached index for this `(options, list)` pair, or a newly built one.
 */
export function getCachedFuseIndex<T>(
  list: ReadonlyArray<T>,
  options: Fuse.IFuseOptions<T>
): Fuse<T> {
  const optionsKey = options as Fuse.IFuseOptions<unknown>;

  let indicesForOptions = cachedIndices.get(optionsKey);
  if (!indicesForOptions) {
    indicesForOptions = new WeakMap();
    cachedIndices.set(optionsKey, indicesForOptions);
  }

  let index = indicesForOptions.get(list);
  if (!index) {
    index = new Fuse(list, options);
    indicesForOptions.set(list, index);
  }

  // The map's types don't allow us to specify that the type of the value depends on the
  // type of the key, so we have to cast it here.
  return index as unknown as Fuse<T>;
}

/**
 * Fuse `getFn` that reads values with Fuse's default getter and strips diacritics from
 * whatever text it returns. Falsy results are passed through untouched; arrays are
 * mapped element by element.
 */
export const fuseGetFnRemoveDiacritics: Fuse.FuseGetFunction<unknown> = (
  ...args
) => {
  const text = Fuse.config.getFn(...args);
  if (!text) {
    return text;
  }

  if (typeof text === 'string') {
    return removeDiacritics(text);
  }

  return text.map(removeDiacritics);
};
/**
 * Generate a snippet suitable for rendering search results, in the style returned from
 * FTS's snippet() function, built around a mention in the message body.
 *
 * The snippet is narrowed word by word, so it never starts or ends in the middle of a
 * word.
 *
 * @param body - The full message body.
 * @param mentionStart - Index in `body` at which the mention starts.
 * @param mentionLength - Number of characters in `body` covered by the mention.
 * @param approxSnippetLength - The approximate length of the snippet (not including any
 * hydrated mentions that might occur when rendering)
 * @param maxCharsBeforeHighlight - Max chars to show before the highlight, to ensure the
 * highlight is visible even at narrow search result pane widths
 *
 * @returns The snippet with the mention wrapped in SNIPPET_LEFT_PLACEHOLDER /
 * SNIPPET_RIGHT_PLACEHOLDER, prefixed and/or suffixed with
 * SNIPPET_TRUNCATION_PLACEHOLDER wherever the body was cut.
 */
export function generateSnippetAroundMention({
  body,
  mentionStart,
  mentionLength = 1,
  approxSnippetLength = 50,
  maxCharsBeforeHighlight = 30,
}: {
  body: string;
  mentionStart: number;
  mentionLength?: number;
  approxSnippetLength?: number;
  maxCharsBeforeHighlight?: number;
}): string {
  const segmenter = new Intl.Segmenter([], { granularity: 'word' });
  const mentionEnd = mentionStart + mentionLength;

  // Grab a window of the body around the mention, larger than the desired snippet.
  // The bounds are clamped to the body so they can be used as offsets below.
  const clampToBody = (idx: number) => Math.min(Math.max(idx, 0), body.length);
  const windowStart = clampToBody(mentionStart - 2 * approxSnippetLength);
  const windowEnd = clampToBody(mentionEnd + 2 * approxSnippetLength);
  const bodyAroundMention = body.substring(windowStart, windowEnd);

  // Segment indices are relative to `bodyAroundMention`; shift them by `windowStart` so
  // every position below is an index into `body`. Without this shift the narrowing loop
  // compares window-relative word positions against the body-relative `mentionStart`
  // and produces oversized, off-centre snippets for mentions deep in a long body.
  const words = [...segmenter.segment(bodyAroundMention)]
    .filter(word => word.isWordLike)
    .map(word => ({
      start: windowStart + word.index,
      end: windowStart + word.index + word.segment.length,
    }));

  // If the window contains no word-like segments, fall back to the window itself
  let snippetStartIdx = windowStart;
  let snippetEndIdx = windowEnd;

  let leftWordIdx = 0;
  let rightWordIdx = words.length - 1;

  // Gradually narrow the window, word by word, until a snippet of appropriate length
  // is found
  while (leftWordIdx <= rightWordIdx) {
    const leftWord = words[leftWordIdx];
    const rightWord = words[rightWordIdx];

    // The snippet always contains the whole mention, even once the words on one side
    // have been exhausted
    snippetStartIdx = Math.min(leftWord.start, mentionStart);
    snippetEndIdx = Math.max(rightWord.end, mentionEnd);

    const lengthBeforeMention = mentionStart - snippetStartIdx;
    const lengthAfterMention = snippetEndIdx - mentionEnd;

    if (
      lengthBeforeMention + lengthAfterMention <= approxSnippetLength &&
      lengthBeforeMention <= maxCharsBeforeHighlight
    ) {
      break;
    }

    // Trim from the left when there is too much text before the highlight, or when the
    // left side is the longer one; otherwise trim from the right
    if (
      lengthBeforeMention > maxCharsBeforeHighlight ||
      lengthBeforeMention > lengthAfterMention
    ) {
      leftWordIdx += 1;
    } else {
      rightWordIdx -= 1;
    }
  }

  const mentionStartInSnippet = mentionStart - snippetStartIdx;
  const snippedBody = body.substring(snippetStartIdx, snippetEndIdx);

  const snippedBodyWithPlaceholders =
    (snippetStartIdx > 0 ? SNIPPET_TRUNCATION_PLACEHOLDER : '') +
    snippedBody.substring(0, mentionStartInSnippet) +
    SNIPPET_LEFT_PLACEHOLDER +
    snippedBody.substring(
      mentionStartInSnippet,
      mentionStartInSnippet + mentionLength
    ) +
    SNIPPET_RIGHT_PLACEHOLDER +
    snippedBody.substring(mentionStartInSnippet + mentionLength) +
    (snippetEndIdx < body.length ? SNIPPET_TRUNCATION_PLACEHOLDER : '');

  return snippedBodyWithPlaceholders;
}
const CONVERSATION_TITLE = 'title';
// Search terms shorter than this are ignored
const MIN_SEARCH_TERM_LENGTH = 2;
const segmenter = new Intl.Segmenter([], { granularity: 'word' });
const FUSE_OPTIONS: Fuse.IFuseOptions<ConversationType> = {
  keys: [CONVERSATION_TITLE],
  // Index each title as a list of its segments (diacritics removed), so that every
  // segment is matched against the query on its own
  getFn: (...args) => {
    const text = fuseGetFnRemoveDiacritics(...args);
    return [
      ...segmenter.segment(typeof text === 'string' ? text : text.join(' ')),
    ].map(word => word.segment);
  },
  isCaseSensitive: false,
  includeScore: false,
  shouldSort: true,
  // Setting location, distance, and threshold to zero returns only exact prefix matches
  // i.e. matches that start at index 0 and where every character matches the query
  location: 0,
  distance: 0,
  threshold: 0,
};

/**
 * Find the conversations whose title matches at least one of the search terms.
 *
 * Titles are indexed segment by segment with the exact-prefix settings in
 * `FUSE_OPTIONS`, and the terms are combined with a Fuse `$or` expression. Terms
 * shorter than `MIN_SEARCH_TERM_LENGTH` are dropped first.
 *
 * @param conversations - The conversations to search. Keep this array referentially
 * stable between calls so the cached Fuse index can be reused.
 * @param searchTerms - The terms to look for.
 * @returns The matching conversations, or an empty array if no term is long enough.
 */
export function searchConversationTitles(
  conversations: ReadonlyArray<ConversationType>,
  searchTerms: Array<string>
): Array<ConversationType> {
  const usableTerms = searchTerms.filter(
    term => term.length >= MIN_SEARCH_TERM_LENGTH
  );

  // Nothing to search for: skip building an index and running an empty `$or` query
  if (usableTerms.length === 0) {
    return [];
  }

  const index = getCachedFuseIndex(conversations, FUSE_OPTIONS);
  const searchQuery: Fuse.Expression = {
    $or: usableTerms.map(term => ({ [CONVERSATION_TITLE]: term })),
  };
  return index.search(searchQuery).map(result => result.item);
}