From af33e28d381d47d6d8451d2a545eff281e4afd04 Mon Sep 17 00:00:00 2001 From: Ionatan Wiznia Date: Mon, 30 Dec 2024 19:34:14 +0100 Subject: [PATCH] Revert "Search suffix tree implementation" --- src/CONST.ts | 3 - .../Search/SearchRouter/SearchRouterList.tsx | 18 +- src/hooks/useFastSearchFromOptions.ts | 113 ---------- src/libs/FastSearch.ts | 167 -------------- src/libs/OptionsListUtils.ts | 115 ++++------ src/libs/SuffixUkkonenTree/index.ts | 211 ------------------ src/libs/SuffixUkkonenTree/utils.ts | 115 ---------- tests/unit/FastSearchTest.ts | 169 -------------- tests/unit/OptionsListUtilsTest.ts | 28 +-- tests/unit/SuffixUkkonenTreeTest.ts | 63 ------ tests/unit/useFastSearchFromOptions.tsx | 49 ---- 11 files changed, 46 insertions(+), 1005 deletions(-) delete mode 100644 src/hooks/useFastSearchFromOptions.ts delete mode 100644 src/libs/FastSearch.ts delete mode 100644 src/libs/SuffixUkkonenTree/index.ts delete mode 100644 src/libs/SuffixUkkonenTree/utils.ts delete mode 100644 tests/unit/FastSearchTest.ts delete mode 100644 tests/unit/SuffixUkkonenTreeTest.ts delete mode 100644 tests/unit/useFastSearchFromOptions.tsx diff --git a/src/CONST.ts b/src/CONST.ts index cf9e5d8a2886..83a31b9ea9e9 100755 --- a/src/CONST.ts +++ b/src/CONST.ts @@ -1336,9 +1336,6 @@ const CONST = { SEARCH_OPTION_LIST_DEBOUNCE_TIME: 300, RESIZE_DEBOUNCE_TIME: 100, UNREAD_UPDATE_DEBOUNCE_TIME: 300, - SEARCH_CONVERT_SEARCH_VALUES: 'search_convert_search_values', - SEARCH_MAKE_TREE: 'search_make_tree', - SEARCH_BUILD_TREE: 'search_build_tree', SEARCH_FILTER_OPTIONS: 'search_filter_options', USE_DEBOUNCED_STATE_DELAY: 300, LIST_SCROLLING_DEBOUNCE_TIME: 200, diff --git a/src/components/Search/SearchRouter/SearchRouterList.tsx b/src/components/Search/SearchRouter/SearchRouterList.tsx index 6210c8882136..a53e49374d81 100644 --- a/src/components/Search/SearchRouter/SearchRouterList.tsx +++ b/src/components/Search/SearchRouter/SearchRouterList.tsx @@ -12,7 +12,6 @@ import type {SearchQueryItem, SearchQueryListItemProps} from '@components/Select import type {SectionListDataType, SelectionListHandle, UserListItemProps} from '@components/SelectionList/types'; import UserListItem from '@components/SelectionList/UserListItem'; import useActiveWorkspace from '@hooks/useActiveWorkspace'; -import useFastSearchFromOptions from '@hooks/useFastSearchFromOptions'; import useLocalize from '@hooks/useLocalize'; import usePolicy from '@hooks/usePolicy'; import useResponsiveLayout from '@hooks/useResponsiveLayout'; @@ -180,7 +179,7 @@ function SearchRouterList( if (currentUser) { autocompleteOptions.push({ name: currentUser.displayName ?? Str.removeSMSDomain(currentUser.login ?? ''), - accountID: currentUser.accountID?.toString(), + accountID: currentUser.accountID?.toString() ?? '-1', }); } @@ -383,30 +382,21 @@ function SearchRouterList( }; }); - /** - * Builds a suffix tree and returns a function to search in it. - */ - const filterOptions = useFastSearchFromOptions(searchOptions, {includeUserToInvite: true}); - const recentReportsOptions = useMemo(() => { if (autocompleteQueryValue.trim() === '') { return searchOptions.recentReports.slice(0, 20); } Timing.start(CONST.TIMING.SEARCH_FILTER_OPTIONS); - const filteredOptions = filterOptions(autocompleteQueryValue); - const orderedOptions = OptionsListUtils.combineOrderingOfReportsAndPersonalDetails(filteredOptions, autocompleteQueryValue, { - sortByReportTypeInSearch: true, - preferChatroomsOverThreads: true, - }); + const filteredOptions = OptionsListUtils.filterAndOrderOptions(searchOptions, autocompleteQueryValue, {sortByReportTypeInSearch: true, preferChatroomsOverThreads: true}); Timing.end(CONST.TIMING.SEARCH_FILTER_OPTIONS); - const reportOptions: OptionData[] = [...orderedOptions.recentReports, ...orderedOptions.personalDetails]; + const reportOptions: OptionData[] = [...filteredOptions.recentReports, ...filteredOptions.personalDetails]; if (filteredOptions.userToInvite) { reportOptions.push(filteredOptions.userToInvite); } return reportOptions.slice(0, 20); - }, [autocompleteQueryValue, filterOptions, searchOptions]); + }, [autocompleteQueryValue, searchOptions]); useEffect(() => { ReportUserActions.searchInServer(autocompleteQueryValue.trim()); diff --git a/src/hooks/useFastSearchFromOptions.ts b/src/hooks/useFastSearchFromOptions.ts deleted file mode 100644 index 7856eed479bd..000000000000 --- a/src/hooks/useFastSearchFromOptions.ts +++ /dev/null @@ -1,113 +0,0 @@ -import {useMemo} from 'react'; -import FastSearch from '@libs/FastSearch'; -import * as OptionsListUtils from '@libs/OptionsListUtils'; - -type AllOrSelectiveOptions = OptionsListUtils.ReportAndPersonalDetailOptions | OptionsListUtils.Options; - -type Options = { - includeUserToInvite: boolean; -}; - -const emptyResult = { - personalDetails: [], - recentReports: [], -}; - -// You can either use this to search within report and personal details options -function useFastSearchFromOptions( - options: OptionsListUtils.ReportAndPersonalDetailOptions, - config?: {includeUserToInvite: false}, -): (searchInput: string) => OptionsListUtils.ReportAndPersonalDetailOptions; -// Or you can use this to include the user invite option. This will require passing all options -function useFastSearchFromOptions(options: OptionsListUtils.Options, config?: {includeUserToInvite: true}): (searchInput: string) => OptionsListUtils.Options; - -/** - * Hook for making options from OptionsListUtils searchable with FastSearch. - * Builds a suffix tree and returns a function to search in it. - * - * @example - * ``` - * const options = OptionsListUtils.getSearchOptions(...); - * const filterOptions = useFastSearchFromOptions(options); - */ -function useFastSearchFromOptions( - options: OptionsListUtils.ReportAndPersonalDetailOptions | OptionsListUtils.Options, - {includeUserToInvite}: Options = {includeUserToInvite: false}, -): (searchInput: string) => AllOrSelectiveOptions { - const findInSearchTree = useMemo(() => { - const fastSearch = FastSearch.createFastSearch([ - { - data: options.personalDetails, - toSearchableString: (option) => { - const displayName = option.participantsList?.[0]?.displayName ?? ''; - return [option.login ?? '', option.login !== displayName ? displayName : ''].join(); - }, - uniqueId: (option) => option.login, - }, - { - data: options.recentReports, - toSearchableString: (option) => { - const searchStringForTree = [option.text ?? '', option.login ?? '']; - - if (option.isThread) { - if (option.alternateText) { - searchStringForTree.push(option.alternateText); - } - } else if (!!option.isChatRoom || !!option.isPolicyExpenseChat) { - if (option.subtitle) { - searchStringForTree.push(option.subtitle); - } - } - - return searchStringForTree.join(); - }, - }, - ]); - - function search(searchInput: string): AllOrSelectiveOptions { - const searchWords = searchInput.split(' ').sort(); // asc sorted - const longestSearchWord = searchWords.at(searchWords.length - 1); // longest word is the last element - if (!longestSearchWord) { - return emptyResult; - } - - // The user might separated words with spaces to do a search such as: "jo d" -> "john doe" - // With the suffix search tree you can only search for one word at a time. Its most efficient to search for the longest word, - // (as this will limit the results the most) and then afterwards run a quick filter on the results to see if the other words are present. - let [personalDetails, recentReports] = fastSearch.search(longestSearchWord); - - if (searchWords.length > 1) { - personalDetails = personalDetails.filter((pd) => OptionsListUtils.isSearchStringMatch(searchInput, pd.text)); - recentReports = recentReports.filter((rr) => OptionsListUtils.isSearchStringMatch(searchInput, rr.text)); - } - - if (includeUserToInvite && 'currentUserOption' in options) { - const userToInvite = OptionsListUtils.filterUserToInvite( - { - ...options, - personalDetails, - recentReports, - }, - searchInput, - ); - return { - personalDetails, - recentReports, - userToInvite, - currentUserOption: options.currentUserOption, - }; - } - - return { - personalDetails, - recentReports, - }; - } - - return search; - }, [includeUserToInvite, options]); - - return findInSearchTree; -} - -export default useFastSearchFromOptions; diff --git a/src/libs/FastSearch.ts b/src/libs/FastSearch.ts deleted file mode 100644 index a947867f596c..000000000000 --- a/src/libs/FastSearch.ts +++ /dev/null @@ -1,167 +0,0 @@ -/* eslint-disable rulesdir/prefer-at */ -import CONST from '@src/CONST'; -import Timing from './actions/Timing'; -import SuffixUkkonenTree from './SuffixUkkonenTree'; - -type SearchableData = { - /** - * The data that should be searchable - */ - data: T[]; - /** - * A function that generates a string from a data entry. The string's value is used for searching. - * If you have multiple fields that should be searchable, simply concat them to the string and return it. - */ - toSearchableString: (data: T) => string; - - /** - * Gives the possibility to identify data by a unique attribute. Assume you have two search results with the same text they might be valid - * and represent different data. In this case, you can provide a function that returns a unique identifier for the data. - * If multiple items with the same identifier are found, only the first one will be returned. - * This fixes: https://github.com/Expensify/App/issues/53579 - */ - uniqueId?: (data: T) => string | undefined; -}; - -// There are certain characters appear very often in our search data (email addresses), which we don't need to search for. -const charSetToSkip = new Set(['@', '.', '#', '$', '%', '&', '*', '+', '-', '/', ':', ';', '<', '=', '>', '?', '_', '~', '!', ' ', ',', '(', ')']); - -/** - * Creates a new "FastSearch" instance. "FastSearch" uses a suffix tree to search for substrings in a list of strings. - * You can provide multiple datasets. The search results will be returned for each dataset. - * - * Note: Creating a FastSearch instance with a lot of data is computationally expensive. You should create an instance once and reuse it. - * Searches will be very fast though, even with a lot of data. - */ -function createFastSearch(dataSets: Array>) { - Timing.start(CONST.TIMING.SEARCH_CONVERT_SEARCH_VALUES); - const maxNumericListSize = 400_000; - // The user might provide multiple data sets, but internally, the search values will be stored in this one list: - let concatenatedNumericList = new Uint8Array(maxNumericListSize); - // Here we store the index of the data item in the original data list, so we can map the found occurrences back to the original data: - const occurrenceToIndex = new Uint32Array(maxNumericListSize * 4); - // As we are working with ArrayBuffers, we need to keep track of the current offset: - const offset = {value: 1}; - // We store the last offset for a dataSet, so we can map the found occurrences to the correct dataSet: - const listOffsets: number[] = []; - - for (const {data, toSearchableString} of dataSets) { - // Performance critical: the array parameters are passed by reference, so we don't have to create new arrays every time: - dataToNumericRepresentation(concatenatedNumericList, occurrenceToIndex, offset, {data, toSearchableString}); - listOffsets.push(offset.value); - } - concatenatedNumericList[offset.value++] = SuffixUkkonenTree.END_CHAR_CODE; - listOffsets[listOffsets.length - 1] = offset.value; - Timing.end(CONST.TIMING.SEARCH_CONVERT_SEARCH_VALUES); - - // The list might be larger than necessary, so we clamp it to the actual size: - concatenatedNumericList = concatenatedNumericList.slice(0, offset.value); - - // Create & build the suffix tree: - Timing.start(CONST.TIMING.SEARCH_MAKE_TREE); - const tree = SuffixUkkonenTree.makeTree(concatenatedNumericList); - Timing.end(CONST.TIMING.SEARCH_MAKE_TREE); - - Timing.start(CONST.TIMING.SEARCH_BUILD_TREE); - tree.build(); - Timing.end(CONST.TIMING.SEARCH_BUILD_TREE); - - /** - * Searches for the given input and returns results for each dataset. - */ - function search(searchInput: string): T[][] { - const cleanedSearchString = cleanString(searchInput); - const {numeric} = SuffixUkkonenTree.stringToNumeric(cleanedSearchString, { - charSetToSkip, - // stringToNumeric might return a list that is larger than necessary, so we clamp it to the actual size - // (otherwise the search could fail as we include in our search empty array values): - clamp: true, - }); - const result = tree.findSubstring(Array.from(numeric)); - - const resultsByDataSet = Array.from({length: dataSets.length}, () => new Set()); - const uniqueMap: Record> = {}; - // eslint-disable-next-line @typescript-eslint/prefer-for-of - for (let i = 0; i < result.length; i++) { - const occurrenceIndex = result[i]; - const itemIndexInDataSet = occurrenceToIndex[occurrenceIndex]; - const dataSetIndex = listOffsets.findIndex((listOffset) => occurrenceIndex < listOffset); - - if (dataSetIndex === -1) { - throw new Error(`[FastSearch] The occurrence index ${occurrenceIndex} is not in any dataset`); - } - const item = dataSets[dataSetIndex].data[itemIndexInDataSet]; - if (!item) { - throw new Error(`[FastSearch] The item with index ${itemIndexInDataSet} in dataset ${dataSetIndex} is not defined`); - } - - // Check for uniqueness eventually - const getUniqueId = dataSets[dataSetIndex].uniqueId; - if (getUniqueId) { - const uniqueId = getUniqueId(item); - if (uniqueId) { - const hasId = uniqueMap[dataSetIndex]?.[uniqueId]; - if (hasId) { - // eslint-disable-next-line no-continue - continue; - } - if (!uniqueMap[dataSetIndex]) { - uniqueMap[dataSetIndex] = {}; - } - uniqueMap[dataSetIndex][uniqueId] = item; - } - } - - resultsByDataSet[dataSetIndex].add(item); - } - - return resultsByDataSet.map((set) => Array.from(set)); - } - - return { - search, - }; -} - -/** - * The suffix tree can only store string like values, and internally stores those as numbers. - * This function converts the user data (which are most likely objects) to a numeric representation. - * Additionally a list of the original data and their index position in the numeric list is created, which is used to map the found occurrences back to the original data. - */ -function dataToNumericRepresentation(concatenatedNumericList: Uint8Array, occurrenceToIndex: Uint32Array, offset: {value: number}, {data, toSearchableString}: SearchableData): void { - data.forEach((option, index) => { - const searchStringForTree = toSearchableString(option); - const cleanedSearchStringForTree = cleanString(searchStringForTree); - - if (cleanedSearchStringForTree.length === 0) { - return; - } - - SuffixUkkonenTree.stringToNumeric(cleanedSearchStringForTree, { - charSetToSkip, - out: { - outArray: concatenatedNumericList, - offset, - outOccurrenceToIndex: occurrenceToIndex, - index, - }, - }); - // eslint-disable-next-line no-param-reassign - occurrenceToIndex[offset.value] = index; - // eslint-disable-next-line no-param-reassign - concatenatedNumericList[offset.value++] = SuffixUkkonenTree.DELIMITER_CHAR_CODE; - }); -} - -/** - * Everything in the tree is treated as lowercase. - */ -function cleanString(input: string) { - return input.toLowerCase(); -} - -const FastSearch = { - createFastSearch, -}; - -export default FastSearch; diff --git a/src/libs/OptionsListUtils.ts b/src/libs/OptionsListUtils.ts index b588b3dd5359..5072830d1b8f 100644 --- a/src/libs/OptionsListUtils.ts +++ b/src/libs/OptionsListUtils.ts @@ -147,26 +147,15 @@ type FilterUserToInviteConfig = Pick; - /** * OptionsListUtils is used to build a list options passed to the OptionsList component. Several different UI views can * be configured to display different results based on the options passed to the private getOptions() method. Public @@ -955,7 +944,7 @@ function orderReportOptions(options: ReportUtils.OptionData[]) { function orderReportOptionsWithSearch( options: ReportUtils.OptionData[], searchValue: string, - {preferChatroomsOverThreads = false, preferPolicyExpenseChat = false, preferRecentExpenseReports = false}: OrderReportOptionsConfig = {}, + {preferChatroomsOverThreads = false, preferPolicyExpenseChat = false, preferRecentExpenseReports = false}: OrderOptionsConfig = {}, ) { const orderedByDate = orderReportOptions(options); @@ -1011,16 +1000,11 @@ function sortComparatorReportOptionByDate(options: ReportUtils.OptionData) { return options.lastVisibleActionCreated ?? ''; } -/** - * Sorts reports and personal details independently. - */ -function orderOptions(options: ReportAndPersonalDetailOptions): ReportAndPersonalDetailOptions; +type ReportAndPersonalDetailOptions = Pick; -/** - * Sorts reports and personal details independently, but prioritizes the search value. - */ -function orderOptions(options: ReportAndPersonalDetailOptions, searchValue: string, config?: OrderReportOptionsConfig): ReportAndPersonalDetailOptions; -function orderOptions(options: ReportAndPersonalDetailOptions, searchValue?: string, config?: OrderReportOptionsConfig): ReportAndPersonalDetailOptions { +function orderOptions(options: ReportAndPersonalDetailOptions): ReportAndPersonalDetailOptions; +function orderOptions(options: ReportAndPersonalDetailOptions, searchValue: string, config?: OrderOptionsConfig): ReportAndPersonalDetailOptions; +function orderOptions(options: ReportAndPersonalDetailOptions, searchValue?: string, config?: OrderOptionsConfig) { let orderedReportOptions: ReportUtils.OptionData[]; if (searchValue) { orderedReportOptions = orderReportOptionsWithSearch(options.recentReports, searchValue, config); @@ -1039,20 +1023,12 @@ function canCreateOptimisticPersonalDetailOption({ recentReportOptions, personalDetailsOptions, currentUserOption, - searchValue, }: { recentReportOptions: ReportUtils.OptionData[]; personalDetailsOptions: ReportUtils.OptionData[]; currentUserOption?: ReportUtils.OptionData | null; - searchValue: string; }) { - if (recentReportOptions.length + personalDetailsOptions.length > 0) { - return false; - } - if (!currentUserOption) { - return true; - } - return currentUserOption.login !== PhoneNumber.addSMSDomainIfPhoneNumber(searchValue ?? '').toLowerCase() && currentUserOption.login !== searchValue?.toLowerCase(); + return recentReportOptions.length + personalDetailsOptions.length === 0 && !currentUserOption; } /** @@ -1717,7 +1693,6 @@ function filterUserToInvite(options: Omit, searchValue: recentReportOptions: options.recentReports, personalDetailsOptions: options.personalDetails, currentUserOption: options.currentUserOption, - searchValue, }); if (!canCreateOptimisticDetail) { @@ -1762,58 +1737,48 @@ function filterOptions(options: Options, searchInputValue: string, config?: Filt }; } -type AllOrderConfigs = OrderReportOptionsConfig & OrderOptionsConfig; -type FilterAndOrderConfig = FilterUserToInviteConfig & AllOrderConfigs; - -/** - * Orders the reports and personal details based on the search input value. - * Personal details will be filtered out if they are part of the recent reports. - * Additional configs can be applied. - */ -function combineOrderingOfReportsAndPersonalDetails( - options: ReportAndPersonalDetailOptions, - searchInputValue: string, - {maxRecentReportsToShow, sortByReportTypeInSearch, ...orderReportOptionsConfig}: AllOrderConfigs = {}, -): ReportAndPersonalDetailOptions { - // sortByReportTypeInSearch will show the personal details as part of the recent reports - if (sortByReportTypeInSearch) { - const personalDetailsWithoutDMs = filteredPersonalDetailsOfRecentReports(options.recentReports, options.personalDetails); - const reportsAndPersonalDetails = options.recentReports.concat(personalDetailsWithoutDMs); - return orderOptions({recentReports: reportsAndPersonalDetails, personalDetails: []}, searchInputValue, orderReportOptionsConfig); - } - - let orderedReports = orderReportOptionsWithSearch(options.recentReports, searchInputValue, orderReportOptionsConfig); - if (typeof maxRecentReportsToShow === 'number') { - orderedReports = orderedReports.slice(0, maxRecentReportsToShow); - } - - const personalDetailsWithoutDMs = filteredPersonalDetailsOfRecentReports(orderedReports, options.personalDetails); - const orderedPersonalDetails = orderPersonalDetailsOptions(personalDetailsWithoutDMs); - - return { - recentReports: orderedReports, - personalDetails: orderedPersonalDetails, - }; -} +type FilterAndOrderConfig = FilterUserToInviteConfig & OrderOptionsConfig; /** * Filters and orders the options based on the search input value. * Note that personal details that are part of the recent reports will always be shown as part of the recent reports (ie. DMs). */ function filterAndOrderOptions(options: Options, searchInputValue: string, config: FilterAndOrderConfig = {}): Options { + const {sortByReportTypeInSearch = false} = config; + let filterResult = options; if (searchInputValue.trim().length > 0) { filterResult = filterOptions(options, searchInputValue, config); } - const orderedOptions = combineOrderingOfReportsAndPersonalDetails(filterResult, searchInputValue, config); + let {recentReports: filteredReports, personalDetails: filteredPersonalDetails} = filterResult; // on staging server, in specific cases (see issue) BE returns duplicated personalDetails entries - orderedOptions.personalDetails = orderedOptions.personalDetails.filter((detail, index, array) => array.findIndex((i) => i.login === detail.login) === index); + filteredPersonalDetails = filteredPersonalDetails.filter((detail, index, array) => array.findIndex((i) => i.login === detail.login) === index); + + if (typeof config?.maxRecentReportsToShow === 'number') { + filteredReports = orderReportOptionsWithSearch(filteredReports, searchInputValue, config); + filteredReports = filteredReports.slice(0, config.maxRecentReportsToShow); + } + + const personalDetailsWithoutDMs = filteredPersonalDetailsOfRecentReports(filteredReports, filteredPersonalDetails); + const orderedPersonalDetails = orderPersonalDetailsOptions(personalDetailsWithoutDMs); + + // sortByReportTypeInSearch option will show the personal details as part of the recent reports + if (sortByReportTypeInSearch) { + filteredReports = filteredReports.concat(orderedPersonalDetails); + filteredPersonalDetails = []; + } else { + filteredPersonalDetails = orderedPersonalDetails; + } + + const orderedReports = orderReportOptionsWithSearch(filteredReports, searchInputValue, config); return { - ...filterResult, - ...orderedOptions, + recentReports: orderedReports, + personalDetails: filteredPersonalDetails, + userToInvite: filterResult.userToInvite, + currentUserOption: filterResult.currentUserOption, }; } @@ -1862,13 +1827,12 @@ export { formatMemberForList, formatSectionsFromSearchTerm, getShareLogOptions, - orderOptions, - filterUserToInvite, filterOptions, filteredPersonalDetailsOfRecentReports, orderReportOptions, orderReportOptionsWithSearch, orderPersonalDetailsOptions, + orderOptions, filterAndOrderOptions, createOptionList, createOptionFromReport, @@ -1883,7 +1847,6 @@ export { getAttendeeOptions, getAlternateText, hasReportErrors, - combineOrderingOfReportsAndPersonalDetails, }; -export type {Section, SectionBase, MemberForList, Options, OptionList, SearchOption, PayeePersonalDetails, Option, OptionTree, ReportAndPersonalDetailOptions, GetUserToInviteConfig}; +export type {Section, SectionBase, MemberForList, Options, OptionList, SearchOption, PayeePersonalDetails, Option, OptionTree}; diff --git a/src/libs/SuffixUkkonenTree/index.ts b/src/libs/SuffixUkkonenTree/index.ts deleted file mode 100644 index bcefd1008493..000000000000 --- a/src/libs/SuffixUkkonenTree/index.ts +++ /dev/null @@ -1,211 +0,0 @@ -/* eslint-disable rulesdir/prefer-at */ -// .at() has a performance overhead we explicitly want to avoid here - -/* eslint-disable no-continue */ -import {ALPHABET_SIZE, DELIMITER_CHAR_CODE, END_CHAR_CODE, SPECIAL_CHAR_CODE, stringToNumeric} from './utils'; - -/** - * This implements a suffix tree using Ukkonen's algorithm. - * A good visualization to learn about the algorithm can be found here: https://brenden.github.io/ukkonen-animation/ - * A good video explaining Ukkonen's algorithm can be found here: https://www.youtube.com/watch?v=ALEV0Hc5dDk - * Note: This implementation is optimized for performance, not necessarily for readability. - * - * You probably don't want to use this directly, but rather use @libs/FastSearch.ts as a easy to use wrapper around this. - */ - -/** - * Creates a new tree instance that can be used to build a suffix tree and search in it. - * The input is a numeric representation of the search string, which can be created using {@link stringToNumeric}. - * Separate search values must be separated by the {@link DELIMITER_CHAR_CODE}. The search string must end with the {@link END_CHAR_CODE}. - * - * The tree will be built using the Ukkonen's algorithm: https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf - */ -function makeTree(numericSearchValues: Uint8Array) { - // Every leaf represents a suffix. There can't be more than n suffixes. - // Every internal node has to have at least 2 children. So the total size of ukkonen tree is not bigger than 2n - 1. - // + 1 is because an extra character at the beginning to offset the 1-based indexing. - const maxNodes = 2 * numericSearchValues.length + 1; - /* - This array represents all internal nodes in the suffix tree. - When building this tree, we'll be given a character in the string, and we need to be able to lookup in constant time - if there's any edge connected to a node starting with that character. For example, given a tree like this: - - root - / | \ - a b c - - and the next character in our string is 'd', we need to be able do check if any of the edges from the root node - start with the letter 'd', without looping through all the edges. - - To accomplish this, each node gets an array matching the alphabet size. - So you can imagine if our alphabet was just [a,b,c,d], then each node would get an array like [0,0,0,0]. - If we add an edge starting with 'a', then the root node would be [1,0,0,0] - So given an arbitrary letter such as 'd', then we can take the position of that letter in its alphabet (position 3 in our example) - and check whether that index in the array is 0 or 1. If it's a 1, then there's an edge starting with the letter 'd'. - - Note that for efficiency, all nodes are stored in a single flat array. That's how we end up with (maxNodes * alphabet_size). - In the example of a 4-character alphabet, we'd have an array like this: - - root root.left root.right last possible node - / \ / \ / \ / \ - [0,0,0,0, 0,0,0,0, 0,0,0,0, ................. 0,0,0,0] - */ - const transitionNodes = new Uint32Array(maxNodes * ALPHABET_SIZE); - - // Storing the range of the original string that each node represents: - const rangeStart = new Uint32Array(maxNodes); - const rangeEnd = new Uint32Array(maxNodes); - - const parent = new Uint32Array(maxNodes); - const suffixLink = new Uint32Array(maxNodes); - - let currentNode = 1; - let currentPosition = 1; - let nodeCounter = 3; - let currentIndex = 1; - - function initializeTree() { - rangeEnd.fill(numericSearchValues.length); - rangeEnd[1] = 0; - rangeEnd[2] = 0; - suffixLink[1] = 2; - for (let i = 0; i < ALPHABET_SIZE; ++i) { - transitionNodes[ALPHABET_SIZE * 2 + i] = 1; - } - } - - function processCharacter(char: number) { - // eslint-disable-next-line no-constant-condition - while (true) { - if (rangeEnd[currentNode] < currentPosition) { - if (transitionNodes[currentNode * ALPHABET_SIZE + char] === 0) { - createNewLeaf(char); - continue; - } - currentNode = transitionNodes[currentNode * ALPHABET_SIZE + char]; - currentPosition = rangeStart[currentNode]; - } - if (currentPosition === 0 || char === numericSearchValues[currentPosition]) { - currentPosition++; - } else { - splitEdge(char); - continue; - } - break; - } - } - - function createNewLeaf(c: number) { - transitionNodes[currentNode * ALPHABET_SIZE + c] = nodeCounter; - rangeStart[nodeCounter] = currentIndex; - parent[nodeCounter++] = currentNode; - currentNode = suffixLink[currentNode]; - - currentPosition = rangeEnd[currentNode] + 1; - } - - function splitEdge(c: number) { - rangeStart[nodeCounter] = rangeStart[currentNode]; - rangeEnd[nodeCounter] = currentPosition - 1; - parent[nodeCounter] = parent[currentNode]; - - transitionNodes[nodeCounter * ALPHABET_SIZE + numericSearchValues[currentPosition]] = currentNode; - transitionNodes[nodeCounter * ALPHABET_SIZE + c] = nodeCounter + 1; - rangeStart[nodeCounter + 1] = currentIndex; - parent[nodeCounter + 1] = nodeCounter; - rangeStart[currentNode] = currentPosition; - parent[currentNode] = nodeCounter; - - transitionNodes[parent[nodeCounter] * ALPHABET_SIZE + numericSearchValues[rangeStart[nodeCounter]]] = nodeCounter; - nodeCounter += 2; - handleDescent(nodeCounter); - } - - function handleDescent(latestNodeIndex: number) { - currentNode = suffixLink[parent[latestNodeIndex - 2]]; - currentPosition = rangeStart[latestNodeIndex - 2]; - while (currentPosition <= rangeEnd[latestNodeIndex - 2]) { - currentNode = transitionNodes[currentNode * ALPHABET_SIZE + numericSearchValues[currentPosition]]; - currentPosition += rangeEnd[currentNode] - rangeStart[currentNode] + 1; - } - if (currentPosition === rangeEnd[latestNodeIndex - 2] + 1) { - suffixLink[latestNodeIndex - 2] = currentNode; - } else { - suffixLink[latestNodeIndex - 2] = latestNodeIndex; - } - currentPosition = rangeEnd[currentNode] - (currentPosition - rangeEnd[latestNodeIndex - 2]) + 2; - } - - function build() { - initializeTree(); - for (currentIndex = 1; currentIndex < numericSearchValues.length; ++currentIndex) { - const c = numericSearchValues[currentIndex]; - processCharacter(c); - } - } - - /** - * Returns all occurrences of the given (sub)string in the input string. - * - * You can think of the tree that we create as a big string that looks like this: - * - * "banana$pancake$apple|" - * The example delimiter character '$' is used to separate the different strings. - * The end character '|' is used to indicate the end of our search string. - * - * This function will return the index(es) of found occurrences within this big string. - * So, when searching for "an", it would return [1, 3, 8]. - */ - function findSubstring(searchValue: number[]) { - const occurrences: number[] = []; - - function dfs(node: number, depth: number) { - const leftRange = rangeStart[node]; - const rightRange = rangeEnd[node]; - const rangeLen = node === 1 ? 0 : rightRange - leftRange + 1; - - for (let i = 0; i < rangeLen && depth + i < searchValue.length && leftRange + i < numericSearchValues.length; i++) { - if (searchValue[depth + i] !== numericSearchValues[leftRange + i]) { - return; - } - } - - let isLeaf = true; - for (let i = 0; i < ALPHABET_SIZE; ++i) { - const tNode = transitionNodes[node * ALPHABET_SIZE + i]; - - // Search speed optimization: don't go through the edge if it's different than the next char: - const correctChar = depth + rangeLen >= searchValue.length || i === searchValue[depth + rangeLen]; - - if (tNode !== 0 && tNode !== 1 && correctChar) { - isLeaf = false; - dfs(tNode, depth + rangeLen); - } - } - - if (isLeaf && depth + rangeLen >= searchValue.length) { - occurrences.push(numericSearchValues.length - (depth + rangeLen) + 1); - } - } - - dfs(1, 0); - return occurrences; - } - - return { - build, - findSubstring, - }; -} - -const SuffixUkkonenTree = { - makeTree, - - // Re-exported from utils: - DELIMITER_CHAR_CODE, - SPECIAL_CHAR_CODE, - END_CHAR_CODE, - stringToNumeric, -}; - -export default SuffixUkkonenTree; diff --git a/src/libs/SuffixUkkonenTree/utils.ts b/src/libs/SuffixUkkonenTree/utils.ts deleted file mode 100644 index 96ee35b15796..000000000000 --- a/src/libs/SuffixUkkonenTree/utils.ts +++ /dev/null @@ -1,115 +0,0 @@ -/* eslint-disable rulesdir/prefer-at */ // .at() has a performance overhead we explicitly want to avoid here -/* eslint-disable no-continue */ - -const CHAR_CODE_A = 'a'.charCodeAt(0); -const ALPHABET = 'abcdefghijklmnopqrstuvwxyz'; -const LETTER_ALPHABET_SIZE = ALPHABET.length; -const ALPHABET_SIZE = LETTER_ALPHABET_SIZE + 3; // +3: special char, delimiter char, end char -const SPECIAL_CHAR_CODE = ALPHABET_SIZE - 3; -const DELIMITER_CHAR_CODE = ALPHABET_SIZE - 2; -const END_CHAR_CODE = ALPHABET_SIZE - 1; - -// Store the results for a char code in a lookup table to avoid recalculating the same values (performance optimization) -const base26LookupTable = new Array(); - -/** - * Converts a number to a base26 representation. - */ -function convertToBase26(num: number): number[] { - if (base26LookupTable[num]) { - return base26LookupTable[num]; - } - if (num < 0) { - throw new Error('convertToBase26: Input must be a non-negative integer'); - } - - const result: number[] = []; - - do { - // eslint-disable-next-line no-param-reassign - num--; - result.unshift(num % 26); - // eslint-disable-next-line no-bitwise, no-param-reassign - num >>= 5; // Equivalent to Math.floor(num / 26), but faster - } while (num > 0); - - base26LookupTable[num] = result; - return result; -} - -/** - * Converts a string to an array of numbers representing the characters of the string. - * Every number in the array is in the range [0, ALPHABET_SIZE-1] (0-28). - * - * The numbers are offset by the character code of 'a' (97). - * - This is so that the numbers from a-z are in the range 0-28. - * - 26 is for encoding special characters. Character numbers that are not within the range of a-z will be encoded as "specialCharacter + base26(charCode)" - * - 27 is for the delimiter character - * - 28 is for the end character - * - * Note: The string should be converted to lowercase first (otherwise uppercase letters get base26'ed taking more space than necessary). - */ -function stringToNumeric( - // The string we want to convert to a numeric representation - input: string, - options?: { - // A set of characters that should be skipped and not included in the numeric representation - charSetToSkip?: Set; - // When out is provided, the function will write the result to the provided arrays instead of creating new ones (performance) - out?: { - outArray: Uint8Array; - // As outArray is a ArrayBuffer we need to keep track of the current offset - offset: {value: number}; - // A map of to map the found occurrences to the correct data set - // As the search string can be very long for high traffic accounts (500k+), this has to be big enough, thus its a Uint32Array - outOccurrenceToIndex?: Uint32Array; - // The index that will be used in the outOccurrenceToIndex array (this is the index of your original data position) - index?: number; - }; - // By default false. By default the outArray may be larger than necessary. If clamp is set to true the outArray will be clamped to the actual size. - clamp?: boolean; - }, -): { - numeric: Uint8Array; - occurrenceToIndex: Uint32Array; - offset: {value: number}; -} { - // The out array might be longer than our input string length, because we encode special characters as multiple numbers using the base26 encoding. - // * 6 is because the upper limit of encoding any char in UTF-8 to base26 is at max 6 numbers. - const outArray = options?.out?.outArray ?? new Uint8Array(input.length * 6); - const offset = options?.out?.offset ?? {value: 0}; - const occurrenceToIndex = options?.out?.outOccurrenceToIndex ?? new Uint32Array(input.length * 16 * 4); - const index = options?.out?.index ?? 0; - - for (let i = 0; i < input.length; i++) { - const char = input[i]; - - if (options?.charSetToSkip?.has(char)) { - continue; - } - - if (char >= 'a' && char <= 'z') { - // char is an alphabet character - occurrenceToIndex[offset.value] = index; - outArray[offset.value++] = char.charCodeAt(0) - CHAR_CODE_A; - } else { - const charCode = input.charCodeAt(i); - occurrenceToIndex[offset.value] = index; - outArray[offset.value++] = SPECIAL_CHAR_CODE; - const asBase26Numeric = convertToBase26(charCode); - // eslint-disable-next-line @typescript-eslint/prefer-for-of - for (let j = 0; j < asBase26Numeric.length; j++) { - occurrenceToIndex[offset.value] = index; - outArray[offset.value++] = asBase26Numeric[j]; - } - } - } - - return { - numeric: options?.clamp ? outArray.slice(0, offset.value) : outArray, - occurrenceToIndex, - offset, - }; -} - -export {stringToNumeric, ALPHABET, ALPHABET_SIZE, SPECIAL_CHAR_CODE, DELIMITER_CHAR_CODE, END_CHAR_CODE}; diff --git a/tests/unit/FastSearchTest.ts b/tests/unit/FastSearchTest.ts deleted file mode 100644 index 42487b716d09..000000000000 --- a/tests/unit/FastSearchTest.ts +++ /dev/null @@ -1,169 +0,0 @@ -import FastSearch from '../../src/libs/FastSearch'; - -describe('FastSearch', () => { - it('should insert, and find the word', () => { - const {search} = FastSearch.createFastSearch([ - { - data: ['banana'], - toSearchableString: (data) => data, - }, - ]); - expect(search('an')).toEqual([['banana']]); - }); - - it('should work with multiple words', () => { - const {search} = FastSearch.createFastSearch([ - { - data: ['banana', 'test'], - toSearchableString: (data) => data, - }, - ]); - - expect(search('es')).toEqual([['test']]); - }); - - it('should work when providing two data sets', () => { - const {search} = FastSearch.createFastSearch([ - { - data: ['erica', 'banana'], - toSearchableString: (data) => data, - }, - { - data: ['banana', 'test'], - toSearchableString: (data) => data, - }, - ]); - - expect(search('es')).toEqual([[], ['test']]); - }); - - it('should work with numbers', () => { - const {search} = FastSearch.createFastSearch([ - { - data: [1, 2, 3, 4, 5], - toSearchableString: (data) => String(data), - }, - ]); - - expect(search('2')).toEqual([[2]]); - }); - - it('should work with unicodes', () => { - const {search} = FastSearch.createFastSearch([ - { - data: ['banana', 'ñèşťǒř', 'test'], - toSearchableString: (data) => data, - }, - ]); - - expect(search('èşť')).toEqual([['ñèşťǒř']]); - }); - - it('should work with words containing "reserved special characters"', () => { - const {search} = FastSearch.createFastSearch([ - { - data: ['ba|nana', 'te{st', 'he}llo'], - toSearchableString: (data) => data, - }, - ]); - - expect(search('st')).toEqual([['te{st']]); - expect(search('llo')).toEqual([['he}llo']]); - expect(search('nana')).toEqual([['ba|nana']]); - }); - - it('should be case insensitive', () => { - const {search} = FastSearch.createFastSearch([ - { - data: ['banana', 'TeSt', 'TEST', 'X'], - toSearchableString: (data) => data, - }, - ]); - - expect(search('test')).toEqual([['TeSt', 'TEST']]); - }); - - it('should work with large random data sets', () => { - const data = Array.from({length: 1000}, () => { - return Array.from({length: Math.floor(Math.random() * 22 + 9)}, () => { - const alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789@-_.'; - return alphabet.charAt(Math.floor(Math.random() * alphabet.length)); - }).join(''); - }); - - const {search} = FastSearch.createFastSearch([ - { - data, - toSearchableString: (x) => x, - }, - ]); - - data.forEach((word) => { - expect(search(word)).toEqual([expect.arrayContaining([word])]); - }); - }); - - it('should find email addresses without dots', () => { - const {search} = FastSearch.createFastSearch([ - { - data: ['test.user@example.com', 'unrelated'], - toSearchableString: (data) => data, - }, - ]); - - expect(search('testuser')).toEqual([['test.user@example.com']]); - expect(search('test.user')).toEqual([['test.user@example.com']]); - expect(search('examplecom')).toEqual([['test.user@example.com']]); - }); - - it('should filter duplicate IDs', () => { - const {search} = FastSearch.createFastSearch([ - { - data: [ - { - text: 'qa.guide@team.expensify.com', - alternateText: 'qa.guide@team.expensify.com', - keyForList: '14365522', - isSelected: false, - isDisabled: false, - accountID: 14365522, - login: 'qa.guide@team.expensify.com', - icons: [ - { - source: 'https://d2k5nsl2zxldvw.cloudfront.net/images/avatars/default-avatar_11.png', - type: 'avatar', - name: 'qa.guide@team.expensify.com', - id: 14365522, - }, - ], - reportID: '', - }, - { - text: 'qa.guide@team.expensify.com', - alternateText: 'qa.guide@team.expensify.com', - keyForList: '714749267', - isSelected: false, - isDisabled: false, - accountID: 714749267, - login: 'qa.guide@team.expensify.com', - icons: [ - { - source: 'ƒ SvgFallbackAvatar(props)', - type: 'avatar', - name: 'qa.guide@team.expensify.com', - id: 714749267, - }, - ], - reportID: '', - }, - ], - toSearchableString: (data) => data.text, - uniqueId: (data) => data.login, - }, - ]); - - const [result] = search('qa.g'); - // The both items are represented using the same string. - expect(result).toHaveLength(1); - }); -}); diff --git a/tests/unit/OptionsListUtilsTest.ts b/tests/unit/OptionsListUtilsTest.ts index 6c0ad88619cb..39406e6a0995 100644 --- a/tests/unit/OptionsListUtilsTest.ts +++ b/tests/unit/OptionsListUtilsTest.ts @@ -1005,33 +1005,11 @@ describe('OptionsListUtils', () => { }); describe('canCreateOptimisticPersonalDetailOption', () => { - const VALID_EMAIL = 'valid@email.com'; - it('should allow to create optimistic personal detail option if email is valid', () => { - const currentUserEmail = 'tonystark@expensify.com'; - const canCreate = OptionsListUtils.canCreateOptimisticPersonalDetailOption({ - searchValue: VALID_EMAIL, - currentUserOption: { - login: currentUserEmail, - } as ReportUtils.OptionData, - // Note: in the past this would check for the existence of the email in the personalDetails list, this has changed. - // We expect only filtered lists to be passed to this function, so we don't need to check for the existence of the email in the personalDetails list. - // This is a performance optimization. - personalDetailsOptions: [], - recentReportOptions: [], - }); - - expect(canCreate).toBe(true); - }); - it('should not allow to create option if email is an email of current user', () => { - const currentUserEmail = 'tonystark@expensify.com'; const canCreate = OptionsListUtils.canCreateOptimisticPersonalDetailOption({ - searchValue: currentUserEmail, - recentReportOptions: [], - personalDetailsOptions: [], - currentUserOption: { - login: currentUserEmail, - } as ReportUtils.OptionData, + recentReportOptions: OPTIONS.reports, + personalDetailsOptions: OPTIONS.personalDetails, + currentUserOption: null, }); expect(canCreate).toBe(false); diff --git a/tests/unit/SuffixUkkonenTreeTest.ts b/tests/unit/SuffixUkkonenTreeTest.ts deleted file mode 100644 index c0c556c16e14..000000000000 --- a/tests/unit/SuffixUkkonenTreeTest.ts +++ /dev/null @@ -1,63 +0,0 @@ -import SuffixUkkonenTree from '@libs/SuffixUkkonenTree/index'; - -describe('SuffixUkkonenTree', () => { - // The suffix tree doesn't take strings, but expects an array buffer, where strings have been separated by a delimiter. - function helperStringsToNumericForTree(strings: string[]) { - const numericLists = strings.map((s) => SuffixUkkonenTree.stringToNumeric(s, {clamp: true})); - const numericList = numericLists.reduce( - (acc, {numeric}) => { - acc.push(...numeric, SuffixUkkonenTree.DELIMITER_CHAR_CODE); - return acc; - }, - // The value we pass to makeTree needs to be offset by one - [0], - ); - numericList.push(SuffixUkkonenTree.END_CHAR_CODE); - return Uint8Array.from(numericList); - } - - it('should insert, build, and find all occurrences', () => { - const strings = ['banana', 'pancake']; - const numericIntArray = helperStringsToNumericForTree(strings); - - const tree = SuffixUkkonenTree.makeTree(numericIntArray); - tree.build(); - const searchValue = SuffixUkkonenTree.stringToNumeric('an', {clamp: true}).numeric; - expect(tree.findSubstring(Array.from(searchValue))).toEqual(expect.arrayContaining([2, 4, 9])); - }); - - it('should find by first character', () => { - const strings = ['pancake', 'banana']; - const numericIntArray = helperStringsToNumericForTree(strings); - const tree = SuffixUkkonenTree.makeTree(numericIntArray); - tree.build(); - const searchValue = SuffixUkkonenTree.stringToNumeric('p', {clamp: true}).numeric; - expect(tree.findSubstring(Array.from(searchValue))).toEqual(expect.arrayContaining([1])); - }); - - it('should handle identical words', () => { - const strings = ['banana', 'banana', 'x']; - const numericIntArray = helperStringsToNumericForTree(strings); - const tree = SuffixUkkonenTree.makeTree(numericIntArray); - tree.build(); - const searchValue = SuffixUkkonenTree.stringToNumeric('an', {clamp: true}).numeric; - expect(tree.findSubstring(Array.from(searchValue))).toEqual(expect.arrayContaining([2, 4, 9, 11])); - }); - - it('should convert string to numeric with a list of chars to skip', () => { - const {numeric} = SuffixUkkonenTree.stringToNumeric('abcabc', { - charSetToSkip: new Set(['b']), - clamp: true, - }); - expect(Array.from(numeric)).toEqual([0, 2, 0, 2]); - }); - - it('should convert string outside of a-z to numeric with clamping', () => { - const {numeric} = SuffixUkkonenTree.stringToNumeric('2', { - clamp: true, - }); - - // "2" in ASCII is 50, so base26(50) = [0, 23] - expect(Array.from(numeric)).toEqual([SuffixUkkonenTree.SPECIAL_CHAR_CODE, 0, 23]); - }); -}); diff --git a/tests/unit/useFastSearchFromOptions.tsx b/tests/unit/useFastSearchFromOptions.tsx deleted file mode 100644 index 105f8a276e5b..000000000000 --- a/tests/unit/useFastSearchFromOptions.tsx +++ /dev/null @@ -1,49 +0,0 @@ -import {renderHook} from '@testing-library/react-native'; -import useFastSearchFromOptions from '@hooks/useFastSearchFromOptions'; -import type {Options} from '@libs/OptionsListUtils'; - -describe('useFastSearchFromOptions', () => { - it('should return sub word matches', () => { - const options = { - currentUserOption: null, - userToInvite: null, - personalDetails: [ - { - text: 'Ahmed Gaber', - participantsList: [ - { - displayName: 'Ahmed Gaber', - }, - ], - }, - { - text: 'Banana', - participantsList: [ - { - displayName: 'Banana', - }, - ], - }, - ], - recentReports: [ - { - text: 'Ahmed Gaber (Report)', - }, - { - text: 'Something else', - }, - { - // This starts with Ah as well, but should not match - text: 'Ahntony', - }, - ], - } as Options; - const {result} = renderHook(() => useFastSearchFromOptions(options)); - const search = result.current; - - const {personalDetails, recentReports} = search('Ah Ga'); - - expect(personalDetails).toEqual([expect.objectContaining({text: 'Ahmed Gaber'})]); - expect(recentReports).toEqual([{text: 'Ahmed Gaber (Report)'}]); - }); -});