import { CareInfo } from '../types';

const { SEARCH_INTENT_CONFIG } = require('../constants/searchIntentConfig');

/** Per-intent settings read from `SEARCH_INTENT_CONFIG.intents`. */
type SearchIntentConfig = {
  aliases?: string[];
  entryHints?: string[];
  lightHints?: string[];
};

/** Minimal entry shape the scorer reads; richer entry types can be passed as long as they include these fields. */
export interface HybridSearchEntryLike {
  name: string;
  botanicalName?: string;
  description?: string;
  categories?: string[];
  careInfo?: Partial<CareInfo> | null;
}

/** An entry paired with the score it received for a query. */
interface RankedEntry<T> {
  entry: T;
  score: number;
}

/**
 * Canonicalizes free text for comparison: lower-cases, strips combining
 * diacritics, turns every run of characters outside `a-z0-9`, whitespace,
 * `_` and `-` into a space, turns `_`/`-` runs into a space, collapses
 * whitespace and trims.
 *
 * @param value Raw text (query, name, alias, ...).
 * @returns The normalized text with single spaces and no leading/trailing whitespace.
 */
export const normalizeSearchText = (value: string): string => {
  return (
    value
      .toLowerCase()
      .normalize('NFD')
      .replace(/[\u0300-\u036f]/g, '')
      .replace(/[^a-z0-9\s_-]+/g, ' ')
      .replace(/[_-]+/g, ' ')
      .replace(/\s+/g, ' ')
      // Trim last: the replacements above can leave a space at either edge
      // (e.g. "rose!" -> "rose "), which would defeat exact and phrase matching.
      .trim()
  );
};

/** Normalizes every value, drops the ones that normalize to an empty string and de-duplicates. */
const normalizeArray = (values: string[]): string[] => {
  return [...new Set(values.map((value) => normalizeSearchText(value)).filter(Boolean))];
};

/** Splits already-normalized text on single spaces, dropping empty pieces. */
const tokenize = (normalizedValue: string): string[] => {
  return normalizedValue.split(' ').filter(Boolean);
};

/** Returns the distinct tokens of the query that are not listed in `SEARCH_INTENT_CONFIG.noiseTokens`. */
const tokenSetFromQuery = (normalizedQuery: string): Set<string> => {
  const rawTokens = tokenize(normalizedQuery);
  const noise = new Set<string>(
    SEARCH_INTENT_CONFIG.noiseTokens.map((token: string) => normalizeSearchText(token)),
  );
  return new Set(rawTokens.filter((token) => !noise.has(token)));
};

/**
 * Tells whether the query mentions an alias: either as a substring of the
 * whole query, or (for multi-word aliases) with every alias token present
 * among the query tokens in any order.
 */
const includesPhrase = (
  normalizedQuery: string,
  normalizedAlias: string,
  queryTokens: Set<string>,
): boolean => {
  if (!normalizedAlias) return false;
  if (normalizedQuery.includes(normalizedAlias)) return true;

  const aliasTokens = tokenize(normalizedAlias);
  if (aliasTokens.length <= 1) return queryTokens.has(normalizedAlias);
  return aliasTokens.every((token) => queryTokens.has(token));
};

/** Returns the ids of all configured intents that have at least one alias mentioned in the query. */
const detectQueryIntents = (normalizedQuery: string): string[] => {
  const queryTokens = tokenSetFromQuery(normalizedQuery);
  const intents = (Object.entries(SEARCH_INTENT_CONFIG.intents) as Array<[string, SearchIntentConfig]>)
    .filter(([, value]) =>
      (value.aliases || []).some((alias) =>
        includesPhrase(normalizedQuery, normalizeSearchText(alias), queryTokens),
      ),
    )
    .map(([intentId]) => intentId);
  return intents;
};

/** Computes the Levenshtein edit distance between two strings using a full (rows x cols) table. */
const getLevenshteinDistance = (left: string, right: string): number => {
  const rows = left.length + 1;
  const cols = right.length + 1;

  // Column 0 of each row is seeded with the row index; row 0 is filled below.
  const matrix: number[][] = Array.from({ length: rows }, (_, rowIndex) => [rowIndex]);
  for (let col = 0; col < cols; col += 1) {
    matrix[0][col] = col;
  }

  for (let row = 1; row < rows; row += 1) {
    for (let col = 1; col < cols; col += 1) {
      const cost = left[row - 1] === right[col - 1] ? 0 : 1;
      matrix[row][col] = Math.min(
        matrix[row - 1][col] + 1,
        matrix[row][col - 1] + 1,
        matrix[row - 1][col - 1] + cost,
      );
    }
  }

  return matrix[left.length][right.length];
};

/**
 * Typo-tolerance bonus: 14 when the closest candidate (or any single token of
 * a candidate) is one edit away from the query, 8 when two edits away, else 0.
 * Queries shorter than 3 or longer than 32 characters never get a bonus.
 */
const fuzzyBonus = (normalizedQuery: string, candidates: string[]): number => {
  if (normalizedQuery.length < 3 || normalizedQuery.length > 32) return 0;

  let best = Number.POSITIVE_INFINITY;
  for (const candidate of candidates) {
    if (!candidate) continue;
    for (const token of tokenize(candidate)) {
      best = Math.min(best, getLevenshteinDistance(normalizedQuery, token));
    }
    best = Math.min(best, getLevenshteinDistance(normalizedQuery, candidate));
  }

  if (best === 1) return 14;
  if (best === 2) return 8;
  return 0;
};

/**
 * Scores how the query matches a target string, returning the supplied weight
 * for the strongest relation found: exact, then prefix, then substring; 0 if
 * none applies or either string is empty.
 */
const scoreTextMatch = (
  normalizedQuery: string,
  normalizedTarget: string,
  exact: number,
  prefix: number,
  contains: number,
): number => {
  if (!normalizedQuery || !normalizedTarget) return 0;
  if (normalizedTarget === normalizedQuery) return exact;
  if (normalizedTarget.startsWith(normalizedQuery)) return prefix;
  if (normalizedTarget.includes(normalizedQuery)) return contains;
  return 0;
};

/**
 * Collects the signals an entry exposes for intent matching: its normalized
 * categories, plus the id of every intent whose `entryHints` occur in the
 * entry description or whose `lightHints` occur in `careInfo.light`.
 */
const buildDerivedIntentSignals = (entry: HybridSearchEntryLike): string[] => {
  const normalizedDescription = normalizeSearchText(entry.description || '');
  const normalizedLight = normalizeSearchText(entry.careInfo?.light || '');
  const derivedSignals = new Set<string>();

  const normalizedCategories = (entry.categories || []).map((category) => normalizeSearchText(category));
  normalizedCategories.forEach((category) => derivedSignals.add(category));

  (Object.entries(SEARCH_INTENT_CONFIG.intents) as Array<[string, SearchIntentConfig]>).forEach(
    ([intentId, intentConfig]) => {
      const entryHints = normalizeArray(intentConfig.entryHints || []);
      if (entryHints.some((hint) => normalizedDescription.includes(hint))) {
        derivedSignals.add(intentId);
      }

      const lightHints = normalizeArray(intentConfig.lightHints || []);
      if (lightHints.some((hint) => normalizedLight.includes(hint))) {
        derivedSignals.add(intentId);
      }
    },
  );

  return [...derivedSignals];
};

/**
 * Computes the relevance score of one entry for a query by summing:
 * the best of the name/botanical-name text match, a description containment
 * bonus, a fuzzy (edit-distance) bonus, per-intent bonuses, and - for queries
 * with more than one non-noise token - per-token hit bonuses.
 *
 * @param entry Entry to score.
 * @param query Raw or already-normalized query text.
 * @returns The score; 0 when the query normalizes to an empty string.
 */
export const scoreHybridEntry = (entry: HybridSearchEntryLike, query: string): number => {
  const normalizedQuery = normalizeSearchText(query);
  if (!normalizedQuery) return 0;

  const normalizedName = normalizeSearchText(entry.name || '');
  const normalizedBotanical = normalizeSearchText(entry.botanicalName || '');
  const normalizedDescription = normalizeSearchText(entry.description || '');
  const normalizedCategories = (entry.categories || []).map((category) => normalizeSearchText(category));
  const derivedSignals = buildDerivedIntentSignals(entry);
  const requestedIntents = detectQueryIntents(normalizedQuery);

  let score = 0;

  // Only the stronger of the two name matches counts, so an entry whose common
  // and botanical names both match is not rewarded twice.
  score += Math.max(
    scoreTextMatch(normalizedQuery, normalizedName, 140, 100, 64),
    scoreTextMatch(normalizedQuery, normalizedBotanical, 130, 96, 58),
  );

  if (normalizedDescription.includes(normalizedQuery)) {
    score += 24;
  }

  score += fuzzyBonus(normalizedQuery, [normalizedName, normalizedBotanical, ...normalizedCategories]);

  // NOTE(review): categories are normalized but intent ids are compared as-is;
  // this presumably relies on intent ids already being in normalized form
  // (lower-case, no `-`/`_`) - confirm against the intent config.
  let matchedIntentCount = 0;
  requestedIntents.forEach((intentId) => {
    const categoryHit = normalizedCategories.includes(intentId);
    const derivedHit = derivedSignals.includes(intentId);

    // An explicit category outranks a hint-derived signal; never count both.
    if (categoryHit) {
      score += 92;
      matchedIntentCount += 1;
      return;
    }
    if (derivedHit) {
      score += 56;
      matchedIntentCount += 1;
    }
  });

  if (matchedIntentCount >= 2) {
    score += 38 * matchedIntentCount;
  } else if (matchedIntentCount === 1) {
    score += 10;
  }

  const queryTokens = [...tokenSetFromQuery(normalizedQuery)];
  if (queryTokens.length > 1) {
    const searchableText = [
      normalizedName,
      normalizedBotanical,
      normalizedDescription,
      ...normalizedCategories,
      ...derivedSignals,
    ].join(' ');
    const tokenHits = queryTokens.filter((token) => searchableText.includes(token)).length;
    score += tokenHits * 8;
    // Extra bonus when every non-noise token of the query was found.
    if (tokenHits === queryTokens.length) {
      score += 16;
    }
  }

  return score;
};

/**
 * Scores every entry against the query and returns the best matches.
 *
 * When the query normalizes to an empty string, the first `limit` entries are
 * returned unchanged in order, each with score 0. Otherwise entries scoring 0
 * or less are dropped and the rest are ordered by score (descending), then by
 * shorter name, then by `localeCompare` on the name.
 *
 * @param entries Entries to rank; the input array is not mutated.
 * @param query Raw query text.
 * @param limit Maximum number of results (default 30).
 * @returns Ranked entries with their scores.
 */
export const rankHybridEntries = <T extends HybridSearchEntryLike>(
  entries: T[],
  query: string,
  limit = 30,
): RankedEntry<T>[] => {
  const normalizedQuery = normalizeSearchText(query);
  if (!normalizedQuery) {
    return entries.slice(0, limit).map((entry) => ({ entry, score: 0 }));
  }

  return entries
    .map((entry) => ({ entry, score: scoreHybridEntry(entry, normalizedQuery) }))
    .filter((candidate) => candidate.score > 0)
    .sort(
      (left, right) =>
        right.score - left.score ||
        left.entry.name.length - right.entry.name.length ||
        left.entry.name.localeCompare(right.entry.name),
    )
    .slice(0, limit);
};