const { SEARCH_INTENT_CONFIG } = require('./searchIntentConfig'); const normalizeSearchText = (value) => { return String(value || '') .toLowerCase() .normalize('NFD') .replace(/[\u0300-\u036f]/g, '') .trim() .replace(/[^a-z0-9\s_-]+/g, ' ') .replace(/[_-]+/g, ' ') .replace(/\s+/g, ' '); }; const tokenize = (normalizedValue) => normalizedValue.split(' ').filter(Boolean); const normalizeArray = (values) => { return [...new Set((values || []).map((value) => normalizeSearchText(value)).filter(Boolean))]; }; const tokenSetFromQuery = (normalizedQuery) => { const noise = new Set(SEARCH_INTENT_CONFIG.noiseTokens.map((token) => normalizeSearchText(token))); return new Set(tokenize(normalizedQuery).filter((token) => !noise.has(token))); }; const includesPhrase = (normalizedQuery, normalizedAlias, queryTokens) => { if (!normalizedAlias) return false; if (normalizedQuery.includes(normalizedAlias)) return true; const aliasTokens = tokenize(normalizedAlias); if (aliasTokens.length <= 1) return queryTokens.has(normalizedAlias); return aliasTokens.every((token) => queryTokens.has(token)); }; const detectQueryIntents = (normalizedQuery) => { const queryTokens = tokenSetFromQuery(normalizedQuery); return Object.entries(SEARCH_INTENT_CONFIG.intents) .filter(([, value]) => (value.aliases || []).some((alias) => includesPhrase(normalizedQuery, normalizeSearchText(alias), queryTokens))) .map(([intentId]) => intentId); }; const getLevenshteinDistance = (left, right) => { const rows = left.length + 1; const cols = right.length + 1; const matrix = Array.from({ length: rows }, (_, rowIndex) => [rowIndex]); for (let col = 0; col < cols; col += 1) { matrix[0][col] = col; } for (let row = 1; row < rows; row += 1) { for (let col = 1; col < cols; col += 1) { const cost = left[row - 1] === right[col - 1] ? 0 : 1; matrix[row][col] = Math.min( matrix[row - 1][col] + 1, matrix[row][col - 1] + 1, matrix[row - 1][col - 1] + cost, ); } } return matrix[left.length][right.length]; }; const fuzzyBonus = (normalizedQuery, candidates) => { if (normalizedQuery.length < 3 || normalizedQuery.length > 32) return 0; let best = Number.POSITIVE_INFINITY; (candidates || []).forEach((candidate) => { if (!candidate) return; tokenize(candidate).forEach((token) => { best = Math.min(best, getLevenshteinDistance(normalizedQuery, token)); }); best = Math.min(best, getLevenshteinDistance(normalizedQuery, candidate)); }); if (best === 1) return 14; if (best === 2) return 8; return 0; }; const scoreTextMatch = (normalizedQuery, normalizedTarget, exact, prefix, contains) => { if (!normalizedQuery || !normalizedTarget) return 0; if (normalizedTarget === normalizedQuery) return exact; if (normalizedTarget.startsWith(normalizedQuery)) return prefix; if (normalizedTarget.includes(normalizedQuery)) return contains; return 0; }; const buildDerivedIntentSignals = (entry) => { const normalizedDescription = normalizeSearchText(entry.description || ''); const normalizedLight = normalizeSearchText(entry.careInfo && entry.careInfo.light ? entry.careInfo.light : ''); const derivedSignals = new Set((entry.categories || []).map((category) => normalizeSearchText(category))); Object.entries(SEARCH_INTENT_CONFIG.intents).forEach(([intentId, intentConfig]) => { const entryHints = normalizeArray(intentConfig.entryHints || []); if (entryHints.some((hint) => normalizedDescription.includes(hint))) { derivedSignals.add(intentId); } const lightHints = normalizeArray(intentConfig.lightHints || []); if (lightHints.some((hint) => normalizedLight.includes(hint))) { derivedSignals.add(intentId); } }); return [...derivedSignals]; }; const scoreHybridEntry = (entry, query) => { const normalizedQuery = normalizeSearchText(query); if (!normalizedQuery) return 0; const normalizedName = normalizeSearchText(entry.name || ''); const normalizedBotanical = normalizeSearchText(entry.botanicalName || ''); const normalizedDescription = normalizeSearchText(entry.description || ''); const normalizedCategories = (entry.categories || []).map((category) => normalizeSearchText(category)); const derivedSignals = buildDerivedIntentSignals(entry); const requestedIntents = detectQueryIntents(normalizedQuery); let score = 0; score += Math.max( scoreTextMatch(normalizedQuery, normalizedName, 140, 100, 64), scoreTextMatch(normalizedQuery, normalizedBotanical, 130, 96, 58), ); if (normalizedDescription.includes(normalizedQuery)) { score += 24; } score += fuzzyBonus(normalizedQuery, [normalizedName, normalizedBotanical, ...normalizedCategories]); let matchedIntentCount = 0; requestedIntents.forEach((intentId) => { const categoryHit = normalizedCategories.includes(intentId); const derivedHit = derivedSignals.includes(intentId); if (categoryHit) { score += 92; matchedIntentCount += 1; return; } if (derivedHit) { score += 56; matchedIntentCount += 1; } }); if (matchedIntentCount >= 2) { score += 38 * matchedIntentCount; } else if (matchedIntentCount === 1) { score += 10; } const queryTokens = [...tokenSetFromQuery(normalizedQuery)]; if (queryTokens.length > 1) { const searchableText = [ normalizedName, normalizedBotanical, normalizedDescription, ...normalizedCategories, ...derivedSignals, ].join(' '); const tokenHits = queryTokens.filter((token) => searchableText.includes(token)).length; score += tokenHits * 8; if (tokenHits === queryTokens.length) { score += 16; } } return score; }; const rankHybridEntries = (entries, query, limit = 30) => { const normalizedQuery = normalizeSearchText(query); if (!normalizedQuery) { return entries.slice(0, limit).map((entry) => ({ entry, score: 0 })); } return entries .map((entry) => ({ entry, score: scoreHybridEntry(entry, normalizedQuery) })) .filter((candidate) => candidate.score > 0) .sort((left, right) => right.score - left.score || left.entry.name.length - right.entry.name.length || left.entry.name.localeCompare(right.entry.name)) .slice(0, limit); }; module.exports = { normalizeSearchText, rankHybridEntries, scoreHybridEntry, };