import { CareInfo } from '../types';
const { SEARCH_INTENT_CONFIG } = require('../constants/searchIntentConfig');
/**
 * Shape of one intent entry under `SEARCH_INTENT_CONFIG.intents`.
 * Every list is optional; the consumers in this file fall back to an empty list.
 */
type SearchIntentConfig = {
  /** Phrases compared against the user's query to detect this intent. */
  aliases?: string[];
  /** Phrases looked up in an entry's description to tag the entry with this intent. */
  entryHints?: string[];
  /** Phrases looked up in an entry's `careInfo.light` text to tag the entry with this intent. */
  lightHints?: string[];
};
/**
 * Minimal set of fields the hybrid scorer reads from a searchable entry.
 * Any richer entry type can be ranked as long as it structurally satisfies this interface.
 */
export interface HybridSearchEntryLike {
  /** Display name; matched against the query and used as the ranking tie-breaker. */
  name: string;
  /** Botanical name; matched against the query alongside `name`. */
  botanicalName?: string;
  /** Free-text description; searched for the query and for intent entry hints. */
  description?: string;
  /** Category labels; normalized before being compared with detected intents. */
  categories?: string[];
  /** Care details; within this file only `light` is read (for intent light hints). */
  careInfo?: Partial<CareInfo> | null;
}
/** An entry paired with the relevance score computed for it. */
interface RankedEntry<T> {
  /** The original entry object, passed through untouched. */
  entry: T;
  /** Relevance score; `0` is used when no query was supplied. */
  score: number;
}
/**
 * Normalizes every string with `normalizeSearchText`, drops results that are
 * empty, and removes duplicates while keeping first-seen order.
 *
 * @param values Raw strings (for example hint phrases from the intent config).
 * @returns The distinct, non-empty normalized strings.
 */
const normalizeArray = (values: string[]): string[] => {
  const unique = new Set<string>();
  for (const value of values) {
    const normalized = normalizeSearchText(value);
    if (normalized) unique.add(normalized);
  }
  return [...unique];
};
/**
 * Canonicalizes text for search comparison: lower-cases it, strips combining
 * diacritics, turns every run of characters outside `a-z0-9` (punctuation,
 * whitespace, `_`, `-`, ...) into a single space, and trims the result.
 *
 * Trimming happens last on purpose: punctuation at either end of the input
 * becomes a space during replacement, so trimming earlier would leave a
 * leading/trailing space behind (e.g. "Rose!" -> "rose ") and a
 * punctuation-only input would normalize to " " instead of "".
 *
 * @param value Raw text to normalize.
 * @returns Lower-case ASCII alphanumeric words separated by single spaces, or `''`.
 */
export const normalizeSearchText = (value: string): string => {
  return value
    .toLowerCase()
    // Decompose accented letters so the accent becomes a separate combining mark...
    .normalize('NFD')
    // ...and remove those combining marks (U+0300 - U+036F).
    .replace(/[\u0300-\u036f]/g, '')
    // Any run of non-alphanumerics collapses to one space.
    .replace(/[^a-z0-9]+/g, ' ')
    .trim();
};
/**
 * Splits text on single spaces and discards empty pieces.
 *
 * @param normalizedValue Text whose words are separated by spaces.
 * @returns The non-empty words in their original order.
 */
const tokenize = (normalizedValue: string): string[] => {
  return normalizedValue.split(' ').filter((piece) => piece.length > 0);
};
/**
 * Builds the set of distinct query tokens, excluding every token listed in
 * `SEARCH_INTENT_CONFIG.noiseTokens` (compared after normalization).
 *
 * @param normalizedQuery Query text already passed through `normalizeSearchText`.
 * @returns Distinct non-noise tokens of the query.
 */
const tokenSetFromQuery = (normalizedQuery: string): Set<string> => {
  // Tolerate a config without a noise list instead of throwing on `.map`.
  const configuredNoise: string[] = SEARCH_INTENT_CONFIG.noiseTokens ?? [];
  const noise = new Set<string>(configuredNoise.map((token) => normalizeSearchText(token)));
  return new Set(tokenize(normalizedQuery).filter((token) => !noise.has(token)));
};
/**
 * Decides whether an alias phrase is present in the query.
 *
 * The alias matches when it occurs verbatim inside the query string, or,
 * for a multi-word alias, when every one of its words is among the query
 * tokens (in any order).
 *
 * @param normalizedQuery Normalized query text.
 * @param normalizedAlias Normalized alias phrase; an empty alias never matches.
 * @param queryTokens Token set of the query (see `tokenSetFromQuery`).
 * @returns `true` when the alias is considered present in the query.
 */
const includesPhrase = (normalizedQuery: string, normalizedAlias: string, queryTokens: Set<string>): boolean => {
  if (!normalizedAlias) return false;

  // NOTE(review): this is a plain substring test, so a short alias also
  // matches inside a longer word (alias "sun" vs. query "sunflower").
  // Confirm that this is intended before tightening it to whole words.
  if (normalizedQuery.includes(normalizedAlias)) return true;

  const aliasTokens = tokenize(normalizedAlias);
  if (aliasTokens.length <= 1) return queryTokens.has(normalizedAlias);
  return aliasTokens.every((token) => queryTokens.has(token));
};
/**
 * Lists the ids of all configured intents that the query triggers.
 * An intent is triggered when at least one of its aliases matches the query
 * according to `includesPhrase`.
 *
 * @param normalizedQuery Normalized query text.
 * @returns Intent ids (keys of `SEARCH_INTENT_CONFIG.intents`) in config order.
 */
const detectQueryIntents = (normalizedQuery: string): string[] => {
  const queryTokens = tokenSetFromQuery(normalizedQuery);
  // Tolerate a config without an `intents` map instead of throwing in Object.entries.
  const intentEntries = Object.entries(SEARCH_INTENT_CONFIG.intents ?? {}) as Array<[string, SearchIntentConfig]>;

  return intentEntries
    .filter(([, intentConfig]) =>
      (intentConfig.aliases ?? []).some((alias) =>
        includesPhrase(normalizedQuery, normalizeSearchText(alias), queryTokens)))
    .map(([intentId]) => intentId);
};
/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn `left` into `right`. Characters are compared per UTF-16 code unit.
 *
 * Only two rows of the dynamic-programming table are kept alive, so memory
 * use is proportional to `right.length` rather than to the product of both
 * lengths.
 *
 * @param left First string.
 * @param right Second string.
 * @returns The edit distance (0 when the strings are equal).
 */
const getLevenshteinDistance = (left: string, right: string): number => {
  if (left === right) return 0;
  if (left.length === 0) return right.length;
  if (right.length === 0) return left.length;

  const width = right.length + 1;
  // Row 0 of the table: turning '' into right[0..col) costs `col` insertions.
  let previous: number[] = Array.from({ length: width }, (_, col) => col);
  let current: number[] = new Array<number>(width).fill(0);

  for (let row = 1; row <= left.length; row += 1) {
    // Column 0: turning left[0..row) into '' costs `row` deletions.
    current[0] = row;
    for (let col = 1; col < width; col += 1) {
      const cost = left[row - 1] === right[col - 1] ? 0 : 1;
      current[col] = Math.min(
        previous[col] + 1, // deletion
        current[col - 1] + 1, // insertion
        previous[col - 1] + cost, // substitution (or match)
      );
    }
    [previous, current] = [current, previous];
  }

  // After the final swap the last computed row lives in `previous`.
  return previous[right.length];
};
/**
 * Awards a small bonus when the query is a near-miss (typo) of a candidate
 * string or of one of its words.
 *
 * The smallest edit distance between the query and any candidate / candidate
 * word decides the bonus: distance 1 -> 14, distance 2 -> 8, anything else
 * (including an exact match, distance 0) -> 0. Queries shorter than 3 or
 * longer than 32 characters never receive a bonus.
 *
 * @param normalizedQuery Normalized query text.
 * @param candidates Normalized strings to compare against; empty strings are ignored.
 * @returns 14, 8 or 0.
 */
const fuzzyBonus = (normalizedQuery: string, candidates: string[]): number => {
  if (normalizedQuery.length < 3 || normalizedQuery.length > 32) return 0;

  // Collect each distinct string once so repeated words are not re-measured.
  const targets = new Set<string>();
  for (const candidate of candidates) {
    if (!candidate) continue;
    targets.add(candidate);
    for (const token of tokenize(candidate)) targets.add(token);
  }

  let best = Number.POSITIVE_INFINITY;
  for (const target of targets) {
    // The edit distance is at least the length difference, so such a target
    // can never reach the distance 1 or 2 that earns a bonus.
    if (Math.abs(target.length - normalizedQuery.length) > 2) continue;
    best = Math.min(best, getLevenshteinDistance(normalizedQuery, target));
    // Nothing can beat an exact match, and an exact match earns no fuzzy bonus.
    if (best === 0) return 0;
  }

  if (best === 1) return 14;
  if (best === 2) return 8;
  return 0;
};
/**
 * Scores how strongly the query matches a single text field, returning the
 * weight of the strongest applicable tier.
 *
 * @param normalizedQuery Normalized query text.
 * @param normalizedTarget Normalized field text.
 * @param exact Score when the field equals the query.
 * @param prefix Score when the field starts with the query.
 * @param contains Score when the field contains the query anywhere.
 * @returns `exact`, `prefix`, `contains`, or 0 when nothing matches or either input is empty.
 */
const scoreTextMatch = (normalizedQuery: string, normalizedTarget: string, exact: number, prefix: number, contains: number): number => {
  if (!normalizedQuery || !normalizedTarget) return 0;

  // Tiers are checked from strongest to weakest; the first hit wins.
  if (normalizedTarget === normalizedQuery) return exact;
  if (normalizedTarget.startsWith(normalizedQuery)) return prefix;
  return normalizedTarget.includes(normalizedQuery) ? contains : 0;
};
/**
 * Derives the set of signals an entry carries for intent matching.
 *
 * The result contains the entry's normalized categories plus the id of every
 * configured intent for which one of its `entryHints` occurs in the entry's
 * normalized description, or one of its `lightHints` occurs in the entry's
 * normalized `careInfo.light` text.
 *
 * @param entry Entry to inspect.
 * @returns Distinct signals: normalized categories first, then matched intent ids.
 */
const buildDerivedIntentSignals = (entry: HybridSearchEntryLike): string[] => {
  const normalizedDescription = normalizeSearchText(entry.description || '');
  const normalizedLight = normalizeSearchText(entry.careInfo?.light || '');

  const derivedSignals = new Set<string>(
    (entry.categories || []).map((category) => normalizeSearchText(category)),
  );

  // Tolerate a config without an `intents` map instead of throwing in Object.entries.
  const intentEntries = Object.entries(SEARCH_INTENT_CONFIG.intents ?? {}) as Array<[string, SearchIntentConfig]>;
  for (const [intentId, intentConfig] of intentEntries) {
    const describedByHint = normalizeArray(intentConfig.entryHints ?? [])
      .some((hint) => normalizedDescription.includes(hint));
    const litByHint = normalizeArray(intentConfig.lightHints ?? [])
      .some((hint) => normalizedLight.includes(hint));

    if (describedByHint || litByHint) {
      derivedSignals.add(intentId);
    }
  }

  return [...derivedSignals];
};
/**
 * Computes the relevance score of one entry for a query by adding up several
 * independent signals:
 *
 * - name / botanical-name match: the better of the two
 *   (name 140 / 100 / 64, botanical 130 / 96 / 58 for exact / prefix / contains);
 * - +24 when the description contains the whole query;
 * - typo bonus from `fuzzyBonus` over name, botanical name and categories;
 * - per intent detected in the query: +92 when the entry has a category equal
 *   to the intent id, otherwise +56 when the intent is among the entry's
 *   derived signals;
 * - intent combo: +38 per matched intent when two or more matched, +10 when
 *   exactly one matched;
 * - for queries with more than one non-noise token: +8 per token found in the
 *   entry's combined text, and +16 more when every token is found.
 *
 * @param entry Entry to score.
 * @param query Raw or already-normalized query text.
 * @returns The total score; 0 when the query normalizes to an empty string.
 */
export const scoreHybridEntry = (entry: HybridSearchEntryLike, query: string): number => {
  const normalizedQuery = normalizeSearchText(query);
  if (!normalizedQuery) return 0;

  const normalizedName = normalizeSearchText(entry.name || '');
  const normalizedBotanical = normalizeSearchText(entry.botanicalName || '');
  const normalizedDescription = normalizeSearchText(entry.description || '');
  const normalizedCategories = (entry.categories || []).map((category) => normalizeSearchText(category));
  const derivedSignals = buildDerivedIntentSignals(entry);
  const requestedIntents = detectQueryIntents(normalizedQuery);

  // Direct text match: only the stronger of name / botanical name counts.
  let score = Math.max(
    scoreTextMatch(normalizedQuery, normalizedName, 140, 100, 64),
    scoreTextMatch(normalizedQuery, normalizedBotanical, 130, 96, 58),
  );

  if (normalizedDescription.includes(normalizedQuery)) {
    score += 24;
  }

  score += fuzzyBonus(normalizedQuery, [normalizedName, normalizedBotanical, ...normalizedCategories]);

  let matchedIntentCount = 0;
  for (const intentId of requestedIntents) {
    // Categories were normalized above, so the intent id has to be compared in
    // normalized form too; otherwise an id containing '-', '_' or capitals
    // could never equal a category. The raw id is still accepted as before.
    const categoryHit =
      normalizedCategories.includes(intentId) ||
      normalizedCategories.includes(normalizeSearchText(intentId));

    if (categoryHit) {
      score += 92;
      matchedIntentCount += 1;
    } else if (derivedSignals.includes(intentId)) {
      score += 56;
      matchedIntentCount += 1;
    }
  }

  if (matchedIntentCount >= 2) {
    score += 38 * matchedIntentCount;
  } else if (matchedIntentCount === 1) {
    score += 10;
  }

  // Token coverage only applies to multi-word queries.
  const queryTokens = [...tokenSetFromQuery(normalizedQuery)];
  if (queryTokens.length > 1) {
    const searchableText = [
      normalizedName,
      normalizedBotanical,
      normalizedDescription,
      ...normalizedCategories,
      ...derivedSignals,
    ].join(' ');
    const tokenHits = queryTokens.filter((token) => searchableText.includes(token)).length;
    score += tokenHits * 8;
    if (tokenHits === queryTokens.length) {
      score += 16;
    }
  }

  return score;
};
/**
 * Ranks entries against a query and returns the best ones.
 *
 * When the query normalizes to an empty string, the first `limit` entries are
 * returned in their input order with score 0. Otherwise every entry is scored
 * with `scoreHybridEntry`, entries scoring 0 or less are dropped, and the rest
 * are ordered by score (highest first), then by shorter name, then by
 * `localeCompare` on the name. The input array is not modified.
 *
 * @param entries Entries to rank.
 * @param query Raw query text.
 * @param limit Maximum number of results (default 30); a negative value yields no results.
 * @returns Up to `limit` entries paired with their scores.
 */
export const rankHybridEntries = <T extends HybridSearchEntryLike>(
  entries: T[],
  query: string,
  limit = 30,
): RankedEntry<T>[] => {
  // A negative end index would make slice() count from the end of the array
  // and return "all but the last N" instead of nothing.
  const cap = Math.max(0, limit);

  const normalizedQuery = normalizeSearchText(query);
  if (!normalizedQuery) {
    return entries.slice(0, cap).map((entry) => ({ entry, score: 0 }));
  }

  // The scorer tolerates a missing name at runtime; the tie-breaker must as well.
  const nameOf = (candidate: RankedEntry<T>): string => candidate.entry.name || '';

  return entries
    .map((entry) => ({ entry, score: scoreHybridEntry(entry, normalizedQuery) }))
    .filter((candidate) => candidate.score > 0)
    .sort((left, right) =>
      right.score - left.score ||
      nameOf(left).length - nameOf(right).length ||
      nameOf(left).localeCompare(nameOf(right)))
    .slice(0, cap);
};