194 lines
6.3 KiB
JavaScript
194 lines
6.3 KiB
JavaScript
const { SEARCH_INTENT_CONFIG } = require('./searchIntentConfig');
|
|
|
|
/**
 * Canonicalises arbitrary text so that search comparisons are insensitive to
 * case, diacritics, punctuation and separator style.
 *
 * Steps: coerce to string (falsy input becomes ''), lower-case, decompose to
 * NFD and drop combining marks (U+0300–U+036F), replace every run of
 * characters outside `a-z`, `0-9`, whitespace, `_` and `-` with a space, turn
 * `_`/`-` runs into a space, collapse whitespace runs, and finally trim.
 *
 * @param {*} value - Raw text (or any value; falsy values yield '').
 * @returns {string} Normalised text: lower-case ASCII words separated by
 *   single spaces, with no leading or trailing space.
 */
const normalizeSearchText = (value) => {
  return String(value || '')
    .toLowerCase()
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-z0-9\s_-]+/g, ' ')
    .replace(/[_-]+/g, ' ')
    .replace(/\s+/g, ' ')
    // Trim last: the replacements above turn leading/trailing punctuation or
    // separators into spaces, so trimming earlier would leave "rose!" as
    // "rose " and "???" as " " (a truthy, non-empty "query").
    .trim();
};
|
|
|
|
/**
 * Splits already-normalised text on single spaces and drops empty pieces.
 *
 * @param {string} normalizedValue - Text whose words are space-separated.
 * @returns {string[]} The non-empty words, in their original order.
 */
const tokenize = (normalizedValue) => {
  const words = [];
  for (const piece of normalizedValue.split(' ')) {
    if (piece) {
      words.push(piece);
    }
  }
  return words;
};
|
|
|
|
/**
 * Normalises every item of a list with `normalizeSearchText`, discarding
 * items that normalise to an empty string and removing duplicates while
 * keeping first-seen order.
 *
 * @param {Array<*>} [values] - Raw values; a missing list is treated as empty.
 * @returns {string[]} Unique, non-empty normalised strings.
 */
const normalizeArray = (values) => {
  const seen = new Set();
  for (const raw of values || []) {
    const normalized = normalizeSearchText(raw);
    if (normalized) {
      seen.add(normalized);
    }
  }
  return Array.from(seen);
};
|
|
|
|
/**
 * Builds the set of meaningful words in a query: the query is tokenised and
 * every word that appears (after normalisation) in
 * `SEARCH_INTENT_CONFIG.noiseTokens` is left out.
 *
 * @param {string} normalizedQuery - Query text already passed through
 *   `normalizeSearchText`.
 * @returns {Set<string>} The query's non-noise tokens.
 */
const tokenSetFromQuery = (normalizedQuery) => {
  const noiseWords = new Set();
  for (const rawToken of SEARCH_INTENT_CONFIG.noiseTokens) {
    noiseWords.add(normalizeSearchText(rawToken));
  }

  const meaningfulWords = new Set();
  for (const word of tokenize(normalizedQuery)) {
    if (!noiseWords.has(word)) {
      meaningfulWords.add(word);
    }
  }
  return meaningfulWords;
};
|
|
|
|
/**
 * Decides whether a query mentions an alias.
 *
 * An alias counts as mentioned when it occurs verbatim as a substring of the
 * query, or — failing that — when the alias itself (single-word alias) or
 * every one of its words (multi-word alias, any order) is present in the
 * query's token set.
 *
 * @param {string} normalizedQuery - Normalised query text.
 * @param {string} normalizedAlias - Normalised alias; empty never matches.
 * @param {Set<string>} queryTokens - Tokens of the query to test words against.
 * @returns {boolean} True when the alias is considered present in the query.
 */
const includesPhrase = (normalizedQuery, normalizedAlias, queryTokens) => {
  if (!normalizedAlias) {
    return false;
  }
  if (normalizedQuery.includes(normalizedAlias)) {
    return true;
  }

  const aliasWords = tokenize(normalizedAlias);
  if (aliasWords.length > 1) {
    // Multi-word alias: every word must be one of the query tokens.
    for (const word of aliasWords) {
      if (!queryTokens.has(word)) {
        return false;
      }
    }
    return true;
  }
  return queryTokens.has(normalizedAlias);
};
|
|
|
|
/**
 * Lists the ids of every intent in `SEARCH_INTENT_CONFIG.intents` that the
 * query refers to, i.e. intents with at least one alias that
 * `includesPhrase` finds in the query.
 *
 * @param {string} normalizedQuery - Normalised query text.
 * @returns {string[]} Matching intent ids, in config order.
 */
const detectQueryIntents = (normalizedQuery) => {
  const queryTokens = tokenSetFromQuery(normalizedQuery);
  const requestedIds = [];

  for (const [intentId, intentConfig] of Object.entries(SEARCH_INTENT_CONFIG.intents)) {
    const aliases = intentConfig.aliases || [];
    const isRequested = aliases.some((alias) => {
      return includesPhrase(normalizedQuery, normalizeSearchText(alias), queryTokens);
    });
    if (isRequested) {
      requestedIds.push(intentId);
    }
  }

  return requestedIds;
};
|
|
|
|
/**
 * Computes the Levenshtein edit distance (insertions, deletions and
 * substitutions, each costing 1) between two strings, comparing UTF-16 code
 * units.
 *
 * Only two rows of the dynamic-programming table are kept alive instead of
 * the full (left.length + 1) x (right.length + 1) matrix; the result is the
 * same as the full-matrix formulation.
 *
 * @param {string} left - First string.
 * @param {string} right - Second string.
 * @returns {number} Minimum number of single-character edits turning one
 *   string into the other.
 */
const getLevenshteinDistance = (left, right) => {
  // Trivial cases need no table at all.
  if (left === right) return 0;
  if (left.length === 0) return right.length;
  if (right.length === 0) return left.length;

  // The distance is symmetric, so put the shorter string on the column axis
  // to keep the row buffers as small as possible.
  const shorter = left.length <= right.length ? left : right;
  const longer = left.length <= right.length ? right : left;

  let previousRow = Array.from({ length: shorter.length + 1 }, (_, index) => index);

  for (let row = 1; row <= longer.length; row += 1) {
    const currentRow = [row];
    for (let col = 1; col <= shorter.length; col += 1) {
      const cost = longer[row - 1] === shorter[col - 1] ? 0 : 1;
      currentRow[col] = Math.min(
        previousRow[col] + 1, // deletion
        currentRow[col - 1] + 1, // insertion
        previousRow[col - 1] + cost, // substitution (or match)
      );
    }
    previousRow = currentRow;
  }

  return previousRow[shorter.length];
};
|
|
|
|
/**
 * Awards a small bonus when the query is a near-miss (typo) of a candidate
 * string or of one of its words.
 *
 * Queries shorter than 3 or longer than 32 characters never get a bonus.
 * Otherwise the smallest Levenshtein distance between the query and each
 * non-empty candidate, and each word of each candidate, decides the bonus:
 * distance 1 yields 14, distance 2 yields 8, anything else (including an exact
 * match at distance 0) yields 0.
 *
 * @param {string} normalizedQuery - Normalised query text.
 * @param {string[]} [candidates] - Normalised strings to compare against;
 *   empty items are skipped and a missing list is treated as empty.
 * @returns {number} 14, 8 or 0.
 */
const fuzzyBonus = (normalizedQuery, candidates) => {
  const queryLength = normalizedQuery.length;
  if (queryLength < 3 || queryLength > 32) {
    return 0;
  }

  let closest = Number.POSITIVE_INFINITY;
  for (const candidate of candidates || []) {
    if (!candidate) {
      continue;
    }
    // Compare against each word first, then against the whole candidate.
    for (const comparable of [...tokenize(candidate), candidate]) {
      closest = Math.min(closest, getLevenshteinDistance(normalizedQuery, comparable));
    }
  }

  switch (closest) {
    case 1:
      return 14;
    case 2:
      return 8;
    default:
      return 0;
  }
};
|
|
|
|
/**
 * Grades how a query relates to a target string and returns the matching
 * caller-supplied weight.
 *
 * @param {string} normalizedQuery - Normalised query text.
 * @param {string} normalizedTarget - Normalised text being searched.
 * @param {number} exact - Returned when target and query are identical.
 * @param {number} prefix - Returned when the target starts with the query.
 * @param {number} contains - Returned when the query occurs later in the target.
 * @returns {number} One of the three weights, or 0 when either string is
 *   empty or the query does not occur in the target.
 */
const scoreTextMatch = (normalizedQuery, normalizedTarget, exact, prefix, contains) => {
  const hasBothTexts = Boolean(normalizedQuery) && Boolean(normalizedTarget);
  if (!hasBothTexts) {
    return 0;
  }

  const firstOccurrence = normalizedTarget.indexOf(normalizedQuery);
  if (firstOccurrence < 0) {
    return 0;
  }
  if (firstOccurrence > 0) {
    return contains;
  }
  // The query sits at the very start: equal lengths mean an exact match.
  return normalizedTarget.length === normalizedQuery.length ? exact : prefix;
};
|
|
|
|
/**
 * Collects the signals an entry exposes for intent matching.
 *
 * The result starts with the entry's normalised categories. An intent id from
 * `SEARCH_INTENT_CONFIG.intents` is then added whenever one of that intent's
 * `entryHints` occurs in the normalised description, or one of its
 * `lightHints` occurs in the normalised `careInfo.light` text.
 *
 * @param {object} entry - Entry to inspect; reads `description`,
 *   `careInfo.light` and `categories`, all optional.
 * @returns {string[]} Unique signals: normalised categories followed by
 *   matched intent ids.
 */
const buildDerivedIntentSignals = (entry) => {
  const descriptionText = normalizeSearchText(entry.description || '');
  const rawLight = entry.careInfo && entry.careInfo.light ? entry.careInfo.light : '';
  const lightText = normalizeSearchText(rawLight);

  const signals = new Set();
  for (const category of entry.categories || []) {
    signals.add(normalizeSearchText(category));
  }

  for (const [intentId, intentConfig] of Object.entries(SEARCH_INTENT_CONFIG.intents)) {
    const descriptionHints = normalizeArray(intentConfig.entryHints || []);
    const lightHints = normalizeArray(intentConfig.lightHints || []);

    const hintedByDescription = descriptionHints.some((hint) => descriptionText.includes(hint));
    const hintedByLight = lightHints.some((hint) => lightText.includes(hint));

    if (hintedByDescription || hintedByLight) {
      signals.add(intentId);
    }
  }

  return Array.from(signals);
};
|
|
|
|
/**
 * Scores how well a single entry matches a free-text query. Higher is better;
 * 0 means "no match" (and is what an empty query yields).
 *
 * The score is the sum of:
 *  - the better of the name match (140 exact / 100 prefix / 64 contains) and
 *    the botanical-name match (130 / 96 / 58);
 *  - 24 when the description contains the whole query;
 *  - the typo bonus from `fuzzyBonus` over name, botanical name and categories;
 *  - per intent detected in the query: 92 when the entry has a category equal
 *    to the intent id, otherwise 56 when the intent is among the entry's
 *    derived signals;
 *  - an intent bonus: 38 per matched intent when two or more matched, or 10
 *    when exactly one matched;
 *  - for queries with more than one non-noise token: 8 per token found
 *    anywhere in the entry's searchable text, plus 16 when all are found.
 *
 * @param {object} entry - Entry to score; reads `name`, `botanicalName`,
 *   `description`, `categories` and (via `buildDerivedIntentSignals`)
 *   `careInfo.light`. All fields are optional.
 * @param {string} query - Raw or already-normalised query text.
 * @returns {number} Non-negative relevance score.
 */
const scoreHybridEntry = (entry, query) => {
  const normalizedQuery = normalizeSearchText(query);
  if (!normalizedQuery) return 0;

  const normalizedName = normalizeSearchText(entry.name || '');
  const normalizedBotanical = normalizeSearchText(entry.botanicalName || '');
  const normalizedDescription = normalizeSearchText(entry.description || '');
  const normalizedCategories = (entry.categories || []).map((category) => normalizeSearchText(category));
  const derivedSignals = buildDerivedIntentSignals(entry);
  const requestedIntents = detectQueryIntents(normalizedQuery);

  // Direct text relevance: only the stronger of the two name fields counts.
  let score = Math.max(
    scoreTextMatch(normalizedQuery, normalizedName, 140, 100, 64),
    scoreTextMatch(normalizedQuery, normalizedBotanical, 130, 96, 58),
  );

  if (normalizedDescription.includes(normalizedQuery)) {
    score += 24;
  }

  score += fuzzyBonus(normalizedQuery, [normalizedName, normalizedBotanical, ...normalizedCategories]);

  // Intent relevance. Sets give constant-time membership checks.
  const categorySet = new Set(normalizedCategories);
  const derivedSet = new Set(derivedSignals);
  let matchedIntentCount = 0;
  requestedIntents.forEach((intentId) => {
    // Categories were normalised above (lower-cased, `-`/`_` turned into
    // spaces), so an id such as "low-light" can only equal its category once
    // it is normalised the same way. The verbatim id is still accepted.
    const categoryHit = categorySet.has(intentId) || categorySet.has(normalizeSearchText(intentId));
    if (categoryHit) {
      score += 92;
      matchedIntentCount += 1;
      return;
    }
    if (derivedSet.has(intentId)) {
      score += 56;
      matchedIntentCount += 1;
    }
  });

  if (matchedIntentCount >= 2) {
    score += 38 * matchedIntentCount;
  } else if (matchedIntentCount === 1) {
    score += 10;
  }

  // Multi-word queries: reward each non-noise word found anywhere in the entry.
  const queryTokens = [...tokenSetFromQuery(normalizedQuery)];
  if (queryTokens.length > 1) {
    const searchableText = [
      normalizedName,
      normalizedBotanical,
      normalizedDescription,
      ...normalizedCategories,
      ...derivedSignals,
    ].join(' ');
    const tokenHits = queryTokens.filter((token) => searchableText.includes(token)).length;
    score += tokenHits * 8;
    if (tokenHits === queryTokens.length) {
      score += 16;
    }
  }

  return score;
};
|
|
|
|
/**
 * Ranks entries against a query and returns the best matches.
 *
 * With an empty query (after normalisation) the first `limit` entries are
 * returned unranked, each with score 0. Otherwise every entry is scored with
 * `scoreHybridEntry`, entries scoring 0 or less are dropped, and the rest are
 * ordered by descending score, then shorter name, then name in
 * `localeCompare` order.
 *
 * @param {object[]} entries - Entries to rank; a missing list is treated as
 *   empty.
 * @param {string} query - Raw query text.
 * @param {number} [limit=30] - Maximum number of results.
 * @returns {{entry: object, score: number}[]} Ranked candidates.
 */
const rankHybridEntries = (entries, query, limit = 30) => {
  const pool = entries || [];
  const normalizedQuery = normalizeSearchText(query);
  if (!normalizedQuery) {
    return pool.slice(0, limit).map((entry) => ({ entry, score: 0 }));
  }

  // scoreHybridEntry accepts entries without a name (they can still match on
  // botanical name, description or intent), so the tiebreakers must not
  // dereference a missing name either.
  const sortName = (candidate) => String(candidate.entry.name || '');

  return pool
    .map((entry) => ({ entry, score: scoreHybridEntry(entry, normalizedQuery) }))
    .filter((candidate) => candidate.score > 0)
    .sort((left, right) => {
      const leftName = sortName(left);
      const rightName = sortName(right);
      return (
        right.score - left.score ||
        leftName.length - rightName.length ||
        leftName.localeCompare(rightName)
      );
    })
    .slice(0, limit);
};
|
|
|
|
// Public API of this module: text normalisation plus the single-entry scorer
// and the list ranker built on top of it.
module.exports = { normalizeSearchText, rankHybridEntries, scoreHybridEntry };
|