const crypto = require('crypto'); const { all, get, run } = require('./sqlite'); const DEFAULT_LIMIT = 60; const MAX_LIMIT = 500; const MAX_AUDIT_DETAILS = 80; const WIKIMEDIA_FILEPATH_SEGMENT = 'Special:FilePath/'; const WIKIMEDIA_REDIRECT_BASE = 'https://commons.wikimedia.org/wiki/Special:FilePath/'; class PlantImportValidationError extends Error { constructor(message, details) { super(message); this.name = 'PlantImportValidationError'; this.details = details; } } const normalizeWhitespace = (value) => { return value.trim().replace(/\s+/g, ' '); }; const normalizeKey = (value) => { return normalizeWhitespace(value) .toLowerCase() .normalize('NFD') .replace(/[\u0300-\u036f]/g, ''); }; const unwrapMarkdownLink = (value) => { const markdownMatch = value.match(/^\[[^\]]+]\((https?:\/\/[^)]+)\)(.*)$/i); if (!markdownMatch) return value; const [, url, suffix] = markdownMatch; return `${url}${suffix || ''}`; }; const tryDecode = (value) => { try { return decodeURIComponent(value); } catch { return value; } }; const convertWikimediaFilePathUrl = (value) => { const segmentIndex = value.indexOf(WIKIMEDIA_FILEPATH_SEGMENT); if (segmentIndex < 0) return null; const fileNameStart = segmentIndex + WIKIMEDIA_FILEPATH_SEGMENT.length; const rawFileName = value.slice(fileNameStart).split(/[?#]/)[0].trim(); if (!rawFileName) return null; const decodedFileName = tryDecode(rawFileName).replace(/\s+/g, ' ').trim(); if (!decodedFileName) return null; const encodedFileName = encodeURIComponent(decodedFileName).replace(/%2F/g, '/'); return `${WIKIMEDIA_REDIRECT_BASE}${encodedFileName}`; }; const normalizeImageUri = (rawUri) => { if (typeof rawUri !== 'string') return null; const trimmed = rawUri.trim(); if (!trimmed) return null; const normalized = unwrapMarkdownLink(trimmed); const converted = convertWikimediaFilePathUrl(normalized); const candidate = (converted || normalized).replace(/^http:\/\//i, 'https://'); let parsedUrl; try { parsedUrl = new URL(candidate); } catch { return null; } const protocol = parsedUrl.protocol.toLowerCase(); if (protocol !== 'https:' && protocol !== 'http:') return null; if (!parsedUrl.hostname) return null; parsedUrl.protocol = 'https:'; return parsedUrl.toString(); }; const toArrayOfStrings = (value) => { if (!Array.isArray(value)) return []; const normalized = value .map((item) => (typeof item === 'string' ? normalizeWhitespace(item) : '')) .filter(Boolean); return [...new Set(normalized)]; }; const parseNumber = (value, fallback) => { const parsed = Number(value); if (!Number.isFinite(parsed)) return fallback; return parsed; }; const buildStablePlantId = (botanicalName) => { const hash = crypto .createHash('sha1') .update(normalizeKey(botanicalName)) .digest('hex') .slice(0, 16); return `plant_${hash}`; }; const parseExistingIdMap = (rows) => { const botanicalToId = new Map(); rows.forEach((row) => { if (!row || typeof row.botanicalName !== 'string' || typeof row.id !== 'string') return; botanicalToId.set(normalizeKey(row.botanicalName), row.id); }); return botanicalToId; }; const prepareEntry = (rawEntry, index, existingIdMap, preserveExistingIds) => { const errors = []; const name = typeof rawEntry?.name === 'string' ? normalizeWhitespace(rawEntry.name) : ''; const botanicalName = typeof rawEntry?.botanicalName === 'string' ? normalizeWhitespace(rawEntry.botanicalName) : ''; if (!name) { errors.push({ index, field: 'name', message: 'name is required.' }); } if (!botanicalName) { errors.push({ index, field: 'botanicalName', message: 'botanicalName is required.' }); } const normalizedBotanicalKey = botanicalName ? normalizeKey(botanicalName) : ''; const existingId = preserveExistingIds ? existingIdMap.get(normalizedBotanicalKey) : null; const incomingId = typeof rawEntry?.id === 'string' ? normalizeWhitespace(rawEntry.id) : ''; const id = incomingId || existingId || (botanicalName ? buildStablePlantId(botanicalName) : ''); if (!id) { errors.push({ index, field: 'id', message: 'Could not derive stable plant id.' }); } const imageUri = normalizeImageUri(rawEntry?.imageUri); if (!imageUri) { errors.push({ index, field: 'imageUri', message: 'imageUri is missing or invalid. A valid http(s) URL is required.', value: rawEntry?.imageUri ?? null, }); } const categories = toArrayOfStrings(rawEntry?.categories); const confidence = parseNumber(rawEntry?.confidence, 1); const clampedConfidence = Math.max(0, Math.min(1, Number(confidence.toFixed(4)))); const description = typeof rawEntry?.description === 'string' ? rawEntry.description.trim() : ''; const careInfoRaw = rawEntry?.careInfo || {}; const careInfo = { waterIntervalDays: Math.max(1, Math.round(parseNumber(careInfoRaw.waterIntervalDays, 7))), light: typeof careInfoRaw.light === 'string' && careInfoRaw.light.trim() ? normalizeWhitespace(careInfoRaw.light) : 'Unknown', temp: typeof careInfoRaw.temp === 'string' && careInfoRaw.temp.trim() ? normalizeWhitespace(careInfoRaw.temp) : 'Unknown', }; return { entry: { id, name, botanicalName, imageUri, imageStatus: 'ok', description, categories, careInfo, confidence: clampedConfidence, }, errors, }; }; const collectDuplicateErrors = (entries, getKey, fieldName, message) => { const counts = new Map(); entries.forEach((entry, index) => { const key = getKey(entry); if (!key) return; const existing = counts.get(key) || []; existing.push(index); counts.set(key, existing); }); const duplicateErrors = []; counts.forEach((indices, key) => { if (indices.length <= 1) return; indices.forEach((index) => { duplicateErrors.push({ index, field: fieldName, message, value: key, }); }); }); return duplicateErrors; }; const assertValidPreparedEntries = (entries, enforceUniqueImages) => { const duplicateErrors = []; duplicateErrors.push( ...collectDuplicateErrors( entries, (entry) => entry.id, 'id', 'Duplicate plant id detected in import payload.', ), ); duplicateErrors.push( ...collectDuplicateErrors( entries, (entry) => normalizeKey(entry.botanicalName), 'botanicalName', 'Duplicate botanicalName detected in import payload.', ), ); if (enforceUniqueImages) { duplicateErrors.push( ...collectDuplicateErrors( entries, (entry) => entry.imageUri, 'imageUri', 'Duplicate imageUri detected across multiple plants.', ), ); } if (duplicateErrors.length > 0) { throw new PlantImportValidationError( 'Import payload contains duplicate keys.', duplicateErrors.slice(0, MAX_AUDIT_DETAILS), ); } }; const ensureColumn = async (db, tableName, columnName, definitionSql) => { const columns = await all(db, `PRAGMA table_info(${tableName})`); const hasColumn = columns.some((column) => column.name === columnName); if (hasColumn) return; await run(db, `ALTER TABLE ${tableName} ADD COLUMN ${columnName} ${definitionSql}`); }; const ensurePlantSchema = async (db) => { await run( db, `CREATE TABLE IF NOT EXISTS plants ( id TEXT PRIMARY KEY, name TEXT NOT NULL, botanicalName TEXT NOT NULL, imageUri TEXT NOT NULL, description TEXT, categories TEXT NOT NULL, careInfo TEXT NOT NULL, confidence REAL NOT NULL )`, ); await ensureColumn(db, 'plants', 'imageStatus', `TEXT NOT NULL DEFAULT 'ok'`); await ensureColumn(db, 'plants', 'createdAt', `TEXT`); await ensureColumn(db, 'plants', 'updatedAt', `TEXT`); await run( db, `CREATE TABLE IF NOT EXISTS plant_import_audit ( id INTEGER PRIMARY KEY AUTOINCREMENT, source TEXT NOT NULL, importedCount INTEGER NOT NULL DEFAULT 0, preservedIds INTEGER NOT NULL DEFAULT 0, duplicateImageCount INTEGER NOT NULL DEFAULT 0, status TEXT NOT NULL, details TEXT, backupTable TEXT, startedAt TEXT NOT NULL, completedAt TEXT NOT NULL )`, ); await run( db, `CREATE INDEX IF NOT EXISTS idx_plants_name ON plants(name COLLATE NOCASE)`, ); await run( db, `CREATE INDEX IF NOT EXISTS idx_plants_botanical_name ON plants(botanicalName COLLATE NOCASE)`, ); await run( db, `CREATE INDEX IF NOT EXISTS idx_plant_import_audit_started_at ON plant_import_audit(startedAt DESC)`, ); await run( db, `UPDATE plants SET imageStatus = COALESCE(NULLIF(imageStatus, ''), 'ok')`, ); await run( db, `UPDATE plants SET createdAt = COALESCE(createdAt, datetime('now'))`, ); await run( db, `UPDATE plants SET updatedAt = COALESCE(updatedAt, datetime('now'))`, ); }; const parseJsonArray = (value) => { if (!value) return []; if (Array.isArray(value)) return value; try { const parsed = JSON.parse(value); return Array.isArray(parsed) ? parsed : []; } catch { return []; } }; const parseJsonObject = (value) => { if (!value) return {}; if (typeof value === 'object') return value; try { const parsed = JSON.parse(value); return parsed && typeof parsed === 'object' ? parsed : {}; } catch { return {}; } }; const toApiPlant = (row) => { const categories = parseJsonArray(row.categories); const careInfo = parseJsonObject(row.careInfo); return { id: row.id, name: row.name, botanicalName: row.botanicalName, imageUri: row.imageUri, imageStatus: row.imageStatus || 'ok', description: row.description || '', categories, careInfo, confidence: Number(row.confidence) || 0, }; }; const getPlants = async (db, options = {}) => { const query = typeof options.query === 'string' ? options.query.trim().toLowerCase() : ''; const category = typeof options.category === 'string' ? options.category.trim() : ''; const limitRaw = Number(options.limit); const limit = Number.isFinite(limitRaw) ? Math.max(1, Math.min(MAX_LIMIT, Math.round(limitRaw))) : DEFAULT_LIMIT; let sql = `SELECT id, name, botanicalName, imageUri, imageStatus, description, categories, careInfo, confidence FROM plants`; const params = []; if (query) { sql += ` WHERE ( LOWER(name) LIKE ? OR LOWER(botanicalName) LIKE ? OR LOWER(COALESCE(description, '')) LIKE ? )`; const likePattern = `%${query}%`; params.push(likePattern, likePattern, likePattern); } sql += ' ORDER BY name COLLATE NOCASE ASC'; const rows = await all(db, sql, params); let results = rows.map(toApiPlant); if (category) { results = results.filter((plant) => plant.categories.includes(category)); } return results.slice(0, limit); }; const getPlantDiagnostics = async (db) => { const totals = await get( db, `SELECT COUNT(*) AS totalCount, SUM(CASE WHEN imageUri IS NULL OR TRIM(imageUri) = '' THEN 1 ELSE 0 END) AS missingImageCount, SUM(CASE WHEN COALESCE(imageStatus, 'ok') <> 'ok' THEN 1 ELSE 0 END) AS nonOkImageStatusCount FROM plants`, ); const duplicateImages = await all( db, `SELECT imageUri, COUNT(*) AS count FROM plants WHERE imageUri IS NOT NULL AND TRIM(imageUri) <> '' GROUP BY imageUri HAVING COUNT(*) > 1 ORDER BY count DESC, imageUri ASC LIMIT 200`, ); const duplicateBotanicalNames = await all( db, `SELECT botanicalName, COUNT(*) AS count FROM plants WHERE botanicalName IS NOT NULL AND TRIM(botanicalName) <> '' GROUP BY LOWER(botanicalName) HAVING COUNT(*) > 1 ORDER BY count DESC, botanicalName ASC LIMIT 200`, ); const recentAudits = await all( db, `SELECT id, source, importedCount, preservedIds, duplicateImageCount, status, details, backupTable, startedAt, completedAt FROM plant_import_audit ORDER BY startedAt DESC LIMIT 20`, ); return { totalCount: Number(totals?.totalCount || 0), missingImageCount: Number(totals?.missingImageCount || 0), nonOkImageStatusCount: Number(totals?.nonOkImageStatusCount || 0), duplicateImageCount: duplicateImages.length, duplicateImages, duplicateBotanicalNameCount: duplicateBotanicalNames.length, duplicateBotanicalNames, recentAudits: recentAudits.map((audit) => ({ ...audit, details: audit.details ? parseJsonObject(audit.details) : null, })), }; }; const writeAuditRow = async (db, audit) => { await run( db, `INSERT INTO plant_import_audit ( source, importedCount, preservedIds, duplicateImageCount, status, details, backupTable, startedAt, completedAt ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`, [ audit.source, audit.importedCount, audit.preservedIds, audit.duplicateImageCount, audit.status, JSON.stringify(audit.details || {}), audit.backupTable || null, audit.startedAt, audit.completedAt, ], ); }; const sanitizeIdentifier = (value) => { if (!/^[A-Za-z_][A-Za-z0-9_]*$/.test(value)) { throw new Error(`Invalid SQL identifier: ${value}`); } return value; }; const rebuildPlantsCatalog = async (db, rawEntries, options = {}) => { if (!Array.isArray(rawEntries)) { throw new PlantImportValidationError('Import payload must be an array of entries.', [ { field: 'entries', message: 'Expected an array of plant objects.' }, ]); } const source = typeof options.source === 'string' && options.source.trim() ? options.source.trim() : 'manual'; const preserveExistingIds = options.preserveExistingIds !== false; const enforceUniqueImages = options.enforceUniqueImages !== false; const startedAtIso = new Date().toISOString(); const existingRows = await all(db, 'SELECT id, botanicalName FROM plants'); const existingIdMap = parseExistingIdMap(existingRows); const validationErrors = []; const preparedEntries = rawEntries.map((rawEntry, index) => { const prepared = prepareEntry(rawEntry, index, existingIdMap, preserveExistingIds); if (prepared.errors.length > 0) { validationErrors.push(...prepared.errors); } return prepared.entry; }); if (validationErrors.length > 0) { throw new PlantImportValidationError( 'Import payload failed validation checks.', validationErrors.slice(0, MAX_AUDIT_DETAILS), ); } assertValidPreparedEntries(preparedEntries, enforceUniqueImages); const preservedIds = preparedEntries.reduce((count, entry) => { if (existingIdMap.get(normalizeKey(entry.botanicalName)) === entry.id) return count + 1; return count; }, 0); const timestamp = startedAtIso.replace(/[-:.TZ]/g, '').slice(0, 14); const backupTable = sanitizeIdentifier(`plants_backup_${timestamp}`); const details = { enforceUniqueImages, preserveExistingIds, inputCount: rawEntries.length, preparedCount: preparedEntries.length, }; try { await run(db, 'BEGIN IMMEDIATE TRANSACTION'); await run(db, `DROP TABLE IF EXISTS ${backupTable}`); await run(db, `CREATE TABLE ${backupTable} AS SELECT * FROM plants`); await run(db, 'DELETE FROM plants'); for (const entry of preparedEntries) { await run( db, `INSERT INTO plants ( id, name, botanicalName, imageUri, imageStatus, description, categories, careInfo, confidence, createdAt, updatedAt ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, [ entry.id, entry.name, entry.botanicalName, entry.imageUri, 'ok', entry.description, JSON.stringify(entry.categories), JSON.stringify(entry.careInfo), entry.confidence, startedAtIso, startedAtIso, ], ); } await run( db, 'CREATE UNIQUE INDEX IF NOT EXISTS idx_plants_botanical_name_unique ON plants(botanicalName)', ); if (enforceUniqueImages) { await run( db, 'CREATE UNIQUE INDEX IF NOT EXISTS idx_plants_image_uri_unique ON plants(imageUri)', ); } else { await run(db, 'DROP INDEX IF EXISTS idx_plants_image_uri_unique'); } await run(db, 'COMMIT'); } catch (error) { await run(db, 'ROLLBACK'); const completedAtIso = new Date().toISOString(); await writeAuditRow(db, { source, importedCount: 0, preservedIds: 0, duplicateImageCount: 0, status: 'failed', details: { ...details, error: error instanceof Error ? error.message : String(error), }, backupTable: null, startedAt: startedAtIso, completedAt: completedAtIso, }); throw error; } const duplicateImages = await all( db, `SELECT imageUri, COUNT(*) AS count FROM plants GROUP BY imageUri HAVING COUNT(*) > 1`, ); const completedAtIso = new Date().toISOString(); await writeAuditRow(db, { source, importedCount: preparedEntries.length, preservedIds, duplicateImageCount: duplicateImages.length, status: 'success', details, backupTable, startedAt: startedAtIso, completedAt: completedAtIso, }); return { source, importedCount: preparedEntries.length, preservedIds, duplicateImageCount: duplicateImages.length, backupTable, startedAt: startedAtIso, completedAt: completedAtIso, }; }; module.exports = { PlantImportValidationError, ensurePlantSchema, getPlantDiagnostics, getPlants, normalizeImageUri, rebuildPlantsCatalog, };