#!/usr/bin/env node
/* eslint-disable no-console */

/**
 * Plant image importer.
 *
 * Reads every row of the `plants` table, makes sure a local WebP copy of the
 * plant's image exists under `public/plants/`, points the row's `imageUri` at
 * that local file, and writes a `manifest.json` describing what happened.
 *
 * Environment variables read by this script:
 *   - PLANT_IMAGE_CONCURRENCY: number of plants processed in parallel (default 1).
 *   - PLANT_IMAGE_REFRESH: comma-separated ids / names / botanical names whose
 *     image must be re-downloaded even if a local file already exists.
 */

const fs = require('fs');
const path = require('path');
const crypto = require('crypto');
require('dotenv').config();
const sharp = require('sharp');
const { openDatabase, closeDatabase, all, run } = require('../lib/sqlite');
const { ensurePlantSchema } = require('../lib/plants');

const OUTPUT_DIR = path.join(__dirname, '..', 'public', 'plants');
const MANIFEST_PATH = path.join(OUTPUT_DIR, 'manifest.json');
const ROOT_DIR = path.join(__dirname, '..', '..');
const PLANTS_DUMP_PATH = path.join(ROOT_DIR, 'plants_dump_utf8.json');
const SEARCH_CACHE_PATH = path.join(OUTPUT_DIR, 'wikimedia-search-cache.json');
// May be NaN or < 1 when the env var is malformed; runWithConcurrency normalizes it.
const MAX_CONCURRENCY = Number(process.env.PLANT_IMAGE_CONCURRENCY || 1);
const REQUEST_TIMEOUT_MS = 20000;
const MAX_FETCH_RETRIES = 5;
// An imageUri starting with this prefix is a placeholder: the remainder is a search query.
const WIKIMEDIA_SEARCH_PREFIX = 'wikimedia-search:';

/**
 * Resolve after `ms` milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Tell whether a value is a string starting with `http://` or `https://`.
 * @param {unknown} value
 * @returns {boolean}
 */
const isRemoteUrl = (value) => typeof value === 'string' && /^https?:\/\//i.test(value);

/**
 * Turn arbitrary text into a lowercase, dash-separated ASCII slug.
 * Combining diacritics are stripped after NFD normalization.
 * @param {unknown} value
 * @returns {string} The slug, or `'plant'` when nothing usable remains.
 */
const slugify = (value) => {
  const normalized = String(value || '')
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+|-+$/g, '');
  return normalized || 'plant';
};

/**
 * Build the extension-less file name used for a plant's local image.
 *
 * The name combines the botanical slug, the common-name slug (when it differs)
 * and an 8-hex-digit SHA-1 prefix of `id|botanicalName|name`. The output of
 * this function decides which files on disk count as "already present", so
 * its format must stay stable.
 *
 * @param {{ id: unknown, botanicalName: unknown, name: unknown }} plant
 * @returns {string}
 */
const buildFileBaseName = (plant) => {
  const botanicalSlug = slugify(plant.botanicalName);
  const nameSlug = slugify(plant.name);
  const suffix = crypto
    .createHash('sha1')
    .update(`${plant.id}|${plant.botanicalName}|${plant.name}`)
    .digest('hex')
    .slice(0, 8);
  if (nameSlug && nameSlug !== botanicalSlug) {
    return `${botanicalSlug}--${nameSlug}--${suffix}`;
  }
  return `${botanicalSlug}--${suffix}`;
};

/** Create the output directory (and missing parents) if it does not exist. */
const ensureOutputDir = () => {
  fs.mkdirSync(OUTPUT_DIR, { recursive: true });
};

/**
 * Parse PLANT_IMAGE_REFRESH into a set of trimmed, lowercased matchers.
 * @returns {Set<string>} Empty when the variable is unset or blank.
 */
const loadRefreshMatchers = () =>
  new Set(
    String(process.env.PLANT_IMAGE_REFRESH || '')
      .split(',')
      .map((value) => value.trim().toLowerCase())
      .filter(Boolean),
  );

/**
 * Read the previously written manifest.
 * Kept as a utility; `main` rebuilds the manifest from scratch and does not call it.
 * @returns {object} Parsed manifest, or an empty manifest when the file is missing or unreadable.
 */
const loadManifest = () => {
  try {
    const raw = fs.readFileSync(MANIFEST_PATH, 'utf8');
    return JSON.parse(raw);
  } catch {
    return { generatedAt: null, items: [] };
  }
};

/**
 * Write the manifest as pretty-printed JSON.
 * @param {object} manifest
 */
const saveManifest = (manifest) => {
  fs.writeFileSync(MANIFEST_PATH, JSON.stringify(manifest, null, 2));
};

/**
 * Read the on-disk Wikimedia search cache (query -> image URL or null).
 * @returns {Record<string, string | null>} `{}` when the file is missing,
 *   unreadable, or does not contain a plain JSON object.
 */
const loadSearchCache = () => {
  try {
    const parsed = JSON.parse(fs.readFileSync(SEARCH_CACHE_PATH, 'utf8'));
    // `null` or an array is valid JSON but would break the own-property lookups done later.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      return {};
    }
    return parsed;
  } catch {
    return {};
  }
};

/**
 * Persist the search cache as pretty-printed JSON.
 * @param {Record<string, string | null>} cache
 */
const saveSearchCache = (cache) => {
  fs.writeFileSync(SEARCH_CACHE_PATH, JSON.stringify(cache, null, 2));
};

/**
 * Decide whether a plant's image must be re-downloaded.
 * @param {{ id: unknown, name: unknown, botanicalName: unknown }} plant
 * @param {Set<string>} refreshMatchers Lowercased matchers from `loadRefreshMatchers`.
 * @returns {boolean} True when the id, name or botanical name matches (case-insensitively).
 */
const shouldRefreshPlantImage = (plant, refreshMatchers) => {
  if (!refreshMatchers || refreshMatchers.size === 0) return false;
  return [plant.id, plant.name, plant.botanicalName].some((value) =>
    refreshMatchers.has(String(value || '').trim().toLowerCase()),
  );
};

/**
 * Load fallback image URLs from the plants dump file.
 *
 * Only entries with a string `botanicalName` and an http(s) `imageUri` are
 * used; the first entry wins when a botanical name appears more than once.
 *
 * @returns {Map<string, string>} Lowercased botanical name -> image URL.
 *   Empty when the dump is missing, unreadable, or not a JSON array.
 */
const loadDumpFallbackMap = () => {
  try {
    const raw = fs.readFileSync(PLANTS_DUMP_PATH, 'utf8');
    const entries = JSON.parse(raw);
    if (!Array.isArray(entries)) return new Map();
    const map = new Map();
    for (const entry of entries) {
      if (!entry || typeof entry.botanicalName !== 'string' || typeof entry.imageUri !== 'string') continue;
      const key = entry.botanicalName.trim().toLowerCase();
      if (!key || !isRemoteUrl(entry.imageUri)) continue;
      if (!map.has(key)) map.set(key, entry.imageUri.trim());
    }
    return map;
  } catch {
    return new Map();
  }
};

/**
 * Compute how long to wait before retrying a request.
 * @param {number} attempt Zero-based number of attempts already made.
 * @param {string | null} retryAfterHeader Raw `Retry-After` header value.
 * @returns {number} The header value in milliseconds when it is a positive
 *   number of seconds; otherwise exponential backoff from 3 s capped at 30 s.
 */
const getRetryDelayMs = (attempt, retryAfterHeader) => {
  const retryAfterSeconds = Number(retryAfterHeader);
  if (Number.isFinite(retryAfterSeconds) && retryAfterSeconds > 0) {
    return retryAfterSeconds * 1000;
  }
  return Math.min(30000, 3000 * 2 ** attempt);
};

/**
 * `decodeURIComponent` that returns its input unchanged on malformed sequences.
 * @param {string} value
 * @returns {string}
 */
const tryDecode = (value) => {
  try {
    return decodeURIComponent(value);
  } catch {
    return value;
  }
};

/**
 * Percent-decode a value up to `rounds` times, stopping once it no longer changes.
 * Handles values that were encoded more than once.
 * @param {string} value
 * @param {number} [rounds=3]
 * @returns {string}
 */
const decodeRepeatedly = (value, rounds = 3) => {
  let current = value;
  for (let index = 0; index < rounds; index += 1) {
    const decoded = tryDecode(current);
    if (decoded === current) break;
    current = decoded;
  }
  return current;
};

/**
 * Convert an `upload.wikimedia.org/wikipedia/commons/...` URL into the
 * equivalent `Special:FilePath` URL on commons.wikimedia.org.
 *
 * @param {unknown} rawUrl
 * @returns {string | null} The FilePath URL, or null when `rawUrl` is not a
 *   Commons upload URL or no file name can be extracted.
 */
const toWikimediaFilePathUrl = (rawUrl) => {
  if (typeof rawUrl !== 'string' || !rawUrl.includes('upload.wikimedia.org/wikipedia/commons/')) {
    return null;
  }
  const cleanUrl = rawUrl.split(/[?#]/)[0];
  const parts = cleanUrl.split('/').filter(Boolean);
  if (parts.length < 2) return null;

  // Thumbnail URLs look like .../thumb/a/ab/<File>/<width>px-<File>; the real
  // file name is then the second-to-last segment rather than the last one.
  const thumbIndex = parts.indexOf('thumb');
  const isThumbnail = thumbIndex >= 0 && parts.length >= thumbIndex + 5;
  const fileName = isThumbnail ? parts[parts.length - 2] : parts[parts.length - 1];
  if (!fileName) return null;

  const decoded = tryDecode(fileName).trim();
  if (!decoded) return null;
  return `https://commons.wikimedia.org/wiki/Special:FilePath/${encodeURIComponent(decoded)}`;
};

/**
 * Extract the search query from a `wikimedia-search:<query>` placeholder URI.
 * @param {unknown} value
 * @returns {string | null} The percent-decoded query, or null when `value` is
 *   not a placeholder or the query is empty.
 */
const parseWikimediaSearchQuery = (value) => {
  if (typeof value !== 'string') return null;
  const trimmed = value.trim();
  if (!trimmed.toLowerCase().startsWith(WIKIMEDIA_SEARCH_PREFIX)) return null;
  const rawQuery = trimmed.slice(WIKIMEDIA_SEARCH_PREFIX.length).trim();
  if (!rawQuery) return null;
  return decodeRepeatedly(rawQuery);
};

/**
 * Download a URL into a Buffer.
 *
 * Redirects are followed manually (at most 5 hops). HTTP 429 and 5xx responses
 * are retried up to MAX_FETCH_RETRIES times with the delay from
 * `getRetryDelayMs`. Each individual request is aborted after REQUEST_TIMEOUT_MS.
 *
 * @param {string} url
 * @param {number} [attempt=0] Retry counter (internal; used by the recursion).
 * @param {number} [redirectCount=0] Redirect counter (internal; used by the recursion).
 * @returns {Promise<Buffer>}
 * @throws {Error} On too many redirects, a redirect without `Location`, a
 *   non-OK final status, a network failure, or a timeout.
 */
const fetchImageBuffer = async (url, attempt = 0, redirectCount = 0) => {
  if (redirectCount > 5) {
    throw new Error('Too many redirects');
  }
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
  try {
    const response = await fetch(url, {
      headers: {
        'User-Agent': 'GreenLens-PlantImageImporter/1.0',
        'Accept': 'image/avif,image/webp,image/apng,image/*,*/*;q=0.8',
        'Referer': 'https://commons.wikimedia.org/',
      },
      redirect: 'manual',
      signal: controller.signal,
    });

    if ([301, 302, 303, 307, 308].includes(response.status)) {
      const location = response.headers.get('location');
      if (!location) throw new Error(`Redirect without location for ${url}`);
      // Location may be relative; resolve it against the URL that produced it.
      const nextUrl = new URL(location, url).toString();
      return fetchImageBuffer(nextUrl, attempt, redirectCount + 1);
    }

    if ((response.status === 429 || response.status >= 500) && attempt < MAX_FETCH_RETRIES) {
      const delayMs = getRetryDelayMs(attempt, response.headers.get('retry-after'));
      await sleep(delayMs);
      return fetchImageBuffer(url, attempt + 1, redirectCount);
    }

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const arrayBuffer = await response.arrayBuffer();
    return Buffer.from(arrayBuffer);
  } finally {
    clearTimeout(timeout);
  }
};

/**
 * Look up an image URL for a query via the Wikimedia Commons search API.
 *
 * Results of completed searches (a URL, or null for "nothing found") are stored
 * in `searchCache` and written to disk. Failed searches (network error,
 * timeout, non-OK status, unparsable body) return null WITHOUT being cached,
 * so a temporary outage does not permanently suppress a query.
 *
 * @param {unknown} query
 * @param {Record<string, string | null>} searchCache Mutated in place.
 * @returns {Promise<string | null>} First http(s) thumbnail/original URL found, or null.
 */
const searchWikimediaImage = async (query, searchCache) => {
  const normalizedQuery = String(query || '').trim();
  if (!normalizedQuery) return null;
  if (Object.prototype.hasOwnProperty.call(searchCache, normalizedQuery)) {
    return searchCache[normalizedQuery] || null;
  }

  const apiUrl = `https://commons.wikimedia.org/w/api.php?action=query&generator=search&gsrnamespace=6&gsrsearch=${encodeURIComponent(normalizedQuery)}&gsrlimit=5&prop=imageinfo&iiprop=url&iiurlwidth=1200&format=json`;

  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), REQUEST_TIMEOUT_MS);
  let found = null;
  try {
    const response = await fetch(apiUrl, {
      headers: {
        'User-Agent': 'GreenLens-PlantImageImporter/1.0',
        'Accept': 'application/json',
      },
      signal: controller.signal,
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const data = await response.json();
    const pages = data?.query?.pages ? Object.values(data.query.pages) : [];
    for (const page of pages) {
      const imageInfo = page?.imageinfo?.[0];
      const candidate = imageInfo?.thumburl || imageInfo?.url || null;
      if (candidate && isRemoteUrl(candidate)) {
        found = candidate;
        break;
      }
    }
  } catch (error) {
    // Best effort: a failed search just means "no candidate from search" for this run.
    const message = error instanceof Error ? error.message : String(error);
    console.warn(`Wikimedia search failed for "${normalizedQuery}": ${message}`);
    return null;
  } finally {
    // Always release the timer, including when fetch() or json() rejects.
    clearTimeout(timeout);
  }

  searchCache[normalizedQuery] = found;
  saveSearchCache(searchCache);
  return found;
};

/**
 * Convert image bytes to a WebP file at `outputPath`.
 *
 * The image is passed through sharp's `rotate()`, resized to fit inside
 * 1200x1200 without enlargement, and encoded at quality 82. Output is written
 * to a temp file first so a failed conversion never touches an existing
 * `outputPath`; the temp file is removed whether or not the conversion succeeds.
 *
 * @param {Buffer} inputBuffer
 * @param {string} outputPath
 * @returns {Promise<void>}
 * @throws When sharp cannot decode/encode the image or the file copy fails.
 */
const convertToWebp = async (inputBuffer, outputPath) => {
  const tempPath = `${outputPath}.tmp-${process.pid}-${Date.now()}.webp`;
  try {
    await sharp(inputBuffer)
      .rotate()
      .resize({
        width: 1200,
        height: 1200,
        fit: 'inside',
        withoutEnlargement: true,
      })
      .webp({ quality: 82 })
      .toFile(tempPath);
    fs.copyFileSync(tempPath, outputPath);
  } finally {
    // force: true makes this a no-op when sharp failed before creating the file.
    fs.rmSync(tempPath, { force: true });
  }
};

/**
 * Point a plant row at its local image and mark the image status as 'ok'.
 * @param {object} db Database handle from `openDatabase`.
 * @param {unknown} plantId
 * @param {string} localImageUri
 * @returns {Promise<void>}
 */
const updatePlantImageUri = async (db, plantId, localImageUri) => {
  await run(
    db,
    'UPDATE plants SET imageUri = ?, imageStatus = ?, updatedAt = datetime(\'now\') WHERE id = ?',
    [localImageUri, 'ok', plantId],
  );
};

/**
 * Ensure one plant has a local WebP image and record the outcome.
 *
 * Outcomes:
 *   - 'existing':   the local file is already there and no refresh was requested.
 *   - 'skipped':    `imageUri` is neither an http(s) URL nor a search placeholder.
 *   - 'downloaded': an image was fetched, converted and stored.
 *
 * Candidate sources are tried in this order, each at most once: the current
 * URL, its Special:FilePath form, the dump fallback URL, its Special:FilePath
 * form, then a Wikimedia search result and its Special:FilePath form. The
 * search is only performed when every direct candidate failed. A candidate
 * counts as failed when either the download or the WebP conversion fails.
 *
 * @param {object} db Database handle.
 * @param {{ id: unknown, name: unknown, botanicalName: unknown, imageUri: unknown }} plant
 * @param {object[]} manifestItems Mutated: one entry is appended on every non-throwing outcome.
 * @param {Map<string, string>} dumpFallbackMap From `loadDumpFallbackMap`.
 * @param {Record<string, string | null>} searchCache Mutated by searches.
 * @param {Set<string>} refreshMatchers From `loadRefreshMatchers`.
 * @returns {Promise<{ status: string, plantId: unknown, localImageUri: string }>}
 * @throws {Error} The last candidate error (or 'Image download failed') when no candidate worked.
 */
const processPlant = async (db, plant, manifestItems, dumpFallbackMap, searchCache, refreshMatchers) => {
  const currentUri = String(plant.imageUri || '').trim();
  const placeholderQuery = parseWikimediaSearchQuery(currentUri);
  const fileBaseName = buildFileBaseName(plant);
  const fileName = `${fileBaseName}.webp`;
  const localImageUri = `/plants/${fileName}`;
  const outputPath = path.join(OUTPUT_DIR, fileName);
  const dumpFallbackUri = dumpFallbackMap.get(String(plant.botanicalName || '').trim().toLowerCase()) || null;
  const shouldRefresh = shouldRefreshPlantImage(plant, refreshMatchers);
  const currentIsRemote = isRemoteUrl(currentUri);

  if (fs.existsSync(outputPath) && !shouldRefresh) {
    await updatePlantImageUri(db, plant.id, localImageUri);
    manifestItems.push({
      id: plant.id,
      botanicalName: plant.botanicalName,
      name: plant.name,
      sourceUri: currentUri,
      localImageUri,
      status: 'existing',
    });
    return { status: 'existing', plantId: plant.id, localImageUri };
  }

  if (!currentIsRemote && !placeholderQuery) {
    manifestItems.push({
      id: plant.id,
      botanicalName: plant.botanicalName,
      name: plant.name,
      sourceUri: currentUri,
      localImageUri,
      status: 'skipped',
      reason: 'Current imageUri is not a remote URL and no local file exists yet.',
    });
    return { status: 'skipped', plantId: plant.id, localImageUri };
  }

  const attempted = new Set();
  let lastError = null;
  let sourceUsed = null;

  // Try each not-yet-attempted URI in order; stop at the first one that both
  // downloads and converts. Returns whether the output file was written.
  const tryCandidates = async (uris) => {
    for (const candidateUri of uris) {
      if (!candidateUri || attempted.has(candidateUri)) continue;
      attempted.add(candidateUri);
      try {
        const buffer = await fetchImageBuffer(candidateUri);
        await convertToWebp(buffer, outputPath);
        sourceUsed = candidateUri;
        return true;
      } catch (error) {
        lastError = error;
      }
    }
    return false;
  };

  let stored = await tryCandidates([
    currentIsRemote ? currentUri : null,
    currentIsRemote ? toWikimediaFilePathUrl(currentUri) : null,
    dumpFallbackUri,
    toWikimediaFilePathUrl(dumpFallbackUri),
  ]);

  if (!stored) {
    // Only hit the search API once the direct sources are exhausted.
    const searchedUri =
      (await searchWikimediaImage(placeholderQuery, searchCache)) ||
      (await searchWikimediaImage(plant.botanicalName, searchCache)) ||
      (await searchWikimediaImage(plant.name, searchCache));
    stored = await tryCandidates([searchedUri, toWikimediaFilePathUrl(searchedUri)]);
  }

  if (!stored) {
    throw lastError || new Error('Image download failed');
  }

  await updatePlantImageUri(db, plant.id, localImageUri);
  manifestItems.push({
    id: plant.id,
    botanicalName: plant.botanicalName,
    name: plant.name,
    sourceUri: sourceUsed,
    localImageUri,
    status: 'downloaded',
  });
  // Pause between downloads to stay polite towards the remote host.
  await sleep(900);
  return { status: 'downloaded', plantId: plant.id, localImageUri };
};

/**
 * Run `worker` over `items` with at most `concurrency` calls in flight.
 *
 * @param {Iterable<unknown>} items
 * @param {(item: unknown) => Promise<unknown>} worker
 * @param {number} concurrency Values that are NaN or below 1 are treated as 1,
 *   so a malformed setting can never result in zero items being processed.
 * @returns {Promise<unknown[]>} Worker results in the same order as `items`.
 *   Rejects with the first worker rejection.
 */
const runWithConcurrency = async (items, worker, concurrency) => {
  const queue = [...items];
  const results = new Array(queue.length);
  const requested = Math.floor(Number(concurrency));
  const limit = Number.isNaN(requested) || requested < 1 ? 1 : requested;
  let nextIndex = 0;

  const runners = Array.from({ length: Math.min(limit, queue.length) }, async () => {
    // Claiming an index is synchronous, so two runners never take the same item.
    while (nextIndex < queue.length) {
      const index = nextIndex;
      nextIndex += 1;
      results[index] = await worker(queue[index]);
    }
  });

  await Promise.all(runners);
  return results;
};

/**
 * Entry point: process every plant, write the manifest, print a summary.
 * Sets `process.exitCode` to 1 when at least one plant failed.
 * @returns {Promise<void>}
 */
const main = async () => {
  ensureOutputDir();
  const manifestItems = [];
  const dumpFallbackMap = loadDumpFallbackMap();
  const searchCache = loadSearchCache();
  const refreshMatchers = loadRefreshMatchers();
  const db = await openDatabase();

  try {
    await ensurePlantSchema(db);
    const plants = await all(
      db,
      `SELECT id, name, botanicalName, imageUri
       FROM plants
       ORDER BY name COLLATE NOCASE ASC`,
    );

    console.log(`Preparing ${plants.length} plant images...`);
    const failures = [];
    let completed = 0;

    await runWithConcurrency(
      plants,
      async (plant) => {
        try {
          const result = await processPlant(db, plant, manifestItems, dumpFallbackMap, searchCache, refreshMatchers);
          completed += 1;
          console.log(`[${completed}/${plants.length}] ${plant.botanicalName} -> ${result.status}`);
          return result;
        } catch (error) {
          // A single plant failing must not abort the whole run; record it and continue.
          completed += 1;
          const message = error instanceof Error ? error.message : String(error);
          console.error(`[${completed}/${plants.length}] ${plant.botanicalName} -> failed: ${message}`);
          failures.push({
            id: plant.id,
            name: plant.name,
            botanicalName: plant.botanicalName,
            sourceUri: plant.imageUri,
            error: message,
          });
          manifestItems.push({
            id: plant.id,
            botanicalName: plant.botanicalName,
            name: plant.name,
            sourceUri: plant.imageUri,
            status: 'failed',
            error: message,
          });
          return { status: 'failed', plantId: plant.id };
        }
      },
      MAX_CONCURRENCY,
    );

    const countByStatus = (status) => manifestItems.filter((item) => item.status === status).length;
    const downloadedCount = countByStatus('downloaded');
    const existingCount = countByStatus('existing');
    const skippedCount = countByStatus('skipped');

    saveManifest({
      generatedAt: new Date().toISOString(),
      summary: {
        totalPlants: plants.length,
        downloadedCount,
        existingCount,
        skippedCount,
        failureCount: failures.length,
      },
      failures,
      items: manifestItems,
    });

    console.log('');
    console.log(`Downloaded: ${downloadedCount}`);
    console.log(`Already present: ${existingCount}`);
    console.log(`Skipped: ${skippedCount}`);
    console.log(`Failed: ${failures.length}`);
    console.log(`Manifest: ${MANIFEST_PATH}`);

    if (failures.length > 0) {
      process.exitCode = 1;
    }
  } finally {
    await closeDatabase(db);
  }
};

main().catch((error) => {
  console.error('Plant image import failed.');
  console.error(error instanceof Error ? error.stack || error.message : String(error));
  process.exit(1);
});