#!/usr/bin/env node
/**
 * fix_images.js
 * Finds broken image URLs in lexicon/catalog files and replaces them
 * using Wikimedia Commons API.
 */
const crypto = require('crypto');
const fs = require('fs');
const https = require('https');

// Source files scanned for entries; paths are resolved against the current working directory.
const FILES = [
  'constants/lexiconBatch1.ts',
  'constants/lexiconBatch2.ts',
  'services/backend/mockCatalog.ts',
];

// Known manual fixes (botanicalName -> correct Wikimedia filename)
const MANUAL_FIXES = {
  'Chlorophytum comosum': 'Chlorophytum_comosum_01.jpg',
  'Syngonium podophyllum': 'Syngonium_podophyllum1.jpg',
  'Fuchsia hybrida': 'Fuchsia_%27Beacon%27.jpg',
  'Tillandsia usneoides': 'Tillandsia_usneoides_leaves.jpg',
  'Tillandsia ionantha': 'Tillandsia_ionantha0.jpg',
};

// Upper bound on redirect hops followed by httpGet, so a redirect loop cannot recurse forever.
const MAX_REDIRECTS = 5;

// Status codes httpGet follows when the response carries a Location header.
const REDIRECT_STATUSES = [301, 302, 303, 307, 308];

// Image URLs accepted from the search API: JPEG or PNG, in any letter case.
const RASTER_IMAGE_PATTERN = /\.(?:jpe?g|png)$/i;

/**
 * Resolves after the given delay; used to pace outgoing requests.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Performs an HTTPS GET and collects the response body as UTF-8 text.
 * Redirects are followed up to `redirectsLeft` hops.
 *
 * @param {string} url - Absolute https URL.
 * @param {number} [redirectsLeft] - Remaining redirect hops allowed.
 * @returns {Promise<{status: number, body: string}>} Final status code and body.
 *   Rejects on network errors, on a 10 s timeout, or when the redirect budget
 *   is exhausted.
 */
function httpGet(url, redirectsLeft = MAX_REDIRECTS) {
  return new Promise((resolve, reject) => {
    const req = https.get(
      url,
      { headers: { 'User-Agent': 'GreenLens-ImageFixer/1.0 (educational plant app)' } },
      (res) => {
        const { location } = res.headers;
        if (REDIRECT_STATUSES.includes(res.statusCode) && location) {
          // Drain the redirect body so the socket is released.
          res.resume();
          if (redirectsLeft <= 0) {
            reject(new Error('too many redirects'));
            return;
          }
          let target;
          try {
            // Location may be relative; resolve it against the URL just requested.
            target = new URL(location, url).toString();
          } catch (err) {
            reject(err);
            return;
          }
          resolve(httpGet(target, redirectsLeft - 1));
          return;
        }
        // Decode as a stream so multi-byte characters split across chunks stay intact.
        res.setEncoding('utf8');
        let data = '';
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('end', () => resolve({ status: res.statusCode, body: data }));
        res.on('error', reject);
      },
    );
    req.on('error', reject);
    req.setTimeout(10000, () => {
      req.destroy();
      reject(new Error('timeout'));
    });
  });
}

/**
 * Reports whether a GET to `url` answers with HTTP 200.
 * Never rejects: request errors and an 8 s timeout both resolve to false.
 * Redirects are not followed, so a 3xx answer counts as not OK.
 *
 * @param {string} url - Absolute https URL.
 * @returns {Promise<boolean>}
 */
function checkUrl(url) {
  return new Promise((resolve) => {
    const req = https.get(
      url,
      { headers: { 'User-Agent': 'GreenLens-ImageFixer/1.0' } },
      (res) => {
        // Only the status matters; discard the body.
        res.resume();
        resolve(res.statusCode === 200);
      },
    );
    req.on('error', () => resolve(false));
    req.setTimeout(8000, () => {
      req.destroy();
      resolve(false);
    });
  });
}

/**
 * Tells whether a URL ends in a JPEG or PNG extension (case-insensitive).
 *
 * @param {*} url - Candidate URL; non-strings yield false.
 * @returns {boolean}
 */
function isRasterImageUrl(url) {
  return typeof url === 'string' && RASTER_IMAGE_PATTERN.test(url);
}

/**
 * Picks the best usable image URL from the `query.pages` object of a
 * `generator=search` response.
 *
 * `pages` is keyed by page id, so plain iteration order is not relevance
 * order. Pages are therefore sorted by their `index` field when present;
 * pages without one keep their relative order after the ranked ones.
 *
 * @param {Object<string, object>} pages - `data.query.pages` from the API.
 * @returns {string|null} First thumbnail (or original) URL that is JPEG/PNG.
 */
function pickImageUrl(pages) {
  const rankOf = (page) =>
    (typeof page.index === 'number' ? page.index : Number.MAX_SAFE_INTEGER);
  const ranked = Object.values(pages).sort((a, b) => rankOf(a) - rankOf(b));
  for (const page of ranked) {
    const info = page.imageinfo && page.imageinfo[0];
    if (!info) continue;
    const candidate = info.thumburl || info.url;
    if (isRasterImageUrl(candidate)) {
      return candidate;
    }
  }
  return null;
}

/**
 * Searches the Wikimedia Commons file namespace for `botanicalName` and
 * returns a 500px-wide thumbnail URL for the best JPEG/PNG hit.
 * API and parse errors are logged and reported as "no result".
 *
 * @param {string} botanicalName - Search text.
 * @returns {Promise<string|null>} Image URL, or null when nothing usable was found.
 */
async function searchWikimediaImage(botanicalName) {
  const encoded = encodeURIComponent(botanicalName);
  const url = `https://commons.wikimedia.org/w/api.php?action=query&generator=search&gsrnamespace=6&gsrsearch=${encoded}&gsrlimit=5&prop=imageinfo&iiprop=url&iiurlwidth=500&format=json`;
  try {
    const res = await httpGet(url);
    if (res.status !== 200) return null;
    const data = JSON.parse(res.body);
    const pages = data.query && data.query.pages;
    if (!pages) return null;
    return pickImageUrl(pages);
  } catch (e) {
    console.error(` API error for "${botanicalName}": ${e.message}`);
  }
  return null;
}

/**
 * Builds a 500px thumb URL from a bare Wikimedia Commons filename.
 *
 * The two shard directories are the first one and first two hex digits of
 * the MD5 of the filename. The hash is taken over the decoded,
 * underscore-form name, so `Foo_%27Bar%27.jpg` and `Foo_'Bar'.jpg` yield the
 * same URL.
 *
 * @param {string} filename - Filename, with spaces or underscores, optionally
 *   percent-encoded.
 * @returns {string} Thumbnail URL with the filename percent-encoded.
 */
function wikimediaThumbUrl(filename) {
  let name = filename;
  try {
    name = decodeURIComponent(filename);
  } catch (e) {
    // A literal '%' that is not a valid escape: the name was not encoded, use it as given.
  }
  name = name.replace(/ /g, '_');
  const hash = crypto.createHash('md5').update(name).digest('hex');
  const d1 = hash[0];
  const d2 = hash.substring(0, 2);
  // encodeURIComponent leaves "'" alone; escape it too so the URL can be
  // written inside a quoted string in the target source files.
  const encodedName = encodeURIComponent(name).replace(/'/g, '%27');
  return `https://upload.wikimedia.org/wikipedia/commons/thumb/${d1}/${d2}/${encodedName}/500px-${encodedName}`;
}

/**
 * Extracts `name`, `botanicalName` and `imageUri` triples from source text.
 *
 * The three keys must appear in that order; the gaps between them are
 * matched lazily, so a record missing one key can pair up with fields from
 * the record that follows it.
 *
 * @param {string} content - Source file text.
 * @returns {Array<{name: string, botanicalName: string, imageUri: string, index: number}>}
 *   One entry per match; `index` is the offset of the match in `content`.
 */
function parseEntries(content) {
  // Match blocks: find name, botanicalName, imageUri
  const entries = [];
  const regex = /name:\s*['"]([^'"]+)['"]\s*,[\s\S]*?botanicalName:\s*['"]([^'"]+)['"]\s*,[\s\S]*?imageUri:\s*['"]([^'"]+)['"]/g;
  let m;
  while ((m = regex.exec(content)) !== null) {
    entries.push({
      name: m[1],
      botanicalName: m[2],
      imageUri: m[3],
      index: m.index,
    });
  }
  return entries;
}

/**
 * Replaces every occurrence of `search` in `content` with `replacement`,
 * treating both as literal text (no regex syntax, no `$&`/`$1` expansion).
 *
 * @param {string} content - Text to edit.
 * @param {string} search - Literal text to find.
 * @param {string} replacement - Literal text to insert.
 * @returns {string} Edited text.
 */
function replaceLiteral(content, search, replacement) {
  return content.split(search).join(replacement);
}

/**
 * Checks every entry's image URL in one file, swaps broken URLs for a
 * manual fix or a Wikimedia search hit, and rewrites the file if anything
 * changed.
 *
 * @param {string} filepath - File to read and possibly rewrite (UTF-8).
 * @returns {Promise<number>} Number of distinct URLs replaced.
 */
async function processFile(filepath) {
  console.log(`\n=== Processing ${filepath} ===`);
  let content = fs.readFileSync(filepath, 'utf8');
  const entries = parseEntries(content);
  console.log(`Found ${entries.length} entries`);

  let fixCount = 0;
  // URIs already verified or already replaced; entries sharing one need no second round-trip.
  const settled = new Set();

  for (const entry of entries) {
    const { botanicalName, imageUri } = entry;
    if (settled.has(imageUri)) continue;

    // Check if URL is broken
    process.stdout.write(` Checking ${botanicalName}... `);
    const ok = await checkUrl(imageUri);
    if (ok) {
      console.log('OK');
      settled.add(imageUri);
      await sleep(100);
      continue;
    }
    console.log('BROKEN');

    let newUrl = null;
    // Check manual fixes first
    if (MANUAL_FIXES[botanicalName]) {
      const filename = MANUAL_FIXES[botanicalName];
      const thumb = wikimediaThumbUrl(filename);
      console.log(` -> Manual fix: ${thumb}`);
      newUrl = thumb;
    } else {
      // Query Wikimedia Commons API
      console.log(` -> Searching Wikimedia for "${botanicalName}"...`);
      newUrl = await searchWikimediaImage(botanicalName);
      if (newUrl) {
        console.log(` -> Found: ${newUrl}`);
      } else {
        console.log(` -> No result found, skipping`);
      }
    }

    if (newUrl && newUrl !== imageUri) {
      // Literal replacement: the URL must not be read as a regex or as a
      // replacement pattern.
      const updated = replaceLiteral(content, imageUri, newUrl);
      if (updated !== content) {
        content = updated;
        fixCount++;
        settled.add(imageUri);
      }
    } else if (newUrl) {
      console.log(` -> Replacement is identical to the current URL, skipping`);
    }
    await sleep(200);
  }

  if (fixCount > 0) {
    fs.writeFileSync(filepath, content, 'utf8');
    console.log(` => Wrote ${fixCount} fixes to ${filepath}`);
  } else {
    console.log(` => No changes needed`);
  }
  return fixCount;
}

/**
 * Runs the fixer over every file in FILES that exists and prints a total.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('GreenLens Image URL Fixer');
  console.log('========================');
  let totalFixes = 0;
  for (const file of FILES) {
    if (!fs.existsSync(file)) {
      console.log(`\nSkipping ${file} (not found)`);
      continue;
    }
    totalFixes += await processFile(file);
  }
  console.log(`\nDone. Total fixes: ${totalFixes}`);
}

main().catch((err) => {
  console.error(err);
  // Signal failure to the calling shell or CI instead of exiting 0.
  process.exitCode = 1;
});