Greenlens/scripts/fix_images.js

193 lines
5.6 KiB
JavaScript

#!/usr/bin/env node
/**
* fix_images.js
* Finds broken image URLs in lexicon/catalog files and replaces them
* using Wikimedia Commons API.
*/
const crypto = require('crypto');
const fs = require('fs');
const https = require('https');
// Source files whose image URLs are scanned and repaired.
// Paths are used as given, so they resolve against the current working directory.
const FILES = [
  'constants/lexiconBatch1.ts',
  'constants/lexiconBatch2.ts',
  'services/backend/mockCatalog.ts',
];
// Hand-picked replacements, consulted before falling back to an API search.
// Key: botanical name as it appears in the catalog files.
// Value: Wikimedia Commons filename to build the thumbnail URL from.
const MANUAL_FIXES = {
  'Chlorophytum comosum': 'Chlorophytum_comosum_01.jpg',
  'Syngonium podophyllum': 'Syngonium_podophyllum1.jpg',
  'Fuchsia hybrida': 'Fuchsia_%27Beacon%27.jpg',
  'Tillandsia usneoides': 'Tillandsia_usneoides_leaves.jpg',
  'Tillandsia ionantha': 'Tillandsia_ionantha0.jpg',
};
/**
 * Pauses an async flow for the given duration.
 *
 * @param {number} ms - Delay in milliseconds, passed straight to setTimeout.
 * @returns {Promise<undefined>} Promise that fulfils once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Performs an HTTPS GET and resolves with the status code and the full body text.
 *
 * Redirect responses (301/302/303/307/308) that carry a Location header are
 * followed, with relative targets resolved against the URL that produced them.
 * A redirect response without a Location header is returned to the caller as-is.
 *
 * @param {string} url - Absolute https:// URL to request.
 * @param {number} [redirectsLeft=5] - Remaining redirect hops before giving up.
 * @returns {Promise<{status: number, body: string}>} Final response status and body.
 *   Rejects on network errors, an invalid URL, a 10 second timeout, or when the
 *   redirect budget is exhausted.
 */
function httpGet(url, redirectsLeft = 5) {
  return new Promise((resolve, reject) => {
    const req = https.get(url, {
      headers: {
        'User-Agent': 'GreenLens-ImageFixer/1.0 (educational plant app)',
      },
    }, (res) => {
      const isRedirect = [301, 302, 303, 307, 308].includes(res.statusCode);
      if (isRedirect && res.headers.location) {
        // Drain the redirect response so its socket is released.
        res.resume();
        if (redirectsLeft <= 0) {
          reject(new Error('too many redirects'));
          return;
        }
        let target;
        try {
          // Location may be relative; resolve it against the current URL.
          target = new URL(res.headers.location, url).toString();
        } catch (err) {
          reject(err);
          return;
        }
        resolve(httpGet(target, redirectsLeft - 1));
        return;
      }
      // Decode as a stream so multi-byte characters split across chunks survive.
      res.setEncoding('utf8');
      let data = '';
      res.on('data', (chunk) => {
        data += chunk;
      });
      res.on('end', () => resolve({ status: res.statusCode, body: data }));
      res.on('error', reject);
    });
    req.on('error', reject);
    req.setTimeout(10000, () => {
      req.destroy();
      reject(new Error('timeout'));
    });
  });
}
/**
 * Reports whether an image URL is reachable.
 *
 * Never rejects: every failure mode (malformed or non-https URL, network
 * error, 8 second timeout, non-200 final status) resolves to `false`, so a
 * single bad catalog entry cannot abort the whole run.
 * Redirects are followed, so a URL that redirects to a live image counts as OK.
 *
 * @param {string} url - URL to probe.
 * @param {number} [redirectsLeft=5] - Remaining redirect hops to follow.
 * @returns {Promise<boolean>} `true` only when the final response status is 200.
 */
function checkUrl(url, redirectsLeft = 5) {
  return new Promise((resolve) => {
    let req;
    try {
      req = https.get(url, {
        headers: { 'User-Agent': 'GreenLens-ImageFixer/1.0' },
      }, (res) => {
        // Only the status matters; discard the body so the socket is freed.
        res.resume();
        const isRedirect = [301, 302, 303, 307, 308].includes(res.statusCode);
        const location = res.headers.location;
        if (isRedirect && location && redirectsLeft > 0) {
          let target;
          try {
            target = new URL(location, url).toString();
          } catch (err) {
            resolve(false);
            return;
          }
          resolve(checkUrl(target, redirectsLeft - 1));
          return;
        }
        resolve(res.statusCode === 200);
      });
    } catch (err) {
      // https.get throws synchronously for malformed or non-https URLs.
      resolve(false);
      return;
    }
    req.on('error', () => resolve(false));
    req.setTimeout(8000, () => {
      req.destroy();
      resolve(false);
    });
  });
}
/**
 * Searches the Wikimedia Commons File namespace for a plant and returns the
 * URL of the best-ranked JPEG/PNG hit (500px thumbnail when the API provides one).
 *
 * @param {string} botanicalName - Search phrase, normally the botanical name.
 * @returns {Promise<string|null>} Image URL, or `null` when the request fails,
 *   the response is not usable, or no JPEG/PNG result exists. API and parse
 *   errors are logged and swallowed.
 */
async function searchWikimediaImage(botanicalName) {
  const encoded = encodeURIComponent(botanicalName);
  const url = `https://commons.wikimedia.org/w/api.php?action=query&generator=search&gsrnamespace=6&gsrsearch=${encoded}&gsrlimit=5&prop=imageinfo&iiprop=url&iiurlwidth=500&format=json`;
  // Accept .jpg / .jpeg / .png in any letter case.
  const rasterImage = /\.(?:jpe?g|png)$/i;
  // Pages without a numeric search rank sort last.
  const rankOf = (page) => (typeof page.index === 'number' ? page.index : Number.MAX_SAFE_INTEGER);
  try {
    const res = await httpGet(url);
    if (res.status !== 200) return null;
    const data = JSON.parse(res.body);
    const pages = data.query && data.query.pages;
    if (!pages) return null;
    // `pages` is keyed by page id, so plain iteration order is unrelated to
    // relevance; generator=search reports the rank in each page's `index`.
    const ranked = Object.values(pages).sort((a, b) => rankOf(a) - rankOf(b));
    for (const page of ranked) {
      const info = page.imageinfo && page.imageinfo[0];
      if (!info) continue;
      const candidate = info.thumburl || info.url;
      if (candidate && rasterImage.test(candidate)) {
        return candidate;
      }
    }
  } catch (e) {
    console.error(` API error for "${botanicalName}": ${e.message}`);
  }
  return null;
}
/**
 * Builds the 500px thumbnail URL on upload.wikimedia.org for a Commons file.
 *
 * The two directory shards in the path come from the MD5 of the file name with
 * spaces replaced by underscores. That hash must be taken over the *decoded*
 * name, so percent-escapes in the input (e.g. `%27`) are decoded before
 * hashing and the name is re-encoded when placed in the URL.
 *
 * @param {string} filename - Bare Commons file name; may contain spaces and/or
 *   percent-escapes.
 * @returns {string} Thumbnail URL. Apostrophes are emitted as `%27` so the URL
 *   can be embedded in a single-quoted string literal.
 */
function wikimediaThumbUrl(filename) {
  let decoded;
  try {
    decoded = decodeURIComponent(filename);
  } catch (err) {
    // A stray '%' that is not a valid escape: treat the name as already raw.
    decoded = filename;
  }
  const name = decoded.replace(/ /g, '_');
  const hash = crypto.createHash('md5').update(name, 'utf8').digest('hex');
  const shard1 = hash[0];
  const shard2 = hash.substring(0, 2);
  // encodeURIComponent leaves "'" untouched, so escape it explicitly.
  const encodedName = encodeURIComponent(name).replace(/'/g, '%27');
  return `https://upload.wikimedia.org/wikipedia/commons/thumb/${shard1}/${shard2}/${encodedName}/500px-${encodedName}`;
}
/**
 * Extracts catalog entries from source text by locating, in this order, a
 * `name:`, a `botanicalName:` and an `imageUri:` string property.
 *
 * Each value must open and close with the same quote character, so a value
 * may contain the other quote (e.g. "Devil's Ivy") or a backslash escape.
 * Values are returned exactly as written in the source (escapes are not
 * interpreted), which keeps `imageUri` usable for literal search-and-replace.
 *
 * Limitation: the gaps between the three properties are matched lazily and
 * are not bounded by entry braces, so an entry missing `botanicalName` or
 * `imageUri` can pick up the next entry's value.
 *
 * @param {string} content - Full text of a lexicon/catalog source file.
 * @returns {Array<{name: string, botanicalName: string, imageUri: string, index: number}>}
 *   Entries in file order; `index` is the offset of the matched `name:` key.
 */
function parseEntries(content) {
  // A fresh /g regex per call: exec() advances lastIndex statefully.
  // Groups 1/3/5 capture the opening quote; 2/4/6 the value up to the same quote.
  const entryPattern = /name:\s*(['"])((?:\\.|(?!\1)[^\\\r\n])+)\1\s*,[\s\S]*?botanicalName:\s*(['"])((?:\\.|(?!\3)[^\\\r\n])+)\3\s*,[\s\S]*?imageUri:\s*(['"])((?:\\.|(?!\5)[^\\\r\n])+)\5/g;
  const entries = [];
  let match = entryPattern.exec(content);
  while (match !== null) {
    entries.push({
      name: match[2],
      botanicalName: match[4],
      imageUri: match[6],
      index: match.index,
    });
    match = entryPattern.exec(content);
  }
  return entries;
}
/**
 * Checks every image URL found in one catalog file and rewrites the file with
 * replacements for the broken ones.
 *
 * For a broken URL the replacement comes from MANUAL_FIXES when the botanical
 * name is listed there, otherwise from a Wikimedia Commons search. All
 * occurrences of the broken URL in the file are replaced. The file is written
 * back only when at least one URL actually changed.
 *
 * @param {string} filepath - Path of the file to process.
 * @returns {Promise<number>} Number of distinct URLs that were replaced.
 */
async function processFile(filepath) {
  console.log(`\n=== Processing ${filepath} ===`);
  let content = fs.readFileSync(filepath, 'utf8');
  const entries = parseEntries(content);
  console.log(`Found ${entries.length} entries`);
  let fixCount = 0;
  // Replacement is global per URL, so each distinct URL is handled once.
  const handledUrls = new Set();
  for (const { botanicalName, imageUri } of entries) {
    process.stdout.write(` Checking ${botanicalName}... `);
    if (handledUrls.has(imageUri)) {
      console.log('DUPLICATE URL (already handled)');
      continue;
    }
    handledUrls.add(imageUri);

    const ok = await checkUrl(imageUri);
    if (ok) {
      console.log('OK');
      await sleep(100);
      continue;
    }
    console.log('BROKEN');

    let newUrl = null;
    // Hand-picked fixes take precedence over the API search.
    if (MANUAL_FIXES[botanicalName]) {
      const filename = MANUAL_FIXES[botanicalName];
      const thumb = wikimediaThumbUrl(filename);
      console.log(` -> Manual fix: ${thumb}`);
      newUrl = thumb;
    } else {
      console.log(` -> Searching Wikimedia for "${botanicalName}"...`);
      newUrl = await searchWikimediaImage(botanicalName);
      if (newUrl) {
        console.log(` -> Found: ${newUrl}`);
      } else {
        console.log(` -> No result found, skipping`);
      }
    }

    // A "replacement" identical to the broken URL is not a fix.
    if (newUrl && newUrl !== imageUri) {
      // split/join replaces every occurrence literally: no regex escaping is
      // needed and '$' sequences in the new URL are not treated as patterns.
      content = content.split(imageUri).join(newUrl);
      fixCount++;
    }
    // Pause a little longer after an API lookup to stay polite to the server.
    await sleep(200);
  }
  if (fixCount > 0) {
    fs.writeFileSync(filepath, content, 'utf8');
    console.log(` => Wrote ${fixCount} fixes to ${filepath}`);
  } else {
    console.log(` => No changes needed`);
  }
  return fixCount;
}
/**
 * Script driver: prints a banner, processes each path in FILES that exists on
 * disk (one after another), and prints the combined number of fixes.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('GreenLens Image URL Fixer');
  console.log('========================');
  let totalFixes = 0;
  for (const target of FILES) {
    if (fs.existsSync(target)) {
      totalFixes += await processFile(target);
    } else {
      console.log(`\nSkipping ${target} (not found)`);
    }
  }
  console.log(`\nDone. Total fixes: ${totalFixes}`);
}
// Run the script. An unhandled failure is logged and reflected in the exit
// code so shells and CI can detect that the run did not complete.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});