193 lines
5.6 KiB
JavaScript
193 lines
5.6 KiB
JavaScript
#!/usr/bin/env node
|
|
/**
 * fix_images.js
 * Finds broken image URLs in lexicon/catalog files and replaces them
 * using the Wikimedia Commons API.
 */
|
|
|
|
const crypto = require('crypto');
const fs = require('fs');
const https = require('https');
|
|
|
|
// Catalog source files to scan. The paths are relative, so they are resolved
// against the directory the script is launched from.
const FILES = [
  'constants/lexiconBatch1.ts',
  'constants/lexiconBatch2.ts',
  'services/backend/mockCatalog.ts',
];
|
|
|
|
// Known manual fixes (botanicalName -> correct Wikimedia Commons filename).
// A plant listed here skips the Commons search: processFile() expands the
// filename into a thumbnail URL with wikimediaThumbUrl() instead.
// Filenames may be percent-encoded (e.g. %27 for an apostrophe).
const MANUAL_FIXES = {
  'Chlorophytum comosum': 'Chlorophytum_comosum_01.jpg',
  'Syngonium podophyllum': 'Syngonium_podophyllum1.jpg',
  'Fuchsia hybrida': 'Fuchsia_%27Beacon%27.jpg',
  'Tillandsia usneoides': 'Tillandsia_usneoides_leaves.jpg',
  'Tillandsia ionantha': 'Tillandsia_ionantha0.jpg',
};
|
|
|
|
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
|
|
|
|
/**
 * Performs an HTTPS GET and resolves with the status code and body of the
 * final response.
 *
 * Redirects (301, 302, 303, 307, 308) are followed up to `redirectsLeft`
 * times; a relative `Location` header is resolved against the URL that
 * produced it. Any other status, including 4xx/5xx, resolves normally so the
 * caller can inspect `status`.
 *
 * @param {string} url - Absolute https:// URL to fetch.
 * @param {number} [redirectsLeft=5] - Remaining redirects that may be followed.
 * @returns {Promise<{status: number, body: string}>} Status and UTF-8 body.
 *   Rejects on network errors, on a 10 s socket timeout ('timeout'), on a
 *   redirect without a usable Location header, and when the redirect budget
 *   is exhausted ('too many redirects').
 */
function httpGet(url, redirectsLeft = 5) {
  const REDIRECT_CODES = [301, 302, 303, 307, 308];

  return new Promise((resolve, reject) => {
    const req = https.get(url, {
      headers: {
        'User-Agent': 'GreenLens-ImageFixer/1.0 (educational plant app)',
      },
    }, (res) => {
      if (REDIRECT_CODES.includes(res.statusCode)) {
        // Drain the redirect body so the connection is not left hanging.
        res.resume();

        const location = res.headers.location;
        if (!location) {
          reject(new Error(`redirect (${res.statusCode}) without Location header`));
          return;
        }
        if (redirectsLeft <= 0) {
          reject(new Error('too many redirects'));
          return;
        }
        try {
          // Location may be relative; new URL() resolves it against `url`
          // and throws if the result is not a valid URL.
          resolve(httpGet(new URL(location, url).href, redirectsLeft - 1));
        } catch (err) {
          reject(err);
        }
        return;
      }

      // Collect raw buffers and decode once at the end: decoding chunk by
      // chunk would corrupt multi-byte characters split across two chunks.
      const chunks = [];
      res.on('data', (chunk) => chunks.push(chunk));
      res.on('error', reject);
      res.on('end', () => resolve({
        status: res.statusCode,
        body: Buffer.concat(chunks).toString('utf8'),
      }));
    });

    req.on('error', reject);
    req.setTimeout(10000, () => {
      req.destroy();
      reject(new Error('timeout'));
    });
  });
}
|
|
|
|
/**
 * Tests whether a URL answers an HTTPS GET with status 200.
 *
 * Never rejects: network errors, an 8 s socket timeout, any status other than
 * 200 (redirects included, as they are not followed) and URLs that
 * `https.get` refuses outright all resolve to `false`.
 *
 * @param {string} url - URL to probe.
 * @returns {Promise<boolean>} `true` only for a 200 response.
 */
function checkUrl(url) {
  return new Promise((resolve) => {
    let req;
    try {
      req = https.get(url, {
        headers: { 'User-Agent': 'GreenLens-ImageFixer/1.0' },
      }, (res) => {
        // Only the status matters; discard the body.
        res.resume();
        resolve(res.statusCode === 200);
      });
    } catch (err) {
      // https.get throws synchronously for malformed or non-https URLs.
      // Treat that like any other unreachable URL instead of rejecting.
      resolve(false);
      return;
    }

    req.on('error', () => resolve(false));
    req.setTimeout(8000, () => {
      req.destroy();
      resolve(false);
    });
  });
}
|
|
|
|
/**
 * Searches Wikimedia Commons (file namespace) for an image of a plant and
 * returns the URL of the best-ranked JPEG/PNG result.
 *
 * The request asks for up to five results with a 500px-wide thumbnail; the
 * thumbnail URL is preferred and the original file URL is the fallback.
 *
 * @param {string} botanicalName - Search term, e.g. "Ficus lyrata".
 * @returns {Promise<string|null>} Image URL, or `null` when the API does not
 *   answer 200, returns no usable result, or the request/parse fails (the
 *   failure is logged, not thrown).
 */
async function searchWikimediaImage(botanicalName) {
  const encoded = encodeURIComponent(botanicalName);
  const url = `https://commons.wikimedia.org/w/api.php?action=query&generator=search&gsrnamespace=6&gsrsearch=${encoded}&gsrlimit=5&prop=imageinfo&iiprop=url&iiurlwidth=500&format=json`;

  // Pages without a numeric rank sort last; sort() is stable, so they keep
  // their relative order.
  const rankOf = (page) => (typeof page.index === 'number' ? page.index : Number.MAX_SAFE_INTEGER);

  try {
    const res = await httpGet(url);
    if (res.status !== 200) return null;

    const data = JSON.parse(res.body);
    const pages = data.query && data.query.pages;
    if (!pages) return null;

    // `pages` is keyed by page id, so Object.values() alone yields results
    // in page-id order. Search-generator results carry an `index` field with
    // the search rank; sort on it so the best match is tried first.
    const ranked = Object.values(pages).sort((a, b) => rankOf(a) - rankOf(b));

    for (const page of ranked) {
      const info = page.imageinfo && page.imageinfo[0];
      if (!info) continue;

      const thumbUrl = info.thumburl || info.url;
      // Accept .jpg/.jpeg/.png in any letter case.
      if (thumbUrl && /\.(?:jpe?g|png)$/i.test(thumbUrl)) {
        return thumbUrl;
      }
    }
  } catch (e) {
    console.error(`    API error for "${botanicalName}": ${e.message}`);
  }
  return null;
}
|
|
|
|
/**
 * Builds the 500px thumbnail URL on upload.wikimedia.org for a bare Commons
 * filename.
 *
 * Commons stores files under `<h>/<hh>/`, where `h` and `hh` are the first
 * one and two hex digits of the MD5 of the filename (underscores for spaces).
 * The hash must be taken over the decoded name, so a percent-encoded input
 * such as `Fuchsia_%27Beacon%27.jpg` is decoded before hashing and
 * re-encoded for the URL.
 *
 * @param {string} filename - Commons filename, with spaces or underscores,
 *   optionally percent-encoded.
 * @returns {string} Thumbnail URL.
 */
function wikimediaThumbUrl(filename) {
  let decoded;
  try {
    decoded = decodeURIComponent(filename);
  } catch (err) {
    // A literal '%' that is not a valid escape: the name is not encoded.
    decoded = filename;
  }
  const title = decoded.replace(/ /g, '_');

  const hash = crypto.createHash('md5').update(title, 'utf8').digest('hex');
  const d1 = hash[0];
  const d2 = hash.substring(0, 2);

  // encodeURIComponent leaves apostrophes alone. Escape them too, because the
  // URL is written back into quoted string literals in the catalog files.
  const name = encodeURIComponent(title).replace(/'/g, '%27');

  return `https://upload.wikimedia.org/wikipedia/commons/thumb/${d1}/${d2}/${name}/500px-${name}`;
}
|
|
|
|
/**
 * Extracts catalog entries from TypeScript source text by pattern matching.
 *
 * An entry is a `name: '…',` followed (lazily, across any text) by
 * `botanicalName: '…',` and then `imageUri: '…'`. Each value may use single
 * or double quotes and may contain the other quote character or
 * backslash-escaped characters; escapes are returned as written.
 *
 * NOTE(review): the lazy gaps between the three fields do not stop at object
 * boundaries, so an entry that lacks `imageUri` would be paired with the
 * `imageUri` of a later entry.
 *
 * @param {string} content - Source text to scan.
 * @returns {Array<{name: string, botanicalName: string, imageUri: string, index: number}>}
 *   Entries in source order; `index` is the offset of the matched `name:`.
 */
function parseEntries(content) {
  const entries = [];
  // Each value is matched as (quote)(body)(same quote). The body accepts any
  // character except the opening quote or a backslash, or an escape pair,
  // so "Devil's Ivy" parses instead of being rejected at the apostrophe.
  // Groups: 1/3/5 = quote characters, 2/4/6 = values.
  const regex = /name:\s*(['"])((?:\\.|(?!\1)[^\\])+)\1\s*,[\s\S]*?botanicalName:\s*(['"])((?:\\.|(?!\3)[^\\])+)\3\s*,[\s\S]*?imageUri:\s*(['"])((?:\\.|(?!\5)[^\\])+)\5/g;

  let m;
  while ((m = regex.exec(content)) !== null) {
    entries.push({
      name: m[2],
      botanicalName: m[4],
      imageUri: m[6],
      index: m.index,
    });
  }
  return entries;
}
|
|
|
|
/**
 * Checks every entry's image URL in one catalog file and rewrites the file
 * with replacements for the broken ones.
 *
 * For each entry returned by parseEntries(): the URL is probed with
 * checkUrl(); if broken, a replacement comes from MANUAL_FIXES (expanded via
 * wikimediaThumbUrl()) or, failing that, from searchWikimediaImage(). A
 * replacement is applied to every occurrence of the old URL in the file.
 * The file is written only if at least one URL was replaced.
 *
 * NOTE(review): because replacement covers every occurrence, entries that
 * share one broken URL all receive the image found for the first of them.
 *
 * @param {string} filepath - Path of the file to read and possibly rewrite.
 * @returns {Promise<number>} Number of distinct URLs that were replaced.
 */
async function processFile(filepath) {
  console.log(`\n=== Processing ${filepath} ===`);
  let content = fs.readFileSync(filepath, 'utf8');
  const entries = parseEntries(content);
  console.log(`Found ${entries.length} entries`);

  let fixCount = 0;
  // imageUri -> checkUrl() verdict, so a URL shared by several entries is
  // probed only once.
  const reachable = new Map();
  // Old URLs already swapped out of `content`; later entries that used the
  // same URL need no work and must not be counted again.
  const replaced = new Set();

  for (const entry of entries) {
    const { botanicalName, imageUri } = entry;

    // Check if URL is broken
    process.stdout.write(`  Checking ${botanicalName}... `);
    if (replaced.has(imageUri)) {
      console.log('already fixed');
      continue;
    }

    let ok = reachable.get(imageUri);
    if (ok === undefined) {
      ok = await checkUrl(imageUri);
      reachable.set(imageUri, ok);
    }
    if (ok) {
      console.log('OK');
      await sleep(100);
      continue;
    }
    console.log('BROKEN');

    let newUrl = null;

    // Check manual fixes first. Own-property test so that a name such as
    // "constructor" cannot pick up an inherited Object.prototype member.
    if (Object.prototype.hasOwnProperty.call(MANUAL_FIXES, botanicalName)) {
      const filename = MANUAL_FIXES[botanicalName];
      const thumb = wikimediaThumbUrl(filename);
      console.log(`    -> Manual fix: ${thumb}`);
      newUrl = thumb;
    } else {
      // Query Wikimedia Commons API
      console.log(`    -> Searching Wikimedia for "${botanicalName}"...`);
      newUrl = await searchWikimediaImage(botanicalName);
      if (newUrl) {
        console.log(`    -> Found: ${newUrl}`);
      } else {
        console.log(`    -> No result found, skipping`);
      }
    }

    if (newUrl) {
      // Literal replace-all. split/join treats both strings verbatim, whereas
      // String.replace would expand "$&", "$1", ... found in the new URL.
      content = content.split(imageUri).join(newUrl);
      replaced.add(imageUri);
      fixCount++;
    }

    await sleep(200);
  }

  if (fixCount > 0) {
    fs.writeFileSync(filepath, content, 'utf8');
    console.log(`  => Wrote ${fixCount} fixes to ${filepath}`);
  } else {
    console.log(`  => No changes needed`);
  }

  return fixCount;
}
|
|
|
|
/**
 * Script entry point: runs processFile() on every path in FILES that exists,
 * one after another, and prints the combined number of fixes.
 *
 * Missing files are reported and skipped. An error thrown by processFile()
 * propagates to the caller and stops the remaining files.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('GreenLens Image URL Fixer');
  console.log('========================');

  let totalFixes = 0;
  for (const file of FILES) {
    if (!fs.existsSync(file)) {
      console.log(`\nSkipping ${file} (not found)`);
      continue;
    }
    totalFixes += await processFile(file);
  }

  console.log(`\nDone. Total fixes: ${totalFixes}`);
}
|
|
|
|
// Run the script. On failure, log the error and set a non-zero exit code so
// shells and CI jobs can tell the run did not complete.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|