/**
 * Batch image fetcher (run on a machine that has network access).
 *
 * For every model it tries, in order: Wikidata P18 -> same-named Commons category ->
 * Commons keyword search, downloads a few real photos into the local shared gallery
 * (uploads directory + `photos` table) and records attribution (author / license / source).
 * Once downloaded, the app serves the images locally and no longer contacts Commons at runtime.
 *
 * Usage (from the apps/api directory):
 *   node scripts/fetch-images.mjs                   # every model without a photo, 1 image each
 *   node scripts/fetch-images.mjs --per 3           # at most 3 images per model
 *   node scripts/fetch-images.mjs --limit 20        # only the first 20 models without a photo
 *   node scripts/fetch-images.mjs --category 电力机车
 *   node scripts/fetch-images.mjs --dry             # search only; no download, no DB write
 *
 * Every downloaded image is stored as a "candidate" and enters the admin candidate-review
 * queue; it is only shown as a cover / gallery image after a human confirms it
 * (candidate + manual confirmation is a double gate for accuracy and compliance).
 *
 * Note: requires network access to wikidata.org / commons.wikimedia.org (freely licensed
 * content; attribution is recorded). After the run, images live in app/data/uploads/ and
 * the rows in app/data/app.db.
 */
import Database from 'better-sqlite3';
import { mkdirSync } from 'fs';
import { rm, writeFile } from 'fs/promises';
import { join, resolve, dirname } from 'path';
import { fileURLToPath } from 'url';
import { randomBytes } from 'crypto';

const __dirname = dirname(fileURLToPath(import.meta.url));
const ROOT = resolve(__dirname, '..', '..', '..'); // Train/
const MACHINES_DB = join(ROOT, 'app', 'data', 'machines.db');
const APP_DB = process.env.APP_DB_PATH || join(ROOT, 'app', 'data', 'app.db');
const UPLOAD_DIR = process.env.UPLOAD_DIR || join(ROOT, 'app', 'data', 'uploads');
const UA = 'ChinaLocoAtlas/0.1 (image fetcher; contact admin)';

const WIKIDATA_API = 'https://www.wikidata.org/w/api.php';
const COMMONS_API = 'https://commons.wikimedia.org/w/api.php';
// Without a timeout a single stalled connection would hang the whole batch.
const API_TIMEOUT_MS = 30_000;
const DOWNLOAD_TIMEOUT_MS = 60_000;

const args = process.argv.slice(2);

/** Returns the argument following flag `k`, or undefined when the flag is absent. */
const opt = (k) => {
  const i = args.indexOf(k);
  return i >= 0 ? args[i + 1] : undefined;
};

/**
 * Reads a numeric CLI option.
 * A missing/empty value yields `fallback`. A non-numeric value aborts the script:
 * it would otherwise become NaN, and every `>=` comparison against NaN is false,
 * which silently disables the limit.
 *
 * @param {string} flag - Flag name, e.g. `--limit`.
 * @param {number} fallback - Value used when the flag is not given.
 * @returns {number} The parsed value, truncated to a whole number.
 */
function numericOption(flag, fallback) {
  const raw = opt(flag);
  if (!raw) return fallback;
  const value = Number(raw);
  if (Number.isNaN(value)) {
    console.error(`参数 ${flag} 需要一个数字,收到:${raw}`);
    process.exit(1);
  }
  return Math.floor(value);
}

const LIMIT = numericOption('--limit', Infinity);
const PER = Math.max(1, numericOption('--per', 1));
const CATEGORY = opt('--category');
const DRY = args.includes('--dry');
const STATUS = 'candidate'; // always stored as candidate; an admin confirms it in the review queue

// Maps the database category name to an English hint appended to search queries.
const CAT_HINT = {
  蒸汽机车: 'steam locomotive',
  电力机车: 'electric locomotive',
  内燃机车: 'diesel locomotive',
  动车组: 'EMU',
  客车: 'passenger car',
  货车: 'freight car',
  检测车: 'inspection car',
  旅游列车: 'tourist train',
};

const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
/** Keeps only the ASCII letter/digit/hyphen runs of `s`, concatenated; tolerates null. */
const latin = (s) => (String(s ?? '').match(/[A-Za-z0-9-]+/g) || []).join('');
/** Removes HTML tags and collapses whitespace. */
const strip = (h) => (h || '').replace(/<[^>]*>/g, '').replace(/\s+/g, ' ').trim();
/** True when `s` ends in .jpg / .jpeg / .png (case-insensitive). */
const isImg = (s) => /\.(jpe?g|png)$/i.test(s || '');
/** Search token for a model: its latin part, or the raw code when it has none. */
const codeOf = (model) => latin(model.model_code) || model.model_code;

/** Builds a MediaWiki API URL; `format=json` and `origin=*` are always included. */
function apiUrl(endpoint, params) {
  const url = new URL(endpoint);
  for (const [key, value] of Object.entries({ format: 'json', origin: '*', ...params })) {
    url.searchParams.set(key, String(value));
  }
  return url.href;
}

/**
 * GETs `url` and parses the JSON body.
 * @throws {Error} `HTTP <status>` on a non-2xx response; fetch/timeout errors propagate.
 */
async function api(url) {
  const res = await fetch(url, {
    headers: { 'User-Agent': UA },
    signal: AbortSignal.timeout(API_TIMEOUT_MS),
  });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  return res.json();
}

/** Converts one `imageinfo` entry into the { url, descriptionUrl, author, license } record. */
function toImage(ii) {
  const em = ii.extmetadata || {};
  return {
    url: ii.thumburl,
    descriptionUrl: ii.descriptionurl || '',
    author: strip(em.Artist?.value) || '未署名',
    license: strip(em.LicenseShortName?.value) || '见来源',
  };
}

/**
 * Fetches the thumbnail URL (width 1280) and attribution for a Commons `File:xxx` title.
 * @returns {Promise<object|null>} The image record, or null when no thumbnail is available.
 */
async function commonsFileInfo(title) {
  const d = await api(
    apiUrl(COMMONS_API, {
      action: 'query',
      titles: title,
      prop: 'imageinfo',
      iiprop: 'url|extmetadata',
      iiurlwidth: 1280,
    }),
  );
  const page = Object.values(d?.query?.pages || {})[0];
  const ii = page?.imageinfo?.[0];
  return ii?.thumburl ? toImage(ii) : null;
}

/** 1) Wikidata P18 main image (at most one; the most accurate source). */
async function viaWikidata(model) {
  const q = `${codeOf(model)} ${CAT_HINT[model.category] || 'train'}`;
  const s = await api(
    apiUrl(WIKIDATA_API, {
      action: 'wbsearchentities',
      language: 'en',
      type: 'item',
      limit: 3,
      search: q,
    }),
  );
  for (const hit of s?.search || []) {
    const e = await api(
      apiUrl(WIKIDATA_API, { action: 'wbgetentities', props: 'claims', ids: hit.id }),
    );
    const p18 = e?.entities?.[hit.id]?.claims?.P18?.[0]?.mainsnak?.datavalue?.value;
    if (p18) {
      const info = await commonsFileInfo('File:' + p18);
      if (info) return [{ ...info, via: 'wikidata' }];
    }
  }
  return [];
}

/** 2) Same-named Commons category (may yield several images, up to `want`). */
async function viaCategory(model, want) {
  const code = codeOf(model);
  const out = [];
  for (const cat of [`Category:China Railways ${code}`, `Category:${code}`]) {
    const d = await api(
      apiUrl(COMMONS_API, {
        action: 'query',
        list: 'categorymembers',
        cmtype: 'file',
        cmlimit: 20,
        cmtitle: cat,
      }),
    );
    for (const f of d?.query?.categorymembers || []) {
      if (!isImg(f.title)) continue;
      const info = await commonsFileInfo(f.title);
      if (info) out.push({ ...info, via: 'category' });
      if (out.length >= want) return out;
    }
  }
  return out;
}

/** 3) Commons keyword search (least accurate; last-resort fallback, up to `want`). */
async function viaSearch(model, want) {
  const q = `${codeOf(model)} ${CAT_HINT[model.category] || 'train'} China Railway`;
  const d = await api(
    apiUrl(COMMONS_API, {
      action: 'query',
      generator: 'search',
      gsrnamespace: 6,
      gsrlimit: Math.max(3, want),
      gsrsearch: q,
      prop: 'imageinfo',
      iiprop: 'url|extmetadata',
      iiurlwidth: 1280,
    }),
  );
  // `pages` is keyed by page id, so Object.values() yields page-id order; the API's
  // `index` field carries the search ranking, so restore it before taking the top hits.
  const rank = (p) => p?.index ?? Number.MAX_SAFE_INTEGER;
  const ranked = Object.values(d?.query?.pages || {}).sort((a, b) => rank(a) - rank(b));
  const out = [];
  for (const p of ranked) {
    const ii = p?.imageinfo?.[0];
    if (!ii?.thumburl || !isImg(ii.thumburl)) continue;
    out.push({ ...toImage(ii), via: 'search' });
    if (out.length >= want) break;
  }
  return out;
}

/**
 * Aggregates the three sources, de-duplicates by URL and returns at most `want` images.
 * A failing source no longer aborts the lookup: the remaining sources are still tried.
 *
 * @throws {Error} The first source error, but only when no image was found at all.
 */
async function collectImages(model, want) {
  const seen = new Set();
  const out = [];
  const errors = [];
  const sources = [
    () => viaWikidata(model),
    () => viaCategory(model, want - out.length),
    () => viaSearch(model, want - out.length),
  ];
  for (const source of sources) {
    if (out.length >= want) break;
    let found;
    try {
      found = await source();
    } catch (e) {
      errors.push(e);
      console.warn(`! ${model.model_code}:某来源检索失败,继续尝试后续来源(${e.message})`);
      continue;
    }
    for (const it of found) {
      if (out.length >= want) break;
      if (seen.has(it.url)) continue;
      seen.add(it.url);
      out.push(it);
    }
  }
  if (out.length === 0 && errors.length > 0) throw errors[0];
  return out;
}

/**
 * Downloads an image and returns its bytes.
 * @throws {Error} On a non-2xx status, a non-image content type, or an empty body, so an
 *   HTTP error page is never stored as a photo.
 */
async function downloadImage(url) {
  const res = await fetch(url, {
    headers: { 'User-Agent': UA },
    signal: AbortSignal.timeout(DOWNLOAD_TIMEOUT_MS),
  });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  const type = res.headers.get('content-type') || '';
  if (!type.toLowerCase().startsWith('image/')) {
    throw new Error(`响应不是图片(content-type: ${type || '无'})`);
  }
  const buf = Buffer.from(await res.arrayBuffer());
  if (buf.length === 0) throw new Error('响应内容为空');
  return buf;
}

/** File extension taken from the END of the URL path; falls back to `.jpg`. */
function extOf(url) {
  let path = url;
  try {
    path = new URL(url).pathname;
  } catch {
    // Not an absolute URL: match against the raw string instead.
  }
  return (path.match(/\.(jpe?g|png)$/i) || ['.jpg'])[0].toLowerCase();
}

/** Entry point: finds models without photos, fetches candidates and prints a report. */
async function main() {
  mkdirSync(UPLOAD_DIR, { recursive: true });
  const mdb = new Database(MACHINES_DB, { readonly: true });
  let adb;
  try {
    adb = new Database(APP_DB);
    const admin =
      adb.prepare("SELECT id FROM users WHERE role='admin' ORDER BY id LIMIT 1").get() ||
      adb.prepare('SELECT id FROM users ORDER BY id LIMIT 1').get();
    if (!admin) {
      console.error('找不到用户:请先启动后端注册一个管理员(或设 ADMIN_EMAIL)。');
      process.exitCode = 1; // return instead of process.exit() so the databases get closed
      return;
    }

    const where = CATEGORY ? 'WHERE c.name = ?' : '';
    const models = mdb
      .prepare(
        `SELECT m.id, m.model_code, c.name AS category
         FROM model m JOIN category c ON c.id = m.category_id ${where} ORDER BY m.id`,
      )
      .all(...(CATEGORY ? [CATEGORY] : []));
    const hasPhoto = adb.prepare('SELECT 1 FROM photos WHERE model_id = ? LIMIT 1');
    const insert = adb.prepare(
      `INSERT INTO photos (model_id, uploader_id, filename, caption, status, source_url, author, license)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
    );

    console.log(
      `取图:每车型最多 ${PER} 张,入库状态=候选(candidate)${DRY ? '(dry-run)' : ''};保存到 ${UPLOAD_DIR}`,
    );
    const report = [];
    let done = 0;
    let saved = 0;

    for (const m of models) {
      if (done >= LIMIT) break;
      if (hasPhoto.get(m.id)) continue;
      done++;

      let hits = [];
      try {
        hits = await collectImages(m, PER);
      } catch (e) {
        report.push(`✗ ${m.model_code}:检索出错 ${e.message}`);
        await sleep(300);
        continue;
      }
      if (hits.length === 0) {
        report.push(`— ${m.model_code}:未找到`);
        await sleep(200);
        continue;
      }
      if (DRY) {
        report.push(`✓ ${m.model_code}:[dry] ${hits.map((h) => h.via).join(',')} (${hits.length} 张)`);
        await sleep(200);
        continue;
      }

      let n = 0;
      for (const hit of hits) {
        try {
          const buf = await downloadImage(hit.url);
          const fn = `${Date.now()}-${randomBytes(6).toString('hex')}${extOf(hit.url)}`;
          const dest = join(UPLOAD_DIR, fn);
          await writeFile(dest, buf);
          try {
            insert.run(
              m.id,
              admin.id,
              fn,
              `${m.model_code}(${hit.via})`,
              STATUS,
              hit.descriptionUrl,
              hit.author,
              hit.license,
            );
          } catch (e) {
            // Do not leave a file on disk that no database row points to.
            await rm(dest, { force: true });
            throw e;
          }
          n++;
          saved++;
        } catch (e) {
          report.push(`✗ ${m.model_code}:下载失败 ${e.message}`);
        }
        await sleep(300); // polite rate limiting
      }
      if (n > 0) report.push(`✓ ${m.model_code}:入库 ${n} 张(${STATUS})`);
    }

    const ok = report.filter((r) => r.startsWith('✓')).length;
    const miss = report.filter((r) => r.startsWith('—')).length;
    const err = report.filter((r) => r.startsWith('✗')).length;
    console.log(report.join('\n'));
    console.log(`\n命中 ${ok} · 未找到 ${miss} · 出错 ${err}(处理 ${done} 个车型,保存 ${saved} 张)`);
    console.log('候选图已入库,请用管理员在网页"候选审图 / 图册"逐个确认后作为封面展示。');
  } finally {
    adb?.close();
    mdb.close();
  }
}

main().catch((e) => {
  console.error(e);
  process.exitCode = 1;
});