init: AI培训与智能巡检系统

This commit is contained in:
selfrelease
2026-06-16 00:55:20 +08:00
commit c55598494b
201 changed files with 53131 additions and 0 deletions
+255
View File
@@ -0,0 +1,255 @@
/**
* 批量取图脚本(在有网络的本机运行):
* 按 Wikidata P18 → Commons 同名分类 → Commons 关键词搜索 的顺序,
* 为每个车型下载若干张真实照片到「本地共享图库」(uploads + photos 表),
* 并记录署名(作者/许可证/来源)。下载后应用即从本地提供图片,运行时不再访问 Commons。
*
* 用法(在 apps/api 目录):
* node scripts/fetch-images.mjs # 全部缺图车型,每个 1 张
* node scripts/fetch-images.mjs --per 3 # 每个车型最多 3 张
* node scripts/fetch-images.mjs --limit 20 # 仅处理前 20 个缺图车型
* node scripts/fetch-images.mjs --category 电力机车
* node scripts/fetch-images.mjs --dry # 只检索不下载不入库
*
* 所有下载图片一律入库为「候选(candidate)」,进入管理员「候选审图」队列,
* 由人工确认后才会作为封面/图册展示(候选 + 人工确认 双闸门,保证准确与合规)。
*
* 注意:需联网访问 wikidata.org / commons.wikimedia.org(自由授权,已记录署名)。
* 下载完成后图片保存在本地 app/data/uploads/,库表记录在 app/data/app.db。
*/
import Database from 'better-sqlite3';
import { mkdirSync } from 'fs';
import { writeFile } from 'fs/promises';
import { join, resolve, dirname } from 'path';
import { fileURLToPath } from 'url';
import { randomBytes } from 'crypto';
// ES modules have no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));
// Project root: three directory levels above the directory holding this script.
const ROOT = resolve(__dirname, '../../..');
// Shared data directory that holds both SQLite files and the upload folder.
const DATA_DIR = join(ROOT, 'app', 'data');
// Model catalogue database (opened read-only by main()).
const MACHINES_DB = join(DATA_DIR, 'machines.db');
// Application database; overridable through the APP_DB_PATH environment variable.
const APP_DB = process.env.APP_DB_PATH || join(DATA_DIR, 'app.db');
// Destination folder for downloaded images; overridable through UPLOAD_DIR.
const UPLOAD_DIR = process.env.UPLOAD_DIR || join(DATA_DIR, 'uploads');
// User-Agent header sent with every outgoing HTTP request.
const UA = 'ChinaLocoAtlas/0.1 (image fetcher; contact admin)';
// ---- Command-line options -------------------------------------------------
const args = process.argv.slice(2);

/**
 * Returns the token that follows flag `k` on the command line.
 * @param {string} k - Flag name, e.g. '--limit'.
 * @returns {string | undefined} The next token, or undefined when the flag is
 *   absent or is the last argument.
 */
const opt = (k) => {
  const i = args.indexOf(k);
  return i >= 0 ? args[i + 1] : undefined;
};

/**
 * Converts the raw value of a numeric flag into a number.
 *
 * A missing/empty value yields `fallback`. A value that is not a number
 * (for example `--limit --dry`, where the "value" is really the next flag)
 * is rejected: a NaN limit would otherwise silently disable the limit,
 * because every `x >= NaN` comparison is false.
 *
 * @param {string} flag - Flag name, used only in the error message.
 * @param {string | undefined} raw - Raw token taken from the command line.
 * @param {number} fallback - Value used when the flag was not supplied.
 * @returns {number}
 * @throws {Error} When `raw` is present but not numeric.
 */
const numOpt = (flag, raw, fallback) => {
  if (!raw) return fallback;
  const value = Number(raw);
  if (Number.isNaN(value)) {
    throw new Error(`无效参数:${flag} 需要一个数字,收到 "${raw}"`);
  }
  return value;
};

// Maximum number of photo-less models to process (unlimited by default).
const LIMIT = numOpt('--limit', opt('--limit'), Infinity);
// Maximum number of images per model; never less than 1.
const PER = Math.max(1, numOpt('--per', opt('--per'), 1));
// Optional category-name filter applied to the model query.
const CATEGORY = opt('--category');
// Dry run: search only, no download and no database insert.
const DRY = args.includes('--dry');
// Every downloaded image is stored as a candidate, to be confirmed by an admin.
const STATUS = 'candidate';
/**
 * Maps a category name (as stored in the model database) to the English
 * phrase that is appended to search queries for models of that category.
 */
const CAT_HINT = {
  '蒸汽机车': 'steam locomotive',
  '电力机车': 'electric locomotive',
  '内燃机车': 'diesel locomotive',
  '动车组': 'EMU',
  '客车': 'passenger car',
  '货车': 'freight car',
  '检测车': 'inspection car',
  '旅游列车': 'tourist train',
};
/** Returns a promise that resolves after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}

/**
 * Keeps only the runs of ASCII letters, digits and hyphens found in `s`
 * and concatenates them; returns '' when there are none.
 */
function latin(s) {
  const runs = s.match(/[A-Za-z0-9-]+/g);
  return runs === null ? '' : runs.join('');
}

/**
 * Removes HTML tags from `h`, collapses each whitespace run to one space
 * and trims the result. A falsy input yields ''.
 */
function strip(h) {
  const raw = h || '';
  const withoutTags = raw.replace(/<[^>]*>/g, '');
  return withoutTags.replace(/\s+/g, ' ').trim();
}

/** True when `s` ends in .jpg, .jpeg or .png (case-insensitive). */
function isImg(s) {
  const name = s || '';
  return /\.(jpe?g|png)$/i.test(name);
}
/**
 * GETs `url` with the script's User-Agent header and returns the parsed
 * JSON body.
 * @param {string} url
 * @returns {Promise<any>} Parsed JSON response.
 * @throws {Error} 'HTTP <status>' when the response status is not OK.
 */
async function api(url) {
  const requestInit = { headers: { 'User-Agent': UA } };
  const response = await fetch(url, requestInit);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`HTTP ${response.status}`);
}
/**
 * Queries Commons for one file page (a "File:xxx" title) and returns its
 * thumbnail URL (requested with iiurlwidth=1280) plus attribution data.
 *
 * @param {string} title - Commons page title, e.g. 'File:Example.jpg'.
 * @returns {Promise<{url: string, descriptionUrl: string, author: string,
 *   license: string} | null>} null when the response carries no thumbnail URL.
 */
async function commonsFileInfo(title) {
  const endpoint = 'https://commons.wikimedia.org/w/api.php?action=query&format=json&origin=*';
  const params = `&titles=${encodeURIComponent(title)}&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=1280`;
  const data = await api(endpoint + params);

  // Only the first page of the response is considered.
  const [firstPage] = Object.values(data?.query?.pages || {});
  const info = firstPage?.imageinfo?.[0];
  if (!info?.thumburl) {
    return null;
  }

  const meta = info.extmetadata || {};
  // Metadata values may contain markup; strip() reduces them to plain text.
  const author = strip(meta.Artist?.value) || '未署名';
  const license = strip(meta.LicenseShortName?.value) || '见来源';
  return {
    url: info.thumburl,
    descriptionUrl: info.descriptionurl || '',
    author,
    license,
  };
}
/**
 * Source 1: the Wikidata P18 (image) claim. Yields at most one image.
 *
 * Searches Wikidata entities for "<model code> <category hint>", then walks
 * the hits in order and returns the first one whose P18 file resolves to a
 * Commons thumbnail.
 *
 * @param {{model_code: string, category: string}} model
 * @returns {Promise<object[]>} A one-element array tagged `via: 'wikidata'`,
 *   or an empty array when nothing usable was found.
 */
async function viaWikidata(model) {
  const code = latin(model.model_code) || model.model_code;
  const hint = CAT_HINT[model.category] || 'train';
  const query = `${code} ${hint}`;

  const searchUrl =
    'https://www.wikidata.org/w/api.php?action=wbsearchentities&format=json&origin=*' +
    `&language=en&type=item&limit=3&search=${encodeURIComponent(query)}`;
  const found = await api(searchUrl);
  const candidates = found?.search || [];

  for (const { id } of candidates) {
    const entityUrl =
      'https://www.wikidata.org/w/api.php?action=wbgetentities&format=json&origin=*' +
      `&props=claims&ids=${id}`;
    const entity = await api(entityUrl);
    const fileName = entity?.entities?.[id]?.claims?.P18?.[0]?.mainsnak?.datavalue?.value;
    if (!fileName) continue;

    const info = await commonsFileInfo('File:' + fileName);
    if (info) {
      return [{ ...info, via: 'wikidata' }];
    }
  }
  return [];
}
/**
 * Source 2: Commons categories named after the model. May yield several images.
 *
 * Tries "Category:China Railways <code>" first and "Category:<code>" second,
 * listing up to 20 file members of each and keeping the .jpg/.jpeg/.png ones
 * whose file info resolves.
 *
 * @param {{model_code: string}} model
 * @param {number} want - Stop as soon as this many images were collected.
 * @returns {Promise<object[]>} Image records tagged `via: 'category'`.
 */
async function viaCategory(model, want) {
  const code = latin(model.model_code) || model.model_code;
  const categories = [`Category:China Railways ${code}`, `Category:${code}`];
  const listBase = 'https://commons.wikimedia.org/w/api.php?action=query&format=json&origin=*';
  const found = [];

  for (const categoryTitle of categories) {
    const listing = await api(
      listBase +
        `&list=categorymembers&cmtype=file&cmlimit=20&cmtitle=${encodeURIComponent(categoryTitle)}`,
    );
    const members = listing?.query?.categorymembers || [];

    for (const { title } of members) {
      if (isImg(title)) {
        const info = await commonsFileInfo(title);
        if (info) {
          found.push({ ...info, via: 'category' });
        }
        if (found.length >= want) {
          return found;
        }
      }
    }
  }
  return found;
}
/**
 * Source 3: Commons full-text search in the File namespace. This is the
 * least precise source and serves as the fallback; may yield several images.
 *
 * The query is "<model code> <category hint> China Railway". Results are
 * taken in search-rank order and only thumbnails ending in .jpg/.jpeg/.png
 * are kept.
 *
 * @param {{model_code: string, category: string}} model
 * @param {number} want - Stop as soon as this many images were collected.
 * @returns {Promise<object[]>} Image records tagged `via: 'search'`.
 */
async function viaSearch(model, want) {
  const q = `${latin(model.model_code) || model.model_code} ${CAT_HINT[model.category] || 'train'} China Railway`;
  const d = await api(
    'https://commons.wikimedia.org/w/api.php?action=query&format=json&origin=*' +
      `&generator=search&gsrnamespace=6&gsrlimit=${Math.max(3, want)}&gsrsearch=${encodeURIComponent(q)}` +
      '&prop=imageinfo&iiprop=url|extmetadata&iiurlwidth=1280',
  );

  // `pages` is keyed by page id, and JavaScript enumerates integer-like keys
  // in ascending numeric order, so Object.values() alone would hand back the
  // pages by page id rather than by relevance. Generator results carry an
  // `index` field with the search rank; sort on it so the best matches are
  // considered first. Pages without a usable index go last, in their
  // original relative order (Array.prototype.sort is stable).
  const rank = (p) => (Number.isFinite(p?.index) ? p.index : Number.MAX_SAFE_INTEGER);
  const pages = Object.values(d?.query?.pages || {}).sort((a, b) => rank(a) - rank(b));

  const out = [];
  for (const p of pages) {
    const ii = p?.imageinfo?.[0];
    if (!ii?.thumburl || !isImg(ii.thumburl)) continue;
    const em = ii.extmetadata || {};
    out.push({
      url: ii.thumburl,
      descriptionUrl: ii.descriptionurl || '',
      author: strip(em.Artist?.value) || '未署名',
      license: strip(em.LicenseShortName?.value) || '见来源',
      via: 'search',
    });
    if (out.length >= want) break;
  }
  return out;
}
/**
 * Gathers images for one model from the three sources in order of
 * decreasing precision (Wikidata, Commons category, Commons search),
 * de-duplicated by `url`. A later source is only queried while fewer than
 * `want` images have been collected, and is asked only for the shortfall.
 *
 * @param {object} model - Model row passed through to each source.
 * @param {number} want - Maximum number of images to return.
 * @returns {Promise<object[]>} At most `want` image records.
 */
async function collectImages(model, want) {
  // Map keyed by url: gives de-duplication and keeps insertion order.
  const picked = new Map();
  const absorb = (items) => {
    for (const item of items) {
      if (picked.size >= want) return;
      if (!picked.has(item.url)) {
        picked.set(item.url, item);
      }
    }
  };

  absorb(await viaWikidata(model));
  if (picked.size < want) {
    absorb(await viaCategory(model, want - picked.size));
  }
  if (picked.size < want) {
    absorb(await viaSearch(model, want - picked.size));
  }
  return [...picked.values()];
}
/**
 * Script entry point.
 *
 * For every model (optionally restricted to one category) that has no row in
 * `photos` yet, collects up to PER images, downloads them into UPLOAD_DIR and
 * inserts one `photos` row per image with the candidate status and its
 * attribution. Stops after LIMIT photo-less models. With --dry, only the
 * search is performed: nothing is downloaded or inserted.
 *
 * A per-model report and a summary line are printed at the end. When no user
 * exists to own the photos, an error is printed and the exit code is set to 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  mkdirSync(UPLOAD_DIR, { recursive: true });
  const mdb = new Database(MACHINES_DB, { readonly: true });
  const adb = new Database(APP_DB);
  try {
    // Photos are attributed to the first admin, or failing that the first user.
    const admin =
      adb.prepare("SELECT id FROM users WHERE role='admin' ORDER BY id LIMIT 1").get() ||
      adb.prepare('SELECT id FROM users ORDER BY id LIMIT 1').get();
    if (!admin) {
      console.error('找不到用户:请先启动后端注册一个管理员(或设 ADMIN_EMAIL)。');
      // Set the exit code and return (instead of process.exit) so that the
      // `finally` block below still closes both databases.
      process.exitCode = 1;
      return;
    }

    const where = CATEGORY ? 'WHERE c.name = ?' : '';
    const models = mdb
      .prepare(
        `SELECT m.id, m.model_code, c.name AS category
         FROM model m JOIN category c ON c.id = m.category_id ${where}
         ORDER BY m.id`,
      )
      .all(...(CATEGORY ? [CATEGORY] : []));
    const hasPhoto = adb.prepare('SELECT 1 FROM photos WHERE model_id = ? LIMIT 1');
    const insert = adb.prepare(
      `INSERT INTO photos (model_id, uploader_id, filename, caption, status, source_url, author, license)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
    );

    console.log(
      `取图:每车型最多 ${PER} 张,入库状态=候选(candidate)${DRY ? 'dry-run' : ''};保存到 ${UPLOAD_DIR}`,
    );

    const report = [];
    let done = 0; // photo-less models processed so far
    let saved = 0; // images written and inserted
    // Outcome counters for the summary line. They are counted explicitly
    // rather than derived from the report text, whose lines carry no marker
    // that could be filtered on.
    let ok = 0;
    let miss = 0;
    let err = 0;

    for (const m of models) {
      if (done >= LIMIT) break;
      if (hasPhoto.get(m.id)) continue; // only models without any photo
      done++;

      let hits = [];
      try {
        hits = await collectImages(m, PER);
      } catch (e) {
        err++;
        report.push(`${m.model_code}:检索出错 ${e.message}`);
        await sleep(300);
        continue;
      }
      if (hits.length === 0) {
        miss++;
        report.push(`${m.model_code}:未找到`);
        await sleep(200);
        continue;
      }
      if (DRY) {
        ok++;
        report.push(`${m.model_code}[dry] ${hits.map((h) => h.via).join(',')} (${hits.length} 张)`);
        await sleep(200);
        continue;
      }

      let n = 0;
      for (const hit of hits) {
        try {
          const res = await fetch(hit.url, { headers: { 'User-Agent': UA } });
          // Without this check the body of a 404/429 error response would be
          // written to disk and stored as if it were an image.
          if (!res.ok) throw new Error('HTTP ' + res.status);
          const buf = Buffer.from(await res.arrayBuffer());
          // Take the extension from the end of the URL path so that a
          // ".jpg"/".png" occurring earlier in the URL cannot be picked up.
          const ext = (new URL(hit.url).pathname.match(/\.(jpe?g|png)$/i) || ['.jpg'])[0].toLowerCase();
          const fn = `${Date.now()}-${randomBytes(6).toString('hex')}${ext}`;
          await writeFile(join(UPLOAD_DIR, fn), buf);
          insert.run(m.id, admin.id, fn, `${m.model_code}${hit.via}`, STATUS, hit.descriptionUrl, hit.author, hit.license);
          n++;
          saved++;
        } catch (e) {
          err++;
          report.push(`${m.model_code}:下载失败 ${e.message}`);
        }
        await sleep(300); // polite rate limiting
      }
      if (n > 0) {
        ok++;
        report.push(`${m.model_code}:入库 ${n} 张(${STATUS})`);
      }
    }

    console.log(report.join('\n'));
    console.log(`\n命中 ${ok} · 未找到 ${miss} · 出错 ${err}(处理 ${done} 个车型,保存 ${saved} 张)`);
    console.log('候选图已入库,请用管理员在网页"候选审图 / 图册"逐个确认后作为封面展示。');
  } finally {
    mdb.close();
    adb.close();
  }
}

// Report a fatal error explicitly instead of leaving the promise unhandled.
main().catch((e) => {
  console.error(e);
  process.exitCode = 1;
});