Files
WenwuMap/scripts/download-artifact-images.mjs
selfrelease 2d847e154f chore: 初始化仓库
中华文明全图鉴——文物全图系统(PC Web 地图 + NestJS API + 管理后台)。
含三大 IP(文物南迁北归 / 国宝海外回归 / 博物馆手艺人)、AI 文物对话、
文物地图与详情、以及 demo-video-kit 演示视频生成工具。
2026-06-13 20:55:44 +08:00

97 lines
3.0 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import { chromium } from "playwright";
import fs from "fs";
import path from "path";
// Base URL of the backend API; the API_URL environment variable overrides the local default.
const { API_URL: API = "http://localhost:3002" } = process.env;
// Absolute directory that receives the downloaded images, resolved against the current working directory.
const OUT = path.resolve(process.cwd(), "apps/web/public/artifacts");
// Create the output directory (and any missing parents) up front so later writes cannot fail on a missing path.
fs.mkdirSync(OUT, { recursive: true });
/**
 * Rewrite an image URL so that it requests a rendition of the given width.
 *
 * If the query string already carries a `width=<digits>` parameter (in any
 * position, not only first), its value is overwritten. Otherwise the
 * parameter is appended, using `?` or `&` depending on whether the URL
 * already has a query string.
 *
 * @param {string} url - Source image URL.
 * @param {number} [w=1600] - Requested width in pixels.
 * @returns {string} The URL with `width` set to `w`.
 */
const toHd = (url, w = 1600) => {
  // Anchor on the preceding `?` or `&` so look-alikes such as `maxwidth=` are not touched.
  const widthParam = /([?&])width=\d+/;
  if (widthParam.test(url)) {
    return url.replace(widthParam, `$1width=${w}`);
  }
  const separator = url.includes("?") ? "&" : "?";
  return `${url}${separator}width=${w}`;
};
/**
 * Strip parenthesised annotations from an artifact name so it can be used as
 * a search term.
 *
 * Both full-width parentheses (U+FF08 ... U+FF09) and ASCII parentheses are
 * removed together with their contents, then surrounding whitespace is
 * trimmed. The full-width characters are written as \u escapes so the pattern
 * cannot be silently lost by tooling that strips "ambiguous" Unicode.
 *
 * @param {string} n - Raw artifact name.
 * @returns {string} The name without parenthesised parts.
 */
const cleanName = (n) =>
  n
    .replace(/\uFF08[^\uFF09]*\uFF09/g, "")
    .replace(/\([^)]*\)/g, "")
    .trim();
/**
 * Search Wikimedia Commons by artifact name through the MediaWiki API and
 * return a direct link to a 1600px-wide thumbnail.
 *
 * The request is issued from inside the browser page via `page.evaluate`.
 * It asks for at most six search hits in namespace 6 together with each
 * hit's URL, MIME type and a thumbnail scaled to 1600px.
 *
 * This is a best-effort lookup: any failure is reported with `console.warn`
 * and turned into `null` rather than propagated.
 *
 * @param {object} page - Object exposing `evaluate(fn, arg)`; the caller in this file passes a Playwright page.
 * @param {string} name - Artifact name; it is passed through `cleanName` before searching.
 * @returns {Promise<string|null>} Thumbnail URL of the first JPEG/PNG hit, or `null` when nothing usable was found.
 */
async function resolveFromCommons(page, name) {
  const q = cleanName(name);
  if (!q) {
    // Nothing left to search for once annotations are stripped; skip the request.
    return null;
  }

  const api =
    "https://commons.wikimedia.org/w/api.php?action=query&format=json&origin=*" +
    "&generator=search&gsrnamespace=6&gsrlimit=6" +
    `&gsrsearch=${encodeURIComponent(q)}` +
    "&prop=imageinfo&iiprop=url|mime&iiurlwidth=1600";

  try {
    const data = await page.evaluate((u) => fetch(u).then((r) => r.json()), api);
    const pages = data?.query?.pages ? Object.values(data.query.pages) : [];
    // Only raster JPEG/PNG hits that actually carry a thumbnail URL are acceptable.
    const hit = pages
      .map((pg) => pg.imageinfo?.[0])
      .find((info) => info && /^image\/(jpeg|png)$/.test(info.mime) && info.thumburl);
    return hit ? hit.thumburl : null;
  } catch (err) {
    // Stay best-effort, but surface the reason instead of hiding it.
    console.warn(`Commons lookup failed for "${q}": ${err?.message ?? err}`);
    return null;
  }
}
/**
 * Download one image per artifact into OUT.
 *
 * Steps:
 *  1. Fetch the map points from the running API and keep those with `image_url`.
 *  2. For each point, skip it if `<id>.jpg` already exists in OUT (counted as success).
 *  3. Otherwise try the point's own image link (upscaled through `toHd`), and
 *     if that yields nothing fall back to a Wikimedia Commons search by name.
 *  4. Print a per-item line and a final summary.
 *
 * NOTE(review): files are always written with a `.jpg` extension, whatever
 * `image/*` content type the server returned.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the point list cannot be fetched or the browser cannot be started.
 */
const main = async () => {
  // Get the list of artifacts from the running API and keep the ones that have an image.
  const listResp = await fetch(`${API}/api/v1/map/points`);
  if (!listResp.ok) {
    throw new Error(`GET ${API}/api/v1/map/points failed: HTTP ${listResp.status}`);
  }
  const points = await listResp.json();
  const withImg = points.filter((p) => p.image_url);
  console.log(`待下载文物图片:${withImg.length}`);

  const browser = await chromium.launch({ headless: true });
  let ok = 0;
  let fail = 0;
  try {
    const page = await browser.newPage();

    // Navigate to `url`; if the response is a successful image, write its bytes to
    // `dest` and return the byte count. Returns 0 when the response is not usable.
    const saveFrom = async (url, dest) => {
      const resp = await page.goto(url, { timeout: 30000, waitUntil: "load" });
      const ct = resp?.headers()["content-type"] ?? "";
      if (resp && resp.ok() && ct.startsWith("image/")) {
        const buf = await resp.body();
        fs.writeFileSync(dest, buf);
        return buf.length;
      }
      return 0;
    };

    for (const p of withImg) {
      const dest = path.join(OUT, `${p.id}.jpg`);
      if (fs.existsSync(dest)) {
        ok++;
        console.log(`· 跳过(已存在) ${p.name}`);
        continue;
      }
      try {
        // 1) Try the direct link from the seed data first.
        let size = 0;
        let primaryError = null;
        try {
          size = await saveFrom(toHd(p.image_url), dest);
        } catch (e) {
          // A navigation error (timeout, DNS, ...) must not skip the fallback below.
          primaryError = e;
        }
        // 2) On failure, fall back to a Commons search.
        if (!size) {
          const alt = await resolveFromCommons(page, p.name);
          if (alt) size = await saveFrom(alt, dest);
        }
        // Nothing worked: report the original navigation error if there was one.
        if (!size && primaryError) throw primaryError;

        if (size) {
          ok++;
          console.log(`${p.name} (${(size / 1024).toFixed(0)} KB)`);
        } else {
          fail++;
          console.log(`${p.name} 未找到可用图片`);
        }
      } catch (e) {
        fail++;
        // Only the first line of the message is printed; tolerate non-Error throwables.
        console.log(`${p.name} ${String(e?.message ?? e).split("\n")[0]}`);
      }
    }
  } finally {
    // Always release the browser, even if something above threw.
    await browser.close();
  }
  console.log(`\n完成:成功 ${ok},失败 ${fail},输出目录 ${OUT}`);
};
// Script entry point: run the downloader; if it rejects, print the error and exit with status 1.
const exitWithError = (err) => {
  console.error(err);
  process.exit(1);
};
main().catch(exitWithError);