chore: 初始化仓库
中华文明全图鉴——文物全图系统(PC Web 地图 + NestJS API + 管理后台)。 含三大 IP(文物南迁北归 / 国宝海外回归 / 博物馆手艺人)、AI 文物对话、 文物地图与详情、以及 demo-video-kit 演示视频生成工具。
This commit is contained in:
@@ -0,0 +1,96 @@
|
||||
import { chromium } from "playwright";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
// Base URL of the API this script queries; read from the API_URL environment
// variable, falling back to a local server on port 3002 when it is unset.
const API = process.env.API_URL ?? "http://localhost:3002";
|
||||
// Absolute output directory for downloaded images. The relative path is
// resolved against the current working directory of the process.
const OUT = path.resolve("apps/web/public/artifacts");
|
||||
// Create the output directory (including missing parents) at module load;
// with `recursive: true` this does not fail when the directory already exists.
fs.mkdirSync(OUT, { recursive: true });
|
||||
|
||||
/**
 * Rewrite an image URL so that it carries `width=<w>` in its query string.
 *
 * An existing numeric `width` parameter is overwritten wherever it appears in
 * the query string (first or later position). If there is none, `width=<w>`
 * is appended, using `&` when the URL already contains `?` and `?` otherwise.
 *
 * @param {string} url - Image URL, with or without a query string.
 * @param {number} [w=1600] - Width value to request.
 * @returns {string} The URL with exactly one rewritten or appended `width`.
 */
const toHd = (url, w = 1600) => {
  // Capture the delimiter so `&width=` is handled like `?width=`, while
  // look-alike keys such as `maxwidth=` are left untouched.
  const widthParam = /([?&])width=\d+/;
  if (widthParam.test(url)) {
    return url.replace(widthParam, `$1width=${w}`);
  }
  const separator = url.includes("?") ? "&" : "?";
  return `${url}${separator}width=${w}`;
};
|
||||
|
||||
/**
 * Strip parenthesised annotations from an artifact name and trim the result.
 *
 * Both full-width parentheses (U+FF08 ... U+FF09) and ASCII parentheses are
 * removed together with their contents. The full-width pair is written with
 * Unicode escapes so the pattern cannot be confused with a regex capture
 * group, which would match (and delete) every run of non-`)` characters.
 *
 * @param {string} n - Raw artifact name.
 * @returns {string} The name without parenthesised parts or outer whitespace.
 */
const cleanName = (n) =>
  n
    .replace(/\uFF08[^\uFF09]*\uFF09/g, "") // full-width parentheses
    .replace(/\([^)]*\)/g, "") // ASCII parentheses
    .trim();
|
||||
|
||||
/**
 * Search Wikimedia Commons for an image by artifact name through the
 * MediaWiki API and return a direct thumbnail URL.
 *
 * The name is first passed through `cleanName`. The query uses the search
 * generator restricted to namespace 6 with at most 6 results and asks for
 * `imageinfo` with a 1600-wide thumbnail. The request itself is made with
 * `fetch` inside the browser page via `page.evaluate`.
 *
 * Returned pages are scanned in the iteration order of the response's
 * `pages` object; the first one whose MIME type is `image/jpeg` or
 * `image/png` and that has a `thumburl` wins.
 *
 * This is a best-effort lookup: failures are logged as a warning and reported
 * as `null` rather than thrown.
 *
 * @param page - Playwright page used to execute the request.
 * @param {string} name - Artifact name to search for.
 * @returns {Promise<string|null>} Thumbnail URL, or `null` when nothing
 *   usable was found or the lookup failed.
 */
async function resolveFromCommons(page, name) {
  const q = cleanName(name);
  // Nothing left to search for once annotations are stripped.
  if (!q) return null;

  const api =
    "https://commons.wikimedia.org/w/api.php?action=query&format=json&origin=*" +
    "&generator=search&gsrnamespace=6&gsrlimit=6" +
    `&gsrsearch=${encodeURIComponent(q)}` +
    "&prop=imageinfo&iiprop=url|mime&iiurlwidth=1600";

  try {
    const data = await page.evaluate((u) => fetch(u).then((r) => r.json()), api);
    const pages = data?.query?.pages ? Object.values(data.query.pages) : [];
    for (const pg of pages) {
      const info = pg.imageinfo?.[0];
      // Only raster formats are accepted; other MIME types are skipped.
      if (info && /^image\/(jpeg|png)$/.test(info.mime) && info.thumburl) {
        return info.thumburl;
      }
    }
  } catch (err) {
    // Stay best-effort, but make the failure visible so an API or network
    // error is not mistaken for "no matching image".
    const reason = String(err?.message ?? err).split("\n")[0];
    console.warn(`! Commons 搜索失败 ${q} ${reason}`);
  }
  return null;
}
|
||||
|
||||
/**
 * Download one image per artifact into OUT as `<id>.jpg`.
 *
 * Steps:
 *  1. Fetch `${API}/api/v1/map/points` and keep the entries with `image_url`.
 *  2. For each entry whose target file does not exist yet, navigate a
 *     headless Chromium page to the seed URL (rewritten by `toHd`) and save
 *     the response body when it is a successful `image/*` response.
 *  3. If that yields nothing, or throws, fall back to `resolveFromCommons`.
 *
 * Existing files are counted as successes and skipped. Per-artifact failures
 * are logged and counted; they do not abort the run. The browser is always
 * closed, even when an unexpected error propagates.
 *
 * NOTE(review): files are always named `.jpg` although any `image/*` content
 * type is accepted - presumably what the consumer of OUT expects; confirm.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the artifact list cannot be fetched or is not an array.
 */
const main = async () => {
  // Fetch the artifact list from the running API and fail loudly on a bad
  // response instead of crashing later on `points.filter`.
  const listResp = await fetch(`${API}/api/v1/map/points`);
  if (!listResp.ok) {
    throw new Error(`获取文物列表失败: HTTP ${listResp.status}`);
  }
  const points = await listResp.json();
  if (!Array.isArray(points)) {
    throw new Error("获取文物列表失败: 响应不是数组");
  }
  const withImg = points.filter((p) => p.image_url);
  console.log(`待下载文物图片:${withImg.length} 件`);

  let ok = 0;
  let fail = 0;
  const browser = await chromium.launch({ headless: true });
  try {
    const page = await browser.newPage();

    // Navigate to `url` and, if the response is a successful image, write it
    // to `dest`. Returns the number of bytes saved (0 means "nothing usable").
    const saveFrom = async (url, dest) => {
      const resp = await page.goto(url, { timeout: 30000, waitUntil: "load" });
      const ct = resp?.headers()["content-type"] ?? "";
      if (!resp || !resp.ok() || !ct.startsWith("image/")) {
        return 0;
      }
      const buf = await resp.body();
      // Never leave an empty file behind: it would be treated as
      // "already downloaded" on the next run.
      if (buf.length > 0) {
        fs.writeFileSync(dest, buf);
      }
      return buf.length;
    };

    for (const p of withImg) {
      const dest = path.join(OUT, `${p.id}.jpg`);
      if (fs.existsSync(dest)) {
        ok++;
        console.log(`· 跳过(已存在) ${p.name}`);
        continue;
      }

      let size = 0;
      let lastError = null;

      // 1) Try the direct link from the seed data first.
      try {
        size = await saveFrom(toHd(p.image_url), dest);
      } catch (e) {
        // Remember the error but still give the fallback a chance.
        lastError = e;
      }

      // 2) On failure (no image or an exception), fall back to a Commons search.
      if (!size) {
        try {
          const alt = await resolveFromCommons(page, p.name);
          if (alt) size = await saveFrom(alt, dest);
        } catch (e) {
          lastError = e;
        }
      }

      if (size) {
        ok++;
        console.log(`✓ ${p.name} (${(size / 1024).toFixed(0)} KB)`);
      } else if (lastError) {
        fail++;
        // Only the first line: Playwright errors carry a multi-line call log.
        const reason = String(lastError?.message ?? lastError).split("\n")[0];
        console.log(`✗ ${p.name} ${reason}`);
      } else {
        fail++;
        console.log(`✗ ${p.name} 未找到可用图片`);
      }
    }
  } finally {
    // Release the browser process even if something unexpected was thrown.
    await browser.close();
  }

  console.log(`\n完成:成功 ${ok},失败 ${fail},输出目录 ${OUT}`);
};
|
||||
|
||||
// Script entry point: run the download job. Any rejection that escapes
// `main` is printed to stderr and the process exits with status 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|
||||
Reference in New Issue
Block a user