// Download artifact images to local disk (apps/web/public/artifacts) and point
// the database `image_url` column at the local path.
// The server cannot reach Wikimedia directly but a browser can, so the images
// are fetched through a real browser driven by Playwright.
import { chromium } from "playwright";
import { Pool } from "pg";
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";

const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Directory the downloaded files are written to; created up front so that
// writeFileSync never fails on a missing parent directory.
const PUBLIC_DIR = path.resolve(__dirname, "../../apps/web/public/artifacts");
fs.mkdirSync(PUBLIC_DIR, { recursive: true });

const pool = new Pool({
  connectionString:
    process.env.DATABASE_URL ?? "postgresql://postgres@localhost:5432/wenwumap",
});

// Ordered (content-type fragment -> file extension) pairs. The first fragment
// found in the content type wins; anything unmatched falls back to "jpg".
const EXTENSION_BY_MIME_FRAGMENT = [
  ["png", "png"],
  ["svg", "svg"],
  ["webp", "webp"],
  ["gif", "gif"],
  ["avif", "avif"],
  ["bmp", "bmp"],
  ["tiff", "tiff"],
];

/**
 * Pick a file extension for a response content type.
 *
 * @param {string | null | undefined} ct - Value of the Content-Type header.
 * @returns {string} Extension without the leading dot; "jpg" when the content
 *   type is missing or matches none of the known fragments.
 */
const extOf = (ct) => {
  // MIME types are case-insensitive, so compare in lower case.
  const normalized = typeof ct === "string" ? ct.toLowerCase() : "";
  const match = EXTENSION_BY_MIME_FRAGMENT.find(([fragment]) =>
    normalized.includes(fragment)
  );
  return match ? match[1] : "jpg";
};

/**
 * Download one image through the browser page, write it under PUBLIC_DIR and
 * update the row's image_url to the local path.
 *
 * @param {import("playwright").Page} page - Page used to navigate to the image.
 * @param {{ id: unknown, image_url: string }} row - Row selected from `artifacts`.
 * @returns {Promise<{ localPath: string, bytes: number }>} The stored local
 *   path and the size of the downloaded body in bytes.
 * @throws {Error} When navigation fails, the response is not OK, the response
 *   is not an image, or the file write / database update fails.
 */
const downloadOne = async (page, { id, image_url }) => {
  // "commit" resolves as soon as the response is received; the body is read
  // from the response object, so there is no need to wait for rendering.
  const resp = await page.goto(image_url, { timeout: 30000, waitUntil: "commit" });
  if (!resp || !resp.ok()) {
    throw new Error(`HTTP ${resp ? resp.status() : "?"}`);
  }

  const ct = resp.headers()["content-type"] ?? "";
  if (!ct.toLowerCase().startsWith("image/")) {
    throw new Error(`非图片:${ct}`);
  }

  const buf = await resp.body();
  const file = `${id}.${extOf(ct)}`;
  fs.writeFileSync(path.join(PUBLIC_DIR, file), buf);

  const localPath = `/artifacts/${file}`;
  await pool.query("UPDATE artifacts SET image_url = $1 WHERE id = $2", [localPath, id]);
  return { localPath, bytes: buf.length };
};

/**
 * Download every artifact image whose image_url still starts with "http".
 * A failing row is logged and counted, and its original URL is left untouched.
 * The browser and the connection pool are released even when setup fails.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  let ok = 0;
  let failed = 0;
  let browser;

  try {
    const { rows } = await pool.query(
      "SELECT id, image_url FROM artifacts WHERE image_url LIKE 'http%' ORDER BY unified_map_id"
    );
    console.log(`待下载:${rows.length} 张`);

    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();

    // Sequential on purpose: a single page is reused for every navigation.
    for (const row of rows) {
      try {
        const { localPath, bytes } = await downloadOne(page, row);
        ok++;
        console.log(`✓ ${row.id} -> ${localPath} (${(bytes / 1024).toFixed(0)}KB)`);
      } catch (e) {
        failed++;
        // Tolerate thrown values that are not Error instances so that one odd
        // failure cannot abort the whole batch from inside the handler.
        const reason = String(e?.message ?? e).split("\n")[0];
        console.log(`✗ ${row.id} 失败:${reason}(保留原链接)`);
      }
    }
  } finally {
    // Release the pool even if closing the browser itself throws.
    try {
      await browser?.close();
    } finally {
      await pool.end();
    }
  }

  console.log(`完成:成功 ${ok},失败 ${failed}`);
};

main().catch((e) => {
  console.error(e);
  process.exit(1);
});