// Pre-generates narration audio (DashScope qwen-tts) into e2e/voice/*.wav and writes
// clips.json, a map of narration text → { file, dur, engine }.
// run.mjs uses it to hold each step on screen long enough; add-voice.mjs uses it to place
// each clip at the moment its subtitle appears.
import { execFileSync } from "child_process";
import crypto from "crypto";
import fs from "fs";
import path from "path";
import { NARRATION, clean } from "./narration.mjs";

const OUT = path.resolve("e2e/voice");
const QWEN_VOICE = process.env.QWEN_TTS_VOICE ?? "Cherry";
const FALLBACK_VOICE = process.env.TTS_VOICE ?? "Tingting";

const QWEN_ENDPOINT =
  "https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation";
const REQUEST_TIMEOUT_MS = 40000;
const MAX_ATTEMPTS = 5;
const RETRY_BASE_MS = 2500;
const THROTTLE_MS = 1200;

/**
 * Resolves the DashScope API key.
 *
 * Looks for an `AI_API_KEY=` line in ./.env first; if the file is missing or the
 * value is empty, falls back to the `AI_API_KEY` environment variable.
 *
 * @returns {string} The key, or "" when none is configured.
 */
function readApiKey() {
  const envFile = path.resolve(".env");
  if (fs.existsSync(envFile)) {
    const match = fs.readFileSync(envFile, "utf-8").match(/^AI_API_KEY=(.*)$/m);
    // Drop one pair of matching surrounding quotes, as dotenv-style files allow.
    const value = match ? match[1].trim().replace(/^(["'])(.*)\1$/, "$2") : "";
    if (value) return value;
  }
  return (process.env.AI_API_KEY ?? "").trim();
}

const KEY = readApiKey();

const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Runs an external program without a shell, so arguments are passed verbatim
 * (no quoting or expansion of `$`, backticks, spaces, ...).
 *
 * @param {string} bin - Executable name or path.
 * @param {string[]} args - Argument vector.
 * @returns {string} Captured stdout.
 * @throws {Error} When the program cannot be started or exits non-zero.
 */
const run = (bin, args) =>
  execFileSync(bin, args, { stdio: ["ignore", "pipe", "pipe"] }).toString();

/**
 * Reads the duration of a media file with ffprobe.
 *
 * @param {string} file - Path to the media file.
 * @returns {number} Duration in seconds.
 * @throws {Error} When ffprobe fails or prints something that is not a finite number.
 */
function durationOf(file) {
  const raw = run("ffprobe", [
    "-v",
    "error",
    "-show_entries",
    "format=duration",
    "-of",
    "csv=p=0",
    file,
  ]).trim();
  const seconds = Number.parseFloat(raw);
  // A NaN here would be serialised as `null` in clips.json and silently break timing.
  if (!Number.isFinite(seconds)) {
    throw new Error(`ffprobe returned no usable duration for ${file}: "${raw}"`);
  }
  return seconds;
}

/**
 * Synthesizes `text` with DashScope qwen-tts and saves the returned audio to `outFile`.
 *
 * @param {string} text - Narration text to speak.
 * @param {string} outFile - Destination path for the downloaded audio.
 * @returns {Promise<void>}
 * @throws {Error} On timeout, when the response carries no audio URL, or when the
 *   audio download fails.
 */
async function qwenTts(text, outFile) {
  const res = await fetch(QWEN_ENDPOINT, {
    method: "POST",
    headers: { Authorization: `Bearer ${KEY}`, "Content-Type": "application/json" },
    body: JSON.stringify({ model: "qwen-tts", input: { text, voice: QWEN_VOICE } }),
    signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS),
  });
  // A non-JSON body (e.g. a gateway error page) is reported with its HTTP status
  // rather than as a bare SyntaxError.
  const payload = await res.json().catch(() => null);
  const url = payload?.output?.audio?.url;
  if (!url) {
    throw new Error(`HTTP ${res.status} ${JSON.stringify(payload).slice(0, 160)}`);
  }
  const audio = await fetch(url, { signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS) });
  if (!audio.ok) throw new Error(`下载音频失败 ${audio.status}`);
  fs.writeFileSync(outFile, Buffer.from(await audio.arrayBuffer()));
}

/**
 * Calls qwenTts up to MAX_ATTEMPTS times with a linearly growing pause between tries.
 *
 * @param {string} text - Narration text to speak.
 * @param {string} outFile - Destination path for the audio.
 * @returns {Promise<boolean>} true once an attempt succeeds, false if all fail.
 */
async function tryQwenTts(text, outFile) {
  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    try {
      await qwenTts(text, outFile);
      return true;
    } catch (err) {
      // Surface the reason instead of swallowing it; the caller falls back to `say`.
      console.warn(`qwen-tts attempt ${attempt}/${MAX_ATTEMPTS} failed: ${err?.message ?? err}`);
      if (attempt < MAX_ATTEMPTS) await sleep(RETRY_BASE_MS * attempt);
    }
  }
  return false;
}

/**
 * Fallback synthesis: renders `text` with the `say` command into a temporary .aiff
 * next to `outFile`, then converts it to `outFile` with ffmpeg.
 *
 * @param {string} text - Narration text to speak.
 * @param {string} outFile - Destination .wav path.
 * @throws {Error} When `say` or `ffmpeg` fails.
 */
function sayTts(text, outFile) {
  const aiff = path.join(path.dirname(outFile), `${path.basename(outFile, ".wav")}.aiff`);
  try {
    run("say", ["-v", FALLBACK_VOICE, "-o", aiff, text]);
    run("ffmpeg", ["-y", "-i", aiff, outFile]);
  } finally {
    // Remove the intermediate file even when the conversion fails.
    fs.rmSync(aiff, { force: true });
  }
}

/**
 * Regenerates every narration clip from scratch and writes clips.json.
 *
 * The output directory is wiped first. Each unique, non-empty cleaned narration text
 * is stored as <first 10 hex chars of its md5>.wav.
 *
 * @returns {Promise<void>}
 */
const main = async () => {
  fs.rmSync(OUT, { recursive: true, force: true });
  fs.mkdirSync(OUT, { recursive: true });

  if (!KEY) {
    // Without a key every request would fail; skip the retries and their backoff.
    console.warn("AI_API_KEY not found in .env or environment; using `say` for all clips");
  }

  const texts = Array.from(new Set(Object.values(NARRATION).map(clean))).filter(Boolean);
  const clips = {};

  for (const text of texts) {
    const id = crypto.createHash("md5").update(text).digest("hex").slice(0, 10);
    const wav = path.join(OUT, `${id}.wav`);

    const viaQwen = KEY ? await tryQwenTts(text, wav) : false;
    const engine = viaQwen ? "qwen-tts" : "say";
    if (!viaQwen) sayTts(text, wav);

    const dur = durationOf(wav);
    clips[text] = { file: path.relative(process.cwd(), wav), dur, engine };
    console.log(`✓ ${dur.toFixed(1)}s [${engine}] ${text.slice(0, 26)}…`);
    await sleep(THROTTLE_MS); // rate limit, to avoid triggering throttling
  }

  fs.writeFileSync(path.join(OUT, "clips.json"), JSON.stringify(clips, null, 2));
  console.log(`完成:${texts.length} 条语音 → e2e/voice/clips.json`);
};

main().catch((e) => {
  console.error(e);
  process.exit(1);
});