feat(tts): 新增通用流式 TTS 引擎并接入 AI 对话

- 新增 @wenwumap/tts 独立包：边流式边合成、按句排队顺序播放、专业 TTS 失败自动降级浏览器朗读，含 README 使用说明
- AI 后端新增 /ai/tts 接口，改用 DashScope CosyVoice(cosyvoice-v3-flash) 输出 mp3，串行+退避重试规避 429 限流
- web 对话面板接入 SpeechQueue，按角色配音色，加语音开关与朗读按钮
- admin 支持 /admin/ 基路径部署
- 地图页移除大面积 backdrop-blur，降低 GPU 占用
2026-06-14 23:13:26 +08:00
parent 3a55cd1978
commit 4a9397bccc
17 changed files with 955 additions and 26 deletions
@@ -0,0 +1,3 @@
+export { SpeechQueue } from "./speech-queue";
+export type { SpeechQueueOptions } from "./speech-queue";
+export { stripMarkdown, splitSpeakable } from "./text";
@@ -0,0 +1,313 @@
+import { splitSpeakable, stripMarkdown } from "./text";
+
+/** Construction options accepted by the SpeechQueue constructor. */
+export interface SpeechQueueOptions {
+  /** Backend TTS endpoint URL: POST { text, voice } -> binary audio (mp3/wav, etc.). */
+  endpoint: string;
+  /** Default voice (can be overridden at any time with setVoice). */
+  voice?: string;
+  /** Language used by the browser speech fallback; defaults to zh-CN. */
+  lang?: string;
+  /** Minimum sentence length (measured after markup is stripped); defaults to 14. */
+  minSentenceLen?: number;
+  /** Maximum number of in-flight synthesis requests; defaults to 3. */
+  maxInFlight?: number;
+  /** Custom fetch implementation (defaults to the global fetch). */
+  fetchImpl?: typeof fetch;
+  /** Playback state change: called with the session tag when playback starts, and with null when it stops or finishes. */
+  onSpeakingChange?: (tag: unknown | null) => void;
+  /** Synthesis or playback error (non-fatal; the engine falls back or skips automatically). */
+  onError?: (err: unknown) => void;
+}
+
+type Slot = string | null | "error" | "speech";
+// null = synthesis in progress; string = ready objectURL; "error" = skip; "speech" = browser speech fallback.
+// Note: at the type level the "error" / "speech" literals are absorbed by `string`, so this alias is
+// effectively `string | null`; the two sentinel values are distinguished at runtime only.
+
+/**
+ * Framework-agnostic streaming TTS playback engine (depends only on browser APIs).
+ *
+ * Design notes:
+ * - Synthesize while streaming: feed() accepts incremental text and enqueues each
+ *   complete sentence for synthesis as soon as it is available, which lowers the
+ *   time to first audio.
+ * - Ordered playback: synthesis requests may run concurrently and finish out of
+ *   order, but chunks are always played in enqueue order.
+ * - Two-track fallback: when the TTS endpoint fails (non-OK response or thrown
+ *   error), or the returned audio cannot be played, the chunk is spoken through
+ *   the browser's Web Speech API instead of being dropped silently.
+ * - Autoplay permission: unlock() must be called once inside a user gesture.
+ * - Sessions: begin()/stop()/destroy() bump a session counter, and every async
+ *   callback compares against it so that work belonging to a superseded session
+ *   can never modify the current queue.
+ */
+export class SpeechQueue {
+  private readonly endpoint: string;
+  private readonly lang: string;
+  private readonly minLen: number;
+  private readonly maxInFlight: number;
+  private readonly fetchImpl: typeof fetch;
+  private readonly onSpeakingChange?: (tag: unknown | null) => void;
+  private readonly onError?: (err: unknown) => void;
+
+  private voice: string;
+  /** Lazily created, shared <audio> element used for every chunk. */
+  private audio: HTMLAudioElement | null = null;
+  private unlocked = false;
+
+  /** Raw text of each enqueued chunk; parallel to `slots`. */
+  private texts: string[] = [];
+  /** Synthesis state of each chunk (see the Slot alias). */
+  private slots: Slot[] = [];
+  /** Index of the next chunk to play. */
+  private playIdx = 0;
+  /** Index of the next chunk to send for synthesis. */
+  private nextFetch = 0;
+  /** Number of synthesis requests currently outstanding. */
+  private inFlight = 0;
+  private playing = false;
+  /** True once no more text will be enqueued for this session. */
+  private streamDone = false;
+  /** Fed text that has not yet formed a complete chunk. */
+  private pending = "";
+  private tag: unknown = null;
+  /** Monotonic counter; a callback whose captured value differs is stale. */
+  private session = 0;
+
+  /** Zero-sample WAV played muted by unlock() to obtain autoplay permission. */
+  private static readonly SILENT_WAV =
+    "data:audio/wav;base64,UklGRjIAAABXQVZFZm10IBIAAAABAAEARKwAAIhYAQACABAAAABkYXRhAAAAAA==";
+
+  constructor(opts: SpeechQueueOptions) {
+    this.endpoint = opts.endpoint;
+    this.voice = opts.voice ?? "";
+    this.lang = opts.lang ?? "zh-CN";
+    this.minLen = opts.minSentenceLen ?? 14;
+    this.maxInFlight = opts.maxInFlight ?? 3;
+    // If no fetch exists at all this is undefined at runtime; fetchChunk's try/catch
+    // then turns the resulting TypeError into the browser-speech fallback.
+    this.fetchImpl = opts.fetchImpl ?? globalThis.fetch?.bind(globalThis);
+    this.onSpeakingChange = opts.onSpeakingChange;
+    this.onError = opts.onError;
+  }
+
+  /** Sets the voice used for subsequent synthesis requests; an empty string is ignored. */
+  setVoice(v: string): void {
+    if (v) this.voice = v;
+  }
+
+  /** Returns the shared audio element, creating it on first use. */
+  private getAudio(): HTMLAudioElement {
+    if (!this.audio) {
+      this.audio = new Audio();
+      this.audio.preload = "auto";
+    }
+    return this.audio;
+  }
+
+  /**
+   * Must be called once, synchronously, inside a user gesture (e.g. a click):
+   * plays a muted silent clip so later programmatic playback is permitted.
+   */
+  unlock(): void {
+    if (this.unlocked || typeof window === "undefined") return;
+    const el = this.getAudio();
+    try {
+      el.src = SpeechQueue.SILENT_WAV;
+      el.muted = true;
+      const p = el.play();
+      if (p && typeof p.then === "function") {
+        p.then(() => {
+          el.pause();
+          el.currentTime = 0;
+          el.muted = false;
+          this.unlocked = true;
+        }).catch(() => {});
+      } else {
+        // play() returned no promise; treat the call itself as success.
+        this.unlocked = true;
+      }
+    } catch {
+      /* best effort: a failed unlock simply leaves `unlocked` false */
+    }
+  }
+
+  /**
+   * Starts a new speech session, invalidating the previous one.
+   * @param tag Identifies what is being read (e.g. a message index); it is passed
+   *            back through onSpeakingChange when playback starts.
+   */
+  begin(tag: unknown = null): void {
+    this.reset(tag, false);
+  }
+
+  /** Feeds incremental streamed text; every complete chunk is enqueued for synthesis. */
+  feed(delta: string): void {
+    this.pending += delta;
+    const { chunks, rest } = splitSpeakable(this.pending, this.minLen);
+    this.pending = rest;
+    for (const c of chunks) this.enqueue(c);
+  }
+
+  /** Marks the stream as finished: enqueues any remaining text as the last chunk and tries to play. */
+  flush(): void {
+    const rest = this.pending.trim();
+    this.pending = "";
+    if (rest) this.enqueue(rest);
+    this.streamDone = true;
+    this.pumpPlay();
+  }
+
+  /** Reads a whole text in one go (e.g. replaying a message); starts a new session. */
+  speakWhole(text: string, tag: unknown = null): void {
+    this.begin(tag);
+    const { chunks, rest } = splitSpeakable(text, this.minLen);
+    for (const c of chunks) this.enqueue(c);
+    if (rest.trim()) this.enqueue(rest);
+    this.streamDone = true;
+    this.pumpPlay();
+  }
+
+  /** Stops playback, clears the queue and reports `null` through onSpeakingChange. */
+  stop(): void {
+    this.reset(null, true);
+    this.onSpeakingChange?.(null);
+  }
+
+  /** Releases resources (call when the owning component unmounts). */
+  destroy(): void {
+    this.session += 1;
+    this.revokeAll();
+    this.silence();
+  }
+
+  // ===== Internals =====
+
+  /** Invalidates the running session and returns all queue/playback state to empty. */
+  private reset(tag: unknown, streamDone: boolean): void {
+    this.session += 1;
+    this.revokeAll();
+    this.texts = [];
+    this.slots = [];
+    this.playIdx = 0;
+    this.nextFetch = 0;
+    this.inFlight = 0;
+    this.playing = false;
+    this.streamDone = streamDone;
+    this.pending = "";
+    this.tag = tag;
+    this.silence();
+  }
+
+  /** Cancels browser speech, pauses the audio element and detaches its handlers. */
+  private silence(): void {
+    if (typeof window !== "undefined") window.speechSynthesis?.cancel();
+    if (this.audio) {
+      this.audio.pause();
+      this.audio.onended = null;
+      this.audio.onerror = null;
+    }
+  }
+
+  /** Revokes every object URL still held in `slots`. */
+  private revokeAll(): void {
+    for (const u of this.slots) {
+      if (typeof u === "string" && u.startsWith("blob:")) URL.revokeObjectURL(u);
+    }
+  }
+
+  /** Appends a chunk to the queue and starts synthesis if capacity allows. */
+  private enqueue(text: string): void {
+    this.texts.push(text);
+    this.slots.push(null);
+    this.pumpFetch();
+  }
+
+  /** Starts synthesis requests until `maxInFlight` is reached or nothing is left to fetch. */
+  private pumpFetch(): void {
+    while (this.inFlight < this.maxInFlight && this.nextFetch < this.texts.length) {
+      const i = this.nextFetch++;
+      this.inFlight += 1;
+      void this.fetchChunk(i, this.session);
+    }
+  }
+
+  /**
+   * Synthesizes chunk `i` and stores the outcome in `slots[i]`.
+   * Every exit path that still belongs to the current session releases its
+   * in-flight slot; previously a chunk that stripped to empty text returned
+   * without doing so, permanently reducing the available concurrency.
+   */
+  private async fetchChunk(i: number, session: number): Promise<void> {
+    // Cap the request size; text beyond 600 characters of a single chunk is not sent.
+    const clean = stripMarkdown(this.texts[i] ?? "").slice(0, 600);
+    let outcome: Slot = "error"; // nothing speakable -> skip
+    if (clean) {
+      try {
+        const res = await this.fetchImpl(this.endpoint, {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({ text: clean, voice: this.voice }),
+        });
+        if (session !== this.session) return;
+        if (!res.ok) {
+          outcome = "speech"; // TTS endpoint failed -> browser speech fallback
+        } else {
+          const blob = await res.blob();
+          // Check before creating the URL so a stale response never leaks one.
+          if (session !== this.session) return;
+          outcome = URL.createObjectURL(blob);
+        }
+      } catch (err) {
+        if (session !== this.session) return;
+        outcome = "speech";
+        this.onError?.(err);
+      }
+    }
+    if (session !== this.session) return;
+    this.slots[i] = outcome;
+    this.inFlight = Math.max(0, this.inFlight - 1);
+    this.pumpFetch();
+    this.pumpPlay();
+  }
+
+  /** Plays the chunk at `playIdx` if it is ready; reports completion when the queue is drained. */
+  private pumpPlay(): void {
+    if (this.playing) return;
+    const i = this.playIdx;
+    if (i >= this.texts.length) {
+      if (this.streamDone) {
+        this.playing = false;
+        this.onSpeakingChange?.(null);
+      }
+      return;
+    }
+    const slot = this.slots[i];
+    if (slot === null || slot === undefined) return; // still synthesizing; fetchChunk will pump again
+    if (slot === "error") {
+      this.playIdx = i + 1;
+      this.pumpPlay();
+      return;
+    }
+    if (slot === "speech") {
+      this.playViaBrowser(i);
+      return;
+    }
+    this.playViaAudio(i, slot);
+  }
+
+  /**
+   * Plays chunk `i` from its object URL. When playback ends the queue advances;
+   * when the element reports an error or play() rejects, the chunk is handed to
+   * the browser speech fallback. Callbacks from a superseded session are ignored.
+   */
+  private playViaAudio(i: number, url: string): void {
+    const el = this.getAudio();
+    const session = this.session;
+    let settled = false;
+    // Runs the shared teardown once; returns false for duplicate or stale callbacks
+    // (a stale callback must not touch handlers installed by a newer session).
+    const settle = (): boolean => {
+      if (settled || session !== this.session) return false;
+      settled = true;
+      el.onended = null;
+      el.onerror = null;
+      if (url.startsWith("blob:")) URL.revokeObjectURL(url);
+      this.playing = false;
+      return true;
+    };
+    const fail = (err: unknown): void => {
+      if (!settle()) return;
+      this.onError?.(err);
+      this.slots[i] = "speech"; // audio unusable -> speak this chunk with the browser
+      this.pumpPlay();
+    };
+    el.onended = () => {
+      if (!settle()) return;
+      this.slots[i] = "error"; // mark as consumed so the revoked URL is never reused
+      this.playIdx = i + 1;
+      this.pumpPlay();
+    };
+    el.onerror = () => fail(el.error ?? new Error("audio playback failed"));
+    el.src = url;
+    el.muted = false;
+    this.playing = true;
+    this.onSpeakingChange?.(this.tag);
+    el.play().catch(fail);
+  }
+
+  /**
+   * Speaks chunk `i` with the Web Speech API, then advances the queue.
+   * speechSynthesis.cancel() (issued by begin/stop/destroy) ends the utterance
+   * asynchronously, so the completion callback is session-guarded.
+   */
+  private playViaBrowser(i: number): void {
+    const text = stripMarkdown(this.texts[i] ?? "");
+    const synth = typeof window !== "undefined" ? window.speechSynthesis : undefined;
+    if (!text || !synth) {
+      this.playIdx = i + 1;
+      this.pumpPlay();
+      return;
+    }
+    const session = this.session;
+    let settled = false;
+    this.playing = true;
+    this.onSpeakingChange?.(this.tag);
+    const done = (): void => {
+      if (settled || session !== this.session) return;
+      settled = true;
+      this.playing = false;
+      this.slots[i] = "error";
+      this.playIdx = i + 1;
+      this.pumpPlay();
+    };
+    try {
+      const u = new SpeechSynthesisUtterance(text);
+      u.lang = this.lang;
+      u.rate = 1;
+      u.onend = done;
+      u.onerror = done;
+      synth.speak(u);
+    } catch {
+      done();
+    }
+  }
+}
@@ -0,0 +1,47 @@
+/**
+ * 文本处理工具：用于把 Markdown 回答转成适合朗读的纯文本，
+ * 以及在流式输出时按句切分，便于「边流式边合成」。
+ */
+
+/**
+ * Strips Markdown markup from `md` and returns plain text suitable for reading aloud.
+ *
+ * Fenced code blocks and images are removed entirely; inline code, links and
+ * emphasis keep their inner text; heading, blockquote and list markers are
+ * dropped; thematic breaks are removed; runs of whitespace collapse to a single
+ * space and the result is trimmed.
+ *
+ * @param md Markdown source (may be a fragment of a larger document).
+ * @returns The speakable plain text, possibly empty.
+ */
+export function stripMarkdown(md: string): string {
+  return md
+    // Fenced code blocks: removed together with their content.
+    .replace(/```[\s\S]*?```/g, "")
+    // Inline code: keep the code text.
+    .replace(/`([^`]+)`/g, "$1")
+    // Images are dropped; this must run before links so "![alt](src)" is not half-matched.
+    .replace(/!\[[^\]]*\]\([^)]*\)/g, "")
+    // Links: keep the label only.
+    .replace(/\[([^\]]+)\]\([^)]*\)/g, "$1")
+    // ATX heading markers.
+    .replace(/^#{1,6}\s+/gm, "")
+    // Blockquote markers.
+    .replace(/^\s*>\s?/gm, "")
+    // Thematic breaks / setext underlines ("---", "***", "___", "- - -"): otherwise
+    // the raw marker characters would reach the speech engine.
+    .replace(/^[ \t]*([-*_])(?:[ \t]*\1){2,}[ \t]*\r?$/gm, "")
+    // Bullet list markers.
+    .replace(/^\s*[-*+]\s+/gm, "")
+    // Bold, then italic, written with asterisks.
+    .replace(/\*\*([^*]+)\*\*/g, "$1")
+    .replace(/\*([^*]+)\*/g, "$1")
+    // Underscore emphasis. The delimiters must not sit inside a word, so identifiers
+    // such as snake_case_name are left intact. A capture group is used instead of a
+    // lookbehind for compatibility with browsers that lack lookbehind support.
+    .replace(/(^|\W)_{1,2}([^_]+)_{1,2}(?!\w)/g, "$1$2")
+    // Strikethrough.
+    .replace(/~~([^~]+)~~/g, "$1")
+    // Collapse whitespace runs (including blank lines) to one space.
+    .replace(/\s{2,}/g, " ")
+    .trim();
+}
+
+/**
+ * Splits `text` on sentence boundaries for synthesize-while-streaming playback.
+ *
+ * A sentence ends at a run of one or more terminators (。！？!?；;… or a newline).
+ * Sentences are accumulated until the accumulated text, measured after
+ * stripMarkdown(), is at least `minLen` characters long; that accumulation is
+ * then emitted as one chunk (the raw text, markup included).
+ *
+ * @param text   Text to split.
+ * @param minLen Minimum stripped length of an emitted chunk; defaults to 14.
+ * @returns `chunks`: the emitted chunks in order. `rest`: everything not emitted,
+ *          i.e. accumulated sentences still shorter than `minLen` followed by any
+ *          trailing text that has no terminator yet.
+ */
+export function splitSpeakable(
+  text: string,
+  minLen = 14
+): { chunks: string[]; rest: string } {
+  // With the global flag exec() resumes at lastIndex, and the lazy prefix makes each
+  // match begin exactly there, so successive matches tile the input without gaps.
+  const sentence = /[\s\S]*?[。！？!?\n；;…]+/g;
+  const chunks: string[] = [];
+  let carry = "";
+  let consumed = 0;
+  for (let hit = sentence.exec(text); hit !== null; hit = sentence.exec(text)) {
+    carry += hit[0];
+    consumed = sentence.lastIndex;
+    if (stripMarkdown(carry).length < minLen) continue; // too short: keep accumulating
+    chunks.push(carry);
+    carry = "";
+  }
+  return { chunks, rest: carry + text.slice(consumed) };
+}