feat(tts): 新增通用流式 TTS 引擎并接入 AI 对话

- 新增 @wenwumap/tts 独立包：边流式边合成、按句排队顺序播放、专业 TTS 失败自动降级浏览器朗读，含 README 使用说明
- AI 后端新增 /ai/tts 接口，改用 DashScope CosyVoice(cosyvoice-v3-flash) 输出 mp3，串行+退避重试规避 429 限流
- web 对话面板接入 SpeechQueue，按角色配音色，加语音开关与朗读按钮
- admin 支持 /admin/ 基路径部署
- 地图页移除大面积 backdrop-blur，降低 GPU 占用
2026-06-14 23:13:26 +08:00
parent 3a55cd1978
commit 4a9397bccc
17 changed files with 955 additions and 26 deletions
@@ -3,6 +3,7 @@ import { ApiOperation, ApiTags } from "@nestjs/swagger";
 import { Response } from "express";
 import { AiService } from "./ai.service";
 import { ChatDto } from "./dto/chat.dto";
+import { TtsDto } from "./dto/tts.dto";
 import { RateLimitGuard } from "../common/rate-limit.guard";

@ApiTags("ai")
@@ -39,4 +40,18 @@ export class AiController {
    const suggestions = await this.ai.getSuggestions(dto);
    return { suggestions };
  }
+
+  /**
+   * `POST tts` route: synthesizes `dto.text` (optionally with `dto.voice`) and writes
+   * the audio bytes straight to the injected Express response.
+   *
+   * On success the reply carries the Content-Type reported by the service and
+   * `Cache-Control: no-store`. Any failure is answered with HTTP 502 and a JSON
+   * `{ message }` body.
+   */
+  @Post("tts")
+  @ApiOperation({ summary: "通义千问 TTS：将文本合成语音（返回音频）" })
+  async tts(@Body() dto: TtsDto, @Res() res: Response): Promise<void> {
+    try {
+      const audio = await this.ai.synthesizeSpeech(dto.text, dto.voice);
+      res.setHeader("Content-Type", audio.contentType);
+      res.setHeader("Cache-Control", "no-store");
+      res.send(audio.buffer);
+    } catch (cause) {
+      // Non-Error throwables get a generic message instead of being stringified.
+      let message = "TTS 服务异常";
+      if (cause instanceof Error) message = cause.message;
+      res.status(502).json({ message });
+    }
+  }
 }
@@ -50,6 +50,28 @@ export type ChatPersona = "artifact" | "guide" | "scholar" | "migration" | "repa
 export class AiService {
  private readonly logger = new Logger(AiService.name);

+  // Process-wide throttle for DashScope TTS calls: caps concurrency and spaces
+  // requests out so the account-level QPS limit (HTTP 429 Throttling) is not hit.
+  /** Number of TTS slots currently held. */
+  private static ttsActive = 0;
+  /** FIFO queue of resolvers for callers waiting on a free slot. */
+  private static readonly ttsWaiters: (() => void)[] = [];
+  /**
+   * Maximum concurrent TTS requests (env `AI_TTS_CONCURRENCY`, default 1).
+   * Clamped to an integer >= 1: an empty, zero, negative or non-numeric value would
+   * otherwise make `ttsActive < TTS_MAX` permanently false, so no slot would ever be
+   * taken or released and every caller would wait forever.
+   */
+  private static readonly TTS_MAX = Math.max(
+    1,
+    Math.floor(Number(process.env["AI_TTS_CONCURRENCY"])) || 1
+  );
+  /** `Date.now()` timestamp at which the most recent TTS request was started. */
+  private static lastTtsAt = 0;
+  /**
+   * Minimum gap in milliseconds between two TTS requests (env `AI_TTS_MIN_GAP_MS`,
+   * default 300). `0` disables the gap; unset, empty, negative or non-numeric values
+   * fall back to the default instead of silently turning the throttle off.
+   */
+  private static readonly TTS_MIN_GAP = ((): number => {
+    const gap = Number(process.env["AI_TTS_MIN_GAP_MS"] || 300);
+    return Number.isFinite(gap) && gap >= 0 ? gap : 300;
+  })();
+
+  /**
+   * Takes one TTS slot, waiting in FIFO order when all slots are in use.
+   * Resolves once the caller owns a slot; pair every call with `releaseTtsSlot()`.
+   */
+  private static async acquireTtsSlot(): Promise<void> {
+    if (AiService.ttsActive < AiService.TTS_MAX) {
+      AiService.ttsActive += 1;
+    } else {
+      // Park until a releasing caller wakes us. The slot is handed over directly,
+      // so `ttsActive` is intentionally left untouched on this path.
+      await new Promise<void>((wake) => {
+        AiService.ttsWaiters.push(wake);
+      });
+    }
+  }
+
+  /**
+   * Gives back one TTS slot. If a caller is queued, the slot is handed to the
+   * oldest waiter (the active count stays the same); otherwise the count drops by one.
+   */
+  private static releaseTtsSlot(): void {
+    const waiter = AiService.ttsWaiters.shift();
+    if (waiter === undefined) {
+      AiService.ttsActive -= 1;
+      return;
+    }
+    waiter();
+  }
+
  constructor(
    private readonly config: ConfigService,
    private readonly db: DatabaseService
@@ -251,6 +273,104 @@ ${common}`;
    return parseSuggestions(content);
  }

+  /**
+   * Synthesizes speech for `text` through DashScope TTS (default model
+   * `cosyvoice-v3-flash`) and returns the audio bytes.
+   *
+   * DashScope answers with an audio URL; the backend downloads that audio itself so
+   * the browser receives same-origin audio and never talks to OSS directly
+   * (avoids cross-origin and URL-expiry problems).
+   *
+   * @param text  Text to speak. It is trimmed and only the first 800 characters are
+   *              synthesized; anything beyond that is dropped.
+   * @param voice Optional voice id; falls back to `AI_TTS_VOICE`, then `longxiaochun_v3`.
+   * @returns The audio bytes and the Content-Type reported by the audio download.
+   * @throws Error when `AI_API_KEY` is not configured, the text is empty after
+   *         trimming, the synthesis request fails, or the audio download is not OK.
+   */
+  async synthesizeSpeech(
+    text: string,
+    voice?: string
+  ): Promise<{ buffer: Buffer; contentType: string }> {
+    const apiKey = this.config.get<string>("AI_API_KEY");
+    if (!apiKey) throw new Error("AI 服务未配置：请在 .env 中设置 AI_API_KEY");
+
+    const model = this.config.get<string>("AI_TTS_MODEL") ?? "cosyvoice-v3-flash";
+    const v = voice || this.config.get<string>("AI_TTS_VOICE") || "longxiaochun_v3";
+    const clean = text.trim().slice(0, 800);
+    if (!clean) throw new Error("待合成文本为空");
+
+    const genUrl =
+      this.config.get<string>("AI_TTS_URL") ??
+      "https://dashscope.aliyuncs.com/api/v1/services/audio/tts/SpeechSynthesizer";
+
+    const audioUrl = await this.requestTtsUrl(genUrl, apiKey, model, v, clean);
+
+    const audioResp = await fetch(audioUrl, { signal: AbortSignal.timeout(20_000) });
+    if (!audioResp.ok) throw new Error(`音频下载失败 ${audioResp.status}`);
+    // The synthesis request asks for mp3, so that is the right fallback when the
+    // download response carries no Content-Type header.
+    const contentType = audioResp.headers.get("content-type") ?? "audio/mpeg";
+    const buffer = Buffer.from(await audioResp.arrayBuffer());
+    return { buffer, contentType };
+  }
+
+  /**
+   * Asks DashScope to synthesize `text` and returns the URL of the generated audio.
+   *
+   * Every attempt holds one slot of the class-level TTS semaphore and keeps the
+   * configured minimum gap since the previous request. HTTP 429, HTTP 5xx, a
+   * successful response without `output.audio.url`, and network/timeout errors are
+   * retried with exponential backoff plus jitter, up to 5 attempts in total. Any
+   * other non-2xx status (for example a rejected key or invalid parameters) is
+   * permanent and is thrown immediately instead of being retried.
+   *
+   * @param genUrl Synthesis endpoint URL.
+   * @param apiKey Bearer token sent in the Authorization header.
+   * @param model  TTS model name.
+   * @param voice  Voice id.
+   * @param text   Text to synthesize.
+   * @returns URL of the synthesized audio.
+   * @throws Error right away on a non-retryable HTTP status, or after all attempts
+   *         have failed.
+   */
+  private async requestTtsUrl(
+    genUrl: string,
+    apiKey: string,
+    model: string,
+    voice: string,
+    text: string
+  ): Promise<string> {
+    const maxAttempts = 5;
+    let lastErr = "unknown";
+
+    for (let attempt = 0; attempt < maxAttempts; attempt++) {
+      // Set for permanent failures; thrown after the slot is released so the
+      // retry `catch` below cannot swallow it.
+      let fatal: Error | undefined;
+
+      await AiService.acquireTtsSlot();
+      try {
+        // Throttle: keep a minimum gap to the previous request to flatten QPS peaks.
+        const since = Date.now() - AiService.lastTtsAt;
+        if (since < AiService.TTS_MIN_GAP) {
+          await new Promise((r) => setTimeout(r, AiService.TTS_MIN_GAP - since));
+        }
+        AiService.lastTtsAt = Date.now();
+
+        const resp = await fetch(genUrl, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Authorization: `Bearer ${apiKey}`,
+          },
+          body: JSON.stringify({
+            model,
+            input: { text, voice, format: "mp3", sample_rate: 22050 },
+          }),
+          signal: AbortSignal.timeout(25_000),
+        });
+
+        if (resp.status === 429 || resp.status >= 500) {
+          // Throttled or upstream failure: back off and retry.
+          lastErr = `${resp.status}`;
+        } else if (!resp.ok) {
+          // Other 4xx: retrying cannot help, so fail fast with the upstream detail.
+          const t = await resp.text().catch(() => "");
+          fatal = new Error(`TTS 接口返回 ${resp.status}: ${t.slice(0, 200)}`);
+        } else {
+          const json = (await resp.json()) as {
+            output?: { audio?: { url?: string } };
+          };
+          const url = json?.output?.audio?.url;
+          if (url) return url;
+          lastErr = "no-url";
+        }
+      } catch (err) {
+        // Network and timeout errors are retried as well.
+        lastErr = err instanceof Error ? err.message : "network";
+      } finally {
+        AiService.releaseTtsSlot();
+      }
+
+      if (fatal) throw fatal;
+
+      if (attempt < maxAttempts - 1) {
+        // Exponential backoff capped at 5 s, plus up to 300 ms of jitter.
+        const delay = Math.min(500 * 2 ** attempt, 5000) + Math.floor(Math.random() * 300);
+        await new Promise((r) => setTimeout(r, delay));
+      }
+    }
+
+    this.logger.error(`TTS 重试仍失败：${lastErr}`);
+    throw new Error(`TTS 限流，请稍后再试（${lastErr}）`);
+  }
+
  private async chatComplete(
    messages: { role: string; content: string }[],
    temperature = 0.7
@@ -0,0 +1,14 @@
+import { IsOptional, IsString, MaxLength } from "class-validator";
+
+export class TtsDto {
+  /**
+   * Plain text to synthesize (Markdown markers already removed by the caller).
+   * Validation accepts up to 2000 characters.
+   * NOTE(review): AiService.synthesizeSpeech trims the text and keeps only the first
+   * 800 characters, so longer input is silently truncated — confirm this is intended.
+   */
+  @IsString()
+  @MaxLength(2000)
+  text!: string;
+
+  /**
+   * Optional voice id (at most 40 characters). When omitted, the service falls back
+   * to the `AI_TTS_VOICE` setting and then to `longxiaochun_v3`.
+   */
+  @IsOptional()
+  @IsString()
+  @MaxLength(40)
+  voice?: string;
+}
@@ -14,7 +14,8 @@ import { Request } from "express";
@Injectable()
 export class RateLimitGuard implements CanActivate {
  private static readonly WINDOW_MS = 60_000;
-  private static readonly MAX_REQUESTS = 20;
+  // 单条回答会按句拆分为多次 TTS 调用，故放宽阈值（仍可防刷）。
+  private static readonly MAX_REQUESTS = 120;
  private static readonly buckets = new Map<string, number[]>();

  canActivate(context: ExecutionContext): boolean {