Files
InternalAuditInterprise/backend/app/nlq/service.py
T

107 lines
3.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""自然语言查询服务。
采用"结构化意图优先 + LLM 兜底"策略:
- 若问题命中线索检索意图(置信度/场景/状态/列出线索等),直接查审计数据库返回真实结果,
实现"数据找人",不依赖外部模型,数据不出域。
- 其余开放性问题再交给 LLMProvider(本地优先)。
对应 R4 / R20 / R7。
"""
from __future__ import annotations
from dataclasses import dataclass
from sqlalchemy.orm import Session
from app.clues import service as clue_svc
from app.clues.models import ClueStatus, ConfidenceTier
from app.llm import ChatMessage, get_llm_provider
# System message placed first in the conversation on the LLM fallback path.
# It casts the model as a telecom operator's internal-audit assistant that
# answers from audit data, explains its evidence, and states plainly when no
# evidence exists instead of inventing data.
SYSTEM_PROMPT = (
    "你是电信运营商内部审计助手。"
    "基于审计数据中台的数据回答问题,给出可解释的依据;"
    "无证据支撑时明确说明,不臆造数据。"
)
# Keyword -> filter mappings for the structured clue-retrieval path.
# Keywords are matched by plain substring containment against the question,
# and dict insertion order decides which keyword wins when several match.

# Confidence wording -> confidence tier.
_CONFIDENCE_KW = {
    "高置信": ConfidenceTier.HIGH,
    "高风险": ConfidenceTier.HIGH,
    "中置信": ConfidenceTier.MEDIUM,
    "低置信": ConfidenceTier.LOW,
}

# Scenario wording -> scenario code.
_SCENARIO_KW = {"拆单": "R8", "政企": "R8", "养卡": "R9", "骗补": "R9", "彩铃": "R9"}

# Workflow wording -> clue status.
_STATUS_KW = {
    "待处理": ClueStatus.NEW,
    "已分派": ClueStatus.ASSIGNED,
    "研判": ClueStatus.REVIEWING,
    "属实": ClueStatus.CONFIRMED,
    "误报": ClueStatus.DISMISSED,
    "已销项": ClueStatus.CLOSED,
}

# Generic "list / show clues" phrasing.
# Every entry must be non-empty: "" is a substring of every string, so a blank
# keyword would classify every question as a clue query and make the LLM
# fallback unreachable. A blank entry that used to sit here was removed.
_LIST_KW = ("线索", "列出", "有哪些", "多少", "列表", "看看", "显示")

# Display names used when rendering clues in an answer.
_SCENARIO_NAME = {"R8": "政企拆单", "R9": "养卡骗补"}
_CONF_NAME = {
    ConfidenceTier.HIGH: "高置信",
    ConfidenceTier.MEDIUM: "中置信",
    ConfidenceTier.LOW: "低置信",
}
@dataclass
class NLQAnswer:
    """Result of one natural-language query, plus where the answer came from."""

    # The question exactly as it was passed to ``ask``.
    question: str
    # Answer text: either the formatted clue list or the LLM response content.
    answer: str
    # "datahub" for the structured path, otherwise the provider reported by the
    # LLM response.
    provider: str
    # Fixed label for the structured path, otherwise the model reported by the
    # LLM response.
    model: str
    # False for the structured path, otherwise copied from the LLM response.
    # NOTE(review): presumably flags whether data left the local domain - confirm.
    egress: bool
def _match_first(question: str, mapping: dict):
for kw, val in mapping.items():
if kw in question:
return val
return None
def _is_clue_query(question: str) -> bool:
    """Return whether *question* should take the structured clue-retrieval path.

    A question qualifies when it contains any generic listing keyword
    (``_LIST_KW``) or any confidence, scenario or status keyword.

    Empty keywords are ignored: ``"" in question`` is true for every string,
    so a blank entry in any keyword table would otherwise route every question
    to the structured path.
    """
    # Iterating a dict yields its keys, so tuples and dicts can be scanned
    # uniformly without building a merged dict on every call.
    keyword_groups = (_LIST_KW, _CONFIDENCE_KW, _SCENARIO_KW, _STATUS_KW)
    return any(
        bool(kw) and kw in question
        for group in keyword_groups
        for kw in group
    )
def _format_clue_answer(question: str, clues: list) -> str:
if not clues:
return "未检索到符合条件的线索。可调整筛选条件,或先运行扫描生成线索。"
lines = [f"共检索到 {len(clues)} 条线索:"]
for i, c in enumerate(clues, 1):
amount = f",涉及金额约 {c.amount_involved/10000:.1f} 万元" if c.amount_involved else ""
lines.append(
f"{i}. [{_SCENARIO_NAME.get(c.scenario_code, c.scenario_code)}] {c.title}"
f"{_CONF_NAME.get(c.confidence, c.confidence.value)},评分 {c.score:.2f}{amount}"
f"——{c.rationale}"
)
return "\n".join(lines)
def ask(question: str, session: Session | None = None) -> NLQAnswer:
    """Answer one natural-language question.

    Structured clue retrieval is tried first and needs no LLM call; anything
    else is forwarded to the configured LLM provider.

    Args:
        question: The user's question text.
        session: Database session for clue retrieval. When ``None`` the
            structured path is skipped and the question always goes to the LLM.

    Returns:
        An ``NLQAnswer`` carrying the answer text and its provenance.
    """
    wants_clues = session is not None and _is_clue_query(question)

    if not wants_clues:
        # Open-ended question: hand it to the LLM with the audit system prompt.
        llm = get_llm_provider()
        conversation = [
            ChatMessage(role="system", content=SYSTEM_PROMPT),
            ChatMessage(role="user", content=question),
        ]
        reply = llm.chat(conversation)
        return NLQAnswer(
            question=question,
            answer=reply.content,
            provider=reply.provider,
            model=reply.model,
            egress=reply.egress,
        )

    # Structured intent: derive filters from keywords and query the clue store.
    # A filter is None when the question contains none of its keywords.
    confidence = _match_first(question, _CONFIDENCE_KW)
    scenario = _match_first(question, _SCENARIO_KW)
    status = _match_first(question, _STATUS_KW)
    matched = clue_svc.list_clues(
        session, status=status, scenario_code=scenario, confidence=confidence
    )
    answer_text = _format_clue_answer(question, matched)
    return NLQAnswer(
        question=question,
        answer=answer_text,
        provider="datahub",
        model="结构化检索",
        egress=False,
    )