107 lines
3.9 KiB
Python
107 lines
3.9 KiB
Python
"""自然语言查询服务。
|
||
|
||
采用"结构化意图优先 + LLM 兜底"策略:
|
||
- 若问题命中线索检索意图(置信度/场景/状态/列出线索等),直接查审计数据库返回真实结果,
|
||
实现"数据找人",不依赖外部模型,数据不出域。
|
||
- 其余开放性问题再交给 LLMProvider(本地优先)。
|
||
对应 R4 / R20 / R7。
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass
|
||
|
||
from sqlalchemy.orm import Session
|
||
|
||
from app.clues import service as clue_svc
|
||
from app.clues.models import ClueStatus, ConfidenceTier
|
||
from app.llm import ChatMessage, get_llm_provider
|
||
|
||
# System prompt sent as the first message of every LLM-backed query.
# The text is runtime data and is kept in its original language.
SYSTEM_PROMPT = (
    "你是电信运营商内部审计助手。"
    "基于审计数据中台的数据回答问题,给出可解释的依据;"
    "无证据支撑时明确说明,不臆造数据。"
)
|
||
|
||
# Keyword -> filter-condition mappings.
# Entry order is significant: `_match_first` returns the value of the first
# key (in iteration order) that occurs in the question.
_CONFIDENCE_KW = {
    "高置信": ConfidenceTier.HIGH,
    "高风险": ConfidenceTier.HIGH,
    "中置信": ConfidenceTier.MEDIUM,
    "低置信": ConfidenceTier.LOW,
}
_SCENARIO_KW = {
    "拆单": "R8",
    "政企": "R8",
    "养卡": "R9",
    "骗补": "R9",
    "彩铃": "R9",
}
_STATUS_KW = {
    "待处理": ClueStatus.NEW,
    "已分派": ClueStatus.ASSIGNED,
    "研判": ClueStatus.REVIEWING,
    "属实": ClueStatus.CONFIRMED,
    "误报": ClueStatus.DISMISSED,
    "已销项": ClueStatus.CLOSED,
}
# Generic "list / look up" keywords that mark a question as a clue query
# even when no specific filter keyword is present.
_LIST_KW = (
    "线索",
    "列出",
    "查",
    "有哪些",
    "多少",
    "列表",
    "看看",
    "显示",
)

# Display names used when rendering clues back to the user.
_SCENARIO_NAME = {
    "R8": "政企拆单",
    "R9": "养卡骗补",
}
_CONF_NAME = {
    ConfidenceTier.HIGH: "高置信",
    ConfidenceTier.MEDIUM: "中置信",
    ConfidenceTier.LOW: "低置信",
}
|
||
|
||
|
||
@dataclass
class NLQAnswer:
    """Result of one natural-language query.

    Built either from a structured clue lookup or from an LLM provider
    response; field order is part of the positional constructor contract.
    """

    # The question text exactly as it was asked.
    question: str
    # The answer text returned to the caller.
    answer: str
    # Name of the backend that produced the answer ("datahub" for the
    # structured path, otherwise whatever the LLM response reports).
    provider: str
    # Model label reported alongside the provider.
    model: str
    # Always False on the structured path; copied from the LLM response
    # otherwise. Presumably flags whether data left the domain - confirm
    # against the LLM provider contract.
    egress: bool
|
||
|
||
|
||
def _match_first(question: str, mapping: dict):
|
||
for kw, val in mapping.items():
|
||
if kw in question:
|
||
return val
|
||
return None
|
||
|
||
|
||
def _is_clue_query(question: str) -> bool:
    """Tell whether *question* should be answered by a structured clue lookup.

    True as soon as the question contains any generic listing keyword or any
    confidence / scenario / status keyword; False otherwise.
    """
    keyword_tables = (_LIST_KW, _CONFIDENCE_KW, _SCENARIO_KW, _STATUS_KW)
    for table in keyword_tables:
        # Iterating a dict yields its keys, so tuples and dicts are
        # scanned the same way.
        for keyword in table:
            if keyword in question:
                return True
    return False
|
||
|
||
|
||
def _format_clue_answer(question: str, clues: list) -> str:
|
||
if not clues:
|
||
return "未检索到符合条件的线索。可调整筛选条件,或先运行扫描生成线索。"
|
||
lines = [f"共检索到 {len(clues)} 条线索:"]
|
||
for i, c in enumerate(clues, 1):
|
||
amount = f",涉及金额约 {c.amount_involved/10000:.1f} 万元" if c.amount_involved else ""
|
||
lines.append(
|
||
f"{i}. [{_SCENARIO_NAME.get(c.scenario_code, c.scenario_code)}] {c.title}"
|
||
f"({_CONF_NAME.get(c.confidence, c.confidence.value)},评分 {c.score:.2f}{amount})"
|
||
f"——{c.rationale}"
|
||
)
|
||
return "\n".join(lines)
|
||
|
||
|
||
def ask(question: str, session: Session | None = None) -> NLQAnswer:
    """Answer one natural-language question.

    Structured clue retrieval is tried first; everything else goes to the
    configured LLM provider.

    Args:
        question: The user's question text.
        session: Database session for the structured path. When ``None``
            the structured path is skipped and the LLM is always used.

    Returns:
        An ``NLQAnswer``. On the structured path the provider is
        ``"datahub"`` and ``egress`` is ``False``; on the LLM path the
        provider, model and egress fields are copied from the LLM response.
    """
    use_structured = session is not None and _is_clue_query(question)

    if not use_structured:
        # Open-ended question: hand it to the LLM together with the system prompt.
        llm = get_llm_provider()
        conversation = [
            ChatMessage(role="system", content=SYSTEM_PROMPT),
            ChatMessage(role="user", content=question),
        ]
        reply = llm.chat(conversation)
        return NLQAnswer(
            question=question,
            answer=reply.content,
            provider=reply.provider,
            model=reply.model,
            egress=reply.egress,
        )

    # Structured intent: derive filters from keywords and query clues directly.
    # A filter is None when no keyword from its table occurs in the question.
    filters = {
        "confidence": _match_first(question, _CONFIDENCE_KW),
        "scenario_code": _match_first(question, _SCENARIO_KW),
        "status": _match_first(question, _STATUS_KW),
    }
    matched = clue_svc.list_clues(session, **filters)
    return NLQAnswer(
        question=question,
        answer=_format_clue_answer(question, matched),
        provider="datahub",
        model="结构化检索",
        egress=False,
    )
|