"""自然语言查询服务。 采用"结构化意图优先 + LLM 兜底"策略: - 若问题命中线索检索意图(置信度/场景/状态/列出线索等),直接查审计数据库返回真实结果, 实现"数据找人",不依赖外部模型,数据不出域。 - 其余开放性问题再交给 LLMProvider(本地优先)。 对应 R4 / R20 / R7。 """ from __future__ import annotations from dataclasses import dataclass from sqlalchemy.orm import Session from app.clues import service as clue_svc from app.clues.models import ClueStatus, ConfidenceTier from app.llm import ChatMessage, get_llm_provider SYSTEM_PROMPT = ( "你是电信运营商内部审计助手。基于审计数据中台的数据回答问题," "给出可解释的依据;无证据支撑时明确说明,不臆造数据。" ) # 关键词 → 过滤条件映射 _CONFIDENCE_KW = {"高置信": ConfidenceTier.HIGH, "高风险": ConfidenceTier.HIGH, "中置信": ConfidenceTier.MEDIUM, "低置信": ConfidenceTier.LOW} _SCENARIO_KW = {"拆单": "R8", "政企": "R8", "养卡": "R9", "骗补": "R9", "彩铃": "R9"} _STATUS_KW = {"待处理": ClueStatus.NEW, "已分派": ClueStatus.ASSIGNED, "研判": ClueStatus.REVIEWING, "属实": ClueStatus.CONFIRMED, "误报": ClueStatus.DISMISSED, "已销项": ClueStatus.CLOSED} _LIST_KW = ("线索", "列出", "查", "有哪些", "多少", "列表", "看看", "显示") _SCENARIO_NAME = {"R8": "政企拆单", "R9": "养卡骗补"} _CONF_NAME = {ConfidenceTier.HIGH: "高置信", ConfidenceTier.MEDIUM: "中置信", ConfidenceTier.LOW: "低置信"} @dataclass class NLQAnswer: question: str answer: str provider: str model: str egress: bool def _match_first(question: str, mapping: dict): for kw, val in mapping.items(): if kw in question: return val return None def _is_clue_query(question: str) -> bool: return any(kw in question for kw in _LIST_KW) or any( kw in question for kw in {**_CONFIDENCE_KW, **_SCENARIO_KW, **_STATUS_KW} ) def _format_clue_answer(question: str, clues: list) -> str: if not clues: return "未检索到符合条件的线索。可调整筛选条件,或先运行扫描生成线索。" lines = [f"共检索到 {len(clues)} 条线索:"] for i, c in enumerate(clues, 1): amount = f",涉及金额约 {c.amount_involved/10000:.1f} 万元" if c.amount_involved else "" lines.append( f"{i}. [{_SCENARIO_NAME.get(c.scenario_code, c.scenario_code)}] {c.title}" f"({_CONF_NAME.get(c.confidence, c.confidence.value)},评分 {c.score:.2f}{amount})" f"——{c.rationale}" ) return "\n".join(lines) def ask(question: str, session: Session | None = None) -> NLQAnswer: """处理一次自然语言查询:优先结构化检索,其余交给 LLM。""" # 结构化意图:检索线索(数据找人,不出域) if session is not None and _is_clue_query(question): confidence = _match_first(question, _CONFIDENCE_KW) scenario = _match_first(question, _SCENARIO_KW) status = _match_first(question, _STATUS_KW) clues = clue_svc.list_clues( session, status=status, scenario_code=scenario, confidence=confidence ) return NLQAnswer( question=question, answer=_format_clue_answer(question, clues), provider="datahub", model="结构化检索", egress=False, ) # 开放性问题:交给 LLM(本地优先) provider = get_llm_provider() messages = [ ChatMessage(role="system", content=SYSTEM_PROMPT), ChatMessage(role="user", content=question), ] resp = provider.chat(messages) return NLQAnswer( question=question, answer=resp.content, provider=resp.provider, model=resp.model, egress=resp.egress, )