feat: 添加线索引擎、NLQ、场景检测、前端界面等核心功能模块

This commit is contained in:
freedakgmail
2026-06-16 08:15:15 +08:00
parent 7b1e2b10a8
commit 48340f6011
62 changed files with 6772 additions and 65 deletions
+1
View File
@@ -0,0 +1 @@
"""审计场景检测器:将业务数据中的异常模式转化为线索。"""
+85
View File
@@ -0,0 +1,85 @@
"""场景二 · 市场业务真实性:养卡骗补检测(R9)。
检测"脉冲式增长 + 规律性衰减"的周期性造假:渠道每月新增大量用户订购,
固定周期后这些用户集中退订(骗补后弃养)。结合佣金与业务质量匹配度。
"""
from __future__ import annotations
from dataclasses import dataclass
@dataclass
class CohortPoint:
    """Retention rate of one newly acquired cohort at month offset N."""

    # Month offset N within the cohort's lifetime.
    month_index: int
    # Share of the cohort still retained in that month, on a 0-1 scale.
    retention: float
@dataclass
class ChurnFinding:
    """Outcome of scanning a cohort retention curve for a churn cliff."""

    # Month index at which the cliff was detected; None when none was found.
    cliff_month: int | None
    # Largest month-over-month retention drop that was observed.
    max_drop: float
    # True when the curve shows the "pulse growth then regular decay" pattern.
    pulse_then_decay: bool
def detect_pulse_decay(
    retention_curve: list[CohortPoint],
    cliff_drop: float = 0.5,
) -> ChurnFinding:
    """Detect a cliff-style mass unsubscription in a cohort retention curve.

    The points are ordered by ``month_index`` and every consecutive pair is
    compared. The drop of a pair is the plain difference
    ``earlier.retention - later.retention`` (an absolute difference, not a
    percentage of the earlier value). The steepest positive drop is tracked;
    whenever a new steepest drop is also at least ``cliff_drop``, the later
    month of that pair becomes the cliff month.

    Args:
        retention_curve: Retention points of one cohort, in any order.
        cliff_drop: Minimum drop that counts as a cliff (default 0.5).

    Returns:
        A ``ChurnFinding`` holding the cliff month (``None`` when no drop
        qualified), the steepest drop rounded to 3 decimals, and
        ``pulse_then_decay`` set to whether a cliff month was found.
    """
    timeline = sorted(retention_curve, key=lambda point: point.month_index)
    steepest = 0.0
    steepest_month: int | None = None
    for earlier, later in zip(timeline[:-1], timeline[1:], strict=False):
        fall = earlier.retention - later.retention
        if not fall > steepest:
            # Not a new record drop; nothing to update for this pair.
            continue
        steepest = fall
        if fall >= cliff_drop:
            steepest_month = later.month_index
    found_cliff = steepest_month is not None
    return ChurnFinding(
        cliff_month=steepest_month,
        max_drop=round(steepest, 3),
        pulse_then_decay=found_cliff,
    )
def commission_quality_mismatch(
    commission_paid: float,
    active_ratio: float,
    zero_usage_ratio: float,
) -> float:
    """Score how poorly the paid commission matches business quality (0-1).

    Commission that was paid out while few users stay active and many show
    zero usage yields a high mismatch.

    Args:
        commission_paid: Commission amount paid; a value <= 0 yields 0.0.
        active_ratio: Share of users that are still active.
        zero_usage_ratio: Share of users with zero calls / zero traffic.

    Returns:
        ``0.6 * zero_usage_ratio + 0.4 * (1 - active_ratio)``, clamped to
        the range [0, 1] and rounded to 3 decimals.
    """
    if commission_paid <= 0:
        # No commission was paid, so there is nothing to be mismatched.
        return 0.0
    idle_part = 0.6 * zero_usage_ratio
    inactive_part = 0.4 * (1 - active_ratio)
    raw = idle_part + inactive_part
    if raw < 0.0:
        raw = 0.0
    elif raw > 1.0:
        raw = 1.0
    return round(raw, 3)
def churn_risk_score(finding: ChurnFinding, mismatch: float) -> float:
    """Combine the churn-cliff finding and commission mismatch into one score.

    Args:
        finding: Result of the retention-curve scan.
        mismatch: Commission / business-quality mismatch degree.

    Returns:
        With a cliff: ``0.4 + 0.4 * max_drop + 0.2 * mismatch`` capped at 1.0.
        Without a cliff: ``0.3 * mismatch``. Both are rounded to 3 decimals.
    """
    if finding.pulse_then_decay:
        combined = 0.4 + 0.4 * finding.max_drop + 0.2 * mismatch
        return round(min(combined, 1.0), 3)
    # No cliff detected: only the mismatch contributes, at a reduced weight.
    return round(0.3 * mismatch, 3)
def build_rationale(finding: ChurnFinding, mismatch: float) -> str:
    """Build the human-readable rationale text for a churn finding.

    Args:
        finding: Result of the retention-curve scan.
        mismatch: Commission / business-quality mismatch degree, rendered
            as a whole-number percentage.

    Returns:
        A sentence describing the cliff month and the largest drop when a
        cliff was found, otherwise a shorter note carrying only the mismatch.
    """
    if not finding.pulse_then_decay:
        return f"未见明显断崖退订,但佣金与业务质量不匹配度为 {mismatch:.0%},建议关注。"
    cliff_clause = f"渠道新增用户在第 {finding.cliff_month} 个月出现断崖式集中退订"
    drop_clause = f"(最大单月留存骤降 {finding.max_drop:.0%}),呈"
    pattern_clause = f"'脉冲式增长 + 规律性衰减'特征;佣金与业务质量不匹配度 {mismatch:.0%}"
    verdict_clause = "高度疑似养卡骗补(骗补后弃养)。"
    return cliff_clause + drop_clause + pattern_clause + verdict_clause
+78
View File
@@ -0,0 +1,78 @@
"""场景一 · 政企收入全链路穿透:拆单规避检测(R8)。
检测点:
1. 合同金额集中分布在审批阈值边缘(如阈值 80% 以上但不超阈值)。
2. 结合知识图谱穿透识别隐性实控人(多个客户经法人关联到同一实控人)。
满足上述模式则生成线索,附证据链与人话理由。
"""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class ContractRecord:
    """Key facts of one contract, used as input to the penetration analysis."""

    # Identifier of the contract.
    contract_id: str
    # Presumably identifies the customer the contract belongs to -- confirm
    # against the caller that builds these records.
    customer_key: str
    # Contract amount; compared against the approval threshold.
    amount: float
@dataclass
class SplitFinding:
    """Result of the order-splitting (threshold-edge) detection."""

    # Contracts whose amount falls inside the near-threshold band.
    near_threshold: list[ContractRecord] = field(default_factory=list)
    # Share of near-threshold contracts among all inspected contracts.
    ratio: float = 0.0
    # Sum of the amounts of the near-threshold contracts.
    total_amount: float = 0.0

    @property
    def hit(self) -> bool:
        """Return True when at least three near-threshold contracts exist."""
        flagged_count = len(self.near_threshold)
        return flagged_count >= 3
def detect_threshold_edge(
    contracts: list[ContractRecord],
    approval_threshold: float,
    edge_ratio: float = 0.8,
) -> SplitFinding:
    """Collect contracts whose amount sits just below the approval threshold.

    A contract is in the edge band when
    ``edge_ratio * approval_threshold <= amount < approval_threshold``.
    A batch of such "just under the threshold" contracts is the typical
    signature of order splitting to dodge approval.

    Args:
        contracts: Contracts to inspect.
        approval_threshold: Approval threshold; must be positive.
        edge_ratio: Lower bound of the band as a fraction of the threshold.

    Returns:
        A ``SplitFinding`` with the edge-band contracts, their share of all
        inspected contracts (0.0 for empty input) and their summed amount.

    Raises:
        ValueError: If ``approval_threshold`` is zero or negative.
    """
    if approval_threshold <= 0:
        raise ValueError("审批阈值必须为正数")
    band_floor = edge_ratio * approval_threshold
    edge_contracts = [
        item for item in contracts
        if band_floor <= item.amount < approval_threshold
    ]
    if contracts:
        share = len(edge_contracts) / len(contracts)
    else:
        # Avoid dividing by zero when nothing was inspected.
        share = 0.0
    return SplitFinding(
        near_threshold=edge_contracts,
        ratio=share,
        total_amount=sum(item.amount for item in edge_contracts),
    )
def split_risk_score(finding: SplitFinding, shared_controller: bool) -> float:
    """Score the order-splitting risk from the edge finding and ownership link.

    Args:
        finding: Result of the threshold-edge detection.
        shared_controller: Whether the customers trace back to one controller.

    Returns:
        0.0 when the finding is not a hit. Otherwise 0.1 per edge contract
        (capped at 0.6), plus ``0.2 * ratio``, plus 0.3 when a shared
        controller was found; the total is capped at 1.0 and rounded to
        3 decimals.
    """
    if finding.hit:
        edge_count = len(finding.near_threshold)
        # More edge contracts look more suspicious, up to a 0.6 ceiling.
        score = min(0.6, 0.1 * edge_count)
        score = score + 0.2 * finding.ratio
        if shared_controller:
            # A single shared controller is treated as strong evidence.
            score = score + 0.3
        return round(min(score, 1.0), 3)
    return 0.0
def build_rationale(finding: SplitFinding, threshold: float, shared_controller: bool) -> str:
    """Build the human-readable rationale text for an order-splitting finding.

    Args:
        finding: Result of the threshold-edge detection.
        threshold: Approval threshold, rendered without decimals.
        shared_controller: Whether the customers trace back to one controller.

    Returns:
        A summary of the edge-band contract count, their share and summed
        amount, followed by either the shared-controller conclusion or a
        suggestion to investigate customer relationships further.
    """
    summary = (
        f"检测到 {len(finding.near_threshold)} 份合同金额集中在审批阈值 "
        f"{threshold:.0f} 的边缘区间(占比 {finding.ratio:.0%}),"
    )
    total_clause = f"边缘合同金额合计约 {finding.total_amount:.0f}"
    if shared_controller:
        verdict = "且经工商关联穿透,相关客户疑似同属一个隐性实控人,高度符合拆单规避特征。"
    else:
        verdict = "建议进一步穿透客户关联关系以确认是否同一实控人。"
    return summary + total_clause + verdict