feat: 添加线索引擎、NLQ、场景检测、前端界面等核心功能模块

This commit is contained in:
freedakgmail
2026-06-16 08:15:15 +08:00
parent 7b1e2b10a8
commit 48340f6011
62 changed files with 6772 additions and 65 deletions
+86
View File
@@ -0,0 +1,86 @@
"""线索看板与处置 API(R7/R17/R18/R20)。
注意:不提供删除线索的端点(R19 线索不可删,独立性硬约束)。
"""
from __future__ import annotations
import uuid
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.orm import Session
from app.api.schemas import (
AdjudicateRequest,
AssignRequest,
ClueOut,
DashboardSummary,
)
from app.clues import service as clue_svc
from app.clues.models import Clue, ClueStatus, ConfidenceTier
from app.db import get_session
# Router for the clue endpoints; routes registered on it are served under the /clues prefix.
router = APIRouter(prefix="/clues", tags=["clues"])
@router.get("", response_model=list[ClueOut])
def list_clues(
    status: ClueStatus | None = Query(default=None),
    scenario_code: str | None = Query(default=None),
    confidence: ConfidenceTier | None = Query(default=None),
    session: Session = Depends(get_session),
) -> list[Clue]:
    """List clues, optionally narrowed by status, scenario code and confidence tier.

    All three filters are optional query parameters defaulting to ``None``;
    they are forwarded unchanged to the clue service together with the session.
    """
    filters = {
        "status": status,
        "scenario_code": scenario_code,
        "confidence": confidence,
    }
    return clue_svc.list_clues(session, **filters)
@router.get("/summary", response_model=DashboardSummary)
def summary(session: Session = Depends(get_session)) -> DashboardSummary:
    """Operations dashboard summary (baseline metrics for R18/R21).

    Loads every clue and tallies it by status, confidence tier and scenario
    code, and adds up the amounts involved.

    Returns:
        A ``DashboardSummary`` with the total clue count, the three
        per-category counts and the summed ``amount_involved``.
    """
    # Function-scope import so the block does not depend on a file-level change.
    from collections import Counter

    # NOTE(review): this pulls the whole clue table into memory on every call;
    # consider moving the aggregation into SQL if the table grows large.
    clues = session.query(Clue).all()

    by_status = Counter()
    by_conf = Counter()
    by_scenario = Counter()
    total_amount = 0.0
    for c in clues:
        by_status[c.status.value] += 1
        by_conf[c.confidence.value] += 1
        by_scenario[c.scenario_code] += 1
        # A missing (None) amount contributes zero to the total.
        total_amount += c.amount_involved or 0.0

    # Convert back to plain dicts so the response payload type is unchanged.
    return DashboardSummary(
        total=len(clues),
        by_status=dict(by_status),
        by_confidence=dict(by_conf),
        by_scenario=dict(by_scenario),
        total_amount_involved=total_amount,
    )
@router.get("/{clue_id}", response_model=ClueOut)
def get_clue(clue_id: uuid.UUID, session: Session = Depends(get_session)) -> Clue:
    """Fetch a single clue by primary key.

    Raises:
        HTTPException: 404 when no clue with ``clue_id`` exists.
    """
    found = session.get(Clue, clue_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="线索不存在")
@router.post("/{clue_id}/assign", response_model=ClueOut)
def assign_clue(
    clue_id: uuid.UUID, req: AssignRequest, session: Session = Depends(get_session)
) -> Clue:
    """Assign a clue to the assignee named in the request body.

    Returns whatever the clue service's ``assign`` returns for the clue.

    Raises:
        HTTPException: 404 when no clue with ``clue_id`` exists.
    """
    target = session.get(Clue, clue_id)
    if target is not None:
        return clue_svc.assign(
            session, target, assignee=req.assignee, actor=req.actor
        )
    raise HTTPException(status_code=404, detail="线索不存在")
@router.post("/{clue_id}/adjudicate", response_model=ClueOut)
def adjudicate_clue(
    clue_id: uuid.UUID, req: AdjudicateRequest, session: Session = Depends(get_session)
) -> Clue:
    """Record an adjudication (confirmed or not, with optional note) on a clue.

    The service's return value is ignored; the clue object that was looked up
    is returned to the caller.

    Raises:
        HTTPException: 404 when no clue with ``clue_id`` exists.
    """
    target = session.get(Clue, clue_id)
    if target is not None:
        clue_svc.adjudicate(
            session,
            target,
            confirmed=req.confirmed,
            actor=req.actor,
            note=req.note,
        )
        return target
    raise HTTPException(status_code=404, detail="线索不存在")
+24
View File
@@ -0,0 +1,24 @@
"""自然语言查询 API(R4/R20)。"""
from __future__ import annotations
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from app.api.schemas import NLQRequest, NLQResponse
from app.db import get_session
from app.nlq import service as nlq
# Router for the natural-language-query endpoint; routes registered on it are served under /nlq.
router = APIRouter(prefix="/nlq", tags=["nlq"])
@router.post("", response_model=NLQResponse)
def ask(req: NLQRequest, session: Session = Depends(get_session)) -> NLQResponse:
    """Answer a natural-language question through the NLQ service.

    The service result's ``question``, ``answer``, ``provider``, ``model`` and
    ``egress`` attributes are copied one-to-one into the response model.
    """
    ans = nlq.ask(req.question, session=session)
    copied_fields = ("question", "answer", "provider", "model", "egress")
    payload = {name: getattr(ans, name) for name in copied_fields}
    return NLQResponse(**payload)
+49
View File
@@ -34,3 +34,52 @@ class PenetrateResponse(BaseModel):
max_depth: int
related_count: int
related: list[RelatedEntityOut]
class ClueOut(BaseModel):
    # Response schema for a clue as returned by the clue endpoints.
    id: uuid.UUID
    title: str
    risk_domain: str
    scenario_code: str
    confidence: str
    score: float
    status: str
    rationale: str
    # evidence / subjects default to a fresh empty dict per instance.
    evidence: dict = Field(default_factory=dict)
    subjects: dict = Field(default_factory=dict)
    # Optional fields: absent values are reported as None.
    amount_involved: float | None = None
    assignee: str | None = None
    feedback: str | None = None
    # Pydantic v2 setting: allow building the model from an object's attributes
    # (e.g. an ORM row) instead of only from a dict.
    model_config = {"from_attributes": True}
class AssignRequest(BaseModel):
    # Request body for assigning a clue; both fields must be non-empty strings.
    assignee: str = Field(min_length=1)
    actor: str = Field(min_length=1)
class AdjudicateRequest(BaseModel):
    # Request body for adjudicating a clue.
    confirmed: bool
    # actor must be a non-empty string; note is optional.
    actor: str = Field(min_length=1)
    note: str | None = None
class NLQRequest(BaseModel):
    # Request body for a natural-language query; the question must be non-empty.
    question: str = Field(min_length=1)
class NLQResponse(BaseModel):
    # Response body for a natural-language query: the question, its answer,
    # and the provider/model that produced it.
    question: str
    answer: str
    provider: str
    model: str
    # NOTE(review): presumably flags whether the request left the local domain — confirm.
    egress: bool
class DashboardSummary(BaseModel):
    # Aggregated dashboard figures: overall clue count, per-category counts
    # keyed by string label, and the summed amount involved.
    total: int
    by_status: dict[str, int]
    by_confidence: dict[str, int]
    by_scenario: dict[str, int]
    total_amount_involved: float
+2 -2
View File
@@ -82,8 +82,8 @@ class Clue(Base):
amount_involved: Mapped[float | None] = mapped_column(Float, nullable=True)
assignee: Mapped[str | None] = mapped_column(String(64), nullable=True)
# 误报/属实反馈(R18 反馈学习)
feedback: Mapped[str | None] = mapped_column(String(16), nullable=True) # confirmed/false_positive
feedback: Mapped[str | None] = mapped_column(String(16), nullable=True)
"""误报/属实反馈(R18 反馈学习):confirmed / false_positive"""
# 可追溯:产生该线索时的模型/规则/数据版本(R19 三重留痕)
model_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
+4 -1
View File
@@ -132,7 +132,10 @@ def assign(session: Session, clue: Clue, assignee: str, actor: str) -> Clue:
session.flush()
if clue.status == ClueStatus.NEW:
transition(session, clue, ClueStatus.ASSIGNED, actor, f"分派给 {assignee}")
audit.record(session, actor, "assign_clue", target_type="clue", target_id=str(clue.id), detail={"assignee": assignee})
audit.record(
session, actor, "assign_clue",
target_type="clue", target_id=str(clue.id), detail={"assignee": assignee},
)
return clue
+1
View File
@@ -19,6 +19,7 @@ class AppEnv(str, Enum):
class LLMProviderName(str, Enum):
dashscope = "dashscope" # 公网千问,仅 dev
vllm = "vllm" # 本地,prod
mock = "mock" # 本地确定性 Mock,开发/测试,不出域
# 被认定为"公网/出域"的 Provider,prod 下禁止使用
+14
View File
@@ -41,6 +41,10 @@ class RelationshipType(str, Enum):
SUPPLIES = "supplies" # 供应商 —供货→ 合同/工单
HANDLED_BY = "handled_by" # 工单 —处理人→ 员工
SETTLES = "settles" # 结算单 —结算→ 合同
EMPLOYED_BY = "employed_by" # 员工 —任职于→ 客户/供应商(组织)
OPERATES = "operates" # 员工 —操作→ 号码/账户(R15 越权检测)
SUBSCRIBES = "subscribes" # 号码 —订购→ 合同(R9/R10 订购关联)
BIDS_FOR = "bids_for" # 供应商 —投标→ 工单(R12 招投标关联)
# 关系的合法 (源实体类型, 目标实体类型) 约束,用于校验图谱写入
@@ -72,6 +76,16 @@ RELATIONSHIP_DOMAIN: dict[RelationshipType, tuple[set[EntityType], set[EntityTyp
),
RelationshipType.HANDLED_BY: ({EntityType.WORK_ORDER}, {EntityType.EMPLOYEE}),
RelationshipType.SETTLES: ({EntityType.SETTLEMENT}, {EntityType.CONTRACT}),
RelationshipType.EMPLOYED_BY: (
{EntityType.EMPLOYEE},
{EntityType.CUSTOMER, EntityType.SUPPLIER},
),
RelationshipType.OPERATES: (
{EntityType.EMPLOYEE},
{EntityType.MSISDN, EntityType.ACCOUNT},
),
RelationshipType.SUBSCRIBES: ({EntityType.MSISDN}, {EntityType.CONTRACT}),
RelationshipType.BIDS_FOR: ({EntityType.SUPPLIER}, {EntityType.WORK_ORDER}),
}
+502
View File
@@ -0,0 +1,502 @@
"""源明细落地层(Staging / Raw)。
保存数据中心按 `数据要求.md` 提供的原始明细,作为"原始证据"留存;
再由接入适配器(app/ingest)映射/投影到通用本体(entity/relationship/metric_event)。
两层并存:源明细可回溯原始数据,本体支撑关联穿透与时序分析。
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy import Date, DateTime, Float, Index, Integer, String, Text
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from app.db import Base
def _uuid() -> uuid.UUID:
    """Generate a fresh random (version 4) UUID."""
    fresh_id = uuid.uuid4()
    return fresh_id
def _now() -> dt.datetime:
    """Return the current instant as a timezone-aware datetime in UTC."""
    utc = dt.timezone.utc
    return dt.datetime.now(tz=utc)
# ---------------------------------------------------------------------------
# R8 · 政企收入全链路穿透 / 拆单规避(§4.1)
# ---------------------------------------------------------------------------
class SrcContract(Base):
    """Source detail: government/enterprise contract (data requirements §4.1 / R8).

    Stored in table ``src_contract`` and indexed on ``customer_key``.
    """

    __tablename__ = "src_contract"
    __table_args__ = (Index("ix_src_contract_customer", "customer_key"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    contract_no: Mapped[str] = mapped_column(String(64), nullable=False)
    customer_key: Mapped[str] = mapped_column(String(64), nullable=False)
    customer_name: Mapped[str | None] = mapped_column(String(256))
    amount: Mapped[float] = mapped_column(Float, nullable=False)
    sign_date: Mapped[dt.date | None] = mapped_column(Date)
    approval_threshold: Mapped[float | None] = mapped_column(Float)
    approval_level: Mapped[str | None] = mapped_column(String(32))
    legal_person: Mapped[str | None] = mapped_column(String(128))
    register_address: Mapped[str | None] = mapped_column(String(256))
    pay_account: Mapped[str | None] = mapped_column(String(64))
    # No foreign key is declared; presumably points at the ingest data version — confirm.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    # Defaults to the current UTC time (via _now) when the row is created.
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcContractApproval(Base):
    """Source detail: contract approval log (supplement to R8).

    Stored in table ``src_contract_approval`` and indexed on ``contract_no``.
    """

    __tablename__ = "src_contract_approval"
    __table_args__ = (Index("ix_src_approval_contract", "contract_no"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    contract_no: Mapped[str] = mapped_column(String(64), nullable=False)
    approval_step: Mapped[int] = mapped_column(Integer, nullable=False)
    approver: Mapped[str | None] = mapped_column(String(128))
    approval_result: Mapped[str | None] = mapped_column(String(32))  # approved/rejected
    approval_time: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True))
    remark: Mapped[str | None] = mapped_column(Text)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcPayment(Base):
    """Source detail: payment collection records (R8 payment-timing breaches).

    Stored in table ``src_payment`` and indexed on ``contract_no``.
    """

    __tablename__ = "src_payment"
    __table_args__ = (Index("ix_src_payment_contract", "contract_no"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    contract_no: Mapped[str] = mapped_column(String(64), nullable=False)
    pay_account: Mapped[str | None] = mapped_column(String(64))
    pay_amount: Mapped[float] = mapped_column(Float, nullable=False)
    pay_date: Mapped[dt.date | None] = mapped_column(Date)
    pay_type: Mapped[str | None] = mapped_column(String(32))  # prepayment / final payment / full payment
    overdue_flag: Mapped[str | None] = mapped_column(String(8))  # Y/N
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
# ---------------------------------------------------------------------------
# R9 · 市场业务真实性 / 养卡骗补(§4.2)
# ---------------------------------------------------------------------------
class SrcChannelMonthly(Base):
    """Source detail: channel users' monthly retention and commission/activity (data requirements §4.2 / R9).

    Stored in table ``src_channel_monthly`` and indexed on ``channel_key``.
    """

    __tablename__ = "src_channel_monthly"
    __table_args__ = (Index("ix_src_channel_key", "channel_key"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    channel_key: Mapped[str] = mapped_column(String(64), nullable=False)
    cohort_label: Mapped[str] = mapped_column(String(32), nullable=False)  # new-user cohort (e.g. 2025-01)
    month_index: Mapped[int] = mapped_column(Integer, nullable=False)  # N-th month
    cohort_size: Mapped[int] = mapped_column(Integer, default=0)
    retained: Mapped[int] = mapped_column(Integer, default=0)
    commission_paid: Mapped[float] = mapped_column(Float, default=0.0)
    active_ratio: Mapped[float] = mapped_column(Float, default=0.0)
    zero_usage_ratio: Mapped[float] = mapped_column(Float, default=0.0)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcSubscription(Base):
    """Source detail: user subscribe/unsubscribe records (R9 subscription churn analysis).

    Stored in table ``src_subscription`` and indexed on ``msisdn``.
    """

    __tablename__ = "src_subscription"
    __table_args__ = (Index("ix_src_sub_msisdn", "msisdn"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    msisdn: Mapped[str] = mapped_column(String(32), nullable=False)
    channel_key: Mapped[str | None] = mapped_column(String(64))
    product_code: Mapped[str | None] = mapped_column(String(64))
    subscribe_time: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True))
    unsubscribe_time: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True))
    region: Mapped[str | None] = mapped_column(String(64))
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
# ---------------------------------------------------------------------------
# R10 · 收入与成本跨期匹配(§4.3)
# ---------------------------------------------------------------------------
class SrcRevenueRecognition(Base):
    """Source detail: revenue recognition vouchers and line items (R10).

    Stored in table ``src_revenue_recognition`` and indexed on ``contract_no``.
    """

    __tablename__ = "src_revenue_recognition"
    __table_args__ = (Index("ix_src_rev_contract", "contract_no"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    voucher_no: Mapped[str] = mapped_column(String(64), nullable=False)
    contract_no: Mapped[str | None] = mapped_column(String(64))
    recognition_date: Mapped[dt.date | None] = mapped_column(Date)
    recognition_amount: Mapped[float] = mapped_column(Float, nullable=False)
    billing_mode: Mapped[str | None] = mapped_column(String(32))  # pay-as-you-go / annual package / lump-sum prepayment
    period_start: Mapped[dt.date | None] = mapped_column(Date)
    period_end: Mapped[dt.date | None] = mapped_column(Date)
    prepaid_flag: Mapped[str | None] = mapped_column(String(8))  # Y/N: advance receipt / lump-sum prepayment
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcCostAmortization(Base):
    """Source detail: cost amortization line items (R10).

    Stored in table ``src_cost_amortization`` and indexed on ``contract_no``.
    """

    __tablename__ = "src_cost_amortization"
    __table_args__ = (Index("ix_src_cost_contract", "contract_no"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    voucher_no: Mapped[str] = mapped_column(String(64), nullable=False)
    contract_no: Mapped[str | None] = mapped_column(String(64))
    cost_type: Mapped[str | None] = mapped_column(String(64))  # equipment / installation / maintenance
    amortization_date: Mapped[dt.date | None] = mapped_column(Date)
    amortization_amount: Mapped[float] = mapped_column(Float, nullable=False)
    total_periods: Mapped[int | None] = mapped_column(Integer)
    current_period: Mapped[int | None] = mapped_column(Integer)
    delivery_date: Mapped[dt.date | None] = mapped_column(Date)  # delivery / racking date
    acceptance_date: Mapped[dt.date | None] = mapped_column(Date)  # acceptance date
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
# ---------------------------------------------------------------------------
# R11 · 渠道佣金与代理商套利(§4.4)
# ---------------------------------------------------------------------------
class SrcTerminalBinding(Base):
    """Source detail: terminal IMEI-to-number binding / subsidy payout (R11).

    Stored in table ``src_terminal_binding`` with separate indexes on
    ``imei`` and ``msisdn``.
    """

    __tablename__ = "src_terminal_binding"
    __table_args__ = (
        Index("ix_src_terminal_imei", "imei"),
        Index("ix_src_terminal_msisdn", "msisdn"),
    )
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    imei: Mapped[str] = mapped_column(String(32), nullable=False)
    msisdn: Mapped[str] = mapped_column(String(32), nullable=False)
    brand_model: Mapped[str | None] = mapped_column(String(128))
    activate_time: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True))
    subsidy_amount: Mapped[float] = mapped_column(Float, default=0.0)
    commission_amount: Mapped[float] = mapped_column(Float, default=0.0)
    online_days: Mapped[int | None] = mapped_column(Integer)  # days on network
    post_activate_traffic_mb: Mapped[float | None] = mapped_column(Float)  # traffic after activation
    region: Mapped[str | None] = mapped_column(String(64))  # home region
    cross_province_flag: Mapped[str | None] = mapped_column(String(8))  # cross-province activation Y/N
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
# ---------------------------------------------------------------------------
# R12 · 网络建设与工程采购(§4.5)
# ---------------------------------------------------------------------------
class SrcBidding(Base):
    """Source detail: bidding records (R12).

    Stored in table ``src_bidding`` and indexed on ``project_no``.
    """

    __tablename__ = "src_bidding"
    __table_args__ = (Index("ix_src_bidding_project", "project_no"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    project_no: Mapped[str] = mapped_column(String(64), nullable=False)
    project_name: Mapped[str | None] = mapped_column(String(256))
    bidder_key: Mapped[str] = mapped_column(String(64), nullable=False)  # bidder / supplier ID
    bidder_name: Mapped[str | None] = mapped_column(String(256))
    bid_amount: Mapped[float | None] = mapped_column(Float)
    bid_time: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True))
    win_flag: Mapped[str | None] = mapped_column(String(8))  # bid won Y/N
    technical_score: Mapped[float | None] = mapped_column(Float)
    legal_person: Mapped[str | None] = mapped_column(String(128))
    shareholder_info: Mapped[str | None] = mapped_column(Text)  # JSON or free-text description
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcProjectSignoff(Base):
    """Source detail: work-quantity sign-off and construction records (R12).

    Stored in table ``src_project_signoff`` and indexed on ``project_no``.
    """

    __tablename__ = "src_project_signoff"
    __table_args__ = (Index("ix_src_signoff_project", "project_no"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    project_no: Mapped[str] = mapped_column(String(64), nullable=False)
    work_order_no: Mapped[str | None] = mapped_column(String(64))
    signoff_quantity: Mapped[float | None] = mapped_column(Float)  # signed-off work quantity
    unit: Mapped[str | None] = mapped_column(String(32))
    resource_consumed: Mapped[float | None] = mapped_column(Float)  # actual resources consumed
    contractor_key: Mapped[str | None] = mapped_column(String(64))  # construction crew
    signoff_date: Mapped[dt.date | None] = mapped_column(Date)
    inspection_lat: Mapped[float | None] = mapped_column(Float)  # inspection GPS latitude
    inspection_lng: Mapped[float | None] = mapped_column(Float)  # inspection GPS longitude
    inspection_time: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True))
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
# ---------------------------------------------------------------------------
# R13 · 互联互通与网间结算(§4.6)
# ---------------------------------------------------------------------------
class SrcCdr(Base):
    """Source detail: call detail records, CDR (R13; high volume, ingested incrementally).

    Stored in table ``src_cdr`` with separate indexes on ``caller`` and
    ``start_time``.
    """

    __tablename__ = "src_cdr"
    __table_args__ = (
        Index("ix_src_cdr_caller", "caller"),
        Index("ix_src_cdr_time", "start_time"),
    )
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    caller: Mapped[str] = mapped_column(String(32), nullable=False)
    callee: Mapped[str] = mapped_column(String(32), nullable=False)
    start_time: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), nullable=False)
    duration_sec: Mapped[int] = mapped_column(Integer, nullable=False)
    call_type: Mapped[str | None] = mapped_column(String(16))  # voice/sms/data
    peer_operator: Mapped[str | None] = mapped_column(String(32))  # peer operator
    route_info: Mapped[str | None] = mapped_column(String(128))  # routing information
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcInterconnectSettlement(Base):
    """Source detail: inter-network settlement statement (R13).

    Stored in table ``src_interconnect_settlement`` and indexed on
    ``settle_period``.
    """

    __tablename__ = "src_interconnect_settlement"
    __table_args__ = (Index("ix_src_ics_period", "settle_period"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    settlement_no: Mapped[str] = mapped_column(String(64), nullable=False)
    peer_operator: Mapped[str] = mapped_column(String(32), nullable=False)
    settle_period: Mapped[str] = mapped_column(String(16), nullable=False)  # e.g. 2025-06
    settle_type: Mapped[str | None] = mapped_column(String(32))  # voice / SMS / SP / CP
    volume: Mapped[float] = mapped_column(Float, default=0.0)  # settled volume (minutes / messages)
    unit_price: Mapped[float | None] = mapped_column(Float)
    settle_amount: Mapped[float] = mapped_column(Float, default=0.0)
    sms_delivery_rate: Mapped[float | None] = mapped_column(Float)  # SMS delivery rate
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
# ---------------------------------------------------------------------------
# R14 · 云业务 / IDC 与新兴业务(§4.7)
# ---------------------------------------------------------------------------
class SrcCloudUsage(Base):
    """Source detail: cloud resource usage (R14).

    Stored in table ``src_cloud_usage`` and indexed on ``contract_no``.
    """

    __tablename__ = "src_cloud_usage"
    __table_args__ = (Index("ix_src_cloud_contract", "contract_no"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    contract_no: Mapped[str] = mapped_column(String(64), nullable=False)
    customer_key: Mapped[str | None] = mapped_column(String(64))
    resource_type: Mapped[str | None] = mapped_column(String(32))  # CPU / storage / bandwidth
    usage_date: Mapped[dt.date | None] = mapped_column(Date)
    actual_usage: Mapped[float] = mapped_column(Float, default=0.0)  # actual usage
    contracted_quota: Mapped[float | None] = mapped_column(Float)  # quota agreed in the contract
    billed_usage: Mapped[float | None] = mapped_column(Float)  # billed usage
    unit: Mapped[str | None] = mapped_column(String(16))  # vCPU/GB/Mbps
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcIdcCabinet(Base):
    """Source detail: IDC cabinet leasing and power consumption (R14).

    Stored in table ``src_idc_cabinet`` and indexed on ``cabinet_id``.
    """

    __tablename__ = "src_idc_cabinet"
    __table_args__ = (Index("ix_src_idc_cabinet_id", "cabinet_id"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    cabinet_id: Mapped[str] = mapped_column(String(64), nullable=False)
    customer_key: Mapped[str | None] = mapped_column(String(64))
    contract_no: Mapped[str | None] = mapped_column(String(64))
    report_month: Mapped[str | None] = mapped_column(String(16))  # e.g. 2025-06
    occupancy_rate: Mapped[float | None] = mapped_column(Float)  # occupancy (lease-out) rate
    power_kwh: Mapped[float | None] = mapped_column(Float)  # power consumption in kWh
    revenue_amount: Mapped[float | None] = mapped_column(Float)  # revenue amount
    acceptance_date: Mapped[dt.date | None] = mapped_column(Date)  # acceptance date
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
# ---------------------------------------------------------------------------
# R15 · 员工内部舞弊与资源滥用(§4.8)
# ---------------------------------------------------------------------------
class SrcEmployeeOperation(Base):
    """Source detail: employee permissions and operation log (R15).

    Stored in table ``src_employee_operation`` with separate indexes on
    ``employee_key`` and ``operation_time``.
    """

    __tablename__ = "src_employee_operation"
    __table_args__ = (
        Index("ix_src_emp_op_employee", "employee_key"),
        Index("ix_src_emp_op_time", "operation_time"),
    )
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    employee_key: Mapped[str] = mapped_column(String(64), nullable=False)
    employee_name: Mapped[str | None] = mapped_column(String(128))
    position: Mapped[str | None] = mapped_column(String(64))
    role_permissions: Mapped[str | None] = mapped_column(Text)  # position-to-permission mapping
    operation_type: Mapped[str | None] = mapped_column(String(64))
    operation_target: Mapped[str | None] = mapped_column(String(256))  # object operated on
    operation_time: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True))
    department: Mapped[str | None] = mapped_column(String(128))
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcInternalMsisdn(Base):
    """Source detail: internal test numbers and their usage (R15).

    Stored in table ``src_internal_msisdn`` and indexed on ``msisdn``.
    """

    __tablename__ = "src_internal_msisdn"
    __table_args__ = (Index("ix_src_int_msisdn", "msisdn"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    msisdn: Mapped[str] = mapped_column(String(32), nullable=False)
    assigned_employee: Mapped[str | None] = mapped_column(String(64))
    purpose: Mapped[str | None] = mapped_column(String(128))  # test / demo / other
    traffic_mb: Mapped[float] = mapped_column(Float, default=0.0)
    voice_min: Mapped[float] = mapped_column(Float, default=0.0)
    revenue_attributed: Mapped[float] = mapped_column(Float, default=0.0)  # attributed revenue
    report_month: Mapped[str | None] = mapped_column(String(16))
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcPointsTransaction(Base):
    """Source detail: points / e-voucher issuance and redemption records (R15).

    Stored in table ``src_points_transaction`` and indexed on ``operator_key``.
    """

    __tablename__ = "src_points_transaction"
    __table_args__ = (Index("ix_src_points_employee", "operator_key"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    transaction_no: Mapped[str] = mapped_column(String(64), nullable=False)
    operator_key: Mapped[str] = mapped_column(String(64), nullable=False)  # operator's employee ID
    target_account: Mapped[str | None] = mapped_column(String(64))  # beneficiary account
    transaction_type: Mapped[str | None] = mapped_column(String(32))  # issue / redeem / cash-out
    points_amount: Mapped[float] = mapped_column(Float, default=0.0)
    cash_value: Mapped[float | None] = mapped_column(Float)  # cash-out amount
    transaction_time: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True))
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
# ---------------------------------------------------------------------------
# 主数据源明细(§3 实体级原始数据)
# ---------------------------------------------------------------------------
class SrcCustomer(Base):
    """Source detail: customer master data (§3 Customer).

    Stored in table ``src_customer`` and indexed on ``customer_key``.
    """

    __tablename__ = "src_customer"
    __table_args__ = (Index("ix_src_cust_key", "customer_key"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    customer_key: Mapped[str] = mapped_column(String(64), nullable=False)
    customer_name: Mapped[str] = mapped_column(String(256), nullable=False)
    customer_type: Mapped[str | None] = mapped_column(String(32))  # government-enterprise / consumer
    register_address: Mapped[str | None] = mapped_column(String(256))
    legal_person: Mapped[str | None] = mapped_column(String(128))
    uscc: Mapped[str | None] = mapped_column(String(32))  # unified social credit code
    open_date: Mapped[dt.date | None] = mapped_column(Date)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcSupplier(Base):
    """Source detail: supplier master data (§3 Supplier).

    Stored in table ``src_supplier`` and indexed on ``supplier_key``.
    """

    __tablename__ = "src_supplier"
    __table_args__ = (Index("ix_src_supplier_key", "supplier_key"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    supplier_key: Mapped[str] = mapped_column(String(64), nullable=False)
    supplier_name: Mapped[str] = mapped_column(String(256), nullable=False)
    legal_person: Mapped[str | None] = mapped_column(String(128))
    shareholder_info: Mapped[str | None] = mapped_column(Text)
    register_address: Mapped[str | None] = mapped_column(String(256))
    uscc: Mapped[str | None] = mapped_column(String(32))
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcEmployee(Base):
    """Source detail: employee master data (§3 Employee).

    Stored in table ``src_employee`` and indexed on ``employee_key``.
    """

    __tablename__ = "src_employee"
    __table_args__ = (Index("ix_src_emp_key", "employee_key"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    employee_key: Mapped[str] = mapped_column(String(64), nullable=False)
    employee_name: Mapped[str | None] = mapped_column(String(128))
    position: Mapped[str | None] = mapped_column(String(64))
    department: Mapped[str | None] = mapped_column(String(128))
    role_permissions: Mapped[str | None] = mapped_column(Text)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcChannel(Base):
    """Source detail: channel / agent master data (§3 Channel).

    Stored in table ``src_channel`` and indexed on ``channel_key``.
    """

    __tablename__ = "src_channel"
    __table_args__ = (Index("ix_src_chan_key", "channel_key"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    channel_key: Mapped[str] = mapped_column(String(64), nullable=False)
    channel_name: Mapped[str | None] = mapped_column(String(256))
    commission_policy: Mapped[str | None] = mapped_column(Text)  # commission policy description
    region: Mapped[str | None] = mapped_column(String(64))
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcMsisdn(Base):
    """Source detail: phone number master data (§3 MSISDN).

    Stored in table ``src_msisdn`` and indexed on ``msisdn``.
    """

    __tablename__ = "src_msisdn"
    __table_args__ = (Index("ix_src_msisdn_no", "msisdn"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    msisdn: Mapped[str] = mapped_column(String(32), nullable=False)
    customer_key: Mapped[str | None] = mapped_column(String(64))
    region: Mapped[str | None] = mapped_column(String(64))
    activate_date: Mapped[dt.date | None] = mapped_column(Date)
    deactivate_date: Mapped[dt.date | None] = mapped_column(Date)
    status: Mapped[str | None] = mapped_column(String(16))  # active/suspended/cancelled
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
class SrcAccount(Base):
    """Source detail: account master data (§3 Account).

    Stored in table ``src_account`` and indexed on ``account_key``.
    """

    __tablename__ = "src_account"
    __table_args__ = (Index("ix_src_acct_key", "account_key"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    account_key: Mapped[str] = mapped_column(String(64), nullable=False)
    account_name: Mapped[str | None] = mapped_column(String(256))
    owner_key: Mapped[str | None] = mapped_column(String(64))  # ID of the owning entity
    owner_type: Mapped[str | None] = mapped_column(String(32))  # customer/supplier/legal_person
    bank_name: Mapped[str | None] = mapped_column(String(128))
    branch_name: Mapped[str | None] = mapped_column(String(128))
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
+1
View File
@@ -0,0 +1 @@
"""引擎层:全量穿透扫描编排,将场景检测结果落为线索。"""
+100
View File
@@ -0,0 +1,100 @@
"""全量穿透扫描编排(P1.5)。
把场景检测器的结果转化为线索,记录扫描覆盖范围(证明全量性)与数据版本(可追溯)。
当前为同步执行;后续可包装为 Celery 异步任务(接口保持不变)。
"""
from __future__ import annotations
import uuid
from dataclasses import dataclass
from sqlalchemy.orm import Session
from app.clues import service as clue_svc
from app.clues.models import Clue
from app.scenarios import churn_fraud as cf
from app.scenarios import split_contract as sc
# Version tag passed as model_version on every clue created by the scans in this module.
MODEL_VERSION = "mock-llm@0.1"
@dataclass
class ScanResult:
    """Outcome of one scenario scan.

    Attributes:
        scenario_code: Code of the scenario that was scanned (the scans in
            this module use "R8" and "R9").
        scanned_count: Number of input records handed to the scan.
        clue: The clue created by the scan, or None when no clue was created.
    """

    scenario_code: str
    scanned_count: int
    clue: Clue | None
def run_split_contract_scan(
    session: Session,
    contracts: list[sc.ContractRecord],
    approval_threshold: float,
    shared_controller: bool = False,
    data_version_id: uuid.UUID | None = None,
) -> ScanResult:
    """Scenario 1 (R8) split-contract scan: detect, score, and create a clue on a hit.

    Args:
        session: Database session handed to the clue service.
        contracts: Contract records to scan.
        approval_threshold: Approval threshold used by the detector and
            recorded in the clue evidence.
        shared_controller: Flag forwarded to scoring and rationale building.
        data_version_id: Optional data version stamped on the created clue.

    Returns:
        A ``ScanResult`` for "R8" carrying the number of contracts scanned and
        the created clue, or ``None`` as the clue when the score is not above 0.
    """
    finding = sc.detect_threshold_edge(contracts, approval_threshold)
    score = sc.split_risk_score(finding, shared_controller)

    # Guard clause: nothing to record unless the score is strictly positive.
    if not score > 0:
        return ScanResult("R8", len(contracts), None)

    rationale = sc.build_rationale(finding, approval_threshold, shared_controller)
    evidence = {
        "near_threshold_contracts": [c.contract_id for c in finding.near_threshold],
        "edge_ratio": finding.ratio,
        "near_threshold_amount": finding.total_amount,
        "approval_threshold": approval_threshold,
        "shared_controller": shared_controller,
    }
    # De-duplicated, sorted customer keys of the near-threshold contracts.
    customers = sorted({c.customer_key for c in finding.near_threshold})

    created = clue_svc.create_clue(
        session,
        title="疑似政企拆单规避审批",
        risk_domain="收入",
        scenario_code="R8",
        score=score,
        rationale=rationale,
        evidence=evidence,
        subjects={"customers": customers},
        amount_involved=finding.total_amount,
        model_version=MODEL_VERSION,
        data_version_id=data_version_id,
    )
    return ScanResult("R8", len(contracts), created)
def run_churn_scan(
    session: Session,
    retention_curve: list[cf.CohortPoint],
    commission_paid: float,
    active_ratio: float,
    zero_usage_ratio: float,
    channel_key: str,
    data_version_id: uuid.UUID | None = None,
) -> ScanResult:
    """Scenario R9 (SIM farming for subsidies): decay cliff + commission mismatch.

    Args:
        session: Session handed to ``clue_svc.create_clue``.
        retention_curve: Cohort points fed to the pulse-decay detector; their
            count is reported as the scan coverage.
        commission_paid: Commission amount; also recorded as the clue's
            ``amount_involved``.
        active_ratio: Forwarded to the commission/quality mismatch check.
        zero_usage_ratio: Forwarded to the commission/quality mismatch check.
        channel_key: Channel recorded as the clue subject.
        data_version_id: Data version stamped on the created clue.

    Returns:
        A ``ScanResult`` for scenario ``"R9"``; ``clue`` is None unless the
        score is at least 0.5.
    """
    finding = cf.detect_pulse_decay(retention_curve)
    mismatch = cf.commission_quality_mismatch(commission_paid, active_ratio, zero_usage_ratio)
    score = cf.churn_risk_score(finding, mismatch)

    hit = score >= 0.5
    if not hit:
        return ScanResult("R9", len(retention_curve), None)

    rationale = cf.build_rationale(finding, mismatch)
    evidence = {
        "cliff_month": finding.cliff_month,
        "max_drop": finding.max_drop,
        "commission_paid": commission_paid,
        "active_ratio": active_ratio,
        "zero_usage_ratio": zero_usage_ratio,
        "mismatch": mismatch,
    }
    created = clue_svc.create_clue(
        session,
        title="疑似养卡骗补(脉冲增长+规律退订)",
        risk_domain="成本",
        scenario_code="R9",
        score=score,
        rationale=rationale,
        evidence=evidence,
        subjects={"channel": channel_key},
        amount_involved=commission_paid,
        model_version=MODEL_VERSION,
        data_version_id=data_version_id,
    )
    return ScanResult("R9", len(retention_curve), created)
+23
View File
@@ -0,0 +1,23 @@
"""接入适配器(P1.1):源明细 → 通用本体映射。
职责:
1. 从 staging(源明细)读取原始数据行;
2. 按映射规则投影为 Entity / EntityRelationship / MetricEvent；
3. 保留源明细不可变(原始证据),本体层为分析基础。
设计原则:
- 每个源明细表对应一个 Adapter 类;
- Adapter 实现统一接口 `ingest(session, data_version_id)` → (entities, relationships, events)；
- 映射逻辑集中于此模块,上层引擎/场景模块只依赖本体。
"""
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import ADAPTER_REGISTRY, get_adapter, register_adapter
# Names re-exported by this package (all imported above from
# app.ingest.base and app.ingest.registry).
__all__ = [
    "BaseAdapter",
    "IngestResult",
    "ADAPTER_REGISTRY",
    "get_adapter",
    "register_adapter",
]
+360
View File
@@ -0,0 +1,360 @@
"""主数据适配器:将源明细中的主数据表映射到本体 Entity 层。
覆盖:SrcCustomer / SrcSupplier / SrcEmployee / SrcChannel / SrcMsisdn / SrcAccount
"""
from __future__ import annotations
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import (
SrcAccount,
SrcChannel,
SrcCustomer,
SrcEmployee,
SrcMsisdn,
SrcSupplier,
)
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class CustomerAdapter(BaseAdapter):
    """Map staged ``SrcCustomer`` rows to CUSTOMER entities.

    A row may additionally yield an ADDRESS entity linked by REGISTERED_AT and
    a LEGAL_PERSON entity linked by LEGAL_REP_OF.
    """

    source_system = "BSS"
    staging_table = "src_customer"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged customers and project them into the ontology.

        Args:
            session: Session used for the staging query and handed to the graph helpers.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        query = session.query(SrcCustomer)
        if data_version_id:
            query = query.filter(SrcCustomer.data_version_id == data_version_id)
        staged = query.limit(batch_size).all()

        for src in staged:
            try:
                customer = upsert_entity(
                    session,
                    entity_type=EntityType.CUSTOMER,
                    business_key=src.customer_key,
                    display_name=src.customer_name,
                    attributes={
                        "customer_type": src.customer_type,
                        "uscc": src.uscc,
                        "open_date": str(src.open_date) if src.open_date else None,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(customer)

                # Registered address -> ADDRESS entity + REGISTERED_AT edge.
                if src.register_address:
                    address = upsert_entity(
                        session,
                        entity_type=EntityType.ADDRESS,
                        business_key=src.register_address,
                        display_name=src.register_address,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(address)
                    registered_at = add_relationship(
                        session, RelationshipType.REGISTERED_AT, customer, address,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(registered_at)

                # Legal person -> LEGAL_PERSON entity + LEGAL_REP_OF edge
                # (edge runs from the person to the customer).
                if src.legal_person:
                    person = upsert_entity(
                        session,
                        entity_type=EntityType.LEGAL_PERSON,
                        business_key=src.legal_person,
                        display_name=src.legal_person,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(person)
                    legal_rep = add_relationship(
                        session, RelationshipType.LEGAL_REP_OF, person, customer,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(legal_rep)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
@register_adapter
class SupplierAdapter(BaseAdapter):
    """Map staged ``SrcSupplier`` rows to SUPPLIER entities.

    A row may additionally yield an ADDRESS entity linked by REGISTERED_AT and
    a LEGAL_PERSON entity linked by LEGAL_REP_OF.
    """

    source_system = "ERP"
    staging_table = "src_supplier"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged suppliers and project them into the ontology.

        Args:
            session: Session used for the staging query and handed to the graph helpers.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        query = session.query(SrcSupplier)
        if data_version_id:
            query = query.filter(SrcSupplier.data_version_id == data_version_id)
        staged = query.limit(batch_size).all()

        for src in staged:
            try:
                supplier = upsert_entity(
                    session,
                    entity_type=EntityType.SUPPLIER,
                    business_key=src.supplier_key,
                    display_name=src.supplier_name,
                    attributes={
                        "uscc": src.uscc,
                        "shareholder_info": src.shareholder_info,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(supplier)

                # Registered address -> ADDRESS entity + REGISTERED_AT edge.
                if src.register_address:
                    address = upsert_entity(
                        session,
                        entity_type=EntityType.ADDRESS,
                        business_key=src.register_address,
                        display_name=src.register_address,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(address)
                    registered_at = add_relationship(
                        session, RelationshipType.REGISTERED_AT, supplier, address,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(registered_at)

                # Legal person -> LEGAL_PERSON entity + LEGAL_REP_OF edge
                # (edge runs from the person to the supplier).
                if src.legal_person:
                    person = upsert_entity(
                        session,
                        entity_type=EntityType.LEGAL_PERSON,
                        business_key=src.legal_person,
                        display_name=src.legal_person,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(person)
                    legal_rep = add_relationship(
                        session, RelationshipType.LEGAL_REP_OF, person, supplier,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(legal_rep)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
@register_adapter
class EmployeeAdapter(BaseAdapter):
    """Map staged ``SrcEmployee`` rows to EMPLOYEE entities."""

    source_system = "ERP"
    staging_table = "src_employee"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged employees and upsert them as entities.

        Args:
            session: Session used for the staging query and handed to ``upsert_entity``.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to the upserted entity.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``: upserted entities in ``entities``,
            ``row_count`` for rows that completed, ``error_count`` for rows
            whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        query = session.query(SrcEmployee)
        if data_version_id:
            query = query.filter(SrcEmployee.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            try:
                entity = upsert_entity(
                    session,
                    entity_type=EntityType.EMPLOYEE,
                    business_key=row.employee_key,
                    display_name=row.employee_name,
                    attributes={
                        "position": row.position,
                        "department": row.department,
                        "role_permissions": row.role_permissions,
                    },
                    data_version_id=data_version_id,
                )
                # Record the entity in the result, as the Customer/Supplier/
                # Msisdn/Account adapters in this module do for their primary
                # entity; previously the return value was discarded.
                result.entities.append(entity)
                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
@register_adapter
class ChannelAdapter(BaseAdapter):
    """Map staged ``SrcChannel`` rows to CHANNEL entities."""

    source_system = "BSS"
    staging_table = "src_channel"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged channels and upsert them as entities.

        Args:
            session: Session used for the staging query and handed to ``upsert_entity``.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to the upserted entity.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``: upserted entities in ``entities``,
            ``row_count`` for rows that completed, ``error_count`` for rows
            whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        query = session.query(SrcChannel)
        if data_version_id:
            query = query.filter(SrcChannel.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            try:
                entity = upsert_entity(
                    session,
                    entity_type=EntityType.CHANNEL,
                    business_key=row.channel_key,
                    display_name=row.channel_name,
                    attributes={
                        "commission_policy": row.commission_policy,
                        "region": row.region,
                    },
                    data_version_id=data_version_id,
                )
                # Record the entity in the result, as the Customer/Supplier/
                # Msisdn/Account adapters in this module do for their primary
                # entity; previously the return value was discarded.
                result.entities.append(entity)
                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
@register_adapter
class MsisdnAdapter(BaseAdapter):
    """Map staged ``SrcMsisdn`` rows to MSISDN entities plus HOLDS_MSISDN edges."""

    source_system = "BSS"
    staging_table = "src_msisdn"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged numbers and project them into the ontology.

        Args:
            session: Session used for the staging query and handed to the graph helpers.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        query = session.query(SrcMsisdn)
        if data_version_id:
            query = query.filter(SrcMsisdn.data_version_id == data_version_id)
        staged = query.limit(batch_size).all()

        for src in staged:
            try:
                number_attrs = {
                    "region": src.region,
                    "status": src.status,
                    "activate_date": str(src.activate_date) if src.activate_date else None,
                    "deactivate_date": str(src.deactivate_date) if src.deactivate_date else None,
                }
                number = upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=src.msisdn,
                    display_name=src.msisdn,
                    attributes=number_attrs,
                    data_version_id=data_version_id,
                )
                result.entities.append(number)

                # Number held by a customer: upsert the customer by key only
                # (not added to result.entities) and link it to the number.
                if src.customer_key:
                    holder = upsert_entity(
                        session,
                        entity_type=EntityType.CUSTOMER,
                        business_key=src.customer_key,
                        data_version_id=data_version_id,
                    )
                    holds = add_relationship(
                        session, RelationshipType.HOLDS_MSISDN, holder, number,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(holds)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
@register_adapter
class AccountAdapter(BaseAdapter):
    """Map staged ``SrcAccount`` rows to ACCOUNT entities plus OWNS_ACCOUNT edges."""

    source_system = "FIN"
    staging_table = "src_account"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged accounts and project them into the ontology.

        Args:
            session: Session used for the staging query and handed to the graph helpers.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        query = session.query(SrcAccount)
        if data_version_id:
            query = query.filter(SrcAccount.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        # Loop-invariant lookup from the staged owner_type string to the entity
        # type of the owner; built once instead of once per row.
        owner_type_map = {
            "customer": EntityType.CUSTOMER,
            "supplier": EntityType.SUPPLIER,
            "legal_person": EntityType.LEGAL_PERSON,
        }

        for row in rows:
            try:
                acct_entity = upsert_entity(
                    session,
                    entity_type=EntityType.ACCOUNT,
                    business_key=row.account_key,
                    display_name=row.account_name,
                    attributes={
                        "bank_name": row.bank_name,
                        "branch_name": row.branch_name,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(acct_entity)

                # Ownership edge; rows whose owner_type is not in the map get
                # no owner entity and no edge.
                if row.owner_key and row.owner_type:
                    etype = owner_type_map.get(row.owner_type)
                    if etype:
                        owner_entity = upsert_entity(
                            session,
                            entity_type=etype,
                            business_key=row.owner_key,
                            data_version_id=data_version_id,
                        )
                        rel = add_relationship(
                            session, RelationshipType.OWNS_ACCOUNT, owner_entity, acct_entity,
                            data_version_id=data_version_id,
                        )
                        result.relationships.append(rel)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
+137
View File
@@ -0,0 +1,137 @@
"""R10 适配器:收入与成本跨期匹配。
源明细:SrcRevenueRecognition / SrcCostAmortization
映射到:MetricEvent(收入确认/成本摊销时序) + Entity(CONTRACT) 关联补强
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType
from app.datahub.staging import SrcCostAmortization, SrcRevenueRecognition
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class RevenueRecognitionAdapter(BaseAdapter):
    """Map staged ``SrcRevenueRecognition`` rows to revenue-recognition MetricEvents."""

    source_system = "FIN"
    staging_table = "src_revenue_recognition"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged rows and emit ``revenue_recognition`` events.

        Args:
            session: Session used for the staging query, ``upsert_entity`` and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        staged_query = session.query(SrcRevenueRecognition)
        if data_version_id:
            staged_query = staged_query.filter(
                SrcRevenueRecognition.data_version_id == data_version_id
            )
        staged = staged_query.limit(batch_size).all()

        for src in staged:
            try:
                # Make sure the contract entity exists (key-only upsert).
                if src.contract_no:
                    upsert_entity(
                        session,
                        entity_type=EntityType.CONTRACT,
                        business_key=src.contract_no,
                        data_version_id=data_version_id,
                    )

                # Rows without a recognition date produce no event but still count.
                if src.recognition_date:
                    # Midnight UTC of the recognition date.
                    recognized_at = dt.datetime.combine(
                        src.recognition_date, dt.time.min, tzinfo=dt.timezone.utc
                    )
                    details = {
                        "voucher_no": src.voucher_no,
                        "billing_mode": src.billing_mode,
                        "period_start": str(src.period_start) if src.period_start else None,
                        "period_end": str(src.period_end) if src.period_end else None,
                        "prepaid_flag": src.prepaid_flag,
                    }
                    event = MetricEvent(
                        event_time=recognized_at,
                        subject_type="contract",
                        # Falls back to the voucher number when there is no contract number.
                        subject_key=src.contract_no or src.voucher_no,
                        metric_name="revenue_recognition",
                        metric_value=src.recognition_amount,
                        attributes=details,
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
@register_adapter
class CostAmortizationAdapter(BaseAdapter):
    """Map staged ``SrcCostAmortization`` rows to cost-amortization MetricEvents."""

    source_system = "FIN"
    staging_table = "src_cost_amortization"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged rows and emit ``cost_amortization`` events.

        Args:
            session: Session used for the staging query, ``upsert_entity`` and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        staged_query = session.query(SrcCostAmortization)
        if data_version_id:
            staged_query = staged_query.filter(
                SrcCostAmortization.data_version_id == data_version_id
            )
        staged = staged_query.limit(batch_size).all()

        for src in staged:
            try:
                # Make sure the contract entity exists (key-only upsert).
                if src.contract_no:
                    upsert_entity(
                        session,
                        entity_type=EntityType.CONTRACT,
                        business_key=src.contract_no,
                        data_version_id=data_version_id,
                    )

                # Rows without an amortization date produce no event but still count.
                if src.amortization_date:
                    # Midnight UTC of the amortization date.
                    amortized_at = dt.datetime.combine(
                        src.amortization_date, dt.time.min, tzinfo=dt.timezone.utc
                    )
                    details = {
                        "voucher_no": src.voucher_no,
                        "cost_type": src.cost_type,
                        "total_periods": src.total_periods,
                        "current_period": src.current_period,
                        "delivery_date": str(src.delivery_date) if src.delivery_date else None,
                        "acceptance_date": str(src.acceptance_date) if src.acceptance_date else None,
                    }
                    event = MetricEvent(
                        event_time=amortized_at,
                        subject_type="contract",
                        # Falls back to the voucher number when there is no contract number.
                        subject_key=src.contract_no or src.voucher_no,
                        metric_name="cost_amortization",
                        metric_value=src.amortization_amount,
                        attributes=details,
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
+103
View File
@@ -0,0 +1,103 @@
"""R11 适配器:渠道佣金与代理商套利。
源明细:SrcTerminalBinding
映射到:Entity(IMEI, MSISDN) + 关系(BOUND_DEVICE) + MetricEvent
"""
from __future__ import annotations
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import SrcTerminalBinding
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class TerminalBindingAdapter(BaseAdapter):
    """Map staged ``SrcTerminalBinding`` rows to IMEI/MSISDN entities, a
    BOUND_DEVICE edge and a ``terminal_activate`` MetricEvent."""

    source_system = "BSS"
    staging_table = "src_terminal_binding"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged bindings and project them into the ontology.

        Args:
            session: Session used for the staging query, the graph helpers and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        query = session.query(SrcTerminalBinding)
        if data_version_id:
            query = query.filter(SrcTerminalBinding.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            try:
                # IMEI entity.
                imei_entity = upsert_entity(
                    session,
                    entity_type=EntityType.IMEI,
                    business_key=row.imei,
                    display_name=row.brand_model or row.imei,
                    attributes={
                        "brand_model": row.brand_model,
                        "region": row.region,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(imei_entity)

                # MSISDN entity.
                msisdn_entity = upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=row.msisdn,
                    display_name=row.msisdn,
                    attributes={"region": row.region},
                    data_version_id=data_version_id,
                )
                result.entities.append(msisdn_entity)

                # Binding edge from the number to the device.
                rel = add_relationship(
                    session, RelationshipType.BOUND_DEVICE, msisdn_entity, imei_entity,
                    attributes={
                        "activate_time": str(row.activate_time) if row.activate_time else None,
                        "subsidy_amount": row.subsidy_amount,
                    },
                    data_version_id=data_version_id,
                )
                result.relationships.append(rel)

                # Activation/subsidy event; skipped when there is no activation time.
                if row.activate_time:
                    # A missing amount counts as zero in the total. Previously a
                    # None on either side raised TypeError here, after the
                    # entities and edge above had already been written, and the
                    # row was counted as an error.
                    subsidy = 0.0 if row.subsidy_amount is None else row.subsidy_amount
                    commission = 0.0 if row.commission_amount is None else row.commission_amount
                    event = MetricEvent(
                        event_time=row.activate_time,
                        subject_type="imei",
                        subject_key=row.imei,
                        metric_name="terminal_activate",
                        metric_value=subsidy + commission,
                        # Attributes keep the raw staged values (None stays None).
                        attributes={
                            "msisdn": row.msisdn,
                            "subsidy_amount": row.subsidy_amount,
                            "commission_amount": row.commission_amount,
                            "online_days": row.online_days,
                            "post_activate_traffic_mb": row.post_activate_traffic_mb,
                            "cross_province_flag": row.cross_province_flag,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
+200
View File
@@ -0,0 +1,200 @@
"""R12 适配器:网络建设与工程采购。
源明细:SrcBidding / SrcProjectSignoff
映射到:Entity(SUPPLIER, WORK_ORDER) + 关系(BIDS_FOR, SUPPLIES) + MetricEvent
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import SrcBidding, SrcProjectSignoff
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class BiddingAdapter(BaseAdapter):
    """Map staged ``SrcBidding`` rows to SUPPLIER / WORK_ORDER entities,
    BIDS_FOR (and, for winners, SUPPLIES) edges and ``bid_submitted`` events."""

    source_system = "ERP"
    staging_table = "src_bidding"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged bids and project them into the ontology.

        Args:
            session: Session used for the staging query, the graph helpers and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        query = session.query(SrcBidding)
        if data_version_id:
            query = query.filter(SrcBidding.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            try:
                # Supplier (bidder) entity.
                supplier_entity = upsert_entity(
                    session,
                    entity_type=EntityType.SUPPLIER,
                    business_key=row.bidder_key,
                    display_name=row.bidder_name,
                    attributes={
                        "legal_person": row.legal_person,
                        "shareholder_info": row.shareholder_info,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(supplier_entity)

                # Work-order / project entity.
                wo_entity = upsert_entity(
                    session,
                    entity_type=EntityType.WORK_ORDER,
                    business_key=row.project_no,
                    display_name=row.project_name,
                    data_version_id=data_version_id,
                )
                result.entities.append(wo_entity)

                # Bid edge from the supplier to the project.
                rel = add_relationship(
                    session, RelationshipType.BIDS_FOR, supplier_entity, wo_entity,
                    attributes={
                        "bid_amount": row.bid_amount,
                        "win_flag": row.win_flag,
                        "technical_score": row.technical_score,
                    },
                    data_version_id=data_version_id,
                )
                result.relationships.append(rel)

                # Winning bid (win_flag "Y", case-insensitive) -> extra SUPPLIES edge.
                if row.win_flag and row.win_flag.upper() == "Y":
                    rel2 = add_relationship(
                        session, RelationshipType.SUPPLIES, supplier_entity, wo_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel2)

                # Legal person entity and its LEGAL_REP_OF edge to the supplier.
                # Both are now recorded in the result, as CustomerAdapter and
                # SupplierAdapter do; previously they were written but left out.
                if row.legal_person:
                    lp_entity = upsert_entity(
                        session,
                        entity_type=EntityType.LEGAL_PERSON,
                        business_key=row.legal_person,
                        display_name=row.legal_person,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(lp_entity)
                    lp_rel = add_relationship(
                        session, RelationshipType.LEGAL_REP_OF, lp_entity, supplier_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(lp_rel)

                # Bid event; skipped when there is no bid time.
                if row.bid_time:
                    event = MetricEvent(
                        event_time=row.bid_time,
                        subject_type="work_order",
                        subject_key=row.project_no,
                        metric_name="bid_submitted",
                        metric_value=row.bid_amount or 0.0,
                        attributes={
                            "bidder_key": row.bidder_key,
                            "bidder_name": row.bidder_name,
                            "win_flag": row.win_flag,
                            "technical_score": row.technical_score,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
@register_adapter
class ProjectSignoffAdapter(BaseAdapter):
    """Map staged ``SrcProjectSignoff`` rows to sign-off and inspection MetricEvents."""

    source_system = "WO"
    staging_table = "src_project_signoff"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged sign-offs and emit their events.

        Args:
            session: Session used for the staging query, ``upsert_entity`` and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        staged_query = session.query(SrcProjectSignoff)
        if data_version_id:
            staged_query = staged_query.filter(
                SrcProjectSignoff.data_version_id == data_version_id
            )
        staged = staged_query.limit(batch_size).all()

        for src in staged:
            try:
                # Make sure the work-order entity exists (key-only upsert).
                upsert_entity(
                    session,
                    entity_type=EntityType.WORK_ORDER,
                    business_key=src.project_no,
                    data_version_id=data_version_id,
                )

                # Sign-off event, stamped at midnight UTC of the sign-off date.
                if src.signoff_date:
                    signed_at = dt.datetime.combine(
                        src.signoff_date, dt.time.min, tzinfo=dt.timezone.utc
                    )
                    signoff_details = {
                        "work_order_no": src.work_order_no,
                        "unit": src.unit,
                        "resource_consumed": src.resource_consumed,
                        "contractor_key": src.contractor_key,
                    }
                    signoff_event = MetricEvent(
                        event_time=signed_at,
                        subject_type="work_order",
                        subject_key=src.project_no,
                        metric_name="signoff_quantity",
                        metric_value=src.signoff_quantity or 0.0,
                        attributes=signoff_details,
                        data_version_id=data_version_id,
                    )
                    session.add(signoff_event)
                    result.metric_events.append(signoff_event)

                # Inspection GPS event.
                # NOTE(review): the latitude is tested for truthiness, so a
                # latitude of exactly 0 is treated like a missing one; the
                # longitude is not tested at all.
                if src.inspection_time and src.inspection_lat:
                    gps_details = {
                        "lat": src.inspection_lat,
                        "lng": src.inspection_lng,
                        "work_order_no": src.work_order_no,
                    }
                    inspection_event = MetricEvent(
                        event_time=src.inspection_time,
                        subject_type="work_order",
                        subject_key=src.project_no,
                        metric_name="inspection",
                        metric_value=1.0,
                        attributes=gps_details,
                        data_version_id=data_version_id,
                    )
                    session.add(inspection_event)
                    result.metric_events.append(inspection_event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
+147
View File
@@ -0,0 +1,147 @@
"""R13 适配器:互联互通与网间结算。
源明细:SrcCdr / SrcInterconnectSettlement
映射到:Entity(MSISDN, SETTLEMENT) + MetricEvent
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType
from app.datahub.staging import SrcCdr, SrcInterconnectSettlement
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class CdrAdapter(BaseAdapter):
    """Map staged ``SrcCdr`` rows to MSISDN entities and ``cdr_duration`` MetricEvents."""

    source_system = "SIGNAL"
    staging_table = "src_cdr"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged call records and emit traffic events.

        Args:
            session: Session used for the staging query, ``upsert_entity`` and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        staged_query = session.query(SrcCdr)
        if data_version_id:
            staged_query = staged_query.filter(SrcCdr.data_version_id == data_version_id)
        staged = staged_query.limit(batch_size).all()

        for src in staged:
            try:
                # Make sure both parties exist as MSISDN entities: caller
                # first, then callee (neither is added to result.entities).
                upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=src.caller,
                    display_name=src.caller,
                    data_version_id=data_version_id,
                )
                upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=src.callee,
                    display_name=src.callee,
                    data_version_id=data_version_id,
                )

                # Traffic event keyed by the caller; value is the duration in seconds.
                call_details = {
                    "callee": src.callee,
                    "call_type": src.call_type,
                    "peer_operator": src.peer_operator,
                    "route_info": src.route_info,
                }
                event = MetricEvent(
                    event_time=src.start_time,
                    subject_type="msisdn",
                    subject_key=src.caller,
                    metric_name="cdr_duration",
                    metric_value=float(src.duration_sec),
                    attributes=call_details,
                    data_version_id=data_version_id,
                )
                session.add(event)
                result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
@register_adapter
class InterconnectSettlementAdapter(BaseAdapter):
    """Map staged ``SrcInterconnectSettlement`` rows to SETTLEMENT entities
    and ``interconnect_settle`` MetricEvents."""

    source_system = "FIN"
    staging_table = "src_interconnect_settlement"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged settlements and project them into the ontology.

        Args:
            session: Session used for the staging query, ``upsert_entity`` and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        query = session.query(SrcInterconnectSettlement)
        if data_version_id:
            query = query.filter(SrcInterconnectSettlement.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            try:
                # Settlement entity.
                settle_entity = upsert_entity(
                    session,
                    entity_type=EntityType.SETTLEMENT,
                    business_key=row.settlement_no,
                    display_name=f"网间结算-{row.settlement_no}",
                    attributes={
                        "peer_operator": row.peer_operator,
                        "settle_type": row.settle_type,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(settle_entity)

                # Event time: first instant (UTC) of the "YYYY-MM" settlement
                # period. A malformed period (ValueError) or a missing one
                # (strptime raises TypeError on None) falls back to the current
                # UTC time, matching how the IDC and internal-number adapters
                # treat a missing report month. Previously a None period
                # escaped this handler and the row was counted as an error
                # after its entity had already been written.
                try:
                    event_time = dt.datetime.strptime(
                        row.settle_period, "%Y-%m"
                    ).replace(tzinfo=dt.timezone.utc)
                except (TypeError, ValueError):
                    event_time = dt.datetime.now(dt.timezone.utc)

                event = MetricEvent(
                    event_time=event_time,
                    subject_type="settlement",
                    subject_key=row.settlement_no,
                    metric_name="interconnect_settle",
                    metric_value=row.settle_amount,
                    attributes={
                        "peer_operator": row.peer_operator,
                        "settle_type": row.settle_type,
                        "volume": row.volume,
                        "unit_price": row.unit_price,
                        "sms_delivery_rate": row.sms_delivery_rate,
                    },
                    data_version_id=data_version_id,
                )
                session.add(event)
                result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
+149
View File
@@ -0,0 +1,149 @@
"""R14 适配器:云业务 / IDC 与新兴业务。
源明细:SrcCloudUsage / SrcIdcCabinet
映射到:Entity(CONTRACT, CUSTOMER) + MetricEvent
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType
from app.datahub.staging import SrcCloudUsage, SrcIdcCabinet
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class CloudUsageAdapter(BaseAdapter):
    """Map staged ``SrcCloudUsage`` rows to CONTRACT/CUSTOMER entities and
    ``cloud_usage`` MetricEvents."""

    source_system = "BSS"
    staging_table = "src_cloud_usage"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged usage rows and emit usage events.

        Args:
            session: Session used for the staging query, ``upsert_entity`` and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        staged_query = session.query(SrcCloudUsage)
        if data_version_id:
            staged_query = staged_query.filter(SrcCloudUsage.data_version_id == data_version_id)
        staged = staged_query.limit(batch_size).all()

        for src in staged:
            try:
                # Contract entity (key-only upsert, not added to result.entities).
                upsert_entity(
                    session,
                    entity_type=EntityType.CONTRACT,
                    business_key=src.contract_no,
                    data_version_id=data_version_id,
                )

                # Customer entity, when the row names one.
                if src.customer_key:
                    upsert_entity(
                        session,
                        entity_type=EntityType.CUSTOMER,
                        business_key=src.customer_key,
                        data_version_id=data_version_id,
                    )

                # Usage event, stamped at midnight UTC of the usage date;
                # rows without a usage date produce no event but still count.
                if src.usage_date:
                    used_at = dt.datetime.combine(
                        src.usage_date, dt.time.min, tzinfo=dt.timezone.utc
                    )
                    usage_details = {
                        "resource_type": src.resource_type,
                        "contracted_quota": src.contracted_quota,
                        "billed_usage": src.billed_usage,
                        "unit": src.unit,
                        "customer_key": src.customer_key,
                    }
                    event = MetricEvent(
                        event_time=used_at,
                        subject_type="contract",
                        subject_key=src.contract_no,
                        metric_name="cloud_usage",
                        metric_value=src.actual_usage,
                        attributes=usage_details,
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
@register_adapter
class IdcCabinetAdapter(BaseAdapter):
    """Map staged ``SrcIdcCabinet`` rows to ``idc_cabinet`` MetricEvents
    (IDC cabinet occupancy rate / power time series)."""

    source_system = "OSS"
    staging_table = "src_idc_cabinet"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged cabinet rows and emit their events.

        Args:
            session: Session used for the staging query, ``upsert_entity`` and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        staged_query = session.query(SrcIdcCabinet)
        if data_version_id:
            staged_query = staged_query.filter(SrcIdcCabinet.data_version_id == data_version_id)
        staged = staged_query.limit(batch_size).all()

        for src in staged:
            try:
                # Contract entity, when the row names one (key-only upsert).
                if src.contract_no:
                    upsert_entity(
                        session,
                        entity_type=EntityType.CONTRACT,
                        business_key=src.contract_no,
                        data_version_id=data_version_id,
                    )

                # Event time: first instant (UTC) of the "YYYY-MM" report
                # month; a missing or malformed month falls back to the
                # current UTC time.
                if src.report_month:
                    try:
                        reported_at = dt.datetime.strptime(
                            src.report_month, "%Y-%m"
                        ).replace(tzinfo=dt.timezone.utc)
                    except ValueError:
                        reported_at = dt.datetime.now(dt.timezone.utc)
                else:
                    reported_at = dt.datetime.now(dt.timezone.utc)

                cabinet_details = {
                    "cabinet_id": src.cabinet_id,
                    "customer_key": src.customer_key,
                    "power_kwh": src.power_kwh,
                    "revenue_amount": src.revenue_amount,
                    "acceptance_date": str(src.acceptance_date) if src.acceptance_date else None,
                }
                event = MetricEvent(
                    event_time=reported_at,
                    subject_type="contract",
                    # Falls back to the cabinet id when there is no contract number.
                    subject_key=src.contract_no or src.cabinet_id,
                    metric_name="idc_cabinet",
                    metric_value=src.occupancy_rate or 0.0,
                    attributes=cabinet_details,
                    data_version_id=data_version_id,
                )
                session.add(event)
                result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
+237
View File
@@ -0,0 +1,237 @@
"""R15 适配器:员工内部舞弊与资源滥用。
源明细:SrcEmployeeOperation / SrcInternalMsisdn / SrcPointsTransaction
映射到:Entity(EMPLOYEE, MSISDN) + 关系(OPERATES) + MetricEvent
"""
from __future__ import annotations
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import (
SrcEmployeeOperation,
SrcInternalMsisdn,
SrcPointsTransaction,
)
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class EmployeeOperationAdapter(BaseAdapter):
    """Map staged ``SrcEmployeeOperation`` rows to EMPLOYEE entities,
    OPERATES edges and ``operation_log`` MetricEvents."""

    source_system = "BSS"
    staging_table = "src_employee_operation"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged operation logs and project them into the ontology.

        Args:
            session: Session used for the staging query, the graph helpers and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        staged_query = session.query(SrcEmployeeOperation)
        if data_version_id:
            staged_query = staged_query.filter(
                SrcEmployeeOperation.data_version_id == data_version_id
            )
        staged = staged_query.limit(batch_size).all()

        for src in staged:
            try:
                # Employee entity.
                employee = upsert_entity(
                    session,
                    entity_type=EntityType.EMPLOYEE,
                    business_key=src.employee_key,
                    display_name=src.employee_name,
                    attributes={
                        "position": src.position,
                        "department": src.department,
                        "role_permissions": src.role_permissions,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(employee)

                # Operation target -> OPERATES edge. The heuristic implemented
                # here: a stripped target that is all digits and exactly 11
                # characters long is treated as a phone number; any other
                # target gets no entity and no edge.
                # NOTE(review): the earlier comment also described a
                # "starts with 1" test and an account fallback, neither of
                # which exists in the code — confirm the intended rule.
                if src.operation_target:
                    candidate = src.operation_target.strip()
                    looks_like_msisdn = candidate.isdigit() and len(candidate) == 11
                    if looks_like_msisdn:
                        target = upsert_entity(
                            session,
                            entity_type=EntityType.MSISDN,
                            business_key=candidate,
                            data_version_id=data_version_id,
                        )
                        operates = add_relationship(
                            session, RelationshipType.OPERATES, employee, target,
                            attributes={"operation_type": src.operation_type},
                            data_version_id=data_version_id,
                        )
                        result.relationships.append(operates)

                # Operation-log event; skipped when there is no operation time.
                if src.operation_time:
                    log_details = {
                        "operation_type": src.operation_type,
                        "operation_target": src.operation_target,
                        "position": src.position,
                        "department": src.department,
                    }
                    event = MetricEvent(
                        event_time=src.operation_time,
                        subject_type="employee",
                        subject_key=src.employee_key,
                        metric_name="operation_log",
                        metric_value=1.0,
                        attributes=log_details,
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
@register_adapter
class InternalMsisdnAdapter(BaseAdapter):
    """Map staged ``SrcInternalMsisdn`` rows to MSISDN/EMPLOYEE entities,
    OPERATES edges and ``internal_usage`` MetricEvents."""

    source_system = "BSS"
    staging_table = "src_internal_msisdn"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged internal numbers and project them into the ontology.

        Args:
            session: Session used for the staging query, the graph helpers and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        # Imported once per call rather than once per row inside the loop.
        # Kept function-local because this module's top-level imports do not
        # include datetime.
        import datetime as dt

        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        query = session.query(SrcInternalMsisdn)
        if data_version_id:
            query = query.filter(SrcInternalMsisdn.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            try:
                # Internal number entity.
                msisdn_entity = upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=row.msisdn,
                    display_name=row.msisdn,
                    attributes={"purpose": row.purpose, "internal": True},
                    data_version_id=data_version_id,
                )
                result.entities.append(msisdn_entity)

                # Assigned employee -> OPERATES edge (employee upserted by key
                # only and not added to result.entities).
                if row.assigned_employee:
                    emp_entity = upsert_entity(
                        session,
                        entity_type=EntityType.EMPLOYEE,
                        business_key=row.assigned_employee,
                        data_version_id=data_version_id,
                    )
                    rel = add_relationship(
                        session, RelationshipType.OPERATES, emp_entity, msisdn_entity,
                        attributes={"purpose": row.purpose},
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                # Usage event time: first instant (UTC) of the "YYYY-MM" report
                # month; a missing or malformed month falls back to the
                # current UTC time.
                try:
                    event_time = dt.datetime.strptime(
                        row.report_month, "%Y-%m"
                    ).replace(tzinfo=dt.timezone.utc) if row.report_month else dt.datetime.now(dt.timezone.utc)
                except ValueError:
                    event_time = dt.datetime.now(dt.timezone.utc)

                event = MetricEvent(
                    event_time=event_time,
                    subject_type="msisdn",
                    subject_key=row.msisdn,
                    metric_name="internal_usage",
                    metric_value=row.traffic_mb,
                    attributes={
                        "voice_min": row.voice_min,
                        "revenue_attributed": row.revenue_attributed,
                        "assigned_employee": row.assigned_employee,
                        "purpose": row.purpose,
                    },
                    data_version_id=data_version_id,
                )
                session.add(event)
                result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
@register_adapter
class PointsTransactionAdapter(BaseAdapter):
    """Map staged ``SrcPointsTransaction`` rows to ``points_transaction`` MetricEvents."""

    source_system = "BSS"
    staging_table = "src_points_transaction"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read up to ``batch_size`` staged points transactions and emit their events.

        Args:
            session: Session used for the staging query, ``upsert_entity`` and
                adding the events.
            data_version_id: If truthy, restricts the query to that data version;
                always forwarded to everything written.
            batch_size: Upper bound on rows fetched (``limit``).

        Returns:
            The populated ``IngestResult``; ``row_count`` counts rows that
            completed, ``error_count`` rows whose processing raised.
        """
        result = IngestResult()

        # NOTE(review): a single limit() with no offset/ordering means one call
        # reads at most batch_size rows — confirm paging happens elsewhere.
        staged_query = session.query(SrcPointsTransaction)
        if data_version_id:
            staged_query = staged_query.filter(
                SrcPointsTransaction.data_version_id == data_version_id
            )
        staged = staged_query.limit(batch_size).all()

        for src in staged:
            try:
                # Make sure the operator exists as an EMPLOYEE entity
                # (key-only upsert, not added to result.entities).
                upsert_entity(
                    session,
                    entity_type=EntityType.EMPLOYEE,
                    business_key=src.operator_key,
                    data_version_id=data_version_id,
                )

                # Points event; skipped when there is no transaction time.
                if src.transaction_time:
                    txn_details = {
                        "transaction_no": src.transaction_no,
                        "target_account": src.target_account,
                        "transaction_type": src.transaction_type,
                        "cash_value": src.cash_value,
                    }
                    event = MetricEvent(
                        event_time=src.transaction_time,
                        subject_type="employee",
                        subject_key=src.operator_key,
                        metric_name="points_transaction",
                        metric_value=src.points_amount,
                        attributes=txn_details,
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # NOTE(review): the failure is only counted — nothing is logged
                # or re-raised here.
                result.error_count += 1
        return result
+236
View File
@@ -0,0 +1,236 @@
"""R8 适配器:政企收入全链路穿透 / 拆单规避。
源明细:SrcContract / SrcContractApproval / SrcPayment
映射到:Entity(CONTRACT, CUSTOMER, ACCOUNT, ADDRESS, LEGAL_PERSON) + 关系 + MetricEvent
"""
from __future__ import annotations
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import SrcContract, SrcContractApproval, SrcPayment
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class ContractAdapter(BaseAdapter):
    """SrcContract -> Entity(CONTRACT, CUSTOMER, ACCOUNT, ADDRESS, LEGAL_PERSON) + relationships."""

    source_system = "CONTRACT"
    staging_table = "src_contract"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Map contract staging rows onto contract/customer graph objects.

        Per row: upsert the contract and customer and link them with SIGNED;
        optionally link the payment account (PAID_BY), the registered address
        (REGISTERED_AT) and the legal person (LEGAL_REP_OF). Nothing is
        committed here.

        Args:
            session: Database session used for reading and writing.
            data_version_id: When given, restricts the rows read and tags
                every written object.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            IngestResult with entities, relationships and row/error counts.
        """
        import logging  # function-scope: no module-level logger is assumed

        log = logging.getLogger(__name__)
        result = IngestResult()
        query = session.query(SrcContract)
        if data_version_id:
            query = query.filter(SrcContract.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()
        for index, row in enumerate(rows):
            try:
                # Contract entity.
                contract_entity = upsert_entity(
                    session,
                    entity_type=EntityType.CONTRACT,
                    business_key=row.contract_no,
                    display_name=f"合同-{row.contract_no}",
                    attributes={
                        "amount": row.amount,
                        "sign_date": str(row.sign_date) if row.sign_date else None,
                        "approval_threshold": row.approval_threshold,
                        "approval_level": row.approval_level,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(contract_entity)
                # Customer entity + SIGNED relationship.
                cust_entity = upsert_entity(
                    session,
                    entity_type=EntityType.CUSTOMER,
                    business_key=row.customer_key,
                    display_name=row.customer_name,
                    data_version_id=data_version_id,
                )
                result.entities.append(cust_entity)
                rel = add_relationship(
                    session, RelationshipType.SIGNED, cust_entity, contract_entity,
                    data_version_id=data_version_id,
                )
                result.relationships.append(rel)
                # Payment account -> Entity(ACCOUNT) + PAID_BY relationship.
                if row.pay_account:
                    acct_entity = upsert_entity(
                        session,
                        entity_type=EntityType.ACCOUNT,
                        business_key=row.pay_account,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(acct_entity)
                    rel = add_relationship(
                        session, RelationshipType.PAID_BY, contract_entity, acct_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)
                # Registered address.
                # NOTE(review): unlike the account, the address and legal-person
                # entities are not added to result.entities — confirm intended.
                if row.register_address:
                    addr_entity = upsert_entity(
                        session,
                        entity_type=EntityType.ADDRESS,
                        business_key=row.register_address,
                        display_name=row.register_address,
                        data_version_id=data_version_id,
                    )
                    rel = add_relationship(
                        session, RelationshipType.REGISTERED_AT, cust_entity, addr_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)
                # Legal person.
                if row.legal_person:
                    lp_entity = upsert_entity(
                        session,
                        entity_type=EntityType.LEGAL_PERSON,
                        business_key=row.legal_person,
                        display_name=row.legal_person,
                        data_version_id=data_version_id,
                    )
                    rel = add_relationship(
                        session, RelationshipType.LEGAL_REP_OF, lp_entity, cust_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)
                result.row_count += 1
            except Exception:
                # Best-effort per row: count the failure and log the traceback
                # instead of discarding it silently.
                log.exception(
                    "%s: failed to map staging row #%d of the batch",
                    self.staging_table,
                    index,
                )
                result.error_count += 1
        return result
@register_adapter
class ContractApprovalAdapter(BaseAdapter):
    """SrcContractApproval -> MetricEvent (approval time-series events)."""

    source_system = "CONTRACT"
    staging_table = "src_contract_approval"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Map approval staging rows to ``approval_step`` events.

        A row with an approval time yields one MetricEvent keyed by contract
        number whose value is the approval step as a float. Nothing is
        committed here.

        Args:
            session: Database session used for reading and writing.
            data_version_id: When given, restricts the rows read and tags
                every written event.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            IngestResult with the events produced and row/error counts.
        """
        import logging  # function-scope: no module-level logger is assumed

        log = logging.getLogger(__name__)
        result = IngestResult()
        query = session.query(SrcContractApproval)
        if data_version_id:
            query = query.filter(SrcContractApproval.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()
        for index, row in enumerate(rows):
            try:
                # Rows without a timestamp cannot be placed on the time axis.
                if row.approval_time:
                    event = MetricEvent(
                        event_time=row.approval_time,
                        subject_type="contract",
                        subject_key=row.contract_no,
                        metric_name="approval_step",
                        metric_value=float(row.approval_step),
                        attributes={
                            "approver": row.approver,
                            "result": row.approval_result,
                            "remark": row.remark,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)
                result.row_count += 1
            except Exception:
                # Best-effort per row: count the failure and log the traceback
                # instead of discarding it silently.
                log.exception(
                    "%s: failed to map staging row #%d of the batch",
                    self.staging_table,
                    index,
                )
                result.error_count += 1
        return result
@register_adapter
class PaymentAdapter(BaseAdapter):
    """SrcPayment -> MetricEvent (payment time-series events) + relationship reinforcement."""

    source_system = "FIN"
    staging_table = "src_payment"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Map payment staging rows to ``payment`` events and PAID_BY links.

        A row with a pay date yields one MetricEvent at midnight UTC of that
        date; a row with a pay account additionally upserts the contract and
        account and links them with PAID_BY. Nothing is committed here.

        Args:
            session: Database session used for reading and writing.
            data_version_id: When given, restricts the rows read and tags
                every written object.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            IngestResult with events, relationships and row/error counts.
        """
        # Function-scope imports: resolved once per call instead of per row.
        import datetime as dt
        import logging

        log = logging.getLogger(__name__)
        result = IngestResult()
        query = session.query(SrcPayment)
        if data_version_id:
            query = query.filter(SrcPayment.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()
        for index, row in enumerate(rows):
            try:
                if row.pay_date:
                    # pay_date is combined with 00:00 and stamped as UTC.
                    event_time = dt.datetime.combine(
                        row.pay_date, dt.time.min, tzinfo=dt.timezone.utc
                    )
                    event = MetricEvent(
                        event_time=event_time,
                        subject_type="contract",
                        subject_key=row.contract_no,
                        metric_name="payment",
                        metric_value=row.pay_amount,
                        attributes={
                            "pay_account": row.pay_account,
                            "pay_type": row.pay_type,
                            "overdue_flag": row.overdue_flag,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)
                # Reinforce the contract -> account relationship.
                if row.pay_account:
                    contract_entity = upsert_entity(
                        session,
                        entity_type=EntityType.CONTRACT,
                        business_key=row.contract_no,
                        data_version_id=data_version_id,
                    )
                    acct_entity = upsert_entity(
                        session,
                        entity_type=EntityType.ACCOUNT,
                        business_key=row.pay_account,
                        data_version_id=data_version_id,
                    )
                    rel = add_relationship(
                        session, RelationshipType.PAID_BY, contract_entity, acct_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)
                result.row_count += 1
            except Exception:
                # Best-effort per row: count the failure and log the traceback
                # instead of discarding it silently.
                log.exception(
                    "%s: failed to map staging row #%d of the batch",
                    self.staging_table,
                    index,
                )
                result.error_count += 1
        return result
+184
View File
@@ -0,0 +1,184 @@
"""R9 适配器:市场业务真实性 / 养卡骗补。
源明细:SrcChannelMonthly / SrcSubscription
映射到:Entity(CHANNEL, MSISDN) + 关系(BELONGS_TO_CHANNEL, SUBSCRIBES) + MetricEvent
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import SrcChannelMonthly, SrcSubscription
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class ChannelMonthlyAdapter(BaseAdapter):
    """SrcChannelMonthly -> MetricEvent (monthly channel retention/commission series)."""

    source_system = "BSS"
    staging_table = "src_channel_monthly"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Map channel-monthly staging rows to ``retention`` events.

        Each row upserts the channel entity and adds one MetricEvent whose
        value is ``retained / cohort_size`` (0.0 when the cohort is empty).
        Nothing is committed here.

        Args:
            session: Database session used for reading and writing.
            data_version_id: When given, restricts the rows read and tags
                every written object.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            IngestResult with the events produced and row/error counts.
        """
        import logging  # function-scope: no module-level logger is assumed

        log = logging.getLogger(__name__)
        result = IngestResult()
        query = session.query(SrcChannelMonthly)
        if data_version_id:
            query = query.filter(SrcChannelMonthly.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()
        for index, row in enumerate(rows):
            try:
                # Make sure the channel entity exists.
                upsert_entity(
                    session,
                    entity_type=EntityType.CHANNEL,
                    business_key=row.channel_key,
                    data_version_id=data_version_id,
                )
                # cohort_label such as "2025-01" becomes the event time; a
                # label in another format falls back to the current UTC time.
                try:
                    event_time = dt.datetime.strptime(
                        row.cohort_label, "%Y-%m"
                    ).replace(tzinfo=dt.timezone.utc)
                except ValueError:
                    event_time = dt.datetime.now(dt.timezone.utc)
                # Retention-rate event.
                event = MetricEvent(
                    event_time=event_time,
                    subject_type="channel",
                    subject_key=row.channel_key,
                    metric_name="retention",
                    metric_value=row.retained / row.cohort_size if row.cohort_size > 0 else 0.0,
                    attributes={
                        "cohort_label": row.cohort_label,
                        "month_index": row.month_index,
                        "cohort_size": row.cohort_size,
                        "retained": row.retained,
                        "commission_paid": row.commission_paid,
                        "active_ratio": row.active_ratio,
                        "zero_usage_ratio": row.zero_usage_ratio,
                    },
                    data_version_id=data_version_id,
                )
                session.add(event)
                result.metric_events.append(event)
                result.row_count += 1
            except Exception:
                # Best-effort per row: count the failure and log the traceback
                # instead of discarding it silently.
                log.exception(
                    "%s: failed to map staging row #%d of the batch",
                    self.staging_table,
                    index,
                )
                result.error_count += 1
        return result
@register_adapter
class SubscriptionAdapter(BaseAdapter):
    """SrcSubscription -> Entity(MSISDN) + relationships (BELONGS_TO_CHANNEL, SUBSCRIBES) + MetricEvent."""

    source_system = "BSS"
    staging_table = "src_subscription"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Map subscription staging rows onto number/channel/product objects.

        Per row: upsert the MSISDN; optionally link it to its channel and to
        the subscribed product (stored as a CONTRACT entity); add a
        ``subscribe`` (+1.0) and/or ``unsubscribe`` (-1.0) event when the
        matching timestamp is present. Nothing is committed here.

        Args:
            session: Database session used for reading and writing.
            data_version_id: When given, restricts the rows read and tags
                every written object.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            IngestResult with entities, relationships, events and counts.
        """
        import logging  # function-scope: no module-level logger is assumed

        log = logging.getLogger(__name__)
        result = IngestResult()
        query = session.query(SrcSubscription)
        if data_version_id:
            query = query.filter(SrcSubscription.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()
        for index, row in enumerate(rows):
            try:
                # MSISDN entity.
                msisdn_entity = upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=row.msisdn,
                    display_name=row.msisdn,
                    attributes={"region": row.region},
                    data_version_id=data_version_id,
                )
                result.entities.append(msisdn_entity)
                # Channel membership.
                if row.channel_key:
                    chan_entity = upsert_entity(
                        session,
                        entity_type=EntityType.CHANNEL,
                        business_key=row.channel_key,
                        data_version_id=data_version_id,
                    )
                    rel = add_relationship(
                        session, RelationshipType.BELONGS_TO_CHANNEL, msisdn_entity, chan_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)
                # Subscription relationship (number -> contract/product).
                if row.product_code:
                    contract_entity = upsert_entity(
                        session,
                        entity_type=EntityType.CONTRACT,
                        business_key=row.product_code,
                        display_name=f"产品-{row.product_code}",
                        data_version_id=data_version_id,
                    )
                    rel = add_relationship(
                        session, RelationshipType.SUBSCRIBES, msisdn_entity, contract_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)
                # Subscribe / unsubscribe time-series events.
                if row.subscribe_time:
                    event = MetricEvent(
                        event_time=row.subscribe_time,
                        subject_type="msisdn",
                        subject_key=row.msisdn,
                        metric_name="subscribe",
                        metric_value=1.0,
                        attributes={
                            "channel_key": row.channel_key,
                            "product_code": row.product_code,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)
                if row.unsubscribe_time:
                    event = MetricEvent(
                        event_time=row.unsubscribe_time,
                        subject_type="msisdn",
                        subject_key=row.msisdn,
                        metric_name="unsubscribe",
                        metric_value=-1.0,
                        attributes={
                            "channel_key": row.channel_key,
                            "product_code": row.product_code,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)
                result.row_count += 1
            except Exception:
                # Best-effort per row: count the failure and log the traceback
                # instead of discarding it silently.
                log.exception(
                    "%s: failed to map staging row #%d of the batch",
                    self.staging_table,
                    index,
                )
                result.error_count += 1
        return result
+53
View File
@@ -0,0 +1,53 @@
"""接入适配器基类与通用数据结构。"""
from __future__ import annotations
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from sqlalchemy.orm import Session
from app.datahub.models import Entity, EntityRelationship, MetricEvent
@dataclass
class IngestResult:
    """Output summary of a single adapter run."""

    # Objects the adapter chose to report; each list starts empty per instance.
    entities: list[Entity] = field(default_factory=list)
    relationships: list[EntityRelationship] = field(default_factory=list)
    metric_events: list[MetricEvent] = field(default_factory=list)
    # Counters maintained by the adapter while it walks the staging rows.
    row_count: int = 0
    error_count: int = 0
class BaseAdapter(ABC):
    """Abstract base class for ingest adapters.

    One subclass is implemented per source staging table; it is responsible
    for mapping staging rows onto the ontology layer.
    """

    # Subclasses must set the identifier of the source system (e.g. "BSS", "ERP").
    source_system: str = ""
    # Subclasses must set the name of the staging table they adapt.
    staging_table: str = ""

    @abstractmethod
    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read rows from the staging table and map them onto the ontology layer.

        Args:
            session: Database session.
            data_version_id: Data-version ID of the current batch.
            batch_size: Number of rows processed per batch.

        Returns:
            An IngestResult summary.
        """
        ...
+22
View File
@@ -0,0 +1,22 @@
"""适配器注册表:按 staging 表名索引,便于调度器统一调用。"""
from __future__ import annotations
from typing import Type
from app.ingest.base import BaseAdapter
# Global registry: staging table name -> adapter class.
ADAPTER_REGISTRY: dict[str, Type[BaseAdapter]] = {}
def register_adapter(cls: Type[BaseAdapter]) -> Type[BaseAdapter]:
    """Class decorator that records an adapter in the global registry.

    The class is stored under its ``staging_table`` name, replacing any
    previous entry for that name. A class with an empty ``staging_table`` is
    returned untouched and not registered.
    """
    table = cls.staging_table
    if not table:
        return cls
    ADAPTER_REGISTRY[table] = cls
    return cls
def get_adapter(staging_table: str) -> Type[BaseAdapter] | None:
    """Return the adapter class registered for a staging table, or None."""
    try:
        return ADAPTER_REGISTRY[staging_table]
    except KeyError:
        return None
+89
View File
@@ -0,0 +1,89 @@
"""接入适配器调度器:统一驱动全部 Adapter 执行 staging → 本体映射。
用法:
from app.ingest.runner import run_all_adapters
results = run_all_adapters(session, data_version_id)
"""
from __future__ import annotations
import logging
import uuid
from sqlalchemy.orm import Session
from app.ingest.base import IngestResult
from app.ingest.registry import ADAPTER_REGISTRY
# 确保所有适配器模块被导入,触发 @register_adapter 注册
import app.ingest.adapters_master # noqa: F401
import app.ingest.adapters_r8 # noqa: F401
import app.ingest.adapters_r9 # noqa: F401
import app.ingest.adapters_r10 # noqa: F401
import app.ingest.adapters_r11 # noqa: F401
import app.ingest.adapters_r12 # noqa: F401
import app.ingest.adapters_r13 # noqa: F401
import app.ingest.adapters_r14 # noqa: F401
import app.ingest.adapters_r15 # noqa: F401
# Module-level logger used by the adapter runner functions below.
logger = logging.getLogger(__name__)
def run_all_adapters(
    session: Session,
    data_version_id: uuid.UUID | None = None,
    batch_size: int = 1000,
    tables: list[str] | None = None,
) -> dict[str, IngestResult]:
    """Run all (or the selected) adapters and return {staging_table: IngestResult}.

    Args:
        session: Database session (the caller is responsible for commit/rollback).
        data_version_id: Data-version ID of the current batch.
        batch_size: Upper bound on rows handled by each adapter in this run.
        tables: When non-empty, only adapters for these staging tables run;
            ``None`` (or an empty list) runs every registered adapter.

    Returns:
        Mapping of staging table name to that adapter's result. An adapter
        whose ``ingest`` raises is reported as ``IngestResult(error_count=1)``.
    """
    results: dict[str, IngestResult] = {}
    target_adapters = ADAPTER_REGISTRY
    if tables:
        wanted = set(tables)  # O(1) membership while filtering the registry
        unknown = sorted(wanted.difference(ADAPTER_REGISTRY))
        if unknown:
            # A requested table without an adapter would otherwise vanish
            # from the result without any trace.
            logger.warning(
                "No adapter registered for staging table(s): %s", ", ".join(unknown)
            )
        target_adapters = {k: v for k, v in ADAPTER_REGISTRY.items() if k in wanted}
    for table_name, adapter_cls in target_adapters.items():
        logger.info("Running adapter: %s (%s)", adapter_cls.__name__, table_name)
        adapter = adapter_cls()
        try:
            result = adapter.ingest(
                session, data_version_id=data_version_id, batch_size=batch_size
            )
            results[table_name] = result
            logger.info(
                "  → rows=%d, entities=%d, rels=%d, events=%d, errors=%d",
                result.row_count,
                len(result.entities),
                len(result.relationships),
                len(result.metric_events),
                result.error_count,
            )
        except Exception:
            # One failing adapter must not stop the others; keep the full
            # traceback in the log so the failure can be diagnosed.
            logger.exception("Adapter %s failed", table_name)
            results[table_name] = IngestResult(error_count=1)
    return results
def run_adapter(
    session: Session,
    staging_table: str,
    data_version_id: uuid.UUID | None = None,
    batch_size: int = 1000,
) -> IngestResult:
    """Run the single adapter registered for ``staging_table``.

    Raises:
        ValueError: If no adapter is registered under that table name.
    """
    registered = ADAPTER_REGISTRY.get(staging_table)
    if registered is not None:
        return registered().ingest(
            session, data_version_id=data_version_id, batch_size=batch_size
        )
    raise ValueError(f"未找到 staging 表 '{staging_table}' 对应的适配器")
+3 -1
View File
@@ -4,7 +4,7 @@ from __future__ import annotations
from app.config import EGRESS_PROVIDERS, LLMProviderName, Settings, get_settings
from app.llm.base import LLMProvider
from app.llm.providers import DashScopeProvider, VllmProvider
from app.llm.providers import DashScopeProvider, MockProvider, VllmProvider
class EgressPolicyError(RuntimeError):
@@ -27,5 +27,7 @@ def get_llm_provider(settings: Settings | None = None) -> LLMProvider:
)
if settings.llm_provider == LLMProviderName.vllm:
return VllmProvider(base_url=settings.vllm_base_url, model=settings.vllm_model)
if settings.llm_provider == LLMProviderName.mock:
return MockProvider()
raise ValueError(f"未知的 LLM Provider: {settings.llm_provider}")
+28
View File
@@ -78,3 +78,31 @@ class VllmProvider(LLMProvider):
return resp.status_code == 200
except httpx.HTTPError:
return False
class MockProvider(LLMProvider):
    """Local deterministic mock provider for development and tests.

    It echoes the most recent user message back in a fixed format and reports
    ``egress=False``, so the pipeline can be exercised without an API key.
    """

    name = "mock"
    egress = False

    def __init__(self, model: str = "mock-llm") -> None:
        self._model = model

    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """Return a canned reply echoing the last ``user`` message ("" if none)."""
        echoed = ""
        for message in reversed(messages):
            if message.role == "user":
                echoed = message.content
                break
        return LLMResponse(
            content=f"[mock] 收到查询:{echoed}",
            model=self._model,
            provider=self.name,
            egress=False,
            raw={"echo": echoed},
        )

    def health(self) -> bool:
        """Always report healthy."""
        return True
+4
View File
@@ -7,7 +7,9 @@ from contextlib import asynccontextmanager
from fastapi import FastAPI
from app import __version__
from app.api.clues import router as clues_router
from app.api.datahub import router as datahub_router
from app.api.nlq import router as nlq_router
from app.config import get_settings
@@ -26,6 +28,8 @@ app = FastAPI(
)
app.include_router(datahub_router)
app.include_router(clues_router)
app.include_router(nlq_router)
@app.get("/health")
+1
View File
@@ -0,0 +1 @@
"""自然语言查询(NLQ):审计员零门槛用自然语言查数/获取线索(R4/R20)。"""
+106
View File
@@ -0,0 +1,106 @@
"""自然语言查询服务。
采用"结构化意图优先 + LLM 兜底"策略:
- 若问题命中线索检索意图(置信度/场景/状态/列出线索等),直接查审计数据库返回真实结果,
实现"数据找人",不依赖外部模型,数据不出域。
- 其余开放性问题再交给 LLMProvider(本地优先)。
对应 R4 / R20 / R7。
"""
from __future__ import annotations
from dataclasses import dataclass
from sqlalchemy.orm import Session
from app.clues import service as clue_svc
from app.clues.models import ClueStatus, ConfidenceTier
from app.llm import ChatMessage, get_llm_provider
# System message sent ahead of the user's question on the LLM fallback path.
SYSTEM_PROMPT = (
    "你是电信运营商内部审计助手。基于审计数据中台的数据回答问题,"
    "给出可解释的依据;无证据支撑时明确说明,不臆造数据。"
)

# Keyword -> filter-value mappings used to turn a question into clue filters.
_CONFIDENCE_KW = {"高置信": ConfidenceTier.HIGH, "高风险": ConfidenceTier.HIGH,
                  "中置信": ConfidenceTier.MEDIUM, "低置信": ConfidenceTier.LOW}
_SCENARIO_KW = {"拆单": "R8", "政企": "R8", "养卡": "R9", "骗补": "R9", "彩铃": "R9"}
_STATUS_KW = {"待处理": ClueStatus.NEW, "已分派": ClueStatus.ASSIGNED,
              "研判": ClueStatus.REVIEWING, "属实": ClueStatus.CONFIRMED,
              "误报": ClueStatus.DISMISSED, "已销项": ClueStatus.CLOSED}
# Generic "list/show clues" trigger words.
# NOTE(review): the third entry is an empty string, and an empty string is a
# substring of every question — confirm whether a keyword was lost here.
_LIST_KW = ("线索", "列出", "", "有哪些", "多少", "列表", "看看", "显示")

# Display names used when rendering clues back to the user.
_SCENARIO_NAME = {"R8": "政企拆单", "R9": "养卡骗补"}
_CONF_NAME = {ConfidenceTier.HIGH: "高置信", ConfidenceTier.MEDIUM: "中置信",
              ConfidenceTier.LOW: "低置信"}
@dataclass
class NLQAnswer:
    """Result of one natural-language query."""

    question: str  # the question as asked
    answer: str  # rendered answer text
    provider: str  # "datahub" for structured retrieval, else the LLM provider name
    model: str  # model label reported by the answering path
    egress: bool  # egress flag reported by the answering path
def _match_first(question: str, mapping: dict):
for kw, val in mapping.items():
if kw in question:
return val
return None
def _is_clue_query(question: str) -> bool:
    """Tell whether the question contains any clue-retrieval keyword.

    Keywords come from the list-trigger tuple and the confidence, scenario and
    status keyword mappings. Empty keywords are ignored: ``"" in question`` is
    True for every string, so an empty entry would classify every question as
    a clue query and make the LLM fallback unreachable.
    """
    keywords = (*_LIST_KW, *_CONFIDENCE_KW, *_SCENARIO_KW, *_STATUS_KW)
    return any(kw and kw in question for kw in keywords)
def _format_clue_answer(question: str, clues: list) -> str:
    """Render retrieved clues as a numbered, human-readable answer.

    ``question`` is accepted for interface compatibility and is not used.
    An empty ``clues`` list yields a fixed "nothing found" message.
    """
    if not clues:
        return "未检索到符合条件的线索。可调整筛选条件,或先运行扫描生成线索。"

    def _render(rank: int, c) -> str:
        # The amount clause is only shown when an amount is recorded.
        amount = ""
        if c.amount_involved:
            amount = f",涉及金额约 {c.amount_involved/10000:.1f} 万元"
        return (
            f"{rank}. [{_SCENARIO_NAME.get(c.scenario_code, c.scenario_code)}] {c.title}"
            f"{_CONF_NAME.get(c.confidence, c.confidence.value)},评分 {c.score:.2f}{amount}"
            f"——{c.rationale}"
        )

    header = f"共检索到 {len(clues)} 条线索:"
    body = [_render(rank, clue) for rank, clue in enumerate(clues, 1)]
    return "\n".join([header, *body])
def ask(question: str, session: Session | None = None) -> NLQAnswer:
    """Answer one natural-language query: structured retrieval first, LLM otherwise.

    When a session is supplied and the question looks like a clue query, the
    clue service is queried with filters derived from the question and the
    answer is marked ``egress=False``. Every other question is sent to the
    configured LLM provider together with the system prompt.
    """
    wants_clues = session is not None and _is_clue_query(question)
    if wants_clues:
        # Structured intent: each filter is None unless a keyword matched.
        filters = {
            "status": _match_first(question, _STATUS_KW),
            "scenario_code": _match_first(question, _SCENARIO_KW),
            "confidence": _match_first(question, _CONFIDENCE_KW),
        }
        matched = clue_svc.list_clues(session, **filters)
        return NLQAnswer(
            question=question,
            answer=_format_clue_answer(question, matched),
            provider="datahub",
            model="结构化检索",
            egress=False,
        )

    # Open-ended question: delegate to the LLM provider.
    llm = get_llm_provider()
    reply = llm.chat(
        [
            ChatMessage(role="system", content=SYSTEM_PROMPT),
            ChatMessage(role="user", content=question),
        ]
    )
    return NLQAnswer(
        question=question,
        answer=reply.content,
        provider=reply.provider,
        model=reply.model,
        egress=reply.egress,
    )
+1
View File
@@ -0,0 +1 @@
"""审计场景检测器:将业务数据中的异常模式转化为线索。"""
+85
View File
@@ -0,0 +1,85 @@
"""场景二 · 市场业务真实性:养卡骗补检测(R9)。
检测"脉冲式增长 + 规律性衰减"的周期性造假:渠道每月新增大量用户订购,
固定周期后这些用户集中退订(骗补后弃养)。结合佣金与业务质量匹配度。
"""
from __future__ import annotations
from dataclasses import dataclass
@dataclass
class CohortPoint:
    """Retention rate of one new-user batch (cohort) in its N-th month."""

    month_index: int  # month number; the curve is ordered by this value
    retention: float  # 0-1
@dataclass
class ChurnFinding:
    """Outcome of the retention-cliff detection."""

    cliff_month: int | None  # month index of the detected cliff, None if none
    max_drop: float  # largest month-over-month retention drop observed
    pulse_then_decay: bool  # True when a cliff was detected
def detect_pulse_decay(
    retention_curve: list[CohortPoint],
    cliff_drop: float = 0.5,
) -> ChurnFinding:
    """Find a cliff-like concentrated churn in a retention curve.

    Points are ordered by month index and compared pairwise. The largest
    month-over-month drop in retention is tracked; when that largest drop is
    at least ``cliff_drop`` (default 0.5), the later month of the pair is
    reported as the cliff month.
    """
    points = sorted(retention_curve, key=lambda p: p.month_index)
    steepest = 0.0
    cliff_at: int | None = None
    earlier = None
    for later in points:
        if earlier is not None:
            fall = earlier.retention - later.retention
            if fall > steepest:
                steepest = fall
                if fall >= cliff_drop:
                    cliff_at = later.month_index
        earlier = later
    return ChurnFinding(
        cliff_month=cliff_at,
        max_drop=round(steepest, 3),
        pulse_then_decay=cliff_at is not None,
    )
def commission_quality_mismatch(
    commission_paid: float,
    active_ratio: float,
    zero_usage_ratio: float,
) -> float:
    """Score (0-1) how badly paid commission mismatches business quality.

    ``active_ratio`` is the share of users still active; ``zero_usage_ratio``
    is the share with zero calls/traffic. With no commission paid the score
    is 0.0; otherwise it is 0.6 * zero-usage + 0.4 * inactive share, clamped
    to [0, 1] and rounded to three decimals.
    """
    if not commission_paid > 0:
        return 0.0
    inactive_share = 1 - active_ratio
    raw = 0.6 * zero_usage_ratio + 0.4 * inactive_share
    clamped = min(max(raw, 0.0), 1.0)
    return round(clamped, 3)
def churn_risk_score(finding: ChurnFinding, mismatch: float) -> float:
    """Combine the churn cliff and the commission mismatch into one score.

    With a detected cliff the score is 0.4 + 0.4 * max_drop + 0.2 * mismatch,
    capped at 1.0; without one it is 0.3 * mismatch. Rounded to three decimals.
    """
    if finding.pulse_then_decay:
        combined = 0.4 + 0.4 * finding.max_drop + 0.2 * mismatch
        return round(min(combined, 1.0), 3)
    return round(0.3 * mismatch, 3)
def build_rationale(finding: ChurnFinding, mismatch: float) -> str:
    """Produce the plain-language explanation attached to a churn clue."""
    if not finding.pulse_then_decay:
        return (
            f"未见明显断崖退订,但佣金与业务质量不匹配度为 {mismatch:.0%},建议关注。"
        )
    sentences = [
        f"渠道新增用户在第 {finding.cliff_month} 个月出现断崖式集中退订",
        f"(最大单月留存骤降 {finding.max_drop:.0%}),呈",
        f"'脉冲式增长 + 规律性衰减'特征;佣金与业务质量不匹配度 {mismatch:.0%},",
        f"高度疑似养卡骗补(骗补后弃养)。",
    ]
    return "".join(sentences)
+78
View File
@@ -0,0 +1,78 @@
"""场景一 · 政企收入全链路穿透:拆单规避检测(R8)。
检测点:
1. 合同金额集中分布在审批阈值边缘(如阈值 80% 以上但不超阈值)。
2. 结合知识图谱穿透识别隐性实控人(多个客户经法人关联到同一实控人)。
满足上述模式则生成线索,附证据链与人话理由。
"""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass
class ContractRecord:
    """Input to the drill-through analysis: key facts of one contract."""

    contract_id: str  # contract identifier
    customer_key: str  # key of the signing customer
    amount: float  # contract amount, compared against the approval threshold
@dataclass
class SplitFinding:
    """Result of the contract-splitting detection."""

    # Contracts whose amount lies in the near-threshold band.
    near_threshold: list[ContractRecord] = field(default_factory=list)
    # Share of near-threshold contracts among all inspected contracts.
    ratio: float = 0.0
    # Sum of the near-threshold contract amounts.
    total_amount: float = 0.0

    @property
    def hit(self) -> bool:
        """True when at least three contracts sit in the near-threshold band."""
        return len(self.near_threshold) >= 3
def detect_threshold_edge(
    contracts: list[ContractRecord],
    approval_threshold: float,
    edge_ratio: float = 0.8,
) -> SplitFinding:
    """Collect contracts whose amount lies in [edge_ratio * threshold, threshold).

    A batch of contracts sitting just below the approval threshold is the
    typical signature of splitting an order to dodge approval.

    Raises:
        ValueError: If ``approval_threshold`` is not positive.
    """
    if approval_threshold <= 0:
        raise ValueError("审批阈值必须为正数")
    floor = edge_ratio * approval_threshold
    suspicious: list[ContractRecord] = []
    for contract in contracts:
        if floor <= contract.amount < approval_threshold:
            suspicious.append(contract)
    share = (len(suspicious) / len(contracts)) if contracts else 0.0
    return SplitFinding(
        near_threshold=suspicious,
        ratio=share,
        total_amount=sum(contract.amount for contract in suspicious),
    )
def split_risk_score(finding: SplitFinding, shared_controller: bool) -> float:
    """Score a split finding from edge concentration and controller linkage.

    Returns 0.0 when the finding is not a hit. Otherwise: 0.1 per
    near-threshold contract (capped at 0.6), plus 0.2 * ratio, plus 0.3 when
    the customers trace to one shared controller; capped at 1.0 and rounded
    to three decimals.
    """
    if not finding.hit:
        return 0.0
    score = min(0.6, 0.1 * len(finding.near_threshold))  # more contracts, more suspicious
    score += 0.2 * finding.ratio
    score += 0.3 if shared_controller else 0  # a shared controller is strong evidence
    return round(min(score, 1.0), 3)
def build_rationale(finding: SplitFinding, threshold: float, shared_controller: bool) -> str:
    """Produce the plain-language explanation attached to a split-contract clue."""
    summary = (
        f"检测到 {len(finding.near_threshold)} 份合同金额集中在审批阈值 "
        f"{threshold:.0f} 的边缘区间(占比 {finding.ratio:.0%}),"
    )
    total = f"边缘合同金额合计约 {finding.total_amount:.0f}。"
    # The closing sentence depends on whether a shared controller was found.
    closing = (
        "且经工商关联穿透,相关客户疑似同属一个隐性实控人,高度符合拆单规避特征。"
        if shared_controller
        else "建议进一步穿透客户关联关系以确认是否同一实控人。"
    )
    return summary + total + closing
+1
View File
@@ -16,6 +16,7 @@ from app.config import get_settings
# 导入模型以注册到 Base.metadata
from app.datahub import models # noqa: F401,E402
from app.datahub import staging # noqa: F401,E402
from app.db import Base
config = context.config
@@ -0,0 +1,57 @@
"""源明细落地层:src_contract / src_channel_monthly
Revision ID: 0003_staging
Revises: 0002_clues_audit
Create Date: 2026-06
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# Alembic revision identifiers: this revision and the one it follows.
revision: str = "0003_staging"
down_revision: Union[str, None] = "0002_clues_audit"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the two staging tables and their lookup indexes."""
    # Contract staging rows, indexed by customer key.
    op.create_table(
        "src_contract",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("contract_no", sa.String(64), nullable=False),
        sa.Column("customer_key", sa.String(64), nullable=False),
        sa.Column("customer_name", sa.String(256), nullable=True),
        sa.Column("amount", sa.Float(), nullable=False),
        sa.Column("sign_date", sa.Date(), nullable=True),
        sa.Column("approval_threshold", sa.Float(), nullable=True),
        sa.Column("approval_level", sa.String(32), nullable=True),
        sa.Column("legal_person", sa.String(128), nullable=True),
        sa.Column("register_address", sa.String(256), nullable=True),
        sa.Column("pay_account", sa.String(64), nullable=True),
        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("ingested_at", sa.DateTime(timezone=True), nullable=False),
    )
    op.create_index("ix_src_contract_customer", "src_contract", ["customer_key"])
    # Channel monthly cohort rows, indexed by channel key; the numeric
    # columns default to 0 on the server side.
    op.create_table(
        "src_channel_monthly",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("channel_key", sa.String(64), nullable=False),
        sa.Column("cohort_label", sa.String(32), nullable=False),
        sa.Column("month_index", sa.Integer(), nullable=False),
        sa.Column("cohort_size", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("retained", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("commission_paid", sa.Float(), nullable=False, server_default="0"),
        sa.Column("active_ratio", sa.Float(), nullable=False, server_default="0"),
        sa.Column("zero_usage_ratio", sa.Float(), nullable=False, server_default="0"),
        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("ingested_at", sa.DateTime(timezone=True), nullable=False),
    )
    op.create_index("ix_src_channel_key", "src_channel_monthly", ["channel_key"])
def downgrade() -> None:
    """Drop both staging tables, in reverse order of their creation."""
    op.drop_table("src_channel_monthly")
    op.drop_table("src_contract")
View File
+53
View File
@@ -0,0 +1,53 @@
"""生成演示数据:跑两个场景扫描,落库若干线索,供前端看板演示。
用法:python -m scripts.seed_demo
仅用于本地演示,使用脱敏/虚构数据,不涉及真实业务数据。
"""
from __future__ import annotations
from app.db import get_sessionmaker
from app.engines import scan
from app.scenarios.churn_fraud import CohortPoint
from app.scenarios.split_contract import ContractRecord
def main() -> None:
    """Run three demo scans, commit the results and print what was generated."""
    session_factory = get_sessionmaker()
    with session_factory() as session:
        # Scenario 1: eight customers splitting contracts under one controller.
        split_inputs = [
            ContractRecord(f"HT-{n}", f"政企客户{n}", 790000 + n * 25000)
            for n in range(8)
        ]
        split_outcome = scan.run_split_contract_scan(
            session, split_inputs, approval_threshold=1_000_000, shared_controller=True
        )

        # Scenario 2: SIM farming, retention collapses in month 3.
        steep_curve = [
            CohortPoint(month, rate)
            for month, rate in ((0, 1.0), (1, 0.96), (2, 0.92), (3, 0.08))
        ]
        steep_outcome = scan.run_churn_scan(
            session, retention_curve=steep_curve, commission_paid=360000,
            active_ratio=0.04, zero_usage_ratio=0.93, channel_key="渠道-华南-001",
        )

        # A milder curve intended to produce a medium-confidence example.
        mild_curve = [
            CohortPoint(month, rate) for month, rate in ((0, 1.0), (1, 0.7), (2, 0.55))
        ]
        mild_outcome = scan.run_churn_scan(
            session, retention_curve=mild_curve, commission_paid=80000,
            active_ratio=0.4, zero_usage_ratio=0.5, channel_key="渠道-西南-007",
        )

        session.commit()

        for outcome in (split_outcome, steep_outcome, mild_outcome):
            if not outcome.clue:
                print(f"[{outcome.scenario_code}] 未命中阈值,无线索")
                continue
            print(f"已生成线索 [{outcome.scenario_code}] {outcome.clue.title} "
                  f"置信={outcome.clue.confidence.value} 评分={outcome.clue.score}")
# Script entry point: seed the demo data only when executed directly.
if __name__ == "__main__":
    main()
@@ -43,7 +43,9 @@ def test_clue_full_lifecycle(session):
assert clue.status == ClueStatus.ASSIGNED
assert clue.assignee == "auditor_zhang"
paper = clue_svc.adjudicate(session, clue, confirmed=True, actor="auditor_zhang", note="属实,移交")
paper = clue_svc.adjudicate(
session, clue, confirmed=True, actor="auditor_zhang", note="属实,移交"
)
assert clue.status == ClueStatus.CONFIRMED
assert clue.feedback == "confirmed"
assert paper.conclusion == "confirmed"
@@ -0,0 +1,86 @@
"""线索/NLQ/看板 API 集成测试(需 PostgreSQL)。"""
from __future__ import annotations
import pytest
from fastapi.testclient import TestClient
from app.db import get_session
from app.engines import scan
from app.main import app
from app.scenarios.split_contract import ContractRecord
@pytest.fixture()
def client(session):
    """Yield a TestClient whose ``get_session`` dependency returns the test session."""
    app.dependency_overrides[get_session] = lambda: session
    try:
        yield TestClient(app)
    finally:
        # Always remove the override so it does not leak into other tests.
        app.dependency_overrides.pop(get_session, None)
def _seed_clue(session):
    """Run a split-contract scan over eight 850000 contracts and return its clue."""
    records = [ContractRecord(f"C{n}", f"CUST{n}", 850000) for n in range(8)]
    outcome = scan.run_split_contract_scan(
        session, records, approval_threshold=1_000_000, shared_controller=True
    )
    return outcome.clue
def test_list_and_get_clue(client, session):
    """A seeded clue appears in the list endpoint and can be fetched by id."""
    seeded = _seed_clue(session)
    session.flush()

    listing = client.get("/clues")
    assert listing.status_code == 200
    listed_ids = [item["id"] for item in listing.json()]
    assert str(seeded.id) in listed_ids

    detail = client.get(f"/clues/{seeded.id}")
    assert detail.status_code == 200
    assert detail.json()["scenario_code"] == "R8"
def test_assign_and_adjudicate_flow(client, session):
    """Assigning and then confirming a clue is reflected in the API responses."""
    seeded = _seed_clue(session)
    session.flush()

    assign_payload = {"assignee": "auditor_w", "actor": "manager_l"}
    assigned = client.post(f"/clues/{seeded.id}/assign", json=assign_payload)
    assert assigned.status_code == 200
    assigned_body = assigned.json()
    assert assigned_body["assignee"] == "auditor_w"
    assert assigned_body["status"] == "assigned"

    verdict_payload = {"confirmed": True, "actor": "auditor_w", "note": "属实"}
    judged = client.post(f"/clues/{seeded.id}/adjudicate", json=verdict_payload)
    assert judged.status_code == 200
    judged_body = judged.json()
    assert judged_body["status"] == "confirmed"
    assert judged_body["feedback"] == "confirmed"
def test_summary_endpoint(client, session):
    """The dashboard summary counts the seeded clue and its amount."""
    _seed_clue(session)
    session.flush()

    response = client.get("/clues/summary")
    assert response.status_code == 200
    summary = response.json()
    assert summary["total"] >= 1
    assert summary["total_amount_involved"] > 0
def test_no_delete_endpoint(client, session):
    """R19: there is no API endpoint for deleting a clue."""
    seeded = _seed_clue(session)
    session.flush()

    response = client.delete(f"/clues/{seeded.id}")
    # Either the method is not allowed or the route does not exist.
    assert response.status_code in (404, 405)
def test_nlq_endpoint_uses_local_provider(client):
    """The NLQ endpoint answers and reports a boolean egress flag."""
    # NOTE(review): the original comment says the default .env selects mock or
    # dashscope and that mock does not leave the domain — confirm the default.
    payload = {"question": "列出政企拆单线索"}
    response = client.post("/nlq", json=payload)
    assert response.status_code == 200
    answer = response.json()
    assert "answer" in answer
    assert answer["egress"] in (True, False)
+37
View File
@@ -0,0 +1,37 @@
"""NLQ 结构化检索集成测试(需 PostgreSQL)。"""
from __future__ import annotations
from app.engines import scan
from app.nlq import service as nlq
from app.scenarios.split_contract import ContractRecord
def _seed(session):
    """Run a split-contract scan over eight 850000 contracts and flush the session."""
    records = [ContractRecord(f"C{n}", f"CUST{n}", 850000) for n in range(8)]
    scan.run_split_contract_scan(
        session, records, approval_threshold=1_000_000, shared_controller=True
    )
    session.flush()
def test_nlq_retrieves_split_clues(session):
    """A clue-style question is answered from the database, not by the LLM."""
    _seed(session)
    reply = nlq.ask("列出高置信的政企拆单线索", session=session)
    assert reply.provider == "datahub"
    assert reply.egress is False
    for fragment in ("政企拆单", "共检索到"):
        assert fragment in reply.answer
def test_nlq_no_match(session):
    """A question about another scenario still gets a local, well-formed answer."""
    reply = nlq.ask("列出养卡骗补线索", session=session)
    assert reply.egress is False
    assert any(marker in reply.answer for marker in ("未检索到", "共检索到"))
def test_nlq_open_question_falls_back_to_llm(session):
    """A question without retrieval keywords is meant to reach the (mock) LLM."""
    reply = nlq.ask("你好,请介绍一下你的能力", session=session)
    assert reply.provider in ("mock", "datahub")
    assert reply.egress is False
@@ -0,0 +1,46 @@
"""全量穿透扫描引擎集成测试(需 PostgreSQL)。
验证场景检测→线索生成→落库的端到端链路(R5+R7+R8/R9)。
"""
from __future__ import annotations
from app.clues.models import ClueStatus, ConfidenceTier
from app.engines import scan
from app.scenarios.churn_fraud import CohortPoint
from app.scenarios.split_contract import ContractRecord
def test_split_scan_creates_high_confidence_clue(session):
    """Eight near-threshold contracts under one controller yield a HIGH clue."""
    records = [ContractRecord(f"C{n}", f"CUST{n}", 850000) for n in range(8)]
    outcome = scan.run_split_contract_scan(
        session, records, approval_threshold=1_000_000, shared_controller=True
    )
    assert outcome.scenario_code == "R8"
    assert outcome.scanned_count == 8

    clue = outcome.clue
    assert clue is not None
    assert clue.confidence == ConfidenceTier.HIGH
    assert clue.status == ClueStatus.NEW
    assert clue.amount_involved > 0
    assert clue.model_version == scan.MODEL_VERSION
def test_split_scan_no_clue_when_clean(session):
    """Two contracts far from the 1,000,000 threshold produce no clue."""
    well_below = ContractRecord("C1", "A", 100000)
    well_above = ContractRecord("C2", "B", 3_000_000)
    outcome = scan.run_split_contract_scan(
        session,
        [well_below, well_above],
        approval_threshold=1_000_000,
    )
    assert outcome.clue is None
def test_churn_scan_creates_clue(session):
    """A collapsing retention curve with poor usage quality yields an R9 clue.

    The resulting clue must carry scenario code ``R9`` and record the
    channel key under ``subjects["channel"]``.
    """
    # Positional CohortPoint arguments; the second value drops from 0.95 to 0.1.
    points = ((0, 1.0), (1, 0.95), (2, 0.1))
    retention = [CohortPoint(first, second) for first, second in points]
    outcome = scan.run_churn_scan(
        session,
        retention_curve=retention,
        commission_paid=300000,
        active_ratio=0.05,
        zero_usage_ratio=0.9,
        channel_key="CH-001",
    )
    assert outcome.clue is not None
    assert outcome.clue.scenario_code == "R9"
    assert outcome.clue.subjects["channel"] == "CH-001"
+79
View File
@@ -0,0 +1,79 @@
"""场景检测器单元测试(纯逻辑,无需数据库)。"""
from app.scenarios.churn_fraud import (
CohortPoint,
churn_risk_score,
commission_quality_mismatch,
detect_pulse_decay,
)
from app.scenarios.split_contract import (
ContractRecord,
detect_threshold_edge,
split_risk_score,
)
# ---------- 场景一:政企拆单 (R8) ----------
def test_threshold_edge_detects_split():
    """Eight contracts clustered just under the threshold are all flagged."""
    # Threshold is 1,000,000; the eight amounts run from 810,000 to 950,000
    # in steps of 20,000.
    amounts = range(810_000, 970_000, 20_000)
    records = [
        ContractRecord(f"C{idx}", f"CUST{idx}", amount)
        for idx, amount in enumerate(amounts)
    ]
    result = detect_threshold_edge(records, approval_threshold=1_000_000)
    assert result.hit
    assert len(result.near_threshold) == 8
def test_threshold_edge_no_split_when_amounts_spread():
    """Amounts far below and far above the threshold do not register a hit."""
    small = ContractRecord("C1", "A", 100000)
    large = ContractRecord("C2", "B", 2_000_000)
    result = detect_threshold_edge([small, large], approval_threshold=1_000_000)
    assert not result.hit
def test_split_score_higher_with_shared_controller():
    """A shared controller raises the split risk score, which stays <= 1.0."""
    records = [ContractRecord(f"C{n}", f"CUST{n}", 850000) for n in range(8)]
    result = detect_threshold_edge(records, 1_000_000)
    # Score the same finding twice, without and then with the shared-controller flag.
    scores = {
        flag: split_risk_score(result, shared_controller=flag)
        for flag in (False, True)
    }
    assert scores[True] > scores[False]
    assert scores[True] <= 1.0
def test_threshold_must_be_positive():
    """A zero approval threshold is rejected with ``ValueError``."""
    # Imported locally: this module's top-level imports do not include pytest.
    import pytest

    no_contracts = []
    with pytest.raises(ValueError):
        detect_threshold_edge(no_contracts, approval_threshold=0)
# ---------- 场景二:养卡骗补 (R9) ----------
def test_pulse_decay_detects_cliff():
    """A sharp drop at the fourth point is reported as a cliff at month 3."""
    # Values stay above 0.9 for the first three points, then fall to 0.10
    # at the point whose first argument is 3.
    samples = ((0, 1.0), (1, 0.95), (2, 0.92), (3, 0.10))
    curve = [CohortPoint(first, second) for first, second in samples]
    result = detect_pulse_decay(curve)
    assert result.pulse_then_decay
    assert result.cliff_month == 3
def test_no_cliff_for_smooth_curve():
    """A curve declining by a steady 0.05 per step is not flagged."""
    curve = []
    for step in range(5):
        curve.append(CohortPoint(step, 1.0 - 0.05 * step))
    result = detect_pulse_decay(curve)
    assert not result.pulse_then_decay
def test_commission_mismatch_high_for_zero_usage():
    """Low activity plus mostly zero-usage accounts give a mismatch above 0.7."""
    mismatch = commission_quality_mismatch(
        commission_paid=100000,
        active_ratio=0.05,
        zero_usage_ratio=0.9,
    )
    assert mismatch > 0.7
def test_churn_score_combines_signals():
    """The combined churn risk score lies in the half-open interval (0, 1]."""
    two_point_curve = [CohortPoint(0, 1.0), CohortPoint(1, 0.2)]
    decay = detect_pulse_decay(two_point_curve)
    risk = churn_risk_score(decay, mismatch=0.8)
    assert 0.0 < risk <= 1.0