feat: 添加线索引擎、NLQ、场景检测、前端界面等核心功能模块
This commit is contained in:
@@ -0,0 +1,86 @@
|
||||
"""线索看板与处置 API(R7/R17/R18/R20)。
|
||||
|
||||
注意:不提供删除线索的端点(R19 线索不可删,独立性硬约束)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.schemas import (
|
||||
AdjudicateRequest,
|
||||
AssignRequest,
|
||||
ClueOut,
|
||||
DashboardSummary,
|
||||
)
|
||||
from app.clues import service as clue_svc
|
||||
from app.clues.models import Clue, ClueStatus, ConfidenceTier
|
||||
from app.db import get_session
|
||||
|
||||
router = APIRouter(prefix="/clues", tags=["clues"])
|
||||
|
||||
|
||||
@router.get("", response_model=list[ClueOut])
def list_clues(
    status: ClueStatus | None = Query(default=None),
    scenario_code: str | None = Query(default=None),
    confidence: ConfidenceTier | None = Query(default=None),
    session: Session = Depends(get_session),
) -> list[Clue]:
    """List clues, optionally narrowed by query-string filters.

    Args:
        status: Optional clue status filter.
        scenario_code: Optional scenario code filter.
        confidence: Optional confidence tier filter.
        session: Database session injected by FastAPI.

    Returns:
        Whatever ``clue_svc.list_clues`` returns for the given filters.
    """
    # All filtering is delegated to the service layer; unset filters are
    # forwarded as None.
    filters = {
        "status": status,
        "scenario_code": scenario_code,
        "confidence": confidence,
    }
    return clue_svc.list_clues(session, **filters)
|
||||
|
||||
|
||||
@router.get("/summary", response_model=DashboardSummary)
def summary(session: Session = Depends(get_session)) -> DashboardSummary:
    """Operations dashboard summary (baseline metrics for R18/R21).

    Returns the total clue count, per-status / per-confidence / per-scenario
    counts, and the sum of ``amount_involved`` (missing amounts count as 0).
    """

    def _tally(keys) -> dict[str, int]:
        # Count occurrences of each key, keeping first-seen order.
        counts: dict[str, int] = {}
        for key in keys:
            counts[key] = counts.get(key, 0) + 1
        return counts

    # NOTE(review): this loads every clue row into memory before aggregating.
    rows = session.query(Clue).all()
    amount_sum = sum((row.amount_involved or 0.0 for row in rows), 0.0)
    return DashboardSummary(
        total=len(rows),
        by_status=_tally(row.status.value for row in rows),
        by_confidence=_tally(row.confidence.value for row in rows),
        by_scenario=_tally(row.scenario_code for row in rows),
        total_amount_involved=amount_sum,
    )
|
||||
|
||||
|
||||
@router.get("/{clue_id}", response_model=ClueOut)
def get_clue(clue_id: uuid.UUID, session: Session = Depends(get_session)) -> Clue:
    """Fetch a single clue by primary key.

    Raises:
        HTTPException: 404 when no clue with ``clue_id`` exists.
    """
    found = session.get(Clue, clue_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="线索不存在")
|
||||
|
||||
|
||||
@router.post("/{clue_id}/assign", response_model=ClueOut)
def assign_clue(
    clue_id: uuid.UUID,
    req: AssignRequest,
    session: Session = Depends(get_session),
) -> Clue:
    """Assign a clue to a handler.

    Looks the clue up by ID and delegates to ``clue_svc.assign`` with the
    assignee and actor taken from the request body.

    Raises:
        HTTPException: 404 when no clue with ``clue_id`` exists.
    """
    target = session.get(Clue, clue_id)
    if target is not None:
        return clue_svc.assign(
            session, target, assignee=req.assignee, actor=req.actor
        )
    raise HTTPException(status_code=404, detail="线索不存在")
|
||||
|
||||
|
||||
@router.post("/{clue_id}/adjudicate", response_model=ClueOut)
def adjudicate_clue(
    clue_id: uuid.UUID,
    req: AdjudicateRequest,
    session: Session = Depends(get_session),
) -> Clue:
    """Record an adjudication result for a clue.

    Looks the clue up by ID, delegates to ``clue_svc.adjudicate`` and returns
    the same clue object that was loaded (the service's return value is not
    used).

    Raises:
        HTTPException: 404 when no clue with ``clue_id`` exists.
    """
    target = session.get(Clue, clue_id)
    if target is None:
        raise HTTPException(status_code=404, detail="线索不存在")

    clue_svc.adjudicate(
        session,
        target,
        confirmed=req.confirmed,
        actor=req.actor,
        note=req.note,
    )
    return target
|
||||
@@ -0,0 +1,24 @@
|
||||
"""自然语言查询 API(R4/R20)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.schemas import NLQRequest, NLQResponse
|
||||
from app.db import get_session
|
||||
from app.nlq import service as nlq
|
||||
|
||||
router = APIRouter(prefix="/nlq", tags=["nlq"])
|
||||
|
||||
|
||||
@router.post("", response_model=NLQResponse)
def ask(req: NLQRequest, session: Session = Depends(get_session)) -> NLQResponse:
    """Answer a natural-language question.

    Delegates to ``nlq.ask`` and copies the answer's fields into the response
    schema one-to-one.
    """
    result = nlq.ask(req.question, session=session)
    # Field names are identical on the service answer and the response model.
    copied = ("question", "answer", "provider", "model", "egress")
    return NLQResponse(**{name: getattr(result, name) for name in copied})
|
||||
@@ -34,3 +34,52 @@ class PenetrateResponse(BaseModel):
|
||||
max_depth: int
|
||||
related_count: int
|
||||
related: list[RelatedEntityOut]
|
||||
|
||||
|
||||
class ClueOut(BaseModel):
    """Response schema for a clue; can be built from attribute-bearing objects."""

    # Lets the model be populated from object attributes (not only from dicts).
    model_config = {"from_attributes": True}

    # --- required fields ---
    id: uuid.UUID
    title: str
    risk_domain: str
    scenario_code: str
    confidence: str
    score: float
    status: str
    rationale: str

    # --- fields with defaults ---
    evidence: dict = Field(default_factory=dict)
    subjects: dict = Field(default_factory=dict)
    amount_involved: float | None = None
    assignee: str | None = None
    feedback: str | None = None
|
||||
|
||||
|
||||
class AssignRequest(BaseModel):
    """Request body for assigning a clue."""

    # Both values must be non-empty strings.
    assignee: str = Field(min_length=1)
    actor: str = Field(min_length=1)
|
||||
|
||||
|
||||
class AdjudicateRequest(BaseModel):
    """Request body for adjudicating a clue."""

    confirmed: bool
    # Must be a non-empty string.
    actor: str = Field(min_length=1)
    # Optional free-text note.
    note: str | None = None
|
||||
|
||||
|
||||
class NLQRequest(BaseModel):
    """Request body for a natural-language query."""

    # The question text; must be non-empty.
    question: str = Field(min_length=1)
|
||||
|
||||
|
||||
class NLQResponse(BaseModel):
    """Response body for a natural-language query."""

    question: str
    answer: str
    # Which provider / model produced the answer.
    provider: str
    model: str
    # NOTE(review): presumably indicates whether the request left the local
    # domain — confirm against the NLQ service.
    egress: bool
|
||||
|
||||
|
||||
class DashboardSummary(BaseModel):
    """Aggregated clue metrics for the operations dashboard."""

    # Total number of clues.
    total: int
    # Clue counts keyed by status, confidence tier and scenario code.
    by_status: dict[str, int]
    by_confidence: dict[str, int]
    by_scenario: dict[str, int]
    # Sum of the amounts involved across the counted clues.
    total_amount_involved: float
|
||||
|
||||
@@ -82,8 +82,8 @@ class Clue(Base):
|
||||
amount_involved: Mapped[float | None] = mapped_column(Float, nullable=True)
|
||||
|
||||
assignee: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
# 误报/属实反馈(R18 反馈学习)
|
||||
feedback: Mapped[str | None] = mapped_column(String(16), nullable=True) # confirmed/false_positive
|
||||
feedback: Mapped[str | None] = mapped_column(String(16), nullable=True)
|
||||
"""误报/属实反馈(R18 反馈学习):confirmed / false_positive"""
|
||||
|
||||
# 可追溯:产生该线索时的模型/规则/数据版本(R19 三重留痕)
|
||||
model_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
|
||||
|
||||
@@ -132,7 +132,10 @@ def assign(session: Session, clue: Clue, assignee: str, actor: str) -> Clue:
|
||||
session.flush()
|
||||
if clue.status == ClueStatus.NEW:
|
||||
transition(session, clue, ClueStatus.ASSIGNED, actor, f"分派给 {assignee}")
|
||||
audit.record(session, actor, "assign_clue", target_type="clue", target_id=str(clue.id), detail={"assignee": assignee})
|
||||
audit.record(
|
||||
session, actor, "assign_clue",
|
||||
target_type="clue", target_id=str(clue.id), detail={"assignee": assignee},
|
||||
)
|
||||
return clue
|
||||
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ class AppEnv(str, Enum):
|
||||
class LLMProviderName(str, Enum):
    """Names of the supported LLM providers."""

    # Public-cloud Qwen; dev only.
    dashscope = "dashscope"
    # Local deployment; prod.
    vllm = "vllm"
    # Local deterministic mock for development/testing; data never leaves the domain.
    mock = "mock"
|
||||
|
||||
|
||||
# 被认定为"公网/出域"的 Provider,prod 下禁止使用
|
||||
|
||||
@@ -41,6 +41,10 @@ class RelationshipType(str, Enum):
|
||||
SUPPLIES = "supplies" # 供应商 —供货→ 合同/工单
|
||||
HANDLED_BY = "handled_by" # 工单 —处理人→ 员工
|
||||
SETTLES = "settles" # 结算单 —结算→ 合同
|
||||
EMPLOYED_BY = "employed_by" # 员工 —任职于→ 客户/供应商(组织)
|
||||
OPERATES = "operates" # 员工 —操作→ 号码/账户(R15 越权检测)
|
||||
SUBSCRIBES = "subscribes" # 号码 —订购→ 合同(R9/R10 订购关联)
|
||||
BIDS_FOR = "bids_for" # 供应商 —投标→ 工单(R12 招投标关联)
|
||||
|
||||
|
||||
# 关系的合法 (源实体类型, 目标实体类型) 约束,用于校验图谱写入
|
||||
@@ -72,6 +76,16 @@ RELATIONSHIP_DOMAIN: dict[RelationshipType, tuple[set[EntityType], set[EntityTyp
|
||||
),
|
||||
RelationshipType.HANDLED_BY: ({EntityType.WORK_ORDER}, {EntityType.EMPLOYEE}),
|
||||
RelationshipType.SETTLES: ({EntityType.SETTLEMENT}, {EntityType.CONTRACT}),
|
||||
RelationshipType.EMPLOYED_BY: (
|
||||
{EntityType.EMPLOYEE},
|
||||
{EntityType.CUSTOMER, EntityType.SUPPLIER},
|
||||
),
|
||||
RelationshipType.OPERATES: (
|
||||
{EntityType.EMPLOYEE},
|
||||
{EntityType.MSISDN, EntityType.ACCOUNT},
|
||||
),
|
||||
RelationshipType.SUBSCRIBES: ({EntityType.MSISDN}, {EntityType.CONTRACT}),
|
||||
RelationshipType.BIDS_FOR: ({EntityType.SUPPLIER}, {EntityType.WORK_ORDER}),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,502 @@
|
||||
"""源明细落地层(Staging / Raw)。
|
||||
|
||||
保存数据中心按 `数据要求.md` 提供的原始明细,作为"原始证据"留存;
|
||||
再由接入适配器(app/ingest)映射/投影到通用本体(entity/relationship/metric_event)。
|
||||
两层并存:源明细可回溯原始数据,本体支撑关联穿透与时序分析。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import Date, DateTime, Float, Index, Integer, String, Text
|
||||
from sqlalchemy.dialects.postgresql import UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.db import Base
|
||||
|
||||
|
||||
def _uuid() -> uuid.UUID:
    """Return a new random (version 4) UUID; used as a column default."""
    fresh_id = uuid.uuid4()
    return fresh_id
|
||||
|
||||
|
||||
def _now() -> dt.datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    utc = dt.timezone.utc
    return dt.datetime.now(tz=utc)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# R8 · 政企收入全链路穿透 / 拆单规避(§4.1)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SrcContract(Base):
    """Source detail: government/enterprise contract (data requirements §4.1 / R8)."""

    __tablename__ = "src_contract"
    __table_args__ = (Index("ix_src_contract_customer", "customer_key"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    # Required business fields.
    contract_no: Mapped[str] = mapped_column(String(64), nullable=False)
    customer_key: Mapped[str] = mapped_column(String(64), nullable=False)
    customer_name: Mapped[str | None] = mapped_column(String(256))
    amount: Mapped[float] = mapped_column(Float, nullable=False)

    # Optional attributes.
    sign_date: Mapped[dt.date | None] = mapped_column(Date)
    approval_threshold: Mapped[float | None] = mapped_column(Float)
    approval_level: Mapped[str | None] = mapped_column(String(32))
    legal_person: Mapped[str | None] = mapped_column(String(128))
    register_address: Mapped[str | None] = mapped_column(String(256))
    pay_account: Mapped[str | None] = mapped_column(String(64))

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcContractApproval(Base):
    """Source detail: contract approval log (supplement to R8)."""

    __tablename__ = "src_contract_approval"
    __table_args__ = (Index("ix_src_approval_contract", "contract_no"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    contract_no: Mapped[str] = mapped_column(String(64), nullable=False)
    approval_step: Mapped[int] = mapped_column(Integer, nullable=False)
    approver: Mapped[str | None] = mapped_column(String(128))
    # approved / rejected
    approval_result: Mapped[str | None] = mapped_column(String(32))
    approval_time: Mapped[dt.datetime | None] = mapped_column(
        DateTime(timezone=True)
    )
    remark: Mapped[str | None] = mapped_column(Text)

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcPayment(Base):
    """Source detail: payment-collection log (R8, payment timing breaches)."""

    __tablename__ = "src_payment"
    __table_args__ = (Index("ix_src_payment_contract", "contract_no"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    contract_no: Mapped[str] = mapped_column(String(64), nullable=False)
    pay_account: Mapped[str | None] = mapped_column(String(64))
    pay_amount: Mapped[float] = mapped_column(Float, nullable=False)
    pay_date: Mapped[dt.date | None] = mapped_column(Date)
    # Prepayment / final payment / full payment.
    pay_type: Mapped[str | None] = mapped_column(String(32))
    # Y/N
    overdue_flag: Mapped[str | None] = mapped_column(String(8))

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# R9 · 市场业务真实性 / 养卡骗补(§4.2)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SrcChannelMonthly(Base):
    """Source detail: per-channel monthly retention, commission and activity (§4.2 / R9)."""

    __tablename__ = "src_channel_monthly"
    __table_args__ = (Index("ix_src_channel_key", "channel_key"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    channel_key: Mapped[str] = mapped_column(String(64), nullable=False)
    # Batch of newly added users (e.g. 2025-01).
    cohort_label: Mapped[str] = mapped_column(String(32), nullable=False)
    # N-th month.
    month_index: Mapped[int] = mapped_column(Integer, nullable=False)

    # Measures; each defaults to zero.
    cohort_size: Mapped[int] = mapped_column(Integer, default=0)
    retained: Mapped[int] = mapped_column(Integer, default=0)
    commission_paid: Mapped[float] = mapped_column(Float, default=0.0)
    active_ratio: Mapped[float] = mapped_column(Float, default=0.0)
    zero_usage_ratio: Mapped[float] = mapped_column(Float, default=0.0)

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcSubscription(Base):
    """Source detail: user subscribe/unsubscribe log (R9 subscription analysis)."""

    __tablename__ = "src_subscription"
    __table_args__ = (Index("ix_src_sub_msisdn", "msisdn"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    msisdn: Mapped[str] = mapped_column(String(32), nullable=False)
    channel_key: Mapped[str | None] = mapped_column(String(64))
    product_code: Mapped[str | None] = mapped_column(String(64))
    subscribe_time: Mapped[dt.datetime | None] = mapped_column(
        DateTime(timezone=True)
    )
    unsubscribe_time: Mapped[dt.datetime | None] = mapped_column(
        DateTime(timezone=True)
    )
    region: Mapped[str | None] = mapped_column(String(64))

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# R10 · 收入与成本跨期匹配(§4.3)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SrcRevenueRecognition(Base):
    """Source detail: revenue recognition vouchers and line items (R10)."""

    __tablename__ = "src_revenue_recognition"
    __table_args__ = (Index("ix_src_rev_contract", "contract_no"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    voucher_no: Mapped[str] = mapped_column(String(64), nullable=False)
    contract_no: Mapped[str | None] = mapped_column(String(64))
    recognition_date: Mapped[dt.date | None] = mapped_column(Date)
    recognition_amount: Mapped[float] = mapped_column(Float, nullable=False)
    # Usage-based / annual package / lump-sum payment.
    billing_mode: Mapped[str | None] = mapped_column(String(32))
    period_start: Mapped[dt.date | None] = mapped_column(Date)
    period_end: Mapped[dt.date | None] = mapped_column(Date)
    # Y/N: received in advance / lump-sum payment.
    prepaid_flag: Mapped[str | None] = mapped_column(String(8))

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcCostAmortization(Base):
    """Source detail: cost amortization line items (R10)."""

    __tablename__ = "src_cost_amortization"
    __table_args__ = (Index("ix_src_cost_contract", "contract_no"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    voucher_no: Mapped[str] = mapped_column(String(64), nullable=False)
    contract_no: Mapped[str | None] = mapped_column(String(64))
    # Equipment / installation / maintenance.
    cost_type: Mapped[str | None] = mapped_column(String(64))
    amortization_date: Mapped[dt.date | None] = mapped_column(Date)
    amortization_amount: Mapped[float] = mapped_column(Float, nullable=False)
    total_periods: Mapped[int | None] = mapped_column(Integer)
    current_period: Mapped[int | None] = mapped_column(Integer)
    # Delivery / racking date.
    delivery_date: Mapped[dt.date | None] = mapped_column(Date)
    # Acceptance date.
    acceptance_date: Mapped[dt.date | None] = mapped_column(Date)

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# R11 · 渠道佣金与代理商套利(§4.4)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SrcTerminalBinding(Base):
    """Source detail: terminal IMEI-to-number binding and subsidy payout (R11)."""

    __tablename__ = "src_terminal_binding"
    __table_args__ = (
        Index("ix_src_terminal_imei", "imei"),
        Index("ix_src_terminal_msisdn", "msisdn"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    imei: Mapped[str] = mapped_column(String(32), nullable=False)
    msisdn: Mapped[str] = mapped_column(String(32), nullable=False)
    brand_model: Mapped[str | None] = mapped_column(String(128))
    activate_time: Mapped[dt.datetime | None] = mapped_column(
        DateTime(timezone=True)
    )
    subsidy_amount: Mapped[float] = mapped_column(Float, default=0.0)
    commission_amount: Mapped[float] = mapped_column(Float, default=0.0)
    # Days on the network.
    online_days: Mapped[int | None] = mapped_column(Integer)
    # Traffic after activation.
    post_activate_traffic_mb: Mapped[float | None] = mapped_column(Float)
    # Home region.
    region: Mapped[str | None] = mapped_column(String(64))
    # Cross-province network entry, Y/N.
    cross_province_flag: Mapped[str | None] = mapped_column(String(8))

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# R12 · 网络建设与工程采购(§4.5)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SrcBidding(Base):
    """Source detail: bidding / tender records (R12)."""

    __tablename__ = "src_bidding"
    __table_args__ = (Index("ix_src_bidding_project", "project_no"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    project_no: Mapped[str] = mapped_column(String(64), nullable=False)
    project_name: Mapped[str | None] = mapped_column(String(256))
    # Bidder / supplier number.
    bidder_key: Mapped[str] = mapped_column(String(64), nullable=False)
    bidder_name: Mapped[str | None] = mapped_column(String(256))
    bid_amount: Mapped[float | None] = mapped_column(Float)
    bid_time: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True))
    # Bid won, Y/N.
    win_flag: Mapped[str | None] = mapped_column(String(8))
    technical_score: Mapped[float | None] = mapped_column(Float)
    legal_person: Mapped[str | None] = mapped_column(String(128))
    # JSON or free-text description.
    shareholder_info: Mapped[str | None] = mapped_column(Text)

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcProjectSignoff(Base):
    """Source detail: work-quantity sign-off and construction records (R12)."""

    __tablename__ = "src_project_signoff"
    __table_args__ = (Index("ix_src_signoff_project", "project_no"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    project_no: Mapped[str] = mapped_column(String(64), nullable=False)
    work_order_no: Mapped[str | None] = mapped_column(String(64))
    # Signed-off work quantity.
    signoff_quantity: Mapped[float | None] = mapped_column(Float)
    unit: Mapped[str | None] = mapped_column(String(32))
    # Resources actually consumed.
    resource_consumed: Mapped[float | None] = mapped_column(Float)
    # Construction crew.
    contractor_key: Mapped[str | None] = mapped_column(String(64))
    signoff_date: Mapped[dt.date | None] = mapped_column(Date)
    # Inspection GPS latitude / longitude.
    inspection_lat: Mapped[float | None] = mapped_column(Float)
    inspection_lng: Mapped[float | None] = mapped_column(Float)
    inspection_time: Mapped[dt.datetime | None] = mapped_column(
        DateTime(timezone=True)
    )

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# R13 · 互联互通与网间结算(§4.6)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SrcCdr(Base):
    """Source detail: call detail records (R13; high-volume incremental ingestion)."""

    __tablename__ = "src_cdr"
    __table_args__ = (
        Index("ix_src_cdr_caller", "caller"),
        Index("ix_src_cdr_time", "start_time"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    caller: Mapped[str] = mapped_column(String(32), nullable=False)
    callee: Mapped[str] = mapped_column(String(32), nullable=False)
    start_time: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), nullable=False
    )
    duration_sec: Mapped[int] = mapped_column(Integer, nullable=False)
    # voice / sms / data
    call_type: Mapped[str | None] = mapped_column(String(16))
    # Peer operator.
    peer_operator: Mapped[str | None] = mapped_column(String(32))
    # Routing information.
    route_info: Mapped[str | None] = mapped_column(String(128))

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcInterconnectSettlement(Base):
    """Source detail: inter-network settlement statements (R13)."""

    __tablename__ = "src_interconnect_settlement"
    __table_args__ = (Index("ix_src_ics_period", "settle_period"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    settlement_no: Mapped[str] = mapped_column(String(64), nullable=False)
    peer_operator: Mapped[str] = mapped_column(String(32), nullable=False)
    # e.g. 2025-06
    settle_period: Mapped[str] = mapped_column(String(16), nullable=False)
    # Voice / SMS / SP / CP.
    settle_type: Mapped[str | None] = mapped_column(String(32))
    # Settled volume (minutes / messages).
    volume: Mapped[float] = mapped_column(Float, default=0.0)
    unit_price: Mapped[float | None] = mapped_column(Float)
    settle_amount: Mapped[float] = mapped_column(Float, default=0.0)
    # SMS delivery rate.
    sms_delivery_rate: Mapped[float | None] = mapped_column(Float)

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# R14 · 云业务 / IDC 与新兴业务(§4.7)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SrcCloudUsage(Base):
    """Source detail: cloud resource usage (R14)."""

    __tablename__ = "src_cloud_usage"
    __table_args__ = (Index("ix_src_cloud_contract", "contract_no"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    contract_no: Mapped[str] = mapped_column(String(64), nullable=False)
    customer_key: Mapped[str | None] = mapped_column(String(64))
    # CPU / storage / bandwidth.
    resource_type: Mapped[str | None] = mapped_column(String(32))
    usage_date: Mapped[dt.date | None] = mapped_column(Date)
    # Actual usage.
    actual_usage: Mapped[float] = mapped_column(Float, default=0.0)
    # Quota agreed in the contract.
    contracted_quota: Mapped[float | None] = mapped_column(Float)
    # Billed usage.
    billed_usage: Mapped[float | None] = mapped_column(Float)
    # vCPU / GB / Mbps
    unit: Mapped[str | None] = mapped_column(String(16))

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcIdcCabinet(Base):
    """Source detail: IDC cabinet leasing and power consumption (R14)."""

    __tablename__ = "src_idc_cabinet"
    __table_args__ = (Index("ix_src_idc_cabinet_id", "cabinet_id"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    cabinet_id: Mapped[str] = mapped_column(String(64), nullable=False)
    customer_key: Mapped[str | None] = mapped_column(String(64))
    contract_no: Mapped[str | None] = mapped_column(String(64))
    # e.g. 2025-06
    report_month: Mapped[str | None] = mapped_column(String(16))
    # Occupancy (lease-out) rate.
    occupancy_rate: Mapped[float | None] = mapped_column(Float)
    # Power consumption in kWh.
    power_kwh: Mapped[float | None] = mapped_column(Float)
    # Revenue amount.
    revenue_amount: Mapped[float | None] = mapped_column(Float)
    # Acceptance date.
    acceptance_date: Mapped[dt.date | None] = mapped_column(Date)

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# R15 · 员工内部舞弊与资源滥用(§4.8)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SrcEmployeeOperation(Base):
    """Source detail: employee permissions and operation log (R15)."""

    __tablename__ = "src_employee_operation"
    __table_args__ = (
        Index("ix_src_emp_op_employee", "employee_key"),
        Index("ix_src_emp_op_time", "operation_time"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    employee_key: Mapped[str] = mapped_column(String(64), nullable=False)
    employee_name: Mapped[str | None] = mapped_column(String(128))
    position: Mapped[str | None] = mapped_column(String(64))
    # Position-to-permission mapping.
    role_permissions: Mapped[str | None] = mapped_column(Text)
    operation_type: Mapped[str | None] = mapped_column(String(64))
    # Object the operation was performed on.
    operation_target: Mapped[str | None] = mapped_column(String(256))
    operation_time: Mapped[dt.datetime | None] = mapped_column(
        DateTime(timezone=True)
    )
    department: Mapped[str | None] = mapped_column(String(128))

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcInternalMsisdn(Base):
    """Source detail: internal test numbers and their usage (R15)."""

    __tablename__ = "src_internal_msisdn"
    __table_args__ = (Index("ix_src_int_msisdn", "msisdn"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    msisdn: Mapped[str] = mapped_column(String(32), nullable=False)
    assigned_employee: Mapped[str | None] = mapped_column(String(64))
    # Testing / demo / other.
    purpose: Mapped[str | None] = mapped_column(String(128))
    traffic_mb: Mapped[float] = mapped_column(Float, default=0.0)
    voice_min: Mapped[float] = mapped_column(Float, default=0.0)
    # Revenue attribution.
    revenue_attributed: Mapped[float] = mapped_column(Float, default=0.0)
    report_month: Mapped[str | None] = mapped_column(String(16))

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcPointsTransaction(Base):
    """Source detail: points / e-voucher issuance and redemption log (R15)."""

    __tablename__ = "src_points_transaction"
    # NOTE(review): the index is named "..._employee" but covers operator_key.
    __table_args__ = (Index("ix_src_points_employee", "operator_key"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    transaction_no: Mapped[str] = mapped_column(String(64), nullable=False)
    # Operator's employee number.
    operator_key: Mapped[str] = mapped_column(String(64), nullable=False)
    # Beneficiary account.
    target_account: Mapped[str | None] = mapped_column(String(64))
    # Issuance / redemption / cash-out.
    transaction_type: Mapped[str | None] = mapped_column(String(32))
    points_amount: Mapped[float] = mapped_column(Float, default=0.0)
    # Cash-out amount.
    cash_value: Mapped[float | None] = mapped_column(Float)
    transaction_time: Mapped[dt.datetime | None] = mapped_column(
        DateTime(timezone=True)
    )

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 主数据源明细(§3 实体级原始数据)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class SrcCustomer(Base):
    """Source detail: customer master data (§3 Customer)."""

    __tablename__ = "src_customer"
    __table_args__ = (Index("ix_src_cust_key", "customer_key"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    customer_key: Mapped[str] = mapped_column(String(64), nullable=False)
    customer_name: Mapped[str] = mapped_column(String(256), nullable=False)
    # Government-enterprise / general public.
    customer_type: Mapped[str | None] = mapped_column(String(32))
    register_address: Mapped[str | None] = mapped_column(String(256))
    legal_person: Mapped[str | None] = mapped_column(String(128))
    # Unified Social Credit Code.
    uscc: Mapped[str | None] = mapped_column(String(32))
    open_date: Mapped[dt.date | None] = mapped_column(Date)

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcSupplier(Base):
    """Source detail: supplier master data (§3 Supplier)."""

    __tablename__ = "src_supplier"
    __table_args__ = (Index("ix_src_supplier_key", "supplier_key"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    supplier_key: Mapped[str] = mapped_column(String(64), nullable=False)
    supplier_name: Mapped[str] = mapped_column(String(256), nullable=False)
    legal_person: Mapped[str | None] = mapped_column(String(128))
    shareholder_info: Mapped[str | None] = mapped_column(Text)
    register_address: Mapped[str | None] = mapped_column(String(256))
    uscc: Mapped[str | None] = mapped_column(String(32))

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcEmployee(Base):
    """Source detail: employee master data (§3 Employee)."""

    __tablename__ = "src_employee"
    __table_args__ = (Index("ix_src_emp_key", "employee_key"),)

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, default=_uuid
    )

    employee_key: Mapped[str] = mapped_column(String(64), nullable=False)
    employee_name: Mapped[str | None] = mapped_column(String(128))
    position: Mapped[str | None] = mapped_column(String(64))
    department: Mapped[str | None] = mapped_column(String(128))
    role_permissions: Mapped[str | None] = mapped_column(Text)

    # Ingestion bookkeeping.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now
    )
|
||||
|
||||
|
||||
class SrcChannel(Base):
    """Source detail: channel / agent master data (§3 Channel)."""

    __tablename__ = "src_channel"
    __table_args__ = (Index("ix_src_chan_key", "channel_key"),)

    # Surrogate primary key; the default callable is ``_uuid``.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )

    # Business identity of the channel (indexed, required).
    channel_key: Mapped[str] = mapped_column(String(64), nullable=False)

    # Optional descriptive attributes.
    channel_name: Mapped[str | None] = mapped_column(String(256))
    commission_policy: Mapped[str | None] = mapped_column(Text)  # commission policy description
    region: Mapped[str | None] = mapped_column(String(64))

    # Lineage columns: the data version the row belongs to and when it was ingested.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
    )
|
||||
|
||||
|
||||
class SrcMsisdn(Base):
    """Source detail: phone-number master data (§3 MSISDN)."""

    __tablename__ = "src_msisdn"
    __table_args__ = (Index("ix_src_msisdn_no", "msisdn"),)

    # Surrogate primary key; the default callable is ``_uuid``.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )

    # The number itself (indexed, required).
    msisdn: Mapped[str] = mapped_column(String(32), nullable=False)

    # Optional descriptive attributes.
    customer_key: Mapped[str | None] = mapped_column(String(64))
    region: Mapped[str | None] = mapped_column(String(64))
    activate_date: Mapped[dt.date | None] = mapped_column(Date)
    deactivate_date: Mapped[dt.date | None] = mapped_column(Date)
    status: Mapped[str | None] = mapped_column(String(16))  # active/suspended/cancelled

    # Lineage columns: the data version the row belongs to and when it was ingested.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
    )
|
||||
|
||||
|
||||
class SrcAccount(Base):
    """Source detail: account master data (§3 Account)."""

    __tablename__ = "src_account"
    __table_args__ = (Index("ix_src_acct_key", "account_key"),)

    # Surrogate primary key; the default callable is ``_uuid``.
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )

    # Business identity of the account (indexed, required).
    account_key: Mapped[str] = mapped_column(String(64), nullable=False)

    # Optional descriptive attributes.
    account_name: Mapped[str | None] = mapped_column(String(256))
    owner_key: Mapped[str | None] = mapped_column(String(64))  # key of the owning subject
    owner_type: Mapped[str | None] = mapped_column(String(32))  # customer/supplier/legal_person
    bank_name: Mapped[str | None] = mapped_column(String(128))
    branch_name: Mapped[str | None] = mapped_column(String(128))

    # Lineage columns: the data version the row belongs to and when it was ingested.
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True))
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True),
        default=_now,
    )
|
||||
@@ -0,0 +1 @@
|
||||
"""引擎层:全量穿透扫描编排,将场景检测结果落为线索。"""
|
||||
@@ -0,0 +1,100 @@
|
||||
"""全量穿透扫描编排(P1.5)。
|
||||
|
||||
把场景检测器的结果转化为线索,记录扫描覆盖范围(证明全量性)与数据版本(可追溯)。
|
||||
当前为同步执行;后续可包装为 Celery 异步任务(接口保持不变)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from dataclasses import dataclass
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.clues import service as clue_svc
|
||||
from app.clues.models import Clue
|
||||
from app.scenarios import churn_fraud as cf
|
||||
from app.scenarios import split_contract as sc
|
||||
|
||||
MODEL_VERSION = "mock-llm@0.1"
|
||||
|
||||
|
||||
@dataclass
class ScanResult:
    """Outcome of one scenario scan.

    Returned by the scan functions in this module: it carries the scenario
    code, how many input records the scan covered, and the clue created for
    a hit (``None`` when no clue was created).
    """

    # Scenario identifier, e.g. "R8" or "R9".
    scenario_code: str
    # Number of input records the scan looked at.
    scanned_count: int
    # Clue created by the scan, or None when nothing was raised.
    clue: Clue | None
|
||||
|
||||
|
||||
def run_split_contract_scan(
    session: Session,
    contracts: list[sc.ContractRecord],
    approval_threshold: float,
    shared_controller: bool = False,
    data_version_id: uuid.UUID | None = None,
) -> ScanResult:
    """Scenario 1 (contract splitting): detect, score and, on a hit, create a clue.

    Args:
        session: Session handed to the clue service when a clue is created.
        contracts: Contract records to scan.
        approval_threshold: Approval threshold passed to the detector.
        shared_controller: Flag forwarded to the scoring and rationale helpers.
        data_version_id: Data version recorded on the created clue.

    Returns:
        A ``ScanResult`` for scenario "R8" with ``len(contracts)`` as the
        scanned count and the created clue, or ``None`` when the score is
        not greater than zero.
    """
    edge_finding = sc.detect_threshold_edge(contracts, approval_threshold)
    risk = sc.split_risk_score(edge_finding, shared_controller)

    created: Clue | None = None
    if risk > 0:
        explanation = sc.build_rationale(edge_finding, approval_threshold, shared_controller)
        near = edge_finding.near_threshold

        # Evidence payload stored with the clue.
        evidence_payload = {
            "near_threshold_contracts": [item.contract_id for item in near],
            "edge_ratio": edge_finding.ratio,
            "near_threshold_amount": edge_finding.total_amount,
            "approval_threshold": approval_threshold,
            "shared_controller": shared_controller,
        }
        # De-duplicated, sorted customer keys of the near-threshold contracts.
        involved = {"customers": sorted({item.customer_key for item in near})}

        created = clue_svc.create_clue(
            session,
            title="疑似政企拆单规避审批",
            risk_domain="收入",
            scenario_code="R8",
            score=risk,
            rationale=explanation,
            evidence=evidence_payload,
            subjects=involved,
            amount_involved=edge_finding.total_amount,
            model_version=MODEL_VERSION,
            data_version_id=data_version_id,
        )

    return ScanResult("R8", len(contracts), created)
|
||||
|
||||
|
||||
def run_churn_scan(
    session: Session,
    retention_curve: list[cf.CohortPoint],
    commission_paid: float,
    active_ratio: float,
    zero_usage_ratio: float,
    channel_key: str,
    data_version_id: uuid.UUID | None = None,
) -> ScanResult:
    """Scenario 2 (SIM farming for subsidies): retention cliff plus commission mismatch.

    Args:
        session: Session handed to the clue service when a clue is created.
        retention_curve: Cohort points passed to the pulse-decay detector.
        commission_paid: Commission amount; also recorded as the amount involved.
        active_ratio: Forwarded to the commission/quality mismatch check.
        zero_usage_ratio: Forwarded to the commission/quality mismatch check.
        channel_key: Channel recorded as the subject of the clue.
        data_version_id: Data version recorded on the created clue.

    Returns:
        A ``ScanResult`` for scenario "R9" with ``len(retention_curve)`` as the
        scanned count and the created clue, or ``None`` when the score is
        below 0.5.
    """
    decay = cf.detect_pulse_decay(retention_curve)
    quality_gap = cf.commission_quality_mismatch(commission_paid, active_ratio, zero_usage_ratio)
    risk = cf.churn_risk_score(decay, quality_gap)

    created: Clue | None = None
    if risk >= 0.5:
        explanation = cf.build_rationale(decay, quality_gap)

        # Evidence payload stored with the clue.
        evidence_payload = {
            "cliff_month": decay.cliff_month,
            "max_drop": decay.max_drop,
            "commission_paid": commission_paid,
            "active_ratio": active_ratio,
            "zero_usage_ratio": zero_usage_ratio,
            "mismatch": quality_gap,
        }

        created = clue_svc.create_clue(
            session,
            title="疑似养卡骗补(脉冲增长+规律退订)",
            risk_domain="成本",
            scenario_code="R9",
            score=risk,
            rationale=explanation,
            evidence=evidence_payload,
            subjects={"channel": channel_key},
            amount_involved=commission_paid,
            model_version=MODEL_VERSION,
            data_version_id=data_version_id,
        )

    return ScanResult("R9", len(retention_curve), created)
|
||||
@@ -0,0 +1,23 @@
|
||||
"""接入适配器(P1.1):源明细 → 通用本体映射。
|
||||
|
||||
职责:
|
||||
1. 从 staging(源明细)读取原始数据行;
|
||||
2. 按映射规则投影为 Entity / EntityRelationship / MetricEvent;
|
||||
3. 保留源明细不可变(原始证据),本体层为分析基础。
|
||||
|
||||
设计原则:
|
||||
- 每个源明细表对应一个 Adapter 类;
|
||||
- Adapter 实现统一接口 `ingest(session, data_version_id)` → (entities, relationships, events);
|
||||
- 映射逻辑集中于此模块,上层引擎/场景模块只依赖本体。
|
||||
"""
|
||||
|
||||
from app.ingest.base import BaseAdapter, IngestResult
|
||||
from app.ingest.registry import ADAPTER_REGISTRY, get_adapter, register_adapter
|
||||
|
||||
__all__ = [
|
||||
"BaseAdapter",
|
||||
"IngestResult",
|
||||
"ADAPTER_REGISTRY",
|
||||
"get_adapter",
|
||||
"register_adapter",
|
||||
]
|
||||
@@ -0,0 +1,360 @@
|
||||
"""主数据适配器:将源明细中的主数据表映射到本体 Entity 层。
|
||||
|
||||
覆盖:SrcCustomer / SrcSupplier / SrcEmployee / SrcChannel / SrcMsisdn / SrcAccount
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.graph_repo import add_relationship, upsert_entity
|
||||
from app.datahub.ontology import EntityType, RelationshipType
|
||||
from app.datahub.staging import (
|
||||
SrcAccount,
|
||||
SrcChannel,
|
||||
SrcCustomer,
|
||||
SrcEmployee,
|
||||
SrcMsisdn,
|
||||
SrcSupplier,
|
||||
)
|
||||
from app.ingest.base import BaseAdapter, IngestResult
|
||||
from app.ingest.registry import register_adapter
|
||||
|
||||
|
||||
@register_adapter
class CustomerAdapter(BaseAdapter):
    """Map ``SrcCustomer`` rows to CUSTOMER entities plus REGISTERED_AT / LEGAL_REP_OF edges."""

    source_system = "BSS"
    staging_table = "src_customer"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project customer staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write entities.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the entities and relationships produced,
            ``row_count`` for rows mapped without error and ``error_count``
            for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcCustomer)
        if data_version_id:
            query = query.filter(SrcCustomer.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                entity = upsert_entity(
                    session,
                    entity_type=EntityType.CUSTOMER,
                    business_key=row.customer_key,
                    display_name=row.customer_name,
                    attributes={
                        "customer_type": row.customer_type,
                        "uscc": row.uscc,
                        "open_date": str(row.open_date) if row.open_date else None,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(entity)

                # Registered address -> ADDRESS entity + REGISTERED_AT edge.
                if row.register_address:
                    addr_entity = upsert_entity(
                        session,
                        entity_type=EntityType.ADDRESS,
                        business_key=row.register_address,
                        display_name=row.register_address,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(addr_entity)
                    rel = add_relationship(
                        session,
                        RelationshipType.REGISTERED_AT,
                        entity,
                        addr_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                # Legal person -> LEGAL_PERSON entity + LEGAL_REP_OF edge.
                if row.legal_person:
                    lp_entity = upsert_entity(
                        session,
                        entity_type=EntityType.LEGAL_PERSON,
                        business_key=row.legal_person,
                        display_name=row.legal_person,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(lp_entity)
                    rel = add_relationship(
                        session,
                        RelationshipType.LEGAL_REP_OF,
                        lp_entity,
                        entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
|
||||
|
||||
@register_adapter
class SupplierAdapter(BaseAdapter):
    """Map ``SrcSupplier`` rows to SUPPLIER entities plus REGISTERED_AT / LEGAL_REP_OF edges."""

    source_system = "ERP"
    staging_table = "src_supplier"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project supplier staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write entities.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the entities and relationships produced,
            ``row_count`` for rows mapped without error and ``error_count``
            for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcSupplier)
        if data_version_id:
            query = query.filter(SrcSupplier.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                entity = upsert_entity(
                    session,
                    entity_type=EntityType.SUPPLIER,
                    business_key=row.supplier_key,
                    display_name=row.supplier_name,
                    attributes={
                        "uscc": row.uscc,
                        "shareholder_info": row.shareholder_info,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(entity)

                # Registered address -> ADDRESS entity + REGISTERED_AT edge.
                if row.register_address:
                    addr_entity = upsert_entity(
                        session,
                        entity_type=EntityType.ADDRESS,
                        business_key=row.register_address,
                        display_name=row.register_address,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(addr_entity)
                    rel = add_relationship(
                        session,
                        RelationshipType.REGISTERED_AT,
                        entity,
                        addr_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                # Legal person -> LEGAL_PERSON entity + LEGAL_REP_OF edge.
                if row.legal_person:
                    lp_entity = upsert_entity(
                        session,
                        entity_type=EntityType.LEGAL_PERSON,
                        business_key=row.legal_person,
                        display_name=row.legal_person,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(lp_entity)
                    rel = add_relationship(
                        session,
                        RelationshipType.LEGAL_REP_OF,
                        lp_entity,
                        entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
|
||||
|
||||
@register_adapter
class EmployeeAdapter(BaseAdapter):
    """Map ``SrcEmployee`` rows to EMPLOYEE entities."""

    source_system = "ERP"
    staging_table = "src_employee"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project employee staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write entities.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the upserted EMPLOYEE entities,
            ``row_count`` for rows mapped without error and ``error_count``
            for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcEmployee)
        if data_version_id:
            query = query.filter(SrcEmployee.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                entity = upsert_entity(
                    session,
                    entity_type=EntityType.EMPLOYEE,
                    business_key=row.employee_key,
                    display_name=row.employee_name,
                    attributes={
                        "position": row.position,
                        "department": row.department,
                        "role_permissions": row.role_permissions,
                    },
                    data_version_id=data_version_id,
                )
                # Report the entity in the result, as the other master-data
                # adapters do for their primary entity.
                result.entities.append(entity)
                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
|
||||
|
||||
@register_adapter
class ChannelAdapter(BaseAdapter):
    """Map ``SrcChannel`` rows to CHANNEL entities."""

    source_system = "BSS"
    staging_table = "src_channel"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project channel staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write entities.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the upserted CHANNEL entities,
            ``row_count`` for rows mapped without error and ``error_count``
            for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcChannel)
        if data_version_id:
            query = query.filter(SrcChannel.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                entity = upsert_entity(
                    session,
                    entity_type=EntityType.CHANNEL,
                    business_key=row.channel_key,
                    display_name=row.channel_name,
                    attributes={
                        "commission_policy": row.commission_policy,
                        "region": row.region,
                    },
                    data_version_id=data_version_id,
                )
                # Report the entity in the result, as the other master-data
                # adapters do for their primary entity.
                result.entities.append(entity)
                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
|
||||
|
||||
@register_adapter
class MsisdnAdapter(BaseAdapter):
    """Map ``SrcMsisdn`` rows to MSISDN entities plus HOLDS_MSISDN edges."""

    source_system = "BSS"
    staging_table = "src_msisdn"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project phone-number staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write entities.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the MSISDN entities and HOLDS_MSISDN
            relationships produced, ``row_count`` for rows mapped without
            error and ``error_count`` for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcMsisdn)
        if data_version_id:
            query = query.filter(SrcMsisdn.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                msisdn_entity = upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=row.msisdn,
                    display_name=row.msisdn,
                    attributes={
                        "region": row.region,
                        "status": row.status,
                        "activate_date": str(row.activate_date) if row.activate_date else None,
                        "deactivate_date": (
                            str(row.deactivate_date) if row.deactivate_date else None
                        ),
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(msisdn_entity)

                # Number -> holding customer. The customer is upserted by key
                # only and is not added to ``result.entities``.
                if row.customer_key:
                    cust_entity = upsert_entity(
                        session,
                        entity_type=EntityType.CUSTOMER,
                        business_key=row.customer_key,
                        data_version_id=data_version_id,
                    )
                    rel = add_relationship(
                        session,
                        RelationshipType.HOLDS_MSISDN,
                        cust_entity,
                        msisdn_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
|
||||
|
||||
@register_adapter
class AccountAdapter(BaseAdapter):
    """Map ``SrcAccount`` rows to ACCOUNT entities plus OWNS_ACCOUNT edges."""

    source_system = "FIN"
    staging_table = "src_account"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project account staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write entities.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the ACCOUNT entities and OWNS_ACCOUNT
            relationships produced, ``row_count`` for rows mapped without
            error and ``error_count`` for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # Staging ``owner_type`` value -> ontology entity type. Built once;
        # an owner type not listed here produces no ownership edge.
        owner_type_map = {
            "customer": EntityType.CUSTOMER,
            "supplier": EntityType.SUPPLIER,
            "legal_person": EntityType.LEGAL_PERSON,
        }

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcAccount)
        if data_version_id:
            query = query.filter(SrcAccount.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                acct_entity = upsert_entity(
                    session,
                    entity_type=EntityType.ACCOUNT,
                    business_key=row.account_key,
                    display_name=row.account_name,
                    attributes={
                        "bank_name": row.bank_name,
                        "branch_name": row.branch_name,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(acct_entity)

                # Account -> owning subject. The owner is upserted by key only
                # and is not added to ``result.entities``.
                if row.owner_key and row.owner_type:
                    etype = owner_type_map.get(row.owner_type)
                    if etype:
                        owner_entity = upsert_entity(
                            session,
                            entity_type=etype,
                            business_key=row.owner_key,
                            data_version_id=data_version_id,
                        )
                        rel = add_relationship(
                            session,
                            RelationshipType.OWNS_ACCOUNT,
                            owner_entity,
                            acct_entity,
                            data_version_id=data_version_id,
                        )
                        result.relationships.append(rel)

                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
@@ -0,0 +1,137 @@
|
||||
"""R10 适配器:收入与成本跨期匹配。
|
||||
|
||||
源明细:SrcRevenueRecognition / SrcCostAmortization
|
||||
映射到:MetricEvent(收入确认/成本摊销时序) + Entity(CONTRACT) 关联补强
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.graph_repo import upsert_entity
|
||||
from app.datahub.models import MetricEvent
|
||||
from app.datahub.ontology import EntityType
|
||||
from app.datahub.staging import SrcCostAmortization, SrcRevenueRecognition
|
||||
from app.ingest.base import BaseAdapter, IngestResult
|
||||
from app.ingest.registry import register_adapter
|
||||
|
||||
|
||||
@register_adapter
class RevenueRecognitionAdapter(BaseAdapter):
    """Map ``SrcRevenueRecognition`` rows to revenue-recognition ``MetricEvent`` rows."""

    source_system = "FIN"
    staging_table = "src_revenue_recognition"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project revenue-recognition staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write events.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the metric events added to the session,
            ``row_count`` for rows mapped without error and ``error_count``
            for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcRevenueRecognition)
        if data_version_id:
            query = query.filter(SrcRevenueRecognition.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                # Make sure the contract entity exists.
                if row.contract_no:
                    upsert_entity(
                        session,
                        entity_type=EntityType.CONTRACT,
                        business_key=row.contract_no,
                        data_version_id=data_version_id,
                    )

                # Rows without a recognition date produce no event.
                if row.recognition_date:
                    # The date is placed at 00:00 UTC to get an aware timestamp.
                    event_time = dt.datetime.combine(
                        row.recognition_date, dt.time.min, tzinfo=dt.timezone.utc
                    )
                    event = MetricEvent(
                        event_time=event_time,
                        subject_type="contract",
                        # NOTE(review): falls back to the voucher number while
                        # subject_type stays "contract" -- confirm intended.
                        subject_key=row.contract_no or row.voucher_no,
                        metric_name="revenue_recognition",
                        metric_value=row.recognition_amount,
                        attributes={
                            "voucher_no": row.voucher_no,
                            "billing_mode": row.billing_mode,
                            "period_start": str(row.period_start) if row.period_start else None,
                            "period_end": str(row.period_end) if row.period_end else None,
                            "prepaid_flag": row.prepaid_flag,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
|
||||
|
||||
@register_adapter
class CostAmortizationAdapter(BaseAdapter):
    """Map ``SrcCostAmortization`` rows to cost-amortization ``MetricEvent`` rows."""

    source_system = "FIN"
    staging_table = "src_cost_amortization"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project cost-amortization staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write events.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the metric events added to the session,
            ``row_count`` for rows mapped without error and ``error_count``
            for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcCostAmortization)
        if data_version_id:
            query = query.filter(SrcCostAmortization.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                # Make sure the contract entity exists.
                if row.contract_no:
                    upsert_entity(
                        session,
                        entity_type=EntityType.CONTRACT,
                        business_key=row.contract_no,
                        data_version_id=data_version_id,
                    )

                # Rows without an amortization date produce no event.
                if row.amortization_date:
                    # The date is placed at 00:00 UTC to get an aware timestamp.
                    event_time = dt.datetime.combine(
                        row.amortization_date, dt.time.min, tzinfo=dt.timezone.utc
                    )
                    event = MetricEvent(
                        event_time=event_time,
                        subject_type="contract",
                        # NOTE(review): falls back to the voucher number while
                        # subject_type stays "contract" -- confirm intended.
                        subject_key=row.contract_no or row.voucher_no,
                        metric_name="cost_amortization",
                        metric_value=row.amortization_amount,
                        attributes={
                            "voucher_no": row.voucher_no,
                            "cost_type": row.cost_type,
                            "total_periods": row.total_periods,
                            "current_period": row.current_period,
                            "delivery_date": (
                                str(row.delivery_date) if row.delivery_date else None
                            ),
                            "acceptance_date": (
                                str(row.acceptance_date) if row.acceptance_date else None
                            ),
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
@@ -0,0 +1,103 @@
|
||||
"""R11 适配器:渠道佣金与代理商套利。
|
||||
|
||||
源明细:SrcTerminalBinding
|
||||
映射到:Entity(IMEI, MSISDN) + 关系(BOUND_DEVICE) + MetricEvent
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.graph_repo import add_relationship, upsert_entity
|
||||
from app.datahub.models import MetricEvent
|
||||
from app.datahub.ontology import EntityType, RelationshipType
|
||||
from app.datahub.staging import SrcTerminalBinding
|
||||
from app.ingest.base import BaseAdapter, IngestResult
|
||||
from app.ingest.registry import register_adapter
|
||||
|
||||
|
||||
@register_adapter
class TerminalBindingAdapter(BaseAdapter):
    """Map ``SrcTerminalBinding`` rows to IMEI / MSISDN entities, a BOUND_DEVICE edge and an event."""

    source_system = "BSS"
    staging_table = "src_terminal_binding"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project terminal-binding staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write entities.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the entities, relationships and metric
            events produced, ``row_count`` for rows mapped without error and
            ``error_count`` for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcTerminalBinding)
        if data_version_id:
            query = query.filter(SrcTerminalBinding.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                # IMEI entity.
                imei_entity = upsert_entity(
                    session,
                    entity_type=EntityType.IMEI,
                    business_key=row.imei,
                    display_name=row.brand_model or row.imei,
                    attributes={
                        "brand_model": row.brand_model,
                        "region": row.region,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(imei_entity)

                # MSISDN entity.
                msisdn_entity = upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=row.msisdn,
                    display_name=row.msisdn,
                    attributes={"region": row.region},
                    data_version_id=data_version_id,
                )
                result.entities.append(msisdn_entity)

                # Binding relationship between the number and the device.
                rel = add_relationship(
                    session,
                    RelationshipType.BOUND_DEVICE,
                    msisdn_entity,
                    imei_entity,
                    attributes={
                        "activate_time": str(row.activate_time) if row.activate_time else None,
                        "subsidy_amount": row.subsidy_amount,
                        "commission_amount": row.commission_amount,
                    } if False else {
                        "activate_time": str(row.activate_time) if row.activate_time else None,
                        "subsidy_amount": row.subsidy_amount,
                    },
                    data_version_id=data_version_id,
                )
                result.relationships.append(rel)

                # Terminal activation / subsidy event; skipped without an
                # activation time.
                if row.activate_time:
                    # A missing amount counts as zero so the sum cannot raise
                    # after the entities above were already written.
                    subsidy = row.subsidy_amount if row.subsidy_amount is not None else 0
                    commission = (
                        row.commission_amount if row.commission_amount is not None else 0
                    )
                    event = MetricEvent(
                        event_time=row.activate_time,
                        subject_type="imei",
                        subject_key=row.imei,
                        metric_name="terminal_activate",
                        metric_value=subsidy + commission,
                        attributes={
                            "msisdn": row.msisdn,
                            "subsidy_amount": row.subsidy_amount,
                            "commission_amount": row.commission_amount,
                            "online_days": row.online_days,
                            "post_activate_traffic_mb": row.post_activate_traffic_mb,
                            "cross_province_flag": row.cross_province_flag,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
@@ -0,0 +1,200 @@
|
||||
"""R12 适配器:网络建设与工程采购。
|
||||
|
||||
源明细:SrcBidding / SrcProjectSignoff
|
||||
映射到:Entity(SUPPLIER, WORK_ORDER) + 关系(BIDS_FOR, SUPPLIES) + MetricEvent
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.graph_repo import add_relationship, upsert_entity
|
||||
from app.datahub.models import MetricEvent
|
||||
from app.datahub.ontology import EntityType, RelationshipType
|
||||
from app.datahub.staging import SrcBidding, SrcProjectSignoff
|
||||
from app.ingest.base import BaseAdapter, IngestResult
|
||||
from app.ingest.registry import register_adapter
|
||||
|
||||
|
||||
@register_adapter
class BiddingAdapter(BaseAdapter):
    """Map ``SrcBidding`` rows to SUPPLIER / WORK_ORDER entities, BIDS_FOR edges and bid events."""

    source_system = "ERP"
    staging_table = "src_bidding"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project bidding staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write entities.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the entities, relationships and metric
            events produced, ``row_count`` for rows mapped without error and
            ``error_count`` for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcBidding)
        if data_version_id:
            query = query.filter(SrcBidding.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                # Supplier (bidder) entity.
                supplier_entity = upsert_entity(
                    session,
                    entity_type=EntityType.SUPPLIER,
                    business_key=row.bidder_key,
                    display_name=row.bidder_name,
                    attributes={
                        "legal_person": row.legal_person,
                        "shareholder_info": row.shareholder_info,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(supplier_entity)

                # Work order / project entity.
                wo_entity = upsert_entity(
                    session,
                    entity_type=EntityType.WORK_ORDER,
                    business_key=row.project_no,
                    display_name=row.project_name,
                    data_version_id=data_version_id,
                )
                result.entities.append(wo_entity)

                # Bid relationship.
                rel = add_relationship(
                    session,
                    RelationshipType.BIDS_FOR,
                    supplier_entity,
                    wo_entity,
                    attributes={
                        "bid_amount": row.bid_amount,
                        "win_flag": row.win_flag,
                        "technical_score": row.technical_score,
                    },
                    data_version_id=data_version_id,
                )
                result.relationships.append(rel)

                # A winning bid ("Y", case-insensitive) also gets a SUPPLIES edge.
                if row.win_flag and row.win_flag.upper() == "Y":
                    rel2 = add_relationship(
                        session,
                        RelationshipType.SUPPLIES,
                        supplier_entity,
                        wo_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel2)

                # Legal person entity linked to the bidder.
                # NOTE(review): unlike the customer/supplier master-data
                # adapters, neither this entity nor its edge is recorded in
                # ``result`` -- confirm whether that is intended.
                if row.legal_person:
                    lp_entity = upsert_entity(
                        session,
                        entity_type=EntityType.LEGAL_PERSON,
                        business_key=row.legal_person,
                        display_name=row.legal_person,
                        data_version_id=data_version_id,
                    )
                    add_relationship(
                        session,
                        RelationshipType.LEGAL_REP_OF,
                        lp_entity,
                        supplier_entity,
                        data_version_id=data_version_id,
                    )

                # Bid event; skipped without a bid time.
                if row.bid_time:
                    event = MetricEvent(
                        event_time=row.bid_time,
                        subject_type="work_order",
                        subject_key=row.project_no,
                        metric_name="bid_submitted",
                        metric_value=row.bid_amount or 0.0,
                        attributes={
                            "bidder_key": row.bidder_key,
                            "bidder_name": row.bidder_name,
                            "win_flag": row.win_flag,
                            "technical_score": row.technical_score,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
|
||||
|
||||
@register_adapter
class ProjectSignoffAdapter(BaseAdapter):
    """Map ``SrcProjectSignoff`` rows to sign-off and inspection ``MetricEvent`` rows."""

    source_system = "WO"
    staging_table = "src_project_signoff"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project sign-off staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write events.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the metric events added to the session,
            ``row_count`` for rows mapped without error and ``error_count``
            for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcProjectSignoff)
        if data_version_id:
            query = query.filter(SrcProjectSignoff.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                # Make sure the work-order entity exists.
                upsert_entity(
                    session,
                    entity_type=EntityType.WORK_ORDER,
                    business_key=row.project_no,
                    data_version_id=data_version_id,
                )

                # Sign-off event; skipped without a sign-off date.
                if row.signoff_date:
                    # The date is placed at 00:00 UTC to get an aware timestamp.
                    event_time = dt.datetime.combine(
                        row.signoff_date, dt.time.min, tzinfo=dt.timezone.utc
                    )
                    event = MetricEvent(
                        event_time=event_time,
                        subject_type="work_order",
                        subject_key=row.project_no,
                        metric_name="signoff_quantity",
                        metric_value=row.signoff_quantity or 0.0,
                        attributes={
                            "work_order_no": row.work_order_no,
                            "unit": row.unit,
                            "resource_consumed": row.resource_consumed,
                            "contractor_key": row.contractor_key,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                # Inspection GPS event. The latitude is tested against None
                # rather than for truthiness so that 0.0 is a valid value.
                if row.inspection_time and row.inspection_lat is not None:
                    event2 = MetricEvent(
                        event_time=row.inspection_time,
                        subject_type="work_order",
                        subject_key=row.project_no,
                        metric_name="inspection",
                        metric_value=1.0,
                        attributes={
                            "lat": row.inspection_lat,
                            "lng": row.inspection_lng,
                            "work_order_no": row.work_order_no,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event2)
                    result.metric_events.append(event2)

                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
@@ -0,0 +1,147 @@
|
||||
"""R13 适配器:互联互通与网间结算。
|
||||
|
||||
源明细:SrcCdr / SrcInterconnectSettlement
|
||||
映射到:Entity(MSISDN, SETTLEMENT) + MetricEvent
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.graph_repo import upsert_entity
|
||||
from app.datahub.models import MetricEvent
|
||||
from app.datahub.ontology import EntityType
|
||||
from app.datahub.staging import SrcCdr, SrcInterconnectSettlement
|
||||
from app.ingest.base import BaseAdapter, IngestResult
|
||||
from app.ingest.registry import register_adapter
|
||||
|
||||
|
||||
@register_adapter
class CdrAdapter(BaseAdapter):
    """Map ``SrcCdr`` rows to MSISDN entities and call-duration ``MetricEvent`` rows."""

    source_system = "SIGNAL"
    staging_table = "src_cdr"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Project call-detail staging rows onto the ontology.

        Args:
            session: Session used to read staging rows and to write events.
            data_version_id: When given, only rows of that data version are
                read; the value is also passed to everything written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the metric events added to the session,
            ``row_count`` for rows mapped without error and ``error_count``
            for rows that raised.
        """
        import logging  # function-scope import keeps this block self-contained

        logger = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): a single LIMIT query -- rows beyond ``batch_size`` are
        # not read by this call; confirm the caller handles larger versions.
        query = session.query(SrcCdr)
        if data_version_id:
            query = query.filter(SrcCdr.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for index, row in enumerate(rows):
            try:
                # Make sure both the caller and the callee number entities
                # exist; they are not added to ``result.entities``.
                upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=row.caller,
                    display_name=row.caller,
                    data_version_id=data_version_id,
                )
                upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=row.callee,
                    display_name=row.callee,
                    data_version_id=data_version_id,
                )

                # Call event, keyed on the caller.
                event = MetricEvent(
                    event_time=row.start_time,
                    subject_type="msisdn",
                    subject_key=row.caller,
                    metric_name="cdr_duration",
                    metric_value=float(row.duration_sec),
                    attributes={
                        "callee": row.callee,
                        "call_type": row.call_type,
                        "peer_operator": row.peer_operator,
                        "route_info": row.route_info,
                    },
                    data_version_id=data_version_id,
                )
                session.add(event)
                result.metric_events.append(event)
                result.row_count += 1
            except Exception:
                # Best effort: one bad row must not abort the batch, but the
                # failure is logged instead of being silently dropped.
                logger.exception(
                    "%s: failed to ingest %s row #%d",
                    type(self).__name__,
                    self.staging_table,
                    index,
                )
                result.error_count += 1

        return result
|
||||
|
||||
|
||||
@register_adapter
class InterconnectSettlementAdapter(BaseAdapter):
    """Map ``SrcInterconnectSettlement`` rows to SETTLEMENT entities and metric events."""

    source_system = "FIN"
    staging_table = "src_interconnect_settlement"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest one batch of interconnect settlement rows.

        Args:
            session: Database session used to read staging rows and add events.
            data_version_id: When truthy, only staging rows of this data
                version are read; the value is also stamped on what is written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the upserted settlement entities, the
            created events, the processed-row count and the error count.
        """
        result = IngestResult()
        query = session.query(SrcInterconnectSettlement)
        if data_version_id:
            query = query.filter(SrcInterconnectSettlement.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            try:
                # Settlement statement entity.
                settle_entity = upsert_entity(
                    session,
                    entity_type=EntityType.SETTLEMENT,
                    business_key=row.settlement_no,
                    display_name=f"网间结算-{row.settlement_no}",
                    attributes={
                        "peer_operator": row.peer_operator,
                        "settle_type": row.settle_type,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(settle_entity)

                # Settlement time-series event, timestamped at the start of the
                # settlement month ("YYYY-MM").
                try:
                    event_time = dt.datetime.strptime(
                        row.settle_period, "%Y-%m"
                    ).replace(tzinfo=dt.timezone.utc)
                except (TypeError, ValueError):
                    # A missing period (None -> TypeError) gets the same
                    # fallback as a malformed one (ValueError); otherwise the
                    # row would be dropped after its entity was already upserted.
                    event_time = dt.datetime.now(dt.timezone.utc)

                event = MetricEvent(
                    event_time=event_time,
                    subject_type="settlement",
                    subject_key=row.settlement_no,
                    metric_name="interconnect_settle",
                    metric_value=row.settle_amount,
                    attributes={
                        "peer_operator": row.peer_operator,
                        "settle_type": row.settle_type,
                        "volume": row.volume,
                        "unit_price": row.unit_price,
                        "sms_delivery_rate": row.sms_delivery_rate,
                    },
                    data_version_id=data_version_id,
                )
                session.add(event)
                result.metric_events.append(event)
                result.row_count += 1
            except Exception:
                # Best effort: a failing row is counted and skipped.
                result.error_count += 1

        return result
|
||||
@@ -0,0 +1,149 @@
|
||||
"""R14 适配器:云业务 / IDC 与新兴业务。
|
||||
|
||||
源明细:SrcCloudUsage / SrcIdcCabinet
|
||||
映射到:Entity(CONTRACT, CUSTOMER) + MetricEvent
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.graph_repo import upsert_entity
|
||||
from app.datahub.models import MetricEvent
|
||||
from app.datahub.ontology import EntityType
|
||||
from app.datahub.staging import SrcCloudUsage, SrcIdcCabinet
|
||||
from app.ingest.base import BaseAdapter, IngestResult
|
||||
from app.ingest.registry import register_adapter
|
||||
|
||||
|
||||
@register_adapter
class CloudUsageAdapter(BaseAdapter):
    """Map ``SrcCloudUsage`` rows to CONTRACT/CUSTOMER entities and usage events."""

    source_system = "BSS"
    staging_table = "src_cloud_usage"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest one batch of cloud resource usage rows.

        Args:
            session: Database session used to read staging rows and add events.
            data_version_id: When truthy, only staging rows of this data
                version are read; the value is also stamped on what is written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the created events, the processed-row
            count and the error count.
        """
        summary = IngestResult()

        usage_query = session.query(SrcCloudUsage)
        if data_version_id:
            usage_query = usage_query.filter(SrcCloudUsage.data_version_id == data_version_id)

        for usage in usage_query.limit(batch_size).all():
            try:
                # The contract is always upserted.
                upsert_entity(
                    session,
                    entity_type=EntityType.CONTRACT,
                    business_key=usage.contract_no,
                    data_version_id=data_version_id,
                )

                # The customer is upserted only when the row carries a key.
                if usage.customer_key:
                    upsert_entity(
                        session,
                        entity_type=EntityType.CUSTOMER,
                        business_key=usage.customer_key,
                        data_version_id=data_version_id,
                    )

                # A usage event is emitted only for rows that have a usage
                # date; the row is counted as processed either way.
                if usage.usage_date:
                    midnight_utc = dt.datetime.combine(
                        usage.usage_date, dt.time.min, tzinfo=dt.timezone.utc
                    )
                    usage_details = {
                        "resource_type": usage.resource_type,
                        "contracted_quota": usage.contracted_quota,
                        "billed_usage": usage.billed_usage,
                        "unit": usage.unit,
                        "customer_key": usage.customer_key,
                    }
                    usage_event = MetricEvent(
                        event_time=midnight_utc,
                        subject_type="contract",
                        subject_key=usage.contract_no,
                        metric_name="cloud_usage",
                        metric_value=usage.actual_usage,
                        attributes=usage_details,
                        data_version_id=data_version_id,
                    )
                    session.add(usage_event)
                    summary.metric_events.append(usage_event)

                summary.row_count += 1
            except Exception:
                # Best effort: a failing row is counted and skipped.
                summary.error_count += 1

        return summary
|
||||
|
||||
|
||||
@register_adapter
class IdcCabinetAdapter(BaseAdapter):
    """Map ``SrcIdcCabinet`` rows to IDC cabinet occupancy/power metric events."""

    source_system = "OSS"
    staging_table = "src_idc_cabinet"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest one batch of IDC cabinet rows.

        Args:
            session: Database session used to read staging rows and add events.
            data_version_id: When truthy, only staging rows of this data
                version are read; the value is also stamped on what is written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the created events, the processed-row
            count and the error count.
        """
        summary = IngestResult()

        cabinet_query = session.query(SrcIdcCabinet)
        if data_version_id:
            cabinet_query = cabinet_query.filter(SrcIdcCabinet.data_version_id == data_version_id)

        for cabinet in cabinet_query.limit(batch_size).all():
            try:
                # The contract entity is upserted only when the row names one.
                if cabinet.contract_no:
                    upsert_entity(
                        session,
                        entity_type=EntityType.CONTRACT,
                        business_key=cabinet.contract_no,
                        data_version_id=data_version_id,
                    )

                # The event is stamped at the start of the report month
                # ("YYYY-MM"); a missing or malformed month falls back to the
                # current UTC time.
                month_start = None
                if cabinet.report_month:
                    try:
                        month_start = dt.datetime.strptime(
                            cabinet.report_month, "%Y-%m"
                        ).replace(tzinfo=dt.timezone.utc)
                    except ValueError:
                        month_start = None
                if month_start is None:
                    month_start = dt.datetime.now(dt.timezone.utc)

                cabinet_details = {
                    "cabinet_id": cabinet.cabinet_id,
                    "customer_key": cabinet.customer_key,
                    "power_kwh": cabinet.power_kwh,
                    "revenue_amount": cabinet.revenue_amount,
                    "acceptance_date": str(cabinet.acceptance_date) if cabinet.acceptance_date else None,
                }
                # NOTE(review): subject_type stays "contract" even when the key
                # falls back to cabinet_id - confirm this is intended.
                cabinet_event = MetricEvent(
                    event_time=month_start,
                    subject_type="contract",
                    subject_key=cabinet.contract_no or cabinet.cabinet_id,
                    metric_name="idc_cabinet",
                    metric_value=cabinet.occupancy_rate or 0.0,
                    attributes=cabinet_details,
                    data_version_id=data_version_id,
                )
                session.add(cabinet_event)
                summary.metric_events.append(cabinet_event)
                summary.row_count += 1
            except Exception:
                # Best effort: a failing row is counted and skipped.
                summary.error_count += 1

        return summary
|
||||
@@ -0,0 +1,237 @@
|
||||
"""R15 适配器:员工内部舞弊与资源滥用。
|
||||
|
||||
源明细:SrcEmployeeOperation / SrcInternalMsisdn / SrcPointsTransaction
|
||||
映射到:Entity(EMPLOYEE, MSISDN) + 关系(OPERATES) + MetricEvent
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.graph_repo import add_relationship, upsert_entity
|
||||
from app.datahub.models import MetricEvent
|
||||
from app.datahub.ontology import EntityType, RelationshipType
|
||||
from app.datahub.staging import (
|
||||
SrcEmployeeOperation,
|
||||
SrcInternalMsisdn,
|
||||
SrcPointsTransaction,
|
||||
)
|
||||
from app.ingest.base import BaseAdapter, IngestResult
|
||||
from app.ingest.registry import register_adapter
|
||||
|
||||
|
||||
@register_adapter
class EmployeeOperationAdapter(BaseAdapter):
    """Map ``SrcEmployeeOperation`` rows to EMPLOYEE entities, OPERATES relationships and events."""

    source_system = "BSS"
    staging_table = "src_employee_operation"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest one batch of employee operation log rows.

        Args:
            session: Database session used to read staging rows and add events.
            data_version_id: When truthy, only staging rows of this data
                version are read; the value is also stamped on what is written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the employee entities, the OPERATES
            relationships, the created events, the processed-row count and
            the error count.
        """
        summary = IngestResult()

        op_query = session.query(SrcEmployeeOperation)
        if data_version_id:
            op_query = op_query.filter(SrcEmployeeOperation.data_version_id == data_version_id)

        for op in op_query.limit(batch_size).all():
            try:
                # Employee entity.
                employee = upsert_entity(
                    session,
                    entity_type=EntityType.EMPLOYEE,
                    business_key=op.employee_key,
                    display_name=op.employee_name,
                    attributes={
                        "position": op.position,
                        "department": op.department,
                        "role_permissions": op.role_permissions,
                    },
                    data_version_id=data_version_id,
                )
                summary.entities.append(employee)

                # Link the employee to the operation target when the target
                # looks like a phone number: exactly 11 characters, all digits.
                # NOTE(review): a leading "1" is not checked and non-matching
                # targets produce no relationship - confirm this is intended.
                raw_target = op.operation_target
                if raw_target:
                    candidate = raw_target.strip()
                    if len(candidate) == 11 and candidate.isdigit():
                        number_entity = upsert_entity(
                            session,
                            entity_type=EntityType.MSISDN,
                            business_key=candidate,
                            data_version_id=data_version_id,
                        )
                        operates = add_relationship(
                            session,
                            RelationshipType.OPERATES,
                            employee,
                            number_entity,
                            attributes={"operation_type": op.operation_type},
                            data_version_id=data_version_id,
                        )
                        summary.relationships.append(operates)

                # Operation log event, emitted only when the row has a
                # timestamp; the row is counted as processed either way.
                if op.operation_time:
                    log_details = {
                        "operation_type": op.operation_type,
                        "operation_target": op.operation_target,
                        "position": op.position,
                        "department": op.department,
                    }
                    log_event = MetricEvent(
                        event_time=op.operation_time,
                        subject_type="employee",
                        subject_key=op.employee_key,
                        metric_name="operation_log",
                        metric_value=1.0,
                        attributes=log_details,
                        data_version_id=data_version_id,
                    )
                    session.add(log_event)
                    summary.metric_events.append(log_event)

                summary.row_count += 1
            except Exception:
                # Best effort: a failing row is counted and skipped.
                summary.error_count += 1

        return summary
|
||||
|
||||
|
||||
@register_adapter
class InternalMsisdnAdapter(BaseAdapter):
    """Map ``SrcInternalMsisdn`` rows to MSISDN/EMPLOYEE entities, OPERATES relationships and usage events."""

    source_system = "BSS"
    staging_table = "src_internal_msisdn"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest one batch of internal-number usage rows.

        Args:
            session: Database session used to read staging rows and add events.
            data_version_id: When truthy, only staging rows of this data
                version are read; the value is also stamped on what is written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the number entities, the OPERATES
            relationships, the created events, the processed-row count and
            the error count.
        """
        # Imported once per call instead of once per row; kept function-local
        # because this module does not import datetime at the top.
        import datetime as dt

        result = IngestResult()
        query = session.query(SrcInternalMsisdn)
        if data_version_id:
            query = query.filter(SrcInternalMsisdn.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            try:
                # Internal number entity.
                msisdn_entity = upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=row.msisdn,
                    display_name=row.msisdn,
                    attributes={"purpose": row.purpose, "internal": True},
                    data_version_id=data_version_id,
                )
                result.entities.append(msisdn_entity)

                # Assigned employee -> OPERATES relationship.
                if row.assigned_employee:
                    emp_entity = upsert_entity(
                        session,
                        entity_type=EntityType.EMPLOYEE,
                        business_key=row.assigned_employee,
                        data_version_id=data_version_id,
                    )
                    rel = add_relationship(
                        session, RelationshipType.OPERATES, emp_entity, msisdn_entity,
                        attributes={"purpose": row.purpose},
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                # Usage event stamped at the start of the report month
                # ("YYYY-MM"); a missing or malformed month falls back to the
                # current UTC time.
                try:
                    event_time = dt.datetime.strptime(
                        row.report_month, "%Y-%m"
                    ).replace(tzinfo=dt.timezone.utc) if row.report_month else dt.datetime.now(dt.timezone.utc)
                except ValueError:
                    event_time = dt.datetime.now(dt.timezone.utc)

                event = MetricEvent(
                    event_time=event_time,
                    subject_type="msisdn",
                    subject_key=row.msisdn,
                    metric_name="internal_usage",
                    metric_value=row.traffic_mb,
                    attributes={
                        "voice_min": row.voice_min,
                        "revenue_attributed": row.revenue_attributed,
                        "assigned_employee": row.assigned_employee,
                        "purpose": row.purpose,
                    },
                    data_version_id=data_version_id,
                )
                session.add(event)
                result.metric_events.append(event)
                result.row_count += 1
            except Exception:
                # Best effort: a failing row is counted and skipped.
                result.error_count += 1

        return result
|
||||
|
||||
|
||||
@register_adapter
class PointsTransactionAdapter(BaseAdapter):
    """Map ``SrcPointsTransaction`` rows to points grant/redeem metric events."""

    source_system = "BSS"
    staging_table = "src_points_transaction"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest one batch of points transaction rows.

        Args:
            session: Database session used to read staging rows and add events.
            data_version_id: When truthy, only staging rows of this data
                version are read; the value is also stamped on what is written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the created events, the processed-row
            count and the error count.
        """
        summary = IngestResult()

        txn_query = session.query(SrcPointsTransaction)
        if data_version_id:
            txn_query = txn_query.filter(SrcPointsTransaction.data_version_id == data_version_id)

        for txn in txn_query.limit(batch_size).all():
            try:
                # Make sure the operator exists as an EMPLOYEE entity.
                upsert_entity(
                    session,
                    entity_type=EntityType.EMPLOYEE,
                    business_key=txn.operator_key,
                    data_version_id=data_version_id,
                )

                # A points event is emitted only for rows with a transaction
                # time; the row is counted as processed either way.
                if txn.transaction_time:
                    txn_details = {
                        "transaction_no": txn.transaction_no,
                        "target_account": txn.target_account,
                        "transaction_type": txn.transaction_type,
                        "cash_value": txn.cash_value,
                    }
                    points_event = MetricEvent(
                        event_time=txn.transaction_time,
                        subject_type="employee",
                        subject_key=txn.operator_key,
                        metric_name="points_transaction",
                        metric_value=txn.points_amount,
                        attributes=txn_details,
                        data_version_id=data_version_id,
                    )
                    session.add(points_event)
                    summary.metric_events.append(points_event)

                summary.row_count += 1
            except Exception:
                # Best effort: a failing row is counted and skipped.
                summary.error_count += 1

        return summary
|
||||
@@ -0,0 +1,236 @@
|
||||
"""R8 适配器:政企收入全链路穿透 / 拆单规避。
|
||||
|
||||
源明细:SrcContract / SrcContractApproval / SrcPayment
|
||||
映射到:Entity(CONTRACT, CUSTOMER, ACCOUNT, ADDRESS, LEGAL_PERSON) + 关系 + MetricEvent
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.graph_repo import add_relationship, upsert_entity
|
||||
from app.datahub.models import MetricEvent
|
||||
from app.datahub.ontology import EntityType, RelationshipType
|
||||
from app.datahub.staging import SrcContract, SrcContractApproval, SrcPayment
|
||||
from app.ingest.base import BaseAdapter, IngestResult
|
||||
from app.ingest.registry import register_adapter
|
||||
|
||||
|
||||
@register_adapter
class ContractAdapter(BaseAdapter):
    """Map ``SrcContract`` rows to CONTRACT/CUSTOMER/ACCOUNT/ADDRESS/LEGAL_PERSON entities and their relationships."""

    source_system = "CONTRACT"
    staging_table = "src_contract"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest one batch of contract rows.

        Args:
            session: Database session used to read staging rows.
            data_version_id: When truthy, only staging rows of this data
                version are read; the value is also stamped on what is written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the contract, customer and account
            entities, all created relationships, the processed-row count and
            the error count.
        """
        summary = IngestResult()

        contract_query = session.query(SrcContract)
        if data_version_id:
            contract_query = contract_query.filter(SrcContract.data_version_id == data_version_id)

        for src in contract_query.limit(batch_size).all():
            try:
                # Contract entity.
                contract_attrs = {
                    "amount": src.amount,
                    "sign_date": str(src.sign_date) if src.sign_date else None,
                    "approval_threshold": src.approval_threshold,
                    "approval_level": src.approval_level,
                }
                contract = upsert_entity(
                    session,
                    entity_type=EntityType.CONTRACT,
                    business_key=src.contract_no,
                    display_name=f"合同-{src.contract_no}",
                    attributes=contract_attrs,
                    data_version_id=data_version_id,
                )
                summary.entities.append(contract)

                # Customer entity and the customer -SIGNED-> contract link.
                customer = upsert_entity(
                    session,
                    entity_type=EntityType.CUSTOMER,
                    business_key=src.customer_key,
                    display_name=src.customer_name,
                    data_version_id=data_version_id,
                )
                summary.entities.append(customer)
                summary.relationships.append(
                    add_relationship(
                        session,
                        RelationshipType.SIGNED,
                        customer,
                        contract,
                        data_version_id=data_version_id,
                    )
                )

                # Payment account: contract -PAID_BY-> account.
                if src.pay_account:
                    account = upsert_entity(
                        session,
                        entity_type=EntityType.ACCOUNT,
                        business_key=src.pay_account,
                        data_version_id=data_version_id,
                    )
                    summary.entities.append(account)
                    summary.relationships.append(
                        add_relationship(
                            session,
                            RelationshipType.PAID_BY,
                            contract,
                            account,
                            data_version_id=data_version_id,
                        )
                    )

                # NOTE(review): address and legal-person entities are not
                # added to summary.entities, unlike contract/customer/account -
                # confirm whether the summary should include them.

                # Registered address: customer -REGISTERED_AT-> address.
                if src.register_address:
                    address = upsert_entity(
                        session,
                        entity_type=EntityType.ADDRESS,
                        business_key=src.register_address,
                        display_name=src.register_address,
                        data_version_id=data_version_id,
                    )
                    summary.relationships.append(
                        add_relationship(
                            session,
                            RelationshipType.REGISTERED_AT,
                            customer,
                            address,
                            data_version_id=data_version_id,
                        )
                    )

                # Legal representative: legal person -LEGAL_REP_OF-> customer.
                if src.legal_person:
                    legal_person = upsert_entity(
                        session,
                        entity_type=EntityType.LEGAL_PERSON,
                        business_key=src.legal_person,
                        display_name=src.legal_person,
                        data_version_id=data_version_id,
                    )
                    summary.relationships.append(
                        add_relationship(
                            session,
                            RelationshipType.LEGAL_REP_OF,
                            legal_person,
                            customer,
                            data_version_id=data_version_id,
                        )
                    )

                summary.row_count += 1
            except Exception:
                # Best effort: a failing row is counted and skipped.
                summary.error_count += 1

        return summary
|
||||
|
||||
|
||||
@register_adapter
class ContractApprovalAdapter(BaseAdapter):
    """Map ``SrcContractApproval`` rows to approval-step metric events."""

    source_system = "CONTRACT"
    staging_table = "src_contract_approval"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest one batch of contract approval rows.

        Args:
            session: Database session used to read staging rows and add events.
            data_version_id: When truthy, only staging rows of this data
                version are read; the value is also stamped on what is written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the created events, the processed-row
            count and the error count.
        """
        summary = IngestResult()

        approval_query = session.query(SrcContractApproval)
        if data_version_id:
            approval_query = approval_query.filter(
                SrcContractApproval.data_version_id == data_version_id
            )

        for approval in approval_query.limit(batch_size).all():
            try:
                # Rows without an approval time yield no event but are still
                # counted as processed.
                if approval.approval_time:
                    step_details = {
                        "approver": approval.approver,
                        "result": approval.approval_result,
                        "remark": approval.remark,
                    }
                    step_event = MetricEvent(
                        event_time=approval.approval_time,
                        subject_type="contract",
                        subject_key=approval.contract_no,
                        metric_name="approval_step",
                        metric_value=float(approval.approval_step),
                        attributes=step_details,
                        data_version_id=data_version_id,
                    )
                    session.add(step_event)
                    summary.metric_events.append(step_event)
                summary.row_count += 1
            except Exception:
                # Best effort: a failing row is counted and skipped.
                summary.error_count += 1

        return summary
|
||||
|
||||
|
||||
@register_adapter
class PaymentAdapter(BaseAdapter):
    """Map ``SrcPayment`` rows to payment metric events and reinforce contract-account links."""

    source_system = "FIN"
    staging_table = "src_payment"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest one batch of payment rows.

        Args:
            session: Database session used to read staging rows and add events.
            data_version_id: When truthy, only staging rows of this data
                version are read; the value is also stamped on what is written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the created events, the PAID_BY
            relationships, the processed-row count and the error count.
        """
        # Imported once per call instead of inside the row loop; kept
        # function-local because this module does not import datetime at the top.
        import datetime as dt

        result = IngestResult()
        query = session.query(SrcPayment)
        if data_version_id:
            query = query.filter(SrcPayment.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            try:
                # Payment event, stamped at midnight UTC of the payment date;
                # rows without a date yield no event.
                if row.pay_date:
                    event_time = dt.datetime.combine(
                        row.pay_date, dt.time.min, tzinfo=dt.timezone.utc
                    )
                    event = MetricEvent(
                        event_time=event_time,
                        subject_type="contract",
                        subject_key=row.contract_no,
                        metric_name="payment",
                        metric_value=row.pay_amount,
                        attributes={
                            "pay_account": row.pay_account,
                            "pay_type": row.pay_type,
                            "overdue_flag": row.overdue_flag,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(event)
                    result.metric_events.append(event)

                # Reinforce the contract -PAID_BY-> account relationship.
                if row.pay_account:
                    contract_entity = upsert_entity(
                        session,
                        entity_type=EntityType.CONTRACT,
                        business_key=row.contract_no,
                        data_version_id=data_version_id,
                    )
                    acct_entity = upsert_entity(
                        session,
                        entity_type=EntityType.ACCOUNT,
                        business_key=row.pay_account,
                        data_version_id=data_version_id,
                    )
                    rel = add_relationship(
                        session, RelationshipType.PAID_BY, contract_entity, acct_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                result.row_count += 1
            except Exception:
                # Best effort: a failing row is counted and skipped.
                result.error_count += 1

        return result
|
||||
@@ -0,0 +1,184 @@
|
||||
"""R9 适配器:市场业务真实性 / 养卡骗补。
|
||||
|
||||
源明细:SrcChannelMonthly / SrcSubscription
|
||||
映射到:Entity(CHANNEL, MSISDN) + 关系(BELONGS_TO_CHANNEL, SUBSCRIBES) + MetricEvent
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.graph_repo import add_relationship, upsert_entity
|
||||
from app.datahub.models import MetricEvent
|
||||
from app.datahub.ontology import EntityType, RelationshipType
|
||||
from app.datahub.staging import SrcChannelMonthly, SrcSubscription
|
||||
from app.ingest.base import BaseAdapter, IngestResult
|
||||
from app.ingest.registry import register_adapter
|
||||
|
||||
|
||||
@register_adapter
class ChannelMonthlyAdapter(BaseAdapter):
    """Map ``SrcChannelMonthly`` rows to channel retention/commission metric events."""

    source_system = "BSS"
    staging_table = "src_channel_monthly"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest one batch of channel monthly cohort rows.

        Args:
            session: Database session used to read staging rows and add events.
            data_version_id: When truthy, only staging rows of this data
                version are read; the value is also stamped on what is written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the created events, the processed-row
            count and the error count.
        """
        result = IngestResult()
        query = session.query(SrcChannelMonthly)
        if data_version_id:
            query = query.filter(SrcChannelMonthly.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            try:
                # Make sure the channel exists as an entity.
                upsert_entity(
                    session,
                    entity_type=EntityType.CHANNEL,
                    business_key=row.channel_key,
                    data_version_id=data_version_id,
                )

                # cohort_label such as "2025-01" becomes the event timestamp.
                try:
                    event_time = dt.datetime.strptime(
                        row.cohort_label, "%Y-%m"
                    ).replace(tzinfo=dt.timezone.utc)
                except (TypeError, ValueError):
                    # A missing label (None -> TypeError) gets the same
                    # fallback as a malformed one (ValueError); otherwise the
                    # row would be dropped after the channel was already upserted.
                    event_time = dt.datetime.now(dt.timezone.utc)

                # Retention ratio; 0.0 when the cohort is empty.
                if row.cohort_size > 0:
                    retention = row.retained / row.cohort_size
                else:
                    retention = 0.0

                event = MetricEvent(
                    event_time=event_time,
                    subject_type="channel",
                    subject_key=row.channel_key,
                    metric_name="retention",
                    metric_value=retention,
                    attributes={
                        "cohort_label": row.cohort_label,
                        "month_index": row.month_index,
                        "cohort_size": row.cohort_size,
                        "retained": row.retained,
                        "commission_paid": row.commission_paid,
                        "active_ratio": row.active_ratio,
                        "zero_usage_ratio": row.zero_usage_ratio,
                    },
                    data_version_id=data_version_id,
                )
                session.add(event)
                result.metric_events.append(event)
                result.row_count += 1
            except Exception:
                # Best effort: a failing row is counted and skipped.
                result.error_count += 1

        return result
|
||||
|
||||
|
||||
@register_adapter
class SubscriptionAdapter(BaseAdapter):
    """Map ``SrcSubscription`` rows to MSISDN entities, channel/subscription relationships and events."""

    source_system = "BSS"
    staging_table = "src_subscription"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest one batch of subscription rows.

        Args:
            session: Database session used to read staging rows and add events.
            data_version_id: When truthy, only staging rows of this data
                version are read; the value is also stamped on what is written.
            batch_size: Maximum number of staging rows read by this call.

        Returns:
            An ``IngestResult`` with the number entities, the created
            relationships and events, the processed-row count and the error
            count.
        """
        summary = IngestResult()

        sub_query = session.query(SrcSubscription)
        if data_version_id:
            sub_query = sub_query.filter(SrcSubscription.data_version_id == data_version_id)

        for sub in sub_query.limit(batch_size).all():
            try:
                # Subscriber number entity.
                number = upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=sub.msisdn,
                    display_name=sub.msisdn,
                    attributes={"region": sub.region},
                    data_version_id=data_version_id,
                )
                summary.entities.append(number)

                # Channel ownership: number -BELONGS_TO_CHANNEL-> channel.
                if sub.channel_key:
                    channel = upsert_entity(
                        session,
                        entity_type=EntityType.CHANNEL,
                        business_key=sub.channel_key,
                        data_version_id=data_version_id,
                    )
                    summary.relationships.append(
                        add_relationship(
                            session,
                            RelationshipType.BELONGS_TO_CHANNEL,
                            number,
                            channel,
                            data_version_id=data_version_id,
                        )
                    )

                # Subscription: number -SUBSCRIBES-> product. The product code
                # is stored as a CONTRACT-typed entity.
                if sub.product_code:
                    product = upsert_entity(
                        session,
                        entity_type=EntityType.CONTRACT,
                        business_key=sub.product_code,
                        display_name=f"产品-{sub.product_code}",
                        data_version_id=data_version_id,
                    )
                    summary.relationships.append(
                        add_relationship(
                            session,
                            RelationshipType.SUBSCRIBES,
                            number,
                            product,
                            data_version_id=data_version_id,
                        )
                    )

                # Subscribe (+1.0) and unsubscribe (-1.0) events, each emitted
                # only when the corresponding timestamp is present.
                for time_field, metric, weight in (
                    ("subscribe_time", "subscribe", 1.0),
                    ("unsubscribe_time", "unsubscribe", -1.0),
                ):
                    moment = getattr(sub, time_field)
                    if not moment:
                        continue
                    lifecycle_event = MetricEvent(
                        event_time=moment,
                        subject_type="msisdn",
                        subject_key=sub.msisdn,
                        metric_name=metric,
                        metric_value=weight,
                        attributes={
                            "channel_key": sub.channel_key,
                            "product_code": sub.product_code,
                        },
                        data_version_id=data_version_id,
                    )
                    session.add(lifecycle_event)
                    summary.metric_events.append(lifecycle_event)

                summary.row_count += 1
            except Exception:
                # Best effort: a failing row is counted and skipped.
                summary.error_count += 1

        return summary
|
||||
@@ -0,0 +1,53 @@
|
||||
"""接入适配器基类与通用数据结构。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.models import Entity, EntityRelationship, MetricEvent
|
||||
|
||||
|
||||
@dataclass
class IngestResult:
    """Summary of what a single adapter run produced."""

    # Entities reported by the adapter.
    entities: list[Entity] = field(default_factory=list)
    # Relationships reported by the adapter.
    relationships: list[EntityRelationship] = field(default_factory=list)
    # Metric events reported by the adapter.
    metric_events: list[MetricEvent] = field(default_factory=list)
    # Number of staging rows handled without raising.
    row_count: int = 0
    # Number of failures recorded during the run.
    error_count: int = 0
|
||||
|
||||
|
||||
class BaseAdapter(ABC):
    """Abstract base class for ingest adapters.

    One subclass is written per source staging table; it is responsible for
    mapping staging rows into the ontology layer.
    """

    # Identifier of the source system the subclass adapts (e.g. "BSS", "ERP").
    source_system: str = ""
    # Name of the staging table the subclass adapts.
    staging_table: str = ""

    @abstractmethod
    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Read rows from the staging table and map them into the ontology layer.

        Args:
            session: Database session.
            data_version_id: Data version ID of the current batch.
            batch_size: Number of rows handled per batch.

        Returns:
            An ``IngestResult`` summary.
        """
        ...
|
||||
@@ -0,0 +1,22 @@
|
||||
"""适配器注册表:按 staging 表名索引,便于调度器统一调用。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Type
|
||||
|
||||
from app.ingest.base import BaseAdapter
|
||||
|
||||
# Global registry: staging table name -> adapter class.
ADAPTER_REGISTRY: dict[str, Type[BaseAdapter]] = {}


def register_adapter(cls: Type[BaseAdapter]) -> Type[BaseAdapter]:
    """Class decorator that records an adapter class in the global registry.

    A class with an empty ``staging_table`` is returned unregistered. A later
    registration for the same table name replaces the earlier one.
    """
    table = cls.staging_table
    if not table:
        return cls
    ADAPTER_REGISTRY[table] = cls
    return cls


def get_adapter(staging_table: str) -> Type[BaseAdapter] | None:
    """Return the adapter class registered for ``staging_table``, or ``None``."""
    if staging_table in ADAPTER_REGISTRY:
        return ADAPTER_REGISTRY[staging_table]
    return None
|
||||
@@ -0,0 +1,89 @@
|
||||
"""接入适配器调度器:统一驱动全部 Adapter 执行 staging → 本体映射。
|
||||
|
||||
用法:
|
||||
from app.ingest.runner import run_all_adapters
|
||||
results = run_all_adapters(session, data_version_id)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.ingest.base import IngestResult
|
||||
from app.ingest.registry import ADAPTER_REGISTRY
|
||||
|
||||
# 确保所有适配器模块被导入,触发 @register_adapter 注册
|
||||
import app.ingest.adapters_master # noqa: F401
|
||||
import app.ingest.adapters_r8 # noqa: F401
|
||||
import app.ingest.adapters_r9 # noqa: F401
|
||||
import app.ingest.adapters_r10 # noqa: F401
|
||||
import app.ingest.adapters_r11 # noqa: F401
|
||||
import app.ingest.adapters_r12 # noqa: F401
|
||||
import app.ingest.adapters_r13 # noqa: F401
|
||||
import app.ingest.adapters_r14 # noqa: F401
|
||||
import app.ingest.adapters_r15 # noqa: F401
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_all_adapters(
    session: Session,
    data_version_id: uuid.UUID | None = None,
    batch_size: int = 1000,
    tables: list[str] | None = None,
) -> dict[str, IngestResult]:
    """Run every registered adapter (or a chosen subset) and collect results.

    A failing adapter does not stop the run: the failure is logged with its
    traceback and recorded as ``IngestResult(error_count=1)`` for that table.

    Args:
        session: Database session; the caller owns commit/rollback.
        data_version_id: Data-version ID of the current batch.
        batch_size: Upper bound on rows handled per adapter invocation.
        tables: Staging tables to restrict the run to. ``None`` (or an empty
            list) runs every registered adapter. Names with no registered
            adapter are skipped and reported in a warning.

    Returns:
        Mapping of staging table name to that adapter's ``IngestResult``.
    """
    results: dict[str, IngestResult] = {}

    target_adapters = ADAPTER_REGISTRY
    if tables:
        wanted = set(tables)  # O(1) membership instead of scanning the list
        unknown = sorted(wanted - ADAPTER_REGISTRY.keys())
        if unknown:
            # Surface typos / unregistered tables instead of dropping them silently.
            logger.warning(
                "No adapter registered for staging table(s): %s", ", ".join(unknown)
            )
        target_adapters = {k: v for k, v in ADAPTER_REGISTRY.items() if k in wanted}

    for table_name, adapter_cls in target_adapters.items():
        logger.info("Running adapter: %s (%s)", adapter_cls.__name__, table_name)
        adapter = adapter_cls()
        try:
            result = adapter.ingest(
                session, data_version_id=data_version_id, batch_size=batch_size
            )
            results[table_name] = result
            logger.info(
                " → rows=%d, entities=%d, rels=%d, events=%d, errors=%d",
                result.row_count,
                len(result.entities),
                len(result.relationships),
                len(result.metric_events),
                result.error_count,
            )
        except Exception as exc:
            # Deliberate best-effort boundary: one adapter must not abort the
            # others. logger.exception keeps the traceback for diagnosis.
            # NOTE(review): after a database-level error the session may need a
            # rollback before later adapters can use it — confirm with callers,
            # since transaction control is left to them.
            logger.exception("Adapter %s failed: %s", table_name, exc)
            results[table_name] = IngestResult(error_count=1)

    return results
|
||||
|
||||
|
||||
def run_adapter(
    session: Session,
    staging_table: str,
    data_version_id: uuid.UUID | None = None,
    batch_size: int = 1000,
) -> IngestResult:
    """Run the single adapter registered for ``staging_table``.

    Args:
        session: Database session handed to the adapter.
        staging_table: Registry key of the adapter to run.
        data_version_id: Data-version ID of the current batch.
        batch_size: Row limit forwarded to the adapter.

    Returns:
        The ``IngestResult`` produced by the adapter's ``ingest`` call.

    Raises:
        ValueError: If no adapter is registered for ``staging_table``.
    """
    if (adapter_cls := ADAPTER_REGISTRY.get(staging_table)) is None:
        raise ValueError(f"未找到 staging 表 '{staging_table}' 对应的适配器")

    ingest_options = {"data_version_id": data_version_id, "batch_size": batch_size}
    instance = adapter_cls()
    return instance.ingest(session, **ingest_options)
|
||||
@@ -4,7 +4,7 @@ from __future__ import annotations
|
||||
|
||||
from app.config import EGRESS_PROVIDERS, LLMProviderName, Settings, get_settings
|
||||
from app.llm.base import LLMProvider
|
||||
from app.llm.providers import DashScopeProvider, VllmProvider
|
||||
from app.llm.providers import DashScopeProvider, MockProvider, VllmProvider
|
||||
|
||||
|
||||
class EgressPolicyError(RuntimeError):
|
||||
@@ -27,5 +27,7 @@ def get_llm_provider(settings: Settings | None = None) -> LLMProvider:
|
||||
)
|
||||
if settings.llm_provider == LLMProviderName.vllm:
|
||||
return VllmProvider(base_url=settings.vllm_base_url, model=settings.vllm_model)
|
||||
if settings.llm_provider == LLMProviderName.mock:
|
||||
return MockProvider()
|
||||
|
||||
raise ValueError(f"未知的 LLM Provider: {settings.llm_provider}")
|
||||
|
||||
@@ -78,3 +78,31 @@ class VllmProvider(LLMProvider):
|
||||
return resp.status_code == 200
|
||||
except httpx.HTTPError:
|
||||
return False
|
||||
|
||||
|
||||
class MockProvider(LLMProvider):
    """Deterministic local mock provider for development and tests.

    It echoes the most recent user message back in a fixed format, so the
    call chain can be exercised without an API key, a GPU, or network access.
    Responses are always marked ``egress=False``.
    """

    name = "mock"
    egress = False

    def __init__(self, model: str = "mock-llm") -> None:
        """Store the model label reported in responses."""
        self._model = model

    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """Return an echo of the latest ``user`` message.

        Args:
            messages: Conversation so far; scanned from the end for the first
                message whose role is ``"user"``.
            **kwargs: Accepted and ignored.

        Returns:
            An ``LLMResponse`` whose content embeds the echoed text (empty
            string when there is no user message).
        """
        echoed = ""
        for message in reversed(messages):
            if message.role == "user":
                echoed = message.content
                break

        return LLMResponse(
            content=f"[mock] 收到查询:{echoed}",
            model=self._model,
            provider=self.name,
            egress=False,
            raw={"echo": echoed},
        )

    def health(self) -> bool:
        """Report the provider as always healthy."""
        return True
|
||||
|
||||
@@ -7,7 +7,9 @@ from contextlib import asynccontextmanager
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app import __version__
|
||||
from app.api.clues import router as clues_router
|
||||
from app.api.datahub import router as datahub_router
|
||||
from app.api.nlq import router as nlq_router
|
||||
from app.config import get_settings
|
||||
|
||||
|
||||
@@ -26,6 +28,8 @@ app = FastAPI(
|
||||
)
|
||||
|
||||
app.include_router(datahub_router)
|
||||
app.include_router(clues_router)
|
||||
app.include_router(nlq_router)
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
"""自然语言查询(NLQ):审计员零门槛用自然语言查数/获取线索(R4/R20)。"""
|
||||
@@ -0,0 +1,106 @@
|
||||
"""自然语言查询服务。
|
||||
|
||||
采用"结构化意图优先 + LLM 兜底"策略:
|
||||
- 若问题命中线索检索意图(置信度/场景/状态/列出线索等),直接查审计数据库返回真实结果,
|
||||
实现"数据找人",不依赖外部模型,数据不出域。
|
||||
- 其余开放性问题再交给 LLMProvider(本地优先)。
|
||||
对应 R4 / R20 / R7。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.clues import service as clue_svc
|
||||
from app.clues.models import ClueStatus, ConfidenceTier
|
||||
from app.llm import ChatMessage, get_llm_provider
|
||||
|
||||
# System message sent ahead of the user's question on the LLM fallback path.
SYSTEM_PROMPT = (
    "你是电信运营商内部审计助手。基于审计数据中台的数据回答问题,"
    "给出可解释的依据;无证据支撑时明确说明,不臆造数据。"
)

# Keyword -> filter value mappings. Insertion order matters: the first keyword
# found in the question wins.
_CONFIDENCE_KW = {
    "高置信": ConfidenceTier.HIGH,
    "高风险": ConfidenceTier.HIGH,
    "中置信": ConfidenceTier.MEDIUM,
    "低置信": ConfidenceTier.LOW,
}
_SCENARIO_KW = {
    "拆单": "R8",
    "政企": "R8",
    "养卡": "R9",
    "骗补": "R9",
    "彩铃": "R9",
}
_STATUS_KW = {
    "待处理": ClueStatus.NEW,
    "已分派": ClueStatus.ASSIGNED,
    "研判": ClueStatus.REVIEWING,
    "属实": ClueStatus.CONFIRMED,
    "误报": ClueStatus.DISMISSED,
    "已销项": ClueStatus.CLOSED,
}
# Generic "list / look up" words that mark a question as a clue query.
_LIST_KW = (
    "线索",
    "列出",
    "查",
    "有哪些",
    "多少",
    "列表",
    "看看",
    "显示",
)

# Display labels used when formatting answers.
_SCENARIO_NAME = {
    "R8": "政企拆单",
    "R9": "养卡骗补",
}
_CONF_NAME = {
    ConfidenceTier.HIGH: "高置信",
    ConfidenceTier.MEDIUM: "中置信",
    ConfidenceTier.LOW: "低置信",
}
|
||||
|
||||
|
||||
@dataclass
class NLQAnswer:
    """Result of one natural-language query."""

    # The question as received from the caller.
    question: str
    # Answer text (formatted clue list or the LLM reply).
    answer: str
    # Name of the backend that produced the answer.
    provider: str
    # Model label reported by that backend.
    model: str
    # Egress flag reported by the backend; the structured path sets it False.
    egress: bool
|
||||
|
||||
|
||||
def _match_first(question: str, mapping: dict):
    """Return the value of the first mapping key contained in ``question``.

    Keys are tried in the mapping's iteration order; ``None`` is returned
    when no key occurs in the question.
    """
    hits = (value for keyword, value in mapping.items() if keyword in question)
    return next(hits, None)
|
||||
|
||||
|
||||
def _is_clue_query(question: str) -> bool:
    """Tell whether the question mentions any clue-retrieval keyword.

    A question qualifies when it contains a generic listing keyword or any
    confidence, scenario, or status keyword.
    """
    keyword_groups = (_LIST_KW, _CONFIDENCE_KW, _SCENARIO_KW, _STATUS_KW)
    for group in keyword_groups:
        for keyword in group:
            if keyword in question:
                return True
    return False
|
||||
|
||||
|
||||
def _format_clue_answer(question: str, clues: list) -> str:
    """Render retrieved clues as a numbered, human-readable summary.

    Args:
        question: The original question (currently not used in the output).
        clues: Clue objects to describe.

    Returns:
        A fixed "nothing found" hint when ``clues`` is empty; otherwise a
        header line followed by one line per clue.
    """
    if not clues:
        return "未检索到符合条件的线索。可调整筛选条件,或先运行扫描生成线索。"

    rendered = [f"共检索到 {len(clues)} 条线索:"]
    for position, clue in enumerate(clues, start=1):
        # The amount is shown in units of 10,000 and only when it is truthy.
        if clue.amount_involved:
            amount_part = f",涉及金额约 {clue.amount_involved/10000:.1f} 万元"
        else:
            amount_part = ""
        scenario_label = _SCENARIO_NAME.get(clue.scenario_code, clue.scenario_code)
        confidence_label = _CONF_NAME.get(clue.confidence, clue.confidence.value)
        rendered.append(
            f"{position}. [{scenario_label}] {clue.title}"
            f"({confidence_label},评分 {clue.score:.2f}{amount_part})"
            f"——{clue.rationale}"
        )
    return "\n".join(rendered)
|
||||
|
||||
|
||||
def ask(question: str, session: Session | None = None) -> NLQAnswer:
    """Answer one natural-language question.

    When a session is supplied and the question looks like a clue query, the
    clues are read from the database and formatted directly. Every other
    question is forwarded to the configured LLM provider.

    Args:
        question: The user's question.
        session: Database session; without it the structured path is skipped.

    Returns:
        An ``NLQAnswer`` describing the answer and which backend produced it.
    """
    structured = session is not None and _is_clue_query(question)

    if not structured:
        # Open-ended question: delegate to the LLM provider.
        llm = get_llm_provider()
        reply = llm.chat(
            [
                ChatMessage(role="system", content=SYSTEM_PROMPT),
                ChatMessage(role="user", content=question),
            ]
        )
        return NLQAnswer(
            question=question,
            answer=reply.content,
            provider=reply.provider,
            model=reply.model,
            egress=reply.egress,
        )

    # Structured intent: derive filters from keywords and query the clue store.
    confidence = _match_first(question, _CONFIDENCE_KW)
    scenario = _match_first(question, _SCENARIO_KW)
    status = _match_first(question, _STATUS_KW)
    matched = clue_svc.list_clues(
        session, status=status, scenario_code=scenario, confidence=confidence
    )
    return NLQAnswer(
        question=question,
        answer=_format_clue_answer(question, matched),
        provider="datahub",
        model="结构化检索",
        egress=False,
    )
|
||||
@@ -0,0 +1 @@
|
||||
"""审计场景检测器:将业务数据中的异常模式转化为线索。"""
|
||||
@@ -0,0 +1,85 @@
|
||||
"""场景二 · 市场业务真实性:养卡骗补检测(R9)。
|
||||
|
||||
检测"脉冲式增长 + 规律性衰减"的周期性造假:渠道每月新增大量用户订购,
|
||||
固定周期后这些用户集中退订(骗补后弃养)。结合佣金与业务质量匹配度。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class CohortPoint:
    """Retention of one acquisition cohort at a given month offset."""

    # Month offset of this observation within the cohort's life.
    month_index: int
    # Retention rate, expected in the 0-1 range.
    retention: float
|
||||
|
||||
|
||||
@dataclass
class ChurnFinding:
    """Outcome of the retention-cliff analysis."""

    # Month index at which the cliff was found, or None when there is none.
    cliff_month: int | None
    # Largest month-over-month retention drop observed.
    max_drop: float
    # True when a cliff was detected.
    pulse_then_decay: bool
|
||||
|
||||
|
||||
def detect_pulse_decay(
    retention_curve: list[CohortPoint],
    cliff_drop: float = 0.5,
) -> ChurnFinding:
    """Detect a cliff-style mass cancellation in a cohort retention curve.

    Points are sorted by ``month_index`` and each consecutive pair is
    compared. The drop is the absolute difference in retention between the
    two points (not a ratio to the previous month). A cliff is reported when
    the largest drop is at least ``cliff_drop``; the reported month is the
    month of that largest drop, so ``cliff_month`` and ``max_drop`` always
    describe the same step.

    Args:
        retention_curve: Retention observations; order does not matter.
        cliff_drop: Minimum absolute retention drop that counts as a cliff
            (default 0.5).

    Returns:
        A ``ChurnFinding`` with the cliff month (``None`` if no cliff), the
        largest drop rounded to 3 decimals, and the cliff flag. Curves with
        fewer than two points, or with no decrease, yield no cliff.
    """
    ordered = sorted(retention_curve, key=lambda p: p.month_index)

    max_drop = 0.0
    max_drop_month: int | None = None
    for prev, cur in zip(ordered, ordered[1:], strict=False):
        drop = prev.retention - cur.retention
        # Strict comparison: on ties the earliest month is kept.
        if drop > max_drop:
            max_drop = drop
            max_drop_month = cur.month_index

    # Only the largest drop can qualify, so test the threshold once at the end.
    is_cliff = max_drop_month is not None and max_drop >= cliff_drop
    cliff_month = max_drop_month if is_cliff else None

    return ChurnFinding(
        cliff_month=cliff_month,
        max_drop=round(max_drop, 3),
        pulse_then_decay=cliff_month is not None,
    )
|
||||
|
||||
|
||||
def commission_quality_mismatch(
    commission_paid: float,
    active_ratio: float,
    zero_usage_ratio: float,
) -> float:
    """Score how poorly paid commission matches business quality (0-1).

    The score is ``0.6 * zero_usage_ratio + 0.4 * (1 - active_ratio)``,
    clamped to [0, 1] and rounded to 3 decimals. It is 0.0 whenever no
    commission was paid.

    Args:
        commission_paid: Commission amount; non-positive values give 0.0.
        active_ratio: Share of users that are still active.
        zero_usage_ratio: Share of users with zero calls / zero traffic.

    Returns:
        The mismatch score; higher means low activity and high zero usage.
    """
    if commission_paid <= 0:
        return 0.0

    score = 0.6 * zero_usage_ratio + 0.4 * (1 - active_ratio)
    if score < 0.0:
        score = 0.0
    elif score > 1.0:
        score = 1.0
    return round(score, 3)
|
||||
|
||||
|
||||
def churn_risk_score(finding: ChurnFinding, mismatch: float) -> float:
    """Combine the cliff finding and commission mismatch into one score.

    With a cliff the score is ``0.4 + 0.4 * max_drop + 0.2 * mismatch``,
    capped at 1.0. Without one it is ``0.3 * mismatch``. Both results are
    rounded to 3 decimals.
    """
    if finding.pulse_then_decay:
        combined = 0.4 + 0.4 * finding.max_drop + 0.2 * mismatch
        return round(min(combined, 1.0), 3)
    # No cliff: only the mismatch signal contributes, at reduced weight.
    return round(0.3 * mismatch, 3)
|
||||
|
||||
|
||||
def build_rationale(finding: ChurnFinding, mismatch: float) -> str:
    """Build the human-readable explanation for a churn-fraud finding.

    Args:
        finding: Result of the retention-cliff analysis.
        mismatch: Commission/quality mismatch score, shown as a percentage.

    Returns:
        A sentence describing the cliff month and largest drop when a cliff
        was found, otherwise a shorter note that reports only the mismatch.
    """
    if not finding.pulse_then_decay:
        return f"未见明显断崖退订,但佣金与业务质量不匹配度为 {mismatch:.0%},建议关注。"

    segments = [
        f"渠道新增用户在第 {finding.cliff_month} 个月出现断崖式集中退订",
        f"(最大单月留存骤降 {finding.max_drop:.0%}),呈",
        f"'脉冲式增长 + 规律性衰减'特征;佣金与业务质量不匹配度 {mismatch:.0%},",
        "高度疑似养卡骗补(骗补后弃养)。",
    ]
    return "".join(segments)
|
||||
@@ -0,0 +1,78 @@
|
||||
"""场景一 · 政企收入全链路穿透:拆单规避检测(R8)。
|
||||
|
||||
检测点:
|
||||
1. 合同金额集中分布在审批阈值边缘(如阈值 80% 以上但不超阈值)。
|
||||
2. 结合知识图谱穿透识别隐性实控人(多个客户经法人关联到同一实控人)。
|
||||
满足上述模式则生成线索,附证据链与人话理由。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
class ContractRecord:
    """Key facts about one contract, used as input to the split analysis."""

    # Identifier of the contract.
    contract_id: str
    # Key of the customer the contract belongs to.
    customer_key: str
    # Contract amount, compared against the approval threshold.
    amount: float
|
||||
|
||||
|
||||
@dataclass
class SplitFinding:
    """Result of the near-threshold (contract splitting) detection."""

    # Contracts whose amount fell inside the near-threshold band.
    near_threshold: list[ContractRecord] = field(default_factory=list)
    # Share of all examined contracts that are in the band.
    ratio: float = 0.0
    # Sum of the amounts of the contracts in the band.
    total_amount: float = 0.0

    @property
    def hit(self) -> bool:
        """True when at least three contracts sit in the near-threshold band."""
        return len(self.near_threshold) > 2
|
||||
|
||||
|
||||
def detect_threshold_edge(
    contracts: list[ContractRecord],
    approval_threshold: float,
    edge_ratio: float = 0.8,
) -> SplitFinding:
    """Find contracts whose amount sits just below the approval threshold.

    A contract is "near the threshold" when its amount lies in the half-open
    band ``[edge_ratio * approval_threshold, approval_threshold)``.

    Args:
        contracts: Contracts to examine.
        approval_threshold: Approval threshold; must be positive.
        edge_ratio: Fraction of the threshold where the band starts.

    Returns:
        A ``SplitFinding`` holding the in-band contracts, their share of all
        contracts (0.0 for an empty input) and the sum of their amounts.

    Raises:
        ValueError: If ``approval_threshold`` is not positive.
    """
    if approval_threshold <= 0:
        raise ValueError("审批阈值必须为正数")

    band_start = edge_ratio * approval_threshold
    in_band = [
        contract
        for contract in contracts
        if band_start <= contract.amount < approval_threshold
    ]
    share = (len(in_band) / len(contracts)) if contracts else 0.0
    band_total = sum(contract.amount for contract in in_band)
    return SplitFinding(near_threshold=in_band, ratio=share, total_amount=band_total)
|
||||
|
||||
|
||||
def split_risk_score(finding: SplitFinding, shared_controller: bool) -> float:
    """Score the contract-splitting risk of a finding on a 0-1 scale.

    A finding that is not a hit scores 0.0. Otherwise the score adds 0.1 per
    near-threshold contract (capped at 0.6), 0.2 times the in-band ratio,
    and 0.3 when the customers share one controller. The total is capped at
    1.0 and rounded to 3 decimals.
    """
    if not finding.hit:
        return 0.0

    # More near-threshold contracts are more suspicious, up to a ceiling.
    score = min(0.6, 0.1 * len(finding.near_threshold))
    score += 0.2 * finding.ratio
    if shared_controller:
        # A shared controlling party is treated as strong evidence.
        score += 0.3
    return round(min(score, 1.0), 3)
|
||||
|
||||
|
||||
def build_rationale(finding: SplitFinding, threshold: float, shared_controller: bool) -> str:
    """Build the human-readable explanation for a contract-splitting finding.

    Args:
        finding: Near-threshold detection result.
        threshold: Approval threshold the contracts were compared against.
        shared_controller: Whether the customers were traced to one
            controlling party; selects the closing sentence.

    Returns:
        One string stating the count, ratio and total amount of near-threshold
        contracts, followed by the controller-dependent conclusion.
    """
    summary = (
        f"检测到 {len(finding.near_threshold)} 份合同金额集中在审批阈值 "
        f"{threshold:.0f} 的边缘区间(占比 {finding.ratio:.0%}),"
    )
    total = f"边缘合同金额合计约 {finding.total_amount:.0f}。"
    if shared_controller:
        closing = "且经工商关联穿透,相关客户疑似同属一个隐性实控人,高度符合拆单规避特征。"
    else:
        closing = "建议进一步穿透客户关联关系以确认是否同一实控人。"
    return summary + total + closing
|
||||
@@ -16,6 +16,7 @@ from app.config import get_settings
|
||||
|
||||
# 导入模型以注册到 Base.metadata
|
||||
from app.datahub import models # noqa: F401,E402
|
||||
from app.datahub import staging # noqa: F401,E402
|
||||
from app.db import Base
|
||||
|
||||
config = context.config
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
"""源明细落地层:src_contract / src_channel_monthly
|
||||
|
||||
Revision ID: 0003_staging
|
||||
Revises: 0002_clues_audit
|
||||
Create Date: 2026-06
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
revision: str = "0003_staging"
|
||||
down_revision: Union[str, None] = "0002_clues_audit"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the ``src_contract`` and ``src_channel_monthly`` staging tables.

    Each table has a UUID primary key, a nullable ``data_version_id`` and a
    required ``ingested_at`` timestamp, and gets one index on its key column.
    """

    def uuid_type():
        # A fresh type object per column.
        return postgresql.UUID(as_uuid=True)

    # Contract-level source rows.
    contract_columns = [
        sa.Column("id", uuid_type(), primary_key=True),
        sa.Column("contract_no", sa.String(64), nullable=False),
        sa.Column("customer_key", sa.String(64), nullable=False),
        sa.Column("customer_name", sa.String(256), nullable=True),
        sa.Column("amount", sa.Float(), nullable=False),
        sa.Column("sign_date", sa.Date(), nullable=True),
        sa.Column("approval_threshold", sa.Float(), nullable=True),
        sa.Column("approval_level", sa.String(32), nullable=True),
        sa.Column("legal_person", sa.String(128), nullable=True),
        sa.Column("register_address", sa.String(256), nullable=True),
        sa.Column("pay_account", sa.String(64), nullable=True),
        sa.Column("data_version_id", uuid_type(), nullable=True),
        sa.Column("ingested_at", sa.DateTime(timezone=True), nullable=False),
    ]
    op.create_table("src_contract", *contract_columns)
    op.create_index("ix_src_contract_customer", "src_contract", ["customer_key"])

    # Per-channel, per-cohort monthly rows; numeric columns default to 0.
    channel_columns = [
        sa.Column("id", uuid_type(), primary_key=True),
        sa.Column("channel_key", sa.String(64), nullable=False),
        sa.Column("cohort_label", sa.String(32), nullable=False),
        sa.Column("month_index", sa.Integer(), nullable=False),
        sa.Column("cohort_size", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("retained", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("commission_paid", sa.Float(), nullable=False, server_default="0"),
        sa.Column("active_ratio", sa.Float(), nullable=False, server_default="0"),
        sa.Column("zero_usage_ratio", sa.Float(), nullable=False, server_default="0"),
        sa.Column("data_version_id", uuid_type(), nullable=True),
        sa.Column("ingested_at", sa.DateTime(timezone=True), nullable=False),
    ]
    op.create_table("src_channel_monthly", *channel_columns)
    op.create_index("ix_src_channel_key", "src_channel_monthly", ["channel_key"])
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop both staging tables, in reverse order of their creation."""
    for table_name in ("src_channel_monthly", "src_contract"):
        op.drop_table(table_name)
|
||||
@@ -0,0 +1,53 @@
|
||||
"""生成演示数据:跑两个场景扫描,落库若干线索,供前端看板演示。
|
||||
|
||||
用法:python -m scripts.seed_demo
|
||||
仅用于本地演示,使用脱敏/虚构数据,不涉及真实业务数据。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.db import get_sessionmaker
|
||||
from app.engines import scan
|
||||
from app.scenarios.churn_fraud import CohortPoint
|
||||
from app.scenarios.split_contract import ContractRecord
|
||||
|
||||
|
||||
def main() -> None:
    """Run three demo scans, commit the results, and print a summary.

    The inputs are fabricated: one contract-splitting scan with a shared
    controller, one churn scan with a retention cliff at month 3, and one
    milder churn scan.
    """
    # Scenario 1 input: eight contracts spaced 25,000 apart.
    contracts = [
        ContractRecord(f"HT-{i}", f"政企客户{i}", 790000 + i * 25000) for i in range(8)
    ]
    # Scenario 2 input: retention collapses between months 2 and 3.
    cliff_curve = [
        CohortPoint(0, 1.0),
        CohortPoint(1, 0.96),
        CohortPoint(2, 0.92),
        CohortPoint(3, 0.08),
    ]
    # A gentler curve for an additional, weaker example.
    mild_curve = [CohortPoint(0, 1.0), CohortPoint(1, 0.7), CohortPoint(2, 0.55)]

    session_factory = get_sessionmaker()
    with session_factory() as session:
        split_result = scan.run_split_contract_scan(
            session, contracts, approval_threshold=1_000_000, shared_controller=True
        )
        cliff_result = scan.run_churn_scan(
            session,
            retention_curve=cliff_curve,
            commission_paid=360000,
            active_ratio=0.04,
            zero_usage_ratio=0.93,
            channel_key="渠道-华南-001",
        )
        mild_result = scan.run_churn_scan(
            session,
            retention_curve=mild_curve,
            commission_paid=80000,
            active_ratio=0.4,
            zero_usage_ratio=0.5,
            channel_key="渠道-西南-007",
        )

        session.commit()
        for outcome in (split_result, cliff_result, mild_result):
            if not outcome.clue:
                print(f"[{outcome.scenario_code}] 未命中阈值,无线索")
                continue
            print(f"已生成线索 [{outcome.scenario_code}] {outcome.clue.title} "
                  f"置信={outcome.clue.confidence.value} 评分={outcome.clue.score}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -43,7 +43,9 @@ def test_clue_full_lifecycle(session):
|
||||
assert clue.status == ClueStatus.ASSIGNED
|
||||
assert clue.assignee == "auditor_zhang"
|
||||
|
||||
paper = clue_svc.adjudicate(session, clue, confirmed=True, actor="auditor_zhang", note="属实,移交")
|
||||
paper = clue_svc.adjudicate(
|
||||
session, clue, confirmed=True, actor="auditor_zhang", note="属实,移交"
|
||||
)
|
||||
assert clue.status == ClueStatus.CONFIRMED
|
||||
assert clue.feedback == "confirmed"
|
||||
assert paper.conclusion == "confirmed"
|
||||
|
||||
@@ -0,0 +1,86 @@
|
||||
"""线索/NLQ/看板 API 集成测试(需 PostgreSQL)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.db import get_session
|
||||
from app.engines import scan
|
||||
from app.main import app
|
||||
from app.scenarios.split_contract import ContractRecord
|
||||
|
||||
|
||||
@pytest.fixture()
def client(session):
    """Yield a ``TestClient`` whose ``get_session`` dependency is the test session.

    The override is removed again when the test finishes.
    """

    def _use_test_session():
        return session

    app.dependency_overrides[get_session] = _use_test_session
    try:
        yield TestClient(app)
    finally:
        app.dependency_overrides.pop(get_session, None)
|
||||
|
||||
|
||||
def _seed_clue(session):
    """Run a split-contract scan over eight 850,000 contracts and return its clue."""
    contracts = [ContractRecord(f"C{i}", f"CUST{i}", 850000) for i in range(8)]
    outcome = scan.run_split_contract_scan(
        session, contracts, approval_threshold=1_000_000, shared_controller=True
    )
    return outcome.clue
|
||||
|
||||
|
||||
def test_list_and_get_clue(client, session):
    """A seeded clue appears in the list and can be fetched by its ID."""
    clue = _seed_clue(session)
    session.flush()

    listing = client.get("/clues")
    assert listing.status_code == 200
    listed_ids = [item["id"] for item in listing.json()]
    assert str(clue.id) in listed_ids

    detail = client.get(f"/clues/{clue.id}")
    assert detail.status_code == 200
    assert detail.json()["scenario_code"] == "R8"
|
||||
|
||||
|
||||
def test_assign_and_adjudicate_flow(client, session):
    """Assigning then adjudicating a clue moves it to assigned, then confirmed."""
    clue = _seed_clue(session)
    session.flush()

    assign_resp = client.post(
        f"/clues/{clue.id}/assign",
        json={"assignee": "auditor_w", "actor": "manager_l"},
    )
    assert assign_resp.status_code == 200
    assigned = assign_resp.json()
    assert assigned["assignee"] == "auditor_w"
    assert assigned["status"] == "assigned"

    adjudicate_resp = client.post(
        f"/clues/{clue.id}/adjudicate",
        json={"confirmed": True, "actor": "auditor_w", "note": "属实"},
    )
    assert adjudicate_resp.status_code == 200
    adjudicated = adjudicate_resp.json()
    assert adjudicated["status"] == "confirmed"
    assert adjudicated["feedback"] == "confirmed"
|
||||
|
||||
|
||||
def test_summary_endpoint(client, session):
    """The summary endpoint counts the seeded clue and a positive amount."""
    _seed_clue(session)
    session.flush()

    response = client.get("/clues/summary")
    assert response.status_code == 200

    summary = response.json()
    assert summary["total"] >= 1
    assert summary["total_amount_involved"] > 0
|
||||
|
||||
|
||||
def test_no_delete_endpoint(client, session):
    """R19: there is no API endpoint that deletes a clue."""
    clue = _seed_clue(session)
    session.flush()

    response = client.delete(f"/clues/{clue.id}")
    # Either the route does not exist or the method is not allowed.
    assert response.status_code in (404, 405)
|
||||
|
||||
|
||||
def test_nlq_endpoint_uses_local_provider(client):
    """POST /nlq answers a clue question and reports a boolean egress flag."""
    # The provider depends on the local .env, so only the type of egress is checked.
    response = client.post("/nlq", json={"question": "列出政企拆单线索"})
    assert response.status_code == 200

    payload = response.json()
    assert "answer" in payload
    assert payload["egress"] in (True, False)
|
||||
@@ -0,0 +1,37 @@
|
||||
"""NLQ 结构化检索集成测试(需 PostgreSQL)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.engines import scan
|
||||
from app.nlq import service as nlq
|
||||
from app.scenarios.split_contract import ContractRecord
|
||||
|
||||
|
||||
def _seed(session):
    """Create one split-contract clue from eight 850,000 contracts and flush it."""
    contracts = [ContractRecord(f"C{i}", f"CUST{i}", 850000) for i in range(8)]
    scan.run_split_contract_scan(
        session,
        contracts,
        approval_threshold=1_000_000,
        shared_controller=True,
    )
    session.flush()
|
||||
|
||||
|
||||
def test_nlq_retrieves_split_clues(session):
    """A high-confidence split-contract question is answered from the database."""
    _seed(session)

    result = nlq.ask("列出高置信的政企拆单线索", session=session)

    assert result.provider == "datahub"
    assert result.egress is False
    for expected_fragment in ("政企拆单", "共检索到"):
        assert expected_fragment in result.answer
|
||||
|
||||
|
||||
def test_nlq_no_match(session):
    """A churn-fraud question stays on the non-egress path whatever it finds."""
    result = nlq.ask("列出养卡骗补线索", session=session)

    assert result.egress is False
    # Either wording is fine: other data may already exist in the database.
    assert any(marker in result.answer for marker in ("未检索到", "共检索到"))
|
||||
|
||||
|
||||
def test_nlq_open_question_falls_back_to_llm(session):
    """A question without retrieval keywords is expected to reach the LLM (mock)."""
    result = nlq.ask("你好,请介绍一下你的能力", session=session)

    assert result.provider in ("mock", "datahub")
    assert result.egress is False
|
||||
@@ -0,0 +1,46 @@
|
||||
"""全量穿透扫描引擎集成测试(需 PostgreSQL)。
|
||||
|
||||
验证场景检测→线索生成→落库的端到端链路(R5+R7+R8/R9)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.clues.models import ClueStatus, ConfidenceTier
|
||||
from app.engines import scan
|
||||
from app.scenarios.churn_fraud import CohortPoint
|
||||
from app.scenarios.split_contract import ContractRecord
|
||||
|
||||
|
||||
def test_split_scan_creates_high_confidence_clue(session):
    """Eight near-threshold contracts with a shared controller give a HIGH clue."""
    contracts = [ContractRecord(f"C{i}", f"CUST{i}", 850000) for i in range(8)]

    outcome = scan.run_split_contract_scan(
        session, contracts, approval_threshold=1_000_000, shared_controller=True
    )

    assert outcome.scenario_code == "R8"
    assert outcome.scanned_count == 8

    clue = outcome.clue
    assert clue is not None
    assert clue.confidence == ConfidenceTier.HIGH
    assert clue.status == ClueStatus.NEW
    assert clue.amount_involved > 0
    assert clue.model_version == scan.MODEL_VERSION
|
||||
|
||||
|
||||
def test_split_scan_no_clue_when_clean(session):
    """Contracts far from the threshold band produce no clue."""
    contracts = [
        ContractRecord("C1", "A", 100000),
        ContractRecord("C2", "B", 3_000_000),
    ]
    outcome = scan.run_split_contract_scan(
        session, contracts, approval_threshold=1_000_000
    )
    assert outcome.clue is None
|
||||
|
||||
|
||||
def test_churn_scan_creates_clue(session):
    """A retention cliff with poor usage quality yields an R9 clue for the channel."""
    scan_inputs = {
        "retention_curve": [
            CohortPoint(0, 1.0),
            CohortPoint(1, 0.95),
            CohortPoint(2, 0.1),
        ],
        "commission_paid": 300000,
        "active_ratio": 0.05,
        "zero_usage_ratio": 0.9,
        "channel_key": "CH-001",
    }

    outcome = scan.run_churn_scan(session, **scan_inputs)

    assert outcome.clue is not None
    assert outcome.clue.scenario_code == "R9"
    assert outcome.clue.subjects["channel"] == "CH-001"
|
||||
@@ -0,0 +1,79 @@
|
||||
"""场景检测器单元测试(纯逻辑,无需数据库)。"""
|
||||
|
||||
from app.scenarios.churn_fraud import (
|
||||
CohortPoint,
|
||||
churn_risk_score,
|
||||
commission_quality_mismatch,
|
||||
detect_pulse_decay,
|
||||
)
|
||||
from app.scenarios.split_contract import (
|
||||
ContractRecord,
|
||||
detect_threshold_edge,
|
||||
split_risk_score,
|
||||
)
|
||||
|
||||
# ---------- 场景一:政企拆单 (R8) ----------
|
||||
|
||||
def test_threshold_edge_detects_split():
    """All eight contracts inside the band are reported and count as a hit."""
    # Threshold 1,000,000; amounts run from 810,000 to 950,000 in 20,000 steps.
    amounts = [810000 + i * 20000 for i in range(8)]
    contracts = [
        ContractRecord(f"C{i}", f"CUST{i}", amount) for i, amount in enumerate(amounts)
    ]

    finding = detect_threshold_edge(contracts, approval_threshold=1_000_000)

    assert finding.hit
    assert len(finding.near_threshold) == 8
|
||||
|
||||
|
||||
def test_threshold_edge_no_split_when_amounts_spread():
    """Amounts far below and far above the threshold do not count as a hit."""
    low = ContractRecord("C1", "A", 100000)
    high = ContractRecord("C2", "B", 2_000_000)

    finding = detect_threshold_edge([low, high], approval_threshold=1_000_000)

    assert not finding.hit
|
||||
|
||||
|
||||
def test_split_score_higher_with_shared_controller():
    """A shared controller raises the score, which still never exceeds 1.0."""
    contracts = [ContractRecord(f"C{i}", f"CUST{i}", 850000) for i in range(8)]
    finding = detect_threshold_edge(contracts, 1_000_000)

    without_controller = split_risk_score(finding, shared_controller=False)
    with_controller = split_risk_score(finding, shared_controller=True)

    assert with_controller > without_controller
    assert with_controller <= 1.0
|
||||
|
||||
|
||||
def test_threshold_must_be_positive():
    """A zero approval threshold is rejected with ``ValueError``."""
    import pytest

    no_contracts = []
    with pytest.raises(ValueError):
        detect_threshold_edge(no_contracts, approval_threshold=0)
|
||||
|
||||
|
||||
# ---------- 场景二:养卡骗补 (R9) ----------
|
||||
|
||||
def test_pulse_decay_detects_cliff():
    """A collapse from 0.92 to 0.10 is reported as a cliff at month 3."""
    retentions = [1.0, 0.95, 0.92, 0.10]  # the last step is the cliff
    curve = [CohortPoint(month, value) for month, value in enumerate(retentions)]

    finding = detect_pulse_decay(curve)

    assert finding.pulse_then_decay
    assert finding.cliff_month == 3
|
||||
|
||||
|
||||
def test_no_cliff_for_smooth_curve():
    """A steady 5-point monthly decline is not flagged as a cliff."""
    curve = []
    for month in range(5):
        curve.append(CohortPoint(month, 1.0 - 0.05 * month))

    finding = detect_pulse_decay(curve)

    assert not finding.pulse_then_decay
|
||||
|
||||
|
||||
def test_commission_mismatch_high_for_zero_usage():
    """Low activity combined with high zero usage scores above 0.7."""
    mismatch = commission_quality_mismatch(
        commission_paid=100000,
        active_ratio=0.05,
        zero_usage_ratio=0.9,
    )
    assert mismatch > 0.7
|
||||
|
||||
|
||||
def test_churn_score_combines_signals():
    """A cliff finding plus a high mismatch yields a score in (0, 1]."""
    finding = detect_pulse_decay([CohortPoint(0, 1.0), CohortPoint(1, 0.2)])

    score = churn_risk_score(finding, mismatch=0.8)

    assert 0.0 < score <= 1.0
|
||||
Reference in New Issue
Block a user