Initial commit: InternalAuditInterprise
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
"""AIAudit 后端应用包。"""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
@@ -0,0 +1 @@
|
||||
"""HTTP API 层。"""
|
||||
@@ -0,0 +1,64 @@
|
||||
"""数据中台统一穿透查询 API(P1.2.5)。
|
||||
|
||||
作为各引擎与审计场景访问知识图谱的共同入口,对上层屏蔽底层是关系表还是图库。
|
||||
对应需求 R2。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.schemas import (
|
||||
EntityOut,
|
||||
PenetrateRequest,
|
||||
PenetrateResponse,
|
||||
RelatedEntityOut,
|
||||
)
|
||||
from app.datahub.graph_repo import find_related_entities
|
||||
from app.datahub.models import Entity
|
||||
from app.db import get_session
|
||||
|
||||
router = APIRouter(prefix="/datahub", tags=["datahub"])
|
||||
|
||||
|
||||
@router.get("/entities/{entity_id}", response_model=EntityOut)
def get_entity(
    entity_id: uuid.UUID,
    session: Session = Depends(get_session),
) -> Entity:
    # Look up one entity by primary key and return it; answer 404 when the
    # session has no row for that id.
    found = session.get(Entity, entity_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="实体不存在")
|
||||
|
||||
|
||||
@router.post("/penetrate", response_model=PenetrateResponse)
def penetrate(
    req: PenetrateRequest, session: Session = Depends(get_session)
) -> PenetrateResponse:
    """Multi-hop penetration: return the entities connected to the start entity.

    Used for identifying actual controllers, related parties and shell entities.
    Responds with 404 when the start entity does not exist.
    """
    if session.get(Entity, req.start_entity_id) is None:
        raise HTTPException(status_code=404, detail="起点实体不存在")

    # (entity id, hop count) pairs produced by the graph traversal.
    hops = find_related_entities(session, req.start_entity_id, max_depth=req.max_depth)
    depth_by_id = dict(hops)

    # Fetch the entity details in a single query; skip the query when the
    # traversal reached nothing.
    rows = []
    if depth_by_id:
        rows = session.query(Entity).filter(Entity.id.in_(list(depth_by_id))).all()

    # Nearest entities first; the sort is stable, so ties keep query order.
    related = sorted(
        (
            RelatedEntityOut(entity=EntityOut.model_validate(row), depth=depth_by_id[row.id])
            for row in rows
        ),
        key=lambda item: item.depth,
    )

    return PenetrateResponse(
        start_entity_id=req.start_entity_id,
        max_depth=req.max_depth,
        related_count=len(related),
        related=related,
    )
|
||||
@@ -0,0 +1,36 @@
|
||||
"""API 数据传输模型(Pydantic)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
class EntityOut(BaseModel):
    # Outbound representation of an entity.
    # from_attributes allows validation straight from attribute-bearing
    # objects (the API layer passes ORM Entity rows to model_validate).
    model_config = {"from_attributes": True}

    id: uuid.UUID
    entity_type: str
    business_key: str
    display_name: str | None = None
    # A fresh dict per instance when the source provides none.
    attributes: dict = Field(default_factory=dict)
|
||||
|
||||
|
||||
class RelatedEntityOut(BaseModel):
    """Related entity hit by a penetration query, with its shortest hop count.

    The hop count is a first indication of evidence strength.
    """

    # The related entity itself.
    entity: EntityOut
    # Number of hops from the start entity.
    depth: int
|
||||
|
||||
|
||||
class PenetrateRequest(BaseModel):
    # Request body of the penetration endpoint.

    # Entity the traversal starts from.
    start_entity_id: uuid.UUID
    # Hop limit: defaults to 3 and is validated to lie within 1..6.
    max_depth: int = Field(3, ge=1, le=6)
|
||||
|
||||
|
||||
class PenetrateResponse(BaseModel):
    # Response body of the penetration endpoint.

    # Echo of the request parameters.
    start_entity_id: uuid.UUID
    max_depth: int
    # Number of items in `related`.
    related_count: int
    # Related entities together with their hop counts.
    related: list[RelatedEntityOut]
|
||||
@@ -0,0 +1 @@
|
||||
"""系统自审计模块:不可篡改操作日志、独立性与分权(R19)。"""
|
||||
@@ -0,0 +1,50 @@
|
||||
"""系统自审计 ORM 模型:不可篡改操作日志(R19)。
|
||||
|
||||
每条日志含哈希链(prev_hash + 内容 → entry_hash),任何篡改都会断链,可检测。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import BigInteger, DateTime, Identity, Index, String
|
||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.db import Base
|
||||
|
||||
|
||||
def _uuid() -> uuid.UUID:
    """Return a new random (version 4) UUID; used as a primary-key default."""
    fresh_id = uuid.uuid4()
    return fresh_id
|
||||
|
||||
|
||||
def _now() -> dt.datetime:
|
||||
return dt.datetime.now(dt.UTC)
|
||||
|
||||
|
||||
class AuditLog(Base):
    """Tamper-evident audit trail row.

    Rows are append-only: they must never be updated or deleted (guaranteed
    jointly by the application layer and by policy). Each row stores the hash
    of its predecessor (``prev_hash``) and its own ``entry_hash``.
    """

    __tablename__ = "audit_log"
    __table_args__ = (
        Index("ix_audit_actor", "actor"),
        Index("ix_audit_action", "action"),
        # The single uniqueness guarantee for seq. The column itself no longer
        # repeats unique=True, which would create a second, redundant unique
        # index that every insert has to maintain.
        Index("ix_audit_seq", "seq", unique=True),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    # Auto-incrementing sequence number; defines the order of the hash chain.
    seq: Mapped[int] = mapped_column(BigInteger, Identity(always=False), nullable=False)
    actor: Mapped[str] = mapped_column(String(64), nullable=False)
    role: Mapped[str | None] = mapped_column(String(32), nullable=True)
    # Action name, e.g. rule.update / clue.assign.
    action: Mapped[str] = mapped_column(String(64), nullable=False)
    target_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
    target_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
    detail: Mapped[dict] = mapped_column(JSONB, default=dict)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)

    # Hash-chain links: the previous row's entry_hash and this row's own hash.
    prev_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
    entry_hash: Mapped[str] = mapped_column(String(64), nullable=False)
|
||||
@@ -0,0 +1,78 @@
|
||||
"""RBAC 权限与独立性约束(R19、PRD §6 权限矩阵)。
|
||||
|
||||
核心独立性规则(硬约束):
|
||||
- 任何角色都不能删除线索(DELETE_CLUE 不授予任何角色;数据库触发器再兜底)。
|
||||
- 业务方(business)对系统无任何写权限。
|
||||
- 配规则/改阈值/看线索/出报告分权制衡。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
|
||||
|
||||
class Role(str, enum.Enum):
    """Roles known to the RBAC layer."""

    # Auditor.
    AUDITOR = "auditor"
    # Audit manager.
    AUDIT_MANAGER = "audit_manager"
    # Rule administrator.
    RULE_ADMIN = "rule_admin"
    # System administrator.
    SYS_ADMIN = "sys_admin"
    # System auditor (independent oversight).
    SYS_AUDITOR = "sys_auditor"
    # Audited business side (no write permission).
    BUSINESS = "business"
|
||||
|
||||
|
||||
class Permission(str, enum.Enum):
    """Individual permissions that can be granted to a role."""

    # Natural-language query.
    QUERY = "query"
    # View clues.
    VIEW_CLUE = "view_clue"
    # Adjudicate / classify a clue.
    ADJUDICATE_CLUE = "adjudicate_clue"
    # Assign a clue.
    ASSIGN_CLUE = "assign_clue"
    # Delete a clue (must never be granted to anyone).
    DELETE_CLUE = "delete_clue"
    # Configure rules.
    CONFIG_RULE = "config_rule"
    # Adjust thresholds.
    ADJUST_THRESHOLD = "adjust_threshold"
    # Issue reports.
    ISSUE_REPORT = "issue_report"
    # Data ingestion configuration.
    DATA_INGEST = "data_ingest"
    # View the self-audit trail.
    VIEW_AUDIT_TRAIL = "view_audit_trail"
    # Model deployment / upgrade.
    MODEL_DEPLOY = "model_deploy"
|
||||
|
||||
|
||||
# Role -> granted permission set. DELETE_CLUE deliberately appears under no
# role: clues cannot be deleted (R19).
ROLE_PERMISSIONS: dict[Role, set[Permission]] = {
    Role.AUDITOR: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.ADJUDICATE_CLUE,
        Permission.ISSUE_REPORT,
    },
    Role.AUDIT_MANAGER: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.ADJUDICATE_CLUE,
        Permission.ASSIGN_CLUE,
        Permission.ISSUE_REPORT,
    },
    Role.RULE_ADMIN: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.CONFIG_RULE,
        Permission.ADJUST_THRESHOLD,
    },
    Role.SYS_ADMIN: {
        Permission.DATA_INGEST,
        Permission.MODEL_DEPLOY,
    },
    Role.SYS_AUDITOR: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.VIEW_AUDIT_TRAIL,
        Permission.ISSUE_REPORT,
    },
    # The audited business side holds no permission at all.
    Role.BUSINESS: set(),
}
|
||||
|
||||
|
||||
def has_permission(role: Role, perm: Permission) -> bool:
    """Return True when ``perm`` is in the permission set mapped to ``role``.

    A role missing from ROLE_PERMISSIONS is treated as having no permissions.
    """
    granted = ROLE_PERMISSIONS.get(role, set())
    return perm in granted
|
||||
|
||||
|
||||
def can_delete_clue(role: Role) -> bool:
    """Clues can never be deleted: always False, whatever the role.

    This is the hard independence constraint; ``role`` is accepted but ignored.
    """
    allowed = False
    return allowed
|
||||
@@ -0,0 +1,81 @@
|
||||
"""系统自审计服务:写入哈希链审计日志、校验完整性(R19)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.audit.models import AuditLog
|
||||
|
||||
|
||||
def _compute_hash(prev_hash: str | None, payload: dict) -> str:
    """Return the SHA-256 hex digest linking ``payload`` to ``prev_hash``.

    The payload is serialised as JSON with sorted keys and unescaped
    non-ASCII text; values JSON cannot encode are passed through ``str``.
    The hashed text is ``"<prev_hash or empty>|<json>"`` encoded as UTF-8.
    """
    serialized = json.dumps(payload, sort_keys=True, ensure_ascii=False, default=str)
    digest = hashlib.sha256()
    digest.update(((prev_hash or "") + "|" + serialized).encode("utf-8"))
    return digest.hexdigest()
|
||||
|
||||
|
||||
def record(
    session: Session,
    actor: str,
    action: str,
    *,
    role: str | None = None,
    target_type: str | None = None,
    target_id: str | None = None,
    detail: dict | None = None,
) -> AuditLog:
    """Append one audit log entry, linking it to the current end of the hash chain.

    The new row is added to the session and flushed, not committed.

    NOTE(review): the chain tail is read without any lock, so two concurrent
    writers could both link to the same predecessor — confirm how callers
    serialise writes.
    NOTE(review): ``seq`` and ``created_at`` are not part of the hashed
    payload, so changes to them would not be detected by the chain.
    """
    detail_payload = detail or {}

    # Row with the highest seq, i.e. the current chain tail (None for an empty log).
    tail = session.execute(
        select(AuditLog).order_by(AuditLog.seq.desc()).limit(1)
    ).scalar_one_or_none()
    prev_hash = tail.entry_hash if tail else None

    # Exactly the fields that are both stored and covered by the hash.
    fields = {
        "actor": actor,
        "role": role,
        "action": action,
        "target_type": target_type,
        "target_id": target_id,
        "detail": detail_payload,
    }
    entry = AuditLog(
        **fields,
        prev_hash=prev_hash,
        entry_hash=_compute_hash(prev_hash, fields),
    )
    session.add(entry)
    session.flush()
    return entry
|
||||
|
||||
|
||||
def verify_chain(session: Session) -> tuple[bool, int | None]:
    """Verify the integrity of the audit log hash chain.

    Rows are walked in ascending ``seq`` order. For each row the hash is
    recomputed from the stored fields and the previous row's hash.

    Returns ``(True, None)`` when every row checks out, otherwise
    ``(False, seq)`` for the first row whose hash or ``prev_hash`` link
    does not match.
    """
    stmt = select(AuditLog).order_by(AuditLog.seq.asc())
    expected_prev: str | None = None
    for entry in session.execute(stmt).scalars().all():
        recomputed = _compute_hash(
            expected_prev,
            {
                "actor": entry.actor,
                "role": entry.role,
                "action": entry.action,
                "target_type": entry.target_type,
                "target_id": entry.target_id,
                "detail": entry.detail or {},
            },
        )
        intact = recomputed == entry.entry_hash and entry.prev_hash == expected_prev
        if not intact:
            return False, entry.seq
        expected_prev = entry.entry_hash
    return True, None
|
||||
@@ -0,0 +1 @@
|
||||
"""线索引擎模块:线索模型、生成、置信度分级、状态流转(人机闭环)。"""
|
||||
@@ -0,0 +1,136 @@
|
||||
"""线索 ORM 模型。
|
||||
|
||||
对应需求 R7(线索+证据链+解释)、R17(闭环状态)、R18(置信度分级)、R19(线索不可删)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import enum
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import DateTime, Enum, Float, ForeignKey, Index, String, Text
|
||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db import Base
|
||||
|
||||
|
||||
def _enum_values(enum_cls):
    """Return the member values of ``enum_cls`` in definition order.

    Passed as ``values_callable`` so SQLAlchemy writes each member's value
    (lower case) into the native PostgreSQL enum instead of its name.
    """
    return [member.value for member in enum_cls]
|
||||
|
||||
|
||||
def _uuid() -> uuid.UUID:
    """Return a new random (version 4) UUID; used as a primary-key default."""
    fresh_id = uuid.uuid4()
    return fresh_id
|
||||
|
||||
|
||||
def _now() -> dt.datetime:
|
||||
return dt.datetime.now(dt.UTC)
|
||||
|
||||
|
||||
class ConfidenceTier(str, enum.Enum):
    """Three-tier confidence triage (R18)."""

    # High confidence: pushed straight to handling.
    HIGH = "high"
    # Medium confidence: manual review.
    MEDIUM = "medium"
    # Low confidence: archived for reference.
    LOW = "low"
|
||||
|
||||
|
||||
class ClueStatus(str, enum.Enum):
    """Closed-loop state machine of a clue (R17)."""

    # Newly generated.
    NEW = "new"
    # Assigned.
    ASSIGNED = "assigned"
    # Under review.
    REVIEWING = "reviewing"
    # Adjudicated as substantiated.
    CONFIRMED = "confirmed"
    # Adjudicated as a false positive.
    DISMISSED = "dismissed"
    # Rectification in progress.
    RECTIFYING = "rectifying"
    # Handed over.
    TRANSFERRED = "transferred"
    # Closed out.
    CLOSED = "closed"
|
||||
|
||||
|
||||
class Clue(Base):
    """Audit clue.

    A clue is never physically deleted once generated (R19); a clue that no
    longer applies is expressed through ``status`` instead.
    """

    __tablename__ = "clue"
    __table_args__ = (
        Index("ix_clue_status", "status"),
        Index("ix_clue_scenario", "scenario_code"),
        Index("ix_clue_assignee", "assignee"),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    title: Mapped[str] = mapped_column(String(256), nullable=False)
    # Risk domain: revenue / cost / procurement / funds / compliance.
    risk_domain: Mapped[str] = mapped_column(String(32), nullable=False)
    # Scenario code, e.g. R8 / R9.
    scenario_code: Mapped[str] = mapped_column(String(32), nullable=False)
    confidence: Mapped[ConfidenceTier] = mapped_column(
        Enum(ConfidenceTier, name="confidence_tier", values_callable=_enum_values),
        nullable=False,
    )
    # Risk score in the range 0-1.
    score: Mapped[float] = mapped_column(Float, default=0.0)
    status: Mapped[ClueStatus] = mapped_column(
        Enum(ClueStatus, name="clue_status", values_callable=_enum_values),
        default=ClueStatus.NEW,
        nullable=False,
    )
    # Plain-language explanation (reason for the verdict) and the evidence chain.
    rationale: Mapped[str] = mapped_column(Text, default="")
    evidence: Mapped[dict] = mapped_column(JSONB, default=dict)
    # Subjects involved (amounts, entity id lists, ...).
    subjects: Mapped[dict] = mapped_column(JSONB, default=dict)
    amount_involved: Mapped[float | None] = mapped_column(Float, nullable=True)

    assignee: Mapped[str | None] = mapped_column(String(64), nullable=True)
    # False-positive / substantiated feedback (R18 feedback learning):
    # confirmed / false_positive.
    feedback: Mapped[str | None] = mapped_column(String(16), nullable=True)

    # Traceability: model / rule / data version in effect when the clue was
    # produced (R19 triple trail).
    model_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
    rule_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), nullable=True)

    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
    updated_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now, onupdate=_now
    )

    # Status-transition trail. The cascade is limited to save-update/merge:
    # the former "all, delete-orphan" made the ORM physically delete history
    # rows when a clue was deleted or a row was removed from this collection,
    # which contradicts the no-delete trail requirement (R19).
    history: Mapped[list[ClueStatusHistory]] = relationship(
        back_populates="clue", cascade="save-update, merge"
    )
|
||||
|
||||
|
||||
class ClueStatusHistory(Base):
    """Trail of clue status transitions (R17 / R19)."""

    __tablename__ = "clue_status_history"
    __table_args__ = (Index("ix_csh_clue", "clue_id"),)

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    # Clue this transition belongs to.
    clue_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
    )
    # Status before the transition; nullable because there may be none.
    from_status: Mapped[str | None] = mapped_column(String(16), nullable=True)
    # Status after the transition.
    to_status: Mapped[str] = mapped_column(String(16), nullable=False)
    # Who performed the transition.
    actor: Mapped[str] = mapped_column(String(64), nullable=False)
    note: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)

    clue: Mapped[Clue] = relationship(back_populates="history")
|
||||
|
||||
|
||||
class WorkingPaper(Base):
    """Audit working paper (R17): generated automatically once adjudication completes; traceable."""

    __tablename__ = "working_paper"
    __table_args__ = (Index("ix_wp_clue", "clue_id"),)

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    # Clue this paper documents.
    clue_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
    )
    content: Mapped[str] = mapped_column(Text, default="")
    conclusion: Mapped[str | None] = mapped_column(String(32), nullable=True)
    author: Mapped[str] = mapped_column(String(64), nullable=False)
    # Snapshot of evidence / versions.
    snapshot: Mapped[dict] = mapped_column(JSONB, default=dict)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
|
||||
@@ -0,0 +1,195 @@
|
||||
"""线索服务:生成、置信度分级、状态流转、底稿生成、反馈。
|
||||
|
||||
对应 R7 / R17 / R18 / R19。所有状态变更写入历史并记自审计日志(线索不可删)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.audit import service as audit
|
||||
from app.clues.models import (
|
||||
Clue,
|
||||
ClueStatus,
|
||||
ClueStatusHistory,
|
||||
ConfidenceTier,
|
||||
WorkingPaper,
|
||||
)
|
||||
|
||||
# Allowed status transitions (R17 closed loop): current status -> statuses it
# may move to. CLOSED is terminal.
_ALLOWED_TRANSITIONS: dict[ClueStatus, set[ClueStatus]] = {
    # Intake.
    ClueStatus.NEW: {ClueStatus.ASSIGNED, ClueStatus.REVIEWING},
    ClueStatus.ASSIGNED: {ClueStatus.REVIEWING},
    # Adjudication.
    ClueStatus.REVIEWING: {ClueStatus.CONFIRMED, ClueStatus.DISMISSED},
    # Follow-up after the verdict.
    ClueStatus.CONFIRMED: {ClueStatus.RECTIFYING, ClueStatus.TRANSFERRED},
    ClueStatus.DISMISSED: {ClueStatus.CLOSED},
    ClueStatus.RECTIFYING: {ClueStatus.CLOSED},
    ClueStatus.TRANSFERRED: {ClueStatus.CLOSED},
    ClueStatus.CLOSED: set(),
}
|
||||
|
||||
|
||||
class IllegalTransitionError(ValueError):
    """Raised for a clue status transition that the state machine does not allow."""
|
||||
|
||||
|
||||
def score_to_tier(score: float) -> ConfidenceTier:
    """Map a risk score to one of the three confidence tiers (R18).

    ``score >= 0.8`` is HIGH, ``score >= 0.5`` is MEDIUM, anything else LOW.
    """
    if score < 0.5:
        return ConfidenceTier.LOW
    if score < 0.8:
        return ConfidenceTier.MEDIUM
    if score >= 0.8:
        return ConfidenceTier.HIGH
    # Only reachable for values that compare false to everything (NaN).
    return ConfidenceTier.LOW
|
||||
|
||||
|
||||
def create_clue(
    session: Session,
    *,
    title: str,
    risk_domain: str,
    scenario_code: str,
    score: float,
    rationale: str,
    evidence: dict,
    subjects: dict | None = None,
    amount_involved: float | None = None,
    model_version: str | None = None,
    rule_version: str | None = None,
    data_version_id: uuid.UUID | None = None,
    actor: str = "system",
) -> Clue:
    """Create a clue, tier it from its score, and leave a creation trail.

    The clue starts in status NEW. A status-history row and a ``create_clue``
    audit log entry are written as well. Everything is flushed, not committed.
    """
    tier = score_to_tier(score)
    new_clue = Clue(
        title=title,
        risk_domain=risk_domain,
        scenario_code=scenario_code,
        confidence=tier,
        score=score,
        status=ClueStatus.NEW,
        rationale=rationale,
        evidence=evidence,
        subjects=subjects or {},
        amount_involved=amount_involved,
        model_version=model_version,
        rule_version=rule_version,
        data_version_id=data_version_id,
    )
    session.add(new_clue)
    # Flush before the trail rows are written, which reference new_clue.id.
    session.flush()

    _add_history(session, new_clue, None, ClueStatus.NEW, actor, "线索生成")
    audit_detail = {
        "scenario": scenario_code,
        "score": score,
        "confidence": new_clue.confidence.value,
    }
    audit.record(
        session,
        actor,
        "create_clue",
        target_type="clue",
        target_id=str(new_clue.id),
        detail=audit_detail,
    )
    return new_clue
|
||||
|
||||
|
||||
def _add_history(
    session: Session,
    clue: Clue,
    from_status: ClueStatus | None,
    to_status: ClueStatus,
    actor: str,
    note: str | None,
) -> None:
    """Add one status-history row for ``clue`` and flush it.

    Statuses are stored by their string value; ``from_status`` may be None.
    """
    previous = from_status.value if from_status else None
    entry = ClueStatusHistory(
        clue_id=clue.id,
        from_status=previous,
        to_status=to_status.value,
        actor=actor,
        note=note,
    )
    session.add(entry)
    session.flush()
|
||||
|
||||
|
||||
def transition(
    session: Session, clue: Clue, to_status: ClueStatus, actor: str, note: str | None = None
) -> Clue:
    """Move ``clue`` to ``to_status``, checking legality and leaving a trail.

    Raises:
        IllegalTransitionError: if ``to_status`` is not allowed from the
            clue's current status.
    """
    allowed_targets = _ALLOWED_TRANSITIONS.get(clue.status, set())
    if to_status not in allowed_targets:
        raise IllegalTransitionError(
            f"线索状态不能从 {clue.status.value} 流转到 {to_status.value}"
        )

    # Remember the old status before it is overwritten; the trail needs both.
    previous = clue.status
    clue.status = to_status
    session.flush()

    _add_history(session, clue, previous, to_status, actor, note)
    audit.record(
        session,
        actor,
        "transition_clue",
        target_type="clue",
        target_id=str(clue.id),
        detail={"from": previous.value, "to": to_status.value, "note": note},
    )
    return clue
|
||||
|
||||
|
||||
def assign(session: Session, clue: Clue, assignee: str, actor: str) -> Clue:
    """Set the clue's assignee and write an ``assign_clue`` audit entry.

    A clue still in status NEW is also moved to ASSIGNED; a clue in any other
    status keeps its status and only has its assignee changed.
    """
    clue.assignee = assignee
    session.flush()

    is_new = clue.status == ClueStatus.NEW
    if is_new:
        transition(session, clue, ClueStatus.ASSIGNED, actor, f"分派给 {assignee}")

    audit.record(
        session,
        actor,
        "assign_clue",
        target_type="clue",
        target_id=str(clue.id),
        detail={"assignee": assignee},
    )
    return clue
|
||||
|
||||
|
||||
def adjudicate(
    session: Session, clue: Clue, confirmed: bool, actor: str, note: str | None = None
) -> WorkingPaper:
    """Adjudicate a clue as substantiated or a false positive (R17 / R18).

    The clue is moved to CONFIRMED or DISMISSED, its feedback is recorded, a
    working paper with a snapshot of evidence and versions is created, and a
    ``create_working_paper`` audit entry is written.

    Args:
        session: Active session; changes are flushed, not committed.
        clue: The clue being adjudicated.
        confirmed: True for substantiated, False for false positive.
        actor: Who performs the adjudication; also the paper's author.
        note: Optional note, used for the transition and the paper content.

    Returns:
        The newly created working paper.

    Raises:
        IllegalTransitionError: if the clue cannot reach the verdict status
            from where it currently stands.
    """
    # Step into REVIEWING first when the state machine allows that move from
    # the current status. From any other status no intermediate step is taken
    # and the verdict transition below raises IllegalTransitionError.
    if (
        clue.status != ClueStatus.REVIEWING
        and ClueStatus.REVIEWING in _ALLOWED_TRANSITIONS.get(clue.status, set())
    ):
        transition(session, clue, ClueStatus.REVIEWING, actor, "进入研判")

    to = ClueStatus.CONFIRMED if confirmed else ClueStatus.DISMISSED
    transition(session, clue, to, actor, note)
    clue.feedback = "confirmed" if confirmed else "false_positive"
    session.flush()

    paper = WorkingPaper(
        clue_id=clue.id,
        content=note or "",
        conclusion=to.value,
        author=actor,
        # Freeze what the verdict was based on, so the paper stays traceable.
        snapshot={
            "evidence": clue.evidence,
            "rationale": clue.rationale,
            "score": clue.score,
            "model_version": clue.model_version,
            "rule_version": clue.rule_version,
            "data_version_id": str(clue.data_version_id) if clue.data_version_id else None,
        },
    )
    session.add(paper)
    # Flush before paper.id is read for the audit entry.
    session.flush()
    audit.record(
        session,
        actor,
        "create_working_paper",
        target_type="working_paper",
        target_id=str(paper.id),
        detail={"clue_id": str(clue.id), "conclusion": to.value},
    )
    return paper
|
||||
|
||||
|
||||
def list_clues(
    session: Session,
    *,
    status: ClueStatus | None = None,
    scenario_code: str | None = None,
    confidence: ConfidenceTier | None = None,
) -> list[Clue]:
    """Return clues ordered by descending score, optionally filtered.

    A filter is applied only when its argument is truthy, so ``None`` and the
    empty string both mean "no filter".
    """
    conditions = []
    if status:
        conditions.append(Clue.status == status)
    if scenario_code:
        conditions.append(Clue.scenario_code == scenario_code)
    if confidence:
        conditions.append(Clue.confidence == confidence)

    query = session.query(Clue)
    for condition in conditions:
        query = query.filter(condition)
    return query.order_by(Clue.score.desc()).all()
|
||||
@@ -0,0 +1,70 @@
|
||||
"""应用配置。
|
||||
|
||||
通过环境变量加载,区分 dev / prod 运行环境。
|
||||
prod 环境强制执行"数据零出域"红线:禁用任何公网 LLM Provider。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class AppEnv(str, Enum):
    """Runtime environment the application is running in."""

    # Development.
    dev = "dev"
    # Production.
    prod = "prod"
|
||||
|
||||
|
||||
class LLMProviderName(str, Enum):
    """Supported LLM providers."""

    # Public-network Qwen; dev only.
    dashscope = "dashscope"
    # Local; prod.
    vllm = "vllm"
|
||||
|
||||
|
||||
# Providers classified as public-network / data-egress; using any of them is
# forbidden in prod.
EGRESS_PROVIDERS: frozenset[LLMProviderName] = frozenset((LLMProviderName.dashscope,))
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Application settings loaded from environment variables and ``.env``.

    Variable names carry no prefix and are matched case-insensitively;
    unknown variables are ignored.
    """

    model_config = SettingsConfigDict(
        env_prefix="",
        env_file=".env",
        extra="ignore",
        case_sensitive=False,
    )

    # Runtime environment.
    aiaudit_env: AppEnv = AppEnv.dev

    # Storage back ends.
    database_url: str = "postgresql+psycopg://freedak@localhost:5432/aiaudit"
    redis_url: str = "redis://localhost:6379/0"

    # LLM provider selection and per-provider options.
    llm_provider: LLMProviderName = LLMProviderName.dashscope
    dashscope_api_key: str = ""
    dashscope_model: str = "qwen-plus"
    vllm_base_url: str = "http://localhost:8001/v1"
    vllm_model: str = "qwen2.5-72b-instruct"

    @property
    def is_prod(self) -> bool:
        """True when the configured environment is prod."""
        return self.aiaudit_env == AppEnv.prod

    def validate_egress_policy(self) -> None:
        """Enforce the zero-data-egress red line: no public provider in prod.

        Meant to be called at application start-up; a violation raises and
        thereby blocks the start.

        Raises:
            RuntimeError: if the environment is prod and the selected LLM
                provider is one of EGRESS_PROVIDERS.
        """
        if not self.is_prod or self.llm_provider not in EGRESS_PROVIDERS:
            return
        raise RuntimeError(
            "数据零出域红线违规:prod 环境禁止使用公网 LLM Provider "
            f"'{self.llm_provider.value}'。请改用本地 Provider(如 vllm)。"
        )
|
||||
|
||||
|
||||
# Process-wide Settings instance, created on first use.
_settings: Settings | None = None


def get_settings() -> Settings:
    """Return the shared Settings instance, building it on the first call."""
    global _settings
    if _settings is not None:
        return _settings
    _settings = Settings()
    return _settings
|
||||
@@ -0,0 +1 @@
|
||||
"""审计数据中台模块:本体/知识图谱、双时态、时序、数据版本。"""
|
||||
@@ -0,0 +1,83 @@
|
||||
"""双时态事实仓储:写入与"按历史时点回放"查询。
|
||||
|
||||
对应需求 R3 / ADR-0002:
|
||||
- 业务有效期 valid_from/valid_to(应用时间)
|
||||
- 系统记录期 system_from/system_to(事务时间)
|
||||
回放 = 给定 (as_of_valid, as_of_system) 在两条时间线上各取"包含该时点"的记录。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import or_
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.models import BitemporalFact
|
||||
|
||||
|
||||
def record_fact(
    session: Session,
    entity_id: uuid.UUID,
    attr_name: str,
    attr_value: dict,
    valid_from: dt.datetime,
    valid_to: dt.datetime | None = None,
    data_version_id: uuid.UUID | None = None,
) -> BitemporalFact:
    """Record one bitemporal fact and flush it.

    Only the business-validity bounds are set here; the system-time columns
    are left to the model (presumably filled by its defaults — verify against
    BitemporalFact).
    """
    new_fact = BitemporalFact(
        entity_id=entity_id,
        attr_name=attr_name,
        attr_value=attr_value,
        valid_from=valid_from,
        valid_to=valid_to,
        data_version_id=data_version_id,
    )
    session.add(new_fact)
    session.flush()
    return new_fact
|
||||
|
||||
|
||||
def as_of(
    session: Session,
    entity_id: uuid.UUID,
    attr_name: str,
    as_of_valid: dt.datetime,
    as_of_system: dt.datetime | None = None,
) -> BitemporalFact | None:
    """Replay: return the fact valid at a business time, as seen at a system time.

    - Business timeline: ``valid_from <= as_of_valid < valid_to``
      (a null ``valid_to`` means "until now").
    - System timeline: ``system_from <= as_of_system < system_to``
      (a null ``system_to`` means "currently visible").

    ``as_of_system`` defaults to the current UTC time. When several rows
    match, the one with the latest ``system_from`` is returned; None when no
    row matches.
    """
    if not as_of_system:
        as_of_system = dt.datetime.now(dt.UTC)

    fact = BitemporalFact
    valid_still_open = or_(fact.valid_to.is_(None), fact.valid_to > as_of_valid)
    system_still_open = or_(fact.system_to.is_(None), fact.system_to > as_of_system)

    return (
        session.query(fact)
        .filter(
            fact.entity_id == entity_id,
            fact.attr_name == attr_name,
            fact.valid_from <= as_of_valid,
            valid_still_open,
            fact.system_from <= as_of_system,
            system_still_open,
        )
        .order_by(fact.system_from.desc())
        .first()
    )
|
||||
|
||||
|
||||
def close_fact(
    session: Session, fact: BitemporalFact, system_to: dt.datetime | None = None
) -> None:
    """Logically close a fact's system-visibility period.

    Used for corrections and invalidation instead of physical deletion.
    ``system_to`` defaults to the current UTC time. The change is flushed.
    """
    closed_at = system_to if system_to else dt.datetime.now(dt.UTC)
    fact.system_to = closed_at
    session.add(fact)
    session.flush()
|
||||
@@ -0,0 +1,58 @@
|
||||
"""数据中台 schema 初始化。
|
||||
|
||||
MVP 阶段以 SQLAlchemy metadata 建表(后续可迁移到 Alembic)。
|
||||
扩展按可用性可选启用:
|
||||
- btree_gist / vector:若可用则创建。
|
||||
- timescaledb:若可用则把 metric_event 转为超表;不可用则保持普通表(带时间索引)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.engine import Engine
|
||||
|
||||
from app.datahub import models # noqa: F401 确保模型注册到 metadata
|
||||
from app.db import Base, get_engine
|
||||
|
||||
|
||||
def _extension_available(engine: Engine, name: str) -> bool:
    """Return True when ``name`` is listed in ``pg_available_extensions``."""
    lookup = text("SELECT 1 FROM pg_available_extensions WHERE name = :n")
    with engine.connect() as conn:
        hit = conn.execute(lookup, {"n": name}).first()
    return hit is not None
|
||||
|
||||
|
||||
def init_extensions(engine: Engine) -> dict[str, bool]:
    """Create the optional extensions where possible and report which are enabled.

    For each of btree_gist, vector and timescaledb: when the server lists the
    extension as available, ``CREATE EXTENSION IF NOT EXISTS`` is run in its
    own transaction. An extension that is unavailable, or whose creation
    fails with a database error, is reported as False and does not abort the
    bootstrap; the failure is logged as a warning.

    Returns:
        Mapping of extension name to whether it is enabled after this call.
    """
    import logging

    from sqlalchemy.exc import SQLAlchemyError

    log = logging.getLogger(__name__)
    status: dict[str, bool] = {}
    # The names come from this fixed tuple, never from input, so formatting
    # them into the statement is safe.
    for ext in ("btree_gist", "vector", "timescaledb"):
        if not _extension_available(engine, ext):
            status[ext] = False
            continue
        try:
            with engine.begin() as conn:
                conn.execute(text(f"CREATE EXTENSION IF NOT EXISTS {ext}"))
        except SQLAlchemyError:
            # Listed as available but not creatable; treat as disabled so the
            # caller falls back to the plain-table path.
            log.warning("could not enable extension %s; continuing without it", ext, exc_info=True)
            status[ext] = False
        else:
            status[ext] = True
    return status
|
||||
|
||||
|
||||
def create_schema(engine: Engine | None = None) -> dict[str, bool]:
    """Create all tables registered on ``Base.metadata`` and enable the hypertable when possible.

    Uses ``get_engine()`` when no engine is given. Returns the extension
    status produced by ``init_extensions``.
    """
    if engine is None:
        engine = get_engine()
    status = init_extensions(engine)
    Base.metadata.create_all(engine)

    if not status.get("timescaledb"):
        return status

    # TimescaleDB is enabled: turn the time-series event table into a
    # hypertable (idempotent thanks to if_not_exists).
    make_hypertable = text(
        "SELECT create_hypertable('metric_event', 'event_time', "
        "if_not_exists => TRUE, migrate_data => TRUE)"
    )
    with engine.begin() as conn:
        conn.execute(make_hypertable)
    return status
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: build the schema, then print the extension status.
    st = create_schema()
    print("数据中台 schema 初始化完成。扩展状态:", st)
|
||||
@@ -0,0 +1,118 @@
|
||||
"""知识图谱仓储:实体/关系写入与多跳穿透(递归 CTE)。
|
||||
|
||||
对应需求 R2:支撑隐性实控人、关联方网络、"马甲"供应商等穿透分析。
|
||||
统一穿透查询服务(P1.2.5)在此之上封装对外 API,对上层屏蔽底层是关系表还是图库。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.models import Entity, EntityRelationship
|
||||
from app.datahub.ontology import EntityType, RelationshipType, is_valid_relationship
|
||||
|
||||
|
||||
class OntologyViolationError(ValueError):
    """Raised when a relationship does not satisfy the ontology constraints."""
|
||||
|
||||
|
||||
def upsert_entity(
    session: Session,
    entity_type: EntityType,
    business_key: str,
    display_name: str | None = None,
    attributes: dict | None = None,
    data_version_id: uuid.UUID | None = None,
) -> Entity:
    """Idempotently write an entity keyed by (type, business key).

    This pair is the anchor used to align master data. When the entity
    already exists, a given ``display_name`` replaces the stored one and
    non-empty ``attributes`` are merged over the stored ones;
    ``data_version_id`` is only used when a new entity is created.
    """
    match = (
        session.query(Entity)
        .filter(Entity.entity_type == entity_type.value, Entity.business_key == business_key)
        .one_or_none()
    )

    if match is None:
        created = Entity(
            entity_type=entity_type.value,
            business_key=business_key,
            display_name=display_name,
            attributes=attributes or {},
            data_version_id=data_version_id,
        )
        session.add(created)
        session.flush()
        return created

    if display_name is not None:
        match.display_name = display_name
    if attributes:
        # Assign a new dict instead of mutating in place.
        match.attributes = {**(match.attributes or {}), **attributes}
    return match
|
||||
|
||||
|
||||
def add_relationship(
    session: Session,
    rel_type: RelationshipType,
    source: Entity,
    target: Entity,
    attributes: dict | None = None,
    data_version_id: uuid.UUID | None = None,
) -> EntityRelationship:
    """Add one relationship edge after checking it against the ontology.

    Raises:
        OntologyViolationError: if ``rel_type`` is not allowed between the
            source and target entity types.
    """
    src_type = EntityType(source.entity_type)
    tgt_type = EntityType(target.entity_type)
    permitted = is_valid_relationship(rel_type, src_type, tgt_type)
    if not permitted:
        raise OntologyViolationError(
            f"关系 {rel_type.value} 不允许从 {src_type.value} 指向 {tgt_type.value}"
        )

    edge = EntityRelationship(
        rel_type=rel_type.value,
        source_id=source.id,
        target_id=target.id,
        attributes=attributes or {},
        data_version_id=data_version_id,
    )
    session.add(edge)
    session.flush()
    return edge
|
||||
|
||||
|
||||
# Multi-hop penetration: walk relationship edges as undirected and return the
# entities connected to the start entity within max_depth hops, each with its
# smallest hop count. Used to spot suspected common controllers and
# related-party networks.
#
# The recursion tracks only (entity_id, depth) and uses UNION, so PostgreSQL
# discards rows it has already produced. The working set is therefore bounded
# by (entities x depth levels) rather than growing with the number of distinct
# paths, and cycles end at the depth limit. The result is the same as with
# path enumeration, because the shortest walk to an entity is always a simple
# path.
_TRAVERSE_SQL = text(
    """
    WITH RECURSIVE reachable(entity_id, depth) AS (
        SELECT :start_id, 0
        UNION
        SELECT
            CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END,
            rc.depth + 1
        FROM reachable rc
        JOIN entity_relationship r
          ON (r.source_id = rc.entity_id OR r.target_id = rc.entity_id)
        WHERE rc.depth < :max_depth
    )
    SELECT entity_id, MIN(depth) AS depth
    FROM reachable
    WHERE entity_id <> :start_id
    GROUP BY entity_id
    ORDER BY depth;
    """
)
|
||||
|
||||
|
||||
def find_related_entities(
    session: Session, start_id: uuid.UUID, max_depth: int = 3
) -> list[tuple[uuid.UUID, int]]:
    """Return ``(id, shortest hop count)`` for entities within ``max_depth`` hops of the start."""
    params = {"start_id": start_id, "max_depth": max_depth}
    result = session.execute(_TRAVERSE_SQL, params).all()
    # Convert result rows to plain (entity_id, depth) tuples.
    return [(row[0], row[1]) for row in result]
|
||||
@@ -0,0 +1,157 @@
|
||||
"""审计数据中台 ORM 模型。
|
||||
|
||||
涵盖:数据版本、本体实体、知识图谱关系边、双时态属性、时序事件。
|
||||
对应需求 R2 / R3,建模决策见 ADR-0002。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import (
|
||||
DateTime,
|
||||
Float,
|
||||
ForeignKey,
|
||||
Index,
|
||||
Integer,
|
||||
String,
|
||||
Text,
|
||||
UniqueConstraint,
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db import Base
|
||||
|
||||
|
||||
def _uuid() -> uuid.UUID:
    """Return a new random UUID; used as the default factory for primary keys."""
    fresh_id = uuid.uuid4()
    return fresh_id
|
||||
|
||||
|
||||
class DataVersion(Base):
    """Data-version registry.

    Records source system, batch label, ingestion time and row count for each
    ingested batch, so that conclusions stay traceable (requirement R3).
    """

    __tablename__ = "data_version"

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    source_system: Mapped[str] = mapped_column(String(64), nullable=False)
    batch_label: Mapped[str] = mapped_column(String(128), nullable=False)
    row_count: Mapped[int] = mapped_column(Integer, default=0)
    # Timezone-aware ingestion timestamp; defaults to the current UTC time.
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: dt.datetime.now(dt.UTC),
    )
    note: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
|
||||
|
||||
class Entity(Base):
    """Ontology entity node (a vertex of the knowledge graph).

    ``business_key`` is the business primary key in the source system and is
    used for master-data alignment (normalising one entity across systems).
    The pair ``(entity_type, business_key)`` is unique.
    """

    __tablename__ = "entity"
    __table_args__ = (
        UniqueConstraint(
            "entity_type",
            "business_key",
            name="uq_entity_type_bizkey",
        ),
        Index("ix_entity_type", "entity_type"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    entity_type: Mapped[str] = mapped_column(String(32), nullable=False)
    business_key: Mapped[str] = mapped_column(String(128), nullable=False)
    display_name: Mapped[str | None] = mapped_column(String(256), nullable=True)
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)

    # Master-data alignment: the "golden" entity this row has been merged into
    # (same controlling party / same subject). NULL means this row is itself the
    # canonical entity.
    canonical_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("entity.id"),
        nullable=True,
    )

    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("data_version.id"),
        nullable=True,
    )
|
||||
|
||||
|
||||
class EntityRelationship(Base):
    """Knowledge-graph relationship edge (directed).

    Multi-hop penetration traverses this table with a recursive CTE.
    """

    __tablename__ = "entity_relationship"
    __table_args__ = (
        # Separate indexes on both endpoints and on the relationship type.
        Index("ix_rel_source", "source_id"),
        Index("ix_rel_target", "target_id"),
        Index("ix_rel_type", "rel_type"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    rel_type: Mapped[str] = mapped_column(String(32), nullable=False)
    source_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("entity.id"),
        nullable=False,
    )
    target_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("entity.id"),
        nullable=False,
    )
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)

    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("data_version.id"),
        nullable=True,
    )

    # Both relationships point at the entity table, so each one names the
    # foreign-key column it follows.
    source: Mapped[Entity] = relationship(foreign_keys=[source_id])
    target: Mapped[Entity] = relationship(foreign_keys=[target_id])
|
||||
|
||||
|
||||
class BitemporalFact(Base):
    """Bitemporal fact: how one attribute/state of an entity changes over time.

    - Business validity period: ``valid_from`` / ``valid_to`` (application time).
    - System record period: ``system_from`` / ``system_to`` (transaction time).

    Replaying history means filtering both timelines for a given
    ``(as_of_valid, as_of_system)`` pair (see the repository layer).
    """

    __tablename__ = "bitemporal_fact"
    __table_args__ = (
        Index("ix_btf_entity_attr", "entity_id", "attr_name"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    entity_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("entity.id"),
        nullable=False,
    )
    attr_name: Mapped[str] = mapped_column(String(64), nullable=False)
    attr_value: Mapped[dict] = mapped_column(JSONB, default=dict)

    # Application-time interval; valid_to is nullable.
    valid_from: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
    )
    valid_to: Mapped[dt.datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    # Transaction-time interval; system_from defaults to the current UTC time.
    system_from: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True),
        default=lambda: dt.datetime.now(dt.UTC),
    )
    system_to: Mapped[dt.datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )

    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("data_version.id"),
        nullable=True,
    )
|
||||
|
||||
|
||||
class MetricEvent(Base):
    """Time-series event: behaviour / metric data.

    Examples: user lifecycle, payment collection, call traffic, commission,
    resource usage.

    After deployment the table is meant to be turned into a hypertable via
    TimescaleDB ``create_hypertable('metric_event', 'event_time')``.
    """

    # NOTE(review): TimescaleDB requires primary keys / unique indexes on a
    # hypertable to include the partitioning column; the primary key here is
    # ``id`` only — confirm the migration accounts for this.

    __tablename__ = "metric_event"
    __table_args__ = (
        Index(
            "ix_metric_subject_time",
            "subject_type",
            "subject_key",
            "event_time",
        ),
        Index("ix_metric_name_time", "metric_name", "event_time"),
    )

    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        primary_key=True,
        default=_uuid,
    )
    event_time: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
    )
    # e.g. msisdn / channel
    subject_type: Mapped[str] = mapped_column(String(32), nullable=False)
    subject_key: Mapped[str] = mapped_column(String(128), nullable=False)
    # e.g. traffic_mb / commission
    metric_name: Mapped[str] = mapped_column(String(64), nullable=False)
    metric_value: Mapped[float] = mapped_column(Float, default=0.0)
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)

    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("data_version.id"),
        nullable=True,
    )
|
||||
@@ -0,0 +1,86 @@
|
||||
"""审计本体(Ontology)定义。
|
||||
|
||||
定义电信内审域的核心实体类型与关系类型,作为知识图谱与主数据对齐的基准。
|
||||
对应需求 R2。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class EntityType(str, Enum):
    """Core entity types of the audit ontology.

    Members are also ``str`` instances, so they compare equal to their values.
    """

    # Customer (including government / enterprise customers)
    CUSTOMER = "customer"
    # Contract
    CONTRACT = "contract"
    # Phone number
    MSISDN = "msisdn"
    # Terminal device
    IMEI = "imei"
    # Account (paying / receiving)
    ACCOUNT = "account"
    # Work order
    WORK_ORDER = "work_order"
    # Supplier
    SUPPLIER = "supplier"
    # Settlement statement
    SETTLEMENT = "settlement"
    # Employee
    EMPLOYEE = "employee"
    # Channel / agent
    CHANNEL = "channel"
    # Legal person / natural person
    LEGAL_PERSON = "legal_person"
    # Address
    ADDRESS = "address"
|
||||
|
||||
|
||||
class RelationshipType(str, Enum):
    """Core (directed) relationship types of the audit ontology.

    Each comment reads ``source -meaning-> target``.
    """

    # customer -signs-> contract
    SIGNED = "signed"
    # contract -repayment account-> account
    PAID_BY = "paid_by"
    # customer / supplier / legal person -owns-> account
    OWNS_ACCOUNT = "owns_account"
    # customer / supplier -registered address-> address
    REGISTERED_AT = "registered_at"
    # legal person -legal representative of-> customer / supplier
    LEGAL_REP_OF = "legal_rep_of"
    # legal person -relative / affiliate-> legal person
    RELATED_TO = "related_to"
    # customer -holds-> phone number
    HOLDS_MSISDN = "holds_msisdn"
    # phone number -bound to-> IMEI
    BOUND_DEVICE = "bound_device"
    # phone number / contract -belongs to-> channel
    BELONGS_TO_CHANNEL = "belongs_to_channel"
    # supplier -supplies-> contract / work order
    SUPPLIES = "supplies"
    # work order -handled by-> employee
    HANDLED_BY = "handled_by"
    # settlement statement -settles-> contract
    SETTLES = "settles"
|
||||
|
||||
|
||||
# Allowed (source entity types, target entity types) for every relationship type;
# used to validate writes into the graph.
RELATIONSHIP_DOMAIN: dict[RelationshipType, tuple[set[EntityType], set[EntityType]]] = {
    RelationshipType.SIGNED: (
        {EntityType.CUSTOMER},
        {EntityType.CONTRACT},
    ),
    RelationshipType.PAID_BY: (
        {EntityType.CONTRACT},
        {EntityType.ACCOUNT},
    ),
    RelationshipType.OWNS_ACCOUNT: (
        {EntityType.CUSTOMER, EntityType.SUPPLIER, EntityType.LEGAL_PERSON},
        {EntityType.ACCOUNT},
    ),
    RelationshipType.REGISTERED_AT: (
        {EntityType.CUSTOMER, EntityType.SUPPLIER},
        {EntityType.ADDRESS},
    ),
    RelationshipType.LEGAL_REP_OF: (
        {EntityType.LEGAL_PERSON},
        {EntityType.CUSTOMER, EntityType.SUPPLIER},
    ),
    RelationshipType.RELATED_TO: (
        {EntityType.LEGAL_PERSON},
        {EntityType.LEGAL_PERSON},
    ),
    RelationshipType.HOLDS_MSISDN: (
        {EntityType.CUSTOMER},
        {EntityType.MSISDN},
    ),
    RelationshipType.BOUND_DEVICE: (
        {EntityType.MSISDN},
        {EntityType.IMEI},
    ),
    RelationshipType.BELONGS_TO_CHANNEL: (
        {EntityType.MSISDN, EntityType.CONTRACT},
        {EntityType.CHANNEL},
    ),
    RelationshipType.SUPPLIES: (
        {EntityType.SUPPLIER},
        {EntityType.CONTRACT, EntityType.WORK_ORDER},
    ),
    RelationshipType.HANDLED_BY: (
        {EntityType.WORK_ORDER},
        {EntityType.EMPLOYEE},
    ),
    RelationshipType.SETTLES: (
        {EntityType.SETTLEMENT},
        {EntityType.CONTRACT},
    ),
}
|
||||
|
||||
|
||||
def is_valid_relationship(
    rel: RelationshipType, source: EntityType, target: EntityType
) -> bool:
    """Check a relationship's endpoint types against the ontology constraints.

    Returns:
        ``True`` when ``rel`` has an entry in ``RELATIONSHIP_DOMAIN`` and both
        ``source`` and ``target`` are in that entry's allowed sets; ``False``
        otherwise, including when ``rel`` has no entry at all.
    """
    try:
        allowed_sources, allowed_targets = RELATIONSHIP_DOMAIN[rel]
    except KeyError:
        # Relationship types without a declared domain are never valid.
        return False
    return source in allowed_sources and target in allowed_targets
|
||||
@@ -0,0 +1,40 @@
|
||||
"""数据库引擎与会话管理。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterator
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
|
||||
|
||||
from app.config import get_settings
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
    """Declarative base class shared by all ORM models."""
|
||||
|
||||
|
||||
# Module-level lazily created singletons: both stay None until first requested
# through get_engine() / get_sessionmaker(), which assign them.
_engine = None
_SessionLocal: sessionmaker[Session] | None = None
|
||||
|
||||
|
||||
def get_engine():
    """Return the shared SQLAlchemy engine, creating it on first use.

    The engine is built from ``settings.database_url`` with ``pool_pre_ping``
    enabled and cached in the module-level ``_engine``.
    """
    global _engine
    if _engine is not None:
        return _engine
    db_url = get_settings().database_url
    _engine = create_engine(db_url, pool_pre_ping=True, future=True)
    return _engine
|
||||
|
||||
|
||||
def get_sessionmaker() -> sessionmaker[Session]:
    """Return the shared session factory, creating it on first use.

    The factory is bound to ``get_engine()`` with ``expire_on_commit=False``
    and cached in the module-level ``_SessionLocal``.
    """
    global _SessionLocal
    if _SessionLocal is not None:
        return _SessionLocal
    _SessionLocal = sessionmaker(bind=get_engine(), expire_on_commit=False)
    return _SessionLocal
|
||||
|
||||
|
||||
def get_session() -> Iterator[Session]:
    """Yield one session; intended as a FastAPI dependency.

    The session comes from the shared factory and is used as a context manager
    around the ``yield``, so it is exited when the generator finishes.
    """
    session_factory = get_sessionmaker()
    with session_factory() as db_session:
        yield db_session
|
||||
@@ -0,0 +1,10 @@
|
||||
"""LLM Provider 抽象层。
|
||||
|
||||
通过统一接口隔离 LLM 实现,使开发期可用公网千问、生产期无缝切换本地 vLLM。
|
||||
强约束:"数据零出域"红线由 provider 工厂在 prod 环境拦截公网 Provider。
|
||||
"""
|
||||
|
||||
from app.llm.base import ChatMessage, LLMProvider, LLMResponse
|
||||
from app.llm.factory import get_llm_provider
|
||||
|
||||
__all__ = ["ChatMessage", "LLMProvider", "LLMResponse", "get_llm_provider"]
|
||||
@@ -0,0 +1,44 @@
|
||||
"""LLM Provider 抽象接口与数据模型。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
class ChatMessage:
    """One message of a chat conversation."""

    # One of "system", "user" or "assistant".
    role: str
    # Text of the message.
    content: str
|
||||
|
||||
|
||||
@dataclass
class LLMResponse:
    """Result of one chat completion."""

    content: str
    model: str
    provider: str
    # Whether the call went through an egress (public internet) channel; kept so
    # it can be recorded in the audit trail.
    egress: bool = False
    # Raw response payload; every instance gets its own empty dict by default.
    raw: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
class LLMProvider(abc.ABC):
    """Common interface implemented by every LLM backend.

    Business code depends only on this interface; switching between a public
    and a local backend is a configuration change, not a caller change.
    """

    #: Provider name.
    name: str = "base"
    #: Whether the provider goes over the public internet (egress). Providers
    #: with ``egress=True`` are forbidden in the prod environment.
    egress: bool = False

    @abc.abstractmethod
    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """Run a synchronous chat completion for ``messages``."""
        raise NotImplementedError

    @abc.abstractmethod
    def health(self) -> bool:
        """Liveness probe: report whether the provider is usable."""
        raise NotImplementedError
|
||||
@@ -0,0 +1,31 @@
|
||||
"""LLM Provider 工厂:按配置创建 provider,并执行数据零出域红线校验。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.config import EGRESS_PROVIDERS, LLMProviderName, Settings, get_settings
|
||||
from app.llm.base import LLMProvider
|
||||
from app.llm.providers import DashScopeProvider, VllmProvider
|
||||
|
||||
|
||||
class EgressPolicyError(RuntimeError):
    """Raised when the "no data leaves the domain" red line is violated."""
|
||||
|
||||
|
||||
def get_llm_provider(settings: Settings | None = None) -> LLMProvider:
    """Build the LLM provider selected by the configuration.

    Args:
        settings: Configuration to use; when falsy, ``get_settings()`` is used.

    Returns:
        A ``DashScopeProvider`` or ``VllmProvider`` built from the settings.

    Raises:
        EgressPolicyError: In prod when the configured provider is listed in
            ``EGRESS_PROVIDERS``.
        ValueError: When the configured provider matches no known provider.
    """
    cfg = settings or get_settings()
    chosen = cfg.llm_provider

    # Red line: public-internet providers are forbidden in prod.
    if cfg.is_prod and chosen in EGRESS_PROVIDERS:
        raise EgressPolicyError(
            f"数据零出域红线违规:prod 环境禁止使用公网 LLM Provider "
            f"'{chosen.value}'。"
        )

    if chosen == LLMProviderName.dashscope:
        return DashScopeProvider(
            api_key=cfg.dashscope_api_key,
            model=cfg.dashscope_model,
        )
    if chosen == LLMProviderName.vllm:
        return VllmProvider(base_url=cfg.vllm_base_url, model=cfg.vllm_model)

    raise ValueError(f"未知的 LLM Provider: {chosen}")
|
||||
@@ -0,0 +1,80 @@
|
||||
"""具体 LLM Provider 实现:DashScope(公网千问,仅 dev)、vLLM(本地,prod)。
|
||||
|
||||
两者均走 OpenAI 兼容的 /chat/completions 协议。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import httpx
|
||||
|
||||
from app.llm.base import ChatMessage, LLMProvider, LLMResponse
|
||||
|
||||
|
||||
class DashScopeProvider(LLMProvider):
    """Public Qwen endpoint (DashScope, OpenAI-compatible mode).

    For development and testing only, and only with desensitised or sample data.
    """

    name = "dashscope"
    egress = True  # goes over the public internet, i.e. data leaves the domain

    _BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"

    def __init__(self, api_key: str, model: str, timeout: float = 30.0) -> None:
        """Store the API key, the model name and the HTTP client timeout."""
        self._api_key = api_key
        self._model = model
        self._timeout = timeout

    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """POST ``messages`` to ``/chat/completions`` and wrap the first choice.

        Extra keyword arguments are merged into the request body after the
        ``model`` and ``messages`` keys. A non-success HTTP status raises via
        ``raise_for_status()``.
        """
        wire_messages = [{"role": msg.role, "content": msg.content} for msg in messages]
        body = {"model": self._model, "messages": wire_messages, **kwargs}
        auth_headers = {"Authorization": f"Bearer {self._api_key}"}
        endpoint = f"{self._BASE_URL}/chat/completions"

        with httpx.Client(timeout=self._timeout) as http:
            reply = http.post(endpoint, json=body, headers=auth_headers)
            reply.raise_for_status()
            parsed = reply.json()

        answer = parsed["choices"][0]["message"]["content"]
        return LLMResponse(
            content=answer,
            model=self._model,
            provider=self.name,
            egress=True,
            raw=parsed,
        )

    def health(self) -> bool:
        """Report whether an API key is configured; no request is made."""
        return True if self._api_key else False
|
||||
|
||||
|
||||
class VllmProvider(LLMProvider):
    """Local vLLM server (OpenAI-compatible). Used in production; data stays in-domain."""

    name = "vllm"
    egress = False

    def __init__(self, base_url: str, model: str, timeout: float = 60.0) -> None:
        """Store the base URL (trailing slashes removed), model name and timeout."""
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._timeout = timeout

    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """POST ``messages`` to ``/chat/completions`` and wrap the first choice.

        Extra keyword arguments are merged into the request body after the
        ``model`` and ``messages`` keys. A non-success HTTP status raises via
        ``raise_for_status()``.
        """
        wire_messages = [{"role": msg.role, "content": msg.content} for msg in messages]
        body = {"model": self._model, "messages": wire_messages, **kwargs}
        endpoint = f"{self._base_url}/chat/completions"

        with httpx.Client(timeout=self._timeout) as http:
            reply = http.post(endpoint, json=body)
            reply.raise_for_status()
            parsed = reply.json()

        answer = parsed["choices"][0]["message"]["content"]
        return LLMResponse(
            content=answer,
            model=self._model,
            provider=self.name,
            egress=False,
            raw=parsed,
        )

    def health(self) -> bool:
        """Probe ``GET {base_url}/models``; healthy only on HTTP 200.

        Any ``httpx.HTTPError`` raised by the probe is reported as unhealthy.
        """
        models_url = f"{self._base_url}/models"
        try:
            with httpx.Client(timeout=5.0) as http:
                status = http.get(models_url).status_code
        except httpx.HTTPError:
            return False
        return status == 200
|
||||
@@ -0,0 +1,45 @@
|
||||
"""AIAudit FastAPI 应用入口。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app import __version__
|
||||
from app.api.datahub import router as datahub_router
|
||||
from app.config import get_settings
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook.

    Runs the egress-policy check before the application starts serving; an
    exception raised by the check aborts startup. Nothing is done on shutdown.
    """
    get_settings().validate_egress_policy()
    yield
|
||||
|
||||
|
||||
# The application object; startup/shutdown behaviour comes from ``lifespan``.
app = FastAPI(title="AIAudit · 本地 AI 内审平台", version=__version__, lifespan=lifespan)

# Mount the data-hub routes.
app.include_router(datahub_router)
|
||||
|
||||
|
||||
@app.get("/health")
def health() -> dict:
    """Liveness probe: report a fixed status plus the package version."""
    payload = {"status": "ok", "version": __version__}
    return payload
|
||||
|
||||
|
||||
@app.get("/health/config")
def health_config() -> dict:
    """Configuration / compliance probe (no secrets are included).

    Reports the environment name, the configured LLM provider and
    ``egress_blocked_in_prod``, which mirrors ``settings.is_prod``.
    """
    cfg = get_settings()
    env_name = cfg.aiaudit_env.value
    provider_name = cfg.llm_provider.value
    return {
        "env": env_name,
        "llm_provider": provider_name,
        "egress_blocked_in_prod": cfg.is_prod,
    }
|
||||
Reference in New Issue
Block a user