Initial commit: InternalAuditInterprise
This commit is contained in:
@@ -0,0 +1,23 @@
|
||||
# 运行环境:dev | prod
|
||||
# prod 下禁用一切公网 LLM Provider(数据零出域红线)
|
||||
AIAUDIT_ENV=dev
|
||||
|
||||
# 数据库(本地 PostgreSQL 16 / Postgres.app,无密码)
|
||||
DATABASE_URL=postgresql+psycopg://freedak@localhost:5432/aiaudit
|
||||
|
||||
# Redis / Celery
|
||||
REDIS_URL=redis://localhost:6379/0
|
||||
|
||||
# MinIO
|
||||
MINIO_ENDPOINT=localhost:9000
|
||||
MINIO_ACCESS_KEY=aiaudit
|
||||
MINIO_SECRET_KEY=aiaudit_dev
|
||||
|
||||
# LLM Provider:dashscope(公网,仅 dev)| vllm(本地,prod)
|
||||
LLM_PROVIDER=dashscope
|
||||
# 公网千问(仅开发测试,且只允许脱敏/样例假数据)
|
||||
DASHSCOPE_API_KEY=
|
||||
DASHSCOPE_MODEL=qwen-plus
|
||||
# 本地 vLLM(生产)
|
||||
VLLM_BASE_URL=http://localhost:8001/v1
|
||||
VLLM_MODEL=qwen2.5-72b-instruct
|
||||
@@ -0,0 +1,38 @@
|
||||
[alembic]
|
||||
script_location = migrations
|
||||
prepend_sys_path = .
|
||||
sqlalchemy.url =
|
||||
|
||||
[loggers]
|
||||
keys = root,sqlalchemy,alembic
|
||||
|
||||
[handlers]
|
||||
keys = console
|
||||
|
||||
[formatters]
|
||||
keys = generic
|
||||
|
||||
[logger_root]
|
||||
level = WARNING
|
||||
handlers = console
|
||||
qualname =
|
||||
|
||||
[logger_sqlalchemy]
|
||||
level = WARNING
|
||||
handlers =
|
||||
qualname = sqlalchemy.engine
|
||||
|
||||
[logger_alembic]
|
||||
level = INFO
|
||||
handlers =
|
||||
qualname = alembic
|
||||
|
||||
[handler_console]
|
||||
class = StreamHandler
|
||||
args = (sys.stderr,)
|
||||
level = NOTSET
|
||||
formatter = generic
|
||||
|
||||
[formatter_generic]
|
||||
format = %(levelname)-5.5s [%(name)s] %(message)s
|
||||
datefmt = %H:%M:%S
|
||||
@@ -0,0 +1,3 @@
|
||||
"""AIAudit 后端应用包。"""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
@@ -0,0 +1 @@
|
||||
"""HTTP API 层。"""
|
||||
@@ -0,0 +1,64 @@
|
||||
"""数据中台统一穿透查询 API(P1.2.5)。
|
||||
|
||||
作为各引擎与审计场景访问知识图谱的共同入口,对上层屏蔽底层是关系表还是图库。
|
||||
对应需求 R2。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.api.schemas import (
|
||||
EntityOut,
|
||||
PenetrateRequest,
|
||||
PenetrateResponse,
|
||||
RelatedEntityOut,
|
||||
)
|
||||
from app.datahub.graph_repo import find_related_entities
|
||||
from app.datahub.models import Entity
|
||||
from app.db import get_session
|
||||
|
||||
router = APIRouter(prefix="/datahub", tags=["datahub"])
|
||||
|
||||
|
||||
# Look up one entity by primary key; respond with 404 when it is missing.
# (Documented with comments rather than a docstring so the generated OpenAPI
# description of this route stays exactly as it was.)
@router.get("/entities/{entity_id}", response_model=EntityOut)
def get_entity(entity_id: uuid.UUID, session: Session = Depends(get_session)) -> Entity:
    found = session.get(Entity, entity_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="实体不存在")
|
||||
|
||||
|
||||
# Multi-hop penetration endpoint: verifies the start entity exists, asks the
# graph repository for (entity id, depth) pairs, loads the matching Entity rows
# in one query, and returns them ordered by depth.
# The route docstring below is kept verbatim because FastAPI publishes it as
# the operation description.
@router.post("/penetrate", response_model=PenetrateResponse)
def penetrate(
    req: PenetrateRequest, session: Session = Depends(get_session)
) -> PenetrateResponse:
    """多跳穿透:返回与起点实体连通的关联实体(用于实控人/关联方/马甲识别)。"""
    if session.get(Entity, req.start_entity_id) is None:
        raise HTTPException(status_code=404, detail="起点实体不存在")

    hits = find_related_entities(session, req.start_entity_id, max_depth=req.max_depth)

    # Map each reachable entity id to the depth reported for it.
    depth_by_id = {}
    for related_id, hops in hits:
        depth_by_id[related_id] = hops

    # Bulk-load entity details; skip the query entirely when nothing was reached.
    rows = []
    if depth_by_id:
        rows = session.query(Entity).filter(Entity.id.in_(list(depth_by_id.keys()))).all()

    # sorted() is stable, so rows with equal depth keep the query's order.
    related = sorted(
        (
            RelatedEntityOut(entity=EntityOut.model_validate(row), depth=depth_by_id[row.id])
            for row in rows
        ),
        key=lambda item: item.depth,
    )

    return PenetrateResponse(
        start_entity_id=req.start_entity_id,
        max_depth=req.max_depth,
        related_count=len(related),
        related=related,
    )
|
||||
@@ -0,0 +1,36 @@
|
||||
"""API 数据传输模型(Pydantic)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
# Response shape for a single entity. ``from_attributes`` lets the model be
# populated from attribute-bearing objects (the API passes ORM ``Entity`` rows).
class EntityOut(BaseModel):
    model_config = {"from_attributes": True}

    id: uuid.UUID
    entity_type: str
    business_key: str
    display_name: str | None = None
    # A fresh dict per instance; never a shared mutable default.
    attributes: dict = Field(default_factory=dict)
|
||||
|
||||
|
||||
# One related entity found by penetration, paired with its hop count.
# The class docstring is kept verbatim: Pydantic exposes it as the schema
# description, so rewording it would change the published API docs.
class RelatedEntityOut(BaseModel):
    """穿透命中的关联实体,附最短跳数(证据强度的初步指示)。"""

    entity: EntityOut
    depth: int
|
||||
|
||||
|
||||
# Request body for the penetration endpoint.
class PenetrateRequest(BaseModel):
    start_entity_id: uuid.UUID
    # Hop limit: defaults to 3 and is validated to lie within 1..6 inclusive.
    max_depth: int = Field(default=3, ge=1, le=6)
|
||||
|
||||
|
||||
# Response body for the penetration endpoint: echoes the request parameters
# and carries the related entities together with their count.
class PenetrateResponse(BaseModel):
    start_entity_id: uuid.UUID
    max_depth: int
    related_count: int
    related: list[RelatedEntityOut]
|
||||
@@ -0,0 +1 @@
|
||||
"""系统自审计模块:不可篡改操作日志、独立性与分权(R19)。"""
|
||||
@@ -0,0 +1,50 @@
|
||||
"""系统自审计 ORM 模型:不可篡改操作日志(R19)。
|
||||
|
||||
每条日志含哈希链(prev_hash + 内容 → entry_hash),任何篡改都会断链,可检测。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import BigInteger, DateTime, Identity, Index, String
|
||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column
|
||||
|
||||
from app.db import Base
|
||||
|
||||
|
||||
def _uuid() -> uuid.UUID:
    """Return a new random (version 4) UUID; used as a column default."""
    fresh_id = uuid.uuid4()
    return fresh_id
|
||||
|
||||
|
||||
def _now() -> dt.datetime:
|
||||
return dt.datetime.now(dt.UTC)
|
||||
|
||||
|
||||
class AuditLog(Base):
    """Tamper-evident audit trail entry.

    Rows are intended to be append-only; per the original note, the
    no-update / no-delete rule is upheld by the application layer together with
    organisational policy. Each row stores ``prev_hash`` and ``entry_hash`` so
    that the rows, ordered by ``seq``, form a hash chain.
    """

    __tablename__ = "audit_log"
    __table_args__ = (
        Index("ix_audit_actor", "actor"),
        Index("ix_audit_action", "action"),
        # The single source of uniqueness for ``seq``.
        Index("ix_audit_seq", "seq", unique=True),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    # Auto-incrementing sequence number; defines the order of the hash chain.
    # Uniqueness is enforced by ix_audit_seq above. Also passing ``unique=True``
    # here would emit a second UNIQUE constraint, i.e. a redundant extra index
    # that every insert has to maintain.
    seq: Mapped[int] = mapped_column(BigInteger, Identity(always=False), nullable=False)
    actor: Mapped[str] = mapped_column(String(64), nullable=False)
    role: Mapped[str | None] = mapped_column(String(32), nullable=True)
    action: Mapped[str] = mapped_column(String(64), nullable=False)  # e.g. rule.update / clue.assign
    target_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
    target_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
    detail: Mapped[dict] = mapped_column(JSONB, default=dict)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)

    # Hash-chain links: hash of the preceding entry (NULL for the first row)
    # and the hash stored for this entry.
    prev_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
    entry_hash: Mapped[str] = mapped_column(String(64), nullable=False)
|
||||
@@ -0,0 +1,78 @@
|
||||
"""RBAC 权限与独立性约束(R19、PRD §6 权限矩阵)。
|
||||
|
||||
核心独立性规则(硬约束):
|
||||
- 任何角色都不能删除线索(DELETE_CLUE 不授予任何角色;数据库触发器再兜底)。
|
||||
- 业务方(business)对系统无任何写权限。
|
||||
- 配规则/改阈值/看线索/出报告分权制衡。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import enum
|
||||
|
||||
|
||||
@enum.unique
class Role(str, enum.Enum):
    """System roles; each member's value is its lowercase string identifier."""

    AUDITOR = "auditor"  # auditor
    AUDIT_MANAGER = "audit_manager"  # audit manager
    RULE_ADMIN = "rule_admin"  # rule administrator
    SYS_ADMIN = "sys_admin"  # system administrator
    SYS_AUDITOR = "sys_auditor"  # system auditor (independent oversight)
    BUSINESS = "business"  # audited business party (no write permission)
|
||||
|
||||
|
||||
@enum.unique
class Permission(str, enum.Enum):
    """Permissions that can be granted to a role."""

    QUERY = "query"  # natural-language query
    VIEW_CLUE = "view_clue"  # view clues
    ADJUDICATE_CLUE = "adjudicate_clue"  # assess / classify clues
    ASSIGN_CLUE = "assign_clue"  # assign clues
    DELETE_CLUE = "delete_clue"  # delete clues (must never be granted to anyone)
    CONFIG_RULE = "config_rule"  # configure rules
    ADJUST_THRESHOLD = "adjust_threshold"  # adjust thresholds
    ISSUE_REPORT = "issue_report"  # issue reports
    DATA_INGEST = "data_ingest"  # data-ingestion configuration
    VIEW_AUDIT_TRAIL = "view_audit_trail"  # view the self-audit trail
    MODEL_DEPLOY = "model_deploy"  # model deployment / upgrade
|
||||
|
||||
|
||||
# Role -> granted permission set. DELETE_CLUE deliberately appears under no
# role: clues cannot be deleted (R19).
ROLE_PERMISSIONS: dict[Role, set[Permission]] = {
    Role.AUDITOR: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.ADJUDICATE_CLUE,
        Permission.ISSUE_REPORT,
    },
    Role.AUDIT_MANAGER: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.ADJUDICATE_CLUE,
        Permission.ASSIGN_CLUE,
        Permission.ISSUE_REPORT,
    },
    Role.RULE_ADMIN: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.CONFIG_RULE,
        Permission.ADJUST_THRESHOLD,
    },
    Role.SYS_ADMIN: {
        Permission.DATA_INGEST,
        Permission.MODEL_DEPLOY,
    },
    Role.SYS_AUDITOR: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.VIEW_AUDIT_TRAIL,
        Permission.ISSUE_REPORT,
    },
    # The business side holds no permissions at all.
    Role.BUSINESS: set(),
}
|
||||
|
||||
|
||||
def has_permission(role: Role, perm: Permission) -> bool:
    """Return True when ``perm`` is in the set mapped to ``role``.

    A role missing from ``ROLE_PERMISSIONS`` is treated as having no permissions.
    """
    granted = ROLE_PERMISSIONS.get(role)
    if granted is None:
        return False
    return perm in granted
|
||||
|
||||
|
||||
def can_delete_clue(role: Role) -> bool:
    """Clues can never be deleted: always False, whatever the role.

    This is the hard independence constraint; ``role`` is accepted but not consulted.
    """
    _ = role  # intentionally unused
    return False
|
||||
@@ -0,0 +1,81 @@
|
||||
"""系统自审计服务:写入哈希链审计日志、校验完整性(R19)。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.audit.models import AuditLog
|
||||
|
||||
|
||||
def _compute_hash(prev_hash: str | None, payload: dict) -> str:
    """Return the SHA-256 hex digest linking ``payload`` to ``prev_hash``.

    The payload is serialised as JSON with sorted keys (non-ASCII kept as-is,
    values JSON cannot encode are passed through ``str``). The hashed text is
    ``"<prev_hash>|<json>"`` encoded as UTF-8, with an empty prefix when
    ``prev_hash`` is None or empty.
    """
    serialized = json.dumps(payload, sort_keys=True, ensure_ascii=False, default=str)
    prefix = prev_hash if prev_hash else ""
    hasher = hashlib.sha256()
    hasher.update(f"{prefix}|{serialized}".encode("utf-8"))
    return hasher.hexdigest()
|
||||
|
||||
|
||||
def record(
    session: Session,
    actor: str,
    action: str,
    *,
    role: str | None = None,
    target_type: str | None = None,
    target_id: str | None = None,
    detail: dict | None = None,
) -> AuditLog:
    """Append one audit-log entry, linking it to the current tail of the hash chain.

    The entry with the highest ``seq`` supplies ``prev_hash`` (None when the
    table is empty). The new row is added to the session and flushed, not
    committed.

    Returns:
        The newly added ``AuditLog`` row.
    """
    # NOTE(review): reading the tail and appending are separate steps; whether
    # concurrent writers are serialised elsewhere is not visible here — confirm.
    tail_stmt = select(AuditLog).order_by(AuditLog.seq.desc()).limit(1)
    tail = session.execute(tail_stmt).scalar_one_or_none()
    prev_hash = tail.entry_hash if tail else None

    # Exactly these fields are hashed and persisted.
    fields = {
        "actor": actor,
        "role": role,
        "action": action,
        "target_type": target_type,
        "target_id": target_id,
        "detail": detail or {},
    }

    entry = AuditLog(
        **fields,
        prev_hash=prev_hash,
        entry_hash=_compute_hash(prev_hash, fields),
    )
    session.add(entry)
    session.flush()
    return entry
|
||||
|
||||
|
||||
def verify_chain(session: Session) -> tuple[bool, int | None]:
    """Check the integrity of the audit-log hash chain.

    Walks every entry in ascending ``seq`` order, recomputes its hash from the
    stored fields and the previous entry's hash, and compares both the
    recomputed hash and the stored ``prev_hash`` link.

    Returns:
        ``(True, None)`` when the whole chain is intact, otherwise
        ``(False, seq)`` with the ``seq`` of the first entry that fails.
    """
    ordered = select(AuditLog).order_by(AuditLog.seq.asc())
    expected_prev: str | None = None
    for entry in session.execute(ordered).scalars().all():
        recomputed = _compute_hash(
            expected_prev,
            {
                "actor": entry.actor,
                "role": entry.role,
                "action": entry.action,
                "target_type": entry.target_type,
                "target_id": entry.target_id,
                "detail": entry.detail or {},
            },
        )
        intact = recomputed == entry.entry_hash and entry.prev_hash == expected_prev
        if not intact:
            return False, entry.seq
        expected_prev = entry.entry_hash
    return True, None
|
||||
@@ -0,0 +1 @@
|
||||
"""线索引擎模块:线索模型、生成、置信度分级、状态流转(人机闭环)。"""
|
||||
@@ -0,0 +1,136 @@
|
||||
"""线索 ORM 模型。
|
||||
|
||||
对应需求 R7(线索+证据链+解释)、R17(闭环状态)、R18(置信度分级)、R19(线索不可删)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import enum
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import DateTime, Enum, Float, ForeignKey, Index, String, Text
|
||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db import Base
|
||||
|
||||
|
||||
def _enum_values(enum_cls):
    """Return the ``value`` of every member of ``enum_cls`` as a list.

    Passed as ``values_callable`` so SQLAlchemy persists the (lowercase) enum
    values in the native PostgreSQL enum instead of the member names.
    """
    values = []
    for member in enum_cls:
        values.append(member.value)
    return values
|
||||
|
||||
|
||||
def _uuid() -> uuid.UUID:
    """Return a new random (version 4) UUID; used as a column default."""
    fresh_id = uuid.uuid4()
    return fresh_id
|
||||
|
||||
|
||||
def _now() -> dt.datetime:
|
||||
return dt.datetime.now(dt.UTC)
|
||||
|
||||
|
||||
@enum.unique
class ConfidenceTier(str, enum.Enum):
    """Three-way confidence triage (R18)."""

    HIGH = "high"  # high confidence: pushed straight to handling
    MEDIUM = "medium"  # medium confidence: manual review
    LOW = "low"  # low confidence: archived for reference
|
||||
|
||||
|
||||
@enum.unique
class ClueStatus(str, enum.Enum):
    """Closed-loop clue state machine (R17)."""

    NEW = "new"  # newly generated
    ASSIGNED = "assigned"  # assigned
    REVIEWING = "reviewing"  # under assessment
    CONFIRMED = "confirmed"  # classified as substantiated
    DISMISSED = "dismissed"  # classified as a false positive
    RECTIFYING = "rectifying"  # rectification in progress
    TRANSFERRED = "transferred"  # handed over
    CLOSED = "closed"  # closed out
|
||||
|
||||
|
||||
class Clue(Base):
    """Audit clue.

    Per R19 a clue must not be physically deleted once generated; a clue that
    is no longer valid is expressed through its status instead.
    """

    __tablename__ = "clue"
    __table_args__ = (
        Index("ix_clue_status", "status"),
        Index("ix_clue_scenario", "scenario_code"),
        Index("ix_clue_assignee", "assignee"),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    title: Mapped[str] = mapped_column(String(256), nullable=False)
    # Risk domain: revenue / cost / procurement / funds / compliance.
    risk_domain: Mapped[str] = mapped_column(String(32), nullable=False)
    # Scenario code, e.g. R8 / R9.
    scenario_code: Mapped[str] = mapped_column(String(32), nullable=False)
    confidence: Mapped[ConfidenceTier] = mapped_column(
        Enum(ConfidenceTier, name="confidence_tier", values_callable=_enum_values),
        nullable=False,
    )
    # Risk score in the 0-1 range.
    score: Mapped[float] = mapped_column(Float, default=0.0)
    status: Mapped[ClueStatus] = mapped_column(
        Enum(ClueStatus, name="clue_status", values_callable=_enum_values),
        default=ClueStatus.NEW,
        nullable=False,
    )

    # Plain-language explanation (reason for the finding) and the evidence chain.
    rationale: Mapped[str] = mapped_column(Text, default="")
    evidence: Mapped[dict] = mapped_column(JSONB, default=dict)
    # Subjects involved (amounts, lists of entity ids, and so on).
    subjects: Mapped[dict] = mapped_column(JSONB, default=dict)
    amount_involved: Mapped[float | None] = mapped_column(Float, nullable=True)

    assignee: Mapped[str | None] = mapped_column(String(64), nullable=True)
    # False-positive / substantiated feedback (R18 feedback learning):
    # "confirmed" or "false_positive".
    feedback: Mapped[str | None] = mapped_column(String(16), nullable=True)

    # Traceability: model / rule / data versions in effect when the clue was
    # produced (R19 triple trace).
    model_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
    rule_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), nullable=True)

    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
    updated_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now, onupdate=_now
    )

    # NOTE(review): "all, delete-orphan" lets the ORM delete history rows along
    # with (or when detached from) a clue — confirm this is intended given the
    # no-deletion rule stated above.
    history: Mapped[list[ClueStatusHistory]] = relationship(
        back_populates="clue", cascade="all, delete-orphan"
    )
|
||||
|
||||
|
||||
class ClueStatusHistory(Base):
    """Trace record of one clue status transition (R17 / R19)."""

    __tablename__ = "clue_status_history"
    __table_args__ = (Index("ix_csh_clue", "clue_id"),)

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    clue_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
    )
    # Statuses are stored as plain strings; ``from_status`` may be NULL.
    from_status: Mapped[str | None] = mapped_column(String(16), nullable=True)
    to_status: Mapped[str] = mapped_column(String(16), nullable=False)
    actor: Mapped[str] = mapped_column(String(64), nullable=False)
    note: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)

    clue: Mapped[Clue] = relationship(back_populates="history")
|
||||
|
||||
|
||||
class WorkingPaper(Base):
    """Audit working paper (R17): generated automatically once assessment completes; traceable."""

    __tablename__ = "working_paper"
    __table_args__ = (Index("ix_wp_clue", "clue_id"),)

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    clue_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
    )
    content: Mapped[str] = mapped_column(Text, default="")
    conclusion: Mapped[str | None] = mapped_column(String(32), nullable=True)
    author: Mapped[str] = mapped_column(String(64), nullable=False)
    # Snapshot of the evidence and versions.
    snapshot: Mapped[dict] = mapped_column(JSONB, default=dict)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
|
||||
@@ -0,0 +1,195 @@
|
||||
"""线索服务:生成、置信度分级、状态流转、底稿生成、反馈。
|
||||
|
||||
对应 R7 / R17 / R18 / R19。所有状态变更写入历史并记自审计日志(线索不可删)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.audit import service as audit
|
||||
from app.clues.models import (
|
||||
Clue,
|
||||
ClueStatus,
|
||||
ClueStatusHistory,
|
||||
ConfidenceTier,
|
||||
WorkingPaper,
|
||||
)
|
||||
|
||||
# Permitted status transitions (R17 closed loop): current status -> statuses it
# may move to. CLOSED is terminal.
_ALLOWED_TRANSITIONS: dict[ClueStatus, set[ClueStatus]] = {
    ClueStatus.NEW: {ClueStatus.ASSIGNED, ClueStatus.REVIEWING},
    ClueStatus.ASSIGNED: {ClueStatus.REVIEWING},
    ClueStatus.REVIEWING: {ClueStatus.CONFIRMED, ClueStatus.DISMISSED},
    ClueStatus.CONFIRMED: {ClueStatus.RECTIFYING, ClueStatus.TRANSFERRED},
    ClueStatus.DISMISSED: {ClueStatus.CLOSED},
    ClueStatus.RECTIFYING: {ClueStatus.CLOSED},
    ClueStatus.TRANSFERRED: {ClueStatus.CLOSED},
    ClueStatus.CLOSED: set(),
}
|
||||
|
||||
|
||||
class IllegalTransitionError(ValueError):
    """Raised when a clue status transition is not permitted."""

    pass
|
||||
|
||||
|
||||
def score_to_tier(score: float) -> ConfidenceTier:
    """Map a risk score onto the three confidence tiers (R18).

    ``score >= 0.8`` is HIGH, ``score >= 0.5`` is MEDIUM, everything else is LOW.
    """
    thresholds = (
        (0.8, ConfidenceTier.HIGH),
        (0.5, ConfidenceTier.MEDIUM),
    )
    for floor, tier in thresholds:
        if score >= floor:
            return tier
    return ConfidenceTier.LOW
|
||||
|
||||
|
||||
def create_clue(
    session: Session,
    *,
    title: str,
    risk_domain: str,
    scenario_code: str,
    score: float,
    rationale: str,
    evidence: dict,
    subjects: dict | None = None,
    amount_involved: float | None = None,
    model_version: str | None = None,
    rule_version: str | None = None,
    data_version_id: uuid.UUID | None = None,
    actor: str = "system",
) -> Clue:
    """Create a clue, tier it from its score, and leave creation traces.

    The clue starts in status NEW. A status-history row and an audit-log entry
    are written in the same session; everything is flushed, not committed.

    Returns:
        The newly added ``Clue``.
    """
    tier = score_to_tier(score)
    clue = Clue(
        title=title,
        risk_domain=risk_domain,
        scenario_code=scenario_code,
        confidence=tier,
        score=score,
        status=ClueStatus.NEW,
        rationale=rationale,
        evidence=evidence,
        subjects=subjects or {},
        amount_involved=amount_involved,
        model_version=model_version,
        rule_version=rule_version,
        data_version_id=data_version_id,
    )
    session.add(clue)
    # Flush first so clue.id is populated for the history and audit rows.
    session.flush()

    _add_history(session, clue, None, ClueStatus.NEW, actor, "线索生成")

    audit_detail = {
        "scenario": scenario_code,
        "score": score,
        "confidence": clue.confidence.value,
    }
    audit.record(
        session,
        actor,
        "create_clue",
        target_type="clue",
        target_id=str(clue.id),
        detail=audit_detail,
    )
    return clue
|
||||
|
||||
|
||||
def _add_history(
    session: Session,
    clue: Clue,
    from_status: ClueStatus | None,
    to_status: ClueStatus,
    actor: str,
    note: str | None,
) -> None:
    """Add and flush one status-history row for ``clue``.

    Statuses are stored by their string value; ``from_status`` of None is
    stored as NULL.
    """
    previous = from_status.value if from_status else None
    entry = ClueStatusHistory(
        clue_id=clue.id,
        from_status=previous,
        to_status=to_status.value,
        actor=actor,
        note=note,
    )
    session.add(entry)
    session.flush()
|
||||
|
||||
|
||||
def transition(
    session: Session, clue: Clue, to_status: ClueStatus, actor: str, note: str | None = None
) -> Clue:
    """Move ``clue`` to ``to_status``, validating the move and leaving traces.

    Raises:
        IllegalTransitionError: when ``to_status`` is not among the statuses
            allowed from the clue's current status.

    Returns:
        The same ``clue`` with its status updated.
    """
    current = clue.status
    permitted = _ALLOWED_TRANSITIONS.get(current, set())
    if to_status not in permitted:
        raise IllegalTransitionError(
            f"线索状态不能从 {current.value} 流转到 {to_status.value}"
        )

    clue.status = to_status
    session.flush()

    # Trace the move in both the status history and the audit log.
    _add_history(session, clue, current, to_status, actor, note)
    change = {"from": current.value, "to": to_status.value, "note": note}
    audit.record(
        session,
        actor,
        "transition_clue",
        target_type="clue",
        target_id=str(clue.id),
        detail=change,
    )
    return clue
|
||||
|
||||
|
||||
def assign(session: Session, clue: Clue, assignee: str, actor: str) -> Clue:
    """Set the clue's assignee and record the assignment in the audit log.

    A clue still in status NEW is also moved to ASSIGNED; clues in any other
    status only have their assignee changed.
    """
    clue.assignee = assignee
    session.flush()

    is_new = clue.status == ClueStatus.NEW
    if is_new:
        transition(session, clue, ClueStatus.ASSIGNED, actor, f"分派给 {assignee}")

    audit.record(
        session,
        actor,
        "assign_clue",
        target_type="clue",
        target_id=str(clue.id),
        detail={"assignee": assignee},
    )
    return clue
|
||||
|
||||
|
||||
def adjudicate(
    session: Session, clue: Clue, confirmed: bool, actor: str, note: str | None = None
) -> WorkingPaper:
    """Classify a clue as substantiated or a false positive (R17 / R18).

    A clue that is not yet REVIEWING is first moved there when that move is
    allowed from its current status (NEW or ASSIGNED). It is then moved to
    CONFIRMED or DISMISSED, its feedback is recorded, and a working paper
    holding a snapshot of the clue's evidence and versions is created and
    audited.

    Args:
        session: Active SQLAlchemy session; changes are flushed, not committed.
        clue: The clue being classified.
        confirmed: True for substantiated, False for false positive.
        actor: Identifier of whoever performs the classification.
        note: Optional note; also used as the working paper content.

    Returns:
        The newly created ``WorkingPaper``.

    Raises:
        IllegalTransitionError: raised by ``transition`` when the clue's
            status does not allow the classification (e.g. it is already
            classified or closed).
    """
    # Enter assessment first when that step is legal. There is deliberately no
    # separate status guard here: transition() below validates every move and
    # rejects any status this clue cannot be classified from.
    needs_review_step = clue.status != ClueStatus.REVIEWING
    if needs_review_step and ClueStatus.REVIEWING in _ALLOWED_TRANSITIONS.get(
        clue.status, set()
    ):
        transition(session, clue, ClueStatus.REVIEWING, actor, "进入研判")

    to = ClueStatus.CONFIRMED if confirmed else ClueStatus.DISMISSED
    transition(session, clue, to, actor, note)
    clue.feedback = "confirmed" if confirmed else "false_positive"
    session.flush()

    paper = WorkingPaper(
        clue_id=clue.id,
        content=note or "",
        conclusion=to.value,
        author=actor,
        # Freeze what the decision was based on.
        snapshot={
            "evidence": clue.evidence,
            "rationale": clue.rationale,
            "score": clue.score,
            "model_version": clue.model_version,
            "rule_version": clue.rule_version,
            "data_version_id": str(clue.data_version_id) if clue.data_version_id else None,
        },
    )
    session.add(paper)
    session.flush()
    audit.record(
        session,
        actor,
        "create_working_paper",
        target_type="working_paper",
        target_id=str(paper.id),
        detail={"clue_id": str(clue.id), "conclusion": to.value},
    )
    return paper
|
||||
|
||||
|
||||
def list_clues(
    session: Session,
    *,
    status: ClueStatus | None = None,
    scenario_code: str | None = None,
    confidence: ConfidenceTier | None = None,
) -> list[Clue]:
    """Return clues ordered by score, highest first, optionally filtered.

    Each filter applies only when its argument is truthy; applied filters are
    combined with AND.
    """
    criteria = []
    if status:
        criteria.append(Clue.status == status)
    if scenario_code:
        criteria.append(Clue.scenario_code == scenario_code)
    if confidence:
        criteria.append(Clue.confidence == confidence)

    query = session.query(Clue)
    for criterion in criteria:
        query = query.filter(criterion)
    return query.order_by(Clue.score.desc()).all()
|
||||
@@ -0,0 +1,70 @@
|
||||
"""应用配置。
|
||||
|
||||
通过环境变量加载,区分 dev / prod 运行环境。
|
||||
prod 环境强制执行"数据零出域"红线:禁用任何公网 LLM Provider。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class AppEnv(str, Enum):
    """Runtime environment selector."""

    dev = "dev"  # development
    prod = "prod"  # production
|
||||
|
||||
|
||||
class LLMProviderName(str, Enum):
    """Names of the supported LLM providers."""

    dashscope = "dashscope"  # public-internet Qwen service; dev only
    vllm = "vllm"  # local deployment; prod
|
||||
|
||||
|
||||
# 被认定为"公网/出域"的 Provider,prod 下禁止使用
|
||||
EGRESS_PROVIDERS: frozenset[LLMProviderName] = frozenset({LLMProviderName.dashscope})
|
||||
|
||||
|
||||
# Application settings. Configured to read from environment variables and the
# ".env" file, with no prefix, case-insensitive names, and unknown keys ignored.
class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        env_prefix="",
        env_file=".env",
        extra="ignore",
        case_sensitive=False,
    )

    # Runtime environment.
    aiaudit_env: AppEnv = AppEnv.dev

    # Storage / broker endpoints.
    database_url: str = "postgresql+psycopg://freedak@localhost:5432/aiaudit"
    redis_url: str = "redis://localhost:6379/0"

    # LLM provider selection and per-provider settings.
    llm_provider: LLMProviderName = LLMProviderName.dashscope
    dashscope_api_key: str = ""
    dashscope_model: str = "qwen-plus"
    vllm_base_url: str = "http://localhost:8001/v1"
    vllm_model: str = "qwen2.5-72b-instruct"

    @property
    def is_prod(self) -> bool:
        """True when the configured environment is ``prod``."""
        return self.aiaudit_env == AppEnv.prod

    def validate_egress_policy(self) -> None:
        """Enforce the zero-data-egress red line: no public provider in prod.

        Meant to be called at application start-up; a violation raises and so
        blocks start-up.

        Raises:
            RuntimeError: when running in prod with a provider listed in
                ``EGRESS_PROVIDERS``.
        """
        if not self.is_prod:
            return
        if self.llm_provider not in EGRESS_PROVIDERS:
            return
        raise RuntimeError(
            f"数据零出域红线违规:prod 环境禁止使用公网 LLM Provider "
            f"'{self.llm_provider.value}'。请改用本地 Provider(如 vllm)。"
        )
|
||||
|
||||
|
||||
# Module-level cache holding the single Settings instance once it is built.
_settings: Settings | None = None


def get_settings() -> Settings:
    """Return the shared ``Settings`` instance, building it on first use."""
    global _settings
    if _settings is not None:
        return _settings
    _settings = Settings()
    return _settings
|
||||
@@ -0,0 +1 @@
|
||||
"""审计数据中台模块:本体/知识图谱、双时态、时序、数据版本。"""
|
||||
@@ -0,0 +1,83 @@
|
||||
"""双时态事实仓储:写入与"按历史时点回放"查询。
|
||||
|
||||
对应需求 R3 / ADR-0002:
|
||||
- 业务有效期 valid_from/valid_to(应用时间)
|
||||
- 系统记录期 system_from/system_to(事务时间)
|
||||
回放 = 给定 (as_of_valid, as_of_system) 在两条时间线上各取"包含该时点"的记录。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import or_
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.models import BitemporalFact
|
||||
|
||||
|
||||
def record_fact(
    session: Session,
    entity_id: uuid.UUID,
    attr_name: str,
    attr_value: dict,
    valid_from: dt.datetime,
    valid_to: dt.datetime | None = None,
    data_version_id: uuid.UUID | None = None,
) -> BitemporalFact:
    """Record one bitemporal fact and flush it.

    Only the business-validity bounds are passed in; ``system_from`` is not set
    here (per the original note it is filled automatically with the current
    transaction time — presumably by a model default; confirm in the model).

    Returns:
        The newly added ``BitemporalFact``.
    """
    fields = {
        "entity_id": entity_id,
        "attr_name": attr_name,
        "attr_value": attr_value,
        "valid_from": valid_from,
        "valid_to": valid_to,
        "data_version_id": data_version_id,
    }
    fact = BitemporalFact(**fields)
    session.add(fact)
    session.flush()
    return fact
|
||||
|
||||
|
||||
def as_of(
    session: Session,
    entity_id: uuid.UUID,
    attr_name: str,
    as_of_valid: dt.datetime,
    as_of_system: dt.datetime | None = None,
) -> BitemporalFact | None:
    """Replay: return the fact in force at a business time, as seen at a system time.

    - Business timeline: ``valid_from <= as_of_valid < valid_to`` (a NULL
      ``valid_to`` means "until now").
    - System timeline: ``system_from <= as_of_system < system_to`` (a NULL
      ``system_to`` means "currently visible").

    ``as_of_system`` defaults to the current UTC time. When several rows match,
    the one with the latest ``system_from`` is returned; None when none match.
    """
    system_point = as_of_system or dt.datetime.now(dt.UTC)

    valid_open_or_after = or_(
        BitemporalFact.valid_to.is_(None),
        BitemporalFact.valid_to > as_of_valid,
    )
    system_open_or_after = or_(
        BitemporalFact.system_to.is_(None),
        BitemporalFact.system_to > system_point,
    )

    query = session.query(BitemporalFact)
    query = query.filter(BitemporalFact.entity_id == entity_id)
    query = query.filter(BitemporalFact.attr_name == attr_name)
    query = query.filter(BitemporalFact.valid_from <= as_of_valid)
    query = query.filter(valid_open_or_after)
    query = query.filter(BitemporalFact.system_from <= system_point)
    query = query.filter(system_open_or_after)
    return query.order_by(BitemporalFact.system_from.desc()).first()
|
||||
|
||||
|
||||
def close_fact(
    session: Session, fact: BitemporalFact, system_to: dt.datetime | None = None
) -> None:
    """Logically close a fact's system-visibility period.

    Used for corrections and invalidation instead of physical deletion.
    ``system_to`` defaults to the current UTC time; the change is flushed.
    """
    closed_at = system_to if system_to else dt.datetime.now(dt.UTC)
    fact.system_to = closed_at
    session.add(fact)
    session.flush()
|
||||
@@ -0,0 +1,58 @@
|
||||
"""数据中台 schema 初始化。
|
||||
|
||||
MVP 阶段以 SQLAlchemy metadata 建表(后续可迁移到 Alembic)。
|
||||
扩展按可用性可选启用:
|
||||
- btree_gist / vector:若可用则创建。
|
||||
- timescaledb:若可用则把 metric_event 转为超表;不可用则保持普通表(带时间索引)。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.engine import Engine
|
||||
|
||||
from app.datahub import models # noqa: F401 确保模型注册到 metadata
|
||||
from app.db import Base, get_engine
|
||||
|
||||
|
||||
def _extension_available(engine: Engine, name: str) -> bool:
    """Return True when ``name`` is listed in ``pg_available_extensions``."""
    lookup = text("SELECT 1 FROM pg_available_extensions WHERE name = :n")
    with engine.connect() as conn:
        hit = conn.execute(lookup, {"n": name}).first()
    if hit is None:
        return False
    return True
|
||||
|
||||
|
||||
def init_extensions(engine: Engine) -> dict[str, bool]:
    """Create each optional extension that is available; report availability.

    Returns:
        A mapping of extension name to whether it was found available (and
        therefore had ``CREATE EXTENSION IF NOT EXISTS`` issued for it).
    """
    found: dict[str, bool] = {}
    for ext in ("btree_gist", "vector", "timescaledb"):
        found[ext] = _extension_available(engine, ext)
        if not found[ext]:
            continue
        # ``ext`` only ever comes from the fixed tuple above, so interpolating
        # it into the statement is safe.
        with engine.begin() as conn:
            conn.execute(text(f"CREATE EXTENSION IF NOT EXISTS {ext}"))
    return found
|
||||
|
||||
|
||||
def create_schema(engine: Engine | None = None) -> dict[str, bool]:
    """Create all data-hub tables and, when possible, the time-series hypertable.

    Args:
        engine: Engine to use; falls back to ``get_engine()`` when omitted.

    Returns:
        The extension status mapping produced by ``init_extensions``.
    """
    engine = engine or get_engine()
    status = init_extensions(engine)
    Base.metadata.create_all(engine)

    if not status.get("timescaledb"):
        return status

    # TimescaleDB is available: convert the time-series event table into a
    # hypertable. ``if_not_exists`` keeps the call idempotent.
    hypertable_sql = text(
        "SELECT create_hypertable('metric_event', 'event_time', "
        "if_not_exists => TRUE, migrate_data => TRUE)"
    )
    with engine.begin() as conn:
        conn.execute(hypertable_sql)
    return status
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: build the schema, then print the extension status.
    st = create_schema()
    print("数据中台 schema 初始化完成。扩展状态:", st)
|
||||
@@ -0,0 +1,118 @@
|
||||
"""知识图谱仓储:实体/关系写入与多跳穿透(递归 CTE)。
|
||||
|
||||
对应需求 R2:支撑隐性实控人、关联方网络、"马甲"供应商等穿透分析。
|
||||
统一穿透查询服务(P1.2.5)在此之上封装对外 API,对上层屏蔽底层是关系表还是图库。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.datahub.models import Entity, EntityRelationship
|
||||
from app.datahub.ontology import EntityType, RelationshipType, is_valid_relationship
|
||||
|
||||
|
||||
class OntologyViolationError(ValueError):
    """Raised when a relationship does not satisfy the ontology constraints."""

    pass
|
||||
|
||||
|
||||
def upsert_entity(
    session: Session,
    entity_type: EntityType,
    business_key: str,
    display_name: str | None = None,
    attributes: dict | None = None,
    data_version_id: uuid.UUID | None = None,
) -> Entity:
    """Idempotently write an entity keyed by (type, business key).

    When a matching entity exists it is updated in place: ``display_name`` is
    replaced when given, and non-empty ``attributes`` are merged over the
    stored ones (new keys win). ``data_version_id`` is only used when a new
    entity is created. New entities are added and flushed.

    Returns:
        The existing (possibly updated) or newly created ``Entity``.
    """
    lookup = session.query(Entity).filter(
        Entity.entity_type == entity_type.value,
        Entity.business_key == business_key,
    )
    found = lookup.one_or_none()

    if found is None:
        created = Entity(
            entity_type=entity_type.value,
            business_key=business_key,
            display_name=display_name,
            attributes=attributes or {},
            data_version_id=data_version_id,
        )
        session.add(created)
        session.flush()
        return created

    if display_name is not None:
        found.display_name = display_name
    if attributes:
        # Assign a new dict rather than mutating the stored one in place.
        found.attributes = {**(found.attributes or {}), **attributes}
    return found
|
||||
|
||||
|
||||
def add_relationship(
    session: Session,
    rel_type: RelationshipType,
    source: Entity,
    target: Entity,
    attributes: dict | None = None,
    data_version_id: uuid.UUID | None = None,
) -> EntityRelationship:
    """Insert one relationship edge after validating it against the ontology.

    The entity types of ``source`` and ``target`` are parsed into
    ``EntityType`` members and checked with ``is_valid_relationship``.

    Raises:
        OntologyViolationError: if ``rel_type`` may not connect the source
            entity type to the target entity type.

    Returns:
        The new ``EntityRelationship``, already added to the session and
        flushed.
    """
    src_type = EntityType(source.entity_type)
    tgt_type = EntityType(target.entity_type)

    permitted = is_valid_relationship(rel_type, src_type, tgt_type)
    if not permitted:
        message = f"关系 {rel_type.value} 不允许从 {src_type.value} 指向 {tgt_type.value}"
        raise OntologyViolationError(message)

    edge_fields = {
        "rel_type": rel_type.value,
        "source_id": source.id,
        "target_id": target.id,
        "attributes": attributes or {},
        "data_version_id": data_version_id,
    }
    edge = EntityRelationship(**edge_fields)
    session.add(edge)
    session.flush()
    return edge
|
||||
|
||||
|
||||
# Multi-hop traversal: walk relationship edges as if they were undirected and
# return every entity connected to the start node within :max_depth hops,
# together with its minimum hop count. Used to detect suspected shared
# controllers / related-party networks.
#
# Notes:
# - :start_id is cast to uuid explicitly so the column types of the
#   non-recursive term (uuid, integer, uuid[]) never depend on how the driver
#   types the bound parameter.
# - `path` holds the nodes already visited on the current walk and prevents
#   cycles.
# - GROUP BY already yields one row per entity, so no DISTINCT is needed.
_TRAVERSE_SQL = text(
    """
    WITH RECURSIVE reachable(entity_id, depth, path) AS (
        SELECT CAST(:start_id AS uuid), 0, ARRAY[CAST(:start_id AS uuid)]
        UNION ALL
        SELECT
            CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END,
            rc.depth + 1,
            rc.path || CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END
        FROM reachable rc
        JOIN entity_relationship r
          ON (r.source_id = rc.entity_id OR r.target_id = rc.entity_id)
        WHERE rc.depth < :max_depth
          AND NOT (
            CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END
            = ANY(rc.path)
          )
    )
    SELECT entity_id, MIN(depth) AS depth
    FROM reachable
    WHERE entity_id <> CAST(:start_id AS uuid)
    GROUP BY entity_id
    ORDER BY depth;
    """
)
|
||||
|
||||
|
||||
def find_related_entities(
    session: Session, start_id: uuid.UUID, max_depth: int = 3
) -> list[tuple[uuid.UUID, int]]:
    """Return entities connected to ``start_id`` within ``max_depth`` hops.

    Executes the module-level recursive traversal query and converts each
    result row into an ``(entity id, hop count)`` tuple, in the order the
    query returns them.
    """
    bind_params = {"start_id": start_id, "max_depth": max_depth}
    result = session.execute(_TRAVERSE_SQL, bind_params)
    return [(row[0], row[1]) for row in result.all()]
|
||||
@@ -0,0 +1,157 @@
|
||||
"""审计数据中台 ORM 模型。
|
||||
|
||||
涵盖:数据版本、本体实体、知识图谱关系边、双时态属性、时序事件。
|
||||
对应需求 R2 / R3,建模决策见 ADR-0002。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
import uuid
|
||||
|
||||
from sqlalchemy import (
|
||||
DateTime,
|
||||
Float,
|
||||
ForeignKey,
|
||||
Index,
|
||||
Integer,
|
||||
String,
|
||||
Text,
|
||||
UniqueConstraint,
|
||||
)
|
||||
from sqlalchemy.dialects.postgresql import JSONB, UUID
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from app.db import Base
|
||||
|
||||
|
||||
def _uuid() -> uuid.UUID:
    """Return a new random (version 4) UUID; used as the primary-key default."""
    return uuid.uuid4()
|
||||
|
||||
|
||||
class DataVersion(Base):
    """Data-version registry.

    Records the source, batch, time and row count of every ingested batch so
    that conclusions can be traced back to their data (requirement R3).
    """

    __tablename__ = "data_version"

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    source_system: Mapped[str] = mapped_column(String(64), nullable=False)
    batch_label: Mapped[str] = mapped_column(String(128), nullable=False)
    row_count: Mapped[int] = mapped_column(Integer, default=0)
    # Defaults to the current UTC time, evaluated on the Python side at insert.
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: dt.datetime.now(dt.UTC)
    )
    note: Mapped[str | None] = mapped_column(Text, nullable=True)
|
||||
|
||||
|
||||
class Entity(Base):
    """Ontology entity node (a vertex of the knowledge graph).

    ``business_key`` is the business primary key in the source system and is
    used for master-data alignment (normalising one entity across systems).
    """

    __tablename__ = "entity"
    __table_args__ = (
        # One row per (entity type, business key).
        UniqueConstraint("entity_type", "business_key", name="uq_entity_type_bizkey"),
        Index("ix_entity_type", "entity_type"),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    entity_type: Mapped[str] = mapped_column(String(32), nullable=False)
    business_key: Mapped[str] = mapped_column(String(128), nullable=False)
    display_name: Mapped[str | None] = mapped_column(String(256), nullable=True)
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)

    # Master-data alignment: the "golden" entity this row has been merged into
    # (same controller / same subject). NULL means this row is the canonical one.
    canonical_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=True
    )

    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )
|
||||
|
||||
|
||||
class EntityRelationship(Base):
    """Knowledge-graph relationship edge (directed).

    Multi-hop traversal walks this table with a recursive CTE.
    """

    __tablename__ = "entity_relationship"
    __table_args__ = (
        Index("ix_rel_source", "source_id"),
        Index("ix_rel_target", "target_id"),
        Index("ix_rel_type", "rel_type"),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    rel_type: Mapped[str] = mapped_column(String(32), nullable=False)
    source_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
    )
    target_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
    )
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)

    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )

    # Both relationships point at Entity, so each names its foreign key explicitly.
    source: Mapped[Entity] = relationship(foreign_keys=[source_id])
    target: Mapped[Entity] = relationship(foreign_keys=[target_id])
|
||||
|
||||
|
||||
class BitemporalFact(Base):
    """Bitemporal fact: a record of how an entity attribute/state changes over time.

    - Business validity period: valid_from / valid_to (application time).
    - System recording period: system_from / system_to (transaction time).

    Replaying history means filtering both timelines by a given
    (as_of_valid, as_of_system) pair (see the repository module).
    """

    __tablename__ = "bitemporal_fact"
    __table_args__ = (
        Index("ix_btf_entity_attr", "entity_id", "attr_name"),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    entity_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
    )
    attr_name: Mapped[str] = mapped_column(String(64), nullable=False)
    attr_value: Mapped[dict] = mapped_column(JSONB, default=dict)

    # Application-time interval; valid_to may be NULL.
    valid_from: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), nullable=False)
    valid_to: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # Transaction-time interval; system_from defaults to the current UTC time.
    system_from: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: dt.datetime.now(dt.UTC)
    )
    system_to: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)

    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )
|
||||
|
||||
|
||||
class MetricEvent(Base):
    """Time-series event: behavioural / metric data.

    Covers user lifecycle, payment collection, call traffic, commission and
    resource usage. After deployment the table is converted into a
    TimescaleDB hypertable via create_hypertable('metric_event', 'event_time').

    The primary key is the composite (id, event_time), matching the
    ``metric_event`` table created by migration ``0001_init_datahub``: a
    hypertable's primary key has to include the partitioning column, so a key
    on ``id`` alone would make hypertable conversion fail for tables created
    from this model's metadata.
    """

    __tablename__ = "metric_event"
    __table_args__ = (
        Index("ix_metric_subject_time", "subject_type", "subject_key", "event_time"),
        Index("ix_metric_name_time", "metric_name", "event_time"),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    # Partitioning column of the hypertable; part of the composite primary key.
    event_time: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), primary_key=True, nullable=False
    )
    subject_type: Mapped[str] = mapped_column(String(32), nullable=False)  # e.g. msisdn/channel
    subject_key: Mapped[str] = mapped_column(String(128), nullable=False)
    metric_name: Mapped[str] = mapped_column(String(64), nullable=False)  # e.g. traffic_mb/commission
    metric_value: Mapped[float] = mapped_column(Float, default=0.0)
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)

    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )
|
||||
@@ -0,0 +1,86 @@
|
||||
"""审计本体(Ontology)定义。
|
||||
|
||||
定义电信内审域的核心实体类型与关系类型,作为知识图谱与主数据对齐的基准。
|
||||
对应需求 R2。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from enum import Enum
|
||||
|
||||
|
||||
class EntityType(str, Enum):
    """Core entity types of the audit ontology."""

    CUSTOMER = "customer"  # customer (including government/enterprise)
    CONTRACT = "contract"  # contract
    MSISDN = "msisdn"  # phone number
    IMEI = "imei"  # terminal device
    ACCOUNT = "account"  # account (paying / receiving)
    WORK_ORDER = "work_order"  # work order
    SUPPLIER = "supplier"  # supplier
    SETTLEMENT = "settlement"  # settlement statement
    EMPLOYEE = "employee"  # employee
    CHANNEL = "channel"  # channel / agent
    LEGAL_PERSON = "legal_person"  # legal person / natural person
    ADDRESS = "address"  # address
|
||||
|
||||
|
||||
class RelationshipType(str, Enum):
    """Core relationship types of the audit ontology (directed)."""

    SIGNED = "signed"  # customer -signs-> contract
    PAID_BY = "paid_by"  # contract -payment account-> account
    OWNS_ACCOUNT = "owns_account"  # customer/supplier/legal person -owns-> account
    REGISTERED_AT = "registered_at"  # customer/supplier -registered address-> address
    LEGAL_REP_OF = "legal_rep_of"  # legal person -legal representative of-> customer/supplier
    RELATED_TO = "related_to"  # legal person -relative/associate-> legal person
    HOLDS_MSISDN = "holds_msisdn"  # customer -holds-> phone number
    BOUND_DEVICE = "bound_device"  # phone number -bound to-> IMEI
    BELONGS_TO_CHANNEL = "belongs_to_channel"  # phone number/contract -belongs to-> channel
    SUPPLIES = "supplies"  # supplier -supplies-> contract/work order
    HANDLED_BY = "handled_by"  # work order -handled by-> employee
    SETTLES = "settles"  # settlement statement -settles-> contract
|
||||
|
||||
|
||||
# Allowed (source entity types, target entity types) for each relationship
# type; used to validate writes to the knowledge graph.
RELATIONSHIP_DOMAIN: dict[RelationshipType, tuple[set[EntityType], set[EntityType]]] = {
    RelationshipType.SIGNED: ({EntityType.CUSTOMER}, {EntityType.CONTRACT}),
    RelationshipType.PAID_BY: ({EntityType.CONTRACT}, {EntityType.ACCOUNT}),
    RelationshipType.OWNS_ACCOUNT: (
        {EntityType.CUSTOMER, EntityType.SUPPLIER, EntityType.LEGAL_PERSON},
        {EntityType.ACCOUNT},
    ),
    RelationshipType.REGISTERED_AT: (
        {EntityType.CUSTOMER, EntityType.SUPPLIER},
        {EntityType.ADDRESS},
    ),
    RelationshipType.LEGAL_REP_OF: (
        {EntityType.LEGAL_PERSON},
        {EntityType.CUSTOMER, EntityType.SUPPLIER},
    ),
    RelationshipType.RELATED_TO: ({EntityType.LEGAL_PERSON}, {EntityType.LEGAL_PERSON}),
    RelationshipType.HOLDS_MSISDN: ({EntityType.CUSTOMER}, {EntityType.MSISDN}),
    RelationshipType.BOUND_DEVICE: ({EntityType.MSISDN}, {EntityType.IMEI}),
    RelationshipType.BELONGS_TO_CHANNEL: (
        {EntityType.MSISDN, EntityType.CONTRACT},
        {EntityType.CHANNEL},
    ),
    RelationshipType.SUPPLIES: (
        {EntityType.SUPPLIER},
        {EntityType.CONTRACT, EntityType.WORK_ORDER},
    ),
    RelationshipType.HANDLED_BY: ({EntityType.WORK_ORDER}, {EntityType.EMPLOYEE}),
    RelationshipType.SETTLES: ({EntityType.SETTLEMENT}, {EntityType.CONTRACT}),
}
|
||||
|
||||
|
||||
def is_valid_relationship(
    rel: RelationshipType, source: EntityType, target: EntityType
) -> bool:
    """Check a relationship's endpoint types against the ontology constraints.

    Returns ``False`` when ``rel`` has no entry in ``RELATIONSHIP_DOMAIN``;
    otherwise ``True`` only if ``source`` is an allowed source type and
    ``target`` is an allowed target type for that relationship.
    """
    if rel not in RELATIONSHIP_DOMAIN:
        return False
    allowed_sources, allowed_targets = RELATIONSHIP_DOMAIN[rel]
    return source in allowed_sources and target in allowed_targets
|
||||
@@ -0,0 +1,40 @@
|
||||
"""数据库引擎与会话管理。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Iterator
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
|
||||
|
||||
from app.config import get_settings
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
    """Base class for all ORM models."""
|
||||
|
||||
|
||||
# Lazily created, process-wide singletons; populated on first use by
# get_engine() and get_sessionmaker() respectively.
_engine = None
_SessionLocal: sessionmaker[Session] | None = None
|
||||
|
||||
|
||||
def get_engine():
    """Return the shared SQLAlchemy engine, creating it on first call.

    The engine is built from the application settings' ``database_url`` and
    cached in the module-level ``_engine`` variable.
    """
    global _engine
    if _engine is not None:
        return _engine

    database_url = get_settings().database_url
    _engine = create_engine(database_url, pool_pre_ping=True, future=True)
    return _engine
|
||||
|
||||
|
||||
def get_sessionmaker() -> sessionmaker[Session]:
    """Return the shared session factory, creating it on first call.

    The factory is bound to the engine from ``get_engine()`` and configured
    with ``expire_on_commit=False``.
    """
    global _SessionLocal
    if _SessionLocal is not None:
        return _SessionLocal

    engine = get_engine()
    _SessionLocal = sessionmaker(bind=engine, expire_on_commit=False)
    return _SessionLocal
|
||||
|
||||
|
||||
def get_session() -> Iterator[Session]:
    """Session generator intended for FastAPI dependency injection.

    Yields one session from the shared session factory and closes it once the
    consumer is done with it, including when an exception propagates.
    """
    session_factory = get_sessionmaker()
    session = session_factory()
    try:
        yield session
    finally:
        session.close()
|
||||
@@ -0,0 +1,10 @@
|
||||
"""LLM Provider 抽象层。
|
||||
|
||||
通过统一接口隔离 LLM 实现,使开发期可用公网千问、生产期无缝切换本地 vLLM。
|
||||
强约束:"数据零出域"红线由 provider 工厂在 prod 环境拦截公网 Provider。
|
||||
"""
|
||||
|
||||
from app.llm.base import ChatMessage, LLMProvider, LLMResponse
|
||||
from app.llm.factory import get_llm_provider
|
||||
|
||||
__all__ = ["ChatMessage", "LLMProvider", "LLMResponse", "get_llm_provider"]
|
||||
@@ -0,0 +1,44 @@
|
||||
"""LLM Provider 抽象接口与数据模型。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import abc
|
||||
from dataclasses import dataclass, field
|
||||
|
||||
|
||||
@dataclass
class ChatMessage:
    """A single message of a chat conversation sent to an LLM provider."""

    role: str  # "system" | "user" | "assistant"
    content: str
|
||||
|
||||
|
||||
@dataclass
class LLMResponse:
    """Result of a chat completion, tagged with the model and provider used."""

    content: str
    model: str
    provider: str
    # Whether the call went through an egress (public network) channel;
    # kept so it can be recorded in the audit trail.
    egress: bool = False
    raw: dict = field(default_factory=dict)
|
||||
|
||||
|
||||
class LLMProvider(abc.ABC):
    """Common interface for all LLM implementations.

    Business code depends only on this interface; switching between a public
    and a local provider is a configuration change, not a caller change.
    """

    #: Provider name.
    name: str = "base"
    #: Whether the provider goes over the public network (data egress).
    #: Providers with egress=True are forbidden in the prod environment.
    egress: bool = False

    @abc.abstractmethod
    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """Synchronous chat completion."""
        raise NotImplementedError

    @abc.abstractmethod
    def health(self) -> bool:
        """Liveness probe: whether the provider is usable."""
        raise NotImplementedError
|
||||
@@ -0,0 +1,31 @@
|
||||
"""LLM Provider 工厂:按配置创建 provider,并执行数据零出域红线校验。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from app.config import EGRESS_PROVIDERS, LLMProviderName, Settings, get_settings
|
||||
from app.llm.base import LLMProvider
|
||||
from app.llm.providers import DashScopeProvider, VllmProvider
|
||||
|
||||
|
||||
class EgressPolicyError(RuntimeError):
    """Raised when the zero-data-egress red line is violated."""
|
||||
|
||||
|
||||
def get_llm_provider(settings: Settings | None = None) -> LLMProvider:
    """Build the LLM provider selected by configuration.

    Args:
        settings: Settings to use; when omitted (or falsy) the application
            settings from ``get_settings()`` are used.

    Raises:
        EgressPolicyError: in the prod environment when the configured
            provider is one of ``EGRESS_PROVIDERS``.
        ValueError: when the configured provider name is not recognised.
    """
    settings = settings or get_settings()
    selected = settings.llm_provider

    # Red line: public-network providers are forbidden in prod.
    violates_red_line = settings.is_prod and selected in EGRESS_PROVIDERS
    if violates_red_line:
        raise EgressPolicyError(
            f"数据零出域红线违规:prod 环境禁止使用公网 LLM Provider "
            f"'{settings.llm_provider.value}'。"
        )

    if selected == LLMProviderName.dashscope:
        return DashScopeProvider(
            api_key=settings.dashscope_api_key,
            model=settings.dashscope_model,
        )

    if selected == LLMProviderName.vllm:
        return VllmProvider(
            base_url=settings.vllm_base_url,
            model=settings.vllm_model,
        )

    raise ValueError(f"未知的 LLM Provider: {settings.llm_provider}")
|
||||
@@ -0,0 +1,80 @@
|
||||
"""具体 LLM Provider 实现:DashScope(公网千问,仅 dev)、vLLM(本地,prod)。
|
||||
|
||||
两者均走 OpenAI 兼容的 /chat/completions 协议。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import httpx
|
||||
|
||||
from app.llm.base import ChatMessage, LLMProvider, LLMResponse
|
||||
|
||||
|
||||
class DashScopeProvider(LLMProvider):
    """Public Qwen via DashScope (OpenAI-compatible mode).

    Development/testing only, and only with desensitised or sample data.
    """

    name = "dashscope"
    egress = True  # goes over the public network (data leaves the domain)

    _BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"

    def __init__(self, api_key: str, model: str, timeout: float = 30.0) -> None:
        """Store the API key, model name and HTTP timeout (seconds)."""
        self._api_key = api_key
        self._model = model
        self._timeout = timeout

    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """POST the conversation to ``/chat/completions`` and wrap the reply.

        Extra keyword arguments are merged into the request payload. An HTTP
        error status raises via ``raise_for_status()``.
        """
        wire_messages = [{"role": msg.role, "content": msg.content} for msg in messages]
        request_body = {"model": self._model, "messages": wire_messages, **kwargs}
        auth_headers = {"Authorization": f"Bearer {self._api_key}"}
        endpoint = f"{self._BASE_URL}/chat/completions"

        with httpx.Client(timeout=self._timeout) as client:
            response = client.post(endpoint, json=request_body, headers=auth_headers)
            response.raise_for_status()
            body = response.json()

        answer = body["choices"][0]["message"]["content"]
        return LLMResponse(
            content=answer,
            model=self._model,
            provider=self.name,
            egress=True,
            raw=body,
        )

    def health(self) -> bool:
        """Report whether an API key is configured (no network call is made)."""
        return bool(self._api_key)
|
||||
|
||||
|
||||
class VllmProvider(LLMProvider):
    """Local vLLM (OpenAI-compatible). Used in production; data stays in-domain."""

    name = "vllm"
    egress = False

    def __init__(self, base_url: str, model: str, timeout: float = 60.0) -> None:
        """Store the base URL (trailing slashes removed), model and timeout."""
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._timeout = timeout

    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """POST the conversation to ``/chat/completions`` and wrap the reply.

        Extra keyword arguments are merged into the request payload. An HTTP
        error status raises via ``raise_for_status()``.
        """
        wire_messages = [{"role": msg.role, "content": msg.content} for msg in messages]
        request_body = {"model": self._model, "messages": wire_messages, **kwargs}
        endpoint = f"{self._base_url}/chat/completions"

        with httpx.Client(timeout=self._timeout) as client:
            response = client.post(endpoint, json=request_body)
            response.raise_for_status()
            body = response.json()

        answer = body["choices"][0]["message"]["content"]
        return LLMResponse(
            content=answer,
            model=self._model,
            provider=self.name,
            egress=False,
            raw=body,
        )

    def health(self) -> bool:
        """Probe ``GET {base_url}/models``; True only on HTTP 200.

        Any ``httpx.HTTPError`` raised by the probe is reported as unhealthy.
        """
        models_url = f"{self._base_url}/models"
        try:
            with httpx.Client(timeout=5.0) as client:
                probe = client.get(models_url)
                return probe.status_code == 200
        except httpx.HTTPError:
            return False
|
||||
@@ -0,0 +1,45 @@
|
||||
"""AIAudit FastAPI 应用入口。"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
from fastapi import FastAPI
|
||||
|
||||
from app import __version__
|
||||
from app.api.datahub import router as datahub_router
|
||||
from app.config import get_settings
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook.

    On startup, runs the zero-data-egress policy check from the application
    settings; an exception raised by the check prevents the app from starting.
    """
    get_settings().validate_egress_policy()
    yield
|
||||
|
||||
|
||||
# The ASGI application object; `lifespan` runs the startup policy check.
app = FastAPI(
    title="AIAudit · 本地 AI 内审平台",
    version=__version__,
    lifespan=lifespan,
)

# Mount the data-hub query API.
app.include_router(datahub_router)
|
||||
|
||||
|
||||
@app.get("/health")
def health() -> dict:
    """Liveness probe: returns a fixed "ok" status and the package version."""
    return {"status": "ok", "version": __version__}
|
||||
|
||||
|
||||
@app.get("/health/config")
def health_config() -> dict:
    """Configuration/compliance probe.

    Exposes the runtime environment, the configured LLM provider and the
    ``is_prod`` flag (reported as ``egress_blocked_in_prod``). No secrets are
    included.
    """
    cfg = get_settings()
    env_name = cfg.aiaudit_env.value
    provider_name = cfg.llm_provider.value
    return {
        "env": env_name,
        "llm_provider": provider_name,
        "egress_blocked_in_prod": cfg.is_prod,
    }
|
||||
@@ -0,0 +1,7 @@
|
||||
# 数据库迁移(Alembic)
|
||||
|
||||
- 生成迁移:`alembic revision --autogenerate -m "描述"`
|
||||
- 应用迁移:`alembic upgrade head`
|
||||
- 回滚一步:`alembic downgrade -1`
|
||||
|
||||
模型定义见 `app/datahub/models.py`;连接串取自应用配置(`DATABASE_URL`)。
|
||||
@@ -0,0 +1,59 @@
|
||||
"""Alembic 迁移环境。
|
||||
|
||||
从应用配置读取数据库 URL,并以 app.db.Base 的元数据作为 autogenerate 目标。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from logging.config import fileConfig
|
||||
|
||||
from alembic import context
|
||||
from sqlalchemy import engine_from_config, pool
|
||||
|
||||
from app.audit import models as audit_models # noqa: F401,E402
|
||||
from app.clues import models as clue_models # noqa: F401,E402
|
||||
from app.config import get_settings
|
||||
|
||||
# 导入模型以注册到 Base.metadata
|
||||
from app.datahub import models # noqa: F401,E402
|
||||
from app.db import Base
|
||||
|
||||
# Alembic Config object, giving access to the values in the .ini file in use.
config = context.config

# Set up Python logging from the .ini file, when one is present.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# Override sqlalchemy.url with the application configuration.
# set_main_option() stores the value in a ConfigParser, which applies
# %-interpolation when the value is read back. A literal "%" in the URL
# (for example in a URL-encoded password) must therefore be escaped as "%%",
# otherwise reading the option fails or yields a corrupted URL.
config.set_main_option(
    "sqlalchemy.url", get_settings().database_url.replace("%", "%%")
)

# Metadata that autogenerate compares the database against.
target_metadata = Base.metadata
|
||||
|
||||
|
||||
def run_migrations_offline() -> None:
    """Run migrations in offline mode.

    Configures the migration context with just the database URL (no live
    connection), rendering bound parameters as literals, then runs the
    migrations inside a migration-context transaction.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
|
||||
|
||||
|
||||
def run_migrations_online() -> None:
    """Run migrations in online mode.

    Builds an engine from the ``sqlalchemy.``-prefixed options of the main
    config section (without connection pooling), opens a connection and runs
    the migrations on it inside a migration-context transaction.
    """
    section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(section, prefix="sqlalchemy.", poolclass=pool.NullPool)

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
|
||||
|
||||
|
||||
# Module-level dispatch: pick the offline or online runner depending on the
# mode reported by the Alembic context.
if context.is_offline_mode():
    run_migrations_offline()
else:
    run_migrations_online()
|
||||
@@ -0,0 +1,24 @@
|
||||
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op
${imports if imports else ""}

revision: str = ${repr(up_revision)}
down_revision: str | None = ${repr(down_revision)}
branch_labels: str | Sequence[str] | None = ${repr(branch_labels)}
depends_on: str | Sequence[str] | None = ${repr(depends_on)}


def upgrade() -> None:
    ${upgrades if upgrades else "pass"}


def downgrade() -> None:
    ${downgrades if downgrades else "pass"}
|
||||
@@ -0,0 +1,140 @@
|
||||
"""初始化数据中台表:数据版本 / 实体 / 关系 / 双时态事实 / 时序事件
|
||||
|
||||
Revision ID: 0001_init_datahub
|
||||
Revises:
|
||||
Create Date: 2026-06
|
||||
"""
|
||||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
revision: str = "0001_init_datahub"
|
||||
down_revision: str | None = None
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the data-hub tables, their indexes and optional extension features.

    Tables are created in dependency order (data_version, entity,
    entity_relationship, bitemporal_fact, metric_event). The hypertable
    conversion and the bitemporal exclusion constraint are only applied when
    the corresponding PostgreSQL extension is already installed.
    """
    # data_version
    op.create_table(
        "data_version",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("source_system", sa.String(64), nullable=False),
        sa.Column("batch_label", sa.String(128), nullable=False),
        sa.Column("row_count", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("ingested_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("note", sa.Text(), nullable=True),
    )

    # entity
    op.create_table(
        "entity",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("entity_type", sa.String(32), nullable=False),
        sa.Column("business_key", sa.String(128), nullable=False),
        sa.Column("display_name", sa.String(256), nullable=True),
        sa.Column("attributes", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("canonical_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.ForeignKeyConstraint(["canonical_id"], ["entity.id"]),
        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
        sa.UniqueConstraint("entity_type", "business_key", name="uq_entity_type_bizkey"),
    )
    op.create_index("ix_entity_type", "entity", ["entity_type"])

    # entity_relationship
    op.create_table(
        "entity_relationship",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("rel_type", sa.String(32), nullable=False),
        sa.Column("source_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("target_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("attributes", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.ForeignKeyConstraint(["source_id"], ["entity.id"]),
        sa.ForeignKeyConstraint(["target_id"], ["entity.id"]),
        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
    )
    op.create_index("ix_rel_source", "entity_relationship", ["source_id"])
    op.create_index("ix_rel_target", "entity_relationship", ["target_id"])
    op.create_index("ix_rel_type", "entity_relationship", ["rel_type"])

    # bitemporal_fact
    op.create_table(
        "bitemporal_fact",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("entity_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("attr_name", sa.String(64), nullable=False),
        sa.Column("attr_value", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("valid_from", sa.DateTime(timezone=True), nullable=False),
        sa.Column("valid_to", sa.DateTime(timezone=True), nullable=True),
        sa.Column("system_from", sa.DateTime(timezone=True), nullable=False),
        sa.Column("system_to", sa.DateTime(timezone=True), nullable=True),
        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"]),
        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
    )
    op.create_index("ix_btf_entity_attr", "bitemporal_fact", ["entity_id", "attr_name"])

    # metric_event (time series)
    op.create_table(
        "metric_event",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("event_time", sa.DateTime(timezone=True), nullable=False),
        sa.Column("subject_type", sa.String(32), nullable=False),
        sa.Column("subject_key", sa.String(128), nullable=False),
        sa.Column("metric_name", sa.String(64), nullable=False),
        sa.Column("metric_value", sa.Float(), nullable=False, server_default="0"),
        sa.Column("attributes", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
        # A hypertable's primary key must include the partitioning column event_time.
        sa.PrimaryKeyConstraint("id", "event_time"),
        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
    )
    op.create_index(
        "ix_metric_subject_time",
        "metric_event",
        ["subject_type", "subject_key", "event_time"],
    )
    op.create_index("ix_metric_name_time", "metric_event", ["metric_name", "event_time"])

    # Convert to a TimescaleDB hypertable (skipped when the extension is not
    # installed, so tests can run in environments without timescaledb).
    op.execute(
        """
        DO $$
        BEGIN
            IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'timescaledb') THEN
                PERFORM create_hypertable('metric_event', 'event_time', if_not_exists => TRUE);
            END IF;
        END$$;
        """
    )

    # Bitemporal exclusion constraint: for the same entity and attribute, the
    # business validity periods of current rows (system_to IS NULL) must not
    # overlap. Requires the btree_gist extension; skipped when it is absent.
    op.execute(
        """
        DO $$
        BEGIN
            IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'btree_gist') THEN
                ALTER TABLE bitemporal_fact
                ADD CONSTRAINT ex_btf_no_overlap
                EXCLUDE USING gist (
                    entity_id WITH =,
                    attr_name WITH =,
                    tstzrange(valid_from, valid_to) WITH &&
                ) WHERE (system_to IS NULL);
            END IF;
        END$$;
        """
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Drop everything created by ``upgrade()``, dependants before parents."""
    # Tables that reference entity / data_version go first.
    for dependent_table in ("metric_event", "bitemporal_fact", "entity_relationship"):
        op.drop_table(dependent_table)

    op.drop_index("ix_entity_type", table_name="entity")

    for parent_table in ("entity", "data_version"):
        op.drop_table(parent_table)
|
||||
@@ -0,0 +1,146 @@
|
||||
"""线索引擎与系统自审计表:clue / clue_status_history / working_paper / audit_log
|
||||
|
||||
Revision ID: 0002_clues_audit
|
||||
Revises: 0001_init_datahub
|
||||
Create Date: 2026-06
|
||||
"""
|
||||
from collections.abc import Sequence
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
from sqlalchemy.dialects import postgresql
|
||||
|
||||
revision: str = "0002_clues_audit"
|
||||
down_revision: str | None = "0001_init_datahub"
|
||||
branch_labels: str | Sequence[str] | None = None
|
||||
depends_on: str | Sequence[str] | None = None
|
||||
|
||||
# PostgreSQL enum types used by the clue table. create_type=False keeps
# table creation from emitting CREATE TYPE implicitly; upgrade() creates the
# types explicitly with checkfirst=True.
confidence_tier = postgresql.ENUM(
    "high", "medium", "low", name="confidence_tier", create_type=False
)
clue_status = postgresql.ENUM(
    "new", "assigned", "reviewing", "confirmed", "dismissed",
    "rectifying", "transferred", "closed", name="clue_status", create_type=False,
)
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Create the clue-engine and self-audit tables, their indexes and guard triggers.

    Steps, in order:
      1. Create the ``confidence_tier`` and ``clue_status`` enum types if missing.
      2. Create ``clue``, ``clue_status_history``, ``working_paper`` and
         ``audit_log``, each followed by its indexes.
      3. Install row-level triggers that raise on DELETE from ``clue`` and
         ``audit_log``, and on UPDATE of ``audit_log``.
    """
    connection = op.get_bind()
    for enum_type in (confidence_tier, clue_status):
        enum_type.create(connection, checkfirst=True)

    def uuid_col(name: str, **options) -> sa.Column:
        # UUID column; the caller decides primary_key / nullable.
        return sa.Column(name, postgresql.UUID(as_uuid=True), **options)

    def timestamp_col(name: str) -> sa.Column:
        # Mandatory timezone-aware timestamp column.
        return sa.Column(name, sa.DateTime(timezone=True), nullable=False)

    def json_col(name: str) -> sa.Column:
        # Mandatory JSONB column defaulting to an empty object.
        return sa.Column(name, postgresql.JSONB(), nullable=False, server_default="{}")

    def clue_fk() -> sa.ForeignKeyConstraint:
        # A fresh constraint object per table: clue_id -> clue.id.
        return sa.ForeignKeyConstraint(["clue_id"], ["clue.id"])

    op.create_table(
        "clue",
        uuid_col("id", primary_key=True),
        sa.Column("title", sa.String(256), nullable=False),
        sa.Column("risk_domain", sa.String(32), nullable=False),
        sa.Column("scenario_code", sa.String(32), nullable=False),
        sa.Column("confidence", confidence_tier, nullable=False),
        sa.Column("score", sa.Float(), nullable=False, server_default="0"),
        sa.Column("status", clue_status, nullable=False, server_default="new"),
        sa.Column("rationale", sa.Text(), nullable=False, server_default=""),
        json_col("evidence"),
        json_col("subjects"),
        sa.Column("amount_involved", sa.Float(), nullable=True),
        sa.Column("assignee", sa.String(64), nullable=True),
        sa.Column("feedback", sa.String(16), nullable=True),
        sa.Column("model_version", sa.String(64), nullable=True),
        sa.Column("rule_version", sa.String(64), nullable=True),
        uuid_col("data_version_id", nullable=True),
        timestamp_col("created_at"),
        timestamp_col("updated_at"),
    )
    for index_name, column in (
        ("ix_clue_status", "status"),
        ("ix_clue_scenario", "scenario_code"),
        ("ix_clue_assignee", "assignee"),
    ):
        op.create_index(index_name, "clue", [column])

    op.create_table(
        "clue_status_history",
        uuid_col("id", primary_key=True),
        uuid_col("clue_id", nullable=False),
        sa.Column("from_status", sa.String(16), nullable=True),
        sa.Column("to_status", sa.String(16), nullable=False),
        sa.Column("actor", sa.String(64), nullable=False),
        sa.Column("note", sa.Text(), nullable=True),
        timestamp_col("created_at"),
        clue_fk(),
    )
    op.create_index("ix_csh_clue", "clue_status_history", ["clue_id"])

    op.create_table(
        "working_paper",
        uuid_col("id", primary_key=True),
        uuid_col("clue_id", nullable=False),
        sa.Column("content", sa.Text(), nullable=False, server_default=""),
        sa.Column("conclusion", sa.String(32), nullable=True),
        sa.Column("author", sa.String(64), nullable=False),
        json_col("snapshot"),
        timestamp_col("created_at"),
        clue_fk(),
    )
    op.create_index("ix_wp_clue", "working_paper", ["clue_id"])

    op.create_table(
        "audit_log",
        uuid_col("id", primary_key=True),
        sa.Column("seq", sa.BigInteger(), sa.Identity(always=False), nullable=False),
        sa.Column("actor", sa.String(64), nullable=False),
        sa.Column("role", sa.String(32), nullable=True),
        sa.Column("action", sa.String(64), nullable=False),
        sa.Column("target_type", sa.String(64), nullable=True),
        sa.Column("target_id", sa.String(128), nullable=True),
        json_col("detail"),
        timestamp_col("created_at"),
        sa.Column("prev_hash", sa.String(64), nullable=True),
        sa.Column("entry_hash", sa.String(64), nullable=False),
    )
    for index_name, column in (("ix_audit_actor", "actor"), ("ix_audit_action", "action")):
        op.create_index(index_name, "audit_log", [column])
    op.create_index("ix_audit_seq", "audit_log", ["seq"], unique=True)

    # R19: database-level backstop forbidding physical deletion of clues and
    # audit-log rows.
    op.execute(
        """
        CREATE OR REPLACE FUNCTION forbid_delete() RETURNS trigger AS $$
        BEGIN
            RAISE EXCEPTION '禁止删除:% 表受 R19 不可删除约束保护', TG_TABLE_NAME;
        END;
        $$ LANGUAGE plpgsql;
        """
    )
    for trigger_name, table_name in (
        ("trg_clue_no_delete", "clue"),
        ("trg_audit_no_delete", "audit_log"),
    ):
        op.execute(
            f"CREATE TRIGGER {trigger_name} BEFORE DELETE ON {table_name} "
            "FOR EACH ROW EXECUTE FUNCTION forbid_delete();"
        )

    # The audit log is append-only: updates are rejected as well.
    op.execute(
        """
        CREATE OR REPLACE FUNCTION forbid_update() RETURNS trigger AS $$
        BEGIN
            RAISE EXCEPTION '禁止更新:% 表为仅追加日志', TG_TABLE_NAME;
        END;
        $$ LANGUAGE plpgsql;
        """
    )
    op.execute(
        "CREATE TRIGGER trg_audit_no_update BEFORE UPDATE ON audit_log "
        "FOR EACH ROW EXECUTE FUNCTION forbid_update();"
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Undo this revision: drop the triggers, tables, enum types and trigger functions."""
    # Triggers go first so the guarded tables can be dropped afterwards.
    for trigger_name, table_name in (
        ("trg_audit_no_update", "audit_log"),
        ("trg_audit_no_delete", "audit_log"),
        ("trg_clue_no_delete", "clue"),
    ):
        op.execute(f"DROP TRIGGER IF EXISTS {trigger_name} ON {table_name};")

    # Tables that reference ``clue`` are dropped before ``clue`` itself.
    for table_name in ("audit_log", "working_paper", "clue_status_history", "clue"):
        op.drop_table(table_name)

    connection = op.get_bind()
    clue_status.drop(connection, checkfirst=True)
    confidence_tier.drop(connection, checkfirst=True)

    for function_name in ("forbid_update", "forbid_delete"):
        op.execute(f"DROP FUNCTION IF EXISTS {function_name}();")
|
||||
@@ -0,0 +1,24 @@
|
||||
[project]
|
||||
name = "aiaudit-backend"
|
||||
version = "0.1.0"
|
||||
description = "AIAudit 本地 AI 内审平台后端"
|
||||
requires-python = ">=3.11"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
pythonpath = ["."]
|
||||
asyncio_mode = "auto"
|
||||
asyncio_default_fixture_loop_scope = "function"
|
||||
|
||||
[tool.ruff]
|
||||
line-length = 100
|
||||
target-version = "py311"
|
||||
|
||||
[tool.ruff.lint]
|
||||
select = ["E", "F", "I", "W", "UP", "B"]
|
||||
# B008:FastAPI 依赖注入 Depends() 作为默认值是官方推荐用法
|
||||
ignore = ["B008"]
|
||||
|
||||
[tool.mypy]
|
||||
python_version = "3.11"
|
||||
ignore_missing_imports = true
|
||||
@@ -0,0 +1,5 @@
|
||||
-r requirements.txt
|
||||
pytest==8.3.4
|
||||
pytest-asyncio==0.25.0
|
||||
ruff==0.8.4
|
||||
mypy==1.14.0
|
||||
@@ -0,0 +1,11 @@
|
||||
fastapi==0.115.6
|
||||
uvicorn[standard]==0.34.0
|
||||
pydantic==2.10.4
|
||||
pydantic-settings==2.7.1
|
||||
sqlalchemy==2.0.36
|
||||
psycopg[binary]==3.2.3
|
||||
alembic==1.14.0
|
||||
celery==5.4.0
|
||||
redis==5.2.1
|
||||
httpx==0.28.1
|
||||
python-dotenv==1.0.1
|
||||
@@ -0,0 +1,41 @@
|
||||
"""集成测试 fixture:连接本地 PostgreSQL 16,按事务隔离并回滚。
|
||||
|
||||
需要可连接的数据库(DATABASE_URL)。无法连接时跳过整组集成测试。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.exc import OperationalError
|
||||
|
||||
from app.db import get_engine
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
def db_available() -> bool:
    """Report whether the configured database answers a trivial query.

    Returns ``True`` when ``SELECT 1`` succeeds on a fresh connection, and
    ``False`` when connecting or executing raises ``OperationalError``.
    """
    try:
        with get_engine().connect() as probe:
            probe.execute(text("SELECT 1"))
    except OperationalError:
        return False
    return True
|
||||
|
||||
|
||||
@pytest.fixture()
def session(db_available):
    """Yield a Session whose work is always rolled back after the test.

    The Session is bound to a connection that already has an outer transaction
    open. ``join_transaction_mode="create_savepoint"`` makes the Session work
    inside a SAVEPOINT, so a ``commit()`` or ``rollback()`` issued by the code
    under test never ends the outer transaction; that transaction is rolled
    back here during teardown.

    Skips the requesting test when ``db_available`` is false.
    """
    if not db_available:
        pytest.skip("数据库不可用,跳过集成测试")

    from sqlalchemy.orm import Session

    connection = get_engine().connect()
    try:
        trans = connection.begin()
        sess = Session(bind=connection, join_transaction_mode="create_savepoint")
        try:
            yield sess
        finally:
            sess.close()
            if trans.is_active:
                trans.rollback()
    finally:
        # Runs even if setup fails after connect(), so the connection never leaks.
        connection.close()
|
||||
@@ -0,0 +1,49 @@
|
||||
"""双时态集成测试(需 PostgreSQL)。
|
||||
|
||||
验证 R3:按历史业务时点回放属性值,以及双时态排他约束防止有效期重叠。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
|
||||
import pytest
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
from app.datahub import bitemporal_repo as btr
|
||||
from app.datahub.graph_repo import upsert_entity
|
||||
from app.datahub.ontology import EntityType
|
||||
|
||||
|
||||
def test_bitemporal_replay(session):
    """Replaying at different business times returns the value recorded for each period."""
    customer = upsert_entity(session, EntityType.CUSTOMER, "CUST_BT", "丁公司")
    session.flush()

    first_start = dt.datetime(2025, 1, 1, tzinfo=dt.UTC)
    second_start = dt.datetime(2025, 6, 1, tzinfo=dt.UTC)

    # "A" is recorded from first_start to second_start, "C" from second_start on.
    btr.record_fact(
        session,
        customer.id,
        "credit_level",
        {"v": "A"},
        valid_from=first_start,
        valid_to=second_start,
    )
    btr.record_fact(session, customer.id, "credit_level", {"v": "C"}, valid_from=second_start)
    session.flush()

    march = dt.datetime(2025, 3, 1, tzinfo=dt.UTC)
    september = dt.datetime(2025, 9, 1, tzinfo=dt.UTC)
    in_first_period = btr.as_of(session, customer.id, "credit_level", march)
    in_second_period = btr.as_of(session, customer.id, "credit_level", september)

    assert in_first_period is not None
    assert in_first_period.attr_value["v"] == "A"
    assert in_second_period is not None
    assert in_second_period.attr_value["v"] == "C"
|
||||
|
||||
|
||||
def test_bitemporal_exclusion_constraint(session):
    """Overlapping validity periods for one entity and attribute must be rejected.

    The ``ex_btf_no_overlap`` exclusion constraint is only created when the
    ``btree_gist`` extension is installed, so the test is skipped when the
    constraint is absent rather than failing for an environmental reason.
    """
    from sqlalchemy import text

    constraint_row = session.execute(
        text("SELECT 1 FROM pg_constraint WHERE conname = 'ex_btf_no_overlap'")
    ).first()
    if constraint_row is None:
        pytest.skip("ex_btf_no_overlap constraint is missing (btree_gist not installed)")

    cust = upsert_entity(session, EntityType.CUSTOMER, "CUST_EX", "戊公司")
    session.flush()

    t1 = dt.datetime(2025, 1, 1, tzinfo=dt.UTC)
    t2 = dt.datetime(2025, 6, 1, tzinfo=dt.UTC)
    t3 = dt.datetime(2025, 12, 1, tzinfo=dt.UTC)

    btr.record_fact(session, cust.id, "status", {"v": "active"}, valid_from=t1, valid_to=t3)
    session.flush()

    # The second fact starts at t2, inside the first fact's t1..t3 period.
    with pytest.raises(IntegrityError):
        btr.record_fact(session, cust.id, "status", {"v": "frozen"}, valid_from=t2, valid_to=None)
        # Explicit flush so the check does not depend on record_fact flushing
        # internally; unreachable if record_fact has already raised.
        session.flush()
|
||||
@@ -0,0 +1,87 @@
|
||||
"""线索闭环 + 系统自审计集成测试(需 PostgreSQL)。
|
||||
|
||||
覆盖 R7/R17/R18/R19:线索生成与分级、状态流转、底稿、审计哈希链、线索不可删。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.exc import InternalError, ProgrammingError
|
||||
|
||||
from app.audit import service as audit
|
||||
from app.clues import service as clue_svc
|
||||
from app.clues.models import ClueStatus, ConfidenceTier
|
||||
|
||||
|
||||
def _new_clue(session, score=0.9):
    """Create a sample clue through the clue service, using the given score."""
    clue_fields = {
        "title": "疑似政企拆单",
        "risk_domain": "收入",
        "scenario_code": "R8",
        "score": score,
        "rationale": "8 个客户金额集中在审批阈值边缘,且法人关联同一实控人",
        "evidence": {"contracts": 8, "threshold": 1000000},
        "amount_involved": 4800000,
        "actor": "system",
    }
    return clue_svc.create_clue(session, **clue_fields)
|
||||
|
||||
|
||||
def test_score_to_confidence_tier():
    """Scores 0.9, 0.6 and 0.2 map to the HIGH, MEDIUM and LOW tiers respectively."""
    expected_tiers = (
        (0.9, ConfidenceTier.HIGH),
        (0.6, ConfidenceTier.MEDIUM),
        (0.2, ConfidenceTier.LOW),
    )
    for score, tier in expected_tiers:
        assert clue_svc.score_to_tier(score) == tier
|
||||
|
||||
|
||||
def test_clue_full_lifecycle(session):
    """Walk one clue through new -> assigned -> confirmed -> transferred -> closed."""
    tracked = _new_clue(session)
    assert tracked.confidence == ConfidenceTier.HIGH
    assert tracked.status == ClueStatus.NEW

    clue_svc.assign(session, tracked, assignee="auditor_zhang", actor="manager_li")
    assert tracked.status == ClueStatus.ASSIGNED
    assert tracked.assignee == "auditor_zhang"

    working_paper = clue_svc.adjudicate(
        session,
        tracked,
        confirmed=True,
        actor="auditor_zhang",
        note="属实,移交",
    )
    assert tracked.status == ClueStatus.CONFIRMED
    assert tracked.feedback == "confirmed"
    assert working_paper.conclusion == "confirmed"
    assert working_paper.snapshot["score"] == 0.9

    # Finish the loop: confirmed -> transferred -> closed.
    for next_status in (ClueStatus.TRANSFERRED, ClueStatus.CLOSED):
        clue_svc.transition(session, tracked, next_status, actor="manager_li")
    assert tracked.status == ClueStatus.CLOSED
|
||||
|
||||
|
||||
def test_illegal_transition_rejected(session):
    """A clue in NEW status cannot jump straight to CLOSED."""
    fresh_clue = _new_clue(session)
    with pytest.raises(clue_svc.IllegalTransitionError):
        clue_svc.transition(session, fresh_clue, ClueStatus.CLOSED, actor="x")
|
||||
|
||||
|
||||
def test_audit_hash_chain_integrity(session):
    """After creating two clues and assigning one, the audit chain still verifies."""
    _new_clue(session)
    assigned_clue = _new_clue(session)
    clue_svc.assign(session, assigned_clue, "auditor_zhang", "manager_li")

    chain_ok, broken = audit.verify_chain(session)
    assert chain_ok is True
    assert broken is None
|
||||
|
||||
|
||||
def test_clue_cannot_be_deleted(session):
    """R19: the database trigger rejects physical deletion of a clue row."""
    protected = _new_clue(session)
    session.flush()

    delete_stmt = text("DELETE FROM clue WHERE id = :i")
    with pytest.raises((InternalError, ProgrammingError)):
        session.execute(delete_stmt, {"i": protected.id})
        session.flush()
|
||||
|
||||
|
||||
def test_list_clues_filters(session):
    """Filtering by HIGH confidence returns only HIGH-confidence clues."""
    for score in (0.9, 0.3):
        _new_clue(session, score=score)

    high_clues = clue_svc.list_clues(session, confidence=ConfidenceTier.HIGH)
    for listed in high_clues:
        assert listed.confidence == ConfidenceTier.HIGH
|
||||
@@ -0,0 +1,63 @@
|
||||
"""数据中台穿透 API 集成测试(需 PostgreSQL)。
|
||||
|
||||
通过 TestClient 调用 /datahub/penetrate,验证统一穿透查询服务端到端可用。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.datahub.graph_repo import add_relationship, upsert_entity
|
||||
from app.datahub.ontology import EntityType, RelationshipType
|
||||
from app.db import get_session
|
||||
from app.main import app
|
||||
|
||||
|
||||
@pytest.fixture()
def client(session):
    """Yield a TestClient whose ``get_session`` dependency returns the test session.

    Using the transactional fixture session means data written through the API
    is rolled back with the rest of the test. The override is removed on teardown.
    """

    def _use_test_session():
        return session

    app.dependency_overrides[get_session] = _use_test_session
    try:
        yield TestClient(app)
    finally:
        app.dependency_overrides.pop(get_session, None)
|
||||
|
||||
|
||||
def test_penetrate_endpoint_detects_related(client, session):
    """POST /datahub/penetrate from the controller reaches the linked customer."""
    tag = uuid.uuid4().hex[:8]
    controller = upsert_entity(session, EntityType.LEGAL_PERSON, f"CTRL-{tag}", "实控人")
    customer = upsert_entity(session, EntityType.CUSTOMER, f"CUST-{tag}", "政企客户")
    legal_rep = upsert_entity(session, EntityType.LEGAL_PERSON, f"REP-{tag}", "法人")
    add_relationship(session, RelationshipType.LEGAL_REP_OF, legal_rep, customer)
    add_relationship(session, RelationshipType.RELATED_TO, legal_rep, controller)
    session.flush()

    payload = {"start_entity_id": str(controller.id), "max_depth": 3}
    response = client.post("/datahub/penetrate", json=payload)
    assert response.status_code == 200

    result = response.json()
    reached_ids = {item["entity"]["id"] for item in result["related"]}
    assert str(customer.id) in reached_ids
    assert result["related_count"] >= 2
|
||||
|
||||
|
||||
def test_penetrate_unknown_entity_404(client):
    """Penetrating from a random, unknown entity id answers 404."""
    payload = {"start_entity_id": str(uuid.uuid4()), "max_depth": 2}
    response = client.post("/datahub/penetrate", json=payload)
    assert response.status_code == 404
|
||||
|
||||
|
||||
def test_get_entity_endpoint(client, session):
    """GET /datahub/entities/{id} returns the stored entity's business key."""
    tag = uuid.uuid4().hex[:8]
    supplier = upsert_entity(session, EntityType.SUPPLIER, f"SUP-{tag}", "供应商甲")
    session.flush()

    response = client.get(f"/datahub/entities/{supplier.id}")
    assert response.status_code == 200
    assert response.json()["business_key"] == f"SUP-{tag}"
|
||||
@@ -0,0 +1,76 @@
|
||||
"""知识图谱穿透集成测试(需 PostgreSQL)。
|
||||
|
||||
验证 R2 关键能力:通过关系边的多跳穿透识别"疑似同一实控人",
|
||||
以及本体约束对非法关系的拒绝。对应场景一(政企拆单+隐性实控人,R8)的图谱基础。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from app.datahub.graph_repo import (
|
||||
OntologyViolationError,
|
||||
add_relationship,
|
||||
find_related_entities,
|
||||
upsert_entity,
|
||||
)
|
||||
from app.datahub.ontology import EntityType, RelationshipType
|
||||
|
||||
|
||||
def test_upsert_entity_is_idempotent(session):
    """Upserting the same type and business key twice yields the same entity id."""
    first = upsert_entity(session, EntityType.CUSTOMER, "CUST-001", "客户甲")
    second = upsert_entity(session, EntityType.CUSTOMER, "CUST-001", "客户甲")
    assert first.id == second.id
|
||||
|
||||
|
||||
def test_ontology_violation_rejected(session):
    """A SIGNED edge from a contract to a customer is refused by the ontology check."""
    contract_entity = upsert_entity(session, EntityType.CONTRACT, "C-1")
    customer_entity = upsert_entity(session, EntityType.CUSTOMER, "CUST-2")

    # contract --SIGNED--> customer is the wrong direction.
    with pytest.raises(OntologyViolationError):
        add_relationship(session, RelationshipType.SIGNED, contract_entity, customer_entity)
|
||||
|
||||
|
||||
def test_detect_shared_controller_across_customers(session):
    """Eight customers sharing one controller are all reachable from that controller.

    Graph shape: each customer has its own legal representative
    (rep --LEGAL_REP_OF--> customer), and every representative is linked to the
    same controller (rep --RELATED_TO--> controller). Traversing from the
    controller with ``max_depth=3`` must reach every customer.
    """
    controller = upsert_entity(session, EntityType.LEGAL_PERSON, "PER-CTRL", "实控人")

    customers = []
    for idx in range(8):
        customer = upsert_entity(session, EntityType.CUSTOMER, f"CUST-{idx}", f"政企客户{idx}")
        legal_rep = upsert_entity(session, EntityType.LEGAL_PERSON, f"PER-{idx}", f"法人{idx}")
        # rep --LEGAL_REP_OF--> customer
        add_relationship(session, RelationshipType.LEGAL_REP_OF, legal_rep, customer)
        # rep --RELATED_TO--> controller
        add_relationship(session, RelationshipType.RELATED_TO, legal_rep, controller)
        customers.append(customer)
    session.flush()

    reached = find_related_entities(session, controller.id, max_depth=3)
    reached_ids = set()
    for entity_id, _ in reached:
        reached_ids.add(entity_id)

    # Every one of the 8 customers must be among the reached entities.
    for cust in customers:
        assert cust.id in reached_ids, f"未穿透到 {cust.business_key}"
|
||||
|
||||
|
||||
def test_traversal_respects_max_depth(session):
    """With the chain A -> B -> C, depth 1 from A reaches B only and depth 2 also reaches C."""
    node_a = upsert_entity(session, EntityType.LEGAL_PERSON, "A")
    node_b = upsert_entity(session, EntityType.LEGAL_PERSON, "B")
    node_c = upsert_entity(session, EntityType.CUSTOMER, "C")
    add_relationship(session, RelationshipType.RELATED_TO, node_a, node_b)
    add_relationship(session, RelationshipType.LEGAL_REP_OF, node_b, node_c)
    session.flush()

    def reachable_from_a(depth):
        # Ids returned by a traversal from A limited to ``depth``.
        return {rid for rid, _ in find_related_entities(session, node_a.id, max_depth=depth)}

    # depth=1: B is reachable, C is not.
    within_one = reachable_from_a(1)
    assert node_b.id in within_one
    assert node_c.id not in within_one

    # depth=2: C becomes reachable.
    within_two = reachable_from_a(2)
    assert node_c.id in within_two
|
||||
@@ -0,0 +1,42 @@
|
||||
"""数据零出域红线测试:prod 环境必须禁用公网 LLM Provider。"""
|
||||
|
||||
import pytest
|
||||
|
||||
from app.config import AppEnv, LLMProviderName, Settings
|
||||
from app.llm.factory import EgressPolicyError, get_llm_provider
|
||||
|
||||
|
||||
def _settings(env: AppEnv, provider: LLMProviderName) -> Settings:
    """Build Settings for the given environment and provider with a dummy DashScope key."""
    overrides = {
        "aiaudit_env": env,
        "llm_provider": provider,
        "dashscope_api_key": "x",
    }
    return Settings(**overrides)
|
||||
|
||||
|
||||
def test_prod_blocks_public_dashscope():
    """Requesting the dashscope provider under prod settings raises EgressPolicyError."""
    prod_settings = _settings(AppEnv.prod, LLMProviderName.dashscope)
    with pytest.raises(EgressPolicyError):
        get_llm_provider(prod_settings)
|
||||
|
||||
|
||||
def test_prod_allows_local_vllm():
    """Under prod settings the vllm provider is returned and reports no egress."""
    prod_settings = _settings(AppEnv.prod, LLMProviderName.vllm)
    llm = get_llm_provider(prod_settings)
    assert llm.name == "vllm"
    assert llm.egress is False
|
||||
|
||||
|
||||
def test_dev_allows_dashscope():
    """Under dev settings the dashscope provider is returned and reports egress."""
    dev_settings = _settings(AppEnv.dev, LLMProviderName.dashscope)
    llm = get_llm_provider(dev_settings)
    assert llm.name == "dashscope"
    assert llm.egress is True
|
||||
|
||||
|
||||
def test_validate_egress_policy_raises_in_prod():
    """validate_egress_policy() raises RuntimeError for prod combined with dashscope."""
    prod_settings = _settings(AppEnv.prod, LLMProviderName.dashscope)
    with pytest.raises(RuntimeError):
        prod_settings.validate_egress_policy()
|
||||
|
||||
|
||||
def test_validate_egress_policy_ok_in_dev():
    """validate_egress_policy() completes without raising for dev combined with dashscope."""
    dev_settings = _settings(AppEnv.dev, LLMProviderName.dashscope)
    # The call itself is the check: any exception fails the test.
    dev_settings.validate_egress_policy()
|
||||
@@ -0,0 +1,21 @@
|
||||
"""健康检查端点测试。"""
|
||||
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app.main import app
|
||||
|
||||
# Module-level TestClient for the application, used by the health-check tests in this file.
client = TestClient(app)
|
||||
|
||||
|
||||
def test_health_ok():
    """GET /health answers 200 with a status of "ok"."""
    response = client.get("/health")
    assert response.status_code == 200
    payload = response.json()
    assert payload["status"] == "ok"
|
||||
|
||||
|
||||
def test_health_config():
    """GET /health/config answers 200 and exposes the env and llm_provider keys."""
    response = client.get("/health/config")
    assert response.status_code == 200
    payload = response.json()
    for expected_key in ("env", "llm_provider"):
        assert expected_key in payload
|
||||
@@ -0,0 +1,42 @@
|
||||
"""审计本体约束测试(无需数据库)。"""
|
||||
|
||||
from app.datahub.ontology import EntityType, RelationshipType, is_valid_relationship
|
||||
|
||||
|
||||
def test_valid_signed_relationship():
    """customer --SIGNED--> contract is accepted."""
    allowed = is_valid_relationship(
        RelationshipType.SIGNED, EntityType.CUSTOMER, EntityType.CONTRACT
    )
    assert allowed
|
||||
|
||||
|
||||
def test_invalid_signed_direction():
    """contract --SIGNED--> customer is rejected because the direction is reversed."""
    allowed = is_valid_relationship(
        RelationshipType.SIGNED, EntityType.CONTRACT, EntityType.CUSTOMER
    )
    assert not allowed
|
||||
|
||||
|
||||
def test_legal_rep_relationship():
    """legal person --LEGAL_REP_OF--> supplier is accepted."""
    allowed = is_valid_relationship(
        RelationshipType.LEGAL_REP_OF, EntityType.LEGAL_PERSON, EntityType.SUPPLIER
    )
    assert allowed
|
||||
|
||||
|
||||
def test_related_to_between_legal_persons():
    """legal person --RELATED_TO--> legal person is accepted.

    This edge is what controller identification builds on.
    """
    allowed = is_valid_relationship(
        RelationshipType.RELATED_TO, EntityType.LEGAL_PERSON, EntityType.LEGAL_PERSON
    )
    assert allowed
|
||||
|
||||
|
||||
def test_invalid_relationship_wrong_target():
    """customer --HOLDS_MSISDN--> contract is rejected."""
    allowed = is_valid_relationship(
        RelationshipType.HOLDS_MSISDN, EntityType.CUSTOMER, EntityType.CONTRACT
    )
    assert not allowed
|
||||
|
||||
|
||||
def test_all_relationship_types_have_domain():
    """Every RelationshipType member has an entry in RELATIONSHIP_DOMAIN."""
    from app.datahub.ontology import RELATIONSHIP_DOMAIN

    for relation in RelationshipType:
        assert relation in RELATIONSHIP_DOMAIN, f"关系 {relation} 缺少本体域定义"
|
||||
Reference in New Issue
Block a user