Initial commit: InternalAuditInterprise

This commit is contained in:
freedakgmail
2026-06-16 00:38:57 +08:00
commit 7b1e2b10a8
57 changed files with 4622 additions and 0 deletions
+23
View File
@@ -0,0 +1,23 @@
# 运行环境:dev | prod
# prod 下禁用一切公网 LLM Provider(数据零出域红线)
AIAUDIT_ENV=dev
# 数据库(本地 PostgreSQL 16 / Postgres.app,无密码)
DATABASE_URL=postgresql+psycopg://freedak@localhost:5432/aiaudit
# Redis / Celery
REDIS_URL=redis://localhost:6379/0
# MinIO
MINIO_ENDPOINT=localhost:9000
MINIO_ACCESS_KEY=aiaudit
MINIO_SECRET_KEY=aiaudit_dev
# LLM Provider:dashscope(公网,仅 dev)| vllm(本地,prod)
LLM_PROVIDER=dashscope
# 公网千问(仅开发测试,且只允许脱敏/样例假数据)
DASHSCOPE_API_KEY=
DASHSCOPE_MODEL=qwen-plus
# 本地 vLLM(生产)
VLLM_BASE_URL=http://localhost:8001/v1
VLLM_MODEL=qwen2.5-72b-instruct
+38
View File
@@ -0,0 +1,38 @@
[alembic]
script_location = migrations
prepend_sys_path = .
sqlalchemy.url =
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARNING
handlers = console
qualname =
[logger_sqlalchemy]
level = WARNING
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
+3
View File
@@ -0,0 +1,3 @@
"""AIAudit 后端应用包。"""
__version__ = "0.1.0"
+1
View File
@@ -0,0 +1 @@
"""HTTP API 层。"""
+64
View File
@@ -0,0 +1,64 @@
"""数据中台统一穿透查询 API(P1.2.5)。
作为各引擎与审计场景访问知识图谱的共同入口,对上层屏蔽底层是关系表还是图库。
对应需求 R2。
"""
from __future__ import annotations
import uuid
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from app.api.schemas import (
EntityOut,
PenetrateRequest,
PenetrateResponse,
RelatedEntityOut,
)
from app.datahub.graph_repo import find_related_entities
from app.datahub.models import Entity
from app.db import get_session
router = APIRouter(prefix="/datahub", tags=["datahub"])
@router.get("/entities/{entity_id}", response_model=EntityOut)
def get_entity(entity_id: uuid.UUID, session: Session = Depends(get_session)) -> Entity:
    """Look up a single entity by its primary key.

    Args:
        entity_id: Primary key of the entity to fetch.
        session: Database session injected by FastAPI.

    Returns:
        The matching ``Entity`` row (serialized through ``EntityOut``).

    Raises:
        HTTPException: 404 when no entity has the given id.
    """
    found = session.get(Entity, entity_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="实体不存在")
@router.post("/penetrate", response_model=PenetrateResponse)
def penetrate(
    req: PenetrateRequest, session: Session = Depends(get_session)
) -> PenetrateResponse:
    """Multi-hop penetration query starting from one entity.

    Returns the entities connected to the start entity within
    ``req.max_depth`` hops, each annotated with the hop count reported by
    ``find_related_entities``, ordered by ascending depth.

    Raises:
        HTTPException: 404 when the start entity does not exist.
    """
    if session.get(Entity, req.start_entity_id) is None:
        raise HTTPException(status_code=404, detail="起点实体不存在")

    hits = find_related_entities(session, req.start_entity_id, max_depth=req.max_depth)
    # Map each reached entity id to its hop count so details can be joined below.
    depth_by_id = dict(hits)

    # Load the entity details in one query; skip the query when nothing was reached.
    if depth_by_id:
        rows = session.query(Entity).filter(Entity.id.in_(list(depth_by_id))).all()
    else:
        rows = []

    related = sorted(
        (
            RelatedEntityOut(entity=EntityOut.model_validate(row), depth=depth_by_id[row.id])
            for row in rows
        ),
        key=lambda item: item.depth,
    )
    return PenetrateResponse(
        start_entity_id=req.start_entity_id,
        max_depth=req.max_depth,
        related_count=len(related),
        related=related,
    )
+36
View File
@@ -0,0 +1,36 @@
"""API 数据传输模型(Pydantic)。"""
from __future__ import annotations
import uuid
from pydantic import BaseModel, Field
class EntityOut(BaseModel):
    """Response model for an entity; can be built from ORM objects."""

    id: uuid.UUID
    entity_type: str
    business_key: str
    display_name: str | None = None
    attributes: dict = Field(default_factory=dict)
    # from_attributes lets model_validate() read fields from object attributes.
    model_config = {"from_attributes": True}
class RelatedEntityOut(BaseModel):
    """A related entity hit by penetration, with its shortest hop count (a first indication of evidence strength)."""

    entity: EntityOut
    depth: int
class PenetrateRequest(BaseModel):
    """Request body for a penetration query."""

    start_entity_id: uuid.UUID
    # Traversal depth; validated to the range 1..6, defaults to 3.
    max_depth: int = Field(default=3, ge=1, le=6)
class PenetrateResponse(BaseModel):
    """Response body of a penetration query: the echoed request plus the related entities."""

    start_entity_id: uuid.UUID
    max_depth: int
    related_count: int
    related: list[RelatedEntityOut]
+1
View File
@@ -0,0 +1 @@
"""系统自审计模块:不可篡改操作日志、独立性与分权(R19)。"""
+50
View File
@@ -0,0 +1,50 @@
"""系统自审计 ORM 模型:不可篡改操作日志(R19)。
每条日志含哈希链(prev_hash + 内容 → entry_hash),任何篡改都会断链,可检测。
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy import BigInteger, DateTime, Identity, Index, String
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column
from app.db import Base
def _uuid() -> uuid.UUID:
    """Return a freshly generated random (version 4) UUID."""
    fresh_id = uuid.uuid4()
    return fresh_id
def _now() -> dt.datetime:
return dt.datetime.now(dt.UTC)
class AuditLog(Base):
    """Tamper-evident audit trail.

    Append-only: rows must not be updated or deleted (per the original note,
    this is guaranteed jointly by the application layer and organizational policy).
    """

    __tablename__ = "audit_log"
    __table_args__ = (
        Index("ix_audit_actor", "actor"),
        Index("ix_audit_action", "action"),
        Index("ix_audit_seq", "seq", unique=True),
    )
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    # Auto-incrementing sequence number; defines the order of the hash chain.
    # NOTE(review): uniqueness of seq is declared both here (unique=True) and via
    # ix_audit_seq above — looks redundant; confirm whether both are intended.
    seq: Mapped[int] = mapped_column(
        BigInteger, Identity(always=False), nullable=False, unique=True
    )
    actor: Mapped[str] = mapped_column(String(64), nullable=False)
    role: Mapped[str | None] = mapped_column(String(32), nullable=True)
    action: Mapped[str] = mapped_column(String(64), nullable=False)  # e.g. rule.update / clue.assign
    target_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
    target_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
    detail: Mapped[dict] = mapped_column(JSONB, default=dict)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
    # Hash-chain links: entry_hash of the preceding row (NULL allowed) and this row's own hash.
    prev_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
    entry_hash: Mapped[str] = mapped_column(String(64), nullable=False)
+78
View File
@@ -0,0 +1,78 @@
"""RBAC 权限与独立性约束(R19、PRD §6 权限矩阵)。
核心独立性规则(硬约束):
- 任何角色都不能删除线索(DELETE_CLUE 不授予任何角色;数据库触发器再兜底)。
- 业务方(business)对系统无任何写权限。
- 配规则/改阈值/看线索/出报告分权制衡。
"""
from __future__ import annotations
import enum
class Role(str, enum.Enum):
    """System roles used by the RBAC permission matrix."""

    AUDITOR = "auditor"  # auditor
    AUDIT_MANAGER = "audit_manager"  # audit manager
    RULE_ADMIN = "rule_admin"  # rule administrator
    SYS_ADMIN = "sys_admin"  # system administrator
    SYS_AUDITOR = "sys_auditor"  # system auditor (independent oversight)
    BUSINESS = "business"  # audited business side (no write permissions)
class Permission(str, enum.Enum):
    """Individual permissions that can be granted to a role."""

    QUERY = "query"  # natural-language query
    VIEW_CLUE = "view_clue"  # view clues
    ADJUDICATE_CLUE = "adjudicate_clue"  # review / adjudicate clues
    ASSIGN_CLUE = "assign_clue"  # assign clues
    DELETE_CLUE = "delete_clue"  # delete clues (must never be granted to anyone)
    CONFIG_RULE = "config_rule"  # configure rules
    ADJUST_THRESHOLD = "adjust_threshold"  # adjust thresholds
    ISSUE_REPORT = "issue_report"  # issue reports
    DATA_INGEST = "data_ingest"  # data ingestion configuration
    VIEW_AUDIT_TRAIL = "view_audit_trail"  # view the self-audit trail
    MODEL_DEPLOY = "model_deploy"  # model deployment / upgrade
# Role -> set of permissions. Note: DELETE_CLUE appears in no role (clues cannot be deleted, R19).
ROLE_PERMISSIONS: dict[Role, set[Permission]] = {
    Role.AUDITOR: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.ADJUDICATE_CLUE,
        Permission.ISSUE_REPORT,
    },
    Role.AUDIT_MANAGER: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.ADJUDICATE_CLUE,
        Permission.ASSIGN_CLUE,
        Permission.ISSUE_REPORT,
    },
    Role.RULE_ADMIN: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.CONFIG_RULE,
        Permission.ADJUST_THRESHOLD,
    },
    Role.SYS_ADMIN: {
        Permission.DATA_INGEST,
        Permission.MODEL_DEPLOY,
    },
    Role.SYS_AUDITOR: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.VIEW_AUDIT_TRAIL,
        Permission.ISSUE_REPORT,
    },
    Role.BUSINESS: set(),  # the business side has no permissions at all
}
def has_permission(role: Role, perm: Permission) -> bool:
    """Return True when ``perm`` is in the permission set mapped to ``role``.

    A role that is missing from ``ROLE_PERMISSIONS`` is treated as having
    no permissions.
    """
    granted = ROLE_PERMISSIONS.get(role, set())
    return perm in granted
def can_delete_clue(role: Role) -> bool:
    """Report whether ``role`` may delete a clue.

    Clues are never deletable, so the answer is False for every role
    (hard independence constraint); ``role`` is accepted but not consulted.
    """
    return False
+81
View File
@@ -0,0 +1,81 @@
"""系统自审计服务:写入哈希链审计日志、校验完整性(R19)。"""
from __future__ import annotations
import hashlib
import json
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.audit.models import AuditLog
def _compute_hash(prev_hash: str | None, payload: dict) -> str:
    """Compute the SHA-256 hex digest linking ``payload`` to ``prev_hash``.

    The payload is serialized as JSON with sorted keys (non-ASCII kept as-is,
    non-JSON values stringified via ``str``) and hashed as
    ``"<prev_hash or empty>|<json>"`` encoded in UTF-8.
    """
    serialized = json.dumps(payload, sort_keys=True, ensure_ascii=False, default=str)
    chained = "{}|{}".format(prev_hash or "", serialized)
    digest = hashlib.sha256()
    digest.update(chained.encode("utf-8"))
    return digest.hexdigest()
def record(
    session: Session,
    actor: str,
    action: str,
    *,
    role: str | None = None,
    target_type: str | None = None,
    target_id: str | None = None,
    detail: dict | None = None,
) -> AuditLog:
    """Append one audit-log entry and link it into the hash chain.

    The entry's hash covers actor, role, action, target_type, target_id and
    detail, chained to the ``entry_hash`` of the row with the highest ``seq``
    (or to nothing when the table is empty). The new row is added to the
    session and flushed, not committed.

    Returns:
        The newly added ``AuditLog`` row.
    """
    # NOTE(review): the previous row is read without any locking here, so two
    # concurrent writers could presumably chain off the same row — confirm how
    # callers serialize writes.
    latest_stmt = select(AuditLog).order_by(AuditLog.seq.desc()).limit(1)
    tail = session.execute(latest_stmt).scalar_one_or_none()
    prev_hash = None if tail is None else tail.entry_hash

    detail_body = detail or {}
    fields = {
        "actor": actor,
        "role": role,
        "action": action,
        "target_type": target_type,
        "target_id": target_id,
    }
    entry_hash = _compute_hash(prev_hash, {**fields, "detail": detail_body})

    entry = AuditLog(
        **fields,
        detail=detail_body,
        prev_hash=prev_hash,
        entry_hash=entry_hash,
    )
    session.add(entry)
    session.flush()
    return entry
def verify_chain(session: Session) -> tuple[bool, int | None]:
    """Verify the integrity of the audit-log hash chain.

    Walks all rows in ascending ``seq`` order, recomputing each hash from the
    stored fields and checking that ``prev_hash`` matches the preceding row.

    Returns:
        ``(True, None)`` when the chain is intact, otherwise
        ``(False, seq)`` with the ``seq`` of the first broken entry.
    """
    ordered = select(AuditLog).order_by(AuditLog.seq.asc())
    expected_prev: str | None = None
    for entry in session.execute(ordered).scalars().all():
        recomputed = _compute_hash(
            expected_prev,
            {
                "actor": entry.actor,
                "role": entry.role,
                "action": entry.action,
                "target_type": entry.target_type,
                "target_id": entry.target_id,
                "detail": entry.detail or {},
            },
        )
        hash_ok = recomputed == entry.entry_hash
        link_ok = entry.prev_hash == expected_prev
        if not (hash_ok and link_ok):
            return False, entry.seq
        expected_prev = entry.entry_hash
    return True, None
+1
View File
@@ -0,0 +1 @@
"""线索引擎模块:线索模型、生成、置信度分级、状态流转(人机闭环)。"""
+136
View File
@@ -0,0 +1,136 @@
"""线索 ORM 模型。
对应需求 R7(线索+证据链+解释)、R17(闭环状态)、R18(置信度分级)、R19(线索不可删)。
"""
from __future__ import annotations
import datetime as dt
import enum
import uuid
from sqlalchemy import DateTime, Enum, Float, ForeignKey, Index, String, Text
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.db import Base
def _enum_values(enum_cls):
    """Return the ``value`` of every member of ``enum_cls`` in definition order.

    Passed as ``values_callable`` so SQLAlchemy stores the enum values
    (lowercase) in the native PostgreSQL enum instead of the member names.
    """
    values = []
    for member in enum_cls:
        values.append(member.value)
    return values
def _uuid() -> uuid.UUID:
    """Generate a new random (version 4) UUID."""
    generated = uuid.uuid4()
    return generated
def _now() -> dt.datetime:
return dt.datetime.now(dt.UTC)
class ConfidenceTier(str, enum.Enum):
    """Three-tier confidence routing (R18)."""

    HIGH = "high"  # high confidence: pushed directly for handling
    MEDIUM = "medium"  # medium confidence: manual review
    LOW = "low"  # low confidence: archived for reference
class ClueStatus(str, enum.Enum):
    """Closed-loop state machine of a clue (R17)."""

    NEW = "new"  # newly generated
    ASSIGNED = "assigned"  # assigned
    REVIEWING = "reviewing"  # under review
    CONFIRMED = "confirmed"  # adjudicated as substantiated
    DISMISSED = "dismissed"  # adjudicated as a false positive
    RECTIFYING = "rectifying"  # rectification in progress
    TRANSFERRED = "transferred"  # handed over
    CLOSED = "closed"  # closed out (loop completed)
class Clue(Base):
    """Audit clue. Once generated a clue must not be physically deleted (R19); invalidation is expressed through status."""

    __tablename__ = "clue"
    __table_args__ = (
        Index("ix_clue_status", "status"),
        Index("ix_clue_scenario", "scenario_code"),
        Index("ix_clue_assignee", "assignee"),
    )
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    title: Mapped[str] = mapped_column(String(256), nullable=False)
    risk_domain: Mapped[str] = mapped_column(String(32), nullable=False)  # revenue / cost / procurement / funds / compliance
    scenario_code: Mapped[str] = mapped_column(String(32), nullable=False)  # e.g. R8 / R9
    confidence: Mapped[ConfidenceTier] = mapped_column(
        Enum(ConfidenceTier, name="confidence_tier", values_callable=_enum_values),
        nullable=False,
    )
    score: Mapped[float] = mapped_column(Float, default=0.0)  # risk score in 0-1
    status: Mapped[ClueStatus] = mapped_column(
        Enum(ClueStatus, name="clue_status", values_callable=_enum_values),
        default=ClueStatus.NEW,
        nullable=False,
    )
    # Plain-language explanation (reason for the verdict) and the evidence chain.
    rationale: Mapped[str] = mapped_column(Text, default="")
    evidence: Mapped[dict] = mapped_column(JSONB, default=dict)
    # Subjects involved (amounts, list of entity ids, etc.).
    subjects: Mapped[dict] = mapped_column(JSONB, default=dict)
    amount_involved: Mapped[float | None] = mapped_column(Float, nullable=True)
    assignee: Mapped[str | None] = mapped_column(String(64), nullable=True)
    # False-positive / confirmed feedback (R18 feedback learning).
    feedback: Mapped[str | None] = mapped_column(String(16), nullable=True)  # confirmed/false_positive
    # Traceability: model / rule / data version in effect when the clue was produced (R19).
    model_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
    rule_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), nullable=True)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
    updated_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now, onupdate=_now
    )
    # NOTE(review): the cascade includes delete/delete-orphan although the class
    # docstring says clues are never physically deleted — confirm this is intended.
    history: Mapped[list[ClueStatusHistory]] = relationship(
        back_populates="clue", cascade="all, delete-orphan"
    )
class ClueStatusHistory(Base):
    """Trace of clue status transitions (R17/R19)."""

    __tablename__ = "clue_status_history"
    __table_args__ = (Index("ix_csh_clue", "clue_id"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    clue_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
    )
    # Statuses are stored as plain strings; from_status is nullable.
    from_status: Mapped[str | None] = mapped_column(String(16), nullable=True)
    to_status: Mapped[str] = mapped_column(String(16), nullable=False)
    actor: Mapped[str] = mapped_column(String(64), nullable=False)
    note: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
    clue: Mapped[Clue] = relationship(back_populates="history")
class WorkingPaper(Base):
    """Audit working paper (R17): generated automatically when adjudication completes; traceable."""

    __tablename__ = "working_paper"
    __table_args__ = (Index("ix_wp_clue", "clue_id"),)
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    clue_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
    )
    content: Mapped[str] = mapped_column(Text, default="")
    conclusion: Mapped[str | None] = mapped_column(String(32), nullable=True)
    author: Mapped[str] = mapped_column(String(64), nullable=False)
    snapshot: Mapped[dict] = mapped_column(JSONB, default=dict)  # evidence / version snapshot
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
+195
View File
@@ -0,0 +1,195 @@
"""线索服务:生成、置信度分级、状态流转、底稿生成、反馈。
对应 R7 / R17 / R18 / R19。所有状态变更写入历史并记自审计日志(线索不可删)。
"""
from __future__ import annotations
import uuid
from sqlalchemy.orm import Session
from app.audit import service as audit
from app.clues.models import (
Clue,
ClueStatus,
ClueStatusHistory,
ConfidenceTier,
WorkingPaper,
)
# Allowed status transitions (R17 closed loop): current status -> statuses it may move to.
# CLOSED maps to an empty set, i.e. it is terminal.
_ALLOWED_TRANSITIONS: dict[ClueStatus, set[ClueStatus]] = {
    ClueStatus.NEW: {ClueStatus.ASSIGNED, ClueStatus.REVIEWING},
    ClueStatus.ASSIGNED: {ClueStatus.REVIEWING},
    ClueStatus.REVIEWING: {ClueStatus.CONFIRMED, ClueStatus.DISMISSED},
    ClueStatus.CONFIRMED: {ClueStatus.RECTIFYING, ClueStatus.TRANSFERRED},
    ClueStatus.DISMISSED: {ClueStatus.CLOSED},
    ClueStatus.RECTIFYING: {ClueStatus.CLOSED},
    ClueStatus.TRANSFERRED: {ClueStatus.CLOSED},
    ClueStatus.CLOSED: set(),
}
class IllegalTransitionError(ValueError):
    """Raised for an illegal clue status transition."""
def score_to_tier(score: float) -> ConfidenceTier:
    """Map a risk score to one of the three confidence tiers (R18).

    Scores of at least 0.8 are HIGH, at least 0.5 are MEDIUM, and everything
    else is LOW.
    """
    thresholds = (
        (0.8, ConfidenceTier.HIGH),
        (0.5, ConfidenceTier.MEDIUM),
    )
    # Thresholds are checked from highest to lowest; the first one met wins.
    for floor, tier in thresholds:
        if score >= floor:
            return tier
    return ConfidenceTier.LOW
def create_clue(
    session: Session,
    *,
    title: str,
    risk_domain: str,
    scenario_code: str,
    score: float,
    rationale: str,
    evidence: dict,
    subjects: dict | None = None,
    amount_involved: float | None = None,
    model_version: str | None = None,
    rule_version: str | None = None,
    data_version_id: uuid.UUID | None = None,
    actor: str = "system",
) -> Clue:
    """Create a clue, tier it by score, and leave a creation trace.

    The clue starts in status NEW with its confidence derived from ``score``.
    A status-history row and a ``create_clue`` audit-log entry are written in
    the same session; everything is flushed but not committed.

    Returns:
        The newly created ``Clue``.
    """
    tier = score_to_tier(score)
    clue = Clue(
        title=title,
        risk_domain=risk_domain,
        scenario_code=scenario_code,
        confidence=tier,
        score=score,
        status=ClueStatus.NEW,
        rationale=rationale,
        evidence=evidence,
        subjects=subjects or {},
        amount_involved=amount_involved,
        model_version=model_version,
        rule_version=rule_version,
        data_version_id=data_version_id,
    )
    session.add(clue)
    session.flush()

    _add_history(session, clue, None, ClueStatus.NEW, actor, "线索生成")

    audit_detail = {
        "scenario": scenario_code,
        "score": score,
        "confidence": clue.confidence.value,
    }
    audit.record(
        session,
        actor,
        "create_clue",
        target_type="clue",
        target_id=str(clue.id),
        detail=audit_detail,
    )
    return clue
def _add_history(
    session: Session,
    clue: Clue,
    from_status: ClueStatus | None,
    to_status: ClueStatus,
    actor: str,
    note: str | None,
) -> None:
    """Add and flush one status-history row for ``clue``.

    Statuses are stored by their enum value; ``from_status`` may be None
    (it is None when the clue is first created).
    """
    previous = None if not from_status else from_status.value
    entry = ClueStatusHistory(
        clue_id=clue.id,
        from_status=previous,
        to_status=to_status.value,
        actor=actor,
        note=note,
    )
    session.add(entry)
    session.flush()
def transition(
    session: Session, clue: Clue, to_status: ClueStatus, actor: str, note: str | None = None
) -> Clue:
    """Move ``clue`` to ``to_status``, validating the move and leaving a trace.

    On success the new status is flushed, a status-history row is added and a
    ``transition_clue`` audit-log entry is written.

    Raises:
        IllegalTransitionError: when ``to_status`` is not reachable from the
            clue's current status according to ``_ALLOWED_TRANSITIONS``.
    """
    permitted = _ALLOWED_TRANSITIONS.get(clue.status, set())
    if to_status not in permitted:
        raise IllegalTransitionError(
            f"线索状态不能从 {clue.status.value} 流转到 {to_status.value}"
        )

    previous = clue.status
    clue.status = to_status
    session.flush()

    _add_history(session, clue, previous, to_status, actor, note)
    audit_detail = {"from": previous.value, "to": to_status.value, "note": note}
    audit.record(
        session,
        actor,
        "transition_clue",
        target_type="clue",
        target_id=str(clue.id),
        detail=audit_detail,
    )
    return clue
def assign(session: Session, clue: Clue, assignee: str, actor: str) -> Clue:
    """Assign ``clue`` to ``assignee`` and record the assignment.

    The assignee is always stored. A clue still in status NEW is additionally
    moved to ASSIGNED; clues in any other status keep their status. An
    ``assign_clue`` audit-log entry is written in both cases.
    """
    clue.assignee = assignee
    session.flush()

    still_new = clue.status == ClueStatus.NEW
    if still_new:
        transition(session, clue, ClueStatus.ASSIGNED, actor, f"分派给 {assignee}")

    audit.record(
        session,
        actor,
        "assign_clue",
        target_type="clue",
        target_id=str(clue.id),
        detail={"assignee": assignee},
    )
    return clue
def adjudicate(
    session: Session, clue: Clue, confirmed: bool, actor: str, note: str | None = None
) -> WorkingPaper:
    """Adjudicate a clue as confirmed or false positive (R17/R18).

    Steps, all flushed in the given session but not committed:
      1. If the clue is not yet REVIEWING and REVIEWING is reachable from its
         current status, move it to REVIEWING first.
      2. Move it to CONFIRMED or DISMISSED depending on ``confirmed``.
      3. Store the feedback label on the clue.
      4. Create a working paper holding a snapshot of the clue's evidence,
         rationale, score and version fields, and write a
         ``create_working_paper`` audit-log entry.

    Args:
        session: Active database session.
        clue: The clue being adjudicated.
        confirmed: True for "substantiated", False for "false positive".
        actor: Identifier of the user performing the adjudication.
        note: Optional note; also used as the working paper's content.

    Returns:
        The newly created ``WorkingPaper``.

    Raises:
        IllegalTransitionError: when the clue's status does not allow the
            verdict transition. This is raised by ``transition`` before any
            change is made.
    """
    # Enter REVIEWING first when that is a legal move from the current status.
    # Legality of the verdict itself is enforced by transition() below, so no
    # separate status guard is needed here.
    can_enter_review = ClueStatus.REVIEWING in _ALLOWED_TRANSITIONS.get(clue.status, set())
    if clue.status != ClueStatus.REVIEWING and can_enter_review:
        transition(session, clue, ClueStatus.REVIEWING, actor, "进入研判")

    to = ClueStatus.CONFIRMED if confirmed else ClueStatus.DISMISSED
    transition(session, clue, to, actor, note)

    clue.feedback = "confirmed" if confirmed else "false_positive"
    session.flush()

    paper = WorkingPaper(
        clue_id=clue.id,
        content=note or "",
        conclusion=to.value,
        author=actor,
        snapshot={
            "evidence": clue.evidence,
            "rationale": clue.rationale,
            "score": clue.score,
            "model_version": clue.model_version,
            "rule_version": clue.rule_version,
            "data_version_id": str(clue.data_version_id) if clue.data_version_id else None,
        },
    )
    session.add(paper)
    session.flush()

    audit.record(
        session, actor, "create_working_paper",
        target_type="working_paper", target_id=str(paper.id),
        detail={"clue_id": str(clue.id), "conclusion": to.value},
    )
    return paper
def list_clues(
    session: Session,
    *,
    status: ClueStatus | None = None,
    scenario_code: str | None = None,
    confidence: ConfidenceTier | None = None,
) -> list[Clue]:
    """List clues ordered by descending score, optionally filtered.

    Each of ``status``, ``scenario_code`` and ``confidence`` is applied as an
    equality filter only when it is truthy.
    """
    criteria = []
    if status:
        criteria.append(Clue.status == status)
    if scenario_code:
        criteria.append(Clue.scenario_code == scenario_code)
    if confidence:
        criteria.append(Clue.confidence == confidence)

    query = session.query(Clue)
    for criterion in criteria:
        query = query.filter(criterion)
    return query.order_by(Clue.score.desc()).all()
+70
View File
@@ -0,0 +1,70 @@
"""应用配置。
通过环境变量加载,区分 dev / prod 运行环境。
prod 环境强制执行"数据零出域"红线:禁用任何公网 LLM Provider。
"""
from __future__ import annotations
from enum import Enum
from pydantic_settings import BaseSettings, SettingsConfigDict
class AppEnv(str, Enum):
    """Runtime environment of the application."""

    dev = "dev"
    prod = "prod"
class LLMProviderName(str, Enum):
    """Names of the supported LLM providers."""

    dashscope = "dashscope"  # public-internet Qwen, dev only
    vllm = "vllm"  # local deployment, prod
# Providers classified as "public internet / data leaves the domain"; forbidden in prod.
EGRESS_PROVIDERS: frozenset[LLMProviderName] = frozenset({LLMProviderName.dashscope})
class Settings(BaseSettings):
    """Application settings loaded from environment variables and ``.env``.

    Variable names carry no prefix and are matched case-insensitively;
    unknown variables are ignored.
    """

    model_config = SettingsConfigDict(
        env_prefix="",
        env_file=".env",
        extra="ignore",
        case_sensitive=False,
    )

    aiaudit_env: AppEnv = AppEnv.dev
    database_url: str = "postgresql+psycopg://freedak@localhost:5432/aiaudit"
    redis_url: str = "redis://localhost:6379/0"
    llm_provider: LLMProviderName = LLMProviderName.dashscope
    dashscope_api_key: str = ""
    dashscope_model: str = "qwen-plus"
    vllm_base_url: str = "http://localhost:8001/v1"
    vllm_model: str = "qwen2.5-72b-instruct"

    @property
    def is_prod(self) -> bool:
        """Whether the configured environment is ``prod``."""
        return self.aiaudit_env == AppEnv.prod

    def validate_egress_policy(self) -> None:
        """Enforce the "zero data egress" red line.

        Meant to be called at application start-up, so that a violation
        aborts the launch.

        Raises:
            RuntimeError: when running in prod with a provider listed in
                ``EGRESS_PROVIDERS``.
        """
        if not self.is_prod:
            return
        if self.llm_provider not in EGRESS_PROVIDERS:
            return
        raise RuntimeError(
            f"数据零出域红线违规:prod 环境禁止使用公网 LLM Provider "
            f"'{self.llm_provider.value}'。请改用本地 Provider(如 vllm)。"
        )
_settings: Settings | None = None


def get_settings() -> Settings:
    """Return the module-wide ``Settings`` instance, creating it on first use."""
    global _settings
    cached = _settings
    if cached is None:
        cached = Settings()
        _settings = cached
    return cached
+1
View File
@@ -0,0 +1 @@
"""审计数据中台模块:本体/知识图谱、双时态、时序、数据版本。"""
+83
View File
@@ -0,0 +1,83 @@
"""双时态事实仓储:写入与"按历史时点回放"查询。
对应需求 R3 / ADR-0002:
- 业务有效期 valid_from/valid_to(应用时间)
- 系统记录期 system_from/system_to(事务时间)
回放 = 给定 (as_of_valid, as_of_system) 在两条时间线上各取"包含该时点"的记录。
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy import or_
from sqlalchemy.orm import Session
from app.datahub.models import BitemporalFact
def record_fact(
    session: Session,
    entity_id: uuid.UUID,
    attr_name: str,
    attr_value: dict,
    valid_from: dt.datetime,
    valid_to: dt.datetime | None = None,
    data_version_id: uuid.UUID | None = None,
) -> BitemporalFact:
    """Record one bitemporal fact and flush it.

    ``system_from`` is not set here; it is left to the model's column default.

    Returns:
        The newly added ``BitemporalFact``.
    """
    values = {
        "entity_id": entity_id,
        "attr_name": attr_name,
        "attr_value": attr_value,
        "valid_from": valid_from,
        "valid_to": valid_to,
        "data_version_id": data_version_id,
    }
    new_fact = BitemporalFact(**values)
    session.add(new_fact)
    session.flush()
    return new_fact
def as_of(
    session: Session,
    entity_id: uuid.UUID,
    attr_name: str,
    as_of_valid: dt.datetime,
    as_of_system: dt.datetime | None = None,
) -> BitemporalFact | None:
    """Replay: return the fact in effect at a business time as seen at a system time.

    - Business timeline: ``valid_from <= as_of_valid < valid_to`` (open-ended
      when ``valid_to`` is NULL).
    - System timeline: ``system_from <= as_of_system < system_to`` (still
      visible when ``system_to`` is NULL).

    ``as_of_system`` defaults to the current UTC time. When several facts
    match, the one with the latest ``system_from`` is returned; None when
    nothing matches.
    """
    if as_of_system is None:
        as_of_system = dt.datetime.now(dt.UTC)

    fact = BitemporalFact
    valid_still_open = or_(fact.valid_to.is_(None), fact.valid_to > as_of_valid)
    system_still_open = or_(fact.system_to.is_(None), fact.system_to > as_of_system)

    return (
        session.query(fact)
        .filter(
            fact.entity_id == entity_id,
            fact.attr_name == attr_name,
            fact.valid_from <= as_of_valid,
            valid_still_open,
            fact.system_from <= as_of_system,
            system_still_open,
        )
        .order_by(fact.system_from.desc())
        .first()
    )
def close_fact(
    session: Session, fact: BitemporalFact, system_to: dt.datetime | None = None
) -> None:
    """Logically close a fact's system-visibility period.

    Used for corrections and invalidation instead of a physical delete.
    ``system_to`` defaults to the current UTC time; the change is flushed.
    """
    if system_to:
        closed_at = system_to
    else:
        closed_at = dt.datetime.now(dt.UTC)
    fact.system_to = closed_at
    session.add(fact)
    session.flush()
+58
View File
@@ -0,0 +1,58 @@
"""数据中台 schema 初始化。
MVP 阶段以 SQLAlchemy metadata 建表(后续可迁移到 Alembic)。
扩展按可用性可选启用:
- btree_gist / vector:若可用则创建。
- timescaledb:若可用则把 metric_event 转为超表;不可用则保持普通表(带时间索引)。
"""
from __future__ import annotations
from sqlalchemy import text
from sqlalchemy.engine import Engine
from app.datahub import models # noqa: F401 确保模型注册到 metadata
from app.db import Base, get_engine
def _extension_available(engine: Engine, name: str) -> bool:
    """Return True when ``pg_available_extensions`` lists an extension called ``name``."""
    lookup = text("SELECT 1 FROM pg_available_extensions WHERE name = :n")
    with engine.connect() as conn:
        hit = conn.execute(lookup, {"n": name}).first()
    return hit is not None
def init_extensions(engine: Engine) -> dict[str, bool]:
    """Create each optional extension that the server offers.

    Returns:
        A mapping of extension name to whether it was reported as available
        (and therefore had ``CREATE EXTENSION IF NOT EXISTS`` issued for it).
    """
    status: dict[str, bool] = {}
    for ext in ("btree_gist", "vector", "timescaledb"):
        status[ext] = _extension_available(engine, ext)
        if not status[ext]:
            continue
        # ext comes from the fixed tuple above, never from external input.
        with engine.begin() as conn:
            conn.execute(text(f"CREATE EXTENSION IF NOT EXISTS {ext}"))
    return status
def create_schema(engine: Engine | None = None) -> dict[str, bool]:
    """Create all registered tables and, when possible, the time-series hypertable.

    Uses ``get_engine()`` when no engine is passed. Extensions are initialised
    first, then tables are created from the ORM metadata, and finally
    ``metric_event`` is converted to a hypertable if TimescaleDB is available.

    Returns:
        The extension status mapping produced by ``init_extensions``.
    """
    if not engine:
        engine = get_engine()
    status = init_extensions(engine)
    Base.metadata.create_all(engine)

    if not status.get("timescaledb"):
        return status

    # Convert the time-series event table to a hypertable; if_not_exists makes
    # the call safe to repeat.
    hypertable_sql = text(
        "SELECT create_hypertable('metric_event', 'event_time', "
        "if_not_exists => TRUE, migrate_data => TRUE)"
    )
    with engine.begin() as conn:
        conn.execute(hypertable_sql)
    return status
if __name__ == "__main__":
    # Script entry point: initialise the schema and report the extension status.
    print("数据中台 schema 初始化完成。扩展状态:", create_schema())
+118
View File
@@ -0,0 +1,118 @@
"""知识图谱仓储:实体/关系写入与多跳穿透(递归 CTE)。
对应需求 R2:支撑隐性实控人、关联方网络、"马甲"供应商等穿透分析。
统一穿透查询服务(P1.2.5)在此之上封装对外 API,对上层屏蔽底层是关系表还是图库。
"""
from __future__ import annotations
import uuid
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.datahub.models import Entity, EntityRelationship
from app.datahub.ontology import EntityType, RelationshipType, is_valid_relationship
class OntologyViolationError(ValueError):
    """Raised when a relationship does not satisfy the ontology constraints."""
def upsert_entity(
    session: Session,
    entity_type: EntityType,
    business_key: str,
    display_name: str | None = None,
    attributes: dict | None = None,
    data_version_id: uuid.UUID | None = None,
) -> Entity:
    """Idempotently write an entity keyed by (type, business key).

    When the entity already exists, ``display_name`` is overwritten if one is
    given and non-empty ``attributes`` are merged over the stored ones; the
    existing row's ``data_version_id`` is left untouched. Otherwise a new
    entity is inserted and flushed.

    Returns:
        The existing or newly created ``Entity``.
    """
    lookup = session.query(Entity).filter(
        Entity.entity_type == entity_type.value, Entity.business_key == business_key
    )
    current = lookup.one_or_none()

    if current is None:
        created = Entity(
            entity_type=entity_type.value,
            business_key=business_key,
            display_name=display_name,
            attributes=attributes or {},
            data_version_id=data_version_id,
        )
        session.add(created)
        session.flush()
        return created

    if display_name is not None:
        current.display_name = display_name
    if attributes:
        # Assign a fresh dict rather than mutating the stored one in place.
        merged = dict(current.attributes or {})
        merged.update(attributes)
        current.attributes = merged
    return current
def add_relationship(
    session: Session,
    rel_type: RelationshipType,
    source: Entity,
    target: Entity,
    attributes: dict | None = None,
    data_version_id: uuid.UUID | None = None,
) -> EntityRelationship:
    """Add one relationship edge after validating it against the ontology.

    Returns:
        The newly added, flushed ``EntityRelationship``.

    Raises:
        OntologyViolationError: when ``rel_type`` is not allowed between the
            source and target entity types.
    """
    src_type = EntityType(source.entity_type)
    tgt_type = EntityType(target.entity_type)
    allowed = is_valid_relationship(rel_type, src_type, tgt_type)
    if not allowed:
        raise OntologyViolationError(
            f"关系 {rel_type.value} 不允许从 {src_type.value} 指向 {tgt_type.value}"
        )

    edge = EntityRelationship(
        rel_type=rel_type.value,
        source_id=source.id,
        target_id=target.id,
        attributes=attributes or {},
        data_version_id=data_version_id,
    )
    session.add(edge)
    session.flush()
    return edge
# Multi-hop penetration: traverse relationship edges as undirected and return the
# entities connected to the start node within max_depth hops, each with its
# minimum depth. Used to identify "suspected same controlling person / related-party
# network". The path array keeps a walk from revisiting a node it already contains.
_TRAVERSE_SQL = text(
    """
WITH RECURSIVE reachable(entity_id, depth, path) AS (
SELECT :start_id, 0, ARRAY[:start_id]
UNION ALL
SELECT
CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END,
rc.depth + 1,
rc.path || CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END
FROM reachable rc
JOIN entity_relationship r
ON (r.source_id = rc.entity_id OR r.target_id = rc.entity_id)
WHERE rc.depth < :max_depth
AND NOT (
CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END
= ANY(rc.path)
)
)
SELECT DISTINCT entity_id, MIN(depth) AS depth
FROM reachable
WHERE entity_id <> :start_id
GROUP BY entity_id
ORDER BY depth;
"""
)
def find_related_entities(
    session: Session, start_id: uuid.UUID, max_depth: int = 3
) -> list[tuple[uuid.UUID, int]]:
    """Return ``(entity_id, shortest hop count)`` pairs for entities connected to the start.

    Only entities within ``max_depth`` hops are included, in the order
    produced by ``_TRAVERSE_SQL``.
    """
    params = {"start_id": start_id, "max_depth": max_depth}
    result = session.execute(_TRAVERSE_SQL, params)
    pairs = []
    for row in result.all():
        pairs.append((row[0], row[1]))
    return pairs
+157
View File
@@ -0,0 +1,157 @@
"""审计数据中台 ORM 模型。
涵盖:数据版本、本体实体、知识图谱关系边、双时态属性、时序事件。
对应需求 R2 / R3,建模决策见 ADR-0002。
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy import (
DateTime,
Float,
ForeignKey,
Index,
Integer,
String,
Text,
UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.db import Base
def _uuid() -> uuid.UUID:
    """Produce a new random (version 4) UUID."""
    new_id = uuid.uuid4()
    return new_id
class DataVersion(Base):
    """Data version registry: source / batch / time / row count of each ingested batch, supporting traceable conclusions (R3)."""

    __tablename__ = "data_version"
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    source_system: Mapped[str] = mapped_column(String(64), nullable=False)
    batch_label: Mapped[str] = mapped_column(String(128), nullable=False)
    row_count: Mapped[int] = mapped_column(Integer, default=0)
    # Defaults to the current UTC time when the row is created.
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: dt.datetime.now(dt.UTC)
    )
    note: Mapped[str | None] = mapped_column(Text, nullable=True)
class Entity(Base):
    """Ontology entity node (a vertex of the knowledge graph).

    business_key is the business primary key in the source system and is used
    for master-data alignment (unifying the same entity across systems).
    """

    __tablename__ = "entity"
    __table_args__ = (
        UniqueConstraint("entity_type", "business_key", name="uq_entity_type_bizkey"),
        Index("ix_entity_type", "entity_type"),
    )
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    entity_type: Mapped[str] = mapped_column(String(32), nullable=False)
    business_key: Mapped[str] = mapped_column(String(128), nullable=False)
    display_name: Mapped[str | None] = mapped_column(String(256), nullable=True)
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
    # Master-data alignment: the canonical ("golden") entity this row is merged into
    # (same controlling person / same subject). NULL means this row is itself canonical.
    canonical_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=True
    )
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )
class EntityRelationship(Base):
    """Knowledge-graph relationship edge (directed). Multi-hop penetration traverses this table with a recursive CTE."""

    __tablename__ = "entity_relationship"
    __table_args__ = (
        Index("ix_rel_source", "source_id"),
        Index("ix_rel_target", "target_id"),
        Index("ix_rel_type", "rel_type"),
    )
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    rel_type: Mapped[str] = mapped_column(String(32), nullable=False)
    source_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
    )
    target_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
    )
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )
    # Both relationships point at Entity, so each names its own foreign key explicitly.
    source: Mapped[Entity] = relationship(foreign_keys=[source_id])
    target: Mapped[Entity] = relationship(foreign_keys=[target_id])
class BitemporalFact(Base):
    """Bitemporal fact: a record of how an entity's attribute/state changes over time.

    - Business validity period valid_from/valid_to (application time)
    - System record period system_from/system_to (transaction time)
    Replaying history = filtering both timelines by a given
    (as_of_valid, as_of_system) pair (see the repository module).
    """

    __tablename__ = "bitemporal_fact"
    __table_args__ = (
        Index("ix_btf_entity_attr", "entity_id", "attr_name"),
    )
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    entity_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
    )
    attr_name: Mapped[str] = mapped_column(String(64), nullable=False)
    attr_value: Mapped[dict] = mapped_column(JSONB, default=dict)
    valid_from: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), nullable=False)
    valid_to: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # system_from defaults to the current UTC time when the row is created.
    system_from: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: dt.datetime.now(dt.UTC)
    )
    system_to: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )
class MetricEvent(Base):
    """Time-series event: behaviour/metric data (user lifecycle, collections, call traffic, commission, resource usage).

    After deployment it is converted to a hypertable via TimescaleDB
    create_hypertable('metric_event', 'event_time').
    """

    # NOTE(review): TimescaleDB generally requires the partitioning column to be
    # part of every unique index / primary key, while `id` alone is the primary
    # key here — verify that create_hypertable succeeds on this table.
    __tablename__ = "metric_event"
    __table_args__ = (
        Index("ix_metric_subject_time", "subject_type", "subject_key", "event_time"),
        Index("ix_metric_name_time", "metric_name", "event_time"),
    )
    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    event_time: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), nullable=False)
    subject_type: Mapped[str] = mapped_column(String(32), nullable=False)  # e.g. msisdn/channel
    subject_key: Mapped[str] = mapped_column(String(128), nullable=False)
    metric_name: Mapped[str] = mapped_column(String(64), nullable=False)  # e.g. traffic_mb/commission
    metric_value: Mapped[float] = mapped_column(Float, default=0.0)
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )
+86
View File
@@ -0,0 +1,86 @@
"""审计本体(Ontology)定义。
定义电信内审域的核心实体类型与关系类型,作为知识图谱与主数据对齐的基准。
对应需求 R2。
"""
from __future__ import annotations
from enum import Enum
class EntityType(str, Enum):
    """Core entity types of the audit ontology."""

    CUSTOMER = "customer"  # customer (including government/enterprise)
    CONTRACT = "contract"  # contract
    MSISDN = "msisdn"  # phone number
    IMEI = "imei"  # terminal device
    ACCOUNT = "account"  # account (paying / receiving)
    WORK_ORDER = "work_order"  # work order
    SUPPLIER = "supplier"  # supplier
    SETTLEMENT = "settlement"  # settlement statement
    EMPLOYEE = "employee"  # employee
    CHANNEL = "channel"  # channel / agent
    LEGAL_PERSON = "legal_person"  # legal representative / natural person
    ADDRESS = "address"  # address
class RelationshipType(str, Enum):
    """Core (directed) relationship types of the audit ontology."""

    SIGNED = "signed"  # customer -signs-> contract
    PAID_BY = "paid_by"  # contract -collection account-> account
    OWNS_ACCOUNT = "owns_account"  # customer/supplier/legal person -owns-> account
    REGISTERED_AT = "registered_at"  # customer/supplier -registered address-> address
    LEGAL_REP_OF = "legal_rep_of"  # legal person -legal representative of-> customer/supplier
    RELATED_TO = "related_to"  # legal person -relative/affiliate-> legal person
    HOLDS_MSISDN = "holds_msisdn"  # customer -holds-> phone number
    BOUND_DEVICE = "bound_device"  # phone number -bound to-> IMEI
    BELONGS_TO_CHANNEL = "belongs_to_channel"  # phone number/contract -belongs to-> channel
    SUPPLIES = "supplies"  # supplier -supplies-> contract/work order
    HANDLED_BY = "handled_by"  # work order -handled by-> employee
    SETTLES = "settles"  # settlement statement -settles-> contract
# Allowed (source entity types, target entity types) per relationship type;
# consulted by is_valid_relationship() to validate graph writes.
RELATIONSHIP_DOMAIN: dict[RelationshipType, tuple[set[EntityType], set[EntityType]]] = {
    RelationshipType.SIGNED: ({EntityType.CUSTOMER}, {EntityType.CONTRACT}),
    RelationshipType.PAID_BY: ({EntityType.CONTRACT}, {EntityType.ACCOUNT}),
    RelationshipType.OWNS_ACCOUNT: (
        {EntityType.CUSTOMER, EntityType.SUPPLIER, EntityType.LEGAL_PERSON},
        {EntityType.ACCOUNT},
    ),
    RelationshipType.REGISTERED_AT: (
        {EntityType.CUSTOMER, EntityType.SUPPLIER},
        {EntityType.ADDRESS},
    ),
    RelationshipType.LEGAL_REP_OF: (
        {EntityType.LEGAL_PERSON},
        {EntityType.CUSTOMER, EntityType.SUPPLIER},
    ),
    RelationshipType.RELATED_TO: ({EntityType.LEGAL_PERSON}, {EntityType.LEGAL_PERSON}),
    RelationshipType.HOLDS_MSISDN: ({EntityType.CUSTOMER}, {EntityType.MSISDN}),
    RelationshipType.BOUND_DEVICE: ({EntityType.MSISDN}, {EntityType.IMEI}),
    RelationshipType.BELONGS_TO_CHANNEL: (
        {EntityType.MSISDN, EntityType.CONTRACT},
        {EntityType.CHANNEL},
    ),
    RelationshipType.SUPPLIES: (
        {EntityType.SUPPLIER},
        {EntityType.CONTRACT, EntityType.WORK_ORDER},
    ),
    RelationshipType.HANDLED_BY: ({EntityType.WORK_ORDER}, {EntityType.EMPLOYEE}),
    RelationshipType.SETTLES: ({EntityType.SETTLEMENT}, {EntityType.CONTRACT}),
}
def is_valid_relationship(
    rel: RelationshipType, source: EntityType, target: EntityType
) -> bool:
    """Tell whether ``source -rel-> target`` satisfies the ontology constraints.

    Returns False when ``rel`` has no entry in RELATIONSHIP_DOMAIN, or when
    either endpoint type is outside the sets allowed for that relationship.
    """
    try:
        allowed_sources, allowed_targets = RELATIONSHIP_DOMAIN[rel]
    except KeyError:
        # Unknown relationship type: nothing is permitted.
        return False
    if source not in allowed_sources:
        return False
    return target in allowed_targets
+40
View File
@@ -0,0 +1,40 @@
"""数据库引擎与会话管理。"""
from __future__ import annotations
from collections.abc import Iterator
from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
from app.config import get_settings
class Base(DeclarativeBase):
    """Declarative base class shared by every ORM model."""
# Module-level singletons, populated lazily by get_engine() / get_sessionmaker().
_engine = None
_SessionLocal: sessionmaker[Session] | None = None
def get_engine():
    """Return the module-wide SQLAlchemy engine, building it on first call."""
    global _engine
    if _engine is not None:
        return _engine
    database_url = get_settings().database_url
    _engine = create_engine(database_url, pool_pre_ping=True, future=True)
    return _engine
def get_sessionmaker() -> sessionmaker[Session]:
    """Return the module-wide session factory, building it on first call."""
    global _SessionLocal
    if _SessionLocal is not None:
        return _SessionLocal
    _SessionLocal = sessionmaker(bind=get_engine(), expire_on_commit=False)
    return _SessionLocal
def get_session() -> Iterator[Session]:
    """Yield one session for FastAPI dependency injection and close it afterwards."""
    session_factory = get_sessionmaker()
    db_session = session_factory()
    try:
        yield db_session
    finally:
        # Same clean-up the Session context manager performs on exit.
        db_session.close()
+10
View File
@@ -0,0 +1,10 @@
"""LLM Provider 抽象层。
通过统一接口隔离 LLM 实现,使开发期可用公网千问、生产期无缝切换本地 vLLM。
强约束:"数据零出域"红线由 provider 工厂在 prod 环境拦截公网 Provider。
"""
from app.llm.base import ChatMessage, LLMProvider, LLMResponse
from app.llm.factory import get_llm_provider
__all__ = ["ChatMessage", "LLMProvider", "LLMResponse", "get_llm_provider"]
+44
View File
@@ -0,0 +1,44 @@
"""LLM Provider 抽象接口与数据模型。"""
from __future__ import annotations
import abc
from dataclasses import dataclass, field
@dataclass
class ChatMessage:
    """One message of a chat conversation sent to an LLM provider."""

    role: str  # "system" | "user" | "assistant"
    content: str
@dataclass
class LLMResponse:
    """Result of a chat completion, tagged with the provider that produced it."""

    content: str
    model: str
    provider: str
    # Whether the call went through an egress (public network) channel;
    # kept so it can be recorded in the audit trail.
    egress: bool = False
    raw: dict = field(default_factory=dict)
class LLMProvider(abc.ABC):
    """Uniform interface implemented by every LLM backend.

    Business code depends only on this interface; switching between a public
    and a local backend is a configuration change, not a caller change.
    """

    #: Provider name.
    name: str = "base"
    #: Whether the provider goes over the public network (egress).
    #: Providers with egress=True are forbidden in the prod environment.
    egress: bool = False

    @abc.abstractmethod
    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """Run a synchronous chat completion over ``messages``."""
        raise NotImplementedError

    @abc.abstractmethod
    def health(self) -> bool:
        """Liveness probe: report whether the provider is usable."""
        raise NotImplementedError
+31
View File
@@ -0,0 +1,31 @@
"""LLM Provider 工厂:按配置创建 provider,并执行数据零出域红线校验。"""
from __future__ import annotations
from app.config import EGRESS_PROVIDERS, LLMProviderName, Settings, get_settings
from app.llm.base import LLMProvider
from app.llm.providers import DashScopeProvider, VllmProvider
class EgressPolicyError(RuntimeError):
    """Raised when the "zero data egress" red line is violated."""
def get_llm_provider(settings: Settings | None = None) -> LLMProvider:
settings = settings or get_settings()
# 红线:prod 环境禁止公网 provider
if settings.is_prod and settings.llm_provider in EGRESS_PROVIDERS:
raise EgressPolicyError(
f"数据零出域红线违规:prod 环境禁止使用公网 LLM Provider "
f"'{settings.llm_provider.value}'"
)
if settings.llm_provider == LLMProviderName.dashscope:
return DashScopeProvider(
api_key=settings.dashscope_api_key, model=settings.dashscope_model
)
if settings.llm_provider == LLMProviderName.vllm:
return VllmProvider(base_url=settings.vllm_base_url, model=settings.vllm_model)
raise ValueError(f"未知的 LLM Provider: {settings.llm_provider}")
+80
View File
@@ -0,0 +1,80 @@
"""具体 LLM Provider 实现:DashScope(公网千问,仅 dev)、vLLM(本地,prod)。
两者均走 OpenAI 兼容的 /chat/completions 协议。
"""
from __future__ import annotations
import httpx
from app.llm.base import ChatMessage, LLMProvider, LLMResponse
class DashScopeProvider(LLMProvider):
    """Public Qwen endpoint (DashScope, OpenAI-compatible mode).

    Development/testing only, and only with desensitised or sample data.
    """

    name = "dashscope"
    egress = True  # goes over the public network
    _BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"

    def __init__(self, api_key: str, model: str, timeout: float = 30.0) -> None:
        self._api_key = api_key
        self._model = model
        self._timeout = timeout

    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """POST the conversation to /chat/completions and wrap the first choice."""
        endpoint = f"{self._BASE_URL}/chat/completions"
        auth_headers = {"Authorization": f"Bearer {self._api_key}"}
        body = {
            "model": self._model,
            "messages": [{"role": msg.role, "content": msg.content} for msg in messages],
        }
        # Caller-supplied options are merged last, so they win on key clashes.
        body.update(kwargs)

        with httpx.Client(timeout=self._timeout) as http:
            reply = http.post(endpoint, json=body, headers=auth_headers)
        reply.raise_for_status()

        parsed = reply.json()
        first_choice = parsed["choices"][0]
        return LLMResponse(
            content=first_choice["message"]["content"],
            model=self._model,
            provider=self.name,
            egress=True,
            raw=parsed,
        )

    def health(self) -> bool:
        """Report the provider as usable whenever an API key is configured."""
        return bool(self._api_key)
class VllmProvider(LLMProvider):
    """Local vLLM server (OpenAI-compatible); the production backend, no egress."""

    name = "vllm"
    egress = False

    def __init__(self, base_url: str, model: str, timeout: float = 60.0) -> None:
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._timeout = timeout

    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """POST the conversation to /chat/completions and wrap the first choice."""
        endpoint = f"{self._base_url}/chat/completions"
        body = {
            "model": self._model,
            "messages": [{"role": msg.role, "content": msg.content} for msg in messages],
        }
        # Caller-supplied options are merged last, so they win on key clashes.
        body.update(kwargs)

        with httpx.Client(timeout=self._timeout) as http:
            reply = http.post(endpoint, json=body)
        reply.raise_for_status()

        parsed = reply.json()
        first_choice = parsed["choices"][0]
        return LLMResponse(
            content=first_choice["message"]["content"],
            model=self._model,
            provider=self.name,
            egress=False,
            raw=parsed,
        )

    def health(self) -> bool:
        """Probe GET /models; healthy only on HTTP 200, False on any httpx.HTTPError."""
        models_url = f"{self._base_url}/models"
        try:
            with httpx.Client(timeout=5.0) as probe:
                status = probe.get(models_url).status_code
        except httpx.HTTPError:
            return False
        return status == 200
+45
View File
@@ -0,0 +1,45 @@
"""AIAudit FastAPI 应用入口。"""
from __future__ import annotations
from contextlib import asynccontextmanager
from fastapi import FastAPI
from app import __version__
from app.api.datahub import router as datahub_router
from app.config import get_settings
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook.

    Runs the zero-egress policy check at startup; an exception raised by the
    check aborts application startup.
    """
    get_settings().validate_egress_policy()
    yield
# ASGI application object; the lifespan hook runs the startup policy check.
app = FastAPI(
    title="AIAudit · 本地 AI 内审平台",
    version=__version__,
    lifespan=lifespan,
)
# Mount the data-hub router.
app.include_router(datahub_router)
@app.get("/health")
def health() -> dict:
    """Liveness probe: reports a fixed status plus the package version."""
    payload = {"status": "ok"}
    payload["version"] = __version__
    return payload
@app.get("/health/config")
def health_config() -> dict:
    """Config/compliance probe.

    Exposes the environment name, the configured LLM provider and whether the
    app runs in prod; no secrets are included.
    """
    cfg = get_settings()
    report: dict = {}
    report["env"] = cfg.aiaudit_env.value
    report["llm_provider"] = cfg.llm_provider.value
    report["egress_blocked_in_prod"] = cfg.is_prod
    return report
+7
View File
@@ -0,0 +1,7 @@
# 数据库迁移(Alembic
- 生成迁移:`alembic revision --autogenerate -m "描述"`
- 应用迁移:`alembic upgrade head`
- 回滚一步:`alembic downgrade -1`
模型定义见 `app/datahub/models.py`;连接串取自应用配置(`DATABASE_URL`)。
View File
+59
View File
@@ -0,0 +1,59 @@
"""Alembic 迁移环境。
从应用配置读取数据库 URL,并以 app.db.Base 的元数据作为 autogenerate 目标。
"""
from __future__ import annotations
from logging.config import fileConfig
from alembic import context
from sqlalchemy import engine_from_config, pool
from app.audit import models as audit_models # noqa: F401,E402
from app.clues import models as clue_models # noqa: F401,E402
from app.config import get_settings
# 导入模型以注册到 Base.metadata
from app.datahub import models # noqa: F401,E402
from app.db import Base
config = context.config
if config.config_file_name is not None:
fileConfig(config.config_file_name)
# Override sqlalchemy.url with the database URL from the application settings.
# Alembic keeps main options in a ConfigParser, where "%" starts an
# interpolation; a URL with percent-encoded credentials (e.g. "%40" for "@")
# would be rejected, so every "%" is escaped as "%%".  Reading the option back
# (get_main_option / get_section) yields the original, unescaped URL.
config.set_main_option("sqlalchemy.url", get_settings().database_url.replace("%", "%%"))
target_metadata = Base.metadata
def run_migrations_offline() -> None:
    """Run migrations in offline mode, configured from the URL alone (no engine)."""
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)
    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations in online mode over a connection from a NullPool engine."""
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(ini_section, prefix="sqlalchemy.", poolclass=pool.NullPool)
    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
# Pick the migration runner that matches how Alembic was invoked.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
+24
View File
@@ -0,0 +1,24 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}
@@ -0,0 +1,140 @@
"""初始化数据中台表:数据版本 / 实体 / 关系 / 双时态事实 / 时序事件
Revision ID: 0001_init_datahub
Revises:
Create Date: 2026-06
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
revision: str = "0001_init_datahub"
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Create the data-hub tables, their indexes and extension-backed features.

    Tables are created parents-first so foreign keys resolve: data_version,
    entity, entity_relationship, bitemporal_fact, metric_event.  The
    TimescaleDB hypertable conversion and the bitemporal exclusion constraint
    are applied only when the matching PostgreSQL extension is already
    installed; this migration does not install either extension itself.
    """
    # data_version
    op.create_table(
        "data_version",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("source_system", sa.String(64), nullable=False),
        sa.Column("batch_label", sa.String(128), nullable=False),
        sa.Column("row_count", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("ingested_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("note", sa.Text(), nullable=True),
    )
    # entity
    op.create_table(
        "entity",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("entity_type", sa.String(32), nullable=False),
        sa.Column("business_key", sa.String(128), nullable=False),
        sa.Column("display_name", sa.String(256), nullable=True),
        sa.Column("attributes", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("canonical_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
        # canonical_id is a self-reference to another entity row.
        sa.ForeignKeyConstraint(["canonical_id"], ["entity.id"]),
        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
        sa.UniqueConstraint("entity_type", "business_key", name="uq_entity_type_bizkey"),
    )
    op.create_index("ix_entity_type", "entity", ["entity_type"])
    # entity_relationship
    op.create_table(
        "entity_relationship",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("rel_type", sa.String(32), nullable=False),
        sa.Column("source_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("target_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("attributes", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.ForeignKeyConstraint(["source_id"], ["entity.id"]),
        sa.ForeignKeyConstraint(["target_id"], ["entity.id"]),
        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
    )
    op.create_index("ix_rel_source", "entity_relationship", ["source_id"])
    op.create_index("ix_rel_target", "entity_relationship", ["target_id"])
    op.create_index("ix_rel_type", "entity_relationship", ["rel_type"])
    # bitemporal_fact
    op.create_table(
        "bitemporal_fact",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("entity_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("attr_name", sa.String(64), nullable=False),
        sa.Column("attr_value", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("valid_from", sa.DateTime(timezone=True), nullable=False),
        sa.Column("valid_to", sa.DateTime(timezone=True), nullable=True),
        sa.Column("system_from", sa.DateTime(timezone=True), nullable=False),
        sa.Column("system_to", sa.DateTime(timezone=True), nullable=True),
        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"]),
        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
    )
    op.create_index("ix_btf_entity_attr", "bitemporal_fact", ["entity_id", "attr_name"])
    # metric_event (time series)
    op.create_table(
        "metric_event",
        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("event_time", sa.DateTime(timezone=True), nullable=False),
        sa.Column("subject_type", sa.String(32), nullable=False),
        sa.Column("subject_key", sa.String(128), nullable=False),
        sa.Column("metric_name", sa.String(64), nullable=False),
        sa.Column("metric_value", sa.Float(), nullable=False, server_default="0"),
        sa.Column("attributes", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
        # A hypertable's primary key has to include the partitioning column event_time.
        sa.PrimaryKeyConstraint("id", "event_time"),
        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
    )
    op.create_index(
        "ix_metric_subject_time",
        "metric_event",
        ["subject_type", "subject_key", "event_time"],
    )
    op.create_index("ix_metric_name_time", "metric_event", ["metric_name", "event_time"])
    # Convert to a TimescaleDB hypertable (skipped when the extension is absent,
    # so tests can run in environments without timescaledb).
    op.execute(
        """
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'timescaledb') THEN
PERFORM create_hypertable('metric_event', 'event_time', if_not_exists => TRUE);
END IF;
END$$;
"""
    )
    # Bitemporal exclusion constraint: for one entity and one attribute, the
    # business validity ranges of current rows (system_to IS NULL) must not
    # overlap.  Requires btree_gist; silently skipped when it is not installed.
    op.execute(
        """
DO $$
BEGIN
IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'btree_gist') THEN
ALTER TABLE bitemporal_fact
ADD CONSTRAINT ex_btf_no_overlap
EXCLUDE USING gist (
entity_id WITH =,
attr_name WITH =,
tstzrange(valid_from, valid_to) WITH &&
) WHERE (system_to IS NULL);
END IF;
END$$;
"""
    )
def downgrade() -> None:
    """Drop every data-hub table, children before the tables they reference."""
    for child_table in ("metric_event", "bitemporal_fact", "entity_relationship"):
        op.drop_table(child_table)
    op.drop_index("ix_entity_type", table_name="entity")
    for parent_table in ("entity", "data_version"):
        op.drop_table(parent_table)
@@ -0,0 +1,146 @@
"""线索引擎与系统自审计表:clue / clue_status_history / working_paper / audit_log
Revision ID: 0002_clues_audit
Revises: 0001_init_datahub
Create Date: 2026-06
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
revision: str = "0002_clues_audit"
down_revision: str | None = "0001_init_datahub"
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
# PostgreSQL enum types used by the clue table.  create_type=False keeps
# op.create_table from emitting CREATE TYPE; upgrade()/downgrade() create and
# drop the types explicitly instead.
confidence_tier = postgresql.ENUM(
    "high", "medium", "low", name="confidence_tier", create_type=False
)
clue_status = postgresql.ENUM(
    "new", "assigned", "reviewing", "confirmed", "dismissed",
    "rectifying", "transferred", "closed", name="clue_status", create_type=False,
)
def upgrade() -> None:
    """Create the clue-engine and self-audit tables plus their protective triggers.

    Creates the two enum types, then clue, clue_status_history, working_paper
    and audit_log with their indexes, and finally installs row-level triggers
    that reject DELETE on clue and audit_log and UPDATE on audit_log.
    """
    bind = op.get_bind()
    # checkfirst=True: only create the enum types when they do not exist yet.
    confidence_tier.create(bind, checkfirst=True)
    clue_status.create(bind, checkfirst=True)
    op.create_table(
        "clue",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("title", sa.String(256), nullable=False),
        sa.Column("risk_domain", sa.String(32), nullable=False),
        sa.Column("scenario_code", sa.String(32), nullable=False),
        sa.Column("confidence", confidence_tier, nullable=False),
        sa.Column("score", sa.Float(), nullable=False, server_default="0"),
        sa.Column("status", clue_status, nullable=False, server_default="new"),
        sa.Column("rationale", sa.Text(), nullable=False, server_default=""),
        sa.Column("evidence", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("subjects", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("amount_involved", sa.Float(), nullable=True),
        sa.Column("assignee", sa.String(64), nullable=True),
        sa.Column("feedback", sa.String(16), nullable=True),
        sa.Column("model_version", sa.String(64), nullable=True),
        sa.Column("rule_version", sa.String(64), nullable=True),
        # NOTE(review): unlike the data-hub tables, no foreign key to
        # data_version is declared for this column -- confirm that is intended.
        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
    )
    op.create_index("ix_clue_status", "clue", ["status"])
    op.create_index("ix_clue_scenario", "clue", ["scenario_code"])
    op.create_index("ix_clue_assignee", "clue", ["assignee"])
    op.create_table(
        "clue_status_history",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("clue_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("from_status", sa.String(16), nullable=True),
        sa.Column("to_status", sa.String(16), nullable=False),
        sa.Column("actor", sa.String(64), nullable=False),
        sa.Column("note", sa.Text(), nullable=True),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.ForeignKeyConstraint(["clue_id"], ["clue.id"]),
    )
    op.create_index("ix_csh_clue", "clue_status_history", ["clue_id"])
    op.create_table(
        "working_paper",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        sa.Column("clue_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("content", sa.Text(), nullable=False, server_default=""),
        sa.Column("conclusion", sa.String(32), nullable=True),
        sa.Column("author", sa.String(64), nullable=False),
        sa.Column("snapshot", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.ForeignKeyConstraint(["clue_id"], ["clue.id"]),
    )
    op.create_index("ix_wp_clue", "working_paper", ["clue_id"])
    op.create_table(
        "audit_log",
        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
        # Database-generated sequence number; made unique by ix_audit_seq below.
        sa.Column("seq", sa.BigInteger(), sa.Identity(always=False), nullable=False),
        sa.Column("actor", sa.String(64), nullable=False),
        sa.Column("role", sa.String(32), nullable=True),
        sa.Column("action", sa.String(64), nullable=False),
        sa.Column("target_type", sa.String(64), nullable=True),
        sa.Column("target_id", sa.String(128), nullable=True),
        sa.Column("detail", postgresql.JSONB(), nullable=False, server_default="{}"),
        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("prev_hash", sa.String(64), nullable=True),
        sa.Column("entry_hash", sa.String(64), nullable=False),
    )
    op.create_index("ix_audit_actor", "audit_log", ["actor"])
    op.create_index("ix_audit_action", "audit_log", ["action"])
    op.create_index("ix_audit_seq", "audit_log", ["seq"], unique=True)
    # R19: forbid physical deletion of clues and audit-log rows (database-level
    # trigger as a last line of defence).
    # NOTE(review): these are FOR EACH ROW ... BEFORE DELETE triggers; whether
    # TRUNCATE also needs to be blocked should be confirmed.
    op.execute(
        """
CREATE OR REPLACE FUNCTION forbid_delete() RETURNS trigger AS $$
BEGIN
RAISE EXCEPTION '禁止删除:% 表受 R19 不可删除约束保护', TG_TABLE_NAME;
END;
$$ LANGUAGE plpgsql;
"""
    )
    op.execute(
        "CREATE TRIGGER trg_clue_no_delete BEFORE DELETE ON clue "
        "FOR EACH ROW EXECUTE FUNCTION forbid_delete();"
    )
    op.execute(
        "CREATE TRIGGER trg_audit_no_delete BEFORE DELETE ON audit_log "
        "FOR EACH ROW EXECUTE FUNCTION forbid_delete();"
    )
    # The audit log is append-only: updates are rejected as well.
    op.execute(
        """
CREATE OR REPLACE FUNCTION forbid_update() RETURNS trigger AS $$
BEGIN
RAISE EXCEPTION '禁止更新:% 表为仅追加日志', TG_TABLE_NAME;
END;
$$ LANGUAGE plpgsql;
"""
    )
    op.execute(
        "CREATE TRIGGER trg_audit_no_update BEFORE UPDATE ON audit_log "
        "FOR EACH ROW EXECUTE FUNCTION forbid_update();"
    )
def downgrade() -> None:
    """Remove triggers, tables, enum types and trigger functions, in that order."""
    guarded = (
        ("trg_audit_no_update", "audit_log"),
        ("trg_audit_no_delete", "audit_log"),
        ("trg_clue_no_delete", "clue"),
    )
    for trigger_name, table_name in guarded:
        op.execute(f"DROP TRIGGER IF EXISTS {trigger_name} ON {table_name};")

    # Children of clue go before clue itself.
    for table_name in ("audit_log", "working_paper", "clue_status_history", "clue"):
        op.drop_table(table_name)

    for enum_type in (clue_status, confidence_tier):
        enum_type.drop(op.get_bind(), checkfirst=True)

    for function_name in ("forbid_update", "forbid_delete"):
        op.execute(f"DROP FUNCTION IF EXISTS {function_name}();")
+24
View File
@@ -0,0 +1,24 @@
[project]
name = "aiaudit-backend"
version = "0.1.0"
description = "AIAudit 本地 AI 内审平台后端"
requires-python = ">=3.11"
[tool.pytest.ini_options]
testpaths = ["tests"]
pythonpath = ["."]
asyncio_mode = "auto"
asyncio_default_fixture_loop_scope = "function"
[tool.ruff]
line-length = 100
target-version = "py311"
[tool.ruff.lint]
select = ["E", "F", "I", "W", "UP", "B"]
# B008FastAPI 依赖注入 Depends() 作为默认值是官方推荐用法
ignore = ["B008"]
[tool.mypy]
python_version = "3.11"
ignore_missing_imports = true
+5
View File
@@ -0,0 +1,5 @@
-r requirements.txt
pytest==8.3.4
pytest-asyncio==0.25.0
ruff==0.8.4
mypy==1.14.0
+11
View File
@@ -0,0 +1,11 @@
fastapi==0.115.6
uvicorn[standard]==0.34.0
pydantic==2.10.4
pydantic-settings==2.7.1
sqlalchemy==2.0.36
psycopg[binary]==3.2.3
alembic==1.14.0
celery==5.4.0
redis==5.2.1
httpx==0.28.1
python-dotenv==1.0.1
View File
+41
View File
@@ -0,0 +1,41 @@
"""集成测试 fixture:连接本地 PostgreSQL 16,按事务隔离并回滚。
需要可连接的数据库(DATABASE_URL)。无法连接时跳过整组集成测试。
"""
from __future__ import annotations
import pytest
from sqlalchemy import text
from sqlalchemy.exc import OperationalError
from app.db import get_engine
@pytest.fixture(scope="session")
def db_available() -> bool:
    """Session-scoped probe: True when ``SELECT 1`` succeeds against the engine."""
    reachable = False
    try:
        with get_engine().connect() as probe:
            probe.execute(text("SELECT 1"))
            reachable = True
    except OperationalError:
        reachable = False
    return reachable
@pytest.fixture()
def session(db_available):
    """Yield a Session bound to a connection whose outer transaction is rolled back.

    The whole test is skipped when the database probe failed.
    """
    if not db_available:
        pytest.skip("数据库不可用,跳过集成测试")

    from sqlalchemy.orm import Session

    connection = get_engine().connect()
    outer_tx = connection.begin()
    db_session = Session(bind=connection)
    try:
        yield db_session
    finally:
        db_session.close()
        # The test itself may already have ended the transaction (e.g. after an error).
        if outer_tx.is_active:
            outer_tx.rollback()
        connection.close()
@@ -0,0 +1,49 @@
"""双时态集成测试(需 PostgreSQL)。
验证 R3:按历史业务时点回放属性值,以及双时态排他约束防止有效期重叠。
"""
from __future__ import annotations
import datetime as dt
import pytest
from sqlalchemy.exc import IntegrityError
from app.datahub import bitemporal_repo as btr
from app.datahub.graph_repo import upsert_entity
from app.datahub.ontology import EntityType
def test_bitemporal_replay(session):
    """Replaying at different business instants returns different attribute values."""
    customer = upsert_entity(session, EntityType.CUSTOMER, "CUST_BT", "丁公司")
    session.flush()

    start_of_year = dt.datetime(2025, 1, 1, tzinfo=dt.UTC)
    mid_year = dt.datetime(2025, 6, 1, tzinfo=dt.UTC)
    btr.record_fact(
        session, customer.id, "credit_level", {"v": "A"},
        valid_from=start_of_year, valid_to=mid_year,
    )
    btr.record_fact(session, customer.id, "credit_level", {"v": "C"}, valid_from=mid_year)
    session.flush()

    march = dt.datetime(2025, 3, 1, tzinfo=dt.UTC)
    september = dt.datetime(2025, 9, 1, tzinfo=dt.UTC)
    early = btr.as_of(session, customer.id, "credit_level", march)
    late = btr.as_of(session, customer.id, "credit_level", september)

    assert early is not None and early.attr_value["v"] == "A"
    assert late is not None and late.attr_value["v"] == "C"
def test_bitemporal_exclusion_constraint(session):
    """Overlapping validity ranges for one entity/attribute are rejected.

    The ``ex_btf_no_overlap`` exclusion constraint is only created by the
    initial migration when the btree_gist extension is installed, so the test
    is skipped (rather than failing with "DID NOT RAISE") when the constraint
    is absent from the database under test.
    """
    # Local import: keeps this guard self-contained within the test.
    from sqlalchemy import text

    constraint_row = session.execute(
        text("SELECT 1 FROM pg_constraint WHERE conname = 'ex_btf_no_overlap'")
    ).first()
    if constraint_row is None:
        pytest.skip("ex_btf_no_overlap constraint not installed (btree_gist missing)")

    cust = upsert_entity(session, EntityType.CUSTOMER, "CUST_EX", "戊公司")
    session.flush()
    t1 = dt.datetime(2025, 1, 1, tzinfo=dt.UTC)
    t3 = dt.datetime(2025, 12, 1, tzinfo=dt.UTC)
    t2 = dt.datetime(2025, 6, 1, tzinfo=dt.UTC)
    btr.record_fact(session, cust.id, "status", {"v": "active"}, valid_from=t1, valid_to=t3)
    session.flush()
    # Overlaps the previous [t1, t3) range; the original test expects the
    # constraint to fire on the flush performed inside record_fact.
    with pytest.raises(IntegrityError):
        btr.record_fact(session, cust.id, "status", {"v": "frozen"}, valid_from=t2, valid_to=None)
@@ -0,0 +1,87 @@
"""线索闭环 + 系统自审计集成测试(需 PostgreSQL)。
覆盖 R7/R17/R18/R19:线索生成与分级、状态流转、底稿、审计哈希链、线索不可删。
"""
from __future__ import annotations
import pytest
from sqlalchemy import text
from sqlalchemy.exc import InternalError, ProgrammingError
from app.audit import service as audit
from app.clues import service as clue_svc
from app.clues.models import ClueStatus, ConfidenceTier
def _new_clue(session, score=0.9):
    """Create a sample clue with the given score through the clue service."""
    clue_fields = {
        "title": "疑似政企拆单",
        "risk_domain": "收入",
        "scenario_code": "R8",
        "score": score,
        "rationale": "8 个客户金额集中在审批阈值边缘,且法人关联同一实控人",
        "evidence": {"contracts": 8, "threshold": 1000000},
        "amount_involved": 4800000,
        "actor": "system",
    }
    return clue_svc.create_clue(session, **clue_fields)
def test_score_to_confidence_tier():
    """Scores map onto the high / medium / low confidence tiers."""
    expected_tiers = (
        (0.9, ConfidenceTier.HIGH),
        (0.6, ConfidenceTier.MEDIUM),
        (0.2, ConfidenceTier.LOW),
    )
    for score, tier in expected_tiers:
        assert clue_svc.score_to_tier(score) == tier
def test_clue_full_lifecycle(session):
    """Walk one clue through new -> assigned -> confirmed -> transferred -> closed."""
    clue = _new_clue(session)
    assert clue.confidence == ConfidenceTier.HIGH
    assert clue.status == ClueStatus.NEW

    clue_svc.assign(session, clue, assignee="auditor_zhang", actor="manager_li")
    assert clue.status == ClueStatus.ASSIGNED
    assert clue.assignee == "auditor_zhang"

    working_paper = clue_svc.adjudicate(
        session, clue, confirmed=True, actor="auditor_zhang", note="属实,移交"
    )
    assert clue.status == ClueStatus.CONFIRMED
    assert clue.feedback == "confirmed"
    assert working_paper.conclusion == "confirmed"
    assert working_paper.snapshot["score"] == 0.9

    # Finish the loop: confirmed -> transferred -> closed.
    for next_status in (ClueStatus.TRANSFERRED, ClueStatus.CLOSED):
        clue_svc.transition(session, clue, next_status, actor="manager_li")
    assert clue.status == ClueStatus.CLOSED
def test_illegal_transition_rejected(session):
    """A NEW clue cannot jump straight to CLOSED."""
    fresh_clue = _new_clue(session)
    expected_error = clue_svc.IllegalTransitionError
    with pytest.raises(expected_error):
        clue_svc.transition(session, fresh_clue, ClueStatus.CLOSED, actor="x")
def test_audit_hash_chain_integrity(session):
    """After several clue operations the audit chain verifies with no broken entry."""
    _new_clue(session)
    second_clue = _new_clue(session)
    clue_svc.assign(session, second_clue, "auditor_zhang", "manager_li")

    chain_ok, broken_entry = audit.verify_chain(session)

    assert chain_ok is True
    assert broken_entry is None
def test_clue_cannot_be_deleted(session):
    """R19: the database trigger rejects physical deletion of a clue."""
    clue = _new_clue(session)
    session.flush()

    delete_stmt = text("DELETE FROM clue WHERE id = :i")
    params = {"i": clue.id}
    with pytest.raises((InternalError, ProgrammingError)):
        session.execute(delete_stmt, params)
        session.flush()
def test_list_clues_filters(session):
    """Filtering by confidence returns only clues of the requested tier.

    A high-scoring clue is created just before the query, so the filtered
    result must be non-empty; without that check ``all()`` over an empty
    result would pass vacuously and hide a filter that returns nothing.
    """
    _new_clue(session, score=0.9)
    _new_clue(session, score=0.3)
    # Materialise once so the emptiness check and the loop see the same rows.
    highs = list(clue_svc.list_clues(session, confidence=ConfidenceTier.HIGH))
    assert highs, "expected at least one HIGH-confidence clue"
    assert all(c.confidence == ConfidenceTier.HIGH for c in highs)
@@ -0,0 +1,63 @@
"""数据中台穿透 API 集成测试(需 PostgreSQL)。
通过 TestClient 调用 /datahub/penetrate,验证统一穿透查询服务端到端可用。
"""
from __future__ import annotations
import uuid
import pytest
from fastapi.testclient import TestClient
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.ontology import EntityType, RelationshipType
from app.db import get_session
from app.main import app
@pytest.fixture()
def client(session):
    """TestClient whose get_session dependency returns the transactional test session.

    Routing the app through the fixture's session keeps test data inside the
    transaction that is rolled back afterwards.
    """

    def _use_test_session():
        return session

    overrides = app.dependency_overrides
    overrides[get_session] = _use_test_session
    try:
        yield TestClient(app)
    finally:
        overrides.pop(get_session, None)
def test_penetrate_endpoint_detects_related(client, session):
    """POST /datahub/penetrate from the controller reaches the linked customer."""
    suffix = uuid.uuid4().hex[:8]
    controller = upsert_entity(session, EntityType.LEGAL_PERSON, f"CTRL-{suffix}", "实控人")
    customer = upsert_entity(session, EntityType.CUSTOMER, f"CUST-{suffix}", "政企客户")
    legal_rep = upsert_entity(session, EntityType.LEGAL_PERSON, f"REP-{suffix}", "法人")
    add_relationship(session, RelationshipType.LEGAL_REP_OF, legal_rep, customer)
    add_relationship(session, RelationshipType.RELATED_TO, legal_rep, controller)
    session.flush()

    request_body = {"start_entity_id": str(controller.id), "max_depth": 3}
    resp = client.post("/datahub/penetrate", json=request_body)

    assert resp.status_code == 200
    body = resp.json()
    related_ids = {item["entity"]["id"] for item in body["related"]}
    assert str(customer.id) in related_ids
    assert body["related_count"] >= 2
def test_penetrate_unknown_entity_404(client):
    """Penetrating from a random, non-existent entity id yields HTTP 404."""
    unknown_id = str(uuid.uuid4())
    request_body = {"start_entity_id": unknown_id, "max_depth": 2}
    resp = client.post("/datahub/penetrate", json=request_body)
    assert resp.status_code == 404
def test_get_entity_endpoint(client, session):
    """GET /datahub/entities/{id} returns the stored entity's business key."""
    suffix = uuid.uuid4().hex[:8]
    business_key = f"SUP-{suffix}"
    supplier = upsert_entity(session, EntityType.SUPPLIER, business_key, "供应商甲")
    session.flush()

    resp = client.get(f"/datahub/entities/{supplier.id}")

    assert resp.status_code == 200
    assert resp.json()["business_key"] == business_key
@@ -0,0 +1,76 @@
"""知识图谱穿透集成测试(需 PostgreSQL)。
验证 R2 关键能力:通过关系边的多跳穿透识别"疑似同一实控人"
以及本体约束对非法关系的拒绝。对应场景一(政企拆单+隐性实控人,R8)的图谱基础。
"""
from __future__ import annotations
import pytest
from app.datahub.graph_repo import (
OntologyViolationError,
add_relationship,
find_related_entities,
upsert_entity,
)
from app.datahub.ontology import EntityType, RelationshipType
def test_upsert_entity_is_idempotent(session):
    """Upserting the same type/business key twice yields the same entity id."""
    first = upsert_entity(session, EntityType.CUSTOMER, "CUST-001", "客户甲")
    second = upsert_entity(session, EntityType.CUSTOMER, "CUST-001", "客户甲")
    assert first.id == second.id
def test_ontology_violation_rejected(session):
    """A SIGNED edge pointing from contract to customer is rejected."""
    contract = upsert_entity(session, EntityType.CONTRACT, "C-1")
    customer = upsert_entity(session, EntityType.CUSTOMER, "CUST-2")
    # contract -signed-> customer is the wrong direction.
    wrong_direction = (RelationshipType.SIGNED, contract, customer)
    with pytest.raises(OntologyViolationError):
        add_relationship(session, *wrong_direction)
def test_detect_shared_controller_across_customers(session):
    """Eight customers suspected of sharing one actual controller.

    Graph: each customer has its own legal representative (legal person
    -legal_rep_of-> customer), and every representative is related_to the same
    controlling natural person.  Starting from the controller, traversal with
    max_depth=3 must reach all eight customers.
    """
    controller = upsert_entity(session, EntityType.LEGAL_PERSON, "PER-CTRL", "实控人")

    def _link_customer(i):
        """Create customer i with its representative and wire both edges."""
        customer = upsert_entity(session, EntityType.CUSTOMER, f"CUST-{i}", f"政企客户{i}")
        rep = upsert_entity(session, EntityType.LEGAL_PERSON, f"PER-{i}", f"法人{i}")
        # legal person -legal representative of-> customer
        add_relationship(session, RelationshipType.LEGAL_REP_OF, rep, customer)
        # legal person -related (relative / control)-> controller
        add_relationship(session, RelationshipType.RELATED_TO, rep, controller)
        return customer

    customers = [_link_customer(i) for i in range(8)]
    session.flush()

    reachable = find_related_entities(session, controller.id, max_depth=3)
    related_ids = {entity_id for entity_id, _ in reachable}
    # Every one of the 8 customers must be within 3 hops of the controller.
    for cust in customers:
        assert cust.id in related_ids, f"未穿透到 {cust.business_key}"
def test_traversal_respects_max_depth(session):
    """Traversal from A over A -> B -> C stops at the requested depth."""
    node_a = upsert_entity(session, EntityType.LEGAL_PERSON, "A")
    node_b = upsert_entity(session, EntityType.LEGAL_PERSON, "B")
    node_c = upsert_entity(session, EntityType.CUSTOMER, "C")
    add_relationship(session, RelationshipType.RELATED_TO, node_a, node_b)
    add_relationship(session, RelationshipType.LEGAL_REP_OF, node_b, node_c)
    session.flush()

    def _reachable_ids(depth):
        found = find_related_entities(session, node_a.id, max_depth=depth)
        return {entity_id for entity_id, _ in found}

    # depth=1: B is reachable from A, C is not.
    one_hop = _reachable_ids(1)
    assert node_b.id in one_hop
    assert node_c.id not in one_hop
    # depth=2: C becomes reachable.
    two_hops = _reachable_ids(2)
    assert node_c.id in two_hops
+42
View File
@@ -0,0 +1,42 @@
"""数据零出域红线测试:prod 环境必须禁用公网 LLM Provider。"""
import pytest
from app.config import AppEnv, LLMProviderName, Settings
from app.llm.factory import EgressPolicyError, get_llm_provider
def _settings(env: AppEnv, provider: LLMProviderName) -> Settings:
    """Build Settings for the given environment/provider with a dummy DashScope key."""
    overrides = {"aiaudit_env": env, "llm_provider": provider, "dashscope_api_key": "x"}
    return Settings(**overrides)
def test_prod_blocks_public_dashscope():
    """In prod the factory refuses to build the public DashScope provider."""
    prod_settings = _settings(AppEnv.prod, LLMProviderName.dashscope)
    with pytest.raises(EgressPolicyError):
        get_llm_provider(prod_settings)
def test_prod_allows_local_vllm():
    """In prod the factory builds the local vLLM provider, flagged as non-egress."""
    built = get_llm_provider(_settings(AppEnv.prod, LLMProviderName.vllm))
    assert built.name == "vllm"
    assert built.egress is False
def test_dev_allows_dashscope():
    """In dev, the DashScope provider is returned and reports ``egress`` as ``True``."""
    dev_settings = _settings(AppEnv.dev, LLMProviderName.dashscope)
    llm = get_llm_provider(dev_settings)
    assert llm.name == "dashscope"
    assert llm.egress is True
def test_validate_egress_policy_raises_in_prod():
    """``validate_egress_policy`` must raise ``RuntimeError`` for prod + DashScope."""
    prod_settings = _settings(AppEnv.prod, LLMProviderName.dashscope)
    with pytest.raises(RuntimeError):
        prod_settings.validate_egress_policy()
def test_validate_egress_policy_ok_in_dev():
    """``validate_egress_policy`` must not raise for dev + DashScope."""
    dev_settings = _settings(AppEnv.dev, LLMProviderName.dashscope)
    # The call itself is the assertion: any exception fails the test.
    dev_settings.validate_egress_policy()
+21
View File
@@ -0,0 +1,21 @@
"""健康检查端点测试。"""
from fastapi.testclient import TestClient
from app.main import app
# Module-level FastAPI test client wrapping the application; used by the
# health-check tests below.
client = TestClient(app)
def test_health_ok():
    """``GET /health`` answers 200 with a JSON body whose ``status`` is ``"ok"``."""
    response = client.get("/health")
    assert response.status_code == 200
    payload = response.json()
    assert payload["status"] == "ok"
def test_health_config():
    """``GET /health/config`` answers 200 and exposes ``env`` and ``llm_provider``."""
    response = client.get("/health/config")
    assert response.status_code == 200
    payload = response.json()
    # Checked in the same order as the keys are listed.
    for expected_key in ("env", "llm_provider"):
        assert expected_key in payload
+42
View File
@@ -0,0 +1,42 @@
"""审计本体约束测试(无需数据库)。"""
from app.datahub.ontology import EntityType, RelationshipType, is_valid_relationship
def test_valid_signed_relationship():
    """SIGNED from CUSTOMER to CONTRACT is accepted by the ontology."""
    accepted = is_valid_relationship(
        RelationshipType.SIGNED,
        EntityType.CUSTOMER,
        EntityType.CONTRACT,
    )
    assert accepted
def test_invalid_signed_direction():
    """SIGNED from CONTRACT to CUSTOMER is rejected by the ontology."""
    # A contract cannot "sign" a customer: the direction is reversed.
    accepted = is_valid_relationship(
        RelationshipType.SIGNED,
        EntityType.CONTRACT,
        EntityType.CUSTOMER,
    )
    assert not accepted
def test_legal_rep_relationship():
    """LEGAL_REP_OF from LEGAL_PERSON to SUPPLIER is accepted by the ontology."""
    accepted = is_valid_relationship(
        RelationshipType.LEGAL_REP_OF,
        EntityType.LEGAL_PERSON,
        EntityType.SUPPLIER,
    )
    assert accepted
def test_related_to_between_legal_persons():
    """RELATED_TO between two LEGAL_PERSON entities is accepted by the ontology."""
    # Basis for identifying actual controllers: kinship/affiliation links
    # between legal persons.
    accepted = is_valid_relationship(
        RelationshipType.RELATED_TO,
        EntityType.LEGAL_PERSON,
        EntityType.LEGAL_PERSON,
    )
    assert accepted
def test_invalid_relationship_wrong_target():
    """HOLDS_MSISDN from CUSTOMER to CONTRACT is rejected by the ontology."""
    accepted = is_valid_relationship(
        RelationshipType.HOLDS_MSISDN,
        EntityType.CUSTOMER,
        EntityType.CONTRACT,
    )
    assert not accepted
def test_all_relationship_types_have_domain():
    """Every ``RelationshipType`` member must appear in ``RELATIONSHIP_DOMAIN``."""
    from app.datahub.ontology import RELATIONSHIP_DOMAIN

    for rel in RelationshipType:
        has_domain = rel in RELATIONSHIP_DOMAIN
        # Stops at the first relationship type without a domain entry.
        assert has_domain, f"关系 {rel} 缺少本体域定义"