Initial commit: InternalAuditInterprise

2026-06-16 00:38:57 +08:00
commit 7b1e2b10a8
57 changed files with 4622 additions and 0 deletions
@@ -0,0 +1,23 @@
+# 运行环境：dev | prod
+# prod 下禁用一切公网 LLM Provider（数据零出域红线）
+AIAUDIT_ENV=dev
+
+# 数据库（本地 PostgreSQL 16 / Postgres.app，无密码）
+DATABASE_URL=postgresql+psycopg://freedak@localhost:5432/aiaudit
+
+# Redis / Celery
+REDIS_URL=redis://localhost:6379/0
+
+# MinIO
+MINIO_ENDPOINT=localhost:9000
+MINIO_ACCESS_KEY=aiaudit
+MINIO_SECRET_KEY=aiaudit_dev
+
+# LLM Provider：dashscope（公网，仅 dev）| vllm（本地，prod）
+LLM_PROVIDER=dashscope
+# 公网千问（仅开发测试，且只允许脱敏/样例假数据）
+DASHSCOPE_API_KEY=
+DASHSCOPE_MODEL=qwen-plus
+# 本地 vLLM（生产）
+VLLM_BASE_URL=http://localhost:8001/v1
+VLLM_MODEL=qwen2.5-72b-instruct
@@ -0,0 +1,38 @@
+[alembic]
+script_location = migrations
+prepend_sys_path = .
+sqlalchemy.url =
+
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARNING
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARNING
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
@@ -0,0 +1,3 @@
+"""AIAudit 后端应用包。"""
+
+# Version string of the backend application package.
+__version__ = "0.1.0"
@@ -0,0 +1 @@
+"""HTTP API 层。"""
@@ -0,0 +1,64 @@
+"""数据中台统一穿透查询 API（P1.2.5）。
+
+作为各引擎与审计场景访问知识图谱的共同入口，对上层屏蔽底层是关系表还是图库。
+对应需求 R2。
+"""
+
+from __future__ import annotations
+
+import uuid
+
+from fastapi import APIRouter, Depends, HTTPException
+from sqlalchemy.orm import Session
+
+from app.api.schemas import (
+    EntityOut,
+    PenetrateRequest,
+    PenetrateResponse,
+    RelatedEntityOut,
+)
+from app.datahub.graph_repo import find_related_entities
+from app.datahub.models import Entity
+from app.db import get_session
+
+# Router for this module: routes registered on it get the "/datahub" prefix and the "datahub" tag.
+router = APIRouter(prefix="/datahub", tags=["datahub"])
+
+
+@router.get("/entities/{entity_id}", response_model=EntityOut)
+def get_entity(entity_id: uuid.UUID, session: Session = Depends(get_session)) -> Entity:
+    """Fetch a single entity by its primary key.
+
+    Raises:
+        HTTPException: with status 404 when no entity has the given id.
+    """
+    found = session.get(Entity, entity_id)
+    if found is not None:
+        return found
+    raise HTTPException(status_code=404, detail="实体不存在")
+
+
+@router.post("/penetrate", response_model=PenetrateResponse)
+def penetrate(
+    req: PenetrateRequest, session: Session = Depends(get_session)
+) -> PenetrateResponse:
+    """Multi-hop penetration query.
+
+    Returns the entities connected to the start entity within ``req.max_depth``
+    hops, each with its depth, ordered by ascending depth (used to identify
+    actual controllers, related parties and shell entities).
+
+    Raises:
+        HTTPException: with status 404 when the start entity does not exist.
+    """
+    if session.get(Entity, req.start_entity_id) is None:
+        raise HTTPException(status_code=404, detail="起点实体不存在")
+
+    hits = find_related_entities(session, req.start_entity_id, max_depth=req.max_depth)
+    depth_by_id = {}
+    for hit_id, hit_depth in hits:
+        depth_by_id[hit_id] = hit_depth
+
+    # Load all hit entities in one query so the response can carry their details.
+    rows = []
+    if depth_by_id:
+        rows = session.query(Entity).filter(Entity.id.in_(list(depth_by_id))).all()
+
+    related = sorted(
+        (
+            RelatedEntityOut(entity=EntityOut.model_validate(row), depth=depth_by_id[row.id])
+            for row in rows
+        ),
+        key=lambda item: item.depth,
+    )
+
+    return PenetrateResponse(
+        start_entity_id=req.start_entity_id,
+        max_depth=req.max_depth,
+        related_count=len(related),
+        related=related,
+    )
@@ -0,0 +1,36 @@
+"""API 数据传输模型（Pydantic）。"""
+
+from __future__ import annotations
+
+import uuid
+
+from pydantic import BaseModel, Field
+
+
+class EntityOut(BaseModel):
+    """API representation of an entity: id, type, business key, optional display name, attributes."""
+
+    id: uuid.UUID
+    entity_type: str
+    business_key: str
+    display_name: str | None = None
+    attributes: dict = Field(default_factory=dict)
+
+    # from_attributes allows building the model from an object's attributes (not only from a dict).
+    model_config = {"from_attributes": True}
+
+
+class RelatedEntityOut(BaseModel):
+    """A related entity hit by penetration, with its shortest hop count (a first hint of evidence strength)."""
+
+    entity: EntityOut
+    depth: int
+
+
+class PenetrateRequest(BaseModel):
+    """Request body of a penetration query: the start entity and the hop limit."""
+
+    start_entity_id: uuid.UUID
+    # Hop limit: defaults to 3 and is validated to the inclusive range 1..6.
+    max_depth: int = Field(default=3, ge=1, le=6)
+
+
+class PenetrateResponse(BaseModel):
+    """Response of a penetration query: the request parameters, the hit count and the hits."""
+
+    start_entity_id: uuid.UUID
+    max_depth: int
+    related_count: int
+    related: list[RelatedEntityOut]
@@ -0,0 +1 @@
+"""系统自审计模块：不可篡改操作日志、独立性与分权（R19）。"""
@@ -0,0 +1,50 @@
+"""系统自审计 ORM 模型：不可篡改操作日志（R19）。
+
+每条日志含哈希链（prev_hash + 内容 → entry_hash），任何篡改都会断链，可检测。
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import uuid
+
+from sqlalchemy import BigInteger, DateTime, Identity, Index, String
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.orm import Mapped, mapped_column
+
+from app.db import Base
+
+
+def _uuid() -> uuid.UUID:
+    """Return a freshly generated random (version 4) UUID."""
+    fresh_id = uuid.uuid4()
+    return fresh_id
+
+
+def _now() -> dt.datetime:
+    """Return the current time as a timezone-aware datetime in UTC."""
+    return dt.datetime.now(tz=dt.UTC)
+
+
+class AuditLog(Base):
+    """Tamper-evident audit trail.
+
+    Append-only: rows must not be updated or deleted (guaranteed jointly by the
+    application layer and organizational policy). Each row stores the previous
+    row's hash (``prev_hash``) and its own hash (``entry_hash``).
+    """
+
+    __tablename__ = "audit_log"
+    __table_args__ = (
+        Index("ix_audit_actor", "actor"),
+        Index("ix_audit_action", "action"),
+        # NOTE(review): seq is also declared unique=True on the column below, so this
+        # unique index looks redundant — confirm whether both are intended.
+        Index("ix_audit_seq", "seq", unique=True),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    # Auto-incrementing sequence number; defines the order of the hash chain.
+    seq: Mapped[int] = mapped_column(
+        BigInteger, Identity(always=False), nullable=False, unique=True
+    )
+    actor: Mapped[str] = mapped_column(String(64), nullable=False)
+    role: Mapped[str | None] = mapped_column(String(32), nullable=True)
+    action: Mapped[str] = mapped_column(String(64), nullable=False)  # e.g. rule.update / clue.assign
+    target_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    target_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
+    detail: Mapped[dict] = mapped_column(JSONB, default=dict)
+    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
+
+    # Hash-chain links; String(64) fits a hex-encoded SHA-256 digest.
+    prev_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    entry_hash: Mapped[str] = mapped_column(String(64), nullable=False)
@@ -0,0 +1,78 @@
+"""RBAC 权限与独立性约束（R19、PRD §6 权限矩阵）。
+
+核心独立性规则（硬约束）：
+- 任何角色都不能删除线索（DELETE_CLUE 不授予任何角色；数据库触发器再兜底）。
+- 业务方（business）对系统无任何写权限。
+- 配规则/改阈值/看线索/出报告分权制衡。
+"""
+
+from __future__ import annotations
+
+import enum
+
+
+class Role(str, enum.Enum):
+    """Roles known to the RBAC layer."""
+
+    AUDITOR = "auditor"  # auditor
+    AUDIT_MANAGER = "audit_manager"  # audit manager
+    RULE_ADMIN = "rule_admin"  # rule administrator
+    SYS_ADMIN = "sys_admin"  # system administrator
+    SYS_AUDITOR = "sys_auditor"  # system auditor (independent oversight)
+    BUSINESS = "business"  # audited business party (no write permission)
+
+
+class Permission(str, enum.Enum):
+    """Individual permissions that can be granted to a role."""
+
+    QUERY = "query"  # natural-language query
+    VIEW_CLUE = "view_clue"  # view clues
+    ADJUDICATE_CLUE = "adjudicate_clue"  # adjudicate / classify clues
+    ASSIGN_CLUE = "assign_clue"  # assign clues
+    DELETE_CLUE = "delete_clue"  # delete clues (must never be granted to anyone)
+    CONFIG_RULE = "config_rule"  # configure rules
+    ADJUST_THRESHOLD = "adjust_threshold"  # adjust thresholds
+    ISSUE_REPORT = "issue_report"  # issue reports
+    DATA_INGEST = "data_ingest"  # data-ingestion configuration
+    VIEW_AUDIT_TRAIL = "view_audit_trail"  # view the self-audit trail
+    MODEL_DEPLOY = "model_deploy"  # model deployment / upgrade
+
+
+# Role -> set of granted permissions.
+# Note: DELETE_CLUE appears under no role (clues cannot be deleted, R19).
+ROLE_PERMISSIONS: dict[Role, set[Permission]] = {
+    Role.AUDITOR: {
+        Permission.QUERY,
+        Permission.VIEW_CLUE,
+        Permission.ADJUDICATE_CLUE,
+        Permission.ISSUE_REPORT,
+    },
+    Role.AUDIT_MANAGER: {
+        Permission.QUERY,
+        Permission.VIEW_CLUE,
+        Permission.ADJUDICATE_CLUE,
+        Permission.ASSIGN_CLUE,
+        Permission.ISSUE_REPORT,
+    },
+    Role.RULE_ADMIN: {
+        Permission.QUERY,
+        Permission.VIEW_CLUE,
+        Permission.CONFIG_RULE,
+        Permission.ADJUST_THRESHOLD,
+    },
+    Role.SYS_ADMIN: {
+        Permission.DATA_INGEST,
+        Permission.MODEL_DEPLOY,
+    },
+    Role.SYS_AUDITOR: {
+        Permission.QUERY,
+        Permission.VIEW_CLUE,
+        Permission.VIEW_AUDIT_TRAIL,
+        Permission.ISSUE_REPORT,
+    },
+    Role.BUSINESS: set(),  # the business party has no permissions at all
+}
+
+
+def has_permission(role: Role, perm: Permission) -> bool:
+    """Return True when ``perm`` is in the permission set mapped to ``role``.
+
+    A role that has no entry in ``ROLE_PERMISSIONS`` is treated as having no permissions.
+    """
+    granted = ROLE_PERMISSIONS.get(role)
+    if granted is None:
+        return False
+    return perm in granted
+
+
+def can_delete_clue(role: Role) -> bool:
+    """Clues can never be deleted: the answer is False for every role (hard independence rule)."""
+    deletion_allowed = False  # deliberately independent of ``role``
+    return deletion_allowed
@@ -0,0 +1,81 @@
+"""系统自审计服务：写入哈希链审计日志、校验完整性（R19）。"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from app.audit.models import AuditLog
+
+
+def _compute_hash(prev_hash: str | None, payload: dict) -> str:
+    """Return the SHA-256 hex digest that links ``payload`` to ``prev_hash``.
+
+    The payload is serialized as JSON with sorted keys (non-ASCII kept as-is,
+    non-JSON values stringified via ``str``) and prefixed with ``prev_hash`` and
+    a ``|`` separator; a missing or empty ``prev_hash`` contributes an empty prefix.
+    """
+    serialized = json.dumps(payload, sort_keys=True, ensure_ascii=False, default=str)
+    prefix = prev_hash if prev_hash else ""
+    digest = hashlib.sha256()
+    digest.update((prefix + "|" + serialized).encode("utf-8"))
+    return digest.hexdigest()
+
+
+def record(
+    session: Session,
+    actor: str,
+    action: str,
+    *,
+    role: str | None = None,
+    target_type: str | None = None,
+    target_id: str | None = None,
+    detail: dict | None = None,
+) -> AuditLog:
+    """Append one audit-log entry and link it to the current tail of the hash chain.
+
+    The hashed payload covers actor, role, action, target_type, target_id and
+    detail; ``seq`` and ``created_at`` are not part of the hash. The new row is
+    added to the session and flushed, but not committed here.
+
+    NOTE(review): the tail is read with a plain SELECT; two concurrent writers
+    could presumably read the same tail — confirm how callers serialize writes.
+    """
+    tail_stmt = select(AuditLog).order_by(AuditLog.seq.desc()).limit(1)
+    tail = session.execute(tail_stmt).scalar_one_or_none()
+    prev_hash = tail.entry_hash if tail else None
+
+    # Columns that are both stored on the row and fed into the hash.
+    hashed_columns = {
+        "actor": actor,
+        "role": role,
+        "action": action,
+        "target_type": target_type,
+        "target_id": target_id,
+    }
+    entry_hash = _compute_hash(prev_hash, {**hashed_columns, "detail": detail or {}})
+
+    entry = AuditLog(
+        **hashed_columns,
+        detail=detail or {},
+        prev_hash=prev_hash,
+        entry_hash=entry_hash,
+    )
+    session.add(entry)
+    session.flush()
+    return entry
+
+
+def verify_chain(session: Session) -> tuple[bool, int | None]:
+    """Verify the integrity of the audit-log hash chain.
+
+    Walks all rows in ascending ``seq`` order and checks, for each row, that its
+    stored ``prev_hash`` equals the previous row's ``entry_hash`` (None for the
+    first row) and that its ``entry_hash`` matches the recomputed hash.
+
+    Returns:
+        ``(True, None)`` when the chain is intact, otherwise ``(False, seq)``
+        with the ``seq`` of the first broken row.
+    """
+    hashed_columns = ("actor", "role", "action", "target_type", "target_id")
+    ordered = session.execute(select(AuditLog).order_by(AuditLog.seq.asc())).scalars().all()
+
+    expected_prev: str | None = None
+    for entry in ordered:
+        payload = {name: getattr(entry, name) for name in hashed_columns}
+        payload["detail"] = entry.detail or {}
+        hash_ok = _compute_hash(expected_prev, payload) == entry.entry_hash
+        link_ok = entry.prev_hash == expected_prev
+        if not (hash_ok and link_ok):
+            return False, entry.seq
+        expected_prev = entry.entry_hash
+    return True, None
@@ -0,0 +1 @@
+"""线索引擎模块：线索模型、生成、置信度分级、状态流转（人机闭环）。"""
@@ -0,0 +1,136 @@
+"""线索 ORM 模型。
+
+对应需求 R7（线索+证据链+解释）、R17（闭环状态）、R18（置信度分级）、R19（线索不可删）。
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import enum
+import uuid
+
+from sqlalchemy import DateTime, Enum, Float, ForeignKey, Index, String, Text
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from app.db import Base
+
+
+def _enum_values(enum_cls):
+    """Return the members' ``value``s in definition order.
+
+    Passed as ``values_callable`` so SQLAlchemy writes the enum values
+    (lowercase here) to the native PG enum instead of the member names.
+    """
+    values = []
+    for member in enum_cls:
+        values.append(member.value)
+    return values
+
+
+def _uuid() -> uuid.UUID:
+    """Return a freshly generated random (version 4) UUID."""
+    fresh_id = uuid.uuid4()
+    return fresh_id
+
+
+def _now() -> dt.datetime:
+    """Return the current time as a timezone-aware datetime in UTC."""
+    return dt.datetime.now(tz=dt.UTC)
+
+
+class ConfidenceTier(str, enum.Enum):
+    """Three-tier confidence triage (R18)."""
+
+    HIGH = "high"  # high confidence: pushed directly for handling
+    MEDIUM = "medium"  # medium confidence: manual review
+    LOW = "low"  # low confidence: archived for reference
+
+
+class ClueStatus(str, enum.Enum):
+    """Closed-loop state machine of a clue (R17)."""
+
+    NEW = "new"  # newly generated
+    ASSIGNED = "assigned"  # assigned
+    REVIEWING = "reviewing"  # under review
+    CONFIRMED = "confirmed"  # adjudicated as substantiated
+    DISMISSED = "dismissed"  # adjudicated as a false positive
+    RECTIFYING = "rectifying"  # rectification in progress
+    TRANSFERRED = "transferred"  # transferred
+    CLOSED = "closed"  # closed out
+
+
+class Clue(Base):
+    """Audit clue. Once generated, a clue must not be physically deleted (R19); invalidation is expressed through its status."""
+
+    __tablename__ = "clue"
+    __table_args__ = (
+        Index("ix_clue_status", "status"),
+        Index("ix_clue_scenario", "scenario_code"),
+        Index("ix_clue_assignee", "assignee"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    title: Mapped[str] = mapped_column(String(256), nullable=False)
+    risk_domain: Mapped[str] = mapped_column(String(32), nullable=False)  # revenue / cost / procurement / funds / compliance
+    scenario_code: Mapped[str] = mapped_column(String(32), nullable=False)  # e.g. R8 / R9
+    confidence: Mapped[ConfidenceTier] = mapped_column(
+        Enum(ConfidenceTier, name="confidence_tier", values_callable=_enum_values),
+        nullable=False,
+    )
+    score: Mapped[float] = mapped_column(Float, default=0.0)  # risk score in 0-1
+    status: Mapped[ClueStatus] = mapped_column(
+        Enum(ClueStatus, name="clue_status", values_callable=_enum_values),
+        default=ClueStatus.NEW,
+        nullable=False,
+    )
+    # Plain-language explanation (reason for the verdict) and the evidence chain.
+    rationale: Mapped[str] = mapped_column(Text, default="")
+    evidence: Mapped[dict] = mapped_column(JSONB, default=dict)
+    # Subjects involved (amounts, lists of entity ids, etc.).
+    subjects: Mapped[dict] = mapped_column(JSONB, default=dict)
+    amount_involved: Mapped[float | None] = mapped_column(Float, nullable=True)
+
+    assignee: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    # False-positive / confirmed feedback (R18 feedback learning).
+    feedback: Mapped[str | None] = mapped_column(String(16), nullable=True)  # confirmed/false_positive
+
+    # Traceability: model / rule / data versions in effect when the clue was produced (R19 triple trace).
+    model_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    rule_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), nullable=True)
+
+    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
+    updated_at: Mapped[dt.datetime] = mapped_column(
+        DateTime(timezone=True), default=_now, onupdate=_now
+    )
+
+    # NOTE(review): "all, delete-orphan" lets history rows be deleted together with (or when
+    # detached from) their clue, which sits oddly with the "never deleted" rule — confirm intent.
+    history: Mapped[list[ClueStatusHistory]] = relationship(
+        back_populates="clue", cascade="all, delete-orphan"
+    )
+
+
+class ClueStatusHistory(Base):
+    """Trace record of one clue status transition (R17/R19)."""
+
+    __tablename__ = "clue_status_history"
+    __table_args__ = (Index("ix_csh_clue", "clue_id"),)
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    clue_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
+    )
+    # Statuses are stored as plain strings; from_status is nullable (no previous status).
+    from_status: Mapped[str | None] = mapped_column(String(16), nullable=True)
+    to_status: Mapped[str] = mapped_column(String(16), nullable=False)
+    actor: Mapped[str] = mapped_column(String(64), nullable=False)
+    note: Mapped[str | None] = mapped_column(Text, nullable=True)
+    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
+
+    clue: Mapped[Clue] = relationship(back_populates="history")
+
+
+class WorkingPaper(Base):
+    """Audit working paper (R17): generated automatically when adjudication completes; traceable."""
+
+    __tablename__ = "working_paper"
+    __table_args__ = (Index("ix_wp_clue", "clue_id"),)
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    clue_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
+    )
+    content: Mapped[str] = mapped_column(Text, default="")
+    conclusion: Mapped[str | None] = mapped_column(String(32), nullable=True)
+    author: Mapped[str] = mapped_column(String(64), nullable=False)
+    snapshot: Mapped[dict] = mapped_column(JSONB, default=dict)  # evidence / version snapshot
+    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
@@ -0,0 +1,195 @@
+"""线索服务：生成、置信度分级、状态流转、底稿生成、反馈。
+
+对应 R7 / R17 / R18 / R19。所有状态变更写入历史并记自审计日志（线索不可删）。
+"""
+
+from __future__ import annotations
+
+import uuid
+
+from sqlalchemy.orm import Session
+
+from app.audit import service as audit
+from app.clues.models import (
+    Clue,
+    ClueStatus,
+    ClueStatusHistory,
+    ConfidenceTier,
+    WorkingPaper,
+)
+
+# Allowed status transitions (R17 closed loop): current status -> statuses it may move to.
+# CLOSED maps to the empty set, i.e. it is terminal.
+_ALLOWED_TRANSITIONS: dict[ClueStatus, set[ClueStatus]] = {
+    ClueStatus.NEW: {ClueStatus.ASSIGNED, ClueStatus.REVIEWING},
+    ClueStatus.ASSIGNED: {ClueStatus.REVIEWING},
+    ClueStatus.REVIEWING: {ClueStatus.CONFIRMED, ClueStatus.DISMISSED},
+    ClueStatus.CONFIRMED: {ClueStatus.RECTIFYING, ClueStatus.TRANSFERRED},
+    ClueStatus.DISMISSED: {ClueStatus.CLOSED},
+    ClueStatus.RECTIFYING: {ClueStatus.CLOSED},
+    ClueStatus.TRANSFERRED: {ClueStatus.CLOSED},
+    ClueStatus.CLOSED: set(),
+}
+
+
+class IllegalTransitionError(ValueError):
+    """Raised for a clue status transition that the state machine does not allow."""
+
+
+def score_to_tier(score: float) -> ConfidenceTier:
+    """Map a risk score to one of the three confidence tiers (R18).
+
+    ``score >= 0.8`` is HIGH, ``score >= 0.5`` is MEDIUM, anything else is LOW.
+    """
+    thresholds = (
+        (0.8, ConfidenceTier.HIGH),
+        (0.5, ConfidenceTier.MEDIUM),
+    )
+    for floor, tier in thresholds:
+        if score >= floor:
+            return tier
+    return ConfidenceTier.LOW
+
+
+def create_clue(
+    session: Session,
+    *,
+    title: str,
+    risk_domain: str,
+    scenario_code: str,
+    score: float,
+    rationale: str,
+    evidence: dict,
+    subjects: dict | None = None,
+    amount_involved: float | None = None,
+    model_version: str | None = None,
+    rule_version: str | None = None,
+    data_version_id: uuid.UUID | None = None,
+    actor: str = "system",
+) -> Clue:
+    """Create a clue in status NEW, tiered automatically from its score.
+
+    Besides the clue row itself, a status-history row and a ``create_clue``
+    audit-log entry are written. Everything is flushed, not committed.
+    """
+    tier = score_to_tier(score)
+    new_clue = Clue(
+        title=title,
+        risk_domain=risk_domain,
+        scenario_code=scenario_code,
+        confidence=tier,
+        score=score,
+        status=ClueStatus.NEW,
+        rationale=rationale,
+        evidence=evidence,
+        subjects=subjects or {},
+        amount_involved=amount_involved,
+        model_version=model_version,
+        rule_version=rule_version,
+        data_version_id=data_version_id,
+    )
+    session.add(new_clue)
+    # Flush first so new_clue.id is populated for the history and audit rows.
+    session.flush()
+
+    _add_history(session, new_clue, None, ClueStatus.NEW, actor, "线索生成")
+
+    audit_detail = {
+        "scenario": scenario_code,
+        "score": score,
+        "confidence": new_clue.confidence.value,
+    }
+    audit.record(
+        session,
+        actor,
+        "create_clue",
+        target_type="clue",
+        target_id=str(new_clue.id),
+        detail=audit_detail,
+    )
+    return new_clue
+
+
+def _add_history(
+    session: Session,
+    clue: Clue,
+    from_status: ClueStatus | None,
+    to_status: ClueStatus,
+    actor: str,
+    note: str | None,
+) -> None:
+    """Add and flush one status-history row for ``clue``; statuses are stored by their string value."""
+    previous = from_status.value if from_status else None
+    entry = ClueStatusHistory(
+        clue_id=clue.id,
+        from_status=previous,
+        to_status=to_status.value,
+        actor=actor,
+        note=note,
+    )
+    session.add(entry)
+    session.flush()
+
+
+def transition(
+    session: Session, clue: Clue, to_status: ClueStatus, actor: str, note: str | None = None
+) -> Clue:
+    """Move ``clue`` to ``to_status``, recording a history row and an audit-log entry.
+
+    Raises:
+        IllegalTransitionError: when ``_ALLOWED_TRANSITIONS`` does not permit the
+            move from the clue's current status.
+    """
+    permitted = _ALLOWED_TRANSITIONS.get(clue.status, set())
+    if to_status not in permitted:
+        raise IllegalTransitionError(
+            f"线索状态不能从 {clue.status.value} 流转到 {to_status.value}"
+        )
+
+    origin = clue.status
+    clue.status = to_status
+    session.flush()
+
+    _add_history(session, clue, origin, to_status, actor, note)
+    audit.record(
+        session,
+        actor,
+        "transition_clue",
+        target_type="clue",
+        target_id=str(clue.id),
+        detail={"from": origin.value, "to": to_status.value, "note": note},
+    )
+    return clue
+
+
+def assign(session: Session, clue: Clue, assignee: str, actor: str) -> Clue:
+    """Set the clue's assignee and write an ``assign_clue`` audit-log entry.
+
+    A clue still in status NEW is also moved to ASSIGNED; in any other status
+    only the assignee changes.
+    """
+    clue.assignee = assignee
+    session.flush()
+
+    is_new = clue.status == ClueStatus.NEW
+    if is_new:
+        transition(session, clue, ClueStatus.ASSIGNED, actor, f"分派给 {assignee}")
+
+    audit.record(
+        session,
+        actor,
+        "assign_clue",
+        target_type="clue",
+        target_id=str(clue.id),
+        detail={"assignee": assignee},
+    )
+    return clue
+
+
+def adjudicate(
+    session: Session, clue: Clue, confirmed: bool, actor: str, note: str | None = None
+) -> WorkingPaper:
+    """Adjudicate a clue as substantiated or as a false positive (R17/R18).
+
+    Steps:
+    1. If the clue is not yet REVIEWING and the state machine allows moving it
+       there (i.e. from NEW or ASSIGNED), move it to REVIEWING first.
+    2. Move it to CONFIRMED (``confirmed=True``) or DISMISSED.
+    3. Record the feedback label on the clue.
+    4. Create a working paper carrying a snapshot of evidence, rationale, score
+       and versions, and write a ``create_working_paper`` audit-log entry.
+
+    Returns:
+        The newly created (flushed, not committed) working paper.
+
+    Raises:
+        IllegalTransitionError: when the clue's status does not permit the final
+            transition (raised by ``transition``).
+    """
+    # Enter REVIEWING only when that hop is legal; for any other status the final
+    # transition below raises IllegalTransitionError.
+    if clue.status != ClueStatus.REVIEWING and ClueStatus.REVIEWING in _ALLOWED_TRANSITIONS.get(
+        clue.status, set()
+    ):
+        transition(session, clue, ClueStatus.REVIEWING, actor, "进入研判")
+
+    verdict = ClueStatus.CONFIRMED if confirmed else ClueStatus.DISMISSED
+    transition(session, clue, verdict, actor, note)
+    clue.feedback = "confirmed" if confirmed else "false_positive"
+    session.flush()
+
+    paper = WorkingPaper(
+        clue_id=clue.id,
+        content=note or "",
+        conclusion=verdict.value,
+        author=actor,
+        snapshot={
+            "evidence": clue.evidence,
+            "rationale": clue.rationale,
+            "score": clue.score,
+            "model_version": clue.model_version,
+            "rule_version": clue.rule_version,
+            "data_version_id": str(clue.data_version_id) if clue.data_version_id else None,
+        },
+    )
+    session.add(paper)
+    # Flush so paper.id is populated before it is referenced in the audit entry.
+    session.flush()
+    audit.record(
+        session, actor, "create_working_paper",
+        target_type="working_paper", target_id=str(paper.id),
+        detail={"clue_id": str(clue.id), "conclusion": verdict.value},
+    )
+    return paper
+
+
+def list_clues(
+    session: Session,
+    *,
+    status: ClueStatus | None = None,
+    scenario_code: str | None = None,
+    confidence: ConfidenceTier | None = None,
+) -> list[Clue]:
+    """List clues ordered by descending score, filtered by whichever arguments are truthy."""
+    criteria = []
+    if status:
+        criteria.append(Clue.status == status)
+    if scenario_code:
+        criteria.append(Clue.scenario_code == scenario_code)
+    if confidence:
+        criteria.append(Clue.confidence == confidence)
+
+    query = session.query(Clue)
+    for criterion in criteria:
+        query = query.filter(criterion)
+    return query.order_by(Clue.score.desc()).all()
@@ -0,0 +1,70 @@
+"""应用配置。
+
+通过环境变量加载，区分 dev / prod 运行环境。
+prod 环境强制执行"数据零出域"红线：禁用任何公网 LLM Provider。
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class AppEnv(str, Enum):
+    """Runtime environment of the application."""
+
+    dev = "dev"
+    prod = "prod"
+
+
+class LLMProviderName(str, Enum):
+    """Names of the supported LLM providers."""
+
+    dashscope = "dashscope"  # public-internet Qwen, dev only
+    vllm = "vllm"  # local, prod
+
+
+# Providers classified as public-internet / data-egress; their use is forbidden in prod.
+EGRESS_PROVIDERS: frozenset[LLMProviderName] = frozenset({LLMProviderName.dashscope})
+
+
+class Settings(BaseSettings):
+    """Application settings read from environment variables and a ``.env`` file.
+
+    Variable names carry no prefix and are matched case-insensitively; unknown
+    variables are ignored.
+    """
+
+    model_config = SettingsConfigDict(
+        env_prefix="",
+        env_file=".env",
+        extra="ignore",
+        case_sensitive=False,
+    )
+
+    # Runtime environment.
+    aiaudit_env: AppEnv = AppEnv.dev
+
+    # Storage / broker endpoints.
+    database_url: str = "postgresql+psycopg://freedak@localhost:5432/aiaudit"
+    redis_url: str = "redis://localhost:6379/0"
+
+    # LLM provider selection and per-provider parameters.
+    llm_provider: LLMProviderName = LLMProviderName.dashscope
+    dashscope_api_key: str = ""
+    dashscope_model: str = "qwen-plus"
+    vllm_base_url: str = "http://localhost:8001/v1"
+    vllm_model: str = "qwen2.5-72b-instruct"
+
+    @property
+    def is_prod(self) -> bool:
+        """True when the configured environment is prod."""
+        return AppEnv.prod == self.aiaudit_env
+
+    def validate_egress_policy(self) -> None:
+        """Enforce the zero-data-egress rule: prod must not use a public-internet provider.
+
+        Meant to be called at application start-up.
+
+        Raises:
+            RuntimeError: when running in prod with a provider listed in ``EGRESS_PROVIDERS``.
+        """
+        if not self.is_prod:
+            return
+        if self.llm_provider not in EGRESS_PROVIDERS:
+            return
+        raise RuntimeError(
+            f"数据零出域红线违规：prod 环境禁止使用公网 LLM Provider "
+            f"'{self.llm_provider.value}'。请改用本地 Provider（如 vllm）。"
+        )
+
+
+_settings: Settings | None = None
+
+
+def get_settings() -> Settings:
+    """Return the module-wide ``Settings`` instance, creating it on first call."""
+    global _settings
+    cached = _settings
+    if cached is None:
+        cached = Settings()
+        _settings = cached
+    return cached
@@ -0,0 +1 @@
+"""审计数据中台模块：本体/知识图谱、双时态、时序、数据版本。"""
@@ -0,0 +1,83 @@
+"""双时态事实仓储：写入与"按历史时点回放"查询。
+
+对应需求 R3 / ADR-0002：
+- 业务有效期 valid_from/valid_to（应用时间）
+- 系统记录期 system_from/system_to（事务时间）
+回放 = 给定 (as_of_valid, as_of_system) 在两条时间线上各取"包含该时点"的记录。
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import uuid
+
+from sqlalchemy import or_
+from sqlalchemy.orm import Session
+
+from app.datahub.models import BitemporalFact
+
+
+def record_fact(
+    session: Session,
+    entity_id: uuid.UUID,
+    attr_name: str,
+    attr_value: dict,
+    valid_from: dt.datetime,
+    valid_to: dt.datetime | None = None,
+    data_version_id: uuid.UUID | None = None,
+) -> BitemporalFact:
+    """Add and flush one bitemporal fact for an entity attribute.
+
+    Only the business-validity interval is set here; ``system_from`` is not
+    passed, so it presumably comes from a default on the model — verify there.
+    """
+    columns = {
+        "entity_id": entity_id,
+        "attr_name": attr_name,
+        "attr_value": attr_value,
+        "valid_from": valid_from,
+        "valid_to": valid_to,
+        "data_version_id": data_version_id,
+    }
+    new_fact = BitemporalFact(**columns)
+    session.add(new_fact)
+    session.flush()
+    return new_fact
+
+
+def as_of(
+    session: Session,
+    entity_id: uuid.UUID,
+    attr_name: str,
+    as_of_valid: dt.datetime,
+    as_of_system: dt.datetime | None = None,
+) -> BitemporalFact | None:
+    """Replay: return the fact valid at a business time as seen at a system time.
+
+    - Business timeline: ``valid_from <= as_of_valid < valid_to`` (NULL ``valid_to`` = open-ended).
+    - System timeline: ``system_from <= as_of_system < system_to`` (NULL ``system_to`` = still visible).
+
+    ``as_of_system`` defaults to the current UTC time. When several facts match,
+    the one with the latest ``system_from`` is returned; None when none match.
+    """
+    system_point = as_of_system or dt.datetime.now(dt.UTC)
+
+    valid_not_ended = or_(
+        BitemporalFact.valid_to.is_(None), BitemporalFact.valid_to > as_of_valid
+    )
+    system_not_ended = or_(
+        BitemporalFact.system_to.is_(None),
+        BitemporalFact.system_to > system_point,
+    )
+    conditions = (
+        BitemporalFact.entity_id == entity_id,
+        BitemporalFact.attr_name == attr_name,
+        BitemporalFact.valid_from <= as_of_valid,
+        valid_not_ended,
+        BitemporalFact.system_from <= system_point,
+        system_not_ended,
+    )
+
+    query = session.query(BitemporalFact)
+    for condition in conditions:
+        query = query.filter(condition)
+    return query.order_by(BitemporalFact.system_from.desc()).first()
+
+
+def close_fact(
+    session: Session, fact: BitemporalFact, system_to: dt.datetime | None = None
+) -> None:
+    """Logically close a fact's system-visibility period (for correction/invalidation, not physical deletion).
+
+    ``system_to`` defaults to the current UTC time. The change is flushed, not committed.
+    """
+    closing_time = system_to or dt.datetime.now(dt.UTC)
+    fact.system_to = closing_time
+    session.add(fact)
+    session.flush()
@@ -0,0 +1,58 @@
+"""数据中台 schema 初始化。
+
+MVP 阶段以 SQLAlchemy metadata 建表（后续可迁移到 Alembic）。
+扩展按可用性可选启用：
+- btree_gist / vector：若可用则创建。
+- timescaledb：若可用则把 metric_event 转为超表；不可用则保持普通表（带时间索引）。
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import text
+from sqlalchemy.engine import Engine
+
+from app.datahub import models  # noqa: F401  确保模型注册到 metadata
+from app.db import Base, get_engine
+
+
+def _extension_available(engine: Engine, name: str) -> bool:
+    """Return True when ``name`` is listed in ``pg_available_extensions``."""
+    probe = text("SELECT 1 FROM pg_available_extensions WHERE name = :n")
+    with engine.connect() as conn:
+        hit = conn.execute(probe, {"n": name}).first()
+    return hit is not None
+
+
+def init_extensions(engine: Engine) -> dict[str, bool]:
+    """Enable each optional extension where possible and report the outcome.
+
+    For btree_gist, vector and timescaledb: when the extension is listed as
+    available, ``CREATE EXTENSION IF NOT EXISTS`` is run in its own transaction.
+    An extension that is listed but cannot be created (for example missing
+    privileges, or a library that is not preloaded) is logged as a warning and
+    reported as False instead of aborting schema initialization, since all
+    three extensions are optional.
+
+    Returns:
+        Mapping of extension name to True when it is enabled after this call,
+        False when it is unavailable or could not be created.
+    """
+    import logging
+
+    from sqlalchemy.exc import SQLAlchemyError
+
+    status: dict[str, bool] = {}
+    # The names are fixed constants, so interpolating them into the statement is safe.
+    for ext in ("btree_gist", "vector", "timescaledb"):
+        enabled = _extension_available(engine, ext)
+        if enabled:
+            try:
+                with engine.begin() as conn:
+                    conn.execute(text(f"CREATE EXTENSION IF NOT EXISTS {ext}"))
+            except SQLAlchemyError:
+                logging.getLogger(__name__).warning(
+                    "extension %s is available but could not be created; continuing without it",
+                    ext,
+                    exc_info=True,
+                )
+                enabled = False
+        status[ext] = enabled
+    return status
+
+
+def create_schema(engine: Engine | None = None) -> dict[str, bool]:
+    """Create all tables registered on ``Base.metadata`` and return the extension status.
+
+    Uses ``get_engine()`` when no engine is given. When timescaledb is reported
+    as enabled, ``metric_event`` is converted to a hypertable on ``event_time``
+    (``if_not_exists`` makes the call repeatable).
+    """
+    if not engine:
+        engine = get_engine()
+
+    ext_status = init_extensions(engine)
+    Base.metadata.create_all(engine)
+
+    if not ext_status.get("timescaledb"):
+        return ext_status
+
+    hypertable_stmt = text(
+        "SELECT create_hypertable('metric_event', 'event_time', "
+        "if_not_exists => TRUE, migrate_data => TRUE)"
+    )
+    with engine.begin() as conn:
+        conn.execute(hypertable_stmt)
+    return ext_status
+
+
+if __name__ == "__main__":
+    # Script entry point: initialize the schema and print the extension status.
+    extension_status = create_schema()
+    print("数据中台 schema 初始化完成。扩展状态：", extension_status)
@@ -0,0 +1,118 @@
+"""知识图谱仓储：实体/关系写入与多跳穿透（递归 CTE）。
+
+对应需求 R2：支撑隐性实控人、关联方网络、"马甲"供应商等穿透分析。
+统一穿透查询服务（P1.2.5）在此之上封装对外 API，对上层屏蔽底层是关系表还是图库。
+"""
+
+from __future__ import annotations
+
+import uuid
+
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+from app.datahub.models import Entity, EntityRelationship
+from app.datahub.ontology import EntityType, RelationshipType, is_valid_relationship
+
+
+class OntologyViolationError(ValueError):
+    """Raised when a relationship does not satisfy the ontology constraints."""
+
+
+def upsert_entity(
+    session: Session,
+    entity_type: EntityType,
+    business_key: str,
+    display_name: str | None = None,
+    attributes: dict | None = None,
+    data_version_id: uuid.UUID | None = None,
+) -> Entity:
+    """Idempotently write an entity keyed by (type, business key).
+
+    When the entity already exists, a non-None ``display_name`` overwrites the
+    stored one and non-empty ``attributes`` are merged over the stored ones;
+    ``data_version_id`` is left untouched and nothing is flushed. Otherwise a
+    new entity is added and flushed.
+    """
+    lookup = session.query(Entity).filter(
+        Entity.entity_type == entity_type.value, Entity.business_key == business_key
+    )
+    current = lookup.one_or_none()
+
+    if current is None:
+        created = Entity(
+            entity_type=entity_type.value,
+            business_key=business_key,
+            display_name=display_name,
+            attributes=attributes or {},
+            data_version_id=data_version_id,
+        )
+        session.add(created)
+        session.flush()
+        return created
+
+    if display_name is not None:
+        current.display_name = display_name
+    if attributes:
+        merged = dict(current.attributes or {})
+        merged.update(attributes)
+        current.attributes = merged
+    return current
+
+
+def add_relationship(
+    session: Session,
+    rel_type: RelationshipType,
+    source: Entity,
+    target: Entity,
+    attributes: dict | None = None,
+    data_version_id: uuid.UUID | None = None,
+) -> EntityRelationship:
+    """Add and flush one relationship edge after checking it against the ontology.
+
+    Raises:
+        OntologyViolationError: when ``rel_type`` is not allowed between the
+            source and target entity types.
+    """
+    source_kind = EntityType(source.entity_type)
+    target_kind = EntityType(target.entity_type)
+
+    allowed = is_valid_relationship(rel_type, source_kind, target_kind)
+    if not allowed:
+        raise OntologyViolationError(
+            f"关系 {rel_type.value} 不允许从 {source_kind.value} 指向 {target_kind.value}"
+        )
+
+    edge = EntityRelationship(
+        rel_type=rel_type.value,
+        source_id=source.id,
+        target_id=target.id,
+        attributes=attributes or {},
+        data_version_id=data_version_id,
+    )
+    session.add(edge)
+    session.flush()
+    return edge
+
+
+# Multi-hop penetration: walks relationship edges as undirected and returns every
+# entity connected to the start entity within :max_depth hops, with its shortest
+# hop count. Used to detect suspected common controllers / related-party networks.
+#
+# The recursion uses UNION (not UNION ALL), so duplicate (entity_id, depth) rows are
+# discarded. This bounds the work by (entities x depth) instead of enumerating every
+# simple path, and the depth limit guarantees termination even on cyclic graphs.
+# MIN(depth) is unchanged because a shortest walk is always a simple path.
+_TRAVERSE_SQL = text(
+    """
+    WITH RECURSIVE reachable(entity_id, depth) AS (
+        SELECT :start_id, 0
+        UNION
+        SELECT
+            CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END,
+            rc.depth + 1
+        FROM reachable rc
+        JOIN entity_relationship r
+          ON (r.source_id = rc.entity_id OR r.target_id = rc.entity_id)
+        WHERE rc.depth < :max_depth
+    )
+    SELECT entity_id, MIN(depth) AS depth
+    FROM reachable
+    WHERE entity_id <> :start_id
+    GROUP BY entity_id
+    ORDER BY depth;
+    """
+)
+
+
+def find_related_entities(
+    session: Session, start_id: uuid.UUID, max_depth: int = 3
+) -> list[tuple[uuid.UUID, int]]:
+    """Return ``(entity id, shortest hop count)`` for every entity connected to the start within ``max_depth`` hops."""
+    params = {"start_id": start_id, "max_depth": max_depth}
+    result = session.execute(_TRAVERSE_SQL, params)
+    pairs = []
+    for row in result.all():
+        pairs.append((row[0], row[1]))
+    return pairs
@@ -0,0 +1,157 @@
+"""审计数据中台 ORM 模型。
+
+涵盖：数据版本、本体实体、知识图谱关系边、双时态属性、时序事件。
+对应需求 R2 / R3，建模决策见 ADR-0002。
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import uuid
+
+from sqlalchemy import (
+    DateTime,
+    Float,
+    ForeignKey,
+    Index,
+    Integer,
+    String,
+    Text,
+    UniqueConstraint,
+)
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from app.db import Base
+
+
+def _uuid() -> uuid.UUID:
+    return uuid.uuid4()
+
+
+class DataVersion(Base):
+    """Data version registry: source, batch label, ingest time and row count per batch.
+
+    Rows in the other datahub tables point here through ``data_version_id`` so that
+    conclusions stay traceable to the batch they came from (requirement R3).
+    """
+
+    __tablename__ = "data_version"
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    source_system: Mapped[str] = mapped_column(String(64), nullable=False)
+    batch_label: Mapped[str] = mapped_column(String(128), nullable=False)
+    row_count: Mapped[int] = mapped_column(Integer, default=0)
+    # Client-side default: timezone-aware UTC timestamp taken when the row is created.
+    ingested_at: Mapped[dt.datetime] = mapped_column(
+        DateTime(timezone=True), default=lambda: dt.datetime.now(dt.UTC)
+    )
+    note: Mapped[str | None] = mapped_column(Text, nullable=True)
+
+
+class Entity(Base):
+    """Ontology entity node (a vertex of the knowledge graph).
+
+    ``business_key`` is the business primary key from the source system and is used
+    for master-data alignment (normalising the same entity across systems). The pair
+    ``(entity_type, business_key)`` is unique.
+    """
+
+    __tablename__ = "entity"
+    __table_args__ = (
+        UniqueConstraint("entity_type", "business_key", name="uq_entity_type_bizkey"),
+        Index("ix_entity_type", "entity_type"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    entity_type: Mapped[str] = mapped_column(String(32), nullable=False)
+    business_key: Mapped[str] = mapped_column(String(128), nullable=False)
+    display_name: Mapped[str | None] = mapped_column(String(256), nullable=True)
+    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
+
+    # Master-data alignment: the canonical ("golden") entity this row was merged into
+    # (same actual controller / same subject). NULL means this row is itself canonical.
+    canonical_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=True
+    )
+
+    # Optional link to the ingest batch that produced this row.
+    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
+    )
+
+
+class EntityRelationship(Base):
+    """Directed knowledge-graph edge between two ``Entity`` rows.
+
+    Multi-hop penetration traverses this table with a recursive CTE; ``source_id``
+    and ``target_id`` are both indexed.
+    """
+
+    __tablename__ = "entity_relationship"
+    __table_args__ = (
+        Index("ix_rel_source", "source_id"),
+        Index("ix_rel_target", "target_id"),
+        Index("ix_rel_type", "rel_type"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    rel_type: Mapped[str] = mapped_column(String(32), nullable=False)
+    source_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
+    )
+    target_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
+    )
+    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
+
+    # Optional link to the ingest batch that produced this edge.
+    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
+    )
+
+    # Both relationships target Entity, so the foreign key must be named explicitly.
+    source: Mapped[Entity] = relationship(foreign_keys=[source_id])
+    target: Mapped[Entity] = relationship(foreign_keys=[target_id])
+
+
+class BitemporalFact(Base):
+    """Bitemporal fact: how one attribute/state of an entity changes over time.
+
+    - Business validity period ``valid_from`` / ``valid_to`` (application time)
+    - System record period ``system_from`` / ``system_to`` (transaction time)
+    Replaying history means filtering both timelines by a given
+    (as_of_valid, as_of_system) pair (see the repository module).
+    """
+
+    __tablename__ = "bitemporal_fact"
+    __table_args__ = (
+        Index("ix_btf_entity_attr", "entity_id", "attr_name"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    entity_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
+    )
+    attr_name: Mapped[str] = mapped_column(String(64), nullable=False)
+    attr_value: Mapped[dict] = mapped_column(JSONB, default=dict)
+
+    # valid_to / system_to are nullable; NULL leaves the period open-ended.
+    valid_from: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), nullable=False)
+    valid_to: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    # Client-side default: timezone-aware UTC timestamp taken when the row is created.
+    system_from: Mapped[dt.datetime] = mapped_column(
+        DateTime(timezone=True), default=lambda: dt.datetime.now(dt.UTC)
+    )
+    system_to: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+
+    # Optional link to the ingest batch that produced this fact.
+    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
+    )
+
+
+class MetricEvent(Base):
+    """时序事件：行为/指标类数据（用户生命周期、回款、话务、佣金、资源使用）。
+
+    部署后通过 TimescaleDB create_hypertable('metric_event', 'event_time') 转为超表。
+    """
+
+    __tablename__ = "metric_event"
+    __table_args__ = (
+        Index("ix_metric_subject_time", "subject_type", "subject_key", "event_time"),
+        Index("ix_metric_name_time", "metric_name", "event_time"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    event_time: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), nullable=False)
+    subject_type: Mapped[str] = mapped_column(String(32), nullable=False)  # 如 msisdn/channel
+    subject_key: Mapped[str] = mapped_column(String(128), nullable=False)
+    metric_name: Mapped[str] = mapped_column(String(64), nullable=False)  # 如 traffic_mb/commission
+    metric_value: Mapped[float] = mapped_column(Float, default=0.0)
+    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
+
+    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
+    )
@@ -0,0 +1,86 @@
+"""审计本体（Ontology）定义。
+
+定义电信内审域的核心实体类型与关系类型，作为知识图谱与主数据对齐的基准。
+对应需求 R2。
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+
+
+class EntityType(str, Enum):
+    """Core entity types of the audit ontology."""
+
+    CUSTOMER = "customer"  # customer (including government/enterprise customers)
+    CONTRACT = "contract"  # contract
+    MSISDN = "msisdn"  # phone number
+    IMEI = "imei"  # terminal device
+    ACCOUNT = "account"  # account (paying / receiving)
+    WORK_ORDER = "work_order"  # work order
+    SUPPLIER = "supplier"  # supplier
+    SETTLEMENT = "settlement"  # settlement statement
+    EMPLOYEE = "employee"  # employee
+    CHANNEL = "channel"  # sales channel / agent
+    LEGAL_PERSON = "legal_person"  # legal representative / natural person
+    ADDRESS = "address"  # address
+
+
+class RelationshipType(str, Enum):
+    """Core (directed) relationship types of the audit ontology."""
+
+    SIGNED = "signed"  # customer -signed-> contract
+    PAID_BY = "paid_by"  # contract -collection account-> account
+    OWNS_ACCOUNT = "owns_account"  # customer/supplier/legal person -owns-> account
+    REGISTERED_AT = "registered_at"  # customer/supplier -registered address-> address
+    LEGAL_REP_OF = "legal_rep_of"  # legal person -legal representative of-> customer/supplier
+    RELATED_TO = "related_to"  # legal person -relative/affiliate-> legal person
+    HOLDS_MSISDN = "holds_msisdn"  # customer -holds-> phone number
+    BOUND_DEVICE = "bound_device"  # phone number -bound to-> IMEI
+    BELONGS_TO_CHANNEL = "belongs_to_channel"  # phone number/contract -belongs to-> channel
+    SUPPLIES = "supplies"  # supplier -supplies-> contract/work order
+    HANDLED_BY = "handled_by"  # work order -handler-> employee
+    SETTLES = "settles"  # settlement statement -settles-> contract
+
+
+# Allowed (source entity types, target entity types) per relationship type; consulted
+# by is_valid_relationship() to validate graph writes.
+RELATIONSHIP_DOMAIN: dict[RelationshipType, tuple[set[EntityType], set[EntityType]]] = {
+    RelationshipType.SIGNED: ({EntityType.CUSTOMER}, {EntityType.CONTRACT}),
+    RelationshipType.PAID_BY: ({EntityType.CONTRACT}, {EntityType.ACCOUNT}),
+    RelationshipType.OWNS_ACCOUNT: (
+        {EntityType.CUSTOMER, EntityType.SUPPLIER, EntityType.LEGAL_PERSON},
+        {EntityType.ACCOUNT},
+    ),
+    RelationshipType.REGISTERED_AT: (
+        {EntityType.CUSTOMER, EntityType.SUPPLIER},
+        {EntityType.ADDRESS},
+    ),
+    RelationshipType.LEGAL_REP_OF: (
+        {EntityType.LEGAL_PERSON},
+        {EntityType.CUSTOMER, EntityType.SUPPLIER},
+    ),
+    RelationshipType.RELATED_TO: ({EntityType.LEGAL_PERSON}, {EntityType.LEGAL_PERSON}),
+    RelationshipType.HOLDS_MSISDN: ({EntityType.CUSTOMER}, {EntityType.MSISDN}),
+    RelationshipType.BOUND_DEVICE: ({EntityType.MSISDN}, {EntityType.IMEI}),
+    RelationshipType.BELONGS_TO_CHANNEL: (
+        {EntityType.MSISDN, EntityType.CONTRACT},
+        {EntityType.CHANNEL},
+    ),
+    RelationshipType.SUPPLIES: (
+        {EntityType.SUPPLIER},
+        {EntityType.CONTRACT, EntityType.WORK_ORDER},
+    ),
+    RelationshipType.HANDLED_BY: ({EntityType.WORK_ORDER}, {EntityType.EMPLOYEE}),
+    RelationshipType.SETTLES: ({EntityType.SETTLEMENT}, {EntityType.CONTRACT}),
+}
+
+
+def is_valid_relationship(
+    rel: RelationshipType, source: EntityType, target: EntityType
+) -> bool:
+    """校验一条关系的源/目标实体类型是否符合本体约束。"""
+    domain = RELATIONSHIP_DOMAIN.get(rel)
+    if domain is None:
+        return False
+    sources, targets = domain
+    return source in sources and target in targets
@@ -0,0 +1,40 @@
+"""数据库引擎与会话管理。"""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
+
+from app.config import get_settings
+
+
+class Base(DeclarativeBase):
+    """Declarative base class shared by all ORM models."""
+
+
+# Process-wide singletons, created lazily by get_engine() / get_sessionmaker().
+_engine = None
+_SessionLocal: sessionmaker[Session] | None = None
+
+
+def get_engine():
+    global _engine
+    if _engine is None:
+        settings = get_settings()
+        _engine = create_engine(settings.database_url, pool_pre_ping=True, future=True)
+    return _engine
+
+
+def get_sessionmaker() -> sessionmaker[Session]:
+    global _SessionLocal
+    if _SessionLocal is None:
+        _SessionLocal = sessionmaker(bind=get_engine(), expire_on_commit=False)
+    return _SessionLocal
+
+
+def get_session() -> Iterator[Session]:
+    """FastAPI 依赖注入用的会话生成器。"""
+    sm = get_sessionmaker()
+    with sm() as session:
+        yield session
@@ -0,0 +1,10 @@
+"""LLM Provider 抽象层。
+
+通过统一接口隔离 LLM 实现，使开发期可用公网千问、生产期无缝切换本地 vLLM。
+强约束："数据零出域"红线由 provider 工厂在 prod 环境拦截公网 Provider。
+"""
+
+from app.llm.base import ChatMessage, LLMProvider, LLMResponse
+from app.llm.factory import get_llm_provider
+
+__all__ = ["ChatMessage", "LLMProvider", "LLMResponse", "get_llm_provider"]
@@ -0,0 +1,44 @@
+"""LLM Provider 抽象接口与数据模型。"""
+
+from __future__ import annotations
+
+import abc
+from dataclasses import dataclass, field
+
+
+@dataclass
+class ChatMessage:
+    """One message of a chat conversation passed to ``LLMProvider.chat``."""
+
+    role: str  # "system" | "user" | "assistant"
+    content: str
+
+
+@dataclass
+class LLMResponse:
+    """Result of a chat completion as returned by ``LLMProvider.chat``."""
+
+    content: str
+    model: str
+    provider: str
+    # Whether the call went through an egress (public-network) channel; kept so the
+    # audit trail can record it.
+    egress: bool = False
+    raw: dict = field(default_factory=dict)
+
+
+class LLMProvider(abc.ABC):
+    """Common interface implemented by every LLM backend.
+
+    Business code depends only on this interface; switching between a public and a
+    local backend is a configuration change and does not touch callers.
+    """
+
+    #: Provider name.
+    name: str = "base"
+    #: Whether the provider goes over the public network (egress). Providers with
+    #: egress=True are forbidden in the prod environment.
+    egress: bool = False
+
+    @abc.abstractmethod
+    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
+        """Synchronous chat completion."""
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def health(self) -> bool:
+        """Health probe: whether the provider is usable."""
+        raise NotImplementedError
@@ -0,0 +1,31 @@
+"""LLM Provider 工厂：按配置创建 provider，并执行数据零出域红线校验。"""
+
+from __future__ import annotations
+
+from app.config import EGRESS_PROVIDERS, LLMProviderName, Settings, get_settings
+from app.llm.base import LLMProvider
+from app.llm.providers import DashScopeProvider, VllmProvider
+
+
+class EgressPolicyError(RuntimeError):
+    """Violation of the "zero data egress" red line."""
+
+
+def get_llm_provider(settings: Settings | None = None) -> LLMProvider:
+    settings = settings or get_settings()
+
+    # 红线：prod 环境禁止公网 provider
+    if settings.is_prod and settings.llm_provider in EGRESS_PROVIDERS:
+        raise EgressPolicyError(
+            f"数据零出域红线违规：prod 环境禁止使用公网 LLM Provider "
+            f"'{settings.llm_provider.value}'。"
+        )
+
+    if settings.llm_provider == LLMProviderName.dashscope:
+        return DashScopeProvider(
+            api_key=settings.dashscope_api_key, model=settings.dashscope_model
+        )
+    if settings.llm_provider == LLMProviderName.vllm:
+        return VllmProvider(base_url=settings.vllm_base_url, model=settings.vllm_model)
+
+    raise ValueError(f"未知的 LLM Provider: {settings.llm_provider}")
@@ -0,0 +1,80 @@
+"""具体 LLM Provider 实现：DashScope（公网千问，仅 dev）、vLLM（本地，prod）。
+
+两者均走 OpenAI 兼容的 /chat/completions 协议。
+"""
+
+from __future__ import annotations
+
+import httpx
+
+from app.llm.base import ChatMessage, LLMProvider, LLMResponse
+
+
+class DashScopeProvider(LLMProvider):
+    """公网千问（DashScope，OpenAI 兼容模式）。仅限开发测试，且只允许脱敏/样例假数据。"""
+
+    name = "dashscope"
+    egress = True  # 走公网，出域
+
+    _BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
+
+    def __init__(self, api_key: str, model: str, timeout: float = 30.0) -> None:
+        self._api_key = api_key
+        self._model = model
+        self._timeout = timeout
+
+    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
+        payload = {
+            "model": self._model,
+            "messages": [{"role": m.role, "content": m.content} for m in messages],
+            **kwargs,
+        }
+        headers = {"Authorization": f"Bearer {self._api_key}"}
+        with httpx.Client(timeout=self._timeout) as client:
+            resp = client.post(
+                f"{self._BASE_URL}/chat/completions", json=payload, headers=headers
+            )
+            resp.raise_for_status()
+            data = resp.json()
+        content = data["choices"][0]["message"]["content"]
+        return LLMResponse(
+            content=content, model=self._model, provider=self.name, egress=True, raw=data
+        )
+
+    def health(self) -> bool:
+        return bool(self._api_key)
+
+
+class VllmProvider(LLMProvider):
+    """本地 vLLM（OpenAI 兼容）。生产使用，数据不出域。"""
+
+    name = "vllm"
+    egress = False
+
+    def __init__(self, base_url: str, model: str, timeout: float = 60.0) -> None:
+        self._base_url = base_url.rstrip("/")
+        self._model = model
+        self._timeout = timeout
+
+    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
+        payload = {
+            "model": self._model,
+            "messages": [{"role": m.role, "content": m.content} for m in messages],
+            **kwargs,
+        }
+        with httpx.Client(timeout=self._timeout) as client:
+            resp = client.post(f"{self._base_url}/chat/completions", json=payload)
+            resp.raise_for_status()
+            data = resp.json()
+        content = data["choices"][0]["message"]["content"]
+        return LLMResponse(
+            content=content, model=self._model, provider=self.name, egress=False, raw=data
+        )
+
+    def health(self) -> bool:
+        try:
+            with httpx.Client(timeout=5.0) as client:
+                resp = client.get(f"{self._base_url}/models")
+                return resp.status_code == 200
+        except httpx.HTTPError:
+            return False
@@ -0,0 +1,45 @@
+"""AIAudit FastAPI 应用入口。"""
+
+from __future__ import annotations
+
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+
+from app import __version__
+from app.api.datahub import router as datahub_router
+from app.config import get_settings
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # 启动时执行数据零出域红线校验，违规则阻断启动
+    settings = get_settings()
+    settings.validate_egress_policy()
+    yield
+
+
+# Application instance; `lifespan` runs the egress-policy check at startup.
+app = FastAPI(
+    title="AIAudit · 本地 AI 内审平台",
+    version=__version__,
+    lifespan=lifespan,
+)
+
+# Mount the datahub API routes.
+app.include_router(datahub_router)
+
+
+@app.get("/health")
+def health() -> dict:
+    """存活探针。"""
+    return {"status": "ok", "version": __version__}
+
+
+@app.get("/health/config")
+def health_config() -> dict:
+    """配置/合规探针：暴露环境与 LLM provider 出域状态（不含密钥）。"""
+    settings = get_settings()
+    return {
+        "env": settings.aiaudit_env.value,
+        "llm_provider": settings.llm_provider.value,
+        "egress_blocked_in_prod": settings.is_prod,
+    }
@@ -0,0 +1,7 @@
+# 数据库迁移（Alembic）
+
+- 生成迁移：`alembic revision --autogenerate -m "描述"`
+- 应用迁移：`alembic upgrade head`
+- 回滚一步：`alembic downgrade -1`
+
+模型定义见 `app/datahub/models.py`；连接串取自应用配置（`DATABASE_URL`）。
@@ -0,0 +1,59 @@
+"""Alembic 迁移环境。
+
+从应用配置读取数据库 URL，并以 app.db.Base 的元数据作为 autogenerate 目标。
+"""
+
+from __future__ import annotations
+
+from logging.config import fileConfig
+
+from alembic import context
+from sqlalchemy import engine_from_config, pool
+
+from app.audit import models as audit_models  # noqa: F401,E402
+from app.clues import models as clue_models  # noqa: F401,E402
+from app.config import get_settings
+
+# 导入模型以注册到 Base.metadata
+from app.datahub import models  # noqa: F401,E402
+from app.db import Base
+
+config = context.config
+
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+# 用应用配置覆盖 sqlalchemy.url
+config.set_main_option("sqlalchemy.url", get_settings().database_url)
+
+target_metadata = Base.metadata
+
+
+def run_migrations_offline() -> None:
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def run_migrations_online() -> None:
+    connectable = engine_from_config(
+        config.get_section(config.config_ini_section, {}),
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+    with connectable.connect() as connection:
+        context.configure(connection=connection, target_metadata=target_metadata)
+        with context.begin_transaction():
+            context.run_migrations()
+
+
+# Entry point: Alembic executes this module; pick the runner matching the mode.
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
@@ -0,0 +1,24 @@
+"""${message}
+
+Revision ID: ${up_revision}
+Revises: ${down_revision | comma,n}
+Create Date: ${create_date}
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+${imports if imports else ""}
+
+revision: str = ${repr(up_revision)}
+down_revision: Union[str, None] = ${repr(down_revision)}
+branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
+depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
+
+
+def upgrade() -> None:
+    ${upgrades if upgrades else "pass"}
+
+
+def downgrade() -> None:
+    ${downgrades if downgrades else "pass"}
@@ -0,0 +1,140 @@
+"""初始化数据中台表：数据版本 / 实体 / 关系 / 双时态事实 / 时序事件
+
+Revision ID: 0001_init_datahub
+Revises:
+Create Date: 2026-06
+"""
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# Alembic revision identifiers; down_revision is None, so this is the first revision.
+revision: str = "0001_init_datahub"
+down_revision: str | None = None
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    """Create the datahub tables and their indexes.
+
+    Tables are created in dependency order: data_version, entity,
+    entity_relationship, bitemporal_fact, metric_event. Two optional steps follow and
+    are skipped when the corresponding PostgreSQL extension is not installed:
+    converting metric_event into a TimescaleDB hypertable, and adding the
+    no-overlap exclusion constraint on bitemporal_fact.
+    """
+    # data_version
+    op.create_table(
+        "data_version",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("source_system", sa.String(64), nullable=False),
+        sa.Column("batch_label", sa.String(128), nullable=False),
+        sa.Column("row_count", sa.Integer(), nullable=False, server_default="0"),
+        sa.Column("ingested_at", sa.DateTime(timezone=True), nullable=False),
+        sa.Column("note", sa.Text(), nullable=True),
+    )
+
+    # entity
+    op.create_table(
+        "entity",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("entity_type", sa.String(32), nullable=False),
+        sa.Column("business_key", sa.String(128), nullable=False),
+        sa.Column("display_name", sa.String(256), nullable=True),
+        sa.Column("attributes", postgresql.JSONB(), nullable=False, server_default="{}"),
+        sa.Column("canonical_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.ForeignKeyConstraint(["canonical_id"], ["entity.id"]),
+        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
+        sa.UniqueConstraint("entity_type", "business_key", name="uq_entity_type_bizkey"),
+    )
+    op.create_index("ix_entity_type", "entity", ["entity_type"])
+
+    # entity_relationship
+    op.create_table(
+        "entity_relationship",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("rel_type", sa.String(32), nullable=False),
+        sa.Column("source_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("target_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("attributes", postgresql.JSONB(), nullable=False, server_default="{}"),
+        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.ForeignKeyConstraint(["source_id"], ["entity.id"]),
+        sa.ForeignKeyConstraint(["target_id"], ["entity.id"]),
+        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
+    )
+    op.create_index("ix_rel_source", "entity_relationship", ["source_id"])
+    op.create_index("ix_rel_target", "entity_relationship", ["target_id"])
+    op.create_index("ix_rel_type", "entity_relationship", ["rel_type"])
+
+    # bitemporal_fact
+    op.create_table(
+        "bitemporal_fact",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("entity_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("attr_name", sa.String(64), nullable=False),
+        sa.Column("attr_value", postgresql.JSONB(), nullable=False, server_default="{}"),
+        sa.Column("valid_from", sa.DateTime(timezone=True), nullable=False),
+        sa.Column("valid_to", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("system_from", sa.DateTime(timezone=True), nullable=False),
+        sa.Column("system_to", sa.DateTime(timezone=True), nullable=True),
+        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.ForeignKeyConstraint(["entity_id"], ["entity.id"]),
+        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
+    )
+    op.create_index("ix_btf_entity_attr", "bitemporal_fact", ["entity_id", "attr_name"])
+
+    # metric_event (time series)
+    op.create_table(
+        "metric_event",
+        sa.Column("id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("event_time", sa.DateTime(timezone=True), nullable=False),
+        sa.Column("subject_type", sa.String(32), nullable=False),
+        sa.Column("subject_key", sa.String(128), nullable=False),
+        sa.Column("metric_name", sa.String(64), nullable=False),
+        sa.Column("metric_value", sa.Float(), nullable=False, server_default="0"),
+        sa.Column("attributes", postgresql.JSONB(), nullable=False, server_default="{}"),
+        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
+        # A hypertable's primary key must include the partitioning column event_time
+        sa.PrimaryKeyConstraint("id", "event_time"),
+        sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"]),
+    )
+    op.create_index(
+        "ix_metric_subject_time",
+        "metric_event",
+        ["subject_type", "subject_key", "event_time"],
+    )
+    op.create_index("ix_metric_name_time", "metric_event", ["metric_name", "event_time"])
+
+    # Convert to a TimescaleDB hypertable (skipped when the extension is not installed,
+    # so tests can run in environments without timescaledb)
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'timescaledb') THEN
+                PERFORM create_hypertable('metric_event', 'event_time', if_not_exists => TRUE);
+            END IF;
+        END$$;
+        """
+    )
+
+    # Bitemporal exclusion constraint: for the same entity and attribute, business
+    # validity periods must not overlap among current rows (system_to IS NULL).
+    # Requires btree_gist.
+    # NOTE(review): when btree_gist is not installed the constraint is silently not
+    # created, so overlap protection then depends on the environment - confirm this
+    # is intended.
+    op.execute(
+        """
+        DO $$
+        BEGIN
+            IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'btree_gist') THEN
+                ALTER TABLE bitemporal_fact
+                ADD CONSTRAINT ex_btf_no_overlap
+                EXCLUDE USING gist (
+                    entity_id WITH =,
+                    attr_name WITH =,
+                    tstzrange(valid_from, valid_to) WITH &&
+                ) WHERE (system_to IS NULL);
+            END IF;
+        END$$;
+        """
+    )
+
+
+def downgrade() -> None:
+    op.drop_table("metric_event")
+    op.drop_table("bitemporal_fact")
+    op.drop_table("entity_relationship")
+    op.drop_index("ix_entity_type", table_name="entity")
+    op.drop_table("entity")
+    op.drop_table("data_version")
@@ -0,0 +1,146 @@
+"""线索引擎与系统自审计表：clue / clue_status_history / working_paper / audit_log
+
+Revision ID: 0002_clues_audit
+Revises: 0001_init_datahub
+Create Date: 2026-06
+"""
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+from sqlalchemy.dialects import postgresql
+
+# Alembic revision identifiers; this revision follows 0001_init_datahub.
+revision: str = "0002_clues_audit"
+down_revision: str | None = "0001_init_datahub"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+# PostgreSQL enum types shared by upgrade() and downgrade(). create_type=False:
+# the types are created and dropped explicitly in those functions.
+confidence_tier = postgresql.ENUM(
+    "high", "medium", "low", name="confidence_tier", create_type=False
+)
+clue_status = postgresql.ENUM(
+    "new", "assigned", "reviewing", "confirmed", "dismissed",
+    "rectifying", "transferred", "closed", name="clue_status", create_type=False,
+)
+
+
+def upgrade() -> None:
+    """Create the clue-engine and self-audit tables plus their protection triggers.
+
+    Creates the two enum types, then the tables clue, clue_status_history,
+    working_paper and audit_log with their indexes, and finally installs triggers
+    that reject DELETE on clue and audit_log and UPDATE on audit_log.
+    """
+    bind = op.get_bind()
+    # checkfirst=True: only create each enum type if it does not exist yet.
+    confidence_tier.create(bind, checkfirst=True)
+    clue_status.create(bind, checkfirst=True)
+
+    op.create_table(
+        "clue",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("title", sa.String(256), nullable=False),
+        sa.Column("risk_domain", sa.String(32), nullable=False),
+        sa.Column("scenario_code", sa.String(32), nullable=False),
+        sa.Column("confidence", confidence_tier, nullable=False),
+        sa.Column("score", sa.Float(), nullable=False, server_default="0"),
+        sa.Column("status", clue_status, nullable=False, server_default="new"),
+        sa.Column("rationale", sa.Text(), nullable=False, server_default=""),
+        sa.Column("evidence", postgresql.JSONB(), nullable=False, server_default="{}"),
+        sa.Column("subjects", postgresql.JSONB(), nullable=False, server_default="{}"),
+        sa.Column("amount_involved", sa.Float(), nullable=True),
+        sa.Column("assignee", sa.String(64), nullable=True),
+        sa.Column("feedback", sa.String(16), nullable=True),
+        sa.Column("model_version", sa.String(64), nullable=True),
+        sa.Column("rule_version", sa.String(64), nullable=True),
+        sa.Column("data_version_id", postgresql.UUID(as_uuid=True), nullable=True),
+        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
+        sa.Column("updated_at", sa.DateTime(timezone=True), nullable=False),
+    )
+    op.create_index("ix_clue_status", "clue", ["status"])
+    op.create_index("ix_clue_scenario", "clue", ["scenario_code"])
+    op.create_index("ix_clue_assignee", "clue", ["assignee"])
+
+    op.create_table(
+        "clue_status_history",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("clue_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("from_status", sa.String(16), nullable=True),
+        sa.Column("to_status", sa.String(16), nullable=False),
+        sa.Column("actor", sa.String(64), nullable=False),
+        sa.Column("note", sa.Text(), nullable=True),
+        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
+        sa.ForeignKeyConstraint(["clue_id"], ["clue.id"]),
+    )
+    op.create_index("ix_csh_clue", "clue_status_history", ["clue_id"])
+
+    op.create_table(
+        "working_paper",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("clue_id", postgresql.UUID(as_uuid=True), nullable=False),
+        sa.Column("content", sa.Text(), nullable=False, server_default=""),
+        sa.Column("conclusion", sa.String(32), nullable=True),
+        sa.Column("author", sa.String(64), nullable=False),
+        sa.Column("snapshot", postgresql.JSONB(), nullable=False, server_default="{}"),
+        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
+        sa.ForeignKeyConstraint(["clue_id"], ["clue.id"]),
+    )
+    op.create_index("ix_wp_clue", "working_paper", ["clue_id"])
+
+    op.create_table(
+        "audit_log",
+        sa.Column("id", postgresql.UUID(as_uuid=True), primary_key=True),
+        sa.Column("seq", sa.BigInteger(), sa.Identity(always=False), nullable=False),
+        sa.Column("actor", sa.String(64), nullable=False),
+        sa.Column("role", sa.String(32), nullable=True),
+        sa.Column("action", sa.String(64), nullable=False),
+        sa.Column("target_type", sa.String(64), nullable=True),
+        sa.Column("target_id", sa.String(128), nullable=True),
+        sa.Column("detail", postgresql.JSONB(), nullable=False, server_default="{}"),
+        sa.Column("created_at", sa.DateTime(timezone=True), nullable=False),
+        sa.Column("prev_hash", sa.String(64), nullable=True),
+        sa.Column("entry_hash", sa.String(64), nullable=False),
+    )
+    op.create_index("ix_audit_actor", "audit_log", ["actor"])
+    op.create_index("ix_audit_action", "audit_log", ["action"])
+    op.create_index("ix_audit_seq", "audit_log", ["seq"], unique=True)
+
+    # R19: forbid physical deletion of clues and audit-log rows (database-level
+    # triggers as the last line of defence).
+    # NOTE(review): these are row-level BEFORE DELETE triggers; TRUNCATE presumably
+    # does not fire them - confirm whether a statement-level TRUNCATE guard is needed.
+    op.execute(
+        """
+        CREATE OR REPLACE FUNCTION forbid_delete() RETURNS trigger AS $$
+        BEGIN
+            RAISE EXCEPTION '禁止删除：% 表受 R19 不可删除约束保护', TG_TABLE_NAME;
+        END;
+        $$ LANGUAGE plpgsql;
+        """
+    )
+    op.execute(
+        "CREATE TRIGGER trg_clue_no_delete BEFORE DELETE ON clue "
+        "FOR EACH ROW EXECUTE FUNCTION forbid_delete();"
+    )
+    op.execute(
+        "CREATE TRIGGER trg_audit_no_delete BEFORE DELETE ON audit_log "
+        "FOR EACH ROW EXECUTE FUNCTION forbid_delete();"
+    )
+    # The audit log also rejects updates (append-only).
+    op.execute(
+        """
+        CREATE OR REPLACE FUNCTION forbid_update() RETURNS trigger AS $$
+        BEGIN
+            RAISE EXCEPTION '禁止更新：% 表为仅追加日志', TG_TABLE_NAME;
+        END;
+        $$ LANGUAGE plpgsql;
+        """
+    )
+    op.execute(
+        "CREATE TRIGGER trg_audit_no_update BEFORE UPDATE ON audit_log "
+        "FOR EACH ROW EXECUTE FUNCTION forbid_update();"
+    )
+
+
+def downgrade() -> None:
+    op.execute("DROP TRIGGER IF EXISTS trg_audit_no_update ON audit_log;")
+    op.execute("DROP TRIGGER IF EXISTS trg_audit_no_delete ON audit_log;")
+    op.execute("DROP TRIGGER IF EXISTS trg_clue_no_delete ON clue;")
+    op.drop_table("audit_log")
+    op.drop_table("working_paper")
+    op.drop_table("clue_status_history")
+    op.drop_table("clue")
+    clue_status.drop(op.get_bind(), checkfirst=True)
+    confidence_tier.drop(op.get_bind(), checkfirst=True)
+    op.execute("DROP FUNCTION IF EXISTS forbid_update();")
+    op.execute("DROP FUNCTION IF EXISTS forbid_delete();")
@@ -0,0 +1,24 @@
+[project]
+name = "aiaudit-backend"
+version = "0.1.0"
+description = "AIAudit 本地 AI 内审平台后端"
+requires-python = ">=3.11"
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+pythonpath = ["."]
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "function"
+
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+
+[tool.ruff.lint]
+select = ["E", "F", "I", "W", "UP", "B"]
+# B008：FastAPI 依赖注入 Depends() 作为默认值是官方推荐用法
+ignore = ["B008"]
+
+[tool.mypy]
+python_version = "3.11"
+ignore_missing_imports = true
@@ -0,0 +1,5 @@
+-r requirements.txt
+pytest==8.3.4
+pytest-asyncio==0.25.0
+ruff==0.8.4
+mypy==1.14.0
@@ -0,0 +1,11 @@
+fastapi==0.115.6
+uvicorn[standard]==0.34.0
+pydantic==2.10.4
+pydantic-settings==2.7.1
+sqlalchemy==2.0.36
+psycopg[binary]==3.2.3
+alembic==1.14.0
+celery==5.4.0
+redis==5.2.1
+httpx==0.28.1
+python-dotenv==1.0.1
@@ -0,0 +1,41 @@
+"""集成测试 fixture：连接本地 PostgreSQL 16，按事务隔离并回滚。
+
+需要可连接的数据库（DATABASE_URL）。无法连接时跳过整组集成测试。
+"""
+
+from __future__ import annotations
+
+import pytest
+from sqlalchemy import text
+from sqlalchemy.exc import OperationalError
+
+from app.db import get_engine
+
+
+@pytest.fixture(scope="session")
+def db_available() -> bool:
+    try:
+        with get_engine().connect() as conn:
+            conn.execute(text("SELECT 1"))
+        return True
+    except OperationalError:
+        return False
+
+
+@pytest.fixture()
+def session(db_available):
+    """Yield a Session bound to a dedicated connection inside an open transaction.
+
+    The requesting test is skipped when ``db_available`` is falsy. On teardown
+    the session is closed, the transaction is rolled back if it is still
+    active, and the connection is closed.
+    """
+    if not db_available:
+        pytest.skip("数据库不可用，跳过集成测试")
+    engine = get_engine()
+    connection = engine.connect()
+    # Outer transaction; the teardown below rolls it back instead of committing.
+    trans = connection.begin()
+    from sqlalchemy.orm import Session
+
+    sess = Session(bind=connection)
+    try:
+        yield sess
+    finally:
+        sess.close()
+        # Roll back only when the transaction has not already ended.
+        if trans.is_active:
+            trans.rollback()
+        connection.close()
@@ -0,0 +1,49 @@
+"""双时态集成测试（需 PostgreSQL）。
+
+验证 R3：按历史业务时点回放属性值，以及双时态排他约束防止有效期重叠。
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+
+import pytest
+from sqlalchemy.exc import IntegrityError
+
+from app.datahub import bitemporal_repo as btr
+from app.datahub.graph_repo import upsert_entity
+from app.datahub.ontology import EntityType
+
+
+def test_bitemporal_replay(session):
+    """不同业务时点回放出不同的属性值。"""
+    cust = upsert_entity(session, EntityType.CUSTOMER, "CUST_BT", "丁公司")
+    session.flush()
+
+    t1 = dt.datetime(2025, 1, 1, tzinfo=dt.UTC)
+    t2 = dt.datetime(2025, 6, 1, tzinfo=dt.UTC)
+
+    btr.record_fact(session, cust.id, "credit_level", {"v": "A"}, valid_from=t1, valid_to=t2)
+    btr.record_fact(session, cust.id, "credit_level", {"v": "C"}, valid_from=t2)
+    session.flush()
+
+    early = btr.as_of(session, cust.id, "credit_level", dt.datetime(2025, 3, 1, tzinfo=dt.UTC))
+    late = btr.as_of(session, cust.id, "credit_level", dt.datetime(2025, 9, 1, tzinfo=dt.UTC))
+    assert early is not None and early.attr_value["v"] == "A"
+    assert late is not None and late.attr_value["v"] == "C"
+
+
+def test_bitemporal_exclusion_constraint(session):
+    """同一实体同一属性的业务有效期重叠应被排他约束拒绝。"""
+    cust = upsert_entity(session, EntityType.CUSTOMER, "CUST_EX", "戊公司")
+    session.flush()
+
+    t1 = dt.datetime(2025, 1, 1, tzinfo=dt.UTC)
+    t3 = dt.datetime(2025, 12, 1, tzinfo=dt.UTC)
+    t2 = dt.datetime(2025, 6, 1, tzinfo=dt.UTC)
+
+    btr.record_fact(session, cust.id, "status", {"v": "active"}, valid_from=t1, valid_to=t3)
+    session.flush()
+    # 与上一条 [t1,t3) 重叠：record_fact 内部 flush 时即触发排他约束
+    with pytest.raises(IntegrityError):
+        btr.record_fact(session, cust.id, "status", {"v": "frozen"}, valid_from=t2, valid_to=None)
@@ -0,0 +1,87 @@
+"""线索闭环 + 系统自审计集成测试（需 PostgreSQL）。
+
+覆盖 R7/R17/R18/R19：线索生成与分级、状态流转、底稿、审计哈希链、线索不可删。
+"""
+
+from __future__ import annotations
+
+import pytest
+from sqlalchemy import text
+from sqlalchemy.exc import InternalError, ProgrammingError
+
+from app.audit import service as audit
+from app.clues import service as clue_svc
+from app.clues.models import ClueStatus, ConfidenceTier
+
+
+def _new_clue(session, score=0.9):
+    return clue_svc.create_clue(
+        session,
+        title="疑似政企拆单",
+        risk_domain="收入",
+        scenario_code="R8",
+        score=score,
+        rationale="8 个客户金额集中在审批阈值边缘，且法人关联同一实控人",
+        evidence={"contracts": 8, "threshold": 1000000},
+        amount_involved=4800000,
+        actor="system",
+    )
+
+
+def test_score_to_confidence_tier():
+    assert clue_svc.score_to_tier(0.9) == ConfidenceTier.HIGH
+    assert clue_svc.score_to_tier(0.6) == ConfidenceTier.MEDIUM
+    assert clue_svc.score_to_tier(0.2) == ConfidenceTier.LOW
+
+
+def test_clue_full_lifecycle(session):
+    clue = _new_clue(session)
+    assert clue.confidence == ConfidenceTier.HIGH
+    assert clue.status == ClueStatus.NEW
+
+    clue_svc.assign(session, clue, assignee="auditor_zhang", actor="manager_li")
+    assert clue.status == ClueStatus.ASSIGNED
+    assert clue.assignee == "auditor_zhang"
+
+    paper = clue_svc.adjudicate(session, clue, confirmed=True, actor="auditor_zhang", note="属实，移交")
+    assert clue.status == ClueStatus.CONFIRMED
+    assert clue.feedback == "confirmed"
+    assert paper.conclusion == "confirmed"
+    assert paper.snapshot["score"] == 0.9
+
+    # 继续闭环：确认 -> 移交 -> 销项
+    clue_svc.transition(session, clue, ClueStatus.TRANSFERRED, actor="manager_li")
+    clue_svc.transition(session, clue, ClueStatus.CLOSED, actor="manager_li")
+    assert clue.status == ClueStatus.CLOSED
+
+
+def test_illegal_transition_rejected(session):
+    clue = _new_clue(session)
+    with pytest.raises(clue_svc.IllegalTransitionError):
+        # NEW 不能直接到 CLOSED
+        clue_svc.transition(session, clue, ClueStatus.CLOSED, actor="x")
+
+
+def test_audit_hash_chain_integrity(session):
+    _new_clue(session)
+    clue = _new_clue(session)
+    clue_svc.assign(session, clue, "auditor_zhang", "manager_li")
+    ok, broken = audit.verify_chain(session)
+    assert ok is True
+    assert broken is None
+
+
+def test_clue_cannot_be_deleted(session):
+    """R19：数据库触发器禁止物理删除线索。"""
+    clue = _new_clue(session)
+    session.flush()
+    with pytest.raises((InternalError, ProgrammingError)):
+        session.execute(text("DELETE FROM clue WHERE id = :i"), {"i": clue.id})
+        session.flush()
+
+
+def test_list_clues_filters(session):
+    _new_clue(session, score=0.9)
+    _new_clue(session, score=0.3)
+    highs = clue_svc.list_clues(session, confidence=ConfidenceTier.HIGH)
+    assert all(c.confidence == ConfidenceTier.HIGH for c in highs)
@@ -0,0 +1,63 @@
+"""数据中台穿透 API 集成测试（需 PostgreSQL）。
+
+通过 TestClient 调用 /datahub/penetrate，验证统一穿透查询服务端到端可用。
+"""
+
+from __future__ import annotations
+
+import uuid
+
+import pytest
+from fastapi.testclient import TestClient
+
+from app.datahub.graph_repo import add_relationship, upsert_entity
+from app.datahub.ontology import EntityType, RelationshipType
+from app.db import get_session
+from app.main import app
+
+
+@pytest.fixture()
+def client(session):
+    # 用集成测试的事务化 session 覆盖应用依赖，保证测试数据回滚
+    app.dependency_overrides[get_session] = lambda: session
+    try:
+        yield TestClient(app)
+    finally:
+        app.dependency_overrides.pop(get_session, None)
+
+
+def test_penetrate_endpoint_detects_related(client, session):
+    suffix = uuid.uuid4().hex[:8]
+    controller = upsert_entity(session, EntityType.LEGAL_PERSON, f"CTRL-{suffix}", "实控人")
+    cust = upsert_entity(session, EntityType.CUSTOMER, f"CUST-{suffix}", "政企客户")
+    rep = upsert_entity(session, EntityType.LEGAL_PERSON, f"REP-{suffix}", "法人")
+    add_relationship(session, RelationshipType.LEGAL_REP_OF, rep, cust)
+    add_relationship(session, RelationshipType.RELATED_TO, rep, controller)
+    session.flush()
+
+    resp = client.post(
+        "/datahub/penetrate",
+        json={"start_entity_id": str(controller.id), "max_depth": 3},
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    related_ids = {r["entity"]["id"] for r in body["related"]}
+    assert str(cust.id) in related_ids
+    assert body["related_count"] >= 2
+
+
+def test_penetrate_unknown_entity_404(client):
+    resp = client.post(
+        "/datahub/penetrate",
+        json={"start_entity_id": str(uuid.uuid4()), "max_depth": 2},
+    )
+    assert resp.status_code == 404
+
+
+def test_get_entity_endpoint(client, session):
+    suffix = uuid.uuid4().hex[:8]
+    e = upsert_entity(session, EntityType.SUPPLIER, f"SUP-{suffix}", "供应商甲")
+    session.flush()
+    resp = client.get(f"/datahub/entities/{e.id}")
+    assert resp.status_code == 200
+    assert resp.json()["business_key"] == f"SUP-{suffix}"
@@ -0,0 +1,76 @@
+"""知识图谱穿透集成测试（需 PostgreSQL）。
+
+验证 R2 关键能力：通过关系边的多跳穿透识别"疑似同一实控人"，
+以及本体约束对非法关系的拒绝。对应场景一（政企拆单+隐性实控人，R8）的图谱基础。
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from app.datahub.graph_repo import (
+    OntologyViolationError,
+    add_relationship,
+    find_related_entities,
+    upsert_entity,
+)
+from app.datahub.ontology import EntityType, RelationshipType
+
+
+def test_upsert_entity_is_idempotent(session):
+    e1 = upsert_entity(session, EntityType.CUSTOMER, "CUST-001", "客户甲")
+    e2 = upsert_entity(session, EntityType.CUSTOMER, "CUST-001", "客户甲")
+    assert e1.id == e2.id
+
+
+def test_ontology_violation_rejected(session):
+    contract = upsert_entity(session, EntityType.CONTRACT, "C-1")
+    customer = upsert_entity(session, EntityType.CUSTOMER, "CUST-2")
+    # 合同 —签约→ 客户 方向非法
+    with pytest.raises(OntologyViolationError):
+        add_relationship(session, RelationshipType.SIGNED, contract, customer)
+
+
+def test_detect_shared_controller_across_customers(session):
+    """模拟"8 个客户疑似同一实控人"：多个客户经法人关联到同一实控自然人。
+
+    构图：每个客户 <-法定代表人- 各自法人；各法人 -关联-> 同一实控人。
+    从实控人出发，应能穿透到全部客户。
+    """
+    controller = upsert_entity(session, EntityType.LEGAL_PERSON, "PER-CTRL", "实控人")
+
+    customers = []
+    for i in range(8):
+        cust = upsert_entity(session, EntityType.CUSTOMER, f"CUST-{i}", f"政企客户{i}")
+        rep = upsert_entity(session, EntityType.LEGAL_PERSON, f"PER-{i}", f"法人{i}")
+        # 法人 —法定代表人→ 客户
+        add_relationship(session, RelationshipType.LEGAL_REP_OF, rep, cust)
+        # 法人 —关联（亲属/实控）→ 实控人
+        add_relationship(session, RelationshipType.RELATED_TO, rep, controller)
+        customers.append(cust)
+    session.flush()
+
+    related = find_related_entities(session, controller.id, max_depth=3)
+    related_ids = {rid for rid, _ in related}
+
+    # 从实控人 3 跳内应能穿透到全部 8 个客户
+    for cust in customers:
+        assert cust.id in related_ids, f"未穿透到 {cust.business_key}"
+
+
+def test_traversal_respects_max_depth(session):
+    a = upsert_entity(session, EntityType.LEGAL_PERSON, "A")
+    b = upsert_entity(session, EntityType.LEGAL_PERSON, "B")
+    c = upsert_entity(session, EntityType.CUSTOMER, "C")
+    add_relationship(session, RelationshipType.RELATED_TO, a, b)
+    add_relationship(session, RelationshipType.LEGAL_REP_OF, b, c)
+    session.flush()
+
+    # depth=1：从 A 只能到 B，到不了 C
+    ids_d1 = {rid for rid, _ in find_related_entities(session, a.id, max_depth=1)}
+    assert b.id in ids_d1
+    assert c.id not in ids_d1
+
+    # depth=2：能到 C
+    ids_d2 = {rid for rid, _ in find_related_entities(session, a.id, max_depth=2)}
+    assert c.id in ids_d2
@@ -0,0 +1,42 @@
+"""数据零出域红线测试：prod 环境必须禁用公网 LLM Provider。"""
+
+import pytest
+
+from app.config import AppEnv, LLMProviderName, Settings
+from app.llm.factory import EgressPolicyError, get_llm_provider
+
+
+def _settings(env: AppEnv, provider: LLMProviderName) -> Settings:
+    return Settings(aiaudit_env=env, llm_provider=provider, dashscope_api_key="x")
+
+
+def test_prod_blocks_public_dashscope():
+    s = _settings(AppEnv.prod, LLMProviderName.dashscope)
+    with pytest.raises(EgressPolicyError):
+        get_llm_provider(s)
+
+
+def test_prod_allows_local_vllm():
+    s = _settings(AppEnv.prod, LLMProviderName.vllm)
+    provider = get_llm_provider(s)
+    assert provider.name == "vllm"
+    assert provider.egress is False
+
+
+def test_dev_allows_dashscope():
+    s = _settings(AppEnv.dev, LLMProviderName.dashscope)
+    provider = get_llm_provider(s)
+    assert provider.name == "dashscope"
+    assert provider.egress is True
+
+
+def test_validate_egress_policy_raises_in_prod():
+    s = _settings(AppEnv.prod, LLMProviderName.dashscope)
+    with pytest.raises(RuntimeError):
+        s.validate_egress_policy()
+
+
+def test_validate_egress_policy_ok_in_dev():
+    s = _settings(AppEnv.dev, LLMProviderName.dashscope)
+    # dev 下不应抛出
+    s.validate_egress_policy()
@@ -0,0 +1,21 @@
+"""健康检查端点测试。"""
+
+from fastapi.testclient import TestClient
+
+from app.main import app
+
+client = TestClient(app)
+
+
+def test_health_ok():
+    resp = client.get("/health")
+    assert resp.status_code == 200
+    assert resp.json()["status"] == "ok"
+
+
+def test_health_config():
+    resp = client.get("/health/config")
+    assert resp.status_code == 200
+    body = resp.json()
+    assert "env" in body
+    assert "llm_provider" in body
@@ -0,0 +1,42 @@
+"""审计本体约束测试（无需数据库）。"""
+
+from app.datahub.ontology import EntityType, RelationshipType, is_valid_relationship
+
+
+def test_valid_signed_relationship():
+    assert is_valid_relationship(
+        RelationshipType.SIGNED, EntityType.CUSTOMER, EntityType.CONTRACT
+    )
+
+
+def test_invalid_signed_direction():
+    # 合同不能"签约"客户（方向反了）
+    assert not is_valid_relationship(
+        RelationshipType.SIGNED, EntityType.CONTRACT, EntityType.CUSTOMER
+    )
+
+
+def test_legal_rep_relationship():
+    assert is_valid_relationship(
+        RelationshipType.LEGAL_REP_OF, EntityType.LEGAL_PERSON, EntityType.SUPPLIER
+    )
+
+
+def test_related_to_between_legal_persons():
+    # 实控人关联识别的基础：法人之间的亲属/关联关系
+    assert is_valid_relationship(
+        RelationshipType.RELATED_TO, EntityType.LEGAL_PERSON, EntityType.LEGAL_PERSON
+    )
+
+
+def test_invalid_relationship_wrong_target():
+    assert not is_valid_relationship(
+        RelationshipType.HOLDS_MSISDN, EntityType.CUSTOMER, EntityType.CONTRACT
+    )
+
+
+def test_all_relationship_types_have_domain():
+    from app.datahub.ontology import RELATIONSHIP_DOMAIN
+
+    for rel in RelationshipType:
+        assert rel in RELATIONSHIP_DOMAIN, f"关系 {rel} 缺少本体域定义"
@@ -0,0 +1 @@
+"""系统自审计模块：不可篡改操作日志、独立性与分权（R19）。"""
@@ -0,0 +1 @@
+"""线索引擎模块：线索模型、生成、置信度分级、状态流转（人机闭环）。"""
@@ -0,0 +1 @@
+"""审计数据中台模块：本体/知识图谱、双时态、时序、数据版本。"""