Initial commit: InternalAuditInterprise

2026-06-16 00:38:57 +08:00
commit 7b1e2b10a8
57 changed files with 4622 additions and 0 deletions
@@ -0,0 +1,3 @@
+"""AIAudit 后端应用包。"""
+
+# Version string for this package.
+__version__ = "0.1.0"
@@ -0,0 +1 @@
+"""HTTP API 层。"""
@@ -0,0 +1,64 @@
+"""数据中台统一穿透查询 API（P1.2.5）。
+
+作为各引擎与审计场景访问知识图谱的共同入口，对上层屏蔽底层是关系表还是图库。
+对应需求 R2。
+"""
+
+from __future__ import annotations
+
+import uuid
+
+from fastapi import APIRouter, Depends, HTTPException
+from sqlalchemy.orm import Session
+
+from app.api.schemas import (
+    EntityOut,
+    PenetrateRequest,
+    PenetrateResponse,
+    RelatedEntityOut,
+)
+from app.datahub.graph_repo import find_related_entities
+from app.datahub.models import Entity
+from app.db import get_session
+
+# Router for the data-hub endpoints; routes registered on it share the "/datahub" prefix and tag.
+router = APIRouter(prefix="/datahub", tags=["datahub"])
+
+
+@router.get("/entities/{entity_id}", response_model=EntityOut)
+def get_entity(entity_id: uuid.UUID, session: Session = Depends(get_session)) -> Entity:
+    entity = session.get(Entity, entity_id)
+    if entity is None:
+        raise HTTPException(status_code=404, detail="实体不存在")
+    return entity
+
+
+@router.post("/penetrate", response_model=PenetrateResponse)
+def penetrate(
+    req: PenetrateRequest, session: Session = Depends(get_session)
+) -> PenetrateResponse:
+    """多跳穿透：返回与起点实体连通的关联实体（用于实控人/关联方/马甲识别）。"""
+    start = session.get(Entity, req.start_entity_id)
+    if start is None:
+        raise HTTPException(status_code=404, detail="起点实体不存在")
+
+    related_raw = find_related_entities(session, req.start_entity_id, max_depth=req.max_depth)
+
+    # 批量取出关联实体详情，组装可解释结果
+    id_to_depth = {rid: depth for rid, depth in related_raw}
+    entities = (
+        session.query(Entity).filter(Entity.id.in_(list(id_to_depth.keys()))).all()
+        if id_to_depth
+        else []
+    )
+    related = [
+        RelatedEntityOut(entity=EntityOut.model_validate(e), depth=id_to_depth[e.id])
+        for e in entities
+    ]
+    related.sort(key=lambda r: r.depth)
+
+    return PenetrateResponse(
+        start_entity_id=req.start_entity_id,
+        max_depth=req.max_depth,
+        related_count=len(related),
+        related=related,
+    )
@@ -0,0 +1,36 @@
+"""API 数据传输模型（Pydantic）。"""
+
+from __future__ import annotations
+
+import uuid
+
+from pydantic import BaseModel, Field
+
+
+class EntityOut(BaseModel):
+    # Outbound representation of a single entity record.
+    id: uuid.UUID
+    entity_type: str
+    business_key: str
+    display_name: str | None = None
+    # Each instance gets its own fresh empty dict when no attributes are supplied.
+    attributes: dict = Field(default_factory=dict)
+
+    # Lets the model be populated from attribute-bearing objects rather than only dicts.
+    model_config = {"from_attributes": True}
+
+
+class RelatedEntityOut(BaseModel):
+    """穿透命中的关联实体，附最短跳数（证据强度的初步指示）。"""
+
+    # English gloss of the docstring above: a related entity hit by a penetration
+    # query, together with its shortest hop count (a first indication of evidence
+    # strength).
+    # NOTE(review): the docstring is left in its original language because it may be
+    # surfaced in generated schema output — confirm before translating it.
+    entity: EntityOut
+    # Hop count associated with this entity.
+    depth: int
+
+
+class PenetrateRequest(BaseModel):
+    # Request body for the penetration query.
+    start_entity_id: uuid.UUID
+    # Traversal depth limit: defaults to 3 and is validated to lie in 1..6 inclusive.
+    max_depth: int = Field(default=3, ge=1, le=6)
+
+
+class PenetrateResponse(BaseModel):
+    # Response body for the penetration query.
+    start_entity_id: uuid.UUID
+    max_depth: int
+    # Number of entries in ``related``.
+    related_count: int
+    related: list[RelatedEntityOut]
@@ -0,0 +1 @@
+"""系统自审计模块：不可篡改操作日志、独立性与分权（R19）。"""
@@ -0,0 +1,50 @@
+"""系统自审计 ORM 模型：不可篡改操作日志（R19）。
+
+每条日志含哈希链（prev_hash + 内容 → entry_hash），任何篡改都会断链，可检测。
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import uuid
+
+from sqlalchemy import BigInteger, DateTime, Identity, Index, String
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.orm import Mapped, mapped_column
+
+from app.db import Base
+
+
+def _uuid() -> uuid.UUID:
+    return uuid.uuid4()
+
+
+def _now() -> dt.datetime:
+    return dt.datetime.now(dt.UTC)
+
+
+class AuditLog(Base):
+    """Tamper-evident audit trail row.
+
+    Append-only: rows are not meant to be updated or deleted (enforced jointly by
+    the application layer and organisational policy, per the original note).
+    """
+
+    __tablename__ = "audit_log"
+    __table_args__ = (
+        Index("ix_audit_actor", "actor"),
+        Index("ix_audit_action", "action"),
+        # NOTE(review): ``seq`` is also declared ``unique=True`` on the column below,
+        # so this looks like a second unique index on the same column — confirm
+        # whether both are wanted.
+        Index("ix_audit_seq", "seq", unique=True),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    # Auto-incrementing sequence number; defines the order of the hash chain.
+    seq: Mapped[int] = mapped_column(
+        BigInteger, Identity(always=False), nullable=False, unique=True
+    )
+    actor: Mapped[str] = mapped_column(String(64), nullable=False)
+    role: Mapped[str | None] = mapped_column(String(32), nullable=True)
+    action: Mapped[str] = mapped_column(String(64), nullable=False)  # e.g. rule.update/clue.assign
+    target_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    target_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
+    detail: Mapped[dict] = mapped_column(JSONB, default=dict)
+    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
+
+    # Hash-chain links: the previous entry's hash (NULL allowed) and this entry's hash.
+    prev_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    entry_hash: Mapped[str] = mapped_column(String(64), nullable=False)
@@ -0,0 +1,78 @@
+"""RBAC 权限与独立性约束（R19、PRD §6 权限矩阵）。
+
+核心独立性规则（硬约束）：
+- 任何角色都不能删除线索（DELETE_CLUE 不授予任何角色；数据库触发器再兜底）。
+- 业务方（business）对系统无任何写权限。
+- 配规则/改阈值/看线索/出报告分权制衡。
+"""
+
+from __future__ import annotations
+
+import enum
+
+
+class Role(str, enum.Enum):
+    # Roles known to the permission matrix; members are also plain strings.
+    AUDITOR = "auditor"  # auditor
+    AUDIT_MANAGER = "audit_manager"  # audit manager
+    RULE_ADMIN = "rule_admin"  # rule administrator
+    SYS_ADMIN = "sys_admin"  # system administrator
+    SYS_AUDITOR = "sys_auditor"  # system auditor (independent oversight)
+    BUSINESS = "business"  # audited business party (no write permissions)
+
+
+class Permission(str, enum.Enum):
+    # Individual permissions that ROLE_PERMISSIONS grants to roles.
+    QUERY = "query"  # natural-language query
+    VIEW_CLUE = "view_clue"  # view clues
+    ADJUDICATE_CLUE = "adjudicate_clue"  # adjudicate / classify a clue
+    ASSIGN_CLUE = "assign_clue"  # assign a clue
+    DELETE_CLUE = "delete_clue"  # delete a clue (must never be granted to anyone)
+    CONFIG_RULE = "config_rule"  # configure rules
+    ADJUST_THRESHOLD = "adjust_threshold"  # adjust thresholds
+    ISSUE_REPORT = "issue_report"  # issue reports
+    DATA_INGEST = "data_ingest"  # data-ingestion configuration
+    VIEW_AUDIT_TRAIL = "view_audit_trail"  # view the self-audit trail
+    MODEL_DEPLOY = "model_deploy"  # model deployment / upgrade
+
+
+# Role -> set of granted permissions. Note: DELETE_CLUE appears under no role
+# (clues cannot be deleted, R19).
+ROLE_PERMISSIONS: dict[Role, set[Permission]] = {
+    Role.AUDITOR: {
+        Permission.QUERY,
+        Permission.VIEW_CLUE,
+        Permission.ADJUDICATE_CLUE,
+        Permission.ISSUE_REPORT,
+    },
+    Role.AUDIT_MANAGER: {
+        Permission.QUERY,
+        Permission.VIEW_CLUE,
+        Permission.ADJUDICATE_CLUE,
+        Permission.ASSIGN_CLUE,
+        Permission.ISSUE_REPORT,
+    },
+    Role.RULE_ADMIN: {
+        Permission.QUERY,
+        Permission.VIEW_CLUE,
+        Permission.CONFIG_RULE,
+        Permission.ADJUST_THRESHOLD,
+    },
+    Role.SYS_ADMIN: {
+        Permission.DATA_INGEST,
+        Permission.MODEL_DEPLOY,
+    },
+    Role.SYS_AUDITOR: {
+        Permission.QUERY,
+        Permission.VIEW_CLUE,
+        Permission.VIEW_AUDIT_TRAIL,
+        Permission.ISSUE_REPORT,
+    },
+    Role.BUSINESS: set(),  # the business party holds no permissions at all
+}
+
+
+def has_permission(role: Role, perm: Permission) -> bool:
+    return perm in ROLE_PERMISSIONS.get(role, set())
+
+
+def can_delete_clue(role: Role) -> bool:
+    """Clues can never be deleted: always False for every role (hard independence constraint)."""
+    # ``role`` is not consulted; the answer is the same regardless of its value.
+    return False
@@ -0,0 +1,81 @@
+"""系统自审计服务：写入哈希链审计日志、校验完整性（R19）。"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+
+from sqlalchemy import select
+from sqlalchemy.orm import Session
+
+from app.audit.models import AuditLog
+
+
+def _compute_hash(prev_hash: str | None, payload: dict) -> str:
+    body = json.dumps(payload, sort_keys=True, ensure_ascii=False, default=str)
+    raw = f"{prev_hash or ''}|{body}"
+    return hashlib.sha256(raw.encode("utf-8")).hexdigest()
+
+
+def record(
+    session: Session,
+    actor: str,
+    action: str,
+    *,
+    role: str | None = None,
+    target_type: str | None = None,
+    target_id: str | None = None,
+    detail: dict | None = None,
+) -> AuditLog:
+    """追加一条审计日志，自动接续哈希链。"""
+    last = session.execute(
+        select(AuditLog).order_by(AuditLog.seq.desc()).limit(1)
+    ).scalar_one_or_none()
+    prev_hash = last.entry_hash if last else None
+
+    payload = {
+        "actor": actor,
+        "role": role,
+        "action": action,
+        "target_type": target_type,
+        "target_id": target_id,
+        "detail": detail or {},
+    }
+    entry_hash = _compute_hash(prev_hash, payload)
+
+    log = AuditLog(
+        actor=actor,
+        role=role,
+        action=action,
+        target_type=target_type,
+        target_id=target_id,
+        detail=detail or {},
+        prev_hash=prev_hash,
+        entry_hash=entry_hash,
+    )
+    session.add(log)
+    session.flush()
+    return log
+
+
+def verify_chain(session: Session) -> tuple[bool, int | None]:
+    """校验审计日志哈希链完整性。
+
+    返回 (是否完整, 首个断链的 seq 或 None)。
+    """
+    rows = session.execute(select(AuditLog).order_by(AuditLog.seq.asc())).scalars().all()
+    prev_hash: str | None = None
+    for row in rows:
+        payload = {
+            "actor": row.actor,
+            "role": row.role,
+            "action": row.action,
+            "target_type": row.target_type,
+            "target_id": row.target_id,
+            "detail": row.detail or {},
+        }
+        expected = _compute_hash(prev_hash, payload)
+        if expected != row.entry_hash or row.prev_hash != prev_hash:
+            return False, row.seq
+        prev_hash = row.entry_hash
+    return True, None
@@ -0,0 +1 @@
+"""线索引擎模块：线索模型、生成、置信度分级、状态流转（人机闭环）。"""
@@ -0,0 +1,136 @@
+"""线索 ORM 模型。
+
+对应需求 R7（线索+证据链+解释）、R17（闭环状态）、R18（置信度分级）、R19（线索不可删）。
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import enum
+import uuid
+
+from sqlalchemy import DateTime, Enum, Float, ForeignKey, Index, String, Text
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from app.db import Base
+
+
+def _enum_values(enum_cls):
+    """让 SQLAlchemy 使用枚举的 value（小写）写入 PG 原生 enum，而非 name。"""
+    return [m.value for m in enum_cls]
+
+
+def _uuid() -> uuid.UUID:
+    return uuid.uuid4()
+
+
+def _now() -> dt.datetime:
+    return dt.datetime.now(dt.UTC)
+
+
+class ConfidenceTier(str, enum.Enum):
+    """Three-tier confidence triage (R18)."""
+
+    HIGH = "high"  # high confidence: pushed directly for handling
+    MEDIUM = "medium"  # medium confidence: manual review
+    LOW = "low"  # low confidence: archived for reference
+
+
+class ClueStatus(str, enum.Enum):
+    """States of the clue closed-loop state machine (R17)."""
+
+    NEW = "new"  # newly generated
+    ASSIGNED = "assigned"  # assigned
+    REVIEWING = "reviewing"  # under adjudication
+    CONFIRMED = "confirmed"  # adjudicated as substantiated
+    DISMISSED = "dismissed"  # adjudicated as a false positive
+    RECTIFYING = "rectifying"  # rectification in progress
+    TRANSFERRED = "transferred"  # handed over
+    CLOSED = "closed"  # closed out
+
+
+class Clue(Base):
+    """Audit clue.
+
+    Once generated a clue must not be physically deleted (R19); invalidation is
+    expressed through its status instead.
+    """
+
+    __tablename__ = "clue"
+    __table_args__ = (
+        Index("ix_clue_status", "status"),
+        Index("ix_clue_scenario", "scenario_code"),
+        Index("ix_clue_assignee", "assignee"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    title: Mapped[str] = mapped_column(String(256), nullable=False)
+    risk_domain: Mapped[str] = mapped_column(String(32), nullable=False)  # revenue/cost/procurement/funds/compliance
+    scenario_code: Mapped[str] = mapped_column(String(32), nullable=False)  # e.g. R8/R9
+    confidence: Mapped[ConfidenceTier] = mapped_column(
+        Enum(ConfidenceTier, name="confidence_tier", values_callable=_enum_values),
+        nullable=False,
+    )
+    score: Mapped[float] = mapped_column(Float, default=0.0)  # risk score in 0-1
+    status: Mapped[ClueStatus] = mapped_column(
+        Enum(ClueStatus, name="clue_status", values_callable=_enum_values),
+        default=ClueStatus.NEW,
+        nullable=False,
+    )
+    # Plain-language explanation (reason for the verdict) and the evidence chain.
+    rationale: Mapped[str] = mapped_column(Text, default="")
+    evidence: Mapped[dict] = mapped_column(JSONB, default=dict)
+    # Subjects involved (amounts, lists of entity ids, etc.).
+    subjects: Mapped[dict] = mapped_column(JSONB, default=dict)
+    amount_involved: Mapped[float | None] = mapped_column(Float, nullable=True)
+
+    assignee: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    # False-positive / substantiated feedback (R18 feedback learning).
+    feedback: Mapped[str | None] = mapped_column(String(16), nullable=True)  # confirmed/false_positive
+
+    # Traceability: model / rule / data versions in effect when the clue was produced
+    # (R19 triple trail).
+    model_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    rule_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
+    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), nullable=True)
+
+    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
+    # Refreshed through ``onupdate`` whenever the row is updated.
+    updated_at: Mapped[dt.datetime] = mapped_column(
+        DateTime(timezone=True), default=_now, onupdate=_now
+    )
+
+    # Status-transition records for this clue.
+    # NOTE(review): the cascade includes delete / delete-orphan although clues are
+    # documented as non-deletable — confirm this is intended.
+    history: Mapped[list[ClueStatusHistory]] = relationship(
+        back_populates="clue", cascade="all, delete-orphan"
+    )
+
+
+class ClueStatusHistory(Base):
+    """Trail of clue status transitions (R17/R19)."""
+
+    __tablename__ = "clue_status_history"
+    __table_args__ = (Index("ix_csh_clue", "clue_id"),)
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    clue_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
+    )
+    # Statuses are stored as plain strings; ``from_status`` may be NULL.
+    from_status: Mapped[str | None] = mapped_column(String(16), nullable=True)
+    to_status: Mapped[str] = mapped_column(String(16), nullable=False)
+    actor: Mapped[str] = mapped_column(String(64), nullable=False)
+    note: Mapped[str | None] = mapped_column(Text, nullable=True)
+    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
+
+    # Owning clue; the other side of ``Clue.history``.
+    clue: Mapped[Clue] = relationship(back_populates="history")
+
+
+class WorkingPaper(Base):
+    """Audit working paper (R17): generated automatically once adjudication completes; traceable."""
+
+    __tablename__ = "working_paper"
+    __table_args__ = (Index("ix_wp_clue", "clue_id"),)
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    clue_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
+    )
+    content: Mapped[str] = mapped_column(Text, default="")
+    conclusion: Mapped[str | None] = mapped_column(String(32), nullable=True)
+    author: Mapped[str] = mapped_column(String(64), nullable=False)
+    snapshot: Mapped[dict] = mapped_column(JSONB, default=dict)  # evidence / version snapshot
+    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
@@ -0,0 +1,195 @@
+"""线索服务：生成、置信度分级、状态流转、底稿生成、反馈。
+
+对应 R7 / R17 / R18 / R19。所有状态变更写入历史并记自审计日志（线索不可删）。
+"""
+
+from __future__ import annotations
+
+import uuid
+
+from sqlalchemy.orm import Session
+
+from app.audit import service as audit
+from app.clues.models import (
+    Clue,
+    ClueStatus,
+    ClueStatusHistory,
+    ConfidenceTier,
+    WorkingPaper,
+)
+
+# Allowed status transitions (R17 closed loop): current status -> statuses it may move to.
+# CLOSED maps to an empty set, i.e. it has no outgoing transition.
+_ALLOWED_TRANSITIONS: dict[ClueStatus, set[ClueStatus]] = {
+    ClueStatus.NEW: {ClueStatus.ASSIGNED, ClueStatus.REVIEWING},
+    ClueStatus.ASSIGNED: {ClueStatus.REVIEWING},
+    ClueStatus.REVIEWING: {ClueStatus.CONFIRMED, ClueStatus.DISMISSED},
+    ClueStatus.CONFIRMED: {ClueStatus.RECTIFYING, ClueStatus.TRANSFERRED},
+    ClueStatus.DISMISSED: {ClueStatus.CLOSED},
+    ClueStatus.RECTIFYING: {ClueStatus.CLOSED},
+    ClueStatus.TRANSFERRED: {ClueStatus.CLOSED},
+    ClueStatus.CLOSED: set(),
+}
+
+
+class IllegalTransitionError(ValueError):
+    """Raised for a clue status transition that is not allowed."""
+
+
+def score_to_tier(score: float) -> ConfidenceTier:
+    """风险评分映射到置信度三级（R18）。"""
+    if score >= 0.8:
+        return ConfidenceTier.HIGH
+    if score >= 0.5:
+        return ConfidenceTier.MEDIUM
+    return ConfidenceTier.LOW
+
+
+def create_clue(
+    session: Session,
+    *,
+    title: str,
+    risk_domain: str,
+    scenario_code: str,
+    score: float,
+    rationale: str,
+    evidence: dict,
+    subjects: dict | None = None,
+    amount_involved: float | None = None,
+    model_version: str | None = None,
+    rule_version: str | None = None,
+    data_version_id: uuid.UUID | None = None,
+    actor: str = "system",
+) -> Clue:
+    """生成一条线索，自动按评分分级，并记录创建留痕。"""
+    clue = Clue(
+        title=title,
+        risk_domain=risk_domain,
+        scenario_code=scenario_code,
+        confidence=score_to_tier(score),
+        score=score,
+        status=ClueStatus.NEW,
+        rationale=rationale,
+        evidence=evidence,
+        subjects=subjects or {},
+        amount_involved=amount_involved,
+        model_version=model_version,
+        rule_version=rule_version,
+        data_version_id=data_version_id,
+    )
+    session.add(clue)
+    session.flush()
+    _add_history(session, clue, None, ClueStatus.NEW, actor, "线索生成")
+    audit.record(
+        session, actor, "create_clue",
+        target_type="clue", target_id=str(clue.id),
+        detail={"scenario": scenario_code, "score": score, "confidence": clue.confidence.value},
+    )
+    return clue
+
+
+def _add_history(
+    session: Session,
+    clue: Clue,
+    from_status: ClueStatus | None,
+    to_status: ClueStatus,
+    actor: str,
+    note: str | None,
+) -> None:
+    session.add(
+        ClueStatusHistory(
+            clue_id=clue.id,
+            from_status=from_status.value if from_status else None,
+            to_status=to_status.value,
+            actor=actor,
+            note=note,
+        )
+    )
+    session.flush()
+
+
+def transition(
+    session: Session, clue: Clue, to_status: ClueStatus, actor: str, note: str | None = None
+) -> Clue:
+    """执行状态流转，校验合法性并留痕。"""
+    if to_status not in _ALLOWED_TRANSITIONS.get(clue.status, set()):
+        raise IllegalTransitionError(
+            f"线索状态不能从 {clue.status.value} 流转到 {to_status.value}"
+        )
+    from_status = clue.status
+    clue.status = to_status
+    session.flush()
+    _add_history(session, clue, from_status, to_status, actor, note)
+    audit.record(
+        session, actor, "transition_clue",
+        target_type="clue", target_id=str(clue.id),
+        detail={"from": from_status.value, "to": to_status.value, "note": note},
+    )
+    return clue
+
+
+def assign(session: Session, clue: Clue, assignee: str, actor: str) -> Clue:
+    clue.assignee = assignee
+    session.flush()
+    if clue.status == ClueStatus.NEW:
+        transition(session, clue, ClueStatus.ASSIGNED, actor, f"分派给 {assignee}")
+    audit.record(session, actor, "assign_clue", target_type="clue", target_id=str(clue.id), detail={"assignee": assignee})
+    return clue
+
+
+def adjudicate(
+    session: Session, clue: Clue, confirmed: bool, actor: str, note: str | None = None
+) -> WorkingPaper:
+    """研判定性：确认属实或误报，自动生成审计底稿并记录反馈（R17/R18）。"""
+    if clue.status not in (ClueStatus.ASSIGNED, ClueStatus.REVIEWING, ClueStatus.NEW):
+        # 允许从 NEW/ASSIGNED 直接进入研判
+        pass
+    if clue.status != ClueStatus.REVIEWING:
+        # 先进入研判中
+        target = ClueStatus.REVIEWING
+        if target in _ALLOWED_TRANSITIONS.get(clue.status, set()):
+            transition(session, clue, ClueStatus.REVIEWING, actor, "进入研判")
+
+    to = ClueStatus.CONFIRMED if confirmed else ClueStatus.DISMISSED
+    transition(session, clue, to, actor, note)
+    clue.feedback = "confirmed" if confirmed else "false_positive"
+    session.flush()
+
+    paper = WorkingPaper(
+        clue_id=clue.id,
+        content=note or "",
+        conclusion=to.value,
+        author=actor,
+        snapshot={
+            "evidence": clue.evidence,
+            "rationale": clue.rationale,
+            "score": clue.score,
+            "model_version": clue.model_version,
+            "rule_version": clue.rule_version,
+            "data_version_id": str(clue.data_version_id) if clue.data_version_id else None,
+        },
+    )
+    session.add(paper)
+    session.flush()
+    audit.record(
+        session, actor, "create_working_paper",
+        target_type="working_paper", target_id=str(paper.id),
+        detail={"clue_id": str(clue.id), "conclusion": to.value},
+    )
+    return paper
+
+
+def list_clues(
+    session: Session,
+    *,
+    status: ClueStatus | None = None,
+    scenario_code: str | None = None,
+    confidence: ConfidenceTier | None = None,
+) -> list[Clue]:
+    q = session.query(Clue)
+    if status:
+        q = q.filter(Clue.status == status)
+    if scenario_code:
+        q = q.filter(Clue.scenario_code == scenario_code)
+    if confidence:
+        q = q.filter(Clue.confidence == confidence)
+    return q.order_by(Clue.score.desc()).all()
@@ -0,0 +1,70 @@
+"""应用配置。
+
+通过环境变量加载，区分 dev / prod 运行环境。
+prod 环境强制执行"数据零出域"红线：禁用任何公网 LLM Provider。
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+
+from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+class AppEnv(str, Enum):
+    # Runtime environments the application distinguishes between.
+    dev = "dev"
+    prod = "prod"
+
+
+class LLMProviderName(str, Enum):
+    # Selectable LLM providers.
+    dashscope = "dashscope"  # public-network Qwen service, dev only
+    vllm = "vllm"  # local deployment, for prod
+
+
+# Providers classified as "public network / data leaves the domain"; forbidden in prod.
+EGRESS_PROVIDERS: frozenset[LLMProviderName] = frozenset({LLMProviderName.dashscope})
+
+
+class Settings(BaseSettings):
+    model_config = SettingsConfigDict(
+        env_prefix="",
+        env_file=".env",
+        extra="ignore",
+        case_sensitive=False,
+    )
+
+    aiaudit_env: AppEnv = AppEnv.dev
+
+    database_url: str = "postgresql+psycopg://freedak@localhost:5432/aiaudit"
+    redis_url: str = "redis://localhost:6379/0"
+
+    llm_provider: LLMProviderName = LLMProviderName.dashscope
+    dashscope_api_key: str = ""
+    dashscope_model: str = "qwen-plus"
+    vllm_base_url: str = "http://localhost:8001/v1"
+    vllm_model: str = "qwen2.5-72b-instruct"
+
+    @property
+    def is_prod(self) -> bool:
+        return self.aiaudit_env == AppEnv.prod
+
+    def validate_egress_policy(self) -> None:
+        """数据零出域红线校验：prod 环境禁用公网 Provider。
+
+        在应用启动时调用；违反则抛出异常阻断启动。
+        """
+        if self.is_prod and self.llm_provider in EGRESS_PROVIDERS:
+            raise RuntimeError(
+                f"数据零出域红线违规：prod 环境禁止使用公网 LLM Provider "
+                f"'{self.llm_provider.value}'。请改用本地 Provider（如 vllm）。"
+            )
+
+
+_settings: Settings | None = None
+
+
+def get_settings() -> Settings:
+    global _settings
+    if _settings is None:
+        _settings = Settings()
+    return _settings
@@ -0,0 +1 @@
+"""审计数据中台模块：本体/知识图谱、双时态、时序、数据版本。"""
@@ -0,0 +1,83 @@
+"""双时态事实仓储：写入与"按历史时点回放"查询。
+
+对应需求 R3 / ADR-0002：
+- 业务有效期 valid_from/valid_to（应用时间）
+- 系统记录期 system_from/system_to（事务时间）
+回放 = 给定 (as_of_valid, as_of_system) 在两条时间线上各取"包含该时点"的记录。
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import uuid
+
+from sqlalchemy import or_
+from sqlalchemy.orm import Session
+
+from app.datahub.models import BitemporalFact
+
+
+def record_fact(
+    session: Session,
+    entity_id: uuid.UUID,
+    attr_name: str,
+    attr_value: dict,
+    valid_from: dt.datetime,
+    valid_to: dt.datetime | None = None,
+    data_version_id: uuid.UUID | None = None,
+) -> BitemporalFact:
+    """记录一条双时态事实（system_from 自动取当前事务时间）。"""
+    fact = BitemporalFact(
+        entity_id=entity_id,
+        attr_name=attr_name,
+        attr_value=attr_value,
+        valid_from=valid_from,
+        valid_to=valid_to,
+        data_version_id=data_version_id,
+    )
+    session.add(fact)
+    session.flush()
+    return fact
+
+
+def as_of(
+    session: Session,
+    entity_id: uuid.UUID,
+    attr_name: str,
+    as_of_valid: dt.datetime,
+    as_of_system: dt.datetime | None = None,
+) -> BitemporalFact | None:
+    """回放：返回在给定业务时点且按给定系统时点可见的事实。
+
+    - 业务时间线：valid_from <= as_of_valid < valid_to(或为空表示至今)
+    - 系统时间线：system_from <= as_of_system < system_to(或为空表示当前可见)
+    """
+    as_of_system = as_of_system or dt.datetime.now(dt.UTC)
+
+    q = (
+        session.query(BitemporalFact)
+        .filter(BitemporalFact.entity_id == entity_id)
+        .filter(BitemporalFact.attr_name == attr_name)
+        .filter(BitemporalFact.valid_from <= as_of_valid)
+        .filter(
+            or_(BitemporalFact.valid_to.is_(None), BitemporalFact.valid_to > as_of_valid)
+        )
+        .filter(BitemporalFact.system_from <= as_of_system)
+        .filter(
+            or_(
+                BitemporalFact.system_to.is_(None),
+                BitemporalFact.system_to > as_of_system,
+            )
+        )
+        .order_by(BitemporalFact.system_from.desc())
+    )
+    return q.first()
+
+
+def close_fact(
+    session: Session, fact: BitemporalFact, system_to: dt.datetime | None = None
+) -> None:
+    """逻辑关闭一条事实的系统可见期（用于更正/失效，而非物理删除）。"""
+    fact.system_to = system_to or dt.datetime.now(dt.UTC)
+    session.add(fact)
+    session.flush()
@@ -0,0 +1,58 @@
+"""数据中台 schema 初始化。
+
+MVP 阶段以 SQLAlchemy metadata 建表（后续可迁移到 Alembic）。
+扩展按可用性可选启用：
+- btree_gist / vector：若可用则创建。
+- timescaledb：若可用则把 metric_event 转为超表；不可用则保持普通表（带时间索引）。
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import text
+from sqlalchemy.engine import Engine
+
+from app.datahub import models  # noqa: F401  确保模型注册到 metadata
+from app.db import Base, get_engine
+
+
+def _extension_available(engine: Engine, name: str) -> bool:
+    with engine.connect() as conn:
+        row = conn.execute(
+            text("SELECT 1 FROM pg_available_extensions WHERE name = :n"), {"n": name}
+        ).first()
+        return row is not None
+
+
+def init_extensions(engine: Engine) -> dict[str, bool]:
+    """按可用性创建扩展，返回各扩展启用状态。"""
+    status: dict[str, bool] = {}
+    for ext in ("btree_gist", "vector", "timescaledb"):
+        available = _extension_available(engine, ext)
+        status[ext] = available
+        if available:
+            with engine.begin() as conn:
+                conn.execute(text(f"CREATE EXTENSION IF NOT EXISTS {ext}"))
+    return status
+
+
+def create_schema(engine: Engine | None = None) -> dict[str, bool]:
+    """创建数据中台全部表，并按需启用时序超表。返回扩展状态。"""
+    engine = engine or get_engine()
+    status = init_extensions(engine)
+    Base.metadata.create_all(engine)
+
+    # 若 TimescaleDB 可用，将时序事件表转为超表（幂等）
+    if status.get("timescaledb"):
+        with engine.begin() as conn:
+            conn.execute(
+                text(
+                    "SELECT create_hypertable('metric_event', 'event_time', "
+                    "if_not_exists => TRUE, migrate_data => TRUE)"
+                )
+            )
+    return status
+
+
+if __name__ == "__main__":
+    st = create_schema()
+    print("数据中台 schema 初始化完成。扩展状态：", st)
@@ -0,0 +1,118 @@
+"""知识图谱仓储：实体/关系写入与多跳穿透（递归 CTE）。
+
+对应需求 R2：支撑隐性实控人、关联方网络、"马甲"供应商等穿透分析。
+统一穿透查询服务（P1.2.5）在此之上封装对外 API，对上层屏蔽底层是关系表还是图库。
+"""
+
+from __future__ import annotations
+
+import uuid
+
+from sqlalchemy import text
+from sqlalchemy.orm import Session
+
+from app.datahub.models import Entity, EntityRelationship
+from app.datahub.ontology import EntityType, RelationshipType, is_valid_relationship
+
+
+class OntologyViolationError(ValueError):
+    """Raised when a relationship does not satisfy the ontology constraints."""
+
+
+def upsert_entity(
+    session: Session,
+    entity_type: EntityType,
+    business_key: str,
+    display_name: str | None = None,
+    attributes: dict | None = None,
+    data_version_id: uuid.UUID | None = None,
+) -> Entity:
+    """按 (类型, 业务主键) 幂等写入实体（主数据对齐的归一锚点）。"""
+    existing = (
+        session.query(Entity)
+        .filter(Entity.entity_type == entity_type.value, Entity.business_key == business_key)
+        .one_or_none()
+    )
+    if existing is not None:
+        if display_name is not None:
+            existing.display_name = display_name
+        if attributes:
+            existing.attributes = {**(existing.attributes or {}), **attributes}
+        return existing
+
+    entity = Entity(
+        entity_type=entity_type.value,
+        business_key=business_key,
+        display_name=display_name,
+        attributes=attributes or {},
+        data_version_id=data_version_id,
+    )
+    session.add(entity)
+    session.flush()
+    return entity
+
+
+def add_relationship(
+    session: Session,
+    rel_type: RelationshipType,
+    source: Entity,
+    target: Entity,
+    attributes: dict | None = None,
+    data_version_id: uuid.UUID | None = None,
+) -> EntityRelationship:
+    """新增一条关系边，写入前校验本体约束。"""
+    src_type = EntityType(source.entity_type)
+    tgt_type = EntityType(target.entity_type)
+    if not is_valid_relationship(rel_type, src_type, tgt_type):
+        raise OntologyViolationError(
+            f"关系 {rel_type.value} 不允许从 {src_type.value} 指向 {tgt_type.value}"
+        )
+    rel = EntityRelationship(
+        rel_type=rel_type.value,
+        source_id=source.id,
+        target_id=target.id,
+        attributes=attributes or {},
+        data_version_id=data_version_id,
+    )
+    session.add(rel)
+    session.flush()
+    return rel
+
+
+# Multi-hop penetration: walks relationship edges as undirected (an edge is followed
+# from whichever endpoint matches the current entity) and yields the entities
+# connected to the start within max_depth hops, each with its minimum depth.
+# Used to identify suspected common controllers / related-party networks.
+# Each partial result carries the path walked so far; a neighbour already on that
+# path is not revisited, which stops cycles. The start entity itself is excluded
+# from the final result.
+# NOTE(review): because visited-tracking is per path rather than global, the number
+# of intermediate rows may grow quickly on densely connected graphs — confirm this
+# is acceptable for the expected data and depth limit.
+_TRAVERSE_SQL = text(
+    """
+    WITH RECURSIVE reachable(entity_id, depth, path) AS (
+        SELECT :start_id, 0, ARRAY[:start_id]
+        UNION ALL
+        SELECT
+            CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END,
+            rc.depth + 1,
+            rc.path || CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END
+        FROM reachable rc
+        JOIN entity_relationship r
+          ON (r.source_id = rc.entity_id OR r.target_id = rc.entity_id)
+        WHERE rc.depth < :max_depth
+          AND NOT (
+              CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END
+              = ANY(rc.path)
+          )
+    )
+    SELECT DISTINCT entity_id, MIN(depth) AS depth
+    FROM reachable
+    WHERE entity_id <> :start_id
+    GROUP BY entity_id
+    ORDER BY depth;
+    """
+)
+
+
+def find_related_entities(
+    session: Session, start_id: uuid.UUID, max_depth: int = 3
+) -> list[tuple[uuid.UUID, int]]:
+    """返回与起点实体在 max_depth 跳内连通的实体 (id, 最短跳数) 列表。"""
+    rows = session.execute(
+        _TRAVERSE_SQL, {"start_id": start_id, "max_depth": max_depth}
+    ).all()
+    return [(r[0], r[1]) for r in rows]
@@ -0,0 +1,157 @@
+"""审计数据中台 ORM 模型。
+
+涵盖：数据版本、本体实体、知识图谱关系边、双时态属性、时序事件。
+对应需求 R2 / R3，建模决策见 ADR-0002。
+"""
+
+from __future__ import annotations
+
+import datetime as dt
+import uuid
+
+from sqlalchemy import (
+    DateTime,
+    Float,
+    ForeignKey,
+    Index,
+    Integer,
+    String,
+    Text,
+    UniqueConstraint,
+)
+from sqlalchemy.dialects.postgresql import JSONB, UUID
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from app.db import Base
+
+
+def _uuid() -> uuid.UUID:
+    """Generate a fresh random UUID (uuid4); used as the primary-key default of the models."""
+    fresh_id = uuid.uuid4()
+    return fresh_id
+
+
+class DataVersion(Base):
+    """Data-version registry.
+
+    One row per ingested batch, recording its source, batch label, ingestion
+    time and row count so that audit conclusions remain traceable (R3).
+    """
+
+    __tablename__ = "data_version"
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    source_system: Mapped[str] = mapped_column(String(64), nullable=False)
+    batch_label: Mapped[str] = mapped_column(String(128), nullable=False)
+    row_count: Mapped[int] = mapped_column(Integer, default=0)
+    # Python-side default: a timezone-aware UTC timestamp.
+    ingested_at: Mapped[dt.datetime] = mapped_column(
+        DateTime(timezone=True), default=lambda: dt.datetime.now(dt.UTC)
+    )
+    note: Mapped[str | None] = mapped_column(Text, nullable=True)
+
+
+class Entity(Base):
+    """Ontology entity node (a vertex of the knowledge graph).
+
+    business_key is the business primary key from the source system and is used
+    for master-data alignment (normalising the same entity across systems).
+    The pair (entity_type, business_key) is unique.
+    """
+
+    __tablename__ = "entity"
+    __table_args__ = (
+        UniqueConstraint("entity_type", "business_key", name="uq_entity_type_bizkey"),
+        Index("ix_entity_type", "entity_type"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    entity_type: Mapped[str] = mapped_column(String(32), nullable=False)
+    business_key: Mapped[str] = mapped_column(String(128), nullable=False)
+    display_name: Mapped[str | None] = mapped_column(String(256), nullable=True)
+    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
+
+    # Master-data alignment: the "golden" entity this row has been merged into
+    # (same actual controller / same subject). NULL means this row is itself
+    # the canonical entity.
+    canonical_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=True
+    )
+
+    # Optional link to the DataVersion row this entity is associated with.
+    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
+    )
+
+
+class EntityRelationship(Base):
+    """Knowledge-graph relationship edge (directed).
+
+    Multi-hop penetration traverses this table with a recursive CTE.
+    source_id, target_id and rel_type are each indexed.
+    """
+
+    __tablename__ = "entity_relationship"
+    __table_args__ = (
+        Index("ix_rel_source", "source_id"),
+        Index("ix_rel_target", "target_id"),
+        Index("ix_rel_type", "rel_type"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    rel_type: Mapped[str] = mapped_column(String(32), nullable=False)
+    source_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
+    )
+    target_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
+    )
+    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
+
+    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
+    )
+
+    # Both relationships point at Entity, so each names its own foreign key
+    # column explicitly.
+    source: Mapped[Entity] = relationship(foreign_keys=[source_id])
+    target: Mapped[Entity] = relationship(foreign_keys=[target_id])
+
+
+class BitemporalFact(Base):
+    """Bitemporal fact: a record of how one attribute/state of an entity changes over time.
+
+    - Business validity period valid_from/valid_to (application time)
+    - System record period system_from/system_to (transaction time)
+    Replaying history = filtering both timelines for a given
+    (as_of_valid, as_of_system) pair (see the repository).
+    """
+
+    __tablename__ = "bitemporal_fact"
+    __table_args__ = (
+        Index("ix_btf_entity_attr", "entity_id", "attr_name"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    entity_id: Mapped[uuid.UUID] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
+    )
+    attr_name: Mapped[str] = mapped_column(String(64), nullable=False)
+    attr_value: Mapped[dict] = mapped_column(JSONB, default=dict)
+
+    # Application-time interval; valid_to is nullable.
+    valid_from: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), nullable=False)
+    valid_to: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+    # Transaction-time interval; system_from defaults to the current UTC time,
+    # system_to is nullable.
+    system_from: Mapped[dt.datetime] = mapped_column(
+        DateTime(timezone=True), default=lambda: dt.datetime.now(dt.UTC)
+    )
+    system_to: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
+
+    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
+    )
+
+
+class MetricEvent(Base):
+    """Time-series event: behavioural / metric data.
+
+    Covers user lifecycle, payment collection, call traffic, commission and
+    resource usage. After deployment the table is converted into a hypertable
+    via TimescaleDB create_hypertable('metric_event', 'event_time').
+    """
+
+    # NOTE(review): the primary key here is `id` alone. TimescaleDB is
+    # understood to require unique indexes (including the primary key) to
+    # contain the partitioning column — confirm that the deployment step
+    # handles this before calling create_hypertable.
+    __tablename__ = "metric_event"
+    __table_args__ = (
+        Index("ix_metric_subject_time", "subject_type", "subject_key", "event_time"),
+        Index("ix_metric_name_time", "metric_name", "event_time"),
+    )
+
+    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
+    event_time: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), nullable=False)
+    subject_type: Mapped[str] = mapped_column(String(32), nullable=False)  # e.g. msisdn/channel
+    subject_key: Mapped[str] = mapped_column(String(128), nullable=False)
+    metric_name: Mapped[str] = mapped_column(String(64), nullable=False)  # e.g. traffic_mb/commission
+    metric_value: Mapped[float] = mapped_column(Float, default=0.0)
+    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
+
+    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
+        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
+    )
@@ -0,0 +1,86 @@
+"""审计本体（Ontology）定义。
+
+定义电信内审域的核心实体类型与关系类型，作为知识图谱与主数据对齐的基准。
+对应需求 R2。
+"""
+
+from __future__ import annotations
+
+from enum import Enum
+
+
+class EntityType(str, Enum):
+    """Core entity types of the audit ontology."""
+
+    CUSTOMER = "customer"  # customer (including government/enterprise customers)
+    CONTRACT = "contract"  # contract
+    MSISDN = "msisdn"  # phone number
+    IMEI = "imei"  # terminal device
+    ACCOUNT = "account"  # account (paying / receiving)
+    WORK_ORDER = "work_order"  # work order
+    SUPPLIER = "supplier"  # supplier
+    SETTLEMENT = "settlement"  # settlement statement
+    EMPLOYEE = "employee"  # employee
+    CHANNEL = "channel"  # channel / agent
+    LEGAL_PERSON = "legal_person"  # legal representative / natural person
+    ADDRESS = "address"  # address
+
+
+class RelationshipType(str, Enum):
+    """Core relationship types of the audit ontology (directed).
+
+    The allowed source/target entity types of each relationship are listed in
+    RELATIONSHIP_DOMAIN; the trailing comments summarise them.
+    """
+
+    SIGNED = "signed"  # customer -signs-> contract
+    PAID_BY = "paid_by"  # contract -collection account-> account
+    OWNS_ACCOUNT = "owns_account"  # customer/supplier/legal person -owns-> account
+    REGISTERED_AT = "registered_at"  # customer/supplier -registered address-> address
+    LEGAL_REP_OF = "legal_rep_of"  # legal person -legal representative of-> customer/supplier
+    RELATED_TO = "related_to"  # legal person -relative/related party-> legal person
+    HOLDS_MSISDN = "holds_msisdn"  # customer -holds-> phone number
+    BOUND_DEVICE = "bound_device"  # phone number -bound to-> IMEI
+    BELONGS_TO_CHANNEL = "belongs_to_channel"  # phone number/contract -belongs to-> channel
+    SUPPLIES = "supplies"  # supplier -supplies-> contract/work order
+    HANDLED_BY = "handled_by"  # work order -handled by-> employee
+    SETTLES = "settles"  # settlement statement -settles-> contract
+
+
+# Allowed (source entity types, target entity types) for each relationship type.
+# Used to validate writes to the graph: the first set of each pair lists the
+# permitted source types, the second set the permitted target types.
+RELATIONSHIP_DOMAIN: dict[RelationshipType, tuple[set[EntityType], set[EntityType]]] = {
+    RelationshipType.SIGNED: ({EntityType.CUSTOMER}, {EntityType.CONTRACT}),
+    RelationshipType.PAID_BY: ({EntityType.CONTRACT}, {EntityType.ACCOUNT}),
+    RelationshipType.OWNS_ACCOUNT: (
+        {EntityType.CUSTOMER, EntityType.SUPPLIER, EntityType.LEGAL_PERSON},
+        {EntityType.ACCOUNT},
+    ),
+    RelationshipType.REGISTERED_AT: (
+        {EntityType.CUSTOMER, EntityType.SUPPLIER},
+        {EntityType.ADDRESS},
+    ),
+    RelationshipType.LEGAL_REP_OF: (
+        {EntityType.LEGAL_PERSON},
+        {EntityType.CUSTOMER, EntityType.SUPPLIER},
+    ),
+    RelationshipType.RELATED_TO: ({EntityType.LEGAL_PERSON}, {EntityType.LEGAL_PERSON}),
+    RelationshipType.HOLDS_MSISDN: ({EntityType.CUSTOMER}, {EntityType.MSISDN}),
+    RelationshipType.BOUND_DEVICE: ({EntityType.MSISDN}, {EntityType.IMEI}),
+    RelationshipType.BELONGS_TO_CHANNEL: (
+        {EntityType.MSISDN, EntityType.CONTRACT},
+        {EntityType.CHANNEL},
+    ),
+    RelationshipType.SUPPLIES: (
+        {EntityType.SUPPLIER},
+        {EntityType.CONTRACT, EntityType.WORK_ORDER},
+    ),
+    RelationshipType.HANDLED_BY: ({EntityType.WORK_ORDER}, {EntityType.EMPLOYEE}),
+    RelationshipType.SETTLES: ({EntityType.SETTLEMENT}, {EntityType.CONTRACT}),
+}
+
+
+def is_valid_relationship(
+    rel: RelationshipType, source: EntityType, target: EntityType
+) -> bool:
+    """Check a relationship's source/target entity types against the ontology.
+
+    Returns False when the relationship type has no entry in
+    RELATIONSHIP_DOMAIN; otherwise True only if both the source and the target
+    type are in their respective allowed sets.
+    """
+    try:
+        allowed_sources, allowed_targets = RELATIONSHIP_DOMAIN[rel]
+    except KeyError:
+        # Relationship type is not registered in the domain table.
+        return False
+    if source not in allowed_sources:
+        return False
+    return target in allowed_targets
@@ -0,0 +1,40 @@
+"""数据库引擎与会话管理。"""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
+
+from app.config import get_settings
+
+
+class Base(DeclarativeBase):
+    """Base class for all ORM models."""
+
+
+# Module-level singletons, created lazily on first use by get_engine() and
+# get_sessionmaker() below.
+_engine = None
+_SessionLocal: sessionmaker[Session] | None = None
+
+
+def get_engine():
+    """Return the shared engine, creating it from settings.database_url on first call."""
+    global _engine
+    if _engine is not None:
+        return _engine
+    db_url = get_settings().database_url
+    _engine = create_engine(db_url, pool_pre_ping=True, future=True)
+    return _engine
+
+
+def get_sessionmaker() -> sessionmaker[Session]:
+    """Return the shared session factory, building it on first call.
+
+    The factory is bound to get_engine() and created with expire_on_commit=False.
+    """
+    global _SessionLocal
+    if _SessionLocal is not None:
+        return _SessionLocal
+    bound_engine = get_engine()
+    _SessionLocal = sessionmaker(bind=bound_engine, expire_on_commit=False)
+    return _SessionLocal
+
+
+def get_session() -> Iterator[Session]:
+    """Session generator for FastAPI dependency injection.
+
+    Yields one session from the shared factory and closes it once the consumer
+    is done with it, including when an exception propagates.
+    """
+    factory = get_sessionmaker()
+    db_session = factory()
+    try:
+        yield db_session
+    finally:
+        db_session.close()
@@ -0,0 +1,10 @@
+"""LLM Provider 抽象层。
+
+通过统一接口隔离 LLM 实现，使开发期可用公网千问、生产期无缝切换本地 vLLM。
+强约束："数据零出域"红线由 provider 工厂在 prod 环境拦截公网 Provider。
+"""
+
+from app.llm.base import ChatMessage, LLMProvider, LLMResponse
+from app.llm.factory import get_llm_provider
+
+__all__ = ["ChatMessage", "LLMProvider", "LLMResponse", "get_llm_provider"]
@@ -0,0 +1,44 @@
+"""LLM Provider 抽象接口与数据模型。"""
+
+from __future__ import annotations
+
+import abc
+from dataclasses import dataclass, field
+
+
+@dataclass
+class ChatMessage:
+    """One chat message: a role plus its text content."""
+
+    role: str  # "system" | "user" | "assistant"
+    content: str
+
+
+@dataclass
+class LLMResponse:
+    """Result of one chat call: generated text plus provenance metadata."""
+
+    content: str
+    model: str
+    provider: str
+    # Whether the call went through an egress (public-network) channel;
+    # kept so it can be recorded in the audit trail.
+    egress: bool = False
+    # Defaults to a fresh empty dict per instance.
+    raw: dict = field(default_factory=dict)
+
+
+class LLMProvider(abc.ABC):
+    """Common interface for every LLM implementation.
+
+    Business code depends only on this interface; switching between a public
+    and a local backend is a configuration change and does not touch callers.
+    """
+
+    #: provider name
+    name: str = "base"
+    #: Whether the provider goes over the public network (egress).
+    #: Providers with egress=True are forbidden in the prod environment.
+    egress: bool = False
+
+    @abc.abstractmethod
+    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
+        """Synchronous chat completion."""
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def health(self) -> bool:
+        """Health probe: whether the provider is usable."""
+        raise NotImplementedError
@@ -0,0 +1,31 @@
+"""LLM Provider 工厂：按配置创建 provider，并执行数据零出域红线校验。"""
+
+from __future__ import annotations
+
+from app.config import EGRESS_PROVIDERS, LLMProviderName, Settings, get_settings
+from app.llm.base import LLMProvider
+from app.llm.providers import DashScopeProvider, VllmProvider
+
+
+class EgressPolicyError(RuntimeError):
+    """Violation of the "zero data egress" red line."""
+
+
+def get_llm_provider(settings: Settings | None = None) -> LLMProvider:
+    """Create the LLM provider selected by configuration, enforcing the zero-egress red line.
+
+    Args:
+        settings: Settings to use; when omitted, get_settings() is consulted.
+
+    Returns:
+        A DashScopeProvider or VllmProvider, depending on settings.llm_provider.
+
+    Raises:
+        EgressPolicyError: In prod, when the configured provider is listed in
+            EGRESS_PROVIDERS or the constructed provider declares egress=True.
+        ValueError: When settings.llm_provider matches no known provider.
+    """
+    settings = settings or get_settings()
+
+    # Red line, first gate: reject by configured name before constructing anything.
+    if settings.is_prod and settings.llm_provider in EGRESS_PROVIDERS:
+        raise EgressPolicyError(
+            f"数据零出域红线违规：prod 环境禁止使用公网 LLM Provider "
+            f"'{settings.llm_provider.value}'。"
+        )
+
+    if settings.llm_provider == LLMProviderName.dashscope:
+        provider = DashScopeProvider(
+            api_key=settings.dashscope_api_key, model=settings.dashscope_model
+        )
+    elif settings.llm_provider == LLMProviderName.vllm:
+        provider = VllmProvider(base_url=settings.vllm_base_url, model=settings.vllm_model)
+    else:
+        raise ValueError(f"未知的 LLM Provider: {settings.llm_provider}")
+
+    # Red line, second gate: trust the provider's own egress flag as well, so
+    # an entry missing from EGRESS_PROVIDERS cannot let a public provider
+    # through in prod.
+    if settings.is_prod and provider.egress:
+        raise EgressPolicyError(
+            f"数据零出域红线违规：prod 环境禁止使用公网 LLM Provider "
+            f"'{provider.name}'。"
+        )
+    return provider
@@ -0,0 +1,80 @@
+"""具体 LLM Provider 实现：DashScope（公网千问，仅 dev）、vLLM（本地，prod）。
+
+两者均走 OpenAI 兼容的 /chat/completions 协议。
+"""
+
+from __future__ import annotations
+
+import httpx
+
+from app.llm.base import ChatMessage, LLMProvider, LLMResponse
+
+
+class DashScopeProvider(LLMProvider):
+    """Public-network Qwen via DashScope's OpenAI-compatible mode.
+
+    For development and testing only, and only with desensitised or sample data.
+    """
+
+    name = "dashscope"
+    egress = True  # goes over the public network, i.e. data leaves the domain
+
+    _BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
+
+    def __init__(self, api_key: str, model: str, timeout: float = 30.0) -> None:
+        self._api_key, self._model, self._timeout = api_key, model, timeout
+
+    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
+        """POST the conversation to /chat/completions and wrap the first choice.
+
+        Extra keyword arguments are merged into the request body. A non-success
+        HTTP status raises through raise_for_status().
+        """
+        wire_messages = [{"role": msg.role, "content": msg.content} for msg in messages]
+        body = {"model": self._model, "messages": wire_messages}
+        body.update(kwargs)
+        auth_headers = {"Authorization": f"Bearer {self._api_key}"}
+        with httpx.Client(timeout=self._timeout) as http:
+            reply = http.post(
+                f"{self._BASE_URL}/chat/completions", json=body, headers=auth_headers
+            )
+            reply.raise_for_status()
+            parsed = reply.json()
+        answer = parsed["choices"][0]["message"]["content"]
+        return LLMResponse(
+            content=answer,
+            model=self._model,
+            provider=self.name,
+            egress=True,
+            raw=parsed,
+        )
+
+    def health(self) -> bool:
+        """Report healthy when an API key is configured; no network call is made."""
+        return bool(self._api_key)
+
+
+class VllmProvider(LLMProvider):
+    """Local vLLM server (OpenAI-compatible). Used in production; data does not leave the domain."""
+
+    name = "vllm"
+    egress = False
+
+    def __init__(self, base_url: str, model: str, timeout: float = 60.0) -> None:
+        # Trailing slashes are dropped so endpoint paths can be appended directly.
+        self._base_url = base_url.rstrip("/")
+        self._model, self._timeout = model, timeout
+
+    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
+        """POST the conversation to /chat/completions and wrap the first choice.
+
+        Extra keyword arguments are merged into the request body. A non-success
+        HTTP status raises through raise_for_status().
+        """
+        wire_messages = [{"role": msg.role, "content": msg.content} for msg in messages]
+        body = {"model": self._model, "messages": wire_messages}
+        body.update(kwargs)
+        with httpx.Client(timeout=self._timeout) as http:
+            reply = http.post(f"{self._base_url}/chat/completions", json=body)
+            reply.raise_for_status()
+            parsed = reply.json()
+        answer = parsed["choices"][0]["message"]["content"]
+        return LLMResponse(
+            content=answer,
+            model=self._model,
+            provider=self.name,
+            egress=False,
+            raw=parsed,
+        )
+
+    def health(self) -> bool:
+        """Probe GET /models with a 5-second timeout; True only on HTTP 200."""
+        try:
+            with httpx.Client(timeout=5.0) as http:
+                probe = http.get(f"{self._base_url}/models")
+                return probe.status_code == 200
+        except httpx.HTTPError:
+            return False
@@ -0,0 +1,45 @@
+"""AIAudit FastAPI 应用入口。"""
+
+from __future__ import annotations
+
+from contextlib import asynccontextmanager
+
+from fastapi import FastAPI
+
+from app import __version__
+from app.api.datahub import router as datahub_router
+from app.config import get_settings
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Application lifespan hook.
+
+    Runs the zero-egress policy check at startup; a violation raised there
+    blocks startup. There is no shutdown work after the yield.
+    """
+    get_settings().validate_egress_policy()
+    yield
+
+
+# Application object: version comes from the package, and `lifespan` runs the
+# startup policy check.
+app = FastAPI(
+    title="AIAudit · 本地 AI 内审平台",
+    version=__version__,
+    lifespan=lifespan,
+)
+
+# Mount the data-hub routes.
+app.include_router(datahub_router)
+
+
+@app.get("/health")
+def health() -> dict:
+    """Liveness probe: reports a fixed "ok" status and the package version."""
+    body = {"status": "ok"}
+    body["version"] = __version__
+    return body
+
+
+@app.get("/health/config")
+def health_config() -> dict:
+    """Configuration/compliance probe (no secrets).
+
+    Reports the environment name, the configured LLM provider name, and
+    `egress_blocked_in_prod`, which mirrors settings.is_prod.
+    """
+    cfg = get_settings()
+    env_name = cfg.aiaudit_env.value
+    provider_name = cfg.llm_provider.value
+    return {
+        "env": env_name,
+        "llm_provider": provider_name,
+        "egress_blocked_in_prod": cfg.is_prod,
+    }
@@ -0,0 +1 @@
+"""系统自审计模块：不可篡改操作日志、独立性与分权（R19）。"""
@@ -0,0 +1 @@
+"""线索引擎模块：线索模型、生成、置信度分级、状态流转（人机闭环）。"""
@@ -0,0 +1 @@
+"""审计数据中台模块：本体/知识图谱、双时态、时序、数据版本。"""