Initial commit: InternalAuditInterprise

This commit is contained in:
freedakgmail
2026-06-16 00:38:57 +08:00
commit 7b1e2b10a8
57 changed files with 4622 additions and 0 deletions
+3
View File
@@ -0,0 +1,3 @@
"""AIAudit 后端应用包。"""
__version__ = "0.1.0"
+1
View File
@@ -0,0 +1 @@
"""HTTP API 层。"""
+64
View File
@@ -0,0 +1,64 @@
"""数据中台统一穿透查询 API(P1.2.5)。
作为各引擎与审计场景访问知识图谱的共同入口,对上层屏蔽底层是关系表还是图库。
对应需求 R2。
"""
from __future__ import annotations
import uuid
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy.orm import Session
from app.api.schemas import (
EntityOut,
PenetrateRequest,
PenetrateResponse,
RelatedEntityOut,
)
from app.datahub.graph_repo import find_related_entities
from app.datahub.models import Entity
from app.db import get_session
# Router for the data-hub endpoints; routes declared on it are served under the /datahub prefix.
router = APIRouter(prefix="/datahub", tags=["datahub"])
# GET /datahub/entities/{entity_id}: look up one entity by primary key.
# Responds with 404 when no row with that id exists.
# (Documented with comments rather than a docstring so the published API description is unchanged.)
@router.get("/entities/{entity_id}", response_model=EntityOut)
def get_entity(entity_id: uuid.UUID, session: Session = Depends(get_session)) -> Entity:
    found = session.get(Entity, entity_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail="实体不存在")
# POST /datahub/penetrate: multi-hop penetration query.
# Returns the entities connected to the start entity within req.max_depth hops, each with its
# hop count, ordered by ascending depth. Responds with 404 when the start entity does not exist.
# The docstring below is kept verbatim because it is the route's published description.
@router.post("/penetrate", response_model=PenetrateResponse)
def penetrate(
    req: PenetrateRequest, session: Session = Depends(get_session)
) -> PenetrateResponse:
    """多跳穿透:返回与起点实体连通的关联实体(用于实控人/关联方/马甲识别)。"""
    if session.get(Entity, req.start_entity_id) is None:
        raise HTTPException(status_code=404, detail="起点实体不存在")

    hits = find_related_entities(session, req.start_entity_id, max_depth=req.max_depth)

    # Map each reachable entity id to the hop count reported by the graph repository.
    depth_by_id = {}
    for related_id, hop_count in hits:
        depth_by_id[related_id] = hop_count

    # Load the entity details in one batch; skip the query entirely when nothing is reachable.
    if depth_by_id:
        rows = session.query(Entity).filter(Entity.id.in_(list(depth_by_id.keys()))).all()
    else:
        rows = []

    related = sorted(
        (
            RelatedEntityOut(entity=EntityOut.model_validate(row), depth=depth_by_id[row.id])
            for row in rows
        ),
        key=lambda item: item.depth,
    )
    return PenetrateResponse(
        start_entity_id=req.start_entity_id,
        max_depth=req.max_depth,
        related_count=len(related),
        related=related,
    )
+36
View File
@@ -0,0 +1,36 @@
"""API 数据传输模型(Pydantic)。"""
from __future__ import annotations
import uuid
from pydantic import BaseModel, Field
# API representation of an entity (id, type, business key, optional display name, attributes).
class EntityOut(BaseModel):
    id: uuid.UUID
    entity_type: str
    business_key: str
    display_name: str | None = None
    attributes: dict = Field(default_factory=dict)

    # from_attributes allows validating this model from attribute-bearing objects
    # (it is built via EntityOut.model_validate(<Entity row>) in the API layer).
    model_config = {"from_attributes": True}
# A related entity hit by a penetration query, together with its shortest hop count
# (a first indication of evidence strength). The docstring is kept verbatim because
# Pydantic exposes model docstrings in the generated schema.
class RelatedEntityOut(BaseModel):
    """穿透命中的关联实体,附最短跳数(证据强度的初步指示)。"""

    entity: EntityOut
    depth: int
# Request body for the penetration query.
class PenetrateRequest(BaseModel):
    start_entity_id: uuid.UUID
    # Maximum number of hops to traverse; defaults to 3 and is constrained to 1..6.
    max_depth: int = Field(default=3, ge=1, le=6)
# Response body for the penetration query: echoes the request parameters and
# carries the related entities plus their count.
class PenetrateResponse(BaseModel):
    start_entity_id: uuid.UUID
    max_depth: int
    related_count: int
    related: list[RelatedEntityOut]
+1
View File
@@ -0,0 +1 @@
"""系统自审计模块:不可篡改操作日志、独立性与分权(R19)。"""
+50
View File
@@ -0,0 +1,50 @@
"""系统自审计 ORM 模型:不可篡改操作日志(R19)。
每条日志含哈希链(prev_hash + 内容 → entry_hash),任何篡改都会断链,可检测。
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy import BigInteger, DateTime, Identity, Index, String
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column
from app.db import Base
def _uuid() -> uuid.UUID:
    """Return a freshly generated random (version 4) UUID; used as a column default."""
    fresh_id = uuid.uuid4()
    return fresh_id
def _now() -> dt.datetime:
return dt.datetime.now(dt.UTC)
class AuditLog(Base):
    """Tamper-evident audit trail.

    Append-only: rows must not be updated or deleted (enforced jointly by the
    application layer and by organisational policy). Each row stores the hash
    of its predecessor (``prev_hash``) and its own ``entry_hash``.
    """

    __tablename__ = "audit_log"
    __table_args__ = (
        Index("ix_audit_actor", "actor"),
        Index("ix_audit_action", "action"),
        Index("ix_audit_seq", "seq", unique=True),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    # Auto-incrementing sequence number; defines the order of the hash chain.
    # NOTE(review): seq is declared unique both here (unique=True) and through
    # ix_audit_seq above — looks redundant; confirm whether one can be dropped.
    seq: Mapped[int] = mapped_column(
        BigInteger, Identity(always=False), nullable=False, unique=True
    )
    actor: Mapped[str] = mapped_column(String(64), nullable=False)
    role: Mapped[str | None] = mapped_column(String(32), nullable=True)
    action: Mapped[str] = mapped_column(String(64), nullable=False)  # e.g. rule.update/clue.assign
    target_type: Mapped[str | None] = mapped_column(String(64), nullable=True)
    target_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
    detail: Mapped[dict] = mapped_column(JSONB, default=dict)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
    # Hash of the previous entry (NULL is allowed, e.g. for the first entry) and of this entry.
    prev_hash: Mapped[str | None] = mapped_column(String(64), nullable=True)
    entry_hash: Mapped[str] = mapped_column(String(64), nullable=False)
+78
View File
@@ -0,0 +1,78 @@
"""RBAC 权限与独立性约束(R19、PRD §6 权限矩阵)。
核心独立性规则(硬约束):
- 任何角色都不能删除线索(DELETE_CLUE 不授予任何角色;数据库触发器再兜底)。
- 业务方(business)对系统无任何写权限。
- 配规则/改阈值/看线索/出报告分权制衡。
"""
from __future__ import annotations
import enum
class Role(str, enum.Enum):
    """Roles known to the RBAC layer."""

    AUDITOR = "auditor"  # auditor
    AUDIT_MANAGER = "audit_manager"  # audit manager
    RULE_ADMIN = "rule_admin"  # rule administrator
    SYS_ADMIN = "sys_admin"  # system administrator
    SYS_AUDITOR = "sys_auditor"  # system auditor (independent oversight)
    BUSINESS = "business"  # audited business party (no write permission)
class Permission(str, enum.Enum):
    """Individual permissions that can be granted to a role."""

    QUERY = "query"  # natural-language query
    VIEW_CLUE = "view_clue"  # view clues
    ADJUDICATE_CLUE = "adjudicate_clue"  # adjudicate / classify clues
    ASSIGN_CLUE = "assign_clue"  # assign clues
    DELETE_CLUE = "delete_clue"  # delete clues (must never be granted to anyone)
    CONFIG_RULE = "config_rule"  # configure rules
    ADJUST_THRESHOLD = "adjust_threshold"  # adjust thresholds
    ISSUE_REPORT = "issue_report"  # issue reports
    DATA_INGEST = "data_ingest"  # data ingestion configuration
    VIEW_AUDIT_TRAIL = "view_audit_trail"  # view the self-audit trail
    MODEL_DEPLOY = "model_deploy"  # model deployment / upgrade
# Role -> set of granted permissions.
# Note: DELETE_CLUE does not appear under any role (clues cannot be deleted, R19).
ROLE_PERMISSIONS: dict[Role, set[Permission]] = {
    Role.AUDITOR: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.ADJUDICATE_CLUE,
        Permission.ISSUE_REPORT,
    },
    Role.AUDIT_MANAGER: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.ADJUDICATE_CLUE,
        Permission.ASSIGN_CLUE,
        Permission.ISSUE_REPORT,
    },
    Role.RULE_ADMIN: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.CONFIG_RULE,
        Permission.ADJUST_THRESHOLD,
    },
    Role.SYS_ADMIN: {
        Permission.DATA_INGEST,
        Permission.MODEL_DEPLOY,
    },
    Role.SYS_AUDITOR: {
        Permission.QUERY,
        Permission.VIEW_CLUE,
        Permission.VIEW_AUDIT_TRAIL,
        Permission.ISSUE_REPORT,
    },
    Role.BUSINESS: set(),  # the business party has no permissions at all
}
def has_permission(role: Role, perm: Permission) -> bool:
    """Return True when ``perm`` is granted to ``role``.

    A role that has no entry in ROLE_PERMISSIONS is treated as having no permissions.
    """
    granted = ROLE_PERMISSIONS.get(role)
    if granted is None:
        return False
    return perm in granted
def can_delete_clue(role: Role) -> bool:
    """Report whether ``role`` may delete a clue.

    Clues can never be deleted, so this is False for every role
    (a hard independence constraint); the argument is not inspected.
    """
    return False
+81
View File
@@ -0,0 +1,81 @@
"""系统自审计服务:写入哈希链审计日志、校验完整性(R19)。"""
from __future__ import annotations
import hashlib
import json
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.audit.models import AuditLog
def _compute_hash(prev_hash: str | None, payload: dict) -> str:
    """Return the SHA-256 hex digest that links ``payload`` to ``prev_hash``.

    The payload is serialized as JSON with sorted keys (non-ASCII kept as-is,
    values that are not JSON-serializable converted with ``str``), prefixed with
    the previous hash (empty when there is none) and a ``|`` separator, then
    UTF-8 encoded and hashed.
    """
    serialized = json.dumps(payload, sort_keys=True, ensure_ascii=False, default=str)
    hasher = hashlib.sha256()
    hasher.update(((prev_hash or "") + "|" + serialized).encode("utf-8"))
    return hasher.hexdigest()
def record(
    session: Session,
    actor: str,
    action: str,
    *,
    role: str | None = None,
    target_type: str | None = None,
    target_id: str | None = None,
    detail: dict | None = None,
) -> AuditLog:
    """Append one audit-log entry, chaining it to the current tail of the hash chain.

    The entry with the highest ``seq`` supplies ``prev_hash`` (None when the log
    is empty). The new row is added to the session and flushed, not committed.

    Returns:
        The newly added AuditLog row.
    """
    # NOTE(review): the tail is read without any locking visible here — presumably
    # callers serialize writes; confirm, otherwise two writers could share a prev_hash.
    tail_stmt = select(AuditLog).order_by(AuditLog.seq.desc()).limit(1)
    tail = session.execute(tail_stmt).scalar_one_or_none()
    prev_hash = tail.entry_hash if tail else None

    # The hashed payload and the stored columns are the same six fields.
    fields = {
        "actor": actor,
        "role": role,
        "action": action,
        "target_type": target_type,
        "target_id": target_id,
        "detail": detail or {},
    }
    entry = AuditLog(
        **fields,
        prev_hash=prev_hash,
        entry_hash=_compute_hash(prev_hash, fields),
    )
    session.add(entry)
    session.flush()
    return entry
def verify_chain(session: Session) -> tuple[bool, int | None]:
    """Verify the integrity of the audit-log hash chain.

    Rows are walked in ascending ``seq`` order. For each row the hash is
    recomputed from actor/role/action/target_type/target_id/detail and the
    previous row's hash, and the stored ``prev_hash`` is compared with the
    previous row's ``entry_hash``. ``seq`` and ``created_at`` are not part of
    the hashed payload.

    Returns:
        ``(True, None)`` when the chain is intact, otherwise
        ``(False, seq)`` where ``seq`` is the first row that fails.
    """
    ordered = select(AuditLog).order_by(AuditLog.seq.asc())
    expected_prev: str | None = None
    for entry in session.execute(ordered).scalars().all():
        recomputed = _compute_hash(
            expected_prev,
            {
                "actor": entry.actor,
                "role": entry.role,
                "action": entry.action,
                "target_type": entry.target_type,
                "target_id": entry.target_id,
                "detail": entry.detail or {},
            },
        )
        hash_ok = recomputed == entry.entry_hash
        link_ok = entry.prev_hash == expected_prev
        if not (hash_ok and link_ok):
            return False, entry.seq
        expected_prev = entry.entry_hash
    return True, None
+1
View File
@@ -0,0 +1 @@
"""线索引擎模块:线索模型、生成、置信度分级、状态流转(人机闭环)。"""
+136
View File
@@ -0,0 +1,136 @@
"""线索 ORM 模型。
对应需求 R7(线索+证据链+解释)、R17(闭环状态)、R18(置信度分级)、R19(线索不可删)。
"""
from __future__ import annotations
import datetime as dt
import enum
import uuid
from sqlalchemy import DateTime, Enum, Float, ForeignKey, Index, String, Text
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.db import Base
def _enum_values(enum_cls):
    """Return the ``value`` of every member of ``enum_cls``, in definition order.

    Passed as ``values_callable`` so SQLAlchemy persists the (lower-case) enum
    values into the PostgreSQL native enum rather than the member names.
    """
    values = []
    for member in enum_cls:
        values.append(member.value)
    return values
def _uuid() -> uuid.UUID:
    """Return a freshly generated random (version 4) UUID; used as a column default."""
    fresh_id = uuid.uuid4()
    return fresh_id
def _now() -> dt.datetime:
return dt.datetime.now(dt.UTC)
class ConfidenceTier(str, enum.Enum):
    """Three-tier confidence routing (R18)."""

    HIGH = "high"  # high confidence: push directly for handling
    MEDIUM = "medium"  # medium confidence: manual review
    LOW = "low"  # low confidence: archive for reference
class ClueStatus(str, enum.Enum):
    """Closed-loop state machine of a clue (R17)."""

    NEW = "new"  # newly generated
    ASSIGNED = "assigned"  # assigned
    REVIEWING = "reviewing"  # under review
    CONFIRMED = "confirmed"  # adjudicated as substantiated
    DISMISSED = "dismissed"  # adjudicated as a false positive
    RECTIFYING = "rectifying"  # rectification in progress
    TRANSFERRED = "transferred"  # handed over
    CLOSED = "closed"  # closed out
class Clue(Base):
    """Audit clue.

    Once generated a clue must not be physically deleted (R19); a clue that is
    no longer relevant is expressed through its status.
    """

    __tablename__ = "clue"
    __table_args__ = (
        Index("ix_clue_status", "status"),
        Index("ix_clue_scenario", "scenario_code"),
        Index("ix_clue_assignee", "assignee"),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    title: Mapped[str] = mapped_column(String(256), nullable=False)
    risk_domain: Mapped[str] = mapped_column(String(32), nullable=False)  # revenue/cost/procurement/funds/compliance
    scenario_code: Mapped[str] = mapped_column(String(32), nullable=False)  # e.g. R8/R9
    confidence: Mapped[ConfidenceTier] = mapped_column(
        Enum(ConfidenceTier, name="confidence_tier", values_callable=_enum_values),
        nullable=False,
    )
    score: Mapped[float] = mapped_column(Float, default=0.0)  # risk score in 0-1
    status: Mapped[ClueStatus] = mapped_column(
        Enum(ClueStatus, name="clue_status", values_callable=_enum_values),
        default=ClueStatus.NEW,
        nullable=False,
    )
    # Plain-language explanation (the rationale for the finding) and the evidence chain.
    rationale: Mapped[str] = mapped_column(Text, default="")
    evidence: Mapped[dict] = mapped_column(JSONB, default=dict)
    # Subjects involved (amounts, lists of entity ids, etc.).
    subjects: Mapped[dict] = mapped_column(JSONB, default=dict)
    amount_involved: Mapped[float | None] = mapped_column(Float, nullable=True)
    assignee: Mapped[str | None] = mapped_column(String(64), nullable=True)
    # False-positive / confirmed feedback (R18 feedback learning).
    feedback: Mapped[str | None] = mapped_column(String(16), nullable=True)  # confirmed/false_positive
    # Traceability: the model / rule / data versions in effect when the clue was produced (R19).
    model_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
    rule_version: Mapped[str | None] = mapped_column(String(64), nullable=True)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), nullable=True)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
    updated_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=_now, onupdate=_now
    )
    # Status-transition records of this clue.
    # NOTE(review): cascade="all, delete-orphan" would remove history rows together with a
    # deleted clue, while clues are documented as non-deletable — confirm this is intended.
    history: Mapped[list[ClueStatusHistory]] = relationship(
        back_populates="clue", cascade="all, delete-orphan"
    )
class ClueStatusHistory(Base):
    """Trace of clue status transitions (R17/R19)."""

    __tablename__ = "clue_status_history"
    __table_args__ = (Index("ix_csh_clue", "clue_id"),)

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    clue_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
    )
    # Status values are stored as plain strings; from_status is nullable.
    from_status: Mapped[str | None] = mapped_column(String(16), nullable=True)
    to_status: Mapped[str] = mapped_column(String(16), nullable=False)
    actor: Mapped[str] = mapped_column(String(64), nullable=False)
    note: Mapped[str | None] = mapped_column(Text, nullable=True)
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
    clue: Mapped[Clue] = relationship(back_populates="history")
class WorkingPaper(Base):
    """Audit working paper (R17): generated automatically once adjudication completes; traceable."""

    __tablename__ = "working_paper"
    __table_args__ = (Index("ix_wp_clue", "clue_id"),)

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    clue_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("clue.id"), nullable=False
    )
    content: Mapped[str] = mapped_column(Text, default="")
    conclusion: Mapped[str | None] = mapped_column(String(32), nullable=True)
    author: Mapped[str] = mapped_column(String(64), nullable=False)
    snapshot: Mapped[dict] = mapped_column(JSONB, default=dict)  # evidence / version snapshot
    created_at: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), default=_now)
+195
View File
@@ -0,0 +1,195 @@
"""线索服务:生成、置信度分级、状态流转、底稿生成、反馈。
对应 R7 / R17 / R18 / R19。所有状态变更写入历史并记自审计日志(线索不可删)。
"""
from __future__ import annotations
import uuid
from sqlalchemy.orm import Session
from app.audit import service as audit
from app.clues.models import (
Clue,
ClueStatus,
ClueStatusHistory,
ConfidenceTier,
WorkingPaper,
)
# Allowed status transitions (R17 closed loop): current status -> statuses it may move to.
# CLOSED is terminal (empty set).
_ALLOWED_TRANSITIONS: dict[ClueStatus, set[ClueStatus]] = {
    ClueStatus.NEW: {ClueStatus.ASSIGNED, ClueStatus.REVIEWING},
    ClueStatus.ASSIGNED: {ClueStatus.REVIEWING},
    ClueStatus.REVIEWING: {ClueStatus.CONFIRMED, ClueStatus.DISMISSED},
    ClueStatus.CONFIRMED: {ClueStatus.RECTIFYING, ClueStatus.TRANSFERRED},
    ClueStatus.DISMISSED: {ClueStatus.CLOSED},
    ClueStatus.RECTIFYING: {ClueStatus.CLOSED},
    ClueStatus.TRANSFERRED: {ClueStatus.CLOSED},
    ClueStatus.CLOSED: set(),
}
class IllegalTransitionError(ValueError):
    """Raised when a requested clue status transition is not permitted."""
def score_to_tier(score: float) -> ConfidenceTier:
    """Map a risk score to one of the three confidence tiers (R18).

    ``score >= 0.8`` is HIGH, ``score >= 0.5`` is MEDIUM, anything else is LOW.
    """
    # Thresholds are checked from the highest down; the first one met wins.
    thresholds = (
        (0.8, ConfidenceTier.HIGH),
        (0.5, ConfidenceTier.MEDIUM),
    )
    for lower_bound, tier in thresholds:
        if score >= lower_bound:
            return tier
    return ConfidenceTier.LOW
def create_clue(
    session: Session,
    *,
    title: str,
    risk_domain: str,
    scenario_code: str,
    score: float,
    rationale: str,
    evidence: dict,
    subjects: dict | None = None,
    amount_involved: float | None = None,
    model_version: str | None = None,
    rule_version: str | None = None,
    data_version_id: uuid.UUID | None = None,
    actor: str = "system",
) -> Clue:
    """Create a clue, tier it by score, and leave a creation trace.

    The confidence tier is derived from ``score`` via ``score_to_tier``; the
    clue starts in status NEW. A status-history row and an audit-log entry
    (action ``create_clue``) are written. Changes are flushed, not committed.

    Returns:
        The newly created Clue.
    """
    tier = score_to_tier(score)
    new_clue = Clue(
        title=title,
        risk_domain=risk_domain,
        scenario_code=scenario_code,
        confidence=tier,
        score=score,
        status=ClueStatus.NEW,
        rationale=rationale,
        evidence=evidence,
        subjects=subjects or {},
        amount_involved=amount_involved,
        model_version=model_version,
        rule_version=rule_version,
        data_version_id=data_version_id,
    )
    session.add(new_clue)
    # Flush before the clue's id is referenced by the history row and the audit entry.
    session.flush()

    _add_history(session, new_clue, None, ClueStatus.NEW, actor, "线索生成")

    audit_detail = {
        "scenario": scenario_code,
        "score": score,
        "confidence": new_clue.confidence.value,
    }
    audit.record(
        session,
        actor,
        "create_clue",
        target_type="clue",
        target_id=str(new_clue.id),
        detail=audit_detail,
    )
    return new_clue
def _add_history(
    session: Session,
    clue: Clue,
    from_status: ClueStatus | None,
    to_status: ClueStatus,
    actor: str,
    note: str | None,
) -> None:
    """Add one status-history row for ``clue`` and flush it.

    Statuses are stored by their enum value; a missing ``from_status`` is stored as None.
    """
    if from_status:
        previous_value = from_status.value
    else:
        previous_value = None

    entry = ClueStatusHistory(
        clue_id=clue.id,
        from_status=previous_value,
        to_status=to_status.value,
        actor=actor,
        note=note,
    )
    session.add(entry)
    session.flush()
def transition(
    session: Session, clue: Clue, to_status: ClueStatus, actor: str, note: str | None = None
) -> Clue:
    """Move ``clue`` to ``to_status`` after validating the transition, leaving a trace.

    On success the status is updated and flushed, a history row is added and an
    audit-log entry (action ``transition_clue``) is recorded.

    Raises:
        IllegalTransitionError: when ``to_status`` is not reachable from the
            clue's current status according to ``_ALLOWED_TRANSITIONS``.
    """
    permitted = _ALLOWED_TRANSITIONS.get(clue.status, set())
    if to_status not in permitted:
        raise IllegalTransitionError(
            f"线索状态不能从 {clue.status.value} 流转到 {to_status.value}"
        )

    previous = clue.status
    clue.status = to_status
    session.flush()

    _add_history(session, clue, previous, to_status, actor, note)
    audit.record(
        session,
        actor,
        "transition_clue",
        target_type="clue",
        target_id=str(clue.id),
        detail={"from": previous.value, "to": to_status.value, "note": note},
    )
    return clue
def assign(session: Session, clue: Clue, assignee: str, actor: str) -> Clue:
    """Assign ``clue`` to ``assignee`` and record the assignment in the audit log.

    A clue that is still NEW is also moved to ASSIGNED; clues in any other
    status only have their assignee changed.
    """
    clue.assignee = assignee
    session.flush()

    still_new = clue.status == ClueStatus.NEW
    if still_new:
        transition(session, clue, ClueStatus.ASSIGNED, actor, f"分派给 {assignee}")

    audit.record(
        session,
        actor,
        "assign_clue",
        target_type="clue",
        target_id=str(clue.id),
        detail={"assignee": assignee},
    )
    return clue
def adjudicate(
    session: Session, clue: Clue, confirmed: bool, actor: str, note: str | None = None
) -> WorkingPaper:
    """Adjudicate a clue and generate its working paper (R17/R18).

    A clue that is not yet REVIEWING is first moved to REVIEWING when the state
    machine allows it (i.e. from NEW or ASSIGNED). The clue is then moved to
    CONFIRMED or DISMISSED, its feedback is set to ``"confirmed"`` or
    ``"false_positive"``, and a WorkingPaper snapshotting the evidence,
    rationale, score and versions is created and audited.

    Args:
        session: Active database session; changes are flushed, not committed.
        clue: The clue being adjudicated.
        confirmed: True to confirm the clue, False to dismiss it as a false positive.
        actor: Identifier of the person performing the adjudication.
        note: Optional note; stored on the transition and as the paper content.

    Returns:
        The newly created WorkingPaper.

    Raises:
        IllegalTransitionError: raised by ``transition`` when the verdict status
            cannot be reached from the clue's current status.
    """
    # Enter REVIEWING first when permitted. For every other status no explicit
    # check is needed here: the verdict transition below is rejected by transition().
    if (
        clue.status != ClueStatus.REVIEWING
        and ClueStatus.REVIEWING in _ALLOWED_TRANSITIONS.get(clue.status, set())
    ):
        transition(session, clue, ClueStatus.REVIEWING, actor, "进入研判")

    to = ClueStatus.CONFIRMED if confirmed else ClueStatus.DISMISSED
    transition(session, clue, to, actor, note)
    clue.feedback = "confirmed" if confirmed else "false_positive"
    session.flush()

    paper = WorkingPaper(
        clue_id=clue.id,
        content=note or "",
        conclusion=to.value,
        author=actor,
        # Freeze what the verdict was based on so the paper stays traceable.
        snapshot={
            "evidence": clue.evidence,
            "rationale": clue.rationale,
            "score": clue.score,
            "model_version": clue.model_version,
            "rule_version": clue.rule_version,
            "data_version_id": str(clue.data_version_id) if clue.data_version_id else None,
        },
    )
    session.add(paper)
    session.flush()
    audit.record(
        session, actor, "create_working_paper",
        target_type="working_paper", target_id=str(paper.id),
        detail={"clue_id": str(clue.id), "conclusion": to.value},
    )
    return paper
def list_clues(
    session: Session,
    *,
    status: ClueStatus | None = None,
    scenario_code: str | None = None,
    confidence: ConfidenceTier | None = None,
) -> list[Clue]:
    """List clues, optionally filtered, ordered by descending score.

    Each filter is applied only when its argument is truthy.
    """
    criteria = []
    if status:
        criteria.append(Clue.status == status)
    if scenario_code:
        criteria.append(Clue.scenario_code == scenario_code)
    if confidence:
        criteria.append(Clue.confidence == confidence)

    query = session.query(Clue)
    for criterion in criteria:
        query = query.filter(criterion)
    return query.order_by(Clue.score.desc()).all()
+70
View File
@@ -0,0 +1,70 @@
"""应用配置。
通过环境变量加载,区分 dev / prod 运行环境。
prod 环境强制执行"数据零出域"红线:禁用任何公网 LLM Provider。
"""
from __future__ import annotations
from enum import Enum
from pydantic_settings import BaseSettings, SettingsConfigDict
class AppEnv(str, Enum):
    """Runtime environment of the application."""

    dev = "dev"
    prod = "prod"
class LLMProviderName(str, Enum):
    """Names of the supported LLM providers."""

    dashscope = "dashscope"  # public-internet Qwen, dev only
    vllm = "vllm"  # local, prod
# Providers classified as "public internet / data leaving the domain"; forbidden in prod.
EGRESS_PROVIDERS: frozenset[LLMProviderName] = frozenset({LLMProviderName.dashscope})
# Application settings. Configured with no env prefix, a ".env" file, unknown keys ignored,
# and case-insensitive names.
class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        env_prefix="",
        env_file=".env",
        extra="ignore",
        case_sensitive=False,
    )

    # Runtime environment.
    aiaudit_env: AppEnv = AppEnv.dev

    # Storage backends.
    database_url: str = "postgresql+psycopg://freedak@localhost:5432/aiaudit"
    redis_url: str = "redis://localhost:6379/0"

    # LLM provider selection and per-provider settings.
    llm_provider: LLMProviderName = LLMProviderName.dashscope
    dashscope_api_key: str = ""
    dashscope_model: str = "qwen-plus"
    vllm_base_url: str = "http://localhost:8001/v1"
    vllm_model: str = "qwen2.5-72b-instruct"

    @property
    def is_prod(self) -> bool:
        """True when the configured environment is prod."""
        return self.aiaudit_env == AppEnv.prod

    def validate_egress_policy(self) -> None:
        """Enforce the "no data leaves the domain" red line.

        Intended to be called at application startup; a violation raises so
        that startup is aborted.

        Raises:
            RuntimeError: when running in prod with a provider listed in
                ``EGRESS_PROVIDERS``.
        """
        if not self.is_prod:
            return
        if self.llm_provider not in EGRESS_PROVIDERS:
            return
        raise RuntimeError(
            f"数据零出域红线违规:prod 环境禁止使用公网 LLM Provider "
            f"'{self.llm_provider.value}'。请改用本地 Provider(如 vllm)。"
        )
# Module-level cache for the Settings instance; filled on first access.
_settings: Settings | None = None


def get_settings() -> Settings:
    """Return the shared Settings instance, constructing it on the first call."""
    global _settings
    if _settings is not None:
        return _settings
    _settings = Settings()
    return _settings
+1
View File
@@ -0,0 +1 @@
"""审计数据中台模块:本体/知识图谱、双时态、时序、数据版本。"""
+83
View File
@@ -0,0 +1,83 @@
"""双时态事实仓储:写入与"按历史时点回放"查询。
对应需求 R3 / ADR-0002
- 业务有效期 valid_from/valid_to(应用时间)
- 系统记录期 system_from/system_to(事务时间)
回放 = 给定 (as_of_valid, as_of_system) 在两条时间线上各取"包含该时点"的记录。
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy import or_
from sqlalchemy.orm import Session
from app.datahub.models import BitemporalFact
def record_fact(
    session: Session,
    entity_id: uuid.UUID,
    attr_name: str,
    attr_value: dict,
    valid_from: dt.datetime,
    valid_to: dt.datetime | None = None,
    data_version_id: uuid.UUID | None = None,
) -> BitemporalFact:
    """Record one bitemporal fact and flush it.

    Only the business validity period is supplied here; ``system_from`` is not
    set by this function and is left to the model's column default.

    Returns:
        The newly added BitemporalFact.
    """
    new_fact = BitemporalFact(
        entity_id=entity_id,
        attr_name=attr_name,
        attr_value=attr_value,
        valid_from=valid_from,
        valid_to=valid_to,
        data_version_id=data_version_id,
    )
    session.add(new_fact)
    session.flush()
    return new_fact
def as_of(
    session: Session,
    entity_id: uuid.UUID,
    attr_name: str,
    as_of_valid: dt.datetime,
    as_of_system: dt.datetime | None = None,
) -> BitemporalFact | None:
    """Replay: return the fact valid at a business time as seen at a system time.

    - Business timeline: ``valid_from <= as_of_valid < valid_to`` (NULL ``valid_to`` means open-ended).
    - System timeline: ``system_from <= as_of_system < system_to`` (NULL ``system_to`` means still visible).

    ``as_of_system`` defaults to the current UTC time. When several facts
    match, the one with the latest ``system_from`` is returned; None when
    nothing matches.
    """
    if not as_of_system:
        as_of_system = dt.datetime.now(dt.UTC)

    valid_still_open = or_(
        BitemporalFact.valid_to.is_(None), BitemporalFact.valid_to > as_of_valid
    )
    system_still_open = or_(
        BitemporalFact.system_to.is_(None),
        BitemporalFact.system_to > as_of_system,
    )
    conditions = [
        BitemporalFact.entity_id == entity_id,
        BitemporalFact.attr_name == attr_name,
        BitemporalFact.valid_from <= as_of_valid,
        valid_still_open,
        BitemporalFact.system_from <= as_of_system,
        system_still_open,
    ]
    matching = (
        session.query(BitemporalFact)
        .filter(*conditions)
        .order_by(BitemporalFact.system_from.desc())
    )
    return matching.first()
def close_fact(
    session: Session, fact: BitemporalFact, system_to: dt.datetime | None = None
) -> None:
    """Logically close a fact's system-visibility period (correction/invalidation, not deletion).

    ``system_to`` defaults to the current UTC time. The change is flushed.
    """
    closing_time = system_to
    if not closing_time:
        closing_time = dt.datetime.now(dt.UTC)
    fact.system_to = closing_time
    session.add(fact)
    session.flush()
+58
View File
@@ -0,0 +1,58 @@
"""数据中台 schema 初始化。
MVP 阶段以 SQLAlchemy metadata 建表(后续可迁移到 Alembic)。
扩展按可用性可选启用:
- btree_gist / vector:若可用则创建。
- timescaledb:若可用则把 metric_event 转为超表;不可用则保持普通表(带时间索引)。
"""
from __future__ import annotations
from sqlalchemy import text
from sqlalchemy.engine import Engine
from app.datahub import models # noqa: F401 确保模型注册到 metadata
from app.db import Base, get_engine
def _extension_available(engine: Engine, name: str) -> bool:
    """Return True when ``name`` is listed in ``pg_available_extensions``."""
    probe = text("SELECT 1 FROM pg_available_extensions WHERE name = :n")
    with engine.connect() as conn:
        hit = conn.execute(probe, {"n": name}).first()
    return hit is not None
def init_extensions(engine: Engine) -> dict[str, bool]:
    """Create each optional extension that the server offers.

    Returns:
        A mapping of extension name to whether it was found available
        (``CREATE EXTENSION IF NOT EXISTS`` is issued for those).
    """
    wanted = ("btree_gist", "vector", "timescaledb")
    status: dict[str, bool] = {}
    for ext in wanted:
        status[ext] = _extension_available(engine, ext)
        if not status[ext]:
            continue
        # ext comes from the fixed tuple above, never from external input.
        with engine.begin() as conn:
            conn.execute(text(f"CREATE EXTENSION IF NOT EXISTS {ext}"))
    return status
def create_schema(engine: Engine | None = None) -> dict[str, bool]:
    """Create all tables registered on ``Base`` and enable the hypertable when possible.

    Uses ``get_engine()`` when no engine is given. When TimescaleDB is
    reported available, ``metric_event`` is converted to a hypertable
    (``if_not_exists => TRUE`` keeps the call repeatable).

    Returns:
        The extension status mapping produced by ``init_extensions``.
    """
    if not engine:
        engine = get_engine()

    status = init_extensions(engine)
    Base.metadata.create_all(engine)

    if not status.get("timescaledb"):
        return status

    hypertable_stmt = text(
        "SELECT create_hypertable('metric_event', 'event_time', "
        "if_not_exists => TRUE, migrate_data => TRUE)"
    )
    with engine.begin() as conn:
        conn.execute(hypertable_stmt)
    return status
# Script entry point: initialise the schema and print the resulting extension status.
if __name__ == "__main__":
    print("数据中台 schema 初始化完成。扩展状态:", create_schema())
+118
View File
@@ -0,0 +1,118 @@
"""知识图谱仓储:实体/关系写入与多跳穿透(递归 CTE)。
对应需求 R2:支撑隐性实控人、关联方网络、"马甲"供应商等穿透分析。
统一穿透查询服务(P1.2.5)在此之上封装对外 API,对上层屏蔽底层是关系表还是图库。
"""
from __future__ import annotations
import uuid
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.datahub.models import Entity, EntityRelationship
from app.datahub.ontology import EntityType, RelationshipType, is_valid_relationship
class OntologyViolationError(ValueError):
    """Raised when a relationship does not satisfy the ontology constraints."""
def upsert_entity(
    session: Session,
    entity_type: EntityType,
    business_key: str,
    display_name: str | None = None,
    attributes: dict | None = None,
    data_version_id: uuid.UUID | None = None,
) -> Entity:
    """Idempotently write an entity keyed by (entity type, business key).

    When the entity already exists, ``display_name`` is overwritten if given
    and ``attributes`` are merged over the stored ones; ``data_version_id`` is
    not applied to an existing row. Otherwise a new entity is added and flushed.

    Returns:
        The existing or newly created Entity.
    """
    match = (
        session.query(Entity)
        .filter(Entity.entity_type == entity_type.value, Entity.business_key == business_key)
        .one_or_none()
    )

    if match is None:
        created = Entity(
            entity_type=entity_type.value,
            business_key=business_key,
            display_name=display_name,
            attributes=attributes or {},
            data_version_id=data_version_id,
        )
        session.add(created)
        session.flush()
        return created

    if display_name is not None:
        match.display_name = display_name
    if attributes:
        # Assign a new dict (instead of mutating in place); incoming keys win.
        merged = dict(match.attributes or {})
        merged.update(attributes)
        match.attributes = merged
    return match
def add_relationship(
    session: Session,
    rel_type: RelationshipType,
    source: Entity,
    target: Entity,
    attributes: dict | None = None,
    data_version_id: uuid.UUID | None = None,
) -> EntityRelationship:
    """Add one relationship edge after validating it against the ontology.

    Returns:
        The newly added (and flushed) EntityRelationship.

    Raises:
        OntologyViolationError: when ``rel_type`` is not allowed between the
            source and target entity types.
    """
    src_type = EntityType(source.entity_type)
    tgt_type = EntityType(target.entity_type)

    allowed = is_valid_relationship(rel_type, src_type, tgt_type)
    if not allowed:
        raise OntologyViolationError(
            f"关系 {rel_type.value} 不允许从 {src_type.value} 指向 {tgt_type.value}"
        )

    edge = EntityRelationship(
        rel_type=rel_type.value,
        source_id=source.id,
        target_id=target.id,
        attributes=attributes or {},
        data_version_id=data_version_id,
    )
    session.add(edge)
    session.flush()
    return edge
# 多跳穿透:以无向方式遍历关系边,返回与起点在 max_depth 跳内连通的实体集合。
# 用于"疑似同一实控人/关联方网络"识别。
_TRAVERSE_SQL = text(
"""
WITH RECURSIVE reachable(entity_id, depth, path) AS (
SELECT :start_id, 0, ARRAY[:start_id]
UNION ALL
SELECT
CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END,
rc.depth + 1,
rc.path || CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END
FROM reachable rc
JOIN entity_relationship r
ON (r.source_id = rc.entity_id OR r.target_id = rc.entity_id)
WHERE rc.depth < :max_depth
AND NOT (
CASE WHEN r.source_id = rc.entity_id THEN r.target_id ELSE r.source_id END
= ANY(rc.path)
)
)
SELECT DISTINCT entity_id, MIN(depth) AS depth
FROM reachable
WHERE entity_id <> :start_id
GROUP BY entity_id
ORDER BY depth;
"""
)
def find_related_entities(
    session: Session, start_id: uuid.UUID, max_depth: int = 3
) -> list[tuple[uuid.UUID, int]]:
    """Return ``(entity id, minimum hop count)`` for entities within ``max_depth`` hops of the start.

    Runs ``_TRAVERSE_SQL`` and returns its rows as plain tuples in query order.
    """
    params = {"start_id": start_id, "max_depth": max_depth}
    found: list[tuple[uuid.UUID, int]] = []
    for row in session.execute(_TRAVERSE_SQL, params).all():
        found.append((row[0], row[1]))
    return found
+157
View File
@@ -0,0 +1,157 @@
"""审计数据中台 ORM 模型。
涵盖:数据版本、本体实体、知识图谱关系边、双时态属性、时序事件。
对应需求 R2 / R3,建模决策见 ADR-0002。
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy import (
DateTime,
Float,
ForeignKey,
Index,
Integer,
String,
Text,
UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.db import Base
def _uuid() -> uuid.UUID:
    """Return a freshly generated random (version 4) UUID; used as a column default."""
    fresh_id = uuid.uuid4()
    return fresh_id
class DataVersion(Base):
    """Data version registry.

    Records the source, batch, time and row count of each ingested batch so
    that conclusions stay traceable (R3).
    """

    __tablename__ = "data_version"

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    source_system: Mapped[str] = mapped_column(String(64), nullable=False)
    batch_label: Mapped[str] = mapped_column(String(128), nullable=False)
    row_count: Mapped[int] = mapped_column(Integer, default=0)
    # Defaults to the current UTC time.
    ingested_at: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: dt.datetime.now(dt.UTC)
    )
    note: Mapped[str | None] = mapped_column(Text, nullable=True)
class Entity(Base):
    """Ontology entity node (a vertex of the knowledge graph).

    ``business_key`` is the business primary key in the source system and is
    used for master-data alignment (unifying the same entity across systems).
    The pair (entity_type, business_key) is unique.
    """

    __tablename__ = "entity"
    __table_args__ = (
        UniqueConstraint("entity_type", "business_key", name="uq_entity_type_bizkey"),
        Index("ix_entity_type", "entity_type"),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    entity_type: Mapped[str] = mapped_column(String(32), nullable=False)
    business_key: Mapped[str] = mapped_column(String(128), nullable=False)
    display_name: Mapped[str | None] = mapped_column(String(256), nullable=True)
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
    # Master-data alignment: the canonical ("golden") entity this one has been merged into
    # (same actual controller / same subject). NULL means this row is itself the canonical entity.
    canonical_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=True
    )
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )
class EntityRelationship(Base):
    """Knowledge-graph relationship edge (directed).

    Multi-hop penetration traverses this table with a recursive CTE.
    """

    __tablename__ = "entity_relationship"
    __table_args__ = (
        Index("ix_rel_source", "source_id"),
        Index("ix_rel_target", "target_id"),
        Index("ix_rel_type", "rel_type"),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    rel_type: Mapped[str] = mapped_column(String(32), nullable=False)
    source_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
    )
    target_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
    )
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )
    # Both relationships point at Entity, so the foreign key is named explicitly for each.
    source: Mapped[Entity] = relationship(foreign_keys=[source_id])
    target: Mapped[Entity] = relationship(foreign_keys=[target_id])
class BitemporalFact(Base):
    """Bitemporal fact: a record of how an entity's attribute/state changes over time.

    - Business validity period ``valid_from``/``valid_to`` (application time)
    - System record period ``system_from``/``system_to`` (transaction time)

    Replaying history means filtering both timelines for a given
    (as_of_valid, as_of_system) pair (see the repository module).
    """

    __tablename__ = "bitemporal_fact"
    __table_args__ = (
        Index("ix_btf_entity_attr", "entity_id", "attr_name"),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    entity_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entity.id"), nullable=False
    )
    attr_name: Mapped[str] = mapped_column(String(64), nullable=False)
    attr_value: Mapped[dict] = mapped_column(JSONB, default=dict)
    valid_from: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), nullable=False)
    valid_to: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    # Defaults to the current UTC time.
    system_from: Mapped[dt.datetime] = mapped_column(
        DateTime(timezone=True), default=lambda: dt.datetime.now(dt.UTC)
    )
    system_to: Mapped[dt.datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )
class MetricEvent(Base):
    """Time-series event: behaviour/metric data.

    Examples: user lifecycle, collections, call traffic, commission, resource usage.
    After deployment the table is converted to a hypertable through TimescaleDB
    ``create_hypertable('metric_event', 'event_time')``.
    """

    __tablename__ = "metric_event"
    __table_args__ = (
        Index("ix_metric_subject_time", "subject_type", "subject_key", "event_time"),
        Index("ix_metric_name_time", "metric_name", "event_time"),
    )

    id: Mapped[uuid.UUID] = mapped_column(UUID(as_uuid=True), primary_key=True, default=_uuid)
    event_time: Mapped[dt.datetime] = mapped_column(DateTime(timezone=True), nullable=False)
    subject_type: Mapped[str] = mapped_column(String(32), nullable=False)  # e.g. msisdn/channel
    subject_key: Mapped[str] = mapped_column(String(128), nullable=False)
    metric_name: Mapped[str] = mapped_column(String(64), nullable=False)  # e.g. traffic_mb/commission
    metric_value: Mapped[float] = mapped_column(Float, default=0.0)
    attributes: Mapped[dict] = mapped_column(JSONB, default=dict)
    data_version_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True), ForeignKey("data_version.id"), nullable=True
    )
+86
View File
@@ -0,0 +1,86 @@
"""审计本体(Ontology)定义。
定义电信内审域的核心实体类型与关系类型,作为知识图谱与主数据对齐的基准。
对应需求 R2。
"""
from __future__ import annotations
from enum import Enum
class EntityType(str, Enum):
    """Core entity types of the audit ontology."""

    CUSTOMER = "customer"  # customer (including government/enterprise)
    CONTRACT = "contract"  # contract
    MSISDN = "msisdn"  # phone number
    IMEI = "imei"  # terminal device
    ACCOUNT = "account"  # account (paying / receiving)
    WORK_ORDER = "work_order"  # work order
    SUPPLIER = "supplier"  # supplier
    SETTLEMENT = "settlement"  # settlement statement
    EMPLOYEE = "employee"  # employee
    CHANNEL = "channel"  # channel / agent
    LEGAL_PERSON = "legal_person"  # legal person / natural person
    ADDRESS = "address"  # address
class RelationshipType(str, Enum):
    """Core relationship types of the audit ontology (directed)."""

    SIGNED = "signed"  # customer —signs→ contract
    PAID_BY = "paid_by"  # contract —payment account→ account
    # customer/supplier —owns→ account
    # (RELATIONSHIP_DOMAIN in this module also permits legal_person as the source)
    OWNS_ACCOUNT = "owns_account"
    REGISTERED_AT = "registered_at"  # customer/supplier —registered address→ address
    LEGAL_REP_OF = "legal_rep_of"  # legal person —legal representative of→ customer/supplier
    RELATED_TO = "related_to"  # legal person —relative/affiliate→ legal person
    HOLDS_MSISDN = "holds_msisdn"  # customer —holds→ number
    BOUND_DEVICE = "bound_device"  # number —bound to→ IMEI
    BELONGS_TO_CHANNEL = "belongs_to_channel"  # number/contract —belongs to→ channel
    SUPPLIES = "supplies"  # supplier —supplies→ contract/work order
    HANDLED_BY = "handled_by"  # work order —handler→ employee
    SETTLES = "settles"  # settlement —settles→ contract
# Legal (source entity types, target entity types) for each relationship type;
# used to validate writes to the graph.
RELATIONSHIP_DOMAIN: dict[RelationshipType, tuple[set[EntityType], set[EntityType]]] = {
    RelationshipType.SIGNED: ({EntityType.CUSTOMER}, {EntityType.CONTRACT}),
    RelationshipType.PAID_BY: ({EntityType.CONTRACT}, {EntityType.ACCOUNT}),
    RelationshipType.OWNS_ACCOUNT: (
        {EntityType.CUSTOMER, EntityType.SUPPLIER, EntityType.LEGAL_PERSON},
        {EntityType.ACCOUNT},
    ),
    RelationshipType.REGISTERED_AT: (
        {EntityType.CUSTOMER, EntityType.SUPPLIER},
        {EntityType.ADDRESS},
    ),
    RelationshipType.LEGAL_REP_OF: (
        {EntityType.LEGAL_PERSON},
        {EntityType.CUSTOMER, EntityType.SUPPLIER},
    ),
    RelationshipType.RELATED_TO: ({EntityType.LEGAL_PERSON}, {EntityType.LEGAL_PERSON}),
    RelationshipType.HOLDS_MSISDN: ({EntityType.CUSTOMER}, {EntityType.MSISDN}),
    RelationshipType.BOUND_DEVICE: ({EntityType.MSISDN}, {EntityType.IMEI}),
    RelationshipType.BELONGS_TO_CHANNEL: (
        {EntityType.MSISDN, EntityType.CONTRACT},
        {EntityType.CHANNEL},
    ),
    RelationshipType.SUPPLIES: (
        {EntityType.SUPPLIER},
        {EntityType.CONTRACT, EntityType.WORK_ORDER},
    ),
    RelationshipType.HANDLED_BY: ({EntityType.WORK_ORDER}, {EntityType.EMPLOYEE}),
    RelationshipType.SETTLES: ({EntityType.SETTLEMENT}, {EntityType.CONTRACT}),
}
def is_valid_relationship(
    rel: RelationshipType, source: EntityType, target: EntityType
) -> bool:
    """Check a relationship's endpoint types against the ontology constraints.

    Args:
        rel: Relationship type being written.
        source: Entity type at the tail of the edge.
        target: Entity type at the head of the edge.

    Returns:
        True when ``rel`` is registered in ``RELATIONSHIP_DOMAIN`` and both
        endpoint types are in its allowed sets; False otherwise (including
        when ``rel`` has no registered domain).
    """
    if rel not in RELATIONSHIP_DOMAIN:
        return False
    allowed_sources, allowed_targets = RELATIONSHIP_DOMAIN[rel]
    if source not in allowed_sources:
        return False
    return target in allowed_targets
+40
View File
@@ -0,0 +1,40 @@
"""数据库引擎与会话管理。"""
from __future__ import annotations
from collections.abc import Iterator
from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase, Session, sessionmaker
from app.config import get_settings
class Base(DeclarativeBase):
    """Base class for all ORM models."""
# Module-level caches, filled on first use by get_engine() and
# get_sessionmaker() respectively; None means "not created yet".
_engine = None
_SessionLocal: sessionmaker[Session] | None = None
def get_engine():
    """Return the shared database engine, creating it on first call.

    The engine is built from ``get_settings().database_url`` with
    ``pool_pre_ping=True`` and cached in the module-level ``_engine``.
    """
    global _engine
    if _engine is not None:
        return _engine
    db_url = get_settings().database_url
    _engine = create_engine(db_url, pool_pre_ping=True, future=True)
    return _engine
def get_sessionmaker() -> sessionmaker[Session]:
    """Return the shared session factory, creating it on first call.

    The factory is bound to ``get_engine()`` and configured with
    ``expire_on_commit=False``; it is cached in ``_SessionLocal``.
    """
    global _SessionLocal
    if _SessionLocal is not None:
        return _SessionLocal
    _SessionLocal = sessionmaker(bind=get_engine(), expire_on_commit=False)
    return _SessionLocal
def get_session() -> Iterator[Session]:
    """Yield a session for FastAPI dependency injection.

    The session is opened from the shared factory and used as a context
    manager, so it is released when the consumer finishes with it.
    """
    with get_sessionmaker()() as db:
        yield db
+10
View File
@@ -0,0 +1,10 @@
"""LLM Provider 抽象层。
通过统一接口隔离 LLM 实现,使开发期可用公网千问、生产期无缝切换本地 vLLM。
强约束:"数据零出域"红线由 provider 工厂在 prod 环境拦截公网 Provider。
"""
from app.llm.base import ChatMessage, LLMProvider, LLMResponse
from app.llm.factory import get_llm_provider
# Names re-exported as this package's public API.
__all__ = ["ChatMessage", "LLMProvider", "LLMResponse", "get_llm_provider"]
+44
View File
@@ -0,0 +1,44 @@
"""LLM Provider 抽象接口与数据模型。"""
from __future__ import annotations
import abc
from dataclasses import dataclass, field
@dataclass
class ChatMessage:
    """A single chat turn passed to an LLM provider."""

    role: str  # "system" | "user" | "assistant"
    content: str
@dataclass
class LLMResponse:
    """Result of a chat completion, as returned by an LLM provider."""

    content: str
    model: str
    provider: str
    # Whether the call went through an egress (public network) channel;
    # kept so it can be recorded in the audit trail.
    egress: bool = False
    raw: dict = field(default_factory=dict)
class LLMProvider(abc.ABC):
    """Common interface implemented by every LLM backend.

    Business code depends only on this interface; switching between a public
    and a local backend is a configuration change, not a caller change.
    """

    #: Provider name.
    name: str = "base"
    #: Whether the provider goes over the public network (data egress).
    #: Providers with egress=True are forbidden in the prod environment.
    egress: bool = False

    @abc.abstractmethod
    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """Synchronous chat completion."""
        raise NotImplementedError

    @abc.abstractmethod
    def health(self) -> bool:
        """Liveness probe: report whether the provider is usable."""
        raise NotImplementedError
+31
View File
@@ -0,0 +1,31 @@
"""LLM Provider 工厂:按配置创建 provider,并执行数据零出域红线校验。"""
from __future__ import annotations
from app.config import EGRESS_PROVIDERS, LLMProviderName, Settings, get_settings
from app.llm.base import LLMProvider
from app.llm.providers import DashScopeProvider, VllmProvider
class EgressPolicyError(RuntimeError):
    """Violation of the "zero data egress" red line."""
def get_llm_provider(settings: Settings | None = None) -> LLMProvider:
    """Build the LLM provider selected by configuration.

    Args:
        settings: Settings to use; when omitted (or falsy) the result of
            ``get_settings()`` is used instead.

    Returns:
        A ``DashScopeProvider`` or ``VllmProvider`` configured from settings.

    Raises:
        EgressPolicyError: In the prod environment when the configured
            provider is listed in ``EGRESS_PROVIDERS``.
        ValueError: When the configured provider is not a known one.
    """
    if not settings:
        settings = get_settings()

    # Red line: public-network providers are never allowed in prod.
    violates_red_line = settings.is_prod and settings.llm_provider in EGRESS_PROVIDERS
    if violates_red_line:
        message = (
            f"数据零出域红线违规:prod 环境禁止使用公网 LLM Provider "
            f"'{settings.llm_provider.value}'"
        )
        raise EgressPolicyError(message)

    selected = settings.llm_provider
    if selected == LLMProviderName.dashscope:
        return DashScopeProvider(
            api_key=settings.dashscope_api_key,
            model=settings.dashscope_model,
        )
    if selected == LLMProviderName.vllm:
        return VllmProvider(
            base_url=settings.vllm_base_url,
            model=settings.vllm_model,
        )

    raise ValueError(f"未知的 LLM Provider: {settings.llm_provider}")
+80
View File
@@ -0,0 +1,80 @@
"""具体 LLM Provider 实现:DashScope(公网千问,仅 dev)、vLLM(本地,prod)。
两者均走 OpenAI 兼容的 /chat/completions 协议。
"""
from __future__ import annotations
import httpx
from app.llm.base import ChatMessage, LLMProvider, LLMResponse
class DashScopeProvider(LLMProvider):
    """Public-cloud Qwen served by DashScope in OpenAI-compatible mode.

    Requests leave the local network (``egress = True``); intended for
    development/testing with desensitized or sample data only.
    """

    name = "dashscope"
    egress = True  # goes over the public network, i.e. data egress

    _BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"

    def __init__(self, api_key: str, model: str, timeout: float = 30.0) -> None:
        """Store the credentials, model name and request timeout (seconds)."""
        self._api_key, self._model, self._timeout = api_key, model, timeout

    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """POST the conversation to ``/chat/completions`` and wrap the reply.

        Extra keyword arguments are merged into the request body after the
        ``model`` and ``messages`` keys, so they can override either.
        A non-success HTTP status raises via ``raise_for_status()``.
        """
        body = {
            "model": self._model,
            "messages": [{"role": msg.role, "content": msg.content} for msg in messages],
        }
        body.update(kwargs)
        auth_headers = {"Authorization": f"Bearer {self._api_key}"}
        endpoint = f"{self._BASE_URL}/chat/completions"

        with httpx.Client(timeout=self._timeout) as http:
            reply = http.post(endpoint, json=body, headers=auth_headers)
            reply.raise_for_status()
            parsed = reply.json()

        first_choice = parsed["choices"][0]
        return LLMResponse(
            content=first_choice["message"]["content"],
            model=self._model,
            provider=self.name,
            egress=True,
            raw=parsed,
        )

    def health(self) -> bool:
        """Report whether an API key is configured (no network call is made)."""
        return True if self._api_key else False
class VllmProvider(LLMProvider):
    """Local vLLM server speaking the OpenAI-compatible protocol.

    Used in production; requests go to the configured ``base_url`` and the
    provider is marked ``egress = False``.
    """

    name = "vllm"
    egress = False

    def __init__(self, base_url: str, model: str, timeout: float = 60.0) -> None:
        """Store the endpoint, model name and request timeout (seconds).

        A trailing ``/`` on ``base_url`` is stripped so that paths can be
        appended with a single separator.
        """
        self._base_url = base_url.rstrip("/")
        self._model = model
        self._timeout = timeout

    def chat(self, messages: list[ChatMessage], **kwargs) -> LLMResponse:
        """POST the conversation to ``/chat/completions`` and wrap the reply.

        Extra keyword arguments are merged into the request body after the
        ``model`` and ``messages`` keys, so they can override either.
        A non-success HTTP status raises via ``raise_for_status()``.
        """
        payload = {
            "model": self._model,
            "messages": [{"role": m.role, "content": m.content} for m in messages],
            **kwargs,
        }
        with httpx.Client(timeout=self._timeout) as client:
            resp = client.post(f"{self._base_url}/chat/completions", json=payload)
            resp.raise_for_status()
            data = resp.json()
        content = data["choices"][0]["message"]["content"]
        return LLMResponse(
            content=content, model=self._model, provider=self.name, egress=False, raw=data
        )

    def health(self) -> bool:
        """Probe ``GET {base_url}/models``; True only on HTTP 200.

        Returns False rather than raising when the request cannot be made.
        ``httpx.InvalidURL`` is caught explicitly because it does not derive
        from ``httpx.HTTPError``; without it a malformed ``base_url`` would
        make the probe raise instead of reporting "unhealthy".
        """
        try:
            with httpx.Client(timeout=5.0) as client:
                resp = client.get(f"{self._base_url}/models")
        except (httpx.HTTPError, httpx.InvalidURL):
            return False
        return resp.status_code == 200
+45
View File
@@ -0,0 +1,45 @@
"""AIAudit FastAPI 应用入口。"""
from __future__ import annotations
from contextlib import asynccontextmanager
from fastapi import FastAPI
from app import __version__
from app.api.datahub import router as datahub_router
from app.config import get_settings
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook.

    Before the app starts serving, the zero-data-egress policy is validated
    via ``Settings.validate_egress_policy()``; an exception raised there
    propagates and blocks startup.
    """
    get_settings().validate_egress_policy()
    yield
# Application instance; `lifespan` runs the egress-policy check at startup.
app = FastAPI(
    title="AIAudit · 本地 AI 内审平台",
    version=__version__,
    lifespan=lifespan,
)
# Mount the data-hub routes imported from app.api.datahub.
app.include_router(datahub_router)
@app.get("/health")
def health() -> dict:
    """Liveness probe: report a fixed ``ok`` status and the package version."""
    body = dict(status="ok", version=__version__)
    return body
@app.get("/health/config")
def health_config() -> dict:
    """Configuration/compliance probe.

    Exposes the current environment, the configured LLM provider and the
    ``is_prod`` flag (under ``egress_blocked_in_prod``). No secrets are
    included in the response.
    """
    cfg = get_settings()
    report = {}
    report["env"] = cfg.aiaudit_env.value
    report["llm_provider"] = cfg.llm_provider.value
    report["egress_blocked_in_prod"] = cfg.is_prod
    return report