Files
InternalAuditInterprise/backend/app/ingest/runner.py
T

90 lines
3.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""接入适配器调度器:统一驱动全部 Adapter 执行 staging → 本体映射。
用法:
from app.ingest.runner import run_all_adapters
results = run_all_adapters(session, data_version_id)
"""
from __future__ import annotations
import logging
import uuid
from sqlalchemy.orm import Session
from app.ingest.base import IngestResult
from app.ingest.registry import ADAPTER_REGISTRY
# 确保所有适配器模块被导入,触发 @register_adapter 注册
import app.ingest.adapters_master # noqa: F401
import app.ingest.adapters_r8 # noqa: F401
import app.ingest.adapters_r9 # noqa: F401
import app.ingest.adapters_r10 # noqa: F401
import app.ingest.adapters_r11 # noqa: F401
import app.ingest.adapters_r12 # noqa: F401
import app.ingest.adapters_r13 # noqa: F401
import app.ingest.adapters_r14 # noqa: F401
import app.ingest.adapters_r15 # noqa: F401
logger = logging.getLogger(__name__)
def run_all_adapters(
    session: Session,
    data_version_id: uuid.UUID | None = None,
    batch_size: int = 1000,
    tables: list[str] | None = None,
) -> dict[str, IngestResult]:
    """Run every registered adapter (or a chosen subset) and collect results.

    Each adapter class found in ``ADAPTER_REGISTRY`` is instantiated with no
    arguments and its ``ingest`` method is called. An exception raised while
    an adapter is ingesting is logged with its traceback and recorded as a
    failed result; the remaining adapters still run.

    Args:
        session: Database session passed to every adapter. This function does
            not commit or roll back; transaction control stays with the
            caller.
        data_version_id: Data-version ID of the current batch, forwarded to
            each adapter.
        batch_size: Per-adapter row limit for a single run, forwarded to each
            adapter's ``ingest`` as ``batch_size``.
        tables: Staging-table names whose adapters should run. ``None`` or an
            empty list runs every registered adapter. Names that have no
            registered adapter are skipped and reported in a warning.

    Returns:
        Mapping of staging-table name to the ``IngestResult`` of its adapter.
        An adapter that failed is represented by
        ``IngestResult(error_count=1)``.
    """
    results: dict[str, IngestResult] = {}

    if tables:
        # Build the lookup set once instead of scanning the list for every
        # registry entry.
        requested = set(tables)
        unknown = sorted(requested.difference(ADAPTER_REGISTRY))
        if unknown:
            # Surface typos / stale table names instead of dropping them
            # without a trace.
            logger.warning(
                "No adapter registered for staging table(s): %s",
                ", ".join(unknown),
            )
        target_adapters = {
            name: cls for name, cls in ADAPTER_REGISTRY.items() if name in requested
        }
    else:
        target_adapters = ADAPTER_REGISTRY

    for table_name, adapter_cls in target_adapters.items():
        logger.info("Running adapter: %s (%s)", adapter_cls.__name__, table_name)
        adapter = adapter_cls()
        try:
            result = adapter.ingest(
                session, data_version_id=data_version_id, batch_size=batch_size
            )
            results[table_name] = result
            # The summary stays inside the try block on purpose: a result
            # object lacking the expected fields is treated as a failure too.
            logger.info(
                " → rows=%d, entities=%d, rels=%d, events=%d, errors=%d",
                result.row_count,
                len(result.entities),
                len(result.relationships),
                len(result.metric_events),
                result.error_count,
            )
        except Exception as exc:
            # logger.exception keeps the traceback, which logger.error dropped.
            # NOTE(review): the session is not rolled back here; if the failed
            # adapter left the transaction in an error state, later adapters
            # may fail as well — confirm this matches caller expectations.
            logger.exception("Adapter %s failed: %s", table_name, exc)
            results[table_name] = IngestResult(error_count=1)

    return results
def run_adapter(
    session: Session,
    staging_table: str,
    data_version_id: uuid.UUID | None = None,
    batch_size: int = 1000,
) -> IngestResult:
    """Run the one adapter registered for ``staging_table``.

    Exceptions raised by the adapter itself are not caught here; they
    propagate to the caller.

    Args:
        session: Database session handed to the adapter's ``ingest`` call.
        staging_table: Key used to look the adapter class up in
            ``ADAPTER_REGISTRY``.
        data_version_id: Forwarded to ``ingest`` unchanged.
        batch_size: Forwarded to ``ingest`` unchanged.

    Returns:
        Whatever the adapter's ``ingest`` method returns.

    Raises:
        ValueError: If the registry has no adapter for ``staging_table``.
    """
    registered_cls = ADAPTER_REGISTRY.get(staging_table)
    if registered_cls is not None:
        return registered_cls().ingest(
            session,
            data_version_id=data_version_id,
            batch_size=batch_size,
        )
    raise ValueError(f"未找到 staging 表 '{staging_table}' 对应的适配器")