"""接入适配器调度器:统一驱动全部 Adapter 执行 staging → 本体映射。 用法: from app.ingest.runner import run_all_adapters results = run_all_adapters(session, data_version_id) """ from __future__ import annotations import logging import uuid from sqlalchemy.orm import Session from app.ingest.base import IngestResult from app.ingest.registry import ADAPTER_REGISTRY # 确保所有适配器模块被导入,触发 @register_adapter 注册 import app.ingest.adapters_master # noqa: F401 import app.ingest.adapters_r8 # noqa: F401 import app.ingest.adapters_r9 # noqa: F401 import app.ingest.adapters_r10 # noqa: F401 import app.ingest.adapters_r11 # noqa: F401 import app.ingest.adapters_r12 # noqa: F401 import app.ingest.adapters_r13 # noqa: F401 import app.ingest.adapters_r14 # noqa: F401 import app.ingest.adapters_r15 # noqa: F401 logger = logging.getLogger(__name__) def run_all_adapters( session: Session, data_version_id: uuid.UUID | None = None, batch_size: int = 1000, tables: list[str] | None = None, ) -> dict[str, IngestResult]: """执行全部(或指定的)适配器,返回 {staging_table: IngestResult}。 Args: session: 数据库会话(调用方负责 commit/rollback) data_version_id: 当前批次数据版本 ID batch_size: 每个适配器单次处理行数上限 tables: 若指定,仅执行这些 staging 表对应的适配器;为 None 时执行全部 Returns: 各适配器的执行结果字典 """ results: dict[str, IngestResult] = {} target_adapters = ADAPTER_REGISTRY if tables: target_adapters = {k: v for k, v in ADAPTER_REGISTRY.items() if k in tables} for table_name, adapter_cls in target_adapters.items(): logger.info("Running adapter: %s (%s)", adapter_cls.__name__, table_name) adapter = adapter_cls() try: result = adapter.ingest( session, data_version_id=data_version_id, batch_size=batch_size ) results[table_name] = result logger.info( " → rows=%d, entities=%d, rels=%d, events=%d, errors=%d", result.row_count, len(result.entities), len(result.relationships), len(result.metric_events), result.error_count, ) except Exception as exc: logger.error("Adapter %s failed: %s", table_name, exc) results[table_name] = IngestResult(error_count=1) return results def run_adapter( session: Session, staging_table: str, data_version_id: uuid.UUID | None = None, batch_size: int = 1000, ) -> IngestResult: """执行单个指定 staging 表的适配器。""" adapter_cls = ADAPTER_REGISTRY.get(staging_table) if adapter_cls is None: raise ValueError(f"未找到 staging 表 '{staging_table}' 对应的适配器") adapter = adapter_cls() return adapter.ingest(session, data_version_id=data_version_id, batch_size=batch_size)