90 lines
3.0 KiB
Python
90 lines
3.0 KiB
Python
"""接入适配器调度器:统一驱动全部 Adapter 执行 staging → 本体映射。
|
||
|
||
用法:
|
||
from app.ingest.runner import run_all_adapters
|
||
results = run_all_adapters(session, data_version_id)
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
import uuid
|
||
|
||
from sqlalchemy.orm import Session
|
||
|
||
from app.ingest.base import IngestResult
|
||
from app.ingest.registry import ADAPTER_REGISTRY
|
||
|
||
# 确保所有适配器模块被导入,触发 @register_adapter 注册
|
||
import app.ingest.adapters_master # noqa: F401
|
||
import app.ingest.adapters_r8 # noqa: F401
|
||
import app.ingest.adapters_r9 # noqa: F401
|
||
import app.ingest.adapters_r10 # noqa: F401
|
||
import app.ingest.adapters_r11 # noqa: F401
|
||
import app.ingest.adapters_r12 # noqa: F401
|
||
import app.ingest.adapters_r13 # noqa: F401
|
||
import app.ingest.adapters_r14 # noqa: F401
|
||
import app.ingest.adapters_r15 # noqa: F401
|
||
|
||
logger = logging.getLogger(__name__)
|
||
|
||
|
||
def run_all_adapters(
    session: Session,
    data_version_id: uuid.UUID | None = None,
    batch_size: int = 1000,
    tables: list[str] | None = None,
) -> dict[str, IngestResult]:
    """Run every registered adapter (or only the requested ones).

    Adapters are taken from ``ADAPTER_REGISTRY`` and executed one after
    another. A failure in one adapter (including a failure to instantiate it)
    is logged with its traceback and recorded as ``IngestResult(error_count=1)``;
    the remaining adapters still run.

    Args:
        session: Database session handed to each adapter's ``ingest``. The
            caller is responsible for commit/rollback.
        data_version_id: Data version ID of the current batch, forwarded to
            every adapter.
        batch_size: Upper bound on rows processed per adapter run, forwarded
            to every adapter.
        tables: Staging table names to restrict the run to. ``None`` or an
            empty list runs every registered adapter. Names that have no
            registered adapter are skipped and reported with a warning.

    Returns:
        Mapping of staging table name to the ``IngestResult`` of its adapter.
    """
    results: dict[str, IngestResult] = {}

    target_adapters = ADAPTER_REGISTRY
    if tables:
        # Build the lookup set once instead of scanning the list per registry key.
        requested = set(tables)
        target_adapters = {
            name: cls for name, cls in ADAPTER_REGISTRY.items() if name in requested
        }
        unknown = sorted(requested - target_adapters.keys())
        if unknown:
            # Surface typos / unregistered tables instead of dropping them silently.
            logger.warning(
                "No adapter registered for staging table(s): %s", ", ".join(unknown)
            )

    for table_name, adapter_cls in target_adapters.items():
        logger.info("Running adapter: %s (%s)", adapter_cls.__name__, table_name)
        try:
            # Instantiate inside the try so a broken constructor is isolated
            # the same way a failing ingest() is.
            adapter = adapter_cls()
            result = adapter.ingest(
                session, data_version_id=data_version_id, batch_size=batch_size
            )
            results[table_name] = result
            logger.info(
                " → rows=%d, entities=%d, rels=%d, events=%d, errors=%d",
                result.row_count,
                len(result.entities),
                len(result.relationships),
                len(result.metric_events),
                result.error_count,
            )
        except Exception as exc:
            # Top-level boundary: keep going with the other adapters, but keep
            # the traceback so the failure can be diagnosed.
            # NOTE(review): a failed adapter may leave `session` needing a
            # rollback before the next adapter uses it — confirm with callers.
            logger.exception("Adapter %s failed: %s", table_name, exc)
            results[table_name] = IngestResult(error_count=1)

    return results
|
||
|
||
|
||
def run_adapter(
    session: Session,
    staging_table: str,
    data_version_id: uuid.UUID | None = None,
    batch_size: int = 1000,
) -> IngestResult:
    """Run the single adapter registered for ``staging_table``.

    Args:
        session: Database session passed to the adapter's ``ingest``.
        staging_table: Key used to look the adapter up in ``ADAPTER_REGISTRY``.
        data_version_id: Forwarded to the adapter unchanged.
        batch_size: Forwarded to the adapter unchanged.

    Returns:
        Whatever the adapter's ``ingest`` returns.

    Raises:
        ValueError: If no adapter is registered for ``staging_table``.
    """
    registered_cls = ADAPTER_REGISTRY.get(staging_table)
    if registered_cls is not None:
        instance = registered_cls()
        return instance.ingest(
            session,
            data_version_id=data_version_id,
            batch_size=batch_size,
        )
    raise ValueError(f"未找到 staging 表 '{staging_table}' 对应的适配器")
|