feat: 添加线索引擎、NLQ、场景检测、前端界面等核心功能模块

This commit is contained in:
freedakgmail
2026-06-16 08:15:15 +08:00
parent 7b1e2b10a8
commit 48340f6011
62 changed files with 6772 additions and 65 deletions
+23
View File
@@ -0,0 +1,23 @@
"""接入适配器(P1.1):源明细 → 通用本体映射。
职责:
1. 从 staging(源明细)读取原始数据行;
2. 按映射规则投影为 Entity / EntityRelationship / MetricEvent
3. 保留源明细不可变(原始证据),本体层为分析基础。
设计原则:
- 每个源明细表对应一个 Adapter 类;
- Adapter 实现统一接口 `ingest(session, data_version_id)` → (entities, relationships, events)
- 映射逻辑集中于此模块,上层引擎/场景模块只依赖本体。
"""
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import ADAPTER_REGISTRY, get_adapter, register_adapter
__all__ = [
"BaseAdapter",
"IngestResult",
"ADAPTER_REGISTRY",
"get_adapter",
"register_adapter",
]
+360
View File
@@ -0,0 +1,360 @@
"""主数据适配器:将源明细中的主数据表映射到本体 Entity 层。
覆盖:SrcCustomer / SrcSupplier / SrcEmployee / SrcChannel / SrcMsisdn / SrcAccount
"""
from __future__ import annotations
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import (
SrcAccount,
SrcChannel,
SrcCustomer,
SrcEmployee,
SrcMsisdn,
SrcSupplier,
)
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class CustomerAdapter(BaseAdapter):
"""SrcCustomer → Entity(CUSTOMER) + 关系(REGISTERED_AT, LEGAL_REP_OF)。"""
source_system = "BSS"
staging_table = "src_customer"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
rows = session.query(SrcCustomer).filter(
SrcCustomer.data_version_id == data_version_id
).limit(batch_size).all() if data_version_id else session.query(SrcCustomer).limit(batch_size).all()
for row in rows:
try:
entity = upsert_entity(
session,
entity_type=EntityType.CUSTOMER,
business_key=row.customer_key,
display_name=row.customer_name,
attributes={
"customer_type": row.customer_type,
"uscc": row.uscc,
"open_date": str(row.open_date) if row.open_date else None,
},
data_version_id=data_version_id,
)
result.entities.append(entity)
# 注册地址 → Entity(ADDRESS) + 关系 REGISTERED_AT
if row.register_address:
addr_entity = upsert_entity(
session,
entity_type=EntityType.ADDRESS,
business_key=row.register_address,
display_name=row.register_address,
data_version_id=data_version_id,
)
result.entities.append(addr_entity)
rel = add_relationship(
session, RelationshipType.REGISTERED_AT, entity, addr_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
# 法人 → Entity(LEGAL_PERSON) + 关系 LEGAL_REP_OF
if row.legal_person:
lp_entity = upsert_entity(
session,
entity_type=EntityType.LEGAL_PERSON,
business_key=row.legal_person,
display_name=row.legal_person,
data_version_id=data_version_id,
)
result.entities.append(lp_entity)
rel = add_relationship(
session, RelationshipType.LEGAL_REP_OF, lp_entity, entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class SupplierAdapter(BaseAdapter):
"""SrcSupplier → Entity(SUPPLIER) + 关系(REGISTERED_AT, LEGAL_REP_OF)。"""
source_system = "ERP"
staging_table = "src_supplier"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
rows = session.query(SrcSupplier).filter(
SrcSupplier.data_version_id == data_version_id
).limit(batch_size).all() if data_version_id else session.query(SrcSupplier).limit(batch_size).all()
for row in rows:
try:
entity = upsert_entity(
session,
entity_type=EntityType.SUPPLIER,
business_key=row.supplier_key,
display_name=row.supplier_name,
attributes={
"uscc": row.uscc,
"shareholder_info": row.shareholder_info,
},
data_version_id=data_version_id,
)
result.entities.append(entity)
if row.register_address:
addr_entity = upsert_entity(
session,
entity_type=EntityType.ADDRESS,
business_key=row.register_address,
display_name=row.register_address,
data_version_id=data_version_id,
)
result.entities.append(addr_entity)
rel = add_relationship(
session, RelationshipType.REGISTERED_AT, entity, addr_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
if row.legal_person:
lp_entity = upsert_entity(
session,
entity_type=EntityType.LEGAL_PERSON,
business_key=row.legal_person,
display_name=row.legal_person,
data_version_id=data_version_id,
)
result.entities.append(lp_entity)
rel = add_relationship(
session, RelationshipType.LEGAL_REP_OF, lp_entity, entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class EmployeeAdapter(BaseAdapter):
"""SrcEmployee → Entity(EMPLOYEE)。"""
source_system = "ERP"
staging_table = "src_employee"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
rows = session.query(SrcEmployee).filter(
SrcEmployee.data_version_id == data_version_id
).limit(batch_size).all() if data_version_id else session.query(SrcEmployee).limit(batch_size).all()
for row in rows:
try:
upsert_entity(
session,
entity_type=EntityType.EMPLOYEE,
business_key=row.employee_key,
display_name=row.employee_name,
attributes={
"position": row.position,
"department": row.department,
"role_permissions": row.role_permissions,
},
data_version_id=data_version_id,
)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class ChannelAdapter(BaseAdapter):
"""SrcChannel → Entity(CHANNEL)。"""
source_system = "BSS"
staging_table = "src_channel"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
rows = session.query(SrcChannel).filter(
SrcChannel.data_version_id == data_version_id
).limit(batch_size).all() if data_version_id else session.query(SrcChannel).limit(batch_size).all()
for row in rows:
try:
upsert_entity(
session,
entity_type=EntityType.CHANNEL,
business_key=row.channel_key,
display_name=row.channel_name,
attributes={
"commission_policy": row.commission_policy,
"region": row.region,
},
data_version_id=data_version_id,
)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class MsisdnAdapter(BaseAdapter):
"""SrcMsisdn → Entity(MSISDN) + 关系(HOLDS_MSISDN)。"""
source_system = "BSS"
staging_table = "src_msisdn"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
rows = session.query(SrcMsisdn).filter(
SrcMsisdn.data_version_id == data_version_id
).limit(batch_size).all() if data_version_id else session.query(SrcMsisdn).limit(batch_size).all()
for row in rows:
try:
msisdn_entity = upsert_entity(
session,
entity_type=EntityType.MSISDN,
business_key=row.msisdn,
display_name=row.msisdn,
attributes={
"region": row.region,
"status": row.status,
"activate_date": str(row.activate_date) if row.activate_date else None,
"deactivate_date": str(row.deactivate_date) if row.deactivate_date else None,
},
data_version_id=data_version_id,
)
result.entities.append(msisdn_entity)
# 号码 → 客户持有关系
if row.customer_key:
cust_entity = upsert_entity(
session,
entity_type=EntityType.CUSTOMER,
business_key=row.customer_key,
data_version_id=data_version_id,
)
rel = add_relationship(
session, RelationshipType.HOLDS_MSISDN, cust_entity, msisdn_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class AccountAdapter(BaseAdapter):
"""SrcAccount → Entity(ACCOUNT) + 关系(OWNS_ACCOUNT)。"""
source_system = "FIN"
staging_table = "src_account"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
rows = session.query(SrcAccount).filter(
SrcAccount.data_version_id == data_version_id
).limit(batch_size).all() if data_version_id else session.query(SrcAccount).limit(batch_size).all()
for row in rows:
try:
acct_entity = upsert_entity(
session,
entity_type=EntityType.ACCOUNT,
business_key=row.account_key,
display_name=row.account_name,
attributes={
"bank_name": row.bank_name,
"branch_name": row.branch_name,
},
data_version_id=data_version_id,
)
result.entities.append(acct_entity)
# 账户所属主体关系
if row.owner_key and row.owner_type:
owner_type_map = {
"customer": EntityType.CUSTOMER,
"supplier": EntityType.SUPPLIER,
"legal_person": EntityType.LEGAL_PERSON,
}
etype = owner_type_map.get(row.owner_type)
if etype:
owner_entity = upsert_entity(
session,
entity_type=etype,
business_key=row.owner_key,
data_version_id=data_version_id,
)
rel = add_relationship(
session, RelationshipType.OWNS_ACCOUNT, owner_entity, acct_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
result.row_count += 1
except Exception:
result.error_count += 1
return result
+137
View File
@@ -0,0 +1,137 @@
"""R10 适配器:收入与成本跨期匹配。
源明细:SrcRevenueRecognition / SrcCostAmortization
映射到:MetricEvent(收入确认/成本摊销时序) + Entity(CONTRACT) 关联补强
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType
from app.datahub.staging import SrcCostAmortization, SrcRevenueRecognition
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class RevenueRecognitionAdapter(BaseAdapter):
"""SrcRevenueRecognition → MetricEvent(收入确认时序)。"""
source_system = "FIN"
staging_table = "src_revenue_recognition"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcRevenueRecognition)
if data_version_id:
query = query.filter(SrcRevenueRecognition.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 确保合同实体存在
if row.contract_no:
upsert_entity(
session,
entity_type=EntityType.CONTRACT,
business_key=row.contract_no,
data_version_id=data_version_id,
)
if row.recognition_date:
event_time = dt.datetime.combine(
row.recognition_date, dt.time.min, tzinfo=dt.timezone.utc
)
event = MetricEvent(
event_time=event_time,
subject_type="contract",
subject_key=row.contract_no or row.voucher_no,
metric_name="revenue_recognition",
metric_value=row.recognition_amount,
attributes={
"voucher_no": row.voucher_no,
"billing_mode": row.billing_mode,
"period_start": str(row.period_start) if row.period_start else None,
"period_end": str(row.period_end) if row.period_end else None,
"prepaid_flag": row.prepaid_flag,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class CostAmortizationAdapter(BaseAdapter):
"""SrcCostAmortization → MetricEvent(成本摊销时序)。"""
source_system = "FIN"
staging_table = "src_cost_amortization"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcCostAmortization)
if data_version_id:
query = query.filter(SrcCostAmortization.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
if row.contract_no:
upsert_entity(
session,
entity_type=EntityType.CONTRACT,
business_key=row.contract_no,
data_version_id=data_version_id,
)
if row.amortization_date:
event_time = dt.datetime.combine(
row.amortization_date, dt.time.min, tzinfo=dt.timezone.utc
)
event = MetricEvent(
event_time=event_time,
subject_type="contract",
subject_key=row.contract_no or row.voucher_no,
metric_name="cost_amortization",
metric_value=row.amortization_amount,
attributes={
"voucher_no": row.voucher_no,
"cost_type": row.cost_type,
"total_periods": row.total_periods,
"current_period": row.current_period,
"delivery_date": str(row.delivery_date) if row.delivery_date else None,
"acceptance_date": str(row.acceptance_date) if row.acceptance_date else None,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
+103
View File
@@ -0,0 +1,103 @@
"""R11 适配器:渠道佣金与代理商套利。
源明细:SrcTerminalBinding
映射到:Entity(IMEI, MSISDN) + 关系(BOUND_DEVICE) + MetricEvent
"""
from __future__ import annotations
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import SrcTerminalBinding
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class TerminalBindingAdapter(BaseAdapter):
"""SrcTerminalBinding → Entity(IMEI, MSISDN) + BOUND_DEVICE + MetricEvent。"""
source_system = "BSS"
staging_table = "src_terminal_binding"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcTerminalBinding)
if data_version_id:
query = query.filter(SrcTerminalBinding.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# IMEI 实体
imei_entity = upsert_entity(
session,
entity_type=EntityType.IMEI,
business_key=row.imei,
display_name=row.brand_model or row.imei,
attributes={
"brand_model": row.brand_model,
"region": row.region,
},
data_version_id=data_version_id,
)
result.entities.append(imei_entity)
# MSISDN 实体
msisdn_entity = upsert_entity(
session,
entity_type=EntityType.MSISDN,
business_key=row.msisdn,
display_name=row.msisdn,
attributes={"region": row.region},
data_version_id=data_version_id,
)
result.entities.append(msisdn_entity)
# 绑定关系
rel = add_relationship(
session, RelationshipType.BOUND_DEVICE, msisdn_entity, imei_entity,
attributes={
"activate_time": str(row.activate_time) if row.activate_time else None,
"subsidy_amount": row.subsidy_amount,
},
data_version_id=data_version_id,
)
result.relationships.append(rel)
# 终端激活/补贴事件
if row.activate_time:
event = MetricEvent(
event_time=row.activate_time,
subject_type="imei",
subject_key=row.imei,
metric_name="terminal_activate",
metric_value=row.subsidy_amount + row.commission_amount,
attributes={
"msisdn": row.msisdn,
"subsidy_amount": row.subsidy_amount,
"commission_amount": row.commission_amount,
"online_days": row.online_days,
"post_activate_traffic_mb": row.post_activate_traffic_mb,
"cross_province_flag": row.cross_province_flag,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
+200
View File
@@ -0,0 +1,200 @@
"""R12 适配器:网络建设与工程采购。
源明细:SrcBidding / SrcProjectSignoff
映射到:Entity(SUPPLIER, WORK_ORDER) + 关系(BIDS_FOR, SUPPLIES) + MetricEvent
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import SrcBidding, SrcProjectSignoff
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class BiddingAdapter(BaseAdapter):
"""SrcBidding → Entity(SUPPLIER, WORK_ORDER) + 关系(BIDS_FOR) + MetricEvent。"""
source_system = "ERP"
staging_table = "src_bidding"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcBidding)
if data_version_id:
query = query.filter(SrcBidding.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 供应商(投标人)实体
supplier_entity = upsert_entity(
session,
entity_type=EntityType.SUPPLIER,
business_key=row.bidder_key,
display_name=row.bidder_name,
attributes={
"legal_person": row.legal_person,
"shareholder_info": row.shareholder_info,
},
data_version_id=data_version_id,
)
result.entities.append(supplier_entity)
# 工单/项目实体
wo_entity = upsert_entity(
session,
entity_type=EntityType.WORK_ORDER,
business_key=row.project_no,
display_name=row.project_name,
data_version_id=data_version_id,
)
result.entities.append(wo_entity)
# 投标关系
rel = add_relationship(
session, RelationshipType.BIDS_FOR, supplier_entity, wo_entity,
attributes={
"bid_amount": row.bid_amount,
"win_flag": row.win_flag,
"technical_score": row.technical_score,
},
data_version_id=data_version_id,
)
result.relationships.append(rel)
# 中标 → 补充 SUPPLIES 关系
if row.win_flag and row.win_flag.upper() == "Y":
rel2 = add_relationship(
session, RelationshipType.SUPPLIES, supplier_entity, wo_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel2)
# 法人实体
if row.legal_person:
lp_entity = upsert_entity(
session,
entity_type=EntityType.LEGAL_PERSON,
business_key=row.legal_person,
display_name=row.legal_person,
data_version_id=data_version_id,
)
add_relationship(
session, RelationshipType.LEGAL_REP_OF, lp_entity, supplier_entity,
data_version_id=data_version_id,
)
# 投标事件
if row.bid_time:
event = MetricEvent(
event_time=row.bid_time,
subject_type="work_order",
subject_key=row.project_no,
metric_name="bid_submitted",
metric_value=row.bid_amount or 0.0,
attributes={
"bidder_key": row.bidder_key,
"bidder_name": row.bidder_name,
"win_flag": row.win_flag,
"technical_score": row.technical_score,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class ProjectSignoffAdapter(BaseAdapter):
"""SrcProjectSignoff → MetricEvent(工程签证/巡检时序)。"""
source_system = "WO"
staging_table = "src_project_signoff"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcProjectSignoff)
if data_version_id:
query = query.filter(SrcProjectSignoff.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 确保工单实体存在
upsert_entity(
session,
entity_type=EntityType.WORK_ORDER,
business_key=row.project_no,
data_version_id=data_version_id,
)
# 签证事件
if row.signoff_date:
event_time = dt.datetime.combine(
row.signoff_date, dt.time.min, tzinfo=dt.timezone.utc
)
event = MetricEvent(
event_time=event_time,
subject_type="work_order",
subject_key=row.project_no,
metric_name="signoff_quantity",
metric_value=row.signoff_quantity or 0.0,
attributes={
"work_order_no": row.work_order_no,
"unit": row.unit,
"resource_consumed": row.resource_consumed,
"contractor_key": row.contractor_key,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
# 巡检 GPS 事件
if row.inspection_time and row.inspection_lat:
event2 = MetricEvent(
event_time=row.inspection_time,
subject_type="work_order",
subject_key=row.project_no,
metric_name="inspection",
metric_value=1.0,
attributes={
"lat": row.inspection_lat,
"lng": row.inspection_lng,
"work_order_no": row.work_order_no,
},
data_version_id=data_version_id,
)
session.add(event2)
result.metric_events.append(event2)
result.row_count += 1
except Exception:
result.error_count += 1
return result
+147
View File
@@ -0,0 +1,147 @@
"""R13 适配器:互联互通与网间结算。
源明细:SrcCdr / SrcInterconnectSettlement
映射到:Entity(MSISDN, SETTLEMENT) + MetricEvent
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType
from app.datahub.staging import SrcCdr, SrcInterconnectSettlement
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class CdrAdapter(BaseAdapter):
"""SrcCdr → Entity(MSISDN) + MetricEvent(话务时序)。"""
source_system = "SIGNAL"
staging_table = "src_cdr"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcCdr)
if data_version_id:
query = query.filter(SrcCdr.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 确保主被叫号码实体存在
upsert_entity(
session,
entity_type=EntityType.MSISDN,
business_key=row.caller,
display_name=row.caller,
data_version_id=data_version_id,
)
upsert_entity(
session,
entity_type=EntityType.MSISDN,
business_key=row.callee,
display_name=row.callee,
data_version_id=data_version_id,
)
# 话务事件
event = MetricEvent(
event_time=row.start_time,
subject_type="msisdn",
subject_key=row.caller,
metric_name="cdr_duration",
metric_value=float(row.duration_sec),
attributes={
"callee": row.callee,
"call_type": row.call_type,
"peer_operator": row.peer_operator,
"route_info": row.route_info,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class InterconnectSettlementAdapter(BaseAdapter):
"""SrcInterconnectSettlement → Entity(SETTLEMENT) + MetricEvent。"""
source_system = "FIN"
staging_table = "src_interconnect_settlement"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcInterconnectSettlement)
if data_version_id:
query = query.filter(SrcInterconnectSettlement.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 结算单实体
settle_entity = upsert_entity(
session,
entity_type=EntityType.SETTLEMENT,
business_key=row.settlement_no,
display_name=f"网间结算-{row.settlement_no}",
attributes={
"peer_operator": row.peer_operator,
"settle_type": row.settle_type,
},
data_version_id=data_version_id,
)
result.entities.append(settle_entity)
# 结算时序事件
try:
event_time = dt.datetime.strptime(
row.settle_period, "%Y-%m"
).replace(tzinfo=dt.timezone.utc)
except ValueError:
event_time = dt.datetime.now(dt.timezone.utc)
event = MetricEvent(
event_time=event_time,
subject_type="settlement",
subject_key=row.settlement_no,
metric_name="interconnect_settle",
metric_value=row.settle_amount,
attributes={
"peer_operator": row.peer_operator,
"settle_type": row.settle_type,
"volume": row.volume,
"unit_price": row.unit_price,
"sms_delivery_rate": row.sms_delivery_rate,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
+149
View File
@@ -0,0 +1,149 @@
"""R14 适配器:云业务 / IDC 与新兴业务。
源明细:SrcCloudUsage / SrcIdcCabinet
映射到:Entity(CONTRACT, CUSTOMER) + MetricEvent
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType
from app.datahub.staging import SrcCloudUsage, SrcIdcCabinet
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class CloudUsageAdapter(BaseAdapter):
"""SrcCloudUsage → Entity(CONTRACT) + MetricEvent(云资源用量时序)。"""
source_system = "BSS"
staging_table = "src_cloud_usage"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcCloudUsage)
if data_version_id:
query = query.filter(SrcCloudUsage.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 合同实体
upsert_entity(
session,
entity_type=EntityType.CONTRACT,
business_key=row.contract_no,
data_version_id=data_version_id,
)
# 客户实体(如有)
if row.customer_key:
upsert_entity(
session,
entity_type=EntityType.CUSTOMER,
business_key=row.customer_key,
data_version_id=data_version_id,
)
# 云资源用量事件
if row.usage_date:
event_time = dt.datetime.combine(
row.usage_date, dt.time.min, tzinfo=dt.timezone.utc
)
event = MetricEvent(
event_time=event_time,
subject_type="contract",
subject_key=row.contract_no,
metric_name="cloud_usage",
metric_value=row.actual_usage,
attributes={
"resource_type": row.resource_type,
"contracted_quota": row.contracted_quota,
"billed_usage": row.billed_usage,
"unit": row.unit,
"customer_key": row.customer_key,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class IdcCabinetAdapter(BaseAdapter):
"""SrcIdcCabinet → MetricEventIDC 机柜出租率/电力时序)。"""
source_system = "OSS"
staging_table = "src_idc_cabinet"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcIdcCabinet)
if data_version_id:
query = query.filter(SrcIdcCabinet.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 合同实体(如有)
if row.contract_no:
upsert_entity(
session,
entity_type=EntityType.CONTRACT,
business_key=row.contract_no,
data_version_id=data_version_id,
)
# IDC 出租/电力事件
try:
event_time = dt.datetime.strptime(
row.report_month, "%Y-%m"
).replace(tzinfo=dt.timezone.utc) if row.report_month else dt.datetime.now(dt.timezone.utc)
except ValueError:
event_time = dt.datetime.now(dt.timezone.utc)
event = MetricEvent(
event_time=event_time,
subject_type="contract",
subject_key=row.contract_no or row.cabinet_id,
metric_name="idc_cabinet",
metric_value=row.occupancy_rate or 0.0,
attributes={
"cabinet_id": row.cabinet_id,
"customer_key": row.customer_key,
"power_kwh": row.power_kwh,
"revenue_amount": row.revenue_amount,
"acceptance_date": str(row.acceptance_date) if row.acceptance_date else None,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
+237
View File
@@ -0,0 +1,237 @@
"""R15 适配器:员工内部舞弊与资源滥用。
源明细:SrcEmployeeOperation / SrcInternalMsisdn / SrcPointsTransaction
映射到:Entity(EMPLOYEE, MSISDN) + 关系(OPERATES) + MetricEvent
"""
from __future__ import annotations
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import (
SrcEmployeeOperation,
SrcInternalMsisdn,
SrcPointsTransaction,
)
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class EmployeeOperationAdapter(BaseAdapter):
"""SrcEmployeeOperation → Entity(EMPLOYEE) + 关系(OPERATES) + MetricEvent。"""
source_system = "BSS"
staging_table = "src_employee_operation"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcEmployeeOperation)
if data_version_id:
query = query.filter(SrcEmployeeOperation.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 员工实体
emp_entity = upsert_entity(
session,
entity_type=EntityType.EMPLOYEE,
business_key=row.employee_key,
display_name=row.employee_name,
attributes={
"position": row.position,
"department": row.department,
"role_permissions": row.role_permissions,
},
data_version_id=data_version_id,
)
result.entities.append(emp_entity)
# 操作目标 → OPERATES 关系(如操作对象是号码或账户)
if row.operation_target:
# 尝试识别操作目标类型(简单启发式:以1开头长度11为号码,否则为账户)
target_key = row.operation_target.strip()
if target_key.isdigit() and len(target_key) == 11:
target_entity = upsert_entity(
session,
entity_type=EntityType.MSISDN,
business_key=target_key,
data_version_id=data_version_id,
)
rel = add_relationship(
session, RelationshipType.OPERATES, emp_entity, target_entity,
attributes={"operation_type": row.operation_type},
data_version_id=data_version_id,
)
result.relationships.append(rel)
# 操作日志事件
if row.operation_time:
event = MetricEvent(
event_time=row.operation_time,
subject_type="employee",
subject_key=row.employee_key,
metric_name="operation_log",
metric_value=1.0,
attributes={
"operation_type": row.operation_type,
"operation_target": row.operation_target,
"position": row.position,
"department": row.department,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class InternalMsisdnAdapter(BaseAdapter):
"""SrcInternalMsisdn → Entity(MSISDN, EMPLOYEE) + 关系(OPERATES) + MetricEvent。"""
source_system = "BSS"
staging_table = "src_internal_msisdn"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcInternalMsisdn)
if data_version_id:
query = query.filter(SrcInternalMsisdn.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 内部号码实体
msisdn_entity = upsert_entity(
session,
entity_type=EntityType.MSISDN,
business_key=row.msisdn,
display_name=row.msisdn,
attributes={"purpose": row.purpose, "internal": True},
data_version_id=data_version_id,
)
result.entities.append(msisdn_entity)
# 分配员工 → OPERATES 关系
if row.assigned_employee:
emp_entity = upsert_entity(
session,
entity_type=EntityType.EMPLOYEE,
business_key=row.assigned_employee,
data_version_id=data_version_id,
)
rel = add_relationship(
session, RelationshipType.OPERATES, emp_entity, msisdn_entity,
attributes={"purpose": row.purpose},
data_version_id=data_version_id,
)
result.relationships.append(rel)
# 内部号用量事件
import datetime as dt
try:
event_time = dt.datetime.strptime(
row.report_month, "%Y-%m"
).replace(tzinfo=dt.timezone.utc) if row.report_month else dt.datetime.now(dt.timezone.utc)
except ValueError:
event_time = dt.datetime.now(dt.timezone.utc)
event = MetricEvent(
event_time=event_time,
subject_type="msisdn",
subject_key=row.msisdn,
metric_name="internal_usage",
metric_value=row.traffic_mb,
attributes={
"voice_min": row.voice_min,
"revenue_attributed": row.revenue_attributed,
"assigned_employee": row.assigned_employee,
"purpose": row.purpose,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class PointsTransactionAdapter(BaseAdapter):
"""SrcPointsTransaction → MetricEvent(积分发放/兑换时序)。"""
source_system = "BSS"
staging_table = "src_points_transaction"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcPointsTransaction)
if data_version_id:
query = query.filter(SrcPointsTransaction.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 确保操作人实体存在
upsert_entity(
session,
entity_type=EntityType.EMPLOYEE,
business_key=row.operator_key,
data_version_id=data_version_id,
)
# 积分事件
if row.transaction_time:
event = MetricEvent(
event_time=row.transaction_time,
subject_type="employee",
subject_key=row.operator_key,
metric_name="points_transaction",
metric_value=row.points_amount,
attributes={
"transaction_no": row.transaction_no,
"target_account": row.target_account,
"transaction_type": row.transaction_type,
"cash_value": row.cash_value,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
+236
View File
@@ -0,0 +1,236 @@
"""R8 适配器:政企收入全链路穿透 / 拆单规避。
源明细:SrcContract / SrcContractApproval / SrcPayment
映射到:Entity(CONTRACT, CUSTOMER, ACCOUNT, ADDRESS, LEGAL_PERSON) + 关系 + MetricEvent
"""
from __future__ import annotations
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import SrcContract, SrcContractApproval, SrcPayment
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class ContractAdapter(BaseAdapter):
"""SrcContract → Entity(CONTRACT, CUSTOMER, ACCOUNT, ADDRESS, LEGAL_PERSON) + 关系。"""
source_system = "CONTRACT"
staging_table = "src_contract"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcContract)
if data_version_id:
query = query.filter(SrcContract.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 合同实体
contract_entity = upsert_entity(
session,
entity_type=EntityType.CONTRACT,
business_key=row.contract_no,
display_name=f"合同-{row.contract_no}",
attributes={
"amount": row.amount,
"sign_date": str(row.sign_date) if row.sign_date else None,
"approval_threshold": row.approval_threshold,
"approval_level": row.approval_level,
},
data_version_id=data_version_id,
)
result.entities.append(contract_entity)
# 客户实体 + 签约关系
cust_entity = upsert_entity(
session,
entity_type=EntityType.CUSTOMER,
business_key=row.customer_key,
display_name=row.customer_name,
data_version_id=data_version_id,
)
result.entities.append(cust_entity)
rel = add_relationship(
session, RelationshipType.SIGNED, cust_entity, contract_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
# 回款账户 → Entity(ACCOUNT) + 关系 PAID_BY
if row.pay_account:
acct_entity = upsert_entity(
session,
entity_type=EntityType.ACCOUNT,
business_key=row.pay_account,
data_version_id=data_version_id,
)
result.entities.append(acct_entity)
rel = add_relationship(
session, RelationshipType.PAID_BY, contract_entity, acct_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
# 注册地址
if row.register_address:
addr_entity = upsert_entity(
session,
entity_type=EntityType.ADDRESS,
business_key=row.register_address,
display_name=row.register_address,
data_version_id=data_version_id,
)
rel = add_relationship(
session, RelationshipType.REGISTERED_AT, cust_entity, addr_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
# 法人
if row.legal_person:
lp_entity = upsert_entity(
session,
entity_type=EntityType.LEGAL_PERSON,
business_key=row.legal_person,
display_name=row.legal_person,
data_version_id=data_version_id,
)
rel = add_relationship(
session, RelationshipType.LEGAL_REP_OF, lp_entity, cust_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class ContractApprovalAdapter(BaseAdapter):
"""SrcContractApproval → MetricEvent(审批时序事件)。"""
source_system = "CONTRACT"
staging_table = "src_contract_approval"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcContractApproval)
if data_version_id:
query = query.filter(SrcContractApproval.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
if row.approval_time:
event = MetricEvent(
event_time=row.approval_time,
subject_type="contract",
subject_key=row.contract_no,
metric_name="approval_step",
metric_value=float(row.approval_step),
attributes={
"approver": row.approver,
"result": row.approval_result,
"remark": row.remark,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class PaymentAdapter(BaseAdapter):
"""SrcPayment → MetricEvent(回款时序事件) + 关系补强。"""
source_system = "FIN"
staging_table = "src_payment"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcPayment)
if data_version_id:
query = query.filter(SrcPayment.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
if row.pay_date:
import datetime as dt
event_time = dt.datetime.combine(
row.pay_date, dt.time.min, tzinfo=dt.timezone.utc
)
event = MetricEvent(
event_time=event_time,
subject_type="contract",
subject_key=row.contract_no,
metric_name="payment",
metric_value=row.pay_amount,
attributes={
"pay_account": row.pay_account,
"pay_type": row.pay_type,
"overdue_flag": row.overdue_flag,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
# 强化合同→账户关系
if row.pay_account:
contract_entity = upsert_entity(
session,
entity_type=EntityType.CONTRACT,
business_key=row.contract_no,
data_version_id=data_version_id,
)
acct_entity = upsert_entity(
session,
entity_type=EntityType.ACCOUNT,
business_key=row.pay_account,
data_version_id=data_version_id,
)
rel = add_relationship(
session, RelationshipType.PAID_BY, contract_entity, acct_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
result.row_count += 1
except Exception:
result.error_count += 1
return result
+184
View File
@@ -0,0 +1,184 @@
"""R9 适配器:市场业务真实性 / 养卡骗补。
源明细:SrcChannelMonthly / SrcSubscription
映射到:Entity(CHANNEL, MSISDN) + 关系(BELONGS_TO_CHANNEL, SUBSCRIBES) + MetricEvent
"""
from __future__ import annotations
import datetime as dt
import uuid
from sqlalchemy.orm import Session
from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.models import MetricEvent
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import SrcChannelMonthly, SrcSubscription
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class ChannelMonthlyAdapter(BaseAdapter):
"""SrcChannelMonthly → MetricEvent(渠道月度留存/佣金时序)。"""
source_system = "BSS"
staging_table = "src_channel_monthly"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcChannelMonthly)
if data_version_id:
query = query.filter(SrcChannelMonthly.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# 确保渠道实体存在
upsert_entity(
session,
entity_type=EntityType.CHANNEL,
business_key=row.channel_key,
data_version_id=data_version_id,
)
# cohort_label 如 "2025-01" → 转为时间
try:
event_time = dt.datetime.strptime(
row.cohort_label, "%Y-%m"
).replace(tzinfo=dt.timezone.utc)
except ValueError:
event_time = dt.datetime.now(dt.timezone.utc)
# 留存率事件
event = MetricEvent(
event_time=event_time,
subject_type="channel",
subject_key=row.channel_key,
metric_name="retention",
metric_value=row.retained / row.cohort_size if row.cohort_size > 0 else 0.0,
attributes={
"cohort_label": row.cohort_label,
"month_index": row.month_index,
"cohort_size": row.cohort_size,
"retained": row.retained,
"commission_paid": row.commission_paid,
"active_ratio": row.active_ratio,
"zero_usage_ratio": row.zero_usage_ratio,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
@register_adapter
class SubscriptionAdapter(BaseAdapter):
"""SrcSubscription → Entity(MSISDN) + 关系(BELONGS_TO_CHANNEL, SUBSCRIBES) + MetricEvent。"""
source_system = "BSS"
staging_table = "src_subscription"
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
result = IngestResult()
query = session.query(SrcSubscription)
if data_version_id:
query = query.filter(SrcSubscription.data_version_id == data_version_id)
rows = query.limit(batch_size).all()
for row in rows:
try:
# MSISDN 实体
msisdn_entity = upsert_entity(
session,
entity_type=EntityType.MSISDN,
business_key=row.msisdn,
display_name=row.msisdn,
attributes={"region": row.region},
data_version_id=data_version_id,
)
result.entities.append(msisdn_entity)
# 渠道归属关系
if row.channel_key:
chan_entity = upsert_entity(
session,
entity_type=EntityType.CHANNEL,
business_key=row.channel_key,
data_version_id=data_version_id,
)
rel = add_relationship(
session, RelationshipType.BELONGS_TO_CHANNEL, msisdn_entity, chan_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
# 订购关系(号码→合同/产品)
if row.product_code:
contract_entity = upsert_entity(
session,
entity_type=EntityType.CONTRACT,
business_key=row.product_code,
display_name=f"产品-{row.product_code}",
data_version_id=data_version_id,
)
rel = add_relationship(
session, RelationshipType.SUBSCRIBES, msisdn_entity, contract_entity,
data_version_id=data_version_id,
)
result.relationships.append(rel)
# 订购/退订时序事件
if row.subscribe_time:
event = MetricEvent(
event_time=row.subscribe_time,
subject_type="msisdn",
subject_key=row.msisdn,
metric_name="subscribe",
metric_value=1.0,
attributes={
"channel_key": row.channel_key,
"product_code": row.product_code,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
if row.unsubscribe_time:
event = MetricEvent(
event_time=row.unsubscribe_time,
subject_type="msisdn",
subject_key=row.msisdn,
metric_name="unsubscribe",
metric_value=-1.0,
attributes={
"channel_key": row.channel_key,
"product_code": row.product_code,
},
data_version_id=data_version_id,
)
session.add(event)
result.metric_events.append(event)
result.row_count += 1
except Exception:
result.error_count += 1
return result
+53
View File
@@ -0,0 +1,53 @@
"""接入适配器基类与通用数据结构。"""
from __future__ import annotations
import uuid
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from sqlalchemy.orm import Session
from app.datahub.models import Entity, EntityRelationship, MetricEvent
@dataclass
class IngestResult:
"""单次适配器执行的输出汇总。"""
entities: list[Entity] = field(default_factory=list)
relationships: list[EntityRelationship] = field(default_factory=list)
metric_events: list[MetricEvent] = field(default_factory=list)
row_count: int = 0
error_count: int = 0
class BaseAdapter(ABC):
"""接入适配器抽象基类。
每个源明细表实现一个子类,负责将 staging 行映射到本体层。
"""
# 子类须指定所适配的源系统标识(如 "BSS", "ERP"
source_system: str = ""
# 子类须指定所适配的 staging 表名
staging_table: str = ""
@abstractmethod
def ingest(
self,
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
"""从 staging 表读取未处理行,映射写入本体层。
Args:
session: 数据库会话
data_version_id: 当前批次的数据版本 ID
batch_size: 每批处理行数
Returns:
IngestResult 汇总
"""
...
+22
View File
@@ -0,0 +1,22 @@
"""适配器注册表:按 staging 表名索引,便于调度器统一调用。"""
from __future__ import annotations
from typing import Type
from app.ingest.base import BaseAdapter
# 全局注册表:staging_table -> Adapter 类
ADAPTER_REGISTRY: dict[str, Type[BaseAdapter]] = {}
def register_adapter(cls: Type[BaseAdapter]) -> Type[BaseAdapter]:
"""类装饰器:将 Adapter 注册到全局表。"""
if cls.staging_table:
ADAPTER_REGISTRY[cls.staging_table] = cls
return cls
def get_adapter(staging_table: str) -> Type[BaseAdapter] | None:
"""按 staging 表名查找已注册的适配器类。"""
return ADAPTER_REGISTRY.get(staging_table)
+89
View File
@@ -0,0 +1,89 @@
"""接入适配器调度器:统一驱动全部 Adapter 执行 staging → 本体映射。
用法:
from app.ingest.runner import run_all_adapters
results = run_all_adapters(session, data_version_id)
"""
from __future__ import annotations
import logging
import uuid
from sqlalchemy.orm import Session
from app.ingest.base import IngestResult
from app.ingest.registry import ADAPTER_REGISTRY
# 确保所有适配器模块被导入,触发 @register_adapter 注册
import app.ingest.adapters_master # noqa: F401
import app.ingest.adapters_r8 # noqa: F401
import app.ingest.adapters_r9 # noqa: F401
import app.ingest.adapters_r10 # noqa: F401
import app.ingest.adapters_r11 # noqa: F401
import app.ingest.adapters_r12 # noqa: F401
import app.ingest.adapters_r13 # noqa: F401
import app.ingest.adapters_r14 # noqa: F401
import app.ingest.adapters_r15 # noqa: F401
logger = logging.getLogger(__name__)
def run_all_adapters(
session: Session,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
tables: list[str] | None = None,
) -> dict[str, IngestResult]:
"""执行全部(或指定的)适配器,返回 {staging_table: IngestResult}。
Args:
session: 数据库会话(调用方负责 commit/rollback
data_version_id: 当前批次数据版本 ID
batch_size: 每个适配器单次处理行数上限
tables: 若指定,仅执行这些 staging 表对应的适配器;为 None 时执行全部
Returns:
各适配器的执行结果字典
"""
results: dict[str, IngestResult] = {}
target_adapters = ADAPTER_REGISTRY
if tables:
target_adapters = {k: v for k, v in ADAPTER_REGISTRY.items() if k in tables}
for table_name, adapter_cls in target_adapters.items():
logger.info("Running adapter: %s (%s)", adapter_cls.__name__, table_name)
adapter = adapter_cls()
try:
result = adapter.ingest(
session, data_version_id=data_version_id, batch_size=batch_size
)
results[table_name] = result
logger.info(
" → rows=%d, entities=%d, rels=%d, events=%d, errors=%d",
result.row_count,
len(result.entities),
len(result.relationships),
len(result.metric_events),
result.error_count,
)
except Exception as exc:
logger.error("Adapter %s failed: %s", table_name, exc)
results[table_name] = IngestResult(error_count=1)
return results
def run_adapter(
session: Session,
staging_table: str,
data_version_id: uuid.UUID | None = None,
batch_size: int = 1000,
) -> IngestResult:
"""执行单个指定 staging 表的适配器。"""
adapter_cls = ADAPTER_REGISTRY.get(staging_table)
if adapter_cls is None:
raise ValueError(f"未找到 staging 表 '{staging_table}' 对应的适配器")
adapter = adapter_cls()
return adapter.ingest(session, data_version_id=data_version_id, batch_size=batch_size)