Files
InternalAuditInterprise/backend/app/ingest/adapters_master.py
T

361 lines
13 KiB
Python

"""主数据适配器:将源明细中的主数据表映射到本体 Entity 层。
覆盖:SrcCustomer / SrcSupplier / SrcEmployee / SrcChannel / SrcMsisdn / SrcAccount
"""
from __future__ import annotations

import logging
import uuid

from sqlalchemy.orm import Session

from app.datahub.graph_repo import add_relationship, upsert_entity
from app.datahub.ontology import EntityType, RelationshipType
from app.datahub.staging import (
    SrcAccount,
    SrcChannel,
    SrcCustomer,
    SrcEmployee,
    SrcMsisdn,
    SrcSupplier,
)
from app.ingest.base import BaseAdapter, IngestResult
from app.ingest.registry import register_adapter
@register_adapter
class CustomerAdapter(BaseAdapter):
    """Map ``SrcCustomer`` staging rows onto CUSTOMER entities.

    For each row a CUSTOMER entity is upserted. If the row has a registered
    address, an ADDRESS entity plus a REGISTERED_AT relationship (customer,
    address) is added; if it has a legal person, a LEGAL_PERSON entity plus a
    LEGAL_REP_OF relationship (legal person, customer) is added.
    """

    source_system = "BSS"
    staging_table = "src_customer"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest customer staging rows into the entity graph.

        Args:
            session: Session used to query the staging table; it is also
                handed to ``upsert_entity`` / ``add_relationship``.
            data_version_id: When truthy, only rows with this data version
                are read. It is forwarded to every graph helper call.
            batch_size: Upper bound on the number of staging rows read.

        Returns:
            An ``IngestResult`` holding the upserted entities, the created
            relationships, ``row_count`` (rows fully processed) and
            ``error_count`` (rows that raised).
        """
        log = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): limit() caps this call at ``batch_size`` rows and the
        # query has neither ORDER BY nor an offset, so rows past the first
        # batch are not read by this call — confirm callers expect that.
        query = session.query(SrcCustomer)
        if data_version_id:
            query = query.filter(SrcCustomer.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            # Read inside the try so a failing attribute load is still counted
            # as a row error; kept in a local so the handler can log it safely.
            customer_key = None
            try:
                customer_key = row.customer_key
                entity = upsert_entity(
                    session,
                    entity_type=EntityType.CUSTOMER,
                    business_key=customer_key,
                    display_name=row.customer_name,
                    attributes={
                        "customer_type": row.customer_type,
                        "uscc": row.uscc,
                        "open_date": str(row.open_date) if row.open_date else None,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(entity)

                # Registered address -> Entity(ADDRESS) + REGISTERED_AT.
                if row.register_address:
                    addr_entity = upsert_entity(
                        session,
                        entity_type=EntityType.ADDRESS,
                        business_key=row.register_address,
                        display_name=row.register_address,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(addr_entity)
                    rel = add_relationship(
                        session,
                        RelationshipType.REGISTERED_AT,
                        entity,
                        addr_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                # Legal person -> Entity(LEGAL_PERSON) + LEGAL_REP_OF.
                if row.legal_person:
                    lp_entity = upsert_entity(
                        session,
                        entity_type=EntityType.LEGAL_PERSON,
                        business_key=row.legal_person,
                        display_name=row.legal_person,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(lp_entity)
                    rel = add_relationship(
                        session,
                        RelationshipType.LEGAL_REP_OF,
                        lp_entity,
                        entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                result.row_count += 1
            except Exception:
                # Best-effort per row: count the failure and keep going, but
                # record the traceback so the failure can be diagnosed.
                # NOTE(review): if the error came from a database flush the
                # session may need a rollback before later rows can succeed —
                # consider a per-row SAVEPOINT (session.begin_nested()).
                result.error_count += 1
                log.exception(
                    "Failed to ingest %s row (customer_key=%r)",
                    self.staging_table,
                    customer_key,
                )
        return result
@register_adapter
class SupplierAdapter(BaseAdapter):
    """Map ``SrcSupplier`` staging rows onto SUPPLIER entities.

    For each row a SUPPLIER entity is upserted. If the row has a registered
    address, an ADDRESS entity plus a REGISTERED_AT relationship (supplier,
    address) is added; if it has a legal person, a LEGAL_PERSON entity plus a
    LEGAL_REP_OF relationship (legal person, supplier) is added.
    """

    source_system = "ERP"
    staging_table = "src_supplier"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest supplier staging rows into the entity graph.

        Args:
            session: Session used to query the staging table; it is also
                handed to ``upsert_entity`` / ``add_relationship``.
            data_version_id: When truthy, only rows with this data version
                are read. It is forwarded to every graph helper call.
            batch_size: Upper bound on the number of staging rows read.

        Returns:
            An ``IngestResult`` holding the upserted entities, the created
            relationships, ``row_count`` (rows fully processed) and
            ``error_count`` (rows that raised).
        """
        log = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): limit() caps this call at ``batch_size`` rows and the
        # query has neither ORDER BY nor an offset, so rows past the first
        # batch are not read by this call — confirm callers expect that.
        query = session.query(SrcSupplier)
        if data_version_id:
            query = query.filter(SrcSupplier.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            # Read inside the try so a failing attribute load is still counted
            # as a row error; kept in a local so the handler can log it safely.
            supplier_key = None
            try:
                supplier_key = row.supplier_key
                entity = upsert_entity(
                    session,
                    entity_type=EntityType.SUPPLIER,
                    business_key=supplier_key,
                    display_name=row.supplier_name,
                    attributes={
                        "uscc": row.uscc,
                        "shareholder_info": row.shareholder_info,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(entity)

                # Registered address -> Entity(ADDRESS) + REGISTERED_AT.
                if row.register_address:
                    addr_entity = upsert_entity(
                        session,
                        entity_type=EntityType.ADDRESS,
                        business_key=row.register_address,
                        display_name=row.register_address,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(addr_entity)
                    rel = add_relationship(
                        session,
                        RelationshipType.REGISTERED_AT,
                        entity,
                        addr_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                # Legal person -> Entity(LEGAL_PERSON) + LEGAL_REP_OF.
                if row.legal_person:
                    lp_entity = upsert_entity(
                        session,
                        entity_type=EntityType.LEGAL_PERSON,
                        business_key=row.legal_person,
                        display_name=row.legal_person,
                        data_version_id=data_version_id,
                    )
                    result.entities.append(lp_entity)
                    rel = add_relationship(
                        session,
                        RelationshipType.LEGAL_REP_OF,
                        lp_entity,
                        entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                result.row_count += 1
            except Exception:
                # Best-effort per row: count the failure and keep going, but
                # record the traceback so the failure can be diagnosed.
                # NOTE(review): if the error came from a database flush the
                # session may need a rollback before later rows can succeed —
                # consider a per-row SAVEPOINT (session.begin_nested()).
                result.error_count += 1
                log.exception(
                    "Failed to ingest %s row (supplier_key=%r)",
                    self.staging_table,
                    supplier_key,
                )
        return result
@register_adapter
class EmployeeAdapter(BaseAdapter):
    """Map ``SrcEmployee`` staging rows onto EMPLOYEE entities."""

    source_system = "ERP"
    staging_table = "src_employee"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest employee staging rows into the entity graph.

        Args:
            session: Session used to query the staging table; it is also
                handed to ``upsert_entity``.
            data_version_id: When truthy, only rows with this data version
                are read. It is forwarded to ``upsert_entity``.
            batch_size: Upper bound on the number of staging rows read.

        Returns:
            An ``IngestResult`` holding the upserted entities, ``row_count``
            (rows fully processed) and ``error_count`` (rows that raised).
        """
        log = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): limit() caps this call at ``batch_size`` rows and the
        # query has neither ORDER BY nor an offset, so rows past the first
        # batch are not read by this call — confirm callers expect that.
        query = session.query(SrcEmployee)
        if data_version_id:
            query = query.filter(SrcEmployee.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            # Read inside the try so a failing attribute load is still counted
            # as a row error; kept in a local so the handler can log it safely.
            employee_key = None
            try:
                employee_key = row.employee_key
                entity = upsert_entity(
                    session,
                    entity_type=EntityType.EMPLOYEE,
                    business_key=employee_key,
                    display_name=row.employee_name,
                    attributes={
                        "position": row.position,
                        "department": row.department,
                        "role_permissions": row.role_permissions,
                    },
                    data_version_id=data_version_id,
                )
                # Report the entity like the other master-data adapters in
                # this module do; previously the return value was discarded
                # and result.entities stayed empty.
                result.entities.append(entity)
                result.row_count += 1
            except Exception:
                # Best-effort per row: count the failure and keep going, but
                # record the traceback so the failure can be diagnosed.
                # NOTE(review): if the error came from a database flush the
                # session may need a rollback before later rows can succeed —
                # consider a per-row SAVEPOINT (session.begin_nested()).
                result.error_count += 1
                log.exception(
                    "Failed to ingest %s row (employee_key=%r)",
                    self.staging_table,
                    employee_key,
                )
        return result
@register_adapter
class ChannelAdapter(BaseAdapter):
    """Map ``SrcChannel`` staging rows onto CHANNEL entities."""

    source_system = "BSS"
    staging_table = "src_channel"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest channel staging rows into the entity graph.

        Args:
            session: Session used to query the staging table; it is also
                handed to ``upsert_entity``.
            data_version_id: When truthy, only rows with this data version
                are read. It is forwarded to ``upsert_entity``.
            batch_size: Upper bound on the number of staging rows read.

        Returns:
            An ``IngestResult`` holding the upserted entities, ``row_count``
            (rows fully processed) and ``error_count`` (rows that raised).
        """
        log = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): limit() caps this call at ``batch_size`` rows and the
        # query has neither ORDER BY nor an offset, so rows past the first
        # batch are not read by this call — confirm callers expect that.
        query = session.query(SrcChannel)
        if data_version_id:
            query = query.filter(SrcChannel.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            # Read inside the try so a failing attribute load is still counted
            # as a row error; kept in a local so the handler can log it safely.
            channel_key = None
            try:
                channel_key = row.channel_key
                entity = upsert_entity(
                    session,
                    entity_type=EntityType.CHANNEL,
                    business_key=channel_key,
                    display_name=row.channel_name,
                    attributes={
                        "commission_policy": row.commission_policy,
                        "region": row.region,
                    },
                    data_version_id=data_version_id,
                )
                # Report the entity like the other master-data adapters in
                # this module do; previously the return value was discarded
                # and result.entities stayed empty.
                result.entities.append(entity)
                result.row_count += 1
            except Exception:
                # Best-effort per row: count the failure and keep going, but
                # record the traceback so the failure can be diagnosed.
                # NOTE(review): if the error came from a database flush the
                # session may need a rollback before later rows can succeed —
                # consider a per-row SAVEPOINT (session.begin_nested()).
                result.error_count += 1
                log.exception(
                    "Failed to ingest %s row (channel_key=%r)",
                    self.staging_table,
                    channel_key,
                )
        return result
@register_adapter
class MsisdnAdapter(BaseAdapter):
    """Map ``SrcMsisdn`` staging rows onto MSISDN entities.

    For each row an MSISDN entity is upserted. If the row references a
    customer key, a CUSTOMER entity is upserted by business key only and a
    HOLDS_MSISDN relationship (customer, msisdn) is added.
    """

    source_system = "BSS"
    staging_table = "src_msisdn"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest MSISDN staging rows into the entity graph.

        Args:
            session: Session used to query the staging table; it is also
                handed to ``upsert_entity`` / ``add_relationship``.
            data_version_id: When truthy, only rows with this data version
                are read. It is forwarded to every graph helper call.
            batch_size: Upper bound on the number of staging rows read.

        Returns:
            An ``IngestResult`` holding the upserted MSISDN entities, the
            created relationships, ``row_count`` (rows fully processed) and
            ``error_count`` (rows that raised). The referenced customer
            entities are not added to ``entities``.
        """
        log = logging.getLogger(__name__)
        result = IngestResult()

        # NOTE(review): limit() caps this call at ``batch_size`` rows and the
        # query has neither ORDER BY nor an offset, so rows past the first
        # batch are not read by this call — confirm callers expect that.
        query = session.query(SrcMsisdn)
        if data_version_id:
            query = query.filter(SrcMsisdn.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            # Read inside the try so a failing attribute load is still counted
            # as a row error; kept in a local so the handler can log it safely.
            msisdn = None
            try:
                msisdn = row.msisdn
                msisdn_entity = upsert_entity(
                    session,
                    entity_type=EntityType.MSISDN,
                    business_key=msisdn,
                    display_name=row.msisdn,
                    attributes={
                        "region": row.region,
                        "status": row.status,
                        "activate_date": (
                            str(row.activate_date) if row.activate_date else None
                        ),
                        "deactivate_date": (
                            str(row.deactivate_date) if row.deactivate_date else None
                        ),
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(msisdn_entity)

                # Number -> holding customer relationship.
                if row.customer_key:
                    # Only the business key is passed here: no display name or
                    # attributes. NOTE(review): how upsert_entity treats the
                    # omitted fields for an existing customer is not visible
                    # from this file — confirm it does not blank them.
                    cust_entity = upsert_entity(
                        session,
                        entity_type=EntityType.CUSTOMER,
                        business_key=row.customer_key,
                        data_version_id=data_version_id,
                    )
                    rel = add_relationship(
                        session,
                        RelationshipType.HOLDS_MSISDN,
                        cust_entity,
                        msisdn_entity,
                        data_version_id=data_version_id,
                    )
                    result.relationships.append(rel)

                result.row_count += 1
            except Exception:
                # Best-effort per row: count the failure and keep going, but
                # record the traceback so the failure can be diagnosed.
                # NOTE(review): if the error came from a database flush the
                # session may need a rollback before later rows can succeed —
                # consider a per-row SAVEPOINT (session.begin_nested()).
                result.error_count += 1
                log.exception(
                    "Failed to ingest %s row (msisdn=%r)",
                    self.staging_table,
                    msisdn,
                )
        return result
@register_adapter
class AccountAdapter(BaseAdapter):
    """Map ``SrcAccount`` staging rows onto ACCOUNT entities.

    For each row an ACCOUNT entity is upserted. If the row names an owner
    (key and type) and the type is one of ``customer`` / ``supplier`` /
    ``legal_person``, the owner entity is upserted by business key only and
    an OWNS_ACCOUNT relationship (owner, account) is added.
    """

    source_system = "FIN"
    staging_table = "src_account"

    def ingest(
        self,
        session: Session,
        data_version_id: uuid.UUID | None = None,
        batch_size: int = 1000,
    ) -> IngestResult:
        """Ingest account staging rows into the entity graph.

        Args:
            session: Session used to query the staging table; it is also
                handed to ``upsert_entity`` / ``add_relationship``.
            data_version_id: When truthy, only rows with this data version
                are read. It is forwarded to every graph helper call.
            batch_size: Upper bound on the number of staging rows read.

        Returns:
            An ``IngestResult`` holding the upserted ACCOUNT entities, the
            created relationships, ``row_count`` (rows fully processed) and
            ``error_count`` (rows that raised). Owner entities are not added
            to ``entities``.
        """
        log = logging.getLogger(__name__)
        result = IngestResult()

        # Loop-invariant lookup from the staging owner_type string to the
        # ontology entity type; built once instead of once per row.
        owner_type_map = {
            "customer": EntityType.CUSTOMER,
            "supplier": EntityType.SUPPLIER,
            "legal_person": EntityType.LEGAL_PERSON,
        }

        # NOTE(review): limit() caps this call at ``batch_size`` rows and the
        # query has neither ORDER BY nor an offset, so rows past the first
        # batch are not read by this call — confirm callers expect that.
        query = session.query(SrcAccount)
        if data_version_id:
            query = query.filter(SrcAccount.data_version_id == data_version_id)
        rows = query.limit(batch_size).all()

        for row in rows:
            # Read inside the try so a failing attribute load is still counted
            # as a row error; kept in a local so the handler can log it safely.
            account_key = None
            try:
                account_key = row.account_key
                acct_entity = upsert_entity(
                    session,
                    entity_type=EntityType.ACCOUNT,
                    business_key=account_key,
                    display_name=row.account_name,
                    attributes={
                        "bank_name": row.bank_name,
                        "branch_name": row.branch_name,
                    },
                    data_version_id=data_version_id,
                )
                result.entities.append(acct_entity)

                # Account -> owning party relationship.
                if row.owner_key and row.owner_type:
                    etype = owner_type_map.get(row.owner_type)
                    if etype:
                        owner_entity = upsert_entity(
                            session,
                            entity_type=etype,
                            business_key=row.owner_key,
                            data_version_id=data_version_id,
                        )
                        rel = add_relationship(
                            session,
                            RelationshipType.OWNS_ACCOUNT,
                            owner_entity,
                            acct_entity,
                            data_version_id=data_version_id,
                        )
                        result.relationships.append(rel)
                    else:
                        # The row is still counted as processed; only the
                        # ownership link is skipped. Surface it so unmapped
                        # owner types do not disappear silently.
                        log.warning(
                            "Skipping OWNS_ACCOUNT for account_key=%r: "
                            "unrecognised owner_type %r",
                            account_key,
                            row.owner_type,
                        )

                result.row_count += 1
            except Exception:
                # Best-effort per row: count the failure and keep going, but
                # record the traceback so the failure can be diagnosed.
                # NOTE(review): if the error came from a database flush the
                # session may need a rollback before later rows can succeed —
                # consider a per-row SAVEPOINT (session.begin_nested()).
                result.error_count += 1
                log.exception(
                    "Failed to ingest %s row (account_key=%r)",
                    self.staging_table,
                    account_key,
                )
        return result