Files
InternalAuditInterprise/backend/migrations/versions/0001_init_datahub.py
T
2026-06-16 00:38:57 +08:00

141 lines
6.0 KiB
Python

"""初始化数据中台表:数据版本 / 实体 / 关系 / 双时态事实 / 时序事件
Revision ID: 0001_init_datahub
Revises:
Create Date: 2026-06
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# Revision identifiers for this migration script (presumably read by Alembic
# to place it in the migration graph). down_revision is None, i.e. this
# revision has no parent; no branch labels or dependencies are declared.
revision: str = "0001_init_datahub"
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Create the data-hub schema: five tables, their indexes and two optional extras.

    Referenced tables are created before the tables that point at them:
    ``data_version`` first, then ``entity``, then ``entity_relationship``,
    ``bitemporal_fact`` and ``metric_event``.  Afterwards two PL/pgSQL ``DO``
    blocks are executed; each one looks its extension up in ``pg_extension``
    and does nothing when that extension is not installed:

    * ``timescaledb``: turns ``metric_event`` into a hypertable on ``event_time``.
    * ``btree_gist``: adds the ``ex_btf_no_overlap`` exclusion constraint to
      ``bitemporal_fact``.
    """

    # --- small factories for the column / constraint shapes that repeat ---

    def uuid_column(name: str, **options) -> sa.Column:
        # All identifier and reference columns use the same UUID type.
        return sa.Column(name, postgresql.UUID(as_uuid=True), **options)

    def string_column(name: str, length: int, *, nullable: bool) -> sa.Column:
        return sa.Column(name, sa.String(length), nullable=nullable)

    def timestamp_column(name: str, *, nullable: bool) -> sa.Column:
        # Every timestamp in this schema is timezone-aware.
        return sa.Column(name, sa.DateTime(timezone=True), nullable=nullable)

    def json_column(name: str) -> sa.Column:
        # Required JSONB column whose server-side default is an empty object.
        return sa.Column(name, postgresql.JSONB(), nullable=False, server_default="{}")

    def foreign_key(column: str, target: str) -> sa.ForeignKeyConstraint:
        return sa.ForeignKeyConstraint([column], [target])

    # data_version: one row per ingested batch; the other tables reference it.
    op.create_table(
        "data_version",
        uuid_column("id", primary_key=True),
        string_column("source_system", 64, nullable=False),
        string_column("batch_label", 128, nullable=False),
        sa.Column("row_count", sa.Integer(), nullable=False, server_default="0"),
        timestamp_column("ingested_at", nullable=False),
        sa.Column("note", sa.Text(), nullable=True),
    )

    # entity: unique per (entity_type, business_key); canonical_id is a
    # self-reference to another entity row.
    op.create_table(
        "entity",
        uuid_column("id", primary_key=True),
        string_column("entity_type", 32, nullable=False),
        string_column("business_key", 128, nullable=False),
        string_column("display_name", 256, nullable=True),
        json_column("attributes"),
        uuid_column("canonical_id", nullable=True),
        uuid_column("data_version_id", nullable=True),
        foreign_key("canonical_id", "entity.id"),
        foreign_key("data_version_id", "data_version.id"),
        sa.UniqueConstraint("entity_type", "business_key", name="uq_entity_type_bizkey"),
    )
    op.create_index("ix_entity_type", "entity", ["entity_type"])

    # entity_relationship: typed edge between two entity rows.
    op.create_table(
        "entity_relationship",
        uuid_column("id", primary_key=True),
        string_column("rel_type", 32, nullable=False),
        uuid_column("source_id", nullable=False),
        uuid_column("target_id", nullable=False),
        json_column("attributes"),
        uuid_column("data_version_id", nullable=True),
        foreign_key("source_id", "entity.id"),
        foreign_key("target_id", "entity.id"),
        foreign_key("data_version_id", "data_version.id"),
    )
    for index_name, indexed_column in (
        ("ix_rel_source", "source_id"),
        ("ix_rel_target", "target_id"),
        ("ix_rel_type", "rel_type"),
    ):
        op.create_index(index_name, "entity_relationship", [indexed_column])

    # bitemporal_fact: an attribute value with a valid-time range
    # (valid_from / valid_to) and a system-time range (system_from / system_to).
    op.create_table(
        "bitemporal_fact",
        uuid_column("id", primary_key=True),
        uuid_column("entity_id", nullable=False),
        string_column("attr_name", 64, nullable=False),
        json_column("attr_value"),
        timestamp_column("valid_from", nullable=False),
        timestamp_column("valid_to", nullable=True),
        timestamp_column("system_from", nullable=False),
        timestamp_column("system_to", nullable=True),
        uuid_column("data_version_id", nullable=True),
        foreign_key("entity_id", "entity.id"),
        foreign_key("data_version_id", "data_version.id"),
    )
    op.create_index("ix_btf_entity_attr", "bitemporal_fact", ["entity_id", "attr_name"])

    # metric_event (time series)
    op.create_table(
        "metric_event",
        uuid_column("id", nullable=False),
        timestamp_column("event_time", nullable=False),
        string_column("subject_type", 32, nullable=False),
        string_column("subject_key", 128, nullable=False),
        string_column("metric_name", 64, nullable=False),
        sa.Column("metric_value", sa.Float(), nullable=False, server_default="0"),
        json_column("attributes"),
        uuid_column("data_version_id", nullable=True),
        # A hypertable's primary key has to include the partitioning column event_time.
        sa.PrimaryKeyConstraint("id", "event_time"),
        foreign_key("data_version_id", "data_version.id"),
    )
    op.create_index(
        "ix_metric_subject_time",
        "metric_event",
        ["subject_type", "subject_key", "event_time"],
    )
    op.create_index("ix_metric_name_time", "metric_event", ["metric_name", "event_time"])

    # Convert metric_event into a TimescaleDB hypertable. The step is skipped
    # when the extension is not installed, so tests can run without timescaledb.
    hypertable_sql = (
        "\n"
        "DO $$\n"
        "BEGIN\n"
        "IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'timescaledb') THEN\n"
        "PERFORM create_hypertable('metric_event', 'event_time', if_not_exists => TRUE);\n"
        "END IF;\n"
        "END$$;\n"
    )
    op.execute(hypertable_sql)

    # Bitemporal exclusion constraint: for the same entity and the same
    # attribute, valid-time ranges must not overlap (requires btree_gist).
    # The WHERE clause limits it to rows whose system_to is NULL.
    no_overlap_sql = (
        "\n"
        "DO $$\n"
        "BEGIN\n"
        "IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'btree_gist') THEN\n"
        "ALTER TABLE bitemporal_fact\n"
        "ADD CONSTRAINT ex_btf_no_overlap\n"
        "EXCLUDE USING gist (\n"
        "entity_id WITH =,\n"
        "attr_name WITH =,\n"
        "tstzrange(valid_from, valid_to) WITH &&\n"
        ") WHERE (system_to IS NULL);\n"
        "END IF;\n"
        "END$$;\n"
    )
    op.execute(no_overlap_sql)
def downgrade() -> None:
    """Drop everything ``upgrade`` created, in reverse creation order.

    Tables that hold foreign keys are removed before the tables they
    reference.  Only ``ix_entity_type`` is dropped explicitly, immediately
    before its ``entity`` table.
    """
    # Tables that reference entity / data_version go first.
    for dependent_table in ("metric_event", "bitemporal_fact", "entity_relationship"):
        op.drop_table(dependent_table)

    op.drop_index("ix_entity_type", table_name="entity")

    # entity references data_version, so data_version is removed last.
    for referenced_table in ("entity", "data_version"):
        op.drop_table(referenced_table)