141 lines
6.0 KiB
Python
141 lines
6.0 KiB
Python
"""Initialize the data-hub tables: data version / entity / relationship / bitemporal fact / time-series event

Revision ID: 0001_init_datahub
Revises:
Create Date: 2026-06
"""
|
|
from collections.abc import Sequence
|
|
|
|
import sqlalchemy as sa
|
|
from alembic import op
|
|
from sqlalchemy.dialects import postgresql
|
|
|
|
# Alembic revision identifiers.
revision: str = "0001_init_datahub"
# None: no parent revision is declared for this migration.
down_revision: str | None = None
# No branch labels or cross-revision dependencies are declared.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
|
|
|
|
|
|
def upgrade() -> None:
    """Create the data-hub tables, their indexes and optional PostgreSQL extras.

    Creation order follows the foreign keys: ``data_version`` first, then
    ``entity``, ``entity_relationship``, ``bitemporal_fact`` and
    ``metric_event``. Two ``DO`` blocks run afterwards; each one checks
    ``pg_extension`` and does nothing when its extension is not installed.
    """

    def uuid_type() -> postgresql.UUID:
        """Return a fresh ``UUID(as_uuid=True)`` column type."""
        return postgresql.UUID(as_uuid=True)

    def tz_datetime() -> sa.DateTime:
        """Return a fresh timezone-aware ``DateTime`` column type."""
        return sa.DateTime(timezone=True)

    def id_pk_column() -> sa.Column:
        """Return the single-column UUID primary key ``id``."""
        return sa.Column("id", uuid_type(), primary_key=True)

    def jsonb_column(column_name: str) -> sa.Column:
        """Return a NOT NULL JSONB column whose server default is ``{}``."""
        return sa.Column(column_name, postgresql.JSONB(), nullable=False, server_default="{}")

    def data_version_column() -> sa.Column:
        """Return the nullable ``data_version_id`` UUID column."""
        return sa.Column("data_version_id", uuid_type(), nullable=True)

    def data_version_fk() -> sa.ForeignKeyConstraint:
        """Return the foreign key ``data_version_id`` -> ``data_version.id``."""
        return sa.ForeignKeyConstraint(["data_version_id"], ["data_version.id"])

    def entity_fk(column_name: str) -> sa.ForeignKeyConstraint:
        """Return a foreign key from ``column_name`` to ``entity.id``."""
        return sa.ForeignKeyConstraint([column_name], ["entity.id"])

    # data_version
    op.create_table(
        "data_version",
        id_pk_column(),
        sa.Column("source_system", sa.String(64), nullable=False),
        sa.Column("batch_label", sa.String(128), nullable=False),
        sa.Column("row_count", sa.Integer(), nullable=False, server_default="0"),
        sa.Column("ingested_at", tz_datetime(), nullable=False),
        sa.Column("note", sa.Text(), nullable=True),
    )

    # entity (canonical_id is a self-referencing foreign key)
    op.create_table(
        "entity",
        id_pk_column(),
        sa.Column("entity_type", sa.String(32), nullable=False),
        sa.Column("business_key", sa.String(128), nullable=False),
        sa.Column("display_name", sa.String(256), nullable=True),
        jsonb_column("attributes"),
        sa.Column("canonical_id", uuid_type(), nullable=True),
        data_version_column(),
        entity_fk("canonical_id"),
        data_version_fk(),
        sa.UniqueConstraint("entity_type", "business_key", name="uq_entity_type_bizkey"),
    )
    op.create_index("ix_entity_type", "entity", ["entity_type"])

    # entity_relationship
    op.create_table(
        "entity_relationship",
        id_pk_column(),
        sa.Column("rel_type", sa.String(32), nullable=False),
        sa.Column("source_id", uuid_type(), nullable=False),
        sa.Column("target_id", uuid_type(), nullable=False),
        jsonb_column("attributes"),
        data_version_column(),
        entity_fk("source_id"),
        entity_fk("target_id"),
        data_version_fk(),
    )
    relationship_indexes = (
        ("ix_rel_source", "source_id"),
        ("ix_rel_target", "target_id"),
        ("ix_rel_type", "rel_type"),
    )
    for index_name, indexed_column in relationship_indexes:
        op.create_index(index_name, "entity_relationship", [indexed_column])

    # bitemporal_fact
    op.create_table(
        "bitemporal_fact",
        id_pk_column(),
        sa.Column("entity_id", uuid_type(), nullable=False),
        sa.Column("attr_name", sa.String(64), nullable=False),
        jsonb_column("attr_value"),
        sa.Column("valid_from", tz_datetime(), nullable=False),
        sa.Column("valid_to", tz_datetime(), nullable=True),
        sa.Column("system_from", tz_datetime(), nullable=False),
        sa.Column("system_to", tz_datetime(), nullable=True),
        data_version_column(),
        entity_fk("entity_id"),
        data_version_fk(),
    )
    op.create_index("ix_btf_entity_attr", "bitemporal_fact", ["entity_id", "attr_name"])

    # metric_event (time series)
    op.create_table(
        "metric_event",
        sa.Column("id", uuid_type(), nullable=False),
        sa.Column("event_time", tz_datetime(), nullable=False),
        sa.Column("subject_type", sa.String(32), nullable=False),
        sa.Column("subject_key", sa.String(128), nullable=False),
        sa.Column("metric_name", sa.String(64), nullable=False),
        sa.Column("metric_value", sa.Float(), nullable=False, server_default="0"),
        jsonb_column("attributes"),
        data_version_column(),
        # A hypertable's primary key must include the partitioning column event_time.
        sa.PrimaryKeyConstraint("id", "event_time"),
        data_version_fk(),
    )
    op.create_index(
        "ix_metric_subject_time",
        "metric_event",
        ["subject_type", "subject_key", "event_time"],
    )
    op.create_index("ix_metric_name_time", "metric_event", ["metric_name", "event_time"])

    # Convert metric_event to a TimescaleDB hypertable (skipped when the
    # extension is absent, so tests can run in environments without timescaledb).
    hypertable_sql = """
        DO $$
        BEGIN
            IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'timescaledb') THEN
                PERFORM create_hypertable('metric_event', 'event_time', if_not_exists => TRUE);
            END IF;
        END$$;
        """
    op.execute(hypertable_sql)

    # Bitemporal exclusion constraint: for the same entity and the same
    # attribute, valid-time ranges must not overlap (requires btree_gist).
    # The WHERE clause limits it to rows whose system_to is NULL.
    no_overlap_sql = """
        DO $$
        BEGIN
            IF EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'btree_gist') THEN
                ALTER TABLE bitemporal_fact
                ADD CONSTRAINT ex_btf_no_overlap
                EXCLUDE USING gist (
                    entity_id WITH =,
                    attr_name WITH =,
                    tstzrange(valid_from, valid_to) WITH &&
                ) WHERE (system_to IS NULL);
            END IF;
        END$$;
        """
    op.execute(no_overlap_sql)
|
|
|
|
|
|
def downgrade() -> None:
    """Drop the five tables created by ``upgrade``, dependents first.

    ``metric_event``, ``bitemporal_fact`` and ``entity_relationship`` are
    dropped before ``entity`` and ``data_version``, which they reference via
    foreign keys. ``ix_entity_type`` is dropped explicitly just before its
    table.
    """
    # Tables that hold foreign keys to entity and/or data_version.
    for dependent_table in ("metric_event", "bitemporal_fact", "entity_relationship"):
        op.drop_table(dependent_table)

    op.drop_index("ix_entity_type", table_name="entity")

    # entity references data_version, so it has to go first.
    for parent_table in ("entity", "data_version"):
        op.drop_table(parent_table)
|