Files
MAcode/docs/03-数据库与数据模型.md
selfrelease a329d4906b init: AIGC-Hub/AVCC 方案文档 + TCS-IPTV 内容可信锁定系统 MVP
- 方案文档: AVCC 体系建设、IPTV TCS 需求(0-req)/PRD(1-prd)/任务(2-task)/二三四期任务
- tcs-iptv: Go 后端(哈希SDK/MA码生成/可信数据空间mock/业务编排/HTTP API+HMAC鉴权)
- web-console: React+AntD 监管大屏(角色工作台/全流程演示/监管片库)
- 一剧一码+集级哈希, 集级下架/恢复, 全栈测试通过
2026-06-14 16:50:31 +08:00

17 KiB
Raw Permalink Blame History

第三章 数据库与数据模型设计

版本:V1.0
基于文档:《AIGC-Hub智视码(AVCC)体系建设方案 V2.0》


3.1 数据存储策略总览

存储类型 选型 用途 数据量预估
关系型数据库 PostgreSQL 16 主业务数据(赋码、审核、版权链元数据) 单表千万级
缓存 Redis Cluster 7.x 会话、热点数据、限流、分布式锁 百万级 Key
时序分析数据库 ClickHouse 24.x 播放数据、审核性能指标、运营分析 日增数亿行
对象存储 MinIO 视频文件、C2PA 水印、证据包、证书 PDF PB 级
搜索引擎 Elasticsearch 8.x 审核报告全文检索、日志检索 TB 级
向量数据库 Milvus / pgvector 视频指纹向量、版权相似度检索 百万级向量
区块链 ChainMaker 版权存证、权益记录、清算记录 链上不可删除

3.2 PostgreSQL 核心表结构

3.2.1 创作者表 (creators)

CREATE TABLE creators (
    id              BIGSERIAL PRIMARY KEY,
    ma_node_code    VARCHAR(64) NOT NULL,
    creator_type    VARCHAR(16) CHECK (creator_type IN ('individual', 'mcn', 'studio')),
    real_name       VARCHAR(128) NOT NULL,
    id_card_hash    VARCHAR(64),
    enterprise_name VARCHAR(256),
    enterprise_credit_code VARCHAR(32),
    phone           VARCHAR(32) NOT NULL,
    email           VARCHAR(128),
    status          VARCHAR(16) DEFAULT 'active' CHECK (status IN ('active', 'frozen', 'blacklisted')),
    created_at      TIMESTAMPTZ DEFAULT NOW(),
    updated_at      TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_creators_ma_node ON creators(ma_node_code);
CREATE INDEX idx_creators_status ON creators(status);

COMMENT ON TABLE creators IS '创作者/机构主表,存储实名信息(身份证存哈希)';

3.2.2 平台/机构节点表 (platforms)

CREATE TABLE platforms (
    id              BIGSERIAL PRIMARY KEY,
    ma_node_code    VARCHAR(64) NOT NULL UNIQUE,
    platform_name   VARCHAR(128) NOT NULL,
    platform_type   VARCHAR(32) CHECK (platform_type IN ('long_video', 'short_video', 'ai_tool', 'mcn')),
    api_key_hash    VARCHAR(128),
    api_secret_hash VARCHAR(256),
    status          VARCHAR(16) DEFAULT 'pending' CHECK (status IN ('pending', 'active', 'suspended')),
    quota_daily     INT DEFAULT 10000,
    callback_url    VARCHAR(256),
    created_at      TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_platforms_type ON platforms(platform_type);
CREATE INDEX idx_platforms_status ON platforms(status);

COMMENT ON TABLE platforms IS '接入平台/机构节点表,对应 MA 三级节点';

3.2.3 AVCC 赋码记录表 (avcc_records) — 核心业务表

CREATE TABLE avcc_records (
    id                  BIGSERIAL PRIMARY KEY,
    avcc_full_code      VARCHAR(512) NOT NULL UNIQUE,
    ma_root             VARCHAR(8) DEFAULT 'MA',
    country_code        VARCHAR(8) DEFAULT '156',
    industry_node       VARCHAR(16) DEFAULT '10005',
    platform_node       VARCHAR(16) NOT NULL,
    object_category     VARCHAR(64) NOT NULL,
    license_no          VARCHAR(128),
    review_level        VARCHAR(8) CHECK (review_level IN ('P', 'G', 'O')),
    version             INT DEFAULT 1,
    ai_content_hash     VARCHAR(128) NOT NULL,
    copyright_crd       VARCHAR(128),
    creator_id          BIGINT REFERENCES creators(id),
    platform_id         BIGINT REFERENCES platforms(id),
    review_status       VARCHAR(32) DEFAULT 'pending'
                        CHECK (review_status IN ('pending', 'pre_checking', 'ai_reviewing', 'human_reviewing', 'approved', 'rejected', 'revoked')),
    review_report_id    BIGINT,
    circulation_status  VARCHAR(32) DEFAULT 'active'
                        CHECK (circulation_status IN ('active', 'suspended', 'revoked', 'expired')),
    c2pa_manifest_hash  VARCHAR(128),
    content_storage_key VARCHAR(256),
    submitted_at        TIMESTAMPTZ DEFAULT NOW(),
    reviewed_at         TIMESTAMPTZ,
    expires_at          TIMESTAMPTZ,
    revoked_at          TIMESTAMPTZ,
    revoked_reason      TEXT,
    created_at          TIMESTAMPTZ DEFAULT NOW(),
    updated_at          TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_avcc_platform ON avcc_records(platform_node);
CREATE INDEX idx_avcc_creator ON avcc_records(creator_id);
CREATE INDEX idx_avcc_review_status ON avcc_records(review_status);
CREATE INDEX idx_avcc_circulation ON avcc_records(circulation_status);
CREATE INDEX idx_avcc_license ON avcc_records(license_no);
CREATE INDEX idx_avcc_submitted ON avcc_records(submitted_at);

COMMENT ON TABLE avcc_records IS 'AVCC 赋码记录核心业务表,一码一行';

3.2.4 审核报告表 (review_reports)

CREATE TABLE review_reports (
    id              BIGSERIAL PRIMARY KEY,
    avcc_record_id  BIGINT REFERENCES avcc_records(id),
    review_type     VARCHAR(16) CHECK (review_type IN ('AI', 'HUMAN', 'HYBRID')),
    overall_score   DECIMAL(5,2),
    suggested_level VARCHAR(8),
    final_level     VARCHAR(8),
    vision_score    DECIMAL(5,2),
    dialogue_score  DECIMAL(5,2),
    audio_score     DECIMAL(5,2),
    authenticity_score DECIMAL(5,2),
    copyright_score DECIMAL(5,2),
    violations      JSONB DEFAULT '[]',
    reviewer_id     BIGINT,
    reviewer_comment TEXT,
    ai_model_version VARCHAR(64),
    report_json     JSONB,
    content_fingerprint VARCHAR(128),
    processing_time_ms INT,
    created_at      TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_review_avcc ON review_reports(avcc_record_id);
CREATE INDEX idx_review_type ON review_reports(review_type);
CREATE INDEX idx_review_created ON review_reports(created_at);

COMMENT ON TABLE review_reports IS '审核报告表,AI/HUMAN/HYBRID 三类审核结果';

3.2.5 版权链存证表 (chain_records)

CREATE TABLE chain_records (
    id              BIGSERIAL PRIMARY KEY,
    avcc_record_id  BIGINT REFERENCES avcc_records(id),
    chain_type      VARCHAR(32) CHECK (chain_type IN ('copyright_main', 'user_rights', 'settlement', 'iteration', 'revocation')),
    tx_hash         VARCHAR(128) NOT NULL,
    block_height    BIGINT,
    chain_node      VARCHAR(64),
    payload_hash    VARCHAR(128),
    payload         JSONB,
    confirmation_count INT DEFAULT 0,
    status          VARCHAR(16) DEFAULT 'pending' CHECK (status IN ('pending', 'confirmed', 'failed')),
    created_at      TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_chain_avcc ON chain_records(avcc_record_id);
CREATE INDEX idx_chain_type ON chain_records(chain_type);
CREATE INDEX idx_chain_tx ON chain_records(tx_hash);

COMMENT ON TABLE chain_records IS '版权链存证元数据表(链上原始数据存在 ChainMaker';

3.2.6 分账/清算记录表 (settlement_records)

CREATE TABLE settlement_records (
    id              BIGSERIAL PRIMARY KEY,
    avcc_record_id  BIGINT REFERENCES avcc_records(id),
    platform_id     BIGINT REFERENCES platforms(id),
    period_start    DATE NOT NULL,
    period_end      DATE NOT NULL,
    total_revenue   DECIMAL(18,4),
    platform_share  DECIMAL(18,4),
    creator_share   DECIMAL(18,4),
    model_share     DECIMAL(18,4),
    ip_share        DECIMAL(18,4),
    hub_fee         DECIMAL(18,4),
    status          VARCHAR(16) DEFAULT 'pending'
                    CHECK (status IN ('pending', 'confirmed', 'disputed', 'settled', 'refunded')),
    dispute_reason  TEXT,
    tx_hash         VARCHAR(128),
    settled_at      TIMESTAMPTZ,
    created_at      TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_settlement_avcc ON settlement_records(avcc_record_id);
CREATE INDEX idx_settlement_platform ON settlement_records(platform_id);
CREATE INDEX idx_settlement_period ON settlement_records(period_start, period_end);

COMMENT ON TABLE settlement_records IS '分账清算记录表,链上清算后同步写入';

3.2.7 黑名单表 (blacklist)

CREATE TABLE blacklist (
    id              BIGSERIAL PRIMARY KEY,
    target_type     VARCHAR(32) CHECK (target_type IN ('creator', 'platform', 'model', 'dataset', 'content')),
    target_id       VARCHAR(128) NOT NULL,
    ma_node_code    VARCHAR(64),
    reason          TEXT NOT NULL,
    evidence_hash   VARCHAR(128),
    source          VARCHAR(32) CHECK (source IN ('admin', 'system', 'appeal')),
    operator_id     BIGINT,
    revoked_at      TIMESTAMPTZ,
    revoked_reason  TEXT,
    created_at      TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_blacklist_target ON blacklist(target_type, target_id);
CREATE INDEX idx_blacklist_active ON blacklist(target_type, target_id) WHERE revoked_at IS NULL;

COMMENT ON TABLE blacklist IS '黑名单表,支持创作者/平台/模型/数据集/内容五级黑名单';

3.2.8 审核任务队列表 (review_tasks)

CREATE TABLE review_tasks (
    id              BIGSERIAL PRIMARY KEY,
    avcc_record_id  BIGINT REFERENCES avcc_records(id),
    task_type       VARCHAR(16) CHECK (task_type IN ('AI_SCREEN', 'HUMAN_FINAL', 'APPEAL')),
    status          VARCHAR(16) DEFAULT 'queued'
                    CHECK (status IN ('queued', 'preprocessing', 'inferencing', 'fusion', 'completed', 'failed', 'cancelled')),
    priority        INT DEFAULT 5,  -- 1-10, P类内容=1(最高), O类=10
    queue_position  INT,
    processor_node  VARCHAR(64),   -- 处理节点标识
    started_at      TIMESTAMPTZ,
    completed_at    TIMESTAMPTZ,
    error_message   TEXT,
    retry_count     INT DEFAULT 0,
    created_at      TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_tasks_status ON review_tasks(status);
CREATE INDEX idx_tasks_priority ON review_tasks(priority, created_at);
CREATE INDEX idx_tasks_avcc ON review_tasks(avcc_record_id);

COMMENT ON TABLE review_tasks IS '审核任务队列,支撑优先级调度与故障重试';

3.2.9 API 调用日志表 (api_call_logs) — 镜像表

CREATE TABLE api_call_logs (
    id              BIGSERIAL PRIMARY KEY,
    platform_id     BIGINT REFERENCES platforms(id),
    api_endpoint    VARCHAR(256) NOT NULL,
    http_method     VARCHAR(8),
    avcc_code       VARCHAR(512),
    request_hash    VARCHAR(128),
    response_status INT,
    latency_ms      INT,
    client_ip       INET,
    user_agent      TEXT,
    called_at       TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_api_logs_platform ON api_call_logs(platform_id, called_at);
CREATE INDEX idx_api_logs_endpoint ON api_call_logs(api_endpoint, called_at);
CREATE INDEX idx_api_logs_date ON api_call_logs(called_at);

COMMENT ON TABLE api_call_logs IS 'API 调用日志镜像表(详细日志写入 ClickHouse';

3.3 Redis 缓存设计

Key Pattern 类型 TTL 说明
avcc:{code} String 1h AVCC 解析结果缓存(热点码加速)
avcc:parse:{code} Hash 30m 解析元数据缓存
platform:{id}:quota String 1d 平台日调用配额剩余
platform:{id}:rate String 1m 平台限流计数(Token Bucket
session:{token} Hash 24h 用户会话(创作者/运营人员)
blacklist:{type}:{id} String 永久 黑名单缓存(每日全量同步)
review:task:{id} Hash 2h 审核任务状态缓存
code:pool:{industry_node} String 永久 码资源池剩余数量
code:allocated:{platform_node} String 永久 已分配码段索引
stats:daily:{date} Hash 7d 日运营统计缓存

Redis 使用场景补充

  • 分布式锁:码资源分配时使用 SET code:lock:{platform_node} {value} NX EX 10
  • 排行榜:创作者收益排行 ZADD leaderboard:revenue:{period} {score} {creator_id}
  • 队列:人工审核队列使用 Redis List + 优先级排序
  • 发布订阅:审核完成通知 PUBLISH review:completed {avcc}

3.4 ClickHouse 分析表设计

3.4.1 播放与消费事件表

CREATE TABLE playback_metrics (
    event_time      DateTime64(3),
    avcc_code       String,
    platform_node   String,
    user_hash       String,
    event_type      Enum8('play' = 1, 'complete' = 2, 'purchase' = 3, 'share' = 4, 'comment' = 5),
    duration_sec    Int32,
    revenue         Decimal(18,4),
    device_type     Enum8('mobile' = 1, 'pc' = 2, 'tv' = 3, 'other' = 4),
    province        String
) ENGINE = MergeTree()
ORDER BY (event_time, avcc_code, platform_node)
PARTITION BY toYYYYMMDD(event_time)
TTL event_time + INTERVAL 3 YEAR;

3.4.2 审核性能分析表

CREATE TABLE review_metrics (
    event_time      DateTime64(3),
    review_type     String,
    model_version   String,
    content_type    String,
    queue_wait_ms   Int32,
    preprocess_ms   Int32,
    inference_ms    Int32,
    fusion_ms       Int32,
    total_latency_ms Int32,
    result          Enum8('pass' = 1, 'fail' = 2, 'manual' = 3),
    gpu_node        String
) ENGINE = MergeTree()
ORDER BY (event_time, review_type)
PARTITION BY toYYYYMMDD(event_time);

3.4.3 平台调用分析表

CREATE TABLE api_metrics (
    event_time      DateTime64(3),
    platform_id     String,
    api_endpoint    String,
    response_status Int16,
    latency_ms      Int32,
    client_ip       String,
    avcc_code       String
) ENGINE = MergeTree()
ORDER BY (event_time, platform_id, api_endpoint)
PARTITION BY toYYYYMMDD(event_time)
TTL event_time + INTERVAL 1 YEAR;

3.4.4 版权链存证分析表

CREATE TABLE chain_metrics (
    event_time      DateTime64(3),
    chain_type      String,
    tx_hash         String,
    block_height    UInt64,
    confirmation_ms Int32,
    gas_cost        Decimal(18,8),
    payload_size    Int32
) ENGINE = MergeTree()
ORDER BY (event_time, chain_type)
PARTITION BY toYYYYMMDD(event_time);

3.5 向量数据库设计(Milvus / pgvector

3.5.1 视频指纹向量集合

# 伪代码:视频指纹向量存储(使用 Milvus)
collection = "video_fingerprint"
schema = {
    "fields": [
        {"name": "id", "type": "INT64", "is_primary": True, "auto_id": True},
        {"name": "avcc_code", "type": "VARCHAR", "max_length": 512},
        {"name": "platform_node", "type": "VARCHAR", "max_length": 64},
        {"name": "fingerprint", "type": "FLOAT_VECTOR", "dim": 256},  # 感知哈希向量
        {"name": "content_hash", "type": "VARCHAR", "max_length": 128},
        {"name": "created_at", "type": "INT64"}  # 时间戳
    ]
}
index_params = {
    "metric_type": "COSINE",
    "index_type": "HNSW",
    "params": {"M": 16, "efConstruction": 200}
}

3.5.2 版权比对查询

def search_similar_works(fingerprint_vector, top_k=10, threshold=0.85):
    """
    检索相似作品,用于版权比对与重复检测
    threshold: 相似度阈值,>0.85 视为疑似侵权
    """
    results = milvus.search(
        collection_name="video_fingerprint",
        data=[fingerprint_vector],
        anns_field="fingerprint",
        param={"metric_type": "COSINE", "params": {"ef": 64}},
        limit=top_k,
        output_fields=["avcc_code", "platform_node", "content_hash"]
    )
    return [r for r in results if r.score > threshold]

3.6 对象存储目录结构(MinIO

aigc-hub-storage/
├── works/
│   ├── raw/
│   │   └── {platform_node}/
│   │       └── {date}/
│   │           └── {avcc_code}/
│   │               ├── original.mp4          # 原始作品
│   │               ├── thumbnail.jpg         # 缩略图
│   │               └── metadata.json           # 元数据
│   ├── processed/
│   │   └── {avcc_code}/
│   │       ├── frames/                       # 抽帧序列
│   │       ├── audio.wav                     # 音频分离
│   │       └── fingerprint.bin               # 视频指纹
│   └── archived/
│       └── {year}/
│           └── {avcc_code}.tar.gz            # 归档包
├── c2pa/
│   └── {avcc_code}.c2pa                      # C2PA 水印文件
├── certificates/
│   └── {avcc_code}.pdf                       # AVCC 证书 PDF
├── reports/
│   └── {avcc_code}/
│       ├── ai_review_report.pdf
│       └── human_review_report.pdf
├── evidence/
│   └── {case_id}/
│       ├── complaint.pdf
│       └── investigation/
└── backups/
    └── daily/
        └── {date}.sql.gz                     # PG 备份

3.7 数据库高可用与灾备

策略 实现方式 RPO RTO
PostgreSQL Patroni + etcd + 流复制(1主2从) < 1s < 30s
Redis Cluster 模式,3主3从,自动故障转移 0 < 10s
ClickHouse 3节点副本,ReplicatedMergeTree < 1min < 5min
MinIO 4节点纠删码,4+2配置 0 < 1min
区块链 联盟链多节点共识,5组织各1节点 0 共识自愈
跨可用区 主集群北京,备集群上海,异步同步 < 5min < 15min

上一章:02-核心系统模块设计.md
下一章:04-API接口设计.md