a329d4906b
- 方案文档: AVCC 体系建设、IPTV TCS 需求(0-req)/PRD(1-prd)/任务(2-task)/二三四期任务 - tcs-iptv: Go 后端(哈希SDK/MA码生成/可信数据空间mock/业务编排/HTTP API+HMAC鉴权) - web-console: React+AntD 监管大屏(角色工作台/全流程演示/监管片库) - 一剧一码+集级哈希, 集级下架/恢复, 全栈测试通过
17 KiB
17 KiB
第三章 数据库与数据模型设计
版本:V1.0
基于文档:《AIGC-Hub智视码(AVCC)体系建设方案 V2.0》
3.1 数据存储策略总览
| 存储类型 | 选型 | 用途 | 数据量预估 |
|---|---|---|---|
| 关系型数据库 | PostgreSQL 16 | 主业务数据(赋码、审核、版权链元数据) | 单表千万级 |
| 缓存 | Redis Cluster 7.x | 会话、热点数据、限流、分布式锁 | 百万级 Key |
| 时序分析数据库 | ClickHouse 24.x | 播放数据、审核性能指标、运营分析 | 日增数亿行 |
| 对象存储 | MinIO | 视频文件、C2PA 水印、证据包、证书 PDF | PB 级 |
| 搜索引擎 | Elasticsearch 8.x | 审核报告全文检索、日志检索 | TB 级 |
| 向量数据库 | Milvus / pgvector | 视频指纹向量、版权相似度检索 | 百万级向量 |
| 区块链 | ChainMaker | 版权存证、权益记录、清算记录 | 链上不可删除 |
3.2 PostgreSQL 核心表结构
3.2.1 创作者表 (creators)
CREATE TABLE creators (
id BIGSERIAL PRIMARY KEY,
ma_node_code VARCHAR(64) NOT NULL,
creator_type VARCHAR(16) CHECK (creator_type IN ('individual', 'mcn', 'studio')),
real_name VARCHAR(128) NOT NULL,
id_card_hash VARCHAR(64),
enterprise_name VARCHAR(256),
enterprise_credit_code VARCHAR(32),
phone VARCHAR(32) NOT NULL,
email VARCHAR(128),
status VARCHAR(16) DEFAULT 'active' CHECK (status IN ('active', 'frozen', 'blacklisted')),
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_creators_ma_node ON creators(ma_node_code);
CREATE INDEX idx_creators_status ON creators(status);
COMMENT ON TABLE creators IS '创作者/机构主表,存储实名信息(身份证存哈希)';
3.2.2 平台/机构节点表 (platforms)
CREATE TABLE platforms (
id BIGSERIAL PRIMARY KEY,
ma_node_code VARCHAR(64) NOT NULL UNIQUE,
platform_name VARCHAR(128) NOT NULL,
platform_type VARCHAR(32) CHECK (platform_type IN ('long_video', 'short_video', 'ai_tool', 'mcn')),
api_key_hash VARCHAR(128),
api_secret_hash VARCHAR(256),
status VARCHAR(16) DEFAULT 'pending' CHECK (status IN ('pending', 'active', 'suspended')),
quota_daily INT DEFAULT 10000,
callback_url VARCHAR(256),
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_platforms_type ON platforms(platform_type);
CREATE INDEX idx_platforms_status ON platforms(status);
COMMENT ON TABLE platforms IS '接入平台/机构节点表,对应 MA 三级节点';
3.2.3 AVCC 赋码记录表 (avcc_records) — 核心业务表
CREATE TABLE avcc_records (
id BIGSERIAL PRIMARY KEY,
avcc_full_code VARCHAR(512) NOT NULL UNIQUE,
ma_root VARCHAR(8) DEFAULT 'MA',
country_code VARCHAR(8) DEFAULT '156',
industry_node VARCHAR(16) DEFAULT '10005',
platform_node VARCHAR(16) NOT NULL,
object_category VARCHAR(64) NOT NULL,
license_no VARCHAR(128),
review_level VARCHAR(8) CHECK (review_level IN ('P', 'G', 'O')),
version INT DEFAULT 1,
ai_content_hash VARCHAR(128) NOT NULL,
copyright_crd VARCHAR(128),
creator_id BIGINT REFERENCES creators(id),
platform_id BIGINT REFERENCES platforms(id),
review_status VARCHAR(32) DEFAULT 'pending'
CHECK (review_status IN ('pending', 'pre_checking', 'ai_reviewing', 'human_reviewing', 'approved', 'rejected', 'revoked')),
review_report_id BIGINT,
circulation_status VARCHAR(32) DEFAULT 'active'
CHECK (circulation_status IN ('active', 'suspended', 'revoked', 'expired')),
c2pa_manifest_hash VARCHAR(128),
content_storage_key VARCHAR(256),
submitted_at TIMESTAMPTZ DEFAULT NOW(),
reviewed_at TIMESTAMPTZ,
expires_at TIMESTAMPTZ,
revoked_at TIMESTAMPTZ,
revoked_reason TEXT,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_avcc_platform ON avcc_records(platform_node);
CREATE INDEX idx_avcc_creator ON avcc_records(creator_id);
CREATE INDEX idx_avcc_review_status ON avcc_records(review_status);
CREATE INDEX idx_avcc_circulation ON avcc_records(circulation_status);
CREATE INDEX idx_avcc_license ON avcc_records(license_no);
CREATE INDEX idx_avcc_submitted ON avcc_records(submitted_at);
COMMENT ON TABLE avcc_records IS 'AVCC 赋码记录核心业务表,一码一行';
3.2.4 审核报告表 (review_reports)
CREATE TABLE review_reports (
id BIGSERIAL PRIMARY KEY,
avcc_record_id BIGINT REFERENCES avcc_records(id),
review_type VARCHAR(16) CHECK (review_type IN ('AI', 'HUMAN', 'HYBRID')),
overall_score DECIMAL(5,2),
suggested_level VARCHAR(8),
final_level VARCHAR(8),
vision_score DECIMAL(5,2),
dialogue_score DECIMAL(5,2),
audio_score DECIMAL(5,2),
authenticity_score DECIMAL(5,2),
copyright_score DECIMAL(5,2),
violations JSONB DEFAULT '[]',
reviewer_id BIGINT,
reviewer_comment TEXT,
ai_model_version VARCHAR(64),
report_json JSONB,
content_fingerprint VARCHAR(128),
processing_time_ms INT,
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_review_avcc ON review_reports(avcc_record_id);
CREATE INDEX idx_review_type ON review_reports(review_type);
CREATE INDEX idx_review_created ON review_reports(created_at);
COMMENT ON TABLE review_reports IS '审核报告表,AI/HUMAN/HYBRID 三类审核结果';
3.2.5 版权链存证表 (chain_records)
CREATE TABLE chain_records (
id BIGSERIAL PRIMARY KEY,
avcc_record_id BIGINT REFERENCES avcc_records(id),
chain_type VARCHAR(32) CHECK (chain_type IN ('copyright_main', 'user_rights', 'settlement', 'iteration', 'revocation')),
tx_hash VARCHAR(128) NOT NULL,
block_height BIGINT,
chain_node VARCHAR(64),
payload_hash VARCHAR(128),
payload JSONB,
confirmation_count INT DEFAULT 0,
status VARCHAR(16) DEFAULT 'pending' CHECK (status IN ('pending', 'confirmed', 'failed')),
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_chain_avcc ON chain_records(avcc_record_id);
CREATE INDEX idx_chain_type ON chain_records(chain_type);
CREATE INDEX idx_chain_tx ON chain_records(tx_hash);
COMMENT ON TABLE chain_records IS '版权链存证元数据表(链上原始数据存在 ChainMaker)';
3.2.6 分账/清算记录表 (settlement_records)
CREATE TABLE settlement_records (
id BIGSERIAL PRIMARY KEY,
avcc_record_id BIGINT REFERENCES avcc_records(id),
platform_id BIGINT REFERENCES platforms(id),
period_start DATE NOT NULL,
period_end DATE NOT NULL,
total_revenue DECIMAL(18,4),
platform_share DECIMAL(18,4),
creator_share DECIMAL(18,4),
model_share DECIMAL(18,4),
ip_share DECIMAL(18,4),
hub_fee DECIMAL(18,4),
status VARCHAR(16) DEFAULT 'pending'
CHECK (status IN ('pending', 'confirmed', 'disputed', 'settled', 'refunded')),
dispute_reason TEXT,
tx_hash VARCHAR(128),
settled_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_settlement_avcc ON settlement_records(avcc_record_id);
CREATE INDEX idx_settlement_platform ON settlement_records(platform_id);
CREATE INDEX idx_settlement_period ON settlement_records(period_start, period_end);
COMMENT ON TABLE settlement_records IS '分账清算记录表,链上清算后同步写入';
3.2.7 黑名单表 (blacklist)
CREATE TABLE blacklist (
id BIGSERIAL PRIMARY KEY,
target_type VARCHAR(32) CHECK (target_type IN ('creator', 'platform', 'model', 'dataset', 'content')),
target_id VARCHAR(128) NOT NULL,
ma_node_code VARCHAR(64),
reason TEXT NOT NULL,
evidence_hash VARCHAR(128),
source VARCHAR(32) CHECK (source IN ('admin', 'system', 'appeal')),
operator_id BIGINT,
revoked_at TIMESTAMPTZ,
revoked_reason TEXT,
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_blacklist_target ON blacklist(target_type, target_id);
CREATE INDEX idx_blacklist_active ON blacklist(target_type, target_id) WHERE revoked_at IS NULL;
COMMENT ON TABLE blacklist IS '黑名单表,支持创作者/平台/模型/数据集/内容五级黑名单';
3.2.8 审核任务队列表 (review_tasks)
CREATE TABLE review_tasks (
id BIGSERIAL PRIMARY KEY,
avcc_record_id BIGINT REFERENCES avcc_records(id),
task_type VARCHAR(16) CHECK (task_type IN ('AI_SCREEN', 'HUMAN_FINAL', 'APPEAL')),
status VARCHAR(16) DEFAULT 'queued'
CHECK (status IN ('queued', 'preprocessing', 'inferencing', 'fusion', 'completed', 'failed', 'cancelled')),
priority INT DEFAULT 5, -- 1-10, P类内容=1(最高), O类=10
queue_position INT,
processor_node VARCHAR(64), -- 处理节点标识
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
error_message TEXT,
retry_count INT DEFAULT 0,
created_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_tasks_status ON review_tasks(status);
CREATE INDEX idx_tasks_priority ON review_tasks(priority, created_at);
CREATE INDEX idx_tasks_avcc ON review_tasks(avcc_record_id);
COMMENT ON TABLE review_tasks IS '审核任务队列,支撑优先级调度与故障重试';
3.2.9 API 调用日志表 (api_call_logs) — 镜像表
CREATE TABLE api_call_logs (
id BIGSERIAL PRIMARY KEY,
platform_id BIGINT REFERENCES platforms(id),
api_endpoint VARCHAR(256) NOT NULL,
http_method VARCHAR(8),
avcc_code VARCHAR(512),
request_hash VARCHAR(128),
response_status INT,
latency_ms INT,
client_ip INET,
user_agent TEXT,
called_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX idx_api_logs_platform ON api_call_logs(platform_id, called_at);
CREATE INDEX idx_api_logs_endpoint ON api_call_logs(api_endpoint, called_at);
CREATE INDEX idx_api_logs_date ON api_call_logs(called_at);
COMMENT ON TABLE api_call_logs IS 'API 调用日志镜像表(详细日志写入 ClickHouse)';
3.3 Redis 缓存设计
| Key Pattern | 类型 | TTL | 说明 |
|---|---|---|---|
avcc:{code} |
String | 1h | AVCC 解析结果缓存(热点码加速) |
avcc:parse:{code} |
Hash | 30m | 解析元数据缓存 |
platform:{id}:quota |
String | 1d | 平台日调用配额剩余 |
platform:{id}:rate |
String | 1m | 平台限流计数(Token Bucket) |
session:{token} |
Hash | 24h | 用户会话(创作者/运营人员) |
blacklist:{type}:{id} |
String | 永久 | 黑名单缓存(每日全量同步) |
review:task:{id} |
Hash | 2h | 审核任务状态缓存 |
code:pool:{industry_node} |
String | 永久 | 码资源池剩余数量 |
code:allocated:{platform_node} |
String | 永久 | 已分配码段索引 |
stats:daily:{date} |
Hash | 7d | 日运营统计缓存 |
Redis 使用场景补充
- 分布式锁:码资源分配时使用
SET code:lock:{platform_node} {value} NX EX 10 - 排行榜:创作者收益排行
ZADD leaderboard:revenue:{period} {score} {creator_id} - 队列:人工审核队列使用 Redis List + 优先级排序
- 发布订阅:审核完成通知
PUBLISH review:completed {avcc}
3.4 ClickHouse 分析表设计
3.4.1 播放与消费事件表
CREATE TABLE playback_metrics (
event_time DateTime64(3),
avcc_code String,
platform_node String,
user_hash String,
event_type Enum8('play' = 1, 'complete' = 2, 'purchase' = 3, 'share' = 4, 'comment' = 5),
duration_sec Int32,
revenue Decimal(18,4),
device_type Enum8('mobile' = 1, 'pc' = 2, 'tv' = 3, 'other' = 4),
province String
) ENGINE = MergeTree()
ORDER BY (event_time, avcc_code, platform_node)
PARTITION BY toYYYYMMDD(event_time)
TTL event_time + INTERVAL 3 YEAR;
3.4.2 审核性能分析表
CREATE TABLE review_metrics (
event_time DateTime64(3),
review_type String,
model_version String,
content_type String,
queue_wait_ms Int32,
preprocess_ms Int32,
inference_ms Int32,
fusion_ms Int32,
total_latency_ms Int32,
result Enum8('pass' = 1, 'fail' = 2, 'manual' = 3),
gpu_node String
) ENGINE = MergeTree()
ORDER BY (event_time, review_type)
PARTITION BY toYYYYMMDD(event_time);
3.4.3 平台调用分析表
CREATE TABLE api_metrics (
event_time DateTime64(3),
platform_id String,
api_endpoint String,
response_status Int16,
latency_ms Int32,
client_ip String,
avcc_code String
) ENGINE = MergeTree()
ORDER BY (event_time, platform_id, api_endpoint)
PARTITION BY toYYYYMMDD(event_time)
TTL event_time + INTERVAL 1 YEAR;
3.4.4 版权链存证分析表
CREATE TABLE chain_metrics (
event_time DateTime64(3),
chain_type String,
tx_hash String,
block_height UInt64,
confirmation_ms Int32,
gas_cost Decimal(18,8),
payload_size Int32
) ENGINE = MergeTree()
ORDER BY (event_time, chain_type)
PARTITION BY toYYYYMMDD(event_time);
3.5 向量数据库设计(Milvus / pgvector)
3.5.1 视频指纹向量集合
# 伪代码:视频指纹向量存储(使用 Milvus)
collection = "video_fingerprint"
schema = {
"fields": [
{"name": "id", "type": "INT64", "is_primary": True, "auto_id": True},
{"name": "avcc_code", "type": "VARCHAR", "max_length": 512},
{"name": "platform_node", "type": "VARCHAR", "max_length": 64},
{"name": "fingerprint", "type": "FLOAT_VECTOR", "dim": 256}, # 感知哈希向量
{"name": "content_hash", "type": "VARCHAR", "max_length": 128},
{"name": "created_at", "type": "INT64"} # 时间戳
]
}
index_params = {
"metric_type": "COSINE",
"index_type": "HNSW",
"params": {"M": 16, "efConstruction": 200}
}
3.5.2 版权比对查询
def search_similar_works(fingerprint_vector, top_k=10, threshold=0.85):
"""
检索相似作品,用于版权比对与重复检测
threshold: 相似度阈值,>0.85 视为疑似侵权
"""
results = milvus.search(
collection_name="video_fingerprint",
data=[fingerprint_vector],
anns_field="fingerprint",
param={"metric_type": "COSINE", "params": {"ef": 64}},
limit=top_k,
output_fields=["avcc_code", "platform_node", "content_hash"]
)
return [r for r in results if r.score > threshold]
3.6 对象存储目录结构(MinIO)
aigc-hub-storage/
├── works/
│ ├── raw/
│ │ └── {platform_node}/
│ │ └── {date}/
│ │ └── {avcc_code}/
│ │ ├── original.mp4 # 原始作品
│ │ ├── thumbnail.jpg # 缩略图
│ │ └── metadata.json # 元数据
│ ├── processed/
│ │ └── {avcc_code}/
│ │ ├── frames/ # 抽帧序列
│ │ ├── audio.wav # 音频分离
│ │ └── fingerprint.bin # 视频指纹
│ └── archived/
│ └── {year}/
│ └── {avcc_code}.tar.gz # 归档包
├── c2pa/
│ └── {avcc_code}.c2pa # C2PA 水印文件
├── certificates/
│ └── {avcc_code}.pdf # AVCC 证书 PDF
├── reports/
│ └── {avcc_code}/
│ ├── ai_review_report.pdf
│ └── human_review_report.pdf
├── evidence/
│ └── {case_id}/
│ ├── complaint.pdf
│ └── investigation/
└── backups/
└── daily/
└── {date}.sql.gz # PG 备份
3.7 数据库高可用与灾备
| 策略 | 实现方式 | RPO | RTO |
|---|---|---|---|
| PostgreSQL | Patroni + etcd + 流复制(1主2从) | < 1s | < 30s |
| Redis | Cluster 模式,3主3从,自动故障转移 | 0 | < 10s |
| ClickHouse | 3节点副本,ReplicatedMergeTree | < 1min | < 5min |
| MinIO | 4节点纠删码,4+2配置 | 0 | < 1min |
| 区块链 | 联盟链多节点共识,5组织各1节点 | 0 | 共识自愈 |
| 跨可用区 | 主集群北京,备集群上海,异步同步 | < 5min | < 15min |
上一章:02-核心系统模块设计.md
下一章:04-API接口设计.md