# 第三章 数据库与数据模型设计 > 版本:V1.0 > 基于文档:《AIGC-Hub智视码(AVCC)体系建设方案 V2.0》 --- ## 3.1 数据存储策略总览 | 存储类型 | 选型 | 用途 | 数据量预估 | |----------|------|------|------------| | 关系型数据库 | PostgreSQL 16 | 主业务数据(赋码、审核、版权链元数据) | 单表千万级 | | 缓存 | Redis Cluster 7.x | 会话、热点数据、限流、分布式锁 | 百万级 Key | | 时序分析数据库 | ClickHouse 24.x | 播放数据、审核性能指标、运营分析 | 日增数亿行 | | 对象存储 | MinIO | 视频文件、C2PA 水印、证据包、证书 PDF | PB 级 | | 搜索引擎 | Elasticsearch 8.x | 审核报告全文检索、日志检索 | TB 级 | | 向量数据库 | Milvus / pgvector | 视频指纹向量、版权相似度检索 | 百万级向量 | | 区块链 | ChainMaker | 版权存证、权益记录、清算记录 | 链上不可删除 | --- ## 3.2 PostgreSQL 核心表结构 ### 3.2.1 创作者表 (`creators`) ```sql CREATE TABLE creators ( id BIGSERIAL PRIMARY KEY, ma_node_code VARCHAR(64) NOT NULL, creator_type VARCHAR(16) CHECK (creator_type IN ('individual', 'mcn', 'studio')), real_name VARCHAR(128) NOT NULL, id_card_hash VARCHAR(64), enterprise_name VARCHAR(256), enterprise_credit_code VARCHAR(32), phone VARCHAR(32) NOT NULL, email VARCHAR(128), status VARCHAR(16) DEFAULT 'active' CHECK (status IN ('active', 'frozen', 'blacklisted')), created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX idx_creators_ma_node ON creators(ma_node_code); CREATE INDEX idx_creators_status ON creators(status); COMMENT ON TABLE creators IS '创作者/机构主表,存储实名信息(身份证存哈希)'; ``` ### 3.2.2 平台/机构节点表 (`platforms`) ```sql CREATE TABLE platforms ( id BIGSERIAL PRIMARY KEY, ma_node_code VARCHAR(64) NOT NULL UNIQUE, platform_name VARCHAR(128) NOT NULL, platform_type VARCHAR(32) CHECK (platform_type IN ('long_video', 'short_video', 'ai_tool', 'mcn')), api_key_hash VARCHAR(128), api_secret_hash VARCHAR(256), status VARCHAR(16) DEFAULT 'pending' CHECK (status IN ('pending', 'active', 'suspended')), quota_daily INT DEFAULT 10000, callback_url VARCHAR(256), created_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX idx_platforms_type ON platforms(platform_type); CREATE INDEX idx_platforms_status ON platforms(status); COMMENT ON TABLE platforms IS '接入平台/机构节点表,对应 MA 三级节点'; ``` ### 3.2.3 AVCC 赋码记录表 (`avcc_records`) — 核心业务表 ```sql CREATE TABLE avcc_records ( id BIGSERIAL PRIMARY KEY, avcc_full_code VARCHAR(512) NOT NULL UNIQUE, ma_root VARCHAR(8) DEFAULT 'MA', country_code VARCHAR(8) DEFAULT '156', industry_node VARCHAR(16) DEFAULT '10005', platform_node VARCHAR(16) NOT NULL, object_category VARCHAR(64) NOT NULL, license_no VARCHAR(128), review_level VARCHAR(8) CHECK (review_level IN ('P', 'G', 'O')), version INT DEFAULT 1, ai_content_hash VARCHAR(128) NOT NULL, copyright_crd VARCHAR(128), creator_id BIGINT REFERENCES creators(id), platform_id BIGINT REFERENCES platforms(id), review_status VARCHAR(32) DEFAULT 'pending' CHECK (review_status IN ('pending', 'pre_checking', 'ai_reviewing', 'human_reviewing', 'approved', 'rejected', 'revoked')), review_report_id BIGINT, circulation_status VARCHAR(32) DEFAULT 'active' CHECK (circulation_status IN ('active', 'suspended', 'revoked', 'expired')), c2pa_manifest_hash VARCHAR(128), content_storage_key VARCHAR(256), submitted_at TIMESTAMPTZ DEFAULT NOW(), reviewed_at TIMESTAMPTZ, expires_at TIMESTAMPTZ, revoked_at TIMESTAMPTZ, revoked_reason TEXT, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX idx_avcc_platform ON avcc_records(platform_node); CREATE INDEX idx_avcc_creator ON avcc_records(creator_id); CREATE INDEX idx_avcc_review_status ON avcc_records(review_status); CREATE INDEX idx_avcc_circulation ON avcc_records(circulation_status); CREATE INDEX idx_avcc_license ON avcc_records(license_no); CREATE INDEX idx_avcc_submitted ON avcc_records(submitted_at); COMMENT ON TABLE avcc_records IS 'AVCC 赋码记录核心业务表,一码一行'; ``` ### 3.2.4 审核报告表 (`review_reports`) ```sql CREATE TABLE review_reports ( id BIGSERIAL PRIMARY KEY, avcc_record_id BIGINT REFERENCES avcc_records(id), review_type VARCHAR(16) CHECK (review_type IN ('AI', 'HUMAN', 'HYBRID')), overall_score DECIMAL(5,2), suggested_level VARCHAR(8), final_level VARCHAR(8), vision_score DECIMAL(5,2), dialogue_score DECIMAL(5,2), audio_score DECIMAL(5,2), authenticity_score DECIMAL(5,2), copyright_score DECIMAL(5,2), violations JSONB DEFAULT '[]', reviewer_id BIGINT, reviewer_comment TEXT, ai_model_version VARCHAR(64), report_json JSONB, content_fingerprint VARCHAR(128), processing_time_ms INT, created_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX idx_review_avcc ON review_reports(avcc_record_id); CREATE INDEX idx_review_type ON review_reports(review_type); CREATE INDEX idx_review_created ON review_reports(created_at); COMMENT ON TABLE review_reports IS '审核报告表,AI/HUMAN/HYBRID 三类审核结果'; ``` ### 3.2.5 版权链存证表 (`chain_records`) ```sql CREATE TABLE chain_records ( id BIGSERIAL PRIMARY KEY, avcc_record_id BIGINT REFERENCES avcc_records(id), chain_type VARCHAR(32) CHECK (chain_type IN ('copyright_main', 'user_rights', 'settlement', 'iteration', 'revocation')), tx_hash VARCHAR(128) NOT NULL, block_height BIGINT, chain_node VARCHAR(64), payload_hash VARCHAR(128), payload JSONB, confirmation_count INT DEFAULT 0, status VARCHAR(16) DEFAULT 'pending' CHECK (status IN ('pending', 'confirmed', 'failed')), created_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX idx_chain_avcc ON chain_records(avcc_record_id); CREATE INDEX idx_chain_type ON chain_records(chain_type); CREATE INDEX idx_chain_tx ON chain_records(tx_hash); COMMENT ON TABLE chain_records IS '版权链存证元数据表(链上原始数据存在 ChainMaker)'; ``` ### 3.2.6 分账/清算记录表 (`settlement_records`) ```sql CREATE TABLE settlement_records ( id BIGSERIAL PRIMARY KEY, avcc_record_id BIGINT REFERENCES avcc_records(id), platform_id BIGINT REFERENCES platforms(id), period_start DATE NOT NULL, period_end DATE NOT NULL, total_revenue DECIMAL(18,4), platform_share DECIMAL(18,4), creator_share DECIMAL(18,4), model_share DECIMAL(18,4), ip_share DECIMAL(18,4), hub_fee DECIMAL(18,4), status VARCHAR(16) DEFAULT 'pending' CHECK (status IN ('pending', 'confirmed', 'disputed', 'settled', 'refunded')), dispute_reason TEXT, tx_hash VARCHAR(128), settled_at TIMESTAMPTZ, created_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX idx_settlement_avcc ON settlement_records(avcc_record_id); CREATE INDEX idx_settlement_platform ON settlement_records(platform_id); CREATE INDEX idx_settlement_period ON settlement_records(period_start, period_end); COMMENT ON TABLE settlement_records IS '分账清算记录表,链上清算后同步写入'; ``` ### 3.2.7 黑名单表 (`blacklist`) ```sql CREATE TABLE blacklist ( id BIGSERIAL PRIMARY KEY, target_type VARCHAR(32) CHECK (target_type IN ('creator', 'platform', 'model', 'dataset', 'content')), target_id VARCHAR(128) NOT NULL, ma_node_code VARCHAR(64), reason TEXT NOT NULL, evidence_hash VARCHAR(128), source VARCHAR(32) CHECK (source IN ('admin', 'system', 'appeal')), operator_id BIGINT, revoked_at TIMESTAMPTZ, revoked_reason TEXT, created_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX idx_blacklist_target ON blacklist(target_type, target_id); CREATE INDEX idx_blacklist_active ON blacklist(target_type, target_id) WHERE revoked_at IS NULL; COMMENT ON TABLE blacklist IS '黑名单表,支持创作者/平台/模型/数据集/内容五级黑名单'; ``` ### 3.2.8 审核任务队列表 (`review_tasks`) ```sql CREATE TABLE review_tasks ( id BIGSERIAL PRIMARY KEY, avcc_record_id BIGINT REFERENCES avcc_records(id), task_type VARCHAR(16) CHECK (task_type IN ('AI_SCREEN', 'HUMAN_FINAL', 'APPEAL')), status VARCHAR(16) DEFAULT 'queued' CHECK (status IN ('queued', 'preprocessing', 'inferencing', 'fusion', 'completed', 'failed', 'cancelled')), priority INT DEFAULT 5, -- 1-10, P类内容=1(最高), O类=10 queue_position INT, processor_node VARCHAR(64), -- 处理节点标识 started_at TIMESTAMPTZ, completed_at TIMESTAMPTZ, error_message TEXT, retry_count INT DEFAULT 0, created_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX idx_tasks_status ON review_tasks(status); CREATE INDEX idx_tasks_priority ON review_tasks(priority, created_at); CREATE INDEX idx_tasks_avcc ON review_tasks(avcc_record_id); COMMENT ON TABLE review_tasks IS '审核任务队列,支撑优先级调度与故障重试'; ``` ### 3.2.9 API 调用日志表 (`api_call_logs`) — 镜像表 ```sql CREATE TABLE api_call_logs ( id BIGSERIAL PRIMARY KEY, platform_id BIGINT REFERENCES platforms(id), api_endpoint VARCHAR(256) NOT NULL, http_method VARCHAR(8), avcc_code VARCHAR(512), request_hash VARCHAR(128), response_status INT, latency_ms INT, client_ip INET, user_agent TEXT, called_at TIMESTAMPTZ DEFAULT NOW() ); CREATE INDEX idx_api_logs_platform ON api_call_logs(platform_id, called_at); CREATE INDEX idx_api_logs_endpoint ON api_call_logs(api_endpoint, called_at); CREATE INDEX idx_api_logs_date ON api_call_logs(called_at); COMMENT ON TABLE api_call_logs IS 'API 调用日志镜像表(详细日志写入 ClickHouse)'; ``` --- ## 3.3 Redis 缓存设计 | Key Pattern | 类型 | TTL | 说明 | |-------------|------|-----|------| | `avcc:{code}` | String | 1h | AVCC 解析结果缓存(热点码加速) | | `avcc:parse:{code}` | Hash | 30m | 解析元数据缓存 | | `platform:{id}:quota` | String | 1d | 平台日调用配额剩余 | | `platform:{id}:rate` | String | 1m | 平台限流计数(Token Bucket) | | `session:{token}` | Hash | 24h | 用户会话(创作者/运营人员) | | `blacklist:{type}:{id}` | String | 永久 | 黑名单缓存(每日全量同步) | | `review:task:{id}` | Hash | 2h | 审核任务状态缓存 | | `code:pool:{industry_node}` | String | 永久 | 码资源池剩余数量 | | `code:allocated:{platform_node}` | String | 永久 | 已分配码段索引 | | `stats:daily:{date}` | Hash | 7d | 日运营统计缓存 | ### Redis 使用场景补充 - **分布式锁**:码资源分配时使用 `SET code:lock:{platform_node} {value} NX EX 10` - **排行榜**:创作者收益排行 `ZADD leaderboard:revenue:{period} {score} {creator_id}` - **队列**:人工审核队列使用 Redis List + 优先级排序 - **发布订阅**:审核完成通知 `PUBLISH review:completed {avcc}` --- ## 3.4 ClickHouse 分析表设计 ### 3.4.1 播放与消费事件表 ```sql CREATE TABLE playback_metrics ( event_time DateTime64(3), avcc_code String, platform_node String, user_hash String, event_type Enum8('play' = 1, 'complete' = 2, 'purchase' = 3, 'share' = 4, 'comment' = 5), duration_sec Int32, revenue Decimal(18,4), device_type Enum8('mobile' = 1, 'pc' = 2, 'tv' = 3, 'other' = 4), province String ) ENGINE = MergeTree() ORDER BY (event_time, avcc_code, platform_node) PARTITION BY toYYYYMMDD(event_time) TTL event_time + INTERVAL 3 YEAR; ``` ### 3.4.2 审核性能分析表 ```sql CREATE TABLE review_metrics ( event_time DateTime64(3), review_type String, model_version String, content_type String, queue_wait_ms Int32, preprocess_ms Int32, inference_ms Int32, fusion_ms Int32, total_latency_ms Int32, result Enum8('pass' = 1, 'fail' = 2, 'manual' = 3), gpu_node String ) ENGINE = MergeTree() ORDER BY (event_time, review_type) PARTITION BY toYYYYMMDD(event_time); ``` ### 3.4.3 平台调用分析表 ```sql CREATE TABLE api_metrics ( event_time DateTime64(3), platform_id String, api_endpoint String, response_status Int16, latency_ms Int32, client_ip String, avcc_code String ) ENGINE = MergeTree() ORDER BY (event_time, platform_id, api_endpoint) PARTITION BY toYYYYMMDD(event_time) TTL event_time + INTERVAL 1 YEAR; ``` ### 3.4.4 版权链存证分析表 ```sql CREATE TABLE chain_metrics ( event_time DateTime64(3), chain_type String, tx_hash String, block_height UInt64, confirmation_ms Int32, gas_cost Decimal(18,8), payload_size Int32 ) ENGINE = MergeTree() ORDER BY (event_time, chain_type) PARTITION BY toYYYYMMDD(event_time); ``` --- ## 3.5 向量数据库设计(Milvus / pgvector) ### 3.5.1 视频指纹向量集合 ```python # 伪代码:视频指纹向量存储(使用 Milvus) collection = "video_fingerprint" schema = { "fields": [ {"name": "id", "type": "INT64", "is_primary": True, "auto_id": True}, {"name": "avcc_code", "type": "VARCHAR", "max_length": 512}, {"name": "platform_node", "type": "VARCHAR", "max_length": 64}, {"name": "fingerprint", "type": "FLOAT_VECTOR", "dim": 256}, # 感知哈希向量 {"name": "content_hash", "type": "VARCHAR", "max_length": 128}, {"name": "created_at", "type": "INT64"} # 时间戳 ] } index_params = { "metric_type": "COSINE", "index_type": "HNSW", "params": {"M": 16, "efConstruction": 200} } ``` ### 3.5.2 版权比对查询 ```python def search_similar_works(fingerprint_vector, top_k=10, threshold=0.85): """ 检索相似作品,用于版权比对与重复检测 threshold: 相似度阈值,>0.85 视为疑似侵权 """ results = milvus.search( collection_name="video_fingerprint", data=[fingerprint_vector], anns_field="fingerprint", param={"metric_type": "COSINE", "params": {"ef": 64}}, limit=top_k, output_fields=["avcc_code", "platform_node", "content_hash"] ) return [r for r in results if r.score > threshold] ``` --- ## 3.6 对象存储目录结构(MinIO) ``` aigc-hub-storage/ ├── works/ │ ├── raw/ │ │ └── {platform_node}/ │ │ └── {date}/ │ │ └── {avcc_code}/ │ │ ├── original.mp4 # 原始作品 │ │ ├── thumbnail.jpg # 缩略图 │ │ └── metadata.json # 元数据 │ ├── processed/ │ │ └── {avcc_code}/ │ │ ├── frames/ # 抽帧序列 │ │ ├── audio.wav # 音频分离 │ │ └── fingerprint.bin # 视频指纹 │ └── archived/ │ └── {year}/ │ └── {avcc_code}.tar.gz # 归档包 ├── c2pa/ │ └── {avcc_code}.c2pa # C2PA 水印文件 ├── certificates/ │ └── {avcc_code}.pdf # AVCC 证书 PDF ├── reports/ │ └── {avcc_code}/ │ ├── ai_review_report.pdf │ └── human_review_report.pdf ├── evidence/ │ └── {case_id}/ │ ├── complaint.pdf │ └── investigation/ └── backups/ └── daily/ └── {date}.sql.gz # PG 备份 ``` --- ## 3.7 数据库高可用与灾备 | 策略 | 实现方式 | RPO | RTO | |------|----------|-----|-----| | PostgreSQL | Patroni + etcd + 流复制(1主2从) | < 1s | < 30s | | Redis | Cluster 模式,3主3从,自动故障转移 | 0 | < 10s | | ClickHouse | 3节点副本,ReplicatedMergeTree | < 1min | < 5min | | MinIO | 4节点纠删码,4+2配置 | 0 | < 1min | | 区块链 | 联盟链多节点共识,5组织各1节点 | 0 | 共识自愈 | | 跨可用区 | 主集群北京,备集群上海,异步同步 | < 5min | < 15min | --- *上一章:[02-核心系统模块设计.md](02-核心系统模块设计.md)* *下一章:[04-API接口设计.md](04-API接口设计.md)*