Files
GovAI/server/migrations/011_knowledge_chunks.sql
2026-06-15 23:48:37 +08:00

29 lines
1.2 KiB
SQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- Knowledge-base chunking and vector-search support.
-- Requires the pgvector extension; the statement below enables it when it is not already present.
CREATE EXTENSION IF NOT EXISTS vector;
-- Knowledge document chunk table: each document is split into multiple chunks,
-- and each chunk stores its embedding vector.
-- Constraints are declared at table level under the names PostgreSQL would
-- generate for the equivalent column-level constraints.
CREATE TABLE IF NOT EXISTS knowledge_chunks (
    id          UUID         DEFAULT gen_random_uuid(),
    kb_id       UUID         NOT NULL,
    doc_id      UUID         NOT NULL,
    chunk_index INTEGER      NOT NULL DEFAULT 0,
    content     TEXT         NOT NULL,
    char_count  INTEGER      NOT NULL DEFAULT 0,
    -- 1024 dimensions, sized for text-embedding-v3; nullable.
    embedding   vector(1024),
    created_at  TIMESTAMPTZ  NOT NULL DEFAULT NOW(),

    CONSTRAINT knowledge_chunks_pkey
        PRIMARY KEY (id),
    -- Deleting a knowledge base or a document removes its chunks.
    CONSTRAINT knowledge_chunks_kb_id_fkey
        FOREIGN KEY (kb_id) REFERENCES knowledge_bases (id) ON DELETE CASCADE,
    CONSTRAINT knowledge_chunks_doc_id_fkey
        FOREIGN KEY (doc_id) REFERENCES knowledge_documents (id) ON DELETE CASCADE
);
-- Lookup indexes on the two foreign-key columns (btree is the default access method).
CREATE INDEX IF NOT EXISTS idx_knowledge_chunks_kb_id
    ON knowledge_chunks USING btree (kb_id);
CREATE INDEX IF NOT EXISTS idx_knowledge_chunks_doc_id
    ON knowledge_chunks USING btree (doc_id);

-- HNSW vector index over the embedding column, using the cosine-distance operator class.
-- m and ef_construction are spelled out explicitly so the build parameters are visible here.
CREATE INDEX IF NOT EXISTS idx_knowledge_chunks_embedding
    ON knowledge_chunks
    USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);
-- Add a chunk_count column to knowledge_documents.
-- Declared NOT NULL, like the other zero-defaulted integer columns in this migration,
-- so readers never have to handle NULL; rows that already exist receive the default 0
-- when the column is added.
-- NOTE(review): IF NOT EXISTS skips this statement when the column is already present,
-- so a database that ran an earlier nullable version keeps that definition — confirm
-- whether a follow-up migration is needed for such environments.
ALTER TABLE knowledge_documents
    ADD COLUMN IF NOT EXISTS chunk_count INTEGER NOT NULL DEFAULT 0;