-- Knowledge-base chunking and vector-search support.
-- Requires the pgvector extension, which supplies the `vector` column type
-- and the `hnsw` index access method used later in this script.
-- IF NOT EXISTS keeps the migration safe to re-run.
CREATE EXTENSION IF NOT EXISTS vector;

-- Knowledge-document chunk table: each document is split into multiple chunks,
-- and each chunk row stores its text together with an embedding vector.
CREATE TABLE IF NOT EXISTS knowledge_chunks (
    -- Surrogate key. gen_random_uuid() is built into PostgreSQL 13+;
    -- older servers need the pgcrypto extension for it.
    id          UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Owning knowledge base; chunk rows are removed when it is deleted.
    kb_id       UUID        NOT NULL REFERENCES knowledge_bases(id) ON DELETE CASCADE,
    -- Source document; chunk rows are removed when it is deleted.
    doc_id      UUID        NOT NULL REFERENCES knowledge_documents(id) ON DELETE CASCADE,
    -- NOTE(review): presumably the ordinal position of the chunk within its
    -- document; no uniqueness on (doc_id, chunk_index) is enforced here.
    chunk_index INTEGER     NOT NULL DEFAULT 0,
    -- Chunk text.
    content     TEXT        NOT NULL,
    -- NOTE(review): presumably the character length of `content`; it is a
    -- plain column, so the writer must keep it in sync -- confirm.
    char_count  INTEGER     NOT NULL DEFAULT 0,
    -- 1024-dimensional embedding (sized for text-embedding-v3). Nullable.
    embedding   vector(1024),
    created_at  TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Secondary indexes on the two foreign-key columns. PostgreSQL does not index
-- referencing columns automatically, so these serve lookups by knowledge base
-- or by document and keep the ON DELETE CASCADE checks from scanning the table.
CREATE INDEX IF NOT EXISTS idx_knowledge_chunks_kb_id
    ON knowledge_chunks (kb_id);
CREATE INDEX IF NOT EXISTS idx_knowledge_chunks_doc_id
    ON knowledge_chunks (doc_id);

-- HNSW vector index for approximate nearest-neighbour search by cosine
-- distance. The vector_cosine_ops operator class serves the `<=>` operator, so
-- queries must order by `embedding <=> :query` for this index to be considered.
-- m = 16 and ef_construction = 64 match pgvector's documented defaults and are
-- spelled out here to make the build parameters explicit.
CREATE INDEX IF NOT EXISTS idx_knowledge_chunks_embedding
    ON knowledge_chunks
    USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);

-- Add a chunk_count column to knowledge_documents. IF NOT EXISTS keeps the
-- migration re-runnable; rows that already exist receive the default of 0.
-- NOTE(review): the column is nullable (no NOT NULL), unlike the counters in
-- knowledge_chunks -- confirm whether NULL is meant to be a valid value.
ALTER TABLE knowledge_documents
    ADD COLUMN IF NOT EXISTS chunk_count INTEGER DEFAULT 0;