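# Script: loads config/<ENVIRONMENT>/rag.txt, splits it into chunks, and
# stores the chunks in a Chroma vector store using DashScope embeddings.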
from langchain_community.document_loaders import TextLoader
|
|
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
|
from langchain_chroma import Chroma
|
|
import os
|
|
from langchain_community.embeddings import DashScopeEmbeddings
|
|
|
|
# Read from the ENVIRONMENT variable, falling back to 'development';
# it selects which config subdirectory the source text is taken from.
ENVIRONMENT = os.getenv('ENVIRONMENT', 'development')

# Directory containing this script; the paths below are built relative to it.
DIR_NAME = os.path.dirname(__file__)

# Input text file: <script dir>/../config/<ENVIRONMENT>/rag.txt
path = os.path.join(DIR_NAME, os.pardir, 'config', ENVIRONMENT, "rag.txt")

# Directory handed to Chroma as persist_directory: <script dir>/../chroma_db
out_dir = os.path.join(DIR_NAME, os.pardir, 'chroma_db')

# Load the text file into documents.
loader = TextLoader(file_path=path)
docs = loader.load()

# Split the loaded documents into overlapping chunks.
splitter_options = {"chunk_size": 300, "chunk_overlap": 100}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
splits = text_splitter.split_documents(docs)

# Embed the chunks with DashScope and store them in Chroma under out_dir.
# NOTE(review): presumably a re-run adds these chunks on top of whatever
# out_dir already holds — confirm that duplicates are not a problem.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=DashScopeEmbeddings(),
    persist_directory=out_dir,
)

print("向量数据库更新完毕")