294 lines
5.9 KiB
Markdown
294 lines
5.9 KiB
Markdown
# LLM 应用开发指南 (RAG/Agent)
|
||
|
||
## RAG 系统架构
|
||
|
||
```
|
||
用户查询 → 查询改写 → 向量检索 → 重排序 → LLM生成 → 回答
|
||
↓
|
||
向量数据库
|
||
↑
|
||
文档切分 → Embedding
|
||
```
|
||
|
||
## 文档处理
|
||
|
||
```python
|
||
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
||
from langchain.document_loaders import PyPDFLoader, TextLoader
|
||
|
||
# 加载文档
|
||
loader = PyPDFLoader('document.pdf')
|
||
documents = loader.load()
|
||
|
||
# 文档切分
|
||
splitter = RecursiveCharacterTextSplitter(
|
||
chunk_size=500,
|
||
chunk_overlap=50,
|
||
separators=['\n\n', '\n', '。', '!', '?', ',', ' ']
|
||
)
|
||
chunks = splitter.split_documents(documents)
|
||
```
|
||
|
||
## 向量数据库
|
||
|
||
### Chroma
|
||
|
||
```python
|
||
from langchain.embeddings import HuggingFaceEmbeddings
|
||
from langchain.vectorstores import Chroma
|
||
|
||
# Embedding 模型
|
||
embeddings = HuggingFaceEmbeddings(
|
||
model_name='BAAI/bge-large-zh-v1.5',
|
||
model_kwargs={'device': 'cuda'},
|
||
encode_kwargs={'normalize_embeddings': True}
|
||
)
|
||
|
||
# 创建向量库
|
||
vectorstore = Chroma.from_documents(
|
||
documents=chunks,
|
||
embedding=embeddings,
|
||
persist_directory='./chroma_db'
|
||
)
|
||
|
||
# 检索
|
||
retriever = vectorstore.as_retriever(
|
||
search_type='similarity', # mmr
|
||
search_kwargs={'k': 5}
|
||
)
|
||
docs = retriever.get_relevant_documents('查询内容')
|
||
```
|
||
|
||
### FAISS
|
||
|
||
```python
|
||
from langchain.vectorstores import FAISS
|
||
|
||
vectorstore = FAISS.from_documents(chunks, embeddings)
|
||
vectorstore.save_local('faiss_index')
|
||
|
||
# 加载
|
||
vectorstore = FAISS.load_local('faiss_index', embeddings)
|
||
```
|
||
|
||
## RAG 实现
|
||
|
||
### 基础 RAG
|
||
|
||
```python
|
||
from langchain.chains import RetrievalQA
|
||
from langchain.llms import OpenAI
|
||
|
||
llm = OpenAI(temperature=0)
|
||
|
||
qa_chain = RetrievalQA.from_chain_type(
|
||
llm=llm,
|
||
chain_type='stuff', # map_reduce, refine
|
||
retriever=retriever,
|
||
return_source_documents=True
|
||
)
|
||
|
||
result = qa_chain({'query': '问题'})
|
||
print(result['result'])
|
||
print(result['source_documents'])
|
||
```
|
||
|
||
### 自定义 Prompt
|
||
|
||
```python
|
||
from langchain.prompts import PromptTemplate
|
||
|
||
template = """基于以下上下文回答问题。如果无法从上下文中找到答案,请说"我不知道"。
|
||
|
||
上下文:
|
||
{context}
|
||
|
||
问题: {question}
|
||
|
||
回答:"""
|
||
|
||
prompt = PromptTemplate(
|
||
template=template,
|
||
input_variables=['context', 'question']
|
||
)
|
||
|
||
qa_chain = RetrievalQA.from_chain_type(
|
||
llm=llm,
|
||
retriever=retriever,
|
||
chain_type_kwargs={'prompt': prompt}
|
||
)
|
||
```
|
||
|
||
### 对话历史
|
||
|
||
```python
|
||
from langchain.chains import ConversationalRetrievalChain
|
||
from langchain.memory import ConversationBufferMemory
|
||
|
||
memory = ConversationBufferMemory(
|
||
memory_key='chat_history',
|
||
return_messages=True
|
||
)
|
||
|
||
qa_chain = ConversationalRetrievalChain.from_llm(
|
||
llm=llm,
|
||
retriever=retriever,
|
||
memory=memory
|
||
)
|
||
|
||
result = qa_chain({'question': '问题'})
|
||
```
|
||
|
||
## Agent 开发
|
||
|
||
### ReAct Agent
|
||
|
||
```python
|
||
from langchain.agents import initialize_agent, AgentType, Tool
|
||
from langchain.tools import DuckDuckGoSearchRun
|
||
|
||
# 定义工具
|
||
search = DuckDuckGoSearchRun()
|
||
|
||
tools = [
|
||
Tool(
|
||
name='Search',
|
||
func=search.run,
|
||
description='用于搜索互联网上的信息'
|
||
),
|
||
Tool(
|
||
name='Calculator',
|
||
func=lambda x: eval(x),
|
||
description='用于数学计算'
|
||
)
|
||
]
|
||
|
||
# 创建 Agent
|
||
agent = initialize_agent(
|
||
tools=tools,
|
||
llm=llm,
|
||
agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
|
||
verbose=True
|
||
)
|
||
|
||
result = agent.run('北京今天的天气怎么样?')
|
||
```
|
||
|
||
### 自定义工具
|
||
|
||
```python
|
||
from langchain.tools import BaseTool
|
||
from pydantic import BaseModel, Field
|
||
|
||
class SearchInput(BaseModel):
|
||
query: str = Field(description='搜索查询')
|
||
|
||
class CustomSearchTool(BaseTool):
|
||
name = 'custom_search'
|
||
description = '自定义搜索工具'
|
||
args_schema = SearchInput
|
||
|
||
def _run(self, query: str) -> str:
|
||
# 实现搜索逻辑
|
||
return f'搜索结果: {query}'
|
||
|
||
async def _arun(self, query: str) -> str:
|
||
return self._run(query)
|
||
```
|
||
|
||
## Prompt Engineering
|
||
|
||
### 结构化输出
|
||
|
||
```python
|
||
from langchain.output_parsers import PydanticOutputParser
|
||
from pydantic import BaseModel
|
||
|
||
class ExtractedInfo(BaseModel):
|
||
name: str
|
||
age: int
|
||
occupation: str
|
||
|
||
parser = PydanticOutputParser(pydantic_object=ExtractedInfo)
|
||
|
||
prompt = PromptTemplate(
|
||
template='从以下文本中提取信息:\n{text}\n\n{format_instructions}',
|
||
input_variables=['text'],
|
||
partial_variables={'format_instructions': parser.get_format_instructions()}
|
||
)
|
||
|
||
chain = prompt | llm | parser
|
||
result = chain.invoke({'text': '张三,30岁,软件工程师'})
|
||
```
|
||
|
||
### Few-shot Learning
|
||
|
||
```python
|
||
from langchain.prompts import FewShotPromptTemplate
|
||
|
||
examples = [
|
||
{'input': '今天天气真好', 'output': '正面'},
|
||
{'input': '服务太差了', 'output': '负面'},
|
||
{'input': '还行吧', 'output': '中性'},
|
||
]
|
||
|
||
example_prompt = PromptTemplate(
|
||
input_variables=['input', 'output'],
|
||
template='输入: {input}\n输出: {output}'
|
||
)
|
||
|
||
few_shot_prompt = FewShotPromptTemplate(
|
||
examples=examples,
|
||
example_prompt=example_prompt,
|
||
prefix='对以下文本进行情感分类:\n',
|
||
suffix='\n输入: {input}\n输出:',
|
||
input_variables=['input']
|
||
)
|
||
```
|
||
|
||
## vLLM 部署
|
||
|
||
```python
|
||
from vllm import LLM, SamplingParams
|
||
|
||
# 加载模型
|
||
llm = LLM(
|
||
model='Qwen/Qwen2-7B-Instruct',
|
||
tensor_parallel_size=1,
|
||
dtype='float16'
|
||
)
|
||
|
||
# 采样参数
|
||
sampling_params = SamplingParams(
|
||
temperature=0.7,
|
||
top_p=0.9,
|
||
max_tokens=512
|
||
)
|
||
|
||
# 批量推理
|
||
prompts = ['问题1', '问题2']
|
||
outputs = llm.generate(prompts, sampling_params)
|
||
|
||
for output in outputs:
|
||
print(output.outputs[0].text)
|
||
```
|
||
|
||
## API 服务
|
||
|
||
```python
|
||
from fastapi import FastAPI
|
||
from pydantic import BaseModel
|
||
|
||
app = FastAPI()
|
||
|
||
class Query(BaseModel):
|
||
question: str
|
||
|
||
@app.post('/chat')
|
||
async def chat(query: Query):
|
||
result = qa_chain({'query': query.question})
|
||
return {'answer': result['result']}
|
||
|
||
# 运行: uvicorn main:app --host 0.0.0.0 --port 8000
|
||
```
|