Commit d0572507 authored by lvzhen


Deleted basic_demo/cli_demo.py, basic_demo/cli_demo_bad_word_ids.py, basic_demo/infer_test.py, basic_demo/utils.py, basic_demo/vocab.txt, basic_demo/web_demo.py, basic_demo/web_demo2.py, composite_demo/.streamlit/config.toml, composite_demo/assets/demo.png, composite_demo/assets/emojis.png, composite_demo/assets/heart.png, composite_demo/assets/tool.png, composite_demo/README.md, composite_demo/README_en.md, composite_demo/client.py, composite_demo/conversation.py, composite_demo/demo_chat.py, composite_demo/demo_ci.py, composite_demo/demo_tool.py, composite_demo/main.py, composite_demo/requirements.txt, composite_demo/tool_registry.py, cookbook/data/toutiao_cat_data_example.txt, cookbook/accurate_prompt.ipynb, cookbook/finetune_muti_classfication.ipynb, finetune_basemodel_demo/scripts/finetune_lora.sh, finetune_basemodel_demo/scripts/formate_alpaca2jsonl.py, finetune_basemodel_demo/README.md, finetune_basemodel_demo/arguments.py, finetune_basemodel_demo/finetune.py, finetune_basemodel_demo/inference.py, finetune_basemodel_demo/preprocess_utils.py, finetune_basemodel_demo/requirements.txt, finetune_basemodel_demo/trainer.py, finetune_chatmodel_demo/AdvertiseGen/dev.json, finetune_chatmodel_demo/AdvertiseGen/train.json, finetune_chatmodel_demo/configs/deepspeed.json, finetune_chatmodel_demo/formatted_data/advertise_gen.jsonl, finetune_chatmodel_demo/formatted_data/tool_alpaca.jsonl, finetune_chatmodel_demo/scripts/finetune_ds.sh, finetune_chatmodel_demo/scripts/finetune_ds_multiturn.sh, finetune_chatmodel_demo/scripts/finetune_pt.sh, finetune_chatmodel_demo/scripts/finetune_pt_multiturn.sh, finetune_chatmodel_demo/scripts/format_advertise_gen.py, finetune_chatmodel_demo/scripts/format_tool_alpaca.py, finetune_chatmodel_demo/README.md, finetune_chatmodel_demo/arguments.py, finetune_chatmodel_demo/finetune.py, finetune_chatmodel_demo/inference.py, finetune_chatmodel_demo/preprocess_utils.py, finetune_chatmodel_demo/requirements.txt, finetune_chatmodel_demo/train_data.json, finetune_chatmodel_demo/trainer.py, langchain_demo/Tool/Calculator.py, langchain_demo/Tool/Calculator.yaml, langchain_demo/Tool/Weather.py, langchain_demo/Tool/arxiv_example.yaml, langchain_demo/Tool/weather.yaml, langchain_demo/ChatGLM3.py, langchain_demo/README.md, langchain_demo/main.py, langchain_demo/requirements.txt, langchain_demo/utils.py, media/GLM.png, media/cli.png, media/transformers.jpg, openai_api_demo/openai_api.py, openai_api_demo/openai_api_request.py, openai_api_demo/requirements.txt, openai_api_demo/utils.py, resources/WECHAT.md, resources/cli-demo.png, resources/code_en.gif, resources/heart.png, resources/tool.png, resources/tool_en.png, resources/web-demo.gif, resources/web-demo2.gif, resources/web-demo2.png, resources/wechat.jpg, tool_using/README.md, tool_using/README_en.md, tool_using/cli_demo_tool.py, tool_using/openai_api_demo.py, tool_using/requirements.txt, tool_using/test.py, tool_using/tool_register.py, DEPLOYMENT.md, DEPLOYMENT_en.md, Dockerfile, MODEL_LICENSE, PROMPT.md, PROMPT_en.md, README.md, README_en.md, README_old.md, lvzhen.log, model.properties, requirements.txt files
parent d7be7b1c
# coding=utf-8
# Copyright 2020-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The Trainer class, to easily train a 🤗 Transformers model from scratch or finetune it on a new task.
"""
import os
from typing import Optional
from transformers import Trainer
import torch
from transformers.modeling_utils import PreTrainedModel, unwrap_model
from transformers.utils import logging
logger = logging.get_logger(__name__)
WEIGHTS_NAME = "pytorch_model.bin"
TRAINING_ARGS_NAME = "training_args.bin"
class PrefixTrainer(Trainer):
    def __init__(self, *args, save_changed=False, **kwargs):
        self.save_changed = save_changed
        super().__init__(*args, **kwargs)

    def _save(self, output_dir: Optional[str] = None, state_dict=None):
        # If we are executing this function, we are the process zero, so we don't check for that.
        output_dir = output_dir if output_dir is not None else self.args.output_dir
        os.makedirs(output_dir, exist_ok=True)
        logger.info(f"Saving model checkpoint to {output_dir}")
        # Save a trained model and configuration using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        if not isinstance(self.model, PreTrainedModel):
            if isinstance(unwrap_model(self.model), PreTrainedModel):
                if state_dict is None:
                    state_dict = self.model.state_dict()
                unwrap_model(self.model).save_pretrained(output_dir, state_dict=state_dict)
            else:
                logger.info("Trainer.model is not a `PreTrainedModel`, only saving its state dict.")
                if state_dict is None:
                    state_dict = self.model.state_dict()
                torch.save(state_dict, os.path.join(output_dir, WEIGHTS_NAME))
        else:
            if self.save_changed:
                print("Saving PrefixEncoder")
                state_dict = self.model.state_dict()
                filtered_state_dict = {}
                for k, v in self.model.named_parameters():
                    if v.requires_grad:
                        filtered_state_dict[k] = state_dict[k]
                self.model.save_pretrained(output_dir, state_dict=filtered_state_dict)
            else:
                print("Saving the whole model")
                self.model.save_pretrained(output_dir, state_dict=state_dict)
        if self.tokenizer is not None:
            self.tokenizer.save_pretrained(output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))
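
A minimal usage sketch of `PrefixTrainer`: the model, tokenizer, dataset and `TrainingArguments` objects below are placeholders (in the deleted demo they come from `finetune.py`), not part of this commit. Passing `save_changed=True` makes the checkpoint contain only parameters with `requires_grad=True`, i.e. the prefix encoder.

```python
# Hypothetical wiring; model/tokenizer/train_dataset/training_args are assumed to exist.
trainer = PrefixTrainer(
    model=model,                 # a ChatGLM3 model with a trainable PrefixEncoder
    args=training_args,          # transformers.TrainingArguments
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    save_changed=True,           # save only the trainable (prefix-encoder) weights
)
trainer.train()
trainer.save_model()             # invokes _save() above on process zero
```
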
import json
from langchain.llms.base import LLM
from transformers import AutoTokenizer, AutoModel, AutoConfig
from typing import List, Optional
from utils import tool_config_from_file
class ChatGLM3(LLM):
    max_token: int = 8192
    do_sample: bool = False
    temperature: float = 0.8
    top_p = 0.8
    tokenizer: object = None
    model: object = None
    history: List = []
    tool_names: List = []
    has_search: bool = False

    def __init__(self):
        super().__init__()

    @property
    def _llm_type(self) -> str:
        return "ChatGLM3"

    def load_model(self, model_name_or_path=None):
        model_config = AutoConfig.from_pretrained(
            model_name_or_path,
            trust_remote_code=True
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name_or_path,
            trust_remote_code=True
        )
        self.model = AutoModel.from_pretrained(
            model_name_or_path, config=model_config, trust_remote_code=True
        ).half().cuda()

    def _tool_history(self, prompt: str):
        ans = []
        tool_prompts = prompt.split(
            "You have access to the following tools:\n\n")[1].split("\n\nUse a json blob")[0].split("\n")
        tool_names = [tool.split(":")[0] for tool in tool_prompts]
        self.tool_names = tool_names

        tools_json = []
        for i, tool in enumerate(tool_names):
            tool_config = tool_config_from_file(tool)
            if tool_config:
                tools_json.append(tool_config)
            else:
                raise ValueError(
                    f"Tool {tool} config not found! Its description is {tool_prompts[i]}"
                )

        ans.append({
            "role": "system",
            "content": "Answer the following questions as best as you can. You have access to the following tools:",
            "tools": tools_json
        })
        query = f"""{prompt.split("Human: ")[-1].strip()}"""
        return ans, query

    def _extract_observation(self, prompt: str):
        return_json = prompt.split("Observation: ")[-1].split("\nThought:")[0]
        self.history.append({
            "role": "observation",
            "content": return_json
        })
        return

    def _extract_tool(self):
        if len(self.history[-1]["metadata"]) > 0:
            metadata = self.history[-1]["metadata"]
            content = self.history[-1]["content"]
            if "tool_call" in content:
                for tool in self.tool_names:
                    if tool in metadata:
                        input_para = content.split("='")[-1].split("'")[0]
                        action_json = {
                            "action": tool,
                            "action_input": input_para
                        }
                        self.has_search = True
                        return f"""
Action:
```
{json.dumps(action_json, ensure_ascii=False)}
```"""
        final_answer_json = {
            "action": "Final Answer",
            "action_input": self.history[-1]["content"]
        }
        self.has_search = False
        return f"""
Action:
```
{json.dumps(final_answer_json, ensure_ascii=False)}
```"""

    def _call(self, prompt: str, history: List = [], stop: Optional[List[str]] = ["<|user|>"]):
        print("======")
        print(prompt)
        print("======")
        if not self.has_search:
            self.history, query = self._tool_history(prompt)
        else:
            self._extract_observation(prompt)
            query = ""
        # print("======")
        # print(history)
        # print("======")
        _, self.history = self.model.chat(
            self.tokenizer,
            query,
            history=self.history,
            do_sample=self.do_sample,
            max_length=self.max_token,
            temperature=self.temperature,
        )
        response = self._extract_tool()
        history.append((prompt, response))
        return response
# README

## Model Configuration

In `main.py`, change the `model_path = /path/to/chatglm3-6b` path to your local checkpoint, or set it to `THUDM/chatglm3-6b` to download the model automatically.
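
For reference, the `main.py` shipped with this demo reads the path from the `MODEL_PATH` environment variable and hands it to `ChatGLM3.load_model`; a minimal sketch of that wiring:

```python
import os
from ChatGLM3 import ChatGLM3

# Defaults to the Hub id, so the model is downloaded if no local path is given.
MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/chatglm3-6b')

llm = ChatGLM3()
llm.load_model(model_name_or_path=MODEL_PATH)  # loads tokenizer + model onto the GPU in half precision
```
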
## Adding Tools

### Tools already implemented in LangChain

See the [langchain](https://python.langchain.com/docs/modules/agents/tools/) tool documentation, then import the tool module in `main.py`. For example, to import the `arxiv` tool:

```python
run_tool(["arxiv"], llm, [
    "帮我查询AgentTuning相关工作"
])
```

#### Calculator and Weather tool configuration

If the `LangChain` version in your Python environment is lower than **`0.0.278`**, you need to implement the `_arun` method in these two predefined tool classes,
otherwise you will get

`TypeError: Can't instantiate abstract class Weather with abstract method _arun`

For example:

```python
class Weather(BaseTool):
    name = "weather"
    description = "Use for searching weather at a specific location"

    async def _arun(self, *args: Any, **kwargs: Any) -> Any:
        # _arun is not used in this demo, so it is left unimplemented
        pass
```

Run `main.py`:

```
python main.py
```

The run will stop because the model cannot find a YAML description file for the `arxiv` tool, so you need to create `./Tool/arxiv.yaml` yourself. You can describe the tool in your own words or adapt LangChain's description of it.
For the `arxiv` example, a reference file is provided at `./Tool/arxiv_example.yaml`; you can build `Tool/arxiv.yaml` from it (in the simplest case, just rename the file), rerun the script, and the model will call the tool correctly.

> Some tools require an API key; add it to your environment variables as indicated by the langchain error message.
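
For reference, `./Tool/arxiv_example.yaml` (included later in this commit) parses into the structure below; this is what `tool_config_from_file("arxiv")` returns and what `ChatGLM3._tool_history` passes to the model as the tool definition:

```python
# Parsed form of Tool/arxiv_example.yaml (yaml.safe_load output)
{
    "name": "arxiv",
    "description": "A wrapper around Arxiv.org for searching and retrieving scientific articles in various fields.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "The search query title"},
        },
        "required": ["query"],
    },
}
```
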
### Custom Tools

To define your own tool, follow `Tool/Weather.py` and `Tool/Weather.yaml`: subclass `BaseTool`, implement the corresponding `_run()` method, and import the tool module in `main.py`. For example, after importing the `Weather` tool you can call it like this:

```python
# Call the same tool several times
# Requires: export SENIVERSE_KEY=<YOUR_API_KEY_HERE>
run_tool([Weather()], llm, [
    "今天北京天气怎么样?",
    "What's the weather like in Shanghai today",
])
```

## Using Multiple Tools

Multiple tools can be registered together so that the model chooses which one to call, for example:

```python
run_tool([Calculator(), "arxiv", Weather()], llm, [
    "帮我检索GLM-130B相关论文",
    "今天北京天气怎么样?",
    "根号3减去根号二再加上4等于多少?",
])
```
import abc
import math
from typing import Any
from langchain.tools import BaseTool
class Calculator(BaseTool, abc.ABC):
    name = "Calculator"
    description = "Useful for when you need to answer questions about math"

    def __init__(self):
        super().__init__()

    async def _arun(self, *args: Any, **kwargs: Any) -> Any:
        # _arun is not used in this demo, so it is left unimplemented
        pass

    def _run(self, para: str) -> str:
        # Note: eval() is used for simplicity; only pass trusted expressions.
        para = para.replace("^", "**")
        if "sqrt" in para:
            para = para.replace("sqrt", "math.sqrt")
        elif "log" in para:
            para = para.replace("log", "math.log")
        return eval(para)


if __name__ == "__main__":
    calculator_tool = Calculator()
    result = calculator_tool.run("sqrt(2) + 3")
    print(result)
name: Calculator
description: Useful for when you need to answer questions about math
parameters:
  type: object
  properties:
    formula:
      type: string
      description: The formula to be calculated
  required:
    - formula
import os
from typing import Any
import requests
from langchain.tools import BaseTool
class Weather(BaseTool):
    name = "weather"
    description = "Use for searching weather at a specific location"

    def __init__(self):
        super().__init__()

    async def _arun(self, *args: Any, **kwargs: Any) -> Any:
        # _arun is not used in this demo, so it is left unimplemented
        pass

    def get_weather(self, location):
        api_key = os.environ["SENIVERSE_KEY"]
        url = f"https://api.seniverse.com/v3/weather/now.json?key={api_key}&location={location}&language=zh-Hans&unit=c"
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            weather = {
                "temperature": data["results"][0]["now"]["temperature"],
                "description": data["results"][0]["now"]["text"],
            }
            return weather
        else:
            raise Exception(
                f"Failed to retrieve weather: {response.status_code}")

    def _run(self, para: str) -> str:
        return self.get_weather(para)


if __name__ == "__main__":
    weather_tool = Weather()
    weather_info = weather_tool.run("成都")
    print(weather_info)
name: arxiv
description: A wrapper around Arxiv.org for searching and retrieving scientific articles in various fields.
parameters:
  type: object
  properties:
    query:
      type: string
      description: The search query title
  required:
    - query
name: weather
description: Search the current weather of a city
parameters:
  type: object
  properties:
    city:
      type: string
      description: City name
  required:
    - city
import os
from typing import List
from ChatGLM3 import ChatGLM3
from langchain.agents import load_tools
from Tool.Weather import Weather
from Tool.Calculator import Calculator
from langchain.agents import initialize_agent
from langchain.agents import AgentType
MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/chatglm3-6b')
def run_tool(tools, llm, prompt_chain: List[str]):
    loaded_tools = []
    for tool in tools:
        if isinstance(tool, str):
            loaded_tools.append(load_tools([tool], llm=llm)[0])
        else:
            loaded_tools.append(tool)
    agent = initialize_agent(
        loaded_tools, llm,
        agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
        handle_parsing_errors=True
    )
    for prompt in prompt_chain:
        agent.run(prompt)


if __name__ == "__main__":
    llm = ChatGLM3()
    llm.load_model(model_name_or_path=MODEL_PATH)

    # arxiv: single-tool example 1
    run_tool(["arxiv"], llm, [
        "帮我查询GLM-130B相关工作"
    ])

    # weather: single-tool example 2
    run_tool([Weather()], llm, [
        "今天北京天气怎么样?",
        "What's the weather like in Shanghai today",
    ])

    # calculator: single-tool example 3
    run_tool([Calculator()], llm, [
        "12345679乘以54等于多少?",
        "3.14的3.14次方等于多少?",
        "根号2加上根号三等于多少?",
    ])

    # arxiv + weather + calculator: combining multiple tools
    # run_tool([Calculator(), "arxiv", Weather()], llm, [
    #     "帮我检索GLM-130B相关论文",
    #     "今天北京天气怎么样?",
    #     "根号3减去根号二再加上4等于多少?",
    # ])
langchain
arxiv
import os
import yaml
def tool_config_from_file(tool_name, directory="Tool/"):
    """Search `directory` for the tool's YAML description and return it as a dict."""
    for filename in os.listdir(directory):
        if filename.endswith('.yaml') and tool_name in filename:
            file_path = os.path.join(directory, filename)
            with open(file_path, encoding='utf-8') as f:
                return yaml.safe_load(f)
    return None
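
A quick usage sketch of the helper above, run from the demo directory so that the bundled `Tool/weather.yaml` is found (the error message is illustrative, not part of the repo):

```python
config = tool_config_from_file("weather")  # matches Tool/weather.yaml by file name
if config is None:
    raise FileNotFoundError("no YAML description found for the 'weather' tool")
print(config["name"], "-", config["description"])  # weather - Search the current weather of a city
```
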
# Unique model identifier
modelCode=474
# Model name
modelName=chatglm3-6b_pytorch
# Model description
modelDescription=PyTorch-based chatglm3-6b
# Application scenarios
appScenario=Training, inference, conversational QA, healthcare, education, research, finance
# Framework type
frameType=Pytorch,Transformers,Deepspeed
# coding=utf-8
# Implements an API for ChatGLM3-6B in OpenAI's format. (https://platform.openai.com/docs/api-reference/chat)
# Usage: python openai_api.py
# Visit http://localhost:8000/docs for the API documentation.
# In the OpenAI API, max_tokens is equivalent to HuggingFace's max_new_tokens, not max_length.
# For example, setting max_tokens = 8192 for the 6B model will raise an error, because after the
# history and the prompt are deducted, the model cannot generate that many tokens.
import os
import time
from contextlib import asynccontextmanager
from typing import List, Literal, Optional, Union
import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from loguru import logger
from pydantic import BaseModel, Field
from sse_starlette.sse import EventSourceResponse
from transformers import AutoTokenizer, AutoModel
from utils import process_response, generate_chatglm3, generate_stream_chatglm3
MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/chatglm3-6b')
TOKENIZER_PATH = os.environ.get("TOKENIZER_PATH", MODEL_PATH)
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
@asynccontextmanager
async def lifespan(app: FastAPI):  # collects GPU memory
    yield
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()


app = FastAPI(lifespan=lifespan)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class ModelCard(BaseModel):
    id: str
    object: str = "model"
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "owner"
    root: Optional[str] = None
    parent: Optional[str] = None
    permission: Optional[list] = None


class ModelList(BaseModel):
    object: str = "list"
    data: List[ModelCard] = []


class FunctionCallResponse(BaseModel):
    name: Optional[str] = None
    arguments: Optional[str] = None


class ChatMessage(BaseModel):
    role: Literal["user", "assistant", "system", "function"]
    content: str = None
    name: Optional[str] = None
    function_call: Optional[FunctionCallResponse] = None


class DeltaMessage(BaseModel):
    role: Optional[Literal["user", "assistant", "system"]] = None
    content: Optional[str] = None
    function_call: Optional[FunctionCallResponse] = None


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 0.8
    top_p: Optional[float] = 0.8
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False
    functions: Optional[Union[dict, List[dict]]] = None
    # Additional parameters
    repetition_penalty: Optional[float] = 1.1


class ChatCompletionResponseChoice(BaseModel):
    index: int
    message: ChatMessage
    finish_reason: Literal["stop", "length", "function_call"]


class ChatCompletionResponseStreamChoice(BaseModel):
    index: int
    delta: DeltaMessage
    finish_reason: Optional[Literal["stop", "length", "function_call"]]


class UsageInfo(BaseModel):
    prompt_tokens: int = 0
    total_tokens: int = 0
    completion_tokens: Optional[int] = 0


class ChatCompletionResponse(BaseModel):
    model: str
    object: Literal["chat.completion", "chat.completion.chunk"]
    choices: List[Union[ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice]]
    created: Optional[int] = Field(default_factory=lambda: int(time.time()))
    usage: Optional[UsageInfo] = None
@app.get("/v1/models", response_model=ModelList)
async def list_models():
model_card = ModelCard(id="chatglm3-6b")
return ModelList(data=[model_card])
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
global model, tokenizer
if len(request.messages) < 1 or request.messages[-1].role == "assistant":
raise HTTPException(status_code=400, detail="Invalid request")
gen_params = dict(
messages=request.messages,
temperature=request.temperature,
top_p=request.top_p,
max_tokens=request.max_tokens or 1024,
echo=False,
stream=request.stream,
repetition_penalty=request.repetition_penalty,
functions=request.functions,
)
logger.debug(f"==== request ====\n{gen_params}")
if request.stream:
generate = predict(request.model, gen_params)
return EventSourceResponse(generate, media_type="text/event-stream")
response = generate_chatglm3(model, tokenizer, gen_params)
# Remove the first newline character
if response["text"].startswith("\n"):
response["text"] = response["text"][1:]
response["text"] = response["text"].strip()
usage = UsageInfo()
function_call, finish_reason = None, "stop"
if request.functions:
try:
function_call = process_response(response["text"], use_tool=True)
except:
logger.warning("Failed to parse tool call, maybe the response is not a tool call or have been answered.")
if isinstance(function_call, dict):
finish_reason = "function_call"
function_call = FunctionCallResponse(**function_call)
message = ChatMessage(
role="assistant",
content=response["text"],
function_call=function_call if isinstance(function_call, FunctionCallResponse) else None,
)
logger.debug(f"==== message ====\n{message}")
choice_data = ChatCompletionResponseChoice(
index=0,
message=message,
finish_reason=finish_reason,
)
task_usage = UsageInfo.model_validate(response["usage"])
for usage_key, usage_value in task_usage.model_dump().items():
setattr(usage, usage_key, getattr(usage, usage_key) + usage_value)
return ChatCompletionResponse(model=request.model, choices=[choice_data], object="chat.completion", usage=usage)
async def predict(model_id: str, params: dict):
    global model, tokenizer

    choice_data = ChatCompletionResponseStreamChoice(
        index=0,
        delta=DeltaMessage(role="assistant"),
        finish_reason=None
    )
    chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
    yield "{}".format(chunk.model_dump_json(exclude_unset=True))

    previous_text = ""
    for new_response in generate_stream_chatglm3(model, tokenizer, params):
        decoded_unicode = new_response["text"]
        delta_text = decoded_unicode[len(previous_text):]
        previous_text = decoded_unicode

        finish_reason = new_response["finish_reason"]
        if len(delta_text) == 0 and finish_reason != "function_call":
            continue

        function_call = None
        if finish_reason == "function_call":
            try:
                function_call = process_response(decoded_unicode, use_tool=True)
            except:
                logger.warning("Failed to parse tool call, maybe the response is not a tool call or has already been answered.")

        if isinstance(function_call, dict):
            function_call = FunctionCallResponse(**function_call)

        delta = DeltaMessage(
            content=delta_text,
            role="assistant",
            function_call=function_call if isinstance(function_call, FunctionCallResponse) else None,
        )
        choice_data = ChatCompletionResponseStreamChoice(
            index=0,
            delta=delta,
            finish_reason=finish_reason
        )
        chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
        yield "{}".format(chunk.model_dump_json(exclude_unset=True))

    choice_data = ChatCompletionResponseStreamChoice(
        index=0,
        delta=DeltaMessage(),
        finish_reason="stop"
    )
    chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
    yield "{}".format(chunk.model_dump_json(exclude_unset=True))
    yield '[DONE]'
if __name__ == "__main__":
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH, trust_remote_code=True)
if 'cuda' in DEVICE: # AMD, NVIDIA GPU can use Half Precision
model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).to(DEVICE).eval()
else: # CPU, Intel GPU and other GPU can use Float16 Precision Only
model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).float().to(DEVICE).eval()
uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)
# 使用curl命令测试返回
# curl -X POST "http://127.0.0.1:8000/v1/chat/completions" \
# -H "Content-Type: application/json" \
# -d "{\"model\": \"chatglm3-6b\", \"messages\": [{\"role\": \"system\", \"content\": \"You are ChatGLM3, a large language model trained by Zhipu.AI. Follow the user's instructions carefully. Respond using markdown.\"}, {\"role\": \"user\", \"content\": \"你好,给我讲一个故事,大概100字\"}], \"stream\": false, \"max_tokens\": 100, \"temperature\": 0.8, \"top_p\": 0.8}"
# Test the response with Python code
import requests
import json
base_url = "http://127.0.0.1:8000"
def create_chat_completion(model, messages, functions, use_stream=False):
    data = {
        "functions": functions,  # function (tool) definitions
        "model": model,  # model name
        "messages": messages,  # conversation history
        "stream": use_stream,  # whether to stream the response
        "max_tokens": 100,  # maximum number of tokens to generate
        "temperature": 0.8,  # temperature
        "top_p": 0.8,  # nucleus sampling probability
    }

    response = requests.post(f"{base_url}/v1/chat/completions", json=data, stream=use_stream)
    if response.status_code == 200:
        if use_stream:
            # Handle a streaming response
            for line in response.iter_lines():
                if line:
                    decoded_line = line.decode('utf-8')[6:]  # strip the SSE "data: " prefix
                    try:
                        response_json = json.loads(decoded_line)
                        content = response_json.get("choices", [{}])[0].get("delta", {}).get("content", "")
                        print(content)
                    except:
                        print("Special Token:", decoded_line)
        else:
            # Handle a non-streaming response
            decoded_line = response.json()
            content = decoded_line.get("choices", [{}])[0].get("message", {}).get("content", "")
            print(content)
    else:
        print("Error:", response.status_code)
        return None
def function_chat(use_stream=True):
    functions = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. Beijing",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]
    chat_messages = [
        {
            "role": "user",
            "content": "波士顿天气如何?",
        },
        {
            "role": "assistant",
            "content": "get_current_weather\n ```python\ntool_call(location='Beijing', unit='celsius')\n```",
            "function_call": {
                "name": "get_current_weather",
                "arguments": '{"location": "Beijing", "unit": "celsius"}',
            },
        },
        {
            "role": "function",
            "name": "get_current_weather",
            "content": '{"temperature": "12", "unit": "celsius", "description": "Sunny"}',
        },
        # ... the turns below would be the assistant's reply and the user's follow-up.
        # {
        #     "role": "assistant",
        #     "content": "根据最新的天气预报,目前北京的天气情况是晴朗的,温度为12摄氏度。",
        # },
        # {
        #     "role": "user",
        #     "content": "谢谢",
        # }
    ]
    create_chat_completion("chatglm3-6b", messages=chat_messages, functions=functions, use_stream=use_stream)


def simple_chat(use_stream=True):
    functions = None
    chat_messages = [
        {
            "role": "system",
            "content": "You are ChatGLM3, a large language model trained by Zhipu.AI. Follow the user's instructions carefully. Respond using markdown.",
        },
        {
            "role": "user",
            "content": "你好,给我讲一个故事,大概100字"
        }
    ]
    create_chat_completion("chatglm3-6b", messages=chat_messages, functions=functions, use_stream=use_stream)


if __name__ == "__main__":
    function_chat(use_stream=False)
    # simple_chat(use_stream=True)
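
Since the requirements below pin `openai>=1.3.0`, the same server can also be exercised through the official OpenAI client instead of raw `requests`; a minimal sketch (the `api_key` value is a placeholder, as the demo server does not check it):

```python
from openai import OpenAI

# Point the official client at the locally running openai_api.py server.
client = OpenAI(base_url="http://127.0.0.1:8000/v1", api_key="EMPTY")

completion = client.chat.completions.create(
    model="chatglm3-6b",
    messages=[{"role": "user", "content": "你好,给我讲一个故事,大概100字"}],
    temperature=0.8,
    top_p=0.8,
    max_tokens=100,
)
print(completion.choices[0].message.content)
```
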
openai>=1.3.0
pydantic>=2.5.1