Commit d0572507 authored by lvzhen


Deleted basic_demo/cli_demo.py, basic_demo/cli_demo_bad_word_ids.py, basic_demo/infer_test.py, basic_demo/utils.py, basic_demo/vocab.txt, basic_demo/web_demo.py, basic_demo/web_demo2.py, composite_demo/.streamlit/config.toml, composite_demo/assets/demo.png, composite_demo/assets/emojis.png, composite_demo/assets/heart.png, composite_demo/assets/tool.png, composite_demo/README.md, composite_demo/README_en.md, composite_demo/client.py, composite_demo/conversation.py, composite_demo/demo_chat.py, composite_demo/demo_ci.py, composite_demo/demo_tool.py, composite_demo/main.py, composite_demo/requirements.txt, composite_demo/tool_registry.py, cookbook/data/toutiao_cat_data_example.txt, cookbook/accurate_prompt.ipynb, cookbook/finetune_muti_classfication.ipynb, finetune_basemodel_demo/scripts/finetune_lora.sh, finetune_basemodel_demo/scripts/formate_alpaca2jsonl.py, finetune_basemodel_demo/README.md, finetune_basemodel_demo/arguments.py, finetune_basemodel_demo/finetune.py, finetune_basemodel_demo/inference.py, finetune_basemodel_demo/preprocess_utils.py, finetune_basemodel_demo/requirements.txt, finetune_basemodel_demo/trainer.py, finetune_chatmodel_demo/AdvertiseGen/dev.json, finetune_chatmodel_demo/AdvertiseGen/train.json, finetune_chatmodel_demo/configs/deepspeed.json, finetune_chatmodel_demo/formatted_data/advertise_gen.jsonl, finetune_chatmodel_demo/formatted_data/tool_alpaca.jsonl, finetune_chatmodel_demo/scripts/finetune_ds.sh, finetune_chatmodel_demo/scripts/finetune_ds_multiturn.sh, finetune_chatmodel_demo/scripts/finetune_pt.sh, finetune_chatmodel_demo/scripts/finetune_pt_multiturn.sh, finetune_chatmodel_demo/scripts/format_advertise_gen.py, finetune_chatmodel_demo/scripts/format_tool_alpaca.py, finetune_chatmodel_demo/README.md, finetune_chatmodel_demo/arguments.py, finetune_chatmodel_demo/finetune.py, finetune_chatmodel_demo/inference.py, finetune_chatmodel_demo/preprocess_utils.py, finetune_chatmodel_demo/requirements.txt, finetune_chatmodel_demo/train_data.json, finetune_chatmodel_demo/trainer.py, langchain_demo/Tool/Calculator.py, langchain_demo/Tool/Calculator.yaml, langchain_demo/Tool/Weather.py, langchain_demo/Tool/arxiv_example.yaml, langchain_demo/Tool/weather.yaml, langchain_demo/ChatGLM3.py, langchain_demo/README.md, langchain_demo/main.py, langchain_demo/requirements.txt, langchain_demo/utils.py, media/GLM.png, media/cli.png, media/transformers.jpg, openai_api_demo/openai_api.py, openai_api_demo/openai_api_request.py, openai_api_demo/requirements.txt, openai_api_demo/utils.py, resources/WECHAT.md, resources/cli-demo.png, resources/code_en.gif, resources/heart.png, resources/tool.png, resources/tool_en.png, resources/web-demo.gif, resources/web-demo2.gif, resources/web-demo2.png, resources/wechat.jpg, tool_using/README.md, tool_using/README_en.md, tool_using/cli_demo_tool.py, tool_using/openai_api_demo.py, tool_using/requirements.txt, tool_using/test.py, tool_using/tool_register.py, DEPLOYMENT.md, DEPLOYMENT_en.md, Dockerfile, MODEL_LICENSE, PROMPT.md, PROMPT_en.md, README.md, README_en.md, README_old.md, lvzhen.log, model.properties, requirements.txt files
parent d7be7b1c
# coding=utf-8
# Copyright 2020-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
The Trainer class, to easily train a 🤗 Transformers model from scratch or finetune it on a new task.
"""
import os
from typing import Optional
from transformers import Trainer
import torch
from transformers.modeling_utils import PreTrainedModel, unwrap_model
from transformers.utils import logging
logger = logging.get_logger(__name__)
WEIGHTS_NAME = "pytorch_model.bin"
TRAINING_ARGS_NAME = "training_args.bin"
class PrefixTrainer(Trainer):
    def __init__(self, *args, save_changed=False, **kwargs):
        self.save_changed = save_changed
        super().__init__(*args, **kwargs)

    def _save(self, output_dir: Optional[str] = None, state_dict=None):
        # If we are executing this function, we are the process zero, so we don't check for that.
        output_dir = output_dir if output_dir is not None else self.args.output_dir
        os.makedirs(output_dir, exist_ok=True)
        logger.info(f"Saving model checkpoint to {output_dir}")
        # Save a trained model and configuration using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        if not isinstance(self.model, PreTrainedModel):
            if isinstance(unwrap_model(self.model), PreTrainedModel):
                if state_dict is None:
                    state_dict = self.model.state_dict()
                unwrap_model(self.model).save_pretrained(output_dir, state_dict=state_dict)
            else:
                logger.info("Trainer.model is not a `PreTrainedModel`, only saving its state dict.")
                if state_dict is None:
                    state_dict = self.model.state_dict()
                torch.save(state_dict, os.path.join(output_dir, WEIGHTS_NAME))
        else:
            if self.save_changed:
                print("Saving PrefixEncoder")
                state_dict = self.model.state_dict()
                filtered_state_dict = {}
                for k, v in self.model.named_parameters():
                    if v.requires_grad:
                        filtered_state_dict[k] = state_dict[k]
                self.model.save_pretrained(output_dir, state_dict=filtered_state_dict)
            else:
                print("Saving the whole model")
                self.model.save_pretrained(output_dir, state_dict=state_dict)
        if self.tokenizer is not None:
            self.tokenizer.save_pretrained(output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(self.args, os.path.join(output_dir, TRAINING_ARGS_NAME))
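
A minimal usage sketch of `PrefixTrainer`: the model, tokenizer, dataset and `TrainingArguments` objects below are placeholders (in the deleted demo they come from `finetune.py`), not part of this commit. Passing `save_changed=True` makes the checkpoint contain only parameters with `requires_grad=True`, i.e. the prefix encoder.

```python
# Hypothetical wiring; model/tokenizer/train_dataset/training_args are assumed to exist.
trainer = PrefixTrainer(
    model=model,                 # a ChatGLM3 model with a trainable PrefixEncoder
    args=training_args,          # transformers.TrainingArguments
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    save_changed=True,           # save only the trainable (prefix-encoder) weights
)
trainer.train()
trainer.save_model()             # invokes _save() above on process zero
```
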
import json
from langchain.llms.base import LLM
from transformers import AutoTokenizer, AutoModel, AutoConfig
from typing import List, Optional
from utils import tool_config_from_file
class ChatGLM3(LLM):
    max_token: int = 8192
    do_sample: bool = False
    temperature: float = 0.8
    top_p = 0.8
    tokenizer: object = None
    model: object = None
    history: List = []
    tool_names: List = []
    has_search: bool = False

    def __init__(self):
        super().__init__()

    @property
    def _llm_type(self) -> str:
        return "ChatGLM3"

    def load_model(self, model_name_or_path=None):
        model_config = AutoConfig.from_pretrained(
            model_name_or_path,
            trust_remote_code=True
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name_or_path,
            trust_remote_code=True
        )
        self.model = AutoModel.from_pretrained(
            model_name_or_path, config=model_config, trust_remote_code=True
        ).half().cuda()

    def _tool_history(self, prompt: str):
        ans = []
        tool_prompts = prompt.split(
            "You have access to the following tools:\n\n")[1].split("\n\nUse a json blob")[0].split("\n")
        tool_names = [tool.split(":")[0] for tool in tool_prompts]
        self.tool_names = tool_names

        tools_json = []
        for i, tool in enumerate(tool_names):
            tool_config = tool_config_from_file(tool)
            if tool_config:
                tools_json.append(tool_config)
            else:
                raise ValueError(
                    f"Tool {tool} config not found! Its description is {tool_prompts[i]}"
                )

        ans.append({
            "role": "system",
            "content": "Answer the following questions as best as you can. You have access to the following tools:",
            "tools": tools_json
        })
        query = f"""{prompt.split("Human: ")[-1].strip()}"""
        return ans, query

    def _extract_observation(self, prompt: str):
        return_json = prompt.split("Observation: ")[-1].split("\nThought:")[0]
        self.history.append({
            "role": "observation",
            "content": return_json
        })
        return

    def _extract_tool(self):
        if len(self.history[-1]["metadata"]) > 0:
            metadata = self.history[-1]["metadata"]
            content = self.history[-1]["content"]
            if "tool_call" in content:
                for tool in self.tool_names:
                    if tool in metadata:
                        input_para = content.split("='")[-1].split("'")[0]
                        action_json = {
                            "action": tool,
                            "action_input": input_para
                        }
                        self.has_search = True
                        return f"""
Action:
```
{json.dumps(action_json, ensure_ascii=False)}
```"""
        final_answer_json = {
            "action": "Final Answer",
            "action_input": self.history[-1]["content"]
        }
        self.has_search = False
        return f"""
Action:
```
{json.dumps(final_answer_json, ensure_ascii=False)}
```"""

    def _call(self, prompt: str, history: List = [], stop: Optional[List[str]] = ["<|user|>"]):
        print("======")
        print(prompt)
        print("======")
        if not self.has_search:
            self.history, query = self._tool_history(prompt)
        else:
            self._extract_observation(prompt)
            query = ""
        # print("======")
        # print(history)
        # print("======")
        _, self.history = self.model.chat(
            self.tokenizer,
            query,
            history=self.history,
            do_sample=self.do_sample,
            max_length=self.max_token,
            temperature=self.temperature,
        )
        response = self._extract_tool()
        history.append((prompt, response))
        return response
# README

## Model Configuration

In `main.py`, change the `model_path = /path/to/chatglm3-6b` path to your local checkpoint, or set it to `THUDM/chatglm3-6b` to download the model automatically.
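
For reference, the `main.py` shipped with this demo reads the path from the `MODEL_PATH` environment variable and hands it to `ChatGLM3.load_model`; a minimal sketch of that wiring:

```python
import os
from ChatGLM3 import ChatGLM3

# Defaults to the Hub id, so the model is downloaded if no local path is given.
MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/chatglm3-6b')

llm = ChatGLM3()
llm.load_model(model_name_or_path=MODEL_PATH)  # loads tokenizer + model onto the GPU in half precision
```
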
## Adding Tools

### Tools already implemented in LangChain

See the [langchain](https://python.langchain.com/docs/modules/agents/tools/) tool documentation, then import the tool module in `main.py`. For example, to import the `arxiv` tool:

```python
run_tool(["arxiv"], llm, [
    "帮我查询AgentTuning相关工作"
])
```

#### Calculator and Weather tool configuration

If the `LangChain` version in your Python environment is lower than **`0.0.278`**, you need to implement the `_arun` method in these two predefined tool classes,
otherwise you will get

`TypeError: Can't instantiate abstract class Weather with abstract method _arun`

For example:

```python
class Weather(BaseTool):
    name = "weather"
    description = "Use for searching weather at a specific location"

    async def _arun(self, *args: Any, **kwargs: Any) -> Any:
        # _arun is not used in this demo, so it is left unimplemented
        pass
```

Run `main.py`:

```
python main.py
```

The run will stop because the model cannot find a YAML description file for the `arxiv` tool, so you need to create `./Tool/arxiv.yaml` yourself. You can describe the tool in your own words or adapt LangChain's description of it.
For the `arxiv` example, a reference file is provided at `./Tool/arxiv_example.yaml`; you can build `Tool/arxiv.yaml` from it (in the simplest case, just rename the file), rerun the script, and the model will call the tool correctly.

> Some tools require an API key; add it to your environment variables as indicated by the langchain error message.
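
For reference, `./Tool/arxiv_example.yaml` (included later in this commit) parses into the structure below; this is what `tool_config_from_file("arxiv")` returns and what `ChatGLM3._tool_history` passes to the model as the tool definition:

```python
# Parsed form of Tool/arxiv_example.yaml (yaml.safe_load output)
{
    "name": "arxiv",
    "description": "A wrapper around Arxiv.org for searching and retrieving scientific articles in various fields.",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {"type": "string", "description": "The search query title"},
        },
        "required": ["query"],
    },
}
```
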
### Custom Tools

To define your own tool, follow `Tool/Weather.py` and `Tool/Weather.yaml`: subclass `BaseTool`, implement the corresponding `_run()` method, and import the tool module in `main.py`. For example, after importing the `Weather` tool you can call it like this:

```python
# Call the same tool several times
# Requires: export SENIVERSE_KEY=<YOUR_API_KEY_HERE>
run_tool([Weather()], llm, [
    "今天北京天气怎么样?",
    "What's the weather like in Shanghai today",
])
```

## Using Multiple Tools

Multiple tools can be registered together so that the model chooses which one to call, for example:

```python
run_tool([Calculator(), "arxiv", Weather()], llm, [
    "帮我检索GLM-130B相关论文",
    "今天北京天气怎么样?",
    "根号3减去根号二再加上4等于多少?",
])
```
import abc
import math
from typing import Any
from langchain.tools import BaseTool
class Calculator(BaseTool, abc.ABC):
    name = "Calculator"
    description = "Useful for when you need to answer questions about math"

    def __init__(self):
        super().__init__()

    async def _arun(self, *args: Any, **kwargs: Any) -> Any:
        # _arun is not used in this demo, so it is left unimplemented
        pass

    def _run(self, para: str) -> str:
        # Note: eval() is used for simplicity; only pass trusted expressions.
        para = para.replace("^", "**")
        if "sqrt" in para:
            para = para.replace("sqrt", "math.sqrt")
        elif "log" in para:
            para = para.replace("log", "math.log")
        return eval(para)


if __name__ == "__main__":
    calculator_tool = Calculator()
    result = calculator_tool.run("sqrt(2) + 3")
    print(result)
name: Calculator
description: Useful for when you need to answer questions about math
parameters:
  type: object
  properties:
    formula:
      type: string
      description: The formula to be calculated
  required:
    - formula
import os
from typing import Any
import requests
from langchain.tools import BaseTool
class Weather(BaseTool):
    name = "weather"
    description = "Use for searching weather at a specific location"

    def __init__(self):
        super().__init__()

    async def _arun(self, *args: Any, **kwargs: Any) -> Any:
        # _arun is not used in this demo, so it is left unimplemented
        pass

    def get_weather(self, location):
        api_key = os.environ["SENIVERSE_KEY"]
        url = f"https://api.seniverse.com/v3/weather/now.json?key={api_key}&location={location}&language=zh-Hans&unit=c"
        response = requests.get(url)
        if response.status_code == 200:
            data = response.json()
            weather = {
                "temperature": data["results"][0]["now"]["temperature"],
                "description": data["results"][0]["now"]["text"],
            }
            return weather
        else:
            raise Exception(
                f"Failed to retrieve weather: {response.status_code}")

    def _run(self, para: str) -> str:
        return self.get_weather(para)


if __name__ == "__main__":
    weather_tool = Weather()
    weather_info = weather_tool.run("成都")
    print(weather_info)
name: arxiv
description: A wrapper around Arxiv.org for searching and retrieving scientific articles in various fields.
parameters:
  type: object
  properties:
    query:
      type: string
      description: The search query title
  required:
    - query
name: weather
description: Search the current weather of a city
parameters:
  type: object
  properties:
    city:
      type: string
      description: City name
  required:
    - city
import os
from typing import List
from ChatGLM3 import ChatGLM3
from langchain.agents import load_tools
from Tool.Weather import Weather
from Tool.Calculator import Calculator
from langchain.agents import initialize_agent
from langchain.agents import AgentType
MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/chatglm3-6b')
def run_tool(tools, llm, prompt_chain: List[str]):
    loaded_tools = []
    for tool in tools:
        if isinstance(tool, str):
            loaded_tools.append(load_tools([tool], llm=llm)[0])
        else:
            loaded_tools.append(tool)
    agent = initialize_agent(
        loaded_tools, llm,
        agent=AgentType.STRUCTURED_CHAT_ZERO_SHOT_REACT_DESCRIPTION,
        verbose=True,
        handle_parsing_errors=True
    )
    for prompt in prompt_chain:
        agent.run(prompt)


if __name__ == "__main__":
    llm = ChatGLM3()
    llm.load_model(model_name_or_path=MODEL_PATH)

    # arxiv: single-tool example 1
    run_tool(["arxiv"], llm, [
        "帮我查询GLM-130B相关工作"
    ])

    # weather: single-tool example 2
    run_tool([Weather()], llm, [
        "今天北京天气怎么样?",
        "What's the weather like in Shanghai today",
    ])

    # calculator: single-tool example 3
    run_tool([Calculator()], llm, [
        "12345679乘以54等于多少?",
        "3.14的3.14次方等于多少?",
        "根号2加上根号三等于多少?",
    ])

    # arxiv + weather + calculator: combining multiple tools
    # run_tool([Calculator(), "arxiv", Weather()], llm, [
    #     "帮我检索GLM-130B相关论文",
    #     "今天北京天气怎么样?",
    #     "根号3减去根号二再加上4等于多少?",
    # ])
langchain
arxiv
import os
import yaml
def tool_config_from_file(tool_name, directory="Tool/"):
    """Search `directory` for the tool's YAML description and return it as a dict."""
    for filename in os.listdir(directory):
        if filename.endswith('.yaml') and tool_name in filename:
            file_path = os.path.join(directory, filename)
            with open(file_path, encoding='utf-8') as f:
                return yaml.safe_load(f)
    return None
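
A quick usage sketch of the helper above, run from the demo directory so that the bundled `Tool/weather.yaml` is found (the error message is illustrative, not part of the repo):

```python
config = tool_config_from_file("weather")  # matches Tool/weather.yaml by file name
if config is None:
    raise FileNotFoundError("no YAML description found for the 'weather' tool")
print(config["name"], "-", config["description"])  # weather - Search the current weather of a city
```
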
# Unique model identifier
modelCode=474
# Model name
modelName=chatglm3-6b_pytorch
# Model description
modelDescription=PyTorch-based chatglm3-6b
# Application scenarios
appScenario=Training, inference, conversational QA, healthcare, education, research, finance
# Framework type
frameType=Pytorch,Transformers,Deepspeed
# coding=utf-8
# Implements an API for ChatGLM3-6B in OpenAI's format. (https://platform.openai.com/docs/api-reference/chat)
# Usage: python openai_api.py
# Visit http://localhost:8000/docs for the API documentation.
# In the OpenAI API, max_tokens is equivalent to HuggingFace's max_new_tokens, not max_length.
# For example, setting max_tokens = 8192 for the 6B model will raise an error, because after the
# history and the prompt are deducted, the model cannot generate that many tokens.
import os
import time
from contextlib import asynccontextmanager
from typing import List, Literal, Optional, Union
import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from loguru import logger
from pydantic import BaseModel, Field
from sse_starlette.sse import EventSourceResponse
from transformers import AutoTokenizer, AutoModel
from utils import process_response, generate_chatglm3, generate_stream_chatglm3
MODEL_PATH = os.environ.get('MODEL_PATH', 'THUDM/chatglm3-6b')
TOKENIZER_PATH = os.environ.get("TOKENIZER_PATH", MODEL_PATH)
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
@asynccontextmanager
async def lifespan(app: FastAPI):  # collects GPU memory
    yield
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()


app = FastAPI(lifespan=lifespan)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class ModelCard(BaseModel):
    id: str
    object: str = "model"
    created: int = Field(default_factory=lambda: int(time.time()))
    owned_by: str = "owner"
    root: Optional[str] = None
    parent: Optional[str] = None
    permission: Optional[list] = None


class ModelList(BaseModel):
    object: str = "list"
    data: List[ModelCard] = []


class FunctionCallResponse(BaseModel):
    name: Optional[str] = None
    arguments: Optional[str] = None


class ChatMessage(BaseModel):
    role: Literal["user", "assistant", "system", "function"]
    content: str = None
    name: Optional[str] = None
    function_call: Optional[FunctionCallResponse] = None


class DeltaMessage(BaseModel):
    role: Optional[Literal["user", "assistant", "system"]] = None
    content: Optional[str] = None
    function_call: Optional[FunctionCallResponse] = None


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 0.8
    top_p: Optional[float] = 0.8
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False
    functions: Optional[Union[dict, List[dict]]] = None
    # Additional parameters
    repetition_penalty: Optional[float] = 1.1


class ChatCompletionResponseChoice(BaseModel):
    index: int
    message: ChatMessage
    finish_reason: Literal["stop", "length", "function_call"]


class ChatCompletionResponseStreamChoice(BaseModel):
    index: int
    delta: DeltaMessage
    finish_reason: Optional[Literal["stop", "length", "function_call"]]


class UsageInfo(BaseModel):
    prompt_tokens: int = 0
    total_tokens: int = 0
    completion_tokens: Optional[int] = 0


class ChatCompletionResponse(BaseModel):
    model: str
    object: Literal["chat.completion", "chat.completion.chunk"]
    choices: List[Union[ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice]]
    created: Optional[int] = Field(default_factory=lambda: int(time.time()))
    usage: Optional[UsageInfo] = None
@app.get("/v1/models", response_model=ModelList)
async def list_models():
model_card = ModelCard(id="chatglm3-6b")
return ModelList(data=[model_card])
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
global model, tokenizer
if len(request.messages) < 1 or request.messages[-1].role == "assistant":
raise HTTPException(status_code=400, detail="Invalid request")
gen_params = dict(
messages=request.messages,
temperature=request.temperature,
top_p=request.top_p,
max_tokens=request.max_tokens or 1024,
echo=False,
stream=request.stream,
repetition_penalty=request.repetition_penalty,
functions=request.functions,
)
logger.debug(f"==== request ====\n{gen_params}")
if request.stream:
generate = predict(request.model, gen_params)
return EventSourceResponse(generate, media_type="text/event-stream")
response = generate_chatglm3(model, tokenizer, gen_params)
# Remove the first newline character
if response["text"].startswith("\n"):
response["text"] = response["text"][1:]
response["text"] = response["text"].strip()
usage = UsageInfo()
function_call, finish_reason = None, "stop"
if request.functions:
try:
function_call = process_response(response["text"], use_tool=True)
except:
logger.warning("Failed to parse tool call, maybe the response is not a tool call or have been answered.")
if isinstance(function_call, dict):
finish_reason = "function_call"
function_call = FunctionCallResponse(**function_call)
message = ChatMessage(
role="assistant",
content=response["text"],
function_call=function_call if isinstance(function_call, FunctionCallResponse) else None,
)
logger.debug(f"==== message ====\n{message}")
choice_data = ChatCompletionResponseChoice(
index=0,
message=message,
finish_reason=finish_reason,
)
task_usage = UsageInfo.model_validate(response["usage"])
for usage_key, usage_value in task_usage.model_dump().items():
setattr(usage, usage_key, getattr(usage, usage_key) + usage_value)
return ChatCompletionResponse(model=request.model, choices=[choice_data], object="chat.completion", usage=usage)
async def predict(model_id: str, params: dict):
    global model, tokenizer

    choice_data = ChatCompletionResponseStreamChoice(
        index=0,
        delta=DeltaMessage(role="assistant"),
        finish_reason=None
    )
    chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
    yield "{}".format(chunk.model_dump_json(exclude_unset=True))

    previous_text = ""
    for new_response in generate_stream_chatglm3(model, tokenizer, params):
        decoded_unicode = new_response["text"]
        delta_text = decoded_unicode[len(previous_text):]
        previous_text = decoded_unicode

        finish_reason = new_response["finish_reason"]
        if len(delta_text) == 0 and finish_reason != "function_call":
            continue

        function_call = None
        if finish_reason == "function_call":
            try:
                function_call = process_response(decoded_unicode, use_tool=True)
            except:
                logger.warning("Failed to parse tool call, maybe the response is not a tool call or has already been answered.")

        if isinstance(function_call, dict):
            function_call = FunctionCallResponse(**function_call)

        delta = DeltaMessage(
            content=delta_text,
            role="assistant",
            function_call=function_call if isinstance(function_call, FunctionCallResponse) else None,
        )
        choice_data = ChatCompletionResponseStreamChoice(
            index=0,
            delta=delta,
            finish_reason=finish_reason
        )
        chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
        yield "{}".format(chunk.model_dump_json(exclude_unset=True))

    choice_data = ChatCompletionResponseStreamChoice(
        index=0,
        delta=DeltaMessage(),
        finish_reason="stop"
    )
    chunk = ChatCompletionResponse(model=model_id, choices=[choice_data], object="chat.completion.chunk")
    yield "{}".format(chunk.model_dump_json(exclude_unset=True))
    yield '[DONE]'
if __name__ == "__main__":
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH, trust_remote_code=True)
if 'cuda' in DEVICE: # AMD, NVIDIA GPU can use Half Precision
model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).to(DEVICE).eval()
else: # CPU, Intel GPU and other GPU can use Float16 Precision Only
model = AutoModel.from_pretrained(MODEL_PATH, trust_remote_code=True).float().to(DEVICE).eval()
uvicorn.run(app, host='0.0.0.0', port=8000, workers=1)
# 使用curl命令测试返回
# curl -X POST "http://127.0.0.1:8000/v1/chat/completions" \
# -H "Content-Type: application/json" \
# -d "{\"model\": \"chatglm3-6b\", \"messages\": [{\"role\": \"system\", \"content\": \"You are ChatGLM3, a large language model trained by Zhipu.AI. Follow the user's instructions carefully. Respond using markdown.\"}, {\"role\": \"user\", \"content\": \"你好,给我讲一个故事,大概100字\"}], \"stream\": false, \"max_tokens\": 100, \"temperature\": 0.8, \"top_p\": 0.8}"
# Test the response with Python code
import requests
import json
base_url = "http://127.0.0.1:8000"
def create_chat_completion(model, messages, functions, use_stream=False):
    data = {
        "functions": functions,  # function (tool) definitions
        "model": model,  # model name
        "messages": messages,  # conversation history
        "stream": use_stream,  # whether to stream the response
        "max_tokens": 100,  # maximum number of tokens to generate
        "temperature": 0.8,  # temperature
        "top_p": 0.8,  # nucleus sampling probability
    }

    response = requests.post(f"{base_url}/v1/chat/completions", json=data, stream=use_stream)
    if response.status_code == 200:
        if use_stream:
            # Handle a streaming response
            for line in response.iter_lines():
                if line:
                    decoded_line = line.decode('utf-8')[6:]  # strip the SSE "data: " prefix
                    try:
                        response_json = json.loads(decoded_line)
                        content = response_json.get("choices", [{}])[0].get("delta", {}).get("content", "")
                        print(content)
                    except:
                        print("Special Token:", decoded_line)
        else:
            # Handle a non-streaming response
            decoded_line = response.json()
            content = decoded_line.get("choices", [{}])[0].get("message", {}).get("content", "")
            print(content)
    else:
        print("Error:", response.status_code)
        return None
def function_chat(use_stream=True):
    functions = [
        {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location.",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. Beijing",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        }
    ]
    chat_messages = [
        {
            "role": "user",
            "content": "波士顿天气如何?",
        },
        {
            "role": "assistant",
            "content": "get_current_weather\n ```python\ntool_call(location='Beijing', unit='celsius')\n```",
            "function_call": {
                "name": "get_current_weather",
                "arguments": '{"location": "Beijing", "unit": "celsius"}',
            },
        },
        {
            "role": "function",
            "name": "get_current_weather",
            "content": '{"temperature": "12", "unit": "celsius", "description": "Sunny"}',
        },
        # ... the turns below would be the assistant's reply and the user's follow-up.
        # {
        #     "role": "assistant",
        #     "content": "根据最新的天气预报,目前北京的天气情况是晴朗的,温度为12摄氏度。",
        # },
        # {
        #     "role": "user",
        #     "content": "谢谢",
        # }
    ]
    create_chat_completion("chatglm3-6b", messages=chat_messages, functions=functions, use_stream=use_stream)


def simple_chat(use_stream=True):
    functions = None
    chat_messages = [
        {
            "role": "system",
            "content": "You are ChatGLM3, a large language model trained by Zhipu.AI. Follow the user's instructions carefully. Respond using markdown.",
        },
        {
            "role": "user",
            "content": "你好,给我讲一个故事,大概100字"
        }
    ]
    create_chat_completion("chatglm3-6b", messages=chat_messages, functions=functions, use_stream=use_stream)


if __name__ == "__main__":
    function_chat(use_stream=False)
    # simple_chat(use_stream=True)
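
Since the requirements below pin `openai>=1.3.0`, the same server can also be exercised through the official OpenAI client instead of raw `requests`; a minimal sketch (the `api_key` value is a placeholder, as the demo server does not check it):

```python
from openai import OpenAI

# Point the official client at the locally running openai_api.py server.
client = OpenAI(base_url="http://127.0.0.1:8000/v1", api_key="EMPTY")

completion = client.chat.completions.create(
    model="chatglm3-6b",
    messages=[{"role": "user", "content": "你好,给我讲一个故事,大概100字"}],
    temperature=0.8,
    top_p=0.8,
    max_tokens=100,
)
print(completion.choices[0].message.content)
```
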
openai>=1.3.0
pydantic>=2.5.1