# GLM-4-9B Web Demo
![Demo webpage](assets/demo.png)
## Installation
We recommend using [Conda](https://docs.conda.io/en/latest/) for environment management.
Execute the following commands to create a conda environment and install the required dependencies:
```bash
conda create -n glm-4-demo python=3.12
conda activate glm-4-demo
pip install -r requirements.txt
```
Please note that this project requires Python 3.10 or higher.
In addition, you need to install the Jupyter kernel to use the Code Interpreter:
```bash
ipython kernel install --name glm-4-demo --user
```
You can modify `~/.local/share/jupyter/kernels/glm-4-demo/kernel.json` to change the configuration of the Jupyter
kernel, including its startup parameters. For example, if you want Matplotlib to render plots inline when using the
Python code-execution capability of All Tools, add `"--matplotlib=inline"` to the `argv` array.
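For reference, after adding the flag the kernel spec might look roughly like this (the interpreter path is a placeholder and the exact entries vary with your installation; this is a sketch, not the file verbatim):
```json
{
    "argv": [
        "/path/to/python",
        "-m",
        "ipykernel_launcher",
        "-f",
        "{connection_file}",
        "--matplotlib=inline"
    ],
    "display_name": "glm-4-demo",
    "language": "python"
}
```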
To use the browser and search functions, you also need to start the browser backend. First, install Node.js following
the instructions on the [Node.js](https://nodejs.org/en/download/package-manager) official website, then install the
[PNPM](https://pnpm.io) package manager, and finally install the browser service dependencies:
```bash
cd browser
npm install -g pnpm
pnpm install
```
## Run
1. Set `BING_SEARCH_API_KEY` in `browser/src/config.ts` to the Bing Search API key that the browser service will use:
```diff
--- a/browser/src/config.ts
+++ b/browser/src/config.ts
@@ -3,7 +3,7 @@ export default {
   BROWSER_TIMEOUT: 10000,
   BING_SEARCH_API_URL: 'https://api.bing.microsoft.com/v7.0',
-  BING_SEARCH_API_KEY: '',
+  BING_SEARCH_API_KEY: '<PUT_YOUR_BING_SEARCH_KEY_HERE>',
   HOST: 'localhost',
   PORT: 3000,
```
2. The text-to-image function calls the CogView API. Modify `src/tools/config.py` and provide the
[Zhipu AI Open Platform](https://open.bigmodel.cn) API key that the text-to-image function requires:
```diff
--- a/src/tools/config.py
+++ b/src/tools/config.py
@@ -2,5 +2,5 @@ BROWSER_SERVER_URL = 'http://localhost:3000'
 IPYKERNEL = 'glm4-demo'
-ZHIPU_AI_KEY = ''
+ZHIPU_AI_KEY = '<PUT_YOUR_ZHIPU_AI_KEY_HERE>'
 COGVIEW_MODEL = 'cogview-3'
```
3. Start the browser backend in a separate shell:
```bash
cd browser
pnpm start
```
4. Run the following command to load the model locally and start the demo:
```bash
streamlit run src/main.py
```
The demo address will be printed to the command line; click it to access the demo. The first access requires
downloading and loading the model, which may take some time.
If you have already downloaded the model, you can load it from a local path by setting
`export *_MODEL_PATH=/path/to/model`. The models that can be specified include:
- `CHAT_MODEL_PATH`: used for All Tools mode and document interpretation mode; defaults to `THUDM/glm-4-9b-chat`.
- `VLM_MODEL_PATH`: used for VLM mode; defaults to `THUDM/glm-4v-9b`.
The chat model supports inference with [vLLM](https://github.com/vllm-project/vllm). To use it, install vLLM and
set the environment variable `USE_VLLM=1`.
If you need to customize the Jupyter kernel, you can specify it with `export IPYKERNEL=<kernel_name>`.
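For example, a launch that combines these variables might look like the following sketch (the model paths are placeholders for wherever you downloaded the weights):
```bash
# Placeholder paths; point these at your local model downloads.
export CHAT_MODEL_PATH=/path/to/glm-4-9b-chat
export VLM_MODEL_PATH=/path/to/glm-4v-9b
export USE_VLLM=1            # optional: serve the chat model with vLLM
export IPYKERNEL=glm-4-demo  # optional: custom Jupyter kernel name
streamlit run src/main.py
```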
## Usage
The GLM-4 demo has three modes:
- All Tools mode
- VLM (image understanding) mode
- Text interpretation mode
### All Tools mode
You can enhance the model's capabilities by registering new tools in `tool_registry.py`. Simply decorate a function
with `@register_tool` to register it. For tool declarations, the function name is the name of the tool and the
function docstring is the description of the tool; for tool parameters, use `Annotated[type, description, required]`
to annotate each parameter's type, description, and whether it is required.
For example, the registration of the `get_weather` tool is as follows:
```python
from typing import Annotated

@register_tool
def get_weather(
        city_name: Annotated[str, 'The name of the city to be queried', True],
) -> str:
    """
    Get the weather for `city_name` in the following week
    """
    ...
```
This mode is compatible with the tool registration process of ChatGLM3-6B.
- Code execution, drawing, and web search capabilities are integrated automatically; you only need to configure the
corresponding API keys as described above.
- System prompts are not supported in this mode; the model builds its prompt automatically.
### Text interpretation mode
Users can upload documents and use GLM-4-9B's long-text capability to understand them. It can parse pptx, docx,
pdf, and other file types.
- Tool calls and system prompts are not supported in this mode.
- If the text is very long, the model may require a large amount of GPU memory; please confirm your hardware
configuration.
### Image understanding mode
Users can upload images and use the image understanding capabilities of GLM-4-9B to understand them.
- This mode requires the glm-4v-9b model.
- Tool calls and system prompts are not supported in this mode.
- The model can only understand and discuss one image per conversation. To change the image, open a new
conversation.
- The supported image resolution is 1120 x 1120.
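If an image is not already at that resolution, you can resize it before uploading. A minimal sketch using Pillow (the library choice and file names are assumptions, not part of the demo):
```python
# Sketch: scale an image to the supported 1120 x 1120 resolution.
# "photo.jpg" is a hypothetical input file; Pillow is an assumed dependency.
from PIL import Image

img = Image.open("photo.jpg")
img.resize((1120, 1120)).save("photo_1120.jpg")  # note: ignores aspect ratio
```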
# Created by https://www.toptal.com/developers/gitignore/api/node
# Edit at https://www.toptal.com/developers/gitignore?templates=node
### Node ###
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# Snowpack dependency directory (https://snowpack.dev/)
web_modules/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional stylelint cache
.stylelintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
out
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# vuepress v2.x temp and cache directory
.temp
# Docusaurus cache and generated files
.docusaurus
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*
### Node Patch ###
# Serverless Webpack directories
.webpack/
# Optional stylelint cache
# SvelteKit build / generate output
.svelte-kit
# End of https://www.toptal.com/developers/gitignore/api/node
{
  "name": "glm4-browser",
  "version": "1.0.0",
  "description": "Browser system for GLM-4",
  "main": "src/server.ts",
  "scripts": {
    "dev": "npx nodemon src/server",
    "start": "npx ts-node src/server.ts"
  },
  "license": "MIT",
  "dependencies": {
    "express": "^4.18.3",
    "jsdom": "^24.0.0",
    "pnpm": "^9.1.2",
    "turndown": "^7.1.2",
    "winston": "^3.11.0"
  },
  "devDependencies": {
    "@types/express": "^4.17.21",
    "@types/jsdom": "^21.1.6",
    "@types/node": "^20.11.20",
    "@types/turndown": "^5.0.4",
    "nodemon": "^3.1.0",
    "ts-node": "^10.9.2"
  }
}
export default {
  LOG_LEVEL: 'debug',
  BROWSER_TIMEOUT: 10000,
  BING_SEARCH_API_URL: 'https://api.bing.microsoft.com/',
  BING_SEARCH_API_KEY: '',
  HOST: 'localhost',
  PORT: 3000,
};
import express, { Express, Request, Response } from 'express';
import { SimpleBrowser } from './browser';
import config from './config';
import { logger } from './utils';

// One SimpleBrowser instance per session, keyed by session_id.
const session_history: Record<string, SimpleBrowser> = {};

const app: Express = express();

app.use(express.json());

// Single endpoint: dispatch a browser action for the given session.
app.post('/', async (req: Request, res: Response) => {
  const {
    session_id,
    action,
  }: {
    session_id: string;
    action: string;
  } = req.body;

  logger.info(`session_id: ${session_id}`);
  logger.info(`action: ${action}`);

  // Lazily create a browser for sessions we have not seen before.
  if (!session_history[session_id]) {
    session_history[session_id] = new SimpleBrowser();
  }

  const browser = session_history[session_id];

  try {
    res.json(await browser.action(action));
  } catch (err) {
    logger.error(err);
    res.status(400).json(err);
  }
});

process.on('SIGINT', () => {
  process.exit(0);
});

process.on('uncaughtException', e => {
  logger.error(e);
});

const { HOST, PORT } = config;

(async () => {
  app.listen(PORT, HOST, () => {
    logger.info(`⚡️[server]: Server is running at http://${HOST}:${PORT}`);
    try {
      // Notify the process manager (e.g. PM2) that the server is ready.
      (<any>process).send('ready');
    } catch (err) {}
  });
})();
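For reference, the endpoint above can be exercised directly. A minimal client sketch in Python (the shape of the `action` string is internal to `SimpleBrowser`, so it appears here only as a placeholder; `requests` is an assumed dependency):
```python
# Sketch: POST to the browser backend started with `pnpm start`.
import requests

resp = requests.post(
    "http://localhost:3000/",
    json={"session_id": "demo-session", "action": "..."},  # placeholder action
    timeout=10,
)
print(resp.json())
```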
export interface File {
  id: string;
  name: string;
  size: number;
}

export interface Metadata {
  files?: File[];
  reference?: string;
}

export interface Message {
  role: 'user' | 'assistant' | 'system' | 'observation';
  metadata: string;
  content: string;
  request_metadata?: Metadata;
}

export interface ToolObservation {
  contentType: string;
  result: string;
  text?: string;
  roleMetadata?: string; // metadata for <|observation|>${metadata}
  metadata: any; // metadata for response
}