Commit 396700dd authored by chenzk's avatar chenzk
Browse files

v1.0

parents
Pipeline #2603 failed with stages
in 0 seconds
{
// Dev Container definition for the "dbgpt" workspace.
// The image is built from .devcontainer/Dockerfile.dev with the repository root
// as build context; the host's USER is forwarded as the USERNAME build argument.
"build": {
"dockerfile": ".devcontainer/Dockerfile.dev",
"context": "./",
"args": {
"USERNAME": "${localEnv:USER}"
},
// Extra `docker build` flags: always rebuild without the layer cache and
// use the host network while building.
"options": [
"--no-cache",
"--network=host"
]
},
// The container runs as the host user name and UID remapping is disabled.
// NOTE(review): presumably because Dockerfile.dev already creates the user with
// the host UID/GID taken from .devcontainer/.env - confirm.
"updateRemoteUserUID": false,
"remoteUser": "${localEnv:USER}",
// Command run before the container is created/started.
"initializeCommand": ".devcontainer/init_env.sh",
"name": "dbgpt",
// The local checkout is bind-mounted at /app, which is also the workspace folder.
"workspaceFolder": "/app",
"workspaceMount": "source=${localWorkspaceFolder},target=/app,type=bind",
// Extra `docker run` flags: host networking, NVIDIA runtime with all GPUs,
// and a set of environment variables copied from the host environment.
"runArgs": [
"--network",
"host",
"--runtime=nvidia",
"--gpus",
"all",
"-e",
"LOCAL_DB_HOST=${localEnv:LOCAL_DB_HOST}",
"-e",
"LOCAL_DB_PASSWORD=${localEnv:LOCAL_DB_PASSWORD}",
"-e",
"MYSQL_ROOT_PASSWORD=${localEnv:MYSQL_ROOT_PASSWORD}",
"-e",
"LLM_MODEL=${localEnv:LLM_MODEL}",
"-e",
"LANGUAGE=${localEnv:LANGUAGE}",
"-e",
"PROXY_SERVER_URL=${localEnv:PROXY_SERVER_URL}",
"-e",
// HF_HOME points at the bind-mounted models directory declared under "mounts".
"HF_HOME=/app/models"
],
"mounts": [
// Share the host SSH agent socket so Git inside the container can use the host's
// credentials; see https://code.visualstudio.com/remote/advancedcontainers/sharing-git-credentials
// The source is the host's SSH_AUTH_SOCK environment variable.
"source=${localEnv:SSH_AUTH_SOCK},target=/run/host-services/ssh-auth.sock,type=bind",
// Bind-mount the local models directory so downloaded models persist
// and are not downloaded again.
"source=${localWorkspaceFolder}/models,target=/app/models,type=bind"
],
// Inside the container SSH_AUTH_SOCK points at the mounted agent socket above.
"containerEnv": {
"SSH_AUTH_SOCK": "/run/host-services/ssh-auth.sock"
},
// After the container is created, make the post-create script executable and run it.
"postCreateCommand": "chmod +x /app/.devcontainer/post-create.sh && /app/.devcontainer/post-create.sh",
"customizations": {
"vscode": {
// Editor settings applied inside the container (Python tooling, zsh as the terminal shell).
"settings": {
"extensions.verifySignature": false,
"http.proxyStrictSSL": false,
"python.linting.flake8Enabled": true,
"python.languageServer": "Pylance",
"python.linting.enabled": true,
"terminal.integrated.defaultProfile.linux": "zsh",
"terminal.integrated.shell.linux": "/bin/zsh",
"python.linting.mypyEnabled": true,
"python.linting.provider": "ruff",
"python.formatting.provider": "ruff"
},
// VS Code extensions installed in the container.
"extensions": [
"ms-python.python",
"ms-python.isort",
"ms-python.vscode-pylance",
"ms-python.autopep8",
"ms-vscode.makefile-tools",
"ms-python.flake8",
"ms-azuretools.vscode-docker",
"ms-python.mypy-type-checker",
"charliermarsh.ruff"
]
}
}
}
\ No newline at end of file
# Development image for the DB-GPT Dev Container.
# Extends the full DB-GPT image with a user matching the host user, common
# developer tools, and the project's dependencies installed into DEFAULT_VENV.
# NOTE(review): ":latest" is a moving tag; pin a release tag or digest for
# reproducible builds once a target version is chosen.
FROM eosphorosai/dbgpt-full:latest
ARG PYTHON_VERSION=3.11
ARG PIP_INDEX_URL="https://pypi.tuna.tsinghua.edu.cn/simple"
# Host user name; passed in by devcontainer.json as a build argument.
ARG USERNAME
# Comma-separated list of uv extras to install (no whitespace between entries).
ARG EXTRAS="base,proxy_openai,rag,storage_chromadb,storage_elasticsearch,cuda121,hf,quant_bnb,dbgpts"
ARG DEFAULT_VENV=/opt/.uv.venv
WORKDIR /app
COPY . .
USER root
# Create a user with the host's UID/GID (read from .devcontainer/.env, which
# init_env.sh writes on the host) so files in the bind-mounted workspace are
# writable from both the host and the container.
# The group is only created when no group with that GID exists yet; otherwise
# groupadd would abort the build.
RUN . .devcontainer/.env && \
    (getent group "$USER_GID" > /dev/null || groupadd -g "$USER_GID" "$GROUPNAME") && \
    useradd -u "$USER_UID" -g "$USER_GID" -m "$USERNAME" && \
    chown -R "$USER_UID:$USER_GID" /app
# Developer tooling, uv (installed globally through pipx) and passwordless sudo.
# GROUPNAME from .env is not visible in this RUN (each RUN starts a new shell),
# so the user's primary group is looked up explicitly for the chown.
RUN apt-get update && apt-get install -y \
        autojump \
        curl \
        default-libmysqlclient-dev \
        git \
        git-flow \
        python${PYTHON_VERSION}-dev \
        ssh \
        sudo \
        vim \
        wget \
        zsh \
    && rm -rf /var/lib/apt/lists/* \
    && python${PYTHON_VERSION} -m pip install --no-cache-dir --upgrade pip \
    && python${PYTHON_VERSION} -m pip install --no-cache-dir --upgrade pipx \
    && pipx install -i "$PIP_INDEX_URL" uv --global \
    && chown -R "$USERNAME:$(id -gn "$USERNAME")" "$DEFAULT_VENV" \
    && echo "$USERNAME ALL=(ALL) NOPASSWD:ALL" > "/etc/sudoers.d/$USERNAME" \
    && chmod 0440 "/etc/sudoers.d/$USERNAME"
USER $USERNAME
ENV UV_LINK_MODE=copy \
    PIP_INDEX_URL=$PIP_INDEX_URL \
    VIRTUAL_ENV=$DEFAULT_VENV \
    UV_PROJECT_ENVIRONMENT=$DEFAULT_VENV \
    UV_PYTHON=$DEFAULT_VENV/bin/python3 \
    UV_INDEX=$PIP_INDEX_URL \
    UV_DEFAULT_INDEX=$PIP_INDEX_URL
# Point the venv's activate script at its real location, then install the
# project (with the selected extras) plus the dev and lint requirements.
# FINAL_VENV_NAME is not declared in this file; fall back to DEFAULT_VENV so the
# sed never replaces the path with an empty string.
# The final python command is a smoke test that the dbgpt package is importable.
RUN sed -i "s|/app/\.venv|${FINAL_VENV_NAME:-$DEFAULT_VENV}|g" "${DEFAULT_VENV}/bin/activate" && \
    pip config set global.index-url "$PIP_INDEX_URL" && \
    pip config set global.trusted-host "$(echo "$PIP_INDEX_URL" | sed -E 's|^https?://([^/]+).*|\1|')" && \
    . "$DEFAULT_VENV/bin/activate" && \
    extras=$(echo $EXTRAS | tr ',' '\n' | while read extra; do echo "--extra $extra"; done | tr '\n' ' ') && \
    uv sync -v --active --all-packages $extras --default-index "$PIP_INDEX_URL" && \
    uv pip -v install --prefix "$VIRTUAL_ENV" -r requirements/dev-requirements.txt && \
    uv pip -v install --prefix "$VIRTUAL_ENV" -r requirements/lint-requirements.txt && \
    cp .devcontainer/dbgpt.pth "$DEFAULT_VENV/lib/python${PYTHON_VERSION}/site-packages/dbgpt.pth" && \
    python -c "import dbgpt; print(dbgpt.__version__)"
\ No newline at end of file
# Developing inside a Container
Use VS Code's Dev Containers extension to build a containerized development environment. The environment is based on the `eosphorosai/dbgpt-full:latest` image, which avoids repeated dependency installation and improves development efficiency.
NOTE: **Compatible with Linux and Windows Subsystem for Linux (WSL) environments only.**
# Setup
- Follow the guide [Developing inside a Container](https://code.visualstudio.com/docs/devcontainers/containers) to set up the Dev Container:
- Install the **Dev Containers** extension.
- Before the first launch, run the `.devcontainer/init_env.sh` script from the project root directory on the **host**.
- Create `models` dir in project root and download text2vec-large-chinese to models/text2vec-large-chinese
- Use the shortcut `Ctrl+Shift+P` to open the command palette, then enter `Dev Containers: Open Folder in Container`.
# Develop
After successfully starting the Dev Container, open the terminal
- Activate the virtual environment
```bash
. /opt/.uv.venv/bin/activate
```
- Customize the configuration file
You can copy the configuration file to the `.devcontainer` directory and rename it to `dev.toml` to avoid committing your personal configurations to the repository.
```bash
cp configs/dbgpt-app-config.example.toml .devcontainer/dev.toml
```
- Start the service
```bash
dbgpt start webserver --config .devcontainer/dev.toml
```
# Create A Pull Request
Please refer to [CONTRIBUTING.md](../CONTRIBUTING.md). Before executing the make script or git commit, remember to deactivate the current virtual environment in the development environment.
/app/packages/dbgpt-app/src
/app/packages/dbgpt-accelerator
/app/packages/dbgpt-accelerator/src
/app/packages/dbgpt-core/src
/app/packages/dbgpt-client/src
/app/packages/dbgpt-ext/src
/app/packages/dbgpt-serve/src
/app/packages/dbgpt-app/src
\ No newline at end of file
#!/usr/bin/env bash
# Host-side initialization for the Dev Container.
#
# 1. Writes the host OS, user and group identity to .devcontainer/.env.
# 2. Makes sure an ssh-agent is available and installs a snippet in the user's
#    shell rc file(s) that keeps one running in future shells
#    (see https://code.visualstudio.com/remote/advancedcontainers/sharing-git-credentials).
# 3. Creates the local "models" directory.

# Resolve the project root from this script's location (.devcontainer/..) so the
# relative paths below work no matter which directory the script is started from.
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$PROJECT_ROOT" || exit 1

OS=$(uname -s)
# Fall back to `id -un` when USER is not exported by the calling environment.
USERNAME="${USER:-$(id -un)}"
USER_UID=$(id -u "$USERNAME")
if [ "$OS" = "Linux" ]; then
  GROUPNAME=$(id -gn "$USERNAME")
  USER_GID=$(id -g "$USERNAME")
else
  # On non-Linux hosts the root group is used instead of the host group.
  GROUPNAME="root"
  USER_GID="0"
fi
printf "OS=%s\nUSERNAME=%s\nUSER_UID=%s\nGROUPNAME=%s\nUSER_GID=%s\n" \
  "$OS" \
  "$USERNAME" \
  "$USER_UID" \
  "$GROUPNAME" \
  "$USER_GID" > .devcontainer/.env

# Start (or reuse) an ssh-agent for this run and persist an auto-start snippet
# in ~/.bashrc, plus ~/.zshrc when the login shell is zsh.
init_ssh_agent(){
  local agent_env="$HOME/.ssh/ssh-agent"
  local rc_file
  local -a rc_files

  # ~/.ssh must exist before ssh-agent output is redirected into it.
  mkdir -p "$HOME/.ssh"

  if [[ -z "$SSH_AUTH_SOCK" || ! -S "$SSH_AUTH_SOCK" ]]; then
    # grep -q instead of comparing `wc -l` output as a string, which breaks on
    # platforms that pad the count with spaces.
    if ! ps -ax | grep -v grep | grep -q 'ssh-agent -s'; then
      ssh-agent -s &> "$agent_env"
    fi
    # The environment file only exists if an agent was started through this
    # mechanism; skip the eval instead of failing when it is missing.
    if [ -r "$agent_env" ]; then
      eval "$(cat "$agent_env")" > /dev/null
    fi
    ssh-add 2> /dev/null
    echo "$SSH_AUTH_SOCK"
  fi

  # Define code block to insert (with unique identifier comment)
  SSH_AGENT_CODE='# SSH Agent Auto Management[ID:ssh_agent_v1]
if [[ -z "$SSH_AUTH_SOCK" || ! -S "$SSH_AUTH_SOCK" ]]; then
RUNNING_AGENT="$(ps -ax | grep '\''ssh-agent -s'\'' | grep -v grep | wc -l)"
if [ "$RUNNING_AGENT" = "0" ]; then
ssh-agent -s &> $HOME/.ssh/ssh-agent
fi
eval $(cat $HOME/.ssh/ssh-agent) > /dev/null
ssh-add 2> /dev/null
fi
# END_SSH_AGENT_CODE'

  rc_files=("$HOME/.bashrc")
  if [[ "$SHELL" == *"zsh"* ]]; then
    rc_files+=("$HOME/.zshrc")
  fi

  # Check every rc file on its own so a file that already has the snippet is
  # never appended to twice, and one that lacks it is never skipped.
  for rc_file in "${rc_files[@]}"; do
    if ! grep -q 'END_SSH_AGENT_CODE' "$rc_file" 2> /dev/null; then
      echo "Adding SSH agent management code to ${rc_file}..."
      echo "$SSH_AGENT_CODE" >> "$rc_file"
      echo "Code added successfully. Please run source ${rc_file} to apply changes immediately"
    else
      echo "Existing SSH agent code detected, no need to add again"
    fi
  done
}

init_ssh_agent
mkdir -p models
\ No newline at end of file
#!/bin/bash
# Dev Container post-create hook.
# Installs Oh My Zsh with two plugins and the powerlevel10k theme, writes
# ~/.zshrc, adds a `load_env` helper that exports the variables from /app/.env
# in every new zsh session, and removes the .venv.make directory.
set -e
cd /app

OMZ_DIR="$HOME/.oh-my-zsh"
ZSHRC="$HOME/.zshrc"

# Shallow-clone $1 into $3, retrying from the mirror $2 if the first clone fails.
clone_with_fallback() {
  local primary_url="$1" mirror_url="$2" dest="$3"
  git clone --depth=1 "$primary_url" "$dest" || \
    git clone --depth=1 "$mirror_url" "$dest"
}

# Install Oh My Zsh from the Gitee mirror
# NOTE(review): this pipes a remote installer straight into sh without a
# checksum or pinned revision - consider vendoring or pinning it.
if [ ! -f "$OMZ_DIR/oh-my-zsh.sh" ]; then
  echo "Installing Oh My Zsh..."
  REPO=mirrors/oh-my-zsh REMOTE=https://gitee.com/mirrors/oh-my-zsh.git sh -c "$(curl -fsSL https://gitee.com/mirrors/oh-my-zsh/raw/master/tools/install.sh)" "" --unattended
fi

# Install plugins (GitHub first, Gitee mirror as fallback)
plugins=(
  "zsh-users/zsh-autosuggestions"
  "zsh-users/zsh-syntax-highlighting"
)
for plugin in "${plugins[@]}"; do
  repo_name=$(basename "$plugin")
  plugin_dir="$OMZ_DIR/custom/plugins/$repo_name"
  if [ ! -d "$plugin_dir" ]; then
    echo "Installing plugin: $plugin"
    clone_with_fallback \
      "https://github.com/$plugin.git" \
      "https://gitee.com/zsh-users/$repo_name.git" \
      "$plugin_dir"
  fi
done

# Install theme (GitHub first, Gitee mirror as fallback)
theme_dir="$OMZ_DIR/custom/themes/powerlevel10k"
if [ ! -d "$theme_dir" ]; then
  echo "Installing powerlevel10k theme..."
  clone_with_fallback \
    "https://github.com/romkatv/powerlevel10k.git" \
    "https://gitee.com/romkatv/powerlevel10k.git" \
    "$theme_dir"
fi

# Apply custom configuration
if [ -f /app/.devcontainer/zshrc-config ]; then
  cp /app/.devcontainer/zshrc-config "$ZSHRC"
else
  # Generate basic .zshrc if no custom configuration exists.
  # Quoted delimiter: the text is written literally, nothing is expanded here.
  cat << 'EOF' >> "$ZSHRC"
export ZSH="$HOME/.oh-my-zsh"
ZSH_THEME="robbyrussell"
plugins=(git zsh-autosuggestions zsh-syntax-highlighting autojump)
source $ZSH/oh-my-zsh.sh
# Enable autojump
[[ -s /usr/share/autojump/autojump.sh ]] && source /usr/share/autojump/autojump.sh
EOF
fi

# Ensure autojump configuration is applied (even if custom configuration exists)
if ! grep -q "autojump.sh" "$ZSHRC"; then
  echo '[[ -s /usr/share/autojump/autojump.sh ]] && source /usr/share/autojump/autojump.sh' >> "$ZSHRC"
fi

# Append the load_env helper once.
# The heredoc delimiter MUST be quoted: unquoted, $(grep ... | xargs) and
# $ENV_CONTENT are expanded while this script runs, which writes the current
# contents of /app/.env into ~/.zshrc and leaves a function that exports nothing.
# NOTE(review): zsh does not word-split unquoted parameters by default, so
# `export $ENV_CONTENT` may treat several KEY=VALUE pairs as one word - verify
# with a multi-variable .env.
if ! grep -q '^load_env()' "$ZSHRC"; then
  cat << 'EOF' >> "$ZSHRC"
# Add the following to ~/.zshrc
load_env() {
if [ -f /app/.env ]; then
ENV_CONTENT=$(grep -vE '^#|^$' /app/.env | xargs)
if [ -n "$ENV_CONTENT" ]; then
export $ENV_CONTENT
fi
fi
}
load_env
EOF
fi

rm -rf .venv.make
echo "Post-create setup completed!"
\ No newline at end of file
.env
.git/
./.mypy_cache/
models/
plugins/
pilot/data
pilot/message
pilot/meta_data/alembic/versions
pilot/meta_data/dbgpt.db
logs/
venv/
.venv/
.venv.make/
web/node_modules/
web/.next/
web/.env
docs/node_modules/
build/
docs/build/
docs/Dockerfile-deploy
\ No newline at end of file
[flake8]
exclude =
.eggs/
build/
*/tests/*
*_private
max-line-length = 88
inline-quotes = "
ignore =
C408
C417
E121
E123
E126
E203
E226
E231
E24
E704
W503
W504
W605
I
N
B001
B002
B003
B004
B005
B007
B008
B009
B010
B011
B012
B013
B014
B015
B016
B017
avoid-escape = no
[settings]
# This is to make isort compatible with Black. See
# https://black.readthedocs.io/en/stable/the_black_code_style.html#how-black-wraps-lines.
line_length=88
profile=black
multi_line_output=3
include_trailing_comma=True
use_parentheses=True
float_to_top=True
filter_files=True
skip_glob=examples/notebook/*
sections=FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER,AFTERRAY
[mypy]
exclude = /tests/
# plugins = pydantic.mypy
[mypy-dbgpt.rag.*]
strict_optional = False
ignore_missing_imports = True
follow_imports = skip
[mypy-dbgpt.app.*]
follow_imports = skip
[mypy-dbgpt.serve.*]
follow_imports = skip
[mypy-dbgpt.model.*]
follow_imports = skip
[mypy-dbgpt.util.*]
follow_imports = skip
[mypy-graphviz.*]
ignore_missing_imports = True
[mypy-cachetools.*]
ignore_missing_imports = True
[mypy-coloredlogs.*]
ignore_missing_imports = True
[mypy-termcolor.*]
ignore_missing_imports = True
[mypy-pydantic.*]
strict_optional = False
ignore_missing_imports = True
follow_imports = skip
[mypy-sentence_transformers.*]
ignore_missing_imports = True
[mypy-InstructorEmbedding.*]
ignore_missing_imports = True
[mypy-llama_index.*]
ignore_missing_imports = True
[mypy-langchain.*]
ignore_missing_imports = True
[mypy-pptx.*]
ignore_missing_imports = True
[mypy-docx.*]
ignore_missing_imports = True
[mypy-markdown.*]
ignore_missing_imports = True
[mypy-auto_gpt_plugin_template.*]
ignore_missing_imports = True
[mypy-spacy.*]
ignore_missing_imports = True
follow_imports = skip
[mypy-jieba.*]
ignore_missing_imports = True
# Storage
[mypy-msgpack.*]
ignore_missing_imports = True
[mypy-rocksdict.*]
ignore_missing_imports = True
[mypy-weaviate.*]
ignore_missing_imports = True
[mypy-pymilvus.*]
ignore_missing_imports = True
[mypy-elasticsearch.*]
ignore_missing_imports = True
[mypy-cryptography.*]
ignore_missing_imports = True
# Datasource
[mypy-pyspark.*]
ignore_missing_imports = True
[mypy-regex.*]
ignore_missing_imports = True
[mypy-sqlparse.*]
ignore_missing_imports = True
[mypy-clickhouse_connect.*]
ignore_missing_imports = True
[mypy-fastchat.protocol.api_protocol]
ignore_missing_imports = True
[mypy-neo4j.*]
ignore_missing_imports = True
# Agent
[mypy-seaborn.*]
ignore_missing_imports = True
[mypy-unstructured.*]
ignore_missing_imports = True
[mypy-rich.*]
ignore_missing_imports = True
[mypy-ollama.*]
ignore_missing_imports = True
[mypy-networkx.*]
ignore_missing_imports = True
[mypy-pypdf.*]
ignore_missing_imports = True
[mypy-qianfan.*]
ignore_missing_imports = True
\ No newline at end of file
# Please run command `pre-commit install` to install pre-commit hook
repos:
- repo: local
hooks:
- id: python-fmt
name: Python Format
entry: make fmt-check
language: system
exclude: '^dbgpt/app/static/|^web/'
types: [python]
stages: [commit]
pass_filenames: false
args: []
- id: python-test
name: Python Unit Test
entry: make test
language: system
exclude: '^dbgpt/app/static/|^web/'
types: [python]
stages: [commit]
pass_filenames: false
args: []
- id: python-test-doc
name: Python Doc Test
entry: make test-doc
language: system
exclude: '^dbgpt/app/static/|^web/'
types: [python]
stages: [commit]
pass_filenames: false
args: []
- id: python-lint-mypy
name: Python Lint mypy
entry: make mypy
language: system
exclude: '^dbgpt/app/static/|^web/'
types: [python]
stages: [commit]
pass_filenames: false
args: []
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or advances of
any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
[INSERT CONTACT METHOD].
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
*Community Impact*: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
*Consequence*: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
*Community Impact*: A violation through a single incident or series of
actions.
*Consequence*: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.
### 3. Temporary Ban
*Community Impact*: A serious violation of community standards, including
sustained inappropriate behavior.
*Consequence*: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
*Community Impact*: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
*Consequence*: A permanent ban from any sort of public interaction within the
community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
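For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].

[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations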
# Contribution
First of all, thank you for considering contributing to this project.
It's people like you that make it a reality for the community. There are many ways to contribute, and we appreciate all of them.
This guide will help you get started with contributing to this project.
## Fork The Repository
1. Fork the repository you want to contribute to by clicking the "Fork" button on the project page.
2. Clone the repository to your local machine using the following command:
```
git clone https://github.com/<YOUR-GITHUB-USERNAME>/DB-GPT
```
Please replace `<YOUR-GITHUB-USERNAME>` with your GitHub username.
## Create A New Development Environment
1. Create a new virtual environment using the following command:
```
# Make sure python >= 3.10
conda create -n dbgpt_env python=3.10
conda activate dbgpt_env
```
2. Change to the project directory using the following command:
```
cd DB-GPT
```
3. Install the project from the local source using the following command:
```
# it will take some minutes
pip install -e ".[default]"
```
4. Install development requirements
```
pip install -r requirements/dev-requirements.txt
pip install -r requirements/lint-requirements.txt
```
5. Install pre-commit hooks
```
pre-commit install
```
6. Install `make` command
The `make` command is installed by default on most Unix-based systems. If you do not
have it, install it using your operating system's package manager.
## New Branch And Make Changes
1. Create a new branch for your changes using the following command:
```
git checkout -b <branch-name>
```
Please replace `<branch-name>` with a descriptive name for your branch.
2. Make your changes to the code or documentation.
3. Add tests for your changes if necessary.
4. Format your code using the following command:
```
make fmt
```
5. Run the tests using the following command:
```
make test
```
6. Check types using the following command:
```
make mypy
```
7. Check lint using the following command:
```
make fmt-check
```
8. If all checks pass, you can add and commit your changes using the following commands:
```
git add xxxx
```
make sure to replace `xxxx` with the files you want to commit.
then commit your changes using the following command:
```
git commit -m "your commit message"
```
Please replace `your commit message` with a meaningful commit message.
It will take some time to get used to the process, but it's worth it. And it will run
all git hooks and checks before you commit. If it fails, you need to fix the issues
then re-commit it.
9. Push the changes to your forked repository using the following command:
```
git push origin <branch-name>
```
## Create A Pull Request
1. Go to the GitHub website and navigate to your forked repository.
2. Click the "New pull request" button.
3. Select the branch you just pushed to and the branch you want to merge into on the original repository.
Write necessary information about your changes and click "Create pull request".
4. Wait for the project maintainer to review your changes and provide feedback.
That's it, you made it! 🐣⭐⭐
# Developing inside a Container
If you are using VS Code as your IDE for development, you can refer to the [configuration here](.devcontainer/README.md) to set up the Dev Containers development environment.
# User Agreement and Disclaimer
1. If you do not agree with any content of this statement, please stop using this software immediately. Once you start using this software product and service, it means that you have agreed to all the contents of this statement.
2. This disclaimer applies to all users of this software. This software reserves the right to modify and update this statement at any time, and notify users in the form of Github Readme, software updates, etc. Please review regularly and abide by the latest disclaimer.
3. The original design intention of this project is to provide a basic framework/toolset, mainly focusing on RAG, Agents, AWEL, etc. To keep the project simple and easy to use, we intentionally did not integrate any form of user login, authentication or authorization mechanism.
4. If you plan to deploy this project into a production environment, it is strongly recommended to connect to existing third-party authentication services (such as OAuth, OpenID Connect, etc.) according to your specific needs, or to develop and maintain a complete user management and permission control system yourself.
5. We encourage all developers to follow best practices to keep user data secure, but this is beyond the scope of this project. Therefore, always take appropriate security measures when handling sensitive information.
6. Users are responsible for the security configuration in their applications, including but not limited to user account management, password policies, access control lists, etc.
7. The project authors and contributors are not legally responsible for any direct or indirect losses caused by the use of this software.
Please read and understand all the contents of this disclaimer carefully before using this software, thank you for your understanding and support.
MIT License
Copyright (c) 2023 magic.chen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
include LICENSE
include README.md
include requirements.txt
.DEFAULT_GOAL := help
SHELL=/bin/bash
VENV = .venv.make
# Detect the operating system and set the virtualenv bin directory
ifeq ($(OS),Windows_NT)
VENV_BIN=$(VENV)/Scripts
else
VENV_BIN=$(VENV)/bin
endif
# Python version used for the tooling virtualenv.
# Override with e.g. `make PYTHON_VERSION=3.12 setup`.
PYTHON_VERSION ?= 3.11

# `setup` creates the tooling virtualenv (ruff, mypy, pytest).
# VENV_BIN is used instead of a hard-coded bin/ directory so the targets agree
# with the per-OS directory selected at the top of the file.
.PHONY: setup
setup: $(VENV_BIN)/activate

$(VENV_BIN)/activate: $(VENV)/.venv-timestamp

# The virtualenv is (re)created whenever uv.lock changes.
$(VENV)/.venv-timestamp: uv.lock
	uv venv --python $(PYTHON_VERSION) $(VENV)
	uv pip install --prefix $(VENV) ruff
	uv pip install --prefix $(VENV) mypy
	uv pip install --prefix $(VENV) pytest
	touch $(VENV)/.venv-timestamp

# `testenv` additionally syncs the workspace packages and their extras into the
# virtualenv so the tests can import dbgpt.
.PHONY: testenv
testenv: $(VENV)/.testenv

$(VENV)/.testenv: $(VENV_BIN)/activate
	# `--active` is only passed when this uv version supports it
	. $(VENV_BIN)/activate && \
	if uv sync --help | grep -q -- "--active"; then active_flag="--active"; else active_flag=""; fi && \
	uv sync $$active_flag --all-packages \
		--extra "base" \
		--extra "proxy_openai" \
		--extra "rag" \
		--extra "storage_chromadb" \
		--extra "dbgpts" \
		--link-mode=copy
	cp .devcontainer/dbgpt.pth $(VENV)/lib/python$(PYTHON_VERSION)/site-packages
	touch $(VENV)/.testenv
# Formats the code base in place with ruff, sorts imports, then applies the
# remaining auto-fixable lint rules.
.PHONY: fmt
fmt: setup ## Format Python code
	# Format code
	$(VENV_BIN)/ruff format packages
	$(VENV_BIN)/ruff format --exclude="examples/notebook" examples
	$(VENV_BIN)/ruff format i18n
	$(VENV_BIN)/ruff format scripts/update_version_all.py
	$(VENV_BIN)/ruff format install_help.py
	# Sort imports (same paths as the format step above)
	$(VENV_BIN)/ruff check --select I --fix packages
	$(VENV_BIN)/ruff check --select I --fix --exclude="examples/notebook" examples
	$(VENV_BIN)/ruff check --select I --fix i18n
	$(VENV_BIN)/ruff check --select I --fix scripts/update_version_all.py
	$(VENV_BIN)/ruff check --select I --fix install_help.py
	# Lint fixes; dbgpt-serve sources are checked separately with F811/F841 ignored
	$(VENV_BIN)/ruff check --fix packages \
		--exclude="packages/dbgpt-serve/src/**"
	$(VENV_BIN)/ruff check --fix packages/dbgpt-serve --ignore F811,F841
	# Not need to check examples/notebook
	#$(VENV_BIN)/ruff check --fix --exclude="examples/notebook" examples
# Read-only counterpart of `fmt`: reports formatting, import-order and lint
# problems. No `--fix` is passed, so the working tree is never modified, which
# is what the help text promises and what the pre-commit hook relies on.
.PHONY: fmt-check
fmt-check: setup ## Check Python code formatting and style without making changes
	$(VENV_BIN)/ruff format --check packages
	$(VENV_BIN)/ruff format --check --exclude="examples/notebook" examples
	$(VENV_BIN)/ruff check --select I packages
	$(VENV_BIN)/ruff check --select I --exclude="examples/notebook" examples
	$(VENV_BIN)/ruff check packages \
		--exclude="packages/dbgpt-serve/src/**"
	$(VENV_BIN)/ruff check packages/dbgpt-serve --ignore F811,F841
# Aggregate target running every check used before committing.
.PHONY: pre-commit
pre-commit: fmt-check test test-doc mypy ## Run formatting and unit tests before committing

# Declared phony so a file or directory named "test" can never make this
# target appear up to date.
.PHONY: test
test: $(VENV)/.testenv ## Run unit tests
	$(VENV_BIN)/pytest --pyargs dbgpt
# Runs the doctests embedded in the modules under dbgpt/core.
# NOTE(review): the other targets operate on packages/...; confirm that
# dbgpt/core is still a valid path in this repository layout.
.PHONY: test-doc
test-doc: $(VENV)/.testenv ## Run doctests
# -k "not test_" deselects everything whose name contains "test_", leaving the doctests.
$(VENV_BIN)/pytest --doctest-modules -k "not test_" dbgpt/core
# Type-checks packages/dbgpt-core/ with mypy using .mypy.ini.
# The commented-out invocations below are kept as-is; they are disabled.
.PHONY: mypy
mypy: $(VENV)/.testenv ## Run mypy checks
# https://github.com/python/mypy
$(VENV_BIN)/mypy --config-file .mypy.ini --ignore-missing-imports packages/dbgpt-core/
# $(VENV_BIN)/mypy --config-file .mypy.ini dbgpt/rag/ dbgpt/datasource/ dbgpt/client/ dbgpt/agent/ dbgpt/vis/ dbgpt/experimental/
# rag depends on core and storage, so it does not need to be checked again.
# $(VENV_BIN)/mypy --config-file .mypy.ini dbgpt/storage/
# $(VENV_BIN)/mypy --config-file .mypy.ini dbgpt/core/
# TODO: More package checks with mypy.
# Runs the unit tests with coverage reporting. Depends on the test environment
# (like `test`) rather than the bare tooling venv, because `--pyargs dbgpt`
# needs the dbgpt packages to be installed.
# NOTE(review): --cov requires the pytest-cov plugin; the setup rule only
# installs ruff, mypy and pytest - confirm the plugin arrives via `uv sync`.
.PHONY: coverage
coverage: $(VENV)/.testenv ## Run tests and report coverage
	$(VENV_BIN)/pytest --pyargs dbgpt --cov=dbgpt
# Removes the tooling virtualenv, Python bytecode caches and coverage data.
.PHONY: clean
clean: ## Clean up the environment
	rm -rf $(VENV)
	find . -type f -name '*.pyc' -delete
	# -prune + rm -rf: `-delete` fails on __pycache__ directories that still
	# contain files other than *.pyc
	find . -type d -name '__pycache__' -prune -exec rm -rf {} +
	# find . -type d -name '.pytest_cache' -delete
	# .coverage is a data file, so match files rather than directories
	find . -type f -name '.coverage' -delete
# Removes previous build artifacts.
.PHONY: clean-dist
clean-dist: ## Clean up the distribution
	rm -rf dist/ *.egg-info build/

# Builds every workspace package, then drops the dbgpt_app and dbgpt_serve
# artifacts from dist/ so they are not part of what gets published.
.PHONY: build
build: clean-dist ## Package the project for distribution
	uv build --all-packages
	rm -rf dist/dbgpt_app-*
	rm -rf dist/dbgpt_serve-*

.PHONY: publish
publish: build ## Upload the package to PyPI
	uv publish

# Publishes to the index named "testpypi"; the help text names it explicitly
# so `make help` does not list two identical "PyPI" entries.
.PHONY: publish-test
publish-test: build ## Upload the package to TestPyPI
	uv publish --index testpypi
# Prints every target that carries a trailing "## description" comment:
# grep selects the "target: ... ## text" lines from the makefile(s), awk splits
# each line at ":...## " and prints the target name (in colour, padded to 18
# columns) followed by its description, and the result is sorted.
.PHONY: help
help: ## Display this help screen
@echo "Available commands:"
@grep -E '^[a-z.A-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-18s\033[0m %s\n", $$1, $$2}' | sort
\ No newline at end of file
# DB-GPT: データベースとの対話を革新するプライベートLLM技術
<p align="left">
<img src="./assets/LOGO.png" width="100%" />
</p>
<div align="center">
<p>
<a href="https://github.com/eosphoros-ai/DB-GPT">
<img alt="stars" src="https://img.shields.io/github/stars/eosphoros-ai/db-gpt?style=social" />
</a>
<a href="https://github.com/eosphoros-ai/DB-GPT">
<img alt="forks" src="https://img.shields.io/github/forks/eosphoros-ai/db-gpt?style=social" />
</a>
<a href="https://opensource.org/licenses/MIT">
<img alt="License: MIT" src="https://img.shields.io/badge/License-MIT-yellow.svg" />
</a>
<a href="https://github.com/eosphoros-ai/DB-GPT/releases">
<img alt="Release Notes" src="https://img.shields.io/github/release/eosphoros-ai/DB-GPT" />
</a>
<a href="https://github.com/eosphoros-ai/DB-GPT/issues">
<img alt="Open Issues" src="https://img.shields.io/github/issues-raw/eosphoros-ai/DB-GPT" />
</a>
<a href="https://discord.gg/7uQnPuveTY">
<img alt="Discord" src="https://dcbadge.vercel.app/api/server/7uQnPuveTY?compact=true&style=flat" />
</a>
<a href="https://join.slack.com/t/slack-inu2564/shared_invite/zt-29rcnyw2b-N~ubOD9kFc7b7MDOAM1otA">
<img alt="Slack" src="https://badgen.net/badge/Slack/Join%20DB-GPT/0abd59?icon=slack" />
</a>
<a href="https://codespaces.new/eosphoros-ai/DB-GPT">
<img alt="Open in GitHub Codespaces" src="https://github.com/codespaces/badge.svg" />
</a>
</p>
[**英語**](README.md) | [**中国語**](README.zh.md) | [**Discord**](https://discord.gg/7uQnPuveTY) | [**ドキュメント**](https://docs.dbgpt.site) | [**微信**](https://github.com/eosphoros-ai/DB-GPT/blob/main/README.zh.md#%E8%81%94%E7%B3%BB%E6%88%91%E4%BB%AC) | [**コミュニティ**](https://github.com/eosphoros-ai/community) | [**論文**](https://arxiv.org/pdf/2312.17449.pdf)
</div>
## DB-GPTとは何か?
🤖 **DB-GPTは、AWEL(エージェントワークフロー式言語)とエージェントを備えたオープンソースのAIネイティブデータアプリ開発フレームワークです。**
大規模モデルの分野でのインフラを構築することを目的としており、SMMF(マルチモデル管理)、Text2SQL効果の最適化、RAGフレームワークと最適化、マルチエージェントフレームワークの協力、AWEL(エージェントワークフローのオーケストレーション)など、複数の技術機能の開発を通じて、データを使用した大規模モデルアプリケーションをよりシンプルで便利にします。
🚀 **データ3.0時代には、モデルとデータベースを基盤として、企業や開発者がより少ないコードで独自のアプリケーションを構築できます。**
### AIネイティブデータアプリ
- 🔥🔥🔥 [V0.7.0 リリース | 重要なアップグレードのセット](http://docs.dbgpt.cn/blog/db-gpt-v070-release)
- [サポート MCP Protocol](https://github.com/eosphoros-ai/DB-GPT/pull/2497)
- [サポート DeepSeek R1](https://github.com/deepseek-ai/DeepSeek-R1)
- [サポート QwQ-32B](https://huggingface.co/Qwen/QwQ-32B)
- [基本モジュールをリファクタリングする]()
- [dbgpt-app](./packages/dbgpt-app)
- [dbgpt-core](./packages/dbgpt-core)
- [dbgpt-serve](./packages/dbgpt-serve)
- [dbgpt-client](./packages/dbgpt-client)
- [dbgpt-accelerator](./packages/dbgpt-accelerator)
- [dbgpt-ext](./packages/dbgpt-ext)
![Data-awels](https://github.com/eosphoros-ai/DB-GPT/assets/17919400/37d116fc-d9dd-4efa-b4df-9ab02b22541c)
![Data-Apps](https://github.com/eosphoros-ai/DB-GPT/assets/17919400/a7bf6d65-92d1-4f0e-aaf0-259ccdde22fd)
![dashboard-images](https://github.com/eosphoros-ai/DB-GPT/assets/17919400/1849a79a-f7fd-40cf-bc9c-b117a041dd6a)
## 目次
- [紹介](#紹介)
- [インストール](#インストール)
- [特徴](#特徴)
- [貢献](#貢献)
- [連絡先](#連絡先情報)
## 紹介
DB-GPTのアーキテクチャは以下の図に示されています:
<p align="center">
<img src="./assets/dbgpt.png" width="800" />
</p>
コア機能には以下の部分が含まれます:
- **RAG(Retrieval Augmented Generation)**:現在、RAGは最も実用的に実装され、緊急に必要とされる領域です。DB-GPTは、RAGの機能を使用して知識ベースのアプリケーションを構築できるようにする、RAGに基づくフレームワークをすでに実装しています。
- **GBI(Generative Business Intelligence)**:Generative BIはDB-GPTプロジェクトのコア機能の1つであり、企業のレポート分析とビジネスインサイトを構築するための基本的なデータインテリジェンス技術を提供します。
- **ファインチューニングフレームワーク**:モデルのファインチューニングは、任意の企業が垂直およびニッチなドメインで実装するために不可欠な機能です。DB-GPTは、DB-GPTプロジェクトとシームレスに統合される完全なファインチューニングフレームワークを提供します。最近のファインチューニングの取り組みでは、Spiderデータセットに基づいて82.5%の実行精度を達成しています。
- **データ駆動型マルチエージェントフレームワーク**:DB-GPTは、データに基づいて継続的に意思決定を行い、実行するためのデータ駆動型自己進化型マルチエージェントフレームワークを提供します。
- **データファクトリー**:データファクトリーは、主に大規模モデルの時代における信頼できる知識とデータのクリーニングと処理に関するものです。
- **データソース**:DB-GPTのコア機能に生産ビジネスデータをシームレスに接続するために、さまざまなデータソースを統合します。
### サブモジュール
- [DB-GPT-Hub](https://github.com/eosphoros-ai/DB-GPT-Hub) 大規模言語モデル(LLM)上での教師ありファインチューニング(SFT)を適用することにより、高性能なText-to-SQLワークフロー。
- [dbgpts](https://github.com/eosphoros-ai/dbgpts) dbgptsは、DB-GPT上で構築されたいくつかのデータアプリ、AWELオペレータ、AWELワークフローテンプレート、およびエージェントを含む公式リポジトリです。
#### Text2SQLファインチューニング
- サポートされているLLM
- [x] LLaMA
- [x] LLaMA-2
- [x] BLOOM
- [x] BLOOMZ
- [x] Falcon
- [x] Baichuan
- [x] Baichuan2
- [x] InternLM
- [x] Qwen
- [x] XVERSE
- [x] ChatGLM2
- SFT精度
2023年10月10日現在、このプロジェクトを使用して130億パラメータのオープンソースモデルをファインチューニングすることにより、SpiderデータセットでGPT-4を超える実行精度を達成しました!
[Text2SQLファインチューニングに関する詳細情報](https://github.com/eosphoros-ai/DB-GPT-Hub)
- [DB-GPT-Plugins](https://github.com/eosphoros-ai/DB-GPT-Plugins) Auto-GPTプラグインを直接実行できるDB-GPTプラグイン
- [GPT-Vis](https://github.com/eosphoros-ai/GPT-Vis) 可視化プロトコル
## インストール
![Docker](https://img.shields.io/badge/docker-%230db7ed.svg?style=for-the-badge&logo=docker&logoColor=white)
![Linux](https://img.shields.io/badge/Linux-FCC624?style=for-the-badge&logo=linux&logoColor=black)
![macOS](https://img.shields.io/badge/mac%20os-000000?style=for-the-badge&logo=macos&logoColor=F0F0F0)
![Windows](https://img.shields.io/badge/Windows-0078D6?style=for-the-badge&logo=windows&logoColor=white)
[**使用チュートリアル**](http://docs.dbgpt.site/docs/overview)
- [**インストール**](http://docs.dbgpt.site/docs/installation)
- [Docker](https://docs.dbgpt.site/docs/installation/docker)
- [ソースコード](https://docs.dbgpt.site/docs/installation/sourcecode)
- [**クイックスタート**](http://docs.dbgpt.site/docs/quickstart)
- [**アプリケーション**](http://docs.dbgpt.site/docs/operation_manual)
- [アプリの使用](https://docs.dbgpt.site/docs/application/app_usage)
- [AWELフローの使用](https://docs.dbgpt.site/docs/application/awel_flow_usage)
- [**デバッグ**](http://docs.dbgpt.site/docs/operation_manual/advanced_tutorial/debugging)
- [**高度な使用法**](https://docs.dbgpt.site/docs/application/advanced_tutorial/cli)
- [SMMF](https://docs.dbgpt.site/docs/application/advanced_tutorial/smmf)
- [ファインチューニング](https://docs.dbgpt.site/docs/application/fine_tuning_manual/dbgpt_hub)
- [AWEL](http://docs.dbgpt.cn/docs/awel/tutorial)
## 特徴
現在、私たちはいくつかの主要な機能を紹介して、現在の能力を示しています:
- **プライベートドメインQ&A&データ処理**
DB-GPTプロジェクトは、知識ベースの構築を改善し、構造化および非構造化データの両方の効率的なストレージと検索を可能にする一連の機能を提供します。これらの機能には、複数のファイル形式のアップロードのサポート、カスタムデータ抽出プラグインの統合、および大量の情報を効果的に管理するための統一されたベクトルストレージと検索機能が含まれます。
- **マルチデータソース&GBI(Generative Business Intelligence)**
DB-GPTプロジェクトは、Excel、データベース、データウェアハウスなどのさまざまなデータソースとの自然言語のシームレスな対話を容易にします。これらのソースから情報を照会および取得するプロセスを簡素化し、直感的な会話を行い、洞察を得ることができます。さらに、DB-GPTは分析レポートの生成をサポートし、ユーザーに貴重なデータの要約と解釈を提供します。
- **マルチエージェント&プラグイン**
さまざまなタスクを実行するためのカスタムプラグインのサポートを提供し、Auto-GPTプラグインモデルをネイティブにサポートしています。エージェントプロトコルは、エージェントプロトコル標準に準拠しています。
- **自動ファインチューニングText2SQL**
私たちはまた、大規模言語モデル(LLM)、Text2SQLデータセット、LoRA/QLoRA/P-Tuningなどのファインチューニング方法を中心に、自動ファインチューニングの軽量フレームワークを開発しました。このフレームワークは、Text-to-SQLファインチューニングをアセンブリラインのように簡単にします。[DB-GPT-Hub](https://github.com/eosphoros-ai/DB-GPT-Hub)
- **SMMF(サービス指向マルチモデル管理フレームワーク)**
私たちは、LLaMA/LLaMA2、Baichuan、ChatGLM、Wenxin、Tongyi、Zhipuなど、オープンソースおよびAPIエージェントからの数十の大規模言語モデル(LLM)を含む幅広いモデルをサポートしています。
- ニュース
- 🔥🔥🔥 [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B)
- 🔥🔥🔥 [DeepSeek-R1](https://huggingface.co/deepseek-ai/DeepSeek-R1)
- 🔥🔥🔥 [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3)
- 🔥🔥🔥 [DeepSeek-R1-Distill-Llama-70B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-70B)
- 🔥🔥🔥 [DeepSeek-R1-Distill-Qwen-32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B)
- 🔥🔥🔥 [DeepSeek-R1-Distill-Qwen-14B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B)
- 🔥🔥🔥 [DeepSeek-R1-Distill-Llama-8B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B)
- 🔥🔥🔥 [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B)
- 🔥🔥🔥 [DeepSeek-R1-Distill-Qwen-1.5B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B)
- 🔥🔥🔥 [Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)
- 🔥🔥🔥 [Qwen2.5-Coder-14B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct)
- 🔥🔥🔥 [Qwen2.5-72B-Instruct](https://huggingface.co/Qwen/Qwen2.5-72B-Instruct)
- 🔥🔥🔥 [Qwen2.5-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-32B-Instruct)
- 🔥🔥🔥 [Qwen2.5-14B-Instruct](https://huggingface.co/Qwen/Qwen2.5-14B-Instruct)
- 🔥🔥🔥 [Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct)
- 🔥🔥🔥 [Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct)
- 🔥🔥🔥 [Qwen2.5-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct)
- 🔥🔥🔥 [Qwen2.5-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct)
- 🔥🔥🔥 [Qwen2.5-Coder-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct)
- 🔥🔥🔥 [Qwen2.5-Coder-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct)
- 🔥🔥🔥 [Meta-Llama-3.1-405B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct)
- 🔥🔥🔥 [Meta-Llama-3.1-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct)
- 🔥🔥🔥 [Meta-Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct)
- 🔥🔥🔥 [gemma-2-27b-it](https://huggingface.co/google/gemma-2-27b-it)
- 🔥🔥🔥 [gemma-2-9b-it](https://huggingface.co/google/gemma-2-9b-it)
- 🔥🔥🔥 [DeepSeek-Coder-V2-Instruct](https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Instruct)
- 🔥🔥🔥 [DeepSeek-Coder-V2-Lite-Instruct](https://huggingface.co/deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct)
- 🔥🔥🔥 [Qwen2-57B-A14B-Instruct](https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct)
- 🔥🔥🔥 [Qwen2-57B-A14B-Instruct](https://huggingface.co/Qwen/Qwen2-57B-A14B-Instruct)
- 🔥🔥🔥 [Qwen2-72B-Instruct](https://huggingface.co/Qwen/Qwen2-72B-Instruct)
- 🔥🔥🔥 [Qwen2-7B-Instruct](https://huggingface.co/Qwen/Qwen2-7B-Instruct)
- 🔥🔥🔥 [Qwen2-1.5B-Instruct](https://huggingface.co/Qwen/Qwen2-1.5B-Instruct)
- 🔥🔥🔥 [Qwen2-0.5B-Instruct](https://huggingface.co/Qwen/Qwen2-0.5B-Instruct)
- 🔥🔥🔥 [glm-4-9b-chat](https://huggingface.co/THUDM/glm-4-9b-chat)
- 🔥🔥🔥 [Phi-3](https://huggingface.co/collections/microsoft/phi-3-6626e15e9585a200d2d761e3)
- 🔥🔥🔥 [Yi-1.5-34B-Chat](https://huggingface.co/01-ai/Yi-1.5-34B-Chat)
- 🔥🔥🔥 [Yi-1.5-9B-Chat](https://huggingface.co/01-ai/Yi-1.5-9B-Chat)
- 🔥🔥🔥 [Yi-1.5-6B-Chat](https://huggingface.co/01-ai/Yi-1.5-6B-Chat)
- 🔥🔥🔥 [Qwen1.5-110B-Chat](https://huggingface.co/Qwen/Qwen1.5-110B-Chat)
- 🔥🔥🔥 [Qwen1.5-MoE-A2.7B-Chat](https://huggingface.co/Qwen/Qwen1.5-MoE-A2.7B-Chat)
- 🔥🔥🔥 [Meta-Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct)
- 🔥🔥🔥 [Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct)
- 🔥🔥🔥 [CodeQwen1.5-7B-Chat](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat)
- 🔥🔥🔥 [Qwen1.5-32B-Chat](https://huggingface.co/Qwen/Qwen1.5-32B-Chat)
- 🔥🔥🔥 [Starling-LM-7B-beta](https://huggingface.co/Nexusflow/Starling-LM-7B-beta)
- 🔥🔥🔥 [gemma-7b-it](https://huggingface.co/google/gemma-7b-it)
- 🔥🔥🔥 [gemma-2b-it](https://huggingface.co/google/gemma-2b-it)
- 🔥🔥🔥 [SOLAR-10.7B](https://huggingface.co/upstage/SOLAR-10.7B-Instruct-v1.0)
- 🔥🔥🔥 [Mixtral-8x7B](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1)
- 🔥🔥🔥 [Qwen-72B-Chat](https://huggingface.co/Qwen/Qwen-72B-Chat)
- 🔥🔥🔥 [Yi-34B-Chat](https://huggingface.co/01-ai/Yi-34B-Chat)
- [サポートされているLLMの詳細](http://docs.dbgpt.site/docs/modules/smmf)
- **プライバシーとセキュリティ**
私たちは、さまざまな技術を実装することにより、データのプライバシーとセキュリティを確保しています。これには、大規模モデルのプライベート化とプロキシの非識別化が含まれます。
- サポートされているデータソース
- [データソース](http://docs.dbgpt.site/docs/modules/connections)
## イメージ
🌐 [AutoDLイメージ](https://www.codewithgpu.com/i/eosphoros-ai/DB-GPT/dbgpt)
### 言語切り替え
.env設定ファイルでLANGUAGEパラメータを変更して、異なる言語に切り替えることができます。デフォルトは英語です(中国語:zh、英語:en、他の言語は後で追加されます)。
## 貢献
- 新しい貢献のための詳細なガイドラインを確認するには、[貢献方法](https://github.com/eosphoros-ai/DB-GPT/blob/main/CONTRIBUTING.md)を参照してください。
### 貢献者ウォール
<a href="https://github.com/eosphoros-ai/DB-GPT/graphs/contributors">
<img src="https://contrib.rocks/image?repo=eosphoros-ai/DB-GPT&max=200" />
</a>
## ライセンス
MITライセンス(MIT)
## 引用
もし`DB-GPT`があなたの研究や開発に役立つと感じた場合、以下の論文を引用してください。
DB-GPTの全体的なアーキテクチャについて知りたい場合は、<a href="https://arxiv.org/abs/2312.17449" target="_blank">論文</a>と<a href="https://arxiv.org/abs/2404.10209" target="_blank">論文</a>を引用してください。
DB-GPTを使用してAgent開発に関する内容について知りたい場合は、<a href="https://arxiv.org/abs/2412.13520" target="_blank">論文</a>を引用してください。
```bibtex
@article{xue2023dbgpt,
title={DB-GPT: Empowering Database Interactions with Private Large Language Models},
author={Siqiao Xue and Caigao Jiang and Wenhui Shi and Fangyin Cheng and Keting Chen and Hongjun Yang and Zhiping Zhang and Jianshan He and Hongyang Zhang and Ganglin Wei and Wang Zhao and Fan Zhou and Danrui Qi and Hong Yi and Shaodong Liu and Faqiang Chen},
year={2023},
journal={arXiv preprint arXiv:2312.17449},
url={https://arxiv.org/abs/2312.17449}
}
@misc{huang2024romasrolebasedmultiagentdatabase,
title={ROMAS: A Role-Based Multi-Agent System for Database monitoring and Planning},
author={Yi Huang and Fangyin Cheng and Fan Zhou and Jiahui Li and Jian Gong and Hongjun Yang and Zhidong Fan and Caigao Jiang and Siqiao Xue and Faqiang Chen},
year={2024},
eprint={2412.13520},
archivePrefix={arXiv},
primaryClass={cs.AI},
url={https://arxiv.org/abs/2412.13520},
}
@inproceedings{xue2024demonstration,
title={Demonstration of DB-GPT: Next Generation Data Interaction System Empowered by Large Language Models},
author={Siqiao Xue and Danrui Qi and Caigao Jiang and Wenhui Shi and Fangyin Cheng and Keting Chen and Hongjun Yang and Zhiping Zhang and Jianshan He and Hongyang Zhang and Ganglin Wei and Wang Zhao and Fan Zhou and Hong Yi and Shaodong Liu and Hongjun Yang and Faqiang Chen},
year={2024},
booktitle = "Proceedings of the VLDB Endowment",
url={https://arxiv.org/abs/2404.10209}
}
```
## 連絡先情報
コミュニティを構築するために取り組んでいます。コミュニティの構築に関するアイデアがあれば、お気軽にお問い合わせください。
[![](https://dcbadge.vercel.app/api/server/7uQnPuveTY?compact=true&style=flat)](https://discord.gg/7uQnPuveTY)
[![Star History Chart](https://api.star-history.com/svg?repos=csunny/DB-GPT&type=Date)](https://star-history.com/#csunny/DB-GPT)
# DB-GPT
用自然语言直接查数据库,DB-GPT具备生成高精度和复杂SQL的能力,简化大模型在数据库应用中的开发,支持数据处理、问答、RAG等功能,适用于数据3.0时代的高效应用构建。
## 论文
`无`
## 模型结构
DB-GPT架构:
<div align=center>
<img src="./doc/dbgpt.png"/>
</div>
本步骤以目前最新模型QwQ-32B为例,其它模型读者可以此类推自行研究。QwQ-32B采用transformer通用的Decoder-only结构。
<div align=center>
<img src="./doc/qwen.png"/>
</div>
## 算法原理
强大的基础模型+大规模强化学习=强大的推理能力,这是当前大语言模型训练的有效新方向。除了基础推理能力外,QwQ-32B还集成了与Agent相关的能力,使其能够在使用工具的同时进行批判性思考,并根据环境反馈调整推理过程。
作者暂未公布具体采用何种强化学习算法,若为GRPO,原理如下:
算法核心点:通过反向KL散度约束,GRPO实现了更稳定的策略更新。与TRPO的硬约束不同,采用软约束形式,既能保证训练稳定性,又避免了复杂的二阶优化计算,β负责动态调节探索与利用的平衡系数。
<div align=center>
<img src="./doc/algorithm.png"/>
</div>
<div align=center>
<img src="./doc/GRPO.png"/>
</div>
GRPO算法工作流程:
<div align=center>
<img src="./doc/GRPO_flow.png"/>
</div>
## 环境配置
```
mv DB-GPT_pytorch DB-GPT # 去框架名后缀
```
### Docker(方法一)
```
docker pull image.sourcefind.cn:5000/dcu/admin/base/pytorch:2.4.1-ubuntu22.04-dtk25.04-py3.10-fixpy
# 将<your IMAGE ID>替换为以上拉取的docker镜像的ID,本镜像为:e77c15729879
docker run -it -p 5670:5670 --shm-size=64G -v $PWD/DB-GPT:/home/DB-GPT -v /opt/hyhal:/opt/hyhal:ro --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video --name dbgpt <your IMAGE ID> bash
cd /home/DB-GPT
pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple
```
### Dockerfile(方法二)
```
cd /home/DB-GPT/docker
docker build --no-cache -t dbgpt:latest .
docker run -p 5670:5670 --shm-size=64G --name dbgpt -v /opt/hyhal:/opt/hyhal:ro --privileged=true --device=/dev/kfd --device=/dev/dri/ --group-add video -v $PWD/../../DB-GPT:/home/DB-GPT -it dbgpt bash
# 若遇到Dockerfile启动的方式安装环境需要长时间等待,可注释掉里面的pip安装,启动容器后再安装python库:pip install -r requirements.txt。
cd /home/DB-GPT
pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple # 根据项目DB-GPT中的文件安装需要的库。
```
### Anaconda(方法三)
1、关于本项目DCU显卡所需的特殊深度学习库可从光合开发者社区下载安装:
- https://developer.hpccube.com/tool/
```
DTK驱动:dtk2504
python:python3.10
torch:2.4.1
torchvision:0.19.1
triton:3.0.0
vllm:0.6.2
flash-attn:2.6.1
deepspeed:0.14.2
apex:1.4.0
transformers:4.49.0
```
`Tips:以上dtk驱动、python、torch等DCU相关工具版本需要严格一一对应。`
2、其它非特殊库参照requirements.txt安装
```
cd /home/DB-GPT
pip install -r requirements.txt -i https://mirrors.aliyun.com/pypi/simple
```
由于本项目功能特别多,此步骤仅提供部分功能的示例。若需体验更多功能,其环境配置可参考源项目中的[`README_origin`](./README_origin.md),并借鉴本文档的步骤说明自行研究使用。
体验其它功能需要的python库可通过以下两种方法补充:
```
# 方法一:
pip install xxx
# 方法二:
uv pip install xxx --default-index=https://mirrors.aliyun.com/pypi/simple --system
```
## 数据集
`无`
## 训练
## 推理
预训练权重目录结构:
```
/home/DB-GPT
├── BAAI/bge-large-zh-v1.5
└── Qwen/QwQ-32B
```
### 单机多卡
```
dbgpt start webserver --config configs/dbgpt-local-qwq32b.toml # 本步骤以QwQ-32B为例,其它模型的使用方法以此类推,可参考其github源项目的issue解决项目bug。
```
## result
本地浏览器打开地址访问在线操作页面:http://localhost:5670/
<div align=center>
<img src="./doc/page1.png"/>
</div>
`输入: `
```
美国多少人口
```
<div align=center>
<img src="./doc/page2.png"/>
</div>
`输出:`
```
截至2023年12月,美国人口的最新估算约为 3.36亿(336,000,000)。这一数据基于美国人口普查局(U.S. Census Bureau)的实时人口钟(Population Clock)动态估算,会随出生、死亡、移民等因素持续更新。
```
<div align=center>
<img src="./doc/page3.png"/>
</div>
### 精度
DCU与GPU精度一致,推理框架:pytorch。
## 应用场景
### 算法类别
`对话问答`
### 热点应用行业
`制造,广媒,金融,能源,医疗,家居,教育`
## 预训练权重
预训练权重快速下载中心:[SCNet AIModels](http://113.200.138.88:18080/aimodels),项目中的预训练权重可从快速下载通道下载:[bge-large-zh-v1.5](http://113.200.138.88:18080/aimodels/bge-large-zh-v1.5.git)、[QwQ-32B](http://113.200.138.88:18080/aimodels/qwen/QwQ-32B.git)
HF/魔搭社区下载地址为:[bge-large-zh-v1.5](https://huggingface.co/BAAI/bge-large-zh-v1.5)、[QwQ-32B](https://www.modelscope.cn/models/Qwen/QwQ-32B)
## 源码仓库及问题反馈
- http://developer.sourcefind.cn/codes/modelzoo/DB-GPT_pytorch.git
## 参考资料
- https://github.com/eosphoros-ai/DB-GPT.git
- http://docs.dbgpt.cn/docs/quickstart
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment