Unverified commit 641b1ee7, authored by Hongxin Liu, committed by GitHub

[devops] remove post commit ci (#5566)

* [devops] remove post commit ci

* [misc] run pre-commit on all files

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
parent 341263df
@@ -3,6 +3,7 @@
- [ ] I have created an issue for this PR for traceability
- [ ] The title follows the standard format: `[doc/gemini/tensor/...]: A concise description`
- [ ] I have added relevant tags if possible for us to better distinguish different PRs
- [ ] I have installed pre-commit: `pip install pre-commit && pre-commit install`

## 🚨 Issue number
......
name: post-commit
on:
  pull_request:
    types:
      - closed

jobs:
  # this job will run after a PR is merged to run pre-commit on any changed file
  # so that the user does not need to learn pre-commit and pre-commit can still
  # be auto-executed by the workflow
  pre-commit:
    runs-on: ubuntu-latest
    if: github.event.pull_request.merged == true && github.repository == 'hpcaitech/ColossalAI'
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      # the PR branch and the hpcaitech/colossal-ai main branch
      # must share a common commit, we need to locate that commit,
      # which is the commit checked-out or forked when the PR branch is created
      # such that we can look for files changed since that commit
      - name: Locate base commit
        id: locate-base-sha
        run: |
          curBranch=$(git rev-parse --abbrev-ref HEAD)
          commonCommit=$(git merge-base origin/main $curBranch)
          echo $commonCommit
          echo "baseSHA=$commonCommit" >> $GITHUB_OUTPUT

      - name: Find the changed files
        id: find-changed-files
        uses: tj-actions/changed-files@v35
        with:
          base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}

      - name: List all changed files
        run: |
          for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
            echo "$file was changed"
          done

      # check out the main branch
      - uses: actions/checkout@v2
        with:
          ref: 'main'

      - uses: actions/setup-python@v3

      - name: Cache pre-commit hooks
        uses: actions/cache@v3
        with:
          path: ~/.cache/pre-commit
          key: ${{ runner.os }}-pre-commit-hooks

      - name: Set up pre-commit
        run: |
          pip install pre-commit
          pre-commit install

      # run pre-commit on changed files
      - name: Run Pre-commit
        run: |
          for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
            pre-commit run --files $file || true
          done

      # create commit for pre-commit
      # when all files are well formatted, there is no need to create a commit
      # therefore, this step will produce an error, which should be allowed
      - name: Create commits
        id: commit
        continue-on-error: true
        run: |
          git config --global user.name 'github-actions'
          git config --global user.email 'github-actions@github.com'
          git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}
          git add -A
          git commit -am "[format] applied code formatting on changed files in pull request ${{ github.event.pull_request.number }}"

      # create pull request
      - name: Create Pull Request
        if: steps.commit.outcome == 'success'
        id: cpr
        uses: peter-evans/create-pull-request@v4
        with:
          branch: pre-commit-${{ github.event.pull_request.number }}
          title: "[format] applied code formatting on changed files in PR ${{ github.event.pull_request.number }}"

      - name: Enable Auto-merge for the New PR
        if: steps.commit.outcome == 'success'
        uses: peter-evans/enable-pull-request-automerge@v2
        with:
          pull-request-number: ${{ steps.cpr.outputs.pull-request-number }}
          merge-method: squash
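With this workflow removed, formatting is expected to happen before the merge rather than after it, via the hook installed by `pip install pre-commit && pre-commit install` (see the new checklist item above). For reference, the removed job's core logic — find the merge-base with `origin/main`, then run pre-commit only on the files changed since that commit — can be reproduced locally. A minimal sketch, assuming `git` and `pre-commit` are on the PATH; the helper name is illustrative and not part of the repository:

```python
import subprocess


def run_precommit_on_changed_files(base_ref: str = "origin/main") -> None:
    """Run pre-commit only on files changed since the merge-base with base_ref."""
    # Locate the commit the current branch forked from (the "Locate base commit" step above).
    base_sha = subprocess.check_output(["git", "merge-base", base_ref, "HEAD"], text=True).strip()
    # List files changed since that commit (the "Find the changed files" step above).
    changed_files = subprocess.check_output(["git", "diff", "--name-only", base_sha, "HEAD"], text=True).split()
    if changed_files:
        # pre-commit exits non-zero when hooks modify files; tolerate that, as the workflow did with `|| true`.
        subprocess.run(["pre-commit", "run", "--files", *changed_files], check=False)


if __name__ == "__main__":
    run_precommit_on_changed_files()
```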
@@ -162,4 +162,4 @@ coverage.xml
# log, test files - ColossalChat
applications/ColossalChat/logs
applications/ColossalChat/tests/logs
\ No newline at end of file
@@ -551,4 +551,4 @@ Copyright 2021- HPC-AI Technology Inc. All rights reserved.
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
@@ -8,11 +8,10 @@ import argparse
import numpy as np
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

from colossalai.logging import get_dist_logger

logger = get_dist_logger()
......
@@ -10,8 +10,8 @@ import os
from typing import Any, Dict, Tuple, Union

import torch
from torch.optim.lr_scheduler import _LRScheduler
from torch.optim.optimizer import Optimizer

from colossalai.booster import Booster
from colossalai.cluster import DistCoordinator
......
from copy import deepcopy
from typing import Any, Callable, Dict, List, Optional, Tuple

import torch
from torch import nn
from transformers import PreTrainedTokenizer
from transformers.generation.utils import GenerationConfig, LogitsProcessorList, StoppingCriteriaList
from transformers.utils import logging

logger = logging.get_logger(__name__)


def get_prompt_template(
    input_query: str,
    history: List[Dict] = None,
    roles: list = ["", "Human", "Assistant"],
) -> str:
    """
    Generates a prompt template for chat models based on input and history.
@@ -32,7 +31,7 @@ def get_prompt_template(
        new_history = []
    else:
        new_history = deepcopy(history)
    new_history.append({"role": roles[1], "message": input_query.strip()})
    new_history.append({"role": roles[2], "message": None})
@@ -48,22 +47,23 @@ def get_prompt_template(
            prompt += f"{role}: <s>"
    return prompt
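A minimal usage sketch of `get_prompt_template`, assuming it is importable from the same `colossal_llama2.utils.stream_chat_patch` module used elsewhere in this commit; the history entries below are illustrative:

```python
from colossal_llama2.utils.stream_chat_patch import get_prompt_template

history = [
    {"role": "Human", "message": "What is ColossalAI?"},
    {"role": "Assistant", "message": "ColossalAI is a system for large-scale model training."},
]
prompt = get_prompt_template("How do I install it?", history=history)
# The new query is appended as a Human turn and an open Assistant turn is added,
# so the prompt should end with something like "Assistant: <s>".
print(prompt)
```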
@torch.inference_mode()
def streaming_chat(
    model: Any,
    tokenizer: PreTrainedTokenizer,
    input_query: str,
    history: List[Dict] = None,
    roles: list = ["", "Human", "Assistant"],
    past_key_values: Tuple[Tuple[torch.FloatTensor, Any], Any] = None,
    temperature: float = 0.8,
    top_p: float = 0.95,
    top_k: int = 50,
    do_sample: bool = True,
    length_penalty: float = 1.2,
    max_new_tokens: int = 512,
    logits_processor: LogitsProcessorList = None,
    return_past_key_values: bool = False,
    **kwargs,
):
    """
@@ -87,7 +87,7 @@ def streaming_chat(
        **kwargs: Additional keyword arguments for generation.

    Yields:
        Tuple[str, List[Dict], Optional[Tuple[Tuple[torch.FloatTensor, Any], Any]]]: A tuple containing the generated response, updated history, and
            optionally the updated past key values if `return_past_key_values` is True.

    Ensures padding is on the left side for the tokenizer.
@@ -97,32 +97,37 @@ def streaming_chat(
        history = []
    if logits_processor is None:
        logits_processor = LogitsProcessorList()

    generation_kwargs = {
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "do_sample": do_sample,
        "max_new_tokens": max_new_tokens,
        "length_penalty": length_penalty,
        "use_cache": True,
        **kwargs,
    }

    prompt_str = get_prompt_template(input_query, history=history, roles=roles)

    eos_token_id = [tokenizer.eos_token_id]
    inputs = tokenizer(prompt_str, return_tensors="pt").to(model.device)
    history.append({"role": roles[1], "message": input_query.strip()})
    history.append({"role": roles[2], "message": None})

    for outputs in stream_generate(
        model,
        **inputs,
        past_key_values=past_key_values,
        eos_token_id=eos_token_id,
        return_past_key_values=return_past_key_values,
        **generation_kwargs,
    ):
        if return_past_key_values:
            outputs, past_key_values = outputs

        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]) : -1]
        response = tokenizer.decode(outputs)

        history[-1]["message"] = response.strip()
@@ -130,30 +135,30 @@ def streaming_chat(
            yield response, history, past_key_values
        else:
            yield response, history
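A minimal, non-interactive consumption sketch of `streaming_chat`; the model and tokenizer paths are placeholders, and loading mirrors the inference script further down in this commit:

```python
from colossal_llama2.utils.stream_chat_patch import streaming_chat
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("path/to/chat-model").cuda().eval()
tokenizer = AutoTokenizer.from_pretrained("path/to/chat-tokenizer")

response, history = "", []
# Each iteration yields the response decoded so far plus the updated history.
for response, history in streaming_chat(model, tokenizer, "Hello, please introduce yourself.", history=history):
    pass  # stream partial responses to a UI here if desired
print(response)       # full reply
print(history[-1])    # {"role": "Assistant", "message": "..."}
```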
@torch.inference_mode()
def stream_generate(
    model: Any,
    input_ids: torch.Tensor,
    generation_config: Optional[GenerationConfig] = None,
    logits_processor: Optional[LogitsProcessorList] = None,
    stopping_criteria: Optional[StoppingCriteriaList] = None,
    prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
    return_past_key_values: bool = False,
    **kwargs,
):
    """
    Generates sequences of token ids using the specified model and generation parameters.
    Adapted from https://huggingface.co/THUDM/chatglm3-6b/blob/main/modeling_chatglm.py

    Args:
        model (Any): The model used for generating sequences of token ids.
        input_ids (torch.Tensor): The sequence used as a prompt for the generation or as model inputs to the encoder.
        generation_config (Optional[GenerationConfig]): The generation configuration to be used as base parametrization for the generation call.
        logits_processor (Optional[LogitsProcessorList]): Custom logits processors that complement the default logits processors built from arguments
            and generation config.
        stopping_criteria (Optional[StoppingCriteriaList]): Custom stopping criteria that complement the default stopping criteria built from arguments
            and a generation config.
        prefix_allowed_tokens_fn (Optional[Callable[[int, torch.Tensor], List[int]]]): Function to constrain token generation.
        return_past_key_values (bool): Whether to return past key values for further incremental decoding, defaults to False.
@@ -169,7 +174,7 @@ def stream_generate(
        generation_config = model.generation_config
    generation_config = deepcopy(generation_config)
    model_kwargs = generation_config.update(**kwargs)

    eos_token_id = generation_config.eos_token_id
    if isinstance(eos_token_id, int):
        eos_token_id = [eos_token_id]
@@ -177,25 +182,25 @@ def stream_generate(
    if generation_config.max_new_tokens is not None:
        generation_config.max_length = generation_config.max_new_tokens + input_ids_len

    if input_ids_len >= generation_config.max_length:
        input_ids_string = "decoder_input_ids" if model.config.is_encoder_decoder else "input_ids"
        logger.warning(
            f"Input length of {input_ids_string} is {input_ids_len}, but `max_length` is set to"
            f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
            " increasing `max_new_tokens`."
        )

    logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
    stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()

    # prepare distribution pre_processing samplers
    logits_processor = model._get_logits_processor(
        generation_config=generation_config,
        input_ids_seq_length=input_ids_len,
        encoder_input_ids=input_ids,
        prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
        logits_processor=logits_processor,
    )

    # prepare stopping criteria
    stopping_criteria = model._get_stopping_criteria(
@@ -205,7 +210,7 @@ def stream_generate(
    logits_warper = model._get_logits_warper(generation_config)

    unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
    scores = None

    while True:
        model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
        # forward pass to get next token
@@ -244,4 +249,4 @@ def stream_generate(
            yield input_ids
        # stop when each sentence is finished, or if exceed the maximum length
        if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
            break
\ No newline at end of file
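The hunks above omit the body of the `while True` loop. For orientation only, one decoding iteration in this style of HF-based streaming generation typically looks like the hypothetical helper below; the actual loop body in the patched file may differ:

```python
import torch


def decode_step(model, input_ids, model_kwargs, logits_processor, logits_warper,
                generation_config, unfinished_sequences, eos_token_id):
    """One hypothetical decoding iteration in the spirit of the elided loop body above."""
    model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
    outputs = model(**model_inputs, return_dict=True)
    # Post-process the logits of the last position, then warp them for sampling.
    next_token_scores = logits_processor(input_ids, outputs.logits[:, -1, :])
    next_token_scores = logits_warper(input_ids, next_token_scores)
    probs = torch.nn.functional.softmax(next_token_scores, dim=-1)
    next_tokens = (
        torch.multinomial(probs, num_samples=1).squeeze(1)
        if generation_config.do_sample
        else torch.argmax(probs, dim=-1)
    )
    # Append the new token and carry the KV cache forward for the next step.
    input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
    model_kwargs = model._update_model_kwargs_for_generation(outputs, model_kwargs)
    # A sequence is finished once it emits any EOS token.
    eos = torch.tensor(eos_token_id, device=input_ids.device)
    unfinished_sequences = unfinished_sequences.mul((next_tokens.unsqueeze(1) != eos).all(dim=1).long())
    return input_ids, model_kwargs, unfinished_sequences
```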
@@ -43,7 +43,7 @@ if __name__ == '__main__':
    model.to(device)
    tokenizer = AutoTokenizer.from_pretrained("hpcai-tech/Colossal-LLaMA-2-13b-base")

    question = "xxx"  # Your question.
    prompt = (f"请对下面的问题,给出准确、可靠、安全的回答\n"
              f"{question}"
@@ -111,7 +111,7 @@ The Colossal-AI team has conducted comprehensive evaluation for safety and value
| 整车热管理主要包括什么内容 | 整车热管理主要包括:1、发动机冷却系统;2、散热系统;3、空调冷却系统;4、采暖系统;5、动力转向器冷却系统;6、润滑油冷却系统;7、变速箱冷却系统;8、燃油冷却系统;9、冷却系统排放液系统。 |
| 简述下汽车空调-热泵循环系统的原理 | 汽车空调系统主要有三种形式,其中热泵循环系统是其中的一种。其原理是利用发动机排放的热量,通过压缩机将冷媒压缩成高温高压的状态,然后通过冷却器释放到空气中,吸收周围的热量,再通过膨胀阀释放到蒸发器中,蒸发器在压缩机的作用下将冷媒压缩成高温高压的状态,再次释放到空气中,从而实现制冷的效果。 |

#### Finance

| Question | <center>Colossal-LLaMA-2-13b-base</center> |
| :------: | :----------------------- |
| 如何看待当前提前还贷热 | 提前还贷是一种常见的还贷方式,在市场中存在着许多的因素和影响,需要具体分析当前的经济形势和人们心理状况的影响。提前还贷可以帮助借款人早日摆脱债务负担,但也存在一些风险和不确定性。从经济形势来看,当前利率下探、还款压力减轻等因素可能会导致一部分借款人提前还贷。此外,一些购房者可能会受到市场不景气的影响,担心未来房产价值会下降,因此提前还贷以避免资产贬值。从借款人心理状况来看,提前还贷也可能会由于借款人对未来经济预期的不确定性而产生压力和不满。因此,提前还贷并不能一概而论,需要根据具体情况进行分析。如果提前还贷对借款人的经济情况和未来规划造成不利影响,建议谨慎考虑是否要提前还贷。|
@@ -150,4 +150,4 @@ The Colossal-AI team has conducted comprehensive evaluation for safety and value
## Conclusion

The Colossal-AI team's advanced 13B model, compared to the 7B version, features a refined data structure categorizing information into informative, functional, and memory replay data. Informative data is intricately subdivided into major categories, each further segmented for precise control. Concurrently, data scale across domains is expanded. Tailored enhancements meet community demands for large model capabilities in natural language processing tasks, ensuring proficiency during pre-training and cost-effective fine-tuning. Addressing security and values concerns, multidimensional controls are implemented, securing the baseline model and aligning it with correct values.
\ No newline at end of file
@@ -242,4 +242,4 @@ To comprehensively assess the performance of the Colossal-LLaMA-2-7B-base model,
## Conclusion

In general, the Colossal-LLaMA-2-7B-base model not only enhances its understanding of English but also exhibits significant improvements in its comprehension of Chinese. It boasts a broad spectrum of general knowledge, encompassing various fields such as food, sports, technology, literature, games, and more. Regarding text generation tasks, the Colossal-LLaMA-2-7B-base model excels in writing performance; however, its ability to generate specific formats like code, emails, tables, etc., needs enhancement due to the scarcity of relevant training data during our training phase. When compared to the Qwen-7b-base model, the Colossal-LLaMA-2-7B-base model outperforms it in answering most English questions and some Chinese questions, as demonstrated in the examples above.

Presently, the Colossal-LLaMA-2-7B-base model already exhibits some capabilities in sentiment analysis, logical reasoning, information extraction, role-play, classification, and rewriting. These capabilities are poised for further improvement in the future as part of our ongoing enhancements.
\ No newline at end of file
hostname1
hostname2
\ No newline at end of file
@@ -15,7 +15,7 @@ def load_model(model_path, device="cuda", **kwargs):
    model.to(device)

    try:
        tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left")
    except OSError:
        raise ImportError("Tokenizer not found. Please check if the tokenizer exists or the model path is correct.")
......
@@ -12,4 +12,3 @@ flash-attn>=2.0.0,<=2.0.5
tqdm
sentencepiece==0.1.99
protobuf<=3.20.0
import os
import argparse

from colossal_llama2.utils.stream_chat_patch import streaming_chat
from transformers import AutoModelForCausalLM, AutoTokenizer

SYSTEM = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."


def main(args):
    model = AutoModelForCausalLM.from_pretrained(args.model_path).cuda().eval()
    tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_path)
@@ -27,29 +27,34 @@ def main(args):
        print(f"\n{roles[2]}: ", end="")
        gen_len = 0
        for response, history, past_key_values in streaming_chat(
            model,
            tokenizer,
            input_query,
            history=history,
            roles=roles,
            temperature=args.temperature,
            top_p=args.top_p,
            top_k=args.top_k,
            do_sample=args.do_sample,
            length_penalty=args.length_penalty,
            max_new_tokens=args.max_new_tokens,
            past_key_values=past_key_values,
            return_past_key_values=True,
        ):
            output = response[gen_len:]
            print(output, end="", flush=True)
            gen_len = len(response)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, default=None, help="path to chat version model")
    parser.add_argument("--tokenizer_path", type=str, default=None, help="path to chat version tokenizer")
    parser.add_argument("--temperature", type=float, default=0.8, help="set temperature")
    parser.add_argument("--top_p", type=float, default=0.95, help="set top p value")
    parser.add_argument("--top_k", type=int, default=50, help="set top k value")
    parser.add_argument("--do_sample", type=bool, default=True, help="whether turn on do_sample or not")
    parser.add_argument("--length_penalty", type=float, default=1.2, help="set length penalty")
    parser.add_argument("--max_new_tokens", type=int, default=512, help="set max new tokens")

    args = parser.parse_args()
    main(args)
\ No newline at end of file
@@ -20,13 +20,13 @@ import colossalai
from colossalai.booster import Booster
from colossalai.booster.plugin import GeminiPlugin, HybridParallelPlugin, LowLevelZeroPlugin
from colossalai.cluster import DistCoordinator
from colossalai.logging import get_dist_logger
from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR
from colossalai.nn.optimizer import HybridAdam
from colossalai.utils import get_current_device

logger = get_dist_logger()


def train(args):
    # check lora compatibility
    if "gemini" in args.plugin and args.lora_rank > 0:
......
@@ -3,7 +3,6 @@ import copy
import os
from typing import Dict, List

import torch
import torch.distributed as dist
from colossal_eval import dataset, models, utils
......
@@ -106,6 +106,5 @@ def main():
        print(f"[{coordinator.rank}] {outputs}")


if __name__ == "__main__":
    main()
@@ -24,6 +24,7 @@ from langchain.pydantic_v1 import Field
from langchain.schema import BaseRetriever, Document
from langchain.schema.language_model import BaseLanguageModel


class CustomBaseRetrievalQA(BaseRetrievalQA):
    """Base class for question-answering chains."""
@@ -98,7 +99,6 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
            for k, v in inputs.items()
            if k in ["stop", "temperature", "top_k", "top_p", "max_new_tokens", "doc_prefix"]
        }
        answers = []
        if self.combine_documents_chain.memory is not None:
            buffered_history_backup, summarized_history_temp_backup = copy.deepcopy(
                self.combine_documents_chain.memory.buffered_history
@@ -117,10 +117,10 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
            ) = copy.deepcopy(buffered_history_backup), copy.deepcopy(summarized_history_temp_backup)

        # if rejection_trigger_keywords is not given, return the response from LLM directly
        rejection_trigger_keywords = inputs.get("rejection_trigger_keywords", [])
        answer = answer if all([rej not in answer for rej in rejection_trigger_keywords]) else None
        if answer is None:
            answer = inputs.get("rejection_answer", "抱歉,根据提供的信息无法回答该问题。")
        if self.combine_documents_chain.memory is not None:
            self.combine_documents_chain.memory.save_context({"question": question}, {"output": answer})
@@ -161,10 +161,14 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
            input_documents=docs, question=question, callbacks=_run_manager.get_child(), **kwargs
        )
        # if rejection_trigger_keywords is not given, return the response from LLM directly
        rejection_trigger_keywords = inputs.get("rejection_trigger_keywords", [])
        answer = (
            answer
            if all([rej not in answer for rej in rejection_trigger_keywords]) or len(rejection_trigger_keywords) == 0
            else None
        )
        if answer is None:
            answer = inputs.get("rejection_answer", "抱歉,根据提供的信息无法回答该问题。")
        self.combine_documents_chain.memory.save_context({"question": question}, {"output": answer})

        if self.return_source_documents:
......
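The reformatted conditional above implements a simple rejection filter: when any configured trigger keyword appears in the answer (and the keyword list is non-empty), the answer is replaced by a canned rejection message. A standalone sketch of that logic, using a hypothetical helper name:

```python
from typing import List, Optional


def apply_rejection_filter(
    answer: str,
    rejection_trigger_keywords: Optional[List[str]] = None,
    rejection_answer: str = "抱歉,根据提供的信息无法回答该问题。",
) -> str:
    """Return the canned rejection answer when the response contains any trigger keyword."""
    keywords = rejection_trigger_keywords or []
    # With no keywords configured, the LLM response is returned unchanged.
    if keywords and any(keyword in answer for keyword in keywords):
        return rejection_answer
    return answer


# apply_rejection_filter("根据提供的信息无法回答", ["无法回答"]) -> the canned rejection message
# apply_rejection_filter("ColossalAI supports ...", ["无法回答"]) -> the answer unchanged
```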
@@ -126,7 +126,7 @@ class DocumentLoader:
        else:
            # May be a directory; we strictly follow the glob path and will not load files in subdirectories
            pass

    def clear(self):
        """
        Clear loaded data.
......