Unverified commit 641b1ee7 authored by Hongxin Liu, committed by GitHub

[devops] remove post commit ci (#5566)

* [devops] remove post commit ci

* [misc] run pre-commit on all files

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
parent 341263df
@@ -3,6 +3,7 @@
 - [ ] I have created an issue for this PR for traceability
 - [ ] The title follows the standard format: `[doc/gemini/tensor/...]: A concise description`
 - [ ] I have added relevant tags if possible for us to better distinguish different PRs
+- [ ] I have installed pre-commit: `pip install pre-commit && pre-commit install`

 ## 🚨 Issue number
......
name: post-commit

on:
  pull_request:
    types:
      - closed

jobs:
  # this job will run after a PR is merged to run pre-commit on any changed file
  # so that the user does not need to learn pre-commit and pre-commit can still
  # be auto-executed by the workflow
  pre-commit:
    runs-on: ubuntu-latest
    if: github.event.pull_request.merged == true && github.repository == 'hpcaitech/ColossalAI'
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      # the PR branch and the hpcaitech/colossal-ai main branch
      # must share a common commit, we need to locate that commit,
      # which is the commit checked-out or forked when the PR branch is created
      # such that we can look for files changed since that commit
      - name: Locate base commit
        id: locate-base-sha
        run: |
          curBranch=$(git rev-parse --abbrev-ref HEAD)
          commonCommit=$(git merge-base origin/main $curBranch)
          echo $commonCommit
          echo "baseSHA=$commonCommit" >> $GITHUB_OUTPUT

      - name: Find the changed files
        id: find-changed-files
        uses: tj-actions/changed-files@v35
        with:
          base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}

      - name: List all changed files
        run: |
          for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
            echo "$file was changed"
          done

      # check out the main branch
      - uses: actions/checkout@v2
        with:
          ref: 'main'

      - uses: actions/setup-python@v3

      - name: Cache pre-commit hooks
        uses: actions/cache@v3
        with:
          path: ~/.cache/pre-commit
          key: ${{ runner.os }}-pre-commit-hooks

      - name: Set up pre-commit
        run: |
          pip install pre-commit
          pre-commit install

      # run pre-commit on changed files
      - name: Run Pre-commit
        run: |
          for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
            pre-commit run --files $file || true
          done

      # create commit for pre-commit
      # when all files are well formatted, there is no need to create a commit
      # therefore, this step will produce an error, which should be allowed
      - name: Create commits
        id: commit
        continue-on-error: true
        run: |
          git config --global user.name 'github-actions'
          git config --global user.email 'github-actions@github.com'
          git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}
          git add -A
          git commit -am "[format] applied code formatting on changed files in pull request ${{ github.event.pull_request.number }}"

      # create pull request
      - name: Create Pull Request
        if: steps.commit.outcome == 'success'
        id: cpr
        uses: peter-evans/create-pull-request@v4
        with:
          branch: pre-commit-${{ github.event.pull_request.number }}
          title: "[format] applied code formatting on changed files in PR ${{ github.event.pull_request.number }}"

      - name: Enable Auto-merge for the New PR
        if: steps.commit.outcome == 'success'
        uses: peter-evans/enable-pull-request-automerge@v2
        with:
          pull-request-number: ${{ steps.cpr.outputs.pull-request-number }}
          merge-method: squash
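With this workflow deleted, formatting is no longer applied automatically after merge; contributors are instead asked to install pre-commit themselves, as the new checklist item above notes. For illustration only, the following minimal sketch (not part of this commit) mirrors what the removed job automated: locate the merge-base with `origin/main`, list the files changed since then, and run pre-commit on them. The helper `run` and the assumption of an `origin/main` remote branch are mine, not the repository's.

```python
# Minimal local sketch of the removed workflow's core steps.
# Assumes git and pre-commit are installed and "origin/main" exists.
import subprocess


def run(cmd):
    # Run a command and return its stdout, raising if the command fails.
    return subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip()


def main():
    # "Locate base commit": the commit where the current branch forked from origin/main.
    branch = run(["git", "rev-parse", "--abbrev-ref", "HEAD"])
    base_sha = run(["git", "merge-base", "origin/main", branch])

    # "Find the changed files": stands in for tj-actions/changed-files.
    changed_files = run(["git", "diff", "--name-only", base_sha, "HEAD"]).splitlines()

    # "Run Pre-commit" on each changed file; a non-zero exit usually just means
    # hooks modified the file, so it is not treated as fatal (like `|| true` above).
    for path in changed_files:
        subprocess.run(["pre-commit", "run", "--files", path])


if __name__ == "__main__":
    main()
```

The "[misc] run pre-commit on all files" step mentioned in the commit message corresponds to `pre-commit run --all-files`, which skips the changed-file bookkeeping entirely.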
@@ -8,11 +8,10 @@ import argparse
 import numpy as np
 import torch
-from transformers import LlamaTokenizer, LlamaForCausalLM
+from transformers import LlamaForCausalLM, LlamaTokenizer

 from colossalai.logging import get_dist_logger

 logger = get_dist_logger()
......
@@ -10,8 +10,8 @@ import os
 from typing import Any, Dict, Tuple, Union

 import torch
-from torch.optim.optimizer import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
+from torch.optim.optimizer import Optimizer

 from colossalai.booster import Booster
 from colossalai.cluster import DistCoordinator
......
 from copy import deepcopy
-from typing import Optional, List, Dict, Tuple, Callable, Any
+from typing import Any, Callable, Dict, List, Optional, Tuple

 import torch
 from torch import nn
 from transformers import PreTrainedTokenizer
-from transformers.utils import logging
 from transformers.generation.utils import GenerationConfig, LogitsProcessorList, StoppingCriteriaList
+from transformers.utils import logging

 logger = logging.get_logger(__name__)


 def get_prompt_template(
-    input_query:str,
-    history:List[Dict]= None,
-    roles:list = ["", "Human", "Assistant"],
+    input_query: str,
+    history: List[Dict] = None,
+    roles: list = ["", "Human", "Assistant"],
 ) -> str:
     """
     Generates a prompt template for chat models based on input and history.
@@ -48,6 +47,7 @@ def get_prompt_template(
             prompt += f"{role}: <s>"

     return prompt

+
 @torch.inference_mode()
 def streaming_chat(
     model: Any,
@@ -99,14 +99,14 @@ def streaming_chat(
     logits_processor = LogitsProcessorList()

     generation_kwargs = {
-        'temperature': temperature,
-        'top_p': top_p,
-        'top_k': top_k,
-        'do_sample': do_sample,
-        'max_new_tokens': max_new_tokens,
-        'length_penalty': length_penalty,
-        'use_cache': True,
-        **kwargs
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": top_k,
+        "do_sample": do_sample,
+        "max_new_tokens": max_new_tokens,
+        "length_penalty": length_penalty,
+        "use_cache": True,
+        **kwargs,
     }

     prompt_str = get_prompt_template(input_query, history=history, roles=roles)
@@ -116,13 +116,18 @@
     history.append({"role": roles[1], "message": input_query.strip()})
     history.append({"role": roles[2], "message": None})

-    for outputs in stream_generate(model, **inputs, past_key_values=past_key_values,
-                                   eos_token_id=eos_token_id, return_past_key_values=return_past_key_values,
-                                   **generation_kwargs):
+    for outputs in stream_generate(
+        model,
+        **inputs,
+        past_key_values=past_key_values,
+        eos_token_id=eos_token_id,
+        return_past_key_values=return_past_key_values,
+        **generation_kwargs,
+    ):
         if return_past_key_values:
             outputs, past_key_values = outputs

-        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
+        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]) : -1]
         response = tokenizer.decode(outputs)
         history[-1]["message"] = response.strip()
......
@@ -15,7 +15,7 @@ def load_model(model_path, device="cuda", **kwargs):
     model.to(device)

     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side='left')
+        tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left")
     except OSError:
         raise ImportError("Tokenizer not found. Please check if the tokenizer exists or the model path is correct.")
......
@@ -12,4 +12,3 @@ flash-attn>=2.0.0,<=2.0.5
 tqdm
 sentencepiece==0.1.99
 protobuf<=3.20.0
-import os
 import argparse

-from transformers import AutoTokenizer, AutoModelForCausalLM
 from colossal_llama2.utils.stream_chat_patch import streaming_chat
+from transformers import AutoModelForCausalLM, AutoTokenizer

 SYSTEM = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."


 def main(args):
     model = AutoModelForCausalLM.from_pretrained(args.model_path).cuda().eval()
     tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_path)
@@ -27,29 +27,34 @@ def main(args):
         print(f"\n{roles[2]}: ", end="")
         gen_len = 0
         for response, history, past_key_values in streaming_chat(
-            model, tokenizer, input_query, history=history, roles=roles,
-            temperature = args.temperature,
-            top_p = args.top_p,
-            top_k = args.top_k,
-            do_sample = args.do_sample,
-            length_penalty = args.length_penalty,
-            max_new_tokens = args.max_new_tokens,
+            model,
+            tokenizer,
+            input_query,
+            history=history,
+            roles=roles,
+            temperature=args.temperature,
+            top_p=args.top_p,
+            top_k=args.top_k,
+            do_sample=args.do_sample,
+            length_penalty=args.length_penalty,
+            max_new_tokens=args.max_new_tokens,
             past_key_values=past_key_values,
-            return_past_key_values=True):
+            return_past_key_values=True,
+        ):
             output = response[gen_len:]
             print(output, end="", flush=True)
             gen_len = len(response)


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--model_path', type=str, default=None, help="path to chat version model")
-    parser.add_argument('--tokenizer_path', type=str, default=None, help="path to chat version tokenizer")
-    parser.add_argument('--temperature', type=float, default=0.8, help="set temperature")
-    parser.add_argument('--top_p', type=float, default=0.95, help="set top p value")
-    parser.add_argument('--top_k', type=int, default=50, help="set top k value")
-    parser.add_argument('--do_sample', type=bool, default=True, help="whether turn on do_sample or not")
-    parser.add_argument('--length_penalty', type=float, default=1.2, help="set length penalty")
-    parser.add_argument('--max_new_tokens', type=int, default=512, help="set max new tokens")
+    parser.add_argument("--model_path", type=str, default=None, help="path to chat version model")
+    parser.add_argument("--tokenizer_path", type=str, default=None, help="path to chat version tokenizer")
+    parser.add_argument("--temperature", type=float, default=0.8, help="set temperature")
+    parser.add_argument("--top_p", type=float, default=0.95, help="set top p value")
+    parser.add_argument("--top_k", type=int, default=50, help="set top k value")
+    parser.add_argument("--do_sample", type=bool, default=True, help="whether turn on do_sample or not")
+    parser.add_argument("--length_penalty", type=float, default=1.2, help="set length penalty")
+    parser.add_argument("--max_new_tokens", type=int, default=512, help="set max new tokens")
     args = parser.parse_args()
     main(args)
@@ -20,13 +20,13 @@ import colossalai
 from colossalai.booster import Booster
 from colossalai.booster.plugin import GeminiPlugin, HybridParallelPlugin, LowLevelZeroPlugin
 from colossalai.cluster import DistCoordinator
+from colossalai.logging import get_dist_logger
 from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR
 from colossalai.nn.optimizer import HybridAdam
-from colossalai.utils import get_current_device
-from colossalai.logging import get_dist_logger

 logger = get_dist_logger()

+
 def train(args):
     # check lora compatibility
     if "gemini" in args.plugin and args.lora_rank > 0:
......
@@ -3,7 +3,6 @@ import copy
 import os
 from typing import Dict, List

-import torch
 import torch.distributed as dist
 from colossal_eval import dataset, models, utils
......
@@ -106,6 +106,5 @@ def main():
        print(f"[{coordinator.rank}] {outputs}")


 if __name__ == "__main__":
     main()
@@ -24,6 +24,7 @@ from langchain.pydantic_v1 import Field
 from langchain.schema import BaseRetriever, Document
 from langchain.schema.language_model import BaseLanguageModel

+
 class CustomBaseRetrievalQA(BaseRetrievalQA):
     """Base class for question-answering chains."""
@@ -98,7 +99,6 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
             for k, v in inputs.items()
             if k in ["stop", "temperature", "top_k", "top_p", "max_new_tokens", "doc_prefix"]
         }
-        answers = []
         if self.combine_documents_chain.memory is not None:
             buffered_history_backup, summarized_history_temp_backup = copy.deepcopy(
                 self.combine_documents_chain.memory.buffered_history
@@ -117,10 +117,10 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
             ) = copy.deepcopy(buffered_history_backup), copy.deepcopy(summarized_history_temp_backup)

         # if rejection_trigger_keywords is not given, return the response from LLM directly
-        rejection_trigger_keywords = inputs.get('rejection_trigger_keywords', [])
+        rejection_trigger_keywords = inputs.get("rejection_trigger_keywords", [])
         answer = answer if all([rej not in answer for rej in rejection_trigger_keywords]) else None
         if answer is None:
-            answer = inputs.get('rejection_answer', "抱歉,根据提供的信息无法回答该问题。")
+            answer = inputs.get("rejection_answer", "抱歉,根据提供的信息无法回答该问题。")
         if self.combine_documents_chain.memory is not None:
             self.combine_documents_chain.memory.save_context({"question": question}, {"output": answer})
@@ -161,10 +161,14 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
             input_documents=docs, question=question, callbacks=_run_manager.get_child(), **kwargs
         )
         # if rejection_trigger_keywords is not given, return the response from LLM directly
-        rejection_trigger_keywords = inputs.get('rejection_trigger_keywords', [])
-        answer = answer if all([rej not in answer for rej in rejection_trigger_keywords]) or len(rejection_trigger_keywords)==0 else None
+        rejection_trigger_keywords = inputs.get("rejection_trigger_keywords", [])
+        answer = (
+            answer
+            if all([rej not in answer for rej in rejection_trigger_keywords]) or len(rejection_trigger_keywords) == 0
+            else None
+        )
         if answer is None:
-            answer = inputs.get('rejection_answer', "抱歉,根据提供的信息无法回答该问题。")
+            answer = inputs.get("rejection_answer", "抱歉,根据提供的信息无法回答该问题。")
         self.combine_documents_chain.memory.save_context({"question": question}, {"output": answer})

         if self.return_source_documents:
......