Unverified commit 641b1ee7 authored by Hongxin Liu, committed by GitHub

[devops] remove post commit ci (#5566)

* [devops] remove post commit ci

* [misc] run pre-commit on all files

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci



---------
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
parent 341263df
@@ -3,6 +3,7 @@
 - [ ] I have created an issue for this PR for traceability
 - [ ] The title follows the standard format: `[doc/gemini/tensor/...]: A concise description`
 - [ ] I have added relevant tags if possible for us to better distinguish different PRs
+- [ ] I have installed pre-commit: `pip install pre-commit && pre-commit install`

 ## 🚨 Issue number
......
name: post-commit

on:
  pull_request:
    types:
      - closed

jobs:
  # this job will run after a PR is merged to run pre-commit on any changed file
  # so that the user does not need to learn pre-commit and pre-commit can still
  # be auto-executed by the workflow
  pre-commit:
    runs-on: ubuntu-latest
    if: github.event.pull_request.merged == true && github.repository == 'hpcaitech/ColossalAI'
    steps:
      - uses: actions/checkout@v2
        with:
          fetch-depth: 0
          ref: ${{ github.event.pull_request.head.sha }}

      # the PR branch and the hpcaitech/colossal-ai main branch
      # must share a common commit, we need to locate that commit,
      # which is the commit checked-out or forked when the PR branch is created
      # such that we can look for files changed since that commit
      - name: Locate base commit
        id: locate-base-sha
        run: |
          curBranch=$(git rev-parse --abbrev-ref HEAD)
          commonCommit=$(git merge-base origin/main $curBranch)
          echo $commonCommit
          echo "baseSHA=$commonCommit" >> $GITHUB_OUTPUT

      - name: Find the changed files
        id: find-changed-files
        uses: tj-actions/changed-files@v35
        with:
          base_sha: ${{ steps.locate-base-sha.outputs.baseSHA }}

      - name: List all changed files
        run: |
          for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
            echo "$file was changed"
          done

      # check out the main branch
      - uses: actions/checkout@v2
        with:
          ref: 'main'

      - uses: actions/setup-python@v3

      - name: Cache pre-commit hooks
        uses: actions/cache@v3
        with:
          path: ~/.cache/pre-commit
          key: ${{ runner.os }}-pre-commit-hooks

      - name: Set up pre-commit
        run: |
          pip install pre-commit
          pre-commit install

      # run pre-commit on changed files
      - name: Run Pre-commit
        run: |
          for file in ${{ steps.find-changed-files.outputs.all_changed_files }}; do
            pre-commit run --files $file || true
          done

      # create commit for pre-commit
      # when all files are well formatted, there is no need to create a commit
      # therefore, this step will produce an error, which should be allowed
      - name: Create commits
        id: commit
        continue-on-error: true
        run: |
          git config --global user.name 'github-actions'
          git config --global user.email 'github-actions@github.com'
          git remote set-url origin https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}
          git add -A
          git commit -am "[format] applied code formatting on changed files in pull request ${{ github.event.pull_request.number }}"

      # create pull request
      - name: Create Pull Request
        if: steps.commit.outcome == 'success'
        id: cpr
        uses: peter-evans/create-pull-request@v4
        with:
          branch: pre-commit-${{ github.event.pull_request.number }}
          title: "[format] applied code formatting on changed files in PR ${{ github.event.pull_request.number }}"

      - name: Enable Auto-merge for the New PR
        if: steps.commit.outcome == 'success'
        uses: peter-evans/enable-pull-request-automerge@v2
        with:
          pull-request-number: ${{ steps.cpr.outputs.pull-request-number }}
          merge-method: squash
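With this workflow deleted, formatting is no longer applied automatically after merge; contributors are instead asked to install pre-commit themselves, as the new checklist item above notes. For illustration only, the following minimal sketch (not part of this commit) mirrors what the removed job automated: locate the merge-base with `origin/main`, list the files changed since then, and run pre-commit on them. The helper `run` and the assumption of an `origin/main` remote branch are mine, not the repository's.

```python
# Minimal local sketch of the removed workflow's core steps.
# Assumes git and pre-commit are installed and "origin/main" exists.
import subprocess


def run(cmd):
    # Run a command and return its stdout, raising if the command fails.
    return subprocess.run(cmd, check=True, capture_output=True, text=True).stdout.strip()


def main():
    # "Locate base commit": the commit where the current branch forked from origin/main.
    branch = run(["git", "rev-parse", "--abbrev-ref", "HEAD"])
    base_sha = run(["git", "merge-base", "origin/main", branch])

    # "Find the changed files": stands in for tj-actions/changed-files.
    changed_files = run(["git", "diff", "--name-only", base_sha, "HEAD"]).splitlines()

    # "Run Pre-commit" on each changed file; a non-zero exit usually just means
    # hooks modified the file, so it is not treated as fatal (like `|| true` above).
    for path in changed_files:
        subprocess.run(["pre-commit", "run", "--files", path])


if __name__ == "__main__":
    main()
```

The "[misc] run pre-commit on all files" step mentioned in the commit message corresponds to `pre-commit run --all-files`, which skips the changed-file bookkeeping entirely.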
@@ -8,11 +8,10 @@ import argparse
 import numpy as np
 import torch
-from transformers import LlamaTokenizer, LlamaForCausalLM
+from transformers import LlamaForCausalLM, LlamaTokenizer

 from colossalai.logging import get_dist_logger

 logger = get_dist_logger()
......
@@ -10,8 +10,8 @@ import os
 from typing import Any, Dict, Tuple, Union

 import torch
-from torch.optim.optimizer import Optimizer
 from torch.optim.lr_scheduler import _LRScheduler
+from torch.optim.optimizer import Optimizer

 from colossalai.booster import Booster
 from colossalai.cluster import DistCoordinator
......
 from copy import deepcopy
-from typing import Optional, List, Dict, Tuple, Callable, Any
+from typing import Any, Callable, Dict, List, Optional, Tuple

 import torch
 from torch import nn
 from transformers import PreTrainedTokenizer
-from transformers.utils import logging
 from transformers.generation.utils import GenerationConfig, LogitsProcessorList, StoppingCriteriaList
+from transformers.utils import logging

 logger = logging.get_logger(__name__)


 def get_prompt_template(
-    input_query:str,
-    history:List[Dict]= None,
-    roles:list = ["", "Human", "Assistant"],
+    input_query: str,
+    history: List[Dict] = None,
+    roles: list = ["", "Human", "Assistant"],
 ) -> str:
     """
     Generates a prompt template for chat models based on input and history.
@@ -48,6 +47,7 @@ def get_prompt_template(
             prompt += f"{role}: <s>"

     return prompt

+
 @torch.inference_mode()
 def streaming_chat(
     model: Any,
@@ -99,14 +99,14 @@ def streaming_chat(
     logits_processor = LogitsProcessorList()

     generation_kwargs = {
-        'temperature': temperature,
-        'top_p': top_p,
-        'top_k': top_k,
-        'do_sample': do_sample,
-        'max_new_tokens': max_new_tokens,
-        'length_penalty': length_penalty,
-        'use_cache': True,
-        **kwargs
+        "temperature": temperature,
+        "top_p": top_p,
+        "top_k": top_k,
+        "do_sample": do_sample,
+        "max_new_tokens": max_new_tokens,
+        "length_penalty": length_penalty,
+        "use_cache": True,
+        **kwargs,
     }

     prompt_str = get_prompt_template(input_query, history=history, roles=roles)
@@ -116,13 +116,18 @@
     history.append({"role": roles[1], "message": input_query.strip()})
     history.append({"role": roles[2], "message": None})

-    for outputs in stream_generate(model, **inputs, past_key_values=past_key_values,
-                                   eos_token_id=eos_token_id, return_past_key_values=return_past_key_values,
-                                   **generation_kwargs):
+    for outputs in stream_generate(
+        model,
+        **inputs,
+        past_key_values=past_key_values,
+        eos_token_id=eos_token_id,
+        return_past_key_values=return_past_key_values,
+        **generation_kwargs,
+    ):
         if return_past_key_values:
             outputs, past_key_values = outputs

-        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1]
+        outputs = outputs.tolist()[0][len(inputs["input_ids"][0]) : -1]
         response = tokenizer.decode(outputs)
         history[-1]["message"] = response.strip()
......
@@ -15,7 +15,7 @@ def load_model(model_path, device="cuda", **kwargs):
     model.to(device)

     try:
-        tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side='left')
+        tokenizer = AutoTokenizer.from_pretrained(model_path, padding_side="left")
     except OSError:
         raise ImportError("Tokenizer not found. Please check if the tokenizer exists or the model path is correct.")
......
@@ -12,4 +12,3 @@ flash-attn>=2.0.0,<=2.0.5
 tqdm
 sentencepiece==0.1.99
 protobuf<=3.20.0
-import os
 import argparse

-from transformers import AutoTokenizer, AutoModelForCausalLM
 from colossal_llama2.utils.stream_chat_patch import streaming_chat
+from transformers import AutoModelForCausalLM, AutoTokenizer

 SYSTEM = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."


 def main(args):
     model = AutoModelForCausalLM.from_pretrained(args.model_path).cuda().eval()
     tokenizer = AutoTokenizer.from_pretrained(args.tokenizer_path)
@@ -27,29 +27,34 @@ def main(args):
         print(f"\n{roles[2]}: ", end="")
         gen_len = 0
         for response, history, past_key_values in streaming_chat(
-            model, tokenizer, input_query, history=history, roles=roles,
-            temperature = args.temperature,
-            top_p = args.top_p,
-            top_k = args.top_k,
-            do_sample = args.do_sample,
-            length_penalty = args.length_penalty,
-            max_new_tokens = args.max_new_tokens,
+            model,
+            tokenizer,
+            input_query,
+            history=history,
+            roles=roles,
+            temperature=args.temperature,
+            top_p=args.top_p,
+            top_k=args.top_k,
+            do_sample=args.do_sample,
+            length_penalty=args.length_penalty,
+            max_new_tokens=args.max_new_tokens,
             past_key_values=past_key_values,
-            return_past_key_values=True):
+            return_past_key_values=True,
+        ):
             output = response[gen_len:]
             print(output, end="", flush=True)
             gen_len = len(response)


 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--model_path', type=str, default=None, help="path to chat version model")
-    parser.add_argument('--tokenizer_path', type=str, default=None, help="path to chat version tokenizer")
-    parser.add_argument('--temperature', type=float, default=0.8, help="set temperature")
-    parser.add_argument('--top_p', type=float, default=0.95, help="set top p value")
-    parser.add_argument('--top_k', type=int, default=50, help="set top k value")
-    parser.add_argument('--do_sample', type=bool, default=True, help="whether turn on do_sample or not")
-    parser.add_argument('--length_penalty', type=float, default=1.2, help="set length penalty")
-    parser.add_argument('--max_new_tokens', type=int, default=512, help="set max new tokens")
+    parser.add_argument("--model_path", type=str, default=None, help="path to chat version model")
+    parser.add_argument("--tokenizer_path", type=str, default=None, help="path to chat version tokenizer")
+    parser.add_argument("--temperature", type=float, default=0.8, help="set temperature")
+    parser.add_argument("--top_p", type=float, default=0.95, help="set top p value")
+    parser.add_argument("--top_k", type=int, default=50, help="set top k value")
+    parser.add_argument("--do_sample", type=bool, default=True, help="whether turn on do_sample or not")
+    parser.add_argument("--length_penalty", type=float, default=1.2, help="set length penalty")
+    parser.add_argument("--max_new_tokens", type=int, default=512, help="set max new tokens")
     args = parser.parse_args()
     main(args)
@@ -20,13 +20,13 @@ import colossalai
 from colossalai.booster import Booster
 from colossalai.booster.plugin import GeminiPlugin, HybridParallelPlugin, LowLevelZeroPlugin
 from colossalai.cluster import DistCoordinator
+from colossalai.logging import get_dist_logger
 from colossalai.nn.lr_scheduler import CosineAnnealingWarmupLR
 from colossalai.nn.optimizer import HybridAdam
-from colossalai.utils import get_current_device
-from colossalai.logging import get_dist_logger

 logger = get_dist_logger()

+
 def train(args):
     # check lora compatibility
     if "gemini" in args.plugin and args.lora_rank > 0:
......
@@ -3,7 +3,6 @@ import copy
 import os
 from typing import Dict, List

-import torch
 import torch.distributed as dist
 from colossal_eval import dataset, models, utils
......
@@ -106,6 +106,5 @@ def main():
        print(f"[{coordinator.rank}] {outputs}")


 if __name__ == "__main__":
     main()
@@ -24,6 +24,7 @@ from langchain.pydantic_v1 import Field
 from langchain.schema import BaseRetriever, Document
 from langchain.schema.language_model import BaseLanguageModel

+
 class CustomBaseRetrievalQA(BaseRetrievalQA):
     """Base class for question-answering chains."""
@@ -98,7 +99,6 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
             for k, v in inputs.items()
             if k in ["stop", "temperature", "top_k", "top_p", "max_new_tokens", "doc_prefix"]
         }
-        answers = []
         if self.combine_documents_chain.memory is not None:
             buffered_history_backup, summarized_history_temp_backup = copy.deepcopy(
                 self.combine_documents_chain.memory.buffered_history
@@ -117,10 +117,10 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
             ) = copy.deepcopy(buffered_history_backup), copy.deepcopy(summarized_history_temp_backup)

         # if rejection_trigger_keywords is not given, return the response from LLM directly
-        rejection_trigger_keywords = inputs.get('rejection_trigger_keywords', [])
+        rejection_trigger_keywords = inputs.get("rejection_trigger_keywords", [])
         answer = answer if all([rej not in answer for rej in rejection_trigger_keywords]) else None
         if answer is None:
-            answer = inputs.get('rejection_answer', "抱歉,根据提供的信息无法回答该问题。")
+            answer = inputs.get("rejection_answer", "抱歉,根据提供的信息无法回答该问题。")
         if self.combine_documents_chain.memory is not None:
             self.combine_documents_chain.memory.save_context({"question": question}, {"output": answer})
@@ -161,10 +161,14 @@ class CustomBaseRetrievalQA(BaseRetrievalQA):
             input_documents=docs, question=question, callbacks=_run_manager.get_child(), **kwargs
         )
         # if rejection_trigger_keywords is not given, return the response from LLM directly
-        rejection_trigger_keywords = inputs.get('rejection_trigger_keywords', [])
-        answer = answer if all([rej not in answer for rej in rejection_trigger_keywords]) or len(rejection_trigger_keywords)==0 else None
+        rejection_trigger_keywords = inputs.get("rejection_trigger_keywords", [])
+        answer = (
+            answer
+            if all([rej not in answer for rej in rejection_trigger_keywords]) or len(rejection_trigger_keywords) == 0
+            else None
+        )
         if answer is None:
-            answer = inputs.get('rejection_answer', "抱歉,根据提供的信息无法回答该问题。")
+            answer = inputs.get("rejection_answer", "抱歉,根据提供的信息无法回答该问题。")
         self.combine_documents_chain.memory.save_context({"question": question}, {"output": answer})

         if self.return_source_documents:
......