[fix] bugs in the Qwen2-57B optimize rules, install requirements, and Dockerfile

49cce0c4 · chenxl · c80490a9 · 49cce0c4 · 49cce0c4 · 49cce0c4
Commit 49cce0c4 authored Aug 30, 2024 by chenxl
Showing with 3 additions and 8 deletions

Dockerfile Dockerfile +2 -1

ktransformers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct.yaml ...mers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct.yaml +0 -7

pyproject.toml pyproject.toml +1 -0

No files found.
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,6 +12,7 @@ EOF

 FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel as compile_server
 WORKDIR /workspace
+ENV CUDA_HOME=/usr/local/cuda
 COPY --from=web_compile /home/ktransformers /workspace/ktransformers
 RUN <<EOF
 apt update -y &&  apt install -y  --no-install-recommends \
@@ -27,7 +28,7 @@ git submodule init &&
 git submodule update &&
 pip install ninja pyproject numpy cpufeature &&
 pip install flash-attn &&
-CPU_INSTRUCT=NATIVE  KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9" pip install . --no-build-isolation --verbose &&
+CPU_INSTRUCT=NATIVE  KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" pip install . --no-build-isolation --verbose &&
 pip cache purge
 EOF


--- a/ktransformers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct.yaml
+++ b/ktransformers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct.yaml
- match:
-    name: "^model\\.layers\\..*\\."
-  replace:
-    class: "default"
-    kwargs:
-      generate_device: "cuda"
-      prefill_device: "cuda"
 - match:
    class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeRotaryEmbedding
  replace:

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -18,6 +18,7 @@ dependencies = [
  "torch >= 2.3.0",
  "transformers == 4.43.2",
  "fastapi >= 0.111.0",
+  "uvicorn >= 0.30.1",
  "langchain >= 0.2.0",
  "blessed >= 1.20.0",
  "accelerate >= 0.31.0",