Commit 49cce0c4 authored by chenxl
Browse files

[fix] bugs about Qwen57B, install requirement, Dockerfile

parent c80490a9
......@@ -12,6 +12,7 @@ EOF
FROM pytorch/pytorch:2.3.1-cuda12.1-cudnn8-devel as compile_server
WORKDIR /workspace
ENV CUDA_HOME /usr/local/cuda
COPY --from=web_compile /home/ktransformers /workspace/ktransformers
RUN <<EOF
apt update -y && apt install -y --no-install-recommends \
......@@ -27,7 +28,7 @@ git submodule init &&
git submodule update &&
pip install ninja pyproject numpy cpufeature &&
pip install flash-attn &&
CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9" pip install . --no-build-isolation --verbose &&
CPU_INSTRUCT=NATIVE KTRANSFORMERS_FORCE_BUILD=TRUE TORCH_CUDA_ARCH_LIST="8.0;8.6;8.7;8.9;9.0+PTX" pip install . --no-build-isolation --verbose &&
pip cache purge
EOF
......
- match:
name: "^model\\.layers\\..*\\."
replace:
class: "default"
kwargs:
generate_device: "cuda"
prefill_device: "cuda"
- match:
class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeRotaryEmbedding
replace:
......
......@@ -18,6 +18,7 @@ dependencies = [
"torch >= 2.3.0",
"transformers == 4.43.2",
"fastapi >= 0.111.0",
"uvicorn >= 0.30.1",
"langchain >= 0.2.0",
"blessed >= 1.20.0",
"accelerate >= 0.31.0",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment