# Append an export line to ~/.bashrc that puts libtcmalloc_minimal at the front of
# LD_PRELOAD. The single quotes keep $LD_PRELOAD literal at build time, so it is
# expanded only when a shell later reads ~/.bashrc.
# NOTE(review): presumably this only takes effect in shells that source ~/.bashrc;
# confirm the container's entrypoint actually picks it up.
RUN echo 'export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:$LD_PRELOAD' >> ~/.bashrc
# Install the Intel Extension for PyTorch wheel directly from its URL. The file name
# pins one specific build (2.3.100+git0eb3473, "%2B" being the URL-encoded "+")
# tagged cp310 / linux_x86_64.
RUN pip install https://intel-extension-for-pytorch.s3.amazonaws.com/ipex_dev/cpu/intel_extension_for_pytorch-2.3.100%2Bgit0eb3473-cp310-cp310-linux_x86_64.whl
@@ -23,16 +23,10 @@ If you have cool projects related to vLLM or LLM inference, we would love to see
...
@@ -23,16 +23,10 @@ If you have cool projects related to vLLM or LLM inference, we would love to see
This will be a great chance for everyone in the community to get together and learn.
This will be a great chance for everyone in the community to get together and learn.
Please submit your proposal [here](https://raysummit.anyscale.com/flow/anyscale/raysummit2024/landing/page/eventsite)
Please submit your proposal [here](https://raysummit.anyscale.com/flow/anyscale/raysummit2024/landing/page/eventsite)
**The Fourth vLLM Bay Area Meetup (June 11th 5:30pm-8pm PT)**
We are thrilled to announce our fourth vLLM Meetup!
The vLLM team will share recent updates and roadmap.
We will also have vLLM collaborators from BentoML and Cloudflare coming up to the stage to discuss their experience in deploying LLMs with vLLM.
Please register [here](https://lu.ma/agivllm) and join us!
---
---
*Latest News* 🔥
*Latest News* 🔥
- [2024/06] We hosted [the fourth vLLM meetup](https://lu.ma/agivllm) with Cloudflare and BentoML! Please find the meetup slides [here](https://docs.google.com/presentation/d/1iJ8o7V2bQEi0BFEljLTwc5G1S10_Rhv3beed5oB0NJ4/edit?usp=sharing).
- [2024/04] We hosted [the third vLLM meetup](https://robloxandvllmmeetup2024.splashthat.com/) with Roblox! Please find the meetup slides [here](https://docs.google.com/presentation/d/1A--47JAK4BJ39t954HyTkvtfwn0fkqtsL8NGFuslReM/edit?usp=sharing).
- [2024/04] We hosted [the third vLLM meetup](https://robloxandvllmmeetup2024.splashthat.com/) with Roblox! Please find the meetup slides [here](https://docs.google.com/presentation/d/1A--47JAK4BJ39t954HyTkvtfwn0fkqtsL8NGFuslReM/edit?usp=sharing).
- [2024/01] We hosted [the second vLLM meetup](https://lu.ma/ygxbpzhl) in SF! Please find the meetup slides [here](https://docs.google.com/presentation/d/12mI2sKABnUw5RBWXDYY-HtHth4iMSNcEoQ10jDQbxgA/edit?usp=sharing).
- [2024/01] We hosted [the second vLLM meetup](https://lu.ma/ygxbpzhl) in SF! Please find the meetup slides [here](https://docs.google.com/presentation/d/12mI2sKABnUw5RBWXDYY-HtHth4iMSNcEoQ10jDQbxgA/edit?usp=sharing).
- [2024/01] Added ROCm 6.0 support to vLLM.
- [2024/01] Added ROCm 6.0 support to vLLM.
...
@@ -65,7 +59,7 @@ vLLM is flexible and easy to use with:
...
@@ -65,7 +59,7 @@ vLLM is flexible and easy to use with:
- Tensor parallelism support for distributed inference
- Tensor parallelism support for distributed inference
@@ -33,6 +33,7 @@ function (find_isa CPUINFO TARGET OUT)
...
@@ -33,6 +33,7 @@ function (find_isa CPUINFO TARGET OUT)
endif()
endif()
endfunction()
endfunction()
# Probe ${CPUINFO} for each SIMD flag the build can target; the result of each
# probe is stored in the variable named by the last argument.
# find_isa is declared as (CPUINFO TARGET OUT), so the three arguments must be
# separated by whitespace: writing ${CPUINFO}"avx2" with no space does not pass
# the CPU info and the flag name as two distinct arguments.
find_isa(${CPUINFO} "avx2" AVX2_FOUND)
find_isa(${CPUINFO} "avx512f" AVX512_FOUND)
find_isa(${CPUINFO} "avx512f" AVX512_FOUND)
if(AVX512_FOUND)
if(AVX512_FOUND)
...
@@ -53,8 +54,11 @@ if (AVX512_FOUND)
...
@@ -53,8 +54,11 @@ if (AVX512_FOUND)
else()
else()
message(WARNING "Disable AVX512-BF16 ISA support, no avx512_bf16 found in local CPU flags."" If cross-compilation is required, please set env VLLM_CPU_AVX512BF16=1.")
message(WARNING "Disable AVX512-BF16 ISA support, no avx512_bf16 found in local CPU flags."" If cross-compilation is required, please set env VLLM_CPU_AVX512BF16=1.")
endif()
endif()
elseif(AVX2_FOUND)
list(APPEND CXX_COMPILE_FLAGS "-mavx2")
message(WARNING "vLLM CPU backend using AVX2 ISA")
else()
else()
message(FATAL_ERROR "vLLM CPU backend requires AVX512 ISA support.")
message(FATAL_ERROR "vLLM CPU backend requires AVX512 or AVX2 ISA support.")