Commit 29dbf01c authored by Po Yen, Chen's avatar Po Yen, Chen
Browse files

Move all headers under same directory

parent aee06365
...@@ -17,12 +17,13 @@ ...@@ -17,12 +17,13 @@
#pragma once #pragma once
#include <hip/hip_bf16.h> #include <hip/hip_bf16.h>
#include "cuda_compat.h"
#include <algorithm> #include <algorithm>
#include <cfloat> #include <cfloat>
#include "attention/dtype_fp8.cuh"
#include "quantization/fp8/amd/quant_utils.cuh" #include "dtype_fp8.cuh"
#include "hip_compat.h"
#include "quant_utils.cuh"
#if defined(__HIPCC__) && \ #if defined(__HIPCC__) && \
(defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) (defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__))
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
#include <hip/hip_bf16.h> #include <hip/hip_bf16.h>
#include <hip/hip_bfloat16.h> #include <hip/hip_bfloat16.h>
#include "../../../attention/attention_dtypes.h" #include "attention_dtypes.h"
namespace vllm { namespace vllm {
#ifdef USE_ROCM #ifdef USE_ROCM
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#include "paged_attention.hpp" #include "paged_attention.hpp"
#include "kernel/paged_attention_kernel.hpp" #include "paged_attention_kernel.hpp"
#define LAUNCH_CUSTOM_ATTENTION(GQA_RATIO) \ #define LAUNCH_CUSTOM_ATTENTION(GQA_RATIO) \
paged_attention_ll4mi_QKV_kernel<T, \ paged_attention_ll4mi_QKV_kernel<T, \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment