Commit f7461a96 authored by zhangqha
Browse files

Merge branch 'v0.15.1-dev-lxh' into 'v0.15.1-dev'

Fix:修复调用Triton MoE gemm时缺失的参数,对齐接口

See merge request dcutoolkit/deeplearing/vllm!476
parents 02a1e691 3b9aa746
......@@ -72,7 +72,7 @@ __device__ inline bool cmp_eq(const T& a, const T& b) {
static constexpr int SIZE_WARP = 32;
static constexpr int WARPS_PER_CTA = 6;
// static constexpr int MAX_VPT = 32; // maximum VPT we support, > params.VPT = num_expert / num_expert_group
-static constexpr int MAX_VPT = 128; // Extend MAX_VPT from 32 to 128 to accommodate large-scale MoE models (e.g., GLM-4V-quantized model).
+static constexpr int MAX_VPT = 256; // Extend MAX_VPT from 32 to 256 to accommodate large-scale MoE models (e.g., GLM-5-quantized model).
// Create an alias for Array using AlignedArray
template <typename T, int N>
......
......@@ -9,7 +9,7 @@ import math
from collections.abc import Callable
-from typing import Any
+from typing import Any, Callable, Dict, List, Optional
import torch
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment