Unverified commit b4a2f3ac, authored by Johnny and committed by GitHub
Browse files

[NVIDIA] Bugfix NVFP4 DGX Spark and RTX50 (#38423)


Signed-off-by: johnnynunez <johnnynuca14@gmail.com>
Signed-off-by: Johnny <johnnynuca14@gmail.com>
parent 8e6293e8
...@@ -309,7 +309,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") ...@@ -309,7 +309,7 @@ if(VLLM_GPU_LANG STREQUAL "CUDA")
SET(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library") SET(CUTLASS_ENABLE_HEADERS_ONLY ON CACHE BOOL "Enable only the header library")
# Set CUTLASS_REVISION. Used for FetchContent. Also fixes some bogus messages when building. # Set CUTLASS_REVISION. Used for FetchContent. Also fixes some bogus messages when building.
set(CUTLASS_REVISION "v4.2.1") set(CUTLASS_REVISION "v4.4.2")
# Use the specified CUTLASS source directory for compilation if VLLM_CUTLASS_SRC_DIR is provided # Use the specified CUTLASS source directory for compilation if VLLM_CUTLASS_SRC_DIR is provided
if (DEFINED ENV{VLLM_CUTLASS_SRC_DIR}) if (DEFINED ENV{VLLM_CUTLASS_SRC_DIR})
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <torch/all.h> #include <torch/all.h>
#include "cutlass_extensions/common.hpp"
#include "nvfp4_utils.cuh" #include "nvfp4_utils.cuh"
#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \ #if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
...@@ -53,12 +54,27 @@ void silu_and_mul_scaled_fp4_experts_quant_sm1xxa( ...@@ -53,12 +54,27 @@ void silu_and_mul_scaled_fp4_experts_quant_sm1xxa(
torch::Tensor const& output_scale_offset_by_experts); torch::Tensor const& output_scale_offset_by_experts);
#endif #endif
// Reports whether an NVFP4 quantization kernel was compiled for the SM
// version returned by get_sm_version_num().
//
// A range only counts when its build-time macro is defined and non-zero:
//   ENABLE_NVFP4_SM100 -> SM versions in [100, 120)
//   ENABLE_NVFP4_SM120 -> SM versions in [120, 130)
//
// Returns false when the SM version falls outside every enabled range,
// including when neither macro is enabled.
// NOTE(review): get_sm_version_num() is declared outside this view;
// presumably it reports the current device's SM version — confirm.
static bool nvfp4_quant_sm_supported() {
  const int32_t device_sm = get_sm_version_num();
  bool kernel_available = false;
#if defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100
  // SM100-family build covers every SM version below 120.
  kernel_available = kernel_available || (100 <= device_sm && device_sm < 120);
#endif
#if defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120
  // SM120-family build covers every SM version below 130.
  kernel_available = kernel_available || (120 <= device_sm && device_sm < 130);
#endif
  return kernel_available;
}
void scaled_fp4_quant_out(torch::Tensor const& input, void scaled_fp4_quant_out(torch::Tensor const& input,
torch::Tensor const& input_sf, torch::Tensor const& input_sf,
bool is_sf_swizzled_layout, torch::Tensor& output, bool is_sf_swizzled_layout, torch::Tensor& output,
torch::Tensor& output_sf) { torch::Tensor& output_sf) {
#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \ #if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
(defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120) (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
TORCH_CHECK(nvfp4_quant_sm_supported(),
"No compiled nvfp4 quantization kernel for SM ",
get_sm_version_num(),
". Recompile with the appropriate CUDA arch.");
return scaled_fp4_quant_sm1xxa(output, input, output_sf, input_sf, return scaled_fp4_quant_sm1xxa(output, input, output_sf, input_sf,
is_sf_swizzled_layout); is_sf_swizzled_layout);
#endif #endif
...@@ -100,6 +116,10 @@ void scaled_fp4_experts_quant( ...@@ -100,6 +116,10 @@ void scaled_fp4_experts_quant(
torch::Tensor const& output_scale_offset_by_experts) { torch::Tensor const& output_scale_offset_by_experts) {
#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \ #if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
(defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120) (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
TORCH_CHECK(nvfp4_quant_sm_supported(),
"No compiled nvfp4 experts quantization kernel for SM ",
get_sm_version_num(),
". Recompile with the appropriate CUDA arch.");
return scaled_fp4_experts_quant_sm1xxa( return scaled_fp4_experts_quant_sm1xxa(
output, output_scale, input, input_global_scale, input_offset_by_experts, output, output_scale, input, input_global_scale, input_offset_by_experts,
output_scale_offset_by_experts); output_scale_offset_by_experts);
...@@ -112,6 +132,10 @@ void silu_and_mul_nvfp4_quant(torch::Tensor& output, torch::Tensor& output_sf, ...@@ -112,6 +132,10 @@ void silu_and_mul_nvfp4_quant(torch::Tensor& output, torch::Tensor& output_sf,
torch::Tensor& input, torch::Tensor& input_sf) { torch::Tensor& input, torch::Tensor& input_sf) {
#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \ #if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
(defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120) (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
TORCH_CHECK(nvfp4_quant_sm_supported(),
"No compiled silu_and_mul nvfp4 quantization kernel for SM ",
get_sm_version_num(),
". Recompile with the appropriate CUDA arch.");
return silu_and_mul_nvfp4_quant_sm1xxa(output, output_sf, input, input_sf); return silu_and_mul_nvfp4_quant_sm1xxa(output, output_sf, input, input_sf);
#endif #endif
TORCH_CHECK_NOT_IMPLEMENTED( TORCH_CHECK_NOT_IMPLEMENTED(
...@@ -125,6 +149,11 @@ void silu_and_mul_scaled_fp4_experts_quant( ...@@ -125,6 +149,11 @@ void silu_and_mul_scaled_fp4_experts_quant(
torch::Tensor const& output_scale_offset_by_experts) { torch::Tensor const& output_scale_offset_by_experts) {
#if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \ #if (defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100) || \
(defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120) (defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120)
TORCH_CHECK(nvfp4_quant_sm_supported(),
"No compiled silu_and_mul nvfp4 experts quantization kernel "
"for SM ",
get_sm_version_num(),
". Recompile with the appropriate CUDA arch.");
return silu_and_mul_scaled_fp4_experts_quant_sm1xxa( return silu_and_mul_scaled_fp4_experts_quant_sm1xxa(
output, output_scale, input, input_global_scale, input_offset_by_experts, output, output_scale, input, input_global_scale, input_offset_by_experts,
output_scale_offset_by_experts); output_scale_offset_by_experts);
......
...@@ -63,5 +63,17 @@ void cutlass_scaled_fp4_mm(torch::Tensor& D, const torch::Tensor& A, ...@@ -63,5 +63,17 @@ void cutlass_scaled_fp4_mm(torch::Tensor& D, const torch::Tensor& A,
bool cutlass_scaled_mm_supports_fp4(int64_t cuda_device_capability) { bool cutlass_scaled_mm_supports_fp4(int64_t cuda_device_capability) {
int runtimeVersion; int runtimeVersion;
cudaRuntimeGetVersion(&runtimeVersion); cudaRuntimeGetVersion(&runtimeVersion);
return cuda_device_capability >= 100 && runtimeVersion >= 12080; if (runtimeVersion < 12080) return false;
// Only report support when the SM-specific kernel was actually compiled in,
// so the Python-side backend selector does not choose CUTLASS and then hit
// TORCH_CHECK_NOT_IMPLEMENTED (or worse, fall through to Marlin).
#if defined(ENABLE_NVFP4_SM100) && ENABLE_NVFP4_SM100
if (cuda_device_capability >= 100 && cuda_device_capability < 120)
return true;
#endif
#if defined(ENABLE_NVFP4_SM120) && ENABLE_NVFP4_SM120
if (cuda_device_capability >= 120 && cuda_device_capability < 130)
return true;
#endif
return false;
} }
...@@ -154,6 +154,7 @@ struct MacheteCollectiveMma { ...@@ -154,6 +154,7 @@ struct MacheteCollectiveMma {
struct DispatchPolicy { struct DispatchPolicy {
constexpr static int Stages = PipelineStages; constexpr static int Stages = PipelineStages;
using ClusterShape = ClusterShape_MNK; using ClusterShape = ClusterShape_MNK;
using ArchTag = arch::Sm90;
using Schedule = KernelScheduleType; using Schedule = KernelScheduleType;
}; };
......
...@@ -590,7 +590,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \ ...@@ -590,7 +590,10 @@ RUN --mount=type=cache,target=/root/.cache/uv \
# Install FlashInfer JIT cache (requires CUDA-version-specific index URL) # Install FlashInfer JIT cache (requires CUDA-version-specific index URL)
# https://docs.flashinfer.ai/installation.html # https://docs.flashinfer.ai/installation.html
# From versions.json: .flashinfer.version # From versions.json: .flashinfer.version
ARG FLASHINFER_VERSION=0.6.6 # 0.6.7: CUTLASS 4.4.2 bump, fixes TMA grouped GEMM on SM12x (flashinfer#2798)
# TODO: bump to 0.6.8 when released for NVFP4/MXFP4 group GEMMs on
# SM120/SM121 (RTX 50 / DGX Spark) via flashinfer#2738
ARG FLASHINFER_VERSION=0.6.7
RUN --mount=type=cache,target=/root/.cache/uv \ RUN --mount=type=cache,target=/root/.cache/uv \
uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} \ uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} \
--extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \ --extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') \
......
...@@ -217,13 +217,16 @@ RUN pip install setuptools==75.6.0 packaging==23.2 ninja==1.11.1.3 build==1.2.2. ...@@ -217,13 +217,16 @@ RUN pip install setuptools==75.6.0 packaging==23.2 ninja==1.11.1.3 build==1.2.2.
# build flashinfer for torch nightly from source around 10 mins # build flashinfer for torch nightly from source around 10 mins
# release version: v0.6.6 # release version: v0.6.7
# 0.6.7: CUTLASS 4.4.2 bump, fixes TMA grouped GEMM on SM12x (flashinfer#2798)
# TODO: bump to 0.6.8 when released for NVFP4/MXFP4 group GEMMs on
# SM120/SM121 (RTX 50 / DGX Spark) via flashinfer#2738
# todo(elainewy): cache flashinfer build result for faster build # todo(elainewy): cache flashinfer build result for faster build
ENV CCACHE_DIR=/root/.cache/ccache ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \ RUN --mount=type=cache,target=/root/.cache/ccache \
--mount=type=cache,target=/root/.cache/uv \ --mount=type=cache,target=/root/.cache/uv \
echo "git clone flashinfer..." \ echo "git clone flashinfer..." \
&& git clone --depth 1 --branch v0.6.6 --recursive https://github.com/flashinfer-ai/flashinfer.git \ && git clone --depth 1 --branch v0.6.7 --recursive https://github.com/flashinfer-ai/flashinfer.git \
&& cd flashinfer \ && cd flashinfer \
&& git submodule update --init --recursive \ && git submodule update --init --recursive \
&& echo "finish git clone flashinfer..." \ && echo "finish git clone flashinfer..." \
......
...@@ -68,7 +68,7 @@ ...@@ -68,7 +68,7 @@
"default": "true" "default": "true"
}, },
"FLASHINFER_VERSION": { "FLASHINFER_VERSION": {
"default": "0.6.6" "default": "0.6.7"
}, },
"GDRCOPY_CUDA_VERSION": { "GDRCOPY_CUDA_VERSION": {
"default": "12.8" "default": "12.8"
......
...@@ -9,8 +9,8 @@ torchaudio==2.10.0 ...@@ -9,8 +9,8 @@ torchaudio==2.10.0
# These must be updated alongside torch # These must be updated alongside torch
torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version torchvision==0.25.0 # Required for phi3v processor. See https://github.com/pytorch/vision?tab=readme-ov-file#installation for corresponding version
# FlashInfer should be updated together with the Dockerfile # FlashInfer should be updated together with the Dockerfile
flashinfer-python==0.6.6 flashinfer-python==0.6.7
flashinfer-cubin==0.6.6 flashinfer-cubin==0.6.7
# Cap nvidia-cudnn-frontend (transitive dep of flashinfer) due to # Cap nvidia-cudnn-frontend (transitive dep of flashinfer) due to
# breaking changes in 1.19.0 # breaking changes in 1.19.0
nvidia-cudnn-frontend>=1.13.0,<1.19.0 nvidia-cudnn-frontend>=1.13.0,<1.19.0
......
...@@ -57,7 +57,6 @@ def test_select_default_backend_by_platform( ...@@ -57,7 +57,6 @@ def test_select_default_backend_by_platform(
moe_config = make_dummy_moe_config() moe_config = make_dummy_moe_config()
selected_backend = select_unquantized_moe_backend( selected_backend = select_unquantized_moe_backend(
moe_config=moe_config, moe_config=moe_config,
use_ep=False,
use_dp=False, use_dp=False,
) )
...@@ -90,7 +89,6 @@ def test_select_rocm_aiter_backend(mock_aiter_enabled, mock_has_flashinfer): ...@@ -90,7 +89,6 @@ def test_select_rocm_aiter_backend(mock_aiter_enabled, mock_has_flashinfer):
moe_config = make_dummy_moe_config() moe_config = make_dummy_moe_config()
selected_backend = select_unquantized_moe_backend( selected_backend = select_unquantized_moe_backend(
moe_config=moe_config, moe_config=moe_config,
use_ep=False,
use_dp=False, use_dp=False,
) )
...@@ -129,7 +127,6 @@ def test_select_cuda_flashinfer_trtllm_backend( ...@@ -129,7 +127,6 @@ def test_select_cuda_flashinfer_trtllm_backend(
selected_backend = select_unquantized_moe_backend( selected_backend = select_unquantized_moe_backend(
moe_config=moe_config, moe_config=moe_config,
use_ep=True,
use_dp=False, use_dp=False,
) )
...@@ -171,7 +168,6 @@ def test_select_cuda_flashinfer_cutlass_backend( ...@@ -171,7 +168,6 @@ def test_select_cuda_flashinfer_cutlass_backend(
selected_backend = select_unquantized_moe_backend( selected_backend = select_unquantized_moe_backend(
moe_config=moe_config, moe_config=moe_config,
use_ep=True, # CUTLASS requires EP
use_dp=False, # CUTLASS doesn't support DP use_dp=False, # CUTLASS doesn't support DP
) )
......
...@@ -406,6 +406,11 @@ class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolit ...@@ -406,6 +406,11 @@ class TrtLlmFp8ExpertsMonolithic(TrtLlmFp8ExpertsBase, mk.FusedMoEExpertsMonolit
if self.routing_method_type == RoutingMethodType.DeepSeekV3: if self.routing_method_type == RoutingMethodType.DeepSeekV3:
router_logits = router_logits.to(torch.float32) router_logits = router_logits.to(torch.float32)
# Currently FI requires bfloat16 routing bias.
# https://github.com/flashinfer-ai/flashinfer/issues/2909
if e_score_correction_bias is not None:
e_score_correction_bias = e_score_correction_bias.to(torch.bfloat16)
out = flashinfer.fused_moe.trtllm_fp8_per_tensor_scale_moe( out = flashinfer.fused_moe.trtllm_fp8_per_tensor_scale_moe(
routing_logits=router_logits, routing_logits=router_logits,
routing_bias=e_score_correction_bias, routing_bias=e_score_correction_bias,
......
...@@ -5,6 +5,7 @@ import flashinfer ...@@ -5,6 +5,7 @@ import flashinfer
import torch import torch
import vllm.model_executor.layers.fused_moe.modular_kernel as mk import vllm.model_executor.layers.fused_moe.modular_kernel as mk
from vllm.logger import init_logger
from vllm.model_executor.layers.fused_moe.activation import MoEActivation from vllm.model_executor.layers.fused_moe.activation import MoEActivation
from vllm.model_executor.layers.fused_moe.config import ( from vllm.model_executor.layers.fused_moe.config import (
FusedMoEConfig, FusedMoEConfig,
...@@ -27,6 +28,8 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import ( ...@@ -27,6 +28,8 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils.flashinfer import has_flashinfer_trtllm_fused_moe from vllm.utils.flashinfer import has_flashinfer_trtllm_fused_moe
logger = init_logger(__name__)
class TrtLlmNvFp4ExpertsBase: class TrtLlmNvFp4ExpertsBase:
""" """
...@@ -315,6 +318,11 @@ class TrtLlmNvFp4ExpertsMonolithic( ...@@ -315,6 +318,11 @@ class TrtLlmNvFp4ExpertsMonolithic(
else router_logits else router_logits
) )
# Currently FI requires bfloat16 routing bias.
# https://github.com/flashinfer-ai/flashinfer/issues/2909
if e_score_correction_bias is not None:
e_score_correction_bias = e_score_correction_bias.to(torch.bfloat16)
# Invoke kernel. # Invoke kernel.
return flashinfer.fused_moe.trtllm_fp4_block_scale_moe( return flashinfer.fused_moe.trtllm_fp4_block_scale_moe(
routing_logits=router_logits, routing_logits=router_logits,
......
...@@ -361,7 +361,7 @@ class FlashInferExperts(mk.FusedMoEExpertsModular): ...@@ -361,7 +361,7 @@ class FlashInferExperts(mk.FusedMoEExpertsModular):
fc1_expert_weights = w1 fc1_expert_weights = w1
fc2_expert_weights = w2 fc2_expert_weights = w2
else: else:
quant_scales = None quant_scales = []
a1q_scale = None a1q_scale = None
fc1_expert_weights = w1 fc1_expert_weights = w1
fc2_expert_weights = w2 fc2_expert_weights = w2
......
...@@ -70,7 +70,6 @@ def map_unquantized_backend(runner_backend: MoEBackend) -> UnquantizedMoeBackend ...@@ -70,7 +70,6 @@ def map_unquantized_backend(runner_backend: MoEBackend) -> UnquantizedMoeBackend
def select_unquantized_moe_backend( def select_unquantized_moe_backend(
moe_config: FusedMoEConfig, moe_config: FusedMoEConfig,
use_ep: bool,
use_dp: bool, use_dp: bool,
) -> UnquantizedMoeBackend: ) -> UnquantizedMoeBackend:
""" """
...@@ -96,7 +95,6 @@ def select_unquantized_moe_backend( ...@@ -96,7 +95,6 @@ def select_unquantized_moe_backend(
# FlashInfer CUTLASS MoE is only supported on Hopper and later GPUS # FlashInfer CUTLASS MoE is only supported on Hopper and later GPUS
flashinfer_cutlass_available = ( flashinfer_cutlass_available = (
has_flashinfer_cutlass_fused_moe() has_flashinfer_cutlass_fused_moe()
and use_ep
and (not use_dp) and (not use_dp)
and current_platform.has_device_capability(90) and current_platform.has_device_capability(90)
) )
...@@ -161,9 +159,9 @@ def select_unquantized_moe_backend( ...@@ -161,9 +159,9 @@ def select_unquantized_moe_backend(
"to enable it for better performance.", "to enable it for better performance.",
scope="local", scope="local",
) )
elif use_ep and (not use_dp): elif not use_dp and flashinfer_cutlass_available:
logger.info_once( logger.info_once(
"FlashInfer MoE is available for EP" "FlashInfer CUTLASS MoE is available"
" but not enabled, consider setting" " but not enabled, consider setting"
" VLLM_USE_FLASHINFER_MOE_FP16=1 to enable it.", " VLLM_USE_FLASHINFER_MOE_FP16=1 to enable it.",
scope="local", scope="local",
......
...@@ -61,7 +61,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): ...@@ -61,7 +61,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
super().__init__(moe) super().__init__(moe)
self.unquantized_backend = select_unquantized_moe_backend( self.unquantized_backend = select_unquantized_moe_backend(
moe_config=self.moe, moe_config=self.moe,
use_ep=self.moe.moe_parallel_config.use_ep,
use_dp=self.moe.moe_parallel_config.dp_size > 1, use_dp=self.moe.moe_parallel_config.dp_size > 1,
) )
......
...@@ -55,8 +55,16 @@ def select_nvfp4_linear_backend() -> NvFp4LinearBackend: ...@@ -55,8 +55,16 @@ def select_nvfp4_linear_backend() -> NvFp4LinearBackend:
elif envs.VLLM_USE_NVFP4_CT_EMULATIONS: elif envs.VLLM_USE_NVFP4_CT_EMULATIONS:
backend = NvFp4LinearBackend.EMULATION backend = NvFp4LinearBackend.EMULATION
elif envs.VLLM_NVFP4_GEMM_BACKEND is None: elif envs.VLLM_NVFP4_GEMM_BACKEND is None:
# Auto-select best available backend # Auto-select best available backend.
if current_platform.has_device_capability(100) and has_flashinfer(): # cutlass_fp4_supported() checks that the vLLM NVFP4 kernels (both
# quantization and GEMM) were compiled for the current SM version.
# FlashInfer backends still rely on the vLLM quantization kernels,
# so we gate them on the same check.
if (
cutlass_fp4_supported()
and current_platform.has_device_capability(100)
and has_flashinfer()
):
backend = NvFp4LinearBackend.FLASHINFER_CUTLASS backend = NvFp4LinearBackend.FLASHINFER_CUTLASS
elif cutlass_fp4_supported(): elif cutlass_fp4_supported():
backend = NvFp4LinearBackend.VLLM_CUTLASS backend = NvFp4LinearBackend.VLLM_CUTLASS
...@@ -72,6 +80,10 @@ def select_nvfp4_linear_backend() -> NvFp4LinearBackend: ...@@ -72,6 +80,10 @@ def select_nvfp4_linear_backend() -> NvFp4LinearBackend:
NvFp4LinearBackend.FLASHINFER_CUDNN, NvFp4LinearBackend.FLASHINFER_CUDNN,
): ):
assert has_flashinfer(), f"FlashInfer is required for {backend}" assert has_flashinfer(), f"FlashInfer is required for {backend}"
assert cutlass_fp4_supported(), (
f"{backend} requires vLLM NVFP4 quantization kernels compiled "
f"for the current GPU (SM {current_platform.get_device_capability()})"
)
elif backend == NvFp4LinearBackend.VLLM_CUTLASS: elif backend == NvFp4LinearBackend.VLLM_CUTLASS:
assert cutlass_fp4_supported(), f"Cutlass is required for {backend}" assert cutlass_fp4_supported(), f"Cutlass is required for {backend}"
elif backend == NvFp4LinearBackend.MARLIN: elif backend == NvFp4LinearBackend.MARLIN:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment