"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "dab1de9f3895a153a7bc2ce7ef7782ba7818a146"
Unverified Commit f98548b9 authored by Luka Govedič's avatar Luka Govedič Committed by GitHub
Browse files

[torch.compile][ROCm] Fuse quantization onto attention using a torch.compile pass (#16756)


Signed-off-by: default avatarLuka Govedič <lgovedic@redhat.com>
Co-authored-by: default avatarSage Moore <sage@neuralmagic.com>
parent 96846bb3
# SPDX-License-Identifier: Apache-2.0
import torch
import torch._inductor.pattern_matcher as pm
from torch._higher_order_ops.auto_functionalize import auto_functionalized
from torch._inductor.pattern_matcher import PatternMatcherPass
from torch._subclasses.fake_tensor import (FakeTensorMode,
unset_fake_temporarily)
from vllm.attention import Attention
from vllm.config import VllmConfig
from vllm.logger import init_logger
from vllm.platforms import current_platform
from .fusion import QUANT_OPS, GroupShape, QuantKey, empty_bf16, empty_fp32
from .vllm_inductor_pass import VllmInductorPass
# Module-level logger, named after this module.
logger = init_logger(__name__)
# Attention custom-op overload that the fusion patterns below search for
# and re-emit with an output scale.
ATTN_OP = torch.ops.vllm.unified_attention_with_output.default
# Reshape overload used to build the reshape nodes that surround the
# attention op in both the search pattern and its replacement.
RESHAPE_OP = torch.ops.aten.reshape.default
class AttentionStaticQuantPattern:
    """
    Search/replace pattern that folds a static, per-tensor output
    quantization into the attention op that produces its input.

    The layer name is embedded in the traced pattern, so one instance is
    needed for every attention layer that should be matched.
    """

    def __init__(
        self,
        layer_name: str,
        num_heads: int,
        head_size: int,
        quant_dtype: torch.dtype,
        symmetric=True,
    ):
        """
        Store the layer geometry and look up the quantization op.

        :param layer_name: name passed to the attention op as ``layer_name``
        :param num_heads: head count used in the pattern's reshapes
        :param head_size: per-head size used in the pattern's reshapes
        :param quant_dtype: dtype of the quantized output
        :param symmetric: forwarded to ``QuantKey``
        :raises AssertionError: if no op is registered in ``QUANT_OPS`` for
            the resulting quantization key
        """
        self.layer_name = layer_name
        self.num_heads = num_heads
        self.head_size = head_size
        self.quant_dtype = quant_dtype

        # Static per-tensor quantization is the only scheme built here.
        self.quant_key = QuantKey(
            dtype=quant_dtype,
            static=True,
            group_shape=GroupShape.PER_TENSOR,
            symmetric=symmetric,
        )
        assert self.quant_key in QUANT_OPS, \
            f"unsupported quantization scheme {self.quant_key}"
        self.QUANT_OP = QUANT_OPS[self.quant_key]

    def empty_quant(self, *args, **kwargs):
        """
        Create an uninitialized tensor via ``torch.empty``.

        ``dtype`` defaults to the quantized dtype and ``device`` to "cuda";
        explicitly passed keyword arguments take precedence over both.
        """
        options = dict(dtype=self.quant_dtype, device="cuda")
        options.update(kwargs)
        return torch.empty(*args, **options)

    def register_if_supported(self, pm_pass: PatternMatcherPass,
                              layer: Attention):
        """
        Register this pattern on ``pm_pass`` only if the layer's attention
        implementation reports support for fused output quantization with
        this dtype / static flag / group shape.
        """
        key = self.quant_key
        supported = layer.impl.fused_output_quant_supported(
            self.quant_dtype, key.static, key.group_shape)
        if not supported:
            return
        self._register(pm_pass)

    def _register(self, pm_pass: PatternMatcherPass):
        """Trace the search and replacement graphs and add them to the pass."""

        def pattern(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                    output_attn: torch.Tensor, output_quant: torch.Tensor,
                    scale: torch.Tensor):
            # Unfused form: attention writes into the unquantized buffer,
            # then a separate quant op fills output_quant.
            attn_buffer = RESHAPE_OP(output_attn,
                                     [-1, self.num_heads, self.head_size])
            attn_result = auto_functionalized(ATTN_OP,
                                              query=q,
                                              key=k,
                                              value=v,
                                              output=attn_buffer,
                                              layer_name=self.layer_name,
                                              output_scale=None)
            flat_attn = RESHAPE_OP(attn_result[1],
                                   [-1, self.num_heads * self.head_size])
            quant_result = auto_functionalized(self.QUANT_OP,
                                               result=output_quant,
                                               input=flat_attn,
                                               scale=scale)
            return quant_result[1]

        def replacement(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor,
                        output_attn: torch.Tensor, output_quant: torch.Tensor,
                        scale: torch.Tensor):
            # Fused form: attention receives the quantized buffer as its
            # output together with the scale; no separate quant op remains.
            quant_buffer = RESHAPE_OP(output_quant,
                                      [-1, self.num_heads, self.head_size])
            fused_result = auto_functionalized(ATTN_OP,
                                               query=q,
                                               key=k,
                                               value=v,
                                               output=quant_buffer,
                                               layer_name=self.layer_name,
                                               output_scale=scale)
            return RESHAPE_OP(fused_result[1],
                              [-1, self.num_heads * self.head_size])

        fwd_only = pm.fwd_only

        def trace_with_reshapes(*args, **kwargs):
            # Trace forward-only, then run inductor's view_to_reshape on the
            # traced module before handing it back to the pattern matcher.
            gm = fwd_only(*args, **kwargs)
            from torch._inductor.fx_passes.post_grad import view_to_reshape
            view_to_reshape(gm)
            return gm

        # Need custom fake mode, otherwise tracing happens with real tensors.
        # That would not work for the unified_attention custom op.
        with unset_fake_temporarily(), FakeTensorMode():
            qkv = [
                empty_bf16(5, self.num_heads, self.head_size)
                for _ in range(3)
            ]  # q, k, v
            inputs = [
                *qkv,
                empty_bf16(5, self.num_heads * self.head_size),  # attn_output
                self.empty_quant(5, self.num_heads *
                                 self.head_size),  # quant_output
                empty_fp32(1, 1),  # scale
            ]

            pm.register_replacement(pattern, replacement, inputs,
                                    trace_with_reshapes, pm_pass)
class AttnFusionPass(VllmInductorPass):
    """
    Fuses the quantization that follows attention into the attention op,
    for layers whose implementation supports it.

    Patterns are registered once per attention layer because the layer name
    is a string in the graph and strings cannot be wildcarded by the pattern
    matcher. Registering per layer also means support is checked at
    registration time rather than while matching.

    Currently, only static fp8 quant is supported, but patterns could easily
    be added for other quant schemes and dtypes. The bigger hurdle for wider
    support are attention kernels, which need to support fusing output quant.
    """

    def __init__(self, config: VllmConfig):
        """
        Build the pattern-matcher pass and register one pattern for each
        entry of the compilation config's static forward context.
        """
        super().__init__(config)
        self.static_fwd_ctx = config.compilation_config.static_forward_context
        self.patterns = PatternMatcherPass(pass_name="attn_fusion_pass")

        for layer_name, attn_layer in self.static_fwd_ctx.items():
            AttentionStaticQuantPattern(
                layer_name,
                attn_layer.num_heads,
                attn_layer.head_size,
                current_platform.fp8_dtype(),
            ).register_if_supported(self.patterns, attn_layer)

        # Without any known layers nothing was registered; tell the user.
        if len(self.static_fwd_ctx) == 0:
            logger.warning(
                "Attention + quant fusion is enabled, but "
                "CompilationConfig.static_forward_context is empty. "
                "Cannot access attention layers so no fusion "
                "patterns were registered.")

    def __call__(self, graph: torch.fx.graph.Graph) -> None:
        """
        Apply the registered patterns to ``graph``, dumping the graph before
        and after and logging how many matches were replaced.
        """
        self.begin()
        self.dump_graph(graph, "before_attn_fusion")

        num_fused = self.patterns.apply(graph)
        logger.debug("Fused quantization onto %s attention nodes", num_fused)

        self.dump_graph(graph, "after_attn_fusion")
        self.end_and_log()
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import operator import operator
from collections.abc import Iterable from collections.abc import Iterable, Iterator
from typing import Optional from typing import Optional
from torch import fx from torch import fx
...@@ -14,6 +14,10 @@ def is_func(node: fx.Node, target) -> bool: ...@@ -14,6 +14,10 @@ def is_func(node: fx.Node, target) -> bool:
return node.op == "call_function" and node.target == target return node.op == "call_function" and node.target == target
def is_auto_func(node: fx.Node, op: OpOverload) -> bool:
return is_func(node, auto_functionalized) and node.args[0] == op
# Returns the first specified node with the given op (if it exists) # Returns the first specified node with the given op (if it exists)
def find_specified_fn_maybe(nodes: Iterable[fx.Node], def find_specified_fn_maybe(nodes: Iterable[fx.Node],
op: OpOverload) -> Optional[fx.Node]: op: OpOverload) -> Optional[fx.Node]:
...@@ -60,3 +64,21 @@ def find_getitem(node: fx.Node, idx: int) -> fx.Node: ...@@ -60,3 +64,21 @@ def find_getitem(node: fx.Node, idx: int) -> fx.Node:
ret = find_getitem_maybe(node, idx) ret = find_getitem_maybe(node, idx)
assert ret is not None, f"Could not find getitem {idx} in node {node}" assert ret is not None, f"Could not find getitem {idx} in node {node}"
return ret return ret
# An auto-functionalization-aware utility for finding nodes with a specific op
def find_op_nodes(op: OpOverload, graph: fx.Graph) -> Iterator[fx.Node]:
if not op._schema.is_mutable:
yield from graph.find_nodes(op="call_function", target=op)
for n in graph.find_nodes(op="call_function", target=auto_functionalized):
if n.args[0] == op:
yield n
# Asserts that the node only has one user and returns it
# Even if a node has only 1 user, it might share storage with another node,
# which might need to be taken into account.
def get_only_user(node: fx.Node) -> fx.Node:
assert len(node.users) == 1
return next(iter(node.users))
...@@ -23,7 +23,23 @@ class NoOpEliminationPass(VllmInductorPass): ...@@ -23,7 +23,23 @@ class NoOpEliminationPass(VllmInductorPass):
in the 2D-case. Additionally, torch internal no-op elimination pass does in the 2D-case. Additionally, torch internal no-op elimination pass does
not handle certain slice variants. not handle certain slice variants.
Cases handled:
1. A chain of reshapes is equivalent to the last reshape called on the
base tensor (input of the first reshape).
2. A reshape that produces the shape of the input is redundant
3. A slice that produces the shape of the input is redundant
Example graph 1: Example graph 1:
mul_1: "f16[s0, 4096]" = ...
view_1: "f16[s0, 128, 32]" = torch.reshape(mul_1, [-1, 128, 32])
view_2: "f16[s0, 4096]" = torch.reshape(view_1, [-1, 4096])
view_3: "f16[s0, 128, 32]" = torch.reshape(view_2, [-1, 128, 32])
Can be replaced with:
mul_1: "f16[s0, 4096]" = ...
view_3: "f16[s0, 128, 32]" = ...
Example graph 2:
getitem_1: "f16[s0, 4096]" = ... getitem_1: "f16[s0, 4096]" = ...
view_1: "f16[s0, 4096]" = torch.reshape(getitem_1, [-1, 4096]) view_1: "f16[s0, 4096]" = torch.reshape(getitem_1, [-1, 4096])
at = auto_functionalized(static_scaled_fp8_quant, input = view_1, ...) at = auto_functionalized(static_scaled_fp8_quant, input = view_1, ...)
...@@ -34,7 +50,7 @@ class NoOpEliminationPass(VllmInductorPass): ...@@ -34,7 +50,7 @@ class NoOpEliminationPass(VllmInductorPass):
at = auto_functionalized(static_scaled_fp8_quant, input = getitem_1, ...) at = auto_functionalized(static_scaled_fp8_quant, input = getitem_1, ...)
out: "f8e4m3fn[s0, 4096]" = at[1] out: "f8e4m3fn[s0, 4096]" = at[1]
Example graph 2: Example graph 3:
arg0: "s0" = SymInt(s0) arg0: "s0" = SymInt(s0)
scaled_mm: "f16[s0, 4096]" = ... scaled_mm: "f16[s0, 4096]" = ...
slice_1: "f16[s0, 4096]" = torch.slice(scaled_mm, -1, 0, arg0) slice_1: "f16[s0, 4096]" = torch.slice(scaled_mm, -1, 0, arg0)
...@@ -58,6 +74,18 @@ class NoOpEliminationPass(VllmInductorPass): ...@@ -58,6 +74,18 @@ class NoOpEliminationPass(VllmInductorPass):
# Remove no-op reshapes/views: # Remove no-op reshapes/views:
for node in graph.nodes: for node in graph.nodes:
if is_func(node, torch.ops.aten.reshape.default): if is_func(node, torch.ops.aten.reshape.default):
# Case 1: rewrite reshape chains to reshapes on the base tensor
input = node.args[0]
# If the input is a reshape, rebind to that node
if is_func(input, torch.ops.aten.reshape.default):
# The new input is guaranteed not to be a reshape,
# because we process nodes in order
node.update_arg(0, input.args[0])
if len(input.users) == 0:
graph.erase_node(input)
count += 1
# Case 2: remove this reshape if it produces the original shape
input, shape = node.args[:2] input, shape = node.args[:2]
input_shape = input.meta["val"].shape input_shape = input.meta["val"].shape
if len(shape) != len(input_shape): if len(shape) != len(input_shape):
......
...@@ -10,6 +10,7 @@ from .activation_quant_fusion import ActivationQuantFusionPass ...@@ -10,6 +10,7 @@ from .activation_quant_fusion import ActivationQuantFusionPass
from .collective_fusion import AsyncTPPass from .collective_fusion import AsyncTPPass
from .fix_functionalization import FixFunctionalizationPass from .fix_functionalization import FixFunctionalizationPass
from .fusion import FusionPass from .fusion import FusionPass
from .fusion_attn import AttnFusionPass
from .inductor_pass import CustomGraphPass, InductorPass, get_pass_context from .inductor_pass import CustomGraphPass, InductorPass, get_pass_context
from .noop_elimination import NoOpEliminationPass from .noop_elimination import NoOpEliminationPass
from .sequence_parallelism import SequenceParallelismPass from .sequence_parallelism import SequenceParallelismPass
...@@ -59,6 +60,9 @@ class PostGradPassManager(CustomGraphPass): ...@@ -59,6 +60,9 @@ class PostGradPassManager(CustomGraphPass):
if self.pass_config.enable_async_tp: if self.pass_config.enable_async_tp:
self.passes += [AsyncTPPass(config)] self.passes += [AsyncTPPass(config)]
if self.pass_config.enable_attn_fusion:
self.passes += [AttnFusionPass(config)]
self.fix_functionalization = FixFunctionalizationPass(config) self.fix_functionalization = FixFunctionalizationPass(config)
def add(self, pass_: InductorPass): def add(self, pass_: InductorPass):
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
import time import time
import torch import torch
from torch._dynamo.utils import lazy_format_graph_code
from vllm.config import PassConfig, VllmConfig from vllm.config import PassConfig, VllmConfig
# yapf: disable # yapf: disable
...@@ -34,6 +35,8 @@ class VllmInductorPass(InductorPass): ...@@ -34,6 +35,8 @@ class VllmInductorPass(InductorPass):
self.pass_name = self.__class__.__name__ self.pass_name = self.__class__.__name__
def dump_graph(self, graph: torch.fx.Graph, stage: str, always=False): def dump_graph(self, graph: torch.fx.Graph, stage: str, always=False):
lazy_format_graph_code(stage, graph.owning_module)
if stage in self.pass_config.dump_graph_stages or always: if stage in self.pass_config.dump_graph_stages or always:
# Make sure filename includes rank in the distributed setting # Make sure filename includes rank in the distributed setting
parallel = p_is_init() and get_tp_world_size() > 1 parallel = p_is_init() and get_tp_world_size() > 1
......
...@@ -3804,9 +3804,10 @@ class PassConfig: ...@@ -3804,9 +3804,10 @@ class PassConfig:
its own stages (before, after, maybe in-between).""" its own stages (before, after, maybe in-between)."""
dump_graph_dir: Path = Path(".") dump_graph_dir: Path = Path(".")
"""Directory to dump the graphs.""" """Directory to dump the graphs."""
# TODO(luka) better pass enabling system.
enable_fusion: bool = True enable_fusion: bool = True
"""Whether to enable the custom fusion pass.""" """Whether to enable the custom fusion (RMSNorm/SiluMul+quant) pass."""
enable_attn_fusion: bool = False
"""Whether to enable the custom attention+quant fusion pass."""
enable_noop: bool = True enable_noop: bool = True
"""Whether to enable the custom no-op elimination pass.""" """Whether to enable the custom no-op elimination pass."""
enable_sequence_parallelism: bool = False enable_sequence_parallelism: bool = False
...@@ -3814,6 +3815,8 @@ class PassConfig: ...@@ -3814,6 +3815,8 @@ class PassConfig:
enable_async_tp: bool = False enable_async_tp: bool = False
"""Whether to enable async TP.""" """Whether to enable async TP."""
# TODO(luka) better pass enabling system.
def uuid(self): def uuid(self):
""" """
Produces a hash unique to the pass configuration. Produces a hash unique to the pass configuration.
...@@ -3821,18 +3824,20 @@ class PassConfig: ...@@ -3821,18 +3824,20 @@ class PassConfig:
Do not include dump_graph_* in the hash - they don't affect Do not include dump_graph_* in the hash - they don't affect
compilation. compilation.
""" """
include = { exclude = {"dump_graph_stages", "dump_graph_dir"}
"enable_fusion", "enable_noop", "enable_sequence_parallelism", dict_ = {k: v for k, v in asdict(self).items() if k not in exclude}
"enable_async_tp"
}
dict_ = {k: v for k, v in asdict(self).items() if k in include}
return InductorPass.hash_dict(dict_) return InductorPass.hash_dict(dict_)
def __post_init__(self) -> None: def __post_init__(self) -> None:
if not self.enable_noop and self.enable_fusion: if not self.enable_noop:
logger.warning_once( if self.enable_fusion:
"Fusion enabled but reshape elimination disabled. " logger.warning_once(
"RMSNorm + quant (fp8) fusion might not work") "Fusion enabled but reshape elimination disabled. "
"RMSNorm/SiluMul + quant (fp8) fusion might not work")
if self.enable_attn_fusion:
logger.warning_once(
"Fusion enabled but reshape elimination disabled. "
"Attention + quant (fp8) fusion might not work")
@config @config
......
...@@ -15,7 +15,7 @@ if TYPE_CHECKING: ...@@ -15,7 +15,7 @@ if TYPE_CHECKING:
VLLM_RINGBUFFER_WARNING_INTERVAL: int = 60 VLLM_RINGBUFFER_WARNING_INTERVAL: int = 60
VLLM_NCCL_SO_PATH: Optional[str] = None VLLM_NCCL_SO_PATH: Optional[str] = None
LD_LIBRARY_PATH: Optional[str] = None LD_LIBRARY_PATH: Optional[str] = None
VLLM_USE_TRITON_FLASH_ATTN: bool = False VLLM_USE_TRITON_FLASH_ATTN: bool = True
VLLM_V1_USE_PREFILL_DECODE_ATTENTION: bool = False VLLM_V1_USE_PREFILL_DECODE_ATTENTION: bool = False
VLLM_FLASH_ATTN_VERSION: Optional[int] = None VLLM_FLASH_ATTN_VERSION: Optional[int] = None
LOCAL_RANK: int = 0 LOCAL_RANK: int = 0
......
...@@ -569,6 +569,7 @@ class FlashAttentionImpl(AttentionImpl): ...@@ -569,6 +569,7 @@ class FlashAttentionImpl(AttentionImpl):
kv_cache: torch.Tensor, kv_cache: torch.Tensor,
attn_metadata: FlashAttentionMetadata, attn_metadata: FlashAttentionMetadata,
output: Optional[torch.Tensor] = None, output: Optional[torch.Tensor] = None,
output_scale: Optional[torch.Tensor] = None,
) -> torch.Tensor: ) -> torch.Tensor:
"""Forward pass with FlashAttention. """Forward pass with FlashAttention.
...@@ -586,6 +587,11 @@ class FlashAttentionImpl(AttentionImpl): ...@@ -586,6 +587,11 @@ class FlashAttentionImpl(AttentionImpl):
""" """
assert output is not None, "Output tensor must be provided." assert output is not None, "Output tensor must be provided."
if output_scale is not None:
raise NotImplementedError(
"fused output quantization is not yet supported"
" for FlashAttentionImpl")
if attn_metadata is None: if attn_metadata is None:
# Profiling run. # Profiling run.
return output return output
......
...@@ -547,6 +547,7 @@ class FlashInferImpl(AttentionImpl): ...@@ -547,6 +547,7 @@ class FlashInferImpl(AttentionImpl):
kv_cache: torch.Tensor, kv_cache: torch.Tensor,
attn_metadata: FlashInferMetadata, attn_metadata: FlashInferMetadata,
output: Optional[torch.Tensor] = None, output: Optional[torch.Tensor] = None,
output_scale: Optional[torch.Tensor] = None,
) -> torch.Tensor: ) -> torch.Tensor:
"""Forward pass with FlashInfer. """Forward pass with FlashInfer.
...@@ -561,6 +562,11 @@ class FlashInferImpl(AttentionImpl): ...@@ -561,6 +562,11 @@ class FlashInferImpl(AttentionImpl):
""" """
assert output is not None, "Output tensor must be provided." assert output is not None, "Output tensor must be provided."
if output_scale is not None:
raise NotImplementedError(
"fused output quantization is not yet supported"
" for FlashInferImpl")
if attn_metadata is None: if attn_metadata is None:
# Profiling run. # Profiling run.
return output return output
......
...@@ -414,6 +414,7 @@ class FlexAttentionImpl(AttentionImpl): ...@@ -414,6 +414,7 @@ class FlexAttentionImpl(AttentionImpl):
kv_cache: torch.Tensor, kv_cache: torch.Tensor,
attn_metadata: FlexAttentionMetadata, attn_metadata: FlexAttentionMetadata,
output: Optional[torch.Tensor] = None, output: Optional[torch.Tensor] = None,
output_scale: Optional[torch.Tensor] = None,
) -> torch.Tensor: ) -> torch.Tensor:
"""Forward pass with FLexAttention. """Forward pass with FLexAttention.
...@@ -427,6 +428,12 @@ class FlexAttentionImpl(AttentionImpl): ...@@ -427,6 +428,12 @@ class FlexAttentionImpl(AttentionImpl):
shape = [num_tokens, num_heads * head_size] shape = [num_tokens, num_heads * head_size]
""" """
assert output is not None, "Output tensor must be provided." assert output is not None, "Output tensor must be provided."
if output_scale is not None:
raise NotImplementedError(
"fused output quantization is not yet supported"
" for FlexAttentionImpl")
enable_gqa = self.num_kv_heads != self.num_heads enable_gqa = self.num_kv_heads != self.num_heads
if attn_metadata is None: if attn_metadata is None:
......
...@@ -865,10 +865,16 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]): ...@@ -865,10 +865,16 @@ class MLACommonImpl(MLAAttentionImpl[M], Generic[M]):
kv_cache: torch.Tensor, kv_cache: torch.Tensor,
attn_metadata: M, attn_metadata: M,
output: Optional[torch.Tensor] = None, output: Optional[torch.Tensor] = None,
output_scale: Optional[torch.Tensor] = None,
) -> torch.Tensor: ) -> torch.Tensor:
assert output is not None, "Output tensor must be provided." assert output is not None, "Output tensor must be provided."
if output_scale is not None:
raise NotImplementedError(
"fused output quantization is not yet supported"
" for MLACommonImpl")
if attn_metadata is None: if attn_metadata is None:
# The zero fill is required when used with DP + EP # The zero fill is required when used with DP + EP
# to ensure all ranks within a DP group compute the # to ensure all ranks within a DP group compute the
......
...@@ -161,6 +161,7 @@ class PallasAttentionBackendImpl(AttentionImpl): ...@@ -161,6 +161,7 @@ class PallasAttentionBackendImpl(AttentionImpl):
kv_cache: torch.Tensor, kv_cache: torch.Tensor,
attn_metadata: PallasMetadata, attn_metadata: PallasMetadata,
output: Optional[torch.Tensor] = None, output: Optional[torch.Tensor] = None,
output_scale: Optional[torch.Tensor] = None,
) -> torch.Tensor: ) -> torch.Tensor:
"""Forward pass with Pallas attention. """Forward pass with Pallas attention.
...@@ -173,6 +174,11 @@ class PallasAttentionBackendImpl(AttentionImpl): ...@@ -173,6 +174,11 @@ class PallasAttentionBackendImpl(AttentionImpl):
Returns: Returns:
shape = [num_tokens, num_heads * head_size] shape = [num_tokens, num_heads * head_size]
""" """
if output_scale is not None:
raise NotImplementedError(
"fused output quantization is not yet supported"
" for PallasAttentionBackendImpl")
# For determine_available_memory case. # For determine_available_memory case.
if kv_cache.numel() == 0: if kv_cache.numel() == 0:
if output is None: if output is None:
......
...@@ -142,6 +142,7 @@ class TritonAttentionImpl(AttentionImpl): ...@@ -142,6 +142,7 @@ class TritonAttentionImpl(AttentionImpl):
kv_cache: torch.Tensor, kv_cache: torch.Tensor,
attn_metadata: FlashAttentionMetadata, attn_metadata: FlashAttentionMetadata,
output: Optional[torch.Tensor] = None, output: Optional[torch.Tensor] = None,
output_scale: Optional[torch.Tensor] = None,
) -> torch.Tensor: ) -> torch.Tensor:
"""Forward pass with FlashAttention. """Forward pass with FlashAttention.
...@@ -156,6 +157,11 @@ class TritonAttentionImpl(AttentionImpl): ...@@ -156,6 +157,11 @@ class TritonAttentionImpl(AttentionImpl):
""" """
assert output is not None, "Output tensor must be provided." assert output is not None, "Output tensor must be provided."
if output_scale is not None:
raise NotImplementedError(
"fused output quantization is not yet supported"
" for TritonAttentionImpl")
if attn_metadata is None: if attn_metadata is None:
# Profiling run. # Profiling run.
return output return output
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment