"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "2da6fd722fe4b0fad758efd401690df47fca799c"
Unverified Commit f2d16207 authored by Andreas Karatzas, committed by GitHub
Browse files

[ROCm][CI] Fix flaky GPTQ compile correctness test (#38161)


Signed-off-by: Andreas Karatzas <akaratza@amd.com>
parent 37a83007
...@@ -137,6 +137,7 @@ def test_compile_correctness( ...@@ -137,6 +137,7 @@ def test_compile_correctness(
all_args.append( all_args.append(
final_args + [f"-cc.mode={mode.name}", "-cc.backend=inductor"] final_args + [f"-cc.mode={mode.name}", "-cc.backend=inductor"]
) )
all_envs.append({})
# inductor will change the output, so we only compare if the output # inductor will change the output, so we only compare if the output
# is close, not exactly the same. # is close, not exactly the same.
...@@ -157,6 +158,5 @@ def test_compile_correctness( ...@@ -157,6 +158,5 @@ def test_compile_correctness(
]: ]:
all_args.append(final_args + [f"-cc.mode={mode.name}", "-cc.backend=eager"]) all_args.append(final_args + [f"-cc.mode={mode.name}", "-cc.backend=eager"])
all_envs.append({}) all_envs.append({})
all_envs.append({})
compare_all_settings(model, all_args * 3, all_envs, method=method) compare_all_settings(model, all_args, all_envs, method=method)
...@@ -1348,40 +1348,47 @@ def initialize_single_dummy_weight( ...@@ -1348,40 +1348,47 @@ def initialize_single_dummy_weight(
high: float = 1e-3, high: float = 1e-3,
seed: int = 1234, seed: int = 1234,
) -> None: ) -> None:
if torch.is_floating_point(param): if not torch.is_floating_point(param):
if current_platform.is_tpu(): if current_platform.is_rocm():
generator = torch.Generator(device="cpu") # On ROCm, integer params (e.g. GPTQ qweight/qzeros) are left
generator.manual_seed(seed) # as torch.empty() by default, giving non-deterministic values
# Note: The param.uniform_ function cannot be used in this # across processes. Zero them for reproducibility.
# context because it demands more TPU HBM than directly copying param.zero_()
# from a CPU tensor. return
# Note: We avoid using torch.rand_like as it doesn't currently
# support the generator argument.
param.copy_(
(high - low)
* torch.rand(
param.shape,
generator=generator,
dtype=param.dtype,
layout=param.layout,
requires_grad=param.requires_grad,
device="cpu",
)
+ low
)
torch._sync(param)
return
generator = torch.Generator(device=param.data.device) if current_platform.is_tpu():
generator = torch.Generator(device="cpu")
generator.manual_seed(seed) generator.manual_seed(seed)
if torch.finfo(param.data.dtype).bits < 16: # Note: The param.uniform_ function cannot be used in this
# uniform_ doesn't support < 16-bit datatypes (FP8) # context because it demands more TPU HBM than directly copying
dtype = param.data.dtype # from a CPU tensor.
tmp_param = param.data.to(torch.float16) # Note: We avoid using torch.rand_like as it doesn't currently
tmp_param = tmp_param.uniform_(low, high, generator=generator).to(dtype) # support the generator argument.
param.data.copy_(tmp_param) param.copy_(
else: (high - low)
param.uniform_(low, high, generator=generator) * torch.rand(
param.shape,
generator=generator,
dtype=param.dtype,
layout=param.layout,
requires_grad=param.requires_grad,
device="cpu",
)
+ low
)
torch._sync(param)
return
generator = torch.Generator(device=param.data.device)
generator.manual_seed(seed)
if torch.finfo(param.data.dtype).bits < 16:
# uniform_ doesn't support < 16-bit datatypes (FP8)
dtype = param.data.dtype
tmp_param = param.data.to(torch.float16)
tmp_param = tmp_param.uniform_(low, high, generator=generator).to(dtype)
param.data.copy_(tmp_param)
else:
param.uniform_(low, high, generator=generator)
def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> str | None: def maybe_remap_kv_scale_name(name: str, params_dict: dict) -> str | None:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment