Unverified Commit 677ff400 authored by Matthew Douglas, committed by GitHub

Drop Python 3.8 support. (#1574)

* Drop Python 3.8 support.

* Formatting
parent 9b339952
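Most of the typing changes in this diff follow one pattern: with Python 3.8 dropped, the deprecated `typing.Tuple`, `typing.List`, and `typing.Dict` aliases can become the built-in generics standardized in PEP 585 (Python 3.9+), and `Sequence`/`Iterable` move to `collections.abc`. A minimal before/after sketch of that pattern (illustrative only, not a snippet from this repository):

```python
# Python 3.8 style: generic aliases must come from typing.
from typing import Dict, Iterable, Tuple

def pack(items: Iterable[int]) -> Tuple[Dict[str, int], int]:
    mapping = {str(i): i for i in items}
    return mapping, len(mapping)

# Python 3.9+ style (PEP 585 built-in generics, as used throughout this commit):
from collections.abc import Iterable

def pack(items: Iterable[int]) -> tuple[dict[str, int], int]:
    mapping = {str(i): i for i in items}
    return mapping, len(mapping)
```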
......@@ -111,7 +111,7 @@ jobs:
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
# The specific Python version is irrelevant in this context as we are only packaging non-C extension
# code. This ensures compatibility across Python versions, including Python 3.8, as compatibility is
# code. This ensures compatibility across Python versions, including Python 3.9, as compatibility is
# dictated by the packaged code itself, not the Python version used for packaging.
python-version: ["3.10"]
arch: [x86_64, aarch64]
......
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.6.9
rev: v0.11.2
hooks:
- id: ruff
args:
......
......@@ -65,4 +65,4 @@ for i in range(5):
print("=" * 40)
print(f"Example:\n{tokenizer.decode(generated_ids[0])}")
print("=" * 40)
print(f"Speed: {num/(time.time() - time_1)}token/s")
print(f"Speed: {num / (time.time() - time_1)}token/s")
......@@ -66,7 +66,7 @@ def test_bench_matmul(batch, seq, model, hidden):
torch.matmul(A, B.t())
torch.cuda.synchronize()
print(
f"pytorch fp16: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s",
f"pytorch fp16: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s",
)
# torch.cuda.synchronize()
......@@ -88,14 +88,16 @@ def test_bench_matmul(batch, seq, model, hidden):
for i in range(iters):
bnb.matmul_4bit(A, B_nf4.t(), quant_state=state_nf4)
torch.cuda.synchronize()
print(f"bnb nf4: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s")
print(f"bnb nf4: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s")
torch.cuda.synchronize()
t0 = time.time()
for i in range(iters):
bnb.matmul_4bit(A, B_nf4_c.t(), quant_state=state_nf4_c)
torch.cuda.synchronize()
print(f"bnb nf4+DQ: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s")
print(
f"bnb nf4+DQ: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
)
torch.cuda.synchronize()
t0 = time.time()
......@@ -103,7 +105,7 @@ def test_bench_matmul(batch, seq, model, hidden):
bnb.matmul(A, B)
torch.cuda.synchronize()
print(
f"B -> CB (each iteration): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s"
f"B -> CB (each iteration): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
)
torch.cuda.synchronize()
......@@ -112,7 +114,7 @@ def test_bench_matmul(batch, seq, model, hidden):
bnb.matmul(A, B, threshold=6.0)
torch.cuda.synchronize()
print(
f"B -> CB + threshold: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s"
f"B -> CB + threshold: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
)
CA, SCA, _ = F.int8_vectorwise_quant(A, threshold=0.0)
......@@ -124,7 +126,7 @@ def test_bench_matmul(batch, seq, model, hidden):
out32 = F.int8_linear_matmul(CA, CB)
torch.cuda.synchronize()
print(
f"no overhead int8 [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s"
f"no overhead int8 [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
)
# C32A, SA = F.transform(CA, "col32")
......@@ -183,7 +185,7 @@ def test_bench_matmul(batch, seq, model, hidden):
linear8bit(A)
torch.cuda.synchronize()
print(
f"bnb linear8bitlt (eval): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s"
f"bnb linear8bitlt (eval): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
)
linearMixedBit(A)
......@@ -193,7 +195,7 @@ def test_bench_matmul(batch, seq, model, hidden):
linearMixedBit(A)
torch.cuda.synchronize()
print(
f"bnb linear8bitlt with threshold (eval): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s"
f"bnb linear8bitlt with threshold (eval): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
)
# linear8bit_train(A)
......
from collections.abc import Sequence
from math import prod
from typing import Optional, Sequence, Tuple
from typing import Optional
import torch
......@@ -131,7 +132,7 @@ torch.library.define(
def _(
A: torch.Tensor,
threshold=0.0,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
out_row = torch.empty_like(A, dtype=torch.int8)
out_col = torch.empty_like(A, dtype=torch.int8)
row_stats = torch.empty(prod(A.shape[:-1]), device=A.device, dtype=torch.float32)
......@@ -191,7 +192,7 @@ torch.library.define(
@register_fake("bitsandbytes::quantize_4bit")
def _(
A: torch.Tensor, blocksize: int, quant_type: str, quant_storage: torch.dtype
) -> Tuple[torch.Tensor, torch.Tensor]:
) -> tuple[torch.Tensor, torch.Tensor]:
torch._check_is_size(blocksize)
n = A.numel()
......@@ -235,7 +236,7 @@ torch.library.define("bitsandbytes::quantize_blockwise", "(Tensor A, Tensor code
@register_fake("bitsandbytes::quantize_blockwise")
def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> Tuple[torch.Tensor, torch.Tensor]:
def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torch.Tensor, torch.Tensor]:
torch._check_is_size(blocksize)
n = A.numel()
blocks = -(n // -blocksize)
......
from dataclasses import dataclass
from math import prod
from typing import Callable, Optional, Tuple
from typing import Callable, Optional
import warnings
from warnings import warn
......@@ -55,7 +55,7 @@ class GlobalOutlierPooler:
)
def get_inverse_transform_indices(
transform_tile: Callable[[torch.Tensor], torch.Tensor],
tile_size: Tuple[int, int],
tile_size: tuple[int, int],
):
"""
Compute a permutation of indices that invert the specified (tiled) matrix transformation
......
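The docstring above describes recovering the permutation that undoes a tiled layout transform. As a rough illustration of the idea only (not the library's implementation; it assumes `transform_tile` purely permutes elements and accepts an integer tensor):

```python
import torch

def inverse_tile_indices(transform_tile, tile_size):
    """Track where each element of a (rows, cols) tile lands under transform_tile,
    then build the permutation that maps the transformed layout back."""
    rows, cols = tile_size
    labels = torch.arange(rows * cols).reshape(rows, cols)  # tag each position
    permuted = transform_tile(labels).flatten()             # where each tag ends up
    inverse = torch.empty(rows * cols, dtype=torch.long)
    inverse[permuted] = torch.arange(rows * cols)            # invert the permutation
    return inverse

# Example: a transpose is a pure permutation of elements, so it can be inverted this way.
inv = inverse_tile_indices(lambda t: t.T.contiguous(), (4, 8))
```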
import ctypes as ct
from typing import Optional, Tuple
from typing import Optional
import torch
......@@ -47,7 +47,7 @@ def _(
@register_kernel("bitsandbytes::quantize_blockwise", "cpu")
def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> Tuple[torch.Tensor, torch.Tensor]:
def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torch.Tensor, torch.Tensor]:
torch._check_is_size(blocksize)
torch._check(A.dtype == torch.float32, lambda: f"A must be float32 on cpu, got {A.dtype}")
......@@ -116,7 +116,7 @@ _NF4_QUANT_TABLE = torch.tensor(
@register_kernel("bitsandbytes::quantize_4bit", "cpu")
def _(
A: torch.Tensor, blocksize: int, quant_type: str, quant_storage: torch.dtype
) -> Tuple[torch.Tensor, torch.Tensor]:
) -> tuple[torch.Tensor, torch.Tensor]:
torch._check_is_size(blocksize)
torch._check(quant_type == "nf4", lambda: f"quant_type must be nf4 on CPU, got {quant_type}")
......
from collections.abc import Sequence
import ctypes as ct
from math import prod
from typing import Optional, Sequence, Tuple
from typing import Optional
import torch
......@@ -78,10 +79,7 @@ def _int8_linear_matmul_impl(A: torch.Tensor, B: torch.Tensor, out: torch.Tensor
raise NotImplementedError("int8_linear_matmul not implemented!")
else:
raise RuntimeError(
f"cublasLt ran into an error!\n"
f"\t{shapeA=}, {shapeB=}, {shapeC=}\n"
f"\t{(lda, ldb, ldc)=}\n"
f"\t{(m, n, k)=}"
f"cublasLt ran into an error!\n\t{shapeA=}, {shapeB=}, {shapeC=}\n\t{(lda, ldb, ldc)=}\n\t{(m, n, k)=}"
)
return out
......@@ -169,7 +167,7 @@ def _(A: torch.Tensor, threshold=0.0):
def _(
A: torch.Tensor,
threshold=0.0,
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
# Use CUDA kernel for rowwise and COO tensor
quant_row, row_stats, outlier_cols = torch.ops.bitsandbytes.int8_vectorwise_quant.default(
A,
......@@ -188,7 +186,7 @@ def _(
def _get_col_absmax(
A: torch.Tensor,
threshold=0.0,
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
torch._check(A.is_floating_point())
outlier_mask = None
......@@ -207,7 +205,7 @@ def _get_col_absmax(
@register_kernel("bitsandbytes::quantize_blockwise", "cuda")
def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> Tuple[torch.Tensor, torch.Tensor]:
def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torch.Tensor, torch.Tensor]:
torch._check_is_size(blocksize)
torch._check(blocksize in [4096, 2048, 1024, 512, 256, 128, 64])
torch._check(code.dtype == torch.float32, lambda: f"code must be float32, got {code.dtype}")
......@@ -292,7 +290,7 @@ def _dequantize_blockwise_impl(
@register_kernel("bitsandbytes::quantize_4bit", "cuda")
def _(
A: torch.Tensor, blocksize: int, quant_type: str, quant_storage: torch.dtype
) -> Tuple[torch.Tensor, torch.Tensor]:
) -> tuple[torch.Tensor, torch.Tensor]:
torch._check(blocksize in [4096, 2048, 1024, 512, 256, 128, 64])
torch._check(quant_type in ["fp4", "nf4"])
torch._check(
......
import dataclasses
from functools import lru_cache
from typing import List, Optional, Tuple
from typing import Optional
import torch
@dataclasses.dataclass(frozen=True)
class CUDASpecs:
highest_compute_capability: Tuple[int, int]
highest_compute_capability: tuple[int, int]
cuda_version_string: str
cuda_version_tuple: Tuple[int, int]
cuda_version_tuple: tuple[int, int]
@property
def has_imma(self) -> bool:
return torch.version.hip or self.highest_compute_capability >= (7, 5)
def get_compute_capabilities() -> List[Tuple[int, int]]:
def get_compute_capabilities() -> list[tuple[int, int]]:
return sorted(torch.cuda.get_device_capability(torch.cuda.device(i)) for i in range(torch.cuda.device_count()))
@lru_cache(None)
def get_cuda_version_tuple() -> Tuple[int, int]:
def get_cuda_version_tuple() -> tuple[int, int]:
if torch.version.cuda:
return map(int, torch.version.cuda.split(".")[0:2])
elif torch.version.hip:
......
from collections.abc import Iterable, Iterator
import logging
import os
from pathlib import Path
from typing import Dict, Iterable, Iterator
import torch
......@@ -76,7 +76,7 @@ def is_relevant_candidate_env_var(env_var: str, value: str) -> bool:
)
def get_potentially_lib_path_containing_env_vars() -> Dict[str, str]:
def get_potentially_lib_path_containing_env_vars() -> dict[str, str]:
return {env_var: value for env_var, value in os.environ.items() if is_relevant_candidate_env_var(env_var, value)}
......
......@@ -2,10 +2,11 @@
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from collections.abc import Iterable
import ctypes as ct
import itertools
from math import prod
from typing import Any, Dict, Iterable, Optional, Tuple, Union
from typing import Any, Optional, Union
import numpy as np
import torch
......@@ -619,7 +620,7 @@ class QuantState:
return list_repr[idx]
@classmethod
def from_dict(cls, qs_dict: Dict[str, Any], device: torch.device) -> "QuantState":
def from_dict(cls, qs_dict: dict[str, Any], device: torch.device) -> "QuantState":
"""
unpacks components of state_dict into QuantState
where necessary, convert into strings, torch.dtype, ints, etc.
......@@ -741,7 +742,7 @@ def quantize_blockwise(
out: Optional[torch.Tensor] = None,
blocksize=4096,
nested=False,
) -> Tuple[torch.Tensor, QuantState]:
) -> tuple[torch.Tensor, QuantState]:
"""Quantize a tensor in blocks of values.
The input tensor is quantized by dividing it into blocks of `blocksize` values.
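For context, a minimal usage sketch of the blockwise quantization API whose signature changes here (assumes a CUDA device; shapes and tolerances are illustrative, and the round trip is approximate by design):

```python
import torch
import bitsandbytes.functional as F

A = torch.randn(4096, device="cuda")
quantized, quant_state = F.quantize_blockwise(A, blocksize=4096)  # int8 codes + per-block absmax
restored = F.dequantize_blockwise(quantized, quant_state)         # approximate reconstruction
print(torch.allclose(A, restored, atol=1e-2))
```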
......@@ -994,7 +995,7 @@ def quantize_4bit(
compress_statistics=False,
quant_type="fp4",
quant_storage=torch.uint8,
) -> Tuple[torch.Tensor, QuantState]:
) -> tuple[torch.Tensor, QuantState]:
"""Quantize tensor A in blocks of 4-bit values.
Quantizes tensor A by dividing it into blocks which are independently quantized.
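Similarly, a short usage sketch for the 4-bit path (assumes a CUDA device; NF4 shown, but `quant_type="fp4"` works the same way):

```python
import torch
import bitsandbytes.functional as F

W = torch.randn(1024, 1024, dtype=torch.float16, device="cuda")
W_4bit, state = F.quantize_4bit(W, blocksize=64, quant_type="nf4")  # packed uint8 storage
W_restored = F.dequantize_4bit(W_4bit, state)                        # back to fp16, lossy
```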
......@@ -1161,7 +1162,7 @@ def quantize(
A: Tensor,
code: Optional[torch.Tensor] = None,
out: Optional[torch.Tensor] = None,
) -> Tuple[Tensor, Tuple[Tensor, Tensor]]:
) -> tuple[Tensor, tuple[Tensor, Tensor]]:
if code is None:
if "dynamic" not in name2qmap:
name2qmap["dynamic"] = create_dynamic_map().to(A.device)
......@@ -1179,7 +1180,7 @@ def quantize(
@deprecated("This function is deprecated and will be removed in a future release.", category=FutureWarning)
def dequantize(
A: Tensor,
state: Optional[Tuple[Tensor, Tensor]] = None,
state: Optional[tuple[Tensor, Tensor]] = None,
absmax: Optional[torch.Tensor] = None,
code: Optional[torch.Tensor] = None,
out: Optional[torch.Tensor] = None,
......@@ -2006,7 +2007,7 @@ def get_colrow_absmax(
col_stats: Optional[torch.Tensor] = None,
nnz_block_ptr: Optional[torch.Tensor] = None,
threshold=0.0,
) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
) -> tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
""" "Determine the quantization statistics for input matrix `A` in accordance to the `LLM.int8()` algorithm.
The row-wise and column-wise absmax values are determined.
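The statistics described here can be pictured with plain PyTorch (a conceptual sketch only, not the fused kernel; the outlier handling via `threshold` is simplified):

```python
import torch

def colrow_absmax_reference(A: torch.Tensor, threshold: float = 0.0):
    # Row-wise and column-wise absolute maxima over a 2D matrix A.
    absA = A.abs().float()
    if threshold > 0.0:
        # Values at or above the threshold are treated as outliers and excluded from
        # the stats (the real kernel additionally reports which columns they fall in).
        absA = absA.masked_fill(absA >= threshold, 0.0)
    row_stats = absA.amax(dim=1)
    col_stats = absA.amax(dim=0)
    return row_stats, col_stats
```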
......@@ -2268,9 +2269,9 @@ def spmm_coo(
out: Optional[torch.Tensor] = None,
):
if not isinstance(cooA, COOSparseTensor):
assert (
cooA.is_sparse and cooA.layout == torch.sparse_coo
), "Tensor must be `COOSparseTensor or a PyTorch COO tensor."
assert cooA.is_sparse and cooA.layout == torch.sparse_coo, (
"Tensor must be `COOSparseTensor or a PyTorch COO tensor."
)
# Convert to custom COOSparseTensor
cooA = COOSparseTensor(
......
......@@ -3,7 +3,7 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import copy
from typing import Any, Dict, Optional, TypeVar, Union, overload
from typing import Any, Optional, TypeVar, Union, overload
import warnings
import torch
......@@ -268,7 +268,7 @@ class Params4bit(torch.nn.Parameter):
def from_prequantized(
cls,
data: torch.Tensor,
quantized_stats: Dict[str, Any],
quantized_stats: dict[str, Any],
requires_grad: bool = False,
device="cuda",
module: Optional["Linear4bit"] = None,
......
from collections.abc import Iterable
import math
from typing import Iterable, Literal, Optional, Tuple
from typing import Literal, Optional
import torch
......@@ -16,7 +17,7 @@ class _ReferenceAdEMAMix(torch.optim.Optimizer):
self,
params: Iterable[torch.nn.Parameter],
lr: float = 1e-3,
betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
alpha: float = 5.0,
eps: float = 1e-8,
weight_decay: float = 1e-2, # default 0.0 or 1e-2?
......@@ -108,7 +109,7 @@ class AdEMAMix(Optimizer2State):
self,
params: Iterable[torch.nn.Parameter],
lr: float = 1e-3,
betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
alpha: float = 5.0,
t_alpha: Optional[int] = None,
t_beta3: Optional[int] = None,
......@@ -151,7 +152,7 @@ class AdEMAMix(Optimizer2State):
elif config["optim_bits"] == 8:
dtype = torch.uint8
else:
raise NotImplementedError(f'Amount of optimizer bits not supported: {config["optim_bits"]}')
raise NotImplementedError(f"Amount of optimizer bits not supported: {config['optim_bits']}")
if p.numel() < config["min_8bit_size"]:
dtype = torch.float32
......@@ -274,7 +275,7 @@ class AdEMAMix8bit(AdEMAMix):
self,
params: Iterable[torch.nn.Parameter],
lr: float = 1e-3,
betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
alpha: float = 5.0,
t_alpha: Optional[int] = None,
t_beta3: Optional[int] = None,
......@@ -303,7 +304,7 @@ class PagedAdEMAMix8bit(AdEMAMix8bit):
self,
params: Iterable[torch.nn.Parameter],
lr: float = 1e-3,
betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
alpha: float = 5.0,
t_alpha: Optional[int] = None,
t_beta3: Optional[int] = None,
......@@ -330,7 +331,7 @@ class PagedAdEMAMix(AdEMAMix):
self,
params: Iterable[torch.nn.Parameter],
lr: float = 1e-3,
betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
alpha: float = 5.0,
t_alpha: Optional[int] = None,
t_beta3: Optional[int] = None,
......@@ -359,7 +360,7 @@ class AdEMAMix32bit(Optimizer2State):
self,
params: Iterable[torch.nn.Parameter],
lr: float = 1e-3,
betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
alpha: float = 5.0,
t_alpha: Optional[int] = None,
t_beta3: Optional[int] = None,
......@@ -392,7 +393,7 @@ class PagedAdEMAMix32bit(AdEMAMix32bit):
self,
params: Iterable[torch.nn.Parameter],
lr: float = 1e-3,
betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
alpha: float = 5.0,
t_alpha: Optional[int] = None,
t_beta3: Optional[int] = None,
......
......@@ -450,7 +450,7 @@ class Optimizer2State(Optimizer8bit):
elif config["optim_bits"] == 8:
dtype = torch.uint8
else:
raise NotImplementedError(f'Amount of optimizer bits not supported: {config["optim_bits"]}')
raise NotImplementedError(f"Amount of optimizer bits not supported: {config['optim_bits']}")
if p.numel() < config["min_8bit_size"]:
dtype = torch.float32
......@@ -677,7 +677,7 @@ class Optimizer1State(Optimizer8bit):
elif config["optim_bits"] == 8:
dtype = torch.uint8
else:
raise NotImplementedError(f'Amount of optimizer bits not supported: {config["optim_bits"]}')
raise NotImplementedError(f"Amount of optimizer bits not supported: {config['optim_bits']}")
if p.numel() < config["min_8bit_size"]:
dtype = torch.float32
......
......@@ -128,7 +128,7 @@ def estimate_matmul_time(
print(
f"Total time: {total_time_ms}ms, compute time: {compute_ms}ms, "
f"loading time: {load_ms}ms, store time: {store_ms}ms, "
f"Activate CTAs: {active_cta_ratio*100}%"
f"Activate CTAs: {active_cta_ratio * 100}%"
)
return total_time_ms
......
import json
import shlex
import subprocess
from typing import Tuple
import torch
......@@ -104,7 +103,7 @@ def find_outlier_dims(weight, reduction_dim=0, zscore=4.0, topk=None, rdm=False)
return idx
def execute_and_return(command_string: str) -> Tuple[str, str]:
def execute_and_return(command_string: str) -> tuple[str, str]:
def _decode(subprocess_err_out_tuple):
return tuple(to_decode.decode("UTF-8").strip() for to_decode in subprocess_err_out_tuple)
......
......@@ -8,7 +8,7 @@ text = "Hamburg is in which country?\n"
tokenizer = LlamaTokenizer.from_pretrained(model_name)
input_ids = tokenizer(text, return_tensors="pt").input_ids
max_memory = f"{int(torch.cuda.mem_get_info()[0]/1024**3)-2}GB"
max_memory = f"{int(torch.cuda.mem_get_info()[0] / 1024**3) - 2}GB"
n_gpus = torch.cuda.device_count()
max_memory = {i: max_memory for i in range(n_gpus)}
......
......@@ -11,7 +11,7 @@ maintainers = [
{name="Titus von Köller", email="titus@huggingface.co"},
{name="Matthew Douglas", email="matthew.douglas@huggingface.co"}
]
requires-python = ">=3.8"
requires-python = ">=3.9"
readme = "README.md"
license = {file="LICENSE"}
keywords = [
......@@ -34,11 +34,11 @@ classifiers = [
"Operating System :: Microsoft :: Windows",
"Programming Language :: C++",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Topic :: Scientific/Engineering :: Artificial Intelligence"
]
dependencies = [
......@@ -58,7 +58,7 @@ docs = ["hf-doc-builder==0.5.0"]
dev = [
"bitsandbytes[test]",
"build>=1.0.0,<2",
"ruff==0.9.6",
"ruff==0.11.2",
"pre-commit>=3.5.0,<4",
"wheel>=0.42,<1"
]
......@@ -66,7 +66,6 @@ test = [
"einops~=0.8.0",
"lion-pytorch==0.2.3",
"pytest~=8.3",
"scipy>=1.10.1,<2; python_version < '3.9'",
"scipy>=1.11.4,<2; python_version >= '3.9'",
"transformers>=4.30.1,<5"
]
......@@ -101,7 +100,7 @@ src = [
"tests",
"benchmarking"
]
target-version = "py38"
target-version = "py39"
line-length = 119
[tool.ruff.lint]
......@@ -124,6 +123,7 @@ ignore = [
"E731", # Do not use lambda
"F841", # Local assigned but not used (TODO: enable, these are likely bugs)
"RUF012", # Mutable class attribute annotations
"RUF034", # Useless if-else (TODO: enable)
"ISC001", # single-line-implicit-string-concatenation incompatible with formatter
]
......
from io import BytesIO
from itertools import product
import random
from typing import Any, List
from typing import Any
import torch
......@@ -27,7 +27,7 @@ def torch_load_from_buffer(buffer):
return obj
def get_test_dims(min: int, max: int, *, n: int) -> List[int]:
def get_test_dims(min: int, max: int, *, n: int) -> list[int]:
return [test_dims_rng.randint(min, max) for _ in range(n)]
......
......@@ -674,12 +674,12 @@ class TestLLMInt8Functional:
min_error = 1 / 500
if num_not_close_cols > (min_error * n):
print(
f"Min error exceeded {num_not_close_cols} elements are different. Error: {num_not_close_cols/n:.4f}"
f"Min error exceeded {num_not_close_cols} elements are different. Error: {num_not_close_cols / n:.4f}"
)
assert False
if num_not_close_rows > (min_error * n):
print(
f"Min error exceeded {num_not_close_rows} elements are different. Error: {num_not_close_rows/n:.4f}"
f"Min error exceeded {num_not_close_rows} elements are different. Error: {num_not_close_rows / n:.4f}"
)
assert False
......