[tool.poetry]
name = "text-generation-server"
version = "2.0.5-dev0"
description = "Text Generation Inference Python gRPC Server"
authors = ["Olivier Dehaene <olivier@huggingface.co>"]

[tool.poetry.scripts]
text-generation-server = 'text_generation_server.cli:app'

[tool.poetry.dependencies]
python = ">=3.9,<3.13"
protobuf = ">=4.25.3,<6"
grpcio = "^1.51.1"
grpcio-status = "^1.51.1"
grpcio-reflection = "^1.51.1"
grpc-interceptor = "^0.15.4"
typer = "^0.12.5"
accelerate = { version = "^1.1.0", optional = true }
bitsandbytes = { version = "^0.43.0", optional = true }
safetensors = "^0.4.5"
loguru = "^0.7.2"
opentelemetry-api = "^1.27.0"
opentelemetry-exporter-otlp = "^1.27.0"
opentelemetry-instrumentation-grpc = "^0.48b0"
hf-transfer = "^0.1.2"
sentencepiece = "^0.2.0"
tokenizers = "^0.20.3"
huggingface-hub = "^0.23"
transformers = "^4.46.2"
einops = "^0.8.0"
texttable = { version = "^1.6.7", optional = true }
datasets = { version = "^2.21.0", optional = true }
peft = { version = "^0.13.2", optional = true }
torch = { version = "^2.4.1", optional = true }
scipy = "^1.13.1"
pillow = "^11.0.0"
outlines = { version = "^0.1.3", optional = true }
prometheus-client = ">=0.20.0,<0.22"
py-cpuinfo = "^9.0.0"
compressed-tensors = { version = "^0.7.1", optional = true }
# Remove later, temporary workaround for outlines.
numpy = "^1.26.4"

# Prebuilt CUDA kernel wheels, pinned per CPython version (cu123 / torch 2.4).
attention-kernels = [
  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
  { url = "https://github.com/danieldk/attention-kernels/releases/download/v0.1.1/attention_kernels-0.1.1+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
]
marlin-kernels = [
  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
  { url = "https://github.com/danieldk/marlin-kernels/releases/download/v0.3.6/marlin_kernels-0.3.6+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
]
moe-kernels = [
  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp39-cp39-linux_x86_64.whl", python = "~3.9", optional = true },
  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp310-cp310-linux_x86_64.whl", python = "~3.10", optional = true },
  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp311-cp311-linux_x86_64.whl", python = "~3.11", optional = true },
  { url = "https://github.com/danieldk/moe-kernels/releases/download/v0.7.0/moe_kernels-0.7.0+cu123torch2.4-cp312-cp312-linux_x86_64.whl", python = "~3.12", optional = true },
]
rich = "^13.8.1"

[tool.poetry.extras]
torch = ["torch"]
accelerate = ["accelerate"]
attention = ["attention-kernels"]
bnb = ["bitsandbytes"]
compressed-tensors = ["compressed-tensors"]
marlin = ["marlin-kernels"]
moe = ["moe-kernels"]
peft = ["peft"]
quantize = ["texttable", "datasets", "accelerate"]
outlines = ["outlines"]

[tool.poetry.group.dev.dependencies]
grpcio-tools = "^1.51.1"
pytest = "^7.3.0"

# Explicit source: only used for dependencies that opt into it, never as a
# default resolution source.
[[tool.poetry.source]]
name = "pytorch-gpu-src"
url = "https://download.pytorch.org/whl/cu121"
priority = "explicit"

[tool.pytest.ini_options]
markers = ["private: marks tests as requiring an admin hf token (deselect with '-m \"not private\"')"]

[build-system]
requires = [
    "poetry-core>=1.0.0",
]
build-backend = "poetry.core.masonry.api"

[tool.isort]
profile = "black"