server.nix 2.16 KB
Newer Older
1
2
3
4
5
{
  nix-filter,
  buildPythonPackage,
  poetry-core,
  mypy-protobuf,
6
  awq-inference-engine,
7
  causal-conv1d,
Nicolas Patry's avatar
Nicolas Patry committed
8
  eetq,
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
  einops,
  exllamav2,
  fbgemm-gpu,
  flashinfer,
  flash-attn,
  flash-attn-layer-norm,
  flash-attn-rotary,
  grpc-interceptor,
  grpcio-reflection,
  grpcio-status,
  grpcio-tools,
  hf-transfer,
  loguru,
  mamba-ssm,
  marlin-kernels,
  opentelemetry-api,
  opentelemetry-exporter-otlp,
  opentelemetry-instrumentation-grpc,
  opentelemetry-semantic-conventions,
  peft,
  safetensors,
  tokenizers,
31
  torch,
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
  sentencepiece,
  transformers,
  typer,
  vllm,
}:

let
  filter = nix-filter.lib;
in
buildPythonPackage {
  name = "text-generation-server";

  src = filter {
    root = ../.;
    include = with filter; [
      isDirectory
      (and (inDirectory "server") (or_ (matchExt "py") (matchExt "pyi")))
      "server/pyproject.toml"
      (and (inDirectory "proto/v3") (matchExt "proto"))
    ];
  };

  pyproject = true;

  build-system = [ poetry-core ];

  nativeBuildInputs = [ mypy-protobuf ];

  pythonRelaxDeps = [
    "einops"
    "huggingface-hub"
    "loguru"
    "opentelemetry-instrumentation-grpc"
    "sentencepiece"
    "typer"
  ];

  pythonRemoveDeps = [ "scipy" ];

  dependencies = [
72
    awq-inference-engine
Nicolas Patry's avatar
Nicolas Patry committed
73
    eetq
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
    causal-conv1d
    einops
    exllamav2
    fbgemm-gpu
    flashinfer
    flash-attn
    flash-attn-layer-norm
    flash-attn-rotary
    grpc-interceptor
    grpcio-reflection
    grpcio-status
    grpcio-tools
    hf-transfer
    loguru
    mamba-ssm
    marlin-kernels
    opentelemetry-api
    opentelemetry-exporter-otlp
    opentelemetry-instrumentation-grpc
    opentelemetry-semantic-conventions
    peft
    safetensors
    sentencepiece
    tokenizers
    transformers
    typer
    vllm
  ];

  prePatch = ''
    python -m grpc_tools.protoc -Iproto/v3 --python_out=server/text_generation_server/pb \
           --grpc_python_out=server/text_generation_server/pb --mypy_out=server/text_generation_server/pb proto/v3/generate.proto
    find server/text_generation_server/pb/ -type f -name "*.py" -print0 -exec sed -i -e 's/^\(import.*pb2\)/from . \1/g' {} \;
    touch server/text_generation_server/pb/__init__.py
    cd server
  '';
}