# Table of contents for the documentation pages.
# Each top-level list item is a group made of a `sections` list and a group
# `title`; each section pairs a `local` page identifier with its display
# `title`.
# NOTE(review): `local` values look like page paths without a file
# extension, relative to the docs source directory — confirm against the
# docs build tooling.

# Group: Getting started
- sections:
  - local: index
    title: Text Generation Inference
  - local: quicktour
    title: Quick Tour
  - local: supported_models
    title: Supported Models
  - local: installation_nvidia
    title: Using TGI with Nvidia GPUs
  - local: installation_amd
    title: Using TGI with AMD GPUs
  - local: installation_gaudi
    title: Using TGI with Intel Gaudi
  - local: installation_inferentia
    title: Using TGI with AWS Inferentia
  - local: installation_intel
    title: Using TGI with Intel GPUs
  - local: installation
    title: Installation from source

  - local: architecture
    title: Internal Architecture
  - local: usage_statistics
    title: Usage Statistics
  title: Getting started
# Group: Tutorials — every page in this group lives under `basic_tutorials/`.
- sections:
  - local: basic_tutorials/consuming_tgi
    title: Consuming TGI
  - local: basic_tutorials/preparing_model
    title: Preparing Model for Serving
  - local: basic_tutorials/gated_model_access
    title: Serving Private & Gated Models
  - local: basic_tutorials/using_cli
    title: Using TGI CLI
  - local: basic_tutorials/non_core_models
    title: Non-core Model Serving
  - local: basic_tutorials/safety
    title: Safety
  - local: basic_tutorials/using_guidance
    title: Using Guidance, JSON, tools
  - local: basic_tutorials/visual_language_models
    title: Visual Language Models
  - local: basic_tutorials/monitoring
    title: Monitoring TGI with Prometheus and Grafana
  - local: basic_tutorials/train_medusa
    title: Train Medusa
  # Group heading for the entries listed above.
  title: Tutorials
# Group: Reference — every page in this group lives under `reference/`.
- sections:
  - local: reference/launcher
    title: All TGI CLI options
  - local: reference/metrics
    title: Exported Metrics
  - local: reference/api_reference
    title: API Reference
  title: Reference
# Group: Conceptual Guides — every page in this group lives under
# `conceptual/`.
- sections:
  - local: conceptual/streaming
    title: Streaming
  - local: conceptual/quantization
    title: Quantization
  - local: conceptual/tensor_parallelism
    title: Tensor Parallelism
  - local: conceptual/paged_attention
    title: PagedAttention
  - local: conceptual/safetensors
    title: Safetensors
  - local: conceptual/flash_attention
    title: Flash Attention
  - local: conceptual/speculation
    title: Speculation (Medusa, ngram)
  - local: conceptual/guidance
    title: How Guidance Works (via outlines)
  - local: conceptual/lora
    title: LoRA (Low-Rank Adaptation)
  - local: conceptual/external
    title: External Resources
  title: Conceptual Guides