refactor: move libs to lib dir

Signed-off-by: Neelay Shah <neelays@nvidia.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>

refactor: move libs to lib dir
Signed-off-by: Neelay Shah <neelays@nvidia.com>
Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
08fcd7e9 · Neelay Shah · GitHub · 0bfd9a76 · 08fcd7e9 · 08fcd7e9
Commit 08fcd7e9, authored Feb 24, 2025 by Neelay Shah; committed by GitHub on Feb 24, 2025.
20 changed files
--- a/llm/rust/triton-llm/tests/data/sample-models/TinyLlama_v1.1/tokenizer_config.json
+++ b/llm/rust/triton-llm/tests/data/sample-models/TinyLlama_v1.1/tokenizer_config.json
--- a/lib/llm/tests/data/sample-models/mock-llama-3.1-8b-instruct/config.json
+++ b/lib/llm/tests/data/sample-models/mock-llama-3.1-8b-instruct/config.json
+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "eos_token_id": 128009,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 8192,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.0.dev0",
+  "use_cache": true,
+  "vocab_size": 128256
+}
--- a/lib/llm/tests/data/sample-models/mock-llama-3.1-8b-instruct/generation_config.json
+++ b/lib/llm/tests/data/sample-models/mock-llama-3.1-8b-instruct/generation_config.json
+{
+  "bos_token_id": 128000,
+  "eos_token_id": [128001, 128009],
+  "do_sample": true,
+  "temperature": 0.6,
+  "max_length": 4096,
+  "top_p": 0.9,
+  "transformers_version": "4.40.0.dev0"
+}
--- a/lib/llm/tests/data/sample-models/mock-llama-3.1-8b-instruct/tokenizer.json
+++ b/lib/llm/tests/data/sample-models/mock-llama-3.1-8b-instruct/tokenizer.json
+{
+  "version": "1.0",
+  "truncation": null,
+  "padding": null,
+  "added_tokens": [
+    {
+      "id": 128000,
+      "content": "<|begin_of_text|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 128001,
+      "content": "<|end_of_text|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 128002,
+      "content": "<|reserved_special_token_0|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 128003,
+      "content": "<|reserved_special_token_1|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 128004,
+      "content": "<|reserved_special_token_2|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 128005,
+      "content": "<|reserved_special_token_3|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 128006,
+      "content": "<|start_header_id|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 128007,
+      "content": "<|end_header_id|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 128008,
+      "content": "<|reserved_special_token_4|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 128009,
+      "content": "<|eot_id|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 128010,
+      "content": "<|reserved_special_token_5|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    }
+  ],
+  "normalizer": null,
+  "pre_tokenizer": {
+    "type": "Sequence",
+    "pretokenizers": [
+      {
+        "type": "Split",
+        "pattern": {
+          "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
+        },
+        "behavior": "Isolated",
+        "invert": false
+      },
+      {
+        "type": "ByteLevel",
+        "add_prefix_space": false,
+        "trim_offsets": true,
+        "use_regex": false
+      }
+    ]
+  },
+  "post_processor": {
+    "type": "Sequence",
+    "processors": [
+      {
+        "type": "ByteLevel",
+        "add_prefix_space": true,
+        "trim_offsets": false,
+        "use_regex": true
+      },
+      {
+        "type": "TemplateProcessing",
+        "single": [
+          {
+            "SpecialToken": {
+              "id": "<|begin_of_text|>",
+              "type_id": 0
+            }
+          },
+          {
+            "Sequence": {
+              "id": "A",
+              "type_id": 0
+            }
+          }
+        ],
+        "pair": [
+          {
+            "SpecialToken": {
+              "id": "<|begin_of_text|>",
+              "type_id": 0
+            }
+          },
+          {
+            "Sequence": {
+              "id": "A",
+              "type_id": 0
+            }
+          },
+          {
+            "SpecialToken": {
+              "id": "<|begin_of_text|>",
+              "type_id": 1
+            }
+          },
+          {
+            "Sequence": {
+              "id": "B",
+              "type_id": 1
+            }
+          }
+        ],
+        "special_tokens": {
+          "<|begin_of_text|>": {
+            "id": "<|begin_of_text|>",
+            "ids": [
+              128000
+            ],
+            "tokens": [
+              "<|begin_of_text|>"
+            ]
+          }
+        }
+      }
+    ]
+  },
+  "decoder": {
+    "type": "ByteLevel",
+    "add_prefix_space": true,
+    "trim_offsets": true,
+    "use_regex": true
+  },
+  "model": {
+    "type": "BPE",
+    "dropout": null,
+    "unk_token": null,
+    "continuing_subword_prefix": null,
+    "end_of_word_suffix": null,
+    "fuse_unk": false,
+    "byte_fallback": false,
+    "ignore_merges": true,
+    "vocab": {},
+    "merges": []
+  }
+}
\ No newline at end of file
--- a/lib/llm/tests/data/sample-models/mock-llama-3.1-8b-instruct/tokenizer_config.json
+++ b/lib/llm/tests/data/sample-models/mock-llama-3.1-8b-instruct/tokenizer_config.json
+{
+  "added_tokens_decoder": {
+    "128000": {
+      "content": "<|begin_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128001": {
+      "content": "<|end_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128002": {
+      "content": "<|reserved_special_token_0|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128003": {
+      "content": "<|reserved_special_token_1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128004": {
+      "content": "<|reserved_special_token_2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128005": {
+      "content": "<|reserved_special_token_3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128006": {
+      "content": "<|start_header_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128007": {
+      "content": "<|end_header_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128008": {
+      "content": "<|reserved_special_token_4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128009": {
+      "content": "<|eot_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|begin_of_text|>",
+  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim %}{% if loop.first %}{% set content = bos_token + content %}{% endif %}{% if not loop.last %}{% set content = content + '<|eot_id|>'%}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|eot_id|>",
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 1000000000000000019884624838656,
+  "tokenizer_class": "PreTrainedTokenizerFast"
+}
--- a/llm/rust/triton-llm/tests/http-service.rs
+++ b/llm/rust/triton-llm/tests/http-service.rs
@@ -18,19 +18,19 @@ use async_stream::stream;
 use prometheus::{proto::MetricType, Registry};
 use reqwest::StatusCode;
 use std::sync::Arc;
-use triton_distributed::{
+use triton_distributed_runtime::{
    pipeline::{
        async_trait, AsyncEngine, AsyncEngineContextProvider, ManyOut, ResponseStream, SingleIn,
    },
    CancellationToken,
 };
-use triton_llm::http::service::{
+use triton_distributed_llm::http::service::{
    error::HttpError,
    metrics::{Endpoint, RequestType, Status},
    service_v2::HttpService,
    Metrics,
 };
-use triton_llm::protocols::{
+use triton_distributed_llm::protocols::{
    openai::{
        chat_completions::{ChatCompletionRequest, ChatCompletionResponseDelta},
        completions::{CompletionRequest, CompletionResponse},

--- a/llm/rust/triton-llm/tests/model_card.rs
+++ b/llm/rust/triton-llm/tests/model_card.rs
@@ -14,7 +14,7 @@
 // limitations under the License.

 use tempfile::tempdir;
-use triton_llm::model_card::model::{ModelDeploymentCard, PromptFormatterArtifact, TokenizerKind};
+use triton_distributed_llm::model_card::model::{ModelDeploymentCard, PromptFormatterArtifact, TokenizerKind};

 const HF_PATH: &str = "tests/data/sample-models/TinyLlama_v1.1";


--- a/llm/rust/triton-llm/tests/openai_completions.rs
+++ b/llm/rust/triton-llm/tests/openai_completions.rs
@@ -14,7 +14,7 @@
 // limitations under the License.

 use serde::{Deserialize, Serialize};
-use triton_llm::protocols::{
+use triton_distributed_llm::protocols::{
    common,
    openai::{
        self,

--- a/llm/rust/triton-llm/tests/preprocessor.rs
+++ b/llm/rust/triton-llm/tests/preprocessor.rs
--- a/llm/rust/triton-llm/tests/snapshots/openai_completions__minimum_viable_request.snap
+++ b/llm/rust/triton-llm/tests/snapshots/openai_completions__minimum_viable_request.snap
--- a/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-10.snap
+++ b/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-10.snap
--- a/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-2.snap
+++ b/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-2.snap
--- a/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-3.snap
+++ b/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-3.snap
--- a/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-4.snap
+++ b/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-4.snap
--- a/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-5.snap
+++ b/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-5.snap
--- a/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-6.snap
+++ b/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-6.snap
--- a/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-7.snap
+++ b/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-7.snap
--- a/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-8.snap
+++ b/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-8.snap
--- a/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-9.snap
+++ b/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples-9.snap
--- a/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples.snap
+++ b/llm/rust/triton-llm/tests/snapshots/openai_completions__valid_samples.snap