[Doc] Replace ibm-fms with ibm-ai-platform (#12709)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>

[Doc] Replace ibm-fms with ibm-ai-platform (#12709)
Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
bb392af4 · Thomas Parnell · GitHub · 5d98d560 · bb392af4 · bb392af4
Unverified Commit bb392af4 authored Feb 04, 2025 by Thomas Parnell Committed by GitHub Feb 04, 2025
5 changed files
--- a/docs/source/features/spec_decode.md
+++ b/docs/source/features/spec_decode.md
@@ -131,7 +131,7 @@ sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
 llm = LLM(
    model="meta-llama/Meta-Llama-3.1-70B-Instruct",
    tensor_parallel_size=4,
-    speculative_model="ibm-fms/llama3-70b-accelerator",
+    speculative_model="ibm-ai-platform/llama3-70b-accelerator",
    speculative_draft_tensor_parallel_size=1,
 )
 outputs = llm.generate(prompts, sampling_params)
@@ -149,11 +149,11 @@ limitation will be fixed in a future release.

 A variety of speculative models of this type are available on HF hub:

- [llama-13b-accelerator](https://huggingface.co/ibm-fms/llama-13b-accelerator)
- [llama3-8b-accelerator](https://huggingface.co/ibm-fms/llama3-8b-accelerator)
- [codellama-34b-accelerator](https://huggingface.co/ibm-fms/codellama-34b-accelerator)
- [llama2-70b-accelerator](https://huggingface.co/ibm-fms/llama2-70b-accelerator)
- [llama3-70b-accelerator](https://huggingface.co/ibm-fms/llama3-70b-accelerator)
+- [llama-13b-accelerator](https://huggingface.co/ibm-ai-platform/llama-13b-accelerator)
+- [llama3-8b-accelerator](https://huggingface.co/ibm-ai-platform/llama3-8b-accelerator)
+- [codellama-34b-accelerator](https://huggingface.co/ibm-ai-platform/codellama-34b-accelerator)
+- [llama2-70b-accelerator](https://huggingface.co/ibm-ai-platform/llama2-70b-accelerator)
+- [llama3-70b-accelerator](https://huggingface.co/ibm-ai-platform/llama3-70b-accelerator)
 - [granite-3b-code-instruct-accelerator](https://huggingface.co/ibm-granite/granite-3b-code-instruct-accelerator)
 - [granite-8b-code-instruct-accelerator](https://huggingface.co/ibm-granite/granite-8b-code-instruct-accelerator)
 - [granite-7b-instruct-accelerator](https://huggingface.co/ibm-granite/granite-7b-instruct-accelerator)

--- a/examples/offline_inference/mlpspeculator.py
+++ b/examples/offline_inference/mlpspeculator.py
@@ -51,7 +51,7 @@ if __name__ == "__main__":
    # Create an LLM with spec decoding
    llm = LLM(
        model="meta-llama/Llama-2-13b-chat-hf",
-        speculative_model="ibm-fms/llama-13b-accelerator",
+        speculative_model="ibm-ai-platform/llama-13b-accelerator",
    )

    print("With speculation")

--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -278,7 +278,7 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
    "MedusaModel": _HfExamplesInfo("JackFram/llama-68m",
                                   speculative_model="abhigoyal/vllm-medusa-llama-68m-random"),  # noqa: E501
    "MLPSpeculatorPreTrainedModel": _HfExamplesInfo("JackFram/llama-160m",
-                                                    speculative_model="ibm-fms/llama-160m-accelerator"),  # noqa: E501
+                                                    speculative_model="ibm-ai-platform/llama-160m-accelerator"),  # noqa: E501
 }

 _FALLBACK_MODEL = {

--- a/tests/spec_decode/e2e/test_mlp_correctness.py
+++ b/tests/spec_decode/e2e/test_mlp_correctness.py
@@ -33,7 +33,7 @@ from .conftest import run_equality_correctness_test
 MAIN_MODEL = "JackFram/llama-160m"

 # speculative model
-SPEC_MODEL = "ibm-fms/llama-160m-accelerator"
+SPEC_MODEL = "ibm-ai-platform/llama-160m-accelerator"

 # max. number of speculative tokens: this corresponds to
 # n_predict in the config.json of the speculator model.

--- a/vllm/model_executor/models/mlp_speculator.py
+++ b/vllm/model_executor/models/mlp_speculator.py
@@ -64,7 +64,7 @@ class MLPSpeculator(nn.Module):
    https://arxiv.org/pdf/2404.19124

    Trained speculators of this type are available on HF hub at:
-    https://huggingface.co/ibm-fms and https://huggingface.co/ibm-granite
+    https://huggingface.co/ibm-ai-platform and https://huggingface.co/ibm-granite
    """

    def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: