Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
bb392af4
Unverified
Commit
bb392af4
authored
Feb 04, 2025
by
Thomas Parnell
Committed by
GitHub
Feb 04, 2025
Browse files
[Doc] Replace ibm-fms with ibm-ai-platform (#12709)
Signed-off-by:
Thomas Parnell
<
tpa@zurich.ibm.com
>
parent
5d98d560
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
10 additions
and
10 deletions
+10
-10
docs/source/features/spec_decode.md
docs/source/features/spec_decode.md
+6
-6
examples/offline_inference/mlpspeculator.py
examples/offline_inference/mlpspeculator.py
+1
-1
tests/models/registry.py
tests/models/registry.py
+1
-1
tests/spec_decode/e2e/test_mlp_correctness.py
tests/spec_decode/e2e/test_mlp_correctness.py
+1
-1
vllm/model_executor/models/mlp_speculator.py
vllm/model_executor/models/mlp_speculator.py
+1
-1
No files found.
docs/source/features/spec_decode.md
View file @
bb392af4
...
...
@@ -131,7 +131,7 @@ sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
llm
=
LLM
(
model
=
"meta-llama/Meta-Llama-3.1-70B-Instruct"
,
tensor_parallel_size
=
4
,
speculative_model
=
"ibm-
fms
/llama3-70b-accelerator"
,
speculative_model
=
"ibm-
ai-platform
/llama3-70b-accelerator"
,
speculative_draft_tensor_parallel_size
=
1
,
)
outputs
=
llm
.
generate
(
prompts
,
sampling_params
)
...
...
@@ -149,11 +149,11 @@ limitation will be fixed in a future release.
A variety of speculative models of this type are available on HF hub:
-
[
llama-13b-accelerator
](
https://huggingface.co/ibm-
fms
/llama-13b-accelerator
)
-
[
llama3-8b-accelerator
](
https://huggingface.co/ibm-
fms
/llama3-8b-accelerator
)
-
[
codellama-34b-accelerator
](
https://huggingface.co/ibm-
fms
/codellama-34b-accelerator
)
-
[
llama2-70b-accelerator
](
https://huggingface.co/ibm-
fms
/llama2-70b-accelerator
)
-
[
llama3-70b-accelerator
](
https://huggingface.co/ibm-
fms
/llama3-70b-accelerator
)
-
[
llama-13b-accelerator
](
https://huggingface.co/ibm-
ai-platform
/llama-13b-accelerator
)
-
[
llama3-8b-accelerator
](
https://huggingface.co/ibm-
ai-platform
/llama3-8b-accelerator
)
-
[
codellama-34b-accelerator
](
https://huggingface.co/ibm-
ai-platform
/codellama-34b-accelerator
)
-
[
llama2-70b-accelerator
](
https://huggingface.co/ibm-
ai-platform
/llama2-70b-accelerator
)
-
[
llama3-70b-accelerator
](
https://huggingface.co/ibm-
ai-platform
/llama3-70b-accelerator
)
-
[
granite-3b-code-instruct-accelerator
](
https://huggingface.co/ibm-granite/granite-3b-code-instruct-accelerator
)
-
[
granite-8b-code-instruct-accelerator
](
https://huggingface.co/ibm-granite/granite-8b-code-instruct-accelerator
)
-
[
granite-7b-instruct-accelerator
](
https://huggingface.co/ibm-granite/granite-7b-instruct-accelerator
)
...
...
examples/offline_inference/mlpspeculator.py
View file @
bb392af4
...
...
@@ -51,7 +51,7 @@ if __name__ == "__main__":
# Create an LLM with spec decoding
llm
=
LLM
(
model
=
"meta-llama/Llama-2-13b-chat-hf"
,
speculative_model
=
"ibm-
fms
/llama-13b-accelerator"
,
speculative_model
=
"ibm-
ai-platform
/llama-13b-accelerator"
,
)
print
(
"With speculation"
)
...
...
tests/models/registry.py
View file @
bb392af4
...
...
@@ -278,7 +278,7 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
"MedusaModel"
:
_HfExamplesInfo
(
"JackFram/llama-68m"
,
speculative_model
=
"abhigoyal/vllm-medusa-llama-68m-random"
),
# noqa: E501
"MLPSpeculatorPreTrainedModel"
:
_HfExamplesInfo
(
"JackFram/llama-160m"
,
speculative_model
=
"ibm-
fms
/llama-160m-accelerator"
),
# noqa: E501
speculative_model
=
"ibm-
ai-platform
/llama-160m-accelerator"
),
# noqa: E501
}
_FALLBACK_MODEL
=
{
...
...
tests/spec_decode/e2e/test_mlp_correctness.py
View file @
bb392af4
...
...
@@ -33,7 +33,7 @@ from .conftest import run_equality_correctness_test
MAIN_MODEL
=
"JackFram/llama-160m"
# speculative model
SPEC_MODEL
=
"ibm-
fms
/llama-160m-accelerator"
SPEC_MODEL
=
"ibm-
ai-platform
/llama-160m-accelerator"
# max. number of speculative tokens: this corresponds to
# n_predict in the config.json of the speculator model.
...
...
vllm/model_executor/models/mlp_speculator.py
View file @
bb392af4
...
...
@@ -64,7 +64,7 @@ class MLPSpeculator(nn.Module):
https://arxiv.org/pdf/2404.19124
Trained speculators of this type are available on HF hub at:
https://huggingface.co/ibm-
fms
and https://huggingface.co/ibm-granite
https://huggingface.co/ibm-
ai-platform
and https://huggingface.co/ibm-granite
"""
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
""
)
->
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment