Unverified commit 85f50eb4, authored by rahul-sarvam and committed by GitHub
Browse files

Adding support to Sarvam's MoE models (#33942)


Signed-off-by: rahul-sarvam <140298821+rahul-sarvam@users.noreply.github.com>
parent 5261223c
...@@ -469,6 +469,8 @@ th { ...@@ -469,6 +469,8 @@ th {
| `Qwen3MoeForCausalLM` | Qwen3MoE | `Qwen/Qwen3-30B-A3B`, etc. | ✅︎ | ✅︎ | | `Qwen3MoeForCausalLM` | Qwen3MoE | `Qwen/Qwen3-30B-A3B`, etc. | ✅︎ | ✅︎ |
| `Qwen3NextForCausalLM` | Qwen3NextMoE | `Qwen/Qwen3-Next-80B-A3B-Instruct`, etc. | ✅︎ | ✅︎ | | `Qwen3NextForCausalLM` | Qwen3NextMoE | `Qwen/Qwen3-Next-80B-A3B-Instruct`, etc. | ✅︎ | ✅︎ |
| `RWForCausalLM` | Falcon RW | `tiiuae/falcon-40b`, etc. | | ✅︎ | | `RWForCausalLM` | Falcon RW | `tiiuae/falcon-40b`, etc. | | ✅︎ |
| `SarvamMoEForCausalLM` | Sarvam 2 | `sarvamai/sarvam2-30b-a3b`, etc. | ✅︎ | ✅︎ |
| `SarvamMLAForCausalLM` | Sarvam 2 | `sarvamai/sarvam2-105b-a9b`, etc. | | ✅︎ |
| `SeedOssForCausalLM` | SeedOss | `ByteDance-Seed/Seed-OSS-36B-Instruct`, etc. | ✅︎ | ✅︎ | | `SeedOssForCausalLM` | SeedOss | `ByteDance-Seed/Seed-OSS-36B-Instruct`, etc. | ✅︎ | ✅︎ |
| `SolarForCausalLM` | Solar Pro | `upstage/solar-pro-preview-instruct`, etc. | ✅︎ | ✅︎ | | `SolarForCausalLM` | Solar Pro | `upstage/solar-pro-preview-instruct`, etc. | ✅︎ | ✅︎ |
| `StableLmForCausalLM` | StableLM | `stabilityai/stablelm-3b-4e1t`, `stabilityai/stablelm-base-alpha-7b-v2`, etc. | | | | `StableLmForCausalLM` | StableLM | `stabilityai/stablelm-3b-4e1t`, `stabilityai/stablelm-base-alpha-7b-v2`, etc. | | |
......
...@@ -480,6 +480,18 @@ _TEXT_GENERATION_EXAMPLE_MODELS = { ...@@ -480,6 +480,18 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
min_transformers_version="4.56.3", min_transformers_version="4.56.3",
), ),
"RWForCausalLM": _HfExamplesInfo("tiiuae/falcon-40b"), "RWForCausalLM": _HfExamplesInfo("tiiuae/falcon-40b"),
"SarvamMoEForCausalLM": _HfExamplesInfo(
"sarvamai/sarvam-30b",
trust_remote_code=True,
max_model_len=4096,
is_available_online=True,
),
"SarvamMLAForCausalLM": _HfExamplesInfo(
"sarvamai/sarvam-105b",
trust_remote_code=True,
max_model_len=4096,
is_available_online=True,
),
"SeedOssForCausalLM": _HfExamplesInfo( "SeedOssForCausalLM": _HfExamplesInfo(
"ByteDance-Seed/Seed-OSS-36B-Instruct", "ByteDance-Seed/Seed-OSS-36B-Instruct",
trust_remote_code=True, trust_remote_code=True,
......
...@@ -191,6 +191,8 @@ _TEXT_GENERATION_MODELS = { ...@@ -191,6 +191,8 @@ _TEXT_GENERATION_MODELS = {
"Qwen3ForCausalLM": ("qwen3", "Qwen3ForCausalLM"), "Qwen3ForCausalLM": ("qwen3", "Qwen3ForCausalLM"),
"Qwen3MoeForCausalLM": ("qwen3_moe", "Qwen3MoeForCausalLM"), "Qwen3MoeForCausalLM": ("qwen3_moe", "Qwen3MoeForCausalLM"),
"RWForCausalLM": ("falcon", "FalconForCausalLM"), "RWForCausalLM": ("falcon", "FalconForCausalLM"),
"SarvamMoEForCausalLM": ("sarvam", "SarvamMoEForCausalLM"),
"SarvamMLAForCausalLM": ("sarvam", "SarvamMLAForCausalLM"),
"SeedOssForCausalLM": ("seed_oss", "SeedOssForCausalLM"), "SeedOssForCausalLM": ("seed_oss", "SeedOssForCausalLM"),
"Step1ForCausalLM": ("step1", "Step1ForCausalLM"), "Step1ForCausalLM": ("step1", "Step1ForCausalLM"),
"Step3TextForCausalLM": ("step3_text", "Step3TextForCausalLM"), "Step3TextForCausalLM": ("step3_text", "Step3TextForCausalLM"),
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment