Unverified commit 8c3e1999, authored by Yong Hoon Shin and committed by GitHub
Browse files

Revert gemma3n fast prefill changes (#23897)


Signed-off-by: Yong Hoon Shin <yhshin@meta.com>
parent 1c26b422
......@@ -64,6 +64,7 @@ def cleanup(llm: LLM, compilation_config: CompilationConfig):
@fork_new_process_for_each_test
@pytest.mark.parametrize("enforce_eager", [True])
@pytest.mark.skip(reason="Disable until Gemma3n supports fast prefill")
def test_kv_sharing_fast_prefill(
monkeypatch: pytest.MonkeyPatch,
enforce_eager: bool,
......
This diff is collapsed.
......@@ -620,7 +620,7 @@ class Gemma3nForConditionalGeneration(nn.Module, SupportsMultiModal):
# NOTE (NickLucche) Each pass needs tokens to compute PLE so we cache
# them here, as the model forward has only access to the input_embeds.
if input_ids is not None:
per_layer_inputs = self.language_model.model.self_decoder.get_per_layer_input_embeddings(
per_layer_inputs = self.language_model.model.get_per_layer_input_embeddings(
input_ids)
per_layer_inputs = per_layer_inputs.reshape(
-1, self.config.text_config.num_hidden_layers,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment