Fix sampler: pass the lm_head weight to Sampler.forward() instead of storing it in Sampler.__init__()

de0fabbc · Woosuk Kwon · fdd0f2f4 · de0fabbc · de0fabbc
Commit de0fabbc authored Feb 23, 2023 by Woosuk Kwon
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 8 deletions

cacheflow/models/opt.py cacheflow/models/opt.py +3 -2

cacheflow/models/sample.py cacheflow/models/sample.py +3 -6

No files found.
--- a/cacheflow/models/opt.py
+++ b/cacheflow/models/opt.py
@@ -227,7 +227,7 @@ class OPTForCausalLM(OPTPreTrainedModel):
        self.model = OPTModel(config)
        # the lm_head weight is automatically tied to the embed tokens weight
        self.lm_head = nn.Linear(config.word_embed_proj_dim, config.vocab_size, bias=False)
-        self.sampler = Sampler(embedding=self.lm_head.weight)
+        self.sampler = Sampler()
        # Initialize weights and apply final processing
        self.post_init()
@@ -242,5 +242,6 @@ class OPTForCausalLM(OPTPreTrainedModel):
    ) -> Dict[int, Tuple[int, int]]:
        hidden_states = self.model(
            input_ids, positions, kv_caches, input_metadata, cache_events)
-        next_tokens = self.sampler(hidden_states, input_metadata)
+        next_tokens = self.sampler(
+            self.lm_head.weight, hidden_states, input_metadata)
        return next_tokens
--- a/cacheflow/models/sample.py
+++ b/cacheflow/models/sample.py
@@ -8,15 +8,12 @@ from cacheflow.models import InputMetadata
 class Sampler(nn.Module):
-    def __init__(
+    def __init__(self) -> None:
-        self,
-        embedding: torch.Tensor,
-    ) -> None:
        super().__init__()
-        self.embedding = embedding  # [vocab_size, hidden_size]
    def forward(
        self,
+        embedding: torch.Tensor,
        hidden_states: torch.Tensor,
        input_metadata: InputMetadata,
    ) -> Dict[int, Tuple[int, int]]:
@@ -31,7 +28,7 @@ class Sampler(nn.Module):
        hidden_states = hidden_states[last_token_indicies]
        # Get the logits for the next tokens.
-        logits = torch.matmul(hidden_states, self.embedding.t())
+        logits = torch.matmul(hidden_states, embedding.t())
        # Sample the next tokens.
        # TODO(woosuk): Implement other sampling methods.