Unverified commit 7996ef74, authored by Younes Belkada, committed by GitHub
Browse files

fix module order (#18312)

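- In `BloomMLP.__init__`, create `gelu_impl` before `dense_4h_to_h` (it was previously created after `hidden_dropout`), so the submodules are declared in the order `dense_h_to_4h`, `gelu_impl`, `dense_4h_to_h`.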
parent 70e7d1d6
...@@ -352,9 +352,9 @@ class BloomMLP(nn.Module): ...@@ -352,9 +352,9 @@ class BloomMLP(nn.Module):
self.pretraining_tp = config.pretraining_tp self.pretraining_tp = config.pretraining_tp
self.slow_but_exact = config.slow_but_exact self.slow_but_exact = config.slow_but_exact
self.dense_h_to_4h = nn.Linear(hidden_size, 4 * hidden_size) self.dense_h_to_4h = nn.Linear(hidden_size, 4 * hidden_size)
self.gelu_impl = BloomGelu()
self.dense_4h_to_h = nn.Linear(4 * hidden_size, hidden_size) self.dense_4h_to_h = nn.Linear(4 * hidden_size, hidden_size)
self.hidden_dropout = config.hidden_dropout self.hidden_dropout = config.hidden_dropout
self.gelu_impl = BloomGelu()
def forward(self, hidden_states, residual): def forward(self, hidden_states, residual):
hidden_states = self.gelu_impl(self.dense_h_to_4h(hidden_states)) hidden_states = self.gelu_impl(self.dense_h_to_4h(hidden_states))
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment