"vscode:/vscode.git/clone" did not exist on "9af267d247f4af341e614a9c2cf2ee5272e796a2"
Commit 06fe2294 authored by one

Fix the vortex attention interface by adding dropout parameters and updating the function signature

parent 1516fed0
@@ -13,11 +13,11 @@
+++ vortex/model/attention.py 2026-01-19 10:47:28.981582989 +0800
@@ -26,6 +26,7 @@
FusedDense, ColumnParallelLinear, RowParallelLinear = None, None, None
from vortex.model.rotary import RotaryEmbedding
+from flash_attn.flash_attn_interface import flash_attn_kvpacked_func as dcu_flash_attn_kvpacked_fun
# From https://github.com/ofirpress/attention_with_linear_biases/blob/4b92f28a005ead2567abe2359f633e73e08f3833/fairseq/models/transformer.py#L742
@@ -215,16 +216,19 @@
batch_size, seqlen_q = q.shape[0], q.shape[1]
@@ -37,8 +37,8 @@
+ q,
+ kv,
+ self.drop.p if self.training else 0.0,
-+ softmax_scale=None,
-+ causal=False,
++ causal=causal,
++ softmax_scale=self.softmax_scale,
+ alibi_slopes=self.alibi_slopes,
+ window_size=self.window_size,
+ deterministic=self.deterministic,
@@ -46,8 +46,8 @@
+ return_attn_probs=False,
+ bhsd=False
+ )
class SelfAttention(nn.Module):
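
Taken together, the attention.py hunks thread the attention dropout probability through to the fused kernel as a positional argument and replace the previously hard-coded `causal=False` / `softmax_scale=None` with the module's own values. A minimal sketch of the resulting call site, assuming the DCU flash-attn fork imported above (only the import alias and the argument order/keywords are taken from the patch; the class name, constructor, and defaults are hypothetical):

```python
import torch.nn as nn
# Assumed: the DCU flash-attn fork referenced by the patch above.
from flash_attn.flash_attn_interface import (
    flash_attn_kvpacked_func as dcu_flash_attn_kvpacked_fun,
)

class CrossAttention(nn.Module):
    """Hypothetical reconstruction of the patched call site; only the
    argument order and keywords are taken from the hunks above."""

    def __init__(self, softmax_scale=None, attention_dropout=0.0,
                 alibi_slopes=None, window_size=(-1, -1), deterministic=False):
        super().__init__()
        self.softmax_scale = softmax_scale
        self.drop = nn.Dropout(attention_dropout)
        self.alibi_slopes = alibi_slopes
        self.window_size = window_size
        self.deterministic = deterministic

    def forward(self, q, kv, causal=False):
        # dropout_p is the new positional argument: active during
        # training, forced to 0.0 at inference time.
        return dcu_flash_attn_kvpacked_fun(
            q,
            kv,
            self.drop.p if self.training else 0.0,
            causal=causal,                     # was hard-coded False
            softmax_scale=self.softmax_scale,  # was None
            alibi_slopes=self.alibi_slopes,
            window_size=self.window_size,
            deterministic=self.deterministic,
            return_attn_probs=False,
            bhsd=False,
        )
```

Guarding with `self.training` matters because the fused kernel applies dropout internally and knows nothing about `nn.Module.eval()`; passing the raw `self.drop.p` unconditionally would keep dropping attention weights at inference time.
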
--- vortex/ops/attn_interface.py.orig 2026-01-19 10:41:45.456424582 +0800
+++ vortex/ops/attn_interface.py 2026-01-19 10:47:28.983582996 +0800
@@ -60,3 +60,20 @@
q,
k,
v,
@@ -72,6 +72,9 @@
softcap,
return_softmax,
None,
+ False,
+ None,
+ 0.0,
)
return out, softmax_lse, S_dmask, rng_state
@@ -1624,5 +1627,6 @@
softcap,
rotary_interleaved,
num_splits,
+ None,
)
return (out, softmax_lse) if return_softmax_lse else out
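
The attn_interface.py hunks follow the usual pattern for tracking a compiled kernel whose signature has grown trailing parameters: every Python wrapper must now pass a value for each new parameter, in order, even when it is only a default (`False`, `None`, `0.0` in the forward path; a single `None` after `num_splits` in the kv-cache path at `@@ -1624,5 +1627,6 @@`). A minimal runnable sketch of that pattern, with a pure-Python stand-in for the extension entry point (all names below are hypothetical; only the appended literals come from the patch):

```python
def fwd_kernel(q, k, v, softcap, return_softmax, gen,
               new_flag, new_tensor, new_scale):
    # Pure-Python stand-in for the fork's compiled forward kernel.
    # Returns dummy outputs so the sketch runs; the real kernel
    # computes the attention outputs and softmax statistics.
    out, softmax_lse, S_dmask, rng_state = q, None, None, None
    return out, softmax_lse, S_dmask, rng_state

def _flash_attn_forward(q, k, v, softcap, return_softmax):
    # Once the kernel grows three trailing parameters, a positional
    # call that omits them raises a TypeError, so the wrapper must be
    # updated in lockstep, which is what this commit does.
    out, softmax_lse, S_dmask, rng_state = fwd_kernel(
        q, k, v,
        softcap,
        return_softmax,
        None,   # RNG generator, unchanged
        False,  # appended by this commit; semantics are fork-specific
        None,   # appended by this commit
        0.0,    # appended by this commit
    )
    return out, softmax_lse, S_dmask, rng_state

print(_flash_attn_forward("q", "k", "v", 0.0, False))
```
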