Unverified Commit 3028b20a authored by Ali Hassani's avatar Ali Hassani Committed by GitHub
Browse files

Fix natten (#22229)

* Add kernel size to NATTEN's QK arguments.

The new NATTEN 0.14.5 supports PyTorch 2.0, but also adds an additional
argument to the QK operation to allow optional RPBs.

This ends up failing NATTEN tests.

This commit adds NATTEN back to circleci and adds the arguments to get
it working again.

* Force NATTEN >= 0.14.5
parent 074490b2
@@ -374,8 +374,7 @@ exotic_models_job = CircleCIJob(
         "pip install 'git+https://github.com/facebookresearch/detectron2.git'",
         "sudo apt install tesseract-ocr",
         "pip install pytesseract",
-        # wait until natten is ready for torch 2.0.0
-        # "pip install natten",
+        "pip install natten",
     ],
     tests_to_run=[
         "tests/models/*layoutlmv*",
...
@@ -129,7 +129,7 @@ _deps = [
     "keras-nlp>=0.3.1",
     "librosa",
     "nltk",
-    "natten>=0.14.4",
+    "natten>=0.14.5",
     "numpy>=1.17",
     "onnxconverter-common",
     "onnxruntime-tools>=1.4.2",
...
@@ -35,7 +35,7 @@ deps = {
     "keras-nlp": "keras-nlp>=0.3.1",
     "librosa": "librosa",
     "nltk": "nltk",
-    "natten": "natten>=0.14.4",
+    "natten": "natten>=0.14.5",
     "numpy": "numpy>=1.17",
     "onnxconverter-common": "onnxconverter-common",
     "onnxruntime-tools": "onnxruntime-tools>=1.4.2",
...
@@ -347,7 +347,7 @@ class NeighborhoodAttention(nn.Module):
         query_layer = query_layer / math.sqrt(self.attention_head_size)

         # Compute NA between "query" and "key" to get the raw attention scores, and add relative positional biases.
-        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.dilation)
+        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.kernel_size, self.dilation)

         # Normalize the attention scores to probabilities.
         attention_probs = nn.functional.softmax(attention_scores, dim=-1)
...
@@ -339,7 +339,7 @@ class NeighborhoodAttention(nn.Module):
         query_layer = query_layer / math.sqrt(self.attention_head_size)

         # Compute NA between "query" and "key" to get the raw attention scores, and add relative positional biases.
-        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, 1)
+        attention_scores = natten2dqkrpb(query_layer, key_layer, self.rpb, self.kernel_size, 1)

         # Normalize the attention scores to probabilities.
         attention_probs = nn.functional.softmax(attention_scores, dim=-1)
...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment