"tests/vscode:/vscode.git/clone" did not exist on "9c9b8e707b9803a1425ed8dd2f51069b22d9230f"
Unverified commit 86cff21c authored by Yih-Dar, committed by GitHub

Fix some TF GPT-J CI tests (#16454)



* Fix for test_mixed_precision

* Fix test_saved_model_creation by using shape_list instead of shape (see the sketch below the commit metadata)

* skip test_model_from_pretrained on GPU for now to avoid GPU OOM

* skip test_gptj_sample_max_time for now
Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
parent aebca696
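Note on the shape_list change (a sketch for context, not part of the diff): SavedModel creation traces the model with dynamic (None) sequence dimensions, so the static x.shape[i] returns None and arithmetic such as x.shape[1] + offset breaks. The library's shape_list helper falls back to tf.shape for those axes. A minimal stand-alone equivalent, with the hypothetical name shape_list_sketch:

import tensorflow as tf


def shape_list_sketch(t: tf.Tensor):
    # Static dims become Python ints; None dims fall back to the dynamic tf.shape lookup.
    static = t.shape.as_list()
    dynamic = tf.shape(t)
    return [dynamic[i] if dim is None else dim for i, dim in enumerate(static)]


@tf.function(input_signature=[tf.TensorSpec(shape=[None, None, 64], dtype=tf.float32)])
def positions(x):
    # Inside this trace x.shape[1] is None, so `x.shape[1] + offset` would raise a TypeError;
    # the dynamic lookup keeps working and the export succeeds.
    seq_len = shape_list_sketch(x)[1]
    return tf.range(seq_len)

Calling positions(tf.zeros([2, 5, 64])) returns [0, 1, 2, 3, 4]; the same body written with x.shape[1] fails at trace time because None cannot be added to an int.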
@@ -59,13 +59,13 @@ GPTJ_PRETRAINED_MODEL_ARCHIVE_LIST = [
 def fixed_pos_embedding(x: tf.Tensor, seq_dim: int = 1, seq_len: Optional[int] = None) -> Tuple[tf.Tensor, tf.Tensor]:
-    dim = x.shape[-1]
+    dim = shape_list(x)[-1]
     if seq_len is None:
-        seq_len = x.shape[seq_dim]
+        seq_len = shape_list(x)[seq_dim]
     inv_freq = tf.cast(1.0 / (10000 ** (tf.range(0, dim, 2) / dim)), tf.float32)
     seq_len_range = tf.cast(tf.range(seq_len), tf.float32)
     sinusoid_inp = tf.cast(tf.einsum("i , j -> i j", seq_len_range, inv_freq), tf.float32)
-    return tf.sin(sinusoid_inp), tf.cos(sinusoid_inp)
+    return tf.cast(tf.sin(sinusoid_inp), dtype=x.dtype), tf.cast(tf.cos(sinusoid_inp), dtype=x.dtype)


 def rotate_every_two(x: tf.Tensor) -> tf.Tensor:
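Note on the return-cast change above (illustration only; the variable names below are made up): the sinusoid table is built in float32, so in a mixed-precision run the hidden states are float16 while tf.sin/tf.cos stay float32, and combining the two raises a dtype mismatch. Casting back to x.dtype is what makes test_mixed_precision pass.

import tensorflow as tf

x = tf.zeros([1, 8, 16], dtype=tf.float16)  # activations as produced under a mixed_float16 policy

inv_freq = 1.0 / (10000 ** (tf.range(0, 16, 2, dtype=tf.float32) / 16))
sinusoid_inp = tf.einsum("i , j -> i j", tf.range(8, dtype=tf.float32), inv_freq)

sin_fp32 = tf.sin(sinusoid_inp)                          # float32: mixing with x raises a dtype error
sin_cast = tf.cast(tf.sin(sinusoid_inp), dtype=x.dtype)  # float16: matches x, as in the fixed return
print(sin_fp32.dtype, sin_cast.dtype)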
@@ -77,8 +77,8 @@ def rotate_every_two(x: tf.Tensor) -> tf.Tensor:
 def apply_rotary_pos_emb(x: tf.Tensor, sincos: tf.Tensor, offset: int = 0) -> tf.Tensor:
     sin_pos, cos_pos = sincos
-    sin_pos = tf.repeat(sin_pos[None, offset : x.shape[1] + offset, None, :], 2, 3)
-    cos_pos = tf.repeat(cos_pos[None, offset : x.shape[1] + offset, None, :], 2, 3)
+    sin_pos = tf.repeat(sin_pos[None, offset : shape_list(x)[1] + offset, None, :], 2, 3)
+    cos_pos = tf.repeat(cos_pos[None, offset : shape_list(x)[1] + offset, None, :], 2, 3)
     return (x * cos_pos) + (rotate_every_two(x) * sin_pos)
@@ -173,7 +173,7 @@ class TFGPTJAttention(tf.keras.layers.Layer):
         head_mask: Optional[tf.Tensor] = None,
     ) -> Tuple[tf.Tensor, tf.Tensor]:
         # compute causal mask from causal mask buffer
-        query_length, key_length = query.shape[-2], key.shape[-2]
+        query_length, key_length = shape_list(query)[-2], shape_list(key)[-2]
         causal_mask = self.get_causal_mask(key_length, query_length)

         # Keep the attention weights computation in fp32 to avoid overflow issues
@@ -218,11 +218,11 @@ class TFGPTJAttention(tf.keras.layers.Layer):
         key = self._split_heads(key, True)
         value = self._split_heads(value, False)

-        seq_len = key.shape[1]
+        seq_len = shape_list(key)[1]
         offset = 0

         if layer_past is not None:
-            offset = layer_past[0].shape[-2]
+            offset = shape_list(layer_past[0])[-2]
             seq_len += offset

         if self.rotary_dim is not None:
@@ -345,6 +345,10 @@ class TFGPTJModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, unittest.TestCase):
                 assert name is None

     @slow
+    @unittest.skipIf(
+        not is_tf_available() or len(tf.config.list_physical_devices("GPU")) > 0,
+        "skip testing on GPU for now to avoid GPU OOM.",
+    )
     def test_model_from_pretrained(self):
         model = TFGPTJModel.from_pretrained("EleutherAI/gpt-j-6B", from_pt=True)
         self.assertIsNotNone(model)
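The skipIf guard above only lets the 6B checkpoint load on CPU-only hosts: the test is skipped when no TF install is found or when any GPU is visible. The same check in isolation (hypothetical function name):

import tensorflow as tf


def skip_on_gpu() -> bool:
    # True when at least one GPU is visible; loading EleutherAI/gpt-j-6B with
    # from_pt=True does not fit in the CI GPU memory, hence the temporary skip.
    return len(tf.config.list_physical_devices("GPU")) > 0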
@@ -395,6 +399,7 @@ class TFGPTJModelLanguageGenerationTest(unittest.TestCase):
         )  # token_type_ids should change output

     @slow
+    @unittest.skip(reason="TF generate currently has no time-based stopping criteria")
     def test_gptj_sample_max_time(self):
         tokenizer = AutoTokenizer.from_pretrained("anton-l/gpt-j-tiny-random")
         model = TFGPTJForCausalLM.from_pretrained("anton-l/gpt-j-tiny-random", from_pt=True)
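Background on this last skip (a hedged sketch, not part of the diff): the test exercises wall-clock-bounded sampling, which at this point only the PyTorch generate() supports via its max_time argument; the TF generate() has no such stopping criterion yet, so the test is skipped rather than fixed. Roughly what the PyTorch side of the same check looks like, assuming the max_time argument of that era:

from transformers import AutoTokenizer, GPTJForCausalLM  # PyTorch classes

tokenizer = AutoTokenizer.from_pretrained("anton-l/gpt-j-tiny-random")
model = GPTJForCausalLM.from_pretrained("anton-l/gpt-j-tiny-random")

input_ids = tokenizer("Today is a nice day and", return_tensors="pt").input_ids
# max_time caps generation wall-clock time; there is no TF equivalent at this commit.
output = model.generate(input_ids, do_sample=True, max_time=0.5, max_length=256)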