Unverified commit 6b6823a1, authored by Kaixi Hou, committed by GitHub

Restrict TF tests to one GPU (#264)



* Only use one GPU for TensorFlow tests
Signed-off-by: kaixih <kaixih@nvidia.com>

* Simplify the change
Signed-off-by: kaixih <kaixih@nvidia.com>

* Final fix
Signed-off-by: kaixih <kaixih@nvidia.com>

---------
Signed-off-by: kaixih <kaixih@nvidia.com>
Co-authored-by: Kirthi Shankar Sivamani <ksivamani@nvidia.com>
parent 39b2ef10
@@ -75,6 +75,10 @@ def get_adjusted_layernorm_dx(x, ln_dy, init):
 class LayersTest(test.TestCase):
+  def setUp(self):
+    super().setUp()
+    tf.keras.mixed_precision.set_global_policy('mixed_float16')
+
   @test_util.run_gpu_only
   def testDenseFwd(self):
     B, M, K, N = 4, 8, 16, 32
@@ -578,5 +582,4 @@ class LayersTest(test.TestCase):
 if __name__ == '__main__':
-  tf.keras.mixed_precision.set_global_policy('mixed_float16')
   test.main()
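Moving set_global_policy('mixed_float16') into setUp() applies the policy before every test method, no matter which subset of the suite runs or how it is launched; the old call in the __main__ block only took effect when the file was executed directly. A minimal sketch of the pattern (the class name and the tearDown reset are illustrative additions, not part of this change):

```python
import tensorflow as tf
from tensorflow.python.platform import test


class PolicyScopedTest(test.TestCase):
  def setUp(self):
    super().setUp()
    # Runs before every test method, even when a single test is selected.
    tf.keras.mixed_precision.set_global_policy('mixed_float16')

  def tearDown(self):
    # Restore the default so the policy does not leak into other suites
    # sharing the process (illustrative; the change above omits this).
    tf.keras.mixed_precision.set_global_policy('float32')
    super().tearDown()

  def testPolicyActive(self):
    self.assertEqual(
        tf.keras.mixed_precision.global_policy().name, 'mixed_float16')


if __name__ == '__main__':
  test.main()
```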
@@ -126,6 +126,10 @@ class MultiHeadAttentionKeras(tf.keras.Model):
 class MHATest(test.TestCase):
+  def setUp(self):
+    super().setUp()
+    tf.keras.mixed_precision.set_global_policy('mixed_float16')
+
   @test_util.run_gpu_only
   def testMHAForward(self):
     use_fp8 = tf.test.is_gpu_available(True, (9, 0))
@@ -252,5 +256,4 @@ class MHATest(test.TestCase):
 if __name__ == '__main__':
-  tf.keras.mixed_precision.set_global_policy('mixed_float16')
   test.main()
@@ -38,6 +38,10 @@ def train_step(dy, x, x_mask, x_dec, x_dec_mask, model, use_fp8=False,
 class TransformerLayerTest(test.TestCase):
+  def setUp(self):
+    super().setUp()
+    tf.keras.mixed_precision.set_global_policy('mixed_float16')
+
   @test_util.run_gpu_only
   def testTransformerSanity(self):
     use_fp8 = tf.test.is_gpu_available(True, (9, 0))
@@ -115,5 +119,4 @@ class TransformerLayerTest(test.TestCase):
 if __name__ == '__main__':
-  tf.keras.mixed_precision.set_global_policy('mixed_float16')
   test.main()
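The unchanged context lines show how these suites gate FP8: tf.test.is_gpu_available(True, (9, 0)) returns True only when a CUDA GPU with compute capability 9.0 (Hopper) or newer is present. A sketch of the same check with the keyword arguments spelled out (the helper is deprecated in recent TF releases, but it is what these tests use):

```python
import tensorflow as tf

# FP8 kernels need compute capability 9.0+ (Hopper); on older GPUs the
# tests exercise the non-FP8 code path instead.
use_fp8 = tf.test.is_gpu_available(
    cuda_only=True, min_cuda_compute_capability=(9, 0))
```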
@@ -15,6 +15,7 @@
 #include "common/include/transformer_engine/transformer_engine.h"
 #include "common/include/transformer_engine/transpose.h"
 #include "tensorflow/c/eager/c_api_experimental.h"
+#include "tensorflow/c/eager/c_api_internal.h"
 #include "tensorflow/c/eager/immediate_execution_tensor_handle.h"
 #include "tensorflow/c/eager/tfe_tensorhandle_internal.h"
 #include "tensorflow/c/tf_status_internal.h"
@@ -200,6 +201,17 @@ TFE_Context* GetContext(TF_Status* status) {
   static TFE_Context* context = nullptr;
   if (context == nullptr) {
     TFE_ContextOptions* opts = TFE_NewContextOptions();
+    // TF-TE currently supports only a single GPU, so manually cap the GPU
+    // count at 1 in multi-GPU environments. Otherwise TF traverses all
+    // valid GPUs (to query their stream priority ranges) and ends up
+    // calling cudaSetDevice on the last one (see
+    // BaseGPUDeviceFactory::CreateDevices), which would dispatch the other
+    // pybind functions onto other GPUs and produce bad results.
+    auto* device_count =
+        opts->session_options.options.config.mutable_device_count();
+    device_count->insert({"GPU", 1});
     context = TFE_NewContext(opts, status);
   }
   return context;
...
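Setting device_count to {"GPU", 1} in the embedded context's ConfigProto makes TF create only one GPU device, so every pybind entry point that uses this context lands on the same device; reaching opts->session_options is what requires the newly added internal header. For comparison only (this is not what the extension does), a Python-level script can impose the same single-GPU restriction through the public API before the runtime initializes:

```python
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if len(gpus) > 1:
  # Expose only the first GPU so every op runs on one device. This must
  # happen before TensorFlow initializes its GPU devices.
  tf.config.set_visible_devices(gpus[0], 'GPU')

print(tf.config.list_logical_devices('GPU'))  # one logical GPU
```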