import time

import numpy as np
import tensorflow as tf


def check_gpu_availability():
    """Check whether a GPU is available."""
    print("TensorFlow version:", tf.__version__)
    print("\nChecking GPU devices:")

    # List all available physical devices
    devices = tf.config.list_physical_devices()
    print("Available devices:", devices)

    # Check for GPUs
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        print(f"\n✅ Found {len(gpus)} GPU(s):")
        for gpu in gpus:
            print(f"  - {gpu}")

        # Enable memory growth so TensorFlow does not grab all GPU memory up front
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            print("Dynamic memory growth enabled")
        except RuntimeError as e:
            # Memory growth must be set before the GPUs are initialized
            print(e)
    else:
        print("\n❌ No GPU found, falling back to CPU")

    return len(gpus) > 0


def matrix_multiplication_demo():
    """Matrix multiplication GPU acceleration demo."""
    print("\n" + "=" * 50)
    print("Matrix multiplication benchmark")
    print("=" * 50)

    # Create large matrices
    size = 5000
    print(f"Creating {size}x{size} matrices...")

    # Create random matrices on the CPU
    with tf.device('/CPU:0'):
        a_cpu = tf.random.normal((size, size))
        b_cpu = tf.random.normal((size, size))

    # CPU computation
    print("\nComputing on CPU...")
    start_time = time.time()
    with tf.device('/CPU:0'):
        result_cpu = tf.matmul(a_cpu, b_cpu)
    cpu_time = time.time() - start_time
    print(f"CPU time: {cpu_time:.2f}s")

    # GPU computation (if available)
    if tf.config.list_physical_devices('GPU'):
        print("\nComputing on GPU...")

        # Copy the data onto the GPU
        with tf.device('/GPU:0'):
            a_gpu = tf.identity(a_cpu)
            b_gpu = tf.identity(b_cpu)

        # Warm up the GPU (the first call may be slow); reading one element
        # back forces the kernel to finish before timing starts
        with tf.device('/GPU:0'):
            _ = tf.matmul(a_gpu, b_gpu)[0, 0].numpy()

        # Timed run
        start_time = time.time()
        with tf.device('/GPU:0'):
            result_gpu = tf.matmul(a_gpu, b_gpu)
        # GPU eager ops are dispatched asynchronously; force the kernel to
        # complete before stopping the timer
        _ = result_gpu[0, 0].numpy()
        gpu_time = time.time() - start_time
        print(f"GPU time: {gpu_time:.2f}s")
        print(f"Speedup: {cpu_time / gpu_time:.1f}x")

        # Verify that CPU and GPU results agree
        print("\nChecking that CPU and GPU results match...")
        difference = float(tf.reduce_max(tf.abs(result_cpu - result_gpu)))
        print(f"Maximum difference: {difference:.6f}")
        if difference < 1e-4:
            print("✅ Results match")
        else:
            print("⚠️ Results differ")


def neural_network_demo():
    """Neural network GPU acceleration demo."""
    print("\n" + "=" * 50)
    print("Neural network training demo")
    print("=" * 50)

    # Create synthetic data
    print("Generating synthetic data...")
    num_samples = 10000
    num_features = 100
    X = np.random.randn(num_samples, num_features).astype(np.float32)
    y = np.random.randint(0, 2, size=(num_samples, 1)).astype(np.float32)

    # Build a simple neural network
    def create_model():
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(128, activation='relu', input_shape=(num_features,)),
            tf.keras.layers.Dropout(0.2),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(32, activation='relu'),
            tf.keras.layers.Dense(1, activation='sigmoid')
        ])
        model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy']
        )
        return model

    # Train on CPU
    print("\nTraining on CPU...")
    with tf.device('/CPU:0'):
        model_cpu = create_model()
        start_time = time.time()
        history_cpu = model_cpu.fit(X, y, epochs=10, batch_size=32, verbose=0)
        cpu_time = time.time() - start_time

    print(f"CPU training time: {cpu_time:.2f}s")
    print(f"Final accuracy: {history_cpu.history['accuracy'][-1]:.4f}")

    # Train on GPU (if available)
    if tf.config.list_physical_devices('GPU'):
        print("\nTraining on GPU...")

        # Clear the previous model from the session
        tf.keras.backend.clear_session()

        with tf.device('/GPU:0'):
            model_gpu = create_model()
            start_time = time.time()
            history_gpu = model_gpu.fit(X, y, epochs=10, batch_size=32, verbose=0)
            gpu_time = time.time() - start_time

        print(f"GPU training time: {gpu_time:.2f}s")
        print(f"Final accuracy: {history_gpu.history['accuracy'][-1]:.4f}")
        print(f"Speedup: {cpu_time / gpu_time:.1f}x")


def mixed_precision_demo():
    """Mixed precision training demo (requires a GPU)."""
    print("\n" + "=" * 50)
    print("Mixed precision training demo")
    print("=" * 50)

    if not tf.config.list_physical_devices('GPU'):
        print("Mixed precision training requires a GPU")
        return

    # Enable mixed precision
    policy = tf.keras.mixed_precision.Policy('mixed_float16')
    tf.keras.mixed_precision.set_global_policy(policy)
    print(f"Compute dtype: {policy.compute_dtype}")
    print(f"Variable dtype: {policy.variable_dtype}")

    # Build the model
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(256, activation='relu', input_shape=(100,)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(1)
    ])
    # With mixed precision the model output should be float32 for numeric stability
    model.add(tf.keras.layers.Activation('linear', dtype='float32'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss='mse'
    )

    # Generate data
    X = tf.random.normal((1000, 100))
    y = tf.random.normal((1000, 1))

    print("\nTraining with mixed precision...")
    history = model.fit(X, y, epochs=5, batch_size=32, verbose=1)
    print("✅ Mixed precision training finished")

    # Restore the default policy so later demos run in float32
    tf.keras.mixed_precision.set_global_policy('float32')


def gpu_memory_info():
    """Show GPU memory usage."""
    if not tf.config.list_physical_devices('GPU'):
        return

    print("\n" + "=" * 50)
    print("GPU memory info")
    print("=" * 50)

    # Get the GPU devices
    gpus = tf.config.list_physical_devices('GPU')

    # Use TensorFlow's GPU memory statistics
    for i, gpu in enumerate(gpus):
        print(f"\nGPU {i}: {gpu}")

        # Allocate a tensor to observe memory usage
        with tf.device(f'/GPU:{i}'):
            # Roughly 1 GB (4 bytes per float32 * 250M elements)
            size = 250000000
            large_tensor = tf.random.normal((size, 1))
            memory_mb = large_tensor.shape[0] * 4 / 1024 / 1024
            print(f"  Allocated tensor: {memory_mb:.2f} MB")

        # Query the memory statistics
        memory_info = tf.config.experimental.get_memory_info(f'GPU:{i}')
        if memory_info:
            print(f"  Current memory usage: {memory_info['current'] / 1024**2:.2f} MB")
            print(f"  Peak memory usage: {memory_info['peak'] / 1024**2:.2f} MB")

        # Clean up
        del large_tensor


def main():
    """Entry point."""
    print("TensorFlow 2.8.0 GPU demo")
    print("=" * 50)

    # Check for a GPU
    has_gpu = check_gpu_availability()

    if has_gpu:
        # Run all demos
        matrix_multiplication_demo()
        neural_network_demo()
        mixed_precision_demo()
        gpu_memory_info()

        print("\n" + "=" * 50)
        print("All demos finished!")
        print("=" * 50)
    else:
        print("\n⚠️ No GPU detected, running the CPU demo only")
        matrix_multiplication_demo()

        print("\nTips:")
        print("1. Make sure the NVIDIA GPU driver is installed")
        print("2. Install CUDA 11.2 and cuDNN 8.1")
        print("3. Install TensorFlow 2.8.0 (GPU support is included): pip install tensorflow==2.8.0")


if __name__ == "__main__":
    main()