# Launch ./resnet_ctl_imagenet_main.py with python3 under `hipprof --hip-trace`.
#
# Environment (set for the launched command only, not exported to the shell):
#   XLA_FLAGS             - XLA options string; here it names a ROCm/DTK
#                           bitcode directory and requests HLO dumps
#                           (all passes, HTML) into ./tmp.
#   TF_DUMP_GRAPH_PREFIX  - set to ./tf_graph.
#
# The training flags are listed one per line with real backslash-newline
# continuations. A backslash followed by a space is NOT a continuation: it
# escapes the space and glues it onto the next word, so every option would
# arrive with a leading blank and stop looking like a "--flag".
#
# NOTE: `set --` replaces this script's own positional parameters; the
# command line given to the script itself is not forwarded to the trainer.
set -- \
  --base_learning_rate=10.0 \
  --batch_size=32 \
  --nocache_decoded_image \
  --data_dir=/public/software/apps/DeepLearning/Data/ImageNet-tensorflow \
  --device_warmup_steps=1 \
  --dtype=fp32 \
  --noenable_checkpoint_and_export \
  --noenable_device_warmup \
  --enable_eager \
  --epochs_between_evals=4 \
  --noeval_dataset_cache \
  --eval_offset_epochs=2 \
  --label_smoothing=0.1 \
  --lars_epsilon=0 \
  --log_steps=125 \
  --lr_schedule=polynomial \
  --optimizer=LARS \
  --noreport_accuracy_metrics \
  --single_l2_loss_op \
  --steps_per_loop=25 \
  --train_epochs=1 \
  --notraining_dataset_cache \
  --notrace_warmup \
  --nouse_synthetic_data \
  --use_tf_function \
  --verbosity=0 \
  --warmup_epochs=5 \
  --weight_decay=0.0002 \
  --target_accuracy=0.759 \
  --momentum=0.9 \
  --num_replicas=64 \
  --num_accumulation_steps=2 \
  --num_classes=1000 \
  --noskip_eval

# Run the profiled training job; the script's exit status is this command's.
XLA_FLAGS="--xla_gpu_cuda_data_dir=/public/software/compiler/rocm/dtk-21.10.1/amdgcn/bitcode/ --xla_dump_hlo_pass_re=.* --xla_dump_hlo_as_html --xla_dump_to=./tmp" \
TF_DUMP_GRAPH_PREFIX="./tf_graph" \
hipprof --hip-trace python3 ./resnet_ctl_imagenet_main.py "$@"