import torch
from torch import tensor, device
import torch.fx as fx
from torch._dynamo.testing import rand_strided
from math import inf
import torch._inductor.inductor_prims
import torch._dynamo.config
import torch._inductor.config
import torch._functorch.config
import torch.fx.experimental._config

torch._dynamo.config.capture_scalar_outputs = True

isolate_fails_code_str = None

# torch version: 2.4.1
# torch cuda version: None
# torch git version: 45d303c9e4f41ec2f5450b6f60031246f67189d6

# CUDA Info:
# nvcc not found
# GPU Hardware Info:
# BW200 : 8

from torch.nn import *

class Repro(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, primals_1, primals_2, primals_4, primals_5, primals_6, primals_7, primals_8, primals_10, convert_element_type_1, clamp_max, convert_element_type_3, clamp_max_1, clamp_max_2, clamp_max_3, cat, convolution, squeeze_1, relu, convolution_1, getitem_3, rsqrt_1, convert_element_type_5, clamp_max_4, convert_element_type_7, clamp_max_5, clamp_max_6, clamp_max_7, add_19, convolution_2, squeeze_7, relu_2, unsqueeze_14, unsqueeze_38, tangents_1):
        sum_1 = torch.ops.aten.sum.dim_IntList(tangents_1, [0, 2, 3])
        convolution_backward = torch.ops.aten.convolution_backward.default(tangents_1, relu_2, primals_10, [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]); tangents_1 = primals_10 = None
        getitem_6 = convolution_backward[0]
        getitem_7 = convolution_backward[1]; convolution_backward = None
        le = torch.ops.aten.le.Scalar(relu_2, 0); relu_2 = None
        full_default = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        where = torch.ops.aten.where.self(le, full_default, getitem_6); le = getitem_6 = None
        sum_2 = torch.ops.aten.sum.dim_IntList(where, [0, 2, 3])
        sub_13 = torch.ops.aten.sub.Tensor(convolution_2, unsqueeze_14); convolution_2 = unsqueeze_14 = None
        mul_31 = torch.ops.aten.mul.Tensor(where, sub_13)
        sum_3 = torch.ops.aten.sum.dim_IntList(mul_31, [0, 2, 3]); mul_31 = None
        mul_32 = torch.ops.aten.mul.Tensor(sum_2, 6.25e-06)
        unsqueeze_15 = torch.ops.aten.unsqueeze.default(mul_32, 0); mul_32 = None
        unsqueeze_16 = torch.ops.aten.unsqueeze.default(unsqueeze_15, 2); unsqueeze_15 = None
        unsqueeze_17 = torch.ops.aten.unsqueeze.default(unsqueeze_16, 3); unsqueeze_16 = None
        mul_33 = torch.ops.aten.mul.Tensor(sum_3, 6.25e-06)
        mul_34 = torch.ops.aten.mul.Tensor(squeeze_7, squeeze_7)
        mul_35 = torch.ops.aten.mul.Tensor(mul_33, mul_34); mul_33 = mul_34 = None
        unsqueeze_18 = torch.ops.aten.unsqueeze.default(mul_35, 0); mul_35 = None
        unsqueeze_19 = torch.ops.aten.unsqueeze.default(unsqueeze_18, 2); unsqueeze_18 = None
        unsqueeze_20 = torch.ops.aten.unsqueeze.default(unsqueeze_19, 3); unsqueeze_19 = None
        mul_36 = torch.ops.aten.mul.Tensor(squeeze_7, primals_8); primals_8 = None
        unsqueeze_21 = torch.ops.aten.unsqueeze.default(mul_36, 0); mul_36 = None
        unsqueeze_22 = torch.ops.aten.unsqueeze.default(unsqueeze_21, 2); unsqueeze_21 = None
        unsqueeze_23 = torch.ops.aten.unsqueeze.default(unsqueeze_22, 3); unsqueeze_22 = None
        mul_37 = torch.ops.aten.mul.Tensor(sub_13, unsqueeze_20); sub_13 = unsqueeze_20 = None
        sub_15 = torch.ops.aten.sub.Tensor(where, mul_37); where = mul_37 = None
        sub_16 = torch.ops.aten.sub.Tensor(sub_15, unsqueeze_17); sub_15 = unsqueeze_17 = None
        mul_38 = torch.ops.aten.mul.Tensor(sub_16, unsqueeze_23); sub_16 = unsqueeze_23 = None
        mul_39 = torch.ops.aten.mul.Tensor(sum_3, squeeze_7); sum_3 = squeeze_7 = None
        convolution_backward_1 = torch.ops.aten.convolution_backward.default(mul_38, add_19, primals_7, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]); mul_38 = add_19 = primals_7 = None
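        # The block below unpacks convolution_backward_1 and then routes the gradient back
        # through what appears to be a bilinear-upsample backward: the four corner
        # contributions are scatter-added into a zero [4, 512, 100, 100] buffer via
        # _unsafe_index_put(..., accumulate=True).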
        getitem_9 = convolution_backward_1[0]
        getitem_10 = convolution_backward_1[1]; convolution_backward_1 = None
        mul_40 = torch.ops.aten.mul.Tensor(getitem_9, clamp_max_7); clamp_max_7 = None
        neg = torch.ops.aten.neg.default(mul_40)
        add_25 = torch.ops.aten.add.Tensor(getitem_9, neg); getitem_9 = neg = None
        mul_41 = torch.ops.aten.mul.Tensor(mul_40, clamp_max_6)
        neg_1 = torch.ops.aten.neg.default(mul_41)
        add_26 = torch.ops.aten.add.Tensor(mul_40, neg_1); mul_40 = neg_1 = None
        mul_42 = torch.ops.aten.mul.Tensor(add_25, clamp_max_6); clamp_max_6 = None
        neg_2 = torch.ops.aten.neg.default(mul_42)
        add_27 = torch.ops.aten.add.Tensor(add_25, neg_2); add_25 = neg_2 = None
        full_default_1 = torch.ops.aten.full.default([4, 512, 100, 100], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        _unsafe_index_put = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, clamp_max_4, clamp_max_5], mul_41, True); mul_41 = None
        _unsafe_index_put_1 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, clamp_max_4, convert_element_type_7], add_26, True); clamp_max_4 = add_26 = None
        add_28 = torch.ops.aten.add.Tensor(_unsafe_index_put, _unsafe_index_put_1); _unsafe_index_put = _unsafe_index_put_1 = None
        _unsafe_index_put_2 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, convert_element_type_5, clamp_max_5], mul_42, True); clamp_max_5 = mul_42 = None
        add_29 = torch.ops.aten.add.Tensor(add_28, _unsafe_index_put_2); add_28 = _unsafe_index_put_2 = None
        _unsafe_index_put_3 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, convert_element_type_5, convert_element_type_7], add_27, True); full_default_1 = convert_element_type_5 = convert_element_type_7 = add_27 = None
        add_30 = torch.ops.aten.add.Tensor(add_29, _unsafe_index_put_3); add_29 = _unsafe_index_put_3 = None
        sub_6 = torch.ops.aten.sub.Tensor(convolution_1, getitem_3)
        mul_12 = torch.ops.aten.mul.Tensor(sub_6, rsqrt_1); sub_6 = None
        unsqueeze_4 = torch.ops.aten.unsqueeze.default(primals_5, -1)
        unsqueeze_5 = torch.ops.aten.unsqueeze.default(unsqueeze_4, -1); unsqueeze_4 = None
        mul_18 = torch.ops.aten.mul.Tensor(mul_12, unsqueeze_5); mul_12 = unsqueeze_5 = None
        unsqueeze_6 = torch.ops.aten.unsqueeze.default(primals_6, -1); primals_6 = None
        unsqueeze_7 = torch.ops.aten.unsqueeze.default(unsqueeze_6, -1); unsqueeze_6 = None
        add_14 = torch.ops.aten.add.Tensor(mul_18, unsqueeze_7); mul_18 = unsqueeze_7 = None
        relu_1 = torch.ops.aten.relu.default(add_14); add_14 = None
        le_1 = torch.ops.aten.le.Scalar(relu_1, 0); relu_1 = None
        where_1 = torch.ops.aten.where.self(le_1, full_default, add_30); le_1 = add_30 = None
        squeeze_3 = torch.ops.aten.squeeze.dims(getitem_3, [0, 2, 3]); getitem_3 = None
        unsqueeze_24 = torch.ops.aten.unsqueeze.default(squeeze_3, 0); squeeze_3 = None
        unsqueeze_25 = torch.ops.aten.unsqueeze.default(unsqueeze_24, 2); unsqueeze_24 = None
        unsqueeze_26 = torch.ops.aten.unsqueeze.default(unsqueeze_25, 3); unsqueeze_25 = None
        sum_4 = torch.ops.aten.sum.dim_IntList(where_1, [0, 2, 3])
        sub_17 = torch.ops.aten.sub.Tensor(convolution_1, unsqueeze_26); convolution_1 = unsqueeze_26 = None
        mul_43 = torch.ops.aten.mul.Tensor(where_1, sub_17)
        sum_5 = torch.ops.aten.sum.dim_IntList(mul_43, [0, 2, 3]); mul_43 = None
        mul_44 = torch.ops.aten.mul.Tensor(sum_4, 2.5e-05)
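        # 2.5e-05 == 1 / (4 * 100 * 100), i.e. the mean over the (N, H, W) elements used by
        # this batch-norm backward (the earlier 6.25e-06 is likewise 1 / (4 * 200 * 200)).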
        unsqueeze_27 = torch.ops.aten.unsqueeze.default(mul_44, 0); mul_44 = None
        unsqueeze_28 = torch.ops.aten.unsqueeze.default(unsqueeze_27, 2); unsqueeze_27 = None
        unsqueeze_29 = torch.ops.aten.unsqueeze.default(unsqueeze_28, 3); unsqueeze_28 = None
        mul_45 = torch.ops.aten.mul.Tensor(sum_5, 2.5e-05)
        squeeze_4 = torch.ops.aten.squeeze.dims(rsqrt_1, [0, 2, 3]); rsqrt_1 = None
        mul_46 = torch.ops.aten.mul.Tensor(squeeze_4, squeeze_4)
        mul_47 = torch.ops.aten.mul.Tensor(mul_45, mul_46); mul_45 = mul_46 = None
        unsqueeze_30 = torch.ops.aten.unsqueeze.default(mul_47, 0); mul_47 = None
        unsqueeze_31 = torch.ops.aten.unsqueeze.default(unsqueeze_30, 2); unsqueeze_30 = None
        unsqueeze_32 = torch.ops.aten.unsqueeze.default(unsqueeze_31, 3); unsqueeze_31 = None
        mul_48 = torch.ops.aten.mul.Tensor(squeeze_4, primals_5); primals_5 = None
        unsqueeze_33 = torch.ops.aten.unsqueeze.default(mul_48, 0); mul_48 = None
        unsqueeze_34 = torch.ops.aten.unsqueeze.default(unsqueeze_33, 2); unsqueeze_33 = None
        unsqueeze_35 = torch.ops.aten.unsqueeze.default(unsqueeze_34, 3); unsqueeze_34 = None
        mul_49 = torch.ops.aten.mul.Tensor(sub_17, unsqueeze_32); sub_17 = unsqueeze_32 = None
        sub_19 = torch.ops.aten.sub.Tensor(where_1, mul_49); where_1 = mul_49 = None
        sub_20 = torch.ops.aten.sub.Tensor(sub_19, unsqueeze_29); sub_19 = unsqueeze_29 = None
        mul_50 = torch.ops.aten.mul.Tensor(sub_20, unsqueeze_35); sub_20 = unsqueeze_35 = None
        mul_51 = torch.ops.aten.mul.Tensor(sum_5, squeeze_4); sum_5 = squeeze_4 = None
        convolution_backward_2 = torch.ops.aten.convolution_backward.default(mul_50, relu, primals_4, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]); mul_50 = primals_4 = None
        getitem_12 = convolution_backward_2[0]
        getitem_13 = convolution_backward_2[1]; convolution_backward_2 = None
        le_2 = torch.ops.aten.le.Scalar(relu, 0); relu = None
        where_2 = torch.ops.aten.where.self(le_2, full_default, getitem_12); le_2 = full_default = getitem_12 = None
        sum_6 = torch.ops.aten.sum.dim_IntList(where_2, [0, 2, 3])
        sub_21 = torch.ops.aten.sub.Tensor(convolution, unsqueeze_38); convolution = unsqueeze_38 = None
        mul_52 = torch.ops.aten.mul.Tensor(where_2, sub_21)
        sum_7 = torch.ops.aten.sum.dim_IntList(mul_52, [0, 2, 3]); mul_52 = None
        mul_53 = torch.ops.aten.mul.Tensor(sum_6, 2.5e-05)
        unsqueeze_39 = torch.ops.aten.unsqueeze.default(mul_53, 0); mul_53 = None
        unsqueeze_40 = torch.ops.aten.unsqueeze.default(unsqueeze_39, 2); unsqueeze_39 = None
        unsqueeze_41 = torch.ops.aten.unsqueeze.default(unsqueeze_40, 3); unsqueeze_40 = None
        mul_54 = torch.ops.aten.mul.Tensor(sum_7, 2.5e-05)
        mul_55 = torch.ops.aten.mul.Tensor(squeeze_1, squeeze_1)
        mul_56 = torch.ops.aten.mul.Tensor(mul_54, mul_55); mul_54 = mul_55 = None
        unsqueeze_42 = torch.ops.aten.unsqueeze.default(mul_56, 0); mul_56 = None
        unsqueeze_43 = torch.ops.aten.unsqueeze.default(unsqueeze_42, 2); unsqueeze_42 = None
        unsqueeze_44 = torch.ops.aten.unsqueeze.default(unsqueeze_43, 3); unsqueeze_43 = None
        mul_57 = torch.ops.aten.mul.Tensor(squeeze_1, primals_2); primals_2 = None
        unsqueeze_45 = torch.ops.aten.unsqueeze.default(mul_57, 0); mul_57 = None
        unsqueeze_46 = torch.ops.aten.unsqueeze.default(unsqueeze_45, 2); unsqueeze_45 = None
        unsqueeze_47 = torch.ops.aten.unsqueeze.default(unsqueeze_46, 3); unsqueeze_46 = None
        mul_58 = torch.ops.aten.mul.Tensor(sub_21, unsqueeze_44); sub_21 = unsqueeze_44 = None
        sub_23 = torch.ops.aten.sub.Tensor(where_2, mul_58); where_2 = mul_58 = None
        sub_24 = torch.ops.aten.sub.Tensor(sub_23, unsqueeze_41); sub_23 = unsqueeze_41 = None
        mul_59 = torch.ops.aten.mul.Tensor(sub_24, unsqueeze_47); sub_24 = unsqueeze_47 = None
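        # convolution_backward_3 below differentiates the 640 -> 512 channel 3x3 convolution;
        # its grad_input is then split into slice_1 (channels 0:128) and slice_2 (channels
        # 128:640), presumably undoing the torch.cat from the forward pass.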
        mul_60 = torch.ops.aten.mul.Tensor(sum_7, squeeze_1); sum_7 = squeeze_1 = None
        convolution_backward_3 = torch.ops.aten.convolution_backward.default(mul_59, cat, primals_1, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]); mul_59 = cat = primals_1 = None
        getitem_15 = convolution_backward_3[0]
        getitem_16 = convolution_backward_3[1]; convolution_backward_3 = None
        slice_1 = torch.ops.aten.slice.Tensor(getitem_15, 1, 0, 128)
        slice_2 = torch.ops.aten.slice.Tensor(getitem_15, 1, 128, 640); getitem_15 = None
        mul_61 = torch.ops.aten.mul.Tensor(slice_2, clamp_max_3); clamp_max_3 = None
        neg_3 = torch.ops.aten.neg.default(mul_61)
        add_31 = torch.ops.aten.add.Tensor(slice_2, neg_3); slice_2 = neg_3 = None
        mul_62 = torch.ops.aten.mul.Tensor(mul_61, clamp_max_2)
        neg_4 = torch.ops.aten.neg.default(mul_62)
        add_32 = torch.ops.aten.add.Tensor(mul_61, neg_4); mul_61 = neg_4 = None
        mul_63 = torch.ops.aten.mul.Tensor(add_31, clamp_max_2); clamp_max_2 = None
        neg_5 = torch.ops.aten.neg.default(mul_63)
        add_33 = torch.ops.aten.add.Tensor(add_31, neg_5); add_31 = neg_5 = None
        full_default_7 = torch.ops.aten.full.default([4, 512, 25, 25], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        _unsafe_index_put_4 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, clamp_max, clamp_max_1], mul_62, True); mul_62 = None
        _unsafe_index_put_5 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, clamp_max, convert_element_type_3], add_32, True); clamp_max = add_32 = None
        add_34 = torch.ops.aten.add.Tensor(_unsafe_index_put_4, _unsafe_index_put_5); _unsafe_index_put_4 = _unsafe_index_put_5 = None
        _unsafe_index_put_6 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, convert_element_type_1, clamp_max_1], mul_63, True); clamp_max_1 = mul_63 = None
        add_35 = torch.ops.aten.add.Tensor(add_34, _unsafe_index_put_6); add_34 = _unsafe_index_put_6 = None
        _unsafe_index_put_7 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, convert_element_type_1, convert_element_type_3], add_33, True); full_default_7 = convert_element_type_1 = convert_element_type_3 = add_33 = None
        add_36 = torch.ops.aten.add.Tensor(add_35, _unsafe_index_put_7); add_35 = _unsafe_index_put_7 = None
        return [getitem_16, mul_60, sum_6, getitem_13, mul_51, sum_4, getitem_10, mul_39, sum_2, getitem_7, sum_1, None, None, None, None, None, None, None, None, None, slice_1, add_36]

def load_args(reader):
    buf0 = reader.storage('934c55e4a7a69a0a29a96cd8ef9f11c9859658e1', 11796480, device=device(type='cuda', index=2))
    reader.tensor(buf0, (512, 640, 3, 3), requires_grad=True, is_leaf=True)  # primals_1
    buf1 = reader.storage('f12094f433480ec90280d223057708434df38941', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf1, (512,), requires_grad=True, is_leaf=True)  # primals_2
    buf2 = reader.storage('06c46ad2c91ec5c8eebc4fb0be80459bdfe007a8', 9437184, device=device(type='cuda', index=2))
    reader.tensor(buf2, (512, 512, 3, 3), requires_grad=True, is_leaf=True)  # primals_4
    buf3 = reader.storage('aba0c4266c842d1845e720dc0c789942770a60b7', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf3, (512,), requires_grad=True, is_leaf=True)  # primals_5
    buf4 = reader.storage('bb8471d379e03c8ccb9897ce7d3a2dfbacb44e30', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf4, (512,), requires_grad=True, is_leaf=True)  # primals_6
    buf5 = reader.storage('b9484105fb5b2045fb6550a1edb77af72e639416', 4718592, device=device(type='cuda', index=2))
    reader.tensor(buf5, (256, 512, 3, 3), requires_grad=True, is_leaf=True)  # primals_7
    buf6 = reader.storage('b778b8cab416c3fa6763b88e431266ae6ea28941', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf6, (256,), requires_grad=True, is_leaf=True)  # primals_8
    buf7 = reader.storage('c5f14ec72c73a593b47ef4aecf37f6bb25d2dec4', 262144, device=device(type='cuda', index=2))
    reader.tensor(buf7, (256, 256, 1, 1), requires_grad=True, is_leaf=True)  # primals_10
    buf8 = reader.storage('99ef5c7086a924dfc5221c01ff1520de469849c8', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf8, (100, 1), dtype=torch.int64, is_leaf=True)  # convert_element_type_1
    buf9 = reader.storage('532b7b8fc19c48c7434e569ab96aa0670d5651ef', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf9, (100, 1), dtype=torch.int64, is_leaf=True)  # clamp_max
    buf10 = reader.storage('99ef5c7086a924dfc5221c01ff1520de469849c8', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf10, (100,), dtype=torch.int64, is_leaf=True)  # convert_element_type_3
    buf11 = reader.storage('532b7b8fc19c48c7434e569ab96aa0670d5651ef', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf11, (100,), dtype=torch.int64, is_leaf=True)  # clamp_max_1
    buf12 = reader.storage('0538ed039b8a4706a4f85bf431e12664d8940742', 400, device=device(type='cuda', index=2))
    reader.tensor(buf12, (100,), is_leaf=True)  # clamp_max_2
    buf13 = reader.storage('0538ed039b8a4706a4f85bf431e12664d8940742', 400, device=device(type='cuda', index=2))
    reader.tensor(buf13, (100, 1), is_leaf=True)  # clamp_max_3
    buf14 = reader.storage('5d41e66671a283b70001fd74345d8e7e3def00bd', 102400000, device=device(type='cuda', index=2))
    reader.tensor(buf14, (4, 640, 100, 100), is_leaf=True)  # cat
    buf15 = reader.storage('a8fe0ed584571bb3218d663656459a36545be5e6', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf15, (4, 512, 100, 100), is_leaf=True)  # convolution
    buf16 = reader.storage('0af13bcf109b8ca2df7f5ce3387d51e8576fb30a', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf16, (512,), is_leaf=True)  # squeeze_1
    buf17 = reader.storage('32f14d6fa07f654fbb09ef1563066303a3501eda', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf17, (4, 512, 100, 100), is_leaf=True)  # relu
    buf18 = reader.storage('aca23d51e723ad9b4bec2e54d6f0af4b5b85cc7d', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf18, (4, 512, 100, 100), is_leaf=True)  # convolution_1
    buf19 = reader.storage('4940c79e48676c2e1359870dc770e25cd780983d', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf19, (1, 512, 1, 1), is_leaf=True)  # getitem_3
    buf20 = reader.storage('d17407a9f45954a4d0d36e5b20a40ac554cc3aff', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf20, (1, 512, 1, 1), is_leaf=True)  # rsqrt_1
    buf21 = reader.storage('95fbd2b85e217ab78f8f9d7900b273a1362b3112', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf21, (200, 1), dtype=torch.int64, is_leaf=True)  # convert_element_type_5
    buf22 = reader.storage('d9920b87a7261c94c907bc68889b005f277cd597', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf22, (200, 1), dtype=torch.int64, is_leaf=True)  # clamp_max_4
    buf23 = reader.storage('95fbd2b85e217ab78f8f9d7900b273a1362b3112', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf23, (200,), dtype=torch.int64, is_leaf=True)  # convert_element_type_7
    buf24 = reader.storage('d9920b87a7261c94c907bc68889b005f277cd597', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf24, (200,), dtype=torch.int64, is_leaf=True)  # clamp_max_5
    buf25 = reader.storage('131d76cb798ee04745f0c7dcb67b63c74a6c00df', 800, device=device(type='cuda', index=2))
    reader.tensor(buf25, (200,), is_leaf=True)  # clamp_max_6
    buf26 = reader.storage('131d76cb798ee04745f0c7dcb67b63c74a6c00df', 800, device=device(type='cuda', index=2))
    reader.tensor(buf26, (200, 1), is_leaf=True)  # clamp_max_7
    buf27 = reader.storage('32194c54194bddd5f695a8d306828130629246fc', 327680000, device=device(type='cuda', index=2))
    reader.tensor(buf27, (4, 512, 200, 200), is_leaf=True)  # add_19
    buf28 = reader.storage('e3a286ef8d6373c83ef30afe16eaae96ee52b965', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf28, (4, 256, 200, 200), is_leaf=True)  # convolution_2
    buf29 = reader.storage('9572b289e6d5c9bdd20a79367d4005440da40795', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf29, (256,), is_leaf=True)  # squeeze_7
    buf30 = reader.storage('42f9ce794a05b12a40f15cbd4abb1201ccef0f72', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf30, (4, 256, 200, 200), is_leaf=True)  # relu_2
    buf31 = reader.storage('61670207f087dc68f052bc03747d9ab365297b17', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf31, (1, 256, 1, 1), is_leaf=True)  # unsqueeze_14
    buf32 = reader.storage('ab77896e6dd76345e63586ecda30b1e4a63439cc', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf32, (1, 512, 1, 1), is_leaf=True)  # unsqueeze_38
    buf33 = reader.storage('f0ec623d2a44ff0f64fc264faf9128c2a6896e57', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf33, (4, 256, 200, 200), is_leaf=True)  # tangents_1

load_args._version = 0
mod = Repro()

if __name__ == '__main__':
    from torch._dynamo.repro.after_aot import run_repro
    with torch.no_grad():
        run_repro(mod, load_args, accuracy=True, command='run', save_dir='/root/FlashOCC/torch_compile_debug/run_2025_08_24_19_42_28_279064-pid_182645/minifier/checkpoints', tracing_mode='real', check_str=None)
        # To run it separately, do
        # mod, args = run_repro(mod, load_args, accuracy=True, command='get_args', save_dir='/root/FlashOCC/torch_compile_debug/run_2025_08_24_19_42_28_279064-pid_182645/minifier/checkpoints', tracing_mode='real', check_str=None)
        # mod(*args)