import torch
from torch import tensor, device
import torch.fx as fx
from torch._dynamo.testing import rand_strided
from math import inf
import torch._inductor.inductor_prims

import torch._dynamo.config
import torch._inductor.config
import torch._functorch.config
import torch.fx.experimental._config
torch._dynamo.config.capture_scalar_outputs = True





isolate_fails_code_str = None



# torch version: 2.4.1
# torch cuda version: None
# torch git version: 45d303c9e4f41ec2f5450b6f60031246f67189d6


# CUDA Info: 
# nvcc not found
# GPU Hardware Info: 
# BW200 : 8 


from torch.nn import *
class Repro(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, primals_1, primals_2, primals_4, primals_5, primals_6, primals_7, primals_8, primals_10, convert_element_type_1, clamp_max, convert_element_type_3, clamp_max_1, clamp_max_2, clamp_max_3, cat, convolution, squeeze_1, relu, convolution_1, getitem_3, rsqrt_1, convert_element_type_5, clamp_max_4, convert_element_type_7, clamp_max_5, clamp_max_6, clamp_max_7, add_19, convolution_2, squeeze_7, relu_2, unsqueeze_14, unsqueeze_38, tangents_1):
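        # Backward (reverse-mode) graph for a conv / batch-norm / ReLU / bilinear-upsample
        # stack: tangents_1 carries the incoming gradient and the returned list holds the
        # parameter and input gradients.
        # Final 1x1 conv (primals_10): sum_1 is its bias gradient; convolution_backward
        # yields the gradient w.r.t. its input (getitem_6) and weight (getitem_7).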
        sum_1 = torch.ops.aten.sum.dim_IntList(tangents_1, [0, 2, 3])
        convolution_backward = torch.ops.aten.convolution_backward.default(tangents_1, relu_2, primals_10, [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]);  tangents_1 = primals_10 = None
        getitem_6 = convolution_backward[0]
        getitem_7 = convolution_backward[1];  convolution_backward = None
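        # ReLU backward: zero the gradient wherever the forward activation relu_2 was <= 0.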
        le = torch.ops.aten.le.Scalar(relu_2, 0);  relu_2 = None
        full_default = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        where = torch.ops.aten.where.self(le, full_default, getitem_6);  le = getitem_6 = None
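        # Batch-norm backward over the 256-channel, 200x200 activation
        # (6.25e-06 == 1/(4*200*200) == 1/(N*H*W)); squeeze_7 holds the saved inverse std
        # and unsqueeze_14 the saved mean. mul_38 is the gradient w.r.t. the BN input,
        # mul_39 and sum_2 the gradients for the BN weight (primals_8) and bias.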
        sum_2 = torch.ops.aten.sum.dim_IntList(where, [0, 2, 3])
        sub_13 = torch.ops.aten.sub.Tensor(convolution_2, unsqueeze_14);  convolution_2 = unsqueeze_14 = None
        mul_31 = torch.ops.aten.mul.Tensor(where, sub_13)
        sum_3 = torch.ops.aten.sum.dim_IntList(mul_31, [0, 2, 3]);  mul_31 = None
        mul_32 = torch.ops.aten.mul.Tensor(sum_2, 6.25e-06)
        unsqueeze_15 = torch.ops.aten.unsqueeze.default(mul_32, 0);  mul_32 = None
        unsqueeze_16 = torch.ops.aten.unsqueeze.default(unsqueeze_15, 2);  unsqueeze_15 = None
        unsqueeze_17 = torch.ops.aten.unsqueeze.default(unsqueeze_16, 3);  unsqueeze_16 = None
        mul_33 = torch.ops.aten.mul.Tensor(sum_3, 6.25e-06)
        mul_34 = torch.ops.aten.mul.Tensor(squeeze_7, squeeze_7)
        mul_35 = torch.ops.aten.mul.Tensor(mul_33, mul_34);  mul_33 = mul_34 = None
        unsqueeze_18 = torch.ops.aten.unsqueeze.default(mul_35, 0);  mul_35 = None
        unsqueeze_19 = torch.ops.aten.unsqueeze.default(unsqueeze_18, 2);  unsqueeze_18 = None
        unsqueeze_20 = torch.ops.aten.unsqueeze.default(unsqueeze_19, 3);  unsqueeze_19 = None
        mul_36 = torch.ops.aten.mul.Tensor(squeeze_7, primals_8);  primals_8 = None
        unsqueeze_21 = torch.ops.aten.unsqueeze.default(mul_36, 0);  mul_36 = None
        unsqueeze_22 = torch.ops.aten.unsqueeze.default(unsqueeze_21, 2);  unsqueeze_21 = None
        unsqueeze_23 = torch.ops.aten.unsqueeze.default(unsqueeze_22, 3);  unsqueeze_22 = None
        mul_37 = torch.ops.aten.mul.Tensor(sub_13, unsqueeze_20);  sub_13 = unsqueeze_20 = None
        sub_15 = torch.ops.aten.sub.Tensor(where, mul_37);  where = mul_37 = None
        sub_16 = torch.ops.aten.sub.Tensor(sub_15, unsqueeze_17);  sub_15 = unsqueeze_17 = None
        mul_38 = torch.ops.aten.mul.Tensor(sub_16, unsqueeze_23);  sub_16 = unsqueeze_23 = None
        mul_39 = torch.ops.aten.mul.Tensor(sum_3, squeeze_7);  sum_3 = squeeze_7 = None
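        # 3x3 conv (primals_7) backward over the upsampled 512-channel input add_19.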
        convolution_backward_1 = torch.ops.aten.convolution_backward.default(mul_38, add_19, primals_7, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]);  mul_38 = add_19 = primals_7 = None
        getitem_9 = convolution_backward_1[0]
        getitem_10 = convolution_backward_1[1];  convolution_backward_1 = None
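        # Decomposed backward of a bilinear upsample (100x100 -> 200x200 in the forward):
        # the clamp_max_* tensors hold interpolation weights/indices, the mul/neg/add chain
        # splits the gradient across the four corners, and the _unsafe_index_put calls
        # scatter-add each corner back into the 100x100 source grid (add_30).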
        mul_40 = torch.ops.aten.mul.Tensor(getitem_9, clamp_max_7);  clamp_max_7 = None
        neg = torch.ops.aten.neg.default(mul_40)
        add_25 = torch.ops.aten.add.Tensor(getitem_9, neg);  getitem_9 = neg = None
        mul_41 = torch.ops.aten.mul.Tensor(mul_40, clamp_max_6)
        neg_1 = torch.ops.aten.neg.default(mul_41)
        add_26 = torch.ops.aten.add.Tensor(mul_40, neg_1);  mul_40 = neg_1 = None
        mul_42 = torch.ops.aten.mul.Tensor(add_25, clamp_max_6);  clamp_max_6 = None
        neg_2 = torch.ops.aten.neg.default(mul_42)
        add_27 = torch.ops.aten.add.Tensor(add_25, neg_2);  add_25 = neg_2 = None
        full_default_1 = torch.ops.aten.full.default([4, 512, 100, 100], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        _unsafe_index_put = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, clamp_max_4, clamp_max_5], mul_41, True);  mul_41 = None
        _unsafe_index_put_1 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, clamp_max_4, convert_element_type_7], add_26, True);  clamp_max_4 = add_26 = None
        add_28 = torch.ops.aten.add.Tensor(_unsafe_index_put, _unsafe_index_put_1);  _unsafe_index_put = _unsafe_index_put_1 = None
        _unsafe_index_put_2 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, convert_element_type_5, clamp_max_5], mul_42, True);  clamp_max_5 = mul_42 = None
        add_29 = torch.ops.aten.add.Tensor(add_28, _unsafe_index_put_2);  add_28 = _unsafe_index_put_2 = None
        _unsafe_index_put_3 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, convert_element_type_5, convert_element_type_7], add_27, True);  full_default_1 = convert_element_type_5 = convert_element_type_7 = add_27 = None
        add_30 = torch.ops.aten.add.Tensor(add_29, _unsafe_index_put_3);  add_29 = _unsafe_index_put_3 = None
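        # relu_1 was not saved by the forward, so it is recomputed here from convolution_1
        # and the saved BN statistics before applying its ReLU backward mask (where_1).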
        sub_6 = torch.ops.aten.sub.Tensor(convolution_1, getitem_3)
        mul_12 = torch.ops.aten.mul.Tensor(sub_6, rsqrt_1);  sub_6 = None
        unsqueeze_4 = torch.ops.aten.unsqueeze.default(primals_5, -1)
        unsqueeze_5 = torch.ops.aten.unsqueeze.default(unsqueeze_4, -1);  unsqueeze_4 = None
        mul_18 = torch.ops.aten.mul.Tensor(mul_12, unsqueeze_5);  mul_12 = unsqueeze_5 = None
        unsqueeze_6 = torch.ops.aten.unsqueeze.default(primals_6, -1);  primals_6 = None
        unsqueeze_7 = torch.ops.aten.unsqueeze.default(unsqueeze_6, -1);  unsqueeze_6 = None
        add_14 = torch.ops.aten.add.Tensor(mul_18, unsqueeze_7);  mul_18 = unsqueeze_7 = None
        relu_1 = torch.ops.aten.relu.default(add_14);  add_14 = None
        le_1 = torch.ops.aten.le.Scalar(relu_1, 0);  relu_1 = None
        where_1 = torch.ops.aten.where.self(le_1, full_default, add_30);  le_1 = add_30 = None
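        # Batch-norm backward over the 512-channel, 100x100 activation
        # (2.5e-05 == 1/(4*100*100)); getitem_3 and rsqrt_1 are the saved mean and inverse std.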
        squeeze_3 = torch.ops.aten.squeeze.dims(getitem_3, [0, 2, 3]);  getitem_3 = None
        unsqueeze_24 = torch.ops.aten.unsqueeze.default(squeeze_3, 0);  squeeze_3 = None
        unsqueeze_25 = torch.ops.aten.unsqueeze.default(unsqueeze_24, 2);  unsqueeze_24 = None
        unsqueeze_26 = torch.ops.aten.unsqueeze.default(unsqueeze_25, 3);  unsqueeze_25 = None
        sum_4 = torch.ops.aten.sum.dim_IntList(where_1, [0, 2, 3])
        sub_17 = torch.ops.aten.sub.Tensor(convolution_1, unsqueeze_26);  convolution_1 = unsqueeze_26 = None
        mul_43 = torch.ops.aten.mul.Tensor(where_1, sub_17)
        sum_5 = torch.ops.aten.sum.dim_IntList(mul_43, [0, 2, 3]);  mul_43 = None
        mul_44 = torch.ops.aten.mul.Tensor(sum_4, 2.5e-05)
        unsqueeze_27 = torch.ops.aten.unsqueeze.default(mul_44, 0);  mul_44 = None
        unsqueeze_28 = torch.ops.aten.unsqueeze.default(unsqueeze_27, 2);  unsqueeze_27 = None
        unsqueeze_29 = torch.ops.aten.unsqueeze.default(unsqueeze_28, 3);  unsqueeze_28 = None
        mul_45 = torch.ops.aten.mul.Tensor(sum_5, 2.5e-05)
        squeeze_4 = torch.ops.aten.squeeze.dims(rsqrt_1, [0, 2, 3]);  rsqrt_1 = None
        mul_46 = torch.ops.aten.mul.Tensor(squeeze_4, squeeze_4)
        mul_47 = torch.ops.aten.mul.Tensor(mul_45, mul_46);  mul_45 = mul_46 = None
        unsqueeze_30 = torch.ops.aten.unsqueeze.default(mul_47, 0);  mul_47 = None
        unsqueeze_31 = torch.ops.aten.unsqueeze.default(unsqueeze_30, 2);  unsqueeze_30 = None
        unsqueeze_32 = torch.ops.aten.unsqueeze.default(unsqueeze_31, 3);  unsqueeze_31 = None
        mul_48 = torch.ops.aten.mul.Tensor(squeeze_4, primals_5);  primals_5 = None
        unsqueeze_33 = torch.ops.aten.unsqueeze.default(mul_48, 0);  mul_48 = None
        unsqueeze_34 = torch.ops.aten.unsqueeze.default(unsqueeze_33, 2);  unsqueeze_33 = None
        unsqueeze_35 = torch.ops.aten.unsqueeze.default(unsqueeze_34, 3);  unsqueeze_34 = None
        mul_49 = torch.ops.aten.mul.Tensor(sub_17, unsqueeze_32);  sub_17 = unsqueeze_32 = None
        sub_19 = torch.ops.aten.sub.Tensor(where_1, mul_49);  where_1 = mul_49 = None
        sub_20 = torch.ops.aten.sub.Tensor(sub_19, unsqueeze_29);  sub_19 = unsqueeze_29 = None
        mul_50 = torch.ops.aten.mul.Tensor(sub_20, unsqueeze_35);  sub_20 = unsqueeze_35 = None
        mul_51 = torch.ops.aten.mul.Tensor(sum_5, squeeze_4);  sum_5 = squeeze_4 = None
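        # 3x3 conv (primals_4) backward, followed by the ReLU backward mask for `relu`.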
        convolution_backward_2 = torch.ops.aten.convolution_backward.default(mul_50, relu, primals_4, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]);  mul_50 = primals_4 = None
        getitem_12 = convolution_backward_2[0]
        getitem_13 = convolution_backward_2[1];  convolution_backward_2 = None
        le_2 = torch.ops.aten.le.Scalar(relu, 0);  relu = None
        where_2 = torch.ops.aten.where.self(le_2, full_default, getitem_12);  le_2 = full_default = getitem_12 = None
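        # Batch-norm backward for the first BN block (512 channels, 100x100); squeeze_1 is
        # its saved inverse std and unsqueeze_38 its saved mean.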
        sum_6 = torch.ops.aten.sum.dim_IntList(where_2, [0, 2, 3])
        sub_21 = torch.ops.aten.sub.Tensor(convolution, unsqueeze_38);  convolution = unsqueeze_38 = None
        mul_52 = torch.ops.aten.mul.Tensor(where_2, sub_21)
        sum_7 = torch.ops.aten.sum.dim_IntList(mul_52, [0, 2, 3]);  mul_52 = None
        mul_53 = torch.ops.aten.mul.Tensor(sum_6, 2.5e-05)
        unsqueeze_39 = torch.ops.aten.unsqueeze.default(mul_53, 0);  mul_53 = None
        unsqueeze_40 = torch.ops.aten.unsqueeze.default(unsqueeze_39, 2);  unsqueeze_39 = None
        unsqueeze_41 = torch.ops.aten.unsqueeze.default(unsqueeze_40, 3);  unsqueeze_40 = None
        mul_54 = torch.ops.aten.mul.Tensor(sum_7, 2.5e-05)
        mul_55 = torch.ops.aten.mul.Tensor(squeeze_1, squeeze_1)
        mul_56 = torch.ops.aten.mul.Tensor(mul_54, mul_55);  mul_54 = mul_55 = None
        unsqueeze_42 = torch.ops.aten.unsqueeze.default(mul_56, 0);  mul_56 = None
        unsqueeze_43 = torch.ops.aten.unsqueeze.default(unsqueeze_42, 2);  unsqueeze_42 = None
        unsqueeze_44 = torch.ops.aten.unsqueeze.default(unsqueeze_43, 3);  unsqueeze_43 = None
        mul_57 = torch.ops.aten.mul.Tensor(squeeze_1, primals_2);  primals_2 = None
        unsqueeze_45 = torch.ops.aten.unsqueeze.default(mul_57, 0);  mul_57 = None
        unsqueeze_46 = torch.ops.aten.unsqueeze.default(unsqueeze_45, 2);  unsqueeze_45 = None
        unsqueeze_47 = torch.ops.aten.unsqueeze.default(unsqueeze_46, 3);  unsqueeze_46 = None
        mul_58 = torch.ops.aten.mul.Tensor(sub_21, unsqueeze_44);  sub_21 = unsqueeze_44 = None
        sub_23 = torch.ops.aten.sub.Tensor(where_2, mul_58);  where_2 = mul_58 = None
        sub_24 = torch.ops.aten.sub.Tensor(sub_23, unsqueeze_41);  sub_23 = unsqueeze_41 = None
        mul_59 = torch.ops.aten.mul.Tensor(sub_24, unsqueeze_47);  sub_24 = unsqueeze_47 = None
        mul_60 = torch.ops.aten.mul.Tensor(sum_7, squeeze_1);  sum_7 = squeeze_1 = None
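        # First 3x3 conv (primals_1) backward over the concatenated input `cat`; the input
        # gradient is split back into the 128-channel skip branch (slice_1) and the
        # 512-channel upsampled branch (slice_2).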
        convolution_backward_3 = torch.ops.aten.convolution_backward.default(mul_59, cat, primals_1, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]);  mul_59 = cat = primals_1 = None
        getitem_15 = convolution_backward_3[0]
        getitem_16 = convolution_backward_3[1];  convolution_backward_3 = None
        slice_1 = torch.ops.aten.slice.Tensor(getitem_15, 1, 0, 128)
        slice_2 = torch.ops.aten.slice.Tensor(getitem_15, 1, 128, 640);  getitem_15 = None
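        # Second decomposed bilinear-upsample backward (25x25 -> 100x100 in the forward),
        # scattering the 512-channel gradient back into the 25x25 source grid (add_36).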
        mul_61 = torch.ops.aten.mul.Tensor(slice_2, clamp_max_3);  clamp_max_3 = None
        neg_3 = torch.ops.aten.neg.default(mul_61)
        add_31 = torch.ops.aten.add.Tensor(slice_2, neg_3);  slice_2 = neg_3 = None
        mul_62 = torch.ops.aten.mul.Tensor(mul_61, clamp_max_2)
        neg_4 = torch.ops.aten.neg.default(mul_62)
        add_32 = torch.ops.aten.add.Tensor(mul_61, neg_4);  mul_61 = neg_4 = None
        mul_63 = torch.ops.aten.mul.Tensor(add_31, clamp_max_2);  clamp_max_2 = None
        neg_5 = torch.ops.aten.neg.default(mul_63)
        add_33 = torch.ops.aten.add.Tensor(add_31, neg_5);  add_31 = neg_5 = None
        full_default_7 = torch.ops.aten.full.default([4, 512, 25, 25], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        _unsafe_index_put_4 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, clamp_max, clamp_max_1], mul_62, True);  mul_62 = None
        _unsafe_index_put_5 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, clamp_max, convert_element_type_3], add_32, True);  clamp_max = add_32 = None
        add_34 = torch.ops.aten.add.Tensor(_unsafe_index_put_4, _unsafe_index_put_5);  _unsafe_index_put_4 = _unsafe_index_put_5 = None
        _unsafe_index_put_6 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, convert_element_type_1, clamp_max_1], mul_63, True);  clamp_max_1 = mul_63 = None
        add_35 = torch.ops.aten.add.Tensor(add_34, _unsafe_index_put_6);  add_34 = _unsafe_index_put_6 = None
        _unsafe_index_put_7 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, convert_element_type_1, convert_element_type_3], add_33, True);  full_default_7 = convert_element_type_1 = convert_element_type_3 = add_33 = None
        add_36 = torch.ops.aten.add.Tensor(add_35, _unsafe_index_put_7);  add_35 = _unsafe_index_put_7 = None
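        # Parameter gradients, None placeholders for inputs that do not require grad, and
        # the gradients flowing back to the two upstream feature maps (slice_1, add_36).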
        return [getitem_16, mul_60, sum_6, getitem_13, mul_51, sum_4, getitem_10, mul_39, sum_2, getitem_7, sum_1, None, None, None, None, None, None, None, None, None, slice_1, add_36]
        
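# load_args reconstructs the saved inputs: each reader.storage call identifies a serialized
# buffer by content hash and byte size in the minifier checkpoint directory, and reader.tensor
# rebuilds the tensor with the recorded shape, dtype and requires_grad flag.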
def load_args(reader):
    buf0 = reader.storage('934c55e4a7a69a0a29a96cd8ef9f11c9859658e1', 11796480, device=device(type='cuda', index=2))
    reader.tensor(buf0, (512, 640, 3, 3), requires_grad=True, is_leaf=True)  # primals_1
    buf1 = reader.storage('f12094f433480ec90280d223057708434df38941', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf1, (512,), requires_grad=True, is_leaf=True)  # primals_2
    buf2 = reader.storage('06c46ad2c91ec5c8eebc4fb0be80459bdfe007a8', 9437184, device=device(type='cuda', index=2))
    reader.tensor(buf2, (512, 512, 3, 3), requires_grad=True, is_leaf=True)  # primals_4
    buf3 = reader.storage('aba0c4266c842d1845e720dc0c789942770a60b7', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf3, (512,), requires_grad=True, is_leaf=True)  # primals_5
    buf4 = reader.storage('bb8471d379e03c8ccb9897ce7d3a2dfbacb44e30', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf4, (512,), requires_grad=True, is_leaf=True)  # primals_6
    buf5 = reader.storage('b9484105fb5b2045fb6550a1edb77af72e639416', 4718592, device=device(type='cuda', index=2))
    reader.tensor(buf5, (256, 512, 3, 3), requires_grad=True, is_leaf=True)  # primals_7
    buf6 = reader.storage('b778b8cab416c3fa6763b88e431266ae6ea28941', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf6, (256,), requires_grad=True, is_leaf=True)  # primals_8
    buf7 = reader.storage('c5f14ec72c73a593b47ef4aecf37f6bb25d2dec4', 262144, device=device(type='cuda', index=2))
    reader.tensor(buf7, (256, 256, 1, 1), requires_grad=True, is_leaf=True)  # primals_10
    buf8 = reader.storage('99ef5c7086a924dfc5221c01ff1520de469849c8', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf8, (100, 1), dtype=torch.int64, is_leaf=True)  # convert_element_type_1
    buf9 = reader.storage('532b7b8fc19c48c7434e569ab96aa0670d5651ef', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf9, (100, 1), dtype=torch.int64, is_leaf=True)  # clamp_max
    buf10 = reader.storage('99ef5c7086a924dfc5221c01ff1520de469849c8', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf10, (100,), dtype=torch.int64, is_leaf=True)  # convert_element_type_3
    buf11 = reader.storage('532b7b8fc19c48c7434e569ab96aa0670d5651ef', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf11, (100,), dtype=torch.int64, is_leaf=True)  # clamp_max_1
    buf12 = reader.storage('0538ed039b8a4706a4f85bf431e12664d8940742', 400, device=device(type='cuda', index=2))
    reader.tensor(buf12, (100,), is_leaf=True)  # clamp_max_2
    buf13 = reader.storage('0538ed039b8a4706a4f85bf431e12664d8940742', 400, device=device(type='cuda', index=2))
    reader.tensor(buf13, (100, 1), is_leaf=True)  # clamp_max_3
    buf14 = reader.storage('5d41e66671a283b70001fd74345d8e7e3def00bd', 102400000, device=device(type='cuda', index=2))
    reader.tensor(buf14, (4, 640, 100, 100), is_leaf=True)  # cat
    buf15 = reader.storage('a8fe0ed584571bb3218d663656459a36545be5e6', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf15, (4, 512, 100, 100), is_leaf=True)  # convolution
    buf16 = reader.storage('0af13bcf109b8ca2df7f5ce3387d51e8576fb30a', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf16, (512,), is_leaf=True)  # squeeze_1
    buf17 = reader.storage('32f14d6fa07f654fbb09ef1563066303a3501eda', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf17, (4, 512, 100, 100), is_leaf=True)  # relu
    buf18 = reader.storage('aca23d51e723ad9b4bec2e54d6f0af4b5b85cc7d', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf18, (4, 512, 100, 100), is_leaf=True)  # convolution_1
    buf19 = reader.storage('4940c79e48676c2e1359870dc770e25cd780983d', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf19, (1, 512, 1, 1), is_leaf=True)  # getitem_3
    buf20 = reader.storage('d17407a9f45954a4d0d36e5b20a40ac554cc3aff', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf20, (1, 512, 1, 1), is_leaf=True)  # rsqrt_1
    buf21 = reader.storage('95fbd2b85e217ab78f8f9d7900b273a1362b3112', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf21, (200, 1), dtype=torch.int64, is_leaf=True)  # convert_element_type_5
    buf22 = reader.storage('d9920b87a7261c94c907bc68889b005f277cd597', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf22, (200, 1), dtype=torch.int64, is_leaf=True)  # clamp_max_4
    buf23 = reader.storage('95fbd2b85e217ab78f8f9d7900b273a1362b3112', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf23, (200,), dtype=torch.int64, is_leaf=True)  # convert_element_type_7
    buf24 = reader.storage('d9920b87a7261c94c907bc68889b005f277cd597', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf24, (200,), dtype=torch.int64, is_leaf=True)  # clamp_max_5
    buf25 = reader.storage('131d76cb798ee04745f0c7dcb67b63c74a6c00df', 800, device=device(type='cuda', index=2))
    reader.tensor(buf25, (200,), is_leaf=True)  # clamp_max_6
    buf26 = reader.storage('131d76cb798ee04745f0c7dcb67b63c74a6c00df', 800, device=device(type='cuda', index=2))
    reader.tensor(buf26, (200, 1), is_leaf=True)  # clamp_max_7
    buf27 = reader.storage('32194c54194bddd5f695a8d306828130629246fc', 327680000, device=device(type='cuda', index=2))
    reader.tensor(buf27, (4, 512, 200, 200), is_leaf=True)  # add_19
    buf28 = reader.storage('e3a286ef8d6373c83ef30afe16eaae96ee52b965', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf28, (4, 256, 200, 200), is_leaf=True)  # convolution_2
    buf29 = reader.storage('9572b289e6d5c9bdd20a79367d4005440da40795', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf29, (256,), is_leaf=True)  # squeeze_7
    buf30 = reader.storage('42f9ce794a05b12a40f15cbd4abb1201ccef0f72', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf30, (4, 256, 200, 200), is_leaf=True)  # relu_2
    buf31 = reader.storage('61670207f087dc68f052bc03747d9ab365297b17', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf31, (1, 256, 1, 1), is_leaf=True)  # unsqueeze_14
    buf32 = reader.storage('ab77896e6dd76345e63586ecda30b1e4a63439cc', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf32, (1, 512, 1, 1), is_leaf=True)  # unsqueeze_38
    buf33 = reader.storage('f0ec623d2a44ff0f64fc264faf9128c2a6896e57', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf33, (4, 256, 200, 200), is_leaf=True)  # tangents_1
load_args._version = 0
mod = Repro()
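# Entry point of the generated repro: run_repro replays the graph with the stored inputs;
# accuracy=True checks the compiled output for numerical divergence rather than only for crashes.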
if __name__ == '__main__':
    from torch._dynamo.repro.after_aot import run_repro
    with torch.no_grad():
        run_repro(mod, load_args, accuracy=True, command='run', save_dir='/root/FlashOCC/torch_compile_debug/run_2025_08_24_19_42_28_279064-pid_182645/minifier/checkpoints', tracing_mode='real', check_str=None)
        # To run it separately, do 
        # mod, args = run_repro(mod, load_args, accuracy=True, command='get_args', save_dir='/root/FlashOCC/torch_compile_debug/run_2025_08_24_19_42_28_279064-pid_182645/minifier/checkpoints', tracing_mode='real', check_str=None)
        # mod(*args)