import torch
from torch import tensor, device
import torch.fx as fx
from torch._dynamo.testing import rand_strided
from math import inf
import torch._inductor.inductor_prims

import torch._dynamo.config
import torch._inductor.config
import torch._functorch.config
import torch.fx.experimental._config
torch._dynamo.config.capture_scalar_outputs = True





isolate_fails_code_str = None



# torch version: 2.4.1
# torch cuda version: None
# torch git version: 45d303c9e4f41ec2f5450b6f60031246f67189d6


# CUDA Info: 
# nvcc not found
# GPU Hardware Info: 
# BW200 : 8 


from torch.nn import *
class Repro(torch.nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, primals_1, primals_2, primals_4, primals_5, primals_6, primals_7, primals_8, primals_10, convert_element_type_1, clamp_max, convert_element_type_3, clamp_max_1, clamp_max_2, clamp_max_3, cat, convolution, squeeze_1, relu, convolution_1, getitem_3, rsqrt_1, convert_element_type_5, clamp_max_4, convert_element_type_7, clamp_max_5, clamp_max_6, clamp_max_7, add_19, convolution_2, squeeze_7, relu_2, unsqueeze_14, unsqueeze_38, tangents_1):
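        # Backward (reverse-mode) graph for a conv / batch-norm / ReLU / bilinear-upsample
        # stack: tangents_1 carries the incoming gradient and the returned list holds the
        # parameter and input gradients.
        # Final 1x1 conv (primals_10): sum_1 is its bias gradient; convolution_backward
        # yields the gradient w.r.t. its input (getitem_6) and weight (getitem_7).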
        sum_1 = torch.ops.aten.sum.dim_IntList(tangents_1, [0, 2, 3])
        convolution_backward = torch.ops.aten.convolution_backward.default(tangents_1, relu_2, primals_10, [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]);  tangents_1 = primals_10 = None
        getitem_6 = convolution_backward[0]
        getitem_7 = convolution_backward[1];  convolution_backward = None
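        # ReLU backward: zero the gradient wherever the forward activation relu_2 was <= 0.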
        le = torch.ops.aten.le.Scalar(relu_2, 0);  relu_2 = None
        full_default = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        where = torch.ops.aten.where.self(le, full_default, getitem_6);  le = getitem_6 = None
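        # Batch-norm backward over the 256-channel, 200x200 activation
        # (6.25e-06 == 1/(4*200*200) == 1/(N*H*W)); squeeze_7 holds the saved inverse std
        # and unsqueeze_14 the saved mean. mul_38 is the gradient w.r.t. the BN input,
        # mul_39 and sum_2 the gradients for the BN weight (primals_8) and bias.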
        sum_2 = torch.ops.aten.sum.dim_IntList(where, [0, 2, 3])
        sub_13 = torch.ops.aten.sub.Tensor(convolution_2, unsqueeze_14);  convolution_2 = unsqueeze_14 = None
        mul_31 = torch.ops.aten.mul.Tensor(where, sub_13)
        sum_3 = torch.ops.aten.sum.dim_IntList(mul_31, [0, 2, 3]);  mul_31 = None
        mul_32 = torch.ops.aten.mul.Tensor(sum_2, 6.25e-06)
        unsqueeze_15 = torch.ops.aten.unsqueeze.default(mul_32, 0);  mul_32 = None
        unsqueeze_16 = torch.ops.aten.unsqueeze.default(unsqueeze_15, 2);  unsqueeze_15 = None
        unsqueeze_17 = torch.ops.aten.unsqueeze.default(unsqueeze_16, 3);  unsqueeze_16 = None
        mul_33 = torch.ops.aten.mul.Tensor(sum_3, 6.25e-06)
        mul_34 = torch.ops.aten.mul.Tensor(squeeze_7, squeeze_7)
        mul_35 = torch.ops.aten.mul.Tensor(mul_33, mul_34);  mul_33 = mul_34 = None
        unsqueeze_18 = torch.ops.aten.unsqueeze.default(mul_35, 0);  mul_35 = None
        unsqueeze_19 = torch.ops.aten.unsqueeze.default(unsqueeze_18, 2);  unsqueeze_18 = None
        unsqueeze_20 = torch.ops.aten.unsqueeze.default(unsqueeze_19, 3);  unsqueeze_19 = None
        mul_36 = torch.ops.aten.mul.Tensor(squeeze_7, primals_8);  primals_8 = None
        unsqueeze_21 = torch.ops.aten.unsqueeze.default(mul_36, 0);  mul_36 = None
        unsqueeze_22 = torch.ops.aten.unsqueeze.default(unsqueeze_21, 2);  unsqueeze_21 = None
        unsqueeze_23 = torch.ops.aten.unsqueeze.default(unsqueeze_22, 3);  unsqueeze_22 = None
        mul_37 = torch.ops.aten.mul.Tensor(sub_13, unsqueeze_20);  sub_13 = unsqueeze_20 = None
        sub_15 = torch.ops.aten.sub.Tensor(where, mul_37);  where = mul_37 = None
        sub_16 = torch.ops.aten.sub.Tensor(sub_15, unsqueeze_17);  sub_15 = unsqueeze_17 = None
        mul_38 = torch.ops.aten.mul.Tensor(sub_16, unsqueeze_23);  sub_16 = unsqueeze_23 = None
        mul_39 = torch.ops.aten.mul.Tensor(sum_3, squeeze_7);  sum_3 = squeeze_7 = None
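        # 3x3 conv (primals_7) backward over the upsampled 512-channel input add_19.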
        convolution_backward_1 = torch.ops.aten.convolution_backward.default(mul_38, add_19, primals_7, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]);  mul_38 = add_19 = primals_7 = None
        getitem_9 = convolution_backward_1[0]
        getitem_10 = convolution_backward_1[1];  convolution_backward_1 = None
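        # Decomposed backward of a bilinear upsample (100x100 -> 200x200 in the forward):
        # the clamp_max_* tensors hold interpolation weights/indices, the mul/neg/add chain
        # splits the gradient across the four corners, and the _unsafe_index_put calls
        # scatter-add each corner back into the 100x100 source grid (add_30).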
        mul_40 = torch.ops.aten.mul.Tensor(getitem_9, clamp_max_7);  clamp_max_7 = None
        neg = torch.ops.aten.neg.default(mul_40)
        add_25 = torch.ops.aten.add.Tensor(getitem_9, neg);  getitem_9 = neg = None
        mul_41 = torch.ops.aten.mul.Tensor(mul_40, clamp_max_6)
        neg_1 = torch.ops.aten.neg.default(mul_41)
        add_26 = torch.ops.aten.add.Tensor(mul_40, neg_1);  mul_40 = neg_1 = None
        mul_42 = torch.ops.aten.mul.Tensor(add_25, clamp_max_6);  clamp_max_6 = None
        neg_2 = torch.ops.aten.neg.default(mul_42)
        add_27 = torch.ops.aten.add.Tensor(add_25, neg_2);  add_25 = neg_2 = None
        full_default_1 = torch.ops.aten.full.default([4, 512, 100, 100], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        _unsafe_index_put = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, clamp_max_4, clamp_max_5], mul_41, True);  mul_41 = None
        _unsafe_index_put_1 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, clamp_max_4, convert_element_type_7], add_26, True);  clamp_max_4 = add_26 = None
        add_28 = torch.ops.aten.add.Tensor(_unsafe_index_put, _unsafe_index_put_1);  _unsafe_index_put = _unsafe_index_put_1 = None
        _unsafe_index_put_2 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, convert_element_type_5, clamp_max_5], mul_42, True);  clamp_max_5 = mul_42 = None
        add_29 = torch.ops.aten.add.Tensor(add_28, _unsafe_index_put_2);  add_28 = _unsafe_index_put_2 = None
        _unsafe_index_put_3 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, convert_element_type_5, convert_element_type_7], add_27, True);  full_default_1 = convert_element_type_5 = convert_element_type_7 = add_27 = None
        add_30 = torch.ops.aten.add.Tensor(add_29, _unsafe_index_put_3);  add_29 = _unsafe_index_put_3 = None
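        # relu_1 was not saved by the forward, so it is recomputed here from convolution_1
        # and the saved BN statistics before applying its ReLU backward mask (where_1).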
        sub_6 = torch.ops.aten.sub.Tensor(convolution_1, getitem_3)
        mul_12 = torch.ops.aten.mul.Tensor(sub_6, rsqrt_1);  sub_6 = None
        unsqueeze_4 = torch.ops.aten.unsqueeze.default(primals_5, -1)
        unsqueeze_5 = torch.ops.aten.unsqueeze.default(unsqueeze_4, -1);  unsqueeze_4 = None
        mul_18 = torch.ops.aten.mul.Tensor(mul_12, unsqueeze_5);  mul_12 = unsqueeze_5 = None
        unsqueeze_6 = torch.ops.aten.unsqueeze.default(primals_6, -1);  primals_6 = None
        unsqueeze_7 = torch.ops.aten.unsqueeze.default(unsqueeze_6, -1);  unsqueeze_6 = None
        add_14 = torch.ops.aten.add.Tensor(mul_18, unsqueeze_7);  mul_18 = unsqueeze_7 = None
        relu_1 = torch.ops.aten.relu.default(add_14);  add_14 = None
        le_1 = torch.ops.aten.le.Scalar(relu_1, 0);  relu_1 = None
        where_1 = torch.ops.aten.where.self(le_1, full_default, add_30);  le_1 = add_30 = None
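        # Batch-norm backward over the 512-channel, 100x100 activation
        # (2.5e-05 == 1/(4*100*100)); getitem_3 and rsqrt_1 are the saved mean and inverse std.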
        squeeze_3 = torch.ops.aten.squeeze.dims(getitem_3, [0, 2, 3]);  getitem_3 = None
        unsqueeze_24 = torch.ops.aten.unsqueeze.default(squeeze_3, 0);  squeeze_3 = None
        unsqueeze_25 = torch.ops.aten.unsqueeze.default(unsqueeze_24, 2);  unsqueeze_24 = None
        unsqueeze_26 = torch.ops.aten.unsqueeze.default(unsqueeze_25, 3);  unsqueeze_25 = None
        sum_4 = torch.ops.aten.sum.dim_IntList(where_1, [0, 2, 3])
        sub_17 = torch.ops.aten.sub.Tensor(convolution_1, unsqueeze_26);  convolution_1 = unsqueeze_26 = None
        mul_43 = torch.ops.aten.mul.Tensor(where_1, sub_17)
        sum_5 = torch.ops.aten.sum.dim_IntList(mul_43, [0, 2, 3]);  mul_43 = None
        mul_44 = torch.ops.aten.mul.Tensor(sum_4, 2.5e-05)
        unsqueeze_27 = torch.ops.aten.unsqueeze.default(mul_44, 0);  mul_44 = None
        unsqueeze_28 = torch.ops.aten.unsqueeze.default(unsqueeze_27, 2);  unsqueeze_27 = None
        unsqueeze_29 = torch.ops.aten.unsqueeze.default(unsqueeze_28, 3);  unsqueeze_28 = None
        mul_45 = torch.ops.aten.mul.Tensor(sum_5, 2.5e-05)
        squeeze_4 = torch.ops.aten.squeeze.dims(rsqrt_1, [0, 2, 3]);  rsqrt_1 = None
        mul_46 = torch.ops.aten.mul.Tensor(squeeze_4, squeeze_4)
        mul_47 = torch.ops.aten.mul.Tensor(mul_45, mul_46);  mul_45 = mul_46 = None
        unsqueeze_30 = torch.ops.aten.unsqueeze.default(mul_47, 0);  mul_47 = None
        unsqueeze_31 = torch.ops.aten.unsqueeze.default(unsqueeze_30, 2);  unsqueeze_30 = None
        unsqueeze_32 = torch.ops.aten.unsqueeze.default(unsqueeze_31, 3);  unsqueeze_31 = None
        mul_48 = torch.ops.aten.mul.Tensor(squeeze_4, primals_5);  primals_5 = None
        unsqueeze_33 = torch.ops.aten.unsqueeze.default(mul_48, 0);  mul_48 = None
        unsqueeze_34 = torch.ops.aten.unsqueeze.default(unsqueeze_33, 2);  unsqueeze_33 = None
        unsqueeze_35 = torch.ops.aten.unsqueeze.default(unsqueeze_34, 3);  unsqueeze_34 = None
        mul_49 = torch.ops.aten.mul.Tensor(sub_17, unsqueeze_32);  sub_17 = unsqueeze_32 = None
        sub_19 = torch.ops.aten.sub.Tensor(where_1, mul_49);  where_1 = mul_49 = None
        sub_20 = torch.ops.aten.sub.Tensor(sub_19, unsqueeze_29);  sub_19 = unsqueeze_29 = None
        mul_50 = torch.ops.aten.mul.Tensor(sub_20, unsqueeze_35);  sub_20 = unsqueeze_35 = None
        mul_51 = torch.ops.aten.mul.Tensor(sum_5, squeeze_4);  sum_5 = squeeze_4 = None
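        # 3x3 conv (primals_4) backward, followed by the ReLU backward mask for `relu`.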
        convolution_backward_2 = torch.ops.aten.convolution_backward.default(mul_50, relu, primals_4, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]);  mul_50 = primals_4 = None
        getitem_12 = convolution_backward_2[0]
        getitem_13 = convolution_backward_2[1];  convolution_backward_2 = None
        le_2 = torch.ops.aten.le.Scalar(relu, 0);  relu = None
        where_2 = torch.ops.aten.where.self(le_2, full_default, getitem_12);  le_2 = full_default = getitem_12 = None
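        # Batch-norm backward for the first BN block (512 channels, 100x100); squeeze_1 is
        # its saved inverse std and unsqueeze_38 its saved mean.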
        sum_6 = torch.ops.aten.sum.dim_IntList(where_2, [0, 2, 3])
        sub_21 = torch.ops.aten.sub.Tensor(convolution, unsqueeze_38);  convolution = unsqueeze_38 = None
        mul_52 = torch.ops.aten.mul.Tensor(where_2, sub_21)
        sum_7 = torch.ops.aten.sum.dim_IntList(mul_52, [0, 2, 3]);  mul_52 = None
        mul_53 = torch.ops.aten.mul.Tensor(sum_6, 2.5e-05)
        unsqueeze_39 = torch.ops.aten.unsqueeze.default(mul_53, 0);  mul_53 = None
        unsqueeze_40 = torch.ops.aten.unsqueeze.default(unsqueeze_39, 2);  unsqueeze_39 = None
        unsqueeze_41 = torch.ops.aten.unsqueeze.default(unsqueeze_40, 3);  unsqueeze_40 = None
        mul_54 = torch.ops.aten.mul.Tensor(sum_7, 2.5e-05)
        mul_55 = torch.ops.aten.mul.Tensor(squeeze_1, squeeze_1)
        mul_56 = torch.ops.aten.mul.Tensor(mul_54, mul_55);  mul_54 = mul_55 = None
        unsqueeze_42 = torch.ops.aten.unsqueeze.default(mul_56, 0);  mul_56 = None
        unsqueeze_43 = torch.ops.aten.unsqueeze.default(unsqueeze_42, 2);  unsqueeze_42 = None
        unsqueeze_44 = torch.ops.aten.unsqueeze.default(unsqueeze_43, 3);  unsqueeze_43 = None
        mul_57 = torch.ops.aten.mul.Tensor(squeeze_1, primals_2);  primals_2 = None
        unsqueeze_45 = torch.ops.aten.unsqueeze.default(mul_57, 0);  mul_57 = None
        unsqueeze_46 = torch.ops.aten.unsqueeze.default(unsqueeze_45, 2);  unsqueeze_45 = None
        unsqueeze_47 = torch.ops.aten.unsqueeze.default(unsqueeze_46, 3);  unsqueeze_46 = None
        mul_58 = torch.ops.aten.mul.Tensor(sub_21, unsqueeze_44);  sub_21 = unsqueeze_44 = None
        sub_23 = torch.ops.aten.sub.Tensor(where_2, mul_58);  where_2 = mul_58 = None
        sub_24 = torch.ops.aten.sub.Tensor(sub_23, unsqueeze_41);  sub_23 = unsqueeze_41 = None
        mul_59 = torch.ops.aten.mul.Tensor(sub_24, unsqueeze_47);  sub_24 = unsqueeze_47 = None
        mul_60 = torch.ops.aten.mul.Tensor(sum_7, squeeze_1);  sum_7 = squeeze_1 = None
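        # First 3x3 conv (primals_1) backward over the concatenated input `cat`; the input
        # gradient is split back into the 128-channel skip branch (slice_1) and the
        # 512-channel upsampled branch (slice_2).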
        convolution_backward_3 = torch.ops.aten.convolution_backward.default(mul_59, cat, primals_1, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]);  mul_59 = cat = primals_1 = None
        getitem_15 = convolution_backward_3[0]
        getitem_16 = convolution_backward_3[1];  convolution_backward_3 = None
        slice_1 = torch.ops.aten.slice.Tensor(getitem_15, 1, 0, 128)
        slice_2 = torch.ops.aten.slice.Tensor(getitem_15, 1, 128, 640);  getitem_15 = None
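        # Second decomposed bilinear-upsample backward (25x25 -> 100x100 in the forward),
        # scattering the 512-channel gradient back into the 25x25 source grid (add_36).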
        mul_61 = torch.ops.aten.mul.Tensor(slice_2, clamp_max_3);  clamp_max_3 = None
        neg_3 = torch.ops.aten.neg.default(mul_61)
        add_31 = torch.ops.aten.add.Tensor(slice_2, neg_3);  slice_2 = neg_3 = None
        mul_62 = torch.ops.aten.mul.Tensor(mul_61, clamp_max_2)
        neg_4 = torch.ops.aten.neg.default(mul_62)
        add_32 = torch.ops.aten.add.Tensor(mul_61, neg_4);  mul_61 = neg_4 = None
        mul_63 = torch.ops.aten.mul.Tensor(add_31, clamp_max_2);  clamp_max_2 = None
        neg_5 = torch.ops.aten.neg.default(mul_63)
        add_33 = torch.ops.aten.add.Tensor(add_31, neg_5);  add_31 = neg_5 = None
        full_default_7 = torch.ops.aten.full.default([4, 512, 25, 25], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        _unsafe_index_put_4 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, clamp_max, clamp_max_1], mul_62, True);  mul_62 = None
        _unsafe_index_put_5 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, clamp_max, convert_element_type_3], add_32, True);  clamp_max = add_32 = None
        add_34 = torch.ops.aten.add.Tensor(_unsafe_index_put_4, _unsafe_index_put_5);  _unsafe_index_put_4 = _unsafe_index_put_5 = None
        _unsafe_index_put_6 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, convert_element_type_1, clamp_max_1], mul_63, True);  clamp_max_1 = mul_63 = None
        add_35 = torch.ops.aten.add.Tensor(add_34, _unsafe_index_put_6);  add_34 = _unsafe_index_put_6 = None
        _unsafe_index_put_7 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, convert_element_type_1, convert_element_type_3], add_33, True);  full_default_7 = convert_element_type_1 = convert_element_type_3 = add_33 = None
        add_36 = torch.ops.aten.add.Tensor(add_35, _unsafe_index_put_7);  add_35 = _unsafe_index_put_7 = None
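        # Parameter gradients, None placeholders for inputs that do not require grad, and
        # the gradients flowing back to the two upstream feature maps (slice_1, add_36).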
        return [getitem_16, mul_60, sum_6, getitem_13, mul_51, sum_4, getitem_10, mul_39, sum_2, getitem_7, sum_1, None, None, None, None, None, None, None, None, None, slice_1, add_36]
        
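# load_args reconstructs the saved inputs: each reader.storage call identifies a serialized
# buffer by content hash and byte size in the minifier checkpoint directory, and reader.tensor
# rebuilds the tensor with the recorded shape, dtype and requires_grad flag.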
def load_args(reader):
    buf0 = reader.storage('934c55e4a7a69a0a29a96cd8ef9f11c9859658e1', 11796480, device=device(type='cuda', index=2))
    reader.tensor(buf0, (512, 640, 3, 3), requires_grad=True, is_leaf=True)  # primals_1
    buf1 = reader.storage('f12094f433480ec90280d223057708434df38941', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf1, (512,), requires_grad=True, is_leaf=True)  # primals_2
    buf2 = reader.storage('06c46ad2c91ec5c8eebc4fb0be80459bdfe007a8', 9437184, device=device(type='cuda', index=2))
    reader.tensor(buf2, (512, 512, 3, 3), requires_grad=True, is_leaf=True)  # primals_4
    buf3 = reader.storage('aba0c4266c842d1845e720dc0c789942770a60b7', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf3, (512,), requires_grad=True, is_leaf=True)  # primals_5
    buf4 = reader.storage('bb8471d379e03c8ccb9897ce7d3a2dfbacb44e30', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf4, (512,), requires_grad=True, is_leaf=True)  # primals_6
    buf5 = reader.storage('b9484105fb5b2045fb6550a1edb77af72e639416', 4718592, device=device(type='cuda', index=2))
    reader.tensor(buf5, (256, 512, 3, 3), requires_grad=True, is_leaf=True)  # primals_7
    buf6 = reader.storage('b778b8cab416c3fa6763b88e431266ae6ea28941', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf6, (256,), requires_grad=True, is_leaf=True)  # primals_8
    buf7 = reader.storage('c5f14ec72c73a593b47ef4aecf37f6bb25d2dec4', 262144, device=device(type='cuda', index=2))
    reader.tensor(buf7, (256, 256, 1, 1), requires_grad=True, is_leaf=True)  # primals_10
    buf8 = reader.storage('99ef5c7086a924dfc5221c01ff1520de469849c8', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf8, (100, 1), dtype=torch.int64, is_leaf=True)  # convert_element_type_1
    buf9 = reader.storage('532b7b8fc19c48c7434e569ab96aa0670d5651ef', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf9, (100, 1), dtype=torch.int64, is_leaf=True)  # clamp_max
    buf10 = reader.storage('99ef5c7086a924dfc5221c01ff1520de469849c8', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf10, (100,), dtype=torch.int64, is_leaf=True)  # convert_element_type_3
    buf11 = reader.storage('532b7b8fc19c48c7434e569ab96aa0670d5651ef', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf11, (100,), dtype=torch.int64, is_leaf=True)  # clamp_max_1
    buf12 = reader.storage('0538ed039b8a4706a4f85bf431e12664d8940742', 400, device=device(type='cuda', index=2))
    reader.tensor(buf12, (100,), is_leaf=True)  # clamp_max_2
    buf13 = reader.storage('0538ed039b8a4706a4f85bf431e12664d8940742', 400, device=device(type='cuda', index=2))
    reader.tensor(buf13, (100, 1), is_leaf=True)  # clamp_max_3
    buf14 = reader.storage('5d41e66671a283b70001fd74345d8e7e3def00bd', 102400000, device=device(type='cuda', index=2))
    reader.tensor(buf14, (4, 640, 100, 100), is_leaf=True)  # cat
    buf15 = reader.storage('a8fe0ed584571bb3218d663656459a36545be5e6', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf15, (4, 512, 100, 100), is_leaf=True)  # convolution
    buf16 = reader.storage('0af13bcf109b8ca2df7f5ce3387d51e8576fb30a', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf16, (512,), is_leaf=True)  # squeeze_1
    buf17 = reader.storage('32f14d6fa07f654fbb09ef1563066303a3501eda', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf17, (4, 512, 100, 100), is_leaf=True)  # relu
    buf18 = reader.storage('aca23d51e723ad9b4bec2e54d6f0af4b5b85cc7d', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf18, (4, 512, 100, 100), is_leaf=True)  # convolution_1
    buf19 = reader.storage('4940c79e48676c2e1359870dc770e25cd780983d', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf19, (1, 512, 1, 1), is_leaf=True)  # getitem_3
    buf20 = reader.storage('d17407a9f45954a4d0d36e5b20a40ac554cc3aff', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf20, (1, 512, 1, 1), is_leaf=True)  # rsqrt_1
    buf21 = reader.storage('95fbd2b85e217ab78f8f9d7900b273a1362b3112', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf21, (200, 1), dtype=torch.int64, is_leaf=True)  # convert_element_type_5
    buf22 = reader.storage('d9920b87a7261c94c907bc68889b005f277cd597', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf22, (200, 1), dtype=torch.int64, is_leaf=True)  # clamp_max_4
    buf23 = reader.storage('95fbd2b85e217ab78f8f9d7900b273a1362b3112', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf23, (200,), dtype=torch.int64, is_leaf=True)  # convert_element_type_7
    buf24 = reader.storage('d9920b87a7261c94c907bc68889b005f277cd597', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf24, (200,), dtype=torch.int64, is_leaf=True)  # clamp_max_5
    buf25 = reader.storage('131d76cb798ee04745f0c7dcb67b63c74a6c00df', 800, device=device(type='cuda', index=2))
    reader.tensor(buf25, (200,), is_leaf=True)  # clamp_max_6
    buf26 = reader.storage('131d76cb798ee04745f0c7dcb67b63c74a6c00df', 800, device=device(type='cuda', index=2))
    reader.tensor(buf26, (200, 1), is_leaf=True)  # clamp_max_7
    buf27 = reader.storage('32194c54194bddd5f695a8d306828130629246fc', 327680000, device=device(type='cuda', index=2))
    reader.tensor(buf27, (4, 512, 200, 200), is_leaf=True)  # add_19
    buf28 = reader.storage('e3a286ef8d6373c83ef30afe16eaae96ee52b965', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf28, (4, 256, 200, 200), is_leaf=True)  # convolution_2
    buf29 = reader.storage('9572b289e6d5c9bdd20a79367d4005440da40795', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf29, (256,), is_leaf=True)  # squeeze_7
    buf30 = reader.storage('42f9ce794a05b12a40f15cbd4abb1201ccef0f72', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf30, (4, 256, 200, 200), is_leaf=True)  # relu_2
    buf31 = reader.storage('61670207f087dc68f052bc03747d9ab365297b17', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf31, (1, 256, 1, 1), is_leaf=True)  # unsqueeze_14
    buf32 = reader.storage('ab77896e6dd76345e63586ecda30b1e4a63439cc', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf32, (1, 512, 1, 1), is_leaf=True)  # unsqueeze_38
    buf33 = reader.storage('f0ec623d2a44ff0f64fc264faf9128c2a6896e57', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf33, (4, 256, 200, 200), is_leaf=True)  # tangents_1
load_args._version = 0
mod = Repro()
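# Entry point of the generated repro: run_repro replays the graph with the stored inputs;
# accuracy=True checks the compiled output for numerical divergence rather than only for crashes.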
if __name__ == '__main__':
    from torch._dynamo.repro.after_aot import run_repro
    with torch.no_grad():
        run_repro(mod, load_args, accuracy=True, command='run', save_dir='/root/FlashOCC/torch_compile_debug/run_2025_08_24_19_42_28_279064-pid_182645/minifier/checkpoints', tracing_mode='real', check_str=None)
        # To run it separately, do 
        # mod, args = run_repro(mod, load_args, accuracy=True, command='get_args', save_dir='/root/FlashOCC/torch_compile_debug/run_2025_08_24_19_42_28_279064-pid_182645/minifier/checkpoints', tracing_mode='real', check_str=None)
        # mod(*args)