Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
TS-MODELS-OPT
training
Autonomous-Driving-models
Commits
d2b71343
Commit
d2b71343
authored
Apr 08, 2026
by
雍大凯
Browse files
add code
parent
69e57885
Changes
259
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1014 additions
and
0 deletions
+1014
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/__pycache__/nearest_assign.cpython-310.pyc
...nearest_assign/__pycache__/nearest_assign.cpython-310.pyc
+0
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/nearest_assign.py
...jects/mmdet3d_plugin/ops/nearest_assign/nearest_assign.py
+89
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/nearest_assign_ext.cpython-310-x86_64-linux-gnu.so
...assign/nearest_assign_ext.cpython-310-x86_64-linux-gnu.so
+0
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/src/nearest_assign.cpp
.../mmdet3d_plugin/ops/nearest_assign/src/nearest_assign.cpp
+58
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_cuda.cu
...et3d_plugin/ops/nearest_assign/src/nearest_assign_cuda.cu
+73
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_cuda.hip
...t3d_plugin/ops/nearest_assign/src/nearest_assign_cuda.hip
+76
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_hip.cpp
...et3d_plugin/ops/nearest_assign/src/nearest_assign_hip.cpp
+60
-0
docker-hub/FlashOCC/Flashocc/projects/setup.py
docker-hub/FlashOCC/Flashocc/projects/setup.py
+94
-0
docker-hub/FlashOCC/Flashocc/repro.py
docker-hub/FlashOCC/Flashocc/repro.py
+260
-0
docker-hub/FlashOCC/Flashocc/requirements.txt
docker-hub/FlashOCC/Flashocc/requirements.txt
+4
-0
docker-hub/FlashOCC/Flashocc/requirements/build.txt
docker-hub/FlashOCC/Flashocc/requirements/build.txt
+0
-0
docker-hub/FlashOCC/Flashocc/requirements/docs.txt
docker-hub/FlashOCC/Flashocc/requirements/docs.txt
+8
-0
docker-hub/FlashOCC/Flashocc/requirements/mminstall.txt
docker-hub/FlashOCC/Flashocc/requirements/mminstall.txt
+3
-0
docker-hub/FlashOCC/Flashocc/requirements/optional.txt
docker-hub/FlashOCC/Flashocc/requirements/optional.txt
+3
-0
docker-hub/FlashOCC/Flashocc/requirements/readthedocs.txt
docker-hub/FlashOCC/Flashocc/requirements/readthedocs.txt
+5
-0
docker-hub/FlashOCC/Flashocc/requirements/runtime.txt
docker-hub/FlashOCC/Flashocc/requirements/runtime.txt
+10
-0
docker-hub/FlashOCC/Flashocc/requirements/tests.txt
docker-hub/FlashOCC/Flashocc/requirements/tests.txt
+13
-0
docker-hub/FlashOCC/Flashocc/rocblas_Flashocc.log
docker-hub/FlashOCC/Flashocc/rocblas_Flashocc.log
+23
-0
docker-hub/FlashOCC/Flashocc/start_flashocc.sh
docker-hub/FlashOCC/Flashocc/start_flashocc.sh
+33
-0
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/analyze_logs.py
...ub/FlashOCC/Flashocc/tools/analysis_tools/analyze_logs.py
+202
-0
No files found.
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/__pycache__/nearest_assign.cpython-310.pyc
0 → 100644
View file @
d2b71343
File added
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/nearest_assign.py
0 → 100644
View file @
d2b71343
# Copyright (c) Phigent Robotics. All rights reserved.
import
numpy
as
np
import
torch
from
.
import
nearest_assign_ext
__all__
=
[
'nearest_assign'
]
class QuickNearestAssignCuda(torch.autograd.Function):
    """Autograd wrapper around the compiled ``nearest_assign_forward`` op.

    Only ``forward`` is implemented; no ``backward`` is defined, so the
    result cannot be back-propagated through.
    """

    @staticmethod
    def forward(ctx, occ_pred, l2s_key, occind2detind, inst_cls, inst_xyz,
                inst_id_list):
        """Run the extension kernel and return its freshly allocated output.

        Every argument is made contiguous and cast to ``int32`` before it is
        handed to the extension, which reads the tensors through raw ``int``
        pointers.

        Args:
            ctx: Autograd context (unused).
            occ_pred: Tensor passed to the kernel as ``occ_pred``; the
                output has the same shape.
            l2s_key: Tensor passed as ``l2s_key``.
            occind2detind: Tensor passed as ``occind2detind``.
            inst_cls: Tensor passed as ``inst_cls``.
            inst_xyz: Tensor passed as ``inst_xyz``.
            inst_id_list: Tensor passed as ``inst_id_list``.

        Returns:
            An ``int32`` tensor shaped like ``occ_pred``, zero-initialised
            and then filled in place by the extension.
        """
        raw_inputs = (occ_pred, l2s_key, occind2detind, inst_cls, inst_xyz,
                      inst_id_list)
        (occ_pred, l2s_key, occind2detind, inst_cls, inst_xyz,
         inst_id_list) = [t.contiguous().int() for t in raw_inputs]

        # Output buffer lives on the same device / dtype as the cast occ_pred.
        inst_pred = occ_pred.new_zeros(occ_pred.shape)

        nearest_assign_ext.nearest_assign_forward(
            occ_pred,
            l2s_key,
            occind2detind,
            inst_cls,
            inst_xyz,
            inst_id_list,
            inst_pred,
        )
        return inst_pred
def nearest_assign(occ_pred, l2s_key, occind2detind, inst_cls, inst_xyz,
                   inst_id_list):
    """Public entry point: apply :class:`QuickNearestAssignCuda`.

    All six arguments are forwarded unchanged, in the same order.

    Returns:
        The tensor produced by ``QuickNearestAssignCuda.apply``; it has the
        same shape as ``occ_pred``.
        NOTE(review): the C++ binding comments describe that shape as
        (200, 200, 16) -- confirm against the caller.
    """
    return QuickNearestAssignCuda.apply(
        occ_pred,
        l2s_key,
        occind2detind,
        inst_cls,
        inst_xyz,
        inst_id_list,
    )
def test_bev_pool_v2():
    """Check forward value and gradients of ``bev_pool_v2`` on a tiny input.

    Requires a CUDA device.

    NOTE(review): ``bev_pool_v2`` is neither imported nor defined in the
    visible part of this module, so calling this function as-is is expected
    to raise ``NameError`` -- confirm where it should be imported from.
    """
    depth_values = np.array([0.3, 0.4, 0.2, 0.1, 0.7, 0.6, 0.8, 0.9])
    depth = torch.from_numpy(depth_values).float().cuda()
    depth = depth.view(1, 1, 2, 2, 2).requires_grad_()

    feat = torch.ones(
        size=[1, 1, 2, 2, 2], dtype=torch.float,
        device='cuda').requires_grad_()

    ranks_depth = torch.from_numpy(np.array([0, 4, 1, 6])).int().cuda()
    ranks_feat = torch.from_numpy(np.array([0, 0, 1, 2])).int().cuda()
    ranks_bev = torch.from_numpy(np.array([0, 0, 1, 1])).int().cuda()

    num_points = ranks_bev.shape[0]

    # Mark the first element of every run of equal consecutive ranks_bev.
    kept = torch.ones(num_points, device=ranks_bev.device, dtype=torch.bool)
    kept[1:] = ranks_bev[1:] != ranks_bev[:-1]
    interval_starts = torch.where(kept)[0].int()
    if len(interval_starts) == 0:
        return None, None, None, None, None

    # Length of each run: gap to the next start, or to the end for the last.
    interval_lengths = torch.zeros_like(interval_starts)
    interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
    interval_lengths[-1] = num_points - interval_starts[-1]

    bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
                           (1, 1, 2, 2, 2), interval_starts, interval_lengths)
    loss = torch.sum(bev_feat)
    loss.backward()
    assert loss == 4.4

    expected_depth_grad = np.array([2., 2., 0., 0., 2., 0., 2., 0.])
    grad_depth = torch.from_numpy(expected_depth_grad).float()
    grad_depth = grad_depth.cuda().view(1, 1, 2, 2, 2)
    assert depth.grad.allclose(grad_depth)

    expected_feat_grad = np.array([1.0, 1.0, 0.4, 0.4, 0.8, 0.8, 0., 0.])
    grad_feat = torch.from_numpy(expected_feat_grad).float().cuda().view(
        1, 1, 2, 2, 2)
    assert feat.grad.allclose(grad_feat)
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/nearest_assign_ext.cpython-310-x86_64-linux-gnu.so
0 → 100755
View file @
d2b71343
File added
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/src/nearest_assign.cpp
0 → 100644
View file @
d2b71343
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>
// CUDA function declarations
void
nearest_assign
(
const
int
*
l2s_key
,
int
l2s_size
,
const
int
*
__restrict__
occind2detind
,
int
inst_size
,
const
int
*
__restrict__
occ_pred
,
const
int
*
__restrict__
inst_xyz
,
const
int
*
__restrict__
inst_cls
,
const
int
*
__restrict__
inst_id_list
,
int
*
__restrict__
inst_pred
);
/*
  Python-facing entry point: validates the tensors, selects the device of
  _occ_pred and forwards raw int pointers to the CUDA launcher.

  Args:
    _occ_pred      : int32 input, documented as (200, 200, 16)
    _l2s_key       : int32, (l2s_size, 1)
    _occind2detind : int32, (10, 1)
    _inst_cls      : int32, (inst_size, 1)
    _inst_xyz      : int32, (inst_size, 3)
    _inst_id_list  : int32, (inst_size, 1)
    _inst_pred     : int32 output buffer, documented as (200, 200, 16);
                     written in place by the kernel

  Raises (c10::Error via TORCH_CHECK) when a non-empty tensor is not a
  contiguous int32 CUDA tensor or when a buffer is too small for the
  accesses the kernel performs.
*/
void nearest_assign_forward(
    const at::Tensor _occ_pred,       // (200, 200, 16)
    const at::Tensor _l2s_key,        // (l2s_size, 1)
    const at::Tensor _occind2detind,  // (10, 1)
    const at::Tensor _inst_cls,       // (inst_size, 1)
    const at::Tensor _inst_xyz,       // (inst_size, 3)
    const at::Tensor _inst_id_list,   // (inst_size, 1)
    at::Tensor _inst_pred             // (200, 200, 16)
) {
  // The launcher always starts one thread per voxel of a fixed
  // 200 x 200 x 16 grid, each of which indexes occ_pred and inst_pred.
  constexpr int64_t kNumVoxels = 200 * 200 * 16;

  // Buffers are read through raw int pointers with flat indexing, so every
  // tensor that is actually dereferenced must be contiguous int32 on the GPU.
  // Empty tensors are never dereferenced and are therefore not restricted
  // to a device.
  const auto check_tensor = [](const at::Tensor& t, const char* name) {
    TORCH_CHECK(t.scalar_type() == at::kInt, name, " must be an int32 tensor");
    TORCH_CHECK(t.is_contiguous(), name, " must be contiguous");
    TORCH_CHECK(t.numel() == 0 || t.is_cuda(), name, " must be a CUDA tensor");
  };
  check_tensor(_occ_pred, "occ_pred");
  check_tensor(_l2s_key, "l2s_key");
  check_tensor(_occind2detind, "occind2detind");
  check_tensor(_inst_cls, "inst_cls");
  check_tensor(_inst_xyz, "inst_xyz");
  check_tensor(_inst_id_list, "inst_id_list");
  check_tensor(_inst_pred, "inst_pred");

  const int l2s_size = static_cast<int>(_l2s_key.size(0));
  const int inst_size = static_cast<int>(_inst_xyz.size(0));

  TORCH_CHECK(_occ_pred.numel() >= kNumVoxels,
              "occ_pred must hold at least ", kNumVoxels, " elements, got ",
              _occ_pred.numel());
  TORCH_CHECK(_inst_pred.numel() >= kNumVoxels,
              "inst_pred must hold at least ", kNumVoxels, " elements, got ",
              _inst_pred.numel());
  // The kernel reads inst_xyz[i * 3 + {0, 1, 2}], inst_cls[i] and
  // inst_id_list[i] for every i < inst_size.
  TORCH_CHECK(_inst_xyz.numel() >= static_cast<int64_t>(inst_size) * 3,
              "inst_xyz must hold three values per instance");
  TORCH_CHECK(_inst_cls.numel() >= inst_size,
              "inst_cls must hold one value per instance");
  TORCH_CHECK(_inst_id_list.numel() >= inst_size,
              "inst_id_list must hold one value per instance");

  // Make the device that owns occ_pred current for the kernel launch.
  const at::cuda::OptionalCUDAGuard device_guard(device_of(_occ_pred));

  const int* occ_pred = _occ_pred.data_ptr<int>();
  const int* inst_xyz = _inst_xyz.data_ptr<int>();
  const int* inst_cls = _inst_cls.data_ptr<int>();
  const int* l2s_key = _l2s_key.data_ptr<int>();
  const int* inst_id_list = _inst_id_list.data_ptr<int>();
  const int* occind2detind = _occind2detind.data_ptr<int>();
  int* inst_pred = _inst_pred.data_ptr<int>();

  nearest_assign(
      l2s_key,
      l2s_size,
      occind2detind,
      inst_size,
      occ_pred,
      inst_xyz,
      inst_cls,
      inst_id_list,
      inst_pred
  );
}
// Python bindings: expose nearest_assign_forward under the same name in the
// extension module (module name supplied by TORCH_EXTENSION_NAME at build time).
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("nearest_assign_forward", &nearest_assign_forward,
        "nearest_assign_forward");
}
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_cuda.cu
0 → 100644
View file @
d2b71343
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <stdio.h>
#include <stdlib.h>
/*
  One thread per voxel of a fixed 200 x 200 x 16 grid, flattened so that
  idx = x * (200 * 16) + y * 16 + z.

  For the value `label = occ_pred[idx]`:
    - if label does not occur in l2s_key[0 .. l2s_size), it is copied to
      inst_pred[idx];
    - otherwise the instances whose inst_cls equals occind2detind[label] are
      scanned and inst_pred[idx] receives the inst_id_list entry of the one
      with the smallest squared distance to (x, y, z). Ties keep the first
      instance. If no instance qualifies, inst_pred[idx] is left untouched.

  Args:
    l2s_key       : values that trigger the instance search, l2s_size entries
    occind2detind : lookup table indexed by the occ_pred value
    occ_pred      : input value per voxel
    inst_xyz      : three consecutive ints (x, y, z) per instance
    inst_cls      : one int per instance, compared against occind2detind
    inst_id_list  : one int per instance, written to the output
    inst_size     : number of instances
    inst_pred     : output value per voxel
*/
__global__ void nearest_assign_kernel(
    const int* l2s_key,
    int l2s_size,
    const int* occind2detind,
    const int *__restrict__ occ_pred,
    const int *__restrict__ inst_xyz,
    const int *__restrict__ inst_cls,
    const int *__restrict__ inst_id_list,
    int inst_size,
    int* __restrict__ inst_pred) {
  const int kDimX = 200;
  const int kDimY = 200;
  const int kDimZ = 16;
  const int kNumVoxels = kDimX * kDimY * kDimZ;

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  // Guard against launch configurations that overshoot the grid.
  if (idx >= kNumVoxels) {
    return;
  }

  const int occ_pred_label = occ_pred[idx];

  bool search_instances = false;
  for (int index = 0; index < l2s_size; index++) {
    if (occ_pred_label == l2s_key[index]) {
      search_instances = true;
      break;
    }
  }

  if (!search_instances) {
    inst_pred[idx] = occ_pred_label;
    return;
  }
  if (inst_size <= 0) {
    // Nothing to scan; the output keeps whatever the caller initialised.
    return;
  }

  // Recover the voxel coordinates from the flat index.
  const int x = idx / (kDimY * kDimZ);
  const int y = (idx - x * kDimY * kDimZ) / kDimZ;
  const int z = idx - x * kDimY * kDimZ - y * kDimZ;

  // Loop-invariant: the class every candidate instance has to match.
  const int wanted_cls = occind2detind[occ_pred_label];

  int dist_min = 100000000;  // candidates at or beyond this are never chosen
  int best_inst = -1;
  for (int inst_ind = 0; inst_ind < inst_size; inst_ind++) {
    if (inst_cls[inst_ind] != wanted_cls) {
      continue;
    }
    const int dx = x - inst_xyz[inst_ind * 3 + 0];
    const int dy = y - inst_xyz[inst_ind * 3 + 1];
    const int dz = z - inst_xyz[inst_ind * 3 + 2];
    const int dist = dx * dx + dy * dy + dz * dz;
    if (dist < dist_min) {
      dist_min = dist;
      best_inst = inst_ind;
    }
  }

  // Single global write instead of one per improving candidate.
  if (best_inst >= 0) {
    inst_pred[idx] = inst_id_list[best_inst];
  }
}
/*
  Host-side launcher for nearest_assign_kernel.

  Starts one thread per voxel of the fixed 200 x 200 x 16 grid, in blocks of
  256 threads, without an explicit stream argument. The pointer arguments are
  passed to the kernel unchanged (note the different parameter order).
*/
void nearest_assign(
    const int* l2s_key,
    int l2s_size,
    const int *__restrict__ occind2detind,
    int inst_size,
    const int *__restrict__ occ_pred,
    const int *__restrict__ inst_xyz,
    const int *__restrict__ inst_cls,
    const int *__restrict__ inst_id_list,
    int* __restrict__ inst_pred) {
  const int threads_per_block = 256;
  const int num_voxels = 200 * 200 * 16;
  // Ceiling division; evaluates to 2500 blocks for the sizes above.
  const int num_blocks =
      (num_voxels + threads_per_block - 1) / threads_per_block;

  nearest_assign_kernel<<<num_blocks, threads_per_block>>>(
      l2s_key, l2s_size, occind2detind,
      occ_pred, inst_xyz, inst_cls,
      inst_id_list, inst_size, inst_pred
  );
}
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_cuda.hip
0 → 100644
View file @
d2b71343
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#include "hip/hip_runtime.h"
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <stdio.h>
#include <stdlib.h>
/*
  One thread per voxel of a fixed 200 x 200 x 16 grid, flattened so that
  idx = x * (200 * 16) + y * 16 + z.

  For the value `label = occ_pred[idx]`:
    - if label does not occur in l2s_key[0 .. l2s_size), it is copied to
      inst_pred[idx];
    - otherwise the instances whose inst_cls equals occind2detind[label] are
      scanned and inst_pred[idx] receives the inst_id_list entry of the one
      with the smallest squared distance to (x, y, z). Ties keep the first
      instance. If no instance qualifies, inst_pred[idx] is left untouched.

  NOTE(review): the file header says this file is generated by hipify, so
  hand edits here may be overwritten when it is regenerated from the CUDA
  source.
*/
__global__ void nearest_assign_kernel(
    const int* l2s_key,
    int l2s_size,
    const int* occind2detind,
    const int *__restrict__ occ_pred,
    const int *__restrict__ inst_xyz,
    const int *__restrict__ inst_cls,
    const int *__restrict__ inst_id_list,
    int inst_size,
    int* __restrict__ inst_pred) {
  const int kDimX = 200;
  const int kDimY = 200;
  const int kDimZ = 16;
  const int kNumVoxels = kDimX * kDimY * kDimZ;

  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  // Guard against launch configurations that overshoot the grid.
  if (idx >= kNumVoxels) {
    return;
  }

  const int occ_pred_label = occ_pred[idx];

  bool search_instances = false;
  for (int index = 0; index < l2s_size; index++) {
    if (occ_pred_label == l2s_key[index]) {
      search_instances = true;
      break;
    }
  }

  if (!search_instances) {
    inst_pred[idx] = occ_pred_label;
    return;
  }
  if (inst_size <= 0) {
    // Nothing to scan; the output keeps whatever the caller initialised.
    return;
  }

  // Recover the voxel coordinates from the flat index.
  const int x = idx / (kDimY * kDimZ);
  const int y = (idx - x * kDimY * kDimZ) / kDimZ;
  const int z = idx - x * kDimY * kDimZ - y * kDimZ;

  // Loop-invariant: the class every candidate instance has to match.
  const int wanted_cls = occind2detind[occ_pred_label];

  int dist_min = 100000000;  // candidates at or beyond this are never chosen
  int best_inst = -1;
  for (int inst_ind = 0; inst_ind < inst_size; inst_ind++) {
    if (inst_cls[inst_ind] != wanted_cls) {
      continue;
    }
    const int dx = x - inst_xyz[inst_ind * 3 + 0];
    const int dy = y - inst_xyz[inst_ind * 3 + 1];
    const int dz = z - inst_xyz[inst_ind * 3 + 2];
    const int dist = dx * dx + dy * dy + dz * dz;
    if (dist < dist_min) {
      dist_min = dist;
      best_inst = inst_ind;
    }
  }

  // Single global write instead of one per improving candidate.
  if (best_inst >= 0) {
    inst_pred[idx] = inst_id_list[best_inst];
  }
}
/*
  Host-side launcher for nearest_assign_kernel (HIP build).

  Starts one thread per voxel of the fixed 200 x 200 x 16 grid, in blocks of
  256 threads, with no dynamic shared memory and stream 0. The pointer
  arguments are passed to the kernel unchanged (note the different parameter
  order).
*/
void nearest_assign(
    const int* l2s_key,
    int l2s_size,
    const int *__restrict__ occind2detind,
    int inst_size,
    const int *__restrict__ occ_pred,
    const int *__restrict__ inst_xyz,
    const int *__restrict__ inst_cls,
    const int *__restrict__ inst_id_list,
    int* __restrict__ inst_pred) {
  const int threads_per_block = 256;
  const int num_voxels = 200 * 200 * 16;
  // Ceiling division; evaluates to 2500 blocks for the sizes above.
  const int num_blocks =
      (num_voxels + threads_per_block - 1) / threads_per_block;

  hipLaunchKernelGGL((nearest_assign_kernel),
                     dim3(num_blocks), dim3(threads_per_block), 0, 0,
                     l2s_key, l2s_size, occind2detind,
                     occ_pred, inst_xyz, inst_cls,
                     inst_id_list, inst_size, inst_pred);
}
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_hip.cpp
0 → 100644
View file @
d2b71343
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <torch/torch.h>
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
// CUDA function declarations
void
nearest_assign
(
const
int
*
l2s_key
,
int
l2s_size
,
const
int
*
__restrict__
occind2detind
,
int
inst_size
,
const
int
*
__restrict__
occ_pred
,
const
int
*
__restrict__
inst_xyz
,
const
int
*
__restrict__
inst_cls
,
const
int
*
__restrict__
inst_id_list
,
int
*
__restrict__
inst_pred
);
/*
  Python-facing entry point (HIP build): validates the tensors, selects the
  device of _occ_pred and forwards raw int pointers to the launcher.

  Args:
    _occ_pred      : int32 input, documented as (200, 200, 16)
    _l2s_key       : int32, (l2s_size, 1)
    _occind2detind : int32, (10, 1)
    _inst_cls      : int32, (inst_size, 1)
    _inst_xyz      : int32, (inst_size, 3)
    _inst_id_list  : int32, (inst_size, 1)
    _inst_pred     : int32 output buffer, documented as (200, 200, 16);
                     written in place by the kernel

  Raises (c10::Error via TORCH_CHECK) when a non-empty tensor is not a
  contiguous int32 device tensor or when a buffer is too small for the
  accesses the kernel performs.

  NOTE(review): the file header says this file is generated by hipify, so
  hand edits here may be overwritten when it is regenerated.
*/
void nearest_assign_forward(
    const at::Tensor _occ_pred,       // (200, 200, 16)
    const at::Tensor _l2s_key,        // (l2s_size, 1)
    const at::Tensor _occind2detind,  // (10, 1)
    const at::Tensor _inst_cls,       // (inst_size, 1)
    const at::Tensor _inst_xyz,       // (inst_size, 3)
    const at::Tensor _inst_id_list,   // (inst_size, 1)
    at::Tensor _inst_pred             // (200, 200, 16)
) {
  // The launcher always starts one thread per voxel of a fixed
  // 200 x 200 x 16 grid, each of which indexes occ_pred and inst_pred.
  constexpr int64_t kNumVoxels = 200 * 200 * 16;

  // Buffers are read through raw int pointers with flat indexing, so every
  // tensor that is actually dereferenced must be contiguous int32 on the GPU.
  // Empty tensors are never dereferenced and are therefore not restricted
  // to a device.
  const auto check_tensor = [](const at::Tensor& t, const char* name) {
    TORCH_CHECK(t.scalar_type() == at::kInt, name, " must be an int32 tensor");
    TORCH_CHECK(t.is_contiguous(), name, " must be contiguous");
    TORCH_CHECK(t.numel() == 0 || t.is_cuda(), name, " must be a CUDA tensor");
  };
  check_tensor(_occ_pred, "occ_pred");
  check_tensor(_l2s_key, "l2s_key");
  check_tensor(_occind2detind, "occind2detind");
  check_tensor(_inst_cls, "inst_cls");
  check_tensor(_inst_xyz, "inst_xyz");
  check_tensor(_inst_id_list, "inst_id_list");
  check_tensor(_inst_pred, "inst_pred");

  const int l2s_size = static_cast<int>(_l2s_key.size(0));
  const int inst_size = static_cast<int>(_inst_xyz.size(0));

  TORCH_CHECK(_occ_pred.numel() >= kNumVoxels,
              "occ_pred must hold at least ", kNumVoxels, " elements, got ",
              _occ_pred.numel());
  TORCH_CHECK(_inst_pred.numel() >= kNumVoxels,
              "inst_pred must hold at least ", kNumVoxels, " elements, got ",
              _inst_pred.numel());
  // The kernel reads inst_xyz[i * 3 + {0, 1, 2}], inst_cls[i] and
  // inst_id_list[i] for every i < inst_size.
  TORCH_CHECK(_inst_xyz.numel() >= static_cast<int64_t>(inst_size) * 3,
              "inst_xyz must hold three values per instance");
  TORCH_CHECK(_inst_cls.numel() >= inst_size,
              "inst_cls must hold one value per instance");
  TORCH_CHECK(_inst_id_list.numel() >= inst_size,
              "inst_id_list must hold one value per instance");

  // Make the device that owns occ_pred current for the kernel launch.
  const at::hip::OptionalHIPGuardMasqueradingAsCUDA device_guard(
      device_of(_occ_pred));

  const int* occ_pred = _occ_pred.data_ptr<int>();
  const int* inst_xyz = _inst_xyz.data_ptr<int>();
  const int* inst_cls = _inst_cls.data_ptr<int>();
  const int* l2s_key = _l2s_key.data_ptr<int>();
  const int* inst_id_list = _inst_id_list.data_ptr<int>();
  const int* occind2detind = _occind2detind.data_ptr<int>();
  int* inst_pred = _inst_pred.data_ptr<int>();

  nearest_assign(
      l2s_key,
      l2s_size,
      occind2detind,
      inst_size,
      occ_pred,
      inst_xyz,
      inst_cls,
      inst_id_list,
      inst_pred
  );
}
// Python bindings: expose nearest_assign_forward under the same name in the
// extension module (module name supplied by TORCH_EXTENSION_NAME at build time).
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("nearest_assign_forward", &nearest_assign_forward,
        "nearest_assign_forward");
}
docker-hub/FlashOCC/Flashocc/projects/setup.py
0 → 100644
View file @
d2b71343
from
setuptools
import
find_packages
,
setup
import
os
import
shutil
import
sys
import
torch
import
warnings
from
os
import
path
as
osp
from
torch.utils.cpp_extension
import
(
BuildExtension
,
CppExtension
,
CUDAExtension
)
def make_cuda_ext(name,
                  module,
                  sources,
                  sources_cuda=None,
                  extra_args=None,
                  extra_include_path=None):
    """Build a torch C++/CUDA extension description for ``setup()``.

    A ``CUDAExtension`` is produced when ``torch.cuda.is_available()`` or
    the environment variable ``FORCE_CUDA`` equals ``'1'``; otherwise a
    ``CppExtension`` is produced and ``sources_cuda`` is ignored.

    None of the list arguments is modified; the function works on copies.

    Args:
        name: Extension name (last component of the dotted module path).
        module: Dotted package path; also used as the directory prefix
            that is joined in front of every source path.
        sources: Source files, relative to the package directory.
        sources_cuda: Extra sources compiled only in the CUDA case.
        extra_args: Extra flags passed to both the C++ and nvcc compilers.
        extra_include_path: Extra include directories.

    Returns:
        The extension object created by ``CUDAExtension`` or
        ``CppExtension``.
    """
    # Copy every list: the defaults used to be shared mutable lists, and
    # ``sources`` was extended in place, leaking into the caller's list.
    sources = list(sources)
    sources_cuda = list(sources_cuda) if sources_cuda is not None else []
    extra_args = list(extra_args) if extra_args is not None else []
    include_dirs = (list(extra_include_path)
                    if extra_include_path is not None else [])

    define_macros = []
    extra_compile_args = {'cxx': [] + extra_args}

    if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
        define_macros += [('WITH_CUDA', None)]
        extension = CUDAExtension
        extra_compile_args['nvcc'] = extra_args + [
            '-D__CUDA_NO_HALF_OPERATORS__',
            '-D__CUDA_NO_HALF_CONVERSIONS__',
            '-D__CUDA_NO_HALF2_OPERATORS__',
        ]
        sources += sources_cuda
    else:
        print('Compiling {} without CUDA'.format(name))
        extension = CppExtension
        # raise EnvironmentError('CUDA is required to compile MMDetection!')

    return extension(
        name='{}.{}'.format(module, name),
        sources=[os.path.join(*module.split('.'), p) for p in sources],
        include_dirs=include_dirs,
        define_macros=define_macros,
        extra_compile_args=extra_compile_args)
if __name__ == '__main__':
    # Package metadata plus the three compiled ops (bev_pool, bev_pool_v2,
    # nearest_assign); every extension is built through make_cuda_ext and
    # compiled by torch's BuildExtension command.
    setup(
        name='flashocc_plugin',
        # The two literals are concatenated, so the first one needs the
        # trailing space (it previously rendered as "platformfor").
        description=("OpenMMLab's next-generation platform "
                     'for general 3D object detection.'),
        long_description_content_type='text/markdown',
        author='MMDetection3D Contributors',
        author_email='zwwdev@gmail.com',
        keywords='computer vision, 3D object detection',
        url='https://github.com/open-mmlab/mmdetection3d',
        classifiers=[
            'Development Status :: 4 - Beta',
            'License :: OSI Approved :: Apache Software License',
            'Operating System :: OS Independent',
            'Programming Language :: Python :: 3',
            'Programming Language :: Python :: 3.6',
            'Programming Language :: Python :: 3.7',
        ],
        license='Apache License 2.0',
        ext_modules=[
            make_cuda_ext(
                name="bev_pool_ext",
                module="mmdet3d_plugin.ops.bev_pool",
                sources=[
                    "src/bev_pooling.cpp",
                    "src/bev_sum_pool.cpp",
                    "src/bev_sum_pool_cuda.cu",
                    "src/bev_max_pool.cpp",
                    "src/bev_max_pool_cuda.cu",
                ],
            ),
            make_cuda_ext(
                name="bev_pool_v2_ext",
                module="mmdet3d_plugin.ops.bev_pool_v2",
                sources=[
                    "src/bev_pool.cpp",
                    "src/bev_pool_cuda.cu",
                ],
            ),
            make_cuda_ext(
                name="nearest_assign_ext",
                module="mmdet3d_plugin.ops.nearest_assign",
                sources=[
                    "src/nearest_assign.cpp",
                    "src/nearest_assign_cuda.cu",
                ],
            ),
        ],
        cmdclass={'build_ext': BuildExtension},
        zip_safe=False)
docker-hub/FlashOCC/Flashocc/repro.py
0 → 100644
View file @
d2b71343
import
torch
from
torch
import
tensor
,
device
import
torch.fx
as
fx
from
torch._dynamo.testing
import
rand_strided
from
math
import
inf
import
torch._inductor.inductor_prims
import
torch._dynamo.config
import
torch._inductor.config
import
torch._functorch.config
import
torch.fx.experimental._config
torch
.
_dynamo
.
config
.
capture_scalar_outputs
=
True
isolate_fails_code_str
=
None
# torch version: 2.4.1
# torch cuda version: None
# torch git version: 45d303c9e4f41ec2f5450b6f60031246f67189d6
# CUDA Info:
# nvcc not found
# GPU Hardware Info:
# BW200 : 8
from
torch.nn
import
*
class Repro(torch.nn.Module):
    """Parameter-free module whose ``forward`` replays a fixed aten-op graph.

    NOTE(review): the surrounding imports (``torch._dynamo``,
    ``torch._inductor``) and the variable naming suggest this is an
    auto-generated compiler repro -- confirm before editing by hand.
    """

    def __init__(self):
        super().__init__()

    def forward(self, primals_1, primals_2, primals_4, primals_5, primals_6, primals_7, primals_8, primals_10, convert_element_type_1, clamp_max, convert_element_type_3, clamp_max_1, clamp_max_2, clamp_max_3, cat, convolution, squeeze_1, relu, convolution_1, getitem_3, rsqrt_1, convert_element_type_5, clamp_max_4, convert_element_type_7, clamp_max_5, clamp_max_6, clamp_max_7, add_19, convolution_2, squeeze_7, relu_2, unsqueeze_14, unsqueeze_38, tangents_1):
        """Evaluate the graph and return a list of 22 entries.

        The list holds eleven computed tensors, then nine ``None``
        placeholders, then ``slice_1`` and ``add_36``.

        Intermediate names are rebound to ``None`` right after their last
        use so the referenced tensors can be released early. Tensors
        created inside the graph are placed on the hard-coded device
        ``cuda:2``.
        """
        sum_1 = torch.ops.aten.sum.dim_IntList(tangents_1, [0, 2, 3])
        convolution_backward = torch.ops.aten.convolution_backward.default(tangents_1, relu_2, primals_10, [256], [1, 1], [0, 0], [1, 1], False, [0, 0], 1, [True, True, False]);  tangents_1 = primals_10 = None
        getitem_6 = convolution_backward[0]
        getitem_7 = convolution_backward[1];  convolution_backward = None
        le = torch.ops.aten.le.Scalar(relu_2, 0);  relu_2 = None
        # Scalar zero reused by all three aten.where calls below.
        full_default = torch.ops.aten.full.default([], 0.0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        where = torch.ops.aten.where.self(le, full_default, getitem_6);  le = getitem_6 = None
        sum_2 = torch.ops.aten.sum.dim_IntList(where, [0, 2, 3])
        sub_13 = torch.ops.aten.sub.Tensor(convolution_2, unsqueeze_14);  convolution_2 = unsqueeze_14 = None
        mul_31 = torch.ops.aten.mul.Tensor(where, sub_13)
        sum_3 = torch.ops.aten.sum.dim_IntList(mul_31, [0, 2, 3]);  mul_31 = None
        mul_32 = torch.ops.aten.mul.Tensor(sum_2, 6.25e-06)
        unsqueeze_15 = torch.ops.aten.unsqueeze.default(mul_32, 0);  mul_32 = None
        unsqueeze_16 = torch.ops.aten.unsqueeze.default(unsqueeze_15, 2);  unsqueeze_15 = None
        unsqueeze_17 = torch.ops.aten.unsqueeze.default(unsqueeze_16, 3);  unsqueeze_16 = None
        mul_33 = torch.ops.aten.mul.Tensor(sum_3, 6.25e-06)
        mul_34 = torch.ops.aten.mul.Tensor(squeeze_7, squeeze_7)
        mul_35 = torch.ops.aten.mul.Tensor(mul_33, mul_34);  mul_33 = mul_34 = None
        unsqueeze_18 = torch.ops.aten.unsqueeze.default(mul_35, 0);  mul_35 = None
        unsqueeze_19 = torch.ops.aten.unsqueeze.default(unsqueeze_18, 2);  unsqueeze_18 = None
        unsqueeze_20 = torch.ops.aten.unsqueeze.default(unsqueeze_19, 3);  unsqueeze_19 = None
        mul_36 = torch.ops.aten.mul.Tensor(squeeze_7, primals_8);  primals_8 = None
        unsqueeze_21 = torch.ops.aten.unsqueeze.default(mul_36, 0);  mul_36 = None
        unsqueeze_22 = torch.ops.aten.unsqueeze.default(unsqueeze_21, 2);  unsqueeze_21 = None
        unsqueeze_23 = torch.ops.aten.unsqueeze.default(unsqueeze_22, 3);  unsqueeze_22 = None
        mul_37 = torch.ops.aten.mul.Tensor(sub_13, unsqueeze_20);  sub_13 = unsqueeze_20 = None
        sub_15 = torch.ops.aten.sub.Tensor(where, mul_37);  where = mul_37 = None
        sub_16 = torch.ops.aten.sub.Tensor(sub_15, unsqueeze_17);  sub_15 = unsqueeze_17 = None
        mul_38 = torch.ops.aten.mul.Tensor(sub_16, unsqueeze_23);  sub_16 = unsqueeze_23 = None
        mul_39 = torch.ops.aten.mul.Tensor(sum_3, squeeze_7);  sum_3 = squeeze_7 = None
        convolution_backward_1 = torch.ops.aten.convolution_backward.default(mul_38, add_19, primals_7, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]);  mul_38 = add_19 = primals_7 = None
        getitem_9 = convolution_backward_1[0]
        getitem_10 = convolution_backward_1[1];  convolution_backward_1 = None
        mul_40 = torch.ops.aten.mul.Tensor(getitem_9, clamp_max_7);  clamp_max_7 = None
        neg = torch.ops.aten.neg.default(mul_40)
        add_25 = torch.ops.aten.add.Tensor(getitem_9, neg);  getitem_9 = neg = None
        mul_41 = torch.ops.aten.mul.Tensor(mul_40, clamp_max_6)
        neg_1 = torch.ops.aten.neg.default(mul_41)
        add_26 = torch.ops.aten.add.Tensor(mul_40, neg_1);  mul_40 = neg_1 = None
        mul_42 = torch.ops.aten.mul.Tensor(add_25, clamp_max_6);  clamp_max_6 = None
        neg_2 = torch.ops.aten.neg.default(mul_42)
        add_27 = torch.ops.aten.add.Tensor(add_25, neg_2);  add_25 = neg_2 = None
        # Zero tensor used as the base of the four index_put calls that follow.
        full_default_1 = torch.ops.aten.full.default([4, 512, 100, 100], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        _unsafe_index_put = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, clamp_max_4, clamp_max_5], mul_41, True);  mul_41 = None
        _unsafe_index_put_1 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, clamp_max_4, convert_element_type_7], add_26, True);  clamp_max_4 = add_26 = None
        add_28 = torch.ops.aten.add.Tensor(_unsafe_index_put, _unsafe_index_put_1);  _unsafe_index_put = _unsafe_index_put_1 = None
        _unsafe_index_put_2 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, convert_element_type_5, clamp_max_5], mul_42, True);  clamp_max_5 = mul_42 = None
        add_29 = torch.ops.aten.add.Tensor(add_28, _unsafe_index_put_2);  add_28 = _unsafe_index_put_2 = None
        _unsafe_index_put_3 = torch.ops.aten._unsafe_index_put.default(full_default_1, [None, None, convert_element_type_5, convert_element_type_7], add_27, True);  full_default_1 = convert_element_type_5 = convert_element_type_7 = add_27 = None
        add_30 = torch.ops.aten.add.Tensor(add_29, _unsafe_index_put_3);  add_29 = _unsafe_index_put_3 = None
        sub_6 = torch.ops.aten.sub.Tensor(convolution_1, getitem_3)
        mul_12 = torch.ops.aten.mul.Tensor(sub_6, rsqrt_1);  sub_6 = None
        unsqueeze_4 = torch.ops.aten.unsqueeze.default(primals_5, -1)
        unsqueeze_5 = torch.ops.aten.unsqueeze.default(unsqueeze_4, -1);  unsqueeze_4 = None
        mul_18 = torch.ops.aten.mul.Tensor(mul_12, unsqueeze_5);  mul_12 = unsqueeze_5 = None
        unsqueeze_6 = torch.ops.aten.unsqueeze.default(primals_6, -1);  primals_6 = None
        unsqueeze_7 = torch.ops.aten.unsqueeze.default(unsqueeze_6, -1);  unsqueeze_6 = None
        add_14 = torch.ops.aten.add.Tensor(mul_18, unsqueeze_7);  mul_18 = unsqueeze_7 = None
        relu_1 = torch.ops.aten.relu.default(add_14);  add_14 = None
        le_1 = torch.ops.aten.le.Scalar(relu_1, 0);  relu_1 = None
        where_1 = torch.ops.aten.where.self(le_1, full_default, add_30);  le_1 = add_30 = None
        squeeze_3 = torch.ops.aten.squeeze.dims(getitem_3, [0, 2, 3]);  getitem_3 = None
        unsqueeze_24 = torch.ops.aten.unsqueeze.default(squeeze_3, 0);  squeeze_3 = None
        unsqueeze_25 = torch.ops.aten.unsqueeze.default(unsqueeze_24, 2);  unsqueeze_24 = None
        unsqueeze_26 = torch.ops.aten.unsqueeze.default(unsqueeze_25, 3);  unsqueeze_25 = None
        sum_4 = torch.ops.aten.sum.dim_IntList(where_1, [0, 2, 3])
        sub_17 = torch.ops.aten.sub.Tensor(convolution_1, unsqueeze_26);  convolution_1 = unsqueeze_26 = None
        mul_43 = torch.ops.aten.mul.Tensor(where_1, sub_17)
        sum_5 = torch.ops.aten.sum.dim_IntList(mul_43, [0, 2, 3]);  mul_43 = None
        mul_44 = torch.ops.aten.mul.Tensor(sum_4, 2.5e-05)
        unsqueeze_27 = torch.ops.aten.unsqueeze.default(mul_44, 0);  mul_44 = None
        unsqueeze_28 = torch.ops.aten.unsqueeze.default(unsqueeze_27, 2);  unsqueeze_27 = None
        unsqueeze_29 = torch.ops.aten.unsqueeze.default(unsqueeze_28, 3);  unsqueeze_28 = None
        mul_45 = torch.ops.aten.mul.Tensor(sum_5, 2.5e-05)
        squeeze_4 = torch.ops.aten.squeeze.dims(rsqrt_1, [0, 2, 3]);  rsqrt_1 = None
        mul_46 = torch.ops.aten.mul.Tensor(squeeze_4, squeeze_4)
        mul_47 = torch.ops.aten.mul.Tensor(mul_45, mul_46);  mul_45 = mul_46 = None
        unsqueeze_30 = torch.ops.aten.unsqueeze.default(mul_47, 0);  mul_47 = None
        unsqueeze_31 = torch.ops.aten.unsqueeze.default(unsqueeze_30, 2);  unsqueeze_30 = None
        unsqueeze_32 = torch.ops.aten.unsqueeze.default(unsqueeze_31, 3);  unsqueeze_31 = None
        mul_48 = torch.ops.aten.mul.Tensor(squeeze_4, primals_5);  primals_5 = None
        unsqueeze_33 = torch.ops.aten.unsqueeze.default(mul_48, 0);  mul_48 = None
        unsqueeze_34 = torch.ops.aten.unsqueeze.default(unsqueeze_33, 2);  unsqueeze_33 = None
        unsqueeze_35 = torch.ops.aten.unsqueeze.default(unsqueeze_34, 3);  unsqueeze_34 = None
        mul_49 = torch.ops.aten.mul.Tensor(sub_17, unsqueeze_32);  sub_17 = unsqueeze_32 = None
        sub_19 = torch.ops.aten.sub.Tensor(where_1, mul_49);  where_1 = mul_49 = None
        sub_20 = torch.ops.aten.sub.Tensor(sub_19, unsqueeze_29);  sub_19 = unsqueeze_29 = None
        mul_50 = torch.ops.aten.mul.Tensor(sub_20, unsqueeze_35);  sub_20 = unsqueeze_35 = None
        mul_51 = torch.ops.aten.mul.Tensor(sum_5, squeeze_4);  sum_5 = squeeze_4 = None
        convolution_backward_2 = torch.ops.aten.convolution_backward.default(mul_50, relu, primals_4, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]);  mul_50 = primals_4 = None
        getitem_12 = convolution_backward_2[0]
        getitem_13 = convolution_backward_2[1];  convolution_backward_2 = None
        le_2 = torch.ops.aten.le.Scalar(relu, 0);  relu = None
        where_2 = torch.ops.aten.where.self(le_2, full_default, getitem_12);  le_2 = full_default = getitem_12 = None
        sum_6 = torch.ops.aten.sum.dim_IntList(where_2, [0, 2, 3])
        sub_21 = torch.ops.aten.sub.Tensor(convolution, unsqueeze_38);  convolution = unsqueeze_38 = None
        mul_52 = torch.ops.aten.mul.Tensor(where_2, sub_21)
        sum_7 = torch.ops.aten.sum.dim_IntList(mul_52, [0, 2, 3]);  mul_52 = None
        mul_53 = torch.ops.aten.mul.Tensor(sum_6, 2.5e-05)
        unsqueeze_39 = torch.ops.aten.unsqueeze.default(mul_53, 0);  mul_53 = None
        unsqueeze_40 = torch.ops.aten.unsqueeze.default(unsqueeze_39, 2);  unsqueeze_39 = None
        unsqueeze_41 = torch.ops.aten.unsqueeze.default(unsqueeze_40, 3);  unsqueeze_40 = None
        mul_54 = torch.ops.aten.mul.Tensor(sum_7, 2.5e-05)
        mul_55 = torch.ops.aten.mul.Tensor(squeeze_1, squeeze_1)
        mul_56 = torch.ops.aten.mul.Tensor(mul_54, mul_55);  mul_54 = mul_55 = None
        unsqueeze_42 = torch.ops.aten.unsqueeze.default(mul_56, 0);  mul_56 = None
        unsqueeze_43 = torch.ops.aten.unsqueeze.default(unsqueeze_42, 2);  unsqueeze_42 = None
        unsqueeze_44 = torch.ops.aten.unsqueeze.default(unsqueeze_43, 3);  unsqueeze_43 = None
        mul_57 = torch.ops.aten.mul.Tensor(squeeze_1, primals_2);  primals_2 = None
        unsqueeze_45 = torch.ops.aten.unsqueeze.default(mul_57, 0);  mul_57 = None
        unsqueeze_46 = torch.ops.aten.unsqueeze.default(unsqueeze_45, 2);  unsqueeze_45 = None
        unsqueeze_47 = torch.ops.aten.unsqueeze.default(unsqueeze_46, 3);  unsqueeze_46 = None
        mul_58 = torch.ops.aten.mul.Tensor(sub_21, unsqueeze_44);  sub_21 = unsqueeze_44 = None
        sub_23 = torch.ops.aten.sub.Tensor(where_2, mul_58);  where_2 = mul_58 = None
        sub_24 = torch.ops.aten.sub.Tensor(sub_23, unsqueeze_41);  sub_23 = unsqueeze_41 = None
        mul_59 = torch.ops.aten.mul.Tensor(sub_24, unsqueeze_47);  sub_24 = unsqueeze_47 = None
        mul_60 = torch.ops.aten.mul.Tensor(sum_7, squeeze_1);  sum_7 = squeeze_1 = None
        convolution_backward_3 = torch.ops.aten.convolution_backward.default(mul_59, cat, primals_1, [0], [1, 1], [1, 1], [1, 1], False, [0, 0], 1, [True, True, False]);  mul_59 = cat = primals_1 = None
        getitem_15 = convolution_backward_3[0]
        getitem_16 = convolution_backward_3[1];  convolution_backward_3 = None
        # Split dim 1 of getitem_15 into the ranges [0, 128) and [128, 640).
        slice_1 = torch.ops.aten.slice.Tensor(getitem_15, 1, 0, 128)
        slice_2 = torch.ops.aten.slice.Tensor(getitem_15, 1, 128, 640);  getitem_15 = None
        mul_61 = torch.ops.aten.mul.Tensor(slice_2, clamp_max_3);  clamp_max_3 = None
        neg_3 = torch.ops.aten.neg.default(mul_61)
        add_31 = torch.ops.aten.add.Tensor(slice_2, neg_3);  slice_2 = neg_3 = None
        mul_62 = torch.ops.aten.mul.Tensor(mul_61, clamp_max_2)
        neg_4 = torch.ops.aten.neg.default(mul_62)
        add_32 = torch.ops.aten.add.Tensor(mul_61, neg_4);  mul_61 = neg_4 = None
        mul_63 = torch.ops.aten.mul.Tensor(add_31, clamp_max_2);  clamp_max_2 = None
        neg_5 = torch.ops.aten.neg.default(mul_63)
        add_33 = torch.ops.aten.add.Tensor(add_31, neg_5);  add_31 = neg_5 = None
        # Zero tensor used as the base of the four index_put calls that follow.
        full_default_7 = torch.ops.aten.full.default([4, 512, 25, 25], 0, dtype = torch.float32, layout = torch.strided, device = device(type='cuda', index=2), pin_memory = False)
        _unsafe_index_put_4 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, clamp_max, clamp_max_1], mul_62, True);  mul_62 = None
        _unsafe_index_put_5 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, clamp_max, convert_element_type_3], add_32, True);  clamp_max = add_32 = None
        add_34 = torch.ops.aten.add.Tensor(_unsafe_index_put_4, _unsafe_index_put_5);  _unsafe_index_put_4 = _unsafe_index_put_5 = None
        _unsafe_index_put_6 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, convert_element_type_1, clamp_max_1], mul_63, True);  clamp_max_1 = mul_63 = None
        add_35 = torch.ops.aten.add.Tensor(add_34, _unsafe_index_put_6);  add_34 = _unsafe_index_put_6 = None
        _unsafe_index_put_7 = torch.ops.aten._unsafe_index_put.default(full_default_7, [None, None, convert_element_type_1, convert_element_type_3], add_33, True);  full_default_7 = convert_element_type_1 = convert_element_type_3 = add_33 = None
        add_36 = torch.ops.aten.add.Tensor(add_35, _unsafe_index_put_7);  add_35 = _unsafe_index_put_7 = None
        return [getitem_16, mul_60, sum_6, getitem_13, mul_51, sum_4, getitem_10, mul_39, sum_2, getitem_7, sum_1, None, None, None, None, None, None, None, None, None, slice_1, add_36]
def load_args(reader):
    """Declare the 34 captured inputs of this repro on ``reader``.

    Every input is declared in two steps, always in the same order:

    1. ``reader.storage(...)`` is called with a hash-like string, an integer
       size and ``device(type='cuda', index=2)``; the integer index buffers
       additionally pass ``dtype_hint=torch.int64``.
       NOTE(review): the integer is presumably the storage size in bytes and
       the string presumably identifies a saved blob -- confirm against the
       reader implementation.
    2. ``reader.tensor(...)`` is called with that buffer, a shape tuple and
       the flags ``is_leaf`` / ``requires_grad`` / ``dtype`` as listed.

    The trailing ``# name`` comment on each ``reader.tensor`` line is the
    generator's label for that input (presumably the graph placeholder name).
    Several storages deliberately reuse the same hash string (e.g. ``buf8`` and
    ``buf10``); they are still declared as separate buffers here.

    The device index is hard-coded to 2 in every call.

    Args:
        reader: object exposing ``storage(...)`` and ``tensor(...)``.

    Returns:
        None. The function has no ``return`` statement; its effect is the
        sequence of calls made on ``reader``.
    """
    buf0 = reader.storage('934c55e4a7a69a0a29a96cd8ef9f11c9859658e1', 11796480, device=device(type='cuda', index=2))
    reader.tensor(buf0, (512, 640, 3, 3), requires_grad=True, is_leaf=True)  # primals_1
    buf1 = reader.storage('f12094f433480ec90280d223057708434df38941', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf1, (512,), requires_grad=True, is_leaf=True)  # primals_2
    buf2 = reader.storage('06c46ad2c91ec5c8eebc4fb0be80459bdfe007a8', 9437184, device=device(type='cuda', index=2))
    reader.tensor(buf2, (512, 512, 3, 3), requires_grad=True, is_leaf=True)  # primals_4
    buf3 = reader.storage('aba0c4266c842d1845e720dc0c789942770a60b7', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf3, (512,), requires_grad=True, is_leaf=True)  # primals_5
    buf4 = reader.storage('bb8471d379e03c8ccb9897ce7d3a2dfbacb44e30', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf4, (512,), requires_grad=True, is_leaf=True)  # primals_6
    buf5 = reader.storage('b9484105fb5b2045fb6550a1edb77af72e639416', 4718592, device=device(type='cuda', index=2))
    reader.tensor(buf5, (256, 512, 3, 3), requires_grad=True, is_leaf=True)  # primals_7
    buf6 = reader.storage('b778b8cab416c3fa6763b88e431266ae6ea28941', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf6, (256,), requires_grad=True, is_leaf=True)  # primals_8
    buf7 = reader.storage('c5f14ec72c73a593b47ef4aecf37f6bb25d2dec4', 262144, device=device(type='cuda', index=2))
    reader.tensor(buf7, (256, 256, 1, 1), requires_grad=True, is_leaf=True)  # primals_10
    buf8 = reader.storage('99ef5c7086a924dfc5221c01ff1520de469849c8', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf8, (100, 1), dtype=torch.int64, is_leaf=True)  # convert_element_type_1
    buf9 = reader.storage('532b7b8fc19c48c7434e569ab96aa0670d5651ef', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf9, (100, 1), dtype=torch.int64, is_leaf=True)  # clamp_max
    buf10 = reader.storage('99ef5c7086a924dfc5221c01ff1520de469849c8', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf10, (100,), dtype=torch.int64, is_leaf=True)  # convert_element_type_3
    buf11 = reader.storage('532b7b8fc19c48c7434e569ab96aa0670d5651ef', 800, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf11, (100,), dtype=torch.int64, is_leaf=True)  # clamp_max_1
    buf12 = reader.storage('0538ed039b8a4706a4f85bf431e12664d8940742', 400, device=device(type='cuda', index=2))
    reader.tensor(buf12, (100,), is_leaf=True)  # clamp_max_2
    buf13 = reader.storage('0538ed039b8a4706a4f85bf431e12664d8940742', 400, device=device(type='cuda', index=2))
    reader.tensor(buf13, (100, 1), is_leaf=True)  # clamp_max_3
    buf14 = reader.storage('5d41e66671a283b70001fd74345d8e7e3def00bd', 102400000, device=device(type='cuda', index=2))
    reader.tensor(buf14, (4, 640, 100, 100), is_leaf=True)  # cat
    buf15 = reader.storage('a8fe0ed584571bb3218d663656459a36545be5e6', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf15, (4, 512, 100, 100), is_leaf=True)  # convolution
    buf16 = reader.storage('0af13bcf109b8ca2df7f5ce3387d51e8576fb30a', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf16, (512,), is_leaf=True)  # squeeze_1
    buf17 = reader.storage('32f14d6fa07f654fbb09ef1563066303a3501eda', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf17, (4, 512, 100, 100), is_leaf=True)  # relu
    buf18 = reader.storage('aca23d51e723ad9b4bec2e54d6f0af4b5b85cc7d', 81920000, device=device(type='cuda', index=2))
    reader.tensor(buf18, (4, 512, 100, 100), is_leaf=True)  # convolution_1
    buf19 = reader.storage('4940c79e48676c2e1359870dc770e25cd780983d', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf19, (1, 512, 1, 1), is_leaf=True)  # getitem_3
    buf20 = reader.storage('d17407a9f45954a4d0d36e5b20a40ac554cc3aff', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf20, (1, 512, 1, 1), is_leaf=True)  # rsqrt_1
    buf21 = reader.storage('95fbd2b85e217ab78f8f9d7900b273a1362b3112', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf21, (200, 1), dtype=torch.int64, is_leaf=True)  # convert_element_type_5
    buf22 = reader.storage('d9920b87a7261c94c907bc68889b005f277cd597', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf22, (200, 1), dtype=torch.int64, is_leaf=True)  # clamp_max_4
    buf23 = reader.storage('95fbd2b85e217ab78f8f9d7900b273a1362b3112', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf23, (200,), dtype=torch.int64, is_leaf=True)  # convert_element_type_7
    buf24 = reader.storage('d9920b87a7261c94c907bc68889b005f277cd597', 1600, device=device(type='cuda', index=2), dtype_hint=torch.int64)
    reader.tensor(buf24, (200,), dtype=torch.int64, is_leaf=True)  # clamp_max_5
    buf25 = reader.storage('131d76cb798ee04745f0c7dcb67b63c74a6c00df', 800, device=device(type='cuda', index=2))
    reader.tensor(buf25, (200,), is_leaf=True)  # clamp_max_6
    buf26 = reader.storage('131d76cb798ee04745f0c7dcb67b63c74a6c00df', 800, device=device(type='cuda', index=2))
    reader.tensor(buf26, (200, 1), is_leaf=True)  # clamp_max_7
    buf27 = reader.storage('32194c54194bddd5f695a8d306828130629246fc', 327680000, device=device(type='cuda', index=2))
    reader.tensor(buf27, (4, 512, 200, 200), is_leaf=True)  # add_19
    buf28 = reader.storage('e3a286ef8d6373c83ef30afe16eaae96ee52b965', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf28, (4, 256, 200, 200), is_leaf=True)  # convolution_2
    buf29 = reader.storage('9572b289e6d5c9bdd20a79367d4005440da40795', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf29, (256,), is_leaf=True)  # squeeze_7
    buf30 = reader.storage('42f9ce794a05b12a40f15cbd4abb1201ccef0f72', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf30, (4, 256, 200, 200), is_leaf=True)  # relu_2
    buf31 = reader.storage('61670207f087dc68f052bc03747d9ab365297b17', 1024, device=device(type='cuda', index=2))
    reader.tensor(buf31, (1, 256, 1, 1), is_leaf=True)  # unsqueeze_14
    buf32 = reader.storage('ab77896e6dd76345e63586ecda30b1e4a63439cc', 2048, device=device(type='cuda', index=2))
    reader.tensor(buf32, (1, 512, 1, 1), is_leaf=True)  # unsqueeze_38
    buf33 = reader.storage('f0ec623d2a44ff0f64fc264faf9128c2a6896e57', 163840000, device=device(type='cuda', index=2))
    reader.tensor(buf33, (4, 256, 200, 200), is_leaf=True)  # tangents_1
# Attribute attached to the loader function itself; it is read outside this
# file (presumably a format version for the argument loader -- TODO confirm).
load_args._version = 0
# Module-level instance of the Repro class defined earlier in this file.
mod = Repro()
if __name__ == '__main__':
    # Imported lazily so the helper is only needed when run as a script.
    from torch._dynamo.repro.after_aot import run_repro
    # The run is wrapped in no_grad(); `accuracy=True` and `command='run'`
    # are passed straight through to run_repro.
    with torch.no_grad():
        run_repro(mod, load_args, accuracy=True, command='run', save_dir='/root/FlashOCC/torch_compile_debug/run_2025_08_24_19_42_28_279064-pid_182645/minifier/checkpoints', tracing_mode='real', check_str=None)
        # To run it separately, do
        # mod, args = run_repro(mod, load_args, accuracy=True, command='get_args', save_dir='/root/FlashOCC/torch_compile_debug/run_2025_08_24_19_42_28_279064-pid_182645/minifier/checkpoints', tracing_mode='real', check_str=None)
        # mod(*args)
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/requirements.txt
0 → 100644
View file @
d2b71343
-r requirements/build.txt
-r requirements/optional.txt
-r requirements/runtime.txt
-r requirements/tests.txt
docker-hub/FlashOCC/Flashocc/requirements/build.txt
0 → 100644
View file @
d2b71343
docker-hub/FlashOCC/Flashocc/requirements/docs.txt
0 → 100644
View file @
d2b71343
docutils==0.16.0
m2r
mistune==0.8.4
myst-parser
-e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
sphinx==4.0.2
sphinx-copybutton
sphinx_markdown_tables
docker-hub/FlashOCC/Flashocc/requirements/mminstall.txt
0 → 100644
View file @
d2b71343
mmcv-full>=1.4.8,<=1.6.0
mmdet>=2.24.0,<=3.0.0
mmsegmentation>=0.20.0,<=1.0.0
docker-hub/FlashOCC/Flashocc/requirements/optional.txt
0 → 100644
View file @
d2b71343
open3d
spconv
waymo-open-dataset-tf-2-1-0==1.2.0
docker-hub/FlashOCC/Flashocc/requirements/readthedocs.txt
0 → 100644
View file @
d2b71343
mmcv>=1.4.8
mmdet>=2.24.0
mmsegmentation>=0.20.1
torch
torchvision
docker-hub/FlashOCC/Flashocc/requirements/runtime.txt
0 → 100644
View file @
d2b71343
lyft_dataset_sdk
networkx>=2.2,<2.3
numba==0.53.0
numpy
nuscenes-devkit
plyfile
scikit-image
# by default we also use tensorboard to log results
tensorboard
trimesh>=2.35.39,<2.35.40
docker-hub/FlashOCC/Flashocc/requirements/tests.txt
0 → 100644
View file @
d2b71343
asynctest
codecov
flake8
interrogate
isort
# Note: used for kwarray.group_items, this may be ported to mmcv in the future.
kwarray
pytest
pytest-cov
pytest-runner
ubelt
xdoctest >= 0.10.0
yapf
docker-hub/FlashOCC/Flashocc/rocblas_Flashocc.log
0 → 100644
View file @
d2b71343
118 ./rocblas-bench -f gemm_ex --transposeA T --transposeB N -m 1024 -n 2048 -k 25344 --alpha 1 --a_type f32_r --lda 25344 --b_type f32_r --ldb 25344 --beta 0 --c_type f32_r --ldc 1024 --d_type f32_r --ldd 1024 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
2 ./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 256 -n 960000 -k 512 --alpha 1 --lda 256 --ldb 512 --beta 0 --ldc 256
2 ./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB N -m 512 -n 960000 -k 288 --alpha 1 --lda 512 --ldb 288 --beta 0 --ldc 512
2 ./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 256 -n 512 -k 960000 --alpha 1 --lda 256 --ldb 512 --beta 0 --ldc 256
2 ./rocblas-bench -f gemm -r f32_r --transposeA N --transposeB T -m 512 -n 288 -k 960000 --alpha 1 --lda 512 --ldb 288 --beta 0 --ldc 512
2 ./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 288 -n 960000 -k 512 --alpha 1 --lda 512 --ldb 512 --beta 1 --ldc 288
2 ./rocblas-bench -f gemm -r f32_r --transposeA T --transposeB N -m 512 -n 960000 -k 256 --alpha 1 --lda 256 --ldb 256 --beta 0 --ldc 512
948 ./rocblas-bench -f gemm_strided_batched_ex --transposeA N --transposeB N -m 11264 -n 256 -k 64 --alpha 1 --a_type f32_r --lda 11264 --stride_a 720896 --b_type f32_r --ldb 64 --stride_b 0 --beta 0 --c_type f32_r --ldc 11264 --stride_c 2883584 --d_type f32_r --ldd 11264 --stride_d 2883584 --batch_count 144 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
237 ./rocblas-bench -f gemm_strided_batched_ex --transposeA N --transposeB N -m 11264 -n 64 -k 64 --alpha 1 --a_type f32_r --lda 11264 --stride_a 720896 --b_type f32_r --ldb 64 --stride_b 0 --beta 0 --c_type f32_r --ldc 11264 --stride_c 720896 --d_type f32_r --ldd 11264 --stride_d 720896 --batch_count 144 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
948 ./rocblas-bench -f gemm_strided_batched_ex --transposeA N --transposeB N -m 2816 -n 512 -k 128 --alpha 1 --a_type f32_r --lda 2816 --stride_a 360448 --b_type f32_r --ldb 128 --stride_b 0 --beta 0 --c_type f32_r --ldc 2816 --stride_c 1441792 --d_type f32_r --ldd 2816 --stride_d 1441792 --batch_count 144 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
237 ./rocblas-bench -f gemm_strided_batched_ex --transposeA N --transposeB N -m 2816 -n 512 -k 256 --alpha 1 --a_type f32_r --lda 2816 --stride_a 720896 --b_type f32_r --ldb 256 --stride_b 0 --beta 0 --c_type f32_r --ldc 2816 --stride_c 1441792 --d_type f32_r --ldd 2816 --stride_d 1441792 --batch_count 144 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
119 ./rocblas-bench -f gemm_strided_batched_ex --transposeA N --transposeB N -m 40000 -n 256 -k 256 --alpha 1 --a_type f32_r --lda 40000 --stride_a 10240000 --b_type f32_r --ldb 256 --stride_b 0 --beta 0 --c_type f32_r --ldc 40000 --stride_c 10240000 --d_type f32_r --ldd 40000 --stride_d 10240000 --batch_count 24 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
119 ./rocblas-bench -f gemm_strided_batched_ex --transposeA N --transposeB N -m 704 -n 152 -k 256 --alpha 1 --a_type f32_r --lda 704 --stride_a 180224 --b_type f32_r --ldb 256 --stride_b 0 --beta 0 --c_type f32_r --ldc 704 --stride_c 107008 --d_type f32_r --ldd 704 --stride_d 107008 --batch_count 144 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
118 ./rocblas-bench -f gemm_strided_batched_ex --transposeA N --transposeB T -m 11264 -n 256 -k 128 --alpha 1 --a_type f32_r --lda 11264 --stride_a 1441792 --b_type f32_r --ldb 256 --stride_b 0 --beta 0 --c_type f32_r --ldc 11264 --stride_c 2883584 --d_type f32_r --ldd 11264 --stride_d 2883584 --batch_count 144 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
236 ./rocblas-bench -f gemm_strided_batched_ex --transposeA N --transposeB T -m 11264 -n 256 -k 64 --alpha 1 --a_type f32_r --lda 11264 --stride_a 720896 --b_type f32_r --ldb 256 --stride_b 0 --beta 0 --c_type f32_r --ldc 11264 --stride_c 2883584 --d_type f32_r --ldd 11264 --stride_d 2883584 --batch_count 144 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
118 ./rocblas-bench -f gemm_strided_batched_ex --transposeA N --transposeB T -m 2816 -n 256 -k 512 --alpha 1 --a_type f32_r --lda 2816 --stride_a 1441792 --b_type f32_r --ldb 256 --stride_b 0 --beta 0 --c_type f32_r --ldc 2816 --stride_c 720896 --d_type f32_r --ldd 2816 --stride_d 720896 --batch_count 144 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
118 ./rocblas-bench -f gemm_strided_batched_ex --transposeA N --transposeB T -m 704 -n 512 -k 1024 --alpha 1 --a_type f32_r --lda 704 --stride_a 720896 --b_type f32_r --ldb 512 --stride_b 0 --beta 0 --c_type f32_r --ldc 704 --stride_c 360448 --d_type f32_r --ldd 704 --stride_d 360448 --batch_count 144 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
118 ./rocblas-bench -f gemm_strided_batched_ex --transposeA T --transposeB N -m 512 -n 512 -k 9600 --alpha 1 --a_type f32_r --lda 9600 --stride_a 4915200 --b_type f32_r --ldb 9600 --stride_b 4915200 --beta 0 --c_type f32_r --ldc 512 --stride_c 262144 --d_type f32_r --ldd 512 --stride_d 262144 --batch_count 49 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
118 ./rocblas-bench -f gemm_strided_batched_ex --transposeA T --transposeB N -m 512 -n 640 -k 9600 --alpha 1 --a_type f32_r --lda 9600 --stride_a 4915200 --b_type f32_r --ldb 9600 --stride_b 6144000 --beta 0 --c_type f32_r --ldc 512 --stride_c 327680 --d_type f32_r --ldd 512 --stride_d 327680 --batch_count 49 --compute_type f32_r --algo 0 --solution_index 0 --flags 0
2 ./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA T --transposeB N -m 3 -n 1 -k 3 --alpha 1 --lda 3 --stride_a 9 --ldb 3 --stride_b 3 --beta 0 --ldc 3 --stride_c 3 --batch_count 8921088
2 ./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA T --transposeB N -m 3 -n 3 -k 3 --alpha 1 --lda 3 --stride_a 9 --ldb 4 --stride_b 16 --beta 0 --ldc 3 --stride_c 9 --batch_count 144
4 ./rocblas-bench -f gemm_strided_batched -r f32_r --transposeA T --transposeB T -m 3 -n 1 -k 3 --alpha 1 --lda 3 --stride_a 9 --ldb 1 --stride_b 3 --beta 0 --ldc 3 --stride_c 3 --batch_count 8921088
238 ./rocblas-bench -f gemm_strided_batched -r f64_r --transposeA N --transposeB N -m 4 -n 4 -k 4 --alpha 1 --lda 4 --stride_a 16 --ldb 4 --stride_b 16 --beta 0 --ldc 4 --stride_c 16 --batch_count 144
docker-hub/FlashOCC/Flashocc/start_flashocc.sh
0 → 100755
View file @
d2b71343
#!/bin/bash
# Launch FlashOCC distributed training through tools/dist_train_numa.sh and
# mirror its combined stdout/stderr into a timestamped log file.
#
# Usage: start_flashocc.sh [MASTER_ADDR] [NNODES] [NODE_RANK] [CONFIG]
#   MASTER_ADDR  default: localhost
#   NNODES       default: 1
#   NODE_RANK    default: 0
#   CONFIG       default: projects/configs/flashocc/flashocc-r50.py

# Runtime settings for NCCL, MIOpen, rocBLAS, HSA and TorchInductor.
# Values are kept exactly as committed.
export NCCL_TOPO_FILE=null
export NCCL_ALGO=Ring
export NCCL_RINGS="N0 0 7 6 5 4 3 2 1 N0|N1 1 2 3 4 5 6 7 0 N1|N2 2 1 0 7 6 5 4 3 N2|N3 3 4 5 6 7 0 1 2 N3|N4 4 3 2 1 0 7 6 5 N4|N5 5 6 7 0 1 2 3 4 N5|N6 6 5 4 3 2 1 0 7 N6|N7 7 0 1 2 3 4 5 6 N7"
export PYTORCH_MIOPEN_SUGGEST_NHWC=1
export MIOPEN_PRECISION_FP32_FP32_FP32_TF32_FP32=1
export MIOPEN_FIND_MODE=1
export ROCBLAS_MATH_MODE=1
export HSA_FORCE_FINE_GRAIN_PCIE=1
export TORCHINDUCTOR_LAYOUT_OPTIMIZATION=1
export TORCHINDUCTOR_COORDINATE_DESCENT_TUNING=1

# Optional debug logging; uncomment as needed.
#export MIOPEN_ENABLE_LOGGING=1      # enable MIOpen logging, default = 0
#export MIOPEN_ENABLE_LOGGING_CMD=1  # include command info in the log, default = 0
#export MIOPEN_LOG_LEVEL=6           # set the log verbosity level, default = 0
#export ROCBLAS_LAYER=3              # enable rocBLAS log output, default = 0

# Timestamp used in the log file name (minute resolution).
TIME=$(date "+%Y-%m-%d_%H_%M")

MASTER_ADDR=${1:-localhost}
NNODES=${2:-1}
NODE_RANK=${3:-0}
CONFIG=${4:-projects/configs/flashocc/flashocc-r50.py}

# Expansions are quoted so that a value containing spaces or glob characters
# (e.g. a config path) reaches the launcher as a single, unmodified argument.
bash tools/dist_train_numa.sh "$MASTER_ADDR" "$NNODES" "$NODE_RANK" "$CONFIG" \
    2>&1 | tee "cvm_bw1000_flashocc_${NNODES}nodes_${TIME}.log"
docker-hub/FlashOCC/Flashocc/tools/analysis_tools/analyze_logs.py
0 → 100644
View file @
d2b71343
# Copyright (c) OpenMMLab. All rights reserved.
import
argparse
import
json
from
collections
import
defaultdict
import
numpy
as
np
import
seaborn
as
sns
from
matplotlib
import
pyplot
as
plt
def cal_train_time(log_dicts, args):
    """Print iteration-time statistics for every loaded training log.

    For each log the slowest and fastest epoch (1-based position in the
    log's epoch order), the standard deviation of the per-epoch averages and
    the overall average time per iteration are printed.

    Per-epoch averages are computed independently, so epochs that logged a
    different number of iterations (e.g. a truncated last epoch) are handled
    instead of failing while building a rectangular array.

    Args:
        log_dicts (list[dict]): One mapping per log. Each maps an epoch key
            to a mapping that holds a ``'time'`` list.
        args: Namespace providing ``json_logs`` (names used in the banner,
            indexed like ``log_dicts``) and ``include_outliers`` (when
            false, the first time value of every epoch is dropped).

    Raises:
        ValueError: If a log contains no epochs at all.
    """
    for i, log_dict in enumerate(log_dicts):
        print(f'{"-" * 5}Analyze train time of {args.json_logs[i]}{"-" * 5}')
        all_times = []
        for epoch in log_dict:
            times = log_dict[epoch]['time']
            if not args.include_outliers:
                # The first value of an epoch is treated as an outlier.
                times = times[1:]
            all_times.append(times)
        if not all_times:
            raise ValueError(
                f'{args.json_logs[i]} does not contain any epoch record')
        # One mean per epoch; rows may have different lengths.
        epoch_ave_time = np.array([np.mean(times) for times in all_times])
        slowest_epoch = epoch_ave_time.argmax()
        fastest_epoch = epoch_ave_time.argmin()
        std_over_epoch = epoch_ave_time.std()
        print(f'slowest epoch {slowest_epoch + 1}, '
              f'average time is {epoch_ave_time[slowest_epoch]:.4f}')
        print(f'fastest epoch {fastest_epoch + 1}, '
              f'average time is {epoch_ave_time[fastest_epoch]:.4f}')
        print(f'time std over epochs is {std_over_epoch:.4f}')
        # Overall mean over every retained iteration, not a mean of means.
        overall_mean = np.mean(np.concatenate(all_times))
        print(f'average iter time: {overall_mean:.4f} s/iter')
        print()
def plot_curve(log_dicts, args):
    """Plot the requested metrics of every loaded log with matplotlib.

    One curve is drawn per (log, metric) pair. In ``'eval'`` mode the x axis
    is the epoch; in any other mode the x axis is a global iteration index.
    The figure is shown interactively when ``args.out`` is ``None`` and
    saved to ``args.out`` otherwise.

    Args:
        log_dicts (list[dict]): One mapping per log, epoch key -> mapping of
            metric name to list of values.
            NOTE(review): epoch keys are used in arithmetic (``min``, ``%``,
            ``epoch - 1``), so they are presumably integers -- confirm
            against the loader.
        args: Namespace providing ``backend``, ``style``, ``legend``,
            ``json_logs``, ``keys``, ``interval``, ``mode``, ``title`` and
            ``out``.

    Raises:
        KeyError: If a requested metric is missing from the epoch at
            position ``args.interval - 1`` of a log.
        AssertionError: If the number of legend entries differs from
            ``len(args.json_logs) * len(args.keys)``.
    """
    if args.backend is not None:
        plt.switch_backend(args.backend)
    sns.set_style(args.style)
    # if legend is None, use {filename}_{key} as legend
    legend = args.legend
    if legend is None:
        legend = []
        for json_log in args.json_logs:
            for metric in args.keys:
                legend.append(f'{json_log}_{metric}')
    assert len(legend) == (len(args.json_logs) * len(args.keys))
    metrics = args.keys

    num_metrics = len(metrics)
    for i, log_dict in enumerate(log_dicts):
        epochs = list(log_dict.keys())
        for j, metric in enumerate(metrics):
            print(f'plot curve of {args.json_logs[i]}, metric is {metric}')
            # Only the first sampled epoch is checked for the metric.
            if metric not in log_dict[epochs[args.interval - 1]]:
                raise KeyError(
                    f'{args.json_logs[i]} does not contain metric {metric}')

            if args.mode == 'eval':
                if min(epochs) == args.interval:
                    x0 = args.interval
                else:
                    # if current training is resumed from previous checkpoint
                    # we lost information in early epochs
                    # `xs` should start according to `min(epochs)`
                    if min(epochs) % args.interval == 0:
                        x0 = min(epochs)
                    else:
                        # find the first epoch that do eval
                        x0 = min(epochs) + args.interval - \
                            min(epochs) % args.interval
                xs = np.arange(x0, max(epochs) + 1, args.interval)
                ys = []
                for epoch in epochs[args.interval - 1::args.interval]:
                    ys += log_dict[epoch][metric]

                # if training is aborted before eval of the last epoch
                # `xs` and `ys` will have different length and cause an error
                # check if `ys[-1]` is empty here
                # (`epoch` still refers to the last epoch sampled above)
                if not log_dict[epoch][metric]:
                    xs = xs[:-1]

                ax = plt.gca()
                ax.set_xticks(xs)
                plt.xlabel('epoch')
                plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o')
            else:
                xs = []
                ys = []
                # Last logged iteration number of the first sampled epoch,
                # used as the per-epoch offset for the global x position.
                num_iters_per_epoch = \
                    log_dict[epochs[args.interval - 1]]['iter'][-1]
                for epoch in epochs[args.interval - 1::args.interval]:
                    iters = log_dict[epoch]['iter']
                    # Drop the trailing entry when it belongs to a 'val' row.
                    if log_dict[epoch]['mode'][-1] == 'val':
                        iters = iters[:-1]
                    xs.append(
                        np.array(iters) + (epoch - 1) * num_iters_per_epoch)
                    ys.append(np.array(log_dict[epoch][metric][:len(iters)]))
                xs = np.concatenate(xs)
                ys = np.concatenate(ys)
                plt.xlabel('iter')
                plt.plot(
                    xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)
            plt.legend()
        if args.title is not None:
            plt.title(args.title)
    if args.out is None:
        plt.show()
    else:
        print(f'save curve to: {args.out}')
        plt.savefig(args.out)
        plt.cla()
def add_plot_parser(subparsers):
    """Register the ``plot_curve`` sub-command and all of its options.

    Args:
        subparsers: The object returned by
            ``ArgumentParser.add_subparsers``; the new sub-parser is added
            to it in place.
    """
    plot_cmd = subparsers.add_parser(
        'plot_curve', help='parser for plotting curves')

    # (name, keyword arguments) for every argument, in registration order.
    argument_table = (
        ('json_logs', {
            'type': str,
            'nargs': '+',
            'help': 'path of train log in json format',
        }),
        ('--keys', {
            'type': str,
            'nargs': '+',
            'default': ['mAP_0.25'],
            'help': 'the metric that you want to plot',
        }),
        ('--title', {'type': str, 'help': 'title of figure'}),
        ('--legend', {
            'type': str,
            'nargs': '+',
            'default': None,
            'help': 'legend of each plot',
        }),
        ('--backend', {
            'type': str,
            'default': None,
            'help': 'backend of plt',
        }),
        ('--style', {
            'type': str,
            'default': 'dark',
            'help': 'style of plt',
        }),
        ('--out', {'type': str, 'default': None}),
        ('--mode', {'type': str, 'default': 'train'}),
        ('--interval', {'type': int, 'default': 1}),
    )
    for name, options in argument_table:
        plot_cmd.add_argument(name, **options)
def add_time_parser(subparsers):
    """Register the ``cal_train_time`` sub-command and its options.

    Args:
        subparsers: The object returned by
            ``ArgumentParser.add_subparsers``; the new sub-parser is added
            to it in place.
    """
    time_cmd = subparsers.add_parser(
        'cal_train_time',
        help='parser for computing the average time per training iteration')

    # One or more log paths are required.
    logs_options = {
        'type': str,
        'nargs': '+',
        'help': 'path of train log in json format',
    }
    time_cmd.add_argument('json_logs', **logs_options)

    outliers_help = ('include the first value of every epoch when computing '
                     'the average time')
    time_cmd.add_argument(
        '--include-outliers', action='store_true', help=outliers_help)
def parse_args():
    """Parse the command line of the log-analysis tool.

    Two sub-commands are registered: ``plot_curve`` and ``cal_train_time``.
    The chosen one is stored in ``args.task``.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(description='Analyze Json Log')
    # currently only support plot curve and calculate average train time
    subparsers = parser.add_subparsers(dest='task', help='task parser')
    # Make the sub-command mandatory: without one the namespace has no
    # `json_logs` attribute and the caller fails with an AttributeError
    # instead of a usage message.
    subparsers.required = True
    add_plot_parser(subparsers)
    add_time_parser(subparsers)
    return parser.parse_args()
def load_json_logs(json_logs):
    """Load JSON-lines training logs into per-epoch metric tables.

    Each file is read line by line; every non-blank line is parsed as one
    JSON document. Records without an ``epoch`` field are ignored. For the
    remaining records the ``epoch`` value becomes the key of the outer
    mapping and every other field is appended to the list stored under its
    name, so the lists hold the values in file order.

    Blank lines (for example a trailing empty line) are skipped rather than
    passed to the JSON parser, and files are read as UTF-8.

    Args:
        json_logs (Iterable[str]): Paths of the log files.

    Returns:
        list[dict]: One mapping per input path, in the same order:
        ``{epoch: defaultdict(list) of field name -> list of values}``.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    log_dicts = []
    for json_log in json_logs:
        log_dict = {}
        with open(json_log, 'r', encoding='utf-8') as log_file:
            for line in log_file:
                line = line.strip()
                if not line:
                    continue
                log = json.loads(line)
                # skip lines without `epoch` field
                if 'epoch' not in log:
                    continue
                epoch = log.pop('epoch')
                if epoch not in log_dict:
                    log_dict[epoch] = defaultdict(list)
                epoch_record = log_dict[epoch]
                for k, v in log.items():
                    epoch_record[k].append(v)
        log_dicts.append(log_dict)
    return log_dicts
def main():
    """Run the selected analysis task on the given JSON logs.

    The command line is parsed, every log path is checked for a ``.json``
    suffix, the logs are loaded, and the function matching ``args.task`` is
    called with the loaded logs and the parsed arguments.

    Raises:
        ValueError: If a log path does not end with ``.json``. An explicit
            exception is used instead of ``assert`` so the check is still
            performed when Python runs with ``-O``.
    """
    args = parse_args()

    json_logs = args.json_logs
    for json_log in json_logs:
        if not json_log.endswith('.json'):
            raise ValueError(
                f'expected a path ending with .json, got {json_log!r}')

    log_dicts = load_json_logs(json_logs)

    # Explicit dispatch table instead of eval() on a command-line value.
    task_handlers = {
        'plot_curve': plot_curve,
        'cal_train_time': cal_train_time,
    }
    task_handlers[args.task](log_dicts, args)


if __name__ == '__main__':
    main()
Prev
1
…
6
7
8
9
10
11
12
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment