OpenDAS / apex · Commits · 96850dfa

Unverified commit 96850dfa, authored Aug 15, 2022 by Jithun Nair, committed via GitHub on Aug 15, 2022.

Merge pull request #80 from ROCmSoftwarePlatform/IFU-master-2022-07-29

IFU-master-2022-07-29

Parents: 87fc4125, cc5f83b5
Changes: 235
Showing 20 changed files with 0 additions and 2585 deletions (+0, −2585).
apex/pyprof/parse/__main__.py (+0, −10)
apex/pyprof/parse/db.py (+0, −61)
apex/pyprof/parse/kernel.py (+0, −210)
apex/pyprof/parse/nvvp.py (+0, −282)
apex/pyprof/parse/parse.py (+0, −122)
apex/pyprof/prof/__init__.py (+0, −1)
apex/pyprof/prof/__main__.py (+0, −10)
apex/pyprof/prof/activation.py (+0, −65)
apex/pyprof/prof/base.py (+0, −47)
apex/pyprof/prof/blas.py (+0, −340)
apex/pyprof/prof/conv.py (+0, −236)
apex/pyprof/prof/convert.py (+0, −62)
apex/pyprof/prof/data.py (+0, −54)
apex/pyprof/prof/dropout.py (+0, −50)
apex/pyprof/prof/embedding.py (+0, −71)
apex/pyprof/prof/index_slice_join_mutate.py (+0, −419)
apex/pyprof/prof/linear.py (+0, −188)
apex/pyprof/prof/loss.py (+0, −84)
apex/pyprof/prof/misc.py (+0, −219)
apex/pyprof/prof/normalization.py (+0, −54)
apex/pyprof/parse/__main__.py deleted 100644 → 0 (view file @ 87fc4125)

```python
import warnings

try:
    from .parse import main
except ImportError as e:
    warnings.warn("Did you make sure to install PyProf dependencies by using the --pyprof flag during Apex installation?)")
    raise e

if __name__ == '__main__':
    main()
```
apex/pyprof/parse/db.py deleted 100644 → 0 (view file @ 87fc4125)

```python
import sys, sqlite3

class DB(object):
    """
    This class provides functions for DB operations
    with exception handling.
    """

    def __init__(self, dbFile):
        try:
            conn = sqlite3.connect(dbFile)
            conn.row_factory = sqlite3.Row
            c = conn.cursor()
        except:
            print("Error opening {}".format(dbFile))
            sys.exit(1)

        self.conn = conn
        self.c = c

    def select(self, cmd):
        try:
            self.c.execute(cmd)
            #rows = self.c.fetchall()
            rows = [dict(row) for row in self.c.fetchall()]
        except sqlite3.Error as e:
            print(e)
            sys.exit(1)
        except:
            print("Uncaught error in SQLite access while executing {}".format(cmd))
            sys.exit(1)

        #print(rows)
        return rows

    def insert(self, cmd, data):
        try:
            self.c.execute(cmd, data)
        except sqlite3.Error as e:
            print(e)
            sys.exit(1)
        except:
            print("Uncaught error in SQLite access while executing {}".format(cmd))
            sys.exit(1)

    def execute(self, cmd):
        try:
            self.c.execute(cmd)
        except sqlite3.Error as e:
            print(e)
            sys.exit(1)
        except:
            print("Uncaught error in SQLite access while executing {}".format(cmd))
            sys.exit(1)

    def commit(self):
        self.conn.commit()

    def close(self):
        self.c.close()
        self.conn.close()
```
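
For context, a minimal sketch of how this (now deleted) wrapper was driven. The file name `net.sql` is a placeholder, and the query simply reads the `StringTable` that `nvvp.py` below also uses; the real flow goes through `NVVP` in `parse.py`.

```python
# Illustrative usage of the DB wrapper above (pre-deletion module layout assumed).
from apex.pyprof.parse.db import DB

db = DB("net.sql")                                          # open the SQLite (nvvp) profile
rows = db.select("select _id_, value from StringTable LIMIT 5")  # returns a list of dicts
for row in rows:
    print(row["value"])                                     # kernel / marker strings
db.close()
```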
apex/pyprof/parse/kernel.py deleted 100644 → 0 (view file @ 87fc4125)

```python
import cxxfilt, struct, binascii

#Helper functions

def demangle(name):
    """
    Demangle a C++ string
    """
    return cxxfilt.demangle(name)

def encode_object_id(pid, tid):
    """
    Given process id (pid) and thread id (tid), return the object id.
    object id = pid (little endian 4 bytes) + tid (little endian 8 bytes)
    """
    objId = struct.pack('<i', pid) + struct.pack('<q', tid)
    objId = binascii.hexlify(objId).decode('ascii').upper()
    return objId

def getShortName(name):
    """
    Returns a shorter kernel name
    """
    sname = name.split("<")[0] \
            .replace("void ", "") \
            .replace("at::", "") \
            .replace("cuda::", "") \
            .replace("native::", "") \
            .replace("(anonymous namespace)::", "")
    sname = sname.split("(")[0]
    return sname

class Kernel(object):
    """
    This class stores information about a kernel.
    """

    kernels = []
    profStart = 0

    def __init__(self):
        self.kNameId = None
        self.kShortName = None
        self.kLongName = None
        self.kStartTime = None  #GPU start time
        self.kEndTime = None    #GPU end time
        self.kDuration = None
        self.device = None
        self.stream = None
        self.grid = ()
        self.block = ()
        self.corrId = None
        self.rStartTime = None  #CPU start time
        self.rEndTime = None    #CPU end time
        self.rDuration = None
        self.tid = None
        self.pid = None
        self.objId = None
        self.timeOffset = None
        self.layerMarkers = []
        self.traceMarkers = []
        self.reprMarkers = []
        self.pyprofMarkers = []
        self.seqMarkers = []
        self.otherMarkers = []
        self.altMarkers = []
        self.seqId = []
        self.altSeqId = []
        self.layer = []
        self.subSeqId = None
        self.dir = None
        self.mod = []
        self.op = []

    def setKernelInfo(self, info):
        self.kNameId = info['name']
        self.corrId = int(info['correlationId'])
        start = int(info['start'])
        end = int(info['end'])
        assert end > start, "This assertion can fail for very large profiles. It usually fails when start = end = 0."
        self.kStartTime = start
        self.kEndTime = end
        self.kDuration = end - start
        assert (start > Kernel.profStart)
        self.device = int(info['deviceId'])
        self.stream = int(info['streamId'])
        self.grid = (info['gridX'], info['gridY'], info['gridZ'])
        self.block = (info['blockX'], info['blockY'], info['blockZ'])
        self.timeOffset = Kernel.profStart

    def setKernelName(self, name):
        cadena = demangle(name)
        self.kLongName = cadena
        self.kShortName = getShortName(cadena)

    def setRunTimeInfo(self, info):
        start, end, pid, tid = info
        self.rStartTime = start
        self.rEndTime = end
        self.rDuration = end - start
        self.pid = pid
        self.tid = tid
        self.objId = encode_object_id(pid, tid)

    def setMarkerInfo(self, info):
        self.layerMarkers, self.traceMarkers, self.reprMarkers, self.pyprofMarkers, self.seqMarkers, self.otherMarkers, self.altMarkers, self.seqId, self.altSeqId, self.layer = info
        self.subSeqId = 0

    def setDirection(self):
        """
        Set direction (fprop, bprop) based on PyTorch sequence markers.
        It is a heuristic and not a foolproof method.
        """
        if any("Backward, seq = " in x for x in self.seqMarkers) or \
            any("backward, seq = " in x for x in self.seqMarkers) or \
            any("Backward0, seq = " in x for x in self.seqMarkers):
            self.dir = "bprop"
        else:
            self.dir = "fprop"

    def setOp(self):
        """
        Detect and set the class/module (mod) and operation (op)
        of the kernel e.g. torch.nn.functional / linear, torch / sigmoid.
        The lookup sequence we use is
            NVTX markers inserted by pyprof
            NVTX markers inserted by PyTorch in bprop
            NVTX markers inserted by PyTorch in fprop
        It is a heuristic and not a foolproof method.
        """

        def sanitize(name):
            name = name.replace("torch", "") \
                    .replace("autograd", "") \
                    .replace("_backward", "") \
                    .replace("::", "") \
                    .replace("jit", "") \
                    .replace("(anonymous namespace)", "")
            head, sep, tail = name.partition("Backward")
            return head

        #Check pyprof markers
        for m in self.pyprofMarkers:
            assert ("mod" in m) and ("op" in m) and ("args" in m)
            t = eval(m)
            self.op.append(t['op'])
            self.mod.append(t['mod'])

        if len(self.op):
            return

        #Check bprop kernel markers
        for m in self.seqMarkers:
            if ("backward, seq = " in m) or ("Backward, seq = " in m):
                op = m.split(",")[0]
                op = sanitize(op)
                self.op.append(op)
                self.mod.append('na')

        if len(self.op):
            return

        #Check markers with "seq = "
        for m in self.seqMarkers:
            if ", seq = " in m:
                op = m.split(",")[0]
                self.op.append(op)
                self.mod.append('na')

        if len(self.op):
            return

        #If nothing else
        if len(self.otherMarkers):
            self.op.append(self.otherMarkers[0])
            self.mod.append('na')

    def print(self):
        """
        Print kernel information. This is used by prof.py.
        """

        a = lambda: None
        a.kShortName = self.kShortName
        a.kDuration = self.kDuration
        #a.layerMarkers = self.layerMarkers
        a.layer = self.layer
        a.trace = self.traceMarkers
        a.reprMarkers = self.reprMarkers
        a.marker = self.pyprofMarkers
        a.seqMarker = self.seqMarkers

        a.seqId = self.seqId
        a.subSeqId = self.subSeqId
        a.altSeqId = self.altSeqId

        a.dir = self.dir
        a.mod = self.mod
        a.op = self.op

        a.tid = self.tid
        a.device = self.device
        a.stream = self.stream
        a.grid = self.grid
        a.block = self.block
        a.kLongName = self.kLongName

        print(a.__dict__)
```
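
As a quick illustration of what `getShortName` above produces, here is a sketch with a made-up demangled kernel name (the string is hypothetical, but the trimming steps follow the code):

```python
# Illustrative only: assumes getShortName from kernel.py above is in scope.
name = "void at::native::vectorized_elementwise_kernel<4, MulFunctor<float> >(int, float*)"
# split at "<", then drop "void ", "at::", "native::" prefixes
short = getShortName(name)
print(short)  # vectorized_elementwise_kernel
```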
apex/pyprof/parse/nvvp.py deleted 100644 → 0 (view file @ 87fc4125)

```python
import sys

class NVVP(object):
    """
    This class gets kernel information from the SQL (nvvp) database.
    """

    driverT = "CUPTI_ACTIVITY_KIND_DRIVER"
    runtimeT = "CUPTI_ACTIVITY_KIND_RUNTIME"
    kernelT = "CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL"
    markerT = "CUPTI_ACTIVITY_KIND_MARKER"
    stringT = "StringTable"

    def __init__(self, db):
        self.db = db
        self.markerId = 0

    def getProfileStart(self):
        """
        Get the profile start time
        """
        profStart = sys.maxsize
        for table in [self.driverT, self.runtimeT, self.kernelT, self.markerT]:
            colname = "timestamp" if table is self.markerT else "start"
            cmd = "select {} from {} ORDER BY {} ASC LIMIT 1".format(colname, table, colname)
            result = self.db.select(cmd)
            assert (len(result) <= 1)
            if (len(result) == 1):
                assert (colname in result[0])
                t = result[0][colname]
                if (t < profStart):
                    profStart = t
        assert (profStart < sys.maxsize)
        return profStart

    def getString(self, id_):
        """
        Get the string associated with an id.
        """
        cmd = "select value from {} where _id_ = {}".format(self.stringT, id_)
        result = self.db.select(cmd)
        assert (len(result) == 1)
        return result[0]['value']

    def createMarkerTable(self):
        """
        Create a temporary table and index it to speed up repeated SQL quesries.
        The table is an INNER JOIN of CUPTI_ACTIVITY_KIND_MARKER with itself.
        """
        cmd = 'CREATE TEMPORARY TABLE marker AS SELECT \
                    a._id_ as id, \
                    a.timestamp AS startTime, \
                    b.timestamp AS endTime, \
                    HEX(a.objectId) AS objectId, \
                    a.name AS name \
                    FROM {} AS a INNER JOIN {} AS b ON \
                    a.id = b.id and \
                    a.flags = 2 and b.flags = 4'.format(self.markerT, self.markerT)
        self.db.execute(cmd)

        self.db.execute('CREATE INDEX start_index ON marker (startTime)')
        self.db.execute('CREATE INDEX end_index ON marker (endTime)')
        self.db.execute('CREATE INDEX id_index ON marker (id)')

    def getCPUInfo(self, corrId):
        """
        Given the correlation id, get CPU start, end, thread id, process id.
        The information can be in the runtime table or the driver table.
        """

        #First look in the runtime table
        cmd = "select start,end,processId,threadId from {} where correlationId={}".format(self.runtimeT, corrId)
        result = self.db.select(cmd)
        assert (len(result) <= 1)

        if (len(result) == 0):
            #Look in the driver table
            cmd = "select start,end,processId,threadId from {} where correlationId={}".format(self.driverT, corrId)
            result = self.db.select(cmd)

        assert (len(result) == 1)
        info = result[0]
        start = info['start']
        end = info['end']
        pid = info['processId']
        tid = info['threadId']
        tid = tid & 0xffffffff  #convert to unsigned
        assert (end > start)
        return [start, end, pid, tid]

    def getKernelInfo(self):
        """
        Get GPU kernel info
        """
        cmd = "select name,correlationId,start,end,deviceId,streamId,gridX,gridY,gridZ,blockX,blockY,blockZ from {}".format(self.kernelT)
        result = self.db.select(cmd)
        return result

    def getMarkerInfo(self, objId, startTime, endTime):
        """
        This function first finds all NVTX markers encapsulating
        a runtime / driver kernel launch.
        It then splits the markers into many lists.
            layerMarkers : User added NVTX markers
            traceMarkers : Call trace markers (inserted by pyprof)
            reprMarkers  : Markers containing the extra_repr() of a module (inserted by pyprof)
            pyprofMarkers: Markers containing args and kwargs (tensor shape, datatype etc.)
            seqMarkers   : Markers containing PyTorch internal sequence markers (inserted by PyTorch)
            altSeqMarkers: Markers inserted by PyTorch between two kernel launches. Needs better explanation.
            otherMarkers : Markers not in either of the above categories.

        We extract seqId from the seq and altSeq markers. The seqId is used in bprop.
        We also extract information from the layerMarkers.
        """

        layerMarkers = []
        traceMarkers = []
        reprMarkers = []
        pyprofMarkers = []
        seqMarkers = []
        otherMarkers = []
        altSeqMarkers = []
        bprop = False

        #Helper functions

        def delete(objId, sTime):
            """
            Delete rows from the temporary SQL table which are no longer required.
            This speeds up future queries.
            """
            margin = 0
            cmd = 'DELETE FROM marker WHERE objectId = "{}" AND endTime < {}'.format(objId, sTime - margin)
            #cmd = 'DELETE FROM marker WHERE endTime < {}'.format(sTime - margin)
            self.db.execute(cmd)

        def getLayerName(mlist):
            """
            Get layer names from layer marker list.
            """
            layers = []
            assert (type(mlist) == list)
            for m in mlist:
                assert ("layer:" in m)
                l = m.split(":")[1]
                layers.append(l)
            return layers

        def getSeqId(mlist):
            """
            Get sequence ids from seq / alt seq marker list.
            """
            ids = []
            assert (type(mlist) == list)
            for m in mlist:
                assert (", seq = " in m)
                seq = int(m.split("=")[1])
                ids.append(seq)

            #Remove duplicates
            ids = list(set(ids))
            ids.sort()
            return ids

        def seqcompare(elem):
            """
            Sorting function for sequence markers
            """
            assert (", seq = " in elem)
            #sort by sequence id and then the string
            l = elem.split(" = ")
            return l[1] + l[0]

        def prune(mlist):
            """
            Remove markers with the same seqId and if the strings are similar.
            This function works on a sorted sequence.
            """
            assert (type(mlist) == list)
            assert (len(mlist))
            a = mlist[0:1]
            for i in range(1, len(mlist)):
                m = mlist[i]
                pm = mlist[i-1]
                name, seq = m.split(",")
                pname, pseq = pm.split(",")
                similar = (name in pname) or (pname in name)
                if (seq == pseq) and similar:
                    continue
                else:
                    a.append(m)
            return a

        def filterTrace(mlist):
            """
            Filter trace markers to remove certain file names.
            """
            assert (type(mlist) == list)
            if len(mlist) == 0:
                return mlist
            mlist = mlist[-1]  #The last stack trace will be a super set.
            mlist = eval(mlist)
            mlist = mlist['traceMarker']
            assert (type(mlist) == list)
            mlist = list(filter(lambda x: "/torch/nn/modules/" not in x, mlist))
            mlist = list(filter(lambda x: "/torch/nn/functional.py" not in x, mlist))
            mlist = list(filter(lambda x: "/torch/tensor.py" not in x, mlist))
            mlist = list(filter(lambda x: "/torch/autograd/__init__.py" not in x, mlist))
            mlist = list(filter(lambda x: "/torch/_jit_internal.py" not in x, mlist))
            mlist = list(filter(lambda x: "/pyprof/nvtx/nvmarker.py" not in x, mlist))
            mlist = list(filter(lambda x: "/apex/optimizers/" not in x, mlist))
            mlist = list(filter(lambda x: "/torch/_utils.py" not in x, mlist))
            mlist = list(filter(lambda x: "/torch/optim/" not in x, mlist))
            return mlist

        #Find all encapsulating markers
        cmd = 'SELECT id,name from marker where \
                objectId = "{}" and \
                startTime < {} and \
                endTime > {} \
                ORDER BY startTime ASC'.format(objId, startTime, endTime)
        result = self.db.select(cmd)

        #Bin markers into different lists
        for r in result:
            m = self.getString(r['name'])

            #Hack: If its a known gradient checkpointing marker, ignore it.
            if m.find("CheckpointFunctionBackward") >= 0:
                continue

            if ("_backward, seq =" in m) or ("Backward, seq =" in m) or ("Backward0, seq =" in m):
                bprop = True

            if ("mod" in m) and ("op" in m) and ("args" in m) and ("type" in m):
                pyprofMarkers.append(m)
            elif ("layer:" in m):
                layerMarkers.append(m)
            elif ("traceMarker" in m):
                traceMarkers.append(m)
            elif ("strRepr" in m):
                reprMarkers.append(m)
            elif (", seq = " in m):
                seqMarkers.append(m)
            else:
                otherMarkers.append(m)

        #Remove duplicates, sort and prune seqMarkers
        if (len(seqMarkers)):
            seqMarkers = list(set(seqMarkers))
            seqMarkers.sort(key=seqcompare)
            seqMarkers = prune(seqMarkers)

        #Remove duplicates from otherMarkers
        otherMarkers = list(set(otherMarkers))

        #Get markers with seq id (inserted by PyTorch) from the previous kernel to the present kernel
        #Only for fprop kernels
        if (len(result) and not bprop):
            loId = self.markerId
            hiId = result[-1]['id']
            self.markerId = hiId

            #Get markers between loId and hiId
            cmd = 'SELECT id,name from marker where objectId = "{}" and id > {} and id < {} ORDER BY startTime ASC'.format(objId, loId, hiId)
            result1 = self.db.select(cmd)

            for r in result1:
                m = self.getString(r['name'])
                #Get only markers with seq id
                if (", seq=" in m):
                    altSeqMarkers.append(m)

            #Remove duplicates, sort and prune altSeqMarkers
            if (len(altSeqMarkers)):
                altSeqMarkers = list(set(altSeqMarkers))
                altSeqMarkers.sort(key=seqcompare)
                altSeqMarkers = prune(altSeqMarkers)

        delete(objId, startTime)

        return layerMarkers, filterTrace(traceMarkers), reprMarkers, pyprofMarkers, seqMarkers, otherMarkers, altSeqMarkers, getSeqId(seqMarkers), getSeqId(altSeqMarkers), getLayerName(layerMarkers)
```
apex/pyprof/parse/parse.py deleted 100755 → 0 (view file @ 87fc4125)

```python
#!/usr/bin/env python3

"""
Parse the SQL db and print a dictionary for every kernel.
"""

import sys
import argparse
from tqdm import tqdm

from .db import DB
from .kernel import Kernel
from .nvvp import NVVP

def parseArgs():
    parser = argparse.ArgumentParser(prog=sys.argv[0], description="Parse SQL (nvvp) db.")
    parser.add_argument("file", type=str, default=None, help="SQL db (nvvp) file.")
    args = parser.parse_args()
    return args

def main():
    args = parseArgs()

    db = DB(args.file)
    nvvp = NVVP(db)

    kInfo = nvvp.getKernelInfo()
    if len(kInfo) == 0:
        print("Found 0 kernels. Exiting.", file=sys.stderr)
        db.close()
        sys.exit(0)
    else:
        print("Found {} kernels. Getting info for each kernel.".format(len(kInfo)), file=sys.stderr)

    nvvp.createMarkerTable()

    prevSeqId = -1
    prevSubSeqId = -1
    prevOp = "na"

    Kernel.profStart = nvvp.getProfileStart()

    for i in tqdm(range(len(kInfo)), ascii=True):
        info = kInfo[i]
        k = Kernel()

        #Set kernel info
        k.setKernelInfo(info)

        #Get, set kernel name
        name = nvvp.getString(k.kNameId)
        k.setKernelName(name)

        #Get runtime info
        info = nvvp.getCPUInfo(k.corrId)
        k.setRunTimeInfo(info)

        #Get and set marker and seqid info
        info = nvvp.getMarkerInfo(k.objId, k.rStartTime, k.rEndTime)
        k.setMarkerInfo(info)

        #If the seqId contains both 0 and non zero integers, remove 0.
        if any(seq != 0 for seq in k.seqId) and (0 in k.seqId):
            k.seqId.remove(0)

        #Set direction (it uses seq id)
        k.setDirection()

        #Set op
        k.setOp()

        #The following code is based on heuristics.
        #TODO: Refactor.
        #Assign subSeqId, adjust seqId and altSeqId
        #seqId can be 0.
        #A kernel can have multiple seqIds both in fprop and bprop.
        #In bprop, seqIds might not decrease monotonically. I have observed a few blips.
        if len(k.seqId):
            assert (k.dir in ["fprop", "bprop"])
            if (k.dir == "fprop"):
                #Check if there is a sequence id larger than the previous
                inc = (k.seqId[-1] > prevSeqId)
                if inc:
                    currSeqId = [x for x in k.seqId if x > prevSeqId][0]
                else:
                    currSeqId = prevSeqId
            else:
                currSeqId = k.seqId[0]

            #if ((currSeqId == prevSeqId) and (k.op == prevOp)):
            if ((currSeqId == prevSeqId) and (k.op == prevOp)) or ((k.op[0] == "forward") and (k.op == prevOp) and (k.mod[0] in ["LSTMCell", "GRUCell", "RNNCell"])):
                #The second condition is to trap cases when pytorch does not use cudnn for a LSTMCell.
                k.subSeqId = prevSubSeqId + 1

            prevSeqId = currSeqId
            prevSubSeqId = k.subSeqId
            prevOp = k.op

            #Keep currSeqId in k.seqId, move everything else to k.altSeqId
            for s in k.seqId:
                if s != currSeqId:
                    k.seqId.remove(s)
                    k.altSeqId.append(s)

            for s in k.altSeqId:
                if s == currSeqId:
                    k.altSeqId.remove(s)

            k.altSeqId = list(set(k.altSeqId))
            if (len(k.altSeqId)):
                (k.altSeqId).sort()

        k.print()

    db.close()

if __name__ == '__main__':
    main()
```
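
For reference, a hedged sketch of how this parser was typically driven, equivalent to running `python -m apex.pyprof.parse net.sql` and redirecting stdout; `net.sql` is a placeholder for a profile database produced by nvprof.

```python
# Illustrative driver only; the file name is made up and the module path
# refers to the pre-deletion layout of apex.pyprof.
import sys
from apex.pyprof.parse.parse import main

sys.argv = ["parse", "net.sql"]   # argparse reads the positional "file" argument
main()                            # prints one dictionary per kernel to stdout
```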
apex/pyprof/prof/__init__.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from . import data, prof
```
apex/pyprof/prof/__main__.py deleted 100644 → 0 (view file @ 87fc4125)

```python
import warnings

try:
    from .prof import main
except ImportError as e:
    warnings.warn("Did you make sure to install PyProf dependencies by using the --pyprof flag during Apex installation?")
    raise e

if __name__ == '__main__':
    main()
```
apex/pyprof/prof/activation.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase

class Activation(OperatorLayerBase):
    """
    This class handles the various activation functions.
    """

    ops = ["celu", "elu", "elu_", "hardshrink", "hardtanh", "hardtanh_", "leaky_relu", "leaky_relu_",
           "logsigmoid", "prelu", "relu", "relu_", "relu6", "rrelu", "rrelu_", "selu", "sigmoid",
           "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold", "threshold_"]

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod in ["torch.nn.functional", "torch", "Tensor"])

        #Filter out named parameters
        args = list(filter(lambda x: x['name'] == '', args))

        assert (len(args) >= 1)
        arg = args[0]
        assert (arg['type'] == "tensor")

        self.i = arg
        self.dir = d.dir

    def params(self):
        p = OrderedDict([('T', self.i['shape']), ('type', self.i['dtype'])])
        return p

    def flops(self):
        direction = self.dir
        tensor = self.i['shape']
        t = self.i['dtype']

        # TODO: revise
        elems = Utility.numElems(tensor)
        return elems

    def bytes(self):
        direction = self.dir
        tensor = self.i['shape']
        t = self.i['dtype']

        elems = Utility.numElems(tensor)
        elems = elems * (2 if direction == "fprop" else 3)

        return elems * Utility.typeToBytes(t)

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_
```
apex/pyprof/prof/base.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from abc import ABC, abstractmethod

class OperatorLayerBase(ABC):
    """
    Base class for all layers and operators.
    Every derived class should have the following functions.
    """

    @abstractmethod
    def tc(self):
        """
        Tensor core usage by the kernel.
        Return "1" (yes), "0" (no, but possible), "-" (not applicable)
        """
        pass

    @abstractmethod
    def params(self):
        """
        Kernel parameters to be printed.
        """
        pass

    @abstractmethod
    def flops(self):
        """
        Note that 1 FMA = 2 flops.
        """
        pass

    @abstractmethod
    def bytes(self):
        pass

    @abstractmethod
    def mod(self):
        """
        Name of the module/class e.g. torch.nn.functional.
        """
        pass

    @abstractmethod
    def op(self):
        """
        Name of the operator e.g. sigmoid.
        """
        pass
```
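
To make the contract concrete, a minimal sketch of a derived operator class satisfying `OperatorLayerBase`. The `Relu` class and its constants are hypothetical; the real handlers in the neighbouring files (Activation, Linear, Conv, ...) follow the same shape.

```python
# Minimal sketch only; flops/bytes are placeholders, not a real cost model.
from collections import OrderedDict

class Relu(OperatorLayerBase):
    def __init__(self, shape, dtype):
        self.shape = shape
        self.dtype = dtype

    def tc(self):
        return "-"  # tensor cores not applicable for an elementwise op

    def params(self):
        return OrderedDict([('T', self.shape), ('type', self.dtype)])

    def flops(self):
        return 0    # placeholder; real classes derive this from the tensor shapes

    def bytes(self):
        return 0    # placeholder

    def mod(self):
        return "torch.nn.functional"

    def op(self):
        return "relu"
```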
apex/pyprof/prof/blas.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
import numpy as np

TC_GEMMS = ["884gemm", "1688gemm"]

class Addmm(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod in ["torch", "Tensor",])
        assert (op in ["addmm", "addmm_",])

        #Get alpha and beta
        alpha = 1
        beta = 1
        if any(x['name'] == 'alpha' for x in args):
            alpha = list(filter(lambda x: x['name'] == "alpha", args))[0]
            alpha = alpha['value']

        if any(x['name'] == 'beta' for x in args):
            beta = list(filter(lambda x: x['name'] == "beta", args))[0]
            beta = beta['value']

        self.alpha = alpha
        self.beta = beta

        #Filter out named parameters
        args = list(filter(lambda x: x['name'] == '', args))

        assert (len(args) == 3)
        C, A, B = args
        m, k1 = A['shape']
        k2, n = B['shape']
        assert (k1 == k2)
        t1 = A['dtype']
        t2 = B['dtype']
        t3 = C['dtype']
        assert (t1 == t2 == t3)

        self.A = A
        self.B = B
        self.C = C

        self.m = m
        self.n = n
        self.k = k1
        self.type = t1
        self.name = d.name

        return

    def tc(self):
        for s in TC_GEMMS:
            if s in self.name:
                return 1
        return 0

    def bytes(self):
        m, n, k = self.m, self.n, self.k
        return Utility.typeToBytes(self.type) * (m*n + m*k + n*k)

    def flops(self):
        return self.m * self.n * self.k * 2

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def params(self):
        p = OrderedDict([('M', self.n), ('N', self.m), ('K', self.k), ('type', self.type)])
        return p

class Bmm(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "torch") and (op == "bmm")

        #Filter out named params (kwargs)
        args = list(filter(lambda x: x['name'] == "", args))

        assert (len(args) == 2)
        A, B = args
        b1, m, k1 = A['shape']
        b2, k2, n = B['shape']
        assert (b1 == b2)
        assert (k1 == k2)
        t1 = A['dtype']
        t2 = B['dtype']
        assert (t1 == t2)

        self.A = A
        self.B = B
        self.b = b1
        self.m = m
        self.n = n
        self.k = k1
        self.type = t1
        self.name = d.name

    def tc(self):
        for s in TC_GEMMS:
            if s in self.name:
                return 1
        return 0

    def params(self):
        #p = OrderedDict([('A', A['shape']), ('B', B['shape']), ('type', t1)])
        p = OrderedDict([('B', self.b), ('M', self.n), ('N', self.m), ('K', self.k), ('type', self.type)])
        return p

    def flops(self):
        return self.b * self.m * self.n * self.k * 2

    def bytes(self):
        b, m, n, k = self.b, self.m, self.n, self.k
        return Utility.typeToBytes(self.type) * b * (m*n + m*k + n*k)

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

class Matmul(OperatorLayerBase):

    NON_GEMM = ["kernelPointwiseApply2", "reduce_1Block_kernel", "elementwise_kernel"]
    NON_TC = NON_GEMM + ["dot_kernel"]

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        self.name = d.name
        self.sub = d.sub

        assert ((mod == "torch") and (op == "matmul")) or ((mod == "Tensor") and (op == "__matmul__"))
        assert (len(args) == 2)

        assert any([x in d.name for x in Matmul.NON_TC + ["gemm", "gemv"]])

        A, B = args
        t1 = A['dtype']
        t2 = B['dtype']
        assert (t1 == t2)

        A = A['shape']
        B = B['shape']

        self.A = A
        self.B = B
        self.type = t1

        # batch, MNK
        if (len(A) == 1) and (len(B) == 1):
            #dot product
            assert (A[0] == B[0])
            self.b = (1,)
            self.m = 1
            self.n = 1
            self.k = A[0]

        elif (len(A) == 2) and (len(B) == 2):
            #gemm
            m, k1 = A
            k2, n = B
            assert (k1 == k2)
            self.b = (1,)
            self.m = m
            self.n = n
            self.k = k1

        elif (len(A) == 1) and (len(B) == 2):
            #vector matrix
            k1 = A[0]
            k2, n = B
            assert (k1 == k2)

            self.b = (1,)
            self.m = 1
            self.n = n
            self.k = k1

        elif (len(A) == 2) and (len(B) == 1):
            #gemv
            m, k1 = A
            k2 = B[0]
            assert (k1 == k2)

            self.b = (1,)
            self.m = m
            self.n = 1
            self.k = k1

        elif (len(A) == 1) and (len(B) > 2):
            assert (A[0] == B[-2])

            self.b = B[0:-2]
            self.m = 1
            self.n = B[-1]
            self.k = B[-2]

        elif (len(B) == 1) and (len(A) > 2):
            assert (B[0] == A[-1])

            self.b = A[0:-2]
            self.m = A[-2]
            self.n = 1
            self.k = A[-1]

        else:
            assert (len(A) >= 2)
            assert (len(B) >= 2)
            assert (A[-1] == B[-2])
            self.m = A[-2]
            self.n = B[-1]
            self.k = A[-1]

            aa = np.empty(A[0:-2])
            bb = np.empty(B[0:-2])
            self.b = np.broadcast(aa, bb).shape

    def params(self):
        return OrderedDict([('A', self.A), ('B', self.B), ('type', self.type)])

    def tc(self):
        if self.name in Matmul.NON_TC:
            return "-"
        else:
            for s in TC_GEMMS:
                if s in self.name:
                    return 1
            return 0

    def bytes(self):
        # TODO: check bytes for non-GEMM cases
        if self.name in Matmul.NON_GEMM:
            return 2 * Utility.typeToBytes(self.type) * Utility.numElems(self.A)  #could be B as well
        else:
            m, n, k = self.m, self.n, self.k
            return Utility.typeToBytes(self.type) * (m*n + m*k + n*k)

    def flops(self):
        # TODO: calculate actual FLOPs. At least we're not saying it's GEMM FLOPs for now.
        if self.name in Matmul.NON_GEMM:
            return 0
        else:
            return Utility.numElems(self.b) * self.m * self.n * self.k * 2

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

class Mm(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "torch") and (op == "mm")
        assert (len(args) == 2)

        A, B = args
        m, k1 = A['shape']
        k2, n = B['shape']
        assert (k1 == k2)
        t1 = A['dtype']
        t2 = B['dtype']
        assert (t1 == t2)

        self.A = A
        self.B = B
        self.m = m
        self.n = n
        self.k = k1
        self.type = t1
        self.name = d.name

        return

    def params(self):
        p = OrderedDict([('M', self.n), ('N', self.m), ('K', self.k), ('type', self.type)])
        return p

    def tc(self):
        for s in TC_GEMMS:
            if s in self.name:
                return 1
        return 0

    def bytes(self):
        m, n, k = self.m, self.n, self.k
        return Utility.typeToBytes(self.type) * (m*n + m*k + n*k)

    def flops(self):
        return self.m * self.n * self.k * 2

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_
```
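
As a sanity check on the GEMM cost model above (1 FMA = 2 flops; the three operand matrices each counted once for bytes), here is a small worked example with made-up shapes and dtype:

```python
# Worked example of the Addmm/Mm cost model; shapes are illustrative only.
M, N, K = 1024, 4096, 1024
bytes_per_elem = 2                              # e.g. fp16

flops = 2 * M * N * K                           # 1 FMA = 2 flops
bytes_moved = bytes_per_elem * (M * N + M * K + N * K)

print(flops)        # 8589934592  (~8.6 GFLOP)
print(bytes_moved)  # 18874368    (~18.9 MB)
```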
apex/pyprof/prof/conv.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase

class Conv(OperatorLayerBase):

    """
    # N = batch size
    # C,H,W = input channels, height, width
    # K,P,Q = output channels, height, width
    # R,S = filter height, width
    # g = groups
    """

    #todo: refine winograd and FFT
    convAuxList = ["nchwToNhwc", "nhwcToNchw", "OffsetsKernel",]
    winoAuxList = ["generateWinogradTilesKernel", "winogradWgradData", "winogradWgradOutput", "winogradWgradDelta"]
    fftAuxList = ["compute_gemm_pointers", "flip_filter", "fft2d_r2c_", "fft2d_c2r_", "fft1d_r2c", "fft1d_c2r"]
    miscAuxList = ["scaleTensor_kernel",]

    convList = ["_s884cudnn_", "_s1688cudnn_", "_scudnn_", "2d_grouped_direct_kernel",
                "cudnn::detail::implicit_convolve_sgemm", "cudnn::detail::dgrad2d_alg1_1",
                "cudnn::detail::wgrad_alg0_engine", "cudnn::detail::dgrad_engine",
                "dgrad_1x1_stride_2x2", "spatialDepthwiseConvolutionUpdateOutput"]
    winoList = ["winograd3x3Kernel", "_sgemm_"]
    fftList = ["fermiPlusCgemmLDS128_batched", "_gcgemm_",]
    miscList = []

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        self.dir = d.dir
        self.name = d.name
        self.sub = d.sub

        assert (mod == "torch.nn.functional")
        assert (op in ["conv1d", "conv2d"])
        length = len(args)
        assert (length >= 2) and (length <= 7)
        i, w = args[0], args[1]
        assert (i['type'] == "tensor")
        assert (w['type'] == "tensor")

        #ignore bias

        if (length >= 4) and (args[3]['name'] == ""):
            s = args[3]
        elif any(x['name'] == 'stride' for x in args):
            s = list(filter(lambda x: x['name'] == 'stride', args))[0]
        else:
            s = {'name': 'stride', 'type': 'int', 'value': 1}

        if (length >= 5) and (args[4]['name'] == ""):
            p = args[4]
        elif any(x['name'] == 'padding' for x in args):
            p = list(filter(lambda x: x['name'] == 'padding', args))[0]
        else:
            p = {'name': 'padding', 'type': 'int', 'value': 0}

        if (length >= 6) and (args[5]['name'] == ""):
            d = args[5]
        elif any(x['name'] == 'dilation' for x in args):
            d = list(filter(lambda x: x['name'] == 'dilation', args))[0]
        else:
            d = {'name': 'dilation', 'type': 'int', 'value': 1}

        if (length == 7) and (args[6]['name'] == ""):
            g = args[6]
        elif any(x['name'] == 'groups' for x in args):
            g = list(filter(lambda x: x['name'] == 'groups', args))[0]
        else:
            g = {'name': 'groups', 'type': 'int', 'value': 1}

        if op == "conv1d":
            assert (len(i['shape']) == 3)
            assert (len(w['shape']) == 3)
            assert (i['dtype'] == w['dtype'])
            N, C1, W = i['shape']
            K, C2, S = w['shape']
            assert (C1 == C2)
            p = p['value'] if Utility.isscalar(p['type']) else p['value'][0]
            s = s['value'] if Utility.isscalar(s['type']) else s['value'][0]
            d = d['value'] if Utility.isscalar(d['type']) else d['value'][0]
            g = g['value']
            assert (g == 1)
            H = 1
            R = 1

            P = 1 + (H - (((R - 1)) + 1))
            Q = 1 + (W + 2*p - (((S - 1) * d) + 1)) / s
            P = int(P)
            Q = int(Q)
            if (H == 1):
                assert (P == 1)
            if (W == 1):
                assert (Q == 1)

            self.N = N
            self.C = C1
            self.H = H
            self.W = W
            self.K = K
            self.P = P
            self.Q = Q
            self.R = R
            self.S = S
            self.ph = 0
            self.pw = p
            self.U = 1
            self.V = s
            self.dh = 1
            self.dw = d
            self.g = g
            self.type = i['dtype']

        elif op == "conv2d":
            assert (len(i['shape']) == 4)
            assert (len(w['shape']) == 4)
            assert (i['dtype'] == w['dtype'])
            N, C1, H, W = i['shape']
            K, C2, R, S = w['shape']

            if Utility.isscalar(p['type']):
                ph = pw = p['value']
            else:
                assert (p['type'] == "tuple")
                ph, pw = p['value']

            if Utility.isscalar(s['type']):
                sh = sw = s['value']
            else:
                assert (s['type'] == "tuple")
                sh, sw = s['value']

            if Utility.isscalar(d['type']):
                dh = dw = d['value']
            else:
                assert (d['type'] == "tuple")
                dh, dw = d['value']

            g = g['value']
            assert (g >= 1)
            assert (C1 == C2*g)

            P = 1 + (H + 2*ph - (((R - 1) * dh) + 1)) / sh
            Q = 1 + (W + 2*pw - (((S - 1) * dw) + 1)) / sw
            P = int(P)
            Q = int(Q)
            if (H == 1):
                assert (P == 1)
            if (W == 1):
                assert (Q == 1)

            self.N = N
            self.C = C1
            self.H = H
            self.W = W
            self.K = K
            self.P = P
            self.Q = Q
            self.R = R
            self.S = S
            self.ph = ph
            self.pw = pw
            self.U = sh
            self.V = sw
            self.dh = dh
            self.dw = dw
            self.g = g
            self.type = i['dtype']

        else:
            assert False

    def params(self):
        p = OrderedDict([('N', self.N), ('C', self.C), ('H', self.H), ('W', self.W),
                         ('K', self.K), ('P', self.P), ('Q', self.Q), ('R', self.R), ('S', self.S),
                         ('ph', self.ph), ('pw', self.pw), ('U', self.U), ('V', self.V),
                         ('dh', self.dh), ('dw', self.dw), ('g', self.g), ('type', self.type)])
        return p

    def conv_bytes_flops(self, N, C, H, W, K, P, Q, R, S, g, t):
        f = 2*N*K*P*Q*C*R*S/g  #for fprop
        elems = N*C*H*W + K*C*R*S/g + N*K*P*Q
        b = elems * Utility.typeToBytes(t)
        return b, f

    def bytes_flops(self):
        N, C, H, W, K, P, Q, R, S, ph, pw, U, V, dh, dw, g, t = self.params().values()

        if any(x in self.name for x in Conv.convAuxList + Conv.winoAuxList + Conv.fftAuxList + Conv.miscAuxList):
            bytes, flops = [0, 0]

        elif any(x in self.name for x in Conv.convList + Conv.winoList + Conv.fftList + Conv.miscList):
            if g == 1:
                bytes, flops = self.conv_bytes_flops(N, C, H, W, K, P, Q, R, S, g, t)
            else:
                if "2d_grouped_direct_kernel" in self.name:  #only 1 kernel is called
                    bytes, flops = self.conv_bytes_flops(N, C, H, W, K, P, Q, R, S, g, t)
                elif "spatialDepthwiseConvolutionUpdateOutput" in self.name:  #one kernel for separable conv
                    bytes, flops = self.conv_bytes_flops(N, C, H, W, K, P, Q, R, S, g, t)
                else:  #a kernel per group is called
                    bytes, flops = self.conv_bytes_flops(N, C/g, H, W, K/g, P, Q, R, S, 1, t)

        elif ("calc_bias_diff" in self.name):  #bias gradient
            elems = N*K*P*Q
            flops = elems
            bytes = 2 * elems * Utility.typeToBytes(t)
            #params = OrderedDict([('N',N), ('K',K), ('P',P), ('Q',Q), ('type', t)])

        else:
            bytes, flops = [0, 0]

        return bytes, flops

    def bytes(self):
        b, _ = self.bytes_flops()
        return b

    def flops(self):
        _, f = self.bytes_flops()
        return f

    def tc(self):
        for s in ["884cudnn", "1688cudnn"]:
            if s in self.name:
                return 1
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_
```
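
A small worked example of the conv2d output-size arithmetic used above, with made-up values (3x3 filter, stride 2, padding 1, dilation 1 on a 56x56 input):

```python
# Illustrative only; mirrors the P/Q computation in Conv.__init__.
H, W = 56, 56
R, S = 3, 3
ph = pw = 1
sh = sw = 2
dh = dw = 1

P = int(1 + (H + 2*ph - (((R - 1) * dh) + 1)) / sh)
Q = int(1 + (W + 2*pw - (((S - 1) * dw) + 1)) / sw)
print(P, Q)  # 28 28
```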
apex/pyprof/prof/convert.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase

class Convert(OperatorLayerBase):
    """
    Class to handle convert operations.
    """
    ops = ["byte", "char", "double", "float", "half", "int", "long", "short", "to"]

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "Tensor")
        assert (op in Convert.ops)
        assert (len(args) == 1)

        #The argument could be a tensor or scalar
        t = args[0]
        if t['type'] == "tensor":
            shape = t['shape']
            stype = t['dtype']
        else:
            shape = (1,)
            stype = t['type']
        if self.op_ == "to":
            op = stype

        self.shape = shape
        self.stype = stype
        self.dtype = op

    def params(self):
        p = OrderedDict([('T', self.shape), ('stype', self.stype), ('dtype', self.dtype)])
        return p

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def tc(self):
        return "-"

    def elems(self):
        return Utility.numElems(self.shape)

    def flops(self):
        return 0

    def bytes(self):
        b = self.elems() * (Utility.typeToBytes(self.stype) + Utility.typeToBytes(self.dtype))
        return b
```
apex/pyprof/prof/data.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from .utility import Utility

class Data(object):
    """
    Class to store all the data for every kernel e.g. name, bytes, flops, device, stream etc.
    """
    def __init__(self, kernel):
        #Available from NVprof
        self.tid = kernel['tid']
        self.device = kernel['device']
        self.stream = kernel['stream']
        self.grid = str(kernel['grid']).replace(" ", "").replace("(", "").replace(")", "")
        self.block = str(kernel['block']).replace(" ", "").replace("(", "").replace(")", "")
        self.name = kernel['kShortName'].replace(" ", "_")
        self.lName = kernel['kLongName']
        self.sil = kernel['kDuration']  #units ns

        self.index = None

        #Markers
        self.argMarker = kernel['marker']
        self.modMarker = kernel['reprMarkers']
        self.seqMarker = kernel['seqMarker']

        self.layer = kernel['layer']
        self.trace = kernel['trace']

        self.seqId = kernel['seqId']
        self.altSeqId = kernel['altSeqId']

        self.dir = kernel['dir']
        self.sub = kernel['subSeqId']

        self.mod = "na"
        self.op = "na"
        self.params = {"na": "na"}
        self.tc = "na"
        self.flops = 0
        self.bytes = 0

    def setParams(self, params):
        #Remove space from params
        qaz = ""
        for key, value in params.items():
            if "type" not in key:
                qaz += "{}={},".format(key, value)
            else:
                if type(value) is str:
                    qaz += "{},".format(Utility.typeToString(value))
                else:
                    qaz += "{}".format(value)

        self.params = qaz.replace(" ", "")
```
apex/pyprof/prof/dropout.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase

class Dropout(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "torch.nn.functional")
        assert (op == "dropout")
        #assert (len(args) == 1)

        self.shape = args[0]['shape']
        self.type = args[0]['dtype']
        self.dir = d.dir

        return

    def params(self):
        p = OrderedDict([('T', self.shape), ('type', self.type)])
        return p

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def tc(self):
        return "-"

    def elems(self):
        return Utility.numElems(self.shape)

    def bytes(self):
        #Ignoring the cost of writing and reading the mask
        return Utility.typeToBytes(self.type) * self.elems() * 2

    def flops(self):
        # Note: This is approximate and depends on the RNG
        return 5 * self.elems()
```
apex/pyprof/prof/embedding.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase

class Embedding(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "torch.nn.functional")
        assert (op == "embedding")

        self.ishape = args[0]['shape']
        self.itype = args[0]['dtype']

        self.eshape = args[1]['shape']
        self.etype = args[1]['dtype']

        assert (len(self.eshape) == 2)

        self.dir = d.dir
        self.sub = d.sub
        return

    def params(self):
        p = OrderedDict([('I', self.ishape), ('itype', self.itype), ('E', self.eshape), ('etype', self.etype)])
        return p

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def tc(self):
        return "-"

    def bytes(self):
        ishape = self.ishape
        itype = self.itype
        eshape = self.eshape
        etype = self.etype

        ielems = Utility.numElems(ishape)

        b = 0
        if self.dir == "fprop":
            #indices
            b += ielems * Utility.typeToBytes(itype)
            #read and write the embedding matrix
            b += ielems * eshape[1] * 2 * Utility.typeToBytes(etype)
        else:
            #3 times the size of the incoming gradient
            b = ielems * eshape[1] * 3 * Utility.typeToBytes(etype)

            if self.sub > 0:
                b = 0

        return b

    def flops(self):
        # Note: not implemented yet
        return 0
```
apex/pyprof/prof/index_slice_join_mutate.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from collections import OrderedDict
from .utility import Utility
import numpy as np
from .base import OperatorLayerBase

class Cat(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "torch")
        assert (op == "cat")
        assert (len(args) >= 2)
        t = args[0]['dtype']
        shapes = []

        for arg in args:
            if arg['type'] == "tensor":
                assert (arg['dtype'] == t)
                shapes.append(arg['shape'])

        self.type = t
        self.shapes = shapes

    def params(self):
        p = OrderedDict([('T', self.shapes), ('type', self.type)])
        return p

    def flops(self):
        return 0

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def bytes(self):
        b = 0
        for s in self.shapes:
            b += Utility.numElems(s)
        return 2 * b * Utility.typeToBytes(self.type)

class Reshape(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "Tensor")
        assert (op == "reshape")

        #Temporarily commenting three lines
        #assert (len(args) == 2)
        #t,s = args
        #assert s['type'] == "tuple"

        t = args[0]
        assert t['type'] == "tensor"
        self.type = t['dtype']
        self.shape = t['shape']

    def params(self):
        p = OrderedDict([('T', self.shape), ('type', self.type)])
        return p

    def flops(self):
        return 0

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def bytes(self):
        return 0

class Gather(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "Tensor") or (mod == "torch")
        assert (op == "gather")

        #Filter out the "out" parameter
        args = list(filter(lambda x: x['name'] != 'out', args))
        assert (len(args) == 3)

        #Get input
        if (args[0]['name'] == ""):
            arg = args[0]
        else:
            arg = list(filter(lambda x: x['name'] == "input", args))[0]

        assert (arg['type'] == "tensor")

        self.shape = arg['shape']
        self.type = arg['dtype']

    def params(self):
        p = OrderedDict([('T', self.shape), ('type', self.type)])
        return p

    def flops(self):
        return 0

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def bytes(self):
        return 2 * Utility.numElems(self.shape) * Utility.typeToBytes(self.type)

class MaskedScatter(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "Tensor")
        assert (op == "masked_scatter_")
        assert (len(args) == 3)

        dst, mask, src = args
        assert (dst['type'] == mask['type'] == src['type'] == "tensor")
        assert (mask['dtype'] == "uint8")
        assert (dst['dtype'] == src['dtype'])
        assert (dst['shape'] == mask['shape'])

        self.shape = dst['shape']
        self.type = dst['dtype']
        self.seqId = d.seqId

    def params(self):
        p = OrderedDict([('T', self.shape), ('type', self.type)])
        return p

    def flops(self):
        return 0

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def bytes(self):
        elems = Utility.numElems(self.shape)

        #src and dst
        b = 2 * elems * Utility.typeToBytes(self.type)

        #mask (uint8)
        b += elems

        if (self.seqId > 0):
            b = 0
        return b

class Nonzero(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod in ["torch", "Tensor"])
        assert (op == "nonzero")
        assert (len(args) == 1)

        arg = args[0]
        self.shape = arg['shape']
        self.type = arg['dtype']
        self.seqId = d.seqId

    def params(self):
        p = OrderedDict([('T', self.shape), ('type', self.type)])
        return p

    def flops(self):
        return 0

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def bytes(self):
        elems = Utility.numElems(self.shape)
        dim = len(self.shape)

        #input tensor
        b = elems * Utility.typeToBytes(self.type)

        #in the worst case, the output is a (elems x dim) tensor of type "long"
        b += elems * dim * Utility.typeToBytes("int64")

        if self.seqId > 0:
            return 0
        else:
            return b

class IndexSelect(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "Tensor") or (mod == "torch")
        assert (op == "index_select")

        #Filter out the "out" parameter
        args = list(filter(lambda x: x['name'] != 'out', args))
        assert (len(args) == 3)

        #Get input, dim and index
        if (args[0]['name'] == ""):
            t = args[0]
        else:
            t = list(filter(lambda x: x['name'] == "input", args))[0]

        if (args[1]['name'] == ""):
            d = args[1]
        else:
            d = list(filter(lambda x: x['name'] == "dim", args))[0]

        if (args[2]['name'] == ""):
            i = args[2]
        else:
            i = list(filter(lambda x: x['name'] == "index", args))[0]

        assert (t['type'] == i['type'] == "tensor")
        assert (d['type'] == "int")
        assert (i['dtype'] == "int64")
        assert (len(i['shape']) == 1)

        shape = t['shape']
        dim = d['value']
        indices = i['shape'][0]
        assert (dim < len(shape))

        self.shape = shape
        self.dim = dim
        self.indices = indices
        self.type = t['dtype']

    def params(self):
        p = OrderedDict([('T', self.shape), ('D', self.dim), ('I', self.indices), ('type', self.type)])
        return p

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def flops(self):
        return 0

    def bytes(self):
        #determine the shape of the output tensor
        shape = list(self.shape)
        shape[self.dim] = self.indices

        b = 0

        #time to read the input and write the output
        elems = Utility.numElems(shape)
        b += 2 * elems * Utility.typeToBytes(self.type)

        #time to read the indices
        b += self.indices * Utility.typeToBytes("int64")

        return b

class MaskedSelect(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        self.sub = d.sub

        assert (mod == "Tensor") or (mod == "torch")
        assert (op == "masked_select")

        #Filter out the "out" parameter
        args = list(filter(lambda x: x['name'] != 'out', args))
        assert (len(args) == 2)

        #Get input and mask
        if (args[0]['name'] == ""):
            t = args[0]
        else:
            t = list(filter(lambda x: x['name'] == "input", args))[0]

        if (args[1]['name'] == ""):
            m = args[1]
        else:
            m = list(filter(lambda x: x['name'] == "mask", args))[0]

        assert (m['dtype'] == "uint8")

        tensor = t['shape']
        mask = m['shape']

        #check for broadcast condition
        if (tensor != mask):
            array1 = np.empty(list(tensor))
            array2 = np.empty(list(mask))
            try:
                out = np.broadcast(array1, array2).shape
            except:
                assert False

        self.tshape = tensor
        self.mshape = mask
        self.type = t['dtype']

    def params(self):
        p = OrderedDict([('T', self.tshape), ('M', self.mshape), ('type', self.type)])
        return p

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def bytes(self):
        tensor = self.tshape
        mask = self.mshape
        t = self.type

        #in the worst case, #output elements = #input elements
        b = 2 * Utility.numElems(tensor) * Utility.typeToBytes(t)

        #mask tensor (assuming uint8)
        b += Utility.numElems(mask)
        return b

    def flops(self):
        return 0
```
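
To make the IndexSelect byte accounting above concrete, a worked example with made-up shapes (gather 128 rows from a 1000x512 fp32 table along dim 0):

```python
# Illustrative only; mirrors IndexSelect.bytes() above.
shape = [1000, 512]
dim, indices = 0, 128
dtype_bytes, index_bytes = 4, 8          # fp32 data, int64 indices

out_shape = list(shape)
out_shape[dim] = indices                 # output is (128, 512)
elems = out_shape[0] * out_shape[1]

b = 2 * elems * dtype_bytes + indices * index_bytes
print(b)  # 525312 bytes (~513 KB)
```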
apex/pyprof/prof/linear.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase

class Linear(OperatorLayerBase):

    '''
    Notes: If the bias occurs before the GEMM, then its 1 write (bias expansion).
    If the bias occurs after, then its 1 read and 1 write.
    bias in bprop is a reduction and hence is 1 read.
    '''

    gemmKernels = ["gemm", "gemv", "dot_kernel", "splitKreduce_kernel", "reduce_1Block_kernel"]
    biasKernels = ["kernelReduceContigDim", "kernelReduceNoncontigDim_shared", "elementwise_kernel", "reduce_kernel"]

    def setXWBMNK(self, args):
        x = None
        w = None
        b = None
        if (len(args) == 2):
            x, w = args
        elif (len(args) == 3):
            x, w, b = args
            assert (x['type'] == w['type'] == "tensor")
            if (b['type'] == "tensor"):
                assert (len(b['shape']) == 1)
            elif (b['type'] == "NoneType"):
                assert b['value'] is None
                b = None
            else:
                assert False
        else:
            assert False

        assert (len(w['shape']) == 2)
        k1 = x['shape'][-1]
        n, k2 = w['shape']
        assert (k1 == k2)
        if b is not None:
            assert (b['shape'][0] == n)
        t1 = x['dtype']
        t2 = w['dtype']
        assert (t1 == t2)

        # X, W, B
        self.x = x['shape']
        self.w = w['shape']
        self.b = b['shape'] if b is not None else None
        self.type = t1

        # M, N, K
        #n = Utility.numElems(x[0:-1])
        n = self.x[0:-1]
        k = self.x[-1]
        m, k1 = self.w
        assert (k == k1)

        self.m = m
        self.n = n
        self.k = k

    def tc(self):
        if self.op() == "linear":
            return 1 if "884gemm" in self.name else 0
        else:
            return "-"

    def __init__(self, d):
        self.name = d.name
        self.dir = d.dir
        self.sub = d.sub

        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        assert (mod == "torch.nn.functional")
        assert (op == "linear")

        self.setXWBMNK(args)

        if any(x in d.name for x in Linear.gemmKernels):
            self.op_ = "linear"
        else:
            assert (d.name in Linear.biasKernels)
            self.op_ = "bias"

        '''
        elif (("kernelPointwiseApply2" in d.name) or ("kernelReduceContigDim" in d.name) or ("kernelReduceNoncontigDim_shared" in d.name)):
            #bias expansion was before the gemm
            self.op_ = "bias"

        elif ("elementwise_kernel" in d.name):
            #Bias addition happens later with a broadcast tensor
            self.op_ = "bias"

            assert (len(d.argMarker) == 2)
            marker = eval(d.argMarker[1])
            mod = marker['mod']
            op = marker['op']
            args = marker['args']

            assert (mod == "Tensor")
            assert (op == "__iadd__")
            assert (len(args) == 2)
            mn = args[0]['shape']
            b = args[1]['shape']
            assert (len(b) == 1)

            assert (mn == (self.n + (self.m,)))
            assert (b == self.b)

        else:
            assert False
        '''

    def params(self):
        #p = OrderedDict([('X', self.x), ('W', self.w), ('B', self.b), ('type', self.type)])

        m, n, k, x, w, t = self.m, self.n, self.k, self.x, self.w, self.type
        if len(n) == 1:
            n = n[0]

        if self.op_ == "linear":
            if self.dir == "fprop":
                p = OrderedDict([('M', m), ('N', n), ('K', k), ('type', t)])
            elif self.dir == "bprop":
                if self.sub == 0:  #dgrad (most likely)
                    p = OrderedDict([('M', k), ('N', n), ('K', m), ('type', t)])
                elif self.sub == 1:  #wgrad (most likely)
                    p = OrderedDict([('M', k), ('N', m), ('K', n), ('type', t)])
                else:
                    #This happens when there are additional kernels for reduction
                    p = OrderedDict([('X', x), ('W', w), ('type', t)])
            else:
                assert False

        elif self.op_ == "bias":
            p = OrderedDict([('M', m), ('N', n), ('type', t)])
        else:
            assert False
        return p

    def op(self):
        return self.op_

    def bytesFlops(self):

        m = self.m
        n = Utility.numElems(self.n)
        k = self.k

        if self.op_ == "linear":
            if self.dir == "fprop":
                f = m * n * k * 2
                b = m*n + m*k + n*k * Utility.typeToBytes(self.type)
            elif self.dir == "bprop":
                if self.sub == 0:  #dgrad (most likely)
                    f = m * n * k * 2
                    b = m*n + m*k + n*k * Utility.typeToBytes(self.type)
                elif self.sub == 1:  #wgrad (most likely)
                    f = m * n * k * 2
                    b = m*n + m*k + n*k * Utility.typeToBytes(self.type)
                else:
                    #This happens when there are additional kernels for reduction
                    f = 0
                    b = 0
            else:
                assert False

        elif self.op_ == "bias":
            f = m * n
            b = 2 * m * n * Utility.typeToBytes(self.type)
        else:
            assert False
        return b, f

    def bytes(self):
        b, f = self.bytesFlops()
        return b

    def flops(self):
        b, f = self.bytesFlops()
        return f

    def mod(self):
        return self.mod_
```
apex/pyprof/prof/loss.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase

#TODO: Add support for additional loss functions.

class MSELoss(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "torch.nn.functional")
        assert (op == "mse_loss")
        assert (len(args) == 3)

        #Get input, target and reduction
        if (args[0]['name'] == ""):
            x = args[0]
        else:
            x = list(filter(lambda x: x['name'] == "input", args))[0]

        if (args[1]['name'] == ""):
            y = args[1]
        else:
            y = list(filter(lambda x: x['name'] == "target", args))[0]

        if (args[2]['name'] == ""):
            r = args[2]
        else:
            r = list(filter(lambda x: x['name'] == "reduction", args))[0]

        assert (x['type'] == y['type'] == "tensor")
        assert (x['shape'] == y['shape'])
        assert (x['dtype'] == y['dtype'])
        assert (r['type'] == "str")
        assert (r['value'] in ["none", "mean", "sum"])

        self.shape = x['shape']
        self.type = x['dtype']
        self.red = r['value']
        self.dir = d.dir

    def params(self):
        p = OrderedDict([('T', self.shape), ('type', self.type), ('red', self.red)])
        return p

    def elems(self):
        red = self.red
        e = Utility.numElems(self.shape)

        if self.dir == "fprop":
            if red == "none":
                e *= 3
            else:
                e *= 2
        else:
            if red == "none":
                e *= 4
            else:
                e *= 3
        return e

    def bytes(self):
        return self.elems() * Utility.typeToBytes(self.type)

    def flops(self):
        return self.elems() * 2 + 1

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_
```
apex/pyprof/prof/misc.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase

class Foo(OperatorLayerBase):
    """
    An object of Foo is instantiated when we detect an unsupported operator.
    """

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        shapes = []
        types = []

        for arg in args:
            if arg['type'] == "tensor":
                shapes.append(arg['shape'])
                types.append(arg['dtype'])

        self.shape = shapes
        self.type = types

    def params(self):
        p = OrderedDict([('T', self.shape), ('type', self.type)])
        return p

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def flops(self):
        return 0

    def bytes(self):
        return 0

class Copy(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "Tensor")
        assert (op == "copy_")
        assert (len(args) == 2)

        dst, src = args
        assert (src['type'] == dst['type'])
        assert (src['shape'] == dst['shape'])

        self.shape = src['shape']
        self.stype = src['dtype']
        self.dtype = dst['dtype']

    def params(self):
        #The data type might be different
        p = OrderedDict([('T', self.shape), ('stype', self.stype), ('dtype', self.dtype)])
        return p

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def flops(self):
        return 0

    def elems(self):
        return Utility.numElems(self.shape)

    def bytes(self):
        return self.elems() * (Utility.typeToBytes(self.stype) + Utility.typeToBytes(self.dtype))

class Clone(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "Tensor")
        assert (op == "clone")
        assert (len(args) == 1)
        t = args[0]
        self.shape = t['shape']
        self.type = t['dtype']

    def params(self):
        p = OrderedDict([('T', self.shape), ('type', self.type)])
        return p

    def flops(self):
        return 0

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def elems(self):
        return Utility.numElems(self.shape)

    def bytes(self):
        return 2 * self.elems() * Utility.typeToBytes(self.type)

class Contiguous(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "Tensor")
        assert (op == "contiguous")
        assert (len(args) == 1)
        t = args[0]
        self.shape = t['shape']
        self.type = t['dtype']

    def params(self):
        p = OrderedDict([('T', self.shape), ('type', self.type)])
        return p

    def flops(self):
        return 0

    def bytes(self):
        return 2 * Utility.numElems(self.shape) * Utility.typeToBytes(self.type)

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

class Any(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (mod == "Tensor")
        assert (op == "any")
        assert (len(args) == 1)  #could be 2 as well, the second argument is a bool

        t = args[0]

        self.shape = t['shape']
        self.type = t['dtype']
        self.sub = d.sub
        return

    def params(self):
        p = OrderedDict([('T', self.shape), ('type', self.type)])
        return p

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def tc(self):
        return "-"

    def flops(self):
        return 0

    def bytes(self):
        return Utility.numElems(self.shape) * Utility.typeToBytes(self.type)
```
apex/pyprof/prof/normalization.py deleted 100644 → 0 (view file @ 87fc4125)

```python
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase

class BatchNorm(OperatorLayerBase):

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        mod = marker['mod']
        op = marker['op']
        args = marker['args']

        self.marker = marker
        self.mod_ = mod
        self.op_ = op
        self.args = args

        assert (op == "batch_norm")
        assert (len(args) == 8)
        i = args[0]
        assert (i['type'] == "tensor")

        self.shape = i['shape']
        self.type = i['dtype']
        self.dir = d.dir

    def params(self):
        p = OrderedDict([('T', self.shape), ('type', self.type)])
        return p

    def tc(self):
        return "-"

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def elems(self):
        return Utility.numElems(self.shape)

    def flops(self):
        # Variance algo-dependent, but this is a reasonable value.
        return self.elems() * 8

    def bytes(self):
        e = self.elems()
        if self.dir == "fprop":
            e *= 4
        else:
            e *= 5

        return e * Utility.typeToBytes(self.type)
```