Unverified commit 8a7a3325, authored by Masaki Kozuki, committed by GitHub

Remove `pyprof` and `reparameterization` (#1404)

* remove pyprof

* remove reparameterization

* remove pyprof test

* clean up
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
import numpy as np
TC_GEMMS = ["884gemm", "1688gemm"]
class Addmm(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod in ["torch", "Tensor",])
assert (op in ["addmm", "addmm_",])
#Get alpha and beta
alpha = 1
beta = 1
if any(x['name'] == 'alpha' for x in args):
alpha = list(filter(lambda x : x['name'] == "alpha", args))[0]
alpha = alpha['value']
if any(x['name'] == 'beta' for x in args):
beta = list(filter(lambda x : x['name'] == "beta", args))[0]
beta = beta['value']
self.alpha = alpha
self.beta = beta
#Filter out named parameters
args = list(filter(lambda x : x['name'] == '', args))
assert (len(args) == 3)
C,A,B = args
m,k1 = A['shape']
k2,n = B['shape']
assert (k1 == k2)
t1 = A['dtype']
t2 = B['dtype']
t3 = C['dtype']
assert(t1 == t2 == t3)
self.A = A
self.B = B
self.C = C
self.m = m
self.n = n
self.k = k1
self.type = t1
self.name = d.name
return
def tc(self):
for s in TC_GEMMS:
if s in self.name:
return 1
return 0
def bytes(self):
m, n, k = self.m, self.n, self.k
return Utility.typeToBytes(self.type) * (m*n + m*k + n*k)
def flops(self):
return self.m * self.n * self.k * 2
def op(self):
return self.op_
def mod(self):
return self.mod_
def params(self):
p = OrderedDict([('M',self.n),('N',self.m),('K',self.k),('type',self.type)])
return p
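# Illustrative only (not from the original file): with the model above, a
# hypothetical FP16 addmm with M = N = K = 1024 (2 bytes per element) costs
#   flops = 2 * 1024**3      = 2,147,483,648
#   bytes = 2 * 3 * 1024**2  = 6,291,456
# i.e. roughly 341 FLOPs per byte moved, which is why large GEMMs are compute
# bound and prime Tensor Core ("884gemm"/"1688gemm") candidates.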
class Bmm(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "torch") and (op == "bmm")
#Filter out named params (kwargs)
args = list(filter(lambda x : x['name'] == "", args))
assert (len(args) == 2)
A,B = args
b1,m,k1 = A['shape']
b2,k2,n = B['shape']
assert (b1 == b2)
assert (k1 == k2)
t1 = A['dtype']
t2 = B['dtype']
assert(t1 == t2)
self.A = A
self.B = B
self.b = b1
self.m = m
self.n = n
self.k = k1
self.type = t1
self.name = d.name
def tc(self):
for s in TC_GEMMS:
if s in self.name:
return 1
return 0
def params(self):
#p = OrderedDict([('A', A['shape']), ('B', B['shape']), ('type', t1)])
p = OrderedDict([('B',self.b), ('M',self.n),('N',self.m),('K',self.k),('type',self.type)])
return p
def flops(self):
return self.b * self.m * self.n * self.k * 2
def bytes(self):
b, m, n, k = self.b, self.m, self.n, self.k
return Utility.typeToBytes(self.type) * b * (m*n + m*k + n*k)
def op(self):
return self.op_
def mod(self):
return self.mod_
class Matmul(OperatorLayerBase):
NON_GEMM = ["kernelPointwiseApply2", "reduce_1Block_kernel", "elementwise_kernel"]
NON_TC = NON_GEMM + ["dot_kernel"]
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
self.name = d.name
self.sub = d.sub
assert ((mod == "torch") and (op == "matmul")) or ((mod == "Tensor") and (op == "__matmul__"))
assert (len(args) == 2)
assert any([x in d.name for x in Matmul.NON_TC + ["gemm", "gemv"]])
A,B = args
t1 = A['dtype']
t2 = B['dtype']
assert(t1 == t2)
A = A['shape']
B = B['shape']
self.A = A
self.B = B
self.type = t1
# batch, MNK
if (len(A) == 1) and (len(B) == 1):
#dot product
assert (A[0] == B[0])
self.b = (1,)
self.m = 1
self.n = 1
self.k = A[0]
elif (len(A) == 2) and (len(B) == 2):
#gemm
m,k1 = A
k2,n = B
assert(k1 == k2)
self.b = (1,)
self.m = m
self.n = n
self.k = k1
elif (len(A) == 1) and (len(B) == 2):
#vector matrix
k1 = A[0]
k2,n = B
assert(k1 == k2)
self.b = (1,)
self.m = 1
self.n = n
self.k = k1
elif (len(A) == 2) and (len(B) == 1):
#gemv
m,k1 = A
k2 = B[0]
assert (k1 == k2)
self.b = (1,)
self.m = m
self.n = 1
self.k = k1
elif (len(A) == 1) and (len(B) > 2):
assert (A[0] == B[-2])
self.b = B[0:-2]
self.m = 1
self.n = B[-1]
self.k = B[-2]
elif (len(B) == 1) and (len(A) > 2):
assert (B[0] == A[-1])
self.b = A[0:-2]
self.m = A[-2]
self.n = 1
self.k = A[-1]
else:
assert (len(A) >= 2)
assert (len(B) >= 2)
assert (A[-1] == B[-2])
self.m = A[-2]
self.n = B[-1]
self.k = A[-1]
aa = np.empty(A[0:-2])
bb = np.empty(B[0:-2])
self.b = np.broadcast(aa, bb).shape
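# Illustrative only: for a hypothetical batched matmul with A of shape
# (8, 1, 32, 64) and B of shape (4, 64, 16), the final branch above checks
# A[-1] == B[-2] == 64 and broadcasts the leading dims (8, 1) and (4,) to
# b = (8, 4), so m = 32, n = 16, k = 64 and
# flops() = numElems((8, 4)) * 32 * 16 * 64 * 2 = 2,097,152.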
def params(self):
return OrderedDict([('A', self.A), ('B', self.B), ('type', self.type)])
def tc(self):
if self.name in Matmul.NON_TC:
return "-"
else:
for s in TC_GEMMS:
if s in self.name:
return 1
return 0
def bytes(self):
# TODO: check bytes for non-GEMM cases
if self.name in Matmul.NON_GEMM:
return 2 * Utility.typeToBytes(self.type) * Utility.numElems(self.A) #could be B as well
else:
m, n, k = self.m, self.n, self.k
return Utility.typeToBytes(self.type) * (m*n + m*k + n*k)
def flops(self):
# TODO: calculate actual FLOPs. At least we're not saying it's GEMM FLOPs for now.
if self.name in Matmul.NON_GEMM:
return 0
else:
return Utility.numElems(self.b) * self.m * self.n * self.k * 2
def op(self):
return self.op_
def mod(self):
return self.mod_
class Mm(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "torch") and (op == "mm")
assert (len(args) == 2)
A,B = args
m,k1 = A['shape']
k2,n = B['shape']
assert (k1 == k2)
t1 = A['dtype']
t2 = B['dtype']
assert(t1 == t2)
self.A = A
self.B = B
self.m = m
self.n = n
self.k = k1
self.type = t1
self.name = d.name
return
def params(self):
p = OrderedDict([('M',self.n),('N',self.m),('K',self.k),('type',self.type)])
return p
def tc(self):
for s in TC_GEMMS:
if s in self.name:
return 1
return 0
def bytes(self):
m, n, k = self.m, self.n, self.k
return Utility.typeToBytes(self.type) * (m*n + m*k + n*k)
def flops(self):
return self.m * self.n * self.k * 2
def op(self):
return self.op_
def mod(self):
return self.mod_
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
class Conv(OperatorLayerBase):
"""
# N = batch size
# C,H,W = input channels, height, width
# K,P,Q = output channels, height, width
# R,S = filter height, width
# g = groups
"""
#todo: refine winograd and FFT
convAuxList = ["nchwToNhwc", "nhwcToNchw", "OffsetsKernel",]
winoAuxList = ["generateWinogradTilesKernel", "winogradWgradData", "winogradWgradOutput", "winogradWgradDelta"]
fftAuxList = ["compute_gemm_pointers", "flip_filter", "fft2d_r2c_", "fft2d_c2r_", "fft1d_r2c", "fft1d_c2r"]
miscAuxList = ["scaleTensor_kernel",]
convList = ["_s884cudnn_", "_s1688cudnn_", "_scudnn_", "2d_grouped_direct_kernel", "cudnn::detail::implicit_convolve_sgemm", "cudnn::detail::dgrad2d_alg1_1", "cudnn::detail::wgrad_alg0_engine", "cudnn::detail::dgrad_engine", "dgrad_1x1_stride_2x2", "spatialDepthwiseConvolutionUpdateOutput"]
winoList = ["winograd3x3Kernel", "_sgemm_"]
fftList = ["fermiPlusCgemmLDS128_batched", "_gcgemm_",]
miscList = []
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
self.dir = d.dir
self.name = d.name
self.sub = d.sub
assert (mod == "torch.nn.functional")
assert (op in ["conv1d", "conv2d"])
length = len(args)
assert (length >= 2) and (length <= 7)
i,w = args[0], args[1]
assert (i['type'] == "tensor")
assert (w['type'] == "tensor")
#ignore bias
if (length >= 4) and (args[3]['name'] == ""):
s = args[3]
elif any(x['name'] == 'stride' for x in args):
s = list(filter(lambda x : x['name'] == 'stride', args))[0]
else:
s = {'name': 'stride', 'type': 'int', 'value': 1}
if (length >= 5) and (args[4]['name'] == ""):
p = args[4]
elif any(x['name'] == 'padding' for x in args):
p = list(filter(lambda x : x['name'] == 'padding', args))[0]
else:
p = {'name': 'padding', 'type': 'int', 'value': 0}
if (length >= 6) and (args[5]['name'] == ""):
d = args[5]
elif any(x['name'] == 'dilation' for x in args):
d = list(filter(lambda x : x['name'] == 'dilation', args))[0]
else:
d = {'name': 'dilation', 'type': 'int', 'value': 1}
if (length == 7) and (args[6]['name'] == ""):
g = args[6]
elif any(x['name'] == 'groups' for x in args):
g = list(filter(lambda x : x['name'] == 'groups', args))[0]
else:
g = {'name': 'groups', 'type': 'int', 'value': 1}
if op == "conv1d":
assert (len(i['shape']) == 3)
assert (len(w['shape']) == 3)
assert (i['dtype'] == w['dtype'])
N, C1, W = i['shape']
K, C2, S = w['shape']
assert (C1 == C2)
p = p['value'] if Utility.isscalar(p['type']) else p['value'][0]
s = s['value'] if Utility.isscalar(s['type']) else s['value'][0]
d = d['value'] if Utility.isscalar(d['type']) else d['value'][0]
g = g['value']
assert (g == 1)
H = 1
R = 1
P = 1 + (H - (((R-1))+1))
Q = 1 + (W + 2*p - (((S-1)*d)+1))/s
P = int(P)
Q = int(Q)
if (H == 1):
assert (P == 1)
if (W == 1):
assert (Q == 1)
self.N = N
self.C = C1
self.H = H
self.W = W
self.K = K
self.P = P
self.Q = Q
self.R = R
self.S = S
self.ph = 0
self.pw = p
self.U = 1
self.V = s
self.dh = 1
self.dw = d
self.g = g
self.type = i['dtype']
elif op == "conv2d":
assert (len(i['shape']) == 4)
assert (len(w['shape']) == 4)
assert (i['dtype'] == w['dtype'])
N, C1, H, W = i['shape']
K, C2, R, S = w['shape']
if Utility.isscalar(p['type']):
ph = pw = p['value']
else:
assert (p['type'] == "tuple")
ph, pw = p['value']
if Utility.isscalar(s['type']):
sh = sw = s['value']
else:
assert (s['type'] == "tuple")
sh, sw = s['value']
if Utility.isscalar(d['type']):
dh = dw = d['value']
else:
assert (d['type'] == "tuple")
dh, dw = d['value']
g = g['value']
assert (g >= 1)
assert (C1 == C2*g)
P = 1 + (H + 2*ph - (((R-1)*dh)+1))/sh
Q = 1 + (W + 2*pw - (((S-1)*dw)+1))/sw
P = int(P)
Q = int(Q)
if (H == 1):
assert (P == 1)
if (W == 1):
assert (Q == 1)
self.N = N
self.C = C1
self.H = H
self.W = W
self.K = K
self.P = P
self.Q = Q
self.R = R
self.S = S
self.ph = ph
self.pw = pw
self.U = sh
self.V = sw
self.dh = dh
self.dw = dw
self.g = g
self.type = i['dtype']
else:
assert False
def params(self):
p = OrderedDict([('N',self.N), ('C',self.C), ('H',self.H), ('W',self.W), ('K',self.K), ('P',self.P), ('Q',self.Q), ('R',self.R), ('S',self.S), ('ph',self.ph), ('pw',self.pw), ('U',self.U), ('V',self.V), ('dh',self.dh), ('dw',self.dw), ('g',self.g), ('type',self.type)])
return p
def conv_bytes_flops(self, N, C, H, W, K, P, Q, R, S, g, t):
f = 2*N*K*P*Q*C*R*S/g #for fprop
elems = N*C*H*W + K*C*R*S/g + N*K*P*Q
b = elems * Utility.typeToBytes(t)
return b,f
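# Illustrative only: with the fprop model above, a hypothetical 3x3 convolution
# with N=32, C=K=64, H=W=P=Q=56, R=S=3, g=1 costs
#   f = 2*32*64*56*56*64*3*3 / 1 ~= 7.4e9 FLOPs
# and streams N*C*H*W + K*C*R*S + N*K*P*Q ~= 12.9e6 elements.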
def bytes_flops(self):
N,C,H,W,K,P,Q,R,S,ph,pw,U,V,dh,dw,g,t = self.params().values()
if any(x in self.name for x in Conv.convAuxList+Conv.winoAuxList+Conv.fftAuxList+Conv.miscAuxList):
bytes, flops = [0, 0]
elif any(x in self.name for x in Conv.convList+Conv.winoList+Conv.fftList+Conv.miscList):
if g == 1:
bytes, flops = self.conv_bytes_flops(N,C,H,W,K,P,Q,R,S,g,t)
else:
if "2d_grouped_direct_kernel" in self.name: #only 1 kernel is called
bytes, flops = self.conv_bytes_flops(N,C,H,W,K,P,Q,R,S,g,t)
elif "spatialDepthwiseConvolutionUpdateOutput" in self.name: #one kernel for separable conv
bytes, flops = self.conv_bytes_flops(N,C,H,W,K,P,Q,R,S,g,t)
else: #a kernel per group is called
bytes, flops = self.conv_bytes_flops(N,C/g,H,W,K/g,P,Q,R,S,1,t)
elif ("calc_bias_diff" in self.name): #bias gradient
elems = N*K*P*Q
flops = elems
bytes = 2 * elems * Utility.typeToBytes(t)
#params = OrderedDict([('N',N), ('K',K), ('P',P), ('Q',Q), ('type', t)])
else:
bytes, flops = [0, 0]
return bytes, flops
def bytes(self):
b,_ = self.bytes_flops()
return b
def flops(self):
_,f = self.bytes_flops()
return f
def tc(self):
for s in ["884cudnn", "1688cudnn"]:
if s in self.name:
return 1
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
class Convert(OperatorLayerBase):
"""
Class to handle convert operations.
"""
ops = ["byte", "char", "double", "float", "half", "int", "long", "short", "to"]
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "Tensor")
assert (op in Convert.ops)
assert (len(args) == 1)
#The argument could be a tensor or scalar
t = args[0]
if t['type'] == "tensor":
shape = t['shape']
stype = t['dtype']
else:
shape = (1,)
stype = t['type']
if self.op_ == "to":
op = stype
self.shape = shape
self.stype = stype
self.dtype = op
def params(self):
p = OrderedDict([('T', self.shape), ('stype', self.stype), ('dtype', self.dtype)])
return p
def op(self):
return self.op_
def mod(self):
return self.mod_
def tc(self):
return "-"
def elems(self):
return Utility.numElems(self.shape)
def flops(self):
return 0
def bytes(self):
b = self.elems() * (Utility.typeToBytes(self.stype) + Utility.typeToBytes(self.dtype))
return b
from .utility import Utility
class Data(object):
"""
Class to store all the data for every kernel e.g. name, bytes, flops, device, stream etc.
"""
def __init__(self, kernel):
#Available from NVprof
self.tid = kernel['tid']
self.device = kernel['device']
self.stream = kernel['stream']
self.grid = str(kernel['grid']).replace(" ","").replace("(","").replace(")","")
self.block = str(kernel['block']).replace(" ","").replace("(","").replace(")","")
self.name = kernel['kShortName'].replace(" ","_")
self.lName = kernel['kLongName']
self.sil = kernel['kDuration'] #units ns
self.index = None
#Markers
self.argMarker = kernel['marker']
self.modMarker = kernel['reprMarkers']
self.seqMarker = kernel['seqMarker']
self.layer = kernel['layer']
self.trace = kernel['trace']
self.seqId = kernel['seqId']
self.altSeqId = kernel['altSeqId']
self.dir = kernel['dir']
self.sub = kernel['subSeqId']
self.mod = "na"
self.op = "na"
self.params = {"na":"na"}
self.tc = "na"
self.flops = 0
self.bytes = 0
def setParams(self, params):
#Remove space from params
qaz = ""
for key,value in params.items():
if "type" not in key:
qaz += "{}={},".format(key,value)
else:
if type(value) is str:
qaz += "{},".format(Utility.typeToString(value))
else:
qaz += "{}".format(value)
self.params = qaz.replace(" ", "")
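# Illustrative only (not part of the original file): a minimal kernel record of
# the shape Data() consumes. The real records are produced by parse.py; the
# field names below simply mirror the lookups in __init__, and the values are
# hypothetical.
_EXAMPLE_KERNEL = {
    'tid': 0, 'device': 0, 'stream': 7,
    'grid': (1, 1, 1), 'block': (128, 1, 1),
    'kShortName': 'volta_fp16_s884gemm', 'kLongName': 'volta_fp16_s884gemm',
    'kDuration': 12345,  # ns
    'marker': ["{'mod': 'torch', 'op': 'mm', 'args': []}"],
    'reprMarkers': [], 'seqMarker': [], 'layer': [], 'trace': [],
    'seqId': [1], 'altSeqId': [], 'dir': 'fprop', 'subSeqId': 0,
}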
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
class Dropout(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "torch.nn.functional")
assert (op == "dropout")
#assert (len(args) == 1)
self.shape = args[0]['shape']
self.type = args[0]['dtype']
self.dir = d.dir
return
def params(self):
p = OrderedDict([('T', self.shape), ('type', self.type)])
return p
def op(self):
return self.op_
def mod(self):
return self.mod_
def tc(self):
return "-"
def elems(self):
return Utility.numElems(self.shape)
def bytes(self):
#Ignoring the cost of writing and reading the mask
return Utility.typeToBytes(self.type) * self.elems() * 2
def flops(self):
# Note: This is approximate and depends on the RNG
return 5*self.elems()
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
class Embedding(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "torch.nn.functional")
assert (op == "embedding")
self.ishape = args[0]['shape']
self.itype = args[0]['dtype']
self.eshape = args[1]['shape']
self.etype = args[1]['dtype']
assert (len(self.eshape) == 2)
self.dir = d.dir
self.sub = d.sub
return
def params(self):
p = OrderedDict([('I', self.ishape), ('itype', self.itype), ('E', self.eshape), ('etype', self.etype)])
return p
def op(self):
return self.op_
def mod(self):
return self.mod_
def tc(self):
return "-"
def bytes(self):
ishape = self.ishape
itype = self.itype
eshape = self.eshape
etype = self.etype
ielems = Utility.numElems(ishape)
b = 0
if self.dir == "fprop":
#indices
b += ielems * Utility.typeToBytes(itype)
#read and write the embedding matrix
b += ielems * eshape[1] * 2 * Utility.typeToBytes(etype)
else:
#3 times the size of the incoming gradient
b = ielems * eshape[1] * 3 * Utility.typeToBytes(etype)
if self.sub > 0:
b = 0
return b
def flops(self):
# Note: not implemented yet
return 0
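# Illustrative only: for a hypothetical fprop lookup of 4096 int64 indices into
# a float32 embedding matrix of shape (50000, 1024), bytes() above counts
#   4096*8 (read the indices) + 4096*1024*2*4 (read and write the gathered rows)
#   = 32,768 + 33,554,432 bytes.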
from collections import OrderedDict
from .utility import Utility
import numpy as np
from .base import OperatorLayerBase
class Cat(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "torch")
assert (op == "cat")
assert (len(args) >= 2)
t = args[0]['dtype']
shapes = []
for arg in args:
if arg['type'] == "tensor":
assert (arg['dtype'] == t)
shapes.append(arg['shape'])
self.type = t
self.shapes = shapes
def params(self):
p = OrderedDict([('T', self.shapes), ('type', self.type)])
return p
def flops(self):
return 0
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def bytes(self):
b = 0
for s in self.shapes:
b += Utility.numElems(s)
return 2 * b * Utility.typeToBytes(self.type)
class Reshape(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "Tensor")
assert (op == "reshape")
#Temporarily commenting three lines
#assert (len(args) == 2)
#t,s = args
#assert s['type'] == "tuple"
t = args[0]
assert t['type'] == "tensor"
self.type = t['dtype']
self.shape = t['shape']
def params(self):
p = OrderedDict([('T', self.shape), ('type', self.type)])
return p
def flops(self):
return 0
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def bytes(self):
return 0
class Gather(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "Tensor") or (mod == "torch")
assert (op == "gather")
#Filter out the "out" parameter
args = list(filter(lambda x : x['name'] != 'out', args))
assert (len(args) == 3)
#Get input
if (args[0]['name'] == ""):
arg = args[0]
else:
arg = list(filter(lambda x : x['name'] == "input", args))[0]
assert (arg['type'] == "tensor")
self.shape = arg['shape']
self.type = arg['dtype']
def params(self):
p = OrderedDict([('T', self.shape),('type', self.type)])
return p
def flops(self):
return 0
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def bytes(self):
return 2 * Utility.numElems(self.shape) * Utility.typeToBytes(self.type)
class MaskedScatter(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "Tensor")
assert (op == "masked_scatter_")
assert (len(args) == 3)
dst, mask, src = args
assert (dst['type'] == mask['type'] == src['type'] == "tensor")
assert (mask['dtype'] == "uint8")
assert (dst['dtype'] == src['dtype'])
assert (dst['shape'] == mask['shape'])
self.shape = dst['shape']
self.type = dst['dtype']
self.seqId = d.seqId
def params(self):
p = OrderedDict([('T', self.shape),('type', self.type)])
return p
def flops(self):
return 0
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def bytes(self):
elems = Utility.numElems(self.shape)
#src and dst
b = 2 * elems * Utility.typeToBytes(self.type)
#mask (uint8)
b += elems
if (self.seqId > 0):
b = 0
return b
class Nonzero(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod in ["torch", "Tensor"])
assert (op == "nonzero")
assert (len(args) == 1)
arg = args[0]
self.shape = arg['shape']
self.type = arg['dtype']
self.seqId = d.seqId
def params(self):
p = OrderedDict([('T', self.shape),('type', self.type)])
return p
def flops(self):
return 0
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def bytes(self):
elems = Utility.numElems(self.shape)
dim = len(self.shape)
#input tensor
b = elems * Utility.typeToBytes(self.type)
#in the worst case, the output is a (elems x dim) tensor of type "long"
b += elems * dim * Utility.typeToBytes("int64")
if self.seqId > 0:
return 0
else:
return b
class IndexSelect(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "Tensor") or (mod == "torch")
assert (op == "index_select")
#Filter out the "out" parameter
args = list(filter(lambda x : x['name'] != 'out', args))
assert (len(args) == 3)
#Get input, dim and index
if (args[0]['name'] == ""):
t = args[0]
else:
t = list(filter(lambda x : x['name'] == "input", args))[0]
if (args[1]['name'] == ""):
d = args[1]
else:
d = list(filter(lambda x : x['name'] == "dim", args))[0]
if (args[2]['name'] == ""):
i = args[2]
else:
i = list(filter(lambda x : x['name'] == "index", args))[0]
assert (t['type'] == i['type'] == "tensor")
assert (d['type'] == "int")
assert (i['dtype'] == "int64")
assert (len(i['shape']) == 1)
shape = t['shape']
dim = d['value']
indices = i['shape'][0]
assert (dim < len(shape))
self.shape = shape
self.dim = dim
self.indices = indices
self.type = t['dtype']
def params(self):
p = OrderedDict([('T', self.shape),('D', self.dim),('I', self.indices),('type', self.type)])
return p
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def flops(self):
return 0
def bytes(self):
#determine the shape of the output tensor
shape = list(self.shape)
shape[self.dim] = self.indices
b = 0
#time to read the input and write the output
elems = Utility.numElems(shape)
b += 2 * elems * Utility.typeToBytes(self.type)
#time to read the indices
b += self.indices * Utility.typeToBytes("int64")
return b
class MaskedSelect(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
self.sub = d.sub
assert (mod == "Tensor") or (mod == "torch")
assert (op == "masked_select")
#Filter out the "out" parameter
args = list(filter(lambda x : x['name'] != 'out', args))
assert (len(args) == 2)
#Get input and mask
if (args[0]['name'] == ""):
t = args[0]
else:
t = list(filter(lambda x : x['name'] == "input", args))[0]
if (args[1]['name'] == ""):
m = args[1]
else:
m = list(filter(lambda x : x['name'] == "mask", args))[0]
assert (m['dtype'] == "uint8")
tensor = t['shape']
mask = m['shape']
#check for broadcast condition
if (tensor != mask):
array1 = np.empty(list(tensor))
array2 = np.empty(list(mask))
try:
out = np.broadcast(array1, array2).shape
except:
assert False
self.tshape = tensor
self.mshape = mask
self.type = t['dtype']
def params(self):
p = OrderedDict([('T', self.tshape),('M', self.mshape),('type', self.type)])
return p
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def bytes(self):
tensor = self.tshape
mask = self.mshape
t = self.type
#in the worst case, #output elements = #input elements
b = 2 * Utility.numElems(tensor) * Utility.typeToBytes(t)
#mask tensor (assuming uint8)
b += Utility.numElems(mask)
return b
def flops(self):
return 0
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
class Linear(OperatorLayerBase):
'''
Notes:
If the bias occurs before the GEMM, then its 1 write (bias expansion).
If the bias occurs after, then its 1 read and 1 write.
bias in bprop is a reduction and hence is 1 read.
'''
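# Illustrative only: for a hypothetical linear layer with input X of shape
# (batch=64, in_features=1024) and weight W of shape (out=4096, in=1024),
# setXWBMNK() below yields m=4096, n=(64,), k=1024, so params() reports
#   fprop         : M=4096, N=64,   K=1024
#   bprop, sub==0 : M=1024, N=64,   K=4096  (dgrad, most likely)
#   bprop, sub==1 : M=1024, N=4096, K=64    (wgrad, most likely)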
gemmKernels = ["gemm", "gemv", "dot_kernel", "splitKreduce_kernel", "reduce_1Block_kernel"]
biasKernels = ["kernelReduceContigDim", "kernelReduceNoncontigDim_shared", "elementwise_kernel", "reduce_kernel"]
def setXWBMNK(self, args):
x = None
w = None
b = None
if (len(args) == 2):
x,w = args
elif (len(args) == 3):
x,w,b = args
assert (x['type'] == w['type'] == "tensor")
if (b['type'] == "tensor"):
assert(len(b['shape']) == 1)
elif (b['type'] == "NoneType"):
assert b['value'] is None
b = None
else:
assert False
else:
assert False
assert(len(w['shape']) == 2)
k1 = x['shape'][-1]
n,k2 = w['shape']
assert(k1 == k2)
if b is not None:
assert(b['shape'][0] == n)
t1 = x['dtype']
t2 = w['dtype']
assert(t1 == t2)
# X, W, B
self.x = x['shape']
self.w = w['shape']
self.b = b['shape'] if b is not None else None
self.type = t1
# M, N, K
#n = Utility.numElems(x[0:-1])
n = self.x[0:-1]
k = self.x[-1]
m,k1 = self.w
assert (k == k1)
self.m = m
self.n = n
self.k = k
def tc(self):
if self.op() == "linear":
return 1 if "884gemm" in self.name else 0
else:
return "-"
def __init__(self, d):
self.name = d.name
self.dir = d.dir
self.sub = d.sub
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.mod_ = mod
assert (mod == "torch.nn.functional")
assert (op == "linear")
self.setXWBMNK(args)
if any(x in d.name for x in Linear.gemmKernels):
self.op_ = "linear"
else:
assert (d.name in Linear.biasKernels)
self.op_ = "bias"
'''
elif (("kernelPointwiseApply2" in d.name) or ("kernelReduceContigDim" in d.name) or ("kernelReduceNoncontigDim_shared" in d.name)):
#bias expansion was before the gemm
self.op_ = "bias"
elif ("elementwise_kernel" in d.name):
#Bias addition happens later with a broadcast tensor
self.op_ = "bias"
assert (len(d.argMarker) == 2)
marker = eval(d.argMarker[1])
mod = marker['mod']
op = marker['op']
args = marker['args']
assert (mod == "Tensor")
assert (op == "__iadd__")
assert (len(args) == 2)
mn = args[0]['shape']
b = args[1]['shape']
assert (len(b) == 1)
assert (mn == (self.n + (self.m,)))
assert (b == self.b)
else:
assert False
'''
def params(self):
#p = OrderedDict([('X', self.x), ('W', self.w), ('B', self.b), ('type', self.type)])
m, n, k, x, w, t = self.m, self.n, self.k, self.x, self.w, self.type
if len(n) == 1:
n = n[0]
if self.op_ == "linear":
if self.dir == "fprop":
p = OrderedDict([('M', m), ('N', n), ('K', k), ('type', t)])
elif self.dir == "bprop":
if self.sub == 0: #dgrad (most likely)
p = OrderedDict([('M', k), ('N', n), ('K', m), ('type', t)])
elif self.sub == 1: #wgrad (most likely)
p = OrderedDict([('M', k), ('N', m), ('K', n), ('type', t)])
else:
#This happens when there are additional kernels for reduction
p = OrderedDict([('X', x), ('W', w), ('type', t)])
else:
assert False
elif self.op_ == "bias":
p = OrderedDict([('M', m), ('N', n), ('type', t)])
else:
assert False
return p
def op(self):
return self.op_
def bytesFlops(self):
m = self.m
n = Utility.numElems(self.n)
k = self.k
if self.op_ == "linear":
if self.dir == "fprop":
f = m * n * k * 2
b = (m*n + m*k + n*k) * Utility.typeToBytes(self.type)
elif self.dir == "bprop":
if self.sub == 0: #dgrad (most likely)
f = m * n * k * 2
b = (m*n + m*k + n*k) * Utility.typeToBytes(self.type)
elif self.sub == 1: #wgrad (most likely)
f = m * n * k * 2
b = (m*n + m*k + n*k) * Utility.typeToBytes(self.type)
else:
#This happens when there are additional kernels for reduction
f = 0
b = 0
else:
assert False
elif self.op_ == "bias":
f = m * n
b = 2 * m * n * Utility.typeToBytes(self.type)
else:
assert False
return b,f
def bytes(self):
b, f = self.bytesFlops()
return b
def flops(self):
b, f = self.bytesFlops()
return f
def mod(self):
return self.mod_
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
#TODO: Add support for additional loss functions.
class MSELoss(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "torch.nn.functional")
assert (op == "mse_loss")
assert (len(args) == 3)
#Get input, target and reduction
if (args[0]['name'] == ""):
x = args[0]
else:
x = list(filter(lambda x : x['name'] == "input", args))[0]
if (args[1]['name'] == ""):
y = args[1]
else:
y = list(filter(lambda x : x['name'] == "target", args))[0]
if (args[2]['name'] == ""):
r = args[2]
else:
r = list(filter(lambda x : x['name'] == "reduction", args))[0]
assert (x['type'] == y['type'] == "tensor")
assert (x['shape'] == y['shape'])
assert (x['dtype'] == y['dtype'])
assert (r['type'] == "str")
assert (r['value'] in ["none", "mean", "sum"])
self.shape = x['shape']
self.type = x['dtype']
self.red = r['value']
self.dir = d.dir
def params(self):
p = OrderedDict([('T', self.shape), ('type', self.type), ('red', self.red)])
return p
def elems(self):
red = self.red
e = Utility.numElems(self.shape)
if self.dir == "fprop":
if red == "none":
e *= 3
else:
e *= 2
else:
if red == "none":
e *= 4
else:
e *= 3
return e
def bytes(self):
return self.elems() * Utility.typeToBytes(self.type)
def flops(self):
return self.elems() * 2 + 1
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
class Foo(OperatorLayerBase):
"""
An object of Foo is instantiated when we detect an unsupported operator.
"""
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
shapes = []
types = []
for arg in args:
if arg['type'] == "tensor":
shapes.append(arg['shape'])
types.append(arg['dtype'])
self.shape = shapes
self.type = types
def params(self):
p = OrderedDict([('T', self.shape), ('type', self.type)])
return p
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def flops(self):
return 0
def bytes(self):
return 0
class Copy(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "Tensor")
assert (op == "copy_")
assert (len(args) == 2)
dst, src = args
assert (src['type'] == dst['type'])
assert (src['shape'] == dst['shape'])
self.shape = src['shape']
self.stype = src['dtype']
self.dtype = dst['dtype']
def params(self):
#The data type might be different
p = OrderedDict([('T', self.shape), ('stype', self.stype), ('dtype', self.dtype)])
return p
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def flops(self):
return 0
def elems(self):
return Utility.numElems(self.shape)
def bytes(self):
return self.elems() * (Utility.typeToBytes(self.stype) + Utility.typeToBytes(self.dtype))
class Clone(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "Tensor")
assert (op == "clone")
assert (len(args) == 1)
t = args[0]
self.shape = t['shape']
self.type = t['dtype']
def params(self):
p = OrderedDict([('T', self.shape), ('type', self.type)])
return p
def flops(self):
return 0
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def elems(self):
return Utility.numElems(self.shape)
def bytes(self):
return 2 * self.elems() * Utility.typeToBytes(self.type)
class Contiguous(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "Tensor")
assert (op == "contiguous")
assert (len(args) == 1)
t = args[0]
self.shape = t['shape']
self.type = t['dtype']
def params(self):
p = OrderedDict([('T', self.shape), ('type', self.type)])
return p
def flops(self):
return 0
def bytes(self):
return 2 * Utility.numElems(self.shape) * Utility.typeToBytes(self.type)
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
class Any(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "Tensor")
assert (op == "any")
assert (len(args) == 1) #could be 2 as well, the second argument is a bool
t = args[0]
self.shape = t['shape']
self.type = t['dtype']
self.sub = d.sub
return
def params(self):
p = OrderedDict([('T', self.shape), ('type', self.type)])
return p
def op(self):
return self.op_
def mod(self):
return self.mod_
def tc(self):
return "-"
def flops(self):
return 0
def bytes(self):
return Utility.numElems(self.shape) * Utility.typeToBytes(self.type)
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
class BatchNorm(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (op == "batch_norm")
assert (len(args) == 8)
i = args[0]
assert (i['type'] == "tensor")
self.shape = i['shape']
self.type = i['dtype']
self.dir = d.dir
def params(self):
p = OrderedDict([('T', self.shape), ('type', self.type)])
return p
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def elems(self):
return Utility.numElems(self.shape)
def flops(self):
# Variance algo-dependent, but this is a reasonable value.
return self.elems() * 8
def bytes(self):
e = self.elems()
if self.dir == "fprop":
e *= 4
else:
e *= 5
return e * Utility.typeToBytes(self.type)
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
#TODO: Add support for other optimizers.
class Adam(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert(op == "adam")
assert (len(args) == 12) or (len(args) == 14)
w, hw, m, v, g = args[0:5]
assert (w['shape'] == m['shape'] == v['shape'] == g['shape'])
assert (hw['shape'] == w['shape']) or (hw['shape'] == (0,)) #hw could be null
assert (w['type'] == m['type'] == v['type'] == g['type'] == hw['type'] == "tensor")
assert (w['dtype'] == m['dtype'] == v['dtype'] == "float32")
self.w = w
self.g = g
def params(self):
p = OrderedDict([('T',self.w['shape']), ('wtype',self.w['dtype']), ('gtype',self.g['dtype'])])
return p
def flops(self):
return 0
def bytes(self):
wshape = self.w['shape']
wtype = self.w['dtype']
gtype = self.g['dtype']
b = 0
elems = Utility.numElems(wshape)
#Get time to stream read/write w, m, v
b += 6 * elems * Utility.typeToBytes(wtype)
#Get time to read "g"
b += elems * Utility.typeToBytes(gtype)
if wtype != gtype: #mixed precision
#Get time to write "hw"
b += elems * Utility.typeToBytes(gtype)
return b
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
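# Illustrative only: for a hypothetical FP32 parameter tensor of 1,000,000
# elements with gtype == wtype (so the extra "hw" write does not apply),
# bytes() above counts 6 * 1e6 * 4 for streaming w, m, v (one read and one
# write each) plus 1e6 * 4 for reading g, i.e. 28,000,000 bytes in total.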
import errno, os, sys
class Output():
"""
This class handles printing of a columned output and a CSV.
"""
# The table below is organized as
# user_option: [output_header, attribute_in_Data_class, type, min_width_in_columned_output]
table = {
"idx": ["Idx", "index", int, 7],
"seq": ["SeqId", "seqId", str, 7],
"altseq": ["AltSeqId", "altSeqId", str, 7],
"tid": ["TId", "tid", int, 12],
"layer": ["Layer", "layer", str, 10],
"trace": ["Trace", "trace", str, 25],
"dir": ["Direction", "dir", str, 5],
"sub": ["Sub", "sub", int, 3],
"mod": ["Module", "mod", str, 15],
"op": ["Op", "op", str, 15],
"kernel": ["Kernel", "name", str, 0],
"params": ["Params", "params", str, 0],
"sil": ["Sil(ns)", "sil", int, 10],
"tc": ["TC", "tc", str, 2],
"device": ["Device", "device", int, 3],
"stream": ["Stream", "stream", int, 3],
"grid": ["Grid", "grid", str, 12],
"block": ["Block", "block", str, 12],
"flops": ["FLOPs", "flops", int, 12],
"bytes": ["Bytes", "bytes", int, 12]
}
def __init__(self, args):
self.cols = args.c
self.csv = args.csv
self.col = True if (args.w > 0) else False
self.width = args.w
w = 0
for col in self.cols:
assert col in Output.table.keys()
w += Output.table[col][3]
if ((self.col) and (w > self.width)):
print("Minimum width required to print {} = {}. Exiting.".format(",".join(self.cols), w))
sys.exit(1)
remainder = self.width - w
if ("kernel" in self.cols) and ("params" in self.cols):
Output.table["kernel"][3] = int(remainder/2)
Output.table["params"][3] = int(remainder/2)
elif ("kernel" in self.cols):
Output.table["kernel"][3] = remainder
elif ("params" in self.cols):
Output.table["params"][3] = remainder
#header format
cadena = ""
for col in self.cols:
_,_,t,w = Output.table[col]
cadena += "%-{}.{}s ".format(w,w)
self.hFormat = cadena
#data format
cadena = ""
for col in self.cols:
_,_,t,w = Output.table[col]
if (t == str):
cadena += "%-{}.{}s ".format(w,w)
elif (t == int):
cadena += "%{}d ".format(w)
self.dFormat = cadena
def foo(self, cadena, pformat):
if self.csv:
cadena = ",".join(map(lambda x : '"' + str(x) + '"', cadena))
elif self.col:
cadena = pformat % cadena
else:
cadena = " ".join(map(str,cadena))
try:
print(cadena)
except IOError as e:
#gracefully handle pipes
if e.errno == errno.EPIPE:
# Python flushes standard streams on exit; redirect remaining output
# to devnull to avoid another BrokenPipeError at shutdown
devnull = os.open(os.devnull, os.O_WRONLY)
os.dup2(devnull, sys.stdout.fileno())
sys.exit(0)
else:
sys.exit(-1)
def header(self):
cadena = ()
for col in self.cols:
h = Output.table[col][0]
cadena = cadena + (h,)
self.foo(cadena, self.hFormat)
def data(self, a):
if a.dir == "":
direc = "na"
else:
direc = a.dir
if a.op == "":
op = "na"
else:
op = a.op
if a.mod == "":
mod = "na"
else:
mod = a.mod
cadena = ()
for col in self.cols:
attr = Output.table[col][1]
val = getattr(a, attr)
if col == "layer":
assert(type(val) == list)
val = ":".join(val)
val = "-" if val == "" else val
if col == "trace":
assert(type(val) == list)
if self.col and len(val):
val = val[-1]
val = val.split("/")[-1]
else:
val = ",".join(val)
val = "-" if val == "" else val
if col in ["seq", "altseq"]:
assert(type(val) == list)
val = ",".join(map(str,val))
val = "-" if val == "" else val
cadena = cadena + (val,)
self.foo(cadena, self.dFormat)
import numpy as np
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
class Pointwise(OperatorLayerBase):
ops = []
ops += ["__abs__", "__neg__", "__invert__"]
ops += ["__add__", "__sub__", "__mul__", "__floordiv__", "__truediv__", "__pow__", "__mod__"]
ops += ["__radd__", "__rsub__", "__rmul__", "__rdiv__", "__rtruediv__", "__rfloordiv__", "__rpow__"]
ops += ["__iadd__", "__isub__", "__imul__", "__itruediv__",]
ops += ["__lt__", "__gt__", "__ge__", "__le__", "__eq__", "__ne__",]
ops += ["lt", "lt_", "gt", "gt_", "ge", "ge_", "le", "le_", "eq", "eq_", "ne", "ne_",]
ops += ["__and__", "__or__", "__xor__", "__lshift__", "__rshift__"]
ops += ["__iand__", "__ior__", "__ixor__", "__ilshift__", "__irshift__"]
ops += ["abs", "abs_", "neg", "neg_"]
ops += ["add", "add_", "div", "div_", "mul", "mul_", "reciprocal", "reciprocal_", "remainder", "remainder_", "sub", "sub_",]
ops += ["addcdiv", "addcdiv_", "addcmul", "addcmul_"]
ops += ["exp", "exp_", "exp1m", "exp1m_", "log", "log_", "log10", "log10_", "log1p", "log1p_", "log2", "log2_", "pow", "pow_", "rsqrt", "rsqrt_", "sqrt", "sqrt_",]
ops += ["ceil", "ceil_", "clamp", "clamp_", "floor", "floor_", "fmod", "fmod_", "frac", "frac_", "round", "round_", "sign", "sign_", "trunc", "trunc_"]
ops += ["acos", "acos_", "asin", "asin_", "atan", "atan_", "atan2", "atan2_", "cos", "cos_", "cosh", "cosh_", "sin", "sin_", "sinh", "sinh_", "tan", "tan_", "sigmoid", "sigmoid_", "tanh", "tanh_"]
ops += ["digamma", "erf", "erf_", "erfc", "erfc_", "erfinv", "erfinv_", "lerp", "lerp_", "mvlgamma",]
@staticmethod
def foo(d):
return d['name'],d['type'],d['shape'],d['dtype']
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
self.dir = d.dir
assert (d.dir in ["fprop", "bprop"])
assert (op in Pointwise.ops)
#Filter out all named parameters (kwargs).
#This might require revisiting in future.
args = list(filter(lambda x : x['name'] == "", args))
#Filter out non tensors
args = list(filter(lambda x : x['type'] == "tensor", args))
if (len(args) == 0):
self.shape = [(1,)]
self.type = "float32" #FIX
elif (len(args) == 1):
in0 = args[0]
_,t0,s0,dt0 = Pointwise.foo(in0)
assert (t0 == "tensor")
self.shape = [s0,]
self.type = dt0
elif (len(args) == 2):
in0,in1 = args
_,t0,s0,dt0 = Pointwise.foo(in0)
_,t1,s1,dt1 = Pointwise.foo(in1)
assert (t0 == t1 == "tensor")
assert (dt0 == dt1)
self.shape = [s0,s1]
self.type = dt0
elif (len(args) == 3):
in0,in1,in2 = args
_,t0,s0,dt0 = Pointwise.foo(in0)
_,t1,s1,dt1 = Pointwise.foo(in1)
_,t2,s2,dt2 = Pointwise.foo(in2)
assert (t0 == t1 == t2 == "tensor")
assert (dt0 == dt1 == dt2)
self.shape = [s0,s1,s2]
self.type = dt0
else:
assert False
return
def params(self):
p = OrderedDict([('T',self.shape), ('type', self.type)])
return p
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def elems(self):
tensor = self.shape
t = self.type
if (len(tensor) == 1):
elems = 2 * Utility.numElems(tensor[0])
elif (len(tensor) == 2):
if (tensor[0] == tensor[1]): # same shape
elems = Utility.numElems(tensor[0])
if self.dir == "fprop":
elems *= 3
else:
if (self.op_ in ["add", "__add__", "sub", "__sub__", "__isub__"]):
elems *= 2
elif (self.op_ in ["__mul__", "__rmul__", "div", "__truediv__"]):
elems *= 3
else:
assert False
else: #check for broadcast conditions
array1 = np.empty(list(tensor[0]))
array2 = np.empty(list(tensor[1]))
try:
out = np.broadcast(array1, array2).shape
except:
assert False
elems = Utility.numElems(tensor[0])
elems += Utility.numElems(tensor[1])
elems += Utility.numElems(out)
#TODO bprop
elif (len(tensor) == 3):
if (tensor[0] == tensor[1] == tensor[2]): #same shape
elems = Utility.numElems(tensor[0])
elems *= 4
else:
assert False
else:
assert False
return elems
def bytes(self):
return self.elems() * Utility.typeToBytes(self.type)
def flops(self):
# Note: some cases may still be missing.
f = 0
if self.op_ in ["__abs__", "__neg__", "__add__", "__sub__", "__mul__",
"__radd__", "__rmul__", "__iadd__", "__isub__", "__imul__", "__itruediv__",
"abs", "abs_", "neg", "neg_", "add", "add_", "div", "div_", "mul", "mul_",
"sub", "sub_", "exp", "exp_", "sign", "sign_", "trunc", "trunc_",
"sin", "sin_", "cos", "cos_", "sinh", "sinh_", "cosh", "cosh_",
"sqrt", "sqrt_", "rsqrt", "rsqrt_", "__lt__", "__gt__", "__ge__", "__le__",
"__eq__", "__ne__", "lt", "lt_", "gt", "gt_", "ge", "ge_", "le", "le_",
"eq", "eq_", "ne", "ne_", "ceil", "ceil_", "clamp", "clamp_", "floor", "floor_",
"round", "sign", "sign_", "trunc", "trunc_"]:
# We're counting only one operand, not two (2 operands, 1 op)
f = self.elems() / 2
elif self.op_ in ["fmod", "fmod_"]:
f = self.elems()
elif self.op_ in ["tanh", "tanh_", "sigmoid", "sigmoid_", "log", "log_", "log2",
"log2_", "log10", "log10_"]:
f = self.elems() * 2
elif self.op_ in ["asin", "asin_", "acos", "acos_", "atan", "atan_"]:
# no intrinsic, hence slow execution
# surprisingly, asin/acos and atan were all the same (via nvprof measurement)
f = self.elems() * 10
return f
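# Illustrative only: for a hypothetical in-place add x.add_(y) on two float32
# tensors of shape (1024, 1024) in fprop, elems() above counts
#   3 * 1024*1024 = 3,145,728 element accesses (read x, read y, write result),
# so bytes() = 4 * 3,145,728 = 12,582,912 and flops() = elems()/2 = 1,572,864.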
from collections import OrderedDict
from .utility import Utility
# Work in progress.
#poolFuncs = ["max_pool2d_with_indices_forward", "max_pool2d_with_indices"]
class MaxPool2d(object):
@staticmethod
def parse(marker):
def convert2Tuple(arg):
assert (arg['type'] in ["int", "tuple"])
if arg['type'] == "int":
return (arg['value'], arg['value'])
else:
return arg['value']
mod = marker['mod']
op = marker['op']
args = marker['args']
assert (mod == "torch.nn.functional")
assert (op == "max_pool2d")
assert (len(args) >= 2)
#input
assert (args[0]['name'] == "")
inp = args[0]
assert (inp['type'] == "tensor")
i = inp['shape']
t = inp['dtype']
assert (len(i) == 4) #nchw tensor
#kernel
if (args[1]['name'] == ""):
k = args[1]
else:
k = list(filter(lambda x : x['name'] == "kernel_size", args))[0]
k = convert2Tuple(k)
#stride
s = k #default value
if ((len(args) >= 3) and (args[2]['name'] == "")):
s = args[2]
s = convert2Tuple(s)
elif any(x['name'] == "stride" for x in args):
s = list(filter(lambda x : x['name'] == "stride", args))[0]
s = convert2Tuple(s)
#padding
p = (0,0)
if ((len(args) >= 4) and (args[3]['name'] == "")):
p = args[3]
p = convert2Tuple(p)
elif any(x['name'] == "padding" for x in args):
p = list(filter(lambda x : x['name'] == "padding", args))[0]
p = convert2Tuple(p)
params = OrderedDict([('T', i), ('K', k), ('s',s), ('p',p), ('type', t)])
return params
#!/usr/bin/env python3
"""
This script reads the output (Python dictionary) created by parse.py.
For every kernel (line) in the input it determines
module / class name e.g. torch.nn.functional
operator name e.g. linear
kernel parameters e.g. GEMM M, N, K, datatype
bytes
flops
tensor core usage
direction (fprop, bprop)
and other things. Please see the tool usage.
"""
from .usage import parseArgs
from .output import Output
from .utility import Utility
from .pointwise import Pointwise
from .convert import Convert
from .blas import *
from .embedding import Embedding
from .reduction import *
from .dropout import Dropout
from .softmax import *
#from pooling import * # work in progress
from .linear import Linear
from .optim import Adam
from .misc import *
from .conv import Conv
from .activation import Activation
from .index_slice_join_mutate import Cat, Reshape, MaskedScatter, Gather, Nonzero, IndexSelect, MaskedSelect
from .recurrentCell import RNNCell
from .normalization import BatchNorm
from .randomSample import RandPerm
from .loss import MSELoss
from .data import Data
def findFpropKernel(seq):
#Find the last fprop kernel with the same seqId
#First look at seqId and then at altSeqId
for idx in reversed(range(len(kernels))):
k = kernels[idx]
if (seq in k['seqId']) and (k['dir'] == "fprop"):
return idx
for idx in reversed(range(len(kernels))):
k = kernels[idx]
if (seq in k['altSeqId']) and (k['dir'] == "fprop"):
return idx
return -1
#print("Error: seqId {} not found.".format(seq), file=sys.stderr)
#assert False
def foo(mod, op, d):
if (op[0] == "linear"):
xx = Linear(d)
# rnncell, lstmcell, grucell
elif (mod[0] in["LSTMCell", "GRUCell"]) and (op[0] == "forward"):
xx = RNNCell(d)
elif op[0] in ["conv1d", "conv2d",]:
xx = Conv(d)
elif (op[0] in Pointwise.ops):
xx = Pointwise(d)
elif (op[0] in Convert.ops):
xx = Convert(d)
elif op[0] in ["__matmul__", "matmul"]:
xx = Matmul(d)
elif op[0] == "embedding":
xx = Embedding(d)
#reduction
elif op[0] == "sum":
xx = Sum(d)
elif op[0] == "mean":
xx = Mean(d)
elif op[0] == "norm":
xx = Norm(d)
elif op[0] == "dropout":
xx = Dropout(d)
#Index, Slice, Join, Mutate
elif (op[0] == "cat"):
xx = Cat(d)
elif (op[0] == "reshape"):
xx = Reshape(d)
elif (op[0] == "masked_scatter_"):
xx = MaskedScatter(d)
elif (op[0] == "gather"):
xx = Gather(d)
elif (op[0] == "nonzero"):
xx = Nonzero(d)
elif (op[0] == "index_select"):
xx = IndexSelect(d)
elif (op[0] == "masked_select"):
xx = MaskedSelect(d)
#blas
elif op[0] in ["addmm", "addmm_"]:
xx = Addmm(d)
elif op[0] == "mm":
xx = Mm(d)
elif op[0] == "bmm":
xx = Bmm(d)
#softmax
elif op[0] == "softmax":
xx = Softmax(d)
elif op[0] == "log_softmax":
xx = LogSoftmax(d)
#loss
elif op[0] == "mse_loss":
xx = MSELoss(d)
#optimizers
elif op[0] == "adam":
xx = Adam(d)
#normalization
elif op[0] == "batch_norm":
xx = BatchNorm(d)
#random
elif op[0] == "randperm":
xx = RandPerm(d)
#misc
elif op[0] == "copy_":
xx = Copy(d)
elif op[0] == "clone":
xx = Clone(d)
elif op[0] == "contiguous":
xx = Contiguous(d)
elif op[0] == "any":
xx = Any(d)
elif (op[0] in Activation.ops):
xx = Activation(d)
elif op[0] == "to":
xx = Convert(d)
else:
xx = Foo(d)
return xx
def main():
#Read cmd line arguments
cmdArgs = parseArgs()
output = Output(cmdArgs)
output.header()
idx = -1
#Read in all the kernel info
for line in cmdArgs.file:
idx += 1
kernel = eval(line)
assert(kernel)
kernels.append(kernel)
k = kernel
d = Data(k)
mod = k['mod']
op = k['op']
flops = 0
params = {"na":"na"}
tc = "na"
bytes = 0
if (d.dir == "bprop"):
d.seqMarker = k['seqMarker']
seq = k['seqId']
#Use only the first seqId (a kernel may carry several)
seq = k['seqId'][:1]
assert (len(seq) == 1), seq
#assert (seq[0] != 0)
assert (len(d.seqMarker) > 0)
#If there is no useful marker associated, use the
#sequence number to find the kernel from fprop
if len(d.argMarker) == 0:
index = findFpropKernel(seq[0])
if index >= 0:
d.argMarker = kernels[index]['marker']
d.modMarker = kernels[index]['reprMarkers']
mod = kernels[index]['mod']
op = kernels[index]['op']
d.layer = kernels[index]['layer']
d.trace = kernels[index]['trace']
# Check if marker has our annotations
if len(d.argMarker) and Utility.hasNVTX(d.argMarker[0]):
xx = foo(mod, op, d)
bytes = xx.bytes()
flops = xx.flops()
op = xx.op()
params = xx.params()
tc = xx.tc()
if type(op) is list:
if len(op):
op = op[0]
else:
op = ""
if type(mod) is list:
if len(mod):
mod = mod[0]
else:
mod = ""
d.index = idx+1
# The following 8 come from operator class functions.
d.setParams(params)
d.tc = tc
d.flops = flops
d.bytes = bytes
d.mod = mod
d.op = op
output.data(d)
kernels = []
if __name__ == '__main__':
main()
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
class RandPerm(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "torch")
assert (op == "randperm")
assert (len(args) == 1)
n = args[0]
assert n['type'] == "int"
self.n = n['value']
def params(self):
p = OrderedDict([('N', self.n)])
return p
def tc(self):
return "-"
def op(self):
return self.op_
def mod(self):
return self.mod_
def bytes(self):
return self.n * Utility.typeToBytes("int64")
def flops(self):
# Depends on RNG but this is probably a reasonable assumption.
return self.n * 3
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase
class Softmax(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "torch.nn.functional")
assert (op == "softmax")
#Filter out named parameters
args = list(filter(lambda x : x['name'] == '', args))
assert (len(args) <= 2)
self.shape = args[0]['shape']
self.type = args[0]['dtype']
self.dir = d.dir
return
def op(self):
return self.op_
def mod(self):
return self.mod_
def tc(self):
return "-"
def params(self):
p = OrderedDict([('T', self.shape), ('type', self.type)])
return p
def elems(self):
return Utility.numElems(self.shape)
def flops(self):
# Note: exp, sum-reduce, divide
#flops = elems * 3
return 0
def bytes(self):
b = self.elems() * Utility.typeToBytes(self.type)
b *= 3 if self.dir == "fprop" else 5 #verify
return b
class LogSoftmax(OperatorLayerBase):
def __init__(self, d):
marker = eval(d.argMarker[0])
mod = marker['mod']
op = marker['op']
args = marker['args']
self.marker = marker
self.mod_ = mod
self.op_ = op
self.args = args
assert (mod == "torch.nn.functional")
assert (op == "log_softmax")
#Filter out named parameters
args = list(filter(lambda x : x['name'] == '', args))
assert (len(args) <= 2)
#Get input
if (args[0]['name'] == ""):
i = args[0]
else:
i = list(filter(lambda x : x['name'] == "input", args))[0]
t = i['dtype']
self.shape = i['shape']
self.type = i['dtype']
self.dir = d.dir
return
def op(self):
return self.op_
def mod(self):
return self.mod_
def tc(self):
return "-"
def params(self):
p = OrderedDict([('T', self.shape), ('type', self.type)])
return p
def elems(self):
return Utility.numElems(self.shape)
def flops(self):
# Note: exp, sum-reduce, divide, log
#flops = elems * 4
return 0
def bytes(self):
b = self.elems() * Utility.typeToBytes(self.type)
b *= 3 if self.dir == "fprop" else 5 #verify
return b