Unverified commit 8a7a3325, authored by Masaki Kozuki and committed by GitHub
Browse files

Remove `pyprof` and `reparameterization` (#1404)

* remove pyprof

* remove reparameterization

* remove pyprof test

* clean up
parent cd499737
import sys
import argparse
def parseArgs():
    """
    Print usage and parse arguments.

    Builds the command line parser, parses ``sys.argv`` and turns the optional
    positional ``file`` argument into a readable file object.

    Returns:
        argparse.Namespace with the attributes
            file: open text file handle, or ``sys.stdin`` when no path was given.
            c: list of validated column names.
            csv: True when ``--csv`` was passed.
            w: integer passed with ``-w`` (default 0).

    The process exits with status 1 when the input file cannot be opened;
    argparse itself exits on invalid arguments (e.g. an unknown column name,
    or ``--csv`` combined with ``-w``).
    """

    def check_cols(value):
        """Split a comma separated column list and reject unknown column names."""
        valid = ["idx", "seq", "altseq", "tid", "layer", "trace", "dir", "sub", "mod", "op", "kernel", "params", "sil", "tc", "device", "stream", "grid", "block", "flops", "bytes"]
        cols = value.split(",")
        for col in cols:
            if col not in valid:
                raise argparse.ArgumentTypeError("{} is not a valid column name. Valid column names are {}.".format(col, ",".join(valid)))
        return cols

    def openFile(f):
        """Open `f` for reading; on IOError report to stderr and exit with status 1."""
        try:
            # The handle is returned to the caller, so it is intentionally
            # not wrapped in a `with` block here.
            return open(f, "r")
        except IOError:
            print("Error opening file {}. Exiting.".format(f), file=sys.stderr)
            sys.exit(1)

    parser = argparse.ArgumentParser(prog=sys.argv[0], description="PyTorch Profiler", formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("file",
        nargs='?',
        type=str,
        default=None,
        help="Output of parse.py (Python dictionary).")

    # The default is a string, so argparse passes it through `check_cols`
    # as well and `args.c` is always a list.
    parser.add_argument("-c",
        type=check_cols,
        default="idx,dir,sub,mod,op,kernel,params,sil",
        help='''Comma separated names of columns to print.
idx: Index
seq: PyTorch Sequence Id
altseq: PyTorch Alternate Sequence Id
tid: Thread Id
layer: User annotated NVTX string (can be nested)
trace: Function Call Trace
dir: Direction
sub: Sub Sequence Id
mod: Module
op: Operation
kernel: Kernel Name
params: Parameters
sil: Silicon Time (in ns)
tc: Tensor Core Usage
device: GPU Device Id
stream: Stream Id
grid: Grid Dimensions
block: Block Dimensions
flops: Floating point ops (FMA = 2 FLOPs)
bytes: Number of bytes in and out of DRAM
e.g. -c idx,kernel,sil''')

    # CSV output and fixed-width columnated output cannot be combined.
    group = parser.add_mutually_exclusive_group()
    group.add_argument("--csv",
        action="store_true",
        default=False,
        help="Print a CSV output.")
    group.add_argument("-w",
        type=int,
        default=0,
        help="Width of columnated output.")

    args = parser.parse_args()
    if args.file is None:
        args.file = sys.stdin
    else:
        args.file = openFile(args.file)
    return args
from functools import reduce
class Utility(object):
    """Stateless helpers for shapes, dtype names and NVTX marker strings."""

    @staticmethod
    def numElems(shape):
        """
        Return the product of all entries of `shape` (1 for an empty tuple).

        Raises:
            AssertionError: if `shape` is not a tuple (or tuple subclass).
        """
        # Raised explicitly (instead of `assert`) so the check survives `python -O`.
        if not isinstance(shape, tuple):
            raise AssertionError("shape must be a tuple, got {}".format(type(shape).__name__))
        return reduce(lambda x, y: x * y, shape, 1)

    @staticmethod
    def typeToBytes(t):
        """
        Return the size in bytes of the dtype named `t`.

        Raises:
            AssertionError: if `t` is not one of the known dtype names.
        """
        sizes = (
            (("uint8", "int8", "byte", "char", "bool"), 1),
            (("float16", "half", "int16", "short"), 2),
            (("float32", "float", "int32", "int"), 4),
            (("int64", "long", "float64", "double"), 8),
        )
        for names, nbytes in sizes:
            if t in names:
                return nbytes
        # Same exception type as the original `assert False`, but not stripped by -O.
        raise AssertionError("unknown type name: {!r}".format(t))

    @staticmethod
    def typeToString(t):
        """
        Return the canonical short name ("fp16", "int32", ...) of the dtype named `t`.

        Raises:
            AssertionError: if `t` is not one of the known dtype names.
        """
        canonical = (
            (("uint8", "byte", "char"), "uint8"),
            (("int8",), "int8"),
            (("int16", "short"), "int16"),
            (("float16", "half"), "fp16"),
            (("float32", "float"), "fp32"),
            (("int32", "int"), "int32"),
            (("int64", "long"), "int64"),
            (("float64", "double"), "fp64"),
            (("bool",), "bool"),
        )
        for names, short in canonical:
            if t in names:
                return short
        raise AssertionError("unknown type name: {!r}".format(t))

    @staticmethod
    def hasNVTX(marker):
        """
        Return True if `marker` is (or evaluates to) a dict that has the keys
        "mod", "op" and "args"; return False otherwise.

        A string marker is evaluated as a Python expression first; a string
        that fails to evaluate yields False.
        """
        if isinstance(marker, str):
            try:
                # SECURITY NOTE(review): eval() executes arbitrary code, so marker
                # strings must come from a trusted profile. Consider
                # ast.literal_eval if all markers are known to be plain literals.
                marker = eval(marker)
            except Exception:
                # Narrowed from a bare `except:` so that KeyboardInterrupt and
                # SystemExit are no longer swallowed.
                return False
        if isinstance(marker, dict):
            return ("mod" in marker) and ("op" in marker) and ("args" in marker)
        return False

    @staticmethod
    def isscalar(t):
        """Return True if `t` is the type name "float" or "int"."""
        return (t in ["float", "int"])
import warnings
warnings.warn("reparameterization will be removed by the end of June, 2022", FutureWarning)
from .weight_norm import WeightNorm
from .reparameterization import Reparameterization
def apply_weight_norm(module, name='', dim=0, hook_child=True):
    r"""
    Attach weight normalization to `module`.

    This is a convenience wrapper that forwards to
    :func:`apply_reparameterization` with ``reparameterization=WeightNorm``.
    When `name` is empty, weight normalization is applied to all parameters
    of the model (except 1-d vectors and scalars).

    .. math::
         \mathbf{w} = g \dfrac{\mathbf{v}}{\|\mathbf{v}\|}

    Weight normalization splits a weight tensor into a magnitude and a
    direction: the parameter called `name` (e.g. "weight") is replaced by a
    magnitude parameter (e.g. "weight_g") and a direction parameter
    (e.g. "weight_v"). A hook recomputes the weight tensor from those two
    before every :meth:`~Module.forward` call.

    With the default `dim=0` the norm is computed independently per output
    channel/plane; pass `dim=None` for a single norm over the whole tensor.

    See https://arxiv.org/abs/1602.07868

    Args:
        module (nn.Module): containing module
        name (str, optional): name of weight parameter
        dim (int, optional): dimension over which to compute the norm
        hook_child (boolean, optional): adds reparameterization hook to direct parent of the
            parameters. If False, it's added to `module` instead. Default: True

    Returns:
        The original module with the weight norm hook

    Example::

        >>> m = apply_weight_norm(nn.Linear(20, 40), name='weight')
        Linear (20 -> 40)
        >>> m.weight_g.size()
        torch.Size([40, 1])
        >>> m.weight_v.size()
        torch.Size([40, 20])
    """
    return apply_reparameterization(
        module,
        reparameterization=WeightNorm,
        name=name,
        dim=dim,
        hook_child=hook_child,
    )
def remove_weight_norm(module, name='', remove_all=False):
    """
    Undo weight normalization on `module`.

    Forwards to :func:`remove_reparameterization` with
    ``reparameterization=WeightNorm``. When no parameter name is given, all
    weight norm parameterizations are removed.

    Args:
        module (nn.Module): containing module
        name (str, optional): name of weight parameter
        remove_all (bool, optional): forwarded unchanged to
            :func:`remove_reparameterization`. Default: False

    Returns:
        Whatever :func:`remove_reparameterization` returns.

    Example:
        >>> m = apply_weight_norm(nn.Linear(20, 40))
        >>> remove_weight_norm(m)
    """
    return remove_reparameterization(
        module,
        reparameterization=WeightNorm,
        remove_all=remove_all,
        name=name,
    )
def apply_reparameterization(module, reparameterization=None, name='', dim=0, hook_child=True):
    """
    Apply a weight reparameterization (e.g. weight normalization) to `module`.

    With a non-empty `name` only that parameter is reparameterized. With an
    empty `name` the function calls itself once for every key of
    ``module.state_dict()``.

    Args:
        module (nn.Module): containing module
        reparameterization (Reparameterization): reparameterization class to apply;
            must not be None
        name (str, optional): name of weight parameter
        dim (int, optional): dimension over which to perform reparameterization op
        hook_child (boolean, optional): adds reparameterization hook to direct parent of the
            parameters. If False, it's added to `module` instead. Default: True

    Returns:
        The original module with the reparameterization hook

    Example::

        >>> m = apply_reparameterization(nn.Linear(20, 40), WeightNorm)
        Linear (20 -> 40)
    """
    assert reparameterization is not None

    if name == '':
        # No explicit target: snapshot the state-dict keys first, then visit
        # each of them by name.
        for key in list(module.state_dict().keys()):
            apply_reparameterization(module, reparameterization, key, dim, hook_child)
        return module

    Reparameterization.apply(module, name, dim, reparameterization, hook_child)
    return module
def remove_reparameterization(module, reparameterization=Reparameterization,
                              name='', remove_all=False):
    """
    Removes the given reparameterization of a parameter from a module.
    If no parameter is supplied then all reparameterizations are removed.

    Behaviour by argument combination:
      * `name` given, `remove_all` False: remove the hook of type
        `reparameterization` registered on `module` for `name`; raise if none
        is found.
      * `remove_all` True: remove every hook of type `reparameterization`
        registered directly on `module` (no error if there is none).
      * neither: remove every such hook from `module` and all its submodules.

    Args:
        module (nn.Module): containing module
        reparameterization (Reparameterization): reparameterization class to remove
        name (str, optional): name of weight parameter
        remove_all (bool, optional): if True, remove all reparameterizations of given type. Default: False

    Returns:
        `module`, in every non-raising case.

    Raises:
        ValueError: if `name` is given, `remove_all` is False and no matching
            reparameterization is registered on `module`.

    Example:
        >>> m = apply_reparameterization(nn.Linear(20, 40),WeightNorm)
        >>> remove_reparameterization(m)
    """
    if name == '' and not remove_all:
        # module.modules() already yields `module` itself, so the root does
        # not need to be visited separately.
        for m in module.modules():
            remove_reparameterization(m, reparameterization=reparameterization, remove_all=True)
        return module

    to_remove = []
    for k, hook in module._forward_pre_hooks.items():
        if isinstance(hook, reparameterization) and (remove_all or hook.name == name):
            # hook.remove() restores the parameter and drops the backward
            # hook; the forward pre-hook entry is deleted below, after the
            # iteration over the dict has finished.
            hook.remove(module)
            to_remove.append(k)
    for k in to_remove:
        del module._forward_pre_hooks[k]

    if not to_remove and not remove_all:
        raise ValueError("reparameterization of '{}' not found in {}"
                         .format(name, module))
    # Previously the remove_all-with-no-match case fell through and returned None.
    return module
import torch
from torch.nn.parameter import Parameter
import sys
class Reparameterization(object):
    """
    Class interface for performing weight reparameterizations

    Subclasses implement :meth:`compute_weight` and :meth:`reparameterize`.
    An instance acts as the forward pre-hook of the module it is applied to
    (see :meth:`__call__`).

    Arguments:
        name (str): name of weight parameter
        dim (int): dimension over which to compute the norm
        module (nn.Module): parent module to which param `name` is registered to
        retain_forward (bool, optional): stored on the instance; it is not
            consulted anywhere in this base class. Default: True

    Attributes:
        reparameterization_names (list, str): contains names of all parameters
            needed to compute reparameterization.
        backward_hook_key (int): torch.utils.hooks.RemovableHandle.id for hook used in module backward pass.
        evaluated (bool): True once the weight has been recomputed by the
            forward hook; reset to False by the backward hook.
    """

    def __init__(self, name, dim, module, retain_forward=True):
        self.name = name
        self.dim = dim
        self.evaluated = False
        self.retain_forward = retain_forward
        self.reparameterization_names = []
        self.backward_hook_key = None
        self.module = module

    def compute_weight(self, module=None, name=None):
        """
        Computes reparameterized weight value to assign value to module attribute
        with name `name`.
        See WeightNorm class for example.

        Arguments:
            module (nn.Module): module with weight we'd like to reparameterize
            name (str, optional): name of weight parameter
        Returns:
            w (Tensor): Tensor object containing value of reparameterized weight
        """
        raise NotImplementedError

    def reparameterize(self, name, weight, dim):
        """
        Creates Parameters to be used for reparameterization and creates names
        for the module attributes these Parameters will correspond to.
        The parameters will be registered according to the names provided.
        See WeightNorm class for example.

        Arguments:
            name (str): name of weight parameter
            weight (Tensor): current value of the weight being reparameterized
            dim (int): dimension over which to compute parameterization
        Returns:
            names (list, str): names of Parameters to be used for reparameterization
            params (list, Parameter): Parameters to be used for reparameterization
        """
        raise NotImplementedError

    @staticmethod
    def apply(module, name, dim, reparameterization=None, hook_child=True):
        """
        Applies reparametrization to module's `name` parameter and modifies instance attributes as appropriate.
        `hook_child` adds reparameterization hook to direct parent of the parameters. If False, it's added to `module` instead.

        Returns:
            The reparameterization instance that was hooked in, or None when
            nothing was done: empty `name`, embedding modules, names that are
            not registered parameters (e.g. buffers), and weights with at
            most one dimension.
        """
        if reparameterization is None:
            reparameterization = Reparameterization
        module2use, name2use = Reparameterization.get_module_and_name(module, name)
        # does not work on sparse
        if name2use is None or isinstance(module2use, (torch.nn.Embedding, torch.nn.EmbeddingBag)):
            return

        weight = getattr(module2use, name2use)
        # Names taken from state_dict() may refer to buffers; those have no
        # entry in `_parameters`, so deleting one below would raise KeyError.
        if name2use not in module2use._parameters:
            return
        if weight.dim() <= 1:
            return

        if hook_child:
            fn = reparameterization(name2use, dim, module2use)
        else:
            fn = reparameterization(name, dim, module)

        # remove weight from parameter list
        del module2use._parameters[name2use]

        # add parameters of reparameterization of parameter to module
        names, params = fn.reparameterize(name2use, weight, dim)
        for n, p in zip(names, params):
            module2use.register_parameter(n, p)

        # add parameters to reparameterization so they can be removed later
        fn.reparameterization_names = names

        # placeholder attribute; the forward pre-hook fills in the real weight
        setattr(module2use, name2use, None)

        hook_module = module2use if hook_child else module
        # recompute weight before every forward()
        hook_module.register_forward_pre_hook(fn)

        # reset the `evaluated` flag during backward
        handle = hook_module.register_backward_hook(fn.backward_hook)
        # get hook key so we can delete it later
        fn.backward_hook_key = handle.id

        return fn

    @staticmethod
    def get_module_and_name(module, name):
        """
        Resolves a dotted `name` relative to `module`.

        Returns:
            (owner, attr): the object reached by following every dotted
            component except the last via getattr, and the last component.
            ``(None, None)`` when `name` is the empty string.
        """
        names = name.split('.')
        if len(names) == 1 and names[0] == '':
            return None, None
        module2use = module
        for attr in names[:-1]:
            module2use = getattr(module2use, attr)
        return module2use, names[-1]

    def get_params(self, module):
        """gets params of reparameterization based on known attribute names"""
        return [getattr(module, n) for n in self.reparameterization_names]

    def remove(self, module):
        """removes reparameterization and backward hook (does not remove forward hook)"""
        module2use, name2use = Reparameterization.get_module_and_name(module, self.name)
        for p in self.get_params(module2use):
            p.requires_grad = False
        # Compute the final weight before the helper parameters are dropped.
        weight = self.compute_weight(module2use, name2use)
        delattr(module2use, name2use)
        for n in self.reparameterization_names:
            del module2use._parameters[n]
        module2use.register_parameter(name2use, Parameter(weight.data))
        del module._backward_hooks[self.backward_hook_key]

    def __call__(self, module, inputs):
        """callable hook for forward pass"""
        module2use, name2use = Reparameterization.get_module_and_name(module, self.name)
        _w = getattr(module2use, name2use)
        # Recompute only when the cached weight is stale or missing.
        if not self.evaluated or _w is None:
            setattr(module2use, name2use, self.compute_weight(module2use, name2use))
            self.evaluated = True

    def backward_hook(self, module, grad_input, grad_output):
        """callable hook for backward pass: marks the cached weight as stale"""
        self.evaluated = False
import torch
from torch.nn.parameter import Parameter
from ..fp16_utils import Fused_Weight_Norm
import time
from .reparameterization import Reparameterization
def _norm(p, dim):
"""Computes the norm over all dimensions except dim"""
if dim is None:
return p.norm()
elif dim == 0:
output_size = (p.size(0),) + (1,) * (p.dim() - 1)
return p.contiguous().view(p.size(0), -1).norm(dim=1).view(*output_size)
elif dim == p.dim() - 1:
output_size = (1,) * (p.dim() - 1) + (p.size(-1),)
return p.contiguous().view(-1, p.size(-1)).norm(dim=0).view(*output_size)
return _norm(p.transpose(0, dim), 0).transpose(0, dim)
# The CUDA and CPU half-precision tensor classes, grouped in one tuple.
HALF_TYPES = (torch.cuda.HalfTensor, torch.HalfTensor)
class WeightNorm(Reparameterization):
    r"""
    Weight normalization expressed as a :class:`Reparameterization`.

    The magnitude of a weight tensor is decoupled from its direction: the
    parameter called `name` (e.g. "weight") is replaced by a magnitude
    parameter (e.g. "weight_g") and a direction parameter (e.g. "weight_v").
    A hook recomputes the weight tensor from those two before every
    :meth:`~Module.forward` call.

    .. math::
         \mathbf{w} = g \dfrac{\mathbf{v}}{\|\mathbf{v}\|}

    With the default `dim=0` the norm is computed independently per output
    channel/plane; use `dim=None` for a single norm over the entire tensor.
    """

    def compute_weight(self, module=None, name=None):
        """
        Builds the weight-normalized tensor for the attribute called `name`.

        Arguments:
            module (nn.Module, optional): module with weight we'd like to
                reparameterize; falls back to ``self.module`` when None
            name (str, optional): name of weight parameter; falls back to
                ``self.name`` when None
        Returns:
            w (Tensor): Tensor object containing value of reparameterized weight
        """
        owner = self.module if module is None else module
        attr = self.name if name is None else name
        owner, attr = Reparameterization.get_module_and_name(owner, attr)

        magnitude = getattr(owner, attr + '_g')
        direction = getattr(owner, attr + '_v').contiguous()
        return Fused_Weight_Norm.apply(direction, magnitude, self.dim)

    def reparameterize(self, name, weight, dim):
        """
        Creates the Parameters g and v used for weight normalization together
        with the attribute names under which they will be registered.

        Arguments:
            name (str): name of weight parameter
            weight (Tensor): current value of the weight
            dim (int): dimension over which to compute parameterization
        Returns:
            names (list, str): ``[name + '_g', name + '_v']``
            params (list, Parameter): the norm of `weight` (as computed by
                ``_norm(weight, dim)``) and the data of `weight` itself
        """
        magnitude = Parameter(_norm(weight, dim).data)
        direction = Parameter(weight.data)
        return [name + '_g', name + '_v'], [magnitude, direction]
......@@ -44,7 +44,6 @@ Some other useful material, including GTC 2019 and Pytorch DevCon 2019 Slides, c
:caption: Deprecated mixed precision API
fp16_util
.. reparameterization
.. RNN
Indices and tables
......
......@@ -105,20 +105,6 @@ cmdclass = {}
ext_modules = []
extras = {}
# Handling of the custom "--pyprof" command line flag.
if "--pyprof" in sys.argv:
    # PyProf now lives in its own repository; warn that the bundled copy is
    # deprecated.
    string = (
        "\n\nPyprof has been moved to its own dedicated repository and will "
        "soon be removed from Apex.  Please visit\n"
        "https://github.com/NVIDIA/PyProf\n"
        "for the latest version."
    )
    warnings.warn(string, DeprecationWarning)

    # Every line of requirements.txt becomes an entry of the "pyprof" extra.
    with open("requirements.txt") as f:
        required_packages = f.read().splitlines()
        extras["pyprof"] = required_packages
    # Drop the flag from sys.argv (presumably so that later argument handling
    # does not see it -- TODO confirm against the rest of setup.py).
    sys.argv.remove("--pyprof")
else:
    warnings.warn("Option --pyprof not specified. Not installing PyProf dependencies!")
if "--cpp_ext" in sys.argv or "--cuda_ext" in sys.argv:
if TORCH_MAJOR == 0:
......
import inspect
import unittest
from apex.pyprof.prof.data import Data
from apex.pyprof.prof.prof import foo
class TestPyProfData(unittest.TestCase):
    """Smoke test: feeds recorded kernel dictionaries through Data and foo."""

    def __init__(self, testName):
        super().__init__(testName)

    def setUp(self):
        pass

    def tearDown(self):
        pass

    def test_data(self):
        """Builds a Data object per recorded kernel and sets its params; passes if nothing raises."""
        recorded_kernels = [
            {'kShortName': 'elementwise_kernel', 'kDuration': 2848, 'layer': [], 'trace': [], 'reprMarkers': [], 'marker': ["{'mod': 'Tensor', 'op': 'float', 'args': [{'name': '', 'type': 'tensor', 'shape': (18, 104, 160), 'dtype': 'bool'}]}"], 'seqMarker': ['to, seq = 60471'], 'seqId': [60471], 'subSeqId': 0, 'altSeqId': [], 'dir': 'fprop', 'mod': ['Tensor'], 'op': ['float'], 'tid': 1431533376, 'device': 0, 'stream': 7, 'grid': (585, 1, 1), 'block': (512, 1, 1), 'kLongName': 'void at::native::elementwise_kernel<512, 1, void at::native::gpu_kernel_impl<void at::native::copy_kernel_impl<float, bool>(at::TensorIterator&)::{lambda(bool)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl<float, bool>(at::TensorIterator&)::{lambda(bool)#1} const&)::{lambda(int)#1}>(int, void at::native::gpu_kernel_impl<void at::native::copy_kernel_impl<float, bool>(at::TensorIterator&)::{lambda(bool)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl<float, bool>(at::TensorIterator&)::{lambda(bool)#1} const&)::{lambda(int)#1})'},
            {'kShortName': 'elementwise_kernel', 'kDuration': 201182, 'layer': [], 'trace': [], 'reprMarkers': [], 'marker': ["{'mod': 'Tensor', 'op': 'clone', 'args': [{'name': '', 'type': 'tensor', 'shape': (18, 4, 416, 640), 'dtype': 'float32'}]}"], 'seqMarker': ['clone, seq = 60161'], 'seqId': [60161], 'subSeqId': 0, 'altSeqId': [], 'dir': 'fprop', 'mod': ['Tensor'], 'op': ['clone'], 'tid': 1431533376, 'device': 0, 'stream': 7, 'grid': (37440, 1, 1), 'block': (128, 1, 1), 'kLongName': 'void at::native::elementwise_kernel<128, 4, void at::native::gpu_kernel_impl<void at::native::copy_kernel_impl<float, float>(at::TensorIterator&)::{lambda(float)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl<float, float>(at::TensorIterator&)::{lambda(float)#1} const&)::{lambda(int)#2}>(int, void at::native::gpu_kernel_impl<void at::native::copy_kernel_impl<float, float>(at::TensorIterator&)::{lambda(float)#1}>(at::TensorIterator&, void at::native::copy_kernel_impl<float, float>(at::TensorIterator&)::{lambda(float)#1} const&)::{lambda(int)#2})'},
        ]

        for kernel in recorded_kernels:
            data = Data(kernel)
            handler = foo(kernel['mod'], kernel['op'], data)
            data.setParams(handler.params())
def run_tests(test_name):
    """
    Run every method of TestPyProfData whose name contains 'test_'.

    `test_name` is used to construct a probe instance for method discovery
    and is printed in the banner line; the suite itself holds one test case
    per discovered method and is run with a TextTestRunner.
    """
    probe = TestPyProfData(test_name)
    bound_methods = inspect.getmembers(probe, predicate=inspect.ismethod)
    selected = [method_name for method_name, _ in bound_methods if 'test_' in method_name]

    print(f'Running tests for {test_name}')

    suite = unittest.TestSuite()
    for method_name in selected:
        suite.addTest(TestPyProfData(method_name))
    unittest.TextTestRunner().run(suite)
# Script entry point: run the test suite when this file is executed directly.
if __name__ == '__main__':
    run_tests('test_data')
import test_pyprof_nvtx.TestPyProfNvtx as TestPyProfNvtx
import inspect
import os
import torch
import torch.nn.functional as F
import unittest
from apex import pyprof
pyprof.nvtx.init()
# TODO: add tests for:
# F.bilinear, F.l1_loss, F.multilabel_soft_margin_loss, F.multi_margin_loss
class TestPyProfNvtx(unittest.TestCase):
def __init__(self, testName, dtype=torch.float16):
super().__init__(testName)
self.dtype = dtype
def setUp(self):
pass
def tearDown(self):
pass
def test_conv1d(self):
# Data and weight tensors
tensor1d_in_conv = torch.randn(32, 3, 224, device='cuda', dtype=self.dtype)
tensor1d_in_conv_grouped = torch.randn(32, 6, 224, device='cuda', dtype=self.dtype)
conv1d_filter = torch.randn(16, 3, 3, device='cuda', dtype=self.dtype)
conv1d_bias = torch.ones(16, device='cuda', dtype=self.dtype)
# Vanilla conv1d
conv1d_out_vanilla = F.conv1d(tensor1d_in_conv, conv1d_filter)
# conv1d with bias
conv1d_out_with_bias = F.conv1d(tensor1d_in_conv, conv1d_filter, bias=conv1d_bias)
# conv1d - stride > 1
conv1d_out_strided = F.conv1d(tensor1d_in_conv, conv1d_filter, stride=2)
# conv1d - dilation > 1
conv1d_out_dilated = F.conv1d(tensor1d_in_conv, conv1d_filter, dilation=2)
# conv1d - groups > 1
conv1d_out_grouped = F.conv1d(tensor1d_in_conv_grouped, conv1d_filter, groups=2)
# conv1d - padding with zeros
conv1d_out_padding_zeros = F.conv1d(tensor1d_in_conv, conv1d_filter, padding=6)
def test_conv2d(self):
# Data and weight tensors
tensor2d_in_conv = torch.randn(32, 3, 224, 224, device='cuda', dtype=self.dtype)
tensor2d_in_conv_grouped = torch.randn(32, 6, 224, 224, device='cuda', dtype=self.dtype)
conv2d_filter = torch.randn(16, 3, 3, 3, device='cuda', dtype=self.dtype)
conv2d_bias = torch.ones(16, device='cuda', dtype=self.dtype)
# Vanilla conv2d
conv2d_out_vanilla = F.conv2d(tensor2d_in_conv, conv2d_filter)
# conv2d with bias
conv2d_with_bias = F.conv2d(tensor2d_in_conv, conv2d_filter, bias=conv2d_bias)
# conv2d - stride > 1
conv2d_out_strided = F.conv2d(tensor2d_in_conv, conv2d_filter, stride=2)
# conv2d - dilation > 1
conv2d_out_dilated = F.conv2d(tensor2d_in_conv, conv2d_filter, dilation=2)
# conv2d - groups > 1
conv2d_out_grouped = F.conv2d(tensor2d_in_conv_grouped, conv2d_filter, groups=2)
# conv2d - padding with zeros
conv2d_out_padding_zeros = F.conv2d(tensor2d_in_conv, conv2d_filter, padding=6)
def test_conv3d(self):
# Data and weight tensors
tensor3d_in_conv = torch.randn(32, 3, 16, 224, 224, device='cuda', dtype=self.dtype)
tensor3d_in_conv_grouped = torch.randn(32, 6, 16, 224, 224, device='cuda', dtype=self.dtype)
conv3d_filter = torch.randn(16, 3, 3, 3, 3, device='cuda', dtype=self.dtype)
conv3d_bias = torch.ones(16, device='cuda', dtype=self.dtype)
# Vanilla conv3d
conv3d_out_vanilla = F.conv3d(tensor3d_in_conv, conv3d_filter)
# conv3d - stride > 1
conv3d_out_strided = F.conv3d(tensor3d_in_conv, conv3d_filter, stride=2)
# conv3d - dilation > 1
conv3d_out_dilated = F.conv3d(tensor3d_in_conv, conv3d_filter, dilation=2)
# conv3d - groups > 1
conv3d_out_grouped = F.conv3d(tensor3d_in_conv_grouped, conv3d_filter, groups=2)
# conv3d - padding with zeros
conv3d_out_padding_zeros = F.conv3d(tensor3d_in_conv, conv3d_filter, padding=6)
def test_conv_transpose1d(self):
# Data and weight tensors
conv_transpose1d_tensor = torch.randn(64, 16, 64, device='cuda', dtype=self.dtype)
conv_transpose1d_filter = torch.randn(16, 32, 3, device='cuda', dtype=self.dtype)
conv_transpose1d_bias = torch.randn(32, device='cuda', dtype=self.dtype)
# Conv transpose runs
conv_transpose1d_out = F.conv_transpose1d(conv_transpose1d_tensor, conv_transpose1d_filter)
conv_transpose1d_out_biased = F.conv_transpose1d(conv_transpose1d_tensor, conv_transpose1d_filter, bias=conv_transpose1d_bias)
conv_transpose1d_out_strided = F.conv_transpose1d(conv_transpose1d_tensor, conv_transpose1d_filter, stride=2)
conv_transpose1d_out_padded = F.conv_transpose1d(conv_transpose1d_tensor, conv_transpose1d_filter, padding=3)
conv_transpose1d_out2_padded = F.conv_transpose1d(conv_transpose1d_tensor, conv_transpose1d_filter, output_padding=2, dilation=3)
conv_transpose1d_out_grouped = F.conv_transpose1d(conv_transpose1d_tensor, conv_transpose1d_filter, groups=2)
conv_transpose1d_out_dilated = F.conv_transpose1d(conv_transpose1d_tensor, conv_transpose1d_filter, dilation=2)
def test_conv_transpose2d(self):
# Data and weight tensors
conv_transpose2d_tensor = torch.randn(64, 8, 5, 5, device='cuda', dtype=self.dtype)
conv_transpose2d_filter = torch.randn(8, 16, 3, 3, device='cuda', dtype=self.dtype)
conv_transpose2d_bias = torch.randn(16, device='cuda', dtype=self.dtype)
# Conv transpose runs
conv_transpose2d_out = F.conv_transpose2d(conv_transpose2d_tensor, conv_transpose2d_filter)
conv_transpose2d_out_biased = F.conv_transpose2d(conv_transpose2d_tensor, conv_transpose2d_filter, bias=conv_transpose2d_bias)
conv_transpose2d_out_strided = F.conv_transpose2d(conv_transpose2d_tensor, conv_transpose2d_filter, stride=2)
conv_transpose2d_out_padded = F.conv_transpose2d(conv_transpose2d_tensor, conv_transpose2d_filter, padding=3)
conv_transpose2d_out2_padded = F.conv_transpose2d(conv_transpose2d_tensor, conv_transpose2d_filter, output_padding=2, dilation=3)
conv_transpose2d_out_grouped = F.conv_transpose2d(conv_transpose2d_tensor, conv_transpose2d_filter, groups=2)
conv_transpose2d_out_dilated = F.conv_transpose2d(conv_transpose2d_tensor, conv_transpose2d_filter, dilation=2)
def test_conv_transpose3d(self):
# Data and weight tensors
conv_transpose3d_tensor = torch.randn(20, 16, 50, 10, 20, device='cuda', dtype=self.dtype)
conv_transpose3d_filter = torch.randn(16, 33, 3, 3, 3, device='cuda', dtype=self.dtype)
conv_transpose3d_bias = torch.randn(33, device='cuda', dtype=self.dtype)
# Conv transpose runs
conv_transpose3d_out = F.conv_transpose3d(conv_transpose3d_tensor, conv_transpose3d_filter)
conv_transpose3d_out_biased = F.conv_transpose3d(conv_transpose3d_tensor, conv_transpose3d_filter, bias=conv_transpose3d_bias)
conv_transpose3d_out_strided = F.conv_transpose3d(conv_transpose3d_tensor, conv_transpose3d_filter, stride=2)
conv_transpose3d_out_padded = F.conv_transpose3d(conv_transpose3d_tensor, conv_transpose3d_filter, padding=3)
conv_transpose3d_out2_padded = F.conv_transpose3d(conv_transpose3d_tensor, conv_transpose3d_filter, output_padding=2, dilation=3)
conv_transpose3d_out_grouped = F.conv_transpose3d(conv_transpose3d_tensor, conv_transpose3d_filter, groups=2)
conv_transpose3d_out_dilated = F.conv_transpose3d(conv_transpose3d_tensor, conv_transpose3d_filter, dilation=2)
def test_unfold(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
kernel_size = (4, 5)
inp_unf_dilated = F.unfold(inp, kernel_size, dilation=2)
inp_unf_padded = F.unfold(inp, kernel_size, padding=2)
inp_unf_strided = F.unfold(inp, kernel_size, stride=2)
def test_fold(self):
inp = torch.randn(3, 20, 20, device='cuda', dtype=self.dtype)
inp_folded = F.fold(inp, (4, 5), (1, 1))
def test_avg_pool1d(self):
inp = torch.randn(1, 1, 28, device='cuda', dtype=self.dtype)
out = F.avg_pool1d(inp, kernel_size=5, stride=2, padding=2, ceil_mode=True, count_include_pad=False)
def test_avg_pool2d(self):
inp = torch.randn(1, 3, 224, 224, device='cuda', dtype=self.dtype)
out = F.avg_pool2d(inp, kernel_size=5, stride=2, padding=2, ceil_mode=True, count_include_pad=False)
def test_avg_pool3d(self):
inp = torch.randn(1, 3, 16, 224, 224, device='cuda', dtype=self.dtype)
out = F.avg_pool3d(inp, kernel_size=5, stride=2, padding=2, ceil_mode=True, count_include_pad=False)
def test_adaptive_avg_pool1d(self):
inp = torch.randn(1, 1, 28, device='cuda', dtype=self.dtype)
out = F.adaptive_avg_pool1d(inp, output_size=5)
def test_adaptive_avg_pool2d(self):
inp = torch.randn(1, 16, 32, 32, device='cuda', dtype=self.dtype)
out = F.adaptive_avg_pool2d(inp, output_size=5)
def test_adaptive_avg_pool3d(self):
inp = torch.randn(1, 16, 16, 32, 32, device='cuda', dtype=self.dtype)
out = F.adaptive_avg_pool3d(inp, output_size=5)
def test_max_pool1d(self):
inp = torch.randn(1, 16, 32, device='cuda', dtype=self.dtype)
out = F.max_pool1d(inp, kernel_size=5, stride=2, padding=2, return_indices=True, ceil_mode=True)
def test_max_pool2d(self):
inp = torch.randn(1, 16, 32, 32, device='cuda', dtype=self.dtype)
out = F.max_pool2d(inp, kernel_size=5, stride=2, padding=2, return_indices=True, ceil_mode=True)
def test_max_pool3d(self):
inp = torch.randn(1, 16, 16, 32, 32, device='cuda', dtype=self.dtype)
out = F.max_pool3d(inp, kernel_size=5, stride=2, padding=2, return_indices=True, ceil_mode=True)
def test_adaptive_max_pool1d(self):
inp = torch.randn(1, 16, 28, device='cuda', dtype=self.dtype)
out = F.adaptive_max_pool1d(inp, output_size=5, return_indices=True)
def test_adaptive_max_pool2d(self):
inp = torch.randn(1, 16, 32, 32, device='cuda', dtype=self.dtype)
out = F.adaptive_max_pool2d(inp, output_size=5, return_indices=True)
def test_adaptive_max_pool3d(self):
inp = torch.randn(1, 16, 16, 32, 32, device='cuda', dtype=self.dtype)
out = F.adaptive_max_pool3d(inp, output_size=5, return_indices=True)
def test_max_unpool1d(self):
inp = torch.randn(1, 16, 32, device='cuda', dtype=self.dtype)
output, indices = F.max_pool1d(inp, kernel_size=5, stride=2, padding=2, return_indices=True, ceil_mode=True)
output = F.max_unpool1d(output, indices, kernel_size=2, stride=2, padding=2)
def test_max_unpool2d(self):
inp = torch.randn(1, 16, 32, 32, device='cuda', dtype=self.dtype)
output, indices = F.max_pool2d(inp, kernel_size=5, stride=2, padding=2, return_indices=True, ceil_mode=True)
output = F.max_unpool2d(output, indices, kernel_size=2, stride=2, padding=2)
def test_max_unpool3d(self):
inp = torch.randn(1, 16, 8, 32, 32, device='cuda', dtype=self.dtype)
output, indices = F.max_pool3d(inp, kernel_size=5, stride=2, padding=2, return_indices=True, ceil_mode=True)
output = F.max_unpool3d(output, indices, kernel_size=2, stride=2, padding=2)
def test_lp_pool1d(self):
inp = torch.randn(1, 32, 64, device='cuda', dtype=self.dtype)
output = F.lp_pool1d(inp, 2, 3, stride=2, ceil_mode=True)
def test_lp_pool2d(self):
#torch.nn.LPPool2d(norm_type, kernel_size, stride=None, ceil_mode=False)
inp = torch.randn(1, 32, 64, 64, device='cuda', dtype=self.dtype)
output = F.lp_pool2d(inp, 2, 3, stride=2, ceil_mode=True)
def test_threshold(self):
inp = torch.randn(1, 8, 32, 32, device='cuda', dtype=self.dtype)
output = F.threshold(inp, 6, 6, inplace=False)
def test_threshold_(self):
inp = torch.randn(1, 8, 32, 32, device='cuda', dtype=self.dtype)
output = F.threshold_(inp, 6, 6)
def test_relu(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.relu(inp, inplace=False)
def test_relu_(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.relu_(inp)
def test_hardtanh(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.hardtanh(inp, min_val=-1., max_val=1., inplace=False)
def test_hardtanh_(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.hardtanh_(inp, min_val=-1., max_val=1.)
def test_relu6(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.relu6(inp, inplace=False)
def test_elu(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.elu(inp, alpha=1.0, inplace=False)
def test_elu_(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.elu_(inp, alpha=1.0)
def test_selu(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.selu(inp)
def test_celu(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.celu(inp, alpha=1.0, inplace=False)
def test_leaky_relu(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.leaky_relu(inp, negative_slope=0.01, inplace=False)
def test_leaky_relu_(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.leaky_relu_(inp, negative_slope=0.01)
def test_prelu(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
weight = torch.randn(1, device='cuda', dtype=self.dtype)
output = F.prelu(inp, weight)
def test_rrelu(self):
inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
output = F.rrelu(inp, lower=1./8, upper=1./3, training=False, inplace=False)
def test_rrelu_(self):
    """Runs the in-place randomized leaky ReLU, F.rrelu_, on a CUDA tensor."""
    inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
    # The original called F.rrelu here, duplicating test_rrelu and never
    # exercising the in-place variant this test is named after.
    output = F.rrelu_(inp, lower=1./8, upper=1./3, training=False)
def test_glu(self):
    """Run F.glu along the last dimension of a random (1, 3, 32, 32) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(1, 3, 32, 32, **tensor_opts)
    result = F.glu(x, dim=-1)
def test_logsigmoid(self):
    """Run F.logsigmoid on a random (1, 3, 32, 32) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(1, 3, 32, 32, **tensor_opts)
    result = F.logsigmoid(x)
def test_hardshrink(self):
    """Run F.hardshrink with lambd=0.5 on a random CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(1, 3, 32, 32, **tensor_opts)
    result = F.hardshrink(x, lambd=0.5)
def test_tanhshrink(self):
    """Run F.tanhshrink on a random (1, 3, 32, 32) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(1, 3, 32, 32, **tensor_opts)
    result = F.tanhshrink(x)
def test_softsign(self):
    """Run F.softsign on a random (1, 3, 32, 32) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(1, 3, 32, 32, **tensor_opts)
    result = F.softsign(x)
def test_softplus(self):
    """Run F.softplus (beta=1, threshold=20) on a random CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(1, 3, 32, 32, **tensor_opts)
    result = F.softplus(x, beta=1, threshold=20)
def test_softmin(self):
    """Run F.softmin over dim 1 of a random (16, 1024) CUDA tensor."""
    precision = self.dtype
    x = torch.randn(16, 1024, device='cuda', dtype=precision)
    result = F.softmin(x, dim=1, _stacklevel=3, dtype=precision)
def test_softmax(self):
    """Run F.softmax over dim 1 of a random (16, 1024) CUDA tensor."""
    precision = self.dtype
    x = torch.randn(16, 1024, device='cuda', dtype=precision)
    result = F.softmax(x, dim=1, _stacklevel=3, dtype=precision)
def test_softshrink(self):
    """Run F.softshrink with lambd=0.5 on a random CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(1, 3, 32, 32, **tensor_opts)
    result = F.softshrink(x, lambd=0.5)
def test_gumbel_softmax(self):
    """Run soft (hard=False) F.gumbel_softmax over the last dim of a random (16, 1024) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    logits = torch.randn(16, 1024, **tensor_opts)
    result = F.gumbel_softmax(logits, tau=1, hard=False, eps=1e-10, dim=-1)
def test_log_softmax(self):
    """Run F.log_softmax over the last dim of a random (16, 1024) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(16, 1024, **tensor_opts)
    result = F.log_softmax(x, dim=-1, _stacklevel=3)
def test_tanh(self):
    """Run torch.tanh on a random (1, 3, 32, 32) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(1, 3, 32, 32, **tensor_opts)
    result = torch.tanh(x)
def test_sigmoid(self):
    """Run torch.sigmoid on a random (1, 3, 32, 32) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(1, 3, 32, 32, **tensor_opts)
    result = torch.sigmoid(x)
def test_batch_norm(self):
    """Run eval-mode F.batch_norm (training=False) on a random (1, 3, 32, 32) CUDA tensor.

    With training=False the supplied running statistics are used for the
    normalisation, so the running variance must be non-negative; it is
    drawn from torch.rand (values in [0, 1)) instead of torch.randn, which
    could yield negative variances and therefore NaN outputs.
    """
    inp = torch.randn(1, 3, 32, 32, device='cuda', dtype=self.dtype)
    # One running mean / variance entry per channel (3 channels).
    running_mean = torch.randn(3, device='cuda', dtype=self.dtype)
    running_var = torch.rand(3, device='cuda', dtype=self.dtype)
    output = F.batch_norm(inp, running_mean, running_var, weight=None, bias=None, training=False, momentum=0.1, eps=1e-05)
def test_instance_norm(self):
    """Run F.instance_norm with use_input_stats=True on a random (1, 3, 32, 32) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(1, 3, 32, 32, **tensor_opts)
    mean_buf = torch.randn(3, **tensor_opts)
    var_buf = torch.randn(3, **tensor_opts)
    result = F.instance_norm(x, running_mean=mean_buf, running_var=var_buf, weight=None, bias=None, use_input_stats=True, momentum=0.1, eps=1e-05)
def test_layer_norm(self):
    """Run F.layer_norm over all non-batch dims of a random (1, 3, 32, 32) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(1, 3, 32, 32, **tensor_opts)
    # Normalise over every dimension except the leading one.
    normalized_shape = x.size()[1:]
    result = F.layer_norm(x, normalized_shape, weight=None, bias=None, eps=1e-05)
def test_local_response_norm(self):
    """Run F.local_response_norm (size 2) on a random (16, 8, 64, 64) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(16, 8, 64, 64, **tensor_opts)
    result = F.local_response_norm(x, 2, alpha=0.0001, beta=0.75, k=1.0)
def test_normalize(self):
    """Run L2 F.normalize over dim 1 of a random (16, 8, 64, 64) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(16, 8, 64, 64, **tensor_opts)
    result = F.normalize(x, p=2, dim=1, eps=1e-12, out=None)
def test_linear(self):
    """Run bias-free F.linear: (32, 64, 128) input against a (256, 128) weight on CUDA."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(32, 64, 128, **tensor_opts)
    w = torch.randn(256, 128, **tensor_opts)
    result = F.linear(x, w, bias=None)
def test_dropout(self):
    """Run training-mode F.dropout (p=0.5) on a random (16, 8, 64, 64) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(16, 8, 64, 64, **tensor_opts)
    result = F.dropout(x, p=0.5, training=True, inplace=False)
def test_alpha_dropout(self):
    """Run training-mode F.alpha_dropout (p=0.5) on a random (16, 8, 64, 64) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(16, 8, 64, 64, **tensor_opts)
    result = F.alpha_dropout(x, p=0.5, training=True, inplace=False)
def test_dropout2d(self):
    """Run training-mode F.dropout2d (p=0.5) on a random (16, 8, 64, 64) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(16, 8, 64, 64, **tensor_opts)
    result = F.dropout2d(x, p=0.5, training=True, inplace=False)
def test_dropout3d(self):
    """Run training-mode F.dropout3d (p=0.5) on a random (16, 8, 32, 64, 64) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(16, 8, 32, 64, 64, **tensor_opts)
    result = F.dropout3d(x, p=0.5, training=True, inplace=False)
def test_embedding(self):
    """Run F.embedding: (128, 16) random indices into a (1024, 32) random table on CUDA."""
    num_rows, row_width = 1024, 32
    # Indices are drawn from [0, num_rows) so every lookup is in range.
    indices = torch.randint(0, num_rows, (128, 16), device='cuda')
    table = torch.randn(num_rows, row_width, device='cuda', dtype=self.dtype)
    result = F.embedding(indices, table, padding_idx=None, max_norm=None, norm_type=2.0, scale_grad_by_freq=False, sparse=False)
def test_embedding_bag(self):
    """Run mean-mode F.embedding_bag: (128, 16) random indices into a (1024, 32) table on CUDA."""
    num_rows, row_width = 1024, 32
    # Indices are drawn from [0, num_rows) so every lookup is in range.
    indices = torch.randint(0, num_rows, (128, 16), device='cuda')
    table = torch.randn(num_rows, row_width, device='cuda', dtype=self.dtype)
    result = F.embedding_bag(
        indices, table, offsets=None, max_norm=None, norm_type=2,
        scale_grad_by_freq=False, mode='mean', sparse=False)
def test_one_hot(self):
    """Run F.one_hot on a (128, 16) tensor of random class indices on CUDA.

    The same ``num_classes`` value bounds the random indices and is passed
    to ``F.one_hot`` (previously the call repeated the literal 10), so the
    two cannot drift apart.
    """
    num_classes = 10
    inp = torch.randint(0, num_classes, (128, 16), device='cuda')
    output = F.one_hot(inp, num_classes=num_classes)
def test_pairwise_distance(self):
    """Run L2 F.pairwise_distance between two random (1024, 128) CUDA tensors."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    lhs = torch.randn(1024, 128, **tensor_opts)
    rhs = torch.randn(1024, 128, **tensor_opts)
    result = F.pairwise_distance(lhs, rhs, p=2.0, eps=1e-06, keepdim=False)
def test_cosine_similarity(self):
    """Run F.cosine_similarity over dim 1 of two random (1024, 128) CUDA tensors."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    lhs = torch.randn(1024, 128, **tensor_opts)
    rhs = torch.randn(1024, 128, **tensor_opts)
    result = F.cosine_similarity(lhs, rhs, dim=1, eps=1e-8)
def test_pdist(self):
    """Run L2 F.pdist on a random (128, 128) fp32 CUDA tensor."""
    # fp32 is used regardless of self.dtype: pdist has no fp16 implementation.
    points = torch.randn(128, 128, device='cuda', dtype=torch.float32)
    result = F.pdist(points, p=2)
def test_binary_cross_entropy(self):
    """Run F.binary_cross_entropy on sigmoid-squashed random (32, 128) fp32 CUDA input.

    Targets are drawn from torch.rand so they lie in [0, 1), the range
    binary cross entropy expects; the previous torch.randn targets were
    unbounded and could be negative or greater than one.
    """
    # binary_cross_entropy is not implemented for fp16, so fp32 is forced
    # regardless of self.dtype.
    inp = torch.randn(32, 128, device='cuda', dtype=torch.float32, requires_grad=True)
    target = torch.rand(32, 128, device='cuda', dtype=torch.float32, requires_grad=False)
    # sigmoid maps the raw input into (0, 1) before it is used as a probability.
    output = F.binary_cross_entropy(torch.sigmoid(inp), target)
def test_binary_cross_entropy_with_logits(self):
    """Run F.binary_cross_entropy_with_logits on random (32, 128) CUDA logits with 0/1 targets."""
    logits = torch.randn(32, 128, device='cuda', dtype=self.dtype, requires_grad=True)
    # random_(2) fills the tensor in place with values drawn from {0, 1}.
    labels = torch.empty_like(logits).random_(2)
    result = F.binary_cross_entropy_with_logits(logits, labels)
def test_poisson_nll_loss(self):
    """Run mean-reduced F.poisson_nll_loss (log_input=True) on random (32, 128) CUDA tensors."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    pred = torch.randn(32, 128, requires_grad=True, **tensor_opts)
    expected = torch.randn(32, 128, requires_grad=False, **tensor_opts)
    result = F.poisson_nll_loss(
        pred, expected, log_input=True, full=False,
        size_average=None, eps=1e-08, reduce=None, reduction='mean')
def test_cosine_embedding_loss(self):
    """Run mean-reduced F.cosine_embedding_loss on two random (32, 128) CUDA inputs.

    The loss expects each target to be +1 or -1, so targets are sampled
    from {-1, +1}. The previous torch.randn targets were arbitrary floats
    that essentially never equal either label.
    """
    inp1 = torch.randn(32, 128, device='cuda', dtype=self.dtype, requires_grad=True)
    inp2 = torch.randn(32, 128, device='cuda', dtype=self.dtype, requires_grad=True)
    # randint's upper bound is exclusive: {0, 1} * 2 - 1 -> {-1, +1}.
    target = (torch.randint(0, 2, (32,), device='cuda') * 2 - 1).type_as(inp1)
    output = F.cosine_embedding_loss(inp1, inp2, target, margin=0,
                                     size_average=None, reduce=None, reduction='mean')
def test_cross_entropy(self):
    """Run mean-reduced F.cross_entropy on random (32, 128) CUDA logits with random class targets."""
    logits = torch.randn(32, 128, device='cuda', dtype=self.dtype, requires_grad=True)
    # Class indices are drawn from [0, 100), within the 128 logit columns.
    labels = torch.randint(0, 100, (32,), device='cuda', dtype=torch.long, requires_grad=False)
    result = F.cross_entropy(
        logits, labels, weight=None, size_average=None,
        ignore_index=-100, reduce=None, reduction='mean')
def test_ctc_loss(self):
    """Run F.ctc_loss on random fp32 log-probabilities of shape (50, 16, 20) on CUDA."""
    # fp32 is forced regardless of self.dtype: per the original note, the
    # _th_normal_ sampling used when building log_probs is not supported for fp16.
    raw_scores = torch.randn(50, 16, 20, device='cuda', dtype=torch.float32)
    log_probs = raw_scores.log_softmax(2).detach().requires_grad_()
    labels = torch.randint(1, 20, (16, 30), device='cuda', dtype=torch.long)
    # Both length tensors are created without a device argument.
    in_lens = torch.full((16,), 50, dtype=torch.long)
    label_lens = torch.randint(10, 30, (16,), dtype=torch.long)
    result = F.ctc_loss(log_probs, labels, in_lens, label_lens)
def test_hinge_embedding_loss(self):
    """Run mean-reduced F.hinge_embedding_loss on a random (128, 32) CUDA tensor.

    Targets are sampled from {-1, +1}. The previous expression
    ``torch.randint(0, 1, ...) - 1`` always produced -1 because randint's
    upper bound is exclusive, so the +1 case was never exercised.
    """
    inp = torch.randn(128, 32, device='cuda', dtype=self.dtype)
    # {0, 1} * 2 - 1 -> {-1, +1}; the (32,) target is combined with the (128, 32) input.
    target = torch.randint(0, 2, (32,), device='cuda') * 2 - 1
    output = F.hinge_embedding_loss(inp, target, margin=1.0, size_average=None, reduce=None, reduction='mean')
def test_kl_div(self):
    """Run batchmean-reduced F.kl_div on two random (32, 128) CUDA tensors."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype, 'requires_grad': True}
    pred = torch.randn(32, 128, **tensor_opts)
    expected = torch.randn(32, 128, **tensor_opts)
    result = F.kl_div(pred, expected, size_average=None, reduce=None, reduction='batchmean')
def test_mse_loss(self):
    """Run mean-reduced F.mse_loss on two random (32, 128) CUDA tensors."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype, 'requires_grad': True}
    pred = torch.randn(32, 128, **tensor_opts)
    expected = torch.randn(32, 128, **tensor_opts)
    result = F.mse_loss(pred, expected, size_average=None, reduce=None, reduction='mean')
def test_margin_ranking_loss(self):
    """Run mean-reduced F.margin_ranking_loss on two random (32, 128) CUDA inputs.

    Targets are sampled from {-1, +1}. The previous expression
    ``torch.randint(0, 1, ...) - 1`` always produced -1 because randint's
    upper bound is exclusive, so only one ranking direction was exercised.
    """
    inp1 = torch.randn(32, 128, device='cuda', dtype=self.dtype, requires_grad=True)
    inp2 = torch.randn(32, 128, device='cuda', dtype=self.dtype, requires_grad=True)
    # {0, 1} * 2 - 1 -> {-1, +1}, then cast to the inputs' dtype.
    target = (torch.randint(0, 2, (128,), device='cuda') * 2 - 1).type_as(inp1)
    output = F.margin_ranking_loss(inp1, inp2, target, margin=0, size_average=None, reduce=None, reduction='mean')
def test_multilabel_margin_loss(self):
    """Run mean-reduced F.multilabel_margin_loss on a random 1-D CUDA tensor of length 1024."""
    scores = torch.randn(1024, device='cuda', dtype=self.dtype, requires_grad=True)
    labels = torch.randint(0, 10, (1024,), dtype=torch.long, device='cuda')
    result = F.multilabel_margin_loss(scores, labels, size_average=None, reduce=None, reduction='mean')
def test_nll_loss(self):
    """Run mean-reduced F.nll_loss on a random (64, 128) CUDA tensor with random class targets."""
    scores = torch.randn(64, 128, device='cuda', dtype=self.dtype, requires_grad=True)
    # Class indices are drawn from [0, 10), within the 128 score columns.
    labels = torch.randint(0, 10, (64,), device='cuda', dtype=torch.long)
    result = F.nll_loss(scores, labels, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')
def test_smooth_l1_loss(self):
    """Run mean-reduced F.smooth_l1_loss on two random (32, 128) CUDA tensors."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    pred = torch.randn(32, 128, requires_grad=True, **tensor_opts)
    expected = torch.randn(32, 128, requires_grad=False, **tensor_opts)
    result = F.smooth_l1_loss(pred, expected, size_average=None, reduce=None, reduction='mean')
def test_soft_margin_loss(self):
    """Run mean-reduced F.soft_margin_loss on two random (32, 128) CUDA tensors."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    pred = torch.randn(32, 128, requires_grad=True, **tensor_opts)
    expected = torch.randn(32, 128, requires_grad=False, **tensor_opts)
    result = F.soft_margin_loss(pred, expected, size_average=None, reduce=None, reduction='mean')
def test_triplet_margin_loss(self):
    """Run mean-reduced F.triplet_margin_loss on three random (32, 128) CUDA tensors."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype, 'requires_grad': True}
    first = torch.randn(32, 128, **tensor_opts)
    second = torch.randn(32, 128, **tensor_opts)
    third = torch.randn(32, 128, **tensor_opts)
    result = F.triplet_margin_loss(
        first, second, third, margin=1.0, p=2,
        eps=1e-06, swap=False, size_average=None, reduce=None, reduction='mean')
def test_pixel_shuffle(self):
    """Run pixel_shuffle with upscale factor 2 on a random (16, 8, 64, 64) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(16, 8, 64, 64, **tensor_opts)
    result = torch.nn.functional.pixel_shuffle(x, 2)
def test_pad(self):
    """Run constant-mode F.pad with a (3, 3) padding on a random (16, 8, 64, 64) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(16, 8, 64, 64, **tensor_opts)
    padding = (3, 3)
    result = F.pad(x, padding, mode='constant', value=0)
def test_interpolate(self):
    """Run nearest-neighbour F.interpolate (scale_factor=2) on a random (16, 8, 64, 64) CUDA tensor."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(16, 8, 64, 64, **tensor_opts)
    result = F.interpolate(x, size=None, scale_factor=2, mode='nearest', align_corners=None)
def test_grid_sample(self):
    """Run bilinear F.grid_sample: (16, 8, 64, 64) input sampled with a (16, 32, 32, 2) random grid."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    x = torch.randn(16, 8, 64, 64, **tensor_opts)
    sampling_grid = torch.randn(16, 32, 32, 2, **tensor_opts)
    result = F.grid_sample(x, sampling_grid, mode='bilinear', padding_mode='zeros')
def test_affine_grid(self):
    """Run F.affine_grid with random (32, 2, 3) matrices for a (32, 8, 32, 32) target size."""
    tensor_opts = {'device': 'cuda', 'dtype': self.dtype}
    matrices = torch.randn(32, 2, 3, **tensor_opts)
    target_size = (32, 8, 32, 32)
    result = F.affine_grid(matrices, target_size)
def run_tests(precision):
    """Run every ``test_*`` method of TestPyProfNvtx at the given precision.

    Args:
        precision: torch dtype passed as the second constructor argument
            of each TestPyProfNvtx instance.

    Returns:
        The ``unittest.TestResult`` produced by the text runner, so the
        caller can detect failures (previously the result was discarded).
    """
    # A throwaway instance is used only to enumerate the bound methods.
    dummy = TestPyProfNvtx('test_affine_grid', None)
    test_cases = [
        name
        for name, _ in inspect.getmembers(dummy, predicate=inspect.ismethod)
        if 'test_' in name
    ]
    print("Running tests for {}".format(precision))
    suite = unittest.TestSuite()
    for test_case in test_cases:
        suite.addTest(TestPyProfNvtx(test_case, precision))
    return unittest.TextTestRunner().run(suite)


if __name__ == '__main__':
    # Run both precisions before deciding the exit status, so a failure in
    # fp32 does not hide the fp16 results.
    results = [run_tests(dtype) for dtype in (torch.float32, torch.float16)]
    if not all(result.wasSuccessful() for result in results):
        # Exit non-zero so calling scripts notice failed tests.
        raise SystemExit(1)
......@@ -20,8 +20,6 @@ TEST_DIRS = [
"run_fp16util",
"run_optimizers",
"run_fused_layer_norm",
"run_pyprof_nvtx",
"run_pyprof_data",
"run_mlp",
"run_transformer",
]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment