yangql / composable_kernel-1 / Commits
"...text-generation-inference.git" did not exist on "38cff84a3e1019e760577467299d82112da62de6"
Commit 701b7341, authored Aug 07, 2019 by Chao Liu

clean up

parent bc9ea646
Showing 4 changed files with 38 additions and 199 deletions
  composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp (+5 / -19)
  composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp (+32 / -81)
  composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp (+0 / -98)
  driver/src/driver.cpp (+1 / -1)
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer.hpp
@@ -155,13 +155,12 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer
         static_assert(in_e_n1_b_n2_block_desc.GetStride(I1) % GemmDataPerReadB == 0,
                       "GemmDataPerReadB alignment requirement is not satisfied");

-#if 0
+#if 1
         // input blockwise copy
         // slice a merged tensor, reorder and copy to a normal tensor
         // this copy operator already has blockwise offset built-in
         auto blockwise_in_copy =
             BlockwiseGenericTensorSliceCopy_v1<BlockSize,
-                                               Float,
                                                decltype(in_e_n1_b_n2_global_merged_desc),
                                                decltype(in_e_n1_b_n2_block_desc),
                                                decltype(in_e_n1_b_n2_block_desc.GetLengths()),
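The static_assert kept as context here is the precondition for vectorized reads: the block descriptor's stride along I1 must be a multiple of GemmDataPerReadB, or a GemmDataPerReadB-wide load would straddle rows. A minimal self-contained sketch of the same compile-time check (RowReader and its parameters are illustrative, not library code):

    #include <cstdio>

    // Illustrative stand-in for the block descriptor's row: Stride floats per
    // row, read as packs of DataPerRead floats.
    template <int Stride, int DataPerRead>
    struct RowReader
    {
        static_assert(Stride % DataPerRead == 0,
                      "GemmDataPerReadB alignment requirement is not satisfied");
        static constexpr int reads_per_row = Stride / DataPerRead;
    };

    int main()
    {
        printf("%d\n", RowReader<64, 4>::reads_per_row); // prints 16
        // RowReader<66, 4> would fail to compile, which is the point of the check.
        return 0;
    }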
@@ -199,13 +198,12 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer
                 Sequence<EPerBlock, KPerBlock>{},
                 Number<math::lcm(WeiBlockCopyDstDataPerWrite_K, GemmDataPerReadA)>{});

-#if 0
+#if 1
         // operator for blockwise copy of weight into LDS
         // slice a tensor, and copy it into another tensor
         // this copy operator already has blockwise offset built-in
         auto blockwise_wei_copy =
             BlockwiseGenericTensorSliceCopy_v1<BlockSize,
-                                               Float,
                                                decltype(wei_e_k_global_desc),
                                                decltype(wei_e_k_block_desc),
                                                decltype(wei_e_k_block_desc.GetLengths()),
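The Number<math::lcm(WeiBlockCopyDstDataPerWrite_K, GemmDataPerReadA)>{} argument picks the LDS alignment: the tile is written WeiBlockCopyDstDataPerWrite_K elements at a time and read GemmDataPerReadA elements at a time, and the least common multiple is the smallest alignment that satisfies both. A sketch of that arithmetic, assuming the library's math::lcm behaves like the classic gcd-based definition:

    constexpr int gcd(int a, int b) { return b == 0 ? a : gcd(b, a % b); }
    constexpr int lcm(int a, int b) { return a / gcd(a, b) * b; }

    // write 4-wide and read 2-wide: aligning to 4 satisfies both
    static_assert(lcm(4, 2) == 4, "write 4-wide, read 2-wide -> align to 4");
    // mismatched widths: the required alignment grows to their lcm
    static_assert(lcm(4, 3) == 12, "write 4-wide, read 3-wide -> align to 12");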
@@ -324,7 +322,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer
             Float p_in_register_buffer[blockwise_in_copy.GetRegisterBufferSize()];
             Float p_wei_register_buffer[blockwise_wei_copy.GetRegisterBufferSize()];

-#if 0
+#if 1
             blockwise_in_copy.MoveSlicingWindowOnSourceTensor(I0, Number<EPerBlock>{}, True);
             // blockwise_wei_copy.MoveSlicingWindowOnSourceTensor(I0, Number<EPerBlock>{},
             // True);
@@ -356,7 +354,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer
             Float p_in_register_buffer[blockwise_in_copy.GetRegisterBufferSize()];
             Float p_wei_register_buffer[blockwise_wei_copy.GetRegisterBufferSize()];

-#if 0
+#if 1
             blockwise_in_copy.MoveSlicingWindowOnSourceTensor(I0, Number<EPerBlock>{}, True);
             // blockwise_wei_copy.MoveSlicingWindowOnSourceTensor(I0, Number<EPerBlock>{}, True);

             p_wei_block_on_global += EPerBlock * wei_e_k_global_desc.GetStride(I0);
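These last two hunks enable the same step of the double-buffered main loop: rather than recomputing source coordinates each iteration, the copy object's slicing window is advanced by EPerBlock along dimension I0. A toy 1-D model of that sliding-window traversal (illustrative numbers, not library code):

    #include <cstdio>

    // Toy model: a window sliding over a source array in steps of E, mirroring
    // MoveSlicingWindowOnSourceTensor(I0, Number<EPerBlock>{}, True).
    int main()
    {
        const int E     = 4;  // step, like EPerBlock
        const int total = 16; // source length along the moved dimension
        int window_origin = 0;

        for(int iter = 0; window_origin + E <= total; ++iter)
        {
            printf("iteration %d reads source [%d, %d)\n",
                   iter, window_origin, window_origin + E);
            window_origin += E; // the MoveSlicingWindow step
        }
        return 0;
    }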
@@ -436,19 +434,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_lds_double_buffer
                 out_k_n1_b_n2_global_merged_desc.GetOffsetFromMultiIndex(
                     k_thread_data_on_global, 0, b_thread_data_on_global, 0);

-#if 0
+#if 1
-            ThreadwiseGenericTensorSliceCopy_v1r1<
-                decltype(out_n0_n1_n2_k0_k1_k2_h_w_thread_desc),
-                decltype(out_n0_n1_n2_k0_k1_k2_h_w_global_mem_desc),
-                decltype(out_n0_n1_n2_k0_k1_k2_h_w_thread_desc.GetLengths()),
-                arithmetic_sequence_gen<0, 8, 1>::type,
-                arithmetic_sequence_gen<0, 8, 1>::type,
-                7,
-                7,
-                1,
-                1>(make_zero_array<index_t, 8>(), make_zero_array<index_t, 8>())
-                .Run(p_out_thread, p_out_thread_on_global);
-#elif 0
             ThreadwiseGenericTensorSliceCopy_v1r2<
                 decltype(out_n0_n1_n2_k0_k1_k2_h_w_thread_desc),
                 decltype(out_n0_n1_n2_k0_k1_k2_h_w_global_mem_desc),
                 ...
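In the branch removed above, arithmetic_sequence_gen<0, 8, 1>::type denotes the identity access order 0, 1, ..., 7 over the eight output dimensions. For this "start at 0, step by 1" case, std::make_index_sequence expresses the same idea; a small hedged sketch (the library's own generator is assumed to be more general):

    #include <utility>

    // std::make_index_sequence<8> plays the role of
    // arithmetic_sequence_gen<0, 8, 1>::type: the sequence 0, 1, ..., 7.
    template <std::size_t... Is>
    constexpr int sum_indices(std::index_sequence<Is...>)
    {
        return (0 + ... + static_cast<int>(Is));
    }

    static_assert(sum_indices(std::make_index_sequence<8>{}) == 28,
                  "0 + 1 + ... + 7 = 28");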
composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy.hpp
@@ -237,47 +237,23 @@ struct BlockwiseGenericTensorSliceCopy_v1
                 thread_buffer_desc.GetOffsetFromMultiIndex(buffer_data_id_begin);
 #endif

             // Position the origin of the per-thread window at the point where the
             // multi-index of the SrcDesc (which might be a merged tensor) is all-zero.
             // This threadwise slice copy assumes each thread is copying a normal (not
             // merged) tensor. To satisfy this assumption, the user needs to make sure
             // that, on a merged dimension that contains multiple original dimensions,
             // the length of the last original dimension is evenly divisible by its
             // sub-lengths. Also, the repeat-length on the merged dimension needs to be
             // 1. These sanity checks are performed in the constructor of
             // BlockwiseGenericTensorSliceCopy_v1
-#if 0
-            threadwise_generic_tensor_slice_copy_v1(SrcDesc{},
-                                                    p_src + src_offset + mThreadSrcOffset,
-                                                    make_zero_array<index_t, nDim>(),
-                                                    thread_buffer_desc,
-                                                    p_buffer + buffer_offset,
-                                                    make_zero_array<index_t, nDim>(),
-                                                    thread_sub_tensor_lengths,
-                                                    SrcDimAccessOrder{},
-                                                    Number<SrcDataPerAccess>{});
-#elif 1
-            ThreadwiseGenericTensorSliceCopy_v1r1<SrcDesc,
-                                                  decltype(thread_buffer_desc),
-                                                  SubLengths,
-                                                  SrcDimAccessOrder,
-                                                  typename arithmetic_sequence_gen<0, nDim, 1>::type,
-                                                  SrcVectorAccessDim,
-                                                  0,
-                                                  SrcDataPerAccess,
-                                                  1>(make_zero_array<index_t, nDim>(),
-                                                     make_zero_array<index_t, nDim>())
+            ThreadwiseGenericTensorSliceCopy_v1r2<SrcDesc,
+                                                  decltype(thread_buffer_desc),
+                                                  SubLengths,
+                                                  SrcDimAccessOrder,
+                                                  SrcVectorAccessDim,
+                                                  SrcDataPerAccess,
+                                                  1>(make_zero_array<index_t, nDim>(),
+                                                     make_zero_array<index_t, nDim>())
                 .Run(p_src + src_offset + mThreadSrcOffset, p_buffer + buffer_offset);
-#elif 1
-            ThreadwiseGenericTensorSliceCopy_v1r2<SrcDesc,
-                                                  decltype(thread_buffer_desc),
-                                                  SubLengths,
-                                                  SrcDimAccessOrder,
-                                                  SrcVectorAccessDim,
-                                                  SrcDataPerAccess,
-                                                  1>(make_zero_array<index_t, nDim>(),
-                                                     make_zero_array<index_t, nDim>())
-                .Run(p_src + src_offset + mThreadSrcOffset, p_buffer + buffer_offset);
-#endif
         });
     }
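The comment block above restates the merged-dimension precondition: a per-thread sub-length on a merged dimension must evenly divide the last original dimension folded into that merged dimension, so no thread's window straddles a carry between the original indices. A worked example with made-up numbers, not from this commit:

    // A merged dimension folding original dims (Y, X) = (3, 8) has length 24.
    constexpr int Y = 3;
    constexpr int X = 8;          // last original dimension in the merge
    constexpr int sub_length = 4; // per-thread slice along the merged dimension

    static_assert(Y * X == 24, "merged length");
    // 4 divides X = 8, so each window of 4 stays inside a single Y step and can
    // be treated as a slice of a normal (non-merged) tensor; sub_length = 5
    // would straddle a boundary.
    static_assert(X % sub_length == 0,
                  "per-thread window must not cross a merge boundary");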
@@ -316,48 +292,23 @@ struct BlockwiseGenericTensorSliceCopy_v1
             const index_t dst_offset = DstDesc::GetOffsetFromMultiIndex(dst_data_id_begin);
 #endif

             // Position the origin of the per-thread window at the point where the
             // multi-index of the SrcDesc (which might be a merged tensor) is all-zero.
             // This threadwise slice copy assumes each thread is copying a normal (not
             // merged) tensor. To satisfy this assumption, the user needs to make sure
             // that, on a merged dimension that contains multiple original dimensions,
             // the length of the last original dimension is evenly divisible by its
             // sub-lengths. Also, the repeat-length on the merged dimension needs to be
             // 1. These sanity checks are performed in the constructor of
             // BlockwiseGenericTensorSliceCopy_v1
-#if 0
-            threadwise_generic_tensor_slice_copy_v1(thread_buffer_desc,
-                                                    p_buffer + buffer_offset,
-                                                    make_zero_array<index_t, nDim>(),
-                                                    DstDesc{},
-                                                    p_dst + dst_offset + mThreadDstOffset,
-                                                    make_zero_array<index_t, nDim>(),
-                                                    thread_sub_tensor_lengths,
-                                                    DstDimAccessOrder{},
-                                                    Number<DstDataPerAccess>{});
-#elif 1
-            ThreadwiseGenericTensorSliceCopy_v1r1<decltype(thread_buffer_desc),
-                                                  DstDesc,
-                                                  SubLengths,
-                                                  typename arithmetic_sequence_gen<0, nDim, 1>::type,
-                                                  DstDimAccessOrder,
-                                                  0,
-                                                  DstVectorAccessDim,
-                                                  1,
-                                                  DstDataPerAccess>(make_zero_array<index_t, nDim>(),
-                                                                    make_zero_array<index_t, nDim>())
+            ThreadwiseGenericTensorSliceCopy_v1r2<decltype(thread_buffer_desc),
+                                                  DstDesc,
+                                                  SubLengths,
+                                                  DstDimAccessOrder,
+                                                  DstVectorAccessDim,
+                                                  1,
+                                                  DstDataPerAccess>(make_zero_array<index_t, nDim>(),
+                                                                    make_zero_array<index_t, nDim>())
                 .Run(p_buffer + buffer_offset, p_dst + dst_offset + mThreadDstOffset);
-#elif 1
-            ThreadwiseGenericTensorSliceCopy_v1r2<decltype(thread_buffer_desc),
-                                                  DstDesc,
-                                                  SubLengths,
-                                                  DstDimAccessOrder,
-                                                  DstVectorAccessDim,
-                                                  1,
-                                                  DstDataPerAccess>(make_zero_array<index_t, nDim>(),
-                                                                    make_zero_array<index_t, nDim>())
-                .Run(p_buffer + buffer_offset, p_dst + dst_offset + mThreadDstOffset);
-#endif
         });
     }
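Both hunks converge on the same call shape: all copy configuration is carried in template parameters, the object is constructed with all-zero origins, and Run(src, dst) performs the transfer on raw pointers. A minimal stand-in showing that stateless construct-then-Run pattern (NaiveSliceCopy is hypothetical, not the library's API):

    #include <cstddef>

    // Hypothetical stand-in for ThreadwiseGenericTensorSliceCopy_v1r2<...>:
    // the copy length is a compile-time parameter; Run only takes the pointers.
    template <std::size_t N>
    struct NaiveSliceCopy
    {
        void Run(const float* src, float* dst) const
        {
            for(std::size_t i = 0; i < N; ++i)
                dst[i] = src[i];
        }
    };

    // usage, mirroring the diff:
    //   NaiveSliceCopy<16>{}.Run(p_buffer + buffer_offset, p_dst + dst_offset);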
...
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy.hpp
@@ -6,10 +6,6 @@
 #include "ConstantMergedTensorDescriptor.hpp"
 #include "tensor_coordinate.hpp"

-#ifndef CK_EXPERIMENTAL_USE_MORE_COMPILE_STATIC_THREADWISE_GENERIC_TENSOR_SLICE_COPY_V1
-#define CK_EXPERIMENTAL_USE_MORE_COMPILE_STATIC_THREADWISE_GENERIC_TENSOR_SLICE_COPY_V1 0
-#endif
-
 #ifndef CK_EXPERIMENTAL_USE_MORE_COMPILE_STATIC_THREADWISE_GENERIC_TENSOR_SLICE_COPY_V1R1
 #define CK_EXPERIMENTAL_USE_MORE_COMPILE_STATIC_THREADWISE_GENERIC_TENSOR_SLICE_COPY_V1R1 0
 #endif
@@ -24,100 +20,6 @@
 namespace ck {

-// user needs to make sure alignment requirement is satisfied when setting DataPerAccess > 1
-template <class Float,
-          class SrcDesc,
-          class DstDesc,
-          class SliceLengths,
-          class DimAccessOrder,
-          index_t DataPerAccess>
-__device__ void
-threadwise_generic_tensor_slice_copy_v1(SrcDesc,
-                                        const Float* __restrict__ p_src,
-                                        Array<index_t, SrcDesc::GetNumOfDimension()> src_multi_id_begin,
-                                        DstDesc,
-                                        Float* __restrict__ p_dst,
-                                        Array<index_t, DstDesc::GetNumOfDimension()> dst_multi_id_begin,
-                                        SliceLengths,
-                                        DimAccessOrder,
-                                        Number<DataPerAccess>)
-{
-    constexpr index_t nDim = SrcDesc::GetNumOfDimension();
-
-    static_assert(nDim == SrcDesc::GetNumOfDimension() && nDim == DstDesc::GetNumOfDimension() &&
-                      nDim == SliceLengths::GetSize() && nDim == DimAccessOrder::GetSize(),
-                  "wrong! # of dimensions not the same");
-
-    static_assert(is_valid_sequence_map<DimAccessOrder>::value, "wrong! map is not valid");
-
-    // TODO: do more sanity-check here, something like:
-    // constexpr auto src_strides_in_access_order =
-    //     SrcDesc::ReorderGivenNew2Old(DimAccessOrder{}).GetStride(Number<nDim-1>{});
-    // constexpr auto dst_strides_in_access_order =
-    //     SrcDesc::ReorderGivenNew2Old(DimAccessOrder{}).GetStride(Number<nDim-1>{});
-    // // check src/dst stride on the lowest access dimension
-    // static_assert((DataPerAccess == 1 || src_strides_in_access_order.Back() == 1) &&
-    //               (DataPerAccess == 1 || dst_strides_in_access_order.Back() == 1),
-    //               "wrong! src/dst stride on the lowest access dimension needs to be 1 for "
-    //               "vectorized read/write");
-
-    constexpr auto slice_lengths_in_access_order =
-        SliceLengths::ReorderGivenNew2Old(DimAccessOrder{});
-
-    // check slice length on the lowest access dimension
-    static_assert(slice_lengths_in_access_order.Back() % DataPerAccess == 0,
-                  "wrong! slice length on the lowest access dimension should be evenly divided by "
-                  "DataPerAccess");
-
-    constexpr index_t num_access_on_lowest_access_dimension =
-        slice_lengths_in_access_order.Back() / DataPerAccess;
-
-    constexpr auto access_lengths = slice_lengths_in_access_order.Modify(
-        Number<nDim - 1>{}, Number<num_access_on_lowest_access_dimension>{});
-
-    using vector_t = typename vector_type<Float, DataPerAccess>::MemoryType;
-
-#if CK_EXPERIMENTAL_USE_MORE_COMPILE_STATIC_THREADWISE_GENERIC_TENSOR_SLICE_COPY_V1
-    static_ford<decltype(access_lengths)>{}([&](auto access_multi_id) {
-        constexpr index_t itmp = access_multi_id.Back() * DataPerAccess;
-
-        constexpr auto data_multi_id_in_access_order =
-            access_multi_id.Modify(Number<nDim - 1>{}, Number<itmp>{});
-
-        constexpr auto data_multi_id =
-            data_multi_id_in_access_order.ReorderGivenOld2New(DimAccessOrder{});
-
-        const index_t src_index =
-            SrcDesc::GetOffsetFromMultiIndex(src_multi_id_begin + data_multi_id);
-
-        const index_t dst_index =
-            DstDesc::GetOffsetFromMultiIndex(dst_multi_id_begin + data_multi_id);
-
-        *reinterpret_cast<vector_t*>(&p_dst[dst_index]) =
-            *reinterpret_cast<const vector_t*>(&p_src[src_index]);
-    });
-#else
-    ford<decltype(access_lengths)>{}([&](auto access_multi_id) {
-        auto data_multi_id_in_access_order = access_multi_id;
-        data_multi_id_in_access_order(nDim - 1) = access_multi_id[nDim - 1] * DataPerAccess;
-
-        const auto data_multi_id =
-            reorder_array_given_old2new(data_multi_id_in_access_order, DimAccessOrder{});
-
-        const index_t src_index =
-            SrcDesc::GetOffsetFromMultiIndex(src_multi_id_begin + data_multi_id);
-
-        const index_t dst_index =
-            DstDesc::GetOffsetFromMultiIndex(dst_multi_id_begin + data_multi_id);
-
-        *reinterpret_cast<vector_t*>(&p_dst[dst_index]) =
-            *reinterpret_cast<const vector_t*>(&p_src[src_index]);
-    });
-#endif
-}
-
 // This threadwise copy allows vector access of src and dst.
 // It allows the dimensions of vector access to be different on src and dst.
 // It also allows the vector size to be different on src and dst.
...
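The heart of the deleted helper is its index math: divide the innermost access-order length by DataPerAccess, iterate over vector accesses, and scale the innermost index back up to scalar coordinates before computing src/dst offsets. A host-side sketch of just that transformation with illustrative numbers (the original was a __device__ function operating on descriptor types):

    #include <cstdio>

    int main()
    {
        const int lengths[2]       = {2, 8}; // slice lengths, already in access order
        const int data_per_access  = 4;      // vector width

        // 2 x 8 scalars become 2 x (8 / 4) vector accesses.
        for(int i0 = 0; i0 < lengths[0]; ++i0)
            for(int i1 = 0; i1 < lengths[1] / data_per_access; ++i1)
            {
                // start of one vector access, back in scalar coordinates
                const int data_id[2] = {i0, i1 * data_per_access};
                printf("vector access begins at (%d, %d)\n", data_id[0], data_id[1]);
            }
        return 0;
    }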
driver/src/driver.cpp
@@ -379,7 +379,7 @@ int main(int argc, char* argv[])
 #elif 0
     device_convolution_implicit_gemm_v3_nchw_cyxk_nkhw(
         in_nchw_desc, in_nchw, wei_kcyx_desc, wei_kcyx, out_nkhw_desc, out_nkhw_device, nrepeat);
-#elif 0
+#elif 1
     device_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw(in_nchw_desc,
                                                          in_nchw,
                                                          wei_kcyx_desc,
                                                          ...
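driver.cpp selects the kernel under test with a #if/#elif chain, so flipping a 0 to a 1 is the whole mechanism for switching the benchmark from the v3 path to v4r1. A compilable toy version of that dispatch pattern (run_v3/run_v4r1 are placeholders, not the driver's functions):

    #include <cstdio>

    void run_v3() { puts("v3"); }
    void run_v4r1() { puts("v4r1"); }

    int main()
    {
    #if 0
        run_v3();
    #elif 1
        run_v4r1(); // the branch this commit activates
    #endif
        return 0;
    }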