OpenDAS / ollama / Commits / f7b61333

Commit f7b61333, authored Aug 14, 2023 by Michael Yang

update llama.cpp

Parent: 2ab20095
Showing 18 changed files with 115 additions and 58 deletions.
llm/ggml-alloc.c        +1  -1
llm/ggml-alloc.h        +1  -1
llm/ggml-cuda.cu       +69 -27
llm/ggml-cuda.h         +1  -1
llm/ggml-metal.h        +1  -1
llm/ggml-metal.m        +4  -4
llm/ggml-metal.metal    +1  -1
llm/ggml-mpi.c          +1  -1
llm/ggml-mpi.h          +1  -1
llm/ggml-opencl.cpp     +1  -1
llm/ggml-opencl.h       +1  -1
llm/ggml.c              +1  -1
llm/ggml.h              +1  -1
llm/k_quants.c          +1  -1
llm/k_quants.h          +1  -1
llm/llama-util.h       +21 -12
llm/llama.cpp           +7  -1
llm/llama.h             +1  -1
llm/ggml-alloc.c

 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/ggml-alloc.h

 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/ggml-cuda.cu

 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
@@ -1779,7 +1779,6 @@ static __device__ __forceinline__ float vec_dot_q4_K_q8_1_impl_vmmq(
 }

 // contiguous u/y values
-// also used for q5_K
 static __device__ __forceinline__ float vec_dot_q4_K_q8_1_impl_mmq(
     const int * __restrict__ v, const int * __restrict__ u, const uint8_t * __restrict__ sc,
     const uint8_t * __restrict__ m, const half2 & dm4, const half2 * __restrict__ ds8) {
...
...
@@ -1789,19 +1788,18 @@ static __device__ __forceinline__ float vec_dot_q4_K_q8_1_impl_mmq(
     float sumf_m = 0.0f;

 #pragma unroll
-    for (int i0 = 0; i0 < VDR_Q4_K_Q8_1_MMQ; i0 += (QI8_1/QR4_K)) {
+    for (int i = 0; i < QR4_K*VDR_Q4_K_Q8_1_MMQ/QI8_1; ++i) {
         int sumi_d = 0;

 #pragma unroll
-        for (int i = i0; i < i0 + (QI8_1/QR4_K); ++i) {
-            sumi_d = __dp4a(v[2*i+0], u[2*i+0], sumi_d); // SIMD dot product
-            sumi_d = __dp4a(v[2*i+1], u[2*i+1], sumi_d); // SIMD dot product
+        for (int j = 0; j < QI8_1; ++j) {
+            sumi_d = __dp4a((v[j] >> (4*i)) & 0x0F0F0F0F, u[i*QI8_1 + j], sumi_d); // SIMD dot product
         }

-        const float2 ds8f = __half22float2(ds8[i0/4]);
+        const float2 ds8f = __half22float2(ds8[i]);

-        sumf_d += ds8f.x * (sc[i0/4] * sumi_d);
-        sumf_m += ds8f.y *  m[i0/4]; // sum of q8_1 block * q4_K min val
+        sumf_d += ds8f.x * (sc[i] * sumi_d);
+        sumf_m += ds8f.y *  m[i]; // sum of q8_1 block * q4_K min val
     }

     const float2 dm4f = __half22float2(dm4);
...
...
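The "SIMD dot product" comments above refer to the __dp4a intrinsic: it treats its two 32-bit operands as four packed 8-bit values, multiplies them lane by lane, and adds the four products to a 32-bit accumulator in a single instruction (the MIN_CC_DP4A gate exists because older GPUs lack it). A minimal host-side sketch of that arithmetic, purely illustrative and not part of this commit:

// Host-side reference for __dp4a(a, b, c) with signed 8-bit lanes (illustrative only).
#include <cstdint>
#include <cstdio>

static int dp4a_ref(int32_t a, int32_t b, int32_t c) {
    for (int lane = 0; lane < 4; ++lane) {
        const int8_t av = (int8_t)(a >> (8 * lane));  // reinterpret this lane's byte as signed
        const int8_t bv = (int8_t)(b >> (8 * lane));
        c += (int)av * (int)bv;                       // accumulate one lane product
    }
    return c;
}

int main() {
    // Four int8 pairs packed into one int each: {1, -2, 3, 4} . {5, 6, -7, 8}
    const int32_t a = (1 & 0xFF) | ((-2 & 0xFF) << 8) | (( 3 & 0xFF) << 16) | ((4 & 0xFF) << 24);
    const int32_t b = (5 & 0xFF) | (( 6 & 0xFF) << 8) | ((-7 & 0xFF) << 16) | ((8 & 0xFF) << 24);
    printf("%d\n", dp4a_ref(a, b, 0));  // 1*5 + (-2)*6 + 3*(-7) + 4*8 = 4
    return 0;
}

In the new loop above, the mask (v[j] >> (4*i)) & 0x0F0F0F0F leaves a 4-bit quant in each byte lane, so each __dp4a call accumulates four quant/activation products.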
@@ -1818,7 +1816,7 @@ static __device__ __forceinline__ float vec_dot_q4_K_q8_1_impl_mmq(
 #define VDR_Q5_K_Q8_1_MMQ  8

 // contiguous v/x values
-static __device__ __forceinline__ float vec_dot_q5_K_q8_1_impl(
+static __device__ __forceinline__ float vec_dot_q5_K_q8_1_impl_vmmq(
     const int * __restrict__ vl, const int * __restrict__ vh, const int * __restrict__ u, const uint8_t * __restrict__ sc,
     const uint8_t * __restrict__ m, const half2 & dm5, const float * __restrict__ d8) {
...
...
@@ -1855,6 +1853,40 @@ static __device__ __forceinline__ float vec_dot_q5_K_q8_1_impl(
 #endif // __CUDA_ARCH__ >= MIN_CC_DP4A
 }

+// contiguous u/y values
+static __device__ __forceinline__ float vec_dot_q5_K_q8_1_impl_mmq(
+    const int * __restrict__ v, const int * __restrict__ u, const uint8_t * __restrict__ sc,
+    const uint8_t * __restrict__ m, const half2 & dm4, const half2 * __restrict__ ds8) {
+
+#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
+    float sumf_d = 0.0f;
+    float sumf_m = 0.0f;
+
+#pragma unroll
+    for (int i = 0; i < QR5_K*VDR_Q5_K_Q8_1_MMQ/QI8_1; ++i) {
+        int sumi_d = 0;
+
+#pragma unroll
+        for (int j = 0; j < QI8_1; ++j) {
+            sumi_d = __dp4a(v[i*QI8_1 + j], u[i*QI8_1 + j], sumi_d); // SIMD dot product
+        }
+
+        const float2 ds8f = __half22float2(ds8[i]);
+
+        sumf_d += ds8f.x * (sc[i] * sumi_d);
+        sumf_m += ds8f.y *  m[i]; // sum of q8_1 block * q4_K min val
+    }
+
+    const float2 dm4f = __half22float2(dm4);
+
+    return dm4f.x*sumf_d - dm4f.y*sumf_m;
+
+#else
+    assert(false);
+    return 0.0f; // only to satisfy the compiler
+#endif // __CUDA_ARCH__ >= MIN_CC_DP4A
+}
+
 #define VDR_Q6_K_Q8_1_MMVQ 1
 #define VDR_Q6_K_Q8_1_MMQ  8
...
...
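Both _impl_mmq helpers finish with return dm4f.x*sumf_d - dm4f.y*sumf_m. That works because each K-quant sub-block stores a scale and a min, and each q8_1 block carries its scale together with a precomputed block sum, so the mixed dot product splits into a scaled integer dot product minus a min term that only needs the q8_1 block sum. A small self-contained sketch of that factorization with made-up numbers (the names mirror the kernel's variables, but this is not the real block layout and is not part of this commit):

// Host-side check of the scale/min factorization used by the *_impl_mmq helpers (illustrative only).
#include <cmath>
#include <cassert>
#include <cstdio>

int main() {
    // One "sub-block": weights dequantize as w[i] = d*sc*q[i] - dmin*m,
    // activations as y[i] = d8*u[i], with s8 a precomputed block sum (here pre-scaled by d8).
    const float d = 0.5f, dmin = 0.25f;   // roles of dm4.x and dm4.y
    const int   sc = 3,   m   = 7;        // per-sub-block scale and min
    const float d8 = 0.125f;              // role of ds8.x
    const int   q[4] = {1, 30, 12, 5};    // quantized weights
    const int   u[4] = {-3, 8, 100, -50}; // quantized activations

    int sumi = 0, sumu = 0;
    float direct = 0.0f;
    for (int i = 0; i < 4; ++i) {
        sumi   += q[i] * u[i];                          // what __dp4a accumulates
        sumu   += u[i];
        direct += (d*sc*q[i] - dmin*m) * (d8*u[i]);     // dot product on dequantized values
    }
    const float s8       = d8 * sumu;                   // role of ds8.y
    const float factored = d * (d8 * sc * sumi)         // role of dm4f.x * sumf_d
                         - dmin * (m * s8);             // role of dm4f.y * sumf_m

    printf("direct=%f factored=%f\n", direct, factored);
    assert(fabsf(direct - factored) < 1e-3f);
    return 0;
}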
@@ -2850,18 +2882,11 @@ static __device__ __forceinline__ float vec_dot_q4_K_q8_1_mul_mat(
     const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
     const int * __restrict__ y_qs, const half2 * __restrict__ y_ds, const int & i, const int & j, const int & k) {

-    int v[QR4_K*VDR_Q4_K_Q8_1_MMQ];
-
-#pragma unroll
-    for (int l = 0; l < VDR_Q4_K_Q8_1_MMQ; ++l) {
-        v[l + 0]         = (x_ql[i * (WARP_SIZE + 1) + k + l] >> 0) & 0x0F0F0F0F;
-        v[l + (QI4_K/4)] = (x_ql[i * (WARP_SIZE + 1) + k + l] >> 4) & 0x0F0F0F0F;
-    }
-
     const uint8_t * sc = ((const uint8_t *) &x_sc[i * (WARP_SIZE/8) + i/8 + k/16]) + 2*((k % 16) / 8);

     const int index_y = j * WARP_SIZE + (QR4_K*k) % WARP_SIZE;
-    return vec_dot_q4_K_q8_1_impl_mmq(v, &y_qs[index_y], sc, sc+8, x_dm[i * (WARP_SIZE/QI4_K) + i/QI4_K], &y_ds[index_y/QI8_1]);
+    return vec_dot_q4_K_q8_1_impl_mmq(&x_ql[i * (WARP_SIZE + 1) + k], &y_qs[index_y], sc, sc+8,
+                                      x_dm[i * (WARP_SIZE/QI4_K) + i/QI4_K], &y_ds[index_y/QI8_1]);
 }

 static __device__ __forceinline__ float vec_dot_q5_K_q8_1(
...
...
@@ -2908,7 +2933,7 @@ static __device__ __forceinline__ float vec_dot_q5_K_q8_1(
         u[2*i+1] = q8[4];
     }

-    return vec_dot_q5_K_q8_1_impl(vl, vh, u, sc, m, bq5_K->dm, d8);
+    return vec_dot_q5_K_q8_1_impl_vmmq(vl, vh, u, sc, m, bq5_K->dm, d8);

 #else
...
...
@@ -3051,7 +3076,8 @@ static __device__ __forceinline__ float vec_dot_q5_K_q8_1_mul_mat(
     const int index_x = i * (QR5_K*WARP_SIZE + 1) +  QR5_K*k;
     const int index_y = j * WARP_SIZE             + (QR5_K*k) % WARP_SIZE;
-    return vec_dot_q4_K_q8_1_impl_mmq(&x_ql[index_x], &y_qs[index_y], sc, sc+8, x_dm[i * (WARP_SIZE/QI5_K) + i/QI5_K], &y_ds[index_y/QI8_1]);
+    return vec_dot_q5_K_q8_1_impl_mmq(&x_ql[index_x], &y_qs[index_y], sc, sc+8,
+                                      x_dm[i * (WARP_SIZE/QI5_K) + i/QI5_K], &y_ds[index_y/QI8_1]);
 }

 static __device__ __forceinline__ float vec_dot_q6_K_q8_1(
...
...
@@ -3327,7 +3353,11 @@ template <bool need_check> static __global__ void mul_mat_q4_0(
 #define  MMQ_Y_Q4_1_PASCAL 64
 #define NWARPS_Q4_1_PASCAL 8

-template <bool need_check> static __global__ void mul_mat_q4_1(
+template <bool need_check> static __global__ void
+#if __CUDA_ARCH__ < CC_TURING
+    __launch_bounds__(WARP_SIZE*NWARPS_Q4_1_PASCAL, 2)
+#endif // __CUDA_ARCH__ < CC_TURING
+    mul_mat_q4_1(
     const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst,
     const int ncols_x, const int nrows_x, const int ncols_y, const int nrows_y, const int nrows_dst) {
...
...
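The same pattern repeats for mul_mat_q3_K, mul_mat_q4_K and mul_mat_q6_K below (the q4_K and q6_K hunks also raise their MMQ_X_*_PASCAL tile width from 32 to 64): when compiling for architectures older than Turing (__CUDA_ARCH__ < CC_TURING), the kernel gets __launch_bounds__(maxThreadsPerBlock, minBlocksPerMultiprocessor), which tells the compiler to cap register usage so that at least two blocks can stay resident per SM. A standalone sketch of that attribute on a hypothetical kernel; WARP_SIZE_EX, NWARPS_EX, CC_TURING_EX and example_scale are made-up names, and the threshold value is a placeholder, not the constant from ggml-cuda.cu:

// Illustrative only: the __launch_bounds__(max_threads, min_blocks) pattern used above.
#include <cstdio>
#include <cuda_runtime.h>

#define WARP_SIZE_EX  32
#define NWARPS_EX      8
#define CC_TURING_EX 750   // placeholder compute-capability threshold

template <bool need_check> static __global__ void
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < CC_TURING_EX
    __launch_bounds__(WARP_SIZE_EX*NWARPS_EX, 2)   // cap registers so >= 2 blocks fit per SM
#endif
example_scale(const float * __restrict__ x, float * __restrict__ y, const int n) {
    const int i = blockIdx.x*blockDim.x + threadIdx.x;
    if (need_check && i >= n) {
        return;                                    // out-of-range guard, enabled per template arg
    }
    y[i] = 2.0f*x[i];
}

int main() {
    const int n = 1024;
    float *x, *y;
    cudaMallocManaged(&x, n*sizeof(float));
    cudaMallocManaged(&y, n*sizeof(float));
    for (int i = 0; i < n; ++i) x[i] = (float) i;

    example_scale<true><<<(n + 255)/256, 256>>>(x, y, n);
    cudaDeviceSynchronize();

    printf("y[3] = %f\n", y[3]);                   // expect 6.0
    cudaFree(x); cudaFree(y);
    return 0;
}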
@@ -3497,7 +3527,11 @@ template <bool need_check> static __global__ void mul_mat_q2_K(
 #define  MMQ_Y_Q3_K_PASCAL 64
 #define NWARPS_Q3_K_PASCAL 8

-template <bool need_check> static __global__ void mul_mat_q3_K(
+template <bool need_check> static __global__ void
+#if __CUDA_ARCH__ < CC_TURING
+    __launch_bounds__(WARP_SIZE*NWARPS_Q3_K_PASCAL, 2)
+#endif // __CUDA_ARCH__ < CC_TURING
+    mul_mat_q3_K(
     const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst,
     const int ncols_x, const int nrows_x, const int ncols_y, const int nrows_y, const int nrows_dst) {
...
...
@@ -3527,11 +3561,15 @@ template <bool need_check> static __global__ void mul_mat_q3_K(
 #define  MMQ_X_Q4_K_AMPERE 64
 #define  MMQ_Y_Q4_K_AMPERE 128
 #define NWARPS_Q4_K_AMPERE 4

-#define  MMQ_X_Q4_K_PASCAL 32
+#define  MMQ_X_Q4_K_PASCAL 64
 #define  MMQ_Y_Q4_K_PASCAL 64
 #define NWARPS_Q4_K_PASCAL 8

-template <bool need_check> static __global__ void mul_mat_q4_K(
+template <bool need_check> static __global__ void
+#if __CUDA_ARCH__ < CC_TURING
+    __launch_bounds__(WARP_SIZE*NWARPS_Q4_K_PASCAL, 2)
+#endif // __CUDA_ARCH__ < CC_TURING
+    mul_mat_q4_K(
     const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst,
     const int ncols_x, const int nrows_x, const int ncols_y, const int nrows_y, const int nrows_dst) {
...
...
@@ -3595,11 +3633,15 @@ template <bool need_check> static __global__ void mul_mat_q5_K(
 #define  MMQ_X_Q6_K_AMPERE 64
 #define  MMQ_Y_Q6_K_AMPERE 64
 #define NWARPS_Q6_K_AMPERE 4

-#define  MMQ_X_Q6_K_PASCAL 32
+#define  MMQ_X_Q6_K_PASCAL 64
 #define  MMQ_Y_Q6_K_PASCAL 64
 #define NWARPS_Q6_K_PASCAL 8

-template <bool need_check> static __global__ void mul_mat_q6_K(
+template <bool need_check> static __global__ void
+#if __CUDA_ARCH__ < CC_TURING
+    __launch_bounds__(WARP_SIZE*NWARPS_Q6_K_PASCAL, 2)
+#endif // __CUDA_ARCH__ < CC_TURING
+    mul_mat_q6_K(
     const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst,
     const int ncols_x, const int nrows_x, const int ncols_y, const int nrows_y, const int nrows_dst) {
...
...
llm/ggml-cuda.h

 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/ggml-metal.h

 //go:build darwin
 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/ggml-metal.m

 //go:build darwin
 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
@@ -154,7 +154,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
         ctx->library = [ctx->device newLibraryWithSource:msl_library_source options:nil error:&error];
         if (error) {
             fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
-            exit(1);
+            return NULL;
         }
     }
 #else
...
...
@@ -172,7 +172,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
         NSString * src  = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
         if (error) {
             fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
-            exit(1);
+            return NULL;
         }

 #ifdef GGML_QKK_64
...
...
@@ -184,7 +184,7 @@ struct ggml_metal_context * ggml_metal_init(int n_cb) {
 #endif
         if (error) {
             fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
-            exit(1);
+            return NULL;
         }
     }
 #endif
...
...
llm/ggml-metal.metal

 //go:build darwin
 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/ggml-mpi.c

 //go:build mpi
 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/ggml-mpi.h

 //go:build mpi
 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/ggml-opencl.cpp

 //go:build opencl
 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/ggml-opencl.h

 //go:build opencl
 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/ggml.c

 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/ggml.h

 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/k_quants.c

 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/k_quants.h

 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
llm/llama-util.h

 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
@@ -297,20 +297,29 @@ struct llama_mmap {
             throw std::runtime_error(format("MapViewOfFile failed: %s", llama_format_win_err(error).c_str()));
         }

-        #if _WIN32_WINNT >= _WIN32_WINNT_WIN8
         if (prefetch) {
-            // Advise the kernel to preload the mapped memory
-            WIN32_MEMORY_RANGE_ENTRY range;
-            range.VirtualAddress = addr;
-            range.NumberOfBytes = (SIZE_T)size;
-            if (!PrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
-                fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n",
-                        llama_format_win_err(GetLastError()).c_str());
+            // The PrefetchVirtualMemory API is only present on Windows 8 and above, so we
+            // will dynamically load it using GetProcAddress.
+            BOOL (WINAPI *pPrefetchVirtualMemory) (HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);
+            HMODULE hKernel32;
+
+            // This call is guaranteed to succeed.
+            hKernel32 = GetModuleHandleW(L"kernel32.dll");
+
+            // This call may fail if on a pre-Win8 system.
+            pPrefetchVirtualMemory = reinterpret_cast<decltype(pPrefetchVirtualMemory)> (GetProcAddress(hKernel32, "PrefetchVirtualMemory"));
+
+            if (pPrefetchVirtualMemory) {
+                // Advise the kernel to preload the mapped memory.
+                WIN32_MEMORY_RANGE_ENTRY range;
+                range.VirtualAddress = addr;
+                range.NumberOfBytes = (SIZE_T)size;
+                if (!pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0)) {
+                    fprintf(stderr, "warning: PrefetchVirtualMemory failed: %s\n",
+                            llama_format_win_err(GetLastError()).c_str());
+                }
             }
         }
-        #else
-        #pragma message("warning: You are building for pre-Windows 8; prefetch not supported")
-        #endif // _WIN32_WINNT >= _WIN32_WINNT_WIN8
     }

     ~llama_mmap() {
...
...
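The llama_mmap change above swaps the compile-time _WIN32_WINNT >= _WIN32_WINNT_WIN8 guard for a runtime lookup, so a single binary can issue the PrefetchVirtualMemory hint on Windows 8 and later and silently skip it on older versions. A stripped-down, Windows-only sketch of that pattern; try_prefetch is a hypothetical helper, not ollama code (llama_mmap inlines the same steps):

// Windows-only sketch: resolve an API that may be missing at runtime
// instead of gating it with a compile-time version check.
#include <windows.h>
#include <cstdio>

static bool try_prefetch(void * addr, size_t size) {
    BOOL (WINAPI *pPrefetchVirtualMemory)(HANDLE, ULONG_PTR, PWIN32_MEMORY_RANGE_ENTRY, ULONG);

    // kernel32.dll is always loaded, so GetModuleHandleW cannot fail here.
    HMODULE hKernel32 = GetModuleHandleW(L"kernel32.dll");

    // On pre-Windows-8 systems the symbol is absent and GetProcAddress returns NULL.
    pPrefetchVirtualMemory = reinterpret_cast<decltype(pPrefetchVirtualMemory)>(
        GetProcAddress(hKernel32, "PrefetchVirtualMemory"));
    if (!pPrefetchVirtualMemory) {
        return false; // old Windows: quietly skip the hint
    }

    WIN32_MEMORY_RANGE_ENTRY range;
    range.VirtualAddress = addr;
    range.NumberOfBytes  = (SIZE_T) size;
    return pPrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0) != 0;
}

int main() {
    static char buf[1 << 20];
    printf("prefetch hint %s\n", try_prefetch(buf, sizeof(buf)) ? "issued" : "skipped");
    return 0;
}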
llm/llama.cpp

 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...
@@ -3363,6 +3363,12 @@ struct llama_context * llama_new_context_with_model(
         // this allocates all Metal resources and memory buffers

         ctx->ctx_metal = ggml_metal_init(1);

+        if (!ctx->ctx_metal) {
+            LLAMA_LOG_ERROR("%s: ggml_metal_init() failed\n", __func__);
+            llama_free(ctx);
+            return NULL;
+        }
+
         void * data_ptr  = NULL;
         size_t data_size = 0;
...
...
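This check pairs with the ggml-metal.m change earlier in the commit: ggml_metal_init() now reports failure by returning NULL instead of calling exit(1), and the caller logs the error, frees the partially built context, and returns NULL itself. A tiny illustrative sketch of that hand-off; metal_init_stub and make_context are hypothetical stand-ins, not ollama code:

// Illustrative only: propagate an init failure to the caller instead of exiting the process.
#include <cstdio>
#include <cstdlib>

struct metal_ctx { int dummy; };
struct llama_ctx { metal_ctx * ctx_metal; };

static metal_ctx * metal_init_stub(bool fail) {
    if (fail) {
        fprintf(stderr, "metal_init_stub: failed\n");
        return NULL;            // was: exit(1)
    }
    return new metal_ctx{};
}

static llama_ctx * make_context(bool fail_metal) {
    llama_ctx * ctx = new llama_ctx{};
    ctx->ctx_metal = metal_init_stub(fail_metal);
    if (!ctx->ctx_metal) {      // the caller now decides how to recover
        delete ctx;
        return NULL;
    }
    return ctx;
}

int main() {
    printf("%s\n", make_context(true) ? "ok" : "init failed, handled gracefully");
    return 0;
}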
llm/llama.h

 /**
- * llama.cpp - git f64d44a9b9581cd58f7ec40f4fa1c3ca5ca18e1e
+ * llama.cpp - git 3ebb00935f3f0522b75df49c2769ab1774b91380
  *
  * MIT License
  *
...
...