Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
0221ee14
Commit
0221ee14
authored
Sep 15, 2017
by
Peter Eastman
Browse files
Beginnings of volta support
parent
1ef93e47
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
13 additions
and
11 deletions
+13
-11
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+8
-6
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+5
-5
No files found.
platforms/cuda/src/CudaContext.cpp
View file @
0221ee14
...
...
@@ -236,12 +236,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
minor
=
3
;
}
}
if
(
major
==
7
)
{
// Don't generate Volta-specific code until we've made the changes needed
// to support it properly.
major
=
6
;
minor
=
0
;
}
gpuArchitecture
=
intToString
(
major
)
+
intToString
(
minor
);
computeCapability
=
major
+
0.1
*
minor
;
...
...
@@ -263,6 +257,14 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int
multiprocessors
;
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
device
));
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
multiprocessors
;
if
(
cudaDriverVersion
>=
9000
)
{
compilationDefines
[
"SYNC_WARPS"
]
=
"__syncwarp();"
;
compilationDefines
[
"SHFL(var, srcLane)"
]
=
"__shfl_sync(0xffffffff, var, srcLane);"
;
}
else
{
compilationDefines
[
"SYNC_WARPS"
]
=
""
;
compilationDefines
[
"SHFL(var, srcLane)"
]
=
"__shfl(var, srcLane);"
;
}
if
(
useDoublePrecision
)
{
posq
=
CudaArray
::
create
<
double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
CudaArray
::
create
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
0221ee14
...
...
@@ -15,22 +15,22 @@ typedef struct {
#ifdef ENABLE_SHUFFLE
//support for 64 bit shuffles
static
__inline__
__device__
float
real_shfl
(
float
var
,
int
srcLane
)
{
return
__shfl
(
var
,
srcLane
);
return
SHFL
(
var
,
srcLane
);
}
static
__inline__
__device__
double
real_shfl
(
double
var
,
int
srcLane
)
{
int
hi
,
lo
;
asm
volatile
(
"mov.b64 { %0, %1 }, %2;"
:
"=r"
(
lo
),
"=r"
(
hi
)
:
"d"
(
var
));
hi
=
__shfl
(
hi
,
srcLane
);
lo
=
__shfl
(
lo
,
srcLane
);
hi
=
SHFL
(
hi
,
srcLane
);
lo
=
SHFL
(
lo
,
srcLane
);
return
__hiloint2double
(
hi
,
lo
);
}
static
__inline__
__device__
long
long
real_shfl
(
long
long
var
,
int
srcLane
)
{
int
hi
,
lo
;
asm
volatile
(
"mov.b64 { %0, %1 }, %2;"
:
"=r"
(
lo
),
"=r"
(
hi
)
:
"l"
(
var
));
hi
=
__shfl
(
hi
,
srcLane
);
lo
=
__shfl
(
lo
,
srcLane
);
hi
=
SHFL
(
hi
,
srcLane
);
lo
=
SHFL
(
lo
,
srcLane
);
// unforunately there isn't an __nv_hiloint2long(hi,lo) intrinsic cast
int2
fuse
;
fuse
.
x
=
lo
;
fuse
.
y
=
hi
;
return
*
reinterpret_cast
<
long
long
*>
(
&
fuse
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment