Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f3e727df
Commit
f3e727df
authored
Sep 25, 2017
by
peastman
Committed by
GitHub
Sep 25, 2017
Browse files
Merge pull request #1906 from peastman/volta
Support for Volta
parents
74ad9a16
e28c6947
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
20 additions
and
16 deletions
+20
-16
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+10
-6
platforms/cuda/src/kernels/customManyParticle.cu
platforms/cuda/src/kernels/customManyParticle.cu
+1
-1
platforms/cuda/src/kernels/findInteractingBlocks.cu
platforms/cuda/src/kernels/findInteractingBlocks.cu
+4
-4
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+5
-5
No files found.
platforms/cuda/src/CudaContext.cpp
View file @
f3e727df
...
@@ -236,12 +236,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -236,12 +236,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
minor
=
3
;
minor
=
3
;
}
}
}
}
if
(
major
==
7
)
{
// Don't generate Volta-specific code until we've made the changes needed
// to support it properly.
major
=
6
;
minor
=
0
;
}
gpuArchitecture
=
intToString
(
major
)
+
intToString
(
minor
);
gpuArchitecture
=
intToString
(
major
)
+
intToString
(
minor
);
computeCapability
=
major
+
0.1
*
minor
;
computeCapability
=
major
+
0.1
*
minor
;
...
@@ -263,6 +257,16 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -263,6 +257,16 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int
multiprocessors
;
int
multiprocessors
;
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
device
));
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
device
));
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
multiprocessors
;
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
multiprocessors
;
if
(
cudaDriverVersion
>=
9000
)
{
compilationDefines
[
"SYNC_WARPS"
]
=
"__syncwarp();"
;
compilationDefines
[
"SHFL(var, srcLane)"
]
=
"__shfl_sync(0xffffffff, var, srcLane);"
;
compilationDefines
[
"BALLOT(var)"
]
=
"__ballot_sync(0xffffffff, var);"
;
}
else
{
compilationDefines
[
"SYNC_WARPS"
]
=
""
;
compilationDefines
[
"SHFL(var, srcLane)"
]
=
"__shfl(var, srcLane);"
;
compilationDefines
[
"BALLOT(var)"
]
=
"__ballot(var);"
;
}
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
posq
=
CudaArray
::
create
<
double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
posq
=
CudaArray
::
create
<
double4
>
(
*
this
,
paddedNumAtoms
,
"posq"
);
velm
=
CudaArray
::
create
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
velm
=
CudaArray
::
create
<
double4
>
(
*
this
,
paddedNumAtoms
,
"velm"
);
...
...
platforms/cuda/src/kernels/customManyParticle.cu
View file @
f3e727df
...
@@ -225,7 +225,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
...
@@ -225,7 +225,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
// Loop over any blocks we identified as potentially containing neighbors.
// Loop over any blocks we identified as potentially containing neighbors.
int
includeBlockFlags
=
__ballot
(
includeBlock2
);
int
includeBlockFlags
=
BALLOT
(
includeBlock2
);
while
(
includeBlockFlags
!=
0
)
{
while
(
includeBlockFlags
!=
0
)
{
int
i
=
__ffs
(
includeBlockFlags
)
-
1
;
int
i
=
__ffs
(
includeBlockFlags
)
-
1
;
includeBlockFlags
&=
includeBlockFlags
-
1
;
includeBlockFlags
&=
includeBlockFlags
-
1
;
...
...
platforms/cuda/src/kernels/findInteractingBlocks.cu
View file @
f3e727df
...
@@ -115,7 +115,7 @@ __device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsign
...
@@ -115,7 +115,7 @@ __device__ int saveSinglePairs(int x, int* atoms, int* flags, int length, unsign
int
atom
=
atoms
[
i
];
int
atom
=
atoms
[
i
];
int
flag
=
flags
[
i
];
int
flag
=
flags
[
i
];
bool
include
=
(
i
<
length
&&
__popc
(
flags
[
i
])
>
MAX_BITS_FOR_PAIRS
);
bool
include
=
(
i
<
length
&&
__popc
(
flags
[
i
])
>
MAX_BITS_FOR_PAIRS
);
int
includeFlags
=
__ballot
(
include
);
int
includeFlags
=
BALLOT
(
include
);
if
(
include
)
{
if
(
include
)
{
int
index
=
numCompacted
+
__popc
(
includeFlags
&
warpMask
);
int
index
=
numCompacted
+
__popc
(
includeFlags
&
warpMask
);
atoms
[
index
]
=
atom
;
atoms
[
index
]
=
atom
;
...
@@ -271,7 +271,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
...
@@ -271,7 +271,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
// Loop over any blocks we identified as potentially containing neighbors.
// Loop over any blocks we identified as potentially containing neighbors.
int
includeBlockFlags
=
__ballot
(
includeBlock2
);
int
includeBlockFlags
=
BALLOT
(
includeBlock2
);
while
(
includeBlockFlags
!=
0
)
{
while
(
includeBlockFlags
!=
0
)
{
int
i
=
__ffs
(
includeBlockFlags
)
-
1
;
int
i
=
__ffs
(
includeBlockFlags
)
-
1
;
includeBlockFlags
&=
includeBlockFlags
-
1
;
includeBlockFlags
&=
includeBlockFlags
-
1
;
...
@@ -291,7 +291,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
...
@@ -291,7 +291,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
atomDelta
)
APPLY_PERIODIC_TO_DELTA
(
atomDelta
)
#endif
#endif
int
atomFlags
=
ballot
(
atomDelta
.
x
*
atomDelta
.
x
+
atomDelta
.
y
*
atomDelta
.
y
+
atomDelta
.
z
*
atomDelta
.
z
<
(
PADDED_CUTOFF
+
blockCenterY
.
w
)
*
(
PADDED_CUTOFF
+
blockCenterY
.
w
));
int
atomFlags
=
BALLOT
(
atomDelta
.
x
*
atomDelta
.
x
+
atomDelta
.
y
*
atomDelta
.
y
+
atomDelta
.
z
*
atomDelta
.
z
<
(
PADDED_CUTOFF
+
blockCenterY
.
w
)
*
(
PADDED_CUTOFF
+
blockCenterY
.
w
));
int
interacts
=
0
;
int
interacts
=
0
;
if
(
atom2
<
NUM_ATOMS
&&
atomFlags
!=
0
)
{
if
(
atom2
<
NUM_ATOMS
&&
atomFlags
!=
0
)
{
int
first
=
__ffs
(
atomFlags
)
-
1
;
int
first
=
__ffs
(
atomFlags
)
-
1
;
...
@@ -317,7 +317,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
...
@@ -317,7 +317,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
// Add any interacting atoms to the buffer.
// Add any interacting atoms to the buffer.
int
includeAtomFlags
=
__ballot
(
interacts
);
int
includeAtomFlags
=
BALLOT
(
interacts
);
if
(
interacts
)
{
if
(
interacts
)
{
int
index
=
neighborsInBuffer
+
__popc
(
includeAtomFlags
&
warpMask
);
int
index
=
neighborsInBuffer
+
__popc
(
includeAtomFlags
&
warpMask
);
buffer
[
index
]
=
atom2
;
buffer
[
index
]
=
atom2
;
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
f3e727df
...
@@ -15,22 +15,22 @@ typedef struct {
...
@@ -15,22 +15,22 @@ typedef struct {
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
//support for 64 bit shuffles
//support for 64 bit shuffles
static
__inline__
__device__
float
real_shfl
(
float
var
,
int
srcLane
)
{
static
__inline__
__device__
float
real_shfl
(
float
var
,
int
srcLane
)
{
return
__shfl
(
var
,
srcLane
);
return
SHFL
(
var
,
srcLane
);
}
}
static
__inline__
__device__
double
real_shfl
(
double
var
,
int
srcLane
)
{
static
__inline__
__device__
double
real_shfl
(
double
var
,
int
srcLane
)
{
int
hi
,
lo
;
int
hi
,
lo
;
asm
volatile
(
"mov.b64 { %0, %1 }, %2;"
:
"=r"
(
lo
),
"=r"
(
hi
)
:
"d"
(
var
));
asm
volatile
(
"mov.b64 { %0, %1 }, %2;"
:
"=r"
(
lo
),
"=r"
(
hi
)
:
"d"
(
var
));
hi
=
__shfl
(
hi
,
srcLane
);
hi
=
SHFL
(
hi
,
srcLane
);
lo
=
__shfl
(
lo
,
srcLane
);
lo
=
SHFL
(
lo
,
srcLane
);
return
__hiloint2double
(
hi
,
lo
);
return
__hiloint2double
(
hi
,
lo
);
}
}
static
__inline__
__device__
long
long
real_shfl
(
long
long
var
,
int
srcLane
)
{
static
__inline__
__device__
long
long
real_shfl
(
long
long
var
,
int
srcLane
)
{
int
hi
,
lo
;
int
hi
,
lo
;
asm
volatile
(
"mov.b64 { %0, %1 }, %2;"
:
"=r"
(
lo
),
"=r"
(
hi
)
:
"l"
(
var
));
asm
volatile
(
"mov.b64 { %0, %1 }, %2;"
:
"=r"
(
lo
),
"=r"
(
hi
)
:
"l"
(
var
));
hi
=
__shfl
(
hi
,
srcLane
);
hi
=
SHFL
(
hi
,
srcLane
);
lo
=
__shfl
(
lo
,
srcLane
);
lo
=
SHFL
(
lo
,
srcLane
);
// unforunately there isn't an __nv_hiloint2long(hi,lo) intrinsic cast
// unforunately there isn't an __nv_hiloint2long(hi,lo) intrinsic cast
int2
fuse
;
fuse
.
x
=
lo
;
fuse
.
y
=
hi
;
int2
fuse
;
fuse
.
x
=
lo
;
fuse
.
y
=
hi
;
return
*
reinterpret_cast
<
long
long
*>
(
&
fuse
);
return
*
reinterpret_cast
<
long
long
*>
(
&
fuse
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment