Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
978e3a08
Commit
978e3a08
authored
Aug 05, 2016
by
Peter Eastman
Browse files
Prefer nvcc over runtime compiler
parent
1f7866ad
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
64 additions
and
35 deletions
+64
-35
platforms/cuda/include/CudaContext.h
platforms/cuda/include/CudaContext.h
+1
-1
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+63
-34
No files found.
platforms/cuda/include/CudaContext.h
View file @
978e3a08
...
@@ -611,7 +611,7 @@ private:
...
@@ -611,7 +611,7 @@ private:
int
paddedNumAtoms
;
int
paddedNumAtoms
;
int
numAtomBlocks
;
int
numAtomBlocks
;
int
numThreadBlocks
;
int
numThreadBlocks
;
bool
useBlockingSync
,
useDoublePrecision
,
useMixedPrecision
,
contextIsValid
,
atomsWereReordered
,
boxIsTriclinic
,
hasCompilerKernel
,
forcesValid
;
bool
useBlockingSync
,
useDoublePrecision
,
useMixedPrecision
,
contextIsValid
,
atomsWereReordered
,
boxIsTriclinic
,
hasCompilerKernel
,
isNvccAvailable
,
forcesValid
;
std
::
string
compiler
,
tempDir
,
cacheDir
,
gpuArchitecture
;
std
::
string
compiler
,
tempDir
,
cacheDir
,
gpuArchitecture
;
float4
periodicBoxVecXFloat
,
periodicBoxVecYFloat
,
periodicBoxVecZFloat
,
periodicBoxSizeFloat
,
invPeriodicBoxSizeFloat
;
float4
periodicBoxVecXFloat
,
periodicBoxVecYFloat
,
periodicBoxVecZFloat
,
periodicBoxSizeFloat
,
invPeriodicBoxSizeFloat
;
double4
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
,
periodicBoxSize
,
invPeriodicBoxSize
;
double4
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
,
periodicBoxSize
,
invPeriodicBoxSize
;
...
...
platforms/cuda/src/CudaContext.cpp
View file @
978e3a08
...
@@ -73,10 +73,43 @@ const int CudaContext::ThreadBlockSize = 64;
...
@@ -73,10 +73,43 @@ const int CudaContext::ThreadBlockSize = 64;
const
int
CudaContext
::
TileSize
=
sizeof
(
tileflags
)
*
8
;
const
int
CudaContext
::
TileSize
=
sizeof
(
tileflags
)
*
8
;
bool
CudaContext
::
hasInitializedCuda
=
false
;
bool
CudaContext
::
hasInitializedCuda
=
false
;
#ifdef WIN32
#include <Windows.h>
static
int
executeInWindows
(
const
string
&
command
)
{
// COMSPEC is an env variable pointing to full dir of cmd.exe
// it always defined on pretty much all Windows OSes
string
fullcommand
=
getenv
(
"COMSPEC"
)
+
string
(
" /C "
)
+
command
;
STARTUPINFO
si
;
PROCESS_INFORMATION
pi
;
ZeroMemory
(
&
si
,
sizeof
(
si
)
);
si
.
cb
=
sizeof
(
si
);
ZeroMemory
(
&
pi
,
sizeof
(
pi
)
);
vector
<
char
>
args
(
std
::
max
(
1000
,
(
int
)
fullcommand
.
size
()
+
1
));
strcpy
(
&
args
[
0
],
fullcommand
.
c_str
());
si
.
dwFlags
=
STARTF_USESHOWWINDOW
;
si
.
wShowWindow
=
SW_HIDE
;
if
(
!
CreateProcess
(
NULL
,
&
args
[
0
],
NULL
,
NULL
,
FALSE
,
0
,
NULL
,
NULL
,
&
si
,
&
pi
))
{
return
-
1
;
}
WaitForSingleObject
(
pi
.
hProcess
,
INFINITE
);
DWORD
exitCode
=
-
1
;
if
(
!
GetExitCodeProcess
(
pi
.
hProcess
,
&
exitCode
))
{
throw
(
OpenMMException
(
"Could not get nvcc.exe's exit code
\n
"
));
}
else
{
if
(
exitCode
==
0
)
return
0
;
else
return
-
1
;
}
}
#endif
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
currentStream
(
0
),
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
currentStream
(
0
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
// Determine what compiler to use.
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
if
(
platformData
.
context
!=
NULL
)
{
if
(
platformData
.
context
!=
NULL
)
{
try
{
try
{
...
@@ -87,6 +120,24 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -87,6 +120,24 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
// The runtime compiler plugin isn't available.
// The runtime compiler plugin isn't available.
}
}
}
}
#ifdef WIN32
string
testCompilerCommand
=
this
->
compiler
+
" --version > nul 2> nul"
;
int
res
=
executeInWindows
(
testCompilerCommand
.
c_str
());
#else
string
testCompilerCommand
=
this
->
compiler
+
" --version > /dev/null 2> /dev/null"
;
int
res
=
std
::
system
(
testCompilerCommand
.
c_str
());
#endif
isNvccAvailable
=
(
res
==
0
);
static
bool
hasShownNvccWarning
=
false
;
if
(
hasCompilerKernel
&&
!
isNvccAvailable
&&
!
hasShownNvccWarning
)
{
hasShownNvccWarning
=
true
;
printf
(
"Could not find nvcc. Using runtime compiler, which may produce slower performance. "
);
#ifdef WIN32
printf
(
"Set CUDA_BIN_PATH to specify where nvcc is located.
\n
"
);
#else
printf
(
"Set OPENMM_CUDA_COMPILER to specify where nvcc is located.
\n
"
);
#endif
}
if
(
hostCompiler
.
size
()
>
0
)
if
(
hostCompiler
.
size
()
>
0
)
this
->
compiler
=
compiler
+
" --compiler-bindir "
+
hostCompiler
;
this
->
compiler
=
compiler
+
" --compiler-bindir "
+
hostCompiler
;
if
(
!
hasInitializedCuda
)
{
if
(
!
hasInitializedCuda
)
{
...
@@ -160,6 +211,15 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -160,6 +211,15 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
// anything beyond 5.0.
// anything beyond 5.0.
if
(
major
==
5
)
if
(
major
==
5
)
minor
=
0
;
minor
=
0
;
#endif
#if __CUDA_API_VERSION < 8000
// This is a workaround to support Pascal with CUDA 7.5. It reports
// its compute capability as 6.x, but the compiler doesn't support
// anything beyond 5.3.
if
(
major
==
6
)
{
major
=
5
;
minor
=
3
;
}
#endif
#endif
gpuArchitecture
=
intToString
(
major
)
+
intToString
(
minor
);
gpuArchitecture
=
intToString
(
major
)
+
intToString
(
minor
);
computeCapability
=
major
+
0.1
*
minor
;
computeCapability
=
major
+
0.1
*
minor
;
...
@@ -454,37 +514,6 @@ CUmodule CudaContext::createModule(const string source, const char* optimization
...
@@ -454,37 +514,6 @@ CUmodule CudaContext::createModule(const string source, const char* optimization
return
createModule
(
source
,
map
<
string
,
string
>
(),
optimizationFlags
);
return
createModule
(
source
,
map
<
string
,
string
>
(),
optimizationFlags
);
}
}
#ifdef WIN32
#include <Windows.h>
static
bool
compileInWindows
(
const
string
&
command
)
{
// COMSPEC is an env variable pointing to full dir of cmd.exe
// it always defined on pretty much all Windows OSes
string
fullcommand
=
getenv
(
"COMSPEC"
)
+
string
(
" /C "
)
+
command
;
STARTUPINFO
si
;
PROCESS_INFORMATION
pi
;
ZeroMemory
(
&
si
,
sizeof
(
si
)
);
si
.
cb
=
sizeof
(
si
);
ZeroMemory
(
&
pi
,
sizeof
(
pi
)
);
vector
<
char
>
args
(
std
::
max
(
1000
,
(
int
)
fullcommand
.
size
()
+
1
));
strcpy
(
&
args
[
0
],
fullcommand
.
c_str
());
si
.
dwFlags
=
STARTF_USESHOWWINDOW
;
si
.
wShowWindow
=
SW_HIDE
;
if
(
!
CreateProcess
(
NULL
,
&
args
[
0
],
NULL
,
NULL
,
FALSE
,
0
,
NULL
,
NULL
,
&
si
,
&
pi
))
{
return
-
1
;
}
WaitForSingleObject
(
pi
.
hProcess
,
INFINITE
);
DWORD
exitCode
=
-
1
;
if
(
!
GetExitCodeProcess
(
pi
.
hProcess
,
&
exitCode
))
{
throw
(
OpenMMException
(
"Could not get nvcc.exe's exit code
\n
"
));
}
else
{
if
(
exitCode
==
0
)
return
0
;
else
return
-
1
;
}
}
#endif
CUmodule
CudaContext
::
createModule
(
const
string
source
,
const
map
<
string
,
string
>&
defines
,
const
char
*
optimizationFlags
)
{
CUmodule
CudaContext
::
createModule
(
const
string
source
,
const
map
<
string
,
string
>&
defines
,
const
char
*
optimizationFlags
)
{
string
bits
=
intToString
(
8
*
sizeof
(
void
*
));
string
bits
=
intToString
(
8
*
sizeof
(
void
*
));
string
options
=
(
optimizationFlags
==
NULL
?
defaultOptimizationOptions
:
string
(
optimizationFlags
));
string
options
=
(
optimizationFlags
==
NULL
?
defaultOptimizationOptions
:
string
(
optimizationFlags
));
...
@@ -567,7 +596,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
...
@@ -567,7 +596,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
// If the runtime compiler plugin is available, use it.
// If the runtime compiler plugin is available, use it.
if
(
hasCompilerKernel
)
{
if
(
hasCompilerKernel
&&
!
isNvccAvailable
)
{
string
ptx
=
compilerKernel
.
getAs
<
CudaCompilerKernel
>
().
createModule
(
src
.
str
(),
"-arch=compute_"
+
gpuArchitecture
+
" "
+
options
,
*
this
);
string
ptx
=
compilerKernel
.
getAs
<
CudaCompilerKernel
>
().
createModule
(
src
.
str
(),
"-arch=compute_"
+
gpuArchitecture
+
" "
+
options
,
*
this
);
// If possible, write the PTX out to a temporary file so we can cache it for later use.
// If possible, write the PTX out to a temporary file so we can cache it for later use.
...
@@ -602,7 +631,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
...
@@ -602,7 +631,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
#else
#else
string
command
=
compiler
+
" --ptx -lineinfo --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o "
+
outputFile
+
" "
+
options
+
" "
+
inputFile
+
" 2> "
+
logFile
;
string
command
=
compiler
+
" --ptx -lineinfo --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o "
+
outputFile
+
" "
+
options
+
" "
+
inputFile
+
" 2> "
+
logFile
;
#endif
#endif
int
res
=
compil
eInWindows
(
command
);
int
res
=
execut
eInWindows
(
command
);
#else
#else
string
command
=
compiler
+
" --ptx --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o
\"
"
+
outputFile
+
"
\"
"
+
options
+
"
\"
"
+
inputFile
+
"
\"
2>
\"
"
+
logFile
+
"
\"
"
;
string
command
=
compiler
+
" --ptx --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o
\"
"
+
outputFile
+
"
\"
"
+
options
+
"
\"
"
+
inputFile
+
"
\"
2>
\"
"
+
logFile
+
"
\"
"
;
res
=
std
::
system
(
command
.
c_str
());
res
=
std
::
system
(
command
.
c_str
());
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment