Commit 2f498f23 authored by Samuli Laine
Browse files

Bug and perf fixes

parent 5a2d5d59
......@@ -901,12 +901,15 @@ must have shape [minibatch_size, height, width, 2]. When sampling a cube map
texture, must have shape [minibatch_size, height, width, 3].</td></tr><tr class="arg"><td class="argname">uv_da</td><td class="arg_short">(Optional) Tensor containing image-space derivatives of texture coordinates.
Must have same shape as <code>uv</code> except for the last dimension that is to be twice
as long.</td></tr><tr class="arg"><td class="argname">mip_level_bias</td><td class="arg_short">(Optional) Per-pixel bias for mip level selection. If <code>uv_da</code> is omitted,
determines mip level directly. Must have shape [minibatch_size, height, width].</td></tr><tr class="arg"><td class="argname">mip</td><td class="arg_short">(Optional) Preconstructed mipmap stack from a <code>texture_construct_mip()</code> call or a list
of tensors specifying a custom mipmap stack. Gradients of a custom mipmap stack
are not automatically propagated to base texture but the mipmap tensors will
receive gradients of their own. If a mipmap stack is not specified but the chosen
filter mode requires it, the mipmap stack is constructed internally and
discarded afterwards.</td></tr><tr class="arg"><td class="argname">filter_mode</td><td class="arg_short">Texture filtering mode to be used. Valid values are 'auto', 'nearest',
determines mip level directly. Must have shape [minibatch_size, height, width].</td></tr><tr class="arg"><td class="argname">mip</td><td class="arg_short">(Optional) Preconstructed mipmap stack from a <code>texture_construct_mip()</code> call, or a list
of tensors specifying a custom mipmap stack. When specifying a custom mipmap stack,
the tensors in the list must follow the same format as <code>tex</code> except for width and
height that must follow the usual rules for mipmap sizes. The base level texture
is still supplied in <code>tex</code> and must not be included in the list. Gradients of a
custom mipmap stack are not automatically propagated to base texture but the mipmap
tensors will receive gradients of their own. If a mipmap stack is not specified
but the chosen filter mode requires it, the mipmap stack is constructed internally
and discarded afterwards.</td></tr><tr class="arg"><td class="argname">filter_mode</td><td class="arg_short">Texture filtering mode to be used. Valid values are 'auto', 'nearest',
'linear', 'linear-mipmap-nearest', and 'linear-mipmap-linear'. Mode 'auto'
selects 'linear' if neither <code>uv_da</code> nor <code>mip_level_bias</code> is specified, and
'linear-mipmap-linear' when at least one of them is specified, these being
......
......@@ -42,7 +42,7 @@ using namespace tensorflow::shape_inference;
#define NVDR_CTX_ARGS int _nvdr_ctx_dummy
#define NVDR_CTX_PARAMS 0
#define NVDR_CHECK(COND, ERR) do { TORCH_CHECK(COND, ERR) } while(0)
#define NVDR_CHECK_CUDA_ERROR(CUDA_CALL) do { cudaError_t err = CUDA_CALL; AT_CUDA_CHECK(cudaGetLastError()); } while(0)
#define NVDR_CHECK_CUDA_ERROR(CUDA_CALL) do { cudaError_t err = CUDA_CALL; TORCH_CHECK(err == CUDA_SUCCESS, "Cuda error: ", cudaGetLastError(), "[", #CUDA_CALL, ";]"); } while(0)
#define NVDR_CHECK_GL_ERROR(GL_CALL) do { GL_CALL; GLenum err = glGetError(); TORCH_CHECK(err == GL_NO_ERROR, "OpenGL error: ", getGLErrorString(err), "[", #GL_CALL, ";]"); } while(0)
#endif
......
......@@ -168,10 +168,9 @@ void rasterizeInitGLContext(NVDR_CTX_ARGS, RasterizeGLState& s, int cudaDeviceId
int layer_id = v_layer[0];
int prim_id = gl_PrimitiveIDIn + v_offset[0];
// Flip z before hw depth test because depth is cleared to zero.
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[0].gl_Position.x, gl_in[0].gl_Position.y, -gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); var_uvzw = vec4(1.f, 0.f, gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); var_db = db0; EmitVertex();
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[1].gl_Position.x, gl_in[1].gl_Position.y, -gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); var_uvzw = vec4(0.f, 1.f, gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); var_db = db1; EmitVertex();
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[2].gl_Position.x, gl_in[2].gl_Position.y, -gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); var_uvzw = vec4(0.f, 0.f, gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); var_db = db2; EmitVertex();
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[0].gl_Position.x, gl_in[0].gl_Position.y, gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); var_uvzw = vec4(1.f, 0.f, gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); var_db = db0; EmitVertex();
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[1].gl_Position.x, gl_in[1].gl_Position.y, gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); var_uvzw = vec4(0.f, 1.f, gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); var_db = db1; EmitVertex();
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[2].gl_Position.x, gl_in[2].gl_Position.y, gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); var_uvzw = vec4(0.f, 0.f, gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); var_db = db2; EmitVertex();
}
)
);
......@@ -209,10 +208,9 @@ void rasterizeInitGLContext(NVDR_CTX_ARGS, RasterizeGLState& s, int cudaDeviceId
int layer_id = v_layer[0];
int prim_id = gl_PrimitiveIDIn + v_offset[0];
// Flip z before hw depth test because depth is cleared to zero.
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[0].gl_Position.x, gl_in[0].gl_Position.y, -gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); var_uvzw = vec4(1.f, 0.f, gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); EmitVertex();
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[1].gl_Position.x, gl_in[1].gl_Position.y, -gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); var_uvzw = vec4(0.f, 1.f, gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); EmitVertex();
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[2].gl_Position.x, gl_in[2].gl_Position.y, -gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); var_uvzw = vec4(0.f, 0.f, gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); EmitVertex();
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[0].gl_Position.x, gl_in[0].gl_Position.y, gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); var_uvzw = vec4(1.f, 0.f, gl_in[0].gl_Position.z, gl_in[0].gl_Position.w); EmitVertex();
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[1].gl_Position.x, gl_in[1].gl_Position.y, gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); var_uvzw = vec4(0.f, 1.f, gl_in[1].gl_Position.z, gl_in[1].gl_Position.w); EmitVertex();
gl_Layer = layer_id; gl_PrimitiveID = prim_id; gl_Position = vec4(gl_in[2].gl_Position.x, gl_in[2].gl_Position.y, gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); var_uvzw = vec4(0.f, 0.f, gl_in[2].gl_Position.z, gl_in[2].gl_Position.w); EmitVertex();
}
)
);
......@@ -262,9 +260,10 @@ void rasterizeInitGLContext(NVDR_CTX_ARGS, RasterizeGLState& s, int cudaDeviceId
NVDR_CHECK_GL_ERROR(glBindFragDataLocation(s.glProgram, 1, "out_db"));
NVDR_CHECK_GL_ERROR(glUseProgram(s.glProgram));
// Set up rendering mode. Inverted depth so that all buffers can be cleared to zero.
// Set up depth test.
NVDR_CHECK_GL_ERROR(glEnable(GL_DEPTH_TEST));
NVDR_CHECK_GL_ERROR(glDepthFunc(GL_GEQUAL));
NVDR_CHECK_GL_ERROR(glDepthFunc(GL_LESS));
NVDR_CHECK_GL_ERROR(glClearDepth(1.0));
// Create and bind output buffers. Storage is allocated later.
NVDR_CHECK_GL_ERROR(glGenTextures(num_outputs, s.glColorBuffer));
......@@ -375,18 +374,14 @@ void rasterizeRender(NVDR_CTX_ARGS, RasterizeGLState& s, cudaStream_t stream, co
NVDR_CHECK_CUDA_ERROR(cudaGraphicsUnmapResources(1, &s.cudaPosBuffer, stream));
}
// Set viewport, clear color and depth/stencil buffers.
// Set viewport, clear color buffer(s) and depth/stencil buffer.
NVDR_CHECK_GL_ERROR(glViewport(0, 0, width, height));
NVDR_CHECK_GL_ERROR(glClearTexSubImage(s.glDepthStencilBuffer, 0, 0, 0, 0, width, height, depth, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 0));
NVDR_CHECK_GL_ERROR(glClearTexSubImage(s.glColorBuffer[0], 0, 0, 0, 0, width, height, depth, GL_RGBA, GL_FLOAT, 0));
NVDR_CHECK_GL_ERROR(glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT));
// If outputting bary differentials, clear second output buffer and set resolution uniform
// If outputting bary differentials, set resolution uniform
if (s.enableDB)
{
NVDR_CHECK_GL_ERROR(glClearTexSubImage(s.glColorBuffer[1], 0, 0, 0, 0, width, height, depth, GL_RGBA, GL_FLOAT, 0));
NVDR_CHECK_GL_ERROR(glUniform2f(0, 2.f / (float)width, 2.f / (float)height));
}
// Render the meshes.
if (depth == 1 && !rangesPtr)
{
......
......@@ -56,6 +56,9 @@ verbose = True # Print status messages to stdout.
# Internal helper funcs.
def _find_compiler_bindir():
hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/Enterprise/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True)
if hostx64_paths != []:
return hostx64_paths[0]
hostx64_paths = sorted(glob.glob('C:/Program Files (x86)/Microsoft Visual Studio/*/Professional/VC/Tools/MSVC/*/bin/Hostx64/x64'), reverse=True)
if hostx64_paths != []:
return hostx64_paths[0]
......
......@@ -28,7 +28,7 @@ def _get_plugin():
lib_dir = os.path.dirname(__file__) + r"\..\lib"
def find_cl_path():
import glob
for edition in ['Professional', 'BuildTools', 'Community']:
for edition in ['Enterprise', 'Professional', 'BuildTools', 'Community']:
paths = sorted(glob.glob(r"C:\Program Files (x86)\Microsoft Visual Studio\*\%s\VC\Tools\MSVC\*\bin\Hostx64\x64" % edition), reverse=True)
if paths:
return paths[0]
......@@ -389,12 +389,15 @@ def texture(tex, uv, uv_da=None, mip_level_bias=None, mip=None, filter_mode='aut
as long.
mip_level_bias: (Optional) Per-pixel bias for mip level selection. If `uv_da` is omitted,
determines mip level directly. Must have shape [minibatch_size, height, width].
mip: (Optional) Preconstructed mipmap stack from a `texture_construct_mip()` call or a list
of tensors specifying a custom mipmap stack. Gradients of a custom mipmap stack
are not automatically propagated to base texture but the mipmap tensors will
receive gradients of their own. If a mipmap stack is not specified but the chosen
filter mode requires it, the mipmap stack is constructed internally and
discarded afterwards.
mip: (Optional) Preconstructed mipmap stack from a `texture_construct_mip()` call, or a list
of tensors specifying a custom mipmap stack. When specifying a custom mipmap stack,
the tensors in the list must follow the same format as `tex` except for width and
height that must follow the usual rules for mipmap sizes. The base level texture
is still supplied in `tex` and must not be included in the list. Gradients of a
custom mipmap stack are not automatically propagated to base texture but the mipmap
tensors will receive gradients of their own. If a mipmap stack is not specified
but the chosen filter mode requires it, the mipmap stack is constructed internally
and discarded afterwards.
filter_mode: Texture filtering mode to be used. Valid values are 'auto', 'nearest',
'linear', 'linear-mipmap-nearest', and 'linear-mipmap-linear'. Mode 'auto'
                     selects 'linear' if neither `uv_da` nor `mip_level_bias` is specified, and
......
......@@ -319,14 +319,14 @@ torch::Tensor texture_fwd_mip(torch::Tensor tex, torch::Tensor uv, torch::Tensor
NVDR_CHECK(!((uintptr_t)p.uv & 7), "uv input tensor not aligned to float2");
if ((p.channels & 3) == 0)
{
for (int i=1; 0 <= p.mipLevelMax; i++)
for (int i=0; i <= p.mipLevelMax; i++)
NVDR_CHECK(!((uintptr_t)p.tex[i] & 15), "tex or mip input tensor not aligned to float4");
NVDR_CHECK(!((uintptr_t)p.out & 15), "out output tensor not aligned to float4");
NVDR_CHECK(!((uintptr_t)pmip & 15), "mip input tensor not aligned to float4");
}
if ((p.channels & 1) == 0)
{
for (int i=1; 0 <= p.mipLevelMax; i++)
for (int i=0; i <= p.mipLevelMax; i++)
NVDR_CHECK(!((uintptr_t)p.tex[i] & 7), "tex or mip input tensor not aligned to float2");
NVDR_CHECK(!((uintptr_t)p.out & 7), "out output tensor not aligned to float2");
NVDR_CHECK(!((uintptr_t)pmip & 7), "mip input tensor not aligned to float2");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment