Intelligent Signed Distance Fields
/*
* Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
*
* Please refer to the NVIDIA end user license agreement (EULA) associated
* with this source code for terms and conditions that govern your use of
* this software. Any use, reproduction, disclosure, or distribution of
* this software and related documentation outside the terms of the EULA
* is strictly prohibited.
*
*/
#ifndef _MARCHING_CUBES_KERNEL_CU_
#define _MARCHING_CUBES_KERNEL_CU_
#include <stdio.h>
#include <string.h>
#include <helper_cuda.h> // includes for helper CUDA functions
#include <helper_math.h>
#include <cuda_runtime_api.h>
#include <thrust/device_vector.h>
#include <thrust/scan.h>
#include "defines.h"
#include "tables.h"
#include "functions.cuh"
// textures containing look-up tables
texture<uint, 1, cudaReadModeElementType> edgeTex;
texture<uint, 1, cudaReadModeElementType> triTex;
texture<uint, 1, cudaReadModeElementType> numVertsTex;
// volume data
//texture<uchar, 1, cudaReadModeNormalizedFloat> volumeTex;
texture<float, 1, cudaReadModeElementType> volumeTex;
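// allocate device copies of the marching cubes look-up tables (edge table,
// triangle table, vertex-count table), upload them, and bind them to 1D textures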
extern "C"
void allocateTextures(uint **d_edgeTable, uint **d_triTable, uint **d_numVertsTable)
{
checkCudaErrors(cudaMalloc((void **) d_edgeTable, 256*sizeof(uint)));
checkCudaErrors(cudaMemcpy((void *)*d_edgeTable, (void *)edgeTable, 256*sizeof(uint), cudaMemcpyHostToDevice));
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindUnsigned);
checkCudaErrors(cudaBindTexture(0, edgeTex, *d_edgeTable, channelDesc));
checkCudaErrors(cudaMalloc((void **) d_triTable, 256*16*sizeof(uint)));
checkCudaErrors(cudaMemcpy((void *)*d_triTable, (void *)triTable, 256*16*sizeof(uint), cudaMemcpyHostToDevice));
checkCudaErrors(cudaBindTexture(0, triTex, *d_triTable, channelDesc));
checkCudaErrors(cudaMalloc((void **) d_numVertsTable, 256*sizeof(uint)));
checkCudaErrors(cudaMemcpy((void *)*d_numVertsTable, (void *)numVertsTable, 256*sizeof(uint), cudaMemcpyHostToDevice));
checkCudaErrors(cudaBindTexture(0, numVertsTex, *d_numVertsTable, channelDesc));
}
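// bind a device buffer of float volume samples to the 1D volume texture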
extern "C"
void bindVolumeTexture(float *d_volume)
{
// bind to linear texture
checkCudaErrors(cudaBindTexture(0, volumeTex, d_volume, cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKindFloat)));
}
// analytic field functions, selected by renderType
__device__
float eval(float x, float y, float z, int renderType)
{
switch (renderType) {
case 1: // a simple TPMS (triply periodic minimal surface) model
return cos(2*M_PI*x+(2*M_PI)) + cos(2*M_PI*y+(2*M_PI)) + cos(2*M_PI*z+(2*M_PI));
case 2: // a sphere of radius sqrt(0.5)
return x * x + y * y + z * z - 0.5;
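// cases 3-8: field combination operators implemented in functions.cuh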
case 3:
return Max_function(x, y, z);
case 4:
return Min_function(x, y, z);
case 5:
return R_a_function(x, y, z);
case 6:
return R_p_function(x, y, z);
case 7:
return K_s_function(x, y, z);
case 8:
return P_norm(x, y, z);
default:
break;
}
return 0;
}
// evaluate field function at point
__device__
float fieldFunc(float3 p, int renderType)
{
return eval(p.x, p.y, p.z, renderType);
}
// evaluate field function at a point
// returns the field value in .w and a forward-difference gradient estimate in .xyz
__device__
float4 fieldFunc4(float3 p, int renderType)
{
float v = eval(p.x, p.y, p.z, renderType);
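// finite-difference step; the gradient is left scaled by d, since only its
// direction is needed for the shading normals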
const float d = 0.001f;
float dx = eval(p.x + d, p.y, p.z, renderType) - v;
float dy = eval(p.x, p.y + d, p.z, renderType) - v;
float dz = eval(p.x, p.y, p.z + d, renderType) - v;
return make_float4(dx, dy, dz, v);
}
// sample volume data set at a point
__device__
float sampleVolume(float *data, uint3 p, uint3 gridSize)
{
p.x = min(p.x, gridSize.x - 1);
p.y = min(p.y, gridSize.y - 1);
p.z = min(p.z, gridSize.z - 1);
uint i = (p.z*gridSize.x*gridSize.y) + (p.y*gridSize.x) + p.x;
// return (float) data[i] / 255.0f;
return tex1Dfetch(volumeTex, i);
}
// compute position in 3d grid from 1d index
// only works for power-of-two grid sizes
__device__
uint3 calcGridPos(uint i, uint3 gridSizeShift, uint3 gridSizeMask)
{
uint3 gridPos;
gridPos.x = i & gridSizeMask.x;
gridPos.y = (i >> gridSizeShift.y) & gridSizeMask.y;
gridPos.z = (i >> gridSizeShift.z) & gridSizeMask.z;
return gridPos;
}
// classify voxel based on number of vertices it will generate
// one thread per voxel
__global__ void
classifyVoxel(uint *voxelVerts, uint *voxelOccupied, float *volume,
uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask, uint numVoxels,
float3 voxelSize, float isoValue, int renderType)
{
uint blockId = __mul24(blockIdx.y, gridDim.x) + blockIdx.x;
uint i = __mul24(blockId, blockDim.x) + threadIdx.x;
uint3 gridPos = calcGridPos(i, gridSizeShift, gridSizeMask);
// read field values at neighbouring grid vertices
float field[8];
if(!renderType) {
field[0] = sampleVolume(volume, gridPos, gridSize);
field[1] = sampleVolume(volume, gridPos + make_uint3(1, 0, 0), gridSize);
field[2] = sampleVolume(volume, gridPos + make_uint3(1, 1, 0), gridSize);
field[3] = sampleVolume(volume, gridPos + make_uint3(0, 1, 0), gridSize);
field[4] = sampleVolume(volume, gridPos + make_uint3(0, 0, 1), gridSize);
field[5] = sampleVolume(volume, gridPos + make_uint3(1, 0, 1), gridSize);
field[6] = sampleVolume(volume, gridPos + make_uint3(1, 1, 1), gridSize);
field[7] = sampleVolume(volume, gridPos + make_uint3(0, 1, 1), gridSize);
} else {
float3 p;
p.x = -1.0f + (gridPos.x * voxelSize.x);
p.y = -1.0f + (gridPos.y * voxelSize.y);
p.z = -1.0f + (gridPos.z * voxelSize.z);
field[0] = fieldFunc(p, renderType);
field[1] = fieldFunc(p + make_float3(voxelSize.x, 0, 0), renderType);
field[2] = fieldFunc(p + make_float3(voxelSize.x, voxelSize.y, 0), renderType);
field[3] = fieldFunc(p + make_float3(0, voxelSize.y, 0), renderType);
field[4] = fieldFunc(p + make_float3(0, 0, voxelSize.z), renderType);
field[5] = fieldFunc(p + make_float3(voxelSize.x, 0, voxelSize.z), renderType);
field[6] = fieldFunc(p + make_float3(voxelSize.x, voxelSize.y, voxelSize.z), renderType);
field[7] = fieldFunc(p + make_float3(0, voxelSize.y, voxelSize.z), renderType);
}
// calculate flag indicating if each vertex is inside or outside the isosurface
uint cubeindex;
cubeindex = uint(field[0] < isoValue);
cubeindex += uint(field[1] < isoValue)*2;
cubeindex += uint(field[2] < isoValue)*4;
cubeindex += uint(field[3] < isoValue)*8;
cubeindex += uint(field[4] < isoValue)*16;
cubeindex += uint(field[5] < isoValue)*32;
cubeindex += uint(field[6] < isoValue)*64;
cubeindex += uint(field[7] < isoValue)*128;
// read number of vertices from texture
uint numVerts = tex1Dfetch(numVertsTex, cubeindex);
if (i < numVoxels)
{
voxelVerts[i] = numVerts;
voxelOccupied[i] = (numVerts > 0);
}
}
extern "C" void
launch_classifyVoxel(dim3 grid, dim3 threads, uint *voxelVerts, uint *voxelOccupied, float *volume,
uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask, uint numVoxels,
float3 voxelSize, float isoValue, int renderType)
{
// calculate the number of vertices needed per voxel
classifyVoxel<<<grid, threads>>>(voxelVerts, voxelOccupied, volume,
gridSize, gridSizeShift, gridSizeMask,
numVoxels, voxelSize, isoValue, renderType);
getLastCudaError("classifyVoxel failed");
}
// compact voxel array
__global__ void
compactVoxels(uint *compactedVoxelArray, uint *voxelOccupied, uint *voxelOccupiedScan, uint numVoxels)
{
uint blockId = __mul24(blockIdx.y, gridDim.x) + blockIdx.x;
uint i = __mul24(blockId, blockDim.x) + threadIdx.x;
if ((i < numVoxels) && voxelOccupied[i]) // check bounds before reading voxelOccupied
{
compactedVoxelArray[ voxelOccupiedScan[i] ] = i;
}
}
extern "C" void
launch_compactVoxels(dim3 grid, dim3 threads, uint *compactedVoxelArray, uint *voxelOccupied, uint *voxelOccupiedScan, uint numVoxels)
{
compactVoxels<<<grid, threads>>>(compactedVoxelArray, voxelOccupied,
voxelOccupiedScan, numVoxels);
getLastCudaError("compactVoxels failed");
}
// compute interpolated vertex along an edge
__device__
float3 vertexInterp(float isolevel, float3 p0, float3 p1, float f0, float f1)
{
float t = (isolevel - f0) / (f1 - f0);
return lerp(p0, p1, t);
}
// compute interpolated vertex position and normal along an edge
__device__
void vertexInterp2(float isolevel, float3 p0, float3 p1, float4 f0, float4 f1, float3 &p, float3 &n)
{
float t = (isolevel - f0.w) / (f1.w - f0.w);
p = lerp(p0, p1, t);
n.x = lerp(f0.x, f1.x, t);
n.y = lerp(f0.y, f1.y, t);
n.z = lerp(f0.z, f1.z, t);
// n = normalize(n);
}
// generate triangles for each voxel using marching cubes
// interpolates normals from field function
__global__ void
generateTriangles(float4 *pos, float4 *norm, uint *compactedVoxelArray, uint *numVertsScanned,
uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask,
float3 voxelSize, float isoValue, uint activeVoxels, uint maxVerts, int renderType)
{
uint blockId = __mul24(blockIdx.y, gridDim.x) + blockIdx.x;
uint i = __mul24(blockId, blockDim.x) + threadIdx.x;
if (i > activeVoxels - 1)
{
// can't return here because of syncthreads()
i = activeVoxels - 1;
}
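// when SKIP_EMPTY_VOXELS is enabled, map this thread to an occupied voxel
// through the compacted voxel list; otherwise process voxels densely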
#if SKIP_EMPTY_VOXELS
uint voxel = compactedVoxelArray[i];
#else
uint voxel = i;
#endif
// compute position in 3d grid
uint3 gridPos = calcGridPos(voxel, gridSizeShift, gridSizeMask);
float3 p;
p.x = -1.0f + (gridPos.x * voxelSize.x);
p.y = -1.0f + (gridPos.y * voxelSize.y);
p.z = -1.0f + (gridPos.z * voxelSize.z);
// calculate cell vertex positions
float3 v[8];
v[0] = p;
v[1] = p + make_float3(voxelSize.x, 0, 0);
v[2] = p + make_float3(voxelSize.x, voxelSize.y, 0);
v[3] = p + make_float3(0, voxelSize.y, 0);
v[4] = p + make_float3(0, 0, voxelSize.z);
v[5] = p + make_float3(voxelSize.x, 0, voxelSize.z);
v[6] = p + make_float3(voxelSize.x, voxelSize.y, voxelSize.z);
v[7] = p + make_float3(0, voxelSize.y, voxelSize.z);
// evaluate field values
float4 field[8];
field[0] = fieldFunc4(v[0], renderType);
field[1] = fieldFunc4(v[1], renderType);
field[2] = fieldFunc4(v[2], renderType);
field[3] = fieldFunc4(v[3], renderType);
field[4] = fieldFunc4(v[4], renderType);
field[5] = fieldFunc4(v[5], renderType);
field[6] = fieldFunc4(v[6], renderType);
field[7] = fieldFunc4(v[7], renderType);
// recalculate flag
// (this is faster than storing it in global memory)
uint cubeindex;
cubeindex = uint(field[0].w < isoValue);
cubeindex += uint(field[1].w < isoValue)*2;
cubeindex += uint(field[2].w < isoValue)*4;
cubeindex += uint(field[3].w < isoValue)*8;
cubeindex += uint(field[4].w < isoValue)*16;
cubeindex += uint(field[5].w < isoValue)*32;
cubeindex += uint(field[6].w < isoValue)*64;
cubeindex += uint(field[7].w < isoValue)*128;
// find the vertices where the surface intersects the cube
#if USE_SHARED
// use partitioned shared memory to avoid using local memory
__shared__ float3 vertlist[12*NTHREADS];
__shared__ float3 normlist[12*NTHREADS];
vertexInterp2(isoValue, v[0], v[1], field[0], field[1], vertlist[threadIdx.x], normlist[threadIdx.x]);
vertexInterp2(isoValue, v[1], v[2], field[1], field[2], vertlist[threadIdx.x+NTHREADS], normlist[threadIdx.x+NTHREADS]);
vertexInterp2(isoValue, v[2], v[3], field[2], field[3], vertlist[threadIdx.x+(NTHREADS*2)], normlist[threadIdx.x+(NTHREADS*2)]);
vertexInterp2(isoValue, v[3], v[0], field[3], field[0], vertlist[threadIdx.x+(NTHREADS*3)], normlist[threadIdx.x+(NTHREADS*3)]);
vertexInterp2(isoValue, v[4], v[5], field[4], field[5], vertlist[threadIdx.x+(NTHREADS*4)], normlist[threadIdx.x+(NTHREADS*4)]);
vertexInterp2(isoValue, v[5], v[6], field[5], field[6], vertlist[threadIdx.x+(NTHREADS*5)], normlist[threadIdx.x+(NTHREADS*5)]);
vertexInterp2(isoValue, v[6], v[7], field[6], field[7], vertlist[threadIdx.x+(NTHREADS*6)], normlist[threadIdx.x+(NTHREADS*6)]);
vertexInterp2(isoValue, v[7], v[4], field[7], field[4], vertlist[threadIdx.x+(NTHREADS*7)], normlist[threadIdx.x+(NTHREADS*7)]);
vertexInterp2(isoValue, v[0], v[4], field[0], field[4], vertlist[threadIdx.x+(NTHREADS*8)], normlist[threadIdx.x+(NTHREADS*8)]);
vertexInterp2(isoValue, v[1], v[5], field[1], field[5], vertlist[threadIdx.x+(NTHREADS*9)], normlist[threadIdx.x+(NTHREADS*9)]);
vertexInterp2(isoValue, v[2], v[6], field[2], field[6], vertlist[threadIdx.x+(NTHREADS*10)], normlist[threadIdx.x+(NTHREADS*10)]);
vertexInterp2(isoValue, v[3], v[7], field[3], field[7], vertlist[threadIdx.x+(NTHREADS*11)], normlist[threadIdx.x+(NTHREADS*11)]);
__syncthreads();
#else
float3 vertlist[12];
float3 normlist[12];
vertexInterp2(isoValue, v[0], v[1], field[0], field[1], vertlist[0], normlist[0]);
vertexInterp2(isoValue, v[1], v[2], field[1], field[2], vertlist[1], normlist[1]);
vertexInterp2(isoValue, v[2], v[3], field[2], field[3], vertlist[2], normlist[2]);
vertexInterp2(isoValue, v[3], v[0], field[3], field[0], vertlist[3], normlist[3]);
vertexInterp2(isoValue, v[4], v[5], field[4], field[5], vertlist[4], normlist[4]);
vertexInterp2(isoValue, v[5], v[6], field[5], field[6], vertlist[5], normlist[5]);
vertexInterp2(isoValue, v[6], v[7], field[6], field[7], vertlist[6], normlist[6]);
vertexInterp2(isoValue, v[7], v[4], field[7], field[4], vertlist[7], normlist[7]);
vertexInterp2(isoValue, v[0], v[4], field[0], field[4], vertlist[8], normlist[8]);
vertexInterp2(isoValue, v[1], v[5], field[1], field[5], vertlist[9], normlist[9]);
vertexInterp2(isoValue, v[2], v[6], field[2], field[6], vertlist[10], normlist[10]);
vertexInterp2(isoValue, v[3], v[7], field[3], field[7], vertlist[11], normlist[11]);
#endif
// output triangle vertices
uint numVerts = tex1Dfetch(numVertsTex, cubeindex);
for (int i=0; i<numVerts; i++)
{
uint edge = tex1Dfetch(triTex, cubeindex*16 + i);
uint index = numVertsScanned[voxel] + i;
if (index < maxVerts)
{
#if USE_SHARED
pos[index] = make_float4(vertlist[(edge*NTHREADS)+threadIdx.x], 1.0f);
norm[index] = make_float4(normlist[(edge*NTHREADS)+threadIdx.x], 0.0f);
#else
pos[index] = make_float4(vertlist[edge], 1.0f);
norm[index] = make_float4(normlist[edge], 0.0f);
#endif
}
}
}
extern "C" void
launch_generateTriangles(dim3 grid, dim3 threads,
float4 *pos, float4 *norm, uint *compactedVoxelArray, uint *numVertsScanned,
uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask,
float3 voxelSize, float isoValue, uint activeVoxels, uint maxVerts, int renderType)
{
generateTriangles<<<grid, NTHREADS>>>(pos, norm,
compactedVoxelArray,
numVertsScanned,
gridSize, gridSizeShift, gridSizeMask,
voxelSize, isoValue, activeVoxels,
maxVerts, renderType);
getLastCudaError("generateTriangles failed");
}
// calculate triangle normal
__device__
float3 calcNormal(float3 *v0, float3 *v1, float3 *v2)
{
float3 edge0 = *v1 - *v0;
float3 edge1 = *v2 - *v0;
// note - it's faster to perform normalization in vertex shader rather than here
return cross(edge0, edge1);
}
// version that calculates flat surface normal for each triangle
__global__ void
generateTriangles2(float4 *pos, float4 *norm, uint *compactedVoxelArray, uint *numVertsScanned, float *volume,
uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask,
float3 voxelSize, float isoValue, uint activeVoxels, uint maxVerts, int renderType)
{
uint blockId = __mul24(blockIdx.y, gridDim.x) + blockIdx.x;
uint i = __mul24(blockId, blockDim.x) + threadIdx.x;
if (i > activeVoxels - 1)
{
i = activeVoxels - 1;
}
#if SKIP_EMPTY_VOXELS
uint voxel = compactedVoxelArray[i];
#else
uint voxel = i;
#endif
// compute position in 3d grid
uint3 gridPos = calcGridPos(voxel, gridSizeShift, gridSizeMask);
float3 p;
p.x = -1.0f + (gridPos.x * voxelSize.x);
p.y = -1.0f + (gridPos.y * voxelSize.y);
p.z = -1.0f + (gridPos.z * voxelSize.z);
// calculate cell vertex positions
float3 v[8];
v[0] = p;
v[1] = p + make_float3(voxelSize.x, 0, 0);
v[2] = p + make_float3(voxelSize.x, voxelSize.y, 0);
v[3] = p + make_float3(0, voxelSize.y, 0);
v[4] = p + make_float3(0, 0, voxelSize.z);
v[5] = p + make_float3(voxelSize.x, 0, voxelSize.z);
v[6] = p + make_float3(voxelSize.x, voxelSize.y, voxelSize.z);
v[7] = p + make_float3(0, voxelSize.y, voxelSize.z);
float field[8];
if(!renderType) {
field[0] = sampleVolume(volume, gridPos, gridSize);
field[1] = sampleVolume(volume, gridPos + make_uint3(1, 0, 0), gridSize);
field[2] = sampleVolume(volume, gridPos + make_uint3(1, 1, 0), gridSize);
field[3] = sampleVolume(volume, gridPos + make_uint3(0, 1, 0), gridSize);
field[4] = sampleVolume(volume, gridPos + make_uint3(0, 0, 1), gridSize);
field[5] = sampleVolume(volume, gridPos + make_uint3(1, 0, 1), gridSize);
field[6] = sampleVolume(volume, gridPos + make_uint3(1, 1, 1), gridSize);
field[7] = sampleVolume(volume, gridPos + make_uint3(0, 1, 1), gridSize);
} else {
// evaluate field values
field[0] = fieldFunc(v[0], renderType);
field[1] = fieldFunc(v[1], renderType);
field[2] = fieldFunc(v[2], renderType);
field[3] = fieldFunc(v[3], renderType);
field[4] = fieldFunc(v[4], renderType);
field[5] = fieldFunc(v[5], renderType);
field[6] = fieldFunc(v[6], renderType);
field[7] = fieldFunc(v[7], renderType);
}
// recalculate flag
uint cubeindex;
cubeindex = uint(field[0] < isoValue);
cubeindex += uint(field[1] < isoValue)*2;
cubeindex += uint(field[2] < isoValue)*4;
cubeindex += uint(field[3] < isoValue)*8;
cubeindex += uint(field[4] < isoValue)*16;
cubeindex += uint(field[5] < isoValue)*32;
cubeindex += uint(field[6] < isoValue)*64;
cubeindex += uint(field[7] < isoValue)*128;
// find the vertices where the surface intersects the cube
#if USE_SHARED
// use shared memory to avoid using local memory
__shared__ float3 vertlist[12*NTHREADS];
vertlist[threadIdx.x] = vertexInterp(isoValue, v[0], v[1], field[0], field[1]);
vertlist[NTHREADS+threadIdx.x] = vertexInterp(isoValue, v[1], v[2], field[1], field[2]);
vertlist[(NTHREADS*2)+threadIdx.x] = vertexInterp(isoValue, v[2], v[3], field[2], field[3]);
vertlist[(NTHREADS*3)+threadIdx.x] = vertexInterp(isoValue, v[3], v[0], field[3], field[0]);
vertlist[(NTHREADS*4)+threadIdx.x] = vertexInterp(isoValue, v[4], v[5], field[4], field[5]);
vertlist[(NTHREADS*5)+threadIdx.x] = vertexInterp(isoValue, v[5], v[6], field[5], field[6]);
vertlist[(NTHREADS*6)+threadIdx.x] = vertexInterp(isoValue, v[6], v[7], field[6], field[7]);
vertlist[(NTHREADS*7)+threadIdx.x] = vertexInterp(isoValue, v[7], v[4], field[7], field[4]);
vertlist[(NTHREADS*8)+threadIdx.x] = vertexInterp(isoValue, v[0], v[4], field[0], field[4]);
vertlist[(NTHREADS*9)+threadIdx.x] = vertexInterp(isoValue, v[1], v[5], field[1], field[5]);
vertlist[(NTHREADS*10)+threadIdx.x] = vertexInterp(isoValue, v[2], v[6], field[2], field[6]);
vertlist[(NTHREADS*11)+threadIdx.x] = vertexInterp(isoValue, v[3], v[7], field[3], field[7]);
__syncthreads();
#else
float3 vertlist[12];
vertlist[0] = vertexInterp(isoValue, v[0], v[1], field[0], field[1]);
vertlist[1] = vertexInterp(isoValue, v[1], v[2], field[1], field[2]);
vertlist[2] = vertexInterp(isoValue, v[2], v[3], field[2], field[3]);
vertlist[3] = vertexInterp(isoValue, v[3], v[0], field[3], field[0]);
vertlist[4] = vertexInterp(isoValue, v[4], v[5], field[4], field[5]);
vertlist[5] = vertexInterp(isoValue, v[5], v[6], field[5], field[6]);
vertlist[6] = vertexInterp(isoValue, v[6], v[7], field[6], field[7]);
vertlist[7] = vertexInterp(isoValue, v[7], v[4], field[7], field[4]);
vertlist[8] = vertexInterp(isoValue, v[0], v[4], field[0], field[4]);
vertlist[9] = vertexInterp(isoValue, v[1], v[5], field[1], field[5]);
vertlist[10] = vertexInterp(isoValue, v[2], v[6], field[2], field[6]);
vertlist[11] = vertexInterp(isoValue, v[3], v[7], field[3], field[7]);
#endif
// output triangle vertices
uint numVerts = tex1Dfetch(numVertsTex, cubeindex);
for (int i=0; i<numVerts; i+=3)
{
uint index = numVertsScanned[voxel] + i;
float3 *v[3];
uint edge;
edge = tex1Dfetch(triTex, (cubeindex*16) + i);
#if USE_SHARED
v[0] = &vertlist[(edge*NTHREADS)+threadIdx.x];
#else
v[0] = &vertlist[edge];
#endif
edge = tex1Dfetch(triTex, (cubeindex*16) + i + 1);
#if USE_SHARED
v[1] = &vertlist[(edge*NTHREADS)+threadIdx.x];
#else
v[1] = &vertlist[edge];
#endif
edge = tex1Dfetch(triTex, (cubeindex*16) + i + 2);
#if USE_SHARED
v[2] = &vertlist[(edge*NTHREADS)+threadIdx.x];
#else
v[2] = &vertlist[edge];
#endif
// calculate triangle surface normal
float3 n = calcNormal(v[0], v[1], v[2]);
if (index < (maxVerts - 3))
{
pos[index] = make_float4(*v[0], 1.0f);
norm[index] = make_float4(n, 0.0f);
pos[index+1] = make_float4(*v[1], 1.0f);
norm[index+1] = make_float4(n, 0.0f);
pos[index+2] = make_float4(*v[2], 1.0f);
norm[index+2] = make_float4(n, 0.0f);
}
}
}
extern "C" void
launch_generateTriangles2(dim3 grid, dim3 threads,
float4 *pos, float4 *norm, uint *compactedVoxelArray, uint *numVertsScanned, float *volume,
uint3 gridSize, uint3 gridSizeShift, uint3 gridSizeMask,
float3 voxelSize, float isoValue, uint activeVoxels, uint maxVerts, int renderType)
{
generateTriangles2<<<grid, NTHREADS>>>(pos, norm,
compactedVoxelArray,
numVertsScanned, volume,
gridSize, gridSizeShift, gridSizeMask,
voxelSize, isoValue, activeVoxels,
maxVerts, renderType);
getLastCudaError("generateTriangles2 failed");
}
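// exclusive prefix sum over per-voxel counts (occupancy flags or vertex counts),
// producing the output offsets used by compactVoxels and generateTriangles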
extern "C" void ThrustScanWrapper(unsigned int *output, unsigned int *input, unsigned int numElements)
{
thrust::exclusive_scan(thrust::device_ptr<unsigned int>(input),
thrust::device_ptr<unsigned int>(input + numElements),
thrust::device_ptr<unsigned int>(output));
}
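/*
 * Rough sketch of how a host application typically drives these launchers.
 * Illustrative only: buffer names (d_voxelVerts, d_pos, ...) and the
 * grid/block dimensions are assumptions, not part of this file.
 *
 *   // 1. count output vertices and flag occupied voxels
 *   launch_classifyVoxel(grid, threads, d_voxelVerts, d_voxelOccupied, d_volume,
 *                        gridSize, gridSizeShift, gridSizeMask, numVoxels,
 *                        voxelSize, isoValue, renderType);
 *
 *   // 2. exclusive scan of the occupancy flags; activeVoxels is the last scan
 *   //    element plus the last input element (read back to the host), then
 *   //    build the compacted list of occupied voxels
 *   ThrustScanWrapper(d_voxelOccupiedScan, d_voxelOccupied, numVoxels);
 *   launch_compactVoxels(grid, threads, d_compactedVoxelArray, d_voxelOccupied,
 *                        d_voxelOccupiedScan, numVoxels);
 *
 *   // 3. exclusive scan of the per-voxel vertex counts gives output offsets
 *   ThrustScanWrapper(d_voxelVertsScan, d_voxelVerts, numVoxels);
 *
 *   // 4. generate the triangle mesh into d_pos / d_normal (maxVerts is the
 *   //    allocated size of those buffers)
 *   launch_generateTriangles(gridActive, threads, d_pos, d_normal,
 *                            d_compactedVoxelArray, d_voxelVertsScan,
 *                            gridSize, gridSizeShift, gridSizeMask, voxelSize,
 *                            isoValue, activeVoxels, maxVerts, renderType);
 */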
#endif