You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
784 lines
35 KiB
784 lines
35 KiB
//
|
|
// Created by 14727 on 2022/12/9.
|
|
//
|
|
|
|
#include "device/Nurbs/nurbs_surface.cuh"
|
|
#include "device/Nurbs/nurbs_common.cuh"
|
|
#include "utils.h"
|
|
#include "device/Nurbs/bvh.cuh"
|
|
#include "device/device_utils.cuh"
|
|
#include "tinynurbs/tinynurbs.h"
|
|
|
|
__global__ void
|
|
NurbsSurface::g_evaluate(float *res, const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points,
|
|
int d_pointsCnt_u,
|
|
int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u,
|
|
int d_sampleCnt_v) {
|
|
// 二维grid和二维的block
|
|
int ix = blockIdx.x * blockDim.x + threadIdx.x;
|
|
int iy = blockIdx.y * blockDim.y + threadIdx.y;
|
|
|
|
float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1);
|
|
float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1);
|
|
|
|
if (u > 1.0 * d_lastKnot_u || v > 1.0 * d_lastKnot_v) {
|
|
return;
|
|
}
|
|
|
|
float x = 0., y = 0., z = 0., sumW = 0.;
|
|
for (int i = 0; i < d_pointsCnt_u; i++) {
|
|
float N_U = d_nTexture_u[ix * d_pointsCnt_u + i];
|
|
for (int j = 0; j < d_pointsCnt_v; j++) {
|
|
float N_V = d_nTexture_v[iy * d_pointsCnt_v + j];
|
|
int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE;
|
|
float w = d_points[idx + 3];
|
|
x += N_U * N_V * w * d_points[idx];
|
|
y += N_U * N_V * w * d_points[idx + 1];
|
|
z += N_U * N_V * w * d_points[idx + 2];
|
|
sumW += N_U * N_V * w;
|
|
}
|
|
}
|
|
x = x / sumW;
|
|
y = y / sumW;
|
|
z = z / sumW;
|
|
|
|
int baseIdx = (ix * d_sampleCnt_v + iy) * 3;
|
|
res[baseIdx] = x;
|
|
res[baseIdx + 1] = y;
|
|
res[baseIdx + 2] = z;
|
|
|
|
// printf("(%d, %d)-->(%g, %g, %g)\n", ix, iy, x, y, z); // %g输出,舍弃无意义的0
|
|
}
|
|
|
|
|
|
__global__ void
|
|
NurbsSurface::g_derivative(float *derivatives, float *normals, const float *derTexture_u, const float *derTexture_v,
|
|
const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u,
|
|
int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v,
|
|
int d_sampleCnt_u, int d_sampleCnt_v) {
|
|
// 二维grid和二维的block
|
|
int ix = blockIdx.x * blockDim.x + threadIdx.x;
|
|
int iy = blockIdx.y * blockDim.y + threadIdx.y;
|
|
|
|
if (ix >= d_sampleCnt_u || iy >= d_sampleCnt_v) {
|
|
return;
|
|
}
|
|
float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1);
|
|
float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1);
|
|
|
|
float nubsPdx_u = 0., nubsPdy_u = 0, nubsPdz_u = 0., nubsPdw_u = 0.;
|
|
float nubsPdx_v = 0., nubsPdy_v = 0, nubsPdz_v = 0., nubsPdw_v = 0.;
|
|
|
|
for (int i = 0; i < d_pointsCnt_u; i++) {
|
|
for (int j = 0; j < d_pointsCnt_u; j++) {
|
|
int baseIdx = (i * d_pointsCnt_v + j) * d_POINT_SIZE;
|
|
float factor_u = derTexture_u[ix * d_pointsCnt_u + i] * nTexture_v[iy * d_pointsCnt_v + j];
|
|
float factor_v = derTexture_v[iy * d_pointsCnt_v + j] * nTexture_u[ix * d_pointsCnt_u + i];
|
|
float wij = d_points[baseIdx + 3];
|
|
nubsPdx_u += factor_u * wij * d_points[baseIdx];
|
|
nubsPdy_u += factor_u * wij * d_points[baseIdx + 1];
|
|
nubsPdz_u += factor_u * wij * d_points[baseIdx + 2];
|
|
nubsPdw_u += factor_u * wij;
|
|
|
|
nubsPdx_v += factor_v * wij * d_points[baseIdx];
|
|
nubsPdy_v += factor_v * wij * d_points[baseIdx + 1];
|
|
nubsPdz_v += factor_v * wij * d_points[baseIdx + 2];
|
|
nubsPdw_v += factor_v * wij;
|
|
}
|
|
}
|
|
|
|
float x = 0., y = 0., z = 0., w = 0.;
|
|
for (int i = 0; i < d_pointsCnt_u; i++) {
|
|
float N_U = nTexture_u[ix * d_pointsCnt_u + i];
|
|
for (int j = 0; j < d_pointsCnt_v; j++) {
|
|
float N_V = nTexture_v[iy * d_pointsCnt_v + j];
|
|
int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE;
|
|
float wij = d_points[idx + 3];
|
|
x += N_U * N_V * wij * d_points[idx];
|
|
y += N_U * N_V * wij * d_points[idx + 1];
|
|
z += N_U * N_V * wij * d_points[idx + 2];
|
|
w += N_U * N_V * wij;
|
|
}
|
|
}
|
|
|
|
float w2 = w * w;
|
|
float pdx_u = (nubsPdx_u * w - x * nubsPdw_u) / w2;
|
|
float pdy_u = (nubsPdy_u * w - y * nubsPdw_u) / w2;
|
|
float pdz_u = (nubsPdz_u * w - z * nubsPdw_u) / w2;
|
|
|
|
float pdx_v = (nubsPdx_v * w - x * nubsPdw_v) / w2;
|
|
float pdy_v = (nubsPdy_v * w - y * nubsPdw_v) / w2;
|
|
float pdz_v = (nubsPdz_v * w - z * nubsPdw_v) / w2;
|
|
// float pdz_u = (nubsPdz_u * w - z )
|
|
|
|
int baseIdx = (ix * d_sampleCnt_v + iy) * 6;
|
|
derivatives[baseIdx] = pdx_u;
|
|
derivatives[baseIdx + 1] = pdy_u;
|
|
derivatives[baseIdx + 2] = pdz_u;
|
|
derivatives[baseIdx + 3] = pdx_v;
|
|
derivatives[baseIdx + 4] = pdy_v;
|
|
derivatives[baseIdx + 5] = pdz_v;
|
|
|
|
// 叉乘得到法向量
|
|
baseIdx = (ix * d_sampleCnt_v + iy) * 3;
|
|
normals[baseIdx] = pdy_u * pdz_v - pdy_v * pdz_u;
|
|
normals[baseIdx + 1] = pdx_v * pdz_u - pdx_u * pdz_v;
|
|
normals[baseIdx + 2] = pdx_u * pdy_v - pdx_v * pdy_u;
|
|
normalization(normals[baseIdx], normals[baseIdx + 1], normals[baseIdx + 2]);
|
|
// if ((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9) || (ix == 8 && iy == 8) ||
|
|
// (ix == 8 && iy == 10))
|
|
// printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v,
|
|
// pdz_v, normals[baseIdx], normals[baseIdx + 1], normals[baseIdx + 2]);
|
|
}
|
|
|
|
__global__ void
|
|
NurbsSurface::g_curvature(const float *derivatives, const int sampleCnt_u, const int sampleCnt_v, float lastKnot_u,
|
|
float lastKnot_v, float *ms, float *k) {
|
|
// 二维grid和二维的block
|
|
int ix = blockIdx.x * blockDim.x + threadIdx.x;
|
|
int iy = blockIdx.y * blockDim.y + threadIdx.y;
|
|
|
|
if (ix >= sampleCnt_u || iy >= sampleCnt_v) {
|
|
return;
|
|
}
|
|
|
|
|
|
float step_u = lastKnot_u / (sampleCnt_u - 1), step_v = lastKnot_v / (sampleCnt_v - 1);
|
|
float u = ix * step_u, v = iy * step_v;
|
|
|
|
int baseIdx = (ix * sampleCnt_v + iy) * 6;
|
|
int lastBaseIdx_u = ((ix - 1) * sampleCnt_v + iy) * 6, nextBaseIdx_u = ((ix + 1) * sampleCnt_v + iy) * 6;
|
|
int lastBaseIdx_v = (ix * sampleCnt_v + iy - 1) * 6, nextBaseIdx_v = (ix * sampleCnt_v + iy + 1) * 6;
|
|
|
|
// printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g)\n", u, v, derivatives[baseIdx], derivatives[baseIdx + 1],
|
|
// derivatives[baseIdx + 2], derivatives[baseIdx + 3], derivatives[baseIdx + 4], derivatives[baseIdx + 5]);
|
|
|
|
float sndPdx_uu, sndPdy_uu, sndPdz_uu, sndPdx_vv, sndPdy_vv, sndPdz_vv; // 二阶导
|
|
float sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu, sndPdz_vu;
|
|
|
|
if (ix == 0) {
|
|
sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[baseIdx]) / step_u;
|
|
sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[baseIdx + 1]) / step_u;
|
|
sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[baseIdx + 2]) / step_u;
|
|
|
|
sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[baseIdx + 3]) / step_u;
|
|
sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[baseIdx + 4]) / step_u;
|
|
sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[baseIdx + 5]) / step_u;
|
|
} else if (ix == sampleCnt_u - 1) {
|
|
sndPdx_uu = (derivatives[baseIdx] - derivatives[lastBaseIdx_u]) / step_u;
|
|
sndPdy_uu = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_u + 1]) / step_u;
|
|
sndPdz_uu = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_u + 2]) / step_u;
|
|
|
|
sndPdx_vu = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_u + 3]) / step_u;
|
|
sndPdy_vu = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_u + 4]) / step_u;
|
|
sndPdz_vu = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_u + 5]) / step_u;
|
|
} else {
|
|
sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[lastBaseIdx_u]) / (2 * step_u);
|
|
sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[lastBaseIdx_u + 1]) / (2 * step_u);
|
|
sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[lastBaseIdx_u + 2]) / (2 * step_u);
|
|
|
|
sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[lastBaseIdx_u + 3]) / (2 * step_u);
|
|
sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[lastBaseIdx_u + 4]) / (2 * step_u);
|
|
sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[lastBaseIdx_u + 5]) / (2 * step_u);
|
|
}
|
|
|
|
if (iy == 0) {
|
|
sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[baseIdx + 3]) / step_v;
|
|
sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[baseIdx + 4]) / step_v;
|
|
sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[baseIdx + 5]) / step_v;
|
|
|
|
sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[baseIdx]) / step_v;
|
|
sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[baseIdx + 1]) / step_v;
|
|
sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[baseIdx + 2]) / step_v;
|
|
} else if (iy == sampleCnt_v - 1) {
|
|
sndPdx_vv = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_v + 3]) / step_v;
|
|
sndPdy_vv = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_v + 4]) / step_v;
|
|
sndPdz_vv = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_v + 5]) / step_v;
|
|
|
|
sndPdx_uv = (derivatives[baseIdx] - derivatives[lastBaseIdx_v]) / step_v;
|
|
sndPdy_uv = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_v + 1]) / step_v;
|
|
sndPdz_uv = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_v + 2]) / step_v;
|
|
} else {
|
|
sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[lastBaseIdx_v + 3]) / (2 * step_v);
|
|
sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[lastBaseIdx_v + 4]) / (2 * step_v);
|
|
sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[lastBaseIdx_v + 5]) / (2 * step_v);
|
|
|
|
sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[lastBaseIdx_v]) / (2 * step_v);
|
|
sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[lastBaseIdx_v + 1]) / (2 * step_v);
|
|
sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[lastBaseIdx_v + 2]) / (2 * step_v);
|
|
}
|
|
|
|
float uvx = (sndPdx_uv + sndPdx_vu) / 2, uvy = (sndPdy_uv + sndPdy_vu) / 2, uvz = (sndPdz_uv + sndPdz_vu) / 2;
|
|
// normalization(sndPdx_uv, sndPdy_uv, sndPdz_uv);
|
|
// normalization(sndPdx_vu, sndPdy_vu, sndPdz_vu);
|
|
// normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu);
|
|
// normalization(uvx, uvy, uvz);
|
|
// normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv);
|
|
|
|
// if (ix == 8 && iy == 9) {
|
|
// printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu,
|
|
// sndPdz_uu,
|
|
// uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv);
|
|
// printf("uv: (%g, %g, %g), vu: (%g, %g, %g)\n", sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu,
|
|
// sndPdz_vu);
|
|
// }
|
|
|
|
float m1 = max(max(sndPdx_uu, sndPdy_uu), sndPdz_uu);
|
|
float m2 = max(max(uvx, uvy), uvz);
|
|
float m3 = max(max(sndPdx_vv, sndPdy_vv), sndPdz_vv);
|
|
|
|
// __shared__ float ms[363];
|
|
ms[(ix * sampleCnt_v + iy) * 3] = m1;
|
|
ms[(ix * sampleCnt_v + iy) * 3 + 1] = m2;
|
|
ms[(ix * sampleCnt_v + iy) * 3 + 2] = m3;
|
|
__syncthreads();
|
|
// if(ix == 1 && iy == 1) {
|
|
// for(int i = 0; i < sampleCnt_u; i++) {
|
|
// for(int j = 0; j < sampleCnt_v; j++) {
|
|
// printf("%g ", ms[(i * sampleCnt_v + j) * 3]);
|
|
// }
|
|
// printf("\n");
|
|
// }
|
|
// }
|
|
|
|
// 规约求最大值
|
|
for (int step = (sampleCnt_u + 1) / 2; step > 1; step = (step + 1) / 2) {
|
|
// step 表示现在参与计算最大值的数据的长度的一半
|
|
if (ix < step && ix + step < sampleCnt_u) {
|
|
ms[(ix * sampleCnt_v + iy) * 3] = max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3]);
|
|
ms[(ix * sampleCnt_v + iy) * 3 + 1] = max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3 + 1]);
|
|
ms[(ix * sampleCnt_v + iy) * 3 + 2] = max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3 + 2]);
|
|
}
|
|
}
|
|
|
|
for (int step = (sampleCnt_v + 1) / 2; step > 1; step = (step + 1) / 2) {
|
|
// step 表示现在参与计算最大值的数据的长度的一半
|
|
if (iy < step && iy + step < sampleCnt_v) {
|
|
ms[iy * 3] = max(ms[iy * 3], ms[(iy + step) * 3]);
|
|
ms[iy * 3 + 1] = max(ms[iy * 3 + 1], ms[(iy + step) * 3 + 1]);
|
|
ms[iy * 3 + 2] = max(ms[iy * 3 + 2], ms[(iy + step) * 3 + 2]);
|
|
}
|
|
}
|
|
__syncthreads();
|
|
int n = sampleCnt_u - 1;
|
|
int m = sampleCnt_v - 1;
|
|
*k = (ms[0] / (n * n) + 2 * ms[1] / (n * m) + ms[2] / (m * m)) / 8;
|
|
// if(ix == 1 && iy == 1)printf("%g gggg\n", ms[0]);
|
|
}
|
|
|
|
|
|
__host__ NurbsSurface::Surface::Surface(MeshPoints4 controlPoints, std::vector<float> knots_u,
|
|
std::vector<float> knots_v) {
|
|
this->knots_u = std::move(knots_u);
|
|
this->knots_v = std::move(knots_v);
|
|
this->controlPoints = std::move(controlPoints);
|
|
recordTime = false;
|
|
d_nTexture_u = nullptr;
|
|
d_nTexture_v = nullptr;
|
|
d_nTexture1_u = nullptr;
|
|
d_nTexture1_v = nullptr;
|
|
d_knots_u = nullptr;
|
|
d_knots_v = nullptr;
|
|
d_points = nullptr;
|
|
d_evaluationRes = nullptr;
|
|
d_derivatives = nullptr;
|
|
d_k = nullptr;
|
|
d_normals = nullptr;
|
|
bvh.nodes = nullptr;
|
|
h_evaluations = nullptr;
|
|
h_derivatives = nullptr;
|
|
h_normals = nullptr;
|
|
}
|
|
|
|
__host__ void NurbsSurface::Surface::evaluate(int sampleCnt_u, int sampleCnt_v) {
|
|
// 构造指向device的controlPoints
|
|
const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size();
|
|
const int pointsBytes = pointsCnt_u * pointsCnt_v * sizeof(glm::vec4);
|
|
auto *h_points = (float *) malloc(pointsBytes);
|
|
for (int i = 0; i < pointsCnt_u; i++) {
|
|
for (int j = 0; j < pointsCnt_v; j++) {
|
|
for (int k = 0; k < POINT_SIZE; k++) {
|
|
h_points[(i * pointsCnt_v + j) * POINT_SIZE + k] = controlPoints[i][j][k];
|
|
}
|
|
// printf("%f/ %f/ %f/ %f ", controlPoints[i][j][0], controlPoints[i][j][1], controlPoints[i][j][2], controlPoints[i][j][3]);
|
|
}
|
|
}
|
|
cudaMalloc((void **) &d_points, pointsBytes);
|
|
cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice);
|
|
|
|
// 构造指向device的knots
|
|
const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size();
|
|
const int knotsBytes_u = knotsCnt_u * sizeof(float), knotsBytes_v = knotsCnt_v * sizeof(float);
|
|
auto *h_knots_u = (float *) malloc(knotsBytes_u), *h_knots_v = (float *) malloc(knotsBytes_v);
|
|
for (int i = 0; i < knotsCnt_u; i++) h_knots_u[i] = knots_u[i];
|
|
for (int i = 0; i < knotsCnt_v; i++) h_knots_v[i] = knots_v[i];
|
|
|
|
safeCudaFree(d_knots_u);
|
|
safeCudaFree(d_knots_v);
|
|
cudaMalloc((void **) &d_knots_u, knotsBytes_u);
|
|
cudaMalloc((void **) &d_knots_v, knotsBytes_v);
|
|
cudaMemcpy(d_knots_u, h_knots_u, knotsBytes_u, cudaMemcpyHostToDevice);
|
|
cudaMemcpy(d_knots_v, h_knots_v, knotsBytes_v, cudaMemcpyHostToDevice);
|
|
|
|
// 构造nTexture
|
|
cudaMalloc((void **) &d_nTexture_u,
|
|
sampleCnt_u * pointsCnt_u * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值
|
|
cudaMalloc((void **) &d_nTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float));
|
|
|
|
// 构造nTexture1
|
|
cudaMalloc((void **) &d_nTexture1_u, sampleCnt_u * (pointsCnt_u + 1) * sizeof(float));
|
|
cudaMalloc((void **) &d_nTexture1_v, sampleCnt_v * (pointsCnt_v + 1) * sizeof(float));
|
|
|
|
// 结果数组
|
|
size_t resBytes = sampleCnt_u * sampleCnt_v * 3 * sizeof(float);
|
|
safeCudaFree(d_evaluationRes);
|
|
cudaMalloc((void **) &d_evaluationRes, resBytes);
|
|
safeFree(h_evaluations);
|
|
h_evaluations = (float *) malloc(resBytes);
|
|
|
|
// 构造g_basisTexture线程层级
|
|
dim3 blockBasis(512);
|
|
dim3 gridBasis_u((sampleCnt_u + blockBasis.x - 1) / blockBasis.x);
|
|
dim3 gridBasis_v((sampleCnt_v + blockBasis.x - 1) / blockBasis.x);
|
|
|
|
// 构造线程层级,调用核函数
|
|
dim3 block(32, 32);
|
|
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y);
|
|
// 记录用时
|
|
double time_cost_device;
|
|
if (recordTime) time_cost_device = get_time();
|
|
g_basisTexture<<<gridBasis_u, blockBasis>>>(d_nTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u,
|
|
sampleCnt_u);
|
|
cudaDeviceSynchronize();
|
|
g_basisTexture<<<gridBasis_v, blockBasis>>>(d_nTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v,
|
|
sampleCnt_v);
|
|
cudaDeviceSynchronize();
|
|
|
|
g_evaluate <<<grid, block>>>(d_evaluationRes, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v,
|
|
POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u,
|
|
sampleCnt_v);
|
|
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用
|
|
if (recordTime) {
|
|
time_cost_device = get_time() - time_cost_device;
|
|
printf("GPU time cost of surface evaluation for %d samples: %lf\n", sampleCnt_u * sampleCnt_v,
|
|
time_cost_device);
|
|
}
|
|
|
|
cudaMemcpy(h_evaluations, d_evaluationRes, resBytes, cudaMemcpyDeviceToHost);
|
|
|
|
|
|
// 释放内存
|
|
safeFree(h_points);
|
|
safeFree(h_knots_u);
|
|
safeFree(h_knots_v);
|
|
}
|
|
|
|
|
|
__host__ std::vector<std::vector<glm::vec3>>
|
|
NurbsSurface::Surface::getEvaluateVec(int sampleCnt_u, int sampleCnt_v) const {
|
|
std::vector<std::vector<glm::vec3>> res(sampleCnt_u, std::vector<glm::vec3>(sampleCnt_v, glm::vec3()));
|
|
for (int i = 0; i < sampleCnt_u; i++) {
|
|
int baseIdx = i * sampleCnt_v * 3;
|
|
for (int j = 0; j < sampleCnt_v; j++) {
|
|
baseIdx += j * 3;
|
|
res[i][j].x = h_evaluations[baseIdx];
|
|
res[i][j].y = h_evaluations[baseIdx + 1];
|
|
res[i][j].z = h_evaluations[baseIdx + 2];
|
|
// printf("%d, %d: %f, %f, %f\n", i, j, res[i][j][0], res[i][j][1], res[i][j][2]);
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
__host__ std::vector<MeshPoints3> NurbsSurface::Surface::getDerivativeVec(int sampleCnt_u, int sampleCnt_v) const {
|
|
MeshPoints3 der_u(sampleCnt_u, LinePoints3(sampleCnt_v));
|
|
MeshPoints3 der_v(sampleCnt_u, LinePoints3(sampleCnt_v));
|
|
MeshPoints3 normal(sampleCnt_u, LinePoints3(sampleCnt_v));
|
|
for (int i = 0; i < sampleCnt_u; i++) {
|
|
int baseIdx = i * sampleCnt_v * 6;
|
|
for (int j = 0; j < sampleCnt_v; j++) {
|
|
baseIdx += j * 6;
|
|
der_u[i][j].x = h_derivatives[baseIdx];
|
|
der_u[i][j].y = h_derivatives[baseIdx + 1];
|
|
der_u[i][j].z = h_derivatives[baseIdx + 2];
|
|
der_v[i][j].x = h_derivatives[baseIdx + 3];
|
|
der_v[i][j].y = h_derivatives[baseIdx + 4];
|
|
der_v[i][j].z = h_derivatives[baseIdx + 5];
|
|
auto baseIdxNorm = baseIdx / 2;
|
|
normal[i][j].x = h_normals[baseIdxNorm];
|
|
normal[i][j].y = h_normals[baseIdxNorm + 1];
|
|
normal[i][j].z = h_normals[baseIdxNorm + 2];
|
|
// TODO normalize 归一化在gpu中实现!
|
|
normal[i][j] = glm::normalize(normal[i][j]);
|
|
}
|
|
}
|
|
return {der_u, der_v, normal};
|
|
}
|
|
|
|
__host__ void NurbsSurface::Surface::derivative(int sampleCnt_u, int sampleCnt_v) {
|
|
if (POINT_SIZE != controlPoints[0][0].size()) {
|
|
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n");
|
|
return;
|
|
}
|
|
|
|
float *d_derTexture_u = nullptr;
|
|
float *d_derTexture_v = nullptr;
|
|
const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size();
|
|
const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size();
|
|
cudaMalloc((void **) &d_derTexture_u, sampleCnt_u * pointsCnt_u * sizeof(float));
|
|
cudaMalloc((void **) &d_derTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float));
|
|
|
|
// 构造切向量计算结果
|
|
safeCudaFree(d_derivatives);
|
|
size_t derBytes = sampleCnt_u * sampleCnt_v * 6 * sizeof(float);
|
|
cudaMalloc((void **) &d_derivatives, derBytes); // 每个采样所求的切向量是一个六元向量,前三位是对u的偏导、后三位是对v的偏导
|
|
|
|
// 构造法向量计算结果
|
|
safeCudaFree(d_normals);
|
|
size_t normalBytes = sampleCnt_u * sampleCnt_v * 3 * sizeof(float);
|
|
cudaMalloc((void **) &d_normals, normalBytes);
|
|
|
|
// 构造线程层级
|
|
dim3 block(32, 32);
|
|
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y);
|
|
// 构造g_basisTexture线程层级
|
|
dim3 blockTex(512);
|
|
dim3 gridTex_u((sampleCnt_u + blockTex.x - 1) / blockTex.x);
|
|
dim3 gridTex_v((sampleCnt_v + blockTex.x - 1) / blockTex.x);
|
|
// 记录用时
|
|
double time_cost_device;
|
|
if (recordTime) time_cost_device = get_time();
|
|
g_derTexture<<<gridTex_u, blockTex>>>(d_derTexture_u, d_nTexture1_u, d_knots_u, pointsCnt_u, knotsCnt_u,
|
|
sampleCnt_u);
|
|
g_derTexture<<<gridTex_v, blockTex>>>(d_derTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v,
|
|
sampleCnt_v);
|
|
cudaDeviceSynchronize();
|
|
g_derivative<<<grid, block>>>(d_derivatives, d_normals, d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v,
|
|
d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1],
|
|
knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v);
|
|
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用
|
|
if (recordTime) {
|
|
time_cost_device = get_time() - time_cost_device;
|
|
printf("GPU time cost of surface first derivative calculating for %d samples: %lf\n", sampleCnt_u * sampleCnt_v,
|
|
time_cost_device);
|
|
}
|
|
|
|
// 结果数组
|
|
safeFree(h_normals);
|
|
h_normals = (float *) malloc(normalBytes);
|
|
cudaMemcpy(h_normals, d_normals, normalBytes, cudaMemcpyDeviceToHost);
|
|
|
|
safeFree(h_derivatives);
|
|
h_derivatives = (float *) malloc(derBytes);
|
|
cudaMemcpy(h_derivatives, d_derivatives, derBytes, cudaMemcpyDeviceToHost);
|
|
|
|
cudaFree(d_derTexture_u);
|
|
cudaFree(d_derTexture_v);
|
|
}
|
|
|
|
__host__ void NurbsSurface::Surface::curvature(int sampleCnt_u, int sampleCnt_v) {
|
|
|
|
if (POINT_SIZE != controlPoints[0][0].size()) {
|
|
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n");
|
|
return;
|
|
}
|
|
|
|
// 构造记录每个采样点中的最大M1、M2、M3的数组
|
|
// 这里用共享内存会更好,但使用共享内存动态分配长度总是出错(好像是因为长度过长),后续需要思考解决这个问题
|
|
float *ms = nullptr;
|
|
cudaMalloc((void **) &ms, sampleCnt_u * sampleCnt_v * 3 * sizeof(float));
|
|
|
|
cudaMalloc((void **) &d_k, sizeof(float));
|
|
|
|
// 构造线程层级
|
|
dim3 block(32, 32);
|
|
dim3 grid((sampleCnt_u - 1 + block.x - 1) / block.x, (sampleCnt_v - 1 + block.y - 1) / block.y);
|
|
|
|
// 记录用时
|
|
double time_cost_device;
|
|
if (recordTime) time_cost_device = get_time();
|
|
g_curvature<<<grid, block>>>(d_derivatives, sampleCnt_u, sampleCnt_v, knots_u[knots_u.size() - 1],
|
|
knots_v[knots_v.size() - 1], ms, d_k);
|
|
cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用
|
|
if (recordTime) {
|
|
time_cost_device = get_time() - time_cost_device;
|
|
printf("GPU time cost of surface curvature calculating for %d samples: %lf\n",
|
|
sampleCnt_u * sampleCnt_v, time_cost_device);
|
|
}
|
|
safeCudaFree(ms);
|
|
}
|
|
|
|
void NurbsSurface::Surface::setRecordTime(bool r) {
|
|
recordTime = r;
|
|
}
|
|
|
|
NurbsSurface::Surface::~Surface() {
|
|
safeCudaFree(d_nTexture_u);
|
|
safeCudaFree(d_nTexture_v);
|
|
safeCudaFree(d_nTexture1_u);
|
|
safeCudaFree(d_nTexture1_v);
|
|
safeCudaFree(d_points);
|
|
safeCudaFree(d_knots_u);
|
|
safeCudaFree(d_knots_v);
|
|
safeCudaFree(d_k);
|
|
safeCudaFree(d_evaluationRes);
|
|
safeCudaFree(d_normals);
|
|
safeCudaFree(d_derivatives);
|
|
safeFree(bvh.nodes);
|
|
safeFree(h_evaluations);
|
|
safeFree(h_normals);
|
|
safeFree(h_derivatives);
|
|
cudaDeviceReset();
|
|
}
|
|
|
|
__host__ void NurbsSurface::Surface::buildBVH(int layerCnt, bool useK) {
|
|
// TODO 构造BVH的函数不应该出现在NURBS Surface中,应该是BVH类的事情!
|
|
// TODO NURBS Surface只需要一个函数去调用BVH对象的方法即可
|
|
int sampleCnt_u = pow(2, layerCnt - 1) + 1, sampleCnt_v = sampleCnt_u;
|
|
if (!useK) {
|
|
// 必须safeFree一下,这样global函数中才能通过d_k = nullptr知道不需要再free
|
|
safeCudaFree(d_k);
|
|
}
|
|
if (POINT_SIZE != controlPoints[0][0].size()) {
|
|
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n");
|
|
return;
|
|
}
|
|
// 构造线程层级
|
|
dim3 block(32, 32);
|
|
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y);
|
|
|
|
// bvh.maxLevel = max(int(ceil(log2f(sampleCnt_u - 1))) + 1, int(ceil(log2f(sampleCnt_v - 1))) + 1);
|
|
bvh.maxLevel = layerCnt;
|
|
bvh.size = (pow(4, bvh.maxLevel) - 1) / 3; // 等比数列求和公示求出数总的节点数
|
|
size_t bvhBytes = sizeof(BVHNode) * bvh.size;
|
|
BVHNode *d_bvh = nullptr;
|
|
cudaMalloc((void **) &d_bvh, bvhBytes);
|
|
// 记录用时
|
|
double time_cost_device;
|
|
if (recordTime) time_cost_device = get_time();
|
|
g_buildBvh<<<grid, block>>>(d_k, bvh.maxLevel, d_evaluationRes, knots_u[knots_u.size() - 1],
|
|
knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_bvh);
|
|
cudaDeviceSynchronize();
|
|
if (recordTime) {
|
|
time_cost_device = get_time() - time_cost_device;
|
|
printf("GPU time cost of a %d-layer BVH building: %lf\n",
|
|
bvh.maxLevel, time_cost_device);
|
|
}
|
|
// 将bvh拷贝到cpu中
|
|
safeFree(bvh.nodes);
|
|
bvh.nodes = (BVHNode *) malloc(bvhBytes);
|
|
cudaMemcpy(bvh.nodes, d_bvh, bvhBytes, cudaMemcpyDeviceToHost);
|
|
safeCudaFree(d_bvh);
|
|
// bvh.printQuadTree();
|
|
}
|
|
|
|
__host__ void NurbsSurface::Surface::buildGaussMap(int layerCnt) {
|
|
// TODO,构造GAUSS Map的函数不应该出现在NURBS Surface中,应该是GAUSS Map类的事情!
|
|
int sampleCnt_u = pow(2, layerCnt - 1) + 1, sampleCnt_v = sampleCnt_u;
|
|
|
|
if (POINT_SIZE != controlPoints[0][0].size()) {
|
|
printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n");
|
|
return;
|
|
}
|
|
// 构造线程层级
|
|
dim3 block(32, 32);
|
|
dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y);
|
|
|
|
gauss_map.maxLevel = layerCnt;
|
|
gauss_map.size = (pow(4, layerCnt) - 1) / 3; // 等比数列求和公示求出数总的节点数
|
|
size_t gaussMapBytes = sizeof(BVHNode) * gauss_map.size;
|
|
BVHNode *d_gaussMapTree = nullptr;
|
|
cudaMalloc((void **) &d_gaussMapTree, gaussMapBytes);
|
|
// 记录用时
|
|
double time_cost_device;
|
|
if (recordTime) time_cost_device = get_time();
|
|
g_buildBvh<<<grid, block>>>(nullptr, layerCnt, d_normals, knots_u[knots_u.size() - 1],
|
|
knots_v[knots_v.size() - 1], sampleCnt_u, sampleCnt_v, d_gaussMapTree);
|
|
cudaDeviceSynchronize();
|
|
if (recordTime) {
|
|
time_cost_device = get_time() - time_cost_device;
|
|
printf("GPU time cost of a %d-layer Gauss Map building: %lf\n",
|
|
layerCnt, time_cost_device);
|
|
}
|
|
safeFree(gauss_map.nodes);
|
|
gauss_map.nodes = (BVHNode *) malloc(gaussMapBytes);
|
|
cudaMemcpy(gauss_map.nodes, d_gaussMapTree, gaussMapBytes, cudaMemcpyDeviceToHost);
|
|
safeCudaFree(d_gaussMapTree);
|
|
}
|
|
|
|
__host__ void NurbsSurface::recursiveGetOverlapLeafNodes(const BVH &bvh1, const BVH &bvh2, int idx1, int idx2,
|
|
std::vector<std::pair<int, int>> &pairs) {
|
|
auto A = bvh1.nodes[idx1];
|
|
auto B = bvh2.nodes[idx2];
|
|
auto AABBSize = [](const AABB &aabb) {
|
|
return (aabb.pMax.z - aabb.pMin.z) *
|
|
(aabb.pMax.y - aabb.pMin.y) *
|
|
(aabb.pMax.x - aabb.pMin.x);
|
|
};
|
|
// 两个包围盒不相交,返回
|
|
if (!A.bounds.IsOverlap(B.bounds)) return;
|
|
// 相交
|
|
if (A.firstChild == -1 && B.firstChild == -1) {
|
|
// 两者都是叶子节点
|
|
pairs.emplace_back(idx1, idx2);
|
|
} else if (A.firstChild != -1 && B.firstChild == -1) {
|
|
// A是中间结点,B是叶子结点
|
|
for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, A.firstChild + i, idx2, pairs);
|
|
} else if (A.firstChild == -1 && B.firstChild != -1) {
|
|
// A是叶子结点,B是中间结点
|
|
for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, idx1, B.firstChild + i, pairs);
|
|
} else {
|
|
// 都是中间结点
|
|
if (AABBSize(A.bounds) > AABBSize(B.bounds)) {
|
|
// A的包围盒更大
|
|
for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, A.firstChild + i, idx2, pairs);
|
|
} else {
|
|
// B的包围盒更大
|
|
for (int i = 0; i < 4; i++) recursiveGetOverlapLeafNodes(bvh1, bvh2, idx1, B.firstChild + i, pairs);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
__host__ std::vector<boxPair>
|
|
NurbsSurface::getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2) {
|
|
std::vector<idx2> resPairs;
|
|
// 记录用时
|
|
double time_cost_device = get_time();
|
|
|
|
recursiveGetOverlapLeafNodes(bvh1, bvh2, 0, 0, resPairs);
|
|
std::vector<boxPair> boxPairsIdx2(resPairs.size());
|
|
for (int i = 0; i < resPairs.size(); i++) {
|
|
boxPairsIdx2[i] = {{bvh1.nodes[resPairs[i].first].idx_u, bvh1.nodes[resPairs[i].first].idx_v},
|
|
{bvh2.nodes[resPairs[i].second].idx_u, bvh2.nodes[resPairs[i].second].idx_v}};
|
|
}
|
|
|
|
time_cost_device = get_time() - time_cost_device;
|
|
printf("CPU time cost for recursively calculating the overlapped leaf nodes: %lf\n", time_cost_device);
|
|
|
|
return boxPairsIdx2;
|
|
}
|
|
|
|
__host__ bool
|
|
NurbsSurface::isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair<int, int> idxRange_u1,
|
|
std::pair<int, int> idxRange_v1, std::pair<int, int> idxRange_u2,
|
|
std::pair<int, int> idxRange_v2) {
|
|
if (gm1.maxLevel != gm2.maxLevel || gm1.maxLevel <= 0) {
|
|
printf("BVH Layer error!\n");
|
|
return false;
|
|
}
|
|
int commonMaxLayer = gm1.maxLevel;
|
|
int edgeCellCnt = pow(2, commonMaxLayer - 1);
|
|
if (idxRange_u1.first < 0 || idxRange_u2.first < 0 || idxRange_v1.first < 0 || idxRange_v2.first < 0 ||
|
|
idxRange_u1.second >= edgeCellCnt || idxRange_u2.second >= edgeCellCnt ||
|
|
idxRange_v1.second >= edgeCellCnt || idxRange_v2.second >= edgeCellCnt) {
|
|
printf("Error when detecting overlapping: idx range invalid!\n");
|
|
return false;
|
|
}
|
|
|
|
auto getRangedBox = [&commonMaxLayer](const BVH &bvh, const std::pair<int, int> &idxRange_u,
|
|
const std::pair<int, int> idxRange_v) {
|
|
// 获取某个范围的gauss map的aabb
|
|
AABB bounding;
|
|
for (int i = idxRange_u.first; i <= idxRange_u.second; ++i) {
|
|
for (int j = idxRange_v.first; j <= idxRange_v.second; ++j) {
|
|
bounding = bounding.Union(
|
|
bvh.nodes[getStartIdxOfLayerN(commonMaxLayer) + h_getChildNodeIdx(i, j)].bounds);
|
|
}
|
|
}
|
|
return bounding;
|
|
};
|
|
return getRangedBox(gm1, idxRange_u1, idxRange_v1)
|
|
.IsOverlap(getRangedBox(gm2, idxRange_u2, idxRange_v2));
|
|
}
|
|
|
|
__host__ bool NurbsSurface::isGaussMapsOverlapped(const BVH &gm1, const BVH &gm2, std::pair<float, float> range_u1,
|
|
std::pair<float, float> range_v1, std::pair<float, float> range_u2,
|
|
std::pair<float, float> range_v2,
|
|
std::pair<float, float> paramRange_u1,
|
|
std::pair<float, float> paramRange_v1,
|
|
std::pair<float, float> paramRange_u2,
|
|
std::pair<float, float> paramRange_v2) {
|
|
if (gm1.maxLevel != gm2.maxLevel || gm1.maxLevel <= 0) {
|
|
printf("BVH Layer error!\n");
|
|
return false;
|
|
}
|
|
int edgeCellCnt = pow(2, gm1.maxLevel - 1);
|
|
// 根据所给参数的范围和参数的定义域范围,获得对应的采样网格中的范围
|
|
auto getIdxRange = [](std::pair<float, float> range, std::pair<float, float> paramRange, int edgeCellCnt) {
|
|
float paramStep = (paramRange.second - paramRange.first) / edgeCellCnt;
|
|
return std::pair<int, int>({int((range.first - paramRange.first) / paramStep),
|
|
int((range.second - paramRange.first) / paramStep)});
|
|
};
|
|
auto idxRange_u1 = getIdxRange(range_u1, paramRange_u1, edgeCellCnt);
|
|
auto idxRange_v1 = getIdxRange(range_v1, paramRange_v1, edgeCellCnt);
|
|
auto idxRange_u2 = getIdxRange(range_u2, paramRange_u2, edgeCellCnt);
|
|
auto idxRange_v2 = getIdxRange(range_v2, paramRange_v2, edgeCellCnt);
|
|
return isGaussMapsOverlapped(gm1, gm2, idxRange_u1, idxRange_v1, idxRange_u2, idxRange_v2);
|
|
}
|
|
|
|
__host__ void
|
|
NurbsSurface::Surface::recursiveGetRayBVHIntersection(const glm::vec3 dir, const glm::vec3 startPoint, const int idx,
|
|
std::vector<BVHNode> &intersectionLeafNodes) {
|
|
auto bvhNode = bvh.nodes[idx];
|
|
// 射线与AABB判交
|
|
auto isRayBoxIntersect = [&]() {
|
|
const auto &box = bvhNode.bounds;
|
|
float t; // 射线的参数,当t<=0,表示线面交点不在视平面前方,视为没有交点
|
|
if (dir.x != 0.) {
|
|
// 当x分量不为0,则射线会与AABB中垂直于x轴的平面可能有交
|
|
// 注意x的正负。x为正会先遇到较小点所在平面
|
|
if (dir.x > 0) t = (box.pMin.x - startPoint.x) / dir.x;
|
|
else t = (box.pMax.x - startPoint.x) / dir.x;
|
|
if (t > 0.) {
|
|
// 射线与平面在前方有交点。但交点不一定在盒子上
|
|
auto tmpPt = startPoint + t * dir; // 交点
|
|
if (box.pMin.y <= tmpPt.y && box.pMin.z <= tmpPt.z && box.pMax.y >= tmpPt.y && box.pMax.z >= tmpPt.z)
|
|
return true;
|
|
}
|
|
}
|
|
if (dir.y != 0.) {
|
|
// 同上测试y方向
|
|
if (dir.y > 0) t = (box.pMin.y - startPoint.y) / dir.y;
|
|
else t = (box.pMax.y - startPoint.y) / dir.y;
|
|
if (t > 0.) {
|
|
auto tmpPt = startPoint + t * dir;
|
|
if (box.pMin.x <= tmpPt.x && box.pMin.z <= tmpPt.z && box.pMax.x >= tmpPt.x && box.pMax.z >= tmpPt.z)
|
|
return true;
|
|
}
|
|
}
|
|
if (dir.z != 0.) {
|
|
// 同上测试z方向
|
|
if (dir.z > 0) t = (box.pMin.z - startPoint.z) / dir.z;
|
|
else t = (box.pMax.z - startPoint.z) / dir.z;
|
|
if (t > 0.) {
|
|
auto tmpPt = startPoint + t * dir;
|
|
if (box.pMin.x <= tmpPt.x && box.pMin.y <= tmpPt.y && box.pMax.x >= tmpPt.x && box.pMax.y >= tmpPt.y)
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
};
|
|
|
|
// 不相交
|
|
if (!isRayBoxIntersect()) return;
|
|
// 相交
|
|
if (bvhNode.firstChild == -1) {
|
|
// 与叶节点相交
|
|
intersectionLeafNodes.emplace_back(bvhNode);
|
|
} else {
|
|
// 与父节点相交
|
|
for (int i = 0; i < 4; i++)
|
|
recursiveGetRayBVHIntersection(dir, startPoint, bvhNode.firstChild + i, intersectionLeafNodes);
|
|
}
|
|
}
|
|
|
|
__host__ std::vector<BVHNode> NurbsSurface::Surface::rayBVHIntersection(glm::vec3 dir, glm::vec3 startPoint) {
|
|
std::vector<BVHNode> res;
|
|
recursiveGetRayBVHIntersection(dir, startPoint, 0, res);
|
|
//TODO sort res by t
|
|
|
|
// res.emplace_back(BVHNode());
|
|
// printf("res size: %lld\n", res.size());
|
|
return res;
|
|
}
|