You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							856 lines
						
					
					
						
							33 KiB
						
					
					
				
			
		
		
		
			
			
			
				
					
				
				
					
				
			
		
		
	
	
							856 lines
						
					
					
						
							33 KiB
						
					
					
				
								//
							 | 
						|
								// Created by 14727 on 2022/12/9.
							 | 
						|
								//
							 | 
						|
								#pragma once
							 | 
						|
								#include "chrono"
							 | 
						|
								#include "device/Nurbs/bvh.cuh"
							 | 
						|
								#include "device/Nurbs/nurbs_common.cuh"
							 | 
						|
								#include "device/Nurbs/nurbs_surface.cuh"
							 | 
						|
								#include "device/device_utils.cuh"
							 | 
						|
								#include "tinynurbs/tinynurbs.h"
							 | 
						|
								#include "utils.h"
							 | 
						|
								
							 | 
						|
								__global__ void NurbsSurface::g_evaluate(glm::vec3 *res,
							 | 
						|
								                                         const float *d_nTexture_u,
							 | 
						|
								                                         const float *d_nTexture_v,
							 | 
						|
								                                         const glm::vec4 *d_points,
							 | 
						|
								                                         int d_pointsCnt_u, int d_pointsCnt_v,
							 | 
						|
								                                         float d_lastKnot_u, float d_lastKnot_v,
							 | 
						|
								                                         int d_sampleCnt_u, int d_sampleCnt_v) {
							 | 
						|
								  // 二维grid和二维的block
							 | 
						|
								  int ix = blockIdx.x * blockDim.x + threadIdx.x;
							 | 
						|
								  int iy = blockIdx.y * blockDim.y + threadIdx.y;
							 | 
						|
								
							 | 
						|
								  if (ix >= d_sampleCnt_u || iy >= d_sampleCnt_v)
							 | 
						|
								    return;
							 | 
						|
								  //    if(ix == 128 || iy == 128)    printf("!!!!!!!!!@@@@%d, %d, %d, %d\n",
							 | 
						|
								  //    ix, iy, d_sampleCnt_u, d_sampleCnt_v); float u = ix * d_lastKnot_u /
							 | 
						|
								  //    (d_sampleCnt_u - 1); float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1);
							 | 
						|
								
							 | 
						|
								  float x = 0., y = 0., z = 0., sumW = 0.;
							 | 
						|
								  for (int i = 0; i < d_pointsCnt_u; i++) {
							 | 
						|
								    float N_U = d_nTexture_u[ix * d_pointsCnt_u + i];
							 | 
						|
								    for (int j = 0; j < d_pointsCnt_v; j++) {
							 | 
						|
								      float N_V = d_nTexture_v[iy * d_pointsCnt_v + j];
							 | 
						|
								      int idx = i * d_pointsCnt_v + j;
							 | 
						|
								      float w = d_points[idx].w;
							 | 
						|
								      x += N_U * N_V * w * d_points[idx].x;
							 | 
						|
								      y += N_U * N_V * w * d_points[idx].y;
							 | 
						|
								      z += N_U * N_V * w * d_points[idx].z;
							 | 
						|
								      sumW += N_U * N_V * w;
							 | 
						|
								    }
							 | 
						|
								  }
							 | 
						|
								  float sumWInv = 1.f / sumW;
							 | 
						|
								  x *= sumWInv;
							 | 
						|
								  y *= sumWInv;
							 | 
						|
								  z *= sumWInv;
							 | 
						|
								
							 | 
						|
								  int baseIdx = ix * d_sampleCnt_v + iy;
							 | 
						|
								  res[baseIdx] = {x, y, z};
							 | 
						|
								
							 | 
						|
								  //    printf("(%d, %d)-->(%g, %g, %g)\n", ix, iy, res[baseIdx].x,
							 | 
						|
								  //    res[baseIdx].y, res[baseIdx].z); // %g输出,舍弃无意义的0
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__global__ void
							 | 
						|
								NurbsSurface::g_derivative(glm::vec3 *derivatives_u, glm::vec3 *derivatives_v,
							 | 
						|
								                           glm::vec3 *normals, const float *derTexture_u,
							 | 
						|
								                           const float *derTexture_v, const float *nTexture_u,
							 | 
						|
								                           const float *nTexture_v, const glm::vec4 *d_points,
							 | 
						|
								                           int d_pointsCnt_u, int d_pointsCnt_v,
							 | 
						|
								                           int d_sampleCnt_u, int d_sampleCnt_v) {
							 | 
						|
								  // 二维grid和二维的block
							 | 
						|
								  int ix = blockIdx.x * blockDim.x + threadIdx.x;
							 | 
						|
								  int iy = blockIdx.y * blockDim.y + threadIdx.y;
							 | 
						|
								
							 | 
						|
								  if (ix >= d_sampleCnt_u || iy >= d_sampleCnt_v) {
							 | 
						|
								    return;
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  float nubsPdx_u = 0., nubsPdy_u = 0, nubsPdz_u = 0., nubsPdw_u = 0.;
							 | 
						|
								  float nubsPdx_v = 0., nubsPdy_v = 0, nubsPdz_v = 0., nubsPdw_v = 0.;
							 | 
						|
								
							 | 
						|
								  // TODO  合并两个循环?
							 | 
						|
								  for (int i = 0; i < d_pointsCnt_u; i++) {
							 | 
						|
								    for (int j = 0; j < d_pointsCnt_v; j++) {
							 | 
						|
								      int baseIdx = i * d_pointsCnt_v + j;
							 | 
						|
								      float factor_u = derTexture_u[ix * d_pointsCnt_u + i] *
							 | 
						|
								                       nTexture_v[iy * d_pointsCnt_v + j];
							 | 
						|
								      float factor_v = derTexture_v[iy * d_pointsCnt_v + j] *
							 | 
						|
								                       nTexture_u[ix * d_pointsCnt_u + i];
							 | 
						|
								      float wij = d_points[baseIdx].w;
							 | 
						|
								      nubsPdx_u += factor_u * wij * d_points[baseIdx].x;
							 | 
						|
								      nubsPdy_u += factor_u * wij * d_points[baseIdx].y;
							 | 
						|
								      nubsPdz_u += factor_u * wij * d_points[baseIdx].z;
							 | 
						|
								      nubsPdw_u += factor_u * wij;
							 | 
						|
								
							 | 
						|
								      nubsPdx_v += factor_v * wij * d_points[baseIdx].x;
							 | 
						|
								      nubsPdy_v += factor_v * wij * d_points[baseIdx].y;
							 | 
						|
								      nubsPdz_v += factor_v * wij * d_points[baseIdx].z;
							 | 
						|
								      nubsPdw_v += factor_v * wij;
							 | 
						|
								    }
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  float x = 0., y = 0., z = 0., w = 0.;
							 | 
						|
								  for (int i = 0; i < d_pointsCnt_u; i++) {
							 | 
						|
								    float N_U = nTexture_u[ix * d_pointsCnt_u + i];
							 | 
						|
								    for (int j = 0; j < d_pointsCnt_v; j++) {
							 | 
						|
								      float N_V = nTexture_v[iy * d_pointsCnt_v + j];
							 | 
						|
								      int idx = i * d_pointsCnt_v + j;
							 | 
						|
								      float wij = d_points[idx].w;
							 | 
						|
								      x += N_U * N_V * wij * d_points[idx].x;
							 | 
						|
								      y += N_U * N_V * wij * d_points[idx].y;
							 | 
						|
								      z += N_U * N_V * wij * d_points[idx].z;
							 | 
						|
								      w += N_U * N_V * wij;
							 | 
						|
								    }
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  float w2 = w * w;
							 | 
						|
								  float pdx_u = (nubsPdx_u * w - x * nubsPdw_u) / w2;
							 | 
						|
								  float pdy_u = (nubsPdy_u * w - y * nubsPdw_u) / w2;
							 | 
						|
								  float pdz_u = (nubsPdz_u * w - z * nubsPdw_u) / w2;
							 | 
						|
								
							 | 
						|
								  float pdx_v = (nubsPdx_v * w - x * nubsPdw_v) / w2;
							 | 
						|
								  float pdy_v = (nubsPdy_v * w - y * nubsPdw_v) / w2;
							 | 
						|
								  float pdz_v = (nubsPdz_v * w - z * nubsPdw_v) / w2;
							 | 
						|
								  //    float pdz_u = (nubsPdz_u * w - z )
							 | 
						|
								
							 | 
						|
								  int baseIdx = ix * d_sampleCnt_v + iy;
							 | 
						|
								  derivatives_u[baseIdx] = {pdx_u, pdy_u, pdz_u};
							 | 
						|
								  derivatives_v[baseIdx] = {pdx_v, pdy_v, pdz_v};
							 | 
						|
								
							 | 
						|
								  // 叉乘得到法向量
							 | 
						|
								  normals[baseIdx] = glm::cross(derivatives_u[baseIdx], derivatives_v[baseIdx]);
							 | 
						|
								  normals[baseIdx] = glm::normalize(normals[baseIdx]);
							 | 
						|
								  //    if ((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9)
							 | 
						|
								  //    || (ix == 8 && iy == 8) ||
							 | 
						|
								  //        (ix == 8 && iy == 10))
							 | 
						|
								  //        printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g),
							 | 
						|
								  //        normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v,
							 | 
						|
								  //               pdz_v, normals[baseIdx], normals[baseIdx + 1],
							 | 
						|
								  //               normals[baseIdx + 2]);
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__global__ void NurbsSurface::g_curvature(const glm::vec3 *derivatives_u,
							 | 
						|
								                                          const glm::vec3 *derivatives_v,
							 | 
						|
								                                          const int sampleCnt_u,
							 | 
						|
								                                          const int sampleCnt_v,
							 | 
						|
								                                          float lastKnot_u, float lastKnot_v,
							 | 
						|
								                                          float *ms, float *k) {
							 | 
						|
								  // 二维grid和二维的block
							 | 
						|
								  int ix = blockIdx.x * blockDim.x + threadIdx.x;
							 | 
						|
								  int iy = blockIdx.y * blockDim.y + threadIdx.y;
							 | 
						|
								
							 | 
						|
								  if (ix >= sampleCnt_u || iy >= sampleCnt_v) {
							 | 
						|
								    return;
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  float step_u = lastKnot_u / (sampleCnt_u - 1),
							 | 
						|
								        step_v = lastKnot_v / (sampleCnt_v - 1);
							 | 
						|
								  float u = ix * step_u, v = iy * step_v;
							 | 
						|
								
							 | 
						|
								  int baseIdx = ix * sampleCnt_v + iy;
							 | 
						|
								  int lastBaseIdx_u = (ix - 1) * sampleCnt_v + iy,
							 | 
						|
								      nextBaseIdx_u = (ix + 1) * sampleCnt_v + iy;
							 | 
						|
								  int lastBaseIdx_v = ix * sampleCnt_v + iy - 1,
							 | 
						|
								      nextBaseIdx_v = ix * sampleCnt_v + iy + 1;
							 | 
						|
								
							 | 
						|
								  //    printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g)\n", u, v,
							 | 
						|
								  //    derivatives[baseIdx], derivatives[baseIdx + 1],
							 | 
						|
								  //           derivatives[baseIdx + 2], derivatives[baseIdx + 3],
							 | 
						|
								  //           derivatives[baseIdx + 4], derivatives[baseIdx + 5]);
							 | 
						|
								
							 | 
						|
								  //    float sndPdx_uu, sndPdy_uu, sndPdz_uu, sndPdx_vv, sndPdy_vv, sndPdz_vv;
							 | 
						|
								  //    // 二阶导 float sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu,
							 | 
						|
								  //    sndPdz_vu;
							 | 
						|
								
							 | 
						|
								  glm::vec3 sndP_uu, sndP_uv, sndP_vu, sndP_vv;
							 | 
						|
								
							 | 
						|
								  if (ix == 0) {
							 | 
						|
								    sndP_uu = (derivatives_u[nextBaseIdx_u] - derivatives_u[baseIdx]) / step_u;
							 | 
						|
								    sndP_vu = (derivatives_v[nextBaseIdx_u] - derivatives_v[baseIdx]) / step_u;
							 | 
						|
								    //        sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[baseIdx
							 | 
						|
								    //        + 1]) / step_u; sndPdz_uu = (derivatives[nextBaseIdx_u + 2] -
							 | 
						|
								    //        derivatives[baseIdx + 2]) / step_u;
							 | 
						|
								
							 | 
						|
								    //        sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[baseIdx
							 | 
						|
								    //        + 3]) / step_u; sndPdy_vu = (derivatives[nextBaseIdx_u + 4] -
							 | 
						|
								    //        derivatives[baseIdx + 4]) / step_u; sndPdz_vu =
							 | 
						|
								    //        (derivatives[nextBaseIdx_u + 5] - derivatives[baseIdx + 5]) /
							 | 
						|
								    //        step_u;
							 | 
						|
								  } else if (ix == sampleCnt_u - 1) {
							 | 
						|
								    sndP_uu = (derivatives_u[baseIdx] - derivatives_u[lastBaseIdx_u]) / step_u;
							 | 
						|
								    sndP_vu = (derivatives_v[baseIdx] - derivatives_v[lastBaseIdx_u]) / step_u;
							 | 
						|
								  } else {
							 | 
						|
								    sndP_uu =
							 | 
						|
								        (derivatives_u[nextBaseIdx_u] - derivatives_u[lastBaseIdx_u]) / step_u;
							 | 
						|
								    sndP_vu =
							 | 
						|
								        (derivatives_v[nextBaseIdx_u] - derivatives_v[lastBaseIdx_u]) / step_u;
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  if (iy == 0) {
							 | 
						|
								    sndP_vv = (derivatives_v[nextBaseIdx_u] - derivatives_v[baseIdx]) / step_v;
							 | 
						|
								    sndP_uv = (derivatives_u[nextBaseIdx_u] - derivatives_u[baseIdx]) / step_v;
							 | 
						|
								  } else if (iy == sampleCnt_v - 1) {
							 | 
						|
								    sndP_vv = (derivatives_v[nextBaseIdx_u] - derivatives_v[baseIdx]) / step_v;
							 | 
						|
								    sndP_uv = (derivatives_u[nextBaseIdx_u] - derivatives_u[baseIdx]) / step_v;
							 | 
						|
								  } else {
							 | 
						|
								    sndP_vv = (derivatives_v[nextBaseIdx_u] - derivatives_v[baseIdx]) / step_v;
							 | 
						|
								    sndP_uv = (derivatives_u[nextBaseIdx_u] - derivatives_u[baseIdx]) / step_v;
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  sndP_uv = (sndP_uv + sndP_vu) / 2.f;
							 | 
						|
								  //    normalization(sndPdx_uv, sndPdy_uv, sndPdz_uv);
							 | 
						|
								  //    normalization(sndPdx_vu, sndPdy_vu, sndPdz_vu);
							 | 
						|
								  //    normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu);
							 | 
						|
								  //    normalization(uvx, uvy, uvz);
							 | 
						|
								  //    normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv);
							 | 
						|
								
							 | 
						|
								  //    if (ix == 8 && iy == 9) {
							 | 
						|
								  //        printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g,
							 | 
						|
								  //        %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu,
							 | 
						|
								  //               sndPdz_uu,
							 | 
						|
								  //               uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv);
							 | 
						|
								  //        printf("uv: (%g, %g, %g), vu: (%g, %g, %g)\n", sndPdx_uv, sndPdy_uv,
							 | 
						|
								  //        sndPdz_uv, sndPdx_vu, sndPdy_vu,
							 | 
						|
								  //               sndPdz_vu);
							 | 
						|
								  //    }
							 | 
						|
								
							 | 
						|
								  float m1 = max(max(sndP_uu.x, sndP_uu.y), sndP_uu.z);
							 | 
						|
								  float m2 = max(max(sndP_uv.x, sndP_uv.y), sndP_uv.z);
							 | 
						|
								  float m3 = max(max(sndP_vv.x, sndP_vv.y), sndP_vv.z);
							 | 
						|
								
							 | 
						|
								  //    __shared__ float ms[363];
							 | 
						|
								  ms[(ix * sampleCnt_v + iy) * 3] = m1;
							 | 
						|
								  ms[(ix * sampleCnt_v + iy) * 3 + 1] = m2;
							 | 
						|
								  ms[(ix * sampleCnt_v + iy) * 3 + 2] = m3;
							 | 
						|
								  __syncthreads();
							 | 
						|
								  //    if(ix == 1 && iy == 1) {
							 | 
						|
								  //        for(int i = 0; i < sampleCnt_u; i++) {
							 | 
						|
								  //            for(int j = 0; j < sampleCnt_v; j++) {
							 | 
						|
								  //                printf("%g ", ms[(i * sampleCnt_v + j) * 3]);
							 | 
						|
								  //            }
							 | 
						|
								  //            printf("\n");
							 | 
						|
								  //        }
							 | 
						|
								  //    }
							 | 
						|
								
							 | 
						|
								  // 规约求最大值
							 | 
						|
								  for (int step = (sampleCnt_u + 1) / 2; step > 1; step = (step + 1) / 2) {
							 | 
						|
								    // step 表示现在参与计算最大值的数据的长度的一半
							 | 
						|
								    if (ix < step && ix + step < sampleCnt_u) {
							 | 
						|
								      ms[(ix * sampleCnt_v + iy) * 3] =
							 | 
						|
								          max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3]);
							 | 
						|
								      ms[(ix * sampleCnt_v + iy) * 3 + 1] =
							 | 
						|
								          max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3 + 1]);
							 | 
						|
								      ms[(ix * sampleCnt_v + iy) * 3 + 2] =
							 | 
						|
								          max(m1, ms[((ix + step) * sampleCnt_v + iy) * 3 + 2]);
							 | 
						|
								    }
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  for (int step = (sampleCnt_v + 1) / 2; step > 1; step = (step + 1) / 2) {
							 | 
						|
								    // step 表示现在参与计算最大值的数据的长度的一半
							 | 
						|
								    if (iy < step && iy + step < sampleCnt_v) {
							 | 
						|
								      ms[iy * 3] = max(ms[iy * 3], ms[(iy + step) * 3]);
							 | 
						|
								      ms[iy * 3 + 1] = max(ms[iy * 3 + 1], ms[(iy + step) * 3 + 1]);
							 | 
						|
								      ms[iy * 3 + 2] = max(ms[iy * 3 + 2], ms[(iy + step) * 3 + 2]);
							 | 
						|
								    }
							 | 
						|
								  }
							 | 
						|
								  __syncthreads();
							 | 
						|
								  int n = sampleCnt_u - 1;
							 | 
						|
								  int m = sampleCnt_v - 1;
							 | 
						|
								  *k = (ms[0] / (n * n) + 2 * ms[1] / (n * m) + ms[2] / (m * m)) / 8;
							 | 
						|
								  //    if(ix == 1 && iy == 1)printf("%g gggg\n", ms[0]);
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ NurbsSurface::Surface::Surface(MeshPoints4 controlPoints,
							 | 
						|
								                                        std::vector<float> knots_u,
							 | 
						|
								                                        std::vector<float> knots_v,
							 | 
						|
								                                        int _layerCnt) {
							 | 
						|
								  this->knots_u = std::move(knots_u);
							 | 
						|
								  this->knots_v = std::move(knots_v);
							 | 
						|
								  this->controlPoints = std::move(controlPoints);
							 | 
						|
								
							 | 
						|
								  layerCnt = _layerCnt;
							 | 
						|
								  edgeSampleCnt_u = edgeSampleCnt_v = pow(2, layerCnt - 1) + 1;
							 | 
						|
								
							 | 
						|
								  recordTime = false;
							 | 
						|
								  d_nTexture_u = nullptr;
							 | 
						|
								  d_nTexture_v = nullptr;
							 | 
						|
								  d_nTexture1_u = nullptr;
							 | 
						|
								  d_nTexture1_v = nullptr;
							 | 
						|
								  d_knots_u = nullptr;
							 | 
						|
								  d_knots_v = nullptr;
							 | 
						|
								  d_points = nullptr;
							 | 
						|
								  d_evaluations = nullptr;
							 | 
						|
								  d_derivatives_u = nullptr;
							 | 
						|
								  d_derivatives_v = nullptr;
							 | 
						|
								  d_k = nullptr;
							 | 
						|
								  d_normals = nullptr;
							 | 
						|
								  bvh.nodes = nullptr;
							 | 
						|
								  h_evaluations = nullptr;
							 | 
						|
								  h_derivatives_u = nullptr;
							 | 
						|
								  h_derivatives_v = nullptr;
							 | 
						|
								  h_normals = nullptr;
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ void NurbsSurface::Surface::evaluate() {
							 | 
						|
								  // 构造指向device的controlPoints
							 | 
						|
								  const int pointsCnt_u = controlPoints.size(),
							 | 
						|
								            pointsCnt_v = controlPoints[0].size();
							 | 
						|
								  const int pointsBytes = pointsCnt_u * pointsCnt_v * sizeof(glm::vec4);
							 | 
						|
								  auto *h_points = (glm::vec4 *)malloc(pointsBytes);
							 | 
						|
								  for (int i = 0; i < pointsCnt_u; i++) {
							 | 
						|
								    for (int j = 0; j < pointsCnt_v; j++) {
							 | 
						|
								      h_points[i * pointsCnt_v + j] = controlPoints[i][j];
							 | 
						|
								    }
							 | 
						|
								  }
							 | 
						|
								  cudaMalloc((void **)&d_points, pointsBytes);
							 | 
						|
								  cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice);
							 | 
						|
								
							 | 
						|
								  // 构造指向device的knots
							 | 
						|
								  const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size();
							 | 
						|
								  const int knotsBytes_u = knotsCnt_u * sizeof(float),
							 | 
						|
								            knotsBytes_v = knotsCnt_v * sizeof(float);
							 | 
						|
								  auto *h_knots_u = (float *)malloc(knotsBytes_u),
							 | 
						|
								       *h_knots_v = (float *)malloc(knotsBytes_v);
							 | 
						|
								  for (int i = 0; i < knotsCnt_u; i++)
							 | 
						|
								    h_knots_u[i] = knots_u[i];
							 | 
						|
								  for (int i = 0; i < knotsCnt_v; i++)
							 | 
						|
								    h_knots_v[i] = knots_v[i];
							 | 
						|
								
							 | 
						|
								  safeCudaFree(d_knots_u);
							 | 
						|
								  safeCudaFree(d_knots_v);
							 | 
						|
								  cudaMalloc((void **)&d_knots_u, knotsBytes_u);
							 | 
						|
								  cudaMalloc((void **)&d_knots_v, knotsBytes_v);
							 | 
						|
								  cudaMemcpy(d_knots_u, h_knots_u, knotsBytes_u, cudaMemcpyHostToDevice);
							 | 
						|
								  cudaMemcpy(d_knots_v, h_knots_v, knotsBytes_v, cudaMemcpyHostToDevice);
							 | 
						|
								
							 | 
						|
								  // 构造nTexture
							 | 
						|
								  cudaMalloc(
							 | 
						|
								      (void **)&d_nTexture_u,
							 | 
						|
								      edgeSampleCnt_u * pointsCnt_u *
							 | 
						|
								          sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt
							 | 
						|
								                          // + 1的基函数值
							 | 
						|
								  cudaMalloc((void **)&d_nTexture_v,
							 | 
						|
								             edgeSampleCnt_v * pointsCnt_v * sizeof(float));
							 | 
						|
								
							 | 
						|
								  // 构造nTexture1
							 | 
						|
								  cudaMalloc((void **)&d_nTexture1_u,
							 | 
						|
								             edgeSampleCnt_u * (pointsCnt_u + 1) * sizeof(float));
							 | 
						|
								  cudaMalloc((void **)&d_nTexture1_v,
							 | 
						|
								             edgeSampleCnt_v * (pointsCnt_v + 1) * sizeof(float));
							 | 
						|
								
							 | 
						|
								  // 结果数组
							 | 
						|
								  size_t resBytes = edgeSampleCnt_u * edgeSampleCnt_v * sizeof(glm::vec3);
							 | 
						|
								  safeCudaFree(d_evaluations);
							 | 
						|
								  cudaMalloc((void **)&d_evaluations, resBytes);
							 | 
						|
								  safeFree(h_evaluations);
							 | 
						|
								  h_evaluations = (glm::vec3 *)malloc(resBytes);
							 | 
						|
								
							 | 
						|
								  // 构造g_basisTexture线程层级
							 | 
						|
								  dim3 blockBasis(512);
							 | 
						|
								  dim3 gridBasis_u((edgeSampleCnt_u + blockBasis.x - 1) / blockBasis.x);
							 | 
						|
								  dim3 gridBasis_v((edgeSampleCnt_v + blockBasis.x - 1) / blockBasis.x);
							 | 
						|
								
							 | 
						|
								  // 构造线程层级,调用核函数
							 | 
						|
								  dim3 block(32, 32);
							 | 
						|
								  dim3 grid((edgeSampleCnt_u + block.x - 1) / block.x,
							 | 
						|
								            (edgeSampleCnt_v + block.y - 1) / block.y);
							 | 
						|
								  // 记录用时
							 | 
						|
								  auto startMom = std::chrono::steady_clock::now();
							 | 
						|
								  g_basisTexture<<<gridBasis_u, blockBasis>>>(d_nTexture_u, d_nTexture1_u,
							 | 
						|
								                                              d_knots_u, pointsCnt_u,
							 | 
						|
								                                              knotsCnt_u, edgeSampleCnt_u);
							 | 
						|
								  // cudaDeviceSynchronize();
							 | 
						|
								  auto duration = std::chrono::duration<double, std::milli>(
							 | 
						|
								                      std::chrono::steady_clock::now() - startMom)
							 | 
						|
								                      .count();
							 | 
						|
								  printf("GPU time cost of u_basis evaluation for %d*%d samples: %lf ms\n",
							 | 
						|
								         edgeSampleCnt_u, edgeSampleCnt_v, duration);
							 | 
						|
								
							 | 
						|
								  startMom = std::chrono::steady_clock::now();
							 | 
						|
								  g_basisTexture<<<gridBasis_v, blockBasis>>>(d_nTexture_v, d_nTexture1_v,
							 | 
						|
								                                              d_knots_v, pointsCnt_v,
							 | 
						|
								                                              knotsCnt_v, edgeSampleCnt_u);
							 | 
						|
								  duration = std::chrono::duration<double, std::milli>(
							 | 
						|
								                 std::chrono::steady_clock::now() - startMom)
							 | 
						|
								                 .count();
							 | 
						|
								  // cudaDeviceSynchronize();
							 | 
						|
								  printf("GPU time cost of v_basis evaluation for %d*%d samples: %lf ms\n",
							 | 
						|
								         edgeSampleCnt_u, edgeSampleCnt_v, duration);
							 | 
						|
								
							 | 
						|
								  // 记录用时
							 | 
						|
								  startMom = std::chrono::steady_clock::now();
							 | 
						|
								  g_evaluate<<<grid, block>>>(d_evaluations, d_nTexture_u, d_nTexture_v,
							 | 
						|
								                              d_points, pointsCnt_u, pointsCnt_v,
							 | 
						|
								                              knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1],
							 | 
						|
								                              edgeSampleCnt_u, edgeSampleCnt_v);
							 | 
						|
								  duration = std::chrono::duration<double, std::milli>(
							 | 
						|
								                 std::chrono::steady_clock::now() - startMom)
							 | 
						|
								                 .count();
							 | 
						|
								  // cudaDeviceSynchronize(); //
							 | 
						|
								  // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用
							 | 
						|
								  if (recordTime) {
							 | 
						|
								    auto duration = std::chrono::duration<double, std::milli>(
							 | 
						|
								                        std::chrono::steady_clock::now() - startMom)
							 | 
						|
								                        .count();
							 | 
						|
								    printf("GPU time cost of surface evaluation for %d*%d samples: %lf ms\n",
							 | 
						|
								           edgeSampleCnt_u, edgeSampleCnt_v, duration);
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  // TODO 拷贝到h_evaluations放在该函数内合适吗?
							 | 
						|
								  cudaMemcpy(h_evaluations, d_evaluations, resBytes, cudaMemcpyDeviceToHost);
							 | 
						|
								
							 | 
						|
								  // 释放内存
							 | 
						|
								  safeFree(h_points);
							 | 
						|
								  safeFree(h_knots_u);
							 | 
						|
								  safeFree(h_knots_v);
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ std::vector<std::vector<glm::vec3>>
							 | 
						|
								NurbsSurface::Surface::getEvaluateVec() const {
							 | 
						|
								  std::vector<std::vector<glm::vec3>> res(
							 | 
						|
								      edgeSampleCnt_u, std::vector<glm::vec3>(edgeSampleCnt_v, glm::vec3()));
							 | 
						|
								  for (int i = 0; i < edgeSampleCnt_u; i++) {
							 | 
						|
								    int baseIdx = i * edgeSampleCnt_v;
							 | 
						|
								    for (int j = 0; j < edgeSampleCnt_v; j++) {
							 | 
						|
								      baseIdx += j;
							 | 
						|
								      res[i][j].x = h_evaluations[baseIdx].x;
							 | 
						|
								      res[i][j].y = h_evaluations[baseIdx].y;
							 | 
						|
								      res[i][j].z = h_evaluations[baseIdx].z;
							 | 
						|
								      //            printf("%d, %d: %f, %f, %f\n", i, j, res[i][j][0],
							 | 
						|
								      //            res[i][j][1], res[i][j][2]);
							 | 
						|
								    }
							 | 
						|
								  }
							 | 
						|
								  return res;
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ std::vector<MeshPoints3>
							 | 
						|
								NurbsSurface::Surface::getDerivativeVec() const {
							 | 
						|
								  MeshPoints3 der_u(edgeSampleCnt_u, LinePoints3(edgeSampleCnt_v));
							 | 
						|
								  MeshPoints3 der_v(edgeSampleCnt_u, LinePoints3(edgeSampleCnt_v));
							 | 
						|
								  MeshPoints3 normal(edgeSampleCnt_u, LinePoints3(edgeSampleCnt_v));
							 | 
						|
								  for (int i = 0; i < edgeSampleCnt_u; i++) {
							 | 
						|
								    int baseIdx = i * edgeSampleCnt_v;
							 | 
						|
								    for (int j = 0; j < edgeSampleCnt_v; j++) {
							 | 
						|
								      baseIdx += j;
							 | 
						|
								      der_u[i][j] = h_derivatives_u[baseIdx];
							 | 
						|
								      der_v[i][j] = h_derivatives_v[baseIdx];
							 | 
						|
								      normal[i][j] = h_normals[baseIdx];
							 | 
						|
								    }
							 | 
						|
								  }
							 | 
						|
								  return {der_u, der_v, normal};
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ void NurbsSurface::Surface::derivative() {
							 | 
						|
								  float *d_derTexture_u = nullptr;
							 | 
						|
								  float *d_derTexture_v = nullptr;
							 | 
						|
								  const int pointsCnt_u = controlPoints.size(),
							 | 
						|
								            pointsCnt_v = controlPoints[0].size();
							 | 
						|
								  const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size();
							 | 
						|
								  cudaMalloc((void **)&d_derTexture_u,
							 | 
						|
								             edgeSampleCnt_u * pointsCnt_u * sizeof(float));
							 | 
						|
								  cudaMalloc((void **)&d_derTexture_v,
							 | 
						|
								             edgeSampleCnt_u * pointsCnt_v * sizeof(float));
							 | 
						|
								
							 | 
						|
								  // 构造切向量计算结果
							 | 
						|
								  size_t vec3MeshBytes = edgeSampleCnt_u * edgeSampleCnt_v * sizeof(glm::vec3);
							 | 
						|
								  safeCudaFree(d_derivatives_u);
							 | 
						|
								  cudaMalloc((void **)&d_derivatives_u, vec3MeshBytes);
							 | 
						|
								  safeCudaFree(d_derivatives_v);
							 | 
						|
								  cudaMalloc((void **)&d_derivatives_v, vec3MeshBytes);
							 | 
						|
								
							 | 
						|
								  // 构造法向量计算结果
							 | 
						|
								  safeCudaFree(d_normals);
							 | 
						|
								  cudaMalloc((void **)&d_normals, vec3MeshBytes);
							 | 
						|
								
							 | 
						|
								  // 构造线程层级
							 | 
						|
								  dim3 block(32, 32);
							 | 
						|
								  dim3 grid((edgeSampleCnt_u + block.x - 1) / block.x,
							 | 
						|
								            (edgeSampleCnt_v + block.y - 1) / block.y);
							 | 
						|
								  // 构造g_basisTexture线程层级
							 | 
						|
								  dim3 blockTex(512);
							 | 
						|
								  dim3 gridTex_u((edgeSampleCnt_u + blockTex.x - 1) / blockTex.x);
							 | 
						|
								  dim3 gridTex_v((edgeSampleCnt_v + blockTex.x - 1) / blockTex.x);
							 | 
						|
								  // 记录用时
							 | 
						|
								  auto startMom = std::chrono::steady_clock::now();
							 | 
						|
								  g_derTexture<<<gridTex_u, blockTex>>>(d_derTexture_u, d_nTexture1_u,
							 | 
						|
								                                        d_knots_u, pointsCnt_u, knotsCnt_u,
							 | 
						|
								                                        edgeSampleCnt_u);
							 | 
						|
								  g_derTexture<<<gridTex_v, blockTex>>>(d_derTexture_v, d_nTexture1_v,
							 | 
						|
								                                        d_knots_v, pointsCnt_v, knotsCnt_v,
							 | 
						|
								                                        edgeSampleCnt_v);
							 | 
						|
								  cudaDeviceSynchronize();
							 | 
						|
								  g_derivative<<<grid, block>>>(d_derivatives_u, d_derivatives_v, d_normals,
							 | 
						|
								                                d_derTexture_u, d_derTexture_v, d_nTexture_u,
							 | 
						|
								                                d_nTexture_v, d_points, pointsCnt_u,
							 | 
						|
								                                pointsCnt_v, edgeSampleCnt_u, edgeSampleCnt_v);
							 | 
						|
								  cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用
							 | 
						|
								  if (recordTime) {
							 | 
						|
								    auto duration = std::chrono::duration<double, std::milli>(
							 | 
						|
								                        std::chrono::steady_clock::now() - startMom)
							 | 
						|
								                        .count();
							 | 
						|
								    printf("GPU time cost of surface first derivative calculating for %d * %d "
							 | 
						|
								           "samples: %lf ms\n",
							 | 
						|
								           edgeSampleCnt_u, edgeSampleCnt_v, duration);
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  // 结果数组
							 | 
						|
								  safeFree(h_normals);
							 | 
						|
								  h_normals = (glm::vec3 *)malloc(vec3MeshBytes);
							 | 
						|
								  cudaMemcpy(h_normals, d_normals, vec3MeshBytes, cudaMemcpyDeviceToHost);
							 | 
						|
								
							 | 
						|
								  safeFree(h_derivatives_u);
							 | 
						|
								  safeFree(h_derivatives_v);
							 | 
						|
								  h_derivatives_u = (glm::vec3 *)malloc(vec3MeshBytes);
							 | 
						|
								  h_derivatives_v = (glm::vec3 *)malloc(vec3MeshBytes);
							 | 
						|
								  cudaMemcpy(h_derivatives_u, d_derivatives_u, vec3MeshBytes,
							 | 
						|
								             cudaMemcpyDeviceToHost);
							 | 
						|
								  cudaMemcpy(h_derivatives_v, d_derivatives_v, vec3MeshBytes,
							 | 
						|
								             cudaMemcpyDeviceToHost);
							 | 
						|
								
							 | 
						|
								  cudaFree(d_derTexture_u);
							 | 
						|
								  cudaFree(d_derTexture_v);
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ void NurbsSurface::Surface::curvature() {
							 | 
						|
								
							 | 
						|
								  // 构造记录每个采样点中的最大M1、M2、M3的数组
							 | 
						|
								  // 这里用共享内存会更好,但使用共享内存动态分配长度总是出错(好像是因为长度过长),后续需要思考解决这个问题
							 | 
						|
								  float *ms = nullptr;
							 | 
						|
								  cudaMalloc((void **)&ms,
							 | 
						|
								             edgeSampleCnt_u * edgeSampleCnt_v * 3 * sizeof(float));
							 | 
						|
								
							 | 
						|
								  cudaMalloc((void **)&d_k, sizeof(float));
							 | 
						|
								
							 | 
						|
								  // 构造线程层级
							 | 
						|
								  dim3 block(32, 32);
							 | 
						|
								  dim3 grid((edgeSampleCnt_u - 1 + block.x - 1) / block.x,
							 | 
						|
								            (edgeSampleCnt_v - 1 + block.y - 1) / block.y);
							 | 
						|
								
							 | 
						|
								  // 记录用时
							 | 
						|
								  auto startMom = std::chrono::steady_clock::now();
							 | 
						|
								  g_curvature<<<grid, block>>>(
							 | 
						|
								      d_derivatives_u, d_derivatives_v, edgeSampleCnt_u, edgeSampleCnt_v,
							 | 
						|
								      knots_u[knots_u.size() - 1], knots_v[knots_v.size() - 1], ms, d_k);
							 | 
						|
								  cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用
							 | 
						|
								  if (recordTime) {
							 | 
						|
								    auto duration = std::chrono::duration<double, std::milli>(
							 | 
						|
								                        std::chrono::steady_clock::now() - startMom)
							 | 
						|
								                        .count();
							 | 
						|
								    printf("GPU time cost of surface curvature calculating for %d*%d samples: "
							 | 
						|
								           "%lf\n",
							 | 
						|
								           edgeSampleCnt_u, edgeSampleCnt_v, duration);
							 | 
						|
								  }
							 | 
						|
								  safeCudaFree(ms);
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								void NurbsSurface::Surface::setRecordTime(bool r) { recordTime = r; }
							 | 
						|
								
							 | 
						|
								NurbsSurface::Surface::~Surface() {
							 | 
						|
								  safeCudaFree(d_nTexture_u);
							 | 
						|
								  safeCudaFree(d_nTexture_v);
							 | 
						|
								  safeCudaFree(d_nTexture1_u);
							 | 
						|
								  safeCudaFree(d_nTexture1_v);
							 | 
						|
								  safeCudaFree(d_points);
							 | 
						|
								  safeCudaFree(d_knots_u);
							 | 
						|
								  safeCudaFree(d_knots_v);
							 | 
						|
								  safeCudaFree(d_k);
							 | 
						|
								  safeCudaFree(d_evaluations);
							 | 
						|
								  safeCudaFree(d_normals);
							 | 
						|
								  safeCudaFree(d_derivatives_u);
							 | 
						|
								  safeCudaFree(d_derivatives_v);
							 | 
						|
								  safeFree(bvh.nodes);
							 | 
						|
								  safeFree(h_evaluations);
							 | 
						|
								  safeFree(h_normals);
							 | 
						|
								  safeFree(h_derivatives_u);
							 | 
						|
								  safeFree(h_derivatives_v);
							 | 
						|
								  cudaDeviceReset();
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ void NurbsSurface::Surface::buildBVH(bool useK) {
							 | 
						|
								  // TODO 构造BVH的函数不应该出现在NURBS Surface中,应该是BVH类的事情!
							 | 
						|
								  // TODO NURBS Surface只需要一个函数去调用BVH对象的方法即可
							 | 
						|
								  if (!useK) {
							 | 
						|
								    // 必须safeFree一下,这样global函数中才能通过d_k = nullptr知道不需要再free
							 | 
						|
								    safeCudaFree(d_k);
							 | 
						|
								  }
							 | 
						|
								  // 构造线程层级
							 | 
						|
								  dim3 block(32, 32);
							 | 
						|
								  dim3 grid((edgeSampleCnt_u + block.x - 1) / block.x,
							 | 
						|
								            (edgeSampleCnt_v + block.y - 1) / block.y);
							 | 
						|
								
							 | 
						|
								  bvh.maxLevel = layerCnt;
							 | 
						|
								  bvh.size = (pow(4, bvh.maxLevel) - 1) / 3; // 等比数列求和公示求出数总的节点数
							 | 
						|
								  size_t bvhBytes = sizeof(BVHNode) * bvh.size;
							 | 
						|
								  BVHNode *d_bvh = nullptr;
							 | 
						|
								  cudaMalloc((void **)&d_bvh, bvhBytes);
							 | 
						|
								  // 记录用时
							 | 
						|
								  auto startMom = std::chrono::steady_clock::now();
							 | 
						|
								  g_buildBvh<<<grid, block>>>(
							 | 
						|
								      d_k, bvh.maxLevel, d_evaluations, knots_u[knots_u.size() - 1],
							 | 
						|
								      knots_v[knots_v.size() - 1], edgeSampleCnt_u, edgeSampleCnt_v, d_bvh);
							 | 
						|
								  cudaDeviceSynchronize();
							 | 
						|
								  if (recordTime) {
							 | 
						|
								    auto duration = std::chrono::duration<double, std::milli>(
							 | 
						|
								                        std::chrono::steady_clock::now() - startMom)
							 | 
						|
								                        .count();
							 | 
						|
								    printf("GPU time cost of a %d-layer BVH building: %lf ms\n", bvh.maxLevel,
							 | 
						|
								           duration);
							 | 
						|
								  }
							 | 
						|
								  // 将bvh拷贝到cpu中
							 | 
						|
								  safeFree(bvh.nodes);
							 | 
						|
								  bvh.nodes = (BVHNode *)malloc(bvhBytes);
							 | 
						|
								  cudaMemcpy(bvh.nodes, d_bvh, bvhBytes, cudaMemcpyDeviceToHost);
							 | 
						|
								  safeCudaFree(d_bvh);
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ void NurbsSurface::Surface::buildGaussMap() {
							 | 
						|
								  // TODO,构造GAUSS Map的函数不应该出现在NURBS Surface中,应该是GAUSS
							 | 
						|
								  // Map类的事情!
							 | 
						|
								
							 | 
						|
								  // 构造线程层级
							 | 
						|
								  dim3 block(32, 32);
							 | 
						|
								  dim3 grid((edgeSampleCnt_u + block.x - 1) / block.x,
							 | 
						|
								            (edgeSampleCnt_v + block.y - 1) / block.y);
							 | 
						|
								
							 | 
						|
								  gauss_map.maxLevel = layerCnt;
							 | 
						|
								  gauss_map.size =
							 | 
						|
								      (pow(4, layerCnt) - 1) / 3; // 等比数列求和公示求出数总的节点数
							 | 
						|
								  size_t gaussMapBytes = sizeof(BVHNode) * gauss_map.size;
							 | 
						|
								  BVHNode *d_gaussMapTree = nullptr;
							 | 
						|
								  cudaMalloc((void **)&d_gaussMapTree, gaussMapBytes);
							 | 
						|
								  // 记录用时
							 | 
						|
								  double time_cost_device;
							 | 
						|
								  if (recordTime)
							 | 
						|
								    time_cost_device = get_time();
							 | 
						|
								  g_buildBvh<<<grid, block>>>(nullptr, layerCnt, d_normals,
							 | 
						|
								                              knots_u[knots_u.size() - 1],
							 | 
						|
								                              knots_v[knots_v.size() - 1], edgeSampleCnt_u,
							 | 
						|
								                              edgeSampleCnt_v, d_gaussMapTree);
							 | 
						|
								  cudaDeviceSynchronize();
							 | 
						|
								  if (recordTime) {
							 | 
						|
								    time_cost_device = get_time() - time_cost_device;
							 | 
						|
								    printf("GPU time cost of a %d-layer Gauss Map building: %lf\n", layerCnt,
							 | 
						|
								           time_cost_device);
							 | 
						|
								  }
							 | 
						|
								  safeFree(gauss_map.nodes);
							 | 
						|
								  gauss_map.nodes = (BVHNode *)malloc(gaussMapBytes);
							 | 
						|
								  cudaMemcpy(gauss_map.nodes, d_gaussMapTree, gaussMapBytes,
							 | 
						|
								             cudaMemcpyDeviceToHost);
							 | 
						|
								  safeCudaFree(d_gaussMapTree);
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ void NurbsSurface::recursiveGetOverlapLeafNodes(
							 | 
						|
								    const BVH &bvh1, const BVH &bvh2, int idx1, int idx2,
							 | 
						|
								    std::vector<std::pair<int, int>> &pairs) {
							 | 
						|
								  auto A = bvh1.nodes[idx1];
							 | 
						|
								  auto B = bvh2.nodes[idx2];
							 | 
						|
								  auto AABBSize = [](const AABB &aabb) {
							 | 
						|
								    return (aabb.pMax.z - aabb.pMin.z) * (aabb.pMax.y - aabb.pMin.y) *
							 | 
						|
								           (aabb.pMax.x - aabb.pMin.x);
							 | 
						|
								  };
							 | 
						|
								  // 两个包围盒不相交,返回
							 | 
						|
								  if (!A.bounds.IsOverlap(B.bounds))
							 | 
						|
								    return;
							 | 
						|
								  // 相交
							 | 
						|
								  if (A.firstChild == -1 && B.firstChild == -1) {
							 | 
						|
								    // 两者都是叶子节点
							 | 
						|
								    pairs.emplace_back(idx1, idx2);
							 | 
						|
								  } else if (A.firstChild != -1 && B.firstChild == -1) {
							 | 
						|
								    // A是中间结点,B是叶子结点
							 | 
						|
								    for (int i = 0; i < 4; i++)
							 | 
						|
								      recursiveGetOverlapLeafNodes(bvh1, bvh2, A.firstChild + i, idx2, pairs);
							 | 
						|
								  } else if (A.firstChild == -1 && B.firstChild != -1) {
							 | 
						|
								    // A是叶子结点,B是中间结点
							 | 
						|
								    for (int i = 0; i < 4; i++)
							 | 
						|
								      recursiveGetOverlapLeafNodes(bvh1, bvh2, idx1, B.firstChild + i, pairs);
							 | 
						|
								  } else {
							 | 
						|
								    // 都是中间结点
							 | 
						|
								    if (AABBSize(A.bounds) > AABBSize(B.bounds)) {
							 | 
						|
								      // A的包围盒更大
							 | 
						|
								      for (int i = 0; i < 4; i++)
							 | 
						|
								        recursiveGetOverlapLeafNodes(bvh1, bvh2, A.firstChild + i, idx2, pairs);
							 | 
						|
								    } else {
							 | 
						|
								      // B的包围盒更大
							 | 
						|
								      for (int i = 0; i < 4; i++)
							 | 
						|
								        recursiveGetOverlapLeafNodes(bvh1, bvh2, idx1, B.firstChild + i, pairs);
							 | 
						|
								    }
							 | 
						|
								  }
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ std::vector<boxPair>
							 | 
						|
								NurbsSurface::getOverlappedLeafNodes(const BVH &bvh1, const BVH &bvh2) {
							 | 
						|
								  std::vector<idx2> resPairs;
							 | 
						|
								  // 记录用时
							 | 
						|
								  double time_cost_device = get_time();
							 | 
						|
								
							 | 
						|
								  recursiveGetOverlapLeafNodes(bvh1, bvh2, 0, 0, resPairs);
							 | 
						|
								  std::vector<boxPair> boxPairsIdx2(resPairs.size());
							 | 
						|
								  for (int i = 0; i < resPairs.size(); i++) {
							 | 
						|
								    boxPairsIdx2[i] = {{bvh1.nodes[resPairs[i].first].idx_u,
							 | 
						|
								                        bvh1.nodes[resPairs[i].first].idx_v},
							 | 
						|
								                       {bvh2.nodes[resPairs[i].second].idx_u,
							 | 
						|
								                        bvh2.nodes[resPairs[i].second].idx_v}};
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  time_cost_device = get_time() - time_cost_device;
							 | 
						|
								  printf("CPU time cost for recursively calculating the overlapped leaf nodes: "
							 | 
						|
								         "%lf\n",
							 | 
						|
								         time_cost_device);
							 | 
						|
								
							 | 
						|
								  return boxPairsIdx2;
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ bool NurbsSurface::isGaussMapsOverlapped(
							 | 
						|
								    const BVH &gm1, const BVH &gm2, std::pair<int, int> idxRange_u1,
							 | 
						|
								    std::pair<int, int> idxRange_v1, std::pair<int, int> idxRange_u2,
							 | 
						|
								    std::pair<int, int> idxRange_v2) {
							 | 
						|
								  if (gm1.maxLevel != gm2.maxLevel || gm1.maxLevel <= 0) {
							 | 
						|
								    printf("BVH Layer error!\n");
							 | 
						|
								    return false;
							 | 
						|
								  }
							 | 
						|
								  int commonMaxLayer = gm1.maxLevel;
							 | 
						|
								  int edgeCellCnt = pow(2, commonMaxLayer - 1);
							 | 
						|
								  if (idxRange_u1.first < 0 || idxRange_u2.first < 0 || idxRange_v1.first < 0 ||
							 | 
						|
								      idxRange_v2.first < 0 || idxRange_u1.second >= edgeCellCnt ||
							 | 
						|
								      idxRange_u2.second >= edgeCellCnt || idxRange_v1.second >= edgeCellCnt ||
							 | 
						|
								      idxRange_v2.second >= edgeCellCnt) {
							 | 
						|
								    printf("Error when detecting overlapping: idx range invalid!\n");
							 | 
						|
								    return false;
							 | 
						|
								  }
							 | 
						|
								
							 | 
						|
								  auto getRangedBox = [&commonMaxLayer](const BVH &bvh,
							 | 
						|
								                                        const std::pair<int, int> &idxRange_u,
							 | 
						|
								                                        const std::pair<int, int> idxRange_v) {
							 | 
						|
								    // 获取某个范围的gauss map的aabb
							 | 
						|
								    AABB bounding;
							 | 
						|
								    for (int i = idxRange_u.first; i <= idxRange_u.second; ++i) {
							 | 
						|
								      for (int j = idxRange_v.first; j <= idxRange_v.second; ++j) {
							 | 
						|
								        bounding =
							 | 
						|
								            bounding.Union(bvh.nodes[getStartIdxOfLayerN(commonMaxLayer) +
							 | 
						|
								                                     h_getChildNodeIdx(i, j)]
							 | 
						|
								                               .bounds);
							 | 
						|
								      }
							 | 
						|
								    }
							 | 
						|
								    return bounding;
							 | 
						|
								  };
							 | 
						|
								  return getRangedBox(gm1, idxRange_u1, idxRange_v1)
							 | 
						|
								      .IsOverlap(getRangedBox(gm2, idxRange_u2, idxRange_v2));
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ bool NurbsSurface::isGaussMapsOverlapped(
							 | 
						|
								    const BVH &gm1, const BVH &gm2, std::pair<float, float> range_u1,
							 | 
						|
								    std::pair<float, float> range_v1, std::pair<float, float> range_u2,
							 | 
						|
								    std::pair<float, float> range_v2, std::pair<float, float> paramRange_u1,
							 | 
						|
								    std::pair<float, float> paramRange_v1,
							 | 
						|
								    std::pair<float, float> paramRange_u2,
							 | 
						|
								    std::pair<float, float> paramRange_v2) {
							 | 
						|
								  if (gm1.maxLevel != gm2.maxLevel || gm1.maxLevel <= 0) {
							 | 
						|
								    printf("BVH Layer error!\n");
							 | 
						|
								    return false;
							 | 
						|
								  }
							 | 
						|
								  int edgeCellCnt = pow(2, gm1.maxLevel - 1);
							 | 
						|
								  // 根据所给参数的范围和参数的定义域范围,获得对应的采样网格中的范围
							 | 
						|
								  auto getIdxRange = [](std::pair<float, float> range,
							 | 
						|
								                        std::pair<float, float> paramRange, int edgeCellCnt) {
							 | 
						|
								    float paramStep = (paramRange.second - paramRange.first) / edgeCellCnt;
							 | 
						|
								    return std::pair<int, int>(
							 | 
						|
								        {int((range.first - paramRange.first) / paramStep),
							 | 
						|
								         int((range.second - paramRange.first) / paramStep)});
							 | 
						|
								  };
							 | 
						|
								  auto idxRange_u1 = getIdxRange(range_u1, paramRange_u1, edgeCellCnt);
							 | 
						|
								  auto idxRange_v1 = getIdxRange(range_v1, paramRange_v1, edgeCellCnt);
							 | 
						|
								  auto idxRange_u2 = getIdxRange(range_u2, paramRange_u2, edgeCellCnt);
							 | 
						|
								  auto idxRange_v2 = getIdxRange(range_v2, paramRange_v2, edgeCellCnt);
							 | 
						|
								  return isGaussMapsOverlapped(gm1, gm2, idxRange_u1, idxRange_v1, idxRange_u2,
							 | 
						|
								                               idxRange_v2);
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ void NurbsSurface::Surface::recursiveGetRayBVHIntersection(
							 | 
						|
								    const glm::vec3 dir, const glm::vec3 startPoint, const int idx,
							 | 
						|
								    std::vector<BVHNode> &intersectionLeafNodes) {
							 | 
						|
								  auto bvhNode = bvh.nodes[idx];
							 | 
						|
								  // 射线与AABB判交
							 | 
						|
								  auto isRayBoxIntersect = [&]() {
							 | 
						|
								    const auto &box = bvhNode.bounds;
							 | 
						|
								    float t; // 射线的参数,当t<=0,表示线面交点不在视平面前方,视为没有交点
							 | 
						|
								    if (dir.x != 0.) {
							 | 
						|
								      // 当x分量不为0,则射线会与AABB中垂直于x轴的平面可能有交
							 | 
						|
								      // 注意x的正负。x为正会先遇到较小点所在平面
							 | 
						|
								      if (dir.x > 0)
							 | 
						|
								        t = (box.pMin.x - startPoint.x) / dir.x;
							 | 
						|
								      else
							 | 
						|
								        t = (box.pMax.x - startPoint.x) / dir.x;
							 | 
						|
								      if (t > 0.) {
							 | 
						|
								        // 射线与平面在前方有交点。但交点不一定在盒子上
							 | 
						|
								        auto tmpPt = startPoint + t * dir; // 交点
							 | 
						|
								        if (box.pMin.y <= tmpPt.y && box.pMin.z <= tmpPt.z &&
							 | 
						|
								            box.pMax.y >= tmpPt.y && box.pMax.z >= tmpPt.z)
							 | 
						|
								          return true;
							 | 
						|
								      }
							 | 
						|
								    }
							 | 
						|
								    if (dir.y != 0.) {
							 | 
						|
								      // 同上测试y方向
							 | 
						|
								      if (dir.y > 0)
							 | 
						|
								        t = (box.pMin.y - startPoint.y) / dir.y;
							 | 
						|
								      else
							 | 
						|
								        t = (box.pMax.y - startPoint.y) / dir.y;
							 | 
						|
								      if (t > 0.) {
							 | 
						|
								        auto tmpPt = startPoint + t * dir;
							 | 
						|
								        if (box.pMin.x <= tmpPt.x && box.pMin.z <= tmpPt.z &&
							 | 
						|
								            box.pMax.x >= tmpPt.x && box.pMax.z >= tmpPt.z)
							 | 
						|
								          return true;
							 | 
						|
								      }
							 | 
						|
								    }
							 | 
						|
								    if (dir.z != 0.) {
							 | 
						|
								      // 同上测试z方向
							 | 
						|
								      if (dir.z > 0)
							 | 
						|
								        t = (box.pMin.z - startPoint.z) / dir.z;
							 | 
						|
								      else
							 | 
						|
								        t = (box.pMax.z - startPoint.z) / dir.z;
							 | 
						|
								      if (t > 0.) {
							 | 
						|
								        auto tmpPt = startPoint + t * dir;
							 | 
						|
								        if (box.pMin.x <= tmpPt.x && box.pMin.y <= tmpPt.y &&
							 | 
						|
								            box.pMax.x >= tmpPt.x && box.pMax.y >= tmpPt.y)
							 | 
						|
								          return true;
							 | 
						|
								      }
							 | 
						|
								    }
							 | 
						|
								    return false;
							 | 
						|
								  };
							 | 
						|
								
							 | 
						|
								  // 不相交
							 | 
						|
								  if (!isRayBoxIntersect())
							 | 
						|
								    return;
							 | 
						|
								  // 相交
							 | 
						|
								  if (bvhNode.firstChild == -1) {
							 | 
						|
								    // 与叶节点相交
							 | 
						|
								    intersectionLeafNodes.emplace_back(bvhNode);
							 | 
						|
								  } else {
							 | 
						|
								    // 与父节点相交
							 | 
						|
								    for (int i = 0; i < 4; i++)
							 | 
						|
								      recursiveGetRayBVHIntersection(dir, startPoint, bvhNode.firstChild + i,
							 | 
						|
								                                     intersectionLeafNodes);
							 | 
						|
								  }
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ std::vector<BVHNode>
							 | 
						|
								NurbsSurface::Surface::rayBVHIntersection(glm::vec3 dir, glm::vec3 startPoint) {
							 | 
						|
								  std::vector<BVHNode> res;
							 | 
						|
								  recursiveGetRayBVHIntersection(dir, startPoint, 0, res);
							 | 
						|
								  // TODO sort res by t
							 | 
						|
								
							 | 
						|
								  //    res.emplace_back(BVHNode());
							 | 
						|
								  //    printf("res size: %lld\n", res.size());
							 | 
						|
								  return res;
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								__host__ SrfMesh *NurbsSurface::Surface::getDMesh() {
							 | 
						|
								  if (edgeSampleCnt_u != edgeSampleCnt_v || edgeSampleCnt_u <= 2) {
							 | 
						|
								    printf("getMesh Error: the surface is not sampled as a Quadtree!\n");
							 | 
						|
								    return nullptr;
							 | 
						|
								  }
							 | 
						|
								  auto *res = new SrfMesh(true, edgeSampleCnt_u, d_evaluations, d_derivatives_u,
							 | 
						|
								                          d_derivatives_u, d_normals);
							 | 
						|
								  return res;
							 | 
						|
								}
							 | 
						|
								
							 |