| 
						
						
							
								
							
						
						
					 | 
					@ -9,6 +9,16 @@ | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					#include "utils.h" | 
					 | 
					 | 
					#include "utils.h" | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//#include "NurbsBasis.cuh" | 
					 | 
					 | 
					//#include "NurbsBasis.cuh" | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					__device__ void normalization(float &a, float &b, float &c) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float sumA = a * a; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float sumB = b * b; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float sumC = c * c; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float sum = sumA + sumB + sumC; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    a = sqrt(sumA / sum); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    b = sqrt(sumB / sum); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    c = sqrt(sumC / sum); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//extern __device__ void NurbsBasis::d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) {}; | 
					 | 
					 | 
					//extern __device__ void NurbsBasis::d_basisFunction(float *N_Texture, const float *knots, float u, int degree, int d_knotsCnt) {}; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					__host__ NurbsSurface::Evaluator::Evaluator(std::vector<std::vector<std::vector<float>>> controlPoints, | 
					 | 
					 | 
					__host__ NurbsSurface::Evaluator::Evaluator(std::vector<std::vector<std::vector<float>>> controlPoints, | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -24,23 +34,24 @@ __host__ NurbsSurface::Evaluator::Evaluator(std::vector<std::vector<std::vector< | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    d_knots_u = nullptr; | 
					 | 
					 | 
					    d_knots_u = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    d_knots_v = nullptr; | 
					 | 
					 | 
					    d_knots_v = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    d_points = nullptr; | 
					 | 
					 | 
					    d_points = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    d_derivatives = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					__host__ std::vector<std::map<std::pair<float, float>, std::vector<float>>> | 
					 | 
					 | 
					__host__ std::vector<std::map<std::pair<float, float>, std::vector<float>>> | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					NurbsSurface::Evaluator::evaluate(int sampleCnt_u_, int sampleCnt_v_) { | 
					 | 
					 | 
					NurbsSurface::Evaluator::evaluate(int sampleCnt_u, int sampleCnt_v) { | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    sampleCnt_u = sampleCnt_u_; | 
					 | 
					 | 
					    if (POINT_SIZE != controlPoints[0][0].size()) { | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    sampleCnt_v = sampleCnt_v_; | 
					 | 
					 | 
					        printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    printf("outside  printf..\n"); | 
					 | 
					 | 
					        return {}; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					//    NurbsBasis::myPrint11(1, 3); | 
					 | 
					 | 
					    } | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    // 构造指向device的controlPoints | 
					 | 
					 | 
					    // 构造指向device的controlPoints | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(), pointSize = controlPoints[0][0].size(); | 
					 | 
					 | 
					    const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    const int pointsBytes = pointsCnt_u * pointsCnt_v * pointSize * sizeof(float); | 
					 | 
					 | 
					    const int pointsBytes = pointsCnt_u * pointsCnt_v * POINT_SIZE * sizeof(float); | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					    auto *h_points = (float *) malloc(pointsBytes); | 
					 | 
					 | 
					    auto *h_points = (float *) malloc(pointsBytes); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    for (int i = 0; i < pointsCnt_u; i++) { | 
					 | 
					 | 
					    for (int i = 0; i < pointsCnt_u; i++) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					        for (int j = 0; j < pointsCnt_v; j++) { | 
					 | 
					 | 
					        for (int j = 0; j < pointsCnt_v; j++) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					            for (int k = 0; k < pointSize; k++) { | 
					 | 
					 | 
					            for (int k = 0; k < POINT_SIZE; k++) { | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					                h_points[(i * pointsCnt_v + j) * pointSize + k] = controlPoints[i][j][k]; | 
					 | 
					 | 
					                h_points[(i * pointsCnt_v + j) * POINT_SIZE + k] = controlPoints[i][j][k]; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					            } | 
					 | 
					 | 
					            } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					        } | 
					 | 
					 | 
					        } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    } | 
					 | 
					 | 
					    } | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					@ -86,7 +97,7 @@ NurbsSurface::Evaluator::evaluate(int sampleCnt_u_, int sampleCnt_v_) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaDeviceSynchronize(); | 
					 | 
					 | 
					    cudaDeviceSynchronize(); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    if (recordTime) time_cost_device = utils::get_time_windows(); | 
					 | 
					 | 
					    if (recordTime) time_cost_device = utils::get_time_windows(); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    g_evaluate <<<grid, block>>>(d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, pointSize, | 
					 | 
					 | 
					    g_evaluate <<<grid, block>>>(d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, pointsCnt_v, POINT_SIZE, | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					                                 knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v); | 
					 | 
					 | 
					                                 knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, sampleCnt_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 | 
					 | 
					 | 
					    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    if (recordTime) { | 
					 | 
					 | 
					    if (recordTime) { | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -100,25 +111,27 @@ NurbsSurface::Evaluator::evaluate(int sampleCnt_u_, int sampleCnt_v_) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    free(h_knots_u); | 
					 | 
					 | 
					    free(h_knots_u); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    free(h_knots_v); | 
					 | 
					 | 
					    free(h_knots_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    printf("First derivatives and normal vectors calculated by GPU:\n"); | 
					 | 
					 | 
					    printf("First derivatives and normal vectors calculated by GPU:\n"); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    derivative(); | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaDeviceReset(); | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    return {}; | 
					 | 
					 | 
					    return {}; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					__host__ std::vector<std::map<float, std::vector<float>>> | 
					 | 
					 | 
					__host__ std::vector<std::map<float, std::vector<float>>> | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					NurbsCurve::Evaluator::evaluate(int sampleCnt_) { | 
					 | 
					 | 
					NurbsCurve::Evaluator::evaluate(int sampleCnt) { | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    this->sampleCnt = sampleCnt_; | 
					 | 
					 | 
					
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (POINT_SIZE != controlPoints[0].size()) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        return {}; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    // 构造指向device的controlPoints | 
					 | 
					 | 
					    // 构造指向device的controlPoints | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    const int pointsCnt = controlPoints.size(), pointSize = controlPoints[0].size(); | 
					 | 
					 | 
					    const int pointsCnt = controlPoints.size(); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    const int pointsBytes = pointsCnt * pointSize * sizeof(float); | 
					 | 
					 | 
					    const int pointsBytes = pointsCnt * POINT_SIZE * sizeof(float); | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					    auto *h_points = (float *) malloc(pointsBytes); | 
					 | 
					 | 
					    auto *h_points = (float *) malloc(pointsBytes); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    for (int i = 0; i < pointsCnt; i++) { | 
					 | 
					 | 
					    for (int i = 0; i < pointsCnt; i++) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					        for (int j = 0; j < pointSize; j++) { | 
					 | 
					 | 
					        for (int j = 0; j < POINT_SIZE; j++) { | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					            h_points[i * pointSize + j] = controlPoints[i][j]; | 
					 | 
					 | 
					            h_points[i * POINT_SIZE + j] = controlPoints[i][j]; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					        } | 
					 | 
					 | 
					        } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    } | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    myCudaFree(d_points);  // 注意内存管理 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaMalloc((void **) &d_points, pointsBytes); | 
					 | 
					 | 
					    cudaMalloc((void **) &d_points, pointsBytes); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice); | 
					 | 
					 | 
					    cudaMemcpy(d_points, h_points, pointsBytes, cudaMemcpyHostToDevice); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -127,16 +140,19 @@ NurbsCurve::Evaluator::evaluate(int sampleCnt_) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    const int knotsBytes = knotsCnt * sizeof(float); | 
					 | 
					 | 
					    const int knotsBytes = knotsCnt * sizeof(float); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    auto *h_knots = (float *) malloc(knotsBytes); | 
					 | 
					 | 
					    auto *h_knots = (float *) malloc(knotsBytes); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    for (int i = 0; i < knotsCnt; i++) h_knots[i] = knots[i]; | 
					 | 
					 | 
					    for (int i = 0; i < knotsCnt; i++) h_knots[i] = knots[i]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    myCudaFree(d_knots);  // 注意内存管理 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaMalloc((void **) &d_knots, knotsBytes); | 
					 | 
					 | 
					    cudaMalloc((void **) &d_knots, knotsBytes); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaMemcpy(d_knots, h_knots, knotsBytes, cudaMemcpyHostToDevice); | 
					 | 
					 | 
					    cudaMemcpy(d_knots, h_knots, knotsBytes, cudaMemcpyHostToDevice); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    // 分配nTexture的内存。只需要GPU内存 | 
					 | 
					 | 
					    // 分配nTexture的内存。只需要GPU内存 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//    float *d_nTexture = nullptr; | 
					 | 
					 | 
					//    float *d_nTexture = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    myCudaFree(d_nTexture);  // 注意内存管理 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaMalloc((void **) &d_nTexture, | 
					 | 
					 | 
					    cudaMalloc((void **) &d_nTexture, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					               sampleCnt * pointsCnt * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 | 
					 | 
					 | 
					               sampleCnt * pointsCnt * sizeof(float)); // 注意nTexture的大小,在算梯度时用得到i=pointsCnt + 1的基函数值 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    // 分配nTexture1的内存。只需要GPU内存 | 
					 | 
					 | 
					    // 分配nTexture1的内存。只需要GPU内存 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//    float *d_nTexture1 = nullptr; | 
					 | 
					 | 
					//    float *d_nTexture1 = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    myCudaFree(d_nTexture1);  // 注意内存管理 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaMalloc((void **) &d_nTexture1, sampleCnt * (pointsCnt + 1) * sizeof(float)); | 
					 | 
					 | 
					    cudaMalloc((void **) &d_nTexture1, sampleCnt * (pointsCnt + 1) * sizeof(float)); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    // 构造g_basisTexture线程层级 | 
					 | 
					 | 
					    // 构造g_basisTexture线程层级 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -154,7 +170,7 @@ NurbsCurve::Evaluator::evaluate(int sampleCnt_) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//    cudaMemcpy(d_nTextureCpy, d_nTexture, nTextureBytes, cudaMemcpyDeviceToDevice); // 有同步功能 | 
					 | 
					 | 
					//    cudaMemcpy(d_nTextureCpy, d_nTexture, nTextureBytes, cudaMemcpyDeviceToDevice); // 有同步功能 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaDeviceSynchronize(); | 
					 | 
					 | 
					    cudaDeviceSynchronize(); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    printf("here..\n"); | 
					 | 
					 | 
					    printf("here..\n"); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    g_evaluate <<<grid, block>>>(d_nTexture, d_points, pointsCnt, pointSize, knots[knotsCnt - 1], sampleCnt); | 
					 | 
					 | 
					    g_evaluate <<<grid, block>>>(d_nTexture, d_points, pointsCnt, POINT_SIZE, knots[knotsCnt - 1], sampleCnt); | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					//    g_test<<<1,6>>>(d_nTextureCpy); | 
					 | 
					 | 
					//    g_test<<<1,6>>>(d_nTextureCpy); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 | 
					 | 
					 | 
					    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    if (recordTime) { | 
					 | 
					 | 
					    if (recordTime) { | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -163,22 +179,30 @@ NurbsCurve::Evaluator::evaluate(int sampleCnt_) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    } | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    free(h_points); | 
					 | 
					 | 
					    free(h_points); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    free(h_knots); | 
					 | 
					 | 
					    free(h_knots); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    return {}; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    printf("First derivatives calculated by GPU:\n"); | 
					 | 
					 | 
					__host__ void NurbsSurface::Evaluator::derivative(int sampleCnt_u, int sampleCnt_v) { | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    derivative(); | 
					 | 
					 | 
					    // 先完成evaluation | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    evaluate(sampleCnt_u, sampleCnt_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaDeviceReset(); | 
					 | 
					 | 
					    if (POINT_SIZE != controlPoints[0][0].size()) { | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    return {}; | 
					 | 
					 | 
					        printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        return; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    } | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					__host__ void NurbsSurface::Evaluator::derivative() { | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    float *d_derTexture_u = nullptr; | 
					 | 
					 | 
					    float *d_derTexture_u = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    float *d_derTexture_v = nullptr; | 
					 | 
					 | 
					    float *d_derTexture_v = nullptr; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(), pointSize = controlPoints[0][0].size(); | 
					 | 
					 | 
					    const int pointsCnt_u = controlPoints.size(), pointsCnt_v = controlPoints[0].size(); | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					    const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size(); | 
					 | 
					 | 
					    const int knotsCnt_u = knots_u.size(), knotsCnt_v = knots_v.size(); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaMalloc((void **) &d_derTexture_u, sampleCnt_u * pointsCnt_u * sizeof(float)); | 
					 | 
					 | 
					    cudaMalloc((void **) &d_derTexture_u, sampleCnt_u * pointsCnt_u * sizeof(float)); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaMalloc((void **) &d_derTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float)); | 
					 | 
					 | 
					    cudaMalloc((void **) &d_derTexture_v, sampleCnt_v * pointsCnt_v * sizeof(float)); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    // 构造切向量计算结果 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    myCudaFree(d_derivatives); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    cudaMalloc((void **) &d_derivatives, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					               sampleCnt_v * sampleCnt_u * 6 * sizeof(float)); // 每个采用所求的切向量是一个六元向量,前三位是对u的偏导、后三位是对v的偏导 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    // 构造线程层级 | 
					 | 
					 | 
					    // 构造线程层级 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    dim3 block(32, 32); | 
					 | 
					 | 
					    dim3 block(32, 32); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); | 
					 | 
					 | 
					    dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -194,8 +218,10 @@ __host__ void NurbsSurface::Evaluator::derivative() { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    g_derTexture<<<gridTex_v, blockTex>>>(d_derTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, | 
					 | 
					 | 
					    g_derTexture<<<gridTex_v, blockTex>>>(d_derTexture_v, d_nTexture1_v, d_knots_v, pointsCnt_v, knotsCnt_v, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					                                          sampleCnt_v); | 
					 | 
					 | 
					                                          sampleCnt_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaDeviceSynchronize(); | 
					 | 
					 | 
					    cudaDeviceSynchronize(); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    g_derivative<<<grid, block>>>(d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v, d_points, pointsCnt_u, | 
					 | 
					 | 
					    g_derivative<<<grid, block>>>(d_derivatives, d_derTexture_u, d_derTexture_v, d_nTexture_u, d_nTexture_v, d_points, | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					                                  pointsCnt_v, pointSize, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], sampleCnt_u, | 
					 | 
					 | 
					                                  pointsCnt_u, | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					                                  pointsCnt_v, POINT_SIZE, knots_u[knotsCnt_u - 1], knots_v[knotsCnt_v - 1], | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					                                  sampleCnt_u, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					                                  sampleCnt_v); | 
					 | 
					 | 
					                                  sampleCnt_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 | 
					 | 
					 | 
					    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    if (recordTime) { | 
					 | 
					 | 
					    if (recordTime) { | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -207,9 +233,17 @@ __host__ void NurbsSurface::Evaluator::derivative() { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaFree(d_derTexture_v); | 
					 | 
					 | 
					    cudaFree(d_derTexture_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					__host__ void NurbsCurve::Evaluator::derivative() { | 
					 | 
					 | 
					__host__ void NurbsCurve::Evaluator::derivative(int sampleCnt) { | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    // 先完成evaluation | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    evaluate(sampleCnt); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (POINT_SIZE != controlPoints[0].size()) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        return; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    float *d_derTexture = nullptr; | 
					 | 
					 | 
					    float *d_derTexture = nullptr; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    const int pointsCnt = controlPoints.size(), pointSize = controlPoints[0].size(); | 
					 | 
					 | 
					    const int pointsCnt = controlPoints.size(); | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					    const int knotsCnt = knots.size(); | 
					 | 
					 | 
					    const int knotsCnt = knots.size(); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaMalloc((void **) &d_derTexture, sampleCnt * pointsCnt * sizeof(float)); | 
					 | 
					 | 
					    cudaMalloc((void **) &d_derTexture, sampleCnt * pointsCnt * sizeof(float)); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -219,12 +253,19 @@ __host__ void NurbsCurve::Evaluator::derivative() { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    // 构造g_basisTexture线程层级 | 
					 | 
					 | 
					    // 构造g_basisTexture线程层级 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    dim3 blockTex(512); | 
					 | 
					 | 
					    dim3 blockTex(512); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    dim3 gridTex((sampleCnt + blockTex.x - 1) / blockTex.x); | 
					 | 
					 | 
					    dim3 gridTex((sampleCnt + blockTex.x - 1) / blockTex.x); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    // 构造切向量计算结果 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    myCudaFree(d_derivatives); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    cudaMalloc((void **) &d_derivatives, sampleCnt * 3 * sizeof(float)); // 每个采用所求的切向量是一个三维向量 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    // 记录用时 | 
					 | 
					 | 
					    // 记录用时 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    double time_cost_device; | 
					 | 
					 | 
					    double time_cost_device; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    if (recordTime) time_cost_device = utils::get_time_windows(); | 
					 | 
					 | 
					    if (recordTime) time_cost_device = utils::get_time_windows(); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    g_derTexture<<<gridTex, blockTex>>>(d_derTexture, d_nTexture1, d_knots, pointsCnt, knotsCnt, sampleCnt); | 
					 | 
					 | 
					    g_derTexture<<<gridTex, blockTex>>>(d_derTexture, d_nTexture1, d_knots, pointsCnt, knotsCnt, sampleCnt); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaDeviceSynchronize(); | 
					 | 
					 | 
					    cudaDeviceSynchronize(); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    g_derivative<<<grid, block>>>(d_derTexture, d_points, pointsCnt, pointSize, knots[knotsCnt - 1], sampleCnt); | 
					 | 
					 | 
					    g_derivative<<<grid, block>>>(d_derivatives, d_derTexture, d_nTexture, d_points, pointsCnt, POINT_SIZE, | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					                                  knots[knotsCnt - 1], sampleCnt); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 | 
					 | 
					 | 
					    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    if (recordTime) { | 
					 | 
					 | 
					    if (recordTime) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					        time_cost_device = utils::get_time_windows() - time_cost_device; | 
					 | 
					 | 
					        time_cost_device = utils::get_time_windows() - time_cost_device; | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -234,11 +275,62 @@ __host__ void NurbsCurve::Evaluator::derivative() { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    cudaFree(d_derTexture); | 
					 | 
					 | 
					    cudaFree(d_derTexture); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					__host__ void NurbsSurface::Evaluator::curvature(int sampleCnt_u, int sampleCnt_v) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    // 先计算切向量 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    derivative(sampleCnt_u, sampleCnt_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (POINT_SIZE != controlPoints[0][0].size()) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        return; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    // 构造线程层级 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    dim3 block(32, 32); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    dim3 grid((sampleCnt_u + block.x - 1) / block.x, (sampleCnt_v + block.y - 1) / block.y); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    // 记录用时 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    double time_cost_device; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (recordTime) time_cost_device = utils::get_time_windows(); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    g_curvature<<<grid, block>>>(d_derivatives, sampleCnt_u, sampleCnt_v, knots_u[knots_u.size() - 1], | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					                                 knots_v[knots_v.size() - 1]); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (recordTime) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        time_cost_device = utils::get_time_windows() - time_cost_device; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        printf("GPU time cost of surface second derivative calculating for %d samples: %lf\n", | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					               sampleCnt_u * sampleCnt_v, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					               time_cost_device); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					__host__ void NurbsCurve::Evaluator::curvature(int sampleCnt) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    // 先计算切向量 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    derivative(sampleCnt); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (POINT_SIZE != controlPoints[0].size()) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        printf("Error! Nurbs控制点应表示为长度为4的齐次坐标\n"); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        return; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    // 构造线程层级 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    dim3 block(32, 32); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    dim3 grid((sampleCnt + block.x * block.y - 1) / (block.x * block.y)); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    // 记录用时 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    double time_cost_device; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (recordTime) time_cost_device = utils::get_time_windows(); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    g_curvature<<<grid, block>>>(d_derivatives, sampleCnt, knots[knots.size() - 1]); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    cudaDeviceSynchronize(); // 所用线程结束后再获取结束时间。cudaThreadSynchronize()在CUDA1.0后被弃用 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (recordTime) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        time_cost_device = utils::get_time_windows() - time_cost_device; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        printf("GPU time cost of curve second derivative calculating for %d samples: %lf\n", sampleCnt, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					               time_cost_device); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//__global__ void | 
					 | 
					 | 
					//__global__ void | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//NurbsSurface::g_evaluate(const float *d_points, const float *d_knots_u, const float *d_knots_v, | 
					 | 
					 | 
					//NurbsSurface::g_evaluate(const float *d_points, const float *d_knots_u, const float *d_knots_v, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//                         int d_pointsCnt_u, | 
					 | 
					 | 
					//                         int d_pointsCnt_u, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					//                         int d_pointsCnt_v, int d_pointSize, int d_knotsCnt_u, int d_knotsCnt_v, | 
					 | 
					 | 
					//                         int d_pointsCnt_v, int d_POINT_SIZE, int d_knotsCnt_u, int d_knotsCnt_v, | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					//                         int d_sampleCnt_u, int d_sampleCnt_v) { | 
					 | 
					 | 
					//                         int d_sampleCnt_u, int d_sampleCnt_v) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					////    printf(" surface calculating... \n"); | 
					 | 
					 | 
					////    printf(" surface calculating... \n"); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//    // 二维grid和二维的block | 
					 | 
					 | 
					//    // 二维grid和二维的block | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					@ -267,7 +359,7 @@ __host__ void NurbsCurve::Evaluator::derivative() { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//        for (int j = 0; j < d_pointsCnt_v; j++) { | 
					 | 
					 | 
					//        for (int j = 0; j < d_pointsCnt_v; j++) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//            float N_U = N_Texture_U[d_degree_u * (d_knotsCnt_u - 1) + i]; | 
					 | 
					 | 
					//            float N_U = N_Texture_U[d_degree_u * (d_knotsCnt_u - 1) + i]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//            float N_V = N_Texture_V[d_degree_v * (d_knotsCnt_v - 1) + j]; | 
					 | 
					 | 
					//            float N_V = N_Texture_V[d_degree_v * (d_knotsCnt_v - 1) + j]; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					//            int idx = (i * d_pointsCnt_v + j) * d_pointSize; | 
					 | 
					 | 
					//            int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					//            x += N_U * N_V * d_points[idx]; | 
					 | 
					 | 
					//            x += N_U * N_V * d_points[idx]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//            y += N_U * N_V * d_points[idx + 1]; | 
					 | 
					 | 
					//            y += N_U * N_V * d_points[idx + 1]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//            z += N_U * N_V * d_points[idx + 2]; | 
					 | 
					 | 
					//            z += N_U * N_V * d_points[idx + 2]; | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -280,7 +372,7 @@ __host__ void NurbsCurve::Evaluator::derivative() { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					__global__ void | 
					 | 
					 | 
					__global__ void | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					NurbsSurface::g_evaluate(const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, | 
					 | 
					 | 
					NurbsSurface::g_evaluate(const float *d_nTexture_u, const float *d_nTexture_v, const float *d_points, int d_pointsCnt_u, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					                         int d_pointsCnt_v, int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, | 
					 | 
					 | 
					                         int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					                         int d_sampleCnt_v) { | 
					 | 
					 | 
					                         int d_sampleCnt_v) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    // 二维grid和二维的block | 
					 | 
					 | 
					    // 二维grid和二维的block | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -294,25 +386,30 @@ NurbsSurface::g_evaluate(const float *d_nTexture_u, const float *d_nTexture_v, c | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					        return; | 
					 | 
					 | 
					        return; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    } | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    float x = 0., y = 0., z = 0.; | 
					 | 
					 | 
					    float x = 0., y = 0., z = 0., sumW = 0.; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					    for (int i = 0; i < d_pointsCnt_u; i++) { | 
					 | 
					 | 
					    for (int i = 0; i < d_pointsCnt_u; i++) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					        float N_U = d_nTexture_u[ix * d_pointsCnt_u + i]; | 
					 | 
					 | 
					        float N_U = d_nTexture_u[ix * d_pointsCnt_u + i]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					        for (int j = 0; j < d_pointsCnt_v; j++) { | 
					 | 
					 | 
					        for (int j = 0; j < d_pointsCnt_v; j++) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					            float N_V = d_nTexture_v[iy * d_pointsCnt_v + j]; | 
					 | 
					 | 
					            float N_V = d_nTexture_v[iy * d_pointsCnt_v + j]; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					            int idx = (i * d_pointsCnt_v + j) * d_pointSize; | 
					 | 
					 | 
					            int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					            x += N_U * N_V * d_points[idx]; | 
					 | 
					 | 
					            float w = d_points[idx + 3]; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					            y += N_U * N_V * d_points[idx + 1]; | 
					 | 
					 | 
					            x += N_U * N_V * w * d_points[idx]; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					            z += N_U * N_V * d_points[idx + 2]; | 
					 | 
					 | 
					            y += N_U * N_V * w * d_points[idx + 1]; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            z += N_U * N_V * w * d_points[idx + 2]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            sumW += N_U * N_V * w; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					        } | 
					 | 
					 | 
					        } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    } | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    printf("(%g, %g)-->(%g, %g, %g)\n", u, v, x, y, z); // %g输出,舍弃无意义的0 | 
					 | 
					 | 
					    x = x / sumW; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    y = y / sumW; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    z = z / sumW; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					//    printf("(%g, %g)-->(%g, %g, %g)\n", u, v, x, y, z); // %g输出,舍弃无意义的0 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					__global__ void | 
					 | 
					 | 
					__global__ void | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					NurbsSurface::g_derivative(const float *derTexture_u, const float *derTexture_v, const float *nTexture_u, | 
					 | 
					 | 
					NurbsSurface::g_derivative(float *derivatives, const float *derTexture_u, const float *derTexture_v, | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					                           const float *nTexture_v, const float *d_points, int d_pointsCnt_u, int d_pointsCnt_v, | 
					 | 
					 | 
					                           const float *nTexture_u, const float *nTexture_v, const float *d_points, int d_pointsCnt_u, | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					                           int d_pointSize, float d_lastKnot_u, float d_lastKnot_v, int d_sampleCnt_u, | 
					 | 
					 | 
					                           int d_pointsCnt_v, int d_POINT_SIZE, float d_lastKnot_u, float d_lastKnot_v, | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					                           int d_sampleCnt_v) { | 
					 | 
					 | 
					                           int d_sampleCnt_u, int d_sampleCnt_v) { | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					    // 二维grid和二维的block | 
					 | 
					 | 
					    // 二维grid和二维的block | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    int ix = blockIdx.x * blockDim.x + threadIdx.x; | 
					 | 
					 | 
					    int ix = blockIdx.x * blockDim.x + threadIdx.x; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    int iy = blockIdx.y * blockDim.y + threadIdx.y; | 
					 | 
					 | 
					    int iy = blockIdx.y * blockDim.y + threadIdx.y; | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -323,31 +420,157 @@ NurbsSurface::g_derivative(const float *derTexture_u, const float *derTexture_v, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1); | 
					 | 
					 | 
					    float u = ix * d_lastKnot_u / (d_sampleCnt_u - 1); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1); | 
					 | 
					 | 
					    float v = iy * d_lastKnot_v / (d_sampleCnt_v - 1); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    float x_u = 0., y_u = 0, z_u = 0.; | 
					 | 
					 | 
					    float nubsPdx_u = 0., nubsPdy_u = 0, nubsPdz_u = 0., nubsPdw_u = 0.; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    float x_v = 0., y_v = 0, z_v = 0.; | 
					 | 
					 | 
					    float nubsPdx_v = 0., nubsPdy_v = 0, nubsPdz_v = 0., nubsPdw_v = 0.; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    for (int i = 0; i < d_pointsCnt_u; i++) { | 
					 | 
					 | 
					    for (int i = 0; i < d_pointsCnt_u; i++) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					        for (int j = 0; j < d_pointsCnt_u; j++) { | 
					 | 
					 | 
					        for (int j = 0; j < d_pointsCnt_u; j++) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					            int baseIdx = (i * d_pointsCnt_v + j) * d_pointSize; | 
					 | 
					 | 
					            int baseIdx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					            float factor_u = derTexture_u[ix * d_pointsCnt_u + i] * nTexture_v[iy * d_pointsCnt_v + j]; | 
					 | 
					 | 
					            float factor_u = derTexture_u[ix * d_pointsCnt_u + i] * nTexture_v[iy * d_pointsCnt_v + j]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					            float factor_v = derTexture_v[iy * d_pointsCnt_v + j] * nTexture_u[ix * d_pointsCnt_u + i]; | 
					 | 
					 | 
					            float factor_v = derTexture_v[iy * d_pointsCnt_v + j] * nTexture_u[ix * d_pointsCnt_u + i]; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					            x_u += factor_u * d_points[baseIdx]; | 
					 | 
					 | 
					            float wij = d_points[baseIdx + 3]; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					            y_u += factor_u * d_points[baseIdx + 1]; | 
					 | 
					 | 
					            nubsPdx_u += factor_u * wij * d_points[baseIdx]; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					            z_u += factor_u * d_points[baseIdx + 2]; | 
					 | 
					 | 
					            nubsPdy_u += factor_u * wij * d_points[baseIdx + 1]; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            nubsPdz_u += factor_u * wij * d_points[baseIdx + 2]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            nubsPdw_u += factor_u * wij; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					            x_v += factor_v * d_points[baseIdx]; | 
					 | 
					 | 
					            nubsPdx_v += factor_v * wij * d_points[baseIdx]; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					            y_v += factor_v * d_points[baseIdx + 1]; | 
					 | 
					 | 
					            nubsPdy_v += factor_v * wij * d_points[baseIdx + 1]; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					            z_v += factor_v * d_points[baseIdx + 2]; | 
					 | 
					 | 
					            nubsPdz_v += factor_v * wij * d_points[baseIdx + 2]; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            nubsPdw_v += factor_v * wij; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    } | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float x = 0., y = 0., z = 0., w = 0.; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    for (int i = 0; i < d_pointsCnt_u; i++) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        float N_U = nTexture_u[ix * d_pointsCnt_u + i]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        for (int j = 0; j < d_pointsCnt_v; j++) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            float N_V = nTexture_v[iy * d_pointsCnt_v + j]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            int idx = (i * d_pointsCnt_v + j) * d_POINT_SIZE; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            float wij = d_points[idx + 3]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            x += N_U * N_V * wij * d_points[idx]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            y += N_U * N_V * wij * d_points[idx + 1]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            z += N_U * N_V * wij * d_points[idx + 2]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            w += N_U * N_V * wij; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					        } | 
					 | 
					 | 
					        } | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    float x_n = y_u * z_v - y_v * z_u, y_n = x_v * z_u - x_u * z_v, z_n = x_u * y_v - x_v * y_u; // 叉乘得到法向量 | 
					 | 
					 | 
					    } | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, x_u, y_u, z_u, x_v, y_v, z_v, | 
					 | 
					 | 
					
 | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					           x_n, y_n, z_n); | 
					 | 
					 | 
					    float w2 = w * w; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float pdx_u = (nubsPdx_u * w - x * nubsPdw_u) / w2; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float pdy_u = (nubsPdy_u * w - y * nubsPdw_u) / w2; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float pdz_u = (nubsPdz_u * w - z * nubsPdw_u) / w2; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float pdx_v = (nubsPdx_v * w - x * nubsPdw_v) / w2; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float pdy_v = (nubsPdy_v * w - y * nubsPdw_v) / w2; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float pdz_v = (nubsPdz_v * w - z * nubsPdw_v) / w2; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					//    float pdz_u = (nubsPdz_u * w - z ) | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    int baseIdx = (ix * d_sampleCnt_v + iy) * 6; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    derivatives[baseIdx] = pdx_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    derivatives[baseIdx + 1] = pdy_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    derivatives[baseIdx + 2] = pdz_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    derivatives[baseIdx + 3] = pdx_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    derivatives[baseIdx + 4] = pdy_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    derivatives[baseIdx + 5] = pdz_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float x_n = pdy_u * pdz_v - pdy_v * pdz_u, y_n = pdx_v * pdz_u - pdx_u * pdz_v, z_n = | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					            pdx_u * pdy_v - pdx_v * pdy_u; // 叉乘得到法向量 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if((ix == 8 && iy == 9) || (ix == 7 && iy == 9) || (ix == 9 && iy == 9) || (ix == 8 && iy == 8) || (ix == 8 && iy == 10)) | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g), normal:(%g,%g,%g)\n", u, v, pdx_u, pdy_u, pdz_u, pdx_v, pdy_v, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					           pdz_v, x_n, y_n, z_n); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					__global__ void | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					NurbsSurface::g_curvature(const float *derivatives, int sampleCnt_u, int sampleCnt_v, float lastKnot_u, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					                          float lastKnot_v) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    // 二维grid和二维的block | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    int ix = blockIdx.x * blockDim.x + threadIdx.x; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    int iy = blockIdx.y * blockDim.y + threadIdx.y; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (ix >= sampleCnt_u || iy >= sampleCnt_v) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        return; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float step_u = lastKnot_u / (sampleCnt_u - 1), step_v = lastKnot_v / (sampleCnt_v - 1); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float u = ix * step_u, v = iy * step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    int baseIdx = (ix * sampleCnt_v + iy) * 6; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    int lastBaseIdx_u = ((ix - 1) * sampleCnt_v + iy) * 6, nextBaseIdx_u = ((ix + 1) * sampleCnt_v + iy) * 6; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    int lastBaseIdx_v = (ix * sampleCnt_v + iy - 1) * 6, nextBaseIdx_v = (ix * sampleCnt_v + iy + 1) * 6; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					//    printf("(%g,%g)-->u:(%g, %g, %g), v:(%g,%g,%g)\n", u, v, derivatives[baseIdx], derivatives[baseIdx + 1], | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					//           derivatives[baseIdx + 2], derivatives[baseIdx + 3], derivatives[baseIdx + 4], derivatives[baseIdx + 5]); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float sndPdx_uu, sndPdy_uu, sndPdz_uu, sndPdx_vv, sndPdy_vv, sndPdz_vv;  // 二阶导 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float sndPdx_uv, sndPdy_uv, sndPdz_uv, sndPdx_vu, sndPdy_vu, sndPdz_vu; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (ix == 0) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[baseIdx]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[baseIdx + 1]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[baseIdx + 2]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[baseIdx + 3]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[baseIdx + 4]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[baseIdx + 5]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } else if (ix == sampleCnt_u - 1) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_uu = (derivatives[baseIdx] - derivatives[lastBaseIdx_u]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_uu = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_u + 1]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_uu = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_u + 2]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_vu = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_u + 3]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_vu = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_u + 4]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_vu = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_u + 5]) / step_u; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } else { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_uu = (derivatives[nextBaseIdx_u] - derivatives[lastBaseIdx_u]) / (2 * step_u); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_uu = (derivatives[nextBaseIdx_u + 1] - derivatives[lastBaseIdx_u + 1]) / (2 * step_u); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_uu = (derivatives[nextBaseIdx_u + 2] - derivatives[lastBaseIdx_u + 2]) / (2 * step_u); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_vu = (derivatives[nextBaseIdx_u + 3] - derivatives[lastBaseIdx_u + 3]) / (2 * step_u); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_vu = (derivatives[nextBaseIdx_u + 4] - derivatives[lastBaseIdx_u + 4]) / (2 * step_u); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_vu = (derivatives[nextBaseIdx_u + 5] - derivatives[lastBaseIdx_u + 5]) / (2 * step_u); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (iy == 0) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[baseIdx + 3]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[baseIdx + 4]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[baseIdx + 5]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[baseIdx]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[baseIdx + 1]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[baseIdx + 2]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } else if (iy == sampleCnt_v - 1) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_vv = (derivatives[baseIdx + 3] - derivatives[lastBaseIdx_v + 3]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_vv = (derivatives[baseIdx + 4] - derivatives[lastBaseIdx_v + 4]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_vv = (derivatives[baseIdx + 5] - derivatives[lastBaseIdx_v + 5]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_uv = (derivatives[baseIdx] - derivatives[lastBaseIdx_v]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_uv = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx_v + 1]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_uv = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx_v + 2]) / step_v; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } else { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_vv = (derivatives[nextBaseIdx_v + 3] - derivatives[lastBaseIdx_v + 3]) / (2 * step_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_vv = (derivatives[nextBaseIdx_v + 4] - derivatives[lastBaseIdx_v + 4]) / (2 * step_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_vv = (derivatives[nextBaseIdx_v + 5] - derivatives[lastBaseIdx_v + 5]) / (2 * step_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx_uv = (derivatives[nextBaseIdx_v] - derivatives[lastBaseIdx_v]) / (2 * step_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy_uv = (derivatives[nextBaseIdx_v + 1] - derivatives[lastBaseIdx_v + 1]) / (2 * step_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz_uv = (derivatives[nextBaseIdx_v + 2] - derivatives[lastBaseIdx_v + 2]) / (2 * step_v); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float uvx = (sndPdx_uv + sndPdx_vu) / 2, uvy = (sndPdy_uv + sndPdy_vu) / 2, uvz = (sndPdz_uv + sndPdz_vu) / 2; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    normalization(sndPdx_uu, sndPdy_uu, sndPdz_uu); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    normalization(uvx, uvy, uvz); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    normalization(sndPdx_vv, sndPdy_vv, sndPdz_vv); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if(ix == 8 && iy == 9) | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    printf("(%g, %g) --> uu: (%g, %g, %g), uv: (%g, %g, %g), vv: (%g, %g, %g)\n", u, v, sndPdx_uu, sndPdy_uu, sndPdz_uu, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					           uvx, uvy, uvz, sndPdx_vv, sndPdy_vv, sndPdz_vv); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					__global__ void | 
					 | 
					 | 
					__global__ void | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					NurbsCurve::g_evaluate(const float *NTexture, const float *d_points, const int d_pointsCnt, | 
					 | 
					 | 
					NurbsCurve::g_evaluate(const float *NTexture, const float *d_points, const int d_pointsCnt, | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					                       const int d_pointSize, const float d_lastKnot, const int d_sampleCnt) { | 
					 | 
					 | 
					                       const int d_POINT_SIZE, const float d_lastKnot, const int d_sampleCnt) { | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					//    printf(" curve calculating... \n"); | 
					 | 
					 | 
					//    printf(" curve calculating... \n"); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    // 二维grid和一维的block | 
					 | 
					 | 
					    // 二维grid和一维的block | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//    int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; | 
					 | 
					 | 
					//    int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -362,35 +585,86 @@ NurbsCurve::g_evaluate(const float *NTexture, const float *d_points, const int d | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//    // 注意,在device中,全局内存还是以malloc和free的方式分配和回收的,而不是使用cudaMalloc和cudaFree | 
					 | 
					 | 
					//    // 注意,在device中,全局内存还是以malloc和free的方式分配和回收的,而不是使用cudaMalloc和cudaFree | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//    auto *N_dp = (float *) malloc((d_degree + 1) * (d_knotsCnt - 1) * sizeof(float)); | 
					 | 
					 | 
					//    auto *N_dp = (float *) malloc((d_degree + 1) * (d_knotsCnt - 1) * sizeof(float)); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//    d_basisFunction(N_dp, d_knots, u, d_degree, d_knotsCnt); | 
					 | 
					 | 
					//    d_basisFunction(N_dp, d_knots, u, d_degree, d_knotsCnt); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    float x = 0., y = 0., z = 0.; | 
					 | 
					 | 
					    float x = 0., y = 0., z = 0., sumW = 0.; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					    for (int i = 0; i < d_pointsCnt; i++) { | 
					 | 
					 | 
					    for (int i = 0; i < d_pointsCnt; i++) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					        float N = NTexture[idx * d_pointsCnt + i]; | 
					 | 
					 | 
					        float N = NTexture[idx * d_pointsCnt + i]; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					        int baseIdx = i * d_pointSize; | 
					 | 
					 | 
					        int baseIdx = i * d_POINT_SIZE; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					        x += N * d_points[baseIdx]; | 
					 | 
					 | 
					        float w = d_points[baseIdx + 3]; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					        y += N * d_points[baseIdx + 1]; | 
					 | 
					 | 
					        x += N * w * d_points[baseIdx]; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					        z += N * d_points[baseIdx + 2]; | 
					 | 
					 | 
					        y += N * w * d_points[baseIdx + 1]; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    } | 
					 | 
					 | 
					        z += N * w * d_points[baseIdx + 2]; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sumW += N * w; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    x = x / sumW; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    y = y / sumW; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    z = z / sumW; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    printf("(%g)-->(%g, %g, %g)\n", u, x, y, z); // %g输出,舍弃无意义的0 | 
					 | 
					 | 
					    printf("(%g)-->(%g, %g, %g)\n", u, x, y, z); // %g输出,舍弃无意义的0 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					__global__ void | 
					 | 
					 | 
					__global__ void | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					NurbsCurve::g_derivative(const float *derTexture, const float *d_points, int d_pointsCnt, int d_pointSize, | 
					 | 
					 | 
					NurbsCurve::g_derivative(float *derivatives, const float *derTexture, const float *nTexture, const float *d_points, | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					                         int d_pointsCnt, int d_POINT_SIZE, | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					                         float d_lastKnot, int d_sampleCnt) { | 
					 | 
					 | 
					                         float d_lastKnot, int d_sampleCnt) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    // 二维block和一维grid | 
					 | 
					 | 
					    // 二维block和一维grid | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; | 
					 | 
					 | 
					    int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    if (idx >= d_sampleCnt) return; | 
					 | 
					 | 
					    if (idx >= d_sampleCnt) return; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    float u = idx * d_lastKnot / (d_sampleCnt - 1); | 
					 | 
					 | 
					    float u = idx * d_lastKnot / (d_sampleCnt - 1); | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    float x = 0., y = 0, z = 0.; | 
					 | 
					 | 
					    float nubs_dx = 0., nubs_dy = 0., nubs_dz = 0., nubs_dw = 0.; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					//    printf("pointSize: %d\n", d_pointSize); | 
					 | 
					 | 
					//    printf("POINT_SIZE: %d\n", d_POINT_SIZE); | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    for (int i = 0; i < d_pointsCnt; i++) { | 
					 | 
					 | 
					    for (int i = 0; i < d_pointsCnt; i++) { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					        int baseIdx = i * d_pointSize; | 
					 | 
					 | 
					        int baseIdx = i * d_POINT_SIZE; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					        float nFactor = derTexture[idx * d_pointsCnt + i]; | 
					 | 
					 | 
					        float nFactor = derTexture[idx * d_pointsCnt + i]; | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					        x += nFactor * d_points[baseIdx]; | 
					 | 
					 | 
					        float wi = d_points[baseIdx + 3]; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					        y += nFactor * d_points[baseIdx + 1]; | 
					 | 
					 | 
					        nubs_dx += nFactor * wi * d_points[baseIdx]; | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					        z += nFactor * d_points[baseIdx + 2]; | 
					 | 
					 | 
					        nubs_dy += nFactor * wi * d_points[baseIdx + 1]; | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        nubs_dz += nFactor * wi * d_points[baseIdx + 2]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        nubs_dw += nFactor * wi; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					//        printf("(x, y, z): (%g, %g, %g)\n", d_points[baseIdx], d_points[baseIdx + 1], d_points[baseIdx + 2]); | 
					 | 
					 | 
					//        printf("(x, y, z): (%g, %g, %g)\n", d_points[baseIdx], d_points[baseIdx + 1], d_points[baseIdx + 2]); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    } | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    printf("(%g)-->(%g, %g, %g)\n", u, x, y, z); | 
					 | 
					 | 
					
 | 
				
			
			
				
				
			
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float x = 0., y = 0., z = 0., w = 0.; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    for (int i = 0; i < d_pointsCnt; i++) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        float N = nTexture[idx * d_pointsCnt + i]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        int baseIdx = i * d_POINT_SIZE; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        float wi = d_points[baseIdx + 3]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        x += N * wi * d_points[baseIdx]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        y += N * wi * d_points[baseIdx + 1]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        z += N * wi * d_points[baseIdx + 2]; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        w += N * wi; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float dx = (nubs_dx * w - x * nubs_dw) / (w * w); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float dy = (nubs_dy * w - y * nubs_dw) / (w * w); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float dz = (nubs_dz * w - z * nubs_dw) / (w * w); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    int baseIdx = idx * 3; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    derivatives[baseIdx] = dx; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    derivatives[baseIdx + 1] = dy; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    derivatives[baseIdx + 2] = dz; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    printf("(%g)-->(%g, %g, %g)\n", u, dx, dy, dz); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					__global__ void NurbsCurve::g_curvature(const float *derivatives, int sampleCnt, float lastKnot) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    // 二维block和一维grid | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    int idx = blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (idx >= sampleCnt) return; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float step = lastKnot / (sampleCnt - 1); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float u = idx * step; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    float sndPdx, sndPdy, sndPdz;  // 二阶导 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    int baseIdx = idx * 3, lastBaseIdx = (idx - 1) * 3, nextBaseIdx = (idx + 1) * 3; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (idx == 0) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx = (derivatives[nextBaseIdx] - derivatives[baseIdx]) / step; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[baseIdx + 1]) / step; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[baseIdx + 2]) / step; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } else if (idx == sampleCnt - 1) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx = (derivatives[baseIdx] - derivatives[lastBaseIdx]) / step; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy = (derivatives[baseIdx + 1] - derivatives[lastBaseIdx + 1]) / step; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz = (derivatives[baseIdx + 2] - derivatives[lastBaseIdx + 2]) / step; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } else { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdx = (derivatives[nextBaseIdx] - derivatives[lastBaseIdx]) / (2 * step); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdy = (derivatives[nextBaseIdx + 1] - derivatives[lastBaseIdx + 1]) / (2 * step); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        sndPdz = (derivatives[nextBaseIdx + 2] - derivatives[lastBaseIdx + 2]) / (2 * step); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    printf("%g --> (%g, %g, %g)\n", u, sndPdx, sndPdy, sndPdz); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					__global__ void g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, | 
					 | 
					 | 
					__global__ void g_basisTexture(float *nTexture, float *nTexture1, const float *d_knots, int d_pointsCnt, int d_knotsCnt, | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					@ -446,10 +720,9 @@ __host__ NurbsCurve::Evaluator::Evaluator(std::vector<std::vector<float>> contro | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    recordTime = false; | 
					 | 
					 | 
					    recordTime = false; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    d_nTexture = nullptr; | 
					 | 
					 | 
					    d_nTexture = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    d_nTexture1 = nullptr; | 
					 | 
					 | 
					    d_nTexture1 = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    sampleCnt = 0; | 
					 | 
					 | 
					 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    d_points = nullptr; | 
					 | 
					 | 
					    d_points = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    d_knots = nullptr; | 
					 | 
					 | 
					    d_knots = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    d_derivatives = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
	
		
		
			
				
					| 
						
							
								
							
						
						
							
								
							
						
						
					 | 
					@ -482,6 +755,13 @@ __device__ bool d_floatEqual(float a, float b) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    return abs(a - b) < 0.00001; | 
					 | 
					 | 
					    return abs(a - b) < 0.00001; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					__host__ void myCudaFree(float *&p) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    if (p != nullptr) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        cudaFree(p); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					        p = nullptr; | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    } | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					void NurbsCurve::Evaluator::setRecordTime(bool r) { | 
					 | 
					 | 
					void NurbsCurve::Evaluator::setRecordTime(bool r) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					    recordTime = r; | 
					 | 
					 | 
					    recordTime = r; | 
				
			
			
		
	
	
		
		
			
				
					| 
						
						
						
							
								
							
						
					 | 
					@ -492,18 +772,20 @@ void NurbsSurface::Evaluator::setRecordTime(bool r) { | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					NurbsSurface::Evaluator::~Evaluator() { | 
					 | 
					 | 
					NurbsSurface::Evaluator::~Evaluator() { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaFree(d_nTexture_u); | 
					 | 
					 | 
					    myCudaFree(d_nTexture_u); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaFree(d_nTexture_v); | 
					 | 
					 | 
					    myCudaFree(d_nTexture_v); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaFree(d_nTexture1_u); | 
					 | 
					 | 
					    myCudaFree(d_nTexture1_u); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaFree(d_nTexture1_v); | 
					 | 
					 | 
					    myCudaFree(d_nTexture1_v); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaFree(d_points); | 
					 | 
					 | 
					    myCudaFree(d_points); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaFree(d_knots_u); | 
					 | 
					 | 
					    myCudaFree(d_knots_u); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaFree(d_knots_v); | 
					 | 
					 | 
					    myCudaFree(d_knots_v); | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    cudaDeviceReset(); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					
 | 
					 | 
					 | 
					
 | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					NurbsCurve::Evaluator::~Evaluator() { | 
					 | 
					 | 
					NurbsCurve::Evaluator::~Evaluator() { | 
				
			
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaFree(d_nTexture); | 
					 | 
					 | 
					    myCudaFree(d_nTexture); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaFree(d_nTexture1); | 
					 | 
					 | 
					    myCudaFree(d_nTexture1); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaFree(d_points); | 
					 | 
					 | 
					    myCudaFree(d_points); | 
				
			
			
				
				
			
		
	
		
		
			
				
					
					 | 
					 | 
					    cudaFree(d_knots); | 
					 | 
					 | 
					    myCudaFree(d_knots); | 
				
			
			
				
				
			
		
	
		
		
	
		
		
	
		
		
	
		
		
	
		
		
			
				
					 | 
					 | 
					 | 
					 | 
					 | 
					    cudaDeviceReset(); | 
				
			
			
		
	
		
		
			
				
					 | 
					 | 
					} | 
					 | 
					 | 
					} |