//
// Created by dtouch on 23-5-23.
//
#include "../include/rod_generate.cuh"

#include <cfloat>
#include <cstdio>
#include <vector>

namespace {

/// Reports a failed CUDA runtime call on stderr.
/// Returns true when `status` is cudaSuccess, false otherwise.
bool rod_cuda_ok(cudaError_t status, const char *what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "h_rod_generate: %s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

/// Euclidean distance from point `p` to the line segment [a, b].
__device__ __forceinline__ float d_segment_distance(const Eigen::Vector3f &p, const Eigen::Vector3f &a,
                                                    const Eigen::Vector3f &b) {
    const Eigen::Vector3f ab = b - a;
    const Eigen::Vector3f ap = p - a;
    const Eigen::Vector3f bp = p - b;
    // p projects strictly inside the segment: use the perpendicular distance to the line.
    // A zero-length segment never takes this branch, so there is no division by zero.
    if (ab.dot(-bp) > 0.0f && ab.dot(ap) > 0.0f) {
        return ap.cross(bp).norm() / ab.norm();
    }
    // Otherwise the closest point is one of the two end points.
    return fminf(ap.norm(), bp.norm());
}

}  // namespace

/**
 * Fills a pitched 3-D float volume with, per sample, the distance to the nearest rod beam.
 *
 * Launch layout: 3-D grid of 3-D blocks, one thread per sample; threads that fall outside
 * the volume return immediately, so the grid may overshoot.
 *
 * @param rodCrystal device pointer to the crystal; `rod_beams(i, 0/1)` index rows of `rod_points`
 * @param sdf        pitched device allocation receiving the distances
 * @param extent     device pointer to the volume size in ELEMENTS (width, height, depth)
 * @param sampleMin  device pointer to the coordinate of sample (0, 0, 0)
 * @param sampleMax  device pointer to the upper corner of the sampled box (exclusive)
 * @param radius     currently unused
 *                   NOTE(review): presumably the rod radius to subtract from the distance — confirm
 */
__global__ void g_rod_generate(const RodCrystal *rodCrystal, cudaPitchedPtr sdf, const cudaExtent *extent,
                               const Eigen::Vector3f *sampleMin, const Eigen::Vector3f *sampleMax, int radius) {
    (void) radius;

    // 3-dim grid and 3-dim block
    const size_t ix = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    const size_t iy = static_cast<size_t>(blockIdx.y) * blockDim.y + threadIdx.y;
    const size_t iz = static_cast<size_t>(blockIdx.z) * blockDim.z + threadIdx.z;

    // The grid is rounded up to whole blocks; surplus threads must not touch memory.
    if (ix >= extent->width || iy >= extent->height || iz >= extent->depth) {
        return;
    }

    const float x = sampleMin->x() +
                    static_cast<float>(ix) * (sampleMax->x() - sampleMin->x()) / static_cast<float>(extent->width);
    const float y = sampleMin->y() +
                    static_cast<float>(iy) * (sampleMax->y() - sampleMin->y()) / static_cast<float>(extent->height);
    const float z = sampleMin->z() +
                    static_cast<float>(iz) * (sampleMax->z() - sampleMin->z()) / static_cast<float>(extent->depth);

    // Pointer to the start of row (iy, iz) of the pitched volume; the slice pitch is pitch * height.
    float *sdfRow = reinterpret_cast<float *>(static_cast<char *>(sdf.ptr) + iy * sdf.pitch +
                                              iz * sdf.pitch * extent->height);

    const auto beamCount = rodCrystal->rod_beams.rows();
    if (beamCount <= 0) {
        return;  // no beams: leave the sample as uploaded by the host
    }

    const Eigen::Vector3f p(x, y, z);
    float nearest = FLT_MAX;
    for (int i = 0; i < beamCount; ++i) {
        const Eigen::Vector3f a = rodCrystal->rod_points.row(rodCrystal->rod_beams(i, 0)).transpose();
        const Eigen::Vector3f b = rodCrystal->rod_points.row(rodCrystal->rod_beams(i, 1)).transpose();
        // Keep the minimum over all beams instead of letting the last beam overwrite the sample.
        nearest = fminf(nearest, d_segment_distance(p, a, b));
    }
    sdfRow[ix] = nearest;
}

/**
 * Samples the rod-crystal distance field on a regular grid using the GPU.
 *
 * CUDA failures are reported on stderr; the remaining steps are skipped and every device
 * allocation made so far is released before returning.
 *
 * NOTE(review): the computed field is copied back into a local buffer that is discarded on
 * return, because the signature offers no output — confirm how callers are meant to get it.
 *
 * @param rodCrystal crystal whose beams define the field
 * @param sampleCnt  number of samples along x, y, z (all must be positive)
 * @param sampleMin  coordinate of sample (0, 0, 0)
 * @param sampleMax  upper corner of the sampled box
 * @param radius     forwarded unchanged to the kernel
 */
__host__ void h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt,
                             const Eigen::Vector3f &sampleMin, const Eigen::Vector3f &sampleMax, int radius) {
    if (sampleCnt.x() <= 0 || sampleCnt.y() <= 0 || sampleCnt.z() <= 0) {
        fprintf(stderr, "h_rod_generate: sample counts must be positive\n");
        return;
    }
    const size_t nx = static_cast<size_t>(sampleCnt.x());
    const size_t ny = static_cast<size_t>(sampleCnt.y());
    const size_t nz = static_cast<size_t>(sampleCnt.z());

    // Tightly packed host volume, pre-filled with each sample's linear index.
    std::vector<float> h_sdf(nx * ny * nz);
    for (size_t i = 0; i < h_sdf.size(); ++i) {
        h_sdf[i] = static_cast<float>(i);
    }

    // cudaMalloc3D / cudaMemcpy3D expect the width in bytes; the kernel expects it in elements.
    const cudaExtent byteExtent = make_cudaExtent(nx * sizeof(float), ny, nz);
    const cudaExtent sampleExtent = make_cudaExtent(nx, ny, nz);

    RodCrystal *d_rodCrystal = nullptr;
    cudaPitchedPtr d_sdf{};
    Eigen::Vector3f *d_sampleMin = nullptr;
    Eigen::Vector3f *d_sampleMax = nullptr;
    cudaExtent *d_extent = nullptr;

    // NOTE(review): this is a byte-wise copy. If RodCrystal owns dynamically sized Eigen
    // matrices, the device copy still holds host pointers — confirm the layout of RodCrystal.
    bool ok = rod_cuda_ok(cudaMalloc(&d_rodCrystal, sizeof(RodCrystal)), "cudaMalloc(rodCrystal)");
    ok = ok && rod_cuda_ok(cudaMemcpy(d_rodCrystal, &rodCrystal, sizeof(RodCrystal), cudaMemcpyHostToDevice),
                           "cudaMemcpy(rodCrystal)");

    ok = ok && rod_cuda_ok(cudaMalloc3D(&d_sdf, byteExtent), "cudaMalloc3D(sdf)");
    if (ok) {
        // cudaMemcpy3D describes both ends as pitched pointers so it can skip row padding;
        // the host buffer has no padding, so its pitch is simply the row size in bytes.
        cudaMemcpy3DParms upload = {};
        upload.srcPtr = make_cudaPitchedPtr(h_sdf.data(), nx * sizeof(float), nx, ny);
        upload.dstPtr = d_sdf;
        upload.extent = byteExtent;
        upload.kind = cudaMemcpyHostToDevice;
        ok = rod_cuda_ok(cudaMemcpy3D(&upload), "cudaMemcpy3D(host to device)");
    }

    ok = ok && rod_cuda_ok(cudaMalloc(&d_sampleMin, sizeof(Eigen::Vector3f)), "cudaMalloc(sampleMin)");
    ok = ok && rod_cuda_ok(cudaMemcpy(d_sampleMin, &sampleMin, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice),
                           "cudaMemcpy(sampleMin)");

    ok = ok && rod_cuda_ok(cudaMalloc(&d_sampleMax, sizeof(Eigen::Vector3f)), "cudaMalloc(sampleMax)");
    ok = ok && rod_cuda_ok(cudaMemcpy(d_sampleMax, &sampleMax, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice),
                           "cudaMemcpy(sampleMax)");

    // The kernel reads a full cudaExtent (three size_t), so allocate and copy exactly that.
    ok = ok && rod_cuda_ok(cudaMalloc(&d_extent, sizeof(cudaExtent)), "cudaMalloc(extent)");
    ok = ok && rod_cuda_ok(cudaMemcpy(d_extent, &sampleExtent, sizeof(cudaExtent), cudaMemcpyHostToDevice),
                           "cudaMemcpy(extent)");

    if (ok) {
        // 32 * 4 * 4 = 512 threads: within the 1024 threads-per-block limit, and a full warp
        // runs along x, the contiguous dimension of the volume.
        const dim3 block(32, 4, 4);
        const dim3 grid(static_cast<unsigned int>((nx + block.x - 1) / block.x),
                        static_cast<unsigned int>((ny + block.y - 1) / block.y),
                        static_cast<unsigned int>((nz + block.z - 1) / block.z));
        g_rod_generate<<<grid, block>>>(d_rodCrystal, d_sdf, d_extent, d_sampleMin, d_sampleMax, radius);
        // Launch-configuration errors only surface through cudaGetLastError().
        ok = rod_cuda_ok(cudaGetLastError(), "g_rod_generate launch");
    }
    // Faults raised while the kernel runs are reported by the next synchronising call.
    ok = ok && rod_cuda_ok(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    if (ok) {
        printf("copy back to host\n");
        cudaMemcpy3DParms download = {};
        download.srcPtr = d_sdf;
        download.dstPtr = make_cudaPitchedPtr(h_sdf.data(), nx * sizeof(float), nx, ny);
        download.extent = byteExtent;
        download.kind = cudaMemcpyDeviceToHost;
        ok = rod_cuda_ok(cudaMemcpy3D(&download), "cudaMemcpy3D(device to host)");
    }

    // cudaFree(nullptr) is a no-op, so this is safe whichever step failed.
    cudaFree(d_rodCrystal);
    cudaFree(d_sdf.ptr);
    cudaFree(d_sampleMin);
    cudaFree(d_sampleMax);
    cudaFree(d_extent);
}