|
|
@ -2,46 +2,91 @@ |
|
|
|
// Created by dtouch on 23-5-23. |
|
|
|
// |
|
|
|
|
|
|
|
#include "../include/rod_generate.cuh" |
|
|
|
#include "rod_generate.cuh" |
|
|
|
#include "device_functions.h" |
|
|
|
#include "float.h" |
|
|
|
|
|
|
|
__global__ |
|
|
|
void g_rod_generate(const RodCrystal *rodCrystal, cudaPitchedPtr sdf, const cudaExtent *extent, |
|
|
|
const Eigen::Vector3f *sampleMin, const Eigen::Vector3f *sampleMax, int radius) { |
|
|
|
void |
|
|
|
g_rod_generate(int *beamData, int beamCnt, float *pointData, int pointCnt, cudaPitchedPtr sdf, const cudaExtent *extent, |
|
|
|
size_t floatSize, const Eigen::Vector3f *sampleMin, const Eigen::Vector3f *sampleMax, int radius) { |
|
|
|
// 3-dim grid and 3-dim block |
|
|
|
Eigen::Map<Eigen::Matrix<int, Eigen::Dynamic, 2>> rod_beams(beamData, beamCnt, 2); |
|
|
|
Eigen::Map<Eigen::Matrix<float, Eigen::Dynamic, 3>> rod_points(pointData, pointCnt, 3); |
|
|
|
auto ix = blockIdx.x * blockDim.x + threadIdx.x; |
|
|
|
auto iy = blockIdx.y * blockDim.y + threadIdx.y; |
|
|
|
auto iz = blockIdx.z * blockDim.z + threadIdx.z; |
|
|
|
// if (ix == 0 && iy == 0 && iz == 0) { |
|
|
|
// for (int i = 0; i < beamCnt; ++i) { |
|
|
|
// printf("%d, %d\n", rod_beams(i, 0), rod_beams(i, 1)); |
|
|
|
// } |
|
|
|
// for(int i = 0; i < pointCnt; ++i) { |
|
|
|
// printf("%f, %f, %f\n", rod_points(i, 0), rod_points(i, 1), rod_points(i, 2)); |
|
|
|
// } |
|
|
|
// } |
|
|
|
if (ix >= extent->width / floatSize || iy >= extent->height || iz >= extent->depth) { |
|
|
|
return; |
|
|
|
} |
|
|
|
auto x = sampleMin->x() + |
|
|
|
static_cast<float>(ix) * (sampleMax->x() - sampleMin->x()) / static_cast<float>(extent->width); |
|
|
|
static_cast<float>(ix) * (sampleMax->x() - sampleMin->x()) / static_cast<float>(extent->width / floatSize); |
|
|
|
auto y = sampleMin->y() + |
|
|
|
static_cast<float>(iy) * (sampleMax->y() - sampleMin->y()) / static_cast<float>(extent->height); |
|
|
|
auto z = sampleMin->z() + |
|
|
|
static_cast<float>(iz) * (sampleMax->z() - sampleMin->z()) / static_cast<float>(extent->depth); |
|
|
|
// printf("%d, %d, %d\n", ix, iy, iz); |
|
|
|
// 获取sdf中下标为(ix,iy)的元素的行首指针 |
|
|
|
auto sdfPtr = reinterpret_cast<float *>((char *) sdf.ptr + iy * sdf.pitch + iz * sdf.pitch * extent->height); |
|
|
|
// auto sdfPtr = reinterpret_cast<float *>((char *) sdf.ptr + iy * sdf.pitch + iz * sdf.pitch * extent->height); |
|
|
|
char *sdfPtr = (char *) sdf.ptr; |
|
|
|
size_t pitch = sdf.pitch; |
|
|
|
size_t slicePitch = pitch * extent->height; |
|
|
|
auto p = Eigen::Vector3f(x, y, z); |
|
|
|
for (int i = 0; i < rodCrystal->rod_beams.rows(); ++i) { |
|
|
|
auto a = Eigen::Matrix<float, 3, 1>(rodCrystal->rod_points.row(rodCrystal->rod_beams(i, 0))); |
|
|
|
auto b = Eigen::Matrix<float, 3, 1>(rodCrystal->rod_points.row(rodCrystal->rod_beams(i, 1))); |
|
|
|
char *slice = sdfPtr + iz * slicePitch; |
|
|
|
auto *row = (float *) (slice + iy * pitch); |
|
|
|
// row[ix] is initialized as the max float in GPU |
|
|
|
row[ix] = FLT_MAX; |
|
|
|
// auto aTmp = Eigen::Vector3f(rod_points.row(rod_beams(2, 1))); |
|
|
|
// printf("aTmp: (%f, %f, %f)\n", aTmp.x(), aTmp.y(), aTmp.z()); |
|
|
|
for (int i = 0; i < rod_beams.rows(); ++i) { |
|
|
|
auto a = Eigen::Vector3f(rod_points.row(rod_beams(i, 0))); |
|
|
|
auto b = Eigen::Vector3f(rod_points.row(rod_beams(i, 1))); |
|
|
|
auto ab = b - a; |
|
|
|
auto ap = p - a; |
|
|
|
auto bp = p - b; |
|
|
|
if (ab.dot(-bp) > 0 && ab.dot(ap) > 0) { |
|
|
|
sdfPtr[ix] = (ap.cross(bp)).norm() / ab.norm(); |
|
|
|
if (ab.x() * bp.x() + ab.y() * bp.y() + ab.z() + bp.z() < 0 && |
|
|
|
ab.x() * ap.x() + ab.y() * ap.y() + ab.z() * ap.z() > 0) { |
|
|
|
row[ix] = min(row[ix], (ap.cross(bp)).norm() / ab.norm()); |
|
|
|
} else { |
|
|
|
sdfPtr[iz] = std::min(ap.norm(), bp.norm()); |
|
|
|
row[ix] = min(row[ix], min(ap.norm(), bp.norm())); |
|
|
|
} |
|
|
|
} |
|
|
|
row[ix] -= radius; |
|
|
|
} |
|
|
|
|
|
|
|
__host__ void |
|
|
|
__host__ float* |
|
|
|
h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, const Eigen::Vector3f &sampleMin, |
|
|
|
const Eigen::Vector3f &sampleMax, int radius) { |
|
|
|
RodCrystal *d_rodCrystal; |
|
|
|
cudaMalloc(&d_rodCrystal, sizeof(RodCrystal)); |
|
|
|
cudaMemcpy(d_rodCrystal, &rodCrystal, sizeof(RodCrystal), cudaMemcpyHostToDevice); |
|
|
|
const Eigen::Vector3f &sampleMax, float radius) { |
|
|
|
int *d_beamData; |
|
|
|
size_t beamBytes = rodCrystal.rod_beams.rows() * rodCrystal.rod_beams.cols() * sizeof(int); |
|
|
|
cudaMalloc(&d_beamData, beamBytes); |
|
|
|
cudaMemcpy(d_beamData, rodCrystal.rod_beams.data(), beamBytes, cudaMemcpyHostToDevice); |
|
|
|
|
|
|
|
float *d_pointData; |
|
|
|
size_t pointBytes = rodCrystal.rod_points.rows() * rodCrystal.rod_points.cols() * sizeof(float); |
|
|
|
cudaMalloc(&d_pointData, pointBytes); |
|
|
|
cudaMemcpy(d_pointData, rodCrystal.rod_points.data(), pointBytes, cudaMemcpyHostToDevice); |
|
|
|
|
|
|
|
// RodCrystal *d_rodCrystal; |
|
|
|
// cudaMalloc(&d_rodCrystal, sizeof(rodCrystal)); |
|
|
|
// cudaMemcpy(d_rodCrystal, &rodCrystal, sizeof(rodCrystal), cudaMemcpyHostToDevice); |
|
|
|
// printf("size of rodCrystal: %lu; size of class RodCrystal: %lu\n", sizeof(rodCrystal), sizeof(RodCrystal)); |
|
|
|
// printf("size of rodCrystal.rod_points: %lu\n", sizeof(rodCrystal.rod_points)); |
|
|
|
// printf("size of rodCrystal.rod_beams: %lu\n", sizeof(rodCrystal.rod_beams)); |
|
|
|
// printf("size of rodCrystal.rod_points.row(0): %lu\n", sizeof(rodCrystal.rod_points.row(0))); |
|
|
|
|
|
|
|
|
|
|
|
int sampleCntAll = sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); |
|
|
|
float *h_sdf; |
|
|
|
h_sdf = (float *) malloc(sampleCnt.x() * sampleCnt.y() * sampleCnt.z() * sizeof(float)); |
|
|
|
for (int i = 0; i < sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); ++i) { |
|
|
|
h_sdf[i] = i; |
|
|
|
} |
|
|
@ -58,24 +103,33 @@ h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, c |
|
|
|
cudaMemcpy3D(©Params); |
|
|
|
|
|
|
|
Eigen::Vector3f *d_sampleMin; |
|
|
|
cudaMalloc(&d_sampleMin, sizeof(Eigen::Vector3f)); |
|
|
|
cudaMemcpy(d_sampleMin, &sampleMin, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice); |
|
|
|
cudaMalloc(&d_sampleMin, sizeof(sampleMin)); |
|
|
|
cudaMemcpy(d_sampleMin, &sampleMin, sizeof(sampleMin), cudaMemcpyHostToDevice); |
|
|
|
|
|
|
|
Eigen::Vector3f *d_sampleMax; |
|
|
|
cudaMalloc(&d_sampleMax, sizeof(Eigen::Vector3f)); |
|
|
|
cudaMemcpy(d_sampleMax, &sampleMax, sizeof(Eigen::Vector3f), cudaMemcpyHostToDevice); |
|
|
|
cudaMalloc(&d_sampleMax, sizeof(sampleMax)); |
|
|
|
cudaMemcpy(d_sampleMax, &sampleMax, sizeof(sampleMax), cudaMemcpyHostToDevice); |
|
|
|
|
|
|
|
cudaExtent *d_extent; |
|
|
|
cudaMalloc(&d_extent, sizeof(Eigen::Vector3i)); |
|
|
|
cudaMemcpy(d_extent, &sampleCnt, sizeof(Eigen::Vector3i), cudaMemcpyHostToDevice); |
|
|
|
|
|
|
|
dim3 block(64, 64, 64); |
|
|
|
dim3 grid((sampleCnt.x() + block.x - 1) / block.x, |
|
|
|
(sampleCnt.y() + block.y - 1) / block.y, |
|
|
|
(sampleCnt.z() + block.z - 1) / block.z); |
|
|
|
g_rod_generate<<<grid, block>>>(d_rodCrystal, d_sdf, d_extent, d_sampleMin, d_sampleMax, radius); |
|
|
|
cudaMalloc(&d_extent, sizeof(extent)); |
|
|
|
cudaMemcpy(d_extent, &extent, sizeof(extent), cudaMemcpyHostToDevice); |
|
|
|
|
|
|
|
dim3 grid(16, 16, 16); |
|
|
|
dim3 block((sampleCnt.x() + grid.x - 1) / grid.x, |
|
|
|
(sampleCnt.y() + grid.y - 1) / grid.y, |
|
|
|
(sampleCnt.z() + grid.z - 1) / grid.z); |
|
|
|
g_rod_generate<<<grid, block>>>(d_beamData, rodCrystal.rod_beams.rows(), d_pointData, rodCrystal.rod_points.rows(), |
|
|
|
d_sdf, d_extent, sizeof(float), d_sampleMin, d_sampleMax, radius); |
|
|
|
cudaDeviceSynchronize(); |
|
|
|
|
|
|
|
// for (int i = 0; i < sampleCnt.x() * sampleCnt.y() * sampleCnt.z(); ++i) { |
|
|
|
// h_sdf[i] = -i; |
|
|
|
// } |
|
|
|
|
|
|
|
auto tmpBeam = rodCrystal.rod_beams(0, 1); |
|
|
|
printf("tmpBeam: %d\n", tmpBeam); |
|
|
|
|
|
|
|
|
|
|
|
printf("copy back to host\n"); |
|
|
|
copyParams = {nullptr}; |
|
|
|
copyParams.srcPtr = d_sdf; |
|
|
@ -85,9 +139,29 @@ h_rod_generate(const RodCrystal &rodCrystal, const Eigen::Vector3i &sampleCnt, c |
|
|
|
copyParams.kind = cudaMemcpyDeviceToHost; |
|
|
|
cudaMemcpy3D(©Params); |
|
|
|
|
|
|
|
cudaFree(d_rodCrystal); |
|
|
|
// cudaFree(d_rodCrystal); |
|
|
|
cudaFree(d_sdf.ptr); |
|
|
|
cudaFree(d_sampleMin); |
|
|
|
cudaFree(d_sampleMax); |
|
|
|
cudaFree(d_extent); |
|
|
|
cudaFree(d_beamData); |
|
|
|
cudaFree(d_pointData); |
|
|
|
printf("["); |
|
|
|
for (int i = 0; i < sampleCnt.x(); i++) { |
|
|
|
printf("["); |
|
|
|
for (int j = 0; j < sampleCnt.y(); j++) { |
|
|
|
printf("["); |
|
|
|
for (int k = 0; k < sampleCnt.z(); k++) { |
|
|
|
printf("%f", h_sdf[i * sampleCnt.y() * sampleCnt.z() + j * sampleCnt.z() + k]); |
|
|
|
if (k != sampleCnt.z() - 1) |
|
|
|
printf(","); |
|
|
|
} |
|
|
|
printf("]"); |
|
|
|
if (j != sampleCnt.y() - 1) |
|
|
|
printf(","); |
|
|
|
} |
|
|
|
printf("]"); |
|
|
|
} |
|
|
|
printf("]"); |
|
|
|
free(h_sdf); |
|
|
|
} |
|
|
|