Merge pull request #75 from khammernik/master

andyschwarzl · web-flow · commit 7d5fc93526a7 · 2020-03-17T14:17:05.000+01:00
Fix memory leak, move print statements to debug mode, and compute the FFT scaling factor from the grid dimensions instead of the image dimensions.
diff --git a/CUDA/inc/gpuNUFFT_operator.hpp b/CUDA/inc/gpuNUFFT_operator.hpp
@@ -64,6 +64,7 @@ class GpuNUFFTOperator
     freeLocalMemberArray(this->kernel.data);
 
     if (!matlabSharedMem) {
+      freeLocalMemberArray(this->dens.data);
       freeLocalMemberArray(this->deapo.data);
       freeLocalMemberArray(this->kSpaceTraj.data);
       freeLocalMemberArray(this->sectorCenters.data);
diff --git a/CUDA/src/gpu/std_gpuNUFFT_kernels.cu b/CUDA/src/gpu/std_gpuNUFFT_kernels.cu
@@ -134,7 +134,7 @@ void performFFTScaling(CufftType* data,int N, gpuNUFFT::GpuNUFFTInfo* gi_host)
   dim3 block_dim(64, 1, 8);
   //dim3 block_dim(THREAD_BLOCK_SIZE);
   dim3 grid_dim(getOptimalGridDim(N,THREAD_BLOCK_SIZE));
-  DType scaling_factor = (DType)1.0 / (DType) sqrt((DType)gi_host->im_width_dim);
+  DType scaling_factor = (DType)1.0 / (DType) sqrt((DType)gi_host->gridDims_count);
 
   fftScaleKernel<<<grid_dim,block_dim>>>(data,scaling_factor,N);
 }
diff --git a/CUDA/src/gpuNUFFT_operator.cpp b/CUDA/src/gpuNUFFT_operator.cpp
@@ -505,7 +505,8 @@ void gpuNUFFT::GpuNUFFTOperator::performGpuNUFFTAdj(
 
       freeTotalDeviceMemory(imdata_sum_d, NULL);
 
-      printf("last cuda error: %s\n", cudaGetErrorString(cudaGetLastError()));
+      if (DEBUG)
+        printf("last cuda error: %s\n", cudaGetErrorString(cudaGetLastError()));
       return;
     }
     if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
@@ -780,8 +781,8 @@ void gpuNUFFT::GpuNUFFTOperator::performGpuNUFFTAdj(
       copyFromDevice<CufftType>(imdata_d, imgData.data, gi_host->imgDims_count);
 
       freeTotalDeviceMemory(data_d, imdata_d, imdata_sum_d, NULL);
-
-      printf("last cuda error: %s\n", cudaGetErrorString(cudaGetLastError()));
+      if (DEBUG)
+        printf("last cuda error: %s\n", cudaGetErrorString(cudaGetLastError()));
       return;
     }
     if (DEBUG && (cudaThreadSynchronize() != cudaSuccess))
diff --git a/CUDA/src/gpuNUFFT_operator_factory.cpp b/CUDA/src/gpuNUFFT_operator_factory.cpp
@@ -412,15 +412,15 @@ gpuNUFFT::Array<DType> gpuNUFFT::GpuNUFFTOperatorFactory::computeDeapodizationFu
   // cleanup locally initialized arrays here
   free(dataArray.data);
   free(assignedSectors.data);
-  delete deapoGpuNUFFTOp;
 
   // Compute abs values of deapo function and compensate
   // FFT scaling sqrt(N)
   Array<DType> deapoAbs = initDeapoData(deapoFunction.count());
 
   DType maxDeapoVal = 0;
   DType minDeapoVal = std::numeric_limits<DType>::max();
-  double fft_scaling_factor = std::sqrt(imgDims.count()); 
+  double fft_scaling_factor = std::sqrt(deapoGpuNUFFTOp->getGridDims().count()); 
+
   for (unsigned cnt = 0; cnt < deapoFunction.count(); cnt++)
   {
     deapoFunction.data[cnt].x = deapoFunction.data[cnt].x * fft_scaling_factor;
@@ -432,6 +432,8 @@ gpuNUFFT::Array<DType> gpuNUFFT::GpuNUFFTOperatorFactory::computeDeapodizationFu
       minDeapoVal = deapoAbs.data[cnt];
   }
 
+  // cleanup
+  delete deapoGpuNUFFTOp;
   free(deapoFunction.data);
   return deapoAbs;
 }
@@ -531,6 +533,7 @@ gpuNUFFT::GpuNUFFTOperatorFactory::createGpuNUFFTOperator(
 
   // free temporary array
   free(assignedSectors.data);
+  assignedSectors.data = NULL;
 
   gpuNUFFTOp->setDeapodizationFunction(
     this->computeDeapodizationFunction(kernelWidth, osf, imgDims));

Original file line number	Diff line number	Diff line change
`@@ -134,7 +134,7 @@ void performFFTScaling(CufftType* data,int N, gpuNUFFT::GpuNUFFTInfo* gi_host)`
`134`	`134`	`dim3 block_dim(64, 1, 8);`
`135`	`135`	`//dim3 block_dim(THREAD_BLOCK_SIZE);`
`136`	`136`	`dim3 grid_dim(getOptimalGridDim(N,THREAD_BLOCK_SIZE));`
`137`		`- DType scaling_factor = (DType)1.0 / (DType) sqrt((DType)gi_host->im_width_dim);`
	`137`	`+ DType scaling_factor = (DType)1.0 / (DType) sqrt((DType)gi_host->gridDims_count);`
`138`	`138`
`139`	`139`	`fftScaleKernel<<<grid_dim,block_dim>>>(data,scaling_factor,N);`
`140`	`140`	`}`