Namespace UtilKernels
Namespace containing utility CUDA kernel wrappers.
Public Functions
| Type | Name |
|---|---|
| void | add_scalar (const double * d_in, double * d_out, double scalar, size_t size, cudaStream_t s) Adds a scalar value to each element of a vector: d_out[i] = d_in[i] + scalar. |
| void | cast_vector (const T_in *const d_in, T_out *const d_out, const unsigned int size, cudaStream_t s) Casts a vector from one type to another. |
| void | elementwise_abs (const double * d_in, double * d_out, size_t size, cudaStream_t s) Computes the element-wise absolute value of a vector: d_out[i] = fabs(d_in[i]) |
| void | elementwise_divide (const double * d_in1, const double * d_in2, double * d_out, size_t size, cudaStream_t s) Computes the element-wise (Hadamard) quotient of two vectors: d_out[i] = d_in1[i] / d_in2[i]. |
| void | elementwise_inverse (const double * d_in, double * d_out, size_t size, cudaStream_t s) Computes the element-wise inverse of a vector: d_out[i] = 1.0 / d_in[i]. |
| void | elementwise_max (const double * d_in1, const double * d_in2, double * d_out, size_t size, cudaStream_t s) Computes the element-wise maximum of two vectors: d_out[i] = fmax(d_in1[i], d_in2[i]) |
| void | elementwise_min (const double * d_in1, const double * d_in2, double * d_out, size_t size, cudaStream_t s) Computes the element-wise minimum of two vectors: d_out[i] = fmin(d_in1[i], d_in2[i]) |
| void | elementwise_multiply (const double * d_in1, const double * d_in2, double * d_out, size_t size, cudaStream_t s) Computes the element-wise (Hadamard) product of two vectors: d_out[i] = d_in1[i] * d_in2[i]. |
| void | elementwise_multiply_add (const double * d_x, const double * d_y, const double * d_z, double * d_out, size_t size, cudaStream_t s) Computes the fused element-wise multiply-add of three vectors: d_out[i] = d_x[i] * d_y[i] + d_z[i]. |
| void | elementwise_power (const double * d_in, double * d_out, double exponent, size_t size, cudaStream_t s) Computes the element-wise power of a vector: d_out[i] = pow(d_in[i], exponent) |
| void | elementwise_sign (const double * d_in, double * d_out, size_t size, cudaStream_t s) Computes the element-wise sign of a vector: d_out[i] = sign(d_in[i]) (returns -1, 0, or 1). |
| void | extend_vector (const double * d_in, double * d_out, size_t num_blocks, size_t current_block_size, size_t new_block_size, cudaStream_t s) Extends a vector by padding each block with zeros. |
| void | max_scalar (const double * d_in, double * d_out, double scalar, size_t size, cudaStream_t s) Computes the element-wise maximum of a vector and a scalar: d_out[i] = fmax(d_in[i], scalar) |
| void | min_scalar (const double * d_in, double * d_out, double scalar, size_t size, cudaStream_t s) Computes the element-wise minimum of a vector and a scalar: d_out[i] = fmin(d_in[i], scalar) |
| void | pad_vector (const T_in *const d_in, T_out *const d_pad, const unsigned int num_blocks, const unsigned int padded_size, cudaStream_t s) Pads each block of a vector to twice the length with zeros. |
| void | reduce_max (const double * d_in, double * d_out, size_t size, cudaStream_t s) Reduces a vector to its maximum element value on the device. |
| void | reduce_min (const double * d_in, double * d_out, size_t size, cudaStream_t s) Reduces a vector to its minimum element value on the device. |
| void | shrink_vector (const double * d_in, double * d_out, size_t num_blocks, size_t current_block_size, size_t new_block_size, cudaStream_t s) Shrinks a vector by removing padding. |
| void | swap_axes_cutranspose (const T_complex * d_in, T_complex * d_out, const unsigned int num_cols, const unsigned int num_rows, const unsigned int block_size, cudaStream_t s) Swaps the axes of a matrix using cutranspose. |
| void | unpad_repad_vector (const T_in *const d_in, T_out *const d_out, const unsigned int num_blocks, const unsigned int padded_size, const bool unpad, cudaStream_t s) Unpads or repads a vector. |
Public Functions Documentation
function add_scalar
Adds a scalar value to each element of a vector: d_out[i] = d_in[i] + scalar.
void UtilKernels::add_scalar (
const double * d_in,
double * d_out,
double scalar,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to the output vector.
- `scalar`: The scalar value to add.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function cast_vector
Casts a vector from one type to another.
template<typename T_in, typename T_out>
void UtilKernels::cast_vector (
const T_in *const d_in,
T_out *const d_out,
const unsigned int size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to the output vector.
- `size`: Size of the input and output vectors.
- `s`: The CUDA stream to use for the operation.
Template parameters:
- `T_in`: Input type.
- `T_out`: Output type.
function elementwise_abs
Computes the element-wise absolute value of a vector: d_out[i] = fabs(d_in[i])
void UtilKernels::elementwise_abs (
const double * d_in,
double * d_out,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to the output vector.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function elementwise_divide
Computes the element-wise (Hadamard) quotient of two vectors: d_out[i] = d_in1[i] / d_in2[i].
void UtilKernels::elementwise_divide (
const double * d_in1,
const double * d_in2,
double * d_out,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in1`: Pointer to the first input vector.
- `d_in2`: Pointer to the second input vector.
- `d_out`: Pointer to the output vector.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function elementwise_inverse
Computes the element-wise inverse of a vector: d_out[i] = 1.0 / d_in[i].
void UtilKernels::elementwise_inverse (
const double * d_in,
double * d_out,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to the output vector.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function elementwise_max
Computes the element-wise maximum of two vectors: d_out[i] = fmax(d_in1[i], d_in2[i])
void UtilKernels::elementwise_max (
const double * d_in1,
const double * d_in2,
double * d_out,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in1`: Pointer to the first input vector.
- `d_in2`: Pointer to the second input vector.
- `d_out`: Pointer to the output vector.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function elementwise_min
Computes the element-wise minimum of two vectors: d_out[i] = fmin(d_in1[i], d_in2[i])
void UtilKernels::elementwise_min (
const double * d_in1,
const double * d_in2,
double * d_out,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in1`: Pointer to the first input vector.
- `d_in2`: Pointer to the second input vector.
- `d_out`: Pointer to the output vector.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function elementwise_multiply
Computes the element-wise (Hadamard) product of two vectors: d_out[i] = d_in1[i] * d_in2[i].
void UtilKernels::elementwise_multiply (
const double * d_in1,
const double * d_in2,
double * d_out,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in1`: Pointer to the first input vector.
- `d_in2`: Pointer to the second input vector.
- `d_out`: Pointer to the output vector.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function elementwise_multiply_add
Computes the fused element-wise multiply-add of three vectors: d_out[i] = d_x[i] * d_y[i] + d_z[i].
void UtilKernels::elementwise_multiply_add (
const double * d_x,
const double * d_y,
const double * d_z,
double * d_out,
size_t size,
cudaStream_t s
)
Parameters:
- `d_x`: Pointer to the first input vector (multiplicand).
- `d_y`: Pointer to the second input vector (multiplier).
- `d_z`: Pointer to the third input vector (addend).
- `d_out`: Pointer to the output vector.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function elementwise_power
Computes the element-wise power of a vector: d_out[i] = pow(d_in[i], exponent)
void UtilKernels::elementwise_power (
const double * d_in,
double * d_out,
double exponent,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to the output vector.
- `exponent`: The scalar power to raise each element to.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function elementwise_sign
Computes the element-wise sign of a vector: d_out[i] = sign(d_in[i]) (returns -1, 0, or 1).
void UtilKernels::elementwise_sign (
const double * d_in,
double * d_out,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to the output vector.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function extend_vector
Extends a vector by padding each block with zeros.
void UtilKernels::extend_vector (
const double * d_in,
double * d_out,
size_t num_blocks,
size_t current_block_size,
size_t new_block_size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to the output vector.
- `num_blocks`: Number of blocks in the vector.
- `current_block_size`: Current size of each block.
- `new_block_size`: New size of each block after padding.
- `s`: The CUDA stream to use for the operation.
function max_scalar
Computes the element-wise maximum of a vector and a scalar: d_out[i] = fmax(d_in[i], scalar)
void UtilKernels::max_scalar (
const double * d_in,
double * d_out,
double scalar,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to the output vector.
- `scalar`: The scalar value to compare against.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function min_scalar
Computes the element-wise minimum of a vector and a scalar: d_out[i] = fmin(d_in[i], scalar)
void UtilKernels::min_scalar (
const double * d_in,
double * d_out,
double scalar,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to the output vector.
- `scalar`: The scalar value to compare against.
- `size`: Total number of elements in the vectors.
- `s`: The CUDA stream to use for the operation.
function pad_vector
Pads each block of a vector to twice the length with zeros.
template<typename T_in, typename T_out>
void UtilKernels::pad_vector (
const T_in *const d_in,
T_out *const d_pad,
const unsigned int num_blocks,
const unsigned int padded_size,
cudaStream_t s
)
This function takes an input vector d_in and pads each block of the vector to twice the length with zeros. The padded vector is stored in the output vector d_pad. The number of blocks is specified by num_blocks, and the padded size of each block is specified by padded_size. The padding operation is performed asynchronously on the CUDA stream s.
Parameters:
- `d_in`: Pointer to the input vector.
- `d_pad`: Pointer to the output padded vector.
- `num_blocks`: Number of blocks in the vector.
- `padded_size`: Padded size of each block.
- `s`: CUDA stream for asynchronous execution.
Template parameters:
- `T_in`: Data type of the input vector (real).
- `T_out`: Data type of the output vector (real).
function reduce_max
Reduces a vector to its maximum element value on the device.
void UtilKernels::reduce_max (
const double * d_in,
double * d_out,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to a single device double storing the result.
- `size`: Total number of elements in the input vector.
- `s`: The CUDA stream to use for the operation.
function reduce_min
Reduces a vector to its minimum element value on the device.
void UtilKernels::reduce_min (
const double * d_in,
double * d_out,
size_t size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to a single device double storing the result.
- `size`: Total number of elements in the input vector.
- `s`: The CUDA stream to use for the operation.
function shrink_vector
Shrinks a vector by removing padding.
void UtilKernels::shrink_vector (
const double * d_in,
double * d_out,
size_t num_blocks,
size_t current_block_size,
size_t new_block_size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to the output vector.
- `num_blocks`: Number of blocks in the vector.
- `current_block_size`: Current size of each block.
- `new_block_size`: New size of each block after removing the padding.
- `s`: The CUDA stream to use for the operation.
function swap_axes_cutranspose
Swaps the axes of a matrix using cutranspose.
template<typename T_complex>
void UtilKernels::swap_axes_cutranspose (
const T_complex * d_in,
T_complex * d_out,
const unsigned int num_cols,
const unsigned int num_rows,
const unsigned int block_size,
cudaStream_t s
)
Parameters:
- `d_in`: Pointer to the input matrix.
- `d_out`: Pointer to the output matrix.
- `num_cols`: Number of columns in the input matrix.
- `num_rows`: Number of rows in the input matrix.
- `block_size`: Block size of the input matrix.
- `s`: The CUDA stream to use for the operation.
Template parameters:
- `T_complex`: Data type of the input and output matrices.
function unpad_repad_vector
Unpads or repads a vector.
template<typename T_in, typename T_out>
void UtilKernels::unpad_repad_vector (
const T_in *const d_in,
T_out *const d_out,
const unsigned int num_blocks,
const unsigned int padded_size,
const bool unpad,
cudaStream_t s
)
This function either unpads each block of the vector back to the original length or resets the second half of each block to zeros.
Parameters:
- `d_in`: Pointer to the input vector.
- `d_out`: Pointer to the output vector.
- `num_blocks`: Number of blocks in the vector.
- `padded_size`: Padded size of each block.
- `unpad`: Flag indicating whether to unpad or repad the vector. If true, the vector will be unpadded. If false, the second half of each block will be reset to zeros.
- `s`: The CUDA stream to use for the operation.
Template parameters:
- `T_in`: Data type of the input vector (real).
- `T_out`: Data type of the output vector (real).
The documentation for this namespace was generated from the following file: `src/util_kernels.hpp`