Generate cudnn types

Andrzej Janik 2025-04-19 15:02:34 +00:00
parent 734db223d6
commit 686416d7af
9 changed files with 14507 additions and 3 deletions

cuda_base/src/cublas.rs (new file, 6815 lines)

File diff suppressed because it is too large.

cuda_base/src/cublaslt.rs (new file, 583 lines)

@@ -0,0 +1,583 @@
// Generated automatically by zluda_bindgen
// DO NOT EDIT MANUALLY
#![allow(warnings)]
extern "system" {
#[must_use]
fn cublasLtCreate(
lightHandle: *mut cuda_types::cublaslt::cublasLtHandle_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
fn cublasLtDestroy(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
) -> cuda_types::cublaslt::cublasStatus_t;
fn cublasLtGetStatusName(
status: cuda_types::cublaslt::cublasStatus_t,
) -> *const ::core::ffi::c_char;
fn cublasLtGetStatusString(
status: cuda_types::cublaslt::cublasStatus_t,
) -> *const ::core::ffi::c_char;
fn cublasLtGetVersion() -> usize;
fn cublasLtGetCudartVersion() -> usize;
#[must_use]
fn cublasLtGetProperty(
type_: cuda_types::cublaslt::libraryPropertyType,
value: *mut ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
fn cublasLtHeuristicsCacheGetCapacity(
capacity: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
fn cublasLtHeuristicsCacheSetCapacity(
capacity: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
/** Restricts usage of CPU instructions (ISA) specified by the flags in the mask.
Flags can be combined with bitwise OR(|) operator. Supported flags:
- 0x1 -- x86-64 AVX512 ISA
Default mask: 0 (any applicable ISA is allowed).
The function returns the previous value of the mask.
The function takes precedence over the environment variable CUBLASLT_DISABLE_CPU_INSTRUCTIONS_MASK.*/
fn cublasLtDisableCpuInstructionsSetMask(
mask: ::core::ffi::c_uint,
) -> ::core::ffi::c_uint;
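    // Usage sketch (comments only, since we are inside an extern block; names
    // are illustrative): mask 0x1 disables the x86-64 AVX512 code paths, and
    // the call returns the mask previously in effect, which can be restored.
    //     let previous = cublasLtDisableCpuInstructionsSetMask(0x1);
    //     /* ... run without AVX512 ... */
    //     cublasLtDisableCpuInstructionsSetMask(previous);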
#[must_use]
/** Execute matrix multiplication (D = alpha * op(A) * op(B) + beta * C).
\retval CUBLAS_STATUS_NOT_INITIALIZED if cuBLASLt handle has not been initialized
\retval CUBLAS_STATUS_INVALID_VALUE if parameters are in conflict or in an impossible configuration; e.g.
when workspaceSizeInBytes is less than workspace required by configured
algo
\retval CUBLAS_STATUS_NOT_SUPPORTED if current implementation on selected device doesn't support configured
operation
\retval CUBLAS_STATUS_ARCH_MISMATCH if configured operation cannot be run using selected device
\retval CUBLAS_STATUS_EXECUTION_FAILED if cuda reported execution error from the device
\retval CUBLAS_STATUS_SUCCESS if the operation completed successfully*/
fn cublasLtMatmul(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
computeDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
alpha: *const ::core::ffi::c_void,
A: *const ::core::ffi::c_void,
Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
B: *const ::core::ffi::c_void,
Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
beta: *const ::core::ffi::c_void,
C: *const ::core::ffi::c_void,
Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
D: *mut ::core::ffi::c_void,
Ddesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t,
workspace: *mut ::core::ffi::c_void,
workspaceSizeInBytes: usize,
stream: cuda_types::cublaslt::cudaStream_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Matrix layout conversion helper (C = alpha * op(A) + beta * op(B))
Can be used to change memory order of data or to scale and shift the values.
\retval CUBLAS_STATUS_NOT_INITIALIZED if cuBLASLt handle has not been initialized
\retval CUBLAS_STATUS_INVALID_VALUE if parameters are in conflict or in an impossible configuration; e.g.
when A is not NULL, but Adesc is NULL
\retval CUBLAS_STATUS_NOT_SUPPORTED if current implementation on selected device doesn't support configured
operation
\retval CUBLAS_STATUS_ARCH_MISMATCH if configured operation cannot be run using selected device
\retval CUBLAS_STATUS_EXECUTION_FAILED if cuda reported execution error from the device
\retval CUBLAS_STATUS_SUCCESS if the operation completed successfully*/
fn cublasLtMatrixTransform(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
alpha: *const ::core::ffi::c_void,
A: *const ::core::ffi::c_void,
Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
beta: *const ::core::ffi::c_void,
B: *const ::core::ffi::c_void,
Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
C: *mut ::core::ffi::c_void,
Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
stream: cuda_types::cublaslt::cudaStream_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatrixLayoutInit_internal(
matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t,
size: usize,
type_: cuda_types::cublaslt::cudaDataType,
rows: cuda_types::cublaslt::u64,
cols: cuda_types::cublaslt::u64,
ld: cuda_types::cublaslt::i64,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Create new matrix layout descriptor.
\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
\retval CUBLAS_STATUS_SUCCESS if descriptor was created successfully*/
fn cublasLtMatrixLayoutCreate(
matLayout: *mut cuda_types::cublaslt::cublasLtMatrixLayout_t,
type_: cuda_types::cublaslt::cudaDataType,
rows: cuda_types::cublaslt::u64,
cols: cuda_types::cublaslt::u64,
ld: cuda_types::cublaslt::i64,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Destroy matrix layout descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatrixLayoutDestroy(
matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Set matrix layout descriptor attribute.
\param[in] matLayout The descriptor
\param[in] attr The attribute
\param[in] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/
fn cublasLtMatrixLayoutSetAttribute(
matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t,
attr: cuda_types::cublaslt::cublasLtMatrixLayoutAttribute_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get matrix layout descriptor attribute.
\param[in] matLayout The descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatrixLayoutGetAttribute(
matLayout: cuda_types::cublaslt::cublasLtMatrixLayout_t,
attr: cuda_types::cublaslt::cublasLtMatrixLayoutAttribute_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
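    // Usage sketch (comments only, inside an extern block; `layout` and `attr`
    // are illustrative): the get-attribute functions follow a two-step pattern.
    // A call with sizeInBytes == 0 reports the required size via sizeWritten;
    // a second call with a properly sized buffer retrieves the value.
    //     let mut needed = 0usize;
    //     cublasLtMatrixLayoutGetAttribute(layout, attr, core::ptr::null_mut(), 0, &mut needed);
    //     let mut buf = vec![0u8; needed];
    //     cublasLtMatrixLayoutGetAttribute(layout, attr, buf.as_mut_ptr().cast(), buf.len(), &mut needed);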
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatmulDescInit_internal(
matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
size: usize,
computeType: cuda_types::cublaslt::cublasComputeType_t,
scaleType: cuda_types::cublaslt::cudaDataType_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Create new matmul operation descriptor.
\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
\retval CUBLAS_STATUS_SUCCESS if descriptor was created successfully*/
fn cublasLtMatmulDescCreate(
matmulDesc: *mut cuda_types::cublaslt::cublasLtMatmulDesc_t,
computeType: cuda_types::cublaslt::cublasComputeType_t,
scaleType: cuda_types::cublaslt::cudaDataType_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Destroy matmul operation descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatmulDescDestroy(
matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Set matmul operation descriptor attribute.
\param[in] matmulDesc The descriptor
\param[in] attr The attribute
\param[in] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/
fn cublasLtMatmulDescSetAttribute(
matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
attr: cuda_types::cublaslt::cublasLtMatmulDescAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get matmul operation descriptor attribute.
\param[in] matmulDesc The descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatmulDescGetAttribute(
matmulDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
attr: cuda_types::cublaslt::cublasLtMatmulDescAttributes_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatrixTransformDescInit_internal(
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
size: usize,
scaleType: cuda_types::cublaslt::cudaDataType,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Create new matrix transform operation descriptor.
\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
\retval CUBLAS_STATUS_SUCCESS if descriptor was created successfully*/
fn cublasLtMatrixTransformDescCreate(
transformDesc: *mut cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
scaleType: cuda_types::cublaslt::cudaDataType,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Destroy matrix transform operation descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatrixTransformDescDestroy(
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Set matrix transform operation descriptor attribute.
\param[in] transformDesc The descriptor
\param[in] attr The attribute
\param[in] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/
fn cublasLtMatrixTransformDescSetAttribute(
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
attr: cuda_types::cublaslt::cublasLtMatrixTransformDescAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get matrix transform operation descriptor attribute.
\param[in] transformDesc The descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number
of bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatrixTransformDescGetAttribute(
transformDesc: cuda_types::cublaslt::cublasLtMatrixTransformDesc_t,
attr: cuda_types::cublaslt::cublasLtMatrixTransformDescAttributes_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/// Internal. Do not use directly.
fn cublasLtMatmulPreferenceInit_internal(
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
size: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Create new matmul heuristic search preference descriptor.
\retval CUBLAS_STATUS_ALLOC_FAILED if memory could not be allocated
\retval CUBLAS_STATUS_SUCCESS if descriptor was created successfully*/
fn cublasLtMatmulPreferenceCreate(
pref: *mut cuda_types::cublaslt::cublasLtMatmulPreference_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Destroy matmul heuristic search preference descriptor.
\retval CUBLAS_STATUS_SUCCESS if operation was successful*/
fn cublasLtMatmulPreferenceDestroy(
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Set matmul heuristic search preference descriptor attribute.
\param[in] pref The descriptor
\param[in] attr The attribute
\param[in] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/
fn cublasLtMatmulPreferenceSetAttribute(
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
attr: cuda_types::cublaslt::cublasLtMatmulPreferenceAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get matmul heuristic search preference descriptor attribute.
\param[in] pref The descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatmulPreferenceGetAttribute(
pref: cuda_types::cublaslt::cublasLtMatmulPreference_t,
attr: cuda_types::cublaslt::cublasLtMatmulPreferenceAttributes_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Query cublasLt heuristic for algorithm appropriate for given use case.
\param[in] lightHandle Pointer to the allocated cuBLASLt handle for the cuBLASLt
context. See cublasLtHandle_t.
\param[in] operationDesc Handle to the matrix multiplication descriptor.
\param[in] Adesc Handle to the layout descriptors for matrix A.
\param[in] Bdesc Handle to the layout descriptors for matrix B.
\param[in] Cdesc Handle to the layout descriptors for matrix C.
\param[in] Ddesc Handle to the layout descriptors for matrix D.
\param[in] preference Pointer to the structure holding the heuristic search
preferences descriptor. See cublasLtMatrixLayout_t.
\param[in] requestedAlgoCount Size of heuristicResultsArray (in elements) and requested
maximum number of algorithms to return.
\param[in, out] heuristicResultsArray Output algorithms and associated runtime characteristics,
ordered in increasing estimated compute time.
\param[out] returnAlgoCount The number of heuristicResultsArray elements written.
\retval CUBLAS_STATUS_INVALID_VALUE if requestedAlgoCount is less or equal to zero
\retval CUBLAS_STATUS_NOT_SUPPORTED if no heuristic function available for current configuration
\retval CUBLAS_STATUS_SUCCESS if query was successful, inspect
heuristicResultsArray[0 to (returnAlgoCount - 1)].state
for detail status of results*/
fn cublasLtMatmulAlgoGetHeuristic(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
operationDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Ddesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
preference: cuda_types::cublaslt::cublasLtMatmulPreference_t,
requestedAlgoCount: ::core::ffi::c_int,
heuristicResultsArray: *mut cuda_types::cublaslt::cublasLtMatmulHeuristicResult_t,
returnAlgoCount: *mut ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Routine to get all algo IDs that can potentially run
\param[in] requestedAlgoCount requested number of algos (must be less or equal to size of algoIdsArray (in elements))
\param[out] algoIdsArray array to write algoIds to
\param[out] returnAlgoCount number of algoIds actually written
\retval CUBLAS_STATUS_INVALID_VALUE if requestedAlgoCount is less or equal to zero
\retval CUBLAS_STATUS_SUCCESS if query was successful, inspect returnAlgoCount to get actual number of IDs
available*/
fn cublasLtMatmulAlgoGetIds(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
computeType: cuda_types::cublaslt::cublasComputeType_t,
scaleType: cuda_types::cublaslt::cudaDataType_t,
Atype: cuda_types::cublaslt::cudaDataType_t,
Btype: cuda_types::cublaslt::cudaDataType_t,
Ctype: cuda_types::cublaslt::cudaDataType_t,
Dtype: cuda_types::cublaslt::cudaDataType_t,
requestedAlgoCount: ::core::ffi::c_int,
algoIdsArray: *mut ::core::ffi::c_int,
returnAlgoCount: *mut ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Initialize algo structure
\retval CUBLAS_STATUS_INVALID_VALUE if algo is NULL or algoId is outside of recognized range
\retval CUBLAS_STATUS_NOT_SUPPORTED if algoId is not supported for given combination of data types
\retval CUBLAS_STATUS_SUCCESS if the structure was successfully initialized*/
fn cublasLtMatmulAlgoInit(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
computeType: cuda_types::cublaslt::cublasComputeType_t,
scaleType: cuda_types::cublaslt::cudaDataType_t,
Atype: cuda_types::cublaslt::cudaDataType_t,
Btype: cuda_types::cublaslt::cudaDataType_t,
Ctype: cuda_types::cublaslt::cudaDataType_t,
Dtype: cuda_types::cublaslt::cudaDataType_t,
algoId: ::core::ffi::c_int,
algo: *mut cuda_types::cublaslt::cublasLtMatmulAlgo_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Check configured algo descriptor for correctness and support on current device.
Result includes required workspace size and calculated wave count.
CUBLAS_STATUS_SUCCESS doesn't fully guarantee algo will run (will fail if e.g. buffers are not correctly aligned);
but if cublasLtMatmulAlgoCheck fails, the algo will not run.
\param[in] algo algo configuration to check
\param[out] result result structure to report algo runtime characteristics; algo field is never updated
\retval CUBLAS_STATUS_INVALID_VALUE if matrix layout descriptors or operation descriptor don't match algo
descriptor
\retval CUBLAS_STATUS_NOT_SUPPORTED if algo configuration or data type combination is not currently supported on
given device
\retval CUBLAS_STATUS_ARCH_MISMATCH if algo configuration cannot be run using the selected device
\retval CUBLAS_STATUS_SUCCESS if check was successful*/
fn cublasLtMatmulAlgoCheck(
lightHandle: cuda_types::cublaslt::cublasLtHandle_t,
operationDesc: cuda_types::cublaslt::cublasLtMatmulDesc_t,
Adesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Bdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Cdesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
Ddesc: cuda_types::cublaslt::cublasLtMatrixLayout_t,
algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t,
result: *mut cuda_types::cublaslt::cublasLtMatmulHeuristicResult_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get algo capability attribute.
E.g. to get the list of supported Tile IDs:
cublasLtMatmulTile_t tiles[CUBLASLT_MATMUL_TILE_END];
size_t num_tiles, size_written;
if (cublasLtMatmulAlgoCapGetAttribute(algo, CUBLASLT_ALGO_CAP_TILE_IDS, tiles, sizeof(tiles), &size_written) == CUBLAS_STATUS_SUCCESS) {
    num_tiles = size_written / sizeof(tiles[0]);
}
\param[in] algo The algo descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatmulAlgoCapGetAttribute(
algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t,
attr: cuda_types::cublaslt::cublasLtMatmulAlgoCapAttributes_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Set algo configuration attribute.
\param[in] algo The algo descriptor
\param[in] attr The attribute
\param[in] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\retval CUBLAS_STATUS_INVALID_VALUE if buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute was set successfully*/
fn cublasLtMatmulAlgoConfigSetAttribute(
algo: *mut cuda_types::cublaslt::cublasLtMatmulAlgo_t,
attr: cuda_types::cublaslt::cublasLtMatmulAlgoConfigAttributes_t,
buf: *const ::core::ffi::c_void,
sizeInBytes: usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Get algo configuration attribute.
\param[in] algo The algo descriptor
\param[in] attr The attribute
\param[out] buf memory address containing the new value
\param[in] sizeInBytes size of buf buffer for verification (in bytes)
\param[out] sizeWritten only valid when return value is CUBLAS_STATUS_SUCCESS. If sizeInBytes is non-zero: number of
bytes actually written, if sizeInBytes is 0: number of bytes needed to write full contents
\retval CUBLAS_STATUS_INVALID_VALUE if sizeInBytes is 0 and sizeWritten is NULL, or if sizeInBytes is non-zero
and buf is NULL or sizeInBytes doesn't match size of internal storage for
selected attribute
\retval CUBLAS_STATUS_SUCCESS if attribute's value was successfully written to user memory*/
fn cublasLtMatmulAlgoConfigGetAttribute(
algo: *const cuda_types::cublaslt::cublasLtMatmulAlgo_t,
attr: cuda_types::cublaslt::cublasLtMatmulAlgoConfigAttributes_t,
buf: *mut ::core::ffi::c_void,
sizeInBytes: usize,
sizeWritten: *mut usize,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Experimental: Logger callback setter.
\param[in] callback a user defined callback function to be called by the logger
\retval CUBLAS_STATUS_SUCCESS if callback was set successfully*/
fn cublasLtLoggerSetCallback(
callback: cuda_types::cublaslt::cublasLtLoggerCallback_t,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Experimental: Log file setter.
\param[in] file an open file with write permissions
\retval CUBLAS_STATUS_SUCCESS if log file was set successfully*/
fn cublasLtLoggerSetFile(
file: *mut cuda_types::cublaslt::FILE,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Experimental: Open log file.
\param[in] logFile log file path. if the log file does not exist, it will be created
\retval CUBLAS_STATUS_SUCCESS if log file was created successfully*/
fn cublasLtLoggerOpenFile(
logFile: *const ::core::ffi::c_char,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Experimental: Log level setter.
\param[in] level log level, should be one of the following:
0. Off
1. Errors
2. Performance Trace
3. Performance Hints
4. Heuristics Trace
5. API Trace
\retval CUBLAS_STATUS_INVALID_VALUE if log level is not one of the above levels
\retval CUBLAS_STATUS_SUCCESS if log level was set successfully*/
fn cublasLtLoggerSetLevel(
level: ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
#[must_use]
/** Experimental: Log mask setter.
\param[in] mask log mask, should be a combination of the following masks:
0. Off
1. Errors
2. Performance Trace
4. Performance Hints
8. Heuristics Trace
16. API Trace
\retval CUBLAS_STATUS_SUCCESS if log mask was set successfully*/
fn cublasLtLoggerSetMask(
mask: ::core::ffi::c_int,
) -> cuda_types::cublaslt::cublasStatus_t;
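    // Usage sketch (comments only): mask values combine with bitwise OR, so
    // logging errors plus the API trace would be:
    //     cublasLtLoggerSetMask(1 | 16);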
#[must_use]
/** Experimental: Disable logging for the entire session.
\retval CUBLAS_STATUS_SUCCESS if disabled logging*/
fn cublasLtLoggerForceDisable() -> cuda_types::cublaslt::cublasStatus_t;
}
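Taken together, these declarations support the standard cuBLASLt flow: create a handle, describe the operation and matrix layouts, ask the heuristic for an algorithm, then call cublasLtMatmul. Below is a minimal sketch of the heuristic step, assuming the declarations above are re-exported publicly and resolve against a real libcublasLt at link time; all setup and error handling is elided.

use cuda_types::cublaslt::*;
use std::mem::MaybeUninit;

/// Ask the cuBLASLt heuristic for the single best algorithm for an
/// already-configured matmul (sketch only).
unsafe fn best_algo(
    handle: cublasLtHandle_t,
    op: cublasLtMatmulDesc_t,
    a: cublasLtMatrixLayout_t,
    b: cublasLtMatrixLayout_t,
    c: cublasLtMatrixLayout_t,
    d: cublasLtMatrixLayout_t,
    pref: cublasLtMatmulPreference_t,
) -> Result<cublasLtMatmulHeuristicResult_t, cublasStatus_t> {
    let mut result = MaybeUninit::<cublasLtMatmulHeuristicResult_t>::uninit();
    let mut returned = 0i32;
    let status = cublasLtMatmulAlgoGetHeuristic(
        handle, op, a, b, c, d, pref,
        1, // requestedAlgoCount: ask only for the top-ranked algorithm
        result.as_mut_ptr(),
        &mut returned,
    );
    if status == cublasStatus_t::CUBLAS_STATUS_SUCCESS && returned > 0 {
        // Results come back ordered by increasing estimated compute time.
        Ok(result.assume_init())
    } else {
        Err(status)
    }
}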

cuda_types/src/cublas.rs (new file, 313 lines)

@@ -0,0 +1,313 @@
// Generated automatically by zluda_bindgen
// DO NOT EDIT MANUALLY
#![allow(warnings)]
pub const CUBLAS_VER_MAJOR: u32 = 12;
pub const CUBLAS_VER_MINOR: u32 = 4;
pub const CUBLAS_VER_PATCH: u32 = 5;
pub const CUBLAS_VER_BUILD: u32 = 8;
pub const CUBLAS_VERSION: u32 = 120405;
impl cublasStatus_t {
pub const CUBLAS_STATUS_SUCCESS: cublasStatus_t = cublasStatus_t(0);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_NOT_INITIALIZED: cublasStatus_t = cublasStatus_t(1);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_ALLOC_FAILED: cublasStatus_t = cublasStatus_t(3);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_INVALID_VALUE: cublasStatus_t = cublasStatus_t(7);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_ARCH_MISMATCH: cublasStatus_t = cublasStatus_t(8);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_MAPPING_ERROR: cublasStatus_t = cublasStatus_t(11);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_EXECUTION_FAILED: cublasStatus_t = cublasStatus_t(13);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_INTERNAL_ERROR: cublasStatus_t = cublasStatus_t(14);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_NOT_SUPPORTED: cublasStatus_t = cublasStatus_t(15);
}
impl cublasStatus_t {
pub const CUBLAS_STATUS_LICENSE_ERROR: cublasStatus_t = cublasStatus_t(16);
}
#[repr(transparent)]
#[must_use]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasStatus_t(pub ::core::ffi::c_uint);
impl cublasFillMode_t {
pub const CUBLAS_FILL_MODE_LOWER: cublasFillMode_t = cublasFillMode_t(0);
}
impl cublasFillMode_t {
pub const CUBLAS_FILL_MODE_UPPER: cublasFillMode_t = cublasFillMode_t(1);
}
impl cublasFillMode_t {
pub const CUBLAS_FILL_MODE_FULL: cublasFillMode_t = cublasFillMode_t(2);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasFillMode_t(pub ::core::ffi::c_uint);
impl cublasDiagType_t {
pub const CUBLAS_DIAG_NON_UNIT: cublasDiagType_t = cublasDiagType_t(0);
}
impl cublasDiagType_t {
pub const CUBLAS_DIAG_UNIT: cublasDiagType_t = cublasDiagType_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasDiagType_t(pub ::core::ffi::c_uint);
impl cublasSideMode_t {
pub const CUBLAS_SIDE_LEFT: cublasSideMode_t = cublasSideMode_t(0);
}
impl cublasSideMode_t {
pub const CUBLAS_SIDE_RIGHT: cublasSideMode_t = cublasSideMode_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasSideMode_t(pub ::core::ffi::c_uint);
impl cublasOperation_t {
pub const CUBLAS_OP_N: cublasOperation_t = cublasOperation_t(0);
}
impl cublasOperation_t {
pub const CUBLAS_OP_T: cublasOperation_t = cublasOperation_t(1);
}
impl cublasOperation_t {
pub const CUBLAS_OP_C: cublasOperation_t = cublasOperation_t(2);
}
impl cublasOperation_t {
pub const CUBLAS_OP_HERMITAN: cublasOperation_t = cublasOperation_t(2);
}
impl cublasOperation_t {
pub const CUBLAS_OP_CONJG: cublasOperation_t = cublasOperation_t(3);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasOperation_t(pub ::core::ffi::c_uint);
impl cublasPointerMode_t {
pub const CUBLAS_POINTER_MODE_HOST: cublasPointerMode_t = cublasPointerMode_t(0);
}
impl cublasPointerMode_t {
pub const CUBLAS_POINTER_MODE_DEVICE: cublasPointerMode_t = cublasPointerMode_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasPointerMode_t(pub ::core::ffi::c_uint);
impl cublasAtomicsMode_t {
pub const CUBLAS_ATOMICS_NOT_ALLOWED: cublasAtomicsMode_t = cublasAtomicsMode_t(0);
}
impl cublasAtomicsMode_t {
pub const CUBLAS_ATOMICS_ALLOWED: cublasAtomicsMode_t = cublasAtomicsMode_t(1);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasAtomicsMode_t(pub ::core::ffi::c_uint);
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_DFALT: cublasGemmAlgo_t = cublasGemmAlgo_t(-1);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_DEFAULT: cublasGemmAlgo_t = cublasGemmAlgo_t(-1);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO0: cublasGemmAlgo_t = cublasGemmAlgo_t(0);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO1: cublasGemmAlgo_t = cublasGemmAlgo_t(1);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO2: cublasGemmAlgo_t = cublasGemmAlgo_t(2);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO3: cublasGemmAlgo_t = cublasGemmAlgo_t(3);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO4: cublasGemmAlgo_t = cublasGemmAlgo_t(4);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO5: cublasGemmAlgo_t = cublasGemmAlgo_t(5);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO6: cublasGemmAlgo_t = cublasGemmAlgo_t(6);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO7: cublasGemmAlgo_t = cublasGemmAlgo_t(7);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO8: cublasGemmAlgo_t = cublasGemmAlgo_t(8);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO9: cublasGemmAlgo_t = cublasGemmAlgo_t(9);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO10: cublasGemmAlgo_t = cublasGemmAlgo_t(10);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO11: cublasGemmAlgo_t = cublasGemmAlgo_t(11);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO12: cublasGemmAlgo_t = cublasGemmAlgo_t(12);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO13: cublasGemmAlgo_t = cublasGemmAlgo_t(13);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO14: cublasGemmAlgo_t = cublasGemmAlgo_t(14);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO15: cublasGemmAlgo_t = cublasGemmAlgo_t(15);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO16: cublasGemmAlgo_t = cublasGemmAlgo_t(16);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO17: cublasGemmAlgo_t = cublasGemmAlgo_t(17);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO18: cublasGemmAlgo_t = cublasGemmAlgo_t(18);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO19: cublasGemmAlgo_t = cublasGemmAlgo_t(19);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO20: cublasGemmAlgo_t = cublasGemmAlgo_t(20);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO21: cublasGemmAlgo_t = cublasGemmAlgo_t(21);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO22: cublasGemmAlgo_t = cublasGemmAlgo_t(22);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO23: cublasGemmAlgo_t = cublasGemmAlgo_t(23);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_DEFAULT_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(99);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_DFALT_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(99);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO0_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(100);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO1_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(101);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO2_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(102);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO3_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(103);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO4_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(104);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO5_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(105);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO6_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(106);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO7_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(107);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO8_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(108);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO9_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(109);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO10_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(110);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO11_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(111);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO12_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(112);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO13_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(113);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO14_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(114);
}
impl cublasGemmAlgo_t {
pub const CUBLAS_GEMM_ALGO15_TENSOR_OP: cublasGemmAlgo_t = cublasGemmAlgo_t(115);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasGemmAlgo_t(pub ::core::ffi::c_int);
impl cublasMath_t {
pub const CUBLAS_DEFAULT_MATH: cublasMath_t = cublasMath_t(0);
}
impl cublasMath_t {
pub const CUBLAS_TENSOR_OP_MATH: cublasMath_t = cublasMath_t(1);
}
impl cublasMath_t {
pub const CUBLAS_PEDANTIC_MATH: cublasMath_t = cublasMath_t(2);
}
impl cublasMath_t {
pub const CUBLAS_TF32_TENSOR_OP_MATH: cublasMath_t = cublasMath_t(3);
}
impl cublasMath_t {
pub const CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION: cublasMath_t = cublasMath_t(
16,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasMath_t(pub ::core::ffi::c_uint);
pub use super::cuda::cudaDataType as cublasDataType_t;
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_16F: cublasComputeType_t = cublasComputeType_t(64);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_16F_PEDANTIC: cublasComputeType_t = cublasComputeType_t(65);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32F: cublasComputeType_t = cublasComputeType_t(68);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32F_PEDANTIC: cublasComputeType_t = cublasComputeType_t(69);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32F_FAST_16F: cublasComputeType_t = cublasComputeType_t(74);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32F_FAST_16BF: cublasComputeType_t = cublasComputeType_t(
75,
);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32F_FAST_TF32: cublasComputeType_t = cublasComputeType_t(
77,
);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_64F: cublasComputeType_t = cublasComputeType_t(70);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_64F_PEDANTIC: cublasComputeType_t = cublasComputeType_t(71);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32I: cublasComputeType_t = cublasComputeType_t(72);
}
impl cublasComputeType_t {
pub const CUBLAS_COMPUTE_32I_PEDANTIC: cublasComputeType_t = cublasComputeType_t(73);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cublasComputeType_t(pub ::core::ffi::c_uint);
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cublasContext {
_unused: [u8; 0],
}
pub type cublasHandle_t = *mut cublasContext;
pub type cublasLogCallback = ::core::option::Option<
unsafe extern "C" fn(msg: *const ::core::ffi::c_char),
>;
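Note the shape of these definitions: zluda_bindgen emits each C enum as a #[repr(transparent)] newtype with associated constants rather than a Rust enum, presumably so that status codes outside the listed set can still cross the FFI boundary without undefined behavior. A sketch of the resulting call-site pattern (the check helper is hypothetical, not part of the bindings):

use cuda_types::cublas::cublasStatus_t;

// Fold a raw status into a Result. The catch-all arm stays total because
// unknown codes remain representable in the newtype.
fn check(status: cublasStatus_t) -> Result<(), cublasStatus_t> {
    match status {
        cublasStatus_t::CUBLAS_STATUS_SUCCESS => Ok(()),
        other => Err(other),
    }
}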

cuda_types/src/cublaslt.rs (new file, 2260 lines)

File diff suppressed because it is too large.

cuda_types/src/cuda.rs

@@ -7587,6 +7587,100 @@ pub type VdpGetProcAddress = ::core::option::Option<
function_pointer: *mut *mut ::core::ffi::c_void,
) -> VdpStatus,
>;
impl cudaDataType_t {
pub const CUDA_R_16F: cudaDataType_t = cudaDataType_t(2);
}
impl cudaDataType_t {
pub const CUDA_C_16F: cudaDataType_t = cudaDataType_t(6);
}
impl cudaDataType_t {
pub const CUDA_R_16BF: cudaDataType_t = cudaDataType_t(14);
}
impl cudaDataType_t {
pub const CUDA_C_16BF: cudaDataType_t = cudaDataType_t(15);
}
impl cudaDataType_t {
pub const CUDA_R_32F: cudaDataType_t = cudaDataType_t(0);
}
impl cudaDataType_t {
pub const CUDA_C_32F: cudaDataType_t = cudaDataType_t(4);
}
impl cudaDataType_t {
pub const CUDA_R_64F: cudaDataType_t = cudaDataType_t(1);
}
impl cudaDataType_t {
pub const CUDA_C_64F: cudaDataType_t = cudaDataType_t(5);
}
impl cudaDataType_t {
pub const CUDA_R_4I: cudaDataType_t = cudaDataType_t(16);
}
impl cudaDataType_t {
pub const CUDA_C_4I: cudaDataType_t = cudaDataType_t(17);
}
impl cudaDataType_t {
pub const CUDA_R_4U: cudaDataType_t = cudaDataType_t(18);
}
impl cudaDataType_t {
pub const CUDA_C_4U: cudaDataType_t = cudaDataType_t(19);
}
impl cudaDataType_t {
pub const CUDA_R_8I: cudaDataType_t = cudaDataType_t(3);
}
impl cudaDataType_t {
pub const CUDA_C_8I: cudaDataType_t = cudaDataType_t(7);
}
impl cudaDataType_t {
pub const CUDA_R_8U: cudaDataType_t = cudaDataType_t(8);
}
impl cudaDataType_t {
pub const CUDA_C_8U: cudaDataType_t = cudaDataType_t(9);
}
impl cudaDataType_t {
pub const CUDA_R_16I: cudaDataType_t = cudaDataType_t(20);
}
impl cudaDataType_t {
pub const CUDA_C_16I: cudaDataType_t = cudaDataType_t(21);
}
impl cudaDataType_t {
pub const CUDA_R_16U: cudaDataType_t = cudaDataType_t(22);
}
impl cudaDataType_t {
pub const CUDA_C_16U: cudaDataType_t = cudaDataType_t(23);
}
impl cudaDataType_t {
pub const CUDA_R_32I: cudaDataType_t = cudaDataType_t(10);
}
impl cudaDataType_t {
pub const CUDA_C_32I: cudaDataType_t = cudaDataType_t(11);
}
impl cudaDataType_t {
pub const CUDA_R_32U: cudaDataType_t = cudaDataType_t(12);
}
impl cudaDataType_t {
pub const CUDA_C_32U: cudaDataType_t = cudaDataType_t(13);
}
impl cudaDataType_t {
pub const CUDA_R_64I: cudaDataType_t = cudaDataType_t(24);
}
impl cudaDataType_t {
pub const CUDA_C_64I: cudaDataType_t = cudaDataType_t(25);
}
impl cudaDataType_t {
pub const CUDA_R_64U: cudaDataType_t = cudaDataType_t(26);
}
impl cudaDataType_t {
pub const CUDA_C_64U: cudaDataType_t = cudaDataType_t(27);
}
impl cudaDataType_t {
pub const CUDA_R_8F_E4M3: cudaDataType_t = cudaDataType_t(28);
}
impl cudaDataType_t {
pub const CUDA_R_8F_E5M2: cudaDataType_t = cudaDataType_t(29);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudaDataType_t(pub ::core::ffi::c_uint);
pub use self::cudaDataType_t as cudaDataType;
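// Illustrative sketch, not part of the generated file: a hypothetical helper
// trait mapping Rust element types onto the constants above.
//     trait HasCudaDataType { const TYPE: cudaDataType; }
//     impl HasCudaDataType for f32 { const TYPE: cudaDataType = cudaDataType_t::CUDA_R_32F; }
//     impl HasCudaDataType for f64 { const TYPE: cudaDataType = cudaDataType_t::CUDA_R_64F; }
//     impl HasCudaDataType for i8 { const TYPE: cudaDataType = cudaDataType_t::CUDA_R_8I; }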
impl CUerror {
pub const INVALID_VALUE: CUerror = CUerror(unsafe {
::core::num::NonZeroU32::new_unchecked(1)

cuda_types/src/cudnn.rs (new file, 1478 lines)

File diff suppressed because it is too large.

cuda_types/src/cudnn8.rs (new file, 565 lines)

@@ -0,0 +1,565 @@
@ -0,0 +1,565 @@
// Generated automatically by zluda_bindgen
// DO NOT EDIT MANUALLY
#![allow(warnings)]
pub const CUDNN_MAJOR: u32 = 8;
pub const CUDNN_MINOR: u32 = 9;
pub const CUDNN_PATCHLEVEL: u32 = 7;
pub const CUDNN_VERSION: u32 = 8907;
pub const CUDNN_MAX_SM_MAJOR_NUMBER: u32 = 9;
pub const CUDNN_MAX_SM_MINOR_NUMBER: u32 = 0;
pub const CUDNN_MAX_DEVICE_VERSION: u32 = 900;
pub const CUDNN_SM_50: u32 = 500;
pub const CUDNN_SM_52: u32 = 520;
pub const CUDNN_SM_53: u32 = 530;
pub const CUDNN_SM_60: u32 = 600;
pub const CUDNN_SM_61: u32 = 610;
pub const CUDNN_SM_62: u32 = 620;
pub const CUDNN_SM_70: u32 = 700;
pub const CUDNN_SM_72: u32 = 720;
pub const CUDNN_SM_75: u32 = 750;
pub const CUDNN_SM_80: u32 = 800;
pub const CUDNN_SM_86: u32 = 860;
pub const CUDNN_SM_87: u32 = 870;
pub const CUDNN_SM_89: u32 = 890;
pub const CUDNN_SM_90: u32 = 900;
pub const CUDNN_SM_9X_END: u32 = 999;
pub const CUDNN_MIN_DEVICE_VERSION: u32 = 500;
pub const CUDNN_OPS_INFER_MAJOR: u32 = 8;
pub const CUDNN_OPS_INFER_MINOR: u32 = 9;
pub const CUDNN_OPS_INFER_PATCH: u32 = 7;
pub const CUDNN_DIM_MAX: u32 = 8;
pub const CUDNN_LRN_MIN_N: u32 = 1;
pub const CUDNN_LRN_MAX_N: u32 = 16;
pub const CUDNN_LRN_MIN_K: f64 = 0.00001;
pub const CUDNN_LRN_MIN_BETA: f64 = 0.01;
pub const CUDNN_BN_MIN_EPSILON: f64 = 0.0;
pub const CUDNN_OPS_TRAIN_MAJOR: u32 = 8;
pub const CUDNN_OPS_TRAIN_MINOR: u32 = 9;
pub const CUDNN_OPS_TRAIN_PATCH: u32 = 7;
pub const CUDNN_ADV_INFER_MAJOR: u32 = 8;
pub const CUDNN_ADV_INFER_MINOR: u32 = 9;
pub const CUDNN_ADV_INFER_PATCH: u32 = 7;
pub const CUDNN_RNN_PADDED_IO_DISABLED: u32 = 0;
pub const CUDNN_RNN_PADDED_IO_ENABLED: u32 = 1;
pub const CUDNN_SEQDATA_DIM_COUNT: u32 = 4;
pub const CUDNN_ATTN_QUERYMAP_ALL_TO_ONE: u32 = 0;
pub const CUDNN_ATTN_QUERYMAP_ONE_TO_ONE: u32 = 1;
pub const CUDNN_ATTN_DISABLE_PROJ_BIASES: u32 = 0;
pub const CUDNN_ATTN_ENABLE_PROJ_BIASES: u32 = 2;
pub const CUDNN_ATTN_WKIND_COUNT: u32 = 8;
pub const CUDNN_ADV_TRAIN_MAJOR: u32 = 8;
pub const CUDNN_ADV_TRAIN_MINOR: u32 = 9;
pub const CUDNN_ADV_TRAIN_PATCH: u32 = 7;
pub const CUDNN_CNN_INFER_MAJOR: u32 = 8;
pub const CUDNN_CNN_INFER_MINOR: u32 = 9;
pub const CUDNN_CNN_INFER_PATCH: u32 = 7;
pub const CUDNN_CNN_TRAIN_MAJOR: u32 = 8;
pub const CUDNN_CNN_TRAIN_MINOR: u32 = 9;
pub const CUDNN_CNN_TRAIN_PATCH: u32 = 7;
pub use super::cudnn::cudnnContext;
pub type cudnnHandle_t = *mut cudnnContext;
impl cudnnStatus_t {
pub const CUDNN_STATUS_SUCCESS: cudnnStatus_t = cudnnStatus_t(0);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_NOT_INITIALIZED: cudnnStatus_t = cudnnStatus_t(1);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_ALLOC_FAILED: cudnnStatus_t = cudnnStatus_t(2);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_BAD_PARAM: cudnnStatus_t = cudnnStatus_t(3);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_INTERNAL_ERROR: cudnnStatus_t = cudnnStatus_t(4);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_INVALID_VALUE: cudnnStatus_t = cudnnStatus_t(5);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_ARCH_MISMATCH: cudnnStatus_t = cudnnStatus_t(6);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_MAPPING_ERROR: cudnnStatus_t = cudnnStatus_t(7);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_EXECUTION_FAILED: cudnnStatus_t = cudnnStatus_t(8);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_NOT_SUPPORTED: cudnnStatus_t = cudnnStatus_t(9);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_LICENSE_ERROR: cudnnStatus_t = cudnnStatus_t(10);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING: cudnnStatus_t = cudnnStatus_t(
11,
);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_RUNTIME_IN_PROGRESS: cudnnStatus_t = cudnnStatus_t(12);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_RUNTIME_FP_OVERFLOW: cudnnStatus_t = cudnnStatus_t(13);
}
impl cudnnStatus_t {
pub const CUDNN_STATUS_VERSION_MISMATCH: cudnnStatus_t = cudnnStatus_t(14);
}
#[repr(transparent)]
#[must_use]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudnnStatus_t(pub ::core::ffi::c_uint);
pub use super::cudnn::cudnnRuntimeTag_t;
pub use super::cudnn::cudnnErrQueryMode_t;
pub use super::cudnn::cudnnTensorStruct;
pub type cudnnTensorDescriptor_t = *mut cudnnTensorStruct;
pub use super::cudnn::cudnnPoolingStruct;
pub type cudnnPoolingDescriptor_t = *mut cudnnPoolingStruct;
pub use super::cudnn::cudnnFilterStruct;
pub type cudnnFilterDescriptor_t = *mut cudnnFilterStruct;
pub use super::cudnn::cudnnLRNStruct;
pub type cudnnLRNDescriptor_t = *mut cudnnLRNStruct;
pub use super::cudnn::cudnnActivationStruct;
pub type cudnnActivationDescriptor_t = *mut cudnnActivationStruct;
pub use super::cudnn::cudnnSpatialTransformerStruct;
pub type cudnnSpatialTransformerDescriptor_t = *mut cudnnSpatialTransformerStruct;
pub use super::cudnn::cudnnOpTensorStruct;
pub type cudnnOpTensorDescriptor_t = *mut cudnnOpTensorStruct;
pub use super::cudnn::cudnnReduceTensorStruct;
pub type cudnnReduceTensorDescriptor_t = *mut cudnnReduceTensorStruct;
pub use super::cudnn::cudnnCTCLossStruct;
pub type cudnnCTCLossDescriptor_t = *mut cudnnCTCLossStruct;
pub use super::cudnn::cudnnTensorTransformStruct;
pub type cudnnTensorTransformDescriptor_t = *mut cudnnTensorTransformStruct;
pub use super::cudnn9::cudnnDataType_t;
pub use super::cudnn::cudnnMathType_t;
pub use super::cudnn::cudnnNanPropagation_t;
pub use super::cudnn::cudnnDeterminism_t;
pub use super::cudnn::cudnnTensorFormat_t;
pub use super::cudnn::cudnnFoldingDirection_t;
pub use super::cudnn::cudnnOpTensorOp_t;
pub use super::cudnn::cudnnReduceTensorOp_t;
pub use super::cudnn::cudnnReduceTensorIndices_t;
pub use super::cudnn::cudnnIndicesType_t;
pub use super::cudnn::cudnnSoftmaxAlgorithm_t;
pub use super::cudnn::cudnnSoftmaxMode_t;
pub use super::cudnn::cudnnPoolingMode_t;
pub use super::cudnn::cudnnActivationMode_t;
pub use super::cudnn::cudnnLRNMode_t;
pub use super::cudnn::cudnnDivNormMode_t;
pub use super::cudnn::cudnnBatchNormMode_t;
pub use super::cudnn::cudnnBatchNormOps_t;
pub use super::cudnn::cudnnNormMode_t;
pub use super::cudnn::cudnnNormAlgo_t;
pub use super::cudnn::cudnnNormOps_t;
pub use super::cudnn::cudnnSamplerType_t;
pub use super::cudnn::cudnnDropoutStruct;
pub type cudnnDropoutDescriptor_t = *mut cudnnDropoutStruct;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cudnnAlgorithmStruct {
_unused: [u8; 0],
}
pub type cudnnAlgorithmDescriptor_t = *mut cudnnAlgorithmStruct;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cudnnAlgorithmPerformanceStruct {
_unused: [u8; 0],
}
pub type cudnnAlgorithmPerformance_t = *mut cudnnAlgorithmPerformanceStruct;
pub use super::cudnn::cudnnConvolutionFwdAlgo_t;
pub use super::cudnn::cudnnConvolutionBwdFilterAlgo_t;
pub use super::cudnn::cudnnConvolutionBwdDataAlgo_t;
pub use super::cudnn::cudnnRNNAlgo_t;
pub use super::cudnn::cudnnCTCLossAlgo_t;
#[repr(C)]
#[derive(Copy, Clone)]
pub struct cudnnAlgorithmUnionStruct {
pub algo: cudnnAlgorithmUnionStruct_Algorithm,
}
#[repr(C)]
#[derive(Copy, Clone)]
pub union cudnnAlgorithmUnionStruct_Algorithm {
pub convFwdAlgo: cudnnConvolutionFwdAlgo_t,
pub convBwdFilterAlgo: cudnnConvolutionBwdFilterAlgo_t,
pub convBwdDataAlgo: cudnnConvolutionBwdDataAlgo_t,
pub RNNAlgo: cudnnRNNAlgo_t,
pub CTCLossAlgo: cudnnCTCLossAlgo_t,
}
pub type cudnnAlgorithm_t = cudnnAlgorithmUnionStruct;
pub use super::cudnn::cudnnSeverity_t;
#[repr(C)]
pub struct cudnnDebugStruct {
pub cudnn_version: ::core::ffi::c_uint,
pub cudnnStatus: cudnnStatus_t,
pub time_sec: ::core::ffi::c_uint,
pub time_usec: ::core::ffi::c_uint,
pub time_delta: ::core::ffi::c_uint,
pub handle: cudnnHandle_t,
pub stream: super::cuda::CUstream,
pub pid: ::core::ffi::c_ulonglong,
pub tid: ::core::ffi::c_ulonglong,
pub cudaDeviceId: ::core::ffi::c_int,
pub reserved: [::core::ffi::c_int; 15usize],
}
pub type cudnnDebug_t = cudnnDebugStruct;
pub type cudnnCallback_t = ::core::option::Option<
unsafe extern "C" fn(
sev: cudnnSeverity_t,
udata: *mut ::core::ffi::c_void,
dbg: *const cudnnDebug_t,
msg: *const ::core::ffi::c_char,
),
>;
pub use super::cudnn::cudnnForwardMode_t;
pub use super::cudnn::cudnnRNNMode_t;
pub use super::cudnn::cudnnRNNBiasMode_t;
pub use super::cudnn::cudnnDirectionMode_t;
pub use super::cudnn::cudnnRNNInputMode_t;
pub use super::cudnn::cudnnRNNClipMode_t;
pub use super::cudnn::cudnnRNNDataLayout_t;
pub type cudnnRNNPaddingMode_t = ::core::ffi::c_uint;
pub use super::cudnn::cudnnRNNStruct;
pub type cudnnRNNDescriptor_t = *mut cudnnRNNStruct;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct cudnnPersistentRNNPlan {
_unused: [u8; 0],
}
pub type cudnnPersistentRNNPlan_t = *mut cudnnPersistentRNNPlan;
pub use super::cudnn::cudnnRNNDataStruct;
pub type cudnnRNNDataDescriptor_t = *mut cudnnRNNDataStruct;
pub use super::cudnn::cudnnSeqDataAxis_t;
pub use super::cudnn::cudnnSeqDataStruct;
pub type cudnnSeqDataDescriptor_t = *mut cudnnSeqDataStruct;
pub type cudnnAttnQueryMap_t = ::core::ffi::c_uint;
pub use super::cudnn::cudnnAttnStruct;
pub type cudnnAttnDescriptor_t = *mut cudnnAttnStruct;
pub use super::cudnn::cudnnMultiHeadAttnWeightKind_t;
pub use super::cudnn::cudnnWgradMode_t;
pub use super::cudnn::cudnnLossNormalizationMode_t;
pub use super::cudnn::cudnnConvolutionStruct;
pub type cudnnConvolutionDescriptor_t = *mut cudnnConvolutionStruct;
pub use super::cudnn::cudnnConvolutionMode_t;
pub use super::cudnn::cudnnReorderType_t;
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct cudnnConvolutionFwdAlgoPerfStruct {
pub algo: cudnnConvolutionFwdAlgo_t,
pub status: cudnnStatus_t,
pub time: f32,
pub memory: usize,
pub determinism: cudnnDeterminism_t,
pub mathType: cudnnMathType_t,
pub reserved: [::core::ffi::c_int; 3usize],
}
pub type cudnnConvolutionFwdAlgoPerf_t = cudnnConvolutionFwdAlgoPerfStruct;
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct cudnnConvolutionBwdDataAlgoPerfStruct {
pub algo: cudnnConvolutionBwdDataAlgo_t,
pub status: cudnnStatus_t,
pub time: f32,
pub memory: usize,
pub determinism: cudnnDeterminism_t,
pub mathType: cudnnMathType_t,
pub reserved: [::core::ffi::c_int; 3usize],
}
pub type cudnnConvolutionBwdDataAlgoPerf_t = cudnnConvolutionBwdDataAlgoPerfStruct;
pub use super::cudnn::cudnnFusedOpsConstParamStruct;
pub type cudnnFusedOpsConstParamPack_t = *mut cudnnFusedOpsConstParamStruct;
pub use super::cudnn::cudnnFusedOpsVariantParamStruct;
pub type cudnnFusedOpsVariantParamPack_t = *mut cudnnFusedOpsVariantParamStruct;
pub use super::cudnn::cudnnFusedOpsPlanStruct;
pub type cudnnFusedOpsPlan_t = *mut cudnnFusedOpsPlanStruct;
pub use super::cudnn::cudnnFusedOps_t;
pub use super::cudnn::cudnnFusedOpsConstParamLabel_t;
pub use super::cudnn::cudnnFusedOpsPointerPlaceHolder_t;
pub use super::cudnn::cudnnFusedOpsVariantParamLabel_t;
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct cudnnConvolutionBwdFilterAlgoPerfStruct {
pub algo: cudnnConvolutionBwdFilterAlgo_t,
pub status: cudnnStatus_t,
pub time: f32,
pub memory: usize,
pub determinism: cudnnDeterminism_t,
pub mathType: cudnnMathType_t,
pub reserved: [::core::ffi::c_int; 3usize],
}
pub type cudnnConvolutionBwdFilterAlgoPerf_t = cudnnConvolutionBwdFilterAlgoPerfStruct;
pub type cudnnBackendDescriptor_t = *mut ::core::ffi::c_void;
pub use super::cudnn::cudnnFractionStruct;
pub type cudnnFraction_t = cudnnFractionStruct;
pub use super::cudnn9::cudnnPointwiseMode_t;
pub use super::cudnn::cudnnResampleMode_t;
pub use super::cudnn::cudnnSignalMode_t;
pub use super::cudnn::cudnnGenStatsMode_t;
pub use super::cudnn::cudnnBnFinalizeStatsMode_t;
pub use super::cudnn::cudnnRngDistribution_t;
pub use super::cudnn9::cudnnBackendAttributeName_t;
pub use super::cudnn::cudnnBackendAttributeType_t;
pub use super::cudnn9::cudnnBackendDescriptorType_t;
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_TENSOR_CORE: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
0,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_DOWN_CONVERT_INPUTS: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
1,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_REDUCED_PRECISION_REDUCTION: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
2,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_FFT: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
3,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_NONDETERMINISTIC: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
4,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_WINOGRAD: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
5,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_4x4: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
6,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_6x6: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
7,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_WINOGRAD_TILE_13x13: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
8,
);
}
impl cudnnBackendNumericalNote_t {
pub const CUDNN_NUMERICAL_NOTE_TYPE_COUNT: cudnnBackendNumericalNote_t = cudnnBackendNumericalNote_t(
9,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudnnBackendNumericalNote_t(pub ::core::ffi::c_uint);
impl cudnnBackendBehaviorNote_t {
pub const CUDNN_BEHAVIOR_NOTE_RUNTIME_COMPILATION: cudnnBackendBehaviorNote_t = cudnnBackendBehaviorNote_t(
0,
);
}
impl cudnnBackendBehaviorNote_t {
pub const CUDNN_BEHAVIOR_NOTE_REQUIRES_FILTER_INT8x32_REORDER: cudnnBackendBehaviorNote_t = cudnnBackendBehaviorNote_t(
1,
);
}
impl cudnnBackendBehaviorNote_t {
pub const CUDNN_BEHAVIOR_NOTE_REQUIRES_BIAS_INT8x32_REORDER: cudnnBackendBehaviorNote_t = cudnnBackendBehaviorNote_t(
2,
);
}
impl cudnnBackendBehaviorNote_t {
pub const CUDNN_BEHAVIOR_NOTE_TYPE_COUNT: cudnnBackendBehaviorNote_t = cudnnBackendBehaviorNote_t(
3,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudnnBackendBehaviorNote_t(pub ::core::ffi::c_uint);
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_K: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
0,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SWIZZLE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
1,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_SIZE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
2,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_USE_TEX: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
3,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_EDGE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(4);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_KBLOCK: cudnnBackendKnobType_t = cudnnBackendKnobType_t(5);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_LDGA: cudnnBackendKnobType_t = cudnnBackendKnobType_t(6);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_LDGB: cudnnBackendKnobType_t = cudnnBackendKnobType_t(7);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_CHUNK_K: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
8,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_H: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
9,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_WINO_TILE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
10,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_MULTIPLY: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
11,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_K_BUF: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
12,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILEK: cudnnBackendKnobType_t = cudnnBackendKnobType_t(13);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_STAGES: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
14,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_REDUCTION_MODE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
15,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_CTA_SPLIT_K_MODE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
16,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_K_SLC: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
17,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_IDX_MODE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
18,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SLICED: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
19,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_RS: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
20,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SINGLEBUFFER: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
21,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_LDGC: cudnnBackendKnobType_t = cudnnBackendKnobType_t(22);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPECFILT: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
23,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_KERNEL_CFG: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
24,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_WORKSPACE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
25,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_CGA: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
26,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_CGA_M: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
27,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_CGA_N: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
28,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_BLOCK_SIZE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
29,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_OCCUPANCY: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
30,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_ARRAY_SIZE_PER_THREAD: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
31,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_NUM_C_PER_BLOCK: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
32,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_SPLIT_COLS: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
33,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_ROWS: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
34,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_TILE_COLS: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
35,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_LOAD_SIZE: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
36,
);
}
impl cudnnBackendKnobType_t {
pub const CUDNN_KNOB_TYPE_COUNTS: cudnnBackendKnobType_t = cudnnBackendKnobType_t(
37,
);
}
#[repr(transparent)]
#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)]
pub struct cudnnBackendKnobType_t(pub ::core::ffi::c_uint);
pub use super::cudnn::cudnnBackendLayoutType_t;
pub use super::cudnn::cudnnBackendHeurMode_t;
pub use super::cudnn9::cudnnBackendTensorReordering_t;
pub use super::cudnn::cudnnPaddingMode_t;
pub use super::cudnn9::cudnnBackendNormMode_t;
pub use super::cudnn::cudnnBackendNormFwdPhase_t;
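
cudnn8.rs keeps the cuDNN 8 logging surface (cudnnDebug_t, cudnnCallback_t) alongside the version-specific re-exports. A minimal callback matching the generated cudnnCallback_t signature might look like the following sketch (the function name and body are illustrative only, assuming these types are publicly visible):

use cuda_types::cudnn8::{cudnnDebug_t, cudnnSeverity_t};
use std::ffi::{c_char, c_void, CStr};

// Matches cudnnCallback_t's inner signature; since the type is an Option,
// registration would pass Some(log_callback).
unsafe extern "C" fn log_callback(
    _sev: cudnnSeverity_t,
    _udata: *mut c_void,
    _dbg: *const cudnnDebug_t,
    msg: *const c_char,
) {
    if !msg.is_null() {
        eprintln!("cudnn: {}", CStr::from_ptr(msg).to_string_lossy());
    }
}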

cuda_types/src/cudnn9.rs (new file, 2393 lines)

File diff suppressed because it is too large.

cuda_types/src/lib.rs

@@ -1,4 +1,7 @@
-pub mod cuda;
-pub mod nvml;
+pub mod cublas;
+pub mod cublaslt;
+pub mod cuda;
+pub mod cudnn;
+pub mod cudnn8;
+pub mod cudnn9;
+pub mod nvml;