Update do CUDA 12.8 and ROCm 6.4

This commit is contained in:
Andrzej Janik 2025-04-20 12:52:12 +00:00
parent 686416d7af
commit 6a511ffb8a
12 changed files with 11192 additions and 1636 deletions

View file

@ -1,4 +1,4 @@
FROM nvidia/cuda:12.4.1-base-ubuntu22.04
FROM nvidia/cuda:12.8.1-base-ubuntu24.04
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
wget \
@ -18,10 +18,14 @@ RUN wget https://apt.llvm.org/llvm.sh && \
./llvm.sh ${LLVM_VERSION}
# Feel free to change to a newer version if you have a newer verison on your host
ARG CUDA_PKG_VERSION=12-4
ARG CUDA_PKG_VERSION=12-8
# Docker <-> host driver version compatiblity is newer host <-> older docker
# We don't care about a specific driver version, so pick oldest 5XX
ARG CUDA_DRIVER=515
# We don't care about a specific driver version, so pick oldest 5XX compatible
ARG CUDA_DRIVER=570
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb && \
dpkg -i libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb && \
rm libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb libcudnn8-dev_8.9.7.29-1+cuda12.2_amd64.deb
RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
# CUDA headers need it for interop
libgl-dev libegl-dev libvdpau-dev \
@ -30,13 +34,17 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get update -y && DEBIAN_FRONTEND=noninter
cuda-nvml-dev-${CUDA_PKG_VERSION} \
cuda-cudart-${CUDA_PKG_VERSION} \
cuda-profiler-api-${CUDA_PKG_VERSION} \
cuda-nvcc-${CUDA_PKG_VERSION}
cuda-nvcc-${CUDA_PKG_VERSION} \
libcudnn8-dev \
cudnn9-cuda-${CUDA_PKG_VERSION} \
libcufft-dev-${CUDA_PKG_VERSION} \
libcublas-dev-${CUDA_PKG_VERSION}
ARG ROCM_VERSION=6.3.1
ARG ROCM_VERSION=6.4
RUN mkdir --parents --mode=0755 /etc/apt/keyrings && \
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} jammy main" > /etc/apt/sources.list.d/rocm.list && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} noble main" > /etc/apt/sources.list.d/rocm.list && \
echo 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' > /etc/apt/preferences.d/rocm-pin-600 && \
DEBIAN_FRONTEND=noninteractive apt update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
rocminfo \

View file

@ -7,7 +7,7 @@
},
"securityOpt": [ "seccomp=unconfined" ],
"runArgs": [
"--runtime=nvidia",
//"--runtime=nvidia",
"--device=/dev/kfd",
"--device=/dev/dri",
"--group-add=video"
@ -25,7 +25,7 @@
},
// https://aka.ms/dev-containers-non-root.
"remoteUser": "root",
//"hostRequirements": { "gpu": "optional" }
"hostRequirements": { "gpu": true },
"customizations": {
"vscode": {
"extensions": [ "mhutchie.git-graph" ]

View file

@ -6068,6 +6068,52 @@ extern "system" {
group_size: *const cuda_types::cublas::i64,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGemmGroupedBatchedEx(
handle: cuda_types::cublas::cublasHandle_t,
transa_array: *const cuda_types::cublas::cublasOperation_t,
transb_array: *const cuda_types::cublas::cublasOperation_t,
m_array: *const ::core::ffi::c_int,
n_array: *const ::core::ffi::c_int,
k_array: *const ::core::ffi::c_int,
alpha_array: *const ::core::ffi::c_void,
Aarray: *const *const ::core::ffi::c_void,
Atype: cuda_types::cublas::cudaDataType_t,
lda_array: *const ::core::ffi::c_int,
Barray: *const *const ::core::ffi::c_void,
Btype: cuda_types::cublas::cudaDataType_t,
ldb_array: *const ::core::ffi::c_int,
beta_array: *const ::core::ffi::c_void,
Carray: *const *mut ::core::ffi::c_void,
Ctype: cuda_types::cublas::cudaDataType_t,
ldc_array: *const ::core::ffi::c_int,
group_count: ::core::ffi::c_int,
group_size: *const ::core::ffi::c_int,
computeType: cuda_types::cublas::cublasComputeType_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasGemmGroupedBatchedEx_64(
handle: cuda_types::cublas::cublasHandle_t,
transa_array: *const cuda_types::cublas::cublasOperation_t,
transb_array: *const cuda_types::cublas::cublasOperation_t,
m_array: *const cuda_types::cublas::i64,
n_array: *const cuda_types::cublas::i64,
k_array: *const cuda_types::cublas::i64,
alpha_array: *const ::core::ffi::c_void,
Aarray: *const *const ::core::ffi::c_void,
Atype: cuda_types::cublas::cudaDataType_t,
lda_array: *const cuda_types::cublas::i64,
Barray: *const *const ::core::ffi::c_void,
Btype: cuda_types::cublas::cudaDataType_t,
ldb_array: *const cuda_types::cublas::i64,
beta_array: *const ::core::ffi::c_void,
Carray: *const *mut ::core::ffi::c_void,
Ctype: cuda_types::cublas::cudaDataType_t,
ldc_array: *const cuda_types::cublas::i64,
group_count: cuda_types::cublas::i64,
group_size: *const cuda_types::cublas::i64,
computeType: cuda_types::cublas::cublasComputeType_t,
) -> cuda_types::cublas::cublasStatus_t;
#[must_use]
fn cublasSgeam(
handle: cuda_types::cublas::cublasHandle_t,
transa: cuda_types::cublas::cublasOperation_t,

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -2,10 +2,10 @@
// DO NOT EDIT MANUALLY
#![allow(warnings)]
pub const CUBLAS_VER_MAJOR: u32 = 12;
pub const CUBLAS_VER_MINOR: u32 = 4;
pub const CUBLAS_VER_PATCH: u32 = 5;
pub const CUBLAS_VER_BUILD: u32 = 8;
pub const CUBLAS_VERSION: u32 = 120405;
pub const CUBLAS_VER_MINOR: u32 = 8;
pub const CUBLAS_VER_PATCH: u32 = 4;
pub const CUBLAS_VER_BUILD: u32 = 1;
pub const CUBLAS_VERSION: u32 = 120804;
impl cublasStatus_t {
pub const CUBLAS_STATUS_SUCCESS: cublasStatus_t = cublasStatus_t(0);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -5,9 +5,9 @@ use std::{
borrow::Cow, collections::hash_map, fs::File, io::Write, iter, path::PathBuf, str::FromStr,
};
use syn::{
parse, parse_quote, punctuated::Punctuated, visit_mut::VisitMut, Abi, Fields, FieldsUnnamed,
FnArg, ForeignItem, ForeignItemFn, Ident, Item, ItemConst, ItemForeignMod, ItemUse, LitStr,
Path, PathArguments, PathSegment, Signature, Type, TypePath, UseTree,
parse_quote, punctuated::Punctuated, visit_mut::VisitMut, Abi, Fields, FieldsUnnamed, FnArg,
ForeignItem, ForeignItemFn, Ident, Item, ItemConst, ItemForeignMod, ItemUse, LitStr, Path,
PathArguments, PathSegment, Signature, Type, TypePath, UseTree,
};
fn main() {
@ -36,13 +36,13 @@ fn generate_cudnn(crate_root: &PathBuf) {
.unwrap()
.to_string();
let module: syn::File = syn::parse_str(&cudnn9).unwrap();
generate_functions(
&crate_root,
"cudnn9",
&["..", "cuda_base", "src", "cudnn9.rs"],
&module,
);
let cudnn9_types = generate_types_library_impl(&module);
//generate_functions(
// &crate_root,
// "cudnn9",
// &["..", "cuda_base", "src", "cudnn9.rs"],
// &module,
//);
let mut current_dir = PathBuf::from(file!());
current_dir.pop();
let cudnn8 = new_builder()
@ -60,13 +60,13 @@ fn generate_cudnn(crate_root: &PathBuf) {
.unwrap()
.to_string();
let module: syn::File = syn::parse_str(&cudnn8).unwrap();
generate_functions(
&crate_root,
"cudnn8",
&["..", "cuda_base", "src", "cudnn8.rs"],
&module,
);
let cudnn8_types = generate_types_library_impl(&module);
//generate_functions(
// &crate_root,
// "cudnn8",
// &["..", "cuda_base", "src", "cudnn8.rs"],
// &module,
//);
merge_types(
&crate_root,
&["..", "cuda_types", "src", "cudnn.rs"],
@ -82,10 +82,10 @@ fn generate_cudnn(crate_root: &PathBuf) {
// - cudnn9-specific
// - cudnn shared
// With the rules being:
// - constants go to the specific files
// - if there's conflict between types they go to specific files
// - constants go to the version-specific files
// - if there's conflict between types they go to version-specific files
// - if the cudnn9 type is purely additive over cudnn8 then it goes into the
// shared (and is re-exported)
// shared (and is re-exported by both)
fn merge_types(
output: &PathBuf,
cudnn_path: &[&str],
@ -96,6 +96,134 @@ fn merge_types(
) {
let cudnn_enums = merge_enums(&cudnn9_types, &cudnn8_types);
let conflicting_types = get_conflicting_structs(&cudnn9_types, &cudnn8_types, cudnn_enums);
write_common_cudnn_types(output, cudnn_path, &cudnn9_types, &conflicting_types);
write_cudnn8_types(output, cudnn8_path, &cudnn8_types, &conflicting_types);
write_cudnn9_types(output, cudnn9_path, &cudnn9_types, &conflicting_types);
}
fn write_cudnn9_types(
output: &PathBuf,
cudnn9_path: &[&str],
cudnn9_types: &syn::File,
conflicting_types: &FxHashMap<&Ident, CudnnEnumMergeResult>,
) {
let items = cudnn9_types.items.iter().filter_map(|item| match item {
Item::Impl(impl_) => match conflicting_types.get(type_to_ident(&*impl_.self_ty)) {
Some(CudnnEnumMergeResult::Conflict) | Some(CudnnEnumMergeResult::Cudnn9) | None => {
Option::<syn::Item>::Some(parse_quote!( #impl_))
}
Some(CudnnEnumMergeResult::Same) => None,
},
Item::Struct(struct_) => match conflicting_types.get(&struct_.ident) {
Some(CudnnEnumMergeResult::Conflict) | Some(CudnnEnumMergeResult::Cudnn9) | None => {
Some(parse_quote!( #struct_))
}
Some(CudnnEnumMergeResult::Same) => {
let type_ = &struct_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
},
Item::Enum(enum_) => match conflicting_types.get(&enum_.ident) {
Some(CudnnEnumMergeResult::Conflict) | Some(CudnnEnumMergeResult::Cudnn9) | None => {
Some(parse_quote!( #enum_))
}
Some(CudnnEnumMergeResult::Same) => {
let type_ = &enum_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
},
Item::ForeignMod(ItemForeignMod { .. }) => None,
Item::Const(const_) => Some(parse_quote!(#const_)),
Item::Union(union_) => match conflicting_types.get(&union_.ident) {
Some(CudnnEnumMergeResult::Conflict) | Some(CudnnEnumMergeResult::Cudnn9) | None => {
Some(parse_quote!( #union_))
}
Some(CudnnEnumMergeResult::Same) => {
let type_ = &union_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
},
Item::Use(use_) => Some(parse_quote!(#use_)),
Item::Type(type_) => Some(parse_quote!(#type_)),
_ => unimplemented!(),
});
let module: syn::File = parse_quote! {
#(#items)*
};
let mut output = output.clone();
output.extend(cudnn9_path);
let text = prettyplease::unparse(&module).replace("cudaStream_t", "super::cuda::CUstream");
write_rust_to_file(output, &text)
}
fn write_cudnn8_types(
output: &PathBuf,
cudnn8_path: &[&str],
cudnn8_types: &syn::File,
conflicting_types: &FxHashMap<&Ident, CudnnEnumMergeResult>,
) {
let items = cudnn8_types.items.iter().filter_map(|item| match item {
Item::Impl(impl_) => match conflicting_types.get(type_to_ident(&*impl_.self_ty)) {
Some(CudnnEnumMergeResult::Conflict) | None => {
Option::<syn::Item>::Some(parse_quote!( #impl_))
}
Some(CudnnEnumMergeResult::Same) => None,
Some(CudnnEnumMergeResult::Cudnn9) => None,
},
Item::Struct(struct_) => match conflicting_types.get(&struct_.ident) {
Some(CudnnEnumMergeResult::Conflict) | None => Some(parse_quote!( #struct_)),
Some(CudnnEnumMergeResult::Same) => {
let type_ = &struct_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
Some(CudnnEnumMergeResult::Cudnn9) => {
let type_ = &struct_.ident;
Some(parse_quote!( pub use super::cudnn9:: #type_; ))
}
},
Item::Enum(enum_) => match conflicting_types.get(&enum_.ident) {
Some(CudnnEnumMergeResult::Conflict) | None => Some(parse_quote!( #enum_)),
Some(CudnnEnumMergeResult::Same) => {
let type_ = &enum_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
Some(CudnnEnumMergeResult::Cudnn9) => {
let type_ = &enum_.ident;
Some(parse_quote!( pub use super::cudnn9:: #type_; ))
}
},
Item::ForeignMod(ItemForeignMod { .. }) => None,
Item::Const(const_) => Some(parse_quote!(#const_)),
Item::Union(union_) => match conflicting_types.get(&union_.ident) {
Some(CudnnEnumMergeResult::Conflict) | None => Some(parse_quote!( #union_)),
Some(CudnnEnumMergeResult::Same) => {
let type_ = &union_.ident;
Some(parse_quote!( pub use super::cudnn:: #type_; ))
}
Some(CudnnEnumMergeResult::Cudnn9) => {
let type_ = &union_.ident;
Some(parse_quote!( pub use super::cudnn9:: #type_; ))
}
},
Item::Use(use_) => Some(parse_quote!(#use_)),
Item::Type(type_) => Some(parse_quote!(#type_)),
_ => unimplemented!(),
});
let module: syn::File = parse_quote! {
#(#items)*
};
let mut output = output.clone();
output.extend(cudnn8_path);
let text = prettyplease::unparse(&module).replace("cudaStream_t", "super::cuda::CUstream");
write_rust_to_file(output, &text)
}
fn write_common_cudnn_types(
output: &PathBuf,
cudnn_path: &[&str],
cudnn9_types: &syn::File,
conflicting_types: &FxHashMap<&Ident, CudnnEnumMergeResult>,
) {
let common_items = cudnn9_types.items.iter().filter_map(|item| match item {
Item::Impl(ref impl_) => match conflicting_types.get(type_to_ident(&*impl_.self_ty)) {
Some(CudnnEnumMergeResult::Conflict) => None,
@ -134,16 +262,13 @@ fn merge_types(
_ => None,
//_ => unimplemented!(),
});
let file: syn::File = parse_quote! {
let cudnn_common: syn::File = parse_quote! {
#(#common_items)*
};
{
let mut output = output.clone();
output.extend(cudnn_path);
let text = prettyplease::unparse(&file);
write_rust_to_file(output, &text)
}
let mut output = output.clone();
output.extend(cudnn_path);
let text = prettyplease::unparse(&cudnn_common);
write_rust_to_file(output, &text)
}
fn get_conflicting_structs<'a>(

File diff suppressed because it is too large Load diff