From 986fa49097ef31fcd5eedcc05a624eb57d582ba4 Mon Sep 17 00:00:00 2001
From: Andrzej Janik
Date: Mon, 13 Sep 2021 23:43:50 +0000
Subject: [PATCH] Zero out buffer on creation

---
Review notes (not part of the commit message):
- cuMemAlloc_v2 rejects a null `dptr` before allocating and releases the
  allocation when zero-filling fails; the commented-out hipMemsetD32
  experiment is not included.
- `hipFree` is assumed to be in scope next to `hipMalloc`/`hipMemsetD8`
  in zluda/src/cuda.rs -- confirm before applying.
- Line structure and context indentation of this patch were reconstructed;
  use `git am --ignore-whitespace` if context lines do not match.

 ptx/src/test/spirv_run/mod.rs |  1 +
 zluda/src/cuda.rs             | 24 +++++++++++++++++++++++-
 zluda/src/impl/module.rs      |  1 +
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/ptx/src/test/spirv_run/mod.rs b/ptx/src/test/spirv_run/mod.rs
index 0330d3f..51f1930 100644
--- a/ptx/src/test/spirv_run/mod.rs
+++ b/ptx/src/test/spirv_run/mod.rs
@@ -637,6 +637,7 @@ fn compile_amd(
     let mut compiler_cmd = Command::new(&clang_exe);
     compiler_cmd
         .arg(format!("-mcpu={}", device_name))
+        .arg("-ffp-contract=off")
         .arg("-nogpulib")
         .arg("-mno-wavefrontsize64")
         .arg("-O3")
diff --git a/zluda/src/cuda.rs b/zluda/src/cuda.rs
index 9be70b5..e66ee75 100644
--- a/zluda/src/cuda.rs
+++ b/zluda/src/cuda.rs
@@ -2588,7 +2588,29 @@ pub extern "system" fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> C
 
 #[cfg_attr(not(test), no_mangle)]
+/// Allocates `bytesize` bytes of device memory through HIP, zero-fills the
+/// allocation and stores the resulting address in `*dptr`.
+///
+/// Returns `CUDA_ERROR_INVALID_VALUE` when `dptr` is null. A HIP failure is
+/// converted to a `CUresult` via `into()` and `*dptr` is left untouched.
 pub extern "system" fn cuMemAlloc_v2(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult {
-    unsafe { hipMalloc(dptr as _, bytesize).into() }
+    if dptr.is_null() {
+        return CUresult::CUDA_ERROR_INVALID_VALUE;
+    }
+    let mut dev_ptr = std::ptr::null_mut();
+    let err: CUresult = unsafe { hipMalloc(&mut dev_ptr, bytesize) }.into();
+    if err != CUresult::CUDA_SUCCESS {
+        return err;
+    }
+    // HACK ALERT: GeekBench is buggy and sometimes assumes that buffers are zeroed-out on creation
+    let err: CUresult = unsafe { hipMemsetD8(dev_ptr, 0, bytesize) }.into();
+    if err != CUresult::CUDA_SUCCESS {
+        // Zeroing failed: release the fresh allocation instead of leaking it
+        let _ = unsafe { hipFree(dev_ptr) };
+        return err;
+    }
+    // SAFETY: `dptr` was checked to be non-null above
+    unsafe { *dptr = CUdeviceptr(dev_ptr as usize) };
+    CUresult::CUDA_SUCCESS
 }
 
 #[cfg_attr(not(test), no_mangle)]
diff --git a/zluda/src/impl/module.rs b/zluda/src/impl/module.rs
index 6bd9a40..6575d96 100644
--- a/zluda/src/impl/module.rs
+++ b/zluda/src/impl/module.rs
@@ -185,6 +185,7 @@ fn compile_amd(
     let mut compiler_cmd = Command::new(&clang_exe);
     compiler_cmd
         .arg(format!("-mcpu={}", device_name))
+        .arg("-ffp-contract=off")
         .arg("-nogpulib")
         .arg("-mno-wavefrontsize64")
         .arg("-O3")