mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-28 13:37:57 +03:00
Convert OpenCL host code to SVM
This commit is contained in:
parent
638786b0ec
commit
becda31524
3 changed files with 77 additions and 189 deletions
|
@ -4,6 +4,7 @@ use cuda::{CUdevice_attribute, CUuuid_st};
|
|||
use ocl_core::{ClDeviceIdPtr, ContextProperties, DeviceType};
|
||||
use std::{
|
||||
cmp,
|
||||
collections::HashSet,
|
||||
ffi::c_void,
|
||||
mem,
|
||||
os::raw::{c_char, c_int, c_uint},
|
||||
|
@ -24,175 +25,14 @@ pub struct Device {
|
|||
pub ocl_base: ocl_core::DeviceId,
|
||||
pub default_queue: ocl_core::CommandQueue,
|
||||
pub ocl_context: ocl_core::Context,
|
||||
pub(crate) ocl_ext: OpenCLExtensions,
|
||||
pub primary_context: context::Context,
|
||||
pub allocations: HashSet<*mut c_void>,
|
||||
properties: Option<Box<l0::sys::ze_device_properties_t>>,
|
||||
image_properties: Option<Box<l0::sys::ze_device_image_properties_t>>,
|
||||
memory_properties: Option<Vec<l0::sys::ze_device_memory_properties_t>>,
|
||||
compute_properties: Option<Box<l0::sys::ze_device_compute_properties_t>>,
|
||||
}
|
||||
|
||||
type cl_mem_properties_intel = ocl_core::ffi::cl_bitfield;
|
||||
|
||||
pub(crate) struct OpenCLExtensions {
|
||||
pub clDeviceMemAllocINTEL: unsafe extern "system" fn(
|
||||
ocl_core::ffi::cl_context,
|
||||
ocl_core::ffi::cl_device_id,
|
||||
*const cl_mem_properties_intel,
|
||||
usize,
|
||||
ocl_core::ffi::cl_uint,
|
||||
*mut ocl_core::ffi::cl_int,
|
||||
) -> *mut c_void,
|
||||
pub clEnqueueMemcpyINTEL: unsafe extern "system" fn(
|
||||
ocl_core::ffi::cl_command_queue,
|
||||
ocl_core::ffi::cl_bool,
|
||||
*mut c_void,
|
||||
*const c_void,
|
||||
usize,
|
||||
ocl_core::ffi::cl_uint,
|
||||
*const ocl_core::ffi::cl_event,
|
||||
*mut ocl_core::ffi::cl_event,
|
||||
) -> ocl_core::ffi::cl_int,
|
||||
pub clMemBlockingFreeINTEL:
|
||||
unsafe extern "system" fn(ocl_core::ffi::cl_context, *mut c_void) -> ocl_core::ffi::cl_int,
|
||||
pub clEnqueueMemFillINTEL: unsafe extern "system" fn(
|
||||
ocl_core::ffi::cl_command_queue,
|
||||
*mut c_void,
|
||||
*const c_void,
|
||||
usize,
|
||||
usize,
|
||||
ocl_core::ffi::cl_uint,
|
||||
*const ocl_core::ffi::cl_event,
|
||||
*mut ocl_core::ffi::cl_event,
|
||||
) -> ocl_core::ffi::cl_int,
|
||||
}
|
||||
|
||||
impl OpenCLExtensions {
|
||||
fn new(plat: &ocl_core::PlatformId) -> Result<Self, CUresult> {
|
||||
let clDeviceMemAllocINTEL = unsafe {
|
||||
ocl_core::get_extension_function_address_for_platform(
|
||||
plat,
|
||||
"clDeviceMemAllocINTEL",
|
||||
None,
|
||||
)?
|
||||
};
|
||||
let clEnqueueMemcpyINTEL = unsafe {
|
||||
ocl_core::get_extension_function_address_for_platform(
|
||||
plat,
|
||||
"clEnqueueMemcpyINTEL",
|
||||
None,
|
||||
)?
|
||||
};
|
||||
let clMemBlockingFreeINTEL = unsafe {
|
||||
ocl_core::get_extension_function_address_for_platform(
|
||||
plat,
|
||||
"clMemBlockingFreeINTEL",
|
||||
None,
|
||||
)?
|
||||
};
|
||||
let clEnqueueMemFillINTEL = unsafe {
|
||||
ocl_core::get_extension_function_address_for_platform(
|
||||
plat,
|
||||
"clEnqueueMemFillINTEL",
|
||||
None,
|
||||
)?
|
||||
};
|
||||
Ok(Self {
|
||||
clDeviceMemAllocINTEL: unsafe { mem::transmute(clDeviceMemAllocINTEL) },
|
||||
clEnqueueMemcpyINTEL: unsafe { mem::transmute(clEnqueueMemcpyINTEL) },
|
||||
clMemBlockingFreeINTEL: unsafe { mem::transmute(clMemBlockingFreeINTEL) },
|
||||
clEnqueueMemFillINTEL: unsafe { mem::transmute(clEnqueueMemFillINTEL) },
|
||||
})
|
||||
}
|
||||
|
||||
pub unsafe fn device_mem_alloc(
|
||||
&self,
|
||||
ctx: &ocl_core::Context,
|
||||
device: &ocl_core::DeviceId,
|
||||
size: usize,
|
||||
alignment: ocl_core::ffi::cl_uint,
|
||||
) -> Result<*mut c_void, CUresult> {
|
||||
let mut error = 0;
|
||||
let result = (self.clDeviceMemAllocINTEL)(
|
||||
ctx.as_ptr(),
|
||||
device.as_ptr(),
|
||||
ptr::null(),
|
||||
size,
|
||||
alignment,
|
||||
&mut error,
|
||||
);
|
||||
if error == 0 {
|
||||
Ok(result)
|
||||
} else {
|
||||
Err(CUresult::CUDA_ERROR_UNKNOWN)
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe fn enqueue_memcpy(
|
||||
&self,
|
||||
queue: &ocl_core::CommandQueue,
|
||||
blocking: bool,
|
||||
dst: *mut c_void,
|
||||
src: *const c_void,
|
||||
size: usize,
|
||||
) -> Result<(), CUresult> {
|
||||
let error = (self.clEnqueueMemcpyINTEL)(
|
||||
queue.as_ptr(),
|
||||
if blocking { 1 } else { 0 },
|
||||
dst,
|
||||
src,
|
||||
size,
|
||||
0,
|
||||
ptr::null(),
|
||||
ptr::null_mut(),
|
||||
);
|
||||
if error == 0 {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(CUresult::CUDA_ERROR_UNKNOWN)
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe fn mem_blocking_free(
|
||||
&self,
|
||||
ctx: &ocl_core::Context,
|
||||
mem_ptr: *mut c_void,
|
||||
) -> Result<(), CUresult> {
|
||||
let error = (self.clMemBlockingFreeINTEL)(ctx.as_ptr(), mem_ptr);
|
||||
if error == 0 {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(CUresult::CUDA_ERROR_UNKNOWN)
|
||||
}
|
||||
}
|
||||
|
||||
pub unsafe fn enqueue_memfill(
|
||||
&self,
|
||||
queue: &ocl_core::CommandQueue,
|
||||
dst: *mut c_void,
|
||||
pattern: *const c_void,
|
||||
patternSize: usize,
|
||||
size: usize,
|
||||
) -> Result<ocl_core::Event, CUresult> {
|
||||
let mut signal: ocl_core::ffi::cl_event = ptr::null_mut();
|
||||
let error = (self.clEnqueueMemFillINTEL)(
|
||||
queue.as_ptr(),
|
||||
dst,
|
||||
pattern,
|
||||
patternSize,
|
||||
size,
|
||||
0,
|
||||
ptr::null(),
|
||||
&mut signal,
|
||||
);
|
||||
if error == 0 {
|
||||
Ok(ocl_core::Event::from_raw(signal))
|
||||
} else {
|
||||
Err(CUresult::CUDA_ERROR_UNKNOWN)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): `Device` stores raw pointers (e.g. `allocations: HashSet<*mut c_void>`),
// so the compiler does not derive `Send` for it. This impl asserts that moving a
// `Device` to another thread is sound; the invariant that justifies it is not
// visible here — confirm.
unsafe impl Send for Device {}
|
||||
|
||||
impl Device {
|
||||
|
@ -202,7 +42,6 @@ impl Device {
|
|||
ocl_dev: ocl_core::DeviceId,
|
||||
idx: usize,
|
||||
) -> Result<Self, CUresult> {
|
||||
let ocl_ext = OpenCLExtensions::new(&platform)?;
|
||||
let mut props = ocl_core::ContextProperties::new();
|
||||
props.set_platform(platform);
|
||||
let ctx = ocl_core::create_context(Some(&props), &[ocl_dev], None, None)?;
|
||||
|
@ -210,13 +49,13 @@ impl Device {
|
|||
let primary_context =
|
||||
context::Context::new(context::ContextData::new(0, true, ptr::null_mut())?);
|
||||
Ok(Self {
|
||||
ocl_ext,
|
||||
index: Index(idx as c_int),
|
||||
base: l0_dev,
|
||||
ocl_base: ocl_dev,
|
||||
default_queue: queue,
|
||||
ocl_context: ctx,
|
||||
primary_context,
|
||||
allocations: HashSet::new(),
|
||||
properties: None,
|
||||
image_properties: None,
|
||||
memory_properties: None,
|
||||
|
|
|
@ -3,7 +3,7 @@ use ocl_core::DeviceId;
|
|||
use super::{stream::Stream, CUresult, GlobalState, HasLivenessCookie, LiveCheck};
|
||||
use crate::cuda::CUfunction_attribute;
|
||||
use ::std::os::raw::{c_uint, c_void};
|
||||
use std::{hint, ptr};
|
||||
use std::{hint, mem, ptr};
|
||||
|
||||
const CU_LAUNCH_PARAM_END: *mut c_void = 0 as *mut _;
|
||||
const CU_LAUNCH_PARAM_BUFFER_POINTER: *mut c_void = 1 as *mut _;
|
||||
|
@ -101,7 +101,9 @@ pub fn launch_kernel(
|
|||
{
|
||||
return Err(CUresult::CUDA_ERROR_INVALID_VALUE);
|
||||
}
|
||||
GlobalState::lock_enqueue(hstream, |queue| {
|
||||
GlobalState::lock_stream(hstream, |stream_data| {
|
||||
let dev = unsafe { &mut *(*stream_data.context).device };
|
||||
let queue = stream_data.cmd_list.as_ref().unwrap();
|
||||
let func: &mut FunctionData = unsafe { &mut *f }.as_result_mut()?;
|
||||
if kernel_params != ptr::null_mut() {
|
||||
for (i, &(arg_size, is_mem)) in func.arg_size.iter().enumerate() {
|
||||
|
@ -162,6 +164,16 @@ pub fn launch_kernel(
|
|||
)?
|
||||
};
|
||||
}
|
||||
let buffers = dev.allocations.iter().copied().collect::<Vec<_>>();
|
||||
let err = unsafe {
|
||||
ocl_core::ffi::clSetKernelExecInfo(
|
||||
func.base.as_ptr(),
|
||||
ocl_core::ffi::CL_KERNEL_EXEC_INFO_SVM_PTRS,
|
||||
buffers.len() * mem::size_of::<*mut c_void>(),
|
||||
buffers.as_ptr() as *const _,
|
||||
)
|
||||
};
|
||||
assert_eq!(err, 0);
|
||||
let global_dims = [
|
||||
(block_dim_x * grid_dim_x) as usize,
|
||||
(block_dim_y * grid_dim_y) as usize,
|
||||
|
@ -184,7 +196,7 @@ pub fn launch_kernel(
|
|||
)?
|
||||
};
|
||||
Ok::<_, CUresult>(())
|
||||
})
|
||||
})?
|
||||
}
|
||||
|
||||
fn round_up_to_multiple(x: usize, multiple: usize) -> usize {
|
||||
|
|
|
@ -5,27 +5,39 @@ use super::{
|
|||
use std::{
|
||||
ffi::c_void,
|
||||
mem::{self, size_of},
|
||||
ptr,
|
||||
};
|
||||
|
||||
pub fn alloc_v2(dptr: *mut *mut c_void, bytesize: usize) -> Result<(), CUresult> {
|
||||
let ptr = GlobalState::lock_stream(CU_STREAM_LEGACY, |stream_data| {
|
||||
let dev = unsafe { &*(*stream_data.context).device };
|
||||
let dev = unsafe { &mut *(*stream_data.context).device };
|
||||
let queue = stream_data.cmd_list.as_ref().unwrap();
|
||||
let ptr = unsafe {
|
||||
dev.ocl_ext
|
||||
.device_mem_alloc(&dev.ocl_context, &dev.ocl_base, bytesize, 0)?
|
||||
ocl_core::ffi::clSVMAlloc(
|
||||
dev.ocl_context.as_ptr(),
|
||||
ocl_core::ffi::CL_MEM_READ_WRITE,
|
||||
bytesize,
|
||||
0,
|
||||
)
|
||||
};
|
||||
// CUDA does the same thing and e.g. GeekBench relies on this behavior
|
||||
let event = unsafe {
|
||||
dev.ocl_ext.enqueue_memfill(
|
||||
queue,
|
||||
let mut event = ptr::null_mut();
|
||||
let err = unsafe {
|
||||
ocl_core::ffi::clEnqueueSVMMemFill(
|
||||
queue.as_ptr(),
|
||||
ptr,
|
||||
&0u8 as *const u8 as *const c_void,
|
||||
1,
|
||||
bytesize,
|
||||
)?
|
||||
0,
|
||||
ptr::null(),
|
||||
&mut event,
|
||||
)
|
||||
};
|
||||
ocl_core::wait_for_event(&event)?;
|
||||
assert_eq!(err, 0);
|
||||
let err = unsafe { ocl_core::ffi::clWaitForEvents(1, &mut event) };
|
||||
assert_eq!(err, 0);
|
||||
dev.allocations.insert(ptr);
|
||||
Ok::<_, CUresult>(ptr)
|
||||
})??;
|
||||
unsafe { *dptr = ptr };
|
||||
|
@ -36,10 +48,22 @@ pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<
|
|||
GlobalState::lock_stream(stream::CU_STREAM_LEGACY, |stream_data| {
|
||||
let dev = unsafe { &*(*stream_data.context).device };
|
||||
let queue = stream_data.cmd_list.as_ref().unwrap();
|
||||
unsafe {
|
||||
dev.ocl_ext
|
||||
.enqueue_memcpy(queue, true, dst, src, bytesize)?
|
||||
let mut event = ptr::null_mut();
|
||||
let err = unsafe {
|
||||
ocl_core::ffi::clEnqueueSVMMemcpy(
|
||||
queue.as_ptr(),
|
||||
1,
|
||||
dst,
|
||||
src,
|
||||
bytesize,
|
||||
0,
|
||||
ptr::null(),
|
||||
&mut event,
|
||||
)
|
||||
};
|
||||
assert_eq!(err, 0);
|
||||
let err = unsafe { ocl_core::ffi::clWaitForEvents(1, &mut event) };
|
||||
assert_eq!(err, 0);
|
||||
Ok(())
|
||||
})?
|
||||
}
|
||||
|
@ -47,7 +71,8 @@ pub fn copy_v2(dst: *mut c_void, src: *const c_void, bytesize: usize) -> Result<
|
|||
pub fn free_v2(ptr: *mut c_void) -> Result<(), CUresult> {
|
||||
GlobalState::lock_current_context(|ctx| {
|
||||
let dev = unsafe { &mut *ctx.device };
|
||||
unsafe { dev.ocl_ext.mem_blocking_free(&dev.ocl_context, ptr)? };
|
||||
unsafe { ocl_core::ffi::clSVMFree(dev.ocl_context.as_ptr(), ptr) };
|
||||
dev.allocations.remove(&ptr);
|
||||
Ok(())
|
||||
})?
|
||||
}
|
||||
|
@ -57,16 +82,22 @@ pub(crate) fn set_d32_v2(dst: *mut c_void, mut ui: u32, n: usize) -> Result<(),
|
|||
let dev = unsafe { &*(*stream_data.context).device };
|
||||
let queue = stream_data.cmd_list.as_ref().unwrap();
|
||||
let pattern_size = mem::size_of_val(&ui);
|
||||
let event = unsafe {
|
||||
dev.ocl_ext.enqueue_memfill(
|
||||
queue,
|
||||
let mut event = ptr::null_mut();
|
||||
let err = unsafe {
|
||||
ocl_core::ffi::clEnqueueSVMMemFill(
|
||||
queue.as_ptr(),
|
||||
dst,
|
||||
&ui as *const _ as *const _,
|
||||
pattern_size,
|
||||
pattern_size * n,
|
||||
)?
|
||||
0,
|
||||
ptr::null(),
|
||||
&mut event,
|
||||
)
|
||||
};
|
||||
ocl_core::wait_for_event(&event)?;
|
||||
assert_eq!(err, 0);
|
||||
let err = unsafe { ocl_core::ffi::clWaitForEvents(1, &mut event) };
|
||||
assert_eq!(err, 0);
|
||||
Ok(())
|
||||
})?
|
||||
}
|
||||
|
@ -76,16 +107,22 @@ pub(crate) fn set_d8_v2(dst: *mut c_void, mut uc: u8, n: usize) -> Result<(), CU
|
|||
let dev = unsafe { &*(*stream_data.context).device };
|
||||
let queue = stream_data.cmd_list.as_ref().unwrap();
|
||||
let pattern_size = mem::size_of_val(&uc);
|
||||
let event = unsafe {
|
||||
dev.ocl_ext.enqueue_memfill(
|
||||
queue,
|
||||
let mut event = ptr::null_mut();
|
||||
let err = unsafe {
|
||||
ocl_core::ffi::clEnqueueSVMMemFill(
|
||||
queue.as_ptr(),
|
||||
dst,
|
||||
&uc as *const _ as *const _,
|
||||
pattern_size,
|
||||
pattern_size * n,
|
||||
)?
|
||||
0,
|
||||
ptr::null(),
|
||||
&mut event,
|
||||
)
|
||||
};
|
||||
ocl_core::wait_for_event(&event)?;
|
||||
assert_eq!(err, 0);
|
||||
let err = unsafe { ocl_core::ffi::clWaitForEvents(1, &mut event) };
|
||||
assert_eq!(err, 0);
|
||||
Ok(())
|
||||
})?
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue