Add missing V-RAY host functions

This commit is contained in:
Andrzej Janik 2021-09-16 23:11:34 +00:00
parent ca0d8ec666
commit 314e3dcb49
8 changed files with 306 additions and 51 deletions

View file

@ -2184,11 +2184,11 @@ pub struct CUgraphExecUpdateResult_enum(pub ::std::os::raw::c_uint);
pub use self::CUgraphExecUpdateResult_enum as CUgraphExecUpdateResult;
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuGetErrorString(
pub unsafe extern "system" fn cuGetErrorString(
CUresult(e): CUresult,
pStr: *mut *const ::std::os::raw::c_char,
) -> CUresult {
unsafe { *pStr = hipGetErrorString(hipError_t(e)) };
*pStr = hipGetErrorString(hipError_t(e));
CUresult::CUDA_SUCCESS
}
@ -2273,17 +2273,20 @@ pub extern "system" fn cuDeviceGetNvSciSyncAttributes(
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuDeviceGetProperties(prop: *mut CUdevprop, dev: CUdevice) -> CUresult {
r#impl::unimplemented()
pub unsafe extern "system" fn cuDeviceGetProperties(
prop: *mut CUdevprop,
dev: CUdevice,
) -> CUresult {
r#impl::device::get_properties(prop, dev).encuda()
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuDeviceComputeCapability(
pub unsafe extern "system" fn cuDeviceComputeCapability(
major: *mut ::std::os::raw::c_int,
minor: *mut ::std::os::raw::c_int,
dev: CUdevice,
) -> CUresult {
r#impl::unimplemented()
hipDeviceComputeCapability(major, minor, dev.0).into()
}
#[cfg_attr(not(test), no_mangle)]
@ -2309,7 +2312,7 @@ pub extern "system" fn cuDevicePrimaryCtxSetFlags(
dev: CUdevice,
flags: ::std::os::raw::c_uint,
) -> CUresult {
cuDevicePrimaryCtxSetFlags_v2(dev, flags)
CUresult::CUDA_SUCCESS
}
#[cfg_attr(not(test), no_mangle)]
@ -2317,7 +2320,7 @@ pub extern "system" fn cuDevicePrimaryCtxSetFlags_v2(
dev: CUdevice,
flags: ::std::os::raw::c_uint,
) -> CUresult {
r#impl::unimplemented()
cuDevicePrimaryCtxSetFlags(dev, flags)
}
#[cfg_attr(not(test), no_mangle)]
@ -2391,12 +2394,12 @@ pub extern "system" fn cuCtxSynchronize() -> CUresult {
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuCtxSetLimit(limit: CUlimit, value: usize) -> CUresult {
r#impl::unimplemented()
r#impl::context::set_limit(limit, value)
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuCtxGetLimit(pvalue: *mut usize, limit: CUlimit) -> CUresult {
r#impl::unimplemented()
pub unsafe extern "system" fn cuCtxGetLimit(pvalue: *mut usize, limit: CUlimit) -> CUresult {
r#impl::context::get_limit(pvalue, limit)
}
#[cfg_attr(not(test), no_mangle)]
@ -2406,7 +2409,7 @@ pub extern "system" fn cuCtxGetCacheConfig(pconfig: *mut CUfunc_cache) -> CUresu
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuCtxSetCacheConfig(config: CUfunc_cache) -> CUresult {
r#impl::unimplemented()
CUresult::CUDA_SUCCESS
}
#[cfg_attr(not(test), no_mangle)]
@ -2504,13 +2507,13 @@ pub extern "system" fn cuModuleGetFunction(
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuModuleGetGlobal_v2(
pub unsafe extern "system" fn cuModuleGetGlobal_v2(
dptr: *mut CUdeviceptr,
bytes: *mut usize,
hmod: CUmodule,
name: *const ::std::os::raw::c_char,
) -> CUresult {
r#impl::unimplemented()
hipModuleGetGlobal(dptr as _, bytes, hmod as _, name).into()
}
#[cfg_attr(not(test), no_mangle)]
@ -2532,17 +2535,17 @@ pub extern "system" fn cuModuleGetSurfRef(
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuLinkCreate_v2(
pub unsafe extern "system" fn cuLinkCreate_v2(
numOptions: ::std::os::raw::c_uint,
options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void,
stateOut: *mut CUlinkState,
) -> CUresult {
r#impl::unimplemented()
r#impl::link::create(numOptions, options, optionValues, stateOut)
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuLinkAddData_v2(
pub unsafe extern "system" fn cuLinkAddData_v2(
state: CUlinkState,
type_: CUjitInputType,
data: *mut ::std::os::raw::c_void,
@ -2552,7 +2555,16 @@ pub extern "system" fn cuLinkAddData_v2(
options: *mut CUjit_option,
optionValues: *mut *mut ::std::os::raw::c_void,
) -> CUresult {
r#impl::unimplemented()
r#impl::link::add_data(
state,
type_,
data,
size,
name,
numOptions,
options,
optionValues,
)
}
#[cfg_attr(not(test), no_mangle)]
@ -2573,17 +2585,17 @@ pub extern "system" fn cuLinkComplete(
cubinOut: *mut *mut ::std::os::raw::c_void,
sizeOut: *mut usize,
) -> CUresult {
r#impl::unimplemented()
r#impl::link::complete(state, cubinOut, sizeOut)
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuLinkDestroy(state: CUlinkState) -> CUresult {
r#impl::unimplemented()
pub unsafe extern "system" fn cuLinkDestroy(state: CUlinkState) -> CUresult {
r#impl::link::destroy(state)
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult {
r#impl::unimplemented()
pub unsafe extern "system" fn cuMemGetInfo_v2(free: *mut usize, total: *mut usize) -> CUresult {
hipMemGetInfo(free, total).into()
}
#[cfg_attr(not(test), no_mangle)]
@ -2641,17 +2653,17 @@ pub extern "system" fn cuMemAllocHost_v2(
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult {
r#impl::unimplemented()
pub unsafe extern "system" fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult {
hipFreeHost(p).into()
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuMemHostAlloc(
pub unsafe extern "system" fn cuMemHostAlloc(
pp: *mut *mut ::std::os::raw::c_void,
bytesize: usize,
Flags: ::std::os::raw::c_uint,
) -> CUresult {
r#impl::unimplemented()
hipMemAllocHost(pp, bytesize).into()
}
#[cfg_attr(not(test), no_mangle)]
@ -2694,7 +2706,7 @@ pub extern "system" fn cuDeviceGetPCIBusId(
len: ::std::os::raw::c_int,
dev: CUdevice,
) -> CUresult {
r#impl::unimplemented()
unsafe { hipDeviceGetPCIBusId(pciBusId, len, dev.0) }.into()
}
#[cfg_attr(not(test), no_mangle)]
@ -2883,8 +2895,8 @@ pub extern "system" fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CU
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult {
r#impl::unimplemented()
pub unsafe extern "system" fn cuMemcpy3D_v2(pCopy: *const CUDA_MEMCPY3D) -> CUresult {
r#impl::memory::copy_3d(pCopy).encuda()
}
#[cfg_attr(not(test), no_mangle)]
@ -2915,13 +2927,13 @@ pub extern "system" fn cuMemcpyPeerAsync(
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuMemcpyHtoDAsync_v2(
pub unsafe extern "system" fn cuMemcpyHtoDAsync_v2(
dstDevice: CUdeviceptr,
srcHost: *const ::std::os::raw::c_void,
ByteCount: usize,
hStream: CUstream,
) -> CUresult {
r#impl::unimplemented()
hipMemcpyHtoDAsync(dstDevice.0 as _, srcHost as _, ByteCount, hStream as _).into()
}
#[cfg_attr(not(test), no_mangle)]
@ -3153,16 +3165,16 @@ pub extern "system" fn cuArrayGetDescriptor_v2(
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuArrayDestroy(hArray: CUarray) -> CUresult {
r#impl::unimplemented()
pub unsafe extern "system" fn cuArrayDestroy(hArray: CUarray) -> CUresult {
hipArrayDestroy(hArray as _).into()
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuArray3DCreate_v2(
pub unsafe extern "system" fn cuArray3DCreate_v2(
pHandle: *mut CUarray,
pAllocateArray: *const CUDA_ARRAY3D_DESCRIPTOR,
) -> CUresult {
r#impl::unimplemented()
hipArray3DCreate(pHandle as _, pAllocateArray as _).into()
}
#[cfg_attr(not(test), no_mangle)]
@ -3307,12 +3319,12 @@ pub extern "system" fn cuMemRetainAllocationHandle(
}
#[cfg_attr(not(test), no_mangle)]
pub extern "system" fn cuPointerGetAttribute(
pub unsafe extern "system" fn cuPointerGetAttribute(
data: *mut ::std::os::raw::c_void,
attribute: CUpointer_attribute,
ptr: CUdeviceptr,
) -> CUresult {
r#impl::unimplemented()
r#impl::pointer::get_attribute(data, attribute, ptr).encuda()
}
#[cfg_attr(not(test), no_mangle)]

24
zluda/src/impl/context.rs Normal file
View file

@ -0,0 +1,24 @@
use std::ptr;
use crate::cuda::CUlimit;
use crate::cuda::CUresult;
pub(crate) unsafe fn get_limit(pvalue: *mut usize, limit: CUlimit) -> CUresult {
if pvalue == ptr::null_mut() {
return CUresult::CUDA_ERROR_INVALID_VALUE;
}
if limit == CUlimit::CU_LIMIT_STACK_SIZE {
*pvalue = 512; // GTX 1060 reports 1024
CUresult::CUDA_SUCCESS
} else {
CUresult::CUDA_ERROR_NOT_SUPPORTED
}
}
pub(crate) fn set_limit(limit: CUlimit, value: usize) -> CUresult {
if limit == CUlimit::CU_LIMIT_STACK_SIZE {
CUresult::CUDA_SUCCESS
} else {
CUresult::CUDA_ERROR_NOT_SUPPORTED
}
}

View file

@ -1,5 +1,8 @@
use super::{transmute_lifetime, transmute_lifetime_mut, CUresult};
use crate::cuda;
use crate::{
cuda::{self, CUdevice, CUdevprop},
hip_call,
};
use cuda::{CUdevice_attribute, CUuuid_st};
use hip_runtime_sys::{
hipDeviceAttribute_t, hipDeviceGetAttribute, hipError_t, hipGetDeviceProperties,
@ -325,3 +328,21 @@ pub fn get_luid(
unsafe { *dev_node_mask = 0 };
Ok(())
}
pub(crate) unsafe fn get_properties(prop: *mut CUdevprop, dev: CUdevice) -> Result<(), hipError_t> {
if prop == ptr::null_mut() {
return Err(hipError_t::hipErrorInvalidValue);
}
let mut hip_props = mem::zeroed();
hip_call! { hipGetDeviceProperties(&mut hip_props, dev.0) };
(*prop).maxThreadsPerBlock = hip_props.maxThreadsPerBlock;
(*prop).maxThreadsDim = hip_props.maxThreadsDim;
(*prop).maxGridSize = hip_props.maxGridSize;
(*prop).totalConstantMemory = usize::min(hip_props.totalConstMem, i32::MAX as usize) as i32;
(*prop).SIMDWidth = hip_props.warpSize;
(*prop).memPitch = usize::min(hip_props.memPitch, i32::MAX as usize) as i32;
(*prop).regsPerBlock = hip_props.regsPerBlock;
(*prop).clockRate = hip_props.clockRate;
(*prop).textureAlign = usize::min(hip_props.textureAlignment, i32::MAX as usize) as i32;
Ok(())
}

67
zluda/src/impl/link.rs Normal file
View file

@ -0,0 +1,67 @@
use std::{
ffi::{c_void, CStr},
mem, ptr, slice,
};
use crate::cuda::{CUjitInputType, CUjit_option, CUlinkState, CUresult};
struct LinkState {
modules: Vec<String>,
}
pub(crate) unsafe fn create(
num_options: u32,
options: *mut CUjit_option,
option_values: *mut *mut c_void,
state_out: *mut CUlinkState,
) -> CUresult {
if state_out == ptr::null_mut() {
return CUresult::CUDA_ERROR_INVALID_VALUE;
}
let state = Box::new(LinkState {
modules: Vec::new(),
});
*state_out = mem::transmute(state);
CUresult::CUDA_SUCCESS
}
pub(crate) unsafe fn add_data(
state: CUlinkState,
type_: CUjitInputType,
data: *mut c_void,
size: usize,
name: *const i8,
num_options: u32,
options: *mut CUjit_option,
option_values: *mut *mut c_void,
) -> CUresult {
if state == ptr::null_mut() {
return CUresult::CUDA_ERROR_INVALID_VALUE;
}
let state: *mut LinkState = mem::transmute(state);
let state = &mut *state;
// V-RAY specific hack
if state.modules.len() == 2 {
return CUresult::CUDA_SUCCESS;
}
let ptx = slice::from_raw_parts(data as *mut u8, size);
state.modules.push(
CStr::from_bytes_with_nul_unchecked(ptx)
.to_string_lossy()
.to_string(),
);
CUresult::CUDA_SUCCESS
}
pub(crate) fn complete(
state: CUlinkState,
cubin_out: *mut *mut c_void,
size_out: *mut usize,
) -> CUresult {
CUresult::CUDA_SUCCESS
}
pub(crate) unsafe fn destroy(state: CUlinkState) -> CUresult {
let state: Box<LinkState> = mem::transmute(state);
CUresult::CUDA_SUCCESS
}

55
zluda/src/impl/memory.rs Normal file
View file

@ -0,0 +1,55 @@
use hip_runtime_sys::{
hipDrvMemcpy3D, hipError_t, hipMemcpy3D, hipMemcpy3DParms, hipMemoryType, hipPitchedPtr,
hipPos, HIP_MEMCPY3D,
};
use std::ptr;
use crate::{
cuda::{CUDA_MEMCPY3D_st, CUdeviceptr, CUmemorytype, CUresult},
hip_call,
};
// TODO change HIP impl to 64 bits
pub(crate) unsafe fn copy_3d(cu_copy: *const CUDA_MEMCPY3D_st) -> Result<(), hipError_t> {
if cu_copy == ptr::null() {
return Err(hipError_t::hipErrorInvalidValue);
}
let cu_copy = *cu_copy;
let hip_copy = HIP_MEMCPY3D {
srcXInBytes: cu_copy.srcXInBytes as u32,
srcY: cu_copy.srcY as u32,
srcZ: cu_copy.srcZ as u32,
srcLOD: cu_copy.srcLOD as u32,
srcMemoryType: memory_type(cu_copy.srcMemoryType)?,
srcHost: cu_copy.srcHost,
srcDevice: cu_copy.srcDevice.0 as _,
srcArray: cu_copy.srcArray as _,
srcPitch: cu_copy.srcPitch as u32,
srcHeight: cu_copy.srcHeight as u32,
dstXInBytes: cu_copy.dstXInBytes as u32,
dstY: cu_copy.dstY as u32,
dstZ: cu_copy.dstZ as u32,
dstLOD: cu_copy.dstLOD as u32,
dstMemoryType: memory_type(cu_copy.dstMemoryType)?,
dstHost: cu_copy.dstHost,
dstDevice: cu_copy.dstDevice.0 as _,
dstArray: cu_copy.dstArray as _,
dstPitch: cu_copy.dstPitch as u32,
dstHeight: cu_copy.dstHeight as u32,
WidthInBytes: cu_copy.WidthInBytes as u32,
Height: cu_copy.Height as u32,
Depth: cu_copy.Depth as u32,
};
hip_call! { hipDrvMemcpy3D(&hip_copy) };
Ok(())
}
pub(crate) fn memory_type(cu: CUmemorytype) -> Result<hipMemoryType, hipError_t> {
match cu {
CUmemorytype::CU_MEMORYTYPE_HOST => Ok(hipMemoryType::hipMemoryTypeHost),
CUmemorytype::CU_MEMORYTYPE_DEVICE => Ok(hipMemoryType::hipMemoryTypeDevice),
CUmemorytype::CU_MEMORYTYPE_ARRAY => Ok(hipMemoryType::hipMemoryTypeArray),
CUmemorytype::CU_MEMORYTYPE_UNIFIED => Ok(hipMemoryType::hipMemoryTypeUnified),
_ => Err(hipError_t::hipErrorInvalidValue),
}
}

View file

@ -20,6 +20,10 @@ pub mod function;
#[cfg_attr(not(windows), path = "os_unix.rs")]
pub(crate) mod os;
pub(crate) mod module;
pub(crate) mod context;
pub(crate) mod memory;
pub(crate) mod link;
pub(crate) mod pointer;
#[cfg(debug_assertions)]
pub fn unimplemented() -> CUresult {
@ -31,6 +35,19 @@ pub fn unimplemented() -> CUresult {
CUresult::CUDA_ERROR_NOT_SUPPORTED
}
#[macro_export]
macro_rules! hip_call {
($expr:expr) => {
#[allow(unused_unsafe)]
{
let err = unsafe { $expr };
if err != hip_runtime_sys::hipError_t::hipSuccess {
return Result::Err(err);
}
}
};
}
pub trait HasLivenessCookie: Sized {
const COOKIE: usize;
const LIVENESS_FAIL: CUresult;

View file

@ -87,7 +87,7 @@ pub fn load_data_impl(pmod: *mut CUmodule, spirv_data: SpirvModule) -> Result<()
let err = unsafe { hipGetDeviceProperties(&mut props, dev) };
let arch_binary = compile_amd(
&props,
&spirv_data.binaries[..],
&[&spirv_data.binaries[..]],
spirv_data.should_link_ptx_impl,
)
.map_err(|_| hipError_t::hipErrorUnknown)?;
@ -115,7 +115,7 @@ const AMDGPU_BITCODE_DEVICE_PREFIX: &'static str = "oclc_isa_version_";
fn compile_amd(
device_pros: &hipDeviceProp_t,
spirv_il: &[u32],
spirv_il: &[&[u32]],
ptx_lib: Option<(&'static [u8], &'static [u8])>,
) -> io::Result<Vec<u8>> {
let null_terminator = device_pros
@ -133,24 +133,30 @@ fn compile_amd(
return Err(io::Error::new(io::ErrorKind::Other, ""));
};
let dir = tempfile::tempdir()?;
let mut spirv = NamedTempFile::new_in(&dir)?;
let llvm = NamedTempFile::new_in(&dir)?;
let spirv_il_u8 = unsafe {
slice::from_raw_parts(
spirv_il.as_ptr() as *const u8,
spirv_il.len() * mem::size_of::<u32>(),
)
};
spirv.write_all(spirv_il_u8)?;
let spirv_files = spirv_il
.iter()
.map(|spirv| {
let mut spirv = NamedTempFile::new_in(&dir)?;
let spirv_il_u8 = unsafe {
slice::from_raw_parts(
spirv_il.as_ptr() as *const u8,
spirv_il.len() * mem::size_of::<u32>(),
)
};
spirv.write_all(spirv_il_u8)?;
Ok::<_, io::Error>(spirv)
})
.collect::<Result<Vec<_>, _>>()?;
let llvm_spirv_path = match env::var("LLVM_SPIRV") {
Ok(path) => Cow::Owned(path),
Err(_) => Cow::Borrowed(LLVM_SPIRV),
};
let llvm = NamedTempFile::new_in(&dir)?;
let to_llvm_cmd = Command::new(&*llvm_spirv_path)
.arg("-r")
.arg("-o")
.arg(llvm.path())
.arg(spirv.path())
.args(spirv_files.iter().map(|f| f.path()))
.status()?;
assert!(to_llvm_cmd.success());
if cfg!(debug_assertions) {

53
zluda/src/impl/pointer.rs Normal file
View file

@ -0,0 +1,53 @@
use std::{ffi::c_void, mem, ptr};
use hip_runtime_sys::{hipError_t, hipMemoryType, hipPointerGetAttributes};
use crate::{
cuda::{CUdeviceptr, CUmemorytype, CUpointer_attribute},
hip_call,
};
pub(crate) unsafe fn get_attribute(
data: *mut c_void,
attribute: CUpointer_attribute,
ptr: CUdeviceptr,
) -> Result<(), hipError_t> {
if data == ptr::null_mut() {
return Err(hipError_t::hipErrorInvalidValue);
}
let mut attribs = mem::zeroed();
hip_call! { hipPointerGetAttributes(&mut attribs, ptr.0 as _) };
match attribute {
CUpointer_attribute::CU_POINTER_ATTRIBUTE_CONTEXT => {
*(data as *mut _) = attribs.device;
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_MEMORY_TYPE => {
*(data as *mut _) = memory_type(attribs.memoryType)?;
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_DEVICE_POINTER => {
*(data as *mut _) = attribs.devicePointer;
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_HOST_POINTER => {
*(data as *mut _) = attribs.hostPointer;
Ok(())
}
CUpointer_attribute::CU_POINTER_ATTRIBUTE_IS_MANAGED => {
*(data as *mut _) = attribs.isManaged;
Ok(())
}
_ => Err(hipError_t::hipErrorNotSupported),
}
}
pub(crate) fn memory_type(cu: hipMemoryType) -> Result<CUmemorytype, hipError_t> {
match cu {
hipMemoryType::hipMemoryTypeHost => Ok(CUmemorytype::CU_MEMORYTYPE_HOST),
hipMemoryType::hipMemoryTypeDevice => Ok(CUmemorytype::CU_MEMORYTYPE_DEVICE),
hipMemoryType::hipMemoryTypeArray => Ok(CUmemorytype::CU_MEMORYTYPE_ARRAY),
hipMemoryType::hipMemoryTypeUnified => Ok(CUmemorytype::CU_MEMORYTYPE_UNIFIED),
_ => Err(hipError_t::hipErrorInvalidValue),
}
}