mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-28 21:47:57 +03:00
Implement atomic instructions
This commit is contained in:
parent
861116f223
commit
a82eb20817
24 changed files with 1672 additions and 88 deletions
|
@ -7,4 +7,8 @@ edition = "2018"
|
||||||
[lib]
|
[lib]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
level_zero-sys = { path = "../level_zero-sys" }
|
level_zero-sys = { path = "../level_zero-sys" }
|
||||||
|
|
||||||
|
[dependencies.ocl-core]
|
||||||
|
version = "0.11"
|
||||||
|
features = ["opencl_version_1_2", "opencl_version_2_0", "opencl_version_2_1"]
|
|
@ -238,7 +238,76 @@ impl Drop for CommandQueue {
|
||||||
pub struct Module(sys::ze_module_handle_t);
|
pub struct Module(sys::ze_module_handle_t);
|
||||||
|
|
||||||
impl Module {
|
impl Module {
|
||||||
pub fn new_spirv(
|
// HACK ALERT
|
||||||
|
// We use OpenCL for now to do SPIR-V linking, because Level0
|
||||||
|
// does not allow linking. Don't let presence of zeModuleDynamicLink fool
|
||||||
|
// you, it's not currently possible to create non-compiled modules.
|
||||||
|
// zeModuleCreate always compiles (builds and links).
|
||||||
|
pub fn build_link_spirv<'a>(
|
||||||
|
ctx: &mut Context,
|
||||||
|
d: &Device,
|
||||||
|
binaries: &[&'a [u8]],
|
||||||
|
) -> (Result<Self>, Option<BuildLog>) {
|
||||||
|
let ocl_program = match Self::build_link_spirv_impl(binaries) {
|
||||||
|
Err(_) => return (Err(sys::ze_result_t::ZE_RESULT_ERROR_UNKNOWN), None),
|
||||||
|
Ok(prog) => prog,
|
||||||
|
};
|
||||||
|
match ocl_core::get_program_info(&ocl_program, ocl_core::ProgramInfo::Binaries) {
|
||||||
|
Ok(ocl_core::ProgramInfoResult::Binaries(binaries)) => {
|
||||||
|
let (module, build_log) = Self::build_native(ctx, d, &binaries[0]);
|
||||||
|
(module, Some(build_log))
|
||||||
|
}
|
||||||
|
_ => return (Err(sys::ze_result_t::ZE_RESULT_ERROR_UNKNOWN), None),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn build_link_spirv_impl<'a>(binaries: &[&'a [u8]]) -> ocl_core::Result<ocl_core::Program> {
|
||||||
|
let platforms = ocl_core::get_platform_ids()?;
|
||||||
|
let (platform, device) = platforms
|
||||||
|
.iter()
|
||||||
|
.find_map(|plat| {
|
||||||
|
let devices =
|
||||||
|
ocl_core::get_device_ids(plat, Some(ocl_core::DeviceType::GPU), None).ok()?;
|
||||||
|
for dev in devices {
|
||||||
|
let vendor =
|
||||||
|
ocl_core::get_device_info(dev, ocl_core::DeviceInfo::VendorId).ok()?;
|
||||||
|
if let ocl_core::DeviceInfoResult::VendorId(0x8086) = vendor {
|
||||||
|
let dev_type =
|
||||||
|
ocl_core::get_device_info(dev, ocl_core::DeviceInfo::Type).ok()?;
|
||||||
|
if let ocl_core::DeviceInfoResult::Type(ocl_core::DeviceType::GPU) =
|
||||||
|
dev_type
|
||||||
|
{
|
||||||
|
return Some((plat.clone(), dev));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
})
|
||||||
|
.ok_or("")?;
|
||||||
|
let ctx_props = ocl_core::ContextProperties::new().platform(platform);
|
||||||
|
let ocl_ctx = ocl_core::create_context_from_type::<ocl_core::DeviceId>(
|
||||||
|
Some(&ctx_props),
|
||||||
|
ocl_core::DeviceType::GPU,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
)?;
|
||||||
|
let mut programs = Vec::with_capacity(binaries.len());
|
||||||
|
for binary in binaries {
|
||||||
|
programs.push(ocl_core::create_program_with_il(&ocl_ctx, binary, None)?);
|
||||||
|
}
|
||||||
|
let options = CString::default();
|
||||||
|
ocl_core::link_program::<ocl_core::DeviceId, _>(
|
||||||
|
&ocl_ctx,
|
||||||
|
Some(&[device]),
|
||||||
|
&options,
|
||||||
|
&programs.iter().collect::<Vec<_>>(),
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build_spirv(
|
||||||
ctx: &mut Context,
|
ctx: &mut Context,
|
||||||
d: &Device,
|
d: &Device,
|
||||||
bin: &[u8],
|
bin: &[u8],
|
||||||
|
@ -247,7 +316,7 @@ impl Module {
|
||||||
Module::new(ctx, true, d, bin, opts)
|
Module::new(ctx, true, d, bin, opts)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_native(ctx: &mut Context, d: &Device, bin: &[u8]) -> (Result<Self>, BuildLog) {
|
pub fn build_native(ctx: &mut Context, d: &Device, bin: &[u8]) -> (Result<Self>, BuildLog) {
|
||||||
Module::new(ctx, false, d, bin, None)
|
Module::new(ctx, false, d, bin, None)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -53,7 +53,7 @@ impl ModuleData {
|
||||||
Ok(_) if errors.len() > 0 => return Err(ModuleCompileError::Parse(errors, None)),
|
Ok(_) if errors.len() > 0 => return Err(ModuleCompileError::Parse(errors, None)),
|
||||||
Ok(ast) => ast,
|
Ok(ast) => ast,
|
||||||
};
|
};
|
||||||
let (spirv, all_arg_lens) = ptx::to_spirv(ast)?;
|
let (_, spirv, all_arg_lens) = ptx::to_spirv(ast)?;
|
||||||
let byte_il = unsafe {
|
let byte_il = unsafe {
|
||||||
slice::from_raw_parts::<u8>(
|
slice::from_raw_parts::<u8>(
|
||||||
spirv.as_ptr() as *const _,
|
spirv.as_ptr() as *const _,
|
||||||
|
@ -61,7 +61,7 @@ impl ModuleData {
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
let module = super::device::with_current_exclusive(|dev| {
|
let module = super::device::with_current_exclusive(|dev| {
|
||||||
l0::Module::new_spirv(&mut dev.l0_context, &dev.base, byte_il, None)
|
l0::Module::build_spirv(&mut dev.l0_context, &dev.base, byte_il, None)
|
||||||
});
|
});
|
||||||
match module {
|
match module {
|
||||||
Ok((Ok(module), _)) => Ok(Mutex::new(Self {
|
Ok((Ok(module), _)) => Ok(Mutex::new(Self {
|
||||||
|
|
121
ptx/lib/notcuda_ptx_impl.cl
Normal file
121
ptx/lib/notcuda_ptx_impl.cl
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
// Every time this file changes it must be rebuilt:
|
||||||
|
// ocloc -file notcuda_ptx_impl.cl -64 -options "-cl-std=CL2.0" -out_dir . -device kbl -output_no_suffix -spv_only
|
||||||
|
// Additionally you should strip names:
|
||||||
|
// spirv-opt --strip-debug notcuda_ptx_impl.spv -o notcuda_ptx_impl.spv
|
||||||
|
|
||||||
|
#define FUNC(NAME) __notcuda_ptx_impl__ ## NAME
|
||||||
|
|
||||||
|
#define atomic_inc(NAME, SUCCESS, FAILURE, SCOPE, SPACE) \
|
||||||
|
uint FUNC(NAME)(SPACE uint* ptr, uint threshold) { \
|
||||||
|
uint expected = *ptr; \
|
||||||
|
uint desired; \
|
||||||
|
do { \
|
||||||
|
desired = (expected >= threshold) ? 0 : expected + 1; \
|
||||||
|
} while (!atomic_compare_exchange_strong_explicit((volatile SPACE atomic_uint*)ptr, &expected, desired, SUCCESS, FAILURE, SCOPE)); \
|
||||||
|
return expected; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define atomic_dec(NAME, SUCCESS, FAILURE, SCOPE, SPACE) \
|
||||||
|
uint FUNC(NAME)(SPACE uint* ptr, uint threshold) { \
|
||||||
|
uint expected = *ptr; \
|
||||||
|
uint desired; \
|
||||||
|
do { \
|
||||||
|
desired = (expected == 0 || expected > threshold) ? threshold : expected - 1; \
|
||||||
|
} while (!atomic_compare_exchange_strong_explicit((volatile SPACE atomic_uint*)ptr, &expected, desired, SUCCESS, FAILURE, SCOPE)); \
|
||||||
|
return expected; \
|
||||||
|
}
|
||||||
|
|
||||||
|
// We are doing all this mess instead of accepting memory_order and memory_scope parameters
|
||||||
|
// because ocloc emits broken (failing spirv-dis) SPIR-V when memory_order or memory_scope is a parameter
|
||||||
|
|
||||||
|
// atom.inc
|
||||||
|
atomic_inc(atom_relaxed_cta_generic_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, );
|
||||||
|
atomic_inc(atom_acquire_cta_generic_inc, memory_order_acquire, memory_order_acquire, memory_scope_work_group, );
|
||||||
|
atomic_inc(atom_release_cta_generic_inc, memory_order_release, memory_order_acquire, memory_scope_work_group, );
|
||||||
|
atomic_inc(atom_acq_rel_cta_generic_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, );
|
||||||
|
|
||||||
|
atomic_inc(atom_relaxed_gpu_generic_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
|
||||||
|
atomic_inc(atom_acquire_gpu_generic_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, );
|
||||||
|
atomic_inc(atom_release_gpu_generic_inc, memory_order_release, memory_order_acquire, memory_scope_device, );
|
||||||
|
atomic_inc(atom_acq_rel_gpu_generic_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
|
||||||
|
|
||||||
|
atomic_inc(atom_relaxed_sys_generic_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
|
||||||
|
atomic_inc(atom_acquire_sys_generic_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, );
|
||||||
|
atomic_inc(atom_release_sys_generic_inc, memory_order_release, memory_order_acquire, memory_scope_device, );
|
||||||
|
atomic_inc(atom_acq_rel_sys_generic_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
|
||||||
|
|
||||||
|
atomic_inc(atom_relaxed_cta_global_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __global);
|
||||||
|
atomic_inc(atom_acquire_cta_global_inc, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __global);
|
||||||
|
atomic_inc(atom_release_cta_global_inc, memory_order_release, memory_order_acquire, memory_scope_work_group, __global);
|
||||||
|
atomic_inc(atom_acq_rel_cta_global_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __global);
|
||||||
|
|
||||||
|
atomic_inc(atom_relaxed_gpu_global_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
|
||||||
|
atomic_inc(atom_acquire_gpu_global_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
atomic_inc(atom_release_gpu_global_inc, memory_order_release, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
atomic_inc(atom_acq_rel_gpu_global_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
|
||||||
|
atomic_inc(atom_relaxed_sys_global_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
|
||||||
|
atomic_inc(atom_acquire_sys_global_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
atomic_inc(atom_release_sys_global_inc, memory_order_release, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
atomic_inc(atom_acq_rel_sys_global_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
|
||||||
|
atomic_inc(atom_relaxed_cta_shared_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __local);
|
||||||
|
atomic_inc(atom_acquire_cta_shared_inc, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __local);
|
||||||
|
atomic_inc(atom_release_cta_shared_inc, memory_order_release, memory_order_acquire, memory_scope_work_group, __local);
|
||||||
|
atomic_inc(atom_acq_rel_cta_shared_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __local);
|
||||||
|
|
||||||
|
atomic_inc(atom_relaxed_gpu_shared_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local);
|
||||||
|
atomic_inc(atom_acquire_gpu_shared_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __local);
|
||||||
|
atomic_inc(atom_release_gpu_shared_inc, memory_order_release, memory_order_acquire, memory_scope_device, __local);
|
||||||
|
atomic_inc(atom_acq_rel_gpu_shared_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
|
||||||
|
|
||||||
|
atomic_inc(atom_relaxed_sys_shared_inc, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local);
|
||||||
|
atomic_inc(atom_acquire_sys_shared_inc, memory_order_acquire, memory_order_acquire, memory_scope_device, __local);
|
||||||
|
atomic_inc(atom_release_sys_shared_inc, memory_order_release, memory_order_acquire, memory_scope_device, __local);
|
||||||
|
atomic_inc(atom_acq_rel_sys_shared_inc, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
|
||||||
|
|
||||||
|
// atom.dec
|
||||||
|
atomic_dec(atom_relaxed_cta_generic_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, );
|
||||||
|
atomic_dec(atom_acquire_cta_generic_dec, memory_order_acquire, memory_order_acquire, memory_scope_work_group, );
|
||||||
|
atomic_dec(atom_release_cta_generic_dec, memory_order_release, memory_order_acquire, memory_scope_work_group, );
|
||||||
|
atomic_dec(atom_acq_rel_cta_generic_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, );
|
||||||
|
|
||||||
|
atomic_dec(atom_relaxed_gpu_generic_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
|
||||||
|
atomic_dec(atom_acquire_gpu_generic_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, );
|
||||||
|
atomic_dec(atom_release_gpu_generic_dec, memory_order_release, memory_order_acquire, memory_scope_device, );
|
||||||
|
atomic_dec(atom_acq_rel_gpu_generic_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
|
||||||
|
|
||||||
|
atomic_dec(atom_relaxed_sys_generic_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, );
|
||||||
|
atomic_dec(atom_acquire_sys_generic_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, );
|
||||||
|
atomic_dec(atom_release_sys_generic_dec, memory_order_release, memory_order_acquire, memory_scope_device, );
|
||||||
|
atomic_dec(atom_acq_rel_sys_generic_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, );
|
||||||
|
|
||||||
|
atomic_dec(atom_relaxed_cta_global_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __global);
|
||||||
|
atomic_dec(atom_acquire_cta_global_dec, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __global);
|
||||||
|
atomic_dec(atom_release_cta_global_dec, memory_order_release, memory_order_acquire, memory_scope_work_group, __global);
|
||||||
|
atomic_dec(atom_acq_rel_cta_global_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __global);
|
||||||
|
|
||||||
|
atomic_dec(atom_relaxed_gpu_global_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
|
||||||
|
atomic_dec(atom_acquire_gpu_global_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
atomic_dec(atom_release_gpu_global_dec, memory_order_release, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
atomic_dec(atom_acq_rel_gpu_global_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
|
||||||
|
atomic_dec(atom_relaxed_sys_global_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __global);
|
||||||
|
atomic_dec(atom_acquire_sys_global_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
atomic_dec(atom_release_sys_global_dec, memory_order_release, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
atomic_dec(atom_acq_rel_sys_global_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __global);
|
||||||
|
|
||||||
|
atomic_dec(atom_relaxed_cta_shared_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_work_group, __local);
|
||||||
|
atomic_dec(atom_acquire_cta_shared_dec, memory_order_acquire, memory_order_acquire, memory_scope_work_group, __local);
|
||||||
|
atomic_dec(atom_release_cta_shared_dec, memory_order_release, memory_order_acquire, memory_scope_work_group, __local);
|
||||||
|
atomic_dec(atom_acq_rel_cta_shared_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_work_group, __local);
|
||||||
|
|
||||||
|
atomic_dec(atom_relaxed_gpu_shared_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local);
|
||||||
|
atomic_dec(atom_acquire_gpu_shared_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __local);
|
||||||
|
atomic_dec(atom_release_gpu_shared_dec, memory_order_release, memory_order_acquire, memory_scope_device, __local);
|
||||||
|
atomic_dec(atom_acq_rel_gpu_shared_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
|
||||||
|
|
||||||
|
atomic_dec(atom_relaxed_sys_shared_dec, memory_order_relaxed, memory_order_relaxed, memory_scope_device, __local);
|
||||||
|
atomic_dec(atom_acquire_sys_shared_dec, memory_order_acquire, memory_order_acquire, memory_scope_device, __local);
|
||||||
|
atomic_dec(atom_release_sys_shared_dec, memory_order_release, memory_order_acquire, memory_scope_device, __local);
|
||||||
|
atomic_dec(atom_acq_rel_sys_shared_dec, memory_order_acq_rel, memory_order_acquire, memory_scope_device, __local);
|
BIN
ptx/lib/notcuda_ptx_impl.spv
Normal file
BIN
ptx/lib/notcuda_ptx_impl.spv
Normal file
Binary file not shown.
|
@ -109,11 +109,12 @@ macro_rules! sub_type {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pointer is used when doing SLM conversion to SPIRV
|
|
||||||
sub_type! {
|
sub_type! {
|
||||||
VariableRegType {
|
VariableRegType {
|
||||||
Scalar(ScalarType),
|
Scalar(ScalarType),
|
||||||
Vector(SizedScalarType, u8),
|
Vector(SizedScalarType, u8),
|
||||||
|
// Pointer variant is used when passing around SLM pointer between
|
||||||
|
// function calls for dynamic SLM
|
||||||
Pointer(SizedScalarType, PointerStateSpace)
|
Pointer(SizedScalarType, PointerStateSpace)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -215,6 +216,11 @@ sub_enum!(SelpType {
|
||||||
F64,
|
F64,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||||
|
pub enum BarDetails {
|
||||||
|
SyncAligned,
|
||||||
|
}
|
||||||
|
|
||||||
pub trait UnwrapWithVec<E, To> {
|
pub trait UnwrapWithVec<E, To> {
|
||||||
fn unwrap_with(self, errs: &mut Vec<E>) -> To;
|
fn unwrap_with(self, errs: &mut Vec<E>) -> To;
|
||||||
}
|
}
|
||||||
|
@ -301,6 +307,7 @@ impl From<FnArgumentType> for Type {
|
||||||
|
|
||||||
sub_enum!(
|
sub_enum!(
|
||||||
PointerStateSpace : LdStateSpace {
|
PointerStateSpace : LdStateSpace {
|
||||||
|
Generic,
|
||||||
Global,
|
Global,
|
||||||
Const,
|
Const,
|
||||||
Shared,
|
Shared,
|
||||||
|
@ -372,6 +379,8 @@ sub_enum!(IntType {
|
||||||
S64
|
S64
|
||||||
});
|
});
|
||||||
|
|
||||||
|
sub_enum!(BitType { B8, B16, B32, B64 });
|
||||||
|
|
||||||
sub_enum!(UIntType { U8, U16, U32, U64 });
|
sub_enum!(UIntType { U8, U16, U32, U64 });
|
||||||
|
|
||||||
sub_enum!(SIntType { S8, S16, S32, S64 });
|
sub_enum!(SIntType { S8, S16, S32, S64 });
|
||||||
|
@ -527,6 +536,9 @@ pub enum Instruction<P: ArgParams> {
|
||||||
Rcp(RcpDetails, Arg2<P>),
|
Rcp(RcpDetails, Arg2<P>),
|
||||||
And(OrAndType, Arg3<P>),
|
And(OrAndType, Arg3<P>),
|
||||||
Selp(SelpType, Arg4<P>),
|
Selp(SelpType, Arg4<P>),
|
||||||
|
Bar(BarDetails, Arg1Bar<P>),
|
||||||
|
Atom(AtomDetails, Arg3<P>),
|
||||||
|
AtomCas(AtomCasDetails, Arg4<P>),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
|
@ -577,6 +589,10 @@ pub struct Arg1<P: ArgParams> {
|
||||||
pub src: P::Id, // it is a jump destination, but in terms of operands it is a source operand
|
pub src: P::Id, // it is a jump destination, but in terms of operands it is a source operand
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct Arg1Bar<P: ArgParams> {
|
||||||
|
pub src: P::Operand,
|
||||||
|
}
|
||||||
|
|
||||||
pub struct Arg2<P: ArgParams> {
|
pub struct Arg2<P: ArgParams> {
|
||||||
pub dst: P::Id,
|
pub dst: P::Id,
|
||||||
pub src: P::Operand,
|
pub src: P::Operand,
|
||||||
|
@ -712,12 +728,12 @@ impl From<LdStType> for PointerType {
|
||||||
pub enum LdStQualifier {
|
pub enum LdStQualifier {
|
||||||
Weak,
|
Weak,
|
||||||
Volatile,
|
Volatile,
|
||||||
Relaxed(LdScope),
|
Relaxed(MemScope),
|
||||||
Acquire(LdScope),
|
Acquire(MemScope),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Copy, Clone, PartialEq, Eq)]
|
#[derive(Copy, Clone, PartialEq, Eq)]
|
||||||
pub enum LdScope {
|
pub enum MemScope {
|
||||||
Cta,
|
Cta,
|
||||||
Gpu,
|
Gpu,
|
||||||
Sys,
|
Sys,
|
||||||
|
@ -1051,6 +1067,74 @@ pub struct MinMaxFloat {
|
||||||
pub typ: FloatType,
|
pub typ: FloatType,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
pub struct AtomDetails {
|
||||||
|
pub semantics: AtomSemantics,
|
||||||
|
pub scope: MemScope,
|
||||||
|
pub space: AtomSpace,
|
||||||
|
pub inner: AtomInnerDetails,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
pub enum AtomSemantics {
|
||||||
|
Relaxed,
|
||||||
|
Acquire,
|
||||||
|
Release,
|
||||||
|
AcquireRelease,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
pub enum AtomSpace {
|
||||||
|
Generic,
|
||||||
|
Global,
|
||||||
|
Shared,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
pub enum AtomInnerDetails {
|
||||||
|
Bit { op: AtomBitOp, typ: BitType },
|
||||||
|
Unsigned { op: AtomUIntOp, typ: UIntType },
|
||||||
|
Signed { op: AtomSIntOp, typ: SIntType },
|
||||||
|
Float { op: AtomFloatOp, typ: FloatType },
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||||
|
pub enum AtomBitOp {
|
||||||
|
And,
|
||||||
|
Or,
|
||||||
|
Xor,
|
||||||
|
Exchange,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||||
|
pub enum AtomUIntOp {
|
||||||
|
Add,
|
||||||
|
Inc,
|
||||||
|
Dec,
|
||||||
|
Min,
|
||||||
|
Max,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||||
|
pub enum AtomSIntOp {
|
||||||
|
Add,
|
||||||
|
Min,
|
||||||
|
Max,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Eq, PartialEq)]
|
||||||
|
pub enum AtomFloatOp {
|
||||||
|
Add,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
pub struct AtomCasDetails {
|
||||||
|
pub semantics: AtomSemantics,
|
||||||
|
pub scope: MemScope,
|
||||||
|
pub space: AtomSpace,
|
||||||
|
pub typ: BitType
|
||||||
|
}
|
||||||
|
|
||||||
pub enum NumsOrArrays<'a> {
|
pub enum NumsOrArrays<'a> {
|
||||||
Nums(Vec<(&'a str, u32)>),
|
Nums(Vec<(&'a str, u32)>),
|
||||||
Arrays(Vec<NumsOrArrays<'a>>),
|
Arrays(Vec<NumsOrArrays<'a>>),
|
||||||
|
|
|
@ -35,9 +35,12 @@ match {
|
||||||
"<", ">",
|
"<", ">",
|
||||||
"|",
|
"|",
|
||||||
"=",
|
"=",
|
||||||
|
".acq_rel",
|
||||||
".acquire",
|
".acquire",
|
||||||
|
".add",
|
||||||
".address_size",
|
".address_size",
|
||||||
".align",
|
".align",
|
||||||
|
".aligned",
|
||||||
".and",
|
".and",
|
||||||
".approx",
|
".approx",
|
||||||
".b16",
|
".b16",
|
||||||
|
@ -45,14 +48,17 @@ match {
|
||||||
".b64",
|
".b64",
|
||||||
".b8",
|
".b8",
|
||||||
".ca",
|
".ca",
|
||||||
|
".cas",
|
||||||
".cg",
|
".cg",
|
||||||
".const",
|
".const",
|
||||||
".cs",
|
".cs",
|
||||||
".cta",
|
".cta",
|
||||||
".cv",
|
".cv",
|
||||||
|
".dec",
|
||||||
".entry",
|
".entry",
|
||||||
".eq",
|
".eq",
|
||||||
".equ",
|
".equ",
|
||||||
|
".exch",
|
||||||
".extern",
|
".extern",
|
||||||
".f16",
|
".f16",
|
||||||
".f16x2",
|
".f16x2",
|
||||||
|
@ -69,6 +75,7 @@ match {
|
||||||
".gtu",
|
".gtu",
|
||||||
".hi",
|
".hi",
|
||||||
".hs",
|
".hs",
|
||||||
|
".inc",
|
||||||
".le",
|
".le",
|
||||||
".leu",
|
".leu",
|
||||||
".lo",
|
".lo",
|
||||||
|
@ -78,6 +85,8 @@ match {
|
||||||
".lt",
|
".lt",
|
||||||
".ltu",
|
".ltu",
|
||||||
".lu",
|
".lu",
|
||||||
|
".max",
|
||||||
|
".min",
|
||||||
".nan",
|
".nan",
|
||||||
".NaN",
|
".NaN",
|
||||||
".ne",
|
".ne",
|
||||||
|
@ -88,6 +97,7 @@ match {
|
||||||
".pred",
|
".pred",
|
||||||
".reg",
|
".reg",
|
||||||
".relaxed",
|
".relaxed",
|
||||||
|
".release",
|
||||||
".rm",
|
".rm",
|
||||||
".rmi",
|
".rmi",
|
||||||
".rn",
|
".rn",
|
||||||
|
@ -103,6 +113,7 @@ match {
|
||||||
".sat",
|
".sat",
|
||||||
".section",
|
".section",
|
||||||
".shared",
|
".shared",
|
||||||
|
".sync",
|
||||||
".sys",
|
".sys",
|
||||||
".target",
|
".target",
|
||||||
".to",
|
".to",
|
||||||
|
@ -126,6 +137,9 @@ match {
|
||||||
"abs",
|
"abs",
|
||||||
"add",
|
"add",
|
||||||
"and",
|
"and",
|
||||||
|
"atom",
|
||||||
|
"bar",
|
||||||
|
"barrier",
|
||||||
"bra",
|
"bra",
|
||||||
"call",
|
"call",
|
||||||
"cvt",
|
"cvt",
|
||||||
|
@ -162,6 +176,9 @@ ExtendedID : &'input str = {
|
||||||
"abs",
|
"abs",
|
||||||
"add",
|
"add",
|
||||||
"and",
|
"and",
|
||||||
|
"atom",
|
||||||
|
"bar",
|
||||||
|
"barrier",
|
||||||
"bra",
|
"bra",
|
||||||
"call",
|
"call",
|
||||||
"cvt",
|
"cvt",
|
||||||
|
@ -372,6 +389,7 @@ StateSpaceSpecifier: ast::StateSpace = {
|
||||||
".param" => ast::StateSpace::Param, // used to prepare function call
|
".param" => ast::StateSpace::Param, // used to prepare function call
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[inline]
|
||||||
ScalarType: ast::ScalarType = {
|
ScalarType: ast::ScalarType = {
|
||||||
".f16" => ast::ScalarType::F16,
|
".f16" => ast::ScalarType::F16,
|
||||||
".f16x2" => ast::ScalarType::F16x2,
|
".f16x2" => ast::ScalarType::F16x2,
|
||||||
|
@ -438,6 +456,7 @@ Variable: ast::Variable<ast::VariableType, &'input str> = {
|
||||||
let v_type = ast::VariableType::Param(v_type);
|
let v_type = ast::VariableType::Param(v_type);
|
||||||
ast::Variable {align, v_type, name, array_init}
|
ast::Variable {align, v_type, name, array_init}
|
||||||
},
|
},
|
||||||
|
SharedVariable,
|
||||||
};
|
};
|
||||||
|
|
||||||
RegVariable: (Option<u32>, ast::VariableRegType, &'input str) = {
|
RegVariable: (Option<u32>, ast::VariableRegType, &'input str) = {
|
||||||
|
@ -478,6 +497,32 @@ LocalVariable: ast::Variable<ast::VariableType, &'input str> = {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SharedVariable: ast::Variable<ast::VariableType, &'input str> = {
|
||||||
|
".shared" <var:VariableScalar<SizedScalarType>> => {
|
||||||
|
let (align, t, name) = var;
|
||||||
|
let v_type = ast::VariableGlobalType::Scalar(t);
|
||||||
|
ast::Variable { align, v_type: ast::VariableType::Shared(v_type), name, array_init: Vec::new() }
|
||||||
|
},
|
||||||
|
".shared" <var:VariableVector<SizedScalarType>> => {
|
||||||
|
let (align, v_len, t, name) = var;
|
||||||
|
let v_type = ast::VariableGlobalType::Vector(t, v_len);
|
||||||
|
ast::Variable { align, v_type: ast::VariableType::Shared(v_type), name, array_init: Vec::new() }
|
||||||
|
},
|
||||||
|
".shared" <var:VariableArrayOrPointer<SizedScalarType>> =>? {
|
||||||
|
let (align, t, name, arr_or_ptr) = var;
|
||||||
|
let (v_type, array_init) = match arr_or_ptr {
|
||||||
|
ast::ArrayOrPointer::Array { dimensions, init } => {
|
||||||
|
(ast::VariableGlobalType::Array(t, dimensions), init)
|
||||||
|
}
|
||||||
|
ast::ArrayOrPointer::Pointer => {
|
||||||
|
return Err(ParseError::User { error: ast::PtxError::ZeroDimensionArray });
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Ok(ast::Variable { align, v_type: ast::VariableType::Shared(v_type), name, array_init })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
ModuleVariable: ast::Variable<ast::VariableType, &'input str> = {
|
ModuleVariable: ast::Variable<ast::VariableType, &'input str> = {
|
||||||
LinkingDirectives ".global" <def:GlobalVariableDefinitionNoArray> => {
|
LinkingDirectives ".global" <def:GlobalVariableDefinitionNoArray> => {
|
||||||
let (align, v_type, name, array_init) = def;
|
let (align, v_type, name, array_init) = def;
|
||||||
|
@ -619,7 +664,10 @@ Instruction: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||||
InstMin,
|
InstMin,
|
||||||
InstMax,
|
InstMax,
|
||||||
InstRcp,
|
InstRcp,
|
||||||
InstSelp
|
InstSelp,
|
||||||
|
InstBar,
|
||||||
|
InstAtom,
|
||||||
|
InstAtomCas
|
||||||
};
|
};
|
||||||
|
|
||||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#data-movement-and-conversion-instructions-ld
|
||||||
|
@ -655,14 +703,14 @@ LdStType: ast::LdStType = {
|
||||||
LdStQualifier: ast::LdStQualifier = {
|
LdStQualifier: ast::LdStQualifier = {
|
||||||
".weak" => ast::LdStQualifier::Weak,
|
".weak" => ast::LdStQualifier::Weak,
|
||||||
".volatile" => ast::LdStQualifier::Volatile,
|
".volatile" => ast::LdStQualifier::Volatile,
|
||||||
".relaxed" <s:LdScope> => ast::LdStQualifier::Relaxed(s),
|
".relaxed" <s:MemScope> => ast::LdStQualifier::Relaxed(s),
|
||||||
".acquire" <s:LdScope> => ast::LdStQualifier::Acquire(s),
|
".acquire" <s:MemScope> => ast::LdStQualifier::Acquire(s),
|
||||||
};
|
};
|
||||||
|
|
||||||
LdScope: ast::LdScope = {
|
MemScope: ast::MemScope = {
|
||||||
".cta" => ast::LdScope::Cta,
|
".cta" => ast::MemScope::Cta,
|
||||||
".gpu" => ast::LdScope::Gpu,
|
".gpu" => ast::MemScope::Gpu,
|
||||||
".sys" => ast::LdScope::Sys
|
".sys" => ast::MemScope::Sys
|
||||||
};
|
};
|
||||||
|
|
||||||
LdStateSpace: ast::LdStateSpace = {
|
LdStateSpace: ast::LdStateSpace = {
|
||||||
|
@ -798,6 +846,13 @@ SIntType: ast::SIntType = {
|
||||||
".s64" => ast::SIntType::S64,
|
".s64" => ast::SIntType::S64,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
FloatType: ast::FloatType = {
|
||||||
|
".f16" => ast::FloatType::F16,
|
||||||
|
".f16x2" => ast::FloatType::F16x2,
|
||||||
|
".f32" => ast::FloatType::F32,
|
||||||
|
".f64" => ast::FloatType::F64,
|
||||||
|
};
|
||||||
|
|
||||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-add
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#integer-arithmetic-instructions-add
|
||||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-add
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#floating-point-instructions-add
|
||||||
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-add
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#half-precision-floating-point-instructions-add
|
||||||
|
@ -1296,6 +1351,140 @@ SelpType: ast::SelpType = {
|
||||||
".f64" => ast::SelpType::F64,
|
".f64" => ast::SelpType::F64,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-bar
|
||||||
|
InstBar: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||||
|
"barrier" ".sync" ".aligned" <a:Arg1Bar> => ast::Instruction::Bar(ast::BarDetails::SyncAligned, a),
|
||||||
|
"bar" ".sync" <a:Arg1Bar> => ast::Instruction::Bar(ast::BarDetails::SyncAligned, a)
|
||||||
|
}
|
||||||
|
|
||||||
|
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-atom
|
||||||
|
// The documentation does not mention all supported operations:
|
||||||
|
// * Operation .add requires .u32 or .s32 or .u64 or .f64 or .f16 or .f16x2 or .f32
|
||||||
|
// * Operation .inc requires .u32 type for instruction
|
||||||
|
// * Operation .dec requires .u32 type for instruction
|
||||||
|
// Otherwise as documented
|
||||||
|
InstAtom: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||||
|
"atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op:AtomBitOp> <typ:AtomBitType> <a:Arg3Atom> => {
|
||||||
|
let details = ast::AtomDetails {
|
||||||
|
semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
|
||||||
|
scope: scope.unwrap_or(ast::MemScope::Gpu),
|
||||||
|
space: space.unwrap_or(ast::AtomSpace::Generic),
|
||||||
|
inner: ast::AtomInnerDetails::Bit { op, typ }
|
||||||
|
};
|
||||||
|
ast::Instruction::Atom(details,a)
|
||||||
|
},
|
||||||
|
"atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".inc" ".u32" <a:Arg3Atom> => {
|
||||||
|
let details = ast::AtomDetails {
|
||||||
|
semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
|
||||||
|
scope: scope.unwrap_or(ast::MemScope::Gpu),
|
||||||
|
space: space.unwrap_or(ast::AtomSpace::Generic),
|
||||||
|
inner: ast::AtomInnerDetails::Unsigned {
|
||||||
|
op: ast::AtomUIntOp::Inc,
|
||||||
|
typ: ast::UIntType::U32
|
||||||
|
}
|
||||||
|
};
|
||||||
|
ast::Instruction::Atom(details,a)
|
||||||
|
},
|
||||||
|
"atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".dec" ".u32" <a:Arg3Atom> => {
|
||||||
|
let details = ast::AtomDetails {
|
||||||
|
semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
|
||||||
|
scope: scope.unwrap_or(ast::MemScope::Gpu),
|
||||||
|
space: space.unwrap_or(ast::AtomSpace::Generic),
|
||||||
|
inner: ast::AtomInnerDetails::Unsigned {
|
||||||
|
op: ast::AtomUIntOp::Dec,
|
||||||
|
typ: ast::UIntType::U32
|
||||||
|
}
|
||||||
|
};
|
||||||
|
ast::Instruction::Atom(details,a)
|
||||||
|
},
|
||||||
|
"atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".add" <typ:FloatType> <a:Arg3Atom> => {
|
||||||
|
let op = ast::AtomFloatOp::Add;
|
||||||
|
let details = ast::AtomDetails {
|
||||||
|
semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
|
||||||
|
scope: scope.unwrap_or(ast::MemScope::Gpu),
|
||||||
|
space: space.unwrap_or(ast::AtomSpace::Generic),
|
||||||
|
inner: ast::AtomInnerDetails::Float { op, typ }
|
||||||
|
};
|
||||||
|
ast::Instruction::Atom(details,a)
|
||||||
|
},
|
||||||
|
"atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomUIntOp> <typ:AtomUIntType> <a:Arg3Atom> => {
|
||||||
|
let details = ast::AtomDetails {
|
||||||
|
semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
|
||||||
|
scope: scope.unwrap_or(ast::MemScope::Gpu),
|
||||||
|
space: space.unwrap_or(ast::AtomSpace::Generic),
|
||||||
|
inner: ast::AtomInnerDetails::Unsigned { op, typ }
|
||||||
|
};
|
||||||
|
ast::Instruction::Atom(details,a)
|
||||||
|
},
|
||||||
|
"atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> <op: AtomSIntOp> <typ:AtomSIntType> <a:Arg3Atom> => {
|
||||||
|
let details = ast::AtomDetails {
|
||||||
|
semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
|
||||||
|
scope: scope.unwrap_or(ast::MemScope::Gpu),
|
||||||
|
space: space.unwrap_or(ast::AtomSpace::Generic),
|
||||||
|
inner: ast::AtomInnerDetails::Signed { op, typ }
|
||||||
|
};
|
||||||
|
ast::Instruction::Atom(details,a)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
InstAtomCas: ast::Instruction<ast::ParsedArgParams<'input>> = {
|
||||||
|
"atom" <sema:AtomSemantics?> <scope:MemScope?> <space:AtomSpace?> ".cas" <typ:AtomBitType> <a:Arg4Atom> => {
|
||||||
|
let details = ast::AtomCasDetails {
|
||||||
|
semantics: sema.unwrap_or(ast::AtomSemantics::Relaxed),
|
||||||
|
scope: scope.unwrap_or(ast::MemScope::Gpu),
|
||||||
|
space: space.unwrap_or(ast::AtomSpace::Generic),
|
||||||
|
typ,
|
||||||
|
};
|
||||||
|
ast::Instruction::AtomCas(details,a)
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
AtomSemantics: ast::AtomSemantics = {
|
||||||
|
".relaxed" => ast::AtomSemantics::Relaxed,
|
||||||
|
".acquire" => ast::AtomSemantics::Acquire,
|
||||||
|
".release" => ast::AtomSemantics::Release,
|
||||||
|
".acq_rel" => ast::AtomSemantics::AcquireRelease
|
||||||
|
}
|
||||||
|
|
||||||
|
AtomSpace: ast::AtomSpace = {
|
||||||
|
".global" => ast::AtomSpace::Global,
|
||||||
|
".shared" => ast::AtomSpace::Shared
|
||||||
|
}
|
||||||
|
|
||||||
|
AtomBitOp: ast::AtomBitOp = {
|
||||||
|
".and" => ast::AtomBitOp::And,
|
||||||
|
".or" => ast::AtomBitOp::Or,
|
||||||
|
".xor" => ast::AtomBitOp::Xor,
|
||||||
|
".exch" => ast::AtomBitOp::Exchange,
|
||||||
|
}
|
||||||
|
|
||||||
|
AtomUIntOp: ast::AtomUIntOp = {
|
||||||
|
".add" => ast::AtomUIntOp::Add,
|
||||||
|
".min" => ast::AtomUIntOp::Min,
|
||||||
|
".max" => ast::AtomUIntOp::Max,
|
||||||
|
}
|
||||||
|
|
||||||
|
AtomSIntOp: ast::AtomSIntOp = {
|
||||||
|
".add" => ast::AtomSIntOp::Add,
|
||||||
|
".min" => ast::AtomSIntOp::Min,
|
||||||
|
".max" => ast::AtomSIntOp::Max,
|
||||||
|
}
|
||||||
|
|
||||||
|
AtomBitType: ast::BitType = {
|
||||||
|
".b32" => ast::BitType::B32,
|
||||||
|
".b64" => ast::BitType::B64,
|
||||||
|
}
|
||||||
|
|
||||||
|
AtomUIntType: ast::UIntType = {
|
||||||
|
".u32" => ast::UIntType::U32,
|
||||||
|
".u64" => ast::UIntType::U64,
|
||||||
|
}
|
||||||
|
|
||||||
|
AtomSIntType: ast::SIntType = {
|
||||||
|
".s32" => ast::SIntType::S32,
|
||||||
|
".s64" => ast::SIntType::S64,
|
||||||
|
}
|
||||||
|
|
||||||
ArithDetails: ast::ArithDetails = {
|
ArithDetails: ast::ArithDetails = {
|
||||||
<t:UIntType> => ast::ArithDetails::Unsigned(t),
|
<t:UIntType> => ast::ArithDetails::Unsigned(t),
|
||||||
<t:SIntType> => ast::ArithDetails::Signed(ast::ArithSInt {
|
<t:SIntType> => ast::ArithDetails::Signed(ast::ArithSInt {
|
||||||
|
@ -1414,6 +1603,10 @@ Arg1: ast::Arg1<ast::ParsedArgParams<'input>> = {
|
||||||
<src:ExtendedID> => ast::Arg1{<>}
|
<src:ExtendedID> => ast::Arg1{<>}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Arg1Bar: ast::Arg1Bar<ast::ParsedArgParams<'input>> = {
|
||||||
|
<src:Operand> => ast::Arg1Bar{<>}
|
||||||
|
};
|
||||||
|
|
||||||
Arg2: ast::Arg2<ast::ParsedArgParams<'input>> = {
|
Arg2: ast::Arg2<ast::ParsedArgParams<'input>> = {
|
||||||
<dst:ExtendedID> "," <src:Operand> => ast::Arg2{<>}
|
<dst:ExtendedID> "," <src:Operand> => ast::Arg2{<>}
|
||||||
};
|
};
|
||||||
|
@ -1448,10 +1641,18 @@ Arg3: ast::Arg3<ast::ParsedArgParams<'input>> = {
|
||||||
<dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> => ast::Arg3{<>}
|
<dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> => ast::Arg3{<>}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Arg3Atom: ast::Arg3<ast::ParsedArgParams<'input>> = {
|
||||||
|
<dst:ExtendedID> "," "[" <src1:Operand> "]" "," <src2:Operand> => ast::Arg3{<>}
|
||||||
|
};
|
||||||
|
|
||||||
Arg4: ast::Arg4<ast::ParsedArgParams<'input>> = {
|
Arg4: ast::Arg4<ast::ParsedArgParams<'input>> = {
|
||||||
<dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> => ast::Arg4{<>}
|
<dst:ExtendedID> "," <src1:Operand> "," <src2:Operand> "," <src3:Operand> => ast::Arg4{<>}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Arg4Atom: ast::Arg4<ast::ParsedArgParams<'input>> = {
|
||||||
|
<dst:ExtendedID> "," "[" <src1:Operand> "]" "," <src2:Operand> "," <src3:Operand> => ast::Arg4{<>}
|
||||||
|
};
|
||||||
|
|
||||||
Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = {
|
Arg4Setp: ast::Arg4Setp<ast::ParsedArgParams<'input>> = {
|
||||||
<dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => ast::Arg4Setp{<>}
|
<dst1:ExtendedID> <dst2:OptionalDst?> "," <src1:Operand> "," <src2:Operand> => ast::Arg4Setp{<>}
|
||||||
};
|
};
|
||||||
|
|
10
ptx/src/test/spirv_build/bar_sync.ptx
Normal file
10
ptx/src/test/spirv_build/bar_sync.ptx
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
.version 6.5
|
||||||
|
.target sm_30
|
||||||
|
.address_size 64
|
||||||
|
|
||||||
|
.visible .entry bar_sync()
|
||||||
|
{
|
||||||
|
.reg .u32 temp_32;
|
||||||
|
bar.sync temp_32;
|
||||||
|
ret;
|
||||||
|
}
|
|
@ -11,8 +11,8 @@ OpCapability Int16
|
||||||
OpCapability Int64
|
OpCapability Int64
|
||||||
OpCapability Float16
|
OpCapability Float16
|
||||||
OpCapability Float64
|
OpCapability Float64
|
||||||
OpCapability FunctionFloatControlINTEL
|
; OpCapability FunctionFloatControlINTEL
|
||||||
OpExtension "SPV_INTEL_float_controls2"
|
; OpExtension "SPV_INTEL_float_controls2"
|
||||||
%33 = OpExtInstImport "OpenCL.std"
|
%33 = OpExtInstImport "OpenCL.std"
|
||||||
OpMemoryModel Physical64 OpenCL
|
OpMemoryModel Physical64 OpenCL
|
||||||
OpEntryPoint Kernel %1 "and"
|
OpEntryPoint Kernel %1 "and"
|
||||||
|
|
28
ptx/src/test/spirv_run/atom_add.ptx
Normal file
28
ptx/src/test/spirv_run/atom_add.ptx
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
.version 6.5
|
||||||
|
.target sm_30
|
||||||
|
.address_size 64
|
||||||
|
|
||||||
|
.visible .entry atom_add(
|
||||||
|
.param .u64 input,
|
||||||
|
.param .u64 output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
.shared .align 4 .b8 shared_mem[1024];
|
||||||
|
|
||||||
|
.reg .u64 in_addr;
|
||||||
|
.reg .u64 out_addr;
|
||||||
|
.reg .u32 temp1;
|
||||||
|
.reg .u32 temp2;
|
||||||
|
|
||||||
|
ld.param.u64 in_addr, [input];
|
||||||
|
ld.param.u64 out_addr, [output];
|
||||||
|
|
||||||
|
ld.u32 temp1, [in_addr];
|
||||||
|
ld.u32 temp2, [in_addr+4];
|
||||||
|
st.shared.u32 [shared_mem], temp1;
|
||||||
|
atom.shared.add.u32 temp1, [shared_mem], temp2;
|
||||||
|
ld.shared.u32 temp2, [shared_mem];
|
||||||
|
st.u32 [out_addr], temp1;
|
||||||
|
st.u32 [out_addr+4], temp2;
|
||||||
|
ret;
|
||||||
|
}
|
84
ptx/src/test/spirv_run/atom_add.spvtxt
Normal file
84
ptx/src/test/spirv_run/atom_add.spvtxt
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
; SPIR-V
|
||||||
|
; Version: 1.3
|
||||||
|
; Generator: rspirv
|
||||||
|
; Bound: 55
|
||||||
|
OpCapability GenericPointer
|
||||||
|
OpCapability Linkage
|
||||||
|
OpCapability Addresses
|
||||||
|
OpCapability Kernel
|
||||||
|
OpCapability Int8
|
||||||
|
OpCapability Int16
|
||||||
|
OpCapability Int64
|
||||||
|
OpCapability Float16
|
||||||
|
OpCapability Float64
|
||||||
|
; OpCapability FunctionFloatControlINTEL
|
||||||
|
; OpExtension "SPV_INTEL_float_controls2"
|
||||||
|
%40 = OpExtInstImport "OpenCL.std"
|
||||||
|
OpMemoryModel Physical64 OpenCL
|
||||||
|
OpEntryPoint Kernel %1 "atom_add" %4
|
||||||
|
OpDecorate %4 Alignment 4
|
||||||
|
%41 = OpTypeVoid
|
||||||
|
%42 = OpTypeInt 32 0
|
||||||
|
%43 = OpTypeInt 8 0
|
||||||
|
%44 = OpConstant %42 1024
|
||||||
|
%45 = OpTypeArray %43 %44
|
||||||
|
%46 = OpTypePointer Workgroup %45
|
||||||
|
%4 = OpVariable %46 Workgroup
|
||||||
|
%47 = OpTypeInt 64 0
|
||||||
|
%48 = OpTypeFunction %41 %47 %47
|
||||||
|
%49 = OpTypePointer Function %47
|
||||||
|
%50 = OpTypePointer Function %42
|
||||||
|
%51 = OpTypePointer Generic %42
|
||||||
|
%27 = OpConstant %47 4
|
||||||
|
%52 = OpTypePointer Workgroup %42
|
||||||
|
%53 = OpConstant %42 1
|
||||||
|
%54 = OpConstant %42 0
|
||||||
|
%29 = OpConstant %47 4
|
||||||
|
%1 = OpFunction %41 None %48
|
||||||
|
%9 = OpFunctionParameter %47
|
||||||
|
%10 = OpFunctionParameter %47
|
||||||
|
%38 = OpLabel
|
||||||
|
%2 = OpVariable %49 Function
|
||||||
|
%3 = OpVariable %49 Function
|
||||||
|
%5 = OpVariable %49 Function
|
||||||
|
%6 = OpVariable %49 Function
|
||||||
|
%7 = OpVariable %50 Function
|
||||||
|
%8 = OpVariable %50 Function
|
||||||
|
OpStore %2 %9
|
||||||
|
OpStore %3 %10
|
||||||
|
%12 = OpLoad %47 %2
|
||||||
|
%11 = OpCopyObject %47 %12
|
||||||
|
OpStore %5 %11
|
||||||
|
%14 = OpLoad %47 %3
|
||||||
|
%13 = OpCopyObject %47 %14
|
||||||
|
OpStore %6 %13
|
||||||
|
%16 = OpLoad %47 %5
|
||||||
|
%31 = OpConvertUToPtr %51 %16
|
||||||
|
%15 = OpLoad %42 %31
|
||||||
|
OpStore %7 %15
|
||||||
|
%18 = OpLoad %47 %5
|
||||||
|
%28 = OpIAdd %47 %18 %27
|
||||||
|
%32 = OpConvertUToPtr %51 %28
|
||||||
|
%17 = OpLoad %42 %32
|
||||||
|
OpStore %8 %17
|
||||||
|
%19 = OpLoad %42 %7
|
||||||
|
%33 = OpBitcast %52 %4
|
||||||
|
OpStore %33 %19
|
||||||
|
%21 = OpLoad %42 %8
|
||||||
|
%34 = OpBitcast %52 %4
|
||||||
|
%20 = OpAtomicIAdd %42 %34 %53 %54 %21
|
||||||
|
OpStore %7 %20
|
||||||
|
%35 = OpBitcast %52 %4
|
||||||
|
%22 = OpLoad %42 %35
|
||||||
|
OpStore %8 %22
|
||||||
|
%23 = OpLoad %47 %6
|
||||||
|
%24 = OpLoad %42 %7
|
||||||
|
%36 = OpConvertUToPtr %51 %23
|
||||||
|
OpStore %36 %24
|
||||||
|
%25 = OpLoad %47 %6
|
||||||
|
%26 = OpLoad %42 %8
|
||||||
|
%30 = OpIAdd %47 %25 %29
|
||||||
|
%37 = OpConvertUToPtr %51 %30
|
||||||
|
OpStore %37 %26
|
||||||
|
OpReturn
|
||||||
|
OpFunctionEnd
|
24
ptx/src/test/spirv_run/atom_cas.ptx
Normal file
24
ptx/src/test/spirv_run/atom_cas.ptx
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
.version 6.5
|
||||||
|
.target sm_30
|
||||||
|
.address_size 64
|
||||||
|
|
||||||
|
.visible .entry atom_cas(
|
||||||
|
.param .u64 input,
|
||||||
|
.param .u64 output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
.reg .u64 in_addr;
|
||||||
|
.reg .u64 out_addr;
|
||||||
|
.reg .u32 temp1;
|
||||||
|
.reg .u32 temp2;
|
||||||
|
|
||||||
|
ld.param.u64 in_addr, [input];
|
||||||
|
ld.param.u64 out_addr, [output];
|
||||||
|
|
||||||
|
ld.u32 temp1, [in_addr];
|
||||||
|
atom.cas.b32 temp1, [in_addr+4], temp1, 100;
|
||||||
|
ld.u32 temp2, [in_addr+4];
|
||||||
|
st.u32 [out_addr], temp1;
|
||||||
|
st.u32 [out_addr+4], temp2;
|
||||||
|
ret;
|
||||||
|
}
|
77
ptx/src/test/spirv_run/atom_cas.spvtxt
Normal file
77
ptx/src/test/spirv_run/atom_cas.spvtxt
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
; SPIR-V
|
||||||
|
; Version: 1.3
|
||||||
|
; Generator: rspirv
|
||||||
|
; Bound: 51
|
||||||
|
OpCapability GenericPointer
|
||||||
|
OpCapability Linkage
|
||||||
|
OpCapability Addresses
|
||||||
|
OpCapability Kernel
|
||||||
|
OpCapability Int8
|
||||||
|
OpCapability Int16
|
||||||
|
OpCapability Int64
|
||||||
|
OpCapability Float16
|
||||||
|
OpCapability Float64
|
||||||
|
; OpCapability FunctionFloatControlINTEL
|
||||||
|
; OpExtension "SPV_INTEL_float_controls2"
|
||||||
|
%41 = OpExtInstImport "OpenCL.std"
|
||||||
|
OpMemoryModel Physical64 OpenCL
|
||||||
|
OpEntryPoint Kernel %1 "atom_cas"
|
||||||
|
%42 = OpTypeVoid
|
||||||
|
%43 = OpTypeInt 64 0
|
||||||
|
%44 = OpTypeFunction %42 %43 %43
|
||||||
|
%45 = OpTypePointer Function %43
|
||||||
|
%46 = OpTypeInt 32 0
|
||||||
|
%47 = OpTypePointer Function %46
|
||||||
|
%48 = OpTypePointer Generic %46
|
||||||
|
%25 = OpConstant %43 4
|
||||||
|
%27 = OpConstant %46 100
|
||||||
|
%49 = OpConstant %46 1
|
||||||
|
%50 = OpConstant %46 0
|
||||||
|
%28 = OpConstant %43 4
|
||||||
|
%30 = OpConstant %43 4
|
||||||
|
%1 = OpFunction %42 None %44
|
||||||
|
%8 = OpFunctionParameter %43
|
||||||
|
%9 = OpFunctionParameter %43
|
||||||
|
%39 = OpLabel
|
||||||
|
%2 = OpVariable %45 Function
|
||||||
|
%3 = OpVariable %45 Function
|
||||||
|
%4 = OpVariable %45 Function
|
||||||
|
%5 = OpVariable %45 Function
|
||||||
|
%6 = OpVariable %47 Function
|
||||||
|
%7 = OpVariable %47 Function
|
||||||
|
OpStore %2 %8
|
||||||
|
OpStore %3 %9
|
||||||
|
%11 = OpLoad %43 %2
|
||||||
|
%10 = OpCopyObject %43 %11
|
||||||
|
OpStore %4 %10
|
||||||
|
%13 = OpLoad %43 %3
|
||||||
|
%12 = OpCopyObject %43 %13
|
||||||
|
OpStore %5 %12
|
||||||
|
%15 = OpLoad %43 %4
|
||||||
|
%32 = OpConvertUToPtr %48 %15
|
||||||
|
%14 = OpLoad %46 %32
|
||||||
|
OpStore %6 %14
|
||||||
|
%17 = OpLoad %43 %4
|
||||||
|
%18 = OpLoad %46 %6
|
||||||
|
%26 = OpIAdd %43 %17 %25
|
||||||
|
%34 = OpConvertUToPtr %48 %26
|
||||||
|
%35 = OpCopyObject %46 %18
|
||||||
|
%33 = OpAtomicCompareExchange %46 %34 %49 %50 %50 %27 %35
|
||||||
|
%16 = OpCopyObject %46 %33
|
||||||
|
OpStore %6 %16
|
||||||
|
%20 = OpLoad %43 %4
|
||||||
|
%29 = OpIAdd %43 %20 %28
|
||||||
|
%36 = OpConvertUToPtr %48 %29
|
||||||
|
%19 = OpLoad %46 %36
|
||||||
|
OpStore %7 %19
|
||||||
|
%21 = OpLoad %43 %5
|
||||||
|
%22 = OpLoad %46 %6
|
||||||
|
%37 = OpConvertUToPtr %48 %21
|
||||||
|
OpStore %37 %22
|
||||||
|
%23 = OpLoad %43 %5
|
||||||
|
%24 = OpLoad %46 %7
|
||||||
|
%31 = OpIAdd %43 %23 %30
|
||||||
|
%38 = OpConvertUToPtr %48 %31
|
||||||
|
OpStore %38 %24
|
||||||
|
OpReturn
|
||||||
|
OpFunctionEnd
|
26
ptx/src/test/spirv_run/atom_inc.ptx
Normal file
26
ptx/src/test/spirv_run/atom_inc.ptx
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
.version 6.5
|
||||||
|
.target sm_30
|
||||||
|
.address_size 64
|
||||||
|
|
||||||
|
.visible .entry atom_inc(
|
||||||
|
.param .u64 input,
|
||||||
|
.param .u64 output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
.reg .u64 in_addr;
|
||||||
|
.reg .u64 out_addr;
|
||||||
|
.reg .u32 temp1;
|
||||||
|
.reg .u32 temp2;
|
||||||
|
.reg .u32 temp3;
|
||||||
|
|
||||||
|
ld.param.u64 in_addr, [input];
|
||||||
|
ld.param.u64 out_addr, [output];
|
||||||
|
|
||||||
|
atom.inc.u32 temp1, [in_addr], 101;
|
||||||
|
atom.global.inc.u32 temp2, [in_addr], 101;
|
||||||
|
ld.u32 temp3, [in_addr];
|
||||||
|
st.u32 [out_addr], temp1;
|
||||||
|
st.u32 [out_addr+4], temp2;
|
||||||
|
st.u32 [out_addr+8], temp3;
|
||||||
|
ret;
|
||||||
|
}
|
89
ptx/src/test/spirv_run/atom_inc.spvtxt
Normal file
89
ptx/src/test/spirv_run/atom_inc.spvtxt
Normal file
|
@ -0,0 +1,89 @@
|
||||||
|
; SPIR-V
|
||||||
|
; Version: 1.3
|
||||||
|
; Generator: rspirv
|
||||||
|
; Bound: 60
|
||||||
|
OpCapability GenericPointer
|
||||||
|
OpCapability Linkage
|
||||||
|
OpCapability Addresses
|
||||||
|
OpCapability Kernel
|
||||||
|
OpCapability Int8
|
||||||
|
OpCapability Int16
|
||||||
|
OpCapability Int64
|
||||||
|
OpCapability Float16
|
||||||
|
OpCapability Float64
|
||||||
|
; OpCapability FunctionFloatControlINTEL
|
||||||
|
; OpExtension "SPV_INTEL_float_controls2"
|
||||||
|
%49 = OpExtInstImport "OpenCL.std"
|
||||||
|
OpMemoryModel Physical64 OpenCL
|
||||||
|
OpEntryPoint Kernel %1 "atom_inc"
|
||||||
|
OpDecorate %40 LinkageAttributes "__notcuda_ptx_impl__atom_relaxed_gpu_generic_inc" Import
|
||||||
|
OpDecorate %44 LinkageAttributes "__notcuda_ptx_impl__atom_relaxed_gpu_global_inc" Import
|
||||||
|
%50 = OpTypeVoid
|
||||||
|
%51 = OpTypeInt 32 0
|
||||||
|
%52 = OpTypePointer Generic %51
|
||||||
|
%53 = OpTypeFunction %51 %52 %51
|
||||||
|
%54 = OpTypePointer CrossWorkgroup %51
|
||||||
|
%55 = OpTypeFunction %51 %54 %51
|
||||||
|
%56 = OpTypeInt 64 0
|
||||||
|
%57 = OpTypeFunction %50 %56 %56
|
||||||
|
%58 = OpTypePointer Function %56
|
||||||
|
%59 = OpTypePointer Function %51
|
||||||
|
%27 = OpConstant %51 101
|
||||||
|
%28 = OpConstant %51 101
|
||||||
|
%29 = OpConstant %56 4
|
||||||
|
%31 = OpConstant %56 8
|
||||||
|
%40 = OpFunction %51 None %53
|
||||||
|
%42 = OpFunctionParameter %52
|
||||||
|
%43 = OpFunctionParameter %51
|
||||||
|
OpFunctionEnd
|
||||||
|
%44 = OpFunction %51 None %55
|
||||||
|
%46 = OpFunctionParameter %54
|
||||||
|
%47 = OpFunctionParameter %51
|
||||||
|
OpFunctionEnd
|
||||||
|
%1 = OpFunction %50 None %57
|
||||||
|
%9 = OpFunctionParameter %56
|
||||||
|
%10 = OpFunctionParameter %56
|
||||||
|
%39 = OpLabel
|
||||||
|
%2 = OpVariable %58 Function
|
||||||
|
%3 = OpVariable %58 Function
|
||||||
|
%4 = OpVariable %58 Function
|
||||||
|
%5 = OpVariable %58 Function
|
||||||
|
%6 = OpVariable %59 Function
|
||||||
|
%7 = OpVariable %59 Function
|
||||||
|
%8 = OpVariable %59 Function
|
||||||
|
OpStore %2 %9
|
||||||
|
OpStore %3 %10
|
||||||
|
%12 = OpLoad %56 %2
|
||||||
|
%11 = OpCopyObject %56 %12
|
||||||
|
OpStore %4 %11
|
||||||
|
%14 = OpLoad %56 %3
|
||||||
|
%13 = OpCopyObject %56 %14
|
||||||
|
OpStore %5 %13
|
||||||
|
%16 = OpLoad %56 %4
|
||||||
|
%33 = OpConvertUToPtr %52 %16
|
||||||
|
%15 = OpFunctionCall %51 %40 %33 %27
|
||||||
|
OpStore %6 %15
|
||||||
|
%18 = OpLoad %56 %4
|
||||||
|
%34 = OpConvertUToPtr %54 %18
|
||||||
|
%17 = OpFunctionCall %51 %44 %34 %28
|
||||||
|
OpStore %7 %17
|
||||||
|
%20 = OpLoad %56 %4
|
||||||
|
%35 = OpConvertUToPtr %52 %20
|
||||||
|
%19 = OpLoad %51 %35
|
||||||
|
OpStore %8 %19
|
||||||
|
%21 = OpLoad %56 %5
|
||||||
|
%22 = OpLoad %51 %6
|
||||||
|
%36 = OpConvertUToPtr %52 %21
|
||||||
|
OpStore %36 %22
|
||||||
|
%23 = OpLoad %56 %5
|
||||||
|
%24 = OpLoad %51 %7
|
||||||
|
%30 = OpIAdd %56 %23 %29
|
||||||
|
%37 = OpConvertUToPtr %52 %30
|
||||||
|
OpStore %37 %24
|
||||||
|
%25 = OpLoad %56 %5
|
||||||
|
%26 = OpLoad %51 %8
|
||||||
|
%32 = OpIAdd %56 %25 %31
|
||||||
|
%38 = OpConvertUToPtr %52 %32
|
||||||
|
OpStore %38 %26
|
||||||
|
OpReturn
|
||||||
|
OpFunctionEnd
|
|
@ -11,12 +11,12 @@ OpCapability Int16
|
||||||
OpCapability Int64
|
OpCapability Int64
|
||||||
OpCapability Float16
|
OpCapability Float16
|
||||||
OpCapability Float64
|
OpCapability Float64
|
||||||
OpCapability FunctionFloatControlINTEL
|
; OpCapability FunctionFloatControlINTEL
|
||||||
OpExtension "SPV_INTEL_float_controls2"
|
; OpExtension "SPV_INTEL_float_controls2"
|
||||||
%24 = OpExtInstImport "OpenCL.std"
|
%24 = OpExtInstImport "OpenCL.std"
|
||||||
OpMemoryModel Physical64 OpenCL
|
OpMemoryModel Physical64 OpenCL
|
||||||
OpEntryPoint Kernel %1 "constant_f32"
|
OpEntryPoint Kernel %1 "constant_f32"
|
||||||
OpDecorate %1 FunctionDenormModeINTEL 32 Preserve
|
; OpDecorate %1 FunctionDenormModeINTEL 32 Preserve
|
||||||
%25 = OpTypeVoid
|
%25 = OpTypeVoid
|
||||||
%26 = OpTypeInt 64 0
|
%26 = OpTypeInt 64 0
|
||||||
%27 = OpTypeFunction %25 %26 %26
|
%27 = OpTypeFunction %25 %26 %26
|
||||||
|
|
|
@ -11,8 +11,8 @@ OpCapability Int16
|
||||||
OpCapability Int64
|
OpCapability Int64
|
||||||
OpCapability Float16
|
OpCapability Float16
|
||||||
OpCapability Float64
|
OpCapability Float64
|
||||||
OpCapability FunctionFloatControlINTEL
|
; OpCapability FunctionFloatControlINTEL
|
||||||
OpExtension "SPV_INTEL_float_controls2"
|
; OpExtension "SPV_INTEL_float_controls2"
|
||||||
%24 = OpExtInstImport "OpenCL.std"
|
%24 = OpExtInstImport "OpenCL.std"
|
||||||
OpMemoryModel Physical64 OpenCL
|
OpMemoryModel Physical64 OpenCL
|
||||||
OpEntryPoint Kernel %1 "constant_negative"
|
OpEntryPoint Kernel %1 "constant_negative"
|
||||||
|
|
|
@ -11,12 +11,12 @@ OpCapability Int16
|
||||||
OpCapability Int64
|
OpCapability Int64
|
||||||
OpCapability Float16
|
OpCapability Float16
|
||||||
OpCapability Float64
|
OpCapability Float64
|
||||||
OpCapability FunctionFloatControlINTEL
|
; OpCapability FunctionFloatControlINTEL
|
||||||
OpExtension "SPV_INTEL_float_controls2"
|
; OpExtension "SPV_INTEL_float_controls2"
|
||||||
%37 = OpExtInstImport "OpenCL.std"
|
%37 = OpExtInstImport "OpenCL.std"
|
||||||
OpMemoryModel Physical64 OpenCL
|
OpMemoryModel Physical64 OpenCL
|
||||||
OpEntryPoint Kernel %1 "fma"
|
OpEntryPoint Kernel %1 "fma"
|
||||||
OpDecorate %1 FunctionDenormModeINTEL 32 Preserve
|
; OpDecorate %1 FunctionDenormModeINTEL 32 Preserve
|
||||||
%38 = OpTypeVoid
|
%38 = OpTypeVoid
|
||||||
%39 = OpTypeInt 64 0
|
%39 = OpTypeInt 64 0
|
||||||
%40 = OpTypeFunction %38 %39 %39
|
%40 = OpTypeFunction %38 %39 %39
|
||||||
|
|
|
@ -86,12 +86,20 @@ test_ptx!(rcp, [2f32], [0.5f32]);
|
||||||
// 0x3f000000 is 0.5
|
// 0x3f000000 is 0.5
|
||||||
// TODO: mul_ftz fails because IGC does not yet handle SPV_INTEL_float_controls2
|
// TODO: mul_ftz fails because IGC does not yet handle SPV_INTEL_float_controls2
|
||||||
// test_ptx!(mul_ftz, [0b1_00000000_10000000000000000000000u32, 0x3f000000u32], [0u32]);
|
// test_ptx!(mul_ftz, [0b1_00000000_10000000000000000000000u32, 0x3f000000u32], [0u32]);
|
||||||
test_ptx!(mul_non_ftz, [0b1_00000000_10000000000000000000000u32, 0x3f000000u32], [0b1_00000000_01000000000000000000000u32]);
|
test_ptx!(
|
||||||
|
mul_non_ftz,
|
||||||
|
[0b1_00000000_10000000000000000000000u32, 0x3f000000u32],
|
||||||
|
[0b1_00000000_01000000000000000000000u32]
|
||||||
|
);
|
||||||
test_ptx!(constant_f32, [10f32], [5f32]);
|
test_ptx!(constant_f32, [10f32], [5f32]);
|
||||||
test_ptx!(constant_negative, [-101i32], [101i32]);
|
test_ptx!(constant_negative, [-101i32], [101i32]);
|
||||||
test_ptx!(and, [6u32, 3u32], [2u32]);
|
test_ptx!(and, [6u32, 3u32], [2u32]);
|
||||||
test_ptx!(selp, [100u16, 200u16], [200u16]);
|
test_ptx!(selp, [100u16, 200u16], [200u16]);
|
||||||
test_ptx!(fma, [2f32, 3f32, 5f32], [11f32]);
|
test_ptx!(fma, [2f32, 3f32, 5f32], [11f32]);
|
||||||
|
test_ptx!(shared_variable, [513u64], [513u64]);
|
||||||
|
test_ptx!(atom_cas, [91u32, 91u32], [91u32, 100u32]);
|
||||||
|
test_ptx!(atom_inc, [100u32], [100u32, 101u32, 0u32]);
|
||||||
|
test_ptx!(atom_add, [2u32, 4u32], [2u32, 6u32]);
|
||||||
|
|
||||||
struct DisplayError<T: Debug> {
|
struct DisplayError<T: Debug> {
|
||||||
err: T,
|
err: T,
|
||||||
|
@ -124,7 +132,7 @@ fn test_ptx_assert<'a, T: From<u8> + ze::SafeRepr + Debug + Copy + PartialEq>(
|
||||||
let name = CString::new(name)?;
|
let name = CString::new(name)?;
|
||||||
let result = run_spirv(name.as_c_str(), notcuda_module, input, output)
|
let result = run_spirv(name.as_c_str(), notcuda_module, input, output)
|
||||||
.map_err(|err| DisplayError { err })?;
|
.map_err(|err| DisplayError { err })?;
|
||||||
assert_eq!(output, result.as_slice());
|
assert_eq!(result.as_slice(), output);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -145,8 +153,8 @@ fn run_spirv<T: From<u8> + ze::SafeRepr + Copy + Debug>(
|
||||||
let use_shared_mem = module
|
let use_shared_mem = module
|
||||||
.kernel_info
|
.kernel_info
|
||||||
.get(name.to_str().unwrap())
|
.get(name.to_str().unwrap())
|
||||||
.unwrap()
|
.map(|info| info.uses_shared_mem)
|
||||||
.uses_shared_mem;
|
.unwrap_or(false);
|
||||||
let mut result = vec![0u8.into(); output.len()];
|
let mut result = vec![0u8.into(); output.len()];
|
||||||
{
|
{
|
||||||
let mut drivers = ze::Driver::get()?;
|
let mut drivers = ze::Driver::get()?;
|
||||||
|
@ -155,11 +163,20 @@ fn run_spirv<T: From<u8> + ze::SafeRepr + Copy + Debug>(
|
||||||
let mut devices = drv.devices()?;
|
let mut devices = drv.devices()?;
|
||||||
let dev = devices.drain(0..1).next().unwrap();
|
let dev = devices.drain(0..1).next().unwrap();
|
||||||
let queue = ze::CommandQueue::new(&mut ctx, &dev)?;
|
let queue = ze::CommandQueue::new(&mut ctx, &dev)?;
|
||||||
let (module, log) = ze::Module::new_spirv(&mut ctx, &dev, byte_il, None);
|
let (module, maybe_log) = match module.should_link_ptx_impl {
|
||||||
|
Some(ptx_impl) => ze::Module::build_link_spirv(&mut ctx, &dev, &[ptx_impl, byte_il]),
|
||||||
|
None => {
|
||||||
|
let (module, log) = ze::Module::build_spirv(&mut ctx, &dev, byte_il, None);
|
||||||
|
(module, Some(log))
|
||||||
|
}
|
||||||
|
};
|
||||||
let module = match module {
|
let module = match module {
|
||||||
Ok(m) => m,
|
Ok(m) => m,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
let raw_err_string = log.get_cstring()?;
|
let raw_err_string = maybe_log
|
||||||
|
.map(|log| log.get_cstring())
|
||||||
|
.transpose()?
|
||||||
|
.unwrap_or(CString::default());
|
||||||
let err_string = raw_err_string.to_string_lossy();
|
let err_string = raw_err_string.to_string_lossy();
|
||||||
panic!("{:?}\n{}", err, err_string);
|
panic!("{:?}\n{}", err, err_string);
|
||||||
}
|
}
|
||||||
|
@ -215,7 +232,11 @@ fn test_spvtxt_assert<'a>(
|
||||||
ptr::null_mut(),
|
ptr::null_mut(),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
assert!(result == spv_result_t::SPV_SUCCESS);
|
if result != spv_result_t::SPV_SUCCESS {
|
||||||
|
panic!("{:?}\n{}", result, unsafe {
|
||||||
|
str::from_utf8_unchecked(spirv_txt)
|
||||||
|
});
|
||||||
|
}
|
||||||
let mut parsed_spirv = Vec::<u32>::new();
|
let mut parsed_spirv = Vec::<u32>::new();
|
||||||
let result = unsafe {
|
let result = unsafe {
|
||||||
spirv_tools::spvBinaryParse(
|
spirv_tools::spvBinaryParse(
|
||||||
|
|
|
@ -11,8 +11,8 @@ OpCapability Int16
|
||||||
OpCapability Int64
|
OpCapability Int64
|
||||||
OpCapability Float16
|
OpCapability Float16
|
||||||
OpCapability Float64
|
OpCapability Float64
|
||||||
OpCapability FunctionFloatControlINTEL
|
; OpCapability FunctionFloatControlINTEL
|
||||||
OpExtension "SPV_INTEL_float_controls2"
|
; OpExtension "SPV_INTEL_float_controls2"
|
||||||
%30 = OpExtInstImport "OpenCL.std"
|
%30 = OpExtInstImport "OpenCL.std"
|
||||||
OpMemoryModel Physical64 OpenCL
|
OpMemoryModel Physical64 OpenCL
|
||||||
OpEntryPoint Kernel %1 "mul_ftz"
|
OpEntryPoint Kernel %1 "mul_ftz"
|
||||||
|
|
|
@ -11,8 +11,8 @@ OpCapability Int16
|
||||||
OpCapability Int64
|
OpCapability Int64
|
||||||
OpCapability Float16
|
OpCapability Float16
|
||||||
OpCapability Float64
|
OpCapability Float64
|
||||||
OpCapability FunctionFloatControlINTEL
|
; OpCapability FunctionFloatControlINTEL
|
||||||
OpExtension "SPV_INTEL_float_controls2"
|
; OpExtension "SPV_INTEL_float_controls2"
|
||||||
%31 = OpExtInstImport "OpenCL.std"
|
%31 = OpExtInstImport "OpenCL.std"
|
||||||
OpMemoryModel Physical64 OpenCL
|
OpMemoryModel Physical64 OpenCL
|
||||||
OpEntryPoint Kernel %1 "selp"
|
OpEntryPoint Kernel %1 "selp"
|
||||||
|
|
26
ptx/src/test/spirv_run/shared_variable.ptx
Normal file
26
ptx/src/test/spirv_run/shared_variable.ptx
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
.version 6.5
|
||||||
|
.target sm_30
|
||||||
|
.address_size 64
|
||||||
|
|
||||||
|
|
||||||
|
.visible .entry shared_variable(
|
||||||
|
.param .u64 input,
|
||||||
|
.param .u64 output
|
||||||
|
)
|
||||||
|
{
|
||||||
|
.shared .align 4 .b8 shared_mem1[128];
|
||||||
|
|
||||||
|
.reg .u64 in_addr;
|
||||||
|
.reg .u64 out_addr;
|
||||||
|
.reg .u64 temp1;
|
||||||
|
.reg .u64 temp2;
|
||||||
|
|
||||||
|
ld.param.u64 in_addr, [input];
|
||||||
|
ld.param.u64 out_addr, [output];
|
||||||
|
|
||||||
|
ld.global.u64 temp1, [in_addr];
|
||||||
|
st.shared.u64 [shared_mem1], temp1;
|
||||||
|
ld.shared.u64 temp2, [shared_mem1];
|
||||||
|
st.global.u64 [out_addr], temp2;
|
||||||
|
ret;
|
||||||
|
}
|
65
ptx/src/test/spirv_run/shared_variable.spvtxt
Normal file
65
ptx/src/test/spirv_run/shared_variable.spvtxt
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
; SPIR-V
|
||||||
|
; Version: 1.3
|
||||||
|
; Generator: rspirv
|
||||||
|
; Bound: 39
|
||||||
|
OpCapability GenericPointer
|
||||||
|
OpCapability Linkage
|
||||||
|
OpCapability Addresses
|
||||||
|
OpCapability Kernel
|
||||||
|
OpCapability Int8
|
||||||
|
OpCapability Int16
|
||||||
|
OpCapability Int64
|
||||||
|
OpCapability Float16
|
||||||
|
OpCapability Float64
|
||||||
|
; OpCapability FunctionFloatControlINTEL
|
||||||
|
; OpExtension "SPV_INTEL_float_controls2"
|
||||||
|
%27 = OpExtInstImport "OpenCL.std"
|
||||||
|
OpMemoryModel Physical64 OpenCL
|
||||||
|
OpEntryPoint Kernel %1 "shared_variable" %4
|
||||||
|
OpDecorate %4 Alignment 4
|
||||||
|
%28 = OpTypeVoid
|
||||||
|
%29 = OpTypeInt 32 0
|
||||||
|
%30 = OpTypeInt 8 0
|
||||||
|
%31 = OpConstant %29 128
|
||||||
|
%32 = OpTypeArray %30 %31
|
||||||
|
%33 = OpTypePointer Workgroup %32
|
||||||
|
%4 = OpVariable %33 Workgroup
|
||||||
|
%34 = OpTypeInt 64 0
|
||||||
|
%35 = OpTypeFunction %28 %34 %34
|
||||||
|
%36 = OpTypePointer Function %34
|
||||||
|
%37 = OpTypePointer CrossWorkgroup %34
|
||||||
|
%38 = OpTypePointer Workgroup %34
|
||||||
|
%1 = OpFunction %28 None %35
|
||||||
|
%9 = OpFunctionParameter %34
|
||||||
|
%10 = OpFunctionParameter %34
|
||||||
|
%25 = OpLabel
|
||||||
|
%2 = OpVariable %36 Function
|
||||||
|
%3 = OpVariable %36 Function
|
||||||
|
%5 = OpVariable %36 Function
|
||||||
|
%6 = OpVariable %36 Function
|
||||||
|
%7 = OpVariable %36 Function
|
||||||
|
%8 = OpVariable %36 Function
|
||||||
|
OpStore %2 %9
|
||||||
|
OpStore %3 %10
|
||||||
|
%12 = OpLoad %34 %2
|
||||||
|
%11 = OpCopyObject %34 %12
|
||||||
|
OpStore %5 %11
|
||||||
|
%14 = OpLoad %34 %3
|
||||||
|
%13 = OpCopyObject %34 %14
|
||||||
|
OpStore %6 %13
|
||||||
|
%16 = OpLoad %34 %5
|
||||||
|
%21 = OpConvertUToPtr %37 %16
|
||||||
|
%15 = OpLoad %34 %21
|
||||||
|
OpStore %7 %15
|
||||||
|
%17 = OpLoad %34 %7
|
||||||
|
%22 = OpBitcast %38 %4
|
||||||
|
OpStore %22 %17
|
||||||
|
%23 = OpBitcast %38 %4
|
||||||
|
%18 = OpLoad %34 %23
|
||||||
|
OpStore %8 %18
|
||||||
|
%19 = OpLoad %34 %6
|
||||||
|
%20 = OpLoad %34 %8
|
||||||
|
%24 = OpConvertUToPtr %37 %19
|
||||||
|
OpStore %24 %20
|
||||||
|
OpReturn
|
||||||
|
OpFunctionEnd
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue