mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-28 21:47:57 +03:00
Update tests
This commit is contained in:
parent
30cbf6dd54
commit
36407dcc3a
93 changed files with 2772 additions and 2264 deletions
|
@ -12,21 +12,25 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #1 {
|
||||||
%"35" = alloca i64, align 8, addrspace(5)
|
%"36" = alloca i64, align 8, addrspace(5)
|
||||||
%"36" = alloca i32, align 4, addrspace(5)
|
%"37" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"37" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"44"
|
||||||
store i64 %"37", ptr addrspace(5) %"35", align 4
|
|
||||||
%"38" = call i32 @__zluda_ptx_impl_activemask()
|
"44": ; preds = %1
|
||||||
store i32 %"38", ptr addrspace(5) %"36", align 4
|
%"38" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
%"39" = load i64, ptr addrspace(5) %"35", align 4
|
store i64 %"38", ptr addrspace(5) %"36", align 4
|
||||||
%"40" = load i32, ptr addrspace(5) %"36", align 4
|
%"39" = call i32 @__zluda_ptx_impl_activemask()
|
||||||
%"41" = inttoptr i64 %"39" to ptr
|
store i32 %"39", ptr addrspace(5) %"37", align 4
|
||||||
store i32 %"40", ptr %"41", align 4
|
%"40" = load i64, ptr addrspace(5) %"36", align 4
|
||||||
|
%"41" = load i32, ptr addrspace(5) %"37", align 4
|
||||||
|
%"42" = inttoptr i64 %"40" to ptr
|
||||||
|
store i32 %"41", ptr %"42", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"53"
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"53": ; preds = %1
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%"50" = inttoptr i64 %"45" to ptr
|
|
||||||
%"44" = load i64, ptr %"50", align 4
|
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
%"46" = add i64 %"47", 1
|
%"51" = inttoptr i64 %"46" to ptr
|
||||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
%"45" = load i64, ptr %"51", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"51" = inttoptr i64 %"48" to ptr
|
%"47" = add i64 %"48", 1
|
||||||
store i64 %"49", ptr %"51", align 4
|
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||||
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
|
%"52" = inttoptr i64 %"49" to ptr
|
||||||
|
store i64 %"50", ptr %"52", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
65
ptx/src/test/ll/add_ftz.ll
Normal file
65
ptx/src/test/ll/add_ftz.ll
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||||
|
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||||
|
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||||
|
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||||
|
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
|
define amdgpu_kernel void @add_ftz(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #1 {
|
||||||
|
%"44" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"45" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"46" = alloca float, align 4, addrspace(5)
|
||||||
|
%"47" = alloca float, align 4, addrspace(5)
|
||||||
|
%"48" = alloca float, align 4, addrspace(5)
|
||||||
|
%"49" = alloca float, align 4, addrspace(5)
|
||||||
|
br label %1
|
||||||
|
|
||||||
|
1: ; preds = %0
|
||||||
|
br label %"70"
|
||||||
|
|
||||||
|
"70": ; preds = %1
|
||||||
|
%"50" = load i64, ptr addrspace(4) %"42", align 4
|
||||||
|
store i64 %"50", ptr addrspace(5) %"44", align 4
|
||||||
|
%"51" = load i64, ptr addrspace(4) %"43", align 4
|
||||||
|
store i64 %"51", ptr addrspace(5) %"45", align 4
|
||||||
|
%"53" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
|
%"66" = inttoptr i64 %"53" to ptr
|
||||||
|
%"52" = load float, ptr %"66", align 4
|
||||||
|
store float %"52", ptr addrspace(5) %"46", align 4
|
||||||
|
%"54" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
|
%"67" = inttoptr i64 %"54" to ptr
|
||||||
|
%"33" = getelementptr inbounds i8, ptr %"67", i64 4
|
||||||
|
%"55" = load float, ptr %"33", align 4
|
||||||
|
store float %"55", ptr addrspace(5) %"47", align 4
|
||||||
|
%"57" = load float, ptr addrspace(5) %"46", align 4
|
||||||
|
%"58" = load float, ptr addrspace(5) %"47", align 4
|
||||||
|
%"56" = fadd float %"57", %"58"
|
||||||
|
store float %"56", ptr addrspace(5) %"48", align 4
|
||||||
|
%"60" = load float, ptr addrspace(5) %"46", align 4
|
||||||
|
%"61" = load float, ptr addrspace(5) %"47", align 4
|
||||||
|
call void @llvm.amdgcn.s.setreg(i32 2305, i32 3)
|
||||||
|
%"59" = fadd float %"60", %"61"
|
||||||
|
store float %"59", ptr addrspace(5) %"49", align 4
|
||||||
|
%"62" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
|
%"63" = load float, ptr addrspace(5) %"48", align 4
|
||||||
|
%"68" = inttoptr i64 %"62" to ptr
|
||||||
|
store float %"63", ptr %"68", align 4
|
||||||
|
%"64" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
|
%"69" = inttoptr i64 %"64" to ptr
|
||||||
|
%"35" = getelementptr inbounds i8, ptr %"69", i64 4
|
||||||
|
%"65" = load float, ptr addrspace(5) %"49", align 4
|
||||||
|
store float %"65", ptr %"35", align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
||||||
|
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2
|
||||||
|
|
||||||
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind willreturn }
|
|
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"53"
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"53": ; preds = %1
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%"50" = inttoptr i64 %"45" to ptr addrspace(1)
|
|
||||||
%"44" = load i64, ptr addrspace(1) %"50", align 4
|
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
%"46" = add i64 %"47", 1
|
%"51" = inttoptr i64 %"46" to ptr addrspace(1)
|
||||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
%"45" = load i64, ptr addrspace(1) %"51", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"51" = inttoptr i64 %"48" to ptr addrspace(1)
|
%"47" = add i64 %"48", 1
|
||||||
store i64 %"49", ptr addrspace(1) %"51", align 4
|
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||||
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
|
%"52" = inttoptr i64 %"49" to ptr addrspace(1)
|
||||||
|
store i64 %"50", ptr addrspace(1) %"52", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"53"
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"53": ; preds = %1
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%"50" = inttoptr i64 %"45" to ptr
|
|
||||||
%"44" = load i64, ptr %"50", align 4
|
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
%"46" = add i64 %"47", 1
|
%"51" = inttoptr i64 %"46" to ptr
|
||||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
%"45" = load i64, ptr %"51", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"51" = inttoptr i64 %"48" to ptr
|
%"47" = add i64 %"48", 1
|
||||||
store i64 %"49", ptr %"51", align 4
|
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||||
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
|
%"52" = inttoptr i64 %"49" to ptr
|
||||||
|
store i64 %"50", ptr %"52", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i32, align 4, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"61"
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"61": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"54" = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"45" = load i32, ptr %"54", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i32 %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
%"55" = inttoptr i64 %"47" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"55", i64 4
|
%"46" = load i32, ptr %"55", align 4
|
||||||
%"48" = load i32, ptr %"30", align 4
|
store i32 %"46", ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"48", ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||||
|
%"49" = load i32, ptr %"31", align 4
|
||||||
|
store i32 %"49", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
%"56" = and i32 %"50", %"51"
|
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
store i32 %"56", ptr addrspace(5) %"41", align 4
|
%"57" = and i32 %"51", %"52"
|
||||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
store i32 %"57", ptr addrspace(5) %"42", align 4
|
||||||
%"53" = load i32, ptr addrspace(5) %"41", align 4
|
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"59" = inttoptr i64 %"52" to ptr
|
%"54" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"53", ptr %"59", align 4
|
%"60" = inttoptr i64 %"53" to ptr
|
||||||
|
store i32 %"54", ptr %"60", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -12,44 +12,48 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
|
define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"43" = alloca i64, align 8, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"44" = alloca i32, align 4, addrspace(5)
|
%"44" = alloca i64, align 8, addrspace(5)
|
||||||
%"45" = alloca i32, align 4, addrspace(5)
|
%"45" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"46" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
br label %"68"
|
||||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
|
||||||
|
"68": ; preds = %1
|
||||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(4) %"42", align 4
|
||||||
%"60" = inttoptr i64 %"49" to ptr
|
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||||
%"48" = load i32, ptr %"60", align 4
|
%"50" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
store i32 %"48", ptr addrspace(5) %"44", align 4
|
|
||||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
|
||||||
%"61" = inttoptr i64 %"50" to ptr
|
%"61" = inttoptr i64 %"50" to ptr
|
||||||
%"31" = getelementptr inbounds i8, ptr %"61", i64 4
|
%"49" = load i32, ptr %"61", align 4
|
||||||
%"51" = load i32, ptr %"31", align 4
|
store i32 %"49", ptr addrspace(5) %"45", align 4
|
||||||
store i32 %"51", ptr addrspace(5) %"45", align 4
|
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
%"52" = load i32, ptr addrspace(5) %"44", align 4
|
%"62" = inttoptr i64 %"51" to ptr
|
||||||
store i32 %"52", ptr addrspace(3) @shared_mem, align 4
|
%"32" = getelementptr inbounds i8, ptr %"62", i64 4
|
||||||
%"54" = load i32, ptr addrspace(5) %"45", align 4
|
%"52" = load i32, ptr %"32", align 4
|
||||||
%2 = atomicrmw add ptr addrspace(3) @shared_mem, i32 %"54" syncscope("agent-one-as") monotonic, align 4
|
store i32 %"52", ptr addrspace(5) %"46", align 4
|
||||||
store i32 %2, ptr addrspace(5) %"44", align 4
|
%"53" = load i32, ptr addrspace(5) %"45", align 4
|
||||||
%"55" = load i32, ptr addrspace(3) @shared_mem, align 4
|
store i32 %"53", ptr addrspace(3) @shared_mem, align 4
|
||||||
store i32 %"55", ptr addrspace(5) %"45", align 4
|
%"55" = load i32, ptr addrspace(5) %"46", align 4
|
||||||
%"56" = load i64, ptr addrspace(5) %"43", align 4
|
%2 = atomicrmw add ptr addrspace(3) @shared_mem, i32 %"55" syncscope("agent-one-as") monotonic, align 4
|
||||||
%"57" = load i32, ptr addrspace(5) %"44", align 4
|
store i32 %2, ptr addrspace(5) %"45", align 4
|
||||||
%"65" = inttoptr i64 %"56" to ptr
|
%"56" = load i32, ptr addrspace(3) @shared_mem, align 4
|
||||||
store i32 %"57", ptr %"65", align 4
|
store i32 %"56", ptr addrspace(5) %"46", align 4
|
||||||
%"58" = load i64, ptr addrspace(5) %"43", align 4
|
%"57" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
%"66" = inttoptr i64 %"58" to ptr
|
%"58" = load i32, ptr addrspace(5) %"45", align 4
|
||||||
%"33" = getelementptr inbounds i8, ptr %"66", i64 4
|
%"66" = inttoptr i64 %"57" to ptr
|
||||||
%"59" = load i32, ptr addrspace(5) %"45", align 4
|
store i32 %"58", ptr %"66", align 4
|
||||||
store i32 %"59", ptr %"33", align 4
|
%"59" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
|
%"67" = inttoptr i64 %"59" to ptr
|
||||||
|
%"34" = getelementptr inbounds i8, ptr %"67", i64 4
|
||||||
|
%"60" = load i32, ptr addrspace(5) %"46", align 4
|
||||||
|
store i32 %"60", ptr %"34", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -12,44 +12,48 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
|
define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"43" = alloca i64, align 8, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"44" = alloca float, align 4, addrspace(5)
|
%"44" = alloca i64, align 8, addrspace(5)
|
||||||
%"45" = alloca float, align 4, addrspace(5)
|
%"45" = alloca float, align 4, addrspace(5)
|
||||||
|
%"46" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
br label %"68"
|
||||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
|
||||||
|
"68": ; preds = %1
|
||||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(4) %"42", align 4
|
||||||
%"60" = inttoptr i64 %"49" to ptr
|
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||||
%"48" = load float, ptr %"60", align 4
|
%"50" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
store float %"48", ptr addrspace(5) %"44", align 4
|
|
||||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
|
||||||
%"61" = inttoptr i64 %"50" to ptr
|
%"61" = inttoptr i64 %"50" to ptr
|
||||||
%"31" = getelementptr inbounds i8, ptr %"61", i64 4
|
%"49" = load float, ptr %"61", align 4
|
||||||
%"51" = load float, ptr %"31", align 4
|
store float %"49", ptr addrspace(5) %"45", align 4
|
||||||
store float %"51", ptr addrspace(5) %"45", align 4
|
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
%"52" = load float, ptr addrspace(5) %"44", align 4
|
%"62" = inttoptr i64 %"51" to ptr
|
||||||
store float %"52", ptr addrspace(3) @shared_mem, align 4
|
%"32" = getelementptr inbounds i8, ptr %"62", i64 4
|
||||||
%"54" = load float, ptr addrspace(5) %"45", align 4
|
%"52" = load float, ptr %"32", align 4
|
||||||
%2 = atomicrmw fadd ptr addrspace(3) @shared_mem, float %"54" syncscope("agent-one-as") monotonic, align 4
|
store float %"52", ptr addrspace(5) %"46", align 4
|
||||||
store float %2, ptr addrspace(5) %"44", align 4
|
%"53" = load float, ptr addrspace(5) %"45", align 4
|
||||||
%"55" = load float, ptr addrspace(3) @shared_mem, align 4
|
store float %"53", ptr addrspace(3) @shared_mem, align 4
|
||||||
store float %"55", ptr addrspace(5) %"45", align 4
|
%"55" = load float, ptr addrspace(5) %"46", align 4
|
||||||
%"56" = load i64, ptr addrspace(5) %"43", align 4
|
%2 = atomicrmw fadd ptr addrspace(3) @shared_mem, float %"55" syncscope("agent-one-as") monotonic, align 4
|
||||||
%"57" = load float, ptr addrspace(5) %"44", align 4
|
store float %2, ptr addrspace(5) %"45", align 4
|
||||||
%"65" = inttoptr i64 %"56" to ptr
|
%"56" = load float, ptr addrspace(3) @shared_mem, align 4
|
||||||
store float %"57", ptr %"65", align 4
|
store float %"56", ptr addrspace(5) %"46", align 4
|
||||||
%"58" = load i64, ptr addrspace(5) %"43", align 4
|
%"57" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
%"66" = inttoptr i64 %"58" to ptr
|
%"58" = load float, ptr addrspace(5) %"45", align 4
|
||||||
%"33" = getelementptr inbounds i8, ptr %"66", i64 4
|
%"66" = inttoptr i64 %"57" to ptr
|
||||||
%"59" = load float, ptr addrspace(5) %"45", align 4
|
store float %"58", ptr %"66", align 4
|
||||||
store float %"59", ptr %"33", align 4
|
%"59" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
|
%"67" = inttoptr i64 %"59" to ptr
|
||||||
|
%"34" = getelementptr inbounds i8, ptr %"67", i64 4
|
||||||
|
%"60" = load float, ptr addrspace(5) %"46", align 4
|
||||||
|
store float %"60", ptr %"34", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,44 +10,48 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 {
|
define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #1 {
|
||||||
%"44" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"45" = alloca i64, align 8, addrspace(5)
|
%"45" = alloca i64, align 8, addrspace(5)
|
||||||
%"46" = alloca i32, align 4, addrspace(5)
|
%"46" = alloca i64, align 8, addrspace(5)
|
||||||
%"47" = alloca i32, align 4, addrspace(5)
|
%"47" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"48" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"48" = load i64, ptr addrspace(4) %"42", align 4
|
br label %"69"
|
||||||
store i64 %"48", ptr addrspace(5) %"44", align 4
|
|
||||||
|
"69": ; preds = %1
|
||||||
%"49" = load i64, ptr addrspace(4) %"43", align 4
|
%"49" = load i64, ptr addrspace(4) %"43", align 4
|
||||||
store i64 %"49", ptr addrspace(5) %"45", align 4
|
store i64 %"49", ptr addrspace(5) %"45", align 4
|
||||||
%"51" = load i64, ptr addrspace(5) %"44", align 4
|
%"50" = load i64, ptr addrspace(4) %"44", align 4
|
||||||
%"61" = inttoptr i64 %"51" to ptr
|
store i64 %"50", ptr addrspace(5) %"46", align 4
|
||||||
%"50" = load i32, ptr %"61", align 4
|
%"52" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
store i32 %"50", ptr addrspace(5) %"46", align 4
|
|
||||||
%"52" = load i64, ptr addrspace(5) %"44", align 4
|
|
||||||
%"62" = inttoptr i64 %"52" to ptr
|
%"62" = inttoptr i64 %"52" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"62", i64 4
|
%"51" = load i32, ptr %"62", align 4
|
||||||
%"54" = load i32, ptr addrspace(5) %"46", align 4
|
store i32 %"51", ptr addrspace(5) %"47", align 4
|
||||||
%2 = cmpxchg ptr %"30", i32 %"54", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
|
%"53" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
%"63" = extractvalue { i32, i1 } %2, 0
|
%"63" = inttoptr i64 %"53" to ptr
|
||||||
store i32 %"63", ptr addrspace(5) %"46", align 4
|
%"31" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||||
%"55" = load i64, ptr addrspace(5) %"44", align 4
|
%"55" = load i32, ptr addrspace(5) %"47", align 4
|
||||||
%"65" = inttoptr i64 %"55" to ptr
|
%2 = cmpxchg ptr %"31", i32 %"55", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
|
||||||
%"33" = getelementptr inbounds i8, ptr %"65", i64 4
|
%"64" = extractvalue { i32, i1 } %2, 0
|
||||||
%"56" = load i32, ptr %"33", align 4
|
store i32 %"64", ptr addrspace(5) %"47", align 4
|
||||||
store i32 %"56", ptr addrspace(5) %"47", align 4
|
%"56" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
%"57" = load i64, ptr addrspace(5) %"45", align 4
|
%"66" = inttoptr i64 %"56" to ptr
|
||||||
%"58" = load i32, ptr addrspace(5) %"46", align 4
|
%"34" = getelementptr inbounds i8, ptr %"66", i64 4
|
||||||
%"66" = inttoptr i64 %"57" to ptr
|
%"57" = load i32, ptr %"34", align 4
|
||||||
store i32 %"58", ptr %"66", align 4
|
store i32 %"57", ptr addrspace(5) %"48", align 4
|
||||||
%"59" = load i64, ptr addrspace(5) %"45", align 4
|
%"58" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
%"67" = inttoptr i64 %"59" to ptr
|
%"59" = load i32, ptr addrspace(5) %"47", align 4
|
||||||
%"35" = getelementptr inbounds i8, ptr %"67", i64 4
|
%"67" = inttoptr i64 %"58" to ptr
|
||||||
%"60" = load i32, ptr addrspace(5) %"47", align 4
|
store i32 %"59", ptr %"67", align 4
|
||||||
store i32 %"60", ptr %"35", align 4
|
%"60" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
|
%"68" = inttoptr i64 %"60" to ptr
|
||||||
|
%"36" = getelementptr inbounds i8, ptr %"68", i64 4
|
||||||
|
%"61" = load i32, ptr addrspace(5) %"48", align 4
|
||||||
|
store i32 %"61", ptr %"36", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,46 +10,50 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 {
|
define amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #1 {
|
||||||
%"44" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"45" = alloca i64, align 8, addrspace(5)
|
%"45" = alloca i64, align 8, addrspace(5)
|
||||||
%"46" = alloca i32, align 4, addrspace(5)
|
%"46" = alloca i64, align 8, addrspace(5)
|
||||||
%"47" = alloca i32, align 4, addrspace(5)
|
%"47" = alloca i32, align 4, addrspace(5)
|
||||||
%"48" = alloca i32, align 4, addrspace(5)
|
%"48" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"49" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"49" = load i64, ptr addrspace(4) %"42", align 4
|
br label %"70"
|
||||||
store i64 %"49", ptr addrspace(5) %"44", align 4
|
|
||||||
|
"70": ; preds = %1
|
||||||
%"50" = load i64, ptr addrspace(4) %"43", align 4
|
%"50" = load i64, ptr addrspace(4) %"43", align 4
|
||||||
store i64 %"50", ptr addrspace(5) %"45", align 4
|
store i64 %"50", ptr addrspace(5) %"45", align 4
|
||||||
%"52" = load i64, ptr addrspace(5) %"44", align 4
|
%"51" = load i64, ptr addrspace(4) %"44", align 4
|
||||||
%"63" = inttoptr i64 %"52" to ptr
|
store i64 %"51", ptr addrspace(5) %"46", align 4
|
||||||
%2 = atomicrmw uinc_wrap ptr %"63", i32 101 syncscope("agent-one-as") monotonic, align 4
|
%"53" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
store i32 %2, ptr addrspace(5) %"46", align 4
|
%"64" = inttoptr i64 %"53" to ptr
|
||||||
%"54" = load i64, ptr addrspace(5) %"44", align 4
|
%2 = atomicrmw uinc_wrap ptr %"64", i32 101 syncscope("agent-one-as") monotonic, align 4
|
||||||
%"64" = inttoptr i64 %"54" to ptr addrspace(1)
|
store i32 %2, ptr addrspace(5) %"47", align 4
|
||||||
%3 = atomicrmw uinc_wrap ptr addrspace(1) %"64", i32 101 syncscope("agent-one-as") monotonic, align 4
|
%"55" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
store i32 %3, ptr addrspace(5) %"47", align 4
|
%"65" = inttoptr i64 %"55" to ptr addrspace(1)
|
||||||
%"56" = load i64, ptr addrspace(5) %"44", align 4
|
%3 = atomicrmw uinc_wrap ptr addrspace(1) %"65", i32 101 syncscope("agent-one-as") monotonic, align 4
|
||||||
%"65" = inttoptr i64 %"56" to ptr
|
store i32 %3, ptr addrspace(5) %"48", align 4
|
||||||
%"55" = load i32, ptr %"65", align 4
|
|
||||||
store i32 %"55", ptr addrspace(5) %"48", align 4
|
|
||||||
%"57" = load i64, ptr addrspace(5) %"45", align 4
|
%"57" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
%"58" = load i32, ptr addrspace(5) %"46", align 4
|
|
||||||
%"66" = inttoptr i64 %"57" to ptr
|
%"66" = inttoptr i64 %"57" to ptr
|
||||||
store i32 %"58", ptr %"66", align 4
|
%"56" = load i32, ptr %"66", align 4
|
||||||
%"59" = load i64, ptr addrspace(5) %"45", align 4
|
store i32 %"56", ptr addrspace(5) %"49", align 4
|
||||||
%"67" = inttoptr i64 %"59" to ptr
|
%"58" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
%"33" = getelementptr inbounds i8, ptr %"67", i64 4
|
%"59" = load i32, ptr addrspace(5) %"47", align 4
|
||||||
%"60" = load i32, ptr addrspace(5) %"47", align 4
|
%"67" = inttoptr i64 %"58" to ptr
|
||||||
store i32 %"60", ptr %"33", align 4
|
store i32 %"59", ptr %"67", align 4
|
||||||
%"61" = load i64, ptr addrspace(5) %"45", align 4
|
%"60" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
%"68" = inttoptr i64 %"61" to ptr
|
%"68" = inttoptr i64 %"60" to ptr
|
||||||
%"35" = getelementptr inbounds i8, ptr %"68", i64 8
|
%"34" = getelementptr inbounds i8, ptr %"68", i64 4
|
||||||
%"62" = load i32, ptr addrspace(5) %"48", align 4
|
%"61" = load i32, ptr addrspace(5) %"48", align 4
|
||||||
store i32 %"62", ptr %"35", align 4
|
store i32 %"61", ptr %"34", align 4
|
||||||
|
%"62" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
|
%"69" = inttoptr i64 %"62" to ptr
|
||||||
|
%"36" = getelementptr inbounds i8, ptr %"69", i64 8
|
||||||
|
%"63" = load i32, ptr addrspace(5) %"49", align 4
|
||||||
|
store i32 %"63", ptr %"36", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca double, align 8, addrspace(5)
|
%"38" = alloca double, align 8, addrspace(5)
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"41" = load double, ptr addrspace(4) %"35", align 8
|
br label %"54"
|
||||||
store double %"41", ptr addrspace(5) %"37", align 8
|
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
"54": ; preds = %1
|
||||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
%"42" = load double, ptr addrspace(4) %"36", align 8
|
||||||
%"44" = load double, ptr addrspace(5) %"37", align 8
|
store double %"42", ptr addrspace(5) %"38", align 8
|
||||||
%"50" = bitcast double %"44" to i64
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"50", ptr addrspace(5) %"38", align 4
|
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
%"45" = load double, ptr addrspace(5) %"38", align 8
|
||||||
%"51" = inttoptr i64 %"46" to ptr
|
%"51" = bitcast double %"45" to i64
|
||||||
%"45" = load i64, ptr %"51", align 4
|
store i64 %"51", ptr addrspace(5) %"39", align 4
|
||||||
store i64 %"45", ptr addrspace(5) %"40", align 4
|
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
|
||||||
%"52" = inttoptr i64 %"47" to ptr
|
%"52" = inttoptr i64 %"47" to ptr
|
||||||
store i64 %"48", ptr %"52", align 4
|
%"46" = load i64, ptr %"52", align 4
|
||||||
|
store i64 %"46", ptr addrspace(5) %"41", align 4
|
||||||
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
|
%"53" = inttoptr i64 %"48" to ptr
|
||||||
|
store i64 %"49", ptr %"53", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -12,43 +12,47 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
|
define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"43" = alloca i64, align 8, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"44" = alloca i32, align 4, addrspace(5)
|
%"44" = alloca i64, align 8, addrspace(5)
|
||||||
%"45" = alloca i32, align 4, addrspace(5)
|
%"45" = alloca i32, align 4, addrspace(5)
|
||||||
%"46" = alloca i32, align 4, addrspace(5)
|
%"46" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"47" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"47" = load i64, ptr addrspace(4) %"40", align 4
|
br label %"66"
|
||||||
store i64 %"47", ptr addrspace(5) %"42", align 4
|
|
||||||
|
"66": ; preds = %1
|
||||||
%"48" = load i64, ptr addrspace(4) %"41", align 4
|
%"48" = load i64, ptr addrspace(4) %"41", align 4
|
||||||
store i64 %"48", ptr addrspace(5) %"43", align 4
|
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
%"49" = load i64, ptr addrspace(4) %"42", align 4
|
||||||
%"61" = inttoptr i64 %"50" to ptr
|
store i64 %"49", ptr addrspace(5) %"44", align 4
|
||||||
%"49" = load i32, ptr %"61", align 4
|
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
store i32 %"49", ptr addrspace(5) %"44", align 4
|
|
||||||
%"51" = load i64, ptr addrspace(5) %"42", align 4
|
|
||||||
%"62" = inttoptr i64 %"51" to ptr
|
%"62" = inttoptr i64 %"51" to ptr
|
||||||
%"31" = getelementptr inbounds i8, ptr %"62", i64 4
|
%"50" = load i32, ptr %"62", align 4
|
||||||
%"52" = load i32, ptr %"31", align 4
|
store i32 %"50", ptr addrspace(5) %"45", align 4
|
||||||
store i32 %"52", ptr addrspace(5) %"45", align 4
|
%"52" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
%"53" = load i64, ptr addrspace(5) %"42", align 4
|
%"63" = inttoptr i64 %"52" to ptr
|
||||||
%"63" = inttoptr i64 %"53" to ptr
|
%"32" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||||
%"33" = getelementptr inbounds i8, ptr %"63", i64 8
|
%"53" = load i32, ptr %"32", align 4
|
||||||
%"54" = load i32, ptr %"33", align 4
|
store i32 %"53", ptr addrspace(5) %"46", align 4
|
||||||
store i32 %"54", ptr addrspace(5) %"46", align 4
|
%"54" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
%"56" = load i32, ptr addrspace(5) %"44", align 4
|
%"64" = inttoptr i64 %"54" to ptr
|
||||||
|
%"34" = getelementptr inbounds i8, ptr %"64", i64 8
|
||||||
|
%"55" = load i32, ptr %"34", align 4
|
||||||
|
store i32 %"55", ptr addrspace(5) %"47", align 4
|
||||||
%"57" = load i32, ptr addrspace(5) %"45", align 4
|
%"57" = load i32, ptr addrspace(5) %"45", align 4
|
||||||
%"58" = load i32, ptr addrspace(5) %"46", align 4
|
%"58" = load i32, ptr addrspace(5) %"46", align 4
|
||||||
%"55" = call i32 @__zluda_ptx_impl_bfe_u32(i32 %"56", i32 %"57", i32 %"58")
|
%"59" = load i32, ptr addrspace(5) %"47", align 4
|
||||||
store i32 %"55", ptr addrspace(5) %"44", align 4
|
%"56" = call i32 @__zluda_ptx_impl_bfe_u32(i32 %"57", i32 %"58", i32 %"59")
|
||||||
%"59" = load i64, ptr addrspace(5) %"43", align 4
|
store i32 %"56", ptr addrspace(5) %"45", align 4
|
||||||
%"60" = load i32, ptr addrspace(5) %"44", align 4
|
%"60" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
%"64" = inttoptr i64 %"59" to ptr
|
%"61" = load i32, ptr addrspace(5) %"45", align 4
|
||||||
store i32 %"60", ptr %"64", align 4
|
%"65" = inttoptr i64 %"60" to ptr
|
||||||
|
store i32 %"61", ptr %"65", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -12,50 +12,54 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 {
|
define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
|
||||||
%"45" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"46" = alloca i64, align 8, addrspace(5)
|
%"46" = alloca i64, align 8, addrspace(5)
|
||||||
%"47" = alloca i32, align 4, addrspace(5)
|
%"47" = alloca i64, align 8, addrspace(5)
|
||||||
%"48" = alloca i32, align 4, addrspace(5)
|
%"48" = alloca i32, align 4, addrspace(5)
|
||||||
%"49" = alloca i32, align 4, addrspace(5)
|
%"49" = alloca i32, align 4, addrspace(5)
|
||||||
%"50" = alloca i32, align 4, addrspace(5)
|
%"50" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"51" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"51" = load i64, ptr addrspace(4) %"43", align 4
|
br label %"77"
|
||||||
store i64 %"51", ptr addrspace(5) %"45", align 4
|
|
||||||
|
"77": ; preds = %1
|
||||||
%"52" = load i64, ptr addrspace(4) %"44", align 4
|
%"52" = load i64, ptr addrspace(4) %"44", align 4
|
||||||
store i64 %"52", ptr addrspace(5) %"46", align 4
|
store i64 %"52", ptr addrspace(5) %"46", align 4
|
||||||
%"54" = load i64, ptr addrspace(5) %"45", align 4
|
%"53" = load i64, ptr addrspace(4) %"45", align 4
|
||||||
%"68" = inttoptr i64 %"54" to ptr
|
store i64 %"53", ptr addrspace(5) %"47", align 4
|
||||||
%"53" = load i32, ptr %"68", align 4
|
%"55" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
store i32 %"53", ptr addrspace(5) %"47", align 4
|
|
||||||
%"55" = load i64, ptr addrspace(5) %"45", align 4
|
|
||||||
%"69" = inttoptr i64 %"55" to ptr
|
%"69" = inttoptr i64 %"55" to ptr
|
||||||
%"32" = getelementptr inbounds i8, ptr %"69", i64 4
|
%"54" = load i32, ptr %"69", align 4
|
||||||
%"56" = load i32, ptr %"32", align 4
|
store i32 %"54", ptr addrspace(5) %"48", align 4
|
||||||
store i32 %"56", ptr addrspace(5) %"48", align 4
|
%"56" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
%"57" = load i64, ptr addrspace(5) %"45", align 4
|
%"70" = inttoptr i64 %"56" to ptr
|
||||||
%"70" = inttoptr i64 %"57" to ptr
|
%"33" = getelementptr inbounds i8, ptr %"70", i64 4
|
||||||
%"34" = getelementptr inbounds i8, ptr %"70", i64 8
|
%"57" = load i32, ptr %"33", align 4
|
||||||
%"58" = load i32, ptr %"34", align 4
|
store i32 %"57", ptr addrspace(5) %"49", align 4
|
||||||
store i32 %"58", ptr addrspace(5) %"49", align 4
|
%"58" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
%"59" = load i64, ptr addrspace(5) %"45", align 4
|
%"71" = inttoptr i64 %"58" to ptr
|
||||||
%"71" = inttoptr i64 %"59" to ptr
|
%"35" = getelementptr inbounds i8, ptr %"71", i64 8
|
||||||
%"36" = getelementptr inbounds i8, ptr %"71", i64 12
|
%"59" = load i32, ptr %"35", align 4
|
||||||
%"60" = load i32, ptr %"36", align 4
|
store i32 %"59", ptr addrspace(5) %"50", align 4
|
||||||
store i32 %"60", ptr addrspace(5) %"50", align 4
|
%"60" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
%"62" = load i32, ptr addrspace(5) %"47", align 4
|
%"72" = inttoptr i64 %"60" to ptr
|
||||||
|
%"37" = getelementptr inbounds i8, ptr %"72", i64 12
|
||||||
|
%"61" = load i32, ptr %"37", align 4
|
||||||
|
store i32 %"61", ptr addrspace(5) %"51", align 4
|
||||||
%"63" = load i32, ptr addrspace(5) %"48", align 4
|
%"63" = load i32, ptr addrspace(5) %"48", align 4
|
||||||
%"64" = load i32, ptr addrspace(5) %"49", align 4
|
%"64" = load i32, ptr addrspace(5) %"49", align 4
|
||||||
%"65" = load i32, ptr addrspace(5) %"50", align 4
|
%"65" = load i32, ptr addrspace(5) %"50", align 4
|
||||||
%"72" = call i32 @__zluda_ptx_impl_bfi_b32(i32 %"62", i32 %"63", i32 %"64", i32 %"65")
|
%"66" = load i32, ptr addrspace(5) %"51", align 4
|
||||||
store i32 %"72", ptr addrspace(5) %"47", align 4
|
%"73" = call i32 @__zluda_ptx_impl_bfi_b32(i32 %"63", i32 %"64", i32 %"65", i32 %"66")
|
||||||
%"66" = load i64, ptr addrspace(5) %"46", align 4
|
store i32 %"73", ptr addrspace(5) %"48", align 4
|
||||||
%"67" = load i32, ptr addrspace(5) %"47", align 4
|
%"67" = load i64, ptr addrspace(5) %"47", align 4
|
||||||
%"75" = inttoptr i64 %"66" to ptr
|
%"68" = load i32, ptr addrspace(5) %"48", align 4
|
||||||
store i32 %"67", ptr %"75", align 4
|
%"76" = inttoptr i64 %"67" to ptr
|
||||||
|
store i32 %"68", ptr %"76", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,34 +10,38 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
define amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca i64, align 8, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"50" = alloca i64, align 8, addrspace(5)
|
%"44" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"51" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
br label %"58"
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
|
||||||
|
"58": ; preds = %1
|
||||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
|
||||||
%"46" = load i64, ptr %"55", align 4
|
|
||||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"48" = add i64 %"49", 1
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
store i64 %"48", ptr addrspace(5) %"43", align 4
|
%"47" = load i64, ptr %"56", align 4
|
||||||
%"52" = load i64, ptr addrspace(5) %"50", align 4
|
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = add i64 %"52", 1
|
%"50" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
store i64 %"51", ptr addrspace(5) %"50", align 4
|
%"49" = add i64 %"50", 1
|
||||||
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
store i64 %"49", ptr addrspace(5) %"44", align 4
|
||||||
%"54" = load i64, ptr addrspace(5) %"43", align 4
|
%"53" = load i64, ptr addrspace(5) %"51", align 4
|
||||||
%"56" = inttoptr i64 %"53" to ptr
|
%"52" = add i64 %"53", 1
|
||||||
store i64 %"54", ptr %"56", align 4
|
store i64 %"52", ptr addrspace(5) %"51", align 4
|
||||||
|
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
|
%"55" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
|
%"57" = inttoptr i64 %"54" to ptr
|
||||||
|
store i64 %"55", ptr %"57", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,42 +10,46 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
|
define amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"43" = alloca i64, align 8, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"44" = alloca i64, align 8, addrspace(5)
|
%"44" = alloca i64, align 8, addrspace(5)
|
||||||
%"45" = alloca i64, align 8, addrspace(5)
|
%"45" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"46" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
br label %"59"
|
||||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
|
||||||
|
"59": ; preds = %1
|
||||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(4) %"42", align 4
|
||||||
%"56" = inttoptr i64 %"49" to ptr
|
|
||||||
%"48" = load i64, ptr %"56", align 4
|
|
||||||
store i64 %"48", ptr addrspace(5) %"44", align 4
|
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||||
br label %"9"
|
%"50" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
|
%"57" = inttoptr i64 %"50" to ptr
|
||||||
|
%"49" = load i64, ptr %"57", align 4
|
||||||
|
store i64 %"49", ptr addrspace(5) %"45", align 4
|
||||||
|
br label %"10"
|
||||||
|
|
||||||
"9": ; preds = %1
|
"10": ; preds = %"59"
|
||||||
%"51" = load i64, ptr addrspace(5) %"44", align 4
|
%"52" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
%"50" = add i64 %"51", 1
|
%"51" = add i64 %"52", 1
|
||||||
store i64 %"50", ptr addrspace(5) %"45", align 4
|
store i64 %"51", ptr addrspace(5) %"46", align 4
|
||||||
br label %"11"
|
br label %"12"
|
||||||
|
|
||||||
"10": ; No predecessors!
|
"11": ; No predecessors!
|
||||||
%"53" = load i64, ptr addrspace(5) %"44", align 4
|
%"54" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
%"52" = add i64 %"53", 2
|
%"53" = add i64 %"54", 2
|
||||||
store i64 %"52", ptr addrspace(5) %"45", align 4
|
store i64 %"53", ptr addrspace(5) %"46", align 4
|
||||||
br label %"11"
|
br label %"12"
|
||||||
|
|
||||||
"11": ; preds = %"10", %"9"
|
"12": ; preds = %"11", %"10"
|
||||||
%"54" = load i64, ptr addrspace(5) %"43", align 4
|
%"55" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
%"55" = load i64, ptr addrspace(5) %"45", align 4
|
%"56" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
%"57" = inttoptr i64 %"54" to ptr
|
%"58" = inttoptr i64 %"55" to ptr
|
||||||
store i64 %"55", ptr %"57", align 4
|
store i64 %"56", ptr %"58", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca i32, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"50"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"50": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"47" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"41" = load i32, ptr %"47", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store i32 %"41", ptr addrspace(5) %"38", align 4
|
%"48" = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load i32, ptr addrspace(5) %"38", align 4
|
%"42" = load i32, ptr %"48", align 4
|
||||||
%"43" = call i32 @llvm.bitreverse.i32(i32 %"44")
|
store i32 %"42", ptr addrspace(5) %"39", align 4
|
||||||
store i32 %"43", ptr addrspace(5) %"38", align 4
|
%"45" = load i32, ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
%"44" = call i32 @llvm.bitreverse.i32(i32 %"45")
|
||||||
%"46" = load i32, ptr addrspace(5) %"38", align 4
|
store i32 %"44", ptr addrspace(5) %"39", align 4
|
||||||
%"48" = inttoptr i64 %"45" to ptr
|
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store i32 %"46", ptr %"48", align 4
|
%"47" = load i32, ptr addrspace(5) %"39", align 4
|
||||||
|
%"49" = inttoptr i64 %"46" to ptr
|
||||||
|
store i32 %"47", ptr %"49", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare i32 @llvm.bitreverse.i32(i32) #1
|
declare i32 @llvm.bitreverse.i32(i32) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,57 +10,64 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define i64 @__zluda_ptx_impl_incr(i64 %"42") #0 {
|
define i64 @incr(i64 %"43") #0 {
|
||||||
%"65" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"66" = alloca i64, align 8, addrspace(5)
|
%"66" = alloca i64, align 8, addrspace(5)
|
||||||
%"67" = alloca i64, align 8, addrspace(5)
|
%"67" = alloca i64, align 8, addrspace(5)
|
||||||
%"68" = alloca i64, align 8, addrspace(5)
|
%"68" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"69" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
store i64 %"42", ptr addrspace(5) %"67", align 4
|
br label %"80"
|
||||||
%"69" = load i64, ptr addrspace(5) %"67", align 4
|
|
||||||
store i64 %"69", ptr addrspace(5) %"68", align 4
|
"80": ; preds = %1
|
||||||
%"71" = load i64, ptr addrspace(5) %"68", align 4
|
store i64 %"43", ptr addrspace(5) %"68", align 4
|
||||||
%"70" = add i64 %"71", 1
|
%"70" = load i64, ptr addrspace(5) %"68", align 4
|
||||||
store i64 %"70", ptr addrspace(5) %"68", align 4
|
store i64 %"70", ptr addrspace(5) %"69", align 4
|
||||||
%"72" = load i64, ptr addrspace(5) %"68", align 4
|
%"72" = load i64, ptr addrspace(5) %"69", align 4
|
||||||
store i64 %"72", ptr addrspace(5) %"66", align 4
|
%"71" = add i64 %"72", 1
|
||||||
%"73" = load i64, ptr addrspace(5) %"66", align 4
|
store i64 %"71", ptr addrspace(5) %"69", align 4
|
||||||
store i64 %"73", ptr addrspace(5) %"65", align 4
|
%"73" = load i64, ptr addrspace(5) %"69", align 4
|
||||||
%2 = load i64, ptr addrspace(5) %"65", align 4
|
store i64 %"73", ptr addrspace(5) %"67", align 4
|
||||||
|
%"74" = load i64, ptr addrspace(5) %"67", align 4
|
||||||
|
store i64 %"74", ptr addrspace(5) %"66", align 4
|
||||||
|
%2 = load i64, ptr addrspace(5) %"66", align 4
|
||||||
ret i64 %2
|
ret i64 %2
|
||||||
}
|
}
|
||||||
|
|
||||||
define amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"50", ptr addrspace(4) byref(i64) %"51") #0 {
|
define amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"51", ptr addrspace(4) byref(i64) %"52") #1 {
|
||||||
%"52" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"53" = alloca i64, align 8, addrspace(5)
|
%"53" = alloca i64, align 8, addrspace(5)
|
||||||
%"54" = alloca i64, align 8, addrspace(5)
|
%"54" = alloca i64, align 8, addrspace(5)
|
||||||
%"59" = alloca i64, align 8, addrspace(5)
|
%"55" = alloca i64, align 8, addrspace(5)
|
||||||
%"60" = alloca i64, align 8, addrspace(5)
|
%"60" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"61" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"55" = load i64, ptr addrspace(4) %"50", align 4
|
br label %"79"
|
||||||
store i64 %"55", ptr addrspace(5) %"52", align 4
|
|
||||||
|
"79": ; preds = %1
|
||||||
%"56" = load i64, ptr addrspace(4) %"51", align 4
|
%"56" = load i64, ptr addrspace(4) %"51", align 4
|
||||||
store i64 %"56", ptr addrspace(5) %"53", align 4
|
store i64 %"56", ptr addrspace(5) %"53", align 4
|
||||||
%"58" = load i64, ptr addrspace(5) %"52", align 4
|
%"57" = load i64, ptr addrspace(4) %"52", align 4
|
||||||
%"74" = inttoptr i64 %"58" to ptr addrspace(1)
|
|
||||||
%"57" = load i64, ptr addrspace(1) %"74", align 4
|
|
||||||
store i64 %"57", ptr addrspace(5) %"54", align 4
|
store i64 %"57", ptr addrspace(5) %"54", align 4
|
||||||
%"61" = load i64, ptr addrspace(5) %"54", align 4
|
%"59" = load i64, ptr addrspace(5) %"53", align 4
|
||||||
store i64 %"61", ptr addrspace(5) %"59", align 4
|
%"75" = inttoptr i64 %"59" to ptr addrspace(1)
|
||||||
%"39" = load i64, ptr addrspace(5) %"59", align 4
|
%"58" = load i64, ptr addrspace(1) %"75", align 4
|
||||||
%"40" = call i64 @__zluda_ptx_impl_incr(i64 %"39")
|
store i64 %"58", ptr addrspace(5) %"55", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"60", align 4
|
%"62" = load i64, ptr addrspace(5) %"55", align 4
|
||||||
%"62" = load i64, ptr addrspace(5) %"60", align 4
|
store i64 %"62", ptr addrspace(5) %"60", align 4
|
||||||
store i64 %"62", ptr addrspace(5) %"54", align 4
|
%"40" = load i64, ptr addrspace(5) %"60", align 4
|
||||||
%"63" = load i64, ptr addrspace(5) %"53", align 4
|
%"41" = call i64 @incr(i64 %"40")
|
||||||
|
store i64 %"41", ptr addrspace(5) %"61", align 4
|
||||||
|
%"63" = load i64, ptr addrspace(5) %"61", align 4
|
||||||
|
store i64 %"63", ptr addrspace(5) %"55", align 4
|
||||||
%"64" = load i64, ptr addrspace(5) %"54", align 4
|
%"64" = load i64, ptr addrspace(5) %"54", align 4
|
||||||
%"77" = inttoptr i64 %"63" to ptr addrspace(1)
|
%"65" = load i64, ptr addrspace(5) %"55", align 4
|
||||||
store i64 %"64", ptr addrspace(1) %"77", align 4
|
%"78" = inttoptr i64 %"64" to ptr addrspace(1)
|
||||||
|
store i64 %"65", ptr addrspace(1) %"78", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca i32, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"51"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"51": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"47" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"41" = load i32, ptr %"47", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store i32 %"41", ptr addrspace(5) %"38", align 4
|
%"48" = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load i32, ptr addrspace(5) %"38", align 4
|
%"42" = load i32, ptr %"48", align 4
|
||||||
%"48" = call i32 @llvm.ctlz.i32(i32 %"44", i1 false)
|
store i32 %"42", ptr addrspace(5) %"39", align 4
|
||||||
store i32 %"48", ptr addrspace(5) %"38", align 4
|
%"45" = load i32, ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
%"49" = call i32 @llvm.ctlz.i32(i32 %"45", i1 false)
|
||||||
%"46" = load i32, ptr addrspace(5) %"38", align 4
|
store i32 %"49", ptr addrspace(5) %"39", align 4
|
||||||
%"49" = inttoptr i64 %"45" to ptr
|
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store i32 %"46", ptr %"49", align 4
|
%"47" = load i32, ptr addrspace(5) %"39", align 4
|
||||||
|
%"50" = inttoptr i64 %"46" to ptr
|
||||||
|
store i32 %"47", ptr %"50", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare i32 @llvm.ctlz.i32(i32, i1 immarg) #1
|
declare i32 @llvm.ctlz.i32(i32, i1 immarg) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -12,48 +12,52 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"50", ptr addrspace(4) byref(i64) %"51") #0 {
|
define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"51", ptr addrspace(4) byref(i64) %"52") #1 {
|
||||||
%"52" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"53" = alloca i64, align 8, addrspace(5)
|
%"53" = alloca i64, align 8, addrspace(5)
|
||||||
%"54" = alloca i16, align 2, addrspace(5)
|
%"54" = alloca i64, align 8, addrspace(5)
|
||||||
%"55" = alloca i16, align 2, addrspace(5)
|
%"55" = alloca i16, align 2, addrspace(5)
|
||||||
%"56" = alloca i16, align 2, addrspace(5)
|
%"56" = alloca i16, align 2, addrspace(5)
|
||||||
%"57" = alloca i16, align 2, addrspace(5)
|
%"57" = alloca i16, align 2, addrspace(5)
|
||||||
|
%"58" = alloca i16, align 2, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"58" = load i64, ptr addrspace(4) %"50", align 4
|
br label %"85"
|
||||||
store i64 %"58", ptr addrspace(5) %"52", align 4
|
|
||||||
|
"85": ; preds = %1
|
||||||
%"59" = load i64, ptr addrspace(4) %"51", align 4
|
%"59" = load i64, ptr addrspace(4) %"51", align 4
|
||||||
store i64 %"59", ptr addrspace(5) %"53", align 4
|
store i64 %"59", ptr addrspace(5) %"53", align 4
|
||||||
%"60" = load i16, ptr addrspace(4) @constparams, align 2
|
%"60" = load i64, ptr addrspace(4) %"52", align 4
|
||||||
store i16 %"60", ptr addrspace(5) %"54", align 2
|
store i64 %"60", ptr addrspace(5) %"54", align 4
|
||||||
%"61" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2
|
%"61" = load i16, ptr addrspace(4) @constparams, align 2
|
||||||
store i16 %"61", ptr addrspace(5) %"55", align 2
|
store i16 %"61", ptr addrspace(5) %"55", align 2
|
||||||
%"62" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2
|
%"62" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2
|
||||||
store i16 %"62", ptr addrspace(5) %"56", align 2
|
store i16 %"62", ptr addrspace(5) %"56", align 2
|
||||||
%"63" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2
|
%"63" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2
|
||||||
store i16 %"63", ptr addrspace(5) %"57", align 2
|
store i16 %"63", ptr addrspace(5) %"57", align 2
|
||||||
%"64" = load i64, ptr addrspace(5) %"53", align 4
|
%"64" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2
|
||||||
%"65" = load i16, ptr addrspace(5) %"54", align 2
|
store i16 %"64", ptr addrspace(5) %"58", align 2
|
||||||
%"76" = inttoptr i64 %"64" to ptr
|
%"65" = load i64, ptr addrspace(5) %"54", align 4
|
||||||
store i16 %"65", ptr %"76", align 2
|
%"66" = load i16, ptr addrspace(5) %"55", align 2
|
||||||
%"66" = load i64, ptr addrspace(5) %"53", align 4
|
%"77" = inttoptr i64 %"65" to ptr
|
||||||
%"78" = inttoptr i64 %"66" to ptr
|
store i16 %"66", ptr %"77", align 2
|
||||||
%"39" = getelementptr inbounds i8, ptr %"78", i64 2
|
%"67" = load i64, ptr addrspace(5) %"54", align 4
|
||||||
%"67" = load i16, ptr addrspace(5) %"55", align 2
|
%"79" = inttoptr i64 %"67" to ptr
|
||||||
store i16 %"67", ptr %"39", align 2
|
%"40" = getelementptr inbounds i8, ptr %"79", i64 2
|
||||||
%"68" = load i64, ptr addrspace(5) %"53", align 4
|
%"68" = load i16, ptr addrspace(5) %"56", align 2
|
||||||
%"80" = inttoptr i64 %"68" to ptr
|
store i16 %"68", ptr %"40", align 2
|
||||||
%"41" = getelementptr inbounds i8, ptr %"80", i64 4
|
%"69" = load i64, ptr addrspace(5) %"54", align 4
|
||||||
%"69" = load i16, ptr addrspace(5) %"56", align 2
|
%"81" = inttoptr i64 %"69" to ptr
|
||||||
store i16 %"69", ptr %"41", align 2
|
%"42" = getelementptr inbounds i8, ptr %"81", i64 4
|
||||||
%"70" = load i64, ptr addrspace(5) %"53", align 4
|
%"70" = load i16, ptr addrspace(5) %"57", align 2
|
||||||
%"82" = inttoptr i64 %"70" to ptr
|
store i16 %"70", ptr %"42", align 2
|
||||||
%"43" = getelementptr inbounds i8, ptr %"82", i64 6
|
%"71" = load i64, ptr addrspace(5) %"54", align 4
|
||||||
%"71" = load i16, ptr addrspace(5) %"57", align 2
|
%"83" = inttoptr i64 %"71" to ptr
|
||||||
store i16 %"71", ptr %"43", align 2
|
%"44" = getelementptr inbounds i8, ptr %"83", i64 6
|
||||||
|
%"72" = load i16, ptr addrspace(5) %"58", align 2
|
||||||
|
store i16 %"72", ptr %"44", align 2
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,29 +10,33 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca float, align 4, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"40" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
br label %"51"
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
|
||||||
|
"51": ; preds = %1
|
||||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"48" = inttoptr i64 %"43" to ptr
|
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||||
%"42" = load float, ptr %"48", align 4
|
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store float %"42", ptr addrspace(5) %"39", align 4
|
%"49" = inttoptr i64 %"44" to ptr
|
||||||
%"45" = load float, ptr addrspace(5) %"39", align 4
|
%"43" = load float, ptr %"49", align 4
|
||||||
%"44" = fmul float %"45", 5.000000e-01
|
store float %"43", ptr addrspace(5) %"40", align 4
|
||||||
store float %"44", ptr addrspace(5) %"39", align 4
|
%"46" = load float, ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
%"45" = fmul float %"46", 5.000000e-01
|
||||||
%"47" = load float, ptr addrspace(5) %"39", align 4
|
store float %"45", ptr addrspace(5) %"40", align 4
|
||||||
%"49" = inttoptr i64 %"46" to ptr
|
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store float %"47", ptr %"49", align 4
|
%"48" = load float, ptr addrspace(5) %"40", align 4
|
||||||
|
%"50" = inttoptr i64 %"47" to ptr
|
||||||
|
store float %"48", ptr %"50", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,29 +10,33 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca i32, align 4, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"40" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
br label %"51"
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
|
||||||
|
"51": ; preds = %1
|
||||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"48" = inttoptr i64 %"43" to ptr
|
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||||
%"42" = load i32, ptr %"48", align 4
|
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store i32 %"42", ptr addrspace(5) %"39", align 4
|
%"49" = inttoptr i64 %"44" to ptr
|
||||||
%"45" = load i32, ptr addrspace(5) %"39", align 4
|
%"43" = load i32, ptr %"49", align 4
|
||||||
%"44" = mul i32 %"45", -1
|
store i32 %"43", ptr addrspace(5) %"40", align 4
|
||||||
store i32 %"44", ptr addrspace(5) %"39", align 4
|
%"46" = load i32, ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
%"45" = mul i32 %"46", -1
|
||||||
%"47" = load i32, ptr addrspace(5) %"39", align 4
|
store i32 %"45", ptr addrspace(5) %"40", align 4
|
||||||
%"49" = inttoptr i64 %"46" to ptr
|
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store i32 %"47", ptr %"49", align 4
|
%"48" = load i32, ptr addrspace(5) %"40", align 4
|
||||||
|
%"50" = inttoptr i64 %"47" to ptr
|
||||||
|
store i32 %"48", ptr %"50", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca float, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"50"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"50": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"47" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"41" = load float, ptr %"47", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store float %"41", ptr addrspace(5) %"38", align 4
|
%"48" = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load float, ptr addrspace(5) %"38", align 4
|
%"42" = load float, ptr %"48", align 4
|
||||||
%"43" = call afn float @llvm.cos.f32(float %"44")
|
store float %"42", ptr addrspace(5) %"39", align 4
|
||||||
store float %"43", ptr addrspace(5) %"38", align 4
|
%"45" = load float, ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
%"44" = call afn float @llvm.cos.f32(float %"45")
|
||||||
%"46" = load float, ptr addrspace(5) %"38", align 4
|
store float %"44", ptr addrspace(5) %"39", align 4
|
||||||
%"48" = inttoptr i64 %"45" to ptr
|
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store float %"46", ptr %"48", align 4
|
%"47" = load float, ptr addrspace(5) %"39", align 4
|
||||||
|
%"49" = inttoptr i64 %"46" to ptr
|
||||||
|
store float %"47", ptr %"49", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare float @llvm.cos.f32(float) #1
|
declare float @llvm.cos.f32(float) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca float, align 4, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca double, align 8, addrspace(5)
|
%"40" = alloca float, align 4, addrspace(5)
|
||||||
|
%"41" = alloca double, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
br label %"52"
|
||||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
|
||||||
|
"52": ; preds = %1
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"49" = inttoptr i64 %"44" to ptr addrspace(1)
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"43" = load float, ptr addrspace(1) %"49", align 4
|
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store float %"43", ptr addrspace(5) %"39", align 4
|
%"50" = inttoptr i64 %"45" to ptr addrspace(1)
|
||||||
%"46" = load float, ptr addrspace(5) %"39", align 4
|
%"44" = load float, ptr addrspace(1) %"50", align 4
|
||||||
%"45" = fpext float %"46" to double
|
store float %"44", ptr addrspace(5) %"40", align 4
|
||||||
store double %"45", ptr addrspace(5) %"40", align 8
|
%"47" = load float, ptr addrspace(5) %"40", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"38", align 4
|
%"46" = fpext float %"47" to double
|
||||||
%"48" = load double, ptr addrspace(5) %"40", align 8
|
store double %"46", ptr addrspace(5) %"41", align 8
|
||||||
%"50" = inttoptr i64 %"47" to ptr
|
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store double %"48", ptr %"50", align 8
|
%"49" = load double, ptr addrspace(5) %"41", align 8
|
||||||
|
%"51" = inttoptr i64 %"48" to ptr
|
||||||
|
store double %"49", ptr %"51", align 8
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,49 +10,53 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 {
|
define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca float, align 4, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"44" = alloca float, align 4, addrspace(5)
|
%"44" = alloca float, align 4, addrspace(5)
|
||||||
|
%"45" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
br label %"64"
|
||||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
|
"64": ; preds = %1
|
||||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||||
%"59" = inttoptr i64 %"48" to ptr
|
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||||
%"47" = load float, ptr %"59", align 4
|
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
store float %"47", ptr addrspace(5) %"43", align 4
|
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
|
||||||
%"60" = inttoptr i64 %"49" to ptr
|
%"60" = inttoptr i64 %"49" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"60", i64 4
|
%"48" = load float, ptr %"60", align 4
|
||||||
%"50" = load float, ptr %"30", align 4
|
store float %"48", ptr addrspace(5) %"44", align 4
|
||||||
store float %"50", ptr addrspace(5) %"44", align 4
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"52" = load float, ptr addrspace(5) %"43", align 4
|
%"61" = inttoptr i64 %"50" to ptr
|
||||||
%2 = call float @llvm.roundeven.f32(float %"52")
|
%"31" = getelementptr inbounds i8, ptr %"61", i64 4
|
||||||
%"51" = freeze float %2
|
%"51" = load float, ptr %"31", align 4
|
||||||
store float %"51", ptr addrspace(5) %"43", align 4
|
store float %"51", ptr addrspace(5) %"45", align 4
|
||||||
%"54" = load float, ptr addrspace(5) %"44", align 4
|
%"53" = load float, ptr addrspace(5) %"44", align 4
|
||||||
%3 = call float @llvm.roundeven.f32(float %"54")
|
%2 = call float @llvm.roundeven.f32(float %"53")
|
||||||
%"53" = freeze float %3
|
%"52" = freeze float %2
|
||||||
store float %"53", ptr addrspace(5) %"44", align 4
|
store float %"52", ptr addrspace(5) %"44", align 4
|
||||||
%"55" = load i64, ptr addrspace(5) %"42", align 4
|
%"55" = load float, ptr addrspace(5) %"45", align 4
|
||||||
%"56" = load float, ptr addrspace(5) %"43", align 4
|
%3 = call float @llvm.roundeven.f32(float %"55")
|
||||||
%"61" = inttoptr i64 %"55" to ptr
|
%"54" = freeze float %3
|
||||||
store float %"56", ptr %"61", align 4
|
store float %"54", ptr addrspace(5) %"45", align 4
|
||||||
%"57" = load i64, ptr addrspace(5) %"42", align 4
|
%"56" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
%"62" = inttoptr i64 %"57" to ptr
|
%"57" = load float, ptr addrspace(5) %"44", align 4
|
||||||
%"32" = getelementptr inbounds i8, ptr %"62", i64 4
|
%"62" = inttoptr i64 %"56" to ptr
|
||||||
%"58" = load float, ptr addrspace(5) %"44", align 4
|
store float %"57", ptr %"62", align 4
|
||||||
store float %"58", ptr %"32", align 4
|
%"58" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
|
%"63" = inttoptr i64 %"58" to ptr
|
||||||
|
%"33" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||||
|
%"59" = load float, ptr addrspace(5) %"45", align 4
|
||||||
|
store float %"59", ptr %"33", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare float @llvm.roundeven.f32(float) #1
|
declare float @llvm.roundeven.f32(float) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,49 +10,53 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 {
|
define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca float, align 4, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"44" = alloca float, align 4, addrspace(5)
|
%"44" = alloca float, align 4, addrspace(5)
|
||||||
|
%"45" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
br label %"64"
|
||||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
|
"64": ; preds = %1
|
||||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||||
%"59" = inttoptr i64 %"48" to ptr
|
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||||
%"47" = load float, ptr %"59", align 4
|
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
store float %"47", ptr addrspace(5) %"43", align 4
|
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
|
||||||
%"60" = inttoptr i64 %"49" to ptr
|
%"60" = inttoptr i64 %"49" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"60", i64 4
|
%"48" = load float, ptr %"60", align 4
|
||||||
%"50" = load float, ptr %"30", align 4
|
store float %"48", ptr addrspace(5) %"44", align 4
|
||||||
store float %"50", ptr addrspace(5) %"44", align 4
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"52" = load float, ptr addrspace(5) %"43", align 4
|
%"61" = inttoptr i64 %"50" to ptr
|
||||||
%2 = call float @llvm.trunc.f32(float %"52")
|
%"31" = getelementptr inbounds i8, ptr %"61", i64 4
|
||||||
%"51" = freeze float %2
|
%"51" = load float, ptr %"31", align 4
|
||||||
store float %"51", ptr addrspace(5) %"43", align 4
|
store float %"51", ptr addrspace(5) %"45", align 4
|
||||||
%"54" = load float, ptr addrspace(5) %"44", align 4
|
%"53" = load float, ptr addrspace(5) %"44", align 4
|
||||||
%3 = call float @llvm.trunc.f32(float %"54")
|
%2 = call float @llvm.trunc.f32(float %"53")
|
||||||
%"53" = freeze float %3
|
%"52" = freeze float %2
|
||||||
store float %"53", ptr addrspace(5) %"44", align 4
|
store float %"52", ptr addrspace(5) %"44", align 4
|
||||||
%"55" = load i64, ptr addrspace(5) %"42", align 4
|
%"55" = load float, ptr addrspace(5) %"45", align 4
|
||||||
%"56" = load float, ptr addrspace(5) %"43", align 4
|
%3 = call float @llvm.trunc.f32(float %"55")
|
||||||
%"61" = inttoptr i64 %"55" to ptr
|
%"54" = freeze float %3
|
||||||
store float %"56", ptr %"61", align 4
|
store float %"54", ptr addrspace(5) %"45", align 4
|
||||||
%"57" = load i64, ptr addrspace(5) %"42", align 4
|
%"56" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
%"62" = inttoptr i64 %"57" to ptr
|
%"57" = load float, ptr addrspace(5) %"44", align 4
|
||||||
%"32" = getelementptr inbounds i8, ptr %"62", i64 4
|
%"62" = inttoptr i64 %"56" to ptr
|
||||||
%"58" = load float, ptr addrspace(5) %"44", align 4
|
store float %"57", ptr %"62", align 4
|
||||||
store float %"58", ptr %"32", align 4
|
%"58" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
|
%"63" = inttoptr i64 %"58" to ptr
|
||||||
|
%"33" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||||
|
%"59" = load float, ptr addrspace(5) %"45", align 4
|
||||||
|
store float %"59", ptr %"33", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare float @llvm.trunc.f32(float) #1
|
declare float @llvm.trunc.f32(float) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,32 +10,36 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca i32, align 4, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i32, align 4, addrspace(5)
|
%"40" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"41" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
br label %"54"
|
||||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
|
||||||
|
"54": ; preds = %1
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"49" = inttoptr i64 %"44" to ptr addrspace(1)
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"43" = load i32, ptr addrspace(1) %"49", align 4
|
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store i32 %"43", ptr addrspace(5) %"40", align 4
|
%"50" = inttoptr i64 %"45" to ptr addrspace(1)
|
||||||
%"46" = load i32, ptr addrspace(5) %"40", align 4
|
%"44" = load i32, ptr addrspace(1) %"50", align 4
|
||||||
%2 = trunc i32 %"46" to i8
|
store i32 %"44", ptr addrspace(5) %"41", align 4
|
||||||
%"50" = sext i8 %2 to i16
|
%"47" = load i32, ptr addrspace(5) %"41", align 4
|
||||||
%"45" = sext i16 %"50" to i32
|
%2 = trunc i32 %"47" to i8
|
||||||
store i32 %"45", ptr addrspace(5) %"39", align 4
|
%"51" = sext i8 %2 to i16
|
||||||
%"47" = load i64, ptr addrspace(5) %"38", align 4
|
%"46" = sext i16 %"51" to i32
|
||||||
%"48" = load i32, ptr addrspace(5) %"39", align 4
|
store i32 %"46", ptr addrspace(5) %"40", align 4
|
||||||
%"52" = inttoptr i64 %"47" to ptr
|
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store i32 %"48", ptr %"52", align 4
|
%"49" = load i32, ptr addrspace(5) %"40", align 4
|
||||||
|
%"53" = inttoptr i64 %"48" to ptr
|
||||||
|
store i32 %"49", ptr %"53", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,55 +10,59 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 {
|
define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca i32, align 4, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"44" = alloca i32, align 4, addrspace(5)
|
%"44" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"45" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
br label %"72"
|
||||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
|
"72": ; preds = %1
|
||||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||||
%"60" = inttoptr i64 %"48" to ptr
|
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||||
%"59" = load float, ptr %"60", align 4
|
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"47" = bitcast float %"59" to i32
|
|
||||||
store i32 %"47", ptr addrspace(5) %"43", align 4
|
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
|
||||||
%"61" = inttoptr i64 %"49" to ptr
|
%"61" = inttoptr i64 %"49" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"61", i64 4
|
%"60" = load float, ptr %"61", align 4
|
||||||
%"62" = load float, ptr %"30", align 4
|
%"48" = bitcast float %"60" to i32
|
||||||
%"50" = bitcast float %"62" to i32
|
store i32 %"48", ptr addrspace(5) %"44", align 4
|
||||||
store i32 %"50", ptr addrspace(5) %"44", align 4
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
%"62" = inttoptr i64 %"50" to ptr
|
||||||
%"64" = bitcast i32 %"52" to float
|
%"31" = getelementptr inbounds i8, ptr %"62", i64 4
|
||||||
%2 = call float @llvm.ceil.f32(float %"64")
|
%"63" = load float, ptr %"31", align 4
|
||||||
|
%"51" = bitcast float %"63" to i32
|
||||||
|
store i32 %"51", ptr addrspace(5) %"45", align 4
|
||||||
|
%"53" = load i32, ptr addrspace(5) %"44", align 4
|
||||||
|
%"65" = bitcast i32 %"53" to float
|
||||||
|
%2 = call float @llvm.ceil.f32(float %"65")
|
||||||
%3 = fptosi float %2 to i32
|
%3 = fptosi float %2 to i32
|
||||||
%"63" = freeze i32 %3
|
%"64" = freeze i32 %3
|
||||||
store i32 %"63", ptr addrspace(5) %"43", align 4
|
store i32 %"64", ptr addrspace(5) %"44", align 4
|
||||||
%"54" = load i32, ptr addrspace(5) %"44", align 4
|
%"55" = load i32, ptr addrspace(5) %"45", align 4
|
||||||
%"66" = bitcast i32 %"54" to float
|
%"67" = bitcast i32 %"55" to float
|
||||||
%4 = call float @llvm.ceil.f32(float %"66")
|
%4 = call float @llvm.ceil.f32(float %"67")
|
||||||
%5 = fptosi float %4 to i32
|
%5 = fptosi float %4 to i32
|
||||||
%"65" = freeze i32 %5
|
%"66" = freeze i32 %5
|
||||||
store i32 %"65", ptr addrspace(5) %"44", align 4
|
store i32 %"66", ptr addrspace(5) %"45", align 4
|
||||||
%"55" = load i64, ptr addrspace(5) %"42", align 4
|
%"56" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
%"56" = load i32, ptr addrspace(5) %"43", align 4
|
%"57" = load i32, ptr addrspace(5) %"44", align 4
|
||||||
%"67" = inttoptr i64 %"55" to ptr addrspace(1)
|
%"68" = inttoptr i64 %"56" to ptr addrspace(1)
|
||||||
store i32 %"56", ptr addrspace(1) %"67", align 4
|
store i32 %"57", ptr addrspace(1) %"68", align 4
|
||||||
%"57" = load i64, ptr addrspace(5) %"42", align 4
|
%"58" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
%"69" = inttoptr i64 %"57" to ptr addrspace(1)
|
%"70" = inttoptr i64 %"58" to ptr addrspace(1)
|
||||||
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4
|
%"33" = getelementptr inbounds i8, ptr addrspace(1) %"70", i64 4
|
||||||
%"58" = load i32, ptr addrspace(5) %"44", align 4
|
%"59" = load i32, ptr addrspace(5) %"45", align 4
|
||||||
store i32 %"58", ptr addrspace(1) %"32", align 4
|
store i32 %"59", ptr addrspace(1) %"33", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare float @llvm.ceil.f32(float) #1
|
declare float @llvm.ceil.f32(float) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca i32, align 4, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
br label %"54"
|
||||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
|
||||||
|
"54": ; preds = %1
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"50" = inttoptr i64 %"44" to ptr
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"49" = load i32, ptr %"50", align 4
|
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store i32 %"49", ptr addrspace(5) %"39", align 4
|
%"51" = inttoptr i64 %"45" to ptr
|
||||||
%"46" = load i32, ptr addrspace(5) %"39", align 4
|
%"50" = load i32, ptr %"51", align 4
|
||||||
%"45" = sext i32 %"46" to i64
|
store i32 %"50", ptr addrspace(5) %"40", align 4
|
||||||
store i64 %"45", ptr addrspace(5) %"40", align 4
|
%"47" = load i32, ptr addrspace(5) %"40", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"38", align 4
|
%"46" = sext i32 %"47" to i64
|
||||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
store i64 %"46", ptr addrspace(5) %"41", align 4
|
||||||
%"51" = inttoptr i64 %"47" to ptr
|
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store i64 %"48", ptr %"51", align 4
|
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
|
%"52" = inttoptr i64 %"48" to ptr
|
||||||
|
store i64 %"49", ptr %"52", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,41 +10,45 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i32, align 4, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i32, align 4, addrspace(5)
|
%"41" = alloca i32, align 4, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"56"
|
||||||
store i64 %"43", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"56": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"37", align 4
|
%"44" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"39", align 4
|
store i64 %"44", ptr addrspace(5) %"39", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
%"45" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%"53" = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"40", align 4
|
||||||
%"45" = load i32, ptr %"53", align 4
|
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store i32 %"45", ptr addrspace(5) %"40", align 4
|
%"54" = inttoptr i64 %"47" to ptr
|
||||||
%"48" = load i32, ptr addrspace(5) %"40", align 4
|
%"46" = load i32, ptr %"54", align 4
|
||||||
%2 = call i32 @llvm.smax.i32(i32 %"48", i32 0)
|
store i32 %"46", ptr addrspace(5) %"41", align 4
|
||||||
|
%"49" = load i32, ptr addrspace(5) %"41", align 4
|
||||||
|
%2 = call i32 @llvm.smax.i32(i32 %"49", i32 0)
|
||||||
%3 = call i32 @llvm.umin.i32(i32 %2, i32 -1)
|
%3 = call i32 @llvm.umin.i32(i32 %2, i32 -1)
|
||||||
store i32 %3, ptr addrspace(5) %"41", align 4
|
store i32 %3, ptr addrspace(5) %"42", align 4
|
||||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"50", ptr addrspace(5) %"42", align 4
|
store i32 %"51", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = load i64, ptr addrspace(5) %"39", align 4
|
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"52" = load i32, ptr addrspace(5) %"42", align 4
|
%"53" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
%"54" = inttoptr i64 %"51" to ptr
|
%"55" = inttoptr i64 %"52" to ptr
|
||||||
store i32 %"52", ptr %"54", align 4
|
store i32 %"53", ptr %"55", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare i32 @llvm.smax.i32(i32, i32) #1
|
declare i32 @llvm.smax.i32(i32, i32) #2
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare i32 @llvm.umin.i32(i32, i32) #1
|
declare i32 @llvm.umin.i32(i32, i32) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,34 +10,38 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca float, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"56"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"56": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%2 = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"49" = addrspacecast ptr %2 to ptr addrspace(1)
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store ptr addrspace(1) %"49", ptr addrspace(5) %"36", align 8
|
%2 = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
%"50" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||||
%3 = inttoptr i64 %"44" to ptr
|
store ptr addrspace(1) %"50", ptr addrspace(5) %"37", align 8
|
||||||
%"51" = addrspacecast ptr %3 to ptr addrspace(1)
|
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store ptr addrspace(1) %"51", ptr addrspace(5) %"37", align 8
|
%3 = inttoptr i64 %"45" to ptr
|
||||||
%"46" = load i64, ptr addrspace(5) %"36", align 4
|
%"52" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||||
%"53" = inttoptr i64 %"46" to ptr addrspace(1)
|
store ptr addrspace(1) %"52", ptr addrspace(5) %"38", align 8
|
||||||
%"45" = load float, ptr addrspace(1) %"53", align 4
|
|
||||||
store float %"45", ptr addrspace(5) %"38", align 4
|
|
||||||
%"47" = load i64, ptr addrspace(5) %"37", align 4
|
%"47" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
%"48" = load float, ptr addrspace(5) %"38", align 4
|
|
||||||
%"54" = inttoptr i64 %"47" to ptr addrspace(1)
|
%"54" = inttoptr i64 %"47" to ptr addrspace(1)
|
||||||
store float %"48", ptr addrspace(1) %"54", align 4
|
%"46" = load float, ptr addrspace(1) %"54", align 4
|
||||||
|
store float %"46", ptr addrspace(5) %"39", align 4
|
||||||
|
%"48" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
|
%"49" = load float, ptr addrspace(5) %"39", align 4
|
||||||
|
%"55" = inttoptr i64 %"48" to ptr addrspace(1)
|
||||||
|
store float %"49", ptr addrspace(1) %"55", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca float, align 4, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca float, align 4, addrspace(5)
|
%"42" = alloca float, align 4, addrspace(5)
|
||||||
|
%"43" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"58"
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"58": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"54" = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"45" = load float, ptr %"54", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store float %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
%"55" = inttoptr i64 %"47" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"55", i64 4
|
%"46" = load float, ptr %"55", align 4
|
||||||
%"48" = load float, ptr %"30", align 4
|
store float %"46", ptr addrspace(5) %"42", align 4
|
||||||
store float %"48", ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load float, ptr addrspace(5) %"41", align 4
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||||
|
%"49" = load float, ptr %"31", align 4
|
||||||
|
store float %"49", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = load float, ptr addrspace(5) %"42", align 4
|
%"51" = load float, ptr addrspace(5) %"42", align 4
|
||||||
%"49" = fdiv arcp afn float %"50", %"51"
|
%"52" = load float, ptr addrspace(5) %"43", align 4
|
||||||
store float %"49", ptr addrspace(5) %"41", align 4
|
%"50" = fdiv arcp afn float %"51", %"52"
|
||||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
store float %"50", ptr addrspace(5) %"42", align 4
|
||||||
%"53" = load float, ptr addrspace(5) %"41", align 4
|
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"56" = inttoptr i64 %"52" to ptr
|
%"54" = load float, ptr addrspace(5) %"42", align 4
|
||||||
store float %"53", ptr %"56", align 4
|
%"57" = inttoptr i64 %"53" to ptr
|
||||||
|
store float %"54", ptr %"57", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca float, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"50"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"50": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"47" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"41" = load float, ptr %"47", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store float %"41", ptr addrspace(5) %"38", align 4
|
%"48" = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load float, ptr addrspace(5) %"38", align 4
|
%"42" = load float, ptr %"48", align 4
|
||||||
%"43" = call float @llvm.amdgcn.exp2.f32(float %"44")
|
store float %"42", ptr addrspace(5) %"39", align 4
|
||||||
store float %"43", ptr addrspace(5) %"38", align 4
|
%"45" = load float, ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
%"44" = call float @llvm.amdgcn.exp2.f32(float %"45")
|
||||||
%"46" = load float, ptr addrspace(5) %"38", align 4
|
store float %"44", ptr addrspace(5) %"39", align 4
|
||||||
%"48" = inttoptr i64 %"45" to ptr
|
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store float %"46", ptr %"48", align 4
|
%"47" = load float, ptr addrspace(5) %"39", align 4
|
||||||
|
%"49" = inttoptr i64 %"46" to ptr
|
||||||
|
store float %"47", ptr %"49", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare float @llvm.amdgcn.exp2.f32(float) #1
|
declare float @llvm.amdgcn.exp2.f32(float) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -12,30 +12,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
br label %"53"
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
|
||||||
|
"53": ; preds = %1
|
||||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"48" = inttoptr i64 %"43" to ptr addrspace(1)
|
|
||||||
%"42" = load i64, ptr addrspace(1) %"48", align 4
|
|
||||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||||
%"44" = load i64, ptr addrspace(5) %"39", align 4
|
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(3) @shared_mem, align 4
|
%"49" = inttoptr i64 %"44" to ptr addrspace(1)
|
||||||
%"45" = load i64, ptr addrspace(3) @shared_mem, align 4
|
%"43" = load i64, ptr addrspace(1) %"49", align 4
|
||||||
store i64 %"45", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
%"45" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
store i64 %"45", ptr addrspace(3) @shared_mem, align 4
|
||||||
|
%"46" = load i64, ptr addrspace(3) @shared_mem, align 4
|
||||||
|
store i64 %"46", ptr addrspace(5) %"40", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
%"51" = inttoptr i64 %"46" to ptr addrspace(1)
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i64 %"47", ptr addrspace(1) %"51", align 4
|
%"52" = inttoptr i64 %"47" to ptr addrspace(1)
|
||||||
|
store i64 %"48", ptr addrspace(1) %"52", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -12,46 +12,53 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define void @__zluda_ptx_impl_incr_shared_2_global() #0 {
|
define void @incr_shared_2_global() #0 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(3) @shared_mem, align 4
|
br label %"63"
|
||||||
store i64 %"39", ptr addrspace(5) %"38", align 4
|
|
||||||
%"41" = load i64, ptr addrspace(5) %"38", align 4
|
"63": ; preds = %1
|
||||||
%"40" = add i64 %"41", 2
|
%"40" = load i64, ptr addrspace(3) @shared_mem, align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"38", align 4
|
store i64 %"40", ptr addrspace(5) %"39", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"38", align 4
|
%"42" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store i64 %"42", ptr addrspace(3) @shared_mem, align 4
|
%"41" = add i64 %"42", 2
|
||||||
|
store i64 %"41", ptr addrspace(5) %"39", align 4
|
||||||
|
%"43" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
|
store i64 %"43", ptr addrspace(3) @shared_mem, align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 {
|
define amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
|
||||||
%"45" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"46" = alloca i64, align 8, addrspace(5)
|
%"46" = alloca i64, align 8, addrspace(5)
|
||||||
%"47" = alloca i64, align 8, addrspace(5)
|
%"47" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"48" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"48" = load i64, ptr addrspace(4) %"43", align 4
|
br label %"64"
|
||||||
store i64 %"48", ptr addrspace(5) %"45", align 4
|
|
||||||
|
"64": ; preds = %1
|
||||||
%"49" = load i64, ptr addrspace(4) %"44", align 4
|
%"49" = load i64, ptr addrspace(4) %"44", align 4
|
||||||
store i64 %"49", ptr addrspace(5) %"46", align 4
|
store i64 %"49", ptr addrspace(5) %"46", align 4
|
||||||
%"51" = load i64, ptr addrspace(5) %"45", align 4
|
%"50" = load i64, ptr addrspace(4) %"45", align 4
|
||||||
%"58" = inttoptr i64 %"51" to ptr addrspace(1)
|
|
||||||
%"50" = load i64, ptr addrspace(1) %"58", align 4
|
|
||||||
store i64 %"50", ptr addrspace(5) %"47", align 4
|
store i64 %"50", ptr addrspace(5) %"47", align 4
|
||||||
%"52" = load i64, ptr addrspace(5) %"47", align 4
|
%"52" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
store i64 %"52", ptr addrspace(3) @shared_mem, align 4
|
%"59" = inttoptr i64 %"52" to ptr addrspace(1)
|
||||||
call void @__zluda_ptx_impl_incr_shared_2_global()
|
%"51" = load i64, ptr addrspace(1) %"59", align 4
|
||||||
%"53" = load i64, ptr addrspace(3) @shared_mem, align 4
|
store i64 %"51", ptr addrspace(5) %"48", align 4
|
||||||
store i64 %"53", ptr addrspace(5) %"47", align 4
|
%"53" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
%"54" = load i64, ptr addrspace(5) %"46", align 4
|
store i64 %"53", ptr addrspace(3) @shared_mem, align 4
|
||||||
|
call void @incr_shared_2_global()
|
||||||
|
%"54" = load i64, ptr addrspace(3) @shared_mem, align 4
|
||||||
|
store i64 %"54", ptr addrspace(5) %"48", align 4
|
||||||
%"55" = load i64, ptr addrspace(5) %"47", align 4
|
%"55" = load i64, ptr addrspace(5) %"47", align 4
|
||||||
%"61" = inttoptr i64 %"54" to ptr addrspace(1)
|
%"56" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
store i64 %"55", ptr addrspace(1) %"61", align 4
|
%"62" = inttoptr i64 %"55" to ptr addrspace(1)
|
||||||
|
store i64 %"56", ptr addrspace(1) %"62", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,47 +10,51 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
|
define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"43" = alloca i64, align 8, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"44" = alloca float, align 4, addrspace(5)
|
%"44" = alloca i64, align 8, addrspace(5)
|
||||||
%"45" = alloca float, align 4, addrspace(5)
|
%"45" = alloca float, align 4, addrspace(5)
|
||||||
%"46" = alloca float, align 4, addrspace(5)
|
%"46" = alloca float, align 4, addrspace(5)
|
||||||
|
%"47" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"47" = load i64, ptr addrspace(4) %"40", align 4
|
br label %"66"
|
||||||
store i64 %"47", ptr addrspace(5) %"42", align 4
|
|
||||||
|
"66": ; preds = %1
|
||||||
%"48" = load i64, ptr addrspace(4) %"41", align 4
|
%"48" = load i64, ptr addrspace(4) %"41", align 4
|
||||||
store i64 %"48", ptr addrspace(5) %"43", align 4
|
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
%"49" = load i64, ptr addrspace(4) %"42", align 4
|
||||||
%"61" = inttoptr i64 %"50" to ptr
|
store i64 %"49", ptr addrspace(5) %"44", align 4
|
||||||
%"49" = load float, ptr %"61", align 4
|
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
store float %"49", ptr addrspace(5) %"44", align 4
|
|
||||||
%"51" = load i64, ptr addrspace(5) %"42", align 4
|
|
||||||
%"62" = inttoptr i64 %"51" to ptr
|
%"62" = inttoptr i64 %"51" to ptr
|
||||||
%"31" = getelementptr inbounds i8, ptr %"62", i64 4
|
%"50" = load float, ptr %"62", align 4
|
||||||
%"52" = load float, ptr %"31", align 4
|
store float %"50", ptr addrspace(5) %"45", align 4
|
||||||
store float %"52", ptr addrspace(5) %"45", align 4
|
%"52" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
%"53" = load i64, ptr addrspace(5) %"42", align 4
|
%"63" = inttoptr i64 %"52" to ptr
|
||||||
%"63" = inttoptr i64 %"53" to ptr
|
%"32" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||||
%"33" = getelementptr inbounds i8, ptr %"63", i64 8
|
%"53" = load float, ptr %"32", align 4
|
||||||
%"54" = load float, ptr %"33", align 4
|
store float %"53", ptr addrspace(5) %"46", align 4
|
||||||
store float %"54", ptr addrspace(5) %"46", align 4
|
%"54" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
%"56" = load float, ptr addrspace(5) %"44", align 4
|
%"64" = inttoptr i64 %"54" to ptr
|
||||||
|
%"34" = getelementptr inbounds i8, ptr %"64", i64 8
|
||||||
|
%"55" = load float, ptr %"34", align 4
|
||||||
|
store float %"55", ptr addrspace(5) %"47", align 4
|
||||||
%"57" = load float, ptr addrspace(5) %"45", align 4
|
%"57" = load float, ptr addrspace(5) %"45", align 4
|
||||||
%"58" = load float, ptr addrspace(5) %"46", align 4
|
%"58" = load float, ptr addrspace(5) %"46", align 4
|
||||||
%"55" = call float @llvm.fma.f32(float %"56", float %"57", float %"58")
|
%"59" = load float, ptr addrspace(5) %"47", align 4
|
||||||
store float %"55", ptr addrspace(5) %"44", align 4
|
%"56" = call float @llvm.fma.f32(float %"57", float %"58", float %"59")
|
||||||
%"59" = load i64, ptr addrspace(5) %"43", align 4
|
store float %"56", ptr addrspace(5) %"45", align 4
|
||||||
%"60" = load float, ptr addrspace(5) %"44", align 4
|
%"60" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
%"64" = inttoptr i64 %"59" to ptr
|
%"61" = load float, ptr addrspace(5) %"45", align 4
|
||||||
store float %"60", ptr %"64", align 4
|
%"65" = inttoptr i64 %"60" to ptr
|
||||||
|
store float %"61", ptr %"65", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare float @llvm.fma.f32(float, float, float) #1
|
declare float @llvm.fma.f32(float, float, float) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -12,25 +12,29 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca i32, align 4, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"40" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %"37", align 4
|
br label %"50"
|
||||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
|
||||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
"50": ; preds = %1
|
||||||
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %"38", align 4
|
||||||
%"47" = inttoptr i64 %"43" to ptr addrspace(1)
|
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"42" = load i32, ptr addrspace(1) %"47", align 4
|
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||||
store i32 %"42", ptr addrspace(5) %"39", align 4
|
|
||||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
%"45" = load i32, ptr addrspace(5) %"39", align 4
|
|
||||||
%"48" = inttoptr i64 %"44" to ptr addrspace(1)
|
%"48" = inttoptr i64 %"44" to ptr addrspace(1)
|
||||||
store i32 %"45", ptr addrspace(1) %"48", align 4
|
%"43" = load i32, ptr addrspace(1) %"48", align 4
|
||||||
|
store i32 %"43", ptr addrspace(5) %"40", align 4
|
||||||
|
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
|
%"46" = load i32, ptr addrspace(5) %"40", align 4
|
||||||
|
%"49" = inttoptr i64 %"45" to ptr addrspace(1)
|
||||||
|
store i32 %"46", ptr addrspace(1) %"49", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,26 +10,30 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"48"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"48": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"45" = inttoptr i64 %"42" to ptr
|
|
||||||
%"41" = load i64, ptr %"45", align 4
|
|
||||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
|
||||||
%"46" = inttoptr i64 %"43" to ptr
|
%"46" = inttoptr i64 %"43" to ptr
|
||||||
store i64 %"44", ptr %"46", align 4
|
%"42" = load i64, ptr %"46", align 4
|
||||||
|
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||||
|
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
|
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
|
%"47" = inttoptr i64 %"44" to ptr
|
||||||
|
store i64 %"45", ptr %"47", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,31 +10,35 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
br label %"52"
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
|
||||||
|
"52": ; preds = %1
|
||||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
store i64 81985529216486895, ptr addrspace(5) %"39", align 4
|
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||||
%"48" = inttoptr i64 %"44" to ptr addrspace(1)
|
store i64 81985529216486895, ptr addrspace(5) %"40", align 4
|
||||||
%"47" = load float, ptr addrspace(1) %"48", align 4
|
|
||||||
%2 = bitcast float %"47" to i32
|
|
||||||
%"43" = zext i32 %2 to i64
|
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
|
||||||
%"49" = inttoptr i64 %"45" to ptr addrspace(1)
|
%"49" = inttoptr i64 %"45" to ptr addrspace(1)
|
||||||
%3 = trunc i64 %"46" to i32
|
%"48" = load float, ptr addrspace(1) %"49", align 4
|
||||||
%"50" = bitcast i32 %3 to float
|
%2 = bitcast float %"48" to i32
|
||||||
store float %"50", ptr addrspace(1) %"49", align 4
|
%"44" = zext i32 %2 to i64
|
||||||
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
|
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
%"50" = inttoptr i64 %"46" to ptr addrspace(1)
|
||||||
|
%3 = trunc i64 %"47" to i32
|
||||||
|
%"51" = bitcast i32 %3 to float
|
||||||
|
store float %"51", ptr addrspace(1) %"50", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,37 +10,41 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 {
|
define amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca i32, align 4, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"44" = alloca i32, align 4, addrspace(5)
|
%"44" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"45" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
br label %"60"
|
||||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
|
"60": ; preds = %1
|
||||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||||
%"55" = inttoptr i64 %"48" to ptr
|
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||||
%"47" = load i32, ptr %"55", align 4
|
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"47", ptr addrspace(5) %"43", align 4
|
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
|
||||||
%"56" = inttoptr i64 %"49" to ptr
|
%"56" = inttoptr i64 %"49" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"56", i64 4
|
%"48" = load i32, ptr %"56", align 4
|
||||||
%"50" = load i32, ptr %"30", align 4
|
store i32 %"48", ptr addrspace(5) %"44", align 4
|
||||||
store i32 %"50", ptr addrspace(5) %"44", align 4
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"51" = load i64, ptr addrspace(5) %"42", align 4
|
%"57" = inttoptr i64 %"50" to ptr
|
||||||
%"52" = load i32, ptr addrspace(5) %"44", align 4
|
%"31" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||||
%"57" = inttoptr i64 %"51" to ptr
|
%"51" = load i32, ptr %"31", align 4
|
||||||
store i32 %"52", ptr %"57", align 4
|
store i32 %"51", ptr addrspace(5) %"45", align 4
|
||||||
%"53" = load i64, ptr addrspace(5) %"42", align 4
|
%"52" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
%"58" = inttoptr i64 %"53" to ptr
|
%"53" = load i32, ptr addrspace(5) %"45", align 4
|
||||||
%"32" = getelementptr inbounds i8, ptr %"58", i64 4
|
%"58" = inttoptr i64 %"52" to ptr
|
||||||
%"54" = load i32, ptr addrspace(5) %"43", align 4
|
store i32 %"53", ptr %"58", align 4
|
||||||
store i32 %"54", ptr %"32", align 4
|
%"54" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
|
%"59" = inttoptr i64 %"54" to ptr
|
||||||
|
%"33" = getelementptr inbounds i8, ptr %"59", i64 4
|
||||||
|
%"55" = load i32, ptr addrspace(5) %"44", align 4
|
||||||
|
store i32 %"55", ptr %"33", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca float, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"50"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"50": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"47" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"41" = load float, ptr %"47", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store float %"41", ptr addrspace(5) %"38", align 4
|
%"48" = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load float, ptr addrspace(5) %"38", align 4
|
%"42" = load float, ptr %"48", align 4
|
||||||
%"43" = call float @llvm.amdgcn.log.f32(float %"44")
|
store float %"42", ptr addrspace(5) %"39", align 4
|
||||||
store float %"43", ptr addrspace(5) %"38", align 4
|
%"45" = load float, ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
%"44" = call float @llvm.amdgcn.log.f32(float %"45")
|
||||||
%"46" = load float, ptr addrspace(5) %"38", align 4
|
store float %"44", ptr addrspace(5) %"39", align 4
|
||||||
%"48" = inttoptr i64 %"45" to ptr
|
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store float %"46", ptr %"48", align 4
|
%"47" = load float, ptr addrspace(5) %"39", align 4
|
||||||
|
%"49" = inttoptr i64 %"46" to ptr
|
||||||
|
store float %"47", ptr %"49", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare float @llvm.amdgcn.log.f32(float) #1
|
declare float @llvm.amdgcn.log.f32(float) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,27 +10,31 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"9" = alloca [8 x i8], align 8, addrspace(5)
|
%"10" = alloca [8 x i8], align 8, addrspace(5)
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
br label %"49"
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
|
||||||
|
"49": ; preds = %1
|
||||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"46" = inttoptr i64 %"43" to ptr
|
|
||||||
%"42" = load i64, ptr %"46", align 4
|
|
||||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
|
||||||
%"47" = inttoptr i64 %"44" to ptr
|
%"47" = inttoptr i64 %"44" to ptr
|
||||||
store i64 %"45", ptr %"47", align 4
|
%"43" = load i64, ptr %"47", align 4
|
||||||
|
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||||
|
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
|
%"46" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
%"48" = inttoptr i64 %"45" to ptr
|
||||||
|
store i64 %"46", ptr %"48", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,55 +10,59 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 {
|
define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 {
|
||||||
%"47" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"48" = alloca i64, align 8, addrspace(5)
|
%"48" = alloca i64, align 8, addrspace(5)
|
||||||
%"49" = alloca i32, align 4, addrspace(5)
|
%"49" = alloca i64, align 8, addrspace(5)
|
||||||
%"50" = alloca i32, align 4, addrspace(5)
|
%"50" = alloca i32, align 4, addrspace(5)
|
||||||
%"51" = alloca i32, align 4, addrspace(5)
|
%"51" = alloca i32, align 4, addrspace(5)
|
||||||
%"52" = alloca i32, align 4, addrspace(5)
|
%"52" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"53" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"53" = load i64, ptr addrspace(4) %"45", align 4
|
br label %"78"
|
||||||
store i64 %"53", ptr addrspace(5) %"47", align 4
|
|
||||||
|
"78": ; preds = %1
|
||||||
%"54" = load i64, ptr addrspace(4) %"46", align 4
|
%"54" = load i64, ptr addrspace(4) %"46", align 4
|
||||||
store i64 %"54", ptr addrspace(5) %"48", align 4
|
store i64 %"54", ptr addrspace(5) %"48", align 4
|
||||||
%"56" = load i64, ptr addrspace(5) %"47", align 4
|
%"55" = load i64, ptr addrspace(4) %"47", align 4
|
||||||
%"71" = inttoptr i64 %"56" to ptr
|
store i64 %"55", ptr addrspace(5) %"49", align 4
|
||||||
%"55" = load i32, ptr %"71", align 4
|
%"57" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
store i32 %"55", ptr addrspace(5) %"50", align 4
|
|
||||||
%"57" = load i64, ptr addrspace(5) %"47", align 4
|
|
||||||
%"72" = inttoptr i64 %"57" to ptr
|
%"72" = inttoptr i64 %"57" to ptr
|
||||||
%"32" = getelementptr inbounds i8, ptr %"72", i64 4
|
%"56" = load i32, ptr %"72", align 4
|
||||||
%"58" = load i32, ptr %"32", align 4
|
store i32 %"56", ptr addrspace(5) %"51", align 4
|
||||||
store i32 %"58", ptr addrspace(5) %"51", align 4
|
%"58" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
%"59" = load i64, ptr addrspace(5) %"47", align 4
|
%"73" = inttoptr i64 %"58" to ptr
|
||||||
%"73" = inttoptr i64 %"59" to ptr
|
%"33" = getelementptr inbounds i8, ptr %"73", i64 4
|
||||||
%"34" = getelementptr inbounds i8, ptr %"73", i64 8
|
%"59" = load i32, ptr %"33", align 4
|
||||||
%"60" = load i32, ptr %"34", align 4
|
store i32 %"59", ptr addrspace(5) %"52", align 4
|
||||||
store i32 %"60", ptr addrspace(5) %"52", align 4
|
%"60" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
%"62" = load i32, ptr addrspace(5) %"50", align 4
|
%"74" = inttoptr i64 %"60" to ptr
|
||||||
|
%"35" = getelementptr inbounds i8, ptr %"74", i64 8
|
||||||
|
%"61" = load i32, ptr %"35", align 4
|
||||||
|
store i32 %"61", ptr addrspace(5) %"53", align 4
|
||||||
%"63" = load i32, ptr addrspace(5) %"51", align 4
|
%"63" = load i32, ptr addrspace(5) %"51", align 4
|
||||||
%"64" = load i32, ptr addrspace(5) %"52", align 4
|
%"64" = load i32, ptr addrspace(5) %"52", align 4
|
||||||
%2 = mul i32 %"62", %"63"
|
%"65" = load i32, ptr addrspace(5) %"53", align 4
|
||||||
%"61" = add i32 %2, %"64"
|
%2 = mul i32 %"63", %"64"
|
||||||
store i32 %"61", ptr addrspace(5) %"49", align 4
|
%"62" = add i32 %2, %"65"
|
||||||
%"65" = load i64, ptr addrspace(5) %"48", align 4
|
store i32 %"62", ptr addrspace(5) %"50", align 4
|
||||||
%"66" = load i32, ptr addrspace(5) %"49", align 4
|
%"66" = load i64, ptr addrspace(5) %"49", align 4
|
||||||
%"74" = inttoptr i64 %"65" to ptr
|
%"67" = load i32, ptr addrspace(5) %"50", align 4
|
||||||
store i32 %"66", ptr %"74", align 4
|
%"75" = inttoptr i64 %"66" to ptr
|
||||||
%"67" = load i64, ptr addrspace(5) %"48", align 4
|
store i32 %"67", ptr %"75", align 4
|
||||||
%"75" = inttoptr i64 %"67" to ptr
|
%"68" = load i64, ptr addrspace(5) %"49", align 4
|
||||||
%"36" = getelementptr inbounds i8, ptr %"75", i64 4
|
%"76" = inttoptr i64 %"68" to ptr
|
||||||
%"68" = load i32, ptr addrspace(5) %"49", align 4
|
%"37" = getelementptr inbounds i8, ptr %"76", i64 4
|
||||||
store i32 %"68", ptr %"36", align 4
|
%"69" = load i32, ptr addrspace(5) %"50", align 4
|
||||||
%"69" = load i64, ptr addrspace(5) %"48", align 4
|
store i32 %"69", ptr %"37", align 4
|
||||||
%"76" = inttoptr i64 %"69" to ptr
|
%"70" = load i64, ptr addrspace(5) %"49", align 4
|
||||||
%"38" = getelementptr inbounds i8, ptr %"76", i64 8
|
%"77" = inttoptr i64 %"70" to ptr
|
||||||
%"70" = load i32, ptr addrspace(5) %"49", align 4
|
%"39" = getelementptr inbounds i8, ptr %"77", i64 8
|
||||||
store i32 %"70", ptr %"38", align 4
|
%"71" = load i32, ptr addrspace(5) %"50", align 4
|
||||||
|
store i32 %"71", ptr %"39", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
53
ptx/src/test/ll/malformed_label.ll
Normal file
53
ptx/src/test/ll/malformed_label.ll
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||||
|
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||||
|
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||||
|
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||||
|
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
|
define amdgpu_kernel void @malformed_label(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
|
br label %1
|
||||||
|
|
||||||
|
1: ; preds = %0
|
||||||
|
br label %"57"
|
||||||
|
|
||||||
|
"57": ; preds = %1
|
||||||
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
|
br label %"10"
|
||||||
|
|
||||||
|
"58": ; No predecessors!
|
||||||
|
%"47" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
|
%"54" = inttoptr i64 %"47" to ptr
|
||||||
|
%"46" = load i64, ptr %"54", align 4
|
||||||
|
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||||
|
br label %"10"
|
||||||
|
|
||||||
|
"10": ; preds = %"58", %"57"
|
||||||
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
%"55" = inttoptr i64 %"49" to ptr
|
||||||
|
%"48" = load i64, ptr %"55", align 4
|
||||||
|
store i64 %"48", ptr addrspace(5) %"42", align 4
|
||||||
|
%"51" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
|
%"50" = add i64 %"51", 1
|
||||||
|
store i64 %"50", ptr addrspace(5) %"43", align 4
|
||||||
|
%"52" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
|
%"53" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
|
%"56" = inttoptr i64 %"52" to ptr
|
||||||
|
store i64 %"53", ptr %"56", align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,40 +10,44 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i32, align 4, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"58"
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"58": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"54" = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"45" = load i32, ptr %"54", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i32 %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
%"55" = inttoptr i64 %"47" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"55", i64 4
|
%"46" = load i32, ptr %"55", align 4
|
||||||
%"48" = load i32, ptr %"30", align 4
|
store i32 %"46", ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"48", ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||||
|
%"49" = load i32, ptr %"31", align 4
|
||||||
|
store i32 %"49", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
%"49" = call i32 @llvm.smax.i32(i32 %"50", i32 %"51")
|
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
store i32 %"49", ptr addrspace(5) %"41", align 4
|
%"50" = call i32 @llvm.smax.i32(i32 %"51", i32 %"52")
|
||||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||||
%"53" = load i32, ptr addrspace(5) %"41", align 4
|
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"56" = inttoptr i64 %"52" to ptr
|
%"54" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"53", ptr %"56", align 4
|
%"57" = inttoptr i64 %"53" to ptr
|
||||||
|
store i32 %"54", ptr %"57", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare i32 @llvm.smax.i32(i32, i32) #1
|
declare i32 @llvm.smax.i32(i32, i32) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,27 +10,31 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca i32, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"49"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"49": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"46" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"45" = load i32, ptr %"46", align 4
|
|
||||||
store i32 %"45", ptr addrspace(5) %"38", align 4
|
|
||||||
fence seq_cst
|
|
||||||
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
%"44" = load i32, ptr addrspace(5) %"38", align 4
|
|
||||||
%"47" = inttoptr i64 %"43" to ptr
|
%"47" = inttoptr i64 %"43" to ptr
|
||||||
store i32 %"44", ptr %"47", align 4
|
%"46" = load i32, ptr %"47", align 4
|
||||||
|
store i32 %"46", ptr addrspace(5) %"39", align 4
|
||||||
|
fence seq_cst
|
||||||
|
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
|
%"45" = load i32, ptr addrspace(5) %"39", align 4
|
||||||
|
%"48" = inttoptr i64 %"44" to ptr
|
||||||
|
store i32 %"45", ptr %"48", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,40 +10,44 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i32, align 4, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"58"
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"58": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"54" = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"45" = load i32, ptr %"54", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i32 %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
%"55" = inttoptr i64 %"47" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"55", i64 4
|
%"46" = load i32, ptr %"55", align 4
|
||||||
%"48" = load i32, ptr %"30", align 4
|
store i32 %"46", ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"48", ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||||
|
%"49" = load i32, ptr %"31", align 4
|
||||||
|
store i32 %"49", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
%"49" = call i32 @llvm.smin.i32(i32 %"50", i32 %"51")
|
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
store i32 %"49", ptr addrspace(5) %"41", align 4
|
%"50" = call i32 @llvm.smin.i32(i32 %"51", i32 %"52")
|
||||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||||
%"53" = load i32, ptr addrspace(5) %"41", align 4
|
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"56" = inttoptr i64 %"52" to ptr
|
%"54" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"53", ptr %"56", align 4
|
%"57" = inttoptr i64 %"53" to ptr
|
||||||
|
store i32 %"54", ptr %"57", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare i32 @llvm.smin.i32(i32, i32) #1
|
declare i32 @llvm.smin.i32(i32, i32) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,29 +10,33 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
br label %"52"
|
||||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
|
||||||
|
"52": ; preds = %1
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"49" = inttoptr i64 %"44" to ptr
|
|
||||||
%"43" = load i64, ptr %"49", align 4
|
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store i64 %"46", ptr addrspace(5) %"40", align 4
|
%"50" = inttoptr i64 %"45" to ptr
|
||||||
%"47" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr %"50", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"50" = inttoptr i64 %"47" to ptr
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i64 %"48", ptr %"50", align 4
|
store i64 %"47", ptr addrspace(5) %"41", align 4
|
||||||
|
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
|
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
|
%"51" = inttoptr i64 %"48" to ptr
|
||||||
|
store i64 %"49", ptr %"51", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,15 +10,19 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
define amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #1 {
|
||||||
%"9" = alloca [8 x i8], align 1, addrspace(5)
|
%"10" = alloca [8 x i8], align 1, addrspace(5)
|
||||||
%"35" = alloca i64, align 8, addrspace(5)
|
%"36" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"37" = ptrtoint ptr addrspace(5) %"9" to i64
|
br label %"39"
|
||||||
store i64 %"37", ptr addrspace(5) %"35", align 4
|
|
||||||
|
"39": ; preds = %1
|
||||||
|
%"38" = ptrtoint ptr addrspace(5) %"10" to i64
|
||||||
|
store i64 %"38", ptr addrspace(5) %"36", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,34 +10,38 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @mul24(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @mul24(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i32, align 4, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i32, align 4, addrspace(5)
|
%"41" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"42" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"53"
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"53": ; preds = %1
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%"50" = inttoptr i64 %"45" to ptr
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"44" = load i32, ptr %"50", align 4
|
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store i32 %"44", ptr addrspace(5) %"40", align 4
|
%"51" = inttoptr i64 %"46" to ptr
|
||||||
%"47" = load i32, ptr addrspace(5) %"40", align 4
|
%"45" = load i32, ptr %"51", align 4
|
||||||
%"46" = call i32 @llvm.amdgcn.mul.u24(i32 %"47", i32 2)
|
store i32 %"45", ptr addrspace(5) %"41", align 4
|
||||||
store i32 %"46", ptr addrspace(5) %"41", align 4
|
%"48" = load i32, ptr addrspace(5) %"41", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
%"47" = call i32 @llvm.amdgcn.mul.u24(i32 %"48", i32 2)
|
||||||
%"49" = load i32, ptr addrspace(5) %"41", align 4
|
store i32 %"47", ptr addrspace(5) %"42", align 4
|
||||||
%"51" = inttoptr i64 %"48" to ptr
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i32 %"49", ptr %"51", align 4
|
%"50" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
|
%"52" = inttoptr i64 %"49" to ptr
|
||||||
|
store i32 %"50", ptr %"52", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare i32 @llvm.amdgcn.mul.u24(i32, i32) #1
|
declare i32 @llvm.amdgcn.mul.u24(i32, i32) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca float, align 4, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca float, align 4, addrspace(5)
|
%"42" = alloca float, align 4, addrspace(5)
|
||||||
|
%"43" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"58"
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"58": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"54" = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"45" = load float, ptr %"54", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store float %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
%"55" = inttoptr i64 %"47" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"55", i64 4
|
%"46" = load float, ptr %"55", align 4
|
||||||
%"48" = load float, ptr %"30", align 4
|
store float %"46", ptr addrspace(5) %"42", align 4
|
||||||
store float %"48", ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load float, ptr addrspace(5) %"41", align 4
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||||
|
%"49" = load float, ptr %"31", align 4
|
||||||
|
store float %"49", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = load float, ptr addrspace(5) %"42", align 4
|
%"51" = load float, ptr addrspace(5) %"42", align 4
|
||||||
%"49" = fmul float %"50", %"51"
|
%"52" = load float, ptr addrspace(5) %"43", align 4
|
||||||
store float %"49", ptr addrspace(5) %"41", align 4
|
%"50" = fmul float %"51", %"52"
|
||||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
store float %"50", ptr addrspace(5) %"42", align 4
|
||||||
%"53" = load float, ptr addrspace(5) %"41", align 4
|
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"56" = inttoptr i64 %"52" to ptr
|
%"54" = load float, ptr addrspace(5) %"42", align 4
|
||||||
store float %"53", ptr %"56", align 4
|
%"57" = inttoptr i64 %"53" to ptr
|
||||||
|
store float %"54", ptr %"57", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"53"
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"53": ; preds = %1
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%"50" = inttoptr i64 %"45" to ptr
|
|
||||||
%"44" = load i64, ptr %"50", align 4
|
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
%2 = zext i64 %"47" to i128
|
%"51" = inttoptr i64 %"46" to ptr
|
||||||
|
%"45" = load i64, ptr %"51", align 4
|
||||||
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
|
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
|
%2 = zext i64 %"48" to i128
|
||||||
%3 = mul i128 %2, 2
|
%3 = mul i128 %2, 2
|
||||||
%4 = lshr i128 %3, 64
|
%4 = lshr i128 %3, 64
|
||||||
%"46" = trunc i128 %4 to i64
|
%"47" = trunc i128 %4 to i64
|
||||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"51" = inttoptr i64 %"48" to ptr
|
%"52" = inttoptr i64 %"49" to ptr
|
||||||
store i64 %"49", ptr %"51", align 4
|
store i64 %"50", ptr %"52", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"53"
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"53": ; preds = %1
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%"50" = inttoptr i64 %"45" to ptr
|
|
||||||
%"44" = load i64, ptr %"50", align 4
|
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
%"46" = mul i64 %"47", 2
|
%"51" = inttoptr i64 %"46" to ptr
|
||||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
%"45" = load i64, ptr %"51", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"51" = inttoptr i64 %"48" to ptr
|
%"47" = mul i64 %"48", 2
|
||||||
store i64 %"49", ptr %"51", align 4
|
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||||
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
|
%"52" = inttoptr i64 %"49" to ptr
|
||||||
|
store i64 %"50", ptr %"52", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca float, align 4, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca float, align 4, addrspace(5)
|
%"42" = alloca float, align 4, addrspace(5)
|
||||||
|
%"43" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"58"
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"58": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"54" = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"45" = load float, ptr %"54", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store float %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
%"55" = inttoptr i64 %"47" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"55", i64 4
|
%"46" = load float, ptr %"55", align 4
|
||||||
%"48" = load float, ptr %"30", align 4
|
store float %"46", ptr addrspace(5) %"42", align 4
|
||||||
store float %"48", ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load float, ptr addrspace(5) %"41", align 4
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||||
|
%"49" = load float, ptr %"31", align 4
|
||||||
|
store float %"49", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = load float, ptr addrspace(5) %"42", align 4
|
%"51" = load float, ptr addrspace(5) %"42", align 4
|
||||||
%"49" = fmul float %"50", %"51"
|
%"52" = load float, ptr addrspace(5) %"43", align 4
|
||||||
store float %"49", ptr addrspace(5) %"41", align 4
|
%"50" = fmul float %"51", %"52"
|
||||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
store float %"50", ptr addrspace(5) %"42", align 4
|
||||||
%"53" = load float, ptr addrspace(5) %"41", align 4
|
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"56" = inttoptr i64 %"52" to ptr
|
%"54" = load float, ptr addrspace(5) %"42", align 4
|
||||||
store float %"53", ptr %"56", align 4
|
%"57" = inttoptr i64 %"53" to ptr
|
||||||
|
store float %"54", ptr %"57", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,39 +10,43 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca i32, align 4, addrspace(5)
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
%"44" = alloca i64, align 8, addrspace(5)
|
%"44" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"45" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"45" = load i64, ptr addrspace(4) %"38", align 4
|
br label %"61"
|
||||||
store i64 %"45", ptr addrspace(5) %"40", align 4
|
|
||||||
|
"61": ; preds = %1
|
||||||
%"46" = load i64, ptr addrspace(4) %"39", align 4
|
%"46" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
store i64 %"46", ptr addrspace(5) %"41", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
%"47" = load i64, ptr addrspace(4) %"40", align 4
|
||||||
%"56" = inttoptr i64 %"48" to ptr addrspace(1)
|
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||||
%"47" = load i32, ptr addrspace(1) %"56", align 4
|
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
store i32 %"47", ptr addrspace(5) %"42", align 4
|
|
||||||
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
|
||||||
%"57" = inttoptr i64 %"49" to ptr addrspace(1)
|
%"57" = inttoptr i64 %"49" to ptr addrspace(1)
|
||||||
%"31" = getelementptr inbounds i8, ptr addrspace(1) %"57", i64 4
|
%"48" = load i32, ptr addrspace(1) %"57", align 4
|
||||||
%"50" = load i32, ptr addrspace(1) %"31", align 4
|
store i32 %"48", ptr addrspace(5) %"43", align 4
|
||||||
store i32 %"50", ptr addrspace(5) %"43", align 4
|
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"52" = load i32, ptr addrspace(5) %"42", align 4
|
%"58" = inttoptr i64 %"50" to ptr addrspace(1)
|
||||||
|
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4
|
||||||
|
%"51" = load i32, ptr addrspace(1) %"32", align 4
|
||||||
|
store i32 %"51", ptr addrspace(5) %"44", align 4
|
||||||
%"53" = load i32, ptr addrspace(5) %"43", align 4
|
%"53" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
%2 = sext i32 %"52" to i64
|
%"54" = load i32, ptr addrspace(5) %"44", align 4
|
||||||
%3 = sext i32 %"53" to i64
|
%2 = sext i32 %"53" to i64
|
||||||
%"51" = mul i64 %2, %3
|
%3 = sext i32 %"54" to i64
|
||||||
store i64 %"51", ptr addrspace(5) %"44", align 4
|
%"52" = mul i64 %2, %3
|
||||||
%"54" = load i64, ptr addrspace(5) %"41", align 4
|
store i64 %"52", ptr addrspace(5) %"45", align 4
|
||||||
%"55" = load i64, ptr addrspace(5) %"44", align 4
|
%"55" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"58" = inttoptr i64 %"54" to ptr
|
%"56" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
store i64 %"55", ptr %"58", align 4
|
%"59" = inttoptr i64 %"55" to ptr
|
||||||
|
store i64 %"56", ptr %"59", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,29 +10,33 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca i32, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"50"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"50": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"47" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"41" = load i32, ptr %"47", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store i32 %"41", ptr addrspace(5) %"38", align 4
|
%"48" = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load i32, ptr addrspace(5) %"38", align 4
|
%"42" = load i32, ptr %"48", align 4
|
||||||
%"43" = sub i32 0, %"44"
|
store i32 %"42", ptr addrspace(5) %"39", align 4
|
||||||
store i32 %"43", ptr addrspace(5) %"38", align 4
|
%"45" = load i32, ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
%"44" = sub i32 0, %"45"
|
||||||
%"46" = load i32, ptr addrspace(5) %"38", align 4
|
store i32 %"44", ptr addrspace(5) %"39", align 4
|
||||||
%"48" = inttoptr i64 %"45" to ptr
|
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store i32 %"46", ptr %"48", align 4
|
%"47" = load i32, ptr addrspace(5) %"39", align 4
|
||||||
|
%"49" = inttoptr i64 %"46" to ptr
|
||||||
|
store i32 %"47", ptr %"49", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,35 +10,39 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca i32, align 4, addrspace(5)
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"44" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
br label %"57"
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
|
||||||
|
"57": ; preds = %1
|
||||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||||
%"54" = inttoptr i64 %"46" to ptr addrspace(1)
|
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||||
%"31" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8
|
%"47" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"29" = load <2 x i32>, ptr addrspace(1) %"31", align 8
|
%"55" = inttoptr i64 %"47" to ptr addrspace(1)
|
||||||
%"47" = extractelement <2 x i32> %"29", i8 0
|
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 8
|
||||||
%"48" = extractelement <2 x i32> %"29", i8 1
|
%"30" = load <2 x i32>, ptr addrspace(1) %"32", align 8
|
||||||
store i32 %"47", ptr addrspace(5) %"42", align 4
|
%"48" = extractelement <2 x i32> %"30", i8 0
|
||||||
|
%"49" = extractelement <2 x i32> %"30", i8 1
|
||||||
store i32 %"48", ptr addrspace(5) %"43", align 4
|
store i32 %"48", ptr addrspace(5) %"43", align 4
|
||||||
%"50" = load i32, ptr addrspace(5) %"42", align 4
|
store i32 %"49", ptr addrspace(5) %"44", align 4
|
||||||
%"51" = load i32, ptr addrspace(5) %"43", align 4
|
%"51" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
%"49" = add i32 %"50", %"51"
|
%"52" = load i32, ptr addrspace(5) %"44", align 4
|
||||||
store i32 %"49", ptr addrspace(5) %"42", align 4
|
%"50" = add i32 %"51", %"52"
|
||||||
%"52" = load i64, ptr addrspace(5) %"41", align 4
|
store i32 %"50", ptr addrspace(5) %"43", align 4
|
||||||
%"53" = load i32, ptr addrspace(5) %"42", align 4
|
%"53" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"55" = inttoptr i64 %"52" to ptr addrspace(1)
|
%"54" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
store i32 %"53", ptr addrspace(1) %"55", align 4
|
%"56" = inttoptr i64 %"53" to ptr addrspace(1)
|
||||||
|
store i32 %"54", ptr addrspace(1) %"56", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
br label %"54"
|
||||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
|
||||||
|
"54": ; preds = %1
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"49" = inttoptr i64 %"44" to ptr
|
|
||||||
%"43" = load i64, ptr %"49", align 4
|
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
%"50" = xor i64 %"46", -1
|
%"50" = inttoptr i64 %"45" to ptr
|
||||||
store i64 %"50", ptr addrspace(5) %"40", align 4
|
%"44" = load i64, ptr %"50", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"38", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"52" = inttoptr i64 %"47" to ptr
|
%"51" = xor i64 %"47", -1
|
||||||
store i64 %"48", ptr %"52", align 4
|
store i64 %"51", ptr addrspace(5) %"41", align 4
|
||||||
|
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
|
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
|
%"53" = inttoptr i64 %"48" to ptr
|
||||||
|
store i64 %"49", ptr %"53", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i32, align 4, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"56"
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"56": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"53" = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"45" = load i32, ptr %"53", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i32 %"45", ptr addrspace(5) %"41", align 4
|
%"54" = inttoptr i64 %"47" to ptr
|
||||||
%"30" = call i32 @__zluda_ptx_impl_sreg_ntid(i8 0)
|
%"46" = load i32, ptr %"54", align 4
|
||||||
store i32 %"30", ptr addrspace(5) %"42", align 4
|
store i32 %"46", ptr addrspace(5) %"42", align 4
|
||||||
%"49" = load i32, ptr addrspace(5) %"41", align 4
|
%"31" = call i32 @__zluda_ptx_impl_sreg_ntid(i8 0)
|
||||||
|
store i32 %"31", ptr addrspace(5) %"43", align 4
|
||||||
%"50" = load i32, ptr addrspace(5) %"42", align 4
|
%"50" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
%"48" = add i32 %"49", %"50"
|
%"51" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
store i32 %"48", ptr addrspace(5) %"41", align 4
|
%"49" = add i32 %"50", %"51"
|
||||||
%"51" = load i64, ptr addrspace(5) %"40", align 4
|
store i32 %"49", ptr addrspace(5) %"42", align 4
|
||||||
%"52" = load i32, ptr addrspace(5) %"41", align 4
|
%"52" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"54" = inttoptr i64 %"51" to ptr
|
%"53" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"52", ptr %"54", align 4
|
%"55" = inttoptr i64 %"52" to ptr
|
||||||
|
store i32 %"53", ptr %"55", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"61"
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"61": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"54" = inttoptr i64 %"46" to ptr
|
|
||||||
%"45" = load i64, ptr %"54", align 4
|
|
||||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
%"55" = inttoptr i64 %"47" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"55", i64 8
|
%"46" = load i64, ptr %"55", align 4
|
||||||
%"48" = load i64, ptr %"30", align 4
|
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||||
store i64 %"48", ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"56", i64 8
|
||||||
|
%"49" = load i64, ptr %"31", align 4
|
||||||
|
store i64 %"49", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = load i64, ptr addrspace(5) %"42", align 4
|
%"51" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"56" = or i64 %"50", %"51"
|
%"52" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
store i64 %"56", ptr addrspace(5) %"41", align 4
|
%"57" = or i64 %"51", %"52"
|
||||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
store i64 %"57", ptr addrspace(5) %"42", align 4
|
||||||
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"59" = inttoptr i64 %"52" to ptr
|
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
store i64 %"53", ptr %"59", align 4
|
%"60" = inttoptr i64 %"53" to ptr
|
||||||
|
store i64 %"54", ptr %"60", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca i32, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"51"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"51": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"47" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"41" = load i32, ptr %"47", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store i32 %"41", ptr addrspace(5) %"38", align 4
|
%"48" = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load i32, ptr addrspace(5) %"38", align 4
|
%"42" = load i32, ptr %"48", align 4
|
||||||
%"48" = call i32 @llvm.ctpop.i32(i32 %"44")
|
store i32 %"42", ptr addrspace(5) %"39", align 4
|
||||||
store i32 %"48", ptr addrspace(5) %"38", align 4
|
%"45" = load i32, ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
%"49" = call i32 @llvm.ctpop.i32(i32 %"45")
|
||||||
%"46" = load i32, ptr addrspace(5) %"38", align 4
|
store i32 %"49", ptr addrspace(5) %"39", align 4
|
||||||
%"49" = inttoptr i64 %"45" to ptr
|
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store i32 %"46", ptr %"49", align 4
|
%"47" = load i32, ptr addrspace(5) %"39", align 4
|
||||||
|
%"50" = inttoptr i64 %"46" to ptr
|
||||||
|
store i32 %"47", ptr %"50", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare i32 @llvm.ctpop.i32(i32) #1
|
declare i32 @llvm.ctpop.i32(i32) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,57 +10,61 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 {
|
define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 {
|
||||||
%"47" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"48" = alloca i64, align 8, addrspace(5)
|
%"48" = alloca i64, align 8, addrspace(5)
|
||||||
%"49" = alloca i64, align 8, addrspace(5)
|
%"49" = alloca i64, align 8, addrspace(5)
|
||||||
%"50" = alloca i64, align 8, addrspace(5)
|
%"50" = alloca i64, align 8, addrspace(5)
|
||||||
%"51" = alloca i64, align 8, addrspace(5)
|
%"51" = alloca i64, align 8, addrspace(5)
|
||||||
%"52" = alloca i1, align 1, addrspace(5)
|
%"52" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"53" = alloca i1, align 1, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"53" = load i64, ptr addrspace(4) %"45", align 4
|
br label %"74"
|
||||||
store i64 %"53", ptr addrspace(5) %"47", align 4
|
|
||||||
|
"74": ; preds = %1
|
||||||
%"54" = load i64, ptr addrspace(4) %"46", align 4
|
%"54" = load i64, ptr addrspace(4) %"46", align 4
|
||||||
store i64 %"54", ptr addrspace(5) %"48", align 4
|
store i64 %"54", ptr addrspace(5) %"48", align 4
|
||||||
%"56" = load i64, ptr addrspace(5) %"47", align 4
|
%"55" = load i64, ptr addrspace(4) %"47", align 4
|
||||||
%"70" = inttoptr i64 %"56" to ptr
|
|
||||||
%"55" = load i64, ptr %"70", align 4
|
|
||||||
store i64 %"55", ptr addrspace(5) %"49", align 4
|
store i64 %"55", ptr addrspace(5) %"49", align 4
|
||||||
%"57" = load i64, ptr addrspace(5) %"47", align 4
|
%"57" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
%"71" = inttoptr i64 %"57" to ptr
|
%"71" = inttoptr i64 %"57" to ptr
|
||||||
%"36" = getelementptr inbounds i8, ptr %"71", i64 8
|
%"56" = load i64, ptr %"71", align 4
|
||||||
%"58" = load i64, ptr %"36", align 4
|
store i64 %"56", ptr addrspace(5) %"50", align 4
|
||||||
store i64 %"58", ptr addrspace(5) %"50", align 4
|
%"58" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
%"60" = load i64, ptr addrspace(5) %"49", align 4
|
%"72" = inttoptr i64 %"58" to ptr
|
||||||
|
%"37" = getelementptr inbounds i8, ptr %"72", i64 8
|
||||||
|
%"59" = load i64, ptr %"37", align 4
|
||||||
|
store i64 %"59", ptr addrspace(5) %"51", align 4
|
||||||
%"61" = load i64, ptr addrspace(5) %"50", align 4
|
%"61" = load i64, ptr addrspace(5) %"50", align 4
|
||||||
%"59" = icmp ult i64 %"60", %"61"
|
%"62" = load i64, ptr addrspace(5) %"51", align 4
|
||||||
store i1 %"59", ptr addrspace(5) %"52", align 1
|
%"60" = icmp ult i64 %"61", %"62"
|
||||||
%"63" = load i1, ptr addrspace(5) %"52", align 1
|
store i1 %"60", ptr addrspace(5) %"53", align 1
|
||||||
%"62" = xor i1 %"63", true
|
%"64" = load i1, ptr addrspace(5) %"53", align 1
|
||||||
store i1 %"62", ptr addrspace(5) %"52", align 1
|
%"63" = xor i1 %"64", true
|
||||||
%"64" = load i1, ptr addrspace(5) %"52", align 1
|
store i1 %"63", ptr addrspace(5) %"53", align 1
|
||||||
br i1 %"64", label %"15", label %"16"
|
%"65" = load i1, ptr addrspace(5) %"53", align 1
|
||||||
|
br i1 %"65", label %"16", label %"17"
|
||||||
|
|
||||||
"15": ; preds = %1
|
"16": ; preds = %"74"
|
||||||
store i64 1, ptr addrspace(5) %"51", align 4
|
store i64 1, ptr addrspace(5) %"52", align 4
|
||||||
br label %"16"
|
br label %"17"
|
||||||
|
|
||||||
"16": ; preds = %"15", %1
|
"17": ; preds = %"16", %"74"
|
||||||
%"66" = load i1, ptr addrspace(5) %"52", align 1
|
%"67" = load i1, ptr addrspace(5) %"53", align 1
|
||||||
br i1 %"66", label %"18", label %"17"
|
br i1 %"67", label %"19", label %"18"
|
||||||
|
|
||||||
"17": ; preds = %"16"
|
"18": ; preds = %"17"
|
||||||
store i64 2, ptr addrspace(5) %"51", align 4
|
store i64 2, ptr addrspace(5) %"52", align 4
|
||||||
br label %"18"
|
br label %"19"
|
||||||
|
|
||||||
"18": ; preds = %"17", %"16"
|
"19": ; preds = %"18", %"17"
|
||||||
%"68" = load i64, ptr addrspace(5) %"48", align 4
|
%"69" = load i64, ptr addrspace(5) %"49", align 4
|
||||||
%"69" = load i64, ptr addrspace(5) %"51", align 4
|
%"70" = load i64, ptr addrspace(5) %"52", align 4
|
||||||
%"72" = inttoptr i64 %"68" to ptr
|
%"73" = inttoptr i64 %"69" to ptr
|
||||||
store i64 %"69", ptr %"72", align 4
|
store i64 %"70", ptr %"73", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,38 +10,42 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i32, align 4, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"61"
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"61": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"54" = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"45" = load i32, ptr %"54", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i32 %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
%"55" = inttoptr i64 %"47" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"55", i64 4
|
%"46" = load i32, ptr %"55", align 4
|
||||||
%"48" = load i32, ptr %"30", align 4
|
store i32 %"46", ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"48", ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||||
|
%"49" = load i32, ptr %"31", align 4
|
||||||
|
store i32 %"49", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
%2 = bitcast i32 %"50" to <4 x i8>
|
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
%3 = bitcast i32 %"51" to <4 x i8>
|
%2 = bitcast i32 %"51" to <4 x i8>
|
||||||
%"56" = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
|
%3 = bitcast i32 %"52" to <4 x i8>
|
||||||
store <4 x i8> %"56", ptr addrspace(5) %"42", align 4
|
%"57" = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
|
||||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
store <4 x i8> %"57", ptr addrspace(5) %"43", align 4
|
||||||
%"53" = load i32, ptr addrspace(5) %"42", align 4
|
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"59" = inttoptr i64 %"52" to ptr
|
%"54" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
store i32 %"53", ptr %"59", align 4
|
%"60" = inttoptr i64 %"53" to ptr
|
||||||
|
store i32 %"54", ptr %"60", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca float, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"50"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"50": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"47" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"41" = load float, ptr %"47", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store float %"41", ptr addrspace(5) %"38", align 4
|
%"48" = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load float, ptr addrspace(5) %"38", align 4
|
%"42" = load float, ptr %"48", align 4
|
||||||
%"43" = call float @llvm.amdgcn.rcp.f32(float %"44")
|
store float %"42", ptr addrspace(5) %"39", align 4
|
||||||
store float %"43", ptr addrspace(5) %"38", align 4
|
%"45" = load float, ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
%"44" = call float @llvm.amdgcn.rcp.f32(float %"45")
|
||||||
%"46" = load float, ptr addrspace(5) %"38", align 4
|
store float %"44", ptr addrspace(5) %"39", align 4
|
||||||
%"48" = inttoptr i64 %"45" to ptr
|
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store float %"46", ptr %"48", align 4
|
%"47" = load float, ptr addrspace(5) %"39", align 4
|
||||||
|
%"49" = inttoptr i64 %"46" to ptr
|
||||||
|
store float %"47", ptr %"49", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare float @llvm.amdgcn.rcp.f32(float) #1
|
declare float @llvm.amdgcn.rcp.f32(float) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 {
|
define amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #1 {
|
||||||
%"9" = alloca [8 x i8], align 8, addrspace(5)
|
%"10" = alloca [8 x i8], align 8, addrspace(5)
|
||||||
%"43" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"44" = alloca i64, align 8, addrspace(5)
|
%"44" = alloca i64, align 8, addrspace(5)
|
||||||
%"45" = alloca i64, align 8, addrspace(5)
|
%"45" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"46" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"46" = load i64, ptr addrspace(4) %"41", align 4
|
br label %"63"
|
||||||
store i64 %"46", ptr addrspace(5) %"43", align 4
|
|
||||||
|
"63": ; preds = %1
|
||||||
%"47" = load i64, ptr addrspace(4) %"42", align 4
|
%"47" = load i64, ptr addrspace(4) %"42", align 4
|
||||||
store i64 %"47", ptr addrspace(5) %"44", align 4
|
store i64 %"47", ptr addrspace(5) %"44", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"43", align 4
|
%"48" = load i64, ptr addrspace(4) %"43", align 4
|
||||||
%"55" = inttoptr i64 %"49" to ptr addrspace(1)
|
store i64 %"48", ptr addrspace(5) %"45", align 4
|
||||||
%"54" = load i64, ptr addrspace(1) %"55", align 4
|
%"50" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
store i64 %"54", ptr addrspace(5) %"45", align 4
|
%"56" = inttoptr i64 %"50" to ptr addrspace(1)
|
||||||
%"50" = load i64, ptr addrspace(5) %"45", align 4
|
%"55" = load i64, ptr addrspace(1) %"56", align 4
|
||||||
%"30" = add i64 %"50", 1
|
store i64 %"55", ptr addrspace(5) %"46", align 4
|
||||||
%"56" = addrspacecast ptr addrspace(5) %"9" to ptr
|
%"51" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
store i64 %"30", ptr %"56", align 4
|
%"31" = add i64 %"51", 1
|
||||||
%"58" = addrspacecast ptr addrspace(5) %"9" to ptr
|
%"57" = addrspacecast ptr addrspace(5) %"10" to ptr
|
||||||
%"32" = getelementptr inbounds i8, ptr %"58", i64 0
|
store i64 %"31", ptr %"57", align 4
|
||||||
%"59" = load i64, ptr %"32", align 4
|
%"59" = addrspacecast ptr addrspace(5) %"10" to ptr
|
||||||
store i64 %"59", ptr addrspace(5) %"45", align 4
|
%"33" = getelementptr inbounds i8, ptr %"59", i64 0
|
||||||
%"52" = load i64, ptr addrspace(5) %"44", align 4
|
%"60" = load i64, ptr %"33", align 4
|
||||||
%"60" = inttoptr i64 %"52" to ptr addrspace(1)
|
store i64 %"60", ptr addrspace(5) %"46", align 4
|
||||||
%"34" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 0
|
|
||||||
%"53" = load i64, ptr addrspace(5) %"45", align 4
|
%"53" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
store i64 %"53", ptr addrspace(1) %"34", align 4
|
%"61" = inttoptr i64 %"53" to ptr addrspace(1)
|
||||||
|
%"35" = getelementptr inbounds i8, ptr addrspace(1) %"61", i64 0
|
||||||
|
%"54" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
|
store i64 %"54", ptr addrspace(1) %"35", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i32, align 4, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"58"
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"58": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"54" = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"45" = load i32, ptr %"54", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i32 %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
%"55" = inttoptr i64 %"47" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"55", i64 4
|
%"46" = load i32, ptr %"55", align 4
|
||||||
%"48" = load i32, ptr %"30", align 4
|
store i32 %"46", ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"48", ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||||
|
%"49" = load i32, ptr %"31", align 4
|
||||||
|
store i32 %"49", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
%"49" = srem i32 %"50", %"51"
|
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
store i32 %"49", ptr addrspace(5) %"41", align 4
|
%"50" = srem i32 %"51", %"52"
|
||||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||||
%"53" = load i32, ptr addrspace(5) %"41", align 4
|
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"56" = inttoptr i64 %"52" to ptr
|
%"54" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"53", ptr %"56", align 4
|
%"57" = inttoptr i64 %"53" to ptr
|
||||||
|
store i32 %"54", ptr %"57", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,33 +10,42 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca double, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca double, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"50"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"50": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"47" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"41" = load double, ptr %"47", align 8
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store double %"41", ptr addrspace(5) %"38", align 8
|
%"48" = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load double, ptr addrspace(5) %"38", align 8
|
%"42" = load double, ptr %"48", align 8
|
||||||
%"43" = call double @llvm.amdgcn.rsq.f64(double %"44")
|
store double %"42", ptr addrspace(5) %"39", align 8
|
||||||
store double %"43", ptr addrspace(5) %"38", align 8
|
%"45" = load double, ptr addrspace(5) %"39", align 8
|
||||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
call void @llvm.amdgcn.s.setreg(i32 2433, i32 3)
|
||||||
%"46" = load double, ptr addrspace(5) %"38", align 8
|
%"44" = call double @llvm.amdgcn.rsq.f64(double %"45")
|
||||||
%"48" = inttoptr i64 %"45" to ptr
|
store double %"44", ptr addrspace(5) %"39", align 8
|
||||||
store double %"46", ptr %"48", align 8
|
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
|
%"47" = load double, ptr addrspace(5) %"39", align 8
|
||||||
|
%"49" = inttoptr i64 %"46" to ptr
|
||||||
|
store double %"47", ptr %"49", align 8
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
||||||
declare double @llvm.amdgcn.rsq.f64(double) #1
|
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
declare double @llvm.amdgcn.rsq.f64(double) #3
|
||||||
|
|
||||||
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind willreturn }
|
||||||
|
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i16, align 2, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca i16, align 2, addrspace(5)
|
%"43" = alloca i16, align 2, addrspace(5)
|
||||||
|
%"44" = alloca i16, align 2, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
br label %"59"
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
|
||||||
|
"59": ; preds = %1
|
||||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||||
%"46" = load i16, ptr %"55", align 2
|
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
store i16 %"46", ptr addrspace(5) %"42", align 2
|
|
||||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
|
||||||
%"56" = inttoptr i64 %"48" to ptr
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"56", i64 2
|
%"47" = load i16, ptr %"56", align 2
|
||||||
%"49" = load i16, ptr %"30", align 2
|
store i16 %"47", ptr addrspace(5) %"43", align 2
|
||||||
store i16 %"49", ptr addrspace(5) %"43", align 2
|
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"51" = load i16, ptr addrspace(5) %"42", align 2
|
%"57" = inttoptr i64 %"49" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"57", i64 2
|
||||||
|
%"50" = load i16, ptr %"31", align 2
|
||||||
|
store i16 %"50", ptr addrspace(5) %"44", align 2
|
||||||
%"52" = load i16, ptr addrspace(5) %"43", align 2
|
%"52" = load i16, ptr addrspace(5) %"43", align 2
|
||||||
%"50" = select i1 false, i16 %"51", i16 %"52"
|
%"53" = load i16, ptr addrspace(5) %"44", align 2
|
||||||
store i16 %"50", ptr addrspace(5) %"42", align 2
|
%"51" = select i1 false, i16 %"52", i16 %"53"
|
||||||
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
store i16 %"51", ptr addrspace(5) %"43", align 2
|
||||||
%"54" = load i16, ptr addrspace(5) %"42", align 2
|
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"57" = inttoptr i64 %"53" to ptr
|
%"55" = load i16, ptr addrspace(5) %"43", align 2
|
||||||
store i16 %"54", ptr %"57", align 2
|
%"58" = inttoptr i64 %"54" to ptr
|
||||||
|
store i16 %"55", ptr %"58", align 2
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i16, align 2, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca i16, align 2, addrspace(5)
|
%"43" = alloca i16, align 2, addrspace(5)
|
||||||
|
%"44" = alloca i16, align 2, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
br label %"59"
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
|
||||||
|
"59": ; preds = %1
|
||||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||||
%"46" = load i16, ptr %"55", align 2
|
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
store i16 %"46", ptr addrspace(5) %"42", align 2
|
|
||||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
|
||||||
%"56" = inttoptr i64 %"48" to ptr
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"56", i64 2
|
%"47" = load i16, ptr %"56", align 2
|
||||||
%"49" = load i16, ptr %"30", align 2
|
store i16 %"47", ptr addrspace(5) %"43", align 2
|
||||||
store i16 %"49", ptr addrspace(5) %"43", align 2
|
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"51" = load i16, ptr addrspace(5) %"42", align 2
|
%"57" = inttoptr i64 %"49" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"57", i64 2
|
||||||
|
%"50" = load i16, ptr %"31", align 2
|
||||||
|
store i16 %"50", ptr addrspace(5) %"44", align 2
|
||||||
%"52" = load i16, ptr addrspace(5) %"43", align 2
|
%"52" = load i16, ptr addrspace(5) %"43", align 2
|
||||||
%"50" = select i1 true, i16 %"51", i16 %"52"
|
%"53" = load i16, ptr addrspace(5) %"44", align 2
|
||||||
store i16 %"50", ptr addrspace(5) %"42", align 2
|
%"51" = select i1 true, i16 %"52", i16 %"53"
|
||||||
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
store i16 %"51", ptr addrspace(5) %"43", align 2
|
||||||
%"54" = load i16, ptr addrspace(5) %"42", align 2
|
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"57" = inttoptr i64 %"53" to ptr
|
%"55" = load i16, ptr addrspace(5) %"43", align 2
|
||||||
store i16 %"54", ptr %"57", align 2
|
%"58" = inttoptr i64 %"54" to ptr
|
||||||
|
store i16 %"55", ptr %"58", align 2
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,54 +10,58 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 {
|
define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 {
|
||||||
%"47" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"48" = alloca i64, align 8, addrspace(5)
|
%"48" = alloca i64, align 8, addrspace(5)
|
||||||
%"49" = alloca i64, align 8, addrspace(5)
|
%"49" = alloca i64, align 8, addrspace(5)
|
||||||
%"50" = alloca i64, align 8, addrspace(5)
|
%"50" = alloca i64, align 8, addrspace(5)
|
||||||
%"51" = alloca i64, align 8, addrspace(5)
|
%"51" = alloca i64, align 8, addrspace(5)
|
||||||
%"52" = alloca i1, align 1, addrspace(5)
|
%"52" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"53" = alloca i1, align 1, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"53" = load i64, ptr addrspace(4) %"45", align 4
|
br label %"72"
|
||||||
store i64 %"53", ptr addrspace(5) %"47", align 4
|
|
||||||
|
"72": ; preds = %1
|
||||||
%"54" = load i64, ptr addrspace(4) %"46", align 4
|
%"54" = load i64, ptr addrspace(4) %"46", align 4
|
||||||
store i64 %"54", ptr addrspace(5) %"48", align 4
|
store i64 %"54", ptr addrspace(5) %"48", align 4
|
||||||
%"56" = load i64, ptr addrspace(5) %"47", align 4
|
%"55" = load i64, ptr addrspace(4) %"47", align 4
|
||||||
%"68" = inttoptr i64 %"56" to ptr
|
|
||||||
%"55" = load i64, ptr %"68", align 4
|
|
||||||
store i64 %"55", ptr addrspace(5) %"49", align 4
|
store i64 %"55", ptr addrspace(5) %"49", align 4
|
||||||
%"57" = load i64, ptr addrspace(5) %"47", align 4
|
%"57" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
%"69" = inttoptr i64 %"57" to ptr
|
%"69" = inttoptr i64 %"57" to ptr
|
||||||
%"36" = getelementptr inbounds i8, ptr %"69", i64 8
|
%"56" = load i64, ptr %"69", align 4
|
||||||
%"58" = load i64, ptr %"36", align 4
|
store i64 %"56", ptr addrspace(5) %"50", align 4
|
||||||
store i64 %"58", ptr addrspace(5) %"50", align 4
|
%"58" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
%"60" = load i64, ptr addrspace(5) %"49", align 4
|
%"70" = inttoptr i64 %"58" to ptr
|
||||||
|
%"37" = getelementptr inbounds i8, ptr %"70", i64 8
|
||||||
|
%"59" = load i64, ptr %"37", align 4
|
||||||
|
store i64 %"59", ptr addrspace(5) %"51", align 4
|
||||||
%"61" = load i64, ptr addrspace(5) %"50", align 4
|
%"61" = load i64, ptr addrspace(5) %"50", align 4
|
||||||
%"59" = icmp ult i64 %"60", %"61"
|
%"62" = load i64, ptr addrspace(5) %"51", align 4
|
||||||
store i1 %"59", ptr addrspace(5) %"52", align 1
|
%"60" = icmp ult i64 %"61", %"62"
|
||||||
%"62" = load i1, ptr addrspace(5) %"52", align 1
|
store i1 %"60", ptr addrspace(5) %"53", align 1
|
||||||
br i1 %"62", label %"15", label %"16"
|
%"63" = load i1, ptr addrspace(5) %"53", align 1
|
||||||
|
br i1 %"63", label %"16", label %"17"
|
||||||
|
|
||||||
"15": ; preds = %1
|
"16": ; preds = %"72"
|
||||||
store i64 1, ptr addrspace(5) %"51", align 4
|
store i64 1, ptr addrspace(5) %"52", align 4
|
||||||
br label %"16"
|
br label %"17"
|
||||||
|
|
||||||
"16": ; preds = %"15", %1
|
"17": ; preds = %"16", %"72"
|
||||||
%"64" = load i1, ptr addrspace(5) %"52", align 1
|
%"65" = load i1, ptr addrspace(5) %"53", align 1
|
||||||
br i1 %"64", label %"18", label %"17"
|
br i1 %"65", label %"19", label %"18"
|
||||||
|
|
||||||
"17": ; preds = %"16"
|
"18": ; preds = %"17"
|
||||||
store i64 2, ptr addrspace(5) %"51", align 4
|
store i64 2, ptr addrspace(5) %"52", align 4
|
||||||
br label %"18"
|
br label %"19"
|
||||||
|
|
||||||
"18": ; preds = %"17", %"16"
|
"19": ; preds = %"18", %"17"
|
||||||
%"66" = load i64, ptr addrspace(5) %"48", align 4
|
%"67" = load i64, ptr addrspace(5) %"49", align 4
|
||||||
%"67" = load i64, ptr addrspace(5) %"51", align 4
|
%"68" = load i64, ptr addrspace(5) %"52", align 4
|
||||||
%"70" = inttoptr i64 %"66" to ptr
|
%"71" = inttoptr i64 %"67" to ptr
|
||||||
store i64 %"67", ptr %"70", align 4
|
store i64 %"68", ptr %"71", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,56 +10,60 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 {
|
define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
|
||||||
%"45" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"46" = alloca i64, align 8, addrspace(5)
|
%"46" = alloca i64, align 8, addrspace(5)
|
||||||
%"47" = alloca float, align 4, addrspace(5)
|
%"47" = alloca i64, align 8, addrspace(5)
|
||||||
%"48" = alloca float, align 4, addrspace(5)
|
%"48" = alloca float, align 4, addrspace(5)
|
||||||
%"49" = alloca float, align 4, addrspace(5)
|
%"49" = alloca float, align 4, addrspace(5)
|
||||||
%"50" = alloca i1, align 1, addrspace(5)
|
%"50" = alloca float, align 4, addrspace(5)
|
||||||
|
%"51" = alloca i1, align 1, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"51" = load i64, ptr addrspace(4) %"43", align 4
|
br label %"72"
|
||||||
store i64 %"51", ptr addrspace(5) %"45", align 4
|
|
||||||
|
"72": ; preds = %1
|
||||||
%"52" = load i64, ptr addrspace(4) %"44", align 4
|
%"52" = load i64, ptr addrspace(4) %"44", align 4
|
||||||
store i64 %"52", ptr addrspace(5) %"46", align 4
|
store i64 %"52", ptr addrspace(5) %"46", align 4
|
||||||
%"54" = load i64, ptr addrspace(5) %"45", align 4
|
%"53" = load i64, ptr addrspace(4) %"45", align 4
|
||||||
%"68" = inttoptr i64 %"54" to ptr
|
store i64 %"53", ptr addrspace(5) %"47", align 4
|
||||||
%"53" = load float, ptr %"68", align 4
|
%"55" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
store float %"53", ptr addrspace(5) %"47", align 4
|
|
||||||
%"55" = load i64, ptr addrspace(5) %"45", align 4
|
|
||||||
%"69" = inttoptr i64 %"55" to ptr
|
%"69" = inttoptr i64 %"55" to ptr
|
||||||
%"36" = getelementptr inbounds i8, ptr %"69", i64 4
|
%"54" = load float, ptr %"69", align 4
|
||||||
%"56" = load float, ptr %"36", align 4
|
store float %"54", ptr addrspace(5) %"48", align 4
|
||||||
store float %"56", ptr addrspace(5) %"48", align 4
|
%"56" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
%"58" = load float, ptr addrspace(5) %"47", align 4
|
%"70" = inttoptr i64 %"56" to ptr
|
||||||
|
%"37" = getelementptr inbounds i8, ptr %"70", i64 4
|
||||||
|
%"57" = load float, ptr %"37", align 4
|
||||||
|
store float %"57", ptr addrspace(5) %"49", align 4
|
||||||
%"59" = load float, ptr addrspace(5) %"48", align 4
|
%"59" = load float, ptr addrspace(5) %"48", align 4
|
||||||
%"57" = fcmp ogt float %"58", %"59"
|
%"60" = load float, ptr addrspace(5) %"49", align 4
|
||||||
store i1 %"57", ptr addrspace(5) %"50", align 1
|
%"58" = fcmp ogt float %"59", %"60"
|
||||||
%"60" = load i1, ptr addrspace(5) %"50", align 1
|
store i1 %"58", ptr addrspace(5) %"51", align 1
|
||||||
br i1 %"60", label %"15", label %"16"
|
%"61" = load i1, ptr addrspace(5) %"51", align 1
|
||||||
|
br i1 %"61", label %"16", label %"17"
|
||||||
|
|
||||||
"15": ; preds = %1
|
"16": ; preds = %"72"
|
||||||
%"62" = load float, ptr addrspace(5) %"47", align 4
|
%"63" = load float, ptr addrspace(5) %"48", align 4
|
||||||
store float %"62", ptr addrspace(5) %"49", align 4
|
store float %"63", ptr addrspace(5) %"50", align 4
|
||||||
br label %"16"
|
br label %"17"
|
||||||
|
|
||||||
"16": ; preds = %"15", %1
|
"17": ; preds = %"16", %"72"
|
||||||
%"63" = load i1, ptr addrspace(5) %"50", align 1
|
%"64" = load i1, ptr addrspace(5) %"51", align 1
|
||||||
br i1 %"63", label %"18", label %"17"
|
br i1 %"64", label %"19", label %"18"
|
||||||
|
|
||||||
"17": ; preds = %"16"
|
"18": ; preds = %"17"
|
||||||
%"65" = load float, ptr addrspace(5) %"48", align 4
|
%"66" = load float, ptr addrspace(5) %"49", align 4
|
||||||
store float %"65", ptr addrspace(5) %"49", align 4
|
store float %"66", ptr addrspace(5) %"50", align 4
|
||||||
br label %"18"
|
br label %"19"
|
||||||
|
|
||||||
"18": ; preds = %"17", %"16"
|
"19": ; preds = %"18", %"17"
|
||||||
%"66" = load i64, ptr addrspace(5) %"46", align 4
|
%"67" = load i64, ptr addrspace(5) %"47", align 4
|
||||||
%"67" = load float, ptr addrspace(5) %"49", align 4
|
%"68" = load float, ptr addrspace(5) %"50", align 4
|
||||||
%"70" = inttoptr i64 %"66" to ptr
|
%"71" = inttoptr i64 %"67" to ptr
|
||||||
store float %"67", ptr %"70", align 4
|
store float %"68", ptr %"71", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,56 +10,60 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 {
|
define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
|
||||||
%"45" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"46" = alloca i64, align 8, addrspace(5)
|
%"46" = alloca i64, align 8, addrspace(5)
|
||||||
%"47" = alloca float, align 4, addrspace(5)
|
%"47" = alloca i64, align 8, addrspace(5)
|
||||||
%"48" = alloca float, align 4, addrspace(5)
|
%"48" = alloca float, align 4, addrspace(5)
|
||||||
%"49" = alloca float, align 4, addrspace(5)
|
%"49" = alloca float, align 4, addrspace(5)
|
||||||
%"50" = alloca i1, align 1, addrspace(5)
|
%"50" = alloca float, align 4, addrspace(5)
|
||||||
|
%"51" = alloca i1, align 1, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"51" = load i64, ptr addrspace(4) %"43", align 4
|
br label %"72"
|
||||||
store i64 %"51", ptr addrspace(5) %"45", align 4
|
|
||||||
|
"72": ; preds = %1
|
||||||
%"52" = load i64, ptr addrspace(4) %"44", align 4
|
%"52" = load i64, ptr addrspace(4) %"44", align 4
|
||||||
store i64 %"52", ptr addrspace(5) %"46", align 4
|
store i64 %"52", ptr addrspace(5) %"46", align 4
|
||||||
%"54" = load i64, ptr addrspace(5) %"45", align 4
|
%"53" = load i64, ptr addrspace(4) %"45", align 4
|
||||||
%"68" = inttoptr i64 %"54" to ptr
|
store i64 %"53", ptr addrspace(5) %"47", align 4
|
||||||
%"53" = load float, ptr %"68", align 4
|
%"55" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
store float %"53", ptr addrspace(5) %"47", align 4
|
|
||||||
%"55" = load i64, ptr addrspace(5) %"45", align 4
|
|
||||||
%"69" = inttoptr i64 %"55" to ptr
|
%"69" = inttoptr i64 %"55" to ptr
|
||||||
%"36" = getelementptr inbounds i8, ptr %"69", i64 4
|
%"54" = load float, ptr %"69", align 4
|
||||||
%"56" = load float, ptr %"36", align 4
|
store float %"54", ptr addrspace(5) %"48", align 4
|
||||||
store float %"56", ptr addrspace(5) %"48", align 4
|
%"56" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
%"58" = load float, ptr addrspace(5) %"47", align 4
|
%"70" = inttoptr i64 %"56" to ptr
|
||||||
|
%"37" = getelementptr inbounds i8, ptr %"70", i64 4
|
||||||
|
%"57" = load float, ptr %"37", align 4
|
||||||
|
store float %"57", ptr addrspace(5) %"49", align 4
|
||||||
%"59" = load float, ptr addrspace(5) %"48", align 4
|
%"59" = load float, ptr addrspace(5) %"48", align 4
|
||||||
%"57" = fcmp ule float %"58", %"59"
|
%"60" = load float, ptr addrspace(5) %"49", align 4
|
||||||
store i1 %"57", ptr addrspace(5) %"50", align 1
|
%"58" = fcmp ule float %"59", %"60"
|
||||||
%"60" = load i1, ptr addrspace(5) %"50", align 1
|
store i1 %"58", ptr addrspace(5) %"51", align 1
|
||||||
br i1 %"60", label %"15", label %"16"
|
%"61" = load i1, ptr addrspace(5) %"51", align 1
|
||||||
|
br i1 %"61", label %"16", label %"17"
|
||||||
|
|
||||||
"15": ; preds = %1
|
"16": ; preds = %"72"
|
||||||
%"62" = load float, ptr addrspace(5) %"47", align 4
|
%"63" = load float, ptr addrspace(5) %"48", align 4
|
||||||
store float %"62", ptr addrspace(5) %"49", align 4
|
store float %"63", ptr addrspace(5) %"50", align 4
|
||||||
br label %"16"
|
br label %"17"
|
||||||
|
|
||||||
"16": ; preds = %"15", %1
|
"17": ; preds = %"16", %"72"
|
||||||
%"63" = load i1, ptr addrspace(5) %"50", align 1
|
%"64" = load i1, ptr addrspace(5) %"51", align 1
|
||||||
br i1 %"63", label %"18", label %"17"
|
br i1 %"64", label %"19", label %"18"
|
||||||
|
|
||||||
"17": ; preds = %"16"
|
"18": ; preds = %"17"
|
||||||
%"65" = load float, ptr addrspace(5) %"48", align 4
|
%"66" = load float, ptr addrspace(5) %"49", align 4
|
||||||
store float %"65", ptr addrspace(5) %"49", align 4
|
store float %"66", ptr addrspace(5) %"50", align 4
|
||||||
br label %"18"
|
br label %"19"
|
||||||
|
|
||||||
"18": ; preds = %"17", %"16"
|
"19": ; preds = %"18", %"17"
|
||||||
%"66" = load i64, ptr addrspace(5) %"46", align 4
|
%"67" = load i64, ptr addrspace(5) %"47", align 4
|
||||||
%"67" = load float, ptr addrspace(5) %"49", align 4
|
%"68" = load float, ptr addrspace(5) %"50", align 4
|
||||||
%"70" = inttoptr i64 %"66" to ptr
|
%"71" = inttoptr i64 %"67" to ptr
|
||||||
store float %"67", ptr %"70", align 4
|
store float %"68", ptr %"71", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,10 +10,9 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"87", ptr addrspace(4) byref(i64) %"88") #0 {
|
define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"88", ptr addrspace(4) byref(i64) %"89") #1 {
|
||||||
%"89" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"90" = alloca i64, align 8, addrspace(5)
|
%"90" = alloca i64, align 8, addrspace(5)
|
||||||
%"91" = alloca float, align 4, addrspace(5)
|
%"91" = alloca i64, align 8, addrspace(5)
|
||||||
%"92" = alloca float, align 4, addrspace(5)
|
%"92" = alloca float, align 4, addrspace(5)
|
||||||
%"93" = alloca float, align 4, addrspace(5)
|
%"93" = alloca float, align 4, addrspace(5)
|
||||||
%"94" = alloca float, align 4, addrspace(5)
|
%"94" = alloca float, align 4, addrspace(5)
|
||||||
|
@ -21,154 +20,159 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"87", ptr addrs
|
||||||
%"96" = alloca float, align 4, addrspace(5)
|
%"96" = alloca float, align 4, addrspace(5)
|
||||||
%"97" = alloca float, align 4, addrspace(5)
|
%"97" = alloca float, align 4, addrspace(5)
|
||||||
%"98" = alloca float, align 4, addrspace(5)
|
%"98" = alloca float, align 4, addrspace(5)
|
||||||
%"99" = alloca i32, align 4, addrspace(5)
|
%"99" = alloca float, align 4, addrspace(5)
|
||||||
%"100" = alloca i1, align 1, addrspace(5)
|
%"100" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"101" = alloca i1, align 1, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"101" = load i64, ptr addrspace(4) %"87", align 4
|
br label %"168"
|
||||||
store i64 %"101", ptr addrspace(5) %"89", align 4
|
|
||||||
|
"168": ; preds = %1
|
||||||
%"102" = load i64, ptr addrspace(4) %"88", align 4
|
%"102" = load i64, ptr addrspace(4) %"88", align 4
|
||||||
store i64 %"102", ptr addrspace(5) %"90", align 4
|
store i64 %"102", ptr addrspace(5) %"90", align 4
|
||||||
%"104" = load i64, ptr addrspace(5) %"89", align 4
|
%"103" = load i64, ptr addrspace(4) %"89", align 4
|
||||||
%"155" = inttoptr i64 %"104" to ptr
|
store i64 %"103", ptr addrspace(5) %"91", align 4
|
||||||
%"103" = load float, ptr %"155", align 4
|
%"105" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
store float %"103", ptr addrspace(5) %"91", align 4
|
|
||||||
%"105" = load i64, ptr addrspace(5) %"89", align 4
|
|
||||||
%"156" = inttoptr i64 %"105" to ptr
|
%"156" = inttoptr i64 %"105" to ptr
|
||||||
%"54" = getelementptr inbounds i8, ptr %"156", i64 4
|
%"104" = load float, ptr %"156", align 4
|
||||||
%"106" = load float, ptr %"54", align 4
|
store float %"104", ptr addrspace(5) %"92", align 4
|
||||||
store float %"106", ptr addrspace(5) %"92", align 4
|
%"106" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"107" = load i64, ptr addrspace(5) %"89", align 4
|
%"157" = inttoptr i64 %"106" to ptr
|
||||||
%"157" = inttoptr i64 %"107" to ptr
|
%"55" = getelementptr inbounds i8, ptr %"157", i64 4
|
||||||
%"56" = getelementptr inbounds i8, ptr %"157", i64 8
|
%"107" = load float, ptr %"55", align 4
|
||||||
%"108" = load float, ptr %"56", align 4
|
store float %"107", ptr addrspace(5) %"93", align 4
|
||||||
store float %"108", ptr addrspace(5) %"93", align 4
|
%"108" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"109" = load i64, ptr addrspace(5) %"89", align 4
|
%"158" = inttoptr i64 %"108" to ptr
|
||||||
%"158" = inttoptr i64 %"109" to ptr
|
%"57" = getelementptr inbounds i8, ptr %"158", i64 8
|
||||||
%"58" = getelementptr inbounds i8, ptr %"158", i64 12
|
%"109" = load float, ptr %"57", align 4
|
||||||
%"110" = load float, ptr %"58", align 4
|
store float %"109", ptr addrspace(5) %"94", align 4
|
||||||
store float %"110", ptr addrspace(5) %"94", align 4
|
%"110" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"111" = load i64, ptr addrspace(5) %"89", align 4
|
%"159" = inttoptr i64 %"110" to ptr
|
||||||
%"159" = inttoptr i64 %"111" to ptr
|
%"59" = getelementptr inbounds i8, ptr %"159", i64 12
|
||||||
%"60" = getelementptr inbounds i8, ptr %"159", i64 16
|
%"111" = load float, ptr %"59", align 4
|
||||||
%"112" = load float, ptr %"60", align 4
|
store float %"111", ptr addrspace(5) %"95", align 4
|
||||||
store float %"112", ptr addrspace(5) %"95", align 4
|
%"112" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"113" = load i64, ptr addrspace(5) %"89", align 4
|
%"160" = inttoptr i64 %"112" to ptr
|
||||||
%"160" = inttoptr i64 %"113" to ptr
|
%"61" = getelementptr inbounds i8, ptr %"160", i64 16
|
||||||
%"62" = getelementptr inbounds i8, ptr %"160", i64 20
|
%"113" = load float, ptr %"61", align 4
|
||||||
%"114" = load float, ptr %"62", align 4
|
store float %"113", ptr addrspace(5) %"96", align 4
|
||||||
store float %"114", ptr addrspace(5) %"96", align 4
|
%"114" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"115" = load i64, ptr addrspace(5) %"89", align 4
|
%"161" = inttoptr i64 %"114" to ptr
|
||||||
%"161" = inttoptr i64 %"115" to ptr
|
%"63" = getelementptr inbounds i8, ptr %"161", i64 20
|
||||||
%"64" = getelementptr inbounds i8, ptr %"161", i64 24
|
%"115" = load float, ptr %"63", align 4
|
||||||
%"116" = load float, ptr %"64", align 4
|
store float %"115", ptr addrspace(5) %"97", align 4
|
||||||
store float %"116", ptr addrspace(5) %"97", align 4
|
%"116" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"117" = load i64, ptr addrspace(5) %"89", align 4
|
%"162" = inttoptr i64 %"116" to ptr
|
||||||
%"162" = inttoptr i64 %"117" to ptr
|
%"65" = getelementptr inbounds i8, ptr %"162", i64 24
|
||||||
%"66" = getelementptr inbounds i8, ptr %"162", i64 28
|
%"117" = load float, ptr %"65", align 4
|
||||||
%"118" = load float, ptr %"66", align 4
|
store float %"117", ptr addrspace(5) %"98", align 4
|
||||||
store float %"118", ptr addrspace(5) %"98", align 4
|
%"118" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"120" = load float, ptr addrspace(5) %"91", align 4
|
%"163" = inttoptr i64 %"118" to ptr
|
||||||
|
%"67" = getelementptr inbounds i8, ptr %"163", i64 28
|
||||||
|
%"119" = load float, ptr %"67", align 4
|
||||||
|
store float %"119", ptr addrspace(5) %"99", align 4
|
||||||
%"121" = load float, ptr addrspace(5) %"92", align 4
|
%"121" = load float, ptr addrspace(5) %"92", align 4
|
||||||
%"119" = fcmp uno float %"120", %"121"
|
%"122" = load float, ptr addrspace(5) %"93", align 4
|
||||||
store i1 %"119", ptr addrspace(5) %"100", align 1
|
%"120" = fcmp uno float %"121", %"122"
|
||||||
%"122" = load i1, ptr addrspace(5) %"100", align 1
|
store i1 %"120", ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"122", label %"21", label %"22"
|
%"123" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
|
br i1 %"123", label %"22", label %"23"
|
||||||
|
|
||||||
"21": ; preds = %1
|
"22": ; preds = %"168"
|
||||||
store i32 1, ptr addrspace(5) %"99", align 4
|
store i32 1, ptr addrspace(5) %"100", align 4
|
||||||
br label %"22"
|
br label %"23"
|
||||||
|
|
||||||
"22": ; preds = %"21", %1
|
"23": ; preds = %"22", %"168"
|
||||||
%"124" = load i1, ptr addrspace(5) %"100", align 1
|
%"125" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"124", label %"24", label %"23"
|
br i1 %"125", label %"25", label %"24"
|
||||||
|
|
||||||
"23": ; preds = %"22"
|
"24": ; preds = %"23"
|
||||||
store i32 0, ptr addrspace(5) %"99", align 4
|
store i32 0, ptr addrspace(5) %"100", align 4
|
||||||
br label %"24"
|
br label %"25"
|
||||||
|
|
||||||
"24": ; preds = %"23", %"22"
|
"25": ; preds = %"24", %"23"
|
||||||
%"126" = load i64, ptr addrspace(5) %"90", align 4
|
%"127" = load i64, ptr addrspace(5) %"91", align 4
|
||||||
%"127" = load i32, ptr addrspace(5) %"99", align 4
|
%"128" = load i32, ptr addrspace(5) %"100", align 4
|
||||||
%"163" = inttoptr i64 %"126" to ptr
|
%"164" = inttoptr i64 %"127" to ptr
|
||||||
store i32 %"127", ptr %"163", align 4
|
store i32 %"128", ptr %"164", align 4
|
||||||
%"129" = load float, ptr addrspace(5) %"93", align 4
|
|
||||||
%"130" = load float, ptr addrspace(5) %"94", align 4
|
%"130" = load float, ptr addrspace(5) %"94", align 4
|
||||||
%"128" = fcmp uno float %"129", %"130"
|
%"131" = load float, ptr addrspace(5) %"95", align 4
|
||||||
store i1 %"128", ptr addrspace(5) %"100", align 1
|
%"129" = fcmp uno float %"130", %"131"
|
||||||
%"131" = load i1, ptr addrspace(5) %"100", align 1
|
store i1 %"129", ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"131", label %"25", label %"26"
|
%"132" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
|
br i1 %"132", label %"26", label %"27"
|
||||||
|
|
||||||
"25": ; preds = %"24"
|
"26": ; preds = %"25"
|
||||||
store i32 1, ptr addrspace(5) %"99", align 4
|
store i32 1, ptr addrspace(5) %"100", align 4
|
||||||
br label %"26"
|
br label %"27"
|
||||||
|
|
||||||
"26": ; preds = %"25", %"24"
|
"27": ; preds = %"26", %"25"
|
||||||
%"133" = load i1, ptr addrspace(5) %"100", align 1
|
%"134" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"133", label %"28", label %"27"
|
br i1 %"134", label %"29", label %"28"
|
||||||
|
|
||||||
"27": ; preds = %"26"
|
"28": ; preds = %"27"
|
||||||
store i32 0, ptr addrspace(5) %"99", align 4
|
store i32 0, ptr addrspace(5) %"100", align 4
|
||||||
br label %"28"
|
br label %"29"
|
||||||
|
|
||||||
"28": ; preds = %"27", %"26"
|
"29": ; preds = %"28", %"27"
|
||||||
%"135" = load i64, ptr addrspace(5) %"90", align 4
|
%"136" = load i64, ptr addrspace(5) %"91", align 4
|
||||||
%"164" = inttoptr i64 %"135" to ptr
|
%"165" = inttoptr i64 %"136" to ptr
|
||||||
%"72" = getelementptr inbounds i8, ptr %"164", i64 4
|
%"73" = getelementptr inbounds i8, ptr %"165", i64 4
|
||||||
%"136" = load i32, ptr addrspace(5) %"99", align 4
|
%"137" = load i32, ptr addrspace(5) %"100", align 4
|
||||||
store i32 %"136", ptr %"72", align 4
|
store i32 %"137", ptr %"73", align 4
|
||||||
%"138" = load float, ptr addrspace(5) %"95", align 4
|
|
||||||
%"139" = load float, ptr addrspace(5) %"96", align 4
|
%"139" = load float, ptr addrspace(5) %"96", align 4
|
||||||
%"137" = fcmp uno float %"138", %"139"
|
%"140" = load float, ptr addrspace(5) %"97", align 4
|
||||||
store i1 %"137", ptr addrspace(5) %"100", align 1
|
%"138" = fcmp uno float %"139", %"140"
|
||||||
%"140" = load i1, ptr addrspace(5) %"100", align 1
|
store i1 %"138", ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"140", label %"29", label %"30"
|
%"141" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
|
br i1 %"141", label %"30", label %"31"
|
||||||
|
|
||||||
"29": ; preds = %"28"
|
"30": ; preds = %"29"
|
||||||
store i32 1, ptr addrspace(5) %"99", align 4
|
store i32 1, ptr addrspace(5) %"100", align 4
|
||||||
br label %"30"
|
br label %"31"
|
||||||
|
|
||||||
"30": ; preds = %"29", %"28"
|
"31": ; preds = %"30", %"29"
|
||||||
%"142" = load i1, ptr addrspace(5) %"100", align 1
|
%"143" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"142", label %"32", label %"31"
|
br i1 %"143", label %"33", label %"32"
|
||||||
|
|
||||||
"31": ; preds = %"30"
|
"32": ; preds = %"31"
|
||||||
store i32 0, ptr addrspace(5) %"99", align 4
|
store i32 0, ptr addrspace(5) %"100", align 4
|
||||||
br label %"32"
|
br label %"33"
|
||||||
|
|
||||||
"32": ; preds = %"31", %"30"
|
"33": ; preds = %"32", %"31"
|
||||||
%"144" = load i64, ptr addrspace(5) %"90", align 4
|
%"145" = load i64, ptr addrspace(5) %"91", align 4
|
||||||
%"165" = inttoptr i64 %"144" to ptr
|
%"166" = inttoptr i64 %"145" to ptr
|
||||||
%"76" = getelementptr inbounds i8, ptr %"165", i64 8
|
%"77" = getelementptr inbounds i8, ptr %"166", i64 8
|
||||||
%"145" = load i32, ptr addrspace(5) %"99", align 4
|
%"146" = load i32, ptr addrspace(5) %"100", align 4
|
||||||
store i32 %"145", ptr %"76", align 4
|
store i32 %"146", ptr %"77", align 4
|
||||||
%"147" = load float, ptr addrspace(5) %"97", align 4
|
|
||||||
%"148" = load float, ptr addrspace(5) %"98", align 4
|
%"148" = load float, ptr addrspace(5) %"98", align 4
|
||||||
%"146" = fcmp uno float %"147", %"148"
|
%"149" = load float, ptr addrspace(5) %"99", align 4
|
||||||
store i1 %"146", ptr addrspace(5) %"100", align 1
|
%"147" = fcmp uno float %"148", %"149"
|
||||||
%"149" = load i1, ptr addrspace(5) %"100", align 1
|
store i1 %"147", ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"149", label %"33", label %"34"
|
%"150" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
|
br i1 %"150", label %"34", label %"35"
|
||||||
|
|
||||||
"33": ; preds = %"32"
|
"34": ; preds = %"33"
|
||||||
store i32 1, ptr addrspace(5) %"99", align 4
|
store i32 1, ptr addrspace(5) %"100", align 4
|
||||||
br label %"34"
|
br label %"35"
|
||||||
|
|
||||||
"34": ; preds = %"33", %"32"
|
"35": ; preds = %"34", %"33"
|
||||||
%"151" = load i1, ptr addrspace(5) %"100", align 1
|
%"152" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"151", label %"36", label %"35"
|
br i1 %"152", label %"37", label %"36"
|
||||||
|
|
||||||
"35": ; preds = %"34"
|
"36": ; preds = %"35"
|
||||||
store i32 0, ptr addrspace(5) %"99", align 4
|
store i32 0, ptr addrspace(5) %"100", align 4
|
||||||
br label %"36"
|
br label %"37"
|
||||||
|
|
||||||
"36": ; preds = %"35", %"34"
|
"37": ; preds = %"36", %"35"
|
||||||
%"153" = load i64, ptr addrspace(5) %"90", align 4
|
%"154" = load i64, ptr addrspace(5) %"91", align 4
|
||||||
%"166" = inttoptr i64 %"153" to ptr
|
%"167" = inttoptr i64 %"154" to ptr
|
||||||
%"80" = getelementptr inbounds i8, ptr %"166", i64 12
|
%"81" = getelementptr inbounds i8, ptr %"167", i64 12
|
||||||
%"154" = load i32, ptr addrspace(5) %"99", align 4
|
%"155" = load i32, ptr addrspace(5) %"100", align 4
|
||||||
store i32 %"154", ptr %"80", align 4
|
store i32 %"155", ptr %"81", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,10 +10,9 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"87", ptr addrspace(4) byref(i64) %"88") #0 {
|
define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"88", ptr addrspace(4) byref(i64) %"89") #1 {
|
||||||
%"89" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"90" = alloca i64, align 8, addrspace(5)
|
%"90" = alloca i64, align 8, addrspace(5)
|
||||||
%"91" = alloca float, align 4, addrspace(5)
|
%"91" = alloca i64, align 8, addrspace(5)
|
||||||
%"92" = alloca float, align 4, addrspace(5)
|
%"92" = alloca float, align 4, addrspace(5)
|
||||||
%"93" = alloca float, align 4, addrspace(5)
|
%"93" = alloca float, align 4, addrspace(5)
|
||||||
%"94" = alloca float, align 4, addrspace(5)
|
%"94" = alloca float, align 4, addrspace(5)
|
||||||
|
@ -21,154 +20,159 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"87", ptr addrs
|
||||||
%"96" = alloca float, align 4, addrspace(5)
|
%"96" = alloca float, align 4, addrspace(5)
|
||||||
%"97" = alloca float, align 4, addrspace(5)
|
%"97" = alloca float, align 4, addrspace(5)
|
||||||
%"98" = alloca float, align 4, addrspace(5)
|
%"98" = alloca float, align 4, addrspace(5)
|
||||||
%"99" = alloca i32, align 4, addrspace(5)
|
%"99" = alloca float, align 4, addrspace(5)
|
||||||
%"100" = alloca i1, align 1, addrspace(5)
|
%"100" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"101" = alloca i1, align 1, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"101" = load i64, ptr addrspace(4) %"87", align 4
|
br label %"168"
|
||||||
store i64 %"101", ptr addrspace(5) %"89", align 4
|
|
||||||
|
"168": ; preds = %1
|
||||||
%"102" = load i64, ptr addrspace(4) %"88", align 4
|
%"102" = load i64, ptr addrspace(4) %"88", align 4
|
||||||
store i64 %"102", ptr addrspace(5) %"90", align 4
|
store i64 %"102", ptr addrspace(5) %"90", align 4
|
||||||
%"104" = load i64, ptr addrspace(5) %"89", align 4
|
%"103" = load i64, ptr addrspace(4) %"89", align 4
|
||||||
%"155" = inttoptr i64 %"104" to ptr
|
store i64 %"103", ptr addrspace(5) %"91", align 4
|
||||||
%"103" = load float, ptr %"155", align 4
|
%"105" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
store float %"103", ptr addrspace(5) %"91", align 4
|
|
||||||
%"105" = load i64, ptr addrspace(5) %"89", align 4
|
|
||||||
%"156" = inttoptr i64 %"105" to ptr
|
%"156" = inttoptr i64 %"105" to ptr
|
||||||
%"54" = getelementptr inbounds i8, ptr %"156", i64 4
|
%"104" = load float, ptr %"156", align 4
|
||||||
%"106" = load float, ptr %"54", align 4
|
store float %"104", ptr addrspace(5) %"92", align 4
|
||||||
store float %"106", ptr addrspace(5) %"92", align 4
|
%"106" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"107" = load i64, ptr addrspace(5) %"89", align 4
|
%"157" = inttoptr i64 %"106" to ptr
|
||||||
%"157" = inttoptr i64 %"107" to ptr
|
%"55" = getelementptr inbounds i8, ptr %"157", i64 4
|
||||||
%"56" = getelementptr inbounds i8, ptr %"157", i64 8
|
%"107" = load float, ptr %"55", align 4
|
||||||
%"108" = load float, ptr %"56", align 4
|
store float %"107", ptr addrspace(5) %"93", align 4
|
||||||
store float %"108", ptr addrspace(5) %"93", align 4
|
%"108" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"109" = load i64, ptr addrspace(5) %"89", align 4
|
%"158" = inttoptr i64 %"108" to ptr
|
||||||
%"158" = inttoptr i64 %"109" to ptr
|
%"57" = getelementptr inbounds i8, ptr %"158", i64 8
|
||||||
%"58" = getelementptr inbounds i8, ptr %"158", i64 12
|
%"109" = load float, ptr %"57", align 4
|
||||||
%"110" = load float, ptr %"58", align 4
|
store float %"109", ptr addrspace(5) %"94", align 4
|
||||||
store float %"110", ptr addrspace(5) %"94", align 4
|
%"110" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"111" = load i64, ptr addrspace(5) %"89", align 4
|
%"159" = inttoptr i64 %"110" to ptr
|
||||||
%"159" = inttoptr i64 %"111" to ptr
|
%"59" = getelementptr inbounds i8, ptr %"159", i64 12
|
||||||
%"60" = getelementptr inbounds i8, ptr %"159", i64 16
|
%"111" = load float, ptr %"59", align 4
|
||||||
%"112" = load float, ptr %"60", align 4
|
store float %"111", ptr addrspace(5) %"95", align 4
|
||||||
store float %"112", ptr addrspace(5) %"95", align 4
|
%"112" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"113" = load i64, ptr addrspace(5) %"89", align 4
|
%"160" = inttoptr i64 %"112" to ptr
|
||||||
%"160" = inttoptr i64 %"113" to ptr
|
%"61" = getelementptr inbounds i8, ptr %"160", i64 16
|
||||||
%"62" = getelementptr inbounds i8, ptr %"160", i64 20
|
%"113" = load float, ptr %"61", align 4
|
||||||
%"114" = load float, ptr %"62", align 4
|
store float %"113", ptr addrspace(5) %"96", align 4
|
||||||
store float %"114", ptr addrspace(5) %"96", align 4
|
%"114" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"115" = load i64, ptr addrspace(5) %"89", align 4
|
%"161" = inttoptr i64 %"114" to ptr
|
||||||
%"161" = inttoptr i64 %"115" to ptr
|
%"63" = getelementptr inbounds i8, ptr %"161", i64 20
|
||||||
%"64" = getelementptr inbounds i8, ptr %"161", i64 24
|
%"115" = load float, ptr %"63", align 4
|
||||||
%"116" = load float, ptr %"64", align 4
|
store float %"115", ptr addrspace(5) %"97", align 4
|
||||||
store float %"116", ptr addrspace(5) %"97", align 4
|
%"116" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"117" = load i64, ptr addrspace(5) %"89", align 4
|
%"162" = inttoptr i64 %"116" to ptr
|
||||||
%"162" = inttoptr i64 %"117" to ptr
|
%"65" = getelementptr inbounds i8, ptr %"162", i64 24
|
||||||
%"66" = getelementptr inbounds i8, ptr %"162", i64 28
|
%"117" = load float, ptr %"65", align 4
|
||||||
%"118" = load float, ptr %"66", align 4
|
store float %"117", ptr addrspace(5) %"98", align 4
|
||||||
store float %"118", ptr addrspace(5) %"98", align 4
|
%"118" = load i64, ptr addrspace(5) %"90", align 4
|
||||||
%"120" = load float, ptr addrspace(5) %"91", align 4
|
%"163" = inttoptr i64 %"118" to ptr
|
||||||
|
%"67" = getelementptr inbounds i8, ptr %"163", i64 28
|
||||||
|
%"119" = load float, ptr %"67", align 4
|
||||||
|
store float %"119", ptr addrspace(5) %"99", align 4
|
||||||
%"121" = load float, ptr addrspace(5) %"92", align 4
|
%"121" = load float, ptr addrspace(5) %"92", align 4
|
||||||
%"119" = fcmp ord float %"120", %"121"
|
%"122" = load float, ptr addrspace(5) %"93", align 4
|
||||||
store i1 %"119", ptr addrspace(5) %"100", align 1
|
%"120" = fcmp ord float %"121", %"122"
|
||||||
%"122" = load i1, ptr addrspace(5) %"100", align 1
|
store i1 %"120", ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"122", label %"21", label %"22"
|
%"123" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
|
br i1 %"123", label %"22", label %"23"
|
||||||
|
|
||||||
"21": ; preds = %1
|
"22": ; preds = %"168"
|
||||||
store i32 2, ptr addrspace(5) %"99", align 4
|
store i32 2, ptr addrspace(5) %"100", align 4
|
||||||
br label %"22"
|
br label %"23"
|
||||||
|
|
||||||
"22": ; preds = %"21", %1
|
"23": ; preds = %"22", %"168"
|
||||||
%"124" = load i1, ptr addrspace(5) %"100", align 1
|
%"125" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"124", label %"24", label %"23"
|
br i1 %"125", label %"25", label %"24"
|
||||||
|
|
||||||
"23": ; preds = %"22"
|
"24": ; preds = %"23"
|
||||||
store i32 0, ptr addrspace(5) %"99", align 4
|
store i32 0, ptr addrspace(5) %"100", align 4
|
||||||
br label %"24"
|
br label %"25"
|
||||||
|
|
||||||
"24": ; preds = %"23", %"22"
|
"25": ; preds = %"24", %"23"
|
||||||
%"126" = load i64, ptr addrspace(5) %"90", align 4
|
%"127" = load i64, ptr addrspace(5) %"91", align 4
|
||||||
%"127" = load i32, ptr addrspace(5) %"99", align 4
|
%"128" = load i32, ptr addrspace(5) %"100", align 4
|
||||||
%"163" = inttoptr i64 %"126" to ptr
|
%"164" = inttoptr i64 %"127" to ptr
|
||||||
store i32 %"127", ptr %"163", align 4
|
store i32 %"128", ptr %"164", align 4
|
||||||
%"129" = load float, ptr addrspace(5) %"93", align 4
|
|
||||||
%"130" = load float, ptr addrspace(5) %"94", align 4
|
%"130" = load float, ptr addrspace(5) %"94", align 4
|
||||||
%"128" = fcmp ord float %"129", %"130"
|
%"131" = load float, ptr addrspace(5) %"95", align 4
|
||||||
store i1 %"128", ptr addrspace(5) %"100", align 1
|
%"129" = fcmp ord float %"130", %"131"
|
||||||
%"131" = load i1, ptr addrspace(5) %"100", align 1
|
store i1 %"129", ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"131", label %"25", label %"26"
|
%"132" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
|
br i1 %"132", label %"26", label %"27"
|
||||||
|
|
||||||
"25": ; preds = %"24"
|
"26": ; preds = %"25"
|
||||||
store i32 2, ptr addrspace(5) %"99", align 4
|
store i32 2, ptr addrspace(5) %"100", align 4
|
||||||
br label %"26"
|
br label %"27"
|
||||||
|
|
||||||
"26": ; preds = %"25", %"24"
|
"27": ; preds = %"26", %"25"
|
||||||
%"133" = load i1, ptr addrspace(5) %"100", align 1
|
%"134" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"133", label %"28", label %"27"
|
br i1 %"134", label %"29", label %"28"
|
||||||
|
|
||||||
"27": ; preds = %"26"
|
"28": ; preds = %"27"
|
||||||
store i32 0, ptr addrspace(5) %"99", align 4
|
store i32 0, ptr addrspace(5) %"100", align 4
|
||||||
br label %"28"
|
br label %"29"
|
||||||
|
|
||||||
"28": ; preds = %"27", %"26"
|
"29": ; preds = %"28", %"27"
|
||||||
%"135" = load i64, ptr addrspace(5) %"90", align 4
|
%"136" = load i64, ptr addrspace(5) %"91", align 4
|
||||||
%"164" = inttoptr i64 %"135" to ptr
|
%"165" = inttoptr i64 %"136" to ptr
|
||||||
%"72" = getelementptr inbounds i8, ptr %"164", i64 4
|
%"73" = getelementptr inbounds i8, ptr %"165", i64 4
|
||||||
%"136" = load i32, ptr addrspace(5) %"99", align 4
|
%"137" = load i32, ptr addrspace(5) %"100", align 4
|
||||||
store i32 %"136", ptr %"72", align 4
|
store i32 %"137", ptr %"73", align 4
|
||||||
%"138" = load float, ptr addrspace(5) %"95", align 4
|
|
||||||
%"139" = load float, ptr addrspace(5) %"96", align 4
|
%"139" = load float, ptr addrspace(5) %"96", align 4
|
||||||
%"137" = fcmp ord float %"138", %"139"
|
%"140" = load float, ptr addrspace(5) %"97", align 4
|
||||||
store i1 %"137", ptr addrspace(5) %"100", align 1
|
%"138" = fcmp ord float %"139", %"140"
|
||||||
%"140" = load i1, ptr addrspace(5) %"100", align 1
|
store i1 %"138", ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"140", label %"29", label %"30"
|
%"141" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
|
br i1 %"141", label %"30", label %"31"
|
||||||
|
|
||||||
"29": ; preds = %"28"
|
"30": ; preds = %"29"
|
||||||
store i32 2, ptr addrspace(5) %"99", align 4
|
store i32 2, ptr addrspace(5) %"100", align 4
|
||||||
br label %"30"
|
br label %"31"
|
||||||
|
|
||||||
"30": ; preds = %"29", %"28"
|
"31": ; preds = %"30", %"29"
|
||||||
%"142" = load i1, ptr addrspace(5) %"100", align 1
|
%"143" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"142", label %"32", label %"31"
|
br i1 %"143", label %"33", label %"32"
|
||||||
|
|
||||||
"31": ; preds = %"30"
|
"32": ; preds = %"31"
|
||||||
store i32 0, ptr addrspace(5) %"99", align 4
|
store i32 0, ptr addrspace(5) %"100", align 4
|
||||||
br label %"32"
|
br label %"33"
|
||||||
|
|
||||||
"32": ; preds = %"31", %"30"
|
"33": ; preds = %"32", %"31"
|
||||||
%"144" = load i64, ptr addrspace(5) %"90", align 4
|
%"145" = load i64, ptr addrspace(5) %"91", align 4
|
||||||
%"165" = inttoptr i64 %"144" to ptr
|
%"166" = inttoptr i64 %"145" to ptr
|
||||||
%"76" = getelementptr inbounds i8, ptr %"165", i64 8
|
%"77" = getelementptr inbounds i8, ptr %"166", i64 8
|
||||||
%"145" = load i32, ptr addrspace(5) %"99", align 4
|
%"146" = load i32, ptr addrspace(5) %"100", align 4
|
||||||
store i32 %"145", ptr %"76", align 4
|
store i32 %"146", ptr %"77", align 4
|
||||||
%"147" = load float, ptr addrspace(5) %"97", align 4
|
|
||||||
%"148" = load float, ptr addrspace(5) %"98", align 4
|
%"148" = load float, ptr addrspace(5) %"98", align 4
|
||||||
%"146" = fcmp ord float %"147", %"148"
|
%"149" = load float, ptr addrspace(5) %"99", align 4
|
||||||
store i1 %"146", ptr addrspace(5) %"100", align 1
|
%"147" = fcmp ord float %"148", %"149"
|
||||||
%"149" = load i1, ptr addrspace(5) %"100", align 1
|
store i1 %"147", ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"149", label %"33", label %"34"
|
%"150" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
|
br i1 %"150", label %"34", label %"35"
|
||||||
|
|
||||||
"33": ; preds = %"32"
|
"34": ; preds = %"33"
|
||||||
store i32 2, ptr addrspace(5) %"99", align 4
|
store i32 2, ptr addrspace(5) %"100", align 4
|
||||||
br label %"34"
|
br label %"35"
|
||||||
|
|
||||||
"34": ; preds = %"33", %"32"
|
"35": ; preds = %"34", %"33"
|
||||||
%"151" = load i1, ptr addrspace(5) %"100", align 1
|
%"152" = load i1, ptr addrspace(5) %"101", align 1
|
||||||
br i1 %"151", label %"36", label %"35"
|
br i1 %"152", label %"37", label %"36"
|
||||||
|
|
||||||
"35": ; preds = %"34"
|
"36": ; preds = %"35"
|
||||||
store i32 0, ptr addrspace(5) %"99", align 4
|
store i32 0, ptr addrspace(5) %"100", align 4
|
||||||
br label %"36"
|
br label %"37"
|
||||||
|
|
||||||
"36": ; preds = %"35", %"34"
|
"37": ; preds = %"36", %"35"
|
||||||
%"153" = load i64, ptr addrspace(5) %"90", align 4
|
%"154" = load i64, ptr addrspace(5) %"91", align 4
|
||||||
%"166" = inttoptr i64 %"153" to ptr
|
%"167" = inttoptr i64 %"154" to ptr
|
||||||
%"80" = getelementptr inbounds i8, ptr %"166", i64 12
|
%"81" = getelementptr inbounds i8, ptr %"167", i64 12
|
||||||
%"154" = load i32, ptr addrspace(5) %"99", align 4
|
%"155" = load i32, ptr addrspace(5) %"100", align 4
|
||||||
store i32 %"154", ptr %"80", align 4
|
store i32 %"155", ptr %"81", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -12,38 +12,42 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 {
|
define amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca i32, align 4, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"44" = alloca i64, align 8, addrspace(5)
|
%"44" = alloca i32, align 4, addrspace(5)
|
||||||
%"45" = alloca i64, align 8, addrspace(5)
|
%"45" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"46" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"46" = load i64, ptr addrspace(4) %"39", align 4
|
br label %"63"
|
||||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
|
||||||
|
"63": ; preds = %1
|
||||||
%"47" = load i64, ptr addrspace(4) %"40", align 4
|
%"47" = load i64, ptr addrspace(4) %"40", align 4
|
||||||
store i64 %"47", ptr addrspace(5) %"42", align 4
|
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||||
store i32 ptrtoint (ptr addrspace(3) @shared_mem1 to i32), ptr addrspace(5) %"43", align 4
|
%"48" = load i64, ptr addrspace(4) %"41", align 4
|
||||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||||
%"58" = inttoptr i64 %"50" to ptr addrspace(1)
|
store i32 ptrtoint (ptr addrspace(3) @shared_mem1 to i32), ptr addrspace(5) %"44", align 4
|
||||||
%"49" = load i64, ptr addrspace(1) %"58", align 4
|
%"51" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
store i64 %"49", ptr addrspace(5) %"44", align 4
|
%"59" = inttoptr i64 %"51" to ptr addrspace(1)
|
||||||
%"51" = load i32, ptr addrspace(5) %"43", align 4
|
%"50" = load i64, ptr addrspace(1) %"59", align 4
|
||||||
%"52" = load i64, ptr addrspace(5) %"44", align 4
|
store i64 %"50", ptr addrspace(5) %"45", align 4
|
||||||
%"59" = inttoptr i32 %"51" to ptr addrspace(3)
|
%"52" = load i32, ptr addrspace(5) %"44", align 4
|
||||||
store i64 %"52", ptr addrspace(3) %"59", align 4
|
%"53" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
%"53" = load i32, ptr addrspace(5) %"43", align 4
|
%"60" = inttoptr i32 %"52" to ptr addrspace(3)
|
||||||
%"60" = inttoptr i32 %"53" to ptr addrspace(3)
|
store i64 %"53", ptr addrspace(3) %"60", align 4
|
||||||
%"32" = getelementptr inbounds i8, ptr addrspace(3) %"60", i64 0
|
%"54" = load i32, ptr addrspace(5) %"44", align 4
|
||||||
%"54" = load i64, ptr addrspace(3) %"32", align 4
|
%"61" = inttoptr i32 %"54" to ptr addrspace(3)
|
||||||
store i64 %"54", ptr addrspace(5) %"45", align 4
|
%"33" = getelementptr inbounds i8, ptr addrspace(3) %"61", i64 0
|
||||||
%"55" = load i64, ptr addrspace(5) %"42", align 4
|
%"55" = load i64, ptr addrspace(3) %"33", align 4
|
||||||
%"56" = load i64, ptr addrspace(5) %"45", align 4
|
store i64 %"55", ptr addrspace(5) %"46", align 4
|
||||||
%"61" = inttoptr i64 %"55" to ptr addrspace(1)
|
%"56" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
store i64 %"56", ptr addrspace(1) %"61", align 4
|
%"57" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
|
%"62" = inttoptr i64 %"56" to ptr addrspace(1)
|
||||||
|
store i64 %"57", ptr addrspace(1) %"62", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -12,37 +12,41 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca i64, align 8, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"44" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"44" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"61"
|
||||||
store i64 %"44", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"61": ; preds = %1
|
||||||
%"45" = load i64, ptr addrspace(4) %"38", align 4
|
%"45" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"45", ptr addrspace(5) %"40", align 4
|
store i64 %"45", ptr addrspace(5) %"40", align 4
|
||||||
store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %"41", align 4
|
%"46" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
store i64 %"46", ptr addrspace(5) %"41", align 4
|
||||||
%"56" = inttoptr i64 %"48" to ptr addrspace(1)
|
store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %"42", align 4
|
||||||
%"47" = load i64, ptr addrspace(1) %"56", align 4
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i64 %"47", ptr addrspace(5) %"42", align 4
|
%"57" = inttoptr i64 %"49" to ptr addrspace(1)
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
%"48" = load i64, ptr addrspace(1) %"57", align 4
|
||||||
|
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"57" = inttoptr i64 %"49" to ptr addrspace(3)
|
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
store i64 %"50", ptr addrspace(3) %"57", align 4
|
%"58" = inttoptr i64 %"50" to ptr addrspace(3)
|
||||||
%"52" = load i64, ptr addrspace(5) %"41", align 4
|
store i64 %"51", ptr addrspace(3) %"58", align 4
|
||||||
%"58" = inttoptr i64 %"52" to ptr addrspace(3)
|
%"53" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"51" = load i64, ptr addrspace(3) %"58", align 4
|
%"59" = inttoptr i64 %"53" to ptr addrspace(3)
|
||||||
store i64 %"51", ptr addrspace(5) %"43", align 4
|
%"52" = load i64, ptr addrspace(3) %"59", align 4
|
||||||
%"53" = load i64, ptr addrspace(5) %"40", align 4
|
store i64 %"52", ptr addrspace(5) %"44", align 4
|
||||||
%"54" = load i64, ptr addrspace(5) %"43", align 4
|
%"54" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"59" = inttoptr i64 %"53" to ptr addrspace(1)
|
%"55" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
store i64 %"54", ptr addrspace(1) %"59", align 4
|
%"60" = inttoptr i64 %"54" to ptr addrspace(1)
|
||||||
|
store i64 %"55", ptr addrspace(1) %"60", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -13,68 +13,78 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define i64 @__zluda_ptx_impl_add() #0 {
|
define i64 @add() #0 {
|
||||||
%"46" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"47" = alloca i64, align 8, addrspace(5)
|
%"47" = alloca i64, align 8, addrspace(5)
|
||||||
%"48" = alloca i64, align 8, addrspace(5)
|
%"48" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"49" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"49" = load i64, ptr addrspace(3) @shared_mod, align 4
|
br label %"85"
|
||||||
store i64 %"49", ptr addrspace(5) %"47", align 4
|
|
||||||
%"50" = load i64, ptr addrspace(3) @shared_ex, align 4
|
"85": ; preds = %1
|
||||||
|
%"50" = load i64, ptr addrspace(3) @shared_mod, align 4
|
||||||
store i64 %"50", ptr addrspace(5) %"48", align 4
|
store i64 %"50", ptr addrspace(5) %"48", align 4
|
||||||
%"52" = load i64, ptr addrspace(5) %"48", align 4
|
%"51" = load i64, ptr addrspace(3) @shared_ex, align 4
|
||||||
%"53" = load i64, ptr addrspace(5) %"47", align 4
|
store i64 %"51", ptr addrspace(5) %"49", align 4
|
||||||
%"75" = add i64 %"52", %"53"
|
%"53" = load i64, ptr addrspace(5) %"49", align 4
|
||||||
store i64 %"75", ptr addrspace(5) %"46", align 4
|
%"54" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
%2 = load i64, ptr addrspace(5) %"46", align 4
|
%"76" = add i64 %"53", %"54"
|
||||||
|
store i64 %"76", ptr addrspace(5) %"47", align 4
|
||||||
|
%2 = load i64, ptr addrspace(5) %"47", align 4
|
||||||
ret i64 %2
|
ret i64 %2
|
||||||
}
|
}
|
||||||
|
|
||||||
define i64 @__zluda_ptx_impl_set_shared_temp1(i64 %"15") #0 {
|
define i64 @set_shared_temp1(i64 %"15") #0 {
|
||||||
%"54" = alloca i64, align 8, addrspace(5)
|
%"55" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
|
br label %"86"
|
||||||
|
|
||||||
|
"86": ; preds = %1
|
||||||
store i64 %"15", ptr addrspace(3) @shared_ex, align 4
|
store i64 %"15", ptr addrspace(3) @shared_ex, align 4
|
||||||
%"55" = call i64 @__zluda_ptx_impl_add()
|
%"56" = call i64 @add()
|
||||||
store i64 %"55", ptr addrspace(5) %"54", align 4
|
store i64 %"56", ptr addrspace(5) %"55", align 4
|
||||||
%2 = load i64, ptr addrspace(5) %"54", align 4
|
%2 = load i64, ptr addrspace(5) %"55", align 4
|
||||||
ret i64 %2
|
ret i64 %2
|
||||||
}
|
}
|
||||||
|
|
||||||
define amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"56", ptr addrspace(4) byref(i64) %"57") #0 {
|
define amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #1 {
|
||||||
%"58" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"59" = alloca i64, align 8, addrspace(5)
|
%"59" = alloca i64, align 8, addrspace(5)
|
||||||
%"60" = alloca i64, align 8, addrspace(5)
|
%"60" = alloca i64, align 8, addrspace(5)
|
||||||
%"61" = alloca i64, align 8, addrspace(5)
|
%"61" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"62" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"62" = load i64, ptr addrspace(4) %"56", align 4
|
br label %"87"
|
||||||
store i64 %"62", ptr addrspace(5) %"58", align 4
|
|
||||||
|
"87": ; preds = %1
|
||||||
%"63" = load i64, ptr addrspace(4) %"57", align 4
|
%"63" = load i64, ptr addrspace(4) %"57", align 4
|
||||||
store i64 %"63", ptr addrspace(5) %"59", align 4
|
store i64 %"63", ptr addrspace(5) %"59", align 4
|
||||||
%"65" = load i64, ptr addrspace(5) %"58", align 4
|
%"64" = load i64, ptr addrspace(4) %"58", align 4
|
||||||
%"78" = inttoptr i64 %"65" to ptr addrspace(1)
|
|
||||||
%"64" = load i64, ptr addrspace(1) %"78", align 4
|
|
||||||
store i64 %"64", ptr addrspace(5) %"60", align 4
|
store i64 %"64", ptr addrspace(5) %"60", align 4
|
||||||
%"66" = load i64, ptr addrspace(5) %"58", align 4
|
%"66" = load i64, ptr addrspace(5) %"59", align 4
|
||||||
%"79" = inttoptr i64 %"66" to ptr addrspace(1)
|
%"79" = inttoptr i64 %"66" to ptr addrspace(1)
|
||||||
%"39" = getelementptr inbounds i8, ptr addrspace(1) %"79", i64 8
|
%"65" = load i64, ptr addrspace(1) %"79", align 4
|
||||||
%"67" = load i64, ptr addrspace(1) %"39", align 4
|
store i64 %"65", ptr addrspace(5) %"61", align 4
|
||||||
store i64 %"67", ptr addrspace(5) %"61", align 4
|
%"67" = load i64, ptr addrspace(5) %"59", align 4
|
||||||
%"68" = load i64, ptr addrspace(5) %"61", align 4
|
%"80" = inttoptr i64 %"67" to ptr addrspace(1)
|
||||||
store i64 %"68", ptr addrspace(3) @shared_mod, align 4
|
%"40" = getelementptr inbounds i8, ptr addrspace(1) %"80", i64 8
|
||||||
%"70" = load i64, ptr addrspace(5) %"60", align 4
|
%"68" = load i64, ptr addrspace(1) %"40", align 4
|
||||||
%"81" = call i64 @__zluda_ptx_impl_set_shared_temp1(i64 %"70")
|
store i64 %"68", ptr addrspace(5) %"62", align 4
|
||||||
store i64 %"81", ptr addrspace(5) %"61", align 4
|
%"69" = load i64, ptr addrspace(5) %"62", align 4
|
||||||
%"71" = load i64, ptr addrspace(5) %"59", align 4
|
store i64 %"69", ptr addrspace(3) @shared_mod, align 4
|
||||||
%"72" = load i64, ptr addrspace(5) %"61", align 4
|
%"71" = load i64, ptr addrspace(5) %"61", align 4
|
||||||
%"83" = inttoptr i64 %"71" to ptr
|
%"82" = call i64 @set_shared_temp1(i64 %"71")
|
||||||
store i64 %"72", ptr %"83", align 4
|
store i64 %"82", ptr addrspace(5) %"62", align 4
|
||||||
|
%"72" = load i64, ptr addrspace(5) %"60", align 4
|
||||||
|
%"73" = load i64, ptr addrspace(5) %"62", align 4
|
||||||
|
%"84" = inttoptr i64 %"72" to ptr
|
||||||
|
store i64 %"73", ptr %"84", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -13,65 +13,75 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define i64 @__zluda_ptx_impl_add(i64 %"10") #0 {
|
define i64 @add(i64 %"10") #0 {
|
||||||
%"47" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"48" = alloca i64, align 8, addrspace(5)
|
%"48" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"49" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
|
br label %"81"
|
||||||
|
|
||||||
|
"81": ; preds = %1
|
||||||
store i64 %"10", ptr addrspace(3) @shared_mod, align 4
|
store i64 %"10", ptr addrspace(3) @shared_mod, align 4
|
||||||
%"49" = load i64, ptr addrspace(3) @shared_mod, align 4
|
%"50" = load i64, ptr addrspace(3) @shared_mod, align 4
|
||||||
store i64 %"49", ptr addrspace(5) %"48", align 4
|
store i64 %"50", ptr addrspace(5) %"49", align 4
|
||||||
%"101" = load i64, ptr addrspace(3) @shared_ex, align 4
|
%"101" = load i64, ptr addrspace(3) @shared_ex, align 4
|
||||||
%"51" = load i64, ptr addrspace(5) %"48", align 4
|
%"52" = load i64, ptr addrspace(5) %"49", align 4
|
||||||
%"72" = add i64 %"101", %"51"
|
%"73" = add i64 %"101", %"52"
|
||||||
store i64 %"72", ptr addrspace(5) %"47", align 4
|
store i64 %"73", ptr addrspace(5) %"48", align 4
|
||||||
%2 = load i64, ptr addrspace(5) %"47", align 4
|
%2 = load i64, ptr addrspace(5) %"48", align 4
|
||||||
ret i64 %2
|
ret i64 %2
|
||||||
}
|
}
|
||||||
|
|
||||||
define i64 @__zluda_ptx_impl_set_shared_temp1(i64 %"15", i64 %"16") #0 {
|
define i64 @set_shared_temp1(i64 %"15", i64 %"16") #0 {
|
||||||
%"52" = alloca i64, align 8, addrspace(5)
|
%"53" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
|
br label %"82"
|
||||||
|
|
||||||
|
"82": ; preds = %1
|
||||||
store i64 %"15", ptr addrspace(3) @shared_ex, align 4
|
store i64 %"15", ptr addrspace(3) @shared_ex, align 4
|
||||||
%"53" = call i64 @__zluda_ptx_impl_add(i64 %"16")
|
%"54" = call i64 @add(i64 %"16")
|
||||||
store i64 %"53", ptr addrspace(5) %"52", align 4
|
store i64 %"54", ptr addrspace(5) %"53", align 4
|
||||||
%2 = load i64, ptr addrspace(5) %"52", align 4
|
%2 = load i64, ptr addrspace(5) %"53", align 4
|
||||||
ret i64 %2
|
ret i64 %2
|
||||||
}
|
}
|
||||||
|
|
||||||
define amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"54", ptr addrspace(4) byref(i64) %"55") #0 {
|
define amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"55", ptr addrspace(4) byref(i64) %"56") #1 {
|
||||||
%"56" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"57" = alloca i64, align 8, addrspace(5)
|
%"57" = alloca i64, align 8, addrspace(5)
|
||||||
%"58" = alloca i64, align 8, addrspace(5)
|
%"58" = alloca i64, align 8, addrspace(5)
|
||||||
%"59" = alloca i64, align 8, addrspace(5)
|
%"59" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"60" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"60" = load i64, ptr addrspace(4) %"54", align 4
|
br label %"83"
|
||||||
store i64 %"60", ptr addrspace(5) %"56", align 4
|
|
||||||
|
"83": ; preds = %1
|
||||||
%"61" = load i64, ptr addrspace(4) %"55", align 4
|
%"61" = load i64, ptr addrspace(4) %"55", align 4
|
||||||
store i64 %"61", ptr addrspace(5) %"57", align 4
|
store i64 %"61", ptr addrspace(5) %"57", align 4
|
||||||
%"63" = load i64, ptr addrspace(5) %"56", align 4
|
%"62" = load i64, ptr addrspace(4) %"56", align 4
|
||||||
%"75" = inttoptr i64 %"63" to ptr addrspace(1)
|
|
||||||
%"62" = load i64, ptr addrspace(1) %"75", align 4
|
|
||||||
store i64 %"62", ptr addrspace(5) %"58", align 4
|
store i64 %"62", ptr addrspace(5) %"58", align 4
|
||||||
%"64" = load i64, ptr addrspace(5) %"56", align 4
|
%"64" = load i64, ptr addrspace(5) %"57", align 4
|
||||||
%"76" = inttoptr i64 %"64" to ptr addrspace(1)
|
%"76" = inttoptr i64 %"64" to ptr addrspace(1)
|
||||||
%"40" = getelementptr inbounds i8, ptr addrspace(1) %"76", i64 8
|
%"63" = load i64, ptr addrspace(1) %"76", align 4
|
||||||
%"65" = load i64, ptr addrspace(1) %"40", align 4
|
store i64 %"63", ptr addrspace(5) %"59", align 4
|
||||||
store i64 %"65", ptr addrspace(5) %"59", align 4
|
%"65" = load i64, ptr addrspace(5) %"57", align 4
|
||||||
%"67" = load i64, ptr addrspace(5) %"58", align 4
|
%"77" = inttoptr i64 %"65" to ptr addrspace(1)
|
||||||
|
%"41" = getelementptr inbounds i8, ptr addrspace(1) %"77", i64 8
|
||||||
|
%"66" = load i64, ptr addrspace(1) %"41", align 4
|
||||||
|
store i64 %"66", ptr addrspace(5) %"60", align 4
|
||||||
%"68" = load i64, ptr addrspace(5) %"59", align 4
|
%"68" = load i64, ptr addrspace(5) %"59", align 4
|
||||||
%"77" = call i64 @__zluda_ptx_impl_set_shared_temp1(i64 %"67", i64 %"68")
|
%"69" = load i64, ptr addrspace(5) %"60", align 4
|
||||||
store i64 %"77", ptr addrspace(5) %"59", align 4
|
%"78" = call i64 @set_shared_temp1(i64 %"68", i64 %"69")
|
||||||
%"69" = load i64, ptr addrspace(5) %"57", align 4
|
store i64 %"78", ptr addrspace(5) %"60", align 4
|
||||||
%"70" = load i64, ptr addrspace(5) %"59", align 4
|
%"70" = load i64, ptr addrspace(5) %"58", align 4
|
||||||
%"79" = inttoptr i64 %"69" to ptr
|
%"71" = load i64, ptr addrspace(5) %"60", align 4
|
||||||
store i64 %"70", ptr %"79", align 4
|
%"80" = inttoptr i64 %"70" to ptr
|
||||||
|
store i64 %"71", ptr %"80", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -12,31 +12,35 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"55"
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"55": ; preds = %1
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%"50" = inttoptr i64 %"45" to ptr addrspace(1)
|
|
||||||
%"44" = load i64, ptr addrspace(1) %"50", align 4
|
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store i64 %"46", ptr addrspace(3) @shared_mem1, align 4
|
%"51" = inttoptr i64 %"46" to ptr addrspace(1)
|
||||||
%"47" = load i64, ptr addrspace(3) @shared_mem1, align 4
|
%"45" = load i64, ptr addrspace(1) %"51", align 4
|
||||||
store i64 %"47", ptr addrspace(5) %"41", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
%"47" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
store i64 %"47", ptr addrspace(3) @shared_mem1, align 4
|
||||||
%"53" = inttoptr i64 %"48" to ptr addrspace(1)
|
%"48" = load i64, ptr addrspace(3) @shared_mem1, align 4
|
||||||
store i64 %"49", ptr addrspace(1) %"53", align 4
|
store i64 %"48", ptr addrspace(5) %"42", align 4
|
||||||
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
|
%"54" = inttoptr i64 %"49" to ptr addrspace(1)
|
||||||
|
store i64 %"50", ptr addrspace(1) %"54", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,31 +10,35 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"55"
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"55": ; preds = %1
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%"50" = inttoptr i64 %"45" to ptr
|
|
||||||
%"44" = load i64, ptr %"50", align 4
|
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
%2 = shl i64 %"47", 2
|
%"51" = inttoptr i64 %"46" to ptr
|
||||||
%"51" = select i1 false, i64 0, i64 %2
|
%"45" = load i64, ptr %"51", align 4
|
||||||
store i64 %"51", ptr addrspace(5) %"41", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
%2 = shl i64 %"48", 2
|
||||||
%"53" = inttoptr i64 %"48" to ptr
|
%"52" = select i1 false, i64 0, i64 %2
|
||||||
store i64 %"49", ptr %"53", align 4
|
store i64 %"52", ptr addrspace(5) %"42", align 4
|
||||||
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
|
%"54" = inttoptr i64 %"49" to ptr
|
||||||
|
store i64 %"50", ptr %"54", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @shr(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
define amdgpu_kernel void @shr(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
%"39" = alloca i32, align 4, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"40" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
br label %"51"
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
|
||||||
|
"51": ; preds = %1
|
||||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
%"48" = inttoptr i64 %"43" to ptr
|
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||||
%"42" = load i32, ptr %"48", align 4
|
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store i32 %"42", ptr addrspace(5) %"39", align 4
|
%"49" = inttoptr i64 %"44" to ptr
|
||||||
%"45" = load i32, ptr addrspace(5) %"39", align 4
|
%"43" = load i32, ptr %"49", align 4
|
||||||
%2 = ashr i32 %"45", 1
|
store i32 %"43", ptr addrspace(5) %"40", align 4
|
||||||
%"44" = select i1 false, i32 0, i32 %2
|
%"46" = load i32, ptr addrspace(5) %"40", align 4
|
||||||
store i32 %"44", ptr addrspace(5) %"39", align 4
|
%2 = ashr i32 %"46", 1
|
||||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
%"45" = select i1 false, i32 0, i32 %2
|
||||||
%"47" = load i32, ptr addrspace(5) %"39", align 4
|
store i32 %"45", ptr addrspace(5) %"40", align 4
|
||||||
%"49" = inttoptr i64 %"46" to ptr
|
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store i32 %"47", ptr %"49", align 4
|
%"48" = load i32, ptr addrspace(5) %"40", align 4
|
||||||
|
%"50" = inttoptr i64 %"47" to ptr
|
||||||
|
store i32 %"48", ptr %"50", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,27 +10,31 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca i32, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"49"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"49": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"46" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"45" = load i16, ptr %"46", align 2
|
|
||||||
%"41" = sext i16 %"45" to i32
|
|
||||||
store i32 %"41", ptr addrspace(5) %"38", align 4
|
|
||||||
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
%"44" = load i32, ptr addrspace(5) %"38", align 4
|
|
||||||
%"47" = inttoptr i64 %"43" to ptr
|
%"47" = inttoptr i64 %"43" to ptr
|
||||||
store i32 %"44", ptr %"47", align 4
|
%"46" = load i16, ptr %"47", align 2
|
||||||
|
%"42" = sext i16 %"46" to i32
|
||||||
|
store i32 %"42", ptr addrspace(5) %"39", align 4
|
||||||
|
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
|
%"45" = load i32, ptr addrspace(5) %"39", align 4
|
||||||
|
%"48" = inttoptr i64 %"44" to ptr
|
||||||
|
store i32 %"45", ptr %"48", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca float, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"50"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"50": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"47" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"41" = load float, ptr %"47", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store float %"41", ptr addrspace(5) %"38", align 4
|
%"48" = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load float, ptr addrspace(5) %"38", align 4
|
%"42" = load float, ptr %"48", align 4
|
||||||
%"43" = call afn float @llvm.sin.f32(float %"44")
|
store float %"42", ptr addrspace(5) %"39", align 4
|
||||||
store float %"43", ptr addrspace(5) %"38", align 4
|
%"45" = load float, ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
%"44" = call afn float @llvm.sin.f32(float %"45")
|
||||||
%"46" = load float, ptr addrspace(5) %"38", align 4
|
store float %"44", ptr addrspace(5) %"39", align 4
|
||||||
%"48" = inttoptr i64 %"45" to ptr
|
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store float %"46", ptr %"48", align 4
|
%"47" = load float, ptr addrspace(5) %"39", align 4
|
||||||
|
%"49" = inttoptr i64 %"46" to ptr
|
||||||
|
store float %"47", ptr %"49", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare float @llvm.sin.f32(float) #1
|
declare float @llvm.sin.f32(float) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
define amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||||
%"36" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"37" = alloca i64, align 8, addrspace(5)
|
%"37" = alloca i64, align 8, addrspace(5)
|
||||||
%"38" = alloca float, align 4, addrspace(5)
|
%"38" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"39" = alloca float, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"39" = load i64, ptr addrspace(4) %"34", align 4
|
br label %"50"
|
||||||
store i64 %"39", ptr addrspace(5) %"36", align 4
|
|
||||||
|
"50": ; preds = %1
|
||||||
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
%"40" = load i64, ptr addrspace(4) %"35", align 4
|
||||||
store i64 %"40", ptr addrspace(5) %"37", align 4
|
store i64 %"40", ptr addrspace(5) %"37", align 4
|
||||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||||
%"47" = inttoptr i64 %"42" to ptr
|
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||||
%"41" = load float, ptr %"47", align 4
|
%"43" = load i64, ptr addrspace(5) %"37", align 4
|
||||||
store float %"41", ptr addrspace(5) %"38", align 4
|
%"48" = inttoptr i64 %"43" to ptr
|
||||||
%"44" = load float, ptr addrspace(5) %"38", align 4
|
%"42" = load float, ptr %"48", align 4
|
||||||
%"43" = call float @llvm.amdgcn.sqrt.f32(float %"44")
|
store float %"42", ptr addrspace(5) %"39", align 4
|
||||||
store float %"43", ptr addrspace(5) %"38", align 4
|
%"45" = load float, ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
%"44" = call float @llvm.amdgcn.sqrt.f32(float %"45")
|
||||||
%"46" = load float, ptr addrspace(5) %"38", align 4
|
store float %"44", ptr addrspace(5) %"39", align 4
|
||||||
%"48" = inttoptr i64 %"45" to ptr
|
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||||
store float %"46", ptr %"48", align 4
|
%"47" = load float, ptr addrspace(5) %"39", align 4
|
||||||
|
%"49" = inttoptr i64 %"46" to ptr
|
||||||
|
store float %"47", ptr %"49", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||||
declare float @llvm.amdgcn.sqrt.f32(float) #1
|
declare float @llvm.amdgcn.sqrt.f32(float) #2
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -10,49 +10,53 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @stateful_ld_st_ntid(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
define amdgpu_kernel void @stateful_ld_st_ntid(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca i64, align 8, addrspace(5)
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
%"44" = alloca i64, align 8, addrspace(5)
|
%"44" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"45" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"64" = load i64, ptr addrspace(4) %"38", align 4
|
br label %"73"
|
||||||
store i64 %"64", ptr addrspace(5) %"40", align 4
|
|
||||||
|
"73": ; preds = %1
|
||||||
%"65" = load i64, ptr addrspace(4) %"39", align 4
|
%"65" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
store i64 %"65", ptr addrspace(5) %"41", align 4
|
store i64 %"65", ptr addrspace(5) %"41", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
%"66" = load i64, ptr addrspace(4) %"40", align 4
|
||||||
%2 = inttoptr i64 %"48" to ptr
|
store i64 %"66", ptr addrspace(5) %"42", align 4
|
||||||
%"47" = addrspacecast ptr %2 to ptr addrspace(1)
|
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
store ptr addrspace(1) %"47", ptr addrspace(5) %"40", align 8
|
%2 = inttoptr i64 %"49" to ptr
|
||||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
%"48" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||||
%3 = inttoptr i64 %"50" to ptr
|
store ptr addrspace(1) %"48", ptr addrspace(5) %"41", align 8
|
||||||
%"49" = addrspacecast ptr %3 to ptr addrspace(1)
|
%"51" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
store ptr addrspace(1) %"49", ptr addrspace(5) %"41", align 8
|
%3 = inttoptr i64 %"51" to ptr
|
||||||
%"31" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
|
%"50" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||||
store i32 %"31", ptr addrspace(5) %"42", align 4
|
store ptr addrspace(1) %"50", ptr addrspace(5) %"42", align 8
|
||||||
%"53" = load i32, ptr addrspace(5) %"42", align 4
|
%"32" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
|
||||||
%"52" = zext i32 %"53" to i64
|
store i32 %"32", ptr addrspace(5) %"43", align 4
|
||||||
store i64 %"52", ptr addrspace(5) %"43", align 4
|
%"54" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
%"55" = load i64, ptr addrspace(5) %"40", align 4
|
%"53" = zext i32 %"54" to i64
|
||||||
%"56" = load i64, ptr addrspace(5) %"43", align 4
|
store i64 %"53", ptr addrspace(5) %"44", align 4
|
||||||
%"66" = add i64 %"55", %"56"
|
%"56" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
store i64 %"66", ptr addrspace(5) %"40", align 4
|
%"57" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
%"58" = load i64, ptr addrspace(5) %"41", align 4
|
%"67" = add i64 %"56", %"57"
|
||||||
%"59" = load i64, ptr addrspace(5) %"43", align 4
|
store i64 %"67", ptr addrspace(5) %"41", align 4
|
||||||
%"68" = add i64 %"58", %"59"
|
%"59" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
store i64 %"68", ptr addrspace(5) %"41", align 4
|
%"60" = load i64, ptr addrspace(5) %"44", align 4
|
||||||
%"61" = load i64, ptr addrspace(5) %"40", align 4
|
%"69" = add i64 %"59", %"60"
|
||||||
%"70" = inttoptr i64 %"61" to ptr addrspace(1)
|
store i64 %"69", ptr addrspace(5) %"42", align 4
|
||||||
%"60" = load i64, ptr addrspace(1) %"70", align 4
|
|
||||||
store i64 %"60", ptr addrspace(5) %"44", align 4
|
|
||||||
%"62" = load i64, ptr addrspace(5) %"41", align 4
|
%"62" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"63" = load i64, ptr addrspace(5) %"44", align 4
|
|
||||||
%"71" = inttoptr i64 %"62" to ptr addrspace(1)
|
%"71" = inttoptr i64 %"62" to ptr addrspace(1)
|
||||||
store i64 %"63", ptr addrspace(1) %"71", align 4
|
%"61" = load i64, ptr addrspace(1) %"71", align 4
|
||||||
|
store i64 %"61", ptr addrspace(5) %"45", align 4
|
||||||
|
%"63" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
|
%"64" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
|
%"72" = inttoptr i64 %"63" to ptr addrspace(1)
|
||||||
|
store i64 %"64", ptr addrspace(1) %"72", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,53 +10,57 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @stateful_ld_st_ntid_chain(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 {
|
define amdgpu_kernel void @stateful_ld_st_ntid_chain(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #1 {
|
||||||
%"44" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"45" = alloca i64, align 8, addrspace(5)
|
%"45" = alloca i64, align 8, addrspace(5)
|
||||||
%"46" = alloca i64, align 8, addrspace(5)
|
%"46" = alloca i64, align 8, addrspace(5)
|
||||||
%"47" = alloca i64, align 8, addrspace(5)
|
%"47" = alloca i64, align 8, addrspace(5)
|
||||||
%"48" = alloca i64, align 8, addrspace(5)
|
%"48" = alloca i64, align 8, addrspace(5)
|
||||||
%"49" = alloca i64, align 8, addrspace(5)
|
%"49" = alloca i64, align 8, addrspace(5)
|
||||||
%"50" = alloca i32, align 4, addrspace(5)
|
%"50" = alloca i64, align 8, addrspace(5)
|
||||||
%"51" = alloca i64, align 8, addrspace(5)
|
%"51" = alloca i32, align 4, addrspace(5)
|
||||||
%"52" = alloca i64, align 8, addrspace(5)
|
%"52" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"53" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"72" = load i64, ptr addrspace(4) %"42", align 4
|
br label %"81"
|
||||||
store i64 %"72", ptr addrspace(5) %"44", align 4
|
|
||||||
|
"81": ; preds = %1
|
||||||
%"73" = load i64, ptr addrspace(4) %"43", align 4
|
%"73" = load i64, ptr addrspace(4) %"43", align 4
|
||||||
store i64 %"73", ptr addrspace(5) %"47", align 4
|
store i64 %"73", ptr addrspace(5) %"45", align 4
|
||||||
%"56" = load i64, ptr addrspace(5) %"44", align 4
|
%"74" = load i64, ptr addrspace(4) %"44", align 4
|
||||||
%2 = inttoptr i64 %"56" to ptr
|
store i64 %"74", ptr addrspace(5) %"48", align 4
|
||||||
%"55" = addrspacecast ptr %2 to ptr addrspace(1)
|
%"57" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
store ptr addrspace(1) %"55", ptr addrspace(5) %"45", align 8
|
%2 = inttoptr i64 %"57" to ptr
|
||||||
%"58" = load i64, ptr addrspace(5) %"47", align 4
|
%"56" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||||
%3 = inttoptr i64 %"58" to ptr
|
store ptr addrspace(1) %"56", ptr addrspace(5) %"46", align 8
|
||||||
%"57" = addrspacecast ptr %3 to ptr addrspace(1)
|
%"59" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
store ptr addrspace(1) %"57", ptr addrspace(5) %"48", align 8
|
%3 = inttoptr i64 %"59" to ptr
|
||||||
%"35" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
|
%"58" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||||
store i32 %"35", ptr addrspace(5) %"50", align 4
|
store ptr addrspace(1) %"58", ptr addrspace(5) %"49", align 8
|
||||||
%"61" = load i32, ptr addrspace(5) %"50", align 4
|
%"36" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
|
||||||
%"60" = zext i32 %"61" to i64
|
store i32 %"36", ptr addrspace(5) %"51", align 4
|
||||||
store i64 %"60", ptr addrspace(5) %"51", align 4
|
%"62" = load i32, ptr addrspace(5) %"51", align 4
|
||||||
%"63" = load i64, ptr addrspace(5) %"45", align 4
|
%"61" = zext i32 %"62" to i64
|
||||||
%"64" = load i64, ptr addrspace(5) %"51", align 4
|
store i64 %"61", ptr addrspace(5) %"52", align 4
|
||||||
%"74" = add i64 %"63", %"64"
|
%"64" = load i64, ptr addrspace(5) %"46", align 4
|
||||||
store i64 %"74", ptr addrspace(5) %"46", align 4
|
%"65" = load i64, ptr addrspace(5) %"52", align 4
|
||||||
%"66" = load i64, ptr addrspace(5) %"48", align 4
|
%"75" = add i64 %"64", %"65"
|
||||||
%"67" = load i64, ptr addrspace(5) %"51", align 4
|
store i64 %"75", ptr addrspace(5) %"47", align 4
|
||||||
%"76" = add i64 %"66", %"67"
|
%"67" = load i64, ptr addrspace(5) %"49", align 4
|
||||||
store i64 %"76", ptr addrspace(5) %"49", align 4
|
%"68" = load i64, ptr addrspace(5) %"52", align 4
|
||||||
%"69" = load i64, ptr addrspace(5) %"46", align 4
|
%"77" = add i64 %"67", %"68"
|
||||||
%"78" = inttoptr i64 %"69" to ptr addrspace(1)
|
store i64 %"77", ptr addrspace(5) %"50", align 4
|
||||||
%"68" = load i64, ptr addrspace(1) %"78", align 4
|
%"70" = load i64, ptr addrspace(5) %"47", align 4
|
||||||
store i64 %"68", ptr addrspace(5) %"52", align 4
|
|
||||||
%"70" = load i64, ptr addrspace(5) %"49", align 4
|
|
||||||
%"71" = load i64, ptr addrspace(5) %"52", align 4
|
|
||||||
%"79" = inttoptr i64 %"70" to ptr addrspace(1)
|
%"79" = inttoptr i64 %"70" to ptr addrspace(1)
|
||||||
store i64 %"71", ptr addrspace(1) %"79", align 4
|
%"69" = load i64, ptr addrspace(1) %"79", align 4
|
||||||
|
store i64 %"69", ptr addrspace(5) %"53", align 4
|
||||||
|
%"71" = load i64, ptr addrspace(5) %"50", align 4
|
||||||
|
%"72" = load i64, ptr addrspace(5) %"53", align 4
|
||||||
|
%"80" = inttoptr i64 %"71" to ptr addrspace(1)
|
||||||
|
store i64 %"72", ptr addrspace(1) %"80", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,55 +10,59 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @stateful_ld_st_ntid_sub(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 {
|
define amdgpu_kernel void @stateful_ld_st_ntid_sub(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #1 {
|
||||||
%"48" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"49" = alloca i64, align 8, addrspace(5)
|
%"49" = alloca i64, align 8, addrspace(5)
|
||||||
%"50" = alloca i64, align 8, addrspace(5)
|
%"50" = alloca i64, align 8, addrspace(5)
|
||||||
%"51" = alloca i64, align 8, addrspace(5)
|
%"51" = alloca i64, align 8, addrspace(5)
|
||||||
%"52" = alloca i64, align 8, addrspace(5)
|
%"52" = alloca i64, align 8, addrspace(5)
|
||||||
%"53" = alloca i64, align 8, addrspace(5)
|
%"53" = alloca i64, align 8, addrspace(5)
|
||||||
%"54" = alloca i32, align 4, addrspace(5)
|
%"54" = alloca i64, align 8, addrspace(5)
|
||||||
%"55" = alloca i64, align 8, addrspace(5)
|
%"55" = alloca i32, align 4, addrspace(5)
|
||||||
%"56" = alloca i64, align 8, addrspace(5)
|
%"56" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"57" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"76" = load i64, ptr addrspace(4) %"46", align 4
|
br label %"87"
|
||||||
store i64 %"76", ptr addrspace(5) %"48", align 4
|
|
||||||
|
"87": ; preds = %1
|
||||||
%"77" = load i64, ptr addrspace(4) %"47", align 4
|
%"77" = load i64, ptr addrspace(4) %"47", align 4
|
||||||
store i64 %"77", ptr addrspace(5) %"51", align 4
|
store i64 %"77", ptr addrspace(5) %"49", align 4
|
||||||
%"60" = load i64, ptr addrspace(5) %"48", align 4
|
%"78" = load i64, ptr addrspace(4) %"48", align 4
|
||||||
%2 = inttoptr i64 %"60" to ptr
|
store i64 %"78", ptr addrspace(5) %"52", align 4
|
||||||
%"59" = addrspacecast ptr %2 to ptr addrspace(1)
|
%"61" = load i64, ptr addrspace(5) %"49", align 4
|
||||||
store ptr addrspace(1) %"59", ptr addrspace(5) %"49", align 8
|
%2 = inttoptr i64 %"61" to ptr
|
||||||
%"62" = load i64, ptr addrspace(5) %"51", align 4
|
%"60" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||||
%3 = inttoptr i64 %"62" to ptr
|
store ptr addrspace(1) %"60", ptr addrspace(5) %"50", align 8
|
||||||
%"61" = addrspacecast ptr %3 to ptr addrspace(1)
|
%"63" = load i64, ptr addrspace(5) %"52", align 4
|
||||||
store ptr addrspace(1) %"61", ptr addrspace(5) %"52", align 8
|
%3 = inttoptr i64 %"63" to ptr
|
||||||
%"35" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
|
%"62" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||||
store i32 %"35", ptr addrspace(5) %"54", align 4
|
store ptr addrspace(1) %"62", ptr addrspace(5) %"53", align 8
|
||||||
%"65" = load i32, ptr addrspace(5) %"54", align 4
|
%"36" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
|
||||||
%"64" = zext i32 %"65" to i64
|
store i32 %"36", ptr addrspace(5) %"55", align 4
|
||||||
store i64 %"64", ptr addrspace(5) %"55", align 4
|
%"66" = load i32, ptr addrspace(5) %"55", align 4
|
||||||
%"67" = load i64, ptr addrspace(5) %"49", align 4
|
%"65" = zext i32 %"66" to i64
|
||||||
%"68" = load i64, ptr addrspace(5) %"55", align 4
|
store i64 %"65", ptr addrspace(5) %"56", align 4
|
||||||
%"78" = sub i64 %"67", %"68"
|
%"68" = load i64, ptr addrspace(5) %"50", align 4
|
||||||
store i64 %"78", ptr addrspace(5) %"50", align 4
|
%"69" = load i64, ptr addrspace(5) %"56", align 4
|
||||||
%"70" = load i64, ptr addrspace(5) %"52", align 4
|
%"79" = sub i64 %"68", %"69"
|
||||||
%"71" = load i64, ptr addrspace(5) %"55", align 4
|
store i64 %"79", ptr addrspace(5) %"51", align 4
|
||||||
%"81" = sub i64 %"70", %"71"
|
%"71" = load i64, ptr addrspace(5) %"53", align 4
|
||||||
store i64 %"81", ptr addrspace(5) %"53", align 4
|
%"72" = load i64, ptr addrspace(5) %"56", align 4
|
||||||
%"72" = load i64, ptr addrspace(5) %"50", align 4
|
%"82" = sub i64 %"71", %"72"
|
||||||
%"84" = inttoptr i64 %"72" to ptr addrspace(1)
|
store i64 %"82", ptr addrspace(5) %"54", align 4
|
||||||
%"37" = getelementptr inbounds i8, ptr addrspace(1) %"84", i64 0
|
%"73" = load i64, ptr addrspace(5) %"51", align 4
|
||||||
%"73" = load i64, ptr addrspace(1) %"37", align 4
|
%"85" = inttoptr i64 %"73" to ptr addrspace(1)
|
||||||
store i64 %"73", ptr addrspace(5) %"56", align 4
|
%"38" = getelementptr inbounds i8, ptr addrspace(1) %"85", i64 0
|
||||||
%"74" = load i64, ptr addrspace(5) %"53", align 4
|
%"74" = load i64, ptr addrspace(1) %"38", align 4
|
||||||
%"85" = inttoptr i64 %"74" to ptr addrspace(1)
|
store i64 %"74", ptr addrspace(5) %"57", align 4
|
||||||
%"39" = getelementptr inbounds i8, ptr addrspace(1) %"85", i64 0
|
%"75" = load i64, ptr addrspace(5) %"54", align 4
|
||||||
%"75" = load i64, ptr addrspace(5) %"56", align 4
|
%"86" = inttoptr i64 %"75" to ptr addrspace(1)
|
||||||
store i64 %"75", ptr addrspace(1) %"39", align 4
|
%"40" = getelementptr inbounds i8, ptr addrspace(1) %"86", i64 0
|
||||||
|
%"76" = load i64, ptr addrspace(5) %"57", align 4
|
||||||
|
store i64 %"76", ptr addrspace(1) %"40", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @stateful_ld_st_simple(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @stateful_ld_st_simple(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"60"
|
||||||
store i64 %"43", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"60": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"37", align 4
|
%"44" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"39", align 4
|
store i64 %"44", ptr addrspace(5) %"39", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
%"45" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%2 = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"40", align 4
|
||||||
%"53" = addrspacecast ptr %2 to ptr addrspace(1)
|
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store ptr addrspace(1) %"53", ptr addrspace(5) %"40", align 8
|
%2 = inttoptr i64 %"47" to ptr
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
%"54" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||||
%3 = inttoptr i64 %"48" to ptr
|
store ptr addrspace(1) %"54", ptr addrspace(5) %"41", align 8
|
||||||
%"55" = addrspacecast ptr %3 to ptr addrspace(1)
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store ptr addrspace(1) %"55", ptr addrspace(5) %"41", align 8
|
%3 = inttoptr i64 %"49" to ptr
|
||||||
%"50" = load i64, ptr addrspace(5) %"40", align 4
|
%"56" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||||
%"57" = inttoptr i64 %"50" to ptr addrspace(1)
|
store ptr addrspace(1) %"56", ptr addrspace(5) %"42", align 8
|
||||||
%"49" = load i64, ptr addrspace(1) %"57", align 4
|
|
||||||
store i64 %"49", ptr addrspace(5) %"42", align 4
|
|
||||||
%"51" = load i64, ptr addrspace(5) %"41", align 4
|
%"51" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"52" = load i64, ptr addrspace(5) %"42", align 4
|
|
||||||
%"58" = inttoptr i64 %"51" to ptr addrspace(1)
|
%"58" = inttoptr i64 %"51" to ptr addrspace(1)
|
||||||
store i64 %"52", ptr addrspace(1) %"58", align 4
|
%"50" = load i64, ptr addrspace(1) %"58", align 4
|
||||||
|
store i64 %"50", ptr addrspace(5) %"43", align 4
|
||||||
|
%"52" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
|
%"53" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
|
%"59" = inttoptr i64 %"52" to ptr addrspace(1)
|
||||||
|
store i64 %"53", ptr addrspace(1) %"59", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,45 +10,49 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @stateful_neg_offset(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @stateful_neg_offset(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i64, align 8, addrspace(5)
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
%"43" = alloca i64, align 8, addrspace(5)
|
%"43" = alloca i64, align 8, addrspace(5)
|
||||||
%"44" = alloca i64, align 8, addrspace(5)
|
%"44" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"45" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"45" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"68"
|
||||||
store i64 %"45", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"68": ; preds = %1
|
||||||
%"46" = load i64, ptr addrspace(4) %"38", align 4
|
%"46" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"46", ptr addrspace(5) %"40", align 4
|
store i64 %"46", ptr addrspace(5) %"40", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
%"47" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%2 = inttoptr i64 %"48" to ptr
|
store i64 %"47", ptr addrspace(5) %"41", align 4
|
||||||
%"61" = addrspacecast ptr %2 to ptr addrspace(1)
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store ptr addrspace(1) %"61", ptr addrspace(5) %"41", align 8
|
%2 = inttoptr i64 %"49" to ptr
|
||||||
%"50" = load i64, ptr addrspace(5) %"40", align 4
|
%"62" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||||
%3 = inttoptr i64 %"50" to ptr
|
store ptr addrspace(1) %"62", ptr addrspace(5) %"42", align 8
|
||||||
%"63" = addrspacecast ptr %3 to ptr addrspace(1)
|
%"51" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
store ptr addrspace(1) %"63", ptr addrspace(5) %"42", align 8
|
%3 = inttoptr i64 %"51" to ptr
|
||||||
%"52" = load i64, ptr addrspace(5) %"41", align 4
|
%"64" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||||
|
store ptr addrspace(1) %"64", ptr addrspace(5) %"43", align 8
|
||||||
%"53" = load i64, ptr addrspace(5) %"42", align 4
|
%"53" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"51" = add i64 %"52", %"53"
|
%"54" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
store i64 %"51", ptr addrspace(5) %"43", align 4
|
%"52" = add i64 %"53", %"54"
|
||||||
%"55" = load i64, ptr addrspace(5) %"41", align 4
|
store i64 %"52", ptr addrspace(5) %"44", align 4
|
||||||
%"56" = load i64, ptr addrspace(5) %"42", align 4
|
%"56" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"54" = sub i64 %"55", %"56"
|
%"57" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
store i64 %"54", ptr addrspace(5) %"43", align 4
|
%"55" = sub i64 %"56", %"57"
|
||||||
%"58" = load i64, ptr addrspace(5) %"41", align 4
|
store i64 %"55", ptr addrspace(5) %"44", align 4
|
||||||
%"65" = inttoptr i64 %"58" to ptr addrspace(1)
|
|
||||||
%"57" = load i64, ptr addrspace(1) %"65", align 4
|
|
||||||
store i64 %"57", ptr addrspace(5) %"44", align 4
|
|
||||||
%"59" = load i64, ptr addrspace(5) %"42", align 4
|
%"59" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
%"60" = load i64, ptr addrspace(5) %"44", align 4
|
|
||||||
%"66" = inttoptr i64 %"59" to ptr addrspace(1)
|
%"66" = inttoptr i64 %"59" to ptr addrspace(1)
|
||||||
store i64 %"60", ptr addrspace(1) %"66", align 4
|
%"58" = load i64, ptr addrspace(1) %"66", align 4
|
||||||
|
store i64 %"58", ptr addrspace(5) %"45", align 4
|
||||||
|
%"60" = load i64, ptr addrspace(5) %"43", align 4
|
||||||
|
%"61" = load i64, ptr addrspace(5) %"45", align 4
|
||||||
|
%"67" = inttoptr i64 %"60" to ptr addrspace(1)
|
||||||
|
store i64 %"61", ptr addrspace(1) %"67", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i64, align 8, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
|
%"42" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"53"
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"53": ; preds = %1
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%"50" = inttoptr i64 %"45" to ptr
|
|
||||||
%"44" = load i64, ptr %"50", align 4
|
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
%"46" = sub i64 %"47", 1
|
%"51" = inttoptr i64 %"46" to ptr
|
||||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
%"45" = load i64, ptr %"51", align 4
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"51" = inttoptr i64 %"48" to ptr
|
%"47" = sub i64 %"48", 1
|
||||||
store i64 %"49", ptr %"51", align 4
|
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||||
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
|
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||||
|
%"52" = inttoptr i64 %"49" to ptr
|
||||||
|
store i64 %"50", ptr %"52", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,70 +10,77 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define <2 x i32> @__zluda_ptx_impl_impl(<2 x i32> %"9") #0 {
|
define <2 x i32> @impl(<2 x i32> %"9") #0 {
|
||||||
%"49" = alloca <2 x i32>, align 8, addrspace(5)
|
|
||||||
%"50" = alloca <2 x i32>, align 8, addrspace(5)
|
%"50" = alloca <2 x i32>, align 8, addrspace(5)
|
||||||
%"51" = alloca i32, align 4, addrspace(5)
|
%"51" = alloca <2 x i32>, align 8, addrspace(5)
|
||||||
%"52" = alloca i32, align 4, addrspace(5)
|
%"52" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"53" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"37" = extractelement <2 x i32> %"9", i8 0
|
br label %"91"
|
||||||
store i32 %"37", ptr addrspace(5) %"51", align 4
|
|
||||||
%"38" = extractelement <2 x i32> %"9", i8 1
|
"91": ; preds = %1
|
||||||
|
%"38" = extractelement <2 x i32> %"9", i8 0
|
||||||
store i32 %"38", ptr addrspace(5) %"52", align 4
|
store i32 %"38", ptr addrspace(5) %"52", align 4
|
||||||
%"56" = load i32, ptr addrspace(5) %"51", align 4
|
%"39" = extractelement <2 x i32> %"9", i8 1
|
||||||
|
store i32 %"39", ptr addrspace(5) %"53", align 4
|
||||||
%"57" = load i32, ptr addrspace(5) %"52", align 4
|
%"57" = load i32, ptr addrspace(5) %"52", align 4
|
||||||
%"55" = add i32 %"56", %"57"
|
%"58" = load i32, ptr addrspace(5) %"53", align 4
|
||||||
store i32 %"55", ptr addrspace(5) %"52", align 4
|
%"56" = add i32 %"57", %"58"
|
||||||
%"58" = load i32, ptr addrspace(5) %"52", align 4
|
store i32 %"56", ptr addrspace(5) %"53", align 4
|
||||||
%"60" = load <2 x i32>, ptr addrspace(5) %"50", align 8
|
%"59" = load i32, ptr addrspace(5) %"53", align 4
|
||||||
%"59" = insertelement <2 x i32> %"60", i32 %"58", i8 0
|
%"61" = load <2 x i32>, ptr addrspace(5) %"51", align 8
|
||||||
store <2 x i32> %"59", ptr addrspace(5) %"50", align 8
|
%"60" = insertelement <2 x i32> %"61", i32 %"59", i8 0
|
||||||
%"61" = load i32, ptr addrspace(5) %"52", align 4
|
store <2 x i32> %"60", ptr addrspace(5) %"51", align 8
|
||||||
%"63" = load <2 x i32>, ptr addrspace(5) %"50", align 8
|
%"62" = load i32, ptr addrspace(5) %"53", align 4
|
||||||
%"62" = insertelement <2 x i32> %"63", i32 %"61", i8 1
|
%"64" = load <2 x i32>, ptr addrspace(5) %"51", align 8
|
||||||
store <2 x i32> %"62", ptr addrspace(5) %"50", align 8
|
%"63" = insertelement <2 x i32> %"64", i32 %"62", i8 1
|
||||||
%"64" = load <2 x i32>, ptr addrspace(5) %"50", align 8
|
store <2 x i32> %"63", ptr addrspace(5) %"51", align 8
|
||||||
%"42" = extractelement <2 x i32> %"64", i8 1
|
%"65" = load <2 x i32>, ptr addrspace(5) %"51", align 8
|
||||||
%"66" = load <2 x i32>, ptr addrspace(5) %"50", align 8
|
%"43" = extractelement <2 x i32> %"65", i8 1
|
||||||
%"65" = insertelement <2 x i32> %"66", i32 %"42", i8 0
|
%"67" = load <2 x i32>, ptr addrspace(5) %"51", align 8
|
||||||
store <2 x i32> %"65", ptr addrspace(5) %"50", align 8
|
%"66" = insertelement <2 x i32> %"67", i32 %"43", i8 0
|
||||||
%"68" = load <2 x i32>, ptr addrspace(5) %"50", align 8
|
store <2 x i32> %"66", ptr addrspace(5) %"51", align 8
|
||||||
store <2 x i32> %"68", ptr addrspace(5) %"49", align 8
|
%"69" = load <2 x i32>, ptr addrspace(5) %"51", align 8
|
||||||
%2 = load <2 x i32>, ptr addrspace(5) %"49", align 8
|
store <2 x i32> %"69", ptr addrspace(5) %"50", align 8
|
||||||
|
%2 = load <2 x i32>, ptr addrspace(5) %"50", align 8
|
||||||
ret <2 x i32> %2
|
ret <2 x i32> %2
|
||||||
}
|
}
|
||||||
|
|
||||||
define amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"69", ptr addrspace(4) byref(i64) %"70") #0 {
|
define amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"70", ptr addrspace(4) byref(i64) %"71") #1 {
|
||||||
%"71" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"72" = alloca i64, align 8, addrspace(5)
|
%"72" = alloca i64, align 8, addrspace(5)
|
||||||
%"73" = alloca <2 x i32>, align 8, addrspace(5)
|
%"73" = alloca i64, align 8, addrspace(5)
|
||||||
%"74" = alloca i32, align 4, addrspace(5)
|
%"74" = alloca <2 x i32>, align 8, addrspace(5)
|
||||||
%"75" = alloca i32, align 4, addrspace(5)
|
%"75" = alloca i32, align 4, addrspace(5)
|
||||||
%"76" = alloca i64, align 8, addrspace(5)
|
%"76" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"77" = alloca i64, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"77" = load i64, ptr addrspace(4) %"69", align 4
|
br label %"92"
|
||||||
store i64 %"77", ptr addrspace(5) %"71", align 4
|
|
||||||
|
"92": ; preds = %1
|
||||||
%"78" = load i64, ptr addrspace(4) %"70", align 4
|
%"78" = load i64, ptr addrspace(4) %"70", align 4
|
||||||
store i64 %"78", ptr addrspace(5) %"72", align 4
|
store i64 %"78", ptr addrspace(5) %"72", align 4
|
||||||
%"80" = load i64, ptr addrspace(5) %"71", align 4
|
%"79" = load i64, ptr addrspace(4) %"71", align 4
|
||||||
%"87" = inttoptr i64 %"80" to ptr
|
store i64 %"79", ptr addrspace(5) %"73", align 4
|
||||||
%"79" = load <2 x i32>, ptr %"87", align 8
|
%"81" = load i64, ptr addrspace(5) %"72", align 4
|
||||||
store <2 x i32> %"79", ptr addrspace(5) %"73", align 8
|
%"88" = inttoptr i64 %"81" to ptr
|
||||||
%"82" = load <2 x i32>, ptr addrspace(5) %"73", align 8
|
%"80" = load <2 x i32>, ptr %"88", align 8
|
||||||
%"81" = call <2 x i32> @__zluda_ptx_impl_impl(<2 x i32> %"82")
|
store <2 x i32> %"80", ptr addrspace(5) %"74", align 8
|
||||||
store <2 x i32> %"81", ptr addrspace(5) %"73", align 8
|
%"83" = load <2 x i32>, ptr addrspace(5) %"74", align 8
|
||||||
%"84" = load <2 x i32>, ptr addrspace(5) %"73", align 8
|
%"82" = call <2 x i32> @impl(<2 x i32> %"83")
|
||||||
%"88" = bitcast <2 x i32> %"84" to i64
|
store <2 x i32> %"82", ptr addrspace(5) %"74", align 8
|
||||||
store i64 %"88", ptr addrspace(5) %"76", align 4
|
%"85" = load <2 x i32>, ptr addrspace(5) %"74", align 8
|
||||||
%"85" = load i64, ptr addrspace(5) %"72", align 4
|
%"89" = bitcast <2 x i32> %"85" to i64
|
||||||
%"86" = load <2 x i32>, ptr addrspace(5) %"73", align 8
|
store i64 %"89", ptr addrspace(5) %"77", align 4
|
||||||
%"89" = inttoptr i64 %"85" to ptr
|
%"86" = load i64, ptr addrspace(5) %"73", align 4
|
||||||
store <2 x i32> %"86", ptr %"89", align 8
|
%"87" = load <2 x i32>, ptr addrspace(5) %"74", align 8
|
||||||
|
%"90" = inttoptr i64 %"86" to ptr
|
||||||
|
store <2 x i32> %"87", ptr %"90", align 8
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
define amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||||
%"38" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
%"39" = alloca i64, align 8, addrspace(5)
|
||||||
%"40" = alloca <4 x i32>, align 16, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i32, align 4, addrspace(5)
|
%"41" = alloca <4 x i32>, align 16, addrspace(5)
|
||||||
|
%"42" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
br label %"55"
|
||||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
|
||||||
|
"55": ; preds = %1
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
%"50" = inttoptr i64 %"45" to ptr
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"44" = load <4 x i32>, ptr %"50", align 16
|
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||||
store <4 x i32> %"44", ptr addrspace(5) %"40", align 16
|
%"51" = inttoptr i64 %"46" to ptr
|
||||||
%"46" = load <4 x i32>, ptr addrspace(5) %"40", align 16
|
%"45" = load <4 x i32>, ptr %"51", align 16
|
||||||
%"29" = extractelement <4 x i32> %"46", i8 3
|
store <4 x i32> %"45", ptr addrspace(5) %"41", align 16
|
||||||
store i32 %"29", ptr addrspace(5) %"41", align 4
|
%"47" = load <4 x i32>, ptr addrspace(5) %"41", align 16
|
||||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
%"30" = extractelement <4 x i32> %"47", i8 3
|
||||||
%"49" = load i32, ptr addrspace(5) %"41", align 4
|
store i32 %"30", ptr addrspace(5) %"42", align 4
|
||||||
%"53" = inttoptr i64 %"48" to ptr
|
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i32 %"49", ptr %"53", align 4
|
%"50" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
|
%"54" = inttoptr i64 %"49" to ptr
|
||||||
|
store i32 %"50", ptr %"54", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,86 +10,90 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 {
|
define amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #1 {
|
||||||
%"46" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"47" = alloca i64, align 8, addrspace(5)
|
%"47" = alloca i64, align 8, addrspace(5)
|
||||||
%"48" = alloca i16, align 2, addrspace(5)
|
%"48" = alloca i64, align 8, addrspace(5)
|
||||||
%"49" = alloca i16, align 2, addrspace(5)
|
%"49" = alloca i16, align 2, addrspace(5)
|
||||||
%"50" = alloca i16, align 2, addrspace(5)
|
%"50" = alloca i16, align 2, addrspace(5)
|
||||||
%"51" = alloca i16, align 2, addrspace(5)
|
%"51" = alloca i16, align 2, addrspace(5)
|
||||||
%"52" = alloca <4 x i16>, align 8, addrspace(5)
|
%"52" = alloca i16, align 2, addrspace(5)
|
||||||
|
%"53" = alloca <4 x i16>, align 8, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"53" = load i64, ptr addrspace(4) %"44", align 4
|
br label %"94"
|
||||||
store i64 %"53", ptr addrspace(5) %"46", align 4
|
|
||||||
|
"94": ; preds = %1
|
||||||
%"54" = load i64, ptr addrspace(4) %"45", align 4
|
%"54" = load i64, ptr addrspace(4) %"45", align 4
|
||||||
store i64 %"54", ptr addrspace(5) %"47", align 4
|
store i64 %"54", ptr addrspace(5) %"47", align 4
|
||||||
%"55" = load i64, ptr addrspace(5) %"46", align 4
|
%"55" = load i64, ptr addrspace(4) %"46", align 4
|
||||||
%"83" = inttoptr i64 %"55" to ptr addrspace(1)
|
store i64 %"55", ptr addrspace(5) %"48", align 4
|
||||||
%"32" = load <4 x i8>, ptr addrspace(1) %"83", align 4
|
%"56" = load i64, ptr addrspace(5) %"47", align 4
|
||||||
%"84" = extractelement <4 x i8> %"32", i8 0
|
%"84" = inttoptr i64 %"56" to ptr addrspace(1)
|
||||||
%"85" = extractelement <4 x i8> %"32", i8 1
|
%"33" = load <4 x i8>, ptr addrspace(1) %"84", align 4
|
||||||
%"86" = extractelement <4 x i8> %"32", i8 2
|
%"85" = extractelement <4 x i8> %"33", i8 0
|
||||||
%"87" = extractelement <4 x i8> %"32", i8 3
|
%"86" = extractelement <4 x i8> %"33", i8 1
|
||||||
%"56" = zext i8 %"84" to i16
|
%"87" = extractelement <4 x i8> %"33", i8 2
|
||||||
|
%"88" = extractelement <4 x i8> %"33", i8 3
|
||||||
%"57" = zext i8 %"85" to i16
|
%"57" = zext i8 %"85" to i16
|
||||||
%"58" = zext i8 %"86" to i16
|
%"58" = zext i8 %"86" to i16
|
||||||
%"59" = zext i8 %"87" to i16
|
%"59" = zext i8 %"87" to i16
|
||||||
store i16 %"56", ptr addrspace(5) %"48", align 2
|
%"60" = zext i8 %"88" to i16
|
||||||
store i16 %"57", ptr addrspace(5) %"49", align 2
|
store i16 %"57", ptr addrspace(5) %"49", align 2
|
||||||
store i16 %"58", ptr addrspace(5) %"50", align 2
|
store i16 %"58", ptr addrspace(5) %"50", align 2
|
||||||
store i16 %"59", ptr addrspace(5) %"51", align 2
|
store i16 %"59", ptr addrspace(5) %"51", align 2
|
||||||
%"60" = load i16, ptr addrspace(5) %"49", align 2
|
store i16 %"60", ptr addrspace(5) %"52", align 2
|
||||||
%"61" = load i16, ptr addrspace(5) %"50", align 2
|
%"61" = load i16, ptr addrspace(5) %"50", align 2
|
||||||
%"62" = load i16, ptr addrspace(5) %"51", align 2
|
%"62" = load i16, ptr addrspace(5) %"51", align 2
|
||||||
%"63" = load i16, ptr addrspace(5) %"48", align 2
|
%"63" = load i16, ptr addrspace(5) %"52", align 2
|
||||||
%2 = insertelement <4 x i16> undef, i16 %"60", i8 0
|
%"64" = load i16, ptr addrspace(5) %"49", align 2
|
||||||
%3 = insertelement <4 x i16> %2, i16 %"61", i8 1
|
%2 = insertelement <4 x i16> undef, i16 %"61", i8 0
|
||||||
%4 = insertelement <4 x i16> %3, i16 %"62", i8 2
|
%3 = insertelement <4 x i16> %2, i16 %"62", i8 1
|
||||||
%"33" = insertelement <4 x i16> %4, i16 %"63", i8 3
|
%4 = insertelement <4 x i16> %3, i16 %"63", i8 2
|
||||||
store <4 x i16> %"33", ptr addrspace(5) %"52", align 8
|
%"34" = insertelement <4 x i16> %4, i16 %"64", i8 3
|
||||||
%"65" = load <4 x i16>, ptr addrspace(5) %"52", align 8
|
store <4 x i16> %"34", ptr addrspace(5) %"53", align 8
|
||||||
%"66" = extractelement <4 x i16> %"65", i8 0
|
%"66" = load <4 x i16>, ptr addrspace(5) %"53", align 8
|
||||||
%"67" = extractelement <4 x i16> %"65", i8 1
|
%"67" = extractelement <4 x i16> %"66", i8 0
|
||||||
%"68" = extractelement <4 x i16> %"65", i8 2
|
%"68" = extractelement <4 x i16> %"66", i8 1
|
||||||
%"69" = extractelement <4 x i16> %"65", i8 3
|
%"69" = extractelement <4 x i16> %"66", i8 2
|
||||||
store i16 %"66", ptr addrspace(5) %"50", align 2
|
%"70" = extractelement <4 x i16> %"66", i8 3
|
||||||
store i16 %"67", ptr addrspace(5) %"51", align 2
|
store i16 %"67", ptr addrspace(5) %"51", align 2
|
||||||
store i16 %"68", ptr addrspace(5) %"48", align 2
|
store i16 %"68", ptr addrspace(5) %"52", align 2
|
||||||
store i16 %"69", ptr addrspace(5) %"49", align 2
|
store i16 %"69", ptr addrspace(5) %"49", align 2
|
||||||
%"70" = load i16, ptr addrspace(5) %"50", align 2
|
store i16 %"70", ptr addrspace(5) %"50", align 2
|
||||||
%"71" = load i16, ptr addrspace(5) %"51", align 2
|
%"71" = load i16, ptr addrspace(5) %"51", align 2
|
||||||
%"72" = load i16, ptr addrspace(5) %"48", align 2
|
%"72" = load i16, ptr addrspace(5) %"52", align 2
|
||||||
%"73" = load i16, ptr addrspace(5) %"49", align 2
|
%"73" = load i16, ptr addrspace(5) %"49", align 2
|
||||||
%5 = insertelement <4 x i16> undef, i16 %"70", i8 0
|
%"74" = load i16, ptr addrspace(5) %"50", align 2
|
||||||
%6 = insertelement <4 x i16> %5, i16 %"71", i8 1
|
%5 = insertelement <4 x i16> undef, i16 %"71", i8 0
|
||||||
%7 = insertelement <4 x i16> %6, i16 %"72", i8 2
|
%6 = insertelement <4 x i16> %5, i16 %"72", i8 1
|
||||||
%"36" = insertelement <4 x i16> %7, i16 %"73", i8 3
|
%7 = insertelement <4 x i16> %6, i16 %"73", i8 2
|
||||||
%"74" = extractelement <4 x i16> %"36", i8 0
|
%"37" = insertelement <4 x i16> %7, i16 %"74", i8 3
|
||||||
%"75" = extractelement <4 x i16> %"36", i8 1
|
%"75" = extractelement <4 x i16> %"37", i8 0
|
||||||
%"76" = extractelement <4 x i16> %"36", i8 2
|
%"76" = extractelement <4 x i16> %"37", i8 1
|
||||||
%"77" = extractelement <4 x i16> %"36", i8 3
|
%"77" = extractelement <4 x i16> %"37", i8 2
|
||||||
store i16 %"74", ptr addrspace(5) %"51", align 2
|
%"78" = extractelement <4 x i16> %"37", i8 3
|
||||||
store i16 %"75", ptr addrspace(5) %"48", align 2
|
store i16 %"75", ptr addrspace(5) %"52", align 2
|
||||||
store i16 %"76", ptr addrspace(5) %"49", align 2
|
store i16 %"76", ptr addrspace(5) %"49", align 2
|
||||||
store i16 %"77", ptr addrspace(5) %"50", align 2
|
store i16 %"77", ptr addrspace(5) %"50", align 2
|
||||||
%"78" = load i16, ptr addrspace(5) %"48", align 2
|
store i16 %"78", ptr addrspace(5) %"51", align 2
|
||||||
%"79" = load i16, ptr addrspace(5) %"49", align 2
|
%"79" = load i16, ptr addrspace(5) %"49", align 2
|
||||||
%"80" = load i16, ptr addrspace(5) %"50", align 2
|
%"80" = load i16, ptr addrspace(5) %"50", align 2
|
||||||
%"81" = load i16, ptr addrspace(5) %"51", align 2
|
%"81" = load i16, ptr addrspace(5) %"51", align 2
|
||||||
%"88" = trunc i16 %"78" to i8
|
%"82" = load i16, ptr addrspace(5) %"52", align 2
|
||||||
%"89" = trunc i16 %"79" to i8
|
%"89" = trunc i16 %"79" to i8
|
||||||
%"90" = trunc i16 %"80" to i8
|
%"90" = trunc i16 %"80" to i8
|
||||||
%"91" = trunc i16 %"81" to i8
|
%"91" = trunc i16 %"81" to i8
|
||||||
%8 = insertelement <4 x i8> undef, i8 %"88", i8 0
|
%"92" = trunc i16 %"82" to i8
|
||||||
%9 = insertelement <4 x i8> %8, i8 %"89", i8 1
|
%8 = insertelement <4 x i8> undef, i8 %"89", i8 0
|
||||||
%10 = insertelement <4 x i8> %9, i8 %"90", i8 2
|
%9 = insertelement <4 x i8> %8, i8 %"90", i8 1
|
||||||
%"37" = insertelement <4 x i8> %10, i8 %"91", i8 3
|
%10 = insertelement <4 x i8> %9, i8 %"91", i8 2
|
||||||
%"82" = load i64, ptr addrspace(5) %"47", align 4
|
%"38" = insertelement <4 x i8> %10, i8 %"92", i8 3
|
||||||
%"92" = inttoptr i64 %"82" to ptr addrspace(1)
|
%"83" = load i64, ptr addrspace(5) %"48", align 4
|
||||||
store <4 x i8> %"37", ptr addrspace(1) %"92", align 4
|
%"93" = inttoptr i64 %"83" to ptr addrspace(1)
|
||||||
|
store <4 x i8> %"38", ptr addrspace(1) %"93", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||||
|
|
||||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||||
|
|
||||||
define amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
define amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||||
%"39" = alloca i64, align 8, addrspace(5)
|
|
||||||
%"40" = alloca i64, align 8, addrspace(5)
|
%"40" = alloca i64, align 8, addrspace(5)
|
||||||
%"41" = alloca i32, align 4, addrspace(5)
|
%"41" = alloca i64, align 8, addrspace(5)
|
||||||
%"42" = alloca i32, align 4, addrspace(5)
|
%"42" = alloca i32, align 4, addrspace(5)
|
||||||
|
%"43" = alloca i32, align 4, addrspace(5)
|
||||||
br label %1
|
br label %1
|
||||||
|
|
||||||
1: ; preds = %0
|
1: ; preds = %0
|
||||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
br label %"58"
|
||||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
|
||||||
|
"58": ; preds = %1
|
||||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||||
%"54" = inttoptr i64 %"46" to ptr
|
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||||
%"45" = load i32, ptr %"54", align 4
|
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
store i32 %"45", ptr addrspace(5) %"41", align 4
|
|
||||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
|
||||||
%"55" = inttoptr i64 %"47" to ptr
|
%"55" = inttoptr i64 %"47" to ptr
|
||||||
%"30" = getelementptr inbounds i8, ptr %"55", i64 4
|
%"46" = load i32, ptr %"55", align 4
|
||||||
%"48" = load i32, ptr %"30", align 4
|
store i32 %"46", ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"48", ptr addrspace(5) %"42", align 4
|
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
%"56" = inttoptr i64 %"48" to ptr
|
||||||
|
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||||
|
%"49" = load i32, ptr %"31", align 4
|
||||||
|
store i32 %"49", ptr addrspace(5) %"43", align 4
|
||||||
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
%"51" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
%"49" = xor i32 %"50", %"51"
|
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||||
store i32 %"49", ptr addrspace(5) %"41", align 4
|
%"50" = xor i32 %"51", %"52"
|
||||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||||
%"53" = load i32, ptr addrspace(5) %"41", align 4
|
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||||
%"56" = inttoptr i64 %"52" to ptr
|
%"54" = load i32, ptr addrspace(5) %"42", align 4
|
||||||
store i32 %"53", ptr %"56", align 4
|
%"57" = inttoptr i64 %"53" to ptr
|
||||||
|
store i32 %"54", ptr %"57", align 4
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||||
|
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
Loading…
Add table
Add a link
Reference in a new issue