Update tests

Andrzej Janik 2025-02-24 01:08:03 +00:00
parent 30cbf6dd54
commit 36407dcc3a
93 changed files with 2772 additions and 2264 deletions

View file

@@ -12,21 +12,25 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
 
-define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
-  %"35" = alloca i64, align 8, addrspace(5)
-  %"36" = alloca i32, align 4, addrspace(5)
+define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #1 {
+  %"36" = alloca i64, align 8, addrspace(5)
+  %"37" = alloca i32, align 4, addrspace(5)
   br label %1
 
 1:                                                ; preds = %0
-  %"37" = load i64, ptr addrspace(4) %"34", align 4
-  store i64 %"37", ptr addrspace(5) %"35", align 4
-  %"38" = call i32 @__zluda_ptx_impl_activemask()
-  store i32 %"38", ptr addrspace(5) %"36", align 4
-  %"39" = load i64, ptr addrspace(5) %"35", align 4
-  %"40" = load i32, ptr addrspace(5) %"36", align 4
-  %"41" = inttoptr i64 %"39" to ptr
-  store i32 %"40", ptr %"41", align 4
+  br label %"44"
+
+"44":                                             ; preds = %1
+  %"38" = load i64, ptr addrspace(4) %"35", align 4
+  store i64 %"38", ptr addrspace(5) %"36", align 4
+  %"39" = call i32 @__zluda_ptx_impl_activemask()
+  store i32 %"39", ptr addrspace(5) %"37", align 4
+  %"40" = load i64, ptr addrspace(5) %"36", align 4
+  %"41" = load i32, ptr addrspace(5) %"37", align 4
+  %"42" = inttoptr i64 %"40" to ptr
+  store i32 %"41", ptr %"42", align 4
   ret void
 }
 
-attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
+attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
+attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
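The change repeated across these tests is the attribute split above: helper declarations keep group #0, which now pins "denormal-fp-math" to "dynamic" (mode not known statically, may change at run time), while kernels move to a new group #1 using "preserve-sign" (denormal results may be flushed to sign-preserving zero). A minimal sketch of the two groups in isolation; the function names are invented for illustration and are not part of the diff:

define amdgpu_kernel void @kernel_like() #1 {
  ret void
}

define void @helper_like() #0 {
  ret void
}

attributes #0 = { "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" }
attributes #1 = { "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" }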

View file

@@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
 
-define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
-  %"38" = alloca i64, align 8, addrspace(5)
+define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
   %"39" = alloca i64, align 8, addrspace(5)
   %"40" = alloca i64, align 8, addrspace(5)
   %"41" = alloca i64, align 8, addrspace(5)
+  %"42" = alloca i64, align 8, addrspace(5)
   br label %1
 
 1:                                                ; preds = %0
-  %"42" = load i64, ptr addrspace(4) %"36", align 4
-  store i64 %"42", ptr addrspace(5) %"38", align 4
+  br label %"53"
+
+"53":                                             ; preds = %1
   %"43" = load i64, ptr addrspace(4) %"37", align 4
   store i64 %"43", ptr addrspace(5) %"39", align 4
-  %"45" = load i64, ptr addrspace(5) %"38", align 4
-  %"50" = inttoptr i64 %"45" to ptr
-  %"44" = load i64, ptr %"50", align 4
+  %"44" = load i64, ptr addrspace(4) %"38", align 4
   store i64 %"44", ptr addrspace(5) %"40", align 4
-  %"47" = load i64, ptr addrspace(5) %"40", align 4
-  %"46" = add i64 %"47", 1
-  store i64 %"46", ptr addrspace(5) %"41", align 4
-  %"48" = load i64, ptr addrspace(5) %"39", align 4
-  %"49" = load i64, ptr addrspace(5) %"41", align 4
-  %"51" = inttoptr i64 %"48" to ptr
-  store i64 %"49", ptr %"51", align 4
+  %"46" = load i64, ptr addrspace(5) %"39", align 4
+  %"51" = inttoptr i64 %"46" to ptr
+  %"45" = load i64, ptr %"51", align 4
+  store i64 %"45", ptr addrspace(5) %"41", align 4
+  %"48" = load i64, ptr addrspace(5) %"41", align 4
+  %"47" = add i64 %"48", 1
+  store i64 %"47", ptr addrspace(5) %"42", align 4
+  %"49" = load i64, ptr addrspace(5) %"40", align 4
+  %"50" = load i64, ptr addrspace(5) %"42", align 4
+  %"52" = inttoptr i64 %"49" to ptr
+  store i64 %"50", ptr %"52", align 4
   ret void
 }
 
-attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" }
+attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
+attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
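Another recurring change: each regenerated kernel now enters through an extra unconditional branch into a freshly numbered block (%"53" here) instead of doing its work directly in block %1. The shape in isolation, with an invented function name and block label:

define void @entry_wrapper_shape() {
  br label %1

1:                                                ; preds = %0
  br label %"body"

"body":                                           ; preds = %1
  ret void
}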

View file

@@ -0,0 +1,65 @@
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @add_ftz(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #1 {
%"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca float, align 4, addrspace(5)
%"47" = alloca float, align 4, addrspace(5)
%"48" = alloca float, align 4, addrspace(5)
%"49" = alloca float, align 4, addrspace(5)
br label %1
1: ; preds = %0
br label %"70"
"70": ; preds = %1
%"50" = load i64, ptr addrspace(4) %"42", align 4
store i64 %"50", ptr addrspace(5) %"44", align 4
%"51" = load i64, ptr addrspace(4) %"43", align 4
store i64 %"51", ptr addrspace(5) %"45", align 4
%"53" = load i64, ptr addrspace(5) %"44", align 4
%"66" = inttoptr i64 %"53" to ptr
%"52" = load float, ptr %"66", align 4
store float %"52", ptr addrspace(5) %"46", align 4
%"54" = load i64, ptr addrspace(5) %"44", align 4
%"67" = inttoptr i64 %"54" to ptr
%"33" = getelementptr inbounds i8, ptr %"67", i64 4
%"55" = load float, ptr %"33", align 4
store float %"55", ptr addrspace(5) %"47", align 4
%"57" = load float, ptr addrspace(5) %"46", align 4
%"58" = load float, ptr addrspace(5) %"47", align 4
%"56" = fadd float %"57", %"58"
store float %"56", ptr addrspace(5) %"48", align 4
%"60" = load float, ptr addrspace(5) %"46", align 4
%"61" = load float, ptr addrspace(5) %"47", align 4
call void @llvm.amdgcn.s.setreg(i32 2305, i32 3)
%"59" = fadd float %"60", %"61"
store float %"59", ptr addrspace(5) %"49", align 4
%"62" = load i64, ptr addrspace(5) %"45", align 4
%"63" = load float, ptr addrspace(5) %"48", align 4
%"68" = inttoptr i64 %"62" to ptr
store float %"63", ptr %"68", align 4
%"64" = load i64, ptr addrspace(5) %"45", align 4
%"69" = inttoptr i64 %"64" to ptr
%"35" = getelementptr inbounds i8, ptr %"69", i64 4
%"65" = load float, ptr addrspace(5) %"49", align 4
store float %"65", ptr %"35", align 4
ret void
}
; Function Attrs: nocallback nofree nosync nounwind willreturn
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind willreturn }
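The new add_ftz test mixes a default-mode add with an ftz add in one kernel and switches the hardware mode between them via llvm.amdgcn.s.setreg. A small sketch with an invented function name; decoding the immediate is an inference from the s_setreg SIMM16 layout (id | offset << 6 | (size - 1) << 11), not something the test itself states:

declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)

define void @set_f32_denorm_field() {
  ; 2305 = 1 + (4 << 6) + ((2 - 1) << 11): hardware register 1 (MODE),
  ; a 2-bit field starting at bit 4, which holds the f32 denormal-mode bits.
  call void @llvm.amdgcn.s.setreg(i32 2305, i32 3)
  ret void
}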

View file

@@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"42" = load i64, ptr addrspace(4) %"36", align 4 br label %"53"
store i64 %"42", ptr addrspace(5) %"38", align 4
"53": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr addrspace(1)
%"44" = load i64, ptr addrspace(1) %"50", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 4 %"46" = load i64, ptr addrspace(5) %"39", align 4
%"46" = add i64 %"47", 1 %"51" = inttoptr i64 %"46" to ptr addrspace(1)
store i64 %"46", ptr addrspace(5) %"41", align 4 %"45" = load i64, ptr addrspace(1) %"51", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4 store i64 %"45", ptr addrspace(5) %"41", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4 %"48" = load i64, ptr addrspace(5) %"41", align 4
%"51" = inttoptr i64 %"48" to ptr addrspace(1) %"47" = add i64 %"48", 1
store i64 %"49", ptr addrspace(1) %"51", align 4 store i64 %"47", ptr addrspace(5) %"42", align 4
%"49" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i64, ptr addrspace(5) %"42", align 4
%"52" = inttoptr i64 %"49" to ptr addrspace(1)
store i64 %"50", ptr addrspace(1) %"52", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

View file

@@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"42" = load i64, ptr addrspace(4) %"36", align 4 br label %"53"
store i64 %"42", ptr addrspace(5) %"38", align 4
"53": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr
%"44" = load i64, ptr %"50", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 4 %"46" = load i64, ptr addrspace(5) %"39", align 4
%"46" = add i64 %"47", 1 %"51" = inttoptr i64 %"46" to ptr
store i64 %"46", ptr addrspace(5) %"41", align 4 %"45" = load i64, ptr %"51", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4 store i64 %"45", ptr addrspace(5) %"41", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4 %"48" = load i64, ptr addrspace(5) %"41", align 4
%"51" = inttoptr i64 %"48" to ptr %"47" = add i64 %"48", 1
store i64 %"49", ptr %"51", align 4 store i64 %"47", ptr addrspace(5) %"42", align 4
%"49" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i64, ptr addrspace(5) %"42", align 4
%"52" = inttoptr i64 %"49" to ptr
store i64 %"50", ptr %"52", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

View file

@@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5) %"42" = alloca i32, align 4, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"37", align 4 br label %"61"
store i64 %"43", ptr addrspace(5) %"39", align 4
"61": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
%"54" = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"41", align 4
%"45" = load i32, ptr %"54", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
store i32 %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4
%"55" = inttoptr i64 %"47" to ptr %"55" = inttoptr i64 %"47" to ptr
%"30" = getelementptr inbounds i8, ptr %"55", i64 4 %"46" = load i32, ptr %"55", align 4
%"48" = load i32, ptr %"30", align 4 store i32 %"46", ptr addrspace(5) %"42", align 4
store i32 %"48", ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i32, ptr addrspace(5) %"41", align 4 %"56" = inttoptr i64 %"48" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
%"49" = load i32, ptr %"31", align 4
store i32 %"49", ptr addrspace(5) %"43", align 4
%"51" = load i32, ptr addrspace(5) %"42", align 4 %"51" = load i32, ptr addrspace(5) %"42", align 4
%"56" = and i32 %"50", %"51" %"52" = load i32, ptr addrspace(5) %"43", align 4
store i32 %"56", ptr addrspace(5) %"41", align 4 %"57" = and i32 %"51", %"52"
%"52" = load i64, ptr addrspace(5) %"40", align 4 store i32 %"57", ptr addrspace(5) %"42", align 4
%"53" = load i32, ptr addrspace(5) %"41", align 4 %"53" = load i64, ptr addrspace(5) %"41", align 4
%"59" = inttoptr i64 %"52" to ptr %"54" = load i32, ptr addrspace(5) %"42", align 4
store i32 %"53", ptr %"59", align 4 %"60" = inttoptr i64 %"53" to ptr
store i32 %"54", ptr %"60", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

View file

@@ -12,44 +12,48 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
%"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca i32, align 4, addrspace(5) %"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i32, align 4, addrspace(5) %"45" = alloca i32, align 4, addrspace(5)
%"46" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"46" = load i64, ptr addrspace(4) %"40", align 4 br label %"68"
store i64 %"46", ptr addrspace(5) %"42", align 4
"68": ; preds = %1
%"47" = load i64, ptr addrspace(4) %"41", align 4 %"47" = load i64, ptr addrspace(4) %"41", align 4
store i64 %"47", ptr addrspace(5) %"43", align 4 store i64 %"47", ptr addrspace(5) %"43", align 4
%"49" = load i64, ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(4) %"42", align 4
%"60" = inttoptr i64 %"49" to ptr store i64 %"48", ptr addrspace(5) %"44", align 4
%"48" = load i32, ptr %"60", align 4 %"50" = load i64, ptr addrspace(5) %"43", align 4
store i32 %"48", ptr addrspace(5) %"44", align 4
%"50" = load i64, ptr addrspace(5) %"42", align 4
%"61" = inttoptr i64 %"50" to ptr %"61" = inttoptr i64 %"50" to ptr
%"31" = getelementptr inbounds i8, ptr %"61", i64 4 %"49" = load i32, ptr %"61", align 4
%"51" = load i32, ptr %"31", align 4 store i32 %"49", ptr addrspace(5) %"45", align 4
store i32 %"51", ptr addrspace(5) %"45", align 4 %"51" = load i64, ptr addrspace(5) %"43", align 4
%"52" = load i32, ptr addrspace(5) %"44", align 4 %"62" = inttoptr i64 %"51" to ptr
store i32 %"52", ptr addrspace(3) @shared_mem, align 4 %"32" = getelementptr inbounds i8, ptr %"62", i64 4
%"54" = load i32, ptr addrspace(5) %"45", align 4 %"52" = load i32, ptr %"32", align 4
%2 = atomicrmw add ptr addrspace(3) @shared_mem, i32 %"54" syncscope("agent-one-as") monotonic, align 4 store i32 %"52", ptr addrspace(5) %"46", align 4
store i32 %2, ptr addrspace(5) %"44", align 4 %"53" = load i32, ptr addrspace(5) %"45", align 4
%"55" = load i32, ptr addrspace(3) @shared_mem, align 4 store i32 %"53", ptr addrspace(3) @shared_mem, align 4
store i32 %"55", ptr addrspace(5) %"45", align 4 %"55" = load i32, ptr addrspace(5) %"46", align 4
%"56" = load i64, ptr addrspace(5) %"43", align 4 %2 = atomicrmw add ptr addrspace(3) @shared_mem, i32 %"55" syncscope("agent-one-as") monotonic, align 4
%"57" = load i32, ptr addrspace(5) %"44", align 4 store i32 %2, ptr addrspace(5) %"45", align 4
%"65" = inttoptr i64 %"56" to ptr %"56" = load i32, ptr addrspace(3) @shared_mem, align 4
store i32 %"57", ptr %"65", align 4 store i32 %"56", ptr addrspace(5) %"46", align 4
%"58" = load i64, ptr addrspace(5) %"43", align 4 %"57" = load i64, ptr addrspace(5) %"44", align 4
%"66" = inttoptr i64 %"58" to ptr %"58" = load i32, ptr addrspace(5) %"45", align 4
%"33" = getelementptr inbounds i8, ptr %"66", i64 4 %"66" = inttoptr i64 %"57" to ptr
%"59" = load i32, ptr addrspace(5) %"45", align 4 store i32 %"58", ptr %"66", align 4
store i32 %"59", ptr %"33", align 4 %"59" = load i64, ptr addrspace(5) %"44", align 4
%"67" = inttoptr i64 %"59" to ptr
%"34" = getelementptr inbounds i8, ptr %"67", i64 4
%"60" = load i32, ptr addrspace(5) %"46", align 4
store i32 %"60", ptr %"34", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
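In atom_add the shared-memory atomic becomes an LLVM atomicrmw on an addrspace(3) global with relaxed ("monotonic") ordering and the AMDGPU "agent-one-as" sync scope, returning the previous value. A self-contained sketch; the global and function names are invented:

@lds_counter = internal addrspace(3) global i32 undef, align 4

define i32 @atomic_add_lds(i32 %v) {
  ; returns the value stored in LDS before the addition
  %old = atomicrmw add ptr addrspace(3) @lds_counter, i32 %v syncscope("agent-one-as") monotonic, align 4
  ret i32 %old
}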

View file

@@ -12,44 +12,48 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
%"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca float, align 4, addrspace(5) %"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca float, align 4, addrspace(5) %"45" = alloca float, align 4, addrspace(5)
%"46" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"46" = load i64, ptr addrspace(4) %"40", align 4 br label %"68"
store i64 %"46", ptr addrspace(5) %"42", align 4
"68": ; preds = %1
%"47" = load i64, ptr addrspace(4) %"41", align 4 %"47" = load i64, ptr addrspace(4) %"41", align 4
store i64 %"47", ptr addrspace(5) %"43", align 4 store i64 %"47", ptr addrspace(5) %"43", align 4
%"49" = load i64, ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(4) %"42", align 4
%"60" = inttoptr i64 %"49" to ptr store i64 %"48", ptr addrspace(5) %"44", align 4
%"48" = load float, ptr %"60", align 4 %"50" = load i64, ptr addrspace(5) %"43", align 4
store float %"48", ptr addrspace(5) %"44", align 4
%"50" = load i64, ptr addrspace(5) %"42", align 4
%"61" = inttoptr i64 %"50" to ptr %"61" = inttoptr i64 %"50" to ptr
%"31" = getelementptr inbounds i8, ptr %"61", i64 4 %"49" = load float, ptr %"61", align 4
%"51" = load float, ptr %"31", align 4 store float %"49", ptr addrspace(5) %"45", align 4
store float %"51", ptr addrspace(5) %"45", align 4 %"51" = load i64, ptr addrspace(5) %"43", align 4
%"52" = load float, ptr addrspace(5) %"44", align 4 %"62" = inttoptr i64 %"51" to ptr
store float %"52", ptr addrspace(3) @shared_mem, align 4 %"32" = getelementptr inbounds i8, ptr %"62", i64 4
%"54" = load float, ptr addrspace(5) %"45", align 4 %"52" = load float, ptr %"32", align 4
%2 = atomicrmw fadd ptr addrspace(3) @shared_mem, float %"54" syncscope("agent-one-as") monotonic, align 4 store float %"52", ptr addrspace(5) %"46", align 4
store float %2, ptr addrspace(5) %"44", align 4 %"53" = load float, ptr addrspace(5) %"45", align 4
%"55" = load float, ptr addrspace(3) @shared_mem, align 4 store float %"53", ptr addrspace(3) @shared_mem, align 4
store float %"55", ptr addrspace(5) %"45", align 4 %"55" = load float, ptr addrspace(5) %"46", align 4
%"56" = load i64, ptr addrspace(5) %"43", align 4 %2 = atomicrmw fadd ptr addrspace(3) @shared_mem, float %"55" syncscope("agent-one-as") monotonic, align 4
%"57" = load float, ptr addrspace(5) %"44", align 4 store float %2, ptr addrspace(5) %"45", align 4
%"65" = inttoptr i64 %"56" to ptr %"56" = load float, ptr addrspace(3) @shared_mem, align 4
store float %"57", ptr %"65", align 4 store float %"56", ptr addrspace(5) %"46", align 4
%"58" = load i64, ptr addrspace(5) %"43", align 4 %"57" = load i64, ptr addrspace(5) %"44", align 4
%"66" = inttoptr i64 %"58" to ptr %"58" = load float, ptr addrspace(5) %"45", align 4
%"33" = getelementptr inbounds i8, ptr %"66", i64 4 %"66" = inttoptr i64 %"57" to ptr
%"59" = load float, ptr addrspace(5) %"45", align 4 store float %"58", ptr %"66", align 4
store float %"59", ptr %"33", align 4 %"59" = load i64, ptr addrspace(5) %"44", align 4
%"67" = inttoptr i64 %"59" to ptr
%"34" = getelementptr inbounds i8, ptr %"67", i64 4
%"60" = load float, ptr addrspace(5) %"46", align 4
store float %"60", ptr %"34", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

View file

@@ -10,44 +10,48 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 { define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #1 {
%"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5) %"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca i32, align 4, addrspace(5) %"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca i32, align 4, addrspace(5) %"47" = alloca i32, align 4, addrspace(5)
%"48" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"48" = load i64, ptr addrspace(4) %"42", align 4 br label %"69"
store i64 %"48", ptr addrspace(5) %"44", align 4
"69": ; preds = %1
%"49" = load i64, ptr addrspace(4) %"43", align 4 %"49" = load i64, ptr addrspace(4) %"43", align 4
store i64 %"49", ptr addrspace(5) %"45", align 4 store i64 %"49", ptr addrspace(5) %"45", align 4
%"51" = load i64, ptr addrspace(5) %"44", align 4 %"50" = load i64, ptr addrspace(4) %"44", align 4
%"61" = inttoptr i64 %"51" to ptr store i64 %"50", ptr addrspace(5) %"46", align 4
%"50" = load i32, ptr %"61", align 4 %"52" = load i64, ptr addrspace(5) %"45", align 4
store i32 %"50", ptr addrspace(5) %"46", align 4
%"52" = load i64, ptr addrspace(5) %"44", align 4
%"62" = inttoptr i64 %"52" to ptr %"62" = inttoptr i64 %"52" to ptr
%"30" = getelementptr inbounds i8, ptr %"62", i64 4 %"51" = load i32, ptr %"62", align 4
%"54" = load i32, ptr addrspace(5) %"46", align 4 store i32 %"51", ptr addrspace(5) %"47", align 4
%2 = cmpxchg ptr %"30", i32 %"54", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4 %"53" = load i64, ptr addrspace(5) %"45", align 4
%"63" = extractvalue { i32, i1 } %2, 0 %"63" = inttoptr i64 %"53" to ptr
store i32 %"63", ptr addrspace(5) %"46", align 4 %"31" = getelementptr inbounds i8, ptr %"63", i64 4
%"55" = load i64, ptr addrspace(5) %"44", align 4 %"55" = load i32, ptr addrspace(5) %"47", align 4
%"65" = inttoptr i64 %"55" to ptr %2 = cmpxchg ptr %"31", i32 %"55", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
%"33" = getelementptr inbounds i8, ptr %"65", i64 4 %"64" = extractvalue { i32, i1 } %2, 0
%"56" = load i32, ptr %"33", align 4 store i32 %"64", ptr addrspace(5) %"47", align 4
store i32 %"56", ptr addrspace(5) %"47", align 4 %"56" = load i64, ptr addrspace(5) %"45", align 4
%"57" = load i64, ptr addrspace(5) %"45", align 4 %"66" = inttoptr i64 %"56" to ptr
%"58" = load i32, ptr addrspace(5) %"46", align 4 %"34" = getelementptr inbounds i8, ptr %"66", i64 4
%"66" = inttoptr i64 %"57" to ptr %"57" = load i32, ptr %"34", align 4
store i32 %"58", ptr %"66", align 4 store i32 %"57", ptr addrspace(5) %"48", align 4
%"59" = load i64, ptr addrspace(5) %"45", align 4 %"58" = load i64, ptr addrspace(5) %"46", align 4
%"67" = inttoptr i64 %"59" to ptr %"59" = load i32, ptr addrspace(5) %"47", align 4
%"35" = getelementptr inbounds i8, ptr %"67", i64 4 %"67" = inttoptr i64 %"58" to ptr
%"60" = load i32, ptr addrspace(5) %"47", align 4 store i32 %"59", ptr %"67", align 4
store i32 %"60", ptr %"35", align 4 %"60" = load i64, ptr addrspace(5) %"46", align 4
%"68" = inttoptr i64 %"60" to ptr
%"36" = getelementptr inbounds i8, ptr %"68", i64 4
%"61" = load i32, ptr addrspace(5) %"48", align 4
store i32 %"61", ptr %"36", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
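atom_cas maps to cmpxchg, which yields a { value, success } pair; the test keeps only the original value via extractvalue. Sketch with invented names:

define i32 @cas_once(ptr %p, i32 %expected, i32 %desired) {
  ; monotonic/monotonic: relaxed ordering for both the success and failure paths
  %pair = cmpxchg ptr %p, i32 %expected, i32 %desired syncscope("agent-one-as") monotonic monotonic, align 4
  %old = extractvalue { i32, i1 } %pair, 0
  ret i32 %old
}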

View file

@@ -10,46 +10,50 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 { define amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #1 {
%"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5) %"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca i32, align 4, addrspace(5) %"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca i32, align 4, addrspace(5) %"47" = alloca i32, align 4, addrspace(5)
%"48" = alloca i32, align 4, addrspace(5) %"48" = alloca i32, align 4, addrspace(5)
%"49" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"49" = load i64, ptr addrspace(4) %"42", align 4 br label %"70"
store i64 %"49", ptr addrspace(5) %"44", align 4
"70": ; preds = %1
%"50" = load i64, ptr addrspace(4) %"43", align 4 %"50" = load i64, ptr addrspace(4) %"43", align 4
store i64 %"50", ptr addrspace(5) %"45", align 4 store i64 %"50", ptr addrspace(5) %"45", align 4
%"52" = load i64, ptr addrspace(5) %"44", align 4 %"51" = load i64, ptr addrspace(4) %"44", align 4
%"63" = inttoptr i64 %"52" to ptr store i64 %"51", ptr addrspace(5) %"46", align 4
%2 = atomicrmw uinc_wrap ptr %"63", i32 101 syncscope("agent-one-as") monotonic, align 4 %"53" = load i64, ptr addrspace(5) %"45", align 4
store i32 %2, ptr addrspace(5) %"46", align 4 %"64" = inttoptr i64 %"53" to ptr
%"54" = load i64, ptr addrspace(5) %"44", align 4 %2 = atomicrmw uinc_wrap ptr %"64", i32 101 syncscope("agent-one-as") monotonic, align 4
%"64" = inttoptr i64 %"54" to ptr addrspace(1) store i32 %2, ptr addrspace(5) %"47", align 4
%3 = atomicrmw uinc_wrap ptr addrspace(1) %"64", i32 101 syncscope("agent-one-as") monotonic, align 4 %"55" = load i64, ptr addrspace(5) %"45", align 4
store i32 %3, ptr addrspace(5) %"47", align 4 %"65" = inttoptr i64 %"55" to ptr addrspace(1)
%"56" = load i64, ptr addrspace(5) %"44", align 4 %3 = atomicrmw uinc_wrap ptr addrspace(1) %"65", i32 101 syncscope("agent-one-as") monotonic, align 4
%"65" = inttoptr i64 %"56" to ptr store i32 %3, ptr addrspace(5) %"48", align 4
%"55" = load i32, ptr %"65", align 4
store i32 %"55", ptr addrspace(5) %"48", align 4
%"57" = load i64, ptr addrspace(5) %"45", align 4 %"57" = load i64, ptr addrspace(5) %"45", align 4
%"58" = load i32, ptr addrspace(5) %"46", align 4
%"66" = inttoptr i64 %"57" to ptr %"66" = inttoptr i64 %"57" to ptr
store i32 %"58", ptr %"66", align 4 %"56" = load i32, ptr %"66", align 4
%"59" = load i64, ptr addrspace(5) %"45", align 4 store i32 %"56", ptr addrspace(5) %"49", align 4
%"67" = inttoptr i64 %"59" to ptr %"58" = load i64, ptr addrspace(5) %"46", align 4
%"33" = getelementptr inbounds i8, ptr %"67", i64 4 %"59" = load i32, ptr addrspace(5) %"47", align 4
%"60" = load i32, ptr addrspace(5) %"47", align 4 %"67" = inttoptr i64 %"58" to ptr
store i32 %"60", ptr %"33", align 4 store i32 %"59", ptr %"67", align 4
%"61" = load i64, ptr addrspace(5) %"45", align 4 %"60" = load i64, ptr addrspace(5) %"46", align 4
%"68" = inttoptr i64 %"61" to ptr %"68" = inttoptr i64 %"60" to ptr
%"35" = getelementptr inbounds i8, ptr %"68", i64 8 %"34" = getelementptr inbounds i8, ptr %"68", i64 4
%"62" = load i32, ptr addrspace(5) %"48", align 4 %"61" = load i32, ptr addrspace(5) %"48", align 4
store i32 %"62", ptr %"35", align 4 store i32 %"61", ptr %"34", align 4
%"62" = load i64, ptr addrspace(5) %"46", align 4
%"69" = inttoptr i64 %"62" to ptr
%"36" = getelementptr inbounds i8, ptr %"69", i64 8
%"63" = load i32, ptr addrspace(5) %"49", align 4
store i32 %"63", ptr %"36", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
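atom_inc uses atomicrmw uinc_wrap, which stores old >= bound ? 0 : old + 1 back to memory while the instruction itself yields the old value. Sketch with invented names:

define i32 @inc_wrap(ptr %p, i32 %bound) {
  %old = atomicrmw uinc_wrap ptr %p, i32 %bound syncscope("agent-one-as") monotonic, align 4
  ret i32 %old
}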

View file

@@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca double, align 8, addrspace(5) %"38" = alloca double, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"41" = load double, ptr addrspace(4) %"35", align 8 br label %"54"
store double %"41", ptr addrspace(5) %"37", align 8
%"42" = load i64, ptr addrspace(4) %"36", align 4 "54": ; preds = %1
store i64 %"42", ptr addrspace(5) %"39", align 4 %"42" = load double, ptr addrspace(4) %"36", align 8
%"44" = load double, ptr addrspace(5) %"37", align 8 store double %"42", ptr addrspace(5) %"38", align 8
%"50" = bitcast double %"44" to i64 %"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"50", ptr addrspace(5) %"38", align 4 store i64 %"43", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4 %"45" = load double, ptr addrspace(5) %"38", align 8
%"51" = inttoptr i64 %"46" to ptr %"51" = bitcast double %"45" to i64
%"45" = load i64, ptr %"51", align 4 store i64 %"51", ptr addrspace(5) %"39", align 4
store i64 %"45", ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4 %"47" = load i64, ptr addrspace(5) %"39", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 4
%"52" = inttoptr i64 %"47" to ptr %"52" = inttoptr i64 %"47" to ptr
store i64 %"48", ptr %"52", align 4 %"46" = load i64, ptr %"52", align 4
store i64 %"46", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4
%"53" = inttoptr i64 %"48" to ptr
store i64 %"49", ptr %"53", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
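b64tof64 reinterprets the 64-bit payload with a plain bitcast rather than any numeric conversion. Sketch with an invented name:

define i64 @f64_bits(double %x) {
  %bits = bitcast double %x to i64
  ret i64 %bits
}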

View file

@@ -12,43 +12,47 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
%"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca i32, align 4, addrspace(5) %"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i32, align 4, addrspace(5) %"45" = alloca i32, align 4, addrspace(5)
%"46" = alloca i32, align 4, addrspace(5) %"46" = alloca i32, align 4, addrspace(5)
%"47" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"47" = load i64, ptr addrspace(4) %"40", align 4 br label %"66"
store i64 %"47", ptr addrspace(5) %"42", align 4
"66": ; preds = %1
%"48" = load i64, ptr addrspace(4) %"41", align 4 %"48" = load i64, ptr addrspace(4) %"41", align 4
store i64 %"48", ptr addrspace(5) %"43", align 4 store i64 %"48", ptr addrspace(5) %"43", align 4
%"50" = load i64, ptr addrspace(5) %"42", align 4 %"49" = load i64, ptr addrspace(4) %"42", align 4
%"61" = inttoptr i64 %"50" to ptr store i64 %"49", ptr addrspace(5) %"44", align 4
%"49" = load i32, ptr %"61", align 4 %"51" = load i64, ptr addrspace(5) %"43", align 4
store i32 %"49", ptr addrspace(5) %"44", align 4
%"51" = load i64, ptr addrspace(5) %"42", align 4
%"62" = inttoptr i64 %"51" to ptr %"62" = inttoptr i64 %"51" to ptr
%"31" = getelementptr inbounds i8, ptr %"62", i64 4 %"50" = load i32, ptr %"62", align 4
%"52" = load i32, ptr %"31", align 4 store i32 %"50", ptr addrspace(5) %"45", align 4
store i32 %"52", ptr addrspace(5) %"45", align 4 %"52" = load i64, ptr addrspace(5) %"43", align 4
%"53" = load i64, ptr addrspace(5) %"42", align 4 %"63" = inttoptr i64 %"52" to ptr
%"63" = inttoptr i64 %"53" to ptr %"32" = getelementptr inbounds i8, ptr %"63", i64 4
%"33" = getelementptr inbounds i8, ptr %"63", i64 8 %"53" = load i32, ptr %"32", align 4
%"54" = load i32, ptr %"33", align 4 store i32 %"53", ptr addrspace(5) %"46", align 4
store i32 %"54", ptr addrspace(5) %"46", align 4 %"54" = load i64, ptr addrspace(5) %"43", align 4
%"56" = load i32, ptr addrspace(5) %"44", align 4 %"64" = inttoptr i64 %"54" to ptr
%"34" = getelementptr inbounds i8, ptr %"64", i64 8
%"55" = load i32, ptr %"34", align 4
store i32 %"55", ptr addrspace(5) %"47", align 4
%"57" = load i32, ptr addrspace(5) %"45", align 4 %"57" = load i32, ptr addrspace(5) %"45", align 4
%"58" = load i32, ptr addrspace(5) %"46", align 4 %"58" = load i32, ptr addrspace(5) %"46", align 4
%"55" = call i32 @__zluda_ptx_impl_bfe_u32(i32 %"56", i32 %"57", i32 %"58") %"59" = load i32, ptr addrspace(5) %"47", align 4
store i32 %"55", ptr addrspace(5) %"44", align 4 %"56" = call i32 @__zluda_ptx_impl_bfe_u32(i32 %"57", i32 %"58", i32 %"59")
%"59" = load i64, ptr addrspace(5) %"43", align 4 store i32 %"56", ptr addrspace(5) %"45", align 4
%"60" = load i32, ptr addrspace(5) %"44", align 4 %"60" = load i64, ptr addrspace(5) %"44", align 4
%"64" = inttoptr i64 %"59" to ptr %"61" = load i32, ptr addrspace(5) %"45", align 4
store i32 %"60", ptr %"64", align 4 %"65" = inttoptr i64 %"60" to ptr
store i32 %"61", ptr %"65", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
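bfe calls into the __zluda_ptx_impl_bfe_u32 helper, whose body is not part of this diff. A hypothetical sketch of the basic extraction it presumably performs (len bits of val starting at pos, zero-extended); PTX's clamping of pos/len and the len == 32 case are deliberately omitted:

define i32 @bfe_u32_sketch(i32 %val, i32 %pos, i32 %len) {
  %shifted = lshr i32 %val, %pos
  %one = shl i32 1, %len
  %mask = sub i32 %one, 1
  %field = and i32 %shifted, %mask
  ret i32 %field
}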

View file

@@ -12,50 +12,54 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 { define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
%"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5) %"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca i32, align 4, addrspace(5) %"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca i32, align 4, addrspace(5) %"48" = alloca i32, align 4, addrspace(5)
%"49" = alloca i32, align 4, addrspace(5) %"49" = alloca i32, align 4, addrspace(5)
%"50" = alloca i32, align 4, addrspace(5) %"50" = alloca i32, align 4, addrspace(5)
%"51" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"51" = load i64, ptr addrspace(4) %"43", align 4 br label %"77"
store i64 %"51", ptr addrspace(5) %"45", align 4
"77": ; preds = %1
%"52" = load i64, ptr addrspace(4) %"44", align 4 %"52" = load i64, ptr addrspace(4) %"44", align 4
store i64 %"52", ptr addrspace(5) %"46", align 4 store i64 %"52", ptr addrspace(5) %"46", align 4
%"54" = load i64, ptr addrspace(5) %"45", align 4 %"53" = load i64, ptr addrspace(4) %"45", align 4
%"68" = inttoptr i64 %"54" to ptr store i64 %"53", ptr addrspace(5) %"47", align 4
%"53" = load i32, ptr %"68", align 4 %"55" = load i64, ptr addrspace(5) %"46", align 4
store i32 %"53", ptr addrspace(5) %"47", align 4
%"55" = load i64, ptr addrspace(5) %"45", align 4
%"69" = inttoptr i64 %"55" to ptr %"69" = inttoptr i64 %"55" to ptr
%"32" = getelementptr inbounds i8, ptr %"69", i64 4 %"54" = load i32, ptr %"69", align 4
%"56" = load i32, ptr %"32", align 4 store i32 %"54", ptr addrspace(5) %"48", align 4
store i32 %"56", ptr addrspace(5) %"48", align 4 %"56" = load i64, ptr addrspace(5) %"46", align 4
%"57" = load i64, ptr addrspace(5) %"45", align 4 %"70" = inttoptr i64 %"56" to ptr
%"70" = inttoptr i64 %"57" to ptr %"33" = getelementptr inbounds i8, ptr %"70", i64 4
%"34" = getelementptr inbounds i8, ptr %"70", i64 8 %"57" = load i32, ptr %"33", align 4
%"58" = load i32, ptr %"34", align 4 store i32 %"57", ptr addrspace(5) %"49", align 4
store i32 %"58", ptr addrspace(5) %"49", align 4 %"58" = load i64, ptr addrspace(5) %"46", align 4
%"59" = load i64, ptr addrspace(5) %"45", align 4 %"71" = inttoptr i64 %"58" to ptr
%"71" = inttoptr i64 %"59" to ptr %"35" = getelementptr inbounds i8, ptr %"71", i64 8
%"36" = getelementptr inbounds i8, ptr %"71", i64 12 %"59" = load i32, ptr %"35", align 4
%"60" = load i32, ptr %"36", align 4 store i32 %"59", ptr addrspace(5) %"50", align 4
store i32 %"60", ptr addrspace(5) %"50", align 4 %"60" = load i64, ptr addrspace(5) %"46", align 4
%"62" = load i32, ptr addrspace(5) %"47", align 4 %"72" = inttoptr i64 %"60" to ptr
%"37" = getelementptr inbounds i8, ptr %"72", i64 12
%"61" = load i32, ptr %"37", align 4
store i32 %"61", ptr addrspace(5) %"51", align 4
%"63" = load i32, ptr addrspace(5) %"48", align 4 %"63" = load i32, ptr addrspace(5) %"48", align 4
%"64" = load i32, ptr addrspace(5) %"49", align 4 %"64" = load i32, ptr addrspace(5) %"49", align 4
%"65" = load i32, ptr addrspace(5) %"50", align 4 %"65" = load i32, ptr addrspace(5) %"50", align 4
%"72" = call i32 @__zluda_ptx_impl_bfi_b32(i32 %"62", i32 %"63", i32 %"64", i32 %"65") %"66" = load i32, ptr addrspace(5) %"51", align 4
store i32 %"72", ptr addrspace(5) %"47", align 4 %"73" = call i32 @__zluda_ptx_impl_bfi_b32(i32 %"63", i32 %"64", i32 %"65", i32 %"66")
%"66" = load i64, ptr addrspace(5) %"46", align 4 store i32 %"73", ptr addrspace(5) %"48", align 4
%"67" = load i32, ptr addrspace(5) %"47", align 4 %"67" = load i64, ptr addrspace(5) %"47", align 4
%"75" = inttoptr i64 %"66" to ptr %"68" = load i32, ptr addrspace(5) %"48", align 4
store i32 %"67", ptr %"75", align 4 %"76" = inttoptr i64 %"67" to ptr
store i32 %"68", ptr %"76", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

View file

@@ -10,34 +10,38 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { define amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"50" = alloca i64, align 8, addrspace(5) %"44" = alloca i64, align 8, addrspace(5)
%"51" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"44" = load i64, ptr addrspace(4) %"38", align 4 br label %"58"
store i64 %"44", ptr addrspace(5) %"40", align 4
"58": ; preds = %1
%"45" = load i64, ptr addrspace(4) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"45", ptr addrspace(5) %"41", align 4 store i64 %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 4 %"46" = load i64, ptr addrspace(4) %"40", align 4
%"55" = inttoptr i64 %"47" to ptr
%"46" = load i64, ptr %"55", align 4
store i64 %"46", ptr addrspace(5) %"42", align 4 store i64 %"46", ptr addrspace(5) %"42", align 4
%"49" = load i64, ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(5) %"41", align 4
%"48" = add i64 %"49", 1 %"56" = inttoptr i64 %"48" to ptr
store i64 %"48", ptr addrspace(5) %"43", align 4 %"47" = load i64, ptr %"56", align 4
%"52" = load i64, ptr addrspace(5) %"50", align 4 store i64 %"47", ptr addrspace(5) %"43", align 4
%"51" = add i64 %"52", 1 %"50" = load i64, ptr addrspace(5) %"43", align 4
store i64 %"51", ptr addrspace(5) %"50", align 4 %"49" = add i64 %"50", 1
%"53" = load i64, ptr addrspace(5) %"41", align 4 store i64 %"49", ptr addrspace(5) %"44", align 4
%"54" = load i64, ptr addrspace(5) %"43", align 4 %"53" = load i64, ptr addrspace(5) %"51", align 4
%"56" = inttoptr i64 %"53" to ptr %"52" = add i64 %"53", 1
store i64 %"54", ptr %"56", align 4 store i64 %"52", ptr addrspace(5) %"51", align 4
%"54" = load i64, ptr addrspace(5) %"42", align 4
%"55" = load i64, ptr addrspace(5) %"44", align 4
%"57" = inttoptr i64 %"54" to ptr
store i64 %"55", ptr %"57", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

View file

@@ -10,42 +10,46 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { define amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
%"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca i64, align 8, addrspace(5) %"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5) %"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"46" = load i64, ptr addrspace(4) %"40", align 4 br label %"59"
store i64 %"46", ptr addrspace(5) %"42", align 4
"59": ; preds = %1
%"47" = load i64, ptr addrspace(4) %"41", align 4 %"47" = load i64, ptr addrspace(4) %"41", align 4
store i64 %"47", ptr addrspace(5) %"43", align 4 store i64 %"47", ptr addrspace(5) %"43", align 4
%"49" = load i64, ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(4) %"42", align 4
%"56" = inttoptr i64 %"49" to ptr
%"48" = load i64, ptr %"56", align 4
store i64 %"48", ptr addrspace(5) %"44", align 4 store i64 %"48", ptr addrspace(5) %"44", align 4
br label %"9" %"50" = load i64, ptr addrspace(5) %"43", align 4
%"57" = inttoptr i64 %"50" to ptr
%"49" = load i64, ptr %"57", align 4
store i64 %"49", ptr addrspace(5) %"45", align 4
br label %"10"
"9": ; preds = %1 "10": ; preds = %"59"
%"51" = load i64, ptr addrspace(5) %"44", align 4 %"52" = load i64, ptr addrspace(5) %"45", align 4
%"50" = add i64 %"51", 1 %"51" = add i64 %"52", 1
store i64 %"50", ptr addrspace(5) %"45", align 4 store i64 %"51", ptr addrspace(5) %"46", align 4
br label %"11" br label %"12"
"10": ; No predecessors! "11": ; No predecessors!
%"53" = load i64, ptr addrspace(5) %"44", align 4 %"54" = load i64, ptr addrspace(5) %"45", align 4
%"52" = add i64 %"53", 2 %"53" = add i64 %"54", 2
store i64 %"52", ptr addrspace(5) %"45", align 4 store i64 %"53", ptr addrspace(5) %"46", align 4
br label %"11" br label %"12"
"11": ; preds = %"10", %"9" "12": ; preds = %"11", %"10"
%"54" = load i64, ptr addrspace(5) %"43", align 4 %"55" = load i64, ptr addrspace(5) %"44", align 4
%"55" = load i64, ptr addrspace(5) %"45", align 4 %"56" = load i64, ptr addrspace(5) %"46", align 4
%"57" = inttoptr i64 %"54" to ptr %"58" = inttoptr i64 %"55" to ptr
store i64 %"55", ptr %"57", align 4 store i64 %"56", ptr %"58", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
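The bra test shows the translator keeping the unreachable fall-through block (flagged "No predecessors!") after an unconditional branch; LLVM accepts such a block as long as it still ends in a terminator. Minimal shape with invented labels:

define void @skip_fallthrough() {
  br label %"target"

"dead":                                           ; No predecessors!
  br label %"target"

"target":                                         ; preds = %0, %"dead"
  ret void
}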

View file

@@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i32, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"50"
store i64 %"39", ptr addrspace(5) %"36", align 4
"50": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"47" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"41" = load i32, ptr %"47", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
store i32 %"41", ptr addrspace(5) %"38", align 4 %"48" = inttoptr i64 %"43" to ptr
%"44" = load i32, ptr addrspace(5) %"38", align 4 %"42" = load i32, ptr %"48", align 4
%"43" = call i32 @llvm.bitreverse.i32(i32 %"44") store i32 %"42", ptr addrspace(5) %"39", align 4
store i32 %"43", ptr addrspace(5) %"38", align 4 %"45" = load i32, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4 %"44" = call i32 @llvm.bitreverse.i32(i32 %"45")
%"46" = load i32, ptr addrspace(5) %"38", align 4 store i32 %"44", ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"45" to ptr %"46" = load i64, ptr addrspace(5) %"38", align 4
store i32 %"46", ptr %"48", align 4 %"47" = load i32, ptr addrspace(5) %"39", align 4
%"49" = inttoptr i64 %"46" to ptr
store i32 %"47", ptr %"49", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.bitreverse.i32(i32) #1 declare i32 @llvm.bitreverse.i32(i32) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
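brev lowers to the generic llvm.bitreverse.i32 intrinsic, and the clz test further down uses llvm.ctlz.i32, whose i1 immarg selects whether a zero input is poison (false means a zero input simply returns the bit width). A combined sketch with an invented function name:

declare i32 @llvm.bitreverse.i32(i32)
declare i32 @llvm.ctlz.i32(i32, i1 immarg)

define i32 @brev_then_clz(i32 %x) {
  %rev = call i32 @llvm.bitreverse.i32(i32 %x)
  %lead = call i32 @llvm.ctlz.i32(i32 %rev, i1 false)
  ret i32 %lead
}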

View file

@@ -10,57 +10,64 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define i64 @__zluda_ptx_impl_incr(i64 %"42") #0 { define i64 @incr(i64 %"43") #0 {
%"65" = alloca i64, align 8, addrspace(5)
%"66" = alloca i64, align 8, addrspace(5) %"66" = alloca i64, align 8, addrspace(5)
%"67" = alloca i64, align 8, addrspace(5) %"67" = alloca i64, align 8, addrspace(5)
%"68" = alloca i64, align 8, addrspace(5) %"68" = alloca i64, align 8, addrspace(5)
%"69" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
store i64 %"42", ptr addrspace(5) %"67", align 4 br label %"80"
%"69" = load i64, ptr addrspace(5) %"67", align 4
store i64 %"69", ptr addrspace(5) %"68", align 4 "80": ; preds = %1
%"71" = load i64, ptr addrspace(5) %"68", align 4 store i64 %"43", ptr addrspace(5) %"68", align 4
%"70" = add i64 %"71", 1 %"70" = load i64, ptr addrspace(5) %"68", align 4
store i64 %"70", ptr addrspace(5) %"68", align 4 store i64 %"70", ptr addrspace(5) %"69", align 4
%"72" = load i64, ptr addrspace(5) %"68", align 4 %"72" = load i64, ptr addrspace(5) %"69", align 4
store i64 %"72", ptr addrspace(5) %"66", align 4 %"71" = add i64 %"72", 1
%"73" = load i64, ptr addrspace(5) %"66", align 4 store i64 %"71", ptr addrspace(5) %"69", align 4
store i64 %"73", ptr addrspace(5) %"65", align 4 %"73" = load i64, ptr addrspace(5) %"69", align 4
%2 = load i64, ptr addrspace(5) %"65", align 4 store i64 %"73", ptr addrspace(5) %"67", align 4
%"74" = load i64, ptr addrspace(5) %"67", align 4
store i64 %"74", ptr addrspace(5) %"66", align 4
%2 = load i64, ptr addrspace(5) %"66", align 4
ret i64 %2 ret i64 %2
} }
define amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"50", ptr addrspace(4) byref(i64) %"51") #0 { define amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"51", ptr addrspace(4) byref(i64) %"52") #1 {
%"52" = alloca i64, align 8, addrspace(5)
%"53" = alloca i64, align 8, addrspace(5) %"53" = alloca i64, align 8, addrspace(5)
%"54" = alloca i64, align 8, addrspace(5) %"54" = alloca i64, align 8, addrspace(5)
%"59" = alloca i64, align 8, addrspace(5) %"55" = alloca i64, align 8, addrspace(5)
%"60" = alloca i64, align 8, addrspace(5) %"60" = alloca i64, align 8, addrspace(5)
%"61" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"55" = load i64, ptr addrspace(4) %"50", align 4 br label %"79"
store i64 %"55", ptr addrspace(5) %"52", align 4
"79": ; preds = %1
%"56" = load i64, ptr addrspace(4) %"51", align 4 %"56" = load i64, ptr addrspace(4) %"51", align 4
store i64 %"56", ptr addrspace(5) %"53", align 4 store i64 %"56", ptr addrspace(5) %"53", align 4
%"58" = load i64, ptr addrspace(5) %"52", align 4 %"57" = load i64, ptr addrspace(4) %"52", align 4
%"74" = inttoptr i64 %"58" to ptr addrspace(1)
%"57" = load i64, ptr addrspace(1) %"74", align 4
store i64 %"57", ptr addrspace(5) %"54", align 4 store i64 %"57", ptr addrspace(5) %"54", align 4
%"61" = load i64, ptr addrspace(5) %"54", align 4 %"59" = load i64, ptr addrspace(5) %"53", align 4
store i64 %"61", ptr addrspace(5) %"59", align 4 %"75" = inttoptr i64 %"59" to ptr addrspace(1)
%"39" = load i64, ptr addrspace(5) %"59", align 4 %"58" = load i64, ptr addrspace(1) %"75", align 4
%"40" = call i64 @__zluda_ptx_impl_incr(i64 %"39") store i64 %"58", ptr addrspace(5) %"55", align 4
store i64 %"40", ptr addrspace(5) %"60", align 4 %"62" = load i64, ptr addrspace(5) %"55", align 4
%"62" = load i64, ptr addrspace(5) %"60", align 4 store i64 %"62", ptr addrspace(5) %"60", align 4
store i64 %"62", ptr addrspace(5) %"54", align 4 %"40" = load i64, ptr addrspace(5) %"60", align 4
%"63" = load i64, ptr addrspace(5) %"53", align 4 %"41" = call i64 @incr(i64 %"40")
store i64 %"41", ptr addrspace(5) %"61", align 4
%"63" = load i64, ptr addrspace(5) %"61", align 4
store i64 %"63", ptr addrspace(5) %"55", align 4
%"64" = load i64, ptr addrspace(5) %"54", align 4 %"64" = load i64, ptr addrspace(5) %"54", align 4
%"77" = inttoptr i64 %"63" to ptr addrspace(1) %"65" = load i64, ptr addrspace(5) %"55", align 4
store i64 %"64", ptr addrspace(1) %"77", align 4 %"78" = inttoptr i64 %"64" to ptr addrspace(1)
store i64 %"65", ptr addrspace(1) %"78", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i32, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"51"
store i64 %"39", ptr addrspace(5) %"36", align 4
"51": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"47" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"41" = load i32, ptr %"47", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
store i32 %"41", ptr addrspace(5) %"38", align 4 %"48" = inttoptr i64 %"43" to ptr
%"44" = load i32, ptr addrspace(5) %"38", align 4 %"42" = load i32, ptr %"48", align 4
%"48" = call i32 @llvm.ctlz.i32(i32 %"44", i1 false) store i32 %"42", ptr addrspace(5) %"39", align 4
store i32 %"48", ptr addrspace(5) %"38", align 4 %"45" = load i32, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4 %"49" = call i32 @llvm.ctlz.i32(i32 %"45", i1 false)
%"46" = load i32, ptr addrspace(5) %"38", align 4 store i32 %"49", ptr addrspace(5) %"39", align 4
%"49" = inttoptr i64 %"45" to ptr %"46" = load i64, ptr addrspace(5) %"38", align 4
store i32 %"46", ptr %"49", align 4 %"47" = load i32, ptr addrspace(5) %"39", align 4
%"50" = inttoptr i64 %"46" to ptr
store i32 %"47", ptr %"50", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.ctlz.i32(i32, i1 immarg) #1 declare i32 @llvm.ctlz.i32(i32, i1 immarg) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@@ -12,48 +12,52 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"50", ptr addrspace(4) byref(i64) %"51") #0 { define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"51", ptr addrspace(4) byref(i64) %"52") #1 {
%"52" = alloca i64, align 8, addrspace(5)
%"53" = alloca i64, align 8, addrspace(5) %"53" = alloca i64, align 8, addrspace(5)
%"54" = alloca i16, align 2, addrspace(5) %"54" = alloca i64, align 8, addrspace(5)
%"55" = alloca i16, align 2, addrspace(5) %"55" = alloca i16, align 2, addrspace(5)
%"56" = alloca i16, align 2, addrspace(5) %"56" = alloca i16, align 2, addrspace(5)
%"57" = alloca i16, align 2, addrspace(5) %"57" = alloca i16, align 2, addrspace(5)
%"58" = alloca i16, align 2, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"58" = load i64, ptr addrspace(4) %"50", align 4 br label %"85"
store i64 %"58", ptr addrspace(5) %"52", align 4
"85": ; preds = %1
%"59" = load i64, ptr addrspace(4) %"51", align 4 %"59" = load i64, ptr addrspace(4) %"51", align 4
store i64 %"59", ptr addrspace(5) %"53", align 4 store i64 %"59", ptr addrspace(5) %"53", align 4
%"60" = load i16, ptr addrspace(4) @constparams, align 2 %"60" = load i64, ptr addrspace(4) %"52", align 4
store i16 %"60", ptr addrspace(5) %"54", align 2 store i64 %"60", ptr addrspace(5) %"54", align 4
%"61" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2 %"61" = load i16, ptr addrspace(4) @constparams, align 2
store i16 %"61", ptr addrspace(5) %"55", align 2 store i16 %"61", ptr addrspace(5) %"55", align 2
%"62" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2 %"62" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2
store i16 %"62", ptr addrspace(5) %"56", align 2 store i16 %"62", ptr addrspace(5) %"56", align 2
%"63" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2 %"63" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2
store i16 %"63", ptr addrspace(5) %"57", align 2 store i16 %"63", ptr addrspace(5) %"57", align 2
%"64" = load i64, ptr addrspace(5) %"53", align 4 %"64" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2
%"65" = load i16, ptr addrspace(5) %"54", align 2 store i16 %"64", ptr addrspace(5) %"58", align 2
%"76" = inttoptr i64 %"64" to ptr %"65" = load i64, ptr addrspace(5) %"54", align 4
store i16 %"65", ptr %"76", align 2 %"66" = load i16, ptr addrspace(5) %"55", align 2
%"66" = load i64, ptr addrspace(5) %"53", align 4 %"77" = inttoptr i64 %"65" to ptr
%"78" = inttoptr i64 %"66" to ptr store i16 %"66", ptr %"77", align 2
%"39" = getelementptr inbounds i8, ptr %"78", i64 2 %"67" = load i64, ptr addrspace(5) %"54", align 4
%"67" = load i16, ptr addrspace(5) %"55", align 2 %"79" = inttoptr i64 %"67" to ptr
store i16 %"67", ptr %"39", align 2 %"40" = getelementptr inbounds i8, ptr %"79", i64 2
%"68" = load i64, ptr addrspace(5) %"53", align 4 %"68" = load i16, ptr addrspace(5) %"56", align 2
%"80" = inttoptr i64 %"68" to ptr store i16 %"68", ptr %"40", align 2
%"41" = getelementptr inbounds i8, ptr %"80", i64 4 %"69" = load i64, ptr addrspace(5) %"54", align 4
%"69" = load i16, ptr addrspace(5) %"56", align 2 %"81" = inttoptr i64 %"69" to ptr
store i16 %"69", ptr %"41", align 2 %"42" = getelementptr inbounds i8, ptr %"81", i64 4
%"70" = load i64, ptr addrspace(5) %"53", align 4 %"70" = load i16, ptr addrspace(5) %"57", align 2
%"82" = inttoptr i64 %"70" to ptr store i16 %"70", ptr %"42", align 2
%"43" = getelementptr inbounds i8, ptr %"82", i64 6 %"71" = load i64, ptr addrspace(5) %"54", align 4
%"71" = load i16, ptr addrspace(5) %"57", align 2 %"83" = inttoptr i64 %"71" to ptr
store i16 %"71", ptr %"43", align 2 %"44" = getelementptr inbounds i8, ptr %"83", i64 6
%"72" = load i16, ptr addrspace(5) %"58", align 2
store i16 %"72", ptr %"44", align 2
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,29 +10,33 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca float, align 4, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"40" = load i64, ptr addrspace(4) %"35", align 4 br label %"51"
store i64 %"40", ptr addrspace(5) %"37", align 4
"51": ; preds = %1
%"41" = load i64, ptr addrspace(4) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"41", ptr addrspace(5) %"38", align 4 store i64 %"41", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(5) %"37", align 4 %"42" = load i64, ptr addrspace(4) %"37", align 4
%"48" = inttoptr i64 %"43" to ptr store i64 %"42", ptr addrspace(5) %"39", align 4
%"42" = load float, ptr %"48", align 4 %"44" = load i64, ptr addrspace(5) %"38", align 4
store float %"42", ptr addrspace(5) %"39", align 4 %"49" = inttoptr i64 %"44" to ptr
%"45" = load float, ptr addrspace(5) %"39", align 4 %"43" = load float, ptr %"49", align 4
%"44" = fmul float %"45", 5.000000e-01 store float %"43", ptr addrspace(5) %"40", align 4
store float %"44", ptr addrspace(5) %"39", align 4 %"46" = load float, ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4 %"45" = fmul float %"46", 5.000000e-01
%"47" = load float, ptr addrspace(5) %"39", align 4 store float %"45", ptr addrspace(5) %"40", align 4
%"49" = inttoptr i64 %"46" to ptr %"47" = load i64, ptr addrspace(5) %"39", align 4
store float %"47", ptr %"49", align 4 %"48" = load float, ptr addrspace(5) %"40", align 4
%"50" = inttoptr i64 %"47" to ptr
store float %"48", ptr %"50", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,29 +10,33 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i32, align 4, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"40" = load i64, ptr addrspace(4) %"35", align 4 br label %"51"
store i64 %"40", ptr addrspace(5) %"37", align 4
"51": ; preds = %1
%"41" = load i64, ptr addrspace(4) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"41", ptr addrspace(5) %"38", align 4 store i64 %"41", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(5) %"37", align 4 %"42" = load i64, ptr addrspace(4) %"37", align 4
%"48" = inttoptr i64 %"43" to ptr store i64 %"42", ptr addrspace(5) %"39", align 4
%"42" = load i32, ptr %"48", align 4 %"44" = load i64, ptr addrspace(5) %"38", align 4
store i32 %"42", ptr addrspace(5) %"39", align 4 %"49" = inttoptr i64 %"44" to ptr
%"45" = load i32, ptr addrspace(5) %"39", align 4 %"43" = load i32, ptr %"49", align 4
%"44" = mul i32 %"45", -1 store i32 %"43", ptr addrspace(5) %"40", align 4
store i32 %"44", ptr addrspace(5) %"39", align 4 %"46" = load i32, ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4 %"45" = mul i32 %"46", -1
%"47" = load i32, ptr addrspace(5) %"39", align 4 store i32 %"45", ptr addrspace(5) %"40", align 4
%"49" = inttoptr i64 %"46" to ptr %"47" = load i64, ptr addrspace(5) %"39", align 4
store i32 %"47", ptr %"49", align 4 %"48" = load i32, ptr addrspace(5) %"40", align 4
%"50" = inttoptr i64 %"47" to ptr
store i32 %"48", ptr %"50", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca float, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"50"
store i64 %"39", ptr addrspace(5) %"36", align 4
"50": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"47" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"41" = load float, ptr %"47", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
store float %"41", ptr addrspace(5) %"38", align 4 %"48" = inttoptr i64 %"43" to ptr
%"44" = load float, ptr addrspace(5) %"38", align 4 %"42" = load float, ptr %"48", align 4
%"43" = call afn float @llvm.cos.f32(float %"44") store float %"42", ptr addrspace(5) %"39", align 4
store float %"43", ptr addrspace(5) %"38", align 4 %"45" = load float, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4 %"44" = call afn float @llvm.cos.f32(float %"45")
%"46" = load float, ptr addrspace(5) %"38", align 4 store float %"44", ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"45" to ptr %"46" = load i64, ptr addrspace(5) %"38", align 4
store float %"46", ptr %"48", align 4 %"47" = load float, ptr addrspace(5) %"39", align 4
%"49" = inttoptr i64 %"46" to ptr
store float %"47", ptr %"49", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.cos.f32(float) #1 declare float @llvm.cos.f32(float) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca float, align 4, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca double, align 8, addrspace(5) %"40" = alloca float, align 4, addrspace(5)
%"41" = alloca double, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"41" = load i64, ptr addrspace(4) %"35", align 4 br label %"52"
store i64 %"41", ptr addrspace(5) %"37", align 4
"52": ; preds = %1
%"42" = load i64, ptr addrspace(4) %"36", align 4 %"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4 store i64 %"42", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(5) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
%"49" = inttoptr i64 %"44" to ptr addrspace(1) store i64 %"43", ptr addrspace(5) %"39", align 4
%"43" = load float, ptr addrspace(1) %"49", align 4 %"45" = load i64, ptr addrspace(5) %"38", align 4
store float %"43", ptr addrspace(5) %"39", align 4 %"50" = inttoptr i64 %"45" to ptr addrspace(1)
%"46" = load float, ptr addrspace(5) %"39", align 4 %"44" = load float, ptr addrspace(1) %"50", align 4
%"45" = fpext float %"46" to double store float %"44", ptr addrspace(5) %"40", align 4
store double %"45", ptr addrspace(5) %"40", align 8 %"47" = load float, ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"38", align 4 %"46" = fpext float %"47" to double
%"48" = load double, ptr addrspace(5) %"40", align 8 store double %"46", ptr addrspace(5) %"41", align 8
%"50" = inttoptr i64 %"47" to ptr %"48" = load i64, ptr addrspace(5) %"39", align 4
store double %"48", ptr %"50", align 8 %"49" = load double, ptr addrspace(5) %"41", align 8
%"51" = inttoptr i64 %"48" to ptr
store double %"49", ptr %"51", align 8
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,49 +10,53 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 { define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
%"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca float, align 4, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca float, align 4, addrspace(5) %"44" = alloca float, align 4, addrspace(5)
%"45" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"45" = load i64, ptr addrspace(4) %"39", align 4 br label %"64"
store i64 %"45", ptr addrspace(5) %"41", align 4
"64": ; preds = %1
%"46" = load i64, ptr addrspace(4) %"40", align 4 %"46" = load i64, ptr addrspace(4) %"40", align 4
store i64 %"46", ptr addrspace(5) %"42", align 4 store i64 %"46", ptr addrspace(5) %"42", align 4
%"48" = load i64, ptr addrspace(5) %"41", align 4 %"47" = load i64, ptr addrspace(4) %"41", align 4
%"59" = inttoptr i64 %"48" to ptr store i64 %"47", ptr addrspace(5) %"43", align 4
%"47" = load float, ptr %"59", align 4 %"49" = load i64, ptr addrspace(5) %"42", align 4
store float %"47", ptr addrspace(5) %"43", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4
%"60" = inttoptr i64 %"49" to ptr %"60" = inttoptr i64 %"49" to ptr
%"30" = getelementptr inbounds i8, ptr %"60", i64 4 %"48" = load float, ptr %"60", align 4
%"50" = load float, ptr %"30", align 4 store float %"48", ptr addrspace(5) %"44", align 4
store float %"50", ptr addrspace(5) %"44", align 4 %"50" = load i64, ptr addrspace(5) %"42", align 4
%"52" = load float, ptr addrspace(5) %"43", align 4 %"61" = inttoptr i64 %"50" to ptr
%2 = call float @llvm.roundeven.f32(float %"52") %"31" = getelementptr inbounds i8, ptr %"61", i64 4
%"51" = freeze float %2 %"51" = load float, ptr %"31", align 4
store float %"51", ptr addrspace(5) %"43", align 4 store float %"51", ptr addrspace(5) %"45", align 4
%"54" = load float, ptr addrspace(5) %"44", align 4 %"53" = load float, ptr addrspace(5) %"44", align 4
%3 = call float @llvm.roundeven.f32(float %"54") %2 = call float @llvm.roundeven.f32(float %"53")
%"53" = freeze float %3 %"52" = freeze float %2
store float %"53", ptr addrspace(5) %"44", align 4 store float %"52", ptr addrspace(5) %"44", align 4
%"55" = load i64, ptr addrspace(5) %"42", align 4 %"55" = load float, ptr addrspace(5) %"45", align 4
%"56" = load float, ptr addrspace(5) %"43", align 4 %3 = call float @llvm.roundeven.f32(float %"55")
%"61" = inttoptr i64 %"55" to ptr %"54" = freeze float %3
store float %"56", ptr %"61", align 4 store float %"54", ptr addrspace(5) %"45", align 4
%"57" = load i64, ptr addrspace(5) %"42", align 4 %"56" = load i64, ptr addrspace(5) %"43", align 4
%"62" = inttoptr i64 %"57" to ptr %"57" = load float, ptr addrspace(5) %"44", align 4
%"32" = getelementptr inbounds i8, ptr %"62", i64 4 %"62" = inttoptr i64 %"56" to ptr
%"58" = load float, ptr addrspace(5) %"44", align 4 store float %"57", ptr %"62", align 4
store float %"58", ptr %"32", align 4 %"58" = load i64, ptr addrspace(5) %"43", align 4
%"63" = inttoptr i64 %"58" to ptr
%"33" = getelementptr inbounds i8, ptr %"63", i64 4
%"59" = load float, ptr addrspace(5) %"45", align 4
store float %"59", ptr %"33", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.roundeven.f32(float) #1 declare float @llvm.roundeven.f32(float) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@@ -10,49 +10,53 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 { define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
%"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca float, align 4, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca float, align 4, addrspace(5) %"44" = alloca float, align 4, addrspace(5)
%"45" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"45" = load i64, ptr addrspace(4) %"39", align 4 br label %"64"
store i64 %"45", ptr addrspace(5) %"41", align 4
"64": ; preds = %1
%"46" = load i64, ptr addrspace(4) %"40", align 4 %"46" = load i64, ptr addrspace(4) %"40", align 4
store i64 %"46", ptr addrspace(5) %"42", align 4 store i64 %"46", ptr addrspace(5) %"42", align 4
%"48" = load i64, ptr addrspace(5) %"41", align 4 %"47" = load i64, ptr addrspace(4) %"41", align 4
%"59" = inttoptr i64 %"48" to ptr store i64 %"47", ptr addrspace(5) %"43", align 4
%"47" = load float, ptr %"59", align 4 %"49" = load i64, ptr addrspace(5) %"42", align 4
store float %"47", ptr addrspace(5) %"43", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4
%"60" = inttoptr i64 %"49" to ptr %"60" = inttoptr i64 %"49" to ptr
%"30" = getelementptr inbounds i8, ptr %"60", i64 4 %"48" = load float, ptr %"60", align 4
%"50" = load float, ptr %"30", align 4 store float %"48", ptr addrspace(5) %"44", align 4
store float %"50", ptr addrspace(5) %"44", align 4 %"50" = load i64, ptr addrspace(5) %"42", align 4
%"52" = load float, ptr addrspace(5) %"43", align 4 %"61" = inttoptr i64 %"50" to ptr
%2 = call float @llvm.trunc.f32(float %"52") %"31" = getelementptr inbounds i8, ptr %"61", i64 4
%"51" = freeze float %2 %"51" = load float, ptr %"31", align 4
store float %"51", ptr addrspace(5) %"43", align 4 store float %"51", ptr addrspace(5) %"45", align 4
%"54" = load float, ptr addrspace(5) %"44", align 4 %"53" = load float, ptr addrspace(5) %"44", align 4
%3 = call float @llvm.trunc.f32(float %"54") %2 = call float @llvm.trunc.f32(float %"53")
%"53" = freeze float %3 %"52" = freeze float %2
store float %"53", ptr addrspace(5) %"44", align 4 store float %"52", ptr addrspace(5) %"44", align 4
%"55" = load i64, ptr addrspace(5) %"42", align 4 %"55" = load float, ptr addrspace(5) %"45", align 4
%"56" = load float, ptr addrspace(5) %"43", align 4 %3 = call float @llvm.trunc.f32(float %"55")
%"61" = inttoptr i64 %"55" to ptr %"54" = freeze float %3
store float %"56", ptr %"61", align 4 store float %"54", ptr addrspace(5) %"45", align 4
%"57" = load i64, ptr addrspace(5) %"42", align 4 %"56" = load i64, ptr addrspace(5) %"43", align 4
%"62" = inttoptr i64 %"57" to ptr %"57" = load float, ptr addrspace(5) %"44", align 4
%"32" = getelementptr inbounds i8, ptr %"62", i64 4 %"62" = inttoptr i64 %"56" to ptr
%"58" = load float, ptr addrspace(5) %"44", align 4 store float %"57", ptr %"62", align 4
store float %"58", ptr %"32", align 4 %"58" = load i64, ptr addrspace(5) %"43", align 4
%"63" = inttoptr i64 %"58" to ptr
%"33" = getelementptr inbounds i8, ptr %"63", i64 4
%"59" = load float, ptr addrspace(5) %"45", align 4
store float %"59", ptr %"33", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.trunc.f32(float) #1 declare float @llvm.trunc.f32(float) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@@ -10,32 +10,36 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i32, align 4, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i32, align 4, addrspace(5) %"40" = alloca i32, align 4, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"41" = load i64, ptr addrspace(4) %"35", align 4 br label %"54"
store i64 %"41", ptr addrspace(5) %"37", align 4
"54": ; preds = %1
%"42" = load i64, ptr addrspace(4) %"36", align 4 %"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4 store i64 %"42", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(5) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
%"49" = inttoptr i64 %"44" to ptr addrspace(1) store i64 %"43", ptr addrspace(5) %"39", align 4
%"43" = load i32, ptr addrspace(1) %"49", align 4 %"45" = load i64, ptr addrspace(5) %"38", align 4
store i32 %"43", ptr addrspace(5) %"40", align 4 %"50" = inttoptr i64 %"45" to ptr addrspace(1)
%"46" = load i32, ptr addrspace(5) %"40", align 4 %"44" = load i32, ptr addrspace(1) %"50", align 4
%2 = trunc i32 %"46" to i8 store i32 %"44", ptr addrspace(5) %"41", align 4
%"50" = sext i8 %2 to i16 %"47" = load i32, ptr addrspace(5) %"41", align 4
%"45" = sext i16 %"50" to i32 %2 = trunc i32 %"47" to i8
store i32 %"45", ptr addrspace(5) %"39", align 4 %"51" = sext i8 %2 to i16
%"47" = load i64, ptr addrspace(5) %"38", align 4 %"46" = sext i16 %"51" to i32
%"48" = load i32, ptr addrspace(5) %"39", align 4 store i32 %"46", ptr addrspace(5) %"40", align 4
%"52" = inttoptr i64 %"47" to ptr %"48" = load i64, ptr addrspace(5) %"39", align 4
store i32 %"48", ptr %"52", align 4 %"49" = load i32, ptr addrspace(5) %"40", align 4
%"53" = inttoptr i64 %"48" to ptr
store i32 %"49", ptr %"53", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,55 +10,59 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 { define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
%"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca i32, align 4, addrspace(5) %"44" = alloca i32, align 4, addrspace(5)
%"45" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"45" = load i64, ptr addrspace(4) %"39", align 4 br label %"72"
store i64 %"45", ptr addrspace(5) %"41", align 4
"72": ; preds = %1
%"46" = load i64, ptr addrspace(4) %"40", align 4 %"46" = load i64, ptr addrspace(4) %"40", align 4
store i64 %"46", ptr addrspace(5) %"42", align 4 store i64 %"46", ptr addrspace(5) %"42", align 4
%"48" = load i64, ptr addrspace(5) %"41", align 4 %"47" = load i64, ptr addrspace(4) %"41", align 4
%"60" = inttoptr i64 %"48" to ptr store i64 %"47", ptr addrspace(5) %"43", align 4
%"59" = load float, ptr %"60", align 4 %"49" = load i64, ptr addrspace(5) %"42", align 4
%"47" = bitcast float %"59" to i32
store i32 %"47", ptr addrspace(5) %"43", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4
%"61" = inttoptr i64 %"49" to ptr %"61" = inttoptr i64 %"49" to ptr
%"30" = getelementptr inbounds i8, ptr %"61", i64 4 %"60" = load float, ptr %"61", align 4
%"62" = load float, ptr %"30", align 4 %"48" = bitcast float %"60" to i32
%"50" = bitcast float %"62" to i32 store i32 %"48", ptr addrspace(5) %"44", align 4
store i32 %"50", ptr addrspace(5) %"44", align 4 %"50" = load i64, ptr addrspace(5) %"42", align 4
%"52" = load i32, ptr addrspace(5) %"43", align 4 %"62" = inttoptr i64 %"50" to ptr
%"64" = bitcast i32 %"52" to float %"31" = getelementptr inbounds i8, ptr %"62", i64 4
%2 = call float @llvm.ceil.f32(float %"64") %"63" = load float, ptr %"31", align 4
%"51" = bitcast float %"63" to i32
store i32 %"51", ptr addrspace(5) %"45", align 4
%"53" = load i32, ptr addrspace(5) %"44", align 4
%"65" = bitcast i32 %"53" to float
%2 = call float @llvm.ceil.f32(float %"65")
%3 = fptosi float %2 to i32 %3 = fptosi float %2 to i32
%"63" = freeze i32 %3 %"64" = freeze i32 %3
store i32 %"63", ptr addrspace(5) %"43", align 4 store i32 %"64", ptr addrspace(5) %"44", align 4
%"54" = load i32, ptr addrspace(5) %"44", align 4 %"55" = load i32, ptr addrspace(5) %"45", align 4
%"66" = bitcast i32 %"54" to float %"67" = bitcast i32 %"55" to float
%4 = call float @llvm.ceil.f32(float %"66") %4 = call float @llvm.ceil.f32(float %"67")
%5 = fptosi float %4 to i32 %5 = fptosi float %4 to i32
%"65" = freeze i32 %5 %"66" = freeze i32 %5
store i32 %"65", ptr addrspace(5) %"44", align 4 store i32 %"66", ptr addrspace(5) %"45", align 4
%"55" = load i64, ptr addrspace(5) %"42", align 4 %"56" = load i64, ptr addrspace(5) %"43", align 4
%"56" = load i32, ptr addrspace(5) %"43", align 4 %"57" = load i32, ptr addrspace(5) %"44", align 4
%"67" = inttoptr i64 %"55" to ptr addrspace(1) %"68" = inttoptr i64 %"56" to ptr addrspace(1)
store i32 %"56", ptr addrspace(1) %"67", align 4 store i32 %"57", ptr addrspace(1) %"68", align 4
%"57" = load i64, ptr addrspace(5) %"42", align 4 %"58" = load i64, ptr addrspace(5) %"43", align 4
%"69" = inttoptr i64 %"57" to ptr addrspace(1) %"70" = inttoptr i64 %"58" to ptr addrspace(1)
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"69", i64 4 %"33" = getelementptr inbounds i8, ptr addrspace(1) %"70", i64 4
%"58" = load i32, ptr addrspace(5) %"44", align 4 %"59" = load i32, ptr addrspace(5) %"45", align 4
store i32 %"58", ptr addrspace(1) %"32", align 4 store i32 %"59", ptr addrspace(1) %"33", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.ceil.f32(float) #1 declare float @llvm.ceil.f32(float) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i32, align 4, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i32, align 4, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"41" = load i64, ptr addrspace(4) %"35", align 4 br label %"54"
store i64 %"41", ptr addrspace(5) %"37", align 4
"54": ; preds = %1
%"42" = load i64, ptr addrspace(4) %"36", align 4 %"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4 store i64 %"42", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(5) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
%"50" = inttoptr i64 %"44" to ptr store i64 %"43", ptr addrspace(5) %"39", align 4
%"49" = load i32, ptr %"50", align 4 %"45" = load i64, ptr addrspace(5) %"38", align 4
store i32 %"49", ptr addrspace(5) %"39", align 4 %"51" = inttoptr i64 %"45" to ptr
%"46" = load i32, ptr addrspace(5) %"39", align 4 %"50" = load i32, ptr %"51", align 4
%"45" = sext i32 %"46" to i64 store i32 %"50", ptr addrspace(5) %"40", align 4
store i64 %"45", ptr addrspace(5) %"40", align 4 %"47" = load i32, ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"38", align 4 %"46" = sext i32 %"47" to i64
%"48" = load i64, ptr addrspace(5) %"40", align 4 store i64 %"46", ptr addrspace(5) %"41", align 4
%"51" = inttoptr i64 %"47" to ptr %"48" = load i64, ptr addrspace(5) %"39", align 4
store i64 %"48", ptr %"51", align 4 %"49" = load i64, ptr addrspace(5) %"41", align 4
%"52" = inttoptr i64 %"48" to ptr
store i64 %"49", ptr %"52", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,41 +10,45 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { define amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i32, align 4, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5) %"41" = alloca i32, align 4, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5) %"42" = alloca i32, align 4, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"36", align 4 br label %"56"
store i64 %"43", ptr addrspace(5) %"38", align 4
"56": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"37", align 4 %"44" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"44", ptr addrspace(5) %"39", align 4 store i64 %"44", ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4 %"45" = load i64, ptr addrspace(4) %"38", align 4
%"53" = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"40", align 4
%"45" = load i32, ptr %"53", align 4 %"47" = load i64, ptr addrspace(5) %"39", align 4
store i32 %"45", ptr addrspace(5) %"40", align 4 %"54" = inttoptr i64 %"47" to ptr
%"48" = load i32, ptr addrspace(5) %"40", align 4 %"46" = load i32, ptr %"54", align 4
%2 = call i32 @llvm.smax.i32(i32 %"48", i32 0) store i32 %"46", ptr addrspace(5) %"41", align 4
%"49" = load i32, ptr addrspace(5) %"41", align 4
%2 = call i32 @llvm.smax.i32(i32 %"49", i32 0)
%3 = call i32 @llvm.umin.i32(i32 %2, i32 -1) %3 = call i32 @llvm.umin.i32(i32 %2, i32 -1)
store i32 %3, ptr addrspace(5) %"41", align 4 store i32 %3, ptr addrspace(5) %"42", align 4
%"50" = load i32, ptr addrspace(5) %"41", align 4 %"51" = load i32, ptr addrspace(5) %"42", align 4
store i32 %"50", ptr addrspace(5) %"42", align 4 store i32 %"51", ptr addrspace(5) %"43", align 4
%"51" = load i64, ptr addrspace(5) %"39", align 4 %"52" = load i64, ptr addrspace(5) %"40", align 4
%"52" = load i32, ptr addrspace(5) %"42", align 4 %"53" = load i32, ptr addrspace(5) %"43", align 4
%"54" = inttoptr i64 %"51" to ptr %"55" = inttoptr i64 %"52" to ptr
store i32 %"52", ptr %"54", align 4 store i32 %"53", ptr %"55", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.smax.i32(i32, i32) #1 declare i32 @llvm.smax.i32(i32, i32) #2
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.umin.i32(i32, i32) #1 declare i32 @llvm.umin.i32(i32, i32) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@@ -10,34 +10,38 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca float, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"56"
store i64 %"39", ptr addrspace(5) %"36", align 4
"56": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%2 = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"49" = addrspacecast ptr %2 to ptr addrspace(1) %"43" = load i64, ptr addrspace(5) %"37", align 4
store ptr addrspace(1) %"49", ptr addrspace(5) %"36", align 8 %2 = inttoptr i64 %"43" to ptr
%"44" = load i64, ptr addrspace(5) %"37", align 4 %"50" = addrspacecast ptr %2 to ptr addrspace(1)
%3 = inttoptr i64 %"44" to ptr store ptr addrspace(1) %"50", ptr addrspace(5) %"37", align 8
%"51" = addrspacecast ptr %3 to ptr addrspace(1) %"45" = load i64, ptr addrspace(5) %"38", align 4
store ptr addrspace(1) %"51", ptr addrspace(5) %"37", align 8 %3 = inttoptr i64 %"45" to ptr
%"46" = load i64, ptr addrspace(5) %"36", align 4 %"52" = addrspacecast ptr %3 to ptr addrspace(1)
%"53" = inttoptr i64 %"46" to ptr addrspace(1) store ptr addrspace(1) %"52", ptr addrspace(5) %"38", align 8
%"45" = load float, ptr addrspace(1) %"53", align 4
store float %"45", ptr addrspace(5) %"38", align 4
%"47" = load i64, ptr addrspace(5) %"37", align 4 %"47" = load i64, ptr addrspace(5) %"37", align 4
%"48" = load float, ptr addrspace(5) %"38", align 4
%"54" = inttoptr i64 %"47" to ptr addrspace(1) %"54" = inttoptr i64 %"47" to ptr addrspace(1)
store float %"48", ptr addrspace(1) %"54", align 4 %"46" = load float, ptr addrspace(1) %"54", align 4
store float %"46", ptr addrspace(5) %"39", align 4
%"48" = load i64, ptr addrspace(5) %"38", align 4
%"49" = load float, ptr addrspace(5) %"39", align 4
%"55" = inttoptr i64 %"48" to ptr addrspace(1)
store float %"49", ptr addrspace(1) %"55", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca float, align 4, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca float, align 4, addrspace(5) %"42" = alloca float, align 4, addrspace(5)
%"43" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"37", align 4 br label %"58"
store i64 %"43", ptr addrspace(5) %"39", align 4
"58": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
%"54" = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"41", align 4
%"45" = load float, ptr %"54", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
store float %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4
%"55" = inttoptr i64 %"47" to ptr %"55" = inttoptr i64 %"47" to ptr
%"30" = getelementptr inbounds i8, ptr %"55", i64 4 %"46" = load float, ptr %"55", align 4
%"48" = load float, ptr %"30", align 4 store float %"46", ptr addrspace(5) %"42", align 4
store float %"48", ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load float, ptr addrspace(5) %"41", align 4 %"56" = inttoptr i64 %"48" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
%"49" = load float, ptr %"31", align 4
store float %"49", ptr addrspace(5) %"43", align 4
%"51" = load float, ptr addrspace(5) %"42", align 4 %"51" = load float, ptr addrspace(5) %"42", align 4
%"49" = fdiv arcp afn float %"50", %"51" %"52" = load float, ptr addrspace(5) %"43", align 4
store float %"49", ptr addrspace(5) %"41", align 4 %"50" = fdiv arcp afn float %"51", %"52"
%"52" = load i64, ptr addrspace(5) %"40", align 4 store float %"50", ptr addrspace(5) %"42", align 4
%"53" = load float, ptr addrspace(5) %"41", align 4 %"53" = load i64, ptr addrspace(5) %"41", align 4
%"56" = inttoptr i64 %"52" to ptr %"54" = load float, ptr addrspace(5) %"42", align 4
store float %"53", ptr %"56", align 4 %"57" = inttoptr i64 %"53" to ptr
store float %"54", ptr %"57", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca float, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"50"
store i64 %"39", ptr addrspace(5) %"36", align 4
"50": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"47" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"41" = load float, ptr %"47", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
store float %"41", ptr addrspace(5) %"38", align 4 %"48" = inttoptr i64 %"43" to ptr
%"44" = load float, ptr addrspace(5) %"38", align 4 %"42" = load float, ptr %"48", align 4
%"43" = call float @llvm.amdgcn.exp2.f32(float %"44") store float %"42", ptr addrspace(5) %"39", align 4
store float %"43", ptr addrspace(5) %"38", align 4 %"45" = load float, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4 %"44" = call float @llvm.amdgcn.exp2.f32(float %"45")
%"46" = load float, ptr addrspace(5) %"38", align 4 store float %"44", ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"45" to ptr %"46" = load i64, ptr addrspace(5) %"38", align 4
store float %"46", ptr %"48", align 4 %"47" = load float, ptr addrspace(5) %"39", align 4
%"49" = inttoptr i64 %"46" to ptr
store float %"47", ptr %"49", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.amdgcn.exp2.f32(float) #1 declare float @llvm.amdgcn.exp2.f32(float) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@@ -12,30 +12,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"40" = load i64, ptr addrspace(4) %"35", align 4 br label %"53"
store i64 %"40", ptr addrspace(5) %"37", align 4
"53": ; preds = %1
%"41" = load i64, ptr addrspace(4) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"41", ptr addrspace(5) %"38", align 4 store i64 %"41", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(5) %"37", align 4 %"42" = load i64, ptr addrspace(4) %"37", align 4
%"48" = inttoptr i64 %"43" to ptr addrspace(1)
%"42" = load i64, ptr addrspace(1) %"48", align 4
store i64 %"42", ptr addrspace(5) %"39", align 4 store i64 %"42", ptr addrspace(5) %"39", align 4
%"44" = load i64, ptr addrspace(5) %"39", align 4 %"44" = load i64, ptr addrspace(5) %"38", align 4
store i64 %"44", ptr addrspace(3) @shared_mem, align 4 %"49" = inttoptr i64 %"44" to ptr addrspace(1)
%"45" = load i64, ptr addrspace(3) @shared_mem, align 4 %"43" = load i64, ptr addrspace(1) %"49", align 4
store i64 %"45", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4 %"45" = load i64, ptr addrspace(5) %"40", align 4
store i64 %"45", ptr addrspace(3) @shared_mem, align 4
%"46" = load i64, ptr addrspace(3) @shared_mem, align 4
store i64 %"46", ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4 %"47" = load i64, ptr addrspace(5) %"39", align 4
%"51" = inttoptr i64 %"46" to ptr addrspace(1) %"48" = load i64, ptr addrspace(5) %"40", align 4
store i64 %"47", ptr addrspace(1) %"51", align 4 %"52" = inttoptr i64 %"47" to ptr addrspace(1)
store i64 %"48", ptr addrspace(1) %"52", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -12,46 +12,53 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define void @__zluda_ptx_impl_incr_shared_2_global() #0 { define void @incr_shared_2_global() #0 {
%"38" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(3) @shared_mem, align 4 br label %"63"
store i64 %"39", ptr addrspace(5) %"38", align 4
%"41" = load i64, ptr addrspace(5) %"38", align 4 "63": ; preds = %1
%"40" = add i64 %"41", 2 %"40" = load i64, ptr addrspace(3) @shared_mem, align 4
store i64 %"40", ptr addrspace(5) %"38", align 4 store i64 %"40", ptr addrspace(5) %"39", align 4
%"42" = load i64, ptr addrspace(5) %"38", align 4 %"42" = load i64, ptr addrspace(5) %"39", align 4
store i64 %"42", ptr addrspace(3) @shared_mem, align 4 %"41" = add i64 %"42", 2
store i64 %"41", ptr addrspace(5) %"39", align 4
%"43" = load i64, ptr addrspace(5) %"39", align 4
store i64 %"43", ptr addrspace(3) @shared_mem, align 4
ret void ret void
} }
define amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 { define amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
%"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5) %"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca i64, align 8, addrspace(5) %"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"48" = load i64, ptr addrspace(4) %"43", align 4 br label %"64"
store i64 %"48", ptr addrspace(5) %"45", align 4
"64": ; preds = %1
%"49" = load i64, ptr addrspace(4) %"44", align 4 %"49" = load i64, ptr addrspace(4) %"44", align 4
store i64 %"49", ptr addrspace(5) %"46", align 4 store i64 %"49", ptr addrspace(5) %"46", align 4
%"51" = load i64, ptr addrspace(5) %"45", align 4 %"50" = load i64, ptr addrspace(4) %"45", align 4
%"58" = inttoptr i64 %"51" to ptr addrspace(1)
%"50" = load i64, ptr addrspace(1) %"58", align 4
store i64 %"50", ptr addrspace(5) %"47", align 4 store i64 %"50", ptr addrspace(5) %"47", align 4
%"52" = load i64, ptr addrspace(5) %"47", align 4 %"52" = load i64, ptr addrspace(5) %"46", align 4
store i64 %"52", ptr addrspace(3) @shared_mem, align 4 %"59" = inttoptr i64 %"52" to ptr addrspace(1)
call void @__zluda_ptx_impl_incr_shared_2_global() %"51" = load i64, ptr addrspace(1) %"59", align 4
%"53" = load i64, ptr addrspace(3) @shared_mem, align 4 store i64 %"51", ptr addrspace(5) %"48", align 4
store i64 %"53", ptr addrspace(5) %"47", align 4 %"53" = load i64, ptr addrspace(5) %"48", align 4
%"54" = load i64, ptr addrspace(5) %"46", align 4 store i64 %"53", ptr addrspace(3) @shared_mem, align 4
call void @incr_shared_2_global()
%"54" = load i64, ptr addrspace(3) @shared_mem, align 4
store i64 %"54", ptr addrspace(5) %"48", align 4
%"55" = load i64, ptr addrspace(5) %"47", align 4 %"55" = load i64, ptr addrspace(5) %"47", align 4
%"61" = inttoptr i64 %"54" to ptr addrspace(1) %"56" = load i64, ptr addrspace(5) %"48", align 4
store i64 %"55", ptr addrspace(1) %"61", align 4 %"62" = inttoptr i64 %"55" to ptr addrspace(1)
store i64 %"56", ptr addrspace(1) %"62", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,47 +10,51 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 { define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
%"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca float, align 4, addrspace(5) %"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca float, align 4, addrspace(5) %"45" = alloca float, align 4, addrspace(5)
%"46" = alloca float, align 4, addrspace(5) %"46" = alloca float, align 4, addrspace(5)
%"47" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"47" = load i64, ptr addrspace(4) %"40", align 4 br label %"66"
store i64 %"47", ptr addrspace(5) %"42", align 4
"66": ; preds = %1
%"48" = load i64, ptr addrspace(4) %"41", align 4 %"48" = load i64, ptr addrspace(4) %"41", align 4
store i64 %"48", ptr addrspace(5) %"43", align 4 store i64 %"48", ptr addrspace(5) %"43", align 4
%"50" = load i64, ptr addrspace(5) %"42", align 4 %"49" = load i64, ptr addrspace(4) %"42", align 4
%"61" = inttoptr i64 %"50" to ptr store i64 %"49", ptr addrspace(5) %"44", align 4
%"49" = load float, ptr %"61", align 4 %"51" = load i64, ptr addrspace(5) %"43", align 4
store float %"49", ptr addrspace(5) %"44", align 4
%"51" = load i64, ptr addrspace(5) %"42", align 4
%"62" = inttoptr i64 %"51" to ptr %"62" = inttoptr i64 %"51" to ptr
%"31" = getelementptr inbounds i8, ptr %"62", i64 4 %"50" = load float, ptr %"62", align 4
%"52" = load float, ptr %"31", align 4 store float %"50", ptr addrspace(5) %"45", align 4
store float %"52", ptr addrspace(5) %"45", align 4 %"52" = load i64, ptr addrspace(5) %"43", align 4
%"53" = load i64, ptr addrspace(5) %"42", align 4 %"63" = inttoptr i64 %"52" to ptr
%"63" = inttoptr i64 %"53" to ptr %"32" = getelementptr inbounds i8, ptr %"63", i64 4
%"33" = getelementptr inbounds i8, ptr %"63", i64 8 %"53" = load float, ptr %"32", align 4
%"54" = load float, ptr %"33", align 4 store float %"53", ptr addrspace(5) %"46", align 4
store float %"54", ptr addrspace(5) %"46", align 4 %"54" = load i64, ptr addrspace(5) %"43", align 4
%"56" = load float, ptr addrspace(5) %"44", align 4 %"64" = inttoptr i64 %"54" to ptr
%"34" = getelementptr inbounds i8, ptr %"64", i64 8
%"55" = load float, ptr %"34", align 4
store float %"55", ptr addrspace(5) %"47", align 4
%"57" = load float, ptr addrspace(5) %"45", align 4 %"57" = load float, ptr addrspace(5) %"45", align 4
%"58" = load float, ptr addrspace(5) %"46", align 4 %"58" = load float, ptr addrspace(5) %"46", align 4
%"55" = call float @llvm.fma.f32(float %"56", float %"57", float %"58") %"59" = load float, ptr addrspace(5) %"47", align 4
store float %"55", ptr addrspace(5) %"44", align 4 %"56" = call float @llvm.fma.f32(float %"57", float %"58", float %"59")
%"59" = load i64, ptr addrspace(5) %"43", align 4 store float %"56", ptr addrspace(5) %"45", align 4
%"60" = load float, ptr addrspace(5) %"44", align 4 %"60" = load i64, ptr addrspace(5) %"44", align 4
%"64" = inttoptr i64 %"59" to ptr %"61" = load float, ptr addrspace(5) %"45", align 4
store float %"60", ptr %"64", align 4 %"65" = inttoptr i64 %"60" to ptr
store float %"61", ptr %"65", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.fma.f32(float, float, float) #1 declare float @llvm.fma.f32(float, float, float) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@ -12,25 +12,29 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i32, align 4, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %"37", align 4 br label %"50"
%"41" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"41", ptr addrspace(5) %"38", align 4 "50": ; preds = %1
%"43" = load i64, ptr addrspace(5) %"37", align 4 store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %"38", align 4
%"47" = inttoptr i64 %"43" to ptr addrspace(1) %"42" = load i64, ptr addrspace(4) %"37", align 4
%"42" = load i32, ptr addrspace(1) %"47", align 4 store i64 %"42", ptr addrspace(5) %"39", align 4
store i32 %"42", ptr addrspace(5) %"39", align 4
%"44" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr addrspace(5) %"38", align 4
%"45" = load i32, ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"44" to ptr addrspace(1) %"48" = inttoptr i64 %"44" to ptr addrspace(1)
store i32 %"45", ptr addrspace(1) %"48", align 4 %"43" = load i32, ptr addrspace(1) %"48", align 4
store i32 %"43", ptr addrspace(5) %"40", align 4
%"45" = load i64, ptr addrspace(5) %"39", align 4
%"46" = load i32, ptr addrspace(5) %"40", align 4
%"49" = inttoptr i64 %"45" to ptr addrspace(1)
store i32 %"46", ptr addrspace(1) %"49", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,26 +10,30 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"48"
store i64 %"39", ptr addrspace(5) %"36", align 4
"48": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"45" = inttoptr i64 %"42" to ptr
%"41" = load i64, ptr %"45", align 4
store i64 %"41", ptr addrspace(5) %"38", align 4 store i64 %"41", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(5) %"37", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
%"44" = load i64, ptr addrspace(5) %"38", align 4
%"46" = inttoptr i64 %"43" to ptr %"46" = inttoptr i64 %"43" to ptr
store i64 %"44", ptr %"46", align 4 %"42" = load i64, ptr %"46", align 4
store i64 %"42", ptr addrspace(5) %"39", align 4
%"44" = load i64, ptr addrspace(5) %"38", align 4
%"45" = load i64, ptr addrspace(5) %"39", align 4
%"47" = inttoptr i64 %"44" to ptr
store i64 %"45", ptr %"47", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,31 +10,35 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"40" = load i64, ptr addrspace(4) %"35", align 4 br label %"52"
store i64 %"40", ptr addrspace(5) %"37", align 4
"52": ; preds = %1
%"41" = load i64, ptr addrspace(4) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"41", ptr addrspace(5) %"38", align 4 store i64 %"41", ptr addrspace(5) %"38", align 4
store i64 81985529216486895, ptr addrspace(5) %"39", align 4 %"42" = load i64, ptr addrspace(4) %"37", align 4
%"44" = load i64, ptr addrspace(5) %"37", align 4 store i64 %"42", ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"44" to ptr addrspace(1) store i64 81985529216486895, ptr addrspace(5) %"40", align 4
%"47" = load float, ptr addrspace(1) %"48", align 4
%2 = bitcast float %"47" to i32
%"43" = zext i32 %2 to i64
store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4 %"45" = load i64, ptr addrspace(5) %"38", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4
%"49" = inttoptr i64 %"45" to ptr addrspace(1) %"49" = inttoptr i64 %"45" to ptr addrspace(1)
%3 = trunc i64 %"46" to i32 %"48" = load float, ptr addrspace(1) %"49", align 4
%"50" = bitcast i32 %3 to float %2 = bitcast float %"48" to i32
store float %"50", ptr addrspace(1) %"49", align 4 %"44" = zext i32 %2 to i64
store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 4
%"50" = inttoptr i64 %"46" to ptr addrspace(1)
%3 = trunc i64 %"47" to i32
%"51" = bitcast i32 %3 to float
store float %"51", ptr addrspace(1) %"50", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,37 +10,41 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 { define amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
%"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca i32, align 4, addrspace(5) %"44" = alloca i32, align 4, addrspace(5)
%"45" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"45" = load i64, ptr addrspace(4) %"39", align 4 br label %"60"
store i64 %"45", ptr addrspace(5) %"41", align 4
"60": ; preds = %1
%"46" = load i64, ptr addrspace(4) %"40", align 4 %"46" = load i64, ptr addrspace(4) %"40", align 4
store i64 %"46", ptr addrspace(5) %"42", align 4 store i64 %"46", ptr addrspace(5) %"42", align 4
%"48" = load i64, ptr addrspace(5) %"41", align 4 %"47" = load i64, ptr addrspace(4) %"41", align 4
%"55" = inttoptr i64 %"48" to ptr store i64 %"47", ptr addrspace(5) %"43", align 4
%"47" = load i32, ptr %"55", align 4 %"49" = load i64, ptr addrspace(5) %"42", align 4
store i32 %"47", ptr addrspace(5) %"43", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4
%"56" = inttoptr i64 %"49" to ptr %"56" = inttoptr i64 %"49" to ptr
%"30" = getelementptr inbounds i8, ptr %"56", i64 4 %"48" = load i32, ptr %"56", align 4
%"50" = load i32, ptr %"30", align 4 store i32 %"48", ptr addrspace(5) %"44", align 4
store i32 %"50", ptr addrspace(5) %"44", align 4 %"50" = load i64, ptr addrspace(5) %"42", align 4
%"51" = load i64, ptr addrspace(5) %"42", align 4 %"57" = inttoptr i64 %"50" to ptr
%"52" = load i32, ptr addrspace(5) %"44", align 4 %"31" = getelementptr inbounds i8, ptr %"57", i64 4
%"57" = inttoptr i64 %"51" to ptr %"51" = load i32, ptr %"31", align 4
store i32 %"52", ptr %"57", align 4 store i32 %"51", ptr addrspace(5) %"45", align 4
%"53" = load i64, ptr addrspace(5) %"42", align 4 %"52" = load i64, ptr addrspace(5) %"43", align 4
%"58" = inttoptr i64 %"53" to ptr %"53" = load i32, ptr addrspace(5) %"45", align 4
%"32" = getelementptr inbounds i8, ptr %"58", i64 4 %"58" = inttoptr i64 %"52" to ptr
%"54" = load i32, ptr addrspace(5) %"43", align 4 store i32 %"53", ptr %"58", align 4
store i32 %"54", ptr %"32", align 4 %"54" = load i64, ptr addrspace(5) %"43", align 4
%"59" = inttoptr i64 %"54" to ptr
%"33" = getelementptr inbounds i8, ptr %"59", i64 4
%"55" = load i32, ptr addrspace(5) %"44", align 4
store i32 %"55", ptr %"33", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca float, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"50"
store i64 %"39", ptr addrspace(5) %"36", align 4
"50": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"47" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"41" = load float, ptr %"47", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
store float %"41", ptr addrspace(5) %"38", align 4 %"48" = inttoptr i64 %"43" to ptr
%"44" = load float, ptr addrspace(5) %"38", align 4 %"42" = load float, ptr %"48", align 4
%"43" = call float @llvm.amdgcn.log.f32(float %"44") store float %"42", ptr addrspace(5) %"39", align 4
store float %"43", ptr addrspace(5) %"38", align 4 %"45" = load float, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4 %"44" = call float @llvm.amdgcn.log.f32(float %"45")
%"46" = load float, ptr addrspace(5) %"38", align 4 store float %"44", ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"45" to ptr %"46" = load i64, ptr addrspace(5) %"38", align 4
store float %"46", ptr %"48", align 4 %"47" = load float, ptr addrspace(5) %"39", align 4
%"49" = inttoptr i64 %"46" to ptr
store float %"47", ptr %"49", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.amdgcn.log.f32(float) #1 declare float @llvm.amdgcn.log.f32(float) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@ -10,27 +10,31 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"9" = alloca [8 x i8], align 8, addrspace(5) %"10" = alloca [8 x i8], align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"40" = load i64, ptr addrspace(4) %"35", align 4 br label %"49"
store i64 %"40", ptr addrspace(5) %"37", align 4
"49": ; preds = %1
%"41" = load i64, ptr addrspace(4) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"41", ptr addrspace(5) %"38", align 4 store i64 %"41", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(5) %"37", align 4 %"42" = load i64, ptr addrspace(4) %"37", align 4
%"46" = inttoptr i64 %"43" to ptr
%"42" = load i64, ptr %"46", align 4
store i64 %"42", ptr addrspace(5) %"39", align 4 store i64 %"42", ptr addrspace(5) %"39", align 4
%"44" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr addrspace(5) %"38", align 4
%"45" = load i64, ptr addrspace(5) %"39", align 4
%"47" = inttoptr i64 %"44" to ptr %"47" = inttoptr i64 %"44" to ptr
store i64 %"45", ptr %"47", align 4 %"43" = load i64, ptr %"47", align 4
store i64 %"43", ptr addrspace(5) %"40", align 4
%"45" = load i64, ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(5) %"40", align 4
%"48" = inttoptr i64 %"45" to ptr
store i64 %"46", ptr %"48", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,55 +10,59 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 { define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 {
%"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca i64, align 8, addrspace(5) %"48" = alloca i64, align 8, addrspace(5)
%"49" = alloca i32, align 4, addrspace(5) %"49" = alloca i64, align 8, addrspace(5)
%"50" = alloca i32, align 4, addrspace(5) %"50" = alloca i32, align 4, addrspace(5)
%"51" = alloca i32, align 4, addrspace(5) %"51" = alloca i32, align 4, addrspace(5)
%"52" = alloca i32, align 4, addrspace(5) %"52" = alloca i32, align 4, addrspace(5)
%"53" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"53" = load i64, ptr addrspace(4) %"45", align 4 br label %"78"
store i64 %"53", ptr addrspace(5) %"47", align 4
"78": ; preds = %1
%"54" = load i64, ptr addrspace(4) %"46", align 4 %"54" = load i64, ptr addrspace(4) %"46", align 4
store i64 %"54", ptr addrspace(5) %"48", align 4 store i64 %"54", ptr addrspace(5) %"48", align 4
%"56" = load i64, ptr addrspace(5) %"47", align 4 %"55" = load i64, ptr addrspace(4) %"47", align 4
%"71" = inttoptr i64 %"56" to ptr store i64 %"55", ptr addrspace(5) %"49", align 4
%"55" = load i32, ptr %"71", align 4 %"57" = load i64, ptr addrspace(5) %"48", align 4
store i32 %"55", ptr addrspace(5) %"50", align 4
%"57" = load i64, ptr addrspace(5) %"47", align 4
%"72" = inttoptr i64 %"57" to ptr %"72" = inttoptr i64 %"57" to ptr
%"32" = getelementptr inbounds i8, ptr %"72", i64 4 %"56" = load i32, ptr %"72", align 4
%"58" = load i32, ptr %"32", align 4 store i32 %"56", ptr addrspace(5) %"51", align 4
store i32 %"58", ptr addrspace(5) %"51", align 4 %"58" = load i64, ptr addrspace(5) %"48", align 4
%"59" = load i64, ptr addrspace(5) %"47", align 4 %"73" = inttoptr i64 %"58" to ptr
%"73" = inttoptr i64 %"59" to ptr %"33" = getelementptr inbounds i8, ptr %"73", i64 4
%"34" = getelementptr inbounds i8, ptr %"73", i64 8 %"59" = load i32, ptr %"33", align 4
%"60" = load i32, ptr %"34", align 4 store i32 %"59", ptr addrspace(5) %"52", align 4
store i32 %"60", ptr addrspace(5) %"52", align 4 %"60" = load i64, ptr addrspace(5) %"48", align 4
%"62" = load i32, ptr addrspace(5) %"50", align 4 %"74" = inttoptr i64 %"60" to ptr
%"35" = getelementptr inbounds i8, ptr %"74", i64 8
%"61" = load i32, ptr %"35", align 4
store i32 %"61", ptr addrspace(5) %"53", align 4
%"63" = load i32, ptr addrspace(5) %"51", align 4 %"63" = load i32, ptr addrspace(5) %"51", align 4
%"64" = load i32, ptr addrspace(5) %"52", align 4 %"64" = load i32, ptr addrspace(5) %"52", align 4
%2 = mul i32 %"62", %"63" %"65" = load i32, ptr addrspace(5) %"53", align 4
%"61" = add i32 %2, %"64" %2 = mul i32 %"63", %"64"
store i32 %"61", ptr addrspace(5) %"49", align 4 %"62" = add i32 %2, %"65"
%"65" = load i64, ptr addrspace(5) %"48", align 4 store i32 %"62", ptr addrspace(5) %"50", align 4
%"66" = load i32, ptr addrspace(5) %"49", align 4 %"66" = load i64, ptr addrspace(5) %"49", align 4
%"74" = inttoptr i64 %"65" to ptr %"67" = load i32, ptr addrspace(5) %"50", align 4
store i32 %"66", ptr %"74", align 4 %"75" = inttoptr i64 %"66" to ptr
%"67" = load i64, ptr addrspace(5) %"48", align 4 store i32 %"67", ptr %"75", align 4
%"75" = inttoptr i64 %"67" to ptr %"68" = load i64, ptr addrspace(5) %"49", align 4
%"36" = getelementptr inbounds i8, ptr %"75", i64 4 %"76" = inttoptr i64 %"68" to ptr
%"68" = load i32, ptr addrspace(5) %"49", align 4 %"37" = getelementptr inbounds i8, ptr %"76", i64 4
store i32 %"68", ptr %"36", align 4 %"69" = load i32, ptr addrspace(5) %"50", align 4
%"69" = load i64, ptr addrspace(5) %"48", align 4 store i32 %"69", ptr %"37", align 4
%"76" = inttoptr i64 %"69" to ptr %"70" = load i64, ptr addrspace(5) %"49", align 4
%"38" = getelementptr inbounds i8, ptr %"76", i64 8 %"77" = inttoptr i64 %"70" to ptr
%"70" = load i32, ptr addrspace(5) %"49", align 4 %"39" = getelementptr inbounds i8, ptr %"77", i64 8
store i32 %"70", ptr %"38", align 4 %"71" = load i32, ptr addrspace(5) %"50", align 4
store i32 %"71", ptr %"39", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -0,0 +1,53 @@
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @malformed_label(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5)
br label %1
1: ; preds = %0
br label %"57"
"57": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4
%"45" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"45", ptr addrspace(5) %"41", align 4
br label %"10"
"58": ; No predecessors!
%"47" = load i64, ptr addrspace(5) %"41", align 4
%"54" = inttoptr i64 %"47" to ptr
%"46" = load i64, ptr %"54", align 4
store i64 %"46", ptr addrspace(5) %"42", align 4
br label %"10"
"10": ; preds = %"58", %"57"
%"49" = load i64, ptr addrspace(5) %"40", align 4
%"55" = inttoptr i64 %"49" to ptr
%"48" = load i64, ptr %"55", align 4
store i64 %"48", ptr addrspace(5) %"42", align 4
%"51" = load i64, ptr addrspace(5) %"42", align 4
%"50" = add i64 %"51", 1
store i64 %"50", ptr addrspace(5) %"43", align 4
%"52" = load i64, ptr addrspace(5) %"41", align 4
%"53" = load i64, ptr addrspace(5) %"43", align 4
%"56" = inttoptr i64 %"52" to ptr
store i64 %"53", ptr %"56", align 4
ret void
}
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,40 +10,44 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5) %"42" = alloca i32, align 4, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"37", align 4 br label %"58"
store i64 %"43", ptr addrspace(5) %"39", align 4
"58": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
%"54" = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"41", align 4
%"45" = load i32, ptr %"54", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
store i32 %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4
%"55" = inttoptr i64 %"47" to ptr %"55" = inttoptr i64 %"47" to ptr
%"30" = getelementptr inbounds i8, ptr %"55", i64 4 %"46" = load i32, ptr %"55", align 4
%"48" = load i32, ptr %"30", align 4 store i32 %"46", ptr addrspace(5) %"42", align 4
store i32 %"48", ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i32, ptr addrspace(5) %"41", align 4 %"56" = inttoptr i64 %"48" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
%"49" = load i32, ptr %"31", align 4
store i32 %"49", ptr addrspace(5) %"43", align 4
%"51" = load i32, ptr addrspace(5) %"42", align 4 %"51" = load i32, ptr addrspace(5) %"42", align 4
%"49" = call i32 @llvm.smax.i32(i32 %"50", i32 %"51") %"52" = load i32, ptr addrspace(5) %"43", align 4
store i32 %"49", ptr addrspace(5) %"41", align 4 %"50" = call i32 @llvm.smax.i32(i32 %"51", i32 %"52")
%"52" = load i64, ptr addrspace(5) %"40", align 4 store i32 %"50", ptr addrspace(5) %"42", align 4
%"53" = load i32, ptr addrspace(5) %"41", align 4 %"53" = load i64, ptr addrspace(5) %"41", align 4
%"56" = inttoptr i64 %"52" to ptr %"54" = load i32, ptr addrspace(5) %"42", align 4
store i32 %"53", ptr %"56", align 4 %"57" = inttoptr i64 %"53" to ptr
store i32 %"54", ptr %"57", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.smax.i32(i32, i32) #1 declare i32 @llvm.smax.i32(i32, i32) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@ -10,27 +10,31 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i32, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"49"
store i64 %"39", ptr addrspace(5) %"36", align 4
"49": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"46" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"45" = load i32, ptr %"46", align 4
store i32 %"45", ptr addrspace(5) %"38", align 4
fence seq_cst
%"43" = load i64, ptr addrspace(5) %"37", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
%"44" = load i32, ptr addrspace(5) %"38", align 4
%"47" = inttoptr i64 %"43" to ptr %"47" = inttoptr i64 %"43" to ptr
store i32 %"44", ptr %"47", align 4 %"46" = load i32, ptr %"47", align 4
store i32 %"46", ptr addrspace(5) %"39", align 4
fence seq_cst
%"44" = load i64, ptr addrspace(5) %"38", align 4
%"45" = load i32, ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"44" to ptr
store i32 %"45", ptr %"48", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,40 +10,44 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5) %"42" = alloca i32, align 4, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"37", align 4 br label %"58"
store i64 %"43", ptr addrspace(5) %"39", align 4
"58": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
%"54" = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"41", align 4
%"45" = load i32, ptr %"54", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
store i32 %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4
%"55" = inttoptr i64 %"47" to ptr %"55" = inttoptr i64 %"47" to ptr
%"30" = getelementptr inbounds i8, ptr %"55", i64 4 %"46" = load i32, ptr %"55", align 4
%"48" = load i32, ptr %"30", align 4 store i32 %"46", ptr addrspace(5) %"42", align 4
store i32 %"48", ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i32, ptr addrspace(5) %"41", align 4 %"56" = inttoptr i64 %"48" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
%"49" = load i32, ptr %"31", align 4
store i32 %"49", ptr addrspace(5) %"43", align 4
%"51" = load i32, ptr addrspace(5) %"42", align 4 %"51" = load i32, ptr addrspace(5) %"42", align 4
%"49" = call i32 @llvm.smin.i32(i32 %"50", i32 %"51") %"52" = load i32, ptr addrspace(5) %"43", align 4
store i32 %"49", ptr addrspace(5) %"41", align 4 %"50" = call i32 @llvm.smin.i32(i32 %"51", i32 %"52")
%"52" = load i64, ptr addrspace(5) %"40", align 4 store i32 %"50", ptr addrspace(5) %"42", align 4
%"53" = load i32, ptr addrspace(5) %"41", align 4 %"53" = load i64, ptr addrspace(5) %"41", align 4
%"56" = inttoptr i64 %"52" to ptr %"54" = load i32, ptr addrspace(5) %"42", align 4
store i32 %"53", ptr %"56", align 4 %"57" = inttoptr i64 %"53" to ptr
store i32 %"54", ptr %"57", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.smin.i32(i32, i32) #1 declare i32 @llvm.smin.i32(i32, i32) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@ -10,29 +10,33 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"41" = load i64, ptr addrspace(4) %"35", align 4 br label %"52"
store i64 %"41", ptr addrspace(5) %"37", align 4
"52": ; preds = %1
%"42" = load i64, ptr addrspace(4) %"36", align 4 %"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4 store i64 %"42", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(5) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
%"49" = inttoptr i64 %"44" to ptr
%"43" = load i64, ptr %"49", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(5) %"38", align 4
store i64 %"46", ptr addrspace(5) %"40", align 4 %"50" = inttoptr i64 %"45" to ptr
%"47" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr %"50", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"50" = inttoptr i64 %"47" to ptr %"47" = load i64, ptr addrspace(5) %"40", align 4
store i64 %"48", ptr %"50", align 4 store i64 %"47", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4
%"51" = inttoptr i64 %"48" to ptr
store i64 %"49", ptr %"51", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,15 +10,19 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 { define amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #1 {
%"9" = alloca [8 x i8], align 1, addrspace(5) %"10" = alloca [8 x i8], align 1, addrspace(5)
%"35" = alloca i64, align 8, addrspace(5) %"36" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"37" = ptrtoint ptr addrspace(5) %"9" to i64 br label %"39"
store i64 %"37", ptr addrspace(5) %"35", align 4
"39": ; preds = %1
%"38" = ptrtoint ptr addrspace(5) %"10" to i64
store i64 %"38", ptr addrspace(5) %"36", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,34 +10,38 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @mul24(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { define amdgpu_kernel void @mul24(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i32, align 4, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5) %"41" = alloca i32, align 4, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"42" = load i64, ptr addrspace(4) %"36", align 4 br label %"53"
store i64 %"42", ptr addrspace(5) %"38", align 4
"53": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr store i64 %"44", ptr addrspace(5) %"40", align 4
%"44" = load i32, ptr %"50", align 4 %"46" = load i64, ptr addrspace(5) %"39", align 4
store i32 %"44", ptr addrspace(5) %"40", align 4 %"51" = inttoptr i64 %"46" to ptr
%"47" = load i32, ptr addrspace(5) %"40", align 4 %"45" = load i32, ptr %"51", align 4
%"46" = call i32 @llvm.amdgcn.mul.u24(i32 %"47", i32 2) store i32 %"45", ptr addrspace(5) %"41", align 4
store i32 %"46", ptr addrspace(5) %"41", align 4 %"48" = load i32, ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4 %"47" = call i32 @llvm.amdgcn.mul.u24(i32 %"48", i32 2)
%"49" = load i32, ptr addrspace(5) %"41", align 4 store i32 %"47", ptr addrspace(5) %"42", align 4
%"51" = inttoptr i64 %"48" to ptr %"49" = load i64, ptr addrspace(5) %"40", align 4
store i32 %"49", ptr %"51", align 4 %"50" = load i32, ptr addrspace(5) %"42", align 4
%"52" = inttoptr i64 %"49" to ptr
store i32 %"50", ptr %"52", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.amdgcn.mul.u24(i32, i32) #1 declare i32 @llvm.amdgcn.mul.u24(i32, i32) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca float, align 4, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca float, align 4, addrspace(5) %"42" = alloca float, align 4, addrspace(5)
%"43" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"37", align 4 br label %"58"
store i64 %"43", ptr addrspace(5) %"39", align 4
"58": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
%"54" = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"41", align 4
%"45" = load float, ptr %"54", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
store float %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4
%"55" = inttoptr i64 %"47" to ptr %"55" = inttoptr i64 %"47" to ptr
%"30" = getelementptr inbounds i8, ptr %"55", i64 4 %"46" = load float, ptr %"55", align 4
%"48" = load float, ptr %"30", align 4 store float %"46", ptr addrspace(5) %"42", align 4
store float %"48", ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load float, ptr addrspace(5) %"41", align 4 %"56" = inttoptr i64 %"48" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
%"49" = load float, ptr %"31", align 4
store float %"49", ptr addrspace(5) %"43", align 4
%"51" = load float, ptr addrspace(5) %"42", align 4 %"51" = load float, ptr addrspace(5) %"42", align 4
%"49" = fmul float %"50", %"51" %"52" = load float, ptr addrspace(5) %"43", align 4
store float %"49", ptr addrspace(5) %"41", align 4 %"50" = fmul float %"51", %"52"
%"52" = load i64, ptr addrspace(5) %"40", align 4 store float %"50", ptr addrspace(5) %"42", align 4
%"53" = load float, ptr addrspace(5) %"41", align 4 %"53" = load i64, ptr addrspace(5) %"41", align 4
%"56" = inttoptr i64 %"52" to ptr %"54" = load float, ptr addrspace(5) %"42", align 4
store float %"53", ptr %"56", align 4 %"57" = inttoptr i64 %"53" to ptr
store float %"54", ptr %"57", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { define amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"42" = load i64, ptr addrspace(4) %"36", align 4 br label %"53"
store i64 %"42", ptr addrspace(5) %"38", align 4
"53": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr
%"44" = load i64, ptr %"50", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 4 %"46" = load i64, ptr addrspace(5) %"39", align 4
%2 = zext i64 %"47" to i128 %"51" = inttoptr i64 %"46" to ptr
%"45" = load i64, ptr %"51", align 4
store i64 %"45", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"41", align 4
%2 = zext i64 %"48" to i128
%3 = mul i128 %2, 2 %3 = mul i128 %2, 2
%4 = lshr i128 %3, 64 %4 = lshr i128 %3, 64
%"46" = trunc i128 %4 to i64 %"47" = trunc i128 %4 to i64
store i64 %"46", ptr addrspace(5) %"41", align 4 store i64 %"47", ptr addrspace(5) %"42", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4 %"49" = load i64, ptr addrspace(5) %"40", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4 %"50" = load i64, ptr addrspace(5) %"42", align 4
%"51" = inttoptr i64 %"48" to ptr %"52" = inttoptr i64 %"49" to ptr
store i64 %"49", ptr %"51", align 4 store i64 %"50", ptr %"52", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { define amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"42" = load i64, ptr addrspace(4) %"36", align 4 br label %"53"
store i64 %"42", ptr addrspace(5) %"38", align 4
"53": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr
%"44" = load i64, ptr %"50", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 4 %"46" = load i64, ptr addrspace(5) %"39", align 4
%"46" = mul i64 %"47", 2 %"51" = inttoptr i64 %"46" to ptr
store i64 %"46", ptr addrspace(5) %"41", align 4 %"45" = load i64, ptr %"51", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4 store i64 %"45", ptr addrspace(5) %"41", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4 %"48" = load i64, ptr addrspace(5) %"41", align 4
%"51" = inttoptr i64 %"48" to ptr %"47" = mul i64 %"48", 2
store i64 %"49", ptr %"51", align 4 store i64 %"47", ptr addrspace(5) %"42", align 4
%"49" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i64, ptr addrspace(5) %"42", align 4
%"52" = inttoptr i64 %"49" to ptr
store i64 %"50", ptr %"52", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca float, align 4, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca float, align 4, addrspace(5) %"42" = alloca float, align 4, addrspace(5)
%"43" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"37", align 4 br label %"58"
store i64 %"43", ptr addrspace(5) %"39", align 4
"58": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
%"54" = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"41", align 4
%"45" = load float, ptr %"54", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
store float %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4
%"55" = inttoptr i64 %"47" to ptr %"55" = inttoptr i64 %"47" to ptr
%"30" = getelementptr inbounds i8, ptr %"55", i64 4 %"46" = load float, ptr %"55", align 4
%"48" = load float, ptr %"30", align 4 store float %"46", ptr addrspace(5) %"42", align 4
store float %"48", ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load float, ptr addrspace(5) %"41", align 4 %"56" = inttoptr i64 %"48" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
%"49" = load float, ptr %"31", align 4
store float %"49", ptr addrspace(5) %"43", align 4
%"51" = load float, ptr addrspace(5) %"42", align 4 %"51" = load float, ptr addrspace(5) %"42", align 4
%"49" = fmul float %"50", %"51" %"52" = load float, ptr addrspace(5) %"43", align 4
store float %"49", ptr addrspace(5) %"41", align 4 %"50" = fmul float %"51", %"52"
%"52" = load i64, ptr addrspace(5) %"40", align 4 store float %"50", ptr addrspace(5) %"42", align 4
%"53" = load float, ptr addrspace(5) %"41", align 4 %"53" = load i64, ptr addrspace(5) %"41", align 4
%"56" = inttoptr i64 %"52" to ptr %"54" = load float, ptr addrspace(5) %"42", align 4
store float %"53", ptr %"56", align 4 %"57" = inttoptr i64 %"53" to ptr
store float %"54", ptr %"57", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,39 +10,43 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5) %"43" = alloca i32, align 4, addrspace(5)
%"44" = alloca i64, align 8, addrspace(5) %"44" = alloca i32, align 4, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"45" = load i64, ptr addrspace(4) %"38", align 4 br label %"61"
store i64 %"45", ptr addrspace(5) %"40", align 4
"61": ; preds = %1
%"46" = load i64, ptr addrspace(4) %"39", align 4 %"46" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"46", ptr addrspace(5) %"41", align 4 store i64 %"46", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 4 %"47" = load i64, ptr addrspace(4) %"40", align 4
%"56" = inttoptr i64 %"48" to ptr addrspace(1) store i64 %"47", ptr addrspace(5) %"42", align 4
%"47" = load i32, ptr addrspace(1) %"56", align 4 %"49" = load i64, ptr addrspace(5) %"41", align 4
store i32 %"47", ptr addrspace(5) %"42", align 4
%"49" = load i64, ptr addrspace(5) %"40", align 4
%"57" = inttoptr i64 %"49" to ptr addrspace(1) %"57" = inttoptr i64 %"49" to ptr addrspace(1)
%"31" = getelementptr inbounds i8, ptr addrspace(1) %"57", i64 4 %"48" = load i32, ptr addrspace(1) %"57", align 4
%"50" = load i32, ptr addrspace(1) %"31", align 4 store i32 %"48", ptr addrspace(5) %"43", align 4
store i32 %"50", ptr addrspace(5) %"43", align 4 %"50" = load i64, ptr addrspace(5) %"41", align 4
%"52" = load i32, ptr addrspace(5) %"42", align 4 %"58" = inttoptr i64 %"50" to ptr addrspace(1)
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"58", i64 4
%"51" = load i32, ptr addrspace(1) %"32", align 4
store i32 %"51", ptr addrspace(5) %"44", align 4
%"53" = load i32, ptr addrspace(5) %"43", align 4 %"53" = load i32, ptr addrspace(5) %"43", align 4
%2 = sext i32 %"52" to i64 %"54" = load i32, ptr addrspace(5) %"44", align 4
%3 = sext i32 %"53" to i64 %2 = sext i32 %"53" to i64
%"51" = mul i64 %2, %3 %3 = sext i32 %"54" to i64
store i64 %"51", ptr addrspace(5) %"44", align 4 %"52" = mul i64 %2, %3
%"54" = load i64, ptr addrspace(5) %"41", align 4 store i64 %"52", ptr addrspace(5) %"45", align 4
%"55" = load i64, ptr addrspace(5) %"44", align 4 %"55" = load i64, ptr addrspace(5) %"42", align 4
%"58" = inttoptr i64 %"54" to ptr %"56" = load i64, ptr addrspace(5) %"45", align 4
store i64 %"55", ptr %"58", align 4 %"59" = inttoptr i64 %"55" to ptr
store i64 %"56", ptr %"59", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,29 +10,33 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i32, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"50"
store i64 %"39", ptr addrspace(5) %"36", align 4
"50": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"47" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"41" = load i32, ptr %"47", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
store i32 %"41", ptr addrspace(5) %"38", align 4 %"48" = inttoptr i64 %"43" to ptr
%"44" = load i32, ptr addrspace(5) %"38", align 4 %"42" = load i32, ptr %"48", align 4
%"43" = sub i32 0, %"44" store i32 %"42", ptr addrspace(5) %"39", align 4
store i32 %"43", ptr addrspace(5) %"38", align 4 %"45" = load i32, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4 %"44" = sub i32 0, %"45"
%"46" = load i32, ptr addrspace(5) %"38", align 4 store i32 %"44", ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"45" to ptr %"46" = load i64, ptr addrspace(5) %"38", align 4
store i32 %"46", ptr %"48", align 4 %"47" = load i32, ptr addrspace(5) %"39", align 4
%"49" = inttoptr i64 %"46" to ptr
store i32 %"47", ptr %"49", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,35 +10,39 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5) %"43" = alloca i32, align 4, addrspace(5)
%"44" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"44" = load i64, ptr addrspace(4) %"38", align 4 br label %"57"
store i64 %"44", ptr addrspace(5) %"40", align 4
"57": ; preds = %1
%"45" = load i64, ptr addrspace(4) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"45", ptr addrspace(5) %"41", align 4 store i64 %"45", ptr addrspace(5) %"41", align 4
%"46" = load i64, ptr addrspace(5) %"40", align 4 %"46" = load i64, ptr addrspace(4) %"40", align 4
%"54" = inttoptr i64 %"46" to ptr addrspace(1) store i64 %"46", ptr addrspace(5) %"42", align 4
%"31" = getelementptr inbounds i8, ptr addrspace(1) %"54", i64 8 %"47" = load i64, ptr addrspace(5) %"41", align 4
%"29" = load <2 x i32>, ptr addrspace(1) %"31", align 8 %"55" = inttoptr i64 %"47" to ptr addrspace(1)
%"47" = extractelement <2 x i32> %"29", i8 0 %"32" = getelementptr inbounds i8, ptr addrspace(1) %"55", i64 8
%"48" = extractelement <2 x i32> %"29", i8 1 %"30" = load <2 x i32>, ptr addrspace(1) %"32", align 8
store i32 %"47", ptr addrspace(5) %"42", align 4 %"48" = extractelement <2 x i32> %"30", i8 0
%"49" = extractelement <2 x i32> %"30", i8 1
store i32 %"48", ptr addrspace(5) %"43", align 4 store i32 %"48", ptr addrspace(5) %"43", align 4
%"50" = load i32, ptr addrspace(5) %"42", align 4 store i32 %"49", ptr addrspace(5) %"44", align 4
%"51" = load i32, ptr addrspace(5) %"43", align 4 %"51" = load i32, ptr addrspace(5) %"43", align 4
%"49" = add i32 %"50", %"51" %"52" = load i32, ptr addrspace(5) %"44", align 4
store i32 %"49", ptr addrspace(5) %"42", align 4 %"50" = add i32 %"51", %"52"
%"52" = load i64, ptr addrspace(5) %"41", align 4 store i32 %"50", ptr addrspace(5) %"43", align 4
%"53" = load i32, ptr addrspace(5) %"42", align 4 %"53" = load i64, ptr addrspace(5) %"42", align 4
%"55" = inttoptr i64 %"52" to ptr addrspace(1) %"54" = load i32, ptr addrspace(5) %"43", align 4
store i32 %"53", ptr addrspace(1) %"55", align 4 %"56" = inttoptr i64 %"53" to ptr addrspace(1)
store i32 %"54", ptr addrspace(1) %"56", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"41" = load i64, ptr addrspace(4) %"35", align 4 br label %"54"
store i64 %"41", ptr addrspace(5) %"37", align 4
"54": ; preds = %1
%"42" = load i64, ptr addrspace(4) %"36", align 4 %"42" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"42", ptr addrspace(5) %"38", align 4 store i64 %"42", ptr addrspace(5) %"38", align 4
%"44" = load i64, ptr addrspace(5) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
%"49" = inttoptr i64 %"44" to ptr
%"43" = load i64, ptr %"49", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(5) %"38", align 4
%"50" = xor i64 %"46", -1 %"50" = inttoptr i64 %"45" to ptr
store i64 %"50", ptr addrspace(5) %"40", align 4 %"44" = load i64, ptr %"50", align 4
%"47" = load i64, ptr addrspace(5) %"38", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
%"52" = inttoptr i64 %"47" to ptr %"51" = xor i64 %"47", -1
store i64 %"48", ptr %"52", align 4 store i64 %"51", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4
%"53" = inttoptr i64 %"48" to ptr
store i64 %"49", ptr %"53", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5) %"42" = alloca i32, align 4, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"37", align 4 br label %"56"
store i64 %"43", ptr addrspace(5) %"39", align 4
"56": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
%"53" = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"41", align 4
%"45" = load i32, ptr %"53", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
store i32 %"45", ptr addrspace(5) %"41", align 4 %"54" = inttoptr i64 %"47" to ptr
%"30" = call i32 @__zluda_ptx_impl_sreg_ntid(i8 0) %"46" = load i32, ptr %"54", align 4
store i32 %"30", ptr addrspace(5) %"42", align 4 store i32 %"46", ptr addrspace(5) %"42", align 4
%"49" = load i32, ptr addrspace(5) %"41", align 4 %"31" = call i32 @__zluda_ptx_impl_sreg_ntid(i8 0)
store i32 %"31", ptr addrspace(5) %"43", align 4
%"50" = load i32, ptr addrspace(5) %"42", align 4 %"50" = load i32, ptr addrspace(5) %"42", align 4
%"48" = add i32 %"49", %"50" %"51" = load i32, ptr addrspace(5) %"43", align 4
store i32 %"48", ptr addrspace(5) %"41", align 4 %"49" = add i32 %"50", %"51"
%"51" = load i64, ptr addrspace(5) %"40", align 4 store i32 %"49", ptr addrspace(5) %"42", align 4
%"52" = load i32, ptr addrspace(5) %"41", align 4 %"52" = load i64, ptr addrspace(5) %"41", align 4
%"54" = inttoptr i64 %"51" to ptr %"53" = load i32, ptr addrspace(5) %"42", align 4
store i32 %"52", ptr %"54", align 4 %"55" = inttoptr i64 %"52" to ptr
store i32 %"53", ptr %"55", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"37", align 4 br label %"61"
store i64 %"43", ptr addrspace(5) %"39", align 4
"61": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
%"54" = inttoptr i64 %"46" to ptr
%"45" = load i64, ptr %"54", align 4
store i64 %"45", ptr addrspace(5) %"41", align 4 store i64 %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
%"55" = inttoptr i64 %"47" to ptr %"55" = inttoptr i64 %"47" to ptr
%"30" = getelementptr inbounds i8, ptr %"55", i64 8 %"46" = load i64, ptr %"55", align 4
%"48" = load i64, ptr %"30", align 4 store i64 %"46", ptr addrspace(5) %"42", align 4
store i64 %"48", ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i64, ptr addrspace(5) %"41", align 4 %"56" = inttoptr i64 %"48" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 8
%"49" = load i64, ptr %"31", align 4
store i64 %"49", ptr addrspace(5) %"43", align 4
%"51" = load i64, ptr addrspace(5) %"42", align 4 %"51" = load i64, ptr addrspace(5) %"42", align 4
%"56" = or i64 %"50", %"51" %"52" = load i64, ptr addrspace(5) %"43", align 4
store i64 %"56", ptr addrspace(5) %"41", align 4 %"57" = or i64 %"51", %"52"
%"52" = load i64, ptr addrspace(5) %"40", align 4 store i64 %"57", ptr addrspace(5) %"42", align 4
%"53" = load i64, ptr addrspace(5) %"41", align 4 %"53" = load i64, ptr addrspace(5) %"41", align 4
%"59" = inttoptr i64 %"52" to ptr %"54" = load i64, ptr addrspace(5) %"42", align 4
store i64 %"53", ptr %"59", align 4 %"60" = inttoptr i64 %"53" to ptr
store i64 %"54", ptr %"60", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i32, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"51"
store i64 %"39", ptr addrspace(5) %"36", align 4
"51": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"47" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"41" = load i32, ptr %"47", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
store i32 %"41", ptr addrspace(5) %"38", align 4 %"48" = inttoptr i64 %"43" to ptr
%"44" = load i32, ptr addrspace(5) %"38", align 4 %"42" = load i32, ptr %"48", align 4
%"48" = call i32 @llvm.ctpop.i32(i32 %"44") store i32 %"42", ptr addrspace(5) %"39", align 4
store i32 %"48", ptr addrspace(5) %"38", align 4 %"45" = load i32, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4 %"49" = call i32 @llvm.ctpop.i32(i32 %"45")
%"46" = load i32, ptr addrspace(5) %"38", align 4 store i32 %"49", ptr addrspace(5) %"39", align 4
%"49" = inttoptr i64 %"45" to ptr %"46" = load i64, ptr addrspace(5) %"38", align 4
store i32 %"46", ptr %"49", align 4 %"47" = load i32, ptr addrspace(5) %"39", align 4
%"50" = inttoptr i64 %"46" to ptr
store i32 %"47", ptr %"50", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare i32 @llvm.ctpop.i32(i32) #1 declare i32 @llvm.ctpop.i32(i32) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
@@ -10,57 +10,61 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 { define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 {
%"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca i64, align 8, addrspace(5) %"48" = alloca i64, align 8, addrspace(5)
%"49" = alloca i64, align 8, addrspace(5) %"49" = alloca i64, align 8, addrspace(5)
%"50" = alloca i64, align 8, addrspace(5) %"50" = alloca i64, align 8, addrspace(5)
%"51" = alloca i64, align 8, addrspace(5) %"51" = alloca i64, align 8, addrspace(5)
%"52" = alloca i1, align 1, addrspace(5) %"52" = alloca i64, align 8, addrspace(5)
%"53" = alloca i1, align 1, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"53" = load i64, ptr addrspace(4) %"45", align 4 br label %"74"
store i64 %"53", ptr addrspace(5) %"47", align 4
"74": ; preds = %1
%"54" = load i64, ptr addrspace(4) %"46", align 4 %"54" = load i64, ptr addrspace(4) %"46", align 4
store i64 %"54", ptr addrspace(5) %"48", align 4 store i64 %"54", ptr addrspace(5) %"48", align 4
%"56" = load i64, ptr addrspace(5) %"47", align 4 %"55" = load i64, ptr addrspace(4) %"47", align 4
%"70" = inttoptr i64 %"56" to ptr
%"55" = load i64, ptr %"70", align 4
store i64 %"55", ptr addrspace(5) %"49", align 4 store i64 %"55", ptr addrspace(5) %"49", align 4
%"57" = load i64, ptr addrspace(5) %"47", align 4 %"57" = load i64, ptr addrspace(5) %"48", align 4
%"71" = inttoptr i64 %"57" to ptr %"71" = inttoptr i64 %"57" to ptr
%"36" = getelementptr inbounds i8, ptr %"71", i64 8 %"56" = load i64, ptr %"71", align 4
%"58" = load i64, ptr %"36", align 4 store i64 %"56", ptr addrspace(5) %"50", align 4
store i64 %"58", ptr addrspace(5) %"50", align 4 %"58" = load i64, ptr addrspace(5) %"48", align 4
%"60" = load i64, ptr addrspace(5) %"49", align 4 %"72" = inttoptr i64 %"58" to ptr
%"37" = getelementptr inbounds i8, ptr %"72", i64 8
%"59" = load i64, ptr %"37", align 4
store i64 %"59", ptr addrspace(5) %"51", align 4
%"61" = load i64, ptr addrspace(5) %"50", align 4 %"61" = load i64, ptr addrspace(5) %"50", align 4
%"59" = icmp ult i64 %"60", %"61" %"62" = load i64, ptr addrspace(5) %"51", align 4
store i1 %"59", ptr addrspace(5) %"52", align 1 %"60" = icmp ult i64 %"61", %"62"
%"63" = load i1, ptr addrspace(5) %"52", align 1 store i1 %"60", ptr addrspace(5) %"53", align 1
%"62" = xor i1 %"63", true %"64" = load i1, ptr addrspace(5) %"53", align 1
store i1 %"62", ptr addrspace(5) %"52", align 1 %"63" = xor i1 %"64", true
%"64" = load i1, ptr addrspace(5) %"52", align 1 store i1 %"63", ptr addrspace(5) %"53", align 1
br i1 %"64", label %"15", label %"16" %"65" = load i1, ptr addrspace(5) %"53", align 1
br i1 %"65", label %"16", label %"17"
"15": ; preds = %1 "16": ; preds = %"74"
store i64 1, ptr addrspace(5) %"51", align 4 store i64 1, ptr addrspace(5) %"52", align 4
br label %"16" br label %"17"
"16": ; preds = %"15", %1 "17": ; preds = %"16", %"74"
%"66" = load i1, ptr addrspace(5) %"52", align 1 %"67" = load i1, ptr addrspace(5) %"53", align 1
br i1 %"66", label %"18", label %"17" br i1 %"67", label %"19", label %"18"
"17": ; preds = %"16" "18": ; preds = %"17"
store i64 2, ptr addrspace(5) %"51", align 4 store i64 2, ptr addrspace(5) %"52", align 4
br label %"18" br label %"19"
"18": ; preds = %"17", %"16" "19": ; preds = %"18", %"17"
%"68" = load i64, ptr addrspace(5) %"48", align 4 %"69" = load i64, ptr addrspace(5) %"49", align 4
%"69" = load i64, ptr addrspace(5) %"51", align 4 %"70" = load i64, ptr addrspace(5) %"52", align 4
%"72" = inttoptr i64 %"68" to ptr %"73" = inttoptr i64 %"69" to ptr
store i64 %"69", ptr %"72", align 4 store i64 %"70", ptr %"73", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,38 +10,42 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5) %"42" = alloca i32, align 4, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"37", align 4 br label %"61"
store i64 %"43", ptr addrspace(5) %"39", align 4
"61": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
%"54" = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"41", align 4
%"45" = load i32, ptr %"54", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
store i32 %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4
%"55" = inttoptr i64 %"47" to ptr %"55" = inttoptr i64 %"47" to ptr
%"30" = getelementptr inbounds i8, ptr %"55", i64 4 %"46" = load i32, ptr %"55", align 4
%"48" = load i32, ptr %"30", align 4 store i32 %"46", ptr addrspace(5) %"42", align 4
store i32 %"48", ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i32, ptr addrspace(5) %"41", align 4 %"56" = inttoptr i64 %"48" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
%"49" = load i32, ptr %"31", align 4
store i32 %"49", ptr addrspace(5) %"43", align 4
%"51" = load i32, ptr addrspace(5) %"42", align 4 %"51" = load i32, ptr addrspace(5) %"42", align 4
%2 = bitcast i32 %"50" to <4 x i8> %"52" = load i32, ptr addrspace(5) %"43", align 4
%3 = bitcast i32 %"51" to <4 x i8> %2 = bitcast i32 %"51" to <4 x i8>
%"56" = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> <i32 4, i32 0, i32 6, i32 7> %3 = bitcast i32 %"52" to <4 x i8>
store <4 x i8> %"56", ptr addrspace(5) %"42", align 4 %"57" = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
%"52" = load i64, ptr addrspace(5) %"40", align 4 store <4 x i8> %"57", ptr addrspace(5) %"43", align 4
%"53" = load i32, ptr addrspace(5) %"42", align 4 %"53" = load i64, ptr addrspace(5) %"41", align 4
%"59" = inttoptr i64 %"52" to ptr %"54" = load i32, ptr addrspace(5) %"43", align 4
store i32 %"53", ptr %"59", align 4 %"60" = inttoptr i64 %"53" to ptr
store i32 %"54", ptr %"60", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca float, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"50"
store i64 %"39", ptr addrspace(5) %"36", align 4
"50": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"47" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"41" = load float, ptr %"47", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
store float %"41", ptr addrspace(5) %"38", align 4 %"48" = inttoptr i64 %"43" to ptr
%"44" = load float, ptr addrspace(5) %"38", align 4 %"42" = load float, ptr %"48", align 4
%"43" = call float @llvm.amdgcn.rcp.f32(float %"44") store float %"42", ptr addrspace(5) %"39", align 4
store float %"43", ptr addrspace(5) %"38", align 4 %"45" = load float, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4 %"44" = call float @llvm.amdgcn.rcp.f32(float %"45")
%"46" = load float, ptr addrspace(5) %"38", align 4 store float %"44", ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"45" to ptr %"46" = load i64, ptr addrspace(5) %"38", align 4
store float %"46", ptr %"48", align 4 %"47" = load float, ptr addrspace(5) %"39", align 4
%"49" = inttoptr i64 %"46" to ptr
store float %"47", ptr %"49", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.amdgcn.rcp.f32(float) #1 declare float @llvm.amdgcn.rcp.f32(float) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
@@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 { define amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #1 {
%"9" = alloca [8 x i8], align 8, addrspace(5) %"10" = alloca [8 x i8], align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca i64, align 8, addrspace(5) %"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5) %"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"46" = load i64, ptr addrspace(4) %"41", align 4 br label %"63"
store i64 %"46", ptr addrspace(5) %"43", align 4
"63": ; preds = %1
%"47" = load i64, ptr addrspace(4) %"42", align 4 %"47" = load i64, ptr addrspace(4) %"42", align 4
store i64 %"47", ptr addrspace(5) %"44", align 4 store i64 %"47", ptr addrspace(5) %"44", align 4
%"49" = load i64, ptr addrspace(5) %"43", align 4 %"48" = load i64, ptr addrspace(4) %"43", align 4
%"55" = inttoptr i64 %"49" to ptr addrspace(1) store i64 %"48", ptr addrspace(5) %"45", align 4
%"54" = load i64, ptr addrspace(1) %"55", align 4 %"50" = load i64, ptr addrspace(5) %"44", align 4
store i64 %"54", ptr addrspace(5) %"45", align 4 %"56" = inttoptr i64 %"50" to ptr addrspace(1)
%"50" = load i64, ptr addrspace(5) %"45", align 4 %"55" = load i64, ptr addrspace(1) %"56", align 4
%"30" = add i64 %"50", 1 store i64 %"55", ptr addrspace(5) %"46", align 4
%"56" = addrspacecast ptr addrspace(5) %"9" to ptr %"51" = load i64, ptr addrspace(5) %"46", align 4
store i64 %"30", ptr %"56", align 4 %"31" = add i64 %"51", 1
%"58" = addrspacecast ptr addrspace(5) %"9" to ptr %"57" = addrspacecast ptr addrspace(5) %"10" to ptr
%"32" = getelementptr inbounds i8, ptr %"58", i64 0 store i64 %"31", ptr %"57", align 4
%"59" = load i64, ptr %"32", align 4 %"59" = addrspacecast ptr addrspace(5) %"10" to ptr
store i64 %"59", ptr addrspace(5) %"45", align 4 %"33" = getelementptr inbounds i8, ptr %"59", i64 0
%"52" = load i64, ptr addrspace(5) %"44", align 4 %"60" = load i64, ptr %"33", align 4
%"60" = inttoptr i64 %"52" to ptr addrspace(1) store i64 %"60", ptr addrspace(5) %"46", align 4
%"34" = getelementptr inbounds i8, ptr addrspace(1) %"60", i64 0
%"53" = load i64, ptr addrspace(5) %"45", align 4 %"53" = load i64, ptr addrspace(5) %"45", align 4
store i64 %"53", ptr addrspace(1) %"34", align 4 %"61" = inttoptr i64 %"53" to ptr addrspace(1)
%"35" = getelementptr inbounds i8, ptr addrspace(1) %"61", i64 0
%"54" = load i64, ptr addrspace(5) %"46", align 4
store i64 %"54", ptr addrspace(1) %"35", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5) %"42" = alloca i32, align 4, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"37", align 4 br label %"58"
store i64 %"43", ptr addrspace(5) %"39", align 4
"58": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
%"54" = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"41", align 4
%"45" = load i32, ptr %"54", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
store i32 %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4
%"55" = inttoptr i64 %"47" to ptr %"55" = inttoptr i64 %"47" to ptr
%"30" = getelementptr inbounds i8, ptr %"55", i64 4 %"46" = load i32, ptr %"55", align 4
%"48" = load i32, ptr %"30", align 4 store i32 %"46", ptr addrspace(5) %"42", align 4
store i32 %"48", ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i32, ptr addrspace(5) %"41", align 4 %"56" = inttoptr i64 %"48" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
%"49" = load i32, ptr %"31", align 4
store i32 %"49", ptr addrspace(5) %"43", align 4
%"51" = load i32, ptr addrspace(5) %"42", align 4 %"51" = load i32, ptr addrspace(5) %"42", align 4
%"49" = srem i32 %"50", %"51" %"52" = load i32, ptr addrspace(5) %"43", align 4
store i32 %"49", ptr addrspace(5) %"41", align 4 %"50" = srem i32 %"51", %"52"
%"52" = load i64, ptr addrspace(5) %"40", align 4 store i32 %"50", ptr addrspace(5) %"42", align 4
%"53" = load i32, ptr addrspace(5) %"41", align 4 %"53" = load i64, ptr addrspace(5) %"41", align 4
%"56" = inttoptr i64 %"52" to ptr %"54" = load i32, ptr addrspace(5) %"42", align 4
store i32 %"53", ptr %"56", align 4 %"57" = inttoptr i64 %"53" to ptr
store i32 %"54", ptr %"57", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,33 +10,42 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca double, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca double, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"50"
store i64 %"39", ptr addrspace(5) %"36", align 4
"50": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"47" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"41" = load double, ptr %"47", align 8 %"43" = load i64, ptr addrspace(5) %"37", align 4
store double %"41", ptr addrspace(5) %"38", align 8 %"48" = inttoptr i64 %"43" to ptr
%"44" = load double, ptr addrspace(5) %"38", align 8 %"42" = load double, ptr %"48", align 8
%"43" = call double @llvm.amdgcn.rsq.f64(double %"44") store double %"42", ptr addrspace(5) %"39", align 8
store double %"43", ptr addrspace(5) %"38", align 8 %"45" = load double, ptr addrspace(5) %"39", align 8
%"45" = load i64, ptr addrspace(5) %"37", align 4 call void @llvm.amdgcn.s.setreg(i32 2433, i32 3)
%"46" = load double, ptr addrspace(5) %"38", align 8 %"44" = call double @llvm.amdgcn.rsq.f64(double %"45")
%"48" = inttoptr i64 %"45" to ptr store double %"44", ptr addrspace(5) %"39", align 8
store double %"46", ptr %"48", align 8 %"46" = load i64, ptr addrspace(5) %"38", align 4
%"47" = load double, ptr addrspace(5) %"39", align 8
%"49" = inttoptr i64 %"46" to ptr
store double %"47", ptr %"49", align 8
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind willreturn
declare double @llvm.amdgcn.rsq.f64(double) #1 declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } declare double @llvm.amdgcn.rsq.f64(double) #3
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind willreturn }
attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
@@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i16, align 2, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i16, align 2, addrspace(5) %"43" = alloca i16, align 2, addrspace(5)
%"44" = alloca i16, align 2, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"44" = load i64, ptr addrspace(4) %"38", align 4 br label %"59"
store i64 %"44", ptr addrspace(5) %"40", align 4
"59": ; preds = %1
%"45" = load i64, ptr addrspace(4) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"45", ptr addrspace(5) %"41", align 4 store i64 %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 4 %"46" = load i64, ptr addrspace(4) %"40", align 4
%"55" = inttoptr i64 %"47" to ptr store i64 %"46", ptr addrspace(5) %"42", align 4
%"46" = load i16, ptr %"55", align 2 %"48" = load i64, ptr addrspace(5) %"41", align 4
store i16 %"46", ptr addrspace(5) %"42", align 2
%"48" = load i64, ptr addrspace(5) %"40", align 4
%"56" = inttoptr i64 %"48" to ptr %"56" = inttoptr i64 %"48" to ptr
%"30" = getelementptr inbounds i8, ptr %"56", i64 2 %"47" = load i16, ptr %"56", align 2
%"49" = load i16, ptr %"30", align 2 store i16 %"47", ptr addrspace(5) %"43", align 2
store i16 %"49", ptr addrspace(5) %"43", align 2 %"49" = load i64, ptr addrspace(5) %"41", align 4
%"51" = load i16, ptr addrspace(5) %"42", align 2 %"57" = inttoptr i64 %"49" to ptr
%"31" = getelementptr inbounds i8, ptr %"57", i64 2
%"50" = load i16, ptr %"31", align 2
store i16 %"50", ptr addrspace(5) %"44", align 2
%"52" = load i16, ptr addrspace(5) %"43", align 2 %"52" = load i16, ptr addrspace(5) %"43", align 2
%"50" = select i1 false, i16 %"51", i16 %"52" %"53" = load i16, ptr addrspace(5) %"44", align 2
store i16 %"50", ptr addrspace(5) %"42", align 2 %"51" = select i1 false, i16 %"52", i16 %"53"
%"53" = load i64, ptr addrspace(5) %"41", align 4 store i16 %"51", ptr addrspace(5) %"43", align 2
%"54" = load i16, ptr addrspace(5) %"42", align 2 %"54" = load i64, ptr addrspace(5) %"42", align 4
%"57" = inttoptr i64 %"53" to ptr %"55" = load i16, ptr addrspace(5) %"43", align 2
store i16 %"54", ptr %"57", align 2 %"58" = inttoptr i64 %"54" to ptr
store i16 %"55", ptr %"58", align 2
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i16, align 2, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i16, align 2, addrspace(5) %"43" = alloca i16, align 2, addrspace(5)
%"44" = alloca i16, align 2, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"44" = load i64, ptr addrspace(4) %"38", align 4 br label %"59"
store i64 %"44", ptr addrspace(5) %"40", align 4
"59": ; preds = %1
%"45" = load i64, ptr addrspace(4) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"45", ptr addrspace(5) %"41", align 4 store i64 %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 4 %"46" = load i64, ptr addrspace(4) %"40", align 4
%"55" = inttoptr i64 %"47" to ptr store i64 %"46", ptr addrspace(5) %"42", align 4
%"46" = load i16, ptr %"55", align 2 %"48" = load i64, ptr addrspace(5) %"41", align 4
store i16 %"46", ptr addrspace(5) %"42", align 2
%"48" = load i64, ptr addrspace(5) %"40", align 4
%"56" = inttoptr i64 %"48" to ptr %"56" = inttoptr i64 %"48" to ptr
%"30" = getelementptr inbounds i8, ptr %"56", i64 2 %"47" = load i16, ptr %"56", align 2
%"49" = load i16, ptr %"30", align 2 store i16 %"47", ptr addrspace(5) %"43", align 2
store i16 %"49", ptr addrspace(5) %"43", align 2 %"49" = load i64, ptr addrspace(5) %"41", align 4
%"51" = load i16, ptr addrspace(5) %"42", align 2 %"57" = inttoptr i64 %"49" to ptr
%"31" = getelementptr inbounds i8, ptr %"57", i64 2
%"50" = load i16, ptr %"31", align 2
store i16 %"50", ptr addrspace(5) %"44", align 2
%"52" = load i16, ptr addrspace(5) %"43", align 2 %"52" = load i16, ptr addrspace(5) %"43", align 2
%"50" = select i1 true, i16 %"51", i16 %"52" %"53" = load i16, ptr addrspace(5) %"44", align 2
store i16 %"50", ptr addrspace(5) %"42", align 2 %"51" = select i1 true, i16 %"52", i16 %"53"
%"53" = load i64, ptr addrspace(5) %"41", align 4 store i16 %"51", ptr addrspace(5) %"43", align 2
%"54" = load i16, ptr addrspace(5) %"42", align 2 %"54" = load i64, ptr addrspace(5) %"42", align 4
%"57" = inttoptr i64 %"53" to ptr %"55" = load i16, ptr addrspace(5) %"43", align 2
store i16 %"54", ptr %"57", align 2 %"58" = inttoptr i64 %"54" to ptr
store i16 %"55", ptr %"58", align 2
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,54 +10,58 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #0 { define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 {
%"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca i64, align 8, addrspace(5) %"48" = alloca i64, align 8, addrspace(5)
%"49" = alloca i64, align 8, addrspace(5) %"49" = alloca i64, align 8, addrspace(5)
%"50" = alloca i64, align 8, addrspace(5) %"50" = alloca i64, align 8, addrspace(5)
%"51" = alloca i64, align 8, addrspace(5) %"51" = alloca i64, align 8, addrspace(5)
%"52" = alloca i1, align 1, addrspace(5) %"52" = alloca i64, align 8, addrspace(5)
%"53" = alloca i1, align 1, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"53" = load i64, ptr addrspace(4) %"45", align 4 br label %"72"
store i64 %"53", ptr addrspace(5) %"47", align 4
"72": ; preds = %1
%"54" = load i64, ptr addrspace(4) %"46", align 4 %"54" = load i64, ptr addrspace(4) %"46", align 4
store i64 %"54", ptr addrspace(5) %"48", align 4 store i64 %"54", ptr addrspace(5) %"48", align 4
%"56" = load i64, ptr addrspace(5) %"47", align 4 %"55" = load i64, ptr addrspace(4) %"47", align 4
%"68" = inttoptr i64 %"56" to ptr
%"55" = load i64, ptr %"68", align 4
store i64 %"55", ptr addrspace(5) %"49", align 4 store i64 %"55", ptr addrspace(5) %"49", align 4
%"57" = load i64, ptr addrspace(5) %"47", align 4 %"57" = load i64, ptr addrspace(5) %"48", align 4
%"69" = inttoptr i64 %"57" to ptr %"69" = inttoptr i64 %"57" to ptr
%"36" = getelementptr inbounds i8, ptr %"69", i64 8 %"56" = load i64, ptr %"69", align 4
%"58" = load i64, ptr %"36", align 4 store i64 %"56", ptr addrspace(5) %"50", align 4
store i64 %"58", ptr addrspace(5) %"50", align 4 %"58" = load i64, ptr addrspace(5) %"48", align 4
%"60" = load i64, ptr addrspace(5) %"49", align 4 %"70" = inttoptr i64 %"58" to ptr
%"37" = getelementptr inbounds i8, ptr %"70", i64 8
%"59" = load i64, ptr %"37", align 4
store i64 %"59", ptr addrspace(5) %"51", align 4
%"61" = load i64, ptr addrspace(5) %"50", align 4 %"61" = load i64, ptr addrspace(5) %"50", align 4
%"59" = icmp ult i64 %"60", %"61" %"62" = load i64, ptr addrspace(5) %"51", align 4
store i1 %"59", ptr addrspace(5) %"52", align 1 %"60" = icmp ult i64 %"61", %"62"
%"62" = load i1, ptr addrspace(5) %"52", align 1 store i1 %"60", ptr addrspace(5) %"53", align 1
br i1 %"62", label %"15", label %"16" %"63" = load i1, ptr addrspace(5) %"53", align 1
br i1 %"63", label %"16", label %"17"
"15": ; preds = %1 "16": ; preds = %"72"
store i64 1, ptr addrspace(5) %"51", align 4 store i64 1, ptr addrspace(5) %"52", align 4
br label %"16" br label %"17"
"16": ; preds = %"15", %1 "17": ; preds = %"16", %"72"
%"64" = load i1, ptr addrspace(5) %"52", align 1 %"65" = load i1, ptr addrspace(5) %"53", align 1
br i1 %"64", label %"18", label %"17" br i1 %"65", label %"19", label %"18"
"17": ; preds = %"16" "18": ; preds = %"17"
store i64 2, ptr addrspace(5) %"51", align 4 store i64 2, ptr addrspace(5) %"52", align 4
br label %"18" br label %"19"
"18": ; preds = %"17", %"16" "19": ; preds = %"18", %"17"
%"66" = load i64, ptr addrspace(5) %"48", align 4 %"67" = load i64, ptr addrspace(5) %"49", align 4
%"67" = load i64, ptr addrspace(5) %"51", align 4 %"68" = load i64, ptr addrspace(5) %"52", align 4
%"70" = inttoptr i64 %"66" to ptr %"71" = inttoptr i64 %"67" to ptr
store i64 %"67", ptr %"70", align 4 store i64 %"68", ptr %"71", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@@ -10,56 +10,60 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 { define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
%"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5) %"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca float, align 4, addrspace(5) %"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca float, align 4, addrspace(5) %"48" = alloca float, align 4, addrspace(5)
%"49" = alloca float, align 4, addrspace(5) %"49" = alloca float, align 4, addrspace(5)
%"50" = alloca i1, align 1, addrspace(5) %"50" = alloca float, align 4, addrspace(5)
%"51" = alloca i1, align 1, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"51" = load i64, ptr addrspace(4) %"43", align 4 br label %"72"
store i64 %"51", ptr addrspace(5) %"45", align 4
"72": ; preds = %1
%"52" = load i64, ptr addrspace(4) %"44", align 4 %"52" = load i64, ptr addrspace(4) %"44", align 4
store i64 %"52", ptr addrspace(5) %"46", align 4 store i64 %"52", ptr addrspace(5) %"46", align 4
%"54" = load i64, ptr addrspace(5) %"45", align 4 %"53" = load i64, ptr addrspace(4) %"45", align 4
%"68" = inttoptr i64 %"54" to ptr store i64 %"53", ptr addrspace(5) %"47", align 4
%"53" = load float, ptr %"68", align 4 %"55" = load i64, ptr addrspace(5) %"46", align 4
store float %"53", ptr addrspace(5) %"47", align 4
%"55" = load i64, ptr addrspace(5) %"45", align 4
%"69" = inttoptr i64 %"55" to ptr %"69" = inttoptr i64 %"55" to ptr
%"36" = getelementptr inbounds i8, ptr %"69", i64 4 %"54" = load float, ptr %"69", align 4
%"56" = load float, ptr %"36", align 4 store float %"54", ptr addrspace(5) %"48", align 4
store float %"56", ptr addrspace(5) %"48", align 4 %"56" = load i64, ptr addrspace(5) %"46", align 4
%"58" = load float, ptr addrspace(5) %"47", align 4 %"70" = inttoptr i64 %"56" to ptr
%"37" = getelementptr inbounds i8, ptr %"70", i64 4
%"57" = load float, ptr %"37", align 4
store float %"57", ptr addrspace(5) %"49", align 4
%"59" = load float, ptr addrspace(5) %"48", align 4 %"59" = load float, ptr addrspace(5) %"48", align 4
%"57" = fcmp ogt float %"58", %"59" %"60" = load float, ptr addrspace(5) %"49", align 4
store i1 %"57", ptr addrspace(5) %"50", align 1 %"58" = fcmp ogt float %"59", %"60"
%"60" = load i1, ptr addrspace(5) %"50", align 1 store i1 %"58", ptr addrspace(5) %"51", align 1
br i1 %"60", label %"15", label %"16" %"61" = load i1, ptr addrspace(5) %"51", align 1
br i1 %"61", label %"16", label %"17"
"15": ; preds = %1 "16": ; preds = %"72"
%"62" = load float, ptr addrspace(5) %"47", align 4 %"63" = load float, ptr addrspace(5) %"48", align 4
store float %"62", ptr addrspace(5) %"49", align 4 store float %"63", ptr addrspace(5) %"50", align 4
br label %"16" br label %"17"
"16": ; preds = %"15", %1 "17": ; preds = %"16", %"72"
%"63" = load i1, ptr addrspace(5) %"50", align 1 %"64" = load i1, ptr addrspace(5) %"51", align 1
br i1 %"63", label %"18", label %"17" br i1 %"64", label %"19", label %"18"
"17": ; preds = %"16" "18": ; preds = %"17"
%"65" = load float, ptr addrspace(5) %"48", align 4 %"66" = load float, ptr addrspace(5) %"49", align 4
store float %"65", ptr addrspace(5) %"49", align 4 store float %"66", ptr addrspace(5) %"50", align 4
br label %"18" br label %"19"
"18": ; preds = %"17", %"16" "19": ; preds = %"18", %"17"
%"66" = load i64, ptr addrspace(5) %"46", align 4 %"67" = load i64, ptr addrspace(5) %"47", align 4
%"67" = load float, ptr addrspace(5) %"49", align 4 %"68" = load float, ptr addrspace(5) %"50", align 4
%"70" = inttoptr i64 %"66" to ptr %"71" = inttoptr i64 %"67" to ptr
store float %"67", ptr %"70", align 4 store float %"68", ptr %"71", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,56 +10,60 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #0 { define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
%"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5) %"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca float, align 4, addrspace(5) %"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca float, align 4, addrspace(5) %"48" = alloca float, align 4, addrspace(5)
%"49" = alloca float, align 4, addrspace(5) %"49" = alloca float, align 4, addrspace(5)
%"50" = alloca i1, align 1, addrspace(5) %"50" = alloca float, align 4, addrspace(5)
%"51" = alloca i1, align 1, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"51" = load i64, ptr addrspace(4) %"43", align 4 br label %"72"
store i64 %"51", ptr addrspace(5) %"45", align 4
"72": ; preds = %1
%"52" = load i64, ptr addrspace(4) %"44", align 4 %"52" = load i64, ptr addrspace(4) %"44", align 4
store i64 %"52", ptr addrspace(5) %"46", align 4 store i64 %"52", ptr addrspace(5) %"46", align 4
%"54" = load i64, ptr addrspace(5) %"45", align 4 %"53" = load i64, ptr addrspace(4) %"45", align 4
%"68" = inttoptr i64 %"54" to ptr store i64 %"53", ptr addrspace(5) %"47", align 4
%"53" = load float, ptr %"68", align 4 %"55" = load i64, ptr addrspace(5) %"46", align 4
store float %"53", ptr addrspace(5) %"47", align 4
%"55" = load i64, ptr addrspace(5) %"45", align 4
%"69" = inttoptr i64 %"55" to ptr %"69" = inttoptr i64 %"55" to ptr
%"36" = getelementptr inbounds i8, ptr %"69", i64 4 %"54" = load float, ptr %"69", align 4
%"56" = load float, ptr %"36", align 4 store float %"54", ptr addrspace(5) %"48", align 4
store float %"56", ptr addrspace(5) %"48", align 4 %"56" = load i64, ptr addrspace(5) %"46", align 4
%"58" = load float, ptr addrspace(5) %"47", align 4 %"70" = inttoptr i64 %"56" to ptr
%"37" = getelementptr inbounds i8, ptr %"70", i64 4
%"57" = load float, ptr %"37", align 4
store float %"57", ptr addrspace(5) %"49", align 4
%"59" = load float, ptr addrspace(5) %"48", align 4 %"59" = load float, ptr addrspace(5) %"48", align 4
%"57" = fcmp ule float %"58", %"59" %"60" = load float, ptr addrspace(5) %"49", align 4
store i1 %"57", ptr addrspace(5) %"50", align 1 %"58" = fcmp ule float %"59", %"60"
%"60" = load i1, ptr addrspace(5) %"50", align 1 store i1 %"58", ptr addrspace(5) %"51", align 1
br i1 %"60", label %"15", label %"16" %"61" = load i1, ptr addrspace(5) %"51", align 1
br i1 %"61", label %"16", label %"17"
"15": ; preds = %1 "16": ; preds = %"72"
%"62" = load float, ptr addrspace(5) %"47", align 4 %"63" = load float, ptr addrspace(5) %"48", align 4
store float %"62", ptr addrspace(5) %"49", align 4 store float %"63", ptr addrspace(5) %"50", align 4
br label %"16" br label %"17"
"16": ; preds = %"15", %1 "17": ; preds = %"16", %"72"
%"63" = load i1, ptr addrspace(5) %"50", align 1 %"64" = load i1, ptr addrspace(5) %"51", align 1
br i1 %"63", label %"18", label %"17" br i1 %"64", label %"19", label %"18"
"17": ; preds = %"16" "18": ; preds = %"17"
%"65" = load float, ptr addrspace(5) %"48", align 4 %"66" = load float, ptr addrspace(5) %"49", align 4
store float %"65", ptr addrspace(5) %"49", align 4 store float %"66", ptr addrspace(5) %"50", align 4
br label %"18" br label %"19"
"18": ; preds = %"17", %"16" "19": ; preds = %"18", %"17"
%"66" = load i64, ptr addrspace(5) %"46", align 4 %"67" = load i64, ptr addrspace(5) %"47", align 4
%"67" = load float, ptr addrspace(5) %"49", align 4 %"68" = load float, ptr addrspace(5) %"50", align 4
%"70" = inttoptr i64 %"66" to ptr %"71" = inttoptr i64 %"67" to ptr
store float %"67", ptr %"70", align 4 store float %"68", ptr %"71", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,10 +10,9 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"87", ptr addrspace(4) byref(i64) %"88") #0 { define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"88", ptr addrspace(4) byref(i64) %"89") #1 {
%"89" = alloca i64, align 8, addrspace(5)
%"90" = alloca i64, align 8, addrspace(5) %"90" = alloca i64, align 8, addrspace(5)
%"91" = alloca float, align 4, addrspace(5) %"91" = alloca i64, align 8, addrspace(5)
%"92" = alloca float, align 4, addrspace(5) %"92" = alloca float, align 4, addrspace(5)
%"93" = alloca float, align 4, addrspace(5) %"93" = alloca float, align 4, addrspace(5)
%"94" = alloca float, align 4, addrspace(5) %"94" = alloca float, align 4, addrspace(5)
@@ -21,154 +20,159 @@ define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"87", ptr addrs
%"96" = alloca float, align 4, addrspace(5) %"96" = alloca float, align 4, addrspace(5)
%"97" = alloca float, align 4, addrspace(5) %"97" = alloca float, align 4, addrspace(5)
%"98" = alloca float, align 4, addrspace(5) %"98" = alloca float, align 4, addrspace(5)
%"99" = alloca i32, align 4, addrspace(5) %"99" = alloca float, align 4, addrspace(5)
%"100" = alloca i1, align 1, addrspace(5) %"100" = alloca i32, align 4, addrspace(5)
%"101" = alloca i1, align 1, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"101" = load i64, ptr addrspace(4) %"87", align 4 br label %"168"
store i64 %"101", ptr addrspace(5) %"89", align 4
"168": ; preds = %1
%"102" = load i64, ptr addrspace(4) %"88", align 4 %"102" = load i64, ptr addrspace(4) %"88", align 4
store i64 %"102", ptr addrspace(5) %"90", align 4 store i64 %"102", ptr addrspace(5) %"90", align 4
%"104" = load i64, ptr addrspace(5) %"89", align 4 %"103" = load i64, ptr addrspace(4) %"89", align 4
%"155" = inttoptr i64 %"104" to ptr store i64 %"103", ptr addrspace(5) %"91", align 4
%"103" = load float, ptr %"155", align 4 %"105" = load i64, ptr addrspace(5) %"90", align 4
store float %"103", ptr addrspace(5) %"91", align 4
%"105" = load i64, ptr addrspace(5) %"89", align 4
%"156" = inttoptr i64 %"105" to ptr %"156" = inttoptr i64 %"105" to ptr
%"54" = getelementptr inbounds i8, ptr %"156", i64 4 %"104" = load float, ptr %"156", align 4
%"106" = load float, ptr %"54", align 4 store float %"104", ptr addrspace(5) %"92", align 4
store float %"106", ptr addrspace(5) %"92", align 4 %"106" = load i64, ptr addrspace(5) %"90", align 4
%"107" = load i64, ptr addrspace(5) %"89", align 4 %"157" = inttoptr i64 %"106" to ptr
%"157" = inttoptr i64 %"107" to ptr %"55" = getelementptr inbounds i8, ptr %"157", i64 4
%"56" = getelementptr inbounds i8, ptr %"157", i64 8 %"107" = load float, ptr %"55", align 4
%"108" = load float, ptr %"56", align 4 store float %"107", ptr addrspace(5) %"93", align 4
store float %"108", ptr addrspace(5) %"93", align 4 %"108" = load i64, ptr addrspace(5) %"90", align 4
%"109" = load i64, ptr addrspace(5) %"89", align 4 %"158" = inttoptr i64 %"108" to ptr
%"158" = inttoptr i64 %"109" to ptr %"57" = getelementptr inbounds i8, ptr %"158", i64 8
%"58" = getelementptr inbounds i8, ptr %"158", i64 12 %"109" = load float, ptr %"57", align 4
%"110" = load float, ptr %"58", align 4 store float %"109", ptr addrspace(5) %"94", align 4
store float %"110", ptr addrspace(5) %"94", align 4 %"110" = load i64, ptr addrspace(5) %"90", align 4
%"111" = load i64, ptr addrspace(5) %"89", align 4 %"159" = inttoptr i64 %"110" to ptr
%"159" = inttoptr i64 %"111" to ptr %"59" = getelementptr inbounds i8, ptr %"159", i64 12
%"60" = getelementptr inbounds i8, ptr %"159", i64 16 %"111" = load float, ptr %"59", align 4
%"112" = load float, ptr %"60", align 4 store float %"111", ptr addrspace(5) %"95", align 4
store float %"112", ptr addrspace(5) %"95", align 4 %"112" = load i64, ptr addrspace(5) %"90", align 4
%"113" = load i64, ptr addrspace(5) %"89", align 4 %"160" = inttoptr i64 %"112" to ptr
%"160" = inttoptr i64 %"113" to ptr %"61" = getelementptr inbounds i8, ptr %"160", i64 16
%"62" = getelementptr inbounds i8, ptr %"160", i64 20 %"113" = load float, ptr %"61", align 4
%"114" = load float, ptr %"62", align 4 store float %"113", ptr addrspace(5) %"96", align 4
store float %"114", ptr addrspace(5) %"96", align 4 %"114" = load i64, ptr addrspace(5) %"90", align 4
%"115" = load i64, ptr addrspace(5) %"89", align 4 %"161" = inttoptr i64 %"114" to ptr
%"161" = inttoptr i64 %"115" to ptr %"63" = getelementptr inbounds i8, ptr %"161", i64 20
%"64" = getelementptr inbounds i8, ptr %"161", i64 24 %"115" = load float, ptr %"63", align 4
%"116" = load float, ptr %"64", align 4 store float %"115", ptr addrspace(5) %"97", align 4
store float %"116", ptr addrspace(5) %"97", align 4 %"116" = load i64, ptr addrspace(5) %"90", align 4
%"117" = load i64, ptr addrspace(5) %"89", align 4 %"162" = inttoptr i64 %"116" to ptr
%"162" = inttoptr i64 %"117" to ptr %"65" = getelementptr inbounds i8, ptr %"162", i64 24
%"66" = getelementptr inbounds i8, ptr %"162", i64 28 %"117" = load float, ptr %"65", align 4
%"118" = load float, ptr %"66", align 4 store float %"117", ptr addrspace(5) %"98", align 4
store float %"118", ptr addrspace(5) %"98", align 4 %"118" = load i64, ptr addrspace(5) %"90", align 4
%"120" = load float, ptr addrspace(5) %"91", align 4 %"163" = inttoptr i64 %"118" to ptr
%"67" = getelementptr inbounds i8, ptr %"163", i64 28
%"119" = load float, ptr %"67", align 4
store float %"119", ptr addrspace(5) %"99", align 4
%"121" = load float, ptr addrspace(5) %"92", align 4 %"121" = load float, ptr addrspace(5) %"92", align 4
%"119" = fcmp uno float %"120", %"121" %"122" = load float, ptr addrspace(5) %"93", align 4
store i1 %"119", ptr addrspace(5) %"100", align 1 %"120" = fcmp uno float %"121", %"122"
%"122" = load i1, ptr addrspace(5) %"100", align 1 store i1 %"120", ptr addrspace(5) %"101", align 1
br i1 %"122", label %"21", label %"22" %"123" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"123", label %"22", label %"23"
"21": ; preds = %1 "22": ; preds = %"168"
store i32 1, ptr addrspace(5) %"99", align 4 store i32 1, ptr addrspace(5) %"100", align 4
br label %"22" br label %"23"
"22": ; preds = %"21", %1 "23": ; preds = %"22", %"168"
%"124" = load i1, ptr addrspace(5) %"100", align 1 %"125" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"124", label %"24", label %"23" br i1 %"125", label %"25", label %"24"
"23": ; preds = %"22" "24": ; preds = %"23"
store i32 0, ptr addrspace(5) %"99", align 4 store i32 0, ptr addrspace(5) %"100", align 4
br label %"24" br label %"25"
"24": ; preds = %"23", %"22" "25": ; preds = %"24", %"23"
%"126" = load i64, ptr addrspace(5) %"90", align 4 %"127" = load i64, ptr addrspace(5) %"91", align 4
%"127" = load i32, ptr addrspace(5) %"99", align 4 %"128" = load i32, ptr addrspace(5) %"100", align 4
%"163" = inttoptr i64 %"126" to ptr %"164" = inttoptr i64 %"127" to ptr
store i32 %"127", ptr %"163", align 4 store i32 %"128", ptr %"164", align 4
%"129" = load float, ptr addrspace(5) %"93", align 4
%"130" = load float, ptr addrspace(5) %"94", align 4 %"130" = load float, ptr addrspace(5) %"94", align 4
%"128" = fcmp uno float %"129", %"130" %"131" = load float, ptr addrspace(5) %"95", align 4
store i1 %"128", ptr addrspace(5) %"100", align 1 %"129" = fcmp uno float %"130", %"131"
%"131" = load i1, ptr addrspace(5) %"100", align 1 store i1 %"129", ptr addrspace(5) %"101", align 1
br i1 %"131", label %"25", label %"26" %"132" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"132", label %"26", label %"27"
"25": ; preds = %"24" "26": ; preds = %"25"
store i32 1, ptr addrspace(5) %"99", align 4 store i32 1, ptr addrspace(5) %"100", align 4
br label %"26" br label %"27"
"26": ; preds = %"25", %"24" "27": ; preds = %"26", %"25"
%"133" = load i1, ptr addrspace(5) %"100", align 1 %"134" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"133", label %"28", label %"27" br i1 %"134", label %"29", label %"28"
"27": ; preds = %"26" "28": ; preds = %"27"
store i32 0, ptr addrspace(5) %"99", align 4 store i32 0, ptr addrspace(5) %"100", align 4
br label %"28" br label %"29"
"28": ; preds = %"27", %"26" "29": ; preds = %"28", %"27"
%"135" = load i64, ptr addrspace(5) %"90", align 4 %"136" = load i64, ptr addrspace(5) %"91", align 4
%"164" = inttoptr i64 %"135" to ptr %"165" = inttoptr i64 %"136" to ptr
%"72" = getelementptr inbounds i8, ptr %"164", i64 4 %"73" = getelementptr inbounds i8, ptr %"165", i64 4
%"136" = load i32, ptr addrspace(5) %"99", align 4 %"137" = load i32, ptr addrspace(5) %"100", align 4
store i32 %"136", ptr %"72", align 4 store i32 %"137", ptr %"73", align 4
%"138" = load float, ptr addrspace(5) %"95", align 4
%"139" = load float, ptr addrspace(5) %"96", align 4 %"139" = load float, ptr addrspace(5) %"96", align 4
%"137" = fcmp uno float %"138", %"139" %"140" = load float, ptr addrspace(5) %"97", align 4
store i1 %"137", ptr addrspace(5) %"100", align 1 %"138" = fcmp uno float %"139", %"140"
%"140" = load i1, ptr addrspace(5) %"100", align 1 store i1 %"138", ptr addrspace(5) %"101", align 1
br i1 %"140", label %"29", label %"30" %"141" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"141", label %"30", label %"31"
"29": ; preds = %"28" "30": ; preds = %"29"
store i32 1, ptr addrspace(5) %"99", align 4 store i32 1, ptr addrspace(5) %"100", align 4
br label %"30" br label %"31"
"30": ; preds = %"29", %"28" "31": ; preds = %"30", %"29"
%"142" = load i1, ptr addrspace(5) %"100", align 1 %"143" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"142", label %"32", label %"31" br i1 %"143", label %"33", label %"32"
"31": ; preds = %"30" "32": ; preds = %"31"
store i32 0, ptr addrspace(5) %"99", align 4 store i32 0, ptr addrspace(5) %"100", align 4
br label %"32" br label %"33"
"32": ; preds = %"31", %"30" "33": ; preds = %"32", %"31"
%"144" = load i64, ptr addrspace(5) %"90", align 4 %"145" = load i64, ptr addrspace(5) %"91", align 4
%"165" = inttoptr i64 %"144" to ptr %"166" = inttoptr i64 %"145" to ptr
%"76" = getelementptr inbounds i8, ptr %"165", i64 8 %"77" = getelementptr inbounds i8, ptr %"166", i64 8
%"145" = load i32, ptr addrspace(5) %"99", align 4 %"146" = load i32, ptr addrspace(5) %"100", align 4
store i32 %"145", ptr %"76", align 4 store i32 %"146", ptr %"77", align 4
%"147" = load float, ptr addrspace(5) %"97", align 4
%"148" = load float, ptr addrspace(5) %"98", align 4 %"148" = load float, ptr addrspace(5) %"98", align 4
%"146" = fcmp uno float %"147", %"148" %"149" = load float, ptr addrspace(5) %"99", align 4
store i1 %"146", ptr addrspace(5) %"100", align 1 %"147" = fcmp uno float %"148", %"149"
%"149" = load i1, ptr addrspace(5) %"100", align 1 store i1 %"147", ptr addrspace(5) %"101", align 1
br i1 %"149", label %"33", label %"34" %"150" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"150", label %"34", label %"35"
"33": ; preds = %"32" "34": ; preds = %"33"
store i32 1, ptr addrspace(5) %"99", align 4 store i32 1, ptr addrspace(5) %"100", align 4
br label %"34" br label %"35"
"34": ; preds = %"33", %"32" "35": ; preds = %"34", %"33"
%"151" = load i1, ptr addrspace(5) %"100", align 1 %"152" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"151", label %"36", label %"35" br i1 %"152", label %"37", label %"36"
"35": ; preds = %"34" "36": ; preds = %"35"
store i32 0, ptr addrspace(5) %"99", align 4 store i32 0, ptr addrspace(5) %"100", align 4
br label %"36" br label %"37"
"36": ; preds = %"35", %"34" "37": ; preds = %"36", %"35"
%"153" = load i64, ptr addrspace(5) %"90", align 4 %"154" = load i64, ptr addrspace(5) %"91", align 4
%"166" = inttoptr i64 %"153" to ptr %"167" = inttoptr i64 %"154" to ptr
%"80" = getelementptr inbounds i8, ptr %"166", i64 12 %"81" = getelementptr inbounds i8, ptr %"167", i64 12
%"154" = load i32, ptr addrspace(5) %"99", align 4 %"155" = load i32, ptr addrspace(5) %"100", align 4
store i32 %"154", ptr %"80", align 4 store i32 %"155", ptr %"81", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,10 +10,9 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"87", ptr addrspace(4) byref(i64) %"88") #0 { define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"88", ptr addrspace(4) byref(i64) %"89") #1 {
%"89" = alloca i64, align 8, addrspace(5)
%"90" = alloca i64, align 8, addrspace(5) %"90" = alloca i64, align 8, addrspace(5)
%"91" = alloca float, align 4, addrspace(5) %"91" = alloca i64, align 8, addrspace(5)
%"92" = alloca float, align 4, addrspace(5) %"92" = alloca float, align 4, addrspace(5)
%"93" = alloca float, align 4, addrspace(5) %"93" = alloca float, align 4, addrspace(5)
%"94" = alloca float, align 4, addrspace(5) %"94" = alloca float, align 4, addrspace(5)
@@ -21,154 +20,159 @@ define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"87", ptr addrs
%"96" = alloca float, align 4, addrspace(5) %"96" = alloca float, align 4, addrspace(5)
%"97" = alloca float, align 4, addrspace(5) %"97" = alloca float, align 4, addrspace(5)
%"98" = alloca float, align 4, addrspace(5) %"98" = alloca float, align 4, addrspace(5)
%"99" = alloca i32, align 4, addrspace(5) %"99" = alloca float, align 4, addrspace(5)
%"100" = alloca i1, align 1, addrspace(5) %"100" = alloca i32, align 4, addrspace(5)
%"101" = alloca i1, align 1, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"101" = load i64, ptr addrspace(4) %"87", align 4 br label %"168"
store i64 %"101", ptr addrspace(5) %"89", align 4
"168": ; preds = %1
%"102" = load i64, ptr addrspace(4) %"88", align 4 %"102" = load i64, ptr addrspace(4) %"88", align 4
store i64 %"102", ptr addrspace(5) %"90", align 4 store i64 %"102", ptr addrspace(5) %"90", align 4
%"104" = load i64, ptr addrspace(5) %"89", align 4 %"103" = load i64, ptr addrspace(4) %"89", align 4
%"155" = inttoptr i64 %"104" to ptr store i64 %"103", ptr addrspace(5) %"91", align 4
%"103" = load float, ptr %"155", align 4 %"105" = load i64, ptr addrspace(5) %"90", align 4
store float %"103", ptr addrspace(5) %"91", align 4
%"105" = load i64, ptr addrspace(5) %"89", align 4
%"156" = inttoptr i64 %"105" to ptr %"156" = inttoptr i64 %"105" to ptr
%"54" = getelementptr inbounds i8, ptr %"156", i64 4 %"104" = load float, ptr %"156", align 4
%"106" = load float, ptr %"54", align 4 store float %"104", ptr addrspace(5) %"92", align 4
store float %"106", ptr addrspace(5) %"92", align 4 %"106" = load i64, ptr addrspace(5) %"90", align 4
%"107" = load i64, ptr addrspace(5) %"89", align 4 %"157" = inttoptr i64 %"106" to ptr
%"157" = inttoptr i64 %"107" to ptr %"55" = getelementptr inbounds i8, ptr %"157", i64 4
%"56" = getelementptr inbounds i8, ptr %"157", i64 8 %"107" = load float, ptr %"55", align 4
%"108" = load float, ptr %"56", align 4 store float %"107", ptr addrspace(5) %"93", align 4
store float %"108", ptr addrspace(5) %"93", align 4 %"108" = load i64, ptr addrspace(5) %"90", align 4
%"109" = load i64, ptr addrspace(5) %"89", align 4 %"158" = inttoptr i64 %"108" to ptr
%"158" = inttoptr i64 %"109" to ptr %"57" = getelementptr inbounds i8, ptr %"158", i64 8
%"58" = getelementptr inbounds i8, ptr %"158", i64 12 %"109" = load float, ptr %"57", align 4
%"110" = load float, ptr %"58", align 4 store float %"109", ptr addrspace(5) %"94", align 4
store float %"110", ptr addrspace(5) %"94", align 4 %"110" = load i64, ptr addrspace(5) %"90", align 4
%"111" = load i64, ptr addrspace(5) %"89", align 4 %"159" = inttoptr i64 %"110" to ptr
%"159" = inttoptr i64 %"111" to ptr %"59" = getelementptr inbounds i8, ptr %"159", i64 12
%"60" = getelementptr inbounds i8, ptr %"159", i64 16 %"111" = load float, ptr %"59", align 4
%"112" = load float, ptr %"60", align 4 store float %"111", ptr addrspace(5) %"95", align 4
store float %"112", ptr addrspace(5) %"95", align 4 %"112" = load i64, ptr addrspace(5) %"90", align 4
%"113" = load i64, ptr addrspace(5) %"89", align 4 %"160" = inttoptr i64 %"112" to ptr
%"160" = inttoptr i64 %"113" to ptr %"61" = getelementptr inbounds i8, ptr %"160", i64 16
%"62" = getelementptr inbounds i8, ptr %"160", i64 20 %"113" = load float, ptr %"61", align 4
%"114" = load float, ptr %"62", align 4 store float %"113", ptr addrspace(5) %"96", align 4
store float %"114", ptr addrspace(5) %"96", align 4 %"114" = load i64, ptr addrspace(5) %"90", align 4
%"115" = load i64, ptr addrspace(5) %"89", align 4 %"161" = inttoptr i64 %"114" to ptr
%"161" = inttoptr i64 %"115" to ptr %"63" = getelementptr inbounds i8, ptr %"161", i64 20
%"64" = getelementptr inbounds i8, ptr %"161", i64 24 %"115" = load float, ptr %"63", align 4
%"116" = load float, ptr %"64", align 4 store float %"115", ptr addrspace(5) %"97", align 4
store float %"116", ptr addrspace(5) %"97", align 4 %"116" = load i64, ptr addrspace(5) %"90", align 4
%"117" = load i64, ptr addrspace(5) %"89", align 4 %"162" = inttoptr i64 %"116" to ptr
%"162" = inttoptr i64 %"117" to ptr %"65" = getelementptr inbounds i8, ptr %"162", i64 24
%"66" = getelementptr inbounds i8, ptr %"162", i64 28 %"117" = load float, ptr %"65", align 4
%"118" = load float, ptr %"66", align 4 store float %"117", ptr addrspace(5) %"98", align 4
store float %"118", ptr addrspace(5) %"98", align 4 %"118" = load i64, ptr addrspace(5) %"90", align 4
%"120" = load float, ptr addrspace(5) %"91", align 4 %"163" = inttoptr i64 %"118" to ptr
%"67" = getelementptr inbounds i8, ptr %"163", i64 28
%"119" = load float, ptr %"67", align 4
store float %"119", ptr addrspace(5) %"99", align 4
%"121" = load float, ptr addrspace(5) %"92", align 4 %"121" = load float, ptr addrspace(5) %"92", align 4
%"119" = fcmp ord float %"120", %"121" %"122" = load float, ptr addrspace(5) %"93", align 4
store i1 %"119", ptr addrspace(5) %"100", align 1 %"120" = fcmp ord float %"121", %"122"
%"122" = load i1, ptr addrspace(5) %"100", align 1 store i1 %"120", ptr addrspace(5) %"101", align 1
br i1 %"122", label %"21", label %"22" %"123" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"123", label %"22", label %"23"
"21": ; preds = %1 "22": ; preds = %"168"
store i32 2, ptr addrspace(5) %"99", align 4 store i32 2, ptr addrspace(5) %"100", align 4
br label %"22" br label %"23"
"22": ; preds = %"21", %1 "23": ; preds = %"22", %"168"
%"124" = load i1, ptr addrspace(5) %"100", align 1 %"125" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"124", label %"24", label %"23" br i1 %"125", label %"25", label %"24"
"23": ; preds = %"22" "24": ; preds = %"23"
store i32 0, ptr addrspace(5) %"99", align 4 store i32 0, ptr addrspace(5) %"100", align 4
br label %"24" br label %"25"
"24": ; preds = %"23", %"22" "25": ; preds = %"24", %"23"
%"126" = load i64, ptr addrspace(5) %"90", align 4 %"127" = load i64, ptr addrspace(5) %"91", align 4
%"127" = load i32, ptr addrspace(5) %"99", align 4 %"128" = load i32, ptr addrspace(5) %"100", align 4
%"163" = inttoptr i64 %"126" to ptr %"164" = inttoptr i64 %"127" to ptr
store i32 %"127", ptr %"163", align 4 store i32 %"128", ptr %"164", align 4
%"129" = load float, ptr addrspace(5) %"93", align 4
%"130" = load float, ptr addrspace(5) %"94", align 4 %"130" = load float, ptr addrspace(5) %"94", align 4
%"128" = fcmp ord float %"129", %"130" %"131" = load float, ptr addrspace(5) %"95", align 4
store i1 %"128", ptr addrspace(5) %"100", align 1 %"129" = fcmp ord float %"130", %"131"
%"131" = load i1, ptr addrspace(5) %"100", align 1 store i1 %"129", ptr addrspace(5) %"101", align 1
br i1 %"131", label %"25", label %"26" %"132" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"132", label %"26", label %"27"
"25": ; preds = %"24" "26": ; preds = %"25"
store i32 2, ptr addrspace(5) %"99", align 4 store i32 2, ptr addrspace(5) %"100", align 4
br label %"26" br label %"27"
"26": ; preds = %"25", %"24" "27": ; preds = %"26", %"25"
%"133" = load i1, ptr addrspace(5) %"100", align 1 %"134" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"133", label %"28", label %"27" br i1 %"134", label %"29", label %"28"
"27": ; preds = %"26" "28": ; preds = %"27"
store i32 0, ptr addrspace(5) %"99", align 4 store i32 0, ptr addrspace(5) %"100", align 4
br label %"28" br label %"29"
"28": ; preds = %"27", %"26" "29": ; preds = %"28", %"27"
%"135" = load i64, ptr addrspace(5) %"90", align 4 %"136" = load i64, ptr addrspace(5) %"91", align 4
%"164" = inttoptr i64 %"135" to ptr %"165" = inttoptr i64 %"136" to ptr
%"72" = getelementptr inbounds i8, ptr %"164", i64 4 %"73" = getelementptr inbounds i8, ptr %"165", i64 4
%"136" = load i32, ptr addrspace(5) %"99", align 4 %"137" = load i32, ptr addrspace(5) %"100", align 4
store i32 %"136", ptr %"72", align 4 store i32 %"137", ptr %"73", align 4
%"138" = load float, ptr addrspace(5) %"95", align 4
%"139" = load float, ptr addrspace(5) %"96", align 4 %"139" = load float, ptr addrspace(5) %"96", align 4
%"137" = fcmp ord float %"138", %"139" %"140" = load float, ptr addrspace(5) %"97", align 4
store i1 %"137", ptr addrspace(5) %"100", align 1 %"138" = fcmp ord float %"139", %"140"
%"140" = load i1, ptr addrspace(5) %"100", align 1 store i1 %"138", ptr addrspace(5) %"101", align 1
br i1 %"140", label %"29", label %"30" %"141" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"141", label %"30", label %"31"
"29": ; preds = %"28" "30": ; preds = %"29"
store i32 2, ptr addrspace(5) %"99", align 4 store i32 2, ptr addrspace(5) %"100", align 4
br label %"30" br label %"31"
"30": ; preds = %"29", %"28" "31": ; preds = %"30", %"29"
%"142" = load i1, ptr addrspace(5) %"100", align 1 %"143" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"142", label %"32", label %"31" br i1 %"143", label %"33", label %"32"
"31": ; preds = %"30" "32": ; preds = %"31"
store i32 0, ptr addrspace(5) %"99", align 4 store i32 0, ptr addrspace(5) %"100", align 4
br label %"32" br label %"33"
"32": ; preds = %"31", %"30" "33": ; preds = %"32", %"31"
%"144" = load i64, ptr addrspace(5) %"90", align 4 %"145" = load i64, ptr addrspace(5) %"91", align 4
%"165" = inttoptr i64 %"144" to ptr %"166" = inttoptr i64 %"145" to ptr
%"76" = getelementptr inbounds i8, ptr %"165", i64 8 %"77" = getelementptr inbounds i8, ptr %"166", i64 8
%"145" = load i32, ptr addrspace(5) %"99", align 4 %"146" = load i32, ptr addrspace(5) %"100", align 4
store i32 %"145", ptr %"76", align 4 store i32 %"146", ptr %"77", align 4
%"147" = load float, ptr addrspace(5) %"97", align 4
%"148" = load float, ptr addrspace(5) %"98", align 4 %"148" = load float, ptr addrspace(5) %"98", align 4
%"146" = fcmp ord float %"147", %"148" %"149" = load float, ptr addrspace(5) %"99", align 4
store i1 %"146", ptr addrspace(5) %"100", align 1 %"147" = fcmp ord float %"148", %"149"
%"149" = load i1, ptr addrspace(5) %"100", align 1 store i1 %"147", ptr addrspace(5) %"101", align 1
br i1 %"149", label %"33", label %"34" %"150" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"150", label %"34", label %"35"
"33": ; preds = %"32" "34": ; preds = %"33"
store i32 2, ptr addrspace(5) %"99", align 4 store i32 2, ptr addrspace(5) %"100", align 4
br label %"34" br label %"35"
"34": ; preds = %"33", %"32" "35": ; preds = %"34", %"33"
%"151" = load i1, ptr addrspace(5) %"100", align 1 %"152" = load i1, ptr addrspace(5) %"101", align 1
br i1 %"151", label %"36", label %"35" br i1 %"152", label %"37", label %"36"
"35": ; preds = %"34" "36": ; preds = %"35"
store i32 0, ptr addrspace(5) %"99", align 4 store i32 0, ptr addrspace(5) %"100", align 4
br label %"36" br label %"37"
"36": ; preds = %"35", %"34" "37": ; preds = %"36", %"35"
%"153" = load i64, ptr addrspace(5) %"90", align 4 %"154" = load i64, ptr addrspace(5) %"91", align 4
%"166" = inttoptr i64 %"153" to ptr %"167" = inttoptr i64 %"154" to ptr
%"80" = getelementptr inbounds i8, ptr %"166", i64 12 %"81" = getelementptr inbounds i8, ptr %"167", i64 12
%"154" = load i32, ptr addrspace(5) %"99", align 4 %"155" = load i32, ptr addrspace(5) %"100", align 4
store i32 %"154", ptr %"80", align 4 store i32 %"155", ptr %"81", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -12,38 +12,42 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 { define amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
%"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca i64, align 8, addrspace(5) %"44" = alloca i32, align 4, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5) %"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"46" = load i64, ptr addrspace(4) %"39", align 4 br label %"63"
store i64 %"46", ptr addrspace(5) %"41", align 4
"63": ; preds = %1
%"47" = load i64, ptr addrspace(4) %"40", align 4 %"47" = load i64, ptr addrspace(4) %"40", align 4
store i64 %"47", ptr addrspace(5) %"42", align 4 store i64 %"47", ptr addrspace(5) %"42", align 4
store i32 ptrtoint (ptr addrspace(3) @shared_mem1 to i32), ptr addrspace(5) %"43", align 4 %"48" = load i64, ptr addrspace(4) %"41", align 4
%"50" = load i64, ptr addrspace(5) %"41", align 4 store i64 %"48", ptr addrspace(5) %"43", align 4
%"58" = inttoptr i64 %"50" to ptr addrspace(1) store i32 ptrtoint (ptr addrspace(3) @shared_mem1 to i32), ptr addrspace(5) %"44", align 4
%"49" = load i64, ptr addrspace(1) %"58", align 4 %"51" = load i64, ptr addrspace(5) %"42", align 4
store i64 %"49", ptr addrspace(5) %"44", align 4 %"59" = inttoptr i64 %"51" to ptr addrspace(1)
%"51" = load i32, ptr addrspace(5) %"43", align 4 %"50" = load i64, ptr addrspace(1) %"59", align 4
%"52" = load i64, ptr addrspace(5) %"44", align 4 store i64 %"50", ptr addrspace(5) %"45", align 4
%"59" = inttoptr i32 %"51" to ptr addrspace(3) %"52" = load i32, ptr addrspace(5) %"44", align 4
store i64 %"52", ptr addrspace(3) %"59", align 4 %"53" = load i64, ptr addrspace(5) %"45", align 4
%"53" = load i32, ptr addrspace(5) %"43", align 4 %"60" = inttoptr i32 %"52" to ptr addrspace(3)
%"60" = inttoptr i32 %"53" to ptr addrspace(3) store i64 %"53", ptr addrspace(3) %"60", align 4
%"32" = getelementptr inbounds i8, ptr addrspace(3) %"60", i64 0 %"54" = load i32, ptr addrspace(5) %"44", align 4
%"54" = load i64, ptr addrspace(3) %"32", align 4 %"61" = inttoptr i32 %"54" to ptr addrspace(3)
store i64 %"54", ptr addrspace(5) %"45", align 4 %"33" = getelementptr inbounds i8, ptr addrspace(3) %"61", i64 0
%"55" = load i64, ptr addrspace(5) %"42", align 4 %"55" = load i64, ptr addrspace(3) %"33", align 4
%"56" = load i64, ptr addrspace(5) %"45", align 4 store i64 %"55", ptr addrspace(5) %"46", align 4
%"61" = inttoptr i64 %"55" to ptr addrspace(1) %"56" = load i64, ptr addrspace(5) %"43", align 4
store i64 %"56", ptr addrspace(1) %"61", align 4 %"57" = load i64, ptr addrspace(5) %"46", align 4
%"62" = inttoptr i64 %"56" to ptr addrspace(1)
store i64 %"57", ptr addrspace(1) %"62", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -12,37 +12,41 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"44" = load i64, ptr addrspace(4) %"37", align 4 br label %"61"
store i64 %"44", ptr addrspace(5) %"39", align 4
"61": ; preds = %1
%"45" = load i64, ptr addrspace(4) %"38", align 4 %"45" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"45", ptr addrspace(5) %"40", align 4 store i64 %"45", ptr addrspace(5) %"40", align 4
store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %"41", align 4 %"46" = load i64, ptr addrspace(4) %"39", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4 store i64 %"46", ptr addrspace(5) %"41", align 4
%"56" = inttoptr i64 %"48" to ptr addrspace(1) store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %"42", align 4
%"47" = load i64, ptr addrspace(1) %"56", align 4 %"49" = load i64, ptr addrspace(5) %"40", align 4
store i64 %"47", ptr addrspace(5) %"42", align 4 %"57" = inttoptr i64 %"49" to ptr addrspace(1)
%"49" = load i64, ptr addrspace(5) %"41", align 4 %"48" = load i64, ptr addrspace(1) %"57", align 4
store i64 %"48", ptr addrspace(5) %"43", align 4
%"50" = load i64, ptr addrspace(5) %"42", align 4 %"50" = load i64, ptr addrspace(5) %"42", align 4
%"57" = inttoptr i64 %"49" to ptr addrspace(3) %"51" = load i64, ptr addrspace(5) %"43", align 4
store i64 %"50", ptr addrspace(3) %"57", align 4 %"58" = inttoptr i64 %"50" to ptr addrspace(3)
%"52" = load i64, ptr addrspace(5) %"41", align 4 store i64 %"51", ptr addrspace(3) %"58", align 4
%"58" = inttoptr i64 %"52" to ptr addrspace(3) %"53" = load i64, ptr addrspace(5) %"42", align 4
%"51" = load i64, ptr addrspace(3) %"58", align 4 %"59" = inttoptr i64 %"53" to ptr addrspace(3)
store i64 %"51", ptr addrspace(5) %"43", align 4 %"52" = load i64, ptr addrspace(3) %"59", align 4
%"53" = load i64, ptr addrspace(5) %"40", align 4 store i64 %"52", ptr addrspace(5) %"44", align 4
%"54" = load i64, ptr addrspace(5) %"43", align 4 %"54" = load i64, ptr addrspace(5) %"41", align 4
%"59" = inttoptr i64 %"53" to ptr addrspace(1) %"55" = load i64, ptr addrspace(5) %"44", align 4
store i64 %"54", ptr addrspace(1) %"59", align 4 %"60" = inttoptr i64 %"54" to ptr addrspace(1)
store i64 %"55", ptr addrspace(1) %"60", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -13,68 +13,78 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define i64 @__zluda_ptx_impl_add() #0 { define i64 @add() #0 {
%"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca i64, align 8, addrspace(5) %"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca i64, align 8, addrspace(5) %"48" = alloca i64, align 8, addrspace(5)
%"49" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"49" = load i64, ptr addrspace(3) @shared_mod, align 4 br label %"85"
store i64 %"49", ptr addrspace(5) %"47", align 4
%"50" = load i64, ptr addrspace(3) @shared_ex, align 4 "85": ; preds = %1
%"50" = load i64, ptr addrspace(3) @shared_mod, align 4
store i64 %"50", ptr addrspace(5) %"48", align 4 store i64 %"50", ptr addrspace(5) %"48", align 4
%"52" = load i64, ptr addrspace(5) %"48", align 4 %"51" = load i64, ptr addrspace(3) @shared_ex, align 4
%"53" = load i64, ptr addrspace(5) %"47", align 4 store i64 %"51", ptr addrspace(5) %"49", align 4
%"75" = add i64 %"52", %"53" %"53" = load i64, ptr addrspace(5) %"49", align 4
store i64 %"75", ptr addrspace(5) %"46", align 4 %"54" = load i64, ptr addrspace(5) %"48", align 4
%2 = load i64, ptr addrspace(5) %"46", align 4 %"76" = add i64 %"53", %"54"
store i64 %"76", ptr addrspace(5) %"47", align 4
%2 = load i64, ptr addrspace(5) %"47", align 4
ret i64 %2 ret i64 %2
} }
define i64 @__zluda_ptx_impl_set_shared_temp1(i64 %"15") #0 { define i64 @set_shared_temp1(i64 %"15") #0 {
%"54" = alloca i64, align 8, addrspace(5) %"55" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
br label %"86"
"86": ; preds = %1
store i64 %"15", ptr addrspace(3) @shared_ex, align 4 store i64 %"15", ptr addrspace(3) @shared_ex, align 4
%"55" = call i64 @__zluda_ptx_impl_add() %"56" = call i64 @add()
store i64 %"55", ptr addrspace(5) %"54", align 4 store i64 %"56", ptr addrspace(5) %"55", align 4
%2 = load i64, ptr addrspace(5) %"54", align 4 %2 = load i64, ptr addrspace(5) %"55", align 4
ret i64 %2 ret i64 %2
} }
define amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"56", ptr addrspace(4) byref(i64) %"57") #0 { define amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"57", ptr addrspace(4) byref(i64) %"58") #1 {
%"58" = alloca i64, align 8, addrspace(5)
%"59" = alloca i64, align 8, addrspace(5) %"59" = alloca i64, align 8, addrspace(5)
%"60" = alloca i64, align 8, addrspace(5) %"60" = alloca i64, align 8, addrspace(5)
%"61" = alloca i64, align 8, addrspace(5) %"61" = alloca i64, align 8, addrspace(5)
%"62" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"62" = load i64, ptr addrspace(4) %"56", align 4 br label %"87"
store i64 %"62", ptr addrspace(5) %"58", align 4
"87": ; preds = %1
%"63" = load i64, ptr addrspace(4) %"57", align 4 %"63" = load i64, ptr addrspace(4) %"57", align 4
store i64 %"63", ptr addrspace(5) %"59", align 4 store i64 %"63", ptr addrspace(5) %"59", align 4
%"65" = load i64, ptr addrspace(5) %"58", align 4 %"64" = load i64, ptr addrspace(4) %"58", align 4
%"78" = inttoptr i64 %"65" to ptr addrspace(1)
%"64" = load i64, ptr addrspace(1) %"78", align 4
store i64 %"64", ptr addrspace(5) %"60", align 4 store i64 %"64", ptr addrspace(5) %"60", align 4
%"66" = load i64, ptr addrspace(5) %"58", align 4 %"66" = load i64, ptr addrspace(5) %"59", align 4
%"79" = inttoptr i64 %"66" to ptr addrspace(1) %"79" = inttoptr i64 %"66" to ptr addrspace(1)
%"39" = getelementptr inbounds i8, ptr addrspace(1) %"79", i64 8 %"65" = load i64, ptr addrspace(1) %"79", align 4
%"67" = load i64, ptr addrspace(1) %"39", align 4 store i64 %"65", ptr addrspace(5) %"61", align 4
store i64 %"67", ptr addrspace(5) %"61", align 4 %"67" = load i64, ptr addrspace(5) %"59", align 4
%"68" = load i64, ptr addrspace(5) %"61", align 4 %"80" = inttoptr i64 %"67" to ptr addrspace(1)
store i64 %"68", ptr addrspace(3) @shared_mod, align 4 %"40" = getelementptr inbounds i8, ptr addrspace(1) %"80", i64 8
%"70" = load i64, ptr addrspace(5) %"60", align 4 %"68" = load i64, ptr addrspace(1) %"40", align 4
%"81" = call i64 @__zluda_ptx_impl_set_shared_temp1(i64 %"70") store i64 %"68", ptr addrspace(5) %"62", align 4
store i64 %"81", ptr addrspace(5) %"61", align 4 %"69" = load i64, ptr addrspace(5) %"62", align 4
%"71" = load i64, ptr addrspace(5) %"59", align 4 store i64 %"69", ptr addrspace(3) @shared_mod, align 4
%"72" = load i64, ptr addrspace(5) %"61", align 4 %"71" = load i64, ptr addrspace(5) %"61", align 4
%"83" = inttoptr i64 %"71" to ptr %"82" = call i64 @set_shared_temp1(i64 %"71")
store i64 %"72", ptr %"83", align 4 store i64 %"82", ptr addrspace(5) %"62", align 4
%"72" = load i64, ptr addrspace(5) %"60", align 4
%"73" = load i64, ptr addrspace(5) %"62", align 4
%"84" = inttoptr i64 %"72" to ptr
store i64 %"73", ptr %"84", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -13,65 +13,75 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define i64 @__zluda_ptx_impl_add(i64 %"10") #0 { define i64 @add(i64 %"10") #0 {
%"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca i64, align 8, addrspace(5) %"48" = alloca i64, align 8, addrspace(5)
%"49" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
br label %"81"
"81": ; preds = %1
store i64 %"10", ptr addrspace(3) @shared_mod, align 4 store i64 %"10", ptr addrspace(3) @shared_mod, align 4
%"49" = load i64, ptr addrspace(3) @shared_mod, align 4 %"50" = load i64, ptr addrspace(3) @shared_mod, align 4
store i64 %"49", ptr addrspace(5) %"48", align 4 store i64 %"50", ptr addrspace(5) %"49", align 4
%"101" = load i64, ptr addrspace(3) @shared_ex, align 4 %"101" = load i64, ptr addrspace(3) @shared_ex, align 4
%"51" = load i64, ptr addrspace(5) %"48", align 4 %"52" = load i64, ptr addrspace(5) %"49", align 4
%"72" = add i64 %"101", %"51" %"73" = add i64 %"101", %"52"
store i64 %"72", ptr addrspace(5) %"47", align 4 store i64 %"73", ptr addrspace(5) %"48", align 4
%2 = load i64, ptr addrspace(5) %"47", align 4 %2 = load i64, ptr addrspace(5) %"48", align 4
ret i64 %2 ret i64 %2
} }
define i64 @__zluda_ptx_impl_set_shared_temp1(i64 %"15", i64 %"16") #0 { define i64 @set_shared_temp1(i64 %"15", i64 %"16") #0 {
%"52" = alloca i64, align 8, addrspace(5) %"53" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
br label %"82"
"82": ; preds = %1
store i64 %"15", ptr addrspace(3) @shared_ex, align 4 store i64 %"15", ptr addrspace(3) @shared_ex, align 4
%"53" = call i64 @__zluda_ptx_impl_add(i64 %"16") %"54" = call i64 @add(i64 %"16")
store i64 %"53", ptr addrspace(5) %"52", align 4 store i64 %"54", ptr addrspace(5) %"53", align 4
%2 = load i64, ptr addrspace(5) %"52", align 4 %2 = load i64, ptr addrspace(5) %"53", align 4
ret i64 %2 ret i64 %2
} }
define amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"54", ptr addrspace(4) byref(i64) %"55") #0 { define amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"55", ptr addrspace(4) byref(i64) %"56") #1 {
%"56" = alloca i64, align 8, addrspace(5)
%"57" = alloca i64, align 8, addrspace(5) %"57" = alloca i64, align 8, addrspace(5)
%"58" = alloca i64, align 8, addrspace(5) %"58" = alloca i64, align 8, addrspace(5)
%"59" = alloca i64, align 8, addrspace(5) %"59" = alloca i64, align 8, addrspace(5)
%"60" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"60" = load i64, ptr addrspace(4) %"54", align 4 br label %"83"
store i64 %"60", ptr addrspace(5) %"56", align 4
"83": ; preds = %1
%"61" = load i64, ptr addrspace(4) %"55", align 4 %"61" = load i64, ptr addrspace(4) %"55", align 4
store i64 %"61", ptr addrspace(5) %"57", align 4 store i64 %"61", ptr addrspace(5) %"57", align 4
%"63" = load i64, ptr addrspace(5) %"56", align 4 %"62" = load i64, ptr addrspace(4) %"56", align 4
%"75" = inttoptr i64 %"63" to ptr addrspace(1)
%"62" = load i64, ptr addrspace(1) %"75", align 4
store i64 %"62", ptr addrspace(5) %"58", align 4 store i64 %"62", ptr addrspace(5) %"58", align 4
%"64" = load i64, ptr addrspace(5) %"56", align 4 %"64" = load i64, ptr addrspace(5) %"57", align 4
%"76" = inttoptr i64 %"64" to ptr addrspace(1) %"76" = inttoptr i64 %"64" to ptr addrspace(1)
%"40" = getelementptr inbounds i8, ptr addrspace(1) %"76", i64 8 %"63" = load i64, ptr addrspace(1) %"76", align 4
%"65" = load i64, ptr addrspace(1) %"40", align 4 store i64 %"63", ptr addrspace(5) %"59", align 4
store i64 %"65", ptr addrspace(5) %"59", align 4 %"65" = load i64, ptr addrspace(5) %"57", align 4
%"67" = load i64, ptr addrspace(5) %"58", align 4 %"77" = inttoptr i64 %"65" to ptr addrspace(1)
%"41" = getelementptr inbounds i8, ptr addrspace(1) %"77", i64 8
%"66" = load i64, ptr addrspace(1) %"41", align 4
store i64 %"66", ptr addrspace(5) %"60", align 4
%"68" = load i64, ptr addrspace(5) %"59", align 4 %"68" = load i64, ptr addrspace(5) %"59", align 4
%"77" = call i64 @__zluda_ptx_impl_set_shared_temp1(i64 %"67", i64 %"68") %"69" = load i64, ptr addrspace(5) %"60", align 4
store i64 %"77", ptr addrspace(5) %"59", align 4 %"78" = call i64 @set_shared_temp1(i64 %"68", i64 %"69")
%"69" = load i64, ptr addrspace(5) %"57", align 4 store i64 %"78", ptr addrspace(5) %"60", align 4
%"70" = load i64, ptr addrspace(5) %"59", align 4 %"70" = load i64, ptr addrspace(5) %"58", align 4
%"79" = inttoptr i64 %"69" to ptr %"71" = load i64, ptr addrspace(5) %"60", align 4
store i64 %"70", ptr %"79", align 4 %"80" = inttoptr i64 %"70" to ptr
store i64 %"71", ptr %"80", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -12,31 +12,35 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { define amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"42" = load i64, ptr addrspace(4) %"36", align 4 br label %"55"
store i64 %"42", ptr addrspace(5) %"38", align 4
"55": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr addrspace(1)
%"44" = load i64, ptr addrspace(1) %"50", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"40", align 4 %"46" = load i64, ptr addrspace(5) %"39", align 4
store i64 %"46", ptr addrspace(3) @shared_mem1, align 4 %"51" = inttoptr i64 %"46" to ptr addrspace(1)
%"47" = load i64, ptr addrspace(3) @shared_mem1, align 4 %"45" = load i64, ptr addrspace(1) %"51", align 4
store i64 %"47", ptr addrspace(5) %"41", align 4 store i64 %"45", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4 %"47" = load i64, ptr addrspace(5) %"41", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4 store i64 %"47", ptr addrspace(3) @shared_mem1, align 4
%"53" = inttoptr i64 %"48" to ptr addrspace(1) %"48" = load i64, ptr addrspace(3) @shared_mem1, align 4
store i64 %"49", ptr addrspace(1) %"53", align 4 store i64 %"48", ptr addrspace(5) %"42", align 4
%"49" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i64, ptr addrspace(5) %"42", align 4
%"54" = inttoptr i64 %"49" to ptr addrspace(1)
store i64 %"50", ptr addrspace(1) %"54", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,31 +10,35 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { define amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"42" = load i64, ptr addrspace(4) %"36", align 4 br label %"55"
store i64 %"42", ptr addrspace(5) %"38", align 4
"55": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr
%"44" = load i64, ptr %"50", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 4 %"46" = load i64, ptr addrspace(5) %"39", align 4
%2 = shl i64 %"47", 2 %"51" = inttoptr i64 %"46" to ptr
%"51" = select i1 false, i64 0, i64 %2 %"45" = load i64, ptr %"51", align 4
store i64 %"51", ptr addrspace(5) %"41", align 4 store i64 %"45", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4 %"48" = load i64, ptr addrspace(5) %"41", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4 %2 = shl i64 %"48", 2
%"53" = inttoptr i64 %"48" to ptr %"52" = select i1 false, i64 0, i64 %2
store i64 %"49", ptr %"53", align 4 store i64 %"52", ptr addrspace(5) %"42", align 4
%"49" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i64, ptr addrspace(5) %"42", align 4
%"54" = inttoptr i64 %"49" to ptr
store i64 %"50", ptr %"54", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }

@@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0 declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @shr(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 { define amdgpu_kernel void @shr(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
%"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i64, align 8, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i32, align 4, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"40" = load i64, ptr addrspace(4) %"35", align 4 br label %"51"
store i64 %"40", ptr addrspace(5) %"37", align 4
"51": ; preds = %1
%"41" = load i64, ptr addrspace(4) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
store i64 %"41", ptr addrspace(5) %"38", align 4 store i64 %"41", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(5) %"37", align 4 %"42" = load i64, ptr addrspace(4) %"37", align 4
%"48" = inttoptr i64 %"43" to ptr store i64 %"42", ptr addrspace(5) %"39", align 4
%"42" = load i32, ptr %"48", align 4 %"44" = load i64, ptr addrspace(5) %"38", align 4
store i32 %"42", ptr addrspace(5) %"39", align 4 %"49" = inttoptr i64 %"44" to ptr
%"45" = load i32, ptr addrspace(5) %"39", align 4 %"43" = load i32, ptr %"49", align 4
%2 = ashr i32 %"45", 1 store i32 %"43", ptr addrspace(5) %"40", align 4
%"44" = select i1 false, i32 0, i32 %2 %"46" = load i32, ptr addrspace(5) %"40", align 4
store i32 %"44", ptr addrspace(5) %"39", align 4 %2 = ashr i32 %"46", 1
%"46" = load i64, ptr addrspace(5) %"38", align 4 %"45" = select i1 false, i32 0, i32 %2
%"47" = load i32, ptr addrspace(5) %"39", align 4 store i32 %"45", ptr addrspace(5) %"40", align 4
%"49" = inttoptr i64 %"46" to ptr %"47" = load i64, ptr addrspace(5) %"39", align 4
store i32 %"47", ptr %"49", align 4 %"48" = load i32, ptr addrspace(5) %"40", align 4
%"50" = inttoptr i64 %"47" to ptr
store i32 %"48", ptr %"50", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@ -10,27 +10,31 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca i32, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"49"
store i64 %"39", ptr addrspace(5) %"36", align 4
"49": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"46" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"45" = load i16, ptr %"46", align 2
%"41" = sext i16 %"45" to i32
store i32 %"41", ptr addrspace(5) %"38", align 4
%"43" = load i64, ptr addrspace(5) %"37", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
%"44" = load i32, ptr addrspace(5) %"38", align 4
%"47" = inttoptr i64 %"43" to ptr %"47" = inttoptr i64 %"43" to ptr
store i32 %"44", ptr %"47", align 4 %"46" = load i16, ptr %"47", align 2
%"42" = sext i16 %"46" to i32
store i32 %"42", ptr addrspace(5) %"39", align 4
%"44" = load i64, ptr addrspace(5) %"38", align 4
%"45" = load i32, ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"44" to ptr
store i32 %"45", ptr %"48", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca float, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"50"
store i64 %"39", ptr addrspace(5) %"36", align 4
"50": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"47" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"41" = load float, ptr %"47", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
store float %"41", ptr addrspace(5) %"38", align 4 %"48" = inttoptr i64 %"43" to ptr
%"44" = load float, ptr addrspace(5) %"38", align 4 %"42" = load float, ptr %"48", align 4
%"43" = call afn float @llvm.sin.f32(float %"44") store float %"42", ptr addrspace(5) %"39", align 4
store float %"43", ptr addrspace(5) %"38", align 4 %"45" = load float, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4 %"44" = call afn float @llvm.sin.f32(float %"45")
%"46" = load float, ptr addrspace(5) %"38", align 4 store float %"44", ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"45" to ptr %"46" = load i64, ptr addrspace(5) %"38", align 4
store float %"46", ptr %"48", align 4 %"47" = load float, ptr addrspace(5) %"39", align 4
%"49" = inttoptr i64 %"46" to ptr
store float %"47", ptr %"49", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.sin.f32(float) #1 declare float @llvm.sin.f32(float) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
@ -10,33 +10,37 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 { define amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
%"36" = alloca i64, align 8, addrspace(5)
%"37" = alloca i64, align 8, addrspace(5) %"37" = alloca i64, align 8, addrspace(5)
%"38" = alloca float, align 4, addrspace(5) %"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca float, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"39" = load i64, ptr addrspace(4) %"34", align 4 br label %"50"
store i64 %"39", ptr addrspace(5) %"36", align 4
"50": ; preds = %1
%"40" = load i64, ptr addrspace(4) %"35", align 4 %"40" = load i64, ptr addrspace(4) %"35", align 4
store i64 %"40", ptr addrspace(5) %"37", align 4 store i64 %"40", ptr addrspace(5) %"37", align 4
%"42" = load i64, ptr addrspace(5) %"36", align 4 %"41" = load i64, ptr addrspace(4) %"36", align 4
%"47" = inttoptr i64 %"42" to ptr store i64 %"41", ptr addrspace(5) %"38", align 4
%"41" = load float, ptr %"47", align 4 %"43" = load i64, ptr addrspace(5) %"37", align 4
store float %"41", ptr addrspace(5) %"38", align 4 %"48" = inttoptr i64 %"43" to ptr
%"44" = load float, ptr addrspace(5) %"38", align 4 %"42" = load float, ptr %"48", align 4
%"43" = call float @llvm.amdgcn.sqrt.f32(float %"44") store float %"42", ptr addrspace(5) %"39", align 4
store float %"43", ptr addrspace(5) %"38", align 4 %"45" = load float, ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"37", align 4 %"44" = call float @llvm.amdgcn.sqrt.f32(float %"45")
%"46" = load float, ptr addrspace(5) %"38", align 4 store float %"44", ptr addrspace(5) %"39", align 4
%"48" = inttoptr i64 %"45" to ptr %"46" = load i64, ptr addrspace(5) %"38", align 4
store float %"46", ptr %"48", align 4 %"47" = load float, ptr addrspace(5) %"39", align 4
%"49" = inttoptr i64 %"46" to ptr
store float %"47", ptr %"49", align 4
ret void ret void
} }
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare float @llvm.amdgcn.sqrt.f32(float) #1 declare float @llvm.amdgcn.sqrt.f32(float) #2
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
@ -10,49 +10,53 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @stateful_ld_st_ntid(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 { define amdgpu_kernel void @stateful_ld_st_ntid(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
%"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5) %"43" = alloca i32, align 4, addrspace(5)
%"44" = alloca i64, align 8, addrspace(5) %"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"64" = load i64, ptr addrspace(4) %"38", align 4 br label %"73"
store i64 %"64", ptr addrspace(5) %"40", align 4
"73": ; preds = %1
%"65" = load i64, ptr addrspace(4) %"39", align 4 %"65" = load i64, ptr addrspace(4) %"39", align 4
store i64 %"65", ptr addrspace(5) %"41", align 4 store i64 %"65", ptr addrspace(5) %"41", align 4
%"48" = load i64, ptr addrspace(5) %"40", align 4 %"66" = load i64, ptr addrspace(4) %"40", align 4
%2 = inttoptr i64 %"48" to ptr store i64 %"66", ptr addrspace(5) %"42", align 4
%"47" = addrspacecast ptr %2 to ptr addrspace(1) %"49" = load i64, ptr addrspace(5) %"41", align 4
store ptr addrspace(1) %"47", ptr addrspace(5) %"40", align 8 %2 = inttoptr i64 %"49" to ptr
%"50" = load i64, ptr addrspace(5) %"41", align 4 %"48" = addrspacecast ptr %2 to ptr addrspace(1)
%3 = inttoptr i64 %"50" to ptr store ptr addrspace(1) %"48", ptr addrspace(5) %"41", align 8
%"49" = addrspacecast ptr %3 to ptr addrspace(1) %"51" = load i64, ptr addrspace(5) %"42", align 4
store ptr addrspace(1) %"49", ptr addrspace(5) %"41", align 8 %3 = inttoptr i64 %"51" to ptr
%"31" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0) %"50" = addrspacecast ptr %3 to ptr addrspace(1)
store i32 %"31", ptr addrspace(5) %"42", align 4 store ptr addrspace(1) %"50", ptr addrspace(5) %"42", align 8
%"53" = load i32, ptr addrspace(5) %"42", align 4 %"32" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
%"52" = zext i32 %"53" to i64 store i32 %"32", ptr addrspace(5) %"43", align 4
store i64 %"52", ptr addrspace(5) %"43", align 4 %"54" = load i32, ptr addrspace(5) %"43", align 4
%"55" = load i64, ptr addrspace(5) %"40", align 4 %"53" = zext i32 %"54" to i64
%"56" = load i64, ptr addrspace(5) %"43", align 4 store i64 %"53", ptr addrspace(5) %"44", align 4
%"66" = add i64 %"55", %"56" %"56" = load i64, ptr addrspace(5) %"41", align 4
store i64 %"66", ptr addrspace(5) %"40", align 4 %"57" = load i64, ptr addrspace(5) %"44", align 4
%"58" = load i64, ptr addrspace(5) %"41", align 4 %"67" = add i64 %"56", %"57"
%"59" = load i64, ptr addrspace(5) %"43", align 4 store i64 %"67", ptr addrspace(5) %"41", align 4
%"68" = add i64 %"58", %"59" %"59" = load i64, ptr addrspace(5) %"42", align 4
store i64 %"68", ptr addrspace(5) %"41", align 4 %"60" = load i64, ptr addrspace(5) %"44", align 4
%"61" = load i64, ptr addrspace(5) %"40", align 4 %"69" = add i64 %"59", %"60"
%"70" = inttoptr i64 %"61" to ptr addrspace(1) store i64 %"69", ptr addrspace(5) %"42", align 4
%"60" = load i64, ptr addrspace(1) %"70", align 4
store i64 %"60", ptr addrspace(5) %"44", align 4
%"62" = load i64, ptr addrspace(5) %"41", align 4 %"62" = load i64, ptr addrspace(5) %"41", align 4
%"63" = load i64, ptr addrspace(5) %"44", align 4
%"71" = inttoptr i64 %"62" to ptr addrspace(1) %"71" = inttoptr i64 %"62" to ptr addrspace(1)
store i64 %"63", ptr addrspace(1) %"71", align 4 %"61" = load i64, ptr addrspace(1) %"71", align 4
store i64 %"61", ptr addrspace(5) %"45", align 4
%"63" = load i64, ptr addrspace(5) %"42", align 4
%"64" = load i64, ptr addrspace(5) %"45", align 4
%"72" = inttoptr i64 %"63" to ptr addrspace(1)
store i64 %"64", ptr addrspace(1) %"72", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@ -10,53 +10,57 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @stateful_ld_st_ntid_chain(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #0 { define amdgpu_kernel void @stateful_ld_st_ntid_chain(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #1 {
%"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5) %"45" = alloca i64, align 8, addrspace(5)
%"46" = alloca i64, align 8, addrspace(5) %"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca i64, align 8, addrspace(5) %"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca i64, align 8, addrspace(5) %"48" = alloca i64, align 8, addrspace(5)
%"49" = alloca i64, align 8, addrspace(5) %"49" = alloca i64, align 8, addrspace(5)
%"50" = alloca i32, align 4, addrspace(5) %"50" = alloca i64, align 8, addrspace(5)
%"51" = alloca i64, align 8, addrspace(5) %"51" = alloca i32, align 4, addrspace(5)
%"52" = alloca i64, align 8, addrspace(5) %"52" = alloca i64, align 8, addrspace(5)
%"53" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"72" = load i64, ptr addrspace(4) %"42", align 4 br label %"81"
store i64 %"72", ptr addrspace(5) %"44", align 4
"81": ; preds = %1
%"73" = load i64, ptr addrspace(4) %"43", align 4 %"73" = load i64, ptr addrspace(4) %"43", align 4
store i64 %"73", ptr addrspace(5) %"47", align 4 store i64 %"73", ptr addrspace(5) %"45", align 4
%"56" = load i64, ptr addrspace(5) %"44", align 4 %"74" = load i64, ptr addrspace(4) %"44", align 4
%2 = inttoptr i64 %"56" to ptr store i64 %"74", ptr addrspace(5) %"48", align 4
%"55" = addrspacecast ptr %2 to ptr addrspace(1) %"57" = load i64, ptr addrspace(5) %"45", align 4
store ptr addrspace(1) %"55", ptr addrspace(5) %"45", align 8 %2 = inttoptr i64 %"57" to ptr
%"58" = load i64, ptr addrspace(5) %"47", align 4 %"56" = addrspacecast ptr %2 to ptr addrspace(1)
%3 = inttoptr i64 %"58" to ptr store ptr addrspace(1) %"56", ptr addrspace(5) %"46", align 8
%"57" = addrspacecast ptr %3 to ptr addrspace(1) %"59" = load i64, ptr addrspace(5) %"48", align 4
store ptr addrspace(1) %"57", ptr addrspace(5) %"48", align 8 %3 = inttoptr i64 %"59" to ptr
%"35" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0) %"58" = addrspacecast ptr %3 to ptr addrspace(1)
store i32 %"35", ptr addrspace(5) %"50", align 4 store ptr addrspace(1) %"58", ptr addrspace(5) %"49", align 8
%"61" = load i32, ptr addrspace(5) %"50", align 4 %"36" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
%"60" = zext i32 %"61" to i64 store i32 %"36", ptr addrspace(5) %"51", align 4
store i64 %"60", ptr addrspace(5) %"51", align 4 %"62" = load i32, ptr addrspace(5) %"51", align 4
%"63" = load i64, ptr addrspace(5) %"45", align 4 %"61" = zext i32 %"62" to i64
%"64" = load i64, ptr addrspace(5) %"51", align 4 store i64 %"61", ptr addrspace(5) %"52", align 4
%"74" = add i64 %"63", %"64" %"64" = load i64, ptr addrspace(5) %"46", align 4
store i64 %"74", ptr addrspace(5) %"46", align 4 %"65" = load i64, ptr addrspace(5) %"52", align 4
%"66" = load i64, ptr addrspace(5) %"48", align 4 %"75" = add i64 %"64", %"65"
%"67" = load i64, ptr addrspace(5) %"51", align 4 store i64 %"75", ptr addrspace(5) %"47", align 4
%"76" = add i64 %"66", %"67" %"67" = load i64, ptr addrspace(5) %"49", align 4
store i64 %"76", ptr addrspace(5) %"49", align 4 %"68" = load i64, ptr addrspace(5) %"52", align 4
%"69" = load i64, ptr addrspace(5) %"46", align 4 %"77" = add i64 %"67", %"68"
%"78" = inttoptr i64 %"69" to ptr addrspace(1) store i64 %"77", ptr addrspace(5) %"50", align 4
%"68" = load i64, ptr addrspace(1) %"78", align 4 %"70" = load i64, ptr addrspace(5) %"47", align 4
store i64 %"68", ptr addrspace(5) %"52", align 4
%"70" = load i64, ptr addrspace(5) %"49", align 4
%"71" = load i64, ptr addrspace(5) %"52", align 4
%"79" = inttoptr i64 %"70" to ptr addrspace(1) %"79" = inttoptr i64 %"70" to ptr addrspace(1)
store i64 %"71", ptr addrspace(1) %"79", align 4 %"69" = load i64, ptr addrspace(1) %"79", align 4
store i64 %"69", ptr addrspace(5) %"53", align 4
%"71" = load i64, ptr addrspace(5) %"50", align 4
%"72" = load i64, ptr addrspace(5) %"53", align 4
%"80" = inttoptr i64 %"71" to ptr addrspace(1)
store i64 %"72", ptr addrspace(1) %"80", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@ -10,55 +10,59 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @stateful_ld_st_ntid_sub(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 { define amdgpu_kernel void @stateful_ld_st_ntid_sub(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #1 {
%"48" = alloca i64, align 8, addrspace(5)
%"49" = alloca i64, align 8, addrspace(5) %"49" = alloca i64, align 8, addrspace(5)
%"50" = alloca i64, align 8, addrspace(5) %"50" = alloca i64, align 8, addrspace(5)
%"51" = alloca i64, align 8, addrspace(5) %"51" = alloca i64, align 8, addrspace(5)
%"52" = alloca i64, align 8, addrspace(5) %"52" = alloca i64, align 8, addrspace(5)
%"53" = alloca i64, align 8, addrspace(5) %"53" = alloca i64, align 8, addrspace(5)
%"54" = alloca i32, align 4, addrspace(5) %"54" = alloca i64, align 8, addrspace(5)
%"55" = alloca i64, align 8, addrspace(5) %"55" = alloca i32, align 4, addrspace(5)
%"56" = alloca i64, align 8, addrspace(5) %"56" = alloca i64, align 8, addrspace(5)
%"57" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"76" = load i64, ptr addrspace(4) %"46", align 4 br label %"87"
store i64 %"76", ptr addrspace(5) %"48", align 4
"87": ; preds = %1
%"77" = load i64, ptr addrspace(4) %"47", align 4 %"77" = load i64, ptr addrspace(4) %"47", align 4
store i64 %"77", ptr addrspace(5) %"51", align 4 store i64 %"77", ptr addrspace(5) %"49", align 4
%"60" = load i64, ptr addrspace(5) %"48", align 4 %"78" = load i64, ptr addrspace(4) %"48", align 4
%2 = inttoptr i64 %"60" to ptr store i64 %"78", ptr addrspace(5) %"52", align 4
%"59" = addrspacecast ptr %2 to ptr addrspace(1) %"61" = load i64, ptr addrspace(5) %"49", align 4
store ptr addrspace(1) %"59", ptr addrspace(5) %"49", align 8 %2 = inttoptr i64 %"61" to ptr
%"62" = load i64, ptr addrspace(5) %"51", align 4 %"60" = addrspacecast ptr %2 to ptr addrspace(1)
%3 = inttoptr i64 %"62" to ptr store ptr addrspace(1) %"60", ptr addrspace(5) %"50", align 8
%"61" = addrspacecast ptr %3 to ptr addrspace(1) %"63" = load i64, ptr addrspace(5) %"52", align 4
store ptr addrspace(1) %"61", ptr addrspace(5) %"52", align 8 %3 = inttoptr i64 %"63" to ptr
%"35" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0) %"62" = addrspacecast ptr %3 to ptr addrspace(1)
store i32 %"35", ptr addrspace(5) %"54", align 4 store ptr addrspace(1) %"62", ptr addrspace(5) %"53", align 8
%"65" = load i32, ptr addrspace(5) %"54", align 4 %"36" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
%"64" = zext i32 %"65" to i64 store i32 %"36", ptr addrspace(5) %"55", align 4
store i64 %"64", ptr addrspace(5) %"55", align 4 %"66" = load i32, ptr addrspace(5) %"55", align 4
%"67" = load i64, ptr addrspace(5) %"49", align 4 %"65" = zext i32 %"66" to i64
%"68" = load i64, ptr addrspace(5) %"55", align 4 store i64 %"65", ptr addrspace(5) %"56", align 4
%"78" = sub i64 %"67", %"68" %"68" = load i64, ptr addrspace(5) %"50", align 4
store i64 %"78", ptr addrspace(5) %"50", align 4 %"69" = load i64, ptr addrspace(5) %"56", align 4
%"70" = load i64, ptr addrspace(5) %"52", align 4 %"79" = sub i64 %"68", %"69"
%"71" = load i64, ptr addrspace(5) %"55", align 4 store i64 %"79", ptr addrspace(5) %"51", align 4
%"81" = sub i64 %"70", %"71" %"71" = load i64, ptr addrspace(5) %"53", align 4
store i64 %"81", ptr addrspace(5) %"53", align 4 %"72" = load i64, ptr addrspace(5) %"56", align 4
%"72" = load i64, ptr addrspace(5) %"50", align 4 %"82" = sub i64 %"71", %"72"
%"84" = inttoptr i64 %"72" to ptr addrspace(1) store i64 %"82", ptr addrspace(5) %"54", align 4
%"37" = getelementptr inbounds i8, ptr addrspace(1) %"84", i64 0 %"73" = load i64, ptr addrspace(5) %"51", align 4
%"73" = load i64, ptr addrspace(1) %"37", align 4 %"85" = inttoptr i64 %"73" to ptr addrspace(1)
store i64 %"73", ptr addrspace(5) %"56", align 4 %"38" = getelementptr inbounds i8, ptr addrspace(1) %"85", i64 0
%"74" = load i64, ptr addrspace(5) %"53", align 4 %"74" = load i64, ptr addrspace(1) %"38", align 4
%"85" = inttoptr i64 %"74" to ptr addrspace(1) store i64 %"74", ptr addrspace(5) %"57", align 4
%"39" = getelementptr inbounds i8, ptr addrspace(1) %"85", i64 0 %"75" = load i64, ptr addrspace(5) %"54", align 4
%"75" = load i64, ptr addrspace(5) %"56", align 4 %"86" = inttoptr i64 %"75" to ptr addrspace(1)
store i64 %"75", ptr addrspace(1) %"39", align 4 %"40" = getelementptr inbounds i8, ptr addrspace(1) %"86", i64 0
%"76" = load i64, ptr addrspace(5) %"57", align 4
store i64 %"76", ptr addrspace(1) %"40", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @stateful_ld_st_simple(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { define amdgpu_kernel void @stateful_ld_st_simple(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"36", align 4 br label %"60"
store i64 %"43", ptr addrspace(5) %"38", align 4
"60": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"37", align 4 %"44" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"44", ptr addrspace(5) %"39", align 4 store i64 %"44", ptr addrspace(5) %"39", align 4
%"46" = load i64, ptr addrspace(5) %"38", align 4 %"45" = load i64, ptr addrspace(4) %"38", align 4
%2 = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"40", align 4
%"53" = addrspacecast ptr %2 to ptr addrspace(1) %"47" = load i64, ptr addrspace(5) %"39", align 4
store ptr addrspace(1) %"53", ptr addrspace(5) %"40", align 8 %2 = inttoptr i64 %"47" to ptr
%"48" = load i64, ptr addrspace(5) %"39", align 4 %"54" = addrspacecast ptr %2 to ptr addrspace(1)
%3 = inttoptr i64 %"48" to ptr store ptr addrspace(1) %"54", ptr addrspace(5) %"41", align 8
%"55" = addrspacecast ptr %3 to ptr addrspace(1) %"49" = load i64, ptr addrspace(5) %"40", align 4
store ptr addrspace(1) %"55", ptr addrspace(5) %"41", align 8 %3 = inttoptr i64 %"49" to ptr
%"50" = load i64, ptr addrspace(5) %"40", align 4 %"56" = addrspacecast ptr %3 to ptr addrspace(1)
%"57" = inttoptr i64 %"50" to ptr addrspace(1) store ptr addrspace(1) %"56", ptr addrspace(5) %"42", align 8
%"49" = load i64, ptr addrspace(1) %"57", align 4
store i64 %"49", ptr addrspace(5) %"42", align 4
%"51" = load i64, ptr addrspace(5) %"41", align 4 %"51" = load i64, ptr addrspace(5) %"41", align 4
%"52" = load i64, ptr addrspace(5) %"42", align 4
%"58" = inttoptr i64 %"51" to ptr addrspace(1) %"58" = inttoptr i64 %"51" to ptr addrspace(1)
store i64 %"52", ptr addrspace(1) %"58", align 4 %"50" = load i64, ptr addrspace(1) %"58", align 4
store i64 %"50", ptr addrspace(5) %"43", align 4
%"52" = load i64, ptr addrspace(5) %"42", align 4
%"53" = load i64, ptr addrspace(5) %"43", align 4
%"59" = inttoptr i64 %"52" to ptr addrspace(1)
store i64 %"53", ptr addrspace(1) %"59", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@ -10,45 +10,49 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @stateful_neg_offset(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @stateful_neg_offset(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5) %"42" = alloca i64, align 8, addrspace(5)
%"43" = alloca i64, align 8, addrspace(5) %"43" = alloca i64, align 8, addrspace(5)
%"44" = alloca i64, align 8, addrspace(5) %"44" = alloca i64, align 8, addrspace(5)
%"45" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"45" = load i64, ptr addrspace(4) %"37", align 4 br label %"68"
store i64 %"45", ptr addrspace(5) %"39", align 4
"68": ; preds = %1
%"46" = load i64, ptr addrspace(4) %"38", align 4 %"46" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"46", ptr addrspace(5) %"40", align 4 store i64 %"46", ptr addrspace(5) %"40", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4 %"47" = load i64, ptr addrspace(4) %"39", align 4
%2 = inttoptr i64 %"48" to ptr store i64 %"47", ptr addrspace(5) %"41", align 4
%"61" = addrspacecast ptr %2 to ptr addrspace(1) %"49" = load i64, ptr addrspace(5) %"40", align 4
store ptr addrspace(1) %"61", ptr addrspace(5) %"41", align 8 %2 = inttoptr i64 %"49" to ptr
%"50" = load i64, ptr addrspace(5) %"40", align 4 %"62" = addrspacecast ptr %2 to ptr addrspace(1)
%3 = inttoptr i64 %"50" to ptr store ptr addrspace(1) %"62", ptr addrspace(5) %"42", align 8
%"63" = addrspacecast ptr %3 to ptr addrspace(1) %"51" = load i64, ptr addrspace(5) %"41", align 4
store ptr addrspace(1) %"63", ptr addrspace(5) %"42", align 8 %3 = inttoptr i64 %"51" to ptr
%"52" = load i64, ptr addrspace(5) %"41", align 4 %"64" = addrspacecast ptr %3 to ptr addrspace(1)
store ptr addrspace(1) %"64", ptr addrspace(5) %"43", align 8
%"53" = load i64, ptr addrspace(5) %"42", align 4 %"53" = load i64, ptr addrspace(5) %"42", align 4
%"51" = add i64 %"52", %"53" %"54" = load i64, ptr addrspace(5) %"43", align 4
store i64 %"51", ptr addrspace(5) %"43", align 4 %"52" = add i64 %"53", %"54"
%"55" = load i64, ptr addrspace(5) %"41", align 4 store i64 %"52", ptr addrspace(5) %"44", align 4
%"56" = load i64, ptr addrspace(5) %"42", align 4 %"56" = load i64, ptr addrspace(5) %"42", align 4
%"54" = sub i64 %"55", %"56" %"57" = load i64, ptr addrspace(5) %"43", align 4
store i64 %"54", ptr addrspace(5) %"43", align 4 %"55" = sub i64 %"56", %"57"
%"58" = load i64, ptr addrspace(5) %"41", align 4 store i64 %"55", ptr addrspace(5) %"44", align 4
%"65" = inttoptr i64 %"58" to ptr addrspace(1)
%"57" = load i64, ptr addrspace(1) %"65", align 4
store i64 %"57", ptr addrspace(5) %"44", align 4
%"59" = load i64, ptr addrspace(5) %"42", align 4 %"59" = load i64, ptr addrspace(5) %"42", align 4
%"60" = load i64, ptr addrspace(5) %"44", align 4
%"66" = inttoptr i64 %"59" to ptr addrspace(1) %"66" = inttoptr i64 %"59" to ptr addrspace(1)
store i64 %"60", ptr addrspace(1) %"66", align 4 %"58" = load i64, ptr addrspace(1) %"66", align 4
store i64 %"58", ptr addrspace(5) %"45", align 4
%"60" = load i64, ptr addrspace(5) %"43", align 4
%"61" = load i64, ptr addrspace(5) %"45", align 4
%"67" = inttoptr i64 %"60" to ptr addrspace(1)
store i64 %"61", ptr addrspace(1) %"67", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { define amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i64, align 8, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"42" = load i64, ptr addrspace(4) %"36", align 4 br label %"53"
store i64 %"42", ptr addrspace(5) %"38", align 4
"53": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr
%"44" = load i64, ptr %"50", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"47" = load i64, ptr addrspace(5) %"40", align 4 %"46" = load i64, ptr addrspace(5) %"39", align 4
%"46" = sub i64 %"47", 1 %"51" = inttoptr i64 %"46" to ptr
store i64 %"46", ptr addrspace(5) %"41", align 4 %"45" = load i64, ptr %"51", align 4
%"48" = load i64, ptr addrspace(5) %"39", align 4 store i64 %"45", ptr addrspace(5) %"41", align 4
%"49" = load i64, ptr addrspace(5) %"41", align 4 %"48" = load i64, ptr addrspace(5) %"41", align 4
%"51" = inttoptr i64 %"48" to ptr %"47" = sub i64 %"48", 1
store i64 %"49", ptr %"51", align 4 store i64 %"47", ptr addrspace(5) %"42", align 4
%"49" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i64, ptr addrspace(5) %"42", align 4
%"52" = inttoptr i64 %"49" to ptr
store i64 %"50", ptr %"52", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@ -10,70 +10,77 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define <2 x i32> @__zluda_ptx_impl_impl(<2 x i32> %"9") #0 { define <2 x i32> @impl(<2 x i32> %"9") #0 {
%"49" = alloca <2 x i32>, align 8, addrspace(5)
%"50" = alloca <2 x i32>, align 8, addrspace(5) %"50" = alloca <2 x i32>, align 8, addrspace(5)
%"51" = alloca i32, align 4, addrspace(5) %"51" = alloca <2 x i32>, align 8, addrspace(5)
%"52" = alloca i32, align 4, addrspace(5) %"52" = alloca i32, align 4, addrspace(5)
%"53" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"37" = extractelement <2 x i32> %"9", i8 0 br label %"91"
store i32 %"37", ptr addrspace(5) %"51", align 4
%"38" = extractelement <2 x i32> %"9", i8 1 "91": ; preds = %1
%"38" = extractelement <2 x i32> %"9", i8 0
store i32 %"38", ptr addrspace(5) %"52", align 4 store i32 %"38", ptr addrspace(5) %"52", align 4
%"56" = load i32, ptr addrspace(5) %"51", align 4 %"39" = extractelement <2 x i32> %"9", i8 1
store i32 %"39", ptr addrspace(5) %"53", align 4
%"57" = load i32, ptr addrspace(5) %"52", align 4 %"57" = load i32, ptr addrspace(5) %"52", align 4
%"55" = add i32 %"56", %"57" %"58" = load i32, ptr addrspace(5) %"53", align 4
store i32 %"55", ptr addrspace(5) %"52", align 4 %"56" = add i32 %"57", %"58"
%"58" = load i32, ptr addrspace(5) %"52", align 4 store i32 %"56", ptr addrspace(5) %"53", align 4
%"60" = load <2 x i32>, ptr addrspace(5) %"50", align 8 %"59" = load i32, ptr addrspace(5) %"53", align 4
%"59" = insertelement <2 x i32> %"60", i32 %"58", i8 0 %"61" = load <2 x i32>, ptr addrspace(5) %"51", align 8
store <2 x i32> %"59", ptr addrspace(5) %"50", align 8 %"60" = insertelement <2 x i32> %"61", i32 %"59", i8 0
%"61" = load i32, ptr addrspace(5) %"52", align 4 store <2 x i32> %"60", ptr addrspace(5) %"51", align 8
%"63" = load <2 x i32>, ptr addrspace(5) %"50", align 8 %"62" = load i32, ptr addrspace(5) %"53", align 4
%"62" = insertelement <2 x i32> %"63", i32 %"61", i8 1 %"64" = load <2 x i32>, ptr addrspace(5) %"51", align 8
store <2 x i32> %"62", ptr addrspace(5) %"50", align 8 %"63" = insertelement <2 x i32> %"64", i32 %"62", i8 1
%"64" = load <2 x i32>, ptr addrspace(5) %"50", align 8 store <2 x i32> %"63", ptr addrspace(5) %"51", align 8
%"42" = extractelement <2 x i32> %"64", i8 1 %"65" = load <2 x i32>, ptr addrspace(5) %"51", align 8
%"66" = load <2 x i32>, ptr addrspace(5) %"50", align 8 %"43" = extractelement <2 x i32> %"65", i8 1
%"65" = insertelement <2 x i32> %"66", i32 %"42", i8 0 %"67" = load <2 x i32>, ptr addrspace(5) %"51", align 8
store <2 x i32> %"65", ptr addrspace(5) %"50", align 8 %"66" = insertelement <2 x i32> %"67", i32 %"43", i8 0
%"68" = load <2 x i32>, ptr addrspace(5) %"50", align 8 store <2 x i32> %"66", ptr addrspace(5) %"51", align 8
store <2 x i32> %"68", ptr addrspace(5) %"49", align 8 %"69" = load <2 x i32>, ptr addrspace(5) %"51", align 8
%2 = load <2 x i32>, ptr addrspace(5) %"49", align 8 store <2 x i32> %"69", ptr addrspace(5) %"50", align 8
%2 = load <2 x i32>, ptr addrspace(5) %"50", align 8
ret <2 x i32> %2 ret <2 x i32> %2
} }
define amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"69", ptr addrspace(4) byref(i64) %"70") #0 { define amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"70", ptr addrspace(4) byref(i64) %"71") #1 {
%"71" = alloca i64, align 8, addrspace(5)
%"72" = alloca i64, align 8, addrspace(5) %"72" = alloca i64, align 8, addrspace(5)
%"73" = alloca <2 x i32>, align 8, addrspace(5) %"73" = alloca i64, align 8, addrspace(5)
%"74" = alloca i32, align 4, addrspace(5) %"74" = alloca <2 x i32>, align 8, addrspace(5)
%"75" = alloca i32, align 4, addrspace(5) %"75" = alloca i32, align 4, addrspace(5)
%"76" = alloca i64, align 8, addrspace(5) %"76" = alloca i32, align 4, addrspace(5)
%"77" = alloca i64, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"77" = load i64, ptr addrspace(4) %"69", align 4 br label %"92"
store i64 %"77", ptr addrspace(5) %"71", align 4
"92": ; preds = %1
%"78" = load i64, ptr addrspace(4) %"70", align 4 %"78" = load i64, ptr addrspace(4) %"70", align 4
store i64 %"78", ptr addrspace(5) %"72", align 4 store i64 %"78", ptr addrspace(5) %"72", align 4
%"80" = load i64, ptr addrspace(5) %"71", align 4 %"79" = load i64, ptr addrspace(4) %"71", align 4
%"87" = inttoptr i64 %"80" to ptr store i64 %"79", ptr addrspace(5) %"73", align 4
%"79" = load <2 x i32>, ptr %"87", align 8 %"81" = load i64, ptr addrspace(5) %"72", align 4
store <2 x i32> %"79", ptr addrspace(5) %"73", align 8 %"88" = inttoptr i64 %"81" to ptr
%"82" = load <2 x i32>, ptr addrspace(5) %"73", align 8 %"80" = load <2 x i32>, ptr %"88", align 8
%"81" = call <2 x i32> @__zluda_ptx_impl_impl(<2 x i32> %"82") store <2 x i32> %"80", ptr addrspace(5) %"74", align 8
store <2 x i32> %"81", ptr addrspace(5) %"73", align 8 %"83" = load <2 x i32>, ptr addrspace(5) %"74", align 8
%"84" = load <2 x i32>, ptr addrspace(5) %"73", align 8 %"82" = call <2 x i32> @impl(<2 x i32> %"83")
%"88" = bitcast <2 x i32> %"84" to i64 store <2 x i32> %"82", ptr addrspace(5) %"74", align 8
store i64 %"88", ptr addrspace(5) %"76", align 4 %"85" = load <2 x i32>, ptr addrspace(5) %"74", align 8
%"85" = load i64, ptr addrspace(5) %"72", align 4 %"89" = bitcast <2 x i32> %"85" to i64
%"86" = load <2 x i32>, ptr addrspace(5) %"73", align 8 store i64 %"89", ptr addrspace(5) %"77", align 4
%"89" = inttoptr i64 %"85" to ptr %"86" = load i64, ptr addrspace(5) %"73", align 4
store <2 x i32> %"86", ptr %"89", align 8 %"87" = load <2 x i32>, ptr addrspace(5) %"74", align 8
%"90" = inttoptr i64 %"86" to ptr
store <2 x i32> %"87", ptr %"90", align 8
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@ -10,30 +10,34 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 { define amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
%"38" = alloca i64, align 8, addrspace(5)
%"39" = alloca i64, align 8, addrspace(5) %"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca <4 x i32>, align 16, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5) %"41" = alloca <4 x i32>, align 16, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"42" = load i64, ptr addrspace(4) %"36", align 4 br label %"55"
store i64 %"42", ptr addrspace(5) %"38", align 4
"55": ; preds = %1
%"43" = load i64, ptr addrspace(4) %"37", align 4 %"43" = load i64, ptr addrspace(4) %"37", align 4
store i64 %"43", ptr addrspace(5) %"39", align 4 store i64 %"43", ptr addrspace(5) %"39", align 4
%"45" = load i64, ptr addrspace(5) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
%"50" = inttoptr i64 %"45" to ptr store i64 %"44", ptr addrspace(5) %"40", align 4
%"44" = load <4 x i32>, ptr %"50", align 16 %"46" = load i64, ptr addrspace(5) %"39", align 4
store <4 x i32> %"44", ptr addrspace(5) %"40", align 16 %"51" = inttoptr i64 %"46" to ptr
%"46" = load <4 x i32>, ptr addrspace(5) %"40", align 16 %"45" = load <4 x i32>, ptr %"51", align 16
%"29" = extractelement <4 x i32> %"46", i8 3 store <4 x i32> %"45", ptr addrspace(5) %"41", align 16
store i32 %"29", ptr addrspace(5) %"41", align 4 %"47" = load <4 x i32>, ptr addrspace(5) %"41", align 16
%"48" = load i64, ptr addrspace(5) %"39", align 4 %"30" = extractelement <4 x i32> %"47", i8 3
%"49" = load i32, ptr addrspace(5) %"41", align 4 store i32 %"30", ptr addrspace(5) %"42", align 4
%"53" = inttoptr i64 %"48" to ptr %"49" = load i64, ptr addrspace(5) %"40", align 4
store i32 %"49", ptr %"53", align 4 %"50" = load i32, ptr addrspace(5) %"42", align 4
%"54" = inttoptr i64 %"49" to ptr
store i32 %"50", ptr %"54", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@ -10,86 +10,90 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #0 { define amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #1 {
%"46" = alloca i64, align 8, addrspace(5)
%"47" = alloca i64, align 8, addrspace(5) %"47" = alloca i64, align 8, addrspace(5)
%"48" = alloca i16, align 2, addrspace(5) %"48" = alloca i64, align 8, addrspace(5)
%"49" = alloca i16, align 2, addrspace(5) %"49" = alloca i16, align 2, addrspace(5)
%"50" = alloca i16, align 2, addrspace(5) %"50" = alloca i16, align 2, addrspace(5)
%"51" = alloca i16, align 2, addrspace(5) %"51" = alloca i16, align 2, addrspace(5)
%"52" = alloca <4 x i16>, align 8, addrspace(5) %"52" = alloca i16, align 2, addrspace(5)
%"53" = alloca <4 x i16>, align 8, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"53" = load i64, ptr addrspace(4) %"44", align 4 br label %"94"
store i64 %"53", ptr addrspace(5) %"46", align 4
"94": ; preds = %1
%"54" = load i64, ptr addrspace(4) %"45", align 4 %"54" = load i64, ptr addrspace(4) %"45", align 4
store i64 %"54", ptr addrspace(5) %"47", align 4 store i64 %"54", ptr addrspace(5) %"47", align 4
%"55" = load i64, ptr addrspace(5) %"46", align 4 %"55" = load i64, ptr addrspace(4) %"46", align 4
%"83" = inttoptr i64 %"55" to ptr addrspace(1) store i64 %"55", ptr addrspace(5) %"48", align 4
%"32" = load <4 x i8>, ptr addrspace(1) %"83", align 4 %"56" = load i64, ptr addrspace(5) %"47", align 4
%"84" = extractelement <4 x i8> %"32", i8 0 %"84" = inttoptr i64 %"56" to ptr addrspace(1)
%"85" = extractelement <4 x i8> %"32", i8 1 %"33" = load <4 x i8>, ptr addrspace(1) %"84", align 4
%"86" = extractelement <4 x i8> %"32", i8 2 %"85" = extractelement <4 x i8> %"33", i8 0
%"87" = extractelement <4 x i8> %"32", i8 3 %"86" = extractelement <4 x i8> %"33", i8 1
%"56" = zext i8 %"84" to i16 %"87" = extractelement <4 x i8> %"33", i8 2
%"88" = extractelement <4 x i8> %"33", i8 3
%"57" = zext i8 %"85" to i16 %"57" = zext i8 %"85" to i16
%"58" = zext i8 %"86" to i16 %"58" = zext i8 %"86" to i16
%"59" = zext i8 %"87" to i16 %"59" = zext i8 %"87" to i16
store i16 %"56", ptr addrspace(5) %"48", align 2 %"60" = zext i8 %"88" to i16
store i16 %"57", ptr addrspace(5) %"49", align 2 store i16 %"57", ptr addrspace(5) %"49", align 2
store i16 %"58", ptr addrspace(5) %"50", align 2 store i16 %"58", ptr addrspace(5) %"50", align 2
store i16 %"59", ptr addrspace(5) %"51", align 2 store i16 %"59", ptr addrspace(5) %"51", align 2
%"60" = load i16, ptr addrspace(5) %"49", align 2 store i16 %"60", ptr addrspace(5) %"52", align 2
%"61" = load i16, ptr addrspace(5) %"50", align 2 %"61" = load i16, ptr addrspace(5) %"50", align 2
%"62" = load i16, ptr addrspace(5) %"51", align 2 %"62" = load i16, ptr addrspace(5) %"51", align 2
%"63" = load i16, ptr addrspace(5) %"48", align 2 %"63" = load i16, ptr addrspace(5) %"52", align 2
%2 = insertelement <4 x i16> undef, i16 %"60", i8 0 %"64" = load i16, ptr addrspace(5) %"49", align 2
%3 = insertelement <4 x i16> %2, i16 %"61", i8 1 %2 = insertelement <4 x i16> undef, i16 %"61", i8 0
%4 = insertelement <4 x i16> %3, i16 %"62", i8 2 %3 = insertelement <4 x i16> %2, i16 %"62", i8 1
%"33" = insertelement <4 x i16> %4, i16 %"63", i8 3 %4 = insertelement <4 x i16> %3, i16 %"63", i8 2
store <4 x i16> %"33", ptr addrspace(5) %"52", align 8 %"34" = insertelement <4 x i16> %4, i16 %"64", i8 3
%"65" = load <4 x i16>, ptr addrspace(5) %"52", align 8 store <4 x i16> %"34", ptr addrspace(5) %"53", align 8
%"66" = extractelement <4 x i16> %"65", i8 0 %"66" = load <4 x i16>, ptr addrspace(5) %"53", align 8
%"67" = extractelement <4 x i16> %"65", i8 1 %"67" = extractelement <4 x i16> %"66", i8 0
%"68" = extractelement <4 x i16> %"65", i8 2 %"68" = extractelement <4 x i16> %"66", i8 1
%"69" = extractelement <4 x i16> %"65", i8 3 %"69" = extractelement <4 x i16> %"66", i8 2
store i16 %"66", ptr addrspace(5) %"50", align 2 %"70" = extractelement <4 x i16> %"66", i8 3
store i16 %"67", ptr addrspace(5) %"51", align 2 store i16 %"67", ptr addrspace(5) %"51", align 2
store i16 %"68", ptr addrspace(5) %"48", align 2 store i16 %"68", ptr addrspace(5) %"52", align 2
store i16 %"69", ptr addrspace(5) %"49", align 2 store i16 %"69", ptr addrspace(5) %"49", align 2
%"70" = load i16, ptr addrspace(5) %"50", align 2 store i16 %"70", ptr addrspace(5) %"50", align 2
%"71" = load i16, ptr addrspace(5) %"51", align 2 %"71" = load i16, ptr addrspace(5) %"51", align 2
%"72" = load i16, ptr addrspace(5) %"48", align 2 %"72" = load i16, ptr addrspace(5) %"52", align 2
%"73" = load i16, ptr addrspace(5) %"49", align 2 %"73" = load i16, ptr addrspace(5) %"49", align 2
%5 = insertelement <4 x i16> undef, i16 %"70", i8 0 %"74" = load i16, ptr addrspace(5) %"50", align 2
%6 = insertelement <4 x i16> %5, i16 %"71", i8 1 %5 = insertelement <4 x i16> undef, i16 %"71", i8 0
%7 = insertelement <4 x i16> %6, i16 %"72", i8 2 %6 = insertelement <4 x i16> %5, i16 %"72", i8 1
%"36" = insertelement <4 x i16> %7, i16 %"73", i8 3 %7 = insertelement <4 x i16> %6, i16 %"73", i8 2
%"74" = extractelement <4 x i16> %"36", i8 0 %"37" = insertelement <4 x i16> %7, i16 %"74", i8 3
%"75" = extractelement <4 x i16> %"36", i8 1 %"75" = extractelement <4 x i16> %"37", i8 0
%"76" = extractelement <4 x i16> %"36", i8 2 %"76" = extractelement <4 x i16> %"37", i8 1
%"77" = extractelement <4 x i16> %"36", i8 3 %"77" = extractelement <4 x i16> %"37", i8 2
store i16 %"74", ptr addrspace(5) %"51", align 2 %"78" = extractelement <4 x i16> %"37", i8 3
store i16 %"75", ptr addrspace(5) %"48", align 2 store i16 %"75", ptr addrspace(5) %"52", align 2
store i16 %"76", ptr addrspace(5) %"49", align 2 store i16 %"76", ptr addrspace(5) %"49", align 2
store i16 %"77", ptr addrspace(5) %"50", align 2 store i16 %"77", ptr addrspace(5) %"50", align 2
%"78" = load i16, ptr addrspace(5) %"48", align 2 store i16 %"78", ptr addrspace(5) %"51", align 2
%"79" = load i16, ptr addrspace(5) %"49", align 2 %"79" = load i16, ptr addrspace(5) %"49", align 2
%"80" = load i16, ptr addrspace(5) %"50", align 2 %"80" = load i16, ptr addrspace(5) %"50", align 2
%"81" = load i16, ptr addrspace(5) %"51", align 2 %"81" = load i16, ptr addrspace(5) %"51", align 2
%"88" = trunc i16 %"78" to i8 %"82" = load i16, ptr addrspace(5) %"52", align 2
%"89" = trunc i16 %"79" to i8 %"89" = trunc i16 %"79" to i8
%"90" = trunc i16 %"80" to i8 %"90" = trunc i16 %"80" to i8
%"91" = trunc i16 %"81" to i8 %"91" = trunc i16 %"81" to i8
%8 = insertelement <4 x i8> undef, i8 %"88", i8 0 %"92" = trunc i16 %"82" to i8
%9 = insertelement <4 x i8> %8, i8 %"89", i8 1 %8 = insertelement <4 x i8> undef, i8 %"89", i8 0
%10 = insertelement <4 x i8> %9, i8 %"90", i8 2 %9 = insertelement <4 x i8> %8, i8 %"90", i8 1
%"37" = insertelement <4 x i8> %10, i8 %"91", i8 3 %10 = insertelement <4 x i8> %9, i8 %"91", i8 2
%"82" = load i64, ptr addrspace(5) %"47", align 4 %"38" = insertelement <4 x i8> %10, i8 %"92", i8 3
%"92" = inttoptr i64 %"82" to ptr addrspace(1) %"83" = load i64, ptr addrspace(5) %"48", align 4
store <4 x i8> %"37", ptr addrspace(1) %"92", align 4 %"93" = inttoptr i64 %"83" to ptr addrspace(1)
store <4 x i8> %"38", ptr addrspace(1) %"93", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
@ -10,36 +10,40 @@ declare i32 @__zluda_ptx_impl_sreg_clock() #0
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
define amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 { define amdgpu_kernel void @xor(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
%"39" = alloca i64, align 8, addrspace(5)
%"40" = alloca i64, align 8, addrspace(5) %"40" = alloca i64, align 8, addrspace(5)
%"41" = alloca i32, align 4, addrspace(5) %"41" = alloca i64, align 8, addrspace(5)
%"42" = alloca i32, align 4, addrspace(5) %"42" = alloca i32, align 4, addrspace(5)
%"43" = alloca i32, align 4, addrspace(5)
br label %1 br label %1
1: ; preds = %0 1: ; preds = %0
%"43" = load i64, ptr addrspace(4) %"37", align 4 br label %"58"
store i64 %"43", ptr addrspace(5) %"39", align 4
"58": ; preds = %1
%"44" = load i64, ptr addrspace(4) %"38", align 4 %"44" = load i64, ptr addrspace(4) %"38", align 4
store i64 %"44", ptr addrspace(5) %"40", align 4 store i64 %"44", ptr addrspace(5) %"40", align 4
%"46" = load i64, ptr addrspace(5) %"39", align 4 %"45" = load i64, ptr addrspace(4) %"39", align 4
%"54" = inttoptr i64 %"46" to ptr store i64 %"45", ptr addrspace(5) %"41", align 4
%"45" = load i32, ptr %"54", align 4 %"47" = load i64, ptr addrspace(5) %"40", align 4
store i32 %"45", ptr addrspace(5) %"41", align 4
%"47" = load i64, ptr addrspace(5) %"39", align 4
%"55" = inttoptr i64 %"47" to ptr %"55" = inttoptr i64 %"47" to ptr
%"30" = getelementptr inbounds i8, ptr %"55", i64 4 %"46" = load i32, ptr %"55", align 4
%"48" = load i32, ptr %"30", align 4 store i32 %"46", ptr addrspace(5) %"42", align 4
store i32 %"48", ptr addrspace(5) %"42", align 4 %"48" = load i64, ptr addrspace(5) %"40", align 4
%"50" = load i32, ptr addrspace(5) %"41", align 4 %"56" = inttoptr i64 %"48" to ptr
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
%"49" = load i32, ptr %"31", align 4
store i32 %"49", ptr addrspace(5) %"43", align 4
%"51" = load i32, ptr addrspace(5) %"42", align 4 %"51" = load i32, ptr addrspace(5) %"42", align 4
%"49" = xor i32 %"50", %"51" %"52" = load i32, ptr addrspace(5) %"43", align 4
store i32 %"49", ptr addrspace(5) %"41", align 4 %"50" = xor i32 %"51", %"52"
%"52" = load i64, ptr addrspace(5) %"40", align 4 store i32 %"50", ptr addrspace(5) %"42", align 4
%"53" = load i32, ptr addrspace(5) %"41", align 4 %"53" = load i64, ptr addrspace(5) %"41", align 4
%"56" = inttoptr i64 %"52" to ptr %"54" = load i32, ptr addrspace(5) %"42", align 4
store i32 %"53", ptr %"56", align 4 %"57" = inttoptr i64 %"53" to ptr
store i32 %"54", ptr %"57", align 4
ret void ret void
} }
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "no-trapping-math"="true" "uniform-work-group-size"="true" } attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }