mirror of
https://github.com/vosen/ZLUDA.git
synced 2025-04-28 21:47:57 +03:00
Update tests, renamed and clean up instruction mode -> global mode pass
This commit is contained in:
parent
826e98ba48
commit
618e47ddc6
102 changed files with 2920 additions and 4493 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -807,7 +807,6 @@ dependencies = [
|
|||
"ptx_parser",
|
||||
"quick-error",
|
||||
"rustc-hash 2.0.0",
|
||||
"smallvec",
|
||||
"strum",
|
||||
"strum_macros",
|
||||
"tempfile",
|
||||
|
|
|
@ -20,7 +20,6 @@ strum_macros = "0.26"
|
|||
petgraph = "0.7.1"
|
||||
microlp = "0.2.10"
|
||||
int-enum = "1.1"
|
||||
smallvec = "1.13"
|
||||
unwrap_or = "1.0.1"
|
||||
|
||||
[dev-dependencies]
|
||||
|
|
|
@ -452,22 +452,6 @@ impl<'a, 'input> ModuleEmitContext<'a, 'input> {
|
|||
}
|
||||
}
|
||||
|
||||
fn fun_name(
|
||||
method: Function2<ptx_parser::Instruction<SpirvWord>, SpirvWord>,
|
||||
method_emitter: &mut MethodEmitContext<'_>,
|
||||
) -> Result<(), TranslateError> {
|
||||
Ok(if method.is_kernel {
|
||||
if method.rounding_mode_f32 != ast::RoundingMode::NearestEven
|
||||
|| method.rounding_mode_f16f64 != ast::RoundingMode::NearestEven
|
||||
{
|
||||
method_emitter.emit_set_mode(ModeRegister::Rounding {
|
||||
f32: method.rounding_mode_f32,
|
||||
f16f64: method.rounding_mode_f16f64,
|
||||
})?;
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
fn llvm_ftz(ftz: bool) -> &'static str {
|
||||
if ftz {
|
||||
"preserve-sign"
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use super::BrachCondition;
|
||||
use super::Directive2;
|
||||
use super::Function2;
|
||||
use super::GlobalStringIdentResolver2;
|
||||
use super::ModeRegister;
|
||||
use super::SpirvWord;
|
||||
use super::Statement;
|
||||
|
@ -16,7 +17,6 @@ use petgraph::Graph;
|
|||
use ptx_parser as ast;
|
||||
use rustc_hash::FxHashMap;
|
||||
use rustc_hash::FxHashSet;
|
||||
use smallvec::SmallVec;
|
||||
use std::hash::Hash;
|
||||
use std::iter;
|
||||
use std::mem;
|
||||
|
@ -115,108 +115,6 @@ struct ResolvedInstructionModes {
|
|||
rounding_f16f64: Resolved<ast::RoundingMode>,
|
||||
}
|
||||
|
||||
/*
|
||||
struct ExitInstructionModes {
|
||||
denormal_f32: Resolved<bool>,
|
||||
denormal_f16f64: Resolved<bool>,
|
||||
rounding_f32: Resolved<ast::RoundingMode>,
|
||||
rounding_f16f64: Resolved<ast::RoundingMode>,
|
||||
}
|
||||
|
||||
impl ExitInstructionModes {
|
||||
fn from_node(
|
||||
denormal: &TwinModeInsertions<DenormalMode>,
|
||||
rounding: &TwinModeInsertions<RoundingMode>,
|
||||
Node {
|
||||
label: ret_block_name,
|
||||
denormal_f32,
|
||||
denormal_f16f64,
|
||||
rounding_f32,
|
||||
rounding_f16f64,
|
||||
}: &Node,
|
||||
) -> Result<Self, TranslateError> {
|
||||
let denormal_entry = &denormal.basic_blocks;
|
||||
let rounding_entry = &rounding.basic_blocks;
|
||||
let denormal_f32 = match denormal_f32.exit {
|
||||
Some(ExtendedMode::Entry(kernel)) => Resolved::Value(
|
||||
denormal_entry
|
||||
.get(&kernel)
|
||||
.ok_or_else(error_unreachable)?
|
||||
.twin_mode
|
||||
.ok_or_else(error_unreachable)?
|
||||
.f32
|
||||
.to_ftz(),
|
||||
),
|
||||
Some(ExtendedMode::BasicBlock(value)) => Resolved::Value(value.to_ftz()),
|
||||
None => denormal_entry
|
||||
.get(ret_block_name)
|
||||
.ok_or_else(error_unreachable)?
|
||||
.twin_mode
|
||||
.map(|m| m.f32.to_ftz()),
|
||||
};
|
||||
/*
|
||||
let denormal_f16f64 = match denormal_f16f64.exit {
|
||||
None => denormal_entry
|
||||
.get(ret_block_name)
|
||||
.ok_or_else(error_unreachable)?
|
||||
.twin_mode
|
||||
.map(|m| m.f16f64.to_ftz()),
|
||||
Some(ExtendedMode::Entry(kernel)) => Some(
|
||||
denormal_entry
|
||||
.get(&kernel)
|
||||
.ok_or_else(error_unreachable)?
|
||||
.twin_mode
|
||||
.unwrap()
|
||||
.f16f64
|
||||
.to_ftz(),
|
||||
),
|
||||
Some(ExtendedMode::BasicBlock(value)) => Some(value.to_ftz()),
|
||||
};
|
||||
let rounding_f32 = match rounding_f32.exit {
|
||||
None => rounding_entry
|
||||
.get(ret_block_name)
|
||||
.ok_or_else(error_unreachable)?
|
||||
.twin_mode
|
||||
.map(|m| m.f32.to_ast()),
|
||||
Some(ExtendedMode::Entry(kernel)) => Some(
|
||||
rounding_entry
|
||||
.get(&kernel)
|
||||
.ok_or_else(error_unreachable)?
|
||||
.twin_mode
|
||||
.unwrap()
|
||||
.f32
|
||||
.to_ast(),
|
||||
),
|
||||
Some(ExtendedMode::BasicBlock(value)) => Some(value.to_ast()),
|
||||
};
|
||||
let rounding_f16f64 = match rounding_f16f64.exit {
|
||||
None => rounding_entry
|
||||
.get(ret_block_name)
|
||||
.ok_or_else(error_unreachable)?
|
||||
.twin_mode
|
||||
.map(|m| m.f16f64.to_ast()),
|
||||
Some(ExtendedMode::Entry(kernel)) => Some(
|
||||
rounding_entry
|
||||
.get(&kernel)
|
||||
.ok_or_else(error_unreachable)?
|
||||
.twin_mode
|
||||
.unwrap()
|
||||
.f16f64
|
||||
.to_ast(),
|
||||
),
|
||||
Some(ExtendedMode::BasicBlock(value)) => Some(value.to_ast()),
|
||||
};
|
||||
*/
|
||||
Ok(Self {
|
||||
denormal_f32,
|
||||
denormal_f16f64,
|
||||
rounding_f32,
|
||||
rounding_f16f64,
|
||||
})
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
impl InstructionModes {
|
||||
fn fold_into(self, entry: &mut Self, exit: &mut Self) {
|
||||
fn set_if_none<T: Copy>(source: &mut Option<T>, value: Option<T>) {
|
||||
|
@ -405,6 +303,15 @@ impl ControlFlowGraph {
|
|||
node.rounding_f16f64.exit = exit.rounding_f16f64.map(ExtendedMode::BasicBlock);
|
||||
}
|
||||
|
||||
// Our control flow graph expresses function calls as edges in the graph.
|
||||
// While building the graph it's always possible to create the edge from
|
||||
// caller basic block to a function, but it's impossible to construct an
|
||||
// edge from the function return basic block to after-call basic block in
|
||||
// caller (the function might have been just a declaration for now).
|
||||
// That's why we collect:
|
||||
// * Which basic blocks does a function return to
|
||||
// * What are the function's return basic blocks
|
||||
// and then, after visiting all functions, we add the missing edges here
|
||||
fn fixup_function_calls(&mut self) -> Result<(), TranslateError> {
|
||||
for (fn_, follow_on_labels) in self.call_returns.iter() {
|
||||
let connecting_bb = match self.functions_rets.get(fn_) {
|
||||
|
@ -417,34 +324,25 @@ impl ControlFlowGraph {
|
|||
}
|
||||
}
|
||||
Ok(())
|
||||
/*
|
||||
for (function, source) in self.functions_rets.iter() {
|
||||
for target in self
|
||||
.call_returns
|
||||
.get(function)
|
||||
.iter()
|
||||
.map(|vec| vec.iter())
|
||||
.flatten()
|
||||
.copied()
|
||||
{
|
||||
self.graph.add_edge(*source, target, ());
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
struct ResolvedControlFlowGraph {
|
||||
entry_points: FxHashMap<SpirvWord, NodeIndex>,
|
||||
basic_blocks: FxHashMap<SpirvWord, NodeIndex>,
|
||||
// map function -> return label
|
||||
call_returns: FxHashMap<SpirvWord, Vec<NodeIndex>>,
|
||||
// map function -> return basic block
|
||||
functions_rets: FxHashMap<SpirvWord, NodeIndex>,
|
||||
graph: Graph<ResolvedNode, ()>,
|
||||
}
|
||||
|
||||
impl ResolvedControlFlowGraph {
|
||||
// This function takes the initial control flow graph. Initial control flow
|
||||
// graph only has mode values for basic blocks if any instruction in the
|
||||
// given basic block requires a mode. All the other basic blocks have no
|
||||
// value. This pass resolves the values for all basic blocks. If a basic
|
||||
// block sets no value and there are multiple incoming edges from
|
||||
// basic block with different values then the value is set to a special
|
||||
// value "Conflict".
|
||||
// After this pass every basic block either has a concrete value or "Conflict"
|
||||
fn new(
|
||||
cfg: ControlFlowGraph,
|
||||
f32_denormal_kernels: &FxHashMap<SpirvWord, DenormalMode>,
|
||||
|
@ -626,9 +524,7 @@ impl ResolvedControlFlowGraph {
|
|||
Err(error_unreachable())
|
||||
} else {
|
||||
Ok(Self {
|
||||
entry_points: cfg.entry_points,
|
||||
basic_blocks: cfg.basic_blocks,
|
||||
call_returns: cfg.call_returns,
|
||||
functions_rets: cfg.functions_rets,
|
||||
graph,
|
||||
})
|
||||
|
@ -706,17 +602,111 @@ impl Node {
|
|||
}
|
||||
}
|
||||
|
||||
trait EnumTuple {
|
||||
const LENGTH: usize;
|
||||
|
||||
fn get(&self, x: usize) -> u8;
|
||||
fn get_mut(&mut self, x: usize) -> &mut u8;
|
||||
// This pass converts instruction-scoped modes (denormal, rounding) in PTX
|
||||
// to globally-scoped modes as expected by AMD GPUs.
|
||||
// As a simplified example this pass converts this instruction:
|
||||
// add.ftz.rn.f32 %r1, %r2, %r3;
|
||||
// to:
|
||||
// set_ftz_mode true;
|
||||
// set_rnd_mode rn;
|
||||
// add.ftz.rn.f32 %r1, %r2, %r3;
|
||||
pub(crate) fn run<'input>(
|
||||
flat_resolver: &mut GlobalStringIdentResolver2<'input>,
|
||||
directives: Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> Result<Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
let cfg = create_control_flow_graph(&directives)?;
|
||||
let (denormal_f32, denormal_f16f64, rounding_f32, rounding_f16f64) =
|
||||
compute_minimal_mode_insertions(&cfg);
|
||||
let temp = compute_full_mode_insertions(
|
||||
flat_resolver,
|
||||
&directives,
|
||||
cfg,
|
||||
denormal_f32,
|
||||
denormal_f16f64,
|
||||
rounding_f32,
|
||||
rounding_f16f64,
|
||||
)?;
|
||||
apply_global_mode_controls(directives, temp)
|
||||
}
|
||||
|
||||
pub(crate) fn run<'input>(
|
||||
flat_resolver: &mut super::GlobalStringIdentResolver2<'input>,
|
||||
directives: Vec<super::Directive2<ast::Instruction<SpirvWord>, super::SpirvWord>>,
|
||||
) -> Result<Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
// For every basic block this pass computes:
|
||||
// - Name of mode prologue basic blocks. Mode prologue is a basic block which
|
||||
// contains single instruction that sets mode to the desired value. It will
|
||||
// be later inserted just before the basic block and all jumps that require
|
||||
// mode change will go through this basic block
|
||||
// - Entry mode: what is the mode for both f32 and f16f64 at the first instruction.
|
||||
// This will be used when emitting instructions in the basic block. When we
|
||||
// emit an instruction we get its modes, check if they are different and if so
|
||||
// decide: do we emit new mode set statement or we fold into previous mode set.
|
||||
// We don't need to compute exit mode for every basic block because this will be
|
||||
// computed naturally when emitting instructions in a basic block.
|
||||
// Only exception is exit mode for returning (containing instruction `ret;`)
|
||||
// basic blocks for functions.
|
||||
// We need this information to handle call instructions correctly.
|
||||
fn compute_full_mode_insertions(
|
||||
flat_resolver: &mut GlobalStringIdentResolver2,
|
||||
directives: &Vec<Directive2<ptx_parser::Instruction<SpirvWord>, SpirvWord>>,
|
||||
cfg: ControlFlowGraph,
|
||||
denormal_f32: MandatoryModeInsertions<DenormalMode>,
|
||||
denormal_f16f64: MandatoryModeInsertions<DenormalMode>,
|
||||
rounding_f32: MandatoryModeInsertions<RoundingMode>,
|
||||
rounding_f16f64: MandatoryModeInsertions<RoundingMode>,
|
||||
) -> Result<FullModeInsertion, TranslateError> {
|
||||
let cfg = ResolvedControlFlowGraph::new(
|
||||
cfg,
|
||||
&denormal_f32.kernels,
|
||||
&denormal_f16f64.kernels,
|
||||
&rounding_f32.kernels,
|
||||
&rounding_f16f64.kernels,
|
||||
)?;
|
||||
join_modes(
|
||||
flat_resolver,
|
||||
directives,
|
||||
cfg,
|
||||
denormal_f32,
|
||||
denormal_f16f64,
|
||||
rounding_f32,
|
||||
rounding_f16f64,
|
||||
)
|
||||
}
|
||||
|
||||
// This function takes the control flow graph and for each global mode computes:
|
||||
// * Which basic blocks have an incoming edge from at least one basic block with
|
||||
// different mode. That means that we will later need to insert a mode
|
||||
// "prologue": an artificial basic block which sets the mode to the desired
|
||||
// value. All mode-changing edges will be redirected to that basic block
|
||||
// * What is the initial value for the mode in a kernel. Note that this only
|
||||
// computes the initial value if the value is observed by a basic block.
|
||||
// For some kernels the initial value does not matter and in that case a later
|
||||
// pass should use default value
|
||||
fn compute_minimal_mode_insertions(
|
||||
cfg: &ControlFlowGraph,
|
||||
) -> (
|
||||
MandatoryModeInsertions<DenormalMode>,
|
||||
MandatoryModeInsertions<DenormalMode>,
|
||||
MandatoryModeInsertions<RoundingMode>,
|
||||
MandatoryModeInsertions<RoundingMode>,
|
||||
) {
|
||||
let rounding_f32 = compute_single_mode_insertions(cfg, |node| node.rounding_f32);
|
||||
let denormal_f32 = compute_single_mode_insertions(cfg, |node| node.denormal_f32);
|
||||
let denormal_f16f64 = compute_single_mode_insertions(cfg, |node| node.denormal_f16f64);
|
||||
let rounding_f16f64 = compute_single_mode_insertions(cfg, |node| node.rounding_f16f64);
|
||||
let denormal_f32 =
|
||||
optimize_mode_insertions::<DenormalMode, { DenormalMode::COUNT }>(denormal_f32);
|
||||
let denormal_f16f64 =
|
||||
optimize_mode_insertions::<DenormalMode, { DenormalMode::COUNT }>(denormal_f16f64);
|
||||
let rounding_f32 =
|
||||
optimize_mode_insertions::<RoundingMode, { RoundingMode::COUNT }>(rounding_f32);
|
||||
let rounding_f16f64: MandatoryModeInsertions<RoundingMode> =
|
||||
optimize_mode_insertions::<RoundingMode, { RoundingMode::COUNT }>(rounding_f16f64);
|
||||
(denormal_f32, denormal_f16f64, rounding_f32, rounding_f16f64)
|
||||
}
|
||||
|
||||
// This function creates control flow graph for the whole module. This control
|
||||
// flow graph expresses function calls as edges in the control flow graph
|
||||
fn create_control_flow_graph(
|
||||
directives: &Vec<Directive2<ptx_parser::Instruction<SpirvWord>, SpirvWord>>,
|
||||
) -> Result<ControlFlowGraph, TranslateError> {
|
||||
let mut cfg = ControlFlowGraph::new();
|
||||
for directive in directives.iter() {
|
||||
match directive {
|
||||
|
@ -770,65 +760,11 @@ pub(crate) fn run<'input>(
|
|||
_ => {}
|
||||
}
|
||||
}
|
||||
//println!(
|
||||
// "{:?}",
|
||||
// petgraph::dot::Dot::with_config(&cfg.graph, &[petgraph::dot::Config::EdgeNoLabel])
|
||||
//);
|
||||
cfg.fixup_function_calls()?;
|
||||
//println!(
|
||||
// "{:?}",
|
||||
// petgraph::dot::Dot::with_config(&cfg.graph, &[petgraph::dot::Config::EdgeNoLabel])
|
||||
//);
|
||||
let rounding_f32 = compute_single_mode(&cfg, |node| node.rounding_f32);
|
||||
let denormal_f32 = compute_single_mode(&cfg, |node| node.denormal_f32);
|
||||
let denormal_f16f64 = compute_single_mode(&cfg, |node| node.denormal_f16f64);
|
||||
let rounding_f16f64 = compute_single_mode(&cfg, |node| node.rounding_f16f64);
|
||||
let denormal_f32 = optimize::<DenormalMode, { DenormalMode::COUNT }>(denormal_f32);
|
||||
let denormal_f16f64 = optimize::<DenormalMode, { DenormalMode::COUNT }>(denormal_f16f64);
|
||||
let rounding_f32 = optimize::<RoundingMode, { RoundingMode::COUNT }>(rounding_f32);
|
||||
let rounding_f16f64: MandatoryModeInsertions<RoundingMode> =
|
||||
optimize::<RoundingMode, { RoundingMode::COUNT }>(rounding_f16f64);
|
||||
let cfg = ResolvedControlFlowGraph::new(
|
||||
cfg,
|
||||
&denormal_f32.kernels,
|
||||
&denormal_f16f64.kernels,
|
||||
&rounding_f32.kernels,
|
||||
&rounding_f16f64.kernels,
|
||||
)?;
|
||||
let temp = join_modes2(
|
||||
flat_resolver,
|
||||
&directives,
|
||||
cfg,
|
||||
denormal_f32,
|
||||
denormal_f16f64,
|
||||
rounding_f32,
|
||||
rounding_f16f64,
|
||||
)?;
|
||||
|
||||
/*
|
||||
let denormal = join_modes(
|
||||
flat_resolver,
|
||||
&cfg,
|
||||
denormal_f32,
|
||||
|node| node.denormal_f32,
|
||||
denormal_f16f64,
|
||||
|node| node.denormal_f16f64,
|
||||
)?;
|
||||
let rounding = join_modes(
|
||||
flat_resolver,
|
||||
&cfg,
|
||||
rounding_f32,
|
||||
|node| node.rounding_f32,
|
||||
rounding_f16f64,
|
||||
|node| node.rounding_f16f64,
|
||||
)?;
|
||||
let all_modes = FullModeInsertion::new(flat_resolver, denormal, rounding)?;
|
||||
*/
|
||||
let directives = insert_mode_control(flat_resolver, directives, temp)?;
|
||||
Ok(directives)
|
||||
Ok(cfg)
|
||||
}
|
||||
|
||||
fn join_modes2(
|
||||
fn join_modes(
|
||||
flat_resolver: &mut super::GlobalStringIdentResolver2,
|
||||
directives: &Vec<super::Directive2<ast::Instruction<SpirvWord>, super::SpirvWord>>,
|
||||
cfg: ResolvedControlFlowGraph,
|
||||
|
@ -836,7 +772,7 @@ fn join_modes2(
|
|||
mandatory_denormal_f16f64: MandatoryModeInsertions<DenormalMode>,
|
||||
mandatory_rounding_f32: MandatoryModeInsertions<RoundingMode>,
|
||||
mandatory_rounding_f16f64: MandatoryModeInsertions<RoundingMode>,
|
||||
) -> Result<FullModeInsertion2, TranslateError> {
|
||||
) -> Result<FullModeInsertion, TranslateError> {
|
||||
let basic_blocks = cfg
|
||||
.graph
|
||||
.node_weights()
|
||||
|
@ -892,7 +828,7 @@ fn join_modes2(
|
|||
))
|
||||
})
|
||||
.collect::<Result<FxHashMap<_, _>, _>>()?;
|
||||
let temp = directives
|
||||
let functions_exit_modes = directives
|
||||
.iter()
|
||||
.filter_map(|directive| match directive {
|
||||
Directive2::Method(Function2 {
|
||||
|
@ -933,128 +869,15 @@ fn join_modes2(
|
|||
_ => None,
|
||||
})
|
||||
.collect::<Result<FxHashMap<_, _>, _>>()?;
|
||||
let functions_exit_modes = cfg
|
||||
.functions_rets
|
||||
.into_iter()
|
||||
.map(|(bb, node)| {
|
||||
let weights = cfg.graph.node_weight(node).ok_or_else(error_unreachable)?;
|
||||
let modes = ResolvedInstructionModes {
|
||||
denormal_f32: weights.denormal_f32.exit.map(DenormalMode::to_ftz),
|
||||
denormal_f16f64: weights.denormal_f16f64.exit.map(DenormalMode::to_ftz),
|
||||
rounding_f32: weights.rounding_f32.exit.map(RoundingMode::to_ast),
|
||||
rounding_f16f64: weights.rounding_f16f64.exit.map(RoundingMode::to_ast),
|
||||
};
|
||||
Ok((bb, modes))
|
||||
})
|
||||
.collect::<Result<FxHashMap<_, _>, _>>()?;
|
||||
Ok(FullModeInsertion2 {
|
||||
Ok(FullModeInsertion {
|
||||
basic_blocks,
|
||||
functions_exit_modes: temp,
|
||||
functions_exit_modes,
|
||||
})
|
||||
}
|
||||
|
||||
// For every basic block this pass computes:
|
||||
// - Name of mode prologue basic block. Mode prologue is a basic block which
|
||||
// contains single instruction that sets mode to the desired value. It will
|
||||
// be later inserted just before the basic block and all jumps that require
|
||||
// mode change will go through this basic block
|
||||
// - Entry mode: what is the mode for both f32 and f16f64 at the first instruction.
|
||||
// This will be used when emitting instructions in the basic block. When we
|
||||
// emit an instruction we get its modes, check if they are different and if so
|
||||
// decide: do we emit new mode set statement or we fold into previous mode set.
|
||||
// We don't need to compute exit mode because this will be computed naturally
|
||||
// when emitting instructions in a basic block. We need exit mode to know if we
|
||||
// jump directly to the next bb or jump to mode prologue
|
||||
/*
|
||||
fn join_modes<'input, T: Eq + PartialEq + Copy + Default>(
|
||||
flat_resolver: &mut super::GlobalStringIdentResolver2<'input>,
|
||||
cfg: &ResolvedControlFlowGraph,
|
||||
f32_insertions: MandatoryModeInsertions<T>,
|
||||
mut f32_view: impl FnMut(&ResolvedNode) -> ResolvedMode<T>,
|
||||
f16f64_insertions: MandatoryModeInsertions<T>,
|
||||
mut f16f64_view: impl FnMut(&ResolvedNode) -> ResolvedMode<T>,
|
||||
) -> Result<TwinModeInsertions<T>, TranslateError> {
|
||||
let basic_blocks = cfg
|
||||
.graph
|
||||
.node_weights()
|
||||
.map(|basic_block| {
|
||||
let requires_prologue = f32_insertions.basic_blocks.contains(&basic_block.label)
|
||||
|| f16f64_insertions.basic_blocks.contains(&basic_block.label);
|
||||
let prologue: Option<SpirvWord> = if requires_prologue {
|
||||
Some(flat_resolver.register_unnamed(None))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
let f32 = f32_view(basic_block);
|
||||
let f16f64 = f16f64_view(basic_block);
|
||||
let twin_mode = match (f32.entry, f16f64.entry) {
|
||||
(Resolved::Conflict, Resolved::Conflict) => Resolved::Conflict,
|
||||
(f32, f16f64) => Resolved::Value(TwinMode {
|
||||
f32: f32.unwrap_of_default(),
|
||||
f16f64: f16f64.unwrap_of_default(),
|
||||
}),
|
||||
};
|
||||
Ok((
|
||||
basic_block.label,
|
||||
BasicBlockEntryState {
|
||||
prologue,
|
||||
twin_mode,
|
||||
},
|
||||
))
|
||||
})
|
||||
.collect::<Result<FxHashMap<_, _>, _>>()?;
|
||||
Ok(TwinModeInsertions { basic_blocks })
|
||||
}
|
||||
*/
|
||||
|
||||
struct TwinModeInsertions<T> {
|
||||
basic_blocks: FxHashMap<SpirvWord, BasicBlockEntryState<T>>,
|
||||
}
|
||||
|
||||
struct FullModeInsertion2 {
|
||||
basic_blocks: FxHashMap<SpirvWord, FullBasicBlockEntryState>,
|
||||
functions_exit_modes: FxHashMap<SpirvWord, ResolvedInstructionModes>,
|
||||
}
|
||||
|
||||
struct FullModeInsertion {
|
||||
basic_blocks: FxHashMap<SpirvWord, FullBasicBlockEntryState>,
|
||||
}
|
||||
|
||||
impl FullModeInsertion {
|
||||
fn new(
|
||||
flat_resolver: &mut super::GlobalStringIdentResolver2,
|
||||
denormal: TwinModeInsertions<DenormalMode>,
|
||||
rounding: TwinModeInsertions<RoundingMode>,
|
||||
) -> Result<Self, TranslateError> {
|
||||
if denormal.basic_blocks.len() != rounding.basic_blocks.len() {
|
||||
return Err(error_unreachable());
|
||||
}
|
||||
let basic_blocks = denormal
|
||||
.basic_blocks
|
||||
.into_iter()
|
||||
.map(|(bb, denormal)| {
|
||||
let rounding = rounding
|
||||
.basic_blocks
|
||||
.get(&bb)
|
||||
.copied()
|
||||
.ok_or_else(error_unreachable)?;
|
||||
let dual_prologue = if denormal.prologue.is_some() && rounding.prologue.is_some() {
|
||||
Some(flat_resolver.register_unnamed(None))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
Ok((
|
||||
bb,
|
||||
FullBasicBlockEntryState {
|
||||
dual_prologue,
|
||||
denormal,
|
||||
rounding,
|
||||
},
|
||||
))
|
||||
})
|
||||
.collect::<Result<FxHashMap<_, _>, _>>()?;
|
||||
Ok(Self { basic_blocks })
|
||||
}
|
||||
functions_exit_modes: FxHashMap<SpirvWord, ResolvedInstructionModes>,
|
||||
}
|
||||
|
||||
struct FullBasicBlockEntryState {
|
||||
|
@ -1075,20 +898,21 @@ struct TwinMode<T> {
|
|||
f16f64: T,
|
||||
}
|
||||
|
||||
fn insert_mode_control(
|
||||
flat_resolver: &mut super::GlobalStringIdentResolver2,
|
||||
// This function goes through every method, every basic block, every instruction
|
||||
// and based on computed information inserts:
|
||||
// * Instructions that change global mode
|
||||
// * Insert additional "prelude" basic blocks that set the mode
|
||||
// * Redirect some jumps to "prelude" basic blocks
|
||||
fn apply_global_mode_controls(
|
||||
directives: Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
global_modes: FullModeInsertion2,
|
||||
global_modes: FullModeInsertion,
|
||||
) -> Result<Vec<Directive2<ast::Instruction<SpirvWord>, SpirvWord>>, TranslateError> {
|
||||
let directives_len = directives.len();
|
||||
directives
|
||||
.into_iter()
|
||||
.map(|directive| {
|
||||
let mut new_directives = SmallVec::<[_; 4]>::new();
|
||||
let (mut method, initial_mode) = match directive {
|
||||
Directive2::Variable(..) | Directive2::Method(Function2 { body: None, .. }) => {
|
||||
new_directives.push(directive);
|
||||
return Ok(new_directives);
|
||||
return Ok(directive);
|
||||
}
|
||||
Directive2::Method(
|
||||
mut method @ Function2 {
|
||||
|
@ -1114,7 +938,7 @@ fn insert_mode_control(
|
|||
(method, initial_mode)
|
||||
}
|
||||
};
|
||||
emit_mode_prelude(flat_resolver, &method, &global_modes, &mut new_directives)?;
|
||||
check_function_prelude(&method, &global_modes)?;
|
||||
let old_body = method.body.take().unwrap();
|
||||
let mut result = Vec::with_capacity(old_body.len());
|
||||
let mut bb_state = BasicBlockControlState::new(&global_modes, initial_mode);
|
||||
|
@ -1175,233 +999,34 @@ fn insert_mode_control(
|
|||
}
|
||||
}
|
||||
method.body = Some(result);
|
||||
new_directives.push(Directive2::Method(method));
|
||||
Ok(new_directives)
|
||||
})
|
||||
.try_fold(Vec::with_capacity(directives_len), |mut acc, d| {
|
||||
acc.extend(d?);
|
||||
Ok(acc)
|
||||
Ok(Directive2::Method(method))
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
}
|
||||
|
||||
fn emit_mode_prelude(
|
||||
flat_resolver: &mut super::GlobalStringIdentResolver2,
|
||||
fn check_function_prelude(
|
||||
method: &Function2<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
global_modes: &FullModeInsertion2,
|
||||
new_directives: &mut SmallVec<[Directive2<ptx_parser::Instruction<SpirvWord>, SpirvWord>; 4]>,
|
||||
global_modes: &FullModeInsertion,
|
||||
) -> Result<(), TranslateError> {
|
||||
let fn_mode_state = global_modes
|
||||
.basic_blocks
|
||||
.get(&method.name)
|
||||
.ok_or_else(error_unreachable)?;
|
||||
if let Some(dual_prologue) = fn_mode_state.dual_prologue {
|
||||
new_directives.push(create_fn_wrapper(
|
||||
flat_resolver,
|
||||
method,
|
||||
dual_prologue,
|
||||
[
|
||||
ModeRegister::Denormal {
|
||||
f32: fn_mode_state
|
||||
.denormal
|
||||
.twin_mode
|
||||
.f32
|
||||
.unwrap_or_default()
|
||||
.to_ftz(),
|
||||
f16f64: fn_mode_state
|
||||
.denormal
|
||||
.twin_mode
|
||||
.f16f64
|
||||
.unwrap_or_default()
|
||||
.to_ftz(),
|
||||
},
|
||||
ModeRegister::Rounding {
|
||||
f32: fn_mode_state
|
||||
.rounding
|
||||
.twin_mode
|
||||
.f32
|
||||
.unwrap_or_default()
|
||||
.to_ast(),
|
||||
f16f64: fn_mode_state
|
||||
.rounding
|
||||
.twin_mode
|
||||
.f16f64
|
||||
.unwrap_or_default()
|
||||
.to_ast(),
|
||||
},
|
||||
]
|
||||
.into_iter(),
|
||||
));
|
||||
}
|
||||
if let Some(prologue) = fn_mode_state.denormal.prologue {
|
||||
new_directives.push(create_fn_wrapper(
|
||||
flat_resolver,
|
||||
method,
|
||||
prologue,
|
||||
[ModeRegister::Denormal {
|
||||
f32: fn_mode_state
|
||||
.denormal
|
||||
.twin_mode
|
||||
.f32
|
||||
.unwrap_or_default()
|
||||
.to_ftz(),
|
||||
f16f64: fn_mode_state
|
||||
.denormal
|
||||
.twin_mode
|
||||
.f16f64
|
||||
.unwrap_or_default()
|
||||
.to_ftz(),
|
||||
}]
|
||||
.into_iter(),
|
||||
));
|
||||
}
|
||||
if let Some(prologue) = fn_mode_state.rounding.prologue {
|
||||
new_directives.push(create_fn_wrapper(
|
||||
flat_resolver,
|
||||
method,
|
||||
prologue,
|
||||
[ModeRegister::Rounding {
|
||||
f32: fn_mode_state
|
||||
.rounding
|
||||
.twin_mode
|
||||
.f32
|
||||
.unwrap_or_default()
|
||||
.to_ast(),
|
||||
f16f64: fn_mode_state
|
||||
.rounding
|
||||
.twin_mode
|
||||
.f16f64
|
||||
.unwrap_or_default()
|
||||
.to_ast(),
|
||||
}]
|
||||
.into_iter(),
|
||||
));
|
||||
// A function should never have a prelude. Preludes happen only if there
|
||||
// is an edge in the control flow graph that requires a mode change.
|
||||
// Since functions never have mode-setting instructions, that means they
|
||||
// only pass the mode from incoming edges to outgoing edges
|
||||
if fn_mode_state.dual_prologue.is_some()
|
||||
|| fn_mode_state.denormal.prologue.is_some()
|
||||
|| fn_mode_state.rounding.prologue.is_some()
|
||||
{
|
||||
return Err(error_unreachable());
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn create_fn_wrapper(
|
||||
flat_resolver: &mut super::GlobalStringIdentResolver2,
|
||||
method: &Function2<ast::Instruction<SpirvWord>, SpirvWord>,
|
||||
name: SpirvWord,
|
||||
modes: impl ExactSizeIterator<Item = ModeRegister>,
|
||||
) -> Directive2<ast::Instruction<SpirvWord>, SpirvWord> {
|
||||
// * Label
|
||||
// * return argument registers
|
||||
// * input argument registers
|
||||
// * Load input arguments
|
||||
// * set modes
|
||||
// * call
|
||||
// * return with value
|
||||
let return_arguments = rename_variables(flat_resolver, &method.return_arguments);
|
||||
let input_arguments = rename_variables(flat_resolver, &method.input_arguments);
|
||||
let mut body = Vec::with_capacity(
|
||||
1 + (input_arguments.len() * 2) + return_arguments.len() + modes.len() + 2,
|
||||
);
|
||||
body.push(Statement::Label(flat_resolver.register_unnamed(None)));
|
||||
let return_variables = append_variables(flat_resolver, &mut body, &return_arguments);
|
||||
let input_variables = append_variables(flat_resolver, &mut body, &input_arguments);
|
||||
for (index, input_reg) in input_variables.iter().enumerate() {
|
||||
body.push(Statement::Instruction(ast::Instruction::Ld {
|
||||
data: ast::LdDetails {
|
||||
qualifier: ast::LdStQualifier::Weak,
|
||||
state_space: input_arguments[index].state_space,
|
||||
caching: ast::LdCacheOperator::Cached,
|
||||
typ: input_arguments[index].v_type.clone(),
|
||||
non_coherent: false,
|
||||
},
|
||||
arguments: ast::LdArgs {
|
||||
src: input_arguments[index].name,
|
||||
dst: *input_reg,
|
||||
},
|
||||
}));
|
||||
}
|
||||
body.extend(modes.map(|mode_set| Statement::SetMode(mode_set)));
|
||||
// Out of order because we want to use return_variables before they are moved
|
||||
let ret_statement = if return_arguments.is_empty() {
|
||||
Statement::Instruction(ast::Instruction::Ret {
|
||||
data: ast::RetData { uniform: false },
|
||||
})
|
||||
} else {
|
||||
Statement::RetValue(
|
||||
ast::RetData { uniform: false },
|
||||
return_variables
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(index, var)| (*var, method.return_arguments[index].v_type.clone()))
|
||||
.collect(),
|
||||
)
|
||||
};
|
||||
body.push(Statement::Instruction(ast::Instruction::Call {
|
||||
data: ast::CallDetails {
|
||||
uniform: false,
|
||||
return_arguments: return_arguments
|
||||
.iter()
|
||||
.map(|arg| (arg.v_type.clone(), arg.state_space))
|
||||
.collect(),
|
||||
input_arguments: input_arguments
|
||||
.iter()
|
||||
.map(|arg| (arg.v_type.clone(), arg.state_space))
|
||||
.collect(),
|
||||
},
|
||||
arguments: ast::CallArgs {
|
||||
return_arguments: return_variables,
|
||||
func: method.name,
|
||||
input_arguments: input_variables,
|
||||
},
|
||||
}));
|
||||
body.push(ret_statement);
|
||||
Directive2::Method(Function2 {
|
||||
return_arguments,
|
||||
name,
|
||||
input_arguments,
|
||||
body: Some(body),
|
||||
is_kernel: false,
|
||||
import_as: None,
|
||||
tuning: Vec::new(),
|
||||
linkage: ast::LinkingDirective::NONE,
|
||||
flush_to_zero_f32: false,
|
||||
flush_to_zero_f16f64: false,
|
||||
rounding_mode_f32: ptx_parser::RoundingMode::NearestEven,
|
||||
rounding_mode_f16f64: ptx_parser::RoundingMode::NearestEven,
|
||||
})
|
||||
}
|
||||
|
||||
fn rename_variables(
|
||||
flat_resolver: &mut super::GlobalStringIdentResolver2,
|
||||
variables: &Vec<ast::Variable<SpirvWord>>,
|
||||
) -> Vec<ast::Variable<SpirvWord>> {
|
||||
variables
|
||||
.iter()
|
||||
.cloned()
|
||||
.map(|arg| ast::Variable {
|
||||
name: flat_resolver.register_unnamed(Some((arg.v_type.clone(), arg.state_space))),
|
||||
..arg
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn append_variables<'a, 'input: 'a>(
|
||||
flat_resolver: &'a mut super::GlobalStringIdentResolver2<'input>,
|
||||
body: &mut Vec<Statement<ast::Instruction<SpirvWord>, SpirvWord>>,
|
||||
arguments: &'a Vec<ast::Variable<SpirvWord>>,
|
||||
) -> Vec<SpirvWord> {
|
||||
let mut result = Vec::with_capacity(arguments.len());
|
||||
for arg in arguments {
|
||||
let name = flat_resolver.register_unnamed(Some((arg.v_type.clone(), ast::StateSpace::Reg)));
|
||||
body.push(Statement::Variable(ast::Variable {
|
||||
align: None,
|
||||
v_type: arg.v_type.clone(),
|
||||
state_space: ast::StateSpace::Reg,
|
||||
name,
|
||||
array_init: Vec::new(),
|
||||
}));
|
||||
result.push(name);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
struct BasicBlockControlState<'a> {
|
||||
global_modes: &'a FullModeInsertion2,
|
||||
global_modes: &'a FullModeInsertion,
|
||||
denormal_f32: RegisterState<bool>,
|
||||
denormal_f16f64: RegisterState<bool>,
|
||||
rounding_f32: RegisterState<ast::RoundingMode>,
|
||||
|
@ -1429,7 +1054,7 @@ impl<T> RegisterState<T> {
|
|||
}
|
||||
|
||||
impl<'a> BasicBlockControlState<'a> {
|
||||
fn new(global_modes: &'a FullModeInsertion2, initial_mode: &FullBasicBlockEntryState) -> Self {
|
||||
fn new(global_modes: &'a FullModeInsertion, initial_mode: &FullBasicBlockEntryState) -> Self {
|
||||
let denormal_f32 = RegisterState::new(initial_mode.denormal.twin_mode.f32);
|
||||
let denormal_f16f64 = RegisterState::new(initial_mode.denormal.twin_mode.f16f64);
|
||||
let rounding_f32 = RegisterState::new(initial_mode.rounding.twin_mode.f32);
|
||||
|
@ -1600,7 +1225,7 @@ impl<'a> BasicBlockControlState<'a> {
|
|||
}
|
||||
|
||||
fn redirect_jump_impl(
|
||||
global_modes: &FullModeInsertion2,
|
||||
global_modes: &FullModeInsertion,
|
||||
current_mode: &ResolvedInstructionModes,
|
||||
jump_target: &mut SpirvWord,
|
||||
) -> Result<(), TranslateError> {
|
||||
|
@ -1918,7 +1543,7 @@ impl<'a> Drop for BasicBlockState<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn compute_single_mode<T: Copy + Eq>(
|
||||
fn compute_single_mode_insertions<T: Copy + Eq>(
|
||||
graph: &ControlFlowGraph,
|
||||
mut getter: impl FnMut(&Node) -> Mode<T>,
|
||||
) -> PartialModeInsertion<T> {
|
||||
|
@ -1988,7 +1613,7 @@ struct PartialModeInsertion<T> {
|
|||
}
|
||||
|
||||
// Only returns kernel mode insertions if a kernel is relevant to the optimization problem
|
||||
fn optimize<
|
||||
fn optimize_mode_insertions<
|
||||
T: Copy + Into<usize> + strum::VariantArray + std::fmt::Debug + Default,
|
||||
const N: usize,
|
||||
>(
|
|
@ -43,7 +43,7 @@ fn transitive_mixed() {
|
|||
graph.add_jump(empty, false2_id);
|
||||
let false2_ = graph.get_or_add_basic_block(false2_id);
|
||||
graph.set_modes(false2_, ftz(), ftz());
|
||||
let partial_result = super::compute_single_mode(&graph, |node| node.denormal_f32);
|
||||
let partial_result = super::compute_single_mode_insertions(&graph, |node| node.denormal_f32);
|
||||
assert_eq!(partial_result.bb_must_insert_mode.len(), 0);
|
||||
assert_eq!(partial_result.bb_maybe_insert_mode.len(), 1);
|
||||
assert_eq!(
|
||||
|
@ -51,7 +51,7 @@ fn transitive_mixed() {
|
|||
(DenormalMode::FlushToZero, iter::once(entry_id).collect())
|
||||
);
|
||||
|
||||
let result = optimize::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
|
||||
let result = optimize_mode_insertions::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
|
||||
assert_eq!(result.basic_blocks.len(), 0);
|
||||
assert_eq!(result.kernels.len(), 1);
|
||||
assert_eq!(result.kernels[&entry_id], DenormalMode::FlushToZero);
|
||||
|
@ -73,7 +73,7 @@ fn transitive_change_twice() {
|
|||
graph.add_jump(empty, true_id);
|
||||
let true_ = graph.get_or_add_basic_block(true_id);
|
||||
graph.set_modes(true_, preserve(), preserve());
|
||||
let partial_result = super::compute_single_mode(&graph, |node| node.denormal_f32);
|
||||
let partial_result = super::compute_single_mode_insertions(&graph, |node| node.denormal_f32);
|
||||
assert_eq!(partial_result.bb_must_insert_mode.len(), 1);
|
||||
assert!(partial_result.bb_must_insert_mode.contains(&true_id));
|
||||
assert_eq!(partial_result.bb_maybe_insert_mode.len(), 1);
|
||||
|
@ -82,7 +82,7 @@ fn transitive_change_twice() {
|
|||
(DenormalMode::FlushToZero, iter::once(entry_id).collect())
|
||||
);
|
||||
|
||||
let result = optimize::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
|
||||
let result = optimize_mode_insertions::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
|
||||
assert_eq!(result.basic_blocks, iter::once(true_id).collect());
|
||||
assert_eq!(result.kernels.len(), 1);
|
||||
assert_eq!(result.kernels[&entry_id], DenormalMode::FlushToZero);
|
||||
|
@ -100,7 +100,7 @@ fn transitive_change() {
|
|||
graph.add_jump(empty, true_id);
|
||||
let true_ = graph.get_or_add_basic_block(true_id);
|
||||
graph.set_modes(true_, preserve(), preserve());
|
||||
let partial_result = super::compute_single_mode(&graph, |node| node.denormal_f32);
|
||||
let partial_result = super::compute_single_mode_insertions(&graph, |node| node.denormal_f32);
|
||||
assert_eq!(partial_result.bb_must_insert_mode.len(), 0);
|
||||
assert_eq!(partial_result.bb_maybe_insert_mode.len(), 1);
|
||||
assert_eq!(
|
||||
|
@ -108,7 +108,7 @@ fn transitive_change() {
|
|||
(DenormalMode::Preserve, iter::once(entry_id).collect())
|
||||
);
|
||||
|
||||
let result = optimize::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
|
||||
let result = optimize_mode_insertions::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
|
||||
assert_eq!(result.basic_blocks.len(), 0);
|
||||
assert_eq!(result.kernels.len(), 1);
|
||||
assert_eq!(result.kernels[&entry_id], DenormalMode::Preserve);
|
||||
|
@ -143,7 +143,7 @@ fn codependency() {
|
|||
// "{:?}",
|
||||
// petgraph::dot::Dot::with_config(&graph.graph, &[petgraph::dot::Config::EdgeNoLabel])
|
||||
//);
|
||||
let partial_result = super::compute_single_mode(&graph, |node| node.denormal_f32);
|
||||
let partial_result = super::compute_single_mode_insertions(&graph, |node| node.denormal_f32);
|
||||
assert_eq!(partial_result.bb_must_insert_mode.len(), 0);
|
||||
assert_eq!(partial_result.bb_maybe_insert_mode.len(), 2);
|
||||
assert_eq!(
|
||||
|
@ -155,7 +155,7 @@ fn codependency() {
|
|||
(DenormalMode::FlushToZero, iter::once(entry_id).collect())
|
||||
);
|
||||
|
||||
let result = optimize::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
|
||||
let result = optimize_mode_insertions::<DenormalMode, { DenormalMode::COUNT }>(partial_result);
|
||||
assert_eq!(result.basic_blocks.len(), 0);
|
||||
assert_eq!(result.kernels.len(), 1);
|
||||
assert_eq!(result.kernels[&entry_id], DenormalMode::FlushToZero);
|
|
@ -17,7 +17,7 @@ mod expand_operands;
|
|||
mod fix_special_registers2;
|
||||
mod hoist_globals;
|
||||
mod insert_explicit_load_store;
|
||||
mod insert_ftz_control;
|
||||
mod instruction_mode_to_global_mode;
|
||||
mod insert_implicit_conversions2;
|
||||
mod normalize_basic_blocks;
|
||||
mod normalize_identifiers2;
|
||||
|
@ -54,7 +54,7 @@ pub fn to_llvm_module<'input>(ast: ast::Module<'input>) -> Result<Module, Transl
|
|||
let directives = deparamize_functions::run(&mut flat_resolver, directives)?;
|
||||
let directives = normalize_basic_blocks::run(&mut flat_resolver, directives)?;
|
||||
let directives = remove_unreachable_basic_blocks::run(directives)?;
|
||||
let directives = insert_ftz_control::run(&mut flat_resolver, directives)?;
|
||||
let directives = instruction_mode_to_global_mode::run(&mut flat_resolver, directives)?;
|
||||
let directives = insert_explicit_load_store::run(&mut flat_resolver, directives)?;
|
||||
let directives = insert_implicit_conversions2::run(&mut flat_resolver, directives)?;
|
||||
let directives = replace_instructions_with_function_calls::run(&mut flat_resolver, directives)?;
|
||||
|
|
|
@ -1,34 +1,22 @@
|
|||
declare i32 @__zluda_ptx_impl_activemask() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @activemask(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #1 {
|
||||
%"31" = alloca i64, align 8, addrspace(5)
|
||||
%"32" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"28"
|
||||
|
||||
"28": ; preds = %1
|
||||
%"39" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"37", align 4
|
||||
%"40" = call i32 @__zluda_ptx_impl_activemask()
|
||||
store i32 %"40", ptr addrspace(5) %"38", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"42" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%"43" = inttoptr i64 %"41" to ptr
|
||||
store i32 %"42", ptr %"43", align 4
|
||||
%"33" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"33", ptr addrspace(5) %"31", align 4
|
||||
%"34" = call i32 @__zluda_ptx_impl_activemask()
|
||||
store i32 %"34", ptr addrspace(5) %"32", align 4
|
||||
%"35" = load i64, ptr addrspace(5) %"31", align 4
|
||||
%"36" = load i32, ptr addrspace(5) %"32", align 4
|
||||
%"37" = inttoptr i64 %"35" to ptr
|
||||
store i32 %"36", ptr %"37", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,43 +1,30 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @add(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"52" = inttoptr i64 %"47" to ptr
|
||||
%"46" = load i64, ptr %"52", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"48" = add i64 %"49", 1
|
||||
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"53" = inttoptr i64 %"50" to ptr
|
||||
store i64 %"51", ptr %"53", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr
|
||||
%"40" = load i64, ptr %"46", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"42" = add i64 %"43", 1
|
||||
store i64 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"47" = inttoptr i64 %"44" to ptr
|
||||
store i64 %"45", ptr %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,65 +1,52 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @add_ftz(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #1 {
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca float, align 4, addrspace(5)
|
||||
%"48" = alloca float, align 4, addrspace(5)
|
||||
%"49" = alloca float, align 4, addrspace(5)
|
||||
%"50" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @add_ftz(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca float, align 4, addrspace(5)
|
||||
%"42" = alloca float, align 4, addrspace(5)
|
||||
%"43" = alloca float, align 4, addrspace(5)
|
||||
%"44" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"36"
|
||||
|
||||
"36": ; preds = %1
|
||||
%"51" = load i64, ptr addrspace(4) %"43", align 4
|
||||
store i64 %"51", ptr addrspace(5) %"45", align 4
|
||||
%"52" = load i64, ptr addrspace(4) %"44", align 4
|
||||
store i64 %"52", ptr addrspace(5) %"46", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"67" = inttoptr i64 %"54" to ptr
|
||||
%"53" = load float, ptr %"67", align 4
|
||||
store float %"53", ptr addrspace(5) %"47", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"68" = inttoptr i64 %"55" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"68", i64 4
|
||||
%"56" = load float, ptr %"33", align 4
|
||||
store float %"56", ptr addrspace(5) %"48", align 4
|
||||
%"58" = load float, ptr addrspace(5) %"47", align 4
|
||||
%"59" = load float, ptr addrspace(5) %"48", align 4
|
||||
%"57" = fadd float %"58", %"59"
|
||||
store float %"57", ptr addrspace(5) %"49", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"40", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"61" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load float, ptr %"61", align 4
|
||||
store float %"47", ptr addrspace(5) %"41", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"62" = inttoptr i64 %"49" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"62", i64 4
|
||||
%"50" = load float, ptr %"33", align 4
|
||||
store float %"50", ptr addrspace(5) %"42", align 4
|
||||
%"52" = load float, ptr addrspace(5) %"41", align 4
|
||||
%"53" = load float, ptr addrspace(5) %"42", align 4
|
||||
%"51" = fadd float %"52", %"53"
|
||||
store float %"51", ptr addrspace(5) %"43", align 4
|
||||
call void @llvm.amdgcn.s.setreg(i32 6401, i32 3)
|
||||
%"61" = load float, ptr addrspace(5) %"47", align 4
|
||||
%"62" = load float, ptr addrspace(5) %"48", align 4
|
||||
%"60" = fadd float %"61", %"62"
|
||||
store float %"60", ptr addrspace(5) %"50", align 4
|
||||
%"63" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"64" = load float, ptr addrspace(5) %"49", align 4
|
||||
%"69" = inttoptr i64 %"63" to ptr
|
||||
store float %"64", ptr %"69", align 4
|
||||
%"65" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"70" = inttoptr i64 %"65" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"70", i64 4
|
||||
%"66" = load float, ptr addrspace(5) %"50", align 4
|
||||
store float %"66", ptr %"35", align 4
|
||||
%"55" = load float, ptr addrspace(5) %"41", align 4
|
||||
%"56" = load float, ptr addrspace(5) %"42", align 4
|
||||
%"54" = fadd float %"55", %"56"
|
||||
store float %"54", ptr addrspace(5) %"44", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"58" = load float, ptr addrspace(5) %"43", align 4
|
||||
%"63" = inttoptr i64 %"57" to ptr
|
||||
store float %"58", ptr %"63", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"64" = inttoptr i64 %"59" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"64", i64 4
|
||||
%"60" = load float, ptr addrspace(5) %"44", align 4
|
||||
store float %"60", ptr %"35", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
||||
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #2
|
||||
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind willreturn }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind willreturn }
|
|
@ -1,43 +1,30 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @add_non_coherent(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"52" = inttoptr i64 %"47" to ptr addrspace(1)
|
||||
%"46" = load i64, ptr addrspace(1) %"52", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"48" = add i64 %"49", 1
|
||||
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"53" = inttoptr i64 %"50" to ptr addrspace(1)
|
||||
store i64 %"51", ptr addrspace(1) %"53", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr addrspace(1)
|
||||
%"40" = load i64, ptr addrspace(1) %"46", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"42" = add i64 %"43", 1
|
||||
store i64 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"47" = inttoptr i64 %"44" to ptr addrspace(1)
|
||||
store i64 %"45", ptr addrspace(1) %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,43 +1,30 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @add_tuning(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"52" = inttoptr i64 %"47" to ptr
|
||||
%"46" = load i64, ptr %"52", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"48" = add i64 %"49", 1
|
||||
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"53" = inttoptr i64 %"50" to ptr
|
||||
store i64 %"51", ptr %"53", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr
|
||||
%"40" = load i64, ptr %"46", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"42" = add i64 %"43", 1
|
||||
store i64 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"47" = inttoptr i64 %"44" to ptr
|
||||
store i64 %"45", ptr %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,49 +1,36 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @and(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i32, align 4, addrspace(5)
|
||||
%"38" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"56" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"56", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"50" = load i32, ptr %"31", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"58" = and i32 %"52", %"53"
|
||||
store i32 %"58", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"61" = inttoptr i64 %"54" to ptr
|
||||
store i32 %"55", ptr %"61", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"50" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load i32, ptr %"50", align 4
|
||||
store i32 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
|
||||
%"44" = load i32, ptr %"31", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"38", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%"52" = and i32 %"46", %"47"
|
||||
store i32 %"52", ptr addrspace(5) %"37", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"55" = inttoptr i64 %"48" to ptr
|
||||
store i32 %"49", ptr %"55", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,59 +1,46 @@
|
|||
@shared_mem = external addrspace(3) global [1024 x i8], align 4
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #1 {
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i32, align 4, addrspace(5)
|
||||
%"47" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @atom_add(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %1
|
||||
%"48" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||
%"49" = load i64, ptr addrspace(4) %"43", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"45", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"62" = inttoptr i64 %"51" to ptr
|
||||
%"50" = load i32, ptr %"62", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"46", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"63" = inttoptr i64 %"52" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||
%"53" = load i32, ptr %"32", align 4
|
||||
store i32 %"53", ptr addrspace(5) %"47", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"46", align 4
|
||||
store i32 %"54", ptr addrspace(3) @shared_mem, align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"47", align 4
|
||||
%2 = atomicrmw add ptr addrspace(3) @shared_mem, i32 %"56" syncscope("agent-one-as") monotonic, align 4
|
||||
store i32 %2, ptr addrspace(5) %"46", align 4
|
||||
%"57" = load i32, ptr addrspace(3) @shared_mem, align 4
|
||||
store i32 %"57", ptr addrspace(5) %"47", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"59" = load i32, ptr addrspace(5) %"46", align 4
|
||||
%"67" = inttoptr i64 %"58" to ptr
|
||||
store i32 %"59", ptr %"67", align 4
|
||||
%"60" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"68" = inttoptr i64 %"60" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"68", i64 4
|
||||
%"61" = load i32, ptr addrspace(5) %"47", align 4
|
||||
store i32 %"61", ptr %"34", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"56" = inttoptr i64 %"45" to ptr
|
||||
%"44" = load i32, ptr %"56", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"57" = inttoptr i64 %"46" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"47" = load i32, ptr %"32", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"40", align 4
|
||||
store i32 %"48", ptr addrspace(3) @shared_mem, align 4
|
||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%2 = atomicrmw add ptr addrspace(3) @shared_mem, i32 %"50" syncscope("agent-one-as") monotonic, align 4
|
||||
store i32 %2, ptr addrspace(5) %"40", align 4
|
||||
%"51" = load i32, ptr addrspace(3) @shared_mem, align 4
|
||||
store i32 %"51", ptr addrspace(5) %"41", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"61" = inttoptr i64 %"52" to ptr
|
||||
store i32 %"53", ptr %"61", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"62" = inttoptr i64 %"54" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"62", i64 4
|
||||
%"55" = load i32, ptr addrspace(5) %"41", align 4
|
||||
store i32 %"55", ptr %"34", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,59 +1,46 @@
|
|||
@shared_mem = external addrspace(3) global [1024 x i8], align 4
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #1 {
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca float, align 4, addrspace(5)
|
||||
%"47" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @atom_add_float(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
%"41" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %1
|
||||
%"48" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||
%"49" = load i64, ptr addrspace(4) %"43", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"45", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"62" = inttoptr i64 %"51" to ptr
|
||||
%"50" = load float, ptr %"62", align 4
|
||||
store float %"50", ptr addrspace(5) %"46", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"63" = inttoptr i64 %"52" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||
%"53" = load float, ptr %"32", align 4
|
||||
store float %"53", ptr addrspace(5) %"47", align 4
|
||||
%"54" = load float, ptr addrspace(5) %"46", align 4
|
||||
store float %"54", ptr addrspace(3) @shared_mem, align 4
|
||||
%"56" = load float, ptr addrspace(5) %"47", align 4
|
||||
%2 = atomicrmw fadd ptr addrspace(3) @shared_mem, float %"56" syncscope("agent-one-as") monotonic, align 4
|
||||
store float %2, ptr addrspace(5) %"46", align 4
|
||||
%"57" = load float, ptr addrspace(3) @shared_mem, align 4
|
||||
store float %"57", ptr addrspace(5) %"47", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"59" = load float, ptr addrspace(5) %"46", align 4
|
||||
%"67" = inttoptr i64 %"58" to ptr
|
||||
store float %"59", ptr %"67", align 4
|
||||
%"60" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"68" = inttoptr i64 %"60" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"68", i64 4
|
||||
%"61" = load float, ptr addrspace(5) %"47", align 4
|
||||
store float %"61", ptr %"34", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"56" = inttoptr i64 %"45" to ptr
|
||||
%"44" = load float, ptr %"56", align 4
|
||||
store float %"44", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"57" = inttoptr i64 %"46" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"47" = load float, ptr %"32", align 4
|
||||
store float %"47", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load float, ptr addrspace(5) %"40", align 4
|
||||
store float %"48", ptr addrspace(3) @shared_mem, align 4
|
||||
%"50" = load float, ptr addrspace(5) %"41", align 4
|
||||
%2 = atomicrmw fadd ptr addrspace(3) @shared_mem, float %"50" syncscope("agent-one-as") monotonic, align 4
|
||||
store float %2, ptr addrspace(5) %"40", align 4
|
||||
%"51" = load float, ptr addrspace(3) @shared_mem, align 4
|
||||
store float %"51", ptr addrspace(5) %"41", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"53" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"61" = inttoptr i64 %"52" to ptr
|
||||
store float %"53", ptr %"61", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"62" = inttoptr i64 %"54" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"62", i64 4
|
||||
%"55" = load float, ptr addrspace(5) %"41", align 4
|
||||
store float %"55", ptr %"34", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,57 +1,44 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i32, align 4, addrspace(5)
|
||||
%"49" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @atom_cas(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"37"
|
||||
|
||||
"37": ; preds = %1
|
||||
%"50" = load i64, ptr addrspace(4) %"44", align 4
|
||||
store i64 %"50", ptr addrspace(5) %"46", align 4
|
||||
%"51" = load i64, ptr addrspace(4) %"45", align 4
|
||||
store i64 %"51", ptr addrspace(5) %"47", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"63" = inttoptr i64 %"53" to ptr
|
||||
%"52" = load i32, ptr %"63", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"48", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"64" = inttoptr i64 %"54" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"64", i64 4
|
||||
%"56" = load i32, ptr addrspace(5) %"48", align 4
|
||||
%2 = cmpxchg ptr %"31", i32 %"56", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
|
||||
%"65" = extractvalue { i32, i1 } %2, 0
|
||||
store i32 %"65", ptr addrspace(5) %"48", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"67" = inttoptr i64 %"57" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"67", i64 4
|
||||
%"58" = load i32, ptr %"34", align 4
|
||||
store i32 %"58", ptr addrspace(5) %"49", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"60" = load i32, ptr addrspace(5) %"48", align 4
|
||||
%"68" = inttoptr i64 %"59" to ptr
|
||||
store i32 %"60", ptr %"68", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"69" = inttoptr i64 %"61" to ptr
|
||||
%"36" = getelementptr inbounds i8, ptr %"69", i64 4
|
||||
%"62" = load i32, ptr addrspace(5) %"49", align 4
|
||||
store i32 %"62", ptr %"36", align 4
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"57" = inttoptr i64 %"47" to ptr
|
||||
%"46" = load i32, ptr %"57", align 4
|
||||
store i32 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"58" = inttoptr i64 %"48" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"58", i64 4
|
||||
%"50" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%2 = cmpxchg ptr %"31", i32 %"50", i32 100 syncscope("agent-one-as") monotonic monotonic, align 4
|
||||
%"59" = extractvalue { i32, i1 } %2, 0
|
||||
store i32 %"59", ptr addrspace(5) %"42", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"61" = inttoptr i64 %"51" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"61", i64 4
|
||||
%"52" = load i32, ptr %"34", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"43", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"62" = inttoptr i64 %"53" to ptr
|
||||
store i32 %"54", ptr %"62", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"63" = inttoptr i64 %"55" to ptr
|
||||
%"36" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||
%"56" = load i32, ptr addrspace(5) %"43", align 4
|
||||
store i32 %"56", ptr %"36", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,59 +1,46 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i32, align 4, addrspace(5)
|
||||
%"49" = alloca i32, align 4, addrspace(5)
|
||||
%"50" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @atom_inc(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #0 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"37"
|
||||
|
||||
"37": ; preds = %1
|
||||
%"51" = load i64, ptr addrspace(4) %"44", align 4
|
||||
store i64 %"51", ptr addrspace(5) %"46", align 4
|
||||
%"52" = load i64, ptr addrspace(4) %"45", align 4
|
||||
store i64 %"52", ptr addrspace(5) %"47", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"65" = inttoptr i64 %"54" to ptr
|
||||
%2 = atomicrmw uinc_wrap ptr %"65", i32 101 syncscope("agent-one-as") monotonic, align 4
|
||||
store i32 %2, ptr addrspace(5) %"48", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"66" = inttoptr i64 %"56" to ptr addrspace(1)
|
||||
%3 = atomicrmw uinc_wrap ptr addrspace(1) %"66", i32 101 syncscope("agent-one-as") monotonic, align 4
|
||||
store i32 %3, ptr addrspace(5) %"49", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"67" = inttoptr i64 %"58" to ptr
|
||||
%"57" = load i32, ptr %"67", align 4
|
||||
store i32 %"57", ptr addrspace(5) %"50", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"60" = load i32, ptr addrspace(5) %"48", align 4
|
||||
%"68" = inttoptr i64 %"59" to ptr
|
||||
store i32 %"60", ptr %"68", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"69" = inttoptr i64 %"61" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"69", i64 4
|
||||
%"62" = load i32, ptr addrspace(5) %"49", align 4
|
||||
store i32 %"62", ptr %"34", align 4
|
||||
%"63" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"70" = inttoptr i64 %"63" to ptr
|
||||
%"36" = getelementptr inbounds i8, ptr %"70", i64 8
|
||||
%"64" = load i32, ptr addrspace(5) %"50", align 4
|
||||
store i32 %"64", ptr %"36", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"59" = inttoptr i64 %"48" to ptr
|
||||
%2 = atomicrmw uinc_wrap ptr %"59", i32 101 syncscope("agent-one-as") monotonic, align 4
|
||||
store i32 %2, ptr addrspace(5) %"42", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"60" = inttoptr i64 %"50" to ptr addrspace(1)
|
||||
%3 = atomicrmw uinc_wrap ptr addrspace(1) %"60", i32 101 syncscope("agent-one-as") monotonic, align 4
|
||||
store i32 %3, ptr addrspace(5) %"43", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"61" = inttoptr i64 %"52" to ptr
|
||||
%"51" = load i32, ptr %"61", align 4
|
||||
store i32 %"51", ptr addrspace(5) %"44", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"62" = inttoptr i64 %"53" to ptr
|
||||
store i32 %"54", ptr %"62", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"63" = inttoptr i64 %"55" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||
%"56" = load i32, ptr addrspace(5) %"43", align 4
|
||||
store i32 %"56", ptr %"34", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"64" = inttoptr i64 %"57" to ptr
|
||||
%"36" = getelementptr inbounds i8, ptr %"64", i64 8
|
||||
%"58" = load i32, ptr addrspace(5) %"44", align 4
|
||||
store i32 %"58", ptr %"36", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,43 +1,30 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca double, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @b64tof64(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca double, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"43" = load double, ptr addrspace(4) %"37", align 8
|
||||
store double %"43", ptr addrspace(5) %"39", align 8
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load double, ptr addrspace(5) %"39", align 8
|
||||
%"52" = bitcast double %"46" to i64
|
||||
store i64 %"52", ptr addrspace(5) %"40", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"53" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i64, ptr %"53", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"54" = inttoptr i64 %"49" to ptr
|
||||
store i64 %"50", ptr %"54", align 4
|
||||
%"37" = load double, ptr addrspace(4) %"31", align 8
|
||||
store double %"37", ptr addrspace(5) %"33", align 8
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load double, ptr addrspace(5) %"33", align 8
|
||||
%"46" = bitcast double %"40" to i64
|
||||
store i64 %"46", ptr addrspace(5) %"34", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"47" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load i64, ptr %"47", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"48" = inttoptr i64 %"43" to ptr
|
||||
store i64 %"44", ptr %"48", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,56 +1,44 @@
|
|||
declare i32 @__zluda_ptx_impl_bfe_u32(i32, i32, i32) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #1 {
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i32, align 4, addrspace(5)
|
||||
%"47" = alloca i32, align 4, addrspace(5)
|
||||
%"48" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @bfe(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %1
|
||||
%"49" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"44", align 4
|
||||
%"50" = load i64, ptr addrspace(4) %"43", align 4
|
||||
store i64 %"50", ptr addrspace(5) %"45", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"63" = inttoptr i64 %"52" to ptr
|
||||
%"51" = load i32, ptr %"63", align 4
|
||||
store i32 %"51", ptr addrspace(5) %"46", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"64" = inttoptr i64 %"53" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"64", i64 4
|
||||
%"54" = load i32, ptr %"32", align 4
|
||||
store i32 %"54", ptr addrspace(5) %"47", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"65" = inttoptr i64 %"55" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"65", i64 8
|
||||
%"56" = load i32, ptr %"34", align 4
|
||||
store i32 %"56", ptr addrspace(5) %"48", align 4
|
||||
%"58" = load i32, ptr addrspace(5) %"46", align 4
|
||||
%"59" = load i32, ptr addrspace(5) %"47", align 4
|
||||
%"60" = load i32, ptr addrspace(5) %"48", align 4
|
||||
%"57" = call i32 @__zluda_ptx_impl_bfe_u32(i32 %"58", i32 %"59", i32 %"60")
|
||||
store i32 %"57", ptr addrspace(5) %"46", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"62" = load i32, ptr addrspace(5) %"46", align 4
|
||||
%"66" = inttoptr i64 %"61" to ptr
|
||||
store i32 %"62", ptr %"66", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"38", align 4
|
||||
%"44" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"57" = inttoptr i64 %"46" to ptr
|
||||
%"45" = load i32, ptr %"57", align 4
|
||||
store i32 %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"58" = inttoptr i64 %"47" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"58", i64 4
|
||||
%"48" = load i32, ptr %"32", align 4
|
||||
store i32 %"48", ptr addrspace(5) %"41", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"59" = inttoptr i64 %"49" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"59", i64 8
|
||||
%"50" = load i32, ptr %"34", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"42", align 4
|
||||
%"52" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"51" = call i32 @__zluda_ptx_impl_bfe_u32(i32 %"52", i32 %"53", i32 %"54")
|
||||
store i32 %"51", ptr addrspace(5) %"40", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"60" = inttoptr i64 %"55" to ptr
|
||||
store i32 %"56", ptr %"60", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,63 +1,51 @@
|
|||
declare i32 @__zluda_ptx_impl_bfi_b32(i32, i32, i32, i32) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #1 {
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i64, align 8, addrspace(5)
|
||||
%"49" = alloca i32, align 4, addrspace(5)
|
||||
%"50" = alloca i32, align 4, addrspace(5)
|
||||
%"51" = alloca i32, align 4, addrspace(5)
|
||||
%"52" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @bfi(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
%"45" = alloca i32, align 4, addrspace(5)
|
||||
%"46" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"38"
|
||||
|
||||
"38": ; preds = %1
|
||||
%"53" = load i64, ptr addrspace(4) %"45", align 4
|
||||
store i64 %"53", ptr addrspace(5) %"47", align 4
|
||||
%"54" = load i64, ptr addrspace(4) %"46", align 4
|
||||
store i64 %"54", ptr addrspace(5) %"48", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"70" = inttoptr i64 %"56" to ptr
|
||||
%"55" = load i32, ptr %"70", align 4
|
||||
store i32 %"55", ptr addrspace(5) %"49", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"71" = inttoptr i64 %"57" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"71", i64 4
|
||||
%"58" = load i32, ptr %"33", align 4
|
||||
store i32 %"58", ptr addrspace(5) %"50", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"72" = inttoptr i64 %"59" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"72", i64 8
|
||||
%"60" = load i32, ptr %"35", align 4
|
||||
store i32 %"60", ptr addrspace(5) %"51", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"73" = inttoptr i64 %"61" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"73", i64 12
|
||||
%"62" = load i32, ptr %"37", align 4
|
||||
store i32 %"62", ptr addrspace(5) %"52", align 4
|
||||
%"64" = load i32, ptr addrspace(5) %"49", align 4
|
||||
%"65" = load i32, ptr addrspace(5) %"50", align 4
|
||||
%"66" = load i32, ptr addrspace(5) %"51", align 4
|
||||
%"67" = load i32, ptr addrspace(5) %"52", align 4
|
||||
%"74" = call i32 @__zluda_ptx_impl_bfi_b32(i32 %"64", i32 %"65", i32 %"66", i32 %"67")
|
||||
store i32 %"74", ptr addrspace(5) %"49", align 4
|
||||
%"68" = load i64, ptr addrspace(5) %"48", align 4
|
||||
%"69" = load i32, ptr addrspace(5) %"49", align 4
|
||||
%"77" = inttoptr i64 %"68" to ptr
|
||||
store i32 %"69", ptr %"77", align 4
|
||||
%"47" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"42", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"64" = inttoptr i64 %"50" to ptr
|
||||
%"49" = load i32, ptr %"64", align 4
|
||||
store i32 %"49", ptr addrspace(5) %"43", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"65" = inttoptr i64 %"51" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"65", i64 4
|
||||
%"52" = load i32, ptr %"33", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"44", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"66" = inttoptr i64 %"53" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"66", i64 8
|
||||
%"54" = load i32, ptr %"35", align 4
|
||||
store i32 %"54", ptr addrspace(5) %"45", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"67" = inttoptr i64 %"55" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"67", i64 12
|
||||
%"56" = load i32, ptr %"37", align 4
|
||||
store i32 %"56", ptr addrspace(5) %"46", align 4
|
||||
%"58" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"59" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"60" = load i32, ptr addrspace(5) %"45", align 4
|
||||
%"61" = load i32, ptr addrspace(5) %"46", align 4
|
||||
%"68" = call i32 @__zluda_ptx_impl_bfi_b32(i32 %"58", i32 %"59", i32 %"60", i32 %"61")
|
||||
store i32 %"68", ptr addrspace(5) %"43", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"63" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"71" = inttoptr i64 %"62" to ptr
|
||||
store i32 %"63", ptr %"71", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,47 +1,34 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"52" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @block(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"33"
|
||||
|
||||
"33": ; preds = %1
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"48" = load i64, ptr %"57", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"50" = add i64 %"51", 1
|
||||
store i64 %"50", ptr addrspace(5) %"45", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"52", align 4
|
||||
%"53" = add i64 %"54", 1
|
||||
store i64 %"53", ptr addrspace(5) %"52", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"58" = inttoptr i64 %"55" to ptr
|
||||
store i64 %"56", ptr %"58", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"42" = load i64, ptr %"51", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"44" = add i64 %"45", 1
|
||||
store i64 %"44", ptr addrspace(5) %"39", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"47" = add i64 %"48", 1
|
||||
store i64 %"47", ptr addrspace(5) %"46", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"52" = inttoptr i64 %"49" to ptr
|
||||
store i64 %"50", ptr %"52", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,49 +1,36 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #1 {
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @bra(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %1
|
||||
%"48" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||
%"49" = load i64, ptr addrspace(4) %"43", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"45", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"56" = inttoptr i64 %"51" to ptr
|
||||
%"50" = load i64, ptr %"56", align 4
|
||||
store i64 %"50", ptr addrspace(5) %"46", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"50" = inttoptr i64 %"45" to ptr
|
||||
%"44" = load i64, ptr %"50", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
br label %"10"
|
||||
|
||||
"10": ; preds = %"35"
|
||||
%"53" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"52" = add i64 %"53", 1
|
||||
store i64 %"52", ptr addrspace(5) %"47", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"46" = add i64 %"47", 1
|
||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
||||
br label %"12"
|
||||
|
||||
"12": ; preds = %"10"
|
||||
%"54" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"57" = inttoptr i64 %"54" to ptr
|
||||
store i64 %"55", ptr %"57", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = inttoptr i64 %"48" to ptr
|
||||
store i64 %"49", ptr %"51", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @brev(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load i32, ptr %"49", align 4
|
||||
store i32 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"45" = call i32 @llvm.bitreverse.i32(i32 %"46")
|
||||
store i32 %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"50" = inttoptr i64 %"47" to ptr
|
||||
store i32 %"48", ptr %"50", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load i32, ptr %"43", align 4
|
||||
store i32 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"39" = call i32 @llvm.bitreverse.i32(i32 %"40")
|
||||
store i32 %"39", ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"44" = inttoptr i64 %"41" to ptr
|
||||
store i32 %"42", ptr %"44", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.bitreverse.i32(i32) #2
|
||||
declare i32 @llvm.bitreverse.i32(i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,74 +1,62 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define i64 @incr(i64 %"43") #0 {
|
||||
%"69" = alloca i64, align 8, addrspace(5)
|
||||
%"70" = alloca i64, align 8, addrspace(5)
|
||||
%"71" = alloca i64, align 8, addrspace(5)
|
||||
%"72" = alloca i64, align 8, addrspace(5)
|
||||
%"63" = alloca i64, align 8, addrspace(5)
|
||||
%"64" = alloca i64, align 8, addrspace(5)
|
||||
%"65" = alloca i64, align 8, addrspace(5)
|
||||
%"66" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"46"
|
||||
|
||||
"46": ; preds = %1
|
||||
store i64 %"43", ptr addrspace(5) %"71", align 4
|
||||
%"73" = load i64, ptr addrspace(5) %"71", align 4
|
||||
store i64 %"73", ptr addrspace(5) %"72", align 4
|
||||
%"75" = load i64, ptr addrspace(5) %"72", align 4
|
||||
%"74" = add i64 %"75", 1
|
||||
store i64 %"74", ptr addrspace(5) %"72", align 4
|
||||
%"76" = load i64, ptr addrspace(5) %"72", align 4
|
||||
store i64 %"76", ptr addrspace(5) %"70", align 4
|
||||
%"77" = load i64, ptr addrspace(5) %"70", align 4
|
||||
store i64 %"77", ptr addrspace(5) %"69", align 4
|
||||
%2 = load i64, ptr addrspace(5) %"69", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"65", align 4
|
||||
%"67" = load i64, ptr addrspace(5) %"65", align 4
|
||||
store i64 %"67", ptr addrspace(5) %"66", align 4
|
||||
%"69" = load i64, ptr addrspace(5) %"66", align 4
|
||||
%"68" = add i64 %"69", 1
|
||||
store i64 %"68", ptr addrspace(5) %"66", align 4
|
||||
%"70" = load i64, ptr addrspace(5) %"66", align 4
|
||||
store i64 %"70", ptr addrspace(5) %"64", align 4
|
||||
%"71" = load i64, ptr addrspace(5) %"64", align 4
|
||||
store i64 %"71", ptr addrspace(5) %"63", align 4
|
||||
%2 = load i64, ptr addrspace(5) %"63", align 4
|
||||
ret i64 %2
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"54", ptr addrspace(4) byref(i64) %"55") #1 {
|
||||
%"56" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @call(ptr addrspace(4) byref(i64) %"48", ptr addrspace(4) byref(i64) %"49") #1 {
|
||||
%"50" = alloca i64, align 8, addrspace(5)
|
||||
%"51" = alloca i64, align 8, addrspace(5)
|
||||
%"52" = alloca i64, align 8, addrspace(5)
|
||||
%"57" = alloca i64, align 8, addrspace(5)
|
||||
%"58" = alloca i64, align 8, addrspace(5)
|
||||
%"63" = alloca i64, align 8, addrspace(5)
|
||||
%"64" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"44"
|
||||
|
||||
"44": ; preds = %1
|
||||
%"59" = load i64, ptr addrspace(4) %"54", align 4
|
||||
store i64 %"59", ptr addrspace(5) %"56", align 4
|
||||
%"60" = load i64, ptr addrspace(4) %"55", align 4
|
||||
store i64 %"60", ptr addrspace(5) %"57", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"56", align 4
|
||||
%"78" = inttoptr i64 %"62" to ptr addrspace(1)
|
||||
%"61" = load i64, ptr addrspace(1) %"78", align 4
|
||||
store i64 %"61", ptr addrspace(5) %"58", align 4
|
||||
%"65" = load i64, ptr addrspace(5) %"58", align 4
|
||||
store i64 %"65", ptr addrspace(5) %"63", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"63", align 4
|
||||
%"53" = load i64, ptr addrspace(4) %"48", align 4
|
||||
store i64 %"53", ptr addrspace(5) %"50", align 4
|
||||
%"54" = load i64, ptr addrspace(4) %"49", align 4
|
||||
store i64 %"54", ptr addrspace(5) %"51", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"50", align 4
|
||||
%"72" = inttoptr i64 %"56" to ptr addrspace(1)
|
||||
%"55" = load i64, ptr addrspace(1) %"72", align 4
|
||||
store i64 %"55", ptr addrspace(5) %"52", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"52", align 4
|
||||
store i64 %"59", ptr addrspace(5) %"57", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"57", align 4
|
||||
%"41" = call i64 @incr(i64 %"40")
|
||||
br label %"45"
|
||||
|
||||
"45": ; preds = %"44"
|
||||
store i64 %"41", ptr addrspace(5) %"64", align 4
|
||||
%"66" = load i64, ptr addrspace(5) %"64", align 4
|
||||
store i64 %"66", ptr addrspace(5) %"58", align 4
|
||||
%"67" = load i64, ptr addrspace(5) %"57", align 4
|
||||
%"68" = load i64, ptr addrspace(5) %"58", align 4
|
||||
%"81" = inttoptr i64 %"67" to ptr addrspace(1)
|
||||
store i64 %"68", ptr addrspace(1) %"81", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"58", align 4
|
||||
%"60" = load i64, ptr addrspace(5) %"58", align 4
|
||||
store i64 %"60", ptr addrspace(5) %"52", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"51", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"52", align 4
|
||||
%"75" = inttoptr i64 %"61" to ptr addrspace(1)
|
||||
store i64 %"62", ptr addrspace(1) %"75", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @clz(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load i32, ptr %"49", align 4
|
||||
store i32 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"50" = call i32 @llvm.ctlz.i32(i32 %"46", i1 false)
|
||||
store i32 %"50", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"51" = inttoptr i64 %"47" to ptr
|
||||
store i32 %"48", ptr %"51", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load i32, ptr %"43", align 4
|
||||
store i32 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"44" = call i32 @llvm.ctlz.i32(i32 %"40", i1 false)
|
||||
store i32 %"44", ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"45" = inttoptr i64 %"41" to ptr
|
||||
store i32 %"42", ptr %"45", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.ctlz.i32(i32, i1 immarg) #2
|
||||
declare i32 @llvm.ctlz.i32(i32, i1 immarg) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,63 +1,50 @@
|
|||
@constparams = addrspace(4) global [4 x i16] [i16 10, i16 20, i16 30, i16 40], align 8
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"52", ptr addrspace(4) byref(i64) %"53") #1 {
|
||||
%"54" = alloca i64, align 8, addrspace(5)
|
||||
%"55" = alloca i64, align 8, addrspace(5)
|
||||
%"56" = alloca i16, align 2, addrspace(5)
|
||||
%"57" = alloca i16, align 2, addrspace(5)
|
||||
%"58" = alloca i16, align 2, addrspace(5)
|
||||
%"59" = alloca i16, align 2, addrspace(5)
|
||||
define amdgpu_kernel void @const(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #0 {
|
||||
%"48" = alloca i64, align 8, addrspace(5)
|
||||
%"49" = alloca i64, align 8, addrspace(5)
|
||||
%"50" = alloca i16, align 2, addrspace(5)
|
||||
%"51" = alloca i16, align 2, addrspace(5)
|
||||
%"52" = alloca i16, align 2, addrspace(5)
|
||||
%"53" = alloca i16, align 2, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"45"
|
||||
|
||||
"45": ; preds = %1
|
||||
%"60" = load i64, ptr addrspace(4) %"52", align 4
|
||||
store i64 %"60", ptr addrspace(5) %"54", align 4
|
||||
%"61" = load i64, ptr addrspace(4) %"53", align 4
|
||||
store i64 %"61", ptr addrspace(5) %"55", align 4
|
||||
%"62" = load i16, ptr addrspace(4) @constparams, align 2
|
||||
store i16 %"62", ptr addrspace(5) %"56", align 2
|
||||
%"63" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2
|
||||
store i16 %"63", ptr addrspace(5) %"57", align 2
|
||||
%"64" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2
|
||||
store i16 %"64", ptr addrspace(5) %"58", align 2
|
||||
%"65" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2
|
||||
store i16 %"65", ptr addrspace(5) %"59", align 2
|
||||
%"66" = load i64, ptr addrspace(5) %"55", align 4
|
||||
%"67" = load i16, ptr addrspace(5) %"56", align 2
|
||||
%"54" = load i64, ptr addrspace(4) %"46", align 4
|
||||
store i64 %"54", ptr addrspace(5) %"48", align 4
|
||||
%"55" = load i64, ptr addrspace(4) %"47", align 4
|
||||
store i64 %"55", ptr addrspace(5) %"49", align 4
|
||||
%"56" = load i16, ptr addrspace(4) @constparams, align 2
|
||||
store i16 %"56", ptr addrspace(5) %"50", align 2
|
||||
%"57" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 2), align 2
|
||||
store i16 %"57", ptr addrspace(5) %"51", align 2
|
||||
%"58" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 4), align 2
|
||||
store i16 %"58", ptr addrspace(5) %"52", align 2
|
||||
%"59" = load i16, ptr addrspace(4) getelementptr inbounds (i8, ptr addrspace(4) @constparams, i64 6), align 2
|
||||
store i16 %"59", ptr addrspace(5) %"53", align 2
|
||||
%"60" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"61" = load i16, ptr addrspace(5) %"50", align 2
|
||||
%"72" = inttoptr i64 %"60" to ptr
|
||||
store i16 %"61", ptr %"72", align 2
|
||||
%"62" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"74" = inttoptr i64 %"62" to ptr
|
||||
%"40" = getelementptr inbounds i8, ptr %"74", i64 2
|
||||
%"63" = load i16, ptr addrspace(5) %"51", align 2
|
||||
store i16 %"63", ptr %"40", align 2
|
||||
%"64" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"76" = inttoptr i64 %"64" to ptr
|
||||
%"42" = getelementptr inbounds i8, ptr %"76", i64 4
|
||||
%"65" = load i16, ptr addrspace(5) %"52", align 2
|
||||
store i16 %"65", ptr %"42", align 2
|
||||
%"66" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"78" = inttoptr i64 %"66" to ptr
|
||||
store i16 %"67", ptr %"78", align 2
|
||||
%"68" = load i64, ptr addrspace(5) %"55", align 4
|
||||
%"80" = inttoptr i64 %"68" to ptr
|
||||
%"40" = getelementptr inbounds i8, ptr %"80", i64 2
|
||||
%"69" = load i16, ptr addrspace(5) %"57", align 2
|
||||
store i16 %"69", ptr %"40", align 2
|
||||
%"70" = load i64, ptr addrspace(5) %"55", align 4
|
||||
%"82" = inttoptr i64 %"70" to ptr
|
||||
%"42" = getelementptr inbounds i8, ptr %"82", i64 4
|
||||
%"71" = load i16, ptr addrspace(5) %"58", align 2
|
||||
store i16 %"71", ptr %"42", align 2
|
||||
%"72" = load i64, ptr addrspace(5) %"55", align 4
|
||||
%"84" = inttoptr i64 %"72" to ptr
|
||||
%"44" = getelementptr inbounds i8, ptr %"84", i64 6
|
||||
%"73" = load i16, ptr addrspace(5) %"59", align 2
|
||||
store i16 %"73", ptr %"44", align 2
|
||||
%"44" = getelementptr inbounds i8, ptr %"78", i64 6
|
||||
%"67" = load i16, ptr addrspace(5) %"53", align 2
|
||||
store i16 %"67", ptr %"44", align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,42 +1,29 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @constant_f32(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"50" = inttoptr i64 %"45" to ptr
|
||||
%"44" = load float, ptr %"50", align 4
|
||||
store float %"44", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load float, ptr addrspace(5) %"41", align 4
|
||||
%"46" = fmul float %"47", 5.000000e-01
|
||||
store float %"46", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"49" = load float, ptr addrspace(5) %"41", align 4
|
||||
%"51" = inttoptr i64 %"48" to ptr
|
||||
store float %"49", ptr %"51", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"37" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"44" = inttoptr i64 %"39" to ptr
|
||||
%"38" = load float, ptr %"44", align 4
|
||||
store float %"38", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load float, ptr addrspace(5) %"35", align 4
|
||||
%"40" = fmul float %"41", 5.000000e-01
|
||||
store float %"40", ptr addrspace(5) %"35", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"43" = load float, ptr addrspace(5) %"35", align 4
|
||||
%"45" = inttoptr i64 %"42" to ptr
|
||||
store float %"43", ptr %"45", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,42 +1,29 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @constant_negative(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"50" = inttoptr i64 %"45" to ptr
|
||||
%"44" = load i32, ptr %"50", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"46" = mul i32 %"47", -1
|
||||
store i32 %"46", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"51" = inttoptr i64 %"48" to ptr
|
||||
store i32 %"49", ptr %"51", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"37" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"44" = inttoptr i64 %"39" to ptr
|
||||
%"38" = load i32, ptr %"44", align 4
|
||||
store i32 %"38", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i32, ptr addrspace(5) %"35", align 4
|
||||
%"40" = mul i32 %"41", -1
|
||||
store i32 %"40", ptr addrspace(5) %"35", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"43" = load i32, ptr addrspace(5) %"35", align 4
|
||||
%"45" = inttoptr i64 %"42" to ptr
|
||||
store i32 %"43", ptr %"45", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @cos(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load float, ptr %"49", align 4
|
||||
store float %"43", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"45" = call afn float @llvm.cos.f32(float %"46")
|
||||
store float %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"50" = inttoptr i64 %"47" to ptr
|
||||
store float %"48", ptr %"50", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load float, ptr %"43", align 4
|
||||
store float %"37", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"39" = call afn float @llvm.cos.f32(float %"40")
|
||||
store float %"39", ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"44" = inttoptr i64 %"41" to ptr
|
||||
store float %"42", ptr %"44", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare float @llvm.cos.f32(float) #2
|
||||
declare float @llvm.cos.f32(float) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,43 +1,30 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca float, align 4, addrspace(5)
|
||||
%"42" = alloca double, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @cvt_f64_f32(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca float, align 4, addrspace(5)
|
||||
%"36" = alloca double, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"51" = inttoptr i64 %"46" to ptr addrspace(1)
|
||||
%"45" = load float, ptr addrspace(1) %"51", align 4
|
||||
store float %"45", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load float, ptr addrspace(5) %"41", align 4
|
||||
%"47" = fpext float %"48" to double
|
||||
store double %"47", ptr addrspace(5) %"42", align 8
|
||||
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"50" = load double, ptr addrspace(5) %"42", align 8
|
||||
%"52" = inttoptr i64 %"49" to ptr
|
||||
store double %"50", ptr %"52", align 8
|
||||
%"37" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"45" = inttoptr i64 %"40" to ptr addrspace(1)
|
||||
%"39" = load float, ptr addrspace(1) %"45", align 4
|
||||
store float %"39", ptr addrspace(5) %"35", align 4
|
||||
%"42" = load float, ptr addrspace(5) %"35", align 4
|
||||
%"41" = fpext float %"42" to double
|
||||
store double %"41", ptr addrspace(5) %"36", align 8
|
||||
%"43" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"44" = load double, ptr addrspace(5) %"36", align 8
|
||||
%"46" = inttoptr i64 %"43" to ptr
|
||||
store double %"44", ptr %"46", align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,62 +1,49 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca float, align 4, addrspace(5)
|
||||
%"46" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @cvt_rni(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca float, align 4, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"34"
|
||||
|
||||
"34": ; preds = %1
|
||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"48" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"61" = inttoptr i64 %"50" to ptr
|
||||
%"49" = load float, ptr %"61", align 4
|
||||
store float %"49", ptr addrspace(5) %"45", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"62" = inttoptr i64 %"51" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"62", i64 4
|
||||
%"52" = load float, ptr %"31", align 4
|
||||
store float %"52", ptr addrspace(5) %"46", align 4
|
||||
%"54" = load float, ptr addrspace(5) %"45", align 4
|
||||
%2 = call float @llvm.roundeven.f32(float %"54")
|
||||
%"53" = freeze float %2
|
||||
store float %"53", ptr addrspace(5) %"45", align 4
|
||||
%"56" = load float, ptr addrspace(5) %"46", align 4
|
||||
%3 = call float @llvm.roundeven.f32(float %"56")
|
||||
%"55" = freeze float %3
|
||||
store float %"55", ptr addrspace(5) %"46", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"58" = load float, ptr addrspace(5) %"45", align 4
|
||||
%"63" = inttoptr i64 %"57" to ptr
|
||||
store float %"58", ptr %"63", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"64" = inttoptr i64 %"59" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"64", i64 4
|
||||
%"60" = load float, ptr addrspace(5) %"46", align 4
|
||||
store float %"60", ptr %"33", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"55" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load float, ptr %"55", align 4
|
||||
store float %"43", ptr addrspace(5) %"39", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"56" = inttoptr i64 %"45" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||
%"46" = load float, ptr %"31", align 4
|
||||
store float %"46", ptr addrspace(5) %"40", align 4
|
||||
%"48" = load float, ptr addrspace(5) %"39", align 4
|
||||
%2 = call float @llvm.roundeven.f32(float %"48")
|
||||
%"47" = freeze float %2
|
||||
store float %"47", ptr addrspace(5) %"39", align 4
|
||||
%"50" = load float, ptr addrspace(5) %"40", align 4
|
||||
%3 = call float @llvm.roundeven.f32(float %"50")
|
||||
%"49" = freeze float %3
|
||||
store float %"49", ptr addrspace(5) %"40", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"52" = load float, ptr addrspace(5) %"39", align 4
|
||||
%"57" = inttoptr i64 %"51" to ptr
|
||||
store float %"52", ptr %"57", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"58" = inttoptr i64 %"53" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"58", i64 4
|
||||
%"54" = load float, ptr addrspace(5) %"40", align 4
|
||||
store float %"54", ptr %"33", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare float @llvm.roundeven.f32(float) #2
|
||||
declare float @llvm.roundeven.f32(float) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,62 +1,54 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca float, align 4, addrspace(5)
|
||||
%"46" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @cvt_rzi(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca float, align 4, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"34"
|
||||
|
||||
"34": ; preds = %1
|
||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"48" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"61" = inttoptr i64 %"50" to ptr
|
||||
%"49" = load float, ptr %"61", align 4
|
||||
store float %"49", ptr addrspace(5) %"45", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"62" = inttoptr i64 %"51" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"62", i64 4
|
||||
%"52" = load float, ptr %"31", align 4
|
||||
store float %"52", ptr addrspace(5) %"46", align 4
|
||||
%"54" = load float, ptr addrspace(5) %"45", align 4
|
||||
%2 = call float @llvm.trunc.f32(float %"54")
|
||||
%"53" = freeze float %2
|
||||
store float %"53", ptr addrspace(5) %"45", align 4
|
||||
%"56" = load float, ptr addrspace(5) %"46", align 4
|
||||
%3 = call float @llvm.trunc.f32(float %"56")
|
||||
%"55" = freeze float %3
|
||||
store float %"55", ptr addrspace(5) %"46", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"58" = load float, ptr addrspace(5) %"45", align 4
|
||||
%"63" = inttoptr i64 %"57" to ptr
|
||||
store float %"58", ptr %"63", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"64" = inttoptr i64 %"59" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"64", i64 4
|
||||
%"60" = load float, ptr addrspace(5) %"46", align 4
|
||||
store float %"60", ptr %"33", align 4
|
||||
call void @llvm.amdgcn.s.setreg(i32 6145, i32 3)
|
||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"55" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load float, ptr %"55", align 4
|
||||
store float %"43", ptr addrspace(5) %"39", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"56" = inttoptr i64 %"45" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"56", i64 4
|
||||
%"46" = load float, ptr %"31", align 4
|
||||
store float %"46", ptr addrspace(5) %"40", align 4
|
||||
%"48" = load float, ptr addrspace(5) %"39", align 4
|
||||
%2 = call float @llvm.trunc.f32(float %"48")
|
||||
%"47" = freeze float %2
|
||||
store float %"47", ptr addrspace(5) %"39", align 4
|
||||
%"50" = load float, ptr addrspace(5) %"40", align 4
|
||||
%3 = call float @llvm.trunc.f32(float %"50")
|
||||
%"49" = freeze float %3
|
||||
store float %"49", ptr addrspace(5) %"40", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"52" = load float, ptr addrspace(5) %"39", align 4
|
||||
%"57" = inttoptr i64 %"51" to ptr
|
||||
store float %"52", ptr %"57", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"58" = inttoptr i64 %"53" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"58", i64 4
|
||||
%"54" = load float, ptr addrspace(5) %"40", align 4
|
||||
store float %"54", ptr %"33", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind willreturn
|
||||
declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #1
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare float @llvm.trunc.f32(float) #2
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind willreturn }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,45 +1,32 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @cvt_s16_s8(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i32, align 4, addrspace(5)
|
||||
%"36" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"51" = inttoptr i64 %"46" to ptr addrspace(1)
|
||||
%"45" = load i32, ptr addrspace(1) %"51", align 4
|
||||
store i32 %"45", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%2 = trunc i32 %"48" to i8
|
||||
%"52" = sext i8 %2 to i16
|
||||
%"47" = sext i16 %"52" to i32
|
||||
store i32 %"47", ptr addrspace(5) %"41", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"50" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"54" = inttoptr i64 %"49" to ptr
|
||||
store i32 %"50", ptr %"54", align 4
|
||||
%"37" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"45" = inttoptr i64 %"40" to ptr addrspace(1)
|
||||
%"39" = load i32, ptr addrspace(1) %"45", align 4
|
||||
store i32 %"39", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i32, ptr addrspace(5) %"36", align 4
|
||||
%2 = trunc i32 %"42" to i8
|
||||
%"46" = sext i8 %2 to i16
|
||||
%"41" = sext i16 %"46" to i32
|
||||
store i32 %"41", ptr addrspace(5) %"35", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"44" = load i32, ptr addrspace(5) %"35", align 4
|
||||
%"48" = inttoptr i64 %"43" to ptr
|
||||
store i32 %"44", ptr %"48", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,68 +1,55 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i32, align 4, addrspace(5)
|
||||
%"46" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @cvt_s32_f32(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i32, align 4, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"34"
|
||||
|
||||
"34": ; preds = %1
|
||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"48" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"62" = inttoptr i64 %"50" to ptr
|
||||
%"61" = load float, ptr %"62", align 4
|
||||
%"49" = bitcast float %"61" to i32
|
||||
store i32 %"49", ptr addrspace(5) %"45", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"63" = inttoptr i64 %"51" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"63", i64 4
|
||||
%"64" = load float, ptr %"31", align 4
|
||||
%"52" = bitcast float %"64" to i32
|
||||
store i32 %"52", ptr addrspace(5) %"46", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"45", align 4
|
||||
%"66" = bitcast i32 %"54" to float
|
||||
%2 = call float @llvm.ceil.f32(float %"66")
|
||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"56" = inttoptr i64 %"44" to ptr
|
||||
%"55" = load float, ptr %"56", align 4
|
||||
%"43" = bitcast float %"55" to i32
|
||||
store i32 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"57" = inttoptr i64 %"45" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"58" = load float, ptr %"31", align 4
|
||||
%"46" = bitcast float %"58" to i32
|
||||
store i32 %"46", ptr addrspace(5) %"40", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"39", align 4
|
||||
%"60" = bitcast i32 %"48" to float
|
||||
%2 = call float @llvm.ceil.f32(float %"60")
|
||||
%3 = fptosi float %2 to i32
|
||||
%"65" = freeze i32 %3
|
||||
store i32 %"65", ptr addrspace(5) %"45", align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"46", align 4
|
||||
%"68" = bitcast i32 %"56" to float
|
||||
%4 = call float @llvm.ceil.f32(float %"68")
|
||||
%"59" = freeze i32 %3
|
||||
store i32 %"59", ptr addrspace(5) %"39", align 4
|
||||
%"50" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"62" = bitcast i32 %"50" to float
|
||||
%4 = call float @llvm.ceil.f32(float %"62")
|
||||
%5 = fptosi float %4 to i32
|
||||
%"67" = freeze i32 %5
|
||||
store i32 %"67", ptr addrspace(5) %"46", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"58" = load i32, ptr addrspace(5) %"45", align 4
|
||||
%"69" = inttoptr i64 %"57" to ptr addrspace(1)
|
||||
store i32 %"58", ptr addrspace(1) %"69", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"71" = inttoptr i64 %"59" to ptr addrspace(1)
|
||||
%"33" = getelementptr inbounds i8, ptr addrspace(1) %"71", i64 4
|
||||
%"60" = load i32, ptr addrspace(5) %"46", align 4
|
||||
store i32 %"60", ptr addrspace(1) %"33", align 4
|
||||
%"61" = freeze i32 %5
|
||||
store i32 %"61", ptr addrspace(5) %"40", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"52" = load i32, ptr addrspace(5) %"39", align 4
|
||||
%"63" = inttoptr i64 %"51" to ptr addrspace(1)
|
||||
store i32 %"52", ptr addrspace(1) %"63", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"65" = inttoptr i64 %"53" to ptr addrspace(1)
|
||||
%"33" = getelementptr inbounds i8, ptr addrspace(1) %"65", i64 4
|
||||
%"54" = load i32, ptr addrspace(5) %"40", align 4
|
||||
store i32 %"54", ptr addrspace(1) %"33", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare float @llvm.ceil.f32(float) #2
|
||||
declare float @llvm.ceil.f32(float) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,43 +1,30 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @cvt_s64_s32(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i32, align 4, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"52" = inttoptr i64 %"46" to ptr
|
||||
%"51" = load i32, ptr %"52", align 4
|
||||
store i32 %"51", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"47" = sext i32 %"48" to i64
|
||||
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"53" = inttoptr i64 %"49" to ptr
|
||||
store i64 %"50", ptr %"53", align 4
|
||||
%"37" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"46" = inttoptr i64 %"40" to ptr
|
||||
%"45" = load i32, ptr %"46", align 4
|
||||
store i32 %"45", ptr addrspace(5) %"35", align 4
|
||||
%"42" = load i32, ptr addrspace(5) %"35", align 4
|
||||
%"41" = sext i32 %"42" to i64
|
||||
store i64 %"41", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"47" = inttoptr i64 %"43" to ptr
|
||||
store i64 %"44", ptr %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,54 +1,41 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @cvt_sat_s_u(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i32, align 4, addrspace(5)
|
||||
%"37" = alloca i32, align 4, addrspace(5)
|
||||
%"38" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"55" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"55", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"42", align 4
|
||||
%"50" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%2 = call i32 @llvm.smax.i32(i32 %"50", i32 0)
|
||||
%"39" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"35", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"49" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load i32, ptr %"49", align 4
|
||||
store i32 %"41", ptr addrspace(5) %"36", align 4
|
||||
%"44" = load i32, ptr addrspace(5) %"36", align 4
|
||||
%2 = call i32 @llvm.smax.i32(i32 %"44", i32 0)
|
||||
%3 = call i32 @llvm.umin.i32(i32 %2, i32 -1)
|
||||
store i32 %3, ptr addrspace(5) %"43", align 4
|
||||
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"44", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"56" = inttoptr i64 %"53" to ptr
|
||||
store i32 %"54", ptr %"56", align 4
|
||||
store i32 %3, ptr addrspace(5) %"37", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"37", align 4
|
||||
store i32 %"46", ptr addrspace(5) %"38", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%"50" = inttoptr i64 %"47" to ptr
|
||||
store i32 %"48", ptr %"50", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.smax.i32(i32, i32) #2
|
||||
declare i32 @llvm.smax.i32(i32, i32) #1
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.umin.i32(i32, i32) #2
|
||||
declare i32 @llvm.umin.i32(i32, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,47 +1,34 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @cvta(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%2 = inttoptr i64 %"44" to ptr
|
||||
%"51" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"51", ptr addrspace(5) %"38", align 8
|
||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%3 = inttoptr i64 %"46" to ptr
|
||||
%"53" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"53", ptr addrspace(5) %"39", align 8
|
||||
%"48" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"55" = inttoptr i64 %"48" to ptr addrspace(1)
|
||||
%"47" = load float, ptr addrspace(1) %"55", align 4
|
||||
store float %"47", ptr addrspace(5) %"40", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"50" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"56" = inttoptr i64 %"49" to ptr addrspace(1)
|
||||
store float %"50", ptr addrspace(1) %"56", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%2 = inttoptr i64 %"38" to ptr
|
||||
%"45" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"45", ptr addrspace(5) %"32", align 8
|
||||
%"40" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%3 = inttoptr i64 %"40" to ptr
|
||||
%"47" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"47", ptr addrspace(5) %"33", align 8
|
||||
%"42" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"49" = inttoptr i64 %"42" to ptr addrspace(1)
|
||||
%"41" = load float, ptr addrspace(1) %"49", align 4
|
||||
store float %"41", ptr addrspace(5) %"34", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"44" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"50" = inttoptr i64 %"43" to ptr addrspace(1)
|
||||
store float %"44", ptr addrspace(1) %"50", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,49 +1,36 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca float, align 4, addrspace(5)
|
||||
%"44" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @div_approx(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca float, align 4, addrspace(5)
|
||||
%"38" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"56" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load float, ptr %"56", align 4
|
||||
store float %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"50" = load float, ptr %"31", align 4
|
||||
store float %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load float, ptr addrspace(5) %"43", align 4
|
||||
%"53" = load float, ptr addrspace(5) %"44", align 4
|
||||
%"51" = fdiv arcp afn float %"52", %"53"
|
||||
store float %"51", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"55" = load float, ptr addrspace(5) %"43", align 4
|
||||
%"58" = inttoptr i64 %"54" to ptr
|
||||
store float %"55", ptr %"58", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"50" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load float, ptr %"50", align 4
|
||||
store float %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
|
||||
%"44" = load float, ptr %"31", align 4
|
||||
store float %"44", ptr addrspace(5) %"38", align 4
|
||||
%"46" = load float, ptr addrspace(5) %"37", align 4
|
||||
%"47" = load float, ptr addrspace(5) %"38", align 4
|
||||
%"45" = fdiv arcp afn float %"46", %"47"
|
||||
store float %"45", ptr addrspace(5) %"37", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"49" = load float, ptr addrspace(5) %"37", align 4
|
||||
%"52" = inttoptr i64 %"48" to ptr
|
||||
store float %"49", ptr %"52", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @ex2(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load float, ptr %"49", align 4
|
||||
store float %"43", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"45" = call float @llvm.amdgcn.exp2.f32(float %"46")
|
||||
store float %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"50" = inttoptr i64 %"47" to ptr
|
||||
store float %"48", ptr %"50", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load float, ptr %"43", align 4
|
||||
store float %"37", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"39" = call float @llvm.amdgcn.exp2.f32(float %"40")
|
||||
store float %"39", ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"44" = inttoptr i64 %"41" to ptr
|
||||
store float %"42", ptr %"44", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare float @llvm.amdgcn.exp2.f32(float) #2
|
||||
declare float @llvm.amdgcn.exp2.f32(float) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,45 +1,32 @@
|
|||
@shared_mem = external addrspace(3) global [0 x i32]
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @extern_shared(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"50" = inttoptr i64 %"45" to ptr addrspace(1)
|
||||
%"44" = load i64, ptr addrspace(1) %"50", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"41", align 4
|
||||
store i64 %"46", ptr addrspace(3) @shared_mem, align 4
|
||||
%"47" = load i64, ptr addrspace(3) @shared_mem, align 4
|
||||
store i64 %"47", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"53" = inttoptr i64 %"48" to ptr addrspace(1)
|
||||
store i64 %"49", ptr addrspace(1) %"53", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"37" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"44" = inttoptr i64 %"39" to ptr addrspace(1)
|
||||
%"38" = load i64, ptr addrspace(1) %"44", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"35", align 4
|
||||
store i64 %"40", ptr addrspace(3) @shared_mem, align 4
|
||||
%"41" = load i64, ptr addrspace(3) @shared_mem, align 4
|
||||
store i64 %"41", ptr addrspace(5) %"35", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"47" = inttoptr i64 %"42" to ptr addrspace(1)
|
||||
store i64 %"43", ptr addrspace(1) %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,65 +1,53 @@
|
|||
@shared_mem = external addrspace(3) global [0 x i32], align 4
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define void @incr_shared_2_global() #0 {
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"33"
|
||||
|
||||
"33": ; preds = %1
|
||||
%"43" = load i64, ptr addrspace(3) @shared_mem, align 4
|
||||
store i64 %"43", ptr addrspace(5) %"42", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"44" = add i64 %"45", 2
|
||||
store i64 %"44", ptr addrspace(5) %"42", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"42", align 4
|
||||
store i64 %"46", ptr addrspace(3) @shared_mem, align 4
|
||||
%"37" = load i64, ptr addrspace(3) @shared_mem, align 4
|
||||
store i64 %"37", ptr addrspace(5) %"36", align 4
|
||||
%"39" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"38" = add i64 %"39", 2
|
||||
store i64 %"38", ptr addrspace(5) %"36", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"36", align 4
|
||||
store i64 %"40", ptr addrspace(3) @shared_mem, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #1 {
|
||||
%"49" = alloca i64, align 8, addrspace(5)
|
||||
%"50" = alloca i64, align 8, addrspace(5)
|
||||
%"51" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @extern_shared_call(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"34"
|
||||
|
||||
"34": ; preds = %1
|
||||
%"52" = load i64, ptr addrspace(4) %"47", align 4
|
||||
store i64 %"52", ptr addrspace(5) %"49", align 4
|
||||
%"53" = load i64, ptr addrspace(4) %"48", align 4
|
||||
store i64 %"53", ptr addrspace(5) %"50", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"62" = inttoptr i64 %"55" to ptr addrspace(1)
|
||||
%"54" = load i64, ptr addrspace(1) %"62", align 4
|
||||
store i64 %"54", ptr addrspace(5) %"51", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"51", align 4
|
||||
store i64 %"56", ptr addrspace(3) @shared_mem, align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"43", align 4
|
||||
%"47" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"44", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"56" = inttoptr i64 %"49" to ptr addrspace(1)
|
||||
%"48" = load i64, ptr addrspace(1) %"56", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"45", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"45", align 4
|
||||
store i64 %"50", ptr addrspace(3) @shared_mem, align 4
|
||||
call void @incr_shared_2_global()
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %"34"
|
||||
%"57" = load i64, ptr addrspace(3) @shared_mem, align 4
|
||||
store i64 %"57", ptr addrspace(5) %"51", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"50", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"51", align 4
|
||||
%"65" = inttoptr i64 %"58" to ptr addrspace(1)
|
||||
store i64 %"59", ptr addrspace(1) %"65", align 4
|
||||
%"51" = load i64, ptr addrspace(3) @shared_mem, align 4
|
||||
store i64 %"51", ptr addrspace(5) %"45", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"59" = inttoptr i64 %"52" to ptr addrspace(1)
|
||||
store i64 %"53", ptr addrspace(1) %"59", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,60 +1,47 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"42", ptr addrspace(4) byref(i64) %"43") #1 {
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca float, align 4, addrspace(5)
|
||||
%"47" = alloca float, align 4, addrspace(5)
|
||||
%"48" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @fma(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #0 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
%"41" = alloca float, align 4, addrspace(5)
|
||||
%"42" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %1
|
||||
%"49" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"44", align 4
|
||||
%"50" = load i64, ptr addrspace(4) %"43", align 4
|
||||
store i64 %"50", ptr addrspace(5) %"45", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"63" = inttoptr i64 %"52" to ptr
|
||||
%"51" = load float, ptr %"63", align 4
|
||||
store float %"51", ptr addrspace(5) %"46", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"64" = inttoptr i64 %"53" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"64", i64 4
|
||||
%"54" = load float, ptr %"32", align 4
|
||||
store float %"54", ptr addrspace(5) %"47", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"65" = inttoptr i64 %"55" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"65", i64 8
|
||||
%"56" = load float, ptr %"34", align 4
|
||||
store float %"56", ptr addrspace(5) %"48", align 4
|
||||
%"58" = load float, ptr addrspace(5) %"46", align 4
|
||||
%"59" = load float, ptr addrspace(5) %"47", align 4
|
||||
%"60" = load float, ptr addrspace(5) %"48", align 4
|
||||
%"57" = call float @llvm.fma.f32(float %"58", float %"59", float %"60")
|
||||
store float %"57", ptr addrspace(5) %"46", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"62" = load float, ptr addrspace(5) %"46", align 4
|
||||
%"66" = inttoptr i64 %"61" to ptr
|
||||
store float %"62", ptr %"66", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"38", align 4
|
||||
%"44" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"57" = inttoptr i64 %"46" to ptr
|
||||
%"45" = load float, ptr %"57", align 4
|
||||
store float %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"58" = inttoptr i64 %"47" to ptr
|
||||
%"32" = getelementptr inbounds i8, ptr %"58", i64 4
|
||||
%"48" = load float, ptr %"32", align 4
|
||||
store float %"48", ptr addrspace(5) %"41", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"59" = inttoptr i64 %"49" to ptr
|
||||
%"34" = getelementptr inbounds i8, ptr %"59", i64 8
|
||||
%"50" = load float, ptr %"34", align 4
|
||||
store float %"50", ptr addrspace(5) %"42", align 4
|
||||
%"52" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"53" = load float, ptr addrspace(5) %"41", align 4
|
||||
%"54" = load float, ptr addrspace(5) %"42", align 4
|
||||
%"51" = call float @llvm.fma.f32(float %"52", float %"53", float %"54")
|
||||
store float %"51", ptr addrspace(5) %"40", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"56" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"60" = inttoptr i64 %"55" to ptr
|
||||
store float %"56", ptr %"60", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare float @llvm.fma.f32(float, float, float) #2
|
||||
declare float @llvm.fma.f32(float, float, float) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,40 +1,27 @@
|
|||
@foobar = addrspace(1) global [4 x i32] [i32 1, i32 0, i32 0, i32 0]
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @global_array(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %"39", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"49" = inttoptr i64 %"45" to ptr addrspace(1)
|
||||
%"44" = load i32, ptr addrspace(1) %"49", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"50" = inttoptr i64 %"46" to ptr addrspace(1)
|
||||
store i32 %"47", ptr addrspace(1) %"50", align 4
|
||||
store i64 ptrtoint (ptr addrspace(1) @foobar to i64), ptr addrspace(5) %"33", align 4
|
||||
%"37" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"43" = inttoptr i64 %"39" to ptr addrspace(1)
|
||||
%"38" = load i32, ptr addrspace(1) %"43", align 4
|
||||
store i32 %"38", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i32, ptr addrspace(5) %"35", align 4
|
||||
%"44" = inttoptr i64 %"40" to ptr addrspace(1)
|
||||
store i32 %"41", ptr addrspace(1) %"44", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,39 +1,26 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @ld_st(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"47" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load i64, ptr %"47", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"48" = inttoptr i64 %"45" to ptr
|
||||
store i64 %"46", ptr %"48", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"41" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load i64, ptr %"41", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"42" = inttoptr i64 %"39" to ptr
|
||||
store i64 %"40", ptr %"42", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,44 +1,31 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @ld_st_implicit(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||
store i64 81985529216486895, ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"50" = inttoptr i64 %"46" to ptr addrspace(1)
|
||||
%"49" = load float, ptr addrspace(1) %"50", align 4
|
||||
%2 = bitcast float %"49" to i32
|
||||
%"45" = zext i32 %2 to i64
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = inttoptr i64 %"47" to ptr addrspace(1)
|
||||
%3 = trunc i64 %"48" to i32
|
||||
%"52" = bitcast i32 %3 to float
|
||||
store float %"52", ptr addrspace(1) %"51", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"37" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"34", align 4
|
||||
store i64 81985529216486895, ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"44" = inttoptr i64 %"40" to ptr addrspace(1)
|
||||
%"43" = load float, ptr addrspace(1) %"44", align 4
|
||||
%2 = bitcast float %"43" to i32
|
||||
%"39" = zext i32 %2 to i64
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = inttoptr i64 %"41" to ptr addrspace(1)
|
||||
%3 = trunc i64 %"42" to i32
|
||||
%"46" = bitcast i32 %3 to float
|
||||
store float %"46", ptr addrspace(1) %"45", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,50 +1,37 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i32, align 4, addrspace(5)
|
||||
%"46" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @ld_st_offset(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i32, align 4, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"34"
|
||||
|
||||
"34": ; preds = %1
|
||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"48" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"57" = inttoptr i64 %"50" to ptr
|
||||
%"49" = load i32, ptr %"57", align 4
|
||||
store i32 %"49", ptr addrspace(5) %"45", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"58" = inttoptr i64 %"51" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"58", i64 4
|
||||
%"52" = load i32, ptr %"31", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"46", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"46", align 4
|
||||
%"59" = inttoptr i64 %"53" to ptr
|
||||
store i32 %"54", ptr %"59", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"60" = inttoptr i64 %"55" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"60", i64 4
|
||||
%"56" = load i32, ptr addrspace(5) %"45", align 4
|
||||
store i32 %"56", ptr %"33", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"51" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load i32, ptr %"51", align 4
|
||||
store i32 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"52" = inttoptr i64 %"45" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"52", i64 4
|
||||
%"46" = load i32, ptr %"31", align 4
|
||||
store i32 %"46", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"53" = inttoptr i64 %"47" to ptr
|
||||
store i32 %"48", ptr %"53", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"54" = inttoptr i64 %"49" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"54", i64 4
|
||||
%"50" = load i32, ptr addrspace(5) %"39", align 4
|
||||
store i32 %"50", ptr %"33", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @lg2(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load float, ptr %"49", align 4
|
||||
store float %"43", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"45" = call float @llvm.amdgcn.log.f32(float %"46")
|
||||
store float %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"50" = inttoptr i64 %"47" to ptr
|
||||
store float %"48", ptr %"50", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load float, ptr %"43", align 4
|
||||
store float %"37", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"39" = call float @llvm.amdgcn.log.f32(float %"40")
|
||||
store float %"39", ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"44" = inttoptr i64 %"41" to ptr
|
||||
store float %"42", ptr %"44", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare float @llvm.amdgcn.log.f32(float) #2
|
||||
declare float @llvm.amdgcn.log.f32(float) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,40 +1,27 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
define amdgpu_kernel void @local_align(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"10" = alloca [8 x i8], align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = inttoptr i64 %"45" to ptr
|
||||
%"44" = load i64, ptr %"48", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"49" = inttoptr i64 %"46" to ptr
|
||||
store i64 %"47", ptr %"49", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"37" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = inttoptr i64 %"39" to ptr
|
||||
%"38" = load i64, ptr %"42", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"43" = inttoptr i64 %"40" to ptr
|
||||
store i64 %"41", ptr %"43", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,68 +1,55 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #1 {
|
||||
%"49" = alloca i64, align 8, addrspace(5)
|
||||
%"50" = alloca i64, align 8, addrspace(5)
|
||||
%"51" = alloca i32, align 4, addrspace(5)
|
||||
%"52" = alloca i32, align 4, addrspace(5)
|
||||
%"53" = alloca i32, align 4, addrspace(5)
|
||||
%"54" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @mad_s32(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 {
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i32, align 4, addrspace(5)
|
||||
%"46" = alloca i32, align 4, addrspace(5)
|
||||
%"47" = alloca i32, align 4, addrspace(5)
|
||||
%"48" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"40"
|
||||
|
||||
"40": ; preds = %1
|
||||
%"55" = load i64, ptr addrspace(4) %"47", align 4
|
||||
store i64 %"55", ptr addrspace(5) %"49", align 4
|
||||
%"56" = load i64, ptr addrspace(4) %"48", align 4
|
||||
store i64 %"56", ptr addrspace(5) %"50", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"73" = inttoptr i64 %"58" to ptr
|
||||
%"57" = load i32, ptr %"73", align 4
|
||||
store i32 %"57", ptr addrspace(5) %"52", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"74" = inttoptr i64 %"59" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"74", i64 4
|
||||
%"60" = load i32, ptr %"33", align 4
|
||||
store i32 %"60", ptr addrspace(5) %"53", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"75" = inttoptr i64 %"61" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"75", i64 8
|
||||
%"62" = load i32, ptr %"35", align 4
|
||||
store i32 %"62", ptr addrspace(5) %"54", align 4
|
||||
%"64" = load i32, ptr addrspace(5) %"52", align 4
|
||||
%"65" = load i32, ptr addrspace(5) %"53", align 4
|
||||
%"66" = load i32, ptr addrspace(5) %"54", align 4
|
||||
%2 = mul i32 %"64", %"65"
|
||||
%"63" = add i32 %2, %"66"
|
||||
store i32 %"63", ptr addrspace(5) %"51", align 4
|
||||
%"67" = load i64, ptr addrspace(5) %"50", align 4
|
||||
%"68" = load i32, ptr addrspace(5) %"51", align 4
|
||||
%"76" = inttoptr i64 %"67" to ptr
|
||||
store i32 %"68", ptr %"76", align 4
|
||||
%"69" = load i64, ptr addrspace(5) %"50", align 4
|
||||
%"77" = inttoptr i64 %"69" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"77", i64 4
|
||||
%"70" = load i32, ptr addrspace(5) %"51", align 4
|
||||
store i32 %"70", ptr %"37", align 4
|
||||
%"71" = load i64, ptr addrspace(5) %"50", align 4
|
||||
%"78" = inttoptr i64 %"71" to ptr
|
||||
%"39" = getelementptr inbounds i8, ptr %"78", i64 8
|
||||
%"72" = load i32, ptr addrspace(5) %"51", align 4
|
||||
store i32 %"72", ptr %"39", align 4
|
||||
%"49" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"67" = inttoptr i64 %"52" to ptr
|
||||
%"51" = load i32, ptr %"67", align 4
|
||||
store i32 %"51", ptr addrspace(5) %"46", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"68" = inttoptr i64 %"53" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"68", i64 4
|
||||
%"54" = load i32, ptr %"33", align 4
|
||||
store i32 %"54", ptr addrspace(5) %"47", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"69" = inttoptr i64 %"55" to ptr
|
||||
%"35" = getelementptr inbounds i8, ptr %"69", i64 8
|
||||
%"56" = load i32, ptr %"35", align 4
|
||||
store i32 %"56", ptr addrspace(5) %"48", align 4
|
||||
%"58" = load i32, ptr addrspace(5) %"46", align 4
|
||||
%"59" = load i32, ptr addrspace(5) %"47", align 4
|
||||
%"60" = load i32, ptr addrspace(5) %"48", align 4
|
||||
%2 = mul i32 %"58", %"59"
|
||||
%"57" = add i32 %2, %"60"
|
||||
store i32 %"57", ptr addrspace(5) %"45", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"62" = load i32, ptr addrspace(5) %"45", align 4
|
||||
%"70" = inttoptr i64 %"61" to ptr
|
||||
store i32 %"62", ptr %"70", align 4
|
||||
%"63" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"71" = inttoptr i64 %"63" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"71", i64 4
|
||||
%"64" = load i32, ptr addrspace(5) %"45", align 4
|
||||
store i32 %"64", ptr %"37", align 4
|
||||
%"65" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"72" = inttoptr i64 %"65" to ptr
|
||||
%"39" = getelementptr inbounds i8, ptr %"72", i64 8
|
||||
%"66" = load i32, ptr addrspace(5) %"45", align 4
|
||||
store i32 %"66", ptr %"39", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @malformed_label(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @malformed_label(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
br label %"10"
|
||||
|
||||
"10": ; preds = %"32"
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"54" = inttoptr i64 %"49" to ptr
|
||||
%"48" = load i64, ptr %"54", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"44", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"50" = add i64 %"51", 1
|
||||
store i64 %"50", ptr addrspace(5) %"45", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"55" = inttoptr i64 %"52" to ptr
|
||||
store i64 %"53", ptr %"55", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"48" = inttoptr i64 %"43" to ptr
|
||||
%"42" = load i64, ptr %"48", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"44" = add i64 %"45", 1
|
||||
store i64 %"44", ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"49" = inttoptr i64 %"46" to ptr
|
||||
store i64 %"47", ptr %"49", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,53 +1,40 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @max(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i32, align 4, addrspace(5)
|
||||
%"38" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"56" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"56", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"50" = load i32, ptr %"31", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"51" = call i32 @llvm.smax.i32(i32 %"52", i32 %"53")
|
||||
store i32 %"51", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"58" = inttoptr i64 %"54" to ptr
|
||||
store i32 %"55", ptr %"58", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"50" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load i32, ptr %"50", align 4
|
||||
store i32 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
|
||||
%"44" = load i32, ptr %"31", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"38", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%"45" = call i32 @llvm.smax.i32(i32 %"46", i32 %"47")
|
||||
store i32 %"45", ptr addrspace(5) %"37", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"52" = inttoptr i64 %"48" to ptr
|
||||
store i32 %"49", ptr %"52", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.smax.i32(i32, i32) #2
|
||||
declare i32 @llvm.smax.i32(i32, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,40 +1,27 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @membar(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"48" = inttoptr i64 %"44" to ptr
|
||||
%"47" = load i32, ptr %"48", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"40", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"42" = inttoptr i64 %"38" to ptr
|
||||
%"41" = load i32, ptr %"42", align 4
|
||||
store i32 %"41", ptr addrspace(5) %"34", align 4
|
||||
fence seq_cst
|
||||
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"49" = inttoptr i64 %"45" to ptr
|
||||
store i32 %"46", ptr %"49", align 4
|
||||
%"39" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"40" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"43" = inttoptr i64 %"39" to ptr
|
||||
store i32 %"40", ptr %"43", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,53 +1,40 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @min(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i32, align 4, addrspace(5)
|
||||
%"38" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"56" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"56", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"50" = load i32, ptr %"31", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"51" = call i32 @llvm.smin.i32(i32 %"52", i32 %"53")
|
||||
store i32 %"51", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"58" = inttoptr i64 %"54" to ptr
|
||||
store i32 %"55", ptr %"58", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"50" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load i32, ptr %"50", align 4
|
||||
store i32 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
|
||||
%"44" = load i32, ptr %"31", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"38", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%"45" = call i32 @llvm.smin.i32(i32 %"46", i32 %"47")
|
||||
store i32 %"45", ptr addrspace(5) %"37", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"52" = inttoptr i64 %"48" to ptr
|
||||
store i32 %"49", ptr %"52", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.smin.i32(i32, i32) #2
|
||||
declare i32 @llvm.smin.i32(i32, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,42 +1,29 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @mov(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"51" = inttoptr i64 %"46" to ptr
|
||||
%"45" = load i64, ptr %"51", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"52" = inttoptr i64 %"49" to ptr
|
||||
store i64 %"50", ptr %"52", align 4
|
||||
%"37" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"45" = inttoptr i64 %"40" to ptr
|
||||
%"39" = load i64, ptr %"45", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"46" = inttoptr i64 %"43" to ptr
|
||||
store i64 %"44", ptr %"46", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,28 +1,15 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||
define amdgpu_kernel void @mov_address(ptr addrspace(4) byref(i64) %"29", ptr addrspace(4) byref(i64) %"30") #0 {
|
||||
%"10" = alloca [8 x i8], align 1, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"31" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"28"
|
||||
|
||||
"28": ; preds = %1
|
||||
%"39" = ptrtoint ptr addrspace(5) %"10" to i64
|
||||
store i64 %"39", ptr addrspace(5) %"37", align 4
|
||||
%"33" = ptrtoint ptr addrspace(5) %"10" to i64
|
||||
store i64 %"33", ptr addrspace(5) %"31", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,47 +1,34 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @mul24(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i32, align 4, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @mul24(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i32, align 4, addrspace(5)
|
||||
%"37" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"52" = inttoptr i64 %"47" to ptr
|
||||
%"46" = load i32, ptr %"52", align 4
|
||||
store i32 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"42", align 4
|
||||
%"48" = call i32 @llvm.amdgcn.mul.u24(i32 %"49", i32 2)
|
||||
store i32 %"48", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"53" = inttoptr i64 %"50" to ptr
|
||||
store i32 %"51", ptr %"53", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr
|
||||
%"40" = load i32, ptr %"46", align 4
|
||||
store i32 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i32, ptr addrspace(5) %"36", align 4
|
||||
%"42" = call i32 @llvm.amdgcn.mul.u24(i32 %"43", i32 2)
|
||||
store i32 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"47" = inttoptr i64 %"44" to ptr
|
||||
store i32 %"45", ptr %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.amdgcn.mul.u24(i32, i32) #2
|
||||
declare i32 @llvm.amdgcn.mul.u24(i32, i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,49 +1,36 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca float, align 4, addrspace(5)
|
||||
%"44" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @mul_ftz(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca float, align 4, addrspace(5)
|
||||
%"38" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"56" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load float, ptr %"56", align 4
|
||||
store float %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"50" = load float, ptr %"31", align 4
|
||||
store float %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load float, ptr addrspace(5) %"43", align 4
|
||||
%"53" = load float, ptr addrspace(5) %"44", align 4
|
||||
%"51" = fmul float %"52", %"53"
|
||||
store float %"51", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"55" = load float, ptr addrspace(5) %"43", align 4
|
||||
%"58" = inttoptr i64 %"54" to ptr
|
||||
store float %"55", ptr %"58", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"50" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load float, ptr %"50", align 4
|
||||
store float %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
|
||||
%"44" = load float, ptr %"31", align 4
|
||||
store float %"44", ptr addrspace(5) %"38", align 4
|
||||
%"46" = load float, ptr addrspace(5) %"37", align 4
|
||||
%"47" = load float, ptr addrspace(5) %"38", align 4
|
||||
%"45" = fmul float %"46", %"47"
|
||||
store float %"45", ptr addrspace(5) %"37", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"49" = load float, ptr addrspace(5) %"37", align 4
|
||||
%"52" = inttoptr i64 %"48" to ptr
|
||||
store float %"49", ptr %"52", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @mul_hi(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"52" = inttoptr i64 %"47" to ptr
|
||||
%"46" = load i64, ptr %"52", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%2 = zext i64 %"49" to i128
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr
|
||||
%"40" = load i64, ptr %"46", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%2 = zext i64 %"43" to i128
|
||||
%3 = mul i128 %2, 2
|
||||
%4 = lshr i128 %3, 64
|
||||
%"48" = trunc i128 %4 to i64
|
||||
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"53" = inttoptr i64 %"50" to ptr
|
||||
store i64 %"51", ptr %"53", align 4
|
||||
%"42" = trunc i128 %4 to i64
|
||||
store i64 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"47" = inttoptr i64 %"44" to ptr
|
||||
store i64 %"45", ptr %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,43 +1,30 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @mul_lo(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"52" = inttoptr i64 %"47" to ptr
|
||||
%"46" = load i64, ptr %"52", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"48" = mul i64 %"49", 2
|
||||
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"53" = inttoptr i64 %"50" to ptr
|
||||
store i64 %"51", ptr %"53", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr
|
||||
%"40" = load i64, ptr %"46", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"42" = mul i64 %"43", 2
|
||||
store i64 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"47" = inttoptr i64 %"44" to ptr
|
||||
store i64 %"45", ptr %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,49 +1,36 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca float, align 4, addrspace(5)
|
||||
%"44" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @mul_non_ftz(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca float, align 4, addrspace(5)
|
||||
%"38" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"56" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load float, ptr %"56", align 4
|
||||
store float %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"50" = load float, ptr %"31", align 4
|
||||
store float %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load float, ptr addrspace(5) %"43", align 4
|
||||
%"53" = load float, ptr addrspace(5) %"44", align 4
|
||||
%"51" = fmul float %"52", %"53"
|
||||
store float %"51", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"55" = load float, ptr addrspace(5) %"43", align 4
|
||||
%"58" = inttoptr i64 %"54" to ptr
|
||||
store float %"55", ptr %"58", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"50" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load float, ptr %"50", align 4
|
||||
store float %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
|
||||
%"44" = load float, ptr %"31", align 4
|
||||
store float %"44", ptr addrspace(5) %"38", align 4
|
||||
%"46" = load float, ptr addrspace(5) %"37", align 4
|
||||
%"47" = load float, ptr addrspace(5) %"38", align 4
|
||||
%"45" = fmul float %"46", %"47"
|
||||
store float %"45", ptr addrspace(5) %"37", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"49" = load float, ptr addrspace(5) %"37", align 4
|
||||
%"52" = inttoptr i64 %"48" to ptr
|
||||
store float %"49", ptr %"52", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,52 +1,39 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
%"45" = alloca i32, align 4, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @mul_wide(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i32, align 4, addrspace(5)
|
||||
%"39" = alloca i32, align 4, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"33"
|
||||
|
||||
"33": ; preds = %1
|
||||
%"47" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"58" = inttoptr i64 %"50" to ptr addrspace(1)
|
||||
%"49" = load i32, ptr addrspace(1) %"58", align 4
|
||||
store i32 %"49", ptr addrspace(5) %"44", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"59" = inttoptr i64 %"51" to ptr addrspace(1)
|
||||
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"59", i64 4
|
||||
%"52" = load i32, ptr addrspace(1) %"32", align 4
|
||||
store i32 %"52", ptr addrspace(5) %"45", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"45", align 4
|
||||
%2 = sext i32 %"54" to i64
|
||||
%3 = sext i32 %"55" to i64
|
||||
%"53" = mul i64 %2, %3
|
||||
store i64 %"53", ptr addrspace(5) %"46", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"60" = inttoptr i64 %"56" to ptr
|
||||
store i64 %"57", ptr %"60", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"52" = inttoptr i64 %"44" to ptr addrspace(1)
|
||||
%"43" = load i32, ptr addrspace(1) %"52", align 4
|
||||
store i32 %"43", ptr addrspace(5) %"38", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"53" = inttoptr i64 %"45" to ptr addrspace(1)
|
||||
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"53", i64 4
|
||||
%"46" = load i32, ptr addrspace(1) %"32", align 4
|
||||
store i32 %"46", ptr addrspace(5) %"39", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"39", align 4
|
||||
%2 = sext i32 %"48" to i64
|
||||
%3 = sext i32 %"49" to i64
|
||||
%"47" = mul i64 %2, %3
|
||||
store i64 %"47", ptr addrspace(5) %"40", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"54" = inttoptr i64 %"50" to ptr
|
||||
store i64 %"51", ptr %"54", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,42 +1,29 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @neg(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load i32, ptr %"49", align 4
|
||||
store i32 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"45" = sub i32 0, %"46"
|
||||
store i32 %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"50" = inttoptr i64 %"47" to ptr
|
||||
store i32 %"48", ptr %"50", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load i32, ptr %"43", align 4
|
||||
store i32 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"39" = sub i32 0, %"40"
|
||||
store i32 %"39", ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"44" = inttoptr i64 %"41" to ptr
|
||||
store i32 %"42", ptr %"44", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,48 +1,35 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
%"45" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @non_scalar_ptr_offset(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i32, align 4, addrspace(5)
|
||||
%"39" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"33"
|
||||
|
||||
"33": ; preds = %1
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"56" = inttoptr i64 %"48" to ptr addrspace(1)
|
||||
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"56", i64 8
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"50" = inttoptr i64 %"42" to ptr addrspace(1)
|
||||
%"32" = getelementptr inbounds i8, ptr addrspace(1) %"50", i64 8
|
||||
%"30" = load <2 x i32>, ptr addrspace(1) %"32", align 8
|
||||
%"49" = extractelement <2 x i32> %"30", i8 0
|
||||
%"50" = extractelement <2 x i32> %"30", i8 1
|
||||
store i32 %"49", ptr addrspace(5) %"44", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"45", align 4
|
||||
%"52" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"45", align 4
|
||||
%"51" = add i32 %"52", %"53"
|
||||
store i32 %"51", ptr addrspace(5) %"44", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"57" = inttoptr i64 %"54" to ptr addrspace(1)
|
||||
store i32 %"55", ptr addrspace(1) %"57", align 4
|
||||
%"43" = extractelement <2 x i32> %"30", i8 0
|
||||
%"44" = extractelement <2 x i32> %"30", i8 1
|
||||
store i32 %"43", ptr addrspace(5) %"38", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"39", align 4
|
||||
%"45" = add i32 %"46", %"47"
|
||||
store i32 %"45", ptr addrspace(5) %"38", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%"51" = inttoptr i64 %"48" to ptr addrspace(1)
|
||||
store i32 %"49", ptr addrspace(1) %"51", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,43 +1,30 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @not(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"43" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"51" = inttoptr i64 %"46" to ptr
|
||||
%"45" = load i64, ptr %"51", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"52" = xor i64 %"48", -1
|
||||
store i64 %"52", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"54" = inttoptr i64 %"49" to ptr
|
||||
store i64 %"50", ptr %"54", align 4
|
||||
%"37" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"45" = inttoptr i64 %"40" to ptr
|
||||
%"39" = load i64, ptr %"45", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"46" = xor i64 %"42", -1
|
||||
store i64 %"46", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"48" = inttoptr i64 %"43" to ptr
|
||||
store i64 %"44", ptr %"48", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,47 +1,37 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
%"45" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @ntid(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #1 {
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i32, align 4, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"56" = inttoptr i64 %"49" to ptr
|
||||
%"48" = load i32, ptr %"56", align 4
|
||||
store i32 %"48", ptr addrspace(5) %"44", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"38", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"51" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load i32, ptr %"51", align 4
|
||||
store i32 %"43", ptr addrspace(5) %"39", align 4
|
||||
%"31" = call i32 @__zluda_ptx_impl_sreg_ntid(i8 0)
|
||||
br label %"33"
|
||||
|
||||
"33": ; preds = %"32"
|
||||
store i32 %"31", ptr addrspace(5) %"45", align 4
|
||||
%"52" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"45", align 4
|
||||
%"51" = add i32 %"52", %"53"
|
||||
store i32 %"51", ptr addrspace(5) %"44", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"57" = inttoptr i64 %"54" to ptr
|
||||
store i32 %"55", ptr %"57", align 4
|
||||
store i32 %"31", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"46" = add i32 %"47", %"48"
|
||||
store i32 %"46", ptr addrspace(5) %"39", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"50" = load i32, ptr addrspace(5) %"39", align 4
|
||||
%"52" = inttoptr i64 %"49" to ptr
|
||||
store i32 %"50", ptr %"52", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,49 +1,36 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @or(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"56" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i64, ptr %"56", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"57", i64 8
|
||||
%"50" = load i64, ptr %"31", align 4
|
||||
store i64 %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"58" = or i64 %"52", %"53"
|
||||
store i64 %"58", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"61" = inttoptr i64 %"54" to ptr
|
||||
store i64 %"55", ptr %"61", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"50" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load i64, ptr %"50", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"51", i64 8
|
||||
%"44" = load i64, ptr %"31", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"38", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"52" = or i64 %"46", %"47"
|
||||
store i64 %"52", ptr addrspace(5) %"37", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"55" = inttoptr i64 %"48" to ptr
|
||||
store i64 %"49", ptr %"55", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @popc(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load i32, ptr %"49", align 4
|
||||
store i32 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"50" = call i32 @llvm.ctpop.i32(i32 %"46")
|
||||
store i32 %"50", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"51" = inttoptr i64 %"47" to ptr
|
||||
store i32 %"48", ptr %"51", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load i32, ptr %"43", align 4
|
||||
store i32 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"44" = call i32 @llvm.ctpop.i32(i32 %"40")
|
||||
store i32 %"44", ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"45" = inttoptr i64 %"41" to ptr
|
||||
store i32 %"42", ptr %"45", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare i32 @llvm.ctpop.i32(i32) #2
|
||||
declare i32 @llvm.ctpop.i32(i32) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,70 +1,57 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #1 {
|
||||
%"49" = alloca i64, align 8, addrspace(5)
|
||||
%"50" = alloca i64, align 8, addrspace(5)
|
||||
%"51" = alloca i64, align 8, addrspace(5)
|
||||
%"52" = alloca i64, align 8, addrspace(5)
|
||||
%"53" = alloca i64, align 8, addrspace(5)
|
||||
%"54" = alloca i1, align 1, addrspace(5)
|
||||
define amdgpu_kernel void @pred_not(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 {
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i1, align 1, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"40"
|
||||
|
||||
"40": ; preds = %1
|
||||
%"55" = load i64, ptr addrspace(4) %"47", align 4
|
||||
store i64 %"55", ptr addrspace(5) %"49", align 4
|
||||
%"56" = load i64, ptr addrspace(4) %"48", align 4
|
||||
store i64 %"56", ptr addrspace(5) %"50", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"72" = inttoptr i64 %"58" to ptr
|
||||
%"57" = load i64, ptr %"72", align 4
|
||||
store i64 %"57", ptr addrspace(5) %"51", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"73" = inttoptr i64 %"59" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"73", i64 8
|
||||
%"60" = load i64, ptr %"37", align 4
|
||||
store i64 %"60", ptr addrspace(5) %"52", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"51", align 4
|
||||
%"63" = load i64, ptr addrspace(5) %"52", align 4
|
||||
%"61" = icmp ult i64 %"62", %"63"
|
||||
store i1 %"61", ptr addrspace(5) %"54", align 1
|
||||
%"65" = load i1, ptr addrspace(5) %"54", align 1
|
||||
%"64" = xor i1 %"65", true
|
||||
store i1 %"64", ptr addrspace(5) %"54", align 1
|
||||
%"66" = load i1, ptr addrspace(5) %"54", align 1
|
||||
br i1 %"66", label %"16", label %"17"
|
||||
%"49" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"66" = inttoptr i64 %"52" to ptr
|
||||
%"51" = load i64, ptr %"66", align 4
|
||||
store i64 %"51", ptr addrspace(5) %"45", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"67" = inttoptr i64 %"53" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"67", i64 8
|
||||
%"54" = load i64, ptr %"37", align 4
|
||||
store i64 %"54", ptr addrspace(5) %"46", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"55" = icmp ult i64 %"56", %"57"
|
||||
store i1 %"55", ptr addrspace(5) %"48", align 1
|
||||
%"59" = load i1, ptr addrspace(5) %"48", align 1
|
||||
%"58" = xor i1 %"59", true
|
||||
store i1 %"58", ptr addrspace(5) %"48", align 1
|
||||
%"60" = load i1, ptr addrspace(5) %"48", align 1
|
||||
br i1 %"60", label %"16", label %"17"
|
||||
|
||||
"16": ; preds = %"40"
|
||||
store i64 1, ptr addrspace(5) %"53", align 4
|
||||
store i64 1, ptr addrspace(5) %"47", align 4
|
||||
br label %"17"
|
||||
|
||||
"17": ; preds = %"16", %"40"
|
||||
%"68" = load i1, ptr addrspace(5) %"54", align 1
|
||||
br i1 %"68", label %"19", label %"18"
|
||||
%"62" = load i1, ptr addrspace(5) %"48", align 1
|
||||
br i1 %"62", label %"19", label %"18"
|
||||
|
||||
"18": ; preds = %"17"
|
||||
store i64 2, ptr addrspace(5) %"53", align 4
|
||||
store i64 2, ptr addrspace(5) %"47", align 4
|
||||
br label %"19"
|
||||
|
||||
"19": ; preds = %"18", %"17"
|
||||
%"70" = load i64, ptr addrspace(5) %"50", align 4
|
||||
%"71" = load i64, ptr addrspace(5) %"53", align 4
|
||||
%"74" = inttoptr i64 %"70" to ptr
|
||||
store i64 %"71", ptr %"74", align 4
|
||||
%"64" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"65" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"68" = inttoptr i64 %"64" to ptr
|
||||
store i64 %"65", ptr %"68", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,51 +1,38 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @prmt(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i32, align 4, addrspace(5)
|
||||
%"38" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"56" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"56", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"50" = load i32, ptr %"31", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%2 = bitcast i32 %"52" to <4 x i8>
|
||||
%3 = bitcast i32 %"53" to <4 x i8>
|
||||
%"58" = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
|
||||
store <4 x i8> %"58", ptr addrspace(5) %"44", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"61" = inttoptr i64 %"54" to ptr
|
||||
store i32 %"55", ptr %"61", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"50" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load i32, ptr %"50", align 4
|
||||
store i32 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
|
||||
%"44" = load i32, ptr %"31", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"38", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%2 = bitcast i32 %"46" to <4 x i8>
|
||||
%3 = bitcast i32 %"47" to <4 x i8>
|
||||
%"52" = shufflevector <4 x i8> %2, <4 x i8> %3, <4 x i32> <i32 4, i32 0, i32 6, i32 7>
|
||||
store <4 x i8> %"52", ptr addrspace(5) %"38", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%"55" = inttoptr i64 %"48" to ptr
|
||||
store i32 %"49", ptr %"55", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @rcp(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load float, ptr %"49", align 4
|
||||
store float %"43", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"45" = call float @llvm.amdgcn.rcp.f32(float %"46")
|
||||
store float %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"50" = inttoptr i64 %"47" to ptr
|
||||
store float %"48", ptr %"50", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load float, ptr %"43", align 4
|
||||
store float %"37", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"39" = call float @llvm.amdgcn.rcp.f32(float %"40")
|
||||
store float %"39", ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"44" = inttoptr i64 %"41" to ptr
|
||||
store float %"42", ptr %"44", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare float @llvm.amdgcn.rcp.f32(float) #2
|
||||
declare float @llvm.amdgcn.rcp.f32(float) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,49 +1,36 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"43", ptr addrspace(4) byref(i64) %"44") #1 {
|
||||
define amdgpu_kernel void @reg_local(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #0 {
|
||||
%"10" = alloca [8 x i8], align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"36"
|
||||
|
||||
"36": ; preds = %1
|
||||
%"48" = load i64, ptr addrspace(4) %"43", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"45", align 4
|
||||
%"49" = load i64, ptr addrspace(4) %"44", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"46", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"57" = inttoptr i64 %"51" to ptr addrspace(1)
|
||||
%"56" = load i64, ptr addrspace(1) %"57", align 4
|
||||
store i64 %"56", ptr addrspace(5) %"47", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"31" = add i64 %"52", 1
|
||||
%"58" = addrspacecast ptr addrspace(5) %"10" to ptr
|
||||
store i64 %"31", ptr %"58", align 4
|
||||
%"60" = addrspacecast ptr addrspace(5) %"10" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"60", i64 0
|
||||
%"61" = load i64, ptr %"33", align 4
|
||||
store i64 %"61", ptr addrspace(5) %"47", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"62" = inttoptr i64 %"54" to ptr addrspace(1)
|
||||
%"35" = getelementptr inbounds i8, ptr addrspace(1) %"62", i64 0
|
||||
%"55" = load i64, ptr addrspace(5) %"47", align 4
|
||||
store i64 %"55", ptr addrspace(1) %"35", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"51" = inttoptr i64 %"45" to ptr addrspace(1)
|
||||
%"50" = load i64, ptr addrspace(1) %"51", align 4
|
||||
store i64 %"50", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"31" = add i64 %"46", 1
|
||||
%"52" = addrspacecast ptr addrspace(5) %"10" to ptr
|
||||
store i64 %"31", ptr %"52", align 4
|
||||
%"54" = addrspacecast ptr addrspace(5) %"10" to ptr
|
||||
%"33" = getelementptr inbounds i8, ptr %"54", i64 0
|
||||
%"55" = load i64, ptr %"33", align 4
|
||||
store i64 %"55", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"56" = inttoptr i64 %"48" to ptr addrspace(1)
|
||||
%"35" = getelementptr inbounds i8, ptr addrspace(1) %"56", i64 0
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
store i64 %"49", ptr addrspace(1) %"35", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,49 +1,36 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
%"44" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @rem(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i32, align 4, addrspace(5)
|
||||
%"38" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"56" = inttoptr i64 %"48" to ptr
|
||||
%"47" = load i32, ptr %"56", align 4
|
||||
store i32 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"57", i64 4
|
||||
%"50" = load i32, ptr %"31", align 4
|
||||
store i32 %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"44", align 4
|
||||
%"51" = srem i32 %"52", %"53"
|
||||
store i32 %"51", ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"58" = inttoptr i64 %"54" to ptr
|
||||
store i32 %"55", ptr %"58", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"50" = inttoptr i64 %"42" to ptr
|
||||
%"41" = load i32, ptr %"50", align 4
|
||||
store i32 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"51", i64 4
|
||||
%"44" = load i32, ptr %"31", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"38", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"38", align 4
|
||||
%"45" = srem i32 %"46", %"47"
|
||||
store i32 %"45", ptr addrspace(5) %"37", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"52" = inttoptr i64 %"48" to ptr
|
||||
store i32 %"49", ptr %"52", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca double, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @rsqrt(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca double, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load double, ptr %"49", align 8
|
||||
store double %"43", ptr addrspace(5) %"40", align 8
|
||||
%"46" = load double, ptr addrspace(5) %"40", align 8
|
||||
%"45" = call double @llvm.amdgcn.rsq.f64(double %"46")
|
||||
store double %"45", ptr addrspace(5) %"40", align 8
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load double, ptr addrspace(5) %"40", align 8
|
||||
%"50" = inttoptr i64 %"47" to ptr
|
||||
store double %"48", ptr %"50", align 8
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load double, ptr %"43", align 8
|
||||
store double %"37", ptr addrspace(5) %"34", align 8
|
||||
%"40" = load double, ptr addrspace(5) %"34", align 8
|
||||
%"39" = call double @llvm.amdgcn.rsq.f64(double %"40")
|
||||
store double %"39", ptr addrspace(5) %"34", align 8
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load double, ptr addrspace(5) %"34", align 8
|
||||
%"44" = inttoptr i64 %"41" to ptr
|
||||
store double %"42", ptr %"44", align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare double @llvm.amdgcn.rsq.f64(double) #2
|
||||
declare double @llvm.amdgcn.rsq.f64(double) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="ieee" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,49 +1,36 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i16, align 2, addrspace(5)
|
||||
%"45" = alloca i16, align 2, addrspace(5)
|
||||
define amdgpu_kernel void @selp(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i16, align 2, addrspace(5)
|
||||
%"39" = alloca i16, align 2, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"33"
|
||||
|
||||
"33": ; preds = %1
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"48" = load i16, ptr %"57", align 2
|
||||
store i16 %"48", ptr addrspace(5) %"44", align 2
|
||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"58" = inttoptr i64 %"50" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"58", i64 2
|
||||
%"51" = load i16, ptr %"31", align 2
|
||||
store i16 %"51", ptr addrspace(5) %"45", align 2
|
||||
%"53" = load i16, ptr addrspace(5) %"44", align 2
|
||||
%"54" = load i16, ptr addrspace(5) %"45", align 2
|
||||
%"52" = select i1 false, i16 %"53", i16 %"54"
|
||||
store i16 %"52", ptr addrspace(5) %"44", align 2
|
||||
%"55" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"56" = load i16, ptr addrspace(5) %"44", align 2
|
||||
%"59" = inttoptr i64 %"55" to ptr
|
||||
store i16 %"56", ptr %"59", align 2
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"42" = load i16, ptr %"51", align 2
|
||||
store i16 %"42", ptr addrspace(5) %"38", align 2
|
||||
%"44" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"52" = inttoptr i64 %"44" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"52", i64 2
|
||||
%"45" = load i16, ptr %"31", align 2
|
||||
store i16 %"45", ptr addrspace(5) %"39", align 2
|
||||
%"47" = load i16, ptr addrspace(5) %"38", align 2
|
||||
%"48" = load i16, ptr addrspace(5) %"39", align 2
|
||||
%"46" = select i1 false, i16 %"47", i16 %"48"
|
||||
store i16 %"46", ptr addrspace(5) %"38", align 2
|
||||
%"49" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"50" = load i16, ptr addrspace(5) %"38", align 2
|
||||
%"53" = inttoptr i64 %"49" to ptr
|
||||
store i16 %"50", ptr %"53", align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,49 +1,36 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i16, align 2, addrspace(5)
|
||||
%"45" = alloca i16, align 2, addrspace(5)
|
||||
define amdgpu_kernel void @selp_true(ptr addrspace(4) byref(i64) %"34", ptr addrspace(4) byref(i64) %"35") #0 {
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i16, align 2, addrspace(5)
|
||||
%"39" = alloca i16, align 2, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"33"
|
||||
|
||||
"33": ; preds = %1
|
||||
%"46" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"47" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"57" = inttoptr i64 %"49" to ptr
|
||||
%"48" = load i16, ptr %"57", align 2
|
||||
store i16 %"48", ptr addrspace(5) %"44", align 2
|
||||
%"50" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"58" = inttoptr i64 %"50" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"58", i64 2
|
||||
%"51" = load i16, ptr %"31", align 2
|
||||
store i16 %"51", ptr addrspace(5) %"45", align 2
|
||||
%"53" = load i16, ptr addrspace(5) %"44", align 2
|
||||
%"54" = load i16, ptr addrspace(5) %"45", align 2
|
||||
%"52" = select i1 true, i16 %"53", i16 %"54"
|
||||
store i16 %"52", ptr addrspace(5) %"44", align 2
|
||||
%"55" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"56" = load i16, ptr addrspace(5) %"44", align 2
|
||||
%"59" = inttoptr i64 %"55" to ptr
|
||||
store i16 %"56", ptr %"59", align 2
|
||||
%"40" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"51" = inttoptr i64 %"43" to ptr
|
||||
%"42" = load i16, ptr %"51", align 2
|
||||
store i16 %"42", ptr addrspace(5) %"38", align 2
|
||||
%"44" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"52" = inttoptr i64 %"44" to ptr
|
||||
%"31" = getelementptr inbounds i8, ptr %"52", i64 2
|
||||
%"45" = load i16, ptr %"31", align 2
|
||||
store i16 %"45", ptr addrspace(5) %"39", align 2
|
||||
%"47" = load i16, ptr addrspace(5) %"38", align 2
|
||||
%"48" = load i16, ptr addrspace(5) %"39", align 2
|
||||
%"46" = select i1 true, i16 %"47", i16 %"48"
|
||||
store i16 %"46", ptr addrspace(5) %"38", align 2
|
||||
%"49" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"50" = load i16, ptr addrspace(5) %"38", align 2
|
||||
%"53" = inttoptr i64 %"49" to ptr
|
||||
store i16 %"50", ptr %"53", align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,67 +1,54 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"47", ptr addrspace(4) byref(i64) %"48") #1 {
|
||||
%"49" = alloca i64, align 8, addrspace(5)
|
||||
%"50" = alloca i64, align 8, addrspace(5)
|
||||
%"51" = alloca i64, align 8, addrspace(5)
|
||||
%"52" = alloca i64, align 8, addrspace(5)
|
||||
%"53" = alloca i64, align 8, addrspace(5)
|
||||
%"54" = alloca i1, align 1, addrspace(5)
|
||||
define amdgpu_kernel void @setp(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #0 {
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i1, align 1, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"40"
|
||||
|
||||
"40": ; preds = %1
|
||||
%"55" = load i64, ptr addrspace(4) %"47", align 4
|
||||
store i64 %"55", ptr addrspace(5) %"49", align 4
|
||||
%"56" = load i64, ptr addrspace(4) %"48", align 4
|
||||
store i64 %"56", ptr addrspace(5) %"50", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"70" = inttoptr i64 %"58" to ptr
|
||||
%"57" = load i64, ptr %"70", align 4
|
||||
store i64 %"57", ptr addrspace(5) %"51", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"71" = inttoptr i64 %"59" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"71", i64 8
|
||||
%"60" = load i64, ptr %"37", align 4
|
||||
store i64 %"60", ptr addrspace(5) %"52", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"51", align 4
|
||||
%"63" = load i64, ptr addrspace(5) %"52", align 4
|
||||
%"61" = icmp ult i64 %"62", %"63"
|
||||
store i1 %"61", ptr addrspace(5) %"54", align 1
|
||||
%"64" = load i1, ptr addrspace(5) %"54", align 1
|
||||
br i1 %"64", label %"16", label %"17"
|
||||
%"49" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"50", ptr addrspace(5) %"44", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"64" = inttoptr i64 %"52" to ptr
|
||||
%"51" = load i64, ptr %"64", align 4
|
||||
store i64 %"51", ptr addrspace(5) %"45", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"65" = inttoptr i64 %"53" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"65", i64 8
|
||||
%"54" = load i64, ptr %"37", align 4
|
||||
store i64 %"54", ptr addrspace(5) %"46", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"55" = icmp ult i64 %"56", %"57"
|
||||
store i1 %"55", ptr addrspace(5) %"48", align 1
|
||||
%"58" = load i1, ptr addrspace(5) %"48", align 1
|
||||
br i1 %"58", label %"16", label %"17"
|
||||
|
||||
"16": ; preds = %"40"
|
||||
store i64 1, ptr addrspace(5) %"53", align 4
|
||||
store i64 1, ptr addrspace(5) %"47", align 4
|
||||
br label %"17"
|
||||
|
||||
"17": ; preds = %"16", %"40"
|
||||
%"66" = load i1, ptr addrspace(5) %"54", align 1
|
||||
br i1 %"66", label %"19", label %"18"
|
||||
%"60" = load i1, ptr addrspace(5) %"48", align 1
|
||||
br i1 %"60", label %"19", label %"18"
|
||||
|
||||
"18": ; preds = %"17"
|
||||
store i64 2, ptr addrspace(5) %"53", align 4
|
||||
store i64 2, ptr addrspace(5) %"47", align 4
|
||||
br label %"19"
|
||||
|
||||
"19": ; preds = %"18", %"17"
|
||||
%"68" = load i64, ptr addrspace(5) %"50", align 4
|
||||
%"69" = load i64, ptr addrspace(5) %"53", align 4
|
||||
%"72" = inttoptr i64 %"68" to ptr
|
||||
store i64 %"69", ptr %"72", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"63" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"66" = inttoptr i64 %"62" to ptr
|
||||
store i64 %"63", ptr %"66", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,69 +1,56 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #1 {
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i64, align 8, addrspace(5)
|
||||
%"49" = alloca float, align 4, addrspace(5)
|
||||
%"50" = alloca float, align 4, addrspace(5)
|
||||
%"51" = alloca float, align 4, addrspace(5)
|
||||
%"52" = alloca i1, align 1, addrspace(5)
|
||||
define amdgpu_kernel void @setp_gt(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca float, align 4, addrspace(5)
|
||||
%"44" = alloca float, align 4, addrspace(5)
|
||||
%"45" = alloca float, align 4, addrspace(5)
|
||||
%"46" = alloca i1, align 1, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"38"
|
||||
|
||||
"38": ; preds = %1
|
||||
%"53" = load i64, ptr addrspace(4) %"45", align 4
|
||||
store i64 %"53", ptr addrspace(5) %"47", align 4
|
||||
%"54" = load i64, ptr addrspace(4) %"46", align 4
|
||||
store i64 %"54", ptr addrspace(5) %"48", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"70" = inttoptr i64 %"56" to ptr
|
||||
%"55" = load float, ptr %"70", align 4
|
||||
store float %"55", ptr addrspace(5) %"49", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"71" = inttoptr i64 %"57" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"71", i64 4
|
||||
%"58" = load float, ptr %"37", align 4
|
||||
store float %"58", ptr addrspace(5) %"50", align 4
|
||||
%"60" = load float, ptr addrspace(5) %"49", align 4
|
||||
%"61" = load float, ptr addrspace(5) %"50", align 4
|
||||
%"59" = fcmp ogt float %"60", %"61"
|
||||
store i1 %"59", ptr addrspace(5) %"52", align 1
|
||||
%"62" = load i1, ptr addrspace(5) %"52", align 1
|
||||
br i1 %"62", label %"16", label %"17"
|
||||
%"47" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"42", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"64" = inttoptr i64 %"50" to ptr
|
||||
%"49" = load float, ptr %"64", align 4
|
||||
store float %"49", ptr addrspace(5) %"43", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"65" = inttoptr i64 %"51" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"65", i64 4
|
||||
%"52" = load float, ptr %"37", align 4
|
||||
store float %"52", ptr addrspace(5) %"44", align 4
|
||||
%"54" = load float, ptr addrspace(5) %"43", align 4
|
||||
%"55" = load float, ptr addrspace(5) %"44", align 4
|
||||
%"53" = fcmp ogt float %"54", %"55"
|
||||
store i1 %"53", ptr addrspace(5) %"46", align 1
|
||||
%"56" = load i1, ptr addrspace(5) %"46", align 1
|
||||
br i1 %"56", label %"16", label %"17"
|
||||
|
||||
"16": ; preds = %"38"
|
||||
%"64" = load float, ptr addrspace(5) %"49", align 4
|
||||
store float %"64", ptr addrspace(5) %"51", align 4
|
||||
%"58" = load float, ptr addrspace(5) %"43", align 4
|
||||
store float %"58", ptr addrspace(5) %"45", align 4
|
||||
br label %"17"
|
||||
|
||||
"17": ; preds = %"16", %"38"
|
||||
%"65" = load i1, ptr addrspace(5) %"52", align 1
|
||||
br i1 %"65", label %"19", label %"18"
|
||||
%"59" = load i1, ptr addrspace(5) %"46", align 1
|
||||
br i1 %"59", label %"19", label %"18"
|
||||
|
||||
"18": ; preds = %"17"
|
||||
%"67" = load float, ptr addrspace(5) %"50", align 4
|
||||
store float %"67", ptr addrspace(5) %"51", align 4
|
||||
%"61" = load float, ptr addrspace(5) %"44", align 4
|
||||
store float %"61", ptr addrspace(5) %"45", align 4
|
||||
br label %"19"
|
||||
|
||||
"19": ; preds = %"18", %"17"
|
||||
%"68" = load i64, ptr addrspace(5) %"48", align 4
|
||||
%"69" = load float, ptr addrspace(5) %"51", align 4
|
||||
%"72" = inttoptr i64 %"68" to ptr
|
||||
store float %"69", ptr %"72", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"63" = load float, ptr addrspace(5) %"45", align 4
|
||||
%"66" = inttoptr i64 %"62" to ptr
|
||||
store float %"63", ptr %"66", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,69 +1,56 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #1 {
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i64, align 8, addrspace(5)
|
||||
%"49" = alloca float, align 4, addrspace(5)
|
||||
%"50" = alloca float, align 4, addrspace(5)
|
||||
%"51" = alloca float, align 4, addrspace(5)
|
||||
%"52" = alloca i1, align 1, addrspace(5)
|
||||
define amdgpu_kernel void @setp_leu(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #0 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca float, align 4, addrspace(5)
|
||||
%"44" = alloca float, align 4, addrspace(5)
|
||||
%"45" = alloca float, align 4, addrspace(5)
|
||||
%"46" = alloca i1, align 1, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"38"
|
||||
|
||||
"38": ; preds = %1
|
||||
%"53" = load i64, ptr addrspace(4) %"45", align 4
|
||||
store i64 %"53", ptr addrspace(5) %"47", align 4
|
||||
%"54" = load i64, ptr addrspace(4) %"46", align 4
|
||||
store i64 %"54", ptr addrspace(5) %"48", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"70" = inttoptr i64 %"56" to ptr
|
||||
%"55" = load float, ptr %"70", align 4
|
||||
store float %"55", ptr addrspace(5) %"49", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"71" = inttoptr i64 %"57" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"71", i64 4
|
||||
%"58" = load float, ptr %"37", align 4
|
||||
store float %"58", ptr addrspace(5) %"50", align 4
|
||||
%"60" = load float, ptr addrspace(5) %"49", align 4
|
||||
%"61" = load float, ptr addrspace(5) %"50", align 4
|
||||
%"59" = fcmp ule float %"60", %"61"
|
||||
store i1 %"59", ptr addrspace(5) %"52", align 1
|
||||
%"62" = load i1, ptr addrspace(5) %"52", align 1
|
||||
br i1 %"62", label %"16", label %"17"
|
||||
%"47" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"42", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"64" = inttoptr i64 %"50" to ptr
|
||||
%"49" = load float, ptr %"64", align 4
|
||||
store float %"49", ptr addrspace(5) %"43", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"65" = inttoptr i64 %"51" to ptr
|
||||
%"37" = getelementptr inbounds i8, ptr %"65", i64 4
|
||||
%"52" = load float, ptr %"37", align 4
|
||||
store float %"52", ptr addrspace(5) %"44", align 4
|
||||
%"54" = load float, ptr addrspace(5) %"43", align 4
|
||||
%"55" = load float, ptr addrspace(5) %"44", align 4
|
||||
%"53" = fcmp ule float %"54", %"55"
|
||||
store i1 %"53", ptr addrspace(5) %"46", align 1
|
||||
%"56" = load i1, ptr addrspace(5) %"46", align 1
|
||||
br i1 %"56", label %"16", label %"17"
|
||||
|
||||
"16": ; preds = %"38"
|
||||
%"64" = load float, ptr addrspace(5) %"49", align 4
|
||||
store float %"64", ptr addrspace(5) %"51", align 4
|
||||
%"58" = load float, ptr addrspace(5) %"43", align 4
|
||||
store float %"58", ptr addrspace(5) %"45", align 4
|
||||
br label %"17"
|
||||
|
||||
"17": ; preds = %"16", %"38"
|
||||
%"65" = load i1, ptr addrspace(5) %"52", align 1
|
||||
br i1 %"65", label %"19", label %"18"
|
||||
%"59" = load i1, ptr addrspace(5) %"46", align 1
|
||||
br i1 %"59", label %"19", label %"18"
|
||||
|
||||
"18": ; preds = %"17"
|
||||
%"67" = load float, ptr addrspace(5) %"50", align 4
|
||||
store float %"67", ptr addrspace(5) %"51", align 4
|
||||
%"61" = load float, ptr addrspace(5) %"44", align 4
|
||||
store float %"61", ptr addrspace(5) %"45", align 4
|
||||
br label %"19"
|
||||
|
||||
"19": ; preds = %"18", %"17"
|
||||
%"68" = load i64, ptr addrspace(5) %"48", align 4
|
||||
%"69" = load float, ptr addrspace(5) %"51", align 4
|
||||
%"72" = inttoptr i64 %"68" to ptr
|
||||
store float %"69", ptr %"72", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"63" = load float, ptr addrspace(5) %"45", align 4
|
||||
%"66" = inttoptr i64 %"62" to ptr
|
||||
store float %"63", ptr %"66", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,178 +1,165 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"89", ptr addrspace(4) byref(i64) %"90") #1 {
|
||||
%"91" = alloca i64, align 8, addrspace(5)
|
||||
%"92" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @setp_nan(ptr addrspace(4) byref(i64) %"83", ptr addrspace(4) byref(i64) %"84") #0 {
|
||||
%"85" = alloca i64, align 8, addrspace(5)
|
||||
%"86" = alloca i64, align 8, addrspace(5)
|
||||
%"87" = alloca float, align 4, addrspace(5)
|
||||
%"88" = alloca float, align 4, addrspace(5)
|
||||
%"89" = alloca float, align 4, addrspace(5)
|
||||
%"90" = alloca float, align 4, addrspace(5)
|
||||
%"91" = alloca float, align 4, addrspace(5)
|
||||
%"92" = alloca float, align 4, addrspace(5)
|
||||
%"93" = alloca float, align 4, addrspace(5)
|
||||
%"94" = alloca float, align 4, addrspace(5)
|
||||
%"95" = alloca float, align 4, addrspace(5)
|
||||
%"96" = alloca float, align 4, addrspace(5)
|
||||
%"97" = alloca float, align 4, addrspace(5)
|
||||
%"98" = alloca float, align 4, addrspace(5)
|
||||
%"99" = alloca float, align 4, addrspace(5)
|
||||
%"100" = alloca float, align 4, addrspace(5)
|
||||
%"101" = alloca i32, align 4, addrspace(5)
|
||||
%"102" = alloca i1, align 1, addrspace(5)
|
||||
%"95" = alloca i32, align 4, addrspace(5)
|
||||
%"96" = alloca i1, align 1, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"82"
|
||||
|
||||
"82": ; preds = %1
|
||||
%"103" = load i64, ptr addrspace(4) %"89", align 4
|
||||
store i64 %"103", ptr addrspace(5) %"91", align 4
|
||||
%"104" = load i64, ptr addrspace(4) %"90", align 4
|
||||
store i64 %"104", ptr addrspace(5) %"92", align 4
|
||||
%"106" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"157" = inttoptr i64 %"106" to ptr
|
||||
%"105" = load float, ptr %"157", align 4
|
||||
store float %"105", ptr addrspace(5) %"93", align 4
|
||||
%"107" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"158" = inttoptr i64 %"107" to ptr
|
||||
%"55" = getelementptr inbounds i8, ptr %"158", i64 4
|
||||
%"108" = load float, ptr %"55", align 4
|
||||
store float %"108", ptr addrspace(5) %"94", align 4
|
||||
%"109" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"159" = inttoptr i64 %"109" to ptr
|
||||
%"57" = getelementptr inbounds i8, ptr %"159", i64 8
|
||||
%"110" = load float, ptr %"57", align 4
|
||||
store float %"110", ptr addrspace(5) %"95", align 4
|
||||
%"111" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"160" = inttoptr i64 %"111" to ptr
|
||||
%"59" = getelementptr inbounds i8, ptr %"160", i64 12
|
||||
%"112" = load float, ptr %"59", align 4
|
||||
store float %"112", ptr addrspace(5) %"96", align 4
|
||||
%"113" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"161" = inttoptr i64 %"113" to ptr
|
||||
%"61" = getelementptr inbounds i8, ptr %"161", i64 16
|
||||
%"114" = load float, ptr %"61", align 4
|
||||
store float %"114", ptr addrspace(5) %"97", align 4
|
||||
%"115" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"162" = inttoptr i64 %"115" to ptr
|
||||
%"63" = getelementptr inbounds i8, ptr %"162", i64 20
|
||||
%"116" = load float, ptr %"63", align 4
|
||||
store float %"116", ptr addrspace(5) %"98", align 4
|
||||
%"117" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"163" = inttoptr i64 %"117" to ptr
|
||||
%"65" = getelementptr inbounds i8, ptr %"163", i64 24
|
||||
%"118" = load float, ptr %"65", align 4
|
||||
store float %"118", ptr addrspace(5) %"99", align 4
|
||||
%"119" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"164" = inttoptr i64 %"119" to ptr
|
||||
%"67" = getelementptr inbounds i8, ptr %"164", i64 28
|
||||
%"120" = load float, ptr %"67", align 4
|
||||
store float %"120", ptr addrspace(5) %"100", align 4
|
||||
%"122" = load float, ptr addrspace(5) %"93", align 4
|
||||
%"123" = load float, ptr addrspace(5) %"94", align 4
|
||||
%"121" = fcmp uno float %"122", %"123"
|
||||
store i1 %"121", ptr addrspace(5) %"102", align 1
|
||||
%"124" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"124", label %"22", label %"23"
|
||||
%"97" = load i64, ptr addrspace(4) %"83", align 4
|
||||
store i64 %"97", ptr addrspace(5) %"85", align 4
|
||||
%"98" = load i64, ptr addrspace(4) %"84", align 4
|
||||
store i64 %"98", ptr addrspace(5) %"86", align 4
|
||||
%"100" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"151" = inttoptr i64 %"100" to ptr
|
||||
%"99" = load float, ptr %"151", align 4
|
||||
store float %"99", ptr addrspace(5) %"87", align 4
|
||||
%"101" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"152" = inttoptr i64 %"101" to ptr
|
||||
%"55" = getelementptr inbounds i8, ptr %"152", i64 4
|
||||
%"102" = load float, ptr %"55", align 4
|
||||
store float %"102", ptr addrspace(5) %"88", align 4
|
||||
%"103" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"153" = inttoptr i64 %"103" to ptr
|
||||
%"57" = getelementptr inbounds i8, ptr %"153", i64 8
|
||||
%"104" = load float, ptr %"57", align 4
|
||||
store float %"104", ptr addrspace(5) %"89", align 4
|
||||
%"105" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"154" = inttoptr i64 %"105" to ptr
|
||||
%"59" = getelementptr inbounds i8, ptr %"154", i64 12
|
||||
%"106" = load float, ptr %"59", align 4
|
||||
store float %"106", ptr addrspace(5) %"90", align 4
|
||||
%"107" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"155" = inttoptr i64 %"107" to ptr
|
||||
%"61" = getelementptr inbounds i8, ptr %"155", i64 16
|
||||
%"108" = load float, ptr %"61", align 4
|
||||
store float %"108", ptr addrspace(5) %"91", align 4
|
||||
%"109" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"156" = inttoptr i64 %"109" to ptr
|
||||
%"63" = getelementptr inbounds i8, ptr %"156", i64 20
|
||||
%"110" = load float, ptr %"63", align 4
|
||||
store float %"110", ptr addrspace(5) %"92", align 4
|
||||
%"111" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"157" = inttoptr i64 %"111" to ptr
|
||||
%"65" = getelementptr inbounds i8, ptr %"157", i64 24
|
||||
%"112" = load float, ptr %"65", align 4
|
||||
store float %"112", ptr addrspace(5) %"93", align 4
|
||||
%"113" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"158" = inttoptr i64 %"113" to ptr
|
||||
%"67" = getelementptr inbounds i8, ptr %"158", i64 28
|
||||
%"114" = load float, ptr %"67", align 4
|
||||
store float %"114", ptr addrspace(5) %"94", align 4
|
||||
%"116" = load float, ptr addrspace(5) %"87", align 4
|
||||
%"117" = load float, ptr addrspace(5) %"88", align 4
|
||||
%"115" = fcmp uno float %"116", %"117"
|
||||
store i1 %"115", ptr addrspace(5) %"96", align 1
|
||||
%"118" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"118", label %"22", label %"23"
|
||||
|
||||
"22": ; preds = %"82"
|
||||
store i32 1, ptr addrspace(5) %"101", align 4
|
||||
store i32 1, ptr addrspace(5) %"95", align 4
|
||||
br label %"23"
|
||||
|
||||
"23": ; preds = %"22", %"82"
|
||||
%"126" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"126", label %"25", label %"24"
|
||||
%"120" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"120", label %"25", label %"24"
|
||||
|
||||
"24": ; preds = %"23"
|
||||
store i32 0, ptr addrspace(5) %"101", align 4
|
||||
store i32 0, ptr addrspace(5) %"95", align 4
|
||||
br label %"25"
|
||||
|
||||
"25": ; preds = %"24", %"23"
|
||||
%"128" = load i64, ptr addrspace(5) %"92", align 4
|
||||
%"129" = load i32, ptr addrspace(5) %"101", align 4
|
||||
%"165" = inttoptr i64 %"128" to ptr
|
||||
store i32 %"129", ptr %"165", align 4
|
||||
%"131" = load float, ptr addrspace(5) %"95", align 4
|
||||
%"132" = load float, ptr addrspace(5) %"96", align 4
|
||||
%"130" = fcmp uno float %"131", %"132"
|
||||
store i1 %"130", ptr addrspace(5) %"102", align 1
|
||||
%"133" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"133", label %"26", label %"27"
|
||||
%"122" = load i64, ptr addrspace(5) %"86", align 4
|
||||
%"123" = load i32, ptr addrspace(5) %"95", align 4
|
||||
%"159" = inttoptr i64 %"122" to ptr
|
||||
store i32 %"123", ptr %"159", align 4
|
||||
%"125" = load float, ptr addrspace(5) %"89", align 4
|
||||
%"126" = load float, ptr addrspace(5) %"90", align 4
|
||||
%"124" = fcmp uno float %"125", %"126"
|
||||
store i1 %"124", ptr addrspace(5) %"96", align 1
|
||||
%"127" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"127", label %"26", label %"27"
|
||||
|
||||
"26": ; preds = %"25"
|
||||
store i32 1, ptr addrspace(5) %"101", align 4
|
||||
store i32 1, ptr addrspace(5) %"95", align 4
|
||||
br label %"27"
|
||||
|
||||
"27": ; preds = %"26", %"25"
|
||||
%"135" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"135", label %"29", label %"28"
|
||||
%"129" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"129", label %"29", label %"28"
|
||||
|
||||
"28": ; preds = %"27"
|
||||
store i32 0, ptr addrspace(5) %"101", align 4
|
||||
store i32 0, ptr addrspace(5) %"95", align 4
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %"28", %"27"
|
||||
%"137" = load i64, ptr addrspace(5) %"92", align 4
|
||||
%"166" = inttoptr i64 %"137" to ptr
|
||||
%"73" = getelementptr inbounds i8, ptr %"166", i64 4
|
||||
%"138" = load i32, ptr addrspace(5) %"101", align 4
|
||||
store i32 %"138", ptr %"73", align 4
|
||||
%"140" = load float, ptr addrspace(5) %"97", align 4
|
||||
%"141" = load float, ptr addrspace(5) %"98", align 4
|
||||
%"139" = fcmp uno float %"140", %"141"
|
||||
store i1 %"139", ptr addrspace(5) %"102", align 1
|
||||
%"142" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"142", label %"30", label %"31"
|
||||
%"131" = load i64, ptr addrspace(5) %"86", align 4
|
||||
%"160" = inttoptr i64 %"131" to ptr
|
||||
%"73" = getelementptr inbounds i8, ptr %"160", i64 4
|
||||
%"132" = load i32, ptr addrspace(5) %"95", align 4
|
||||
store i32 %"132", ptr %"73", align 4
|
||||
%"134" = load float, ptr addrspace(5) %"91", align 4
|
||||
%"135" = load float, ptr addrspace(5) %"92", align 4
|
||||
%"133" = fcmp uno float %"134", %"135"
|
||||
store i1 %"133", ptr addrspace(5) %"96", align 1
|
||||
%"136" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"136", label %"30", label %"31"
|
||||
|
||||
"30": ; preds = %"29"
|
||||
store i32 1, ptr addrspace(5) %"101", align 4
|
||||
store i32 1, ptr addrspace(5) %"95", align 4
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %"30", %"29"
|
||||
%"144" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"144", label %"33", label %"32"
|
||||
%"138" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"138", label %"33", label %"32"
|
||||
|
||||
"32": ; preds = %"31"
|
||||
store i32 0, ptr addrspace(5) %"101", align 4
|
||||
store i32 0, ptr addrspace(5) %"95", align 4
|
||||
br label %"33"
|
||||
|
||||
"33": ; preds = %"32", %"31"
|
||||
%"146" = load i64, ptr addrspace(5) %"92", align 4
|
||||
%"167" = inttoptr i64 %"146" to ptr
|
||||
%"77" = getelementptr inbounds i8, ptr %"167", i64 8
|
||||
%"147" = load i32, ptr addrspace(5) %"101", align 4
|
||||
store i32 %"147", ptr %"77", align 4
|
||||
%"149" = load float, ptr addrspace(5) %"99", align 4
|
||||
%"150" = load float, ptr addrspace(5) %"100", align 4
|
||||
%"148" = fcmp uno float %"149", %"150"
|
||||
store i1 %"148", ptr addrspace(5) %"102", align 1
|
||||
%"151" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"151", label %"34", label %"35"
|
||||
%"140" = load i64, ptr addrspace(5) %"86", align 4
|
||||
%"161" = inttoptr i64 %"140" to ptr
|
||||
%"77" = getelementptr inbounds i8, ptr %"161", i64 8
|
||||
%"141" = load i32, ptr addrspace(5) %"95", align 4
|
||||
store i32 %"141", ptr %"77", align 4
|
||||
%"143" = load float, ptr addrspace(5) %"93", align 4
|
||||
%"144" = load float, ptr addrspace(5) %"94", align 4
|
||||
%"142" = fcmp uno float %"143", %"144"
|
||||
store i1 %"142", ptr addrspace(5) %"96", align 1
|
||||
%"145" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"145", label %"34", label %"35"
|
||||
|
||||
"34": ; preds = %"33"
|
||||
store i32 1, ptr addrspace(5) %"101", align 4
|
||||
store i32 1, ptr addrspace(5) %"95", align 4
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %"34", %"33"
|
||||
%"153" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"153", label %"37", label %"36"
|
||||
%"147" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"147", label %"37", label %"36"
|
||||
|
||||
"36": ; preds = %"35"
|
||||
store i32 0, ptr addrspace(5) %"101", align 4
|
||||
store i32 0, ptr addrspace(5) %"95", align 4
|
||||
br label %"37"
|
||||
|
||||
"37": ; preds = %"36", %"35"
|
||||
%"155" = load i64, ptr addrspace(5) %"92", align 4
|
||||
%"168" = inttoptr i64 %"155" to ptr
|
||||
%"81" = getelementptr inbounds i8, ptr %"168", i64 12
|
||||
%"156" = load i32, ptr addrspace(5) %"101", align 4
|
||||
store i32 %"156", ptr %"81", align 4
|
||||
%"149" = load i64, ptr addrspace(5) %"86", align 4
|
||||
%"162" = inttoptr i64 %"149" to ptr
|
||||
%"81" = getelementptr inbounds i8, ptr %"162", i64 12
|
||||
%"150" = load i32, ptr addrspace(5) %"95", align 4
|
||||
store i32 %"150", ptr %"81", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,178 +1,165 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"89", ptr addrspace(4) byref(i64) %"90") #1 {
|
||||
%"91" = alloca i64, align 8, addrspace(5)
|
||||
%"92" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @setp_num(ptr addrspace(4) byref(i64) %"83", ptr addrspace(4) byref(i64) %"84") #0 {
|
||||
%"85" = alloca i64, align 8, addrspace(5)
|
||||
%"86" = alloca i64, align 8, addrspace(5)
|
||||
%"87" = alloca float, align 4, addrspace(5)
|
||||
%"88" = alloca float, align 4, addrspace(5)
|
||||
%"89" = alloca float, align 4, addrspace(5)
|
||||
%"90" = alloca float, align 4, addrspace(5)
|
||||
%"91" = alloca float, align 4, addrspace(5)
|
||||
%"92" = alloca float, align 4, addrspace(5)
|
||||
%"93" = alloca float, align 4, addrspace(5)
|
||||
%"94" = alloca float, align 4, addrspace(5)
|
||||
%"95" = alloca float, align 4, addrspace(5)
|
||||
%"96" = alloca float, align 4, addrspace(5)
|
||||
%"97" = alloca float, align 4, addrspace(5)
|
||||
%"98" = alloca float, align 4, addrspace(5)
|
||||
%"99" = alloca float, align 4, addrspace(5)
|
||||
%"100" = alloca float, align 4, addrspace(5)
|
||||
%"101" = alloca i32, align 4, addrspace(5)
|
||||
%"102" = alloca i1, align 1, addrspace(5)
|
||||
%"95" = alloca i32, align 4, addrspace(5)
|
||||
%"96" = alloca i1, align 1, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"82"
|
||||
|
||||
"82": ; preds = %1
|
||||
%"103" = load i64, ptr addrspace(4) %"89", align 4
|
||||
store i64 %"103", ptr addrspace(5) %"91", align 4
|
||||
%"104" = load i64, ptr addrspace(4) %"90", align 4
|
||||
store i64 %"104", ptr addrspace(5) %"92", align 4
|
||||
%"106" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"157" = inttoptr i64 %"106" to ptr
|
||||
%"105" = load float, ptr %"157", align 4
|
||||
store float %"105", ptr addrspace(5) %"93", align 4
|
||||
%"107" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"158" = inttoptr i64 %"107" to ptr
|
||||
%"55" = getelementptr inbounds i8, ptr %"158", i64 4
|
||||
%"108" = load float, ptr %"55", align 4
|
||||
store float %"108", ptr addrspace(5) %"94", align 4
|
||||
%"109" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"159" = inttoptr i64 %"109" to ptr
|
||||
%"57" = getelementptr inbounds i8, ptr %"159", i64 8
|
||||
%"110" = load float, ptr %"57", align 4
|
||||
store float %"110", ptr addrspace(5) %"95", align 4
|
||||
%"111" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"160" = inttoptr i64 %"111" to ptr
|
||||
%"59" = getelementptr inbounds i8, ptr %"160", i64 12
|
||||
%"112" = load float, ptr %"59", align 4
|
||||
store float %"112", ptr addrspace(5) %"96", align 4
|
||||
%"113" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"161" = inttoptr i64 %"113" to ptr
|
||||
%"61" = getelementptr inbounds i8, ptr %"161", i64 16
|
||||
%"114" = load float, ptr %"61", align 4
|
||||
store float %"114", ptr addrspace(5) %"97", align 4
|
||||
%"115" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"162" = inttoptr i64 %"115" to ptr
|
||||
%"63" = getelementptr inbounds i8, ptr %"162", i64 20
|
||||
%"116" = load float, ptr %"63", align 4
|
||||
store float %"116", ptr addrspace(5) %"98", align 4
|
||||
%"117" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"163" = inttoptr i64 %"117" to ptr
|
||||
%"65" = getelementptr inbounds i8, ptr %"163", i64 24
|
||||
%"118" = load float, ptr %"65", align 4
|
||||
store float %"118", ptr addrspace(5) %"99", align 4
|
||||
%"119" = load i64, ptr addrspace(5) %"91", align 4
|
||||
%"164" = inttoptr i64 %"119" to ptr
|
||||
%"67" = getelementptr inbounds i8, ptr %"164", i64 28
|
||||
%"120" = load float, ptr %"67", align 4
|
||||
store float %"120", ptr addrspace(5) %"100", align 4
|
||||
%"122" = load float, ptr addrspace(5) %"93", align 4
|
||||
%"123" = load float, ptr addrspace(5) %"94", align 4
|
||||
%"121" = fcmp ord float %"122", %"123"
|
||||
store i1 %"121", ptr addrspace(5) %"102", align 1
|
||||
%"124" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"124", label %"22", label %"23"
|
||||
%"97" = load i64, ptr addrspace(4) %"83", align 4
|
||||
store i64 %"97", ptr addrspace(5) %"85", align 4
|
||||
%"98" = load i64, ptr addrspace(4) %"84", align 4
|
||||
store i64 %"98", ptr addrspace(5) %"86", align 4
|
||||
%"100" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"151" = inttoptr i64 %"100" to ptr
|
||||
%"99" = load float, ptr %"151", align 4
|
||||
store float %"99", ptr addrspace(5) %"87", align 4
|
||||
%"101" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"152" = inttoptr i64 %"101" to ptr
|
||||
%"55" = getelementptr inbounds i8, ptr %"152", i64 4
|
||||
%"102" = load float, ptr %"55", align 4
|
||||
store float %"102", ptr addrspace(5) %"88", align 4
|
||||
%"103" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"153" = inttoptr i64 %"103" to ptr
|
||||
%"57" = getelementptr inbounds i8, ptr %"153", i64 8
|
||||
%"104" = load float, ptr %"57", align 4
|
||||
store float %"104", ptr addrspace(5) %"89", align 4
|
||||
%"105" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"154" = inttoptr i64 %"105" to ptr
|
||||
%"59" = getelementptr inbounds i8, ptr %"154", i64 12
|
||||
%"106" = load float, ptr %"59", align 4
|
||||
store float %"106", ptr addrspace(5) %"90", align 4
|
||||
%"107" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"155" = inttoptr i64 %"107" to ptr
|
||||
%"61" = getelementptr inbounds i8, ptr %"155", i64 16
|
||||
%"108" = load float, ptr %"61", align 4
|
||||
store float %"108", ptr addrspace(5) %"91", align 4
|
||||
%"109" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"156" = inttoptr i64 %"109" to ptr
|
||||
%"63" = getelementptr inbounds i8, ptr %"156", i64 20
|
||||
%"110" = load float, ptr %"63", align 4
|
||||
store float %"110", ptr addrspace(5) %"92", align 4
|
||||
%"111" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"157" = inttoptr i64 %"111" to ptr
|
||||
%"65" = getelementptr inbounds i8, ptr %"157", i64 24
|
||||
%"112" = load float, ptr %"65", align 4
|
||||
store float %"112", ptr addrspace(5) %"93", align 4
|
||||
%"113" = load i64, ptr addrspace(5) %"85", align 4
|
||||
%"158" = inttoptr i64 %"113" to ptr
|
||||
%"67" = getelementptr inbounds i8, ptr %"158", i64 28
|
||||
%"114" = load float, ptr %"67", align 4
|
||||
store float %"114", ptr addrspace(5) %"94", align 4
|
||||
%"116" = load float, ptr addrspace(5) %"87", align 4
|
||||
%"117" = load float, ptr addrspace(5) %"88", align 4
|
||||
%"115" = fcmp ord float %"116", %"117"
|
||||
store i1 %"115", ptr addrspace(5) %"96", align 1
|
||||
%"118" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"118", label %"22", label %"23"
|
||||
|
||||
"22": ; preds = %"82"
|
||||
store i32 2, ptr addrspace(5) %"101", align 4
|
||||
store i32 2, ptr addrspace(5) %"95", align 4
|
||||
br label %"23"
|
||||
|
||||
"23": ; preds = %"22", %"82"
|
||||
%"126" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"126", label %"25", label %"24"
|
||||
%"120" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"120", label %"25", label %"24"
|
||||
|
||||
"24": ; preds = %"23"
|
||||
store i32 0, ptr addrspace(5) %"101", align 4
|
||||
store i32 0, ptr addrspace(5) %"95", align 4
|
||||
br label %"25"
|
||||
|
||||
"25": ; preds = %"24", %"23"
|
||||
%"128" = load i64, ptr addrspace(5) %"92", align 4
|
||||
%"129" = load i32, ptr addrspace(5) %"101", align 4
|
||||
%"165" = inttoptr i64 %"128" to ptr
|
||||
store i32 %"129", ptr %"165", align 4
|
||||
%"131" = load float, ptr addrspace(5) %"95", align 4
|
||||
%"132" = load float, ptr addrspace(5) %"96", align 4
|
||||
%"130" = fcmp ord float %"131", %"132"
|
||||
store i1 %"130", ptr addrspace(5) %"102", align 1
|
||||
%"133" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"133", label %"26", label %"27"
|
||||
%"122" = load i64, ptr addrspace(5) %"86", align 4
|
||||
%"123" = load i32, ptr addrspace(5) %"95", align 4
|
||||
%"159" = inttoptr i64 %"122" to ptr
|
||||
store i32 %"123", ptr %"159", align 4
|
||||
%"125" = load float, ptr addrspace(5) %"89", align 4
|
||||
%"126" = load float, ptr addrspace(5) %"90", align 4
|
||||
%"124" = fcmp ord float %"125", %"126"
|
||||
store i1 %"124", ptr addrspace(5) %"96", align 1
|
||||
%"127" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"127", label %"26", label %"27"
|
||||
|
||||
"26": ; preds = %"25"
|
||||
store i32 2, ptr addrspace(5) %"101", align 4
|
||||
store i32 2, ptr addrspace(5) %"95", align 4
|
||||
br label %"27"
|
||||
|
||||
"27": ; preds = %"26", %"25"
|
||||
%"135" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"135", label %"29", label %"28"
|
||||
%"129" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"129", label %"29", label %"28"
|
||||
|
||||
"28": ; preds = %"27"
|
||||
store i32 0, ptr addrspace(5) %"101", align 4
|
||||
store i32 0, ptr addrspace(5) %"95", align 4
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %"28", %"27"
|
||||
%"137" = load i64, ptr addrspace(5) %"92", align 4
|
||||
%"166" = inttoptr i64 %"137" to ptr
|
||||
%"73" = getelementptr inbounds i8, ptr %"166", i64 4
|
||||
%"138" = load i32, ptr addrspace(5) %"101", align 4
|
||||
store i32 %"138", ptr %"73", align 4
|
||||
%"140" = load float, ptr addrspace(5) %"97", align 4
|
||||
%"141" = load float, ptr addrspace(5) %"98", align 4
|
||||
%"139" = fcmp ord float %"140", %"141"
|
||||
store i1 %"139", ptr addrspace(5) %"102", align 1
|
||||
%"142" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"142", label %"30", label %"31"
|
||||
%"131" = load i64, ptr addrspace(5) %"86", align 4
|
||||
%"160" = inttoptr i64 %"131" to ptr
|
||||
%"73" = getelementptr inbounds i8, ptr %"160", i64 4
|
||||
%"132" = load i32, ptr addrspace(5) %"95", align 4
|
||||
store i32 %"132", ptr %"73", align 4
|
||||
%"134" = load float, ptr addrspace(5) %"91", align 4
|
||||
%"135" = load float, ptr addrspace(5) %"92", align 4
|
||||
%"133" = fcmp ord float %"134", %"135"
|
||||
store i1 %"133", ptr addrspace(5) %"96", align 1
|
||||
%"136" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"136", label %"30", label %"31"
|
||||
|
||||
"30": ; preds = %"29"
|
||||
store i32 2, ptr addrspace(5) %"101", align 4
|
||||
store i32 2, ptr addrspace(5) %"95", align 4
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %"30", %"29"
|
||||
%"144" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"144", label %"33", label %"32"
|
||||
%"138" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"138", label %"33", label %"32"
|
||||
|
||||
"32": ; preds = %"31"
|
||||
store i32 0, ptr addrspace(5) %"101", align 4
|
||||
store i32 0, ptr addrspace(5) %"95", align 4
|
||||
br label %"33"
|
||||
|
||||
"33": ; preds = %"32", %"31"
|
||||
%"146" = load i64, ptr addrspace(5) %"92", align 4
|
||||
%"167" = inttoptr i64 %"146" to ptr
|
||||
%"77" = getelementptr inbounds i8, ptr %"167", i64 8
|
||||
%"147" = load i32, ptr addrspace(5) %"101", align 4
|
||||
store i32 %"147", ptr %"77", align 4
|
||||
%"149" = load float, ptr addrspace(5) %"99", align 4
|
||||
%"150" = load float, ptr addrspace(5) %"100", align 4
|
||||
%"148" = fcmp ord float %"149", %"150"
|
||||
store i1 %"148", ptr addrspace(5) %"102", align 1
|
||||
%"151" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"151", label %"34", label %"35"
|
||||
%"140" = load i64, ptr addrspace(5) %"86", align 4
|
||||
%"161" = inttoptr i64 %"140" to ptr
|
||||
%"77" = getelementptr inbounds i8, ptr %"161", i64 8
|
||||
%"141" = load i32, ptr addrspace(5) %"95", align 4
|
||||
store i32 %"141", ptr %"77", align 4
|
||||
%"143" = load float, ptr addrspace(5) %"93", align 4
|
||||
%"144" = load float, ptr addrspace(5) %"94", align 4
|
||||
%"142" = fcmp ord float %"143", %"144"
|
||||
store i1 %"142", ptr addrspace(5) %"96", align 1
|
||||
%"145" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"145", label %"34", label %"35"
|
||||
|
||||
"34": ; preds = %"33"
|
||||
store i32 2, ptr addrspace(5) %"101", align 4
|
||||
store i32 2, ptr addrspace(5) %"95", align 4
|
||||
br label %"35"
|
||||
|
||||
"35": ; preds = %"34", %"33"
|
||||
%"153" = load i1, ptr addrspace(5) %"102", align 1
|
||||
br i1 %"153", label %"37", label %"36"
|
||||
%"147" = load i1, ptr addrspace(5) %"96", align 1
|
||||
br i1 %"147", label %"37", label %"36"
|
||||
|
||||
"36": ; preds = %"35"
|
||||
store i32 0, ptr addrspace(5) %"101", align 4
|
||||
store i32 0, ptr addrspace(5) %"95", align 4
|
||||
br label %"37"
|
||||
|
||||
"37": ; preds = %"36", %"35"
|
||||
%"155" = load i64, ptr addrspace(5) %"92", align 4
|
||||
%"168" = inttoptr i64 %"155" to ptr
|
||||
%"81" = getelementptr inbounds i8, ptr %"168", i64 12
|
||||
%"156" = load i32, ptr addrspace(5) %"101", align 4
|
||||
store i32 %"156", ptr %"81", align 4
|
||||
%"149" = load i64, ptr addrspace(5) %"86", align 4
|
||||
%"162" = inttoptr i64 %"149" to ptr
|
||||
%"81" = getelementptr inbounds i8, ptr %"162", i64 12
|
||||
%"150" = load i32, ptr addrspace(5) %"95", align 4
|
||||
store i32 %"150", ptr %"81", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,53 +1,40 @@
|
|||
@shared_mem1 = external addrspace(3) global [128 x i8], align 4
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i32, align 4, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @shared_ptr_32(ptr addrspace(4) byref(i64) %"35", ptr addrspace(4) byref(i64) %"36") #0 {
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i32, align 4, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"34"
|
||||
|
||||
"34": ; preds = %1
|
||||
%"48" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||
%"49" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"44", align 4
|
||||
store i32 ptrtoint (ptr addrspace(3) @shared_mem1 to i32), ptr addrspace(5) %"45", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"60" = inttoptr i64 %"52" to ptr addrspace(1)
|
||||
%"51" = load i64, ptr addrspace(1) %"60", align 4
|
||||
store i64 %"51", ptr addrspace(5) %"46", align 4
|
||||
%"53" = load i32, ptr addrspace(5) %"45", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"61" = inttoptr i32 %"53" to ptr addrspace(3)
|
||||
store i64 %"54", ptr addrspace(3) %"61", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"45", align 4
|
||||
%"62" = inttoptr i32 %"55" to ptr addrspace(3)
|
||||
%"33" = getelementptr inbounds i8, ptr addrspace(3) %"62", i64 0
|
||||
%"56" = load i64, ptr addrspace(3) %"33", align 4
|
||||
store i64 %"56", ptr addrspace(5) %"47", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"63" = inttoptr i64 %"57" to ptr addrspace(1)
|
||||
store i64 %"58", ptr addrspace(1) %"63", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"35", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"38", align 4
|
||||
store i32 ptrtoint (ptr addrspace(3) @shared_mem1 to i32), ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"54" = inttoptr i64 %"46" to ptr addrspace(1)
|
||||
%"45" = load i64, ptr addrspace(1) %"54", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"55" = inttoptr i32 %"47" to ptr addrspace(3)
|
||||
store i64 %"48", ptr addrspace(3) %"55", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"39", align 4
|
||||
%"56" = inttoptr i32 %"49" to ptr addrspace(3)
|
||||
%"33" = getelementptr inbounds i8, ptr addrspace(3) %"56", i64 0
|
||||
%"50" = load i64, ptr addrspace(3) %"33", align 4
|
||||
store i64 %"50", ptr addrspace(5) %"41", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"57" = inttoptr i64 %"51" to ptr addrspace(1)
|
||||
store i64 %"52", ptr addrspace(1) %"57", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,52 +1,39 @@
|
|||
@shared_mem = external addrspace(3) global [0 x i8], align 4
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @shared_ptr_take_address(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"46" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"42", align 4
|
||||
store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"58" = inttoptr i64 %"50" to ptr addrspace(1)
|
||||
%"49" = load i64, ptr addrspace(1) %"58", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"44", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"59" = inttoptr i64 %"51" to ptr addrspace(3)
|
||||
store i64 %"52", ptr addrspace(3) %"59", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"60" = inttoptr i64 %"54" to ptr addrspace(3)
|
||||
%"53" = load i64, ptr addrspace(3) %"60", align 4
|
||||
store i64 %"53", ptr addrspace(5) %"45", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%"61" = inttoptr i64 %"55" to ptr addrspace(1)
|
||||
store i64 %"56", ptr addrspace(1) %"61", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"36", align 4
|
||||
store i64 ptrtoint (ptr addrspace(3) @shared_mem to i64), ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"52" = inttoptr i64 %"44" to ptr addrspace(1)
|
||||
%"43" = load i64, ptr addrspace(1) %"52", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"38", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"53" = inttoptr i64 %"45" to ptr addrspace(3)
|
||||
store i64 %"46", ptr addrspace(3) %"53", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"54" = inttoptr i64 %"48" to ptr addrspace(3)
|
||||
%"47" = load i64, ptr addrspace(3) %"54", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"39", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"55" = inttoptr i64 %"49" to ptr addrspace(1)
|
||||
store i64 %"50", ptr addrspace(1) %"55", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,42 +1,30 @@
|
|||
@shared_ex = external addrspace(3) global [0 x i32]
|
||||
@shared_mod = external addrspace(3) global [4 x i32]
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define i64 @add() #0 {
|
||||
%"52" = alloca i64, align 8, addrspace(5)
|
||||
%"53" = alloca i64, align 8, addrspace(5)
|
||||
%"54" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"41"
|
||||
|
||||
"41": ; preds = %1
|
||||
%"55" = load i64, ptr addrspace(3) @shared_mod, align 4
|
||||
store i64 %"55", ptr addrspace(5) %"53", align 4
|
||||
%"56" = load i64, ptr addrspace(3) @shared_ex, align 4
|
||||
store i64 %"56", ptr addrspace(5) %"54", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"54", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"53", align 4
|
||||
%"81" = add i64 %"58", %"59"
|
||||
store i64 %"81", ptr addrspace(5) %"52", align 4
|
||||
%2 = load i64, ptr addrspace(5) %"52", align 4
|
||||
%"49" = load i64, ptr addrspace(3) @shared_mod, align 4
|
||||
store i64 %"49", ptr addrspace(5) %"47", align 4
|
||||
%"50" = load i64, ptr addrspace(3) @shared_ex, align 4
|
||||
store i64 %"50", ptr addrspace(5) %"48", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"48", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"75" = add i64 %"52", %"53"
|
||||
store i64 %"75", ptr addrspace(5) %"46", align 4
|
||||
%2 = load i64, ptr addrspace(5) %"46", align 4
|
||||
ret i64 %2
|
||||
}
|
||||
|
||||
define i64 @set_shared_temp1(i64 %"15") #0 {
|
||||
%"60" = alloca i64, align 8, addrspace(5)
|
||||
%"54" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
|
@ -44,51 +32,51 @@ define i64 @set_shared_temp1(i64 %"15") #0 {
|
|||
|
||||
"42": ; preds = %1
|
||||
store i64 %"15", ptr addrspace(3) @shared_ex, align 4
|
||||
%"61" = call i64 @add()
|
||||
store i64 %"61", ptr addrspace(5) %"60", align 4
|
||||
%"55" = call i64 @add()
|
||||
store i64 %"55", ptr addrspace(5) %"54", align 4
|
||||
br label %"43"
|
||||
|
||||
"43": ; preds = %"42"
|
||||
%2 = load i64, ptr addrspace(5) %"60", align 4
|
||||
%2 = load i64, ptr addrspace(5) %"54", align 4
|
||||
ret i64 %2
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"62", ptr addrspace(4) byref(i64) %"63") #1 {
|
||||
%"64" = alloca i64, align 8, addrspace(5)
|
||||
%"65" = alloca i64, align 8, addrspace(5)
|
||||
%"66" = alloca i64, align 8, addrspace(5)
|
||||
%"67" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @shared_unify_extern(ptr addrspace(4) byref(i64) %"56", ptr addrspace(4) byref(i64) %"57") #1 {
|
||||
%"58" = alloca i64, align 8, addrspace(5)
|
||||
%"59" = alloca i64, align 8, addrspace(5)
|
||||
%"60" = alloca i64, align 8, addrspace(5)
|
||||
%"61" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"44"
|
||||
|
||||
"44": ; preds = %1
|
||||
%"68" = load i64, ptr addrspace(4) %"62", align 4
|
||||
store i64 %"68", ptr addrspace(5) %"64", align 4
|
||||
%"69" = load i64, ptr addrspace(4) %"63", align 4
|
||||
store i64 %"69", ptr addrspace(5) %"65", align 4
|
||||
%"71" = load i64, ptr addrspace(5) %"64", align 4
|
||||
%"84" = inttoptr i64 %"71" to ptr addrspace(1)
|
||||
%"70" = load i64, ptr addrspace(1) %"84", align 4
|
||||
store i64 %"70", ptr addrspace(5) %"66", align 4
|
||||
%"72" = load i64, ptr addrspace(5) %"64", align 4
|
||||
%"85" = inttoptr i64 %"72" to ptr addrspace(1)
|
||||
%"40" = getelementptr inbounds i8, ptr addrspace(1) %"85", i64 8
|
||||
%"73" = load i64, ptr addrspace(1) %"40", align 4
|
||||
store i64 %"73", ptr addrspace(5) %"67", align 4
|
||||
%"74" = load i64, ptr addrspace(5) %"67", align 4
|
||||
store i64 %"74", ptr addrspace(3) @shared_mod, align 4
|
||||
%"76" = load i64, ptr addrspace(5) %"66", align 4
|
||||
%"87" = call i64 @set_shared_temp1(i64 %"76")
|
||||
store i64 %"87", ptr addrspace(5) %"67", align 4
|
||||
%"62" = load i64, ptr addrspace(4) %"56", align 4
|
||||
store i64 %"62", ptr addrspace(5) %"58", align 4
|
||||
%"63" = load i64, ptr addrspace(4) %"57", align 4
|
||||
store i64 %"63", ptr addrspace(5) %"59", align 4
|
||||
%"65" = load i64, ptr addrspace(5) %"58", align 4
|
||||
%"78" = inttoptr i64 %"65" to ptr addrspace(1)
|
||||
%"64" = load i64, ptr addrspace(1) %"78", align 4
|
||||
store i64 %"64", ptr addrspace(5) %"60", align 4
|
||||
%"66" = load i64, ptr addrspace(5) %"58", align 4
|
||||
%"79" = inttoptr i64 %"66" to ptr addrspace(1)
|
||||
%"40" = getelementptr inbounds i8, ptr addrspace(1) %"79", i64 8
|
||||
%"67" = load i64, ptr addrspace(1) %"40", align 4
|
||||
store i64 %"67", ptr addrspace(5) %"61", align 4
|
||||
%"68" = load i64, ptr addrspace(5) %"61", align 4
|
||||
store i64 %"68", ptr addrspace(3) @shared_mod, align 4
|
||||
%"70" = load i64, ptr addrspace(5) %"60", align 4
|
||||
%"81" = call i64 @set_shared_temp1(i64 %"70")
|
||||
store i64 %"81", ptr addrspace(5) %"61", align 4
|
||||
br label %"45"
|
||||
|
||||
"45": ; preds = %"44"
|
||||
%"77" = load i64, ptr addrspace(5) %"65", align 4
|
||||
%"78" = load i64, ptr addrspace(5) %"67", align 4
|
||||
%"89" = inttoptr i64 %"77" to ptr
|
||||
store i64 %"78", ptr %"89", align 4
|
||||
%"71" = load i64, ptr addrspace(5) %"59", align 4
|
||||
%"72" = load i64, ptr addrspace(5) %"61", align 4
|
||||
%"83" = inttoptr i64 %"71" to ptr
|
||||
store i64 %"72", ptr %"83", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,21 +1,9 @@
|
|||
@shared_ex = external addrspace(3) global [0 x i32]
|
||||
@shared_mod = external addrspace(3) global i64, align 4
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define i64 @add(i64 %"10") #0 {
|
||||
%"53" = alloca i64, align 8, addrspace(5)
|
||||
%"54" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
|
@ -23,18 +11,18 @@ define i64 @add(i64 %"10") #0 {
|
|||
|
||||
"42": ; preds = %1
|
||||
store i64 %"10", ptr addrspace(3) @shared_mod, align 4
|
||||
%"55" = load i64, ptr addrspace(3) @shared_mod, align 4
|
||||
store i64 %"55", ptr addrspace(5) %"54", align 4
|
||||
%"49" = load i64, ptr addrspace(3) @shared_mod, align 4
|
||||
store i64 %"49", ptr addrspace(5) %"48", align 4
|
||||
%"101" = load i64, ptr addrspace(3) @shared_ex, align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"54", align 4
|
||||
%"78" = add i64 %"101", %"57"
|
||||
store i64 %"78", ptr addrspace(5) %"53", align 4
|
||||
%2 = load i64, ptr addrspace(5) %"53", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"48", align 4
|
||||
%"72" = add i64 %"101", %"51"
|
||||
store i64 %"72", ptr addrspace(5) %"47", align 4
|
||||
%2 = load i64, ptr addrspace(5) %"47", align 4
|
||||
ret i64 %2
|
||||
}
|
||||
|
||||
define i64 @set_shared_temp1(i64 %"15", i64 %"16") #0 {
|
||||
%"58" = alloca i64, align 8, addrspace(5)
|
||||
%"52" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
|
@ -42,50 +30,50 @@ define i64 @set_shared_temp1(i64 %"15", i64 %"16") #0 {
|
|||
|
||||
"43": ; preds = %1
|
||||
store i64 %"15", ptr addrspace(3) @shared_ex, align 4
|
||||
%"59" = call i64 @add(i64 %"16")
|
||||
store i64 %"59", ptr addrspace(5) %"58", align 4
|
||||
%"53" = call i64 @add(i64 %"16")
|
||||
store i64 %"53", ptr addrspace(5) %"52", align 4
|
||||
br label %"44"
|
||||
|
||||
"44": ; preds = %"43"
|
||||
%2 = load i64, ptr addrspace(5) %"58", align 4
|
||||
%2 = load i64, ptr addrspace(5) %"52", align 4
|
||||
ret i64 %2
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"60", ptr addrspace(4) byref(i64) %"61") #1 {
|
||||
%"62" = alloca i64, align 8, addrspace(5)
|
||||
%"63" = alloca i64, align 8, addrspace(5)
|
||||
%"64" = alloca i64, align 8, addrspace(5)
|
||||
%"65" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @shared_unify_local(ptr addrspace(4) byref(i64) %"54", ptr addrspace(4) byref(i64) %"55") #1 {
|
||||
%"56" = alloca i64, align 8, addrspace(5)
|
||||
%"57" = alloca i64, align 8, addrspace(5)
|
||||
%"58" = alloca i64, align 8, addrspace(5)
|
||||
%"59" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"45"
|
||||
|
||||
"45": ; preds = %1
|
||||
%"66" = load i64, ptr addrspace(4) %"60", align 4
|
||||
store i64 %"66", ptr addrspace(5) %"62", align 4
|
||||
%"67" = load i64, ptr addrspace(4) %"61", align 4
|
||||
store i64 %"67", ptr addrspace(5) %"63", align 4
|
||||
%"69" = load i64, ptr addrspace(5) %"62", align 4
|
||||
%"81" = inttoptr i64 %"69" to ptr addrspace(1)
|
||||
%"68" = load i64, ptr addrspace(1) %"81", align 4
|
||||
store i64 %"68", ptr addrspace(5) %"64", align 4
|
||||
%"70" = load i64, ptr addrspace(5) %"62", align 4
|
||||
%"82" = inttoptr i64 %"70" to ptr addrspace(1)
|
||||
%"41" = getelementptr inbounds i8, ptr addrspace(1) %"82", i64 8
|
||||
%"71" = load i64, ptr addrspace(1) %"41", align 4
|
||||
store i64 %"71", ptr addrspace(5) %"65", align 4
|
||||
%"73" = load i64, ptr addrspace(5) %"64", align 4
|
||||
%"74" = load i64, ptr addrspace(5) %"65", align 4
|
||||
%"83" = call i64 @set_shared_temp1(i64 %"73", i64 %"74")
|
||||
store i64 %"83", ptr addrspace(5) %"65", align 4
|
||||
%"60" = load i64, ptr addrspace(4) %"54", align 4
|
||||
store i64 %"60", ptr addrspace(5) %"56", align 4
|
||||
%"61" = load i64, ptr addrspace(4) %"55", align 4
|
||||
store i64 %"61", ptr addrspace(5) %"57", align 4
|
||||
%"63" = load i64, ptr addrspace(5) %"56", align 4
|
||||
%"75" = inttoptr i64 %"63" to ptr addrspace(1)
|
||||
%"62" = load i64, ptr addrspace(1) %"75", align 4
|
||||
store i64 %"62", ptr addrspace(5) %"58", align 4
|
||||
%"64" = load i64, ptr addrspace(5) %"56", align 4
|
||||
%"76" = inttoptr i64 %"64" to ptr addrspace(1)
|
||||
%"41" = getelementptr inbounds i8, ptr addrspace(1) %"76", i64 8
|
||||
%"65" = load i64, ptr addrspace(1) %"41", align 4
|
||||
store i64 %"65", ptr addrspace(5) %"59", align 4
|
||||
%"67" = load i64, ptr addrspace(5) %"58", align 4
|
||||
%"68" = load i64, ptr addrspace(5) %"59", align 4
|
||||
%"77" = call i64 @set_shared_temp1(i64 %"67", i64 %"68")
|
||||
store i64 %"77", ptr addrspace(5) %"59", align 4
|
||||
br label %"46"
|
||||
|
||||
"46": ; preds = %"45"
|
||||
%"75" = load i64, ptr addrspace(5) %"63", align 4
|
||||
%"76" = load i64, ptr addrspace(5) %"65", align 4
|
||||
%"85" = inttoptr i64 %"75" to ptr
|
||||
store i64 %"76", ptr %"85", align 4
|
||||
%"69" = load i64, ptr addrspace(5) %"57", align 4
|
||||
%"70" = load i64, ptr addrspace(5) %"59", align 4
|
||||
%"79" = inttoptr i64 %"69" to ptr
|
||||
store i64 %"70", ptr %"79", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,46 +1,33 @@
|
|||
@shared_mem1 = external addrspace(3) global [128 x i8], align 4
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @shared_variable(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"52" = inttoptr i64 %"47" to ptr addrspace(1)
|
||||
%"46" = load i64, ptr addrspace(1) %"52", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"42", align 4
|
||||
store i64 %"48", ptr addrspace(3) @shared_mem1, align 4
|
||||
%"49" = load i64, ptr addrspace(3) @shared_mem1, align 4
|
||||
store i64 %"49", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"55" = inttoptr i64 %"50" to ptr addrspace(1)
|
||||
store i64 %"51", ptr addrspace(1) %"55", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr addrspace(1)
|
||||
%"40" = load i64, ptr addrspace(1) %"46", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"36", align 4
|
||||
store i64 %"42", ptr addrspace(3) @shared_mem1, align 4
|
||||
%"43" = load i64, ptr addrspace(3) @shared_mem1, align 4
|
||||
store i64 %"43", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr addrspace(1)
|
||||
store i64 %"45", ptr addrspace(1) %"49", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,44 +1,31 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @shl(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"52" = inttoptr i64 %"47" to ptr
|
||||
%"46" = load i64, ptr %"52", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%2 = shl i64 %"49", 2
|
||||
%"53" = select i1 false, i64 0, i64 %2
|
||||
store i64 %"53", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"55" = inttoptr i64 %"50" to ptr
|
||||
store i64 %"51", ptr %"55", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr
|
||||
%"40" = load i64, ptr %"46", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%2 = shl i64 %"43", 2
|
||||
%"47" = select i1 false, i64 0, i64 %2
|
||||
store i64 %"47", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
store i64 %"45", ptr %"49", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,43 +1,30 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @shr(ptr addrspace(4) byref(i64) %"37", ptr addrspace(4) byref(i64) %"38") #1 {
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @shr(ptr addrspace(4) byref(i64) %"31", ptr addrspace(4) byref(i64) %"32") #0 {
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"30"
|
||||
|
||||
"30": ; preds = %1
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"43" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"50" = inttoptr i64 %"45" to ptr
|
||||
%"44" = load i32, ptr %"50", align 4
|
||||
store i32 %"44", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%2 = ashr i32 %"47", 1
|
||||
%"46" = select i1 false, i32 0, i32 %2
|
||||
store i32 %"46", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"49" = load i32, ptr addrspace(5) %"41", align 4
|
||||
%"51" = inttoptr i64 %"48" to ptr
|
||||
store i32 %"49", ptr %"51", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"37" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"44" = inttoptr i64 %"39" to ptr
|
||||
%"38" = load i32, ptr %"44", align 4
|
||||
store i32 %"38", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i32, ptr addrspace(5) %"35", align 4
|
||||
%2 = ashr i32 %"41", 1
|
||||
%"40" = select i1 false, i32 0, i32 %2
|
||||
store i32 %"40", ptr addrspace(5) %"35", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"43" = load i32, ptr addrspace(5) %"35", align 4
|
||||
%"45" = inttoptr i64 %"42" to ptr
|
||||
store i32 %"43", ptr %"45", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,40 +1,27 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @sign_extend(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"48" = inttoptr i64 %"44" to ptr
|
||||
%"47" = load i16, ptr %"48", align 2
|
||||
%"43" = sext i16 %"47" to i32
|
||||
store i32 %"43", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"49" = inttoptr i64 %"45" to ptr
|
||||
store i32 %"46", ptr %"49", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"42" = inttoptr i64 %"38" to ptr
|
||||
%"41" = load i16, ptr %"42", align 2
|
||||
%"37" = sext i16 %"41" to i32
|
||||
store i32 %"37", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"40" = load i32, ptr addrspace(5) %"34", align 4
|
||||
%"43" = inttoptr i64 %"39" to ptr
|
||||
store i32 %"40", ptr %"43", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @sin(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load float, ptr %"49", align 4
|
||||
store float %"43", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"45" = call afn float @llvm.sin.f32(float %"46")
|
||||
store float %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"50" = inttoptr i64 %"47" to ptr
|
||||
store float %"48", ptr %"50", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load float, ptr %"43", align 4
|
||||
store float %"37", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"39" = call afn float @llvm.sin.f32(float %"40")
|
||||
store float %"39", ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"44" = inttoptr i64 %"41" to ptr
|
||||
store float %"42", ptr %"44", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare float @llvm.sin.f32(float) #2
|
||||
declare float @llvm.sin.f32(float) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,46 +1,33 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca float, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @sqrt(ptr addrspace(4) byref(i64) %"30", ptr addrspace(4) byref(i64) %"31") #0 {
|
||||
%"32" = alloca i64, align 8, addrspace(5)
|
||||
%"33" = alloca i64, align 8, addrspace(5)
|
||||
%"34" = alloca float, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"29"
|
||||
|
||||
"29": ; preds = %1
|
||||
%"41" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"38", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"39", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
%"43" = load float, ptr %"49", align 4
|
||||
store float %"43", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"45" = call float @llvm.amdgcn.sqrt.f32(float %"46")
|
||||
store float %"45", ptr addrspace(5) %"40", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"48" = load float, ptr addrspace(5) %"40", align 4
|
||||
%"50" = inttoptr i64 %"47" to ptr
|
||||
store float %"48", ptr %"50", align 4
|
||||
%"35" = load i64, ptr addrspace(4) %"30", align 4
|
||||
store i64 %"35", ptr addrspace(5) %"32", align 4
|
||||
%"36" = load i64, ptr addrspace(4) %"31", align 4
|
||||
store i64 %"36", ptr addrspace(5) %"33", align 4
|
||||
%"38" = load i64, ptr addrspace(5) %"32", align 4
|
||||
%"43" = inttoptr i64 %"38" to ptr
|
||||
%"37" = load float, ptr %"43", align 4
|
||||
store float %"37", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"39" = call float @llvm.amdgcn.sqrt.f32(float %"40")
|
||||
store float %"39", ptr addrspace(5) %"34", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"33", align 4
|
||||
%"42" = load float, ptr addrspace(5) %"34", align 4
|
||||
%"44" = inttoptr i64 %"41" to ptr
|
||||
store float %"42", ptr %"44", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
|
||||
declare float @llvm.amdgcn.sqrt.f32(float) #2
|
||||
declare float @llvm.amdgcn.sqrt.f32(float) #1
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="ieee" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
|
@ -1,63 +1,53 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @stateful_ld_st_ntid(ptr addrspace(4) byref(i64) %"41", ptr addrspace(4) byref(i64) %"42") #1 {
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i32, align 4, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @stateful_ld_st_ntid(ptr addrspace(4) byref(i64) %"36", ptr addrspace(4) byref(i64) %"37") #1 {
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i32, align 4, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"33"
|
||||
|
||||
"33": ; preds = %1
|
||||
%"67" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"67", ptr addrspace(5) %"43", align 4
|
||||
%"68" = load i64, ptr addrspace(4) %"42", align 4
|
||||
store i64 %"68", ptr addrspace(5) %"44", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%2 = inttoptr i64 %"51" to ptr
|
||||
%"50" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"50", ptr addrspace(5) %"43", align 8
|
||||
%"53" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%3 = inttoptr i64 %"53" to ptr
|
||||
%"52" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"52", ptr addrspace(5) %"44", align 8
|
||||
%"62" = load i64, ptr addrspace(4) %"36", align 4
|
||||
store i64 %"62", ptr addrspace(5) %"38", align 4
|
||||
%"63" = load i64, ptr addrspace(4) %"37", align 4
|
||||
store i64 %"63", ptr addrspace(5) %"39", align 4
|
||||
%"46" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%2 = inttoptr i64 %"46" to ptr
|
||||
%"45" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"45", ptr addrspace(5) %"38", align 8
|
||||
%"48" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%3 = inttoptr i64 %"48" to ptr
|
||||
%"47" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"47", ptr addrspace(5) %"39", align 8
|
||||
%"32" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
|
||||
br label %"34"
|
||||
|
||||
"34": ; preds = %"33"
|
||||
store i32 %"32", ptr addrspace(5) %"45", align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"45", align 4
|
||||
%"55" = zext i32 %"56" to i64
|
||||
store i64 %"55", ptr addrspace(5) %"46", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"69" = add i64 %"58", %"59"
|
||||
store i64 %"69", ptr addrspace(5) %"43", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"71" = add i64 %"61", %"62"
|
||||
store i64 %"71", ptr addrspace(5) %"44", align 4
|
||||
%"64" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"73" = inttoptr i64 %"64" to ptr addrspace(1)
|
||||
%"63" = load i64, ptr addrspace(1) %"73", align 4
|
||||
store i64 %"63", ptr addrspace(5) %"47", align 4
|
||||
%"65" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"66" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"74" = inttoptr i64 %"65" to ptr addrspace(1)
|
||||
store i64 %"66", ptr addrspace(1) %"74", align 4
|
||||
store i32 %"32", ptr addrspace(5) %"40", align 4
|
||||
%"51" = load i32, ptr addrspace(5) %"40", align 4
|
||||
%"50" = zext i32 %"51" to i64
|
||||
store i64 %"50", ptr addrspace(5) %"41", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"64" = add i64 %"53", %"54"
|
||||
store i64 %"64", ptr addrspace(5) %"38", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"66" = add i64 %"56", %"57"
|
||||
store i64 %"66", ptr addrspace(5) %"39", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"68" = inttoptr i64 %"59" to ptr addrspace(1)
|
||||
%"58" = load i64, ptr addrspace(1) %"68", align 4
|
||||
store i64 %"58", ptr addrspace(5) %"42", align 4
|
||||
%"60" = load i64, ptr addrspace(5) %"39", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"69" = inttoptr i64 %"60" to ptr addrspace(1)
|
||||
store i64 %"61", ptr addrspace(1) %"69", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,67 +1,57 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @stateful_ld_st_ntid_chain(ptr addrspace(4) byref(i64) %"45", ptr addrspace(4) byref(i64) %"46") #1 {
|
||||
define amdgpu_kernel void @stateful_ld_st_ntid_chain(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #1 {
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i32, align 4, addrspace(5)
|
||||
%"49" = alloca i64, align 8, addrspace(5)
|
||||
%"50" = alloca i64, align 8, addrspace(5)
|
||||
%"51" = alloca i64, align 8, addrspace(5)
|
||||
%"52" = alloca i64, align 8, addrspace(5)
|
||||
%"53" = alloca i32, align 4, addrspace(5)
|
||||
%"54" = alloca i64, align 8, addrspace(5)
|
||||
%"55" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"37"
|
||||
|
||||
"37": ; preds = %1
|
||||
%"75" = load i64, ptr addrspace(4) %"45", align 4
|
||||
store i64 %"75", ptr addrspace(5) %"47", align 4
|
||||
%"76" = load i64, ptr addrspace(4) %"46", align 4
|
||||
store i64 %"76", ptr addrspace(5) %"50", align 4
|
||||
%"59" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%2 = inttoptr i64 %"59" to ptr
|
||||
%"58" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"58", ptr addrspace(5) %"48", align 8
|
||||
%"61" = load i64, ptr addrspace(5) %"50", align 4
|
||||
%3 = inttoptr i64 %"61" to ptr
|
||||
%"60" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"60", ptr addrspace(5) %"51", align 8
|
||||
%"70" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"70", ptr addrspace(5) %"42", align 4
|
||||
%"71" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"71", ptr addrspace(5) %"45", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%2 = inttoptr i64 %"54" to ptr
|
||||
%"53" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"53", ptr addrspace(5) %"43", align 8
|
||||
%"56" = load i64, ptr addrspace(5) %"45", align 4
|
||||
%3 = inttoptr i64 %"56" to ptr
|
||||
%"55" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"55", ptr addrspace(5) %"46", align 8
|
||||
%"36" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
|
||||
br label %"38"
|
||||
|
||||
"38": ; preds = %"37"
|
||||
store i32 %"36", ptr addrspace(5) %"53", align 4
|
||||
%"64" = load i32, ptr addrspace(5) %"53", align 4
|
||||
%"63" = zext i32 %"64" to i64
|
||||
store i64 %"63", ptr addrspace(5) %"54", align 4
|
||||
%"66" = load i64, ptr addrspace(5) %"48", align 4
|
||||
%"67" = load i64, ptr addrspace(5) %"54", align 4
|
||||
%"77" = add i64 %"66", %"67"
|
||||
store i64 %"77", ptr addrspace(5) %"49", align 4
|
||||
%"69" = load i64, ptr addrspace(5) %"51", align 4
|
||||
%"70" = load i64, ptr addrspace(5) %"54", align 4
|
||||
%"79" = add i64 %"69", %"70"
|
||||
store i64 %"79", ptr addrspace(5) %"52", align 4
|
||||
%"72" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"81" = inttoptr i64 %"72" to ptr addrspace(1)
|
||||
%"71" = load i64, ptr addrspace(1) %"81", align 4
|
||||
store i64 %"71", ptr addrspace(5) %"55", align 4
|
||||
%"73" = load i64, ptr addrspace(5) %"52", align 4
|
||||
%"74" = load i64, ptr addrspace(5) %"55", align 4
|
||||
%"82" = inttoptr i64 %"73" to ptr addrspace(1)
|
||||
store i64 %"74", ptr addrspace(1) %"82", align 4
|
||||
store i32 %"36", ptr addrspace(5) %"48", align 4
|
||||
%"59" = load i32, ptr addrspace(5) %"48", align 4
|
||||
%"58" = zext i32 %"59" to i64
|
||||
store i64 %"58", ptr addrspace(5) %"49", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"72" = add i64 %"61", %"62"
|
||||
store i64 %"72", ptr addrspace(5) %"44", align 4
|
||||
%"64" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"65" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"74" = add i64 %"64", %"65"
|
||||
store i64 %"74", ptr addrspace(5) %"47", align 4
|
||||
%"67" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"76" = inttoptr i64 %"67" to ptr addrspace(1)
|
||||
%"66" = load i64, ptr addrspace(1) %"76", align 4
|
||||
store i64 %"66", ptr addrspace(5) %"50", align 4
|
||||
%"68" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"69" = load i64, ptr addrspace(5) %"50", align 4
|
||||
%"77" = inttoptr i64 %"68" to ptr addrspace(1)
|
||||
store i64 %"69", ptr addrspace(1) %"77", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,69 +1,59 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @stateful_ld_st_ntid_sub(ptr addrspace(4) byref(i64) %"49", ptr addrspace(4) byref(i64) %"50") #1 {
|
||||
define amdgpu_kernel void @stateful_ld_st_ntid_sub(ptr addrspace(4) byref(i64) %"44", ptr addrspace(4) byref(i64) %"45") #1 {
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
%"47" = alloca i64, align 8, addrspace(5)
|
||||
%"48" = alloca i64, align 8, addrspace(5)
|
||||
%"49" = alloca i64, align 8, addrspace(5)
|
||||
%"50" = alloca i64, align 8, addrspace(5)
|
||||
%"51" = alloca i64, align 8, addrspace(5)
|
||||
%"52" = alloca i64, align 8, addrspace(5)
|
||||
%"52" = alloca i32, align 4, addrspace(5)
|
||||
%"53" = alloca i64, align 8, addrspace(5)
|
||||
%"54" = alloca i64, align 8, addrspace(5)
|
||||
%"55" = alloca i64, align 8, addrspace(5)
|
||||
%"56" = alloca i64, align 8, addrspace(5)
|
||||
%"57" = alloca i32, align 4, addrspace(5)
|
||||
%"58" = alloca i64, align 8, addrspace(5)
|
||||
%"59" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"41"
|
||||
|
||||
"41": ; preds = %1
|
||||
%"79" = load i64, ptr addrspace(4) %"49", align 4
|
||||
store i64 %"79", ptr addrspace(5) %"51", align 4
|
||||
%"80" = load i64, ptr addrspace(4) %"50", align 4
|
||||
store i64 %"80", ptr addrspace(5) %"54", align 4
|
||||
%"63" = load i64, ptr addrspace(5) %"51", align 4
|
||||
%2 = inttoptr i64 %"63" to ptr
|
||||
%"62" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"62", ptr addrspace(5) %"52", align 8
|
||||
%"65" = load i64, ptr addrspace(5) %"54", align 4
|
||||
%3 = inttoptr i64 %"65" to ptr
|
||||
%"64" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"64", ptr addrspace(5) %"55", align 8
|
||||
%"74" = load i64, ptr addrspace(4) %"44", align 4
|
||||
store i64 %"74", ptr addrspace(5) %"46", align 4
|
||||
%"75" = load i64, ptr addrspace(4) %"45", align 4
|
||||
store i64 %"75", ptr addrspace(5) %"49", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%2 = inttoptr i64 %"58" to ptr
|
||||
%"57" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"57", ptr addrspace(5) %"47", align 8
|
||||
%"60" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%3 = inttoptr i64 %"60" to ptr
|
||||
%"59" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"59", ptr addrspace(5) %"50", align 8
|
||||
%"36" = call i32 @__zluda_ptx_impl_sreg_tid(i8 0)
|
||||
br label %"42"
|
||||
|
||||
"42": ; preds = %"41"
|
||||
store i32 %"36", ptr addrspace(5) %"57", align 4
|
||||
%"68" = load i32, ptr addrspace(5) %"57", align 4
|
||||
%"67" = zext i32 %"68" to i64
|
||||
store i64 %"67", ptr addrspace(5) %"58", align 4
|
||||
%"70" = load i64, ptr addrspace(5) %"52", align 4
|
||||
%"71" = load i64, ptr addrspace(5) %"58", align 4
|
||||
%"81" = sub i64 %"70", %"71"
|
||||
store i64 %"81", ptr addrspace(5) %"53", align 4
|
||||
%"73" = load i64, ptr addrspace(5) %"55", align 4
|
||||
%"74" = load i64, ptr addrspace(5) %"58", align 4
|
||||
%"84" = sub i64 %"73", %"74"
|
||||
store i64 %"84", ptr addrspace(5) %"56", align 4
|
||||
%"75" = load i64, ptr addrspace(5) %"53", align 4
|
||||
%"87" = inttoptr i64 %"75" to ptr addrspace(1)
|
||||
%"38" = getelementptr inbounds i8, ptr addrspace(1) %"87", i64 0
|
||||
%"76" = load i64, ptr addrspace(1) %"38", align 4
|
||||
store i64 %"76", ptr addrspace(5) %"59", align 4
|
||||
%"77" = load i64, ptr addrspace(5) %"56", align 4
|
||||
%"88" = inttoptr i64 %"77" to ptr addrspace(1)
|
||||
%"40" = getelementptr inbounds i8, ptr addrspace(1) %"88", i64 0
|
||||
%"78" = load i64, ptr addrspace(5) %"59", align 4
|
||||
store i64 %"78", ptr addrspace(1) %"40", align 4
|
||||
store i32 %"36", ptr addrspace(5) %"52", align 4
|
||||
%"63" = load i32, ptr addrspace(5) %"52", align 4
|
||||
%"62" = zext i32 %"63" to i64
|
||||
store i64 %"62", ptr addrspace(5) %"53", align 4
|
||||
%"65" = load i64, ptr addrspace(5) %"47", align 4
|
||||
%"66" = load i64, ptr addrspace(5) %"53", align 4
|
||||
%"76" = sub i64 %"65", %"66"
|
||||
store i64 %"76", ptr addrspace(5) %"48", align 4
|
||||
%"68" = load i64, ptr addrspace(5) %"50", align 4
|
||||
%"69" = load i64, ptr addrspace(5) %"53", align 4
|
||||
%"79" = sub i64 %"68", %"69"
|
||||
store i64 %"79", ptr addrspace(5) %"51", align 4
|
||||
%"70" = load i64, ptr addrspace(5) %"48", align 4
|
||||
%"82" = inttoptr i64 %"70" to ptr addrspace(1)
|
||||
%"38" = getelementptr inbounds i8, ptr addrspace(1) %"82", i64 0
|
||||
%"71" = load i64, ptr addrspace(1) %"38", align 4
|
||||
store i64 %"71", ptr addrspace(5) %"54", align 4
|
||||
%"72" = load i64, ptr addrspace(5) %"51", align 4
|
||||
%"83" = inttoptr i64 %"72" to ptr addrspace(1)
|
||||
%"40" = getelementptr inbounds i8, ptr addrspace(1) %"83", i64 0
|
||||
%"73" = load i64, ptr addrspace(5) %"54", align 4
|
||||
store i64 %"73", ptr addrspace(1) %"40", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,49 +1,36 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @stateful_ld_st_simple(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @stateful_ld_st_simple(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"45" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"40", align 4
|
||||
%"46" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%2 = inttoptr i64 %"48" to ptr
|
||||
%"55" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"55", ptr addrspace(5) %"42", align 8
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%3 = inttoptr i64 %"50" to ptr
|
||||
%"57" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"57", ptr addrspace(5) %"43", align 8
|
||||
%"52" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"59" = inttoptr i64 %"52" to ptr addrspace(1)
|
||||
%"51" = load i64, ptr addrspace(1) %"59", align 4
|
||||
store i64 %"51", ptr addrspace(5) %"44", align 4
|
||||
%"53" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"60" = inttoptr i64 %"53" to ptr addrspace(1)
|
||||
store i64 %"54", ptr addrspace(1) %"60", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"34", align 4
|
||||
%"40" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"35", align 4
|
||||
%"42" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%2 = inttoptr i64 %"42" to ptr
|
||||
%"49" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"49", ptr addrspace(5) %"36", align 8
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%3 = inttoptr i64 %"44" to ptr
|
||||
%"51" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"51", ptr addrspace(5) %"37", align 8
|
||||
%"46" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"53" = inttoptr i64 %"46" to ptr addrspace(1)
|
||||
%"45" = load i64, ptr addrspace(1) %"53", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"38", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"48" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"54" = inttoptr i64 %"47" to ptr addrspace(1)
|
||||
store i64 %"48", ptr addrspace(1) %"54", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,58 +1,45 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @stateful_neg_offset(ptr addrspace(4) byref(i64) %"39", ptr addrspace(4) byref(i64) %"40") #1 {
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i64, align 8, addrspace(5)
|
||||
%"45" = alloca i64, align 8, addrspace(5)
|
||||
%"46" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @stateful_neg_offset(ptr addrspace(4) byref(i64) %"33", ptr addrspace(4) byref(i64) %"34") #0 {
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
%"38" = alloca i64, align 8, addrspace(5)
|
||||
%"39" = alloca i64, align 8, addrspace(5)
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"32"
|
||||
|
||||
"32": ; preds = %1
|
||||
%"47" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"47", ptr addrspace(5) %"41", align 4
|
||||
%"48" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"48", ptr addrspace(5) %"42", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%2 = inttoptr i64 %"50" to ptr
|
||||
%"63" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"63", ptr addrspace(5) %"43", align 8
|
||||
%"52" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%3 = inttoptr i64 %"52" to ptr
|
||||
%"65" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"65", ptr addrspace(5) %"44", align 8
|
||||
%"54" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"53" = add i64 %"54", %"55"
|
||||
store i64 %"53", ptr addrspace(5) %"45", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"58" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"56" = sub i64 %"57", %"58"
|
||||
store i64 %"56", ptr addrspace(5) %"45", align 4
|
||||
%"60" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"67" = inttoptr i64 %"60" to ptr addrspace(1)
|
||||
%"59" = load i64, ptr addrspace(1) %"67", align 4
|
||||
store i64 %"59", ptr addrspace(5) %"46", align 4
|
||||
%"61" = load i64, ptr addrspace(5) %"44", align 4
|
||||
%"62" = load i64, ptr addrspace(5) %"46", align 4
|
||||
%"68" = inttoptr i64 %"61" to ptr addrspace(1)
|
||||
store i64 %"62", ptr addrspace(1) %"68", align 4
|
||||
%"41" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"41", ptr addrspace(5) %"35", align 4
|
||||
%"42" = load i64, ptr addrspace(4) %"34", align 4
|
||||
store i64 %"42", ptr addrspace(5) %"36", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%2 = inttoptr i64 %"44" to ptr
|
||||
%"57" = addrspacecast ptr %2 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"57", ptr addrspace(5) %"37", align 8
|
||||
%"46" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%3 = inttoptr i64 %"46" to ptr
|
||||
%"59" = addrspacecast ptr %3 to ptr addrspace(1)
|
||||
store ptr addrspace(1) %"59", ptr addrspace(5) %"38", align 8
|
||||
%"48" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"47" = add i64 %"48", %"49"
|
||||
store i64 %"47", ptr addrspace(5) %"39", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"52" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"50" = sub i64 %"51", %"52"
|
||||
store i64 %"50", ptr addrspace(5) %"39", align 4
|
||||
%"54" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"61" = inttoptr i64 %"54" to ptr addrspace(1)
|
||||
%"53" = load i64, ptr addrspace(1) %"61", align 4
|
||||
store i64 %"53", ptr addrspace(5) %"40", align 4
|
||||
%"55" = load i64, ptr addrspace(5) %"38", align 4
|
||||
%"56" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"62" = inttoptr i64 %"55" to ptr addrspace(1)
|
||||
store i64 %"56", ptr addrspace(1) %"62", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,43 +1,30 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @sub(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca i64, align 8, addrspace(5)
|
||||
%"37" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"52" = inttoptr i64 %"47" to ptr
|
||||
%"46" = load i64, ptr %"52", align 4
|
||||
store i64 %"46", ptr addrspace(5) %"42", align 4
|
||||
%"49" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"48" = sub i64 %"49", 1
|
||||
store i64 %"48", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"53" = inttoptr i64 %"50" to ptr
|
||||
store i64 %"51", ptr %"53", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr
|
||||
%"40" = load i64, ptr %"46", align 4
|
||||
store i64 %"40", ptr addrspace(5) %"36", align 4
|
||||
%"43" = load i64, ptr addrspace(5) %"36", align 4
|
||||
%"42" = sub i64 %"43", 1
|
||||
store i64 %"42", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i64, ptr addrspace(5) %"37", align 4
|
||||
%"47" = inttoptr i64 %"44" to ptr
|
||||
store i64 %"45", ptr %"47", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,20 +1,8 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define <2 x i32> @impl(<2 x i32> %"9") #0 {
|
||||
%"53" = alloca <2 x i32>, align 8, addrspace(5)
|
||||
%"54" = alloca <2 x i32>, align 8, addrspace(5)
|
||||
%"55" = alloca i32, align 4, addrspace(5)
|
||||
%"56" = alloca i32, align 4, addrspace(5)
|
||||
%"47" = alloca <2 x i32>, align 8, addrspace(5)
|
||||
%"48" = alloca <2 x i32>, align 8, addrspace(5)
|
||||
%"49" = alloca i32, align 4, addrspace(5)
|
||||
%"50" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
|
@ -22,66 +10,66 @@ define <2 x i32> @impl(<2 x i32> %"9") #0 {
|
|||
|
||||
"44": ; preds = %1
|
||||
%"38" = extractelement <2 x i32> %"9", i8 0
|
||||
store i32 %"38", ptr addrspace(5) %"55", align 4
|
||||
store i32 %"38", ptr addrspace(5) %"49", align 4
|
||||
%"39" = extractelement <2 x i32> %"9", i8 1
|
||||
store i32 %"39", ptr addrspace(5) %"56", align 4
|
||||
%"60" = load i32, ptr addrspace(5) %"55", align 4
|
||||
%"61" = load i32, ptr addrspace(5) %"56", align 4
|
||||
%"59" = add i32 %"60", %"61"
|
||||
store i32 %"59", ptr addrspace(5) %"56", align 4
|
||||
%"62" = load i32, ptr addrspace(5) %"56", align 4
|
||||
%"64" = load <2 x i32>, ptr addrspace(5) %"54", align 8
|
||||
%"63" = insertelement <2 x i32> %"64", i32 %"62", i8 0
|
||||
store <2 x i32> %"63", ptr addrspace(5) %"54", align 8
|
||||
%"65" = load i32, ptr addrspace(5) %"56", align 4
|
||||
%"67" = load <2 x i32>, ptr addrspace(5) %"54", align 8
|
||||
%"66" = insertelement <2 x i32> %"67", i32 %"65", i8 1
|
||||
store <2 x i32> %"66", ptr addrspace(5) %"54", align 8
|
||||
%"68" = load <2 x i32>, ptr addrspace(5) %"54", align 8
|
||||
%"43" = extractelement <2 x i32> %"68", i8 1
|
||||
%"70" = load <2 x i32>, ptr addrspace(5) %"54", align 8
|
||||
%"69" = insertelement <2 x i32> %"70", i32 %"43", i8 0
|
||||
store <2 x i32> %"69", ptr addrspace(5) %"54", align 8
|
||||
%"72" = load <2 x i32>, ptr addrspace(5) %"54", align 8
|
||||
store <2 x i32> %"72", ptr addrspace(5) %"53", align 8
|
||||
%2 = load <2 x i32>, ptr addrspace(5) %"53", align 8
|
||||
store i32 %"39", ptr addrspace(5) %"50", align 4
|
||||
%"54" = load i32, ptr addrspace(5) %"49", align 4
|
||||
%"55" = load i32, ptr addrspace(5) %"50", align 4
|
||||
%"53" = add i32 %"54", %"55"
|
||||
store i32 %"53", ptr addrspace(5) %"50", align 4
|
||||
%"56" = load i32, ptr addrspace(5) %"50", align 4
|
||||
%"58" = load <2 x i32>, ptr addrspace(5) %"48", align 8
|
||||
%"57" = insertelement <2 x i32> %"58", i32 %"56", i8 0
|
||||
store <2 x i32> %"57", ptr addrspace(5) %"48", align 8
|
||||
%"59" = load i32, ptr addrspace(5) %"50", align 4
|
||||
%"61" = load <2 x i32>, ptr addrspace(5) %"48", align 8
|
||||
%"60" = insertelement <2 x i32> %"61", i32 %"59", i8 1
|
||||
store <2 x i32> %"60", ptr addrspace(5) %"48", align 8
|
||||
%"62" = load <2 x i32>, ptr addrspace(5) %"48", align 8
|
||||
%"43" = extractelement <2 x i32> %"62", i8 1
|
||||
%"64" = load <2 x i32>, ptr addrspace(5) %"48", align 8
|
||||
%"63" = insertelement <2 x i32> %"64", i32 %"43", i8 0
|
||||
store <2 x i32> %"63", ptr addrspace(5) %"48", align 8
|
||||
%"66" = load <2 x i32>, ptr addrspace(5) %"48", align 8
|
||||
store <2 x i32> %"66", ptr addrspace(5) %"47", align 8
|
||||
%2 = load <2 x i32>, ptr addrspace(5) %"47", align 8
|
||||
ret <2 x i32> %2
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"73", ptr addrspace(4) byref(i64) %"74") #1 {
|
||||
%"75" = alloca i64, align 8, addrspace(5)
|
||||
%"76" = alloca i64, align 8, addrspace(5)
|
||||
%"77" = alloca <2 x i32>, align 8, addrspace(5)
|
||||
%"78" = alloca i32, align 4, addrspace(5)
|
||||
%"79" = alloca i32, align 4, addrspace(5)
|
||||
%"80" = alloca i64, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @vector(ptr addrspace(4) byref(i64) %"67", ptr addrspace(4) byref(i64) %"68") #1 {
|
||||
%"69" = alloca i64, align 8, addrspace(5)
|
||||
%"70" = alloca i64, align 8, addrspace(5)
|
||||
%"71" = alloca <2 x i32>, align 8, addrspace(5)
|
||||
%"72" = alloca i32, align 4, addrspace(5)
|
||||
%"73" = alloca i32, align 4, addrspace(5)
|
||||
%"74" = alloca i64, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"45"
|
||||
|
||||
"45": ; preds = %1
|
||||
%"81" = load i64, ptr addrspace(4) %"73", align 4
|
||||
store i64 %"81", ptr addrspace(5) %"75", align 4
|
||||
%"82" = load i64, ptr addrspace(4) %"74", align 4
|
||||
store i64 %"82", ptr addrspace(5) %"76", align 4
|
||||
%"84" = load i64, ptr addrspace(5) %"75", align 4
|
||||
%"91" = inttoptr i64 %"84" to ptr
|
||||
%"83" = load <2 x i32>, ptr %"91", align 8
|
||||
store <2 x i32> %"83", ptr addrspace(5) %"77", align 8
|
||||
%"86" = load <2 x i32>, ptr addrspace(5) %"77", align 8
|
||||
%"85" = call <2 x i32> @impl(<2 x i32> %"86")
|
||||
store <2 x i32> %"85", ptr addrspace(5) %"77", align 8
|
||||
%"75" = load i64, ptr addrspace(4) %"67", align 4
|
||||
store i64 %"75", ptr addrspace(5) %"69", align 4
|
||||
%"76" = load i64, ptr addrspace(4) %"68", align 4
|
||||
store i64 %"76", ptr addrspace(5) %"70", align 4
|
||||
%"78" = load i64, ptr addrspace(5) %"69", align 4
|
||||
%"85" = inttoptr i64 %"78" to ptr
|
||||
%"77" = load <2 x i32>, ptr %"85", align 8
|
||||
store <2 x i32> %"77", ptr addrspace(5) %"71", align 8
|
||||
%"80" = load <2 x i32>, ptr addrspace(5) %"71", align 8
|
||||
%"79" = call <2 x i32> @impl(<2 x i32> %"80")
|
||||
store <2 x i32> %"79", ptr addrspace(5) %"71", align 8
|
||||
br label %"46"
|
||||
|
||||
"46": ; preds = %"45"
|
||||
%"88" = load <2 x i32>, ptr addrspace(5) %"77", align 8
|
||||
%"92" = bitcast <2 x i32> %"88" to i64
|
||||
store i64 %"92", ptr addrspace(5) %"80", align 4
|
||||
%"89" = load i64, ptr addrspace(5) %"76", align 4
|
||||
%"90" = load <2 x i32>, ptr addrspace(5) %"77", align 8
|
||||
%"93" = inttoptr i64 %"89" to ptr
|
||||
store <2 x i32> %"90", ptr %"93", align 8
|
||||
%"82" = load <2 x i32>, ptr addrspace(5) %"71", align 8
|
||||
%"86" = bitcast <2 x i32> %"82" to i64
|
||||
store i64 %"86", ptr addrspace(5) %"74", align 4
|
||||
%"83" = load i64, ptr addrspace(5) %"70", align 4
|
||||
%"84" = load <2 x i32>, ptr addrspace(5) %"71", align 8
|
||||
%"87" = inttoptr i64 %"83" to ptr
|
||||
store <2 x i32> %"84", ptr %"87", align 8
|
||||
ret void
|
||||
}
|
||||
|
||||
|
|
|
@ -1,43 +1,30 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"38", ptr addrspace(4) byref(i64) %"39") #1 {
|
||||
%"40" = alloca i64, align 8, addrspace(5)
|
||||
%"41" = alloca i64, align 8, addrspace(5)
|
||||
%"42" = alloca <4 x i32>, align 16, addrspace(5)
|
||||
%"43" = alloca i32, align 4, addrspace(5)
|
||||
define amdgpu_kernel void @vector4(ptr addrspace(4) byref(i64) %"32", ptr addrspace(4) byref(i64) %"33") #0 {
|
||||
%"34" = alloca i64, align 8, addrspace(5)
|
||||
%"35" = alloca i64, align 8, addrspace(5)
|
||||
%"36" = alloca <4 x i32>, align 16, addrspace(5)
|
||||
%"37" = alloca i32, align 4, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"31"
|
||||
|
||||
"31": ; preds = %1
|
||||
%"44" = load i64, ptr addrspace(4) %"38", align 4
|
||||
store i64 %"44", ptr addrspace(5) %"40", align 4
|
||||
%"45" = load i64, ptr addrspace(4) %"39", align 4
|
||||
store i64 %"45", ptr addrspace(5) %"41", align 4
|
||||
%"47" = load i64, ptr addrspace(5) %"40", align 4
|
||||
%"52" = inttoptr i64 %"47" to ptr
|
||||
%"46" = load <4 x i32>, ptr %"52", align 16
|
||||
store <4 x i32> %"46", ptr addrspace(5) %"42", align 16
|
||||
%"48" = load <4 x i32>, ptr addrspace(5) %"42", align 16
|
||||
%"30" = extractelement <4 x i32> %"48", i8 3
|
||||
store i32 %"30", ptr addrspace(5) %"43", align 4
|
||||
%"50" = load i64, ptr addrspace(5) %"41", align 4
|
||||
%"51" = load i32, ptr addrspace(5) %"43", align 4
|
||||
%"55" = inttoptr i64 %"50" to ptr
|
||||
store i32 %"51", ptr %"55", align 4
|
||||
%"38" = load i64, ptr addrspace(4) %"32", align 4
|
||||
store i64 %"38", ptr addrspace(5) %"34", align 4
|
||||
%"39" = load i64, ptr addrspace(4) %"33", align 4
|
||||
store i64 %"39", ptr addrspace(5) %"35", align 4
|
||||
%"41" = load i64, ptr addrspace(5) %"34", align 4
|
||||
%"46" = inttoptr i64 %"41" to ptr
|
||||
%"40" = load <4 x i32>, ptr %"46", align 16
|
||||
store <4 x i32> %"40", ptr addrspace(5) %"36", align 16
|
||||
%"42" = load <4 x i32>, ptr addrspace(5) %"36", align 16
|
||||
%"30" = extractelement <4 x i32> %"42", i8 3
|
||||
store i32 %"30", ptr addrspace(5) %"37", align 4
|
||||
%"44" = load i64, ptr addrspace(5) %"35", align 4
|
||||
%"45" = load i32, ptr addrspace(5) %"37", align 4
|
||||
%"49" = inttoptr i64 %"44" to ptr
|
||||
store i32 %"45", ptr %"49", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
|
@ -1,99 +1,86 @@
|
|||
declare i32 @__zluda_ptx_impl_sreg_tid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ntid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_ctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_nctaid(i8) #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_clock() #0
|
||||
|
||||
declare i32 @__zluda_ptx_impl_sreg_lanemask_lt() #0
|
||||
|
||||
define amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"46", ptr addrspace(4) byref(i64) %"47") #1 {
|
||||
%"48" = alloca i64, align 8, addrspace(5)
|
||||
%"49" = alloca i64, align 8, addrspace(5)
|
||||
%"50" = alloca i16, align 2, addrspace(5)
|
||||
%"51" = alloca i16, align 2, addrspace(5)
|
||||
%"52" = alloca i16, align 2, addrspace(5)
|
||||
%"53" = alloca i16, align 2, addrspace(5)
|
||||
%"54" = alloca <4 x i16>, align 8, addrspace(5)
|
||||
define amdgpu_kernel void @vector_extract(ptr addrspace(4) byref(i64) %"40", ptr addrspace(4) byref(i64) %"41") #0 {
|
||||
%"42" = alloca i64, align 8, addrspace(5)
|
||||
%"43" = alloca i64, align 8, addrspace(5)
|
||||
%"44" = alloca i16, align 2, addrspace(5)
|
||||
%"45" = alloca i16, align 2, addrspace(5)
|
||||
%"46" = alloca i16, align 2, addrspace(5)
|
||||
%"47" = alloca i16, align 2, addrspace(5)
|
||||
%"48" = alloca <4 x i16>, align 8, addrspace(5)
|
||||
br label %1
|
||||
|
||||
1: ; preds = %0
|
||||
br label %"39"
|
||||
|
||||
"39": ; preds = %1
|
||||
%"55" = load i64, ptr addrspace(4) %"46", align 4
|
||||
store i64 %"55", ptr addrspace(5) %"48", align 4
|
||||
%"56" = load i64, ptr addrspace(4) %"47", align 4
|
||||
store i64 %"56", ptr addrspace(5) %"49", align 4
|
||||
%"57" = load i64, ptr addrspace(5) %"48", align 4
|
||||
%"85" = inttoptr i64 %"57" to ptr addrspace(1)
|
||||
%"33" = load <4 x i8>, ptr addrspace(1) %"85", align 4
|
||||
%"86" = extractelement <4 x i8> %"33", i8 0
|
||||
%"87" = extractelement <4 x i8> %"33", i8 1
|
||||
%"88" = extractelement <4 x i8> %"33", i8 2
|
||||
%"89" = extractelement <4 x i8> %"33", i8 3
|
||||
%"58" = zext i8 %"86" to i16
|
||||
%"59" = zext i8 %"87" to i16
|
||||
%"60" = zext i8 %"88" to i16
|
||||
%"61" = zext i8 %"89" to i16
|
||||
store i16 %"58", ptr addrspace(5) %"50", align 2
|
||||
store i16 %"59", ptr addrspace(5) %"51", align 2
|
||||
store i16 %"60", ptr addrspace(5) %"52", align 2
|
||||
store i16 %"61", ptr addrspace(5) %"53", align 2
|
||||
%"62" = load i16, ptr addrspace(5) %"51", align 2
|
||||
%"63" = load i16, ptr addrspace(5) %"52", align 2
|
||||
%"64" = load i16, ptr addrspace(5) %"53", align 2
|
||||
%"65" = load i16, ptr addrspace(5) %"50", align 2
|
||||
%2 = insertelement <4 x i16> undef, i16 %"62", i8 0
|
||||
%3 = insertelement <4 x i16> %2, i16 %"63", i8 1
|
||||
%4 = insertelement <4 x i16> %3, i16 %"64", i8 2
|
||||
%"34" = insertelement <4 x i16> %4, i16 %"65", i8 3
|
||||
store <4 x i16> %"34", ptr addrspace(5) %"54", align 8
|
||||
%"67" = load <4 x i16>, ptr addrspace(5) %"54", align 8
|
||||
%"68" = extractelement <4 x i16> %"67", i8 0
|
||||
%"69" = extractelement <4 x i16> %"67", i8 1
|
||||
%"70" = extractelement <4 x i16> %"67", i8 2
|
||||
%"71" = extractelement <4 x i16> %"67", i8 3
|
||||
store i16 %"68", ptr addrspace(5) %"52", align 2
|
||||
store i16 %"69", ptr addrspace(5) %"53", align 2
|
||||
store i16 %"70", ptr addrspace(5) %"50", align 2
|
||||
store i16 %"71", ptr addrspace(5) %"51", align 2
|
||||
%"72" = load i16, ptr addrspace(5) %"52", align 2
|
||||
%"73" = load i16, ptr addrspace(5) %"53", align 2
|
||||
%"74" = load i16, ptr addrspace(5) %"50", align 2
|
||||
%"75" = load i16, ptr addrspace(5) %"51", align 2
|
||||
%5 = insertelement <4 x i16> undef, i16 %"72", i8 0
|
||||
%6 = insertelement <4 x i16> %5, i16 %"73", i8 1
|
||||
%7 = insertelement <4 x i16> %6, i16 %"74", i8 2
|
||||
%"37" = insertelement <4 x i16> %7, i16 %"75", i8 3
|
||||
%"76" = extractelement <4 x i16> %"37", i8 0
|
||||
%"77" = extractelement <4 x i16> %"37", i8 1
|
||||
%"78" = extractelement <4 x i16> %"37", i8 2
|
||||
%"79" = extractelement <4 x i16> %"37", i8 3
|
||||
store i16 %"76", ptr addrspace(5) %"53", align 2
|
||||
store i16 %"77", ptr addrspace(5) %"50", align 2
|
||||
store i16 %"78", ptr addrspace(5) %"51", align 2
|
||||
store i16 %"79", ptr addrspace(5) %"52", align 2
|
||||
%"80" = load i16, ptr addrspace(5) %"50", align 2
|
||||
%"81" = load i16, ptr addrspace(5) %"51", align 2
|
||||
%"82" = load i16, ptr addrspace(5) %"52", align 2
|
||||
%"83" = load i16, ptr addrspace(5) %"53", align 2
|
||||
%"90" = trunc i16 %"80" to i8
|
||||
%"91" = trunc i16 %"81" to i8
|
||||
%"92" = trunc i16 %"82" to i8
|
||||
%"93" = trunc i16 %"83" to i8
|
||||
%8 = insertelement <4 x i8> undef, i8 %"90", i8 0
|
||||
%9 = insertelement <4 x i8> %8, i8 %"91", i8 1
|
||||
%10 = insertelement <4 x i8> %9, i8 %"92", i8 2
|
||||
%"38" = insertelement <4 x i8> %10, i8 %"93", i8 3
|
||||
%"84" = load i64, ptr addrspace(5) %"49", align 4
|
||||
%"94" = inttoptr i64 %"84" to ptr addrspace(1)
|
||||
store <4 x i8> %"38", ptr addrspace(1) %"94", align 4
|
||||
%"49" = load i64, ptr addrspace(4) %"40", align 4
|
||||
store i64 %"49", ptr addrspace(5) %"42", align 4
|
||||
%"50" = load i64, ptr addrspace(4) %"41", align 4
|
||||
store i64 %"50", ptr addrspace(5) %"43", align 4
|
||||
%"51" = load i64, ptr addrspace(5) %"42", align 4
|
||||
%"79" = inttoptr i64 %"51" to ptr addrspace(1)
|
||||
%"33" = load <4 x i8>, ptr addrspace(1) %"79", align 4
|
||||
%"80" = extractelement <4 x i8> %"33", i8 0
|
||||
%"81" = extractelement <4 x i8> %"33", i8 1
|
||||
%"82" = extractelement <4 x i8> %"33", i8 2
|
||||
%"83" = extractelement <4 x i8> %"33", i8 3
|
||||
%"52" = zext i8 %"80" to i16
|
||||
%"53" = zext i8 %"81" to i16
|
||||
%"54" = zext i8 %"82" to i16
|
||||
%"55" = zext i8 %"83" to i16
|
||||
store i16 %"52", ptr addrspace(5) %"44", align 2
|
||||
store i16 %"53", ptr addrspace(5) %"45", align 2
|
||||
store i16 %"54", ptr addrspace(5) %"46", align 2
|
||||
store i16 %"55", ptr addrspace(5) %"47", align 2
|
||||
%"56" = load i16, ptr addrspace(5) %"45", align 2
|
||||
%"57" = load i16, ptr addrspace(5) %"46", align 2
|
||||
%"58" = load i16, ptr addrspace(5) %"47", align 2
|
||||
%"59" = load i16, ptr addrspace(5) %"44", align 2
|
||||
%2 = insertelement <4 x i16> undef, i16 %"56", i8 0
|
||||
%3 = insertelement <4 x i16> %2, i16 %"57", i8 1
|
||||
%4 = insertelement <4 x i16> %3, i16 %"58", i8 2
|
||||
%"34" = insertelement <4 x i16> %4, i16 %"59", i8 3
|
||||
store <4 x i16> %"34", ptr addrspace(5) %"48", align 8
|
||||
%"61" = load <4 x i16>, ptr addrspace(5) %"48", align 8
|
||||
%"62" = extractelement <4 x i16> %"61", i8 0
|
||||
%"63" = extractelement <4 x i16> %"61", i8 1
|
||||
%"64" = extractelement <4 x i16> %"61", i8 2
|
||||
%"65" = extractelement <4 x i16> %"61", i8 3
|
||||
store i16 %"62", ptr addrspace(5) %"46", align 2
|
||||
store i16 %"63", ptr addrspace(5) %"47", align 2
|
||||
store i16 %"64", ptr addrspace(5) %"44", align 2
|
||||
store i16 %"65", ptr addrspace(5) %"45", align 2
|
||||
%"66" = load i16, ptr addrspace(5) %"46", align 2
|
||||
%"67" = load i16, ptr addrspace(5) %"47", align 2
|
||||
%"68" = load i16, ptr addrspace(5) %"44", align 2
|
||||
%"69" = load i16, ptr addrspace(5) %"45", align 2
|
||||
%5 = insertelement <4 x i16> undef, i16 %"66", i8 0
|
||||
%6 = insertelement <4 x i16> %5, i16 %"67", i8 1
|
||||
%7 = insertelement <4 x i16> %6, i16 %"68", i8 2
|
||||
%"37" = insertelement <4 x i16> %7, i16 %"69", i8 3
|
||||
%"70" = extractelement <4 x i16> %"37", i8 0
|
||||
%"71" = extractelement <4 x i16> %"37", i8 1
|
||||
%"72" = extractelement <4 x i16> %"37", i8 2
|
||||
%"73" = extractelement <4 x i16> %"37", i8 3
|
||||
store i16 %"70", ptr addrspace(5) %"47", align 2
|
||||
store i16 %"71", ptr addrspace(5) %"44", align 2
|
||||
store i16 %"72", ptr addrspace(5) %"45", align 2
|
||||
store i16 %"73", ptr addrspace(5) %"46", align 2
|
||||
%"74" = load i16, ptr addrspace(5) %"44", align 2
|
||||
%"75" = load i16, ptr addrspace(5) %"45", align 2
|
||||
%"76" = load i16, ptr addrspace(5) %"46", align 2
|
||||
%"77" = load i16, ptr addrspace(5) %"47", align 2
|
||||
%"84" = trunc i16 %"74" to i8
|
||||
%"85" = trunc i16 %"75" to i8
|
||||
%"86" = trunc i16 %"76" to i8
|
||||
%"87" = trunc i16 %"77" to i8
|
||||
%8 = insertelement <4 x i8> undef, i8 %"84", i8 0
|
||||
%9 = insertelement <4 x i8> %8, i8 %"85", i8 1
|
||||
%10 = insertelement <4 x i8> %9, i8 %"86", i8 2
|
||||
%"38" = insertelement <4 x i8> %10, i8 %"87", i8 3
|
||||
%"78" = load i64, ptr addrspace(5) %"43", align 4
|
||||
%"88" = inttoptr i64 %"78" to ptr addrspace(1)
|
||||
store <4 x i8> %"38", ptr addrspace(1) %"88", align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="dynamic" "denormal-fp-math-f32"="dynamic" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
||||
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" "denormal-fp-math"="preserve-sign" "denormal-fp-math-f32"="preserve-sign" "no-trapping-math"="true" "uniform-work-group-size"="true" }
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue