From e34907025d0f879d862929d8d7ce861d8fba0c43 Mon Sep 17 00:00:00 2001 From: vyuuui Date: Mon, 24 Mar 2025 07:16:14 -0700 Subject: [PATCH] Add numeric label support to assembler --- Source/Core/Common/Assembler/GekkoIRGen.cpp | 90 +++++++++++++++++++ Source/Core/Common/Assembler/GekkoLexer.cpp | 44 +++++++-- Source/Core/Common/Assembler/GekkoLexer.h | 2 + Source/Core/Common/Assembler/GekkoParser.cpp | 40 +++++++++ Source/Core/Common/Assembler/GekkoParser.h | 3 + .../Debugger/GekkoSyntaxHighlight.cpp | 9 ++ Source/UnitTests/Common/AssemblerTest.cpp | 45 +++++++++- 7 files changed, 224 insertions(+), 9 deletions(-) diff --git a/Source/Core/Common/Assembler/GekkoIRGen.cpp b/Source/Core/Common/Assembler/GekkoIRGen.cpp index 4640ec529e..6afda9cde6 100644 --- a/Source/Core/Common/Assembler/GekkoIRGen.cpp +++ b/Source/Core/Common/Assembler/GekkoIRGen.cpp @@ -46,6 +46,7 @@ public: void OnLoaddr(std::string_view id) override; void OnCloseParen(ParenType type) override; void OnLabelDecl(std::string_view name) override; + void OnNumericLabelDecl(std::string_view name, u32 num) override; void OnVarDecl(std::string_view name) override; void PostParseAction() override; @@ -75,6 +76,7 @@ public: void AddAbsoluteAddressConv(); void AddLiteral(u32 lit); void AddSymbolResolve(std::string_view sym, bool absolute); + void AddNumLabelSymResolve(std::string_view sym, u32 num); void RunFixups(); @@ -97,6 +99,8 @@ private: u64* m_active_var; size_t m_operand_scan_begin; + // Ordered top-to-bottom, stores (label number, address) + std::vector> m_numlabs; std::map> m_labels; std::map> m_constants; std::set m_symset; @@ -362,6 +366,11 @@ void GekkoIRPlugin::OnLabelDecl(std::string_view name) m_symset.insert(name_str); } +void GekkoIRPlugin::OnNumericLabelDecl(std::string_view, u32 num) +{ + m_numlabs.emplace_back(num, m_active_block->BlockEndAddress()); +} + void GekkoIRPlugin::OnVarDecl(std::string_view name) { const std::string name_str(name); @@ -555,6 +564,35 @@ void GekkoIRPlugin::AddSymbolResolve(std::string_view sym, bool absolute) }); } +void GekkoIRPlugin::AddNumLabelSymResolve(std::string_view sym, u32 num) +{ + const u32 source_address = m_active_block->BlockEndAddress(); + AssemblerError err_on_fail = AssemblerError{ + fmt::format("No numeric label '{}' found below here", num), + m_owner->lexer.CurrentLine(), + m_owner->lexer.LineNumber(), + // Lexer should currently point to the label, as it hasn't been eaten yet + m_owner->lexer.ColNumber(), + sym.size(), + }; + + // Searching forward only + size_t search_start_idx = static_cast(m_numlabs.size()); + m_fixup_stack.emplace( + [this, num, source_address, search_start_idx, err_on_fail = std::move(err_on_fail)]() { + for (size_t i = search_start_idx; i < m_numlabs.size(); i++) + { + if (num == m_numlabs[i].first) + { + return m_numlabs[i].second - source_address; + } + } + + m_owner->error = std::move(err_on_fail); + return u32{0}; + }); +} + void GekkoIRPlugin::SaveOperandFixup(size_t str_left, size_t str_right) { m_operand_fixups.emplace_back(std::move(m_fixup_stack.top())); @@ -715,6 +753,33 @@ void GekkoIRPlugin::EvalTerminalRel(Terminal type, const AssemblerToken& tok) break; } + case Terminal::NumLabFwd: + { + std::optional val = tok.EvalToken(); + ASSERT(val.has_value()); + AddNumLabelSymResolve(tok.token_val, *val); + break; + } + + case Terminal::NumLabBwd: + { + std::optional mval = tok.EvalToken(); + ASSERT(mval.has_value()); + u32 val = *mval; + if (auto label_it = std::find_if(m_numlabs.rbegin(), m_numlabs.rend(), + [val](std::pair p) { return p.first == val; }); + label_it != m_numlabs.rend()) + { + AddLiteral(label_it->second - CurrentAddress()); + } + else + { + m_owner->EmitErrorHere(fmt::format("No numeric label '{}' found above here", val)); + return; + } + break; + } + // Parser should disallow this from happening default: ASSERT(false); @@ -780,6 +845,31 @@ void GekkoIRPlugin::EvalTerminalAbs(Terminal type, const AssemblerToken& tok) break; } + case Terminal::NumLabFwd: + m_owner->EmitErrorHere( + fmt::format("Forward label references not supported in fully resolved expressons")); + break; + + case Terminal::NumLabBwd: + { + std::optional mval = tok.EvalToken(); + ASSERT(mval.has_value()); + u32 val = *mval; + if (auto label_it = std::find_if(m_numlabs.rbegin(), m_numlabs.rend(), + [val](std::pair p) { return p.first == val; }); + label_it != m_numlabs.rend()) + { + m_eval_stack.push_back(label_it->second); + } + else + { + m_owner->EmitErrorHere(fmt::format("No numeric label '{}' found above here", val)); + return; + } + + break; + } + // Parser should disallow this from happening default: ASSERT(false); diff --git a/Source/Core/Common/Assembler/GekkoLexer.cpp b/Source/Core/Common/Assembler/GekkoLexer.cpp index d14270d2b9..2b643601e1 100644 --- a/Source/Core/Common/Assembler/GekkoLexer.cpp +++ b/Source/Core/Common/Assembler/GekkoLexer.cpp @@ -201,6 +201,9 @@ std::optional EvalIntegral(TokenType tp, std::string_view val) return T{2}; case TokenType::So: return T{3}; + case TokenType::NumLabFwd: + case TokenType::NumLabBwd: + return std::accumulate(val.begin(), val.end() - 1, T{0}, dec_step); default: return std::nullopt; } @@ -723,9 +726,9 @@ AssemblerToken Lexer::LexSingle() const } else if (h == '0') { - const char imm_type = Peek(); + const char nextch = Peek(); - if (imm_type == 'x') + if (nextch == 'x') { token_type = TokenType::HexadecimalLit; Step(); @@ -733,21 +736,33 @@ AssemblerToken Lexer::LexSingle() const { } } - else if (imm_type == 'b') + else if (nextch == 'b') { - token_type = TokenType::BinaryLit; Step(); - for (char c = Peek(); IsBinary(c); c = Step().Peek()) + if (!IsBinary(Peek())) { + token_type = TokenType::NumLabBwd; + } + else + { + token_type = TokenType::BinaryLit; + for (char c = Peek(); IsBinary(c); c = Step().Peek()) + { + } } } - else if (IsOctal(imm_type)) + else if (IsOctal(nextch)) { token_type = TokenType::OctalLit; for (char c = Peek(); IsOctal(c); c = Step().Peek()) { } } + else if (nextch == 'f') + { + Step(); + token_type = TokenType::NumLabFwd; + } else { token_type = TokenType::DecimalLit; @@ -758,7 +773,22 @@ AssemblerToken Lexer::LexSingle() const for (char c = Peek(); std::isdigit(c); c = Step().Peek()) { } - token_type = TokenType::DecimalLit; + switch (Peek()) + { + case 'f': + token_type = TokenType::NumLabFwd; + Step(); + break; + + case 'b': + token_type = TokenType::NumLabBwd; + Step(); + break; + + default: + token_type = TokenType::DecimalLit; + break; + } } else if (h == '<' || h == '>') { diff --git a/Source/Core/Common/Assembler/GekkoLexer.h b/Source/Core/Common/Assembler/GekkoLexer.h index 9ff78c04d0..bd99fa511b 100644 --- a/Source/Core/Common/Assembler/GekkoLexer.h +++ b/Source/Core/Common/Assembler/GekkoLexer.h @@ -37,6 +37,8 @@ enum class TokenType Gt, Eq, So, + NumLabFwd, + NumLabBwd, // EOL signifies boundaries between instructions, a la ';' Eol, Eof, diff --git a/Source/Core/Common/Assembler/GekkoParser.cpp b/Source/Core/Common/Assembler/GekkoParser.cpp index 26b5bb1082..ddd3f65e1f 100644 --- a/Source/Core/Common/Assembler/GekkoParser.cpp +++ b/Source/Core/Common/Assembler/GekkoParser.cpp @@ -84,6 +84,26 @@ void ParseId(ParseState* state) } } +void ParseNumLocation(ParseState* state) +{ + AssemblerToken tok = state->lexer.Lookahead(); + switch (tok.token_type) + { + case TokenType::NumLabFwd: + state->plugin.OnTerminal(Terminal::NumLabFwd, tok); + break; + + case TokenType::NumLabBwd: + state->plugin.OnTerminal(Terminal::NumLabBwd, tok); + break; + + default: + state->EmitErrorHere(fmt::format("Invalid {} with value '{}'", tok.TypeStr(), tok.ValStr())); + return; + } + state->lexer.Eat(); +} + void ParseIdLocation(ParseState* state) { std::array toks; @@ -184,6 +204,11 @@ void ParseBaseexpr(ParseState* state) ParsePpcBuiltin(state); break; + case TokenType::NumLabFwd: + case TokenType::NumLabBwd: + ParseNumLocation(state); + break; + case TokenType::Dot: state->plugin.OnTerminal(Terminal::Dot, state->lexer.Lookahead()); if (state->error) @@ -589,6 +614,21 @@ void ParseLabel(ParseState* state) } state->lexer.EatN<2>(); } + + if (tokens[0].token_type == TokenType::DecimalLit && tokens[1].token_type == TokenType::Colon) + { + std::optional labnum = tokens[0].EvalToken(); + if (!labnum) + { + return; + } + state->plugin.OnNumericLabelDecl(tokens[0].token_val, *labnum); + if (state->error) + { + return; + } + state->lexer.EatN<2>(); + } } void ParseResolvedExpr(ParseState* state) diff --git a/Source/Core/Common/Assembler/GekkoParser.h b/Source/Core/Common/Assembler/GekkoParser.h index 4258b5d582..d3b270258d 100644 --- a/Source/Core/Common/Assembler/GekkoParser.h +++ b/Source/Core/Common/Assembler/GekkoParser.h @@ -63,6 +63,8 @@ enum class Terminal Eq, So, Dot, + NumLabFwd, + NumLabBwd, }; enum class ParenType @@ -111,6 +113,7 @@ public: virtual void OnCloseParen(ParenType type) {} virtual void OnError() {} virtual void OnLabelDecl(std::string_view name) {} + virtual void OnNumericLabelDecl(std::string_view name, u32 parse_num) {} virtual void OnVarDecl(std::string_view name) {} protected: diff --git a/Source/Core/DolphinQt/Debugger/GekkoSyntaxHighlight.cpp b/Source/Core/DolphinQt/Debugger/GekkoSyntaxHighlight.cpp index d29bf100db..3311bae28b 100644 --- a/Source/Core/DolphinQt/Debugger/GekkoSyntaxHighlight.cpp +++ b/Source/Core/DolphinQt/Debugger/GekkoSyntaxHighlight.cpp @@ -35,6 +35,8 @@ public: { switch (type) { + case Terminal::NumLabFwd: + case Terminal::NumLabBwd: case Terminal::Id: HighlightCurToken(HighlightFormat::Symbol); break; @@ -119,6 +121,13 @@ public: m_formatting.emplace_back(len, off, HighlightFormat::Symbol); } + void OnNumericLabelDecl(std::string_view name, u32 parse_num) override + { + const int len = static_cast(m_owner->lexer.LookaheadRef().token_val.length()); + const int off = static_cast(m_owner->lexer.ColNumber()); + m_formatting.emplace_back(len, off, HighlightFormat::Symbol); + } + void OnVarDecl(std::string_view name) override { OnLabelDecl(name); } private: diff --git a/Source/UnitTests/Common/AssemblerTest.cpp b/Source/UnitTests/Common/AssemblerTest.cpp index 965c645cd5..10acbf7fd4 100644 --- a/Source/UnitTests/Common/AssemblerTest.cpp +++ b/Source/UnitTests/Common/AssemblerTest.cpp @@ -2029,7 +2029,7 @@ TEST(Assembler, RangeTest) EXPECT_TRUE(!IsFailure(Assemble(uimm_range_3, 0))); } -TEST(Assembly, MalformedExpressions) +TEST(Assembler, MalformedExpressions) { constexpr char missing_arg[] = "add 0, 1"; constexpr char missing_paren_0[] = ".4byte (1 + 2), ((3 * 6) + 7"; @@ -2065,7 +2065,7 @@ TEST(Assembly, MalformedExpressions) // Modified listing of a subroutine, listing generated by IDA // Expect bytes are based on disc contents -TEST(Assembly, RealAssembly) +TEST(Assembler, RealAssembly) { constexpr char real_assembly[] = ".locate 0x8046A690\n" ".defvar back_chain, -0x30\n" @@ -2223,3 +2223,44 @@ TEST(Assembly, RealAssembly) EXPECT_EQ(code_blocks[0].instructions[i], real_expect[i]) << " -> i=" << i; } } + +TEST(Assembler, NumericLabels) +{ + constexpr char assembly[] = "0:b 0b\nb 0f\n0:.4byte 0\n0:\n1:.4byte 1b"; + constexpr u8 expect[] = {0x48, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0c}; + + auto res = Assemble(assembly, 0); + ASSERT_TRUE(!IsFailure(res)); + auto&& code_blocks = GetT(res); + ASSERT_EQ(code_blocks.size(), 1); + + ASSERT_EQ(code_blocks[0].instructions.size(), sizeof(expect)); + for (size_t i = 0; i < code_blocks[0].instructions.size(); i++) + { + EXPECT_EQ(code_blocks[0].instructions[i], expect[i]) << " -> i=" << i; + } +} + +TEST(Assembler, InvalidNumericLabels) +{ + constexpr char missing_forward[] = "0:b 0f"; + constexpr char missing_backward[] = "b 0b\n0:"; + constexpr char forward_directive[] = ".4byte 0f\n0:"; + constexpr char missing_backward_directive[] = ".4byte 0b\n0:"; + + auto res = Assemble(missing_forward, 0); + EXPECT_TRUE(IsFailure(res) && GetFailure(res).message == "No numeric label '0' found below here") + << GetFailure(res).message; + res = Assemble(missing_backward, 0); + EXPECT_TRUE(IsFailure(res) && GetFailure(res).message == "No numeric label '0' found above here") + << GetFailure(res).message; + res = Assemble(forward_directive, 0); + EXPECT_TRUE(IsFailure(res) && + GetFailure(res).message == + "Forward label references not supported in fully resolved expressons") + << GetFailure(res).message; + res = Assemble(missing_backward_directive, 0); + EXPECT_TRUE(IsFailure(res) && GetFailure(res).message == "No numeric label '0' found above here") + << GetFailure(res).message; +}