Merge pull request #13451 from vyuuui/numeric_labels

Add numeric label support to assembler
This commit is contained in:
Tilka 2025-04-25 01:56:43 +01:00 committed by GitHub
commit 805307f432
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 224 additions and 9 deletions

View file

@ -46,6 +46,7 @@ public:
void OnLoaddr(std::string_view id) override;
void OnCloseParen(ParenType type) override;
void OnLabelDecl(std::string_view name) override;
void OnNumericLabelDecl(std::string_view name, u32 num) override;
void OnVarDecl(std::string_view name) override;
void PostParseAction() override;
@ -75,6 +76,7 @@ public:
void AddAbsoluteAddressConv();
void AddLiteral(u32 lit);
void AddSymbolResolve(std::string_view sym, bool absolute);
void AddNumLabelSymResolve(std::string_view sym, u32 num);
void RunFixups();
@ -97,6 +99,8 @@ private:
u64* m_active_var;
size_t m_operand_scan_begin;
// Ordered top-to-bottom, stores (label number, address)
std::vector<std::pair<u32, u32>> m_numlabs;
std::map<std::string, u32, std::less<>> m_labels;
std::map<std::string, u64, std::less<>> m_constants;
std::set<std::string> m_symset;
@ -362,6 +366,11 @@ void GekkoIRPlugin::OnLabelDecl(std::string_view name)
m_symset.insert(name_str);
}
void GekkoIRPlugin::OnNumericLabelDecl(std::string_view, u32 num)
{
m_numlabs.emplace_back(num, m_active_block->BlockEndAddress());
}
void GekkoIRPlugin::OnVarDecl(std::string_view name)
{
const std::string name_str(name);
@ -555,6 +564,35 @@ void GekkoIRPlugin::AddSymbolResolve(std::string_view sym, bool absolute)
});
}
void GekkoIRPlugin::AddNumLabelSymResolve(std::string_view sym, u32 num)
{
const u32 source_address = m_active_block->BlockEndAddress();
AssemblerError err_on_fail = AssemblerError{
fmt::format("No numeric label '{}' found below here", num),
m_owner->lexer.CurrentLine(),
m_owner->lexer.LineNumber(),
// Lexer should currently point to the label, as it hasn't been eaten yet
m_owner->lexer.ColNumber(),
sym.size(),
};
// Searching forward only
size_t search_start_idx = static_cast<size_t>(m_numlabs.size());
m_fixup_stack.emplace(
[this, num, source_address, search_start_idx, err_on_fail = std::move(err_on_fail)]() {
for (size_t i = search_start_idx; i < m_numlabs.size(); i++)
{
if (num == m_numlabs[i].first)
{
return m_numlabs[i].second - source_address;
}
}
m_owner->error = std::move(err_on_fail);
return u32{0};
});
}
void GekkoIRPlugin::SaveOperandFixup(size_t str_left, size_t str_right)
{
m_operand_fixups.emplace_back(std::move(m_fixup_stack.top()));
@ -715,6 +753,33 @@ void GekkoIRPlugin::EvalTerminalRel(Terminal type, const AssemblerToken& tok)
break;
}
case Terminal::NumLabFwd:
{
std::optional<u32> val = tok.EvalToken<u32>();
ASSERT(val.has_value());
AddNumLabelSymResolve(tok.token_val, *val);
break;
}
case Terminal::NumLabBwd:
{
std::optional<u32> mval = tok.EvalToken<u32>();
ASSERT(mval.has_value());
u32 val = *mval;
if (auto label_it = std::find_if(m_numlabs.rbegin(), m_numlabs.rend(),
[val](std::pair<u32, u32> p) { return p.first == val; });
label_it != m_numlabs.rend())
{
AddLiteral(label_it->second - CurrentAddress());
}
else
{
m_owner->EmitErrorHere(fmt::format("No numeric label '{}' found above here", val));
return;
}
break;
}
// Parser should disallow this from happening
default:
ASSERT(false);
@ -780,6 +845,31 @@ void GekkoIRPlugin::EvalTerminalAbs(Terminal type, const AssemblerToken& tok)
break;
}
case Terminal::NumLabFwd:
m_owner->EmitErrorHere(
fmt::format("Forward label references not supported in fully resolved expressons"));
break;
case Terminal::NumLabBwd:
{
std::optional<u32> mval = tok.EvalToken<u32>();
ASSERT(mval.has_value());
u32 val = *mval;
if (auto label_it = std::find_if(m_numlabs.rbegin(), m_numlabs.rend(),
[val](std::pair<u32, u32> p) { return p.first == val; });
label_it != m_numlabs.rend())
{
m_eval_stack.push_back(label_it->second);
}
else
{
m_owner->EmitErrorHere(fmt::format("No numeric label '{}' found above here", val));
return;
}
break;
}
// Parser should disallow this from happening
default:
ASSERT(false);

View file

@ -201,6 +201,9 @@ std::optional<T> EvalIntegral(TokenType tp, std::string_view val)
return T{2};
case TokenType::So:
return T{3};
case TokenType::NumLabFwd:
case TokenType::NumLabBwd:
return std::accumulate(val.begin(), val.end() - 1, T{0}, dec_step);
default:
return std::nullopt;
}
@ -723,9 +726,9 @@ AssemblerToken Lexer::LexSingle() const
}
else if (h == '0')
{
const char imm_type = Peek();
const char nextch = Peek();
if (imm_type == 'x')
if (nextch == 'x')
{
token_type = TokenType::HexadecimalLit;
Step();
@ -733,21 +736,33 @@ AssemblerToken Lexer::LexSingle() const
{
}
}
else if (imm_type == 'b')
else if (nextch == 'b')
{
token_type = TokenType::BinaryLit;
Step();
for (char c = Peek(); IsBinary(c); c = Step().Peek())
if (!IsBinary(Peek()))
{
token_type = TokenType::NumLabBwd;
}
else
{
token_type = TokenType::BinaryLit;
for (char c = Peek(); IsBinary(c); c = Step().Peek())
{
}
}
}
else if (IsOctal(imm_type))
else if (IsOctal(nextch))
{
token_type = TokenType::OctalLit;
for (char c = Peek(); IsOctal(c); c = Step().Peek())
{
}
}
else if (nextch == 'f')
{
Step();
token_type = TokenType::NumLabFwd;
}
else
{
token_type = TokenType::DecimalLit;
@ -758,7 +773,22 @@ AssemblerToken Lexer::LexSingle() const
for (char c = Peek(); std::isdigit(c); c = Step().Peek())
{
}
token_type = TokenType::DecimalLit;
switch (Peek())
{
case 'f':
token_type = TokenType::NumLabFwd;
Step();
break;
case 'b':
token_type = TokenType::NumLabBwd;
Step();
break;
default:
token_type = TokenType::DecimalLit;
break;
}
}
else if (h == '<' || h == '>')
{

View file

@ -37,6 +37,8 @@ enum class TokenType
Gt,
Eq,
So,
NumLabFwd,
NumLabBwd,
// EOL signifies boundaries between instructions, a la ';'
Eol,
Eof,

View file

@ -84,6 +84,26 @@ void ParseId(ParseState* state)
}
}
// Parses a numeric label reference (forward or backward) at the lexer's lookahead position.
// The token is handed to the plugin as the matching terminal and then consumed. Any other
// token type is reported as an error and left unconsumed.
void ParseNumLocation(ParseState* state)
{
  AssemblerToken label_tok = state->lexer.Lookahead();
  const TokenType kind = label_tok.token_type;

  if (kind != TokenType::NumLabFwd && kind != TokenType::NumLabBwd)
  {
    state->EmitErrorHere(
        fmt::format("Invalid {} with value '{}'", label_tok.TypeStr(), label_tok.ValStr()));
    return;
  }

  const Terminal terminal =
      (kind == TokenType::NumLabFwd) ? Terminal::NumLabFwd : Terminal::NumLabBwd;
  state->plugin.OnTerminal(terminal, label_tok);
  state->lexer.Eat();
}
void ParseIdLocation(ParseState* state)
{
std::array<AssemblerToken, 3> toks;
@ -184,6 +204,11 @@ void ParseBaseexpr(ParseState* state)
ParsePpcBuiltin(state);
break;
case TokenType::NumLabFwd:
case TokenType::NumLabBwd:
ParseNumLocation(state);
break;
case TokenType::Dot:
state->plugin.OnTerminal(Terminal::Dot, state->lexer.Lookahead());
if (state->error)
@ -589,6 +614,21 @@ void ParseLabel(ParseState* state)
}
state->lexer.EatN<2>();
}
if (tokens[0].token_type == TokenType::DecimalLit && tokens[1].token_type == TokenType::Colon)
{
std::optional<u32> labnum = tokens[0].EvalToken<u32>();
if (!labnum)
{
return;
}
state->plugin.OnNumericLabelDecl(tokens[0].token_val, *labnum);
if (state->error)
{
return;
}
state->lexer.EatN<2>();
}
}
void ParseResolvedExpr(ParseState* state)

View file

@ -63,6 +63,8 @@ enum class Terminal
Eq,
So,
Dot,
NumLabFwd,
NumLabBwd,
};
enum class ParenType
@ -111,6 +113,7 @@ public:
virtual void OnCloseParen(ParenType type) {}
virtual void OnError() {}
virtual void OnLabelDecl(std::string_view name) {}
virtual void OnNumericLabelDecl(std::string_view name, u32 parse_num) {}
virtual void OnVarDecl(std::string_view name) {}
protected:

View file

@ -35,6 +35,8 @@ public:
{
switch (type)
{
case Terminal::NumLabFwd:
case Terminal::NumLabBwd:
case Terminal::Id:
HighlightCurToken(HighlightFormat::Symbol);
break;
@ -119,6 +121,13 @@ public:
m_formatting.emplace_back(len, off, HighlightFormat::Symbol);
}
// Marks a numeric label declaration for symbol highlighting. The highlighted span is the
// lexer's current lookahead token: its text length, starting at the lexer's current column.
// Neither argument is consulted.
void OnNumericLabelDecl(std::string_view name, u32 parse_num) override
{
  const auto& label_text = m_owner->lexer.LookaheadRef().token_val;
  const int token_length = static_cast<int>(label_text.length());
  const int column = static_cast<int>(m_owner->lexer.ColNumber());
  m_formatting.emplace_back(token_length, column, HighlightFormat::Symbol);
}
void OnVarDecl(std::string_view name) override { OnLabelDecl(name); }
private:

View file

@ -2029,7 +2029,7 @@ TEST(Assembler, RangeTest)
EXPECT_TRUE(!IsFailure(Assemble(uimm_range_3, 0)));
}
TEST(Assembly, MalformedExpressions)
TEST(Assembler, MalformedExpressions)
{
constexpr char missing_arg[] = "add 0, 1";
constexpr char missing_paren_0[] = ".4byte (1 + 2), ((3 * 6) + 7";
@ -2065,7 +2065,7 @@ TEST(Assembly, MalformedExpressions)
// Modified listing of a subroutine, listing generated by IDA
// Expect bytes are based on disc contents
TEST(Assembly, RealAssembly)
TEST(Assembler, RealAssembly)
{
constexpr char real_assembly[] = ".locate 0x8046A690\n"
".defvar back_chain, -0x30\n"
@ -2223,3 +2223,44 @@ TEST(Assembly, RealAssembly)
EXPECT_EQ(code_blocks[0].instructions[i], real_expect[i]) << " -> i=" << i;
}
}
// Exercises numeric labels: the same number may be declared more than once, "Nb" refers to the
// closest declaration above the reference and "Nf" to the closest declaration below it.
TEST(Assembler, NumericLabels)
{
  constexpr char assembly[] = "0:b 0b\n"      // branch back to the label on this same line
                              "b 0f\n"        // branch forward to the next "0:" declaration
                              "0:.4byte 0\n"  //
                              "0:\n"          // declaration only
                              "1:.4byte 1b";  // emits the value resolved for label 1
  constexpr u8 expect[] = {
      0x48, 0x00, 0x00, 0x00,  // b 0b
      0x48, 0x00, 0x00, 0x04,  // b 0f
      0x00, 0x00, 0x00, 0x00,  // .4byte 0
      0x00, 0x00, 0x00, 0x0c,  // .4byte 1b
  };

  auto result = Assemble(assembly, 0);
  ASSERT_TRUE(!IsFailure(result));

  auto&& blocks = GetT(result);
  ASSERT_EQ(blocks.size(), 1);

  const auto& emitted = blocks[0].instructions;
  ASSERT_EQ(emitted.size(), sizeof(expect));
  for (size_t idx = 0; idx != emitted.size(); ++idx)
  {
    EXPECT_EQ(emitted[idx], expect[idx]) << " -> i=" << idx;
  }
}
// Verifies the diagnostics reported for numeric label references that cannot be resolved or
// are not supported in the position where they appear.
TEST(Assembler, InvalidNumericLabels)
{
  constexpr char missing_forward[] = "0:b 0f";
  constexpr char missing_backward[] = "b 0b\n0:";
  constexpr char forward_directive[] = ".4byte 0f\n0:";
  constexpr char missing_backward_directive[] = ".4byte 0b\n0:";

  // IsFailure is asserted before GetFailure is touched, so an input that unexpectedly
  // assembles is reported as an ordinary test failure rather than by asking a successful
  // result for its error. Each case is checked independently of the others.
  const auto expect_failure = [](const char* assembly, const char* expected_message) {
    auto res = Assemble(assembly, 0);
    ASSERT_TRUE(IsFailure(res)) << "unexpectedly assembled: " << assembly;
    EXPECT_EQ(GetFailure(res).message, expected_message);
  };

  expect_failure(missing_forward, "No numeric label '0' found below here");
  expect_failure(missing_backward, "No numeric label '0' found above here");
  // The spelling below deliberately mirrors the message text emitted by the assembler.
  expect_failure(forward_directive,
                 "Forward label references not supported in fully resolved expressons");
  expect_failure(missing_backward_directive, "No numeric label '0' found above here");
}