2024-10-02 10:19:36 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
import re
|
|
|
|
from dataclasses import dataclass
|
|
|
|
from enum import StrEnum, auto
|
|
|
|
from pathlib import Path
|
|
|
|
|
2024-10-09 16:57:22 +02:00
|
|
|
from parsimonious.exceptions import ParseError
|
|
|
|
from parsimonious.grammar import Grammar
|
|
|
|
from parsimonious.nodes import NodeVisitor
|
|
|
|
|
|
|
|
# Define a simple grammar for C declarations
|
|
|
|
grammar = Grammar(
|
|
|
|
r"""
|
|
|
|
output = decl ";" (_ ~"//.*")?
|
|
|
|
decl = function_decl / var_decl / function_pointer_decl
|
|
|
|
function_decl = type _ function_name _ parameters
|
|
|
|
var_decl = type _ var_name array_subscript (_ "=" _ ~"[^;]*")?
|
|
|
|
function_pointer_decl = type _ ("(*" _ qualifier _ function_name _ array_subscript _ ")") _ parameters
|
|
|
|
|
|
|
|
var_name = identifier
|
|
|
|
function_name = identifier
|
|
|
|
parameters = ("(" _ "void" _ ")") / ("(" _ (parameter_list?) _ ")")
|
|
|
|
parameter_list = parameter _ ("," _ parameter _)*
|
2024-10-22 11:56:48 +02:00
|
|
|
parameter = decl / type
|
2024-10-09 16:57:22 +02:00
|
|
|
qualifier = ~"const|__cdecl|__stdcall|__thiscall|__fastcall"
|
|
|
|
array_subscript = (_ "[" number? "]")*
|
|
|
|
|
|
|
|
type = (qualifier _)? ~r"[a-zA-Z_][a-zA-Z0-9_]*" _ (qualifier _)? "*"* (qualifier _)?
|
|
|
|
|
|
|
|
number = ~"0[0xX][0-9a-fA-F]+" / ~"[0-9]+"
|
|
|
|
identifier = ~r"[a-zA-Z_][a-zA-Z0-9_]*"
|
|
|
|
_ = ~r"\s*"
|
|
|
|
"""
|
2024-10-02 10:19:36 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
|
2024-10-09 16:57:22 +02:00
|
|
|
class DeclarationVisitor(NodeVisitor):
|
|
|
|
def visit_output(self, node, visited_children):
|
|
|
|
return visited_children[0]
|
|
|
|
|
|
|
|
def visit_decl(self, node, visited_children):
|
|
|
|
return visited_children[0]
|
|
|
|
|
|
|
|
def visit_function_decl(self, node, visited_children):
|
|
|
|
return visited_children[2]
|
|
|
|
|
|
|
|
def visit_function_pointer_decl(self, node, visited_children):
|
|
|
|
return visited_children[2][4]
|
|
|
|
|
|
|
|
def visit_identifier(self, node, visited_children):
|
|
|
|
return node.text
|
|
|
|
|
|
|
|
def visit_var_decl(self, node, visited_children):
|
|
|
|
return visited_children[2]
|
|
|
|
|
|
|
|
def visit_function_name(self, node, visited_children):
|
|
|
|
return node
|
|
|
|
|
|
|
|
def visit_var_name(self, node, visited_children):
|
|
|
|
return node
|
|
|
|
|
|
|
|
def generic_visit(self, node, visited_children):
|
|
|
|
return visited_children or node
|
|
|
|
|
|
|
|
|
|
|
|
def extract_symbol_name(c_declaration: str) -> str | None:
|
|
|
|
result: str | None
|
|
|
|
try:
|
|
|
|
visitor = DeclarationVisitor()
|
|
|
|
tree = grammar.parse(c_declaration)
|
|
|
|
result = visitor.visit(tree)
|
|
|
|
except ParseError:
|
|
|
|
result = None
|
|
|
|
return result
|
2024-10-02 10:19:36 +02:00
|
|
|
|
|
|
|
|
|
|
|
class SymbolStatus(StrEnum):
|
|
|
|
DECOMPILED = auto()
|
|
|
|
KNOWN = auto()
|
|
|
|
TODO = auto()
|
|
|
|
UNUSED = auto()
|
|
|
|
|
|
|
|
|
|
|
|
class ProgressFileSection(StrEnum):
|
|
|
|
TYPES = "types"
|
|
|
|
FUNCTIONS = "functions"
|
|
|
|
VARIABLES = "variables"
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
|
class Symbol:
|
|
|
|
offset: int
|
|
|
|
signature: str
|
|
|
|
size: int | None = None
|
|
|
|
flags: str = ""
|
|
|
|
|
2024-10-09 16:57:22 +02:00
|
|
|
@property
|
|
|
|
def name(self) -> str:
|
|
|
|
return extract_symbol_name(self.signature)
|
|
|
|
|
2024-10-02 10:19:36 +02:00
|
|
|
@property
|
|
|
|
def offset_str(self) -> str:
|
|
|
|
return f"0x{self.offset:08X}"
|
|
|
|
|
|
|
|
@property
|
|
|
|
def is_decompiled(self) -> bool:
|
2024-11-15 15:52:46 +01:00
|
|
|
return "+" in self.flags or "@" in self.flags
|
2024-10-02 10:19:36 +02:00
|
|
|
|
|
|
|
@property
|
|
|
|
def is_called(self) -> bool:
|
|
|
|
return "*" in self.flags
|
|
|
|
|
|
|
|
@property
|
|
|
|
def is_unused(self) -> bool:
|
|
|
|
return "x" in self.flags
|
|
|
|
|
|
|
|
@property
|
|
|
|
def is_known(self) -> bool:
|
|
|
|
return not re.search(r"(\s|^)sub_", self.signature)
|
|
|
|
|
|
|
|
@property
|
|
|
|
def status(self) -> SymbolStatus:
|
|
|
|
if self.is_decompiled:
|
|
|
|
return SymbolStatus.DECOMPILED
|
|
|
|
elif self.is_unused:
|
|
|
|
return SymbolStatus.UNUSED
|
|
|
|
elif self.is_known:
|
|
|
|
return SymbolStatus.KNOWN
|
|
|
|
return SymbolStatus.TODO
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
|
class ProgressFile:
|
|
|
|
types: list[str]
|
|
|
|
functions: list[Symbol]
|
|
|
|
variables: list[Symbol]
|
|
|
|
|
|
|
|
|
|
|
|
def to_int(source: str) -> int | None:
|
|
|
|
source = source.strip()
|
|
|
|
if source.startswith("/*"):
|
|
|
|
source = source[2:]
|
|
|
|
if source.endswith("*/"):
|
|
|
|
source = source[:-2]
|
|
|
|
source = source.strip()
|
|
|
|
if not source.replace("-", ""):
|
|
|
|
return None
|
|
|
|
if source.startswith(("0x", "0X")):
|
|
|
|
source = source[2:]
|
|
|
|
return int(source, 16)
|
|
|
|
|
|
|
|
|
|
|
|
def parse_progress_file(path: Path) -> ProgressFile:
|
|
|
|
result = ProgressFile(types=[], functions=[], variables=[])
|
|
|
|
|
|
|
|
type_contents = ""
|
|
|
|
|
|
|
|
section: ProgressFileSection | None = None
|
|
|
|
for line in path.read_text(encoding="utf-8").splitlines():
|
|
|
|
if match := re.match("^# ([A-Z]+)$", line.strip()):
|
|
|
|
section_name = match.group(1).lower()
|
|
|
|
if section_name in list(ProgressFileSection):
|
|
|
|
section = ProgressFileSection(section_name)
|
|
|
|
|
|
|
|
if line.strip().startswith("#"):
|
|
|
|
continue
|
|
|
|
|
|
|
|
if section == ProgressFileSection.TYPES:
|
|
|
|
type_contents += line + "\n"
|
|
|
|
|
|
|
|
line = line.strip()
|
|
|
|
if not line:
|
|
|
|
continue
|
|
|
|
|
|
|
|
if section == ProgressFileSection.FUNCTIONS:
|
|
|
|
offset, size, flags, signature = re.split(r"\s+", line, maxsplit=3)
|
|
|
|
result.functions.append(
|
|
|
|
Symbol(
|
|
|
|
signature=signature,
|
|
|
|
offset=to_int(offset),
|
|
|
|
size=to_int(size),
|
|
|
|
flags=flags,
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
if section == ProgressFileSection.VARIABLES:
|
|
|
|
offset, flags, signature = re.split(r"\s+", line, maxsplit=2)
|
|
|
|
result.variables.append(
|
|
|
|
Symbol(
|
|
|
|
signature=signature,
|
|
|
|
offset=to_int(offset),
|
|
|
|
flags=flags,
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
result.types = [
|
|
|
|
definition
|
|
|
|
for definition in re.split(r"\n\n(?!\s)", type_contents, flags=re.M)
|
|
|
|
if definition.strip()
|
|
|
|
]
|
|
|
|
return result
|