TRX/tools/shared/ida_progress.py

202 lines
5.4 KiB
Python
Raw Normal View History

2024-10-02 10:19:36 +02:00
#!/usr/bin/env python3
import re
from dataclasses import dataclass
from enum import StrEnum, auto
from pathlib import Path
from parsimonious.exceptions import ParseError
from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor
# Define a simple grammar for C declarations
grammar = Grammar(
r"""
output = decl ";" (_ ~"//.*")?
decl = function_decl / var_decl / function_pointer_decl
function_decl = type _ function_name _ parameters
var_decl = type _ var_name array_subscript (_ "=" _ ~"[^;]*")?
function_pointer_decl = type _ ("(*" _ qualifier _ function_name _ array_subscript _ ")") _ parameters
var_name = identifier
function_name = identifier
parameters = ("(" _ "void" _ ")") / ("(" _ (parameter_list?) _ ")")
parameter_list = parameter _ ("," _ parameter _)*
parameter = decl / type
qualifier = ~"const|__cdecl|__stdcall|__thiscall|__fastcall"
array_subscript = (_ "[" number? "]")*
type = (qualifier _)? ~r"[a-zA-Z_][a-zA-Z0-9_]*" _ (qualifier _)? "*"* (qualifier _)?
number = ~"0[0xX][0-9a-fA-F]+" / ~"[0-9]+"
identifier = ~r"[a-zA-Z_][a-zA-Z0-9_]*"
_ = ~r"\s*"
"""
2024-10-02 10:19:36 +02:00
)
class DeclarationVisitor(NodeVisitor):
def visit_output(self, node, visited_children):
return visited_children[0]
def visit_decl(self, node, visited_children):
return visited_children[0]
def visit_function_decl(self, node, visited_children):
return visited_children[2]
def visit_function_pointer_decl(self, node, visited_children):
return visited_children[2][4]
def visit_identifier(self, node, visited_children):
return node.text
def visit_var_decl(self, node, visited_children):
return visited_children[2]
def visit_function_name(self, node, visited_children):
return node
def visit_var_name(self, node, visited_children):
return node
def generic_visit(self, node, visited_children):
return visited_children or node
def extract_symbol_name(c_declaration: str) -> str | None:
result: str | None
try:
visitor = DeclarationVisitor()
tree = grammar.parse(c_declaration)
result = visitor.visit(tree)
except ParseError:
result = None
return result
2024-10-02 10:19:36 +02:00
class SymbolStatus(StrEnum):
DECOMPILED = auto()
KNOWN = auto()
TODO = auto()
UNUSED = auto()
class ProgressFileSection(StrEnum):
TYPES = "types"
FUNCTIONS = "functions"
VARIABLES = "variables"
@dataclass
class Symbol:
offset: int
signature: str
size: int | None = None
flags: str = ""
@property
def name(self) -> str:
return extract_symbol_name(self.signature)
2024-10-02 10:19:36 +02:00
@property
def offset_str(self) -> str:
return f"0x{self.offset:08X}"
@property
def is_decompiled(self) -> bool:
return "+" in self.flags or "@" in self.flags
2024-10-02 10:19:36 +02:00
@property
def is_called(self) -> bool:
return "*" in self.flags
@property
def is_unused(self) -> bool:
return "x" in self.flags
@property
def is_known(self) -> bool:
return not re.search(r"(\s|^)sub_", self.signature)
@property
def status(self) -> SymbolStatus:
if self.is_decompiled:
return SymbolStatus.DECOMPILED
elif self.is_unused:
return SymbolStatus.UNUSED
elif self.is_known:
return SymbolStatus.KNOWN
return SymbolStatus.TODO
@dataclass
class ProgressFile:
types: list[str]
functions: list[Symbol]
variables: list[Symbol]
def to_int(source: str) -> int | None:
source = source.strip()
if source.startswith("/*"):
source = source[2:]
if source.endswith("*/"):
source = source[:-2]
source = source.strip()
if not source.replace("-", ""):
return None
if source.startswith(("0x", "0X")):
source = source[2:]
return int(source, 16)
def parse_progress_file(path: Path) -> ProgressFile:
result = ProgressFile(types=[], functions=[], variables=[])
type_contents = ""
section: ProgressFileSection | None = None
for line in path.read_text(encoding="utf-8").splitlines():
if match := re.match("^# ([A-Z]+)$", line.strip()):
section_name = match.group(1).lower()
if section_name in list(ProgressFileSection):
section = ProgressFileSection(section_name)
if line.strip().startswith("#"):
continue
if section == ProgressFileSection.TYPES:
type_contents += line + "\n"
line = line.strip()
if not line:
continue
if section == ProgressFileSection.FUNCTIONS:
offset, size, flags, signature = re.split(r"\s+", line, maxsplit=3)
result.functions.append(
Symbol(
signature=signature,
offset=to_int(offset),
size=to_int(size),
flags=flags,
)
)
if section == ProgressFileSection.VARIABLES:
offset, flags, signature = re.split(r"\s+", line, maxsplit=2)
result.variables.append(
Symbol(
signature=signature,
offset=to_int(offset),
flags=flags,
)
)
result.types = [
definition
for definition in re.split(r"\n\n(?!\s)", type_contents, flags=re.M)
if definition.strip()
]
return result