Browse Source

Introduce memory indirection asserts

This allows us to start validating against bytes elsewhere in the
file (as long as we can reach them from the configuration; no
cross-config handling yet).
nosoop 7 months ago
parent
commit
bf42d42fd4
2 changed files with 75 additions and 0 deletions
  1. 31 0
      README.md
  2. 44 0
      src/smgdc/validate.py

+ 31 - 0
README.md

@@ -58,6 +58,37 @@ Types of specifications include:
 - `vfn`: takes a virtual method symbol and gets the vtable index for it on Linux and Windows
 (guesstimate on the latter)
 
+#### Assertions
+
+Some entries accept an `assert` option, which allows the evaluation of a single Python
+expression.  A falsy result (such as `False`) causes validation to fail, indicating that an
+entry requires review.
+
+> [!WARNING]
+> Obviously, [Python's `eval` is dangerous][eval-danger]; this application was never designed
+> to accommodate adversarial inputs.  You should never blindly perform validations with
+> user-submitted specification files.
+
+Depending on the entry type, one or more of these variables will be made available:
+
+- `addr`: An object representing a position in the binary.  This is set to the configuration's
+result address and provides the following methods:
+	- `read(offset: int = 0)`: Returns a new `BinaryPosition`, acting as if the position + offset
+	was dereferenced as an absolute address.
+	- `value(struct, offset = 0)`: Unpacks the data at the current position + offset using a
+	[struct.Struct][]-format string and returns the first unpacked field.
+	- `string_value(encoding = "utf-8", errors = "strict", offset = 0)`: Extracts a string from
+	the current position.
+	- This is designed to mimic SourceMod's gamedata API of using reads via method chaining:
+	```
+	# process an indirection by dereferencing at position +3h
+	addr.read(0x3).value("<I") == 0xDEADBEEF
+	```
+- `value`: In a `value` entry, this returns the value that will be inserted into the output.
+
+[eval-danger]: https://nedbatchelder.com/blog/201206/eval_really_is_dangerous.html
+[struct.Struct]: https://docs.python.org/3.11/library/struct.html#struct.Struct
+
 ### Constraint file
 
 > [!IMPORTANT]

+ 44 - 0
src/smgdc/validate.py

@@ -2,6 +2,7 @@
 
 import configparser
 import contextlib
+import dataclasses
 import enum
 import functools
 import gzip
@@ -123,6 +124,42 @@ class NumericOutputFormat(enum.StrEnum):
         raise NotImplementedError(f"Missing numeric output for {self}")
 
 
+@dataclasses.dataclass
+class BinaryPosition:
+    """
+    Represents a file-offset pair.
+
+    This is designed to mimic SourceMod's gamedata address API; a value is produced after
+    chaining read operations.
+    """
+
+    position: int  # location handed to bin.read() and used to index bin.mmap()
+    bin: BaseBinary  # binary that every read on this position is performed against
+
+    def read(self, offset: int = 0) -> "BinaryPosition":
+        s = struct.Struct("<I")  # one little-endian unsigned 32-bit integer
+        next_position, *_ = s.unpack(self.bin.read(self.position + offset, s.size))
+        return BinaryPosition(next_position, self.bin)  # value read becomes the new position
+
+    def value(self, struct_str: str, offset: int = 0) -> typing.Any:
+        s = struct.Struct(struct_str)  # caller-supplied format; s.size bytes are requested
+        result, *_ = s.unpack(self.bin.read(self.position + offset, s.size))
+        return result  # only the first unpacked field is returned; any others are discarded
+
+    def string_value(
+        self, encoding: str = "utf-8", errors: str = "strict", offset: int = 0
+    ) -> str:
+        """
+        Returns the zero-terminated string at position + offset; ValueError if unterminated.
+        """
+        with self.bin.mmap() as mm:
+            start = self.position + offset
+            end = mm.find(b"\x00", start)  # first NUL byte at or after start
+            if end == -1:
+                raise ValueError(f"Could not find null terminator after position {start:02x}")
+            return mm[start:end].decode(encoding, errors)  # terminator itself is excluded
+
+
 class BaseEntry(msgspec.Struct, kw_only=True):
     # the partial path pointing to a binary
     target: pathlib.Path
@@ -227,6 +264,8 @@ class ByteSigEntry(LocationEntry, tag="bytesig", kw_only=True):
     # most bytesigs are expected to be unique; escape hatch for those that are just typecasted
     allow_multiple: bool = False
 
+    assert_stmt: Code | None = msgspec.field(default=None, name="assert")
+
     def process(self, bin: PlatformBinary) -> ResultValues:
         outputs = {
             KEY_AS_IS: self.contents.gameconf_str,
@@ -246,6 +285,11 @@ class ByteSigEntry(LocationEntry, tag="bytesig", kw_only=True):
                 raise AssertionError(
                     f"Assertion failed: {self.contents.display_str} != {actual_disp}"
                 )
+
+            bp = BinaryPosition(address, bin)
+            if self.assert_stmt and not self.assert_stmt.eval(addr=bp, **eval_functions):
+                raise AssertionError(f"'{self.assert_stmt}' failed")
+
             return outputs
 
         with bin.mmap() as memory: