Browse Source

Compress serialized angr projects

This results in an on-disk size reduction of about 80%.  Compression only
happens once(*), so it's effectively free over the course of
multiple runs.

(*) unless we opt to expand our analyses to CFG recovery as well
nosoop 10 months ago
parent
commit
d1b3787c2a
1 changed file with 8 additions and 3 deletions
  1. 8 3
      src/smgdc/validate.py

+ 8 - 3
src/smgdc/validate.py

@@ -4,6 +4,7 @@ import configparser
 import contextlib
 import enum
 import functools
+import gzip
 import hashlib
 import io
 import itertools
@@ -68,12 +69,16 @@ class BaseBinary:
         self._file = open(self.path, "rb")
 
         self.hash = hashlib.file_digest(self._file, "sha256").hexdigest()
-        cached_proj = (cache_path or pathlib.Path()) / f"{self.hash}.angr.pkl"
+        cached_proj = (cache_path or pathlib.Path()) / f"{self.hash}.angr.pkl.gz"
         if not cached_proj.exists():
             self.angr = angr.Project(self.path, load_options={"auto_load_libs": False})
-            cached_proj.write_bytes(pickle.dumps(self.angr))
+            with gzip.GzipFile(cached_proj, "wb") as f:
+                pkr = pickle.Pickler(f)
+                pkr.dump(self.angr)
         else:
-            self.angr = pickle.loads(cached_proj.read_bytes())
+            with gzip.GzipFile(cached_proj, "rb") as f:
+                upk = pickle.Unpickler(f)
+                self.angr = upk.load()
         self._mm = mmap.mmap(self._file.fileno(), 0, access=mmap.ACCESS_READ)
 
     @contextlib.contextmanager