diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py
index 957b9561f9..0b4b2ec32b 100644
--- a/tests/test_file_utils.py
+++ b/tests/test_file_utils.py
@@ -572,6 +572,40 @@ def test_create_hardlink_outside_sandbox(self, sandbox: FileSystem):
 
         assert not os.path.lexists(output_path)
         assert sandbox.problems
 
+    @pytest.mark.parametrize("path", [Path("ok-path"), Path("../outside-path")])
+    def test_open(self, path: Path, sandbox: FileSystem):
+        # can perform normal file operations, for in-sandbox and traversal paths alike
+        with sandbox.open(path) as f:
+            f.seek(100)  # writing past EOF leaves a zero-filled gap (checked below)
+            f.write(b"text")
+            assert f.tell() == 104
+            f.seek(102)
+            assert f.read(3) == b"xt"  # short read: only 2 bytes are left before EOF
+
+        # and it is also persisted ("rb+" reopens the file without truncating it)
+        with sandbox.open(path, "rb+") as f:
+            assert f.read() == bytes(100) + b"text"
+
+    def test_open_no_path_traversal(self, sandbox: FileSystem):
+        path = Path("file")
+        with sandbox.open(path) as f:
+            f.write(b"content")
+
+        assert (sandbox.root / path).read_bytes() == b"content"
+        assert sandbox.problems == []
+
+    def test_open_outside_sandbox(self, sandbox: FileSystem):
+        path = Path("../file")
+        with sandbox.open(path) as f:
+            f.write(b"content")
+
+        assert not (sandbox.root / path).exists()
+        assert sandbox.problems
+        # The open is redirected to a lost+found directory: path traversal is most probably a handler problem,
+        # and the extraction could succeed on real hardware/firmware; we just do not know where to extract to.
+        real_out_path = ".unblob-lost+found/_e90583b491d2138aab0c8a12478ee050701910fd80c84289ae747e7c/file"
+        assert (sandbox.root / real_out_path).read_bytes() == b"content"
+
     @pytest.mark.parametrize(
         "input_path, expected_path",
diff --git a/unblob/file_utils.py b/unblob/file_utils.py
index 1d88831344..34ef472d93 100644
--- a/unblob/file_utils.py
+++ b/unblob/file_utils.py
@@ -10,7 +10,7 @@
 import struct
 import unicodedata
 from pathlib import Path
-from typing import Iterable, Iterator, List, Optional, Tuple, Union
+from typing import Iterable, Iterator, List, Literal, Optional, Tuple, Union
 
 from dissect.cstruct import Instance, cstruct
 from structlog import get_logger
@@ -590,3 +590,16 @@ def create_hardlink(self, src: Path, dst: Path):
                     "Not enough privileges to create hardlink to block/char device."
                 )
                 self.record_problem(safe_link.format_report(not_enough_privileges))
+
+    def open(  # noqa: A003
+        self, path: Path, mode: Literal["wb+", "rb+", "xb+"] = "wb+"
+    ) -> io.BufferedRandom:
+        """Create or open a binary file for random-access reading and writing.
+
+        Only the binary read-write modes listed for `mode` are supported, by design; the default "wb+" truncates.
+        """
+        logger.debug("create/open binary file for writing", file_path=path)
+        safe_path = self._get_extraction_path(path, "open")  # per the tests, escaping paths are redirected and reported
+
+        self._ensure_parent_dir(safe_path)
+        return safe_path.open(mode)
diff --git a/vulture_whitelist.py b/vulture_whitelist.py
index e32864e38b..ba6782a9e0 100644
--- a/vulture_whitelist.py
+++ b/vulture_whitelist.py
@@ -5,7 +5,7 @@
 
 import unblob.plugins
 from unblob import cli
-from unblob.file_utils import File, iterbits, round_down
+from unblob.file_utils import File, FileSystem, iterbits, round_down
 from unblob.handlers.compression.lzo import HeaderFlags as LZOHeaderFlags
 from unblob.models import SingleFile, TaskResult, _JSONEncoder
 from unblob.parser import _HexStringToRegex
@@ -32,6 +32,7 @@
 unblob.plugins.hookimpl
 
 File.from_bytes
+FileSystem.open
 iterbits
 round_down
 