From 71a93bd1829f6d0a1c2e63f7eb6d30a4ff412b56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Vask=C3=B3?= <1771332+vlaci@users.noreply.github.com> Date: Fri, 2 Jun 2023 15:02:12 +0200 Subject: [PATCH 1/6] refact: move python submodule initialization to rust submodule --- benches/benches_main.rs | 2 +- src/lib.rs | 12 ++---------- src/{math.rs => math_tools.rs} | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 11 deletions(-) rename src/{math.rs => math_tools.rs} (55%) diff --git a/benches/benches_main.rs b/benches/benches_main.rs index ddd11b6..626b1d2 100755 --- a/benches/benches_main.rs +++ b/benches/benches_main.rs @@ -19,7 +19,7 @@ fn shannon_entropy(c: &mut Criterion) { BenchmarkId::from_parameter(sample_size), &sample_size, |b, &size| { - b.iter(|| unblob_native::math::shannon_entropy(&sample[0..size])); + b.iter(|| unblob_native::math_tools::shannon_entropy(&sample[0..size])); }, ); } diff --git a/src/lib.rs b/src/lib.rs index a1de8a2..14f0ef4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,19 +1,11 @@ -pub mod math; +pub mod math_tools; use pyo3::prelude::*; -/// Calculates Shannon entropy of data -#[pyfunction(text_signature = "(data)")] -pub fn shannon_entropy(py: Python, data: &[u8]) -> PyResult<f64> { - py.allow_threads(|| Ok(math::shannon_entropy(data))) -} - /// Performance-critical functionality #[pymodule] fn _native(py: Python, m: &PyModule) -> PyResult<()> { - let math_module = PyModule::new(py, "math_tools")?; - math_module.add_function(wrap_pyfunction!(shannon_entropy, math_module)?)?; + math_tools::init_module(py, m)?; - m.add_submodule(math_module)?; Ok(()) } diff --git a/src/math.rs b/src/math_tools.rs similarity index 55% rename from src/math.rs rename to src/math_tools.rs index 221ba0c..826adf0 100644 --- a/src/math.rs +++ b/src/math_tools.rs @@ -1,3 +1,5 @@ +use pyo3::prelude::*; + pub fn shannon_entropy(data: &[u8]) -> f64 { let mut entropy = 0.0; let mut counts = [0; 256]; @@ -17,6 +19,20 @@ pub fn shannon_entropy(data:
&[u8]) -> f64 { entropy } +/// Calculates Shannon entropy of data +#[pyfunction(text_signature = "(data)", name = "shannon_entropy")] +pub fn py_shannon_entropy(py: Python, data: &[u8]) -> PyResult<f64> { + py.allow_threads(|| Ok(shannon_entropy(data))) +} + +pub fn init_module(py: Python, root_module: &PyModule) -> PyResult<()> { + let module = PyModule::new(py, "math_tools")?; + module.add_function(wrap_pyfunction!(py_shannon_entropy, module)?)?; + + root_module.add_submodule(module)?; + + Ok(()) +} #[cfg(test)] mod tests { From 590617c66ca991a93eb949c23264a1103b3989f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Vask=C3=B3?= <1771332+vlaci@users.noreply.github.com> Date: Fri, 2 Jun 2023 15:03:06 +0200 Subject: [PATCH 2/6] chore: allow importing submodule from python This code makes it possible to import math as a Python submodule. Extension modules are not packages, so their submodules are not affected by the usual rules from import machinery. --- src/math_tools.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/math_tools.rs b/src/math_tools.rs index 826adf0..7368c2a 100644 --- a/src/math_tools.rs +++ b/src/math_tools.rs @@ -31,6 +31,10 @@ pub fn init_module(py: Python, root_module: &PyModule) -> PyResult<()> { root_module.add_submodule(module)?; + py.import("sys")? + .getattr("modules")?
+ .set_item("unblob_native.math", module)?; + Ok(()) } From 9bacee3484252aec5e79cd6ba9fb336d3e9af492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Vask=C3=B3?= <1771332+vlaci@users.noreply.github.com> Date: Fri, 2 Jun 2023 15:11:09 +0200 Subject: [PATCH 3/6] chore: signature is no longer needed to provide named arguments --- src/math_tools.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/math_tools.rs b/src/math_tools.rs index 7368c2a..1597444 100644 --- a/src/math_tools.rs +++ b/src/math_tools.rs @@ -20,7 +20,7 @@ pub fn shannon_entropy(data: &[u8]) -> f64 { entropy } /// Calculates Shannon entropy of data -#[pyfunction(text_signature = "(data)", name = "shannon_entropy")] +#[pyfunction(name = "shannon_entropy")] pub fn py_shannon_entropy(py: Python, data: &[u8]) -> PyResult<f64> { py.allow_threads(|| Ok(shannon_entropy(data))) } From ba264e4dc1ad66e59aefde197524e91c7c2b9aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Vask=C3=B3?= <1771332+vlaci@users.noreply.github.com> Date: Fri, 2 Jun 2023 19:37:08 +0200 Subject: [PATCH 4/6] chore: verbose build output in CI The default output doesn't contain enough information to diagnose issues.
--- .github/workflows/CI.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 8a68980..afc2c35 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -86,7 +86,7 @@ jobs: key: pytest-${{ matrix.os }} - name: Install dependencies run: | - pdm sync -d + pdm sync -v -d - name: Run Tests run: | pdm pytest @@ -109,7 +109,7 @@ jobs: - uses: actions-rust-lang/setup-rust-toolchain@v1 - name: Install dependencies run: | - pdm sync -d + pdm sync -v -d - name: Type-Check run: | pdm pyright @@ -156,7 +156,7 @@ jobs: with: target: ${{ matrix.target }} container: ${{ env.CONTAINER }} - args: --release --out dist + args: --verbose --release --out dist sccache: ${{ matrix.target == 'musllinux_1_1' }} manylinux: auto docker-options: -e CARGO_NET_GIT_FETCH_WITH_CLI=true @@ -171,7 +171,7 @@ jobs: cd /usr/src curl -sSL https://raw.githubusercontent.com/pdm-project/pdm/main/install-pdm.py | python3.10 - export PATH=/root/.local/bin:$PATH - pdm sync -d --no-self -G test + pdm sync -v -d --no-self -G test pdm run python -m ensurepip pdm run python -m pip install dist/*.whl pdm pytest @@ -196,7 +196,7 @@ jobs: uses: PyO3/maturin-action@v1 with: target: ${{ matrix.target }} - args: --release --out dist + args: --verbose --release --out dist sccache: "true" - name: Upload wheels uses: actions/upload-artifact@v3 @@ -208,7 +208,7 @@ jobs: - name: Test wheels if: ${{ matrix.target == 'x86_64' }} run: | - pdm sync -d --no-self -G test + pdm sync -v -d --no-self -G test pdm run python -m ensurepip pdm run python -m pip install dist/*.whl pdm pytest From d04c2e48d16e499653bd48badbebd72001132b5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Vask=C3=B3?= <1771332+vlaci@users.noreply.github.com> Date: Fri, 2 Jun 2023 15:23:05 +0200 Subject: [PATCH 5/6] feat: add landlock based access restriction functionality Landlock is a kernel API for unprivileged access control. 
We take advantage of it to limit where unblob can write to and read from on the filesystem. This is a Linux-only feature that won't be enabled on OSX. For more information, see https://docs.kernel.org/userspace-api/landlock.html We use Landlock ABI version 2 since it introduced the LANDLOCK_ACCESS_FS_REFER permission that's required to create hardlinks. Co-authored-by: Quentin Kaiser --- Cargo.lock | 71 +++++++++++++++++ Cargo.toml | 5 ++ pyproject.toml | 1 + python/unblob_native/_native/__init__.pyi | 3 - .../{_native => }/math_tools.pyi | 0 python/unblob_native/sandbox.pyi | 20 +++++ src/lib.rs | 4 + src/sandbox/linux.rs | 74 ++++++++++++++++++ src/sandbox/mod.rs | 77 +++++++++++++++++++ src/sandbox/unsupported.rs | 9 +++ tests/test_sandbox.py | 65 ++++++++++++++++ 11 files changed, 326 insertions(+), 3 deletions(-) delete mode 100644 python/unblob_native/_native/__init__.pyi rename python/unblob_native/{_native => }/math_tools.pyi (100%) create mode 100644 python/unblob_native/sandbox.pyi create mode 100644 src/sandbox/linux.rs create mode 100644 src/sandbox/mod.rs create mode 100644 src/sandbox/unsupported.rs create mode 100644 tests/test_sandbox.py diff --git a/Cargo.lock b/Cargo.lock index 2035b28..cb7689b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,6 +20,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "arc-swap" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" + [[package]] name = "atty" version = "0.2.14" @@ -178,6 +184,26 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +[[package]] +name = "enumflags2" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c041f5090df68b32bcd905365fd51769c8b9d553fe87fde0b683534f10c01bd2" +dependencies = [ + "enumflags2_derive", +] + 
+[[package]] +name = "enumflags2_derive" +version = "0.7.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e9a1f9f7d83e59740248a6e14ecf93929ade55027844dfcea78beafccc15745" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.25", +] + [[package]] name = "getrandom" version = "0.2.10" @@ -240,6 +266,17 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "landlock" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520baa32708c4e957d2fc3a186bc5bd8d26637c33137f399ddfc202adb240068" +dependencies = [ + "enumflags2", + "libc", + "thiserror", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -426,6 +463,17 @@ dependencies = [ "pyo3-build-config", ] +[[package]] +name = "pyo3-log" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9c8b57fe71fb5dcf38970ebedc2b1531cf1c14b1b9b4c560a182a57e115575c" +dependencies = [ + "arc-swap", + "log", + "pyo3", +] + [[package]] name = "pyo3-macros" version = "0.18.3" @@ -650,6 +698,26 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "thiserror" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.25", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -666,7 +734,10 @@ version = "0.1.1" dependencies = [ "approx", "criterion", + "landlock", + "log", "pyo3", + "pyo3-log", "rand", ] diff --git a/Cargo.toml b/Cargo.toml index 0390c58..c82c9ff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,12 @@ crate-type = [ ] [dependencies] +log = "0.4.18" pyo3 = "0.18.3" 
+pyo3-log = "0.8.1" + +[target.'cfg(target_os = "linux")'.dependencies] +landlock = "0.2.0" [dev-dependencies] approx = "0.5.0" diff --git a/pyproject.toml b/pyproject.toml index 014a49a..f1fabb2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,6 +93,7 @@ ignore = [ "D203", # one-blank-line-before-class: D211 (no-blank-line-before-class) is used instead "D213", # multi-line-summary-second-line: D212 (multi-line-summary-first-line) is used instead "E501", # line-too-long: Let black handle line length violations + "UP007", # non-pep604-annotation: Python 3.8 support needs legacy annotations ] [tool.ruff.per-file-ignores] diff --git a/python/unblob_native/_native/__init__.pyi b/python/unblob_native/_native/__init__.pyi deleted file mode 100644 index 023b129..0000000 --- a/python/unblob_native/_native/__init__.pyi +++ /dev/null @@ -1,3 +0,0 @@ -from . import math_tools as math_tools - -__all__ = ["math_tools"] diff --git a/python/unblob_native/_native/math_tools.pyi b/python/unblob_native/math_tools.pyi similarity index 100% rename from python/unblob_native/_native/math_tools.pyi rename to python/unblob_native/math_tools.pyi diff --git a/python/unblob_native/sandbox.pyi b/python/unblob_native/sandbox.pyi new file mode 100644 index 0000000..2db1175 --- /dev/null +++ b/python/unblob_native/sandbox.pyi @@ -0,0 +1,20 @@ +import os +import typing + +import typing_extensions + +_Path: typing_extensions.TypeAlias = typing.Union[os.PathLike, str] + +class AccessFS: + @staticmethod + def read(access_dir: _Path) -> AccessFS: ... + @staticmethod + def read_write(access_dir: _Path) -> AccessFS: ... + @staticmethod + def make_reg(access_dir: _Path) -> AccessFS: ... + @staticmethod + def make_dir(access_dir: _Path) -> AccessFS: ... + +def restrict_access(*args: AccessFS) -> None: ... + +class SandboxError(Exception): ... 
diff --git a/src/lib.rs b/src/lib.rs index 14f0ef4..d9e70f8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ pub mod math_tools; +pub mod sandbox; use pyo3::prelude::*; @@ -6,6 +7,9 @@ use pyo3::prelude::*; #[pymodule] fn _native(py: Python, m: &PyModule) -> PyResult<()> { math_tools::init_module(py, m)?; + sandbox::init_module(py, m)?; + + pyo3_log::init(); Ok(()) } diff --git a/src/sandbox/linux.rs b/src/sandbox/linux.rs new file mode 100644 index 0000000..75d6182 --- /dev/null +++ b/src/sandbox/linux.rs @@ -0,0 +1,74 @@ +use landlock::{ + path_beneath_rules, Access, AccessFs, Ruleset, RulesetAttr, RulesetCreatedAttr, ABI, +}; +use log; + +use std::path::Path; + +use crate::sandbox::AccessFS; + +impl AccessFS { + fn read(&self) -> Option<&Path> { + if let Self::Read(path) = self { + Some(path) + } else { + None + } + } + + fn read_write(&self) -> Option<&Path> { + if let Self::ReadWrite(path) = self { + Some(path) + } else { + None + } + } + + fn make_reg(&self) -> Option<&Path> { + if let Self::MakeReg(path) = self { + Some(path) + } else { + None + } + } + + fn make_dir(&self) -> Option<&Path> { + if let Self::MakeDir(path) = self { + Some(path) + } else { + None + } + } +} + +pub fn restrict_access(access_rules: &[AccessFS]) -> Result<(), Box<dyn std::error::Error>> { + let abi = ABI::V2; + + let read_only: Vec<&Path> = access_rules.iter().filter_map(AccessFS::read).collect(); + + let read_write: Vec<&Path> = access_rules + .iter() + .filter_map(AccessFS::read_write) + .collect(); + + let create_file: Vec<&Path> = access_rules.iter().filter_map(AccessFS::make_reg).collect(); + + let create_directory: Vec<&Path> = access_rules.iter().filter_map(AccessFS::make_dir).collect(); + + let status = Ruleset::new() + .handle_access(AccessFs::from_all(abi))? + .create()? + .add_rules(path_beneath_rules(read_write, AccessFs::from_all(abi)))? + .add_rules(path_beneath_rules(create_file, AccessFs::MakeReg))? + .add_rules(path_beneath_rules(create_directory, AccessFs::MakeDir))?
+ .add_rules(path_beneath_rules(read_only, AccessFs::from_read(abi)))? + .restrict_self()?; + + log::info!( + "Activated FS access restrictions; rules={:?}, status={:?}", + access_rules, + status.ruleset + ); + + Ok(()) +} diff --git a/src/sandbox/mod.rs b/src/sandbox/mod.rs new file mode 100644 index 0000000..4bbedb8 --- /dev/null +++ b/src/sandbox/mod.rs @@ -0,0 +1,77 @@ +#[cfg_attr(target_os = "linux", path = "linux.rs")] +#[cfg_attr(not(target_os = "linux"), path = "unsupported.rs")] +mod sandbox_impl; + +use pyo3::{create_exception, exceptions::PyException, prelude::*, types::PyTuple}; +use std::path::PathBuf; + +#[derive(Clone, Debug)] +pub enum AccessFS { + Read(PathBuf), + ReadWrite(PathBuf), + MakeReg(PathBuf), + MakeDir(PathBuf), +} + +/// Enforces access restrictions +#[pyfunction(name = "restrict_access", signature=(*rules))] +fn py_restrict_access(rules: &PyTuple) -> PyResult<()> { + sandbox_impl::restrict_access( + &rules + .iter() + .map(|r| Ok(r.extract::<PyAccessFS>()?.access)) + .collect::<PyResult<Vec<_>>>()?, + ) + .map_err(|err| SandboxError::new_err(err.to_string())) +} + +create_exception!(unblob_native.sandbox, SandboxError, PyException); + +#[pyclass(name = "AccessFS", module = "unblob_native.sandbox")] +#[derive(Clone)] +struct PyAccessFS { + access: AccessFS, +} + +impl PyAccessFS { + fn new(access: AccessFS) -> Self { + Self { access } + } +} + +#[pymethods] +impl PyAccessFS { + #[staticmethod] + fn read(dir: PathBuf) -> Self { + Self::new(AccessFS::Read(dir)) + } + + #[staticmethod] + fn read_write(dir: PathBuf) -> Self { + Self::new(AccessFS::ReadWrite(dir)) + } + + #[staticmethod] + fn make_reg(dir: PathBuf) -> Self { + Self::new(AccessFS::MakeReg(dir)) + } + + #[staticmethod] + fn make_dir(dir: PathBuf) -> Self { + Self::new(AccessFS::MakeDir(dir)) + } +} + +pub fn init_module(py: Python, root_module: &PyModule) -> PyResult<()> { + let module = PyModule::new(py, "sandbox")?; + module.add_function(wrap_pyfunction!(py_restrict_access, module)?)?; +
module.add_class::<PyAccessFS>()?; + module.add("SandboxError", py.get_type::<SandboxError>())?; + + root_module.add_submodule(module)?; + py.import("sys")? + .getattr("modules")? + .set_item("unblob_native.sandbox", module)?; + + Ok(()) +} diff --git a/src/sandbox/unsupported.rs b/src/sandbox/unsupported.rs new file mode 100644 index 0000000..2b7d6c7 --- /dev/null +++ b/src/sandbox/unsupported.rs @@ -0,0 +1,9 @@ +use log; + +use crate::sandbox::AccessFS; + +pub fn restrict_access(_access_rules: &[AccessFS]) -> Result<(), Box<dyn std::error::Error>> { + log::warn!("Sandboxing FS access is unavailable on this system"); + + Ok(()) +} diff --git a/tests/test_sandbox.py b/tests/test_sandbox.py new file mode 100644 index 0000000..3b2f4ab --- /dev/null +++ b/tests/test_sandbox.py @@ -0,0 +1,65 @@ +import platform +from pathlib import Path + +import pytest + +from unblob_native.sandbox import AccessFS, SandboxError, restrict_access + +FILE_CONTENT = b"HELLO" + + +@pytest.mark.skipif(platform.system() == "Linux", reason="Linux is supported.") +def test_unsupported_platform(): + with pytest.raises(SandboxError): + restrict_access(AccessFS.read("/")) + + +@pytest.fixture(scope="session") +def sandbox_path(tmp_path_factory: pytest.TempPathFactory) -> Path: + sandbox_path = tmp_path_factory.mktemp("sandbox") + + file_path = sandbox_path / "file.txt" + dir_path = sandbox_path / "dir" + link_path = sandbox_path / "link" + + with file_path.open("wb") as f: + assert f.write(FILE_CONTENT) == len(FILE_CONTENT) + + dir_path.mkdir() + link_path.symlink_to(file_path) + + return sandbox_path + + +@pytest.mark.skipif( + platform.system() != "Linux" or platform.machine() != "x86_64", + reason="Only supported on Linux x86-64.", +) +def test_read_sandboxing(request: pytest.FixtureRequest, sandbox_path: Path): + restrict_access( + AccessFS.read("/"), + AccessFS.read(sandbox_path), + # allow pytest caching, coverage, etc...
+ AccessFS.read_write(request.config.rootpath), + ) + + with pytest.raises(PermissionError): + (sandbox_path / "some-dir").mkdir() + + with pytest.raises(PermissionError): + (sandbox_path / "some-file").touch() + + with pytest.raises(PermissionError): + (sandbox_path / "some-link").symlink_to("file.txt") + + for path in sandbox_path.rglob("**/*"): + if path.is_file() or path.is_symlink(): + with path.open("rb") as f: + assert f.read() == FILE_CONTENT + with pytest.raises(PermissionError): + assert path.open("r+") + with pytest.raises(PermissionError): + assert path.unlink() + elif path.is_dir(): + with pytest.raises(PermissionError): + path.rmdir() From e2763efe6169a1f79c33a3ed1844a6b552e5de59 Mon Sep 17 00:00:00 2001 From: Quentin Kaiser Date: Thu, 11 Jan 2024 16:06:47 +0100 Subject: [PATCH 6/6] fix: raise SandboxError exception if unsupported platform --- src/sandbox/unsupported.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/sandbox/unsupported.rs b/src/sandbox/unsupported.rs index 2b7d6c7..55a148a 100644 --- a/src/sandbox/unsupported.rs +++ b/src/sandbox/unsupported.rs @@ -1,9 +1,5 @@ -use log; - use crate::sandbox::AccessFS; pub fn restrict_access(_access_rules: &[AccessFS]) -> Result<(), Box<dyn std::error::Error>> { - log::warn!("Sandboxing FS access is unavailable on this system"); - - Ok(()) + Err("Sandboxing FS access is unavailable on this system")? }