diff --git a/docs/development.md b/docs/development.md index 178100f81c..e6ace9958e 100644 --- a/docs/development.md +++ b/docs/development.md @@ -515,7 +515,7 @@ class Extractor(abc.ABC): return [] @abc.abstractmethod - def extract(self, inpath: Path, outdir: Path): + def extract(self, inpath: Path, outdir: Path) -> Optional[ExtractResult]: """Extract the carved out chunk. Raises ExtractError on failure.""" ``` @@ -526,6 +526,15 @@ Two methods are exposed by this class: - `extract()`: you must override this function. This is where you'll perform the extraction of `inpath` content into `outdir` extraction directory +!!! Recommendation + + Although it is possible to implement `extract()` with manual path manipulation, + path traversal checks, and I/O using Python libraries + (`os`, `pathlib.Path`), doing so turns out to be somewhat tedious. + Instead, we recommend removing the boilerplate by using the `FileSystem` helper class from + [unblob/file_utils.py](https://github.com/onekey-sec/unblob/blob/main/unblob/file_utils.py) + which ensures that all file objects are created under its root. + ### DirectoryExtractor class The `DirectoryExtractor` interface is defined in @@ -538,7 +547,7 @@ class DirectoryExtractor(abc.ABC): return [] @abc.abstractmethod - def extract(self, paths: List[Path], outdir: Path): + def extract(self, paths: List[Path], outdir: Path) -> Optional[ExtractResult]: """Extract from a multi file path list. Raises ExtractError on failure. @@ -552,6 +561,11 @@ Two methods are exposed by this class: - `extract()`: you must override this function. This is where you'll perform the extraction of `paths` files into `outdir` extraction directory +!!! Recommendation + + Similarly to `Extractor`, it is recommended to use the `FileSystem` helper class to + implement `extract()`. + ### Example Extractor Extractors are quite complex beasts, so rather than trying to come up with a