Skip to content

Commit

Permalink
Introducing Type Hints
Browse files Browse the repository at this point in the history
As we are producing a binary extension via Cython, the regular source-based mechanism of type discovery is not available.
This makes using libzim a little less pleasant for those relying on types.

This adds type stubs that we will have to manually maintain.

I had to tweak the wrapper a bit to be able to expose `get_indexdata`.
Previously, we were relying on the fact that the Item had no `get_indexdata` method to decide
whether to use the libzim auto-index feature (relied on heavily for HTML entries).

A conditionally-defined method is not really compatible with static typing.

In order to retain functionality and API, I chose to add a `get_indexdata` variable on all `Item`, set to `None`.
If this variable is `None` (no action taken), then we run the auto-index.
If it is not `None` but returns NULL, then there is no index (no auto-index either).
If it is set and returns a proper IndexData, it is used.
If the variable is missing (not sub-classing `Item`), run the auto-index.

In other words, behavior stays the same but type checkers have a `get_indexdata` variable to look for.
  • Loading branch information
rgaudin committed Sep 5, 2024
1 parent dee26e4 commit 5821694
Show file tree
Hide file tree
Showing 13 changed files with 281 additions and 18 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added

- Windows (x64) support (#91)
- Type stubs (#198)

### Changed

Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,12 @@ with Creator("test.zim") as creator:
creator.add_item(item2)
```

#### Type hints

`libzim` being a binary extension, there is no Python source to provide type information. We provide it as type stub files. When using `pyright`, you would normally receive a warning when importing from `libzim` as there could be discrepancies between actual sources and the (manually crafted) stub files.

You can disable the warning via `reportMissingModuleSource = "none"`.

## Building

`libzim` package building offers different behaviors via environment variables
Expand Down
7 changes: 7 additions & 0 deletions libzim/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from libzim import (
reader, # noqa: F401 # pyright: ignore[reportUnusedImport]
search, # noqa: F401 # pyright: ignore[reportUnusedImport]
suggestion, # noqa: F401 # pyright: ignore[reportUnusedImport]
version, # noqa: F401 # pyright: ignore[reportUnusedImport]
writer, # noqa: F401 # pyright: ignore[reportUnusedImport]
)
4 changes: 4 additions & 0 deletions libzim/libwrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,13 @@ WriterItemWrapper::getContentProvider() const
std::shared_ptr<zim::writer::IndexData>
WriterItemWrapper::getIndexData() const
{
  // Defer to the base zim::writer::Item implementation when the Python object
  // either lacks a `get_indexdata` attribute (should not happen on a proper
  // subclass) or exposes it as None. The `||` short-circuits, so
  // method_is_none() is only consulted once the attribute is known to exist.
  const bool useBaseImplementation =
      !obj_has_attribute(m_obj, "get_indexdata")
      || method_is_none(m_obj, "get_indexdata");
  if (useBaseImplementation) {
    return zim::writer::Item::getIndexData();
  }

  // Otherwise the Python-side callable decides what index data is used.
  return callMethodOnObj<std::shared_ptr<zim::writer::IndexData>>(m_obj, "get_indexdata");
}

Expand Down
9 changes: 8 additions & 1 deletion libzim/libzim.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,12 @@ cdef object call_method(object obj, string method):
# object to the correct cpp type.
# Will be used by cpp side to call python method.
cdef public api:

# Report whether the attribute called `method` on `obj` is None.
# The name arrives as a C++ string and is decoded as UTF-8 before lookup.
bool method_is_none(object obj, string method) with gil:
    attr_name = method.decode('UTF-8')
    return getattr(obj, attr_name) is None

bool obj_has_attribute(object obj, string attribute) with gil:
    """Tell whether `obj` exposes an attribute named `attribute` (decoded as UTF-8)"""
    attr_name = attribute.decode('UTF-8')
    return hasattr(obj, attr_name)
Expand Down Expand Up @@ -537,6 +543,7 @@ class BaseWritingItem:

def __init__(self):
self._blob = None
get_indexdata = None

def get_path(self) -> str:
"""Full path of item"""
Expand Down Expand Up @@ -567,7 +574,7 @@ class BaseWritingItem:

class Creator(_Creator):
__module__ = writer_module_name
def config_compression(self, compression: Compression):
def config_compression(self, compression: Union[Compression, str]):
if not isinstance(compression, Compression):
compression = getattr(Compression, compression.lower())
return super().config_compression(compression)
Expand Down
79 changes: 79 additions & 0 deletions libzim/reader.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
from __future__ import annotations

import pathlib
from uuid import UUID

class Item:
    # Stub for the reader-side Item of the compiled extension.
    # Every piece of data is exposed as a read-only property.
    def __repr__(self) -> str: ...
    @property
    def path(self) -> str: ...
    @property
    def title(self) -> str: ...
    @property
    def mimetype(self) -> str: ...
    @property
    def content(self) -> memoryview: ...
    @property
    def size(self) -> int: ...
    @property
    def _index(self) -> int: ...

class Entry:
    # Stub for a reader-side Entry: descriptive read-only properties plus
    # accessors returning the redirect target (Entry) or the Item.
    def __repr__(self) -> str: ...
    def get_item(self) -> Item: ...
    def get_redirect_entry(self) -> Entry: ...
    @property
    def is_redirect(self) -> bool: ...
    @property
    def path(self) -> str: ...
    @property
    def title(self) -> str: ...
    @property
    def _index(self) -> int: ...

class Archive:
    # Stub for the reader-side Archive of the compiled extension.
    # Lookups by path/title return Entry objects; metadata and illustrations
    # are returned as Item objects (or raw bytes for get_metadata).
    def __init__(self, filename: pathlib.Path) -> None: ...
    @property
    def filename(self) -> pathlib.Path: ...
    @property
    def filesize(self) -> int: ...
    def has_entry_by_path(self, path: str) -> bool: ...
    def get_entry_by_path(self, path: str) -> Entry: ...
    def has_entry_by_title(self, title: str) -> bool: ...
    def get_entry_by_title(self, title: str) -> Entry: ...
    @property
    def metadata_keys(self) -> list[str]: ...
    def get_metadata_item(self, name: str) -> Item: ...
    def get_metadata(self, name: str) -> bytes: ...
    def _get_entry_by_id(self, entry_id: int) -> Entry: ...
    @property
    def has_main_entry(self) -> bool: ...
    @property
    def main_entry(self) -> Entry: ...
    @property
    def uuid(self) -> UUID: ...
    @property
    def has_new_namespace_scheme(self) -> bool: ...
    @property
    def is_multipart(self) -> bool: ...
    @property
    def has_fulltext_index(self) -> bool: ...
    @property
    def has_title_index(self) -> bool: ...
    # Predicate, like every other `has_*` member: the checksum value itself
    # is exposed separately (as a str) by the `checksum` property below.
    @property
    def has_checksum(self) -> bool: ...
    @property
    def checksum(self) -> str: ...
    def check(self) -> bool: ...
    @property
    def entry_count(self) -> int: ...
    @property
    def all_entry_count(self) -> int: ...
    @property
    def article_count(self) -> int: ...
    @property
    def media_count(self) -> int: ...
    def get_illustration_sizes(self) -> set[int]: ...
    # `size` is optional for both illustration helpers (defaults to None).
    def has_illustration(self, size: int | None = None) -> bool: ...
    def get_illustration_item(self, size: int | None = None) -> Item: ...
    def __repr__(self) -> str: ...
20 changes: 20 additions & 0 deletions libzim/search.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from __future__ import annotations

from collections.abc import Iterator
from typing import Self

from libzim.reader import Archive

class Query:
def set_query(self, query: str) -> Self: ...

class SearchResultSet:
    # Stub: iterating a result set yields strings.
    def __iter__(self) -> Iterator[str]: ...

class Search:
    # Stub for a search handle; camelCase names are part of the exposed API,
    # hence the N802 suppressions.
    def getResults(self, start: int, count: int) -> SearchResultSet: ...  # noqa: N802
    def getEstimatedMatches(self) -> int: ...  # noqa: N802

class Searcher:
    # Stub: built from an Archive; search() takes a Query and returns a Search.
    def __init__(self, archive: Archive) -> None: ...
    def search(self, query: Query) -> Search: ...
18 changes: 18 additions & 0 deletions libzim/suggestion.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
from __future__ import annotations

from collections.abc import Iterator

from libzim.reader import Archive

class SuggestionResultSet:
    # Stub: iterating a suggestion result set yields strings.
    def __iter__(self) -> Iterator[str]: ...

class SuggestionSearch:
    # Stub for a suggestion search handle; camelCase names are part of the
    # exposed API, hence the N802 suppressions.
    def getResults(  # noqa: N802
        self, start: int, count: int
    ) -> SuggestionResultSet: ...
    def getEstimatedMatches(self) -> int: ...  # noqa: N802

class SuggestionSearcher:
    # Stub: built from an Archive; suggest() takes the query text directly
    # and returns a SuggestionSearch.
    def __init__(self, archive: Archive) -> None: ...
    def suggest(self, query: str) -> SuggestionSearch: ...
9 changes: 9 additions & 0 deletions libzim/version.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from __future__ import annotations

import sys
from collections import OrderedDict
from typing import TextIO

# Signatures of the version helpers exposed by the extension.
def get_libzim_version() -> str: ...
def get_versions() -> OrderedDict[str, str]: ...

# `out` defaults to standard output.
def print_versions(out: TextIO = sys.stdout) -> None: ...
97 changes: 97 additions & 0 deletions libzim/writer.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
from __future__ import annotations

import datetime
import enum
import pathlib
import types
from collections.abc import Callable, Generator
from typing import Self

class Compression(enum.Enum):
none: Self
zstd: Self

class Hint(enum.Enum):
COMPRESS: Self
FRONT_ARTICLE: Self

class Blob:
    # Stub: a Blob is built from text or bytes and exposes `ref_content`
    # as a bytes attribute.
    ref_content: bytes

    def __init__(self, content: str | bytes) -> None: ...
    def size(self) -> int: ...

class ContentProvider:
    # Stub for the base content provider: content is delivered as Blob
    # objects, and `generator` is an attribute holding a Blob generator.
    generator: Generator[Blob, None, None]

    def get_size(self) -> int: ...
    def feed(self) -> Blob: ...
    def gen_blob(self) -> Generator[Blob, None, None]: ...

class StringProvider(ContentProvider):
    # Stub: ContentProvider constructed from in-memory text or bytes.
    def __init__(self, content: str | bytes) -> None: ...

class FileProvider(ContentProvider):
    # Stub: ContentProvider constructed from a file path (Path or str).
    def __init__(self, filepath: pathlib.Path | str) -> None: ...

class Item:
    # Stub for the writer-side Item that callers subclass.
    _blob: Blob
    # Declared as an optional callable attribute rather than a method:
    # left as None, the library's auto-index is used; when set, the callable
    # returns either the IndexData to use or None for no index at all.
    get_indexdata: Callable[[], IndexData | None] | None

    def __repr__(self) -> str: ...
    def get_path(self) -> str: ...
    def get_title(self) -> str: ...
    def get_mimetype(self) -> str: ...
    def get_hints(self) -> dict[Hint, int]: ...
    def get_contentprovider(self) -> ContentProvider: ...

class IndexData:
    # Stub for the indexing payload returned by an Item's get_indexdata.
    def has_indexdata(self) -> bool: ...
    def get_wordcount(self) -> int: ...
    def get_title(self) -> str: ...
    def get_keywords(self) -> str: ...
    def get_content(self) -> str: ...
    # A pair of floats, or None.
    def get_geoposition(self) -> tuple[float, float] | None: ...

class Creator:
def __init__(self, filename: pathlib.Path) -> None: ...
def config_verbose(self, verbose: bool) -> Self: ...
def config_compression(self, compression: Compression | str) -> Self: ...
def config_clustersize(self, size: int) -> Self: ...
def config_indexing(self, indexing: bool, language: str) -> Self: ...
def config_nbworkers(self, nbWorkers: int) -> Self: ... # noqa: N803
def set_mainpath(self, mainPath: str) -> Self: ... # noqa: N803
def add_illustration(self, size: int, content: bytes) -> None: ...
def add_item(self, writer_item: Item) -> None: ...
def add_metadata(
self,
name: str,
content: str | bytes | datetime.date | datetime.datetime,
mimetype: str = "text/plain;charset=UTF-8",
) -> None: ...
def add_redirection(
self,
path: str,
title: str,
targetPath: str, # noqa: N803
hints: dict[Hint, int],
) -> None: ...
def add_alias(
self,
path: str,
title: str,
targetPath: str, # noqa: N803
hints: dict[Hint, int],
) -> None: ...
def __enter__(self) -> Self: ...
def __exit__(
self,
exc_type: type[BaseException] | None,
exc_val: BaseException | None,
exc_tb: types.TracebackType | None,
) -> None: ...
@property
def filename(self) -> pathlib.Path: ...
def __repr__(self) -> str: ...

_filename: pathlib.Path
_started: bool
Empty file added py.typed
Empty file.
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ packages = [ "libzim" ]

[tool.setuptools.package-data]
libzim = [
"py.typed",
"*.pyi",
"libzim.9.dylib",
"libzim.so.9",
"zim-9.dll",
Expand Down Expand Up @@ -313,7 +315,7 @@ exclude_lines = [

[tool.pyright]
include = ["libzim", "tests", "tasks.py"]
exclude = [".env/**", ".venv/**", "libzim/libzim.pyi"]
exclude = [".env/**", ".venv/**"]
pythonVersion = "3.12"
typeCheckingMode="basic"
disableBytesTypePromotions = true
Loading

0 comments on commit 5821694

Please sign in to comment.