diff --git a/.gitignore b/.gitignore index 16bfbf7..3cf0755 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,14 @@ # IDE dirs .idea/ *.komodoproject +.vscode/ # Auto-generated *.pyc dist/ build/ .cache/ +.pytest_cache/ # generated by coverage .coverage diff --git a/CHANGELOG.md b/CHANGELOG.md index 8cd7bc2..9ff53be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,12 @@ # Change log ## Version 3.0.0 -- Dropped support for Python 2.6, 3.2, and 3.3 -- Add support for Python 3.5 and 3.6 +- Breaking: + - `search(begin, end, strict)` has been replaced with `at(point)`, `overlap(begin, end)`, and `envelop(begin, end)` + - `extend(items)` has been deleted; use `update(items)` instead + - Methods that take a `strict=True/False` argument now consistently default to `strict=True` + - Dropped support for Python 2.6, 3.2, and 3.3 + - Added support for Python 3.5, 3.6, and 3.7 - Faster `Interval` overlap checking (@tuxzz, #56) - Updated README: - new restructuring methods from 2.1.0 @@ -13,10 +17,12 @@ - `Interval.distance_to()` gave an incorrect distance when passed the `Interval`'s upper boundary - `Node.pop_greatest_child()` sometimes forgot to `rotate()` when creating new child nodes. (@escalonn, #41, #42) - `IntervalTree.begin()` and `end()` are O(1), not O(n). (@ProgVal, #40) + - `intersection_update()`, `symmetric_difference()`, and `symmetric_difference_update()` didn't actually work. Now they do. + - `collections.abc` deprecation warning no longer happens - Maintainers: - - use github.com/kennethreitz/pyandoc + - PyPI accepts Markdown! Woohoo! - reorganize tests - - more tests added to improve code coverage (We're at 95%! Woohoo!) + - more tests added to improve code coverage (We're at 96%! Yay!) 
- test for issue #4 had a broken import reference ## Version 2.1.0 diff --git a/Makefile b/Makefile index 6051dcc..3802ca2 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,7 @@ TEMPS=$(shell \ -o \( -type d -name '__pycache__' \) \ ) -PYTHONS:=2.7.14 3.4.3 3.5.4 3.6.3 +PYTHONS:=2.7.15 3.4.9 3.5.6 3.6.7 3.7.1 PYTHON_MAJORS:=$(shell \ echo "$(PYTHONS)" | \ tr ' ' '\n' | cut -d. -f1 | \ @@ -28,9 +28,9 @@ PYPI=pypitest # default target all: test -test: pytest rst +test: pytest -quicktest: rst +quicktest: PYPI=$(PYPI) python setup.py test coverage: @@ -43,7 +43,7 @@ pytest: deps-dev clean: clean-build clean-eggs clean-temps -distclean: clean clean-deps +distclean: clean clean-build: rm -rf dist build @@ -51,10 +51,6 @@ clean-build: clean-eggs: rm -rf *.egg* .eggs/ -clean-deps: - rm -rf pyandoc docutils bin - rm -f pandoc - clean-temps: rm -rf $(TEMPS) @@ -70,13 +66,8 @@ install-develop: uninstall: pip uninstall intervaltree -# Convert README to rst and check the result -rst: pydocutils pyandoc - PYPI=$(PYPI) python setup.py check --restructuredtext - @echo "README is ready for PyPI" - # Register at PyPI -register: rst +register: PYPI=$(PYPI) python setup.py register -r $(PYPI) # Setup for live upload @@ -87,35 +78,17 @@ release: sdist-upload: PYPI=$(PYPI) python setup.py sdist upload -r $(PYPI) -deps-ci: pyandoc - -deps-dev: pyandoc pyenv-install-versions - -pyandoc: pandoc-bin - [[ -d pyandoc/pandoc ]] || git clone --depth=50 git://github.com/kennethreitz/pyandoc.git - [[ "`readlink pandoc`" == "pyandoc/pandoc" ]] || ln -s pyandoc/pandoc pandoc +deps-dev: pyenv-install-versions -pandoc-bin: pm-update - pandoc -h &>/dev/null || brew install pandoc &>/dev/null || sudo apt-get install pandoc - -pydocutils: - $(eval PYPKG=docutils) - python -c 'import $(PYPKG)' &>/dev/null || \ - pip install --upgrade $(PYPKG) || \ - pip install --upgrade --install-options="--install-purelib='$(PWD)'" docutils - -pm-update: - pandoc -h &>/dev/null || brew update &>/dev/null || sudo apt-get 
update - # Uploads to test server, unless the release target was run too upload: test clean sdist-upload pyenv-is-installed: - pyenv --version || (echo "ERROR: pyenv not installed" && false) + pyenv --version &>/dev/null || (echo "ERROR: pyenv not installed" && false) pyenv-install-versions: pyenv-is-installed for pyver in $(PYTHONS); do (echo N | pyenv install $$pyver) || true; done - for pyver in $(PYTHONS); do export PYENV_VERSION=$$pyver; pip install -U pip; pip install -U pytest; done + for pyver in $(PYTHONS); do export PYENV_VERSION=$$pyver; pip install -U pip; pip install -U pytest; done | grep -v 'Requirement already satisfied, skipping upgrade' pyenv rehash # for debugging the Makefile @@ -136,7 +109,6 @@ env: distclean \ clean-build \ clean-eggs \ - clean-deps \ clean-temps \ install-testpypi \ install-pypi \ @@ -144,15 +116,11 @@ env: pyenv-install-versions \ pyenv-is-installed \ uninstall \ - rst \ register \ release \ sdist-upload \ deps-ci \ deps-dev \ - pyandoc \ - pandoc-bin \ - pydocutils \ pm-update \ upload \ env diff --git a/README.md b/README.md index 210716b..90096d5 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,15 @@ A mutable, self-balancing interval tree for Python 2 and 3. Queries may be by po This library was designed to allow tagging text and time intervals, where the intervals include the lower bound but not the upper bound. +**Version 3 changes!** + +* The `search(begin, end, strict)` method no longer exists. Instead, use one of these: + * `at(point)` + * `overlap(begin, end)` + * `envelop(begin, end)` +* The `extend(items)` method no longer exists. Instead, use `update(items)`. +* Methods like `merge_overlaps()` that take a `strict` argument now consistently default to `strict=True`. Before, some methods defaulted to `True` and others to `False`. 
+ Installing ---------- @@ -17,7 +26,7 @@ pip install intervaltree Features -------- -* Supports Python 2.7 and Python 3.4+ (Tested under 2.7, and 3.4 thru 3.6) +* Supports Python 2.7 and Python 3.4+ (Tested under 2.7, and 3.4 thru 3.7) * Initializing * blank `tree = IntervalTree()` * from an iterable of `Interval` objects (`tree = IntervalTree(intervals)`) @@ -37,14 +46,16 @@ Features * `tree.remove_overlap(begin, end)` (removes all overlapping the range) * `tree.remove_envelop(begin, end)` (removes all enveloped in the range) -* Overlap queries +* Point queries * `tree[point]` + * `tree.at(point)` (same as previous) + +* Overlap queries * `tree[begin:end]` - * `tree.search(point)` - * `tree.search(begin, end)` + * `tree.overlap(begin, end)` (same as previous) * Envelop queries - * `tree.search(begin, end, strict=True)` + * `tree.envelop(begin, end)` * Membership queries * `interval_obj in tree` (this is fastest, O(1)) @@ -121,7 +132,7 @@ Examples >>> t = IntervalTree() >>> t IntervalTree() - + ``` * Adding intervals - any object works! @@ -130,20 +141,20 @@ Examples >>> t[1:2] = "1-2" >>> t[4:7] = (4, 7) >>> t[5:9] = {5: 9} - + ``` * Query by point The result of a query is a `set` object, so if ordering is important, you must sort it first. - + ``` python >>> sorted(t[6]) [Interval(4, 7, (4, 7)), Interval(5, 9, {5: 9})] >>> sorted(t[6])[0] Interval(4, 7, (4, 7)) - + ``` * Query by range @@ -153,15 +164,26 @@ Examples ``` python >>> sorted(t[2:4]) [] - + ``` - But: + Since our search was over `2 ≤ x < 4`, neither `Interval(1, 2)` nor `Interval(4, 7)` + was included. The first interval, `1 ≤ x < 2`, does not include `x = 2`. The second + interval, `4 ≤ x < 7`, does include `x = 4`, but our search interval excludes it. So, + there were no overlapping intervals. 
However: ``` python >>> sorted(t[1:5]) [Interval(1, 2, '1-2'), Interval(4, 7, (4, 7))] - + + ``` + + To only return intervals that are completely enveloped by the search range: + + ``` python + >>> sorted(t.envelop(1, 5)) + [Interval(1, 2, '1-2')] + ``` * Accessing an `Interval` object @@ -174,7 +196,7 @@ Examples 7 >>> iv.data (4, 7) - + >>> begin, end, data = iv >>> begin 4 @@ -182,7 +204,7 @@ Examples 7 >>> data (4, 7) - + ``` * Constructing from lists of intervals @@ -194,40 +216,40 @@ Examples >>> t = IntervalTree( ... Interval(begin, end, "%d-%d" % (begin, end)) for begin, end in ivs ... ) - + ``` Or, if we don't need the data fields: ``` python >>> t2 = IntervalTree(Interval(*iv) for iv in ivs) - + ``` - + Or even: - + ``` python >>> t2 = IntervalTree.from_tuples(ivs) - + ``` * Removing intervals - + ``` python - >>> t.remove( Interval(1, 2, "1-2") ) + >>> t.remove(Interval(1, 2, "1-2")) >>> sorted(t) [Interval(4, 7, '4-7'), Interval(5, 9, '5-9')] - >>> t.remove( Interval(500, 1000, "Doesn't exist")) # raises ValueError + >>> t.remove(Interval(500, 1000, "Doesn't exist")) # raises ValueError Traceback (most recent call last): ValueError - + >>> t.discard(Interval(500, 1000, "Doesn't exist")) # quietly does nothing >>> del t[5] # same as t.remove_overlap(5) >>> t IntervalTree() - + ``` We could also empty a tree entirely: @@ -236,11 +258,11 @@ Examples >>> t2.clear() >>> t2 IntervalTree() - + ``` - + Or remove intervals that overlap a range: - + ``` python >>> t = IntervalTree([ ... 
Interval(0, 10), @@ -252,30 +274,30 @@ Examples [Interval(0, 10), Interval(10, 20)] ``` - + We can also remove only those intervals completely enveloped in a range: - + ``` python >>> t.remove_envelop(5, 20) >>> sorted(t) [Interval(0, 10)] - + ``` - + * Chopping We could also chop out parts of the tree: - + ``` python >>> t = IntervalTree([Interval(0, 10)]) >>> t.chop(3, 7) >>> sorted(t) [Interval(0, 3), Interval(7, 10)] - + ``` - + To modify the new intervals' data fields based on which side of the interval is being chopped: - + ``` python >>> def datafunc(iv, islower): ... oldlimit = iv[islower] @@ -286,23 +308,23 @@ Examples Interval(0, 3, 'oldlimit: 10, islower: True') >>> sorted(t)[1] Interval(7, 10, 'oldlimit: 0, islower: False') - + ``` * Slicing You can also slice intervals in the tree without removing them: - + ``` python >>> t = IntervalTree([Interval(0, 10), Interval(5, 15)]) >>> t.slice(3) >>> sorted(t) [Interval(0, 3), Interval(3, 10), Interval(5, 15)] - + ``` - + You can also set the data fields, for example, re-using `datafunc()` from above: - + ``` python >>> t = IntervalTree([Interval(5, 15)]) >>> t.slice(10, datafunc) @@ -310,7 +332,7 @@ Examples Interval(5, 10, 'oldlimit: 15, islower: True') >>> sorted(t)[1] Interval(10, 15, 'oldlimit: 5, islower: False') - + ``` Future improvements @@ -332,7 +354,7 @@ Based on Copyright --------- -* [Chaim-Leib Halbert][GH], 2013-2017 +* [Chaim Leib Halbert][GH], 2013-2018 * Modifications, [Konstantin Tretyakov][Konstantin intervaltree], 2014 Licensed under the [Apache License, version 2.0][Apache]. diff --git a/README.rst b/README.rst deleted file mode 100644 index 65e2e85..0000000 --- a/README.rst +++ /dev/null @@ -1,638 +0,0 @@ -.. This file is automatically generated by setup.py from README.md and CHANGELOG.md. - -intervaltree -============ - -A mutable, self-balancing interval tree for Python 2 and 3. Queries may -be by point, by range overlap, or by range envelopment. 
- -This library was designed to allow tagging text and time intervals, -where the intervals include the lower bound but not the upper bound. - -Installing ----------- - -.. code:: sh - - pip install intervaltree - -Features --------- - -- Supports Python 2.7 and Python 3.4+ (Tested under 2.7, and 3.4 thru - 3.6) -- Initializing - - - blank ``tree = IntervalTree()`` - - from an iterable of ``Interval`` objects - (``tree = IntervalTree(intervals)``) - - from an iterable of tuples - (``tree = IntervalTree.from_tuples(interval_tuples)``) - -- Insertions - - - ``tree[begin:end] = data`` - - ``tree.add(interval)`` - - ``tree.addi(begin, end, data)`` - -- Deletions - - - ``tree.remove(interval)`` (raises ``ValueError`` if not present) - - ``tree.discard(interval)`` (quiet if not present) - - ``tree.removei(begin, end, data)`` (short for - ``tree.remove(Interval(begin, end, data))``) - - ``tree.discardi(begin, end, data)`` (short for - ``tree.discard(Interval(begin, end, data))``) - - ``tree.remove_overlap(point)`` - - ``tree.remove_overlap(begin, end)`` (removes all overlapping the - range) - - ``tree.remove_envelop(begin, end)`` (removes all enveloped in the - range) - -- Overlap queries - - - ``tree[point]`` - - ``tree[begin:end]`` - - ``tree.search(point)`` - - ``tree.search(begin, end)`` - -- Envelop queries - - - ``tree.search(begin, end, strict=True)`` - -- Membership queries - - - ``interval_obj in tree`` (this is fastest, O(1)) - - ``tree.containsi(begin, end, data)`` - - ``tree.overlaps(point)`` - - ``tree.overlaps(begin, end)`` - -- Iterable - - - ``for interval_obj in tree:`` - - ``tree.items()`` - -- Sizing - - - ``len(tree)`` - - ``tree.is_empty()`` - - ``not tree`` - - ``tree.begin()`` (the ``begin`` coordinate of the leftmost - interval) - - ``tree.end()`` (the ``end`` coordinate of the rightmost interval) - -- Set-like operations - - - union - - - ``result_tree = tree.union(iterable)`` - - ``result_tree = tree1 | tree2`` - - ``tree.update(iterable)`` - - 
``tree |= other_tree`` - - - difference - - - ``result_tree = tree.difference(iterable)`` - - ``result_tree = tree1 - tree2`` - - ``tree.difference_update(iterable)`` - - ``tree -= other_tree`` - - - intersection - - - ``result_tree = tree.intersection(iterable)`` - - ``result_tree = tree1 & tree2`` - - ``tree.intersection_update(iterable)`` - - ``tree &= other_tree`` - - - symmetric difference - - - ``result_tree = tree.symmetric_difference(iterable)`` - - ``result_tree = tree1 ^ tree2`` - - ``tree.symmetric_difference_update(iterable)`` - - ``tree ^= other_tree`` - - - comparison - - - ``tree1.issubset(tree2)`` or ``tree1 <= tree2`` - - ``tree1 <= tree2`` - - ``tree1.issuperset(tree2)`` or ``tree1 > tree2`` - - ``tree1 >= tree2`` - - ``tree1 == tree2`` - -- Restructuring - - - ``chop(begin, end)`` (slice intervals and remove everything - between ``begin`` and ``end``, optionally modifying the data - fields of the chopped-up intervals) - - ``slice(point)`` (slice intervals at ``point``) - - ``split_overlaps()`` (slice at all interval boundaries, optionally - modifying the data field) - - ``merge_overlaps()`` (joins overlapping intervals into a single - interval, optionally merging the data fields) - - ``merge_equals()`` (joins intervals with matching ranges into a - single interval, optionally merging the data fields) - -- Copying and typecasting - - - ``IntervalTree(tree)`` (``Interval`` objects are same as those in - tree) - - ``tree.copy()`` (``Interval`` objects are shallow copies of those - in tree) - - ``set(tree)`` (can later be fed into ``IntervalTree()``) - - ``list(tree)`` (ditto) - -- Pickle-friendly -- Automatic AVL balancing - -Examples --------- - -- Getting started - - .. code:: python - - >>> from intervaltree import Interval, IntervalTree - >>> t = IntervalTree() - >>> t - IntervalTree() - -- Adding intervals - any object works! - - .. 
code:: python - - >>> t[1:2] = "1-2" - >>> t[4:7] = (4, 7) - >>> t[5:9] = {5: 9} - -- Query by point - - The result of a query is a ``set`` object, so if ordering is - important, you must sort it first. - - .. code:: python - - >>> sorted(t[6]) - [Interval(4, 7, (4, 7)), Interval(5, 9, {5: 9})] - >>> sorted(t[6])[0] - Interval(4, 7, (4, 7)) - -- Query by range - - Note that ranges are inclusive of the lower limit, but non-inclusive - of the upper limit. So: - - .. code:: python - - >>> sorted(t[2:4]) - [] - - But: - - .. code:: python - - >>> sorted(t[1:5]) - [Interval(1, 2, '1-2'), Interval(4, 7, (4, 7))] - -- Accessing an ``Interval`` object - - .. code:: python - - >>> iv = Interval(4, 7, (4, 7)) - >>> iv.begin - 4 - >>> iv.end - 7 - >>> iv.data - (4, 7) - - >>> begin, end, data = iv - >>> begin - 4 - >>> end - 7 - >>> data - (4, 7) - -- Constructing from lists of intervals - - We could have made a similar tree this way: - - .. code:: python - - >>> ivs = [(1, 2), (4, 7), (5, 9)] - >>> t = IntervalTree( - ... Interval(begin, end, "%d-%d" % (begin, end)) for begin, end in ivs - ... ) - - Or, if we don't need the data fields: - - .. code:: python - - >>> t2 = IntervalTree(Interval(*iv) for iv in ivs) - - Or even: - - .. code:: python - - >>> t2 = IntervalTree.from_tuples(ivs) - -- Removing intervals - - .. code:: python - - >>> t.remove( Interval(1, 2, "1-2") ) - >>> sorted(t) - [Interval(4, 7, '4-7'), Interval(5, 9, '5-9')] - - >>> t.remove( Interval(500, 1000, "Doesn't exist")) # raises ValueError - Traceback (most recent call last): - ValueError - - >>> t.discard(Interval(500, 1000, "Doesn't exist")) # quietly does nothing - - >>> del t[5] # same as t.remove_overlap(5) - >>> t - IntervalTree() - - We could also empty a tree entirely: - - .. code:: python - - >>> t2.clear() - >>> t2 - IntervalTree() - - Or remove intervals that overlap a range: - - .. code:: python - - >>> t = IntervalTree([ - ... Interval(0, 10), - ... Interval(10, 20), - ... 
Interval(20, 30), - ... Interval(30, 40)]) - >>> t.remove_overlap(25, 35) - >>> sorted(t) - [Interval(0, 10), Interval(10, 20)] - - We can also remove only those intervals completely enveloped in a - range: - - .. code:: python - - >>> t.remove_envelop(5, 20) - >>> sorted(t) - [Interval(0, 10)] - -- Chopping - - We could also chop out parts of the tree: - - .. code:: python - - >>> t = IntervalTree([Interval(0, 10)]) - >>> t.chop(3, 7) - >>> sorted(t) - [Interval(0, 3), Interval(7, 10)] - - To modify the new intervals' data fields based on which side of the - interval is being chopped: - - .. code:: python - - >>> def datafunc(iv, islower): - ... oldlimit = iv[islower] - ... return "oldlimit: {0}, islower: {1}".format(oldlimit, islower) - >>> t = IntervalTree([Interval(0, 10)]) - >>> t.chop(3, 7, datafunc) - >>> sorted(t)[0] - Interval(0, 3, 'oldlimit: 10, islower: True') - >>> sorted(t)[1] - Interval(7, 10, 'oldlimit: 0, islower: False') - -- Slicing - - You can also slice intervals in the tree without removing them: - - .. code:: python - - >>> t = IntervalTree([Interval(0, 10), Interval(5, 15)]) - >>> t.slice(3) - >>> sorted(t) - [Interval(0, 3), Interval(3, 10), Interval(5, 15)] - - You can also set the data fields, for example, re-using - ``datafunc()`` from above: - - .. code:: python - - >>> t = IntervalTree([Interval(5, 15)]) - >>> t.slice(10, datafunc) - >>> sorted(t)[0] - Interval(5, 10, 'oldlimit: 15, islower: True') - >>> sorted(t)[1] - Interval(10, 15, 'oldlimit: 5, islower: False') - -Future improvements -------------------- - -See the issue tracker on GitHub. - -Based on --------- - -- Eternally Confuzzled's AVL tree -- Wikipedia's Interval Tree -- Heavily modified from Tyler Kahn's Interval Tree implementation in - Python (GitHub project) -- Incorporates contributions from: - - - konstantint/Konstantin Tretyakov of the University of Tartu - (Estonia) - - siniG/Avi Gabay - - lmcarril/Luis M. 
Carril of the Karlsruhe Institute for Technology - (Germany) - -Copyright ---------- - -- Chaim-Leib Halbert, 2013-2017 -- Modifications, Konstantin Tretyakov, 2014 - -Licensed under the Apache License, version 2.0. - -The source code for this project is at -https://github.com/chaimleib/intervaltree - -Change log -========== - -Version 3.0.0 -------------- - -- Dropped support for Python 2.6, 3.2, and 3.3 -- Add support for Python 3.5 and 3.6 -- Faster ``Interval`` overlap checking (@tuxzz, #56) -- Updated README: - - - new restructuring methods from 2.1.0 - - example of ``from_tuples()`` added - - more info about ``chop()``, ``split_overlaps()``, - ``merge_overlaps()`` and ``merge_equals()``. - -- Fixes: - - - ``Node.from_tuples()`` will now raise an error if given an empty - iterable. This should never happen, and it should error if it - does. - - ``Interval.distance_to()`` gave an incorrect distance when passed - the ``Interval``'s upper boundary - - ``Node.pop_greatest_child()`` sometimes forgot to ``rotate()`` - when creating new child nodes. (@escalonn, #41, #42) - - ``IntervalTree.begin()`` and ``end()`` are O(1), not O(n). - (@ProgVal, #40) - -- Maintainers: - - - use github.com/kennethreitz/pyandoc - - reorganize tests - - more tests added to improve code coverage (We're at 95%! Woohoo!) - - test for issue #4 had a broken import reference - -Version 2.1.0 -------------- - -- Added: - - - ``merge_overlaps()`` method and tests - - ``merge_equals()`` method and tests - - ``range()`` method - - ``span()`` method, for returning the difference between ``end()`` - and ``begin()`` - -- Fixes: - - - Development version numbering is changing to be compliant with - PEP440. Version numbering now contains major, minor and micro - release numbers, plus the number of builds following the stable - release version, e.g. 
2.0.4b34 - - Speed improvement: ``begin()`` and ``end()`` methods used - iterative ``min()`` and ``max()`` builtins instead of the more - efficient ``iloc`` member available to ``SortedDict`` - - ``overlaps()`` method used to return ``True`` even if provided - null test interval - -- Maintainers: - - - Added coverage test (``make coverage``) with html report - (``htmlcov/index.html``) - - Tests run slightly faster - -Version 2.0.4 -------------- - -- Fix: Issue #27: README incorrectly showed using a comma instead of a - colon when querying the ``IntervalTree``: it showed - ``tree[begin, end]`` instead of ``tree[begin:end]`` - -Version 2.0.3 -------------- - -- Fix: README showed using + operator for setlike union instead of the - correct \| operator -- Removed tests from release package to speed up installation; to get - the tests, download from GitHub - -Version 2.0.2 -------------- - -- Fix: Issue #20: performance enhancement for large trees. - ``IntervalTree.search()`` made a copy of the entire - ``boundary_table`` resulting in linear search time. 
The - ``sortedcollections`` package is now the sole install dependency - -Version 2.0.1 -------------- - -- Fix: Issue #26: failed to prune empty ``Node`` after a rotation - promoted contents of ``s_center`` - -Version 2.0.0 -------------- - -- ``IntervalTree`` now supports the full ``collections.MutableSet`` API -- Added: - - - ``__delitem__`` to ``IntervalTree`` - - ``Interval`` comparison methods ``lt()``, ``gt()``, ``le()`` and - ``ge()`` to ``Interval``, as an alternative to the comparison - operators, which are designed for sorting - - ``IntervalTree.from_tuples(iterable)`` - - ``IntervalTree.clear()`` - - ``IntervalTree.difference(iterable)`` - - ``IntervalTree.difference_update(iterable)`` - - ``IntervalTree.union(iterable)`` - - ``IntervalTree.intersection(iterable)`` - - ``IntervalTree.intersection_update(iterable)`` - - ``IntervalTree.symmetric_difference(iterable)`` - - ``IntervalTree.symmetric_difference_update(iterable)`` - - ``IntervalTree.chop(a, b)`` - - ``IntervalTree.slice(point)`` - -- Deprecated ``IntervalTree.extend()`` -- use ``update()`` instead -- Internal improvements: - - - More verbose tests with progress bars - - More tests for comparison and sorting behavior - - Code in the README is included in the unit tests - -- Fixes - - - BACKWARD INCOMPATIBLE: On ranged queries where ``begin >= end``, - the query operated on the overlaps of ``begin``. This behavior was - documented as expected in 1.x; it is now changed to be more - consistent with the definition of ``Interval``\ s, which are - half-open. 
- - Issue #25: pruning empty Nodes with staggered descendants could - result in invalid trees - - Sorting ``Interval``\ s and numbers in the same list gathered all - the numbers at the beginning and the ``Interval``\ s at the end - - ``IntervalTree.overlaps()`` and friends returned ``None`` instead - of ``False`` - - Maintainers: ``make install-testpypi`` failed because the ``pip`` - was missing a ``--pre`` flag - -Version 1.1.1 -------------- - -- Removed requirement for pyandoc in order to run functionality tests. - -Version 1.1.0 -------------- - -- Added ability to use ``Interval.distance_to()`` with points, not just - ``Intervals`` -- Added documentation on return types to ``IntervalTree`` and - ``Interval`` -- ``Interval.__cmp__()`` works with points too -- Fix: ``IntervalTree.score()`` returned maximum score of 0.5 instead - of 1.0. Now returns max of subscores instead of avg -- Internal improvements: - - - Development version numbering scheme, based on ``git describe`` - the "building towards" release is appended after a hyphen, eg. - 1.0.2-37-g2da2ef0-1.10. The previous tagged release is 1.0.2, and - there have been 37 commits since then, current tag is g2da2ef0, - and we are getting ready for a 1.1.0 release - - Optimality tests added - - ``Interval`` overlap tests for ranges, ``Interval``\ s and points - added - -Version 1.0.2 -------------- - --Bug fixes: - ``Node.depth_score_helper()`` raised ``AttributeError`` - -README formatting - -Version 1.0.1 -------------- - -- Fix: pip install failure because of failure to generate README.rst - -Version 1.0.0 -------------- - -- Renamed from PyIntervalTree to intervaltree -- Speed improvements for adding and removing Intervals (~70% faster - than 0.4) -- Bug fixes: - - - BACKWARD INCOMPATIBLE: ``len()`` of an ``Interval`` is always 3, - reverting to default behavior for ``namedtuples``. In Python 3, - ``len`` returning a non-integer raises an exception. 
Instead, use - ``Interval.length()``, which returns 0 for null intervals and - ``end - begin`` otherwise. Also, if the ``len() === 0``, then - ``not iv`` is ``True``. - - When inserting an ``Interval`` via ``__setitem__`` and improper - parameters given, all errors were transformed to ``IndexError`` - - ``split_overlaps`` did not update the ``boundary_table`` counts - -- Internal improvements: - - - More robust local testing tools - - Long series of interdependent tests have been separated into - sections - -Version 0.4 ------------ - -- Faster balancing (~80% faster) -- Bug fixes: - - - Double rotations were performed in place of a single rotation when - presented an unbalanced Node with a balanced child. - - During single rotation, kept referencing an unrotated Node instead - of the new, rotated one - -Version 0.3.3 -------------- - -- Made IntervalTree crash if inited with a null Interval (end <= begin) -- IntervalTree raises ValueError instead of AssertionError when a null - Interval is inserted - -Version 0.3.2 -------------- - -- Support for Python 3.2+ and 2.6+ -- Changed license from LGPL to more permissive Apache license -- Merged changes from https://github.com/konstantint/PyIntervalTree to - https://github.com/chaimleib/PyIntervalTree - - - Interval now inherits from a namedtuple. Benefits: should be - faster. Drawbacks: slight behavioural change (Intervals not - mutable anymore). - - Added float tests - - Use setup.py for tests - - Automatic testing via travis-ci - - Removed dependency on six - -- Interval improvements: - - - Intervals without data have a cleaner string representation - - Intervals without data are pickled more compactly - - Better hashing - - Intervals are ordered by begin, then end, then by data. 
If data is - not orderable, sorts by type(data) - -- Bug fixes: - - - Fixed crash when querying empty tree - - Fixed missing close parenthesis in examples - - Made IntervalTree crash earlier if a null Interval is added - -- Internals: - - - New test directory - - Nicer display of data structures for debugging, using custom - test/pprint.py (Python 2.6, 2.7) - - More sensitive exception handling - - Local script to test in all supported versions of Python - - Added IntervalTree.score() to measure how optimally a tree is - structured - -Version 0.2.3 -------------- - -- Slight changes for inclusion in PyPI. -- Some documentation changes -- Added tests -- Bug fix: interval addition via [] was broken in Python 2.7 (see - http://bugs.python.org/issue21785) -- Added intervaltree.bio subpackage, adding some utilities for use in - bioinformatics - -Version 0.2.2b --------------- - -- Forked from https://github.com/MusashiAharon/PyIntervalTree diff --git a/intervaltree/__init__.py b/intervaltree/__init__.py index ae802fe..50dcc6a 100644 --- a/intervaltree/__init__.py +++ b/intervaltree/__init__.py @@ -4,7 +4,7 @@ Root package. -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/intervaltree/interval.py b/intervaltree/interval.py index cf6c0a9..d9d80d6 100644 --- a/intervaltree/interval.py +++ b/intervaltree/interval.py @@ -4,7 +4,7 @@ Interval class -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Modifications copyright 2014 Konstantin Tretyakov Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/intervaltree/intervaltree.py b/intervaltree/intervaltree.py index 7d9449f..7f29e65 100644 --- a/intervaltree/intervaltree.py +++ b/intervaltree/intervaltree.py @@ -4,7 +4,7 @@ Core logic. 
-Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Modifications Copyright 2014 Konstantin Tretyakov Licensed under the Apache License, Version 2.0 (the "License"); @@ -22,11 +22,15 @@ from .interval import Interval from .node import Node from numbers import Number -import collections from sortedcontainers import SortedDict from copy import copy from warnings import warn +try: + from collections.abc import MutableSet # Python 3? +except ImportError: + from collections import MutableSet + try: xrange # Python 2? except NameError: # pragma: no cover @@ -34,38 +38,38 @@ # noinspection PyBroadException -class IntervalTree(collections.MutableSet): +class IntervalTree(MutableSet): """ A binary lookup tree of intervals. The intervals contained in the tree are represented using ``Interval(a, b, data)`` objects. Each such object represents a half-open interval ``[a, b)`` with optional data. - + Examples: --------- - + Initialize a blank tree:: - + >>> tree = IntervalTree() >>> tree IntervalTree() - + Initialize a tree from an iterable set of Intervals in O(n * log n):: - + >>> tree = IntervalTree([Interval(-10, 10), Interval(-20.0, -10.0)]) >>> tree IntervalTree([Interval(-20.0, -10.0), Interval(-10, 10)]) >>> len(tree) 2 - + Note that this is a set, i.e. repeated intervals are ignored. 
However, Intervals with different data fields are regarded as different:: - + >>> tree = IntervalTree([Interval(-10, 10), Interval(-10, 10), Interval(-10, 10, "x")]) >>> tree IntervalTree([Interval(-10, 10), Interval(-10, 10, 'x')]) >>> len(tree) 2 - + Insertions:: >>> tree = IntervalTree() >>> tree[0:1] = "data" @@ -102,16 +106,16 @@ class IntervalTree(collections.MutableSet): >>> tree.discard(Interval(-10, 10)) # Same as remove, but no exception on failure >>> tree IntervalTree([Interval(-20, -10), Interval(10, 20)]) - + Delete intervals, overlapping a given point:: - + >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) >>> tree.remove_overlap(1.1) >>> tree IntervalTree([Interval(-1.1, 1.1)]) - + Delete intervals, overlapping an interval:: - + >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) >>> tree.remove_overlap(0, 0.5) >>> tree @@ -125,9 +129,9 @@ class IntervalTree(collections.MutableSet): >>> tree.remove_overlap(1.6, 1.5) # Ditto >>> tree IntervalTree([Interval(0.5, 1.7)]) - + Delete intervals, enveloped in the range:: - + >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) >>> tree.remove_envelop(-1.0, 1.5) >>> tree @@ -141,23 +145,29 @@ class IntervalTree(collections.MutableSet): >>> tree.remove_envelop(0.5, 1.7) >>> tree IntervalTree() - - Point/interval overlap queries:: - + + Point queries:: + + >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) + >>> assert tree[-1.1] == set([Interval(-1.1, 1.1)]) + >>> assert tree.at(1.1) == set([Interval(-0.5, 1.5), Interval(0.5, 1.7)]) # Same as tree[1.1] + >>> assert tree.at(1.5) == set([Interval(0.5, 1.7)]) # Same as tree[1.5] + + Interval overlap queries:: + + >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) + >>> assert tree.overlap(1.7, 1.8) == set() + >>> assert tree.overlap(1.5, 1.8) == set([Interval(0.5, 1.7)]) + >>> assert 
tree[1.5:1.8] == set([Interval(0.5, 1.7)]) # same as previous + >>> assert tree.overlap(1.1, 1.8) == set([Interval(-0.5, 1.5), Interval(0.5, 1.7)]) + >>> assert tree[1.1:1.8] == set([Interval(-0.5, 1.5), Interval(0.5, 1.7)]) # same as previous + + Interval envelop queries:: + >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) - >>> assert tree[-1.1] == set([Interval(-1.1, 1.1)]) - >>> assert tree.search(1.1) == set([Interval(-0.5, 1.5), Interval(0.5, 1.7)]) # Same as tree[1.1] - >>> assert tree[-0.5:0.5] == set([Interval(-0.5, 1.5), Interval(-1.1, 1.1)]) # Interval overlap query - >>> assert tree.search(1.5, 1.5) == set() # Same as tree[1.5:1.5] - >>> assert tree.search(1.5) == set([Interval(0.5, 1.7)]) # Same as tree[1.5] - - >>> assert tree.search(1.7, 1.8) == set() - - Envelop queries:: - - >>> assert tree.search(-0.5, 0.5, strict=True) == set() - >>> assert tree.search(-0.4, 1.7, strict=True) == set([Interval(0.5, 1.7)]) - + >>> assert tree.envelop(-0.5, 0.5) == set() + >>> assert tree.envelop(-0.5, 1.5) == set([Interval(-0.5, 1.5)]) + Membership queries:: >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) @@ -177,7 +187,7 @@ class IntervalTree(collections.MutableSet): False >>> tree.overlaps(-1.2, -1.0) True - + Sizing:: >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)]) @@ -195,7 +205,7 @@ class IntervalTree(collections.MutableSet): -1.1 >>> print(tree.end()) # ditto 1.7 - + Iteration:: >>> tree = IntervalTree([Interval(-11, 11), Interval(-5, 15), Interval(5, 17)]) @@ -204,7 +214,7 @@ class IntervalTree(collections.MutableSet): >>> assert tree.items() == set([Interval(-5, 15), Interval(-11, 11), Interval(5, 17)]) Copy- and typecasting, pickling:: - + >>> tree0 = IntervalTree([Interval(0, 1, "x"), Interval(1, 2, ["x"])]) >>> tree1 = IntervalTree(tree0) # Shares Interval objects >>> tree2 = tree0.copy() # Shallow copy (same as above, as Intervals are 
singletons) @@ -219,9 +229,9 @@ class IntervalTree(collections.MutableSet): IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['y'])]) >>> tree3 IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['x'])]) - + Equality testing:: - + >>> IntervalTree([Interval(0, 1)]) == IntervalTree([Interval(0, 1)]) True >>> IntervalTree([Interval(0, 1)]) == IntervalTree([Interval(0, 1, "x")]) @@ -238,9 +248,9 @@ def from_tuples(cls, tups): def __init__(self, intervals=None): """ - Set up a tree. If intervals is provided, add all the intervals + Set up a tree. If intervals is provided, add all the intervals to the tree. - + Completes in O(n*log n) time. """ intervals = set(intervals) if intervals is not None else set() @@ -258,30 +268,30 @@ def __init__(self, intervals=None): def copy(self): """ - Construct a new IntervalTree using shallow copies of the + Construct a new IntervalTree using shallow copies of the intervals in the source tree. - + Completes in O(n*log n) time. :rtype: IntervalTree """ return IntervalTree(iv.copy() for iv in self) - + def _add_boundaries(self, interval): """ Records the boundaries of the interval in the boundary table. """ begin = interval.begin end = interval.end - if begin in self.boundary_table: + if begin in self.boundary_table: self.boundary_table[begin] += 1 else: self.boundary_table[begin] = 1 - + if end in self.boundary_table: self.boundary_table[end] += 1 else: self.boundary_table[end] = 1 - + def _remove_boundaries(self, interval): """ Removes the boundaries of the interval from the boundary table. @@ -292,19 +302,19 @@ def _remove_boundaries(self, interval): del self.boundary_table[begin] else: self.boundary_table[begin] -= 1 - + if self.boundary_table[end] == 1: del self.boundary_table[end] else: self.boundary_table[end] -= 1 - + def add(self, interval): """ Adds an interval to the tree, if not already present. - + Completes in O(log n) time. 
""" - if interval in self: + if interval in self: return if interval.is_null(): @@ -320,38 +330,31 @@ def add(self, interval): self.all_intervals.add(interval) self._add_boundaries(interval) append = add - + def addi(self, begin, end, data=None): """ Shortcut for add(Interval(begin, end, data)). - + Completes in O(log n) time. """ return self.add(Interval(begin, end, data)) appendi = addi - + def update(self, intervals): """ Given an iterable of intervals, add them to the tree. - - Completes in O(m*log(n+m), where m = number of intervals to + + Completes in O(m*log(n+m), where m = number of intervals to add. """ for iv in intervals: self.add(iv) - def extend(self, intervals): - """ - Deprecated: Replaced by update(). - """ - warn("IntervalTree.extend() has been deprecated. Consider using update() instead", DeprecationWarning) - self.update(intervals) - def remove(self, interval): """ - Removes an interval from the tree, if present. If not, raises + Removes an interval from the tree, if present. If not, raises ValueError. - + Completes in O(log n) time. """ #self.verify() @@ -362,20 +365,20 @@ def remove(self, interval): self.all_intervals.remove(interval) self._remove_boundaries(interval) #self.verify() - + def removei(self, begin, end, data=None): """ Shortcut for remove(Interval(begin, end, data)). - + Completes in O(log n) time. """ return self.remove(Interval(begin, end, data)) - + def discard(self, interval): """ - Removes an interval from the tree, if present. If not, does + Removes an interval from the tree, if present. If not, does nothing. - + Completes in O(log n) time. """ if interval not in self: @@ -383,11 +386,11 @@ def discard(self, interval): self.all_intervals.discard(interval) self.top_node = self.top_node.discard(interval) self._remove_boundaries(interval) - + def discardi(self, begin, end, data=None): """ Shortcut for discard(Interval(begin, end, data)). - + Completes in O(log n) time. 
""" return self.discard(Interval(begin, end, data)) @@ -433,7 +436,8 @@ def intersection_update(self, other): """ Removes intervals from self unless they also exist in other. """ - for iv in self: + ivs = list(self) + for iv in ivs: if iv not in other: self.remove(iv) @@ -444,7 +448,7 @@ def symmetric_difference(self, other): """ if not isinstance(other, set): other = set(other) me = set(self) - ivs = me - other + (other - me) + ivs = me.difference(other).union(other.difference(me)) return IntervalTree(ivs) def symmetric_difference_update(self, other): @@ -453,7 +457,8 @@ def symmetric_difference_update(self, other): not both. """ other = set(other) - for iv in self: + ivs = list(self) + for iv in ivs: if iv in other: self.remove(iv) other.remove(iv) @@ -462,26 +467,26 @@ def symmetric_difference_update(self, other): def remove_overlap(self, begin, end=None): """ Removes all intervals overlapping the given point or range. - + Completes in O((r+m)*log n) time, where: * n = size of the tree * m = number of matches * r = size of the search range (this is 1 for a point) """ - hitlist = self.search(begin, end) - for iv in hitlist: + hitlist = self.at(begin) if end is None else self.overlap(begin, end) + for iv in hitlist: self.remove(iv) def remove_envelop(self, begin, end): """ Removes all intervals completely enveloped in the given range. - + Completes in O((r+m)*log n) time, where: * n = size of the tree * m = number of matches - * r = size of the search range (this is 1 for a point) + * r = size of the search range """ - hitlist = self.search(begin, end, strict=True) + hitlist = self.envelop(begin, end) for iv in hitlist: self.remove(iv) @@ -491,8 +496,8 @@ def chop(self, begin, end, datafunc=None): the chopped area so that nothing overlaps. 
""" insertions = set() - begin_hits = [iv for iv in self[begin] if iv.begin < begin] - end_hits = [iv for iv in self[end] if iv.end > end] + begin_hits = [iv for iv in self.at(begin) if iv.begin < begin] + end_hits = [iv for iv in self.at(end) if iv.end > end] if datafunc: for iv in begin_hits: @@ -519,7 +524,7 @@ def slice(self, point, datafunc=None): :param datafunc(interval, isupper): callable returning a new value for the interval's data field """ - hitlist = set(iv for iv in self[point] if iv.begin < point) + hitlist = set(iv for iv in self.at(point) if iv.begin < point) insertions = set() if datafunc: for iv in hitlist: @@ -542,31 +547,31 @@ def clear(self): def find_nested(self): """ - Returns a dictionary mapping parent intervals to sets of + Returns a dictionary mapping parent intervals to sets of intervals overlapped by and contained in the parent. - + Completes in O(n^2) time. :rtype: dict of [Interval, set of Interval] """ result = {} - + def add_if_nested(): if parent.contains_interval(child): if parent not in result: result[parent] = set() result[parent].add(child) - + long_ivs = sorted(self.all_intervals, key=Interval.length, reverse=True) for i, parent in enumerate(long_ivs): for child in long_ivs[i + 1:]: add_if_nested() return result - + def overlaps(self, begin, end=None): """ Returns whether some interval in the tree overlaps the given point or range. - + Completes in O(r*log n) time, where r is the size of the search range. :rtype: bool @@ -577,24 +582,24 @@ def overlaps(self, begin, end=None): return self.overlaps_point(begin) else: return self.overlaps_range(begin.begin, begin.end) - + def overlaps_point(self, p): """ Returns whether some interval in the tree overlaps p. - + Completes in O(log n) time. :rtype: bool """ if self.is_empty(): return False return bool(self.top_node.contains_point(p)) - + def overlaps_range(self, begin, end): """ Returns whether some interval in the tree overlaps the given range. 
Returns False if given a null interval over which to test. - + Completes in O(r*log n) time, where r is the range length and n is the table size. :rtype: bool @@ -606,17 +611,17 @@ def overlaps_range(self, begin, end): elif self.overlaps_point(begin): return True return any( - self.overlaps_point(bound) - for bound in self.boundary_table + self.overlaps_point(bound) + for bound in self.boundary_table if begin < bound < end ) - + def split_overlaps(self): """ Finds all intervals with overlapping ranges and splits them along the range boundaries. - - Completes in worst-case O(n^2*log n) time (many interval + + Completes in worst-case O(n^2*log n) time (many interval boundaries are inside many intervals), best-case O(n*log n) time (small number of overlaps << n per interval). """ @@ -634,7 +639,7 @@ def split_overlaps(self): self.__init__(new_ivs) - def merge_overlaps(self, data_reducer=None, data_initializer=None): + def merge_overlaps(self, data_reducer=None, data_initializer=None, strict=True): """ Finds all intervals with overlapping ranges and merges them into a single interval. If provided, uses data_reducer and @@ -653,8 +658,12 @@ def merge_overlaps(self, data_reducer=None, data_initializer=None): Interval's data field as the first value for current_reduced_data. If data_initializer is not None, current_reduced_data is set to a shallow copy of - data_initiazer created with - copy.copy(data_initializer). + data_initializer created with copy.copy(data_initializer). + + If strict is True (default), intervals are only merged if + their ranges actually overlap; adjacent, touching intervals + will not be merged. If strict is False, intervals are merged + even if they are only end-to-end adjacent. Completes in O(n*logn). 
""" @@ -680,7 +689,8 @@ def new_series(): for higher in sorted_intervals: if merged: # series already begun lower = merged[-1] - if higher.begin <= lower.end: # should merge + if (higher.begin < lower.end or + not strict and higher.begin == lower.end): # should merge upper_bound = max(lower.end, higher.end) if data_reducer is not None: current_reduced[0] = data_reducer(current_reduced[0], higher.data) @@ -756,78 +766,113 @@ def new_series(): def items(self): """ - Constructs and returns a set of all intervals in the tree. - + Constructs and returns a set of all intervals in the tree. + Completes in O(n) time. :rtype: set of Interval """ return set(self.all_intervals) - + def is_empty(self): """ Returns whether the tree is empty. - + Completes in O(1) time. :rtype: bool """ return 0 == len(self) - def search(self, begin, end=None, strict=False): + def at(self, p): + """ + Returns the set of all intervals that contain p. + + Completes in O(m + log n) time, where: + * n = size of the tree + * m = number of matches + :rtype: set of Interval + """ + root = self.top_node + if not root: + return set() + return root.search_point(p, set()) + + def envelop(self, begin, end=None): """ - Returns a set of all intervals overlapping the given range. Or, - if strict is True, returns the set of all intervals fully - contained in the range [begin, end]. - + Returns the set of all intervals fully contained in the range + [begin, end). 
+ Completes in O(m + k*log n) time, where: * n = size of the tree * m = number of matches - * k = size of the search range (this is 1 for a point) + * k = size of the search range :rtype: set of Interval """ root = self.top_node if not root: return set() if end is None: - try: - iv = begin - return self.search(iv.begin, iv.end, strict=strict) - except: - return root.search_point(begin, set()) + iv = begin + return self.envelop(iv.begin, iv.end) elif begin >= end: return set() - else: - result = root.search_point(begin, set()) - - boundary_table = self.boundary_table - bound_begin = boundary_table.bisect_left(begin) - bound_end = boundary_table.bisect_left(end) # exclude final end bound - result.update(root.search_overlap( - # slice notation is slightly slower - boundary_table.keys()[index] for index in xrange(bound_begin, bound_end) - )) - - # TODO: improve strict search to use node info instead of less-efficient filtering - if strict: - result = set( - iv for iv in result - if iv.begin >= begin and iv.end <= end - ) - return result - + result = root.search_point(begin, set()) # bound_begin might be greater + boundary_table = self.boundary_table + bound_begin = boundary_table.bisect_left(begin) + bound_end = boundary_table.bisect_left(end) # up to, but not including end + result.update(root.search_overlap( + # slice notation is slightly slower + boundary_table.keys()[index] for index in xrange(bound_begin, bound_end) + )) + + # TODO: improve envelop() to use node info instead of less-efficient filtering + result = set( + iv for iv in result + if iv.begin >= begin and iv.end <= end + ) + return result + + def overlap(self, begin, end=None): + """ + Returns a set of all intervals overlapping the given range. 
+ + Completes in O(m + k*log n) time, where: + * n = size of the tree + * m = number of matches + * k = size of the search range + :rtype: set of Interval + """ + root = self.top_node + if not root: + return set() + if end is None: + iv = begin + return self.overlap(iv.begin, iv.end) + elif begin >= end: + return set() + result = root.search_point(begin, set()) # bound_begin might be greater + boundary_table = self.boundary_table + bound_begin = boundary_table.bisect_left(begin) + bound_end = boundary_table.bisect_left(end) # up to, but not including end + result.update(root.search_overlap( + # slice notation is slightly slower + boundary_table.keys()[index] for index in xrange(bound_begin, bound_end) + )) + return result + def begin(self): """ Returns the lower bound of the first interval in the tree. - + Completes in O(1) time. """ if not self.boundary_table: return 0 return self.boundary_table.keys()[0] - + def end(self): """ Returns the upper bound of the last interval in the tree. - + Completes in O(1) time. """ if not self.boundary_table: @@ -856,7 +901,7 @@ def span(self): def print_structure(self, tostring=False): """ ## FOR DEBUGGING ONLY ## - Pretty-prints the structure of the tree. + Pretty-prints the structure of the tree. If tostring is true, prints nothing and returns a string. :rtype: None or str """ @@ -868,7 +913,7 @@ def print_structure(self, tostring=False): print(result) else: return result - + def verify(self): """ ## FOR DEBUGGING ONLY ## @@ -978,9 +1023,9 @@ def s_center_score(): def __getitem__(self, index): """ - Returns a set of all intervals overlapping the given index or + Returns a set of all intervals overlapping the given index or slice. 
- + Completes in O(k * log(n) + m) time, where: * n = size of the tree * m = number of matches @@ -995,18 +1040,18 @@ def __getitem__(self, index): return set(self) if stop is None: stop = self.end() - return self.search(start, stop) + return self.overlap(start, stop) except AttributeError: - return self.search(index) - + return self.at(index) + def __setitem__(self, index, value): """ Adds a new interval to the tree. A shortcut for add(Interval(index.start, index.stop, value)). - - If an identical Interval object with equal range and data + + If an identical Interval object with equal range and data already exists, does nothing. - + Completes in O(log n) time. """ self.addi(index.start, index.stop, value) @@ -1022,7 +1067,7 @@ def __contains__(self, item): Returns whether item exists as an Interval in the tree. This method only returns True for exact matches; for overlaps, see the overlaps() method. - + Completes in O(1) time. :rtype: bool """ @@ -1032,47 +1077,47 @@ def __contains__(self, item): return item in self.all_intervals #else: # return self.contains_point(item) - + def containsi(self, begin, end, data=None): """ Shortcut for (Interval(begin, end, data) in tree). - + Completes in O(1) time. :rtype: bool """ return Interval(begin, end, data) in self - + def __iter__(self): """ Returns an iterator over all the intervals in the tree. - + Completes in O(1) time. :rtype: collections.Iterable[Interval] """ return self.all_intervals.__iter__() iter = __iter__ - + def __len__(self): """ Returns how many intervals are in the tree. - + Completes in O(1) time. :rtype: int """ return len(self.all_intervals) - + def __eq__(self, other): """ Whether two IntervalTrees are equal. - + Completes in O(n) time if sizes are equal; O(1) time otherwise. 
:rtype: bool """ return ( - isinstance(other, IntervalTree) and + isinstance(other, IntervalTree) and self.all_intervals == other.all_intervals ) - + def __repr__(self): """ :rtype: str diff --git a/intervaltree/node.py b/intervaltree/node.py index ac5ee23..9cdbfbb 100644 --- a/intervaltree/node.py +++ b/intervaltree/node.py @@ -4,7 +4,7 @@ Core logic: internal tree nodes. -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Modifications Copyright 2014 Konstantin Tretyakov Licensed under the Apache License, Version 2.0 (the "License"); @@ -393,26 +393,26 @@ def get_new_s_center(): else: #print('Pop descent to {}'.format(self[1].x_center)) (greatest_child, self[1]) = self[1].pop_greatest_child() - self.refresh_balance() - new_self = self.rotate() # Move any overlaps into greatest_child - for iv in set(new_self.s_center): + for iv in set(self.s_center): if iv.contains_point(greatest_child.x_center): - new_self.s_center.remove(iv) + self.s_center.remove(iv) greatest_child.add(iv) #print('Pop Returning child = {}'.format( # greatest_child.print_structure(tostring=True) # )) - if new_self.s_center: + if self.s_center: #print('and returning newnode = {}'.format( # new_self.print_structure(tostring=True) # )) #new_self.verify() + self.refresh_balance() + new_self = self.rotate() return greatest_child, new_self else: - new_self = new_self.prune() + new_self = self.prune() #print('and returning prune = {}'.format( # new_self.print_structure(tostring=True) # )) diff --git a/scripts/testall.sh b/scripts/testall.sh index 3628d33..7cf8aed 100755 --- a/scripts/testall.sh +++ b/scripts/testall.sh @@ -2,12 +2,14 @@ # Tests using `python setup.py test` using different versions of python. 
this_dir="$(dirname "$0")" -base_dir="$(dirname "$this_dir")" +export base_dir="$(dirname "$this_dir")" -for ver in $PYTHONS; do - echo "$ver" +function testWithPython() { + ver="$1" export PYENV_VERSION="$ver" python --version python "$base_dir/setup.py" test || exit 1 -done +} +export -f testWithPython +parallel testWithPython ::: $PYTHONS diff --git a/setup.py b/setup.py index 493a375..6d559ab 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ Note that "python setup.py test" invokes pytest on the package. With appropriately configured setup.cfg, this will check both xxx_test modules and docstrings. -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -26,11 +26,10 @@ from setuptools import setup from setuptools.command.test import test as TestCommand -from utils import fs, doc, version +from utils import version ## CONFIG target_version = '3.0.0' -create_rst = True version_info = version.version_info(target_version) if version_info['is_dev_version']: @@ -40,6 +39,8 @@ print("!!!>>> This is a RELEASE version <<= 2.0, < 3.0'], description='Editable interval tree data structure for Python 2 and 3', - long_description=doc.get_rst(), + long_description=long_description, classifiers=[ # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers 'Development Status :: 4 - Beta', + 'Programming Language :: Python :: Implementation :: PyPy', 'Intended Audience :: Developers', 'Intended Audience :: Information Technology', 'Intended Audience :: Science/Research', @@ -74,6 +76,7 @@ def run_tests(self): 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'License :: OSI Approved :: Apache Software License', 'Topic :: Scientific/Engineering :: Artificial Intelligence', 'Topic :: 
Scientific/Engineering :: Bio-Informatics', @@ -84,7 +87,7 @@ def run_tests(self): 'Topic :: Text Processing :: Markup', ], keywords='interval-tree data-structure intervals tree', # Separate with spaces - author='Chaim-Leib Halbert, Konstantin Tretyakov', + author='Chaim Leib Halbert, Konstantin Tretyakov', author_email='chaim.leib.halbert@gmail.com', url='https://github.com/chaimleib/intervaltree', download_url='https://github.com/chaimleib/intervaltree/tarball/{version}'.format(**version_info), @@ -93,6 +96,5 @@ def run_tests(self): include_package_data=True, zip_safe=True, tests_require=['pytest'], - cmdclass={'test': PyTest}, - entry_points={} + cmdclass={'test': PyTest} ) diff --git a/test/__init__.py b/test/__init__.py index 7fe2ae7..a1bcf22 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2013-2017 Chaim-Leib Halbert +# Copyright 2013-2018 Chaim Leib Halbert # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/test/interval_methods/__init__.py b/test/interval_methods/__init__.py index ee65411..57ba27a 100644 --- a/test/interval_methods/__init__.py +++ b/test/interval_methods/__init__.py @@ -4,7 +4,7 @@ Test module: Interval methods -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/interval_methods/binary_test.py b/test/interval_methods/binary_test.py index 9f5334c..4b8451b 100644 --- a/test/interval_methods/binary_test.py +++ b/test/interval_methods/binary_test.py @@ -4,7 +4,7 @@ Test module: Intervals, methods on two intervals -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/test/interval_methods/sorting_test.py b/test/interval_methods/sorting_test.py index 777346a..0521f37 100644 --- a/test/interval_methods/sorting_test.py +++ b/test/interval_methods/sorting_test.py @@ -4,7 +4,7 @@ Test module: Intervals, sorting methods -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/interval_methods/unary_test.py b/test/interval_methods/unary_test.py index 861f53a..4e0196e 100644 --- a/test/interval_methods/unary_test.py +++ b/test/interval_methods/unary_test.py @@ -4,7 +4,7 @@ Test module: Intervals, methods on self only -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/intervals.py b/test/intervals.py index 1061d88..7c2612e 100644 --- a/test/intervals.py +++ b/test/intervals.py @@ -4,7 +4,7 @@ Test module: utilities to generate intervals -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/intervaltree_methods/__init__.py b/test/intervaltree_methods/__init__.py index 8005d0d..dd9822c 100644 --- a/test/intervaltree_methods/__init__.py +++ b/test/intervaltree_methods/__init__.py @@ -4,7 +4,7 @@ Test module: IntervalTree methods -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/test/intervaltree_methods/copy_test.py b/test/intervaltree_methods/copy_test.py index 920e75a..e974eb7 100644 --- a/test/intervaltree_methods/copy_test.py +++ b/test/intervaltree_methods/copy_test.py @@ -4,7 +4,7 @@ Test module: IntervalTree, Copying -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/intervaltree_methods/debug_test.py b/test/intervaltree_methods/debug_test.py index e4f9993..90d231a 100644 --- a/test/intervaltree_methods/debug_test.py +++ b/test/intervaltree_methods/debug_test.py @@ -4,7 +4,7 @@ Test module: IntervalTree, Basic query methods (read-only) -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/intervaltree_methods/delete_test.py b/test/intervaltree_methods/delete_test.py index ae7129f..1ff6927 100644 --- a/test/intervaltree_methods/delete_test.py +++ b/test/intervaltree_methods/delete_test.py @@ -4,7 +4,7 @@ Test module: IntervalTree, Basic deletion methods -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/intervaltree_methods/init_test.py b/test/intervaltree_methods/init_test.py index beef84c..08d570a 100644 --- a/test/intervaltree_methods/init_test.py +++ b/test/intervaltree_methods/init_test.py @@ -4,7 +4,7 @@ Test module: IntervalTree, initialization methods -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/test/intervaltree_methods/insert_test.py b/test/intervaltree_methods/insert_test.py index daaabf9..f7ac8fd 100644 --- a/test/intervaltree_methods/insert_test.py +++ b/test/intervaltree_methods/insert_test.py @@ -4,7 +4,7 @@ Test module: IntervalTree, Basic insertion methods -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -47,7 +47,7 @@ def test_insert(): Interval(10, 20), ]) - tree.extend([Interval(19.9, 20.1), Interval(20.1, 30)]) + tree.update([Interval(19.9, 20.1), Interval(20.1, 30)]) assert len(tree) == 5 assert tree.items() == set([ Interval(0, 1, "data"), @@ -76,7 +76,7 @@ def test_duplicate_insert(): assert len(tree) == 1 assert tree.items() == contents - tree.extend([Interval(-10, 20, "arbitrary data")]) + tree.update([Interval(-10, 20, "arbitrary data")]) assert len(tree) == 1 assert tree.items() == contents @@ -98,7 +98,7 @@ def test_duplicate_insert(): assert len(tree) == 2 assert tree.items() == contents - tree.extend([Interval(-10, 20), Interval(-10, 20, "arbitrary data")]) + tree.update([Interval(-10, 20), Interval(-10, 20, "arbitrary data")]) assert len(tree) == 2 assert tree.items() == contents diff --git a/test/intervaltree_methods/query_test.py b/test/intervaltree_methods/query_test.py index eae200f..29e782e 100644 --- a/test/intervaltree_methods/query_test.py +++ b/test/intervaltree_methods/query_test.py @@ -4,7 +4,7 @@ Test module: IntervalTree, Basic query methods (read-only) -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -39,6 +39,8 @@ def test_empty_queries(): assert t.begin() == 0 assert t.end() == 0 assert t[t.begin():t.end()] == e + assert t.overlap(t.begin(), t.end()) == e + assert t.envelop(t.begin(), t.end()) == e assert t.items() == e assert set(t) == e assert set(t.copy()) == e @@ -48,20 +50,35 @@ def test_empty_queries(): t.verify() -def test_queries(): +def test_point_queries(): t = IntervalTree.from_tuples(data.ivs1.data) - assert match.set_data(t[4]) == set(['[4,7)']) - assert match.set_data(t[4:5]) == set(['[4,7)']) - assert match.set_data(t[4:6]) == set(['[4,7)', '[5,9)']) + assert match.set_data(t.at(4)) == set(['[4,7)']) assert match.set_data(t[9]) == set(['[6,10)', '[8,10)', '[8,15)']) + assert match.set_data(t.at(9)) == set(['[6,10)', '[8,10)', '[8,15)']) assert match.set_data(t[15]) == set() - assert match.set_data(t.search(5)) == set(['[4,7)', '[5,9)']) - assert match.set_data(t.search(6, 11, strict=True)) == set(['[6,10)', '[8,10)']) + assert match.set_data(t.at(15)) == set() + assert match.set_data(t[5]) == set(['[4,7)', '[5,9)']) + assert match.set_data(t.at(5)) == set(['[4,7)', '[5,9)']) + assert match.set_data(t[4:5]) == set(['[4,7)']) -def test_partial_slice_query(): - def assert_chop(t, limit): +def test_envelop_vs_overlap_queries(): + t = IntervalTree.from_tuples(data.ivs1.data) + assert match.set_data(t.envelop(4, 5)) == set() + assert match.set_data(t.overlap(4, 5)) == set(['[4,7)']) + assert match.set_data(t.envelop(4, 6)) == set() + assert match.set_data(t.overlap(4, 6)) == set(['[4,7)', '[5,9)']) + assert match.set_data(t.envelop(6, 10)) == set(['[6,10)', '[8,10)']) + assert match.set_data(t.overlap(6, 10)) == set([ + '[4,7)', '[5,9)', '[6,10)', '[8,10)', '[8,15)']) + assert match.set_data(t.envelop(6, 11)) == set(['[6,10)', '[8,10)']) + assert match.set_data(t.overlap(6, 11)) == set([ + '[4,7)', '[5,9)', '[6,10)', '[8,10)', '[8,15)', '[10,12)']) + + +def test_partial_get_query(): + def assert_get(t, limit): s = set(t) assert t[:] == s @@ -71,8 
+88,8 @@ def assert_chop(t, limit): s = set(iv for iv in t if iv.end > limit) assert t[limit:] == s - assert_chop(IntervalTree.from_tuples(data.ivs1.data), 7) - assert_chop(IntervalTree.from_tuples(data.ivs2.data), -3) + assert_get(IntervalTree.from_tuples(data.ivs1.data), 7) + assert_get(IntervalTree.from_tuples(data.ivs2.data), -3) def test_tree_bounds(): diff --git a/test/intervaltree_methods/restructure_test.py b/test/intervaltree_methods/restructure_test.py index 7969491..5aaeb0f 100644 --- a/test/intervaltree_methods/restructure_test.py +++ b/test/intervaltree_methods/restructure_test.py @@ -4,7 +4,7 @@ Test module: IntervalTree, Special methods -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -62,16 +62,20 @@ def test_merge_overlaps_empty(): def test_merge_overlaps_gapless(): + # default strict=True t = IntervalTree.from_tuples(data.ivs2.data) - rng = t.range() - t.merge_overlaps() t.verify() + assert [(iv.begin, iv.end, iv.data) for iv in sorted(t)] == data.ivs2.data + # strict=False + t = IntervalTree.from_tuples(data.ivs2.data) + rng = t.range() + t.merge_overlaps(strict=False) + t.verify() assert len(t) == 1 assert t.pop() == rng - def test_merge_overlaps_with_gap(): t = IntervalTree.from_tuples(data.ivs1.data) @@ -398,7 +402,7 @@ def test_split_overlap(): while t: iv = set(t).pop() t.remove(iv) - for other in t.search(iv): + for other in t.overlap(iv): assert other.begin == iv.begin assert other.end == iv.end diff --git a/test/intervaltree_methods/setlike_test.py b/test/intervaltree_methods/setlike_test.py index 0eafb82..f6a6298 100644 --- a/test/intervaltree_methods/setlike_test.py +++ b/test/intervaltree_methods/setlike_test.py @@ -4,7 +4,7 @@ Test module: IntervalTree, Special methods -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache 
License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -38,26 +38,12 @@ def test_update(): assert len(t) == 1 assert set(t).pop() == interval - t.clear() - assert not t - t.extend(s) - t.extend(s) - assert isinstance(t, IntervalTree) - assert len(t) == 1 - assert set(t).pop() == interval - interval = Interval(2, 3) t.update([interval]) assert isinstance(t, IntervalTree) assert len(t) == 2 assert sorted(t)[1] == interval - t = IntervalTree(s) - t.extend([interval]) - assert isinstance(t, IntervalTree) - assert len(t) == 2 - assert sorted(t)[1] == interval - def test_invalid_update(): t = IntervalTree() @@ -68,12 +54,6 @@ def test_invalid_update(): with pytest.raises(ValueError): t.update([Interval(1, 1)]) - with pytest.raises(ValueError): - t.extend([Interval(1, 0)]) - - with pytest.raises(ValueError): - t.extend([Interval(1, 1)]) - def test_union(): t = IntervalTree() @@ -91,26 +71,12 @@ def test_union(): assert len(t) == 1 assert set(t).pop() == interval - # extend with duplicates - t.extend(s) - t.extend(s) - assert len(t) == 1 - assert set(t).pop() == interval - # update with non-dupe interval = Interval(2, 3) t.update([interval]) assert len(t) == 2 assert sorted(t)[1] == interval - # extend with non-dupe - t.remove(interval) - assert len(t) == 1 - assert interval not in t - t.extend([interval]) - assert len(t) == 2 - assert sorted(t)[1] == interval - # commutativity with full overlaps, then no overlaps a = IntervalTree.from_tuples(data.ivs1.data) b = IntervalTree.from_tuples(data.ivs2.data) @@ -194,13 +160,6 @@ def test_invalid_union(): t.union([Interval(1, 0)]) -def test_invalid_update(): - t = IntervalTree() - - with pytest.raises(ValueError): - t.update([Interval(1, 1)]) - - def test_difference(): minuend = IntervalTree.from_tuples(data.ivs1.data) assert isinstance(minuend, IntervalTree) @@ -264,12 +223,30 @@ def test_intersection(): # intersections with e assert a.intersection(e) == e + ae = a.copy() + 
ae.intersection_update(e) + assert ae == e + assert b.intersection(e) == e + be = b.copy() + be.intersection_update(e) + assert be == e + assert e.intersection(e) == e + ee = e.copy() + ee.intersection_update(e) + assert ee == e # intersections with self assert a.intersection(a) == a + aa = a.copy() + aa.intersection_update(a) + assert aa == a + assert b.intersection(b) == b + bb = b.copy() + bb.intersection_update(b) + assert bb == b # commutativity resulting in empty ab = a.intersection(b) @@ -279,10 +256,18 @@ def test_intersection(): assert ab == ba assert len(ab) == 0 # no overlaps, so empty tree + ab = a.copy() + ab.intersection_update(b) + ba = b.copy() + ba.intersection_update(a) + ab.verify() + ba.verify() + assert ab == ba + assert len(ab) == 0 # no overlaps, so empty tree + # commutativity on non-overlapping sets ab = a.union(b) ba = b.union(a) - aba = ab.intersection(a) # these should yield no change abb = ab.intersection(b) bab = ba.intersection(b) @@ -296,6 +281,25 @@ def test_intersection(): assert bab == b assert baa == a + ab = a.union(b) + ba = b.union(a) + aba = ab.copy() + aba.intersection_update(a) # these should yield no change + abb = ab.copy() + abb.intersection_update(b) + bab = ba.copy() + bab.intersection_update(b) + baa = ba.copy() + baa.intersection_update(a) + aba.verify() + abb.verify() + bab.verify() + baa.verify() + assert aba == a + assert abb == b + assert bab == b + assert baa == a + # commutativity with overlapping sets c = IntervalTree.from_tuples(data.ivs3.data) bc = b.intersection(c) @@ -306,16 +310,154 @@ def test_intersection(): assert len(bc) < len(b) assert len(bc) < len(c) assert len(bc) > 0 - assert b.containsi(13, 23) assert c.containsi(13, 23) assert bc.containsi(13, 23) + assert not b.containsi(819, 828) + assert not c.containsi(0, 1) + assert not bc.containsi(819, 828) + assert not bc.containsi(0, 1) + bc = b.copy() + bc.intersection_update(c) + cb = c.copy() + cb.intersection_update(b) + bc.verify() + cb.verify() +
assert bc == cb + assert len(bc) < len(b) + assert len(bc) < len(c) + assert len(bc) > 0 + assert b.containsi(13, 23) + assert c.containsi(13, 23) + assert bc.containsi(13, 23) assert not b.containsi(819, 828) assert not c.containsi(0, 1) - assert not bc.containsi(819, 820) + assert not bc.containsi(819, 828) assert not bc.containsi(0, 1) +def test_symmetric_difference(): + a = IntervalTree.from_tuples(data.ivs1.data) + b = IntervalTree.from_tuples(data.ivs2.data) + e = IntervalTree() + + # symdiffs with e + assert a.symmetric_difference(e) == a + ae = a.copy() + ae.symmetric_difference_update(e) + assert ae == a + + assert b.symmetric_difference(e) == b + be = b.copy() + be.symmetric_difference_update(e) + assert be == b + + assert e.symmetric_difference(e) == e + ee = e.copy() + ee.symmetric_difference_update(e) + assert ee == e + + # symdiff with self + assert a.symmetric_difference(a) == e + aa = a.copy() + aa.symmetric_difference_update(a) + assert aa == e + + assert b.symmetric_difference(b) == e + bb = b.copy() + bb.symmetric_difference_update(b) + assert bb == e + + # commutativity resulting in the union of both + ab = a.symmetric_difference(b) + ba = b.symmetric_difference(a) + ab.verify() + ba.verify() + assert ab == ba + assert len(ab) == len(a) + len(b) # no overlaps, so sum + + ab = a.copy() + ab.symmetric_difference_update(b) + ba = b.copy() + ba.symmetric_difference_update(a) + ab.verify() + ba.verify() + assert ab == ba + assert len(ab) == len(a) + len(b) # no overlaps, so sum + + # commutativity on non-overlapping sets + ab = a.union(b) + ba = b.union(a) + aba = ab.symmetric_difference(a) + abb = ab.symmetric_difference(b) + bab = ba.symmetric_difference(b) + baa = ba.symmetric_difference(a) + aba.verify() + abb.verify() + bab.verify() + baa.verify() + assert aba == b + assert abb == a + assert bab == a + assert baa == b + + ab = a.union(b) + ba = b.union(a) + aba = ab.copy() + aba.symmetric_difference_update(a) + abb = ab.copy() +
abb.symmetric_difference_update(b) + bab = ba.copy() + bab.symmetric_difference_update(b) + baa = ba.copy() + baa.symmetric_difference_update(a) + aba.verify() + abb.verify() + bab.verify() + baa.verify() + assert aba == b + assert abb == a + assert bab == a + assert baa == b + + # commutativity with overlapping sets + c = IntervalTree.from_tuples(data.ivs3.data) + bc = b.symmetric_difference(c) + cb = c.symmetric_difference(b) + bc.verify() + cb.verify() + assert bc == cb + assert len(bc) > 0 + assert len(bc) < len(b) + len(c) + assert b.containsi(13, 23) + assert c.containsi(13, 23) + assert not bc.containsi(13, 23) + assert c.containsi(819, 828) + assert not b.containsi(819, 828) + assert b.containsi(0, 1) + assert not c.containsi(0, 1) + assert bc.containsi(819, 828) + assert bc.containsi(0, 1) + + bc = b.copy() + bc.symmetric_difference_update(c) + cb = c.copy() + cb.symmetric_difference_update(b) + bc.verify() + cb.verify() + assert bc == cb + assert len(bc) > 0 + assert len(bc) < len(b) + len(c) + assert b.containsi(13, 23) + assert c.containsi(13, 23) + assert not bc.containsi(13, 23) + assert c.containsi(819, 828) + assert not b.containsi(819, 828) + assert b.containsi(0, 1) + assert not c.containsi(0, 1) + assert bc.containsi(819, 828) + assert bc.containsi(0, 1) + if __name__ == "__main__": pytest.main([__file__, '-v']) diff --git a/test/intervaltrees.py b/test/intervaltrees.py index d32fdd2..e57c487 100644 --- a/test/intervaltrees.py +++ b/test/intervaltrees.py @@ -4,7 +4,7 @@ Test module: utilities to generate test trees -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/test/issues/__init__.py b/test/issues/__init__.py index 2d34645..457f28e 100644 --- a/test/issues/__init__.py +++ b/test/issues/__init__.py @@ -4,7 +4,7 @@ Test module: issues by tracking number -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/issues/issue25_test.py b/test/issues/issue25_test.py index 99c28dc..67d1c29 100644 --- a/test/issues/issue25_test.py +++ b/test/issues/issue25_test.py @@ -5,7 +5,7 @@ Test module: IntervalTree, insertion and removal of float intervals Submitted as issue #25 (Incorrect KeyError) by sciencectn -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/issues/issue26_test.py b/test/issues/issue26_test.py index c9d78a4..9ebab49 100644 --- a/test/issues/issue26_test.py +++ b/test/issues/issue26_test.py @@ -6,7 +6,7 @@ Submitted as issue #26 (Pop from empty list error) by sciencectn Ensure that rotations that promote Intervals prune when necessary -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/issues/issue27_test.py b/test/issues/issue27_test.py index 7f3b97a..a78ed39 100644 --- a/test/issues/issue27_test.py +++ b/test/issues/issue27_test.py @@ -6,7 +6,7 @@ Submitted as issue #26 (Pop from empty list error) by sciencectn Ensure that rotations that promote Intervals prune when necessary -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
@@ -34,20 +34,20 @@ def original_print(): print(it[iobj.begin, iobj.end]) # set(), should be using : for iobj in it: - print(it.search(iobj.begin, iobj.end)) + print(it.overlap(iobj.begin, iobj.end)) # set([Interval(6, 9, 'rad')]) # set([Interval(1, 3, 'dude'), Interval(2, 4, 'sweet')]) # set([Interval(1, 3, 'dude'), Interval(2, 4, 'sweet')]) -def test_brackets_vs_search(): +def test_brackets_vs_overlap(): it = IntervalTree() it.addi(1, 3, "dude") it.addi(2, 4, "sweet") it.addi(6, 9, "rad") for iobj in it: - assert it[iobj.begin:iobj.end] == it.search(iobj.begin, iobj.end) + assert it[iobj.begin:iobj.end] == it.overlap(iobj.begin, iobj.end) # set([Interval(6, 9, 'rad')]) # set([Interval(1, 3, 'dude'), Interval(2, 4, 'sweet')]) diff --git a/test/issues/issue41_test.py b/test/issues/issue41_test.py index 82f8a60..3bdfd0d 100644 --- a/test/issues/issue41_test.py +++ b/test/issues/issue41_test.py @@ -5,7 +5,7 @@ Test module: IntervalTree, removal of intervals Submitted as issue #41 (Interval removal breaks this tree) by escalonn -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/issues/issue67_test.py b/test/issues/issue67_test.py new file mode 100644 index 0000000..5097bb6 --- /dev/null +++ b/test/issues/issue67_test.py @@ -0,0 +1,73 @@ +""" +intervaltree: A mutable, self-balancing interval tree for Python 2 and 3. +Queries may be by point, by range overlap, or by range envelopment. + +Test module: IntervalTree, insertion of a sequence of intervals caused +invariant violation +Submitted as issue #67 (Inserting intervals in specific sequence results in +invalid tree) by suola + +Copyright 2013-2018 Chaim Leib Halbert + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import +from intervaltree import IntervalTree +import pytest + +def test_interval_insertion_67(): + intervals = ( + (3657433088, 3665821696), + (2415132672, 2415394816), + (201326592, 268435456), + (163868672, 163870720), + (3301965824, 3303014400), + (4026531840, 4294967296), + (3579899904, 3579904000), + (3439329280, 3443523584), + (3431201536, 3431201664), + (3589144576, 3589275648), + (2531000320, 2531033088), + (4187287552, 4187291648), + (3561766912, 3561783296), + (3046182912, 3046187008), + (3506438144, 3506962432), + (3724953872, 3724953888), + (3518234624, 3518496768), + (3840335872, 3840344064), + (3492279181, 3492279182), + (3447717888, 3456106496), + (3589390336, 3589398528), + (3486372962, 3486372963), + (3456106496, 3472883712), + (3508595496, 3508595498), + (3511853376, 3511853440), + (3452226160, 3452226168), + (3544510720, 3544510736), + (3525894144, 3525902336), + (3524137920, 3524137984), + (3508853334, 3508853335), + (3467337728, 3467341824), + (3463212256, 3463212260), + (3446643456, 3446643712), + (3473834176, 3473834240), + (3487039488, 3487105024), + (3444686112, 3444686144), + (3459268608, 3459276800), + (3483369472, 3485466624), + ) + tree = IntervalTree() + for interval in intervals: + tree.addi(*interval) + tree.verify() + diff --git a/test/match.py b/test/match.py index 795d441..d4ccd73 100644 --- a/test/match.py +++ b/test/match.py @@ -4,7 +4,7 @@ Test module: IntervalTree, test utilities -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License,
Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/optimality/__init__.py b/test/optimality/__init__.py index ebf23e8..b04a452 100644 --- a/test/optimality/__init__.py +++ b/test/optimality/__init__.py @@ -4,7 +4,7 @@ Test module: IntervalTree structure optimality tests -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/optimality/optimality_test.py b/test/optimality/optimality_test.py index ea17082..eb2223d 100644 --- a/test/optimality/optimality_test.py +++ b/test/optimality/optimality_test.py @@ -4,7 +4,7 @@ Test module: IntervalTree optimality -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/optimality/optimality_test_matrix.py b/test/optimality/optimality_test_matrix.py index e8df302..5a2c40c 100644 --- a/test/optimality/optimality_test_matrix.py +++ b/test/optimality/optimality_test_matrix.py @@ -4,7 +4,7 @@ Test module: IntervalTree optimality -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/progress_bar.py b/test/progress_bar.py index ca44de7..c1966eb 100644 --- a/test/progress_bar.py +++ b/test/progress_bar.py @@ -4,7 +4,7 @@ Test module: progress bar -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
diff --git a/utils/doc.py b/utils/doc.py deleted file mode 100644 index ba5dba1..0000000 --- a/utils/doc.py +++ /dev/null @@ -1,140 +0,0 @@ -""" -intervaltree: A mutable, self-balancing interval tree for Python 2 and 3. -Queries may be by point, by range overlap, or by range envelopment. - -Documentation utilities - -Copyright 2013-2017 Chaim-Leib Halbert - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -from __future__ import absolute_import -import os -from warnings import warn -import re - -from . import fs - -def get_rst(create_rst=True): - if os.path.isdir('pyandoc/pandoc') and os.path.islink('pandoc'): - print("Generating README.rst from README.md and CHANGELOG.md") - return generate_rst(create_rst).decode('utf-8') - elif os.path.isfile('README.rst'): - print("Reading README.rst") - return fs.read_file('README.rst').decode('utf-8') - else: - warn("No README.rst found!") - print("Reading README.md") - data = ''.join([ - fs.read_file('README.md'), - '\n', - fs.read_file('CHANGELOG.md'), - ]) - return data.decode('utf-8') - - -## Convert README to rst for PyPI -def generate_rst(create_rst): - """Converts Markdown to RST for PyPI""" - md = fs.read_file("README.md") - - md = pypi_sanitize_markdown(md) - rst = markdown2rst(md) - rst = pypi_prepare_rst(rst) - - changes_md = pypi_sanitize_markdown(fs.read_file("CHANGELOG.md")) - changes_rst = markdown2rst(changes_md) - rst += "\n".encode('utf-8') + changes_rst - - # Write it - if create_rst: - fs.update_file('README.rst', rst) - else: - 
fs.rm_f('README.rst') - - return rst - - -def markdown2rst(md): - """Convert markdown to rst format using pandoc. No other processing.""" - # import here, because outside it might not used - try: - import pandoc - except ImportError as e: - raise - else: - pandoc.PANDOC_PATH = 'pandoc' # until pyandoc gets updated - - converter = pandoc.Document() - converter.markdown_github = md - rst = converter.rst - - return rst - - -## Sanitizers -def pypi_sanitize_markdown(md): - """Prepare markdown for conversion to PyPI rst""" - md = chop_markdown_header(md) - md = remove_markdown_links(md) - - return md - - -def pypi_prepare_rst(rst): - """Add a notice that the rst was auto-generated""" - head = """\ -.. This file is automatically generated by setup.py from README.md and CHANGELOG.md. - -""" - rst = head.encode('utf-8') + rst - - return rst - - -def chop_markdown_header(md): - """ - Remove empty lines and travis-ci header from markdown string. - :param md: input markdown string - :type md: str - :return: simplified markdown string data - :rtype: str - """ - md = md.splitlines() - while not md[0].strip() or md[0].startswith('[!['.encode('utf-8')): - md = md[1:] - md = '\n'.encode('utf-8').join(md) - return md - - -def remove_markdown_links(md): - """PyPI doesn't like links, so we remove them.""" - # named links, e.g. [hello][url to hello] or [hello][] - md = md.decode('utf-8') - - md = re.sub( - r'\[((?:[^\]]|\\\])+)\]' # link text - r'\[((?:[^\]]|\\\])*)\]', # link name - '\\1', - md - ) - - # url links, e.g. [example.com](http://www.example.com) - md = re.sub( - r'\[((?:[^\]]|\\\])+)\]' # link text - r'\(((?:[^\]]|\\\])*)\)', # link url - '\\1', - md - ) - - return md.encode('utf-8') diff --git a/utils/fs.py b/utils/fs.py deleted file mode 100644 index 0f97435..0000000 --- a/utils/fs.py +++ /dev/null @@ -1,68 +0,0 @@ -""" -intervaltree: A mutable, self-balancing interval tree for Python 2 and 3. -Queries may be by point, by range overlap, or by range envelopment. 
- -File utilities - -Copyright 2013-2017 Chaim-Leib Halbert - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import os -import errno - -## Filesystem utilities -def read_file(path): - """Reads file into string.""" - with open(path, 'rb') as f: - data = f.read() - return data - - -def mkdir_p(path): - """Like `mkdir -p` in unix""" - if not path.strip(): - return - try: - os.makedirs(path) - except OSError as e: - if e.errno == errno.EEXIST and os.path.isdir(path): - pass - else: - raise - - -def rm_f(path): - """Like `rm -f` in unix""" - try: - os.unlink(path) - except OSError as e: - if e.errno == errno.ENOENT: - pass - else: - raise - - -def update_file(path, data): - """Writes data to path, creating path if it doesn't exist""" - # delete file if already exists - rm_f(path) - - # create parent dirs if needed - parent_dir = os.path.dirname(path) - if not os.path.isdir(os.path.dirname(parent_dir)): - mkdir_p(parent_dir) - - # write file - with open(path, 'wb') as f: - f.write(data) diff --git a/utils/version.py b/utils/version.py index a2a5f21..f50ea96 100644 --- a/utils/version.py +++ b/utils/version.py @@ -4,7 +4,7 @@ Version utilities -Copyright 2013-2017 Chaim-Leib Halbert +Copyright 2013-2018 Chaim Leib Halbert Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License.