Merge pull request #16 from chrisburr/add-docs

Add docs and auto expression type detection
scikit-hep · Feb 27, 2018 · 588e793 · 588e793
2 parents 8ad8b2c + 878f95f
commit 588e793
Show file tree

Hide file tree

Showing 9 changed files with 144 additions and 19 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -21,6 +21,7 @@ install:
   - pip install coveralls
   - pip install pytest
   - pip install pytest-cov
+  - pip install numexpr
   - pip install -e .
 
 script:

diff --git a/README.rst b/README.rst
@@ -1,6 +1,3 @@
-🚧 **Formulate is currently under construction and not yet
-ready for production use! Expect things to break!** 🚧
-
 Formulate
 =========
 
@@ -29,7 +26,7 @@ Install formulate like any other Python package:
 
     pip install --user formulate
 
-or similar (use ``sudo``, ``virtualenv``, or ``conda`` if you wish).
+or similar (use ``sudo``, ``virtualenv``, or ``conda`` if you wish).
 
 
 Usage
@@ -62,10 +59,64 @@ Command line usage
 API
 """
 
-**TODO**
+The most basic usage involves calling ``from_$BACKEND`` and then ``to_$BACKEND``, for example when starting with a ROOT style expression:
+
+.. code-block:: python
+
+    >>> import formulate
+    >>> momentum = formulate.from_root('TMath::Sqrt(X_PX**2 + X_PY**2 + X_PZ**2)')
+    >>> momentum
+    Expression<SQRT>(Expression<ADD>(Expression<POW>(Variable(X_PX), UnnamedConstant(2)), Expression<POW>(Variable(X_PY),
+    UnnamedConstant(2)), Expression<POW>(Variable(X_PZ), UnnamedConstant(2))))
+    >>> momentum.to_numexpr()
+    'sqrt(((X_PX ** 2) + (X_PY ** 2) + (X_PZ ** 2)))'
+    >>> momentum.to_root()
+    'TMath::Sqrt(((X_PX ** 2) + (X_PY ** 2) + (X_PZ ** 2)))'
+
+Similarly, when starting with a ``numexpr`` style expression:
+
+.. code-block:: python
+
+    >>> my_selection = formulate.from_numexpr('X_PT > 5 & (Mu_NHits > 3 | Mu_PT > 10)')
+    >>> my_selection.to_root()
+    '(X_PT > 5) && ((Mu_NHits > 3) || (Mu_PT > 10))'
+    >>> my_selection.to_numexpr()
+    '(X_PT > 5) & ((Mu_NHits > 3) | (Mu_PT > 10))'
+
+If the type of expression isn't known in advance, ``formulate`` can also auto-detect it:
+
+.. code-block:: python
+
+    >>> my_sum = formulate.from_auto('True + False')
+    >>> my_sum.to_root()
+    'true + false'
+    >>> my_sum.to_numexpr()
+    'True + False'
+
+
+The ``Expression`` Object
+"""""""""""""""""""""""""
+
+When calling ``from_*`` the returned object is derived from ``formulate.ExpressionComponent``. From this object you can inspect the expression to find its dependencies:
+
+.. code-block:: python
+
+    >>> my_check = formulate.from_auto('(X_THETA*TMath::DegToRad() > pi/4) && D_PE > 9.2')
+    >>> my_check.variables
+    {'D_PE', 'X_THETA'}
+    >>> my_check.named_constants
+    {'DEG2RAD', 'PI'}
+    >>> my_check.unnamed_constants
+    {'4', '9.2'}
+
+Additionally ``ExpressionComponent`` s can be combined using both operators and ``numpy`` functions:
+
+.. code-block:: python
 
+    >>> new_selection = (momentum > 100) and (my_check or (numpy.sqrt(my_sum) < 1))
+    >>> new_selection.to_numexpr()
+    '((X_THETA * 0.017453292519943295) > (3.141592653589793 / 4)) & (D_PE > 9.2)'
 
-The `Expression` Object
------------------------
+As the ``==`` operator returns a new expression, it can't be used to check for equality. Instead the ``.equivalent`` method should be used:
 
-**TODO**
+**TODO: Implement this using** ``expression.equivalent`` **!**
diff --git a/formulate/__init__.py b/formulate/__init__.py
@@ -5,7 +5,7 @@
 
 from pyparsing import ParserElement
 
-from .backends import from_numexpr, to_numexpr
+from .backends import from_auto, from_numexpr, to_numexpr
 from .backends import from_root, to_root
 from .expression import ExpressionComponent, SingleComponent, Expression, Variable, NamedConstant, UnnamedConstant
 from .parser import ParsingException
@@ -20,6 +20,7 @@
     'NamedConstant',
     'UnnamedConstant',
     'ParsingException',
+    'from_auto',
     # numexpr
     'from_numexpr',
     'to_numexpr',

diff --git a/formulate/backends/__init__.py b/formulate/backends/__init__.py
@@ -3,19 +3,46 @@
 from __future__ import division
 from __future__ import print_function
 
+import re
+
+from ..parser import ParsingException
 from .numexpr import numexpr_parser
 from .ROOT import root_parser
 
 
 __all__ = [
+    'from_auto',
     'from_numexpr',
     'to_numexpr',
     'from_root',
     'to_root',
 ]
 
+
 from_numexpr = numexpr_parser.to_expression
 to_numexpr = numexpr_parser.to_string
 
 from_root = root_parser.to_expression
 to_root = root_parser.to_string
+
+
+def from_auto(string):
+    """Parse ``string`` with whichever backend appears to match its syntax.
+
+    Cheap syntactic hints are checked first; if none of them match, every
+    backend is tried in turn (ROOT first, then numexpr).
+
+    :param string: the expression text to parse
+    :returns: whatever ``from_root`` or ``from_numexpr`` returns for ``string``
+    :raises ParsingException: if the brute-force fallback is reached and no
+        backend can parse ``string``; a backend selected by a hint may also
+        raise, and that error is not intercepted here
+    """
+    # ROOT hints: doubled logical operators, the TMath:: namespace and the
+    # lower-case boolean literals. The literals are matched as whole words so
+    # that identifiers such as ``true_mass`` or ``is_false`` do not count.
+    if (any(token in string for token in ('&&', '||', 'TMath::')) or
+            re.search(r'\b(?:true|false)\b', string)):
+        return from_root(string)
+
+    # numexpr hints: a lone ``&`` or ``|`` (lookarounds also cover operators
+    # at the very start or end of the string) and the capitalised literals.
+    if (re.search(r'(?<!&)&(?!&)|(?<!\|)\|(?!\|)', string) or
+            re.search(r'\b(?:True|False)\b', string)):
+        return from_numexpr(string)
+
+    # No hint matched, so fall back to brute force over the backends
+    for parse in (from_root, from_numexpr):
+        try:
+            return parse(string)
+        except ParsingException:
+            pass
+
+    raise ParsingException('No available backend which can parse: '+string)
diff --git a/formulate/expression.py b/formulate/expression.py
@@ -3,6 +3,8 @@
 from __future__ import division
 from __future__ import print_function
 
+import numbers
+
 from .identifiers import IDs
 from .logging import add_logging
 
@@ -18,6 +20,14 @@
 
 
 class ExpressionComponent(object):
+    def to_numexpr(self, *args, **kwargs):
+        """Return ``numexpr_parser.to_string`` applied to this component.
+
+        Any extra positional or keyword arguments are forwarded unchanged.
+        """
+        # NOTE(review): imported inside the method, presumably to avoid a
+        # circular import with the backends package -- confirm.
+        from .backends.numexpr import numexpr_parser
+        return numexpr_parser.to_string(self, *args, **kwargs)
+
+    def to_root(self, *args, **kwargs):
+        """Return ``root_parser.to_string`` applied to this component.
+
+        Any extra positional or keyword arguments are forwarded unchanged.
+        """
+        # NOTE(review): imported inside the method, presumably to avoid a
+        # circular import with the backends package -- confirm.
+        from .backends.ROOT import root_parser
+        return root_parser.to_string(self, *args, **kwargs)
+
     # Binary arithmetic operators
     def __add__(self, value):
         return Expression(IDs.ADD, self, value)
@@ -209,9 +219,18 @@ class SingleComponent(ExpressionComponent):
 
 class Expression(ExpressionComponent):
     def __init__(self, id, *args):
+        """Store the operation ``id`` and its arguments, coercing plain values.
+
+        Numbers are wrapped as ``UnnamedConstant(str(arg))``, strings as
+        ``Variable(str(arg))`` and ``ExpressionComponent`` instances are kept
+        unchanged.
+
+        :raises ValueError: if an argument is none of the above
+        """
-        assert all(isinstance(arg, ExpressionComponent) for arg in args)
+        checked_args = []
+        for arg in args:
+            # Plain numbers become unnamed constants holding their string form
+            if isinstance(arg, numbers.Number):
+                checked_args.append(UnnamedConstant(str(arg)))
+            # Plain strings are treated as variable names
+            elif isinstance(arg, str):
+                checked_args.append(Variable(str(arg)))
+            elif isinstance(arg, ExpressionComponent):
+                checked_args.append(arg)
+            else:
+                raise ValueError(repr(arg)+' is not a valid type')
         self._id = id
-        self._args = args
+        # The coerced arguments are stored as a list
+        self._args = checked_args
 
     def __repr__(self):
         try:

diff --git a/formulate/parser.py b/formulate/parser.py
@@ -204,10 +204,7 @@ def precedence(self):
     def to_string(self, expression, config, constants):
         args = []
         for arg in expression.args:
-            if isinstance(arg, Expression):
-                arg = arg.to_string(config, constants)
-            else:
-                arg = str(arg)
+            arg = arg.to_string(config, constants)
             args.append(arg)
 
         if self._rhs_only:

diff --git a/formulate/version.py b/formulate/version.py
@@ -11,6 +11,6 @@
 ]
 
 
-__version__ = '0.0.6'
+__version__ = '0.0.7'
 version = __version__
 version_info = __version__.split('.')
diff --git a/setup.py b/setup.py
@@ -33,9 +33,9 @@ def get_version():
       test_suite='tests',
       install_requires=['numpy', 'pyparsing>=2.1.9', 'colorlog', 'aenum', 'scipy'],
       setup_requires=['pytest-runner'],
-      tests_require=['pytest'],
+      tests_require=['pytest', 'numexpr'],
       classifiers=[
-          'Development Status :: 2 - Pre-Alpha',
+          'Development Status :: 3 - Alpha',
           'Intended Audience :: Developers',
           'Intended Audience :: Information Technology',
           'Intended Audience :: Science/Research',

diff --git a/tests/backends/test_backends.py b/tests/backends/test_backends.py
@@ -3,7 +3,11 @@
 from __future__ import division
 from __future__ import print_function
 
-from formulate import from_numexpr, to_numexpr, from_root, to_root
+import pytest
+import numpy as np
+import numexpr
+
+from formulate import from_auto, from_numexpr, to_numexpr, from_root, to_root
 
 from ..utils import assert_equal_expressions
 
@@ -15,6 +19,8 @@ def test():
         assert_equal_expressions(root_expression, numexpr_expression)
         assert to_numexpr(root_expression) == to_numexpr(numexpr_expression)
         assert to_root(root_expression) == to_root(numexpr_expression)
+        assert root_expression.to_numexpr() == numexpr_expression.to_numexpr()
+        assert root_expression.to_root() == numexpr_expression.to_root()
 
     return test
 
@@ -27,3 +33,26 @@ def test():
 test_006 = do_checks('A & B & C & D', 'A && B && C && D')
 test_007 = do_checks('A & B | C & D', 'A && B || C && D')
 test_008 = do_checks('A & ~B | C & D', 'A && !B || C && D')
+
+
+def test_readme():
+    """Check the snippets shown in README.rst against the real API."""
+    momentum = from_root('TMath::Sqrt(X_PX**2 + X_PY**2 + X_PZ**2)')
+    assert momentum.to_numexpr() == 'sqrt(((X_PX ** 2) + (X_PY ** 2) + (X_PZ ** 2)))'
+    assert momentum.to_root() == 'TMath::Sqrt(((X_PX ** 2) + (X_PY ** 2) + (X_PZ ** 2)))'
+    my_selection = from_numexpr('X_PT > 5 & (Mu_NHits > 3 | Mu_PT > 10)')
+    assert my_selection.to_root() == '(X_PT > 5) && ((Mu_NHits > 3) || (Mu_PT > 10))'
+    assert my_selection.to_numexpr() == '(X_PT > 5) & ((Mu_NHits > 3) | (Mu_PT > 10))'
+    my_sum = from_auto('True + False')
+    assert my_sum.to_root() == 'true + false'
+    assert my_sum.to_numexpr() == 'True + False'
+    my_check = from_auto('(X_THETA*TMath::DegToRad() > pi/4) && D_PE > 9.2')
+    assert my_check.variables == {'D_PE', 'X_THETA'}
+    assert my_check.named_constants == {'DEG2RAD', 'PI'}
+    assert my_check.unnamed_constants == {'4', '9.2'}
+    # NOTE(review): ``and``/``or`` cannot be overloaded, so Python returns one
+    # of the operands here. Assuming expression objects are truthy (no
+    # ``__bool__`` override is visible in this diff), this is ``my_check``.
+    new_selection = (momentum > 100) and (my_check or (np.sqrt(my_sum) < 1))
+
+    def numexpr_eval(string):
+        return numexpr.evaluate(string, local_dict=dict(X_THETA=1234, D_PE=678))
+
+    # ``assert pytest.approx(a, b)`` only tests the truthiness of the approx
+    # object (``b`` is taken as the relative tolerance), so it can never
+    # fail. Compare the two evaluated results directly instead.
+    np.testing.assert_array_equal(
+        numexpr_eval(new_selection.to_numexpr()),
+        numexpr_eval('((X_THETA * 0.017453292519943295) > (3.141592653589793 / 4)) & (D_PE > 9.2)'))