Merge branch 'release/0.4.0'

cp2k · Jul 19, 2021 · b7c9950 · b7c9950
2 parents d4ee83c + db036e2
commit b7c9950
Show file tree

Hide file tree

Showing 52 changed files with 911 additions and 201 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,7 @@
+version: 2
+updates:
+- package-ecosystem: pip
+  directory: "/"
+  schedule:
+    interval: daily
+  open-pull-requests-limit: 10
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - uses: actions/setup-python@v1
-    - uses: dschep/install-poetry-action@v1.3
+    - uses: actions/setup-python@v2
+    - uses: snok/install-poetry@v1.1.2
     - run: poetry config pypi-token.pypi ${{ secrets.PYPI_TOKEN }}
     - run: poetry publish --build
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -6,11 +6,5 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - uses: actions/checkout@v2
-    - uses: actions/setup-python@v1
-    - name: set PY
-      run: echo "::set-env name=PY::$(python --version --version | sha256sum | cut -d' ' -f1)"
-    - uses: actions/cache@v1
-      with:
-        path: ~/.cache/pre-commit
-        key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
-    - uses: pre-commit/[email protected]
+    - uses: actions/setup-python@v2
+    - uses: pre-commit/[email protected]
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -1,30 +1,30 @@
-name: tests
+name: Run Tests
 on: [push, pull_request]
 
 jobs:
   pytest:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8, pypy3]
+        python-version: [3.6, 3.7, 3.8, 3.9, pypy3]
 
     steps:
     - uses: actions/checkout@v2
 
-    - name: Set up python
-      uses: actions/setup-python@v1
+    - name: Set up python ${{ matrix.python-version }}
+      uses: actions/setup-python@v2
       with:
         python-version: ${{ matrix.python-version }}
         architecture: x64
 
     - name: Install Poetry
-      uses: dschep/install-poetry-action@v1.3
+      uses: snok/install-poetry@v1.1.2
 
-    - name: Install Dependencies
+    - name: Install dependencies
       run: poetry install -E yaml
 
     - name: Run pytest
-      run: poetry run pytest --cov-report xml --cov-report term-missing --cov-append --cov cp2k_output_tools tests/
+      run: poetry run pytest --cov-report=xml --cov-report=term-missing --cov-append --cov=cp2k_output_tools tests/
 
     - uses: codecov/codecov-action@v1
       with:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -2,16 +2,24 @@
 # # Install pre-commit hooks via
 # pre-commit install
 
-default_language_version:
-  python: python3
-
 repos:
+- repo: https://github.com/pycqa/isort
+  rev: 5.9.1
+  hooks:
+  - id: isort
+    name: isort (python)
+  - id: isort
+    name: isort (cython)
+    types: [cython]
+  - id: isort
+    name: isort (pyi)
+    types: [pyi]
 - repo: https://github.com/ambv/black
-  rev: stable
+  rev: 21.6b0
   hooks:
   - id: black
 - repo: https://gitlab.com/pycqa/flake8
-  rev: 3.7.0
+  rev: 3.9.2
   hooks:
   - id: flake8
     additional_dependencies: ["flake8-bugbear"]
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,12 @@
+## [0.4.0] - 2021-07-19
+
+* cp2kparse/api: can now parse forces
+* cp2kparse: gained a highlight mode to show what gets matched
+* cp2kparse: moved to click for providing the CLI to improve UX
+* cp2kparse/api: fix bug with parsing line continued kv
+* api: introduced BlockMatch to provide more info about the match (like the char span)
+* cp2k_bs2csv: generate files in CWD rather than in source dir
+
 ## [0.3.1] - 2020-09-09
 
 * cp2k_pdos: add support for list-of-atoms output

diff --git a/README.md b/README.md
@@ -1,18 +1,21 @@
 # cp2k-output-tools
 
-[![Build Status](https://github.com/cp2k/cp2k-output-tools/workflows/tests/badge.svg)](https://github.com/cp2k/cp2k-output-tools/actions) [![codecov](https://codecov.io/gh/cp2k/cp2k-output-tools/branch/develop/graph/badge.svg)](https://codecov.io/gh/cp2k/cp2k-output-tools) [![PyPI](https://img.shields.io/pypi/pyversions/cp2k-output-tools)](https://pypi.org/project/cp2k-output-tools/)
+[![Build Status](https://github.com/cp2k/cp2k-output-tools/actions/workflows/test.yml/badge.svg)](https://github.com/cp2k/cp2k-output-tools/actions) [![codecov](https://codecov.io/gh/cp2k/cp2k-output-tools/branch/develop/graph/badge.svg)](https://codecov.io/gh/cp2k/cp2k-output-tools) [![PyPI](https://img.shields.io/pypi/pyversions/cp2k-output-tools)](https://pypi.org/project/cp2k-output-tools/)
 
 Modular CP2K output file parsers, mostly in the form of regular expressions plus other tools to mangle various CP2K output:
 
-  * `cp2kparse` ... parse CP2K output files (for restart & input files look at the [cp2k-input-tools](https://github.com/cp2k/cp2k-input-tools) project)
-  * `xyz_restart_parser` ... when restarts occur during an MD you may end up with duplicated frames in the trajectory, this tool filters them
+  * `cp2kparse` ... parse CP2K output (for restart & input files look at the [cp2k-input-tools](https://github.com/cp2k/cp2k-input-tools) project) and allow easy selection of common values.
+  * `xyz_restart_parser` ... when restarts occur during an MD you may end up with duplicated frames in the trajectory, this tool filters them (and can easily handle huge files)
   * `cp2k_bs2csv` ... convert a CP2K band structure file to multiple (one-per-set) CSV files for easier plotting. There is also an API available if you need to import bandstructure data into your application.
-  * `cp2k_pdos` ... bring CP2Ks PDOS dump into a more CSV-like form for easier plotting/parsing
+  * `cp2k_pdos` ... apply a convolution with Gaussians on a regular grid on the CP2K PDOS output and generate a CSV file for further processing or plotting. The same grid is used for all input files with the min/max of the grid automatically determined, but no summation of the different projections is done.
 
 ## Requirements
 
 * Python 3.6+
-* regex 2020+
+* regex 2021+
+* click 8+
+* numpy 1.19+
+* optional: ruamel.yaml
 
 For development: https://poetry.eustace.io/ https://pytest.org/
 
@@ -23,18 +26,21 @@ There is a simple command-line interface `cp2kparse`:
 
 ```console
 $ cp2kparse --help
-usage: cp2kparse [-h] [-y] [-k <path>] [<file>]
+Usage: cp2kparse [OPTIONS] [FILE|-]
 
-Parse the CP2K output file and return a JSON
+  Parse the CP2K output FILE and return a structured output
 
-positional arguments:
-  <file>                CP2K output file, stdin if not specified
-
-optional arguments:
-  -h, --help            show this help message and exit
-  -y, --yaml            output yaml instead of json
-  -k <path>, --key <path>
-                        Path, ex.: 'energies/total force_eval'
+Options:
+  -f, --format [json|yaml|highlight]
+                                  Output format (json or yaml are structure
+                                  formats, highlight shows which lines of the
+                                  output have been matched)
+  --color [auto|always]           When to colorize output
+  -s, --safe-keys                 generate 'safe' key names (e.g. without
+                                  spaces, dashes, ..)
+  -S, --statistics                print some statistics to stderr
+  -k, --key <PATH>                Path, ex.: 'energies/total force_eval'
+  --help                          Show this message and exit.
 
 $ cp2kparse calc.out
 {
@@ -506,6 +512,41 @@ found restart point @1, dropping 1 frames, flushing 1
 flushing remaining 2 frames
 ```
 
+## Usage: cp2k_bs2csv
+
+Given a `.bs` file as written by CP2K the script will generate CSV files with the same name and suffixes `.set-X.csv` in the current working directory:
+
+```console
+$ cp2k_bs2csv WO3.bs
+writing point set WO3.bs.set-1.csv (total number of k-points: 11)
+with the following special points:
+     GAMMA: 0.00000000 / 0.00000000 / 0.00000000
+         X: 0.00000000 / 0.50000000 / 0.00000000
+writing point set WO3.bs.set-2.csv (total number of k-points: 11)
+with the following special points:
+         X: 0.00000000 / 0.50000000 / 0.00000000
+         M: 0.50000000 / 0.50000000 / 0.00000000
+writing point set WO3.bs.set-3.csv (total number of k-points: 11)
+with the following special points:
+         M: 0.50000000 / 0.50000000 / 0.00000000
+     GAMMA: 0.00000000 / 0.00000000 / 0.00000000
+writing point set WO3.bs.set-4.csv (total number of k-points: 11)
+with the following special points:
+     GAMMA: 0.00000000 / 0.00000000 / 0.00000000
+         R: 0.50000000 / 0.50000000 / 0.50000000
+writing point set WO3.bs.set-5.csv (total number of k-points: 11)
+with the following special points:
+         R: 0.50000000 / 0.50000000 / 0.50000000
+         X: 0.00000000 / 0.50000000 / 0.00000000
+writing point set WO3.bs.set-6.csv (total number of k-points: 11)
+with the following special points:
+         R: 0.50000000 / 0.50000000 / 0.50000000
+         M: 0.50000000 / 0.50000000 / 0.00000000
+$ ls
+WO3.bs            WO3.bs.set-2.csv  WO3.bs.set-4.csv  WO3.bs.set-6.csv
+WO3.bs.set-1.csv  WO3.bs.set-3.csv  WO3.bs.set-5.csv
+```
+
 ## Development
 
 ```console

diff --git a/cp2k_output_tools/__init__.py b/cp2k_output_tools/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.3.1"
+__version__ = "0.4.0"
 
 __all__ = ["builtin_matchers", "parse_iter"]
 

diff --git a/cp2k_output_tools/blocks/__init__.py b/cp2k_output_tools/blocks/__init__.py
@@ -1,8 +1,9 @@
 from .condition_number import match_overlap_matrix_condition_number
+from .energies import match_energies
+from .forces import match_forces
+from .kv_sections import match_kv_sections
 from .mulliken import match_mulliken_population_analysis
 from .program_info import match_program_info
-from .kv_sections import match_kv_sections
-from .energies import match_energies
 from .warnings import match_warnings
 
 builtin_matchers = [
@@ -11,5 +12,6 @@
     match_program_info,
     match_kv_sections,
     match_energies,
+    match_forces,
     match_warnings,
 ]
diff --git a/cp2k_output_tools/blocks/common.py b/cp2k_output_tools/blocks/common.py
@@ -1,3 +1,6 @@
+from dataclasses import dataclass
+from typing import Any, Dict, List, Tuple, Union
+
 # floating point regex
 FLOAT = r"[\+\-]?(\d*[\.]\d+|\d+[\.]?\d*)([Ee][\+\-]?\d+)?"
 
@@ -18,3 +21,26 @@ def safe_string(string):
         .replace("{", "_")
         .replace("}", "_")
     )
+
+
+@dataclass
+class BlockMatch:
+    data: Dict[str, Any]  # the parsed/structured data
+    spans: List[Union[int, Tuple[int, int]]]  # start and end character indices of the matches
+
+
+def merged_spans(spans: List[Tuple[int, int]]):
+    merged = [(-1, -1)]
+
+    for start, end in sorted(spans):
+        if start > merged[-1][1]:  # if the new start is after the latest end, add a new span
+            merged.append((start, end))
+        else:  # if not, keep the current start and replace its end
+            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
+
+    del merged[0]
+    return merged
+
+
+def span_char_count(spans: List[Tuple[int, int]]):
+    return sum(end - start for start, end in spans)
diff --git a/cp2k_output_tools/blocks/condition_number.py b/cp2k_output_tools/blocks/condition_number.py
@@ -1,6 +1,8 @@
+from typing import Optional
+
 import regex as re
-from .common import FLOAT
 
+from .common import FLOAT, BlockMatch
 
 OVERLAP_MATRIX_CONDITION_NUMBER_RE = re.compile(
     rf"""
@@ -35,33 +37,36 @@
 )
 
 
-def match_overlap_matrix_condition_number(content):
+def match_overlap_matrix_condition_number(content: str) -> Optional[BlockMatch]:
     match = OVERLAP_MATRIX_CONDITION_NUMBER_RE.search(content)
 
     if match is None:
         return None
 
     captures = match.groupdict()
 
-    return {
-        "overlap_matrix_condition_number": {
-            "1-norm (estimate)": {
-                "|A|": float(captures["norm1_estimate_A"]),
-                "|A^-1|": float(captures["norm1_estimate_Ainv"]),
-                "CN": float(captures["norm1_estimate"]),
-                "Log(CN)": float(captures["norm1_estimate_log"]),
-            },
-            "1-norm (using diagonalization)": {
-                "|A|": float(captures["norm1_diag_A"]),
-                "|A^-1|": float(captures["norm1_diag_Ainv"]),
-                "CN": float(captures["norm1_diag"]),
-                "Log(CN)": float(captures["norm1_diag_log"]),
-            },
-            "2-norm (using diagonalization)": {
-                "max EV": float(captures["norm2_diag_max_ev"]),
-                "min EV": float(captures["norm2_diag_min_ev"]),
-                "CN": float(captures["norm2_diag"]),
-                "Log(CN)": float(captures["norm2_diag_log"]),
-            },
-        }
-    }
+    return BlockMatch(
+        {
+            "overlap_matrix_condition_number": {
+                "1-norm (estimate)": {
+                    "|A|": float(captures["norm1_estimate_A"]),
+                    "|A^-1|": float(captures["norm1_estimate_Ainv"]),
+                    "CN": float(captures["norm1_estimate"]),
+                    "Log(CN)": float(captures["norm1_estimate_log"]),
+                },
+                "1-norm (using diagonalization)": {
+                    "|A|": float(captures["norm1_diag_A"]),
+                    "|A^-1|": float(captures["norm1_diag_Ainv"]),
+                    "CN": float(captures["norm1_diag"]),
+                    "Log(CN)": float(captures["norm1_diag_log"]),
+                },
+                "2-norm (using diagonalization)": {
+                    "max EV": float(captures["norm2_diag_max_ev"]),
+                    "min EV": float(captures["norm2_diag_min_ev"]),
+                    "CN": float(captures["norm2_diag"]),
+                    "Log(CN)": float(captures["norm2_diag_log"]),
+                },
+            }
+        },
+        match.spans(0),
+    )
diff --git a/cp2k_output_tools/blocks/energies.py b/cp2k_output_tools/blocks/energies.py
@@ -1,6 +1,8 @@
+from typing import Optional
+
 import regex as re
 
-from .common import FLOAT
+from .common import FLOAT, BlockMatch
 
 FORCE_EVAL_ENERGY_RE = re.compile(
     rf"""
@@ -10,10 +12,10 @@
 )
 
 
-def match_energies(content):
+def match_energies(content: str) -> Optional[BlockMatch]:
     match = FORCE_EVAL_ENERGY_RE.search(content)
 
     if not match:
         return None
 
-    return {"energies": {"total force_eval": float(match["value"])}}
+    return BlockMatch({"energies": {"total force_eval": float(match["value"])}}, match.spans(0))