Run formatting precommit hooks on all files. (#59)

Co-authored-by: Katie Doroschak <[email protected]>
uwmisl · May 1, 2020 · b73b334 · b73b334
1 parent 5c211e1
commit b73b334
Show file tree

Hide file tree

Showing 11 changed files with 414 additions and 231 deletions.
diff --git a/.github/workflows/build_without_artifacts.yml b/.github/workflows/build_without_artifacts.yml
@@ -25,7 +25,7 @@ jobs:
     - name: Install Nix
       uses: cachix/install-nix-action@v8
     # Runs a set of commands using the runners shell
-    - name: Build application 
+    - name: Build application
       shell: bash
-      run: | 
+      run: |
            nix-build -A app
diff --git a/.gitignore b/.gitignore
@@ -107,7 +107,7 @@ venv.bak/
 
 # Nix
 #  'result' is generated by the nix-build command
-result
+result/
 
 # Visual Studio Code
 .vscode/

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -19,7 +19,7 @@ repos:
     -   id: isort
         name: Sort imports
         always_run: true
-        args: [-]
+        args: [--multi-line=3, --trailing-comma, --force-grid-wrap=0, --use-parentheses, --line-width=99]
 
     #####################################
     #     Strips trailing whitespace    #

diff --git a/Readme.md b/Readme.md
@@ -1,4 +1,4 @@
-Poretitioner 
+Poretitioner
 =============
 ![CI](https://github.com/uwmisl/poretitioner/workflows/CI/badge.svg?branch=master)
 
@@ -8,7 +8,7 @@ Poretitioner is an application for reading reporter proteins from nanopore senso
 
 ## Getting Started: Users  
 
-Here's how to use Poretitioner out of the box. 
+Here's how to use Poretitioner out of the box.
 
 - Download [Docker](https://www.docker.com/), if you don't have it already.
 
@@ -27,11 +27,11 @@ docker run poretitioner:latest
 ```
 
 
-## Getting Started: Developers 
+## Getting Started: Developers
 
 If you're interested in contributing to Poretitioner, here's how to get started:
 
-- Clone the project 
+- Clone the project
 
 ```
 git clone https://github.com/uwmisl/poretitioner.git
@@ -40,45 +40,44 @@ git clone https://github.com/uwmisl/poretitioner.git
 - Navigate to the repository `cd poretitioner`
 
 - Run `bash ./bootstrap_dev.sh`
-   - This will set you up with Nix (our package manager) and all other developer dependencies needed to build and contribute to the project 
-
-- You're all set! 
+   - This will set you up with Nix (our package manager) and all other developer dependencies needed to build and contribute to the project
 
-### Build the application 
+- You're all set!
+
+### Build the application
 
 - Run `nix-build -A app`
 - The poretitioner binary now exists at `./result/bin/poretitioner`
-- Run 
+- Run
 
 ```
 ./result/bin/poretitioner
 ```
 
-### Build a docker image of the application 
-Docker images can only be built on Linux machines. 
+### Build a docker image of the application
+Docker images can only be built on Linux machines.
 
-- Run 
+- Run
 
 ```docker_image=$(nix-build -A docker)```
 
-- The environment variable `docker_image` now contains a path to the docker image, copy this file wherever you need it. 
+- The environment variable `docker_image` now contains a path to the docker image. Copy this file wherever you need it.
 
 ```docker load < ${docker_image}```
 
 
-### Uninstall 
+### Uninstall
 
 To uninstall Nix and this project's dependencies (for example, if you want to wipe your workspace totally clean and start over), run
 
 ```
-./boostrap_dev uninstall 
+bash ./bootstrap_dev.sh uninstall
 ```
 
-On Mac OS 10.15 (Catalina) and up, this will require some additional steps that the script will elaborate on. 
+On Mac OS 10.15 (Catalina) and up, this will require some additional steps that the script will elaborate on.
 
 
-# How it works 
+# How it works
 
 The full paper, [*Multiplexed direct detection of barcoded protein reporters on a nanopore array*](https://www.biorxiv.org/content/10.1101/837542v1), describes the application in detail. Here's a summary in GIF form:
 ![](NTER_gif_1_sm.gif)
-
diff --git a/poretitioner/utils/classify.py b/poretitioner/utils/classify.py
@@ -9,17 +9,20 @@
 import logging
 import os
 import warnings
+
 import h5py
 import joblib
 import numpy as np
 import pandas as pd
 import torch
 import torch.nn as nn
-from .NTERs_trained_cnn_05152019 import load_cnn
 
 from . import raw_signal_utils
+from .NTERs_trained_cnn_05152019 import load_cnn
 
-warnings.filterwarnings("ignore")  # TODO : Why is this here? : https://github.com/uwmisl/poretitioner/issues/48
+warnings.filterwarnings(
+    "ignore"
+)  # TODO : Why is this here? : https://github.com/uwmisl/poretitioner/issues/48
 use_cuda = True
 
 

diff --git a/poretitioner/utils/quantify.py b/poretitioner/utils/quantify.py
@@ -10,9 +10,11 @@
 import logging
 import os
 import re
+
 import h5py
 import numpy as np
 import pandas as pd
+
 from .raw_signal_utils import find_segments_below_threshold
 from .yaml_assistant import YAMLAssistant
 
@@ -64,7 +66,9 @@ def get_related_files(input_file, raw_file_dir="", capture_file_dir=""):
     if input_file.endswith(".csv"):
         # Given file is the filtered file and we're looking for the capture file
         filtered_file = input_file
-        capture_file = [x for x in os.listdir(capture_file_dir) if x.endswith(run_name + ".pkl")][0]
+        capture_file = [x for x in os.listdir(capture_file_dir) if x.endswith(run_name + ".pkl")][
+            0
+        ]
     elif input_file.endswith(".pkl"):
         # Given file is the capture file and filtered file is unspecified
         capture_file = input_file
@@ -90,7 +94,7 @@ def get_overlapping_regions(window, regions):
     Incomplete overlaps are returned.
 
     # TODO move to raw_signal_utils -- general purpose signal fn not specific to quant
-    
+
     Parameters
     ----------
     window : tuple of numerics (start, end)
@@ -170,29 +174,27 @@ def calc_time_until_capture(capture_windows, captures, blockages=None):
         # Get all the captures & blockages within that window
         captures_in_window = get_overlapping_regions(capture_window, captures)
         if blockages is not None:
-            blockages_in_window = get_overlapping_regions(capture_window,
-                                                          blockages)
+            blockages_in_window = get_overlapping_regions(capture_window, blockages)
         else:
             blockages_in_window = []
         # If there are no captures in the window, add the window to the elapsed
         # time and subtract any blockages.
         if len(captures_in_window) == 0:
             elapsed_time_until_capture += capture_window[1] - capture_window[0]
             for blockage in blockages_in_window:
-                elapsed_time_until_capture -= (blockage[1] - blockage[0])
+                elapsed_time_until_capture -= blockage[1] - blockage[0]
             continue
         # If there's a capture in the window, add the partial window to the
         # elapsed time. Subtract blockages that came before the capture.
         else:
             last_capture_end = capture_window[0]
             for capture_i, capture in enumerate(captures_in_window):
-                elapsed_time_until_capture += (capture[0] - last_capture_end)
+                elapsed_time_until_capture += capture[0] - last_capture_end
                 for blockage in blockages_in_window:
                     # Blockage must start after the last capture ended and
                     # finish before the next capture starts; otherwise skip
-                    if blockage[0] >= last_capture_end \
-                            and blockage[1] < capture[0]:
-                        elapsed_time_until_capture -= (blockage[1] - blockage[0])
+                    if blockage[0] >= last_capture_end and blockage[1] < capture[0]:
+                        elapsed_time_until_capture -= blockage[1] - blockage[0]
                         blockages.pop(0)
                 all_capture_times.append(elapsed_time_until_capture)
                 # Reset elapsed time.
@@ -331,9 +333,8 @@ def get_time_between_captures(
                 if not channel_captures.empty and not captures_segment.empty:
 
                     time_until_capture = calc_time_until_capture(
-                        voltage_changes_segment,
-                        captures_segment,
-                        blockages=blockages)
+                        voltage_changes_segment, captures_segment, blockages=blockages
+                    )
                     # Add time since channel's last capture from previous
                     # tsegs to time until first capture in current tseg
                     time_until_capture[0] += time_elapsed[i]
@@ -346,8 +347,7 @@ def get_time_between_captures(
                         if voltage_changes_segment[voltage_ix][0] > captures_segment[-1].end_obs:
                             time_elapsed[i] += np.sum(
                                 calc_time_until_capture(
-                                    voltage_changes_segment[voltage_ix:],
-                                    blockages
+                                    voltage_changes_segment[voltage_ix:], blockages
                                 )
                             )
                             break
@@ -483,7 +483,7 @@ def get_capture_freq(
                     )
                 else:
                     capture_counts.append(0)
-            all_capture_freq.append(np.mean(capture_counts) / (time_segments[0] / 600000.0))
+            all_capture_freq.append(np.mean(capture_counts) / (time_segments[0] / 600_000.0))
             checkpoint = end_voltage_seg
         else:
             logger.warn(

diff --git a/poretitioner/utils/raw_signal_utils.py b/poretitioner/utils/raw_signal_utils.py
@@ -9,11 +9,11 @@
 """
 
 import re
+
 import h5py
 import numpy as np
 
 
-
 def natkey(string_):
     """Natural sorting key -- sort strings containing numerics so numerical
     ordering is honored/preserved.
@@ -55,8 +55,9 @@ def compute_fractional_blockage(scaled_raw, open_channel):
     return scaled_raw
 
 
-def get_fractional_blockage(f5, channel_no, start=None, end=None,
-                            open_channel_guess=220, open_channel_bound=15):
+def get_fractional_blockage(
+    f5, channel_no, start=None, end=None, open_channel_guess=220, open_channel_bound=15
+):
     """Retrieve the scaled raw signal for the channel, compute the open pore
     current, and return the fractional blockage for that channel.
 
@@ -108,7 +109,9 @@ def get_local_fractional_blockage(
     for start in range(0, len(signal), local_window_sz):
         end = start + local_window_sz
         local_chunk = signal[start:end]
-        local_open_channel = find_open_channel_current(local_chunk, open_channel, bound=open_channel_bound)
+        local_open_channel = find_open_channel_current(
+            local_chunk, open_channel, bound=open_channel_bound
+        )
         if local_open_channel is None:
             local_open_channel = open_channel
         frac[start:end] = compute_fractional_blockage(local_chunk, local_open_channel)
@@ -350,7 +353,7 @@ def find_signal_off_regions(raw, window_sz=200, slide=100, current_range=50):
     """
     off = []
     for start in range(0, len(raw), slide):
-        window_mean = np.mean(raw[start: start + window_sz])
+        window_mean = np.mean(raw[start : start + window_sz])
         if window_mean < np.abs(current_range) and window_mean > -np.abs(current_range):
             off.append(True)
         else:
@@ -391,8 +394,7 @@ def find_segments_below_threshold(time_series, threshold):
         Each item in the list represents the (start, end) points of regions
         where the input array drops at or below the threshold.
     """
-    diff_points = np.where(np.abs(np.diff(
-        np.where(time_series <= threshold, 1, 0))) == 1)[0]
+    diff_points = np.where(np.abs(np.diff(np.where(time_series <= threshold, 1, 0))) == 1)[0]
     if time_series[0] <= threshold:
         diff_points = np.hstack([[0], diff_points])
     if time_series[-1] <= threshold: