From 4ff0b9f2ca44eacbe673b15cb554c14bc5064f86 Mon Sep 17 00:00:00 2001
From: Niv Drory <drory@astro.as.utexas.edu>
Date: Fri, 13 Sep 2024 13:21:19 -0500
Subject: [PATCH 1/6] Add C implementation of fast_median

---
 examples/lco_com/cosmics.ipynb                | 185 +----
 python/cextern/README.txt                     |   8 +
 python/cextern/fast_median/fast_median.py     |  87 +++
 python/cextern/fast_median/src/Makefile       |  14 +
 .../cextern/fast_median/src/fast_median.cpp   | 669 ++++++++++++++++++
 .../cextern/fast_median/src/fast_median.hpp   |  27 +
 python/cextern/fast_median/test/testfilter.py |  15 +
 .../cextern/fast_median/test/testfilter2.py   |  46 ++
 .../cextern/fast_median/test/testfilter3.py   |  51 ++
 python/lvmdrp/core/image.py                   |   6 +-
 10 files changed, 937 insertions(+), 171 deletions(-)
 create mode 100644 python/cextern/README.txt
 create mode 100644 python/cextern/fast_median/fast_median.py
 create mode 100644 python/cextern/fast_median/src/Makefile
 create mode 100644 python/cextern/fast_median/src/fast_median.cpp
 create mode 100644 python/cextern/fast_median/src/fast_median.hpp
 create mode 100644 python/cextern/fast_median/test/testfilter.py
 create mode 100644 python/cextern/fast_median/test/testfilter2.py
 create mode 100644 python/cextern/fast_median/test/testfilter3.py
diff --git a/examples/lco_com/cosmics.ipynb b/examples/lco_com/cosmics.ipynb
index 3d4ced4c..0042e861 100644
--- a/examples/lco_com/cosmics.ipynb
+++ b/examples/lco_com/cosmics.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": null,
    "id": "d8de6690",
    "metadata": {},
    "outputs": [],
@@ -26,28 +26,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "id": "57777075",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\u001b[0;31m[ERROR]: \u001b[0mTraceback (most recent call last):\n",
-      "  File \u001b[36m\"/opt/miniconda/envs/lvmdrp/lib/python3.12/site-packages/IPython/core/interactiveshell.py\"\u001b[39;49;00m, line \u001b[34m3577\u001b[39;49;00m, in run_code\u001b[37m\u001b[39;49;00m\n",
-      "\u001b[37m    \u001b[39;49;00mexec(code_obj, \u001b[36mself\u001b[39;49;00m.user_global_ns, \u001b[36mself\u001b[39;49;00m.user_ns)\u001b[37m\u001b[39;49;00m\n",
-      "  File \u001b[36m\"/var/folders/31/fxk1ql6s5bx7q3kh6kwpf8v8c5vp86/T/ipykernel_46348/3959995733.py\"\u001b[39;49;00m, line \u001b[34m3\u001b[39;49;00m, in <module>\u001b[37m\u001b[39;49;00m\n",
-      "\u001b[37m    \u001b[39;49;00m\u001b[34mfrom\u001b[39;49;00m \u001b[04m\u001b[36mlvmdrp\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mcore\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mimage\u001b[39;49;00m \u001b[34mimport\u001b[39;49;00m reject_cosmics\u001b[37m\u001b[39;49;00m\n",
-      "\u001b[91mImportError\u001b[39;49;00m: cannot import name 'reject_cosmics' from 'lvmdrp.core.image' (/Users/droryn/prog/lvm/lvmdrp/python/lvmdrp/core/image.py)\u001b[37m\u001b[39;49;00m\n",
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "from lvmdrp.functions import imageMethod as image_tasks\n",
     "from lvmdrp.main import get_master_mjd\n",
-    "from lvmdrp.core.image import reject_cosmics\n",
+    "from lvmdrp.utils.timer import Timer\n",
     "\n",
     "MJD = 60356\n",
     "\n",
@@ -87,10 +73,11 @@
     "        #     continue\n",
     "        \n",
     "        # preprocess frame\n",
-    "        image_tasks.preproc_raw_frame(in_image=sci_path, out_image=psci_path, in_mask=mpixmask_path)\n",
+    "        #image_tasks.preproc_raw_frame(in_image=sci_path, out_image=psci_path, in_mask=mpixmask_path)\n",
     "        \n",
     "        # detrend frame\n",
-    "        image_tasks.detrend_frame(in_image=psci_path, out_image=dsci_path, in_bias=mbias_path, in_dark=mdark_path, in_slitmap=Table(fibermap.data))\n",
+    "        with Timer():\n",
+    "            image_tasks.detrend_frame(in_image=psci_path, out_image=dsci_path, in_bias=mbias_path, in_dark=mdark_path, in_slitmap=Table(fibermap.data))\n",
     "        \n",
     "        # # extract 1d spectra\n",
     "        # image_tasks.extract_spectra(in_image=dsci_path, out_rss=xsci_path, in_trace=mtrace_path, method=\"aperture\", aperture=3)\n",
@@ -104,12 +91,14 @@
     "        # rss_tasks.apply_fiberflat(in_rss=wsci_path, out_rss=wsci_path, in_flat=mflat_path)\n",
     "        \n",
     "        # # list paths for spectrograph combination\n",
-    "        # spec_paths.append(wsci_path)"
+    "        # spec_paths.append(wsci_path)\n",
+    "\n",
+    "f()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": null,
    "id": "5e8bb7a6",
    "metadata": {},
    "outputs": [],
@@ -221,143 +210,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": null,
    "id": "aaa5d5cc",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The line_profiler extension is already loaded. To reload it, use:\n",
-      "  %reload_ext line_profiler\n",
-      "  A value of 1.00 is used for the electron read-out noise\n",
-      "  Start iteration 1\n",
-      "  Total number of detected cosmics: 0 out of 16000000 pixels\n",
-      "  Start iteration 2\n",
-      "  Total number of detected cosmics: 0 out of 16000000 pixels\n",
-      "  Start iteration 3\n",
-      "  Total number of detected cosmics: 0 out of 16000000 pixels\n",
-      "  Start iteration 4\n",
-      "  Total number of detected cosmics: 0 out of 16000000 pixels\n",
-      "  Start iteration 5\n",
-      "  Total number of detected cosmics: 0 out of 16000000 pixels\n",
-      "  Total number after binary closing: 0 pixels\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Timer unit: 1e-09 s\n",
-      "\n",
-      "Total time: 66.9971 s\n",
-      "File: /var/folders/31/fxk1ql6s5bx7q3kh6kwpf8v8c5vp86/T/ipykernel_46348/3630395383.py\n",
-      "Function: reject_cosmics at line 6\n",
-      "\n",
-      "Line #      Hits         Time  Per Hit   % Time  Line Contents\n",
-      "==============================================================\n",
-      "     6                                           def reject_cosmics(image, sigma_det=5, rlim=1.2, iterations=5, fwhm_gauss=[2.0,2.0], replace_box=[5, 5],\n",
-      "     7                                                   replace_error=1e6, increase_radius=0, binary_closure=True,\n",
-      "     8                                                   gain=1.0, rdnoise=1.0, bias=0.0, verbose=False, inplace=True):\n",
-      "     9                                           \n",
-      "    10                                               # convert all parameters to proper type\n",
-      "    11         1       1000.0   1000.0      0.0      sigma_x = fwhm_gauss[0] / 2.354\n",
-      "    12         1          0.0      0.0      0.0      sigma_y = fwhm_gauss[1] / 2.354\n",
-      "    13         1       1000.0   1000.0      0.0      box_x = int(replace_box[0])\n",
-      "    14         1          0.0      0.0      0.0      box_y = int(replace_box[1])\n",
-      "    15                                           \n",
-      "    16                                               # define Laplacian convolution kernal\n",
-      "    17         1      30000.0  30000.0      0.0      LA_kernel = 0.25*numpy.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]], dtype=numpy.float32)\n",
-      "    18                                           \n",
-      "    19                                               # Initiate image instances\n",
-      "    20         1    6159000.0    6e+06      0.0      img_original = Image(data=image)\n",
-      "    21         1    6380000.0    6e+06      0.0      img = Image(data=image)\n",
-      "    22                                           \n",
-      "    23                                               # subtract bias if applicable\n",
-      "    24         1       1000.0   1000.0      0.0      if (bias > 0.0) and verbose:\n",
-      "    25                                                   print(f'Subtract bias level {bias:.2f} from image')\n",
-      "    26         1   13054000.0    1e+07      0.0      img = img - bias\n",
-      "    27         1   11617000.0    1e+07      0.0      img_original = img_original - bias\n",
-      "    28                                           \n",
-      "    29                                               # apply gain factor to data if applicable\n",
-      "    30         1       1000.0   1000.0      0.0      if (gain != 1.0) and verbose:\n",
-      "    31                                                   print('  Convert image from ADUs to electrons using a gain factor of {gain:.2f}')\n",
-      "    32         1   11372000.0    1e+07      0.0      img = img * gain\n",
-      "    33         1   13550000.0    1e+07      0.0      img_original = img_original * gain\n",
-      "    34                                           \n",
-      "    35                                               # compute noise using read-noise value\n",
-      "    36         1       2000.0   2000.0      0.0      if (rdnoise > 0.0) and verbose:\n",
-      "    37         1      44000.0  44000.0      0.0          print(f'  A value of {rdnoise:.2f} is used for the electron read-out noise')\n",
-      "    38         1   15353000.0    2e+07      0.0      img_original._error = numpy.sqrt((numpy.clip(img_original._data, a_min=0.0, a_max=None) + rdnoise**2))\n",
-      "    39                                           \n",
-      "    40         1    1167000.0    1e+06      0.0      select = numpy.zeros(img._dim, dtype=bool)\n",
-      "    41         1    1118000.0    1e+06      0.0      img_original._mask = numpy.zeros(img._dim, dtype=bool)\n",
-      "    42         1    1171000.0    1e+06      0.0      img._mask = numpy.zeros(img._dim, dtype=bool)\n",
-      "    43                                           \n",
-      "    44                                               # start iteration\n",
-      "    45         1          0.0      0.0      0.0      out = img\n",
-      "    46         6       6000.0   1000.0      0.0      for i in range(iterations):\n",
-      "    47         5       5000.0   1000.0      0.0          if verbose:\n",
-      "    48         5     155000.0  31000.0      0.0              print(f'  Start iteration {i+1}')\n",
-      "    49                                           \n",
-      "    50                                                   # create smoothed noise fromimage\n",
-      "    51         5        3e+10    5e+09     38.3          noise = out.medianImg((box_y, box_x))\n",
-      "    52         5    9986000.0    2e+06      0.0          select_neg2 = noise._data <= 0\n",
-      "    53         5    2837000.0 567400.0      0.0          noise.replace_subselect(select_neg2, data=0)\n",
-      "    54         5  129006000.0    3e+07      0.2          noise = (noise + rdnoise ** 2).sqrt()\n",
-      "    55                                           \n",
-      "    56         5  319920000.0    6e+07      0.5          sub = img.subsampleImg()  # subsample image\n",
-      "    57         5 2662706000.0    5e+08      4.0          conv = sub.convolveImg(LA_kernel)  # convolve subsampled image with kernel\n",
-      "    58         5   38342000.0    8e+06      0.1          select_neg = conv < 0\n",
-      "    59         5  830789000.0    2e+08      1.2          conv.replace_subselect(select_neg, data=0)  # replace all negative values with 0\n",
-      "    60         5 2312520000.0    5e+08      3.5          Lap = conv.rebin(2, 2)  # rebin the data to original resolution\n",
-      "    61         5  136499000.0    3e+07      0.2          S = Lap/(noise*2)  # normalize Laplacian image by the noise\n",
-      "    62         5        3e+10    5e+09     37.9          S_prime = S-S.medianImg((5, 5))  # cleaning of the normalized Laplacian image\n",
-      "    63                                           \n",
-      "    64                                                   # Perform additional clean using a 2D Gaussian smoothing kernel\n",
-      "    65         5 1943532000.0    4e+08      2.9          fine = out.convolveGaussImg(sigma_x, sigma_y, mask=True)  # convolve image with a 2D Gaussian\n",
-      "    66         5   71890000.0    1e+07      0.1          fine_norm = out/fine\n",
-      "    67         5   12287000.0    2e+06      0.0          select_neg = fine_norm < 0\n",
-      "    68         5    2786000.0 557200.0      0.0          fine_norm.replace_subselect(select_neg, data=0)\n",
-      "    69         5  314685000.0    6e+07      0.5          sub_norm = fine_norm.subsampleImg()  # subsample image\n",
-      "    70         5 2630482000.0    5e+08      3.9          Lap2 = sub_norm.convolveImg(LA_kernel)\n",
-      "    71         5 2344623000.0    5e+08      3.5          Lap2 = Lap2.rebin(2, 2)  # rebin the data to original resolution\n",
-      "    72                                           \n",
-      "    73         5   27675000.0    6e+06      0.0          select = numpy.logical_or(numpy.logical_and(Lap2 > rlim, S_prime > sigma_det), select)\n",
-      "    74                                           \n",
-      "    75         5       4000.0    800.0      0.0          if verbose:\n",
-      "    76         5       4000.0    800.0      0.0              dim = img_original._dim\n",
-      "    77         5   16537000.0    3e+06      0.0              det_pix = numpy.sum(select)\n",
-      "    78         5     551000.0 110200.0      0.0              print(f'  Total number of detected cosmics: {det_pix} out of {dim[0] * dim[1]} pixels')\n",
-      "    79                                           \n",
-      "    80         5       8000.0   1600.0      0.0          if i == iterations-1:\n",
-      "    81         1     557000.0 557000.0      0.0              img_original.replace_subselect(select, mask=True)  # set the new mask\n",
-      "    82         1          0.0      0.0      0.0              if increase_radius > 0:\n",
-      "    83                                                           mask_img = Image(data=img_original._mask)\n",
-      "    84                                                           mask_new = mask_img.convolveImg(kernel=numpy.ones((2*increase_radius+1, 2*increase_radius+1)))\n",
-      "    85                                                           img_original._mask = mask_new\n",
-      "    86         1          0.0      0.0      0.0              if binary_closure:\n",
-      "    87         1    4083000.0    4e+06      0.0                  bmask = img_original._mask > 0\n",
-      "    88         1     147000.0 147000.0      0.0                  bc_mask = numpy.zeros(bmask.shape, dtype=img_original._mask.dtype)\n",
-      "    89         8       9000.0   1125.0      0.0                  for ang in [20, 45, 70, 90, 110, 135, 160]:\n",
-      "    90                                                               # leave out the dispersion direction (0 degrees), see DESI, Guy et al., ApJ, 2023, 165, 144\n",
-      "    91         7     304000.0  43428.6      0.0                      lse = LinearSelectionElement(11, 11, ang)\n",
-      "    92         7 1809760000.0    3e+08      2.7                      bc_mask = bc_mask | ndimage.binary_closing(bmask, structure=lse.se)\n",
-      "    93         1     279000.0 279000.0      0.0                  img_original._mask = bc_mask\n",
-      "    94         1          0.0      0.0      0.0                  if verbose:\n",
-      "    95         1    3405000.0    3e+06      0.0                      print(f'  Total number after binary closing: {numpy.sum(bc_mask)} pixels')\n",
-      "    96                                           \n",
-      "    97                                                       # replace possible corrput pixel with median for final output\n",
-      "    98         1   54418000.0    5e+07      0.1              out = img_original.replaceMaskMedian(box_x, box_y, replace_error=replace_error)\n",
-      "    99                                                   else:\n",
-      "   100         4    2266000.0 566500.0      0.0              out.replace_subselect(select, mask=True)  # set the new mask\n",
-      "   101         4  167549000.0    4e+07      0.3              out = out.replaceMaskMedian(box_x, box_y, replace_error=None)  # replace possible corrput pixel with median\n",
-      "   102                                           \n",
-      "   103         1       2000.0   2000.0      0.0      return out"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "image = numpy.random.rand(4000,4000)\n",
     "\n",
@@ -367,23 +223,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": null,
    "id": "eebac638",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "319 ms ± 243 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n",
-      "308 ms ± 6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import scipy.ndimage\n",
     "from scipy import signal\n",
-    "image = numpy.random.rand(1000 ,1000)\n",
+    "image = numpy.random.rand(4000 ,4000)\n",
     "%timeit ndimage.median_filter(image, size=(5,5))\n",
     "%timeit signal.medfilt2d(image, kernel_size=(5,5))"
    ]
diff --git a/python/cextern/README.txt b/python/cextern/README.txt
new file mode 100644
index 00000000..90626535
--- /dev/null
+++ b/python/cextern/README.txt
@@ -0,0 +1,8 @@
+C code to be compiled on setup goes here.
+
+fast_median.cpp is a very fast median filter for 1d and 2d ndarrays
+with float or double data. The glue code is in fast_median.py
+
+compile using the Makefile in src/
+
+
diff --git a/python/cextern/fast_median/fast_median.py b/python/cextern/fast_median/fast_median.py
new file mode 100644
index 00000000..8788b7c3
--- /dev/null
+++ b/python/cextern/fast_median/fast_median.py
@@ -0,0 +1,87 @@
+import ctypes
+import numpy
+from numpy.ctypeslib import ndpointer
+from scipy.ndimage import median_filter
+
+HAVE_MEDIAN_SO = False
+try:
+    import platform
+    from os import path, environ
+    s = platform.system()
+    resources_dir = environ.get('LVMDRP_LIB_DIR') or path.join(path.dirname(__file__), 'src')
+    if s=='Linux':
+        resources_dir = path.join(resources_dir, 'filter.so')
+        #print(resources_dir)
+        lib = ctypes.cdll.LoadLibrary(resources_dir)
+    elif s=='Darwin':
+        resources_dir = path.join(resources_dir, 'filter.dylib')
+        #print(resources_dir)
+        lib = ctypes.cdll.LoadLibrary(resources_dir)
+    else:
+        raise Exception('Unknown platform: '+s)
+
+    #template <typename T>
+    #void median_filter_2d(int x, int y, int hx, int hy, int blockhint, const T* in, T* out);
+    lib.median_filter_2d_float.argtypes = (ctypes.c_int, ctypes.c_int,
+                                        ctypes.c_int, ctypes.c_int,
+                                        ctypes.c_int,
+                                        ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"),
+                                        ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"))
+    lib.median_filter_2d_float.restype = None
+    lib.median_filter_2d_double.argtypes = (ctypes.c_int, ctypes.c_int,
+                                        ctypes.c_int, ctypes.c_int,
+                                        ctypes.c_int,
+                                        ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"),
+                                        ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"))
+    lib.median_filter_2d_double.restype = None
+
+    lib.median_filter_1d_float.argtypes = (ctypes.c_int, 
+                                        ctypes.c_int,
+                                        ctypes.c_int,
+                                        ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"),
+                                        ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"))
+    lib.median_filter_1d_float.restype = None
+    lib.median_filter_1d_double.argtypes = (ctypes.c_int,
+                                        ctypes.c_int,
+                                        ctypes.c_int,
+                                        ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"),
+                                        ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"))
+    lib.median_filter_1d_double.restype = None
+
+    HAVE_MEDIAN_SO = True
+except Exception:
+    raise
+    print(Exception)
+    pass
+
+print('HAVE_MEDIAN_SO ',HAVE_MEDIAN_SO)
+
+if HAVE_MEDIAN_SO is True:
+    def fast_median_filter_2d(input, size=None):
+        assert len(input.shape) == 2, 'fast_median_filter_2d requires 2-dimensional input'
+        assert len(size) == 2, 'fast_median_filter_2d requires two element size tuple, list or array'
+        out = numpy.empty(input.shape, dtype=input.dtype)
+        # Note we have to flip the shape and box x/y to match python's indexing
+        if input.dtype==numpy.float32:
+            lib.median_filter_2d_float(input.shape[1], input.shape[0], size[1], size[0], 0, input, out)
+        elif input.dtype==numpy.float64:
+            lib.median_filter_2d_double(input.shape[1], input.shape[0], size[1], size[0], 0, input, out)
+        else:
+            raise TypeError('median_filter_2d not implemented for type '+str(input.dtype))
+        return out
+
+    def fast_median_filter_1d(input, size=None):
+        assert len(input.shape) == 1, 'fast_median_filter_1d requires 1-dimensional input'
+        out = numpy.empty(input.shape, dtype=input.dtype)
+        if input.dtype==numpy.float32:
+            lib.median_filter_1d_float(input.shape[0], size, 0, input, out)
+        elif input.dtype==numpy.float64:
+            lib.median_filter_1d_double(input.shape[0], size, 0, input, out)
+        else:
+            raise TypeError('median_filter_1d not implemented for type '+str(input.dtype))
+        return out
+else:
+    def fast_median_filter_2d(input, size=None):
+        return median_filter(input, size=size)
+    def fast_median_filter_1d(input, size=None):
+        return median_filter(input, size=size)
diff --git a/python/cextern/fast_median/src/Makefile b/python/cextern/fast_median/src/Makefile
new file mode 100644
index 00000000..2622d744
--- /dev/null
+++ b/python/cextern/fast_median/src/Makefile
@@ -0,0 +1,14 @@
+UNAME_S := $(shell uname -s)
+ifeq ($(UNAME_S),Linux)
+	FLAGS = -fPIC -shared -O3 -march=native
+	TRG = fast_median.so
+	COMPILER = g++
+endif
+ifeq ($(UNAME_S),Darwin)
+        FLAGS = -dynamiclib -current_version 1.0 -compatibility_version 1.0 -O3 -march=native
+	TRG = fast_median.dylib
+	COMPILER = clang++
+endif
+
+all: fast_median.cpp fast_median.hpp
+	$(COMPILER) $(FLAGS) -o $(TRG) fast_median.cpp
diff --git a/python/cextern/fast_median/src/fast_median.cpp b/python/cextern/fast_median/src/fast_median.cpp
new file mode 100644
index 00000000..baa4d67d
--- /dev/null
+++ b/python/cextern/fast_median/src/fast_median.cpp
@@ -0,0 +1,669 @@
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <limits>
+#include <stdexcept>
+#include <stdint.h>
+
+#ifndef __arm64__
+#include <x86intrin.h>
+#endif
+
+// see https://github.com/suomela/mf2d
+
+#include "fast_median.hpp"
+
+const uint64_t ONE64 = 1;
+
+// Reasonable values based on benchmarks
+
+inline int choose_blocksize_1d(int h)
+{
+    return 8 * (h + 2);
+}
+
+inline int choose_blocksize_2d(int h)
+{
+    return 4 * (h + 2);
+}
+
+// Find nth bit that is set and return its index
+// (no such bit: output undefined)
+
+inline int findnth64(uint64_t x, int n)
+{
+#ifdef __AVX2__
+    x = _pdep_u64(ONE64 << n, x);
+#else
+    for (int i = 0; i < n; ++i)
+    {
+        x &= x - 1;
+    }
+#endif
+    return __builtin_ctzll(x);
+}
+
+inline int popcnt64(uint64_t x)
+{
+    return __builtin_popcountll(x);
+}
+
+// Grid dimensions.
+
+class Dim
+{
+public:
+    Dim(int b_, int size_, int h_)
+        : size(size_),
+          h(h_),
+          step(calc_step(b_, h_)),
+          count(calc_count(b_, size_, h_))
+    {
+        assert(2 * h + 1 < b_);
+        assert(count >= 1);
+        assert(2 * h + count * step >= size);
+        assert(2 * h + (count - 1) * step < size || count == 1);
+    }
+
+    const int size;
+    const int h;
+    const int step;
+    const int count;
+
+private:
+    inline static int calc_step(int b, int h)
+    {
+        return b - 2 * h;
+    }
+
+    inline static int calc_count(int b, int size, int h)
+    {
+        if (size <= b)
+        {
+            return 1;
+        }
+        else
+        {
+            int interior = size - 2 * h;
+            int step = calc_step(b, h);
+            return (interior + step - 1) / step;
+        }
+    }
+};
+
+// Slot i in the grid.
+
+struct BDim
+{
+    BDim(Dim dim_) : dim(dim_)
+    {
+        set(0);
+    }
+
+    inline void set(int i)
+    {
+        bool is_first = (i == 0);
+        bool is_last = (i + 1 == dim.count);
+        start = dim.step * i;
+        int end;
+        if (is_last)
+        {
+            end = dim.size;
+        }
+        else
+        {
+            end = 2 * dim.h + (i + 1) * dim.step;
+        }
+        size = end - start;
+        b0 = is_first ? 0 : dim.h;
+        b1 = is_last ? size : size - dim.h;
+    }
+
+    // The window around point v is [w0(v), w1(v)).
+    // 0 <= w0(v) <= v < w1(v) <= size
+    inline int w0(int v) const
+    {
+        assert(b0 <= v);
+        assert(v < b1);
+        return std::max(0, v - dim.h);
+    }
+
+    inline int w1(int v) const
+    {
+        assert(b0 <= v);
+        assert(v < b1);
+        return std::min(v + 1 + dim.h, size);
+    }
+
+    // Block i is located at coordinates [start, end) in the image.
+    // Within the block, median is needed for coordinates [b0, b1).
+    // 0 <= start < end < dim.size
+    // 0 <= b0 < b1 < size <= dim.b
+    const Dim dim;
+    int start;
+    int size;
+    int b0;
+    int b1;
+};
+
+// Data structure for the sliding window.
+
+class Window
+{
+public:
+    Window(int bb)
+        : words(get_words(bb)),
+          buf(new uint64_t[words])
+    {
+    }
+
+    ~Window()
+    {
+        delete[] buf;
+    }
+
+    inline void clear()
+    {
+        for (int i = 0; i < words; ++i)
+        {
+            buf[i] = 0;
+        }
+        half[0] = 0;
+        half[1] = 0;
+        p = words / 2;
+    }
+
+    inline void update(int op, int s)
+    {
+        assert(op == -1 || op == +1);
+        int i = s >> WORD_SHIFT;
+        int j = s & WORD_MASK;
+        if (op == +1)
+        {
+            assert(!(buf[i] & (ONE64 << j)));
+        }
+        else
+        {
+            assert(buf[i] & (ONE64 << j));
+        }
+        buf[i] ^= (ONE64 << j);
+        half[i >= p] += op;
+    }
+
+    inline int size() const
+    {
+        return half[0] + half[1];
+    }
+
+    inline int find(int goal)
+    {
+        while (half[0] > goal)
+        {
+            --p;
+            half[0] -= popcnt64(buf[p]);
+            half[1] += popcnt64(buf[p]);
+        }
+        while (half[0] + popcnt64(buf[p]) <= goal)
+        {
+            half[0] += popcnt64(buf[p]);
+            half[1] -= popcnt64(buf[p]);
+            ++p;
+        }
+        int n = goal - half[0];
+        assert(0 <= n && n < popcnt64(buf[p]));
+        int j = findnth64(buf[p], n);
+        return (p << WORD_SHIFT) | j;
+    }
+
+private:
+    static inline int get_words(int bb)
+    {
+        assert(bb >= 1);
+        return (bb + WORD_SIZE - 1) / WORD_SIZE;
+    }
+
+    static const int WORD_SHIFT = 6;
+    static const int WORD_SIZE = 1 << WORD_SHIFT;
+    static const int WORD_MASK = WORD_SIZE - 1;
+
+    // Size of buf.
+    const int words;
+    // Bit number s is on iff element s is inside the window.
+    uint64_t *const buf;
+    // half[0] = popcount of buf[0] ... buf[p-1]
+    // half[1] = popcount of buf[p] ... buf[words-1]
+    int half[2];
+    // The current guess is that the median is in buf[p].
+    int p;
+};
+
+template <typename T>
+class WindowRank
+{
+public:
+    WindowRank(int bb_)
+        : sorted(new std::pair<T, int>[bb_]),
+          rank(new int[bb_]),
+          window(bb_),
+          bb(bb_)
+    {
+    }
+
+    ~WindowRank()
+    {
+        delete[] sorted;
+        delete[] rank;
+    }
+
+    void init_start()
+    {
+        size = 0;
+    }
+
+    inline void init_feed(T value, int slot)
+    {
+        if (std::isnan(value))
+        {
+            rank[slot] = NAN_MARKER;
+        }
+        else
+        {
+            sorted[size] = std::make_pair(value, slot);
+            ++size;
+        }
+    }
+
+    void init_finish()
+    {
+        std::sort(sorted, sorted + size);
+        for (int i = 0; i < size; ++i)
+        {
+            rank[sorted[i].second] = i;
+        }
+    }
+
+    inline void clear()
+    {
+        window.clear();
+    }
+
+    inline void update(int op, int slot)
+    {
+        int s = rank[slot];
+        if (s != NAN_MARKER)
+        {
+            window.update(op, s);
+        }
+    }
+
+    inline T get_med()
+    {
+        int total = window.size();
+        if (total == 0)
+        {
+            return std::numeric_limits<T>::quiet_NaN();
+        }
+        else
+        {
+            int goal1 = (total - 1) / 2;
+            int goal2 = (total - 0) / 2;
+            int med1 = window.find(goal1);
+            T value = sorted[med1].first;
+            if (goal2 != goal1)
+            {
+                int med2 = window.find(goal2);
+                assert(med2 > med1);
+                value += sorted[med2].first;
+                value /= 2;
+            }
+            return value;
+        }
+    }
+
+private:
+    std::pair<T, int> *const sorted;
+    int *const rank;
+    Window window;
+    const int bb;
+    int size;
+    static const int NAN_MARKER = -1;
+};
+
+// MedCalc2D.run(i,j) calculates medians for block (i,j).
+
+template <typename T>
+class MedCalc2D
+{
+public:
+    MedCalc2D(int b_, Dim dimx_, Dim dimy_, const T *in_, T *out_)
+        : wr(b_ * b_), bx(dimx_), by(dimy_), in(in_), out(out_)
+    {
+    }
+
+    void run(int bx_, int by_)
+    {
+        bx.set(bx_);
+        by.set(by_);
+        calc_rank();
+        medians();
+    }
+
+private:
+    void calc_rank()
+    {
+        wr.init_start();
+        for (int y = 0; y < by.size; ++y)
+        {
+            for (int x = 0; x < bx.size; ++x)
+            {
+                wr.init_feed(in[coord(x, y)], pack(x, y));
+            }
+        }
+        wr.init_finish();
+    }
+
+    void medians()
+    {
+#ifdef NAIVE
+        for (int y = by.b0; y < by.b1; ++y)
+        {
+            for (int x = bx.b0; x < bx.b1; ++x)
+            {
+                wr.clear();
+                update_block(+1, bx.w0(x), bx.w1(x), by.w0(y), by.w1(y));
+                set_med(x, y);
+            }
+        }
+#else
+        wr.clear();
+        int x = bx.b0;
+        int y = by.b0;
+        update_block(+1, bx.w0(x), bx.w1(x), by.w0(y), by.w1(y));
+        set_med(x, y);
+        bool down = true;
+        while (true)
+        {
+            bool right = false;
+            if (down)
+            {
+                if (y + 1 == by.b1)
+                {
+                    right = true;
+                    down = false;
+                }
+            }
+            else
+            {
+                if (y == by.b0)
+                {
+                    right = true;
+                    down = true;
+                }
+            }
+            if (right)
+            {
+                if (x + 1 == bx.b1)
+                {
+                    break;
+                }
+            }
+            if (right)
+            {
+                update_block(-1, bx.w0(x), bx.w0(x + 1), by.w0(y), by.w1(y));
+                ++x;
+                update_block(+1, bx.w1(x - 1), bx.w1(x), by.w0(y), by.w1(y));
+            }
+            else if (down)
+            {
+                update_block(-1, bx.w0(x), bx.w1(x), by.w0(y), by.w0(y + 1));
+                ++y;
+                update_block(+1, bx.w0(x), bx.w1(x), by.w1(y - 1), by.w1(y));
+            }
+            else
+            {
+                update_block(-1, bx.w0(x), bx.w1(x), by.w1(y - 1), by.w1(y));
+                --y;
+                update_block(+1, bx.w0(x), bx.w1(x), by.w0(y), by.w0(y + 1));
+            }
+            set_med(x, y);
+        }
+#endif
+    }
+
+    inline void update_block(int op, int x0, int x1, int y0, int y1)
+    {
+        for (int y = y0; y < y1; ++y)
+        {
+            for (int x = x0; x < x1; ++x)
+            {
+                wr.update(op, pack(x, y));
+            }
+        }
+    }
+
+    inline void set_med(int x, int y)
+    {
+        out[coord(x, y)] = wr.get_med();
+    }
+
+    inline int pack(int x, int y) const
+    {
+        return y * bx.size + x;
+    }
+
+    inline int coord(int x, int y) const
+    {
+        return (y + by.start) * bx.dim.size + (x + bx.start);
+    }
+
+    WindowRank<T> wr;
+    BDim bx;
+    BDim by;
+    const T *const in;
+    T *const out;
+};
+
+template <typename T>
+class MedCalc1D
+{
+public:
+    MedCalc1D(int b_, Dim dimx_, const T *in_, T *out_)
+        : wr(b_), bx(dimx_), in(in_), out(out_)
+    {
+    }
+
+    void run(int bx_)
+    {
+        bx.set(bx_);
+        calc_rank();
+        medians();
+    }
+
+private:
+    void calc_rank()
+    {
+        wr.init_start();
+        for (int x = 0; x < bx.size; ++x)
+        {
+            wr.init_feed(in[coord(x)], pack(x));
+        }
+        wr.init_finish();
+    }
+
+    void medians()
+    {
+#ifdef NAIVE
+        for (int x = bx.b0; x < bx.b1; ++x)
+        {
+            wr.clear();
+            update_block(+1, bx.w0(x), bx.w1(x));
+            set_med(x);
+        }
+#else
+        wr.clear();
+        int x = bx.b0;
+        update_block(+1, bx.w0(x), bx.w1(x));
+        set_med(x);
+        while (x + 1 < bx.b1)
+        {
+            if (x >= bx.dim.h)
+            {
+                wr.update(-1, pack(x - bx.dim.h));
+            }
+            ++x;
+            if (x + bx.dim.h < bx.size)
+            {
+                wr.update(+1, pack(x + bx.dim.h));
+            }
+            set_med(x);
+        }
+#endif
+    }
+
+    inline void update_block(int op, int x0, int x1)
+    {
+        for (int x = x0; x < x1; ++x)
+        {
+            wr.update(op, pack(x));
+        }
+    }
+
+    inline void set_med(int x)
+    {
+        out[coord(x)] = wr.get_med();
+    }
+
+    inline int pack(int x) const
+    {
+        return x;
+    }
+
+    inline int coord(int x) const
+    {
+        return x + bx.start;
+    }
+
+    WindowRank<T> wr;
+    BDim bx;
+    const T *const in;
+    T *const out;
+};
+
+
+// T: float or double.
+// x, y: image dimensions, width x pixels and height y pixels.
+// hx, hy: window RADIUS in x and y directions.
+// blockhint: preferred block size, 0 = pick automatically.
+// in: input image.
+// out: output image.
+//
+// Total window size will be (2*hx+1) * (2*hy+1) pixels.
+//
+// Pixel (i,j) for 0 <= i < x and 0 <= j < y is located at
+// in[j*x + i] and out[j*x + i].
+template <typename T>
+void median_filter_impl_2d(int x, int y, int hx, int hy, int b, const T *in, T *out)
+{
+    if (2 * hx + 1 > b || 2 * hy + 1 > b)
+    {
+        throw std::invalid_argument("window too large for this block size");
+    }
+    Dim dimx(b, x, hx);
+    Dim dimy(b, y, hy);
+#pragma omp parallel
+    {
+        MedCalc2D<T> mc(b, dimx, dimy, in, out);
+#pragma omp for collapse(2)
+        for (int by = 0; by < dimy.count; ++by)
+        {
+            for (int bx = 0; bx < dimx.count; ++bx)
+            {
+                mc.run(bx, by);
+            }
+        }
+    }
+}
+
+// T: float or double.
+// x image dimensions, width x pixels.
+// hx: window RADIUS
+// blockhint: preferred block size, 0 = pick automatically.
+// in: input image.
+// out: output image.
+//
+// Total window size will be (2*hx+1) pixels.
+template <typename T>
+void median_filter_impl_1d(int x, int hx, int b, const T *in, T *out)
+{
+    if (2 * hx + 1 > b)
+    {
+        throw std::invalid_argument("window too large for this block size");
+    }
+    Dim dimx(b, x, hx);
+#pragma omp parallel
+    {
+        MedCalc1D<T> mc(b, dimx, in, out);
+#pragma omp for
+        for (int bx = 0; bx < dimx.count; ++bx)
+        {
+            mc.run(bx);
+        }
+    }
+}
+
+template <typename T>
+void median_filter_2d(int x, int y, int hx, int hy, int blockhint, const T *in, T *out)
+{
+    int h = std::max(hx, hy);
+    int blocksize = blockhint ? blockhint : choose_blocksize_2d(h);
+    median_filter_impl_2d<T>(x, y, hx, hy, blocksize, in, out);
+}
+
+template <typename T>
+void median_filter_1d(int x, int hx, int blockhint, const T *in, T *out)
+{
+    int blocksize = blockhint ? blockhint : choose_blocksize_1d(hx);
+    median_filter_impl_1d<T>(x, hx, blocksize, in, out);
+}
+
+template void median_filter_2d<float>(int nx, int ny, int hx, int hy, int blockhint, const float *in, float *out);
+template void median_filter_2d<double>(int nx, int ny, int hx, int hy, int blockhint, const double *in, double *out);
+
+template void median_filter_1d<float>(int nx, int hx, int blockhint, const float *in, float *out);
+template void median_filter_1d<double>(int nx, int hx, int blockhint, const double *in, double *out);
+
+
+/*
+ *   Interface to Python:
+ *   Follow interface of ndimage.median_filter where size=(dx,dy) is box size (not radius).
+ */
+void median_filter_2d_float(int nx, int ny, int box_x, int box_y, int blockhint, const float *in, float *out)
+{
+    int hx = (box_x-1)/2; // radius from box size
+    int hy = (box_y-1)/2;
+    return median_filter_2d<float>(nx, ny, hx, hy, blockhint, in, out);
+}
+
+void median_filter_2d_double(int nx, int ny, int box_x, int box_y, int blockhint, const double *in, double *out)
+{
+    int hx = (box_x-1)/2; // radius from box size
+    int hy = (box_y-1)/2;
+    return median_filter_2d<double>(nx, ny, hx, hy, blockhint, in, out);
+}
+
+/*
+ *   Interface to Python:
+ *   Follow interface of ndimage.median_filter where size=(dx,dy) is box size (not radius).
+ */
+void median_filter_1d_float(int nx, int box_x, int blockhint, const float *in, float *out)
+{
+    int hx = (box_x-1)/2; // radius from box size
+    return median_filter_1d<float>(nx, hx, blockhint, in, out);
+}
+
+void median_filter_1d_double(int nx, int box_x, int blockhint, const double *in, double *out)
+{
+    int hx = (box_x-1)/2; // radius from box size
+    return median_filter_1d<double>(nx, hx, blockhint, in, out);
+}
+
diff --git a/python/cextern/fast_median/src/fast_median.hpp b/python/cextern/fast_median/src/fast_median.hpp
new file mode 100644
index 00000000..988a867d
--- /dev/null
+++ b/python/cextern/fast_median/src/fast_median.hpp
@@ -0,0 +1,27 @@
+#ifndef FILTER_H
+#define FILTER_H
+
+// T: float or double.
+// x, y: image dimensions, width x pixels and height y pixels.
+// hx, hy: window size in x and y directions.
+// blockhint: preferred block size, 0 = pick automatically.
+// in: input image.
+// out: output image.
+
+extern "C"
+{
+    void median_filter_2d_float(int nx, int ny, int box_x, int box_y, int blockhint, const float *in, float *out);
+    void median_filter_2d_double(int nx, int ny, int box_x, int box_y, int blockhint, const double *in, double *out);
+    void median_filter_1d_float(int nx, int box_x, int blockhint, const float *in, float *out);
+    void median_filter_1d_double(int nx, int box_x, int blockhint, const double *in, double *out);
+}
+
+template <typename T>
+void median_filter_2d(int nx, int ny, int hx, int hy, int blockhint, const T *in, T *out);
+
+// As above, for the special case y = 1, hy = 0.
+
+template <typename T>
+void median_filter_1d(int x, int hx, int blockhint, const T *in, T *out);
+
+#endif
diff --git a/python/cextern/fast_median/test/testfilter.py b/python/cextern/fast_median/test/testfilter.py
new file mode 100644
index 00000000..06706be5
--- /dev/null
+++ b/python/cextern/fast_median/test/testfilter.py
@@ -0,0 +1,15 @@
+import numpy as np
+from cextern.fast_median.fast_median import fast_median_filter_2d
+from scipy.ndimage import median_filter
+import time
+
+s = int(4000)
+img = np.random.random(size=[s,s])
+img = img.astype(np.float32)
+
+t = time.time()
+img_r = fast_median_filter_2d(img, size=(5,5))
+print(f'lib.median_filter_2d_float: {time.time()-t:.2f} s')
+t = time.time()
+tmp = median_filter(img, size=(5,5))
+print(f'ndimage.median_filter: {time.time()-t:.2f} s')
diff --git a/python/cextern/fast_median/test/testfilter2.py b/python/cextern/fast_median/test/testfilter2.py
new file mode 100644
index 00000000..96e2027c
--- /dev/null
+++ b/python/cextern/fast_median/test/testfilter2.py
@@ -0,0 +1,46 @@
+import ctypes
+import numpy as np
+from numpy.ctypeslib import ndpointer
+from scipy.ndimage import median_filter
+
+HAVE_MEDIAN_SO = True
+try:
+    import platform
+    s = platform.system()
+    if s=='Linux':
+        lib = ctypes.cdll.LoadLibrary("../src/filter.so")
+    elif s=='Darwin':
+        lib = ctypes.cdll.LoadLibrary("../src/filter.dylib")
+    else:
+        raise Exception('Unknown platform: '+s)
+    HAVE_MEDIAN_SO = True
+except Exception:
+    print(Exception)
+    pass
+
+s1 = 11
+s2 = 7
+b1 = 3
+b2 = 4
+img = np.zeros((s1,s2), dtype=np.float32)
+img_r = np.zeros(img.shape, dtype=np.float32)
+
+img = np.random.random_integers(0,10, size=(s1,s2)).astype(np.float32)
+
+#template <typename T>
+#void median_filter_2d(int x, int y, int hx, int hy, int blockhint, const T* in, T* out);
+
+lib.median_filter_2d_float.argtypes = (ctypes.c_int, ctypes.c_int,
+                                       ctypes.c_int, ctypes.c_int,
+                                       ctypes.c_int,
+                                       ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"),
+                                       ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"))
+
+lib.median_filter_2d_float.restype = None
+
+# Note we have to flip the shape and box x/y to match python's indexing
+lib.median_filter_2d_float(s2, s1, b2, b1, 0, img, img_r)
+img_p = median_filter(img, size=(b1,b2), mode='nearest')
+print(img)
+print(img_r)
+print(img_p)
diff --git a/python/cextern/fast_median/test/testfilter3.py b/python/cextern/fast_median/test/testfilter3.py
new file mode 100644
index 00000000..9f3c2273
--- /dev/null
+++ b/python/cextern/fast_median/test/testfilter3.py
@@ -0,0 +1,51 @@
+import ctypes
+import numpy as np
+from numpy.ctypeslib import ndpointer
+from scipy.ndimage import median_filter
+from astropy.io import fits
+
+HAVE_MEDIAN_SO = True
+try:
+    import platform
+    s = platform.system()
+    if s=='Linux':
+        lib = ctypes.cdll.LoadLibrary("../src/filter.so")
+    elif s=='Darwin':
+        lib = ctypes.cdll.LoadLibrary("../src/filter.dylib")
+    else:
+        raise Exception('Unknown platform: '+s)
+    HAVE_MEDIAN_SO = True
+except Exception:
+    print(Exception)
+    pass
+
+#template <typename T>
+#void median_filter_2d(int x, int y, int hx, int hy, int blockhint, const T* in, T* out);
+
+
+
+lib.median_filter_2d_float.argtypes = (ctypes.c_int, ctypes.c_int,
+                                       ctypes.c_int, ctypes.c_int,
+                                       ctypes.c_int,
+                                       ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"),
+                                       ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"))
+
+lib.median_filter_2d_float.restype = None
+
+# C-ordering: last index varies most rapidly
+
+#// Pixel (i,j) for 0 <= i < x and 0 <= j < y is located at
+#// in[j*x + i] and out[j*x + i].
+
+with fits.open('/Users/droryn/Downloads/lvmSFrame-00012618.fits') as hdu:
+    img = hdu[1].data.astype(np.float32)
+    img_r = np.zeros(img.shape).astype(np.float32)
+
+img[~np.isfinite(img)] = 0
+
+# Note we have to flip the shape and box x/y to match python's indexing
+lib.median_filter_2d_float(img.shape[1], img.shape[0], 25, 3, 0, img, img_r)
+fits.writeto('/Users/droryn/Downloads/img_r.fits', img_r, overwrite=True)
+
+img_p = median_filter(img, size=(3,25), mode='nearest')
+fits.writeto('/Users/droryn/Downloads/img_p.fits', img_p, overwrite=True)
diff --git a/python/lvmdrp/core/image.py b/python/lvmdrp/core/image.py
index aa6c942c..ad19fbd9 100644
--- a/python/lvmdrp/core/image.py
+++ b/python/lvmdrp/core/image.py
@@ -25,6 +25,8 @@
 from lvmdrp.core.tracemask import TraceMask
 from lvmdrp.core.spectrum1d import Spectrum1D, _cross_match_float, _cross_match, _spec_from_lines
 
+from cextern.fast_median.fast_median import fast_median_filter_2d
+
 from lvmdrp import __version__ as drpver
 
 def _fill_column_list(columns, width):
@@ -1787,9 +1789,9 @@ def medianImg(self, size, propagate_error=False):
         new_data = copy(self._data)
         new_error = copy(self._error)
 
-        new_data = ndimage.median_filter(new_data, size, mode="nearest")
+        new_data = fast_median_filter_2d(new_data, size)
         if propagate_error and new_error is not None:
-            new_error = numpy.sqrt(ndimage.median_filter(new_error ** 2, size, mode="nearest"))
+            new_error = numpy.sqrt(fast_median_filter_2d(new_error ** 2, size, mode="nearest"))
 
         image = Image(data=new_data, error=new_error, mask=self._mask, header=self._header,
                       origin=self._origin, individual_frames=self._individual_frames, slitmap=self._slitmap)

From 5ef5033db1de2a3f5517928920361cd7e89207e9 Mon Sep 17 00:00:00 2001
From: Niv Drory <drory@astro.as.utexas.edu>
Date: Fri, 13 Sep 2024 13:21:48 -0500
Subject: [PATCH 2/6] print exception if occurs

---
 python/cextern/fast_median/fast_median.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cextern/fast_median/fast_median.py b/python/cextern/fast_median/fast_median.py
index 8788b7c3..9d89fe99 100644
--- a/python/cextern/fast_median/fast_median.py
+++ b/python/cextern/fast_median/fast_median.py
@@ -50,7 +50,6 @@
 
     HAVE_MEDIAN_SO = True
 except Exception:
-    raise
     print(Exception)
     pass
 

From 9a591733773f868b7f54a52dbf39edb5000ed66b Mon Sep 17 00:00:00 2001
From: Niv Drory <drory@astro.as.utexas.edu>
Date: Fri, 13 Sep 2024 13:31:44 -0500
Subject: [PATCH 3/6] Fix typo in name of shared object

---
 python/cextern/fast_median/fast_median.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cextern/fast_median/fast_median.py b/python/cextern/fast_median/fast_median.py
index 9d89fe99..d9362c80 100644
--- a/python/cextern/fast_median/fast_median.py
+++ b/python/cextern/fast_median/fast_median.py
@@ -10,11 +10,11 @@
     s = platform.system()
     resources_dir = environ.get('LVMDRP_LIB_DIR') or path.join(path.dirname(__file__), 'src')
     if s=='Linux':
-        resources_dir = path.join(resources_dir, 'filter.so')
+        resources_dir = path.join(resources_dir, 'fast_median.so')
         #print(resources_dir)
         lib = ctypes.cdll.LoadLibrary(resources_dir)
     elif s=='Darwin':
-        resources_dir = path.join(resources_dir, 'filter.dylib')
+        resources_dir = path.join(resources_dir, 'fast_median.dylib')
         #print(resources_dir)
         lib = ctypes.cdll.LoadLibrary(resources_dir)
     else:

From ca0f801edfec412f5661b871b78b8cc8d08ad471 Mon Sep 17 00:00:00 2001
From: Niv Drory <drory@astro.as.utexas.edu>
Date: Fri, 13 Sep 2024 13:46:04 -0500
Subject: [PATCH 4/6] remove unused import

---
 python/lvmdrp/core/image.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/lvmdrp/core/image.py b/python/lvmdrp/core/image.py
index ad19fbd9..8e5c3eac 100644
--- a/python/lvmdrp/core/image.py
+++ b/python/lvmdrp/core/image.py
@@ -13,7 +13,7 @@
 from astropy.io import fits as pyfits
 from astropy.modeling import fitting, models
 from astropy.stats.biweight import biweight_location, biweight_scale
-from scipy import ndimage, signal
+from scipy import ndimage
 from scipy import interpolate
 
 from lvmdrp import log

From f1b4567fd870045eba1ee161bcfe3df8b0e364d5 Mon Sep 17 00:00:00 2001
From: Niv Drory <drory@astro.as.utexas.edu>
Date: Fri, 13 Sep 2024 13:47:00 -0500
Subject: [PATCH 5/6] move cextern

---
 cextern/README.txt | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 cextern/README.txt

diff --git a/cextern/README.txt b/cextern/README.txt
deleted file mode 100644
index 71a85903..00000000
--- a/cextern/README.txt
+++ /dev/null
@@ -1 +0,0 @@
-C code to be compiled on setup goes here.

From 0f1cce863d7a7b144e4b0054e1a34978d39eb416 Mon Sep 17 00:00:00 2001
From: Niv Drory <drory@astro.as.utexas.edu>
Date: Fri, 13 Sep 2024 13:54:43 -0500
Subject: [PATCH 6/6] Add instructions to compile cextern code to installation
 instructions

---
 README.md | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8bc39264..38ffe70b 100644
--- a/README.md
+++ b/README.md
@@ -63,10 +63,21 @@ To install the DRP along with its dependencies, you need to run the following st
     cd lvmdrp
     ```
 
+3. Optional, but recommended: compile c-code for better performance
 
-3. Install the DRP package in the current python environment (see [contributing](#contributing-to-lvm-drp-development) section below for a replacement of this step):
+    ```bash
+    pushd lvmdrp/python/cextern/fast_median/src
+    make
+    popd
+    ```
+    This should have created a fast_median.so (Linux) or fast_median.dylib (MacOS) in the src directory.
+    Set environment variable `LVMDRP_LIB_DIR` if you move the shared library. If you leave it in src/, no need to set this.
+    We will automate this step eventually ;-)
+
+4. Install the DRP package in the current python environment (see [contributing](#contributing-to-lvm-drp-development) section below for a replacement of this step). Make sure you're back in the lvmdrp directory.
 
     ```bash
+    cd lvmdrp
     pip install .
     ```