From 4ff0b9f2ca44eacbe673b15cb554c14bc5064f86 Mon Sep 17 00:00:00 2001 From: Niv Drory Date: Fri, 13 Sep 2024 13:21:19 -0500 Subject: [PATCH 1/6] Add C implementation of fast_median --- examples/lco_com/cosmics.ipynb | 185 +---- python/cextern/README.txt | 8 + python/cextern/fast_median/fast_median.py | 87 +++ python/cextern/fast_median/src/Makefile | 14 + .../cextern/fast_median/src/fast_median.cpp | 669 ++++++++++++++++++ .../cextern/fast_median/src/fast_median.hpp | 27 + python/cextern/fast_median/test/testfilter.py | 15 + .../cextern/fast_median/test/testfilter2.py | 46 ++ .../cextern/fast_median/test/testfilter3.py | 51 ++ python/lvmdrp/core/image.py | 6 +- 10 files changed, 937 insertions(+), 171 deletions(-) create mode 100644 python/cextern/README.txt create mode 100644 python/cextern/fast_median/fast_median.py create mode 100644 python/cextern/fast_median/src/Makefile create mode 100644 python/cextern/fast_median/src/fast_median.cpp create mode 100644 python/cextern/fast_median/src/fast_median.hpp create mode 100644 python/cextern/fast_median/test/testfilter.py create mode 100644 python/cextern/fast_median/test/testfilter2.py create mode 100644 python/cextern/fast_median/test/testfilter3.py diff --git a/examples/lco_com/cosmics.ipynb b/examples/lco_com/cosmics.ipynb index 3d4ced4c..0042e861 100644 --- a/examples/lco_com/cosmics.ipynb +++ b/examples/lco_com/cosmics.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "d8de6690", "metadata": {}, "outputs": [], @@ -26,28 +26,14 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "57777075", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[0;31m[ERROR]: \u001b[0mTraceback (most recent call last):\n", - " File \u001b[36m\"/opt/miniconda/envs/lvmdrp/lib/python3.12/site-packages/IPython/core/interactiveshell.py\"\u001b[39;49;00m, line \u001b[34m3577\u001b[39;49;00m, in run_code\u001b[37m\u001b[39;49;00m\n", - "\u001b[37m \u001b[39;49;00mexec(code_obj, \u001b[36mself\u001b[39;49;00m.user_global_ns, \u001b[36mself\u001b[39;49;00m.user_ns)\u001b[37m\u001b[39;49;00m\n", - " File \u001b[36m\"/var/folders/31/fxk1ql6s5bx7q3kh6kwpf8v8c5vp86/T/ipykernel_46348/3959995733.py\"\u001b[39;49;00m, line \u001b[34m3\u001b[39;49;00m, in \u001b[37m\u001b[39;49;00m\n", - "\u001b[37m \u001b[39;49;00m\u001b[34mfrom\u001b[39;49;00m \u001b[04m\u001b[36mlvmdrp\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mcore\u001b[39;49;00m\u001b[04m\u001b[36m.\u001b[39;49;00m\u001b[04m\u001b[36mimage\u001b[39;49;00m \u001b[34mimport\u001b[39;49;00m reject_cosmics\u001b[37m\u001b[39;49;00m\n", - "\u001b[91mImportError\u001b[39;49;00m: cannot import name 'reject_cosmics' from 'lvmdrp.core.image' (/Users/droryn/prog/lvm/lvmdrp/python/lvmdrp/core/image.py)\u001b[37m\u001b[39;49;00m\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "from lvmdrp.functions import imageMethod as image_tasks\n", "from lvmdrp.main import get_master_mjd\n", - "from lvmdrp.core.image import reject_cosmics\n", + "from lvmdrp.utils.timer import Timer\n", "\n", "MJD = 60356\n", "\n", @@ -87,10 +73,11 @@ " # continue\n", " \n", " # preprocess frame\n", - " image_tasks.preproc_raw_frame(in_image=sci_path, out_image=psci_path, in_mask=mpixmask_path)\n", + " #image_tasks.preproc_raw_frame(in_image=sci_path, out_image=psci_path, in_mask=mpixmask_path)\n", " \n", " # detrend frame\n", - " image_tasks.detrend_frame(in_image=psci_path, out_image=dsci_path, in_bias=mbias_path, in_dark=mdark_path, in_slitmap=Table(fibermap.data))\n", + " with Timer():\n", + " image_tasks.detrend_frame(in_image=psci_path, out_image=dsci_path, in_bias=mbias_path, in_dark=mdark_path, in_slitmap=Table(fibermap.data))\n", " \n", " # # extract 1d spectra\n", " # image_tasks.extract_spectra(in_image=dsci_path, out_rss=xsci_path, in_trace=mtrace_path, method=\"aperture\", aperture=3)\n", @@ -104,12 +91,14 @@ " # rss_tasks.apply_fiberflat(in_rss=wsci_path, out_rss=wsci_path, in_flat=mflat_path)\n", " \n", " # # list paths for spectrograph combination\n", - " # spec_paths.append(wsci_path)" + " # spec_paths.append(wsci_path)\n", + "\n", + "f()" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "5e8bb7a6", "metadata": {}, "outputs": [], @@ -221,143 +210,10 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "aaa5d5cc", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The line_profiler extension is already loaded. To reload it, use:\n", - " %reload_ext line_profiler\n", - " A value of 1.00 is used for the electron read-out noise\n", - " Start iteration 1\n", - " Total number of detected cosmics: 0 out of 16000000 pixels\n", - " Start iteration 2\n", - " Total number of detected cosmics: 0 out of 16000000 pixels\n", - " Start iteration 3\n", - " Total number of detected cosmics: 0 out of 16000000 pixels\n", - " Start iteration 4\n", - " Total number of detected cosmics: 0 out of 16000000 pixels\n", - " Start iteration 5\n", - " Total number of detected cosmics: 0 out of 16000000 pixels\n", - " Total number after binary closing: 0 pixels\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Timer unit: 1e-09 s\n", - "\n", - "Total time: 66.9971 s\n", - "File: /var/folders/31/fxk1ql6s5bx7q3kh6kwpf8v8c5vp86/T/ipykernel_46348/3630395383.py\n", - "Function: reject_cosmics at line 6\n", - "\n", - "Line # Hits Time Per Hit % Time Line Contents\n", - "==============================================================\n", - " 6 def reject_cosmics(image, sigma_det=5, rlim=1.2, iterations=5, fwhm_gauss=[2.0,2.0], replace_box=[5, 5],\n", - " 7 replace_error=1e6, increase_radius=0, binary_closure=True,\n", - " 8 gain=1.0, rdnoise=1.0, bias=0.0, verbose=False, inplace=True):\n", - " 9 \n", - " 10 # convert all parameters to proper type\n", - " 11 1 1000.0 1000.0 0.0 sigma_x = fwhm_gauss[0] / 2.354\n", - " 12 1 0.0 0.0 0.0 sigma_y = fwhm_gauss[1] / 2.354\n", - " 13 1 1000.0 1000.0 0.0 box_x = int(replace_box[0])\n", - " 14 1 0.0 0.0 0.0 box_y = int(replace_box[1])\n", - " 15 \n", - " 16 # define Laplacian convolution kernal\n", - " 17 1 30000.0 30000.0 0.0 LA_kernel = 0.25*numpy.array([[0, -1, 0], [-1, 4, -1], [0, -1, 0]], dtype=numpy.float32)\n", - " 18 \n", - " 19 # Initiate image instances\n", - " 20 1 6159000.0 6e+06 0.0 img_original = Image(data=image)\n", - " 21 1 6380000.0 6e+06 0.0 img = Image(data=image)\n", - " 22 \n", - " 23 # subtract bias if applicable\n", - " 24 1 1000.0 1000.0 0.0 if (bias > 0.0) and verbose:\n", - " 25 print(f'Subtract bias level {bias:.2f} from image')\n", - " 26 1 13054000.0 1e+07 0.0 img = img - bias\n", - " 27 1 11617000.0 1e+07 0.0 img_original = img_original - bias\n", - " 28 \n", - " 29 # apply gain factor to data if applicable\n", - " 30 1 1000.0 1000.0 0.0 if (gain != 1.0) and verbose:\n", - " 31 print(' Convert image from ADUs to electrons using a gain factor of {gain:.2f}')\n", - " 32 1 11372000.0 1e+07 0.0 img = img * gain\n", - " 33 1 13550000.0 1e+07 0.0 img_original = img_original * gain\n", - " 34 \n", - " 35 # compute noise using read-noise value\n", - " 36 1 2000.0 2000.0 0.0 if (rdnoise > 0.0) and verbose:\n", - " 37 1 44000.0 44000.0 0.0 print(f' A value of {rdnoise:.2f} is used for the electron read-out noise')\n", - " 38 1 15353000.0 2e+07 0.0 img_original._error = numpy.sqrt((numpy.clip(img_original._data, a_min=0.0, a_max=None) + rdnoise**2))\n", - " 39 \n", - " 40 1 1167000.0 1e+06 0.0 select = numpy.zeros(img._dim, dtype=bool)\n", - " 41 1 1118000.0 1e+06 0.0 img_original._mask = numpy.zeros(img._dim, dtype=bool)\n", - " 42 1 1171000.0 1e+06 0.0 img._mask = numpy.zeros(img._dim, dtype=bool)\n", - " 43 \n", - " 44 # start iteration\n", - " 45 1 0.0 0.0 0.0 out = img\n", - " 46 6 6000.0 1000.0 0.0 for i in range(iterations):\n", - " 47 5 5000.0 1000.0 0.0 if verbose:\n", - " 48 5 155000.0 31000.0 0.0 print(f' Start iteration {i+1}')\n", - " 49 \n", - " 50 # create smoothed noise fromimage\n", - " 51 5 3e+10 5e+09 38.3 noise = out.medianImg((box_y, box_x))\n", - " 52 5 9986000.0 2e+06 0.0 select_neg2 = noise._data <= 0\n", - " 53 5 2837000.0 567400.0 0.0 noise.replace_subselect(select_neg2, data=0)\n", - " 54 5 129006000.0 3e+07 0.2 noise = (noise + rdnoise ** 2).sqrt()\n", - " 55 \n", - " 56 5 319920000.0 6e+07 0.5 sub = img.subsampleImg() # subsample image\n", - " 57 5 2662706000.0 5e+08 4.0 conv = sub.convolveImg(LA_kernel) # convolve subsampled image with kernel\n", - " 58 5 38342000.0 8e+06 0.1 select_neg = conv < 0\n", - " 59 5 830789000.0 2e+08 1.2 conv.replace_subselect(select_neg, data=0) # replace all negative values with 0\n", - " 60 5 2312520000.0 5e+08 3.5 Lap = conv.rebin(2, 2) # rebin the data to original resolution\n", - " 61 5 136499000.0 3e+07 0.2 S = Lap/(noise*2) # normalize Laplacian image by the noise\n", - " 62 5 3e+10 5e+09 37.9 S_prime = S-S.medianImg((5, 5)) # cleaning of the normalized Laplacian image\n", - " 63 \n", - " 64 # Perform additional clean using a 2D Gaussian smoothing kernel\n", - " 65 5 1943532000.0 4e+08 2.9 fine = out.convolveGaussImg(sigma_x, sigma_y, mask=True) # convolve image with a 2D Gaussian\n", - " 66 5 71890000.0 1e+07 0.1 fine_norm = out/fine\n", - " 67 5 12287000.0 2e+06 0.0 select_neg = fine_norm < 0\n", - " 68 5 2786000.0 557200.0 0.0 fine_norm.replace_subselect(select_neg, data=0)\n", - " 69 5 314685000.0 6e+07 0.5 sub_norm = fine_norm.subsampleImg() # subsample image\n", - " 70 5 2630482000.0 5e+08 3.9 Lap2 = sub_norm.convolveImg(LA_kernel)\n", - " 71 5 2344623000.0 5e+08 3.5 Lap2 = Lap2.rebin(2, 2) # rebin the data to original resolution\n", - " 72 \n", - " 73 5 27675000.0 6e+06 0.0 select = numpy.logical_or(numpy.logical_and(Lap2 > rlim, S_prime > sigma_det), select)\n", - " 74 \n", - " 75 5 4000.0 800.0 0.0 if verbose:\n", - " 76 5 4000.0 800.0 0.0 dim = img_original._dim\n", - " 77 5 16537000.0 3e+06 0.0 det_pix = numpy.sum(select)\n", - " 78 5 551000.0 110200.0 0.0 print(f' Total number of detected cosmics: {det_pix} out of {dim[0] * dim[1]} pixels')\n", - " 79 \n", - " 80 5 8000.0 1600.0 0.0 if i == iterations-1:\n", - " 81 1 557000.0 557000.0 0.0 img_original.replace_subselect(select, mask=True) # set the new mask\n", - " 82 1 0.0 0.0 0.0 if increase_radius > 0:\n", - " 83 mask_img = Image(data=img_original._mask)\n", - " 84 mask_new = mask_img.convolveImg(kernel=numpy.ones((2*increase_radius+1, 2*increase_radius+1)))\n", - " 85 img_original._mask = mask_new\n", - " 86 1 0.0 0.0 0.0 if binary_closure:\n", - " 87 1 4083000.0 4e+06 0.0 bmask = img_original._mask > 0\n", - " 88 1 147000.0 147000.0 0.0 bc_mask = numpy.zeros(bmask.shape, dtype=img_original._mask.dtype)\n", - " 89 8 9000.0 1125.0 0.0 for ang in [20, 45, 70, 90, 110, 135, 160]:\n", - " 90 # leave out the dispersion direction (0 degrees), see DESI, Guy et al., ApJ, 2023, 165, 144\n", - " 91 7 304000.0 43428.6 0.0 lse = LinearSelectionElement(11, 11, ang)\n", - " 92 7 1809760000.0 3e+08 2.7 bc_mask = bc_mask | ndimage.binary_closing(bmask, structure=lse.se)\n", - " 93 1 279000.0 279000.0 0.0 img_original._mask = bc_mask\n", - " 94 1 0.0 0.0 0.0 if verbose:\n", - " 95 1 3405000.0 3e+06 0.0 print(f' Total number after binary closing: {numpy.sum(bc_mask)} pixels')\n", - " 96 \n", - " 97 # replace possible corrput pixel with median for final output\n", - " 98 1 54418000.0 5e+07 0.1 out = img_original.replaceMaskMedian(box_x, box_y, replace_error=replace_error)\n", - " 99 else:\n", - " 100 4 2266000.0 566500.0 0.0 out.replace_subselect(select, mask=True) # set the new mask\n", - " 101 4 167549000.0 4e+07 0.3 out = out.replaceMaskMedian(box_x, box_y, replace_error=None) # replace possible corrput pixel with median\n", - " 102 \n", - " 103 1 2000.0 2000.0 0.0 return out" - ] - } - ], + "outputs": [], "source": [ "image = numpy.random.rand(4000,4000)\n", "\n", @@ -367,23 +223,14 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "eebac638", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "319 ms ± 243 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", - "308 ms ± 6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" - ] - } - ], + "outputs": [], "source": [ "import scipy.ndimage\n", "from scipy import signal\n", - "image = numpy.random.rand(1000 ,1000)\n", + "image = numpy.random.rand(4000 ,4000)\n", "%timeit ndimage.median_filter(image, size=(5,5))\n", "%timeit signal.medfilt2d(image, kernel_size=(5,5))" ] diff --git a/python/cextern/README.txt b/python/cextern/README.txt new file mode 100644 index 00000000..90626535 --- /dev/null +++ b/python/cextern/README.txt @@ -0,0 +1,8 @@ +C code to be compiled on setup goes here. + +fast_median.cpp is a very fast median filter for 1d and 2d ndarrays +with float or double data. The glue code is in fast_median.py + +compile using the Makefile in src/ + + diff --git a/python/cextern/fast_median/fast_median.py b/python/cextern/fast_median/fast_median.py new file mode 100644 index 00000000..8788b7c3 --- /dev/null +++ b/python/cextern/fast_median/fast_median.py @@ -0,0 +1,87 @@ +import ctypes +import numpy +from numpy.ctypeslib import ndpointer +from scipy.ndimage import median_filter + +HAVE_MEDIAN_SO = False +try: + import platform + from os import path, environ + s = platform.system() + resources_dir = environ.get('LVMDRP_LIB_DIR') or path.join(path.dirname(__file__), 'src') + if s=='Linux': + resources_dir = path.join(resources_dir, 'filter.so') + #print(resources_dir) + lib = ctypes.cdll.LoadLibrary(resources_dir) + elif s=='Darwin': + resources_dir = path.join(resources_dir, 'filter.dylib') + #print(resources_dir) + lib = ctypes.cdll.LoadLibrary(resources_dir) + else: + raise Exception('Unknown platform: '+s) + + #template + #void median_filter_2d(int x, int y, int hx, int hy, int blockhint, const T* in, T* out); + lib.median_filter_2d_float.argtypes = (ctypes.c_int, ctypes.c_int, + ctypes.c_int, ctypes.c_int, + ctypes.c_int, + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS")) + lib.median_filter_2d_float.restype = None + lib.median_filter_2d_double.argtypes = (ctypes.c_int, ctypes.c_int, + ctypes.c_int, ctypes.c_int, + ctypes.c_int, + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS")) + lib.median_filter_2d_double.restype = None + + lib.median_filter_1d_float.argtypes = (ctypes.c_int, + ctypes.c_int, + ctypes.c_int, + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS")) + lib.median_filter_1d_float.restype = None + lib.median_filter_1d_double.argtypes = (ctypes.c_int, + ctypes.c_int, + ctypes.c_int, + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS"), + ndpointer(ctypes.c_double, flags="C_CONTIGUOUS")) + lib.median_filter_1d_double.restype = None + + HAVE_MEDIAN_SO = True +except Exception: + raise + print(Exception) + pass + +print('HAVE_MEDIAN_SO ',HAVE_MEDIAN_SO) + +if HAVE_MEDIAN_SO is True: + def fast_median_filter_2d(input, size=None): + assert len(input.shape) == 2, 'fast_median_filter_2d requires 2-dimensional input' + assert len(size) == 2, 'fast_median_filter_2d requires two element size tuple, list or array' + out = numpy.empty(input.shape, dtype=input.dtype) + # Note we have to flip the shape and box x/y to match python's indexing + if input.dtype==numpy.float32: + lib.median_filter_2d_float(input.shape[1], input.shape[0], size[1], size[0], 0, input, out) + elif input.dtype==numpy.float64: + lib.median_filter_2d_double(input.shape[1], input.shape[0], size[1], size[0], 0, input, out) + else: + raise TypeError('median_filter_2d not implemented for type '+str(input.dtype)) + return out + + def fast_median_filter_1d(input, size=None): + assert len(input.shape) == 1, 'fast_median_filter_1d requires 1-dimensional input' + out = numpy.empty(input.shape, dtype=input.dtype) + if input.dtype==numpy.float32: + lib.median_filter_1d_float(input.shape[0], size, 0, input, out) + elif input.dtype==numpy.float64: + lib.median_filter_1d_double(input.shape[0], size, 0, input, out) + else: + raise TypeError('median_filter_1d not implemented for type '+str(input.dtype)) + return out +else: + def fast_median_filter_2d(input, size=None): + return median_filter(input, size=size) + def fast_median_filter_1d(input, size=None): + return median_filter(input, size=size) diff --git a/python/cextern/fast_median/src/Makefile b/python/cextern/fast_median/src/Makefile new file mode 100644 index 00000000..2622d744 --- /dev/null +++ b/python/cextern/fast_median/src/Makefile @@ -0,0 +1,14 @@ +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Linux) + FLAGS = -fPIC -shared -O3 -march=native + TRG = fast_median.so + COMPILER = g++ +endif +ifeq ($(UNAME_S),Darwin) + FLAGS = -dynamiclib -current_version 1.0 -compatibility_version 1.0 -O3 -march=native + TRG = fast_median.dylib + COMPILER = clang++ +endif + +all: fast_median.cpp fast_median.hpp + $(COMPILER) $(FLAGS) -o $(TRG) fast_median.cpp diff --git a/python/cextern/fast_median/src/fast_median.cpp b/python/cextern/fast_median/src/fast_median.cpp new file mode 100644 index 00000000..baa4d67d --- /dev/null +++ b/python/cextern/fast_median/src/fast_median.cpp @@ -0,0 +1,669 @@ +#include +#include +#include +#include +#include +#include + +#ifndef __arm64__ +#include +#endif + +// see https://github.com/suomela/mf2d + +#include "fast_median.hpp" + +const uint64_t ONE64 = 1; + +// Reasonable values based on benchmarks + +inline int choose_blocksize_1d(int h) +{ + return 8 * (h + 2); +} + +inline int choose_blocksize_2d(int h) +{ + return 4 * (h + 2); +} + +// Find nth bit that is set and return its index +// (no such bit: output undefined) + +inline int findnth64(uint64_t x, int n) +{ +#ifdef __AVX2__ + x = _pdep_u64(ONE64 << n, x); +#else + for (int i = 0; i < n; ++i) + { + x &= x - 1; + } +#endif + return __builtin_ctzll(x); +} + +inline int popcnt64(uint64_t x) +{ + return __builtin_popcountll(x); +} + +// Grid dimensions. + +class Dim +{ +public: + Dim(int b_, int size_, int h_) + : size(size_), + h(h_), + step(calc_step(b_, h_)), + count(calc_count(b_, size_, h_)) + { + assert(2 * h + 1 < b_); + assert(count >= 1); + assert(2 * h + count * step >= size); + assert(2 * h + (count - 1) * step < size || count == 1); + } + + const int size; + const int h; + const int step; + const int count; + +private: + inline static int calc_step(int b, int h) + { + return b - 2 * h; + } + + inline static int calc_count(int b, int size, int h) + { + if (size <= b) + { + return 1; + } + else + { + int interior = size - 2 * h; + int step = calc_step(b, h); + return (interior + step - 1) / step; + } + } +}; + +// Slot i in the grid. + +struct BDim +{ + BDim(Dim dim_) : dim(dim_) + { + set(0); + } + + inline void set(int i) + { + bool is_first = (i == 0); + bool is_last = (i + 1 == dim.count); + start = dim.step * i; + int end; + if (is_last) + { + end = dim.size; + } + else + { + end = 2 * dim.h + (i + 1) * dim.step; + } + size = end - start; + b0 = is_first ? 0 : dim.h; + b1 = is_last ? size : size - dim.h; + } + + // The window around point v is [w0(v), w1(v)). + // 0 <= w0(v) <= v < w1(v) <= size + inline int w0(int v) const + { + assert(b0 <= v); + assert(v < b1); + return std::max(0, v - dim.h); + } + + inline int w1(int v) const + { + assert(b0 <= v); + assert(v < b1); + return std::min(v + 1 + dim.h, size); + } + + // Block i is located at coordinates [start, end) in the image. + // Within the block, median is needed for coordinates [b0, b1). + // 0 <= start < end < dim.size + // 0 <= b0 < b1 < size <= dim.b + const Dim dim; + int start; + int size; + int b0; + int b1; +}; + +// Data structure for the sliding window. + +class Window +{ +public: + Window(int bb) + : words(get_words(bb)), + buf(new uint64_t[words]) + { + } + + ~Window() + { + delete[] buf; + } + + inline void clear() + { + for (int i = 0; i < words; ++i) + { + buf[i] = 0; + } + half[0] = 0; + half[1] = 0; + p = words / 2; + } + + inline void update(int op, int s) + { + assert(op == -1 || op == +1); + int i = s >> WORD_SHIFT; + int j = s & WORD_MASK; + if (op == +1) + { + assert(!(buf[i] & (ONE64 << j))); + } + else + { + assert(buf[i] & (ONE64 << j)); + } + buf[i] ^= (ONE64 << j); + half[i >= p] += op; + } + + inline int size() const + { + return half[0] + half[1]; + } + + inline int find(int goal) + { + while (half[0] > goal) + { + --p; + half[0] -= popcnt64(buf[p]); + half[1] += popcnt64(buf[p]); + } + while (half[0] + popcnt64(buf[p]) <= goal) + { + half[0] += popcnt64(buf[p]); + half[1] -= popcnt64(buf[p]); + ++p; + } + int n = goal - half[0]; + assert(0 <= n && n < popcnt64(buf[p])); + int j = findnth64(buf[p], n); + return (p << WORD_SHIFT) | j; + } + +private: + static inline int get_words(int bb) + { + assert(bb >= 1); + return (bb + WORD_SIZE - 1) / WORD_SIZE; + } + + static const int WORD_SHIFT = 6; + static const int WORD_SIZE = 1 << WORD_SHIFT; + static const int WORD_MASK = WORD_SIZE - 1; + + // Size of buf. + const int words; + // Bit number s is on iff element s is inside the window. + uint64_t *const buf; + // half[0] = popcount of buf[0] ... buf[p-1] + // half[1] = popcount of buf[p] ... buf[words-1] + int half[2]; + // The current guess is that the median is in buf[p]. + int p; +}; + +template +class WindowRank +{ +public: + WindowRank(int bb_) + : sorted(new std::pair[bb_]), + rank(new int[bb_]), + window(bb_), + bb(bb_) + { + } + + ~WindowRank() + { + delete[] sorted; + delete[] rank; + } + + void init_start() + { + size = 0; + } + + inline void init_feed(T value, int slot) + { + if (std::isnan(value)) + { + rank[slot] = NAN_MARKER; + } + else + { + sorted[size] = std::make_pair(value, slot); + ++size; + } + } + + void init_finish() + { + std::sort(sorted, sorted + size); + for (int i = 0; i < size; ++i) + { + rank[sorted[i].second] = i; + } + } + + inline void clear() + { + window.clear(); + } + + inline void update(int op, int slot) + { + int s = rank[slot]; + if (s != NAN_MARKER) + { + window.update(op, s); + } + } + + inline T get_med() + { + int total = window.size(); + if (total == 0) + { + return std::numeric_limits::quiet_NaN(); + } + else + { + int goal1 = (total - 1) / 2; + int goal2 = (total - 0) / 2; + int med1 = window.find(goal1); + T value = sorted[med1].first; + if (goal2 != goal1) + { + int med2 = window.find(goal2); + assert(med2 > med1); + value += sorted[med2].first; + value /= 2; + } + return value; + } + } + +private: + std::pair *const sorted; + int *const rank; + Window window; + const int bb; + int size; + static const int NAN_MARKER = -1; +}; + +// MedCalc2D.run(i,j) calculates medians for block (i,j). + +template +class MedCalc2D +{ +public: + MedCalc2D(int b_, Dim dimx_, Dim dimy_, const T *in_, T *out_) + : wr(b_ * b_), bx(dimx_), by(dimy_), in(in_), out(out_) + { + } + + void run(int bx_, int by_) + { + bx.set(bx_); + by.set(by_); + calc_rank(); + medians(); + } + +private: + void calc_rank() + { + wr.init_start(); + for (int y = 0; y < by.size; ++y) + { + for (int x = 0; x < bx.size; ++x) + { + wr.init_feed(in[coord(x, y)], pack(x, y)); + } + } + wr.init_finish(); + } + + void medians() + { +#ifdef NAIVE + for (int y = by.b0; y < by.b1; ++y) + { + for (int x = bx.b0; x < bx.b1; ++x) + { + wr.clear(); + update_block(+1, bx.w0(x), bx.w1(x), by.w0(y), by.w1(y)); + set_med(x, y); + } + } +#else + wr.clear(); + int x = bx.b0; + int y = by.b0; + update_block(+1, bx.w0(x), bx.w1(x), by.w0(y), by.w1(y)); + set_med(x, y); + bool down = true; + while (true) + { + bool right = false; + if (down) + { + if (y + 1 == by.b1) + { + right = true; + down = false; + } + } + else + { + if (y == by.b0) + { + right = true; + down = true; + } + } + if (right) + { + if (x + 1 == bx.b1) + { + break; + } + } + if (right) + { + update_block(-1, bx.w0(x), bx.w0(x + 1), by.w0(y), by.w1(y)); + ++x; + update_block(+1, bx.w1(x - 1), bx.w1(x), by.w0(y), by.w1(y)); + } + else if (down) + { + update_block(-1, bx.w0(x), bx.w1(x), by.w0(y), by.w0(y + 1)); + ++y; + update_block(+1, bx.w0(x), bx.w1(x), by.w1(y - 1), by.w1(y)); + } + else + { + update_block(-1, bx.w0(x), bx.w1(x), by.w1(y - 1), by.w1(y)); + --y; + update_block(+1, bx.w0(x), bx.w1(x), by.w0(y), by.w0(y + 1)); + } + set_med(x, y); + } +#endif + } + + inline void update_block(int op, int x0, int x1, int y0, int y1) + { + for (int y = y0; y < y1; ++y) + { + for (int x = x0; x < x1; ++x) + { + wr.update(op, pack(x, y)); + } + } + } + + inline void set_med(int x, int y) + { + out[coord(x, y)] = wr.get_med(); + } + + inline int pack(int x, int y) const + { + return y * bx.size + x; + } + + inline int coord(int x, int y) const + { + return (y + by.start) * bx.dim.size + (x + bx.start); + } + + WindowRank wr; + BDim bx; + BDim by; + const T *const in; + T *const out; +}; + +template +class MedCalc1D +{ +public: + MedCalc1D(int b_, Dim dimx_, const T *in_, T *out_) + : wr(b_), bx(dimx_), in(in_), out(out_) + { + } + + void run(int bx_) + { + bx.set(bx_); + calc_rank(); + medians(); + } + +private: + void calc_rank() + { + wr.init_start(); + for (int x = 0; x < bx.size; ++x) + { + wr.init_feed(in[coord(x)], pack(x)); + } + wr.init_finish(); + } + + void medians() + { +#ifdef NAIVE + for (int x = bx.b0; x < bx.b1; ++x) + { + wr.clear(); + update_block(+1, bx.w0(x), bx.w1(x)); + set_med(x); + } +#else + wr.clear(); + int x = bx.b0; + update_block(+1, bx.w0(x), bx.w1(x)); + set_med(x); + while (x + 1 < bx.b1) + { + if (x >= bx.dim.h) + { + wr.update(-1, pack(x - bx.dim.h)); + } + ++x; + if (x + bx.dim.h < bx.size) + { + wr.update(+1, pack(x + bx.dim.h)); + } + set_med(x); + } +#endif + } + + inline void update_block(int op, int x0, int x1) + { + for (int x = x0; x < x1; ++x) + { + wr.update(op, pack(x)); + } + } + + inline void set_med(int x) + { + out[coord(x)] = wr.get_med(); + } + + inline int pack(int x) const + { + return x; + } + + inline int coord(int x) const + { + return x + bx.start; + } + + WindowRank wr; + BDim bx; + const T *const in; + T *const out; +}; + + +// T: float or double. +// x, y: image dimensions, width x pixels and height y pixels. +// hx, hy: window RADIUS in x and y directions. +// blockhint: preferred block size, 0 = pick automatically. +// in: input image. +// out: output image. +// +// Total window size will be (2*hx+1) * (2*hy+1) pixels. +// +// Pixel (i,j) for 0 <= i < x and 0 <= j < y is located at +// in[j*x + i] and out[j*x + i]. +template +void median_filter_impl_2d(int x, int y, int hx, int hy, int b, const T *in, T *out) +{ + if (2 * hx + 1 > b || 2 * hy + 1 > b) + { + throw std::invalid_argument("window too large for this block size"); + } + Dim dimx(b, x, hx); + Dim dimy(b, y, hy); +#pragma omp parallel + { + MedCalc2D mc(b, dimx, dimy, in, out); +#pragma omp for collapse(2) + for (int by = 0; by < dimy.count; ++by) + { + for (int bx = 0; bx < dimx.count; ++bx) + { + mc.run(bx, by); + } + } + } +} + +// T: float or double. +// x image dimensions, width x pixels. +// hx: window RADIUS +// blockhint: preferred block size, 0 = pick automatically. +// in: input image. +// out: output image. +// +// Total window size will be (2*hx+1) pixels. +template +void median_filter_impl_1d(int x, int hx, int b, const T *in, T *out) +{ + if (2 * hx + 1 > b) + { + throw std::invalid_argument("window too large for this block size"); + } + Dim dimx(b, x, hx); +#pragma omp parallel + { + MedCalc1D mc(b, dimx, in, out); +#pragma omp for + for (int bx = 0; bx < dimx.count; ++bx) + { + mc.run(bx); + } + } +} + +template +void median_filter_2d(int x, int y, int hx, int hy, int blockhint, const T *in, T *out) +{ + int h = std::max(hx, hy); + int blocksize = blockhint ? blockhint : choose_blocksize_2d(h); + median_filter_impl_2d(x, y, hx, hy, blocksize, in, out); +} + +template +void median_filter_1d(int x, int hx, int blockhint, const T *in, T *out) +{ + int blocksize = blockhint ? blockhint : choose_blocksize_1d(hx); + median_filter_impl_1d(x, hx, blocksize, in, out); +} + +template void median_filter_2d(int nx, int ny, int hx, int hy, int blockhint, const float *in, float *out); +template void median_filter_2d(int nx, int ny, int hx, int hy, int blockhint, const double *in, double *out); + +template void median_filter_1d(int nx, int hx, int blockhint, const float *in, float *out); +template void median_filter_1d(int nx, int hx, int blockhint, const double *in, double *out); + + +/* + * Interface to Python: + * Follow interface of ndimage.median_filter where size=(dx,dy) is box size (not radius). + */ +void median_filter_2d_float(int nx, int ny, int box_x, int box_y, int blockhint, const float *in, float *out) +{ + int hx = (box_x-1)/2; // radius from box size + int hy = (box_y-1)/2; + return median_filter_2d(nx, ny, hx, hy, blockhint, in, out); +} + +void median_filter_2d_double(int nx, int ny, int box_x, int box_y, int blockhint, const double *in, double *out) +{ + int hx = (box_x-1)/2; // radius from box size + int hy = (box_y-1)/2; + return median_filter_2d(nx, ny, hx, hy, blockhint, in, out); +} + +/* + * Interface to Python: + * Follow interface of ndimage.median_filter where size=(dx,dy) is box size (not radius). + */ +void median_filter_1d_float(int nx, int box_x, int blockhint, const float *in, float *out) +{ + int hx = (box_x-1)/2; // radius from box size + return median_filter_1d(nx, hx, blockhint, in, out); +} + +void median_filter_1d_double(int nx, int box_x, int blockhint, const double *in, double *out) +{ + int hx = (box_x-1)/2; // radius from box size + return median_filter_1d(nx, hx, blockhint, in, out); +} + diff --git a/python/cextern/fast_median/src/fast_median.hpp b/python/cextern/fast_median/src/fast_median.hpp new file mode 100644 index 00000000..988a867d --- /dev/null +++ b/python/cextern/fast_median/src/fast_median.hpp @@ -0,0 +1,27 @@ +#ifndef FILTER_H +#define FILTER_H + +// T: float or double. +// x, y: image dimensions, width x pixels and height y pixels. +// hx, hy: window size in x and y directions. +// blockhint: preferred block size, 0 = pick automatically. +// in: input image. +// out: output image. + +extern "C" +{ + void median_filter_2d_float(int nx, int ny, int box_x, int box_y, int blockhint, const float *in, float *out); + void median_filter_2d_double(int nx, int ny, int box_x, int box_y, int blockhint, const double *in, double *out); + void median_filter_1d_float(int nx, int box_x, int blockhint, const float *in, float *out); + void median_filter_1d_double(int nx, int box_x, int blockhint, const double *in, double *out); +} + +template +void median_filter_2d(int nx, int ny, int hx, int hy, int blockhint, const T *in, T *out); + +// As above, for the special case y = 1, hy = 0. + +template +void median_filter_1d(int x, int hx, int blockhint, const T *in, T *out); + +#endif diff --git a/python/cextern/fast_median/test/testfilter.py b/python/cextern/fast_median/test/testfilter.py new file mode 100644 index 00000000..06706be5 --- /dev/null +++ b/python/cextern/fast_median/test/testfilter.py @@ -0,0 +1,15 @@ +import numpy as np +from cextern.fast_median.fast_median import fast_median_filter_2d +from scipy.ndimage import median_filter +import time + +s = int(4000) +img = np.random.random(size=[s,s]) +img = img.astype(np.float32) + +t = time.time() +img_r = fast_median_filter_2d(img, size=(5,5)) +print(f'lib.median_filter_2d_float: {time.time()-t:.2f} s') +t = time.time() +tmp = median_filter(img, size=(5,5)) +print(f'ndimage.median_filter: {time.time()-t:.2f} s') diff --git a/python/cextern/fast_median/test/testfilter2.py b/python/cextern/fast_median/test/testfilter2.py new file mode 100644 index 00000000..96e2027c --- /dev/null +++ b/python/cextern/fast_median/test/testfilter2.py @@ -0,0 +1,46 @@ +import ctypes +import numpy as np +from numpy.ctypeslib import ndpointer +from scipy.ndimage import median_filter + +HAVE_MEDIAN_SO = True +try: + import platform + s = platform.system() + if s=='Linux': + lib = ctypes.cdll.LoadLibrary("../src/filter.so") + elif s=='Darwin': + lib = ctypes.cdll.LoadLibrary("../src/filter.dylib") + else: + raise Exception('Unknown platform: '+s) + HAVE_MEDIAN_SO = True +except Exception: + print(Exception) + pass + +s1 = 11 +s2 = 7 +b1 = 3 +b2 = 4 +img = np.zeros((s1,s2), dtype=np.float32) +img_r = np.zeros(img.shape, dtype=np.float32) + +img = np.random.random_integers(0,10, size=(s1,s2)).astype(np.float32) + +#template +#void median_filter_2d(int x, int y, int hx, int hy, int blockhint, const T* in, T* out); + +lib.median_filter_2d_float.argtypes = (ctypes.c_int, ctypes.c_int, + ctypes.c_int, ctypes.c_int, + ctypes.c_int, + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS")) + +lib.median_filter_2d_float.restype = None + +# Note we have to flip the shape and box x/y to match python's indexing +lib.median_filter_2d_float(s2, s1, b2, b1, 0, img, img_r) +img_p = median_filter(img, size=(b1,b2), mode='nearest') +print(img) +print(img_r) +print(img_p) diff --git a/python/cextern/fast_median/test/testfilter3.py b/python/cextern/fast_median/test/testfilter3.py new file mode 100644 index 00000000..9f3c2273 --- /dev/null +++ b/python/cextern/fast_median/test/testfilter3.py @@ -0,0 +1,51 @@ +import ctypes +import numpy as np +from numpy.ctypeslib import ndpointer +from scipy.ndimage import median_filter +from astropy.io import fits + +HAVE_MEDIAN_SO = True +try: + import platform + s = platform.system() + if s=='Linux': + lib = ctypes.cdll.LoadLibrary("../src/filter.so") + elif s=='Darwin': + lib = ctypes.cdll.LoadLibrary("../src/filter.dylib") + else: + raise Exception('Unknown platform: '+s) + HAVE_MEDIAN_SO = True +except Exception: + print(Exception) + pass + +#template +#void median_filter_2d(int x, int y, int hx, int hy, int blockhint, const T* in, T* out); + + + +lib.median_filter_2d_float.argtypes = (ctypes.c_int, ctypes.c_int, + ctypes.c_int, ctypes.c_int, + ctypes.c_int, + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS"), + ndpointer(ctypes.c_float, flags="C_CONTIGUOUS")) + +lib.median_filter_2d_float.restype = None + +# C-ordering: last index varies most rapidly + +#// Pixel (i,j) for 0 <= i < x and 0 <= j < y is located at +#// in[j*x + i] and out[j*x + i]. + +with fits.open('/Users/droryn/Downloads/lvmSFrame-00012618.fits') as hdu: + img = hdu[1].data.astype(np.float32) + img_r = np.zeros(img.shape).astype(np.float32) + +img[~np.isfinite(img)] = 0 + +# Note we have to flip the shape and box x/y to match python's indexing +lib.median_filter_2d_float(img.shape[1], img.shape[0], 25, 3, 0, img, img_r) +fits.writeto('/Users/droryn/Downloads/img_r.fits', img_r, overwrite=True) + +img_p = median_filter(img, size=(3,25), mode='nearest') +fits.writeto('/Users/droryn/Downloads/img_p.fits', img_p, overwrite=True) diff --git a/python/lvmdrp/core/image.py b/python/lvmdrp/core/image.py index aa6c942c..ad19fbd9 100644 --- a/python/lvmdrp/core/image.py +++ b/python/lvmdrp/core/image.py @@ -25,6 +25,8 @@ from lvmdrp.core.tracemask import TraceMask from lvmdrp.core.spectrum1d import Spectrum1D, _cross_match_float, _cross_match, _spec_from_lines +from cextern.fast_median.fast_median import fast_median_filter_2d + from lvmdrp import __version__ as drpver def _fill_column_list(columns, width): @@ -1787,9 +1789,9 @@ def medianImg(self, size, propagate_error=False): new_data = copy(self._data) new_error = copy(self._error) - new_data = ndimage.median_filter(new_data, size, mode="nearest") + new_data = fast_median_filter_2d(new_data, size) if propagate_error and new_error is not None: - new_error = numpy.sqrt(ndimage.median_filter(new_error ** 2, size, mode="nearest")) + new_error = numpy.sqrt(fast_median_filter_2d(new_error ** 2, size, mode="nearest")) image = Image(data=new_data, error=new_error, mask=self._mask, header=self._header, origin=self._origin, individual_frames=self._individual_frames, slitmap=self._slitmap) From 5ef5033db1de2a3f5517928920361cd7e89207e9 Mon Sep 17 00:00:00 2001 From: Niv Drory Date: Fri, 13 Sep 2024 13:21:48 -0500 Subject: [PATCH 2/6] print exception if occurs --- python/cextern/fast_median/fast_median.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cextern/fast_median/fast_median.py b/python/cextern/fast_median/fast_median.py index 8788b7c3..9d89fe99 100644 --- a/python/cextern/fast_median/fast_median.py +++ b/python/cextern/fast_median/fast_median.py @@ -50,7 +50,6 @@ HAVE_MEDIAN_SO = True except Exception: - raise print(Exception) pass From 9a591733773f868b7f54a52dbf39edb5000ed66b Mon Sep 17 00:00:00 2001 From: Niv Drory Date: Fri, 13 Sep 2024 13:31:44 -0500 Subject: [PATCH 3/6] Fix typo in name of shared object --- python/cextern/fast_median/fast_median.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cextern/fast_median/fast_median.py b/python/cextern/fast_median/fast_median.py index 9d89fe99..d9362c80 100644 --- a/python/cextern/fast_median/fast_median.py +++ b/python/cextern/fast_median/fast_median.py @@ -10,11 +10,11 @@ s = platform.system() resources_dir = environ.get('LVMDRP_LIB_DIR') or path.join(path.dirname(__file__), 'src') if s=='Linux': - resources_dir = path.join(resources_dir, 'filter.so') + resources_dir = path.join(resources_dir, 'fast_median.so') #print(resources_dir) lib = ctypes.cdll.LoadLibrary(resources_dir) elif s=='Darwin': - resources_dir = path.join(resources_dir, 'filter.dylib') + resources_dir = path.join(resources_dir, 'fast_median.dylib') #print(resources_dir) lib = ctypes.cdll.LoadLibrary(resources_dir) else: From ca0f801edfec412f5661b871b78b8cc8d08ad471 Mon Sep 17 00:00:00 2001 From: Niv Drory Date: Fri, 13 Sep 2024 13:46:04 -0500 Subject: [PATCH 4/6] remove unused import --- python/lvmdrp/core/image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lvmdrp/core/image.py b/python/lvmdrp/core/image.py index ad19fbd9..8e5c3eac 100644 --- a/python/lvmdrp/core/image.py +++ b/python/lvmdrp/core/image.py @@ -13,7 +13,7 @@ from astropy.io import fits as pyfits from astropy.modeling import fitting, models from astropy.stats.biweight import biweight_location, biweight_scale -from scipy import ndimage, signal +from scipy import ndimage from scipy import interpolate from lvmdrp import log From f1b4567fd870045eba1ee161bcfe3df8b0e364d5 Mon Sep 17 00:00:00 2001 From: Niv Drory Date: Fri, 13 Sep 2024 13:47:00 -0500 Subject: [PATCH 5/6] move cextern --- cextern/README.txt | 1 - 1 file changed, 1 deletion(-) delete mode 100644 cextern/README.txt diff --git a/cextern/README.txt b/cextern/README.txt deleted file mode 100644 index 71a85903..00000000 --- a/cextern/README.txt +++ /dev/null @@ -1 +0,0 @@ -C code to be compiled on setup goes here. From 0f1cce863d7a7b144e4b0054e1a34978d39eb416 Mon Sep 17 00:00:00 2001 From: Niv Drory Date: Fri, 13 Sep 2024 13:54:43 -0500 Subject: [PATCH 6/6] Add instructions to compile cextern code to installation instructions --- README.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8bc39264..38ffe70b 100644 --- a/README.md +++ b/README.md @@ -63,10 +63,21 @@ To install the DRP along with its dependencies, you need to run the following st cd lvmdrp ``` +3. Optional, but recommended: compile c-code for better performance -3. Install the DRP package in the current python environment (see [contributing](#contributing-to-lvm-drp-development) section below for a replacement of this step): + ```bash + pushd lvmdrp/python/cextern/fast_median/src + make + popd + ``` + This should have created a fast_median.so (Linux) or fast_median.dylib (MacOS) in the src directory. + Set environment variable `LVMDRP_LIB_DIR` if you move the shared library. If you leave it in src/, no need to set this. + We will automate this step eventually ;-) + +4. Install the DRP package in the current python environment (see [contributing](#contributing-to-lvm-drp-development) section below for a replacement of this step). Make sure you're back in the lvmdrp directory. ```bash + cd lvmdrp pip install . ```